Compare commits

..

87 Commits

Author SHA1 Message Date
bddf3f96e6 Use Debian instead of Alpine in Dockerfile 2023-02-20 19:38:22 +01:00
1e9ac00800 Merge #3505
3505: Csv delimiter r=irevoire a=irevoire

Fixes https://github.com/meilisearch/meilisearch/issues/3442
Closes https://github.com/meilisearch/meilisearch/pull/2803
Specified in https://github.com/meilisearch/specifications/pull/221

This PR is a reimplementation of https://github.com/meilisearch/meilisearch/pull/2803, on the new engine. Thanks for your idea and initial PR `@MixusMinimax`; sorry I couldn't update/merge your PR. Way too many changes happened on the engine in the meantime.

**Attention to reviewer**: I had to update deserr to add support for deserializing `char`s

-------

It introduces four new error messages:
- Invalid value in parameter csvDelimiter: expected a string of one character, but found an empty string
- Invalid value in parameter csvDelimiter: expected a string of one character, but found the following string of 5 characters: doggo
- csv delimiter must be an ascii character. Found: 🍰 
- The Content-Type application/json does not support the use of a csv delimiter. The csv delimiter can only be used with the Content-Type text/csv.

And one error code:
- `invalid_index_csv_delimiter`

The `invalid_content_type` error code is now also used when we encounter the `csvDelimiter` query parameter with a non-csv content type.
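
For reference, here is a minimal sketch of the validation rules behind these messages; the function and its exact wording are illustrative and do not claim to match the actual Meilisearch implementation.

```rust
// Hedged sketch: validate a csvDelimiter query parameter against the rules listed above.
fn validate_csv_delimiter(value: &str, content_type: &str) -> Result<u8, String> {
    let mut chars = value.chars();
    let delimiter = match (chars.next(), chars.next()) {
        // exactly one character
        (Some(c), None) => c,
        (None, _) => return Err("expected a string of one character, but found an empty string".to_string()),
        _ => return Err(format!("expected a string of one character, but found the following string of {} characters: {value}", value.chars().count())),
    };
    // the delimiter must be ASCII
    if !delimiter.is_ascii() {
        return Err(format!("csv delimiter must be an ascii character. Found: {delimiter}"));
    }
    // the parameter only makes sense for CSV payloads
    if content_type != "text/csv" {
        return Err(format!("The Content-Type {content_type} does not support the use of a csv delimiter. The csv delimiter can only be used with the Content-Type text/csv."));
    }
    Ok(delimiter as u8)
}

fn main() {
    assert_eq!(validate_csv_delimiter(";", "text/csv"), Ok(b';'));
    assert!(validate_csv_delimiter("", "text/csv").is_err());
    assert!(validate_csv_delimiter("doggo", "text/csv").is_err());
    assert!(validate_csv_delimiter("🍰", "text/csv").is_err());
    assert!(validate_csv_delimiter(";", "application/json").is_err());
}
```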

Co-authored-by: Tamo <tamo@meilisearch.com>
2023-02-20 17:01:36 +00:00
b08a49a16e Merge #3319 #3470
3319: Transparently resize indexes on MaxDatabaseSizeReached errors r=Kerollmops a=dureuill

# Pull Request

## Related issue
Related to https://github.com/meilisearch/meilisearch/discussions/3280, depends on https://github.com/meilisearch/milli/pull/760

## What does this PR do?

### User standpoint

- Meilisearch no longer fails tasks that encounter the `milli::UserError(MaxDatabaseSizeReached)` error.
- Instead, these tasks are retried after increasing the maximum size allocated to the index where the failure occurred.

### Implementation standpoint

- Add `Batch::index_uid` to get the `index_uid` of a batch of tasks, if there is one
- `IndexMapper::create_or_open_index` now takes an additional `size` argument that allows (re)opening indexes with a size different from the base `IndexScheduler::index_size` field
- `IndexScheduler::tick` now returns a `Result<TickOutcome>` instead of a `Result<usize>`. This offers more explicit control over what the behavior of the next tick should be.
- Add `IndexStatus::BeingResized`, which contains a handle that a thread can use to wait for the resize operation to complete and for the index to become available again.
- Add `IndexMapper::resize_index` to increase the size of an index.
- In `IndexScheduler::tick`, intercept task batches that failed due to `MaxDatabaseSizeReached`, resize the index that caused the error, and request a new tick that will eventually handle the still-enqueued tasks (a minimal sketch of this loop follows).
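
The following is a minimal, self-contained sketch of that retry-on-resize loop; `StubScheduler`, `ProcessError`, and the doubling policy are illustrative stand-ins, not the actual index-scheduler types.

```rust
#[derive(Debug)]
enum ProcessError {
    MaxDatabaseSizeReached,
    Other(String),
}

#[derive(Debug, PartialEq)]
enum TickOutcome {
    /// A batch was processed or a resize was triggered: schedule another tick right away.
    TickAgain(u64),
    /// Nothing to do until a new task gets enqueued.
    WaitForSignal,
}

struct StubScheduler {
    index_size: u64,
    enqueued: u64,
}

impl StubScheduler {
    fn process_batch(&mut self) -> Result<u64, ProcessError> {
        // Pretend the index is full whenever its map size is smaller than 4 units.
        if self.index_size < 4 {
            Err(ProcessError::MaxDatabaseSizeReached)
        } else {
            let processed = self.enqueued;
            self.enqueued = 0;
            Ok(processed)
        }
    }

    fn tick(&mut self) -> Result<TickOutcome, ProcessError> {
        if self.enqueued == 0 {
            return Ok(TickOutcome::WaitForSignal);
        }
        match self.process_batch() {
            Ok(processed) => Ok(TickOutcome::TickAgain(processed)),
            Err(ProcessError::MaxDatabaseSizeReached) => {
                // Grow the index and leave the tasks enqueued; the next tick retries them.
                self.index_size *= 2;
                Ok(TickOutcome::TickAgain(0))
            }
            Err(other) => Err(other),
        }
    }
}

fn main() {
    let mut scheduler = StubScheduler { index_size: 1, enqueued: 3 };
    // Size goes 1 -> 2 -> 4: two resizes, then the batch finally goes through.
    assert_eq!(scheduler.tick().unwrap(), TickOutcome::TickAgain(0));
    assert_eq!(scheduler.tick().unwrap(), TickOutcome::TickAgain(0));
    assert_eq!(scheduler.tick().unwrap(), TickOutcome::TickAgain(3));
    assert_eq!(scheduler.tick().unwrap(), TickOutcome::WaitForSignal);
}
```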

## Testing the PR

The following diff can be applied to this branch to make testing the PR easier:

<details>


```diff
diff --git a/index-scheduler/src/index_mapper.rs b/index-scheduler/src/index_mapper.rs
index 553ab45a..022b2f00 100644
--- a/index-scheduler/src/index_mapper.rs
+++ b/index-scheduler/src/index_mapper.rs
`@@` -228,13 +228,15 `@@` impl IndexMapper {
 
         drop(lock);
 
+        std::thread::sleep_ms(2000);
+
         let current_size = index.map_size()?;
         let closing_event = index.prepare_for_closing();
-        log::info!("Resizing index {} from {} to {} bytes", name, current_size, current_size * 2);
+        log::error!("Resizing index {} from {} to {} bytes", name, current_size, current_size * 2);
 
         closing_event.wait();
 
-        log::info!("Resized index {} from {} to {} bytes", name, current_size, current_size * 2);
+        log::error!("Resized index {} from {} to {} bytes", name, current_size, current_size * 2);
 
         let index_path = self.base_path.join(uuid.to_string());
         let index = self.create_or_open_index(&index_path, None, 2 * current_size)?;
`@@` -268,8 +270,10 `@@` impl IndexMapper {
             match index {
                 Some(Available(index)) => break index,
                 Some(BeingResized(ref resize_operation)) => {
+                    log::error!("waiting for resize end");
                     // Deadlock: no lock taken while doing this operation.
                     resize_operation.wait();
+                    log::error!("trying our luck again!");
                     continue;
                 }
                 Some(BeingDeleted) => return Err(Error::IndexNotFound(name.to_string())),
diff --git a/index-scheduler/src/lib.rs b/index-scheduler/src/lib.rs
index 11b17d05..242dc095 100644
--- a/index-scheduler/src/lib.rs
+++ b/index-scheduler/src/lib.rs
`@@` -908,6 +908,7 `@@` impl IndexScheduler {
     ///
     /// Returns the number of processed tasks.
     fn tick(&self) -> Result<TickOutcome> {
+        log::error!("ticking!");
         #[cfg(test)]
         {
             *self.run_loop_iteration.write().unwrap() += 1;
diff --git a/meilisearch/src/main.rs b/meilisearch/src/main.rs
index 050c825a..63f312f6 100644
--- a/meilisearch/src/main.rs
+++ b/meilisearch/src/main.rs
`@@` -25,7 +25,7 `@@` fn setup(opt: &Opt) -> anyhow::Result<()> {
 
 #[actix_web::main]
 async fn main() -> anyhow::Result<()> {
-    let (opt, config_read_from) = Opt::try_build()?;
+    let (mut opt, config_read_from) = Opt::try_build()?;
 
     setup(&opt)?;
 
`@@` -56,6 +56,8 `@@` We generated a secure master key for you (you can safely copy this token):
         _ => (),
     }
 
+    opt.max_index_size = byte_unit::Byte::from_str("1MB").unwrap();
+
     let (index_scheduler, auth_controller) = setup_meilisearch(&opt)?;
 
     #[cfg(all(not(debug_assertions), feature = "analytics"))]
```
</details>

Mainly, these debug changes do the following:

- Set the default index size to 1MiB so that index resizes are initially frequent
- Turn some logs from info to error so that they can be displayed with `--log-level ERROR` (hiding the other infos)
- Add a long sleep between the beginning and the end of the resize so that we can observe the `BeingResized` index status (otherwise it would never come up in my tests)

## Open questions

- Is the x2 growth factor the right choice? For a `Vec` in memory it makes sense, but here we're manipulating quantities that are potentially on the order of 500GiB. For bigger indexes it may make more sense to add at most e.g. 100GiB on each resize operation, avoiding big steps like 500GiB -> 1TiB (the two policies are compared in the sketch below).
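
As a quick way to compare the two policies, here is an illustrative bit of arithmetic; the 100 GiB cap is only an example value, not something Meilisearch implements.

```rust
// Doubling vs. capped additive growth for an index map size (sizes in bytes).
const MAX_STEP: u64 = 100 * 1024 * 1024 * 1024; // add at most 100 GiB per resize

fn next_size_doubling(current: u64) -> u64 {
    current * 2
}

fn next_size_capped(current: u64) -> u64 {
    current + current.min(MAX_STEP)
}

fn main() {
    let gib = 1024 * 1024 * 1024_u64;
    for size in [gib, 100 * gib, 500 * gib] {
        // The two policies only diverge once the index exceeds the 100 GiB cap.
        println!(
            "{} GiB -> doubling: {} GiB, capped: {} GiB",
            size / gib,
            next_size_doubling(size) / gib,
            next_size_capped(size) / gib
        );
    }
}
```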

## PR checklist
Please check if your PR fulfills the following requirements:
- [ ] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [ ] Have you read the contributing guidelines?
- [ ] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


3470: Autobatch addition and deletion r=irevoire a=irevoire

This PR gives Meilisearch the ability to batch document additions and deletions together.

Fix https://github.com/meilisearch/meilisearch/issues/3440

--------------

Things to check before merging:

- [x] What happens if we delete the same documents multiple times? -> add a test
- [x] What happens if a documentDeletion gets batched with a documentAddition but the index doesn't exist yet? It should not work (see the sketch below).
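
For reference, here is a tiny standalone sketch of the batching rule checked above: a pending deletion followed by an addition is merged into a single document operation only when the index already exists, or when the addition is not allowed to create it. The function name is illustrative, not the actual autobatcher API.

```rust
fn can_batch_deletion_then_addition(index_already_exists: bool, addition_can_create_index: bool) -> bool {
    index_already_exists || !addition_can_create_index
}

fn main() {
    assert!(can_batch_deletion_then_addition(true, true));   // index exists: batch
    assert!(can_batch_deletion_then_addition(false, false)); // addition can't create the index: batch
    assert!(!can_batch_deletion_then_addition(false, true)); // addition would create the index: don't batch
    println!("deletion + addition batching rules hold");
}
```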

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
Co-authored-by: Tamo <tamo@meilisearch.com>
2023-02-20 15:00:19 +00:00
a8f6f108e0 Merge #3515
3515: Consider null as a valid geo field r=irevoire a=irevoire

Fix #3497
Associated spec: https://github.com/meilisearch/specifications/pull/222
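
As an illustration of the accepted shape, here is a hedged sketch assuming the `_geo` field deserializes into an `Option`; the `Document` and `GeoPoint` structs are hypothetical, only the `_geo` object with `lat`/`lng` follows the public format.

```rust
// Requires `serde` (with the `derive` feature) and `serde_json`.
use serde::Deserialize;
use serde_json::json;

#[derive(Debug, Deserialize)]
struct GeoPoint {
    lat: f64,
    lng: f64,
}

#[derive(Debug, Deserialize)]
struct Document {
    id: u64,
    // Optional, so both a missing field and an explicit `null` are accepted.
    #[serde(rename = "_geo", default)]
    geo: Option<GeoPoint>,
}

fn main() {
    // `"_geo": null` is treated like an absent geo field rather than rejected.
    let doc: Document = serde_json::from_value(json!({ "id": 1, "_geo": null })).unwrap();
    assert_eq!(doc.id, 1);
    assert!(doc.geo.is_none());

    let doc: Document = serde_json::from_value(json!({ "id": 2, "_geo": { "lat": 45.0, "lng": 4.8 } })).unwrap();
    assert_eq!(doc.geo.map(|g| (g.lat, g.lng)), Some((45.0, 4.8)));
}
```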

Co-authored-by: Tamo <tamo@meilisearch.com>
2023-02-20 14:12:55 +00:00
1479050f7a apply review suggestions 2023-02-20 14:53:37 +01:00
97b8c32e22 Merge #3514
3514: Bump version of mini-dashboard to v0.2.6 r=irevoire a=bidoubiwa

Update the version of the mini-dashboard to v0.2.6.

See [release notes](https://github.com/meilisearch/mini-dashboard/releases/tag/v0.2.6).

Co-authored-by: Charlotte Vermandel <charlottevermandel@gmail.com>
2023-02-20 13:21:00 +00:00
35f6c624bc Make sure we don't leave the in memory hashmap in an inconsistent state 2023-02-20 13:55:32 +01:00
1116788475 Resize indexes when they're full 2023-02-20 13:55:32 +01:00
951a5b5832 Add IndexMapper::resize_index fn 2023-02-20 13:55:32 +01:00
1c670d7fa0 Add IndexStatus::BeingResized 2023-02-20 13:55:32 +01:00
6cc3797aa1 IndexScheduler::tick returns a TickOutcome 2023-02-20 13:55:31 +01:00
faf1e17a27 create_or_open_index takes a map_size argument 2023-02-20 13:55:31 +01:00
4c519c2ab3 Add Batch::index_uid 2023-02-20 13:55:31 +01:00
dd120e0e16 Bump version of mini-dashboard to v0.2.6 2023-02-20 13:45:57 +01:00
18796d6e6a Consider null as a valid geo object 2023-02-20 13:45:51 +01:00
c91bfeaf15 Merge #3467
3467: Identify builds git tagged with `prototype-...` in CLI and analytics r=curquiza a=dureuill

# Pull Request

## What does this PR do?

- Parses the last git tag to extract a prototype name if:
  - The current build was made precisely on the prototype tag (not on a commit after the tag)
  - The prototype tag name respects the following conditions:
    1. starts with `prototype-`
    2. ends with a number
    3. the hyphen-separated segment right before the number is not a number (required to reject builds made from commits after the tag).
- Display the prototype name in the launch summary in the CLI
- Send the prototype name to analytics if any
- Update prototypes instructions in CONTRIBUTING.md

|`VERGEN_GIT_SEMVER_LIGHTWEIGHT` value | Prototype |
|---|---|
| `Some("prototype-geo-bounding-box-0-139-gcde89018")` | `None` (does not end with a number) |
| `Some("prototype-geo-bounding-box-0-139-89018")` | `None` (before the last segment is a number) |
| `Some("prototype-geo-bounding-box-0")` | `Some("prototype-geo-bounding-box-0")` |
| `Some("prototype-geo-bounding-box")` | `None` (does not end with a number") |
| `Some("geo-bounding-box-0")` | `None` (does not start with "prototype") |
| `None` | `None` | 
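
Below is a standalone re-implementation sketch of those three rules, using the table rows as test cases; `prototype_name` is an illustrative helper, not the actual Meilisearch build code.

```rust
fn prototype_name(git_describe: &str) -> Option<&str> {
    // 1. must start with `prototype-`
    if !git_describe.starts_with("prototype-") {
        return None;
    }
    let mut segments = git_describe.rsplit('-');
    // 2. must end with a number
    let last = segments.next()?;
    if last.parse::<u64>().is_err() {
        return None;
    }
    // 3. the segment right before that number must not itself be a number
    //    (rejects `git describe` output produced from commits after the tag)
    let before_last = segments.next()?;
    if before_last.parse::<u64>().is_ok() {
        return None;
    }
    Some(git_describe)
}

fn main() {
    assert_eq!(prototype_name("prototype-geo-bounding-box-0"), Some("prototype-geo-bounding-box-0"));
    assert_eq!(prototype_name("prototype-geo-bounding-box-0-139-gcde89018"), None);
    assert_eq!(prototype_name("prototype-geo-bounding-box-0-139-89018"), None);
    assert_eq!(prototype_name("prototype-geo-bounding-box"), None);
    assert_eq!(prototype_name("geo-bounding-box-0"), None);
}
```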

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2023-02-20 09:27:51 +00:00
28961b2ad1 Merge #3499
3499: Use the workspace inheritance r=Kerollmops a=irevoire

Use the workspace inheritance [introduced in rust 1.64](https://blog.rust-lang.org/2022/09/22/Rust-1.64.0.html#cargo-improvements-workspace-inheritance-and-multi-target-builds).

It allows us to define the version of Meilisearch once in the main `Cargo.toml` and lets all the other `Cargo.toml` files use this version.

`@curquiza` I added you as a reviewer because I had to patch some CI scripts

And `@Kerollmops`, I had to bump the `cargo_toml` crate because our version was getting old and didn't support the feature yet.

Also, in another PR, I would like to unify some of our dependencies to ensure we always stay in sync between all our crates.

Co-authored-by: Tamo <tamo@meilisearch.com>
2023-02-17 09:52:29 +00:00
895ab2906c apply review suggestions 2023-02-16 18:42:47 +01:00
f11c7d4b62 cargo run execute meilisearch by default 2023-02-16 18:03:45 +01:00
e79f6f87f6 make cargo fmt&clippy happy 2023-02-16 18:00:40 +01:00
5367d8f05a add two tests on the indexing of csvs 2023-02-16 17:37:11 +01:00
52686da028 test various errors on the document resource 2023-02-16 17:37:10 +01:00
8c074f5028 implements the csv delimiter without tests
Co-authored-by: Maxi Barmetler <maxi.barmetler@gmail.com>
2023-02-16 17:35:36 +01:00
49e18da23e Do not escape tag name
$() syntax is not interpreted by the Dockerfile
2023-02-16 10:53:14 +01:00
54240db495 Add note in code so one does not forget next time 2023-02-16 10:53:14 +01:00
e1ed4bc750 Change Dockerfile to also pass the VERGEN_GIT_SEMVER_LIGHTWEIGHT when building 2023-02-16 10:53:14 +01:00
9bd1cfb3a3 Ignore -dirty flag 2023-02-16 10:53:14 +01:00
a341c94871 Update contributing.md 2023-02-16 10:53:14 +01:00
f46cf46b8c Add prototype to analytics if any 2023-02-16 10:53:14 +01:00
c3a30a5a91 If using a prototype, display its name at Meilisearch startup 2023-02-16 10:53:14 +01:00
143e3cf948 Merge #3490
3490: Fix attributes set candidates r=curquiza a=ManyTheFish

# Pull Request

Fix attributes set candidates for v1.1.0

## details

The attribute criterion was not returning the remaining candidates when its internal algorithm had been exhausted.
This loss of candidates in the attribute criterion led to the bug reported in the issue linked below.
After some investigation, it seems that it was the only criterion that had this behavior.

We are now returning the remaining candidates instead of an empty bitmap.

## Related issue

Fixes #3483
PR on milli for v1.0.1: https://github.com/meilisearch/milli/pull/777


Co-authored-by: ManyTheFish <many@meilisearch.com>
2023-02-15 17:38:07 +00:00
ab2adba183 update our CI scripts accordingly 2023-02-15 13:56:24 +01:00
74d1a67a99 Use the workspace inheritance feature of rust 1.64 2023-02-15 13:51:07 +01:00
91ce8a5e67 Merge #3492
3492: Bump deserr r=Kerollmops a=irevoire

Bump deserr to the latest version:
- We now use the default actix-web extractors that deserr provides (which were copy/pasted from meilisearch)
- We also use the default `JsonError` message provided by deserr instead of defining our own in meilisearch
- Finally, we get the new `did you mean?` error message. Fix #3493

Co-authored-by: Tamo <tamo@meilisearch.com>
2023-02-15 10:05:05 +00:00
fd7ae1883b Merge #3495
3495: Add tests with rust nightly in CI r=curquiza a=ztkmkoo

# Pull Request

## Related issue
Fixes #3402 

## What does this PR do?
- add ci test with rust nightly
- make test with rust stable not run on schedule event

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Kebron <ztkmkoo@gmail.com>
2023-02-15 07:53:17 +00:00
42a3cdca66 get rids of the unwrap_any function in favor of take_cf_content 2023-02-14 20:06:31 +01:00
a43765d454 use the pre-defined deserr extractors 2023-02-14 20:05:30 +01:00
769576fd94 get rids of the whole error_message module since it has been integrated into the last version of deserr 2023-02-14 20:05:27 +01:00
8fb7b1d10f bump deserr 2023-02-14 20:04:30 +01:00
d494c29768 Merge #3479
3479: Unify "Bad latitude" & "Bad longitude" errors r=irevoire a=cymruu

# Pull Request

## Related issue
Fix part of #3006

## What does this PR do?
- Moved `BadGeoLat`, `BadGeoLng`, and `BadGeoBoundingBoxTopIsBelowBottom` out of `FilterError` into the newly introduced `ParseGeoError` error type.
- Renamed the `BadGeo` error to `ReservedGeo`
- Used new `ParseGeoError` type in `FilterError` and `AscDescError`

Screenshot: 
![image](https://user-images.githubusercontent.com/2981598/217927231-fe23b6a3-2ea8-4145-98af-38eb61c4ff16.png)

I ran `cargo test --package milli -- --test-threads 1` and the tests passed.
`--test-threads` was set to 1 because my OS complained about too many open files.
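
As a hedged sketch of the shape of this split (the real definitions and messages live in milli; variant names follow the PR summary, the error strings are invented for illustration):

```rust
// Requires the `thiserror` crate.
use thiserror::Error;

#[derive(Debug, Error)]
pub enum ParseGeoError {
    #[error("invalid latitude: {original}")]
    BadGeoLat { original: String },
    #[error("invalid longitude: {original}")]
    BadGeoLng { original: String },
    #[error("the top of the bounding box is below its bottom")]
    BadGeoBoundingBoxTopIsBelowBottom,
}

// Both consumers wrap the shared geo parsing error and forward its message as-is.
#[derive(Debug, Error)]
pub enum FilterError {
    #[error(transparent)]
    ParseGeo(#[from] ParseGeoError),
    // ... the remaining filter-specific variants
}

#[derive(Debug, Error)]
pub enum AscDescError {
    #[error(transparent)]
    ParseGeo(#[from] ParseGeoError),
    // ... the remaining asc/desc-specific variants
}

fn main() {
    // The #[from] attribute gives us the conversion used by the sort/filter parsers.
    let filter_err: FilterError = ParseGeoError::BadGeoLat { original: "ninety".into() }.into();
    println!("{filter_err}");
}
```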

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [ ] Have you made sure that the title is accurate and descriptive of the changes?


Co-authored-by: Filip Bachul <filipbachul@gmail.com>
Co-authored-by: filip <filipbachul@gmail.com>
2023-02-14 18:35:51 +00:00
74dcfe9676 Fix a bug when you update a document that was already present in the db, deleted and then inserted again in the same transform 2023-02-14 19:09:40 +01:00
1b1703a609 make a small optimization to merge obkvs a little bit faster 2023-02-14 18:32:41 +01:00
fb5e4957a6 fix and test the early exit in case a grenad ends with a deletion 2023-02-14 18:23:57 +01:00
8de3c9f737 Update milli/src/update/index_documents/transform.rs
Co-authored-by: Clément Renault <clement@meilisearch.com>
2023-02-14 17:57:14 +01:00
43a19d0709 document the operation enum + the grenads 2023-02-14 17:55:26 +01:00
29d14bed90 get rids of the let/else syntax 2023-02-14 17:45:46 +01:00
f3b54337f9 Merge #3174
3174: Allow wildcards at the end of index names for API Keys and Tenant tokens r=irevoire a=Kerollmops

This PR introduces wildcards at the end of index names when identifying indexes in API keys and tenant tokens. It fixes #2788 and fixes #2908. This PR is based on `@akhildevelops`' work.

Note that when a tenant token filter is chosen to restrict a search, it is always the most restrictive pattern that is chosen. If we have an index pattern _prod*_ that defines _filter1_ and _p*_ that defines _filter2_, the engine will choose _filter1_ over _filter2_, as it is defined for the more restrictive pattern, _prod*_. Restrictiveness is determined by 1. whether the pattern is exact (without _*_) and 2. the length of the pattern.

It is a continuation of work that has already started and should close #2869.
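
Below is a standalone sketch of those two restrictiveness rules (exactness first, then pattern length); `matches` and `most_restrictive` are illustrative helpers, not the actual `IndexUidPattern` API.

```rust
// A pattern either matches exactly or, when it ends with `*`, matches by prefix.
fn matches(pattern: &str, index: &str) -> bool {
    match pattern.strip_suffix('*') {
        Some(prefix) => index.starts_with(prefix),
        None => pattern == index,
    }
}

/// Among the patterns that match `index`, pick the most restrictive one:
/// an exact pattern wins over a wildcard one, and a longer pattern wins over a shorter one.
fn most_restrictive<'a>(patterns: &[&'a str], index: &str) -> Option<&'a str> {
    patterns
        .iter()
        .copied()
        .filter(|p| matches(p, index))
        .max_by_key(|p| (!p.ends_with('*'), p.len()))
}

fn main() {
    let patterns = ["p*", "prod*"];
    // For the index `production`, `prod*` (and its filter) is preferred over `p*`.
    assert_eq!(most_restrictive(&patterns, "production"), Some("prod*"));
    println!("`prod*` is preferred over `p*` for index `production`");
}
```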

Co-authored-by: Clément Renault <clement@meilisearch.com>
Co-authored-by: Kerollmops <clement@meilisearch.com>
2023-02-14 16:12:01 +00:00
7f3ae40204 Remove a useless comment regarding the index pattern error code 2023-02-14 17:09:20 +01:00
a53536836b fmt 2023-02-14 17:04:22 +01:00
b095325bf8 Add tests with rust nightly in CI 2023-02-14 15:33:12 +00:00
d7ad39ad77 fix: clippy error 2023-02-14 00:15:35 +01:00
849de089d2 add thiserror for AscDescError 2023-02-14 00:15:35 +01:00
7f25007d31 Update milli/src/asc_desc.rs
Co-authored-by: Tamo <irevoire@protonmail.ch>
2023-02-14 00:15:35 +01:00
c810af3ebf implement From<ParseGeoError> for AscDescError 2023-02-14 00:15:35 +01:00
c0b77773ba fmt asc_desc 2023-02-14 00:15:35 +01:00
7481559e8b move BadGeo to FilterError 2023-02-14 00:15:35 +01:00
83c765ce6c implement From<ParseGeoError> for FilterError 2023-02-14 00:15:35 +01:00
4c91037602 use ParseGeoError in sort parser 2023-02-14 00:15:35 +01:00
825923f6fc export ParseGeoError 2023-02-14 00:15:35 +01:00
e405702733 chore: introduce new error ParseGeoError type 2023-02-14 00:15:35 +01:00
6fa877efb0 Fix attributes set candidates 2023-02-13 17:49:52 +01:00
4b1cd10653 Return an internal error when index pattern should be valid 2023-02-13 17:49:42 +01:00
47748395dc Update an authentication comment
Co-authored-by: Many the fish <many@meilisearch.com>
2023-02-13 17:20:08 +01:00
ff595156d7 Merge #3480
3480: Gitignore vscode & jetbrains IDE folders r=curquiza a=AymanHamdoun

# Pull Request

## Related issue
There is no issue for it, and I couldn't find an appropriate category to create one.

## What does this PR do?
- It's just a .gitignore edit so people who use VS Code and JetBrains IDEs (like IntelliJ) don't accidentally commit the folder the IDE generates to store local project configs. (I honestly wanted to fork the repo to add something else, but this bothered me enough to make a PR for it first.)

## PR checklist
Please check if your PR fulfills the following requirements:
- [ ✔️ ] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [✔️  ] Have you read the contributing guidelines?
- [✔️  ] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Ayman <ayman.s.hamdoun@gmail.com>
2023-02-13 10:47:25 +00:00
8770088df3 remove idea folder 2023-02-10 11:45:02 +04:00
827c1c8447 edit gitignore to ignore .idea and .vscode folders 2023-02-10 11:42:19 +04:00
764df24b7d Make clippy happy (again) 2023-02-09 13:21:20 +01:00
4570d5bf3a Merge remote-tracking branch 'origin/main' into temp-wildcard 2023-02-09 13:14:05 +01:00
746b31c1ce makes clippy happy 2023-02-09 12:23:01 +01:00
eaad84bd1d fix the test to handle the document deletion correctly 2023-02-09 11:29:13 +01:00
c690c4fec4 Added and modified the current API Key and Tenant Token tests 2023-02-09 11:17:30 +01:00
ea9ac46f28 stop autobatching the deletion without the index creation right with the addition 2023-02-08 21:24:27 +01:00
93db755d57 add a test to ensure we correctly handle deleting the same document multiple times 2023-02-08 21:03:34 +01:00
93f130a400 fix all warnings 2023-02-08 20:57:35 +01:00
860c993ef7 Handle the autobatching of deletion and addition in the scheduler 2023-02-08 20:53:19 +01:00
67dda0678f cleanup the autobatcher a little bit 2023-02-08 18:10:59 +01:00
2db6347686 update the autobatcher to batch the addition and deletion together 2023-02-08 18:07:59 +01:00
421a9cf05e provide a new method on the transform to remove documents 2023-02-08 16:06:09 +01:00
7b4b57ecc8 Fix the current tests 2023-02-08 14:54:05 +01:00
8f64fba1ce rewrite the current transform to handle a new byte specifying the kind of operation it's merging 2023-02-08 12:53:38 +01:00
a36b1dbd70 Fix the tasks with the new patterns 2023-02-01 18:21:45 +01:00
d563ed8a39 Making it work with index uid patterns 2023-02-01 17:51:30 +01:00
ec7de4bae7 Make it work for any all routes including stats and index swaps 2023-01-25 16:12:40 +01:00
184b8afd9e Make it work in the CreateApiKey struct 2023-01-25 15:01:50 +01:00
29961b8c6b Make it work with the dumps 2023-01-25 14:41:36 +01:00
0b08413c98 Introduce the IndexUidPattern type 2023-01-25 14:22:17 +01:00
474d4ec498 Add tests for the index patterns 2023-01-25 14:22:16 +01:00
92 changed files with 3239 additions and 1878 deletions

View File

@ -3,7 +3,7 @@
# check_tag $current_tag $file_tag $file_name
function check_tag {
if [[ "$1" != "$2" ]]; then
echo "Error: the current tag does not match the version in $3: found $2 - expected $1"
echo "Error: the current tag does not match the version in Cargo.toml: found $2 - expected $1"
ret=1
fi
}
@ -11,12 +11,8 @@ function check_tag {
ret=0
current_tag=${GITHUB_REF#'refs/tags/v'}
toml_files='*/Cargo.toml'
for toml_file in $toml_files;
do
file_tag="$(grep '^version = ' $toml_file | cut -d '=' -f 2 | tr -d '"' | tr -d ' ')"
check_tag $current_tag $file_tag $toml_file
done
file_tag="$(grep '^version = ' Cargo.toml | cut -d '=' -f 2 | tr -d '"' | tr -d ' ')"
check_tag $current_tag $file_tag
lock_file='Cargo.lock'
lock_tag=$(grep -A 1 'name = "meilisearch-auth"' $lock_file | grep version | cut -d '=' -f 2 | tr -d '"' | tr -d ' ')

View File

@ -92,7 +92,7 @@ jobs:
build-args: |
COMMIT_SHA=${{ github.sha }}
COMMIT_DATE=${{ steps.build-metadata.outputs.date }}
GIT_TAG=$(printf "%q" ${{ github.ref_name }})
GIT_TAG=${{ github.ref_name }}
# /!\ Don't touch this without checking with Cloud team
- name: Send CI information to Cloud team

View File

@ -2,6 +2,9 @@ name: Rust
on:
workflow_dispatch:
schedule:
# Everyday at 5:00am
- cron: '0 5 * * *'
pull_request:
push:
# trying and staging branches are for Bors config
@ -27,10 +30,18 @@ jobs:
run: |
apt-get update && apt-get install -y curl
apt-get install build-essential -y
- uses: actions-rs/toolchain@v1
- name: Run test with Rust stable
if: github.event_name != 'schedule'
uses: actions-rs/toolchain@v1
with:
toolchain: stable
override: true
- name: Run test with Rust nightly
if: github.event_name == 'schedule'
uses: actions-rs/toolchain@v1
with:
toolchain: nightly
override: true
# Disable cache due to disk space issues with Windows workers in CI
# - name: Cache dependencies
# uses: Swatinem/rust-cache@v2.2.0

View File

@ -29,7 +29,7 @@ jobs:
run: |
raw_new_version=$(echo $NEW_VERSION | cut -d 'v' -f 2)
new_string="version = \"$raw_new_version\""
sd '^version = "\d+.\d+.\w+"$' "$new_string" */Cargo.toml
sd '^version = "\d+.\d+.\w+"$' "$new_string" Cargo.toml
- name: Build Meilisearch to update Cargo.lock
run: cargo build
- name: Commit and push the changes to the ${{ env.NEW_BRANCH }} branch

2
.gitignore vendored
View File

@ -1,3 +1,5 @@
.idea/
.vscode/
/target
**/*.csv
**/*.json_lines

61
Cargo.lock generated
View File

@ -36,9 +36,9 @@ dependencies = [
[[package]]
name = "actix-http"
version = "3.2.2"
version = "3.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0c83abf9903e1f0ad9973cc4f7b9767fd5a03a583f51a5b7a339e07987cd2724"
checksum = "0070905b2c4a98d184c4e81025253cb192aa8a73827553f38e9410801ceb35bb"
dependencies = [
"actix-codec",
"actix-rt",
@ -46,7 +46,7 @@ dependencies = [
"actix-tls",
"actix-utils",
"ahash",
"base64 0.13.1",
"base64 0.21.0",
"bitflags",
"brotli",
"bytes",
@ -68,7 +68,10 @@ dependencies = [
"rand",
"sha1",
"smallvec",
"tokio",
"tokio-util",
"tracing",
"zstd 0.12.3+zstd.1.5.2",
]
[[package]]
@ -164,9 +167,9 @@ dependencies = [
[[package]]
name = "actix-web"
version = "4.2.1"
version = "4.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d48f7b6534e06c7bfc72ee91db7917d4af6afe23e7d223b51e68fffbb21e96b9"
checksum = "464e0fddc668ede5f26ec1f9557a8d44eda948732f40c6b0ad79126930eb775f"
dependencies = [
"actix-codec",
"actix-http",
@ -606,9 +609,9 @@ dependencies = [
[[package]]
name = "cargo_toml"
version = "0.13.3"
version = "0.14.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "497049e9477329f8f6a559972ee42e117487d01d1e8c2cc9f836ea6fa23a9e1a"
checksum = "2bfbc36312494041e2cdd5f06697b7e89d4b76f42773a0b5556ac290ff22acc2"
dependencies = [
"serde",
"toml",
@ -1110,20 +1113,26 @@ dependencies = [
[[package]]
name = "deserr"
version = "0.3.0"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "28380303ca15ec07e1d5b079baf19cf849b09edad5cab219c1c51b2bd07523de"
checksum = "c71c14985c842bf1e520b1ebcd22daff6aeece32f510e11f063cecf9b308c04b"
dependencies = [
"actix-http",
"actix-utils",
"actix-web",
"deserr-internal",
"futures",
"serde-cs",
"serde_json",
"serde_urlencoded",
"strsim",
]
[[package]]
name = "deserr-internal"
version = "0.3.0"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "860928cd8af78d223a3d70dd581f21d7c3de8aa2eecd938e0c0a399ded7c1451"
checksum = "cae1c51b191528c9e4e5d6cff671de94f61fcda1c206cc891251e0cf438c941a"
dependencies = [
"convert_case 0.5.0",
"proc-macro2",
@ -2527,6 +2536,7 @@ dependencies = [
"base64 0.13.1",
"enum-iterator",
"hmac",
"maplit",
"meilisearch-types",
"rand",
"roaring",
@ -4422,7 +4432,7 @@ dependencies = [
"pbkdf2",
"sha1",
"time",
"zstd",
"zstd 0.11.2+zstd.1.5.2",
]
[[package]]
@ -4431,7 +4441,16 @@ version = "0.11.2+zstd.1.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "20cc960326ece64f010d2d2107537f26dc589a6573a316bd5b1dba685fa5fde4"
dependencies = [
"zstd-safe",
"zstd-safe 5.0.2+zstd.1.5.2",
]
[[package]]
name = "zstd"
version = "0.12.3+zstd.1.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "76eea132fb024e0e13fd9c2f5d5d595d8a967aa72382ac2f9d39fcc95afd0806"
dependencies = [
"zstd-safe 6.0.4+zstd.1.5.4",
]
[[package]]
@ -4445,10 +4464,20 @@ dependencies = [
]
[[package]]
name = "zstd-sys"
version = "2.0.5+zstd.1.5.2"
name = "zstd-safe"
version = "6.0.4+zstd.1.5.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "edc50ffce891ad571e9f9afe5039c4837bede781ac4bb13052ed7ae695518596"
checksum = "7afb4b54b8910cf5447638cb54bf4e8a65cbedd783af98b98c62ffe91f185543"
dependencies = [
"libc",
"zstd-sys",
]
[[package]]
name = "zstd-sys"
version = "2.0.7+zstd.1.5.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "94509c3ba2fe55294d752b79842c530ccfab760192521df74a081a78d2b3c7f5"
dependencies = [
"cc",
"libc",

View File

@ -16,6 +16,15 @@ members = [
"benchmarks"
]
[workspace.package]
version = "1.0.0"
authors = ["Quentin de Quelen <quentin@dequelen.me>", "Clément Renault <clement@meilisearch.com>"]
description = "Meilisearch HTTP server"
homepage = "https://meilisearch.com"
readme = "README.md"
edition = "2021"
license = "MIT"
[profile.release]
codegen-units = 1

View File

@ -1,7 +1,5 @@
# Compile
FROM rust:alpine3.16 AS compiler
RUN apk add -q --update-cache --no-cache build-base openssl-dev
FROM rust:bullseye AS compiler
WORKDIR /meilisearch
@ -13,20 +11,20 @@ ENV RUSTFLAGS="-C target-feature=-crt-static"
COPY . .
RUN set -eux; \
apkArch="$(apk --print-arch)"; \
if [ "$apkArch" = "aarch64" ]; then \
arch="$(dpkg --print-architecture)"; \
if [ "$arch" = "aarch64" ]; then \
export JEMALLOC_SYS_WITH_LG_PAGE=16; \
fi && \
cargo build --release
# Run
FROM alpine:3.16
FROM debian:11.6
ENV MEILI_HTTP_ADDR 0.0.0.0:7700
ENV MEILI_SERVER_PROVIDER docker
RUN apk update --quiet \
&& apk add -q --no-cache libgcc tini curl
RUN apt update -q \
&& apt install -q -y tini
# add meilisearch to the `/bin` so you can run it from anywhere and it's easy
# to find.

View File

@ -1,9 +1,15 @@
[package]
name = "benchmarks"
version = "1.0.0"
edition = "2018"
publish = false
version.workspace = true
authors.workspace = true
description.workspace = true
homepage.workspace = true
readme.workspace = true
edition.workspace = true
license.workspace = true
[dependencies]
anyhow = "1.0.65"
csv = "1.1.6"

View File

@ -1,7 +1,14 @@
[package]
name = "dump"
version = "1.0.0"
edition = "2021"
publish = false
version.workspace = true
authors.workspace = true
description.workspace = true
edition.workspace = true
homepage.workspace = true
readme.workspace = true
license.workspace = true
[dependencies]
anyhow = "1.0.65"

View File

@ -203,12 +203,11 @@ pub(crate) mod test {
use big_s::S;
use maplit::btreeset;
use meilisearch_types::index_uid::IndexUid;
use meilisearch_types::index_uid_pattern::IndexUidPattern;
use meilisearch_types::keys::{Action, Key};
use meilisearch_types::milli::update::Setting;
use meilisearch_types::milli::{self};
use meilisearch_types::settings::{Checked, Settings};
use meilisearch_types::star_or::StarOr;
use meilisearch_types::tasks::{Details, Status};
use serde_json::{json, Map, Value};
use time::macros::datetime;
@ -341,7 +340,7 @@ pub(crate) mod test {
name: Some(S("doggos_key")),
uid: Uuid::from_str("9f8a34da-b6b2-42f0-939b-dbd4c3448655").unwrap(),
actions: vec![Action::DocumentsAll],
indexes: vec![StarOr::Other(IndexUid::from_str("doggos").unwrap())],
indexes: vec![IndexUidPattern::from_str("doggos").unwrap()],
expires_at: Some(datetime!(4130-03-14 12:21 UTC)),
created_at: datetime!(1960-11-15 0:00 UTC),
updated_at: datetime!(2022-11-10 0:00 UTC),
@ -351,7 +350,7 @@ pub(crate) mod test {
name: Some(S("master_key")),
uid: Uuid::from_str("4622f717-1c00-47bb-a494-39d76a49b591").unwrap(),
actions: vec![Action::All],
indexes: vec![StarOr::Star],
indexes: vec![IndexUidPattern::all()],
expires_at: None,
created_at: datetime!(0000-01-01 00:01 UTC),
updated_at: datetime!(1964-05-04 17:25 UTC),

View File

@ -181,10 +181,8 @@ impl CompatV5ToV6 {
.indexes
.into_iter()
.map(|index| match index {
v5::StarOr::Star => v6::StarOr::Star,
v5::StarOr::Other(uid) => {
v6::StarOr::Other(v6::IndexUid::new_unchecked(uid.as_str()))
}
v5::StarOr::Star => v6::IndexUidPattern::all(),
v5::StarOr::Other(uid) => v6::IndexUidPattern::new_unchecked(uid.as_str()),
})
.collect(),
expires_at: key.expires_at,

View File

@ -34,8 +34,7 @@ pub type PaginationSettings = meilisearch_types::settings::PaginationSettings;
// everything related to the api keys
pub type Action = meilisearch_types::keys::Action;
pub type StarOr<T> = meilisearch_types::star_or::StarOr<T>;
pub type IndexUid = meilisearch_types::index_uid::IndexUid;
pub type IndexUidPattern = meilisearch_types::index_uid_pattern::IndexUidPattern;
// everything related to the errors
pub type ResponseError = meilisearch_types::error::ResponseError;

View File

@ -1,7 +1,14 @@
[package]
name = "file-store"
version = "1.0.0"
edition = "2021"
publish = false
version.workspace = true
authors.workspace = true
description.workspace = true
homepage.workspace = true
readme.workspace = true
edition.workspace = true
license.workspace = true
[dependencies]
tempfile = "3.3.0"

View File

@ -1,10 +1,16 @@
[package]
name = "filter-parser"
version = "1.0.0"
edition = "2021"
description = "The parser for the Meilisearch filter syntax"
publish = false
version.workspace = true
authors.workspace = true
# description.workspace = true
homepage.workspace = true
readme.workspace = true
edition.workspace = true
license.workspace = true
[dependencies]
nom = "7.1.1"
nom_locate = "4.0.0"

View File

@ -1,11 +1,17 @@
[package]
name = "flatten-serde-json"
version = "1.0.0"
edition = "2021"
description = "Flatten serde-json objects like elastic search"
readme = "README.md"
publish = false
version.workspace = true
authors.workspace = true
# description.workspace = true
homepage.workspace = true
# readme.workspace = true
edition.workspace = true
license.workspace = true
[dependencies]
serde_json = "1.0"

View File

@ -1,7 +1,14 @@
[package]
name = "index-scheduler"
version = "1.0.0"
edition = "2021"
publish = false
version.workspace = true
authors.workspace = true
description.workspace = true
homepage.workspace = true
readme.workspace = true
edition.workspace = true
license.workspace = true
[dependencies]
anyhow = "1.0.64"

View File

@ -88,11 +88,11 @@ pub enum BatchKind {
DocumentClear {
ids: Vec<TaskId>,
},
DocumentImport {
DocumentOperation {
method: IndexDocumentsMethod,
allow_index_creation: bool,
primary_key: Option<String>,
import_ids: Vec<TaskId>,
operation_ids: Vec<TaskId>,
},
DocumentDeletion {
deletion_ids: Vec<TaskId>,
@ -102,12 +102,12 @@ pub enum BatchKind {
allow_index_creation: bool,
settings_ids: Vec<TaskId>,
},
SettingsAndDocumentImport {
SettingsAndDocumentOperation {
settings_ids: Vec<TaskId>,
method: IndexDocumentsMethod,
allow_index_creation: bool,
primary_key: Option<String>,
import_ids: Vec<TaskId>,
operation_ids: Vec<TaskId>,
},
Settings {
allow_index_creation: bool,
@ -131,9 +131,9 @@ impl BatchKind {
#[rustfmt::skip]
fn allow_index_creation(&self) -> Option<bool> {
match self {
BatchKind::DocumentImport { allow_index_creation, .. }
BatchKind::DocumentOperation { allow_index_creation, .. }
| BatchKind::ClearAndSettings { allow_index_creation, .. }
| BatchKind::SettingsAndDocumentImport { allow_index_creation, .. }
| BatchKind::SettingsAndDocumentOperation { allow_index_creation, .. }
| BatchKind::Settings { allow_index_creation, .. } => Some(*allow_index_creation),
_ => None,
}
@ -141,8 +141,8 @@ impl BatchKind {
fn primary_key(&self) -> Option<Option<&str>> {
match self {
BatchKind::DocumentImport { primary_key, .. }
| BatchKind::SettingsAndDocumentImport { primary_key, .. } => {
BatchKind::DocumentOperation { primary_key, .. }
| BatchKind::SettingsAndDocumentOperation { primary_key, .. } => {
Some(primary_key.as_deref())
}
_ => None,
@ -173,22 +173,22 @@ impl BatchKind {
if primary_key.is_none() || pk.is_none() || primary_key == pk.as_deref() =>
{
(
Continue(BatchKind::DocumentImport {
Continue(BatchKind::DocumentOperation {
method,
allow_index_creation,
primary_key: pk,
import_ids: vec![task_id],
operation_ids: vec![task_id],
}),
allow_index_creation,
)
}
// if the primary key set in the task was different than ours we should stop and make this batch fail asap.
K::DocumentImport { method, allow_index_creation, primary_key } => (
Break(BatchKind::DocumentImport {
Break(BatchKind::DocumentOperation {
method,
allow_index_creation,
primary_key,
import_ids: vec![task_id],
operation_ids: vec![task_id],
}),
allow_index_creation,
),
@ -249,7 +249,7 @@ impl BatchKind {
(
BatchKind::DocumentClear { mut ids }
| BatchKind::DocumentDeletion { deletion_ids: mut ids }
| BatchKind::DocumentImport { method: _, allow_index_creation: _, primary_key: _, import_ids: mut ids }
| BatchKind::DocumentOperation { method: _, allow_index_creation: _, primary_key: _, operation_ids: mut ids }
| BatchKind::Settings { allow_index_creation: _, settings_ids: mut ids },
K::IndexDeletion,
) => {
@ -258,7 +258,7 @@ impl BatchKind {
}
(
BatchKind::ClearAndSettings { settings_ids: mut ids, allow_index_creation: _, mut other }
| BatchKind::SettingsAndDocumentImport { import_ids: mut ids, method: _, allow_index_creation: _, primary_key: _, settings_ids: mut other },
| BatchKind::SettingsAndDocumentOperation { operation_ids: mut ids, method: _, allow_index_creation: _, primary_key: _, settings_ids: mut other },
K::IndexDeletion,
) => {
ids.push(id);
@ -278,63 +278,108 @@ impl BatchKind {
K::DocumentImport { .. } | K::Settings { .. },
) => Break(this),
(
BatchKind::DocumentImport { method: _, allow_index_creation: _, primary_key: _, import_ids: mut ids },
BatchKind::DocumentOperation { method: _, allow_index_creation: _, primary_key: _, mut operation_ids },
K::DocumentClear,
) => {
ids.push(id);
Continue(BatchKind::DocumentClear { ids })
operation_ids.push(id);
Continue(BatchKind::DocumentClear { ids: operation_ids })
}
// we can autobatch the same kind of document additions / updates
(
BatchKind::DocumentImport { method: ReplaceDocuments, allow_index_creation, primary_key: _, mut import_ids },
BatchKind::DocumentOperation { method: ReplaceDocuments, allow_index_creation, primary_key: _, mut operation_ids },
K::DocumentImport { method: ReplaceDocuments, primary_key: pk, .. },
) => {
import_ids.push(id);
Continue(BatchKind::DocumentImport {
operation_ids.push(id);
Continue(BatchKind::DocumentOperation {
method: ReplaceDocuments,
allow_index_creation,
import_ids,
operation_ids,
primary_key: pk,
})
}
(
BatchKind::DocumentImport { method: UpdateDocuments, allow_index_creation, primary_key: _, mut import_ids },
BatchKind::DocumentOperation { method: UpdateDocuments, allow_index_creation, primary_key: _, mut operation_ids },
K::DocumentImport { method: UpdateDocuments, primary_key: pk, .. },
) => {
import_ids.push(id);
Continue(BatchKind::DocumentImport {
operation_ids.push(id);
Continue(BatchKind::DocumentOperation {
method: UpdateDocuments,
allow_index_creation,
primary_key: pk,
import_ids,
operation_ids,
})
}
(
BatchKind::DocumentOperation { method, allow_index_creation, primary_key, mut operation_ids },
K::DocumentDeletion,
) => {
operation_ids.push(id);
Continue(BatchKind::DocumentOperation {
method,
allow_index_creation,
primary_key,
operation_ids,
})
}
// but we can't autobatch documents if it's not the same kind
// this match branch MUST be AFTER the previous one
(
this @ BatchKind::DocumentImport { .. },
K::DocumentDeletion | K::DocumentImport { .. },
this @ BatchKind::DocumentOperation { .. },
K::DocumentImport { .. },
) => Break(this),
(
BatchKind::DocumentImport { method, allow_index_creation, primary_key, import_ids },
BatchKind::DocumentOperation { method, allow_index_creation, primary_key, operation_ids },
K::Settings { .. },
) => Continue(BatchKind::SettingsAndDocumentImport {
) => Continue(BatchKind::SettingsAndDocumentOperation {
settings_ids: vec![id],
method,
allow_index_creation,
primary_key,
import_ids,
operation_ids,
}),
(BatchKind::DocumentDeletion { mut deletion_ids }, K::DocumentClear) => {
deletion_ids.push(id);
Continue(BatchKind::DocumentClear { ids: deletion_ids })
}
(this @ BatchKind::DocumentDeletion { .. }, K::DocumentImport { .. }) => Break(this),
// we can autobatch the deletion and import if the index already exists
(
BatchKind::DocumentDeletion { mut deletion_ids },
K::DocumentImport { method, allow_index_creation, primary_key }
) if index_already_exists => {
deletion_ids.push(id);
Continue(BatchKind::DocumentOperation {
method,
allow_index_creation,
primary_key,
operation_ids: deletion_ids,
})
}
// we can autobatch the deletion and import if both can't create an index
(
BatchKind::DocumentDeletion { mut deletion_ids },
K::DocumentImport { method, allow_index_creation, primary_key }
) if !allow_index_creation => {
deletion_ids.push(id);
Continue(BatchKind::DocumentOperation {
method,
allow_index_creation,
primary_key,
operation_ids: deletion_ids,
})
}
// we can't autobatch a deletion and an import if the index does not exists but would be created by an addition
(
this @ BatchKind::DocumentDeletion { .. },
K::DocumentImport { .. }
) => {
Break(this)
}
(BatchKind::DocumentDeletion { mut deletion_ids }, K::DocumentDeletion) => {
deletion_ids.push(id);
Continue(BatchKind::DocumentDeletion { deletion_ids })
@ -403,60 +448,60 @@ impl BatchKind {
})
}
(
BatchKind::SettingsAndDocumentImport { settings_ids, method: _, import_ids: mut other, allow_index_creation, primary_key: _ },
BatchKind::SettingsAndDocumentOperation { settings_ids, method: _, mut operation_ids, allow_index_creation, primary_key: _ },
K::DocumentClear,
) => {
other.push(id);
operation_ids.push(id);
Continue(BatchKind::ClearAndSettings {
settings_ids,
other,
other: operation_ids,
allow_index_creation,
})
}
(
BatchKind::SettingsAndDocumentImport { settings_ids, method: ReplaceDocuments, mut import_ids, allow_index_creation, primary_key: _},
BatchKind::SettingsAndDocumentOperation { settings_ids, method: ReplaceDocuments, mut operation_ids, allow_index_creation, primary_key: _},
K::DocumentImport { method: ReplaceDocuments, primary_key: pk2, .. },
) => {
import_ids.push(id);
Continue(BatchKind::SettingsAndDocumentImport {
operation_ids.push(id);
Continue(BatchKind::SettingsAndDocumentOperation {
settings_ids,
method: ReplaceDocuments,
allow_index_creation,
primary_key: pk2,
import_ids,
operation_ids,
})
}
(
BatchKind::SettingsAndDocumentImport { settings_ids, method: UpdateDocuments, allow_index_creation, primary_key: _, mut import_ids },
BatchKind::SettingsAndDocumentOperation { settings_ids, method: UpdateDocuments, allow_index_creation, primary_key: _, mut operation_ids },
K::DocumentImport { method: UpdateDocuments, primary_key: pk2, .. },
) => {
import_ids.push(id);
Continue(BatchKind::SettingsAndDocumentImport {
operation_ids.push(id);
Continue(BatchKind::SettingsAndDocumentOperation {
settings_ids,
method: UpdateDocuments,
allow_index_creation,
primary_key: pk2,
import_ids,
operation_ids,
})
}
// But we can't batch a settings and a doc op with another doc op
// this MUST be AFTER the two previous branch
(
this @ BatchKind::SettingsAndDocumentImport { .. },
this @ BatchKind::SettingsAndDocumentOperation { .. },
K::DocumentDeletion | K::DocumentImport { .. },
) => Break(this),
(
BatchKind::SettingsAndDocumentImport { mut settings_ids, method, allow_index_creation,primary_key, import_ids },
BatchKind::SettingsAndDocumentOperation { mut settings_ids, method, allow_index_creation,primary_key, operation_ids },
K::Settings { .. },
) => {
settings_ids.push(id);
Continue(BatchKind::SettingsAndDocumentImport {
Continue(BatchKind::SettingsAndDocumentOperation {
settings_ids,
method,
allow_index_creation,
primary_key,
import_ids,
operation_ids,
})
}
(
@ -588,29 +633,29 @@ mod tests {
fn autobatch_simple_operation_together() {
// we can autobatch one or multiple `ReplaceDocuments` together.
// if the index exists.
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, import_ids: [0] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp( ReplaceDocuments, true , None), doc_imp(ReplaceDocuments, true , None)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0, 1, 2] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), doc_imp( ReplaceDocuments, false , None), doc_imp(ReplaceDocuments, false , None)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, import_ids: [0, 1, 2] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp( ReplaceDocuments, true , None), doc_imp(ReplaceDocuments, true , None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1, 2] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), doc_imp( ReplaceDocuments, false , None), doc_imp(ReplaceDocuments, false , None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1, 2] }, false))");
// if it doesn't exists.
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, import_ids: [0] }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, true, None), doc_imp( ReplaceDocuments, true , None), doc_imp(ReplaceDocuments, true , None)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0, 1, 2] }, true))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None), doc_imp( ReplaceDocuments, true , None), doc_imp(ReplaceDocuments, true , None)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, import_ids: [0] }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, true, None), doc_imp( ReplaceDocuments, true , None), doc_imp(ReplaceDocuments, true , None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1, 2] }, true))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None), doc_imp( ReplaceDocuments, true , None), doc_imp(ReplaceDocuments, true , None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
// we can autobatch one or multiple `UpdateDocuments` together.
// if the index exists.
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentImport { method: UpdateDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_imp(UpdateDocuments, true, None), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentImport { method: UpdateDocuments, allow_index_creation: true, primary_key: None, import_ids: [0, 1, 2] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentImport { method: UpdateDocuments, allow_index_creation: false, primary_key: None, import_ids: [0] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, None), doc_imp(UpdateDocuments, false, None), doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentImport { method: UpdateDocuments, allow_index_creation: false, primary_key: None, import_ids: [0, 1, 2] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_imp(UpdateDocuments, true, None), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1, 2] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, None), doc_imp(UpdateDocuments, false, None), doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1, 2] }, false))");
// if it doesn't exists.
debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentImport { method: UpdateDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, true, None), doc_imp(UpdateDocuments, true, None), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentImport { method: UpdateDocuments, allow_index_creation: true, primary_key: None, import_ids: [0, 1, 2] }, true))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentImport { method: UpdateDocuments, allow_index_creation: false, primary_key: None, import_ids: [0] }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, false, None), doc_imp(UpdateDocuments, false, None), doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentImport { method: UpdateDocuments, allow_index_creation: false, primary_key: None, import_ids: [0, 1, 2] }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, true, None), doc_imp(UpdateDocuments, true, None), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1, 2] }, true))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, false, None), doc_imp(UpdateDocuments, false, None), doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1, 2] }, false))");
// we can autobatch one or multiple DocumentDeletion together
debug_snapshot!(autobatch_from(true, None, [doc_del()]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))");
@ -628,56 +673,83 @@ mod tests {
debug_snapshot!(autobatch_from(false,None, [settings(true), settings(true), settings(true)]), @"Some((Settings { allow_index_creation: true, settings_ids: [0, 1, 2] }, true))");
debug_snapshot!(autobatch_from(false,None, [settings(false)]), @"Some((Settings { allow_index_creation: false, settings_ids: [0] }, false))");
debug_snapshot!(autobatch_from(false,None, [settings(false), settings(false), settings(false)]), @"Some((Settings { allow_index_creation: false, settings_ids: [0, 1, 2] }, false))");
// We can autobatch document addition with document deletion
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_del()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_del()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), doc_del()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, None), doc_del()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0, 1] }, true))"###);
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0, 1] }, true))"###);
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, true, None), doc_del()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, true, None), doc_del()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, false, None), doc_del()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, false, None), doc_del()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, true, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0, 1] }, true))"###);
debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, true, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0, 1] }, true))"###);
debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, false, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, false, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
// And the other way around
debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(ReplaceDocuments, true, Some("catto"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(UpdateDocuments, true, Some("catto"))]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(ReplaceDocuments, false, Some("catto"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(UpdateDocuments, false, Some("catto"))]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(ReplaceDocuments, false, Some("catto"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(UpdateDocuments, false, Some("catto"))]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
}
#[test]
fn simple_document_operation_dont_autobatch_with_other() {
// additions, updates and deletions can't be batched together
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_del()]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentImport { method: UpdateDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_del()]), @"Some((DocumentImport { method: UpdateDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), idx_create()]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), idx_create()]), @"Some((DocumentImport { method: UpdateDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), idx_create()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), idx_create()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_del(), idx_create()]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), idx_update()]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), idx_update()]), @"Some((DocumentImport { method: UpdateDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), idx_update()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), idx_update()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_del(), idx_update()]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), idx_swap()]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), idx_swap()]), @"Some((DocumentImport { method: UpdateDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), idx_swap()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), idx_swap()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_del(), idx_swap()]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))");
}
#[test]
fn document_addition_batch_with_settings() {
// simple case
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true)]), @"Some((SettingsAndDocumentImport { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true)]), @"Some((SettingsAndDocumentImport { settings_ids: [1], method: UpdateDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true)]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true)]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
// multiple settings and doc additions
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None), settings(true), settings(true)]), @"Some((SettingsAndDocumentImport { settings_ids: [2, 3], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0, 1] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None), settings(true), settings(true)]), @"Some((SettingsAndDocumentImport { settings_ids: [2, 3], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0, 1] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None), settings(true), settings(true)]), @"Some((SettingsAndDocumentOperation { settings_ids: [2, 3], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None), settings(true), settings(true)]), @"Some((SettingsAndDocumentOperation { settings_ids: [2, 3], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
// addition and setting unordered
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), doc_imp(ReplaceDocuments, true, None), settings(true)]), @"Some((SettingsAndDocumentImport { settings_ids: [1, 3], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0, 2] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), doc_imp(UpdateDocuments, true, None), settings(true)]), @"Some((SettingsAndDocumentImport { settings_ids: [1, 3], method: UpdateDocuments, allow_index_creation: true, primary_key: None, import_ids: [0, 2] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), doc_imp(ReplaceDocuments, true, None), settings(true)]), @"Some((SettingsAndDocumentOperation { settings_ids: [1, 3], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 2] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), doc_imp(UpdateDocuments, true, None), settings(true)]), @"Some((SettingsAndDocumentOperation { settings_ids: [1, 3], method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 2] }, true))");
// We ensure this kind of batch doesn't get batched with forbidden operations
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), doc_imp(UpdateDocuments, true, None)]), @"Some((SettingsAndDocumentImport { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), doc_imp(ReplaceDocuments, true, None)]), @"Some((SettingsAndDocumentImport { settings_ids: [1], method: UpdateDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), doc_del()]), @"Some((SettingsAndDocumentImport { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), doc_del()]), @"Some((SettingsAndDocumentImport { settings_ids: [1], method: UpdateDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), idx_create()]), @"Some((SettingsAndDocumentImport { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), idx_create()]), @"Some((SettingsAndDocumentImport { settings_ids: [1], method: UpdateDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), idx_update()]), @"Some((SettingsAndDocumentImport { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), idx_update()]), @"Some((SettingsAndDocumentImport { settings_ids: [1], method: UpdateDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), idx_swap()]), @"Some((SettingsAndDocumentImport { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), idx_swap()]), @"Some((SettingsAndDocumentImport { settings_ids: [1], method: UpdateDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), doc_imp(UpdateDocuments, true, None)]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), doc_imp(ReplaceDocuments, true, None)]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), doc_del()]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), doc_del()]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), idx_create()]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), idx_create()]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), idx_update()]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), idx_update()]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), idx_swap()]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), idx_swap()]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
}
#[test]
@ -789,67 +861,73 @@ mod tests {
debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, false, None), settings(false), doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [1, 3, 0, 2] }, false))");
// The third and final case is when the first task doesn't create an index but is directly followed by a task creating an index. In this case we can't batch with what
// follows because we first need to process the erroneous batch.
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments,false, None), settings(true), idx_del()]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, import_ids: [0] }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, false, None), settings(true), idx_del()]), @"Some((DocumentImport { method: UpdateDocuments, allow_index_creation: false, primary_key: None, import_ids: [0] }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments,false, None), settings(true), doc_clr(), idx_del()]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, import_ids: [0] }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, false, None), settings(true), doc_clr(), idx_del()]), @"Some((DocumentImport { method: UpdateDocuments, allow_index_creation: false, primary_key: None, import_ids: [0] }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments,false, None), settings(true), idx_del()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, false, None), settings(true), idx_del()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments,false, None), settings(true), doc_clr(), idx_del()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, false, None), settings(true), doc_clr(), idx_del()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
}
#[test]
fn allowed_and_disallowed_index_creation() {
// `DocumentImport` tasks that are allowed to create an index can't be batched with tasks that aren't, except if the index already exists.
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, import_ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0, 1] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, import_ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true)]), @"Some((SettingsAndDocumentImport { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), settings(true)]), @"Some((SettingsAndDocumentImport { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: false, primary_key: None, import_ids: [0] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true)]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), settings(true)]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, import_ids: [0] }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0, 1] }, true))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None), doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, import_ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, true, None), settings(true)]), @"Some((SettingsAndDocumentImport { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None), settings(true)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, import_ids: [0] }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None), doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, true, None), settings(true)]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None), settings(true)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
// batch deletion and addition
debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(ReplaceDocuments, true, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))");
debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(UpdateDocuments, true, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))");
debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))");
debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))");
}
#[test]
fn autobatch_primary_key() {
// ==> If I have a pk
// With a single update
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), import_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other"))]), @r###"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("other"), import_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), operation_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("other"), operation_ids: [0] }, true))"###);
// With multiple updates
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0, 1] }, true))");
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), import_ids: [0, 1] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), import_ids: [0, 1] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, Some("other"))]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("id"))]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), operation_ids: [0, 1] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), operation_ids: [0, 1] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, Some("other"))]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("id"))]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), import_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), import_ids: [0, 1] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), import_ids: [0, 1] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, Some("other"))]), @r###"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), import_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), import_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), import_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), operation_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), operation_ids: [0, 1] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), operation_ids: [0, 1] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, Some("other"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), operation_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), operation_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), operation_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("other"), import_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("other"), import_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("other"), import_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("other"))]), @r###"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("other"), import_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("other"), import_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("other"), import_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("other"), operation_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("other"), operation_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("other"), operation_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("other"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("other"), operation_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("other"), operation_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("other"), operation_ids: [0] }, true))"###);
// ==> If I don't have a pk
// With a single update
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), import_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, Some("other"))]), @r###"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("other"), import_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), operation_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, Some("other"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("other"), operation_ids: [0] }, true))"###);
// With multiple updates
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0, 1] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, Some("id"))]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), import_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, Some("id"))]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), operation_ids: [0] }, true))"###);
}
}
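
The inline snapshot assertions above compare the `Debug` rendering of the autobatcher's output against the literal string written after `@`. A rough, self-contained approximation of what one such assertion checks, using stand-in types rather than the real scheduler structs:

```rust
// Stand-ins for the scheduler's batch types; only the Debug output matters here.
#[allow(dead_code)]
#[derive(Debug)]
enum Method {
    ReplaceDocuments,
    UpdateDocuments,
}

#[derive(Debug)]
struct DocumentOperation {
    method: Method,
    allow_index_creation: bool,
    operation_ids: Vec<u32>,
}

fn main() {
    let batch = DocumentOperation {
        method: Method::ReplaceDocuments,
        allow_index_creation: true,
        operation_ids: vec![0, 1],
    };
    // Equivalent in spirit to `debug_snapshot!(batch, @"...")`: the Debug string
    // must match the recorded snapshot exactly.
    assert_eq!(
        format!("{batch:?}"),
        "DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, operation_ids: [0, 1] }"
    );
}
```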

@ -28,8 +28,7 @@ use meilisearch_types::heed::{RoTxn, RwTxn};
use meilisearch_types::milli::documents::{obkv_to_object, DocumentsBatchReader};
use meilisearch_types::milli::heed::CompactionOption;
use meilisearch_types::milli::update::{
DocumentAdditionResult, DocumentDeletionResult, IndexDocumentsConfig, IndexDocumentsMethod,
Settings as MilliSettings,
DocumentDeletionResult, IndexDocumentsConfig, IndexDocumentsMethod, Settings as MilliSettings,
};
use meilisearch_types::milli::{self, BEU32};
use meilisearch_types::settings::{apply_settings_to_builder, Settings, Unchecked};
@ -86,15 +85,21 @@ pub(crate) enum Batch {
},
}
#[derive(Debug)]
pub(crate) enum DocumentOperation {
Add(Uuid),
Delete(Vec<String>),
}
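
The new `DocumentOperation` enum is what lets additions and deletions share a batch: consecutive tasks on the same index are folded into one ordered list and replayed in task order. A minimal sketch of that idea, with a `String` standing in for the `Uuid` content-file handle:

```rust
// `ContentId` stands in for the real `Uuid` handle pointing at a stored
// documents batch; the enum shape mirrors the one defined above.
type ContentId = String;

enum DocumentOperation {
    Add(ContentId),
    Delete(Vec<String>), // external document ids to remove
}

fn main() {
    // Three consecutive tasks on the same index, folded into one batch.
    let operations = vec![
        DocumentOperation::Add("content-file-0".into()),
        DocumentOperation::Delete(vec!["doc-1".into(), "doc-2".into()]),
        DocumentOperation::Add("content-file-1".into()),
    ];

    // The scheduler later replays the list in order, one operation per task.
    for op in &operations {
        match op {
            DocumentOperation::Add(id) => println!("add documents from {id}"),
            DocumentOperation::Delete(ids) => println!("delete {} documents", ids.len()),
        }
    }
}
```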
/// A [batch](Batch) that combines multiple tasks operating on an index.
#[derive(Debug)]
pub(crate) enum IndexOperation {
DocumentImport {
DocumentOperation {
index_uid: String,
primary_key: Option<String>,
method: IndexDocumentsMethod,
documents_counts: Vec<u64>,
content_files: Vec<Uuid>,
operations: Vec<DocumentOperation>,
tasks: Vec<Task>,
},
DocumentDeletion {
@ -121,13 +126,13 @@ pub(crate) enum IndexOperation {
settings: Vec<(bool, Settings<Unchecked>)>,
settings_tasks: Vec<Task>,
},
SettingsAndDocumentImport {
SettingsAndDocumentOperation {
index_uid: String,
primary_key: Option<String>,
method: IndexDocumentsMethod,
documents_counts: Vec<u64>,
content_files: Vec<Uuid>,
operations: Vec<DocumentOperation>,
document_import_tasks: Vec<Task>,
// The boolean indicates if it's a settings deletion or creation.
@ -149,13 +154,13 @@ impl Batch {
tasks.iter().map(|task| task.uid).collect()
}
Batch::IndexOperation { op, .. } => match op {
IndexOperation::DocumentImport { tasks, .. }
IndexOperation::DocumentOperation { tasks, .. }
| IndexOperation::DocumentDeletion { tasks, .. }
| IndexOperation::Settings { tasks, .. }
| IndexOperation::DocumentClear { tasks, .. } => {
tasks.iter().map(|task| task.uid).collect()
}
IndexOperation::SettingsAndDocumentImport {
IndexOperation::SettingsAndDocumentOperation {
document_import_tasks: tasks,
settings_tasks: other,
..
@ -169,17 +174,33 @@ impl Batch {
Batch::IndexSwap { task } => vec![task.uid],
}
}
/// Return the index UID associated with this batch
pub fn index_uid(&self) -> Option<&str> {
use Batch::*;
match self {
TaskCancelation { .. }
| TaskDeletion(_)
| SnapshotCreation(_)
| Dump(_)
| IndexSwap { .. } => None,
IndexOperation { op, .. } => Some(op.index_uid()),
IndexCreation { index_uid, .. }
| IndexUpdate { index_uid, .. }
| IndexDeletion { index_uid, .. } => Some(index_uid),
}
}
}
impl IndexOperation {
pub fn index_uid(&self) -> &str {
match self {
IndexOperation::DocumentImport { index_uid, .. }
IndexOperation::DocumentOperation { index_uid, .. }
| IndexOperation::DocumentDeletion { index_uid, .. }
| IndexOperation::DocumentClear { index_uid, .. }
| IndexOperation::Settings { index_uid, .. }
| IndexOperation::DocumentClearAndSetting { index_uid, .. }
| IndexOperation::SettingsAndDocumentImport { index_uid, .. } => index_uid,
| IndexOperation::SettingsAndDocumentOperation { index_uid, .. } => index_uid,
}
}
}
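
The accessor above is plain or-pattern matching: several variants carry an `index_uid` field, and a single `match` borrows it without cloning. A small, self-contained sketch of the same pattern with simplified stand-in variants:

```rust
// Simplified stand-ins; the real enum has more variants and fields.
#[allow(dead_code)]
enum Op {
    DocumentOperation { index_uid: String },
    DocumentDeletion { index_uid: String },
    Settings { index_uid: String },
}

impl Op {
    fn index_uid(&self) -> &str {
        // Or-patterns bind the same field name across variants,
        // so one arm covers them all.
        match self {
            Op::DocumentOperation { index_uid }
            | Op::DocumentDeletion { index_uid }
            | Op::Settings { index_uid } => index_uid,
        }
    }
}

fn main() {
    let op = Op::Settings { index_uid: "movies".into() };
    assert_eq!(op.index_uid(), "movies");
}
```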
@ -206,17 +227,22 @@ impl IndexScheduler {
},
must_create_index,
})),
BatchKind::DocumentImport { method, import_ids, .. } => {
let tasks = self.get_existing_tasks(rtxn, import_ids)?;
let primary_key = match &tasks[0].kind {
KindWithContent::DocumentAdditionOrUpdate { primary_key, .. } => {
primary_key.clone()
}
_ => unreachable!(),
};
BatchKind::DocumentOperation { method, operation_ids, .. } => {
let tasks = self.get_existing_tasks(rtxn, operation_ids)?;
let primary_key = tasks
.iter()
.find_map(|task| match task.kind {
KindWithContent::DocumentAdditionOrUpdate { ref primary_key, .. } => {
// we want to stop on the first document addition
Some(primary_key.clone())
}
KindWithContent::DocumentDeletion { .. } => None,
_ => unreachable!(),
})
.flatten();
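
The `find_map`/`flatten` chain above stops at the first document addition in the batch and takes whatever primary key it declared, which may itself be `None`. A minimal sketch of that behaviour, with `Option<Option<String>>` standing in for "addition task carrying an optional primary key" versus "deletion task":

```rust
// Outer Option: Some(..) = a document addition task, None = a deletion task.
// Inner Option: the primary key declared by that addition, if any.
fn first_primary_key(tasks: &[Option<Option<String>>]) -> Option<String> {
    tasks
        .iter()
        .find_map(|task| task.clone()) // stop at the first addition
        .flatten() // unwrap its (possibly absent) primary key
}

fn main() {
    // A deletion, then an addition declaring the primary key "id".
    let tasks = vec![None, Some(Some("id".to_string())), Some(None)];
    assert_eq!(first_primary_key(&tasks), Some("id".to_string()));

    // A deletion, then an addition with no primary key: later tasks are ignored.
    let tasks = vec![None, Some(None), Some(Some("id".to_string()))];
    assert_eq!(first_primary_key(&tasks), None);
}
```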
let mut documents_counts = Vec::new();
let mut content_files = Vec::new();
let mut operations = Vec::new();
for task in tasks.iter() {
match task.kind {
@ -226,19 +252,23 @@ impl IndexScheduler {
..
} => {
documents_counts.push(documents_count);
content_files.push(content_file);
operations.push(DocumentOperation::Add(content_file));
}
KindWithContent::DocumentDeletion { ref documents_ids, .. } => {
documents_counts.push(documents_ids.len() as u64);
operations.push(DocumentOperation::Delete(documents_ids.clone()));
}
_ => unreachable!(),
}
}
Ok(Some(Batch::IndexOperation {
op: IndexOperation::DocumentImport {
op: IndexOperation::DocumentOperation {
index_uid,
primary_key,
method,
documents_counts,
content_files,
operations,
tasks,
},
must_create_index,
@ -322,12 +352,12 @@ impl IndexScheduler {
must_create_index,
}))
}
BatchKind::SettingsAndDocumentImport {
BatchKind::SettingsAndDocumentOperation {
settings_ids,
method,
allow_index_creation,
primary_key,
import_ids,
operation_ids,
} => {
let settings = self.create_next_batch_index(
rtxn,
@ -339,11 +369,11 @@ impl IndexScheduler {
let document_import = self.create_next_batch_index(
rtxn,
index_uid.clone(),
BatchKind::DocumentImport {
BatchKind::DocumentOperation {
method,
allow_index_creation,
primary_key,
import_ids,
operation_ids,
},
must_create_index,
)?;
@ -352,10 +382,10 @@ impl IndexScheduler {
(
Some(Batch::IndexOperation {
op:
IndexOperation::DocumentImport {
IndexOperation::DocumentOperation {
primary_key,
documents_counts,
content_files,
operations,
tasks: document_import_tasks,
..
},
@ -366,12 +396,12 @@ impl IndexScheduler {
..
}),
) => Ok(Some(Batch::IndexOperation {
op: IndexOperation::SettingsAndDocumentImport {
op: IndexOperation::SettingsAndDocumentOperation {
index_uid,
primary_key,
method,
documents_counts,
content_files,
operations,
document_import_tasks,
settings,
settings_tasks,
@ -987,12 +1017,12 @@ impl IndexScheduler {
Ok(tasks)
}
IndexOperation::DocumentImport {
IndexOperation::DocumentOperation {
index_uid: _,
primary_key,
method,
documents_counts,
content_files,
documents_counts: _,
operations,
mut tasks,
} => {
let mut primary_key_has_been_set = false;
@ -1037,26 +1067,82 @@ impl IndexScheduler {
|| must_stop_processing.get(),
)?;
let mut results = Vec::new();
for content_uuid in content_files.into_iter() {
let content_file = self.file_store.get_update(content_uuid)?;
let reader = DocumentsBatchReader::from_reader(content_file)
.map_err(milli::Error::from)?;
let (new_builder, user_result) = builder.add_documents(reader)?;
builder = new_builder;
for (operation, task) in operations.into_iter().zip(tasks.iter_mut()) {
match operation {
DocumentOperation::Add(content_uuid) => {
let content_file = self.file_store.get_update(content_uuid)?;
let reader = DocumentsBatchReader::from_reader(content_file)
.map_err(milli::Error::from)?;
let (new_builder, user_result) = builder.add_documents(reader)?;
builder = new_builder;
let user_result = match user_result {
Ok(count) => Ok(DocumentAdditionResult {
indexed_documents: count,
number_of_documents: count, // TODO: this is wrong, we should use the value stored in the Details.
}),
Err(e) => Err(milli::Error::from(e)),
};
let received_documents =
if let Some(Details::DocumentAdditionOrUpdate {
received_documents,
..
}) = task.details
{
received_documents
} else {
// In the case of a `documentAdditionOrUpdate` the details MUST be set
unreachable!();
};
results.push(user_result);
match user_result {
Ok(count) => {
task.status = Status::Succeeded;
task.details = Some(Details::DocumentAdditionOrUpdate {
received_documents,
indexed_documents: Some(count),
})
}
Err(e) => {
task.status = Status::Failed;
task.details = Some(Details::DocumentAdditionOrUpdate {
received_documents,
indexed_documents: Some(0),
});
task.error = Some(milli::Error::from(e).into());
}
}
}
DocumentOperation::Delete(document_ids) => {
let (new_builder, user_result) =
builder.remove_documents(document_ids)?;
builder = new_builder;
let provided_ids =
if let Some(Details::DocumentDeletion { provided_ids, .. }) =
task.details
{
provided_ids
} else {
// In the case of a `documentDeletion` the details MUST be set
unreachable!();
};
match user_result {
Ok(count) => {
task.status = Status::Succeeded;
task.details = Some(Details::DocumentDeletion {
provided_ids,
deleted_documents: Some(count),
});
}
Err(e) => {
task.status = Status::Failed;
task.details = Some(Details::DocumentDeletion {
provided_ids,
deleted_documents: Some(0),
});
task.error = Some(milli::Error::from(e).into());
}
}
}
}
}
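
Each entry in `operations` is zipped with the task that produced it, and that task's status and details are updated from the result of its own operation. A stripped-down sketch of that bookkeeping, with stand-in `Task` and `Status` types:

```rust
#[derive(Debug, PartialEq)]
enum Status {
    Enqueued,
    Succeeded,
    Failed,
}

struct Task {
    status: Status,
    error: Option<String>,
}

// Pair each per-operation result with its originating task and record the outcome.
fn apply_results(results: Vec<Result<u64, String>>, tasks: &mut [Task]) {
    for (result, task) in results.into_iter().zip(tasks.iter_mut()) {
        match result {
            Ok(_indexed) => task.status = Status::Succeeded,
            Err(error) => {
                task.status = Status::Failed;
                task.error = Some(error);
            }
        }
    }
}

fn main() {
    let mut tasks = vec![
        Task { status: Status::Enqueued, error: None },
        Task { status: Status::Enqueued, error: None },
    ];
    apply_results(vec![Ok(12), Err("invalid document".into())], &mut tasks);
    assert_eq!(tasks[0].status, Status::Succeeded);
    assert_eq!(tasks[1].status, Status::Failed);
}
```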
if results.iter().any(|res| res.is_ok()) {
if !tasks.iter().all(|res| res.error.is_some()) {
let addition = builder.execute()?;
info!("document addition done: {:?}", addition);
} else if primary_key_has_been_set {
@ -1071,29 +1157,6 @@ impl IndexScheduler {
)?;
}
for (task, (ret, count)) in
tasks.iter_mut().zip(results.into_iter().zip(documents_counts))
{
match ret {
Ok(DocumentAdditionResult { indexed_documents, number_of_documents }) => {
task.status = Status::Succeeded;
task.details = Some(Details::DocumentAdditionOrUpdate {
received_documents: number_of_documents,
indexed_documents: Some(indexed_documents),
});
}
Err(error) => {
task.status = Status::Failed;
task.details = Some(Details::DocumentAdditionOrUpdate {
received_documents: count,
// if there was an error we indexed 0 documents.
indexed_documents: Some(0),
});
task.error = Some(error.into())
}
}
}
Ok(tasks)
}
IndexOperation::DocumentDeletion { index_uid: _, documents, mut tasks } => {
@ -1136,12 +1199,12 @@ impl IndexScheduler {
Ok(tasks)
}
IndexOperation::SettingsAndDocumentImport {
IndexOperation::SettingsAndDocumentOperation {
index_uid,
primary_key,
method,
documents_counts,
content_files,
operations,
document_import_tasks,
settings,
settings_tasks,
@ -1159,12 +1222,12 @@ impl IndexScheduler {
let mut import_tasks = self.apply_index_operation(
index_wtxn,
index,
IndexOperation::DocumentImport {
IndexOperation::DocumentOperation {
index_uid,
primary_key,
method,
documents_counts,
content_files,
operations,
tasks: document_import_tasks,
},
)?;

@ -9,10 +9,11 @@ use meilisearch_types::heed::types::Str;
use meilisearch_types::heed::{Database, Env, EnvOpenOptions, RoTxn, RwTxn};
use meilisearch_types::milli::update::IndexerConfig;
use meilisearch_types::milli::Index;
use synchronoise::SignalEvent;
use time::OffsetDateTime;
use uuid::Uuid;
use self::IndexStatus::{Available, BeingDeleted};
use self::IndexStatus::{Available, BeingDeleted, BeingResized};
use crate::uuid_codec::UuidCodec;
use crate::{clamp_to_page_size, Error, Result};
@ -45,6 +46,8 @@ pub struct IndexMapper {
pub enum IndexStatus {
/// Do not insert it back in the index map as it is currently being deleted.
BeingDeleted,
/// Temporarily do not insert the index in the index map as it is currently being resized.
BeingResized(Arc<SignalEvent>),
/// You can use the index without worrying about anything.
Available(Index),
}
@ -71,9 +74,10 @@ impl IndexMapper {
&self,
path: &Path,
date: Option<(OffsetDateTime, OffsetDateTime)>,
map_size: usize,
) -> Result<Index> {
let mut options = EnvOpenOptions::new();
options.map_size(clamp_to_page_size(self.index_size));
options.map_size(clamp_to_page_size(map_size));
options.max_readers(1024);
if let Some((created, updated)) = date {
@ -102,14 +106,15 @@ impl IndexMapper {
let index_path = self.base_path.join(uuid.to_string());
fs::create_dir_all(&index_path)?;
let index = self.create_or_open_index(&index_path, date)?;
let index = self.create_or_open_index(&index_path, date, self.index_size)?;
wtxn.commit()?;
// Error if the UUIDv4 somehow already exists in the map, since it should be fresh.
// This is very unlikely to happen in practice.
// TODO: it would be better to lazily create the index. But we need an Index::open function for milli.
if let Some(BeingDeleted) =
self.index_map.write().unwrap().insert(uuid, Available(index.clone()))
if self.index_map.write().unwrap().insert(uuid, Available(index.clone())).is_some()
{
panic!("Uuid v4 conflict.");
panic!("Uuid v4 conflict: index with UUID {uuid} already exists.");
}
Ok(index)
@ -131,13 +136,23 @@ impl IndexMapper {
wtxn.commit()?;
// We remove the index from the in-memory index map.
let mut lock = self.index_map.write().unwrap();
let closing_event = match lock.insert(uuid, BeingDeleted) {
Some(Available(index)) => Some(index.prepare_for_closing()),
_ => None,
};
let closing_event = loop {
let mut lock = self.index_map.write().unwrap();
let resize_operation = match lock.insert(uuid, BeingDeleted) {
Some(Available(index)) => break Some(index.prepare_for_closing()),
// The target index is in the middle of a resize operation.
// Wait for this operation to complete, then try again.
Some(BeingResized(resize_operation)) => resize_operation.clone(),
// The index is already being deleted or doesn't exist.
// It's OK to remove it from the map again.
_ => break None,
};
drop(lock);
// Avoiding deadlocks: we need to drop the lock before waiting for the end of the resize, which
// will involve operations on the very map we're locking.
drop(lock);
resize_operation.wait();
};
let index_map = self.index_map.clone();
let index_path = self.base_path.join(uuid.to_string());
@ -171,6 +186,87 @@ impl IndexMapper {
Ok(self.index_mapping.get(rtxn, name)?.is_some())
}
/// Resizes the maximum size of the specified index to the double of its current maximum size.
///
/// This operation involves closing the underlying environment and so can take a long time to complete.
///
/// # Panics
///
/// - If the Index corresponding to the passed name is concurrently being deleted/resized or cannot be found in the
/// in memory hash map.
pub fn resize_index(&self, rtxn: &RoTxn, name: &str) -> Result<()> {
// fixme: factor to a function?
let uuid = self
.index_mapping
.get(rtxn, name)?
.ok_or_else(|| Error::IndexNotFound(name.to_string()))?;
// We remove the index from the in-memory index map.
let mut lock = self.index_map.write().unwrap();
// signal that will be sent when the resize operation completes
let resize_operation = Arc::new(SignalEvent::manual(false));
let index = match lock.insert(uuid, BeingResized(resize_operation)) {
Some(Available(index)) => index,
Some(previous_status) => {
lock.insert(uuid, previous_status);
panic!(
"Attempting to resize index {name} that is already being resized or deleted."
)
}
None => {
panic!("Could not find the status of index {name} in the in-memory index mapper.")
}
};
drop(lock);
let resize_succeeded = (move || {
let current_size = index.map_size()?;
let new_size = current_size * 2;
let closing_event = index.prepare_for_closing();
log::debug!("Waiting for index {name} to close");
if !closing_event.wait_timeout(std::time::Duration::from_secs(600)) {
// fail after 10 minutes waiting
panic!("Could not resize index {name} (unable to close it)");
}
log::info!("Resized index {name} from {current_size} to {new_size} bytes");
let index_path = self.base_path.join(uuid.to_string());
let index = self.create_or_open_index(&index_path, None, new_size)?;
Ok(index)
})();
// Put the map back to a consistent state.
// Even if there was an error we don't want to leave the map in an inconsistent state as it would cause
// deadlocks.
let mut lock = self.index_map.write().unwrap();
let (resize_operation, resize_succeeded) = match resize_succeeded {
Ok(index) => {
// insert the resized index
let Some(BeingResized(resize_operation)) = lock.insert(uuid, Available(index)) else {
panic!("Index state for index {name} was modified while it was being resized")
};
(resize_operation, Ok(()))
}
Err(error) => {
// there was an error, not much we can do... delete the index from the in-memory map to prevent future errors
let Some(BeingResized(resize_operation)) = lock.remove(&uuid) else {
panic!("Index state for index {name} was modified while it was being resized")
};
(resize_operation, Err(error))
}
};
// drop the lock before signaling completion so that other threads don't immediately await on the lock after waking up.
drop(lock);
resize_operation.signal();
resize_succeeded
}
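
One detail worth noting in `resize_index` is the immediately invoked closure: the fallible part of the resize runs inside `(move || { ... })()` so `?` can be used, while the surrounding function still restores the in-memory map and signals waiters whether or not it succeeded. A tiny sketch of the pattern (not the real resize logic):

```rust
fn fallible_step(size: u64) -> Result<u64, String> {
    size.checked_mul(2).ok_or_else(|| "overflow while doubling the map size".to_string())
}

fn resize(current: u64) -> Result<u64, String> {
    // The closure lets `?` short-circuit without leaving the function early...
    let outcome = (|| {
        let doubled = fallible_step(current)?;
        Ok(doubled)
    })();

    // ...so shared state can be restored and waiters signalled here in every case,
    // before the result is finally propagated.
    outcome
}

fn main() {
    assert_eq!(resize(512), Ok(1024));
    assert!(resize(u64::MAX).is_err());
}
```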
/// Return an index, may open it if it wasn't already opened.
pub fn index(&self, rtxn: &RoTxn, name: &str) -> Result<Index> {
let uuid = self
@ -179,31 +275,47 @@ impl IndexMapper {
.ok_or_else(|| Error::IndexNotFound(name.to_string()))?;
// we clone here to drop the lock before entering the match
let index = self.index_map.read().unwrap().get(&uuid).cloned();
let index = match index {
Some(Available(index)) => index,
Some(BeingDeleted) => return Err(Error::IndexNotFound(name.to_string())),
// since we're lazy, it's possible that the index has not been opened yet.
None => {
let mut index_map = self.index_map.write().unwrap();
// between the read lock and the write lock it's not impossible
// that someone already opened the index (e.g. if two searches happen
// at the same time), thus before opening it we check a second time
// if it's not already there.
// Since there is a good chance it's not already there we can use
// the entry method.
match index_map.entry(uuid) {
Entry::Vacant(entry) => {
let index_path = self.base_path.join(uuid.to_string());
let index = loop {
let index = self.index_map.read().unwrap().get(&uuid).cloned();
let index = self.create_or_open_index(&index_path, None)?;
entry.insert(Available(index.clone()));
index
match index {
Some(Available(index)) => break index,
Some(BeingResized(ref resize_operation)) => {
// Avoiding deadlocks: no lock taken while doing this operation.
resize_operation.wait();
continue;
}
Some(BeingDeleted) => return Err(Error::IndexNotFound(name.to_string())),
// since we're lazy, it's possible that the index has not been opened yet.
None => {
let mut index_map = self.index_map.write().unwrap();
// between the read lock and the write lock it's not impossible
// that someone already opened the index (e.g. if two searches happen
// at the same time), thus before opening it we check a second time
// if it's not already there.
// Since there is a good chance it's not already there we can use
// the entry method.
match index_map.entry(uuid) {
Entry::Vacant(entry) => {
let index_path = self.base_path.join(uuid.to_string());
let index =
self.create_or_open_index(&index_path, None, self.index_size)?;
entry.insert(Available(index.clone()));
break index;
}
Entry::Occupied(entry) => match entry.get() {
Available(index) => break index.clone(),
BeingResized(resize_operation) => {
// Avoiding the deadlock: we drop the lock before waiting
let resize_operation = resize_operation.clone();
drop(index_map);
resize_operation.wait();
continue;
}
BeingDeleted => return Err(Error::IndexNotFound(name.to_string())),
},
}
Entry::Occupied(entry) => match entry.get() {
Available(index) => index.clone(),
BeingDeleted => return Err(Error::IndexNotFound(name.to_string())),
},
}
}
};
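
The reader side in `index()` mirrors that handshake: take a read lock and clone the status, and if the index is `BeingResized`, wait on the signal with no lock held and retry; only when the index was never opened does it fall back to the write lock, re-checking the entry because another thread may have opened it in between. A simplified, self-contained sketch of that retry loop (again with illustrative types rather than the real `IndexMapper`):

```rust
use std::collections::hash_map::Entry;
use std::collections::HashMap;
use std::sync::{Arc, Condvar, Mutex, RwLock};

type Signal = Arc<(Mutex<bool>, Condvar)>;

#[derive(Clone)]
enum Status {
    Available(String),    // stand-in for an open `Index`
    BeingResized(Signal), // wait on this, then retry
    BeingDeleted,
}

#[derive(Debug)]
enum Error {
    IndexNotFound(String),
}

fn get_index(map: &RwLock<HashMap<String, Status>>, name: &str) -> Result<String, Error> {
    loop {
        // Fast path: a read lock and a clone, so searches don't serialize on the write lock.
        let status = map.read().unwrap().get(name).cloned();
        match status {
            Some(Status::Available(index)) => return Ok(index),
            Some(Status::BeingResized(signal)) => {
                // No lock is held here, so the resizing thread can make progress.
                let (done, cond) = &*signal;
                let mut done = done.lock().unwrap();
                while !*done {
                    done = cond.wait(done).unwrap();
                }
                continue; // retry from the top with a fresh read
            }
            Some(Status::BeingDeleted) => return Err(Error::IndexNotFound(name.to_owned())),
            None => {
                // Slow path: the index was never opened. Re-check under the write lock,
                // because another thread may have opened it between the two locks.
                let mut write = map.write().unwrap();
                if let Entry::Vacant(entry) = write.entry(name.to_owned()) {
                    let index = format!("opened:{name}"); // stand-in for create_or_open_index
                    entry.insert(Status::Available(index.clone()));
                    return Ok(index);
                }
                // Someone else inserted a status meanwhile: loop and re-inspect it.
            }
        }
    }
}

fn main() {
    let map = RwLock::new(HashMap::new());
    println!("{:?}", get_index(&map, "doggos"));
}
```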

View File

@ -43,6 +43,7 @@ use file_store::FileStore;
use meilisearch_types::error::ResponseError;
use meilisearch_types::heed::types::{OwnedType, SerdeBincode, SerdeJson, Str};
use meilisearch_types::heed::{self, Database, Env, RoTxn};
use meilisearch_types::index_uid_pattern::IndexUidPattern;
use meilisearch_types::milli;
use meilisearch_types::milli::documents::DocumentsBatchBuilder;
use meilisearch_types::milli::update::IndexerConfig;
@ -422,12 +423,12 @@ impl IndexScheduler {
#[cfg(test)]
run.breakpoint(Breakpoint::Init);
loop {
run.wake_up.wait();
run.wake_up.wait();
loop {
match run.tick() {
Ok(0) => (),
Ok(_) => run.wake_up.signal(),
Ok(TickOutcome::TickAgain(_)) => (),
Ok(TickOutcome::WaitForSignal) => run.wake_up.wait(),
Err(e) => {
log::error!("{}", e);
// Wait one second when an irrecoverable error occurs.
@ -440,7 +441,6 @@ impl IndexScheduler {
) {
std::thread::sleep(Duration::from_secs(1));
}
run.wake_up.signal();
}
}
}
@ -630,7 +630,7 @@ impl IndexScheduler {
&self,
rtxn: &RoTxn,
query: &Query,
authorized_indexes: &Option<Vec<String>>,
authorized_indexes: &Option<Vec<IndexUidPattern>>,
) -> Result<RoaringBitmap> {
let mut tasks = self.get_task_ids(rtxn, query)?;
@ -648,7 +648,7 @@ impl IndexScheduler {
let all_indexes_iter = self.index_tasks.iter(rtxn)?;
for result in all_indexes_iter {
let (index, index_tasks) = result?;
if !authorized_indexes.contains(&index.to_owned()) {
if !authorized_indexes.iter().any(|p| p.matches_str(index)) {
tasks -= index_tasks;
}
}
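
With API keys now carrying `IndexUidPattern`s instead of plain index names, the filter above can no longer rely on `contains`: every index present in the task store is tested against each authorized pattern, and its tasks are subtracted when none match. A toy version of that subtraction, with `HashSet` standing in for the roaring bitmaps:

```rust
use std::collections::{BTreeMap, HashSet};

/// Does `pattern` (optionally ending in `*`) match `index`?
fn matches(pattern: &str, index: &str) -> bool {
    match pattern.strip_suffix('*') {
        Some(prefix) => index.starts_with(prefix),
        None => pattern == index,
    }
}

/// Keep only the task ids that belong to an index the key is authorized on.
fn filter_tasks(
    mut tasks: HashSet<u32>,
    index_tasks: &BTreeMap<&str, HashSet<u32>>,
    authorized: &[&str],
) -> HashSet<u32> {
    for (index, ids) in index_tasks {
        if !authorized.iter().any(|pattern| matches(pattern, index)) {
            // Same shape as `tasks -= index_tasks` on the roaring bitmaps.
            tasks = &tasks - ids;
        }
    }
    tasks
}

fn main() {
    let index_tasks = BTreeMap::from([
        ("catto", HashSet::from([0, 2])),
        ("doggo", HashSet::from([1])),
    ]);
    let all_tasks = HashSet::from([0, 1, 2]);

    let visible = filter_tasks(all_tasks, &index_tasks, &["doggo*"]);
    assert_eq!(visible, HashSet::from([1]));
    println!("ok");
}
```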
@ -668,7 +668,7 @@ impl IndexScheduler {
pub fn get_tasks_from_authorized_indexes(
&self,
query: Query,
authorized_indexes: Option<Vec<String>>,
authorized_indexes: Option<Vec<IndexUidPattern>>,
) -> Result<Vec<Task>> {
let rtxn = self.env.read_txn()?;
@ -764,8 +764,8 @@ impl IndexScheduler {
Ok(task)
}
/// Register a new task comming from a dump in the scheduler.
/// By takinig a mutable ref we're pretty sure no one will ever import a dump while actix is running.
/// Register a new task coming from a dump in the scheduler.
/// By taking a mutable ref we're pretty sure no one will ever import a dump while actix is running.
pub fn register_dumped_task(
&mut self,
task: TaskDump,
@ -926,7 +926,7 @@ impl IndexScheduler {
/// 5. Reset the in-memory list of processed tasks.
///
/// Returns the number of processed tasks.
fn tick(&self) -> Result<usize> {
fn tick(&self) -> Result<TickOutcome> {
#[cfg(test)]
{
*self.run_loop_iteration.write().unwrap() += 1;
@ -937,8 +937,9 @@ impl IndexScheduler {
let batch =
match self.create_next_batch(&rtxn).map_err(|e| Error::CreateBatch(Box::new(e)))? {
Some(batch) => batch,
None => return Ok(0),
None => return Ok(TickOutcome::WaitForSignal),
};
let index_uid = batch.index_uid().map(ToOwned::to_owned);
drop(rtxn);
// 1. store the starting date with the bitmap of processing tasks.
@ -1009,7 +1010,23 @@ impl IndexScheduler {
// the `started_at` date times and `processings` of the current processing tasks.
// This date time is used by the task cancelation to store the right `started_at`
// date in the task on disk.
return Ok(0);
return Ok(TickOutcome::TickAgain(0));
}
// If an index said it was full, we need to:
// 1. identify which index is full
// 2. close the associated environment
// 3. resize it
// 4. re-schedule tasks
Err(Error::Milli(milli::Error::UserError(
milli::UserError::MaxDatabaseSizeReached,
))) if index_uid.is_some() => {
// fixme: add index_uid to match to avoid the unwrap
let index_uid = index_uid.unwrap();
// fixme: handle error more gracefully? not sure when this could happen
self.index_mapper.resize_index(&wtxn, &index_uid)?;
wtxn.abort().map_err(Error::HeedTransaction)?;
return Ok(TickOutcome::TickAgain(0));
}
// In case of a failure we must get back and patch all the tasks with the error.
Err(err) => {
@ -1049,7 +1066,7 @@ impl IndexScheduler {
#[cfg(test)]
self.breakpoint(Breakpoint::AfterProcessing);
Ok(processed_tasks)
Ok(TickOutcome::TickAgain(processed_tasks))
}
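
The new `MaxDatabaseSizeReached` arm turns what used to be a task failure into a retry: the write transaction is aborted, the offending index is resized, and `TickAgain(0)` asks the scheduler to run the still-enqueued batch again. A toy illustration of that grow-and-retry control flow (the error type and `process` function are invented for the sketch, not milli's):

```rust
#[derive(Debug)]
enum ProcessError {
    MaxDatabaseSizeReached,
    Other(String),
}

/// Pretend batch processor: fails until the map size is big enough for the batch.
fn process(batch_bytes: u64, map_size: u64) -> Result<usize, ProcessError> {
    if batch_bytes > map_size {
        Err(ProcessError::MaxDatabaseSizeReached)
    } else {
        Ok(1) // one task processed
    }
}

fn tick_until_done(batch_bytes: u64, mut map_size: u64) -> Result<usize, ProcessError> {
    loop {
        match process(batch_bytes, map_size) {
            Ok(processed) => return Ok(processed),
            // The index said it was full: double its maximum size and try the
            // same (still enqueued) batch again on the next tick.
            Err(ProcessError::MaxDatabaseSizeReached) => {
                map_size *= 2;
                println!("resized index to {map_size} bytes, ticking again");
            }
            Err(other) => return Err(other),
        }
    }
}

fn main() {
    // Starts at 1 GiB, needs two doublings before the 3 GiB batch fits.
    let processed = tick_until_done(3 << 30, 1 << 30).unwrap();
    println!("processed {processed} task(s)");
}
```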
pub(crate) fn delete_persisted_task_data(&self, task: &Task) -> Result<()> {
@ -1084,6 +1101,16 @@ impl IndexScheduler {
}
}
/// The outcome of calling the [`IndexScheduler::tick`] function.
pub enum TickOutcome {
/// The scheduler should immediately attempt another `tick`.
///
/// The `usize` field contains the number of processed tasks.
TickAgain(usize),
/// The scheduler should wait for an external signal before attempting another `tick`.
WaitForSignal,
}
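
Combined with the run-loop change earlier in this file, `TickOutcome` makes the scheduling policy explicit: keep ticking while a tick reports progress, and only block on the wake-up signal when there is nothing to do. A minimal sketch of a loop driven by such an enum, with an mpsc channel standing in for the real wake-up signal:

```rust
use std::sync::mpsc::{channel, Receiver};
use std::time::Duration;

enum TickOutcome {
    /// Immediately attempt another tick; the field is the number of processed tasks.
    TickAgain(usize),
    /// Nothing to do: block until something new is registered.
    WaitForSignal,
}

fn run(wake_up: Receiver<()>, mut tick: impl FnMut() -> Result<TickOutcome, String>) {
    loop {
        match tick() {
            Ok(TickOutcome::TickAgain(n)) => println!("processed {n} task(s), ticking again"),
            Ok(TickOutcome::WaitForSignal) => {
                // Park until a new task is registered. The real loop never exits;
                // the demo stops once the sender side of the channel is gone.
                if wake_up.recv().is_err() {
                    return;
                }
            }
            Err(e) => {
                eprintln!("{e}");
                // Back off briefly on errors, as the real loop does.
                std::thread::sleep(Duration::from_secs(1));
            }
        }
    }
}

fn main() {
    let (tx, rx) = channel();
    tx.send(()).unwrap(); // pretend one task was registered
    drop(tx);

    let batches = vec![2usize, 1]; // two fake batches to process
    let mut batches = batches;
    run(rx, move || {
        Ok(match batches.pop() {
            Some(n) => TickOutcome::TickAgain(n),
            None => TickOutcome::WaitForSignal,
        })
    });
}
```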
#[cfg(test)]
mod tests {
use std::io::{BufWriter, Seek, Write};
@ -1679,6 +1706,105 @@ mod tests {
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "both_task_succeeded");
}
#[test]
fn document_addition_and_document_deletion() {
let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]);
let content = r#"[
{ "id": 1, "doggo": "jean bob" },
{ "id": 2, "catto": "jorts" },
{ "id": 3, "doggo": "bork" }
]"#;
let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(0).unwrap();
let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap();
file.persist().unwrap();
index_scheduler
.register(KindWithContent::DocumentAdditionOrUpdate {
index_uid: S("doggos"),
primary_key: Some(S("id")),
method: ReplaceDocuments,
content_file: uuid,
documents_count,
allow_index_creation: true,
})
.unwrap();
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task");
index_scheduler
.register(KindWithContent::DocumentDeletion {
index_uid: S("doggos"),
documents_ids: vec![S("1"), S("2")],
})
.unwrap();
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_second_task");
handle.advance_one_successful_batch(); // The addition AND deletion should've been batched together
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_processing_the_batch");
let index = index_scheduler.index("doggos").unwrap();
let rtxn = index.read_txn().unwrap();
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
let documents = index
.all_documents(&rtxn)
.unwrap()
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
.collect::<Vec<_>>();
snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents");
}
#[test]
fn document_deletion_and_document_addition() {
let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]);
index_scheduler
.register(KindWithContent::DocumentDeletion {
index_uid: S("doggos"),
documents_ids: vec![S("1"), S("2")],
})
.unwrap();
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task");
let content = r#"[
{ "id": 1, "doggo": "jean bob" },
{ "id": 2, "catto": "jorts" },
{ "id": 3, "doggo": "bork" }
]"#;
let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(0).unwrap();
let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap();
file.persist().unwrap();
index_scheduler
.register(KindWithContent::DocumentAdditionOrUpdate {
index_uid: S("doggos"),
primary_key: Some(S("id")),
method: ReplaceDocuments,
content_file: uuid,
documents_count,
allow_index_creation: true,
})
.unwrap();
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_second_task");
// The deletion should have failed because it can't create an index
handle.advance_one_failed_batch();
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_failing_the_deletion");
// The addition should work
handle.advance_one_successful_batch();
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_last_successful_addition");
let index = index_scheduler.index("doggos").unwrap();
let rtxn = index.read_txn().unwrap();
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
let documents = index
.all_documents(&rtxn)
.unwrap()
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
.collect::<Vec<_>>();
snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents");
}
#[test]
fn do_not_batch_task_of_different_indexes() {
let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]);
@ -2521,7 +2647,11 @@ mod tests {
let query = Query { index_uids: Some(vec!["catto".to_owned()]), ..Default::default() };
let tasks = index_scheduler
.get_task_ids_from_authorized_indexes(&rtxn, &query, &Some(vec!["doggo".to_owned()]))
.get_task_ids_from_authorized_indexes(
&rtxn,
&query,
&Some(vec![IndexUidPattern::new_unchecked("doggo")]),
)
.unwrap();
// we have asked for only the tasks associated with catto, but are only authorized to retrieve the tasks
// associated with doggo -> empty result
@ -2529,7 +2659,11 @@ mod tests {
let query = Query::default();
let tasks = index_scheduler
.get_task_ids_from_authorized_indexes(&rtxn, &query, &Some(vec!["doggo".to_owned()]))
.get_task_ids_from_authorized_indexes(
&rtxn,
&query,
&Some(vec![IndexUidPattern::new_unchecked("doggo")]),
)
.unwrap();
// we asked for all the tasks, but we are only authorized to retrieve the doggo tasks
// -> only the index creation of doggo should be returned
@ -2540,7 +2674,10 @@ mod tests {
.get_task_ids_from_authorized_indexes(
&rtxn,
&query,
&Some(vec!["catto".to_owned(), "doggo".to_owned()]),
&Some(vec![
IndexUidPattern::new_unchecked("catto"),
IndexUidPattern::new_unchecked("doggo"),
]),
)
.unwrap();
// we asked for all the tasks, but we are only authorized to retrieve the doggo and catto tasks
@ -2588,7 +2725,11 @@ mod tests {
let query = Query { canceled_by: Some(vec![task_cancelation.uid]), ..Query::default() };
let tasks = index_scheduler
.get_task_ids_from_authorized_indexes(&rtxn, &query, &Some(vec!["doggo".to_string()]))
.get_task_ids_from_authorized_indexes(
&rtxn,
&query,
&Some(vec![IndexUidPattern::new_unchecked("doggo")]),
)
.unwrap();
// Return only 1 because the user is not authorized to see task 2
snapshot!(snapshot_bitmap(&tasks), @"[1,]");

View File

@ -0,0 +1,42 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: succeeded, details: { received_documents: 3, indexed_documents: Some(3) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }}
1 {uid: 1, status: succeeded, details: { received_document_ids: 2, deleted_documents: Some(2) }, kind: DocumentDeletion { index_uid: "doggos", documents_ids: ["1", "2"] }}
----------------------------------------------------------------------
### Status:
enqueued []
succeeded [0,1,]
----------------------------------------------------------------------
### Kind:
"documentAdditionOrUpdate" [0,]
"documentDeletion" [1,]
----------------------------------------------------------------------
### Index Tasks:
doggos [0,1,]
----------------------------------------------------------------------
### Index Mapper:
["doggos"]
----------------------------------------------------------------------
### Canceled By:
----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### Started At:
[timestamp] [0,1,]
----------------------------------------------------------------------
### Finished At:
[timestamp] [0,1,]
----------------------------------------------------------------------
### File Store:
----------------------------------------------------------------------

View File

@ -0,0 +1,9 @@
---
source: index-scheduler/src/lib.rs
---
[
{
"id": 3,
"doggo": "bork"
}
]

View File

@ -0,0 +1,37 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { received_documents: 3, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }}
----------------------------------------------------------------------
### Status:
enqueued [0,]
----------------------------------------------------------------------
### Kind:
"documentAdditionOrUpdate" [0,]
----------------------------------------------------------------------
### Index Tasks:
doggos [0,]
----------------------------------------------------------------------
### Index Mapper:
[]
----------------------------------------------------------------------
### Canceled By:
----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
----------------------------------------------------------------------
### Started At:
----------------------------------------------------------------------
### Finished At:
----------------------------------------------------------------------
### File Store:
00000000-0000-0000-0000-000000000000
----------------------------------------------------------------------

View File

@ -0,0 +1,40 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { received_documents: 3, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }}
1 {uid: 1, status: enqueued, details: { received_document_ids: 2, deleted_documents: None }, kind: DocumentDeletion { index_uid: "doggos", documents_ids: ["1", "2"] }}
----------------------------------------------------------------------
### Status:
enqueued [0,1,]
----------------------------------------------------------------------
### Kind:
"documentAdditionOrUpdate" [0,]
"documentDeletion" [1,]
----------------------------------------------------------------------
### Index Tasks:
doggos [0,1,]
----------------------------------------------------------------------
### Index Mapper:
[]
----------------------------------------------------------------------
### Canceled By:
----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### Started At:
----------------------------------------------------------------------
### Finished At:
----------------------------------------------------------------------
### File Store:
00000000-0000-0000-0000-000000000000
----------------------------------------------------------------------

View File

@ -0,0 +1,43 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: failed, error: ResponseError { code: 200, message: "Index `doggos` not found.", error_code: "index_not_found", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#index_not_found" }, details: { received_document_ids: 2, deleted_documents: Some(0) }, kind: DocumentDeletion { index_uid: "doggos", documents_ids: ["1", "2"] }}
1 {uid: 1, status: enqueued, details: { received_documents: 3, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }}
----------------------------------------------------------------------
### Status:
enqueued [1,]
failed [0,]
----------------------------------------------------------------------
### Kind:
"documentAdditionOrUpdate" [1,]
"documentDeletion" [0,]
----------------------------------------------------------------------
### Index Tasks:
doggos [0,1,]
----------------------------------------------------------------------
### Index Mapper:
[]
----------------------------------------------------------------------
### Canceled By:
----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### Started At:
[timestamp] [0,]
----------------------------------------------------------------------
### Finished At:
[timestamp] [0,]
----------------------------------------------------------------------
### File Store:
00000000-0000-0000-0000-000000000000
----------------------------------------------------------------------

View File

@ -0,0 +1,45 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: failed, error: ResponseError { code: 200, message: "Index `doggos` not found.", error_code: "index_not_found", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#index_not_found" }, details: { received_document_ids: 2, deleted_documents: Some(0) }, kind: DocumentDeletion { index_uid: "doggos", documents_ids: ["1", "2"] }}
1 {uid: 1, status: succeeded, details: { received_documents: 3, indexed_documents: Some(3) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }}
----------------------------------------------------------------------
### Status:
enqueued []
succeeded [1,]
failed [0,]
----------------------------------------------------------------------
### Kind:
"documentAdditionOrUpdate" [1,]
"documentDeletion" [0,]
----------------------------------------------------------------------
### Index Tasks:
doggos [0,1,]
----------------------------------------------------------------------
### Index Mapper:
["doggos"]
----------------------------------------------------------------------
### Canceled By:
----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### Started At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### Finished At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### File Store:
----------------------------------------------------------------------

View File

@ -0,0 +1,17 @@
---
source: index-scheduler/src/lib.rs
---
[
{
"id": 1,
"doggo": "jean bob"
},
{
"id": 2,
"catto": "jorts"
},
{
"id": 3,
"doggo": "bork"
}
]

View File

@ -0,0 +1,36 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { received_document_ids: 2, deleted_documents: None }, kind: DocumentDeletion { index_uid: "doggos", documents_ids: ["1", "2"] }}
----------------------------------------------------------------------
### Status:
enqueued [0,]
----------------------------------------------------------------------
### Kind:
"documentDeletion" [0,]
----------------------------------------------------------------------
### Index Tasks:
doggos [0,]
----------------------------------------------------------------------
### Index Mapper:
[]
----------------------------------------------------------------------
### Canceled By:
----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
----------------------------------------------------------------------
### Started At:
----------------------------------------------------------------------
### Finished At:
----------------------------------------------------------------------
### File Store:
----------------------------------------------------------------------

View File

@ -0,0 +1,40 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { received_document_ids: 2, deleted_documents: None }, kind: DocumentDeletion { index_uid: "doggos", documents_ids: ["1", "2"] }}
1 {uid: 1, status: enqueued, details: { received_documents: 3, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }}
----------------------------------------------------------------------
### Status:
enqueued [0,1,]
----------------------------------------------------------------------
### Kind:
"documentAdditionOrUpdate" [1,]
"documentDeletion" [0,]
----------------------------------------------------------------------
### Index Tasks:
doggos [0,1,]
----------------------------------------------------------------------
### Index Mapper:
[]
----------------------------------------------------------------------
### Canceled By:
----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### Started At:
----------------------------------------------------------------------
### Finished At:
----------------------------------------------------------------------
### File Store:
00000000-0000-0000-0000-000000000000
----------------------------------------------------------------------

View File

@ -439,20 +439,29 @@ impl IndexScheduler {
provided_ids: received_document_ids,
deleted_documents,
} => {
if let Some(deleted_documents) = deleted_documents {
assert_eq!(status, Status::Succeeded);
assert!(deleted_documents <= received_document_ids as u64);
assert_eq!(kind.as_kind(), Kind::DocumentDeletion);
assert_eq!(kind.as_kind(), Kind::DocumentDeletion);
let (index_uid, documents_ids) =
if let KindWithContent::DocumentDeletion {
ref index_uid,
ref documents_ids,
} = kind
{
(index_uid, documents_ids)
} else {
unreachable!()
};
assert_eq!(&task_index_uid.unwrap(), index_uid);
match &kind {
KindWithContent::DocumentDeletion { index_uid, documents_ids } => {
assert_eq!(&task_index_uid.unwrap(), index_uid);
assert!(documents_ids.len() >= received_document_ids);
}
_ => panic!(),
match status {
Status::Enqueued | Status::Processing => (),
Status::Succeeded => {
assert!(deleted_documents.unwrap() <= received_document_ids as u64);
assert!(documents_ids.len() == received_document_ids);
}
Status::Failed | Status::Canceled => {
assert!(deleted_documents == Some(0));
assert!(documents_ids.len() == received_document_ids);
}
} else {
assert_ne!(status, Status::Succeeded);
}
}
Details::ClearAll { deleted_documents } => {

View File

@ -1,10 +1,16 @@
[package]
name = "json-depth-checker"
version = "1.0.0"
edition = "2021"
description = "A library that indicates if a JSON must be flattened"
publish = false
version.workspace = true
authors.workspace = true
# description.workspace = true
homepage.workspace = true
readme.workspace = true
edition.workspace = true
license.workspace = true
[dependencies]
serde_json = "1.0"

View File

@ -1,7 +1,14 @@
[package]
name = "meili-snap"
version = "1.0.0"
edition = "2021"
publish = false
version.workspace = true
authors.workspace = true
description.workspace = true
homepage.workspace = true
readme.workspace = true
edition.workspace = true
license.workspace = true
[dependencies]
insta = { version = "^1.19.1", features = ["json", "redactions"] }

View File

@ -1,12 +1,20 @@
[package]
name = "meilisearch-auth"
version = "1.0.0"
edition = "2021"
publish = false
version.workspace = true
authors.workspace = true
description.workspace = true
homepage.workspace = true
readme.workspace = true
edition.workspace = true
license.workspace = true
[dependencies]
base64 = "0.13.1"
enum-iterator = "1.1.3"
hmac = "0.12.1"
maplit = "1.0.2"
meilisearch-types = { path = "../meilisearch-types" }
rand = "0.8.5"
roaring = { version = "0.10.0", features = ["serde"] }

View File

@ -7,9 +7,10 @@ use std::path::Path;
use std::sync::Arc;
use error::{AuthControllerError, Result};
use maplit::hashset;
use meilisearch_types::index_uid_pattern::IndexUidPattern;
use meilisearch_types::keys::{Action, CreateApiKey, Key, PatchApiKey};
use meilisearch_types::milli::update::Setting;
use meilisearch_types::star_or::StarOr;
use serde::{Deserialize, Serialize};
pub use store::open_auth_store_env;
use store::{generate_key_as_hexa, HeedAuthStore};
@ -85,29 +86,12 @@ impl AuthController {
search_rules: Option<SearchRules>,
) -> Result<AuthFilter> {
let mut filters = AuthFilter::default();
let key = self
.store
.get_api_key(uid)?
.ok_or_else(|| AuthControllerError::ApiKeyNotFound(uid.to_string()))?;
let key = self.get_key(uid)?;
if !key.indexes.iter().any(|i| i == &StarOr::Star) {
filters.search_rules = match search_rules {
// Intersect search_rules with parent key authorized indexes.
Some(search_rules) => SearchRules::Map(
key.indexes
.into_iter()
.filter_map(|index| {
search_rules.get_index_search_rules(&format!("{index}")).map(
|index_search_rules| (index.to_string(), Some(index_search_rules)),
)
})
.collect(),
),
None => SearchRules::Set(key.indexes.into_iter().map(|x| x.to_string()).collect()),
};
} else if let Some(search_rules) = search_rules {
filters.search_rules = search_rules;
}
filters.search_rules = match search_rules {
Some(search_rules) => search_rules,
None => SearchRules::Set(key.indexes.into_iter().collect()),
};
filters.allow_index_creation = self.is_key_authorized(uid, Action::IndexesAdd, None)?;
@ -150,9 +134,7 @@ impl AuthController {
.get_expiration_date(uid, action, None)?
.or(match index {
// else check if the key has access to the requested index.
Some(index) => {
self.store.get_expiration_date(uid, action, Some(index.as_bytes()))?
}
Some(index) => self.store.get_expiration_date(uid, action, Some(index))?,
// or to any index if no index has been requested.
None => self.store.prefix_first_expiration_date(uid, action)?,
}) {
@ -192,42 +174,54 @@ impl Default for AuthFilter {
#[derive(Debug, Serialize, Deserialize, Clone)]
#[serde(untagged)]
pub enum SearchRules {
Set(HashSet<String>),
Map(HashMap<String, Option<IndexSearchRules>>),
Set(HashSet<IndexUidPattern>),
Map(HashMap<IndexUidPattern, Option<IndexSearchRules>>),
}
impl Default for SearchRules {
fn default() -> Self {
Self::Set(Some("*".to_string()).into_iter().collect())
Self::Set(hashset! { IndexUidPattern::all() })
}
}
impl SearchRules {
pub fn is_index_authorized(&self, index: &str) -> bool {
match self {
Self::Set(set) => set.contains("*") || set.contains(index),
Self::Map(map) => map.contains_key("*") || map.contains_key(index),
Self::Set(set) => {
set.contains("*")
|| set.contains(index)
|| set.iter().any(|pattern| pattern.matches_str(index))
}
Self::Map(map) => {
map.contains_key("*")
|| map.contains_key(index)
|| map.keys().any(|pattern| pattern.matches_str(index))
}
}
}
pub fn get_index_search_rules(&self, index: &str) -> Option<IndexSearchRules> {
match self {
Self::Set(set) => {
if set.contains("*") || set.contains(index) {
Self::Set(_) => {
if self.is_index_authorized(index) {
Some(IndexSearchRules::default())
} else {
None
}
}
Self::Map(map) => {
map.get(index).or_else(|| map.get("*")).map(|isr| isr.clone().unwrap_or_default())
// We must take the most restrictive rule from this set of index uid pattern rules.
map.iter()
.filter(|(pattern, _)| pattern.matches_str(index))
.max_by_key(|(pattern, _)| (pattern.is_exact(), pattern.len()))
.and_then(|(_, rule)| rule.clone())
}
}
}
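
The `Map` branch above no longer does a plain key lookup: every pattern matching the index is considered and the most specific one wins, exact names beating prefixes and longer prefixes beating shorter ones, which is what `max_by_key` on `(is_exact, len)` encodes. A small standalone illustration of that selection rule, using plain strings instead of `IndexUidPattern` and `IndexSearchRules`:

```rust
/// Does `pattern` (optionally ending in `*`) match `index`?
fn matches(pattern: &str, index: &str) -> bool {
    match pattern.strip_suffix('*') {
        Some(prefix) => index.starts_with(prefix),
        None => pattern == index,
    }
}

/// Pick the rule attached to the most specific matching pattern:
/// exact matches beat prefixes, and longer prefixes beat shorter ones.
fn rule_for<'a>(rules: &'a [(&'a str, &'a str)], index: &str) -> Option<&'a str> {
    rules
        .iter()
        .filter(|(pattern, _)| matches(pattern, index))
        .max_by_key(|(pattern, _)| (!pattern.ends_with('*'), pattern.len()))
        .map(|(_, rule)| *rule)
}

fn main() {
    let rules = [
        ("*", "default rule"),
        ("prod-*", "prod rule"),
        ("prod-movies", "movies rule"),
    ];
    assert_eq!(rule_for(&rules, "prod-movies"), Some("movies rule"));
    assert_eq!(rule_for(&rules, "prod-songs"), Some("prod rule"));
    assert_eq!(rule_for(&rules, "staging-songs"), Some("default rule"));
    println!("ok");
}
```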
/// Return the list of indexes such that `self.is_index_authorized(index) == true`,
/// or `None` if all indexes satisfy this condition.
pub fn authorized_indexes(&self) -> Option<Vec<String>> {
pub fn authorized_indexes(&self) -> Option<Vec<IndexUidPattern>> {
match self {
SearchRules::Set(set) => {
if set.contains("*") {
@ -248,7 +242,7 @@ impl SearchRules {
}
impl IntoIterator for SearchRules {
type Item = (String, IndexSearchRules);
type Item = (IndexUidPattern, IndexSearchRules);
type IntoIter = Box<dyn Iterator<Item = Self::Item>>;
fn into_iter(self) -> Self::IntoIter {

View File

@ -5,20 +5,21 @@ use std::convert::{TryFrom, TryInto};
use std::fs::create_dir_all;
use std::path::Path;
use std::str;
use std::str::FromStr;
use std::sync::Arc;
use hmac::{Hmac, Mac};
use meilisearch_types::index_uid_pattern::IndexUidPattern;
use meilisearch_types::keys::KeyId;
use meilisearch_types::milli;
use meilisearch_types::milli::heed::types::{ByteSlice, DecodeIgnore, SerdeJson};
use meilisearch_types::milli::heed::{Database, Env, EnvOpenOptions, RwTxn};
use meilisearch_types::star_or::StarOr;
use sha2::Sha256;
use time::OffsetDateTime;
use uuid::fmt::Hyphenated;
use uuid::Uuid;
use super::error::Result;
use super::error::{AuthControllerError, Result};
use super::{Action, Key};
const AUTH_STORE_SIZE: usize = 1_073_741_824; //1GiB
@ -129,7 +130,7 @@ impl HeedAuthStore {
}
}
let no_index_restriction = key.indexes.contains(&StarOr::Star);
let no_index_restriction = key.indexes.iter().any(|p| p.matches_all());
for action in actions {
if no_index_restriction {
// If there is no index restriction we put None.
@ -214,11 +215,28 @@ impl HeedAuthStore {
&self,
uid: Uuid,
action: Action,
index: Option<&[u8]>,
index: Option<&str>,
) -> Result<Option<Option<OffsetDateTime>>> {
let rtxn = self.env.read_txn()?;
let tuple = (&uid, &action, index);
Ok(self.action_keyid_index_expiration.get(&rtxn, &tuple)?)
let tuple = (&uid, &action, index.map(|s| s.as_bytes()));
match self.action_keyid_index_expiration.get(&rtxn, &tuple)? {
Some(expiration) => Ok(Some(expiration)),
None => {
let tuple = (&uid, &action, None);
for result in self.action_keyid_index_expiration.prefix_iter(&rtxn, &tuple)? {
let ((_, _, index_uid_pattern), expiration) = result?;
if let Some((pattern, index)) = index_uid_pattern.zip(index) {
let index_uid_pattern = str::from_utf8(pattern)?;
let pattern = IndexUidPattern::from_str(index_uid_pattern)
.map_err(|e| AuthControllerError::Internal(Box::new(e)))?;
if pattern.matches_str(index) {
return Ok(Some(expiration));
}
}
}
Ok(None)
}
}
}
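
The lookup above tries the exact `(key, action, index)` entry first and only then scans the key's remaining entries for that action, parsing each stored pattern and returning the first one that matches the requested index. A simplified model of that two-step lookup over a plain map (the store layout here is invented for the sketch):

```rust
use std::collections::HashMap;

/// Does `pattern` (optionally ending in `*`) match `index`?
fn matches(pattern: &str, index: &str) -> bool {
    match pattern.strip_suffix('*') {
        Some(prefix) => index.starts_with(prefix),
        None => pattern == index,
    }
}

/// Key: (action, index uid pattern); value: expiration timestamp (None = no expiration).
type Store = HashMap<(String, String), Option<u64>>;

fn expiration_for(store: &Store, action: &str, index: &str) -> Option<Option<u64>> {
    // 1. Exact entry first: the key was created for this precise index name.
    if let Some(expiration) = store.get(&(action.to_owned(), index.to_owned())) {
        return Some(*expiration);
    }
    // 2. Otherwise scan the action's entries and test each stored pattern.
    store
        .iter()
        .find(|((a, pattern), _)| a == action && matches(pattern, index))
        .map(|(_, expiration)| *expiration)
}

fn main() {
    let mut store = Store::new();
    store.insert(("search".to_owned(), "prod-*".to_owned()), Some(1_700_000_000));
    store.insert(("documents.add".to_owned(), "prod-movies".to_owned()), None);

    assert_eq!(expiration_for(&store, "search", "prod-movies"), Some(Some(1_700_000_000)));
    assert_eq!(expiration_for(&store, "search", "staging"), None);
    println!("ok");
}
```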
pub fn prefix_first_expiration_date(

View File

@ -1,15 +1,21 @@
[package]
name = "meilisearch-types"
version = "1.0.0"
authors = ["marin <postma.marin@protonmail.com>"]
edition = "2021"
publish = false
version.workspace = true
authors.workspace = true
description.workspace = true
homepage.workspace = true
readme.workspace = true
edition.workspace = true
license.workspace = true
[dependencies]
actix-web = { version = "4.2.1", default-features = false }
anyhow = "1.0.65"
convert_case = "0.6.0"
csv = "1.1.6"
deserr = "0.3.0"
deserr = "0.5.0"
either = { version = "1.6.1", features = ["serde"] }
enum-iterator = "1.1.3"
file-store = { path = "../file-store" }

View File

@ -1,328 +0,0 @@
/*!
This module implements the error messages of deserialization errors.
We try to:
1. Give a human-readable description of where the error originated.
2. Use the correct terms depending on the format of the request (json/query param)
3. Categorise the type of the error (e.g. missing field, wrong value type, unexpected error, etc.)
*/
use deserr::{ErrorKind, IntoValue, ValueKind, ValuePointerRef};
use super::{DeserrJsonError, DeserrQueryParamError};
use crate::error::{Code, ErrorCode};
/// Return a description of the given location in a Json, preceded by the given article.
/// e.g. `at .key1[8].key2`. If the location is the origin, the given article will not be
/// included in the description.
pub fn location_json_description(location: ValuePointerRef, article: &str) -> String {
fn rec(location: ValuePointerRef) -> String {
match location {
ValuePointerRef::Origin => String::new(),
ValuePointerRef::Key { key, prev } => rec(*prev) + "." + key,
ValuePointerRef::Index { index, prev } => format!("{}[{index}]", rec(*prev)),
}
}
match location {
ValuePointerRef::Origin => String::new(),
_ => {
format!("{article} `{}`", rec(location))
}
}
}
/// Return a description of the list of value kinds for a Json payload.
fn value_kinds_description_json(kinds: &[ValueKind]) -> String {
// Rank each value kind so that they can be sorted (and deduplicated)
// Having a predictable order helps with pattern matching
fn order(kind: &ValueKind) -> u8 {
match kind {
ValueKind::Null => 0,
ValueKind::Boolean => 1,
ValueKind::Integer => 2,
ValueKind::NegativeInteger => 3,
ValueKind::Float => 4,
ValueKind::String => 5,
ValueKind::Sequence => 6,
ValueKind::Map => 7,
}
}
// Return a description of a single value kind, preceded by an article
fn single_description(kind: &ValueKind) -> &'static str {
match kind {
ValueKind::Null => "null",
ValueKind::Boolean => "a boolean",
ValueKind::Integer => "a positive integer",
ValueKind::NegativeInteger => "a negative integer",
ValueKind::Float => "a number",
ValueKind::String => "a string",
ValueKind::Sequence => "an array",
ValueKind::Map => "an object",
}
}
fn description_rec(kinds: &[ValueKind], count_items: &mut usize, message: &mut String) {
let (msg_part, rest): (_, &[ValueKind]) = match kinds {
[] => (String::new(), &[]),
[ValueKind::Integer | ValueKind::NegativeInteger, ValueKind::Float, rest @ ..] => {
("a number".to_owned(), rest)
}
[ValueKind::Integer, ValueKind::NegativeInteger, ValueKind::Float, rest @ ..] => {
("a number".to_owned(), rest)
}
[ValueKind::Integer, ValueKind::NegativeInteger, rest @ ..] => {
("an integer".to_owned(), rest)
}
[a] => (single_description(a).to_owned(), &[]),
[a, rest @ ..] => (single_description(a).to_owned(), rest),
};
if rest.is_empty() {
if *count_items == 0 {
message.push_str(&msg_part);
} else if *count_items == 1 {
message.push_str(&format!(" or {msg_part}"));
} else {
message.push_str(&format!(", or {msg_part}"));
}
} else {
if *count_items == 0 {
message.push_str(&msg_part);
} else {
message.push_str(&format!(", {msg_part}"));
}
*count_items += 1;
description_rec(rest, count_items, message);
}
}
let mut kinds = kinds.to_owned();
kinds.sort_by_key(order);
kinds.dedup();
if kinds.is_empty() {
// Should not happen ideally
"a different value".to_owned()
} else {
let mut message = String::new();
description_rec(kinds.as_slice(), &mut 0, &mut message);
message
}
}
/// Return the JSON string of the value preceded by a description of its kind
fn value_description_with_kind_json(v: &serde_json::Value) -> String {
match v.kind() {
ValueKind::Null => "null".to_owned(),
kind => {
format!(
"{}: `{}`",
value_kinds_description_json(&[kind]),
serde_json::to_string(v).unwrap()
)
}
}
}
impl<C: Default + ErrorCode> deserr::DeserializeError for DeserrJsonError<C> {
fn error<V: IntoValue>(
_self_: Option<Self>,
error: deserr::ErrorKind<V>,
location: ValuePointerRef,
) -> Result<Self, Self> {
let mut message = String::new();
message.push_str(&match error {
ErrorKind::IncorrectValueKind { actual, accepted } => {
let expected = value_kinds_description_json(accepted);
let received = value_description_with_kind_json(&serde_json::Value::from(actual));
let location = location_json_description(location, " at");
format!("Invalid value type{location}: expected {expected}, but found {received}")
}
ErrorKind::MissingField { field } => {
let location = location_json_description(location, " inside");
format!("Missing field `{field}`{location}")
}
ErrorKind::UnknownKey { key, accepted } => {
let location = location_json_description(location, " inside");
format!(
"Unknown field `{}`{location}: expected one of {}",
key,
accepted
.iter()
.map(|accepted| format!("`{}`", accepted))
.collect::<Vec<String>>()
.join(", ")
)
}
ErrorKind::UnknownValue { value, accepted } => {
let location = location_json_description(location, " at");
format!(
"Unknown value `{}`{location}: expected one of {}",
value,
accepted
.iter()
.map(|accepted| format!("`{}`", accepted))
.collect::<Vec<String>>()
.join(", "),
)
}
ErrorKind::Unexpected { msg } => {
let location = location_json_description(location, " at");
format!("Invalid value{location}: {msg}")
}
});
Err(DeserrJsonError::new(message, C::default().error_code()))
}
}
pub fn immutable_field_error(field: &str, accepted: &[&str], code: Code) -> DeserrJsonError {
let msg = format!(
"Immutable field `{field}`: expected one of {}",
accepted
.iter()
.map(|accepted| format!("`{}`", accepted))
.collect::<Vec<String>>()
.join(", ")
);
DeserrJsonError::new(msg, code)
}
/// Return a description of the given location in query parameters, preceded by the
/// given article. e.g. `at key5[2]`. If the location is the origin, the given article
/// will not be included in the description.
pub fn location_query_param_description(location: ValuePointerRef, article: &str) -> String {
fn rec(location: ValuePointerRef) -> String {
match location {
ValuePointerRef::Origin => String::new(),
ValuePointerRef::Key { key, prev } => {
if matches!(prev, ValuePointerRef::Origin) {
key.to_owned()
} else {
rec(*prev) + "." + key
}
}
ValuePointerRef::Index { index, prev } => format!("{}[{index}]", rec(*prev)),
}
}
match location {
ValuePointerRef::Origin => String::new(),
_ => {
format!("{article} `{}`", rec(location))
}
}
}
impl<C: Default + ErrorCode> deserr::DeserializeError for DeserrQueryParamError<C> {
fn error<V: IntoValue>(
_self_: Option<Self>,
error: deserr::ErrorKind<V>,
location: ValuePointerRef,
) -> Result<Self, Self> {
let mut message = String::new();
message.push_str(&match error {
ErrorKind::IncorrectValueKind { actual, accepted } => {
let expected = value_kinds_description_query_param(accepted);
let received = value_description_with_kind_query_param(actual);
let location = location_query_param_description(location, " for parameter");
format!("Invalid value type{location}: expected {expected}, but found {received}")
}
ErrorKind::MissingField { field } => {
let location = location_query_param_description(location, " inside");
format!("Missing parameter `{field}`{location}")
}
ErrorKind::UnknownKey { key, accepted } => {
let location = location_query_param_description(location, " inside");
format!(
"Unknown parameter `{}`{location}: expected one of {}",
key,
accepted
.iter()
.map(|accepted| format!("`{}`", accepted))
.collect::<Vec<String>>()
.join(", ")
)
}
ErrorKind::UnknownValue { value, accepted } => {
let location = location_query_param_description(location, " for parameter");
format!(
"Unknown value `{}`{location}: expected one of {}",
value,
accepted
.iter()
.map(|accepted| format!("`{}`", accepted))
.collect::<Vec<String>>()
.join(", "),
)
}
ErrorKind::Unexpected { msg } => {
let location = location_query_param_description(location, " in parameter");
format!("Invalid value{location}: {msg}")
}
});
Err(DeserrQueryParamError::new(message, C::default().error_code()))
}
}
/// Return a description of the list of value kinds for query parameters
/// Since query parameters are always treated as strings, we always return
/// "a string" for now.
fn value_kinds_description_query_param(_accepted: &[ValueKind]) -> String {
"a string".to_owned()
}
fn value_description_with_kind_query_param<V: IntoValue>(actual: deserr::Value<V>) -> String {
match actual {
deserr::Value::Null => "null".to_owned(),
deserr::Value::Boolean(x) => format!("a boolean: `{x}`"),
deserr::Value::Integer(x) => format!("an integer: `{x}`"),
deserr::Value::NegativeInteger(x) => {
format!("an integer: `{x}`")
}
deserr::Value::Float(x) => {
format!("a number: `{x}`")
}
deserr::Value::String(x) => {
format!("a string: `{x}`")
}
deserr::Value::Sequence(_) => "multiple values".to_owned(),
deserr::Value::Map(_) => "multiple parameters".to_owned(),
}
}
#[cfg(test)]
mod tests {
use deserr::ValueKind;
use crate::deserr::error_messages::value_kinds_description_json;
#[test]
fn test_value_kinds_description_json() {
insta::assert_display_snapshot!(value_kinds_description_json(&[]), @"a different value");
insta::assert_display_snapshot!(value_kinds_description_json(&[ValueKind::Boolean]), @"a boolean");
insta::assert_display_snapshot!(value_kinds_description_json(&[ValueKind::Integer]), @"a positive integer");
insta::assert_display_snapshot!(value_kinds_description_json(&[ValueKind::NegativeInteger]), @"a negative integer");
insta::assert_display_snapshot!(value_kinds_description_json(&[ValueKind::Integer]), @"a positive integer");
insta::assert_display_snapshot!(value_kinds_description_json(&[ValueKind::String]), @"a string");
insta::assert_display_snapshot!(value_kinds_description_json(&[ValueKind::Sequence]), @"an array");
insta::assert_display_snapshot!(value_kinds_description_json(&[ValueKind::Map]), @"an object");
insta::assert_display_snapshot!(value_kinds_description_json(&[ValueKind::Integer, ValueKind::Boolean]), @"a boolean or a positive integer");
insta::assert_display_snapshot!(value_kinds_description_json(&[ValueKind::Null, ValueKind::Integer]), @"null or a positive integer");
insta::assert_display_snapshot!(value_kinds_description_json(&[ValueKind::Sequence, ValueKind::NegativeInteger]), @"a negative integer or an array");
insta::assert_display_snapshot!(value_kinds_description_json(&[ValueKind::Integer, ValueKind::Float]), @"a number");
insta::assert_display_snapshot!(value_kinds_description_json(&[ValueKind::Integer, ValueKind::Float, ValueKind::NegativeInteger]), @"a number");
insta::assert_display_snapshot!(value_kinds_description_json(&[ValueKind::Integer, ValueKind::Float, ValueKind::NegativeInteger, ValueKind::Null]), @"null or a number");
insta::assert_display_snapshot!(value_kinds_description_json(&[ValueKind::Boolean, ValueKind::Integer, ValueKind::Float, ValueKind::NegativeInteger, ValueKind::Null]), @"null, a boolean, or a number");
insta::assert_display_snapshot!(value_kinds_description_json(&[ValueKind::Null, ValueKind::Boolean, ValueKind::Integer, ValueKind::Float, ValueKind::NegativeInteger, ValueKind::Null]), @"null, a boolean, or a number");
}
}

View File

@ -1,18 +1,19 @@
use std::convert::Infallible;
use std::fmt;
use std::marker::PhantomData;
use std::ops::ControlFlow;
use deserr::{DeserializeError, MergeWithError, ValuePointerRef};
use deserr::errors::{JsonError, QueryParamError};
use deserr::{take_cf_content, DeserializeError, IntoValue, MergeWithError, ValuePointerRef};
use crate::error::deserr_codes::{self, *};
use crate::error::deserr_codes::*;
use crate::error::{
unwrap_any, Code, DeserrParseBoolError, DeserrParseIntError, ErrorCode, InvalidTaskDateError,
Code, DeserrParseBoolError, DeserrParseIntError, ErrorCode, InvalidTaskDateError,
ParseOffsetDateTimeError,
};
use crate::index_uid::IndexUidFormatError;
use crate::tasks::{ParseTaskKindError, ParseTaskStatusError};
pub mod error_messages;
pub mod query_params;
/// Marker type for the Json format
@ -20,8 +21,8 @@ pub struct DeserrJson;
/// Marker type for the Query Parameter format
pub struct DeserrQueryParam;
pub type DeserrJsonError<C = deserr_codes::BadRequest> = DeserrError<DeserrJson, C>;
pub type DeserrQueryParamError<C = deserr_codes::BadRequest> = DeserrError<DeserrQueryParam, C>;
pub type DeserrJsonError<C = BadRequest> = DeserrError<DeserrJson, C>;
pub type DeserrQueryParamError<C = BadRequest> = DeserrError<DeserrQueryParam, C>;
/// A request deserialization error.
///
@ -37,6 +38,7 @@ impl<Format, C: Default + ErrorCode> DeserrError<Format, C> {
Self { msg, code, _phantom: PhantomData }
}
}
impl<Format, C: Default + ErrorCode> std::fmt::Debug for DeserrError<Format, C> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.debug_struct("DeserrError").field("msg", &self.msg).field("code", &self.code).finish()
@ -49,6 +51,16 @@ impl<Format, C: Default + ErrorCode> std::fmt::Display for DeserrError<Format, C
}
}
impl<F, C: Default + ErrorCode> actix_web::ResponseError for DeserrError<F, C> {
fn status_code(&self) -> actix_web::http::StatusCode {
self.code.http()
}
fn error_response(&self) -> actix_web::HttpResponse<actix_web::body::BoxBody> {
crate::error::ResponseError::from_msg(self.msg.to_string(), self.code).error_response()
}
}
impl<Format, C: Default + ErrorCode> std::error::Error for DeserrError<Format, C> {}
impl<Format, C: Default + ErrorCode> ErrorCode for DeserrError<Format, C> {
fn error_code(&self) -> Code {
@ -64,8 +76,8 @@ impl<Format, C1: Default + ErrorCode, C2: Default + ErrorCode>
_self_: Option<Self>,
other: DeserrError<Format, C2>,
_merge_location: ValuePointerRef,
) -> Result<Self, Self> {
Err(DeserrError { msg: other.msg, code: other.code, _phantom: PhantomData })
) -> ControlFlow<Self, Self> {
ControlFlow::Break(DeserrError { msg: other.msg, code: other.code, _phantom: PhantomData })
}
}
@ -74,17 +86,56 @@ impl<Format, C: Default + ErrorCode> MergeWithError<Infallible> for DeserrError<
_self_: Option<Self>,
_other: Infallible,
_merge_location: ValuePointerRef,
) -> Result<Self, Self> {
) -> ControlFlow<Self, Self> {
unreachable!()
}
}
impl<C: Default + ErrorCode> DeserializeError for DeserrJsonError<C> {
fn error<V: IntoValue>(
_self_: Option<Self>,
error: deserr::ErrorKind<V>,
location: ValuePointerRef,
) -> ControlFlow<Self, Self> {
ControlFlow::Break(DeserrJsonError::new(
take_cf_content(JsonError::error(None, error, location)).to_string(),
C::default().error_code(),
))
}
}
impl<C: Default + ErrorCode> DeserializeError for DeserrQueryParamError<C> {
fn error<V: IntoValue>(
_self_: Option<Self>,
error: deserr::ErrorKind<V>,
location: ValuePointerRef,
) -> ControlFlow<Self, Self> {
ControlFlow::Break(DeserrQueryParamError::new(
take_cf_content(QueryParamError::error(None, error, location)).to_string(),
C::default().error_code(),
))
}
}
pub fn immutable_field_error(field: &str, accepted: &[&str], code: Code) -> DeserrJsonError {
let msg = format!(
"Immutable field `{field}`: expected one of {}",
accepted
.iter()
.map(|accepted| format!("`{}`", accepted))
.collect::<Vec<String>>()
.join(", ")
);
DeserrJsonError::new(msg, code)
}
// Implement a convenience function to build a `missing_field` error
macro_rules! make_missing_field_convenience_builder {
($err_code:ident, $fn_name:ident) => {
impl DeserrJsonError<$err_code> {
pub fn $fn_name(field: &str, location: ValuePointerRef) -> Self {
let x = unwrap_any(Self::error::<Infallible>(
let x = deserr::take_cf_content(Self::error::<Infallible>(
None,
deserr::ErrorKind::MissingField { field },
location,
@ -112,7 +163,7 @@ macro_rules! merge_with_error_impl_take_error_message {
_self_: Option<Self>,
other: $err_type,
merge_location: ValuePointerRef,
) -> Result<Self, Self> {
) -> ControlFlow<Self, Self> {
DeserrError::<Format, C>::error::<Infallible>(
None,
deserr::ErrorKind::Unexpected { msg: other.to_string() },

View File

@ -15,10 +15,9 @@ use std::convert::Infallible;
use std::ops::Deref;
use std::str::FromStr;
use deserr::{DeserializeError, DeserializeFromValue, MergeWithError, ValueKind};
use deserr::{DeserializeError, Deserr, MergeWithError, ValueKind};
use super::{DeserrParseBoolError, DeserrParseIntError};
use crate::error::unwrap_any;
use crate::index_uid::IndexUid;
use crate::tasks::{Kind, Status};
@ -38,7 +37,7 @@ impl<T> Deref for Param<T> {
}
}
impl<T, E> DeserializeFromValue<E> for Param<T>
impl<T, E> Deserr<E> for Param<T>
where
E: DeserializeError + MergeWithError<T::Err>,
T: FromQueryParameter,
@ -50,9 +49,9 @@ where
match value {
deserr::Value::String(s) => match T::from_query_param(&s) {
Ok(x) => Ok(Param(x)),
Err(e) => Err(unwrap_any(E::merge(None, e, location))),
Err(e) => Err(deserr::take_cf_content(E::merge(None, e, location))),
},
_ => Err(unwrap_any(E::error(
_ => Err(deserr::take_cf_content(E::error(
None,
deserr::ErrorKind::IncorrectValueKind {
actual: value,

View File

@ -19,7 +19,7 @@ type Result<T> = std::result::Result<T, DocumentFormatError>;
pub enum PayloadType {
Ndjson,
Json,
Csv,
Csv { delimiter: u8 },
}
impl fmt::Display for PayloadType {
@ -27,7 +27,7 @@ impl fmt::Display for PayloadType {
match self {
PayloadType::Ndjson => f.write_str("ndjson"),
PayloadType::Json => f.write_str("json"),
PayloadType::Csv => f.write_str("csv"),
PayloadType::Csv { .. } => f.write_str("csv"),
}
}
}
@ -105,11 +105,11 @@ impl ErrorCode for DocumentFormatError {
}
/// Reads CSV from input and write an obkv batch to writer.
pub fn read_csv(file: &File, writer: impl Write + Seek) -> Result<u64> {
pub fn read_csv(file: &File, writer: impl Write + Seek, delimiter: u8) -> Result<u64> {
let mut builder = DocumentsBatchBuilder::new(writer);
let mmap = unsafe { MmapOptions::new().map(file)? };
let csv = csv::Reader::from_reader(mmap.as_ref());
builder.append_csv(csv).map_err(|e| (PayloadType::Csv, e))?;
let csv = csv::ReaderBuilder::new().delimiter(delimiter).from_reader(mmap.as_ref());
builder.append_csv(csv).map_err(|e| (PayloadType::Csv { delimiter }, e))?;
let count = builder.documents_count();
let _ = builder.into_inner().map_err(DocumentFormatError::Io)?;
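
The functional change in `read_csv` is swapping `csv::Reader::from_reader` for a `ReaderBuilder` so the delimiter becomes a parameter. A minimal standalone sketch of reading semicolon-separated data the same way, assuming the `csv` crate as a dependency; only the `ReaderBuilder::delimiter`/`from_reader` calls mirror the diff, the rest is illustrative:

```rust
use csv::ReaderBuilder;

fn main() -> Result<(), csv::Error> {
    let data = "id;doggo\n1;jean bob\n2;bork\n";

    // `delimiter` takes a single `u8`, so a delimiter can only ever be one byte.
    let mut reader = ReaderBuilder::new().delimiter(b';').from_reader(data.as_bytes());

    for record in reader.records() {
        let record = record?;
        println!("id = {}, doggo = {}", &record[0], &record[1]);
    }
    Ok(())
}
```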

View File

@ -127,7 +127,7 @@ macro_rules! make_error_codes {
}
impl Code {
/// return the HTTP status code associated with the `Code`
fn http(&self) -> StatusCode {
pub fn http(&self) -> StatusCode {
match self {
$(
Code::$code_ident => StatusCode::$status
@ -220,6 +220,7 @@ InvalidDocumentOffset , InvalidRequest , BAD_REQUEST ;
InvalidIndexLimit , InvalidRequest , BAD_REQUEST ;
InvalidIndexOffset , InvalidRequest , BAD_REQUEST ;
InvalidIndexPrimaryKey , InvalidRequest , BAD_REQUEST ;
InvalidIndexCsvDelimiter , InvalidRequest , BAD_REQUEST ;
InvalidIndexUid , InvalidRequest , BAD_REQUEST ;
InvalidSearchAttributesToCrop , InvalidRequest , BAD_REQUEST ;
InvalidSearchAttributesToHighlight , InvalidRequest , BAD_REQUEST ;
@ -381,14 +382,6 @@ impl ErrorCode for io::Error {
}
}
/// Unwrap a result, either its Ok or Err value.
pub fn unwrap_any<T>(any: Result<T, T>) -> T {
match any {
Ok(any) => any,
Err(any) => any,
}
}
/// Deserialization when `deserr` cannot parse an API key date.
#[derive(Debug)]
pub struct ParseOffsetDateTimeError(pub String);
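
`unwrap_any` disappears here because deserr 0.5 reports errors through `std::ops::ControlFlow` instead of `Result`, with `deserr::take_cf_content` playing the same unwrap-either-variant role. A sketch of what such a helper amounts to (an illustration of the idea, not deserr's actual source):

```rust
use std::ops::ControlFlow;

/// Unwrap a `ControlFlow`, whichever variant it is: the `ControlFlow`
/// counterpart of the removed `unwrap_any`.
fn take_content<T>(flow: ControlFlow<T, T>) -> T {
    match flow {
        ControlFlow::Continue(value) | ControlFlow::Break(value) => value,
    }
}

fn main() {
    assert_eq!(take_content::<i32>(ControlFlow::Break(7)), 7);
    assert_eq!(take_content::<i32>(ControlFlow::Continue(8)), 8);
    println!("ok");
}
```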

View File

@ -2,14 +2,14 @@ use std::error::Error;
use std::fmt;
use std::str::FromStr;
use deserr::DeserializeFromValue;
use deserr::Deserr;
use crate::error::{Code, ErrorCode};
/// An index uid is composed of only ascii alphanumeric characters, - and _, between 1 and 400
/// bytes long
#[derive(Debug, Clone, PartialEq, Eq, DeserializeFromValue)]
#[deserr(from(String) = IndexUid::try_from -> IndexUidFormatError)]
#[derive(Debug, Clone, PartialEq, Eq, Deserr)]
#[deserr(try_from(String) = IndexUid::try_from -> IndexUidFormatError)]
pub struct IndexUid(String);
impl IndexUid {

View File

@ -0,0 +1,124 @@
use std::borrow::Borrow;
use std::error::Error;
use std::fmt;
use std::ops::Deref;
use std::str::FromStr;
use deserr::Deserr;
use serde::{Deserialize, Serialize};
use crate::error::{Code, ErrorCode};
use crate::index_uid::{IndexUid, IndexUidFormatError};
/// An index uid pattern is composed of only ascii alphanumeric characters, - and _, between 1 and 400
/// bytes long and optionally ending with a *.
#[derive(Serialize, Deserialize, Deserr, Debug, Clone, PartialEq, Eq, Hash)]
#[deserr(try_from(&String) = FromStr::from_str -> IndexUidPatternFormatError)]
pub struct IndexUidPattern(String);
impl IndexUidPattern {
pub fn new_unchecked(s: impl AsRef<str>) -> Self {
Self(s.as_ref().to_string())
}
/// Matches any index name.
pub fn all() -> Self {
IndexUidPattern::from_str("*").unwrap()
}
/// Returns `true` if it matches any index.
pub fn matches_all(&self) -> bool {
self.0 == "*"
}
/// Returns `true` if the pattern matches a specific index name.
pub fn is_exact(&self) -> bool {
!self.0.ends_with('*')
}
/// Returns whether this index uid matches this index uid pattern.
pub fn matches(&self, uid: &IndexUid) -> bool {
self.matches_str(uid.as_str())
}
/// Returns whether this string matches this index uid pattern.
pub fn matches_str(&self, uid: &str) -> bool {
match self.0.strip_suffix('*') {
Some(prefix) => uid.starts_with(prefix),
None => self.0 == uid,
}
}
}
impl Deref for IndexUidPattern {
type Target = str;
fn deref(&self) -> &Self::Target {
&self.0
}
}
impl Borrow<str> for IndexUidPattern {
fn borrow(&self) -> &str {
&self.0
}
}
impl TryFrom<String> for IndexUidPattern {
type Error = IndexUidPatternFormatError;
fn try_from(uid: String) -> Result<Self, Self::Error> {
let result = match uid.strip_suffix('*') {
Some("") => Ok(IndexUidPattern(uid)),
Some(prefix) => IndexUid::from_str(prefix).map(|_| IndexUidPattern(uid)),
None => IndexUid::try_from(uid).map(IndexUid::into_inner).map(IndexUidPattern),
};
match result {
Ok(index_uid_pattern) => Ok(index_uid_pattern),
Err(IndexUidFormatError { invalid_uid }) => {
Err(IndexUidPatternFormatError { invalid_uid })
}
}
}
}
impl FromStr for IndexUidPattern {
type Err = IndexUidPatternFormatError;
fn from_str(uid: &str) -> Result<IndexUidPattern, IndexUidPatternFormatError> {
uid.to_string().try_into()
}
}
impl From<IndexUidPattern> for String {
fn from(IndexUidPattern(uid): IndexUidPattern) -> Self {
uid
}
}
#[derive(Debug)]
pub struct IndexUidPatternFormatError {
pub invalid_uid: String,
}
impl fmt::Display for IndexUidPatternFormatError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(
f,
"`{}` is not a valid index uid pattern. Index uid patterns \
can be an integer or a string containing only alphanumeric \
characters, hyphens (-), underscores (_), and \
optionally end with a star (*).",
self.invalid_uid,
)
}
}
impl Error for IndexUidPatternFormatError {}
impl ErrorCode for IndexUidPatternFormatError {
fn error_code(&self) -> Code {
Code::InvalidIndexUid
}
}
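
The whole type boils down to two rules visible above: matching strips an optional trailing `*` and compares the rest as a prefix (or the whole string exactly), and validation reuses the `IndexUid` character rules on the part before the star. A compact standalone illustration of the validation rule, with a hand-written stand-in for the `IndexUid` check:

```rust
/// Stand-in for the `IndexUid` rules: 1 to 400 bytes of ASCII
/// alphanumerics, `-` or `_` (not the real type, just its character check).
fn valid_uid(uid: &str) -> bool {
    !uid.is_empty()
        && uid.len() <= 400
        && uid.bytes().all(|b| b.is_ascii_alphanumeric() || b == b'-' || b == b'_')
}

/// The `TryFrom<String>` rule, in isolation: `*` alone is valid,
/// `prefix*` is valid when `prefix` is a valid uid, and anything
/// else must itself be a valid uid.
fn valid_pattern(pattern: &str) -> bool {
    match pattern.strip_suffix('*') {
        Some("") => true,
        Some(prefix) => valid_uid(prefix),
        None => valid_uid(pattern),
    }
}

fn main() {
    assert!(valid_pattern("*"));
    assert!(valid_pattern("doggo-*"));
    assert!(valid_pattern("doggo_2"));
    assert!(!valid_pattern(""));
    assert!(!valid_pattern("my index*"));
    println!("ok");
}
```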

View File

@ -2,7 +2,7 @@ use std::convert::Infallible;
use std::hash::Hash;
use std::str::FromStr;
use deserr::{DeserializeError, DeserializeFromValue, ValuePointerRef};
use deserr::{DeserializeError, Deserr, MergeWithError, ValuePointerRef};
use enum_iterator::Sequence;
use milli::update::Setting;
use serde::{Deserialize, Serialize};
@ -11,31 +11,44 @@ use time::macros::{format_description, time};
use time::{Date, OffsetDateTime, PrimitiveDateTime};
use uuid::Uuid;
use crate::deserr::error_messages::immutable_field_error;
use crate::deserr::DeserrJsonError;
use crate::deserr::{immutable_field_error, DeserrError, DeserrJsonError};
use crate::error::deserr_codes::*;
use crate::error::{unwrap_any, Code, ParseOffsetDateTimeError};
use crate::index_uid::IndexUid;
use crate::star_or::StarOr;
use crate::error::{Code, ErrorCode, ParseOffsetDateTimeError};
use crate::index_uid_pattern::{IndexUidPattern, IndexUidPatternFormatError};
pub type KeyId = Uuid;
#[derive(Debug, DeserializeFromValue)]
impl<C: Default + ErrorCode> MergeWithError<IndexUidPatternFormatError> for DeserrJsonError<C> {
fn merge(
_self_: Option<Self>,
other: IndexUidPatternFormatError,
merge_location: deserr::ValuePointerRef,
) -> std::ops::ControlFlow<Self, Self> {
DeserrError::error::<Infallible>(
None,
deserr::ErrorKind::Unexpected { msg: other.to_string() },
merge_location,
)
}
}
#[derive(Debug, Deserr)]
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
pub struct CreateApiKey {
#[deserr(default, error = DeserrJsonError<InvalidApiKeyDescription>)]
pub description: Option<String>,
#[deserr(default, error = DeserrJsonError<InvalidApiKeyName>)]
pub name: Option<String>,
#[deserr(default = Uuid::new_v4(), error = DeserrJsonError<InvalidApiKeyUid>, from(&String) = Uuid::from_str -> uuid::Error)]
#[deserr(default = Uuid::new_v4(), error = DeserrJsonError<InvalidApiKeyUid>, try_from(&String) = Uuid::from_str -> uuid::Error)]
pub uid: KeyId,
#[deserr(error = DeserrJsonError<InvalidApiKeyActions>, missing_field_error = DeserrJsonError::missing_api_key_actions)]
pub actions: Vec<Action>,
#[deserr(error = DeserrJsonError<InvalidApiKeyIndexes>, missing_field_error = DeserrJsonError::missing_api_key_indexes)]
pub indexes: Vec<StarOr<IndexUid>>,
#[deserr(error = DeserrJsonError<InvalidApiKeyExpiresAt>, from(Option<String>) = parse_expiration_date -> ParseOffsetDateTimeError, missing_field_error = DeserrJsonError::missing_api_key_expires_at)]
pub indexes: Vec<IndexUidPattern>,
#[deserr(error = DeserrJsonError<InvalidApiKeyExpiresAt>, try_from(Option<String>) = parse_expiration_date -> ParseOffsetDateTimeError, missing_field_error = DeserrJsonError::missing_api_key_expires_at)]
pub expires_at: Option<OffsetDateTime>,
}
impl CreateApiKey {
pub fn to_key(self) -> Key {
let CreateApiKey { description, name, uid, actions, indexes, expires_at } = self;
@ -65,7 +78,7 @@ fn deny_immutable_fields_api_key(
"expiresAt" => immutable_field_error(field, accepted, Code::ImmutableApiKeyExpiresAt),
"createdAt" => immutable_field_error(field, accepted, Code::ImmutableApiKeyCreatedAt),
"updatedAt" => immutable_field_error(field, accepted, Code::ImmutableApiKeyUpdatedAt),
_ => unwrap_any(DeserrJsonError::<BadRequest>::error::<Infallible>(
_ => deserr::take_cf_content(DeserrJsonError::<BadRequest>::error::<Infallible>(
None,
deserr::ErrorKind::UnknownKey { key: field, accepted },
location,
@ -73,7 +86,7 @@ fn deny_immutable_fields_api_key(
}
}
#[derive(Debug, DeserializeFromValue)]
#[derive(Debug, Deserr)]
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields = deny_immutable_fields_api_key)]
pub struct PatchApiKey {
#[deserr(default, error = DeserrJsonError<InvalidApiKeyDescription>)]
@ -90,7 +103,7 @@ pub struct Key {
pub name: Option<String>,
pub uid: KeyId,
pub actions: Vec<Action>,
pub indexes: Vec<StarOr<IndexUid>>,
pub indexes: Vec<IndexUidPattern>,
#[serde(with = "time::serde::rfc3339::option")]
pub expires_at: Option<OffsetDateTime>,
#[serde(with = "time::serde::rfc3339")]
@ -108,7 +121,7 @@ impl Key {
description: Some("Use it for anything that is not a search operation. Caution! Do not expose it on a public frontend".to_string()),
uid,
actions: vec![Action::All],
indexes: vec![StarOr::Star],
indexes: vec![IndexUidPattern::all()],
expires_at: None,
created_at: now,
updated_at: now,
@ -123,7 +136,7 @@ impl Key {
description: Some("Use it to search from the frontend".to_string()),
uid,
actions: vec![Action::Search],
indexes: vec![StarOr::Star],
indexes: vec![IndexUidPattern::all()],
expires_at: None,
created_at: now,
updated_at: now,
@ -168,9 +181,7 @@ fn parse_expiration_date(
}
}
#[derive(
Copy, Clone, Serialize, Deserialize, Debug, Eq, PartialEq, Hash, Sequence, DeserializeFromValue,
)]
#[derive(Copy, Clone, Serialize, Deserialize, Debug, Eq, PartialEq, Hash, Sequence, Deserr)]
#[repr(u8)]
pub enum Action {
#[serde(rename = "*")]

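Since `indexes` now deserializes into `Vec<IndexUidPattern>`, a `POST /keys` body can carry prefix patterns as well as `*` or exact uids. A hedged sketch of such a payload, assuming `serde_json` is available (field values are illustrative, not taken from the diff):

```rust
// Sketch only: building a key-creation body where `indexes` contains an index
// uid pattern. The concrete values below are illustrative.
use serde_json::json;

fn main() {
    let body = json!({
        "description": "Search-only key for product indexes",
        "actions": ["*"],             // `*` maps to `Action::All` per the rename above
        "indexes": ["products-*"],    // prefix pattern, deserialized as `IndexUidPattern`
        "expiresAt": null             // required field, but may be null
    });
    println!("{}", serde_json::to_string_pretty(&body).unwrap());
}
```
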
View File

@ -3,6 +3,7 @@ pub mod deserr;
pub mod document_formats;
pub mod error;
pub mod index_uid;
pub mod index_uid_pattern;
pub mod keys;
pub mod settings;
pub mod star_or;

View File

@ -3,9 +3,10 @@ use std::convert::Infallible;
use std::fmt;
use std::marker::PhantomData;
use std::num::NonZeroUsize;
use std::ops::ControlFlow;
use std::str::FromStr;
use deserr::{DeserializeError, DeserializeFromValue, ErrorKind, MergeWithError, ValuePointerRef};
use deserr::{DeserializeError, Deserr, ErrorKind, MergeWithError, ValuePointerRef};
use fst::IntoStreamer;
use milli::update::Setting;
use milli::{Criterion, CriterionError, Index, DEFAULT_VALUES_PER_FACET};
@ -13,7 +14,6 @@ use serde::{Deserialize, Serialize, Serializer};
use crate::deserr::DeserrJsonError;
use crate::error::deserr_codes::*;
use crate::error::unwrap_any;
/// The maximum number of results that the engine
/// will be able to return in one search call.
@ -41,7 +41,7 @@ pub struct Checked;
#[derive(Clone, Default, Debug, Serialize, Deserialize, PartialEq, Eq)]
pub struct Unchecked;
impl<E> DeserializeFromValue<E> for Unchecked
impl<E> Deserr<E> for Unchecked
where
E: DeserializeError,
{
@ -59,13 +59,13 @@ fn validate_min_word_size_for_typo_setting<E: DeserializeError>(
) -> Result<MinWordSizeTyposSetting, E> {
if let (Setting::Set(one), Setting::Set(two)) = (s.one_typo, s.two_typos) {
if one > two {
return Err(unwrap_any(E::error::<Infallible>(None, ErrorKind::Unexpected { msg: format!("`minWordSizeForTypos` setting is invalid. `oneTypo` and `twoTypos` fields should be between `0` and `255`, and `twoTypos` should be greater than or equal to `oneTypo`, but found `oneTypo: {one}` and `twoTypos: {two}`.") }, location)));
return Err(deserr::take_cf_content(E::error::<Infallible>(None, ErrorKind::Unexpected { msg: format!("`minWordSizeForTypos` setting is invalid. `oneTypo` and `twoTypos` fields should be between `0` and `255`, and `twoTypos` should be greater than or equal to `oneTypo`, but found `oneTypo: {one}` and `twoTypos: {two}`.") }, location)));
}
}
Ok(s)
}
#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq, DeserializeFromValue)]
#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq, Deserr)]
#[serde(deny_unknown_fields, rename_all = "camelCase")]
#[deserr(deny_unknown_fields, rename_all = camelCase, validate = validate_min_word_size_for_typo_setting -> DeserrJsonError<InvalidSettingsTypoTolerance>)]
pub struct MinWordSizeTyposSetting {
@ -77,7 +77,7 @@ pub struct MinWordSizeTyposSetting {
pub two_typos: Setting<u8>,
}
#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq, DeserializeFromValue)]
#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq, Deserr)]
#[serde(deny_unknown_fields, rename_all = "camelCase")]
#[deserr(deny_unknown_fields, rename_all = camelCase, where_predicate = __Deserr_E: deserr::MergeWithError<DeserrJsonError<InvalidSettingsTypoTolerance>>)]
pub struct TypoSettings {
@ -95,7 +95,7 @@ pub struct TypoSettings {
pub disable_on_attributes: Setting<BTreeSet<String>>,
}
#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq, DeserializeFromValue)]
#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq, Deserr)]
#[serde(deny_unknown_fields, rename_all = "camelCase")]
#[deserr(rename_all = camelCase, deny_unknown_fields)]
pub struct FacetingSettings {
@ -104,7 +104,7 @@ pub struct FacetingSettings {
pub max_values_per_facet: Setting<usize>,
}
#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq, DeserializeFromValue)]
#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq, Deserr)]
#[serde(deny_unknown_fields, rename_all = "camelCase")]
#[deserr(rename_all = camelCase, deny_unknown_fields)]
pub struct PaginationSettings {
@ -118,7 +118,7 @@ impl MergeWithError<milli::CriterionError> for DeserrJsonError<InvalidSettingsRa
_self_: Option<Self>,
other: milli::CriterionError,
merge_location: ValuePointerRef,
) -> Result<Self, Self> {
) -> ControlFlow<Self, Self> {
Self::error::<Infallible>(
None,
ErrorKind::Unexpected { msg: other.to_string() },
@ -130,7 +130,7 @@ impl MergeWithError<milli::CriterionError> for DeserrJsonError<InvalidSettingsRa
/// Holds all the settings for an index. `T` can either be `Checked` if they represent settings
/// whose validity is guaranteed, or `Unchecked` if they need to be validated. In the latter case, a
/// call to `check` will return a `Settings<Checked>` from a `Settings<Unchecked>`.
#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq, DeserializeFromValue)]
#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq, Deserr)]
#[serde(
deny_unknown_fields,
rename_all = "camelCase",
@ -509,8 +509,8 @@ pub fn settings(
})
}
#[derive(Debug, Clone, PartialEq, Eq, DeserializeFromValue)]
#[deserr(from(&String) = FromStr::from_str -> CriterionError)]
#[derive(Debug, Clone, PartialEq, Eq, Deserr)]
#[deserr(try_from(&String) = FromStr::from_str -> CriterionError)]
pub enum RankingRuleView {
/// Sorted by decreasing number of matched query terms.
/// Query words at the front of an attribute are considered better than if they were at the back.

View File

@ -1,13 +1,13 @@
use std::fmt;
use std::marker::PhantomData;
use std::ops::ControlFlow;
use std::str::FromStr;
use deserr::{DeserializeError, DeserializeFromValue, MergeWithError, ValueKind};
use deserr::{DeserializeError, Deserr, MergeWithError, ValueKind};
use serde::de::Visitor;
use serde::{Deserialize, Deserializer, Serialize, Serializer};
use crate::deserr::query_params::FromQueryParameter;
use crate::error::unwrap_any;
/// A type that tries to match either a star (*) or
/// any other thing that implements `FromStr`.
@ -111,7 +111,7 @@ where
}
}
impl<T, E> DeserializeFromValue<E> for StarOr<T>
impl<T, E> Deserr<E> for StarOr<T>
where
T: FromStr,
E: DeserializeError + MergeWithError<T::Err>,
@ -127,11 +127,11 @@ where
} else {
match T::from_str(&v) {
Ok(parsed) => Ok(StarOr::Other(parsed)),
Err(e) => Err(unwrap_any(E::merge(None, e, location))),
Err(e) => Err(deserr::take_cf_content(E::merge(None, e, location))),
}
}
}
_ => Err(unwrap_any(E::error::<V>(
_ => Err(deserr::take_cf_content(E::error::<V>(
None,
deserr::ErrorKind::IncorrectValueKind {
actual: value,
@ -191,7 +191,7 @@ where
}
}
impl<T, E> DeserializeFromValue<E> for OptionStarOr<T>
impl<T, E> Deserr<E> for OptionStarOr<T>
where
E: DeserializeError + MergeWithError<T::Err>,
T: FromQueryParameter,
@ -205,10 +205,10 @@ where
"*" => Ok(OptionStarOr::Star),
s => match T::from_query_param(s) {
Ok(x) => Ok(OptionStarOr::Other(x)),
Err(e) => Err(unwrap_any(E::merge(None, e, location))),
Err(e) => Err(deserr::take_cf_content(E::merge(None, e, location))),
},
},
_ => Err(unwrap_any(E::error::<V>(
_ => Err(deserr::take_cf_content(E::error::<V>(
None,
deserr::ErrorKind::IncorrectValueKind {
actual: value,
@ -271,7 +271,7 @@ impl<T> OptionStarOrList<T> {
}
}
impl<T, E> DeserializeFromValue<E> for OptionStarOrList<T>
impl<T, E> Deserr<E> for OptionStarOrList<T>
where
E: DeserializeError + MergeWithError<T::Err>,
T: FromQueryParameter,
@ -299,7 +299,10 @@ where
Err(e) => {
let location =
if len_cs > 1 { location.push_index(i) } else { location };
error = Some(E::merge(error, e, location)?);
error = match E::merge(error, e, location) {
ControlFlow::Continue(e) => Some(e),
ControlFlow::Break(e) => return Err(e),
};
}
}
}
@ -314,7 +317,7 @@ where
Ok(OptionStarOrList::List(els))
}
}
_ => Err(unwrap_any(E::error::<V>(
_ => Err(deserr::take_cf_content(E::error::<V>(
None,
deserr::ErrorKind::IncorrectValueKind {
actual: value,

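The hunk above is the gist of the deserr 0.3 → 0.5 migration: `MergeWithError::merge` now returns a `ControlFlow`, where `Continue` means keep accumulating errors and `Break` means abort immediately. A toy sketch of that accumulation loop, with a hypothetical string-vector error type standing in for the real deserr errors:

```rust
use std::ops::ControlFlow;

// Toy stand-in for `MergeWithError::merge`: always asks the caller to continue
// accumulating. A real implementation could return `Break` to abort at once.
fn merge(acc: Option<Vec<String>>, err: String) -> ControlFlow<Vec<String>, Vec<String>> {
    let mut errors = acc.unwrap_or_default();
    errors.push(err);
    ControlFlow::Continue(errors)
}

fn main() {
    let mut error: Option<Vec<String>> = None;
    for e in ["invalid value at `[0]`", "invalid value at `[2]`"] {
        error = match merge(error, e.to_string()) {
            ControlFlow::Continue(errs) => Some(errs), // keep going, remember the error
            ControlFlow::Break(errs) => {
                println!("fatal, aborting: {errs:?}");
                return;
            }
        };
    }
    println!("accumulated errors: {error:?}");
}
```
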
View File

@ -1,10 +1,16 @@
[package]
authors = ["Quentin de Quelen <quentin@dequelen.me>", "Clément Renault <clement@meilisearch.com>"]
description = "Meilisearch HTTP server"
edition = "2021"
license = "MIT"
name = "meilisearch"
version = "1.0.0"
publish = false
version.workspace = true
authors.workspace = true
description.workspace = true
homepage.workspace = true
readme.workspace = true
edition.workspace = true
license.workspace = true
default-run = "meilisearch"
[dependencies]
actix-cors = "0.6.3"
@ -19,7 +25,7 @@ byte-unit = { version = "4.0.14", default-features = false, features = ["std", "
bytes = "1.2.1"
clap = { version = "4.0.9", features = ["derive", "env"] }
crossbeam-channel = "0.5.6"
deserr = "0.3.0"
deserr = "0.5.0"
dump = { path = "../dump" }
either = "1.8.0"
env_logger = "0.9.1"
@ -90,7 +96,7 @@ yaup = "0.2.1"
[build-dependencies]
anyhow = { version = "1.0.65", optional = true }
cargo_toml = { version = "0.13.0", optional = true }
cargo_toml = { version = "0.14.0", optional = true }
hex = { version = "0.4.3", optional = true }
reqwest = { version = "0.11.12", features = ["blocking", "rustls-tls"], default-features = false, optional = true }
sha-1 = { version = "0.10.0", optional = true }
@ -110,5 +116,5 @@ japanese = ["meilisearch-types/japanese"]
thai = ["meilisearch-types/thai"]
[package.metadata.mini-dashboard]
assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.5/build.zip"
sha1 = "6fe959b78511b32e9ff857fd9fd31740633b9fce"
assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.6/build.zip"
sha1 = "dce0aba16bceab5549edf9f01de89858800f7422"

View File

@ -26,18 +26,6 @@ impl SearchAggregator {
pub fn succeed(&mut self, _: &dyn Any) {}
}
#[derive(Default)]
pub struct MultiSearchAggregator;
#[allow(dead_code)]
impl MultiSearchAggregator {
pub fn from_queries(_: &dyn Any, _: &dyn Any) -> Self {
Self::default()
}
pub fn succeed(&mut self) {}
}
impl MockAnalytics {
#[allow(clippy::new_ret_no_self)]
pub fn new(opt: &Opt) -> Arc<dyn Analytics> {
@ -55,7 +43,6 @@ impl Analytics for MockAnalytics {
fn publish(&self, _event_name: String, _send: Value, _request: Option<&HttpRequest>) {}
fn get_search(&self, _aggregate: super::SearchAggregator) {}
fn post_search(&self, _aggregate: super::SearchAggregator) {}
fn post_multi_search(&self, _aggregate: super::MultiSearchAggregator) {}
fn add_documents(
&self,
_documents_query: &UpdateDocumentsQuery,

View File

@ -23,8 +23,6 @@ use crate::routes::tasks::TasksFilterQuery;
pub type SegmentAnalytics = mock_analytics::MockAnalytics;
#[cfg(any(debug_assertions, not(feature = "analytics")))]
pub type SearchAggregator = mock_analytics::SearchAggregator;
#[cfg(any(debug_assertions, not(feature = "analytics")))]
pub type MultiSearchAggregator = mock_analytics::MultiSearchAggregator;
// if we are in release mode and the feature analytics was enabled
// we use the real analytics
@ -32,8 +30,6 @@ pub type MultiSearchAggregator = mock_analytics::MultiSearchAggregator;
pub type SegmentAnalytics = segment_analytics::SegmentAnalytics;
#[cfg(all(not(debug_assertions), feature = "analytics"))]
pub type SearchAggregator = segment_analytics::SearchAggregator;
#[cfg(all(not(debug_assertions), feature = "analytics"))]
pub type MultiSearchAggregator = segment_analytics::MultiSearchAggregator;
/// The Meilisearch config dir:
/// `~/.config/Meilisearch` on *NIX or *BSD.
@ -78,9 +74,6 @@ pub trait Analytics: Sync + Send {
/// This method should be called to aggregate a post search
fn post_search(&self, aggregate: SearchAggregator);
/// This method should be called to aggregate a post array of searches
fn post_multi_search(&self, aggregate: MultiSearchAggregator);
// This method should be called to aggregate an add documents request
fn add_documents(
&self,

View File

@ -30,7 +30,7 @@ use crate::routes::indexes::documents::UpdateDocumentsQuery;
use crate::routes::tasks::TasksFilterQuery;
use crate::routes::{create_all_stats, Stats};
use crate::search::{
SearchQuery, SearchQueryWithIndex, SearchResult, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER,
SearchQuery, SearchResult, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER,
DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT,
};
use crate::Opt;
@ -68,7 +68,6 @@ pub enum AnalyticsMsg {
BatchMessage(Track),
AggregateGetSearch(SearchAggregator),
AggregatePostSearch(SearchAggregator),
AggregatePostMultiSearch(MultiSearchAggregator),
AggregateAddDocuments(DocumentsAggregator),
AggregateDeleteDocuments(DocumentsDeletionAggregator),
AggregateUpdateDocuments(DocumentsAggregator),
@ -134,7 +133,6 @@ impl SegmentAnalytics {
opt: opt.clone(),
batcher,
post_search_aggregator: SearchAggregator::default(),
post_multi_search_aggregator: MultiSearchAggregator::default(),
get_search_aggregator: SearchAggregator::default(),
add_documents_aggregator: DocumentsAggregator::default(),
delete_documents_aggregator: DocumentsDeletionAggregator::default(),
@ -176,10 +174,6 @@ impl super::Analytics for SegmentAnalytics {
let _ = self.sender.try_send(AnalyticsMsg::AggregatePostSearch(aggregate));
}
fn post_multi_search(&self, aggregate: MultiSearchAggregator) {
let _ = self.sender.try_send(AnalyticsMsg::AggregatePostMultiSearch(aggregate));
}
fn add_documents(
&self,
documents_query: &UpdateDocumentsQuery,
@ -330,7 +324,6 @@ pub struct Segment {
batcher: AutoBatcher,
get_search_aggregator: SearchAggregator,
post_search_aggregator: SearchAggregator,
post_multi_search_aggregator: MultiSearchAggregator,
add_documents_aggregator: DocumentsAggregator,
delete_documents_aggregator: DocumentsDeletionAggregator,
update_documents_aggregator: DocumentsAggregator,
@ -388,7 +381,6 @@ impl Segment {
Some(AnalyticsMsg::BatchMessage(msg)) => drop(self.batcher.push(msg).await),
Some(AnalyticsMsg::AggregateGetSearch(agreg)) => self.get_search_aggregator.aggregate(agreg),
Some(AnalyticsMsg::AggregatePostSearch(agreg)) => self.post_search_aggregator.aggregate(agreg),
Some(AnalyticsMsg::AggregatePostMultiSearch(agreg)) => self.post_multi_search_aggregator.aggregate(agreg),
Some(AnalyticsMsg::AggregateAddDocuments(agreg)) => self.add_documents_aggregator.aggregate(agreg),
Some(AnalyticsMsg::AggregateDeleteDocuments(agreg)) => self.delete_documents_aggregator.aggregate(agreg),
Some(AnalyticsMsg::AggregateUpdateDocuments(agreg)) => self.update_documents_aggregator.aggregate(agreg),
@ -434,8 +426,6 @@ impl Segment {
.into_event(&self.user, "Documents Searched GET");
let post_search = std::mem::take(&mut self.post_search_aggregator)
.into_event(&self.user, "Documents Searched POST");
let post_multi_search = std::mem::take(&mut self.post_multi_search_aggregator)
.into_event(&self.user, "Documents Searched By Array of Queries POST");
let add_documents = std::mem::take(&mut self.add_documents_aggregator)
.into_event(&self.user, "Documents Added");
let delete_documents = std::mem::take(&mut self.delete_documents_aggregator)
@ -453,9 +443,6 @@ impl Segment {
if let Some(post_search) = post_search {
let _ = self.batcher.push(post_search).await;
}
if let Some(post_multi_search) = post_multi_search {
let _ = self.batcher.push(post_multi_search).await;
}
if let Some(add_documents) = add_documents {
let _ = self.batcher.push(add_documents).await;
}
@ -729,118 +716,6 @@ impl SearchAggregator {
}
}
#[derive(Default)]
pub struct MultiSearchAggregator {
timestamp: Option<OffsetDateTime>,
// requests
total_received: usize,
total_succeeded: usize,
// sum of the number of distinct indexes in each single request, use with total_received to compute an avg
total_distinct_index_count: usize,
// number of queries with a single index, use with total_received to compute a proportion
total_single_index: usize,
// sum of the number of search queries in the requests, use with total_received to compute an average
total_search_count: usize,
// context
user_agents: HashSet<String>,
}
impl MultiSearchAggregator {
pub fn from_queries(query: &[SearchQueryWithIndex], request: &HttpRequest) -> Self {
let timestamp = Some(OffsetDateTime::now_utc());
let user_agents = extract_user_agents(request).into_iter().collect();
let distinct_indexes: HashSet<_> =
query.iter().map(|query| query.index_uid.as_str()).collect();
Self {
timestamp,
total_received: 1,
total_succeeded: 0,
total_distinct_index_count: distinct_indexes.len(),
total_single_index: if distinct_indexes.len() == 1 { 1 } else { 0 },
total_search_count: query.len(),
user_agents,
}
}
pub fn succeed(&mut self) {
self.total_succeeded = self.total_succeeded.saturating_add(1);
}
pub fn aggregate(&mut self, other: Self) {
// write the aggregate in a way that will cause a compilation error if a field is added.
// get ownership of self, replacing it by a default value.
let this = std::mem::take(self);
let timestamp = this.timestamp.or(other.timestamp);
let total_received = this.total_received.saturating_add(other.total_received);
let total_succeeded = this.total_succeeded.saturating_add(other.total_succeeded);
let total_distinct_index_count =
this.total_distinct_index_count.saturating_add(other.total_distinct_index_count);
let total_single_index = this.total_single_index.saturating_add(other.total_single_index);
let total_search_count = this.total_search_count.saturating_add(other.total_search_count);
let mut user_agents = this.user_agents;
for user_agent in other.user_agents.into_iter() {
user_agents.insert(user_agent);
}
// need all fields or compile error
let mut aggregated = Self {
timestamp,
total_received,
total_succeeded,
total_distinct_index_count,
total_single_index,
total_search_count,
user_agents,
// do not add _ or ..Default::default() here
};
// replace the default self with the aggregated value
std::mem::swap(self, &mut aggregated);
}
pub fn into_event(self, user: &User, event_name: &str) -> Option<Track> {
if self.total_received == 0 {
None
} else {
let properties = json!({
"user-agent": self.user_agents,
"requests": {
"total_succeeded": self.total_succeeded,
"total_failed": self.total_received.saturating_sub(self.total_succeeded), // just to be sure we never panics
"total_received": self.total_received,
},
"indexes": {
"total_single_index": self.total_single_index,
"total_distinct_index_count": self.total_distinct_index_count,
"avg_distinct_index_count": (self.total_distinct_index_count as f64) / (self.total_received as f64), // not 0 else returned early
},
"searches": {
"total_search_count": self.total_search_count,
"avg_search_count": (self.total_search_count as f64) / (self.total_received as f64),
}
});
Some(Track {
timestamp: self.timestamp,
user: user.clone(),
event: event_name.to_string(),
properties,
..Default::default()
})
}
}
}
#[derive(Default)]
pub struct DocumentsAggregator {
timestamp: Option<OffsetDateTime>,

View File

@ -11,6 +11,8 @@ pub enum MeilisearchHttpError {
#[error("A Content-Type header is missing. Accepted values for the Content-Type header are: {}",
.0.iter().map(|s| format!("`{}`", s)).collect::<Vec<_>>().join(", "))]
MissingContentType(Vec<String>),
#[error("The Content-Type `{0}` does not support the use of a csv delimiter. The csv delimiter can only be used with the Content-Type `text/csv`.")]
CsvDelimiterWithWrongContentType(String),
#[error(
"The Content-Type `{0}` is invalid. Accepted values for the Content-Type header are: {}",
.1.iter().map(|s| format!("`{}`", s)).collect::<Vec<_>>().join(", ")
@ -52,6 +54,7 @@ impl ErrorCode for MeilisearchHttpError {
fn error_code(&self) -> Code {
match self {
MeilisearchHttpError::MissingContentType(_) => Code::MissingContentType,
MeilisearchHttpError::CsvDelimiterWithWrongContentType(_) => Code::InvalidContentType,
MeilisearchHttpError::MissingPayload(_) => Code::MissingPayload,
MeilisearchHttpError::InvalidContentType(_, _) => Code::InvalidContentType,
MeilisearchHttpError::DocumentNotFound(_) => Code::DocumentNotFound,

View File

@ -199,6 +199,9 @@ pub mod policies {
token: &str,
index: Option<&str>,
) -> Option<AuthFilter> {
// A tenant token only has access to the search route, which always defines an index.
let index = index?;
// Only search action can be accessed by a tenant token.
if A != actions::SEARCH {
return None;
@ -206,7 +209,7 @@ pub mod policies {
let uid = extract_key_id(token)?;
// check if parent key is authorized to do the action.
if auth.is_key_authorized(uid, Action::Search, index).ok()? {
if auth.is_key_authorized(uid, Action::Search, Some(index)).ok()? {
// Check if tenant token is valid.
let key = auth.generate_key(uid)?;
let data = decode::<Claims>(
@ -217,10 +220,8 @@ pub mod policies {
.ok()?;
// Check index access if an index restriction is provided.
if let Some(index) = index {
if !data.claims.search_rules.is_index_authorized(index) {
return None;
}
if !data.claims.search_rules.is_index_authorized(index) {
return None;
}
// Check if token is expired.
@ -230,7 +231,10 @@ pub mod policies {
}
}
return auth.get_key_filters(uid, Some(data.claims.search_rules)).ok();
return match auth.get_key_filters(uid, Some(data.claims.search_rules)) {
Ok(auth) if auth.search_rules.is_index_authorized(index) => Some(auth),
_ => None,
};
}
None

View File

@ -1,78 +0,0 @@
use std::fmt::Debug;
use std::future::Future;
use std::marker::PhantomData;
use std::pin::Pin;
use std::task::{Context, Poll};
use actix_web::dev::Payload;
use actix_web::web::Json;
use actix_web::{FromRequest, HttpRequest};
use deserr::{DeserializeError, DeserializeFromValue};
use futures::ready;
use meilisearch_types::error::{ErrorCode, ResponseError};
/// Extractor for typed data from Json request payloads
/// deserialised by deserr.
///
/// # Extractor
/// To extract typed data from a request body, the inner type `T` must implement the
/// [`deserr::DeserializeFromValue<E>`] trait. The inner type `E` must implement the
/// [`ErrorCode`](meilisearch_error::ErrorCode) trait.
#[derive(Debug)]
pub struct ValidatedJson<T, E>(pub T, PhantomData<*const E>);
impl<T, E> ValidatedJson<T, E> {
pub fn new(data: T) -> Self {
ValidatedJson(data, PhantomData)
}
pub fn into_inner(self) -> T {
self.0
}
}
impl<T, E> FromRequest for ValidatedJson<T, E>
where
E: DeserializeError + ErrorCode + std::error::Error + 'static,
T: DeserializeFromValue<E>,
{
type Error = actix_web::Error;
type Future = ValidatedJsonExtractFut<T, E>;
#[inline]
fn from_request(req: &HttpRequest, payload: &mut Payload) -> Self::Future {
ValidatedJsonExtractFut {
fut: Json::<serde_json::Value>::from_request(req, payload),
_phantom: PhantomData,
}
}
}
pub struct ValidatedJsonExtractFut<T, E> {
fut: <Json<serde_json::Value> as FromRequest>::Future,
_phantom: PhantomData<*const (T, E)>,
}
impl<T, E> Future for ValidatedJsonExtractFut<T, E>
where
T: DeserializeFromValue<E>,
E: DeserializeError + ErrorCode + std::error::Error + 'static,
{
type Output = Result<ValidatedJson<T, E>, actix_web::Error>;
fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> {
let ValidatedJsonExtractFut { fut, .. } = self.get_mut();
let fut = Pin::new(fut);
let res = ready!(fut.poll(cx));
let res = match res {
Err(err) => Err(err),
Ok(data) => match deserr::deserialize::<_, _, E>(data.into_inner()) {
Ok(data) => Ok(ValidatedJson::new(data)),
Err(e) => Err(ResponseError::from(e).into()),
},
};
Poll::Ready(res)
}
}

View File

@ -1,6 +1,4 @@
pub mod payload;
#[macro_use]
pub mod authentication;
pub mod json;
pub mod query_parameters;
pub mod sequential_extractor;

View File

@ -1,70 +0,0 @@
//! A module to parse query parameters with deserr
use std::marker::PhantomData;
use std::{fmt, ops};
use actix_http::Payload;
use actix_utils::future::{err, ok, Ready};
use actix_web::{FromRequest, HttpRequest};
use deserr::{DeserializeError, DeserializeFromValue};
use meilisearch_types::error::{Code, ErrorCode, ResponseError};
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
pub struct QueryParameter<T, E>(pub T, PhantomData<*const E>);
impl<T, E> QueryParameter<T, E> {
/// Unwrap into inner `T` value.
pub fn into_inner(self) -> T {
self.0
}
}
impl<T, E> QueryParameter<T, E>
where
T: DeserializeFromValue<E>,
E: DeserializeError + ErrorCode + std::error::Error + 'static,
{
pub fn from_query(query_str: &str) -> Result<Self, actix_web::Error> {
let value = serde_urlencoded::from_str::<serde_json::Value>(query_str)
.map_err(|e| ResponseError::from_msg(e.to_string(), Code::BadRequest))?;
match deserr::deserialize::<_, _, E>(value) {
Ok(data) => Ok(QueryParameter(data, PhantomData)),
Err(e) => Err(ResponseError::from(e).into()),
}
}
}
impl<T, E> ops::Deref for QueryParameter<T, E> {
type Target = T;
fn deref(&self) -> &T {
&self.0
}
}
impl<T, E> ops::DerefMut for QueryParameter<T, E> {
fn deref_mut(&mut self) -> &mut T {
&mut self.0
}
}
impl<T: fmt::Display, E> fmt::Display for QueryParameter<T, E> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
self.0.fmt(f)
}
}
impl<T, E> FromRequest for QueryParameter<T, E>
where
T: DeserializeFromValue<E>,
E: DeserializeError + ErrorCode + std::error::Error + 'static,
{
type Error = actix_web::Error;
type Future = Ready<Result<Self, actix_web::Error>>;
#[inline]
fn from_request(req: &HttpRequest, _: &mut Payload) -> Self::Future {
QueryParameter::from_query(req.query_string()).map(ok).unwrap_or_else(err)
}
}

View File

@ -1,7 +1,8 @@
use std::str;
use actix_web::{web, HttpRequest, HttpResponse};
use deserr::DeserializeFromValue;
use deserr::actix_web::{AwebJson, AwebQueryParameter};
use deserr::Deserr;
use meilisearch_auth::error::AuthControllerError;
use meilisearch_auth::AuthController;
use meilisearch_types::deserr::query_params::Param;
@ -16,8 +17,6 @@ use uuid::Uuid;
use super::PAGINATION_DEFAULT_LIMIT;
use crate::extractors::authentication::policies::*;
use crate::extractors::authentication::GuardedData;
use crate::extractors::json::ValidatedJson;
use crate::extractors::query_parameters::QueryParameter;
use crate::extractors::sequential_extractor::SeqHandler;
use crate::routes::Pagination;
@ -37,7 +36,7 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
pub async fn create_api_key(
auth_controller: GuardedData<ActionPolicy<{ actions::KEYS_CREATE }>, AuthController>,
body: ValidatedJson<CreateApiKey, DeserrJsonError>,
body: AwebJson<CreateApiKey, DeserrJsonError>,
_req: HttpRequest,
) -> Result<HttpResponse, ResponseError> {
let v = body.into_inner();
@ -51,7 +50,7 @@ pub async fn create_api_key(
Ok(HttpResponse::Created().json(res))
}
#[derive(DeserializeFromValue, Debug, Clone, Copy)]
#[derive(Deserr, Debug, Clone, Copy)]
#[deserr(error = DeserrQueryParamError, rename_all = camelCase, deny_unknown_fields)]
pub struct ListApiKeys {
#[deserr(default, error = DeserrQueryParamError<InvalidApiKeyOffset>)]
@ -59,6 +58,7 @@ pub struct ListApiKeys {
#[deserr(default = Param(PAGINATION_DEFAULT_LIMIT), error = DeserrQueryParamError<InvalidApiKeyLimit>)]
pub limit: Param<usize>,
}
impl ListApiKeys {
fn as_pagination(self) -> Pagination {
Pagination { offset: self.offset.0, limit: self.limit.0 }
@ -67,7 +67,7 @@ impl ListApiKeys {
pub async fn list_api_keys(
auth_controller: GuardedData<ActionPolicy<{ actions::KEYS_GET }>, AuthController>,
list_api_keys: QueryParameter<ListApiKeys, DeserrQueryParamError>,
list_api_keys: AwebQueryParameter<ListApiKeys, DeserrQueryParamError>,
) -> Result<HttpResponse, ResponseError> {
let paginate = list_api_keys.into_inner().as_pagination();
let page_view = tokio::task::spawn_blocking(move || -> Result<_, AuthControllerError> {
@ -104,7 +104,7 @@ pub async fn get_api_key(
pub async fn patch_api_key(
auth_controller: GuardedData<ActionPolicy<{ actions::KEYS_UPDATE }>, AuthController>,
body: ValidatedJson<PatchApiKey, DeserrJsonError>,
body: AwebJson<PatchApiKey, DeserrJsonError>,
path: web::Path<AuthParam>,
) -> Result<HttpResponse, ResponseError> {
let key = path.into_inner().key;

View File

@ -4,15 +4,16 @@ use actix_web::http::header::CONTENT_TYPE;
use actix_web::web::Data;
use actix_web::{web, HttpMessage, HttpRequest, HttpResponse};
use bstr::ByteSlice;
use deserr::DeserializeFromValue;
use deserr::actix_web::AwebQueryParameter;
use deserr::Deserr;
use futures::StreamExt;
use index_scheduler::IndexScheduler;
use log::debug;
use meilisearch_types::deserr::query_params::Param;
use meilisearch_types::deserr::{DeserrJsonError, DeserrQueryParamError};
use meilisearch_types::deserr::DeserrQueryParamError;
use meilisearch_types::document_formats::{read_csv, read_json, read_ndjson, PayloadType};
use meilisearch_types::error::deserr_codes::*;
use meilisearch_types::error::ResponseError;
use meilisearch_types::error::{Code, ResponseError};
use meilisearch_types::heed::RoTxn;
use meilisearch_types::index_uid::IndexUid;
use meilisearch_types::milli::update::IndexDocumentsMethod;
@ -33,7 +34,6 @@ use crate::error::PayloadError::ReceivePayload;
use crate::extractors::authentication::policies::*;
use crate::extractors::authentication::GuardedData;
use crate::extractors::payload::Payload;
use crate::extractors::query_parameters::QueryParameter;
use crate::extractors::sequential_extractor::SeqHandler;
use crate::routes::{PaginationView, SummarizedTaskView, PAGINATION_DEFAULT_LIMIT};
@ -67,7 +67,7 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
cfg.service(
web::resource("")
.route(web::get().to(SeqHandler(get_all_documents)))
.route(web::post().to(SeqHandler(add_documents)))
.route(web::post().to(SeqHandler(replace_documents)))
.route(web::put().to(SeqHandler(update_documents)))
.route(web::delete().to(SeqHandler(clear_all_documents))),
)
@ -80,7 +80,7 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
);
}
#[derive(Debug, DeserializeFromValue)]
#[derive(Debug, Deserr)]
#[deserr(error = DeserrQueryParamError, rename_all = camelCase, deny_unknown_fields)]
pub struct GetDocument {
#[deserr(default, error = DeserrQueryParamError<InvalidDocumentFields>)]
@ -90,7 +90,7 @@ pub struct GetDocument {
pub async fn get_document(
index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_GET }>, Data<IndexScheduler>>,
document_param: web::Path<DocumentParam>,
params: QueryParameter<GetDocument, DeserrQueryParamError>,
params: AwebQueryParameter<GetDocument, DeserrQueryParamError>,
) -> Result<HttpResponse, ResponseError> {
let DocumentParam { index_uid, document_id } = document_param.into_inner();
let index_uid = IndexUid::try_from(index_uid)?;
@ -125,7 +125,7 @@ pub async fn delete_document(
Ok(HttpResponse::Accepted().json(task))
}
#[derive(Debug, DeserializeFromValue)]
#[derive(Debug, Deserr)]
#[deserr(error = DeserrQueryParamError, rename_all = camelCase, deny_unknown_fields)]
pub struct BrowseQuery {
#[deserr(default, error = DeserrQueryParamError<InvalidDocumentOffset>)]
@ -139,7 +139,7 @@ pub struct BrowseQuery {
pub async fn get_all_documents(
index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_GET }>, Data<IndexScheduler>>,
index_uid: web::Path<String>,
params: QueryParameter<BrowseQuery, DeserrQueryParamError>,
params: AwebQueryParameter<BrowseQuery, DeserrQueryParamError>,
) -> Result<HttpResponse, ResponseError> {
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
debug!("called with params: {:?}", params);
@ -155,17 +155,32 @@ pub async fn get_all_documents(
Ok(HttpResponse::Ok().json(ret))
}
#[derive(Deserialize, Debug, DeserializeFromValue)]
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
#[derive(Deserialize, Debug, Deserr)]
#[deserr(error = DeserrQueryParamError, rename_all = camelCase, deny_unknown_fields)]
pub struct UpdateDocumentsQuery {
#[deserr(default, error = DeserrJsonError<InvalidIndexPrimaryKey>)]
#[deserr(default, error = DeserrQueryParamError<InvalidIndexPrimaryKey>)]
pub primary_key: Option<String>,
#[deserr(default, try_from(char) = from_char_csv_delimiter -> DeserrQueryParamError<InvalidIndexCsvDelimiter>, error = DeserrQueryParamError<InvalidIndexCsvDelimiter>)]
pub csv_delimiter: Option<u8>,
}
pub async fn add_documents(
fn from_char_csv_delimiter(
c: char,
) -> Result<Option<u8>, DeserrQueryParamError<InvalidIndexCsvDelimiter>> {
if c.is_ascii() {
Ok(Some(c as u8))
} else {
Err(DeserrQueryParamError::new(
format!("csv delimiter must be an ascii character. Found: `{}`", c),
Code::InvalidIndexCsvDelimiter,
))
}
}
pub async fn replace_documents(
index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_ADD }>, Data<IndexScheduler>>,
index_uid: web::Path<String>,
params: QueryParameter<UpdateDocumentsQuery, DeserrJsonError>,
params: AwebQueryParameter<UpdateDocumentsQuery, DeserrQueryParamError>,
body: Payload,
req: HttpRequest,
analytics: web::Data<dyn Analytics>,
@ -183,6 +198,7 @@ pub async fn add_documents(
index_scheduler,
index_uid,
params.primary_key,
params.csv_delimiter,
body,
IndexDocumentsMethod::ReplaceDocuments,
allow_index_creation,
@ -195,7 +211,7 @@ pub async fn add_documents(
pub async fn update_documents(
index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_ADD }>, Data<IndexScheduler>>,
index_uid: web::Path<String>,
params: QueryParameter<UpdateDocumentsQuery, DeserrJsonError>,
params: AwebQueryParameter<UpdateDocumentsQuery, DeserrQueryParamError>,
body: Payload,
req: HttpRequest,
analytics: web::Data<dyn Analytics>,
@ -203,6 +219,7 @@ pub async fn update_documents(
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
debug!("called with params: {:?}", params);
let params = params.into_inner();
analytics.update_documents(&params, index_scheduler.index(&index_uid).is_err(), &req);
@ -211,7 +228,8 @@ pub async fn update_documents(
extract_mime_type(&req)?,
index_scheduler,
index_uid,
params.into_inner().primary_key,
params.primary_key,
params.csv_delimiter,
body,
IndexDocumentsMethod::UpdateDocuments,
allow_index_creation,
@ -221,26 +239,43 @@ pub async fn update_documents(
Ok(HttpResponse::Accepted().json(task))
}
#[allow(clippy::too_many_arguments)]
async fn document_addition(
mime_type: Option<Mime>,
index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_ADD }>, Data<IndexScheduler>>,
index_uid: IndexUid,
primary_key: Option<String>,
csv_delimiter: Option<u8>,
mut body: Payload,
method: IndexDocumentsMethod,
allow_index_creation: bool,
) -> Result<SummarizedTaskView, MeilisearchHttpError> {
let format = match mime_type.as_ref().map(|m| (m.type_().as_str(), m.subtype().as_str())) {
Some(("application", "json")) => PayloadType::Json,
Some(("application", "x-ndjson")) => PayloadType::Ndjson,
Some(("text", "csv")) => PayloadType::Csv,
Some((type_, subtype)) => {
let format = match (
mime_type.as_ref().map(|m| (m.type_().as_str(), m.subtype().as_str())),
csv_delimiter,
) {
(Some(("application", "json")), None) => PayloadType::Json,
(Some(("application", "x-ndjson")), None) => PayloadType::Ndjson,
(Some(("text", "csv")), None) => PayloadType::Csv { delimiter: b',' },
(Some(("text", "csv")), Some(delimiter)) => PayloadType::Csv { delimiter },
(Some(("application", "json")), Some(_)) => {
return Err(MeilisearchHttpError::CsvDelimiterWithWrongContentType(String::from(
"application/json",
)))
}
(Some(("application", "x-ndjson")), Some(_)) => {
return Err(MeilisearchHttpError::CsvDelimiterWithWrongContentType(String::from(
"application/x-ndjson",
)))
}
(Some((type_, subtype)), _) => {
return Err(MeilisearchHttpError::InvalidContentType(
format!("{}/{}", type_, subtype),
ACCEPTED_CONTENT_TYPE.clone(),
))
}
None => {
(None, _) => {
return Err(MeilisearchHttpError::MissingContentType(ACCEPTED_CONTENT_TYPE.clone()))
}
};
@ -285,7 +320,9 @@ async fn document_addition(
let documents_count = tokio::task::spawn_blocking(move || {
let documents_count = match format {
PayloadType::Json => read_json(&read_file, update_file.as_file_mut())?,
PayloadType::Csv => read_csv(&read_file, update_file.as_file_mut())?,
PayloadType::Csv { delimiter } => {
read_csv(&read_file, update_file.as_file_mut(), delimiter)?
}
PayloadType::Ndjson => read_ndjson(&read_file, update_file.as_file_mut())?,
};
// we NEED to persist the file here because we moved the `update_file` into another task.
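
To make the new `(Content-Type, csvDelimiter)` handling above easier to scan, here is a hedged standalone sketch of the same resolution (the enum and function names are hypothetical, and the error strings are abbreviated): a delimiter is only accepted with `text/csv` and defaults to `,` when omitted; any other content type combined with a delimiter is rejected.

```rust
// Hypothetical standalone mirror of the content-type / csvDelimiter resolution
// performed in `document_addition`.
#[derive(Debug, PartialEq)]
enum Payload {
    Json,
    Ndjson,
    Csv { delimiter: u8 },
}

fn resolve(content_type: &str, csv_delimiter: Option<u8>) -> Result<Payload, String> {
    match (content_type, csv_delimiter) {
        ("application/json", None) => Ok(Payload::Json),
        ("application/x-ndjson", None) => Ok(Payload::Ndjson),
        ("text/csv", None) => Ok(Payload::Csv { delimiter: b',' }),
        ("text/csv", Some(delimiter)) => Ok(Payload::Csv { delimiter }),
        (other, Some(_)) => Err(format!(
            "The Content-Type `{other}` does not support the use of a csv delimiter."
        )),
        (other, None) => Err(format!("The Content-Type `{other}` is invalid.")),
    }
}

fn main() {
    assert_eq!(resolve("text/csv", Some(b';')), Ok(Payload::Csv { delimiter: b';' }));
    assert_eq!(resolve("text/csv", None), Ok(Payload::Csv { delimiter: b',' }));
    assert!(resolve("application/json", Some(b';')).is_err());
    assert!(resolve("image/png", None).is_err());
}
```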

View File

@ -2,14 +2,14 @@ use std::convert::Infallible;
use actix_web::web::Data;
use actix_web::{web, HttpRequest, HttpResponse};
use deserr::{DeserializeError, DeserializeFromValue, ValuePointerRef};
use deserr::actix_web::{AwebJson, AwebQueryParameter};
use deserr::{DeserializeError, Deserr, ValuePointerRef};
use index_scheduler::IndexScheduler;
use log::debug;
use meilisearch_types::deserr::error_messages::immutable_field_error;
use meilisearch_types::deserr::query_params::Param;
use meilisearch_types::deserr::{DeserrJsonError, DeserrQueryParamError};
use meilisearch_types::deserr::{immutable_field_error, DeserrJsonError, DeserrQueryParamError};
use meilisearch_types::error::deserr_codes::*;
use meilisearch_types::error::{unwrap_any, Code, ResponseError};
use meilisearch_types::error::{Code, ResponseError};
use meilisearch_types::index_uid::IndexUid;
use meilisearch_types::milli::{self, FieldDistribution, Index};
use meilisearch_types::tasks::KindWithContent;
@ -21,8 +21,6 @@ use super::{Pagination, SummarizedTaskView, PAGINATION_DEFAULT_LIMIT};
use crate::analytics::Analytics;
use crate::extractors::authentication::policies::*;
use crate::extractors::authentication::{AuthenticationError, GuardedData};
use crate::extractors::json::ValidatedJson;
use crate::extractors::query_parameters::QueryParameter;
use crate::extractors::sequential_extractor::SeqHandler;
pub mod documents;
@ -73,7 +71,7 @@ impl IndexView {
}
}
#[derive(DeserializeFromValue, Debug, Clone, Copy)]
#[derive(Deserr, Debug, Clone, Copy)]
#[deserr(error = DeserrQueryParamError, rename_all = camelCase, deny_unknown_fields)]
pub struct ListIndexes {
#[deserr(default, error = DeserrQueryParamError<InvalidIndexOffset>)]
@ -89,7 +87,7 @@ impl ListIndexes {
pub async fn list_indexes(
index_scheduler: GuardedData<ActionPolicy<{ actions::INDEXES_GET }>, Data<IndexScheduler>>,
paginate: QueryParameter<ListIndexes, DeserrQueryParamError>,
paginate: AwebQueryParameter<ListIndexes, DeserrQueryParamError>,
) -> Result<HttpResponse, ResponseError> {
let search_rules = &index_scheduler.filters().search_rules;
let indexes: Vec<_> = index_scheduler.indexes()?;
@ -105,7 +103,7 @@ pub async fn list_indexes(
Ok(HttpResponse::Ok().json(ret))
}
#[derive(DeserializeFromValue, Debug)]
#[derive(Deserr, Debug)]
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
pub struct IndexCreateRequest {
#[deserr(error = DeserrJsonError<InvalidIndexUid>, missing_field_error = DeserrJsonError::missing_index_uid)]
@ -116,7 +114,7 @@ pub struct IndexCreateRequest {
pub async fn create_index(
index_scheduler: GuardedData<ActionPolicy<{ actions::INDEXES_CREATE }>, Data<IndexScheduler>>,
body: ValidatedJson<IndexCreateRequest, DeserrJsonError>,
body: AwebJson<IndexCreateRequest, DeserrJsonError>,
req: HttpRequest,
analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
@ -149,7 +147,7 @@ fn deny_immutable_fields_index(
"uid" => immutable_field_error(field, accepted, Code::ImmutableIndexUid),
"createdAt" => immutable_field_error(field, accepted, Code::ImmutableIndexCreatedAt),
"updatedAt" => immutable_field_error(field, accepted, Code::ImmutableIndexUpdatedAt),
_ => unwrap_any(DeserrJsonError::<BadRequest>::error::<Infallible>(
_ => deserr::take_cf_content(DeserrJsonError::<BadRequest>::error::<Infallible>(
None,
deserr::ErrorKind::UnknownKey { key: field, accepted },
location,
@ -157,7 +155,7 @@ fn deny_immutable_fields_index(
}
}
#[derive(DeserializeFromValue, Debug)]
#[derive(Deserr, Debug)]
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields = deny_immutable_fields_index)]
pub struct UpdateIndexRequest {
#[deserr(default, error = DeserrJsonError<InvalidIndexPrimaryKey>)]
@ -181,7 +179,7 @@ pub async fn get_index(
pub async fn update_index(
index_scheduler: GuardedData<ActionPolicy<{ actions::INDEXES_UPDATE }>, Data<IndexScheduler>>,
index_uid: web::Path<String>,
body: ValidatedJson<UpdateIndexRequest, DeserrJsonError>,
body: AwebJson<UpdateIndexRequest, DeserrJsonError>,
req: HttpRequest,
analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {

View File

@ -1,7 +1,9 @@
use actix_web::web::Data;
use actix_web::{web, HttpRequest, HttpResponse};
use deserr::actix_web::{AwebJson, AwebQueryParameter};
use index_scheduler::IndexScheduler;
use log::debug;
use meilisearch_auth::IndexSearchRules;
use meilisearch_types::deserr::query_params::Param;
use meilisearch_types::deserr::{DeserrJsonError, DeserrQueryParamError};
use meilisearch_types::error::deserr_codes::*;
@ -13,13 +15,11 @@ use serde_json::Value;
use crate::analytics::{Analytics, SearchAggregator};
use crate::extractors::authentication::policies::*;
use crate::extractors::authentication::GuardedData;
use crate::extractors::json::ValidatedJson;
use crate::extractors::query_parameters::QueryParameter;
use crate::extractors::sequential_extractor::SeqHandler;
use crate::search::{
add_search_rules, perform_search, MatchingStrategy, SearchQuery, DEFAULT_CROP_LENGTH,
DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG,
DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET,
perform_search, MatchingStrategy, SearchQuery, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER,
DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT,
DEFAULT_SEARCH_OFFSET,
};
pub fn configure(cfg: &mut web::ServiceConfig) {
@ -30,7 +30,7 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
);
}
#[derive(Debug, deserr::DeserializeFromValue)]
#[derive(Debug, deserr::Deserr)]
#[deserr(error = DeserrQueryParamError, rename_all = camelCase, deny_unknown_fields)]
pub struct SearchQueryGet {
#[deserr(default, error = DeserrQueryParamError<InvalidSearchQ>)]
@ -101,6 +101,26 @@ impl From<SearchQueryGet> for SearchQuery {
}
}
/// Incorporate search rules into the search query
fn add_search_rules(query: &mut SearchQuery, rules: IndexSearchRules) {
query.filter = match (query.filter.take(), rules.filter) {
(None, rules_filter) => rules_filter,
(filter, None) => filter,
(Some(filter), Some(rules_filter)) => {
let filter = match filter {
Value::Array(filter) => filter,
filter => vec![filter],
};
let rules_filter = match rules_filter {
Value::Array(rules_filter) => rules_filter,
rules_filter => vec![rules_filter],
};
Some(Value::Array([filter, rules_filter].concat()))
}
}
}
// TODO: TAMO: split on :asc, and :desc, instead of doing some weird things
/// Transform the sort query parameter into something that matches the format expected by the POST route.
@ -129,7 +149,7 @@ fn fix_sort_query_parameters(sort_query: &str) -> Vec<String> {
pub async fn search_with_url_query(
index_scheduler: GuardedData<ActionPolicy<{ actions::SEARCH }>, Data<IndexScheduler>>,
index_uid: web::Path<String>,
params: QueryParameter<SearchQueryGet, DeserrQueryParamError>,
params: AwebQueryParameter<SearchQueryGet, DeserrQueryParamError>,
req: HttpRequest,
analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
@ -163,7 +183,7 @@ pub async fn search_with_url_query(
pub async fn search_with_post(
index_scheduler: GuardedData<ActionPolicy<{ actions::SEARCH }>, Data<IndexScheduler>>,
index_uid: web::Path<String>,
params: ValidatedJson<SearchQuery, DeserrJsonError>,
params: AwebJson<SearchQuery, DeserrJsonError>,
req: HttpRequest,
analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {

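As a usage note on the inlined `add_search_rules` above: both filters are coerced to JSON arrays and concatenated, so the tenant-token rule filter is always applied on top of whatever the client sent. A minimal sketch, assuming `serde_json` is available (the free function below mirrors the route-local helper; values are illustrative):

```rust
// Standalone mirror of the filter merge in `add_search_rules`: both filters are
// coerced to arrays and concatenated.
use serde_json::{json, Value};

fn merge_filters(query_filter: Option<Value>, rules_filter: Option<Value>) -> Option<Value> {
    match (query_filter, rules_filter) {
        (None, rules_filter) => rules_filter,
        (filter, None) => filter,
        (Some(filter), Some(rules_filter)) => {
            let filter = match filter {
                Value::Array(filter) => filter,
                filter => vec![filter],
            };
            let rules_filter = match rules_filter {
                Value::Array(rules_filter) => rules_filter,
                rules_filter => vec![rules_filter],
            };
            Some(Value::Array([filter, rules_filter].concat()))
        }
    }
}

fn main() {
    let merged = merge_filters(
        Some(json!("genres = horror")),  // filter sent by the client
        Some(json!(["user_id = 12"])),   // filter enforced by the tenant token
    );
    assert_eq!(merged, Some(json!(["genres = horror", "user_id = 12"])));
}
```
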
View File

@ -1,5 +1,6 @@
use actix_web::web::Data;
use actix_web::{web, HttpRequest, HttpResponse};
use deserr::actix_web::AwebJson;
use index_scheduler::IndexScheduler;
use log::debug;
use meilisearch_types::deserr::DeserrJsonError;
@ -12,7 +13,6 @@ use serde_json::json;
use crate::analytics::Analytics;
use crate::extractors::authentication::policies::*;
use crate::extractors::authentication::GuardedData;
use crate::extractors::json::ValidatedJson;
use crate::routes::SummarizedTaskView;
#[macro_export]
@ -68,7 +68,7 @@ macro_rules! make_setting_route {
Data<IndexScheduler>,
>,
index_uid: actix_web::web::Path<String>,
body: $crate::routes::indexes::ValidatedJson<Option<$type>, $err_ty>,
body: deserr::actix_web::AwebJson<Option<$type>, $err_ty>,
req: HttpRequest,
$analytics_var: web::Data<dyn Analytics>,
) -> std::result::Result<HttpResponse, ResponseError> {
@ -468,7 +468,7 @@ generate_configure!(
pub async fn update_all(
index_scheduler: GuardedData<ActionPolicy<{ actions::SETTINGS_UPDATE }>, Data<IndexScheduler>>,
index_uid: web::Path<String>,
body: ValidatedJson<Settings<Unchecked>, DeserrJsonError>,
body: AwebJson<Settings<Unchecked>, DeserrJsonError>,
req: HttpRequest,
analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {

View File

@ -17,6 +17,8 @@ use crate::analytics::Analytics;
use crate::extractors::authentication::policies::*;
use crate::extractors::authentication::GuardedData;
const PAGINATION_DEFAULT_LIMIT: usize = 20;
mod api_key;
mod dump;
pub mod indexes;
@ -31,12 +33,9 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
.service(web::resource("/stats").route(web::get().to(get_stats)))
.service(web::resource("/version").route(web::get().to(get_version)))
.service(web::scope("/indexes").configure(indexes::configure))
.service(web::scope("/search").configure(multi_search::configure))
.service(web::scope("/swap-indexes").configure(swap_indexes::configure));
}
const PAGINATION_DEFAULT_LIMIT: usize = 20;
#[derive(Debug, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct SummarizedTaskView {
@ -60,6 +59,7 @@ impl From<Task> for SummarizedTaskView {
}
}
}
pub struct Pagination {
pub offset: usize,
pub limit: usize,
@ -327,5 +327,3 @@ pub async fn get_health(
Ok(HttpResponse::Ok().json(serde_json::json!({ "status": "available" })))
}
mod multi_search;

View File

@ -1,83 +0,0 @@
use actix_web::web::{self, Data};
use actix_web::{HttpRequest, HttpResponse};
use index_scheduler::IndexScheduler;
use log::debug;
use meilisearch_types::deserr::DeserrJsonError;
use meilisearch_types::error::ResponseError;
use meilisearch_types::keys::actions;
use serde::Serialize;
use crate::analytics::{Analytics, MultiSearchAggregator};
use crate::extractors::authentication::policies::ActionPolicy;
use crate::extractors::authentication::GuardedData;
use crate::extractors::json::ValidatedJson;
use crate::extractors::sequential_extractor::SeqHandler;
use crate::search::{
add_search_rules, perform_search, SearchQueryWithIndex, SearchResultWithIndex,
};
pub fn configure(cfg: &mut web::ServiceConfig) {
cfg.service(web::resource("").route(web::post().to(SeqHandler(search_with_post))));
}
#[derive(Serialize)]
struct SearchResults {
results: Vec<SearchResultWithIndex>,
}
#[derive(Debug, deserr::DeserializeFromValue)]
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
pub struct SearchQueries {
queries: Vec<SearchQueryWithIndex>,
}
pub async fn search_with_post(
index_scheduler: GuardedData<ActionPolicy<{ actions::SEARCH }>, Data<IndexScheduler>>,
params: ValidatedJson<SearchQueries, DeserrJsonError>,
req: HttpRequest,
analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
let queries = params.into_inner().queries;
let mut multi_aggregate = MultiSearchAggregator::from_queries(&queries, &req);
let search_results: Result<_, ResponseError> = (|| {
async {
let mut search_results = Vec::with_capacity(queries.len());
for (index_uid, mut query) in
queries.into_iter().map(SearchQueryWithIndex::into_index_query)
{
debug!("search called with params: {:?}", query);
// Tenant token search_rules.
if let Some(search_rules) =
index_scheduler.filters().search_rules.get_index_search_rules(&index_uid)
{
add_search_rules(&mut query, search_rules);
}
let index = index_scheduler.index(&index_uid)?;
let search_result =
tokio::task::spawn_blocking(move || perform_search(&index, query)).await?;
search_results.push(SearchResultWithIndex {
index_uid: index_uid.into_inner(),
result: search_result?,
});
}
Ok(search_results)
}
})()
.await;
if search_results.is_ok() {
multi_aggregate.succeed();
}
analytics.post_multi_search(multi_aggregate);
let search_results = search_results?;
debug!("returns: {:?}", search_results);
Ok(HttpResponse::Ok().json(SearchResults { results: search_results }))
}

View File

@ -1,6 +1,7 @@
use actix_web::web::Data;
use actix_web::{web, HttpRequest, HttpResponse};
use deserr::DeserializeFromValue;
use deserr::actix_web::AwebJson;
use deserr::Deserr;
use index_scheduler::IndexScheduler;
use meilisearch_types::deserr::DeserrJsonError;
use meilisearch_types::error::deserr_codes::InvalidSwapIndexes;
@ -14,14 +15,13 @@ use crate::analytics::Analytics;
use crate::error::MeilisearchHttpError;
use crate::extractors::authentication::policies::*;
use crate::extractors::authentication::{AuthenticationError, GuardedData};
use crate::extractors::json::ValidatedJson;
use crate::extractors::sequential_extractor::SeqHandler;
pub fn configure(cfg: &mut web::ServiceConfig) {
cfg.service(web::resource("").route(web::post().to(SeqHandler(swap_indexes))));
}
#[derive(DeserializeFromValue, Debug, Clone, PartialEq, Eq)]
#[derive(Deserr, Debug, Clone, PartialEq, Eq)]
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
pub struct SwapIndexesPayload {
#[deserr(error = DeserrJsonError<InvalidSwapIndexes>, missing_field_error = DeserrJsonError::missing_swap_indexes)]
@ -30,7 +30,7 @@ pub struct SwapIndexesPayload {
pub async fn swap_indexes(
index_scheduler: GuardedData<ActionPolicy<{ actions::INDEXES_SWAP }>, Data<IndexScheduler>>,
params: ValidatedJson<Vec<SwapIndexesPayload>, DeserrJsonError>,
params: AwebJson<Vec<SwapIndexesPayload>, DeserrJsonError>,
req: HttpRequest,
analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {

View File

@ -1,6 +1,7 @@
use actix_web::web::Data;
use actix_web::{web, HttpRequest, HttpResponse};
use deserr::DeserializeFromValue;
use deserr::actix_web::AwebQueryParameter;
use deserr::Deserr;
use index_scheduler::{IndexScheduler, Query, TaskId};
use meilisearch_types::deserr::query_params::Param;
use meilisearch_types::deserr::DeserrQueryParamError;
@ -23,7 +24,6 @@ use super::SummarizedTaskView;
use crate::analytics::Analytics;
use crate::extractors::authentication::policies::*;
use crate::extractors::authentication::GuardedData;
use crate::extractors::query_parameters::QueryParameter;
use crate::extractors::sequential_extractor::SeqHandler;
const DEFAULT_LIMIT: u32 = 20;
@ -162,7 +162,7 @@ impl From<Details> for DetailsView {
}
}
#[derive(Debug, DeserializeFromValue)]
#[derive(Debug, Deserr)]
#[deserr(error = DeserrQueryParamError, rename_all = camelCase, deny_unknown_fields)]
pub struct TasksFilterQuery {
#[deserr(default = Param(DEFAULT_LIMIT), error = DeserrQueryParamError<InvalidTaskLimit>)]
@ -181,19 +181,20 @@ pub struct TasksFilterQuery {
#[deserr(default, error = DeserrQueryParamError<InvalidIndexUid>)]
pub index_uids: OptionStarOrList<IndexUid>,
#[deserr(default, error = DeserrQueryParamError<InvalidTaskAfterEnqueuedAt>, from(OptionStarOr<String>) = deserialize_date_after -> InvalidTaskDateError)]
#[deserr(default, error = DeserrQueryParamError<InvalidTaskAfterEnqueuedAt>, try_from(OptionStarOr<String>) = deserialize_date_after -> InvalidTaskDateError)]
pub after_enqueued_at: OptionStarOr<OffsetDateTime>,
#[deserr(default, error = DeserrQueryParamError<InvalidTaskBeforeEnqueuedAt>, from(OptionStarOr<String>) = deserialize_date_before -> InvalidTaskDateError)]
#[deserr(default, error = DeserrQueryParamError<InvalidTaskBeforeEnqueuedAt>, try_from(OptionStarOr<String>) = deserialize_date_before -> InvalidTaskDateError)]
pub before_enqueued_at: OptionStarOr<OffsetDateTime>,
#[deserr(default, error = DeserrQueryParamError<InvalidTaskAfterStartedAt>, from(OptionStarOr<String>) = deserialize_date_after -> InvalidTaskDateError)]
#[deserr(default, error = DeserrQueryParamError<InvalidTaskAfterStartedAt>, try_from(OptionStarOr<String>) = deserialize_date_after -> InvalidTaskDateError)]
pub after_started_at: OptionStarOr<OffsetDateTime>,
#[deserr(default, error = DeserrQueryParamError<InvalidTaskBeforeStartedAt>, from(OptionStarOr<String>) = deserialize_date_before -> InvalidTaskDateError)]
#[deserr(default, error = DeserrQueryParamError<InvalidTaskBeforeStartedAt>, try_from(OptionStarOr<String>) = deserialize_date_before -> InvalidTaskDateError)]
pub before_started_at: OptionStarOr<OffsetDateTime>,
#[deserr(default, error = DeserrQueryParamError<InvalidTaskAfterFinishedAt>, from(OptionStarOr<String>) = deserialize_date_after -> InvalidTaskDateError)]
#[deserr(default, error = DeserrQueryParamError<InvalidTaskAfterFinishedAt>, try_from(OptionStarOr<String>) = deserialize_date_after -> InvalidTaskDateError)]
pub after_finished_at: OptionStarOr<OffsetDateTime>,
#[deserr(default, error = DeserrQueryParamError<InvalidTaskBeforeFinishedAt>, from(OptionStarOr<String>) = deserialize_date_before -> InvalidTaskDateError)]
#[deserr(default, error = DeserrQueryParamError<InvalidTaskBeforeFinishedAt>, try_from(OptionStarOr<String>) = deserialize_date_before -> InvalidTaskDateError)]
pub before_finished_at: OptionStarOr<OffsetDateTime>,
}
impl TasksFilterQuery {
fn into_query(self) -> Query {
Query {
@ -235,7 +236,7 @@ impl TaskDeletionOrCancelationQuery {
}
}
#[derive(Debug, DeserializeFromValue)]
#[derive(Debug, Deserr)]
#[deserr(error = DeserrQueryParamError, rename_all = camelCase, deny_unknown_fields)]
pub struct TaskDeletionOrCancelationQuery {
#[deserr(default, error = DeserrQueryParamError<InvalidTaskUids>)]
@ -249,19 +250,20 @@ pub struct TaskDeletionOrCancelationQuery {
#[deserr(default, error = DeserrQueryParamError<InvalidIndexUid>)]
pub index_uids: OptionStarOrList<IndexUid>,
#[deserr(default, error = DeserrQueryParamError<InvalidTaskAfterEnqueuedAt>, from(OptionStarOr<String>) = deserialize_date_after -> InvalidTaskDateError)]
#[deserr(default, error = DeserrQueryParamError<InvalidTaskAfterEnqueuedAt>, try_from(OptionStarOr<String>) = deserialize_date_after -> InvalidTaskDateError)]
pub after_enqueued_at: OptionStarOr<OffsetDateTime>,
#[deserr(default, error = DeserrQueryParamError<InvalidTaskBeforeEnqueuedAt>, from(OptionStarOr<String>) = deserialize_date_before -> InvalidTaskDateError)]
#[deserr(default, error = DeserrQueryParamError<InvalidTaskBeforeEnqueuedAt>, try_from(OptionStarOr<String>) = deserialize_date_before -> InvalidTaskDateError)]
pub before_enqueued_at: OptionStarOr<OffsetDateTime>,
#[deserr(default, error = DeserrQueryParamError<InvalidTaskAfterStartedAt>, from(OptionStarOr<String>) = deserialize_date_after -> InvalidTaskDateError)]
#[deserr(default, error = DeserrQueryParamError<InvalidTaskAfterStartedAt>, try_from(OptionStarOr<String>) = deserialize_date_after -> InvalidTaskDateError)]
pub after_started_at: OptionStarOr<OffsetDateTime>,
#[deserr(default, error = DeserrQueryParamError<InvalidTaskBeforeStartedAt>, from(OptionStarOr<String>) = deserialize_date_before -> InvalidTaskDateError)]
#[deserr(default, error = DeserrQueryParamError<InvalidTaskBeforeStartedAt>, try_from(OptionStarOr<String>) = deserialize_date_before -> InvalidTaskDateError)]
pub before_started_at: OptionStarOr<OffsetDateTime>,
#[deserr(default, error = DeserrQueryParamError<InvalidTaskAfterFinishedAt>, from(OptionStarOr<String>) = deserialize_date_after -> InvalidTaskDateError)]
#[deserr(default, error = DeserrQueryParamError<InvalidTaskAfterFinishedAt>, try_from(OptionStarOr<String>) = deserialize_date_after -> InvalidTaskDateError)]
pub after_finished_at: OptionStarOr<OffsetDateTime>,
#[deserr(default, error = DeserrQueryParamError<InvalidTaskBeforeFinishedAt>, from(OptionStarOr<String>) = deserialize_date_before -> InvalidTaskDateError)]
#[deserr(default, error = DeserrQueryParamError<InvalidTaskBeforeFinishedAt>, try_from(OptionStarOr<String>) = deserialize_date_before -> InvalidTaskDateError)]
pub before_finished_at: OptionStarOr<OffsetDateTime>,
}
impl TaskDeletionOrCancelationQuery {
fn into_query(self) -> Query {
Query {
@ -284,7 +286,7 @@ impl TaskDeletionOrCancelationQuery {
async fn cancel_tasks(
index_scheduler: GuardedData<ActionPolicy<{ actions::TASKS_CANCEL }>, Data<IndexScheduler>>,
params: QueryParameter<TaskDeletionOrCancelationQuery, DeserrQueryParamError>,
params: AwebQueryParameter<TaskDeletionOrCancelationQuery, DeserrQueryParamError>,
req: HttpRequest,
analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
@ -330,7 +332,7 @@ async fn cancel_tasks(
async fn delete_tasks(
index_scheduler: GuardedData<ActionPolicy<{ actions::TASKS_DELETE }>, Data<IndexScheduler>>,
params: QueryParameter<TaskDeletionOrCancelationQuery, DeserrQueryParamError>,
params: AwebQueryParameter<TaskDeletionOrCancelationQuery, DeserrQueryParamError>,
req: HttpRequest,
analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
@ -383,7 +385,7 @@ pub struct AllTasks {
async fn get_tasks(
index_scheduler: GuardedData<ActionPolicy<{ actions::TASKS_GET }>, Data<IndexScheduler>>,
params: QueryParameter<TasksFilterQuery, DeserrQueryParamError>,
params: AwebQueryParameter<TasksFilterQuery, DeserrQueryParamError>,
req: HttpRequest,
analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
@ -498,7 +500,7 @@ pub fn deserialize_date_before(
#[cfg(test)]
mod tests {
use deserr::DeserializeFromValue;
use deserr::Deserr;
use meili_snap::snapshot;
use meilisearch_types::deserr::DeserrQueryParamError;
use meilisearch_types::error::{Code, ResponseError};
@ -507,7 +509,7 @@ mod tests {
fn deserr_query_params<T>(j: &str) -> Result<T, ResponseError>
where
T: DeserializeFromValue<DeserrQueryParamError>,
T: Deserr<DeserrQueryParamError>,
{
let value = serde_urlencoded::from_str::<serde_json::Value>(j)
.map_err(|e| ResponseError::from_msg(e.to_string(), Code::BadRequest))?;

View File
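The `from(...)` attributes above become `try_from(...)` because converting the raw query-string value into an `OffsetDateTime` can fail. As a rough, self-contained sketch of that kind of fallible conversion (the real helpers operate on `OptionStarOr<String>` and return the crate's own `InvalidTaskDateError`; the signature and error type below are simplified stand-ins):

```rust
use time::format_description::well_known::Rfc3339;
use time::OffsetDateTime;

// Simplified stand-in for meilisearch's InvalidTaskDateError; illustrative only.
#[derive(Debug)]
struct InvalidTaskDateError(String);

// Fallible conversion of an optional RFC 3339 string into an OffsetDateTime,
// the kind of function a `try_from(...) = ... -> InvalidTaskDateError` attribute points at.
fn deserialize_date_after(
    value: Option<String>,
) -> Result<Option<OffsetDateTime>, InvalidTaskDateError> {
    match value {
        None => Ok(None),
        Some(raw) => OffsetDateTime::parse(&raw, &Rfc3339)
            .map(Some)
            .map_err(|_| InvalidTaskDateError(raw)),
    }
}

fn main() {
    assert!(deserialize_date_after(Some("2023-02-20T17:01:36Z".to_string())).is_ok());
    assert!(deserialize_date_after(Some("not-a-date".to_string())).is_err());
}
```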

@ -3,12 +3,10 @@ use std::collections::{BTreeMap, BTreeSet, HashSet};
use std::str::FromStr;
use std::time::Instant;
use deserr::DeserializeFromValue;
use deserr::Deserr;
use either::Either;
use meilisearch_auth::IndexSearchRules;
use meilisearch_types::deserr::DeserrJsonError;
use meilisearch_types::error::deserr_codes::*;
use meilisearch_types::index_uid::IndexUid;
use meilisearch_types::settings::DEFAULT_PAGINATION_MAX_TOTAL_HITS;
use meilisearch_types::{milli, Document};
use milli::tokenizer::TokenizerBuilder;
@ -31,7 +29,7 @@ pub const DEFAULT_CROP_MARKER: fn() -> String = || "…".to_string();
pub const DEFAULT_HIGHLIGHT_PRE_TAG: fn() -> String = || "<em>".to_string();
pub const DEFAULT_HIGHLIGHT_POST_TAG: fn() -> String = || "</em>".to_string();
#[derive(Debug, Clone, Default, PartialEq, Eq, DeserializeFromValue)]
#[derive(Debug, Clone, Default, PartialEq, Eq, Deserr)]
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
pub struct SearchQuery {
#[deserr(default, error = DeserrJsonError<InvalidSearchQ>)]
@ -76,101 +74,7 @@ impl SearchQuery {
}
}
/// A `SearchQuery` + an index UID.
// This struct contains the fields of `SearchQuery` inline.
// This is because neither deserr nor serde support `flatten` when using `deny_unknown_fields`.
// The `From<SearchQueryWithIndex>` implementation ensures both structs remain up to date.
#[derive(Debug, deserr::DeserializeFromValue)]
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
pub struct SearchQueryWithIndex {
#[deserr(error = DeserrJsonError<InvalidIndexUid>, missing_field_error = DeserrJsonError::missing_index_uid)]
pub index_uid: IndexUid,
#[deserr(default, error = DeserrJsonError<InvalidSearchQ>)]
pub q: Option<String>,
#[deserr(default = DEFAULT_SEARCH_OFFSET(), error = DeserrJsonError<InvalidSearchOffset>)]
pub offset: usize,
#[deserr(default = DEFAULT_SEARCH_LIMIT(), error = DeserrJsonError<InvalidSearchLimit>)]
pub limit: usize,
#[deserr(default, error = DeserrJsonError<InvalidSearchPage>)]
pub page: Option<usize>,
#[deserr(default, error = DeserrJsonError<InvalidSearchHitsPerPage>)]
pub hits_per_page: Option<usize>,
#[deserr(default, error = DeserrJsonError<InvalidSearchAttributesToRetrieve>)]
pub attributes_to_retrieve: Option<BTreeSet<String>>,
#[deserr(default, error = DeserrJsonError<InvalidSearchAttributesToCrop>)]
pub attributes_to_crop: Option<Vec<String>>,
#[deserr(default, error = DeserrJsonError<InvalidSearchCropLength>, default = DEFAULT_CROP_LENGTH())]
pub crop_length: usize,
#[deserr(default, error = DeserrJsonError<InvalidSearchAttributesToHighlight>)]
pub attributes_to_highlight: Option<HashSet<String>>,
#[deserr(default, error = DeserrJsonError<InvalidSearchShowMatchesPosition>, default)]
pub show_matches_position: bool,
#[deserr(default, error = DeserrJsonError<InvalidSearchFilter>)]
pub filter: Option<Value>,
#[deserr(default, error = DeserrJsonError<InvalidSearchSort>)]
pub sort: Option<Vec<String>>,
#[deserr(default, error = DeserrJsonError<InvalidSearchFacets>)]
pub facets: Option<Vec<String>>,
#[deserr(default, error = DeserrJsonError<InvalidSearchHighlightPreTag>, default = DEFAULT_HIGHLIGHT_PRE_TAG())]
pub highlight_pre_tag: String,
#[deserr(default, error = DeserrJsonError<InvalidSearchHighlightPostTag>, default = DEFAULT_HIGHLIGHT_POST_TAG())]
pub highlight_post_tag: String,
#[deserr(default, error = DeserrJsonError<InvalidSearchCropMarker>, default = DEFAULT_CROP_MARKER())]
pub crop_marker: String,
#[deserr(default, error = DeserrJsonError<InvalidSearchMatchingStrategy>, default)]
pub matching_strategy: MatchingStrategy,
}
impl SearchQueryWithIndex {
pub fn into_index_query(self) -> (IndexUid, SearchQuery) {
let SearchQueryWithIndex {
index_uid,
q,
offset,
limit,
page,
hits_per_page,
attributes_to_retrieve,
attributes_to_crop,
crop_length,
attributes_to_highlight,
show_matches_position,
filter,
sort,
facets,
highlight_pre_tag,
highlight_post_tag,
crop_marker,
matching_strategy,
} = self;
(
index_uid,
SearchQuery {
q,
offset,
limit,
page,
hits_per_page,
attributes_to_retrieve,
attributes_to_crop,
crop_length,
attributes_to_highlight,
show_matches_position,
filter,
sort,
facets,
highlight_pre_tag,
highlight_post_tag,
crop_marker,
matching_strategy,
// do not use ..Default::default() here,
// rather add any missing field from `SearchQuery` to `SearchQueryWithIndex`
},
)
}
}
#[derive(Debug, Clone, PartialEq, Eq, DeserializeFromValue)]
#[derive(Debug, Clone, PartialEq, Eq, Deserr)]
#[deserr(rename_all = camelCase)]
pub enum MatchingStrategy {
/// Remove query words from last to first
@ -216,14 +120,6 @@ pub struct SearchResult {
pub facet_distribution: Option<BTreeMap<String, BTreeMap<String, u64>>>,
}
#[derive(Serialize, Debug, Clone, PartialEq, Eq)]
#[serde(rename_all = "camelCase")]
pub struct SearchResultWithIndex {
pub index_uid: String,
#[serde(flatten)]
pub result: SearchResult,
}
#[derive(Serialize, Debug, Clone, PartialEq, Eq)]
#[serde(untagged)]
pub enum HitsInfo {
@ -233,26 +129,6 @@ pub enum HitsInfo {
OffsetLimit { limit: usize, offset: usize, estimated_total_hits: usize },
}
/// Incorporate search rules in search query
pub fn add_search_rules(query: &mut SearchQuery, rules: IndexSearchRules) {
query.filter = match (query.filter.take(), rules.filter) {
(None, rules_filter) => rules_filter,
(filter, None) => filter,
(Some(filter), Some(rules_filter)) => {
let filter = match filter {
Value::Array(filter) => filter,
filter => vec![filter],
};
let rules_filter = match rules_filter {
Value::Array(rules_filter) => rules_filter,
rules_filter => vec![rules_filter],
};
Some(Value::Array([filter, rules_filter].concat()))
}
}
}
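For reference, the merging behaviour of `add_search_rules` above can be exercised in isolation. The following is a self-contained restatement of that match plus a usage example (the free function and `main` are for illustration only):

```rust
// Illustration of the filter-merging behaviour shown above: a query filter and
// an index-rule filter are each wrapped into an array and then concatenated.
use serde_json::{json, Value};

fn merge_filters(query_filter: Option<Value>, rules_filter: Option<Value>) -> Option<Value> {
    match (query_filter, rules_filter) {
        (None, rules_filter) => rules_filter,
        (filter, None) => filter,
        (Some(filter), Some(rules_filter)) => {
            let filter = match filter {
                Value::Array(filter) => filter,
                filter => vec![filter],
            };
            let rules_filter = match rules_filter {
                Value::Array(rules_filter) => rules_filter,
                rules_filter => vec![rules_filter],
            };
            Some(Value::Array([filter, rules_filter].concat()))
        }
    }
}

fn main() {
    let merged = merge_filters(Some(json!("color = blue")), Some(json!(["user = 12"])));
    // => Some(["color = blue", "user = 12"])
    println!("{merged:?}");
}
```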
pub fn perform_search(
index: &Index,
query: SearchQuery,

View File

@ -377,7 +377,7 @@ async fn error_add_api_key_invalid_index_uids() {
meili_snap::snapshot!(code, @"400 Bad Request");
meili_snap::snapshot!(meili_snap::json_string!(response, { ".createdAt" => "[ignored]", ".updatedAt" => "[ignored]" }), @r###"
{
"message": "Invalid value at `.indexes[0]`: `invalid index # / \\name with spaces` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_).",
"message": "Invalid value at `.indexes[0]`: `invalid index # / \\name with spaces` is not a valid index uid pattern. Index uid patterns can be an integer or a string containing only alphanumeric characters, hyphens (-), underscores (_), and optionally end with a star (*).",
"code": "invalid_api_key_indexes",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_api_key_indexes"

View File

@ -77,12 +77,14 @@ static INVALID_RESPONSE: Lazy<Value> = Lazy::new(|| {
})
});
const MASTER_KEY: &str = "MASTER_KEY";
#[actix_rt::test]
async fn error_access_expired_key() {
use std::{thread, time};
let mut server = Server::new_auth().await;
server.use_api_key("MASTER_KEY");
server.use_api_key(MASTER_KEY);
let content = json!({
"indexes": ["products"],
@ -111,7 +113,7 @@ async fn error_access_expired_key() {
#[actix_rt::test]
async fn error_access_unauthorized_index() {
let mut server = Server::new_auth().await;
server.use_api_key("MASTER_KEY");
server.use_api_key(MASTER_KEY);
let content = json!({
"indexes": ["sales"],
@ -144,7 +146,7 @@ async fn error_access_unauthorized_action() {
for ((method, route), action) in AUTHORIZATIONS.iter() {
// create a new API key letting only the needed action.
server.use_api_key("MASTER_KEY");
server.use_api_key(MASTER_KEY);
let content = json!({
"indexes": ["products"],
@ -168,7 +170,7 @@ async fn error_access_unauthorized_action() {
#[actix_rt::test]
async fn access_authorized_master_key() {
let mut server = Server::new_auth().await;
server.use_api_key("MASTER_KEY");
server.use_api_key(MASTER_KEY);
// master key must have access to all routes.
for ((method, route), _) in AUTHORIZATIONS.iter() {
@ -185,7 +187,7 @@ async fn access_authorized_restricted_index() {
for ((method, route), actions) in AUTHORIZATIONS.iter() {
for action in actions {
// create a new API key letting only the needed action.
server.use_api_key("MASTER_KEY");
server.use_api_key(MASTER_KEY);
let content = json!({
"indexes": ["products"],
@ -222,7 +224,7 @@ async fn access_authorized_no_index_restriction() {
for ((method, route), actions) in AUTHORIZATIONS.iter() {
for action in actions {
// create a new API key letting only the needed action.
server.use_api_key("MASTER_KEY");
server.use_api_key(MASTER_KEY);
let content = json!({
"indexes": ["*"],
@ -255,7 +257,7 @@ async fn access_authorized_no_index_restriction() {
#[actix_rt::test]
async fn access_authorized_stats_restricted_index() {
let mut server = Server::new_auth().await;
server.use_admin_key("MASTER_KEY").await;
server.use_admin_key(MASTER_KEY).await;
// create index `test`
let index = server.index("test");
@ -295,7 +297,7 @@ async fn access_authorized_stats_restricted_index() {
#[actix_rt::test]
async fn access_authorized_stats_no_index_restriction() {
let mut server = Server::new_auth().await;
server.use_admin_key("MASTER_KEY").await;
server.use_admin_key(MASTER_KEY).await;
// create index `test`
let index = server.index("test");
@ -335,7 +337,7 @@ async fn access_authorized_stats_no_index_restriction() {
#[actix_rt::test]
async fn list_authorized_indexes_restricted_index() {
let mut server = Server::new_auth().await;
server.use_admin_key("MASTER_KEY").await;
server.use_admin_key(MASTER_KEY).await;
// create index `test`
let index = server.index("test");
@ -376,7 +378,7 @@ async fn list_authorized_indexes_restricted_index() {
#[actix_rt::test]
async fn list_authorized_indexes_no_index_restriction() {
let mut server = Server::new_auth().await;
server.use_admin_key("MASTER_KEY").await;
server.use_admin_key(MASTER_KEY).await;
// create index `test`
let index = server.index("test");
@ -414,10 +416,194 @@ async fn list_authorized_indexes_no_index_restriction() {
assert!(response.iter().any(|index| index["uid"] == "test"));
}
#[actix_rt::test]
async fn access_authorized_index_patterns() {
let mut server = Server::new_auth().await;
server.use_admin_key(MASTER_KEY).await;
// create products_1 index
let index_1 = server.index("products_1");
let (response, code) = index_1.create(Some("id")).await;
assert_eq!(202, code, "{:?}", &response);
// create products index
let index_ = server.index("products");
let (response, code) = index_.create(Some("id")).await;
assert_eq!(202, code, "{:?}", &response);
// create key with all document access on indexes matching the products_* pattern.
let content = json!({
"indexes": ["products_*"],
"actions": ["documents.*"],
"expiresAt": (OffsetDateTime::now_utc() + Duration::hours(1)).format(&Rfc3339).unwrap(),
});
// Register the key
let (response, code) = server.add_api_key(content).await;
assert_eq!(201, code, "{:?}", &response);
assert!(response["key"].is_string());
// use created key.
let key = response["key"].as_str().unwrap();
server.use_api_key(key);
// refer to products_1 and products with modified api key.
let index_1 = server.index("products_1");
let index_ = server.index("products");
// try to create an index via add documents route
let documents = json!([
{
"id": 1,
"content": "foo",
}
]);
// Adding document to products_1 index. Should succeed with 202
let (response, code) = index_1.add_documents(documents.clone(), None).await;
assert_eq!(202, code, "{:?}", &response);
let task_id = response["taskUid"].as_u64().unwrap();
// Adding document to products index. Should Fail with 403 -- invalid_api_key
let (response, code) = index_.add_documents(documents, None).await;
assert_eq!(403, code, "{:?}", &response);
server.use_api_key(MASTER_KEY);
// refer to products_1 with modified api key.
let index_1 = server.index("products_1");
index_1.wait_task(task_id).await;
let (response, code) = index_1.get_task(task_id).await;
assert_eq!(200, code, "{:?}", &response);
assert_eq!(response["status"], "succeeded");
}
#[actix_rt::test]
async fn raise_error_non_authorized_index_patterns() {
let mut server = Server::new_auth().await;
server.use_admin_key(MASTER_KEY).await;
// create products_1 index
let product_1_index = server.index("products_1");
let (response, code) = product_1_index.create(Some("id")).await;
assert_eq!(202, code, "{:?}", &response);
// create products_2 index
let product_2_index = server.index("products_2");
let (response, code) = product_2_index.create(Some("id")).await;
assert_eq!(202, code, "{:?}", &response);
// create test index
let test_index = server.index("test");
let (response, code) = test_index.create(Some("id")).await;
assert_eq!(202, code, "{:?}", &response);
// create key with all document access on indexes matching the products_* pattern.
let content = json!({
"indexes": ["products_*"],
"actions": ["documents.*"],
"expiresAt": (OffsetDateTime::now_utc() + Duration::hours(1)).format(&Rfc3339).unwrap(),
});
// Register the key
let (response, code) = server.add_api_key(content).await;
assert_eq!(201, code, "{:?}", &response);
assert!(response["key"].is_string());
// use created key.
let key = response["key"].as_str().unwrap();
server.use_api_key(key);
// refer to products_1 and products_2 with modified api key.
let product_1_index = server.index("products_1");
let product_2_index = server.index("products_2");
// refer to test index
let test_index = server.index("test");
// try to create an index via add documents route
let documents = json!([
{
"id": 1,
"content": "foo",
}
]);
// Adding document to products_1 index. Should succeed with 202
let (response, code) = product_1_index.add_documents(documents.clone(), None).await;
assert_eq!(202, code, "{:?}", &response);
let task1_id = response["taskUid"].as_u64().unwrap();
// Adding document to products_2 index. Should succeed with 202
let (response, code) = product_2_index.add_documents(documents.clone(), None).await;
assert_eq!(202, code, "{:?}", &response);
let task2_id = response["taskUid"].as_u64().unwrap();
// Adding document to test index. Should Fail with 403 -- invalid_api_key
let (response, code) = test_index.add_documents(documents, None).await;
assert_eq!(403, code, "{:?}", &response);
server.use_api_key(MASTER_KEY);
// refer to products_1 with modified api key.
let product_1_index = server.index("products_1");
// refer to products_2 with modified api key.
let product_2_index = server.index("products_2");
product_1_index.wait_task(task1_id).await;
product_2_index.wait_task(task2_id).await;
let (response, code) = product_1_index.get_task(task1_id).await;
assert_eq!(200, code, "{:?}", &response);
assert_eq!(response["status"], "succeeded");
let (response, code) = product_1_index.get_task(task2_id).await;
assert_eq!(200, code, "{:?}", &response);
assert_eq!(response["status"], "succeeded");
}
#[actix_rt::test]
async fn pattern_indexes() {
// Create server with master key
let mut server = Server::new_auth().await;
server.use_admin_key(MASTER_KEY).await;
// indexes.* actions constrained to the products_* index pattern
let content = json!({
"indexes": ["products_*"],
"actions": ["indexes.*"],
"expiresAt": (OffsetDateTime::now_utc() + Duration::hours(1)).format(&Rfc3339).unwrap(),
});
// Generate and use the api key
let (response, code) = server.add_api_key(content).await;
assert_eq!(201, code, "{:?}", &response);
let key = response["key"].as_str().expect("Key is not string");
server.use_api_key(key);
// Create Index products_1 using generated api key
let products_1 = server.index("products_1");
let (response, code) = products_1.create(Some("id")).await;
assert_eq!(202, code, "{:?}", &response);
// Fail to create products_* using generated api key
let products_1 = server.index("products_*");
let (response, code) = products_1.create(Some("id")).await;
assert_eq!(400, code, "{:?}", &response);
// Fail to create test_1 using generated api key
let products_1 = server.index("test_1");
let (response, code) = products_1.create(Some("id")).await;
assert_eq!(403, code, "{:?}", &response);
}
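A minimal sketch of the authorization rule these tests exercise: an index pattern either matches an index uid exactly or, when it ends with `*`, matches any uid that starts with the part before the star. This is an illustrative approximation, not Meilisearch's actual implementation:

```rust
// Trailing-star prefix matching, as exercised by the pattern tests above.
fn pattern_matches(pattern: &str, index_uid: &str) -> bool {
    match pattern.strip_suffix('*') {
        Some(prefix) => index_uid.starts_with(prefix),
        None => pattern == index_uid,
    }
}

fn main() {
    assert!(pattern_matches("products_*", "products_1"));
    assert!(!pattern_matches("products_*", "products")); // hence the 403 above
    assert!(pattern_matches("*", "anything"));
}
```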
#[actix_rt::test]
async fn list_authorized_tasks_restricted_index() {
let mut server = Server::new_auth().await;
server.use_admin_key("MASTER_KEY").await;
server.use_admin_key(MASTER_KEY).await;
// create index `test`
let index = server.index("test");
@ -446,7 +632,6 @@ async fn list_authorized_tasks_restricted_index() {
let (response, code) = server.service.get("/tasks").await;
assert_eq!(200, code, "{:?}", &response);
println!("{}", response);
let response = response["results"].as_array().unwrap();
// key should have access on `products` index.
assert!(response.iter().any(|task| task["indexUid"] == "products"));
@ -458,7 +643,7 @@ async fn list_authorized_tasks_restricted_index() {
#[actix_rt::test]
async fn list_authorized_tasks_no_index_restriction() {
let mut server = Server::new_auth().await;
server.use_admin_key("MASTER_KEY").await;
server.use_admin_key(MASTER_KEY).await;
// create index `test`
let index = server.index("test");
@ -499,7 +684,7 @@ async fn list_authorized_tasks_no_index_restriction() {
#[actix_rt::test]
async fn error_creating_index_without_action() {
let mut server = Server::new_auth().await;
server.use_api_key("MASTER_KEY");
server.use_api_key(MASTER_KEY);
// create key with access on all indexes.
let content = json!({
@ -587,7 +772,7 @@ async fn lazy_create_index() {
];
for content in contents {
server.use_api_key("MASTER_KEY");
server.use_api_key(MASTER_KEY);
let (response, code) = server.add_api_key(content).await;
assert_eq!(201, code, "{:?}", &response);
assert!(response["key"].is_string());
@ -643,14 +828,114 @@ async fn lazy_create_index() {
}
}
#[actix_rt::test]
async fn lazy_create_index_from_pattern() {
let mut server = Server::new_auth().await;
// create keys restricted to the products_* index pattern.
let contents = vec![
json!({
"indexes": ["products_*"],
"actions": ["*"],
"expiresAt": "2050-11-13T00:00:00Z"
}),
json!({
"indexes": ["products_*"],
"actions": ["indexes.*", "documents.*", "settings.*", "tasks.*"],
"expiresAt": "2050-11-13T00:00:00Z"
}),
json!({
"indexes": ["products_*"],
"actions": ["indexes.create", "documents.add", "settings.update", "tasks.get"],
"expiresAt": "2050-11-13T00:00:00Z"
}),
];
for content in contents {
server.use_api_key(MASTER_KEY);
let (response, code) = server.add_api_key(content).await;
assert_eq!(201, code, "{:?}", &response);
assert!(response["key"].is_string());
// use created key.
let key = response["key"].as_str().unwrap();
server.use_api_key(key);
// try to create an index via add documents route
let index = server.index("products_1");
let test = server.index("test");
let documents = json!([
{
"id": 1,
"content": "foo",
}
]);
let (response, code) = index.add_documents(documents.clone(), None).await;
assert_eq!(202, code, "{:?}", &response);
let task_id = response["taskUid"].as_u64().unwrap();
index.wait_task(task_id).await;
let (response, code) = index.get_task(task_id).await;
assert_eq!(200, code, "{:?}", &response);
assert_eq!(response["status"], "succeeded");
// Fail to create test index
let (response, code) = test.add_documents(documents, None).await;
assert_eq!(403, code, "{:?}", &response);
// try to create an index via add settings route
let index = server.index("products_2");
let settings = json!({ "distinctAttribute": "test"});
let (response, code) = index.update_settings(settings).await;
assert_eq!(202, code, "{:?}", &response);
let task_id = response["taskUid"].as_u64().unwrap();
index.wait_task(task_id).await;
let (response, code) = index.get_task(task_id).await;
assert_eq!(200, code, "{:?}", &response);
assert_eq!(response["status"], "succeeded");
// Fail to create test index
let index = server.index("test");
let settings = json!({ "distinctAttribute": "test"});
let (response, code) = index.update_settings(settings).await;
assert_eq!(403, code, "{:?}", &response);
// try to create an index via add specialized settings route
let index = server.index("products_3");
let (response, code) = index.update_distinct_attribute(json!("test")).await;
assert_eq!(202, code, "{:?}", &response);
let task_id = response["taskUid"].as_u64().unwrap();
index.wait_task(task_id).await;
let (response, code) = index.get_task(task_id).await;
assert_eq!(200, code, "{:?}", &response);
assert_eq!(response["status"], "succeeded");
// Fail to create test index
let index = server.index("test");
let settings = json!({ "distinctAttribute": "test"});
let (response, code) = index.update_settings(settings).await;
assert_eq!(403, code, "{:?}", &response);
}
}
#[actix_rt::test]
async fn error_creating_index_without_index() {
let mut server = Server::new_auth().await;
server.use_api_key("MASTER_KEY");
server.use_api_key(MASTER_KEY);
// create key with access restricted to specific indexes and patterns.
let content = json!({
"indexes": ["unexpected"],
"indexes": ["unexpected","products_*"],
"actions": ["*"],
"expiresAt": "2050-11-13T00:00:00Z"
});
@ -690,4 +975,32 @@ async fn error_creating_index_without_index() {
let index = server.index("test3");
let (response, code) = index.create(None).await;
assert_eq!(403, code, "{:?}", &response);
// try to create an index via add documents route
let index = server.index("products");
let documents = json!([
{
"id": 1,
"content": "foo",
}
]);
let (response, code) = index.add_documents(documents, None).await;
assert_eq!(403, code, "{:?}", &response);
// try to create an index via add settings route
let index = server.index("products");
let settings = json!({ "distinctAttribute": "test"});
let (response, code) = index.update_settings(settings).await;
assert_eq!(403, code, "{:?}", &response);
// try to create an index via add specialized settings route
let index = server.index("products");
let (response, code) = index.update_distinct_attribute(json!("test")).await;
assert_eq!(403, code, "{:?}", &response);
// try to create an index via create index route
let index = server.index("products");
let (response, code) = index.create(None).await;
assert_eq!(403, code, "{:?}", &response);
}

View File

@ -120,7 +120,7 @@ async fn create_api_key_bad_indexes() {
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Invalid value at `.indexes[0]`: `good doggo` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_).",
"message": "Invalid value at `.indexes[0]`: `good doggo` is not a valid index uid pattern. Index uid patterns can be an integer or a string containing only alphanumeric characters, hyphens (-), underscores (_), and optionally end with a star (*).",
"code": "invalid_api_key_indexes",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_api_key_indexes"
@ -138,7 +138,7 @@ async fn create_api_key_bad_expires_at() {
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Unknown field `expires_at`: expected one of `description`, `name`, `uid`, `actions`, `indexes`, `expiresAt`",
"message": "Unknown field `expires_at`: did you mean `expiresAt`? expected one of `description`, `name`, `uid`, `actions`, `indexes`, `expiresAt`",
"code": "bad_request",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#bad_request"
@ -150,7 +150,7 @@ async fn create_api_key_bad_expires_at() {
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Unknown field `expires_at`: expected one of `description`, `name`, `uid`, `actions`, `indexes`, `expiresAt`",
"message": "Unknown field `expires_at`: did you mean `expiresAt`? expected one of `description`, `name`, `uid`, `actions`, `indexes`, `expiresAt`",
"code": "bad_request",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#bad_request"

View File

@ -82,6 +82,11 @@ static ACCEPTED_KEYS: Lazy<Vec<Value>> = Lazy::new(|| {
"actions": ["search"],
"expiresAt": (OffsetDateTime::now_utc() + Duration::days(1)).format(&Rfc3339).unwrap()
}),
json!({
"indexes": ["sal*", "prod*"],
"actions": ["search"],
"expiresAt": (OffsetDateTime::now_utc() + Duration::days(1)).format(&Rfc3339).unwrap()
}),
]
});
@ -104,6 +109,11 @@ static REFUSED_KEYS: Lazy<Vec<Value>> = Lazy::new(|| {
"actions": ["*"],
"expiresAt": (OffsetDateTime::now_utc() + Duration::days(1)).format(&Rfc3339).unwrap()
}),
json!({
"indexes": ["prod*", "p*"],
"actions": ["*"],
"expiresAt": (OffsetDateTime::now_utc() + Duration::days(1)).format(&Rfc3339).unwrap()
}),
json!({
"indexes": ["products"],
"actions": ["search"],
@ -245,6 +255,10 @@ async fn search_authorized_simple_token() {
"searchRules" => json!(["sales"]),
"exp" => Value::Null
},
hashmap! {
"searchRules" => json!(["sa*"]),
"exp" => Value::Null
},
];
compute_authorized_search!(tenant_tokens, {}, 5);
@ -351,11 +365,19 @@ async fn filter_search_authorized_filter_token() {
}),
"exp" => json!((OffsetDateTime::now_utc() + Duration::hours(1)).unix_timestamp())
},
hashmap! {
"searchRules" => json!({
"*": {},
"sal*": {"filter": ["color = blue"]}
}),
"exp" => json!((OffsetDateTime::now_utc() + Duration::hours(1)).unix_timestamp())
},
];
compute_authorized_search!(tenant_tokens, "color = yellow", 1);
}
/// Tests that these tenant tokens are incompatible with the REFUSED_KEYS defined above.
#[actix_rt::test]
async fn error_search_token_forbidden_parent_key() {
let tenant_tokens = vec![
@ -383,6 +405,10 @@ async fn error_search_token_forbidden_parent_key() {
"searchRules" => json!(["sales"]),
"exp" => json!((OffsetDateTime::now_utc() + Duration::hours(1)).unix_timestamp())
},
hashmap! {
"searchRules" => json!(["sali*", "s*", "sales*"]),
"exp" => json!((OffsetDateTime::now_utc() + Duration::hours(1)).unix_timestamp())
},
];
compute_forbidden_search!(tenant_tokens, REFUSED_KEYS);

View File

@ -30,7 +30,7 @@ impl Index<'_> {
.post_str(
url,
include_str!("../assets/test_set.json"),
("content-type", "application/json"),
vec![("content-type", "application/json")],
)
.await;
assert_eq!(code, 202);
@ -46,7 +46,7 @@ impl Index<'_> {
.post_str(
url,
include_str!("../assets/test_set.ndjson"),
("content-type", "application/x-ndjson"),
vec![("content-type", "application/x-ndjson")],
)
.await;
assert_eq!(code, 202);
@ -96,6 +96,21 @@ impl Index<'_> {
self.service.post_encoded(url, documents, self.encoder).await
}
pub async fn raw_add_documents(
&self,
payload: &str,
content_type: Option<&str>,
query_parameter: &str,
) -> (Value, StatusCode) {
let url = format!("/indexes/{}/documents{}", urlencode(self.uid.as_ref()), query_parameter);
if let Some(content_type) = content_type {
self.service.post_str(url, payload, vec![("Content-Type", content_type)]).await
} else {
self.service.post_str(url, payload, Vec::new()).await
}
}
pub async fn update_documents(
&self,
documents: Value,
@ -110,6 +125,21 @@ impl Index<'_> {
self.service.put_encoded(url, documents, self.encoder).await
}
pub async fn raw_update_documents(
&self,
payload: &str,
content_type: Option<&str>,
query_parameter: &str,
) -> (Value, StatusCode) {
let url = format!("/indexes/{}/documents{}", urlencode(self.uid.as_ref()), query_parameter);
if let Some(content_type) = content_type {
self.service.put_str(url, payload, vec![("Content-Type", content_type)]).await
} else {
self.service.put_str(url, payload, Vec::new()).await
}
}
pub async fn wait_task(&self, update_id: u64) -> Value {
// try several times to get status, or panic to not wait forever
let url = format!("/tasks/{}", update_id);

View File

@ -103,10 +103,6 @@ impl Server {
Index { uid: uid.as_ref().to_string(), service: &self.service, encoder }
}
pub async fn search(&self, queries: Value) -> (Value, StatusCode) {
self.service.post("/search", queries).await
}
pub async fn list_indexes_raw(&self, parameters: &str) -> (Value, StatusCode) {
self.service.get(format!("/indexes{parameters}")).await
}

View File

@ -34,17 +34,18 @@ impl Service {
self.request(req).await
}
/// Send a test post request from a text body, with a `content-type:application/json` header.
/// Send a test post request from a text body.
pub async fn post_str(
&self,
url: impl AsRef<str>,
body: impl AsRef<str>,
header: (&str, &str),
headers: Vec<(&str, &str)>,
) -> (Value, StatusCode) {
let req = test::TestRequest::post()
.uri(url.as_ref())
.set_payload(body.as_ref().to_string())
.insert_header(header);
let mut req =
test::TestRequest::post().uri(url.as_ref()).set_payload(body.as_ref().to_string());
for header in headers {
req = req.insert_header(header);
}
self.request(req).await
}
@ -57,6 +58,21 @@ impl Service {
self.put_encoded(url, body, Encoder::Plain).await
}
/// Send a test put request from a text body.
pub async fn put_str(
&self,
url: impl AsRef<str>,
body: impl AsRef<str>,
headers: Vec<(&str, &str)>,
) -> (Value, StatusCode) {
let mut req =
test::TestRequest::put().uri(url.as_ref()).set_payload(body.as_ref().to_string());
for header in headers {
req = req.insert_header(header);
}
self.request(req).await
}
pub async fn put_encoded(
&self,
url: impl AsRef<str>,

View File

@ -216,6 +216,133 @@ async fn add_single_document_with_every_encoding() {
}
}
#[actix_rt::test]
async fn add_csv_document() {
let server = Server::new().await;
let index = server.index("pets");
let document = "#id,name,race
0,jean,bernese mountain
1,jorts,orange cat";
let (response, code) = index.raw_update_documents(document, Some("text/csv"), "").await;
snapshot!(code, @"202 Accepted");
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
{
"taskUid": 0,
"indexUid": "pets",
"status": "enqueued",
"type": "documentAdditionOrUpdate",
"enqueuedAt": "[date]"
}
"###);
let response = index.wait_task(response["taskUid"].as_u64().unwrap()).await;
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]", ".duration" => "[duration]" }), @r###"
{
"uid": 0,
"indexUid": "pets",
"status": "succeeded",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 2,
"indexedDocuments": 2
},
"error": null,
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
let (documents, code) = index.get_all_documents(GetAllDocumentsOptions::default()).await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(documents), @r###"
{
"results": [
{
"#id": "0",
"name": "jean",
"race": "bernese mountain"
},
{
"#id": "1",
"name": "jorts",
"race": "orange cat"
}
],
"offset": 0,
"limit": 20,
"total": 2
}
"###);
}
#[actix_rt::test]
async fn add_csv_document_with_custom_delimiter() {
let server = Server::new().await;
let index = server.index("pets");
let document = "#id|name|race
0|jean|bernese mountain
1|jorts|orange cat";
let (response, code) =
index.raw_update_documents(document, Some("text/csv"), "?csvDelimiter=|").await;
snapshot!(code, @"202 Accepted");
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
{
"taskUid": 0,
"indexUid": "pets",
"status": "enqueued",
"type": "documentAdditionOrUpdate",
"enqueuedAt": "[date]"
}
"###);
let response = index.wait_task(response["taskUid"].as_u64().unwrap()).await;
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]", ".duration" => "[duration]" }), @r###"
{
"uid": 0,
"indexUid": "pets",
"status": "succeeded",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 2,
"indexedDocuments": 2
},
"error": null,
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
let (documents, code) = index.get_all_documents(GetAllDocumentsOptions::default()).await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(documents), @r###"
{
"results": [
{
"#id": "0",
"name": "jean",
"race": "bernese mountain"
},
{
"#id": "1",
"name": "jorts",
"race": "orange cat"
}
],
"offset": 0,
"limit": 20,
"total": 2
}
"###);
}
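When the delimiter is not a URL-safe character, the error tests later in this diff pass it through `urlencoding::encode` before putting it in the query string. A minimal sketch of that step (the `;` delimiter is chosen purely for illustration):

```rust
// Build the csvDelimiter query parameter for a delimiter that needs
// percent-encoding, using the `urlencoding` crate as the tests do.
fn main() {
    let delimiter = ';';
    let query = format!("?csvDelimiter={}", urlencoding::encode(&delimiter.to_string()));
    println!("{query}"); // ?csvDelimiter=%3B
}
```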
/// any other content-type must be refused
#[actix_rt::test]
async fn error_add_documents_test_bad_content_types() {
@ -1027,6 +1154,53 @@ async fn error_document_field_limit_reached() {
@"");
}
#[actix_rt::test]
async fn add_documents_with_geo_field() {
let server = Server::new().await;
let index = server.index("doggo");
index.update_settings(json!({"sortableAttributes": ["_geo"]})).await;
let documents = json!([
{
"id": "1",
},
{
"id": "2",
"_geo": null,
},
{
"id": "3",
"_geo": { "lat": 1, "lng": 1 },
},
{
"id": "4",
"_geo": { "lat": "1", "lng": "1" },
},
]);
index.add_documents(documents, None).await;
let response = index.wait_task(1).await;
snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }),
@r###"
{
"uid": 1,
"indexUid": "doggo",
"status": "succeeded",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 4,
"indexedDocuments": 4
},
"error": null,
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
}
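The `_geo` values accepted above are ultimately range-checked elsewhere in the codebase: latitude must lie in [-90, 90] and longitude in [-180, 180], as the `BadGeoError::Lat`/`Lng` variants further down show. A standalone sketch of that check, with a hypothetical function and error representation:

```rust
// Illustrative range check for _geo coordinates; not the real milli code.
fn validate_geo(lat: f64, lng: f64) -> Result<(), String> {
    if !(-90.0..=90.0).contains(&lat) {
        return Err(format!("invalid latitude: {lat}"));
    }
    if !(-180.0..=180.0).contains(&lng) {
        return Err(format!("invalid longitude: {lng}"));
    }
    Ok(())
}

fn main() {
    assert!(validate_geo(1.0, 1.0).is_ok());
    assert!(validate_geo(200.0, 0.0).is_err());
}
```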
#[actix_rt::test]
async fn add_documents_invalid_geo_field() {
let server = Server::new().await;

View File

@ -1,5 +1,6 @@
use meili_snap::*;
use serde_json::json;
use urlencoding::encode;
use crate::common::Server;
@ -97,3 +98,323 @@ async fn delete_documents_batch() {
}
"###);
}
#[actix_rt::test]
async fn replace_documents_missing_payload() {
let server = Server::new().await;
let index = server.index("test");
let (response, code) = index.raw_add_documents("", Some("application/json"), "").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "A json payload is missing.",
"code": "missing_payload",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#missing_payload"
}
"###);
let (response, code) = index.raw_add_documents("", Some("application/x-ndjson"), "").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "A ndjson payload is missing.",
"code": "missing_payload",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#missing_payload"
}
"###);
let (response, code) = index.raw_add_documents("", Some("text/csv"), "").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "A csv payload is missing.",
"code": "missing_payload",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#missing_payload"
}
"###);
}
#[actix_rt::test]
async fn update_documents_missing_payload() {
let server = Server::new().await;
let index = server.index("test");
let (response, code) = index.raw_update_documents("", Some("application/json"), "").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "A json payload is missing.",
"code": "missing_payload",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#missing_payload"
}
"###);
let (response, code) = index.raw_update_documents("", Some("application/x-ndjson"), "").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "A ndjson payload is missing.",
"code": "missing_payload",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#missing_payload"
}
"###);
let (response, code) = index.raw_update_documents("", Some("text/csv"), "").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "A csv payload is missing.",
"code": "missing_payload",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#missing_payload"
}
"###);
}
#[actix_rt::test]
async fn replace_documents_missing_content_type() {
let server = Server::new().await;
let index = server.index("test");
let (response, code) = index.raw_add_documents("", None, "").await;
snapshot!(code, @"415 Unsupported Media Type");
snapshot!(json_string!(response), @r###"
{
"message": "A Content-Type header is missing. Accepted values for the Content-Type header are: `application/json`, `application/x-ndjson`, `text/csv`",
"code": "missing_content_type",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#missing_content_type"
}
"###);
// even with a csv delimiter specified this error is triggered first
let (response, code) = index.raw_add_documents("", None, "?csvDelimiter=;").await;
snapshot!(code, @"415 Unsupported Media Type");
snapshot!(json_string!(response), @r###"
{
"message": "A Content-Type header is missing. Accepted values for the Content-Type header are: `application/json`, `application/x-ndjson`, `text/csv`",
"code": "missing_content_type",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#missing_content_type"
}
"###);
}
#[actix_rt::test]
async fn update_documents_missing_content_type() {
let server = Server::new().await;
let index = server.index("test");
let (response, code) = index.raw_update_documents("", None, "").await;
snapshot!(code, @"415 Unsupported Media Type");
snapshot!(json_string!(response), @r###"
{
"message": "A Content-Type header is missing. Accepted values for the Content-Type header are: `application/json`, `application/x-ndjson`, `text/csv`",
"code": "missing_content_type",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#missing_content_type"
}
"###);
// even with a csv delimiter specified this error is triggered first
let (response, code) = index.raw_update_documents("", None, "?csvDelimiter=;").await;
snapshot!(code, @"415 Unsupported Media Type");
snapshot!(json_string!(response), @r###"
{
"message": "A Content-Type header is missing. Accepted values for the Content-Type header are: `application/json`, `application/x-ndjson`, `text/csv`",
"code": "missing_content_type",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#missing_content_type"
}
"###);
}
#[actix_rt::test]
async fn replace_documents_bad_content_type() {
let server = Server::new().await;
let index = server.index("test");
let (response, code) = index.raw_add_documents("", Some("doggo"), "").await;
snapshot!(code, @"415 Unsupported Media Type");
snapshot!(json_string!(response), @r###"
{
"message": "The Content-Type `doggo` is invalid. Accepted values for the Content-Type header are: `application/json`, `application/x-ndjson`, `text/csv`",
"code": "invalid_content_type",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_content_type"
}
"###);
}
#[actix_rt::test]
async fn update_documents_bad_content_type() {
let server = Server::new().await;
let index = server.index("test");
let (response, code) = index.raw_update_documents("", Some("doggo"), "").await;
snapshot!(code, @"415 Unsupported Media Type");
snapshot!(json_string!(response), @r###"
{
"message": "The Content-Type `doggo` is invalid. Accepted values for the Content-Type header are: `application/json`, `application/x-ndjson`, `text/csv`",
"code": "invalid_content_type",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_content_type"
}
"###);
}
#[actix_rt::test]
async fn replace_documents_bad_csv_delimiter() {
let server = Server::new().await;
let index = server.index("test");
let (response, code) =
index.raw_add_documents("", Some("application/json"), "?csvDelimiter").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Invalid value in parameter `csvDelimiter`: expected a string of one character, but found an empty string",
"code": "invalid_index_csv_delimiter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_index_csv_delimiter"
}
"###);
let (response, code) =
index.raw_add_documents("", Some("application/json"), "?csvDelimiter=doggo").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Invalid value in parameter `csvDelimiter`: expected a string of one character, but found the following string of 5 characters: `doggo`",
"code": "invalid_index_csv_delimiter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_index_csv_delimiter"
}
"###);
let (response, code) = index
.raw_add_documents("", Some("application/json"), &format!("?csvDelimiter={}", encode("🍰")))
.await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "csv delimiter must be an ascii character. Found: `🍰`",
"code": "invalid_index_csv_delimiter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_index_csv_delimiter"
}
"###);
}
#[actix_rt::test]
async fn update_documents_bad_csv_delimiter() {
let server = Server::new().await;
let index = server.index("test");
let (response, code) =
index.raw_update_documents("", Some("application/json"), "?csvDelimiter").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Invalid value in parameter `csvDelimiter`: expected a string of one character, but found an empty string",
"code": "invalid_index_csv_delimiter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_index_csv_delimiter"
}
"###);
let (response, code) =
index.raw_update_documents("", Some("application/json"), "?csvDelimiter=doggo").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Invalid value in parameter `csvDelimiter`: expected a string of one character, but found the following string of 5 characters: `doggo`",
"code": "invalid_index_csv_delimiter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_index_csv_delimiter"
}
"###);
let (response, code) = index
.raw_update_documents(
"",
Some("application/json"),
&format!("?csvDelimiter={}", encode("🍰")),
)
.await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "csv delimiter must be an ascii character. Found: `🍰`",
"code": "invalid_index_csv_delimiter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_index_csv_delimiter"
}
"###);
}
#[actix_rt::test]
async fn replace_documents_csv_delimiter_with_bad_content_type() {
let server = Server::new().await;
let index = server.index("test");
let (response, code) =
index.raw_add_documents("", Some("application/json"), "?csvDelimiter=a").await;
snapshot!(code, @"415 Unsupported Media Type");
snapshot!(json_string!(response), @r###"
{
"message": "The Content-Type `application/json` does not support the use of a csv delimiter. The csv delimiter can only be used with the Content-Type `text/csv`.",
"code": "invalid_content_type",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_content_type"
}
"###);
let (response, code) =
index.raw_add_documents("", Some("application/x-ndjson"), "?csvDelimiter=a").await;
snapshot!(code, @"415 Unsupported Media Type");
snapshot!(json_string!(response), @r###"
{
"message": "The Content-Type `application/x-ndjson` does not support the use of a csv delimiter. The csv delimiter can only be used with the Content-Type `text/csv`.",
"code": "invalid_content_type",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_content_type"
}
"###);
}
#[actix_rt::test]
async fn update_documents_csv_delimiter_with_bad_content_type() {
let server = Server::new().await;
let index = server.index("test");
let (response, code) =
index.raw_update_documents("", Some("application/json"), "?csvDelimiter=a").await;
snapshot!(code, @"415 Unsupported Media Type");
snapshot!(json_string!(response), @r###"
{
"message": "The Content-Type `application/json` does not support the use of a csv delimiter. The csv delimiter can only be used with the Content-Type `text/csv`.",
"code": "invalid_content_type",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_content_type"
}
"###);
let (response, code) =
index.raw_update_documents("", Some("application/x-ndjson"), "?csvDelimiter=a").await;
snapshot!(code, @"415 Unsupported Media Type");
snapshot!(json_string!(response), @r###"
{
"message": "The Content-Type `application/x-ndjson` does not support the use of a csv delimiter. The csv delimiter can only be used with the Content-Type `text/csv`.",
"code": "invalid_content_type",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_content_type"
}
"###);
}

View File

@ -3,7 +3,6 @@
mod errors;
mod formatted;
mod multi;
mod pagination;
use once_cell::sync::Lazy;

View File

@ -1,343 +0,0 @@
use meili_snap::{json_string, snapshot};
use serde_json::json;
use super::{DOCUMENTS, NESTED_DOCUMENTS};
use crate::common::Server;
#[actix_rt::test]
async fn search_empty_list() {
let server = Server::new().await;
let (response, code) = server.search(json!({"queries": []})).await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(response), @r###"
{
"results": []
}
"###);
}
#[actix_rt::test]
async fn search_json_object() {
let server = Server::new().await;
let (response, code) = server.search(json!({})).await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Missing field `queries`",
"code": "bad_request",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#bad_request"
}
"###);
}
#[actix_rt::test]
async fn search_json_array() {
let server = Server::new().await;
let (response, code) = server.search(json!([])).await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Invalid value type: expected an object, but found an array: `[]`",
"code": "bad_request",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#bad_request"
}
"###);
}
#[actix_rt::test]
async fn simple_search_single_index() {
let server = Server::new().await;
let index = server.index("test");
let documents = DOCUMENTS.clone();
index.add_documents(documents, None).await;
index.wait_task(0).await;
let (response, code) = server
.search(json!({"queries": [
{"indexUid" : "test", "q": "glass"},
{"indexUid": "test", "q": "captain"},
]}))
.await;
snapshot!(code, @"200 OK");
insta::assert_json_snapshot!(response["results"], { "[].processingTimeMs" => "[time]" }, @r###"
[
{
"indexUid": "test",
"hits": [
{
"title": "Glass",
"id": "450465"
}
],
"query": "glass",
"processingTimeMs": "[time]",
"limit": 20,
"offset": 0,
"estimatedTotalHits": 1
},
{
"indexUid": "test",
"hits": [
{
"title": "Captain Marvel",
"id": "299537"
}
],
"query": "captain",
"processingTimeMs": "[time]",
"limit": 20,
"offset": 0,
"estimatedTotalHits": 1
}
]
"###);
}
#[actix_rt::test]
async fn simple_search_missing_index_uid() {
let server = Server::new().await;
let index = server.index("test");
let documents = DOCUMENTS.clone();
index.add_documents(documents, None).await;
index.wait_task(0).await;
let (response, code) = server
.search(json!({"queries": [
{"q": "glass"},
]}))
.await;
snapshot!(code, @"400 Bad Request");
insta::assert_json_snapshot!(response, @r###"
{
"message": "Missing field `indexUid` inside `.queries[0]`",
"code": "missing_index_uid",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#missing_index_uid"
}
"###);
}
#[actix_rt::test]
async fn simple_search_illegal_index_uid() {
let server = Server::new().await;
let index = server.index("test");
let documents = DOCUMENTS.clone();
index.add_documents(documents, None).await;
index.wait_task(0).await;
let (response, code) = server
.search(json!({"queries": [
{"indexUid": "hé", "q": "glass"},
]}))
.await;
snapshot!(code, @"400 Bad Request");
insta::assert_json_snapshot!(response, @r###"
{
"message": "Invalid value at `.queries[0].indexUid`: `hé` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_).",
"code": "invalid_index_uid",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_index_uid"
}
"###);
}
#[actix_rt::test]
async fn simple_search_two_indexes() {
let server = Server::new().await;
let index = server.index("test");
let documents = DOCUMENTS.clone();
index.add_documents(documents, None).await;
index.wait_task(0).await;
let index = server.index("nested");
let documents = NESTED_DOCUMENTS.clone();
index.add_documents(documents, None).await;
index.wait_task(1).await;
let (response, code) = server
.search(json!({"queries": [
{"indexUid" : "test", "q": "glass"},
{"indexUid": "nested", "q": "pesti"},
]}))
.await;
snapshot!(code, @"200 OK");
insta::assert_json_snapshot!(response["results"], { "[].processingTimeMs" => "[time]" }, @r###"
[
{
"indexUid": "test",
"hits": [
{
"title": "Glass",
"id": "450465"
}
],
"query": "glass",
"processingTimeMs": "[time]",
"limit": 20,
"offset": 0,
"estimatedTotalHits": 1
},
{
"indexUid": "nested",
"hits": [
{
"id": 852,
"father": "jean",
"mother": "michelle",
"doggos": [
{
"name": "bobby",
"age": 2
},
{
"name": "buddy",
"age": 4
}
],
"cattos": "pesti"
},
{
"id": 654,
"father": "pierre",
"mother": "sabine",
"doggos": [
{
"name": "gros bill",
"age": 8
}
],
"cattos": [
"simba",
"pestiféré"
]
}
],
"query": "pesti",
"processingTimeMs": "[time]",
"limit": 20,
"offset": 0,
"estimatedTotalHits": 2
}
]
"###);
}
#[actix_rt::test]
async fn search_one_index_doesnt_exist() {
let server = Server::new().await;
let index = server.index("test");
let documents = DOCUMENTS.clone();
index.add_documents(documents, None).await;
index.wait_task(0).await;
let (response, code) = server
.search(json!({"queries": [
{"indexUid" : "test", "q": "glass"},
{"indexUid": "nested", "q": "pesti"},
]}))
.await;
snapshot!(code, @"404 Not Found");
snapshot!(json_string!(response), @r###"
{
"message": "Index `nested` not found.",
"code": "index_not_found",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#index_not_found"
}
"###);
}
#[actix_rt::test]
async fn search_multiple_indexes_dont_exist() {
let server = Server::new().await;
let (response, code) = server
.search(json!({"queries": [
{"indexUid" : "test", "q": "glass"},
{"indexUid": "nested", "q": "pesti"},
]}))
.await;
snapshot!(code, @"404 Not Found");
snapshot!(json_string!(response), @r###"
{
"message": "Index `test` not found.",
"code": "index_not_found",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#index_not_found"
}
"###);
}
#[actix_rt::test]
async fn search_one_query_error() {
let server = Server::new().await;
let index = server.index("test");
let documents = DOCUMENTS.clone();
index.add_documents(documents, None).await;
index.wait_task(0).await;
let index = server.index("nested");
let documents = NESTED_DOCUMENTS.clone();
index.add_documents(documents, None).await;
index.wait_task(1).await;
let (response, code) = server
.search(json!({"queries": [
{"indexUid" : "test", "q": "glass", "facets": ["title"]},
{"indexUid": "nested", "q": "pesti"},
]}))
.await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Invalid facet distribution, this index does not have configured filterable attributes.",
"code": "invalid_search_facets",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_facets"
}
"###);
}
#[actix_rt::test]
async fn search_multiple_query_errors() {
let server = Server::new().await;
let index = server.index("test");
let documents = DOCUMENTS.clone();
index.add_documents(documents, None).await;
index.wait_task(0).await;
let index = server.index("nested");
let documents = NESTED_DOCUMENTS.clone();
index.add_documents(documents, None).await;
index.wait_task(1).await;
let (response, code) = server
.search(json!({"queries": [
{"indexUid" : "test", "q": "glass", "facets": ["title"]},
{"indexUid": "nested", "q": "pesti", "facets": ["doggos"]},
]}))
.await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Invalid facet distribution, this index does not have configured filterable attributes.",
"code": "invalid_search_facets",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_facets"
}
"###);
}

View File

@ -1,8 +1,15 @@
[package]
name = "milli"
version = "1.0.0"
authors = ["Kerollmops <clement@meilisearch.com>"]
edition = "2018"
publish = false
version.workspace = true
authors.workspace = true
description.workspace = true
homepage.workspace = true
readme.workspace = true
# edition.workspace = true
license.workspace = true
[dependencies]
bimap = { version = "0.6.2", features = ["serde"] }
@ -12,7 +19,7 @@ byteorder = "1.4.3"
charabia = { version = "0.7.0", default-features = false }
concat-arrays = "0.1.2"
crossbeam-channel = "0.5.6"
deserr = "0.3.0"
deserr = "0.5.0"
either = "1.8.0"
flatten-serde-json = { path = "../flatten-serde-json" }
fst = "0.4.7"

View File

@ -7,45 +7,31 @@ use serde::{Deserialize, Serialize};
use thiserror::Error;
use crate::error::is_reserved_keyword;
use crate::search::facet::BadGeoError;
use crate::{CriterionError, Error, UserError};
/// This error type is never supposed to be shown to the end user.
/// You must always convert it to a sort error or a criterion error.
#[derive(Debug)]
#[derive(Error, Debug)]
pub enum AscDescError {
InvalidLatitude,
InvalidLongitude,
#[error(transparent)]
GeoError(BadGeoError),
#[error("Invalid syntax for the asc/desc parameter: expected expression ending by `:asc` or `:desc`, found `{name}`.")]
InvalidSyntax { name: String },
#[error("`{name}` is a reserved keyword and thus can't be used as a asc/desc rule.")]
ReservedKeyword { name: String },
}
impl fmt::Display for AscDescError {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
Self::InvalidLatitude => {
write!(f, "Latitude must be contained between -90 and 90 degrees.",)
}
Self::InvalidLongitude => {
write!(f, "Longitude must be contained between -180 and 180 degrees.",)
}
Self::InvalidSyntax { name } => {
write!(f, "Invalid syntax for the asc/desc parameter: expected expression ending by `:asc` or `:desc`, found `{}`.", name)
}
Self::ReservedKeyword { name } => {
write!(
f,
"`{}` is a reserved keyword and thus can't be used as a asc/desc rule.",
name
)
}
}
impl From<BadGeoError> for AscDescError {
fn from(geo_error: BadGeoError) -> Self {
AscDescError::GeoError(geo_error)
}
}
impl From<AscDescError> for CriterionError {
fn from(error: AscDescError) -> Self {
match error {
AscDescError::InvalidLatitude | AscDescError::InvalidLongitude => {
AscDescError::GeoError(_) => {
CriterionError::ReservedNameForSort { name: "_geoPoint".to_string() }
}
AscDescError::InvalidSyntax { name } => CriterionError::InvalidName { name },
@ -85,9 +71,9 @@ impl FromStr for Member {
.map_err(|_| AscDescError::ReservedKeyword { name: text.to_string() })
})?;
if !(-90.0..=90.0).contains(&lat) {
return Err(AscDescError::InvalidLatitude)?;
return Err(BadGeoError::Lat(lat))?;
} else if !(-180.0..=180.0).contains(&lng) {
return Err(AscDescError::InvalidLongitude)?;
return Err(BadGeoError::Lng(lng))?;
}
Ok(Member::Geo([lat, lng]))
}
@ -162,10 +148,8 @@ impl FromStr for AscDesc {
#[derive(Error, Debug)]
pub enum SortError {
#[error("{}", AscDescError::InvalidLatitude)]
InvalidLatitude,
#[error("{}", AscDescError::InvalidLongitude)]
InvalidLongitude,
#[error(transparent)]
ParseGeoError { error: BadGeoError },
#[error("Invalid syntax for the geo parameter: expected expression formated like \
`_geoPoint(latitude, longitude)` and ending by `:asc` or `:desc`, found `{name}`.")]
BadGeoPointUsage { name: String },
@ -184,8 +168,7 @@ pub enum SortError {
impl From<AscDescError> for SortError {
fn from(error: AscDescError) -> Self {
match error {
AscDescError::InvalidLatitude => SortError::InvalidLatitude,
AscDescError::InvalidLongitude => SortError::InvalidLongitude,
AscDescError::GeoError(error) => SortError::ParseGeoError { error },
AscDescError::InvalidSyntax { name } => SortError::InvalidName { name },
AscDescError::ReservedKeyword { name } if name.starts_with("_geoPoint") => {
SortError::BadGeoPointUsage { name }
@ -277,11 +260,11 @@ mod tests {
),
("_geoPoint(35, 85, 75):asc", ReservedKeyword { name: S("_geoPoint(35, 85, 75)") }),
("_geoPoint(18):asc", ReservedKeyword { name: S("_geoPoint(18)") }),
("_geoPoint(200, 200):asc", InvalidLatitude),
("_geoPoint(90.000001, 0):asc", InvalidLatitude),
("_geoPoint(0, -180.000001):desc", InvalidLongitude),
("_geoPoint(159.256, 130):asc", InvalidLatitude),
("_geoPoint(12, -2021):desc", InvalidLongitude),
("_geoPoint(200, 200):asc", GeoError(BadGeoError::Lat(200.))),
("_geoPoint(90.000001, 0):asc", GeoError(BadGeoError::Lat(90.000001))),
("_geoPoint(0, -180.000001):desc", GeoError(BadGeoError::Lng(-180.000001))),
("_geoPoint(159.256, 130):asc", GeoError(BadGeoError::Lat(159.256))),
("_geoPoint(12, -2021):desc", GeoError(BadGeoError::Lng(-2021.))),
];
for (req, expected_error) in invalid_req {

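The hunks above replace the hand-written `Display` impl and the separate latitude/longitude variants with a single `GeoError(BadGeoError)` variant whose message is forwarded via `#[error(transparent)]` and a `From` conversion. A small, self-contained sketch of that pattern, assuming the thiserror crate and using hypothetical error names (and thiserror's `#[from]` shortcut instead of the manual `From` impl shown in the diff):

```rust
use thiserror::Error;

#[derive(Error, Debug)]
#[error("Bad latitude `{0}`. Latitude must be contained between -90 and 90 degrees.")]
struct BadLat(f64);

#[derive(Error, Debug)]
enum AscDescLikeError {
    // Display (and source) are forwarded to the wrapped error.
    #[error(transparent)]
    Geo(#[from] BadLat),
    #[error("Invalid syntax, found `{name}`.")]
    InvalidSyntax { name: String },
}

fn check_latitude(lat: f64) -> Result<(), AscDescLikeError> {
    if !(-90.0..=90.0).contains(&lat) {
        // `?` goes through the generated `From<BadLat>` impl, mirroring
        // the `return Err(BadGeoError::Lat(lat))?` pattern in the diff.
        return Err(BadLat(lat))?;
    }
    Ok(())
}

fn main() {
    let err = check_latitude(200.0).unwrap_err();
    assert_eq!(
        err.to_string(),
        "Bad latitude `200`. Latitude must be contained between -90 and 90 degrees."
    );
}
```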
View File

@ -123,7 +123,7 @@ impl<'t> Criterion for Attribute<'t> {
None => {
return Ok(Some(CriterionResult {
query_tree: Some(query_tree),
candidates: Some(RoaringBitmap::new()),
candidates: Some(allowed_candidates),
filtered_candidates: None,
initial_candidates: Some(self.initial_candidates.take()),
}));

View File

@ -21,18 +21,51 @@ pub struct Filter<'a> {
condition: FilterCondition<'a>,
}
#[derive(Debug)]
pub enum BadGeoError {
Lat(f64),
Lng(f64),
BoundingBoxTopIsBelowBottom(f64, f64),
}
impl std::error::Error for BadGeoError {}
impl Display for BadGeoError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::BoundingBoxTopIsBelowBottom(top, bottom) => {
write!(f, "The top latitude `{top}` is below the bottom latitude `{bottom}`.")
}
Self::Lat(lat) => write!(
f,
"Bad latitude `{}`. Latitude must be contained between -90 and 90 degrees. ",
lat
),
Self::Lng(lng) => write!(
f,
"Bad longitude `{}`. Longitude must be contained between -180 and 180 degrees. ",
lng
),
}
}
}
#[derive(Debug)]
enum FilterError<'a> {
AttributeNotFilterable { attribute: &'a str, filterable_fields: HashSet<String> },
BadGeo(&'a str),
BadGeoLat(f64),
BadGeoLng(f64),
BadGeoBoundingBoxTopIsBelowBottom(f64, f64),
ParseGeoError(BadGeoError),
ReservedGeo(&'a str),
Reserved(&'a str),
TooDeep,
}
impl<'a> std::error::Error for FilterError<'a> {}
impl<'a> From<BadGeoError> for FilterError<'a> {
fn from(geo_error: BadGeoError) -> Self {
FilterError::ParseGeoError(geo_error)
}
}
impl<'a> Display for FilterError<'a> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
@ -44,7 +77,11 @@ impl<'a> Display for FilterError<'a> {
attribute,
)
} else {
let filterables_list = filterable_fields.iter().map(AsRef::as_ref).collect::<Vec<&str>>().join(" ");
let filterables_list = filterable_fields
.iter()
.map(AsRef::as_ref)
.collect::<Vec<&str>>()
.join(" ");
write!(
f,
@ -53,20 +90,19 @@ impl<'a> Display for FilterError<'a> {
filterables_list,
)
}
},
Self::TooDeep => write!(f,
}
Self::TooDeep => write!(
f,
"Too many filter conditions, can't process more than {} filters.",
MAX_FILTER_DEPTH
),
Self::ReservedGeo(keyword) => write!(f, "`{}` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` field coordinates.", keyword),
Self::Reserved(keyword) => write!(
f,
"`{}` is a reserved keyword and thus can't be used as a filter expression.",
keyword
),
Self::BadGeo(keyword) => write!(f, "`{}` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` field coordinates.", keyword),
Self::BadGeoBoundingBoxTopIsBelowBottom(top, bottom) => write!(f, "The top latitude `{top}` is below the bottom latitude `{bottom}`."),
Self::BadGeoLat(lat) => write!(f, "Bad latitude `{}`. Latitude must be contained between -90 and 90 degrees. ", lat),
Self::BadGeoLng(lng) => write!(f, "Bad longitude `{}`. Longitude must be contained between -180 and 180 degrees. ", lng),
Self::ParseGeoError(error) => write!(f, "{}", error),
}
}
}
@ -298,10 +334,10 @@ impl<'a> Filter<'a> {
} else {
match fid.value() {
attribute @ "_geo" => {
Err(fid.as_external_error(FilterError::BadGeo(attribute)))?
Err(fid.as_external_error(FilterError::ReservedGeo(attribute)))?
}
attribute if attribute.starts_with("_geoPoint(") => {
Err(fid.as_external_error(FilterError::BadGeo("_geoPoint")))?
Err(fid.as_external_error(FilterError::ReservedGeo("_geoPoint")))?
}
attribute @ "_geoDistance" => {
Err(fid.as_external_error(FilterError::Reserved(attribute)))?
@ -353,14 +389,10 @@ impl<'a> Filter<'a> {
let base_point: [f64; 2] =
[point[0].parse_finite_float()?, point[1].parse_finite_float()?];
if !(-90.0..=90.0).contains(&base_point[0]) {
return Err(
point[0].as_external_error(FilterError::BadGeoLat(base_point[0]))
)?;
return Err(point[0].as_external_error(BadGeoError::Lat(base_point[0])))?;
}
if !(-180.0..=180.0).contains(&base_point[1]) {
return Err(
point[1].as_external_error(FilterError::BadGeoLng(base_point[1]))
)?;
return Err(point[1].as_external_error(BadGeoError::Lng(base_point[1])))?;
}
let radius = radius.parse_finite_float()?;
let rtree = match index.geo_rtree(rtxn)? {
@ -398,27 +430,26 @@ impl<'a> Filter<'a> {
bottom_right_point[1].parse_finite_float()?,
];
if !(-90.0..=90.0).contains(&top_left[0]) {
return Err(top_left_point[0]
.as_external_error(FilterError::BadGeoLat(top_left[0])))?;
return Err(
top_left_point[0].as_external_error(BadGeoError::Lat(top_left[0]))
)?;
}
if !(-180.0..=180.0).contains(&top_left[1]) {
return Err(top_left_point[1]
.as_external_error(FilterError::BadGeoLng(top_left[1])))?;
return Err(
top_left_point[1].as_external_error(BadGeoError::Lng(top_left[1]))
)?;
}
if !(-90.0..=90.0).contains(&bottom_right[0]) {
return Err(bottom_right_point[0]
.as_external_error(FilterError::BadGeoLat(bottom_right[0])))?;
.as_external_error(BadGeoError::Lat(bottom_right[0])))?;
}
if !(-180.0..=180.0).contains(&bottom_right[1]) {
return Err(bottom_right_point[1]
.as_external_error(FilterError::BadGeoLng(bottom_right[1])))?;
.as_external_error(BadGeoError::Lng(bottom_right[1])))?;
}
if top_left[0] < bottom_right[0] {
return Err(bottom_right_point[1].as_external_error(
FilterError::BadGeoBoundingBoxTopIsBelowBottom(
top_left[0],
bottom_right[0],
),
BadGeoError::BoundingBoxTopIsBelowBottom(top_left[0], bottom_right[0]),
))?;
}

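This filter hunk routes every latitude/longitude range failure, plus the top-below-bottom bounding-box case, through the new `BadGeoError` type. A standalone sketch of the same range checks, with illustrative names only (not the milli API):

```rust
#[derive(Debug, PartialEq)]
enum BadGeo {
    Lat(f64),
    Lng(f64),
    BoundingBoxTopIsBelowBottom(f64, f64),
}

fn validate_bounding_box(top_left: [f64; 2], bottom_right: [f64; 2]) -> Result<(), BadGeo> {
    // Both corners must hold a valid latitude and longitude.
    for &[lat, lng] in &[top_left, bottom_right] {
        if !(-90.0..=90.0).contains(&lat) {
            return Err(BadGeo::Lat(lat));
        }
        if !(-180.0..=180.0).contains(&lng) {
            return Err(BadGeo::Lng(lng));
        }
    }
    // The top-left latitude must not be below the bottom-right one.
    if top_left[0] < bottom_right[0] {
        return Err(BadGeo::BoundingBoxTopIsBelowBottom(top_left[0], bottom_right[0]));
    }
    Ok(())
}

fn main() {
    assert!(validate_bounding_box([20.0, 10.0], [10.0, 30.0]).is_ok());
    assert_eq!(
        validate_bounding_box([10.0, 10.0], [20.0, 30.0]),
        Err(BadGeo::BoundingBoxTopIsBelowBottom(10.0, 20.0))
    );
    assert_eq!(validate_bounding_box([200.0, 0.0], [0.0, 0.0]), Err(BadGeo::Lat(200.0)));
}
```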
View File

@ -4,7 +4,7 @@ use heed::types::{ByteSlice, DecodeIgnore};
use heed::{BytesDecode, RoTxn};
pub use self::facet_distribution::{FacetDistribution, DEFAULT_VALUES_PER_FACET};
pub use self::filter::Filter;
pub use self::filter::{BadGeoError, Filter};
use crate::heed_codec::facet::{FacetGroupKeyCodec, FacetGroupValueCodec};
use crate::heed_codec::ByteSliceRefCodec;
mod facet_distribution;

View File

@ -6,7 +6,7 @@ use roaring::RoaringBitmap;
use crate::facet::FacetType;
use crate::heed_codec::facet::{FacetGroupKey, FacetGroupValue};
use crate::{make_db_snap_from_iter, ExternalDocumentsIds, Index};
use crate::{make_db_snap_from_iter, obkv_to_json, ExternalDocumentsIds, Index};
#[track_caller]
pub fn default_db_snapshot_settings_for_test(name: Option<&str>) -> (insta::Settings, String) {
@ -427,8 +427,26 @@ pub fn snap_settings(index: &Index) -> String {
snap
}
pub fn snap_documents(index: &Index) -> String {
let mut snap = String::new();
let rtxn = index.read_txn().unwrap();
let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
let display = fields_ids_map.ids().collect::<Vec<_>>();
for document in index.all_documents(&rtxn).unwrap() {
let doc = obkv_to_json(&display, &fields_ids_map, document.unwrap().1).unwrap();
snap.push_str(&serde_json::to_string(&doc).unwrap());
snap.push('\n');
}
snap
}
#[macro_export]
macro_rules! full_snap_of_db {
($index:ident, documents) => {{
$crate::snapshot_tests::snap_documents(&$index)
}};
($index:ident, settings) => {{
$crate::snapshot_tests::snap_settings(&$index)
}};

View File

@ -395,6 +395,7 @@ pub fn validate_geo_from_json(id: &DocumentId, bytes: &[u8]) -> Result<StdResult
(Some(_), None) => Ok(Err(MissingLongitude { document_id: debug_id() })),
(None, None) => Ok(Err(MissingLatitudeAndLongitude { document_id: debug_id() })),
},
Value::Null => Ok(Ok(())),
value => Ok(Err(NotAnObject { document_id: debug_id(), value })),
}
}

View File

@ -59,6 +59,7 @@ pub fn extract_geo_points<R: io::Read + io::Seek>(
} else if lat.is_some() && lng.is_none() {
return Err(GeoError::MissingLongitude { document_id: document_id() })?;
}
// else => the _geo object was `null`, there is nothing to do
}
writer_into_reader(writer)

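The two hunks above make a `_geo` field set to JSON `null` a no-op instead of an error, both at validation time and during geo-point extraction. A minimal sketch of the accepted shapes, assuming the serde_json crate (the function name is illustrative, not part of milli):

```rust
use serde_json::{json, Value};

// Returns true when the `_geo` value is acceptable at this stage:
// an object is validated further (lat/lng presence, finiteness, ...),
// `null` means "no geo point for this document", anything else is
// still rejected as before.
fn geo_value_is_acceptable(geo: &Value) -> bool {
    match geo {
        Value::Object(_) => true,
        Value::Null => true,
        _ => false,
    }
}

fn main() {
    assert!(geo_value_is_acceptable(&json!({ "lat": 12.0, "lng": 42.0 })));
    assert!(geo_value_is_acceptable(&Value::Null));
    assert!(!geo_value_is_acceptable(&json!("12,42")));
    assert!(!geo_value_is_acceptable(&json!([12.0, 42.0])));
}
```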
View File

@ -6,6 +6,7 @@ use roaring::RoaringBitmap;
use super::read_u32_ne_bytes;
use crate::heed_codec::CboRoaringBitmapCodec;
use crate::update::index_documents::transform::Operation;
use crate::Result;
pub type MergeFn = for<'a> fn(&[u8], &[Cow<'a, [u8]>]) -> Result<Cow<'a, [u8]>>;
@ -57,21 +58,6 @@ pub fn keep_latest_obkv<'a>(_key: &[u8], obkvs: &[Cow<'a, [u8]>]) -> Result<Cow<
Ok(obkvs.last().unwrap().clone())
}
/// Merge all the obkvs in the order we see them.
pub fn merge_obkvs<'a>(_key: &[u8], obkvs: &[Cow<'a, [u8]>]) -> Result<Cow<'a, [u8]>> {
Ok(obkvs
.iter()
.cloned()
.reduce(|acc, current| {
let first = obkv::KvReader::new(&acc);
let second = obkv::KvReader::new(&current);
let mut buffer = Vec::new();
merge_two_obkvs(first, second, &mut buffer);
Cow::from(buffer)
})
.unwrap())
}
pub fn merge_two_obkvs(base: obkv::KvReaderU16, update: obkv::KvReaderU16, buffer: &mut Vec<u8>) {
use itertools::merge_join_by;
use itertools::EitherOrBoth::{Both, Left, Right};
@ -88,6 +74,41 @@ pub fn merge_two_obkvs(base: obkv::KvReaderU16, update: obkv::KvReaderU16, buffe
writer.finish().unwrap();
}
/// Merge all the obkvs in the order we see them.
pub fn merge_obkvs_and_operations<'a>(
_key: &[u8],
obkvs: &[Cow<'a, [u8]>],
) -> Result<Cow<'a, [u8]>> {
// [add, add, delete, add, add]
// we can ignore everything that happened before the last delete.
let starting_position =
obkvs.iter().rposition(|obkv| obkv[0] == Operation::Deletion as u8).unwrap_or(0);
// [add, add, delete]
// if the last operation was a deletion then we simply return the deletion
if starting_position == obkvs.len() - 1 && obkvs.last().unwrap()[0] == Operation::Deletion as u8
{
return Ok(obkvs[obkvs.len() - 1].clone());
}
let mut buffer = Vec::new();
// (add, add, delete) [add, add]
// in the other case, no deletion will be encountered during the merge
let mut ret =
obkvs[starting_position..].iter().cloned().fold(Vec::new(), |mut acc, current| {
let first = obkv::KvReader::new(&acc);
let second = obkv::KvReader::new(&current[1..]);
merge_two_obkvs(first, second, &mut buffer);
// we want the result of the merge into our accumulator
std::mem::swap(&mut acc, &mut buffer);
acc
});
ret.insert(0, Operation::Addition as u8);
Ok(Cow::from(ret))
}
pub fn merge_cbo_roaring_bitmaps<'a>(
_key: &[u8],
values: &[Cow<'a, [u8]>],

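`merge_obkvs_and_operations` above merges a document's history by jumping to the last deletion: if that deletion is the final entry the document ends up deleted, otherwise only the additions after it are folded together. A simplified model of that strategy, with strings standing in for the obkv payloads:

```rust
#[derive(Clone, Copy, PartialEq)]
enum Op {
    Addition,
    Deletion,
}

// Simplified model: each entry is an operation tag plus a payload.
// Everything before the last deletion is irrelevant to the merged result.
fn merge(history: &[(Op, &str)]) -> Option<String> {
    let start = history
        .iter()
        .rposition(|(op, _)| *op == Op::Deletion)
        .map(|i| i + 1)
        .unwrap_or(0);
    if start == history.len() {
        // The last operation was a deletion: the document is gone.
        return None;
    }
    // Fold the remaining additions together (modelled here as concatenation;
    // the real code merges the obkvs field by field).
    Some(history[start..].iter().map(|(_, payload)| *payload).collect::<Vec<_>>().join("+"))
}

fn main() {
    let history = [
        (Op::Addition, "a"),
        (Op::Deletion, ""),
        (Op::Addition, "b"),
        (Op::Addition, "c"),
    ];
    assert_eq!(merge(&history), Some("b+c".to_string()));
    assert_eq!(merge(&[(Op::Addition, "a"), (Op::Deletion, "")]), None);
}
```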
View File

@ -13,9 +13,9 @@ pub use grenad_helpers::{
GrenadParameters, MergeableReader,
};
pub use merge_functions::{
concat_u32s_array, keep_first, keep_latest_obkv, merge_cbo_roaring_bitmaps, merge_obkvs,
merge_roaring_bitmaps, merge_two_obkvs, roaring_bitmap_from_u32s_array,
serialize_roaring_bitmap, MergeFn,
concat_u32s_array, keep_first, keep_latest_obkv, merge_cbo_roaring_bitmaps,
merge_obkvs_and_operations, merge_roaring_bitmaps, merge_two_obkvs,
roaring_bitmap_from_u32s_array, serialize_roaring_bitmap, MergeFn,
};
use crate::MAX_WORD_LENGTH;

View File

@ -79,6 +79,7 @@ pub struct IndexDocuments<'t, 'u, 'i, 'a, FP, FA> {
progress: FP,
should_abort: FA,
added_documents: u64,
deleted_documents: u64,
}
#[derive(Default, Debug, Clone)]
@ -122,6 +123,7 @@ where
wtxn,
index,
added_documents: 0,
deleted_documents: 0,
})
}
@ -166,6 +168,30 @@ where
Ok((self, Ok(indexed_documents)))
}
/// Remove a batch of documents from the current builder.
///
/// Returns the number of documents deleted from the builder.
pub fn remove_documents(
mut self,
to_delete: Vec<String>,
) -> Result<(Self, StdResult<u64, UserError>)> {
// Early return when there is no document to remove
if to_delete.is_empty() {
return Ok((self, Ok(0)));
}
let deleted_documents = self
.transform
.as_mut()
.expect("Invalid document deletion state")
.remove_documents(to_delete, self.wtxn, &self.should_abort)?
as u64;
self.deleted_documents += deleted_documents;
Ok((self, Ok(deleted_documents)))
}
#[logging_timer::time("IndexDocuments::{}")]
pub fn execute(mut self) -> Result<DocumentAdditionResult> {
if self.added_documents == 0 {
@ -1879,4 +1905,328 @@ mod tests {
index.add_documents(doc1).unwrap();
}
#[test]
fn add_and_delete_documents_in_single_transform() {
let mut index = TempIndex::new();
index.index_documents_config.update_method = IndexDocumentsMethod::UpdateDocuments;
let mut wtxn = index.write_txn().unwrap();
let builder = IndexDocuments::new(
&mut wtxn,
&index,
&index.indexer_config,
index.index_documents_config.clone(),
|_| (),
|| false,
)
.unwrap();
let documents = documents!([
{ "id": 1, "doggo": "kevin" },
{ "id": 2, "doggo": { "name": "bob", "age": 20 } },
{ "id": 3, "name": "jean", "age": 25 },
]);
let (builder, added) = builder.add_documents(documents).unwrap();
insta::assert_display_snapshot!(added.unwrap(), @"3");
let (builder, removed) = builder.remove_documents(vec![S("2")]).unwrap();
insta::assert_display_snapshot!(removed.unwrap(), @"1");
let addition = builder.execute().unwrap();
insta::assert_debug_snapshot!(addition, @r###"
DocumentAdditionResult {
indexed_documents: 3,
number_of_documents: 2,
}
"###);
wtxn.commit().unwrap();
db_snap!(index, documents, @r###"
{"id":1,"doggo":"kevin"}
{"id":3,"name":"jean","age":25}
"###);
}
#[test]
fn add_update_and_delete_documents_in_single_transform() {
let mut index = TempIndex::new();
index.index_documents_config.update_method = IndexDocumentsMethod::UpdateDocuments;
let mut wtxn = index.write_txn().unwrap();
let builder = IndexDocuments::new(
&mut wtxn,
&index,
&index.indexer_config,
index.index_documents_config.clone(),
|_| (),
|| false,
)
.unwrap();
let documents = documents!([
{ "id": 1, "doggo": "kevin" },
{ "id": 2, "doggo": { "name": "bob", "age": 20 } },
{ "id": 3, "name": "jean", "age": 25 },
]);
let (builder, added) = builder.add_documents(documents).unwrap();
insta::assert_display_snapshot!(added.unwrap(), @"3");
let documents = documents!([
{ "id": 2, "catto": "jorts" },
{ "id": 3, "legs": 4 },
]);
let (builder, added) = builder.add_documents(documents).unwrap();
insta::assert_display_snapshot!(added.unwrap(), @"2");
let (builder, removed) = builder.remove_documents(vec![S("1"), S("2")]).unwrap();
insta::assert_display_snapshot!(removed.unwrap(), @"2");
let addition = builder.execute().unwrap();
insta::assert_debug_snapshot!(addition, @r###"
DocumentAdditionResult {
indexed_documents: 5,
number_of_documents: 1,
}
"###);
wtxn.commit().unwrap();
db_snap!(index, documents, @r###"
{"id":3,"name":"jean","age":25,"legs":4}
"###);
}
#[test]
fn add_document_and_in_another_transform_update_and_delete_documents() {
let mut index = TempIndex::new();
index.index_documents_config.update_method = IndexDocumentsMethod::UpdateDocuments;
let mut wtxn = index.write_txn().unwrap();
let builder = IndexDocuments::new(
&mut wtxn,
&index,
&index.indexer_config,
index.index_documents_config.clone(),
|_| (),
|| false,
)
.unwrap();
let documents = documents!([
{ "id": 1, "doggo": "kevin" },
{ "id": 2, "doggo": { "name": "bob", "age": 20 } },
{ "id": 3, "name": "jean", "age": 25 },
]);
let (builder, added) = builder.add_documents(documents).unwrap();
insta::assert_display_snapshot!(added.unwrap(), @"3");
let addition = builder.execute().unwrap();
insta::assert_debug_snapshot!(addition, @r###"
DocumentAdditionResult {
indexed_documents: 3,
number_of_documents: 3,
}
"###);
wtxn.commit().unwrap();
db_snap!(index, documents, @r###"
{"id":1,"doggo":"kevin"}
{"id":2,"doggo":{"name":"bob","age":20}}
{"id":3,"name":"jean","age":25}
"###);
// A first batch of documents has been inserted
let mut wtxn = index.write_txn().unwrap();
let builder = IndexDocuments::new(
&mut wtxn,
&index,
&index.indexer_config,
index.index_documents_config.clone(),
|_| (),
|| false,
)
.unwrap();
let documents = documents!([
{ "id": 2, "catto": "jorts" },
{ "id": 3, "legs": 4 },
]);
let (builder, added) = builder.add_documents(documents).unwrap();
insta::assert_display_snapshot!(added.unwrap(), @"2");
let (builder, removed) = builder.remove_documents(vec![S("1"), S("2")]).unwrap();
insta::assert_display_snapshot!(removed.unwrap(), @"2");
let addition = builder.execute().unwrap();
insta::assert_debug_snapshot!(addition, @r###"
DocumentAdditionResult {
indexed_documents: 2,
number_of_documents: 1,
}
"###);
wtxn.commit().unwrap();
db_snap!(index, documents, @r###"
{"id":3,"name":"jean","age":25,"legs":4}
"###);
}
#[test]
fn delete_document_and_then_add_documents_in_the_same_transform() {
let mut index = TempIndex::new();
index.index_documents_config.update_method = IndexDocumentsMethod::UpdateDocuments;
let mut wtxn = index.write_txn().unwrap();
let builder = IndexDocuments::new(
&mut wtxn,
&index,
&index.indexer_config,
index.index_documents_config.clone(),
|_| (),
|| false,
)
.unwrap();
let (builder, removed) = builder.remove_documents(vec![S("1"), S("2")]).unwrap();
insta::assert_display_snapshot!(removed.unwrap(), @"0");
let documents = documents!([
{ "id": 2, "doggo": { "name": "jean", "age": 20 } },
{ "id": 3, "name": "bob", "age": 25 },
]);
let (builder, added) = builder.add_documents(documents).unwrap();
insta::assert_display_snapshot!(added.unwrap(), @"2");
let addition = builder.execute().unwrap();
insta::assert_debug_snapshot!(addition, @r###"
DocumentAdditionResult {
indexed_documents: 2,
number_of_documents: 2,
}
"###);
wtxn.commit().unwrap();
db_snap!(index, documents, @r###"
{"id":2,"doggo":{"name":"jean","age":20}}
{"id":3,"name":"bob","age":25}
"###);
}
#[test]
fn delete_the_same_document_multiple_time() {
let mut index = TempIndex::new();
index.index_documents_config.update_method = IndexDocumentsMethod::UpdateDocuments;
let mut wtxn = index.write_txn().unwrap();
let builder = IndexDocuments::new(
&mut wtxn,
&index,
&index.indexer_config,
index.index_documents_config.clone(),
|_| (),
|| false,
)
.unwrap();
let (builder, removed) =
builder.remove_documents(vec![S("1"), S("2"), S("1"), S("2")]).unwrap();
insta::assert_display_snapshot!(removed.unwrap(), @"0");
let documents = documents!([
{ "id": 1, "doggo": "kevin" },
{ "id": 2, "doggo": { "name": "jean", "age": 20 } },
{ "id": 3, "name": "bob", "age": 25 },
]);
let (builder, added) = builder.add_documents(documents).unwrap();
insta::assert_display_snapshot!(added.unwrap(), @"3");
let (builder, removed) =
builder.remove_documents(vec![S("1"), S("2"), S("1"), S("2")]).unwrap();
insta::assert_display_snapshot!(removed.unwrap(), @"2");
let addition = builder.execute().unwrap();
insta::assert_debug_snapshot!(addition, @r###"
DocumentAdditionResult {
indexed_documents: 3,
number_of_documents: 1,
}
"###);
wtxn.commit().unwrap();
db_snap!(index, documents, @r###"
{"id":3,"name":"bob","age":25}
"###);
}
#[test]
fn add_document_and_in_another_transform_delete_the_document_then_add_it_again() {
let mut index = TempIndex::new();
index.index_documents_config.update_method = IndexDocumentsMethod::UpdateDocuments;
let mut wtxn = index.write_txn().unwrap();
let builder = IndexDocuments::new(
&mut wtxn,
&index,
&index.indexer_config,
index.index_documents_config.clone(),
|_| (),
|| false,
)
.unwrap();
let documents = documents!([
{ "id": 1, "doggo": "kevin" },
]);
let (builder, added) = builder.add_documents(documents).unwrap();
insta::assert_display_snapshot!(added.unwrap(), @"1");
let addition = builder.execute().unwrap();
insta::assert_debug_snapshot!(addition, @r###"
DocumentAdditionResult {
indexed_documents: 1,
number_of_documents: 1,
}
"###);
wtxn.commit().unwrap();
db_snap!(index, documents, @r###"
{"id":1,"doggo":"kevin"}
"###);
// A first batch of documents has been inserted
let mut wtxn = index.write_txn().unwrap();
let builder = IndexDocuments::new(
&mut wtxn,
&index,
&index.indexer_config,
index.index_documents_config.clone(),
|_| (),
|| false,
)
.unwrap();
let (builder, removed) = builder.remove_documents(vec![S("1")]).unwrap();
insta::assert_display_snapshot!(removed.unwrap(), @"1");
let documents = documents!([
{ "id": 1, "catto": "jorts" },
]);
let (builder, added) = builder.add_documents(documents).unwrap();
insta::assert_display_snapshot!(added.unwrap(), @"1");
let addition = builder.execute().unwrap();
insta::assert_debug_snapshot!(addition, @r###"
DocumentAdditionResult {
indexed_documents: 1,
number_of_documents: 1,
}
"###);
wtxn.commit().unwrap();
db_snap!(index, documents, @r###"
{"id":1,"catto":"jorts"}
"###);
}
}

View File

@ -12,7 +12,9 @@ use roaring::RoaringBitmap;
use serde_json::Value;
use smartstring::SmartString;
use super::helpers::{create_sorter, create_writer, keep_latest_obkv, merge_obkvs, MergeFn};
use super::helpers::{
create_sorter, create_writer, keep_latest_obkv, merge_obkvs_and_operations, MergeFn,
};
use super::{IndexDocumentsMethod, IndexerConfig};
use crate::documents::{DocumentsBatchIndex, EnrichedDocument, EnrichedDocumentsBatchReader};
use crate::error::{Error, InternalError, UserError};
@ -50,8 +52,12 @@ pub struct Transform<'a, 'i> {
pub index_documents_method: IndexDocumentsMethod,
available_documents_ids: AvailableDocumentsIds,
// Both grenads follow the same format:
// key | value
// u32 | 1 byte for the Operation byte, the rest is the obkv of the document stored
original_sorter: grenad::Sorter<MergeFn>,
flattened_sorter: grenad::Sorter<MergeFn>,
replaced_documents_ids: RoaringBitmap,
new_documents_ids: RoaringBitmap,
// To increase the cache locality and decrease the heap usage we use compact smartstring.
@ -59,6 +65,14 @@ pub struct Transform<'a, 'i> {
documents_count: usize,
}
/// This enum is specific to the grenad sorter stored in the transform.
/// It's used as the first byte of the grenads and tells you if the document id was an addition or a deletion.
#[repr(u8)]
pub enum Operation {
Addition,
Deletion,
}
/// Create a mapping between the field ids found in the document batch and the one that were
/// already present in the index.
///
@ -94,7 +108,7 @@ impl<'a, 'i> Transform<'a, 'i> {
// with the same user id must be merged or fully replaced in the same batch.
let merge_function = match index_documents_method {
IndexDocumentsMethod::ReplaceDocuments => keep_latest_obkv,
IndexDocumentsMethod::UpdateDocuments => merge_obkvs,
IndexDocumentsMethod::UpdateDocuments => merge_obkvs_and_operations,
};
// We initialize the sorter with the user indexing settings.
@ -151,9 +165,7 @@ impl<'a, 'i> Transform<'a, 'i> {
FA: Fn() -> bool + Sync,
{
let (mut cursor, fields_index) = reader.into_cursor_and_fields_index();
let external_documents_ids = self.index.external_documents_ids(wtxn)?;
let mapping = create_fields_mapping(&mut self.fields_ids_map, &fields_index)?;
let primary_key = cursor.primary_key().to_string();
@ -161,6 +173,7 @@ impl<'a, 'i> Transform<'a, 'i> {
self.fields_ids_map.insert(&primary_key).ok_or(UserError::AttributeLimitReached)?;
let mut obkv_buffer = Vec::new();
let mut document_sorter_buffer = Vec::new();
let mut documents_count = 0;
let mut docid_buffer: Vec<u8> = Vec::new();
let mut field_buffer: Vec<(u16, Cow<[u8]>)> = Vec::new();
@ -212,10 +225,13 @@ impl<'a, 'i> Transform<'a, 'i> {
Entry::Occupied(entry) => *entry.get() as u32,
Entry::Vacant(entry) => {
// If the document was already in the db we mark it as a replaced document.
// It'll be deleted later. We keep its original docid to insert it in the grenad.
// It'll be deleted later.
if let Some(docid) = external_documents_ids.get(entry.key()) {
self.replaced_documents_ids.insert(docid);
original_docid = Some(docid);
// If it was already in the list of replaced documents it means it was deleted
// by the remove_documents method. We should start as if it never existed.
if self.replaced_documents_ids.insert(docid) {
original_docid = Some(docid);
}
}
let docid = self
.available_documents_ids
@ -248,26 +264,46 @@ impl<'a, 'i> Transform<'a, 'i> {
skip_insertion = true;
} else {
// we associate the base document with the new key, everything will get merged later.
self.original_sorter.insert(docid.to_be_bytes(), base_obkv)?;
document_sorter_buffer.clear();
document_sorter_buffer.push(Operation::Addition as u8);
document_sorter_buffer.extend_from_slice(base_obkv);
self.original_sorter.insert(docid.to_be_bytes(), &document_sorter_buffer)?;
match self.flatten_from_fields_ids_map(KvReader::new(base_obkv))? {
Some(buffer) => {
self.flattened_sorter.insert(docid.to_be_bytes(), &buffer)?
Some(flattened_obkv) => {
// we recreate our buffer with the flattened documents
document_sorter_buffer.clear();
document_sorter_buffer.push(Operation::Addition as u8);
document_sorter_buffer.extend_from_slice(&flattened_obkv);
self.flattened_sorter
.insert(docid.to_be_bytes(), &document_sorter_buffer)?
}
None => self.flattened_sorter.insert(docid.to_be_bytes(), base_obkv)?,
None => self
.flattened_sorter
.insert(docid.to_be_bytes(), &document_sorter_buffer)?,
}
}
}
if !skip_insertion {
self.new_documents_ids.insert(docid);
document_sorter_buffer.clear();
document_sorter_buffer.push(Operation::Addition as u8);
document_sorter_buffer.extend_from_slice(&obkv_buffer);
// We use the extracted/generated user id as the key for this document.
self.original_sorter.insert(docid.to_be_bytes(), obkv_buffer.clone())?;
self.original_sorter.insert(docid.to_be_bytes(), &document_sorter_buffer)?;
match self.flatten_from_fields_ids_map(KvReader::new(&obkv_buffer))? {
Some(buffer) => self.flattened_sorter.insert(docid.to_be_bytes(), &buffer)?,
None => {
self.flattened_sorter.insert(docid.to_be_bytes(), obkv_buffer.clone())?
Some(flattened_obkv) => {
document_sorter_buffer.clear();
document_sorter_buffer.push(Operation::Addition as u8);
document_sorter_buffer.extend_from_slice(&flattened_obkv);
self.flattened_sorter
.insert(docid.to_be_bytes(), &document_sorter_buffer)?
}
None => self
.flattened_sorter
.insert(docid.to_be_bytes(), &document_sorter_buffer)?,
}
}
documents_count += 1;
@ -293,6 +329,73 @@ impl<'a, 'i> Transform<'a, 'i> {
Ok(documents_count)
}
/// The counterpart of `read_documents` that removes documents either from the transform or the database.
/// It can be called before, after, or in between two calls to `read_documents`.
///
/// It needs to update all the internal data structures in the transform.
/// - If the document is coming from the database -> it's marked as a to_delete document
/// - If the document to remove was inserted by the `read_documents` method before AND was present in the db,
/// it's marked as `to_delete` + added into the grenad to ensure we don't reinsert it.
/// - If the document to remove was inserted by the `read_documents` method before but was NOT present in the db,
/// it's added into the grenad to ensure we don't insert it + removed from the list of new documents ids.
/// - If the document to remove was not present in either the db or the transform we do nothing.
pub fn remove_documents<FA>(
&mut self,
mut to_remove: Vec<String>,
wtxn: &mut heed::RwTxn,
should_abort: FA,
) -> Result<usize>
where
FA: Fn() -> bool + Sync,
{
// there may be duplicates in the documents to remove.
to_remove.sort_unstable();
to_remove.dedup();
let external_documents_ids = self.index.external_documents_ids(wtxn)?;
let mut documents_deleted = 0;
for to_remove in to_remove {
if should_abort() {
return Err(Error::InternalError(InternalError::AbortedIndexation));
}
match self.new_external_documents_ids_builder.entry((*to_remove).into()) {
// if the document was added in a previous iteration of the transform we mark it as deleted in the sorters.
Entry::Occupied(entry) => {
let doc_id = *entry.get() as u32;
self.original_sorter
.insert(doc_id.to_be_bytes(), [Operation::Deletion as u8])?;
self.flattened_sorter
.insert(doc_id.to_be_bytes(), [Operation::Deletion as u8])?;
// we must NOT update the list of replaced_documents_ids
// Either:
// 1. It's already in it and there is nothing to do
// 2. It wasn't in it because the document was created by a previous batch and since
// we're removing it there is nothing to do.
self.new_documents_ids.remove(doc_id);
entry.remove_entry();
}
Entry::Vacant(entry) => {
// If the document was already in the db we mark it as a `to_delete` document.
// It'll be deleted later. We don't need to push anything to the sorters.
if let Some(docid) = external_documents_ids.get(entry.key()) {
self.replaced_documents_ids.insert(docid);
} else {
// if the document is nowhere to be found, there is nothing to do and we must NOT
// increment the count of documents_deleted
continue;
}
}
};
documents_deleted += 1;
}
Ok(documents_deleted)
}
// Flatten a document from the fields ids map contained in self and insert the new
// created fields. Returns `None` if the document doesn't need to be flattened.
fn flatten_from_fields_ids_map(&mut self, obkv: KvReader<FieldId>) -> Result<Option<Vec<u8>>> {
@ -487,6 +590,11 @@ impl<'a, 'i> Transform<'a, 'i> {
let mut documents_count = 0;
while let Some((key, val)) = iter.next()? {
if val[0] == Operation::Deletion as u8 {
continue;
}
let val = &val[1..];
// send a callback to show at which step we are
documents_count += 1;
progress_callback(UpdateIndexingStep::ComputeIdsAndMergeDocuments {
@ -518,9 +626,18 @@ impl<'a, 'i> Transform<'a, 'i> {
self.indexer_settings.chunk_compression_level,
tempfile::tempfile()?,
);
// Once we have written all the documents into the final sorter, we write the documents
// into this writer, extract the file and reset the seek to be able to read it again.
self.flattened_sorter.write_into_stream_writer(&mut writer)?;
// Once we have written all the documents into the final sorter, we write the nested documents
// into this writer.
// We get rid of the `Operation` byte and skip the deleted documents as well.
let mut iter = self.flattened_sorter.into_stream_merger_iter()?;
while let Some((key, val)) = iter.next()? {
if val[0] == Operation::Deletion as u8 {
continue;
}
let val = &val[1..];
writer.insert(key, val)?;
}
let mut flattened_documents = writer.into_inner()?;
flattened_documents.rewind()?;
@ -701,3 +818,45 @@ impl TransformOutput {
.collect())
}
}
#[cfg(test)]
mod test {
use super::*;
#[test]
fn merge_obkvs() {
let mut doc_0 = Vec::new();
let mut kv_writer = KvWriter::new(&mut doc_0);
kv_writer.insert(0_u8, [0]).unwrap();
kv_writer.finish().unwrap();
doc_0.insert(0, Operation::Addition as u8);
let ret = merge_obkvs_and_operations(&[], &[Cow::from(doc_0.as_slice())]).unwrap();
assert_eq!(*ret, doc_0);
let ret = merge_obkvs_and_operations(
&[],
&[Cow::from([Operation::Deletion as u8].as_slice()), Cow::from(doc_0.as_slice())],
)
.unwrap();
assert_eq!(*ret, doc_0);
let ret = merge_obkvs_and_operations(
&[],
&[Cow::from(doc_0.as_slice()), Cow::from([Operation::Deletion as u8].as_slice())],
)
.unwrap();
assert_eq!(*ret, [Operation::Deletion as u8]);
let ret = merge_obkvs_and_operations(
&[],
&[
Cow::from([Operation::Addition as u8, 1].as_slice()),
Cow::from([Operation::Deletion as u8].as_slice()),
Cow::from(doc_0.as_slice()),
],
)
.unwrap();
assert_eq!(*ret, doc_0);
}
}

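Throughout the transform, every value pushed into the original and flattened sorters is now one `Operation` byte followed by the obkv bytes; readers skip deleted entries and strip that byte before writing the final documents file. A minimal sketch of that framing (the helper names are illustrative, not functions from the diff):

```rust
#[repr(u8)]
enum Operation {
    Addition,
    Deletion,
}

// Prefix a document payload with its operation byte before it goes
// into a sorter.
fn frame(op: Operation, payload: &[u8]) -> Vec<u8> {
    let mut buffer = Vec::with_capacity(1 + payload.len());
    buffer.push(op as u8);
    buffer.extend_from_slice(payload);
    buffer
}

// Strip the operation byte when reading back; deleted entries are
// skipped entirely.
fn unframe(value: &[u8]) -> Option<&[u8]> {
    if value[0] == Operation::Deletion as u8 {
        None
    } else {
        Some(&value[1..])
    }
}

fn main() {
    let framed = frame(Operation::Addition, b"obkv bytes");
    assert_eq!(unframe(&framed), Some(&b"obkv bytes"[..]));
    assert_eq!(unframe(&[Operation::Deletion as u8]), None);
}
```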
View File

@ -2,7 +2,7 @@ use std::collections::{BTreeSet, HashMap, HashSet};
use std::result::Result as StdResult;
use charabia::{Tokenizer, TokenizerBuilder};
use deserr::{DeserializeError, DeserializeFromValue};
use deserr::{DeserializeError, Deserr};
use itertools::Itertools;
use serde::{Deserialize, Deserializer, Serialize, Serializer};
use time::OffsetDateTime;
@ -23,9 +23,9 @@ pub enum Setting<T> {
NotSet,
}
impl<T, E> DeserializeFromValue<E> for Setting<T>
impl<T, E> Deserr<E> for Setting<T>
where
T: DeserializeFromValue<E>,
T: Deserr<E>,
E: DeserializeError,
{
fn deserialize_from_value<V: deserr::IntoValue>(

View File

@ -1,9 +1,16 @@
[package]
name = "permissive-json-pointer"
version = "1.0.0"
edition = "2021"
description = "A permissive json pointer"
readme = "README.md"
publish = false
version.workspace = true
authors.workspace = true
# description.workspace = true
homepage.workspace = true
# readme.workspace = true
edition.workspace = true
license.workspace = true
[dependencies]
serde_json = "1.0"