Compare commits

...

2969 Commits

Author SHA1 Message Date
curquiza
bddf3f96e6 Use Debian instead of Alpine in Dockerfile 2023-02-20 19:38:22 +01:00
bors[bot]
1e9ac00800 Merge #3505
3505: Csv delimiter r=irevoire a=irevoire

Fixes https://github.com/meilisearch/meilisearch/issues/3442
Closes https://github.com/meilisearch/meilisearch/pull/2803
Specified in https://github.com/meilisearch/specifications/pull/221

This PR is a reimplementation of https://github.com/meilisearch/meilisearch/pull/2803, on the new engine. Thanks for your idea and initial PR `@MixusMinimax`; sorry I couldn’t update/merge your PR. Way too many changes happened on the engine in the meantime.

**Attention to reviewer:** I had to update deserr to implement support for deserializing `char`s

-------

It introduces four new error messages:
- Invalid value in parameter csvDelimiter: expected a string of one character, but found an empty string
- Invalid value in parameter csvDelimiter: expected a string of one character, but found the following string of 5 characters: doggo
- csv delimiter must be an ascii character. Found: 🍰 
- The Content-Type application/json does not support the use of a csv delimiter. The csv delimiter can only be used with the Content-Type text/csv.

And one error code:
- `invalid_index_csv_delimiter`

The `invalid_content_type` error code is now also used when we encounter the `csvDelimiter` query parameter with a non-csv content type.
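
A sketch of the validation these messages imply (a hypothetical helper, not the PR's actual code):

```rust
/// Hypothetical sketch of the csvDelimiter checks implied by the error
/// messages above, not meilisearch's actual implementation.
fn parse_csv_delimiter(s: &str) -> Result<u8, String> {
    let mut chars = s.chars();
    let c = match (chars.next(), chars.next()) {
        // exactly one character
        (Some(c), None) => c,
        (None, _) => {
            return Err("expected a string of one character, but found an empty string".into())
        }
        _ => {
            return Err(format!(
                "expected a string of one character, but found the following string of {} characters: {s}",
                s.chars().count()
            ))
        }
    };
    if !c.is_ascii() {
        // e.g. 🍰 is a single char but not ASCII
        return Err(format!("csv delimiter must be an ascii character. Found: {c}"));
    }
    Ok(c as u8)
}
```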

Co-authored-by: Tamo <tamo@meilisearch.com>
2023-02-20 17:01:36 +00:00
bors[bot]
b08a49a16e Merge #3319 #3470
3319: Transparently resize indexes on MaxDatabaseSizeReached errors r=Kerollmops a=dureuill

# Pull Request

## Related issue
Related to https://github.com/meilisearch/meilisearch/discussions/3280, depends on https://github.com/meilisearch/milli/pull/760

## What does this PR do?

### User standpoint

- Meilisearch no longer fails tasks that encounter the `milli::UserError(MaxDatabaseSizeReached)` error.
- Instead, these tasks are retried after increasing the maximum size allocated to the index where the failure occurred.

### Implementation standpoint

- Add `Batch::index_uid` to get the `index_uid` of a batch of tasks, if there is one
- `IndexMapper::create_or_open_index` now takes an additional `size` argument that allows (re)opening indexes with a size different from the base `IndexScheduler::index_size` field
- `IndexScheduler::tick` now returns a `Result<TickOutcome>` instead of a `Result<usize>`. This offers more explicit control over what the behavior should be with regard to the next tick.
- Add `IndexStatus::BeingResized` that contains a handle that a thread can use to wait for the resize operation to complete and the index to be available again.
- Add `IndexMapper::resize_index` to increase the size of an index.
- In `IndexScheduler::tick`, intercept task batches that failed due to `MaxDatabaseSizeReached`, resize the index that caused the error, then request a new tick that will eventually handle the still-enqueued tasks (a simplified sketch follows this list).
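
A minimal, self-contained sketch of that interception; the names `TickOutcome`, `MaxDatabaseSizeReached`, and `resize_index` come from this PR, but everything below is a simplified stand-in, not the actual scheduler code:

```rust
#[derive(Debug)]
enum TickOutcome {
    /// The tick made progress; schedule the next tick normally.
    TickAgain(usize),
    /// Nothing to process; wait until a new task wakes the scheduler up.
    WaitForSignal,
}

#[derive(Debug)]
enum TickError {
    MaxDatabaseSizeReached { index_uid: String },
    Other(String),
}

fn tick(
    process_batch: impl Fn() -> Result<usize, TickError>,
    resize_index: impl Fn(&str),
) -> Result<TickOutcome, TickError> {
    match process_batch() {
        Ok(processed) => Ok(TickOutcome::TickAgain(processed)),
        // Do not fail the tasks: grow the index, then request a new tick
        // that will retry the still-enqueued tasks.
        Err(TickError::MaxDatabaseSizeReached { index_uid }) => {
            resize_index(&index_uid);
            Ok(TickOutcome::TickAgain(0))
        }
        Err(other) => Err(other),
    }
}
```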

## Testing the PR

The following diff can be applied to this branch to make testing the PR easier:

<details>


```diff
diff --git a/index-scheduler/src/index_mapper.rs b/index-scheduler/src/index_mapper.rs
index 553ab45a..022b2f00 100644
--- a/index-scheduler/src/index_mapper.rs
+++ b/index-scheduler/src/index_mapper.rs
@@ -228,13 +228,15 @@ impl IndexMapper {
 
         drop(lock);
 
+        std::thread::sleep_ms(2000);
+
         let current_size = index.map_size()?;
         let closing_event = index.prepare_for_closing();
-        log::info!("Resizing index {} from {} to {} bytes", name, current_size, current_size * 2);
+        log::error!("Resizing index {} from {} to {} bytes", name, current_size, current_size * 2);
 
         closing_event.wait();
 
-        log::info!("Resized index {} from {} to {} bytes", name, current_size, current_size * 2);
+        log::error!("Resized index {} from {} to {} bytes", name, current_size, current_size * 2);
 
         let index_path = self.base_path.join(uuid.to_string());
         let index = self.create_or_open_index(&index_path, None, 2 * current_size)?;
@@ -268,8 +270,10 @@ impl IndexMapper {
             match index {
                 Some(Available(index)) => break index,
                 Some(BeingResized(ref resize_operation)) => {
+                    log::error!("waiting for resize end");
                     // Deadlock: no lock taken while doing this operation.
                     resize_operation.wait();
+                    log::error!("trying our luck again!");
                     continue;
                 }
                 Some(BeingDeleted) => return Err(Error::IndexNotFound(name.to_string())),
diff --git a/index-scheduler/src/lib.rs b/index-scheduler/src/lib.rs
index 11b17d05..242dc095 100644
--- a/index-scheduler/src/lib.rs
+++ b/index-scheduler/src/lib.rs
@@ -908,6 +908,7 @@ impl IndexScheduler {
     ///
     /// Returns the number of processed tasks.
     fn tick(&self) -> Result<TickOutcome> {
+        log::error!("ticking!");
         #[cfg(test)]
         {
             *self.run_loop_iteration.write().unwrap() += 1;
diff --git a/meilisearch/src/main.rs b/meilisearch/src/main.rs
index 050c825a..63f312f6 100644
--- a/meilisearch/src/main.rs
+++ b/meilisearch/src/main.rs
@@ -25,7 +25,7 @@ fn setup(opt: &Opt) -> anyhow::Result<()> {
 
 #[actix_web::main]
 async fn main() -> anyhow::Result<()> {
-    let (opt, config_read_from) = Opt::try_build()?;
+    let (mut opt, config_read_from) = Opt::try_build()?;
 
     setup(&opt)?;
 
@@ -56,6 +56,8 @@ We generated a secure master key for you (you can safely copy this token):
         _ => (),
     }
 
+    opt.max_index_size = byte_unit::Byte::from_str("1MB").unwrap();
+
     let (index_scheduler, auth_controller) = setup_meilisearch(&opt)?;
 
     #[cfg(all(not(debug_assertions), feature = "analytics"))]
```
</details>

Mainly, these debug changes do the following:

- Set the default index size to 1MiB so that index resizes are initially frequent
- Turn some logs from info to error so that they can be displayed with `--log-level ERROR` (hiding the other infos)
- Add a long sleep between the beginning and the end of the resize so that we can observe the `BeingResized` index status (otherwise it would never come up in my tests)

## Open questions

- Is the growth factor of x2 the correct solution? For a `Vec` in memory it makes sense, but here we're manipulating quantities that are potentially on the order of 500 GiB. For bigger indexes it may make more sense to add at most e.g. 100 GiB on each resize operation, avoiding big steps like 500 GiB -> 1 TiB (see the sketch below).
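
For illustration, a sketch of that capped-growth alternative; the 100 GiB cap is the example value from the question above, not a decided constant:

```rust
/// Doubles small indexes, but never grows by more than 100 GiB at once, so
/// e.g. 500 GiB -> 600 GiB instead of 500 GiB -> 1 TiB.
fn next_map_size(current_size: u64) -> u64 {
    const MAX_STEP: u64 = 100 * 1024 * 1024 * 1024; // 100 GiB
    current_size.saturating_add(current_size.min(MAX_STEP))
}
```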

## PR checklist
Please check if your PR fulfills the following requirements:
- [ ] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [ ] Have you read the contributing guidelines?
- [ ] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


3470: Autobatch addition and deletion r=irevoire a=irevoire

This PR adds the capability for Meilisearch to batch document additions and deletions together; a simplified sketch of the merge idea follows the checklist below.

Fix https://github.com/meilisearch/meilisearch/issues/3440

--------------

Things to check before merging:

- [x] What happens if we delete the same documents multiple times? -> add a test
- [x] What happens if a documentDeletion gets batched with a documentAddition but the index doesn't exist yet? It should not work
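
A much-simplified sketch of the idea (hypothetical types; the real transform works on obkv buffers and tags each operation with a kind byte, as the commits below describe):

```rust
use std::collections::BTreeMap;

enum DocumentOperation {
    Addition(serde_json::Value),
    Deletion,
}

/// Merges a batch of operations in order: only the final state of each
/// external document id remains.
fn merge_operations(
    ops: Vec<(String, DocumentOperation)>,
) -> BTreeMap<String, Option<serde_json::Value>> {
    let mut merged = BTreeMap::new();
    for (docid, op) in ops {
        // A later operation overrides earlier ones: an addition followed by
        // a deletion of the same document ends up as a deletion.
        let state = match op {
            DocumentOperation::Addition(doc) => Some(doc),
            DocumentOperation::Deletion => None,
        };
        merged.insert(docid, state);
    }
    merged
}
```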

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
Co-authored-by: Tamo <tamo@meilisearch.com>
2023-02-20 15:00:19 +00:00
bors[bot]
a8f6f108e0 Merge #3515
3515: Consider null as a valid geo field r=irevoire a=irevoire

Fix #3497
Associated spec; https://github.com/meilisearch/specifications/pull/222

Co-authored-by: Tamo <tamo@meilisearch.com>
2023-02-20 14:12:55 +00:00
Tamo
1479050f7a apply review suggestions 2023-02-20 14:53:37 +01:00
bors[bot]
97b8c32e22 Merge #3514
3514: Bump version of mini-dashboard to v0.2.6 r=irevoire a=bidoubiwa

Update the version of the mini-dashboard to v0.2.6.

See [release notes](https://github.com/meilisearch/mini-dashboard/releases/tag/v0.2.6).

Co-authored-by: Charlotte Vermandel <charlottevermandel@gmail.com>
2023-02-20 13:21:00 +00:00
Louis Dureuil
35f6c624bc Make sure we don't leave the in memory hashmap in an inconsistent state 2023-02-20 13:55:32 +01:00
Louis Dureuil
1116788475 Resize indexes when they're full 2023-02-20 13:55:32 +01:00
Louis Dureuil
951a5b5832 Add IndexMapper::resize_index fn 2023-02-20 13:55:32 +01:00
Louis Dureuil
1c670d7fa0 Add IndexStatus::BeingResized 2023-02-20 13:55:32 +01:00
Louis Dureuil
6cc3797aa1 IndexScheduler::tick returns a TickOutcome 2023-02-20 13:55:31 +01:00
Louis Dureuil
faf1e17a27 create_or_open_index takes a map_size argument 2023-02-20 13:55:31 +01:00
Louis Dureuil
4c519c2ab3 Add Batch::index_uid 2023-02-20 13:55:31 +01:00
Charlotte Vermandel
dd120e0e16 Bump version of mini-dashboard to v0.2.6 2023-02-20 13:45:57 +01:00
Tamo
18796d6e6a Consider null as a valid geo object 2023-02-20 13:45:51 +01:00
bors[bot]
c91bfeaf15 Merge #3467
3467: Identify builds git tagged with `prototype-...` in CLI and analytics r=curquiza a=dureuill

# Pull Request

## What does this PR do?

- Parses the last git tag to extract a prototype name if:
  - Current build uses the prototype tag (not after the tag) precisely
  - The prototype tag name respects the following conditions:
    1. starts with `prototype-`
    2. ends with a number
    3. the hyphen-separated segment right before the number is not a number (required to reject commits after the tag).
- Display the prototype name in the launch summary in the CLI
- Send the prototype name to analytics if any
- Update prototypes instructions in CONTRIBUTING.md

|`VERGEN_GIT_SEMVER_LIGHTWEIGHT` value | Prototype |
|---|---|
| `Some("prototype-geo-bounding-box-0-139-gcde89018")` | `None` (does not end with a number) |
| `Some("prototype-geo-bounding-box-0-139-89018")` | `None` (before the last segment is a number) |
| `Some("prototype-geo-bounding-box-0")` | `Some("prototype-geo-bounding-box-0")` |
| `Some("prototype-geo-bounding-box")` | `None` (does not end with a number") |
| `Some("geo-bounding-box-0")` | `None` (does not start with "prototype") |
| `None` | `None` | 
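
A hedged sketch of these rules (a hypothetical helper, not the PR's actual code) that reproduces the table above:

```rust
/// Returns `Some(tag)` if `tag` names a prototype build according to the
/// three conditions above, `None` otherwise.
fn prototype_name(tag: &str) -> Option<&str> {
    // 1. must start with `prototype-`
    let stripped = tag.strip_prefix("prototype-")?;
    let mut segments = stripped.rsplit('-');
    // 2. must end with a number
    segments
        .next()
        .filter(|s| !s.is_empty() && s.chars().all(|c| c.is_ascii_digit()))?;
    // 3. the segment right before the number must not itself be a number,
    //    which rejects the `-<n>-g<sha>` suffixes of commits after the tag
    match segments.next() {
        Some(s) if !s.chars().all(|c| c.is_ascii_digit()) => Some(tag),
        _ => None,
    }
}
```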

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2023-02-20 09:27:51 +00:00
bors[bot]
28961b2ad1 Merge #3499
3499: Use the workspace inheritance r=Kerollmops a=irevoire

Use the workspace inheritance [introduced in rust 1.64](https://blog.rust-lang.org/2022/09/22/Rust-1.64.0.html#cargo-improvements-workspace-inheritance-and-multi-target-builds).

It allows us to define the version of Meilisearch once in the main `Cargo.toml` and lets all the other `Cargo.toml` files use this version.
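
A sketch of what the change looks like in the `Cargo.toml` files (the version number here is illustrative):

```toml
# Root Cargo.toml — the version is declared once for the whole workspace.
[workspace.package]
version = "1.1.0"

# Member Cargo.toml — each crate inherits it instead of repeating it.
[package]
name = "meilisearch"
version = { workspace = true }
```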

`@curquiza` I added you as a reviewer because I had to patch some CI scripts

And `@Kerollmops`, I had to bump the `cargo_toml` crate because our version was getting old and didn't support the feature yet.

Also, in another PR, I would like to unify some of our dependencies to ensure we always stay in sync between all our crates.

Co-authored-by: Tamo <tamo@meilisearch.com>
2023-02-17 09:52:29 +00:00
Tamo
895ab2906c apply review suggestions 2023-02-16 18:42:47 +01:00
Tamo
f11c7d4b62 cargo run execute meilisearch by default 2023-02-16 18:03:45 +01:00
Tamo
e79f6f87f6 make cargo fmt&clippy happy 2023-02-16 18:00:40 +01:00
Tamo
5367d8f05a add two tests on the indexing of csvs 2023-02-16 17:37:11 +01:00
Tamo
52686da028 test various errors on the document resource 2023-02-16 17:37:10 +01:00
Tamo
8c074f5028 implements the csv delimiter without tests
Co-authored-by: Maxi Barmetler <maxi.barmetler@gmail.com>
2023-02-16 17:35:36 +01:00
Louis Dureuil
49e18da23e Do not escape tag name
$() syntax is not interpreted by the Dockerfile
2023-02-16 10:53:14 +01:00
Louis Dureuil
54240db495 Add note in code so one does not forget next time 2023-02-16 10:53:14 +01:00
Louis Dureuil
e1ed4bc750 Change Dockerfile to also pass the VERGEN_GIT_SEMVER_LIGHTWEIGHT when building 2023-02-16 10:53:14 +01:00
Louis Dureuil
9bd1cfb3a3 Ignore -dirty flag 2023-02-16 10:53:14 +01:00
Louis Dureuil
a341c94871 Update contributing.md 2023-02-16 10:53:14 +01:00
Louis Dureuil
f46cf46b8c Add prototype to analytics if any 2023-02-16 10:53:14 +01:00
Louis Dureuil
c3a30a5a91 If using a prototype, display its name at Meilisearch startup 2023-02-16 10:53:14 +01:00
bors[bot]
143e3cf948 Merge #3490
3490: Fix attributes set candidates r=curquiza a=ManyTheFish

# Pull Request

Fix attributes set candidates for v1.1.0

## details

The attribute criterion was not returning the remaining candidates when its internal algorithm had been exhausted.
This loss of candidates by the attribute criterion led to the bug reported in the issue linked below.
After some investigation, it seems that it was the only criterion that had this behavior.

We are now returning the remaining candidates instead of an empty bitmap.
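
A minimal sketch of the fix (hypothetical signature, not milli's internal criterion API; `RoaringBitmap` is the document-set type milli relies on):

```rust
use roaring::RoaringBitmap;

/// When the criterion is exhausted, hand back the candidates it never
/// ranked instead of an empty bitmap, so later criteria still see them.
fn last_bucket(remaining_candidates: &mut RoaringBitmap) -> RoaringBitmap {
    // The buggy behavior was the equivalent of returning
    // `RoaringBitmap::new()`, silently dropping every document this
    // criterion had not ranked yet.
    std::mem::take(remaining_candidates)
}
```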

## Related issue

Fixes #3483
PR on milli for v1.0.1: https://github.com/meilisearch/milli/pull/777


Co-authored-by: ManyTheFish <many@meilisearch.com>
2023-02-15 17:38:07 +00:00
Tamo
ab2adba183 update our CI scripts accordingly 2023-02-15 13:56:24 +01:00
Tamo
74d1a67a99 Use the workspace inheritance feature of rust 1.64 2023-02-15 13:51:07 +01:00
bors[bot]
91ce8a5e67 Merge #3492
3492: Bump deserr r=Kerollmops a=irevoire

Bump deserr to the latest version:
- We now use the default actix-web extractors that deserr provides (which were copy/pasted from meilisearch)
- We also use the default `JsonError` message provided by deserr instead of defining our own in meilisearch
- Finally, we get the new `did you mean?` error message. Fix #3493

Co-authored-by: Tamo <tamo@meilisearch.com>
2023-02-15 10:05:05 +00:00
bors[bot]
fd7ae1883b Merge #3495
3495: Add tests with rust nightly in CI r=curquiza a=ztkmkoo

# Pull Request

## Related issue
Fixes #3402 

## What does this PR do?
- add ci test with rust nightly
- make test with rust stable not run on schedule event

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Kebron <ztkmkoo@gmail.com>
2023-02-15 07:53:17 +00:00
Tamo
42a3cdca66 get rids of the unwrap_any function in favor of take_cf_content 2023-02-14 20:06:31 +01:00
Tamo
a43765d454 use the pre-defined deserr extractors 2023-02-14 20:05:30 +01:00
Tamo
769576fd94 get rids of the whole error_message module since it has been integrated into the last version of deserr 2023-02-14 20:05:27 +01:00
Tamo
8fb7b1d10f bump deserr 2023-02-14 20:04:30 +01:00
bors[bot]
d494c29768 Merge #3479
3479: Unify "Bad latitude" & "Bad longitude" errors r=irevoire a=cymruu

# Pull Request

## Related issue
Fix part of #3006

## What does this PR do?
- Moved `BadGeoLat`, `BadGeoLng`, `BadGeoBoundingBoxTopIsBelowBottom` out of `FilterError` into the newly introduced error type `ParseGeoError` (sketched below)
- Renamed the `BadGeo` error to `ReservedGeo`
- Used the new `ParseGeoError` type in `FilterError` and `AscDescError`
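
A sketch of the shape of this refactor; the variant names come from this PR, but the messages and the `AscDescError` body are illustrative, not milli's exact code:

```rust
use thiserror::Error;

#[derive(Debug, Error)]
pub enum ParseGeoError {
    #[error("Bad latitude `{0}`.")]
    BadGeoLat(String),
    #[error("Bad longitude `{0}`.")]
    BadGeoLng(String),
    #[error("The top of the bounding box is below its bottom.")]
    BadGeoBoundingBoxTopIsBelowBottom,
}

#[derive(Debug, Error)]
pub enum AscDescError {
    // Reuse the geo errors instead of duplicating them, via #[from].
    #[error(transparent)]
    ParseGeo(#[from] ParseGeoError),
    #[error("Invalid syntax for the asc/desc parameter.")]
    InvalidSyntax,
}
```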

Screenshot: 
![image](https://user-images.githubusercontent.com/2981598/217927231-fe23b6a3-2ea8-4145-98af-38eb61c4ff16.png)

I ran `cargo test --package milli -- --test-threads 1` and tests passed.
`--test-threads` was set to 1 because my OS complained about too many opened files.

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [ ] Have you made sure that the title is accurate and descriptive of the changes?


Co-authored-by: Filip Bachul <filipbachul@gmail.com>
Co-authored-by: filip <filipbachul@gmail.com>
2023-02-14 18:35:51 +00:00
Tamo
74dcfe9676 Fix a bug when you update a document that was already present in the db, deleted and then inserted again in the same transform 2023-02-14 19:09:40 +01:00
Tamo
1b1703a609 make a small optimization to merge obkvs a little bit faster 2023-02-14 18:32:41 +01:00
Tamo
fb5e4957a6 fix and test the early exit in case a grenad ends with a deletion 2023-02-14 18:23:57 +01:00
Tamo
8de3c9f737 Update milli/src/update/index_documents/transform.rs
Co-authored-by: Clément Renault <clement@meilisearch.com>
2023-02-14 17:57:14 +01:00
Tamo
43a19d0709 document the operation enum + the grenads 2023-02-14 17:55:26 +01:00
Tamo
29d14bed90 get rids of the let/else syntax 2023-02-14 17:45:46 +01:00
bors[bot]
f3b54337f9 Merge #3174
3174: Allow wildcards at the end of index names for API Keys and Tenant tokens r=irevoire a=Kerollmops

This PR introduces the wildcards at the end of the index names when identifying indexes in the API Keys and tenant tokens. It fixes #2788 and fixes #2908. This PR is based on `@akhildevelops`' work.

Note that when a tenant token filter is chosen to restrict a search, it is always the most restrictive pattern that is chosen. If we have an index pattern _prod*_ that defines _filter1_ and _p*_ that defines _filter2_, the engine will choose _filter1_ over _filter2_, as it is defined for the more restrictive pattern, _prod*_. This restrictiveness is defined by: 1. whether the pattern is exact (without _*_); 2. the length of the pattern.
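
A minimal sketch of these matching and restrictiveness rules (hypothetical helpers, not the engine's actual code):

```rust
/// A pattern either is exact or ends with `*`, which matches any suffix.
fn pattern_matches(pattern: &str, index: &str) -> bool {
    match pattern.strip_suffix('*') {
        Some(prefix) => index.starts_with(prefix),
        None => pattern == index,
    }
}

/// Picks the most restrictive matching pattern: exact patterns win over
/// starred ones, then longer patterns win over shorter ones.
fn most_restrictive<'a>(patterns: &[&'a str], index: &str) -> Option<&'a str> {
    patterns
        .iter()
        .copied()
        .filter(|p| pattern_matches(p, index))
        .max_by_key(|p| (!p.ends_with('*'), p.len()))
}
```

For the index `production`, `most_restrictive(&["prod*", "p*"], "production")` returns `Some("prod*")`, matching the _filter1_ over _filter2_ example above.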

It is a continuation of work that has already started and should close #2869.

Co-authored-by: Clément Renault <clement@meilisearch.com>
Co-authored-by: Kerollmops <clement@meilisearch.com>
2023-02-14 16:12:01 +00:00
Clément Renault
7f3ae40204 Remove a useless comment regarding the index pattern error code 2023-02-14 17:09:20 +01:00
Filip Bachul
a53536836b fmt 2023-02-14 17:04:22 +01:00
Kebron
b095325bf8 Add tests with rust nightly in CI 2023-02-14 15:33:12 +00:00
Filip Bachul
d7ad39ad77 fix: clippy error 2023-02-14 00:15:35 +01:00
Filip Bachul
849de089d2 add thiserror for AscDescError 2023-02-14 00:15:35 +01:00
filip
7f25007d31 Update milli/src/asc_desc.rs
Co-authored-by: Tamo <irevoire@protonmail.ch>
2023-02-14 00:15:35 +01:00
Filip Bachul
c810af3ebf implement From<ParseGeoError> for AscDescError 2023-02-14 00:15:35 +01:00
Filip Bachul
c0b77773ba fmt asc_desc 2023-02-14 00:15:35 +01:00
Filip Bachul
7481559e8b move BadGeo to FilterError 2023-02-14 00:15:35 +01:00
Filip Bachul
83c765ce6c implement From<ParseGeoError> for FilterError 2023-02-14 00:15:35 +01:00
Filip Bachul
4c91037602 use ParseGeoError in sort parser 2023-02-14 00:15:35 +01:00
Filip Bachul
825923f6fc export ParseGeoError 2023-02-14 00:15:35 +01:00
Filip Bachul
e405702733 chore: introduce new error ParseGeoError type 2023-02-14 00:15:35 +01:00
ManyTheFish
6fa877efb0 Fix attributes set candidates 2023-02-13 17:49:52 +01:00
Kerollmops
4b1cd10653 Return an internal error when index pattern should be valid 2023-02-13 17:49:42 +01:00
Clément Renault
47748395dc Update an authentication comment
Co-authored-by: Many the fish <many@meilisearch.com>
2023-02-13 17:20:08 +01:00
bors[bot]
ff595156d7 Merge #3480
3480: Gitignore vscode & jetbrains IDE folders r=curquiza a=AymanHamdoun

# Pull Request

## Related issue
There is no issue for it, and I couldn't find an appropriate category to make an issue for it.

## What does this PR do?
- It's just a gitignore edit so that people who use vscode and jetbrains IDEs (like IntelliJ) don't have to deal with committing, by mistake, the folder the IDE generates to store local project configs. (I honestly wanted to fork the repo to add something else, but this bothered me enough to make a PR for it first.)

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Ayman <ayman.s.hamdoun@gmail.com>
2023-02-13 10:47:25 +00:00
Ayman
8770088df3 remove idea folder 2023-02-10 11:45:02 +04:00
Ayman
827c1c8447 edit gitignore to ignore .idea and .vscode folders 2023-02-10 11:42:19 +04:00
Clément Renault
764df24b7d Make clippy happy (again) 2023-02-09 13:21:20 +01:00
Clément Renault
4570d5bf3a Merge remote-tracking branch 'origin/main' into temp-wildcard 2023-02-09 13:14:05 +01:00
Tamo
746b31c1ce makes clippy happy 2023-02-09 12:23:01 +01:00
Tamo
eaad84bd1d fix the test to handle the document deletion correctly 2023-02-09 11:29:13 +01:00
Kerollmops
c690c4fec4 Added and modified the current API Key and Tenant Token tests 2023-02-09 11:17:30 +01:00
Tamo
ea9ac46f28 stop autobatching the deletion without the index creation right with the addition 2023-02-08 21:24:27 +01:00
Tamo
93db755d57 add a test to ensure we handle correctly a deletion of multiple time the same document 2023-02-08 21:03:34 +01:00
Tamo
93f130a400 fix all warnings 2023-02-08 20:57:35 +01:00
Tamo
860c993ef7 Handle the autobatching of deletion and addition in the scheduler 2023-02-08 20:53:19 +01:00
Tamo
67dda0678f cleanup the autobatcher a little bit 2023-02-08 18:10:59 +01:00
Tamo
2db6347686 update the autobatcher to batch the addition and deletion together 2023-02-08 18:07:59 +01:00
Tamo
421a9cf05e provide a new method on the transform to remove documents 2023-02-08 16:06:09 +01:00
Kerollmops
7b4b57ecc8 Fix the current tests 2023-02-08 14:54:05 +01:00
Tamo
8f64fba1ce rewrite the current transform to handle a new byte specifying the kind of operation it's merging 2023-02-08 12:53:38 +01:00
bors[bot]
9882029fa4 Merge #3456
3456: Bump tokio from 1.24.1 to 1.24.2 r=curquiza a=dependabot[bot]

Bumps [tokio](https://github.com/tokio-rs/tokio) from 1.24.1 to 1.24.2.
<details>
<summary>Commits</summary>
<ul>
<li>See full diff in <a href="https://github.com/tokio-rs/tokio/commits">compare view</a></li>
</ul>
</details>
<br />


[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=tokio&package-manager=cargo&previous-version=1.24.1&new-version=1.24.2)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`.

---

<details>
<summary>Dependabot commands and options</summary>
<br />

You can trigger Dependabot actions by commenting on this PR:
- `@dependabot rebase` will rebase this PR
- `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it
- `@dependabot merge` will merge this PR after your CI passes on it
- `@dependabot squash and merge` will squash and merge this PR after your CI passes on it
- `@dependabot cancel merge` will cancel a previously requested merge and block automerging
- `@dependabot reopen` will reopen this PR if it is closed
- `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
- `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
You can disable automated security fix PRs for this repo from the [Security Alerts page](https://github.com/meilisearch/meilisearch/network/alerts).

</details>

Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2023-02-07 13:28:42 +00:00
dependabot[bot]
5f56e6dd58 Bump tokio from 1.24.1 to 1.24.2
Bumps [tokio](https://github.com/tokio-rs/tokio) from 1.24.1 to 1.24.2.
- [Release notes](https://github.com/tokio-rs/tokio/releases)
- [Commits](https://github.com/tokio-rs/tokio/commits)

---
updated-dependencies:
- dependency-name: tokio
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>
2023-02-07 12:14:05 +00:00
bors[bot]
c88c3637b4 Merge #3461
3461: Bring v1 changes into main r=curquiza a=Kerollmops

Also bring back changes in milli (the remote repository) into main done during the pre-release

Co-authored-by: Loïc Lecrenier <loic.lecrenier@me.com>
Co-authored-by: bors[bot] <26634292+bors[bot]@users.noreply.github.com>
Co-authored-by: curquiza <curquiza@users.noreply.github.com>
Co-authored-by: Tamo <tamo@meilisearch.com>
Co-authored-by: Philipp Ahlner <philipp@ahlner.com>
Co-authored-by: Kerollmops <clement@meilisearch.com>
2023-02-07 11:27:27 +00:00
bors[bot]
97fd9ac493 Merge #3405
3405: Implement geo bounding box r=irevoire a=curquiza

Following https://github.com/meilisearch/milli/pull/672 (work from `@gmourier`)

Fixes #2761

Co-authored-by: Guillaume Mourier <guillaume@meilisearch.com>
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
Co-authored-by: Tamo <tamo@meilisearch.com>
2023-02-07 09:55:20 +00:00
bors[bot]
821d92b5d0 Merge #3407
3407: Add Cargo feature for LMDB's POSIX semaphores r=dureuill a=GregoryConrad

See https://github.com/meilisearch/milli/pull/757

Co-authored-by: Gregory Conrad <gregorysconrad@gmail.com>
2023-02-07 08:25:20 +00:00
bors[bot]
0b60928cbc Merge #3199
3199: Fixup dumps-destination -> dump-directory section header in help link r=curquiza a=dureuill

# Pull Request

## Related issue
See https://github.com/meilisearch/product/discussions/560#discussioncomment-4323938

## What does this PR do?
- change link in help message to the future new section header #dump-directory

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2023-02-06 17:49:32 +00:00
Tamo
42114325cd Apply suggestions from code review
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2023-02-06 18:07:00 +01:00
Tamo
7a38fe624f throw an error if the top left corner is found below the bottom right corner 2023-02-06 17:50:47 +01:00
Tamo
1b005f697d update the syntax of the geoboundingbox filter to uses brackets instead of parens around lat and lng 2023-02-06 16:50:27 +01:00
Kerollmops
fbec48f56e Merge remote-tracking branch 'milli/main' into bring-v1-changes 2023-02-06 16:48:10 +01:00
Kerollmops
a377a49218 Make meilisearch depend on the local milli 2023-02-06 16:44:43 +01:00
Kerollmops
41cbaad1cb Revert "Add git config about ownershio in Docker CI"
This reverts commit e269027cdd.
2023-02-06 16:42:16 +01:00
Kerollmops
a015e232ab Merge remote-tracking branch 'origin/release-v1.0.0' into bring-v1-changes 2023-02-06 16:41:10 +01:00
Tamo
3ebc99473f Apply suggestions from code review
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2023-02-06 13:29:37 +01:00
bors[bot]
fadea504ed Merge #3451
3451: Pin Rust version in Clippy job r=dureuill a=curquiza

Avoid "surprising" CI failure because of clippy when rust is releasing a new version

Co-authored-by: curquiza <clementine@meilisearch.com>
Co-authored-by: Clémentine Urquizar - curqui <clementine@meilisearch.com>
2023-02-06 12:19:35 +00:00
Tamo
d27007005e comments the geoboundingbox + forbid the usage of the lexeme method which could introduce bugs 2023-02-06 11:36:49 +01:00
Tamo
fcb09ccc3d add tests on the geoBoundingBox 2023-02-02 18:19:56 +01:00
bors[bot]
734a9ecea8 Merge #3040
3040: feat: create a preview environment for every PR using Uffizzi r=curquiza a=waveywaves

# Pull Request

## Related discussion (was created as an issue initially)
https://github.com/meilisearch/meilisearch/discussions/2883

## What does this PR do?
This PR adds gha workflows to create preview environments on every PR. This workflow also posts the preview url as a comment on the PR.
[This PR created against my fork of meilisearch](https://github.com/waveywaves/meilisearch/pull/2) demonstrates how this change behaves. 

In [the demo preview](https://pr-2-deployment-7396-meilisearch.app.uffizzi.com/) you can run the `meilisearch` binary built from the PR and can access meilisearch running from the PR by adding `/meilisearch` to the url of the PR.

e.g.: I go to the demo preview at the URL https://app.uffizzi.com/github.com/waveywaves/meilisearch/pull/2 and run `meilisearch` in the terminal. I can then access this running instance of `meilisearch` in the preview env from https://pr-2-deployment-7396-meilisearch.app.uffizzi.com/meilisearch

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [ ] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Vibhav Bobade <vibhav.bobde@gmail.com>
2023-02-02 16:06:38 +00:00
curquiza
69fcd3d05e Add comment information about the cron job 2023-02-02 15:58:03 +01:00
Clémentine Urquizar - curqui
1ca7778e6a Update .github/workflows/create-issue-dependencies.yml
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2023-02-02 15:54:33 +01:00
curquiza
a11d992923 Update issue description for the dependency updates 2023-02-02 15:33:38 +01:00
curquiza
781691191a Pin Rust version in Clippy job 2023-02-02 15:22:58 +01:00
Louis Dureuil
ae8660e585 Add Token::original_span rather than making Token::span pub 2023-02-02 15:03:34 +01:00
Guillaume Mourier
d80ce00623 Update insta test 2023-02-02 12:34:51 +01:00
Guillaume Mourier
2d66fdc8e9 Apply review comments 2023-02-02 12:34:51 +01:00
Guillaume Mourier
b297b5deb0 cargo fmt 2023-02-02 12:34:49 +01:00
Guillaume Mourier
0d71c80ba6 add tests 2023-02-02 12:31:27 +01:00
Guillaume Mourier
b2054d3f6c Add insta test on geo filters whitespacing 2023-02-02 12:27:58 +01:00
Guillaume Mourier
65a3086cf1 fix test 2023-02-02 12:27:58 +01:00
Guillaume Mourier
426d63b01b Update insta test suite 2023-02-02 12:27:56 +01:00
Guillaume Mourier
b078477d80 Add error handling and earth lap collision with bounding box 2023-02-02 12:17:38 +01:00
Guillaume Mourier
5c525168a0 Add _geoBoundingBox parser 2023-02-02 11:57:21 +01:00
bors[bot]
39b62b7158 Merge #3436
3436: Add more detailed contribution instructions for tests r=irevoire a=dureuill



Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2023-02-02 10:19:41 +00:00
bors[bot]
3f97f630ed Merge #3448
3448: Bump docker/build-push-action from 3 to 4 r=curquiza a=dependabot[bot]

Bumps [docker/build-push-action](https://github.com/docker/build-push-action) from 3 to 4.
<details>
<summary>Release notes</summary>
<p><em>Sourced from <a href="https://github.com/docker/build-push-action/releases">docker/build-push-action's releases</a>.</em></p>
<blockquote>
<h2>v4.0.0</h2>
<blockquote>
<p><strong>Note</strong></p>
<p>Buildx v0.10 enables support for a minimal <a href="https://slsa.dev/provenance/">SLSA Provenance</a> attestation, which requires support for <a href="https://github.com/opencontainers/image-spec">OCI-compliant</a> multi-platform images. This may introduce issues with registry and runtime support (e.g. <a href="https://github-redirect.dependabot.com/docker/buildx/issues/1533">Google Cloud Run and AWS Lambda</a>). You can optionally disable the default provenance attestation functionality using <code>provenance: false</code>.</p>
</blockquote>
<ul>
<li>Revert disable provenance by default if not set by <a href="https://github.com/crazy-max"><code>@crazy-max</code></a> in <a href="https://github-redirect.dependabot.com/docker/build-push-action/pull/784">docker/build-push-action#784</a></li>
</ul>
<p><strong>Full Changelog</strong>: <a href="https://github.com/docker/build-push-action/compare/v3.3.1...v4.0.0">https://github.com/docker/build-push-action/compare/v3.3.1...v4.0.0</a></p>
<h2>v3.3.1</h2>
<ul>
<li>Disable provenance by default if not set by <a href="https://github.com/crazy-max"><code>@crazy-max</code></a> (<a href="https://github-redirect.dependabot.com/docker/build-push-action/issues/781">#781</a>)</li>
</ul>
<p><strong>Full Changelog</strong>: <a href="https://github.com/docker/build-push-action/compare/v3.3.0...v3.3.1">https://github.com/docker/build-push-action/compare/v3.3.0...v3.3.1</a></p>
<h2>v3.3.0</h2>
<blockquote>
<p><strong>Note</strong></p>
<p>Buildx v0.10 enables support for a minimal <a href="https://slsa.dev/provenance/">SLSA Provenance</a> attestation, which requires support for <a href="https://github.com/opencontainers/image-spec">OCI-compliant</a> multi-platform images. This may introduce issues with registry and runtime support (e.g. <a href="https://github-redirect.dependabot.com/docker/buildx/issues/1533">Google Cloud Run and AWS Lambda</a>). You can optionally disable the default provenance attestation functionality using <code>provenance: false</code>.</p>
</blockquote>
<ul>
<li>Add <code>attests</code>, <code>provenance</code> and <code>sbom</code> inputs by <a href="https://github.com/crazy-max"><code>@crazy-max</code></a> (<a href="https://github-redirect.dependabot.com/docker/build-push-action/issues/746">#746</a> <a href="https://github-redirect.dependabot.com/docker/build-push-action/issues/759">#759</a>)</li>
<li>Log GitHub Actions runtime token access controls by <a href="https://github.com/crazy-max"><code>@crazy-max</code></a> (<a href="https://github-redirect.dependabot.com/docker/build-push-action/issues/707">#707</a>)</li>
<li>Examples moved to <a href="https://docs.docker.com/build/ci/github-actions/examples/">docs website</a> by <a href="https://github.com/crazy-max"><code>@crazy-max</code></a> (<a href="https://github-redirect.dependabot.com/docker/build-push-action/issues/718">#718</a>)</li>
<li>Bump minimatch from 3.0.4 to 3.1.2 (<a href="https://github-redirect.dependabot.com/docker/build-push-action/issues/732">#732</a>)</li>
<li>Bump csv-parse from 5.3.0 to 5.3.3 (<a href="https://github-redirect.dependabot.com/docker/build-push-action/issues/729">#729</a>)</li>
<li>Bump json5 from 2.2.0 to 2.2.3 (<a href="https://github-redirect.dependabot.com/docker/build-push-action/issues/749">#749</a>)</li>
</ul>
<p><strong>Full Changelog</strong>: <a href="https://github.com/docker/build-push-action/compare/v3.2.0...v3.3.0">https://github.com/docker/build-push-action/compare/v3.2.0...v3.3.0</a></p>
<h2>v3.2.0</h2>
<ul>
<li>Remove workaround for <code>setOutput</code> by <a href="https://github.com/crazy-max"><code>@crazy-max</code></a> (<a href="https://github-redirect.dependabot.com/docker/build-push-action/issues/704">#704</a>)</li>
<li>Docs: fix Git context link and add more details about subdir support by <a href="https://github.com/crazy-max"><code>@crazy-max</code></a> (<a href="https://github-redirect.dependabot.com/docker/build-push-action/issues/685">#685</a>)</li>
<li>Docs: named context by <a href="https://github.com/baibaratsky"><code>@baibaratsky</code></a> and <a href="https://github.com/crazy-max"><code>@crazy-max</code></a> (<a href="https://github-redirect.dependabot.com/docker/build-push-action/issues/665">#665</a>)</li>
<li>Bump <code>@actions/core</code> from 1.9.0 to 1.10.0 (<a href="https://github-redirect.dependabot.com/docker/build-push-action/issues/667">#667</a> <a href="https://github-redirect.dependabot.com/docker/build-push-action/issues/695">#695</a>)</li>
<li>Bump <code>@actions/github</code> from 5.0.3 to 5.1.1 (<a href="https://github-redirect.dependabot.com/docker/build-push-action/issues/696">#696</a>)</li>
</ul>
<p><strong>Full Changelog</strong>: <a href="https://github.com/docker/build-push-action/compare/v3.1.1...v3.2.0">https://github.com/docker/build-push-action/compare/v3.1.1...v3.2.0</a></p>
<h2>v3.1.1</h2>
<ul>
<li>Fix GitHub token not passed with Git context if subdir defined by <a href="https://github.com/crazy-max"><code>@crazy-max</code></a> (<a href="https://github-redirect.dependabot.com/docker/build-push-action/issues/663">#663</a>)</li>
<li>Replace deprecated <code>fs.rmdir</code> with <code>fs.rm</code> by <a href="https://github.com/bendrucker"><code>@bendrucker</code></a> (<a href="https://github-redirect.dependabot.com/docker/build-push-action/issues/657">#657</a>)</li>
</ul>
<p><strong>Full Changelog</strong>: <a href="https://github.com/docker/build-push-action/compare/v3.1.0...v3.1.1">https://github.com/docker/build-push-action/compare/v3.1.0...v3.1.1</a></p>
<h2>v3.1.0</h2>
<ul>
<li><code>no-cache-filters</code> input by <a href="https://github.com/crazy-max"><code>@crazy-max</code></a> (<a href="https://github-redirect.dependabot.com/docker/build-push-action/issues/653">#653</a>)</li>
<li>Bump <code>@actions/github</code> from 5.0.1 to 5.0.3 (<a href="https://github-redirect.dependabot.com/docker/build-push-action/issues/619">#619</a>)</li>
<li>Bump <code>@actions/core</code> from 1.6.0 to 1.9.0 (<a href="https://github-redirect.dependabot.com/docker/build-push-action/issues/620">#620</a> <a href="https://github-redirect.dependabot.com/docker/build-push-action/issues/637">#637</a>)</li>
<li>Bump csv-parse from 5.0.4 to 5.3.0 (<a href="https://github-redirect.dependabot.com/docker/build-push-action/issues/623">#623</a> <a href="https://github-redirect.dependabot.com/docker/build-push-action/issues/650">#650</a>)</li>
</ul>
<p><strong>Full Changelog</strong>: <a href="https://github.com/docker/build-push-action/compare/v3.0.0...v3.1.0">https://github.com/docker/build-push-action/compare/v3.0.0...v3.1.0</a></p>
</blockquote>
</details>
<details>
<summary>Commits</summary>
<ul>
<li><a href="3b5e8027fc"><code>3b5e802</code></a> Merge pull request <a href="https://github-redirect.dependabot.com/docker/build-push-action/issues/784">#784</a> from crazy-max/enable-provenance</li>
<li><a href="02d3266a89"><code>02d3266</code></a> update generated content</li>
<li><a href="f403dafe18"><code>f403daf</code></a> revert disable provenance by default if not set</li>
<li>See full diff in <a href="https://github.com/docker/build-push-action/compare/v3...v4">compare view</a></li>
</ul>
</details>
<br />


[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=docker/build-push-action&package-manager=github_actions&previous-version=3&new-version=4)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

You can trigger a rebase of this PR by commenting `@dependabot rebase`.


Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2023-02-01 18:13:15 +00:00
Kerollmops
a36b1dbd70 Fix the tasks with the new patterns 2023-02-01 18:21:45 +01:00
dependabot[bot]
5672165e44 Bump docker/build-push-action from 3 to 4
Bumps [docker/build-push-action](https://github.com/docker/build-push-action) from 3 to 4.
- [Release notes](https://github.com/docker/build-push-action/releases)
- [Commits](https://github.com/docker/build-push-action/compare/v3...v4)

---
updated-dependencies:
- dependency-name: docker/build-push-action
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
2023-02-01 17:02:17 +00:00
Kerollmops
d563ed8a39 Making it work with index uid patterns 2023-02-01 17:51:30 +01:00
bors[bot]
36cae3b480 Merge #3399
3399: Rework technical information in the README r=Kerollmops a=curquiza

Following this https://github.com/meilisearch/meilisearch/pull/3346#discussion_r1073289399

Co-authored-by: curquiza <clementine@meilisearch.com>
Co-authored-by: Clémentine Urquizar - curqui <clementine@meilisearch.com>
2023-02-01 14:34:55 +00:00
bors[bot]
5e12af88e2 Merge #3445
3445: Bump milli to v0.41.1 r=curquiza a=dureuill

# Pull Request

## Related issue

Fixes #3438.

## What does this PR do?
- Bump milli to [v0.41.1](https://github.com/meilisearch/milli/releases/tag/v0.41.1) that includes a bugfix for #3438 

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2023-02-01 11:07:46 +00:00
Louis Dureuil
231067a1c4 Bump milli to v0.41.1 2023-02-01 11:53:39 +01:00
Vibhav Bobade
2a1a7ef00a Integrate Uffizzi 2023-02-01 13:06:27 +05:30
bors[bot]
758b4acea7 Merge #776
776: Reduce incremental indexing time of `words_prefix_position_docids` DB r=curquiza a=loiclec

Fixes partially https://github.com/meilisearch/milli/issues/605

The `words_prefix_position_docids` database can easily contain millions of entries. Thus, iterating
over it can be very expensive. But we do so needlessly for every document addition task.

It can sometimes cause indexing performance issues when:
- a user sends many `documentAdditionOrUpdate` tasks that cannot all be batched together (for example if they are interspersed with `documentDeletion` tasks)
- the documents contain long, diverse text fields, thus increasing the number of entries in `words_prefix_position_docids`
- the index has accumulated many soft-deleted documents, further increasing the size of `words_prefix_position_docids`
- the machine running Meilisearch does not have great IO performance (e.g. slow SSD, or quota-limited by the cloud provider)

Note, before approving the PR: the only changed file should be `milli/src/update/words_prefix_position_docids.rs`.

Co-authored-by: Loïc Lecrenier <loic.lecrenier@me.com>
2023-01-31 15:52:28 +00:00
bors[bot]
20f8184c06 Merge #3441
3441: Fix import of dump v2 r=dureuill a=irevoire

# Pull Request
This bug was introduced because of a mistake we made earlier: we said the last version to export dump v2 was v0.21.0 while it was actually v0.22.0.
To fix the bug, I updated our whole v2 reader to use the code from meilisearch v0.22.0.
Also:
- Import the bugged dump in the tests
- Test the import of this dump in the v2 reader and current reader

## Related issue
Fixes #3435


Co-authored-by: Tamo <tamo@meilisearch.com>
2023-01-31 13:23:57 +00:00
bors[bot]
2f8ebd0501 Merge #3439
3439: Add git config about ownership in Docker CI r=curquiza a=curquiza

The docker CI is failing because of git usage: https://github.com/meilisearch/meilisearch/actions/runs/4053334082/jobs/6973827940

<img width="960" alt="Capture d’écran 2023-01-31 à 12 12 44" src="https://user-images.githubusercontent.com/20380692/215745119-b866bcf2-7077-48e4-b018-7a2085b23680.png">


> fatal: detected dubious ownership in repository at '/home/meili/actions-runner/_work/meilisearch/meilisearch'

I made some research and I found out this https://github.com/actions/runner-images/issues/6775

Co-authored-by: curquiza <clementine@meilisearch.com>
2023-01-31 12:58:59 +00:00
Tamo
6be9a828fa makes clippy happy 2023-01-31 13:03:28 +01:00
Tamo
4b7b2d6a90 fix the import of dump v2 generated by meilisearch v0.22.0 2023-01-31 13:03:28 +01:00
bors[bot]
a4e8158239 Merge #774
774: Update version for the next release (v0.41.1) in Cargo.toml files r=curquiza a=meili-bot

⚠️ This PR is automatically generated. Check the new version is the expected one before merging.

Co-authored-by: curquiza <curquiza@users.noreply.github.com>
2023-01-31 11:51:42 +00:00
bors[bot]
151e52c481 Merge #3433
3433: Add prototype guide to CONTRIBUTING.md r=curquiza a=curquiza



Co-authored-by: curquiza <clementine@meilisearch.com>
Co-authored-by: Clémentine Urquizar - curqui <clementine@meilisearch.com>
2023-01-31 11:25:46 +00:00
curquiza
e269027cdd Add git config about ownershio in Docker CI 2023-01-31 12:04:41 +01:00
Loïc Lecrenier
a2690ea8d4 Reduce incremental indexing time of words_prefix_position_docids DB
This database can easily contain millions of entries. Thus, iterating
over it can be very expensive.

For regular `documentAdditionOrUpdate` tasks, `del_prefix_fst_words`
will always be empty. Thus, we can save a significant amount of time
by adding this `if !del_prefix_fst_words.is_empty()` condition.

The code's behaviour remains completely unchanged.
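
A sketch of that fast path in isolation (simplified: the real code iterates an LMDB database, not an in-memory Vec):

```rust
use std::collections::HashSet;

/// For plain documentAdditionOrUpdate tasks, `del_prefix_fst_words` is
/// empty, so the whole scan is skipped.
fn remove_deleted_prefixes(
    entries: &mut Vec<(Vec<u8>, u64)>, // stand-in for words_prefix_position_docids
    del_prefix_fst_words: &HashSet<Vec<u8>>,
) {
    if !del_prefix_fst_words.is_empty() {
        // Only pay for iterating the (potentially millions of) entries
        // when some prefixes were actually deleted.
        entries.retain(|(prefix, _)| !del_prefix_fst_words.contains(prefix));
    }
}
```
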
2023-01-31 11:42:24 +01:00
bors[bot]
33f61d2cd4 Merge #775
775: Fix clippy for Rust 1.67, allow `uninlined_format_args` r=dureuill a=dureuill

# Pull Request

milli part of https://github.com/meilisearch/meilisearch/pull/3437

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2023-01-31 10:29:24 +00:00
bors[bot]
544b581b15 Merge #3437
3437: Make clippy happy for Rust 1.67, allow uninlined_format_args r=Kerollmops a=dureuill

# Pull Request

This PR is the equivalent of #3434 for the `release-v1.0.0` branch.

See #3434 for more information.

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2023-01-31 10:29:12 +00:00
Louis Dureuil
5c0668afcf clippy: allow uninlined_format_args 2023-01-31 11:13:47 +01:00
Louis Dureuil
20f05efb3c clippy: needless_lifetimes 2023-01-31 11:12:59 +01:00
Louis Dureuil
cbf029f64c clippy: --fix 2023-01-31 11:12:59 +01:00
curquiza
bffabf9cc6 Update version for the next release (v0.41.1) in Cargo.toml files 2023-01-31 09:56:22 +00:00
bors[bot]
f647b20818 Merge #3434
3434: Make clippy happy for Rust 1.67, allow `uninlined_format_args` r=Kerollmops a=dureuill

# Pull Request

This PR allows `uninlined_format_args` in CI for clippy.

This is due to https://github.com/rust-lang/rust-clippy/issues/10087, which in particular has correctness issues with regard to edition 2018 crates, and is a big change altogether. https://github.com/rust-lang/rust-clippy/pull/10265 is already open in order to change the category of this lint to "pedantic", meaning that if this latter PR merges, a future Rust release will accept our code unmodified with regard to uninlined format arguments.

As a result, this PR introduces the following changes:

1. Allow `uninlined_format_args` in the clippy command in CI.
2. Use rewind rather than seek(0) (illustrated below)
3. Remove lifetimes that clippy deems needless.
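
The change from point 2 in isolation (an illustrative snippet, not the PR's actual code):

```rust
use std::io::{Cursor, Seek, SeekFrom};

fn main() -> std::io::Result<()> {
    let mut reader = Cursor::new(b"dump".to_vec());
    reader.seek(SeekFrom::Start(0))?; // before: explicit seek to offset 0
    reader.rewind()?;                 // after: same effect, clearer intent
    Ok(())
}
```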

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2023-01-31 09:45:12 +00:00
Louis Dureuil
924d5d4c11 clippy: remove needless lifetimes 2023-01-31 10:40:48 +01:00
Louis Dureuil
771a367b97 clippy: use rewind instead of seek 0 2023-01-31 10:40:48 +01:00
Louis Dureuil
07603373f3 clippy: allow uninlined_format_args 2023-01-31 10:15:07 +01:00
Louis Dureuil
d91f8fc493 clippy: Allow uninlined_format_args in CI 2023-01-31 09:56:26 +01:00
Louis Dureuil
3296cf7ae6 clippy: remove needless lifetimes 2023-01-31 09:32:40 +01:00
Louis Dureuil
89675e5f15 clippy: Replace seek 0 by rewind 2023-01-31 09:32:40 +01:00
Louis Dureuil
47b7d515ed Add more detailed contribution instructions for tests 2023-01-30 17:39:05 +01:00
Clémentine Urquizar - curqui
2ba4629938 Update CONTRIBUTING.md
Co-authored-by: Many the fish <many@meilisearch.com>
2023-01-30 15:56:30 +01:00
curquiza
982dd76042 Improve readability 2023-01-30 14:36:22 +01:00
curquiza
3505ee47f8 Add volume to docker command 2023-01-30 14:33:50 +01:00
curquiza
b2d25c07d7 Add guide to create a proto 2023-01-30 14:31:36 +01:00
Clémentine Urquizar - curqui
b9d8bd77fc Update README.md
Co-authored-by: gui machiavelli <hey@guimachiavelli.com>
2023-01-26 18:14:00 +01:00
Clémentine Urquizar - curqui
8a66ba01d8 Update README.md
Co-authored-by: gui machiavelli <hey@guimachiavelli.com>
2023-01-26 18:13:53 +01:00
Clémentine Urquizar - curqui
8a6d548041 Update README.md
Co-authored-by: gui machiavelli <hey@guimachiavelli.com>
2023-01-26 18:13:08 +01:00
Clémentine Urquizar - curqui
b452358124 Update README.md
Co-authored-by: gui machiavelli <hey@guimachiavelli.com>
2023-01-26 18:12:56 +01:00
bors[bot]
bfb1f9279b Merge #3420 #3422
3420: Add image hyperlink in README.md r=curquiza a=gregsadetsky

# Pull Request

## What does this PR do?
- tiny README.md improvement: under "SDKs & integration tools", add a hyperlink to the image with all of the language logos so that clicking the image leads to the integrations page. Otherwise, right now, clicking this image leads to the image file in the repo, which is not really useful.

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [X] Have you read the contributing guidelines?
- [X] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


3422: Remove cache from test CI r=dureuill a=curquiza

Comment out the lines where we use the cache in our test CIs.
We indeed have cache issues (lack of space on the machine) when running our test CIs:
https://github.com/meilisearch/meilisearch/pull/3403

Co-authored-by: Greg Sadetsky <lepetitg@gmail.com>
Co-authored-by: Clémentine Urquizar - curqui <clementine@meilisearch.com>
Co-authored-by: curquiza <clementine@meilisearch.com>
2023-01-25 16:25:53 +00:00
Clémentine Urquizar - curqui
48dabd27ea Update .github/workflows/rust.yml
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2023-01-25 16:58:15 +01:00
bors[bot]
4549e0a36e Merge #3415
3415: Test all the errors of wrong `_geo` field and bump milli r=dureuill a=irevoire

## Attention to reviewer

The first commit is only a refactoring of the test suite to use snapshot tests everywhere instead of `assert_eq`.
It doesn’t change the content of anything and there is probably nothing to review. I just made it for maintenance purposes in the future.


Fix https://github.com/meilisearch/meilisearch/issues/3414

Co-authored-by: Tamo <tamo@meilisearch.com>
2023-01-25 15:54:42 +00:00
Tamo
cac93f149e fix the tests after rebasing 2023-01-25 16:52:54 +01:00
Tamo
481df7a8b6 Update meilisearch/tests/documents/add_documents.rs
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2023-01-25 16:45:11 +01:00
Tamo
8356f109c1 bump milli to fix the last test 2023-01-25 16:45:11 +01:00
Tamo
934f2b3cb5 exhaustively test all the errors that can arise from a bad geo field 2023-01-25 16:45:11 +01:00
Tamo
a3f1b8fdb9 refactorize the test suite of the add_documents module to use snapshot tests when possible 2023-01-25 16:45:11 +01:00
curquiza
9c3830a19c Remove cache everywhere 2023-01-25 16:35:02 +01:00
Clémentine Urquizar - curqui
ff6b8dfac4 Remove cache from Windows and macOs CIs 2023-01-25 16:24:04 +01:00
Kerollmops
ec7de4bae7 Make it work for all routes, including stats and index swaps 2023-01-25 16:12:40 +01:00
bors[bot]
d963c2ce55 Merge #3419
3419: Test all the api key error codes r=dureuill a=irevoire

Partially fix #3325

Co-authored-by: Tamo <tamo@meilisearch.com>
2023-01-25 15:09:19 +00:00
bors[bot]
5beb1aab7d Merge #3418
3418: Compute the size of the auth-controller, index-scheduler and all update files in the global stats r=dureuill a=irevoire

Fix https://github.com/meilisearch/meilisearch/issues/3201

Co-authored-by: Tamo <tamo@meilisearch.com>
2023-01-25 14:05:17 +00:00
Kerollmops
184b8afd9e Make it work in the CreateApiKey struct 2023-01-25 15:01:50 +01:00
Tamo
a858531574 apply review comments 2023-01-25 14:51:36 +01:00
Kerollmops
29961b8c6b Make it work with the dumps 2023-01-25 14:41:36 +01:00
Clément Renault
0b08413c98 Introduce the IndexUidPattern type 2023-01-25 14:22:17 +01:00
Clément Renault
474d4ec498 Add tests for the index patterns 2023-01-25 14:22:16 +01:00
Tamo
bf94f89035 Update index-scheduler/src/lib.rs
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2023-01-25 11:31:50 +01:00
Tamo
3bcff60d1c makes clippy happy 2023-01-25 11:31:48 +01:00
Tamo
04c4487660 update the analytics with the new stats method 2023-01-25 11:25:04 +01:00
Tamo
c92948b143 Compute the size of the auth-controller, index-scheduler and all update files in the global stats 2023-01-25 11:25:02 +01:00
bors[bot]
0544b60974 Merge #3409
3409: Bump libgit2-sys from 0.14.1+1.5.0 to 0.14.2+1.5.1 r=Kerollmops a=dependabot[bot]

Bumps [libgit2-sys](https://github.com/rust-lang/git2-rs) from 0.14.1+1.5.0 to 0.14.2+1.5.1.
<details>
<summary>Commits</summary>
<ul>
<li><a href="a233483a39"><code>a233483</code></a> Update to libgit2 1.5.1</li>
<li><a href="bce15556ef"><code>bce1555</code></a> Merge pull request <a href="https://github-redirect.dependabot.com/rust-lang/git2-rs/issues/909">#909</a> from ehuss/ssh-keys</li>
<li><a href="222fbf3b9e"><code>222fbf3</code></a> Bump versions</li>
<li><a href="fa41943135"><code>fa41943</code></a> Change the certificate_check callback to support passthrough.</li>
<li><a href="84e21aad4e"><code>84e21aa</code></a> Add ability to get the SSH host key and its type.</li>
<li><a href="e6aa6666b9"><code>e6aa666</code></a> Bump git2-curl version. (<a href="https://github-redirect.dependabot.com/rust-lang/git2-rs/issues/861">#861</a>)</li>
<li><a href="46674cebd9"><code>46674ce</code></a> Fix warning about unused_must_use for Box::from_raw (<a href="https://github-redirect.dependabot.com/rust-lang/git2-rs/issues/860">#860</a>)</li>
<li><a href="951dce9dea"><code>951dce9</code></a> Merge pull request <a href="https://github-redirect.dependabot.com/rust-lang/git2-rs/issues/858">#858</a> from davidkna/git2150</li>
<li><a href="8871f8e9b3"><code>8871f8e</code></a> bump libgit2 to 1.5.0</li>
<li><a href="04278a24ba"><code>04278a2</code></a> Merge pull request <a href="https://github-redirect.dependabot.com/rust-lang/git2-rs/issues/839">#839</a> from davidkna/libgit2_143</li>
<li>Additional commits viewable in <a href="https://github.com/rust-lang/git2-rs/compare/0.14.1...libgit2-sys-0.14.2">compare view</a></li>
</ul>
</details>
<br />


[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=libgit2-sys&package-manager=cargo&previous-version=0.14.1+1.5.0&new-version=0.14.2+1.5.1)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`.


Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2023-01-25 09:34:34 +00:00
Greg Sadetsky
4223c51838 Add image hyperlink in README.md 2023-01-24 15:24:09 -05:00
bors[bot]
b3c2a4ae27 Merge #3412
3412: When adding documents, trying to update the primary-key now throws an error r=Kerollmops a=irevoire

While updating the test suite, I also noticed an issue with the indexed_documents value of failed tasks and had to update it. I also named a bunch of snapshots that had no name, sorry 😬

Fixes https://github.com/meilisearch/meilisearch/issues/3385
Fixes https://github.com/meilisearch/meilisearch/issues/3411

Co-authored-by: Tamo <tamo@meilisearch.com>
2023-01-24 17:14:11 +00:00
Tamo
c7b2e3be87 apply review comments 2023-01-24 17:54:43 +01:00
Tamo
aa17a54feb test all the api key error codes 2023-01-24 17:30:35 +01:00
bors[bot]
6f71a2b38b Merge #3403
3403: Add `--all` to test CI r=curquiza a=curquiza

Discussed with `@irevoire` [here](https://meilisearch.slack.com/archives/G01A1F4KVGU/p1674144546920649?thread_ts=1674144456.561199&cid=G01A1F4KVGU) (internal link)

Co-authored-by: curquiza <clementine@meilisearch.com>
2023-01-24 16:23:08 +00:00
bors[bot]
898160587f Merge #3416
3416: Add tests on the index resource r=Kerollmops a=irevoire

Fix part of https://github.com/meilisearch/meilisearch/issues/3325

Co-authored-by: Tamo <tamo@meilisearch.com>
2023-01-24 15:26:18 +00:00
bors[bot]
7c9935f96a Merge #769
769: Modify README to prevent contributions r=Kerollmops a=curquiza



Co-authored-by: curquiza <clementine@meilisearch.com>
Co-authored-by: Clémentine Urquizar - curqui <clementine@meilisearch.com>
2023-01-24 15:14:31 +00:00
Clémentine Urquizar - curqui
f7ae8bc065 Update README.md
Co-authored-by: Clément Renault <clement@meilisearch.com>
2023-01-24 15:58:41 +01:00
Clémentine Urquizar - curqui
3d8a3d22d1 Update README.md
Co-authored-by: Clément Renault <clement@meilisearch.com>
2023-01-24 15:58:34 +01:00
bors[bot]
30f88350c7 Merge #773
773: bump milli r=Kerollmops a=irevoire

I need a new release of milli for https://github.com/meilisearch/meilisearch/pull/3415

Co-authored-by: Tamo <tamo@meilisearch.com>
2023-01-24 14:51:32 +00:00
bors[bot]
4c4baaf1ce Merge #3387
3387: Update create-issue-dependencies.yml r=Kerollmops a=curquiza



Co-authored-by: Clémentine Urquizar - curqui <clementine@meilisearch.com>
2023-01-24 14:44:09 +00:00
Tamo
55e8046551 bump milli 2023-01-24 13:52:21 +01:00
Tamo
32364e9919 add tests on the index resource 2023-01-24 13:20:20 +01:00
bors[bot]
4e4d8dfda7 Merge #772
772: Throw an error on unknown fields specified in the _geo field r=irevoire a=irevoire

Fix parts of https://github.com/meilisearch/meilisearch/issues/3414

Co-authored-by: Tamo <tamo@meilisearch.com>
2023-01-24 11:36:00 +00:00
Tamo
de3c4f1986 throw an error on unknown fields specified in the _geo field 2023-01-24 12:23:24 +01:00
Tamo
ea3b269b77 reformat 2023-01-23 23:59:34 +01:00
Tamo
a4be4c49e8 Update index-scheduler/src/batch.rs
Co-authored-by: Clément Renault <clement@meilisearch.com>
2023-01-23 23:58:03 +01:00
Tamo
7d1ebb7295 add test on the autobatcher layer 2023-01-23 20:56:12 +01:00
bors[bot]
e664f09045 Merge #3396
3396: Update our error message about negative integer r=dureuill a=irevoire

Fix https://github.com/meilisearch/meilisearch/issues/3394

Co-authored-by: Tamo <tamo@meilisearch.com>
2023-01-23 19:50:04 +00:00
Tamo
767cb725a5 reimplement the batching of tasks with or without primary key in the autobatcher 2023-01-23 20:18:22 +01:00
Tamo
13c2cd700d Update error message about negative integer 2023-01-23 18:08:46 +01:00
bors[bot]
fea41ca788 Merge #3404
3404: Fix matching strategy error r=irevoire a=ManyTheFish

# Pull Request

## Related issue
Fixes #3391


Co-authored-by: ManyTheFish <many@meilisearch.com>
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2023-01-23 17:04:05 +00:00
bors[bot]
217504fff3 Merge #3406
3406: Master Key: Implements errors and warnings from the specification r=irevoire a=dureuill

<sub>Now in technicolor</sub>

# Pull Request

## What does this PR do?
- Uses `atty` and `termcolor` as dependencies
- Uses these dependencies to print a colored background for warning messages
- Update messages to match https://github.com/meilisearch/specifications/pull/209

## PR checklist
Please check if your PR fulfills the following requirements:
- [ ] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [ ] Have you read the contributing guidelines?
- [ ] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2023-01-23 16:39:18 +00:00
Tamo
5672118bfa When adding documents, trying to update the primary-key now throws an error
While updating the test suite, I also noticed an issue with the indexed_documents value of failed tasks and had to update it.
I also named a bunch of snapshots that had no name, sorry 😬
2023-01-23 17:32:13 +01:00
Louis Dureuil
57682cbabe Fix test url after #3398 2023-01-23 15:43:17 +01:00
ManyTheFish
5dd582918d Add test 2023-01-23 15:40:42 +01:00
bors[bot]
74747b65b1 Merge #3395
3395: Indicate filterable attributes in facet distributions when the user requests a non-filterable one. r=irevoire a=dureuill

# Pull Request

## Related issue
Fixes #3390 

## What does this PR do?
- bump milli & deserr
- Update and add tests

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Louis Dureuil <louis@meilisearch.com>
Co-authored-by: ManyTheFish <many@meilisearch.com>
Co-authored-by: Tamo <tamo@meilisearch.com>
2023-01-23 13:53:55 +00:00
Tamo
c79b6a1ee4 bump milli 2023-01-23 14:13:19 +01:00
ManyTheFish
f0e6b9c0c5 Update deserr to 0.3.0 2023-01-23 14:13:04 +01:00
Louis Dureuil
56db54486c Add tests 2023-01-23 14:00:30 +01:00
Louis Dureuil
a9b3f91467 Add missing space
Co-authored-by: Guillaume Mourier <guillaume@meilisearch.com>
2023-01-23 10:33:30 +01:00
dependabot[bot]
5f4497935f Bump libgit2-sys from 0.14.1+1.5.0 to 0.14.2+1.5.1
Bumps [libgit2-sys](https://github.com/rust-lang/git2-rs) from 0.14.1+1.5.0 to 0.14.2+1.5.1.
- [Release notes](https://github.com/rust-lang/git2-rs/releases)
- [Commits](https://github.com/rust-lang/git2-rs/compare/0.14.1...libgit2-sys-0.14.2)

---
updated-dependencies:
- dependency-name: libgit2-sys
  dependency-type: indirect
...

Signed-off-by: dependabot[bot] <support@github.com>
2023-01-20 23:38:40 +00:00
Gregory Conrad
3f69dd6450 feat: add Cargo feature for LMDB's POSIX semaphores 2023-01-19 12:08:38 -05:00
bors[bot]
1c4b1b3b2d Merge #770
770: Update deserr v0.3.0 r=irevoire a=ManyTheFish

related to https://github.com/meilisearch/meilisearch/issues/3391


Co-authored-by: Many the fish <many@meilisearch.com>
2023-01-19 17:05:56 +00:00
Louis Dureuil
0de9a3ffe7 Implements errors and warnings from the specification
Now in technicolor
2023-01-19 18:04:45 +01:00
bors[bot]
b4f1e9bc36 Merge #771
771: Update version for the next release (v0.40.0) in Cargo.toml files r=curquiza a=meili-bot

⚠️ This PR is automatically generated. Check the new version is the expected one before merging.

Co-authored-by: curquiza <curquiza@users.noreply.github.com>
2023-01-19 16:45:20 +00:00
curquiza
abd65d9307 Update version for the next release (v0.40.0) in Cargo.toml files 2023-01-19 16:43:45 +00:00
Many the fish
30fc376713 Update deserr v0.3.0 2023-01-19 17:37:30 +01:00
curquiza
2a1787ed22 Add --all in test CI 2023-01-19 17:26:47 +01:00
curquiza
d1a31afdd6 Modify README to prevent contributions 2023-01-19 17:17:34 +01:00
bors[bot]
60018d0fe4 Merge #3343
3343: Extract creation and last updated timestamp for v3 dump r=curquiza a=FrancisMurillo

# Pull Request

## Related issue
Fixes #2988

## What does this PR do?

Inspired by the v4 dump implementation, this extracts the first `createdAt` and last `updatedAt` fields by parsing the task queue.
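
As an illustration of the date extraction described above, here is a minimal sketch with a hypothetical `Task` type standing in for the parsed task-queue entries (the real dump code reads richer task structures):

```rust
// Hypothetical stand-in for a parsed v3 task-queue entry.
struct Task {
    created_at: i64, // unix timestamp, kept simple for the sketch
}

// The earliest creation date becomes `createdAt`, the latest one `updatedAt`,
// so the result does not depend on the order of the task list.
fn index_dates(tasks: &[Task]) -> Option<(i64, i64)> {
    let created_at = tasks.iter().map(|t| t.created_at).min()?;
    let updated_at = tasks.iter().map(|t| t.created_at).max()?;
    Some((created_at, updated_at))
}
```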

Questions:
- Should the parsing of the tasks be cached instead of being parsed for every index since it might add a performance penalty?
- I am not sure if the `created_at` and `processed_at` fields are correct 
- Should I assume the data is sorted in some order like with `uuid` or `updateId`? I assumed the list is unordered.
- I was planning to populate my dev instance with data and dump my data. Is there a way to dump with previous versions?

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Francis Murillo <evacuee.overlap.vs3op@aleeas.com>
2023-01-19 16:14:21 +00:00
bors[bot]
8fb685f5aa Merge #3401
3401: improve the error messages for the immutable fields r=dureuill a=irevoire

Fix https://github.com/meilisearch/meilisearch/issues/3400

Co-authored-by: Tamo <tamo@meilisearch.com>
2023-01-19 15:52:50 +00:00
Tamo
e3742a38d4 improve the error messages for the immutable fields 2023-01-19 16:49:44 +01:00
curquiza
13b1abceaf Rework technical information in the README 2023-01-19 16:23:06 +01:00
bors[bot]
e16b5c615a Merge #3398
3398: Error links use underscores again r=irevoire a=dureuill

# Pull Request

## Related issue

Follow-up of #3288 where [it was decided](https://github.com/meilisearch/meilisearch/pull/3288#issuecomment-1396733603) to revert course on the separator to use in error anchors.

## What does this PR do?
- Use `_` again as separator in anchors of error link
- Fix tests


Impacts `@meilisearch/docs-team`: we need `_`-separated anchors to be generated in the online documentation to match the ones emitted from the engine.

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2023-01-19 15:17:33 +00:00
bors[bot]
3521a3a0b2 Merge #763
763: Fixes error message when lat and lng are unparseable r=loiclec a=ahlner

# Pull Request

## Related issue
Partially fixes [#3007](https://github.com/meilisearch/meilisearch/issues/3007)

## What does this PR do?
- Changes the `validate_geo_from_json` function to return a `BadLatitudeAndLongitude` error if lat or lng is a string that is not parseable to `f64` (see the sketch below)
- Implemented some unit tests
- Derived `PartialEq` for `GeoError` to use `assert_eq!` in tests
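
A minimal sketch of the described validation using `serde_json` (the helper name is hypothetical):

```rust
use serde_json::Value;

// Numbers are taken as-is; strings are accepted only when they parse to f64;
// anything else (or an unparseable string) yields None, which the caller
// would map to a BadLatitudeAndLongitude error.
fn parse_coordinate(value: &Value) -> Option<f64> {
    match value {
        Value::Number(n) => n.as_f64(),
        Value::String(s) => s.parse::<f64>().ok(),
        _ => None,
    }
}
```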

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Philipp Ahlner <philipp@ahlner.com>
2023-01-19 15:15:46 +00:00
Louis Dureuil
d2420f5c8f Fix non insta tests 2023-01-19 16:10:05 +01:00
Louis Dureuil
72e2b220ed Fix tests 2023-01-19 15:48:20 +01:00
bors[bot]
40a53f8824 Merge #767
767: Update version for the next release (v0.39.2) in Cargo.toml files r=curquiza a=meili-bot

⚠️ This PR is automatically generated. Check the new version is the expected one before merging.

Co-authored-by: curquiza <curquiza@users.noreply.github.com>
2023-01-19 14:48:12 +00:00
Louis Dureuil
b0c33ed6d2 Error codes are underscore again 2023-01-19 15:47:01 +01:00
Philipp Ahlner
f5ca421227 Superfluous test removed 2023-01-19 15:39:21 +01:00
curquiza
3f048927a0 Update version for the next release (v0.39.2) in Cargo.toml files 2023-01-19 14:29:09 +00:00
bors[bot]
e7c0617699 Merge #766
766: Indicate filterable attributes when the user sets a non-filterable attribute in facet distributions r=irevoire a=dureuill

# Pull Request

## Related issue
Related to https://github.com/meilisearch/meilisearch/issues/3390

## What does this PR do?
- Title

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2023-01-19 14:18:13 +00:00
bors[bot]
a1e9c44fe5 Merge #3389
3389: Return `invalid_search_facets` rather than `bad_request` when using a facet on a non-filterable attribute r=irevoire a=dureuill

# Pull Request

## Related issue

Fixes https://github.com/meilisearch/meilisearch/issues/3384

## What does this PR do?

- Title
- Also adds a test

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2023-01-19 13:19:22 +00:00
bors[bot]
7df1dda002 Merge #3393
3393: improve the error message when no task filters are specified for the cancellation or deletion of tasks r=dureuill a=irevoire

Close https://github.com/meilisearch/meilisearch/issues/3392

Was already present in v0.30

Co-authored-by: Tamo <tamo@meilisearch.com>
2023-01-19 12:55:52 +00:00
Louis Dureuil
3d8ca62c35 InvalidFacetDistribution returns invalid_search_facet 2023-01-19 13:41:26 +01:00
Tamo
e8e7070cc6 improve the error message when no task filters are specified for the cancellation or deletion of tasks 2023-01-19 12:42:08 +01:00
Francis Murillo
798aa4ee92 Fix clippy issues 2023-01-19 19:38:20 +08:00
Louis Dureuil
4fd6fd9bef Indicate filterable attributes when the user sets a non-filterable attribute in facet distributions 2023-01-19 12:25:18 +01:00
bors[bot]
f857d9c2df Merge #3383
3383: Fix api key patch r=irevoire a=irevoire

This was introduced in the previous rc

Fix https://github.com/meilisearch/meilisearch/issues/3374

Co-authored-by: Tamo <tamo@meilisearch.com>
2023-01-19 10:05:09 +00:00
Philipp Ahlner
a2cd7214f0 Fixes error message when lat/lng are unparseable 2023-01-19 10:10:26 +01:00
Clémentine Urquizar - curqui
0ce1d6d487 Update create-issue-dependencies.yml 2023-01-18 23:43:33 +01:00
Tamo
d0988e115f fix the patch of description and name for the api-key 2023-01-18 19:07:26 +01:00
Tamo
5dcb920fb4 improve the tests 2023-01-18 18:27:00 +01:00
bors[bot]
b3166df7ea Merge #3372
3372: Enhance facet string normalization r=ManyTheFish a=ManyTheFish

# Pull Request

Use a compatibility decomposition normalizer in facet string extraction in order to have a more human-friendly sort order.

Now, [é (U+00E9)](https://www.compart.com/fr/unicode/U+00E9) is converted to [e (U+0065)](https://www.compart.com/fr/unicode/U+0065) + [◌́ (U+0301)](https://www.compart.com/fr/unicode/U+0301). This way, any word starting with an accented/diacritized version of a character is sorted just after the words starting with the unaccented version of the character.
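
For illustration, the decomposition can be reproduced with the `unicode-normalization` crate (an assumption for this sketch; milli applies it inside its own normalizer pipeline):

```rust
use unicode_normalization::UnicodeNormalization;

fn main() {
    // "é" (U+00E9) decomposes into "e" (U+0065) + combining acute accent (U+0301),
    // so a code-point-wise comparison now groups it right after the plain "e".
    let decomposed: String = "é".nfkd().collect();
    assert_eq!(decomposed.chars().collect::<Vec<_>>(), vec!['e', '\u{301}']);
}
```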

## Related issue

Fixes #3260 


Co-authored-by: ManyTheFish <many@meilisearch.com>
2023-01-18 17:17:53 +00:00
bors[bot]
6f7e0c431a Merge #3341
3341: add functional + error tests on the swap_indexes route and fix a confusing error message r=loiclec a=irevoire

Fix https://github.com/meilisearch/meilisearch/issues/3340
Fix part of https://github.com/meilisearch/meilisearch/issues/3325
Fix https://github.com/meilisearch/meilisearch/issues/3381

Test both the functionality and the error codes

Co-authored-by: Tamo <tamo@meilisearch.com>
Co-authored-by: Loïc Lecrenier <loic.lecrenier@me.com>
2023-01-18 16:32:22 +00:00
Tamo
00f6af6475 fix a wrong error message 2023-01-18 17:26:48 +01:00
bors[bot]
1803998017 Merge #3335 #3353
3335: Remove test badge r=curquiza a=curquiza

Suggestion of removal: from my point of view, this badge does not provide any useful information and, above all, is often outdated. Currently ours shows "no status" despite our tests passing.
Plus, sometimes our tests are not passing because we are still in development, but that does not mean the binaries we currently provide are broken.

<img width="619" alt="Capture d’écran 2023-01-12 à 14 06 40" src="https://user-images.githubusercontent.com/20380692/212074200-f9e3ab3e-ad1d-4171-bd13-46584c3cd117.png">


3353: Bump svenstaro/upload-release-action from 2.3.0 to 2.4.0 r=curquiza a=dependabot[bot]

Bumps [svenstaro/upload-release-action](https://github.com/svenstaro/upload-release-action) from 2.3.0 to 2.4.0.
<details>
<summary>Release notes</summary>
<p><em>Sourced from <a href="https://github.com/svenstaro/upload-release-action/releases">svenstaro/upload-release-action's releases</a>.</em></p>
<blockquote>
<h2>2.4.0</h2>
<ul>
<li>Update to node 16</li>
<li>Bump most dependencies</li>
</ul>
</blockquote>
</details>
<details>
<summary>Changelog</summary>
<p><em>Sourced from <a href="https://github.com/svenstaro/upload-release-action/blob/master/CHANGELOG.md">svenstaro/upload-release-action's changelog</a>.</em></p>
<blockquote>
<h2>[2.4.0] - 2023-01-09</h2>
<ul>
<li>Update to node 16</li>
<li>Bump most dependencies</li>
</ul>
</blockquote>
</details>
<details>
<summary>Commits</summary>
<ul>
<li><a href="2728235f7d"><code>2728235</code></a> 2.4.0</li>
<li><a href="c2e0608dc4"><code>c2e0608</code></a> Merge pull request <a href="https://github-redirect.dependabot.com/svenstaro/upload-release-action/issues/88">#88</a> from svenstaro/dependabot/npm_and_yarn/json5-1.0.2</li>
<li><a href="bd74772a1a"><code>bd74772</code></a> Don't shadow vars</li>
<li><a href="16e7903b2d"><code>16e7903</code></a> Bump json5 from 1.0.1 to 1.0.2</li>
<li><a href="f2c549b117"><code>f2c549b</code></a> Bump some more deps</li>
<li><a href="7a7d004438"><code>7a7d004</code></a> Bump some deps</li>
<li><a href="9c4a92ec0d"><code>9c4a92e</code></a> Use explicit any</li>
<li><a href="039214a996"><code>039214a</code></a> Bump jest and typescript versions</li>
<li><a href="2b373356cb"><code>2b37335</code></a> Update to node16</li>
<li><a href="fb1eb39e74"><code>fb1eb39</code></a> Merge pull request <a href="https://github-redirect.dependabot.com/svenstaro/upload-release-action/issues/75">#75</a> from svenstaro/dependabot/npm_and_yarn/jsdom-16.7.0</li>
<li>Additional commits viewable in <a href="https://github.com/svenstaro/upload-release-action/compare/2.3.0...2.4.0">compare view</a></li>
</ul>
</details>
<br />


[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=svenstaro/upload-release-action&package-manager=github_actions&previous-version=2.3.0&new-version=2.4.0)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

You can trigger a rebase of this PR by commenting `@dependabot rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---

<details>
<summary>Dependabot commands and options</summary>
<br />

You can trigger Dependabot actions by commenting on this PR:
- `@dependabot rebase` will rebase this PR
- `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it
- `@dependabot merge` will merge this PR after your CI passes on it
- `@dependabot squash and merge` will squash and merge this PR after your CI passes on it
- `@dependabot cancel merge` will cancel a previously requested merge and block automerging
- `@dependabot reopen` will reopen this PR if it is closed
- `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
- `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)


</details>

Co-authored-by: Clémentine Urquizar - curqui <clementine@meilisearch.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2023-01-18 15:45:21 +00:00
bors[bot]
3e5b3df487 Merge #3370 #3373 #3375
3370: make the swap indexes not-found errors return an IndexNotFound error code r=irevoire a=irevoire

Fix https://github.com/meilisearch/meilisearch/issues/3368

3373: fix a wrong error code and add tests on the document resource r=irevoire a=irevoire

Fix https://github.com/meilisearch/meilisearch/issues/3371

3375: Avoid deleting all tasks on an invalid `canceledBy` filter r=irevoire a=Kerollmops

Fixes #3369 by making sure that at least one `canceledBy` task filter parameter matches something.

Co-authored-by: Tamo <tamo@meilisearch.com>
Co-authored-by: Kerollmops <clement@meilisearch.com>
2023-01-18 15:21:11 +00:00
Kerollmops
e89973f1bf Do not delete all tasks when no canceled-by matches 2023-01-18 15:50:46 +01:00
Kerollmops
d3c796af38 Add a new test to check that invalid canceledBy works correctly 2023-01-18 15:50:46 +01:00
Kerollmops
182eea1f17 Introduce a canceledBy filter for the tests 2023-01-18 15:50:42 +01:00
bors[bot]
1af3089456 Merge #3348
3348: fix cargo flaky r=irevoire a=irevoire

Partially fix https://github.com/meilisearch/meilisearch/issues/3273

Ideally, we should revert this commit and fix cargo-flaky directly to ensure we never forget to add a sub-crate to the CI.

-----

Here is an example of the CI running (and thus working): https://github.com/meilisearch/meilisearch/actions/runs/3932783699/jobs/6725755801

Co-authored-by: Tamo <tamo@meilisearch.com>
2023-01-18 14:46:50 +00:00
Tamo
a4476c20f8 fix a wrong error code and add tests on the document resource 2023-01-18 15:28:02 +01:00
ManyTheFish
d1fc42b53a Use compatibility decomposition normalizer in facets 2023-01-18 15:02:13 +01:00
ManyTheFish
e64571a881 Add test sorting string with diacritics 2023-01-18 14:43:38 +01:00
Tamo
57da80900d make the swap indexes not found errors return an IndexNotFound error code 2023-01-18 14:16:00 +01:00
bors[bot]
7322f4e78e Merge #3355
3355: fix the wrong error code on minWordSizeForTypos r=irevoire a=irevoire

Fix #3354

Co-authored-by: Tamo <tamo@meilisearch.com>
2023-01-18 12:25:03 +00:00
Philipp Ahlner
497187083b Add test for bug #3007: Wrong error message
Adds a test for #3007: Wrong error message when lat and lng are
unparseable
2023-01-18 13:24:26 +01:00
Tamo
0f727d079b fix the wrong error code on minWordSizeForTypos 2023-01-18 12:28:46 +01:00
dependabot[bot]
32e2848a74 Bump svenstaro/upload-release-action from 2.3.0 to 2.4.0
Bumps [svenstaro/upload-release-action](https://github.com/svenstaro/upload-release-action) from 2.3.0 to 2.4.0.
- [Release notes](https://github.com/svenstaro/upload-release-action/releases)
- [Changelog](https://github.com/svenstaro/upload-release-action/blob/master/CHANGELOG.md)
- [Commits](https://github.com/svenstaro/upload-release-action/compare/2.3.0...2.4.0)

---
updated-dependencies:
- dependency-name: svenstaro/upload-release-action
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
2023-01-18 11:02:44 +00:00
bors[bot]
6b3da8a6de Merge #3346
3346: Import milli 🎉 r=Kerollmops a=Kerollmops

Fixes https://github.com/meilisearch/meilisearch/issues/2901

Main work
- integrate the milli repository as an internal crate into this repo  
- Update the Cargo.toml accordingly
- Ensure meilisearch-types now uses the internal milli crate and not the remote repository
- Update milli's version to follow the meilisearch one

Also
- Removed the beta tests in test CI (will be re-integrated later if needed)
- Move and modify milli's README into the `milli` folder
- remove the script folder from `milli`
- Removed useless CI (release-drafter and enforce-label)

⚠️ Also, imports everything from `release-v1.0.0` up to and including [a5c4fb](a5c4fbbcea) (the merge of PR https://github.com/meilisearch/meilisearch/pull/3334)

Co-authored-by: Loïc Lecrenier <loic.lecrenier@me.com>
Co-authored-by: bors[bot] <26634292+bors[bot]@users.noreply.github.com>
Co-authored-by: Samyak S Sarnayak <samyak201@gmail.com>
Co-authored-by: unvalley <kirohi.code@gmail.com>
Co-authored-by: Samyak Sarnayak <samyak201@gmail.com>
2023-01-18 10:19:42 +00:00
Clément Renault
0769090dd6 Add a note in the README about the crates versioning 2023-01-18 10:08:12 +01:00
Loïc Lecrenier
82bdb54537 Update the index swap tests after git rebase 2023-01-18 09:40:41 +01:00
Tamo
b6ec1f1c6d add functional + error tests on the swap_indexes route 2023-01-18 09:36:04 +01:00
Clément Renault
1d507c84b2 Fix the formatting 2023-01-17 18:25:55 +01:00
Clément Renault
1b78231e18 Make clippy happy 2023-01-17 18:25:54 +01:00
Clément Renault
2b1f6a7f11 Fix the CI to ignore a missing file 2023-01-17 16:26:03 +01:00
Francis Murillo
6993924f32 Use finished_at for v3 dumps instead 2023-01-17 23:11:49 +08:00
bors[bot]
41a970247e Merge #3339
3339: Continued deserr integration r=irevoire a=loiclec

Fix https://github.com/meilisearch/meilisearch/issues/3337
Fix https://github.com/meilisearch/meilisearch/issues/3338

1. Add new error codes that should have been implemented earlier:
- `MissingApiKeyActions`
- `MissingApiKeyExpiresAt`
- `MissingApiKeyIndexes`
- `MissingSwapIndexes`

2. Fix a bug where it was possible to create an API key without specifying the value of `expiresAt`

3. Improve the error messages generated by deserr. Have specific error messages for JSON and QueryParam deserialisation errors.

4. Improve error tests by passing query params as arguments to `GET` routes directly instead of using an intermediary JSON object

5. [Use invalid_index_uid error code in more places](e225608337)

Co-authored-by: Loïc Lecrenier <loic.lecrenier@me.com>
2023-01-17 14:41:22 +00:00
Loïc Lecrenier
e225608337 Use invalid_index_uid error code in more places 2023-01-17 15:28:06 +01:00
Loïc Lecrenier
56e79fa850 Update task snapshot test and clean up details 2023-01-17 13:19:04 +01:00
Loïc Lecrenier
c71a8ea183 Update to latest milli and deserr 2023-01-17 13:10:38 +01:00
bors[bot]
0c7d1f761e Merge #765
765: Update version for the next release (v0.39.1) in Cargo.toml files r=curquiza a=meili-bot

⚠️ This PR is automatically generated. Check the new version is the expected one before merging.

Co-authored-by: curquiza <curquiza@users.noreply.github.com>
2023-01-17 11:04:26 +00:00
curquiza
e3d30e28ef Update version for the next release (v0.39.1) in Cargo.toml files 2023-01-17 10:50:29 +00:00
bors[bot]
63af1e9f28 Merge #764
764: Update deserr to latest version r=irevoire a=loiclec

Update deserr to 0.1.5, which changes the `DeserializeFromValue` trait, getting rid of the `default()` method.


Co-authored-by: Loïc Lecrenier <loic.lecrenier@me.com>
2023-01-17 10:39:36 +00:00
Loïc Lecrenier
f073a86387 Update deserr to latest version 2023-01-17 11:28:19 +01:00
Loïc Lecrenier
b781f9a0f9 cargo fmt 2023-01-17 11:07:07 +01:00
Loïc Lecrenier
07b90dec08 Remove unused proptest dependency 2023-01-17 11:07:07 +01:00
Loïc Lecrenier
9194508a0f Refactor query parameter deserialisation logic 2023-01-17 11:07:07 +01:00
Tamo
9dd01ff44b fix cargo flaky 2023-01-17 11:03:44 +01:00
Loïc Lecrenier
49ddaaef49 Fix missing_swap_indexes error code and handling of expires_at param...
of create api key route
2023-01-17 09:43:07 +01:00
Loïc Lecrenier
766dd830ae Update deserr to latest version + add new error codes for missing fields
- missing_api_key_indexes
- missing_api_key_actions
- missing_api_key_expires_at

- missing_swap_indexes_indexes
2023-01-17 09:43:07 +01:00
Loïc Lecrenier
436ae4e466 Improve error messages generated by deserr
Split Json and Query Parameter error types
2023-01-17 09:43:07 +01:00
Kerollmops
507a7bad96 Use the local milli subcrate 2023-01-16 17:35:54 +01:00
Kerollmops
cde62fcb5b Merge remote-tracking branch 'origin/release-v1.0.0' into import-milli 2023-01-16 17:35:18 +01:00
Kerollmops
03a82136dc Remove the useless cli subcrate 2023-01-16 17:08:43 +01:00
Kerollmops
e68758cec4 Refine the cargo workspace profile settings 2023-01-16 17:04:25 +01:00
Kerollmops
4fb47492e5 Make clippy happy 2023-01-16 16:35:58 +01:00
Kerollmops
5bab8cf7ec Remove useless CI configs 2023-01-16 16:31:46 +01:00
Kerollmops
97005dd505 Bump the milli-imported crates to v1.0.0 2023-01-16 16:29:12 +01:00
Kerollmops
eabef5194a Remove the useless script folder 2023-01-16 16:26:07 +01:00
Kerollmops
ebb2494879 Add a README to the milli crate 2023-01-16 16:25:12 +01:00
Kerollmops
0cec352d2b Merge remote-tracking branch 'milli/main' into import-milli 2023-01-16 16:20:22 +01:00
Francis Murillo
a97281af08 Extract createdAt and updatedAt from v3 dump 2023-01-13 22:45:45 +08:00
bors[bot]
a5c4fbbcea Merge #3334
3334: Add specific error codes `immutable_...` r=irevoire a=loiclec

Add the following error codes:

When an immutable field of an API key is sent to the `PATCH /keys` route:
- `ImmutableApiKeyUid` 
- `ImmutableApiKeyKey`
- `ImmutableApiKeyActions`
- `ImmutableApiKeyIndexes`
- `ImmutableApiKeyExpiresAt`
- `ImmutableApiKeyCreatedAt`
- `ImmutableApiKeyUpdatedAt`

When an immutable field of an index is sent to the `PATCH /indexes/{uid}` route:
- `ImmutableIndexUid`
- `ImmutableIndexCreatedAt`
- `ImmutableIndexUpdatedAt`

Co-authored-by: Loïc Lecrenier <loic.lecrenier@me.com>
Co-authored-by: Tamo <tamo@meilisearch.com>
2023-01-12 15:31:38 +00:00
Tamo
21b8cd53b7 reformat 2023-01-12 16:20:24 +01:00
Loïc Lecrenier
7f80b116bc Add specific immutable_field error codes 2023-01-12 16:20:14 +01:00
bors[bot]
341f8478b4 Merge #3330
3330: test the error codes on the task routes + fix the missing error codes on the limit and from r=dureuill a=irevoire



Co-authored-by: Tamo <tamo@meilisearch.com>
Co-authored-by: Loïc Lecrenier <loic.lecrenier@me.com>
2023-01-12 15:02:44 +00:00
Tamo
79c7f65c30 make a test more reliable 2023-01-12 15:39:28 +01:00
bors[bot]
2bc60c29fc Merge #3336
3336: Add missing `needs:` to the git latest tag workflow r=curquiza a=curquiza

Fixes this problem: the workflow to update the latest git tag was triggered despite the first check failing

<img width="580" alt="Capture d’écran 2023-01-12 à 15 07 00" src="https://user-images.githubusercontent.com/20380692/212087926-975eb387-c8c9-4789-8a62-a56143b9bbd4.png">


This led to updating our latest git tag: our latest git tag corresponds to the `v1.0.0-rc.0` tag instead of `v0.30.5`. (I'm fixing this right now)

<img width="586" alt="Capture d’écran 2023-01-12 à 15 08 15" src="https://user-images.githubusercontent.com/20380692/212088136-f4bc2e9c-d824-4c23-8213-52598c742ebd.png">


Co-authored-by: curquiza <clementine@meilisearch.com>
2023-01-12 14:24:31 +00:00
curquiza
680ea39bba Add missing `needs:` to the git latest tag workflow 2023-01-12 15:04:11 +01:00
Tamo
a524dfb713 fix the analytics 2023-01-12 14:49:50 +01:00
Tamo
705fcaa3b8 reformat the imports 2023-01-12 14:09:15 +01:00
Clémentine Urquizar - curqui
55605435bc Remove test badge 2023-01-12 14:04:48 +01:00
Loïc Lecrenier
a09b6a341d Move tasks route to deserr 2023-01-12 13:57:29 +01:00
Tamo
387874ea26 test the error codes on the task routes 2023-01-12 13:46:19 +01:00
bors[bot]
5c1a7c3b9a Merge #3329
3329: Refactor error handling from deserr r=irevoire a=loiclec

Close https://github.com/meilisearch/meilisearch/issues/3318
Close https://github.com/meilisearch/meilisearch/issues/3289

[TODO]

Co-authored-by: Loïc Lecrenier <loic.lecrenier@me.com>
Co-authored-by: Tamo <tamo@meilisearch.com>
2023-01-11 18:15:32 +00:00
Tamo
6d658f4c52 fix a wrong error code + update some error messages 2023-01-11 19:14:11 +01:00
Tamo
bf573885ea integrate the latest version of milli 2023-01-11 19:08:39 +01:00
Tamo
a68ac3a1dc reformat the headers 2023-01-11 19:08:39 +01:00
Tamo
b252c87197 add tests on the sub settings routes 2023-01-11 19:08:39 +01:00
Loïc Lecrenier
b0b7ad7caf Apply review suggestions 2023-01-11 19:08:39 +01:00
Loïc Lecrenier
c91ffec72e Update Cargo.toml 2023-01-11 19:08:39 +01:00
Loïc Lecrenier
1fc11264e8 Refactor deserr integration 2023-01-11 19:08:39 +01:00
Loïc Lecrenier
2bc2e99ff3 Simplify declaration of the error codes 2023-01-11 19:08:39 +01:00
bors[bot]
808e184069 Merge #3324
3324: Add a test on the search route for each possible error code r=irevoire a=irevoire



Co-authored-by: Tamo <tamo@meilisearch.com>
2023-01-11 16:08:19 +00:00
bors[bot]
e6bea99974 Merge #762
762: Update version for the next release (v0.39.0) in Cargo.toml files r=curquiza a=meili-bot

⚠️ This PR is automatically generated. Check the new version is the expected one before merging.

Co-authored-by: curquiza <curquiza@users.noreply.github.com>
2023-01-11 15:07:33 +00:00
curquiza
9e32ac7cb2 Update version for the next release (v0.39.0) in Cargo.toml files 2023-01-11 15:05:06 +00:00
bors[bot]
302d6cccd7 Merge #761
761: Integrate deserr r=irevoire a=loiclec

1. `Setting<T>` now implements `DeserializeFromValue`
2. The settings now store ranking rules as strongly typed `Criterion` instead of `String`, since the validation of the ranking rules will be done on meilisearch's side from now on


Co-authored-by: Loïc Lecrenier <loic.lecrenier@me.com>
2023-01-11 14:35:15 +00:00
bors[bot]
21b7d709ad Merge #759
759: Change primary key inference error messages r=Kerollmops a=dureuill

# Pull Request

## Related issue
Milli part of https://github.com/meilisearch/meilisearch/issues/3301

## What does this PR do?
- Change error message strings

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2023-01-11 14:04:25 +00:00
Tamo
7a30d98264 fix a flaky test 2023-01-11 14:54:29 +01:00
Loïc Lecrenier
02fd06ea0b Integrate deserr 2023-01-11 13:56:47 +01:00
Tamo
d0a85057a3 fix the bad filter test 2023-01-11 11:37:12 +01:00
bors[bot]
b3574de809 Merge #3321
3321: Update the system http error code to return an internal server error r=irevoire a=irevoire

Fix parts of https://github.com/meilisearch/meilisearch/issues/3318

Co-authored-by: Tamo <tamo@meilisearch.com>
2023-01-11 10:27:13 +00:00
bors[bot]
59704c000c Merge #3326
3326: Test error codes on settings r=irevoire a=irevoire



Co-authored-by: Tamo <tamo@meilisearch.com>
2023-01-11 10:07:52 +00:00
bors[bot]
b117c688f5 Merge #3328
3328: Replace published by released r=Kerollmops a=curquiza

Fix a bug introduced here: https://github.com/meilisearch/meilisearch/pull/3229

Regarding this line:

> * In multiple CIs: replace the `released` type by `published`, see [here](https://stackoverflow.com/questions/59319281/github-action-different-between-release-created-and-published) why. Will not impact anything, but will prevent our future automation from failing

I made a mistake by replacing some unrelated lines in the
- latest git workflow
- APT and brew workflow

-> As a consequence, these workflows ran when releasing `rc0`, but they shouldn't have. Luckily, the check inside the workflows prevented any release.

<img width="1366" alt="Capture d’écran 2023-01-11 à 10 36 52" src="https://user-images.githubusercontent.com/20380692/211771382-d716ff16-0d53-41a9-90de-0d93e01e45fa.png">

This fix is not mandatory thanks to the check inside the workflow, but I would rather roll back to avoid any issues when releasing the official v1 release.

Co-authored-by: curquiza <clementine@meilisearch.com>
2023-01-11 09:43:42 +00:00
curquiza
5ec85b7dfb Replace published by released 2023-01-11 10:30:18 +01:00
bors[bot]
d80be0c28d Merge #3322
3322: Update mini-dashboard to v0.2.5 r=curquiza a=mdubus



Co-authored-by: Morgane Dubus <30866152+mdubus@users.noreply.github.com>
2023-01-11 09:08:11 +00:00
Tamo
398c0c32cd test all the error codes that can be throw in the settings 2023-01-10 18:19:27 +01:00
Tamo
d4157c0ce4 add a test on the search route for each possible error code
snapshot the JSON directly instead of using the debug formatting
2023-01-10 17:59:24 +01:00
bors[bot]
98dffbf213 Merge #3317
3317: Remove the unused error codes r=irevoire a=irevoire

Remove some unused error codes + fix the usage of the search+settings sort and filter error codes

Co-authored-by: Tamo <tamo@meilisearch.com>
2023-01-10 16:36:11 +00:00
bors[bot]
11ee7daa0f Merge #760
760: Add Index::map_size r=Kerollmops a=dureuill

# Pull Request

## Related issue
Related to discussion: https://github.com/meilisearch/meilisearch/discussions/3280

## What does this PR do?
- Expose `heed::Env::map_size` through `Index::map_size`. This allows knowing after the fact with which `map_size` an environment was opened (which is not always the `map_size` that was configured for the opening of the environment, see the documentation for `Index::map_size`), which will be necessary to guarantee we can reopen the index with a larger `map_size`.
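
A plausible shape for that delegation (a sketch only; the `env` field name is an assumption):

```rust
// Sketch: `Index` is assumed to hold its LMDB environment directly.
pub struct Index {
    env: heed::Env,
}

impl Index {
    /// Returns the map size the environment was actually opened with.
    pub fn map_size(&self) -> heed::Result<usize> {
        self.env.map_size()
    }
}
```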

## PR checklist
Please check if your PR fulfills the following requirements:
- [ ] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [ ] Have you read the contributing guidelines?
- [ ] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2023-01-10 15:59:05 +00:00
Morgane Dubus
f63fee5e97 Update Cargo.toml 2023-01-10 15:11:25 +01:00
Tamo
f0d408c295 update the system http error code to return an internal server error 2023-01-10 14:33:46 +01:00
Tamo
d308684395 remove two unused error codes + fix the sort error_code 2023-01-10 11:32:11 +01:00
Louis Dureuil
00746b32c0 Add Index::map_size 2023-01-10 11:16:51 +01:00
bors[bot]
e27bb8ab3e Merge #3246
3246: Implement most of the error handling enhancements planned for v1.0 r=irevoire a=irevoire

Fix #3095 and #2325
Close https://github.com/meilisearch/meilisearch/pull/2540

Implements most of https://github.com/meilisearch/specifications/pull/212

## Generic error messages we re-implement (in deserr):

- [x] Json
  - [x] Incorrect value kind
  - [x] Missing field
  - [x] Unknown key
  - [x] Unexpected
  - [x] Reimplement the way we show the location

- [x] Query parameter
  - [x] Incorrect value kind
  - [x] Missing field
  - [x] Unknown key
  - [x] Unexpected

## Routes to implement:
- [x] Get search
- [x] Post search
- [x] Settings
- [x] Swap indexes
- [x] Task API
- [x] Documents resource

Error codes to implement:
## Swap API

- [x] `duplicate_index_found` → `invalid_swap_duplicate_index_found`

## Search API

- [x] `invalid_search_q`
- [x] `invalid_search_offset`
- [x] `invalid_search_limit`
- [x] `invalid_search_page`
- [x] `invalid_search_hits_per_page`
- [x] `invalid_search_attributes_to_retrieve`
- [x] `invalid_search_attributes_to_crop`
- [x] `invalid_search_crop_length`
- [x] `invalid_search_attributes_to_highlight`
- [x] `invalid_search_show_matches_position`
- [x] `invalid_search_filter`
- [x] `invalid_search_sort`
- [x] `invalid_search_facets`
- [x] `invalid_search_highlight_pre_tag`
- [x] `invalid_search_highlight_post_tag`
- [x] `invalid_search_crop_marker`
- [x] `invalid_search_matching_strategy`

## Settings API

- [x] invalid_settings_displayed_attributes
- [x] invalid_settings_searchable_attributes
- [x] invalid_settings_filterable_attributes
- [x] invalid_settings_sortable_attributes
- [x] invalid_settings_ranking_rules
- [x] invalid_settings_stop_words
- [x] invalid_settings_synonyms
- [x] invalid_settings_distinct_attribute
- [x] Add invalid_settings_typo_tolerance
    - [x] ~~invalid_settings_typo_tolerance_min_word_size_for_typos~~ (Merge in **invalid_settings_typo_tolerance**)
- [x] invalid_settings_faceting
- [x] invalid_settings_pagination

## Task API

- [x] invalid_task_date_filter → invalid_task_before_enqueued_at_filter (for all date filters)?

## Document Resource

- [x] ~~`primary_key_inference_failed` → `index_primary_key_`~~ This doesn't exist anymore after `@dureuill`'s PR on the primary key inference

------------------

# Changes

# `code` property

## Swap API

- [x] `invalid_swap_duplicate_index_found`  [RENAME]
- [x] `invalid_swap_indexes`  [NEW]

## Index API

### POST

- [x] `missing_index_uid`  [NEW]

### POST/PATCH

- [x] `invalid_index_primary_key`  [NEW]

### GET

- [x] `invalid_index_limit`  [NEW]
- [x] `invalid_index_offset`  [NEW]

## Documents API

### GET

- [x] `fields` parameter error `bad_request` → `invalid_document_fields`  [NEW]
- [x] `limit` parameter error `bad_request` → `invalid_document_limit`  [NEW]
- [x] `offset` parameter error `bad_request` → `invalid_document_offset`  [NEW]

### POST/PUT

- [x] `?primaryKey` parameter error `bad_request` →  `invalid_index_primary_key`  [NEW]

## Keys API

### POST

- ~~`missing_parameter`~~
    - [x] `missing_api_key_actions`  [NEW]
    - [x] `missing_api_key_indexes`  [NEW]
    - [x] `missing_api_key_expires_at`  [NEW]

### GET

- [x] `limit` parameter `bad_request` → `invalid_api_key_limit`  [NEW]
- [x] `offset` parameter `bad_request` → `invalid_api_key_offset`  [NEW]

## Misc
- [x] ~~`invalid_geo_field`~~ → `invalid_document_geo_field`  [RENAME]

# `type` property

## `system`   [NEW]

- [x] `no_space_left_on_device` error code
- [x] `io_error` error code (**does not exist in the current spec, need a catch-up**)
- [x] `too_many_open_files` error code (**does not exist in the current spec, need a catch-up**)

Co-authored-by: Tamo <tamo@meilisearch.com>
Co-authored-by: Loïc Lecrenier <loic.lecrenier@me.com>
2023-01-09 16:25:48 +00:00
Tamo
ff843881c5 remove the documentation of the query parameter extractor module 2023-01-09 15:14:48 +01:00
Loïc Lecrenier
ae08fba76e Remove forgotten comment 2023-01-09 13:45:03 +01:00
Loïc Lecrenier
af6d4b3031 Remove unused deserr extractor 2023-01-09 13:43:16 +01:00
Louis Dureuil
1cce613399 Fixup dumps-destination -> dump-directory section header in help link 2023-01-09 13:31:57 +01:00
Tamo
b03ee54fe0 makes clippy turbo-happy 2023-01-09 13:04:31 +01:00
Tamo
d17efb9ed6 use the published version of deserr 2023-01-09 12:51:10 +01:00
Loïc Lecrenier
9ab791bedc Update error codes on the api key routes 2023-01-09 12:30:25 +01:00
Loïc Lecrenier
96105a5e8d Update error codes on the documents/ routes 2023-01-09 12:30:25 +01:00
Tamo
e706628bb1 fix the error code of the swap index route 2023-01-06 14:48:25 +01:00
Tamo
3c630891bb fix the error code for the swap index 2023-01-05 21:25:20 +01:00
Tamo
97854274b4 rename the invalid_geo_field error code to invalid_document_geo_field 2023-01-05 21:08:19 +01:00
Tamo
0646f63404 implement the new type property for the system error 2023-01-05 21:06:50 +01:00
Tamo
ce3e8794a2 fix the tests after the rebase 2023-01-05 20:52:26 +01:00
Tamo
50ce0409bc Integrate deserr on the most important routes 2023-01-05 20:48:29 +01:00
bors[bot]
839b05c43d Merge #3305
3305: Remove hidden but usable CLI arguments r=Kerollmops a=Kerollmops

`@curquiza` found out that we were exposing some internal CLI arguments: `nb-max-chunks` and `log-every-n`. In this PR I removed those two, the only ones I found. Those options shouldn't be accessible, as they are documented neither in the documentation nor in the `--help` message.

Fixes https://github.com/meilisearch/meilisearch/issues/3307

Co-authored-by: Clément Renault <clement@meilisearch.com>
2023-01-05 17:11:58 +00:00
bors[bot]
cc699fae40 Merge #3308
3308: Remove `--generate-master-key` option r=Kerollmops a=dureuill

# Pull Request

## Related issue

Related to https://github.com/meilisearch/specifications/pull/210#issuecomment-1372035525

## What does this PR do?
- Remove the short-lived `--generate-master-key` flag that was too beautiful for this world :D.

Removal of this option proceeds from the following reasoning:

1. It is the only option that starts meilisearch and then immediately exits.
2. We are unsure if we want to keep it in this form in the future or switch to a subcommand.
3. Releasing this option in v1 would make it insta-stable.
4. The option is only marginally useful, as users will be presented with a freshly generated key directly in the error messages if their master key is absent/too short.
5. If we remove this option now, we can still add it back in a future v1 release. If we add it now, we won't be able to remove it in any future v1 version.

## PR checklist
Please check if your PR fulfills the following requirements:
- [ ] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [ ] Have you read the contributing guidelines?
- [ ] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!

### Impacts

This impacts the docs team, as they would previously have had to document this option, and they may have wanted to use it in the user workflow.

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2023-01-05 16:19:40 +00:00
Clément Renault
aa4b813237 Derive Default on IndexerOpts 2023-01-05 16:00:45 +01:00
Louis Dureuil
eb08a0fb0b Remove --generate-master-key option 2023-01-05 14:55:24 +01:00
Clément Renault
cda529c07b Remove hidden but usable CLI arguments 2023-01-05 14:25:41 +01:00
bors[bot]
1f8ddb366c Merge #3302
3302: Update insta snap tests for index dates of dump v5 r=curquiza a=loiclec

This PR simply updates the content of the insta snapshot test following https://github.com/meilisearch/meilisearch/pull/3013 . I manually verified that the dates in the snaps are indeed correct.

Co-authored-by: Loïc Lecrenier <loic.lecrenier@me.com>
2023-01-05 12:58:10 +00:00
bors[bot]
8a3da0c2a7 Merge #3304
3304: Fix update cargo.toml workflow r=Kerollmops a=curquiza

Following https://github.com/meilisearch/meilisearch/pull/3224

Fixes #3219 

Co-authored-by: Clémentine Urquizar - curqui <clementine@meilisearch.com>
2023-01-05 12:16:57 +00:00
Clémentine Urquizar - curqui
c840d55e89 Fix update cargo.toml workflow 2023-01-05 12:56:02 +01:00
bors[bot]
c7a3992510 Merge #3303
3303: Update version for the next release (v1.0.0) in Cargo.toml files r=curquiza a=meili-bot

⚠️ This PR is automatically generated. Check the new version is the expected one before merging.

Co-authored-by: curquiza <curquiza@users.noreply.github.com>
2023-01-05 11:53:09 +00:00
curquiza
28408816ef Update version for the next release (v1.0.0) in Cargo.toml files 2023-01-05 11:45:15 +00:00
bors[bot]
0eaa8ca255 Merge #3266
3266: Improve the way we receive the documents payload - serde multiple ndjson fix r=curquiza a=jiangbo212

# Pull Request

## Related issue
Fixes #3037 

## Related PR
#3164 

## What does this PR do?
Sorry, this PR is mainly to fix the problems caused by my previously provided PR #3164, which causes multiple ndjson deserialization failures.
- Fix the deserialization failures on multiple ndjson data and add a test for it
- Fix the serde JSON-array error and fall back to `from_slice` for it: only use `from_slice` when the serde error category is `data`, which indicates the JSON data is a single JSON object (see the sketch below)
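
A minimal sketch of that fallback using `serde_json` (the function name is illustrative):

```rust
use serde_json::{error::Category, Value};

// Try the payload as a JSON array first; an error of category `data` means
// the bytes were valid JSON but not an array, i.e. a single document, so we
// re-parse them with `from_slice` as one object.
fn parse_documents(bytes: &[u8]) -> serde_json::Result<Vec<Value>> {
    match serde_json::from_slice::<Vec<Value>>(bytes) {
        Ok(docs) => Ok(docs),
        Err(e) if e.classify() == Category::Data => {
            serde_json::from_slice::<Value>(bytes).map(|doc| vec![doc])
        }
        Err(e) => Err(e),
    }
}
```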

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: jiangbo212 <peiyaoliukuan@126.com>
2023-01-05 11:30:29 +00:00
bors[bot]
201bc633d2 Merge #3288
3288: Replace underscores with hyphens in documentation link to error code r=dureuill a=loiclec

# Pull Request

## Related issue
Fixes #3097 

## Implementation
Add a new dependency on `convert_case` (already used transitively by `deserr`) so that the link can be generated using:
```rust
    /// return the doc url associated with the error
    fn url(&self) -> String {
        format!(
            "https://docs.meilisearch.com/errors#{}",
            self.name().to_case(convert_case::Case::Kebab)
        )
    }
```
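
For instance, the `Casing` trait maps a snake_case error name to its hyphenated anchor:

```rust
use convert_case::{Case, Casing};

fn main() {
    // Kebab case replaces the underscores with hyphens.
    assert_eq!("invalid_search_facets".to_case(Case::Kebab), "invalid-search-facets");
}
```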

## Review
I'd like the reviewer to check whether it is expected that the content of some `dump` snapshot tests changed :-)

Co-authored-by: ManyTheFish <many@meilisearch.com>
Co-authored-by: bors[bot] <26634292+bors[bot]@users.noreply.github.com>
Co-authored-by: Loïc Lecrenier <loic.lecrenier@me.com>
2023-01-05 11:08:57 +00:00
Loïc Lecrenier
ba839852f5 Update insta snap tests for index dates of dump v5 2023-01-05 11:45:40 +01:00
Louis Dureuil
be9786bed9 Change primary key inference error messages 2023-01-05 10:40:09 +01:00
Loïc Lecrenier
f9aa897ab5 Update insta tests 2023-01-05 10:19:19 +01:00
Loïc Lecrenier
2d74678b51 Replace underscores with hyphens in doc link to error code 2023-01-05 10:09:02 +01:00
bors[bot]
db7eaf23f4 Merge #3251
3251: Add a specific test on finite pagination placeholder search with disti… r=curquiza a=ManyTheFish

Add a specific test on finite pagination placeholder search with distinct attributes


related to https://github.com/meilisearch/milli/pull/743
related to https://github.com/meilisearch/meilisearch/issues/3200

poke `@curquiza` 

> note that the destination branch should be changed

Co-authored-by: ManyTheFish <many@meilisearch.com>
2023-01-05 09:06:53 +00:00
bors[bot]
32f7cfa5cb Merge #3295
3295: Adjust Master Key-related messages r=dureuill a=dureuill

# Pull Request

## Related issue
Follow up for #3272 

## What does this PR do?
- Consistently capitalize "master key" (instead of "Master Key" sometimes) (see https://github.com/meilisearch/specifications/pull/209#discussion_r1060081094)
- Clarify that the counted unit for master key length is bytes, not characters (see https://github.com/meilisearch/documentation/issues/2069#issuecomment-1368873167)

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2023-01-05 08:43:23 +00:00
bors[bot]
a402fc4486 Merge #3013
3013: Extract the dates out of the dumpv5. r=loiclec a=funilrys

Hi there, 

please review this PR that tries to fix #2986. I'm still learning Rust and I found that #2986 is an excellent way for me to read and learn what others do with Rust. So please excuse my semantics ...

Stay safe and healthy.

---

# Pull Request

This patch possibly fixes #2986.

This patch introduces a way to fill the IndexMetadata.created_at and IndexMetadata.updated_at keys from the task events. This is done by reading the creation date of the first event (created_at) and the creation date of the last event (updated_at).


## Related issue
Fixes #2986

## What does this PR do?
- Extract the dates out of the dumpv5.

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: funilrys <contact@funilrys.com>
2023-01-05 08:23:52 +00:00
bors[bot]
502d9e4b24 Merge #3278
3278: Remove `--max-index-size` and `--max-task-db-size` flags r=Kerollmops a=dureuill

# Pull Request

## Related issue
Fixes #3231 

## What does this PR do?
- Remove `--max-index-size` and `--max-task-db-size` flags from the CLI, config file and environment variable
- Set the size of all indexes to **500GiB** and the size of the task DB to **10GiB**.  Reviewers might want to review these values carefully.

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2023-01-04 16:44:27 +00:00
Louis Dureuil
a85ff1f690 Fix documentation
Co-authored-by: Clément Renault <clement@meilisearch.com>
2023-01-04 17:20:03 +01:00
Louis Dureuil
233372abea Remove --max-index-size and --max-task-db-size 2023-01-04 17:20:01 +01:00
bors[bot]
13d4ae264a Merge #3269
3269: Simplify primary key inference r=dureuill a=dureuill

# Pull Request

## Related issue
Related to https://github.com/meilisearch/meilisearch/issues/3233

## What does this PR do?
- Integrates https://github.com/meilisearch/milli/pull/752 in meilisearch
- Remove `Serialize` and `Deserialize` from `error::Code` as it is unused.
- No longer filter on `milli` logs when `--log-level` is "info".
  - `milli` only has the newly-added inference log at the `info` level (from grepping `info` in the codebase)
  - the default value for `--log-level` is "INFO" and not "info" since `v0.30` so the filter is not active by default.
- updates milli to v0.38.0

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2023-01-04 16:14:36 +00:00
bors[bot]
c766e06003 Merge #3281
3281: Merge `--schedule-snapshot` and `--snapshot-interval-sec` options r=dureuill a=dureuill

# Pull Request

## Related issue
Fixes #3131

## What does this PR do?
- Removes `--snapshot-interval-sec`
- `--schedule-snapshot` now accepts an optional integer value specifying the interval in seconds
- The config file no longer has a snapshot_interval_sec key.  Instead, the schedule_snapshot key now additionally accepts an integer value specifying the interval in seconds
- The env variable MEILI_SNAPSHOT_INTERVAL no longer exists
- When defined, the env variable MEILI_SCHEDULE_SNAPSHOT specifies the snapshot interval in seconds. If snapshots are disabled, the variable is undefined.

---

Relevant part of the `--help`

<img width="885" alt="Capture d’écran 2022-12-27 à 18 22 32" src="https://user-images.githubusercontent.com/41078892/209700626-1a1292c1-14e3-45b6-8265-e0adbd76ecf1.png">

---

### Tests

| `schedule_snapshot` in config.toml | `--schedule-snapshot` flag on CLI | `MEILI_SCHEDULE_SNAPSHOT` | `opt.schedule_snapshot` |
|--|--|--|--|
| missing | missing | missing | `Disabled`
| `false` | missing | missing | `Disabled`
| `true` | missing | missing | `Enabled(86400)`
| `1234` | missing | missing | `Enabled(1234)`
| missing | `--schedule-snapshot` | missing | `Enabled(86400)`
| `false` | `--schedule-snapshot` | missing | `Enabled(86400)` 
| missing | `--schedule-snapshot 2345` | missing | `Enabled(2345)`
| `false` | `--schedule-snapshot 2345` | missing | `Enabled(2345)`
| `true` | `--schedule-snapshot 2345` | missing | `Enabled(2345)`
| `1234` | `--schedule-snapshot 2345` | missing | `Enabled(2345)`
| `false` | `--schedule-snapshot 2345` | 3456 | `Enabled(2345)`
| `false` | `--schedule-snapshot` | 3456 | **`Enabled(86400)`**
| `1234` | missing | 3456 | `Enabled(3456)`
| `false` | missing | 3456 | `Enabled(3456)`
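
A sketch of how such an optional-value flag can be declared, assuming a clap 4-style derive (the real Meilisearch options struct additionally layers config-file and environment-variable handling on top of this):

```rust
use clap::Parser;

#[derive(Parser)]
struct Opt {
    /// `--schedule-snapshot` alone enables snapshots with the default 86400 s
    /// interval; `--schedule-snapshot 2345` sets a custom interval in seconds.
    #[arg(long, num_args = 0..=1, default_missing_value = "86400")]
    schedule_snapshot: Option<u64>,
}

fn main() {
    let opt = Opt::parse();
    println!("{:?}", opt.schedule_snapshot);
}
```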


## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2023-01-04 14:25:47 +00:00
Louis Dureuil
fcbd47281b Fix tests 2023-01-04 14:24:20 +01:00
Louis Dureuil
b6d80293f7 Propagate new error codes from milli 2023-01-04 14:24:20 +01:00
Louis Dureuil
0e98a71a24 Update milli to v0.38 2023-01-04 14:24:20 +01:00
Louis Dureuil
5cb566b165 No longer filter out milli logs when --log-level is "info" 2023-01-04 14:24:20 +01:00
Louis Dureuil
9d46caba29 Code doesn't need to be serializable/deserializable 2023-01-04 14:16:22 +01:00
Louis Dureuil
c4aa5cc7d0 Merge --schedule-snapshot and --snapshot-interval-sec options 2023-01-04 14:13:54 +01:00
bors[bot]
12c3d432f9 Merge #3293
3293: Explicitly restrict log level options to those that are documented r=loiclec a=loiclec

Fixes https://github.com/meilisearch/meilisearch/issues/3292





Co-authored-by: Loïc Lecrenier <loic.lecrenier@me.com>
2023-01-04 10:30:35 +00:00
bors[bot]
c3f4835e8e Merge #733
733: Avoid a prefix-related worst-case scenario in the proximity criterion r=loiclec a=loiclec

# Pull Request

## Related issue
Somewhat fixes (until merged into meilisearch) https://github.com/meilisearch/meilisearch/issues/3118

## What does this PR do?
When a query ends with a word and a prefix, such as:
```
word pr
```
Then we first determine whether `pr` *could possibly* be in the proximity prefix database before querying it. There are then three possibilities (sketched in code below):

1. `pr` is not in any prefix cache because it is not the prefix of many words. We don't query the proximity prefix database. Instead, we list all the word derivations of `pr` through the FST and query the regular proximity databases.

2. `pr` is in the prefix cache but cannot be found in the proximity prefix databases. **In this case, we partially disable the proximity ranking rule for the pair `word pr`.** This is done as follows:
   1. Only find the documents where `word` is in proximity to `pr` **exactly** (no derivations)
   2. Otherwise, assume that their proximity in all the documents in which they coexist is >= 8

3. `pr` is in the prefix cache and can be found in the proximity prefix databases. In this case we simply query the proximity prefix databases.
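
A hypothetical sketch of this three-way decision (names and signature are illustrative, not milli's actual API):

```rust
// Which resolution path to take for the trailing prefix of a query.
enum PrefixProximityStrategy {
    ExpandThroughFst, // case 1: list the word derivations, query the regular DBs
    ExactPairOnly,    // case 2: partially disable the proximity ranking rule
    QueryPrefixDb,    // case 3: query the proximity prefix databases directly
}

fn choose_strategy(in_prefix_cache: bool, in_prefix_proximity_db: bool) -> PrefixProximityStrategy {
    if !in_prefix_cache {
        PrefixProximityStrategy::ExpandThroughFst
    } else if !in_prefix_proximity_db {
        PrefixProximityStrategy::ExactPairOnly
    } else {
        PrefixProximityStrategy::QueryPrefixDb
    }
}
```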

Note that if a prefix is longer than 2 bytes, then it cannot be in the proximity prefix databases. Also, proximities larger than 4 are not present in these databases either. Therefore, the impact on relevancy is:

1. For common prefixes of one or two letters: we no longer distinguish between proximities from 4 to 8
2. For common prefixes of more than two letters: we no longer distinguish between any proximities
3. For uncommon prefixes: nothing changes

Regarding (1), it means that these two documents would be considered equally relevant according to the proximity rule for the query `heard pr` (IF `pr` is the prefix of more than 200 words in the dataset):
```json
[
    { "text": "I heard there is a faster proximity criterion" },
    { "text": "I heard there is a faster but less relevant proximity criterion" }
]
```

Regarding (2), it means that two documents would be considered equally relevant according to the proximity rule for the query "faster pro":
```json
[
    { "text": "I heard there is a faster but less relevant proximity criterion" }
    { "text": "I heard there is a faster proximity criterion" },
]
```
But the following document would be considered more relevant than the two documents above:
```json
{ "text": "I heard there is a faster swimmer who is competing in the pro section of the competition " }
```

Note, however, that this change of behaviour only occurs when using the set-based version of the proximity criterion. In cases where there are fewer than 1000 candidate documents when the proximity criterion is called, this PR does not change anything. 

---

## Performance

I couldn't use the existing search benchmarks to measure the impact of the PR, but I did some manual tests with the `songs` benchmark dataset.   

```
1. 10x 'a':
	- 640ms ⟹ 630ms                  = no significant difference
2. 10x 'b':
	- set-based: 4.47s ⟹ 7.42s       = bad, ~2x regression
	- dynamic: 1s ⟹ 870ms            = no significant difference
3. 'Someone I l':
	- set-based: 250ms ⟹ 12ms        = very good, x20 speedup
	- dynamic: 21ms ⟹ 11ms           = good, x2 speedup
4. 'billie e':
	- set-based: 623ms ⟹ 2ms         = very good, x300 speedup
	- dynamic: ~4ms ⟹ 4ms            = no difference
5. 'billie ei':
	- set-based: 57ms ⟹ 20ms         = good, ~2x speedup
	- dynamic: ~4ms ⟹ ~2ms           = no significant difference
6. 'i am getting o':
	- set-based: 300ms ⟹ 60ms        = very good, 5x speedup
	- dynamic: 30ms ⟹ 6ms            = very good, 5x speedup
7. 'prologue 1 a 1':
	- set-based: 3.36s ⟹ 120ms       = very good, 30x speedup
	- dynamic: 200ms ⟹ 30ms          = very good, 6x speedup
8. 'prologue 1 a 10':
	- set-based: 590ms ⟹ 18ms        = very good, 30x speedup
	- dynamic: 82ms ⟹ 35ms           = good, ~2x speedup
```

Performance is often significantly better, but there is also one regression in the set-based implementation with the query `b b b b b b b b b b`.

Co-authored-by: Loïc Lecrenier <loic.lecrenier@me.com>
2023-01-04 09:00:50 +00:00
Loïc Lecrenier
d082ded7ad Explicitly restrict log level options to those that are documented
Fixes https://github.com/meilisearch/meilisearch/issues/3292
2023-01-04 09:40:24 +01:00
bors[bot]
49f58b2c47 Merge #732
732: Interpret synonyms as phrases r=loiclec a=loiclec

# Pull Request

## Related issue
Fixes (when merged into meilisearch) https://github.com/meilisearch/meilisearch/issues/3125

## What does this PR do?
We now map multi-word synonyms to phrases instead of loose words. Such that the request:
```
btw I am going to nyc soon
```
is interpreted as (when the synonym interpretation is chosen for both `btw` and `nyc`):
```
"by the way" I am going to "New York City" soon
```
instead of:
```
by the way I am going to New York City soon
```

This prevents queries containing multi-word synonyms from exceeding the word length limit and degrading search performance.
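
For illustration, a minimal sketch of the substitution (the types are invented for this example and are not milli's query tree):

```rust
// A multi-word synonym becomes a single Phrase unit instead of loose words.
enum QueryPart {
    Word(String),
    Phrase(Vec<String>),
}

fn expand(token: &str) -> QueryPart {
    match token {
        "nyc" => QueryPart::Phrase(vec!["new".into(), "york".into(), "city".into()]),
        "btw" => QueryPart::Phrase(vec!["by".into(), "the".into(), "way".into()]),
        other => QueryPart::Word(other.to_string()),
    }
}
```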

In terms of relevancy, there is a debate to have. I personally think this could be considered an improvement, since it would be strange for a user to search for:
```
good DIY project
```
and have a result such as:
```
{
    "text": "whether it is a good project to do, you'll have to decide for yourself"
}
```
However, for synonyms such as `NYC -> New York City`, then we will stop matching documents where `New York` is separated from `City`. This is however solvable by adding an additional mapping: `NYC -> New York`.

## Performance

With the old behaviour, some long search requests making heavy use of synonyms could take minutes to execute. This is no longer the case: these search requests now take an average amount of time to resolve.

Co-authored-by: Loïc Lecrenier <loic.lecrenier@me.com>
2023-01-04 08:34:18 +00:00
bors[bot]
947f08793a Merge #3296
3296: Remove `--disable-auto-batching` CLI option r=gmourier a=loiclec

Fixes #3294 

The `index-scheduler` code is not modified, only the CLI options have changed.

Co-authored-by: Loïc Lecrenier <loic.lecrenier@me.com>
2023-01-03 16:57:14 +00:00
bors[bot]
6a10e85707 Merge #736
736: Update charabia r=curquiza a=ManyTheFish

Update Charabia to the last version.

> We are now Romanizing Chinese characters into Pinyin.
> Note that we keep the accents because they are in fact never typed directly by the end user; moreover, changing an accent leads to a different Chinese character, and I don't have sufficient knowledge to forecast the impact of removing accents in this context.

Co-authored-by: ManyTheFish <many@meilisearch.com>
2023-01-03 15:44:41 +00:00
Louis Dureuil
17dac72464 Characters -> bytes 2023-01-03 15:31:02 +01:00
Loïc Lecrenier
b821c72459 Remove --disable-auto-batching CLI option 2023-01-03 15:01:04 +01:00
Louis Dureuil
7b2575c646 Master Key -> master key 2023-01-03 14:45:23 +01:00
bors[bot]
c505fa9d7d Merge #758
758: Bump taiki-e/install-action from 1 to 2 r=curquiza a=dependabot[bot]

Bumps [taiki-e/install-action](https://github.com/taiki-e/install-action) from 1 to 2.
<details>
<summary>Release notes</summary>
<p><em>Sourced from <a href="https://github.com/taiki-e/install-action/releases">taiki-e/install-action's releases</a>.</em></p>
<blockquote>
<h2>2.0.0</h2>
<p>This release implements a mechanism to automatically track the latest version of the tool on our end. (<a href="https://github-redirect.dependabot.com/taiki-e/install-action/pull/27">#27</a>)
Hopefully, this will avoid situations such as &quot;new version of the tool has been released, but the maintainer has not been aware of it for a number of months&quot;.
This also makes it easier to add support for new tools.</p>
<p>This release also includes the following improvements:</p>
<ul>
<li>
<p>Verify SHA256 checksums for downloaded files in all tools installed from GH releases. (<a href="https://github-redirect.dependabot.com/taiki-e/install-action/pull/27">#27</a>)</p>
</li>
<li>
<p>Support omitting the patch/minor version in all tools installed from GH releases. (<a href="https://github-redirect.dependabot.com/taiki-e/install-action/pull/27">#27</a>)</p>
<p>For example:</p>
<pre lang="yaml"><code>- uses: taiki-e/install-action@v2
  with:
    tool: cargo-hack@0.5
</code></pre>
<p>You can also omit the minor version if the major version of tool is 1 or greater.</p>
</li>
<li>
<p>Support <code>just</code>. (<a href="https://github-redirect.dependabot.com/taiki-e/install-action/pull/34">#34</a>)</p>
</li>
<li>
<p>Support <code>dprint</code>. (<a href="https://github-redirect.dependabot.com/taiki-e/install-action/pull/34">#34</a>)</p>
</li>
</ul>
<p>Note: This release is considered a breaking change because installing on versions not yet recognized by the action or on pre-release versions will no longer work with this release. (They were never officially supported, but they could work before.) Please submit an issue if you need these supports again.</p>
<h2>1.17.3</h2>
<ul>
<li>Update <code>wasmtime@latest</code> to 4.0.0.</li>
</ul>
<h2>1.17.2</h2>
<ul>
<li>Update <code>mdbook@latest</code> to 0.4.25.</li>
</ul>
<h2>1.17.1</h2>
<ul>
<li>Update <code>mdbook@latest</code> to 0.4.23.</li>
<li>Support <code>mdbook</code> on Linux (musl).</li>
<li>Update <code>cargo-llvm-cov@latest</code> to 0.5.3.</li>
</ul>
<h2>1.17.0</h2>
<ul>
<li>Update <code>protoc@latest</code> to 3.21.12.</li>
<li>Support aarch64 self-hosted runners (Linux, macOS, Windows).</li>
<li>Improve support for Fedora/RHEL based containers/self-hosted runners.</li>
</ul>
<h2>1.16.0</h2>
<ul>
<li>
<p>Update <code>cargo-binstall@latest</code> to 0.18.1. (<a href="https://github-redirect.dependabot.com/taiki-e/install-action/pull/32">#32</a>, thanks <a href="https://github.com/NobodyXu"><code>@NobodyXu</code></a>)</p>
</li>
<li>
<p>If the host environment lacks packages required for installation, such as <code>curl</code> or <code>tar</code>, install them if possible.</p>
<p>It is mainly intended to make the use of this action easy on containers or self-hosted runners, and currently supports Debian-based distributions (including Ubuntu) and Alpine.</p>
</li>
</ul>
<!-- raw HTML omitted -->
</blockquote>
<p>... (truncated)</p>
</details>
<details>
<summary>Changelog</summary>
<p><em>Sourced from <a href="https://github.com/taiki-e/install-action/blob/main/CHANGELOG.md">taiki-e/install-action's changelog</a>.</em></p>
</details>
<details>
<summary>Commits</summary>
<ul>
<li><a href="8ffc26aecd"><code>8ffc26a</code></a> Release 2.0.1</li>
<li><a href="82e9eb5996"><code>82e9eb5</code></a> Update DEVELOPMENT.md</li>
<li><a href="d3b7ad8380"><code>d3b7ad8</code></a> Update changelog</li>
<li><a href="f1a96ee3ed"><code>f1a96ee</code></a> Update changelog when manifest update</li>
<li><a href="46063c186c"><code>46063c1</code></a> Update cargo-minimal-versions</li>
<li><a href="048586d7a8"><code>048586d</code></a> Update cargo-hack</li>
<li><a href="d117b8d41a"><code>d117b8d</code></a> Remove outdated todo</li>
<li><a href="76828c33cd"><code>76828c3</code></a> Release 2.0.0</li>
<li><a href="eea8c318de"><code>eea8c31</code></a> Update readme and changelog</li>
<li><a href="ab0e193cf5"><code>ab0e193</code></a> Support dprint</li>
<li>Additional commits viewable in <a href="https://github.com/taiki-e/install-action/compare/v1...v2">compare view</a></li>
</ul>
</details>
<br />


[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=taiki-e/install-action&package-manager=github_actions&previous-version=1&new-version=2)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

You can trigger a rebase of this PR by commenting `@dependabot rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---

<details>
<summary>Dependabot commands and options</summary>
<br />

You can trigger Dependabot actions by commenting on this PR:
- `@dependabot rebase` will rebase this PR
- `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it
- `@dependabot merge` will merge this PR after your CI passes on it
- `@dependabot squash and merge` will squash and merge this PR after your CI passes on it
- `@dependabot cancel merge` will cancel a previously requested merge and block automerging
- `@dependabot reopen` will reopen this PR if it is closed
- `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
- `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)


</details>

Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2023-01-03 10:44:36 +00:00
bors[bot]
ab655a85e8 Merge #3279
3279: Clarify error message when the db and engine versions are incompatible r=irevoire a=dureuill

# Pull Request

## Related issue

Related to https://github.com/meilisearch/meilisearch/issues/2752

## What does this PR do?
- Implements https://github.com/meilisearch/product/discussions/572#discussioncomment-4390616

## PR checklist
Please check if your PR fulfills the following requirements:
- [ ] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2023-01-02 17:18:11 +00:00
bors[bot]
6425e06cf2 Merge #3274
3274: Reject master keys that are less than 16 bytes and add `--generate-master-key` CLI option r=irevoire a=dureuill

# Pull Request

## Related issue
Fix #3272 
Fix #3287

## What does this PR do?

### User standpoint

---

- Adds a `--generate-master-key` CLI flag to generate a fresh Master Key and exit.

<img width="1351" alt="Capture d’écran 2022-12-22 à 14 18 58" src="https://user-images.githubusercontent.com/41078892/209142778-eab52eeb-eaa8-409b-897a-c0d5728c8aaa.png">

---

(relevant fragment of the `--help` message)

<img width="1351" alt="Capture d’écran 2022-12-22 à 14 19 40" src="https://user-images.githubusercontent.com/41078892/209142891-ebfa2ed6-f231-4f76-a3ae-b7542c7aef04.png">

---

- When `meilisearch` is started in the `development` environment and no Master Key has been provided, then the binary prints a warning before starting.

<img width="1351" alt="Capture d’écran 2022-12-22 à 14 14 49" src="https://user-images.githubusercontent.com/41078892/209142158-54eba3b7-bf71-4f3f-8840-0600b13a1a9f.png">

---

- When `meilisearch` is started in the `development` environment and the provided Master Key is shorter than 16 bytes, then the binary prints a warning before starting.

<img width="1351" alt="Capture d’écran 2022-12-22 à 14 15 58" src="https://user-images.githubusercontent.com/41078892/209142295-0209fe47-c03b-424f-a73f-cee9b633137a.png">

---

- When `meilisearch` is started in the `production` environment, and no Master Key is provided, the error message is altered to generate a fresh Master Key.

<img width="1351" alt="Capture d’écran 2022-12-22 à 17 29 02" src="https://user-images.githubusercontent.com/41078892/209180540-0def5798-15db-47f0-a6ec-8cfa081dea77.png">


---

- When `meilisearch` is started in the `production` environment, and the provided Master Key is shorter than 16 bytes, then the binary exits with an error.

<img width="1351" alt="Capture d’écran 2022-12-22 à 17 28 47" src="https://user-images.githubusercontent.com/41078892/209180567-fa54fe33-fbc4-4b9f-b281-7dfb7b33af85.png">


---

This implements the solution B described here: https://github.com/meilisearch/product/discussions/538#discussioncomment-4391346 

### Implementation standpoint

- Add a new `meilisearch-auth::generate_master_key` function that uses a Cryptographic Random Number Generator (CRNG) to fill a vector of 32 bytes before encoding these bytes as base64
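
A hedged sketch of that function (the crate choices here, `rand` and `base64`, are assumptions made for the illustration):

```rust
use rand::RngCore;

// Fill 32 bytes from the OS's cryptographically secure RNG, then base64-encode.
fn generate_master_key() -> String {
    let mut bytes = [0u8; 32];
    rand::rngs::OsRng.fill_bytes(&mut bytes);
    base64::encode(bytes)
}
```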

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Louis Dureuil <louis@meilisearch.com>
Co-authored-by: Tamo <tamo@meilisearch.com>
2023-01-02 16:00:40 +00:00
Tamo
1692f58b83 slightly update the message associated with the cli parameter + accept an env variable 2023-01-02 16:49:35 +01:00
Tamo
9ba4d0f921 update the error messages according to the spec 2023-01-02 16:43:23 +01:00
Tamo
4b6ffe0cd1 Update meilisearch-auth/src/lib.rs 2023-01-02 16:33:02 +01:00
bors[bot]
336c77aa45 Merge #3245
3245: Enable create_raw_index(...) to specify time r=irevoire a=amab8901

# Pull Request

## Related issue
Partially fixes #2983 

## What does this PR do?
- Enables [`create_raw_index`](660be071b5/index-scheduler/src/lib.rs (L868)) to specify time

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: amab8901 <amab8901@protonmail.com>
2023-01-02 14:11:39 +00:00
bors[bot]
9519e60f97 Merge #709
709: Optimise the `ExactWords` sub-criterion within `Exactness` r=loiclec a=loiclec

# Pull Request

## Related issue
Fixes (partially) https://github.com/meilisearch/meilisearch/issues/3116

## What does this PR do?
1. Reduces the algorithmic complexity of finding the documents containing N exact words from something that is exponential to something that is polynomial.
2. Cache intermediary results between different calls to the `exactness` criterion.
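
As a hedged illustration of point 1 (the real criterion works on roaring bitmaps; this sketch only shows the counting idea): instead of intersecting every subset of the N words, a single pass over the postings buckets each document by how many exact words it contains.

```rust
use std::collections::HashMap;

// postings[i] = ids of the documents containing the i-th exact word.
fn docs_by_exact_word_count(postings: &[Vec<u32>]) -> HashMap<u32, usize> {
    let mut counts = HashMap::new();
    for docids in postings {
        for &doc in docids {
            *counts.entry(doc).or_insert(0) += 1; // doc matched one more word
        }
    }
    counts // document id -> number of exact words it contains
}
```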

## Performance Results
On the `smol_songs.csv` dataset, a request containing 10 common words now takes about 60ms instead of 5 seconds to execute. For example, this is the case with this (admittedly nonsensical) request: `Rock You Hip Hop Folk World Country Electronic Love The`.


Co-authored-by: Loïc Lecrenier <loic.lecrenier@me.com>
2023-01-02 12:28:30 +00:00
Loïc Lecrenier
b5df889dcb Apply review suggestions: simplify implementation of exactness criterion 2023-01-02 13:11:47 +01:00
bors[bot]
31155dce4c Merge #752
752: Simplify primary key inference r=irevoire a=dureuill

# Pull Request

## Related issue
Related to https://github.com/meilisearch/meilisearch/issues/3233

## What does this PR do?

### User PoV

- Change primary key inference to only consider an attribute as a candidate when its name ends with "id", rather than when it simply contains "id".
- Change primary key inference to always fail when there are multiple candidates.
- Replace `UserError::MissingPrimaryKey` with `UserError::NoPrimaryKeyCandidateFound` and `UserError::MultiplePrimaryKeyCandidatesFound`
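
A simplified sketch of the new rule (the error values are placeholders; case-insensitive matching is an assumption of this example):

```rust
fn infer_primary_key<'a>(field_names: &[&'a str]) -> Result<&'a str, &'static str> {
    let candidates: Vec<&str> = field_names
        .iter()
        .copied()
        .filter(|name| name.to_lowercase().ends_with("id"))
        .collect();
    match candidates.len() {
        1 => Ok(candidates[0]),
        0 => Err("no primary key candidate found"),
        _ => Err("multiple primary key candidates found"), // no more guessing
    }
}
```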

### Implementation-wise

- Remove uses of UserError::MissingPrimaryKey not pertaining to inference. This introduces a possible panicking path.

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2023-01-02 11:44:22 +00:00
Loïc Lecrenier
8d36570958 Add explicit criterion impl strategy to proximity search tests 2023-01-02 10:37:01 +01:00
dependabot[bot]
939e7faf31 Bump taiki-e/install-action from 1 to 2
Bumps [taiki-e/install-action](https://github.com/taiki-e/install-action) from 1 to 2.
- [Release notes](https://github.com/taiki-e/install-action/releases)
- [Changelog](https://github.com/taiki-e/install-action/blob/main/CHANGELOG.md)
- [Commits](https://github.com/taiki-e/install-action/compare/v1...v2)

---
updated-dependencies:
- dependency-name: taiki-e/install-action
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
2023-01-01 10:02:00 +00:00
bors[bot]
776acb5ed3 Merge #3276
3276: README: Replace Slack link with Discord r=dureuill a=shivaylamba

# Pull Request

## Related issue
Fixes #3275 

## What does this PR do?

Update Slack link with Discord link in the README

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Shivay Lamba <shivaylamba@gmail.com>
2022-12-29 10:32:42 +00:00
Louis Dureuil
3e9834abff Change error message when the db version is incompatible with engine version. 2022-12-26 17:34:36 +01:00
Louis Dureuil
3cba476a9f Add --generate-master-key CLI option 2022-12-26 10:36:45 +01:00
Louis Dureuil
57e851d8a9 Check for key length 2022-12-26 10:36:45 +01:00
Shivay Lamba
9c45850bd2 README: Replace Slack link with Discord 2022-12-26 00:19:13 +05:30
funilrys
3e0e8164a3 fixup! Adjust + Cleanup changes. 2022-12-22 18:01:54 +01:00
funilrys
0bc4572905 Adjust + Cleanup changes.
Indeed, I missed some of the changes that were introduced by #3190.
2022-12-22 17:53:33 +01:00
funilrys
4e6c663a2e Release unnecessary ownership. 2022-12-22 17:47:58 +01:00
funilrys
e2775c6f49 Remove unused object. 2022-12-22 17:47:58 +01:00
funilrys
c07a5932cb Apply fmt. 2022-12-22 17:47:58 +01:00
funilrys
528a944997 Reimplement v5 date extraction.
Indeed, before this patch the implementation wasn't correct.
2022-12-22 17:47:58 +01:00
funilrys
13fb5ce974 Re-Open tasks list when needed.
Indeed, before this patch we were using the reference instead of
"reopening" the task list each time we needed to access it.
Without this patch, all other usages of the task attribute would
break.
2022-12-22 17:47:57 +01:00
funilrys
a43a0712fa Add reader.v5.tasks.Task.updated_at.
There was no way to "quickly" get the update date.
2022-12-22 17:47:57 +01:00
funilrys
1be4619b91 Add reader.v5.tasks.Task.created_at.
There was no way to "quickly" get the creation date.
2022-12-22 17:47:57 +01:00
funilrys
cf50f85986 Add reader.v5.tasks.Task.processed_at.
There was no way to "quickly" get the processed date.
2022-12-22 17:47:57 +01:00
funilrys
61b3a29ff3 Extract the dates out of the dumpv5.
This patch possibly fixes #2986.

This patch introduces a way to fill the IndexMetadata.created_at
and IndexMetadata.updated_at keys from the tasks events.
This is done by reading the creation date of the first event
(created_at) and the creation date of the last event (updated_at).
2022-12-22 17:47:57 +01:00
Loïc Lecrenier
32c6062e65 Optimise exactness criterion
1. Cache some results between calls to next()
2. Compute the combinations of exact words more efficiently
2022-12-22 12:28:45 +01:00
Loïc Lecrenier
f097aafa1c Add unit test for prefix handling by the proximity criterion 2022-12-22 12:08:00 +01:00
Loïc Lecrenier
777b387dc4 Avoid a prefix-related worst-case scenario in the proximity criterion 2022-12-22 12:08:00 +01:00
Loïc Lecrenier
b0f3dc2c06 Interpret synonyms as phrases 2022-12-22 12:07:51 +01:00
Louis Dureuil
66e18eae79 auth: add generate_master_key function 2022-12-22 11:55:27 +01:00
amab8901
9a39c4e40d Get date from IndexMetaData 2022-12-22 11:46:17 +01:00
amab8901
df176aaf01 Insert dump_reader.date() into create_raw_index(_) argument 2022-12-21 15:16:31 +01:00
Louis Dureuil
4b166bea2b Add primary_key_inference test 2022-12-21 15:13:38 +01:00
Louis Dureuil
5943100754 Fix existing tests 2022-12-21 15:13:38 +01:00
Louis Dureuil
b24def3281 Add logging when inference took place.
Displays log message in the form:
```
[2022-12-21T09:19:42Z INFO  milli::update::index_documents::enrich] Primary key was not specified in index. Inferred to 'id'
```
2022-12-21 15:13:38 +01:00
Louis Dureuil
402dcd6b2f Simplify primary key inference 2022-12-21 15:13:38 +01:00
Louis Dureuil
13c95d25aa Remove uses of UserError::MissingPrimaryKey not related to inference 2022-12-21 15:13:36 +01:00
amab8901
0893b175dc Merge branch 'main' into 2983-forward-date-to-milli 2022-12-21 14:31:19 +01:00
amab8901
d5978d11e1 Refactor 2022-12-21 14:28:00 +01:00
bors[bot]
a8defb585b Merge #742
742: Add a "Criterion implementation strategy" parameter to Search r=irevoire a=loiclec

Add a parameter to search requests which determines the implementation strategy of the criteria. This can be either `set-based`, `iterative`, or `dynamic` (i.e. choosing between set-based or iterative at search time); a small sketch follows below. See https://github.com/meilisearch/milli/issues/755 for more context about this change.
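
As an illustration, the three values could map onto an enum like the following (the actual milli type and parsing may differ):

```rust
use std::str::FromStr;

enum CriterionImplementationStrategy {
    SetBased,
    Iterative,
    Dynamic,
}

impl FromStr for CriterionImplementationStrategy {
    type Err = String;

    fn from_str(s: &str) -> Result<Self, Self::Err> {
        match s {
            "set-based" => Ok(Self::SetBased),
            "iterative" => Ok(Self::Iterative),
            "dynamic" => Ok(Self::Dynamic),
            other => Err(format!("unknown strategy: {other}")),
        }
    }
}
```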


Co-authored-by: Loïc Lecrenier <loic.lecrenier@me.com>
2022-12-21 12:18:49 +00:00
Loïc Lecrenier
339a4b0789 Make clippy happy 2022-12-21 12:49:34 +01:00
Loïc Lecrenier
904fd2f6d1 Add a search strategy option to the cli 2022-12-21 12:48:53 +01:00
Loïc Lecrenier
229405aeb9 Choose implementation strategy of criterion at runtime 2022-12-21 09:29:39 +01:00
jiangbo212
2780e365e2 test update and ndjson serde use from_slice 2022-12-21 14:31:45 +08:00
jiangbo212
bf2a401a05 serde ndjson fix 2022-12-21 11:27:15 +08:00
bors[bot]
9925309492 Merge #3263
3263: Handle most io error instead of tagging everything as an internal r=dureuill a=irevoire

Fix https://github.com/meilisearch/meilisearch/issues/2255
Fix https://github.com/meilisearch/meilisearch/issues/2785
Close https://github.com/meilisearch/milli/pull/580

- [x] Find a way to catch the `io::Error` contained in `serde_json::Error`: we can't: https://docs.rs/serde_json/latest/serde_json/struct.Error.html
- [x] Check the `grenad::Error` as well => the `grenad::Error::Io` errors are correctly converted to a `milli::Error::Io` error
- [x] Ensure the error codes mean the same thing under Windows
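
A hypothetical illustration of the principle (all types here are invented, not milli's): io failures wrapped in a lower-level error are surfaced as a dedicated `Io` variant instead of being flattened into a catch-all internal error.

```rust
use std::io;

enum AppError {
    Io(io::Error),
    Internal(String),
}

enum LowLevelError {
    Io(io::Error),
    Other(String),
}

impl From<LowLevelError> for AppError {
    fn from(err: LowLevelError) -> Self {
        match err {
            LowLevelError::Io(e) => AppError::Io(e), // keep io errors as io errors
            LowLevelError::Other(msg) => AppError::Internal(msg),
        }
    }
}
```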

Co-authored-by: Tamo <tamo@meilisearch.com>
2022-12-20 17:15:53 +00:00
Tamo
9e0cce5ca4 Update dump/src/error.rs
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2022-12-20 18:08:51 +01:00
Tamo
336ea57384 Update dump/src/error.rs
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2022-12-20 18:08:44 +01:00
Tamo
c637bfba37 convert all the document format error due to io to io::Error 2022-12-20 17:49:38 +01:00
Tamo
3040172562 update the error message as well 2022-12-20 17:31:13 +01:00
bors[bot]
249e051cd4 Merge #750
750: Fix hard-deletion of an external id that was soft-deleted and then reimported - main r=irevoire a=loiclec

# Pull Request

## Related issue
Fixes (when merged into meilisearch) https://github.com/meilisearch/meilisearch/issues/3021

## What does this PR do?
There was a bug happening when:

1. Documents were added
2. Some of these documents were replaced using soft-deletion
3. A deletion of another non-replaced document takes place and triggers a hard-deletion
4. Documents with the same identifiers as the replaced documents are added again

Then, search results would return duplicate documents. No crash would happen at any time (this is the reason it wasn't caught by the previous fuzz test. I have updated the new one such that it also checks the result of a placeholder search request, which then finds the bug immediately).

The cause of the bug is: 

1. When a hard-deletion is triggered, we try to retrieve the external document id associated with each soft-deleted document id. 
2. Then, we take this list of external document ids and remove each of them from the `ExternalDocumentsIds` structure. 
3. However, this is not correct in case an existing (non-deleted) document shares the external id of a soft-deleted document. 
   
## Implementation of the fix
1. Before we process a permanent deletion, we update the list of soft-deleted document ids.
2. Then, the permanent deletion's job is to remove the soft-deleted documents from all data structures. Therefore, to update `ExternalDocumentsIds`, we can simply call the `delete_soft_deleted_documents_ids_from_fsts` method, which is faster and simpler.

## Correctness
A unit test was added to reproduce the bug. The new fuzz test, when adjusted to check the correctness of a placeholder search, could also instantly reproduce the bug, but now does not find any other problem.

Co-authored-by: Loïc Lecrenier <loic.lecrenier@me.com>
2022-12-20 16:13:20 +00:00
Tamo
52aa34d984 remove an unused error handling file 2022-12-20 16:32:51 +01:00
Loïc Lecrenier
fc0e7382fe Fix hard-deletion of an external id that was soft-deleted 2022-12-20 15:33:31 +01:00
bors[bot]
2c86d42a44 Merge #3264
3264: Remove macos-latest and windows-latest usages r=curquiza a=curquiza

Related to https://github.com/meilisearch/meilisearch/issues/3109#issuecomment-1359151297

Remove the `macos-latest` and `windows-latest` to replace them with the specific version: this will avoid "surprises" in the future when GitHub changes the `latest` version.
This way, it will also allow us to let the documentation team know about the changes, since we will control the macOS/Windows version we support

Co-authored-by: curquiza <clementine@meilisearch.com>
2022-12-20 10:53:37 +00:00
curquiza
8ce3a34ffa Remove macos-latest and windows-latest usages 2022-12-20 11:10:09 +01:00
bors[bot]
259c04eb28 Merge #3261
3261: Use ubuntu-18.04 container instead of GitHub hosted actions r=curquiza a=curquiza

Related to (but does not totally fix) https://github.com/meilisearch/meilisearch/issues/3109 and https://github.com/meilisearch/product/discussions/547#discussioncomment-4109143

## For reviewers, what's the PR changes:
- Use ubuntu-latest where compiling with ubuntu-18.04 is not needed (`update-version-cargo-toml`, `fmt`, `clippy` jobs)
- Where ubuntu-18.04 is required
  - Use `ubuntu-latest` as runner
  - Use `ubuntu:18.04` as Docker container
  - Install the required dependencies (curl and cc)
  - Use `actions-rs/toolchain@v1` instead of `hecrj/setup-rust-action@master`. It's a more stable and better-maintained alternative. Plus, it was easy to make it work with our container, contrary to the old one. This change is applied in all our CIs to be more consistent
- Remove some useless space to increase readability.

Co-authored-by: curquiza <clementine@meilisearch.com>
2022-12-20 09:28:09 +00:00
Tamo
d8fb506c92 handle most io error instead of tagging everything as an internal 2022-12-19 20:50:40 +01:00
amab8901
aa03e02fdc Apply Rustfmt 2022-12-19 19:24:56 +01:00
curquiza
7ef23addb6 Add comment to bring more context 2022-12-19 18:46:27 +01:00
bors[bot]
97fb64e40e Merge #747
747: Soft-deletion computation no longer depends on the mapsize r=irevoire a=dureuill

# Pull Request

## Related issue

Related to https://github.com/meilisearch/meilisearch/issues/3231: After removing `--max-index-size`, the `mapsize` will always be unrelated to the actual max size the user wants for their DB, so it doesn't make sense to use these values any longer.

This implements solution 2.3 from https://github.com/meilisearch/meilisearch/issues/3231#issuecomment-1348628824

## What does this PR do?

### User-visible

- Soft-deleted documents are no longer deleted when there is less than 10% of the mapsize available or when they take up more than 10% of the mapsize
- Instead, they are deleted when they outnumber the regular documents, or when they take up more than 1GiB of disk space (estimated).

### Implementation standpoint

1. Adds a `DeletionStrategy` enum to replace the boolean `disable_soft_deletion` that we had up until now. This enum allows us to specify that we want "always hard", "always soft", or the dynamic soft-deletion strategy (default); see the sketch below.
2. Uses the current strategy when deleting documents, with the new heuristics being used in the `DeletionStrategy::Dynamic` variant.
3. Updates the tests to use the appropriate `DeletionStrategy` whenever needed (one of `AlwaysHard` or `AlwaysSoft` depending on the test)
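
A sketch of the strategy and its dynamic heuristic (the variant names come from this PR; the function and exact thresholds are illustrative):

```rust
enum DeletionStrategy {
    AlwaysHard,
    AlwaysSoft,
    Dynamic,
}

fn should_hard_delete(
    strategy: &DeletionStrategy,
    soft_deleted_docs: u64,
    live_docs: u64,
    estimated_soft_deleted_bytes: u64,
) -> bool {
    const ONE_GIB: u64 = 1 << 30;
    match strategy {
        DeletionStrategy::AlwaysHard => true,
        DeletionStrategy::AlwaysSoft => false,
        // Hard-delete when soft-deleted documents outnumber live ones
        // or when they are estimated to take more than 1GiB of disk space.
        DeletionStrategy::Dynamic => {
            soft_deleted_docs > live_docs || estimated_soft_deleted_bytes > ONE_GIB
        }
    }
}
```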

Note to reviewers: this PR is optimized for a commit-by-commit review.

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Louis Dureuil <louis@meilisearch.com>
Co-authored-by: Tamo <tamo@meilisearch.com>
2022-12-19 17:46:18 +00:00
curquiza
b3fce7c366 Remove useless continue-on-error 2022-12-19 18:39:35 +01:00
curquiza
5099a40484 Use ubuntu-18.04 container in publish CIs 2022-12-19 18:35:33 +01:00
Tamo
69edbf9f6d Update milli/src/update/delete_documents.rs 2022-12-19 18:23:50 +01:00
bors[bot]
8957251eed Merge #751
751: Update version for the next release (v0.38.0) in Cargo.toml files r=curquiza a=meili-bot

⚠️ This PR is automatically generated. Check the new version is the expected one before merging.

Co-authored-by: curquiza <curquiza@users.noreply.github.com>
2022-12-19 17:02:39 +00:00
curquiza
c72535531b Update version for the next release (v0.38.0) in Cargo.toml files 2022-12-19 16:35:38 +00:00
bors[bot]
19ee9a828f Merge #3262
3262: Clippy fixes after updating Rust to v1.66 r=curquiza a=dureuill

Ran `cargo clippy --fix`

Fixes the CI.


Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2022-12-19 14:05:59 +00:00
Louis Dureuil
869d331680 Clippy fixes after updating Rust to v1.66 2022-12-19 14:17:12 +01:00
curquiza
913eff5b2f Use ubuntu-18.04 container in rust tests 2022-12-19 10:46:29 +01:00
Louis Dureuil
916c23e7be Tests: rename snapshots 2022-12-19 10:07:17 +01:00
Louis Dureuil
ad9937c755 Fix tests after adding DeletionStrategy 2022-12-19 10:07:17 +01:00
Louis Dureuil
171c942282 Soft-deletion computation no longer takes into account the mapsize
Implemented solution 2.3 from https://github.com/meilisearch/meilisearch/issues/3231#issuecomment-1348628824
2022-12-19 10:07:17 +01:00
Louis Dureuil
e2ae3b24aa Hard or soft delete according to the deletion strategy 2022-12-19 10:00:13 +01:00
Louis Dureuil
fc7618d49b Add DeletionStrategy 2022-12-19 09:49:58 +01:00
amab8901
b4a73f2d74 Remove redundant date-setting 2022-12-16 08:32:44 +01:00
amab8901
4e175ae882 Replace Index::new_with_creation_dates(...) with Index::new(...) 2022-12-16 08:20:13 +01:00
amab8901
5a0a0468df Combine created and added into date 2022-12-16 08:11:12 +01:00
ManyTheFish
7f88c4ff2f Fix #1714 test 2022-12-15 18:22:28 +01:00
ManyTheFish
96d4242b93 Update charabia 2022-12-15 18:22:22 +01:00
ManyTheFish
60ebf0ea0b Add a specific test on finite pagination placeholder search with distinct attributes 2022-12-15 17:28:20 +01:00
bors[bot]
867279f2a4 Merge #3249
3249: Bring back changes from release-v0.30.3 to main r=curquiza a=curquiza

⚠️ ⚠️ I had to fix git conflicts, ensure I did not lose anything ⚠️ ⚠️ 

Co-authored-by: Kerollmops <clement@meilisearch.com>
Co-authored-by: Tamo <tamo@meilisearch.com>
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2022-12-15 14:13:30 +00:00
bors[bot]
5114686394 Merge #743
743: Fix finite pagination with placeholder search r=Kerollmops a=ManyTheFish

this bug is reproducible on real datasets and is hard to isolate in a simple test.

related to: https://github.com/meilisearch/meilisearch/issues/3200

poke `@curquiza` 

Co-authored-by: ManyTheFish <many@meilisearch.com>
2022-12-15 09:31:47 +00:00
ManyTheFish
3322018c06 Fix placeholder search 2022-12-14 20:09:47 +01:00
Louis Dureuil
ce84a59873 Re-apply some changes from #3132 2022-12-14 20:02:39 +01:00
Tamo
d66bb3a53f rename the two new functions 2022-12-14 17:27:43 +01:00
Tamo
6c0b8edab5 Fix typos
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2022-12-14 17:27:37 +01:00
Tamo
fbbc6eaeca Fix the import of dumps and snapshot.
Some flags were badly applied + the database was wrongly deleted when it shouldn't have been
2022-12-14 17:27:28 +01:00
Kerollmops
60c3bac108 Bump milli to v0.37.3 2022-12-14 17:25:40 +01:00
bors[bot]
9491fe0704 Merge #3247
3247: Re-add push in docker CI r=curquiza a=curquiza

I made a mistake here https://github.com/meilisearch/meilisearch/pull/3229, `push` is not `true` by default, see https://github.com/docker/build-push-action#customizing

Co-authored-by: Clémentine Urquizar - curqui <clementine@meilisearch.com>
2022-12-14 13:15:41 +00:00
Clémentine Urquizar - curqui
240c73d292 Re-add push 2022-12-14 14:05:25 +01:00
amab8901
d3eb8d2d5c Enable create_raw_index(...) to specify time 2022-12-14 10:44:25 +01:00
bors[bot]
0276d5212a Merge #728
728: Add some integration tests on the sort criterion r=ManyTheFish a=loiclec

This is simply an integration test ensuring that the sort criterion works properly. 

However, only one version of the algorithm is tested here (the iterative one). To test the version that uses the facet DB, one has to manually set the `CANDIDATES_THRESHOLD` constant to `0`. I have done that and ensured that the test still succeeds. However, in the future, we will probably want to have an option to force which algorithm is used at runtime, for testing purposes.


Co-authored-by: Loïc Lecrenier <loic.lecrenier@me.com>
2022-12-14 09:27:12 +00:00
bors[bot]
660be071b5 Merge #3236
3236: Improves clarity of the code that receives payloads r=Kerollmops a=Kerollmops

This PR makes small changes to #3164. It improves the clarity and simplicity of some parts of the code.

Co-authored-by: Kerollmops <clement@meilisearch.com>
2022-12-13 18:20:24 +00:00
bors[bot]
89542d7d8b Merge #3241
3241: Remove core mention r=curquiza a=curquiza

No impact for the users or the team

Co-authored-by: curquiza <clementine@meilisearch.com>
2022-12-13 17:35:50 +00:00
curquiza
f62e7a3501 Remove core mention 2022-12-13 17:34:43 +01:00
Kerollmops
a08cc82983 Revert "Simplify the code when array_each failed"
This reverts commit 271685cceb.
2022-12-13 16:29:49 +01:00
bors[bot]
e2ffc3d69a Merge #741
741: Add test reproducing the bug fixed by #737 r=Kerollmops a=ManyTheFish

related to #737

Co-authored-by: ManyTheFish <many@meilisearch.com>
2022-12-13 15:02:19 +00:00
ManyTheFish
739da9fd4d Add test 2022-12-13 15:54:43 +01:00
bors[bot]
2af93966e0 Merge #740
740: Fix two nightly errors r=Kerollmops a=irevoire

Currently, we have these two errors on rust nightly. It would be nice to help rustc understand what's going on

```
error[E0658]: anonymous lifetimes in `impl Trait` are unstable
   --> filter-parser/src/lib.rs:173:53
    |
173 | fn ws<'a, O>(inner: impl FnMut(Span<'a>) -> IResult<O>) -> impl FnMut(Span<'a>) -> IResult<O> {
    |                                                     ^ expected named lifetime parameter
    |
    = help: add `#![feature(anonymous_lifetime_in_impl_trait)]` to the crate attributes to enable
help: consider introducing a named lifetime parameter
    |
173 | fn ws<'a, 'a, O>(inner: impl FnMut(Span<'a>) -> IResult<'a, O>) -> impl FnMut(Span<'a>) -> IResult<O> {
    |       +++                                               +++

error[E0658]: anonymous lifetimes in `impl Trait` are unstable
  --> filter-parser/src/error.rs:36:49
   |
36 |     mut parser: impl FnMut(Span<'a>) -> IResult<O>,
   |                                                 ^ expected named lifetime parameter
   |
   = help: add `#![feature(anonymous_lifetime_in_impl_trait)]` to the crate attributes to enable
help: consider introducing a named lifetime parameter
   |
35 ~ pub fn cut_with_err<'a, 'a, O>(
36 ~     mut parser: impl FnMut(Span<'a>) -> IResult<'a, O>,
   |

For more information about this error, try `rustc --explain E0658`.
error: could not compile `filter-parser` due to 2 previous errors
```

Co-authored-by: Tamo <tamo@meilisearch.com>
2022-12-13 14:33:40 +00:00
Kerollmops
7b2f2a4f9c Do only one conversion to u64 2022-12-13 15:31:55 +01:00
Tamo
2c47500bc3 fix two nightly errors 2022-12-13 15:29:52 +01:00
Kerollmops
5d5615ef45 Rename the ReceivePayload error variant 2022-12-13 15:07:35 +01:00
Kerollmops
526793b5b2 Handle empty arrays the same way we handle other arrays 2022-12-13 14:58:40 +01:00
Kerollmops
271685cceb Simplify the code when array_each failed 2022-12-13 14:58:05 +01:00
bors[bot]
1af590d3bc Merge #3234
3234: Update README.md r=curquiza a=tpayet

Change Slack link to Discord link

Co-authored-by: Thomas Payet <thomas@meilisearch.com>
2022-12-13 11:41:10 +00:00
bors[bot]
dab2634ca8 Merge #3164
3164: Improve the way we receive the documents payload r=Kerollmops a=jiangbo212

# Pull Request

## Related issue
Fixes #3037 

## What does this PR do?
- writing the payload to a temporary file via a BufWriter
- deserialising the JSON temporary file to an array of Objects by means of a memory map
- deserialising the CSV temporary file by means of a memory map
- Adapted some read_json tests
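
A hedged sketch of this flow (the crates used here, `tempfile`, `memmap2`, and `serde_json`, are assumptions made for the illustration):

```rust
use std::io::{self, BufWriter, Write};

fn buffer_then_deserialize(payload: &[u8]) -> io::Result<serde_json::Value> {
    // 1. Write the payload to a temporary file through a BufWriter.
    let file = tempfile::tempfile()?;
    let mut writer = BufWriter::new(file);
    writer.write_all(payload)?;
    let file = writer.into_inner().map_err(|e| e.into_error())?;

    // 2. Memory-map the file and deserialize from the mapping.
    // Safety: the anonymous temporary file is not shared with other processes.
    let mmap = unsafe { memmap2::Mmap::map(&file)? };
    Ok(serde_json::from_slice(&mmap)?)
}
```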

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: jiangbo212 <peiyaoliukuan@gmail.com>
Co-authored-by: jiangbo212 <peiyaoliukuan@126.com>
2022-12-13 10:58:24 +00:00
bors[bot]
406ee31d1a Merge #737
737: Fix typo initial candidates computation r=Kerollmops a=ManyTheFish

When the `Typo` criterion came after a criterion other than `Words`, and the previous criterion wasn't returning any candidates at the first iteration of the bucket sort, the `initial_candidates` were lost.

Now, `Typo` ensures that the `initial_candidates` are kept between iterations.


related to https://github.com/meilisearch/meilisearch/issues/3200#issuecomment-1345179578
related to https://github.com/meilisearch/meilisearch/issues/3228

Co-authored-by: ManyTheFish <many@meilisearch.com>
2022-12-13 10:29:28 +00:00
ManyTheFish
2d8d0af1a6 Rename short name bc to ic for initial_candidates 2022-12-13 10:56:38 +01:00
Thomas Payet
8a7f90250c Update README.md
Change Slack link to Discord link
2022-12-13 10:46:05 +01:00
bors[bot]
e0a8f8cb5a Merge #734
734: Fix bug 2945/3021 (missing key in documents database) r=Kerollmops a=loiclec

# Pull Request

## Related issue
Fixes (partially, until merged into meilisearch) https://github.com/meilisearch/meilisearch/issues/2945 (until we integrate the new milli bump into meilisearch).

**Note that a dump will not be sufficient to upgrade from meilisearch v0.30.2 to meilisearch v0.30.3 due to this fix** because the bug could have caused the `documents` database to be corrupted. Instead, a full manual reimport of the documents will be necessary.

## What does this PR do?
There was a bug happening when:
1. A few documents are added to the index
2. Some of these documents are soft-deleted
3. New documents are added, replacing existing ones and triggering a hard-deletion

The `IndexDocuments::execute` method would then perform the hard-deletion but forget to change the `external_document_ids` structure appropriately. As a result, the `external_document_ids` would contain keys corresponding to documents that do not exist anymore.

To fix this bug, I split the `DeleteDocuments::execute` method into two: `execute_inner` and `execute`. 
- `execute_inner` returns a `DetailedDocumentDeletionResult` which says whether soft-deletion was used or not
- `execute` keeps the exact same signature and behaviour

Then, when deleting replaced documents inside `IndexDocuments::execute`, we call `DeleteDocuments::execute_inner` instead of `DeleteDocuments::execute`. If soft-deletion was used, nothing more is done. But if hard-deletion was used, we remove every reference to soft-deleted documents in the new `external_documents_ids` structure.
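
The shape of that split, sketched with placeholder types (not milli's actual implementation):

```rust
struct DetailedDocumentDeletionResult {
    deleted_documents: u64,
    used_soft_deletion: bool, // lets the caller react to hard-deletions
}

struct DeleteDocuments;

impl DeleteDocuments {
    // `execute_inner` reports whether soft-deletion was used.
    fn execute_inner(self) -> DetailedDocumentDeletionResult {
        DetailedDocumentDeletionResult { deleted_documents: 0, used_soft_deletion: true }
    }

    // `execute` keeps the exact same signature and behaviour as before.
    fn execute(self) -> u64 {
        self.execute_inner().deleted_documents
    }
}
```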

## Correctness

- Every other test still passes
- The reproduction test case now passes
- In a different branch ([`update-fuzz-test`](https://github.com/meilisearch/milli/pull/735)), I created a fuzz-test that reproduces the past two bugs. This fuzz test cannot find this bug through any combination of some hand-selected `DocumentAddition / DocumentDeletion / DocumentClear / SettingsUpdate` operations. In that test, each relevant operations can be executed with or without soft-deletion, and document additions can be done in batches, replacing or updating existing documents.



Co-authored-by: Loïc Lecrenier <loic.lecrenier@me.com>
2022-12-13 09:45:57 +00:00
Loïc Lecrenier
be3b00350c Apply review suggestions: naming and documentation 2022-12-13 10:15:22 +01:00
jiangbo212
23c1b223b3 Merge branch 'fix-3037' of github.com:jiangbo212/meilisearch into fix-3037 2022-12-13 10:41:50 +08:00
jiangbo212
87ae0032bf review change 2022-12-13 10:41:43 +08:00
jiangbo212
7c24fea9f2 Merge branch 'main' into fix-3037 2022-12-13 05:16:03 +08:00
ManyTheFish
80d34a4169 Fix typo initial candidates computation 2022-12-12 19:02:48 +01:00
jiangbo212
27d1bee0bb Merge branch 'main' into fix-3037-new 2022-12-12 22:16:22 +08:00
jiangbo212
b1c3174061 fix fmt 2022-12-12 22:06:24 +08:00
jiangbo212
fa46dfb7bb fmt fix 2022-12-12 22:02:56 +08:00
bors[bot]
40d9b73aaf Merge #3223
3223: Bring back release-v0.30.2 changes into main r=irevoire a=curquiza

Only bring back the necessary changes from `release-v0.30.2` to `main`, following v0.30.2 release

Co-authored-by: Tamo <tamo@meilisearch.com>
Co-authored-by: curquiza <clementine@meilisearch.com>
2022-12-12 13:49:01 +00:00
jiangbo212
169682d3ec Merge branch 'main' into fix-3037-new 2022-12-12 21:36:10 +08:00
bors[bot]
21b926cb00 Merge #3224
3224: Fix update-cargo-toml-version.yml r=curquiza a=mohitsaxenaknoldus

# Pull Request

## Related issue
Fixes #3219 

## What does this PR do?
- ...

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Mohit Saxena <76725454+mohitsaxenaknoldus@users.noreply.github.com>
2022-12-12 13:27:46 +00:00
Loïc Lecrenier
e3ee553dcc Remove soft deleted ids from ExternalDocumentIds during document import
If the document import replaces a document using hard deletion
2022-12-12 14:16:09 +01:00
bors[bot]
34a6f2598b Merge #3229
3229: Add a nightly CI: create every day a `nightly` Docker tag based on the latest commit on `main` r=Kerollmops a=curquiza

Also, fixes #3195

Easy to follow with the commits
- In the Docker CI:
  - create every day a `nightly` Docker tag based on the latest commit on `main`
  - check if the release is the latest one, before creating the `latest` Docker tag. A script has been added.
  - add the `worflow_dispatch` event to trigger the CI to build the `nightly` tag when we want (always on the latest commit on `main`)
- In multiple CIs: replace the `released` type by `published`, see [here](https://stackoverflow.com/questions/59319281/github-action-different-between-release-created-and-published) why. This will not impact anything, but will prevent our future automation from failing
- Remove a useless CI (code coverage, not used for 1 year)
- Remove useless lines (comments and CI logic) that don't have any impact

Co-authored-by: curquiza <clementine@meilisearch.com>
2022-12-12 10:46:33 +00:00
curquiza
14824cee86 Remove obsolete comment line 2022-12-11 21:46:48 +01:00
curquiza
796e61ec7e Remove useless CI 2022-12-11 21:29:23 +01:00
curquiza
9a3f9577b8 Remove useless line in CI 2022-12-11 21:26:05 +01:00
curquiza
2c8eb92537 Check before publish latest 2022-12-11 21:24:52 +01:00
Mohit Saxena
1bf5c0edb9 Update update-cargo-toml-version.yml 2022-12-10 23:04:26 +05:30
curquiza
b1ffbe561e Add nightly for docker CI 2022-12-09 20:06:59 +01:00
curquiza
84204b8cd5 Replace the released type by published 2022-12-09 19:27:58 +01:00
Mohit Saxena
346fca5608 Update update-cargo-toml-version.yml 2022-12-09 00:20:51 +05:30
Loïc Lecrenier
bebd050961 Add new test for bug 3021 2022-12-08 19:19:40 +01:00
curquiza
4631f4d97f Bump milli to v0.37.2 2022-12-08 18:16:48 +01:00
Tamo
6f1c30b247 Fix the instance-uid in the data.ms
We were writing the instance-uid as bytes instead of as a string in the data.ms, and thus we were unable to parse it later.
It was also less practical for our users to retrieve it and send it to us.
2022-12-08 18:16:43 +01:00
bors[bot]
abba54e913 Merge #3112
3112: Rename meilisearch-http r=Kerollmops a=colbsmcdolbs

# Pull Request

## Related issue
Fixes #3073 

## What does this PR do?
- Renames all references of `meilisearch-http` to `meilisearch`
- Might need to be rebased before the 1.0.0 release

## PR checklist
Please check if your PR fulfills the following requirements:
- [X] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [X] Have you read the contributing guidelines?
- [X] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Colby Allen <colbyjayallen@gmail.com>
2022-12-08 16:32:08 +00:00
bors[bot]
c426fa1478 Merge #3212
3212: Setup COMMIT_SHA and COMMIT_DATE build args in the Docker image r=curquiza a=brunoocasali

GitHub auto-closed my PR when I synced changes with my remote 🤷‍♂️  https://github.com/meilisearch/meilisearch/pull/2550
The last PR #3205 was closed to help `@curquiza` test the CI.

In any case, the summary of changes is quite similar:

- Fix `git` usage from my last attempt: when you use `actions/checkout`, you get the `git` command to use.
- Add the `build-args` definition from https://github.com/docker/build-push-action#inputs, which is supposed to work precisely like docker build's `--build-arg`.

Fixes https://github.com/meilisearch/meilisearch/issues/2028

The result will be like this:

<img width="556" alt="image" src="https://user-images.githubusercontent.com/4116980/206019608-2713559a-1f58-4ff3-9fec-7720783993ac.png">

Co-authored-by: Bruno Casali <brunoocasali@gmail.com>
2022-12-08 16:07:50 +00:00
bors[bot]
574be942cd Merge #3221
3221: Update README to reference Meilisearch Cloud r=curquiza a=davelarkan

# Pull Request

## Related issue
Fixes #3220

## What does this PR do?
- Updates the README to link to the Pricing page where people can choose a Meilisearch Cloud plan

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Dave Larkan <davelarkan@gmail.com>
2022-12-08 15:43:43 +00:00
Colby Allen
2262766494 chore: run fmt nightly on project 2022-12-08 08:31:15 -07:00
Colby Allen
ad2b1467da Renames meilisearch-http to meilisearch 2022-12-08 08:22:53 -07:00
Dave Larkan
ee37d5e724 Update README to reference Meilisearch Cloud 2022-12-08 15:02:34 +00:00
bors[bot]
ded2a50d14 Merge #3216
3216: Update version for the next release (v1.0.0) in Cargo.toml files r=curquiza a=meili-bot

⚠️ This PR is automatically generated. Check the new version is the expected one before merging.

Co-authored-by: curquiza <curquiza@users.noreply.github.com>
2022-12-08 13:49:50 +00:00
Bruno Casali
58327979f1 Use correct env vars "VERGEN_*" on Dockerfile 2022-12-08 10:48:16 -03:00
Bruno Casali
50d9fe036e Setup COMMIT_SHA and COMMIT_DATE build args in the Docker image 2022-12-08 10:48:16 -03:00
curquiza
026cf223b3 Update version for the next release (v1.0.0) in Cargo.toml files 2022-12-08 12:20:17 +00:00
bors[bot]
af6f7f8462 Merge #3215
3215: Use nightly in cargo fmt r=curquiza a=curquiza

Discussed with `@Kerollmops`; needs this change

Co-authored-by: Clémentine Urquizar - curqui <clementine@meilisearch.com>
2022-12-08 10:53:24 +00:00
Clémentine Urquizar - curqui
5023d36ee7 Use nightly in cargo fmt 2022-12-08 11:51:13 +01:00
bors[bot]
1f1beae077 Merge #729
729: Fix distincted exhaustive hits r=Kerollmops a=ManyTheFish

This PR changes the name and behavior of `bucket_candidates`:
- `bucket_candidates` becomes `initial_candidates`, which is less confusing
- `initial_candidates` is no longer a simple `RoaringBitmap` but an enum that lets us state whether the candidates are exhaustive or not
- this enum ensures that any modification is allowed only if the candidates are not already exhaustive.

The bug occurred because `initial_candidates` are modified during the bucket sort, allowing the estimation to become more and more precise over the course of the search; this was an issue when the `initial_candidates` were already exhaustive. Now, if the candidates are exhaustive, no modifications are made.
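
An illustrative sketch of that enum (assuming the `roaring` crate; the real milli type differs in detail):

```rust
use roaring::RoaringBitmap;

enum InitialCandidates {
    Estimate(RoaringBitmap),
    Exhaustive(RoaringBitmap),
}

impl InitialCandidates {
    // An estimate may be refined during the bucket sort,
    // but an exhaustive count must never be modified.
    fn try_extend(&mut self, more: &RoaringBitmap) {
        if let InitialCandidates::Estimate(bitmap) = self {
            *bitmap |= more;
        }
    }
}
```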

Co-authored-by: ManyTheFish <many@meilisearch.com>
2022-12-08 09:26:34 +00:00
ManyTheFish
55724f2412 Introduce an initial candidates set that distinguishes between an exhaustive count and an estimation 2022-12-08 09:41:34 +01:00
ManyTheFish
6d50ea0830 add tests 2022-12-08 08:56:57 +01:00
bors[bot]
f4dc4c5d8d Merge #3210
3210: Fix `MDB_PAGE_FULL` by bumping LMDB r=Kerollmops a=Kerollmops

This PR fixes #3062 by upgrading LMDB to the latest version.

The changes were made in https://github.com/meilisearch/lmdb/pull/1 and https://github.com/meilisearch/lmdb-rs/pull/12. As heed directly depends on the latest main commit of https://github.com/meilisearch/lmdb-rs, we can bump the `lmdb-rkv-sys` dependency in the Meilisearch _Cargo.lock_ by doing a:

```
cargo update -p lmdb-rkv-sys
```

Co-authored-by: Kerollmops <clement@meilisearch.com>
2022-12-07 16:21:23 +00:00
Loïc Lecrenier
f37c86e0b2 Add some integration tests on the sort criterion 2022-12-07 15:59:33 +01:00
jiangbo212
717dd36547 Merge branch 'fix-3037' of github.com:jiangbo212/meilisearch into fix-3037 2022-12-07 22:54:16 +08:00
jiangbo212
538030c2da change NameTempFile to tempfile() 2022-12-07 22:47:32 +08:00
bors[bot]
098c410612 Merge #727
727: Fix bug in filter search r=Kerollmops a=loiclec

# Pull Request

## Related issue
Fixes (partially, until merged into meilisearch) https://github.com/meilisearch/meilisearch/issues/3178

## What does this PR do?
The most important change is this one:
```rust
    // in milli/src/search/facet/facet_range_search.rs, line 239
    let should_stop = {
        match self.right {
            Bound::Included(right) => right < previous_key.left_bound,
            Bound::Excluded(right) => right <= previous_key.left_bound,
            Bound::Unbounded => false,
        }
    };
```
where the operations `<` and `<=` between the two branches were switched. This caused (very few) documents to be missing from filter results.

The second change is a simplification of the algorithm for filters such as `field = value`, where we now perform a direct query into the "Level 0" of the facet db to retrieve the docids instead of invoking the full facet search algorithm. This change is done in `milli/src/search/facet/filter.rs`.
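As an illustration only (a toy map standing in for the LMDB database, hypothetical names), the shortcut amounts to a direct lookup instead of a range traversal:

```rust
use std::collections::BTreeMap;

use roaring::RoaringBitmap;

/// Toy stand-in for "Level 0" of the facet database: one entry per
/// (field id, facet value) pair, mapping to the ids of matching documents.
type Level0 = BTreeMap<(u16, String), RoaringBitmap>;

/// For a `field = value` filter, fetch the docids with a single lookup
/// rather than running the full facet range search algorithm.
fn equality_docids(level0: &Level0, field_id: u16, value: &str) -> RoaringBitmap {
    level0
        .get(&(field_id, value.to_owned()))
        .cloned()
        .unwrap_or_default()
}
```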

I have added yet more insta-snapshot tests, rechecked the content of the snapshots, and added some integration tests as well. 

This is purely a fix in the search algorithms. Based on this PR alone, a dump will not be necessary to switch from v0.30.1 (where this bug is present) to v0.30.2 (where this PR is merged).


Co-authored-by: Loïc Lecrenier <loic.lecrenier@me.com>
2022-12-07 14:34:59 +00:00
Kerollmops
1d5294d11a Bump lmdb version 2022-12-07 15:29:56 +01:00
bors[bot]
ee10cb8c87 Merge #726
726: Update the contributing.md r=curquiza a=irevoire



Co-authored-by: Tamo <tamo@meilisearch.com>
2022-12-07 13:59:04 +00:00
Loïc Lecrenier
d38cc73630 Add one more filter "integration" test 2022-12-07 14:38:25 +01:00
Loïc Lecrenier
e688581c36 Add tests for facet range search on different field ids 2022-12-07 14:38:21 +01:00
Loïc Lecrenier
4ac8f96342 Simplify implementation of equality condition in filters 2022-12-07 14:38:18 +01:00
Loïc Lecrenier
1c9555566e Fix bug in facet range search 2022-12-07 14:38:14 +01:00
Loïc Lecrenier
303d740245 Prepare fix within facet range search
By creating snapshots and updating the format of the existing
snapshots. The next commit will apply the fix, which will show
its effects cleanly on the old and new snapshot tests
2022-12-07 14:38:10 +01:00
bors[bot]
34c3e5ec5e Merge #3208
3208: Stop snapshotting the version of meilisearch in the dump r=Kerollmops a=irevoire

It might change, and we don't want to update this test every time we make a new release.


Co-authored-by: Tamo <tamo@meilisearch.com>
2022-12-07 12:54:55 +00:00
Tamo
1c3a326199 stop snapshotting the version of meilisearch in the dump
It might change and we don't want to update this test every time we make a new release.
2022-12-07 13:26:02 +01:00
Tamo
250743885d add a sentence about installing rust-nightly 2022-12-07 12:31:43 +01:00
bors[bot]
34c0f11c26 Merge #3207
3207: Add release check when starting latest CI r=Kerollmops a=curquiza

Adding this to have the same kind of check before starting to move the latest tag

<img width="737" alt="Capture d’écran 2022-12-07 à 12 18 33" src="https://user-images.githubusercontent.com/20380692/206165868-18a2be7c-78ec-48c9-acb9-d7f60797c2e3.png">

Also, removing an unused script

Co-authored-by: curquiza <clementine@meilisearch.com>
2022-12-07 11:27:47 +00:00
Tamo
5eecb8489d Update CONTRIBUTING.md
Co-authored-by: Clémentine Urquizar - curqui <clementine@meilisearch.com>
2022-12-07 12:23:12 +01:00
Tamo
0e5c3b1f64 Update CONTRIBUTING.md
Co-authored-by: Clémentine Urquizar - curqui <clementine@meilisearch.com>
2022-12-07 12:23:06 +01:00
curquiza
be300138e4 Add release check when starting latest CI 2022-12-07 12:22:44 +01:00
bors[bot]
2ed6017603 Merge #3204
3204: Bring back v0.30.1 changes to `main` r=curquiza a=curquiza

I was not able to just import `release-v0.30.1` to `main`, see:
<img width="1371" alt="Capture d’écran 2022-12-06 à 20 03 50" src="https://user-images.githubusercontent.com/20380692/206000844-b39b3063-7da2-475f-b3e4-1791c39a7c2f.png">

So I cherry-picked the commits.

⚠️ ⚠️ ⚠️ I had a git conflict here

<img width="730" alt="Capture d’écran 2022-12-06 à 20 09 04" src="https://user-images.githubusercontent.com/20380692/206001007-f56bc28f-c0b1-46a0-bb60-cce4e73b9584.png">


⚠️ ⚠️ ⚠️ Check out carefully how I fixed it


Co-authored-by: curquiza <curquiza@users.noreply.github.com>
Co-authored-by: Kerollmops <clement@meilisearch.com>
2022-12-07 11:08:37 +00:00
Kerollmops
c1337f9e08 Update dump snap to new version 2022-12-07 11:48:29 +01:00
bors[bot]
9acac28574 Merge #3128
3128: Bumps cargo_toml version to most up to date r=curquiza a=colbsmcdolbs

# Pull Request

## Related issue
Fixes #3127

## What does this PR do?
- The README of this repository declares that one package is not up to date. In order to ensure due diligence, I have bumped the version number of the package. No test failures when running on Windows.

## PR checklist
Please check if your PR fulfills the following requirements:
- [X] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [X] Have you read the contributing guidelines?
- [X] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Colby Allen <colbyjayallen@gmail.com>
2022-12-07 10:31:25 +00:00
jiangbo212
cb1d184904 fmt fix 2022-12-07 17:04:24 +08:00
jiangbo212
2841b09789 Merge branch 'meilisearch:main' into fix-3037 2022-12-07 16:30:21 +08:00
jiangbo212
35f3dd68b6 error change and tokio file use change 2022-12-07 16:20:36 +08:00
Kerollmops
f1de3aa75a Make the tests use MB to trigger page size issues 2022-12-06 20:10:10 +01:00
Kerollmops
e4e4370a3c Clamp the databases size to the page size 2022-12-06 20:09:49 +01:00
Kerollmops
24c79b79f9 Bump milli to v0.37.1 2022-12-06 20:05:52 +01:00
curquiza
5db7c4057c Update version for the next release (v0.30.1) in Cargo.toml files 2022-12-06 20:05:46 +01:00
Tamo
f53bdc4320 update the contributing.md 2022-12-06 17:41:05 +01:00
bors[bot]
0a301b5f88 Merge #723
723: Fix bug in handling of soft deleted documents when updating settings r=Kerollmops a=loiclec

# Pull Request

## Related issue
Fixes (partially, until merged into meilisearch) https://github.com/meilisearch/meilisearch/issues/3021

## What does this PR do?
This PR fixes the bug where a `missing key in documents database` internal error message could appear when indexing documents.

When updating the settings, before clearing the database and before creating the transform output, we now modify the `ExternalDocumentsIds` structure to get rid of all references to soft deleted document ids in its FSTs.

It used to be that updating the settings would clear the soft-deleted document ids, but keep the original `ExternalDocumentsIds` structure. As a consequence of this, when processing a future document addition, we could wrongly believe that a document was being replaced when, in fact, it was a completely new document. See the tests `bug_3021_first`, `bug_3021_second`, and `bug_3021` for a minimal test case that would have reproduced the issue.
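A rough sketch of the fix under simplifying assumptions (a plain map instead of the real FSTs, hypothetical names):

```rust
use std::collections::BTreeMap;

use roaring::RoaringBitmap;

/// Before a settings update clears the soft-deleted ids, drop every
/// external id that still points at one of them. A later addition of the
/// same external id is then treated as a brand-new document instead of
/// a replacement.
fn purge_soft_deleted(
    external_ids: &mut BTreeMap<String, u32>,
    soft_deleted: &RoaringBitmap,
) {
    external_ids.retain(|_external_id, internal_id| !soft_deleted.contains(*internal_id));
}
```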
 
We need to take special care to:
- evaluate how users should update to v0.30.1 (containing this fix): dump? reimporting all documents from scratch?
- understand IF/HOW this bug could have caused duplicate documents to be returned 
- and evaluate the correctness of the fix, of course :)


Co-authored-by: Loïc Lecrenier <loic.lecrenier@me.com>
2022-12-06 14:37:38 +00:00
Loïc Lecrenier
a993b68684 Cargo fmt >:-( 2022-12-06 15:22:10 +01:00
Loïc Lecrenier
80c7a00567 Fix compilation error in tests of settings update 2022-12-06 15:19:26 +01:00
Loïc Lecrenier
67d8cec209 Fix bug in handling of soft deleted documents when updating settings 2022-12-06 15:09:19 +01:00
bors[bot]
2867d2e91a Merge #3190
3190: Fix the dump date-import of the dumpv4 r=irevoire a=irevoire

# Pull Request
After merging https://github.com/meilisearch/meilisearch/pull/3012 I realized that the tests on the date of the dump-v4 were still ignored, so I fixed them and then noticed that #3012 wasn't working properly.

## Related issue
Fixes https://github.com/meilisearch/meilisearch/issues/2987 a second time


`@funilrys`, since you wrote most of the code you might be interested, but don't feel obligated to review this code.
Someone from the team will double-check it works 😁 

Co-authored-by: Tamo <tamo@meilisearch.com>
2022-12-06 10:47:00 +00:00
bors[bot]
2a846aaae7 Merge #719
719: Add more members of `filter_parser` to `milli::` & `From<&str>` implementation for `Token` r=Kerollmops a=GregoryConrad

## What does this PR do?
The current `milli::Filter` and `milli::FilterCondition` APIs require working directly with some members of `filter_parser` that `milli::` does *not* re-export to its users (at least when not parsing input using `parse`). Also, using `filter_parser` does not make sense when using milli from an embedded context where there is no query to parse.

Instead of reworking `milli::Filter` and `milli::FilterCondition`, this PR adds two non-breaking changes that ease the use of milli:
- Re-exports more members of the dependent version of `filter_parser` in `milli`
- Implements `From<&str>` for `filter_parser::Token`
  - This also allows basic tests that need to create a `Token` from a string to avoid some boilerplate.

In conjunction, both of these will allow milli users to easily create a `Token` from a `&str` without needing to add `filter_parser` as an extra dependency.

Note: I wanted to use `FromStr` for the `From` implementation; however, it requires returning a `Result` which is not needed for the conversion. Thus, I just left it as `From<&str>`.
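A minimal sketch of such an implementation (a simplified `Token`, not the real `filter_parser` type):

```rust
/// Simplified stand-in for `filter_parser::Token`: a borrowed span of
/// the original filter string.
#[derive(Debug, PartialEq)]
struct Token<'a> {
    inner: &'a str,
}

impl<'a> From<&'a str> for Token<'a> {
    fn from(s: &'a str) -> Self {
        Token { inner: s }
    }
}

fn main() {
    // Tests can now build a Token without depending on filter_parser:
    let token: Token = "release_date".into();
    assert_eq!(token, Token { inner: "release_date" });
}
```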

Co-authored-by: Gregory Conrad <gregorysconrad@gmail.com>
2022-12-06 10:36:00 +00:00
bors[bot]
1458a12531 Merge #3197
3197: Revert "Upgrade alpine 3.16 to 3.17" r=irevoire a=curquiza

Reverts meilisearch/meilisearch#3189

Because `rust:alpine3.17` does not exist, and our scheduled CI failed: https://github.com/meilisearch/meilisearch/actions/runs/3626327181

`@ivanionut`, for your information: I'm sorry, I should have checked more carefully before accepting the PR, this is my bad


Co-authored-by: Clémentine Urquizar - curqui <clementine@meilisearch.com>
2022-12-06 10:25:11 +00:00
Clémentine Urquizar - curqui
cbb8d0f97b Revert "Upgrade alpine 3.16 to 3.17" 2022-12-06 11:09:57 +01:00
Tamo
bef81065f9 return the same time in case we didn't find a created or updated at 2022-12-06 11:03:23 +01:00
Tamo
180511795b Update dump/src/reader/v4/mod.rs fix typo
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2022-12-06 10:53:43 +01:00
bors[bot]
3bef6e6690 Merge #3175
3175: Rename dump command from --dumps-dir to --dump-dir r=dureuill a=dureuill

# Pull Request

## Related issue
Fixes #3132 

## What does this PR do?
- Rename the dump commands, env variables and default config

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2022-12-06 09:49:42 +00:00
bors[bot]
d6eacb2aac Merge #722
722: Geosearch for zero radius r=irevoire a=amab8901

# Pull Request

## Related issue
Fixes #3167 (https://github.com/meilisearch/meilisearch/issues/3167)

## What does this PR do?
- allows Geosearch with a zero radius to return the specified location when the coordinates match perfectly (instead of returning nothing), as sketched below. See the linked issue for more details.
- new attempt on https://github.com/meilisearch/milli/pull/713
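A hypothetical before/after sketch of the comparison involved (the actual fix lives in `milli/src/search/facet/filter.rs`; the strict bound shown here is an assumption for illustration):

```rust
// Before: with a strict bound, _geoRadius(lat, lng, 0) can never match,
// even for a document located exactly at the center (distance == 0).
fn matches_before(distance_m: f64, radius_m: f64) -> bool {
    distance_m < radius_m
}

// After: an inclusive bound lets a zero radius match a perfect
// coordinate hit while behaving the same for positive radii.
fn matches_after(distance_m: f64, radius_m: f64) -> bool {
    distance_m <= radius_m
}

fn main() {
    assert!(!matches_before(0.0, 0.0));
    assert!(matches_after(0.0, 0.0));
}
```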

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: amab8901 <amab8901@protonmail.com>
Co-authored-by: Tamo <irevoire@protonmail.ch>
2022-12-05 19:57:08 +00:00
Tamo
212dbfa3b5 Update milli/src/search/facet/filter.rs 2022-12-05 20:56:21 +01:00
amab8901
456da5de9c Geosearch for zero radius 2022-12-05 20:11:46 +01:00
bors[bot]
46e26ab550 Merge #720
720: Make soft deletion optional in document addition and deletion + add lots of tests r=irevoire a=loiclec

# Pull Request

## What does this PR do?
When debugging recent issues, I created a few unit tests in the hope of reproducing the bugs I was looking for. In the end, I didn't find any, but I thought it would still be good to keep those tests.

More importantly, I added a field to the `DeleteDocuments` and `IndexDocuments` builders, called `disable_soft_deletion`. If set to `true`, the indexing/deletion will never add documents to the `soft_deleted_documents_ids` and instead perform a real deletion of the documents from the databases.

For the new tests, I have:
- Improved the insta-snapshot format of the `external_documents_ids` structure
- Added more tests for the facet DB indexing, deletion, and search algorithms, making sure to test them when the facet DB contains strings (instead of numbers) as well.
- Added more tests for the incremental indexing of the prefix proximity databases. For example, to see if documents are replaced correctly and if common prefixes are deleted correctly.
- Added tests that mix soft deletion and hard deletion, including when processing batches of document updates. 


Co-authored-by: Loïc Lecrenier <loic.lecrenier@me.com>
2022-12-05 18:26:01 +00:00
bors[bot]
9b23885e85 Merge #3188
3188: re-enable the dump test on the dates r=irevoire a=irevoire

I just noticed that we have the real date in the dump-v1, contrary to dump-v2/3/4/5, so we can ensure it doesn't change unexpectedly 👍

Co-authored-by: Tamo <tamo@meilisearch.com>
2022-12-05 18:10:22 +00:00
bors[bot]
8b46093117 Merge #3189
3189: Upgrade alpine 3.16 to 3.17 r=curquiza a=ivanionut

Upgrade alpine 3.16 to 3.17

Co-authored-by: Ivan Ionut <ivan.ionut@gmail.com>
2022-12-05 17:39:10 +00:00
Tamo
9c89e3dadc uncomment more tests for the dump v4 2022-12-05 18:15:29 +01:00
Tamo
b0cf431614 Fix the dump date-import of the dumpv4 2022-12-05 18:08:35 +01:00
Ivan Ionut
afe520a67e Upgrade alpine 3.16 to 3.17 2022-12-05 17:49:15 +01:00
Tamo
688911ed34 re-enable the dump test on the dates 2022-12-05 17:05:37 +01:00
bors[bot]
776af129bf Merge #3012
3012: Extract the dates out of the dumpv4. r=irevoire a=funilrys

Hi there, 

please review this PR that tries to fix #2987. I'm still learning Rust and I found that #2987 is an excellent way for me to read and learn what others do with Rust. So please excuse my semantics ...

Stay safe and healthy.

---

# Pull Request

This patch possibly fixes #2987.

This patch introduces a way to fill the IndexMetadata.created_at and IndexMetadata.updated_at keys from the task events. This is done by reading the creation date of the first event (created_at) and the creation date of the last event (updated_at).
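A small sketch of that logic, assuming a simplified task event type (the real reader works on the dump's v4 task format):

```rust
use time::OffsetDateTime;

/// Simplified task event as it could appear in a v4 dump.
struct TaskEvent {
    created_at: OffsetDateTime,
}

/// The first event's date becomes `created_at`, the last event's date
/// becomes `updated_at`. Returns None when the index has no events.
fn dates_from_events(events: &[TaskEvent]) -> Option<(OffsetDateTime, OffsetDateTime)> {
    let created_at = events.first()?.created_at;
    let updated_at = events.last()?.created_at;
    Some((created_at, updated_at))
}
```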


## Related issue
Fixes #2987

## What does this PR do?
- Extract the dates out of the dumpv4.

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: funilrys <contact@funilrys.com>
2022-12-05 15:57:07 +00:00
Louis Dureuil
492fd2829a use a consistent dump directory name in tests
changed from 'dump' to 'dumps' to be consistent with the default settings

Co-authored-by: Tamo <tamo@meilisearch.com>
2022-12-05 16:56:28 +01:00
bors[bot]
ffa6d1ed4e Merge #3186
3186: Update mini-dashboard to v0.2.4 r=curquiza a=mdubus



Co-authored-by: Morgane Dubus <30866152+mdubus@users.noreply.github.com>
2022-12-05 14:45:27 +00:00
Morgane Dubus
293efb7485 Update Cargo.toml 2022-12-05 14:54:01 +01:00
Loïc Lecrenier
cda4ba2bb6 Add document import tests 2022-12-05 12:02:49 +01:00
Loïc Lecrenier
ae59d37b75 Improve insta-snap of the external document ids 2022-12-05 10:51:02 +01:00
Loïc Lecrenier
f2cf981641 Add more tests and allow disabling of soft-deletion outside of tests
Also allow disabling soft-deletion in the IndexDocumentsConfig
2022-12-05 10:51:01 +01:00
jiangbo212
6766712840 fmt fix 2022-12-04 23:05:34 +08:00
jiangbo212
980776b646 test fail fix 2022-12-04 22:31:23 +08:00
jiangbo212
6bdd37beb8 tokio file write update 2022-12-04 18:25:06 +08:00
Gregory Conrad
50954d31fa feat: Re-export Span and Token to milli:: 2022-12-03 13:37:33 -05:00
Gregory Conrad
1b5b5778c1 feat: Add From<&str> implementation for Token 2022-12-03 13:13:41 -05:00
funilrys
8b6eba4f0b Apply fmt. 2022-12-03 17:47:02 +01:00
funilrys
e510ace179 fixup! Re-open tasks queue. 2022-12-03 17:41:33 +01:00
funilrys
f056fc118f Re-open tasks queue.
Indeed, before this patch, I was (probably) breaking every usage
of the tasks BufReader. This patch solves the issue by reopening
the tasks file every time it's needed.
2022-12-03 17:29:41 +01:00
jiangbo212
5a770ffe47 test fail fix 2022-12-03 22:48:38 +08:00
jiangbo212
7b08d700f7 requested changes fix 2022-12-03 18:52:20 +08:00
jiangbo212
c63748723d Merge branch 'meilisearch:main' into fix-3037 2022-12-03 15:56:41 +08:00
bors[bot]
40339919ad Merge #3170 #3180 #3181 #3183
3170: Re-enable importing from dumps v1 r=irevoire a=dureuill

# Pull Request

## Related issue
Fixes #2985 

## What does this PR do?

### User standpoint

- Allows importing dumps version 1 (exported with Meilisearch <=v0.20) to modern-day Meilisearch
- Tasks of type "Customs" are skipped, with a warning
- Tasks of status "enqueued" are skipped, with a warning
- The "WordsPosition" ranking rule is skipped when encountered in the ranking rules, with a warning.

After an import from a v1 dump, it is recommended that a user checks each index and its settings.

### Implementation standpoint

- Add a dump v1 reader based on the one by `@irevoire` 
- Add a v1_to_v2 compatibility layer based on the v2_to_v3 one
  - as v2 requires UUIDs, the v1 indexes are mapped to UUIDs built from their position in the metadata file: the first index is given the all-zeroes UUID, the second one UUID `00000000-0000-0000-0000-000000000001`, and so on (see the sketch after this list). This should have no bearing on the final indexes because v6 is not using UUIDs, but this allows us to correctly identify which tasks belong to which index.
- Modify the v2_to_v3 compatibility layer to account for the fact that the reader can actually be a v1_to_v2 compat layer
- Make some base dump types Clone
- impl Display for v2::settings::Criterion
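A sketch of the deterministic position-to-UUID mapping mentioned above (assuming the `uuid` crate; the function name is hypothetical):

```rust
use uuid::Uuid;

/// Build a UUID from the index's position in the v1 metadata file:
/// position 0 yields the all-zeroes UUID, position 1 yields
/// 00000000-0000-0000-0000-000000000001, and so on.
fn uuid_from_position(position: u128) -> Uuid {
    Uuid::from_u128(position)
}

fn main() {
    assert_eq!(
        uuid_from_position(1).to_string(),
        "00000000-0000-0000-0000-000000000001"
    );
}
```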

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


3180: Bump mislav/bump-homebrew-formula-action from 1 to 2 r=curquiza a=dependabot[bot]

Bumps [mislav/bump-homebrew-formula-action](https://github.com/mislav/bump-homebrew-formula-action) from 1 to 2.
Release notes, sourced from [mislav/bump-homebrew-formula-action's releases](https://github.com/mislav/bump-homebrew-formula-action/releases) (cleaned up from the quoted HTML):

- **2.0**: use Node 16 (#36); bump minimist from 1.2.5 to 1.2.6 (#33); bump node-fetch from 2.6.6 to 2.6.7 (#34)
- **1.16**: replaces the broken v1.15 tag (#32); add `push-to` option (#30); fix a syntax error (#27); ensure repeated placeholders in `commit-message` are expanded (#29)
- **1.14**: ignore HTTP 409 errors when fast-forwarding the main branch of the `homebrew-tap` fork
- **1.13**: add `create-pullrequest` input to control whether or not a PR is submitted to `homebrew-tap`; add `download-sha256` input to define the SHA256 checksum of the archive at `download-url`; fix creating a new branch in the forked repo failing with HTTP 404
- **1.12**: fix the Actions CJS loader halting on `foo?.bar` JS syntax
- **1.11**: new optional `formula-path` input accepting the filename of the formula file to edit (default `Formula/<formula-name>.rb`); remove `revision N` lines when bumping Homebrew formulae
- **1.10**: the new optional `tag-name` input allows this action to be [manually triggered via `workflow_dispatch`](https://docs.github.com/en/actions/managing-workflow-runs/manually-running-a-workflow) instead of on git push to a tag
- **1.9**: fix following multiple HTTP redirects while calculating the checksum for `download-url`
- **1.8**: enable JavaScript source maps for better failure debugging
- **1.7**: allow `download-url` as an input parameter; add support for git-based `download-url`
- **1.6**: control the git commit message template used for updating the formula file via the `commit-message` action input
- **1.5**: support detecting the version from `https://github.com/OWNER/REPO/releases/download/TAG/FILE` download URLs

(release notes truncated; full list of changes in the [v1...v2 compare view](https://github.com/mislav/bump-homebrew-formula-action/compare/v1...v2))


[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=mislav/bump-homebrew-formula-action&package-manager=github_actions&previous-version=1&new-version=2)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

You can trigger a rebase of this PR by commenting `@dependabot rebase`.


---


3181: Bump Swatinem/rust-cache from 2.0.0 to 2.2.0 r=curquiza a=dependabot[bot]

Bumps [Swatinem/rust-cache](https://github.com/Swatinem/rust-cache) from 2.0.0 to 2.2.0.
Release notes and changelog, sourced from [Swatinem/rust-cache's releases](https://github.com/Swatinem/rust-cache/releases) and [changelog](https://github.com/Swatinem/rust-cache/blob/master/CHANGELOG.md) (the two quoted lists carried the same entries):

- **v2.2.0**: add a new `save-if` option to always restore, but only conditionally save the cache
- **v2.1.0**: only hash `Cargo.{lock,toml}` files in the configured workspace directories
- **v2.0.2**: avoid calling `cargo metadata` on pre-cleanup; added `prefix-key`, `cache-directories` and `cache-targets` options
- **v2.0.1**: primarily just updating dependencies to fix GitHub deprecation notices

(full list of changes in the [v2.0.0...v2.2.0 compare view](https://github.com/Swatinem/rust-cache/compare/v2.0.0...v2.2.0))


[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=Swatinem/rust-cache&package-manager=github_actions&previous-version=2.0.0&new-version=2.2.0)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

You can trigger a rebase of this PR by commenting `@dependabot rebase`.


---


3183: Use ubuntu-latest when not impacting r=Kerollmops a=curquiza

Minor changes
- Use `ubuntu-latest` for CI where there is no compilation
- rename one of the workflows (obsolete name)

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: curquiza <clementine@meilisearch.com>
2022-12-01 17:33:59 +00:00
curquiza
7f3653ec31 Use ubuntu-latest when not impacting 2022-12-01 18:23:29 +01:00
bors[bot]
a1f5ec1e9e Merge #3179
3179: Bump svenstaro/upload-release-action from 1.pre.release to 2.3.0 r=curquiza a=dependabot[bot]

Bumps [svenstaro/upload-release-action](https://github.com/svenstaro/upload-release-action) from 1.pre.release to 2.3.0.
Release notes and changelog, sourced from [svenstaro/upload-release-action's releases](https://github.com/svenstaro/upload-release-action/releases) and [changelog](https://github.com/svenstaro/upload-release-action/blob/master/CHANGELOG.md) (the two quoted lists carried the same entries):

- **2.3.0** (2022-06-05): now defaults `repo_token` to `${{ github.token }}` and `tag` to `${{ github.ref }}` (#69, thanks @leighmcculloch)
- **2.2.1** (2020-12-16): added support for the GitHub pagination API for repositories with many releases (#36, thanks @djpohly)
- **2.2.0** (2020-10-07): add support for creating a new release in a foreign repository (#25, thanks @kittaakos); upgrade all deps
- **2.1.1** (2020-09-25): fix the `release_name` option (#27, thanks @kittaakos)
- **2.1.0** (2020-08-10): strip `refs/heads/` from the input tag (#23, thanks @OmarEmaraDev)
- **2.0.0** (2020-07-03): add `prerelease` input parameter to mark the created release as a pre-release; add `release_name` input parameter to explicitly set the title of the release; add `body` input parameter to set the text content of the created release; add `browser_download_url` output variable containing the publicly accessible download URL of the uploaded artifact; allow leaving `asset_name` unset, in which case the asset uses the filename
- **1.1.0**, **1.0.1**, **1.0.0**: no release notes provided

(full list of changes in the [v1-release...2.3.0 compare view](https://github.com/svenstaro/upload-release-action/compare/v1-release...2.3.0))


[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=svenstaro/upload-release-action&package-manager=github_actions&previous-version=1.pre.release&new-version=2.3.0)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

You can trigger a rebase of this PR by commenting `@dependabot rebase`.


---


Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2022-12-01 17:11:54 +00:00
dependabot[bot]
dd6593f7b6 Bump Swatinem/rust-cache from 2.0.0 to 2.2.0
Bumps [Swatinem/rust-cache](https://github.com/Swatinem/rust-cache) from 2.0.0 to 2.2.0.
- [Release notes](https://github.com/Swatinem/rust-cache/releases)
- [Changelog](https://github.com/Swatinem/rust-cache/blob/master/CHANGELOG.md)
- [Commits](https://github.com/Swatinem/rust-cache/compare/v2.0.0...v2.2.0)

---
updated-dependencies:
- dependency-name: Swatinem/rust-cache
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
2022-12-01 17:02:25 +00:00
dependabot[bot]
fc905f29e9 Bump mislav/bump-homebrew-formula-action from 1 to 2
Bumps [mislav/bump-homebrew-formula-action](https://github.com/mislav/bump-homebrew-formula-action) from 1 to 2.
- [Release notes](https://github.com/mislav/bump-homebrew-formula-action/releases)
- [Commits](https://github.com/mislav/bump-homebrew-formula-action/compare/v1...v2)

---
updated-dependencies:
- dependency-name: mislav/bump-homebrew-formula-action
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
2022-12-01 17:02:18 +00:00
dependabot[bot]
f892d122de Bump svenstaro/upload-release-action from 1.pre.release to 2.3.0
Bumps [svenstaro/upload-release-action](https://github.com/svenstaro/upload-release-action) from 1.pre.release to 2.3.0.
- [Release notes](https://github.com/svenstaro/upload-release-action/releases)
- [Changelog](https://github.com/svenstaro/upload-release-action/blob/master/CHANGELOG.md)
- [Commits](https://github.com/svenstaro/upload-release-action/compare/v1-release...2.3.0)

---
updated-dependencies:
- dependency-name: svenstaro/upload-release-action
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>
2022-12-01 17:02:14 +00:00
bors[bot]
7e314664fc Merge #3169
3169: Improve download-latest.sh script: integrate apple-silicon binary r=curquiza a=curquiza

Fixes multiple issues: https://github.com/meilisearch/meilisearch/issues/3044, https://github.com/meilisearch/meilisearch/issues/3027, https://github.com/meilisearch/meilisearch/issues/2613, https://github.com/meilisearch/meilisearch/issues/3014


Improvement/Addition
- https://github.com/meilisearch/meilisearch/issues/3044
- https://github.com/meilisearch/meilisearch/issues/3027

Simplification
- With https://github.com/meilisearch/meilisearch/issues/3014, we removed the complex logic used to get the latest version of Meilisearch, which we can now fetch directly from the GitHub API

Bug fixes:
- Should remove the problem the users encountered in this issue (https://github.com/meilisearch/meilisearch/issues/2613) because the related part has been removed from the script

Co-authored-by: curquiza <clementine@meilisearch.com>
Co-authored-by: Clémentine Urquizar - curqui <clementine@meilisearch.com>
2022-12-01 16:18:38 +00:00
Louis Dureuil
a92d67b79f Also update analytics 2022-12-01 16:24:02 +01:00
Louis Dureuil
daa0f2e9aa Change dumps_dir to dump_dir in config.toml 2022-12-01 15:46:54 +01:00
Louis Dureuil
e35db5e59b Rename dumps-dir to dump-dir in CLI
Also rename the associated environment variable
2022-12-01 15:46:54 +01:00
bors[bot]
d3731dda48 Merge #706
706: Limit the reindexing caused by updating settings when not needed r=curquiza a=GregoryConrad

## What does this PR do?
When updating index settings using `update::Settings`, sometimes a `reindex` of `update::Settings` is triggered when it doesn't need to be. This PR aims to prevent those unnecessary `reindex` calls.

For reference, here is a snippet from the current `execute` method in `update::Settings`:
```rust
// ...
if stop_words_updated
    || faceted_updated
    || synonyms_updated
    || searchable_updated
    || exact_attributes_updated
{
    self.reindex(&progress_callback, &should_abort, old_fields_ids_map)?;
}
```
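The fix itself boils down to only raising the `searchable_updated` and `exact_attributes_updated` flags when the incoming value actually differs from the stored one. A minimal sketch of that idea (not milli's actual code):

```rust
/// Raise an `*_updated` flag only when the setting really changed, so
/// re-sending identical settings no longer triggers a reindex.
fn flag_updated<T: PartialEq>(old: &T, new: &T) -> bool {
    old != new
}

fn main() {
    let old_searchable = vec!["title".to_string(), "overview".to_string()];
    let unchanged = old_searchable.clone();
    assert!(!flag_updated(&old_searchable, &unchanged)); // no reindex
    assert!(flag_updated(&old_searchable, &vec!["title".to_string()])); // reindex
}
```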

- [x] `faceted_updated` - looks good as-is 
- [x] `stop_words_updated` - looks good as-is 
- [x] `synonyms_updated` - looks good as-is 
- [x] `searchable_updated` - fixed in this PR
- [x] `exact_attributes_updated` - fixed in this PR

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Gregory Conrad <gregorysconrad@gmail.com>
2022-12-01 13:58:02 +00:00
Louis Dureuil
b727fe7179 Fix integration tests 2022-12-01 14:03:15 +01:00
Louis Dureuil
1260c32d18 Add reader mod test 2022-12-01 14:03:15 +01:00
Louis Dureuil
e03c216952 Add compat_v1_to_v2 test 2022-12-01 14:03:15 +01:00
Louis Dureuil
c7749127fa Use reader v1 and compat to v2 2022-12-01 14:03:15 +01:00
Louis Dureuil
c8841344e2 v1: RankingRule::from_str 2022-12-01 14:03:15 +01:00
Louis Dureuil
b8de369e33 Add v1 reader 2022-12-01 14:03:15 +01:00
bors[bot]
93b59d55e3 Merge #3172
3172: Add CI to push a latest git tag for every stable Meilisearch release r=curquiza a=curquiza

Fixes partially #3147.

- Add a CI to add/update a `latest` git tag ONLY when releasing a stable version of Meilisearch (not for pre-releases or custom tags for prototypes)
- Update the Docker CI to avoid being triggered when the `latest` git tag is pushed: the CI is still triggered when a git tag is pushed, except for the `latest` tag.
Reminder: the `latest` Docker image is already created and pushed when releasing a stable version of Meilisearch. This step is already present in our current Docker CI.

Co-authored-by: curquiza <clementine@meilisearch.com>
2022-12-01 12:37:09 +00:00
curquiza
b9a8533de1 Add CI to push a latest git tag for every stable Meilisearch release 2022-12-01 12:53:44 +01:00
bors[bot]
51a2613c5c Merge #715
715: Fix benchmark CI r=irevoire a=curquiza

Fixes #714 

Tested with our actions: https://github.com/meilisearch/milli/actions/runs/3591527753/jobs/6046157141

Co-authored-by: curquiza <clementine@meilisearch.com>
2022-12-01 10:39:38 +00:00
Louis Dureuil
d44652209d impl Display for v2::settings::Criterion 2022-12-01 11:15:57 +01:00
Louis Dureuil
5d22c7bcce Make some dump types Clone 2022-12-01 11:15:57 +01:00
bors[bot]
82e1c4f468 Merge #716
716: Bump Swatinem/rust-cache from 2.0.1 to 2.2.0 r=curquiza a=dependabot[bot]

Bumps [Swatinem/rust-cache](https://github.com/Swatinem/rust-cache) from 2.0.1 to 2.2.0.
Release notes and changelog, sourced from [Swatinem/rust-cache's releases](https://github.com/Swatinem/rust-cache/releases) and [changelog](https://github.com/Swatinem/rust-cache/blob/master/CHANGELOG.md) (duplicate entries merged):

- **2.2.0**: add a new `save-if` option to always restore, but only conditionally save the cache
- **2.1.0**: only hash `Cargo.{lock,toml}` files in the configured workspace directories
- **2.0.2**: avoid calling `cargo metadata` on pre-cleanup; added `prefix-key`, `cache-directories` and `cache-targets` options
- **2.0.1**: primarily just updating dependencies to fix GitHub deprecation notices
- **2.0.0**: the action code was refactored to allow caching multiple workspaces and different `target` directory layouts; the `working-directory` and `target-dir` input options were replaced by a single `workspaces` option of the form `$workspace -> $target`; support for considering `env-vars` as part of the cache key; the `sharedKey` input option was renamed to `shared-key` for consistency
- **1.4.0**: clean both `debug` and `release` target directories
- **1.3.0**: use the Rust toolchain file as an additional cache key; allow for a configurable target-dir
- **1.2.0**: cache `~/.cargo/bin`; support custom `$CARGO_HOME`; add a `cache-hit` output; add a new `sharedKey` option that overrides the automatic job-name based key
- **1.1.0**: add a new `working-directory` input; support caching git dependencies; lots of other improvements

(changelog truncated; full list of changes in the [v2.0.1...v2.2.0 compare view](https://github.com/Swatinem/rust-cache/compare/v2.0.1...v2.2.0))


[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=Swatinem/rust-cache&package-manager=github_actions&previous-version=2.0.1&new-version=2.2.0)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

You can trigger a rebase of this PR by commenting `@dependabot rebase`.


---


Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2022-12-01 10:08:58 +00:00
curquiza
5bdf5c0aaf Update the steps to set variables 2022-12-01 11:07:54 +01:00
dependabot[bot]
282b2e3b98 Bump Swatinem/rust-cache from 2.0.1 to 2.2.0
Bumps [Swatinem/rust-cache](https://github.com/Swatinem/rust-cache) from 2.0.1 to 2.2.0.
- [Release notes](https://github.com/Swatinem/rust-cache/releases)
- [Changelog](https://github.com/Swatinem/rust-cache/blob/master/CHANGELOG.md)
- [Commits](https://github.com/Swatinem/rust-cache/compare/v2.0.1...v2.2.0)

---
updated-dependencies:
- dependency-name: Swatinem/rust-cache
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
2022-12-01 10:02:54 +00:00
bors[bot]
5e754b3ee0 Merge #708
708: Reduce memory usage of the MatchingWords structure r=ManyTheFish a=loiclec

# Pull Request

## Related issue
Fixes (partially) https://github.com/meilisearch/meilisearch/issues/3115 

## What does this PR do?
1. Reduces the memory usage caused by the creation of a 10-word query tree by 20x. 
   This is done by deduplicating the `MatchingWord` values, which are heavy because of their inner DFA. The deduplication works by wrapping each `MatchingWord` in a reference-counted box and using a hash map to determine whether a `MatchingWord` DFA already exists for a given signature, or whether a new one needs to be built (see the sketch below).
 
2. Avoid the worst-case scenario of creating a `MatchingWord` for extremely long words that cannot be indexed by milli.
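A condensed sketch of that deduplication (the field names are assumptions; the real value carries a DFA):

```rust
use std::collections::HashMap;
use std::rc::Rc;

/// Stand-in for the heavy, DFA-carrying value.
struct MatchingWord {
    word: String,
    typos: u8,
    prefix: bool,
}

/// Cache keyed by the word's signature: the first request builds the
/// value, every later request for the same signature clones an Rc.
#[derive(Default)]
struct MatchingWordCache {
    cache: HashMap<(String, u8, bool), Rc<MatchingWord>>,
}

impl MatchingWordCache {
    fn insert(&mut self, word: String, typos: u8, prefix: bool) -> Rc<MatchingWord> {
        self.cache
            .entry((word.clone(), typos, prefix))
            .or_insert_with(|| Rc::new(MatchingWord { word, typos, prefix }))
            .clone()
    }
}
```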

Co-authored-by: Loïc Lecrenier <loic.lecrenier@me.com>
2022-11-30 17:47:34 +00:00
bors[bot]
e1612fcb01 Merge #712
712: Fix bulk facet indexing bug r=Kerollmops a=loiclec

# Pull Request

## Related issue
Fixes (partially, until merged into meilisearch) https://github.com/meilisearch/meilisearch/issues/3165

## What does this PR do?
Fixes a bug where indexing certain numbers of filterable attribute values in bulk led to corrupted facet databases. This was due to a lossy integer conversion which would ultimately prevent entire levels of the facet database from being written into LMDB.

More specifically, this change was made:
```diff
      - if cur_writer_len as u8 >= self.min_level_size {
      + if cur_writer_len >= self.min_level_size as usize {
```
I also checked other comparisons to `min_level_size` and other conversions such as `x as u8` in this part of the codebase.
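To see why the original comparison broke, note that the `as u8` cast wraps modulo 256, so a level with exactly 256 entries compared as zero:

```rust
fn main() {
    let cur_writer_len: usize = 256; // entries written for this level
    let min_level_size: u8 = 5;

    // Buggy: 256 as u8 == 0, so the check fails and the whole level
    // is silently skipped instead of being written to LMDB.
    assert!(!((cur_writer_len as u8) >= min_level_size));

    // Fixed: widen min_level_size instead of truncating the length.
    assert!(cur_writer_len >= min_level_size as usize);
}
```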



Co-authored-by: Loïc Lecrenier <loic.lecrenier@me.com>
2022-11-30 16:51:48 +00:00
Clémentine Urquizar - curqui
da8044f91e Update download-latest.sh
Co-authored-by: Louis Dureuil <louis.dureuil@gmail.com>
2022-11-30 16:55:32 +01:00
Clémentine Urquizar - curqui
9e3b1eb7a8 Update download-latest.sh
Co-authored-by: Louis Dureuil <louis.dureuil@gmail.com>
2022-11-30 16:55:27 +01:00
Loïc Lecrenier
9dd4b33a9a Fix bulk facet indexing bug 2022-11-30 14:27:36 +01:00
curquiza
eab1156f8c Update script to be used with macOS apple silicon 2022-11-30 14:14:46 +01:00
curquiza
8d405fad12 Simplify download-latest.sh script 2022-11-30 13:53:12 +01:00
jiangbo212
bf96b6df93 clippy fix change 2022-11-30 17:59:06 +08:00
jiangbo212
9c28632498 Merge branch 'main' into fix-3037 2022-11-30 09:38:01 +08:00
jiangbo212
38982d13fe fix issue 3037 2022-11-30 00:03:22 +08:00
bors[bot]
de22116b3d Merge #711
711: Replace deprecated gh actions r=curquiza a=pnhatminh

# Pull Request

## Related issue
Fixes #678

## What does this PR do?
- Replace deprecated GitHub Actions commands with the newly defined ones.

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Minh Pham <minh.pham@codelink.io>
2022-11-29 09:56:22 +00:00
bors[bot]
6150aa73b0 Merge #3148
3148: Bring back `release-v0.30.0` into `main` r=Kerollmops a=curquiza

Following this message

https://github.com/meilisearch/meilisearch/pull/3145#issuecomment-1329296168

Co-authored-by: Clémentine Urquizar - curqui <clementine@meilisearch.com>
Co-authored-by: Tamo <tamo@meilisearch.com>
Co-authored-by: bors[bot] <26634292+bors[bot]@users.noreply.github.com>
2022-11-29 09:23:44 +00:00
Minh Pham
5f78522044 Update 2022-11-29 10:11:38 +07:00
Gregory Conrad
87e2bc3bed fix(reindex): reindex in a few more cases
Cases: whenever searchable_fields OR user_defined_searchable_fields is modified
2022-11-28 13:12:19 -05:00
bors[bot]
14840a24c6 Merge #3149
3149: Fix the dump tests r=Kerollmops a=irevoire

You'll need to trust me on this one. But the tests in the release-v0.30.0 branch were deactivated for a long time, and I don't know what was wrong with them.
Anyway, I checked that these SHAs did match the tasks view we're expecting, and it looks good to me.

Co-authored-by: Tamo <tamo@meilisearch.com>
2022-11-28 15:48:36 +00:00
Tamo
41eb986f65 Fix the dump tests 2022-11-28 16:39:22 +01:00
Loïc Lecrenier
61b58b115a Don't create partial matching words for synonyms in ngrams 2022-11-28 16:32:28 +01:00
Clémentine Urquizar - curqui
457a473b72 Bring back release-v0.30.0 into release-v0.30.0-temp (final: into main) (#3145)
* Fix error code of the "duplicate index found" error

* Use the content of the ProcessingTasks in the tasks cancelation system

* Change the missing_filters error code into missing_task_filters

* WIP Introduce the invalid_task_uid error code

* Use more precise error codes/message for the task routes

+ Allow star operator in delete/cancel tasks
+ rename originalQuery to originalFilters
+ Display error/canceled_by in task view even when they are = null
+ Rename task filter fields by using their plural forms
+ Prepare an error code for canceledBy filter
+ Only return global tasks if the API key action `index.*` is there

* Add canceledBy task filter

* Update tests following task API changes

* Rename original_query to original_filters everywhere

* Update more insta-snap tests

* Make clippy happy

They're a happy clip now.

* Make rustfmt happy

>:-(

* Fix Index name parsing error message to fit the specification

* Bump milli version to 0.35.1

* Fix the new error messages

* fix the error messages and add tests

* rename the error codes for the sake of consistency

* refactor the way we send the CLI information + add the analytics for the config file and SSL usage

* Apply suggestions from code review

Co-authored-by: Clément Renault <clement@meilisearch.com>

* add a comment over the new infos structure

* reformat, sorry @kero

* Store analytics for the documents deletions

* Add analytics on all the settings

* Spawn threads with names

* Spawn rayon threads with names

* update the distinct attributes to the spec update

* update the analytics on the search route

* implements the analytics on the health and version routes

* Fix task details serialization

* Add the question mark to the task deletion query filter

* Add the question mark to the task cancelation query filter

* Fix tests

* add analytics on the task route

* Add all the missing fields of the new task query type
* Create a new analytics for the task deletion
* Create a new analytics for the task creation

* batch the tasks seen events

* Update the finite pagination analytics

* add the analytics of the swap-indexes route

* Stop removing the DB when failing to read it

* Rename originalFilters into originalFilters

* Rename matchedDocuments into providedIds

* Add `workflow_dispatch` to flaky.yml

* Bump grenad to 0.4.4

* Bump milli to version v0.37.0

* Don't multiply total memory returned by sysinfo anymore

sysinfo now returns bytes rather than KB

* Add a dispatch to the publish binaries workflow

* Fix publish release CI

* Don't use gold but the default linker

* Always display details for the indexDeletion task

* Fix the insta tests

* refactorize the whole test suite
1. Make a call to assert_internally_consistent automatically when snapshotting the scheduler. There is no point in snapshotting something broken and expecting the dumb humans to notice.
2. Replace every possible call to assert_internally_consistent by a snapshot of the scheduler. It takes as many lines and ensures we never change something without noticing in any test ever.
3. Name every snapshot: it's easier to debug when something goes wrong and easier to review in general.
4. Stop skipping breakpoints, it's too easy to miss something. Now you must explicitly show which path the scheduler is supposed to use.
5. Add a timeout on the channel.recv; it eases the process of writing tests: now when something fails you get a failure instead of a deadlock.

* rebase on release-v0.30

* makes clippy happy

* update the snapshots after a rebase

* try to remove the flakiness of the failing test

* Add more analytics on the ranking rules positions

* Update the dump test to check for the dumpUid dumpCreation task details

* send the ranking rules as a string because amplitude is too dumb to process an array as a single value

* Display a null dumpUid until we computed the dump itself on disk

* Update tests

* Check if the master key is missing before returning an error

Co-authored-by: Loïc Lecrenier <loic.lecrenier@me.com>
Co-authored-by: bors[bot] <26634292+bors[bot]@users.noreply.github.com>
Co-authored-by: Kerollmops <clement@meilisearch.com>
Co-authored-by: ManyTheFish <many@meilisearch.com>
Co-authored-by: Tamo <tamo@meilisearch.com>
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2022-11-28 16:27:41 +01:00
Gregory Conrad
d3182f3830 refactor: Change return type to keep consistency with others 2022-11-28 10:02:03 -05:00
bors[bot]
f698e6cfdf Merge #707
707: Add all_obkv_to_json function r=Kerollmops a=GregoryConrad

## What does this PR do?
When embedding milli in an application (other than Meilisearch), it often makes sense to not use the `displayed_attributes` functionality and instead just use milli as a full document store. Thus, this PR adds a function, `all_obkv_to_json`, to supplement the already exposed `milli::obkv_to_json` so that those embedding milli *do not* need to deal with `displayed_attributes` if they don't need to.

~This PR also introduces a slight breaking change: `obkv_to_json` now accepts a reference to `obkv::KvReaderU16` instead of taking ownership of it. As far as I can tell, this seems like a change for the better (`obkv_to_json` only acts upon `obkv` rather than consuming it), but I can change it back if you so desire.~ (reverted in [935a724](935a724c57))

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Gregory Conrad <gregorysconrad@gmail.com>
2022-11-28 14:52:45 +00:00
Loïc Lecrenier
f70856bab1 Remove memory usage test that fails when many tests are run in parallel 2022-11-28 12:55:28 +01:00
Loïc Lecrenier
80588daae5 Fix compilation error in formatting benches 2022-11-28 10:27:15 +01:00
Loïc Lecrenier
e2ebed62b1 Don't create partial matching words for synonyms, split words, phrases 2022-11-28 10:20:13 +01:00
Loïc Lecrenier
8284bd760f Relax memory ordering of operations within the test CountingAlloc 2022-11-28 10:20:13 +01:00
Loïc Lecrenier
8d0ace2d64 Avoid creating a MatchingWord for words that exceed the length limit 2022-11-28 10:20:13 +01:00
Loïc Lecrenier
86c34a996b Deduplicate matching words 2022-11-28 10:20:13 +01:00
Minh Pham
eba7af1d2c Replace deprecated gh actions 2022-11-27 06:47:08 +07:00
Gregory Conrad
e0d24104a3 refactor: Rewrite another method chain to be more readable 2022-11-26 13:33:19 -05:00
Gregory Conrad
2db738dbac refactor: rewrite method chain to be more readable 2022-11-26 13:26:39 -05:00
bors[bot]
84dd2e4df1 Merge #710
710: Update Clippy to use Rust Stable r=irevoire a=Kerollmops

This PR changes the CI to use Rust stable for Clippy and Rustfmt. This way we will reduce the number of times we break the CI. [The version will only change every two months or so](https://www.whatrustisit.com/).

Co-authored-by: Clément Renault <clement@meilisearch.com>
2022-11-24 15:57:04 +00:00
Clément Renault
3d06ea41ea Keep a nightly for rustfmt
Co-authored-by: Tamo <tamo@meilisearch.com>
2022-11-24 16:54:40 +01:00
Clément Renault
3958db4b17 Update the CI to use Rust Stable 2022-11-24 16:26:48 +01:00
Gregory Conrad
935a724c57 revert: Revert pass by reference API change 2022-11-24 10:08:23 -05:00
bors[bot]
914f8b118c Merge #3119
3119: Dump tests r=Kerollmops a=irevoire

Reenable the dump tests

Co-authored-by: Tamo <tamo@meilisearch.com>
2022-11-24 10:17:38 +00:00
Gregory Conrad
ed29cceae9 perf: Prevent reindex in searchable set case when not needed 2022-11-23 22:33:06 -05:00
Gregory Conrad
bb9e33bf85 perf: Prevent reindex in searchable reset case when not needed 2022-11-23 22:01:46 -05:00
Gregory Conrad
7c0e544839 feat: Add all_obkv_to_json function 2022-11-23 21:18:58 -05:00
Colby Allen
6ecd486d1b Bumps cargo_toml version to most up to date 2022-11-23 16:27:54 -07:00
Gregory Conrad
d19c8672bb perf: limit reindex to when exact_attributes changes 2022-11-23 15:50:53 -05:00
Tamo
7b8641a7af fix the dump tests
The issue was linked to the fact that the debug implementation of PhantomData wasn't the same between rust stable and rust nightly.
This was causing an issue while snapshotting the settings, and this commit fixes it by representing the settings as JSON, which already ignores the PhantomData.
2022-11-23 16:59:20 +01:00
bors[bot]
f509d81ec9 Merge #3100
3100: Add a dispatch to the publish binaries workflow r=Kerollmops a=curquiza

Add a `workflow_dispatch` event to the publish binaries workflow to allow triggering it manually

Co-authored-by: Kerollmops <clement@meilisearch.com>
2022-11-21 19:31:20 +00:00
Kerollmops
8d73ae80bb Add a dispatch to the publish binaries workflow 2022-11-21 18:50:57 +01:00
bors[bot]
57c9f03e51 Merge #697
697: Fix bug in prefix DB indexing r=loiclec a=loiclec

Where the batch's information was not properly updated in cases where only the proximity changed between two consecutive word pair proximities.

Closes partially https://github.com/meilisearch/meilisearch/issues/3043



Co-authored-by: Loïc Lecrenier <loic.lecrenier@me.com>
2022-11-17 15:22:01 +00:00
bors[bot]
00129e112a Merge #3041
3041: Add `workflow_dispatch` to flaky.yml r=irevoire a=curquiza

To be able to run the job manually and not wait for one week

Co-authored-by: Clémentine Urquizar - curqui <clementine@meilisearch.com>
2022-11-17 13:21:11 +00:00
bors[bot]
467e742bd1 Merge #702
702: Update version for the next release (v0.37.0) in Cargo.toml files r=ManyTheFish a=meili-bot

⚠️ This PR is automatically generated. Check the new version is the expected one before merging.

Co-authored-by: curquiza <curquiza@users.noreply.github.com>
2022-11-17 12:54:27 +00:00
curquiza
cd5aaa3a9f Update version for the next release (v0.37.0) in Cargo.toml files 2022-11-17 12:50:07 +00:00
bors[bot]
8ceb199dca Merge #696
696: Fix Facet Indexing bugs r=Kerollmops a=loiclec

1. Handle keys with variable length correctly

Closes (partially) https://github.com/meilisearch/meilisearch/issues/3042 
This issue is now easily reproducible with the updated fuzz tests, which now generate keys with variable lengths.

2. Prevent adding facets to the database if their encoded value does not satisfy `valid_lmdb_key`.

Closes (partially) https://github.com/meilisearch/meilisearch/issues/2743
This fixes an indexing failure when a document had a filterable attribute containing a value whose length is higher than ~500 bytes. For now, this fix is just meant to prevent crashes. Better handling of long values of filterable attributes will be handled in a separate PR.
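
A sketch of what the `valid_lmdb_key` guard mentioned above amounts to; the exact constant and signature here are assumptions for illustration, not milli's definitions:

```rust
// LMDB rejects keys above a fixed size (roughly 500 bytes in the
// default configuration), so over-long encoded facet values must be
// skipped or truncated before being written.
const MAX_LMDB_KEY_LEN: usize = 500;

fn valid_lmdb_key(key: impl AsRef<[u8]>) -> bool {
    let key = key.as_ref();
    !key.is_empty() && key.len() <= MAX_LMDB_KEY_LEN
}

fn main() {
    assert!(valid_lmdb_key("red"));
    assert!(!valid_lmdb_key("a".repeat(600)));
}
```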


Co-authored-by: Loïc Lecrenier <loic.lecrenier@me.com>
2022-11-17 11:56:16 +00:00
Loïc Lecrenier
777eb3fa00 Add insta-snaps for test of bug 3043 2022-11-17 12:21:27 +01:00
Loïc Lecrenier
0caadedd3b Make clippy happy 2022-11-17 12:17:53 +01:00
Loïc Lecrenier
ac3baafbe8 Truncate facet values that are too long before indexing them 2022-11-17 11:29:42 +01:00
Loïc Lecrenier
990a861241 Add test for indexing a document with a long facet value 2022-11-17 11:29:42 +01:00
Loïc Lecrenier
d95d02cb8a Fix Facet Indexing bugs
1. Handle keys with variable length correctly

This fixes https://github.com/meilisearch/meilisearch/issues/3042 and
is easily reproducible with the updated fuzz tests, which now generate
keys with variable lengths.

2. Prevent adding facets to the database if their encoded value does
not satisfy `valid_lmdb_key`.

This fixes an indexing failure when a document had a filterable
attribute containing a value whose length is higher than ~500 bytes.
2022-11-17 11:29:42 +01:00
Loïc Lecrenier
f00108d2ec Fix name of bug in reproduction test 2022-11-17 11:29:18 +01:00
Loïc Lecrenier
f7c8730d09 Fix bug in prefix DB indexing
Where the batch's information was not properly updated in cases
where only the proximity changed between two consecutive word pair
proximities.

Closes https://github.com/meilisearch/meilisearch/issues/3043
2022-11-17 11:29:18 +01:00
bors[bot]
46c275f0e4 Merge #3070
3070: Remove core and use engine r=Kerollmops a=curquiza

Following the new team name
Not mandatory since GitHub is doing redirection, but more consistent

Co-authored-by: curquiza <clementine@meilisearch.com>
2022-11-16 16:40:44 +00:00
bors[bot]
a651397afc Merge #685
685: ci: Use pre-compiled binaries for faster CI r=irevoire a=azzamsa

# Pull Request

## Related issue
Fixes #<issue_number>

## What does this PR do?
- ...

## PR checklist
Please check if your PR fulfills the following requirements:
- [ ] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [ ] Have you read the contributing guidelines?
- [ ] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: azzamsa <me@azzamsa.com>
2022-11-16 16:39:39 +00:00
curquiza
f4d4f313ea Use the right new link 2022-11-16 17:21:35 +01:00
bors[bot]
2000db8453 Merge #701
701: Remove Hacktoberfest sections r=curquiza a=meili-bot

_This PR is auto-generated._

Remove Hacktoberfest sections from CONTRIBUTING file.


Co-authored-by: meili-bot <74670311+meili-bot@users.noreply.github.com>
2022-11-15 15:17:18 +00:00
meili-bot
92cc3550d8 Update CONTRIBUTING.md 2022-11-15 16:16:40 +01:00
bors[bot]
5e1fa53354 Merge #3055
3055: Remove Hacktoberfest sections r=curquiza a=meili-bot

_This PR is auto-generated._

Remove Hacktoberfest sections from README and CONTRIBUTING files.


Co-authored-by: meili-bot <74670311+meili-bot@users.noreply.github.com>
2022-11-15 14:29:27 +00:00
meili-bot
fe59a6f628 Update README.md 2022-11-15 15:27:18 +01:00
meili-bot
647b7a20e9 Update CONTRIBUTING.md 2022-11-15 15:27:17 +01:00
bors[bot]
cd3bca06e9 Merge #699
699: Force vendoring of LMDB even if a system version is available r=Kerollmops a=dureuill

# Pull Request

## Related issue
Related to https://github.com/meilisearch/meilisearch/issues/3017: will fix once ported to milli and meilisearch.

## What does this PR do?
- Force using vendored version of LMDB
- **don't use lmdb master3 branch anymore**: this is a bit of a side effect of using a tag instead of branch for heed as a dependency, but it is wanted anyway for now as lmdb master3 was more of an experiment
- **modifies CI to run `cargo check` on the release rather than the debug artifacts**. This is an attempt to reduce the necessary disk space and avoid "out of space" failures.

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2022-11-15 11:09:20 +00:00
Louis Dureuil
87576cf26c Perform cargo check on the release artifacts 2022-11-15 10:25:02 +01:00
funilrys
e81b349658 Fix linting issue. 2022-11-14 18:51:34 +01:00
Louis Dureuil
6dc6a5d874 Force using vendored version of LMDB
- don't use lmdb master3 branch anymore
2022-11-14 17:17:51 +01:00
Clémentine Urquizar - curqui
a84fad5ce6 Add workflow_dispatch to flaky.yml 2022-11-14 10:10:00 +01:00
funilrys
0a102d601c Update Task.created_at
Indeed, before this patch we weren't considering the
TaskContent::SettingsUpdate while trying to find the creation date.
2022-11-13 10:14:20 +01:00
funilrys
8a14f6f545 Add Task.processed_at. 2022-11-13 10:13:10 +01:00
funilrys
079357ee1f Fix linting issues. 2022-11-12 20:57:27 +01:00
funilrys
06e7db7a1f fixup! Extract the dates out of the dumpv4. 2022-11-12 18:28:23 +01:00
bors[bot]
9e189f5041 Merge #3015
3015: Replace deprecated set-output in GitHub actions r=curquiza a=funilrys

# Pull Request

This patch fixes #3011.

This patch fixes the deprecation warning regarding the usage of `set-output`.
This patch fixes the issues by switching the following format:

```
echo ::set-output name=[name]::[value]
```

into the following format:

```
echo "[name]=[value]" >> ${GITHUB_OUTPUT}
```


## Related issue
Fixes #3011

## What does this PR do?
- Fix CI/CD deprecation warnings.

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: funilrys <contact@funilrys.com>
Co-authored-by: Clémentine Urquizar - curqui <clementine@meilisearch.com>
2022-11-10 09:45:55 +00:00
Clémentine Urquizar - curqui
fe980f9e88 Update .github/workflows/publish-docker-images.yml 2022-11-10 10:40:21 +01:00
Clémentine Urquizar - curqui
32cd9e4852 Update .github/workflows/publish-docker-images.yml 2022-11-10 10:40:16 +01:00
Clémentine Urquizar - curqui
8d79f501f3 Update .github/workflows/publish-binaries.yml 2022-11-10 10:40:09 +01:00
Clémentine Urquizar - curqui
c05abc2b0d Update .github/workflows/publish-binaries.yml 2022-11-10 10:40:04 +01:00
bors[bot]
e75829aded Merge #694
694: Update version for the next release (v0.36.0) in Cargo.toml files r=curquiza a=meili-bot

⚠️ This PR is automatically generated. Check the new version is the expected one before merging.

Co-authored-by: Kerollmops <Kerollmops@users.noreply.github.com>
2022-11-09 11:22:24 +00:00
Kerollmops
d00d2aab3f Update version for the next release (v0.36.0) in Cargo.toml files 2022-11-09 11:03:09 +00:00
bors[bot]
f46a8ab2e2 Merge #693
693: use the lmdb-master.3 branch r=Kerollmops a=irevoire

After investigating https://github.com/meilisearch/meilisearch/issues/3017, we found out that it was due to lmdb and that, without any code change on our side, bumping to the lmdb-master-3 branch fixes our issues.

But, we’re not really confident about what changed between the `mdb.master` and `mdb.master3` branches; thus this is a temporary change, and we hope we’ll be able to move to the new version of heed asap (either before the end of the pre-release or for the next release).

--------

The bug is hard to reproduce; I can reproduce it 100% of the time on my archlinux personal computer. But on a scaleway archlinux bare-metal machine, it doesn’t reproduce. It’s flaky on our test suite, but `@loiclec` was able to write a minimal test that reproduces it every time on macOS.
Basically, it happens when multiple threads open databases in different directories at the same time.
If there are 10 or more threads running at the same time, lmdb starts throwing the `Invalid argument (os error 22)` error for no reason, we believe.
I would like to submit an issue to lmdb, but I don’t really have the time to write a test in C without heed currently.

`@hyc,` if you want to take a look at it, here is the repo that reproduces the issue on macOS: https://github.com/irevoire/heed-bug

Co-authored-by: Irevoire <tamo@meilisearch.com>
2022-11-09 09:42:38 +00:00
funilrys
a441fe5ae5 Remove unnecessary line. 2022-11-08 21:18:24 +01:00
funilrys
7331da0410 Fix auto-formatter issue.
Indeed, my editor always fixes the format for me. That's why those
2 lines were changed.
2022-11-08 21:16:47 +01:00
funilrys
72c4db4553 Rewrite: ${GITHUB_OUTPUT} -> $GITHUB_OUTPUT. 2022-11-08 21:15:28 +01:00
bors[bot]
c3b75bbe5d Merge #691
691: Update version for the next release (v0.35.1) in Cargo.toml files r=curquiza a=meili-bot

⚠️ This PR is automatically generated. Check the new version is the expected one before merging.

Co-authored-by: Kerollmops <Kerollmops@users.noreply.github.com>
2022-11-08 15:31:50 +00:00
Irevoire
c7711daca3 use the lmdb-master.3 branch 2022-11-08 16:28:01 +01:00
bors[bot]
f18a4581f1 Merge #692
692: Update CONTRIBUTING.md r=Kerollmops a=curquiza



Co-authored-by: Clémentine Urquizar - curqui <clementine@meilisearch.com>
2022-11-08 14:51:30 +00:00
Clémentine Urquizar - curqui
8ce8bbcdfc Update CONTRIBUTING.md 2022-11-08 15:49:45 +01:00
Kerollmops
bd12989610 Update version for the next release (v0.35.1) in Cargo.toml files 2022-11-08 14:31:39 +00:00
bors[bot]
24a298a83c Merge #690
690: Fix soft deleted bug settings r=ManyTheFish a=Kerollmops



Co-authored-by: Kerollmops <clement@meilisearch.com>
2022-11-08 13:45:10 +00:00
bors[bot]
d85cd9bf1a Merge #689
689: Handle non-finite floats consistently in filters r=irevoire a=dureuill

# Pull Request

## Related issue

Related meilisearch/meilisearch#3000

## What does this PR do?

### User

- Filters using `field = inf` (or `infinite`, `NaN`) now match the value as a string rather than returning an internal error.
- Filters using `field < inf` (or other comparison operators) now return an invalid_filter error rather than returning an internal error, much like when using `field < aaa`.

### Implementation

- Add new `NonFiniteFloat` error variants to the filter-parser errors
- Add `Token::parse_as_finite_float` that can fail both when the string is not a float and when the float is not finite (see the sketch below)
- Refactor `Filter::inner_evaluate` to always use `parse_as_finite_float` instead of just `parse`
- Add corresponding tests
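
A minimal sketch of the accept/reject rule behind `parse_as_finite_float` (the real method lives on the filter-parser's `Token`; the free function below only illustrates the logic):

```rust
fn parse_as_finite_float(token: &str) -> Option<f64> {
    match token.parse::<f64>() {
        Ok(f) if f.is_finite() => Some(f),
        _ => None, // not a float at all, or inf/-inf/NaN
    }
}

fn main() {
    // `field = inf` can fall back to a string comparison...
    assert_eq!(parse_as_finite_float("inf"), None);
    assert_eq!(parse_as_finite_float("NaN"), None);
    // ...while ordinary numbers keep working for `field < 12.5`.
    assert_eq!(parse_as_finite_float("12.5"), Some(12.5));
}
```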

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2022-11-08 13:24:38 +00:00
Kerollmops
37b3c5c323 Fix transform to use all_documents and ignore soft_deleted documents 2022-11-08 14:23:16 +01:00
Kerollmops
1b1ad1923b Add a test to check that we take care of soft deleted documents 2022-11-08 14:23:14 +01:00
Louis Dureuil
a836b8e703 tests: Tests filter with non-finite floats 2022-11-08 13:56:55 +01:00
Louis Dureuil
3328560788 fix: allow filters on = inf, = NaN, return InvalidFilter for < inf, < NaN
Fixes meilisearch/meilisearch#3000
2022-11-08 13:27:15 +01:00
bors[bot]
cf76ec7b37 Merge #673
673: Add clippy job r=ManyTheFish a=unvalley

# Pull Request

## Related issue
Fixes #231 

## What does this PR do?
- fix some clippy errors remain
- add clippy job to CI (I set `nightly` as toolchain)

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?


Co-authored-by: unvalley <kirohi.code@gmail.com>
2022-11-08 09:43:26 +00:00
unvalley
abf1cf9cd5 Fix clippy errors 2022-11-04 09:27:46 +09:00
unvalley
b09676779d Use nightly for clippy and remove conflict mistake 2022-11-04 09:13:01 +09:00
unvalley
70465aa5ce Execute cargo fmt 2022-11-04 08:59:58 +09:00
unvalley
3009981d31 Fix clippy errors
Add clippy job

Add clippy job to CI
2022-11-04 08:58:14 +09:00
unvalley
401e956128 Add clippy job
Add clippy job to CI
2022-11-04 08:58:12 +09:00
azzamsa
48eafc546f ci: Use pre-compiled binaries for faster CI 2022-11-04 00:03:53 +07:00
bors[bot]
6add470805 Merge #659
659: Fix clippy error to add clippy job on Ci r=Kerollmops a=unvalley

## Related PR
This PR is for #673 

## What does this PR do?
- ~~add `Run Clippy` job to CI (rust.yml)~~
- apply `cargo clippy --fix` command
- fix some `cargo clippy` error manually (but warnings still remain on tests)

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?


Co-authored-by: unvalley <kirohi.code@gmail.com>
Co-authored-by: unvalley <38400669+unvalley@users.noreply.github.com>
2022-11-03 15:24:38 +00:00
unvalley
13175f2339 refactor: match for filterCondition 2022-11-03 17:34:33 +09:00
funilrys
953b2ec438 fixup! Extract the dates out of the dumpv4. 2022-11-02 17:49:37 +01:00
bors[bot]
1a1ad8a792 Merge #679
679: Bump Swatinem/rust-cache from 2.0.0 to 2.0.1 r=curquiza a=dependabot[bot]

Bumps [Swatinem/rust-cache](https://github.com/Swatinem/rust-cache) from 2.0.0 to 2.0.1.
<details>
<summary>Release notes</summary>
<p><em>Sourced from <a href="https://github.com/Swatinem/rust-cache/releases">Swatinem/rust-cache's releases</a>.</em></p>
<blockquote>
<h2>v2.0.1</h2>
<ul>
<li>Primarily just updating dependencies to fix GitHub deprecation notices.</li>
</ul>
</blockquote>
</details>
<details>
<summary>Changelog</summary>
<p><em>Sourced from <a href="https://github.com/Swatinem/rust-cache/blob/master/CHANGELOG.md">Swatinem/rust-cache's changelog</a>.</em></p>
<blockquote>
<h2>2.0.1</h2>
<ul>
<li>Primarily just updating dependencies to fix GitHub deprecation notices.</li>
</ul>
</blockquote>
</details>
<details>
<summary>Commits</summary>
<ul>
<li><a href="22c9328bcb"><code>22c9328</code></a> 2.0.1</li>
<li><a href="d4d463bd9b"><code>d4d463b</code></a> bump deps and rebuild</li>
<li><a href="c4652c677c"><code>c4652c6</code></a> Update <code>`@actions/core</code>` (<a href="https://github-redirect.dependabot.com/Swatinem/rust-cache/issues/83">#83</a>)</li>
<li><a href="76686c56f2"><code>76686c5</code></a> docs: Fix github workflows directory (<a href="https://github-redirect.dependabot.com/Swatinem/rust-cache/issues/79">#79</a>)</li>
<li><a href="1b43d2f2c3"><code>1b43d2f</code></a> remove outdated versioning note</li>
<li><a href="20b9201e8a"><code>20b9201</code></a> bump cargo hash</li>
<li><a href="0d72e5f9a0"><code>0d72e5f</code></a> revert explicit dir close</li>
<li><a href="86531941c2"><code>8653194</code></a> Merge branch 'master' of <a href="https://github.com/Swatinem/rust-cache">https://github.com/Swatinem/rust-cache</a></li>
<li><a href="be4be3720d"><code>be4be37</code></a> explicitly close dir handles, add more logging, cleanups</li>
<li><a href="213334cd98"><code>213334c</code></a> cargo update</li>
<li>Additional commits viewable in <a href="https://github.com/Swatinem/rust-cache/compare/v2.0.0...v2.0.1">compare view</a></li>
</ul>
</details>
<br />


[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=Swatinem/rust-cache&package-manager=github_actions&previous-version=2.0.0&new-version=2.0.1)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

You can trigger a rebase of this PR by commenting ``@dependabot` rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---

<details>
<summary>Dependabot commands and options</summary>
<br />

You can trigger Dependabot actions by commenting on this PR:
- ``@dependabot` rebase` will rebase this PR
- ``@dependabot` recreate` will recreate this PR, overwriting any edits that have been made to it
- ``@dependabot` merge` will merge this PR after your CI passes on it
- ``@dependabot` squash and merge` will squash and merge this PR after your CI passes on it
- ``@dependabot` cancel merge` will cancel a previously requested merge and block automerging
- ``@dependabot` reopen` will reopen this PR if it is closed
- ``@dependabot` close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
- ``@dependabot` ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
- ``@dependabot` ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
- ``@dependabot` ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)


</details>

Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2022-11-02 12:38:05 +00:00
dependabot[bot]
4492605a78 Bump Swatinem/rust-cache from 2.0.0 to 2.0.1
Bumps [Swatinem/rust-cache](https://github.com/Swatinem/rust-cache) from 2.0.0 to 2.0.1.
- [Release notes](https://github.com/Swatinem/rust-cache/releases)
- [Changelog](https://github.com/Swatinem/rust-cache/blob/master/CHANGELOG.md)
- [Commits](https://github.com/Swatinem/rust-cache/compare/v2.0.0...v2.0.1)

---
updated-dependencies:
- dependency-name: Swatinem/rust-cache
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
2022-11-01 10:19:45 +00:00
funilrys
09e71fdeb6 Replace deprecated set-output in GitHub actions
This patch fixes #3011.

This patch fixes the deprecation warning regarding the usage of
`set-output`.
This patch fixes the issues by switching the following format:

```
echo ::set-output name=[name]::[value]
```

into the following format:

```
echo "[name]=[value]" >> ${GITHUB_OUTPUT}
```
2022-10-31 22:28:01 +01:00
bors[bot]
fe5a0219e1 Merge #677
677: run the tests in all workspaces r=curquiza a=irevoire

With #676 I noticed the tests were not running in any of our sub-crates.
Most of our sub-crates didn't include any tests though.
But the filter-parser did, and we're lucky we never broke those without noticing 😁

Co-authored-by: Irevoire <tamo@meilisearch.com>
2022-10-31 18:05:04 +00:00
funilrys
ab3056cc66 Extract the dates out of the dumpv4.
This patch possibly fixes #2987.

This patch introduces a way to fill the IndexMetadata.created_at
and IndexMetadata.updated_at keys from the task events.
This is done by reading the creation date of the first event
(created_at) and the creation date of the last event (updated_at).
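
A sketch of that rule, with a hypothetical `TaskEvent` shape standing in for the dump v4 task events:

```rust
use std::time::SystemTime;

struct TaskEvent {
    created_at: SystemTime,
}

// created_at comes from the first event, updated_at from the last one.
fn index_dates(events: &[TaskEvent]) -> Option<(SystemTime, SystemTime)> {
    let created_at = events.first()?.created_at;
    let updated_at = events.last()?.created_at;
    Some((created_at, updated_at))
}

fn main() {
    let events = vec![
        TaskEvent { created_at: SystemTime::UNIX_EPOCH },
        TaskEvent { created_at: SystemTime::now() },
    ];
    let (created_at, updated_at) = index_dates(&events).unwrap();
    assert!(created_at <= updated_at);
}
```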
2022-10-31 18:58:14 +01:00
Irevoire
5ff066c3e7 run the tests in all workspaces 2022-10-31 18:38:48 +01:00
bors[bot]
6770eb2a87 Merge #676
676: chore: added `IN`,`NOT IN` to `invalid_filter` msg r=irevoire a=Pranav-yadav

# Pull Request

## Related issue
`Fixes` https://github.com/meilisearch/meilisearch/issues/3004

## What does this PR do?
- Improves the error msg returned in the response

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Pranav Yadav <Pranavyadav3912@gmail.com>
2022-10-31 17:29:24 +00:00
unvalley
0d43ddbd85 Update filter-parser/src/lib.rs
Co-authored-by: Tamo <irevoire@protonmail.ch>
2022-11-01 01:32:54 +09:00
bors[bot]
c7caadb54e Merge #3001
3001: Implement Uuid codec for heed r=Kerollmops a=elbertronnie

# Pull Request

## Related issue
Fixes #2984 

## What does this PR do?
- Created a new heed codec for uuid::Uuid named as UuidCodec
- Replaced SerdeBincode\<Uuid\> with UuidCodec (see the sketch below)
- Removed the TODO in code associated with this issue
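
For reference, a sketch of what such a codec can look like against the heed `BytesEncode`/`BytesDecode` traits of that era (the exact trait signatures changed across heed versions, so treat this as approximate):

```rust
use std::borrow::Cow;

use heed::{BytesDecode, BytesEncode};
use uuid::Uuid;

pub struct UuidCodec;

impl<'a> BytesDecode<'a> for UuidCodec {
    type DItem = Uuid;

    fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
        // A UUID is exactly 16 bytes; reject any other length.
        bytes.try_into().ok().map(Uuid::from_bytes)
    }
}

impl BytesEncode<'_> for UuidCodec {
    type EItem = Uuid;

    fn bytes_encode(item: &Self::EItem) -> Option<Cow<[u8]>> {
        // Store the raw 16 bytes instead of a bincode-serialized form.
        Some(Cow::Borrowed(item.as_bytes()))
    }
}
```

Compared to `SerdeBincode<Uuid>`, this stores the fixed 16-byte representation directly, with no serialization framework overhead.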

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Elbert Ronnie <elbert.ronniep@gmail.com>
2022-10-31 16:13:20 +00:00
Pranav Yadav
3950ec8d3c chore: update tests for invalid_filter msg 2022-10-31 15:41:49 +00:00
Pranav Yadav
3b35ebda50 chore: added IN,NOT IN to invalid_filter msg 2022-10-31 15:01:14 +00:00
bors[bot]
2254bbf3bd Merge #3002
3002: Fix dump import without instance uid r=Kerollmops a=irevoire

When creating a dump without any instance-uid (that can happen if you’ve always run meilisearch with the `--no-analytics` flag), you could get an error when trying to load the dump.


Co-authored-by: Irevoire <tamo@meilisearch.com>
2022-10-31 12:58:37 +00:00
Elbert Ronnie
0219ef25fe Moved the struct UuidCodec to a new file 2022-10-31 12:25:19 +05:30
Irevoire
510afda590 remove unused import 2022-10-30 20:05:20 +01:00
Irevoire
fea9fdcd7e fix the dump reader process when no instance-uid was specified 2022-10-30 20:00:27 +01:00
bors[bot]
4bcfd14a45 Merge #675
675: Deleted empty files r=Kerollmops a=SKVKPandey

# Pull Request

## Related issue
Fixes #674

## What does this PR do?
Delete empty files:
- `milli/src/heed_codec/facet/facet_string_level_zero_value_codec.rs`
- `milli/src/heed_codec/facet/facet_string_zero_bounds_value_codec.rs`

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Shashank Kashyap <50551759+SKVKPandey@users.noreply.github.com>
2022-10-30 07:09:30 +00:00
Shashank Kashyap
a07f0a4a43 Delete facet_string_zero_bounds_value_codec.rs 2022-10-30 08:59:04 +05:30
Shashank Kashyap
2dec6e86e9 Delete facet_string_level_zero_value_codec.rs 2022-10-30 08:58:36 +05:30
Elbert Ronnie
3911fd64b5 Implement Uuid codec for heed 2022-10-30 03:27:30 +05:30
bors[bot]
c965200010 Merge #664
664: Fix phrase search containing stop words r=ManyTheFish a=Samyak2

# Pull Request

This is a WIP draft PR I wanted to create to let other potential contributors know that I'm working on this issue. I'll be completing it within a few hours of opening this.

## Related issue
Fixes #661 and towards fixing meilisearch/meilisearch#2905

## What does this PR do?
- [x] Change Phrase Operation to use a `Vec<Option<String>>` instead of `Vec<String>` where `None` corresponds to a stop word (see the sketch below)
- [x] Update all other uses of phrase operation
- [x] Update `resolve_phrase`
- [x] Update `create_primitive_query`?
- [x] Add test
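
A sketch of the representation change: a stop word keeps its slot in the phrase as `None` instead of being dropped, so the positions of the surrounding words stay correct (the helper below is illustrative, not the crate's API):

```rust
fn build_phrase(words: &[&str], stop_words: &[&str]) -> Vec<Option<String>> {
    words
        .iter()
        .map(|w| {
            if stop_words.contains(w) {
                None // stop word: a hole in the phrase, position preserved
            } else {
                Some((*w).to_string())
            }
        })
        .collect()
}

fn main() {
    let phrase = build_phrase(&["the", "quick", "fox"], &["the"]);
    assert_eq!(
        phrase,
        vec![None, Some("quick".to_string()), Some("fox".to_string())]
    );
}
```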

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?


Co-authored-by: Samyak S Sarnayak <samyak201@gmail.com>
Co-authored-by: Samyak Sarnayak <samyak201@gmail.com>
2022-10-29 13:42:52 +00:00
unvalley
d55f0e2e53 Execute cargo fmt 2022-10-28 23:42:23 +09:00
unvalley
d53a80b408 Fix clippy error 2022-10-28 23:41:35 +09:00
Samyak Sarnayak
ecb88143f9 Run cargo fmt 2022-10-28 19:37:02 +05:30
Samyak Sarnayak
03eb5d87c1 Only call plane_sweep on subgroups when 2 or more are present 2022-10-28 19:32:05 +05:30
unvalley
a1d7ed1258 fix clippy error and remove clippy job from ci
Remove clippy job

Fix clippy error type_complexity

Restore ambiguous change
2022-10-28 22:33:50 +09:00
unvalley
f3c0b05ae8 Fix rust fmt 2022-10-28 09:32:31 +09:00
bors[bot]
dd1011ba76 Merge #2995
2995: merge the settings and do one indexation at the end r=irevoire a=irevoire



Co-authored-by: Irevoire <tamo@meilisearch.com>
2022-10-27 21:24:21 +00:00
unvalley
f4ec1abb9b Fix all clippy errors after conflicts 2022-10-27 23:58:13 +09:00
Irevoire
313f204f39 merge the settings and do one indexation at the end 2022-10-27 16:38:21 +02:00
Samyak S Sarnayak
d35afa0cf5 Change consecutive phrase search grouping logic
Co-authored-by: ManyTheFish <many@meilisearch.com>
2022-10-26 23:10:48 +05:30
Samyak S Sarnayak
752d031010 Update phrase search to use new execute method 2022-10-26 23:07:20 +05:30
unvalley
c7322f704c Fix cargo clippy errors
Don't apply clippy for tests for now

Fix clippy warnings of filter-parser package

Update .github/workflows/rust.yml

Co-authored-by: Clémentine Urquizar - curqui <clementine@meilisearch.com>

Allow clippy lint too_many_arguments

Allow clippy lint needless_collect

Allow clippy lint too_many_arguments and type_complexity

Fix for clippy warnings comparison_chains

Fix for clippy warnings vec_init_then_push

Allow clippy lint should_implement_trait

Allow clippy lint drop_non_drop

Fix lifetime clippy warnings in filter-parser

Execute cargo fmt

Fix clippy remaining warnings

Fix clippy remaining warnings again and allow lint on each place
2022-10-27 01:04:23 +09:00
unvalley
811f156031 Execute cargo clippy --fix 2022-10-27 01:00:00 +09:00
unvalley
d8fed1f7a9 Add clippy job
Add Run Clippy to bors.toml
2022-10-27 01:00:00 +09:00
bors[bot]
2e539249cb Merge #619
619: Refactor the Facets databases to enable incremental indexing r=curquiza a=loiclec

# Pull Request

## What does this PR do?
Partly fixes https://github.com/meilisearch/milli/issues/605 by making the indexing of the facet databases (i.e. `facet_id_f64_docids` and `facet_id_string_docids`) incremental. It also closes #327 and https://github.com/meilisearch/meilisearch/issues/2820. Two more untracked bugs were also fixed:
1. The facet distribution algorithm did not respect the `maxFacetValues` parameter when there were only a few candidate document ids.
2. The structure of the levels > 0 of the facet databases was not updated following the deletion of documents

## How to review this PR

First, read this comment to get an overview of the changes.

Then, based on this comment, raise any concerns you might have about:
1. the new structure of the databases
2. the algorithms for sort, facet distribution, and range search
3. the new/removed heed codecs

Then, weigh in on the following concerns:
1. adding `fuzzcheck` as a fuzz-only dependency may add too much complexity for the benefits it provides
2. the `ByteSliceRef` and `StrRefCodec` are misnamed or should not exist
3. the new behaviour of facet distributions can be considered incorrect
4. incremental deletion is useless given that documents are always deleted in bulk

## What's left for me to do

1. Re-read everything once to make sure I haven't forgotten anything
2. Wait for the results of the benchmarks and see if (1) they provide enough information (2) there was any change in performance, especially for search queries. Then, maybe, spend some time optimising the code.
3. Test whether the `info`/`http-ui` crates survived the refactor

## Old structure of the `facet_id_f64_docids` and `facet_id_string_docids` databases

Previously, these two databases had different but conceptually similar structures. For each field id, the facet number database had the following format:
```
            ┌───────────────────────────────┬───────────────────────────────┬───────────────┐
┌───────┐   │            1.2 – 2            │           3.4 – 100           │   102 – 104   │
│Level 2│   │                               │                               │               │
└───────┘   │         a, b, d, f, z         │         c, d, e, f, g         │     u, y      │
            ├───────────────┬───────────────┼───────────────┬───────────────┼───────────────┤
┌───────┐   │   1.2 – 1.3   │    1.6 – 2    │   3.4 – 12    │  12.3 – 100   │   102 – 104   │
│Level 1│   │               │               │               │               │               │
└───────┘   │  a, b, d, z   │    a, b, f    │    c, d, g    │     e, f      │     u, y      │
            ├───────┬───────┼───────┬───────┼───────┬───────┼───────┬───────┼───────┬───────┤
┌───────┐   │  1.2  │  1.3  │  1.6  │   2   │  3.4  │   12  │  12.3 │  100  │  102  │  104  │
│Level 0│   │       │       │       │       │       │       │       │       │       │       │
└───────┘   │  a, b │  d, z │  b, f │  a, f │  c, d │   g   │   e   │  e, f │   y   │   u   │
            └───────┴───────┴───────┴───────┴───────┴───────┴───────┴───────┴───────┴───────┘
```
where the first line is the key of the database, consisting of :
- the field id
- the level height
- the left and right bound of the group 

and the second line is the value of the database, consisting of:
- a bitmap of all the docids that have a facet value within the bounds

The `facet_id_string_docids` had a similar structure:
```
            ┌───────────────────────────────┬───────────────────────────────┬───────────────┐
┌───────┐   │             0 – 3             │             4 – 7             │     8 – 9     │
│Level 2│   │                               │                               │               │
└───────┘   │         a, b, d, f, z         │         c, d, e, f, g         │     u, y      │
            ├───────────────┬───────────────┼───────────────┬───────────────┼───────────────┤
┌───────┐   │     0 – 1     │     2 – 3     │     4 – 5     │     6 – 7     │     8 – 9     │
│Level 1│   │  "ab" – "ac"  │ "ba" – "bac"  │ "gaf" – "gal" │"form" – "wow" │ "woz" – "zz"  │
└───────┘   │  a, b, d, z   │    a, b, f    │    c, d, g    │     e, f      │     u, y      │
            ├───────┬───────┼───────┬───────┼───────┬───────┼───────┬───────┼───────┬───────┤
┌───────┐   │  "ab" │  "ac" │  "ba" │ "bac" │ "gaf" │ "gal" │ "form"│ "wow" │ "woz" │  "zz" │
│Level 0│   │  "AB" │ " Ac" │ "ba " │ "Bac" │ " GAF"│ "gal" │ "Form"│ " wow"│ "woz" │  "ZZ" │
└───────┘   │  a, b │  d, z │  b, f │  a, f │  c, d │   g   │   e   │  e, f │   y   │   u   │
            └───────┴───────┴───────┴───────┴───────┴───────┴───────┴───────┴───────┴───────┘
```
where, **at level 0**, the key is:
* the normalised facet value (string)

and the value is:
* the original facet value (string)
* a bitmap of all the docids that have this normalised string facet value

**At level 1**, the key is:
* the left bound of the range as an index in level 0
* the right bound of the range as an index in level 0

and the value is:
* the left bound of the range as a normalised string
* the right bound of the range as a normalised string
* a bitmap of all the docids that have a string facet value within the bounds

**At level > 1**, the key is:
* the left bound of the range as an index in level 0
* the right bound of the range as an index in level 0

and the value is:
* a bitmap of all the docids that have a string facet value within the bounds

## New structure of the `facet_id_f64_docids` and `facet_id_string_docids` databases

Now both the `facet_id_f64_docids` and `facet_id_string_docids` databases have the exact same structure:
```                                                                                             
            ┌───────────────────────────────┬───────────────────────────────┬───────────────┐
┌───────┐   │           "ab" (2)            │           "gaf" (2)           │   "woz" (1)   │
│Level 2│   │                               │                               │               │
└───────┘   │        [a, b, d, f, z]        │        [c, d, e, f, g]        │    [u, y]     │
            ├───────────────┬───────────────┼───────────────┬───────────────┼───────────────┤
┌───────┐   │   "ab" (2)    │   "ba" (2)    │   "gaf" (2)   │  "form" (2)   │   "woz" (2)   │
│Level 1│   │               │               │               │               │               │
└───────┘   │ [a, b, d, z]  │   [a, b, f]   │   [c, d, g]   │    [e, f]     │    [u, y]     │
            ├───────┬───────┼───────┬───────┼───────┬───────┼───────┬───────┼───────┬───────┤
┌───────┐   │  "ab" │  "ac" │  "ba" │ "bac" │ "gaf" │ "gal" │ "form"│ "wow" │ "woz" │  "zz" │
│Level 0│   │       │       │       │       │       │       │       │       │       │       │
└───────┘   │ [a, b]│ [d, z]│ [b, f]│ [a, f]│ [c, d]│  [g]  │  [e]  │ [e, f]│  [y]  │  [u]  │
            └───────┴───────┴───────┴───────┴───────┴───────┴───────┴───────┴───────┴───────┘
```
where for all levels, the key is a `FacetGroupKey<T>` containing:
* the field id (`u16`)
* the level height (`u8`)
* the left bound of the range (`T`)

and the value is a `FacetGroupValue` containing:
* the number of elements from the level below that are part of the range (`u8`, =0 for level 0)
* a bitmap of all the docids that have a facet value within the bounds (`RoaringBitmap`)

The right bound of the range is now implicit, it is equal to `Excluded(next_left_bound)`.
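
Spelled out as plain structs (an outline only: the real definitions sit behind milli's heed codecs, and `Bitmap` stands in for `RoaringBitmap`):

```rust
type Bitmap = Vec<u32>; // stand-in for RoaringBitmap

#[allow(dead_code)]
struct FacetGroupKey<T> {
    field_id: u16,
    level: u8,
    left_bound: T, // right bound is implicit: Excluded(next key's left_bound)
}

#[allow(dead_code)]
struct FacetGroupValue {
    size: u8,       // elements from the level below in this range (0 at level 0)
    bitmap: Bitmap, // docids with a facet value within the bounds
}

fn main() {
    let _key = FacetGroupKey { field_id: 0, level: 1, left_bound: "ab" };
    let _value = FacetGroupValue { size: 2, bitmap: vec![0, 1, 3] };
}
```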

In the code, the key is always encoded using `FacetGroupKeyCodec<C>` where `C` is the codec used to encode the facet value (either `OrderedF64Codec` or `StrRefCodec`) and the value is encoded with `FacetGroupValueCodec`.

Since both databases share the same structure, we can implement almost all operations only once by treating the facet value as a byte slice (i.e. `FacetGroupKey<&[u8]>` encoded as `FacetGroupKeyCodec<ByteSliceRef>`). This is, in my opinion, a big simplification.

The reason for changing the structure of the databases is to make it possible to incrementally add a facet value to an existing database. Since the `facet_id_string_docids` used to store indices to `level 0` in all levels > 0, adding an element to level 0 would potentially invalidate all the indices.

Note that the original string value of a facet is no longer stored in this database.

## Incrementally adding a facet value

Here I describe how we can add a facet value to the new database incrementally. If we want to add the document with id `z` and facet value `gap`, then we want to add/modify the elements highlighted below in pink:
<img width="946" alt="Screenshot 2022-09-12 at 10 14 54" src="https://user-images.githubusercontent.com/6040237/189605532-fe4b0f52-e13d-4b3c-92d9-10c705953e3d.png">

which results in:
<img width="662" alt="Screenshot 2022-09-12 at 10 23 29" src="https://user-images.githubusercontent.com/6040237/189607015-c3a37588-b825-43c2-878a-f8f85c000b94.png">

* one element was added in level 0
* one key/value was modified in level 1
* one value was modified in level 2

Adding this element was easy since we could simply add it to level 0 and then increase the `group_size` part of the value for the level above. However, in order to keep the structure balanced, we can't always do this. If the group size reaches a threshold (`max_group_size`), then we split the node into two. For example, let's imagine that `max_group_size` is `4` and we add the docid `y` with facet value `gas`. First, we add it in level 0:
<img width="904" alt="Screenshot 2022-09-12 at 10 30 40" src="https://user-images.githubusercontent.com/6040237/189608391-531f9df1-3424-4f1f-8344-73eb194570e5.png">
Then, we realise that the group size of its parent is going to reach the maximum group size (=4) and thus we split the parent into two nodes:
<img width="919" alt="Screenshot 2022-09-12 at 10 33 16" src="https://user-images.githubusercontent.com/6040237/189608884-66f87635-1fc6-41d2-a459-87c995491ac4.png">
and since we inserted an element in level 1, we also update level 2 accordingly, by increasing the group size of the parent:
<img width="915" alt="Screenshot 2022-09-12 at 10 34 42" src="https://user-images.githubusercontent.com/6040237/189609233-d4a893ff-254a-48a7-a5ad-c0dc337f23ca.png">

We also have two other parameters:
* `group_size` is the default group size when building the database from scratch
* `min_level_size` is the minimum number of elements that a level should contain

When the highest level size is greater than `group_size * min_level_size`, then we create an additional level above it.
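
That growth rule is simple enough to state directly (names mirror the prose, not milli's API):

```rust
fn needs_new_level(top_level_size: usize, group_size: usize, min_level_size: usize) -> bool {
    top_level_size > group_size * min_level_size
}

fn main() {
    // With group_size = 4 and min_level_size = 5, a 21-entry top level
    // gets a new level built above it; 20 entries do not.
    assert!(needs_new_level(21, 4, 5));
    assert!(!needs_new_level(20, 4, 5));
}
```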

There is one more edge case for the insertion algorithm. While we normally don't modify the existing left bounds of a key, we have to do it if the facet value being inserted is smaller than the first left bound. For example, inserting `"aa"` with the docid `w` would change the database to:
<img width="756" alt="Screenshot 2022-09-12 at 10 41 56" src="https://user-images.githubusercontent.com/6040237/189610637-a043ef71-7159-4bf1-b4fd-9903134fc095.png">

The root of the code for incremental indexing is the `FacetUpdateIncremental` builder.

## Incrementally removing a facet value
TODO: the algorithm was implemented and works, but its current API is: `fn delete(self, facet_value, single_docid)`. It removes the given document id from all keys containing the given facet value. I don't think it is the right way to implement it anymore. Perhaps a bitmap of docids should be given instead. This is fairly easy to do. But since we batch document deletions together (because of soft deletion), it's not clear to me anymore that incremental deletion should be implemented at all.  

## Bulk insertion
While it's faster to incrementally add a single facet value to the database, it is sometimes **slower** to repeatedly add facet values one-by-one instead of doing it in bulk. For example, during initial indexing, we'd like to build the database from a list of facet values and associated document ids in one go. The `FacetUpdateBulk` builder provides a way to do so. It works by:
1. clearing all levels > 0 from the DB
2. adding all new elements in level 0
3. rebuilding the higher levels from scratch 

The algorithm for bulk insertion is the same as the previous one.

## Choosing between incremental and bulk insertion
On my computer, I measured that it is about 50x slower to add N facet values incrementally than it is to re-build a database with N facet values in level 0. Therefore, we dynamically choose to use either incremental insertion or bulk insertion based on (1) the number of existing elements in level 0 of the database and (2) the number of facet values from the new documents.

This is imprecise but is mainly aimed at avoiding the worst-case scenario where the incremental insertion method is used repeatedly millions of times.
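
A sketch of that dynamic choice; the 50x factor comes from the measurement above, but the threshold shape is only an illustration, not milli's actual heuristic:

```rust
enum InsertionStrategy {
    Incremental,
    Bulk,
}

fn choose_strategy(existing_level0_len: usize, new_facet_values: usize) -> InsertionStrategy {
    // One incremental insertion costs roughly 50 "rebuild units", so it
    // only wins when the new batch is small relative to what is already
    // in level 0 of the database.
    if new_facet_values * 50 < existing_level0_len {
        InsertionStrategy::Incremental
    } else {
        InsertionStrategy::Bulk
    }
}

fn main() {
    assert!(matches!(choose_strategy(1_000_000, 100), InsertionStrategy::Incremental));
    assert!(matches!(choose_strategy(1_000, 100), InsertionStrategy::Bulk));
}
```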

## Fuzz-testing

**Potentially controversial:**
I fuzz-tested incremental addition and deletion using fuzzcheck, which found many bugs. The fuzz-test consists of inserting/deleting facet values and docids in succession, each operation is processed with different parameters for `group_size`, `max_group_size`, and `min_level_size`. After all the operations are processed, the content of level 0 is compared to the content of an equivalent structure with a simple and easily-checked implementation. Furthermore, we check that the database has a correct structure (all groups from levels > 0 correctly combine the content of their children). I also visualised the code coverage found by the fuzz-test. It covered 100% of the relevant code except for `unreachable/panic` statements and errors returned by `heed`.

The fuzz-test and the fuzzcheck dependency are only compiled when `cargo fuzzcheck` is used. For now, the dependency is from a local path on my computer, but it can be changed to a crate version if we decide to keep it. 

## Algorithms operating on the facet databases

There are four important algorithms making use of the facet databases:
1. Sort, ascending
2. Sort, descending
3. Facet distribution
4. Range search

Previously, the implementation of all four algorithms was based on a number of iterators specific to each database kind (number or string): `FacetNumberRange`, `FacetNumberRevRange`, `FacetNumberIter` (with a reversed and reducing/non-reducing option), `FacetStringGroupRange`, `FacetStringGroupRevRange`, `FacetStringLevel0Range`, `FacetStringLevel0RevRange`, and `FacetStringIter` (reversed + reducing/non-reducing). 

Now, all four algorithms have a unique implementation shared by both the string and number databases. There are four functions:
1. `ascending_facet_sort` in `search/facet/facet_sort_ascending.rs`
2. `descending_facet_sort` in `search/facet/facet_sort_descending.rs`
3. `iterate_over_facet_distribution` in `search/facet/facet_distribution_iter.rs`
4. `find_docids_of_facet_within_bounds` in `search/facet/facet_range_search.rs`

I have tried to test them with some snapshot tests but more testing could still be done. I don't *think* that the performance of these algorithms regressed, but that will need to be confirmed by benchmarks.

## Change of behaviour for facet distributions

Previously, the original string value of a facet was stored in the level 0 of `facet_id_string_docids`. This is no longer the case. The original string value was used in the implementation of the facet distribution algorithm. Now, to recover it, we pick a random document id which contains the normalised string value and look up the original one in `field_id_docid_facet_strings`. As a consequence, it may be that the string value returned in the field distribution does not appear in any of the candidates. For example,
```json
{ "id": 0, "colour": "RED" }
{ "id": 1, "colour": "red" }
```
Facet distribution for the `colour` field among the candidates `[1]`:
```
{ "RED": 1 }
```
Here, "RED" was given as the original facet value even though it does not appear in the document id `1`.

## Heed codecs

A number of heed codecs related to the facet databases were removed:
* `FacetLevelValueF64Codec`
* `FacetLevelValueU32Codec`
* `FacetStringLevelZeroCodec`
* `StringValueCodec`
* `FacetStringZeroBoundsValueCodec`
* `FacetValueStringCodec`
* `FieldDocIdFacetStringCodec`
* `FieldDocIdFacetF64Codec`

They were replaced by:
* `FacetGroupKeyCodec<C>` (replaces all key codecs for the facet databases)
* `FacetGroupValueCodec` (replaces all value codecs for the facet databases)
* `FieldDocIdFacetCodec<C>` (replaces `FieldDocIdFacetStringCodec` and `FieldDocIdFacetF64Codec`)

Since the associated encoded item of `FacetGroupKeyCodec<C>` is `FacetKey<T>` and we often work with `FacetKey<&[u8]>` and `FacetKey<&str>`, then we need to have codecs that encode values of type `&str` and `&[u8]`. The existing `ByteSlice` and `Str` codecs do not work for that purpose (their `EItem` are `[u8]` and `str`), I have also created two new codecs:
* `ByteSliceRef` is a codec with a `EItem = DItem = &[u8]`
* `StrRefCodec` is a codec with a `EItem = DItem = &str`

I have also factored out the code used to encode an ordered f64 into its own `OrderedF64Codec`.


Co-authored-by: Loïc Lecrenier <loic@meilisearch.com>
2022-10-26 15:04:53 +00:00
Samyak S Sarnayak
488d31ecdf Run cargo fmt 2022-10-26 19:09:45 +05:30
Samyak S Sarnayak
af33d22f25 Consecutive is false when at least 1 stop word is surrounded by words 2022-10-26 19:09:45 +05:30
Samyak S Sarnayak
f1da623af3 Add test for phrase search with stop words and all criteria at once
Moved the actual test into a separate function used by both the existing
test and the new test.
2022-10-26 19:09:44 +05:30
Samyak S Sarnayak
77f1ff019b Simplify stop word checking in create_primitive_query 2022-10-26 19:09:44 +05:30
Samyak S Sarnayak
2aa11afb87 Fix panic when phrase contains only one stop word and nothing else 2022-10-26 19:09:42 +05:30
Samyak S Sarnayak
bb9ce3c5c5 Run cargo fmt 2022-10-26 19:09:03 +05:30
Samyak S Sarnayak
d187b32a28 Fix snapshots to use new phrase type 2022-10-26 19:09:03 +05:30
Samyak S Sarnayak
c8c666c6a6 Use resolve_phrase in exactness and typo criteria 2022-10-26 19:09:01 +05:30
Samyak S Sarnayak
3e190503e6 Search for closest non-stop words in proximity criteria 2022-10-26 19:08:34 +05:30
Samyak S Sarnayak
709ab3c14c Increment position even when it's a stop word in exactness criteria 2022-10-26 19:08:33 +05:30
Samyak S Sarnayak
ef13c6a5b6 Perform filter after enumerate to keep origin indices 2022-10-26 19:08:33 +05:30
Samyak S Sarnayak
6a10b679ca Add test for phrase search with stop words
Originally written by ManyTheFish here:
https://gist.github.com/ManyTheFish/f840e37cb2d2e029ce05396b4d540762

Co-authored-by: ManyTheFish <many@meilisearch.com>
2022-10-26 19:08:32 +05:30
Samyak S Sarnayak
62816dddde [WIP] Fix phrase search containing stop words
Fixes #661 and meilisearch/meilisearch#2905
2022-10-26 19:08:06 +05:30
Loïc Lecrenier
54c0cf93fe Merge remote-tracking branch 'origin/main' into facet-levels-refactor 2022-10-26 15:13:34 +02:00
bors[bot]
365f44c39b Merge #668
668: Fix many Clippy errors part 2 r=ManyTheFish a=ehiggs

This brings us a step closer to enforcing clippy on each build.

# Pull Request

## Related issue
This does not fix any issue outright, but it is a second round of fixes for clippy after https://github.com/meilisearch/milli/pull/665. This should contribute to fixing https://github.com/meilisearch/milli/pull/659.

## What does this PR do?

Satisfies many issues for clippy. The complaints are mostly:

* Passing a reference where the variable is already a reference.
* Using `clone` where the struct already implements `Copy`.
* Using `ok_or_else` with a closure that merely returns a value instead of computing one (hence we use `ok_or`).
* Unambiguous lifetimes don't need names, so we can just use `'_`.
* Using `return` when it is not needed, as we are on the last expression of a function.
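
A small illustrative sketch (hypothetical code, not taken from this PR's diff) of a few of these patterns:

```rust
#[derive(Clone, Copy)]
struct DocumentId(u32);

#[derive(Debug)]
enum Error {
    NotFound,
}

// Before: triggers clippy::clone_on_copy, clippy::unnecessary_lazy_evaluations
// and clippy::needless_return.
fn lookup_before(id: DocumentId, value: Option<u32>) -> Result<u32, Error> {
    let _id = id.clone(); // `DocumentId` is `Copy`, so `clone` is pointless
    return value.ok_or_else(|| Error::NotFound); // the closure only returns a value
}

// After: the clippy-clean equivalent.
fn lookup_after(id: DocumentId, value: Option<u32>) -> Result<u32, Error> {
    let _id = id; // a plain copy
    value.ok_or(Error::NotFound) // eager `ok_or`, no `return` on the last expression
}

fn main() {
    let id = DocumentId(0);
    assert!(lookup_before(id, Some(1)).is_ok());
    assert_eq!(lookup_after(id, Some(1)).unwrap(), 1);
}
```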

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Ewan Higgs <ewan.higgs@gmail.com>
2022-10-26 12:16:24 +00:00
Loïc Lecrenier
2fa85a24ec Remove outdated files from http-ui/ and infos/
... that were reintroduced after a rebase
2022-10-26 14:09:35 +02:00
Loïc Lecrenier
631e9910da Depend on released version of fuzzcheck from crates.io 2022-10-26 14:06:59 +02:00
Loïc Lecrenier
2741756248 Merge remote-tracking branch 'origin/main' into facet-levels-refactor 2022-10-26 14:03:23 +02:00
bors[bot]
d3f95e6c69 Merge #671
671: Update version for the next release (v0.35.0) in Cargo.toml files r=Kerollmops a=meili-bot

⚠️ This PR is automatically generated. Check the new version is the expected one before merging.

Co-authored-by: curquiza <curquiza@users.noreply.github.com>
2022-10-26 11:58:05 +00:00
Loïc Lecrenier
b7f2428961 Fix formatting and warning after rebasing from main 2022-10-26 13:49:33 +02:00
Loïc Lecrenier
3b1f908e5e Revert behaviour of facet distribution to what it was before
Where the docid that is used to get the original facet string value
definitely belongs to the candidates
2022-10-26 13:48:01 +02:00
Loïc Lecrenier
14ca8048a8 Add some documentation on how to run the facet db fuzzer 2022-10-26 13:48:01 +02:00
Loïc Lecrenier
206a3e00e5 cargo fmt 2022-10-26 13:48:01 +02:00
Loïc Lecrenier
f198b20c42 Add facet deletion tests that use both the incremental and bulk methods
+ update deletion snapshots to the new database format
2022-10-26 13:47:46 +02:00
Loïc Lecrenier
e3ba1fc883 Make deletion tests for both soft-deletion and hard-deletion 2022-10-26 13:47:46 +02:00
Loïc Lecrenier
ab5e56fd16 Add document deletion snapshot tests and tests for hard-deletion 2022-10-26 13:47:46 +02:00
Loïc Lecrenier
d885de1600 Add option to avoid soft deletion of documents 2022-10-26 13:47:46 +02:00
Loïc Lecrenier
ee1abfd1c1 Ignore files generated by fuzzcheck 2022-10-26 13:47:46 +02:00
Loïc Lecrenier
2295e0e3ce Use real delete function in facet indexing fuzz tests
By deleting multiple docids at once instead of one-by-one
2022-10-26 13:47:46 +02:00
Loïc Lecrenier
acc8caebe6 Add link to GitHub PR to document of update/facet module 2022-10-26 13:47:46 +02:00
Loïc Lecrenier
a034a1e628 Move StrRefCodec and ByteSliceRefCodec to their own files 2022-10-26 13:47:46 +02:00
Loïc Lecrenier
1165ba2171 Make facet deletion incremental 2022-10-26 13:47:04 +02:00
Loïc Lecrenier
0ade699873 Don't crash when failing to decode using StrRef codec 2022-10-26 13:47:04 +02:00
Loïc Lecrenier
d0109627b9 Fix a bug in facet_range_search and add documentation 2022-10-26 13:47:04 +02:00
Loïc Lecrenier
a2270b7432 Change fuzzcheck dependency to point to git repository 2022-10-26 13:47:04 +02:00
Loïc Lecrenier
1ecd3bb822 Fix bug in FieldDocIdFacetCodec 2022-10-26 13:47:04 +02:00
Loïc Lecrenier
51961e1064 Polish some details 2022-10-26 13:47:04 +02:00
Loïc Lecrenier
cb8442a119 Further unify facet databases of f64s and strings 2022-10-26 13:47:04 +02:00
Loïc Lecrenier
3baa34d842 Fix compiler errors/warnings 2022-10-26 13:47:04 +02:00
Loïc Lecrenier
86d9f50b9c Fix bugs in incremental facet indexing with variable parameters
e.g. add one facet value incrementally with a group_size = X and then
add another one with group_size = Y

It is not actually possible to do so with the public API of milli,
but I wanted to make sure the algorithm worked well in those cases
anyway.

The bugs were found by fuzzing the code with fuzzcheck, which I've added
to milli as a conditional dev-dependency. But it can be removed later.
2022-10-26 13:47:04 +02:00
Loïc Lecrenier
de52a9bf75 Improve documentation of some facet-related algorithms 2022-10-26 13:47:04 +02:00
Loïc Lecrenier
985a94adfc cargo fmt 2022-10-26 13:47:04 +02:00
Loïc Lecrenier
b1ab09196c Remove outdated TODOs 2022-10-26 13:47:04 +02:00
Loïc Lecrenier
3d7ed3263f Fix bug in string facet distribution with few candidates 2022-10-26 13:47:04 +02:00
Loïc Lecrenier
fca4577e23 Return original string in facet distributions, work on facet tests 2022-10-26 13:47:04 +02:00
Loïc Lecrenier
27454e9828 Document and refine facet indexing algorithms 2022-10-26 13:47:04 +02:00
Loïc Lecrenier
bee3c23b45 Add comparison benchmark between bulk and incremental facet indexing 2022-10-26 13:47:04 +02:00
Loïc Lecrenier
b2f01ad204 Refactor facet database tests 2022-10-26 13:47:04 +02:00
Loïc Lecrenier
9026867d17 Give same interface to bulk and incremental facet indexing types
+ cargo fmt, oops, sorry for the bad history :(
2022-10-26 13:47:04 +02:00
Loïc Lecrenier
330c9eb1b2 Rename facet codecs and refine FacetsUpdate API 2022-10-26 13:47:04 +02:00
Loïc Lecrenier
485a72306d Refactor facet-related codecs 2022-10-26 13:47:04 +02:00
Loïc Lecrenier
9b55e582cd Add FacetsUpdate type that wraps incremental and bulk indexing methods 2022-10-26 13:47:04 +02:00
Loïc Lecrenier
3d145d7f48 Merge the two <facetttype>_faceted_documents_ids methods into one 2022-10-26 13:47:04 +02:00
Loïc Lecrenier
982efab88f Fix encoding bugs in facet databases 2022-10-26 13:47:04 +02:00
Loïc Lecrenier
079ed4a992 Add more snapshots 2022-10-26 13:47:04 +02:00
Loïc Lecrenier
afdf87f6f7 Fix bugs in asc/desc criterion and facet indexing 2022-10-26 13:47:04 +02:00
Loïc Lecrenier
a7201ece04 cargo fmt 2022-10-26 13:47:04 +02:00
Loïc Lecrenier
36296bbb20 Add facet incremental indexing snapshot tests + fix bug 2022-10-26 13:47:04 +02:00
Loïc Lecrenier
07ff92c663 Add more snapshots from facet tests 2022-10-26 13:47:04 +02:00
Loïc Lecrenier
61252248fb Fix some facet indexing bugs 2022-10-26 13:47:04 +02:00
Loïc Lecrenier
68cbcdf08b Fix compile errors/warnings in http-ui and infos 2022-10-26 13:47:04 +02:00
Loïc Lecrenier
85824ee203 Try to make facet indexing incremental 2022-10-26 13:47:04 +02:00
Loïc Lecrenier
d30c89e345 Fix compile error+warnings in new tests 2022-10-26 13:46:46 +02:00
Loïc Lecrenier
e8a156d682 Reorganise facets database indexing code 2022-10-26 13:46:46 +02:00
Loïc Lecrenier
fb8d23deb3 Reintroduce db_snap! for facet databases 2022-10-26 13:46:14 +02:00
Loïc Lecrenier
e570c23153 Reintroduce asc/desc functionality 2022-10-26 13:46:14 +02:00
Loïc Lecrenier
bd2c0e1ab6 Remove unused code 2022-10-26 13:46:14 +02:00
Loïc Lecrenier
39a4a0a362 Reintroduce filter range search and facet extractors 2022-10-26 13:46:14 +02:00
Loïc Lecrenier
22d80eeaf9 Reintroduce facet deletion functionality 2022-10-26 13:46:14 +02:00
Loïc Lecrenier
6cc91824c1 Remove unused heed codec files 2022-10-26 13:46:14 +02:00
Loïc Lecrenier
5a904cf29d Reintroduce facet distribution functionality 2022-10-26 13:46:14 +02:00
Loïc Lecrenier
b8a1caad5e Add range search and incremental indexing algorithm 2022-10-26 13:46:14 +02:00
Loïc Lecrenier
63ef0aba18 Start porting facet distribution and sort to new database structure 2022-10-26 13:46:14 +02:00
Loïc Lecrenier
7913d6365c Update Facets indexing to be compatible with new database structure 2022-10-26 13:46:14 +02:00
Loïc Lecrenier
c3f49f766d Prepare refactor of facets database
Prepare refactor of facets database
2022-10-26 13:46:14 +02:00
curquiza
e883bccc76 Update version for the next release (v0.35.0) in Cargo.toml files 2022-10-26 11:43:54 +00:00
bors[bot]
c8f16530d5 Merge #616
616: Introduce an indexation abortion function when indexing documents r=Kerollmops a=Kerollmops



Co-authored-by: Kerollmops <clement@meilisearch.com>
Co-authored-by: Clément Renault <clement@meilisearch.com>
2022-10-26 11:41:18 +00:00
Ewan Higgs
9d27ac8a2e Ignore too many arguments to functions. 2022-10-25 21:22:53 +02:00
Ewan Higgs
42cdc38c7b Allow weird ranges like 1..=0 to pass clippy.
Everything else is just a warning and exit code will be 0.
2022-10-25 21:12:59 +02:00
Ewan Higgs
2ce025a906 Fixes after rebase to fix new issues. 2022-10-25 20:58:31 +02:00
Ewan Higgs
17f7922bfc Remove unneeded lifetimes. 2022-10-25 20:49:04 +02:00
Ewan Higgs
6b2fe94192 Fixes for clippy bringing us down to 18 remaining issues.
This brings us a step closer to enforcing clippy on each build.
2022-10-25 20:49:02 +02:00
bors[bot]
004c09a8e2 Merge #669
669: Add method to create a new Index with specific creation dates r=irevoire a=loiclec

This functionality is needed to implement the import of dumps correctly.

Co-authored-by: Loïc Lecrenier <loic.lecrenier@me.com>
2022-10-25 12:44:43 +00:00
Loïc Lecrenier
36bd66281d Add method to create a new Index with specific creation dates 2022-10-25 14:37:56 +02:00
bors[bot]
d11a6e187f Merge #639
639: Reduce the size of the word_pair_proximity database  r=loiclec a=loiclec

# Pull Request

## What does this PR do?
Fixes #634 

Now, the value corresponding to the key `prox word1 word2` in the `word_pair_proximity_docids` database contains the ids of the documents in which:
- `word1` is followed by `word2`
- the minimum number of words between `word1` and `word2` is `prox-1`

Before this PR, the `word_pair_proximity_docids` database had keys with the format `word1 word2 prox`, and the value contained the ids of the documents in which either:
- `word1` is followed by `word2` after a minimum of `prox-1` words in between them
- `word2` is followed by `word1` after a minimum of `prox-2` words

As a consequence of this change, calls such as:
```
let docids = word_pair_proximity_docids.get(rtxn, (word1, word2, prox));
```
have to be replaced with:
```
let docids1 = word_pair_proximity_docids.get(rtxn, (prox, word1, word2));
let docids2 = word_pair_proximity_docids.get(rtxn, (prox-1, word2, word1));
let docids = docids1 | docids2;
```

## Phrase search

The PR also fixes two bugs in the `resolve_phrase` function. The first bug is that a phrase containing the same word twice would always return zero documents (e.g. `"dog eats dog"`).

The second bug occurs with a phrase such as `"fox is smarter than a dog"` and a document with the text:
```
fox or dog? a fox is smarter than a dog
```
In that case, the phrase search would not return the documents because:
* we only have the key `fox dog 2` in `word_pair_proximity_docids`
* but the implementation of `resolve_phrase` looks for `fox dog 5`, which returns 0 documents 

### New implementation of `resolve_phrase`
Given the phrase:
```
fox is smarter than a dog
```
We select the document ids corresponding to all of the following keys in `word_pair_proximity_docids`:
- `1 fox is`
- `1 is smarter`
- `1 smarter than`
- (etc.)
- `1 fox smarter` OR `2 fox smarter`
- `1 is than` OR `2 is than`
- ...
- `1 than dog` OR `2 than dog`
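
A toy sketch of this key-selection logic, using plain collections and illustrative names instead of milli's types: a pair of phrase words at distance `d` contributes the union of the docids under the keys `(prox, w1, w2)` for `prox` in `1..=d`, and the phrase resolves to the intersection across all pairs.

```rust
use std::collections::BTreeSet;

/// Return, for every ordered pair of words in the phrase, the set of
/// `(proximity, word1, word2)` keys whose docids must be unioned.
fn phrase_keys(phrase: &[&str]) -> Vec<BTreeSet<(u8, String, String)>> {
    let mut per_pair = Vec::new();
    for (i, w1) in phrase.iter().enumerate() {
        for (j, w2) in phrase.iter().enumerate().skip(i + 1) {
            let d = (j - i) as u8; // in-phrase distance: 1 for adjacent words
            let keys: BTreeSet<_> = (1..=d)
                .map(|prox| (prox, w1.to_string(), w2.to_string()))
                .collect();
            per_pair.push(keys); // docids(pair) = union over these keys
        }
    }
    per_pair // docids(phrase) = intersection over all pairs
}

fn main() {
    for keys in phrase_keys(&["fox", "is", "smarter"]) {
        println!("{keys:?}");
    }
}
```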

## Benchmark Results

Indexing:
```
group                                                                     indexing_main_d94339a8                 indexing_word-pair-proximity-docids-refactor_2983dd8e
-----                                                                     ----------------------                 -----------------------------------------------------
indexing/-geo-delete-facetedNumber-facetedGeo-searchable-                 1.19    40.7±11.28ms        ? ?/sec    1.00     34.3±4.16ms        ? ?/sec
indexing/-movies-delete-facetedString-facetedNumber-searchable-           1.62     11.3±3.77ms        ? ?/sec    1.00      7.0±1.56ms        ? ?/sec
indexing/-movies-delete-facetedString-facetedNumber-searchable-nested-    1.00     12.5±2.62ms        ? ?/sec    1.07     13.4±4.24ms        ? ?/sec
indexing/-songs-delete-facetedString-facetedNumber-searchable-            1.26    50.2±12.63ms        ? ?/sec    1.00    39.8±20.25ms        ? ?/sec
indexing/-wiki-delete-searchable-                                         1.83   269.1±16.11ms        ? ?/sec    1.00    146.8±6.12ms        ? ?/sec
indexing/Indexing geo_point                                               1.00      47.2±0.46s        ? ?/sec    1.00      47.3±0.56s        ? ?/sec
indexing/Indexing movies in three batches                                 1.42      12.7±0.13s        ? ?/sec    1.00       9.0±0.07s        ? ?/sec
indexing/Indexing movies with default settings                            1.40      10.2±0.07s        ? ?/sec    1.00       7.3±0.06s        ? ?/sec
indexing/Indexing nested movies with default settings                     1.22       7.8±0.11s        ? ?/sec    1.00       6.4±0.13s        ? ?/sec
indexing/Indexing nested movies without any facets                        1.24       7.3±0.07s        ? ?/sec    1.00       5.9±0.06s        ? ?/sec
indexing/Indexing songs in three batches with default settings            1.14      47.6±0.67s        ? ?/sec    1.00      41.8±0.63s        ? ?/sec
indexing/Indexing songs with default settings                             1.13      44.1±0.74s        ? ?/sec    1.00      38.9±0.76s        ? ?/sec
indexing/Indexing songs without any facets                                1.19      42.0±0.66s        ? ?/sec    1.00      35.2±0.48s        ? ?/sec
indexing/Indexing songs without faceted numbers                           1.20      44.3±1.40s        ? ?/sec    1.00      37.0±0.48s        ? ?/sec
indexing/Indexing wiki                                                    1.39     862.9±9.95s        ? ?/sec    1.00    622.6±27.11s        ? ?/sec
indexing/Indexing wiki in three batches                                   1.40     934.4±5.97s        ? ?/sec    1.00     665.7±4.72s        ? ?/sec
indexing/Reindexing geo_point                                             1.01      15.9±0.39s        ? ?/sec    1.00      15.7±0.28s        ? ?/sec
indexing/Reindexing movies with default settings                          1.15   288.8±25.03ms        ? ?/sec    1.00    250.4±2.23ms        ? ?/sec
indexing/Reindexing songs with default settings                           1.01       4.1±0.06s        ? ?/sec    1.00       4.1±0.03s        ? ?/sec
indexing/Reindexing wiki                                                  1.41   1484.7±20.59s        ? ?/sec    1.00   1052.0±19.89s        ? ?/sec
```

Search Wiki:
<details>
<pre>
group                                                                                    search_wiki_main_d94339a8              search_wiki_word-pair-proximity-docids-refactor_2983dd8e
-----                                                                                    -------------------------              --------------------------------------------------------
smol-wiki-articles.csv: basic placeholder/                                               1.02     25.8±0.21µs        ? ?/sec    1.00     25.4±0.19µs        ? ?/sec
smol-wiki-articles.csv: basic with quote/"film"                                          1.00    441.7±2.57µs        ? ?/sec    1.00    442.3±2.41µs        ? ?/sec
smol-wiki-articles.csv: basic with quote/"france"                                        1.00    357.0±2.63µs        ? ?/sec    1.00    358.3±2.65µs        ? ?/sec
smol-wiki-articles.csv: basic with quote/"japan"                                         1.00    239.4±2.24µs        ? ?/sec    1.00    240.2±1.82µs        ? ?/sec
smol-wiki-articles.csv: basic with quote/"machine"                                       1.00    180.3±2.40µs        ? ?/sec    1.00    180.0±1.08µs        ? ?/sec
smol-wiki-articles.csv: basic with quote/"miles" "davis"                                 1.00      9.1±0.03ms        ? ?/sec    1.03      9.3±0.04ms        ? ?/sec
smol-wiki-articles.csv: basic with quote/"mingus"                                        1.00      3.6±0.01ms        ? ?/sec    1.03      3.7±0.02ms        ? ?/sec
smol-wiki-articles.csv: basic with quote/"rock" "and" "roll"                             1.00     34.0±0.11ms        ? ?/sec    1.03     35.1±0.13ms        ? ?/sec
smol-wiki-articles.csv: basic with quote/"spain"                                         1.00    162.0±0.88µs        ? ?/sec    1.00    161.9±0.98µs        ? ?/sec
smol-wiki-articles.csv: basic without quote/film                                         1.01    164.4±1.46µs        ? ?/sec    1.00    163.1±1.58µs        ? ?/sec
smol-wiki-articles.csv: basic without quote/france                                       1.00   1698.3±7.37µs        ? ?/sec    1.00  1697.7±11.53µs        ? ?/sec
smol-wiki-articles.csv: basic without quote/japan                                        1.00  1154.0±23.61µs        ? ?/sec    1.00   1150.7±9.27µs        ? ?/sec
smol-wiki-articles.csv: basic without quote/machine                                      1.00    524.6±3.45µs        ? ?/sec    1.01    528.1±4.56µs        ? ?/sec
smol-wiki-articles.csv: basic without quote/miles davis                                  1.00     13.5±0.05ms        ? ?/sec    1.02     13.8±0.05ms        ? ?/sec
smol-wiki-articles.csv: basic without quote/mingus                                       1.00      4.1±0.02ms        ? ?/sec    1.03      4.2±0.01ms        ? ?/sec
smol-wiki-articles.csv: basic without quote/rock and roll                                1.00     49.0±0.19ms        ? ?/sec    1.03     50.4±0.22ms        ? ?/sec
smol-wiki-articles.csv: basic without quote/spain                                        1.00    412.2±3.35µs        ? ?/sec    1.00    412.9±2.81µs        ? ?/sec
smol-wiki-articles.csv: prefix search/c                                                  1.00    383.9±2.53µs        ? ?/sec    1.00    383.4±2.44µs        ? ?/sec
smol-wiki-articles.csv: prefix search/g                                                  1.00    433.4±2.53µs        ? ?/sec    1.00    432.8±2.52µs        ? ?/sec
smol-wiki-articles.csv: prefix search/j                                                  1.00    424.3±2.05µs        ? ?/sec    1.00    424.0±2.15µs        ? ?/sec
smol-wiki-articles.csv: prefix search/q                                                  1.00    154.0±1.93µs        ? ?/sec    1.00    153.5±1.04µs        ? ?/sec
smol-wiki-articles.csv: prefix search/t                                                  1.04   658.5±91.93µs        ? ?/sec    1.00    631.4±3.89µs        ? ?/sec
smol-wiki-articles.csv: prefix search/x                                                  1.00    446.2±2.09µs        ? ?/sec    1.00    445.6±3.13µs        ? ?/sec
smol-wiki-articles.csv: proximity/april paris                                            1.02      3.4±0.39ms        ? ?/sec    1.00      3.3±0.01ms        ? ?/sec
smol-wiki-articles.csv: proximity/diesel engine                                          1.00  1022.1±17.52µs        ? ?/sec    1.00   1017.7±8.16µs        ? ?/sec
smol-wiki-articles.csv: proximity/herald sings                                           1.01  1872.5±97.70µs        ? ?/sec    1.00   1862.2±8.57µs        ? ?/sec
smol-wiki-articles.csv: proximity/tea two                                                1.00   295.2±34.91µs        ? ?/sec    1.00    296.6±4.08µs        ? ?/sec
smol-wiki-articles.csv: typo/Disnaylande                                                 1.00      3.4±0.51ms        ? ?/sec    1.04      3.5±0.01ms        ? ?/sec
smol-wiki-articles.csv: typo/aritmetric                                                  1.00      3.6±0.01ms        ? ?/sec    1.00      3.7±0.01ms        ? ?/sec
smol-wiki-articles.csv: typo/linax                                                       1.00    167.5±1.28µs        ? ?/sec    1.00    167.1±2.65µs        ? ?/sec
smol-wiki-articles.csv: typo/migrosoft                                                   1.01    217.9±1.84µs        ? ?/sec    1.00    216.2±1.61µs        ? ?/sec
smol-wiki-articles.csv: typo/nympalidea                                                  1.00      2.9±0.01ms        ? ?/sec    1.10      3.1±0.01ms        ? ?/sec
smol-wiki-articles.csv: typo/phytogropher                                                1.00      3.0±0.23ms        ? ?/sec    1.08      3.3±0.01ms        ? ?/sec
smol-wiki-articles.csv: typo/sisan                                                       1.00    234.6±1.38µs        ? ?/sec    1.01    235.8±1.67µs        ? ?/sec
smol-wiki-articles.csv: typo/the fronce                                                  1.00    104.4±0.84µs        ? ?/sec    1.00    103.9±0.81µs        ? ?/sec
smol-wiki-articles.csv: words/Abraham machin                                             1.02    675.5±4.74µs        ? ?/sec    1.00    662.1±5.13µs        ? ?/sec
smol-wiki-articles.csv: words/Idaho Bellevue pizza                                       1.02  1004.5±11.07µs        ? ?/sec    1.00   989.5±13.08µs        ? ?/sec
smol-wiki-articles.csv: words/Kameya Tokujirō mingus monk                                1.00  1650.8±10.92µs        ? ?/sec    1.00  1643.2±10.77µs        ? ?/sec
smol-wiki-articles.csv: words/Ulrich Hensel meilisearch milli                            1.00      5.4±0.03ms        ? ?/sec    1.00      5.4±0.02ms        ? ?/sec
smol-wiki-articles.csv: words/the black saint and the sinner lady and the good doggo     1.00     32.9±0.10ms        ? ?/sec    1.00     32.8±0.10ms        ? ?/sec
</pre>
</details>

Search songs:
<details>
<pre>
group                                                                                                    search_songs_main_d94339a8             search_songs_word-pair-proximity-docids-refactor_2983dd8e
-----                                                                                                    --------------------------             ---------------------------------------------------------
smol-songs.csv: asc + default/Notstandskomitee                                                           1.00      3.0±0.01ms        ? ?/sec    1.01      3.0±0.04ms        ? ?/sec
smol-songs.csv: asc + default/charles                                                                    1.00      2.2±0.01ms        ? ?/sec    1.01      2.2±0.01ms        ? ?/sec
smol-songs.csv: asc + default/charles mingus                                                             1.00      3.1±0.01ms        ? ?/sec    1.01      3.1±0.01ms        ? ?/sec
smol-songs.csv: asc + default/david                                                                      1.00      2.9±0.01ms        ? ?/sec    1.00      2.9±0.01ms        ? ?/sec
smol-songs.csv: asc + default/david bowie                                                                1.00      4.5±0.02ms        ? ?/sec    1.00      4.5±0.02ms        ? ?/sec
smol-songs.csv: asc + default/john                                                                       1.00      3.1±0.01ms        ? ?/sec    1.01      3.2±0.01ms        ? ?/sec
smol-songs.csv: asc + default/marcus miller                                                              1.00      5.0±0.02ms        ? ?/sec    1.00      5.0±0.02ms        ? ?/sec
smol-songs.csv: asc + default/michael jackson                                                            1.00      4.7±0.02ms        ? ?/sec    1.00      4.7±0.02ms        ? ?/sec
smol-songs.csv: asc + default/tamo                                                                       1.00  1463.4±12.17µs        ? ?/sec    1.01   1481.5±8.83µs        ? ?/sec
smol-songs.csv: asc + default/thelonious monk                                                            1.00      4.4±0.01ms        ? ?/sec    1.00      4.4±0.02ms        ? ?/sec
smol-songs.csv: asc/Notstandskomitee                                                                     1.01      2.6±0.01ms        ? ?/sec    1.00      2.6±0.01ms        ? ?/sec
smol-songs.csv: asc/charles                                                                              1.00    473.6±3.70µs        ? ?/sec    1.01   476.8±22.17µs        ? ?/sec
smol-songs.csv: asc/charles mingus                                                                       1.01    780.1±3.90µs        ? ?/sec    1.00    773.6±4.60µs        ? ?/sec
smol-songs.csv: asc/david                                                                                1.00    757.6±4.50µs        ? ?/sec    1.00    760.7±5.20µs        ? ?/sec
smol-songs.csv: asc/david bowie                                                                          1.00   1131.2±8.68µs        ? ?/sec    1.00   1130.7±8.36µs        ? ?/sec
smol-songs.csv: asc/john                                                                                 1.00    668.9±6.48µs        ? ?/sec    1.00    669.9±2.78µs        ? ?/sec
smol-songs.csv: asc/marcus miller                                                                        1.00    959.8±7.10µs        ? ?/sec    1.00    958.9±4.72µs        ? ?/sec
smol-songs.csv: asc/michael jackson                                                                      1.01  1076.7±16.73µs        ? ?/sec    1.00   1070.8±7.34µs        ? ?/sec
smol-songs.csv: asc/tamo                                                                                 1.00     70.4±0.55µs        ? ?/sec    1.00     70.5±0.51µs        ? ?/sec
smol-songs.csv: asc/thelonious monk                                                                      1.01      2.9±0.01ms        ? ?/sec    1.00      2.9±0.01ms        ? ?/sec
smol-songs.csv: basic filter: <=/Notstandskomitee                                                        1.00    162.0±0.91µs        ? ?/sec    1.01    163.6±1.72µs        ? ?/sec
smol-songs.csv: basic filter: <=/charles                                                                 1.00     38.3±0.24µs        ? ?/sec    1.01     38.7±0.31µs        ? ?/sec
smol-songs.csv: basic filter: <=/charles mingus                                                          1.01     85.3±0.44µs        ? ?/sec    1.00     84.6±0.47µs        ? ?/sec
smol-songs.csv: basic filter: <=/david                                                                   1.01     32.4±0.25µs        ? ?/sec    1.00     32.1±0.24µs        ? ?/sec
smol-songs.csv: basic filter: <=/david bowie                                                             1.00     68.6±0.99µs        ? ?/sec    1.01     68.9±0.88µs        ? ?/sec
smol-songs.csv: basic filter: <=/john                                                                    1.04     26.1±0.37µs        ? ?/sec    1.00     25.1±0.22µs        ? ?/sec
smol-songs.csv: basic filter: <=/marcus miller                                                           1.00     76.7±0.39µs        ? ?/sec    1.01     77.3±0.61µs        ? ?/sec
smol-songs.csv: basic filter: <=/michael jackson                                                         1.00     95.5±0.66µs        ? ?/sec    1.01     96.3±0.79µs        ? ?/sec
smol-songs.csv: basic filter: <=/tamo                                                                    1.03     26.2±0.36µs        ? ?/sec    1.00     25.3±0.23µs        ? ?/sec
smol-songs.csv: basic filter: <=/thelonious monk                                                         1.00    140.7±1.36µs        ? ?/sec    1.01    142.7±0.88µs        ? ?/sec
smol-songs.csv: basic filter: TO/Notstandskomitee                                                        1.00    165.4±1.25µs        ? ?/sec    1.00    165.7±1.72µs        ? ?/sec
smol-songs.csv: basic filter: TO/charles                                                                 1.01     40.6±0.57µs        ? ?/sec    1.00     40.1±0.54µs        ? ?/sec
smol-songs.csv: basic filter: TO/charles mingus                                                          1.01     87.1±0.80µs        ? ?/sec    1.00     86.3±0.61µs        ? ?/sec
smol-songs.csv: basic filter: TO/david                                                                   1.02     34.5±0.26µs        ? ?/sec    1.00     33.7±0.24µs        ? ?/sec
smol-songs.csv: basic filter: TO/david bowie                                                             1.00     70.6±0.38µs        ? ?/sec    1.00     70.6±0.68µs        ? ?/sec
smol-songs.csv: basic filter: TO/john                                                                    1.02     27.5±0.77µs        ? ?/sec    1.00     26.9±0.21µs        ? ?/sec
smol-songs.csv: basic filter: TO/marcus miller                                                           1.01     79.8±0.76µs        ? ?/sec    1.00     79.3±1.27µs        ? ?/sec
smol-songs.csv: basic filter: TO/michael jackson                                                         1.00     98.3±0.54µs        ? ?/sec    1.00     98.0±0.88µs        ? ?/sec
smol-songs.csv: basic filter: TO/tamo                                                                    1.03     27.9±0.23µs        ? ?/sec    1.00     27.1±0.32µs        ? ?/sec
smol-songs.csv: basic filter: TO/thelonious monk                                                         1.00    142.5±1.36µs        ? ?/sec    1.02    145.2±0.98µs        ? ?/sec
smol-songs.csv: basic placeholder/                                                                       1.00     49.4±0.34µs        ? ?/sec    1.00     49.3±0.45µs        ? ?/sec
smol-songs.csv: basic with quote/"Notstandskomitee"                                                      1.00    190.5±1.60µs        ? ?/sec    1.01    191.8±2.10µs        ? ?/sec
smol-songs.csv: basic with quote/"charles"                                                               1.00    165.0±1.13µs        ? ?/sec    1.01    166.0±1.39µs        ? ?/sec
smol-songs.csv: basic with quote/"charles" "mingus"                                                      1.00  1149.4±15.78µs        ? ?/sec    1.02   1171.1±9.95µs        ? ?/sec
smol-songs.csv: basic with quote/"david"                                                                 1.00    236.5±1.61µs        ? ?/sec    1.00    236.9±1.73µs        ? ?/sec
smol-songs.csv: basic with quote/"david" "bowie"                                                         1.00   1384.8±9.02µs        ? ?/sec    1.01  1393.8±11.39µs        ? ?/sec
smol-songs.csv: basic with quote/"john"                                                                  1.00    358.3±4.85µs        ? ?/sec    1.00    358.9±1.75µs        ? ?/sec
smol-songs.csv: basic with quote/"marcus" "miller"                                                       1.00    281.4±1.79µs        ? ?/sec    1.01    285.6±3.24µs        ? ?/sec
smol-songs.csv: basic with quote/"michael" "jackson"                                                     1.00   1328.4±8.01µs        ? ?/sec    1.00   1334.6±8.00µs        ? ?/sec
smol-songs.csv: basic with quote/"tamo"                                                                  1.00    528.7±3.72µs        ? ?/sec    1.01    533.4±5.31µs        ? ?/sec
smol-songs.csv: basic with quote/"thelonious" "monk"                                                     1.00   1223.0±7.24µs        ? ?/sec    1.02  1245.7±12.04µs        ? ?/sec
smol-songs.csv: basic without quote/Notstandskomitee                                                     1.00      2.8±0.01ms        ? ?/sec    1.00      2.8±0.01ms        ? ?/sec
smol-songs.csv: basic without quote/charles                                                              1.00    273.3±2.06µs        ? ?/sec    1.01    275.9±1.76µs        ? ?/sec
smol-songs.csv: basic without quote/charles mingus                                                       1.00      2.3±0.01ms        ? ?/sec    1.02      2.4±0.01ms        ? ?/sec
smol-songs.csv: basic without quote/david                                                                1.00    434.3±3.86µs        ? ?/sec    1.01    436.7±2.47µs        ? ?/sec
smol-songs.csv: basic without quote/david bowie                                                          1.00      5.6±0.02ms        ? ?/sec    1.01      5.7±0.02ms        ? ?/sec
smol-songs.csv: basic without quote/john                                                                 1.00   1322.5±9.98µs        ? ?/sec    1.00  1321.2±17.40µs        ? ?/sec
smol-songs.csv: basic without quote/marcus miller                                                        1.02      2.4±0.02ms        ? ?/sec    1.00      2.4±0.01ms        ? ?/sec
smol-songs.csv: basic without quote/michael jackson                                                      1.00      3.8±0.02ms        ? ?/sec    1.01      3.9±0.01ms        ? ?/sec
smol-songs.csv: basic without quote/tamo                                                                 1.00    809.0±4.01µs        ? ?/sec    1.01    819.0±6.22µs        ? ?/sec
smol-songs.csv: basic without quote/thelonious monk                                                      1.00      3.8±0.02ms        ? ?/sec    1.02      3.9±0.02ms        ? ?/sec
smol-songs.csv: big filter/Notstandskomitee                                                              1.00      2.7±0.01ms        ? ?/sec    1.01      2.8±0.01ms        ? ?/sec
smol-songs.csv: big filter/charles                                                                       1.00    266.5±1.34µs        ? ?/sec    1.01    270.1±8.17µs        ? ?/sec
smol-songs.csv: big filter/charles mingus                                                                1.00    651.0±5.40µs        ? ?/sec    1.00    651.0±2.73µs        ? ?/sec
smol-songs.csv: big filter/david                                                                         1.00  1018.1±11.16µs        ? ?/sec    1.00   1022.3±8.94µs        ? ?/sec
smol-songs.csv: big filter/david bowie                                                                   1.00  1912.2±11.13µs        ? ?/sec    1.00   1919.8±8.30µs        ? ?/sec
smol-songs.csv: big filter/john                                                                          1.00    867.2±6.66µs        ? ?/sec    1.01    873.3±3.44µs        ? ?/sec
smol-songs.csv: big filter/marcus miller                                                                 1.00    717.7±2.86µs        ? ?/sec    1.01    721.5±3.89µs        ? ?/sec
smol-songs.csv: big filter/michael jackson                                                               1.00  1668.4±16.76µs        ? ?/sec    1.00  1667.9±10.11µs        ? ?/sec
smol-songs.csv: big filter/tamo                                                                          1.01    136.7±0.88µs        ? ?/sec    1.00    135.5±1.22µs        ? ?/sec
smol-songs.csv: big filter/thelonious monk                                                               1.03      3.1±0.02ms        ? ?/sec    1.00      3.0±0.01ms        ? ?/sec
smol-songs.csv: desc + default/Notstandskomitee                                                          1.00      3.0±0.01ms        ? ?/sec    1.00      3.0±0.01ms        ? ?/sec
smol-songs.csv: desc + default/charles                                                                   1.00  1599.5±13.07µs        ? ?/sec    1.01  1622.9±22.43µs        ? ?/sec
smol-songs.csv: desc + default/charles mingus                                                            1.00      2.3±0.01ms        ? ?/sec    1.01      2.4±0.03ms        ? ?/sec
smol-songs.csv: desc + default/david                                                                     1.00      5.7±0.02ms        ? ?/sec    1.00      5.7±0.02ms        ? ?/sec
smol-songs.csv: desc + default/david bowie                                                               1.00      9.0±0.04ms        ? ?/sec    1.00      9.0±0.03ms        ? ?/sec
smol-songs.csv: desc + default/john                                                                      1.00      4.5±0.01ms        ? ?/sec    1.00      4.5±0.02ms        ? ?/sec
smol-songs.csv: desc + default/marcus miller                                                             1.00      3.9±0.01ms        ? ?/sec    1.00      3.9±0.02ms        ? ?/sec
smol-songs.csv: desc + default/michael jackson                                                           1.00      6.6±0.03ms        ? ?/sec    1.00      6.6±0.03ms        ? ?/sec
smol-songs.csv: desc + default/tamo                                                                      1.00  1472.4±10.38µs        ? ?/sec    1.01   1484.2±8.07µs        ? ?/sec
smol-songs.csv: desc + default/thelonious monk                                                           1.00      4.4±0.02ms        ? ?/sec    1.00      4.4±0.05ms        ? ?/sec
smol-songs.csv: desc/Notstandskomitee                                                                    1.01      2.6±0.01ms        ? ?/sec    1.00      2.6±0.01ms        ? ?/sec
smol-songs.csv: desc/charles                                                                             1.00    475.9±3.38µs        ? ?/sec    1.00    475.9±2.64µs        ? ?/sec
smol-songs.csv: desc/charles mingus                                                                      1.00    775.3±4.30µs        ? ?/sec    1.00    778.9±3.52µs        ? ?/sec
smol-songs.csv: desc/david                                                                               1.00    757.9±4.10µs        ? ?/sec    1.01    763.4±3.27µs        ? ?/sec
smol-songs.csv: desc/david bowie                                                                         1.00  1129.0±11.87µs        ? ?/sec    1.01   1135.1±8.86µs        ? ?/sec
smol-songs.csv: desc/john                                                                                1.00    670.2±4.38µs        ? ?/sec    1.00    670.2±3.46µs        ? ?/sec
smol-songs.csv: desc/marcus miller                                                                       1.00    961.2±4.47µs        ? ?/sec    1.00    961.9±4.03µs        ? ?/sec
smol-songs.csv: desc/michael jackson                                                                     1.00   1076.5±6.61µs        ? ?/sec    1.00   1077.9±7.11µs        ? ?/sec
smol-songs.csv: desc/tamo                                                                                1.00     70.6±0.57µs        ? ?/sec    1.01     71.3±0.48µs        ? ?/sec
smol-songs.csv: desc/thelonious monk                                                                     1.01      2.9±0.01ms        ? ?/sec    1.00      2.9±0.01ms        ? ?/sec
smol-songs.csv: prefix search/a                                                                          1.00   1236.2±9.43µs        ? ?/sec    1.00  1232.0±12.07µs        ? ?/sec
smol-songs.csv: prefix search/b                                                                          1.00   1090.8±9.89µs        ? ?/sec    1.00   1090.8±9.43µs        ? ?/sec
smol-songs.csv: prefix search/i                                                                          1.00   1333.9±8.28µs        ? ?/sec    1.00  1334.2±11.21µs        ? ?/sec
smol-songs.csv: prefix search/s                                                                          1.00    810.5±3.69µs        ? ?/sec    1.00    806.6±3.50µs        ? ?/sec
smol-songs.csv: prefix search/x                                                                          1.00    290.5±1.88µs        ? ?/sec    1.00    291.0±1.85µs        ? ?/sec
smol-songs.csv: proximity/7000 Danses Un Jour Dans Notre Vie                                             1.00      4.7±0.02ms        ? ?/sec    1.00      4.7±0.02ms        ? ?/sec
smol-songs.csv: proximity/The Disneyland Sing-Along Chorus                                               1.01      5.6±0.02ms        ? ?/sec    1.00      5.6±0.03ms        ? ?/sec
smol-songs.csv: proximity/Under Great Northern Lights                                                    1.00      2.5±0.01ms        ? ?/sec    1.00      2.5±0.01ms        ? ?/sec
smol-songs.csv: proximity/black saint sinner lady                                                        1.00      4.8±0.02ms        ? ?/sec    1.00      4.8±0.02ms        ? ?/sec
smol-songs.csv: proximity/les dangeureuses 1960                                                          1.00      3.2±0.01ms        ? ?/sec    1.01      3.2±0.01ms        ? ?/sec
smol-songs.csv: typo/Arethla Franklin                                                                    1.00    388.7±5.16µs        ? ?/sec    1.00    390.0±2.11µs        ? ?/sec
smol-songs.csv: typo/Disnaylande                                                                         1.01      2.6±0.01ms        ? ?/sec    1.00      2.6±0.01ms        ? ?/sec
smol-songs.csv: typo/dire straights                                                                      1.00    125.9±1.22µs        ? ?/sec    1.00    126.0±0.71µs        ? ?/sec
smol-songs.csv: typo/fear of the duck                                                                    1.00    373.7±4.25µs        ? ?/sec    1.01   375.7±14.17µs        ? ?/sec
smol-songs.csv: typo/indochie                                                                            1.00    103.6±0.94µs        ? ?/sec    1.00    103.4±0.74µs        ? ?/sec
smol-songs.csv: typo/indochien                                                                           1.00    155.6±1.14µs        ? ?/sec    1.01    157.5±1.75µs        ? ?/sec
smol-songs.csv: typo/klub des loopers                                                                    1.00    160.6±2.98µs        ? ?/sec    1.01    161.7±1.96µs        ? ?/sec
smol-songs.csv: typo/michel depech                                                                       1.00     79.4±0.54µs        ? ?/sec    1.01     79.9±0.60µs        ? ?/sec
smol-songs.csv: typo/mongus                                                                              1.00    126.7±1.85µs        ? ?/sec    1.00    126.1±0.74µs        ? ?/sec
smol-songs.csv: typo/stromal                                                                             1.01    132.9±0.99µs        ? ?/sec    1.00    131.9±1.09µs        ? ?/sec
smol-songs.csv: typo/the white striper                                                                   1.00    287.8±2.88µs        ? ?/sec    1.00    286.5±1.91µs        ? ?/sec
smol-songs.csv: typo/thelonius monk                                                                      1.00    304.2±1.49µs        ? ?/sec    1.01    306.5±1.50µs        ? ?/sec
smol-songs.csv: words/7000 Danses / Le Baiser / je me trompe de mots                                     1.01     20.9±0.08ms        ? ?/sec    1.00     20.7±0.07ms        ? ?/sec
smol-songs.csv: words/Bring Your Daughter To The Slaughter but now this is not part of the title         1.00     48.9±0.13ms        ? ?/sec    1.00     48.9±0.11ms        ? ?/sec
smol-songs.csv: words/The Disneyland Children's Sing-Alone song                                          1.01     13.9±0.06ms        ? ?/sec    1.00     13.8±0.07ms        ? ?/sec
smol-songs.csv: words/les liaisons dangeureuses 1793                                                     1.01      3.7±0.01ms        ? ?/sec    1.00      3.6±0.02ms        ? ?/sec
smol-songs.csv: words/seven nation mummy                                                                 1.00  1054.2±14.49µs        ? ?/sec    1.00  1056.6±10.53µs        ? ?/sec
smol-songs.csv: words/the black saint and the sinner lady and the good doggo                             1.00     58.2±0.29ms        ? ?/sec    1.00     57.9±0.21ms        ? ?/sec
smol-songs.csv: words/whathavenotnsuchforth and a good amount of words to pop to match the first one     1.00     66.1±0.21ms        ? ?/sec    1.00     66.0±0.24ms        ? ?/sec
</pre>
</details>

Co-authored-by: Loïc Lecrenier <loic@meilisearch.com>
Co-authored-by: Loïc Lecrenier <loic.lecrenier@me.com>
2022-10-25 10:42:04 +00:00
Loïc Lecrenier
9a569d73d1 Minor code style change 2022-10-24 15:30:43 +02:00
Loïc Lecrenier
be302fd250 Remove outdated workaround for duplicate words in phrase search 2022-10-24 15:27:06 +02:00
Loïc Lecrenier
d76d0cb1bf Merge branch 'main' into word-pair-proximity-docids-refactor 2022-10-24 15:23:00 +02:00
bors[bot]
2bf867982a Merge #667
667: Update version for the next release (v0.34.0) in Cargo.toml files r=curquiza a=meili-bot

⚠️ This PR is automatically generated. Check the new version is the expected one before merging.

Co-authored-by: curquiza <curquiza@users.noreply.github.com>
2022-10-24 10:19:04 +00:00
curquiza
f3874d58b9 Update version for the next release (v0.34.0) in Cargo.toml files 2022-10-24 10:13:25 +00:00
Loïc Lecrenier
a983129613 Apply suggestions from code review 2022-10-20 09:49:37 +02:00
bors[bot]
f11a4087da Merge #665
665: Fixing piles of clippy errors. r=ManyTheFish a=ehiggs

## Related issue
No issue fixed. Simply cleaning up some code for clippy on the march towards a clean build when #659 is merged.

## What does this PR do?
Most of these are calling clone when the struct supports Copy.

Many are using `&` and `&mut` on `self` when the function they are called from already has an immutable or mutable borrow, so this isn't needed.

I tried to stay away from actual changes or places where I'd have to name fresh variables.

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Co-authored-by: Ewan Higgs <ewan.higgs@gmail.com>
2022-10-20 07:19:46 +00:00
Loïc Lecrenier
176ffd23f5 Fix compile error after rebasing wppd-refactor 2022-10-18 10:40:26 +02:00
Loïc Lecrenier
ab2f6f3aa4 Refine some details in word_prefix_pair_proximity indexing code 2022-10-18 10:37:34 +02:00
Loïc Lecrenier
e6e76fbefe Improve performance of resolve_phrase at the cost of some relevancy 2022-10-18 10:37:34 +02:00
Loïc Lecrenier
178d00f93a Cargo fmt 2022-10-18 10:37:34 +02:00
Loïc Lecrenier
830a7c0c7a Use resolve_phrase function for exactness criteria as well 2022-10-18 10:37:34 +02:00
Loïc Lecrenier
18d578dfc4 Adjust some algorithms using DBs of word pair proximities 2022-10-18 10:37:34 +02:00
Loïc Lecrenier
072b576514 Fix proximity value in keys of prefix_word_pair_proximity_docids 2022-10-18 10:37:34 +02:00
Loïc Lecrenier
6c3a5d69e1 Update snapshots 2022-10-18 10:37:34 +02:00
Loïc Lecrenier
a7de4f5b85 Don't add swapped word pairs to the word_pair_proximity_docids db 2022-10-18 10:37:34 +02:00
Loïc Lecrenier
264a04922d Add prefix_word_pair_proximity database
Similar to the word_prefix_pair_proximity one but instead the keys are:
(proximity, prefix, word2)
2022-10-18 10:37:34 +02:00
Loïc Lecrenier
1dbbd8694f Rename StrStrU8Codec to U8StrStrCodec and reorder its fields 2022-10-18 10:37:34 +02:00
Loïc Lecrenier
bdeb47305e Change encoding of word_pair_proximity DB to (proximity, word1, word2)
Same for word_prefix_pair_proximity
2022-10-18 10:37:34 +02:00
bors[bot]
19b2326f3d Merge #586
586: Add settings to force milli to exhaustively compute the total number of hits r=Kerollmops a=ManyTheFish

Add a new setting, `exhaustive_number_hits`, to `Search` that forces the `Initial` criterion to exhaustively compute the `bucket_candidates`, allowing end users to implement finite pagination.
 
related to https://github.com/meilisearch/meilisearch/pull/2601
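
A toy model of the setting's effect (illustrative code, not milli's API): in exhaustive mode every ranking bucket is visited so the total hit count is exact, while the default mode may stop early and only produce an estimate, which is not enough for finite pagination.

```rust
fn total_hits(buckets: &[Vec<u32>], exhaustive: bool, limit: usize) -> usize {
    if exhaustive {
        buckets.iter().map(Vec::len).sum() // exact: visit every bucket
    } else {
        // estimate: stop as soon as `limit` candidates have been gathered
        let mut seen = 0;
        for bucket in buckets {
            seen += bucket.len();
            if seen >= limit {
                break;
            }
        }
        seen
    }
}

fn main() {
    let buckets = vec![vec![1, 2], vec![3], vec![4, 5, 6]];
    assert_eq!(total_hits(&buckets, true, 2), 6);  // exact total
    assert_eq!(total_hits(&buckets, false, 2), 2); // early-stop estimate
}
```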

Co-authored-by: ManyTheFish <many@meilisearch.com>
Co-authored-by: Many the fish <many@meilisearch.com>
2022-10-17 16:24:35 +00:00
Many the fish
81919a35a2 Update milli/src/search/criteria/initial.rs
Co-authored-by: Clément Renault <clement@meilisearch.com>
2022-10-17 18:23:20 +02:00
Many the fish
516e838eb4 Update milli/src/search/criteria/initial.rs
Co-authored-by: Clément Renault <clement@meilisearch.com>
2022-10-17 18:23:15 +02:00
Clément Renault
fc03e53615 Add a test to check that we can abort an indexation 2022-10-17 17:28:03 +02:00
Kerollmops
6603437cb1 Introduce an indexation abortion function when indexing documents 2022-10-17 17:28:03 +02:00
ManyTheFish
6f55e7844c Add some code comments 2022-10-17 14:41:57 +02:00
ManyTheFish
cf203b7fde Take filter in account when computing the pages candidates 2022-10-17 14:13:44 +02:00
ManyTheFish
d71bc1e69f Compute an exact count when using distinct 2022-10-17 14:13:44 +02:00
ManyTheFish
a396806343 Add settings to force milli to exhaustively compute the total number of hits 2022-10-17 14:13:44 +02:00
bors[bot]
fad0de4581 Merge #655
655: Upgrade all dependencies r=Kerollmops a=loiclec

Upgrade all dependencies to their latest versions.

Partly fixes https://github.com/meilisearch/meilisearch/issues/2822





Co-authored-by: Loïc Lecrenier <loic@meilisearch.com>
2022-10-17 11:19:46 +00:00
Loïc Lecrenier
c2ca259f48 Update cli to latest indicatif crate version 2022-10-17 13:05:56 +02:00
Loïc Lecrenier
4c481a8947 Upgrade all dependencies 2022-10-17 13:05:56 +02:00
Ewan Higgs
beb987d3d1 Fixing piles of clippy errors.
Most of these are calling clone when the struct supports Copy.

Many are using & and &mut on `self` when the function they are called
from already has an immutable or mutable borrow, so this isn't needed.

I tried to stay away from actual changes or places where I'd have to
name fresh variables.
2022-10-13 22:02:54 +02:00
bors[bot]
95e45e1c2c Merge #663
663: Fix CONTRIBUTING.md step to make the project work r=Kerollmops a=curquiza

Following this discussion: https://github.com/meilisearch/milli/issues/76#issuecomment-1277459125

Co-authored-by: Clémentine Urquizar - curqui <clementine@meilisearch.com>
2022-10-13 11:47:34 +00:00
Clémentine Urquizar - curqui
59fe1e8efa Update CONTRIBUTING.md 2022-10-13 13:46:18 +02:00
bors[bot]
f30979d021 Merge #662
662: Enhance word splitting strategy r=ManyTheFish a=akki1306

# Pull Request

## Related issue
Fixes #648 

## What does this PR do?
- Changes [split_best_frequency](55d889522b/milli/src/search/query_tree.rs (L282-L301)) to use the frequency of word pairs occurring close together (with a proximity value of 1) instead of the frequency of the individual words; the split whose word pair has the maximum frequency is chosen (see the sketch below).
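
A hedged sketch of the strategy, with `word_pair_frequency` standing in for a lookup in the word-pair-proximity database (names and signature are illustrative):

```rust
/// Try every split point of `word` and keep the one whose (left, right) pair
/// occurs most frequently at proximity 1.
fn split_best_frequency<'a>(
    word: &'a str,
    word_pair_frequency: impl Fn(&str, &str, u8) -> Option<u64>,
) -> Option<(&'a str, &'a str)> {
    let mut best: Option<(u64, &'a str, &'a str)> = None;
    for (i, _) in word.char_indices().skip(1) {
        let (left, right) = word.split_at(i);
        if let Some(freq) = word_pair_frequency(left, right, 1) {
            if best.map_or(true, |(f, _, _)| freq > f) {
                best = Some((freq, left, right));
            }
        }
    }
    best.map(|(_, l, r)| (l, r))
}

fn main() {
    // A toy frequency table standing in for the database lookup.
    let freq = |w1: &str, w2: &str, _prox: u8| match (w1, w2) {
        ("sun", "flower") => Some(50),
        ("s", "unflower") => Some(1),
        _ => None,
    };
    assert_eq!(split_best_frequency("sunflower", freq), Some(("sun", "flower")));
}
```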

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!

Co-authored-by: Akshay Kulkarni <akshayk.gj@gmail.com>
2022-10-13 08:14:22 +00:00
Akshay Kulkarni
85f3028317 remove underscore and introduce back word_documents_count 2022-10-13 13:21:59 +05:30
Akshay Kulkarni
8195fc6141 revert removal of word_documents_count method 2022-10-13 13:14:27 +05:30
Akshay Kulkarni
32f825d442 move default implementation of word_pair_frequency to TestContext 2022-10-13 12:57:50 +05:30
Akshay Kulkarni
ff8b2d4422 formatting 2022-10-13 12:44:08 +05:30
Akshay Kulkarni
6cb8b46900 use word_pair_frequency and remove word_documents_count 2022-10-13 12:43:11 +05:30
Akshay Kulkarni
8c9245149e format file 2022-10-12 15:27:56 +05:30
bors[bot]
2000f7958d Merge #604
604: Speed up debug builds r=Kerollmops a=loiclec

Note: this draft PR is based on https://github.com/meilisearch/milli/pull/601 , for no particular reason.

## What does this PR do?
Make a series of changes with the goal of speeding up debug builds:

1. Add an `all_languages` feature which compiles charabia with its `default` features activated.
The `all_languages` feature is activated by default. But running:
```
cargo build --no-default-features
```
on `milli` is now much faster.

2. Reduce the debug optimisation level from 3 to 0, except for a few critical dependencies.

3. Compile the build dependencies more quickly as well. Previously, all build dependencies were compiled with `opt-level = 3`. Now, only the critical build dependencies are compiled with optimisations.

4. Reduce the amount of code generated by the `documents!` macro

5. Make the "progress update" closure provided to indexing functions a trait object instead of a generic parameter. This avoids needlessly monomorphising the indexing code multiple times (see the sketch below).
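
A minimal sketch of the monomorphisation point in item 5 (function names are illustrative): a generic closure parameter stamps out one copy of the indexing function per call site, whereas a trait object keeps a single compiled copy.

```rust
// Before: one copy of this function is monomorphised per distinct closure type `F`.
fn index_documents_generic<F: Fn(u32)>(progress: F) {
    for step in 0..3 {
        progress(step);
    }
}

// After: a single copy, dispatching dynamically through a trait object.
fn index_documents_dyn(progress: &dyn Fn(u32)) {
    for step in 0..3 {
        progress(step);
    }
}

fn main() {
    index_documents_generic(|s| println!("generic step {s}"));
    index_documents_dyn(&|s| println!("dyn step {s}"));
}
```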

## Results
Initial build times on my computer before and after these changes:
|        | cargo check | cargo check --no-default-features | cargo test | cargo test --lib | cargo test --no-default-features | cargo test --lib --no-default-features |
|--------|-------------|-----------------------------------|------------|------------------|----------------------------------|----------------------------------------|
| before | 1m05s       | 1m05s                             | 2m06s      | 1m47s            | 2m06s                            | 1m47s                                  |
| after  | 28.9s       | 13.1s                             | 40s        | 38s              | 23s                              | 21s                                    |



Co-authored-by: Loïc Lecrenier <loic@meilisearch.com>
2022-10-12 08:54:48 +00:00
Akshay Kulkarni
63e79a9039 update comment 2022-10-12 13:36:48 +05:30
Akshay Kulkarni
7f9680f0a0 Enhance word splitting strategy 2022-10-12 13:18:23 +05:30
Loïc Lecrenier
53503f09ca Make milli's default features optional in other executable targets 2022-10-12 09:22:05 +02:00
Loïc Lecrenier
6fbf5dac68 Simplify documents! macro to reduce compile times 2022-10-12 09:22:05 +02:00
Loïc Lecrenier
98fc093823 Optimize a few performance sensitive dependencies on debug builds 2022-10-12 09:22:05 +02:00
Loïc Lecrenier
5cfb5df31e Set opt-level to 0 for debug builds
But speed up compile times by optimising build dependencies of lindera
2022-10-12 09:22:05 +02:00
bors[bot]
55d889522b Merge #658
658: Add proximity calculation for the same word r=ManyTheFish a=msvaljek

# Pull Request

## Related issue
Fixes https://github.com/meilisearch/milli/issues/647

## What does this PR do?
- While [increasing the current word position](d94339a858/milli/src/update/index_documents/extract/extract_word_pair_proximity_docids.rs (L129-L135)), we extract the proximity between the current position and the next one.

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: msvaljek <marko.svaljek@commercetools.com>
2022-10-10 13:33:58 +00:00
msvaljek
762e320c35 Add proximity calculation for the same word 2022-10-07 12:59:12 +02:00
bors[bot]
358aa337ea Merge #657
657: Fix link in Hacktoberfest section r=curquiza a=meili-bot

_This PR is auto-generated._

Fix link in CONTRIBUTING.md.
Following [this PR](https://github.com/meilisearch/meilisearch/pull/2845) and [this issue](https://github.com/meilisearch/meilisearch/issues/2840).


Co-authored-by: meili-bot <74670311+meili-bot@users.noreply.github.com>
2022-10-05 17:19:33 +00:00
meili-bot
1764a33690 Update CONTRIBUTING.md 2022-10-05 19:19:03 +02:00
bors[bot]
a90d7e4cc7 Merge #654
654: Re-upload milli's logo r=curquiza a=jeertmans

# Pull Request

## Related issue
None

## What does this PR do?
Apparently, some [commit](add96f921b) deleted the logo file, and updated the `src` path. It seems to me that this was an error, and that the logo file should have been moved, not deleted.

This fixes the problem of seeing this (see image) instead of the actual logo.
![image](https://user-images.githubusercontent.com/27275099/193786803-e0d11a59-48fa-4331-bd92-48457969d766.png)


## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?


Co-authored-by: Jérome Eertmans <jeertmans@icloud.com>
2022-10-04 10:56:33 +00:00
Jérome Eertmans
aec220ab63 chore: move logo to (new) assets folder 2022-10-04 12:20:24 +02:00
Jérome Eertmans
4348c49656 fix: re-upload milli's logo
The logo was deleted with this [commit](add96f921b).
2022-10-04 11:33:19 +02:00
bors[bot]
a18de9b5f0 Merge #650
650: Add missing logging timer to extractors r=Kerollmops a=vishalsodani

# Pull Request

## What does this PR do?
Fixes #645

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: vishalsodani <vishalsodani@rediffmail.com>
2022-10-04 07:25:47 +00:00
bors[bot]
f9c2dacf33 Merge #653
653: Fix #652 - Change Spelling of `author` in `README.md` r=curquiza a=anirudhRowjee

# Pull Request

## What does this PR do?
Fixes #652
- Changes spellings of `au{hor` to `author`
- Minor formatting changes in Markdown

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?


Co-authored-by: Anirudh Rowjee <ani.rowjee@gmail.com>
2022-10-03 08:20:48 +00:00
Anirudh Rowjee
7d247353d0 [docs] contd - fix #652, revert capitalization of 'Meilisearch' 2022-10-03 09:52:20 +05:30
Anirudh Rowjee
bc502ee125 [docs] Fixed #652, changes spelling of author 2022-10-03 09:38:59 +05:30
vishalsodani
00c02d00f3 Add missing logging timer to extractors 2022-09-30 22:17:06 +05:30
bors[bot]
804db03e41 Merge #649
649: Update Hacktoberfest section in CONTRIBUTING.md r=curquiza a=meili-bot

_This PR is auto-generated._

Following: af850854e4

Update Hacktoberfest section in CONTRIBUTING.md with the global guideline information.


Co-authored-by: meili-bot <74670311+meili-bot@users.noreply.github.com>
2022-09-29 15:50:20 +00:00
meili-bot
26efdf4dd9 Update CONTRIBUTING.md 2022-09-29 16:00:15 +02:00
bors[bot]
4b903719a0 Merge #643
643: Add Hacktoberfest section to CONTRIBUTING.md r=curquiza a=meili-bot

_This PR is auto-generated._

Add Hacktoberfest section to CONTRIBUTING.md


Co-authored-by: meili-bot <74670311+meili-bot@users.noreply.github.com>
2022-09-22 16:44:51 +00:00
meili-bot
ed3d87f061 Update CONTRIBUTING.md 2022-09-22 18:43:42 +02:00
bors[bot]
a3622eda46 Merge #642
642: Remove LTO in release profile r=Kerollmops a=loiclec

Since we can't enable it in Meilisearch (see https://github.com/meilisearch/meilisearch/pull/2717 ), we should not enable it in milli either. The goal is for milli's benchmarks to accurately represent its performance within meilisearch.


Co-authored-by: Loïc Lecrenier <loic@meilisearch.com>
2022-09-21 09:14:46 +00:00
Loïc Lecrenier
513a38f07b Remove LTO in release profile
Since we can't enable it in Meilisearch, there is no point in having it
enabled in milli
2022-09-21 10:44:33 +02:00
bors[bot]
e1e025c319 Merge #641
641: Remove `helpers` crate r=Kerollmops a=loiclec

# Pull Request

## What does this PR do?
Remove the `helpers` crate, because (I think) we don't use it. This should have been part of https://github.com/meilisearch/milli/pull/636 , but I forgot about it then :)





Co-authored-by: Loïc Lecrenier <loic@meilisearch.com>
2022-09-21 08:36:05 +00:00
Loïc Lecrenier
b6fe6838d3 Remove helpers crate 2022-09-21 10:25:36 +02:00
bors[bot]
d94339a858 Merge #636
636: Remove unused `infos`, `http-ui`, and `milli/fuzz`, crates r=ManyTheFish a=loiclec

We haven't used the `infos/`, `http-ui/` and `milli/fuzz/` crates in a long time. They are not properly maintained and probably do not work correctly anymore.

This PR removes these crates entirely from the workspace to reduce the amount of code we need to maintain.

Co-authored-by: Loïc Lecrenier <loic@meilisearch.com>
2022-09-14 12:39:57 +00:00
bors[bot]
15d478cf4d Merge #635
635: Use an unstable algorithm for `grenad::Sorter` when possible r=Kerollmops a=loiclec

# Pull Request
## What does this PR do?

Use an unstable algorithm to sort the internal vector used by `grenad::Sorter` whenever possible to speed up indexing.

In practice, every time the merge function creates a `RoaringBitmap`, we use an unstable sort. For every other merge function, such as `keep_first`, `keep_last`, etc., a stable sort is used.
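
An illustrative sketch of why the unstable sort is safe in the bitmap case (not grenad's internals): when entries with equal keys are merged commutatively, their relative order cannot affect the result.

```rust
// Illustrative only, not grenad's internals. Entries whose equal keys will be
// unioned into a single bitmap can be sorted unstably: a commutative merge
// does not depend on the relative order of equal keys. A `keep_first`-style
// merge does depend on it, so it needs the stable `sort_by` instead.
fn main() {
    let mut entries: Vec<(&str, u32)> = vec![("b", 2), ("a", 1), ("a", 3)];
    entries.sort_unstable_by(|x, y| x.0.cmp(&y.0));
    // Both "a" entries are now adjacent; a union-style merge of their values
    // produces {1, 3} regardless of which one came first.
    println!("{entries:?}");
}
```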


Co-authored-by: Loïc Lecrenier <loic@meilisearch.com>
2022-09-14 12:00:52 +00:00
Loïc Lecrenier
add96f921b Remove unused infos/ http-ui/ and fuzz/ crates 2022-09-14 06:55:01 +02:00
bors[bot]
4fc6331cb6 Merge #638
638: Update version for the next release (v0.33.4) in Cargo.toml files r=curquiza a=meili-bot

⚠️ This PR is automatically generated. Check the new version is the expected one before merging.

Co-authored-by: curquiza <curquiza@users.noreply.github.com>
2022-09-13 13:56:53 +00:00
curquiza
753e76d451 Update version for the next release (v0.33.4) in Cargo.toml files 2022-09-13 13:55:50 +00:00
Loïc Lecrenier
3794962330 Use an unstable algorithm for grenad::Sorter when possible 2022-09-13 14:49:53 +02:00
bors[bot]
2865b063ad Merge #637
637: We avoid skipping errors in the indexing pipeline r=ManyTheFish a=Kerollmops

This PR is related to https://github.com/meilisearch/meilisearch/issues/2764 and should fix it when merged into Meilisearch.

Co-authored-by: Kerollmops <clement@meilisearch.com>
2022-09-13 12:12:05 +00:00
Kerollmops
d4d7c9d577 We avoid skipping errors in the indexing pipeline 2022-09-13 14:03:00 +02:00
bors[bot]
f8697075ea Merge #632
632: Make charabia default feature optional r=ManyTheFish a=vincent-herlemont

# Pull Request

## What does this PR do?
Fixes [#627](https://github.com/meilisearch/milli/issues/627#issuecomment-1239769122)

Thank you so much for contributing to Meilisearch!


Co-authored-by: Vincent Herlemont <vincent@herlemont.fr>
2022-09-08 14:33:26 +00:00
bors[bot]
7cd0aea1d3 Merge #633
633: Upgrade ubuntu-18.04 to 20.04 r=Kerollmops a=curquiza

Ubuntu-18.04 is going to be deprecated by GitHub
https://github.com/actions/runner-images/issues/6002

Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
2022-09-08 14:08:28 +00:00
Clémentine Urquizar
69b2d31b71 Upgrade ubuntu-18.04 to 20.04 2022-09-08 14:58:06 +02:00
Vincent Herlemont
8cd5200f48 Make charabia languages configurable 2022-09-08 12:21:43 +02:00
bors[bot]
99b45a7820 Merge #631
631: Revert "Remove Bors required test for Windows" r=Kerollmops a=curquiza

Reverts meilisearch/milli#612

Because the issue does not seem to be there!

Closes https://github.com/meilisearch/milli/issues/614

Co-authored-by: Clémentine Urquizar - curqui <clementine@meilisearch.com>
2022-09-07 21:07:44 +00:00
Vincent Herlemont
5e07ea79c2 Make charabia default feature optional 2022-09-07 20:54:31 +02:00
Clémentine Urquizar - curqui
3af3d3f7d9 Revert "Remove Bors required test for Windows" 2022-09-07 18:36:10 +02:00
bors[bot]
549fa12d5a Merge #629
629: Update version for the next release (v0.33.3) in Cargo.toml files r=curquiza a=meili-bot

⚠️ This PR is automatically generated. Check the new version is the expected one before merging.

Co-authored-by: curquiza <curquiza@users.noreply.github.com>
2022-09-07 15:55:04 +00:00
curquiza
077dcd2002 Update version for the next release (v0.33.3) in Cargo.toml files 2022-09-07 15:48:53 +00:00
bors[bot]
2907928d93 Merge #628
628: Make sure that long words are ignored r=ManyTheFish a=Kerollmops

This PR is related to https://github.com/meilisearch/meilisearch/issues/2743 and is fixing it.

Co-authored-by: Kerollmops <clement@meilisearch.com>
2022-09-07 13:04:59 +00:00
Kerollmops
fe3973a51c Make sure that long words are correctly skipped 2022-09-07 15:03:32 +02:00
Kerollmops
c83c3cd796 Add a test to make sure that long words are correctly skipped 2022-09-07 14:12:36 +02:00
bors[bot]
b9539c59f3 Merge #625 #626
625: Bump actions/checkout from 2 to 3 r=curquiza a=dependabot[bot]

Bumps [actions/checkout](https://github.com/actions/checkout) from 2 to 3.
<details>
<summary>Release notes</summary>
<p><em>Sourced from <a href="https://github.com/actions/checkout/releases">actions/checkout's releases</a>.</em></p>
<blockquote>
<h2>v3.0.0</h2>
<ul>
<li>Updated to the node16 runtime by default
<ul>
<li>This requires a minimum <a href="https://github.com/actions/runner/releases/tag/v2.285.0">Actions Runner</a> version of v2.285.0 to run, which is by default available in GHES 3.4 or later.</li>
</ul>
</li>
</ul>
<h2>v2.4.2</h2>
<h2>What's Changed</h2>
<ul>
<li>Add set-safe-directory input to allow customers to take control. (<a href="https://github-redirect.dependabot.com/actions/checkout/issues/770">#770</a>) by <a href="https://github.com/TingluoHuang"><code>@TingluoHuang</code></a> in <a href="https://github-redirect.dependabot.com/actions/checkout/pull/776">actions/checkout#776</a></li>
<li>Prepare changelog for v2.4.2. by <a href="https://github.com/TingluoHuang"><code>@TingluoHuang</code></a> in <a href="https://github-redirect.dependabot.com/actions/checkout/pull/778">actions/checkout#778</a></li>
</ul>
<p><strong>Full Changelog</strong>: <a href="https://github.com/actions/checkout/compare/v2...v2.4.2">https://github.com/actions/checkout/compare/v2...v2.4.2</a></p>
<h2>v2.4.1</h2>
<ul>
<li>Fixed an issue where checkout failed to run in container jobs due to the new git setting <code>safe.directory</code></li>
</ul>
<h2>v2.4.0</h2>
<ul>
<li>Convert SSH URLs like <code>org-&lt;ORG_ID&gt;@github.com:</code> to <code>https://github.com/</code> - <a href="https://github-redirect.dependabot.com/actions/checkout/pull/621">pr</a></li>
</ul>
<h2>v2.3.5</h2>
<p>Update dependencies</p>
<h2>v2.3.4</h2>
<ul>
<li><a href="https://github-redirect.dependabot.com/actions/checkout/pull/379">Add missing <code>await</code>s</a></li>
<li><a href="https://github-redirect.dependabot.com/actions/checkout/pull/360">Swap to Environment Files</a></li>
</ul>
<h2>v2.3.3</h2>
<ul>
<li><a href="https://github-redirect.dependabot.com/actions/checkout/pull/345">Remove Unneeded commit information from build logs</a></li>
<li><a href="https://github-redirect.dependabot.com/actions/checkout/pull/326">Add Licensed to verify third party dependencies</a></li>
</ul>
<h2>v2.3.2</h2>
<p><a href="https://github-redirect.dependabot.com/actions/checkout/pull/320">Add Third Party License Information to Dist Files</a></p>
<h2>v2.3.1</h2>
<p><a href="https://github-redirect.dependabot.com/actions/checkout/pull/284">Fix default branch resolution for .wiki and when using SSH</a></p>
<h2>v2.3.0</h2>
<p><a href="https://github-redirect.dependabot.com/actions/checkout/pull/278">Fallback to the default branch</a></p>
<h2>v2.2.0</h2>
<p><a href="https://github-redirect.dependabot.com/actions/checkout/pull/258">Fetch all history for all tags and branches when fetch-depth=0</a></p>
<h2>v2.1.1</h2>
<p>Changes to support GHES (<a href="https://github-redirect.dependabot.com/actions/checkout/pull/236">here</a> and <a href="https://github-redirect.dependabot.com/actions/checkout/pull/248">here</a>)</p>
<h2>v2.1.0</h2>
<ul>
<li><a href="https://github-redirect.dependabot.com/actions/checkout/pull/191">Group output</a></li>
<li><a href="https://github-redirect.dependabot.com/actions/checkout/pull/199">Changes to support GHES alpha release</a></li>
<li><a href="https://github-redirect.dependabot.com/actions/checkout/pull/184">Persist core.sshCommand for submodules</a></li>
<li><a href="https://github-redirect.dependabot.com/actions/checkout/pull/163">Add support ssh</a></li>
<li><a href="https://github-redirect.dependabot.com/actions/checkout/pull/179">Convert submodule SSH URL to HTTPS, when not using SSH</a></li>
</ul>
<!-- raw HTML omitted -->
</blockquote>
<p>... (truncated)</p>
</details>
<details>
<summary>Changelog</summary>
<p><em>Sourced from <a href="https://github.com/actions/checkout/blob/main/CHANGELOG.md">actions/checkout's changelog</a>.</em></p>
<blockquote>
<h1>Changelog</h1>
<h2>v3.0.2</h2>
<ul>
<li><a href="https://github-redirect.dependabot.com/actions/checkout/pull/770">Add input <code>set-safe-directory</code></a></li>
</ul>
<h2>v3.0.1</h2>
<ul>
<li><a href="https://github-redirect.dependabot.com/actions/checkout/pull/762">Fixed an issue where checkout failed to run in container jobs due to the new git setting <code>safe.directory</code></a></li>
<li><a href="https://github-redirect.dependabot.com/actions/checkout/pull/744">Bumped various npm package versions</a></li>
</ul>
<h2>v3.0.0</h2>
<ul>
<li><a href="https://github-redirect.dependabot.com/actions/checkout/pull/689">Update to node 16</a></li>
</ul>
<h2>v2.3.1</h2>
<ul>
<li><a href="https://github-redirect.dependabot.com/actions/checkout/pull/284">Fix default branch resolution for .wiki and when using SSH</a></li>
</ul>
<h2>v2.3.0</h2>
<ul>
<li><a href="https://github-redirect.dependabot.com/actions/checkout/pull/278">Fallback to the default branch</a></li>
</ul>
<h2>v2.2.0</h2>
<ul>
<li><a href="https://github-redirect.dependabot.com/actions/checkout/pull/258">Fetch all history for all tags and branches when fetch-depth=0</a></li>
</ul>
<h2>v2.1.1</h2>
<ul>
<li>Changes to support GHES (<a href="https://github-redirect.dependabot.com/actions/checkout/pull/236">here</a> and <a href="https://github-redirect.dependabot.com/actions/checkout/pull/248">here</a>)</li>
</ul>
<h2>v2.1.0</h2>
<ul>
<li><a href="https://github-redirect.dependabot.com/actions/checkout/pull/191">Group output</a></li>
<li><a href="https://github-redirect.dependabot.com/actions/checkout/pull/199">Changes to support GHES alpha release</a></li>
<li><a href="https://github-redirect.dependabot.com/actions/checkout/pull/184">Persist core.sshCommand for submodules</a></li>
<li><a href="https://github-redirect.dependabot.com/actions/checkout/pull/163">Add support ssh</a></li>
<li><a href="https://github-redirect.dependabot.com/actions/checkout/pull/179">Convert submodule SSH URL to HTTPS, when not using SSH</a></li>
<li><a href="https://github-redirect.dependabot.com/actions/checkout/pull/157">Add submodule support</a></li>
<li><a href="https://github-redirect.dependabot.com/actions/checkout/pull/144">Follow proxy settings</a></li>
<li><a href="https://github-redirect.dependabot.com/actions/checkout/pull/141">Fix ref for pr closed event when a pr is merged</a></li>
<li><a href="https://github-redirect.dependabot.com/actions/checkout/pull/128">Fix issue checking detached when git less than 2.22</a></li>
</ul>
<h2>v2.0.0</h2>
<ul>
<li><a href="https://github-redirect.dependabot.com/actions/checkout/pull/108">Do not pass cred on command line</a></li>
<li><a href="https://github-redirect.dependabot.com/actions/checkout/pull/107">Add input persist-credentials</a></li>
<li><a href="https://github-redirect.dependabot.com/actions/checkout/pull/104">Fallback to REST API to download repo</a></li>
</ul>
<h2>v2 (beta)</h2>
<ul>
<li>Improved fetch performance</li>
</ul>
<!-- raw HTML omitted -->
</blockquote>
<p>... (truncated)</p>
</details>
<details>
<summary>Commits</summary>
<ul>
<li><a href="2541b1294d"><code>2541b12</code></a> Prepare changelog for v3.0.2. (<a href="https://github-redirect.dependabot.com/actions/checkout/issues/777">#777</a>)</li>
<li><a href="0ffe6f9c55"><code>0ffe6f9</code></a> Add set-safe-directory input to allow customers to take control. (<a href="https://github-redirect.dependabot.com/actions/checkout/issues/770">#770</a>)</li>
<li><a href="dcd71f6466"><code>dcd71f6</code></a> Enforce safe directory (<a href="https://github-redirect.dependabot.com/actions/checkout/issues/762">#762</a>)</li>
<li><a href="add3486cc3"><code>add3486</code></a> Patch to fix the dependbot alert. (<a href="https://github-redirect.dependabot.com/actions/checkout/issues/744">#744</a>)</li>
<li><a href="5126516654"><code>5126516</code></a> Bump minimist from 1.2.5 to 1.2.6 (<a href="https://github-redirect.dependabot.com/actions/checkout/issues/741">#741</a>)</li>
<li><a href="d50f8ea767"><code>d50f8ea</code></a> Add v3.0 release information to changelog (<a href="https://github-redirect.dependabot.com/actions/checkout/issues/740">#740</a>)</li>
<li><a href="2d1c1198e7"><code>2d1c119</code></a> update test workflows to checkout v3 (<a href="https://github-redirect.dependabot.com/actions/checkout/issues/709">#709</a>)</li>
<li><a href="a12a3943b4"><code>a12a394</code></a> update readme for v3 (<a href="https://github-redirect.dependabot.com/actions/checkout/issues/708">#708</a>)</li>
<li><a href="8f9e05e482"><code>8f9e05e</code></a> Update to node 16 (<a href="https://github-redirect.dependabot.com/actions/checkout/issues/689">#689</a>)</li>
<li>See full diff in <a href="https://github.com/actions/checkout/compare/v2...v3">compare view</a></li>
</ul>
</details>
<br />


[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=actions/checkout&package-manager=github_actions&previous-version=2&new-version=3)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

You can trigger a rebase of this PR by commenting `@dependabot rebase`.


---

<details>
<summary>Dependabot commands and options</summary>
<br />

You can trigger Dependabot actions by commenting on this PR:
- `@dependabot rebase` will rebase this PR
- `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it
- `@dependabot merge` will merge this PR after your CI passes on it
- `@dependabot squash and merge` will squash and merge this PR after your CI passes on it
- `@dependabot cancel merge` will cancel a previously requested merge and block automerging
- `@dependabot reopen` will reopen this PR if it is closed
- `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
- `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)


</details>

626: Bump yogevbd/enforce-label-action from 2.1.0 to 2.2.2 r=curquiza a=dependabot[bot]

Bumps [yogevbd/enforce-label-action](https://github.com/yogevbd/enforce-label-action) from 2.1.0 to 2.2.2.
<details>
<summary>Commits</summary>
<ul>
<li><a href="a3c219da6b"><code>a3c219d</code></a> Merge pull request <a href="https://github-redirect.dependabot.com/yogevbd/enforce-label-action/issues/26">#26</a> from yogevbd/test1</li>
<li><a href="8279da6fd9"><code>8279da6</code></a> Update enforce-labels.yml</li>
<li><a href="0c6f806593"><code>0c6f806</code></a> Update package.json</li>
<li><a href="2e6b1550e4"><code>2e6b155</code></a> lock <code>@actions/http-client</code></li>
<li><a href="732db2ff3a"><code>732db2f</code></a> test</li>
<li><a href="e662799851"><code>e662799</code></a> Update package.json</li>
<li><a href="f467829919"><code>f467829</code></a> Update enforce-labels.yml</li>
<li><a href="00ff95bb80"><code>00ff95b</code></a> Update package.json</li>
<li><a href="de4244ae68"><code>de4244a</code></a> Update action.yml</li>
<li><a href="9f40e51d60"><code>9f40e51</code></a> Merge pull request <a href="https://github-redirect.dependabot.com/yogevbd/enforce-label-action/issues/25">#25</a> from dominikmeyersap/patch-1</li>
<li>Additional commits viewable in <a href="https://github.com/yogevbd/enforce-label-action/compare/2.1.0...2.2.2">compare view</a></li>
</ul>
</details>
<br />


[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=yogevbd/enforce-label-action&package-manager=github_actions&previous-version=2.1.0&new-version=2.2.2)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

You can trigger a rebase of this PR by commenting `@dependabot rebase`.



Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2022-09-06 16:49:30 +00:00
bors[bot]
f2b140d3d7 Merge #624
624: Bump Swatinem/rust-cache from 1.3.0 to 2.0.0 r=curquiza a=dependabot[bot]

Bumps [Swatinem/rust-cache](https://github.com/Swatinem/rust-cache) from 1.3.0 to 2.0.0.
<details>
<summary>Release notes</summary>
<p><em>Sourced from <a href="https://github.com/Swatinem/rust-cache/releases">Swatinem/rust-cache's releases</a>.</em></p>
<blockquote>
<h2>v2.0.0</h2>
<ul>
<li>The action code was refactored to allow for caching multiple workspaces and
different <code>target</code> directory layouts.</li>
<li>The <code>working-directory</code> and <code>target-dir</code> input options were replaced by a
single <code>workspaces</code> option that has the form of <code>$workspace -&gt; $target</code>.</li>
<li>Support for considering <code>env-vars</code> as part of the cache key.</li>
<li>The <code>sharedKey</code> input option was renamed to <code>shared-key</code> for consistency.</li>
</ul>
<h2>v1.4.0</h2>
<ul>
<li>Clean both debug and release target directories.</li>
</ul>
</blockquote>
</details>
<details>
<summary>Changelog</summary>
<p><em>Sourced from <a href="https://github.com/Swatinem/rust-cache/blob/master/CHANGELOG.md">Swatinem/rust-cache's changelog</a>.</em></p>
<blockquote>
<h2>2.0.0</h2>
<ul>
<li>The action code was refactored to allow for caching multiple workspaces and
different <code>target</code> directory layouts.</li>
<li>The <code>working-directory</code> and <code>target-dir</code> input options were replaced by a
single <code>workspaces</code> option that has the form of <code>$workspace -&gt; $target</code>.</li>
<li>Support for considering <code>env-vars</code> as part of the cache key.</li>
<li>The <code>sharedKey</code> input option was renamed to <code>shared-key</code> for consistency.</li>
</ul>
<h2>1.4.0</h2>
<ul>
<li>Clean both <code>debug</code> and <code>release</code> target directories.</li>
</ul>
</blockquote>
</details>
<details>
<summary>Commits</summary>
<ul>
<li><a href="6720f05bc4"><code>6720f05</code></a> 2.0.0</li>
<li><a href="5733786579"><code>5733786</code></a> rebuild</li>
<li><a href="622616010e"><code>6226160</code></a> prepare v2</li>
<li><a href="0497f9301f"><code>0497f93</code></a> improve registry cleanpu</li>
<li><a href="7b8626742a"><code>7b86267</code></a> update registry cleaning</li>
<li><a href="911d8e9e55"><code>911d8e9</code></a> test sparse registry</li>
<li><a href="875be5ce2d"><code>875be5c</code></a> bump cache</li>
<li><a href="07a2ee71bc"><code>07a2ee7</code></a> lol, dependency check was reversed</li>
<li><a href="7c190ef171"><code>7c190ef</code></a> fix actual test code ;-)</li>
<li><a href="fffd6895b2"><code>fffd689</code></a> add some more tests</li>
<li>Additional commits viewable in <a href="https://github.com/Swatinem/rust-cache/compare/v1.3.0...v2.0.0">compare view</a></li>
</ul>
</details>
<br />


[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=Swatinem/rust-cache&package-manager=github_actions&previous-version=1.3.0&new-version=2.0.0)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

You can trigger a rebase of this PR by commenting `@dependabot rebase`.



Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2022-09-06 16:12:55 +00:00
dependabot[bot]
e3400a05d3 Bump yogevbd/enforce-label-action from 2.1.0 to 2.2.2
Bumps [yogevbd/enforce-label-action](https://github.com/yogevbd/enforce-label-action) from 2.1.0 to 2.2.2.
- [Release notes](https://github.com/yogevbd/enforce-label-action/releases)
- [Commits](https://github.com/yogevbd/enforce-label-action/compare/2.1.0...2.2.2)

---
updated-dependencies:
- dependency-name: yogevbd/enforce-label-action
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
2022-09-06 16:08:54 +00:00
dependabot[bot]
b308463022 Bump actions/checkout from 2 to 3
Bumps [actions/checkout](https://github.com/actions/checkout) from 2 to 3.
- [Release notes](https://github.com/actions/checkout/releases)
- [Changelog](https://github.com/actions/checkout/blob/main/CHANGELOG.md)
- [Commits](https://github.com/actions/checkout/compare/v2...v3)

---
updated-dependencies:
- dependency-name: actions/checkout
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
2022-09-06 16:08:51 +00:00
dependabot[bot]
5e85059a71 Bump Swatinem/rust-cache from 1.3.0 to 2.0.0
Bumps [Swatinem/rust-cache](https://github.com/Swatinem/rust-cache) from 1.3.0 to 2.0.0.
- [Release notes](https://github.com/Swatinem/rust-cache/releases)
- [Changelog](https://github.com/Swatinem/rust-cache/blob/master/CHANGELOG.md)
- [Commits](https://github.com/Swatinem/rust-cache/compare/v1.3.0...v2.0.0)

---
updated-dependencies:
- dependency-name: Swatinem/rust-cache
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
2022-09-06 16:08:48 +00:00
bors[bot]
9e661f2cb9 Merge #623
623: Add dependabot for GHA r=Kerollmops a=curquiza

Same as we added in Meilisearch. Only runs once a month.
https://github.com/meilisearch/meilisearch/blob/main/.github/dependabot.yml

Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
2022-09-06 15:56:28 +00:00
Clémentine Urquizar
44192d754f Add dependabot for GHA 2022-09-06 17:54:05 +02:00
bors[bot]
1fa851a8d0 Merge #622
622: Minor fixes in the just added update-version CI r=ManyTheFish a=curquiza

These fixes are minor, and do not prevent us from using the current CI

Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
2022-09-06 13:14:23 +00:00
Clémentine Urquizar
61abc61a69 Minor fixes in the just added update-version CI 2022-09-05 16:01:32 +02:00
bors[bot]
efee0e3f43 Merge #621
621: Add CI to update the Milli version r=ManyTheFish a=curquiza

Add a CI we can trigger manually to create a PR updating the Milli version.
The next step is to create a Slack bot that will trigger this CI.
In the meantime, we can trigger this CI manually in the [Actions tab](https://github.com/meilisearch/milli/actions).

The `MEILI_BOT_GH_PAT` secret has been added at the organization level, and is accessible to the following repositories (so far): Meilisearch, Milli, and Charabia.

Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
2022-09-05 08:31:48 +00:00
Clémentine Urquizar
0639b14906 Add CI to update the Milli version 2022-09-04 11:49:50 +02:00
bors[bot]
f7c352a32d Merge #620
620: Fix word criterion r=Kerollmops a=ManyTheFish

related to https://github.com/meilisearch/meilisearch/issues/2722

- fix the word strategy bug
- update milli version to v0.33.2

Co-authored-by: ManyTheFish <many@meilisearch.com>
2022-09-01 10:14:35 +00:00
ManyTheFish
bf750e45a1 Fix word removal issue 2022-09-01 12:10:47 +02:00
ManyTheFish
a38608fe59 Add test mixing phrased and no-phrased words 2022-09-01 12:02:10 +02:00
ManyTheFish
97a04887a3 Update version for next release (v0.33.2) in Cargo.toml 2022-09-01 11:47:23 +02:00
bors[bot]
17d020e996 Merge #618
618: Update version for next release (v0.33.1) in Cargo.toml r=Kerollmops a=curquiza

No breaking changes in this release

Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
2022-08-31 10:43:45 +00:00
Clémentine Urquizar
c3363706c5 Update version for next release (v0.33.1) in Cargo.toml 2022-08-31 11:37:27 +02:00
bors[bot]
2c2f3d38cc Merge #617
617: Accept integers as document ids again r=irevoire a=Kerollmops

This PR is related to https://github.com/meilisearch/meilisearch/issues/2723 and will fix it once this PR is merged, a new release is deployed, and that release is used in Meilisearch itself.

This PR makes the indexer try to parse the values of fields identified as numbers (i.e. `id:number`) as integers first, then as floats if integer parsing fails.
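
A minimal sketch of the described behaviour (illustrative types, not milli's actual code):

```rust
// Try to parse a document id declared as a number as an integer first, and
// only fall back to a float when integer parsing fails.
enum DocumentId {
    Integer(i64),
    Float(f64),
}

fn parse_document_id(raw: &str) -> Option<DocumentId> {
    raw.parse::<i64>()
        .map(DocumentId::Integer)
        .or_else(|_| raw.parse::<f64>().map(DocumentId::Float))
        .ok()
}

fn main() {
    assert!(matches!(parse_document_id("42"), Some(DocumentId::Integer(42))));
    assert!(matches!(parse_document_id("4.2"), Some(DocumentId::Float(_))));
    assert!(parse_document_id("doggo").is_none());
}
```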

Co-authored-by: Clément Renault <clement@meilisearch.com>
2022-08-31 09:25:17 +00:00
Clément Renault
7f92116b51 Accept again integers as document ids 2022-08-31 10:56:39 +02:00
bors[bot]
0b55e7ce6a Merge #615
615: Remove the artifacts of the past r=Kerollmops a=irevoire



Co-authored-by: Irevoire <tamo@meilisearch.com>
2022-08-23 14:22:43 +00:00
Irevoire
f6024b3269 Remove the artifacts of the past 2022-08-23 16:10:38 +02:00
bors[bot]
a79ff8a1a9 Merge #611
611: Upgrade charabia v0.6.0 r=curquiza a=ManyTheFish

# Pull Request

## What does this PR do?

- Update `log`
- Upgrade `charabia`

related to https://github.com/meilisearch/meilisearch/issues/2686


Co-authored-by: ManyTheFish <many@meilisearch.com>
2022-08-23 10:17:29 +00:00
bors[bot]
e314423653 Merge #613
613: Update version for next release (v0.33.0) r=Kerollmops a=curquiza



Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
2022-08-23 10:01:20 +00:00
bors[bot]
d0521e493f Merge #612
612: Remove Bors required test for Windows r=Kerollmops a=curquiza

Remove the required windows test for merging due to the issue with Lindera
https://github.com/meilisearch/milli/runs/7970141278?check_suite_focus=true

Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
2022-08-23 09:47:51 +00:00
Clémentine Urquizar
9ed7324995 Update version for next release (v0.33.0) 2022-08-23 11:47:48 +02:00
Clémentine Urquizar
e140227065 Remove Bors required test for Windows 2022-08-23 11:45:29 +02:00
bors[bot]
18886dc6b7 Merge #598
598: Matching query terms policy r=Kerollmops a=ManyTheFish

## Summary

Implement several optional-words strategies.

## Content

Replace `optional_words` boolean with an enum containing several term matching strategies:
```rust
pub enum TermsMatchingStrategy {
    // remove last word first
    Last,
    // remove first word first
    First,
    // remove more frequent word first
    Frequency,
    // remove smallest word first
    Size,
    // only one of the word is mandatory
    Any,
    // all words are mandatory
    All,
}
```

All strategies implemented during the prototype are kept, but only `Last` and `All` will be published by Meilisearch in the `v0.29.0` release.

## Related

spec: https://github.com/meilisearch/specifications/pull/173
prototype discussion: https://github.com/meilisearch/meilisearch/discussions/2639#discussioncomment-3447699


Co-authored-by: ManyTheFish <many@meilisearch.com>
2022-08-22 15:51:37 +00:00
ManyTheFish
5391e3842c replace optional_words by term_matching_strategy 2022-08-22 17:47:19 +02:00
ManyTheFish
f9029727e0 Fix benchmarks 2022-08-22 14:55:53 +02:00
ManyTheFish
a5b9a35c50 Activate char_map for highlighting 2022-08-22 14:39:16 +02:00
ManyTheFish
ba5ca8a362 Upgrade charabia v0.6.0 2022-08-22 14:38:00 +02:00
ManyTheFish
5943e1c3b2 Update log dependency 2022-08-22 13:55:01 +02:00
bors[bot]
b46225070f Merge #610
610: Share heed between all sub-crates r=Kerollmops a=irevoire

# Pull Request

## What does this PR do?
Use the reexported version of heed in the benchmarks and the fuzzer
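
A hypothetical sketch of what this looks like from a sub-crate (assuming, as this PR implies, that milli re-exports heed at its crate root):

```rust
// Hypothetical sketch: open an LMDB environment through the heed version
// that milli re-exports, instead of pinning a separate (possibly mismatched)
// heed dependency in the benchmarks or the fuzzer.
use milli::heed; // assumes milli re-exports heed at its crate root

fn open_env(path: &std::path::Path) -> heed::Result<heed::Env> {
    heed::EnvOpenOptions::new().open(path)
}
```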

Co-authored-by: Irevoire <tamo@meilisearch.com>
2022-08-22 08:44:31 +00:00
Irevoire
e7624abe63 share heed between all sub-crates 2022-08-19 11:23:41 +02:00
ManyTheFish
993aa1321c Fix query tree building 2022-08-18 17:56:06 +02:00
ManyTheFish
bff9653050 Fix remove count 2022-08-18 17:36:30 +02:00
ManyTheFish
9640976c79 Rename TermMatchingPolicies 2022-08-18 17:36:08 +02:00
bors[bot]
60a7221827 Merge #609
609: Retry downloading the benchmarks datasets r=Kerollmops a=irevoire

Downloading the benchmark datasets is failing [more and more](https://github.com/meilisearch/milli/pull/607#pullrequestreview-1076023074) often; thus, instead of fixing the issue, I thought we could retry multiple times.


Co-authored-by: Irevoire <tamo@meilisearch.com>
2022-08-18 11:47:09 +00:00
bors[bot]
afc10acd19 Merge #596
596: Filter operators: NOT + IN[..] r=irevoire a=loiclec

# Pull Request

## What does this PR do?
Implements the changes described in https://github.com/meilisearch/meilisearch/issues/2580
It is based on top of #556 

Co-authored-by: Loïc Lecrenier <loic@meilisearch.com>
2022-08-18 11:24:32 +00:00
Loïc Lecrenier
c7a86b56ef Fix filter parser compilation error 2022-08-18 13:16:56 +02:00
Loïc Lecrenier
9b6602cba2 Avoid cloning FilterCondition in filter array parsing 2022-08-18 13:06:57 +02:00
Loïc Lecrenier
8a271223a9 Change a macro_rules to a function in filter parser 2022-08-18 13:03:55 +02:00
Loïc Lecrenier
dd34dbaca5 Add more filter parser tests 2022-08-18 11:55:01 +02:00
Loïc Lecrenier
5d74ebd5e5 Cargo fmt 2022-08-18 11:36:38 +02:00
Loïc Lecrenier
9af69c151b Limit the maximum depth of filters
This should have no impact on the user but is there to safeguard
meilisearch against malicious inputs.
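
To make the safeguard concrete, here is a minimal, self-contained sketch of one way to bound filter depth; the limit, the pre-parse check, and the error message are illustrative, not the filter parser's actual implementation:

```rust
// Illustrative pre-parse depth guard; the limit and error are assumptions.
const MAX_FILTER_DEPTH: usize = 200;

fn check_filter_depth(filter: &str) -> Result<(), String> {
    let mut depth = 0usize;
    let mut max_depth = 0usize;
    for c in filter.chars() {
        match c {
            '(' => {
                depth += 1;
                max_depth = max_depth.max(depth);
            }
            ')' => depth = depth.saturating_sub(1),
            _ => {}
        }
    }
    if max_depth > MAX_FILTER_DEPTH {
        Err(format!("filter exceeds maximum depth of {MAX_FILTER_DEPTH}"))
    } else {
        Ok(())
    }
}

fn main() {
    assert!(check_filter_depth("(genre = horror) AND (year > 2000)").is_ok());
    let deep = format!("{}x = 1{}", "(".repeat(300), ")".repeat(300));
    assert!(check_filter_depth(&deep).is_err());
}
```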
2022-08-18 11:31:38 +02:00
Loïc Lecrenier
c51dcad51b Don't recompute filterable fields in evaluation of IN[] filter 2022-08-18 10:59:21 +02:00
Loïc Lecrenier
98f0da6b38 Simplify representation of nested NOT filters 2022-08-18 10:58:24 +02:00
Loïc Lecrenier
b030efdc83 Fix parsing of IN[] filter followed by whitespace + factorise its impl 2022-08-18 10:58:04 +02:00
Irevoire
84a784834e retry downloading the benchmarks datasets 2022-08-17 19:25:05 +02:00
bors[bot]
79094bcbcf Merge #607
607: Better threshold r=Kerollmops a=irevoire

# Pull Request

## What does this PR do?
Fixes #570 

This PR tries to improve the threshold used to trigger the real deletion of documents.
The deletion is now triggered in two cases:
- 10% of the total available space is used by soft-deleted documents
- 90% of the total available space is used

In this context, « total available space » means the `map_size` of LMDB.
The size used by the soft-deleted documents is actually an estimation: we can't precisely determine the size used by a single document, so we take the total space used, divide it by the number of documents plus soft-deleted documents to estimate the size of an average document, and then multiply that average size by the number of soft-deleted documents.
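
A minimal sketch of the resulting check (illustrative names and types, not milli's actual code):

```rust
// Minimal sketch of the described heuristic.
fn should_hard_delete(
    map_size: u64,     // total available space: LMDB's map_size
    used_space: u64,   // total space currently used
    documents: u64,    // number of live documents
    soft_deleted: u64, // number of soft-deleted documents
) -> bool {
    if documents + soft_deleted == 0 {
        return false;
    }
    // Estimate the size of an average document, then of all soft-deleted ones.
    let avg_document_size = used_space / (documents + soft_deleted);
    let soft_deleted_size = avg_document_size * soft_deleted;

    // Trigger the real deletion when soft-deleted documents occupy more than
    // 10% of the total available space, or when 90% of that space is used.
    soft_deleted_size * 10 > map_size || used_space * 10 > map_size * 9
}

fn main() {
    // 100 GiB map, 50 GiB used, as many soft-deleted documents as live ones:
    // the soft-deleted share (~25 GiB) exceeds 10% of the map, so we clean up.
    assert!(should_hard_delete(100 << 30, 50 << 30, 1_000_000, 1_000_000));
}
```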

--------

<img width="808" alt="image" src="https://user-images.githubusercontent.com/7032172/185083075-92cf379e-8ae1-4bfc-9ca6-93b54e6ab4e9.png">

Here we can see a ~10GB drift at the end between the space attributed to the soft-deleted documents and the real space used by the documents.
Personally I don't think that's a big issue because once the red line reaches 90GB everything will be freed, but now you know.

If you have an idea on how to improve this estimation I would love to hear it.
It looks like the difference is linear, so maybe we could simply multiply the current estimation by two?

Co-authored-by: Irevoire <tamo@meilisearch.com>
2022-08-17 16:31:04 +00:00
Loïc Lecrenier
497f9817a2 Use snapshot testing for the filter parser 2022-08-17 17:35:01 +02:00
Irevoire
4aae07d5f5 expose the size methods 2022-08-17 17:07:38 +02:00
Irevoire
e96b852107 bump heed 2022-08-17 17:05:50 +02:00
Loïc Lecrenier
238a7be58d Fix filter parser handling of keywords and surrounding spaces
Now the following fragments are allowed:

AND(field =

AND'field' =

AND"field" =
2022-08-17 16:53:40 +02:00
Loïc Lecrenier
b09a8f1b91 Filters: add explicit error message when using a keyword as value 2022-08-17 16:07:00 +02:00
bors[bot]
087da5621a Merge #587
587: Word prefix pair proximity docids indexation refactor r=Kerollmops a=loiclec

# Pull Request

## What does this PR do?
Refactor the code of `WordPrefixPairProximityDocIds` to make it much faster, fix a bug, and add a unit test.

## Why is it faster?
Because we avoid using a sorter to insert the (`word1`, `prefix`, `proximity`) keys and their associated bitmaps, and thus we don't have to sort a potentially very big set of data. I have also added a couple of other optimisations: 

1. reusing allocations
2. using a prefix trie instead of an array of prefixes to get all the prefixes of a word
3. inserting directly into the database instead of putting the data in an intermediary grenad when possible. Also avoid checking for pre-existing values in the database when we know for certain that they do not exist. 

## What bug was fixed?
When reindexing, the `new_prefix_fst_words` prefixes may look like:
```
["ant",  "axo", "bor"]
```
which we group by first letter:
```
[["ant", "axo"], ["bor"]]
```

Later in the code, if we have the word2 "axolotl", we try to find which subarray of prefixes contains its prefixes. This check is done with `word2.starts_with(subarray_prefixes[0])`, but `"axolotl".starts_with("ant")` is false, and thus we wrongly think that there are no prefixes in `new_prefix_fst_words` that are prefixes of `axolotl`.
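
A self-contained sketch of the faulty check and one possible correction, using the data above (illustrative code, not milli's actual implementation):

```rust
// The data comes from the PR description; the code shape is illustrative.
fn main() {
    // Prefixes grouped by first letter, as described above.
    let groups: Vec<Vec<&str>> = vec![vec!["ant", "axo"], vec!["bor"]];
    let word2 = "axolotl";

    // Buggy check: `"axolotl".starts_with("ant")` is false, so the
    // ["ant", "axo"] group is wrongly skipped even though "axo" is a
    // prefix of "axolotl".
    let buggy = groups.iter().find(|group| word2.starts_with(group[0]));
    assert!(buggy.is_none());

    // One possible fix: select the group by its common first byte instead.
    let fixed = groups
        .iter()
        .find(|group| group[0].as_bytes().first() == word2.as_bytes().first());
    assert!(fixed.is_some());
}
```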

## StrStrU8Codec
I had to change the encoding of `StrStrU8Codec` to make the second string null-terminated as well. I don't think this should be a problem, but I may have missed some nuances about the impacts of this change.

## Requests when reviewing this PR
I have explained what the code does in the module documentation of `word_pair_proximity_prefix_docids`. It would be nice if someone could read it and give their opinion on whether it is a clear explanation or not. 

I also have a couple questions regarding the code itself:
- Should we clean up and factor out the `PrefixTrieNode` code to try and make broader use of it outside this module? For now, the prefixes undergo a few transformations: from FST, to array, to prefix trie. It seems like it could be simplified.
- I wrote a function called `write_into_lmdb_database_without_merging`. (1) Are we okay with such a function existing? (2) Should it be in `grenad_helpers` instead?

## Benchmark Results

We reduce the time it takes to index by about 8% in most cases, but it varies between -3% and -20%. 

```
group                                                                     indexing_main_ce90fc62                  indexing_word-prefix-pair-proximity-docids-refactor_cbad2023
-----                                                                     ----------------------                  ------------------------------------------------------------
indexing/-geo-delete-facetedNumber-facetedGeo-searchable-                 1.00  1893.0±233.03µs        ? ?/sec    1.01  1921.2±260.79µs        ? ?/sec
indexing/-movies-delete-facetedString-facetedNumber-searchable-           1.05      9.4±3.51ms        ? ?/sec     1.00      9.0±2.14ms        ? ?/sec
indexing/-movies-delete-facetedString-facetedNumber-searchable-nested-    1.22    18.3±11.42ms        ? ?/sec     1.00     15.0±5.79ms        ? ?/sec
indexing/-songs-delete-facetedString-facetedNumber-searchable-            1.00     41.4±4.20ms        ? ?/sec     1.28    53.0±13.97ms        ? ?/sec
indexing/-wiki-delete-searchable-                                         1.00   285.6±18.12ms        ? ?/sec     1.03   293.1±16.09ms        ? ?/sec
indexing/Indexing geo_point                                               1.03      60.8±0.45s        ? ?/sec     1.00      58.8±0.68s        ? ?/sec
indexing/Indexing movies in three batches                                 1.14      16.5±0.30s        ? ?/sec     1.00      14.5±0.24s        ? ?/sec
indexing/Indexing movies with default settings                            1.11      13.7±0.07s        ? ?/sec     1.00      12.3±0.28s        ? ?/sec
indexing/Indexing nested movies with default settings                     1.10      10.6±0.11s        ? ?/sec     1.00       9.6±0.15s        ? ?/sec
indexing/Indexing nested movies without any facets                        1.11       9.4±0.15s        ? ?/sec     1.00       8.5±0.10s        ? ?/sec
indexing/Indexing songs in three batches with default settings            1.18      66.2±0.39s        ? ?/sec     1.00      56.0±0.67s        ? ?/sec
indexing/Indexing songs with default settings                             1.07      58.7±1.26s        ? ?/sec     1.00      54.7±1.71s        ? ?/sec
indexing/Indexing songs without any facets                                1.08      53.1±0.88s        ? ?/sec     1.00      49.3±1.43s        ? ?/sec
indexing/Indexing songs without faceted numbers                           1.08      57.7±1.33s        ? ?/sec     1.00      53.3±0.98s        ? ?/sec
indexing/Indexing wiki                                                    1.06   1051.1±21.46s        ? ?/sec     1.00    989.6±24.55s        ? ?/sec
indexing/Indexing wiki in three batches                                   1.20    1184.8±8.93s        ? ?/sec     1.00     989.7±7.06s        ? ?/sec
indexing/Reindexing geo_point                                             1.04      67.5±0.75s        ? ?/sec     1.00      64.9±0.32s        ? ?/sec
indexing/Reindexing movies with default settings                          1.12      13.9±0.17s        ? ?/sec     1.00      12.4±0.13s        ? ?/sec
indexing/Reindexing songs with default settings                           1.05      60.6±0.84s        ? ?/sec     1.00      57.5±0.99s        ? ?/sec
indexing/Reindexing wiki                                                  1.07   1725.0±17.92s        ? ?/sec     1.00    1611.4±9.90s        ? ?/sec
```

Co-authored-by: Loïc Lecrenier <loic@meilisearch.com>
2022-08-17 14:06:12 +00:00
bors[bot]
fb95e67a2a Merge #608
608: Fix soft deleted documents r=ManyTheFish a=ManyTheFish

When we replaced or updated some documents, the indexing was skipping the replaced documents.

Related to https://github.com/meilisearch/meilisearch/issues/2672

Co-authored-by: ManyTheFish <many@meilisearch.com>
2022-08-17 13:38:10 +00:00
bors[bot]
e4a52e6e45 Merge #594
594: Fix(Search): Fix phrase search candidates computation r=Kerollmops a=ManyTheFish

This is an old bug, but it was hidden by the proximity criterion:
phrase searches always returned an empty candidates list when the proximity criterion was deactivated.

Before the fix, we were trying to find any words[n] near words[n]
instead of finding any words[n] near words[n+1]. For example,
for the phrase search `"Hello world"`, we searched for "hello" near "hello" first, instead of "hello" near "world".



Co-authored-by: ManyTheFish <many@meilisearch.com>
2022-08-17 13:22:52 +00:00
ManyTheFish
8c3f1a9c39 Remove useless lifetime declaration 2022-08-17 15:20:43 +02:00
ManyTheFish
e9e2349ce6 Fix typo in comment 2022-08-17 15:09:48 +02:00
ManyTheFish
2668f841d1 Fix update indexing 2022-08-17 15:03:37 +02:00
ManyTheFish
7384650d85 Update test to showcase the bug 2022-08-17 15:03:08 +02:00
bors[bot]
39869be23b Merge #590
590: Optimise facets indexing r=Kerollmops a=loiclec

# Pull Request

## What does this PR do?
Fixes #589 

## Notes
I added documentation for the whole module which attempts to explain the shape of the databases and their purpose. However, I realise there is already some documentation about this, so I am not sure if we want to keep it.

## Benchmarks

We get a ~1.15x speed up on the geo_point benchmark.

```
group                                                                     indexing_main_57042355                  indexing_optimise-facets-indexation_5728619a
-----                                                                     ----------------------                  --------------------------------------------
indexing/-geo-delete-facetedNumber-facetedGeo-searchable-                 1.00  1862.7±294.45µs        ? ?/sec    1.58      2.9±1.32ms        ? ?/sec
indexing/-movies-delete-facetedString-facetedNumber-searchable-           1.11      8.9±2.44ms        ? ?/sec     1.00      8.0±1.42ms        ? ?/sec
indexing/-movies-delete-facetedString-facetedNumber-searchable-nested-    1.00     12.8±3.32ms        ? ?/sec     1.32     16.9±6.98ms        ? ?/sec
indexing/-songs-delete-facetedString-facetedNumber-searchable-            1.09     43.8±4.78ms        ? ?/sec     1.00     40.3±3.79ms        ? ?/sec
indexing/-wiki-delete-searchable-                                         1.08   287.4±28.72ms        ? ?/sec     1.00    264.9±9.46ms        ? ?/sec
indexing/Indexing geo_point                                               1.14      61.2±0.39s        ? ?/sec     1.00      53.8±0.57s        ? ?/sec
indexing/Indexing movies in three batches                                 1.00      16.6±0.12s        ? ?/sec     1.00      16.5±0.10s        ? ?/sec
indexing/Indexing movies with default settings                            1.00      14.1±0.30s        ? ?/sec     1.00      14.0±0.28s        ? ?/sec
indexing/Indexing nested movies with default settings                     1.10      10.9±0.50s        ? ?/sec     1.00      10.0±0.10s        ? ?/sec
indexing/Indexing nested movies without any facets                        1.01       9.6±0.23s        ? ?/sec     1.00       9.5±0.06s        ? ?/sec
indexing/Indexing songs in three batches with default settings            1.07      66.3±0.55s        ? ?/sec     1.00      61.8±0.63s        ? ?/sec
indexing/Indexing songs with default settings                             1.03      58.8±0.82s        ? ?/sec     1.00      57.1±1.22s        ? ?/sec
indexing/Indexing songs without any facets                                1.00      53.6±1.09s        ? ?/sec     1.01      54.0±0.58s        ? ?/sec
indexing/Indexing songs without faceted numbers                           1.02      58.0±1.29s        ? ?/sec     1.00      57.1±1.43s        ? ?/sec
indexing/Indexing wiki                                                    1.00   1064.1±21.20s        ? ?/sec     1.00   1068.0±20.49s        ? ?/sec
indexing/Indexing wiki in three batches                                   1.00    1182.5±9.62s        ? ?/sec     1.01   1191.2±10.96s        ? ?/sec
indexing/Reindexing geo_point                                             1.12      68.0±0.21s        ? ?/sec     1.00      60.5±0.82s        ? ?/sec
indexing/Reindexing movies with default settings                          1.01      14.1±0.21s        ? ?/sec     1.00      14.0±0.26s        ? ?/sec
indexing/Reindexing songs with default settings                           1.04      61.6±0.57s        ? ?/sec     1.00      59.2±0.87s        ? ?/sec
indexing/Reindexing wiki                                                  1.00   1734.0±11.38s        ? ?/sec     1.01   1746.6±22.48s        ? ?/sec
```


Co-authored-by: Loïc Lecrenier <loic@meilisearch.com>
2022-08-17 11:46:55 +00:00
Loïc Lecrenier
6cc975704d Add some documentation to facets.rs 2022-08-17 12:59:52 +02:00
Loïc Lecrenier
93252769af Apply review suggestions 2022-08-17 12:41:22 +02:00
Loïc Lecrenier
196f79115a Run cargo fmt 2022-08-17 12:28:33 +02:00
Loïc Lecrenier
d10d78d520 Add integration tests for the IN filter 2022-08-17 12:28:33 +02:00
Loïc Lecrenier
4ecfb95d0c Improve syntax errors for IN filter 2022-08-17 12:28:33 +02:00
Loïc Lecrenier
2fd20fadfc Implement the NOT IN syntax for negated IN filter 2022-08-17 12:28:33 +02:00
Loïc Lecrenier
ca97cb0eda Implement the IN filter operator 2022-08-17 12:28:33 +02:00
Loïc Lecrenier
90a304cb07 Fix tests after simplification of NOT filter 2022-08-17 12:28:33 +02:00
Loïc Lecrenier
cc7415bb31 Simplify FilterCondition code, made possible by the new NOT operator 2022-08-17 12:28:33 +02:00
Loïc Lecrenier
44744d9e67 Implement the simplified NOT operator 2022-08-17 12:28:33 +02:00
Loïc Lecrenier
01675771d5 Reimplement != filter to select all docids not selected by = 2022-08-17 12:28:33 +02:00
Loïc Lecrenier
258c3dd563 Make AND+OR filters n-ary (store a vector of subfilters instead of 2)
NOTE: The token_at_depth method is a bit useless now, as the only
cases where there would be a token at depth 1000 are the cases where
the parser already stack-overflowed earlier.

Example: (((((... (x=1) ...)))))
2022-08-17 12:28:33 +02:00
Loïc Lecrenier
39687908f1 Add documentation and comments to facets.rs 2022-08-17 12:26:49 +02:00
Loïc Lecrenier
8d4b21a005 Switch string facet levels indexation to new algo
Write the algorithm once for both numbers and strings
2022-08-17 12:26:49 +02:00
Loïc Lecrenier
cf0cd92ed4 Refactor Facets::execute to increase performance 2022-08-17 12:26:49 +02:00
bors[bot]
cd2635ccfc Merge #602
602: Use mimalloc as the default allocator r=Kerollmops a=loiclec

## What does this PR do?
Use mimalloc as the global allocator for milli's benchmarks on macOS.

## Why?
On Linux, we use jemalloc, which is a very fast allocator. But on macOS, we currently use the system allocator, which is very slow. In practice, this difference in allocator speed means that it is difficult to gain insight into milli's performance by running benchmarks locally on the Mac.

By using mimalloc, which is another excellent allocator, we reduce the speed difference between the two platforms.
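
For reference, a minimal sketch of plugging mimalloc in as the global allocator (standard `mimalloc` crate usage; the macOS-only gating shown here is illustrative, since milli's exact cfg flags may differ):

```rust
// Standard usage of the `mimalloc` crate as the global allocator. Gating it
// to macOS mirrors "jemalloc on Linux, mimalloc on macOS"; milli's actual
// cfg flags may differ.
#[cfg(target_os = "macos")]
#[global_allocator]
static GLOBAL: mimalloc::MiMalloc = mimalloc::MiMalloc;

fn main() {
    // Every heap allocation below goes through the chosen global allocator.
    let v: Vec<u32> = (0..1024).collect();
    println!("allocated {} integers", v.len());
}
```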

Co-authored-by: Loïc Lecrenier <loic@meilisearch.com>
2022-08-17 10:26:13 +00:00
Loïc Lecrenier
78d9f0622d cargo fmt 2022-08-17 12:21:24 +02:00
Loïc Lecrenier
4f9edf13d7 Remove commented-out function 2022-08-17 12:21:24 +02:00
Loïc Lecrenier
405555b401 Add some documentation to PrefixTrieNode 2022-08-17 12:21:24 +02:00
Loïc Lecrenier
1bc4788e59 Remove cached Allocations struct from wpppd indexing 2022-08-17 12:18:22 +02:00
Loïc Lecrenier
ef75a77464 Fix undefined behaviour caused by reusing key from the database
New full snapshot:
---
source: milli/src/update/word_prefix_pair_proximity_docids.rs
---
5                a    1  [101, ]
5                a    2  [101, ]
5                am   1  [101, ]
5                b    4  [101, ]
5                be   4  [101, ]
am               a    3  [101, ]
amazing          a    1  [100, ]
amazing          a    2  [100, ]
amazing          a    3  [100, ]
amazing          an   1  [100, ]
amazing          an   2  [100, ]
amazing          b    2  [100, ]
amazing          be   2  [100, ]
an               a    1  [100, ]
an               a    2  [100, 202, ]
an               am   1  [100, ]
an               an   2  [100, ]
an               b    3  [100, ]
an               be   3  [100, ]
and              a    2  [100, ]
and              a    3  [100, ]
and              a    4  [100, ]
and              am   2  [100, ]
and              an   3  [100, ]
and              b    1  [100, ]
and              be   1  [100, ]
at               a    1  [100, 202, ]
at               a    2  [100, 101, ]
at               a    3  [100, ]
at               am   2  [100, 101, ]
at               an   1  [100, 202, ]
at               an   3  [100, ]
at               b    3  [101, ]
at               b    4  [100, ]
at               be   3  [101, ]
at               be   4  [100, ]
beautiful        a    2  [100, ]
beautiful        a    3  [100, ]
beautiful        a    4  [100, ]
beautiful        am   3  [100, ]
beautiful        an   2  [100, ]
beautiful        an   4  [100, ]
bell             a    2  [101, ]
bell             a    4  [101, ]
bell             am   4  [101, ]
extraordinary    a    2  [202, ]
extraordinary    a    3  [202, ]
extraordinary    an   2  [202, ]
house            a    3  [100, 202, ]
house            a    4  [100, 202, ]
house            am   4  [100, ]
house            an   3  [100, 202, ]
house            b    2  [100, ]
house            be   2  [100, ]
rings            a    1  [101, ]
rings            a    3  [101, ]
rings            am   3  [101, ]
rings            b    2  [101, ]
rings            be   2  [101, ]
the              a    3  [101, ]
the              b    1  [101, ]
the              be   1  [101, ]
2022-08-17 12:17:45 +02:00
Loïc Lecrenier
7309111433 Don't run block code in doc tests of word_pair_proximity_docids 2022-08-17 12:17:18 +02:00
Loïc Lecrenier
f6f8f543e1 Run cargo fmt 2022-08-17 12:17:18 +02:00
Loïc Lecrenier
34c991ea02 Add newlines in documentation of word_prefix_pair_proximity_docids 2022-08-17 12:17:18 +02:00
Loïc Lecrenier
06f3fd8c6d Add more comments to WordPrefixPairProximityDocids::execute 2022-08-17 12:17:18 +02:00
Loïc Lecrenier
474500362c Update wpppd snapshots
New snapshot (yes, it's wrong as well, it will get fixed later):

---
source: milli/src/update/word_prefix_pair_proximity_docids.rs
---
5                a    1  [101, ]
5                a    2  [101, ]
5                am   1  [101, ]
5                b    4  [101, ]
5                be   4  [101, ]
am               a    3  [101, ]
amazing          a    1  [100, ]
amazing          a    2  [100, ]
amazing          a    3  [100, ]
amazing          an   1  [100, ]
amazing          an   2  [100, ]
amazing          b    2  [100, ]
amazing          be   2  [100, ]
an               a    1  [100, ]
an               a    2  [100, 202, ]
an               am   1  [100, ]
an               b    3  [100, ]
an               be   3  [100, ]
and              a    2  [100, ]
and              a    3  [100, ]
and              a    4  [100, ]
and              b    1  [100, ]
and              be   1  [100, ]
                 d\0  0  [100, 202, ]
an               an   2  [100, ]
and              am   2  [100, ]
and              an   3  [100, ]
at               a    2  [100, 101, ]
at               a    3  [100, ]
at               am   2  [100, 101, ]
at               an   1  [100, 202, ]
at               an   3  [100, ]
at               b    3  [101, ]
at               b    4  [100, ]
at               be   3  [101, ]
at               be   4  [100, ]
beautiful        a    2  [100, ]
beautiful        a    3  [100, ]
beautiful        a    4  [100, ]
beautiful        am   3  [100, ]
beautiful        an   2  [100, ]
beautiful        an   4  [100, ]
bell             a    2  [101, ]
bell             a    4  [101, ]
bell             am   4  [101, ]
extraordinary    a    2  [202, ]
extraordinary    a    3  [202, ]
extraordinary    an   2  [202, ]
house            a    4  [100, 202, ]
house            a    4  [100, ]
house            am   4  [100, ]
house            an   3  [100, 202, ]
house            b    2  [100, ]
house            be   2  [100, ]
rings            a    1  [101, ]
rings            a    3  [101, ]
rings            am   3  [101, ]
rings            b    2  [101, ]
rings            be   2  [101, ]
the              a    3  [101, ]
the              b    1  [101, ]
the              be   1  [101, ]
2022-08-17 12:17:18 +02:00
Loïc Lecrenier
ea4a96761c Move content of readme for WordPrefixPairProximityDocids into the code 2022-08-17 12:05:37 +02:00
Loïc Lecrenier
220921628b Simplify and document WordPrefixPairProximityDocIds::execute 2022-08-17 11:59:19 +02:00
Loïc Lecrenier
044356d221 Optimise WordPrefixPairProximityDocIds merge operation 2022-08-17 11:59:18 +02:00
Loïc Lecrenier
d350114159 Add tests for WordPrefixPairProximityDocIds 2022-08-17 11:59:15 +02:00
Loïc Lecrenier
86807ca848 Refactor word prefix pair proximity indexation further 2022-08-17 11:59:13 +02:00
Loïc Lecrenier
306593144d Refactor word prefix pair proximity indexation 2022-08-17 11:59:00 +02:00
Loïc Lecrenier
5d59bfde8a Sort Cargo.toml dependencies 2022-08-17 11:46:56 +02:00
bors[bot]
f55034ed54 Merge #606
606: Make binaries faster on release profile through better compile options r=Kerollmops a=loiclec

Using `codegen-units = 1` and `lto = 'thin'` makes the compile time a bit longer, but also produces faster binaries.

I'd like to run milli's benchmarks with these options, so that we can see whether they are worth enabling on meilisearch.

Co-authored-by: Loïc Lecrenier <loic@meilisearch.com>
2022-08-17 08:57:24 +00:00
Loïc Lecrenier
03e679b634 Make binaries faster on release profile through better compile options 2022-08-17 10:29:33 +02:00
Loïc Lecrenier
f20e588ec1 Make sure there is one newline at eof in cargo.toml 2022-08-17 07:44:33 +02:00
Loïc Lecrenier
20be69e1b9 Always use mimalloc as the global allocator 2022-08-16 20:09:36 +02:00
bors[bot]
293a246af8 Merge #601
601: Introduce snapshot tests r=Kerollmops a=loiclec

# Pull Request
## What does this PR do?
Introduce snapshot tests into milli, by using the `insta` crate. This implements the idea described by #597 

See: [insta.rs](https://insta.rs)

## Design
There is now a new file, `snapshot_tests.rs`, which is compiled only under `#[cfg(test)]`. It exposes the `db_snap!` macro, which is used to snapshot the content of a database.

When running `cargo test`, `insta` will check that the value of the current snapshot is the same as the previous one (on the file system). If they are the same, the test passes. If they are different, the test fails and you are asked to review the new snapshot to approve or reject it.

We don't want to save very large snapshots to the file system, because it will pollute the git repository and increase its size too much. Instead, we only save their `md5` hashes under the name `<snapshot_name>.hash.snap`. There is a new environment variable called `MILLI_TEST_FULL_SNAPS` which can be set to `true` in order to *also* save the full content of the snapshot under the name `<snapshot_name>.full.snap`. However, snapshots with the extension `.full.snap` are never saved to the git repository.

## Example
```rust
// In e.g. facets.rs
#[test]
fn my_test() {
    // create an index
    let index = TempIndex::new();
    index.add_documents(...);
    index.update_settings(|settings| ...);
    
    // then snapshot the content of one of its databases
    // the snapshot will be saved at the current folder under facets.rs/my_test/facet_id_string_docids.snap
    db_snap!(index, facet_id_string_docids);

    index.add_documents(...);   

    // we can also name the snapshot to ensure there is no conflict
    // this snapshot will be saved at facets.rs/my_test/updated/facet_id_string_docids.snap
    db_snap!(index, facet_id_string_docids, "updated");
    
    // and we can also use "inline" snapshots, which insert their content in the given string literal
    db_snap!(index, field_distributions, @"");
    // once the snapshot is approved, it will automatically get transformed to, e.g.:
    // db_snap!(index, field_distributions, @"
    // my_facet        21
    // other_field     3
    // ");
    
    // now let's add **many** documents
    index.add_documents(...);
    
    // because the snapshot is too big, its hash is saved instead
    // if the MILLI_TEST_FULL_SNAPS env variable is set to true, then the full snapshot will also be saved
    // at facets.rs/my_test/large/facet_id_string_docids.full.snap
    db_snap!(index, facet_id_string_docids, "large", @"5348bbc46b5384455b6a900666d2a502");
}
```
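
As a rough illustration of the hashing behaviour described above, here is a hedged sketch (ours, not the actual `snapshot_tests.rs` code; the threshold and file-naming helper are assumptions):

```rust
// Hedged sketch: store the snapshot inline when small, otherwise store its
// md5 hash, and optionally the full content when MILLI_TEST_FULL_SNAPS=true.
fn snapshot_files(name: &str, content: &str, full_snaps: bool) -> Vec<(String, String)> {
    const MAX_INLINE_LEN: usize = 1024; // illustrative threshold, not milli's
    let mut files = Vec::new();
    if content.len() <= MAX_INLINE_LEN {
        files.push((format!("{name}.snap"), content.to_owned()));
    } else {
        let hash = format!("{:x}", md5::compute(content));
        files.push((format!("{name}.hash.snap"), hash));
        if full_snaps {
            // never committed to git, as described above
            files.push((format!("{name}.full.snap"), content.to_owned()));
        }
    }
    files
}
```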

Co-authored-by: Loïc Lecrenier <loic@meilisearch.com>
2022-08-16 11:57:09 +00:00
Loïc Lecrenier
dea00311b6 Add type annotations to remove compiler error 2022-08-16 09:19:30 +02:00
Loïc Lecrenier
fb2b6c0c28 Use mimalloc for benchmarks on all platforms 2022-08-10 16:56:42 +02:00
Loïc Lecrenier
6f49126223 Fix db_snap macro with inline parameter 2022-08-10 15:55:22 +02:00
Loïc Lecrenier
12920f2a4f Fix paths of snapshot tests 2022-08-10 15:53:46 +02:00
Loïc Lecrenier
4b7fd4dfae Update insta version 2022-08-10 15:53:46 +02:00
Loïc Lecrenier
ce560fdcb5 Add documentation for db_snap! 2022-08-10 15:53:46 +02:00
Loïc Lecrenier
748bb86b5b cargo fmt 2022-08-10 15:53:46 +02:00
Loïc Lecrenier
051f24f674 Switch to snapshot tests for search/matches/mod.rs 2022-08-10 15:53:46 +02:00
Loïc Lecrenier
d2e01528a6 Switch to snapshot tests for search/criteria/typo.rs 2022-08-10 15:53:46 +02:00
Loïc Lecrenier
a9c7d82693 Switch to snapshot tests for search/criteria/attribute.rs 2022-08-10 15:53:46 +02:00
Loïc Lecrenier
4bba2f41d7 Switch to snapshot tests for query_tree.rs 2022-08-10 15:53:46 +02:00
Loïc Lecrenier
8ac24d3114 Cargo fmt + fix compiler warnings/error 2022-08-10 15:53:46 +02:00
Loïc Lecrenier
6066256689 Add snapshot tests for indexing of word_prefix_pair_proximity_docids 2022-08-10 15:53:46 +02:00
Loïc Lecrenier
3a734af159 Add snapshot tests for Facets::execute 2022-08-10 15:53:46 +02:00
Loïc Lecrenier
b9907997e4 Remove old snapshot tests code 2022-08-10 15:53:46 +02:00
Loïc Lecrenier
ef889ade5d Refactor snapshot tests 2022-08-10 15:53:46 +02:00
Loïc Lecrenier
334098a7e0 Add index snapshot test helper function 2022-08-10 15:53:46 +02:00
Loïc Lecrenier
8f73251012 Use mimalloc for benchmarks on macOS 2022-08-10 13:30:56 +02:00
ManyTheFish
b389be48a0 Factorize phrase computation 2022-08-08 10:37:31 +02:00
bors[bot]
950d8e4c44 Merge #600
600: Simplify some unit tests r=ManyTheFish a=loiclec

# Pull Request

## What does this PR do?
Simplify the code that is used in unit tests to create and modify an index. Basically, the following code:
```rust
  let path = tempfile::tempdir().unwrap();
  let mut options = EnvOpenOptions::new();
  options.map_size(10 * 1024 * 1024); // 10 MB
  let index = Index::new(options, &path).unwrap();

  let mut wtxn = index.write_txn().unwrap();
  let content = documents!([
      { "id": 0, "name": "kevin" },
  ]);
  let config = IndexerConfig::default();
  let indexing_config = IndexDocumentsConfig::default();
  let builder =
      IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ()).unwrap();
  let (builder, user_error) = builder.add_documents(content).unwrap();
  user_error.unwrap();
  builder.execute().unwrap();
  wtxn.commit().unwrap();

  let mut wtxn = index.write_txn().unwrap();
  let config = IndexerConfig::default();
  let mut builder = Settings::new(&mut wtxn, &index, &config);
  builder.set_primary_key(S("docid"));
  builder.set_filterable_fields(hashset! { S("label") });
  builder.execute(|_| ()).unwrap();
  wtxn.commit().unwrap();
```
becomes:
```rust
let index = TempIndex::new();
index.add_documents(documents!([
      { "id": 0, "name": "kevin" },
])).unwrap();
index.update_settings(|settings| {
    settings.set_primary_key(S("docid"));
    settings.set_filterable_fields(hashset! { S("label") });
}).unwrap();
```

Then there is a bunch of options to modify the indexing configs, the map size, to reuse a transaction, etc. For example:
```rust
let mut index = TempIndex::new_with_map_size(1000 * 4096 * 10);
index.index_documents_config.autogenerate_docids = true;
let mut wtxn = index.write_txn().unwrap();
index.update_settings_using_wtxn(&mut wtxn, |settings| {
    settings.set_primary_key(S("docids"));
}).unwrap();
wtxn.commit().unwrap();
```

Co-authored-by: Loïc Lecrenier <loic@meilisearch.com>
Co-authored-by: Loïc Lecrenier <loic.lecrenier@me.com>
Co-authored-by: bors[bot] <26634292+bors[bot]@users.noreply.github.com>
2022-08-04 10:19:42 +00:00
Loïc Lecrenier
58cb1c1bda Simplify unit tests in facet/filter.rs 2022-08-04 12:03:44 +02:00
Loïc Lecrenier
acff17fb88 Simplify indexing tests 2022-08-04 12:03:13 +02:00
bors[bot]
21284cf235 Merge #556
556: Add EXISTS filter r=loiclec a=loiclec

## What does this PR do?

Fixes issue [#2484](https://github.com/meilisearch/meilisearch/issues/2484) in the meilisearch repo.

It creates a `field EXISTS` filter which selects all documents containing the `field` key. 
For example, with the following documents:
```json
[{
	"id": 0,
	"colour": []
},
{
	"id": 1,
	"colour": ["blue", "green"]
},
{
	"id": 2,
	"colour": 145238
},
{
	"id": 3,
	"colour": null
},
{
	"id": 4,
	"colour": {
		"green": []
	}
},
{
	"id": 5,
	"colour": {}
},
{
	"id": 6
}]
```
Then the filter `colour EXISTS` selects the ids `[0, 1, 2, 3, 4, 5]`. The filter `colour NOT EXISTS` selects `[6]`.
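
Conceptually, `NOT EXISTS` is just the complement of the stored bitmap within the set of all document ids. A hedged sketch (illustrative names, not milli's actual code):

```rust
use roaring::RoaringBitmap;

// `EXISTS` returns the bitmap stored for the field in the new database;
// `NOT EXISTS` is its complement within all the documents of the index.
fn not_exists(all_docids: &RoaringBitmap, field_exists_docids: &RoaringBitmap) -> RoaringBitmap {
    all_docids - field_exists_docids
}
```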

## Details
There is a new database named `facet-id-exists-docids`. Its keys are field ids and its values are bitmaps of all the document ids where the corresponding field exists.

To create this database, the indexing part of milli had to be adapted. The implementation there is basically copy/pasted from the code handling the `facet-id-f64-docids` database, with appropriate modifications in place.

There was an issue involving the flattening of documents during (re)indexing. Previously, the following JSON:
```json
{
    "id": 0,
    "colour": [],
    "size": {}
}
```
would be flattened to:
```json
{
    "id": 0
}
```
prior to being given to the extraction pipeline.

This transformation would lose the information needed to populate the `facet-id-exists-docids` database. Therefore, I have also changed the implementation of the `flatten-serde-json` crate. Now, as it traverses the JSON, it keeps track of which keys were encountered. Then, at the end, if a previously encountered key is not present in the flattened object, it adds that key to the object with an empty array as its value. For example:
```json
{
    "id": 0,
    "colour": {
        "green": [],
        "blue": 1
    },
    "size": {}
} 
```
becomes
```json
{
    "id": 0,
    "colour": [],
    "colour.green": [],
    "colour.blue": 1,
    "size": []
} 
```
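
A hedged sketch of that key-tracking step (illustrative, not the actual `flatten-serde-json` code):

```rust
use serde_json::{json, Map, Value};

// After flattening, re-insert any originally encountered key that vanished
// (e.g. an empty object or array), using an empty array as a dummy value.
fn restore_lost_keys(encountered_keys: &[String], flattened: &mut Map<String, Value>) {
    for key in encountered_keys {
        if !flattened.contains_key(key) {
            flattened.insert(key.clone(), json!([]));
        }
    }
}
```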


Co-authored-by: Kerollmops <clement@meilisearch.com>
2022-08-04 09:46:06 +00:00
bors[bot]
50f6524ff2 Merge #579
579: Stop reindexing already indexed documents r=ManyTheFish a=irevoire

```
 % ./compare.sh indexing_stop-reindexing-unchanged-documents_cb5a1669.json indexing_main_eeba1960.json
group                                                                     indexing_main_eeba1960                 indexing_stop-reindexing-unchanged-documents_cb5a1669
-----                                                                     ----------------------                 -----------------------------------------------------
indexing/-geo-delete-facetedNumber-facetedGeo-searchable-                 1.03      2.0±0.22ms        ? ?/sec    1.00  1955.4±336.24µs        ? ?/sec
indexing/-movies-delete-facetedString-facetedNumber-searchable-           1.08     11.0±2.93ms        ? ?/sec    1.00     10.2±4.04ms        ? ?/sec
indexing/-movies-delete-facetedString-facetedNumber-searchable-nested-    1.00     15.1±3.89ms        ? ?/sec    1.14     17.1±5.18ms        ? ?/sec
indexing/-songs-delete-facetedString-facetedNumber-searchable-            1.26    59.2±12.01ms        ? ?/sec    1.00     47.1±8.52ms        ? ?/sec
indexing/-wiki-delete-searchable-                                         1.08   316.6±31.53ms        ? ?/sec    1.00   293.6±17.00ms        ? ?/sec
indexing/Indexing geo_point                                               1.01      60.9±0.31s        ? ?/sec    1.00      60.6±0.36s        ? ?/sec
indexing/Indexing movies in three batches                                 1.04      20.0±0.30s        ? ?/sec    1.00      19.2±0.25s        ? ?/sec
indexing/Indexing movies with default settings                            1.02      19.1±0.18s        ? ?/sec    1.00      18.7±0.24s        ? ?/sec
indexing/Indexing nested movies with default settings                     1.02      26.2±0.29s        ? ?/sec    1.00      25.9±0.22s        ? ?/sec
indexing/Indexing nested movies without any facets                        1.02      25.3±0.32s        ? ?/sec    1.00      24.7±0.26s        ? ?/sec
indexing/Indexing songs in three batches with default settings            1.00      66.7±0.41s        ? ?/sec    1.01      67.1±0.86s        ? ?/sec
indexing/Indexing songs with default settings                             1.00      58.3±0.90s        ? ?/sec    1.01      58.8±1.32s        ? ?/sec
indexing/Indexing songs without any facets                                1.00      54.5±1.43s        ? ?/sec    1.01      55.2±1.29s        ? ?/sec
indexing/Indexing songs without faceted numbers                           1.00      57.9±1.20s        ? ?/sec    1.01      58.4±0.93s        ? ?/sec
indexing/Indexing wiki                                                    1.00   1052.0±10.95s        ? ?/sec    1.02   1069.4±20.38s        ? ?/sec
indexing/Indexing wiki in three batches                                   1.00    1193.1±8.83s        ? ?/sec    1.00    1189.5±9.40s        ? ?/sec
indexing/Reindexing geo_point                                             3.22      67.5±0.73s        ? ?/sec    1.00      21.0±0.16s        ? ?/sec
indexing/Reindexing movies with default settings                          3.75      19.4±0.28s        ? ?/sec    1.00       5.2±0.05s        ? ?/sec
indexing/Reindexing songs with default settings                           8.90      61.4±0.91s        ? ?/sec    1.00       6.9±0.07s        ? ?/sec
indexing/Reindexing wiki                                                  1.00   1748.2±35.68s        ? ?/sec    1.00   1750.5±18.53s        ? ?/sec
```

tldr: We do not lose any performance on the normal indexing benchmarks, but reindexing becomes between 3 and 8 times faster 👍 

Co-authored-by: Tamo <tamo@meilisearch.com>
2022-08-04 08:10:37 +00:00
bors[bot]
e8987cf5aa Merge #599
599: fix: Remove whitespace trimming during document id validation r=ManyTheFish a=ManyTheFish

fix #592


related to https://github.com/meilisearch/meilisearch/issues/2640


Co-authored-by: ManyTheFish <many@meilisearch.com>
2022-08-03 14:55:25 +00:00
ManyTheFish
d6f9a60a32 fix: Remove whitespace trimming during document id validation
fix #592
2022-08-03 11:38:40 +02:00
Tamo
7fc35c5586 remove the useless prints 2022-08-02 10:31:22 +02:00
Tamo
f156d7dd3b Stop reindexing already indexed documents 2022-08-02 10:31:20 +02:00
Loïc Lecrenier
1fe224f2c6 Update filter-parser/fuzz/.gitignore
Co-authored-by: Many the fish <many@meilisearch.com>
2022-07-21 16:12:01 +02:00
Loïc Lecrenier
07003704a8 Merge branch 'filter/field-exist' 2022-07-21 14:51:41 +02:00
bors[bot]
e1bc610d27 Merge #595
595: Update version for next release (v0.32.0) r=ManyTheFish a=curquiza

In order to release on `main` (for v0.29.0, not v0.28.1)

<img width="1014" alt="Capture d’écran 2022-07-21 à 13 20 35" src="https://user-images.githubusercontent.com/20380692/180178725-381fbdf1-c0fb-4fa9-9954-452aec5a1574.png">


Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
2022-07-21 11:07:42 +00:00
Clémentine Urquizar
d5e9b7305b Update version for next release (v0.32.0) 2022-07-21 13:20:02 +04:00
ManyTheFish
cbb3b25459 Fix(Search): Fix phrase search candidates computation
This is an old bug, but it was hidden by the proximity criterion:
phrase searches were always returning an empty candidates list.

Before the fix, we were trying to find any words[n] near words[n]
instead of finding any words[n] near words[n+1]. For example,

for a phrase search '"Hello world"', we were searching for "hello" near "hello" first, instead of "hello" near "world".
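
In pseudo-Rust, the fix amounts to pairing consecutive words (a hedged sketch, not milli's actual candidate computation):

```rust
// Build the (words[n], words[n + 1]) pairs used to look up proximity
// candidates; the bug paired words[n] with itself instead.
fn phrase_word_pairs<'a>(words: &[&'a str]) -> Vec<(&'a str, &'a str)> {
    words.windows(2).map(|w| (w[0], w[1])).collect()
}
```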
2022-07-21 10:04:30 +02:00
bors[bot]
941af58239 Merge #561
561: Enriched documents batch reader r=curquiza a=Kerollmops

~This PR is based on #555 and must be rebased on main after it has been merged to ease the review.~
This PR contains the work in #555 and can be merged on main as soon as reviewed and approved.

- [x] Create an `EnrichedDocumentsBatchReader` that contains the external documents id.
- [x] Extract the primary key name and make it accessible in the `EnrichedDocumentsBatchReader`.
- [x] Use the external id from the `EnrichedDocumentsBatchReader` in the `Transform::read_documents`.
- [x] Remove the `update_primary_key` from the _transform.rs_ file.
- [x] Really generate the auto-generated documents ids.
- [x] Insert the (auto-generated) document ids in the document while processing it in `Transform::read_documents`.

Co-authored-by: Kerollmops <clement@meilisearch.com>
2022-07-21 07:08:50 +00:00
Loïc Lecrenier
41a0ce07cb Add a code comment, as suggested in PR review
Co-authored-by: Many the fish <many@meilisearch.com>
2022-07-20 16:20:35 +02:00
Loïc Lecrenier
1506683705 Avoid using too much memory when indexing facet-exists-docids 2022-07-19 14:42:35 +02:00
Loïc Lecrenier
d0eee5ff7a Fix compiler error 2022-07-19 13:54:30 +02:00
Loïc Lecrenier
aed8c69bcb Refactor indexation of the "facet-id-exists-docids" database
The idea is to directly create a sorted and merged list of bitmaps
in the form of a BTreeMap<FieldId, RoaringBitmap> instead of creating
a grenad::Reader where the keys are field_id and the values are docids.

Then we send that BTreeMap to the thing that handles TypedChunks, which
inserts its content into the database.
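
A hedged sketch of that accumulation (types simplified; `FieldId` is assumed to be a `u16` here):

```rust
use std::collections::BTreeMap;

use roaring::RoaringBitmap;

type FieldId = u16;

// Accumulate the docids where each field exists directly into a BTreeMap,
// which is already sorted by FieldId, instead of sorting through grenad.
fn collect_facet_exists(
    pairs: impl Iterator<Item = (FieldId, u32)>,
) -> BTreeMap<FieldId, RoaringBitmap> {
    let mut map = BTreeMap::<FieldId, RoaringBitmap>::new();
    for (field_id, docid) in pairs {
        map.entry(field_id).or_default().insert(docid);
    }
    map
}
```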
2022-07-19 10:07:33 +02:00
Loïc Lecrenier
1eb1e73bb3 Add integration tests for the EXISTS filter 2022-07-19 10:07:33 +02:00
Loïc Lecrenier
4f0bd317df Remove custom implementation of BytesEncode/Decode for the FieldId 2022-07-19 10:07:33 +02:00
Loïc Lecrenier
80b962b4f4 Run cargo fmt 2022-07-19 10:07:33 +02:00
Loïc Lecrenier
ea0642c32d Make filter parser more strict regarding spacing around operators
OR, AND, NOT, TO must now be followed by spaces
2022-07-19 10:07:33 +02:00
Loïc Lecrenier
c17d616250 Refactor index_documents_check_exists_database tests 2022-07-19 10:07:33 +02:00
Loïc Lecrenier
30bd4db0fc Simplify indexing task for facet_exists_docids database 2022-07-19 10:07:33 +02:00
Loïc Lecrenier
392472f4bb Apply suggestions from code review
Co-authored-by: Tamo <tamo@meilisearch.com>
2022-07-19 10:07:33 +02:00
Loïc Lecrenier
bd15f5625a Fix compiler warning 2022-07-19 10:07:33 +02:00
Loïc Lecrenier
722db7b088 Ignore target directory of filter-parser/fuzz crate 2022-07-19 10:07:33 +02:00
Loïc Lecrenier
a5c9162250 Improve parser for NOT EXISTS filter
Allow multiple spaces between NOT and EXISTS
2022-07-19 10:07:33 +02:00
Loïc Lecrenier
0388b2d463 Run cargo fmt 2022-07-19 10:07:33 +02:00
Loïc Lecrenier
dc64170a69 Improve syntax of EXISTS filter, allow “value NOT EXISTS” 2022-07-19 10:07:33 +02:00
Loïc Lecrenier
72452f0cb2 Implement the EXISTS filter operator 2022-07-19 10:07:33 +02:00
Loïc Lecrenier
a8641b42a7 Modify flatten_serde_json to keep dummy value for all object keys
Example:
```json
{
    "id": 0,
    "colour" : { "green": 1 }
}
```
becomes:
```json
{
    "id": 0,
    "colour" : [],
    "colour.green": 1
}
```
to retain the information the key "colour" exists in the original
json value.
2022-07-19 10:07:33 +02:00
Loïc Lecrenier
453d593ce8 Add a database containing the docids where each field exists 2022-07-19 10:07:33 +02:00
bors[bot]
5704235521 Merge #584
584: Chores: Enhance smart-crop code comments r=curquiza a=ManyTheFish

Enhance explanation around smart crop algorithms

Co-authored-by: ManyTheFish <many@meilisearch.com>
Co-authored-by: Many the fish <many@meilisearch.com>
2022-07-19 07:08:14 +00:00
bors[bot]
f6415b679f Merge #588
588: Fix name of "release_date" facet in movies benchmarks r=ManyTheFish a=loiclec

## What does this PR do?
The `movies.json` file in the benchmark datasets contains a filterable field called "release_date", but the indexing benchmarks wrongly called the field "released_date" instead. This PR fixes that.


Co-authored-by: Loïc Lecrenier <loic@meilisearch.com>
2022-07-18 15:51:09 +00:00
Many the fish
2d79720f5d Update milli/src/search/matches/mod.rs 2022-07-18 17:48:04 +02:00
Many the fish
8ddb4e750b Update milli/src/search/matches/mod.rs 2022-07-18 17:47:39 +02:00
Many the fish
a277daa1f2 Update milli/src/search/matches/mod.rs 2022-07-18 17:47:13 +02:00
Many the fish
fb794c6b5e Update milli/src/search/matches/mod.rs 2022-07-18 17:46:00 +02:00
Many the fish
1237cfc249 Update milli/src/search/matches/mod.rs 2022-07-18 17:45:37 +02:00
Many the fish
d7fd5c58cd Update milli/src/search/matches/mod.rs 2022-07-18 17:45:06 +02:00
Loïc Lecrenier
fc9f3f31e7 Change DocumentsBatchReader to access cursor and index at same time
Otherwise it is not possible to iterate over all documents while
using the fields index at the same time.
2022-07-18 16:08:14 +02:00
Loïc Lecrenier
ab1571cdec Simplify Transform::read_documents, enabled by enriched documents reader 2022-07-18 12:45:47 +02:00
Loïc Lecrenier
8270e2b768 Fix name of "release_date" facet in movies benchmarks 2022-07-18 10:34:12 +02:00
Many the fish
e261ef64d7 Update milli/src/search/matches/mod.rs
Co-authored-by: Clément Renault <clement@meilisearch.com>
2022-07-18 10:18:51 +02:00
Many the fish
1da4ab5918 Update milli/src/search/matches/mod.rs
Co-authored-by: Clément Renault <clement@meilisearch.com>
2022-07-18 10:18:03 +02:00
Kerollmops
448114cc1c Fix the benchmarks with the new indexation API 2022-07-12 15:22:09 +02:00
Kerollmops
25e768f31c Fix another issue with the nested primary key selector 2022-07-12 15:14:07 +02:00
Kerollmops
192793ee38 Add some tests to check for the nested documents ids 2022-07-12 15:14:07 +02:00
Kerollmops
a892a4a79c Introduce a function to extend from a JSON array of objects 2022-07-12 15:14:06 +02:00
Kerollmops
dc61105554 Fix the nested document id fetching function 2022-07-12 15:14:06 +02:00
Kerollmops
2eec290424 Check the validity of the latitute and longitude numbers 2022-07-12 15:14:06 +02:00
Kerollmops
5d149d631f Remove tests for a function that no more exists 2022-07-12 15:14:06 +02:00
Kerollmops
0bbcc7b180 Expose the DocumentId struct to be sure to inject the generated ids 2022-07-12 15:14:06 +02:00
Kerollmops
d1a4da9812 Generate a real UUIDv4 when ids are auto-generated 2022-07-12 15:14:06 +02:00
Kerollmops
c8ebf0de47 Rename the validate function as an enriching function 2022-07-12 15:14:06 +02:00
Kerollmops
905af2a2e9 Use the primary key and external id in the transform 2022-07-12 15:14:05 +02:00
Kerollmops
742543091e Constify the default primary key name 2022-07-12 14:55:52 +02:00
Kerollmops
5f1bfb73ee Extract the primary key name and make it accessible 2022-07-12 14:55:52 +02:00
Kerollmops
6a0a0ae94f Make the Transform read from an EnrichedDocumentsBatchReader 2022-07-12 14:55:52 +02:00
Kerollmops
ea852200bb Fix the format used for a geo deleting benchmark 2022-07-12 14:55:52 +02:00
Kerollmops
dc3f092d07 Do not leak an internal grenad Error 2022-07-12 14:55:52 +02:00
Kerollmops
8ebf5eed0d Make the nested primary key work 2022-07-12 14:55:52 +02:00
Kerollmops
19eb3b4708 Make sure that we do not accept floats as document ids 2022-07-12 14:55:52 +02:00
Kerollmops
2ceeb51c37 Support the auto-generated ids when validating documents 2022-07-12 14:55:51 +02:00
Kerollmops
399eec5c01 Fix the indexation tests 2022-07-12 14:55:51 +02:00
Kerollmops
fcfc4caf8c Move the Object type in the lib.rs file and use it everywhere 2022-07-12 14:55:51 +02:00
Kerollmops
0146175fe6 Introduce the validate_documents_batch function 2022-07-12 14:55:51 +02:00
Kerollmops
cefffde9af Improve the .gitignore of the fuzz crate 2022-07-12 14:55:51 +02:00
Kerollmops
bdc4263883 Introduce the validate_documents_batch function 2022-07-12 14:55:51 +02:00
Kerollmops
a97d4d63b9 Fix the benchmarks 2022-07-12 14:55:50 +02:00
Kerollmops
f29114f94a Fix http-ui to fit with the new DocumentsBatchBuilder/Reader structs 2022-07-12 14:52:56 +02:00
Kerollmops
a4ceef9624 Fix the cli for the new DocumentsBatchBuilder/Reader structs 2022-07-12 14:52:56 +02:00
Kerollmops
6d0498df24 Fix the fuzz tests 2022-07-12 14:52:56 +02:00
Kerollmops
e8297ad27e Fix the tests for the new DocumentsBatchBuilder/Reader 2022-07-12 14:52:56 +02:00
Kerollmops
419ce3966c Rework the DocumentsBatchBuilder/Reader to use grenad 2022-07-12 14:52:55 +02:00
Kerollmops
eb63af1f10 Update grenad to 0.4.2 2022-07-12 14:52:55 +02:00
Kerollmops
048e174efb Do not allocate when parsing CSV headers 2022-07-12 14:52:55 +02:00
ManyTheFish
5d79617a56 Chores: Enhance smart-crop code comments 2022-07-07 16:28:09 +02:00
bors[bot]
ce90fc628a Merge #583
583: Use BufReader to read datasets in benchmarks r=ManyTheFish a=loiclec

## What does this PR do?
Ensure that the datasets used by the benchmarks are read efficiently by using a `BufReader`.

## Why?
Using a `BufReader` is more representative of how `meilisearch` works. It will also make performance comparisons between different branches of `milli` more accurate.
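
For reference, the change boils down to something like this (a trivial sketch; the helper name is ours):

```rust
use std::fs::File;
use std::io::BufReader;

// Buffer reads from the dataset file so parsing does not hit the kernel
// with one tiny read syscall per record.
fn open_dataset(path: &str) -> std::io::Result<BufReader<File>> {
    Ok(BufReader::new(File::open(path)?))
}
```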




Co-authored-by: Loïc Lecrenier <loic@meilisearch.com>
2022-07-07 08:13:07 +00:00
Loïc Lecrenier
aae03356cb Use BufReader to read datasets in benchmarks 2022-07-06 18:20:15 +02:00
bors[bot]
ebddfdb9a3 Merge #578
578: Bump uuid to 1.1.2 r=ManyTheFish a=Kerollmops

Just to [align the version with Meilisearch](https://github.com/meilisearch/meilisearch/pull/2584).

Co-authored-by: Kerollmops <clement@meilisearch.com>
2022-07-05 14:56:08 +00:00
bors[bot]
eeba196053 Merge #572
572: Add reindexing benchmarks r=Kerollmops a=irevoire

With #557 coming, we should add benchmarks that measure our impact on the reindexing process.

Co-authored-by: Tamo <tamo@meilisearch.com>
2022-07-05 14:43:01 +00:00
Kerollmops
1bfdcfc84f Bump uuid to 1.1.2 2022-07-05 16:23:36 +02:00
bors[bot]
dd1e606f13 Merge #557
557: Fasten documents deletion and update r=Kerollmops a=irevoire

When a document deletion occurs, instead of deleting the document we mark it as deleted in the new “soft deleted” bitmap. It is then removed from the search and all the other endpoints.
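
A hedged sketch of the idea (illustrative struct, not milli's API):

```rust
use roaring::RoaringBitmap;

// Deleting a document only sets a bit; the expensive cleanup of all the
// inverted indexes is deferred, and search simply skips soft-deleted ids.
struct DocumentIds {
    all: RoaringBitmap,
    soft_deleted: RoaringBitmap,
}

impl DocumentIds {
    fn delete(&mut self, docid: u32) {
        self.soft_deleted.insert(docid);
    }

    fn alive(&self) -> RoaringBitmap {
        &self.all - &self.soft_deleted
    }
}
```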

I ran the benchmarks against main;
```
% ./compare.sh indexing_main_83ad1aaf.json indexing_fasten-document-deletion_abab51fb.json
group                                                                     indexing_fasten-document-deletion_abab51fb    indexing_main_83ad1aaf
-----                                                                     ------------------------------------------    ----------------------
indexing/-geo-delete-facetedNumber-facetedGeo-searchable-                 1.05      2.0±0.40ms        ? ?/sec           1.00  1904.9±190.00µs        ? ?/sec
indexing/-movies-delete-facetedString-facetedNumber-searchable-           1.00     10.3±2.64ms        ? ?/sec           961.61      9.9±0.12s        ? ?/sec
indexing/-movies-delete-facetedString-facetedNumber-searchable-nested-    1.00     15.1±3.90ms        ? ?/sec           554.63      8.4±0.12s        ? ?/sec
indexing/-songs-delete-facetedString-facetedNumber-searchable-            1.00     45.1±7.53ms        ? ?/sec           710.15     32.0±0.10s        ? ?/sec
indexing/-wiki-delete-searchable-                                         1.00    277.8±7.97ms        ? ?/sec           1946.57    540.8±3.15s        ? ?/sec
indexing/Indexing geo_point                                               1.00      12.0±0.20s        ? ?/sec           1.03      12.4±0.19s        ? ?/sec
indexing/Indexing movies in three batches                                 1.00      19.3±0.30s        ? ?/sec           1.01      19.4±0.16s        ? ?/sec
indexing/Indexing movies with default settings                            1.00      18.8±0.09s        ? ?/sec           1.00      18.9±0.10s        ? ?/sec
indexing/Indexing nested movies with default settings                     1.00      25.9±0.19s        ? ?/sec           1.00      25.9±0.12s        ? ?/sec
indexing/Indexing nested movies without any facets                        1.00      24.8±0.17s        ? ?/sec           1.00      24.8±0.18s        ? ?/sec
indexing/Indexing songs in three batches with default settings            1.00      65.9±0.96s        ? ?/sec           1.03      67.8±0.82s        ? ?/sec
indexing/Indexing songs with default settings                             1.00      58.8±1.11s        ? ?/sec           1.02      59.9±2.09s        ? ?/sec
indexing/Indexing songs without any facets                                1.00      53.4±0.72s        ? ?/sec           1.01      54.2±0.88s        ? ?/sec
indexing/Indexing songs without faceted numbers                           1.00      57.9±1.17s        ? ?/sec           1.01      58.3±1.20s        ? ?/sec
indexing/Indexing wiki                                                    1.00   1065.2±13.26s        ? ?/sec           1.00   1065.8±12.66s        ? ?/sec
indexing/Indexing wiki in three batches                                   1.00    1182.4±6.20s        ? ?/sec           1.01    1190.8±8.48s        ? ?/sec
```

Most things do not change; we lost 0.1ms on the indexing of geo_point (I don’t get why), and we are between 500 and 1900 times faster when we delete documents.


Co-authored-by: Tamo <tamo@meilisearch.com>
2022-07-05 14:14:38 +00:00
Tamo
250be9fe6c put the threshold back to 10k 2022-07-05 15:57:44 +02:00
bors[bot]
62692c171d Merge #577
577: Fix deserialisation of NDJson documents in benchmarks r=irevoire a=loiclec

Previously, the first document in the NDJson file was read over and over again. So the `geo_point` benchmark was not working properly: it only indexed one document.

Co-authored-by: Loïc Lecrenier <loic@meilisearch.com>
2022-07-05 13:54:47 +00:00
Loïc Lecrenier
9bc7627e27 Fix deserialisation of NDJson documents in benchmarks 2022-07-05 15:51:06 +02:00
Tamo
b61efd09fc Makes the internal soft deleted error a UserError 2022-07-05 15:34:45 +02:00
Tamo
eaf28b0628 Apply review suggestions
Co-authored-by: Clément Renault <clement@meilisearch.com>
2022-07-05 15:30:33 +02:00
Tamo
3b309f654a Fasten the document deletion
When a document deletion occurs, instead of deleting the document we mark it as deleted
in the new “soft deleted” bitmap. It is then removed from the search, and all the other
endpoints.
2022-07-05 15:30:33 +02:00
Tamo
2700d8dc67 Add reindexing benchmarks 2022-07-05 14:46:46 +02:00
bors[bot]
77c837fc1b Merge #575
575: Bump charabia r=loiclec a=irevoire

This fix #573

Co-authored-by: Tamo <tamo@meilisearch.com>
2022-07-05 11:53:57 +00:00
Tamo
446439e8be bump charabia 2022-07-05 12:19:30 +02:00
bors[bot]
c6f4775fde Merge #568
568: Fix not equal filter when field contains both number and strings r=Kerollmops a=GraDKh

Related to https://github.com/meilisearch/meilisearch/issues/2516
Looks like the issue should be moved to this repo, but I'm not sure what the right procedure for it is.

Co-authored-by: Dmytro Gordon <dmytro@bigstream.co>
2022-06-28 08:46:23 +00:00
Dmytro Gordon
3ff03a3f5f Fix not equal filter when field contains both number and strings 2022-06-27 15:55:17 +03:00
bors[bot]
83ad1aaf05 Merge #567
567: Bump the milli version to 0.31.1 r=curquiza a=Kerollmops



Co-authored-by: Kerollmops <clement@meilisearch.com>
2022-06-22 15:07:03 +00:00
Kerollmops
cc48992e79 Bump the milli version to 0.31.1 2022-06-22 17:05:51 +02:00
bors[bot]
68bb170732 Merge #566
566: Introduce the copy_to_path method on the Index r=irevoire a=Kerollmops

Meilisearch needs this method to do snapshots.

Co-authored-by: Kerollmops <clement@meilisearch.com>
2022-06-22 14:52:19 +00:00
Kerollmops
238692a8e7 Introduce the copy_to_path method on the Index 2022-06-22 16:49:47 +02:00
bors[bot]
290a40b7a5 Merge #564
564: Rename the limitedTo parameter into maxTotalHits r=curquiza a=Kerollmops

This PR is related to https://github.com/meilisearch/meilisearch/issues/2542, it renames the `limitedTo` parameter into `maxTotalHits`.

Co-authored-by: Kerollmops <clement@meilisearch.com>
2022-06-22 13:48:33 +00:00
bors[bot]
d546f6f40e Merge #563
563: Improve the `estimatedNbHits` when a `distinctAttribute` is specified r=irevoire a=Kerollmops

This PR is related to https://github.com/meilisearch/meilisearch/issues/2532 but it doesn't fix it entirely. It improves the situation by computing the excluded documents (the ones with an already-seen distinct value) before stopping the loop; I think it was a mistake and it should always have been this way.

The reason it doesn't fix the issue is that Meilisearch is lazy, just to be sure not to compute too many things and take too much time to answer. When we deduplicate the documents by their distinct value, we must do it on the fly: every time we see a new document, we check that its distinct value doesn't collide with that of an already returned document.

The reason we can see the correct result when enough documents are fetched is that we were lucky enough to see all of the possible distinct values in the dataset, so all of the deduplication was already done and no further document can be returned.

If we wanted a correct `estimatedNbHits` every time, we would have to do a pass over the whole set of possible distinct values for the distinct attribute and compute a big intersection, which could cost a lot of CPU cycles.
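
In sketch form (illustrative, not the actual search loop), the on-the-fly deduplication looks like this:

```rust
use std::collections::HashSet;

// Return docids whose distinct value has not been seen yet; every skipped
// docid is what makes `estimatedNbHits` drift from the true count.
fn dedup_by_distinct(hits: &[(String, u32)]) -> Vec<u32> {
    let mut seen: HashSet<&str> = HashSet::new();
    hits.iter()
        .filter(|(value, _)| seen.insert(value.as_str()))
        .map(|(_, docid)| *docid)
        .collect()
}
```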

Co-authored-by: Kerollmops <clement@meilisearch.com>
2022-06-22 12:39:44 +00:00
bors[bot]
38a8d3cae1 Merge #565
565: Bump the milli version to 0.31.0 r=curquiza a=Kerollmops



Co-authored-by: Kerollmops <clement@meilisearch.com>
2022-06-22 10:09:41 +00:00
Kerollmops
f5c3b951bc Bump the milli version to 0.31.0 2022-06-22 12:08:16 +02:00
Kerollmops
d7c248042b Rename the limitedTo parameter into maxTotalHits 2022-06-22 12:00:48 +02:00
Kerollmops
d2f84a9d9e Improve the estimatedNbHits when distinct is enabled 2022-06-22 11:39:21 +02:00
bors[bot]
4f547eff02 Merge #560
560: Update version for next release (v0.30.0) r=curquiza a=curquiza



Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
2022-06-20 12:37:01 +00:00
bors[bot]
64b833410c Merge #559
559: Avoid having an ending separator before crop marker r=irevoire a=ManyTheFish

related to https://github.com/meilisearch/meilisearch/issues/2528


Co-authored-by: ManyTheFish <many@meilisearch.com>
2022-06-20 11:06:52 +00:00
Clémentine Urquizar
31f749b5d8 Update version for next release (v0.30.0) 2022-06-20 12:09:57 +02:00
ManyTheFish
a0ab90a4d7 Avoid having an ending separator before crop marker 2022-06-16 18:23:57 +02:00
bors[bot]
a59ae19842 Merge #558
558: Deletion benchmarks r=ManyTheFish a=ManyTheFish

Add benchmarks on the deletion and start rethinking benchmark names.

Co-authored-by: ManyTheFish <many@meilisearch.com>
2022-06-16 09:34:37 +00:00
ManyTheFish
2652310f2a Change delete benchmark names 2022-06-16 10:32:58 +02:00
ManyTheFish
adbb0ff318 Add deletion benchmarks 2022-06-16 10:17:58 +02:00
bors[bot]
0a5d1a445e Merge #554
554: Enhance tests for soft deletion r=irevoire a=ManyTheFish

#### tests: (skip in changelog)
- [x] placeholder search shouldn’t return soft deleted
- [x] search shouldn’t return soft deleted
- [x] filtered placeholder search shouldn’t return soft deleted
- [x] geo-filtered placeholder search shouldn’t return soft deleted
- [x] documents list/get shouldn’t return soft deleted
- [x] stats shouldn’t count soft deleted

#### other: (API breaking)
- [x] ensure that Index methods are not bypassed by Meilisearch


Poke `@irevoire`, we may merge this into your branch.

Co-authored-by: ManyTheFish <many@meilisearch.com>
2022-06-14 09:49:37 +00:00
ManyTheFish
447195a27a Replace format by to_string 2022-06-14 10:32:44 +02:00
ManyTheFish
177154828c Extends deletion tests 2022-06-13 17:34:16 +02:00
ManyTheFish
0d1d354052 Ensure that Index methods are not bypassed by Meilisearch 2022-06-13 17:34:11 +02:00
bors[bot]
f1d848bb9a Merge #552
552: Fix escaped quotes in filter r=Kerollmops a=irevoire

Will fix https://github.com/meilisearch/meilisearch/issues/2380

The issue was that in the evaluation of the filter, I was using the deref implementation instead of calling the `value` method of my token.

To avoid the problem happening again, I removed the deref implementation; now, you need to call either the `lexeme` or the `value` method, but you can't rely on a “default” implementation to get a string out of a token.
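
A hedged sketch of what the `Token` API looks like after the change (the method names match the description above; the struct layout is an assumption):

```rust
// Without a Deref<Target = str> impl, callers must pick explicitly between
// the raw lexeme and the unescaped value.
struct Token {
    lexeme: String,        // raw input slice, escapes included
    value: Option<String>, // unescaped form, when escaping occurred
}

impl Token {
    fn lexeme(&self) -> &str {
        &self.lexeme
    }

    fn value(&self) -> &str {
        // the evaluation bug came from using the lexeme here via Deref
        self.value.as_deref().unwrap_or(&self.lexeme)
    }
}
```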

Co-authored-by: Tamo <tamo@meilisearch.com>
2022-06-09 14:56:44 +00:00
Tamo
676187ba43 bump milli version 2022-06-09 16:53:32 +02:00
Tamo
90afde435b fix escaped quotes in filter 2022-06-09 16:03:49 +02:00
bors[bot]
19d44142a1 Merge #550
550: Add the two new pagination and faceting settings r=ManyTheFish a=Kerollmops

This PR adds two new settings in the database, those settings are described [in this spec](https://github.com/meilisearch/specifications/pull/157).

Co-authored-by: Kerollmops <clement@meilisearch.com>
2022-06-09 08:16:01 +00:00
Kerollmops
445d5474cc Add the pagination_limited_to setting to the database 2022-06-08 18:14:27 +02:00
Kerollmops
69931e50d2 Add the max_values_by_facet setting to the database 2022-06-08 17:54:56 +02:00
Kerollmops
52a494bd3b Add the new pagination.limited_to and faceting.max_values_per_facet settings 2022-06-08 17:15:36 +02:00
bors[bot]
9580b9de79 Merge #549
549: Bump the version to 0.29.2 r=curquiza a=Kerollmops



Co-authored-by: Kerollmops <clement@meilisearch.com>
2022-06-08 14:29:47 +00:00
bors[bot]
a762d7f462 Merge #548
548: Setup the new limits on the number of facet values to return r=ManyTheFish a=Kerollmops

This PR implements the early draft of the new spec (waiting for it) specifying how the new facet limit feature should work and which limit we apply to the number of facet values to return by facet.

Co-authored-by: Kerollmops <clement@meilisearch.com>
2022-06-08 14:00:33 +00:00
Kerollmops
56ee9cc21f Bump the version to 0.29.2 2022-06-08 16:00:06 +02:00
Kerollmops
2a505503b3 Change the number of facet values returned by default to 100 2022-06-08 15:58:57 +02:00
Kerollmops
bae4007447 Remove the hard limit on the number of facet values returned 2022-06-08 15:58:57 +02:00
bors[bot]
7313d6c533 Merge #547
547: Update version for next release (v0.29.1) r=Kerollmops a=curquiza

A new milli version will be released once this PR is merged https://github.com/meilisearch/milli/pull/543

Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
2022-06-08 10:20:24 +00:00
bors[bot]
306d2f37ff Merge #543
543: Fix wrong internal ids assignments r=irevoire a=irevoire

Fix https://github.com/meilisearch/meilisearch/issues/2470

Co-authored-by: ad hoc <postma.marin@protonmail.com>
Co-authored-by: Tamo <tamo@meilisearch.com>
2022-06-08 09:19:58 +00:00
Clémentine Urquizar
478dbfa45a Update version for next release (v0.29.1) 2022-06-07 18:59:33 +02:00
Tamo
d0aaa7ff00 Fix wrong internal ids assignments 2022-06-07 15:49:33 +02:00
ad hoc
31776fdc3f add failing test 2022-06-07 15:49:33 +02:00
bors[bot]
05ae6dbfa4 Merge #541
541: Update version for next release (v0.29.0) r=ManyTheFish a=curquiza

Need to update the version since #540 was merged and breaking

Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
2022-06-02 16:53:28 +00:00
bors[bot]
78f76c841d Merge #542
542: Refactor matching word r=Kerollmops a=ManyTheFish

Simplify MatchingWords API


Co-authored-by: ManyTheFish <many@meilisearch.com>
2022-06-02 16:23:41 +00:00
ManyTheFish
d212dc6b8b Remove useless newline 2022-06-02 18:22:56 +02:00
ManyTheFish
a5c790bf4b Update http-ui 2022-06-02 18:15:36 +02:00
Clémentine Urquizar
6ce1c6487a Update version for next release (v0.29.0) 2022-06-02 18:07:55 +02:00
ManyTheFish
727d663f28 Update benchmarks 2022-06-02 18:07:10 +02:00
ManyTheFish
7aabe42ae0 Refactor matching words 2022-06-02 17:59:04 +02:00
bors[bot]
dd186533f0 Merge #540
540: Integrate charabia r=Kerollmops a=ManyTheFish

related to https://github.com/meilisearch/meilisearch/issues/2375
related to https://github.com/meilisearch/meilisearch/issues/2144
related to https://github.com/meilisearch/meilisearch/issues/2417

Co-authored-by: ManyTheFish <many@meilisearch.com>
2022-06-02 15:34:33 +00:00
ManyTheFish
4dd7b20c32 Update benchmarks 2022-06-02 17:33:25 +02:00
ManyTheFish
4dd3675d2b Update http-ui 2022-06-02 16:59:11 +02:00
ManyTheFish
86ac8568e6 Use Charabia in milli 2022-06-02 16:59:11 +02:00
ManyTheFish
192e024ada Add Charabia in Cargo.toml 2022-06-02 16:59:07 +02:00
bors[bot]
ac6df0df57 Merge #539
539: Update version to v0.28.1 r=Kerollmops a=curquiza



Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
2022-06-01 16:40:12 +00:00
Clémentine Urquizar
c19c17eddb Update version to v0.28.1 2022-06-01 18:31:02 +02:00
bors[bot]
74d1914a64 Merge #535
535: Reintroduce the max values by facet limit r=ManyTheFish a=Kerollmops

This PR reintroduces the max values by facet limit this is related to https://github.com/meilisearch/meilisearch/issues/2349.

~I would like some help in deciding on whether I keep the default 100 max values in milli and set up the `FacetDistribution` settings in Meilisearch to use 1000 as the new value, I expose the `max_values_by_facet` for this purpose.~

I changed the default value to 1000 and the max to 10000, thank you `@ManyTheFish` for the help!

Co-authored-by: Kerollmops <clement@meilisearch.com>
2022-06-01 14:30:50 +00:00
bors[bot]
582930dbbb Merge #538
538: speedup exact words r=Kerollmops a=MarinPostma

This PR makes `exact_words` return an `Option` instead of an empty set, since set creation is costly, as noticed by `@Kerollmops`.

I was not convinced that this was the cause of all the performance drop we measured, and then realized that the methods that initialized it were called recursively, which caused initialization times to add up. While the first fix solves the issue when exact words are not used, using exact words remained way more expensive than it should be. To address this, the exact words are cached in the `Context`, so they are only initialized once.
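
A hedged sketch of the caching (field and constructor names are ours, not milli's):

```rust
use std::collections::BTreeSet;

// Build the exact-words set once and keep it in the Context, instead of
// re-initializing it in every recursive call.
struct Context {
    exact_words: Option<BTreeSet<String>>, // None when the setting is unset
}

impl Context {
    fn new(configured: Option<Vec<String>>) -> Self {
        let exact_words = configured.map(|words| words.into_iter().collect());
        Context { exact_words }
    }

    fn exact_words(&self) -> Option<&BTreeSet<String>> {
        self.exact_words.as_ref()
    }
}
```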


Co-authored-by: ad hoc <postma.marin@protonmail.com>
2022-05-30 08:20:34 +00:00
bors[bot]
9f78e392b1 Merge #536
536: Improves ranking rules error message r=Kerollmops a=matthias-wright

This PR improves the ranking rules error message to properly reflect the case sensitivity.
The issue was highlighted in [meilisearch/issues/2407](https://github.com/meilisearch/meilisearch/issues/2407).
Cheers!

Co-authored-by: Matthias Wright <matthias.s.wright@gmail.com>
2022-05-24 16:43:52 +00:00
ad hoc
25fc576696 review changes 2022-05-24 14:15:33 +02:00
ad hoc
69dc4de80f change &Option<Set> to Option<&Set> 2022-05-24 12:14:55 +02:00
ad hoc
ac975cc747 cache context's exact words 2022-05-24 09:43:17 +02:00
ad hoc
8993fec8a3 return optional exact words 2022-05-24 09:15:49 +02:00
Matthias Wright
754f48a4fb Improves ranking rules error message 2022-05-20 21:25:43 +02:00
Kerollmops
cd7c6e19ed Reintroduce the max values by facet limit 2022-05-18 15:57:57 +02:00
bors[bot]
19dac01c5c Merge #534
534: Bump milli version to v0.28.0 r=curquiza a=ManyTheFish



Co-authored-by: ManyTheFish <many@meilisearch.com>
2022-05-18 09:04:46 +00:00
ManyTheFish
895f5d8a26 Bump milli version 2022-05-18 10:37:12 +02:00
bors[bot]
3389561f34 Merge #532
532: Add some implementation on MatchBounds r=Kerollmops a=ManyTheFish

Theses Implementations are needed in meilisearch

Co-authored-by: ManyTheFish <many@meilisearch.com>
2022-05-17 14:50:22 +00:00
ManyTheFish
137434a1c8 Add some implementation on MatchBounds 2022-05-17 15:57:09 +02:00
bors[bot]
08c6d50cd1 Merge #531
531: fix the mixed dataset geosearch indexing bug r=Kerollmops a=irevoire

port #529 to main

Co-authored-by: Tamo <tamo@meilisearch.com>
2022-05-16 16:06:36 +00:00
bors[bot]
cf3e574cb4 Merge #530
530: fix the searchable fields bug when a field is nested r=Kerollmops a=irevoire

port #528 to main

Co-authored-by: Tamo <tamo@meilisearch.com>
2022-05-16 15:52:30 +00:00
Tamo
0af399a6d7 fix the mixed dataset geosearch indexing bug 2022-05-16 17:37:45 +02:00
Tamo
f586028f9a fix the searchable fields bug when a field is nested
Update milli/src/index.rs

Co-authored-by: Clément Renault <clement@meilisearch.com>
2022-05-16 17:24:36 +02:00
bors[bot]
e1e85267fd Merge #526
526: remove useless comment r=irevoire a=MarinPostma



Co-authored-by: ad hoc <postma.marin@protonmail.com>
2022-05-16 10:01:43 +00:00
bors[bot]
51809eb260 Merge #525
525: Simplify the error creation with thiserror r=irevoire a=irevoire

I introduced [`thiserror`](https://docs.rs/thiserror/latest/thiserror/) to implement all the `Display` traits and most of the `impl From<xxx> for yyy` conversions in far fewer lines.
And then I introduced a cute macro to implement the transitive `impl<X, Y, Z> From<X> for Z where Y: From<X>, Z: From<Y>` conversions more easily.
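
For reference, this is the kind of boilerplate `thiserror` derives (a generic example following its documented usage, not milli's actual error type):

```rust
use thiserror::Error;

// `#[error(...)]` derives Display; `#[from]` derives the From conversion.
#[derive(Debug, Error)]
enum MyError {
    #[error("I/O error: {0}")]
    Io(#[from] std::io::Error),
    #[error("invalid filter: {reason}")]
    InvalidFilter { reason: String },
}
```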

Co-authored-by: Tamo <tamo@meilisearch.com>
2022-05-04 15:47:32 +00:00
Tamo
484a9ddb27 Simplify the error creation with thiserror and a smol friendly macro 2022-05-04 17:24:00 +02:00
bors[bot]
65e6aa0de2 Merge #523
523: Improve geosearch error messages r=irevoire a=irevoire

Improve the geosearch error messages (#488).
And try to parse the string as specified in https://github.com/meilisearch/meilisearch/issues/2354

Co-authored-by: Tamo <tamo@meilisearch.com>
2022-05-04 13:36:11 +00:00
bors[bot]
f3b9f7b867 Merge #527
527: Remove the wip section part of the contributing file r=curquiza a=Kerollmops

Everything was good in the _Development Workflow_ section so I removed the _WIP Section_ part, now this PR fixes https://github.com/meilisearch/milli/issues/513.

Co-authored-by: Kerollmops <clement@meilisearch.com>
2022-05-04 13:11:30 +00:00
Kerollmops
48cdfddebf Remove the wip section part of the contributing file 2022-05-04 14:44:51 +02:00
Tamo
c55368ddd4 apply code suggestion
Co-authored-by: Kerollmops <kero@meilisearch.com>
2022-05-04 14:11:03 +02:00
bors[bot]
60ccb3fa4c Merge #524
524: Add benchmark on nested fields r=irevoire a=irevoire

fixes #500

Co-authored-by: Tamo <tamo@meilisearch.com>
2022-05-04 11:56:18 +00:00
ad hoc
5ad5d56f7e remove useless comment 2022-05-04 10:43:54 +02:00
bors[bot]
0c2c8af44e Merge #520
520: fix mistake in Settings initialization r=irevoire a=MarinPostma

fix settings not being correctly initialized and add a test to make sure that they are in the future.

fix https://github.com/meilisearch/meilisearch/issues/2358


Co-authored-by: ad hoc <postma.marin@protonmail.com>
2022-05-03 15:32:18 +00:00
bors[bot]
2fe9a02b1c Merge #522
522: Do not generate keys that are too long for LMDB r=Kerollmops a=Kerollmops

This PR fixes https://github.com/meilisearch/meilisearch/issues/2338 by making sure that we do not generate keys that are too long for LMDB especially when we are creating our prefix and proximity pairs keys.
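
A hedged sketch of the guard (the constant is LMDB's default maximum key size; the key layout is an assumption):

```rust
// LMDB rejects keys longer than its compile-time maximum (511 bytes by
// default), so skip word pairs whose serialized key would not fit.
const MAX_LMDB_KEY_LEN: usize = 511;

fn key_fits(word1: &str, word2: &str) -> bool {
    // assumed layout: word1 + separator byte + word2 + 1-byte proximity
    word1.len() + 1 + word2.len() + 1 <= MAX_LMDB_KEY_LEN
}
```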

Co-authored-by: Kerollmops <clement@meilisearch.com>
2022-05-03 11:54:10 +00:00
Kerollmops
211c8763b9 Make sure that we do not generate too long keys 2022-05-03 10:03:15 +02:00
Kerollmops
7e47031bdc Add a test for long keys in LMDB 2022-05-03 10:03:13 +02:00
Tamo
f820c9804d add one nested benchmark 2022-05-02 19:35:57 +02:00
Tamo
3cb1f6d0a1 improve geosearch error messages 2022-05-02 19:20:47 +02:00
ad hoc
1ee3d6ae33 fix mistake in Settings initialization 2022-04-29 16:24:25 +02:00
bors[bot]
312515dd6b Merge #507
507: deny warnings in CI r=Kerollmops a=MarinPostma

Add `RUSTFLAGS= -D warnings` to the CI so all warnings are treated as hard errors.

Co-authored-by: ad hoc <postma.marin@protonmail.com>
2022-04-28 15:16:35 +00:00
ad hoc
3eb3f0269e deny warnings in CI 2022-04-28 15:35:12 +02:00
bors[bot]
9db86aac51 Merge #518
518: Return facets even when there is no value associated to it r=Kerollmops a=Kerollmops

This PR is related to https://github.com/meilisearch/meilisearch/issues/2352 and should fix the issue when Meilisearch is up-to-date with this PR.

Co-authored-by: Kerollmops <clement@meilisearch.com>
2022-04-28 09:04:36 +00:00
bors[bot]
2aae19dc52 Merge #517
517: Make nightly CI run every week r=Kerollmops a=curquiza



Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
2022-04-26 16:22:25 +00:00
Kerollmops
a4d343aade Add a test to check for the returned facet distribution 2022-04-26 18:12:58 +02:00
bors[bot]
c2bd94c871 Merge #511
511: Update version in every workspace r=curquiza a=curquiza

Checked with `@Kerollmops` 

- Update the version into every workspace (the current version is v0.27.0, but I forgot to update it for the previous release)
- add `publish = false` except in `milli` workspace.


Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
2022-04-26 16:06:47 +00:00
Kerollmops
7d1c2d97bf Return facets even when there is no values associated to it 2022-04-26 17:59:53 +02:00
bors[bot]
d388ea0f9d Merge #506
506: fix cargo warnings r=Kerollmops a=MarinPostma

fix cargo warnings


Co-authored-by: ad hoc <postma.marin@protonmail.com>
2022-04-26 15:45:20 +00:00
Clémentine Urquizar
ec89030483 Update bors toml 2022-04-26 17:36:04 +02:00
ad hoc
5c29258e8e fix cargo warnings 2022-04-26 17:33:11 +02:00
bors[bot]
2fdf520271 Merge #514
514: Stop flattening every field r=Kerollmops a=irevoire

We need to flatten a document when:
* the primary key contains a `.`, or
* some fields need to be flattened.

Instead of flattening the whole object, and thus creating a lot of allocations with the `flatten-serde-json` crate, we generate a minimal sub-object containing only the fields that need to be flattened (see the sketch after this paragraph).
That should create fewer allocations and thus index faster.
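
A hedged sketch of that minimal sub-object construction (illustrative; the real selection logic lives in milli's transform step):

```rust
use serde_json::{Map, Value};

// Copy only the keys that actually need flattening into a small sub-object
// and hand that to the flattening crate, leaving the rest of the document
// untouched.
fn minimal_subobject(doc: &Map<String, Value>, keys_to_flatten: &[&str]) -> Map<String, Value> {
    let mut sub = Map::new();
    for key in keys_to_flatten {
        if let Some(value) = doc.get(*key) {
            sub.insert((*key).to_owned(), value.clone());
        }
    }
    sub
}
```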

---------

```
group                                                             indexing_main_e1e362fa                 indexing_stop-flattening-every-field_40d1bd6b
-----                                                             ----------------------                 ---------------------------------------------
indexing/Indexing geo_point                                       1.99      23.7±0.23s        ? ?/sec    1.00      11.9±0.21s        ? ?/sec
indexing/Indexing movies in three batches                         1.00      18.2±0.24s        ? ?/sec    1.01      18.3±0.29s        ? ?/sec
indexing/Indexing movies with default settings                    1.00      17.5±0.09s        ? ?/sec    1.01      17.7±0.26s        ? ?/sec
indexing/Indexing songs in three batches with default settings    1.00      64.8±0.47s        ? ?/sec    1.00      65.1±0.49s        ? ?/sec
indexing/Indexing songs with default settings                     1.00      54.9±0.99s        ? ?/sec    1.01      55.7±1.34s        ? ?/sec
indexing/Indexing songs without any facets                        1.00      50.6±0.62s        ? ?/sec    1.01      50.9±1.05s        ? ?/sec
indexing/Indexing songs without faceted numbers                   1.00      54.0±1.14s        ? ?/sec    1.01      54.7±1.13s        ? ?/sec
indexing/Indexing wiki                                            1.00     996.2±8.54s        ? ?/sec    1.02   1021.1±30.63s        ? ?/sec
indexing/Indexing wiki in three batches                           1.00    1136.8±9.72s        ? ?/sec    1.00    1138.6±6.59s        ? ?/sec
```

So basically everything slowed down a liiiiiittle bit, except the dataset with a nested field, which got twice as fast

Co-authored-by: Tamo <tamo@meilisearch.com>
2022-04-26 11:50:33 +00:00
Tamo
f19d2dc548 Only flatten the required fields
apply review comments

Co-authored-by: Kerollmops <kero@meilisearch.com>
2022-04-26 12:33:46 +02:00
bors[bot]
5adeac8047 Merge #516
516: Fix the indexing fuzzer r=irevoire a=irevoire



Co-authored-by: Tamo <tamo@meilisearch.com>
2022-04-26 08:35:03 +00:00
Clémentine Urquizar
7cb7643565 Make nightly CI run every week
Update CI

Fix CI
2022-04-25 18:52:27 +02:00
Clémentine Urquizar
d138b3c704 Update version 2022-04-25 18:43:46 +02:00
Tamo
fa6f495662 fix the indexing fuzzer 2022-04-25 18:32:06 +02:00
bors[bot]
8cc86d5a8d Merge #515
515: Improve the README r=curquiza a=Kerollmops

This PR closes #512 by adding more content to the README. We listed all of the subcrates of the repository, changed the descriptions of the subcrates, and added a simple example usage in the README.

Co-authored-by: Kerollmops <clement@meilisearch.com>
Co-authored-by: Clémentine Urquizar - curqui <clementine@meilisearch.com>
2022-04-25 16:15:12 +00:00
Clémentine Urquizar - curqui
5e562ffecf Update README.md 2022-04-25 18:14:43 +02:00
Clémentine Urquizar - curqui
2277172f9c Update README.md 2022-04-25 18:14:39 +02:00
Clémentine Urquizar - curqui
2db3d60259 Update README.md 2022-04-25 18:14:35 +02:00
Kerollmops
7e19bf1c0e Add an example usage of the library in the README 2022-04-25 17:25:46 +02:00
Kerollmops
fb192aaa9f Update the list of milli's subcrates 2022-04-25 15:55:38 +02:00
bors[bot]
e1e362fa43 Merge #509
509: Remove pr_status from bors settings r=Kerollmops a=curquiza

Because of multiple issue we had with bors.
https://github.com/bors-ng/bors-ng/issues/1492

Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
2022-04-25 11:45:37 +00:00
Clémentine Urquizar
08753d002a Remove pr_status from bors settings 2022-04-25 13:39:45 +02:00
Clément Renault
8d15ae37a1 Merge pull request #503 from meilisearch/improve-flatten-fuzzer
Improve the fuzzer of the flatten crate
2022-04-25 13:38:43 +02:00
Clément Renault
3e53791de3 Merge pull request #508 from meilisearch/contributing
First version of new CONTRIBUTING.md
2022-04-25 13:36:41 +02:00
bors[bot]
8010eca9c7 Merge #505
505: normalize exact words r=curquiza a=MarinPostma

Normalize the exact words, as specified in the specification.


Co-authored-by: ad hoc <postma.marin@protonmail.com>
2022-04-25 09:35:32 +00:00
Clémentine Urquizar
dc0d4addd9 First version of new CONTRIBUTING.md 2022-04-21 19:02:22 +02:00
Clément Renault
71414630fc Merge pull request #504 from meilisearch/test-long-words
Add a test to make sure that long words are handled
2022-04-21 16:06:13 +02:00
ad hoc
2e0089d5ff normalize exact words 2022-04-21 15:38:40 +02:00
ad hoc
3a2451fcba add test normalize exact words 2022-04-21 13:52:09 +02:00
Clément Renault
eb5830aa40 Add a test to make sure that long words are handled 2022-04-21 13:45:28 +02:00
Tamo
d81a3f4a74 improve the fuzzer of the flatten crate 2022-04-20 16:11:23 +02:00
bors[bot]
c7d0097c97 Merge #498
498: Get rid of the threshold when comparing benchmarks r=curquiza a=irevoire

It just hides things

Co-authored-by: Tamo <tamo@meilisearch.com>
2022-04-19 14:04:11 +00:00
Tamo
152a10344c Get rid of the threshold when comparing benchmarks
It just hide things
2022-04-19 15:39:58 +02:00
bors[bot]
04eb32e539 Merge #499
499: fix min-word-len-for-typo not reset properly r=Kerollmops a=MarinPostma

fix min word len for typo not resettign properly, as reported in https://github.com/meilisearch/meilisearch/issues/2330


Co-authored-by: ad hoc <postma.marin@protonmail.com>
2022-04-19 13:22:19 +00:00
ad hoc
8b14090927 fix min-word-len-for-typo not reset properly 2022-04-19 15:20:16 +02:00
bors[bot]
ea4bb9402f Merge #483
483: Enhance matching words r=Kerollmops a=ManyTheFish

# Summary

Enhance milli word-matcher making it handle match computing and cropping.

# Implementation

## Computing best matches for cropping

Before, we considered the first match in the attribute to be the best one; this was accurate when only one word was searched, but missed the target when more than one word was searched.

Now we are searching for the best matches interval to crop around, the chosen interval is the one:
1) that have the highest count of unique matches
> for example, if we have a query `split the world`, then the interval `the split the split the` has 5 matches but only 2 unique matches (1 for `split` and 1 for `the`) where the interval `split of the world` has 3 matches and 3 unique matches. So the interval `split of the world` is considered better.
2) that have the minimum distance between matches
> for example, if we have a query `split the world`, then the interval `split of the world` has a distance of 3 (2 between `split` and `the`, and 1 between `the` and `world`) where the interval `split the world` has a distance of 2. So the interval `split the world` is considered better.
3) that have the highest count of ordered matches
> for example, if we have a query `split the world`, then the interval `the world split` has 2 ordered words where the interval `split the world` has 3. So the interval `split the world` is considered better.
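As an illustration of how the three criteria could be ranked, here is a minimal sketch; the `IntervalScore` type and its fields are hypothetical, not milli's implementation:

```rust
use std::cmp::Ordering;

// Hypothetical score of a candidate interval: the three criteria above,
// compared in priority order.
#[derive(PartialEq, Eq)]
struct IntervalScore {
    unique_matches: usize,  // criterion 1: more is better
    distance: usize,        // criterion 2: less is better
    ordered_matches: usize, // criterion 3: more is better
}

impl Ord for IntervalScore {
    fn cmp(&self, other: &Self) -> Ordering {
        self.unique_matches
            .cmp(&other.unique_matches)
            .then_with(|| other.distance.cmp(&self.distance)) // reversed: smaller wins
            .then_with(|| self.ordered_matches.cmp(&other.ordered_matches))
    }
}

impl PartialOrd for IntervalScore {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(self.cmp(other))
    }
}

fn main() {
    // `split of the world` vs `the split the split the` from the first example
    // (the distance values are illustrative).
    let a = IntervalScore { unique_matches: 3, distance: 3, ordered_matches: 3 };
    let b = IntervalScore { unique_matches: 2, distance: 4, ordered_matches: 2 };
    assert!(a > b);
}
```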

## Cropping around the best matches interval

Before, we cropped around the interval without checking the context.

Now we crop around words that are in the same context as the matching words.
This means that we will keep words that are farther from the matching words but in the same phrase, rather than words that are nearer but separated by a dot.

> For instance, for the matching word `Split` the text:
`Natalie risk her future. Split The World is a book written by Emily Henry. I never read it.`
will be cropped like:
`…. Split The World is a book written by Emily Henry. …`
and  not like:
`Natalie risk her future. Split The World is a book …`


Co-authored-by: ManyTheFish <many@meilisearch.com>
2022-04-19 11:42:32 +00:00
ManyTheFish
f1115e274f Use Copy impl of FormatOption instead of cloning 2022-04-19 10:35:50 +02:00
Clémentine Urquizar - curqui
a68e3a79fb Merge pull request #497 from meilisearch/v0.26.1
Update version for the next release (v0.26.1)
2022-04-14 11:53:31 +02:00
Clémentine Urquizar
8d630a6f62 Update version for the next release (v0.26.1) 2022-04-14 11:44:06 +02:00
Clémentine Urquizar - curqui
d362278a41 Merge pull request #494 from meilisearch/flatten-what-is-needed
Only flatten the required objects
2022-04-14 11:43:28 +02:00
Tamo
00f78d6b5a Apply code suggestions
Co-authored-by: Clément Renault <clement@meilisearch.com>
2022-04-14 11:14:08 +02:00
Tamo
399fba16bb only flatten an object if it's nested 2022-04-14 11:14:08 +02:00
Tamo
c2469b6765 create the json-depth-checker crate 2022-04-14 11:14:08 +02:00
bors[bot]
7791ef90e7 Merge #493
493: Use smartstring to store the external id in our hashmap r=Kerollmops a=irevoire

We need to store all the external ids (primary keys) in a hashmap
associated to their internal ids.
Smartstring removes the heap allocation / memory usage for short strings and should
improve the cache locality.
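
As a minimal sketch of the idea, assuming the `smartstring` crate's `alias::String` (short strings are stored inline instead of on the heap); the ids here are illustrative:

```rust
use std::collections::HashMap;
use smartstring::alias::String as SmartString;

fn main() {
    // External ids (primary key values) are usually short, so most of them
    // fit in the inline representation: no heap allocation per document.
    let mut external_to_internal: HashMap<SmartString, u32> = HashMap::new();
    external_to_internal.insert(SmartString::from("doc-42"), 0);
    external_to_internal.insert(SmartString::from("doc-1337"), 1);
    assert_eq!(external_to_internal.get("doc-42"), Some(&0));
}
```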

I ran the benchmarks to measure the impact of this PR on the indexing time.
I think we should merge it whatever happens, though, because it'll decrease the memory consumption.

---------

This improves performance really sliiiiiightly but improves the memory usage, thus it should be merged.
```
group                                                             indexing_main_6b073738                 indexing_use-smartsring_3f343511
-----                                                             ----------------------                 --------------------------------
indexing/Indexing geo_point                                       1.02      25.2±0.20s        ? ?/sec    1.00      24.8±0.13s        ? ?/sec
indexing/Indexing movies in three batches                         1.00      18.2±0.10s        ? ?/sec    1.00      18.2±0.23s        ? ?/sec
indexing/Indexing movies with default settings                    1.00      17.5±0.09s        ? ?/sec    1.01      17.7±0.11s        ? ?/sec
indexing/Indexing songs in three batches with default settings    1.00      68.3±1.01s        ? ?/sec    1.00      68.0±0.95s        ? ?/sec
indexing/Indexing songs with default settings                     1.00      63.2±0.78s        ? ?/sec    1.00      63.0±0.58s        ? ?/sec
indexing/Indexing songs without any facets                        1.02      59.6±1.00s        ? ?/sec    1.00      58.5±1.03s        ? ?/sec
indexing/Indexing songs without faceted numbers                   1.00      62.8±0.38s        ? ?/sec    1.00      62.6±1.02s        ? ?/sec
indexing/Indexing wiki                                            1.01   1009.2±25.25s        ? ?/sec    1.00    998.1±11.27s        ? ?/sec
indexing/Indexing wiki in three batches                           1.01    1142.0±9.97s        ? ?/sec    1.00   1134.4±11.21s        ? ?/sec
```

Co-authored-by: Tamo <tamo@meilisearch.com>
2022-04-13 20:28:28 +00:00
Tamo
ee64f4a936 Use smartstring to store the external id in our hashmap
We need to store all the external ids (primary keys) in a hashmap
associated to their internal ids.
Smartstring removes the heap allocation / memory usage for short strings and should
improve the cache locality.
2022-04-13 21:22:07 +02:00
bors[bot]
456887a54a Merge #496
496: Improve the performances of the flattening subcrate r=irevoire a=Kerollmops

This PR adds some benchmarks to the _flatten-serde-json_ crate. This crate is responsible for transforming the original documents into flat versions that the engine can understand. It can probably be sped up, and this is why I added benchmarks to it.

I made some interesting performance improvements when I replaced the `json!` macro calls with direct method calls (sketched after the numbers below).

```
flatten/simple          time:   [452.44 ns 453.31 ns 454.18 ns]
                        change: [-15.036% -14.751% -14.473%] (p = 0.00 < 0.05)
                        Performance has improved.
Found 2 outliers among 100 measurements (2.00%)
  2 (2.00%) high mild

flatten/complex         time:   [1.0101 us 1.0131 us 1.0160 us]
                        change: [-18.001% -17.775% -17.536%] (p = 0.00 < 0.05)
                        Performance has improved.
Found 6 outliers among 100 measurements (6.00%)
  5 (5.00%) high mild
  1 (1.00%) high severe
```
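
The shape of the change is roughly the following; this is a hypothetical sketch, not the actual diff:

```rust
use serde_json::{json, Map, Value};

// The macro route goes through an extra layer of `Value` construction…
fn with_macro(key: String, value: Value) -> Value {
    json!({ key: value })
}

// …while direct method calls build the object map in place.
fn with_method_calls(key: String, value: Value) -> Value {
    let mut object = Map::new();
    object.insert(key, value);
    Value::Object(object)
}

fn main() {
    let a = with_macro("person.name".into(), json!("tamo"));
    let b = with_method_calls("person.name".into(), json!("tamo"));
    assert_eq!(a, b);
}
```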

---

_I removed this particular commit from this PR._ The reason is that the two other commits were enough for this PR to have enough impact and be merged. We will continue to explore where we can gain performance later.

But when I changed the flattening function to accept an owned version of the objects, we lost a lot of performance. Yes, I rewrote the benchmarks (locally) to clone the input object (and measured both the previous and new versions with the cloning benchmarks). Maybe cloning the benchmark inputs is not the right thing to do...

```
flatten/simple          time:   [746.46 ns 749.59 ns 752.70 ns]
                        change: [+40.082% +40.714% +41.347%] (p = 0.00 < 0.05)
                        Performance has regressed.

flatten/complex         time:   [1.7311 us 1.7342 us 1.7368 us]
                        change: [+40.976% +41.398% +41.807%] (p = 0.00 < 0.05)
                        Performance has regressed.
Found 1 outliers among 100 measurements (1.00%)
  1 (1.00%) low mild
```

Co-authored-by: Kerollmops <clement@meilisearch.com>
2022-04-13 11:14:29 +00:00
Kerollmops
b3cec1a383 Prefer using direct method calls instead of using the json macros 2022-04-13 13:12:57 +02:00
Kerollmops
436d2032c4 Add benchmarks to the flatten-serde-json subcrate 2022-04-13 13:12:57 +02:00
bors[bot]
3828635fb2 Merge #489
489: fix distinct count bug r=curquiza a=MarinPostma

fix https://github.com/meilisearch/meilisearch/issues/2152

I think the issue was that we didn't remove the excluded candidates from the initial candidates when returning the candidates with the search result.
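
A minimal sketch of the shape of the fix, using roaring bitmaps of document ids (the function is hypothetical, not the actual patch):

```rust
use roaring::RoaringBitmap;

// Subtract the candidates excluded by the distinct criterion before exposing
// the candidates alongside the search result.
fn search_result_candidates(initial: &RoaringBitmap, excluded: &RoaringBitmap) -> RoaringBitmap {
    initial - excluded // set difference on document ids
}

fn main() {
    let initial: RoaringBitmap = (0u32..10).collect();
    let excluded: RoaringBitmap = [2u32, 3, 5].into_iter().collect();
    assert_eq!(search_result_candidates(&initial, &excluded).len(), 7);
}
```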


Co-authored-by: ad hoc <postma.marin@protonmail.com>
2022-04-13 10:15:30 +00:00
ad hoc
dda28d7415 exclude excluded candidates from search result candidates 2022-04-13 12:10:35 +02:00
ad hoc
cd83014fff add test for distinct nb hits 2022-04-13 12:10:35 +02:00
ad hoc
bbb6728d2f add distinct attributes to cli 2022-04-13 12:10:35 +02:00
bors[bot]
49fbbacafc Merge #492
492: Add the new `Specify breaking` check to bors.toml r=curquiza a=curquiza

Should prevent this problem: https://github.com/meilisearch/milli/pull/489#issuecomment-1094988060

Co-authored-by: Clémentine Urquizar - curqui <clementine@meilisearch.com>
2022-04-13 08:59:40 +00:00
Clémentine Urquizar - curqui
7ad582f39f Update bors.toml 2022-04-13 10:56:56 +02:00
Clémentine Urquizar - curqui
aa896f0e7a Update bors.toml 2022-04-13 10:56:56 +02:00
Clémentine Urquizar - curqui
0261a0e3cf Add the new Specify breaking check to bors.toml 2022-04-13 10:56:55 +02:00
ManyTheFish
5809d3ae0d Add first benchmarks on formatting 2022-04-12 16:31:58 +02:00
ManyTheFish
827cedcd15 Add format option structure 2022-04-12 13:42:14 +02:00
ManyTheFish
011f8210ed Make compute_matches more rust idiomatic 2022-04-12 10:19:02 +02:00
bors[bot]
6b0737384b Merge #491
491: remove the unused key warning r=curquiza a=irevoire

When I copy-pasted my flatten crate, I forgot to remove the key used to publish the package, and that threw a warning.

Co-authored-by: Tamo <tamo@meilisearch.com>
2022-04-11 16:55:25 +00:00
Tamo
e153418b8a remove the unused key warning 2022-04-11 14:52:41 +02:00
bors[bot]
c8306616e0 Merge #490
490: Enforce labelling for the PRs r=curquiza a=curquiza

- Enforce one of the following labels to make the CI pass: `no breaking`, `DB breaking`, `API breaking` (milli API, not the Meilisearch API of course), or `skip changelog`. This new CI is now `Required` in the GitHub settings for merging a PR.
- Adapt the release drafter to these new labels
- rename `skip-changelog` to `skip changelog` to match the new label name

Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
2022-04-11 08:24:23 +00:00
Clémentine Urquizar
9383629d13 Enforce labelling for the PRs 2022-04-09 23:47:06 +02:00
ManyTheFish
a16de5de84 Simplify format and remove intermediate function 2022-04-08 11:20:41 +02:00
ManyTheFish
a769e09dfa Make token_crop_bounds more rust idiomatic 2022-04-07 20:15:14 +02:00
bors[bot]
9ac2fd1c37 Merge #487
487: Update version (v0.26.0) r=Kerollmops a=curquiza

breaking because of #458 

Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
2022-04-07 17:10:24 +00:00
bors[bot]
80ae020bee Merge #458
458: Nested fields r=Kerollmops a=irevoire

For the following document:
```json
{
  "id": 1,
  "person": {
    "name": "tamo",
    "age": 25,
  }
}
```
Suppose the user sets `person` as a filterable attribute. We need to store `person` in the filterable attributes, _obviously_. But we also need to keep track of `person.name` and `person.age` somewhere.
That’s where I changed the logic of the engine a little bit.

Currently, we have a function called `faceted_field` that returns the union of the filterable and sortable fields.
I renamed this function to `user_defined_faceted_field`. And now, when we finish indexing documents, we look at all the fields and see if they « match » a `user_defined_faceted_field`.
So in our case:
- does `id` match `person`: 🔴 
- does `person.name` match `person`: 🟢 
- does `person.age` match `person`: 🟢 

And thus, we insert in the database the following faceted fields: `person, person.name, person.age`.
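
The matching rule can be pictured like this; a hypothetical sketch, not milli's actual code:

```rust
// A concrete field matches a user-defined faceted field if it is that field
// itself or one of its dot-separated sub-fields.
fn matches_faceted(field: &str, user_defined: &str) -> bool {
    field == user_defined
        || field
            .strip_prefix(user_defined)
            .map_or(false, |rest| rest.starts_with('.'))
}

fn main() {
    assert!(!matches_faceted("id", "person"));          // 🔴
    assert!(matches_faceted("person.name", "person"));  // 🟢
    assert!(matches_faceted("person.age", "person"));   // 🟢
}
```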

The good thing about that solution is that we generate everything during the indexing phase; then, during the search, we can access our fields without recomputing too much globbing.

-----

Now the bad thing is that I had to create a new db.

And if that was only one db, that would be ok, but actually, I need to do the same for the:
- Displayed attributes
- Attributes to retrieve
- Attributes to highlight
- Attribute to crop

`@Kerollmops` 
Do you think there is a better way to do it?
Apart from all the code, can we have a problem because we have too many dbs?

Co-authored-by: Irevoire <tamo@meilisearch.com>
Co-authored-by: Tamo <tamo@meilisearch.com>
2022-04-07 16:26:09 +00:00
Tamo
bab898ce86 move the flatten-serde-json crate inside of milli 2022-04-07 18:20:44 +02:00
ManyTheFish
c8ed1675a7 Add some documentation 2022-04-07 17:32:13 +02:00
ManyTheFish
b1905dfa24 Make split_best_frequency returns references instead of owned data 2022-04-07 17:05:44 +02:00
Tamo
ab458d8840 fix tests after rebase 2022-04-07 17:00:00 +02:00
Irevoire
4f3ce6d9cd nested fields 2022-04-07 16:58:46 +02:00
Clémentine Urquizar
ee1d627803 Update version (v0.26.0) 2022-04-07 15:56:10 +02:00
bors[bot]
4ae7aea3b2 Merge #486
486: Update version (v0.25.0) r=curquiza a=curquiza

v0.25.0 will be released once #478 is merged

Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
2022-04-06 11:40:41 +00:00
bors[bot]
aadb0c58c9 Merge #478
478: Disable typo on attribute r=Kerollmops a=MarinPostma

disable typo on attributes


Co-authored-by: ad hoc <postma.marin@protonmail.com>
2022-04-05 23:45:35 +00:00
ad hoc
86249e2ae4 add missing \t in cli update display
Co-authored-by: Clément Renault <clement@meilisearch.com>
2022-04-05 21:35:06 +02:00
ad hoc
b799f3326b rename merge_nothing to merge_ignore_values 2022-04-05 18:44:35 +02:00
ManyTheFish
fa7d3a37c0 Make some cleaning and add comments 2022-04-05 17:48:56 +02:00
ManyTheFish
3bb1e35ada Fix match count 2022-04-05 17:48:45 +02:00
ManyTheFish
56e0edd621 Put crop markers directly around words 2022-04-05 17:41:32 +02:00
ManyTheFish
a93cd8c61c Fix prefix highlight with special chars 2022-04-05 17:41:32 +02:00
ManyTheFish
b3f0f39106 Make some cleaning 2022-04-05 17:41:32 +02:00
ManyTheFish
6dc345bc53 Test and Fix prefix highlight 2022-04-05 17:41:32 +02:00
ManyTheFish
bd30ee97b8 Keep separators at start of the cropped string 2022-04-05 17:41:32 +02:00
ManyTheFish
29c5f76d7f Use new matcher in http-ui 2022-04-05 17:41:32 +02:00
ManyTheFish
734d0899d3 Publish Matcher 2022-04-05 17:41:32 +02:00
ManyTheFish
4428cb5909 Add some tests and fix some corner cases 2022-04-05 17:41:32 +02:00
ManyTheFish
844f546a8b Add matches algorithm V1 2022-04-05 17:41:32 +02:00
ManyTheFish
3be1790803 Add crop algorithm with naive match algorithm 2022-04-05 17:41:32 +02:00
ManyTheFish
d96e72e5dc Create formatter with some tests 2022-04-05 17:41:32 +02:00
ad hoc
201fea0fda limit extract_word_docids memory usage 2022-04-05 14:14:15 +02:00
ad hoc
5cfd3d8407 add exact attributes documentation 2022-04-05 14:10:22 +02:00
Clémentine Urquizar
9eec44dd98 Update version (v0.25.0) 2022-04-05 12:06:42 +02:00
ad hoc
b85cd4983e remove field_id_from_position 2022-04-05 09:50:34 +02:00
ad hoc
dac81b2d44 add missing \n in cli settings 2022-04-05 09:48:56 +02:00
ad hoc
ab185a59b5 fix infos 2022-04-05 09:46:56 +02:00
ad hoc
59e41d98e3 add comments to integration test 2022-04-04 21:17:06 +02:00
ad hoc
1810927dbd rephrase exact_attributes doc 2022-04-04 21:04:49 +02:00
ad hoc
b7694c34f5 remove println 2022-04-04 21:00:07 +02:00
ad hoc
6cabd47c32 fix typo in comment 2022-04-04 20:59:20 +02:00
ad hoc
9963f11172 fix infos crate compilation issue 2022-04-04 20:54:03 +02:00
ad hoc
c8d3a09af8 add integration test for disabled typo on attributes 2022-04-04 20:54:03 +02:00
ad hoc
bfd81ce050 add exact attributes to cli settings 2022-04-04 20:54:03 +02:00
ad hoc
6b2c2509b2 fix bug in exact search 2022-04-04 20:54:03 +02:00
ad hoc
56b4f5dce2 add exact prefix to query_docids 2022-04-04 20:54:03 +02:00
ad hoc
21ae4143b1 add exact_word_prefix to Context 2022-04-04 20:54:03 +02:00
ad hoc
e8f06f6c06 extract exact_word_prefix_docids 2022-04-04 20:54:03 +02:00
ad hoc
6dd2e4ffbd introduce exact_word_prefix database in index 2022-04-04 20:54:03 +02:00
ad hoc
ba0bb29cd8 refactor WordPrefixDocids to take dbs instead of indexes 2022-04-04 20:54:02 +02:00
ad hoc
c4c6e35352 query exact_word_docids in resolve_query_tree 2022-04-04 20:54:02 +02:00
ad hoc
8d46a5b0b5 extract exact word docids 2022-04-04 20:54:02 +02:00
ad hoc
5451c64d5d increase criteria asc desc test map size 2022-04-04 20:54:02 +02:00
ad hoc
0a77be4ec0 introduce exact_word_docids db 2022-04-04 20:54:02 +02:00
ad hoc
5f9f82757d refactor spawn_extraction_task 2022-04-04 20:54:02 +02:00
ad hoc
f82d4b36eb introduce exact attribute setting 2022-04-04 20:54:02 +02:00
ad hoc
c882d8daf0 add test for exact words 2022-04-04 20:54:01 +02:00
ad hoc
7e9d56a9e7 disable typos on exact words 2022-04-04 20:54:01 +02:00
bors[bot]
900825bac0 Merge #474
474: Disable typos on exact word r=MarinPostma a=MarinPostma

This PR introduces the `exact_words` setting to disable typo tolerance on custom words.

If a user query contains a word from `exact_words`, no typo derivation will be made for that particular word.

I have chosen to store the words in a FST, to save on deserialization, and allow for fast lookups.
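
A minimal sketch of that lookup, assuming the `fst` crate's in-memory `Set`; the words here are illustrative:

```rust
use fst::Set;

fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Build the exact-words set once; `from_iter` requires a
    // lexicographically sorted input.
    let exact_words = Set::from_iter(["kubernetes", "meilisearch"])?;

    // At query time, a word found in the set gets no typo derivations.
    let query_word = "meilisearch";
    let derive_typos = !exact_words.contains(query_word);
    assert!(!derive_typos);
    Ok(())
}
```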

I had some trouble with the `serde` module and had to rename it to `serde_impl`.

## steps:
- [x] introduce new settings to register words to disable typos on
- [x] in `typos`, return an exact match if the current word is part of the words to disable typos for.
- [x] update `Context` to return the exact words dictionary.
- [x] merge #473 


Co-authored-by: ad hoc <postma.marin@protonmail.com>
2022-04-04 18:39:43 +00:00
ad hoc
3e67d8818c fix typo in test comment 2022-04-04 20:34:23 +02:00
ad hoc
284d8a24e0 add integration test for disabled typo on word 2022-04-04 20:15:51 +02:00
ad hoc
30a2711bac rename serde module to serde_impl module
needed because of issues with rustfmt
2022-04-04 20:10:55 +02:00
ad hoc
0fd55db21c fmt 2022-04-04 20:10:55 +02:00
ad hoc
559e46be5e fix bad rebase bug 2022-04-04 20:10:55 +02:00
ad hoc
8b1e5d9c6d add test for exact words 2022-04-04 20:10:55 +02:00
ad hoc
774fa8f065 disable typos on exact words 2022-04-04 20:10:55 +02:00
ad hoc
9bbffb8fee add exact words setting 2022-04-04 20:10:54 +02:00
bors[bot]
48a5ce7434 Merge #473
473: set minimum word len for typos r=MarinPostma a=MarinPostma

This PR allows configuring the minimum word length for typos.

The default values are the same as previously.
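
A sketch of what such a configuration could look like; the struct and field names are hypothetical, and the thresholds reflect the usual defaults (1 typo from 5 characters, 2 typos from 9):

```rust
// Hypothetical shape of the TypoConfig mentioned in the steps below.
struct TypoConfig {
    word_len_one_typo: u8,  // minimum word length to allow 1 typo
    word_len_two_typos: u8, // minimum word length to allow 2 typos
}

impl Default for TypoConfig {
    fn default() -> Self {
        TypoConfig { word_len_one_typo: 5, word_len_two_typos: 9 }
    }
}

fn max_typos(config: &TypoConfig, word: &str) -> u8 {
    let len = word.chars().count() as u8;
    if len >= config.word_len_two_typos {
        2
    } else if len >= config.word_len_one_typo {
        1
    } else {
        0
    }
}

fn main() {
    let config = TypoConfig::default();
    assert_eq!(max_typos(&config, "the"), 0);       // too short for typos
    assert_eq!(max_typos(&config, "world"), 1);     // 5 chars: 1 typo allowed
    assert_eq!(max_typos(&config, "tokenizer"), 2); // 9 chars: 2 typos allowed
}
```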

## steps
- [x] introduce settings for the minimum word length for 1 and 2 typos
- [x] update the settings update flow to set this setting
- [x] create a structure `TypoConfig` to configure typo tolerance in the query builder
- [x] in `typo`, use the configuration to create the appropriate query tree node.
- [x] extend `Context` to return the setting for minimum word length for typos
- [x] return correct error message for wrong settings.
- [x] merge #469 

Co-authored-by: ad hoc <postma.marin@protonmail.com>
2022-04-04 17:53:14 +00:00
bors[bot]
6bf9824fec Merge #485
485: fix bug on 2 typos derivation r=Kerollmops a=MarinPostma

I found a bug while working on #473. This PR fixes it and adds the missing tests on word derivations.


Co-authored-by: ad hoc <postma.marin@protonmail.com>
2022-04-04 17:17:53 +00:00
ad hoc
853b4a520f fmt 2022-04-04 10:41:46 +02:00
ad hoc
2cb71dff4a add typo integration tests 2022-04-04 10:41:46 +02:00
ad hoc
1941072bb2 implement Copy on Setting 2022-04-04 10:41:46 +02:00
ad hoc
fdaf45aab2 replace hardcoded value with constant in TestContext 2022-04-04 10:41:46 +02:00
ad hoc
950a740bd4 refactor typos for readability 2022-04-04 10:41:46 +02:00
ad hoc
66020cd923 rename min_word_len* to use plain letter numbers 2022-04-04 10:41:46 +02:00
ad hoc
4c4b336ecb rename min word len for typo error 2022-04-01 11:17:03 +02:00
ad hoc
286dd7b2e4 rename min_word_len_2_typo 2022-04-01 11:17:03 +02:00
ad hoc
55af85db3c add tests for min_word_len_for_typo 2022-04-01 11:17:02 +02:00
ad hoc
9102de5500 fix error message 2022-04-01 11:17:02 +02:00
ad hoc
a1a3a49bc9 dynamic minimum word len for typos in query tree builder 2022-04-01 11:17:02 +02:00
ad hoc
5a24e60572 introduce word len for typo setting 2022-04-01 11:17:02 +02:00
ad hoc
9fe40df960 add word derivations tests 2022-04-01 11:05:18 +02:00
ad hoc
d5ddc6b080 fix 2 typos word derivation bug 2022-04-01 10:51:22 +02:00
bors[bot]
d2d930dd3f Merge #469
469: add authorize typo setting r=Kerollmops a=MarinPostma

This PR adds support for an authorize-typos setting. This makes it possible to disable typos for a whole index. Typos are enabled by default.


Co-authored-by: ad hoc <postma.marin@protonmail.com>
2022-03-31 15:18:08 +00:00
ad hoc
3e34981d9b add test for authorize_typos in update 2022-03-31 14:12:00 +02:00
ad hoc
6ef3bb9d83 fmt 2022-03-31 14:06:23 +02:00
ad hoc
f782fe2062 add authorize_typo_test 2022-03-31 10:08:39 +02:00
ad hoc
c4653347fd add authorize typo setting 2022-03-31 10:05:44 +02:00
bors[bot]
d8dd357326 Merge #480
480: Increase benchmarks (push) CI timeout r=Kerollmops a=Kerollmops

This PR fixes the fact that the benchmarks CI on push were [canceled by GitHub](https://github.com/meilisearch/milli/actions/runs/2028844132) because they reached the default timeout of 6h. This PR changes the timeout to 72h, the same setting as the manually triggered benchmark one.

Co-authored-by: Kerollmops <clement@meilisearch.com>
2022-03-29 18:13:31 +00:00
Kerollmops
6a77c81a28 Increase benchmarks (push) CI timeout 2022-03-29 09:45:36 -07:00
bors[bot]
e10c26e70d Merge #479
479: Update version (v0.24.1) r=Kerollmops a=curquiza

From v0.23.1 to v0.24.1, since we had an issue with the versioning for the previous release

Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
2022-03-24 20:12:37 +00:00
Clémentine Urquizar
ddf78a735b Update version (v0.24.1) 2022-03-24 16:39:45 +01:00
bors[bot]
2c7cafbf20 Merge #475
475: Bump tokenizer r=Kerollmops a=irevoire

This PR bumps the tokenizer to v0.2.9, which fixes an issue we had with lindera where reqwest was used with openssl (which was breaking our benchmarks).

Co-authored-by: Irevoire <tamo@meilisearch.com>
2022-03-23 13:26:44 +00:00
Irevoire
86dd88698d bump tokenizer 2022-03-23 14:25:58 +01:00
bors[bot]
b82f46e862 Merge #476
476: Rollback meilisearch-tokenizer version r=Kerollmops a=irevoire

Lindera often fails to download some data from Google Drive, so we can’t consistently compile meilisearch / milli.
We can’t bump to the latest version (which moved out of Google Drive) either, because lindera uses reqwest with openssl with no way of configuring it, so our benchmarks were not able to run. The latter issue should be fixed by https://github.com/lindera-morphology/lindera/pull/164.

Co-authored-by: Irevoire <tamo@meilisearch.com>
2022-03-22 14:02:00 +00:00
Irevoire
5dc464b9a7 rollback meilisearch-tokenizer version 2022-03-21 17:29:10 +01:00
bors[bot]
90276d9a2d Merge #472
472: Remove useless variables in proximity r=Kerollmops a=ManyTheFish

I was going through the plane sweep algorithm to find some inspiration, and I discovered that we have useless variables that were not detected because of the recursive function.

Co-authored-by: ManyTheFish <many@meilisearch.com>
2022-03-16 15:33:11 +00:00
ManyTheFish
49d59d88c2 Remove useless variables in proximity 2022-03-16 16:12:52 +01:00
bors[bot]
5863afa1a5 Merge #468
468: Add a new error message when the filterableAttributes are empty r=Kerollmops a=brunoocasali

Fixes https://github.com/meilisearch/meilisearch/issues/2140

Is there a good way to reduce the duplication here? Maybe adding a shared function? I don't know the best and idiomatic way to do that; I appreciate any tip!

Another doubt is related to the duplication of the calling:

```rs
// filter.rs:373
FilterError::AttributeNotFilterable {
    attribute,
    filterable: filterable_fields.into_iter().collect::<Vec<_>>().join(" "),
},
```

and

```rs
// filter.rs:424
return Err(point[0].as_external_error(FilterError::AttributeNotFilterable {
    attribute: "_geo",
    filterable: filterable_fields.into_iter().collect::<Vec<_>>().join(" "),
}))?;
```

I think we could move the `filterable_fields.into_iter().collect::<Vec<_>>().join(" ")` directly into the error handling, like the sortable error. I made it into the last commit; if this is something to avoid, let me know and I can remove it :)
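
For reference, the suggestion amounts to something like the following hypothetical shape, where the raw set is kept on the error and joined lazily in `Display` (the message wording is illustrative):

```rust
use std::collections::HashSet;
use std::fmt;

struct AttributeNotFilterable {
    attribute: String,
    filterable_fields: HashSet<String>,
}

impl fmt::Display for AttributeNotFilterable {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // The join now happens in one place instead of at every call site.
        let filterable = self
            .filterable_fields
            .iter()
            .map(String::as_str)
            .collect::<Vec<_>>()
            .join(" ");
        write!(
            f,
            "Attribute `{}` is not filterable. Available filterable attributes: {}.",
            self.attribute, filterable
        )
    }
}

fn main() {
    let error = AttributeNotFilterable {
        attribute: "_geo".to_string(),
        filterable_fields: HashSet::from(["title".to_string()]),
    };
    println!("{error}");
}
```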

Co-authored-by: Bruno Casali <brunoocasali@gmail.com>
2022-03-16 15:02:19 +00:00
Bruno Casali
adc71742c8 Move string concat to the struct instead of in the calling 2022-03-16 10:26:12 -03:00
bors[bot]
cb6b6915a4 Merge #470
470: Set the cargo crate resolver to v2 r=Kerollmops a=MarinPostma

This PR updates the workspace resolver to v2. This should fix [the benchmarks](https://github.com/meilisearch/milli/runs/5558347765?check_suite_focus=true#step:8:184).
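
For reference, the opt-in is a one-line change in the workspace `Cargo.toml`:

```toml
[workspace]
resolver = "2"
```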


Co-authored-by: ad hoc <postma.marin@protonmail.com>
2022-03-16 10:55:22 +00:00
ad hoc
2a31cd13c9 set resolver to v2 2022-03-16 11:47:27 +01:00
Bruno Casali
4822fe1beb Add a better error message when the filterable attrs are empty
Fixes https://github.com/meilisearch/meilisearch/issues/2140
2022-03-15 18:13:59 -03:00
bors[bot]
f04ab67083 Merge #466
466: Bump version to 0.23.1 r=curquiza a=Kerollmops

This PR bumps the crate versions to 0.23.1. Nothing seems to be breaking in the next release.

Co-authored-by: Kerollmops <clement@meilisearch.com>
2022-03-15 17:19:05 +00:00
bors[bot]
ad4c982c68 Merge #439
439: Optimize typo criterion r=Kerollmops a=MarinPostma

This PR implements a couple of optimizations for the typo criterion:

- clamp the max typos on concatenated query words to 1: by considering that a concatenated query word is a typo, we clamp the max number of typos allowed on it to 1. This is useful because we noticed that concatenated query words often introduced words with 2 typos in queries that otherwise didn't allow for 2-typo words.

- Make typos on the first letter count for 2. This change is a big performance gain: by considering typos on the first letter to count as 2 typos, we drastically restrict the search space for 1 typo, and if we reach 2 typos, the search space is reduced as well, as we only consider: (2 typos ∩ correct first letter) ∪ (wrong first letter ∩ 1 typo) instead of 2 typos anywhere in the word. (Both rules are sketched below.)
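
A back-of-the-envelope sketch of the two rules, with hypothetical helpers rather than the actual criterion code:

```rust
// Rule 2: a typo on the first letter counts as one extra typo, pushing such
// candidates out of the 1-typo search space.
fn effective_typos(query: &str, candidate: &str, raw_typos: u8) -> u8 {
    let first_letter_differs = query.chars().next() != candidate.chars().next();
    if first_letter_differs { raw_typos.saturating_add(1) } else { raw_typos }
}

// Rule 1: a word built by concatenating two query words only gets a budget
// of 1 typo, however long it is.
fn typo_budget(base_budget: u8, is_concatenated: bool) -> u8 {
    if is_concatenated { base_budget.min(1) } else { base_budget }
}

fn main() {
    // "harles" -> "charles" is 1 edit, but the first letter differs,
    // so it is charged as 2 typos.
    assert_eq!(effective_typos("harles", "charles", 1), 2);
    // A concatenated word like "davidbowie" keeps a budget of 1 typo.
    assert_eq!(typo_budget(2, true), 1);
}
```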

## benches
```
group                                                                                                    main                                   typo
-----                                                                                                    ----                                   ----
smol-songs.csv: asc + default/Notstandskomitee                                                           2.51      5.8±0.01ms        ? ?/sec    1.00      2.3±0.01ms        ? ?/sec
smol-songs.csv: asc + default/charles                                                                    2.48      3.0±0.01ms        ? ?/sec    1.00   1190.9±1.29µs        ? ?/sec
smol-songs.csv: asc + default/charles mingus                                                             5.56     10.8±0.01ms        ? ?/sec    1.00   1935.3±1.00µs        ? ?/sec
smol-songs.csv: asc + default/david                                                                      1.65      3.9±0.00ms        ? ?/sec    1.00      2.4±0.01ms        ? ?/sec
smol-songs.csv: asc + default/david bowie                                                                3.34     12.5±0.02ms        ? ?/sec    1.00      3.7±0.00ms        ? ?/sec
smol-songs.csv: asc + default/john                                                                       1.00   1849.7±3.74µs        ? ?/sec    1.01   1875.1±4.65µs        ? ?/sec
smol-songs.csv: asc + default/marcus miller                                                              4.32     15.7±0.01ms        ? ?/sec    1.00      3.6±0.01ms        ? ?/sec
smol-songs.csv: asc + default/michael jackson                                                            3.31     12.5±0.01ms        ? ?/sec    1.00      3.8±0.00ms        ? ?/sec
smol-songs.csv: asc + default/tamo                                                                       1.05    565.4±0.86µs        ? ?/sec    1.00    539.3±1.22µs        ? ?/sec
smol-songs.csv: asc + default/thelonious monk                                                            3.49     11.5±0.01ms        ? ?/sec    1.00      3.3±0.00ms        ? ?/sec
smol-songs.csv: asc/Notstandskomitee                                                                     2.59      5.6±0.02ms        ? ?/sec    1.00      2.2±0.01ms        ? ?/sec
smol-songs.csv: asc/charles                                                                              6.05      2.1±0.00ms        ? ?/sec    1.00    347.8±0.60µs        ? ?/sec
smol-songs.csv: asc/charles mingus                                                                       14.46     9.4±0.01ms        ? ?/sec    1.00    649.2±0.97µs        ? ?/sec
smol-songs.csv: asc/david                                                                                3.87      2.4±0.00ms        ? ?/sec    1.00    618.2±0.69µs        ? ?/sec
smol-songs.csv: asc/david bowie                                                                          10.14     9.8±0.01ms        ? ?/sec    1.00    970.8±1.55µs        ? ?/sec
smol-songs.csv: asc/john                                                                                 1.00    546.5±1.10µs        ? ?/sec    1.00    547.1±2.11µs        ? ?/sec
smol-songs.csv: asc/marcus miller                                                                        11.45    10.4±0.06ms        ? ?/sec    1.00    907.9±1.37µs        ? ?/sec
smol-songs.csv: asc/michael jackson                                                                      10.56     9.7±0.01ms        ? ?/sec    1.00    919.6±1.03µs        ? ?/sec
smol-songs.csv: asc/tamo                                                                                 1.03     43.3±0.18µs        ? ?/sec    1.00     42.2±0.23µs        ? ?/sec
smol-songs.csv: asc/thelonious monk                                                                      4.16     10.7±0.02ms        ? ?/sec    1.00      2.6±0.00ms        ? ?/sec
smol-songs.csv: basic filter: <=/Notstandskomitee                                                        1.00     95.7±0.20µs        ? ?/sec    1.15   109.6±10.40µs        ? ?/sec
smol-songs.csv: basic filter: <=/charles                                                                 1.00     27.8±0.15µs        ? ?/sec    1.01     27.9±0.18µs        ? ?/sec
smol-songs.csv: basic filter: <=/charles mingus                                                          1.72    119.2±0.67µs        ? ?/sec    1.00     69.1±0.13µs        ? ?/sec
smol-songs.csv: basic filter: <=/david                                                                   1.00     22.3±0.33µs        ? ?/sec    1.05     23.4±0.19µs        ? ?/sec
smol-songs.csv: basic filter: <=/david bowie                                                             1.59     86.9±0.79µs        ? ?/sec    1.00     54.5±0.31µs        ? ?/sec
smol-songs.csv: basic filter: <=/john                                                                    1.00     17.9±0.06µs        ? ?/sec    1.06     18.9±0.15µs        ? ?/sec
smol-songs.csv: basic filter: <=/marcus miller                                                           1.65    102.7±1.63µs        ? ?/sec    1.00     62.3±0.18µs        ? ?/sec
smol-songs.csv: basic filter: <=/michael jackson                                                         1.76    128.2±1.85µs        ? ?/sec    1.00     72.9±0.19µs        ? ?/sec
smol-songs.csv: basic filter: <=/tamo                                                                    1.00     17.9±0.13µs        ? ?/sec    1.05     18.7±0.20µs        ? ?/sec
smol-songs.csv: basic filter: <=/thelonious monk                                                         1.53    157.5±2.38µs        ? ?/sec    1.00    102.8±0.88µs        ? ?/sec
smol-songs.csv: basic filter: TO/Notstandskomitee                                                        1.00    100.9±4.36µs        ? ?/sec    1.04    105.0±8.25µs        ? ?/sec
smol-songs.csv: basic filter: TO/charles                                                                 1.00     28.4±0.36µs        ? ?/sec    1.03     29.4±0.33µs        ? ?/sec
smol-songs.csv: basic filter: TO/charles mingus                                                          1.71    118.1±1.08µs        ? ?/sec    1.00     68.9±0.26µs        ? ?/sec
smol-songs.csv: basic filter: TO/david                                                                   1.00     24.0±0.26µs        ? ?/sec    1.03     24.6±0.43µs        ? ?/sec
smol-songs.csv: basic filter: TO/david bowie                                                             1.72     95.2±0.30µs        ? ?/sec    1.00     55.2±0.14µs        ? ?/sec
smol-songs.csv: basic filter: TO/john                                                                    1.00     18.8±0.09µs        ? ?/sec    1.06     19.8±0.17µs        ? ?/sec
smol-songs.csv: basic filter: TO/marcus miller                                                           1.61    102.4±1.65µs        ? ?/sec    1.00     63.4±0.24µs        ? ?/sec
smol-songs.csv: basic filter: TO/michael jackson                                                         1.77    132.1±1.41µs        ? ?/sec    1.00     74.5±0.59µs        ? ?/sec
smol-songs.csv: basic filter: TO/tamo                                                                    1.00     18.2±0.14µs        ? ?/sec    1.05     19.2±0.46µs        ? ?/sec
smol-songs.csv: basic filter: TO/thelonious monk                                                         1.49    150.8±1.92µs        ? ?/sec    1.00    101.3±0.44µs        ? ?/sec
smol-songs.csv: basic placeholder/                                                                       1.00     27.3±0.07µs        ? ?/sec    1.03     28.0±0.05µs        ? ?/sec
smol-songs.csv: basic with quote/"Notstandskomitee"                                                      1.00    122.4±0.17µs        ? ?/sec    1.03    125.6±0.16µs        ? ?/sec
smol-songs.csv: basic with quote/"charles"                                                               1.00     88.8±0.30µs        ? ?/sec    1.00     88.4±0.15µs        ? ?/sec
smol-songs.csv: basic with quote/"charles" "mingus"                                                      1.00    685.2±0.74µs        ? ?/sec    1.01    689.4±6.07µs        ? ?/sec
smol-songs.csv: basic with quote/"david"                                                                 1.00    161.6±0.42µs        ? ?/sec    1.01    162.6±0.17µs        ? ?/sec
smol-songs.csv: basic with quote/"david" "bowie"                                                         1.00    731.7±0.73µs        ? ?/sec    1.02    743.1±0.77µs        ? ?/sec
smol-songs.csv: basic with quote/"john"                                                                  1.00    267.1±0.33µs        ? ?/sec    1.01    270.9±0.33µs        ? ?/sec
smol-songs.csv: basic with quote/"marcus" "miller"                                                       1.00    138.7±0.31µs        ? ?/sec    1.02    140.9±0.13µs        ? ?/sec
smol-songs.csv: basic with quote/"michael" "jackson"                                                     1.01    841.4±0.72µs        ? ?/sec    1.00    833.8±0.92µs        ? ?/sec
smol-songs.csv: basic with quote/"tamo"                                                                  1.01    189.2±0.26µs        ? ?/sec    1.00    188.2±0.71µs        ? ?/sec
smol-songs.csv: basic with quote/"thelonious" "monk"                                                     1.00   1100.5±1.36µs        ? ?/sec    1.01   1111.7±2.17µs        ? ?/sec
smol-songs.csv: basic without quote/Notstandskomitee                                                     3.40      7.9±0.02ms        ? ?/sec    1.00      2.3±0.02ms        ? ?/sec
smol-songs.csv: basic without quote/charles                                                              2.57    494.4±0.89µs        ? ?/sec    1.00    192.5±0.18µs        ? ?/sec
smol-songs.csv: basic without quote/charles mingus                                                       1.29      2.8±0.02ms        ? ?/sec    1.00      2.1±0.01ms        ? ?/sec
smol-songs.csv: basic without quote/david                                                                1.95    623.8±0.90µs        ? ?/sec    1.00    319.2±1.22µs        ? ?/sec
smol-songs.csv: basic without quote/david bowie                                                          1.12      5.9±0.00ms        ? ?/sec    1.00      5.2±0.00ms        ? ?/sec
smol-songs.csv: basic without quote/john                                                                 1.24   1340.9±2.25µs        ? ?/sec    1.00   1084.7±7.76µs        ? ?/sec
smol-songs.csv: basic without quote/marcus miller                                                        7.97     14.6±0.01ms        ? ?/sec    1.00   1826.0±6.84µs        ? ?/sec
smol-songs.csv: basic without quote/michael jackson                                                      1.19      3.9±0.00ms        ? ?/sec    1.00      3.3±0.00ms        ? ?/sec
smol-songs.csv: basic without quote/tamo                                                                 1.65    737.7±3.58µs        ? ?/sec    1.00    446.7±0.51µs        ? ?/sec
smol-songs.csv: basic without quote/thelonious monk                                                      1.16      4.5±0.02ms        ? ?/sec    1.00      3.9±0.04ms        ? ?/sec
smol-songs.csv: big filter/Notstandskomitee                                                              3.27      7.6±0.02ms        ? ?/sec    1.00      2.3±0.01ms        ? ?/sec
smol-songs.csv: big filter/charles                                                                       8.26   1957.5±1.37µs        ? ?/sec    1.00    236.8±0.34µs        ? ?/sec
smol-songs.csv: big filter/charles mingus                                                                18.49    11.2±0.06ms        ? ?/sec    1.00    607.7±3.03µs        ? ?/sec
smol-songs.csv: big filter/david                                                                         3.78      2.4±0.00ms        ? ?/sec    1.00    622.8±0.80µs        ? ?/sec
smol-songs.csv: big filter/david bowie                                                                   9.00     12.0±0.01ms        ? ?/sec    1.00   1336.0±3.17µs        ? ?/sec
smol-songs.csv: big filter/john                                                                          1.00    554.2±0.95µs        ? ?/sec    1.01    560.4±0.79µs        ? ?/sec
smol-songs.csv: big filter/marcus miller                                                                 18.09    12.0±0.01ms        ? ?/sec    1.00    664.7±0.60µs        ? ?/sec
smol-songs.csv: big filter/michael jackson                                                               8.43     12.0±0.01ms        ? ?/sec    1.00   1421.6±1.37µs        ? ?/sec
smol-songs.csv: big filter/tamo                                                                          1.00     86.3±0.14µs        ? ?/sec    1.01     87.3±0.21µs        ? ?/sec
smol-songs.csv: big filter/thelonious monk                                                               5.55     14.3±0.02ms        ? ?/sec    1.00      2.6±0.01ms        ? ?/sec
smol-songs.csv: desc + default/Notstandskomitee                                                          2.52      5.8±0.01ms        ? ?/sec    1.00      2.3±0.01ms        ? ?/sec
smol-songs.csv: desc + default/charles                                                                   3.04      2.7±0.01ms        ? ?/sec    1.00    893.4±1.08µs        ? ?/sec
smol-songs.csv: desc + default/charles mingus                                                            6.77     10.3±0.01ms        ? ?/sec    1.00   1520.8±1.90µs        ? ?/sec
smol-songs.csv: desc + default/david                                                                     1.39      5.7±0.00ms        ? ?/sec    1.00      4.1±0.00ms        ? ?/sec
smol-songs.csv: desc + default/david bowie                                                               2.34     15.8±0.02ms        ? ?/sec    1.00      6.7±0.01ms        ? ?/sec
smol-songs.csv: desc + default/john                                                                      1.00      2.5±0.00ms        ? ?/sec    1.02      2.6±0.01ms        ? ?/sec
smol-songs.csv: desc + default/marcus miller                                                             5.06     14.5±0.02ms        ? ?/sec    1.00      2.9±0.01ms        ? ?/sec
smol-songs.csv: desc + default/michael jackson                                                           2.64     14.1±0.05ms        ? ?/sec    1.00      5.4±0.00ms        ? ?/sec
smol-songs.csv: desc + default/tamo                                                                      1.00    567.0±0.65µs        ? ?/sec    1.00    565.7±0.97µs        ? ?/sec
smol-songs.csv: desc + default/thelonious monk                                                           3.55     11.6±0.02ms        ? ?/sec    1.00      3.3±0.00ms        ? ?/sec
smol-songs.csv: desc/Notstandskomitee                                                                    2.58      5.6±0.02ms        ? ?/sec    1.00      2.2±0.02ms        ? ?/sec
smol-songs.csv: desc/charles                                                                             6.04      2.1±0.00ms        ? ?/sec    1.00    348.1±0.57µs        ? ?/sec
smol-songs.csv: desc/charles mingus                                                                      14.51     9.4±0.01ms        ? ?/sec    1.00    646.7±0.99µs        ? ?/sec
smol-songs.csv: desc/david                                                                               3.86      2.4±0.00ms        ? ?/sec    1.00    620.7±2.46µs        ? ?/sec
smol-songs.csv: desc/david bowie                                                                         10.10     9.8±0.01ms        ? ?/sec    1.00    973.9±3.31µs        ? ?/sec
smol-songs.csv: desc/john                                                                                1.00    545.5±0.78µs        ? ?/sec    1.00    547.2±0.48µs        ? ?/sec
smol-songs.csv: desc/marcus miller                                                                       11.39    10.3±0.01ms        ? ?/sec    1.00    903.7±0.95µs        ? ?/sec
smol-songs.csv: desc/michael jackson                                                                     10.51     9.7±0.01ms        ? ?/sec    1.00    924.7±2.02µs        ? ?/sec
smol-songs.csv: desc/tamo                                                                                1.01     43.2±0.33µs        ? ?/sec    1.00     42.6±0.35µs        ? ?/sec
smol-songs.csv: desc/thelonious monk                                                                     4.19     10.8±0.03ms        ? ?/sec    1.00      2.6±0.00ms        ? ?/sec
smol-songs.csv: prefix search/a                                                                          1.00   1008.7±1.00µs        ? ?/sec    1.00   1005.5±0.91µs        ? ?/sec
smol-songs.csv: prefix search/b                                                                          1.00    885.0±0.70µs        ? ?/sec    1.01    890.6±1.11µs        ? ?/sec
smol-songs.csv: prefix search/i                                                                          1.00   1051.8±1.25µs        ? ?/sec    1.00   1056.6±4.12µs        ? ?/sec
smol-songs.csv: prefix search/s                                                                          1.00    724.7±1.77µs        ? ?/sec    1.00    721.6±0.59µs        ? ?/sec
smol-songs.csv: prefix search/x                                                                          1.01    212.4±0.21µs        ? ?/sec    1.00    210.9±0.38µs        ? ?/sec
smol-songs.csv: proximity/7000 Danses Un Jour Dans Notre Vie                                             18.55    48.5±0.09ms        ? ?/sec    1.00      2.6±0.03ms        ? ?/sec
smol-songs.csv: proximity/The Disneyland Sing-Along Chorus                                               8.41     56.7±0.45ms        ? ?/sec    1.00      6.7±0.05ms        ? ?/sec
smol-songs.csv: proximity/Under Great Northern Lights                                                    15.74    38.9±0.14ms        ? ?/sec    1.00      2.5±0.00ms        ? ?/sec
smol-songs.csv: proximity/black saint sinner lady                                                        11.82    40.1±0.13ms        ? ?/sec    1.00      3.4±0.02ms        ? ?/sec
smol-songs.csv: proximity/les dangeureuses 1960                                                          6.90     26.1±0.13ms        ? ?/sec    1.00      3.8±0.04ms        ? ?/sec
smol-songs.csv: typo/Arethla Franklin                                                                    14.93     5.8±0.01ms        ? ?/sec    1.00    390.1±1.89µs        ? ?/sec
smol-songs.csv: typo/Disnaylande                                                                         3.18      7.3±0.01ms        ? ?/sec    1.00      2.3±0.00ms        ? ?/sec
smol-songs.csv: typo/dire straights                                                                      5.55     15.2±0.02ms        ? ?/sec    1.00      2.7±0.00ms        ? ?/sec
smol-songs.csv: typo/fear of the duck                                                                    28.03    20.0±0.03ms        ? ?/sec    1.00    713.3±1.54µs        ? ?/sec
smol-songs.csv: typo/indochie                                                                            19.25  1851.4±2.38µs        ? ?/sec    1.00     96.2±0.13µs        ? ?/sec
smol-songs.csv: typo/indochien                                                                           14.66  1887.7±3.18µs        ? ?/sec    1.00    128.8±0.18µs        ? ?/sec
smol-songs.csv: typo/klub des loopers                                                                    37.73    18.0±0.02ms        ? ?/sec    1.00    476.7±0.73µs        ? ?/sec
smol-songs.csv: typo/michel depech                                                                       10.17     5.8±0.01ms        ? ?/sec    1.00    565.8±1.16µs        ? ?/sec
smol-songs.csv: typo/mongus                                                                              15.33  1897.4±3.44µs        ? ?/sec    1.00    123.8±0.13µs        ? ?/sec
smol-songs.csv: typo/stromal                                                                             14.63  1859.3±2.40µs        ? ?/sec    1.00    127.1±0.29µs        ? ?/sec
smol-songs.csv: typo/the white striper                                                                   10.83     9.4±0.01ms        ? ?/sec    1.00    866.0±0.98µs        ? ?/sec
smol-songs.csv: typo/thelonius monk                                                                      14.40     3.8±0.00ms        ? ?/sec    1.00    261.5±1.30µs        ? ?/sec
smol-songs.csv: words/7000 Danses / Le Baiser / je me trompe de mots                                     5.54     70.8±0.09ms        ? ?/sec    1.00     12.8±0.03ms        ? ?/sec
smol-songs.csv: words/Bring Your Daughter To The Slaughter but now this is not part of the title         3.48    119.8±0.14ms        ? ?/sec    1.00     34.4±0.04ms        ? ?/sec
smol-songs.csv: words/The Disneyland Children's Sing-Alone song                                          8.98     71.9±0.12ms        ? ?/sec    1.00      8.0±0.01ms        ? ?/sec
smol-songs.csv: words/les liaisons dangeureuses 1793                                                     11.88    37.4±0.07ms        ? ?/sec    1.00      3.1±0.01ms        ? ?/sec
smol-songs.csv: words/seven nation mummy                                                                 22.86    23.4±0.04ms        ? ?/sec    1.00   1024.8±1.57µs        ? ?/sec
smol-songs.csv: words/the black saint and the sinner lady and the good doggo                             2.76    124.4±0.15ms        ? ?/sec    1.00     45.1±0.09ms        ? ?/sec
smol-songs.csv: words/whathavenotnsuchforth and a good amount of words to pop to match the first one     2.52    107.0±0.23ms        ? ?/sec    1.00     42.4±0.66ms        ? ?/sec

group                                                                                    main-wiki                              typo-wiki
-----                                                                                    ---------                              ---------
smol-wiki-articles.csv: basic placeholder/                                               1.02     13.7±0.02µs        ? ?/sec    1.00     13.4±0.03µs        ? ?/sec
smol-wiki-articles.csv: basic with quote/"film"                                          1.02    409.8±0.67µs        ? ?/sec    1.00    402.6±0.48µs        ? ?/sec
smol-wiki-articles.csv: basic with quote/"france"                                        1.00    325.9±0.91µs        ? ?/sec    1.00    326.4±0.49µs        ? ?/sec
smol-wiki-articles.csv: basic with quote/"japan"                                         1.00    218.4±0.26µs        ? ?/sec    1.01    220.5±0.20µs        ? ?/sec
smol-wiki-articles.csv: basic with quote/"machine"                                       1.00    143.0±0.12µs        ? ?/sec    1.04    148.8±0.21µs        ? ?/sec
smol-wiki-articles.csv: basic with quote/"miles" "davis"                                 1.00     11.7±0.06ms        ? ?/sec    1.00     11.8±0.01ms        ? ?/sec
smol-wiki-articles.csv: basic with quote/"mingus"                                        1.00      4.4±0.03ms        ? ?/sec    1.00      4.4±0.00ms        ? ?/sec
smol-wiki-articles.csv: basic with quote/"rock" "and" "roll"                             1.00     43.5±0.08ms        ? ?/sec    1.01     43.8±0.06ms        ? ?/sec
smol-wiki-articles.csv: basic with quote/"spain"                                         1.00    137.3±0.35µs        ? ?/sec    1.05    144.4±0.23µs        ? ?/sec
smol-wiki-articles.csv: basic without quote/film                                         1.00    125.3±0.30µs        ? ?/sec    1.06    133.1±0.37µs        ? ?/sec
smol-wiki-articles.csv: basic without quote/france                                       1.21   1782.6±1.65µs        ? ?/sec    1.00   1477.0±1.39µs        ? ?/sec
smol-wiki-articles.csv: basic without quote/japan                                        1.28   1363.9±0.80µs        ? ?/sec    1.00   1064.3±1.79µs        ? ?/sec
smol-wiki-articles.csv: basic without quote/machine                                      1.73    760.3±0.81µs        ? ?/sec    1.00    439.6±0.75µs        ? ?/sec
smol-wiki-articles.csv: basic without quote/miles davis                                  1.03     17.0±0.03ms        ? ?/sec    1.00     16.5±0.02ms        ? ?/sec
smol-wiki-articles.csv: basic without quote/mingus                                       1.07      5.3±0.01ms        ? ?/sec    1.00      5.0±0.00ms        ? ?/sec
smol-wiki-articles.csv: basic without quote/rock and roll                                1.01     63.9±0.18ms        ? ?/sec    1.00     63.0±0.07ms        ? ?/sec
smol-wiki-articles.csv: basic without quote/spain                                        2.07    667.4±0.93µs        ? ?/sec    1.00    322.8±0.29µs        ? ?/sec
smol-wiki-articles.csv: prefix search/c                                                  1.00    343.1±0.47µs        ? ?/sec    1.00    344.0±0.34µs        ? ?/sec
smol-wiki-articles.csv: prefix search/g                                                  1.00    374.4±3.42µs        ? ?/sec    1.00    374.1±0.44µs        ? ?/sec
smol-wiki-articles.csv: prefix search/j                                                  1.00    359.9±0.31µs        ? ?/sec    1.00    361.2±0.79µs        ? ?/sec
smol-wiki-articles.csv: prefix search/q                                                  1.01    102.0±0.12µs        ? ?/sec    1.00    101.4±0.32µs        ? ?/sec
smol-wiki-articles.csv: prefix search/t                                                  1.00    536.7±1.39µs        ? ?/sec    1.00    534.3±0.84µs        ? ?/sec
smol-wiki-articles.csv: prefix search/x                                                  1.00    400.9±1.00µs        ? ?/sec    1.00    399.5±0.45µs        ? ?/sec
smol-wiki-articles.csv: proximity/april paris                                            3.86     14.4±0.01ms        ? ?/sec    1.00      3.7±0.01ms        ? ?/sec
smol-wiki-articles.csv: proximity/diesel engine                                          12.98    10.4±0.01ms        ? ?/sec    1.00    803.5±1.13µs        ? ?/sec
smol-wiki-articles.csv: proximity/herald sings                                           1.00     12.7±0.06ms        ? ?/sec    5.29     67.1±0.09ms        ? ?/sec
smol-wiki-articles.csv: proximity/tea two                                                6.48   1452.1±2.78µs        ? ?/sec    1.00    224.1±0.38µs        ? ?/sec
smol-wiki-articles.csv: typo/Disnaylande                                                 3.89      8.5±0.01ms        ? ?/sec    1.00      2.2±0.01ms        ? ?/sec
smol-wiki-articles.csv: typo/aritmetric                                                  3.78     10.3±0.01ms        ? ?/sec    1.00      2.7±0.00ms        ? ?/sec
smol-wiki-articles.csv: typo/linax                                                       8.91   1426.7±0.97µs        ? ?/sec    1.00    160.1±0.18µs        ? ?/sec
smol-wiki-articles.csv: typo/migrosoft                                                   7.48   1417.3±5.84µs        ? ?/sec    1.00    189.5±0.88µs        ? ?/sec
smol-wiki-articles.csv: typo/nympalidea                                                  3.96      7.2±0.01ms        ? ?/sec    1.00   1810.1±2.03µs        ? ?/sec
smol-wiki-articles.csv: typo/phytogropher                                                3.71      7.2±0.01ms        ? ?/sec    1.00   1934.3±6.51µs        ? ?/sec
smol-wiki-articles.csv: typo/sisan                                                       6.44   1497.2±1.38µs        ? ?/sec    1.00    232.7±0.94µs        ? ?/sec
smol-wiki-articles.csv: typo/the fronce                                                  6.92      2.9±0.00ms        ? ?/sec    1.00    418.0±1.76µs        ? ?/sec
smol-wiki-articles.csv: words/Abraham machin                                             16.63    10.8±0.01ms        ? ?/sec    1.00    649.7±1.08µs        ? ?/sec
smol-wiki-articles.csv: words/Idaho Bellevue pizza                                       27.15    25.6±0.03ms        ? ?/sec    1.00    944.2±5.07µs        ? ?/sec
smol-wiki-articles.csv: words/Kameya Tokujirō mingus monk                                26.87    40.7±0.05ms        ? ?/sec    1.00   1515.3±2.73µs        ? ?/sec
smol-wiki-articles.csv: words/Ulrich Hensel meilisearch milli                            11.99    48.8±0.10ms        ? ?/sec    1.00      4.1±0.02ms        ? ?/sec
smol-wiki-articles.csv: words/the black saint and the sinner lady and the good doggo     4.90    110.0±0.15ms        ? ?/sec    1.00     22.4±0.03ms        ? ?/sec

```

Co-authored-by: mpostma <postma.marin@protonmail.com>
Co-authored-by: ad hoc <postma.marin@protonmail.com>
2022-03-15 16:43:36 +00:00
ad hoc
3f24555c3d custom fst automatons 2022-03-15 17:38:35 +01:00
ad hoc
628c835a22 fix tests 2022-03-15 17:38:34 +01:00
bors[bot]
8efac33b53 Merge #467
467: optimize prefix database r=Kerollmops a=MarinPostma

This PR introduces two optimizations that greatly improve the speed of computing the prefix databases.

- The time that it takes to create the prefix FST has been divided by 5 by inverting the way we iterate over the words FST (see the sketch below).
- We unconditionally and needlessly checked for documents to remove in `word_prefix_pair`, which caused an iteration over the whole database.
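
One way to picture the inversion over the words FST; this is a speculative sketch of the idea, not the actual patch:

```rust
use std::collections::BTreeSet;
use fst::{Set, Streamer};

// Stream the sorted words FST once and collect every prefix up to `max_len`,
// instead of probing the words FST once per candidate prefix.
fn compute_prefixes(words: &Set<Vec<u8>>, max_len: usize) -> BTreeSet<Vec<u8>> {
    let mut prefixes = BTreeSet::new();
    let mut stream = words.stream();
    while let Some(word) = stream.next() {
        for len in 1..=max_len.min(word.len()) {
            prefixes.insert(word[..len].to_vec());
        }
    }
    prefixes
}

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let words = Set::from_iter(["hell", "hello", "help"])?;
    let prefixes = compute_prefixes(&words, 2);
    assert_eq!(prefixes.len(), 2); // "h" and "he"
    Ok(())
}
```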

Co-authored-by: ad hoc <postma.marin@protonmail.com>
2022-03-15 16:14:35 +00:00
ad hoc
d127c57f2d review edits 2022-03-15 17:12:48 +01:00
ad hoc
d633ac5b9d optimize word prefix pair 2022-03-15 16:37:22 +01:00
ad hoc
d68fe2b3c7 optimize word prefix fst 2022-03-15 16:36:48 +01:00
Kerollmops
08a06b49f0 Bump version to 0.23.1 2022-03-15 15:50:28 +01:00
bors[bot]
d87e8b63a9 Merge #465
465: Update dependencies r=ManyTheFish a=Kerollmops

This PR upgrade and updates this crate's dependencies but first, it removes three dependencies that we don't use anymore. I used [cargo udeps](https://github.com/est31/cargo-udeps) to upgrade them ⬆️

Co-authored-by: Kerollmops <clement@meilisearch.com>
Co-authored-by: Clément Renault <clement@meilisearch.com>
2022-03-15 13:49:17 +00:00
Clément Renault
0c5f4ed7de Apply suggestions
Co-authored-by: Many <many@meilisearch.com>
2022-03-15 14:18:29 +01:00
Kerollmops
21ec334dcc Fix the compilation error of the dependency versions 2022-03-15 11:17:45 +01:00
Kerollmops
63682c2c9a Upgrade the dependencies 2022-03-15 11:17:44 +01:00
Kerollmops
288a879411 Remove three useless dependencies 2022-03-15 11:17:44 +01:00
bors[bot]
712bf035a7 Merge #464
464: exporting heed to avoid having different versions of Heed in Meilisearch r=curquiza a=psvnlsaikumar

# Pull Request

## What does this PR do?
Fixes the issue in meilisearch https://github.com/meilisearch/meilisearch/issues/2210
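
The change itself boils down to a re-export in milli's `lib.rs` (a sketch):

```rust
// Re-export heed so that downstream crates such as Meilisearch depend on the
// exact same heed version as milli, instead of pulling in their own copy.
pub use heed;
```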

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: psvnl sai kumar <psvnlsaikumar@gmail.com>
2022-03-15 09:51:56 +00:00
psvnl sai kumar
5e08fac729 fixes for rustfmt pass 2022-03-14 19:22:41 +05:30
psvnl sai kumar
92e2e09434 exporting heed to avoid having different versions of Heed in Meilisearch 2022-03-14 01:01:58 +05:30
bors[bot]
290a29b5fb Merge #457
457: Avoid iterating on big databases when useless r=Kerollmops a=Kerollmops

This PR makes the prefix database updates avoid iterating on big grenad files when it is unnecessary. We introduced this regression in #436 but it went unnoticed.

---

According to the following benchmark results, we take more time to index documents in one run than before #436. It is probably due to the fact that, instead of computing the prefix databases by iterating on LMDB, we now directly iterate on the grenad file. Grenad files could be slower to iterate on and could be the cause of the slowdown.

I just pushed a commit that tests this branch with the new unreleased version of grenad where some work was done to speed up the iteration on grenad files. [The benchmarks for this last commit](https://github.com/meilisearch/milli/actions/runs/1927187408) are currently running. You can [see the diff](https://github.com/meilisearch/grenad/compare/v0.4.1...main) between the v0.4 and the unreleased v0.5 version of grenad.

```diff
  group                                                             indexing_benchmark-multi-batch-indexing-before-speed-up_45f52620    indexing_stop-iterating-on-big-grenad-files_ac8b85c4
  -----                                                             ----------------------------------------------------------------    ----------------------------------------------------
+ indexing/Indexing songs in three batches with default settings    1.12      57.7±2.14s        ? ?/sec                                 1.00      51.3±2.76s        ? ?/sec
- indexing/Indexing wiki                                            1.00    917.3±30.01s        ? ?/sec                                 1.10   1008.4±38.27s        ? ?/sec
+ indexing/Indexing wiki in three batches                           1.10   1091.2±32.73s        ? ?/sec                                 1.00    995.5±24.33s        ? ?/sec
```

Co-authored-by: Kerollmops <clement@meilisearch.com>
2022-03-09 16:46:34 +00:00
Kerollmops
1ae13c1374 Avoid iterating on big databases when useless 2022-03-09 15:43:54 +01:00
bors[bot]
a8d28e364d Merge #461
461: Add a new error message when the `valid_fields` is empty r=curquiza a=brunoocasali

I've created a test case to cover the new error formatting behavior, but I'm not sure if:

- this is the right place to add the test?
- this is the best way to test this behavior?

I'm also not sure about the `match` implementation: is it required, or would a simple `if` statement be OK as well?

I left the two messages written out literally, without "reusing the prefix" in the implementation, because I think this could help the "searchability" of the error in the future.

# Pull Request

## What does this PR do?
Fixes https://github.com/meilisearch/meilisearch/issues/2140

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue?
- [ ] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Bruno Casali <brunoocasali@gmail.com>
2022-03-08 09:55:58 +00:00
bors[bot]
2ef5751795 Merge #463
463: Allow setting the primary-key in the cli r=irevoire a=irevoire



Co-authored-by: Tamo <tamo@meilisearch.com>
2022-03-07 14:11:40 +00:00
Tamo
8bb45956d4 allow to set the primary key in the cli 2022-03-07 14:56:49 +01:00
bors[bot]
3cbadf92b6 Merge #462
462: cli improvements r=Kerollmops a=MarinPostma

a few improvements:
- use bufreader to load documents, so the loading of the document doesn't appear on flamegraphs
- set default db path to current directory so the `-i` flag can be omitted.



Co-authored-by: ad hoc <postma.marin@protonmail.com>
2022-03-07 09:39:01 +00:00
ad hoc
db3a1905de default db path 2022-03-07 10:30:47 +01:00
ad hoc
6cf82ba993 bufread documents 2022-03-07 10:29:52 +01:00
Bruno Casali
66c6d5e1ef Add a new error message when the valid_fields is empty
> "Attribute `{}` is not sortable. This index doesn't have configured sortable attributes."
> "Attribute `{}` is not sortable. Available sortable attributes are: `{}`."

coexist in the error handling
2022-03-05 10:38:18 -03:00
bors[bot]
df518d8b0b Merge #459
459: Update heed link in cargo toml r=Kerollmops a=curquiza

Since grenad and heed have been moved to the meilisearch orga, this PR changes the link.
This is a minor change since GitHub automatically handles the redirection. This PR is only for consistency.

Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
2022-03-01 19:47:14 +00:00
Clémentine Urquizar
d9ed9de2b0 Update heed link in cargo toml 2022-03-01 19:45:29 +01:00
bors[bot]
51cf44d6fd Merge #456
456: Remove useless grenad merging r=Kerollmops a=Kerollmops

This PR must be merged after #454.

This PR removes the code that was merging all of the grenad Readers, a merge that we don't need as the indexer should already have merged them, leaving us with only one final grenad Reader. This reduces the CPU usage and memory pressure we were uselessly incurring.

`@ManyTheFish` are you sure I can skip merging the `word_docids` database?

Here is the benchmark comparison with the previously merged PR #454:
```
group                                              indexing_reintroduce-appending-sorted-values_c05e42a8    indexing_remove-useless-grenad-merging_d5b8b5a2
-----                                              -----------------------------------------------------    -----------------------------------------------
indexing/Indexing movies with default settings     1.06      16.6±1.04s        ? ?/sec                      1.00      15.7±0.93s        ? ?/sec
indexing/Indexing songs with default settings      1.16      60.1±7.07s        ? ?/sec                      1.00      51.7±5.98s        ? ?/sec
indexing/Indexing songs without faceted numbers    1.06      55.4±6.14s        ? ?/sec                      1.00      52.2±4.13s        ? ?/sec
```

And in the comparison with multi-batch indexing before #436, we can see that we gain time on benchmarks that index datasets in multiple batches, but there is _so much_ variance that it's not clear.

```
group                                                             indexing_benchmark-multi-batch-indexing-before-speed-up_45f52620    indexing_remove-useless-grenad-merging_d5b8b5a2
-----                                                             ----------------------------------------------------------------    -----------------------------------------------
indexing/Indexing geo_point                                       1.07       6.6±0.08s        ? ?/sec                                 1.00       6.2±0.11s        ? ?/sec
indexing/Indexing songs in three batches with default settings    1.12      57.7±2.14s        ? ?/sec                                 1.00      51.5±3.80s        ? ?/sec
indexing/Indexing songs with default settings                     1.00      47.5±2.52s        ? ?/sec                                 1.09      51.7±5.98s        ? ?/sec
indexing/Indexing songs without any facets                        1.00      43.5±1.43s        ? ?/sec                                 1.12      48.8±3.73s        ? ?/sec
indexing/Indexing songs without faceted numbers                   1.00      47.1±2.23s        ? ?/sec                                 1.11      52.2±4.13s        ? ?/sec
indexing/Indexing wiki                                            1.00    917.3±30.01s        ? ?/sec                                 1.09    998.7±38.92s        ? ?/sec
indexing/Indexing wiki in three batches                           1.09   1091.2±32.73s        ? ?/sec                                 1.00    996.5±15.70s        ? ?/sec
```

What do you think `@irevoire?` Should we change the benchmarks to make them do more runs?

Co-authored-by: Kerollmops <clement@meilisearch.com>
2022-03-01 16:48:08 +00:00
Kerollmops
d5b8b5a2f8 Replace the ugly unwraps by clean if let Somes 2022-02-28 16:31:33 +01:00
Kerollmops
8d26f3040c Remove a useless grenad file merging 2022-02-28 16:31:33 +01:00
bors[bot]
21898ffc60 Merge #454
454: Reintroduce appending sorted entries when possible r=Kerollmops a=Kerollmops

This PR modifies the `sorter_into_lmdb_database` function to append values into the database instead of get-put-merging them; it should improve the indexing speed when the database is empty.
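
As a rough illustration of why appending wins, here is a minimal sketch with a `BTreeMap` standing in for the LMDB database (hypothetical names, not milli's actual code):

```rust
use std::collections::BTreeMap;

/// Writes already-sorted (key, value) entries into the store. When the store
/// is empty we can "append" blindly; otherwise we fall back to get-put-merge.
fn write_sorted_entries<I>(
    store: &mut BTreeMap<Vec<u8>, Vec<u8>>,
    entries: I,
    merge: impl Fn(&[u8], &[u8]) -> Vec<u8>,
) where
    I: Iterator<Item = (Vec<u8>, Vec<u8>)>,
{
    let store_was_empty = store.is_empty();
    for (key, value) in entries {
        if store_was_empty {
            // Append path: no lookup, no merge, just a plain insertion.
            store.insert(key, value);
        } else if let Some(previous) = store.get(&key) {
            let merged = merge(previous, &value);
            store.insert(key, merged);
        } else {
            store.insert(key, value);
        }
    }
}
```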

```txt
group                                             indexing_main_25123af3                 indexing_reintroduce-appending-sorted-values_c05e42a8
-----                                             ----------------------                 -----------------------------------------------------
indexing/Indexing movies with default settings    1.07      17.8±0.99s        ? ?/sec    1.00      16.6±1.04s        ? ?/sec
indexing/Indexing songs with default settings     1.00      57.0±6.01s        ? ?/sec    1.05      60.1±7.07s        ? ?/sec
indexing/Indexing songs without any facets        1.10      51.8±5.36s        ? ?/sec    1.00      47.3±3.30s        ? ?/sec
```

Co-authored-by: Clément Renault <clement@meilisearch.com>
2022-02-28 14:55:37 +00:00
Clément Renault
04b1bbf932 Reintroduce appending sorted entries when possible 2022-02-24 14:50:45 +01:00
bors[bot]
382be56d36 Merge #453
453: Benchmark multi batch indexing r=Kerollmops a=Kerollmops

Hey `@irevoire,` could you please add the new benchmarks into influx?

Co-authored-by: Kerollmops <clement@meilisearch.com>
Co-authored-by: Clément Renault <clement@meilisearch.com>
2022-02-24 12:33:13 +00:00
Clément Renault
acfc96525c Apply GitHub suggestions 2022-02-23 16:20:29 +01:00
Clément Renault
a820aa11e6 Add a new movies benchmark to test multi batch indexing 2022-02-23 16:20:29 +01:00
Kerollmops
8d2e3e4aba Add a new wiki benchmark to test multi batch indexing 2022-02-23 16:20:29 +01:00
Kerollmops
ab5247dc64 Add a new songs benchmark to test multi batch indexing 2022-02-23 16:20:28 +01:00
bors[bot]
acd9535588 Merge #455
455: Raise the GitHub CI timeout limit to 72h r=irevoire a=Kerollmops



Co-authored-by: Kerollmops <clement@meilisearch.com>
2022-02-23 14:33:31 +00:00
Kerollmops
19bfb2649b Raise the GitHub CI timeout limit to 72h 2022-02-23 15:27:51 +01:00
bors[bot]
25123af3b8 Merge #436
436: Speed up the word prefix databases computation time r=Kerollmops a=Kerollmops

This PR depends on the fixes done in #431 and must be merged after it.

In this PR we will bring the `WordPrefixPairProximityDocids`, `WordPrefixDocids`, and `WordPrefixPositionDocids` update structures into a new era, a better era, where computing the word prefix pair proximities costs much fewer CPU cycles, an era where these update structures can use the previously computed set of new word docids from the newly indexed batch of documents.

---

The `WordPrefixPairProximityDocids` is an update structure, which means that it is an object that we feed with some parameters and which modifies the LMDB database of an index when asked to. This structure specifically computes the list of word prefix pair proximities, which correspond to a list of pairs of words associated with a proximity (the distance between both words), where the second element of the pair is not a full word but a prefix, e.g. `s`, `se`, `a`. This word prefix pair proximity is associated with the list of document ids which contain the pair of word and prefix at the given proximity.
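
As an illustration of the shape of that data, here is a simplified sketch, with plain `Vec<u32>` standing in for the roaring bitmaps of document ids (names and types are assumptions, not milli's actual code):

```rust
use std::collections::BTreeMap;

/// Derives the (word, prefix, proximity) -> doc ids entries from the
/// (word1, word2, proximity) -> doc ids pairs, for every prefix of word2.
fn word_prefix_pair_proximity_docids(
    word_pairs: &BTreeMap<(String, String, u8), Vec<u32>>,
    prefixes: &[String],
) -> BTreeMap<(String, String, u8), Vec<u32>> {
    let mut output: BTreeMap<(String, String, u8), Vec<u32>> = BTreeMap::new();
    for ((word1, word2, proximity), docids) in word_pairs {
        for prefix in prefixes.iter().filter(|p| word2.starts_with(p.as_str())) {
            output
                .entry((word1.clone(), prefix.clone(), *proximity))
                .or_default()
                .extend_from_slice(docids);
        }
    }
    output
}
```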

The origin of the performance issue this struct brings is that it starts its job from scratch: it clears the LMDB database before rewriting everything, using the other LMDB databases to achieve that. I hope you understand that this is absolutely not an optimized way of doing things.

Co-authored-by: Clément Renault <clement@meilisearch.com>
Co-authored-by: Kerollmops <clement@meilisearch.com>
2022-02-16 15:41:14 +00:00
Clément Renault
ff8d7a810d Change the behavior of the as_cloneable_grenad by taking a ref 2022-02-16 15:40:08 +01:00
Clément Renault
f367cc2e75 Finally bump grenad to v0.4.1 2022-02-16 15:28:48 +01:00
bors[bot]
f2984f66e6 Merge #452
452: bump milli r=curquiza a=irevoire



Co-authored-by: Irevoire <tamo@meilisearch.com>
2022-02-16 13:49:14 +00:00
Irevoire
0defeb268c bump milli 2022-02-16 13:27:41 +01:00
bors[bot]
030064da25 Merge #451
451: Update LICENSE with Meili SAS name r=Kerollmops a=curquiza

Checked with Thomas: we must put the real name of the company.

Co-authored-by: Clémentine Urquizar - curqui <clementine@meilisearch.com>
2022-02-15 16:18:47 +00:00
Clémentine Urquizar - curqui
84035a27f5 Update LICENSE 2022-02-15 15:52:50 +01:00
bors[bot]
0885fcf973 Merge #450
450: Get rid of chrono in favor of time r=Kerollmops a=irevoire

We only use `chrono` as a wrapper around `time`, and since there has been an [open CVE on `chrono` for at least 3 months now](https://github.com/chronotope/chrono/pull/632) and the repo seems to be [struggling with maintenance](https://github.com/chronotope/chrono/pull/639), I think we should use `time` directly, which is way more active and sufficient for our use case.

EDIT: Actually the CVE status has been known for more than 6 months: https://github.com/chronotope/chrono/issues/602

Co-authored-by: Irevoire <tamo@meilisearch.com>
2022-02-15 10:54:46 +00:00
Irevoire
48542ac8fd get rid of chrono in favor of time 2022-02-15 11:41:55 +01:00
bors[bot]
ea15ad6c34 Merge #447
447: Update version for the next release (v0.22.1) r=curquiza a=curquiza



Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
2022-02-07 17:44:09 +00:00
Clémentine Urquizar
d03b3ceb58 Update version for the next release (v0.22.1) 2022-02-07 18:39:29 +01:00
bors[bot]
5d58cb7449 Merge #442
442: fix phrase search r=curquiza a=MarinPostma

Run the exact match search on 7-word windows instead of only two. This makes false positives very, very unlikely, and impossible on phrase queries of fewer than seven words.
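
One plausible reading of that check, as a hedged sketch rather than milli's actual code: every pair of consecutive phrase words must occur, in order, within a window of positions, widened from 2 to 7.

```rust
/// `positions[i]` holds the sorted positions of the i-th phrase word in a
/// document. The phrase matches if every consecutive word pair appears in
/// order within `window` positions of each other.
fn phrase_matches(positions: &[Vec<u32>], window: u32) -> bool {
    positions.windows(2).all(|pair| {
        pair[0].iter().any(|&p1| {
            pair[1].iter().any(|&p2| p2 > p1 && p2 - p1 <= window)
        })
    })
}
```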


Co-authored-by: ad hoc <postma.marin@protonmail.com>
2022-02-07 16:18:20 +00:00
bors[bot]
c5a996aa78 Merge #446
446: Update LICENSE r=Kerollmops a=curquiza



Co-authored-by: Clémentine Urquizar - curqui <clementine@meilisearch.com>
2022-02-07 09:47:39 +00:00
Clémentine Urquizar - curqui
1279c38ac9 Update LICENSE 2022-02-05 18:29:11 +01:00
bors[bot]
267d14c28d Merge #445
445: allow null values in csv r=Kerollmops a=MarinPostma

This PR allows null values in CSV:
- if the field is of type string, then an empty field is considered null (`,,`); anything else is turned into a string (i.e. `, ,` is a single-whitespace string)
- if the field is of type number, when the trimmed field is empty we consider the value null (i.e. `,,` and `, ,` are both null numbers); otherwise we try to parse the number
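
A minimal sketch of those two rules (the `Value` enum and function names are illustrative assumptions):

```rust
#[derive(Debug, PartialEq)]
enum Value {
    Null,
    String(String),
    Number(f64),
}

/// String-typed field: only a fully empty field is null.
fn parse_string_field(field: &str) -> Value {
    if field.is_empty() {
        Value::Null
    } else {
        Value::String(field.to_string())
    }
}

/// Number-typed field: a whitespace-only field is null; otherwise parse it.
fn parse_number_field(field: &str) -> Result<Value, std::num::ParseFloatError> {
    let trimmed = field.trim();
    if trimmed.is_empty() {
        Ok(Value::Null)
    } else {
        trimmed.parse().map(Value::Number)
    }
}
```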


Co-authored-by: ad hoc <postma.marin@protonmail.com>
2022-02-03 15:11:32 +00:00
ad hoc
bd2262ceea allow null values in csv 2022-02-03 16:03:01 +01:00
ad hoc
13de251047 rewrite word pair distance gathering 2022-02-03 15:57:20 +01:00
bors[bot]
fda4f229bb Merge #417
417: Change chunk size to 4MiB to fit more the end user usage r=Kerollmops a=ManyTheFish

Reverts meilisearch/milli#379

We made several indexing tests using different sizes of datasets (5 datasets from 9MiB to 100MiB) on several types of VMs (`XS: 1GiB RAM, 1 VCPU`, `S: 2GiB RAM, 2 VCPU`, `M: 4GiB RAM, 3 VCPU`, `L: 8GiB RAM, 4 VCPU`).
The results of these tests show that the `4MiB` chunk size seems to be the best among the tested chunk sizes (`2MiB`, `4MiB`, `8MiB`, `16MiB`, `32MiB`, `64MiB`, `128MiB`).

below is the average time per chunk size:

![Capture d’écran 2021-09-27 à 14 27 50](https://user-images.githubusercontent.com/6482087/134909368-ef0bc45e-68d5-49d1-aaf9-91113b7c410f.png)

<details>
<summary>Detailed data</summary>
<br>

![Capture d’écran 2021-09-27 à 14 39 48](https://user-images.githubusercontent.com/6482087/134909952-a36b1457-bbbd-4a6c-bbe5-519e4b926b5a.png)
</details>


Co-authored-by: Many <many@meilisearch.com>
2022-02-02 18:30:59 +00:00
bors[bot]
2468ebb76b Merge #444
444: Fix the parsing of ndjson requests to index more than the first line r=Kerollmops a=Kerollmops

This PR correctly uses the `BufRead` trait to read every line of the content instead of just the first one. This bug was only affecting the http-ui test crate.

Co-authored-by: Kerollmops <clement@meilisearch.com>
2022-02-02 17:59:44 +00:00
Kerollmops
9142ba9dd4 Fix the parsing of ndjson requests to index more than the first line 2022-02-02 17:55:13 +01:00
Many
d59bcea749 Revert "Revert "Change chunk size to 4MiB to fit more the end user usage"" 2022-02-02 17:01:13 +01:00
mpostma
7541ab99cd review changes 2022-02-02 12:59:01 +01:00
mpostma
d0aabde502 optimize 2 typos case 2022-02-02 12:56:09 +01:00
mpostma
55e6cb9c7b typos on first letter counts as 2 2022-02-02 12:56:09 +01:00
mpostma
642c01d0dc set max typos on ngram to 1 2022-02-02 12:56:08 +01:00
ad hoc
d852dc0d2b fix phrase search 2022-02-01 20:21:33 +01:00
Kerollmops
fb79c32430 Compute the new, common and, deleted prefix words fst once 2022-01-27 11:00:18 +01:00
Clément Renault
51d1e64b23 Remove, now useless, the WriteMethod enum 2022-01-27 10:08:35 +01:00
Clément Renault
e9c02173cf Rework the WordsPrefixPositionDocids update to compute a subset of the database 2022-01-27 10:08:35 +01:00
Clément Renault
dbba5fd461 Create a function to simplify the word prefix pair proximity docids compute 2022-01-27 10:08:35 +01:00
Clément Renault
e760e02737 Fix the computation of the newly added and common prefix pair proximity words 2022-01-27 10:08:35 +01:00
Clément Renault
d59e559317 Fix the computation of the newly added and common prefix words 2022-01-27 10:08:34 +01:00
Clément Renault
2ec8542105 Rework the WordPrefixDocids update to compute a subset of the database 2022-01-27 10:08:34 +01:00
Clément Renault
28692f65be Rework the WordPrefixDocids update to compute a subset of the database 2022-01-27 10:08:34 +01:00
Clément Renault
5404bc02dd Move the fst_stream_into_hashset method in the helper methods 2022-01-27 10:06:00 +01:00
Clément Renault
c90fa95f93 Only compute the word prefix pairs on the created word pair proximities 2022-01-27 10:06:00 +01:00
Clément Renault
822f67e9ad Bring the newly created word pair proximity docids 2022-01-27 10:06:00 +01:00
Clément Renault
d28f18658e Retrieve the previous version of the words prefixes FST 2022-01-27 10:05:59 +01:00
bors[bot]
38d23546a5 Merge #431
431: Fix and improve word prefix pair proximity r=ManyTheFish a=Kerollmops

This PR first fixes the algorithm we used to select and compute the word prefix pair proximity database. The previous version was skipping nearly all of the prefixes. The issue is that this fix makes the method take more time, and we were trying to reduce the time spent in it.

With `@ManyTheFish` we found out that we could skip some of the work we were doing by:
 - discarding the prefixes that were shorter than a specific threshold (default: 2).
 - discarding the word prefix pairs with proximity bigger than a specific threshold (default: 4).
 - remove the unused threshold that was specifying a minimum amount of word docids to merge.

We will take more time to do further optimizations, like no longer clearing and recomputing the database from scratch: we will compute the subsets of keys to create, keep, and merge. That change is a little bit more complex than what this PR does.

I keep this PR as a draft as I want to further test whether the real gain is enough and whether the approach is valid. I advise reviewers to review commit by commit to see the changes bit by bit; reviewing the whole PR can be hard.

Co-authored-by: Clément Renault <clement@meilisearch.com>
2022-01-27 07:04:56 +00:00
bors[bot]
c63f945093 Merge #441
441: Changes related to the rebranding r=curquiza a=meili-bot

_This PR is auto-generated._

 - [X] Change the name `MeiliSearch` to `Meilisearch` in README.
 - [x] ⚠️ Ensure the bot did not update part you don’t want it to update, especially in the code examples in the Getting started.
 - [x] Please ensure there is no other "MeiliSearch". For example, in the comments or in the test names.
 - [x] Put the new logo on the README if needed -> still using the milli logo so far


Co-authored-by: meili-bot <74670311+meili-bot@users.noreply.github.com>
Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
2022-01-26 17:07:37 +00:00
Clémentine Urquizar
0f213f2202 Replace MeiliSearch by Meilisearch 2022-01-26 17:49:55 +01:00
Clémentine Urquizar
de808a391a Replace meilisearch by Meilisearch 2022-01-26 17:48:22 +01:00
meili-bot
0d282e3cc5 Update README.md 2022-01-26 16:33:16 +01:00
bors[bot]
d342c3c357 Merge #438
438: CLI improvements r=Kerollmops a=MarinPostma

I've made the following changes to the cli:
- `settings-update` becomes `settings`, with two subcommands: `update` and `show`.
- `document-addition` becomes `documents`, with one subcommand: `add` (I'll add a feature to list documents later)
- `search` now has an interactive mode `-i`
- search returns the number of documents and the time it took to perform the search.


Co-authored-by: mpostma <postma.marin@protonmail.com>
2022-01-26 15:18:20 +00:00
Clément Renault
f9b214f34e Apply suggestions from code review
Co-authored-by: Many <legendre.maxime.isn@gmail.com>
2022-01-26 11:28:11 +01:00
bors[bot]
e1cc025cbd Merge #440
440: fix(fuzzer): fix the fuzzer after #430 r=Kerollmops a=irevoire



Co-authored-by: Tamo <tamo@meilisearch.com>
2022-01-25 16:33:57 +00:00
Clément Renault
f04cd19886 Introduce a max prefix length parameter to the word prefix pair proximity update 2022-01-25 17:04:23 +01:00
Clément Renault
1514dfa1b7 Introduce a max proximity parameter to the word prefix pair proximity update 2022-01-25 17:04:23 +01:00
Clément Renault
23ea3ad738 Remove the useless threshold when computing the word prefix pair proximity 2022-01-25 17:04:23 +01:00
Clément Renault
e3c34684c6 Fix a bug where we were skipping most of the prefix pairs 2022-01-25 17:04:23 +01:00
mpostma
b5f01b52c7 cli improvements 2022-01-25 14:08:30 +01:00
Tamo
fb51d511be fix(fuzzer): fix the fuzzer after #430 2022-01-25 12:08:47 +01:00
bors[bot]
9f2ff71581 Merge #434
434: bump milli to v0.22.0 r=curquiza a=irevoire

This is breaking because of this PR:
98a365aaae

Should we do a special branch to only release the [patch](https://github.com/meilisearch/milli/pull/433) for https://github.com/meilisearch/MeiliSearch/issues/2082 (which is non-breaking)?

Co-authored-by: Tamo <tamo@meilisearch.com>
2022-01-24 17:31:20 +00:00
bors[bot]
fd177b63f8 Merge #423
423: Remove an unused file r=irevoire a=irevoire

This empty file is not included anywhere

Co-authored-by: Tamo <tamo@meilisearch.com>
2022-01-19 14:18:05 +00:00
bors[bot]
8433516d85 Merge #430
430: Document batch support r=Kerollmops a=MarinPostma

This PR adds support for document batches in milli. It changes the API of the `IndexDocuments` builder by adding an `add_documents` method. The API of the updates is changed a little, with the `UpdateBuilder` being renamed to `IndexerConfig` and being passed to the update builders. This makes it easier to pass around structs that need to access the indexer config, rather than extracting the fields each time. This change impacts many function signatures and simplifies them.

The change is not thorough and may require another PR to propagate to the whole codebase. I restricted it to what was necessary for this PR.


Co-authored-by: Marin Postma <postma.marin@protonmail.com>
2022-01-19 13:32:59 +00:00
Marin Postma
0c84a40298 document batch support
reusable transform

rework update api

add indexer config

fix tests

review changes

Co-authored-by: Clément Renault <clement@meilisearch.com>

fmt
2022-01-19 12:40:20 +01:00
bors[bot]
74962b2fd9 Merge #435
435: Ensure we get no documents and no error when filtering on an empty db r=Kerollmops a=irevoire



Co-authored-by: Tamo <tamo@meilisearch.com>
2022-01-18 10:46:26 +00:00
Tamo
01968d7ca7 ensure we get no documents and no error when filtering on an empty db 2022-01-18 11:40:30 +01:00
Tamo
367f403693 bump milli 2022-01-17 16:41:34 +01:00
bors[bot]
8f4499090b Merge #433
433: fix(filter): Fix two bugs. r=Kerollmops a=irevoire

- Stop lowercasing the field when looking in the field id map
- When a field id does not exist it means there is currently zero
  documents containing this field thus we return an empty RoaringBitmap
  instead of throwing an internal error

Will fix https://github.com/meilisearch/MeiliSearch/issues/2082 once meilisearch is released

Co-authored-by: Tamo <tamo@meilisearch.com>
2022-01-17 14:06:53 +00:00
bors[bot]
4c516c00da Merge #426
426: Fix search highlight for non-unicode chars r=ManyTheFish a=Samyak2

# Pull Request

## What does this PR do?
Fixes https://github.com/meilisearch/MeiliSearch/issues/1480
<!-- Please link the issue you're trying to fix with this PR, if none then please create an issue first. -->

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

## Changes

The `matching_bytes` function takes a `&Token` now and:
- gets the number of bytes to highlight (unchanged).
- uses `Token.num_graphemes_from_bytes` to get the number of grapheme clusters to highlight.

In essence, the `matching_bytes` function now returns the number of matching grapheme clusters instead of the number of bytes.
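
A sketch of the idea (names and signature assumed, not the exact milli code): given how many bytes of the word matched, count the grapheme clusters those bytes cover so the highlight never splits a character.

```rust
use unicode_segmentation::UnicodeSegmentation;

/// Counts the grapheme clusters that the first `matching_bytes` bytes of
/// `word` cover, so `<mark>` can wrap whole characters only.
fn matching_graphemes(word: &str, matching_bytes: usize) -> usize {
    word.grapheme_indices(true)
        .take_while(|(byte_index, _)| *byte_index < matching_bytes)
        .count()
}

fn main() {
    // "héllo" matched on its first 3 bytes ("hé") covers 2 graphemes.
    assert_eq!(matching_graphemes("héllo", 3), 2);
}
```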

Added proper highlighting in the HTTP UI:
- requires dependency on `unicode-segmentation` to extract grapheme clusters from tokens
- `<mark>` tag is put around only the matched part
    - before this change, the entire word was highlighted even if only a part of it matched

## Questions

Since `matching_bytes` no longer returns a number of bytes but a number of grapheme clusters, should it be renamed to something like `matching_chars` or `matching_graphemes`? Will this break the API?

Thank you very much `@ManyTheFish` for helping 😄 

Co-authored-by: Samyak S Sarnayak <samyak201@gmail.com>
2022-01-17 13:39:00 +00:00
Tamo
d1ac40ea14 fix(filter): Fix two bugs.
- Stop lowercasing the field when looking in the field id map
- When a field id does not exist it means there is currently zero
  documents containing this field, thus we return an empty RoaringBitmap
  instead of throwing an internal error
2022-01-17 13:51:46 +01:00
bors[bot]
15bbde1022 Merge #432
432: Fuzzer r=Kerollmops a=irevoire

Provide a first way of fuzzing the indexing part of milli.
It depends on [cargo-fuzz](https://rust-fuzz.github.io/book/cargo-fuzz.html)

Co-authored-by: Tamo <tamo@meilisearch.com>
2022-01-17 12:50:26 +00:00
Samyak S Sarnayak
c0313f3026 Use chars for highlight instead of graphemes
Tokenizer v0.2.7 uses chars instead of graphemes for matching bytes.
`unicode-segmentation` dependency isn't needed anymore.

Also, oxidised the highlight code :)

Co-authored-by: many <maxime@meilisearch.com>
2022-01-17 13:15:31 +05:30
Samyak S Sarnayak
2d7607734e Run cargo fmt on matching_words.rs 2022-01-17 13:04:33 +05:30
Samyak S Sarnayak
5ab505be33 Fix highlight by replacing num_graphemes_from_bytes
num_graphemes_from_bytes has been renamed in the tokenizer to
num_chars_from_bytes.

Highlight now works correctly!
2022-01-17 13:02:55 +05:30
Samyak S Sarnayak
c10f58b7bd Update tokenizer to v0.2.7 2022-01-17 13:02:00 +05:30
Samyak S Sarnayak
e752bd06f7 Fix matching_words tests to compile successfully
The tests still fail due to a bug in https://github.com/meilisearch/tokenizer/pull/59
2022-01-17 11:37:45 +05:30
Samyak S Sarnayak
30247d70cd Fix search highlight for non-unicode chars
The `matching_bytes` function takes a `&Token` now and:
- gets the number of bytes to highlight (unchanged).
- uses `Token.num_graphemes_from_bytes` to get the number of grapheme
  clusters to highlight.

In essence, the `matching_bytes` function returns the number of matching
grapheme clusters instead of bytes. Should this function be renamed
then?

Added proper highlighting in the HTTP UI:
- requires dependency on `unicode-segmentation` to extract grapheme
  clusters from tokens
- `<mark>` tag is put around only the matched part
    - before this change, the entire word was highlighted even if only a
      part of it matched
2022-01-17 11:37:44 +05:30
Tamo
0605c0ac68 apply review comments 2022-01-13 18:51:08 +01:00
Tamo
b22c80106f add some settings to the fuzzed milli and use the published version of arbitrary json 2022-01-13 15:35:24 +01:00
Tamo
c94952e25d update the readme + dependencies 2022-01-12 18:30:11 +01:00
Tamo
e1053989c0 add a fuzzer on milli 2022-01-12 17:57:54 +01:00
bors[bot]
559e019de1 Merge #424
424: Store the geopoint in three dimensions r=Kerollmops a=irevoire

Related to this issue: https://github.com/meilisearch/MeiliSearch/issues/1872

Fix the whole computation of distance for any “geo” operation (sort or filter). Now, when you sort points, they are returned in the right order.
And when you filter on a specific radius, you only get the points included in that radius.

This PR changes the way we store the geo points in the RTree.
Instead of considering the latitude and longitude as orthogonal coordinates, we convert them to real orthogonal coordinates projected on a sphere with a radius of 1.
This is the conversion formulae.
![image](https://user-images.githubusercontent.com/7032172/145990456-eefe840a-384f-4486-848b-81d0036814ec.png)
In Rust, it translates to this function:
```rust
pub fn lat_lng_to_xyz(coord: &[f64; 2]) -> [f64; 3] {
    let [lat, lng] = coord.map(|f| f.to_radians());
    let x = lat.cos() * lng.cos();
    let y = lat.cos() * lng.sin();
    let z = lat.sin();

    [x, y, z]
}
```

Storing the points on a sphere is easier and faster to compute with than storing the points on an approximation of the real earth shape.
But when we need to compute the distance between two points, we still need to use the haversine distance, which works with latitude and longitude.
So, to do the fewest search-time computations possible, I'm now associating every point with its `DocId` and its lat/lng.
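
For reference, a sketch of that haversine computation, assuming a mean earth radius of 6,371 km (the exact constant milli uses may differ):

```rust
/// Haversine distance in meters between two [lat, lng] points in degrees.
/// 6_371_000.0 is the mean earth radius in meters (an assumption here).
fn haversine_distance(a: &[f64; 2], b: &[f64; 2]) -> f64 {
    let [lat1, lng1] = a.map(|f| f.to_radians());
    let [lat2, lng2] = b.map(|f| f.to_radians());
    let h = ((lat2 - lat1) / 2.0).sin().powi(2)
        + lat1.cos() * lat2.cos() * ((lng2 - lng1) / 2.0).sin().powi(2);
    2.0 * 6_371_000.0 * h.sqrt().asin()
}
```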

Co-authored-by: Tamo <tamo@meilisearch.com>
2022-01-10 15:23:43 +00:00
bors[bot]
660eac50b2 Merge #427
427: Handle escaped characters in filters r=Kerollmops a=irevoire



Co-authored-by: Tamo <tamo@meilisearch.com>
2022-01-10 15:01:23 +00:00
Tamo
92804f6f45 apply clippy suggestions 2022-01-10 15:59:04 +01:00
Tamo
0fcde35a20 Update filter-parser/src/value.rs
Co-authored-by: Clément Renault <clement@meilisearch.com>
2022-01-10 15:53:44 +01:00
Tamo
3c7ea1d298 Apply code suggestions
Co-authored-by: Clément Renault <clement@meilisearch.com>
2022-01-10 15:19:21 +01:00
bors[bot]
74594be234 Merge #429
429: Benchmark CIs: not use a default label to call the GH runner r=irevoire a=curquiza

Since we now have multiple self-hosted GitHub runners, we need to differentiate them when calling them in the CI. The `self-hosted` label is the default one, so we need to use the unique and appropriate one for the benchmark machine.

<img width="925" alt="Capture d’écran 2022-01-04 à 15 42 18" src="https://user-images.githubusercontent.com/20380692/148079840-49cd7878-5912-46ff-8ab8-bf646777f782.png">


Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
2022-01-04 15:41:08 +00:00
Clémentine Urquizar
3d99686f7a Change self-hosted label by benchmarks 2022-01-04 16:01:01 +01:00
bors[bot]
c039562723 Merge #428
428: Reintroduce the gitignore for the fuzzer r=Kerollmops a=irevoire

Reintroduce the gitignore in the fuzz directory

Co-authored-by: Tamo <tamo@meilisearch.com>
2022-01-04 12:09:06 +00:00
Tamo
9bdcd42b9b reintroduce the gitignore for the fuzzer 2022-01-04 13:07:32 +01:00
bors[bot]
4cae691b86 Merge #425
425: Push the result of the benchmarks to influxdb r=irevoire a=irevoire

Now execute a benchmark for every PR merged into main and then upload the results to influxdb.

Co-authored-by: Tamo <tamo@meilisearch.com>
2022-01-04 11:04:16 +00:00
Tamo
6a1216bd51 Integrate telegraf into our CI 2022-01-04 11:59:05 +01:00
Tamo
02a21fd309 Handle the escapes of quote in the filters 2022-01-04 04:04:10 +01:00
Tamo
98a365aaae store the geopoint in three dimensions 2021-12-14 12:21:24 +01:00
Tamo
d671d6f0f1 remove an unused file 2021-12-13 19:27:34 +01:00
bors[bot]
11a056d116 Merge #422
422: Prefer returning `None` instead of using an `FilterCondition::Empty` state r=Kerollmops a=Kerollmops

This PR is related to the issue comment https://github.com/meilisearch/MeiliSearch/issues/1338#issuecomment-989322889, which exhibits the fact that when a filter is known to be empty, no results are returned. This is wrong: the filter should not apply at all, as no restriction is done on the documents set.

The filter system on the milli side had introduced an `Empty` state that was used in this kind of situation, but I found out that it is not needed: when we parse a filter and it turns out to be empty, we can simply return `None`, as the `Filter::from_array` constructor does. So I removed it and added tests!

On the MeiliSearch side, we just need to match on a `None` and completely ignore the filter in such a case.
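
A sketch of the contract this gives callers (stand-in types, not milli's real signatures): an empty filter parses to `None`, and `None` means "no restriction on the documents set", never "no results".

```rust
struct Filter(String);

impl Filter {
    /// Stand-in for the behavior described above: an array with no actual
    /// condition yields `None` instead of some `Empty` filter state.
    fn from_array(conditions: &[&str]) -> Option<Filter> {
        let non_empty: Vec<&str> = conditions
            .iter()
            .copied()
            .filter(|c| !c.trim().is_empty())
            .collect();
        if non_empty.is_empty() {
            None
        } else {
            Some(Filter(non_empty.join(" AND ")))
        }
    }

    fn matches(&self, _docid: u32) -> bool {
        true // the real evaluation is elided in this sketch
    }
}

fn candidates(all_docids: &[u32], filter: Option<&Filter>) -> Vec<u32> {
    match filter {
        // A real filter restricts the candidate documents...
        Some(filter) => all_docids.iter().copied().filter(|&id| filter.matches(id)).collect(),
        // ...while no filter leaves the whole documents set untouched.
        None => all_docids.to_vec(),
    }
}
```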

Co-authored-by: Clément Renault <clement@meilisearch.com>
2021-12-09 15:03:04 +00:00
Clément Renault
94011bb9a8 Fix the benchmarks to work with optional filters 2021-12-09 12:14:16 +01:00
Clément Renault
1c6c89f345 Fix the binaries that use the new optional filters 2021-12-09 11:57:53 +01:00
Clément Renault
25faef67d0 Remove the database setup in the filter_depth test 2021-12-09 11:57:53 +01:00
Clément Renault
65519bc04b Test that empty filters return a None 2021-12-09 11:57:53 +01:00
Clément Renault
ef59762d8e Prefer returning None instead of the Empty Filter state 2021-12-09 11:57:52 +01:00
bors[bot]
80dcfd5c3e Merge #421
421: Introduce the depth method on FilterCondition r=Kerollmops a=Kerollmops

This PR introduces the `depth` method on the `FilterCondition` type to be able to react to it. It is meant to be used to reject filters that go too deep and could make the engine overflow its stack.
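
A self-contained sketch of what such a method can look like (the real `FilterCondition` has more variants; this enum is a simplification):

```rust
enum FilterCondition {
    Condition(String),
    And(Box<FilterCondition>, Box<FilterCondition>),
    Or(Box<FilterCondition>, Box<FilterCondition>),
}

impl FilterCondition {
    /// Returns the depth of the filter tree, so callers can reject
    /// filters deep enough to overflow the stack during evaluation.
    fn depth(&self) -> usize {
        match self {
            FilterCondition::Condition(_) => 1,
            FilterCondition::And(left, right) | FilterCondition::Or(left, right) => {
                1 + left.depth().max(right.depth())
            }
        }
    }
}
```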

Co-authored-by: Clément Renault <clement@meilisearch.com>
2021-12-09 10:28:52 +00:00
Clément Renault
ee856a7a46 Limit the max filter depth to 2000 2021-12-07 17:36:45 +01:00
Clément Renault
32bd9f091f Detect the filters that are too deep and return an error 2021-12-07 17:20:11 +01:00
Clément Renault
90f49eab6d Check the filter max depth limit and reject the invalid ones 2021-12-07 16:32:48 +01:00
Clément Renault
49c2db9485 Change the depth function to return the token depth 2021-12-07 16:06:10 +01:00
Clément Renault
57502fcf6a Introduce the depth method on FilterCondition 2021-12-06 17:35:20 +01:00
bors[bot]
c83b77304a Merge #420
420: Update milli 0.21.0 r=ManyTheFish a=ManyTheFish

Update all modules to 0.21.0

Co-authored-by: many <maxime@meilisearch.com>
2021-11-30 17:22:12 +00:00
many
1b3923b5ce Update all packages to 0.21.0 2021-11-29 12:17:59 +01:00
bors[bot]
26629a3f9e Merge #419
419:  fix word pair proximity indexing r=ManyTheFish a=ManyTheFish

# Pull Request

Sort positions before iterating over them during word pair proximity extraction.

fixes [Meilisearch#1913](https://github.com/meilisearch/MeiliSearch/issues/1913)

Co-authored-by: many <maxime@meilisearch.com>
2021-11-23 10:21:05 +00:00
many
8970246bc4 Sort positions before iterating over them during word pair proximity extraction 2021-11-22 18:16:54 +01:00
bors[bot]
cc32519a2d Merge #418
418: change visibility of DocumentDeletionResult r=Kerollmops a=MarinPostma

Change the visibility of `DocumentDeletionResult`, so its fields can be accessed from outside milli.


Co-authored-by: Marin Postma <postma.marin@protonmail.com>
2021-11-22 14:45:55 +00:00
Marin Postma
6e977dd8e8 change visibility of DocumentDeletionResult 2021-11-22 15:44:44 +01:00
bors[bot]
68f1db123a Merge #416
416: Update tokenizer v0.2.6 r=Kerollmops a=ManyTheFish



Co-authored-by: many <maxime@meilisearch.com>
2021-11-18 16:01:11 +00:00
many
35f9499638 Export tokenizer from milli 2021-11-18 16:57:12 +01:00
many
64ef5869d7 Update tokenizer v0.2.6 2021-11-18 16:56:05 +01:00
bors[bot]
2c14efa8a2 Merge #409
409: remove update_id in UpdateBuilder r=ManyTheFish a=MarinPostma

Removing the `update_id` from `UpdateBuilder`, since it serves no purpose. I had introduced it when working on HA some time ago, but I think there are better ways to do it now, so it can be removed and stop being in our way.

Co-authored-by: Marin Postma <postma.marin@protonmail.com>
2021-11-16 14:59:09 +00:00
Marin Postma
6eb47ab792 remove update_id in UpdateBuilder 2021-11-16 13:07:04 +01:00
bors[bot]
21b78f3926 Merge #414
414: improve update result types r=ManyTheFish a=MarinPostma

Improve the returned meta when performing document additions and deletions:

- On document addition, return the number of indexed documents and the total number of documents in the index after the indexing
- On document deletion, return the number of deleted documents and the remaining number of documents in the index after the deletion is performed

I also fixed a potential bug when performing a document deletion and the primary key couldn't be found: before, we assumed that the db was empty and returned that no documents were deleted, but since we had already checked that the db wasn't empty, entering this branch is actually a bug, and it now returns a `MissingPrimaryKey` error.


Co-authored-by: Marin Postma <postma.marin@protonmail.com>
2021-11-15 09:06:10 +00:00
Marin Postma
09b4281cff improve document addition returned meta
2021-11-10 14:08:36 +01:00
Marin Postma
721fc294be improve document deletion returned meta
returns both the remaining number of documents and the number of deleted
documents.
2021-11-10 14:08:18 +01:00
bors[bot]
8dff08d772 Merge #400
400: Rewrite the filter parser and add a lot of tests r=irevoire a=irevoire

This PR is a complete rewrite of #358, which was reverted in #403.
You can already try this PR in Meilisearch here https://github.com/meilisearch/MeiliSearch/pull/1880.

Since writing a parser is quite complicated, I moved all the logic to another workspace called `filter_parser`.
In this workspace, we don't know anything about milli, the filterable fields / field ID or anything.
As you can see in its `cargo.toml`, its few dependencies are entirely focused on the parsing part:
```
nom = "7.0.0"
nom_locate = "4.0.0"
```

But introducing this new workspace made some changes necessary on the “AST”. Now the parser only returns `Tokens` (a simple `&str` with a bit of context). Everything is interpreted when we execute the filter later in milli.
This crate provides a new error type for all filter related errors.

---------
## Errors

Currently, we have multiple kinds of errors. Sometimes we generate errors that look like this: (for `name = truc`)
```
Attribute `name` is not filterable. Available filterable attributes are: ``.
```
While sometimes pest generated errors that looked like this:
```
Invalid syntax for the filter parameter: ` --> 1:7
  |
1 | name =
  |       ^---
  |
  = expected word`.
```

Which most people actually saw like this: (for `name =`)
```
Invalid syntax for the filter parameter: ` --> 1:7\n  |\n1 | name =\n  |       ^---\n  |\n  = expected word`.
```

-----------

With this PR, the error format is unified between all errors.
All errors follow this more straightforward format:
```
The error message.
[from char]:[to char] filter
```

This should be way easier to read when embedded in the JSON for a human. And it should also allow us to parse the errors easily and provide highlighting or something with a frontend playground.

Here is an example of the two previous errors with the new format:
For `name = truc`:
```
Attribute `name` is not filterable. Available filterable attributes are: ``.
1:4 name = truc
```
Or in one line:
```
Attribute `name` is not filterable. Available filterable attributes are: ``.\n1:4 name = truc
```

And for `name =`:
```
Was expecting a value but instead got nothing.
7:7 name =
```
Or in one line:
```
Was expecting a value but instead got nothing.\n7:7 name =
```

Also, since we now have control over the parser, we can generate more explicit error messages, so a lot of new errors have been created. I tried to be as helpful as possible for the user; here is a little overview of the new error messages you can get when misusing a filter:
```
Expression `"truc` is missing the following closing delimiter: `"`.
8:13 name = "truc
```
```
The `_geoRadius` filter is an operation and can't be used as a value.
8:30 name = _geoRadius(12, 13, 14)
```
etc.

## Tests
A lot of tests have been written in the `filter_parser` crate. I think there is a unit test for every part of the syntax. 
But since we can never be sure we covered all the cases, I also fuzzed the new parser A LOT (for ±8 hours on 20 threads). And the code to fuzz the parser is included in the workspace, so if one day we need to change something in the syntax, we'll be able to reuse it by simply running:
```
cargo fuzz run --release parse
```

## Milli
I renamed the type and module `filter_condition.rs` / `FilterCondition` to `filter.rs` / `Filter`.

Co-authored-by: Tamo <tamo@meilisearch.com>
2021-11-09 16:09:34 +00:00
Irevoire
7c3017734a re-ignore the ! symbol when generating a good error message 2021-11-09 17:08:04 +01:00
Tamo
bff48681d2 Re-order the operator
Co-authored-by: Clément Renault <clement@meilisearch.com>
2021-11-09 17:05:36 +01:00
Irevoire
519d6b2bf3 remove the ! syntax for the not 2021-11-09 16:47:54 +01:00
Irevoire
73df873f44 fix typos 2021-11-09 16:41:10 +01:00
Irevoire
99197387af fix the test with the new escaped format 2021-11-09 16:41:10 +01:00
Tamo
f28600031d Rename the filter_parser crate into filter-parser
Co-authored-by: Clément Renault <clement@meilisearch.com>
2021-11-09 16:41:10 +01:00
Irevoire
0ea0146e04 implement deref &str on the tokens 2021-11-09 11:34:10 +01:00
Irevoire
a211a9cdcd update the error format so it can be easily parsed by someone else 2021-11-09 11:19:30 +01:00
Irevoire
9b24f83456 in case of error return a range of chars position instead of one line and column 2021-11-09 10:27:29 +01:00
Tamo
2c6d08c519 Simplify the tokens to only wrap one span and no inner value
Co-authored-by: marin <postma.marin@protonmail.com>
2021-11-09 10:12:20 +01:00
Irevoire
18eb4b9c51 fix spaces in the bnf 2021-11-09 01:04:50 +01:00
Tamo
cf98bf37d0 Simplify some closure
Co-authored-by: marin <postma.marin@protonmail.com>
2021-11-09 01:03:02 +01:00
Tamo
bc9daf9041 update the bnf
Co-authored-by: marin <postma.marin@protonmail.com>
2021-11-09 01:00:42 +01:00
Tamo
9c36e497d9 Rename the key_component into a value_component
Co-authored-by: marin <postma.marin@protonmail.com>
2021-11-09 00:59:44 +01:00
Irevoire
6515838d35 improve the readability of the _geoPoint thingy in the value 2021-11-09 00:57:46 +01:00
Tamo
ea52aff6dc Rename the ExtendNomError trait to NomErrorExt
Co-authored-by: marin <postma.marin@protonmail.com>
2021-11-09 00:52:17 +01:00
Irevoire
ef0d5a8240 flatten a match 2021-11-09 00:49:13 +01:00
Tamo
15bd14297e Remove useless closure
Co-authored-by: marin <postma.marin@protonmail.com>
2021-11-09 00:45:46 +01:00
Irevoire
21d115dcbb remove greedy-error 2021-11-08 17:53:41 +01:00
Irevoire
959ca66125 improve the error diagnostic when parsing values 2021-11-08 15:58:21 +01:00
Tamo
7483c7513a fix the filterable fields 2021-11-07 01:52:19 +01:00
Tamo
e5af3ac65c rename the filter_condition.rs to filter.rs 2021-11-06 16:37:55 +01:00
Tamo
6831c23449 merge with main 2021-11-06 16:34:30 +01:00
Tamo
5c01e9bf7c fix the benchmarks 2021-11-06 16:03:49 +01:00
Tamo
075d9c97c0 re-implement the equality between tokens to only compare the inner value 2021-11-06 16:02:27 +01:00
Tamo
b249989bef fix most of the tests 2021-11-06 01:32:12 +01:00
Tamo
070ec9bd97 small update on the README 2021-11-05 17:45:20 +01:00
Tamo
27a6a26b4b makes the parse function part of the filter_parser 2021-11-05 10:46:54 +01:00
Tamo
76d961cc77 implements the last errors 2021-11-04 17:42:06 +01:00
Tamo
8234f9fdf3 recreate most filter error except for the geosearch 2021-11-04 17:24:55 +01:00
Tamo
7328ffb034 stop panicking in case of internal error 2021-11-04 16:20:53 +01:00
Tamo
3e5550c910 clean the errors 2021-11-04 16:12:17 +01:00
Tamo
72a9071203 fix typo 2021-11-04 16:03:52 +01:00
Tamo
07a5ffb04c update http-ui 2021-11-04 15:52:22 +01:00
Tamo
a58bc5bebb update milli with the new parser_filter 2021-11-04 15:02:36 +01:00
Tamo
b1a0110a47 update the main 2021-11-04 14:48:39 +01:00
Tamo
d0fe9dea61 update the readme 2021-11-04 14:43:36 +01:00
Tamo
b165c77fa7 add a smol README 2021-11-04 14:39:02 +01:00
Tamo
54aec7ac5f update the filter parser and some code for the fuzzer 2021-11-04 14:22:35 +01:00
bors[bot]
a2fc74f010 Merge #412
412: Change Attribute and Ranking rules errors r=ManyTheFish a=ManyTheFish

# Pull Request

Fixes Meilisearch [PR comment](https://github.com/meilisearch/MeiliSearch/pull/1873#issuecomment-959786406)


Co-authored-by: many <maxime@meilisearch.com>
2021-11-04 13:08:50 +00:00
many
743ed9f57f Bump milli version 2021-11-04 14:04:21 +01:00
many
7b3bac46a0 Change Attribute and Ranking rules errors 2021-11-04 13:19:32 +01:00
bors[bot]
3be37b00e7 Merge #410
410: Update version for the next release (v0.20.1) r=Kerollmops a=ManyTheFish



Co-authored-by: many <maxime@meilisearch.com>
2021-11-03 13:32:03 +00:00
many
702589104d Update version for the next release (v0.20.1) 2021-11-03 14:20:01 +01:00
bors[bot]
cb9e7e510b Merge #408
408: Change last error messages r=ManyTheFish a=ManyTheFish

Change forgotten error messages

Co-authored-by: many <maxime@meilisearch.com>
2021-11-03 10:51:33 +00:00
many
0c0038488c Change last error messages 2021-11-03 11:24:06 +01:00
Tamo
5d3af5f273 remove all genericity in favor of my custom error type 2021-11-02 20:27:07 +01:00
Tamo
76a2adb7c3 re-enable the tests in the parser and start the creation of an error type 2021-11-02 17:35:17 +01:00
bors[bot]
5a6d22d4ec Merge #407
407: Update version for the next release (v0.20.0) r=curquiza a=curquiza

Breaking because of #405 and #406 

Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
2021-10-28 13:43:48 +00:00
bors[bot]
08ae47e475 Merge #405
405: Change some error messages r=ManyTheFish a=ManyTheFish



Co-authored-by: many <maxime@meilisearch.com>
2021-10-28 13:35:55 +00:00
Clémentine Urquizar
056ff13c4d Update version for the next release (v0.20.0) 2021-10-28 14:52:57 +02:00
many
9f1e0d2a49 Refine asc/desc error messages 2021-10-28 14:47:17 +02:00
many
ed6db19681 Fix PR comments 2021-10-28 11:18:32 +02:00
bors[bot]
9875f2646a Merge #406
406: return document count from builder r=MarinPostma a=MarinPostma

`DocumentBatchBuilder::finish` now returns the number of documents in the batch. This is more compact than calling `len()` just before calling `finish`.


Co-authored-by: marin postma <postma.marin@protonmail.com>
2021-10-28 08:42:38 +00:00
marin postma
183d3dada7 return document count from builder 2021-10-28 10:33:04 +02:00
many
2be755ce75 Lower error check, already check in meilisearch 2021-10-27 19:50:41 +02:00
many
3599df77f0 Change some error messages 2021-10-27 19:33:01 +02:00
bors[bot]
d7943fe225 Merge #402
402: Optimize document transform r=MarinPostma a=MarinPostma

This PR optimizes the transform of document additions into the obkv format. Instead of accepting any serializable object, we treat JSON and CSV specifically:
- For JSON, we build a serde `Visitor` that transforms the JSON straight into obkv without an intermediate representation.
- For CSV, we directly write the lines into the obkv, applying other optimizations as well.

Co-authored-by: marin postma <postma.marin@protonmail.com>
2021-10-26 09:55:28 +00:00
bors[bot]
6758146213 Merge #404
404: remove search crate r=Kerollmops a=MarinPostma

The functionalities of the search crate have been moved to the cli crate. The outstanding files are removed by this pr.


Co-authored-by: marin postma <postma.marin@protonmail.com>
2021-10-26 09:40:34 +00:00
marin postma
9b8ab40d80 remove search folder 2021-10-26 11:35:49 +02:00
marin postma
baddd80069 implement review suggestions 2021-10-25 18:29:12 +02:00
marin postma
f9445c1d90 return float parsing error context in csv 2021-10-25 17:27:10 +02:00
bors[bot]
15c29cdd9b Merge #401
401: Update version for the next release (v0.19.0) r=curquiza a=curquiza



Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
2021-10-25 12:49:53 +00:00
bors[bot]
13d8272173 Merge #403
403: Revert "Replacing pest with nom" r=curquiza a=curquiza

Reverts meilisearch/milli#358

Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
2021-10-25 12:16:49 +00:00
Clémentine Urquizar
208903ddde Revert "Replacing pest with nom " 2021-10-25 11:58:00 +02:00
Clémentine Urquizar
679fe18b17 Update version for the next release (v0.19.0) 2021-10-25 11:52:17 +02:00
marin postma
3fcccc31b5 add document builder example 2021-10-25 10:26:43 +02:00
marin postma
430e9b13d3 add csv builder tests 2021-10-25 10:26:43 +02:00
marin postma
53c79e85f2 document errors 2021-10-25 10:26:43 +02:00
marin postma
2e62925a6e fix tests 2021-10-25 10:26:42 +02:00
marin postma
0f86d6b28f implement csv serialization 2021-10-25 10:26:42 +02:00
marin postma
8d70b01714 optimize document deserialization 2021-10-25 10:26:42 +02:00
Tamo
1327807caa add some error messages 2021-10-22 19:00:33 +02:00
Tamo
c8d03046bf add a check on the fid in the geosearch 2021-10-22 18:08:18 +02:00
Tamo
3942b3732f re-implement the geosearch 2021-10-22 18:03:39 +02:00
Tamo
7cd9109e2f lowercase value extracted from Token 2021-10-22 17:50:15 +02:00
Tamo
4e113bbf1b handle the case of empty input 2021-10-22 17:49:08 +02:00
Tamo
e25ca9776f start updating the exposed function to makes other modules happy 2021-10-22 17:23:22 +02:00
Tamo
6c9165b6a8 provide a helper to parse the token but to not handle the errors 2021-10-22 16:52:13 +02:00
Tamo
efb2f8b325 convert the errors 2021-10-22 16:38:35 +02:00
Tamo
d6ba84ea99 re introduce the special error type to be able to add context to the errors 2021-10-22 15:09:56 +02:00
Tamo
c27870e765 integrate a first version without any error handling 2021-10-22 14:33:18 +02:00
Tamo
01dedde1c9 update some names and move some parser out of the lib.rs 2021-10-22 01:59:38 +02:00
Tamo
7e5c5c4d27 start a new rewrite of the filter parser 2021-10-22 01:15:42 +02:00
Tamo
c634d43ac5 add a simple test on the filters with an integer 2021-10-21 17:10:27 +02:00
Tamo
6c15f50899 rewrite the parser logic 2021-10-21 16:45:42 +02:00
Tamo
e1d81342cf add test on the or and and operator 2021-10-21 13:01:25 +02:00
Tamo
423baac08b fix the tests 2021-10-21 12:45:40 +02:00
Tamo
36281a653f write all the simple tests 2021-10-21 12:40:11 +02:00
Clémentine Urquizar
f8fe9316c0 Update version for the next release (v0.18.1) 2021-10-21 11:56:14 +02:00
Tamo
661bc21af5 Fix the filter parser
And add a bunch of tests on the filter::from_array
2021-10-21 11:45:03 +02:00
bors[bot]
b6af84eb77 Merge #394
394:  Added search_geo benchmark in cron job r=irevoire a=fumblehool

fixes: #392 
`search_geo` cron will run every Friday at 18:30

Co-authored-by: Damanpreet Singh <daman.4880@gmail.com>
2021-10-18 14:33:32 +00:00
bors[bot]
7906461c14 Merge #396
396: Fix indexing benchmark GH actions upload filename r=irevoire a=fumblehool

fixes: #393 

Co-authored-by: Damanpreet Singh <daman.4880@gmail.com>
2021-10-18 13:34:10 +00:00
Damanpreet Singh
2e4604b0b9 fixed filename for search_* crons 2021-10-18 18:48:38 +05:30
Damanpreet Singh
4c34164d2e fixed filename for search_geo cron 2021-10-18 18:43:36 +05:30
bors[bot]
9df4f3aaad Merge #397
397: Fix typo in repo r=curquiza a=saintmalik

Fix the single typo found in this repo

Co-authored-by: SaintMalik <37118134+saintmalik@users.noreply.github.com>
2021-10-18 11:59:48 +00:00
bors[bot]
513d3178c6 Merge #398
398: Update version for the next release (v0.18.2) r=irevoire a=curquiza

Breaking because of https://github.com/meilisearch/milli/pull/358

Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
2021-10-18 11:47:26 +00:00
Clémentine Urquizar
2209acbfe2 Update version for the next release (v0.18.2) 2021-10-18 13:45:48 +02:00
SaintMalik
70121e3c6b fix typo in repo 2021-10-18 04:00:19 +01:00
bors[bot]
59cc59e93e Merge #358
358: Replacing pest with nom  r=Kerollmops a=CNLHC



Co-authored-by: 刘瀚骋 <cn_lhc@qq.com>
2021-10-16 20:44:38 +00:00
Damanpreet Singh
493d9b98f5 fix indexing benchmark GH actions upload filename 2021-10-16 21:52:36 +05:30
Damanpreet Singh
efaef4f748 Added search_geo benchmark in cron job 2021-10-16 21:41:45 +05:30
刘瀚骋
7666e4f34a follow the suggestions 2021-10-14 21:37:59 +08:00
刘瀚骋
2ea2f7570c use nightly cargo to format the code 2021-10-14 16:46:13 +08:00
刘瀚骋
e750465e15 check logic for geolocation. 2021-10-14 16:12:00 +08:00
bors[bot]
aa5e099718 Merge #390
390: Add helper methods on the settings r=Kerollmops a=irevoire

This would be a good addition, allowing us to look at the content of a setting without consuming it.
It’s useful for analytics.

Co-authored-by: Irevoire <tamo@meilisearch.com>
2021-10-13 20:36:30 +00:00
bors[bot]
c7db4176f3 Merge #384
384: Replace memmap with memmap2 r=Kerollmops a=palfrey

[memmap is unmaintained](https://rustsec.org/advisories/RUSTSEC-2020-0077.html) and needs replacing. memmap2 is a drop-in replacement fork that's well maintained. Note that the version numbers got reset on fork, hence the lower values.

Co-authored-by: Tom Parker-Shemilt <palfrey@tevp.net>
2021-10-13 13:47:23 +00:00
Irevoire
a3e7c468cd add helper methods on the settings 2021-10-13 13:05:07 +02:00
刘瀚骋
cd359cd96e WIP: extract the error trait bound to new trait. 2021-10-13 18:04:15 +08:00
刘瀚骋
5de5dd80a3 WIP: remove '_nom' suffix/redundant error enum/... 2021-10-13 11:06:15 +08:00
刘瀚骋
2c65781d91 format 2021-10-12 22:20:22 +08:00
bors[bot]
6e3b869e6a Merge #388
388: fix primary key inference r=MarinPostma a=MarinPostma

The primary key was inferred from a hashtable index of the fields. For this reason, the order in which the fields were iterated upon was not deterministic, and the primary key was chosen from the first field containing "id".

This fix sorts the index by field_id when inferring the primary key.


Co-authored-by: mpostma <postma.marin@protonmail.com>
2021-10-12 09:25:16 +00:00
mpostma
86ead92ed5 infer primary key on sorted fields 2021-10-12 11:15:11 +02:00
mpostma
9a266a531b test correct primary key inference 2021-10-12 11:08:53 +02:00
bors[bot]
3f7f24b90e Merge #368
368: Remove limit of 1000 position per attribute r=irevoire a=ManyTheFish

Instead of using an arbitrary limit, we encode the absolute position in a u32,
using a strong (high) u16 for the field id and a weak (low) u16 for the relative position in the attribute.
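
As an illustration, a sketch of that packing (function names assumed): the field id lives in the high 16 bits and the relative position in the low 16 bits.

```rust
/// Packs a field id and a relative position into a single absolute position.
fn absolute_position(field_id: u16, relative_position: u16) -> u32 {
    ((field_id as u32) << 16) | relative_position as u32
}

/// Recovers the field id and the relative position from an absolute position.
fn decompose_position(absolute: u32) -> (u16, u16) {
    ((absolute >> 16) as u16, (absolute & 0xFFFF) as u16)
}
```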

- [x] check database size difference

below is the database size difference for each dataset:
![Capture d’écran 2021-09-27 à 18 01 44](https://user-images.githubusercontent.com/6482087/134944199-bd25fed0-6c34-475c-9afc-197871e06553.png)

- [ ] check search time on big dataset


Related to [product#202](https://github.com/meilisearch/product/issues/202)

Co-authored-by: many <maxime@meilisearch.com>
2021-10-12 08:30:33 +00:00
many
c5a6075484 Make max_position_per_attributes changable 2021-10-12 10:10:50 +02:00
many
360c5ff3df Remove limit of 1000 position per attribute
Instead of using an arbitrary limit we encode the absolute position in a u32
using one strong u16 for the field id and a weak u16 for the relative position in the attribute.
2021-10-12 10:10:50 +02:00
刘瀚骋
d323e35001 add a test case 2021-10-12 13:30:40 +08:00
刘瀚骋
70f576d5d3 error handling 2021-10-12 13:30:40 +08:00
刘瀚骋
28f9be8d7c support syntax 2021-10-12 13:30:40 +08:00
刘瀚骋
469d92c569 tweak error handling 2021-10-12 13:30:40 +08:00
刘瀚骋
7a90a101ee reorganize parser logic 2021-10-12 13:30:40 +08:00
刘瀚骋
f7796edc7e remove everything about pest 2021-10-12 13:30:40 +08:00
刘瀚骋
ac1df9d9d7 fix typo and remove pest 2021-10-12 13:30:40 +08:00
刘瀚骋
50ad750ec1 enhance error handling 2021-10-12 13:30:40 +08:00
刘瀚骋
8748df2ca4 draft without error handling 2021-10-12 13:30:40 +08:00
bors[bot]
8f6b6c9042 Merge #385
385: Fix the wiki indexing benchmark r=ManyTheFish a=irevoire



Co-authored-by: Tamo <tamo@meilisearch.com>
2021-10-11 15:12:24 +00:00
bors[bot]
07fb6d64e5 Merge #386
386: fix obkv document r=curquiza a=MarinPostma

When serializing a document, the serializer resolved the field_id of the current field and immediately added it to the obkv document under construction. The issue is that obkv expects the fields to be inserted in order, so when a document with out-of-order fields was added, obkv failed to insert them.

The current fix first resolves each field_id and adds all the fields to a temporary `BTreeMap`, until `end` is called on the map serializer, at which point all the fields are added to the obkv at once, in order.
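
A minimal, self-contained sketch of this buffering idea, with a plain struct standing in for the real obkv writer (illustrative only):

```rust
use std::collections::BTreeMap;

// Stand-in for the obkv writer: like the real one, it only accepts
// keys in strictly increasing order.
struct OrderedWriter { last: Option<u16>, out: Vec<(u16, Vec<u8>)> }

impl OrderedWriter {
    fn insert(&mut self, field_id: u16, value: &[u8]) {
        assert!(self.last.map_or(true, |last| field_id > last), "out of order");
        self.last = Some(field_id);
        self.out.push((field_id, value.to_vec()));
    }
}

fn main() {
    // Fields arrive in document order, which may not be field_id order.
    let fields = [(2u16, b"b".to_vec()), (0, b"a".to_vec()), (1, b"c".to_vec())];

    // Buffer everything in a BTreeMap first...
    let buffered: BTreeMap<u16, Vec<u8>> = fields.into_iter().collect();

    // ...then, when `end` is called, write to the writer in key order.
    let mut writer = OrderedWriter { last: None, out: Vec::new() };
    for (field_id, value) in buffered {
        writer.insert(field_id, &value);
    }
    assert_eq!(writer.out.len(), 3);
}
```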


Co-authored-by: mpostma <postma.marin@protonmail.com>
2021-10-11 13:45:04 +00:00
bors[bot]
e45c846af5 Merge #387
387: Update version for the next release (v0.17.2) r=Kerollmops a=curquiza



Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
2021-10-11 13:21:47 +00:00
Clémentine Urquizar
dd56e82dba Update version for the next release (v0.17.2) 2021-10-11 15:20:35 +02:00
mpostma
99889a0ed0 add obkv document serialization test 2021-10-11 15:13:17 +02:00
mpostma
799f3d43c8 fix serialization to obkv format 2021-10-11 15:04:47 +02:00
Tamo
ed7fd855af fix the wiki indexing benchmark 2021-10-11 14:26:36 +02:00
Tom Parker-Shemilt
2dfe24f067 memmap -> memmap2 2021-10-10 22:47:12 +01:00
bors[bot]
a2743baaa3 Merge #383
383: Add check on latitude and longitude r=irevoire a=irevoire

Latitudes are not supposed to go beyond 90 degrees or below -90.
The same goes for longitudes with 180 or -180.

This was badly implemented in the filters, and was not implemented for the `AscDesc` rules.

Co-authored-by: Tamo <tamo@meilisearch.com>
Co-authored-by: Irevoire <tamo@meilisearch.com>
2021-10-08 10:15:25 +00:00
Irevoire
b65aa7b5ac Apply suggestions from code review
Co-authored-by: Clément Renault <clement@meilisearch.com>
2021-10-07 17:51:52 +02:00
Tamo
11dfe38761 Update the check on the latitude and longitude
Latitudes are not supposed to go beyond 90 degrees or below -90.
The same goes for longitudes with 180 or -180.

This was badly implemented in the filters, and was not implemented for the AscDesc rules.
2021-10-07 16:10:43 +02:00
bors[bot]
dde1da1c0e Merge #382
382: Refactor attribute criterion r=Kerollmops a=ManyTheFish

### Re-implement set based algorithm for attribute criterion
#### Levels
Instead of doing level iteration and digging into the interesting level, we only iterate over the lowest level.

#### crossword iteration VS minimal position iteration
Instead of crossing word positions so as to iterate strictly over the positions that give the best rank, in the right order, we iterate word by word, starting with the word that increases the rank the least.
This new method is a bit less precise but way simpler.

### Simplify word-level-position database
We don't use levels anymore in the attribute criterion, so we removed the level complexity from the database, making it a word-position-docids database.

### Benchmarks on search on big datasets

#### songs main VS refactor-attribute-criterion
```diff
  group                                                   search_songsmain_31c18f09               search_songsrefactor-attribute-criterion_1bd15d84
  -----                                                   -------------------------               -------------------------------------------------
- smol-songs.csv: basic filter: <=/Notstandskomitee       1.00     84.8±0.58µs        ? ?/sec     1.09     92.2±8.98µs        ? ?/sec
+ smol-songs.csv: basic filter: TO/Notstandskomitee       1.18     98.0±6.30µs        ? ?/sec     1.00     83.2±0.97µs        ? ?/sec
+ smol-songs.csv: basic with quote/"david" "bowie"        114.68    76.0±0.20ms        ? ?/sec    1.00    662.5±5.03µs        ? ?/sec
- smol-songs.csv: basic with quote/"john"                 1.00    197.4±1.06µs        ? ?/sec     1.05    208.1±1.53µs        ? ?/sec
+ smol-songs.csv: basic with quote/"michael" "jackson"    2.75      2.0±0.01ms        ? ?/sec     1.00    738.9±3.91µs        ? ?/sec
+ smol-songs.csv: basic without quote/david bowie         297.42  1499.3±0.86ms        ? ?/sec    1.00      5.0±0.02ms        ? ?/sec
+ smol-songs.csv: basic without quote/michael jackson     2.55      8.9±0.02ms        ? ?/sec     1.00      3.5±0.01ms        ? ?/sec
+ smol-songs.csv: big filter/john                         1.08    473.6±2.25µs        ? ?/sec     1.00    438.1±2.59µs        ? ?/sec
- smol-songs.csv: prefix search/a                         1.00    446.9±1.81µs        ? ?/sec     1.79    800.5±4.45µs        ? ?/sec
- smol-songs.csv: prefix search/b                         1.00    398.5±2.74µs        ? ?/sec     1.81    723.1±5.46µs        ? ?/sec
- smol-songs.csv: prefix search/i                         1.00    486.3±1.99µs        ? ?/sec     1.69    823.6±9.42µs        ? ?/sec
- smol-songs.csv: prefix search/s                         1.00    229.6±3.29µs        ? ?/sec     2.59    594.4±2.22µs        ? ?/sec
- smol-songs.csv: prefix search/x                         1.00    150.2±0.76µs        ? ?/sec     1.11    166.0±0.87µs        ? ?/sec
```

On songs, the new algorithm gives a big improvement on slow queries and is slower on one-char prefix searches (fast queries, <1ms).

#### wiki main VS refactor-attribute-criterion
```diff
  group                                                           search_wikimain_31c18f09               search_wikirefactor-attribute-criterion_1bd15d84
  -----                                                           ------------------------               ------------------------------------------------
- smol-wiki-articles.csv: basic with quote/"rock" "and" "roll"    1.00      3.2±0.01ms        ? ?/sec    1.15      3.7±0.01ms        ? ?/sec
- smol-wiki-articles.csv: basic without quote/film                1.00    351.5±2.47µs        ? ?/sec    1.13    396.8±1.63µs        ? ?/sec
+ smol-wiki-articles.csv: basic without quote/rock and roll       1.10      9.4±0.02ms        ? ?/sec    1.00      8.6±0.04ms        ? ?/sec
- smol-wiki-articles.csv: basic without quote/spain               1.00    446.0±3.23µs        ? ?/sec    1.11    496.6±7.75µs        ? ?/sec
- smol-wiki-articles.csv: prefix search/c                         1.00    115.6±0.61µs        ? ?/sec    2.22    256.7±1.24µs        ? ?/sec
- smol-wiki-articles.csv: prefix search/g                         1.00    189.7±2.03µs        ? ?/sec    1.57    297.0±1.35µs        ? ?/sec
- smol-wiki-articles.csv: prefix search/j                         1.00    209.2±1.11µs        ? ?/sec    1.40    293.0±2.09µs        ? ?/sec
- smol-wiki-articles.csv: prefix search/q                         1.00     79.0±0.44µs        ? ?/sec    1.10     87.2±0.69µs        ? ?/sec
- smol-wiki-articles.csv: prefix search/t                         1.00    270.1±1.15µs        ? ?/sec    1.55    419.9±5.16µs        ? ?/sec
- smol-wiki-articles.csv: prefix search/x                         1.00    244.9±1.33µs        ? ?/sec    1.07    260.9±1.95µs        ? ?/sec
- smol-wiki-articles.csv: words/Abraham machin                    1.00      8.1±0.03ms        ? ?/sec    1.17      9.4±0.02ms        ? ?/sec
- smol-wiki-articles.csv: words/Idaho Bellevue pizza              1.00     19.3±0.07ms        ? ?/sec    1.07     20.6±0.05ms        ? ?/sec
```
On wiki, we have some regressions of `+17%` and `+15%` on requests `>1ms`.

Co-authored-by: many <maxime@meilisearch.com>
2021-10-06 09:19:33 +00:00
many
085bc6440c Apply PR comments 2021-10-06 11:12:26 +02:00
many
1bd15d849b Reduce candidates threshold 2021-10-05 18:52:14 +02:00
many
ea4bd29d14 Apply PR comments 2021-10-05 17:35:07 +02:00
many
5ed75de0db Update infos crate 2021-10-05 13:56:12 +02:00
many
3296bb243c Simplify word level position DB into a word position DB 2021-10-05 12:15:02 +02:00
many
75d341d928 Re-implement set based algorithm for attribute criterion 2021-10-05 12:14:50 +02:00
bors[bot]
31c18f0953 Merge #381
381: Update version for the next release (v0.17.1) r=irevoire a=curquiza



Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
2021-10-03 02:12:43 +00:00
Clémentine Urquizar
05d8a33a28 Update version for the next release (v0.17.1) 2021-10-02 16:21:31 +02:00
bors[bot]
c9092c72bf Merge #380
380: Reserved keyword error message r=Kerollmops a=irevoire

And I missed _another_ reserved keyword error message in the filter :(

Co-authored-by: Tamo <tamo@meilisearch.com>
2021-10-01 07:13:31 +00:00
Tamo
d9eba9d145 improve and test the sort error message 2021-09-30 14:38:27 +02:00
Tamo
0ee67bb7d1 improve the reserved keyword error message for the filters 2021-09-30 14:38:27 +02:00
bors[bot]
22551d0941 Merge #379
379: Revert "Change chunk size to 4MiB to fit more the end user usage" r=curquiza a=ManyTheFish

Reverts meilisearch/milli#370

Co-authored-by: Many <legendre.maxime.isn@gmail.com>
2021-09-29 13:20:53 +00:00
Many
26b5dad042 Revert "Change chunk size to 4MiB to fit more the end user usage" 2021-09-29 15:08:39 +02:00
bors[bot]
6a057a3bd0 Merge #378
378: Hotfix meilisearch#1707 r=Kerollmops a=ManyTheFish

This PR contains an ugly quick fix of [meilisearch#1707](https://github.com/meilisearch/MeiliSearch/issues/1707).

- Remove the reversed comparison on rank, enhancing relevancy and performance.
- Iterate over level 0 only, enhancing performance.

A better fix is in development.

Co-authored-by: many <maxime@meilisearch.com>
Co-authored-by: Many <legendre.maxime.isn@gmail.com>
2021-09-29 12:57:31 +00:00
Many
2e49230ca2 Update milli/src/search/criteria/attribute.rs
Co-authored-by: Clément Renault <clement@meilisearch.com>
2021-09-29 14:49:45 +02:00
Many
7ad0214089 Update milli/src/search/criteria/attribute.rs
Co-authored-by: Clément Renault <clement@meilisearch.com>
2021-09-29 14:49:41 +02:00
many
1df5b8712b Hotfix meilisearch#1707 2021-09-29 14:41:56 +02:00
bors[bot]
bfedbc1b6d Merge #374
374: Enhance CSV document parsing r=Kerollmops a=ManyTheFish

Benchmarks on `search_songs` were crashing because of the CSV parsing.

Co-authored-by: many <maxime@meilisearch.com>
2021-09-29 08:55:54 +00:00
bors[bot]
68c758a533 Merge #376
376: Stop casting integer docids to string r=Kerollmops a=irevoire

When a docid is an integer, we stop casting it to a string, and thus we don't add `"` around it.

Co-authored-by: Tamo <tamo@meilisearch.com>
2021-09-29 08:32:48 +00:00
many
d2427f18e5 Enhance CSV document parsing 2021-09-29 10:25:33 +02:00
bors[bot]
00f94b1ffd Merge #377
377: Update version for the next release (v0.17.0) r=Kerollmops a=curquiza



Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
2021-09-28 20:43:33 +00:00
Clémentine Urquizar
0e8665bf18 Update version for the next release (v0.17.0) 2021-09-28 19:38:12 +02:00
Tamo
f65153ad64 stop casting integer docids to string 2021-09-28 18:35:54 +02:00
bors[bot]
adddf3f179 Merge #375
375: Fixes #365 r=Kerollmops a=vishnugt



Co-authored-by: Vishnu Ganesan <vganesan@microsoft.com>
Co-authored-by: Vishnu Gt <vishnugt@hotmail.com>
2021-09-28 14:42:48 +00:00
Vishnu Gt
785c1372f2 Change "settings" to "setting"
Co-authored-by: Clément Renault <renault.cle@gmail.com>
2021-09-28 20:11:32 +05:30
Vishnu Ganesan
3580b2d803 Fixes #365 2021-09-28 19:30:23 +05:30
bors[bot]
3a12f5887e Merge #373
373: Improve error message for bad sort syntax with geosearch r=Kerollmops a=irevoire

`@Kerollmops` This should be the last PR for the geosearch and error handling, sorry for doing it in so many steps 😬 

Co-authored-by: Tamo <tamo@meilisearch.com>
2021-09-28 12:39:32 +00:00
Tamo
a80dcfd4a3 improve error message for bad sort syntax with geosearch 2021-09-28 14:32:24 +02:00
bors[bot]
b2a332599e Merge #372
372: Fix Meilisearch 1714 r=Kerollmops a=ManyTheFish

The bug comes from the typo tolerance: to know how many typos are accepted, we were counting bytes instead of characters in a word.
On Chinese script characters, we were allowing 2 typos on 3-character words.
We now count the number of chars instead of bytes to assign the typo tolerance.
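
A minimal, self-contained sketch of the difference (the thresholds below are illustrative, not milli's exact values):

```rust
// The accepted typo count must be derived from the character count,
// not the byte count: CJK characters take 3 bytes each in UTF-8.
fn allowed_typos(word: &str) -> u8 {
    match word.chars().count() { // previously word.len(), i.e. bytes
        0..=4 => 0,
        5..=8 => 1,
        _ => 2,
    }
}

fn main() {
    // 3 characters but 9 bytes: byte counting wrongly allowed 2 typos.
    assert_eq!("你好吗".len(), 9);
    assert_eq!(allowed_typos("你好吗"), 0);
}
```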

Related to [Meilisearch#1714](https://github.com/meilisearch/MeiliSearch/issues/1714)

Co-authored-by: many <maxime@meilisearch.com>
2021-09-28 11:59:45 +00:00
many
8046ae4bd5 Count the number of chars instead of counting bytes to assign the typo tolerance 2021-09-28 12:10:43 +02:00
many
1988416295 Add failing test related to Meilisearch#1714 2021-09-28 12:05:11 +02:00
bors[bot]
3b479948c6 Merge #371
371: Provide a sort error handler r=Kerollmops a=irevoire

This PR simplifies the error handling of asc-desc rules for Meilisearch or any other wrapper by providing, directly in milli, a new error type called `SortError` that can be generated from an `AscDescError` and automatically converted to a `UserError`.

Basically, wherever you are in the code, as a user or in milli, you can now parse an `AscDesc` syntax and, depending on the context, cast it either as a `SortError` or a `CriterionError` in one line, with improved error messages.
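
A minimal, self-contained sketch of the conversion chain described above (simplified stand-ins, not milli's exact types or messages):

```rust
#[derive(Debug)]
enum AscDescError { InvalidSyntax(String) }

#[derive(Debug)]
enum SortError { InvalidSyntax(String) }

// The one-line cast: an AscDescError becomes a SortError depending on context.
impl From<AscDescError> for SortError {
    fn from(e: AscDescError) -> Self {
        let AscDescError::InvalidSyntax(s) = e;
        SortError::InvalidSyntax(format!("invalid sort syntax: {s}"))
    }
}

fn parse_asc_desc(rule: &str) -> Result<(), AscDescError> {
    if rule.ends_with(":asc") || rule.ends_with(":desc") {
        Ok(())
    } else {
        Err(AscDescError::InvalidSyntax(rule.to_string()))
    }
}

// In a sort context, the same parsing failure surfaces as a SortError.
fn parse_sort(rule: &str) -> Result<(), SortError> {
    Ok(parse_asc_desc(rule)?)
}

fn main() {
    assert!(parse_sort("price:asc").is_ok());
    assert!(parse_sort("price").is_err());
}
```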

Co-authored-by: Tamo <tamo@meilisearch.com>
2021-09-28 09:28:32 +00:00
Tamo
cc732fe95e update http-ui to use the sort-error 2021-09-28 11:15:24 +02:00
Tamo
c7cb816ae1 simplify the error handling of the sort syntax for meilisearch 2021-09-27 19:07:22 +02:00
bors[bot]
4c09f6838f Merge #370
370: Change chunk size to 4MiB to fit more the end user usage r=ManyTheFish a=ManyTheFish

We made several indexing tests using different sizes of datasets (5 datasets from 9MiB to 100MiB) on several types of VMs (`XS: 1GiB RAM, 1 VCPU`, `S: 2GiB RAM, 2 VCPU`, `M: 4GiB RAM, 3 VCPU`, `L: 8GiB RAM, 4 VCPU`).
The results of these tests show that the `4MiB` chunk size seems to be the best among the tested chunk sizes (`2MiB`, `4MiB`, `8MiB`, `16MiB`, `32MiB`, `64MiB`, `128MiB`).

below is the average time per chunk size:

![Screenshot 2021-09-27 at 14 27 50](https://user-images.githubusercontent.com/6482087/134909368-ef0bc45e-68d5-49d1-aaf9-91113b7c410f.png)

<details>
<summary>Detailed data</summary>
<br>

![Screenshot 2021-09-27 at 14 39 48](https://user-images.githubusercontent.com/6482087/134909952-a36b1457-bbbd-4a6c-bbe5-519e4b926b5a.png)
</br>
</details> 


Co-authored-by: many <maxime@meilisearch.com>
2021-09-27 12:57:52 +00:00
many
b188063869 Change chunk size to 4MiB to fit more the end user usage 2021-09-27 14:26:21 +02:00
bors[bot]
0f8320bdc2 Merge #369
369: Add test checking the bug reported in meilisearch issue 1716 r=Kerollmops a=ManyTheFish

The bug is not present in the newer milli version.

Related to [Meilisearch#1716](https://github.com/meilisearch/MeiliSearch/issues/1716)

Co-authored-by: many <maxime@meilisearch.com>
2021-09-23 14:27:34 +00:00
many
551df0cb77 Add test checking the bug reported in meilisearch issue 1716 2021-09-23 15:55:39 +02:00
bors[bot]
87dd441a3a Merge #367
367: Update version for the next release (v0.16.0) r=Kerollmops a=curquiza



Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
2021-09-22 15:20:20 +00:00
Clémentine Urquizar
1eacab2169 Update version for the next release (v0.15.1) 2021-09-22 17:18:54 +02:00
bors[bot]
b806097141 Merge #366
366: Geosearch error handling r=Kerollmops a=irevoire

Rewrite most of the geosearch error handling and add another batch of tests on the criterion parsing.

Co-authored-by: Tamo <tamo@meilisearch.com>
Co-authored-by: Irevoire <tamo@meilisearch.com>
2021-09-22 15:08:11 +00:00
Irevoire
218f0a6661 Apply suggestions from code review
Co-authored-by: Clément Renault <clement@meilisearch.com>
2021-09-22 17:00:27 +02:00
Tamo
47ee93b0bd return an error when _geoPoint is used but _geo is not sortable 2021-09-22 16:37:41 +02:00
Tamo
1e5e3d57e2 auto convert AscDescError into CriterionError 2021-09-22 16:37:41 +02:00
Tamo
023446ecf3 create a smaller and easier to maintain CriterionError type 2021-09-22 16:37:41 +02:00
Tamo
86e272856a create an asc_desc error type that is never supposed to be returned to the end user 2021-09-22 16:37:41 +02:00
Tamo
257e621d40 create an asc_desc module 2021-09-22 16:37:41 +02:00
Tamo
113a061bee fix the error handling on the criterion side 2021-09-22 15:09:07 +02:00
bors[bot]
ad3befaaf5 Merge #364
364: Fix all the benchmarks  r=Kerollmops a=irevoire

#324 broke all benchmarks.
I fixed everything and noticed that `cargo check --all` was insufficient to check the benches in multiple workspaces, so I also updated the CI to use `cargo check --workspace --all-targets`.

Co-authored-by: Tamo <tamo@meilisearch.com>
2021-09-22 12:40:34 +00:00
Tamo
176160d32f fix all benchmarks and add the compile-time checking of the benchmarks in the ci 2021-09-22 12:10:21 +02:00
bors[bot]
16790ee620 Merge #363
363: Fix the returned `AscDesc` error r=Kerollmops a=irevoire

With my previous PR on the geosearch, I erased the change I had introduced in the PR before it, about the new error type returned when we fail to parse the `AscDesc` type.

Sorry for that, here is the fix

Co-authored-by: Tamo <tamo@meilisearch.com>
2021-09-22 09:53:35 +00:00
Tamo
78b0bce9a1 fix the returned error when asc desc fails to be parsed 2021-09-22 11:37:05 +02:00
bors[bot]
2837cab5da Merge #362
362: Remove the `Cargo.lock` again r=Kerollmops a=irevoire



Co-authored-by: Tamo <tamo@meilisearch.com>
2021-09-22 09:33:09 +00:00
Tamo
2e99fa8251 remove the cargo.lock again 2021-09-22 11:30:33 +02:00
bors[bot]
fe9f380993 Merge #361
361: Update version for the next release (v0.15.0) r=Kerollmops a=curquiza



Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
2021-09-21 16:19:16 +00:00
Clémentine Urquizar
f8ecbc28e2 Update version for the next release (v0.15.0) 2021-09-21 18:09:14 +02:00
bors[bot]
700318dc62 Merge #357
357: Add benchmarks for the geosearch r=Kerollmops a=irevoire

closes #336

Should I merge this PR in #322 and then we merge everything in `main` or should we wait for #322 to be merged and then merge this one in `main` later?

Co-authored-by: Tamo <tamo@meilisearch.com>
Co-authored-by: Irevoire <tamo@meilisearch.com>
2021-09-21 16:08:06 +00:00
bors[bot]
9d9010e45f Merge #324
324: Implement documents API r=Kerollmops a=MarinPostma

This PR implements the intermediary document representation for milli. The JSON, JSONL, and CSV formats are replaced with this format, pushing the serialization duty onto the client side.

The `documents` module contains the interface to the new document format:

- The `DocumentsBuilder` allows the creation of a writer-backed document addition, where documents are added either one by one or as arrays of depth 1. This is made possible by the fact that the serializer used by the `add_documents` methods only accepts `[Object]` and `Object`. The related serialization logic is located in the `serde.rs` file.
- The `DocumentsReader` allows iterating over the documents created by a `DocumentsBuilder`. A call to `next_document_with_index` returns the next obkv reader in the document addition, along with a reference to the index used to map the field ids in the obkv reader to the field names.

All references to json, jsonl, or csv in the tests have been replaced with the `documents!` macro, which works exactly like the `serde_json::json` macro, as a convenient way to create a `DocumentsReader`.
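
A hedged sketch of what such a call looks like, based on the description above (the exact shape of milli's helper may differ):

```rust
// Builds a `DocumentsReader` over two documents, mirroring serde_json::json!.
let documents = documents!([
    { "id": 1, "title": "Zorba the Greek" },
    { "id": 2, "title": "Zootopia" },
]);
```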

Rewrote the search cli into the `cli` crate, which also allows index manipulation. This only offers basic functionality for now, but is meant to be easier to extend than http-ui.


blocked by #308

Co-authored-by: mpostma <postma.marin@protonmail.com>
2021-09-21 15:40:03 +00:00
mpostma
aa6c5df0bc Implement documents format
document reader transform

remove update format

support document sequences

fix document transform

clean transform

improve error handling

add documents! macro

fix transform bug

fix tests

remove csv dependency

Add comments on the transform process

replace search cli

fmt

review edits

fix http ui

fix clippy warnings

Revert "fix clippy warnings"

This reverts commit a1ce3cd96e603633dbf43e9e0b12b2453c9c5620.

fix review comments

remove smallvec in transform loop

review edits
2021-09-21 16:58:33 +02:00
bors[bot]
94764e5c7c Merge #360
360: Update version for the next release (v0.14.0) r=Kerollmops a=curquiza

Release containing the geosearch, cf #322 

Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
2021-09-21 08:43:27 +00:00
bors[bot]
31c8de1cca Merge #322
322: Geosearch r=ManyTheFish a=irevoire

This PR introduces [basic geo-search functionalities](https://github.com/meilisearch/specifications/pull/59); it makes the engine able to index, filter, and sort by geo-point. We decided to use [the rstar library](https://docs.rs/rstar) and to save the points in [an RTree](https://docs.rs/rstar/0.9.1/rstar/struct.RTree.html) that we de/serialize in the index database [by using serde](https://serde.rs/) with [bincode](https://docs.rs/bincode). This is not an efficient way to query this tree, as it will consume a lot of CPU and memory when a search is made, but at least it is an easy first way to do so.

### What we will have to do on the indexing part:
 - [x] Index the `_geo` fields from the documents.
   - [x] Create a new module with an extractor in the `extract` module that takes the `obkv_documents` and retrieves the latitude and longitude coordinates, outputting them in a `grenad::Reader` for further processing.
   - [x] Call the extractor in the `extract::extract_documents_data` function and send the result to the `TypedChunk` module.
   - [x] Get the `grenad::Reader` in the `typed_chunk::write_typed_chunk_into_index` function and store all the points in the `rtree`
- [x] Delete the documents from the `RTree` when deleting documents from the database. All this can be done in the `delete_documents.rs` file by getting the data structure, removing the points from it, and inserting it back after the modification.
- [x] Clear the `RTree` entirely when we clear the documents from the database; everything happens in the `clear_documents.rs` file.
- [x] save a Roaring bitmap of all documents containing the `_geo` field

### What we will have to do on the query part:
- [x] Filter the documents at a certain distance around a point; this is done by [collecting the documents from the searched point](https://docs.rs/rstar/0.9.1/rstar/struct.RTree.html#method.nearest_neighbor_iter) while they are in range.
  - [x] We must introduce new `geoLowerThan` and `geoGreaterThan` variants to the `Operator` filter enum.
  - [x] Implement the `negative` method on both variants where the `geoGreaterThan` variant is implemented by executing the `geoLowerThan` and removing the results found from the whole list of geo faceted documents.
  - [x] Add the `_geoRadius` function in the pest parser.
- [x] Introduce a `_geo` ascending ranking function that takes a point as a parameter. ~~This function must keep the iterator on the `RTree` and make it peekable.~~ This was not possible for now; we had to collect the whole iterator. Only the documents that are part of the candidates must be sent too!
  - [x] This ascending ranking rule will only be active if the search is set up with the `_geoPoint` parameter that indicates the center point of the ascending ranking rule.

-----------

- On the Meilisearch side: we must introduce a new concept, returning the documents with a new `_geoDistance` field when the search went through the `_geo` ranking rule; this has never been done before. We could maybe just do it afterward, when the documents have been retrieved from the database, computing the distance between the `_geoPoint` and each of the documents to be returned.
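
For context, the user-facing syntax these changes enable looks like the following (coordinates and radius are illustrative):

```rust
fn main() {
    // Filter: keep documents whose _geo point lies within 2000 meters of the point.
    let filter = "_geoRadius(45.4628, 9.1076, 2000)";
    // Sort: order documents by distance to the point, closest first.
    let sort = "_geoPoint(45.4628, 9.1076):asc";
    println!("{filter} / {sort}");
}
```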

Co-authored-by: Irevoire <tamo@meilisearch.com>
Co-authored-by: cvermand <33010418+bidoubiwa@users.noreply.github.com>
Co-authored-by: Tamo <tamo@meilisearch.com>
2021-09-20 19:04:57 +00:00
Irevoire
0d104a0fce Update milli/src/criterion.rs
Co-authored-by: Clément Renault <clement@meilisearch.com>
2021-09-20 18:13:17 +02:00
Clémentine Urquizar
3f1453f470 Update version for the next release (v0.14.0) 2021-09-20 18:12:23 +02:00
Tamo
f4b8e5675d move the reserved keyword logic for the criterion and sort + add test 2021-09-20 17:21:02 +02:00
Irevoire
3b7a2cdbce fix typo
Co-authored-by: Clément Renault <clement@meilisearch.com>
2021-09-20 16:10:39 +02:00
bors[bot]
203aa727a7 Merge #359
359: Improve the benchmark comparison script r=irevoire a=irevoire

This modification allows us to compare more than 2 benchmarks, or to only print the results of one benchmark.



Co-authored-by: Irevoire <tamo@meilisearch.com>
Co-authored-by: Tamo <tamo@meilisearch.com>
2021-09-20 12:39:59 +00:00
Tamo
eaba772f21 update the README to better match the new critcmp usage
Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
2021-09-20 10:59:55 +02:00
Irevoire
9a920d1f93 Fix datasets links in the readme
Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
2021-09-20 10:44:37 +02:00
Tamo
5e683ba472 add benchmarks for the geosearch 2021-09-20 10:44:37 +02:00
Irevoire
f6c6b026bb improve the comparison script 2021-09-16 11:25:51 +02:00
Tamo
c695a1ffd2 add the possibility to sort by descending order on geoPoint 2021-09-15 11:49:58 +02:00
Tamo
91ce4d1721 Stop iterating through the whole list of points
We stop when there are no possible candidates left
2021-09-15 11:49:58 +02:00
bors[bot]
3b1885859d Merge #356
356: Update the README r=curquiza a=Kerollmops

This PR updates the README a little bit, more specifically the indexing times. Fixes #352.

Co-authored-by: Kerollmops <clement@meilisearch.com>
2021-09-14 10:13:05 +00:00
Kerollmops
2741aa8589 Update the indexing timings in the README 2021-09-14 11:42:59 +02:00
Kerollmops
a43f99c600 Inform the users that documents must have an id in their documents 2021-09-13 14:01:02 +02:00
bors[bot]
90d64d257f Merge #354
354: Update version for the next release (v0.13.1) r=ManyTheFish a=curquiza



Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
2021-09-13 09:30:07 +00:00
Clémentine Urquizar
f167f7b412 Update version for the next release (v0.13.1) 2021-09-10 09:48:17 +02:00
bors[bot]
4af31ec9a6 Merge #353
353: Add lacking parameter to word level position builder r=Kerollmops a=ManyTheFish



Co-authored-by: many <maxime@meilisearch.com>
2021-09-09 16:36:33 +00:00
Tamo
cfc62a1c15 use geoutils instead of haversine 2021-09-09 18:11:38 +02:00
many
26deeb45a3 Add lacking parameter to word level position builder 2021-09-09 17:49:04 +02:00
Tamo
3fc145c254 if we have no rtree we return all other provided documents 2021-09-09 17:44:09 +02:00
Irevoire
a84f3a8b31 Apply suggestions from code review
Co-authored-by: Clément Renault <clement@meilisearch.com>
2021-09-09 15:09:35 +02:00
Tamo
c81ff22c5b delete the invalid criterion name error in favor of invalid ranking rule name 2021-09-08 19:17:00 +02:00
Tamo
bad8ea47d5 edit the two last TODO comments 2021-09-08 18:24:09 +02:00
Tamo
b15c77ebc4 return an error in case a user tries to sort with :desc 2021-09-08 18:24:09 +02:00
Tamo
4b618b95e4 rebase on main 2021-09-08 18:24:09 +02:00
Tamo
2988d3c76d tests the geo filters 2021-09-08 18:24:09 +02:00
Tamo
e5ef0cad9a use meters in the filters 2021-09-08 18:24:09 +02:00
Tamo
4f69b190bc remove the distance from the search, the computation of the distance will be made on meilisearch side 2021-09-08 18:24:09 +02:00
Tamo
7ae2a7341c introduce the reserved keywords in the filters 2021-09-08 18:24:09 +02:00
Tamo
6d5762a6c8 handle the case where you entirely forgot the parentheses 2021-09-08 18:24:09 +02:00
Tamo
ebf82ac28c improve the error messages and add tests for the filters 2021-09-08 18:24:09 +02:00
Tamo
bd4c248292 improve the error handling in general and introduce the concept of reserved keywords 2021-09-08 18:24:09 +02:00
Tamo
e8c093c1d0 fix the error handling in the filters 2021-09-08 18:24:09 +02:00
Tamo
f0b74637dc fix all the tests 2021-09-08 18:24:09 +02:00
Tamo
b1bf7d4f40 reformat 2021-09-08 18:24:09 +02:00
Tamo
aca707413c remove the memory leak 2021-09-08 18:24:09 +02:00
Tamo
a8a1f5bd55 move the geosearch criteria out of asc_desc.rs 2021-09-08 18:24:09 +02:00
Tamo
dc84ecc40b fix a bug 2021-09-08 18:24:09 +02:00
Tamo
7483614b75 [HTTP-UI] add the sorters 2021-09-08 18:24:09 +02:00
Tamo
4820ac71a6 allow spaces in a geoRadius 2021-09-08 18:24:09 +02:00
Tamo
13c78e5aa2 Implement the _geoPoint in the sortable 2021-09-08 18:24:09 +02:00
Tamo
5bb175fc90 only index _geo if it's set as sortable OR filterable
and only allow the filters if geo was set to filterable
2021-09-08 17:51:08 +02:00
Tamo
f73273d71c only call the extractor if needed 2021-09-08 17:51:08 +02:00
cvermand
4fd0116a0d Stringify objects on dashboard to avoid [Object object] 2021-09-08 17:51:08 +02:00
Irevoire
ea2f2ecf96 create a new database containing all the documents that were geo-faceted 2021-09-08 17:51:08 +02:00
Irevoire
4b459768a0 create the _geoRadius filter 2021-09-08 17:51:07 +02:00
Irevoire
6d70978edc update the facet filter grammar 2021-09-08 17:51:07 +02:00
Irevoire
216a8aa3b2 add a tests for the indexation of the geosearch 2021-09-08 17:51:07 +02:00
Irevoire
a21c854790 handle errors 2021-09-08 17:51:07 +02:00
Irevoire
70ab2c37c5 remove multiple bugs 2021-09-08 17:51:07 +02:00
Irevoire
b4b6ba6d82 rename all the ’long’ into ’lng’ as written in the specification 2021-09-08 17:51:07 +02:00
Irevoire
3b9f1db061 implement the clear of the rtree 2021-09-08 17:51:07 +02:00
Irevoire
d344489c12 implement the deletion of geo points 2021-09-08 17:51:07 +02:00
Irevoire
44d6b6ae9e Index the geo points 2021-09-08 17:51:07 +02:00
Irevoire
8d9c2c4425 create a new db with getters and setters 2021-09-08 17:51:07 +02:00
bors[bot]
b22aac92ac Merge #342
342: Let the caller decide what kind of error they want to returns when parsing `AscDesc` r=Kerollmops a=irevoire

This is one possible fix for #339 
We would then need to patch these lines https://github.com/meilisearch/MeiliSearch/blob/main/meilisearch-http/src/index/search.rs#L110-L114 to return the error we want.

Another solution would be to add a parameter to the `from_str` to specify which context we are in.

Co-authored-by: Tamo <tamo@meilisearch.com>
2021-09-08 14:18:57 +00:00
Tamo
932998f5cc let the caller decide if they want to return an invalidSortName or an
invalidCriterionName error
2021-09-08 16:17:31 +02:00
bors[bot]
86c3b0c8c2 Merge #350
350: Fix mdb val size error r=Kerollmops a=ManyTheFish

Related to [#1677](https://github.com/meilisearch/MeiliSearch/issues/1677)

Co-authored-by: many <maxime@meilisearch.com>
2021-09-08 13:32:15 +00:00
many
e54280fbfc Skip empty normalized words 2021-09-08 15:25:23 +02:00
many
d18ee58ab9 Check if keys are not empty in validator 2021-09-08 15:25:23 +02:00
bors[bot]
63bc231243 Merge #349
349: Enable the grenad tempfile feature back r=ManyTheFish a=Kerollmops

This PR enables the grenad `tempfile` feature back; [when this feature is disabled, the sorter writes the entries in memory](7c082d05bf/src/sorter.rs (L470-L476)) instead of on disk and therefore consumes more memory. By enabling this feature, grenad merges on disk by using the `tempfile` dependency.

This PR also bumps grenad to v0.3.1, where `@ManyTheFish` added an assert for when the allocator can't allocate, and disables the default snappy compression in the `http-ui` crate.

Co-authored-by: Kerollmops <clement@meilisearch.com>
2021-09-08 13:23:57 +00:00
Kerollmops
68856e5e2f Disable the default snappy compression for the http-ui crate 2021-09-08 14:17:32 +02:00
Kerollmops
8a088fb99e Bump grenad to v0.3.1 2021-09-08 14:08:55 +02:00
Kerollmops
20ad43b908 Enable the grenad tempfile feature back 2021-09-08 14:06:28 +02:00
bors[bot]
772e55d174 Merge #347
347: Update version for the next release (v0.13.0) r=curquiza a=curquiza



Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
2021-09-08 11:41:15 +00:00
bors[bot]
d160305868 Merge #348
348: Drop sorter before creating a new one r=Kerollmops a=ManyTheFish



Co-authored-by: many <maxime@meilisearch.com>
2021-09-08 11:34:20 +00:00
many
9961b78b06 Drop sorter before creating a new one 2021-09-08 13:30:26 +02:00
Clémentine Urquizar
eb7b9d9dbf Update version for the next release (v0.13.0) 2021-09-08 10:59:30 +02:00
bors[bot]
f5e418ace7 Merge #345
345: Better dependencies cache for CI r=irevoire a=shekhirin



Co-authored-by: Alexey Shekhirin <a.shekhirin@gmail.com>
2021-09-08 08:43:19 +00:00
bors[bot]
48d211b8b0 Merge #344
344: Move the sort ranking rule before the exactness ranking rule r=ManyTheFish a=Kerollmops

This PR moves the sort ranking rule to the 5th position by default, right before the exactness one.

Co-authored-by: Kerollmops <clement@meilisearch.com>
2021-09-07 15:47:15 +00:00
Alexey Shekhirin
dbd91e7151 chore(ci): use smarter dependencies cache 2021-09-07 18:16:33 +03:00
bors[bot]
720becb5e8 Merge #341
341: Throw a query time error when a sort parameter is used but the sort ranking rule is missing r=Kerollmops a=Kerollmops

This PR makes the engine throw an error when the ranking rules don't contain the `sort` rule and the `sortable_fields` are correctly set, but the user tries to use the `sort` query parameter. Doing so would have no effect on the returned documents, so we preferred returning an error to help debug it.

That's breaking on the MeiliSearch side, as we added a new variant to the `UserError` enum.

Co-authored-by: Kerollmops <clement@meilisearch.com>
2021-09-07 14:45:05 +00:00
Kerollmops
e2cefc9b4f Move the sort ranking rule before the exactness ranking rule 2021-09-07 16:41:33 +02:00
bors[bot]
a0b3620b05 Merge #346
346: remove unused grenad default features r=Kerollmops a=MarinPostma

Milli is not using any of grenad's default features, and its zstd feature creates a conflict with meilisearch. This PR simply removes the unused features.


Co-authored-by: mpostma <postma.marin@protonmail.com>
2021-09-07 14:30:20 +00:00
mpostma
cd043d4461 remove unused grenad default features 2021-09-07 16:21:46 +02:00
Kerollmops
5989528833 Add a test to make sure we throw the right error message 2021-09-07 11:02:00 +02:00
Kerollmops
fd3daa4423 Throw a query time error when a sort param is used but sort ranking rule is missing 2021-09-07 11:02:00 +02:00
Kerollmops
8dca36433c Introduce the new SortRankingRuleMissing user error variant 2021-09-07 11:01:59 +02:00
bors[bot]
446ed17589 Merge #338
338: Fix string fields sorting r=Kerollmops a=shekhirin

Resolves https://github.com/meilisearch/milli/issues/333

<details>
  <summary>curl checks</summary>
  
 ```console
➜  ~ curl -s 'localhost:7700/indexes/movies/search' -d '{"sort": ["title:asc"], "limit": 30}' | jq -r '.hits | map(.title)[]'
#1 Cheerleader Camp
#Horror
#RealityHigh
#Roxy
#SquadGoals
$5 a Day
$9.99
'71
(2)
(500) Days of Summer
(Girl)Friend
*batteries not included
...And God Created Woman
...And Justice for All
...E fuori nevica!
.45
1
1 Mile To You
1 Night
10
10 Cloverfield Lane
10 giorni senza mamma
10 Items or Less
10 Rillington Place
10 Rules for Sleeping Around
10 Things I Hate About You
10 to Midnight
10 Years
10,000 BC
10,000 Saints

➜  ~ curl -s 'localhost:7700/indexes/movies/search' -d '{"sort": ["title:desc"], "limit": 30}' | jq -r '.hits | map(.title)[]'
크게 될 놈
왓칭
뷰티플 마인드
노무현과 바보들
ハニー
Счастье – это… Часть 2
СОТКА
Смотри мою любовь
Позивний 'Бандерас'
Лошо момиче
Күлүк Хомус
Куда течет море
Каникулы президента
Ακίνητο Ποτάμι
Üç Harfliler: Beddua
È nata una Star?
Æon Flux
Ága
À propos de Nice
À Nos Amours
À l'aventure
¡Three Amigos!
Zulu Dawn
Zulu
Zulu
Zu: Warriors from the Magic Mountain
Zu Warriors
Zorro
Zorba the Greek
Zootopia
```
</details>

Co-authored-by: Alexey Shekhirin <a.shekhirin@gmail.com>
2021-09-07 08:28:23 +00:00
Alexey Shekhirin
0be09555f1 test(search): asc/desc criteria for large datasets 2021-09-03 18:00:08 +03:00
Alexey Shekhirin
c2517e7d5f fix(facet): string fields sorting 2021-09-03 11:58:26 +03:00
bors[bot]
5cbe879325 Merge #308
308: Implement a better parallel indexer r=Kerollmops a=ManyTheFish

Rewrite the indexer:
- enhance memory consumption control
- optimize parallelism using rayon and crossbeam channel
- factorize the different parts and make new DB implementation easier
- optimize and fix prefix databases


Co-authored-by: many <maxime@meilisearch.com>
2021-09-02 15:03:52 +00:00
many
741a4444a9 Remove log in chunk generator 2021-09-02 16:57:46 +02:00
many
7f7fafb857 Make document_chunk_size settable from update builder 2021-09-02 15:25:39 +02:00
many
db0c681bae Fix Pr comments 2021-09-02 15:17:52 +02:00
bors[bot]
46f7df232a Merge #337
337: Update version for the next release (v0.12.0) r=Kerollmops a=curquiza

Breaking because of the new indexer that implies DB changes #308 

Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
2021-09-02 10:13:31 +00:00
Clémentine Urquizar
285849e3a6 Update version for the next release (v0.12.0) 2021-09-02 10:08:41 +02:00
bors[bot]
a589f6c60b Merge #335
335: Get sortable_fields from index only if criteria present in query r=Kerollmops a=shekhirin

Seems like we don't need to retrieve `sortable_fields` from the index if there aren't any `sort_criteria` in the query.

Small 🤏 optimization opportunity out there.

Co-authored-by: Alexey Shekhirin <a.shekhirin@gmail.com>
2021-09-01 16:01:00 +00:00
bors[bot]
3e0a78acf3 Merge #329
329: Run all benchmarks once every friday r=irevoire a=irevoire

All the benchmarks run every Friday on the `main` branch.
To avoid having pending benchmarks everywhere, we execute one benchmark every 8 hours.
Then the results are uploaded as if it was a normal user-run benchmark.

This PR closes #314 and #321 

Co-authored-by: Irevoire <tamo@meilisearch.com>
Co-authored-by: Tamo <tamo@meilisearch.com>
2021-09-01 15:20:49 +00:00
many
4860fd4529 Ignore empty facet values 2021-09-01 16:48:40 +02:00
many
b3a22f31f6 Fix memory consumption in word pair proximity extractor 2021-09-01 16:48:40 +02:00
many
9452fabfb2 Optimize cbo roaring bitmaps merge 2021-09-01 16:48:40 +02:00
many
8f702828ca Ignore errors coming from crossbeam channel senders 2021-09-01 16:48:40 +02:00
many
e09eec37bc Handle distance addition with hard separators 2021-09-01 16:48:40 +02:00
many
fc7cc770d4 Add logging timers 2021-09-01 16:48:40 +02:00
many
a2f59a28f7 Remove unwrap sending errors in channel 2021-09-01 16:48:40 +02:00
many
5c962c03dd Fix and optimize word_prefix_pair_proximity_docids database 2021-09-01 16:48:40 +02:00
many
2d1727697d Take stop words into account 2021-09-01 16:48:40 +02:00
many
823da19745 Fix test and use progress callback 2021-09-01 16:48:39 +02:00
many
1d314328f0 Plug new indexer 2021-09-01 16:48:36 +02:00
many
3aaf1d62f3 Publish grenad CompressionType type in milli 2021-09-01 16:42:08 +02:00
Alexey Shekhirin
0e379558a1 fix(search): get sortable_fields only if criteria present 2021-08-31 21:35:41 +03:00
bors[bot]
d6bba0663a Merge #334
334: Wrap long values into BStr for warn logs r=Kerollmops a=shekhirin

Resolves https://github.com/meilisearch/milli/issues/263

Co-authored-by: Alexey Shekhirin <a.shekhirin@gmail.com>
2021-08-31 17:38:54 +00:00
Alexey Shekhirin
0b02eb456c chore(update): wrap long values into BStr for warn logs 2021-08-31 20:28:16 +03:00
bors[bot]
df38794c7d Merge #330
330: Introduce the reset_sortable_fields Settings method r=irevoire a=Kerollmops

I forgot to add the `reset_sortable_fields` method on the `Settings` builder. It is no big deal, as the library user (like MeiliSearch) can always call `set_sortable_fields` with an empty list of fields, which is equivalent.

Co-authored-by: Kerollmops <clement@meilisearch.com>
Co-authored-by: Tamo <tamo@meilisearch.com>
Co-authored-by: bors[bot] <26634292+bors[bot]@users.noreply.github.com>
2021-08-30 23:26:54 +00:00
bors[bot]
6cdb6722d1 Merge #332
332: Sortable attributes in http-ui r=Kerollmops a=irevoire

- Add a `reset_sortable_attribute` method
- Add the `sortable_attributes` to http-ui
- Fix some broken tests in http-ui

Co-authored-by: Tamo <tamo@meilisearch.com>
2021-08-30 15:31:05 +00:00
Tamo
d106eb5b90 add the sortable attributes to http-ui and fix the tests 2021-08-30 16:25:10 +02:00
Tamo
5e639bc0c1 postfix all action name with (cron) 2021-08-30 13:55:00 +02:00
Irevoire
49a6d2d5f1 run all benchmarks once every friday 2021-08-30 13:55:00 +02:00
Kerollmops
f230ae6fd5 Introduce the reset_sortable_fields Settings method 2021-08-25 17:44:16 +02:00
bors[bot]
c8930781eb Merge #328
328: Remove `beta` compilation in CI r=Kerollmops a=shekhirin

Resolves https://github.com/meilisearch/milli/issues/326

Co-authored-by: Alexey Shekhirin <a.shekhirin@gmail.com>
2021-08-25 08:45:18 +00:00
Alexey Shekhirin
01461af333 chore(ci): remove Rust beta from tests job 2021-08-24 22:18:13 +03:00
bors[bot]
c51bb6789c Merge #325
325: Update milli version to v0.11.0 r=curquiza a=Kerollmops

This PR also cleans up some dependencies in the Cargo.toml.

Co-authored-by: Kerollmops <clement@meilisearch.com>
2021-08-24 16:18:49 +00:00
Kerollmops
af65485ba7 Reexport the grenad CompressionType from milli 2021-08-24 18:15:31 +02:00
Kerollmops
f2e1591826 Remove the unused tinytemplate dependency 2021-08-24 18:10:58 +02:00
Kerollmops
2f20257070 Update milli to the v0.11.0 2021-08-24 18:10:11 +02:00
bors[bot]
794c0f64a9 Merge #315
315: Rewrite the indexing benchmarks r=Kerollmops a=irevoire

There was a panic on the benchmark, and while I was trying to understand what was happening, I decided to rewrite the way the benchmarks were working.

Before, we were creating a database with the right settings, and then for each benchmark we were:
1. Deleting all documents in the database
2. Indexing a batch of documents

Now, for each iteration, we entirely recreate a new database from scratch.
Since deleting all the documents in a database may not be the same as starting with a fresh new database, I prefer this solution.

Co-authored-by: Irevoire <tamo@meilisearch.com>
2021-08-24 15:34:50 +00:00
bors[bot]
731e0e5321 Merge #320
320: Sort at query time r=Kerollmops a=Kerollmops

Re-introduce the Sort at query time (https://github.com/meilisearch/milli/issues/305)

Co-authored-by: Clément Renault <renault.cle@gmail.com>
2021-08-24 14:19:43 +00:00
Clément Renault
89d0758713 Revert "Revert "Sort at query time"" 2021-08-24 11:55:16 +02:00
bors[bot]
879d5e8799 Merge #319
319: Update version for the next release (v0.10.2) r=Kerollmops a=curquiza



Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
2021-08-23 10:03:23 +00:00
Clémentine Urquizar
88f6c18665 Update version for the next release (v0.10.2) 2021-08-23 11:33:30 +02:00
bors[bot]
aa1ce97748 Merge #317
317: Fix the facet string docids filterable deletion bug r=Kerollmops a=Kerollmops

Fixes a bug where the deletion of documents was returning a decoding error, but only when the settings are set with filterable attributes.

This bug was introduced in #254, in which we made the engine faster at returning the facet distribution. We changed the way we were storing the inverted index: we were no longer storing only document ids with the original values, but also groups identified by integers, depending on the facet level we were using. This is similar to how facet numbers are already stored.

⚠️ As `@curquiza` already said, we must first revert #309 before merging this!

Related to https://github.com/meilisearch/MeiliSearch/issues/1601.

Co-authored-by: Clément Renault <clement@meilisearch.com>
2021-08-23 08:57:16 +00:00
Clément Renault
c084f7f731 Fix the facet string docids filterable deletion bug 2021-08-23 10:50:39 +02:00
bors[bot]
0d1f83ba4b Merge #318
318: Revert "Sort at query time" r=Kerollmops a=curquiza

Reverts meilisearch/milli#309

We revert this from `main` not because it leads to a bug, but because we don't want to release it now and we have to merge and release a hotfix on `main`.
Cf:
- https://github.com/meilisearch/milli/issues/316
- https://github.com/meilisearch/milli/pull/317

Once v0.21.0 is released, we should merge this awesome addition again 👌 

Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
2021-08-21 08:25:17 +00:00
Clémentine Urquizar
922f9fd4d5 Revert "Sort at query time" 2021-08-20 18:09:17 +02:00
Irevoire
4b99d8cb91 rewrite the indexing benchmarks 2021-08-19 15:02:43 +02:00
bors[bot]
41fc0dcb62 Merge #309
309: Sort at query time r=Kerollmops a=Kerollmops

This PR:
 - Makes the `Asc/Desc` criteria work with strings too: it first returns documents ordered by numbers, then by strings, and finally the documents that can't be ordered. Note that the order is lexicographic and not by character, which means that it doesn't know about wide and short characters, e.g. `a`, `丹`, `▲`.
 - Changes the syntax for the `Asc/Desc` criterion to use a colon separating the name and the order, i.e. `title:asc`, `price:desc`.
 - Adds the `Sort` criterion at the third position in the ranking rules by default.
 - Adds the `sort_criteria` method to the `Search` builder struct to let the users define the `Asc/Desc` sortable attributes they want to use at query time (see the sketch after this list). Note that we need to check that the fields are registered in the sortable attributes before performing the search.
 - Introduces a new `InvalidSortableAttribute` user error that is raised when the sort criteria declared at query time are not part of the sortable attributes.
 - `@ManyTheFish` introduced integration tests for the dynamic Sort criterion.
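
A hedged sketch of the query-time usage described above (method names follow the PR text; milli's exact signatures may differ):

```rust
// Assumes `index` is an open milli Index and that `price` was registered
// in the sortable attributes through the Settings builder beforehand.
use std::str::FromStr;

fn sorted_search(index: &milli::Index) -> anyhow::Result<()> {
    let rtxn = index.read_txn()?;
    let mut search = milli::Search::new(&rtxn, index);
    search.query("red jacket");
    // If `price` is not a sortable attribute, the search raises the
    // new InvalidSortableAttribute user error instead of silently no-oping.
    search.sort_criteria(vec![milli::AscDesc::from_str("price:desc")?]);
    let _documents_ids = search.execute()?.documents_ids;
    Ok(())
}
```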

Fixes #305.

Co-authored-by: Kerollmops <clement@meilisearch.com>
Co-authored-by: many <maxime@meilisearch.com>
2021-08-18 16:55:32 +00:00
many
d1df0d20f9 Add integration test of SortBy criterion 2021-08-18 16:21:51 +02:00
Kerollmops
1b7f6ea1e7 Return a new error when the sort criteria is not sortable 2021-08-18 15:04:07 +02:00
Kerollmops
71602e0f1b Add the sortable fields into the settings and in the index 2021-08-18 15:04:07 +02:00
Kerollmops
407f53872a Add a sort_criteria method to the Search builder struct 2021-08-18 15:04:07 +02:00
Kerollmops
687cd2e205 Introduce the new Sort criterion and AscDesc enum 2021-08-18 15:04:07 +02:00
bors[bot]
198c416bd8 Merge #312
312: Update milli version to v0.10.1 r=Kerollmops a=curquiza



Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
2021-08-18 12:08:04 +00:00
Clémentine Urquizar
6cb9c3b81f Update milli version to v0.10.1 2021-08-18 13:46:27 +02:00
bors[bot]
2a67308e29 Merge #311
311: Update tokenizer version to v0.2.5 r=Kerollmops a=curquiza

Fixes a panic when indexing data containing [control characters](https://en.wikipedia.org/wiki/Control_character), while continuing to accept whitespace, obviously.

Related to https://github.com/meilisearch/MeiliSearch/issues/1590

Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
2021-08-18 11:41:52 +00:00
Clémentine Urquizar
42cf847a63 Update tokenizer version to v0.2.5 2021-08-18 13:37:41 +02:00
bors[bot]
c4275f0d27 Merge #310
310: Modify the README file r=Kerollmops a=Kerollmops



Co-authored-by: Kerollmops <clement@meilisearch.com>
2021-08-17 15:20:43 +00:00
Kerollmops
ecf8abc518 Modify the README file 2021-08-17 17:18:58 +02:00
Kerollmops
5b88df508e Use the new Asc/Desc syntax everywhere 2021-08-17 14:15:22 +02:00
Kerollmops
fcedff95e8 Change the Asc/Desc criterion syntax to use a colon (:) 2021-08-17 14:03:21 +02:00
Kerollmops
e9ada44509 AscDesc criterion returns documents ordered by numbers then by strings 2021-08-17 13:21:31 +02:00
Kerollmops
110bf6b778 Make the FacetStringIter work in both, ascending and descending orders 2021-08-17 11:18:40 +02:00
Kerollmops
22ebd2658f Introduce the EitherString/RevRange private aliases 2021-08-17 10:47:15 +02:00
Kerollmops
7a5889bc5a Introduce the highest_reverse_iter private method 2021-08-17 10:45:26 +02:00
Kerollmops
ad0d311f8a Introduce the FacetStringLevelZeroRevRange struct 2021-08-17 10:44:43 +02:00
Kerollmops
6214c38da9 Introduce the FacetStringGroupRevRange struct 2021-08-17 10:44:27 +02:00
Kerollmops
1c604de158 Introduce the highest_iter private method on the FacetStringIter struct 2021-08-17 10:41:11 +02:00
Kerollmops
64df159057 Introduce the new_reducing constructor on the FacetStringIter struct 2021-08-17 10:35:06 +02:00
Kerollmops
01a4052828 Move the FacetStringIter creation logic into a private new method 2021-08-17 10:29:43 +02:00
bors[bot]
51581d14f8 Merge #307
307: Update version for the next release (v0.10.0) r=Kerollmops a=curquiza

Replaces https://github.com/meilisearch/milli/pull/304

Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
2021-08-16 10:33:53 +00:00
Clémentine Urquizar
fcc520e49a Update version for the next release (v0.10.0) 2021-08-16 12:00:28 +02:00
bors[bot]
1541bce952 Merge #303
303: Remove max values by facet limit for facet distribution r=Kerollmops a=ManyTheFish



Co-authored-by: many <maxime@meilisearch.com>
2021-08-16 09:58:53 +00:00
many
7dbefae1e3 Make facet string iterator non reducing 2021-08-12 17:23:39 +02:00
many
8fdf860c17 Remove max values by facet limit for facet distribution 2021-08-12 11:29:20 +02:00
bors[bot]
2102e0da6b Merge #302
302: Update milli to v0.9.0 r=curquiza a=curquiza

Updating the minor version and not the patch, since #300 seems to be breaking: it involves a re-indexation to get the fix, so it requires an additional step from the users, not only downloading the latest version.

Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
2021-08-05 08:38:15 +00:00
bors[bot]
89b9b61840 Merge #300
300: Fix prefix level position docids database r=curquiza a=ManyTheFish

The prefix search was inverted when we generated the DB.
Instead of searching whether a word had a prefix in the prefix fst,
we were searching whether the word was a prefix of a prefix contained in the prefix fst.
The indexer now iterates over the prefixes contained in the fst
and searches for them by prefix in the word-level-position-docids database,
aggregating matches in a sorter.
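
A minimal, self-contained sketch of the inversion and the fix, with plain strings standing in for the fst and the LMDB database:

```rust
fn main() {
    let prefix_fst = ["hel", "wor"];        // prefixes known to the engine
    let words = ["hello", "help", "world"]; // keys of word-level-position-docids

    // Buggy direction: is the word a prefix of some prefix? (almost never true)
    let buggy: Vec<&str> = words
        .iter()
        .copied()
        .filter(|&w| prefix_fst.iter().any(|&p| p.starts_with(w)))
        .collect();
    assert!(buggy.is_empty());

    // Fixed direction: for each prefix, find the words that start with it.
    for p in prefix_fst {
        let matches: Vec<&str> = words.iter().copied().filter(|w| w.starts_with(p)).collect();
        println!("{p} -> {matches:?}");
    }
}
```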

Fix #299

Co-authored-by: many <maxime@meilisearch.com>
2021-08-04 16:52:09 +00:00
Clémentine Urquizar
7f26c75610 Update milli to v0.9.0 2021-08-04 16:04:55 +02:00
many
cdeb07f0fd Fix prefix level position docids database
The prefix search was inverted when we generated the DB.
Instead of searching whether a word had a prefix in the prefix fst,
we were searching whether the word was a prefix of a prefix contained in the prefix fst.
The indexer now iterates over the prefixes contained in the fst
and searches for them by prefix in the word-level-position-docids database,
aggregating matches in a sorter.

Fix #299
2021-08-04 14:11:49 +02:00
bors[bot]
cb45a10bcd Merge #298
298: Rename the search benchmarks r=Kerollmops a=irevoire

And fix a bug. As always, I was not closing the env.

Co-authored-by: Tamo <tamo@meilisearch.com>
2021-07-29 15:33:15 +00:00
Tamo
7eb2d71009 fix the benchmarks 2021-07-29 16:27:05 +02:00
Tamo
976dc1f4bc prefix the search benchmarks with 'search' 2021-07-29 16:27:05 +02:00
bors[bot]
1290edd58a Merge #297
297: Bump milli to v0.8.1 r=curquiza a=Kerollmops



Co-authored-by: Kerollmops <clement@meilisearch.com>
2021-07-29 14:19:41 +00:00
Kerollmops
341c244965 Bump milli to v0.8.1 2021-07-29 15:56:36 +02:00
bors[bot]
d962e46ed1 Merge #296
296: Fix invalid faceted documents ids buffer size r=Kerollmops a=Kerollmops

Fix a bug found by `@irevoire` when benchmarking the search.

Co-authored-by: Kerollmops <clement@meilisearch.com>
2021-07-29 13:52:34 +00:00
Kerollmops
90514e03d1 Fix invalid faceted documents ids buffer size 2021-07-29 15:49:23 +02:00
bors[bot]
200e98c211 Merge #293
293: Make sure that the relevancy is not impacted by other settings r=Kerollmops a=Kerollmops

Fix https://github.com/meilisearch/meilisearch/issues/1505.

fix https://github.com/meilisearch/MeiliSearch/issues/1529

Co-authored-by: Kerollmops <clement@meilisearch.com>
2021-07-27 16:04:52 +00:00
bors[bot]
bc845324df Merge #295
295: Update version for the next release (v0.8.0) r=Kerollmops a=curquiza



Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
2021-07-27 14:42:10 +00:00
Clémentine Urquizar
6a141694da Update version for the next release (v0.8.0) 2021-07-27 16:38:42 +02:00
Kerollmops
dc2b63abdf Introduce an empty FilterCondition variant to support unknown fields 2021-07-27 16:34:04 +02:00
bors[bot]
4ab7ca0e83 Merge #288
288: Stop tracking the Cargo.lock and add cache + windows to the CI r=curquiza a=irevoire

We reuse the same `~/.cargo` and `./target` directory between each run on the same OS and rust toolchain.
The `key` to decide if we can use the cache or not is: `$OS_NAME-$RUST_TOOLCHAIN-$HASH(Cargo.toml)`

We also removed the `Cargo.lock` from this repository. Indeed, milli is a library and [should not track the `Cargo.lock`](https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html)

And finally, we enabled the tests on `windows-latest`. Since `lmdb` has been updated, this is now possible.

Co-authored-by: Tamo <tamo@meilisearch.com>
2021-07-26 14:22:19 +00:00
Tamo
0038b3848a add a simple github cache 2021-07-26 15:31:26 +02:00
Tamo
88646a63a1 update bors 2021-07-26 15:31:00 +02:00
Kerollmops
b12738cfe9 Use the right DB prefixes to store the faceted fields 2021-07-22 19:18:22 +02:00
Kerollmops
7aa6cc9b04 Do not insert fields in the map when changing the settings 2021-07-22 18:40:12 +02:00
bors[bot]
ee3a49cfba Merge #291
291: Fix a bug about zero bytes in the inputs r=irevoire a=Kerollmops

Ok, good news, after a little session of debugging with `@irevoire` we found out that the bug seems to be related to zeroes in the input update. The engine wasn't designed to accept those. The chosen solution is to update the tokenizer to remove those zeroes. We are waiting on https://github.com/meilisearch/tokenizer/pull/52 to be merged and a new version to be released.

It is not an undefined behavior, I repeat: it is a "normal" bug 🎉 👏

----

This PR tries to fix a bug where we use LMDB in the wrong way, leading to a panic due to undefined behavior on the Rust side. I thought [we fixed it in a previous PR](https://github.com/meilisearch/milli/pull/264), but we found out that _a similar_ bug was still present. `@bb` found a way to trigger this bug and helped us find the origin of it.

As I don't have a minimal reproducible example of this bug, I bet on the unsafe `put_current` calls when we index new documents, as the bug was triggered after a big indexation on a clean database, thus not triggering a deletion update. I only replaced the unsafe `put_current` with two safe calls to `get`/`put`.

I hope it helps and fixes the bug; only `@bb` can help us check that. I am not even sure how I can create a custom Docker image and expose it for testing purposes.

<details>
  <summary>The backtrace leading us to a panic in grenad.</summary>

```
meilisearch_1    | thread 'tokio-runtime-worker' panicked at 'assertion failed: key > &last_key', /root/.cargo/git/checkouts/grenad-e2cb77f65d31bb02/3adcb26/src/block_builder.rs:38:17
meilisearch_1    | stack backtrace:
meilisearch_1    |    0: rust_begin_unwind
meilisearch_1    |              at ./rustc/53cb7b09b00cbea8754ffb78e7e3cb521cb8af4b/library/std/src/panicking.rs:493:5
meilisearch_1    |    1: core::panicking::panic_fmt
meilisearch_1    |              at ./rustc/53cb7b09b00cbea8754ffb78e7e3cb521cb8af4b/library/core/src/panicking.rs:92:14
meilisearch_1    |    2: core::panicking::panic
meilisearch_1    |              at ./rustc/53cb7b09b00cbea8754ffb78e7e3cb521cb8af4b/library/core/src/panicking.rs:50:5
meilisearch_1    |    3: grenad::block_builder::BlockBuilder::insert
meilisearch_1    |              at ./root/.cargo/git/checkouts/grenad-e2cb77f65d31bb02/3adcb26/src/block_builder.rs:38:17
meilisearch_1    |    4: grenad::writer::Writer<W>::insert
meilisearch_1    |              at ./root/.cargo/git/checkouts/grenad-e2cb77f65d31bb02/3adcb26/src/writer.rs:92:12
meilisearch_1    |    5: milli::update::words_level_positions::write_level_entry
meilisearch_1    |              at ./root/.cargo/git/checkouts/milli-00376cd5db949a15/007fec2/milli/src/update/words_level_positions.rs:262:5
meilisearch_1    |    6: milli::update::words_level_positions::compute_positions_levels
meilisearch_1    |              at ./root/.cargo/git/checkouts/milli-00376cd5db949a15/007fec2/milli/src/update/words_level_positions.rs:211:13
meilisearch_1    |    7: milli::update::words_level_positions::WordsLevelPositions::execute
meilisearch_1    |              at ./root/.cargo/git/checkouts/milli-00376cd5db949a15/007fec2/milli/src/update/words_level_positions.rs:65:23
meilisearch_1    |    8: milli::update::index_documents::IndexDocuments::execute_raw
meilisearch_1    |              at ./root/.cargo/git/checkouts/milli-00376cd5db949a15/007fec2/milli/src/update/index_documents/mod.rs:831:9
meilisearch_1    |    9: milli::update::index_documents::IndexDocuments::execute
meilisearch_1    |              at ./root/.cargo/git/checkouts/milli-00376cd5db949a15/007fec2/milli/src/update/index_documents/mod.rs:372:9
meilisearch_1    |   10: meilisearch_http::index::updates::<impl meilisearch_http::index::Index>::update_documents_txn
meilisearch_1    |              at ./meilisearch/meilisearch-http/src/index/updates.rs:225:30
meilisearch_1    |   11: meilisearch_http::index::updates::<impl meilisearch_http::index::Index>::update_documents
meilisearch_1    |              at ./meilisearch/meilisearch-http/src/index/updates.rs:183:22
meilisearch_1    |   12: meilisearch_http::index::update_handler::UpdateHandler::handle_update
meilisearch_1    |              at ./meilisearch/meilisearch-http/src/index/update_handler.rs:75:18
meilisearch_1    |   13: meilisearch_http::index_controller::index_actor::actor::IndexActor<S>::handle_update::{{closure}}::{{closure}}
meilisearch_1    |              at ./meilisearch/meilisearch-http/src/index_controller/index_actor/actor.rs:174:35
meilisearch_1    |   14: <tokio::runtime::blocking::task::BlockingTask<T> as core::future::future::Future>::poll
meilisearch_1    |              at ./root/.cargo/registry/src/github.com-1ecc6299db9ec823/tokio-1.7.1/src/runtime/blocking/task.rs:42:21
meilisearch_1    |   15: tokio::runtime::task::core::CoreStage<T>::poll::{{closure}}
meilisearch_1    |              at ./root/.cargo/registry/src/github.com-1ecc6299db9ec823/tokio-1.7.1/src/runtime/task/core.rs:243:17
meilisearch_1    |   16: tokio::loom::std::unsafe_cell::UnsafeCell<T>::with_mut
meilisearch_1    |              at ./root/.cargo/registry/src/github.com-1ecc6299db9ec823/tokio-1.7.1/src/loom/std/unsafe_cell.rs:14:9
meilisearch_1    |   17: tokio::runtime::task::core::CoreStage<T>::poll
meilisearch_1    |              at ./root/.cargo/registry/src/github.com-1ecc6299db9ec823/tokio-1.7.1/src/runtime/task/core.rs:233:13
meilisearch_1    |   18: tokio::runtime::task::harness::poll_future::{{closure}}
meilisearch_1    |              at ./root/.cargo/registry/src/github.com-1ecc6299db9ec823/tokio-1.7.1/src/runtime/task/harness.rs:427:23
meilisearch_1    |   19: <std::panic::AssertUnwindSafe<F> as core::ops::function::FnOnce<()>>::call_once
meilisearch_1    |              at ./rustc/53cb7b09b00cbea8754ffb78e7e3cb521cb8af4b/library/std/src/panic.rs:344:9
meilisearch_1    |   20: std::panicking::try::do_call
meilisearch_1    |              at ./rustc/53cb7b09b00cbea8754ffb78e7e3cb521cb8af4b/library/std/src/panicking.rs:379:40
meilisearch_1    |   21: std::panicking::try
meilisearch_1    |              at ./rustc/53cb7b09b00cbea8754ffb78e7e3cb521cb8af4b/library/std/src/panicking.rs:343:19
meilisearch_1    |   22: std::panic::catch_unwind
meilisearch_1    |              at ./rustc/53cb7b09b00cbea8754ffb78e7e3cb521cb8af4b/library/std/src/panic.rs:431:14
meilisearch_1    |   23: tokio::runtime::task::harness::poll_future
meilisearch_1    |              at ./root/.cargo/registry/src/github.com-1ecc6299db9ec823/tokio-1.7.1/src/runtime/task/harness.rs:414:19
meilisearch_1    |   24: tokio::runtime::task::harness::Harness<T,S>::poll_inner
meilisearch_1    |              at ./root/.cargo/registry/src/github.com-1ecc6299db9ec823/tokio-1.7.1/src/runtime/task/harness.rs:89:9
meilisearch_1    |   25: tokio::runtime::task::harness::Harness<T,S>::poll
meilisearch_1    |              at ./root/.cargo/registry/src/github.com-1ecc6299db9ec823/tokio-1.7.1/src/runtime/task/harness.rs:59:15
meilisearch_1    |   26: tokio::runtime::task::raw::RawTask::poll
meilisearch_1    |              at ./root/.cargo/registry/src/github.com-1ecc6299db9ec823/tokio-1.7.1/src/runtime/task/raw.rs:66:18
meilisearch_1    |   27: tokio::runtime::task::Notified<S>::run
meilisearch_1    |              at ./root/.cargo/registry/src/github.com-1ecc6299db9ec823/tokio-1.7.1/src/runtime/task/mod.rs:171:9
meilisearch_1    |   28: tokio::runtime::blocking::pool::Inner::run
meilisearch_1    |              at ./root/.cargo/registry/src/github.com-1ecc6299db9ec823/tokio-1.7.1/src/runtime/blocking/pool.rs:265:17
meilisearch_1    |   29: tokio::runtime::blocking::pool::Spawner::spawn_thread::{{closure}}
meilisearch_1    |              at ./root/.cargo/registry/src/github.com-1ecc6299db9ec823/tokio-1.7.1/src/runtime/blocking/pool.rs:245:17
meilisearch_1    | note: Some details are omitted, run with `RUST_BACKTRACE=full` for a verbose backtrace.
```

</details>

Co-authored-by: Kerollmops <clement@meilisearch.com>
2021-07-22 16:14:35 +00:00
Kerollmops
0353fbb5df Bump the tokenizer version to v0.2.4 2021-07-22 17:14:45 +02:00
Kerollmops
92c0a2cdc1 Add a test that triggers a panic when indexing zeroes 2021-07-22 17:14:44 +02:00
Kerollmops
aa02a7fdd8 Add a test to check that we indeed impact the relevancy 2021-07-22 17:04:38 +02:00
bors[bot]
77de82aaa4 Merge #254
254: Improve the facet string distribution speed r=Kerollmops a=Kerollmops

This pull request creates a data structure similar to the one we use for the faceted numbers, a tetratomic decision tree but this time for the facet strings. This PR also changes the facet distribution behavior by returning one of the original facet values, fixes #260.

This data structure defines bucket-like structures where document ids are stored under their facet value, and it helps the search decide whether to move to a lower level under a given bucket, depending on whether the current bucket contains interesting documents. The whole format, algorithm, and previous attempts are explained in the [`facet_string.rs` file](ec1cfdd42b/milli/src/search/facet/facet_string.rs).
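As a rough in-memory illustration of the bucket idea (not the actual on-disk format, which the linked `facet_string.rs` file documents), each higher level unions groups of consecutive lower-level entries so the search can skip whole ranges at once:

```rust
use roaring::RoaringBitmap;

// Level 0: one entry per normalized facet string, keeping the original
// string and the document ids stored under it.
type Level0 = Vec<(String, (String, RoaringBitmap))>;
// Higher level: (left bound, right bound, union of the group's docids).
type Bucket = (String, String, RoaringBitmap);

// Group every `group_size` consecutive level-0 entries into one bucket.
fn build_level(level0: &Level0, group_size: usize) -> Vec<Bucket> {
    level0
        .chunks(group_size)
        .map(|group| {
            let left = group.first().unwrap().0.clone();
            let right = group.last().unwrap().0.clone();
            let mut docids = RoaringBitmap::new();
            for (_, (_, ids)) in group {
                docids |= ids;
            }
            (left, right, docids)
        })
        .collect()
}
```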

Note that this data structure **could** hypothetically be used to sort by string lexicographically. We need more testing, in terms of performance and quality, as we would sort on lowercased versions of the facet values.

 - [x] Implement a faster and more precise way to fetch the facet distribution.
 - [x] Store and return the original facet string value. We currently return the lowercased version.

Co-authored-by: Kerollmops <clement@meilisearch.com>
Co-authored-by: Clément Renault <clement@meilisearch.com>
2021-07-21 15:34:40 +00:00
Clément Renault
0227254a65 Return the original string values for the inverted facet index database 2021-07-21 16:59:39 +02:00
Kerollmops
03a01166ba Display the original facet string value from the linear facet database 2021-07-21 16:59:39 +02:00
Clément Renault
d23c250ad5 Fix a bound error in the facet string range construction 2021-07-21 16:59:39 +02:00
Clément Renault
081278dfd6 Use the facet string levels when computing the facet distribution 2021-07-21 16:59:39 +02:00
Clément Renault
5676b204dd Fix the facet string levels codecs 2021-07-21 16:59:38 +02:00
Kerollmops
8c86348119 Indexing the facet strings levels 2021-07-21 16:59:38 +02:00
Kerollmops
a7ae552ba7 Fix the FacetStringLevelZeroRange range when unbounded 2021-07-21 16:59:38 +02:00
Kerollmops
757b2b502a Remove the FacetValueStringCodec 2021-07-21 16:59:38 +02:00
Kerollmops
adfd4da24c Introduce the FacetStringIter iterator 2021-07-21 16:59:38 +02:00
Kerollmops
a79661c6dc Introduce a lot of facet string helper iterators 2021-07-21 16:59:38 +02:00
Kerollmops
851f979039 Describe the way we want to group the facet strings 2021-07-21 16:59:38 +02:00
Kerollmops
f858f64b1f Move the facet number iterators into their own module 2021-07-21 16:59:37 +02:00
Kerollmops
9f8095c069 Make sure that we don't keep a reference on the LMDB key when using put_current 2021-07-21 10:35:35 +02:00
bors[bot]
fa44e95c91 Merge #290
290: Add a $HOME to the CI r=Kerollmops a=irevoire

This should fix this issue:
https://github.com/meilisearch/milli/runs/3104228432?check_suite_focus=true

I think a real fix would be to fix the configuration of our GitHub runner, but I don't know how to do that.
@curquiza could probably help us on that once she's back from vacation 😄 

Co-authored-by: Tamo <tamo@meilisearch.com>
2021-07-20 07:32:46 +00:00
Tamo
0ab541627b add a $HOME var to the ci 2021-07-19 14:33:49 +02:00
bors[bot]
16698f714b Merge #287
287: Add benchmarks for indexing r=Kerollmops a=irevoire

closes #274 
I don't really know how much time this will take on our bench machine. I'm afraid the wiki dataset will take a really long time to bench (it takes 1h30 on my computer).

If you are ok with it, I would like to merge this first PR since it introduces a first set of benchmarks and see how much time it takes in reality on our setup.

Co-authored-by: Tamo <tamo@meilisearch.com>
2021-07-07 15:41:15 +00:00
Tamo
931021fe57 add benchmarks for indexing 2021-07-07 13:09:05 +02:00
bors[bot]
4c9531bdf3 Merge #285
285: Support documents with at most 65536 fields r=Kerollmops a=Kerollmops

Fixes #248.

In this PR I updated the `obkv` crate; it now supports arbitrary key lengths, and therefore I was able to use a `u16` to represent the fields instead of a single byte. It was impressively easy to update the whole codebase 🍡 🍔

Co-authored-by: Kerollmops <clement@meilisearch.com>
2021-07-06 16:44:51 +00:00
Kerollmops
0a78107525 Fix the infos crate to make it read u16 field ids 2021-07-06 11:58:03 +02:00
Kerollmops
a9553af635 Add a test to check that we can index more that 256 fields 2021-07-06 11:58:03 +02:00
Kerollmops
838ed1cd32 Use an u16 field id instead of one byte 2021-07-06 11:58:03 +02:00
bors[bot]
cc54c41e30 Merge #283
283: Use the AlwaysFreePages flag when opening an index r=irevoire a=Kerollmops

We introduced a new flag in our fork of LMDB, this `AlwaysFreePages` flag forces LMDB to always free the single pages it uses before writing to the disk instead of keeping them in a linked list.

Declaring this flag reduces the memory footprint (the apparent leak) we observe after indexing a lot of documents.

Fixes #279.

Co-authored-by: Kerollmops <clement@meilisearch.com>
2021-07-05 16:59:16 +00:00
bors[bot]
63db43cc7a Merge #284
284: [http-ui] Introduce the route `die` r=Kerollmops a=irevoire

This route just `exit`s the process. This can come in handy when you run `http-ui` inside another process (a profiler, for example) and you don't want to kill everything.

Co-authored-by: Tamo <tamo@meilisearch.com>
Co-authored-by: Irevoire <tamo@meilisearch.com>
2021-07-05 15:47:53 +00:00
Irevoire
4562b278a8 remove a warning and add a log
Co-authored-by: Clément Renault <clement@meilisearch.com>
2021-07-05 17:46:02 +02:00
Tamo
a57e522a67 introduce a die route to let the program exit on its own 2021-07-05 17:38:10 +02:00
Kerollmops
91c5d0c042 Use the AlwaysFreePages flag when opening an index 2021-07-05 16:36:13 +02:00
bors[bot]
007fec21fc Merge #281
281: Bump to v0.7.2 r=ManyTheFish a=Kerollmops



Co-authored-by: Kerollmops <clement@meilisearch.com>
2021-07-05 09:00:26 +00:00
Kerollmops
a6b4069172 Bump to v0.7.2 2021-07-05 10:54:53 +02:00
bors[bot]
d7bc6a6999 Merge #280
280: Fix matching length in matching_words r=Kerollmops a=ManyTheFish

related to https://github.com/meilisearch/MeiliSearch/issues/1441

Co-authored-by: many <maxime@meilisearch.com>
2021-07-01 18:50:46 +00:00
many
9f62149b94 Fix matching length in matching_words 2021-07-01 19:03:28 +02:00
bors[bot]
f25f454bd4 Merge #275
275: Fix the benchmarks dependencies r=Kerollmops a=irevoire

Import exactly the same dependency as milli instead of a wildcard that can resolve to anything.
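Concretely, the fix pins the requirement in `benchmarks/Cargo.toml`; a schematic example, with `some-crate` and the version as placeholders:

```toml
[dependencies]
# Before: a wildcard lets Cargo pick any published version.
# some-crate = "*"

# After: the exact same requirement that milli's own Cargo.toml uses.
some-crate = "0.1.2"
```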

Co-authored-by: Tamo <tamo@meilisearch.com>
Co-authored-by: Irevoire <irevoire@protonmail.ch>
2021-07-01 11:07:01 +00:00
bors[bot]
885f243afc Merge #276
276: Fix the fmt of the auto-generated file r=Kerollmops a=irevoire

The file generated by the `build.rs` file of the benchmark was badly formatted and that was causing an issue with the git pre-commit hook I wrote [earlier](https://github.com/meilisearch/milli/blob/main/script/pre-commit)

Co-authored-by: Tamo <tamo@meilisearch.com>
2021-07-01 10:24:36 +00:00
Irevoire
ec87bf3dd5 Update benchmarks/Cargo.toml
Co-authored-by: Clément Renault <renault.cle@gmail.com>
2021-07-01 11:45:05 +02:00
Tamo
ef965aa3f3 fix the fmt of the auto-generated file 2021-07-01 11:43:09 +02:00
Tamo
fc09d77e89 fix the benchmarks dependencies 2021-07-01 11:38:30 +02:00
bors[bot]
056180e6c8 Merge #273
273: Update tokenizer version to v0.2.3 r=Kerollmops a=curquiza



Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
2021-07-01 09:02:16 +00:00
Clémentine Urquizar
3c149d8a43 Update tokenizer version to v0.2.3 2021-06-30 18:41:35 +02:00
bors[bot]
b4dcdbf00d Merge #269 #271
269: Fix bug when inserting previously deleted documents r=Kerollmops a=Kerollmops

This PR fixes #268.

The issue was in the `ExternalDocumentsIds` implementation in the specific case that an external document id was in the soft map marked as deleted.

The bug was due to a wrong assumption on my side about how the FST unions return the `IndexedValue`s: I thought the values returned in an array were in the same order as the FSTs given to the `OpBuilder`, but in fact, [the `IndexedValue`'s `index` field is there to indicate which FST the values come from](https://docs.rs/fst/0.4.7/fst/map/struct.IndexedValue.html).
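A small standalone illustration of that `fst` behavior, using the public `fst` crate API (the map contents here are made up): the `index` field tells you which input map a value came from; its position in the slice must not be relied on.

```rust
use fst::map::OpBuilder;
use fst::{Map, Streamer};

fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Think of these as the "hard" (on-disk) and "soft" (pending) id maps.
    let hard = Map::from_iter(vec![("doc-a", 0u64), ("doc-b", 1)])?;
    let soft = Map::from_iter(vec![("doc-b", 7u64)])?;

    let mut union = OpBuilder::new().add(&hard).add(&soft).union();
    while let Some((key, values)) = union.next() {
        for iv in values {
            // `iv.index` is 0 for `hard` and 1 for `soft`: this is how the
            // two sources must be told apart, not by position in `values`.
            println!(
                "{}: value {} from map #{}",
                String::from_utf8_lossy(key),
                iv.value,
                iv.index
            );
        }
    }
    Ok(())
}
```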

271: Remove the roaring operation functions warnings r=Kerollmops a=Kerollmops

In this PR we are just replacing the usages of the roaring operation functions with the new operators. This removes a lot of warnings.
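For example, with the `roaring` crate:

```rust
use roaring::RoaringBitmap;

fn main() {
    let a: RoaringBitmap = (0..100).collect();
    let b: RoaringBitmap = (50..150).collect();

    // Deprecated function style that produced the warnings:
    // let mut c = a.clone(); c.union_with(&b);

    // Operator style that replaces it:
    let union = &a | &b;
    let intersection = &a & &b;
    assert_eq!(union.len(), 150);
    assert_eq!(intersection.len(), 50);
}
```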

Co-authored-by: Kerollmops <clement@meilisearch.com>
2021-06-30 12:34:55 +00:00
Kerollmops
32b7bd366f Remove the roaring operation functions warnings 2021-06-30 14:12:56 +02:00
bors[bot]
00e2845f0f Merge #270
270: Update milli version to v0.7.1 r=Kerollmops a=curquiza



Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
2021-06-30 12:12:24 +00:00
Kerollmops
c92ef54466 Add a test for when we insert a previously deleted document 2021-06-30 14:00:01 +02:00
Kerollmops
28782ff99d Fix ExternalDocumentsIds struct when inserting previously deleted ids 2021-06-30 14:00:01 +02:00
Clémentine Urquizar
b489515f4d Update milli version to v0.7.1 2021-06-30 13:52:46 +02:00
Kerollmops
54889813ce Implement some debug functions on the ExternalDocumentsIds struct 2021-06-30 11:29:41 +02:00
Kerollmops
4bce66d5ff Make the Index::delete_* method private 2021-06-30 10:07:31 +02:00
bors[bot]
66e6ea56b8 Merge #267
267: Highlighting r=Kerollmops a=irevoire

closes #262 
I basically rewrote a part of the Damerau-Levenshtein function we were using for the highlighting so that it accepts at most two errors from the user and stops on the third mistake.
Also, it now supports UTF-8, so it should fix our issue.
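A rough standalone sketch of the idea (a restricted Damerau-Levenshtein over `char`s with an early cutoff after two errors); the real implementation lives in `milli/src/search/matching_words.rs` and differs in its details:

```rust
// Edit distance between `a` and `b`, or None once it exceeds 2,
// so the caller can stop on the third mistake.
fn capped_distance(a: &str, b: &str) -> Option<u32> {
    // Work on chars, not bytes, so a multi-byte UTF-8 character counts
    // as a single error instead of several.
    let a: Vec<char> = a.chars().collect();
    let b: Vec<char> = b.chars().collect();
    let (n, m) = (a.len(), b.len());
    // Classic DP matrix of the "optimal string alignment" variant.
    let mut d = vec![vec![0u32; m + 1]; n + 1];
    for i in 0..=n { d[i][0] = i as u32; }
    for j in 0..=m { d[0][j] = j as u32; }
    for i in 1..=n {
        for j in 1..=m {
            let cost = if a[i - 1] == b[j - 1] { 0 } else { 1 };
            d[i][j] = (d[i - 1][j] + 1)        // deletion
                .min(d[i][j - 1] + 1)          // insertion
                .min(d[i - 1][j - 1] + cost);  // substitution
            // An adjacent transposition counts as a single error.
            if i > 1 && j > 1 && a[i - 1] == b[j - 2] && a[i - 2] == b[j - 1] {
                d[i][j] = d[i][j].min(d[i - 2][j - 2] + 1);
            }
        }
        // Early exit: every cell of this row already needs 3+ edits.
        if d[i].iter().min().copied().unwrap_or(0) > 2 {
            return None;
        }
    }
    if d[n][m] <= 2 { Some(d[n][m]) } else { None }
}
```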

Co-authored-by: Tamo <tamo@meilisearch.com>
Co-authored-by: Irevoire <irevoire@protonmail.ch>
2021-06-30 05:43:50 +00:00
Irevoire
6044b80362 Update milli/src/search/matching_words.rs
Co-authored-by: Clément Renault <renault.cle@gmail.com>
2021-06-30 00:35:26 +02:00
Tamo
be75e738b1 add more tests 2021-06-29 16:24:58 +02:00
Tamo
56fceb1928 re-implement the Damerau-Levenshtein used for the highlighting 2021-06-29 15:36:03 +02:00
bors[bot]
9dbc8b2dd0 Merge #266
266: Bump LMDB to the latest version (v0.9.70) r=Kerollmops a=Kerollmops

By bumping to a new version of heed (from git, v0.12.0, not yet published), this PR fixes Windows disk reservation problems. This new version of heed changes the signatures of the `del/put_current` and `append` iterator methods by declaring them unsafe.

This PR also bumps milli itself into v0.7.0 as it is breaking due to the heed/LMDB bump.

This PR must be merged after #264.

Co-authored-by: Clément Renault <clement@meilisearch.com>
2021-06-28 17:11:41 +00:00
Clément Renault
80c6aaf1fd Bump milli to 0.7.0 2021-06-28 18:31:56 +02:00
Clément Renault
bdc5599b73 Bump heed to use the git repo with v0.12.0 2021-06-28 18:26:20 +02:00
Clément Renault
73384aec21 Merge pull request #264 from meilisearch/fix-heed-undefined-behavior
Fix the invalid heed usage
2021-06-28 18:23:49 +02:00
Clément Renault
0013236e5d Fix the LMDB and heed invalid interactions.
It is undefined behavior to keep a reference into the database while
modifying it: we were keeping references into the database and also
feeding the heed put_current methods with keys referenced inside
the database itself.

https://github.com/Kerollmops/heed/pull/108
2021-06-28 16:19:02 +02:00
Kerollmops
9e5f9a8a10 Add a test for the words level positions generation bug 2021-06-28 16:08:31 +02:00
bors[bot]
c38b0b883d Merge #257
257: Fix unconditional facet indexing r=Kerollmops a=Kerollmops

We were indexing every searchable field as filterable; this was a mistake.

Co-authored-by: Kerollmops <clement@meilisearch.com>
2021-06-23 15:32:46 +00:00
Kerollmops
98285b4b18 Bump milli to 0.6.0 2021-06-23 17:30:26 +02:00
Kerollmops
4fc8f06791 Rename faceted_fields into filterable_fields 2021-06-23 17:26:54 +02:00
Kerollmops
c31cadb54f Do not consider the searchable field as filterable 2021-06-23 17:26:54 +02:00
bors[bot]
41c4a5b60d Merge #246
246: Improve the ci r=Kerollmops a=irevoire

Rewrite the CI entirely:
- run the CI on Linux, macOS, and Windows
- run the CI on Rust stable, beta, and nightly
- add rustfmt to the CI
- split the CI into multiple tasks; this way, the CI should fail faster

Co-authored-by: Tamo <tamo@meilisearch.com>
Co-authored-by: Irevoire <tamo@meilisearch.com>
2021-06-23 12:52:39 +00:00
Irevoire
faa3cd3b71 Update bors.toml
Don't check nightly and beta channel

Co-authored-by: Clément Renault <clement@meilisearch.com>
2021-06-23 14:30:33 +02:00
bors[bot]
2ab24c4f49 Merge #256
256: Update version for the next release (v0.5.1) r=Kerollmops a=curquiza



Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
2021-06-23 12:29:57 +00:00
Clémentine Urquizar
9885fb4159 Update version for the next release (v0.5.1) 2021-06-23 14:05:20 +02:00
bors[bot]
66f55e3e6a Merge #255
255: Fix facet distribution error r=Kerollmops a=Kerollmops

This PR fixes two invalid behaviors and fixes #253:
 - We were ignoring the list of fields for which the user wanted a facet distribution.
 - We were not raising any error for when a non-filterable field was requested a facet distribution.

~For the latter behavior I need the help of @curquiza to help me choose the right error type.~

Co-authored-by: Kerollmops <clement@meilisearch.com>
2021-06-23 12:03:05 +00:00
Kerollmops
a6218a20ae Introduce a new InvalidFacetsDistribution user error 2021-06-23 13:56:19 +02:00
Kerollmops
2364777838 Return an error for when a field distribution cannot be done 2021-06-23 11:50:49 +02:00
Kerollmops
aeaac743ff Replace an if let some by a match 2021-06-23 11:33:30 +02:00
Tamo
5099192c44 update bors.toml 2021-06-23 10:22:40 +02:00
Tamo
d8695da1d1 improve the ci 2021-06-23 10:22:40 +02:00
bors[bot]
28197b2435 Merge #252
252: Run the formatter on the whole project a second time r=Kerollmops a=irevoire



Co-authored-by: Tamo <tamo@meilisearch.com>
2021-06-22 13:56:09 +00:00
Tamo
8d2a0b43ff run the formatter on the whole project a second time 2021-06-22 15:36:22 +02:00
bors[bot]
634201244c Merge #250 #251
250: Add the limit field to http-ui r=Kerollmops a=irevoire



251: Fix the limit r=Kerollmops a=irevoire

There was no check on the limit, and thus if a user specified a very large number this line could cause a panic.
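A tiny sketch of the kind of guard this adds; the names and the hard cap are hypothetical, not the actual milli code:

```rust
// Clamp a user-provided limit so a huge value can never drive an
// allocation or a slice operation into a panic.
const HARD_LIMIT: usize = 1000;

fn effective_limit(requested: Option<usize>, default: usize) -> usize {
    requested.unwrap_or(default).min(HARD_LIMIT)
}
```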

Co-authored-by: Tamo <tamo@meilisearch.com>
2021-06-22 13:00:52 +00:00
Tamo
3d90b03d7b fix the limit
There was no check on the limit and thus, if a user specified a very large number, this line could cause a panic
2021-06-22 14:52:13 +02:00
Tamo
81643e6d70 add the limit field to http-ui 2021-06-22 14:47:23 +02:00
bors[bot]
5aea8dd75b Merge #249
249: Enable the jemallocator dependencies only when we are running on linux r=Kerollmops a=irevoire



Co-authored-by: Tamo <tamo@meilisearch.com>
2021-06-22 12:32:44 +00:00
Tamo
77eb37934f add jemalloc to http-ui and the benchmarks 2021-06-22 14:17:56 +02:00
bors[bot]
5b6adc6d96 Merge #245
245: Warn for when a key is too large for LMDB r=Kerollmops a=Kerollmops

Closes #191, and resolves #140.

Co-authored-by: Kerollmops <clement@meilisearch.com>
2021-06-22 12:10:52 +00:00
Tamo
d53df8a002 enable the jemallocator dependencies only when we are running on linux 2021-06-22 14:04:16 +02:00
bors[bot]
ca9fa329d1 Merge #247
247: Return a `MissingDocumentId` error when a document doesn't have one r=Kerollmops a=Kerollmops

We were wrongly returning a `MissingPrimaryKey` instead of a `MissingDocumentId` error when a document was missing a document id. We also improved the error message for when a document id is invalid (wrong type or wrong format).

Co-authored-by: Kerollmops <clement@meilisearch.com>
2021-06-22 10:07:54 +00:00
Kerollmops
51dbb2e06d Warn for when a key is too large for LMDB 2021-06-22 11:51:36 +02:00
Kerollmops
aecbd14761 Improve the error message for InvalidDocumentId 2021-06-22 11:31:58 +02:00
Kerollmops
0cca2ea24f Return a MissingDocumentId when a document doesn't have one 2021-06-22 11:22:33 +02:00
Kerollmops
481b0bf277 Warn for when a facet key is too large for LMDB 2021-06-22 10:57:46 +02:00
bors[bot]
b073fd49ea Merge #244
244: Update version for the next release (v0.5.0) r=Kerollmops a=curquiza



Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
2021-06-21 14:27:10 +00:00
bors[bot]
be2ebdd395 Merge #243
243: Rename FieldsDistribution into FieldDistribution r=Kerollmops a=curquiza



Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
2021-06-21 14:00:35 +00:00
Clémentine Urquizar
320670f8fe Update version for the next release (v0.5.0) 2021-06-21 15:59:17 +02:00
Clémentine Urquizar
daef43f504 Rename FieldsDistribution into FieldDistribution 2021-06-21 15:57:41 +02:00
bors[bot]
b120c32cad Merge #242
242: Update version for the next release (v0.4.2) r=Kerollmops a=curquiza



Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
2021-06-21 09:01:42 +00:00
Clémentine Urquizar
35fcc351a0 Update version for the next release (v0.4.2) 2021-06-20 17:37:24 +02:00
bors[bot]
5b19dd23d9 Merge #240
240: Field distribution r=Kerollmops a=irevoire

closes #199
closes #198 


Co-authored-by: Tamo <tamo@meilisearch.com>
2021-06-19 10:14:25 +00:00
Tamo
d08cfda796 convert the field_distribution to a BTreeMap and avoid counting the same documents twice 2021-06-17 18:31:54 +02:00
bors[bot]
a9e552ab18 Merge #238
238: Integration tests on filters and distinct r=Kerollmops a=ManyTheFish

Fix #216 
Fix #120 

Co-authored-by: many <maxime@meilisearch.com>
2021-06-17 15:00:51 +00:00
many
6cb1102bdb Fix PR comments 2021-06-17 15:19:03 +02:00
Tamo
969adaefdf rename fields_distribution in field_distribution 2021-06-17 15:16:20 +02:00
bors[bot]
a67ccfdf3a Merge #239
239: Update version to the next release (0.4.1) r=Kerollmops a=Kerollmops



Co-authored-by: Kerollmops <clement@meilisearch.com>
2021-06-17 13:02:37 +00:00
Kerollmops
ccd6f13793 Update version to the next release (0.4.1) 2021-06-17 15:01:20 +02:00
many
f496cd320d Add distinct integration tests 2021-06-17 14:33:18 +02:00
many
9f4184208e Add test on filters 2021-06-17 13:56:09 +02:00
bors[bot]
bb89ef9fc0 Merge #237
237: change sub errors visibility r=Kerollmops a=MarinPostma

re-export sub-error types so they can be matched upon outside of milli.


Co-authored-by: marin postma <postma.marin@protonmail.com>
Co-authored-by: marin <postma.marin@protonmail.com>
2021-06-17 09:51:18 +00:00
marin
70bee7d405 re-export remaining error types
Co-authored-by: Clément Renault <clement@meilisearch.com>
2021-06-17 11:49:03 +02:00
marin postma
abbebad669 change sub errors visibility 2021-06-17 11:44:01 +02:00
bors[bot]
1bcf43baac Merge #236
236: Format the whole project r=Kerollmops a=irevoire

I need to add `cargo fmt` in the CI before closing #231

Co-authored-by: Tamo <tamo@meilisearch.com>
2021-06-16 18:05:40 +00:00
Tamo
9716fb3b36 format the whole project 2021-06-16 18:33:33 +02:00
bors[bot]
ba30cef987 Merge #234
234: Revert "Enable optimization in every profile" r=Kerollmops a=ManyTheFish

compiling tests in release takes too much time.

Reverts meilisearch/milli#224

Fix #233 

Co-authored-by: Many <legendre.maxime.isn@gmail.com>
2021-06-16 13:38:58 +00:00
Many
41bdc90f46 Revert "Enable optimization in every profile" 2021-06-16 14:17:02 +02:00
bors[bot]
3bd4cf94cc Merge #235
235: Update version for the next release (v0.4.0) r=Kerollmops a=curquiza



Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
2021-06-16 12:02:40 +00:00
Clémentine Urquizar
f5ff3e8e19 Update version for the next release (v0.4.0) 2021-06-16 14:01:05 +02:00
bors[bot]
02e0271e44 Merge #225
225: Introduce the error handler r=ManyTheFish a=Kerollmops

Fixes #109.

Co-authored-by: Kerollmops <clement@meilisearch.com>
Co-authored-by: many <maxime@meilisearch.com>
2021-06-16 09:46:23 +00:00
many
ce0315a10f Close write transaction in test 2021-06-16 11:03:37 +02:00
Kerollmops
7ac441e473 Fix small typos 2021-06-16 11:03:37 +02:00
Kerollmops
adf0c389c5 Rename FilterParsing into InvalidFilter 2021-06-16 11:03:36 +02:00
Kerollmops
8cfe3e1ec0 Rename DatabaseSizeReached into MaxDatabaseSizeReached 2021-06-16 11:03:36 +02:00
Kerollmops
4eda438f6f Add a new Error for when a user use a non-filtered attribute in a filter 2021-06-16 11:03:36 +02:00
Kerollmops
713acc408b Introduce the primary key to the Settings builder structure 2021-06-16 11:03:36 +02:00
Kerollmops
a7d6930905 Replace the panicking expect by tracked Errors 2021-06-15 11:51:32 +02:00
Kerollmops
f0e804afd5 Rename the FieldIdMapMissingEntry from_db_name field into process 2021-06-15 11:13:04 +02:00
Kerollmops
28c004aa2c Prefer using constant for the database names 2021-06-15 11:13:04 +02:00
Kerollmops
78fe4259a9 Fix the http-ui crate 2021-06-14 18:06:23 +02:00
Kerollmops
312c2d1d8e Use the Error enum everywhere in the project 2021-06-14 16:58:38 +02:00
Kerollmops
ca78cb5aca Introduce more variants to the error module enums 2021-06-14 16:58:38 +02:00
Kerollmops
456541e921 Implement the Display trait on the Error type 2021-06-14 16:48:51 +02:00
Kerollmops
44c353fafd Introduce some way to construct an Error 2021-06-14 16:48:51 +02:00
Kerollmops
23fcf7920e Introduce a basic version of the InternalError struct 2021-06-14 16:48:51 +02:00
Kerollmops
d2b1ecc885 Remove a lot of serialization unreachable errors 2021-06-14 16:48:51 +02:00
Kerollmops
65b1d09d55 Move the obkv merging functions into the merge_function module 2021-06-14 16:48:51 +02:00
Kerollmops
ab727e428b Remove the docid_word_positions_merge method that must never be called 2021-06-14 16:48:51 +02:00
Kerollmops
93a8633f18 Remove the documents_merge method that must never be called 2021-06-14 16:48:51 +02:00
Kerollmops
cfc7314bd1 Prefer using an explicit merge function name 2021-06-14 16:48:50 +02:00
Kerollmops
93978ec38a Serializing a RoaringBitmap into a Vec cannot fail 2021-06-14 16:48:50 +02:00
Kerollmops
ff9414a6ba Use the out of the compute_primary_key_pair function 2021-06-14 16:48:50 +02:00
bors[bot]
0542e2179f Merge #230
230: Update Tokenizer version to v0.2.3 r=ManyTheFish a=curquiza



Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
2021-06-10 16:28:05 +00:00
Clémentine Urquizar
7d5395c12b Update Tokenizer version to v0.2.3 2021-06-10 17:00:04 +02:00
bors[bot]
3e6c05fe13 Merge #227
227: Replace Consecutive by Phrase in query tree r=Kerollmops a=ManyTheFish

Replace `Consecutive` by `Phrase` in the query tree in order to remove theoretical bugs
due to the `Consecutive` enum type.

Co-authored-by: many <maxime@meilisearch.com>
Co-authored-by: Many <legendre.maxime.isn@gmail.com>
2021-06-10 09:31:39 +00:00
Many
f4cab080a6 Update milli/src/search/query_tree.rs
Co-authored-by: Clément Renault <clement@meilisearch.com>
2021-06-10 11:30:51 +02:00
Many
36715f571c Update milli/src/search/criteria/proximity.rs
Co-authored-by: Clément Renault <clement@meilisearch.com>
2021-06-10 11:30:33 +02:00
many
e923a3ed6a Replace Consecutive by Phrase in query tree
Replace Consecutive by Phrase in query tree in order to remove theoretical bugs
due to the Consecutive enum type.
2021-06-10 11:16:16 +02:00
bors[bot]
bc02031793 Merge #226
226: Update version for the next release (v0.3.1) r=Kerollmops a=curquiza



Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
2021-06-09 14:13:42 +00:00
Clémentine Urquizar
dc64e139b9 Update version for the next release (v0.3.1) 2021-06-09 14:39:21 +02:00
bors[bot]
5cf1b0b138 Merge #224
224: Enable optimization in every profile r=Kerollmops a=irevoire



Co-authored-by: Irevoire <tamo@meilisearch.com>
2021-06-09 09:13:21 +00:00
bors[bot]
afb4133bd2 Merge #212 #222 #223
212: Introduce integration test on criteria r=Kerollmops a=ManyTheFish

- add pre-ranked dataset
- test each criterion 1 by 1
- test all criteria in several order

222: Move the `UpdateStore` into the http-ui crate r=Kerollmops a=Kerollmops

We no longer need to have the `UpdateStore` inside of the milli crate, as it is the job of the caller to stack the updates and sequentially give them to milli.

223: Update dataset links r=Kerollmops a=curquiza



Co-authored-by: many <maxime@meilisearch.com>
Co-authored-by: Many <legendre.maxime.isn@gmail.com>
Co-authored-by: Kerollmops <clement@meilisearch.com>
Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
2021-06-09 08:47:19 +00:00
Irevoire
86b916b008 enable optimization in every profile 2021-06-09 10:26:57 +02:00
bors[bot]
6faa87302c Merge #220
220: Make hard separators split phrase query r=Kerollmops a=ManyTheFish

hard separators will now split a phrase query into two sequential phrases (double-quoted strings):

the query `"Radioactive (Imagine Dragons)"` would be considered equivalent to `"Radioactive" "Imagine Dragons"`, which has the little disadvantage of not keeping the order of the two (or more) separate phrases.
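A rough sketch of the splitting behavior; the token representation is simplified here, while the real logic relies on the tokenizer's separator kinds:

```rust
// Simplified tokens: words, and separators flagged hard or soft.
enum Token<'a> {
    Word(&'a str),
    Separator { hard: bool },
}

// Split one quoted phrase into sequential phrases on hard separators;
// soft separators keep the surrounding words in the same phrase.
fn split_phrase<'a>(tokens: &[Token<'a>]) -> Vec<Vec<&'a str>> {
    let mut phrases: Vec<Vec<&str>> = vec![Vec::new()];
    for token in tokens {
        match token {
            Token::Word(w) => phrases.last_mut().unwrap().push(*w),
            Token::Separator { hard: true } => {
                if !phrases.last().unwrap().is_empty() {
                    phrases.push(Vec::new());
                }
            }
            Token::Separator { hard: false } => {}
        }
    }
    phrases.retain(|p| !p.is_empty());
    phrases
}
```

With tokens for `Radioactive (Imagine Dragons)`, the hard `(` separator yields `[["Radioactive"], ["Imagine", "Dragons"]]`.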

Fix #208

Co-authored-by: many <maxime@meilisearch.com>
Co-authored-by: Many <legendre.maxime.isn@gmail.com>
2021-06-09 08:22:58 +00:00
Many
f4ff30e99d Update milli/tests/search/mod.rs
Co-authored-by: Clément Renault <clement@meilisearch.com>
2021-06-09 10:12:24 +02:00
Many
ab696f6a23 Update milli/tests/search/query_criteria.rs
Co-authored-by: Clément Renault <clement@meilisearch.com>
2021-06-09 10:12:17 +02:00
Clément Renault
d89f5ca48e Merge pull request #219 from meilisearch/fix-criteria-fields-ids-map
Save the criteria field name in the fields ids map
2021-06-08 18:46:57 +02:00
Clémentine Urquizar
7e93811fbc Update dataset links 2021-06-08 18:18:54 +02:00
Kerollmops
0bf4f3f48a Modify a test to check that criteria additions change the fields ids map 2021-06-08 18:14:34 +02:00
Kerollmops
82df524e09 Make sure that we register the field when setting criteria 2021-06-08 18:14:33 +02:00
Clément Renault
8e2c41e7f7 Merge pull request #221 from meilisearch/fix-primary-key-delete
Use the index primary key when deleting documents
2021-06-08 18:13:42 +02:00
Kerollmops
103dddba2f Move the UpdateStore into the http-ui crate 2021-06-08 17:59:51 +02:00
Many
faf148d297 Update milli/src/search/query_tree.rs
Co-authored-by: Clément Renault <clement@meilisearch.com>
2021-06-08 17:52:37 +02:00
Kerollmops
133ab98260 Use the index primary key when deleting documents 2021-06-08 17:33:29 +02:00
many
b489d699ce Make hard separators split phrase query
hard separators will now split a phrase query into sequential double-quoted phrases

Fix #208
2021-06-08 17:29:38 +02:00
Many
afb09c914d Update milli/tests/search/query_criteria.rs
Co-authored-by: Clément Renault <clement@meilisearch.com>
2021-06-08 16:53:56 +02:00
many
b64cd2a3e3 Resolve PR comments 2021-06-08 14:14:34 +02:00
many
1fcc5f73ac Factorize tests using macro_rules 2021-06-08 12:33:02 +02:00
bors[bot]
32cf5a29ce Merge #218
218: Enable optimization for build.rs and macro r=Kerollmops a=irevoire

It speeds up the unzipping of the benchmark's dataset a lot
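The Cargo knob for this looks roughly as follows (in the workspace `Cargo.toml`); `build-override` applies to build scripts, proc macros, and their dependencies:

```toml
[profile.dev.build-override]
opt-level = 3

[profile.release.build-override]
opt-level = 3
```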


Co-authored-by: Irevoire <tamo@meilisearch.com>
2021-06-08 09:56:23 +00:00
Irevoire
e0c327bae2 Update Cargo.toml
Co-authored-by: Clément Renault <clement@meilisearch.com>
2021-06-08 11:39:10 +02:00
Irevoire
c82a382b0b compile every build.rs with optimization 2021-06-08 11:19:22 +02:00
bors[bot]
eb149030eb Merge #215
215: Make the benchmark command more convenient in CI r=Kerollmops a=curquiza



Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
2021-06-08 09:04:26 +00:00
bors[bot]
fd032165d7 Merge #217
217: Improve the benchmarks readme r=Kerollmops a=irevoire

- Move the Dataset part to the end of the readme so that people who just want to run the benchmarks are not tempted to download the datasets by hand (they are going to be downloaded anyway by the `build.rs` script)
- Fix the links in the dataset -- wiki part


Co-authored-by: Irevoire <tamo@meilisearch.com>
2021-06-08 08:44:16 +00:00
Irevoire
d912c94034 improve the benchmark’s readme 2021-06-08 10:38:23 +02:00
Irevoire
563492f1e5 update the TOC order 2021-06-07 17:29:22 +02:00
Clémentine Urquizar
38ab541f4a Make the benchmark command more convenient in CI 2021-06-04 00:21:39 +02:00
bors[bot]
af38196a6b Merge #214
214: Add --locked in CI tests r=Kerollmops a=curquiza



Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
2021-06-03 14:39:36 +00:00
Clémentine Urquizar
e9104a0a32 Add --locked in CI tests 2021-06-03 16:23:59 +02:00
Clémentine Urquizar
70229f07c8 Update Cargo.lock 2021-06-03 16:22:43 +02:00
bors[bot]
ee7d291442 Merge #213
213: Fix the benchmarks script and names r=Kerollmops a=Kerollmops

The benchmarks compare script was not using the `--output` flag and was therefore failing to download the JSON reports. We also modified the criterion benchmarks to use shorter names; this helps when looking at the benchmarks in the terminal.

Co-authored-by: Kerollmops <clement@meilisearch.com>
2021-06-03 14:18:45 +00:00
Kerollmops
29824d05ab Reduce the length of the benchmarks names 2021-06-03 15:59:43 +02:00
Kerollmops
76a2343639 Fix the compare script of the benchmarks 2021-06-03 15:39:52 +02:00
many
10882bcbce Introduce integration test on criteria 2021-06-03 14:44:53 +02:00
bors[bot]
a32236c80c Merge #211
211: Update Cargo.toml for next release v0.3.0 r=Kerollmops a=curquiza



Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
2021-06-03 10:42:52 +00:00
Clémentine Urquizar
3b2b3aeea9 Update Cargo.toml for next release v0.3.0 2021-06-03 12:24:27 +02:00
bors[bot]
39ed133f9f Merge #193
193: Fix primary key behavior r=Kerollmops a=MarinPostma

this PR:
- Adds early returns on empty document additions, avoiding error messages being returned when no documents are added and no primary key is set.
- Changes the primary key inference logic to match that of legacy meilisearch (see the sketch below).

close #194 
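A minimal sketch of that inference rule, under the assumption (as in legacy meilisearch) that the primary key is the first attribute whose name contains `id`, case-insensitively; the function name is illustrative:

```rust
// Pick the primary key among a document's attribute names: the first
// one whose name contains "id", ignoring case. None means the caller
// must report that no primary key could be inferred.
fn infer_primary_key<'a>(mut attributes: impl Iterator<Item = &'a str>) -> Option<&'a str> {
    attributes.find(|name| name.to_lowercase().contains("id"))
}
```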

Co-authored-by: Marin Postma <postma.marin@protonmail.com>
Co-authored-by: marin postma <postma.marin@protonmail.com>
2021-06-03 10:24:21 +00:00
bors[bot]
fd598f060c Merge #210
210: Check the benchmarks in the CI r=Kerollmops a=Kerollmops

Fixes #209.

Co-authored-by: Kerollmops <clement@meilisearch.com>
2021-06-03 09:16:06 +00:00
Kerollmops
99b45d2aa0 Make sure that all the workspaces crates compile 2021-06-03 10:56:01 +02:00
marin postma
57898d8a90 fix silent deserialize error 2021-06-03 10:42:55 +02:00
Kerollmops
82fb5f0bef Fix the benchmarks compilation 2021-06-03 10:33:42 +02:00
Kerollmops
6b7841fefc Make sure that the benchmarks always compile 2021-06-03 10:29:21 +02:00
bors[bot]
834504aec0 Merge #204
204: Decorrelate Distinct, Asc/Desc, Filterable fields from the faceted fields r=Kerollmops a=Kerollmops

This PR decorrelates the fields that need to be stored in the facet databases (big inverted indexes for fast access) from the filterable fields: the previously named faceted fields are now named filterable fields, and the fields that actually get faceted are the union of the distinct attribute, all the Asc/Desc criteria fields, and the filterable fields.
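In other words, roughly (names illustrative):

```rust
use std::collections::HashSet;

// The fields that need a facet database are now derived, not declared:
// the distinct attribute, every Asc/Desc criterion field, and every
// user-declared filterable field.
fn faceted_fields(
    filterable: &HashSet<String>,
    distinct: Option<&String>,
    asc_desc_fields: &HashSet<String>,
) -> HashSet<String> {
    let mut faceted = filterable.clone();
    faceted.extend(asc_desc_fields.iter().cloned());
    faceted.extend(distinct.cloned());
    faceted
}
```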

I added two tests to make sure that the engine correctly generates the faceted databases when a distinct attribute or an Asc/Desc criterion is added, and one to make sure that it is impossible to filter on a non-filterable field even if it is a faceted one.

Note that `AttributesForFacetting` has also been renamed into `FilterableAttributes`. But it will be the Transplant's job to do that on the API; this change is only visible to milli's library users.

- Related to https://github.com/meilisearch/transplant/issues/187.
- Fixes #161 by returning the documents that don't have the Asc/Desc field at the end of the bucket.
- Fixes #168.
- Fixes #152.

Co-authored-by: Kerollmops <clement@meilisearch.com>
Co-authored-by: Marin Postma <postma.marin@protonmail.com>
Co-authored-by: many <maxime@meilisearch.com>
2021-06-02 15:43:39 +00:00
many
26a9974667 Make asc/desc criterion return the remaining documents
Fix #161.2
2021-06-02 17:41:48 +02:00
bors[bot]
28962bce99 Merge #207
207: Benchmarks r=Kerollmops a=irevoire



Co-authored-by: tamo <tamo@meilisearch.com>
Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
Co-authored-by: Tamo <irevoire@hotmail.fr>
Co-authored-by: Irevoire <tamo@meilisearch.com>
2021-06-02 15:29:09 +00:00
Tamo
6dc08bf45e remove the nop function 2021-06-02 17:09:21 +02:00
Tamo
087ae64899 add a gitignore to avoid pushing the autogenerated file 2021-06-02 17:03:30 +02:00
Tamo
3db25153e5 fix the faceted_fields one last time 2021-06-02 17:00:58 +02:00
Kerollmops
3c304c89d4 Make sure that we generate the faceted database when required 2021-06-02 16:24:58 +02:00
Kerollmops
b0c0490e85 Make sure that we can add a Asc/Desc field without it being filterable 2021-06-02 16:24:58 +02:00
Kerollmops
3b1cd4c4b4 Rename the FacetCondition into FilterCondition 2021-06-02 16:24:58 +02:00
Kerollmops
c2afdbb1fb Move and comment some internal facet_condition helper functions 2021-06-02 16:24:58 +02:00
Kerollmops
6476827d3a Fix the indexer to be sure that distinct and Asc/Desc are also faceted 2021-06-02 16:24:58 +02:00
Kerollmops
c10469ddb6 Patch the http-ui crate to support filterable fields 2021-06-02 16:24:58 +02:00
Marin Postma
1e366dae3e remove useless lifetime on Distinct Trait 2021-06-02 16:24:58 +02:00
Kerollmops
187c713de5 Remove the MapDistinct struct as now distinct attributes are faceted 2021-06-02 16:24:57 +02:00
Kerollmops
ff440c1d9d Introduce the faceted fields method to retrieve those that needs faceting 2021-06-02 16:24:57 +02:00
Kerollmops
2a3f9b32ff Rename the faceted fields into filterable fields 2021-06-02 16:24:57 +02:00
Irevoire
f346805c0c Update benchmarks/Cargo.toml
Co-authored-by: Clément Renault <clement@meilisearch.com>
2021-06-02 15:47:03 +02:00
Clémentine Urquizar
ef1ac8a0cb Update README 2021-06-02 11:13:22 +02:00
Clémentine Urquizar
edfcdb171c Update benchmarks/scripts/list.sh
Co-authored-by: Irevoire <tamo@meilisearch.com>
2021-06-02 11:13:22 +02:00
Clémentine Urquizar
3c91a9a551 Update following reviews 2021-06-02 11:13:22 +02:00
Tamo
bc4f4ee829 remove s3cmd as a dependency and provide a script to list all the available benchmarks 2021-06-02 11:13:22 +02:00
Clémentine Urquizar
61fe422a88 Update benchmarks/scripts/compare.sh
Co-authored-by: Irevoire <tamo@meilisearch.com>
2021-06-02 11:13:22 +02:00
Clémentine Urquizar
57ed96622b Update benchmarks/scripts/compare.sh
Co-authored-by: Irevoire <tamo@meilisearch.com>
2021-06-02 11:13:22 +02:00
Clémentine Urquizar
b3c0d43890 Update benchmarks/scripts/compare.sh
Co-authored-by: Irevoire <tamo@meilisearch.com>
2021-06-02 11:13:22 +02:00
Clémentine Urquizar
0d0e900158 Add CI for benchmarks 2021-06-02 11:13:22 +02:00
tamo
4536dfccd0 add a way to provide primary_key or autogenerate documents ids 2021-06-02 11:13:20 +02:00
tamo
06c414a753 move the benchmarks to another crate so we can download the datasets automatically without adding overhead to the build of milli 2021-06-02 11:11:50 +02:00
tamo
3c84075d2d uses an env variable to find the datasets 2021-06-02 11:05:07 +02:00
tamo
4969abeaab update the facets for the benchmarks 2021-06-02 11:05:07 +02:00
tamo
e5dfde88fd fix the facets conditions 2021-06-02 11:05:07 +02:00
tamo
7c7fba4e57 remove the time limitation to let criterion do what it wants 2021-06-02 11:05:07 +02:00
tamo
5d5d115608 reformat all the files 2021-06-02 11:05:07 +02:00
tamo
7086009f93 improve the base search 2021-06-02 11:05:07 +02:00
tamo
d0b44c380f add benchmarks on a wiki dataset 2021-06-02 11:05:07 +02:00
tamo
beae843766 add a missing space 2021-06-02 11:05:07 +02:00
tamo
5132a106a1 refactorize everything related to the songs dataset in a songs benchmark file 2021-06-02 11:05:07 +02:00
tamo
136efd6b53 fix the benches 2021-06-02 11:05:07 +02:00
tamo
4b78ef31b6 add the configuration of the searchable fields and displayed fields and a default configuration for the songs 2021-06-02 11:05:07 +02:00
tamo
ea0c6d8c40 add a bunch of queries and start the introduction of the filters and the new dataset 2021-06-02 11:05:07 +02:00
tamo
3def42abd8 merge all the criterion only benchmarks in one file 2021-06-02 11:05:07 +02:00
tamo
a2bff68c1a remove the optional words for the typo criterion 2021-06-02 11:05:07 +02:00
tamo
aee49bb3cd add the proximity criterion 2021-06-02 11:05:07 +02:00
tamo
49e4cc3daf add the words criterion to the bench 2021-06-02 11:05:07 +02:00
tamo
15cce89a45 update the README with instructions to get the download the dataset 2021-06-02 11:05:07 +02:00
tamo
e425f70ef9 let criterion decide how much iteration it wants to do in 10s 2021-06-02 11:05:07 +02:00
tamo
4fdbfd6048 push a first version of the benchmark for the typo 2021-06-02 11:05:07 +02:00
bors[bot]
270da98c46 Merge #202
202: Add field id word count docids database r=Kerollmops a=LegendreM

This PR introduces a new database, `field_id_word_count_docids`, that maps the number of words in an attribute to a list of document ids. This relation is limited to attributes that contain fewer than 11 words.
This database is used by the exactness criterion to know if a document has an attribute that contains exactly the query without any additional word.
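Conceptually, the new database is a map shaped like this (an in-memory sketch, not the actual LMDB codecs):

```rust
use std::collections::HashMap;
use roaring::RoaringBitmap;

// (field id, number of words in that field) -> document ids.
type FieldIdWordCountDocids = HashMap<(u16, u8), RoaringBitmap>;

// Record one document field, ignoring attributes of 11 words or more.
fn index_field(
    db: &mut FieldIdWordCountDocids,
    field_id: u16,
    docid: u32,
    word_count: usize,
) {
    if word_count <= 10 {
        db.entry((field_id, word_count as u8)).or_default().insert(docid);
    }
}
```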

Fix #165 
Fix #196
Related to [specifications:#36](https://github.com/meilisearch/specifications/pull/36)

Co-authored-by: many <maxime@meilisearch.com>
Co-authored-by: Many <legendre.maxime.isn@gmail.com>
2021-06-01 16:09:48 +00:00
many
e857ca4d7d Fix PR comments 2021-06-01 18:06:46 +02:00
Many
ab2cf69e8d Update milli/src/update/delete_documents.rs
Co-authored-by: Clément Renault <clement@meilisearch.com>
2021-06-01 17:04:10 +02:00
Many
8e6d1ff0dc Update milli/src/update/index_documents/store.rs
Co-authored-by: Clément Renault <clement@meilisearch.com>
2021-06-01 17:04:02 +02:00
bors[bot]
168fe0aa28 Merge #206
206: Fix http-ui r=Kerollmops a=irevoire

I just noticed that `http-ui` was not compiling on `main`.
I'm not sure this is the best fix, but it works 👀

Co-authored-by: Tamo <irevoire@hotmail.fr>
2021-06-01 14:31:32 +00:00
Tamo
608c5bad24 fix http-ui 2021-06-01 16:24:46 +02:00
bors[bot]
7d36d664a7 Merge #203
203: Make the MatchingWords return the number of matching bytes r=Kerollmops a=LegendreM

Make the MatchingWords return the number of matching bytes using a custom Levenshtein algorithm.

Fix #138

Co-authored-by: many <maxime@meilisearch.com>
2021-06-01 12:00:33 +00:00
many
225ae6fd25 Resolve PR comments 2021-06-01 11:53:09 +02:00
bors[bot]
2f9f6a1f21 Merge #169
169: Optimize roaring codec r=Kerollmops a=MarinPostma

Optimize the `BoRoaringBitmapCodec` by preventing it from emitting useless errors that caused allocations. On my flamegraph, the byte_decode function went from 4.13% to 1.70% (of the transplant graph).

This may not be the greatest optimization ever, but hey, it was a low-hanging fruit.
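The shape of the fix (see the follow-up commit "rewrite roaring codec without byteorder"): decode the `u32`s with `chunks_exact` instead of going through a byteorder reader, so a malformed buffer becomes a plain `None` rather than an allocated error. A sketch, assuming native-endian integers:

```rust
use roaring::RoaringBitmap;

// Decode a buffer of native-endian u32s into a bitmap. A truncated
// buffer yields None instead of constructing an error value.
fn bytes_decode(bytes: &[u8]) -> Option<RoaringBitmap> {
    if bytes.len() % 4 != 0 {
        return None;
    }
    let integers = bytes
        .chunks_exact(4)
        // Every chunk is exactly 4 bytes, so this conversion cannot fail.
        .map(|chunk| u32::from_ne_bytes(chunk.try_into().unwrap()));
    Some(integers.collect())
}
```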

before:
![image](https://user-images.githubusercontent.com/28804882/116241125-17018880-a754-11eb-9f9d-a67418d100e1.png)
after:
![image](https://user-images.githubusercontent.com/28804882/116241167-21bc1d80-a754-11eb-9afc-d9d72727477c.png)



Co-authored-by: Marin Postma <postma.marin@protonmail.com>
2021-06-01 06:30:25 +00:00
Marin Postma
984dc7c1ed rewrite roaring codec without byteorder. 2021-05-31 22:15:39 +02:00
Marin Postma
1373637da1 optimize roaring codec 2021-05-31 22:15:35 +02:00
many
1df68d342a Make the MatchingWords return the number of matching bytes 2021-05-31 18:22:29 +02:00
many
b8e6db0feb Add database in infos crate 2021-05-31 16:29:27 +02:00
many
c701f8bf36 Use field id word count database in exactness criterion 2021-05-31 16:27:28 +02:00
many
4ddf008be2 add field id word count database 2021-05-31 16:27:28 +02:00
bors[bot]
2f5e61bacb Merge #184
184: Transfer numbers and strings facets into the appropriate facet databases r=Kerollmops a=Kerollmops

This pull request is related to https://github.com/meilisearch/milli/issues/152 and changes the layout of the facet values: numbers and strings are now in dedicated databases, and the user no longer needs to define the type of the fields. No conversion between the two types is done anymore: numbers (floats and integers converted to f64) go to the facet float database and strings go to the strings facet database.
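Schematically, the split looks like this (in-memory stand-ins for the two LMDB databases; the real codec encodes `f64` keys so they sort correctly, while the raw bits here are only a placeholder key since `f64` is not `Ord`):

```rust
use std::collections::BTreeMap;
use roaring::RoaringBitmap;
use serde_json::Value;

// Two dedicated databases instead of one typed facet database.
type FacetNumberDocids = BTreeMap<(u16, u64), RoaringBitmap>;
type FacetStringDocids = BTreeMap<(u16, String), RoaringBitmap>;

fn index_facet_value(
    numbers: &mut FacetNumberDocids,
    strings: &mut FacetStringDocids,
    field_id: u16,
    docid: u32,
    value: &Value,
) {
    match value {
        // Floats and integers both become f64 and go to the number side.
        Value::Number(n) => {
            if let Some(f) = n.as_f64() {
                numbers.entry((field_id, f.to_bits())).or_default().insert(docid);
            }
        }
        Value::String(s) => {
            strings.entry((field_id, s.clone())).or_default().insert(docid);
        }
        // Other JSON types are out of scope for this sketch.
        _ => {}
    }
}
```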

There is one related issue that I found regarding CSVs: the values in a CSV are always considered to be strings. [meilisearch/specifications#28](d916b57d74/text/0028-indexing-csv.md) fixes this issue by allowing the user to define the field types using `:` in the "CSV Formatting Rules" section.

All previous tests on facets have been modified to pass again and I have also done hand-driven tests with the 115m songs dataset. Everything seems to be good!

Fixes #192.

Co-authored-by: Clément Renault <clement@meilisearch.com>
Co-authored-by: Kerollmops <clement@meilisearch.com>
2021-05-31 13:32:58 +00:00
Kerollmops
1c0a5cd136 Resolve code modification suggestions 2021-05-31 15:22:50 +02:00
bors[bot]
76b9178b16 Merge #200
200: Fix plane sweep algorithm r=Kerollmops a=LegendreM

Fix plain sweep algorithm after creating some tests on proximity.

Co-authored-by: many <maxime@meilisearch.com>
2021-05-26 11:36:24 +00:00
many
a5e98cf46d Fix plane sweep algorithm 2021-05-25 18:21:55 +02:00
Kerollmops
5012cc3a32 Fix the http-ui crate to support split facet databases 2021-05-25 11:31:06 +02:00
Kerollmops
28bd9e183e Fix the infos crate to support split facet databases 2021-05-25 11:31:06 +02:00
Clément Renault
3a4a150ef0 Fix the tests and remaining warnings 2021-05-25 11:31:06 +02:00
Clément Renault
02c655ff1a Refine the facet distribution to use both databases 2021-05-25 11:30:00 +02:00
Clément Renault
79efded841 Refine the FacetCondition from_array constructor 2021-05-25 11:30:00 +02:00
Clément Renault
f7efde11d9 Refine the facet condition to use both facet databases 2021-05-25 11:30:00 +02:00
Clément Renault
e62b89a2ed Make the facet distinct work with the new split facets 2021-05-25 11:30:00 +02:00
Clément Renault
bd7b285bae Split the update side to use the number and the strings facet databases 2021-05-25 11:30:00 +02:00
Clément Renault
038e03a4e4 Use both facet databases in the FacetIter type 2021-05-25 11:30:00 +02:00
Clément Renault
597144b0b9 Use both number and string facet databases in the distinct system 2021-05-25 11:29:59 +02:00
Clément Renault
837c1041c7 Clear and delete the documents from the facet database 2021-05-25 11:28:36 +02:00
Clément Renault
a56c46b6f1 Explode the string and f64 facet databases into two 2021-05-25 11:28:36 +02:00
Clément Renault
df7a32e3d0 Move the creation date initialization into a function 2021-05-25 11:28:35 +02:00
bors[bot]
49bee2ebc5 Merge #190
190: Make bucket candidates optionals r=Kerollmops a=LegendreM

Before, the bucket candidates were the result of the facet filters or the result of the query tree.
They will now be only the result of the query tree, making the number of candidates more consistent between the same request with or without facet filters.

Fix some clippy warnings.

Fix #186 

Co-authored-by: many <maxime@meilisearch.com>
2021-05-24 11:19:32 +00:00
many
a3944a7083 Introduce a filtered_candidates field 2021-05-11 11:37:40 +02:00
many
efba662ca6 Fix clippy warnings in cirteria 2021-05-10 10:27:18 +02:00
many
e923d51b8f Make bucket candidates optionals 2021-05-10 10:27:04 +02:00
Marin Postma
eeb0c70ea2 meilisearch compatible primary key inference 2021-05-06 22:42:32 +02:00
Marin Postma
313c362461 early return on empty document addition 2021-05-06 18:14:16 +02:00
Many
c620626515 Merge pull request #188 from meilisearch/exactness-criterion
Exactness criterion
2021-05-06 17:56:21 +02:00
Many
44b6843de7 Fix pull request reviews
Update milli/src/fields_ids_map.rs
Update milli/src/search/criteria/exactness.rs
Update milli/src/search/criteria/mod.rs
2021-05-06 14:31:03 +02:00
many
c1ce4e4ca9 Introduce mocked ExactAttribute step in exactness criterion 2021-05-06 14:28:31 +02:00
many
a3f8686fbf Introduce exactness criterion 2021-05-06 14:28:30 +02:00
bors[bot]
25f75d4d03 Merge #189
189: Update version for the next release (v0.2.1) r=Kerollmops a=curquiza



Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
2021-05-05 15:28:56 +00:00
bors[bot]
7e63e32960 Merge #187
187: Fix fields distribution after documents merge r=Kerollmops a=shekhirin

Resolves https://github.com/meilisearch/milli/issues/174

The problem was with the calculation of the fields distribution before the merge in `output_from_sorter()`. So if you imported two documents with the same primary key value, the fields distribution would count them as two documents, while `output_from_sorter()` would merge them into one.

---

```console
➜ Downloads cat short_movies.json
[
{"id":"47474","title":"The Serpent's Egg","poster":"https://image.tmdb.org/t/p/w500/n7z0doFkXHcvo8QQWHLFnkEPXRU.jpg","overview":"The Serpent's Egg follows a week in the life of Abel Rosenberg, an out-of-work American circus acrobat living in poverty-stricken Berlin following Germany's defeat in World War I.","release_date":246844800,"genres":["Thriller","Drama","Mystery"]},
{"id":"47474","title":"The Serpent's Egg","poster":"https://image.tmdb.org/t/p/w500/n7z0doFkXHcvo8QQWHLFnkEPXRU.jpg","overview":"The Serpent's Egg follows a week in the life of Abel Rosenberg, an out-of-work American circus acrobat living in poverty-stricken Berlin following Germany's defeat in World War I.","release_date":246844800,"genres":["Thriller","Drama","Mystery"]}
]
➜ Downloads curl -X POST -H "Content-Type: text/json" --data-binary @short_movies.json 127.0.0.1:7700/indexes/movies/documents
{"updateId":0}
```

## Before
```console
➜ Downloads curl -s 127.0.0.1:7700/indexes/movies/stats | jq
{
  "numberOfDocuments": 1,
  "isIndexing": false,
  "fieldsDistribution": {
    "release_date": 2,
    "poster": 2,
    "title": 2,
    "overview": 2,
    "genres": 2,
    "id": 2
  }
}
```

## After
```console
➜ Downloads curl -s 127.0.0.1:7700/indexes/movies/stats | jq
{
  "numberOfDocuments": 1,
  "isIndexing": false,
  "fieldsDistribution": {
    "poster": 1,
    "release_date": 1,
    "title": 1,
    "genres": 1,
    "id": 1,
    "overview": 1
  }
}
```

Co-authored-by: Alexey Shekhirin <a.shekhirin@gmail.com>
2021-05-05 14:45:08 +00:00
Clémentine Urquizar
1e11578ef0 Update version for the next release (v0.2.1) 2021-05-05 14:57:34 +02:00
Alexey Shekhirin
f8d0f5265f fix(update): fields distribution after documents merge 2021-05-04 22:12:20 +03:00
bors[bot]
1207a058d0 Merge #185
185: Provide an iterator over all the documents in a milli index r=Kerollmops a=irevoire



Co-authored-by: tamo <tamo@meilisearch.com>
2021-05-04 14:04:16 +00:00
tamo
d61566787e provide an iterator over all the documents in a milli index 2021-05-04 11:23:51 +02:00
bors[bot]
c08f4599f2 Merge #183
183: remove tests on main r=Kerollmops a=MarinPostma

remove testing on main since we now use bors for merging.


Co-authored-by: Marin Postma <postma.marin@protonmail.com>
2021-05-03 15:06:28 +00:00
Marin Postma
bb5823c775 remove tests on main 2021-05-03 15:21:20 +02:00
bors[bot]
792225eaff Merge #182
182: Upgrade Milli version (v0.2.0) r=Kerollmops a=curquiza



Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
2021-05-03 13:00:16 +00:00
Clémentine Urquizar
a8680887d8 Upgrade Milli version (v0.2.0) 2021-05-03 14:50:47 +02:00
bors[bot]
5b93d6ab91 Merge #181
181: Upgrade Tokenizer version (v0.2.2) r=Kerollmops a=curquiza



Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
2021-05-03 11:03:25 +00:00
bors[bot]
5c762b71dd Merge #177
177: Add bors r=Kerollmops a=curquiza



Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
2021-05-03 10:57:09 +00:00
Clémentine Urquizar
c30f17fafb Add bors 2021-05-03 12:29:30 +02:00
Clémentine Urquizar
34e02aba42 Upgrade Tokenizer version (v0.2.2) 2021-05-03 10:55:55 +02:00
Clément Renault
03bb95539b Merge pull request #180 from shekhirin/disable-autogenerated-doc-ids
Disable autogenerate_docids by default
2021-05-01 12:22:13 +02:00
Alexey Shekhirin
d81c0e8bba feat(update): disable autogenerate_docids by default 2021-04-30 21:41:34 +03:00
Clément Renault
c112877a4a Merge pull request #178 from meilisearch/visible-document-nb
make document addition number visible
2021-04-29 21:54:51 +02:00
Marin Postma
e8e32e0ba1 make document addition number visible 2021-04-29 20:05:07 +02:00
Clément Renault
b31f36d68c Merge pull request #173 from meilisearch/enhance-distinct-attributes
Remove excluded document in criteria iterations
2021-04-29 12:14:44 +02:00
many
ee09e50e7f Remove excluded document in criteria iterations
- pass excluded documents to the criteria to remove them in higher levels of the bucket-sort
- merge already returned documents with excluded documents to avoid duplicates

Related to #125 and #112
Fix #170
2021-04-29 12:09:38 +02:00
Clément Renault
374c2782ad Merge pull request #176 from yanns/patch-1
do not use echo that escapes newlines
2021-04-29 10:50:15 +02:00
Yann Simon
566c4a53c5 do not use echo that escapes newlines
Fix https://github.com/meilisearch/milli/issues/175
2021-04-29 09:25:35 +02:00
Many
5b9524e1ba Merge pull request #172 from meilisearch/optimize-proximity-criterion
Optimize proximity criterion
2021-04-28 15:41:57 +02:00
many
31607bf9cd Add a threshold on proximity when choosing between linear/set algorithm 2021-04-28 14:57:22 +02:00
Clément Renault
5a10de1b9f Merge pull request #122 from meilisearch/attribute-criterion
Introduce the Attribute criterion
2021-04-28 14:34:50 +02:00
many
3b7e6afb55 Make some refacto and add documentation 2021-04-28 13:53:27 +02:00
Many
0add4d735c Update milli/src/search/criteria/attribute.rs
Co-authored-by: Clément Renault <clement@meilisearch.com>
2021-04-27 17:40:34 +02:00
Many
3794ffc952 Update milli/src/search/criteria/attribute.rs
Co-authored-by: Clément Renault <clement@meilisearch.com>
2021-04-27 17:39:23 +02:00
Many
329bd4a1bb Update milli/src/search/criteria/attribute.rs
Co-authored-by: Clément Renault <clement@meilisearch.com>
2021-04-27 17:39:03 +02:00
Many
3b1358b62f Update milli/src/search/criteria/attribute.rs
Co-authored-by: Clément Renault <clement@meilisearch.com>
2021-04-27 17:32:19 +02:00
Many
c862b1bc6b Update milli/src/search/criteria/attribute.rs
Co-authored-by: Clément Renault <clement@meilisearch.com>
2021-04-27 17:32:10 +02:00
Many
e92d137676 Update milli/src/search/criteria/attribute.rs
Co-authored-by: Clément Renault <clement@meilisearch.com>
2021-04-27 17:31:42 +02:00
Many
b3d6c6a9a0 Update milli/src/search/criteria/attribute.rs
Co-authored-by: Clément Renault <clement@meilisearch.com>
2021-04-27 17:31:13 +02:00
Many
498c2b298c Update milli/src/search/criteria/attribute.rs
Co-authored-by: Clément Renault <clement@meilisearch.com>
2021-04-27 17:30:02 +02:00
Many
0e4e6dfada Update milli/src/search/criteria/proximity.rs
Co-authored-by: Clément Renault <clement@meilisearch.com>
2021-04-27 17:29:52 +02:00
Many
47d780b8ce Update milli/src/search/criteria/mod.rs
Co-authored-by: Irevoire <tamo@meilisearch.com>
2021-04-27 14:39:53 +02:00
Many
0daa0e170a Fix PR comments
Co-authored-by: Clément Renault <clement@meilisearch.com>
2021-04-27 14:39:53 +02:00
many
0d7d3ce802 Update roaring package 2021-04-27 14:39:53 +02:00
many
71740805a7 Fix forgotten typo tests 2021-04-27 14:39:53 +02:00
many
e77291a6f3 Optimize Attribute criterion on big requests 2021-04-27 14:39:53 +02:00
many
716c8e22b0 Add style and comments 2021-04-27 14:39:52 +02:00
many
f853790016 Use the LCM of 10 first numbers to compute attribute rank 2021-04-27 14:39:52 +02:00
many
2b036449be Fix the return of equal candidates in different pages 2021-04-27 14:39:52 +02:00
many
0efa011e09 Make a small code clean-up 2021-04-27 14:39:52 +02:00
many
17c8c6f945 Make set algorithm return None when nothing can be returned 2021-04-27 14:39:52 +02:00
many
b3e2280bb9 Debug attribute criterion
* debug folding when initializing iterators
2021-04-27 14:39:52 +02:00
many
1eee0029a8 Make attribute criterion typo/prefix tolerant 2021-04-27 14:39:52 +02:00
many
59f58c15f7 Implement attribute criterion
* Implement WordLevelIterator
* Implement QueryLevelIterator
* Implement set algorithm based on iterators

Not tested + Some TODO to fix
2021-04-27 14:39:52 +02:00
Clément Renault
361193099f Reduce the amount of branches when query tree flattened 2021-04-27 14:39:52 +02:00
Kerollmops
7ff4a2a708 Display the number of entries in the infos crate 2021-04-27 14:39:52 +02:00
Kerollmops
1aad66bdaa Compute stats about the word prefix level positions database in the infos crate 2021-04-27 14:39:52 +02:00
Kerollmops
e65bad16cc Compute the words prefixes at the end of an update 2021-04-27 14:39:52 +02:00
many
ab92c814c3 Fix attributes score 2021-04-27 14:35:43 +02:00
Clément Renault
0ad9499b93 Fix an indexing bug in the words level positions 2021-04-27 14:35:43 +02:00
Clément Renault
7aa5753ed2 Make the attribute positions range bounds to be fixed 2021-04-27 14:35:43 +02:00
Clément Renault
658f316511 Introduce the Initial Criterion 2021-04-27 14:35:43 +02:00
Kerollmops
89ee2cf576 Introduce the TreeLevel struct 2021-04-27 14:25:35 +02:00
Kerollmops
bd1a371c62 Compute the WordsLevelPositions only once 2021-04-27 14:25:34 +02:00
Kerollmops
8bd4f5d93e Compute the biggest values of the words_level_positions_docids 2021-04-27 14:25:34 +02:00
Kerollmops
f713828406 Implement the clear and delete documents for the word-level-positions database 2021-04-27 14:25:34 +02:00
Kerollmops
3069bf4f4a Fix and improve the words-level-positions computation 2021-04-27 14:25:34 +02:00
Kerollmops
6b1b42b928 Introduce an infos wordsLevelPositionsDocids subcommand 2021-04-27 14:25:34 +02:00
Kerollmops
e8cc7f9cee Expose a route in the http-ui to update the WordsLevelPositions 2021-04-27 14:25:34 +02:00
Kerollmops
3a25137ee4 Expose and use the WordsLevelPositions update 2021-04-27 14:25:34 +02:00
Kerollmops
c765f277a3 Introduce the WordsLevelPositions update 2021-04-27 14:25:34 +02:00
Kerollmops
9242f2f1d4 Store the first word positions levels 2021-04-27 14:25:34 +02:00
Kerollmops
b0a417f342 Introduce the word_level_position_docids Index database 2021-04-27 14:25:34 +02:00
many
75e7b1e3da Implement test Context methods 2021-04-27 14:25:34 +02:00
many
4ff67ec2ee Implement attribute criterion for small amounts of candidates 2021-04-27 14:25:34 +02:00
Kerollmops
0f4c0beffd Introduce the Attribute criterion 2021-04-27 14:25:34 +02:00
Clément Renault
3bcc1c0560 Merge pull request #164 from meilisearch/clippy-fixes
Make clippy happy
2021-04-21 13:32:29 +02:00
tamo
f8dee1b402 [makes clippy happy] search/criteria/proximity.rs 2021-04-21 12:36:45 +02:00
tamo
7fa3a1d23e makes clippy happy http-ui 2021-04-21 12:36:45 +02:00
Clément Renault
28a8df2f0a Merge pull request #160 from shekhirin/query-words-limit
Support query words limit
2021-04-21 11:14:09 +02:00
Alexey Shekhirin
6fa00c61d2 feat(search): support words_limit 2021-04-20 12:22:04 +03:00
Clément Renault
726fcf015a Merge pull request #146 from meilisearch/facet-float-integer-becomes-number
Facet float-integer becomes facet number
2021-04-20 10:31:47 +02:00
Kerollmops
c9b2d3ae1a Warn instead of returning an error when a conversion fails 2021-04-20 10:23:31 +02:00
Kerollmops
2aeef09316 Remove debug logs while iterating through the facet levels 2021-04-20 10:23:31 +02:00
Kerollmops
51767725b2 Simplify integer and float functions trait bounds 2021-04-20 10:23:31 +02:00
Kerollmops
efbfa81fa7 Merge the Float and Integer enum variant into the Number one 2021-04-20 10:23:30 +02:00
Clément Renault
f5ec14c54c Merge pull request #163 from meilisearch/next-release-v0.1.1
Update version for the next release (v0.1.1)
2021-04-19 15:52:13 +02:00
Clémentine Urquizar
127d3d028e Update version for the next release (v0.1.1) 2021-04-19 14:48:13 +02:00
Clément Renault
1095874e7e Merge pull request #158 from shekhirin/synonyms
Support synonyms
2021-04-18 11:00:13 +02:00
Alexey Shekhirin
33860bc3b7 test(update, settings): set & reset synonyms
fixes after review

more fixes after review
2021-04-18 11:24:17 +03:00
Alexey Shekhirin
e39aabbfe6 feat(search, update): synonyms 2021-04-18 11:24:17 +03:00
Clément Renault
995d1a07d4 Merge pull request #162 from michaelchiche/patch-1 2021-04-17 09:47:08 +02:00
Michael Chiche
f6b06d6e5d typo: wrong command in example 2021-04-16 20:08:43 +02:00
Clément Renault
19b6620a92 Merge pull request #125 from meilisearch/distinct
Implement distinct attribute
2021-04-15 16:33:49 +02:00
Marin Postma
9c4660d3d6 add tests 2021-04-15 16:25:56 +02:00
Marin Postma
75464a1baa review fixes 2021-04-15 16:25:56 +02:00
Marin Postma
2f73fa55ae add documentation 2021-04-15 16:25:55 +02:00
Marin Postma
45c45e11dd implement distinct attribute
distinct can return error

facet distinct on numbers

return distinct error

review fixes

make get_facet_value more generic

fixes
2021-04-15 16:25:55 +02:00
Clément Renault
6e126c96a9 Merge pull request #159 from meilisearch/upd-tokenizer-v0.2.1
Update Tokenizer version to v0.2.1
2021-04-14 19:02:36 +02:00
Clémentine Urquizar
2c5c79d68e Update Tokenizer version to v0.2.1 2021-04-14 18:54:04 +02:00
Clément Renault
c2df51aa95 Merge pull request #156 from meilisearch/stop-words
Stop words
2021-04-14 17:33:06 +02:00
tamo
dcb00b2e54 test a new implementation of the stop_words 2021-04-12 18:35:33 +02:00
tamo
da036dcc3e Revert "Integrate the stop_words in the querytree"
This reverts commit 12fb509d84.
We revert this commit because it's causing the bug #150.
The initial algorithm we implemented for the stop_words was:

1. remove the stop_words from the dataset
2. keep the stop_words in the query to see if we can generate new words by
   integrating typos or if the word was a prefix
=> This was causing the bug since, in the case of “The hobbit”, we were
   **always** looking for something starting with “t he” or “th e”
   instead of ignoring the word completely.

For now we are going to fix the bug by completely ignoring the
stop_words in the query.
This could cause another problem where someone mistypes a normal word and
ends up typing a stop_word.

For example, imagine someone searching for the song “Won't he do it”.
If that person misplaces one space and writes “Won' the do it”, then we
will lose a part of the request.

One fix would be to update our query tree to something like that:

---------------------
OR
  OR
    TOLERANT hobbit # the first option is to ignore the stop_word
    AND
      CONSECUTIVE   # the second option is to do as we are doing
        EXACT t	    # currently
        EXACT he
      TOLERANT hobbit
---------------------

This would drastically increase the size of our query tree on requests
with a lot of stop_words. For example, think of “The Lord Of The Rings”.

For now, however, we decided to ignore this problem, considering that it
doesn't reduce the relevancy of the search too much while it improves the
performance.
2021-04-12 18:35:33 +02:00
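A hypothetical sketch of the query tree shape drawn above, as a Rust enum; the Operation name and variants are illustrative, not milli's exact types.

```rust
// The first branch ignores the stop word entirely; the second keeps it as
// two consecutive exact tokens, matching the “t he” example above.
#[derive(Debug)]
enum Operation {
    Or(Vec<Operation>),
    And(Vec<Operation>),
    Consecutive(Vec<Operation>),
    Tolerant(String), // typo- and prefix-tolerant word
    Exact(String),
}

fn main() {
    use Operation::*;
    // “the hobbit” with “the” as a stop word, following the diagram above
    let tree = Or(vec![
        Tolerant("hobbit".into()),
        And(vec![
            Consecutive(vec![Exact("t".into()), Exact("he".into())]),
            Tolerant("hobbit".into()),
        ]),
    ]);
    println!("{tree:#?}");
}
```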
Clément Renault
f9eab6e0de Merge pull request #151 from meilisearch/release-drafter
Add release drafter files
2021-04-12 10:25:52 +02:00
Clémentine Urquizar
6a128d4ec7 Add release drafter files 2021-04-12 10:18:39 +02:00
Clément Renault
5efe67f375 Merge pull request #154 from shekhirin/shekhirin/fix-settings-serde-tests
test(http): fix and refactor settings assert_(ser|de)_tokens
2021-04-11 10:52:38 +02:00
Alexey Shekhirin
3af8fa194c test(http): combine settings assert_(ser|de)_tokens into 1 test 2021-04-10 12:13:59 +03:00
Clément Renault
0d09c64dde Merge pull request #148 from shekhirin/shekhirin/setting-enum
refactor(http, update): introduce setting enum
2021-04-09 22:48:58 +02:00
Alexey Shekhirin
84c1dda39d test(http): setting enum serialize/deserialize 2021-04-08 17:03:40 +03:00
Alexey Shekhirin
dc636d190d refactor(http, update): introduce setting enum 2021-04-08 17:03:40 +03:00
Clément Renault
2bcdd8844c Merge pull request #141 from meilisearch/reorganize-criterion
reorganize criterion
2021-04-01 19:50:16 +02:00
tamo
0a4bde1f2f update the default ordering of the criterion 2021-04-01 19:45:31 +02:00
Clément Renault
ee3f93c029 Merge pull request #136 from shekhirin/index-fields-ids-distribution-cache
feat(index): store fields distribution in index
2021-04-01 18:36:21 +02:00
Alexey Shekhirin
2658c5c545 feat(index): update fields distribution in clear & delete operations
fixes after review

bump the version of the tokenizer

implement a first version of the stop_words

The front must provide a BTreeSet containing the stop words
The stop_words are set at None if an empty Set is provided
add the stop-words in the http-ui interface

Use maplit in the test
and remove all the useless drop(rtxn) at the end of all tests

Integrate the stop_words in the querytree

remove the stop_words from the querytree except if it was a prefix or a typo

more fixes after review
2021-04-01 19:12:35 +03:00
Alexey Shekhirin
27c7ab6e00 feat(index): store fields distribution in index 2021-04-01 18:35:19 +03:00
Clément Renault
67e25f8724 Merge pull request #128 from meilisearch/stop-words
Stop words
2021-04-01 14:02:37 +02:00
tamo
12fb509d84 Integrate the stop_words in the querytree
remove the stop_words from the querytree except if it was a prefix or a typo
2021-04-01 13:57:55 +02:00
tamo
a2f46029c7 implement a first version of the stop_words
The front must provide a BTreeSet containing the stop words
The stop_words are set at None if an empty Set is provided
add the stop-words in the http-ui interface

Use maplit in the test
and remove all the useless drop(rtxn) at the end of all tests
2021-04-01 13:57:55 +02:00
tamo
62a8f1d707 bump the version of the tokenizer 2021-04-01 13:49:22 +02:00
Clément Renault
56777af8e4 Merge pull request #135 from shekhirin/index-fields-ids-distribution
feat(index): introduce fields_ids_distribution
2021-03-31 17:53:45 +02:00
Alexey Shekhirin
9205b640a4 feat(index): introduce fields_ids_distribution 2021-03-31 18:44:47 +03:00
Clément Renault
f2a786ecbf Merge pull request #134 from meilisearch/improve_httpui
add a button to display or show the facets
2021-03-31 17:07:04 +02:00
tamo
13ce0ebb87 stop requesting the facets if the user has hidden them 2021-03-31 16:27:32 +02:00
tamo
bcc131e866 add a button to display or hide the facets 2021-03-31 16:18:53 +02:00
Clément Renault
529c8f0eb1 Merge pull request #131 from shekhirin/criterion-asc-desc-regex
fix(criterion): compile asc/desc regex only once
2021-03-30 15:18:21 +02:00
Alexey Shekhirin
2cb32edaa9 fix(criterion): compile asc/desc regex only once
use once_cell instead of lazy_static

reorder imports
2021-03-30 16:07:14 +03:00
Clément Renault
5a1d3609a9 Merge pull request #127 from shekhirin/main
feat(search, criteria): const candidates threshold
2021-03-30 14:07:19 +02:00
Alexey Shekhirin
1e3f05db8f use fixed number of candidates as a threshold 2021-03-30 11:57:10 +03:00
Alexey Shekhirin
a776ec9718 fix division 2021-03-29 19:16:58 +03:00
Alexey Shekhirin
522e79f2e0 feat(search, criteria): introduce a percentage threshold to the asc/desc 2021-03-29 19:08:31 +03:00
Clément Renault
9ad8b74111 Merge pull request #123 from irevoire/pin_tokenizer
select a specific release of the tokenizer instead of using the latest git commit
2021-03-25 22:58:11 +01:00
tamo
73dcdb27f6 select a specific release of the tokenizer instead of using the latest git commit 2021-03-25 15:00:18 +01:00
Clément Renault
6b7cc0022b Merge pull request #118 from meilisearch/fix-offset
fix broken offset
2021-03-15 22:15:18 +01:00
mpostma
9c27183876 fix broken offset 2021-03-15 20:23:50 +01:00
Clément Renault
25f8789aa5 Merge pull request #117 from meilisearch/update-license
Update LICENSE
2021-03-15 16:26:22 +01:00
Clémentine Urquizar
3455082458 Update LICENSE 2021-03-15 16:15:14 +01:00
Clément Renault
b7b23cd4a8 Merge pull request #116 from meilisearch/index-metadata
add index metadata
2021-03-15 14:20:50 +01:00
mpostma
f0210453a6 add updated at on put primary key 2021-03-15 14:05:48 +01:00
mpostma
615fe095e1 update index updated at on index writes 2021-03-15 14:05:47 +01:00
mpostma
80d0f9c49d methods to update index time metadata 2021-03-15 14:05:47 +01:00
Clément Renault
c9f9d39b54 Merge pull request #114 from meilisearch/github-ci-use-main
Rename master into main in the Github CI
2021-03-11 20:46:06 +01:00
Kerollmops
0cc3132f5a Rename master into main in the Github CI 2021-03-11 14:44:47 +01:00
Clément Renault
38b6e8decd Merge pull request #106 from meilisearch/optimize-words-typo-criteria
Optimize the words criterion
2021-03-10 11:28:46 +01:00
Kerollmops
d48008339e Introduce two new optional_words and authorize_typos Search options 2021-03-10 11:16:30 +01:00
Kerollmops
54b97ed8e1 Update the fetcher comments 2021-03-10 10:56:26 +01:00
Kerollmops
d301859bbd Introduce a special word_derivations function for Proximity 2021-03-10 10:42:53 +01:00
Kerollmops
facfb4b615 Fix the bucket candidates 2021-03-10 10:42:53 +01:00
Kerollmops
42fd7dea78 Remove the useless typo cache 2021-03-10 10:42:53 +01:00
many
62a70c300d Optimize words criterion 2021-03-10 10:42:53 +01:00
Clément Renault
c53be51460 Merge pull request #105 from meilisearch/optimize-number-of-documents
Optimize the number_of_documents function
2021-03-10 10:39:12 +01:00
Kerollmops
f51eb46c69 Use the RoaringBitmapLenCodec to retrieve the count of documents 2021-03-09 10:25:39 +01:00
Clément Renault
7a3ce9bb1d Merge pull request #104 from meilisearch/update-license
Update the LICENSE file to match the year 2021
2021-03-08 19:11:05 +01:00
Clément Renault
2f9af6a707 Fix the README.md bash example 2021-03-08 18:56:22 +01:00
Clément Renault
f204344102 Update the LICENSE file to match the year 2021 2021-03-08 18:54:06 +01:00
Clément Renault
22f20f0c29 Merge pull request #99 from meilisearch/infos-missing-db-names
Add missing databases to the infos subcommand
2021-03-08 18:52:08 +01:00
Clément Renault
18844d60b5 Simplify the output of database sizes in the infos crate 2021-03-08 18:47:33 +01:00
Clément Renault
3d02b19fbd Introduce the docids-words-positions subcommand to the infos crate 2021-03-08 18:47:33 +01:00
Kerollmops
bd63da0a0e Add missing databases to the infos subcommand 2021-03-08 18:47:33 +01:00
Clément Renault
f9be3ad3fd Merge pull request #103 from meilisearch/plane-sweep-proximity
Plane-Sweep proximity
2021-03-08 16:58:34 +01:00
Kerollmops
d781a6164a Rewrite some code with idiomatic Rust 2021-03-08 16:27:52 +01:00
Clément Renault
b18ec00a7a Add a logging_timer macro to the criterion next methods 2021-03-08 16:12:06 +01:00
Kerollmops
82a0f678fb Introduce a cache on the docid_word_positions database method 2021-03-08 16:12:03 +01:00
Clément Renault
5fcaedb880 Introduce a WordDerivationsCache struct 2021-03-08 16:00:53 +01:00
many
2606c92ef9 use plane sweep in proximity criterion 2021-03-08 15:58:39 +01:00
many
ae47bb3594 Introduce plane_sweep function in proximity criterion 2021-03-08 15:58:38 +01:00
Kerollmops
636a9df177 Temporarily fix the tinytemplate doc hidden issue 2021-03-08 15:57:45 +01:00
Clément Renault
f190d5f496 Merge pull request #100 from meilisearch/improve-asc-desc-criterion
Improve the Asc/Desc criteria
2021-03-08 13:37:00 +01:00
Clément Renault
3c76b3548d Rework the Asc/Desc criteria to be facet iterator based 2021-03-08 13:32:25 +01:00
Clément Renault
a58d2b6137 Print the Asc/Desc criterion field name in the debug prints 2021-03-08 13:32:25 +01:00
Clément Renault
08a0ff7091 Merge pull request #101 from meilisearch/criterion-display
implement display for criterion
2021-03-08 13:29:05 +01:00
mpostma
e3095be85c Remove Debug use in Display impl 2021-03-08 12:09:09 +01:00
mpostma
9e1eb25232 implement display for criterion
Update milli/src/criterion.rs

Co-authored-by: Clément Renault <clement@meilisearch.com>
2021-03-08 11:00:30 +01:00
Clément Renault
71b069d3e1 Merge pull request #102 from meilisearch/fix-searchable-settings-test
Fix the searchable settings test
2021-03-08 10:55:30 +01:00
Clément Renault
e5bb96bc3b Fix the searchable settings test 2021-03-06 12:48:41 +01:00
Clément Renault
2924ed31f3 Merge pull request #97 from meilisearch/criteria
Introduce all the criteria
2021-03-03 18:24:22 +01:00
Kerollmops
9b6b35d9b7 Clean up some comments 2021-03-03 18:19:10 +01:00
Kerollmops
2cc4a467a6 Change the criterion output that cannot fail 2021-03-03 18:18:33 +01:00
Kerollmops
1fc25148da Remove useless where clauses for the criteria 2021-03-03 18:09:19 +01:00
Kerollmops
07784c8990 Tune the words prefixes threshold to compute for 1/1000 instead 2021-03-03 15:51:28 +01:00
Kerollmops
f376c6a728 Make sure we retrieve the docid word positions 2021-03-03 15:45:03 +01:00
Kerollmops
5c5e51095c Fix the Asc/Desc criteria to always return the QueryTree when available 2021-03-03 15:45:03 +01:00
many
cdaa96df63 optimize proximity criterion 2021-03-03 15:45:03 +01:00
many
246286f0eb take hard separator into account 2021-03-03 15:45:03 +01:00
Kerollmops
6bf6b40495 Remove unused files 2021-03-03 15:45:03 +01:00
Kerollmops
f118d7e067 build criteria from settings 2021-03-03 15:45:03 +01:00
Kerollmops
025835c5b2 Fix the criteria to avoid always returning a placeholder 2021-03-03 15:45:03 +01:00
Kerollmops
36c1f93ceb Do an union of the bucket candidates 2021-03-03 15:45:03 +01:00
many
b0e0c5eba0 remove option of bucket_candidates 2021-03-03 15:45:03 +01:00
Kerollmops
daf126a638 Introduce the final Fetcher criterion 2021-03-03 15:45:03 +01:00
many
7ac09d7b7c remove option of bucket_candidates 2021-03-03 15:45:03 +01:00
Kerollmops
5af63c74e0 Speed-up the MatchingWords highlighting struct 2021-03-03 15:45:03 +01:00
Kerollmops
4510bbccca Add a lot of debug 2021-03-03 15:43:44 +01:00
Kerollmops
ae4a237e58 Fix the maximum_proximity function 2021-03-03 15:43:44 +01:00
Kerollmops
9bc9b36645 Introduce the Proximity criterion 2021-03-03 15:43:44 +01:00
Kerollmops
22b84fe543 Use the words criterion in the search module 2021-03-03 15:43:44 +01:00
many
3d731cc861 remove option on bucket_candidates 2021-03-03 15:43:44 +01:00
Clément Renault
14f9f85c4b Introduce the AscDesc criterion 2021-03-03 15:43:44 +01:00
many
b5b7ec0162 implement initial state for words criterion 2021-03-03 15:43:44 +01:00
Kerollmops
3415812b06 Improve the intersection speed in the words criterion 2021-03-03 15:43:43 +01:00
Clément Renault
ef381e17bb Compute the candidates for each sub query tree 2021-03-03 15:43:43 +01:00
Kerollmops
e174ccbd8e Use the words criterion in the search module 2021-03-03 15:43:43 +01:00
Clément Renault
1e47f9b3ff Introduce the Words criterion 2021-03-03 15:43:43 +01:00
many
2d068bd45b implement Context trait for criteria 2021-03-03 15:43:43 +01:00
many
d92ad5640a remove option on bucket_candidates 2021-03-03 15:43:43 +01:00
many
64688b3786 fix query tree builder 2021-03-03 15:43:43 +01:00
many
fb7e6df790 add tests on typo criterion 2021-03-03 15:43:43 +01:00
Kerollmops
c5a32fd4fa Fix the typo criterion 2021-03-03 15:43:42 +01:00
many
a273c46559 clean warnings 2021-03-03 15:43:42 +01:00
many
9e093d5ff3 add cache on alterate_query_tree function 2021-03-03 15:43:42 +01:00
many
41fc51ebcf optimize alterate_query_tree when number_typos is zero 2021-03-03 15:43:42 +01:00
many
4da6e1ea9c add cache in typo criterion 2021-03-03 15:43:42 +01:00
Kerollmops
67c71130df Reduce the number of calls to alterate_query_tree 2021-03-03 15:43:42 +01:00
many
9ccaea2afc simplify criterion context 2021-03-03 15:43:42 +01:00
Clément Renault
fea9ffc46a Use the bucket candidates in the search module 2021-03-03 15:43:42 +01:00
Clément Renault
229130ed25 Correctly compute the bucket candidates for the Typo criterion 2021-03-03 15:43:42 +01:00
Clément Renault
5344abc008 Introduce the CriterionResult return type 2021-03-03 15:43:41 +01:00
many
86bcecf840 change variable's name from distance to proximity 2021-03-03 15:43:41 +01:00
many
4128bdc859 reduce match possibilities in docids fetchers 2021-03-03 15:43:41 +01:00
many
907482c8ac clean docids fetchers 2021-03-03 15:43:41 +01:00
many
774a255f2e use prefix cache in criteria 2021-03-03 15:43:41 +01:00
many
98e69e63d2 implement Context trait for criteria 2021-03-03 15:43:41 +01:00
Clément Renault
f091f370d0 Use the Typo criteria in the search module 2021-03-03 15:43:41 +01:00
Clément Renault
ad20d72a39 Introduce the Typo criterion 2021-03-03 15:43:41 +01:00
Clément Renault
f0ddea821c Introduce the Typo criterion 2021-03-03 15:43:41 +01:00
many
73286dc8bf Introduce the query tree data structure 2021-03-03 15:43:40 +01:00
Clément Renault
4e84999f20 Merge pull request #80 from meilisearch/query_tree
Introduce the query tree data structure
2021-03-03 14:25:29 +01:00
Kerollmops
411a118148 Avoid testing on nightly to fix a crate issue 2021-03-03 13:57:36 +01:00
Kerollmops
240b02e175 Remove unused Operation constructors 2021-03-03 13:40:19 +01:00
many
a463ae821e Add methods optional_words and authorize_typos on the query tree 2021-03-03 13:40:19 +01:00
Kerollmops
6d135beb21 Introduce the maximum_proximity helper function 2021-03-03 13:40:18 +01:00
Kerollmops
6008f528d0 Introduce the maximum_typo helper function 2021-03-03 13:40:18 +01:00
Kerollmops
1dc857a4b2 Fix the query tree optional word generation with phrases 2021-03-03 13:40:18 +01:00
Kerollmops
4f19749252 Introduce the word_documents_count method on the Context trait 2021-03-03 13:40:18 +01:00
Kerollmops
79a143b32f Introduce the query tree data structure 2021-03-03 13:40:18 +01:00
Clément Renault
5f109e8589 Merge pull request #95 from meilisearch/helpers-crate
Introduce a helpers crate that exports the database to stdout
2021-03-01 19:59:18 +01:00
Clément Renault
9423310816 Introduce a helpers crate that exports the database to stdout 2021-03-01 19:55:04 +01:00
Clément Renault
68102fced8 Merge pull request #86 from meilisearch/clean-up-infos-crate
Clean up the infos crate
2021-03-01 19:54:21 +01:00
Clément Renault
1eb7ce5cdb Improve the export-documents infos command by accepting internal ids 2021-03-01 19:48:01 +01:00
Clément Renault
4884b324e6 Remove the useless external ids patch method in the infos crate 2021-03-01 19:48:01 +01:00
Clément Renault
78bede1ffb Fix error displaying of the workspace members 2021-03-01 19:48:01 +01:00
Clément Renault
b59fe77ec7 Avoid creating a default empty database in the search crate 2021-03-01 19:48:01 +01:00
Clément Renault
45330a5e47 Avoid creating a default empty database in the infos crate 2021-03-01 19:48:00 +01:00
Clément Renault
794fce7bff Merge pull request #91 from meilisearch/add-primary-key-to-fields-map
add primary key to fields_id_map when not present
2021-03-01 16:20:41 +01:00
mpostma
e08b6b3ec7 add primary key to fields_id_map when not present 2021-03-01 16:10:16 +01:00
Clément Renault
8dcb3e0c41 Merge pull request #90 from meilisearch/words-prefixes-update
Expose the WordsPrefixes update from the UpdateBuilder
2021-02-21 12:27:48 +01:00
Clément Renault
c62d2f56d8 Expose an http route for the WordsPrefixes update 2021-02-21 12:16:53 +01:00
Clément Renault
c318373b88 Expose the WordsPrefixes update on the UpdateBuilder 2021-02-21 12:15:35 +01:00
Clément Renault
3090751dfc Merge pull request #94 from meilisearch/update-dependencies
Update dependencies
2021-02-21 12:08:18 +01:00
Kerollmops
519b1cb5c9 Update dependencies 2021-02-21 10:26:04 +01:00
Clément Renault
e62157e896 Merge pull request #88 from meilisearch/heed-error-word-documents-count
Return an heed error from the word_documents_count method
2021-02-18 15:05:00 +01:00
Kerollmops
c2ffcc4bd1 Return an heed error from the word_documents_count method 2021-02-18 14:59:37 +01:00
Clément Renault
09ca5d14c9 Merge pull request #87 from meilisearch/roaring-bitmap-length
Introduce fast methods to get roaring bitmap lengths
2021-02-18 14:52:40 +01:00
Kerollmops
2f561c77f5 Introduce the word documents count method on the index 2021-02-18 14:35:14 +01:00
Kerollmops
8d710c5130 Introduce heed codecs to retrieve the length of roaring bitmaps 2021-02-18 14:30:47 +01:00
Kerollmops
fcfb39c5de Move the RoaringBitmap related codecs into a module 2021-02-18 13:56:28 +01:00
Clément Renault
85c3d8aa52 Merge pull request #79 from meilisearch/prefix-caches
Introduce prefix databases
2021-02-17 11:27:15 +01:00
Kerollmops
aa4d9882d2 Introduce the new words-prefixes-docids infos subcommand 2021-02-17 11:22:27 +01:00
Kerollmops
49aee6d02c Fix the database-stats infos subcommand 2021-02-17 11:22:27 +01:00
Kerollmops
7a0f86a04f Introduce an infos command to extract the words prefixes fst 2021-02-17 11:22:27 +01:00
Kerollmops
a4a48be923 Run the words prefixes update inside of the indexing documents update 2021-02-17 11:22:26 +01:00
Kerollmops
8788485924 Take the prefix databases into account in the infos subcommand 2021-02-17 11:22:26 +01:00
Kerollmops
616ed8f73c Clean up the word prefix pair proximities when deleting documents 2021-02-17 11:22:26 +01:00
Clément Renault
ea37fd821d Clean up the words prefixes when deleting documents and words 2021-02-17 11:22:25 +01:00
Clément Renault
62eee9c69e Introduce the sorter_into_lmdb_database helper function 2021-02-17 11:12:39 +01:00
Clément Renault
b5b89990eb Compute and write the word prefix pair proximities database 2021-02-17 11:12:38 +01:00
Kerollmops
9b03b0a1b2 Introduce the word prefix pair proximity docids database 2021-02-17 11:12:38 +01:00
Clément Renault
f365de636f Compute and write the word-prefix-docids database 2021-02-17 11:12:38 +01:00
Clément Renault
ee5a60e1c5 Clear the words prefixes when clearing an index 2021-02-17 10:45:17 +01:00
Clément Renault
5e7b26791b Take the words-prefixes into account while computing the biggest values 2021-02-17 10:45:17 +01:00
Clément Renault
b3a21d5a50 Introduce the getters and setters for the words prefixes FST 2021-02-17 10:45:17 +01:00
Clément Renault
48b470140b Merge pull request #84 from meilisearch/stringify-documents-ids
Stringify documents ids even when deleting documents
2021-02-15 21:30:51 +01:00
Clément Renault
89ce4e74fe Do not change the primary key type when we serialize documents 2021-02-15 21:24:36 +01:00
Clément Renault
69acdd437e Deserialize documents ids into JSON Values on deletion 2021-02-15 21:24:36 +01:00
Clément Renault
b3776598d8 Add a test to check deletion of documents with number as primary key 2021-02-15 21:24:35 +01:00
Clément Renault
5d0ac3e3e6 Merge pull request #81 from meilisearch/smart-workspace
Change the project to become a workspace
2021-02-14 19:02:00 +01:00
Clément Renault
fecf3d6fc1 Move the command lines helpers into different crates 2021-02-14 18:55:15 +01:00
Clément Renault
d8f3421608 Update the dependencies and remove the unused ones 2021-02-14 18:32:46 +01:00
Clément Renault
e8639517da Change the project to become a workspace with milli as a default-member 2021-02-12 16:15:09 +01:00
Clément Renault
d450b971f9 Merge pull request #78 from meilisearch/required-changes-for-transplant
Changes for transplant
2021-02-02 16:22:09 +01:00
mpostma
8f43698a60 fix httpui 2021-02-01 19:49:51 +01:00
mpostma
3b60432687 Use update_id in UpdateBuilder
Add the `update_id` to the updates. The rationale is the
following:
- It allows for better traceability of the update events, thus improved
  debugging and logging.
- The engine is now aware of what it has already processed, and can return
  it if asked. It may not make sense now, but in the future, the update
  store may not work the same way, and this information about the state
  of the engine will be desirable (distributed environment).
2021-02-01 19:46:34 +01:00
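A minimal sketch of the idea, assuming hypothetical UpdateBuilder/IndexDocuments types rather than milli's actual API: the id is assigned once and threaded through the update, so every event it produces can be traced back to it.

```rust
// Hypothetical types for illustration only; not milli's actual API.
struct UpdateBuilder {
    update_id: u64,
}

impl UpdateBuilder {
    fn new(update_id: u64) -> Self {
        Self { update_id }
    }

    fn index_documents(&self) -> IndexDocuments {
        IndexDocuments { update_id: self.update_id }
    }
}

struct IndexDocuments {
    update_id: u64,
}

impl IndexDocuments {
    fn execute(&self) {
        // every log/progress event carries the id of the update it belongs to
        println!("processing update {}", self.update_id);
    }
}

fn main() {
    UpdateBuilder::new(42).index_documents().execute();
}
```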
mpostma
d487791b03 derive serde for method and format
This is nicer when working with UpdateMeta struct
2021-02-01 19:46:34 +01:00
mpostma
91d8198d17 return documents number on addition 2021-02-01 19:42:10 +01:00
Clément Renault
fa0cc2dc13 Merge pull request #66 from meilisearch/show-available-facets
Expose an API to compute facets distribution
2021-02-01 18:39:45 +01:00
Clément Renault
14ae01a6c9 Fix some typos in error messages 2021-02-01 18:10:57 +01:00
Clément Renault
f5f4438b43 Remove the duplicated code inside the facet_values_from_documents method 2021-01-28 11:22:18 +01:00
Clément Renault
b6e91291fb Add a comment to explain Serialize on FacetValue is implemented by hand 2021-01-27 18:29:56 +01:00
Clément Renault
b41bf58658 Split the FacetDistribution facet_values method into three 2021-01-27 18:29:56 +01:00
Clément Renault
a3e3bebed7 Rework the FacetDistribution execute method to use the faceted_fields struct 2021-01-27 18:29:54 +01:00
Clément Renault
11309ee99c Rework the FacetDistribution execute method to use the faceted_fields struct 2021-01-27 14:53:50 +01:00
Clément Renault
9c8a654079 Add comments to help read the facet_values branchings 2021-01-27 14:49:08 +01:00
Clément Renault
2e00740515 Make sure that we don't iterate through all string facet values 2021-01-27 14:41:36 +01:00
Clément Renault
b52d500fbc Reorder the FacetType enum branching in the facet_value method 2021-01-27 14:36:49 +01:00
Clément Renault
d91d321129 Introduce some constants to the FacetDistribution struct and settings 2021-01-27 14:32:30 +01:00
Clément Renault
60480a1e2f Rework the FacetCondition from_array constructor 2021-01-27 14:25:53 +01:00
Clément Renault
65b821b192 Rename the Index facets method into facets_distribution 2021-01-27 14:15:33 +01:00
Clément Renault
433ac8c38a Remove the ordered-float serde feature 2021-01-27 14:11:10 +01:00
Clément Renault
70e9b1e936 Introduce a flag to the search subcommand to display the facet distribution 2021-01-26 14:58:18 +01:00
Kerollmops
61dbcfa44a Bump the roaring to 0.6.4 2021-01-26 14:38:43 +01:00
Kerollmops
916dd3b7c5 Use the faceted_fields_ids method to fetch the ids 2021-01-26 14:14:38 +01:00
Clément Renault
b0c31500fc Simplify the front page 2021-01-26 14:14:38 +01:00
Kerollmops
7be275b692 Add the count to the facet distribution 2021-01-26 14:14:37 +01:00
Clément Renault
4b9e81fc89 Order the facet values lexicographically 2021-01-26 14:09:09 +01:00
Clément Renault
51a37de885 Introduce the FacetValue enum type 2021-01-26 14:09:09 +01:00
Kerollmops
d893e83622 Speed-up facet aggregation by using a FacetIter 2021-01-26 14:09:08 +01:00
Kerollmops
33945a3115 Introduce a new facet filters query field 2021-01-26 14:09:08 +01:00
Kerollmops
afa86d8a45 Add a simple test to the FacetCondition from_array method 2021-01-26 14:06:29 +01:00
Kerollmops
cb5e57e2dd FacetCondition can be created from array of facets 2021-01-26 14:06:28 +01:00
Clément Renault
a8e3269ad6 Introduce a basic front to display facets 2021-01-26 14:06:28 +01:00
Clément Renault
2cd8675734 Show facet values even for empty queries 2021-01-26 14:06:28 +01:00
Clément Renault
3916c54501 Speed-up facet aggregation on low number of candidates 2021-01-26 14:06:28 +01:00
Clément Renault
a17bb54d8f Limit the number of values by facets to a maximum of 1000 2021-01-26 14:06:28 +01:00
Kerollmops
aa129dd7e8 Display the number of candidates instead of the returned document count 2021-01-26 14:06:28 +01:00
Kerollmops
510df4729c Append the facet value to the facet query on click 2021-01-26 14:06:28 +01:00
Kerollmops
d25a859985 Display the facet values on the HTML debug page 2021-01-26 14:06:28 +01:00
Kerollmops
3b64735058 Introduce a struct to compute facets values 2021-01-26 14:06:27 +01:00
Clément Renault
30dae0205e Merge pull request #67 from meilisearch/fix-settings
Fix displayed and searchable attributes
2021-01-26 14:03:43 +01:00
mpostma
87a56d2bc9 Fix settings bug
replace ids with str in settings

This allows for better maintainability of the settings code, since
updating the searchable attributes is now straightforward.

criterion use string

fix reindexing fieldid remapping

add tests for primary_key compute

fix tests

fix http-ui

fixup! add tests for primary_key compute

code improvements settings

update deps

fixup! code improvements settings

fixup! refactor settings updates and fix bug

fixup! Fix settings bug

fixup! Fix settings bug

fixup! Fix settings bug

Update src/update/index_documents/transform.rs

Co-authored-by: Clément Renault <clement@meilisearch.com>

fixup! Fix settings bug
2021-01-26 13:53:08 +01:00
Clément Renault
26f060f66b Merge pull request #75 from meilisearch/fix-search-subcommand
Fix the search subcommand document display loop
2021-01-20 10:07:16 +01:00
Clément Renault
c35befbf38 Fix the search subcommand document display loop 2021-01-18 19:06:36 +01:00
Clément Renault
2fa5808e3f Merge pull request #71 from meilisearch/cleanup-useless-build-rs
Cleanup useless custom build file
2021-01-15 15:45:47 +01:00
Clément Renault
44c0dd0762 Fix an fst Set related warning 2021-01-13 11:03:03 +01:00
Clément Renault
1bb9348a90 Remove the chinese-words.txt previous tokenizer related file 2021-01-13 11:01:57 +01:00
Clément Renault
9141f5ef94 Remove the custom build.rs file 2021-01-13 11:01:38 +01:00
Clément Renault
51d1785576 Merge pull request #63 from meilisearch/meilisearch-tokenizer
Meilisearch tokenizer
2021-01-12 13:26:24 +01:00
mpostma
4f7f7538f7 highlight with new tokenizer 2021-01-11 21:59:37 +01:00
mpostma
1ae761311e integrate with meilisearch tokenizer 2021-01-07 16:14:27 +01:00
Clément Renault
7e1c94ab9c Merge pull request #65 from meilisearch/improve-facet-value-display
Improve the facet value displaying
2021-01-07 16:12:32 +01:00
Clément Renault
0a1beb688c Improve the facet value displaying, extracting the facet level 2021-01-07 16:05:09 +01:00
Clément Renault
5dd4dc2862 Merge pull request #60 from meilisearch/accept-compressed-documents-updates
Accept and mirror compression of documents additions
2020-12-23 10:59:26 +01:00
Kerollmops
a576c7ae4b Display the update meta result content on the update page 2020-12-22 13:42:43 +01:00
Kerollmops
6c7db3d956 Display the time it took to process an update 2020-12-22 13:42:43 +01:00
Kerollmops
9fcbc83ebc Accept and mirror compression of documents additions 2020-12-22 13:42:42 +01:00
Clément Renault
cd158d4cde Merge pull request #61 from meilisearch/update-handler
create update handler trait
2020-12-22 13:42:00 +01:00
mpostma
49a016b53d create update handler trait
fix type inference error
2020-12-22 12:59:15 +01:00
Clément Renault
5039528b56 Merge pull request #59 from meilisearch/improve-bytes-structopt
Use the byte-unit crate to ease library usage
2020-12-20 14:52:39 +01:00
Kerollmops
77e951e933 Use the byte-unit crate to ease library usage 2020-12-20 12:00:37 +01:00
Clément Renault
b032ceb5d4 Merge pull request #56 from meilisearch/asc-desc-criteria-non-faceted
Return non-faceted documents to complete the requested limit
2020-12-17 14:34:36 +01:00
Clément Renault
914eab12f7 Return non-faceted documents as remaining results 2020-12-17 13:57:07 +01:00
Clément Renault
0dec761e21 Merge pull request #54 from meilisearch/compress-updates
Compress updates content using gzip
2020-12-17 11:06:31 +01:00
Clément Renault
5a23417499 Compress updates content using gzip 2020-12-17 10:59:58 +01:00
Clément Renault
cd5605bb86 Merge pull request #50 from meilisearch/fix-asc-desc-criterion
Fix the Asc/Desc criteria
2020-12-13 11:59:11 +01:00
Clément Renault
0e5609d40e Limit the number of elements after reversing it 2020-12-12 14:21:27 +01:00
Clément Renault
9d966a28d3 Merge pull request #47 from meilisearch/fix-grenad-write-bug
Bump grenad to fix an indexing bug
2020-12-05 17:18:49 +01:00
Clément Renault
e7f2ab9138 Bump grenad to fix an indexing bug 2020-12-05 16:39:15 +01:00
Clément Renault
9628da2d17 Merge pull request #40 from meilisearch/asc-desc-faceted-fields
Ascending and descending custom ranking
2020-12-04 12:08:22 +01:00
Clément Renault
026f54dcf7 Use the field id docid facet value database when sorting documents 2020-12-04 12:03:20 +01:00
Clément Renault
3cdf14d4c5 Introduce the field-id-docid-facet-values database 2020-12-04 12:03:20 +01:00
Clément Renault
4ffbddf21f Introduce debug info for the time it takes to fetch candidates 2020-12-04 12:03:20 +01:00
Clément Renault
13217f072b Use the FacetRange iterator in the facet exploring function 2020-12-04 12:03:20 +01:00
Clément Renault
0959e1501f Introduce the FacetRevRange Iterator struct 2020-12-04 12:02:23 +01:00
Clément Renault
58d039a70d Introduce the FacetIter Iterator 2020-12-04 12:02:23 +01:00
Clément Renault
d8e25a0863 Order documents by the first custom criterion on basic searches 2020-12-04 12:02:23 +01:00
Clément Renault
e0cc7faea1 Use the facet ordered to the search 2020-12-04 12:02:23 +01:00
Clément Renault
61b383f422 Introduce the criteria update setting 2020-12-04 12:02:22 +01:00
Clément Renault
f8f33d35e0 Add the criteria list to the index 2020-12-02 11:21:26 +01:00
Kerollmops
57e8e5c965 Move the FacetCondition to its own module 2020-12-02 11:21:26 +01:00
Clément Renault
ecc8bc8910 Introduce the FieldId u8 alias type 2020-12-02 11:19:45 +01:00
Clément Renault
0a63e69e04 Merge pull request #45 from meilisearch/infos-export-documents
Infos export documents
2020-12-02 10:50:54 +01:00
Clément Renault
16755b26e2 Make the export words FST export infos subcommand outputs to stdout 2020-12-02 10:43:22 +01:00
Kerollmops
85d51ab228 Introduce an infos subcommand to export documents from an index 2020-12-02 10:42:48 +01:00
Clément Renault
92f253adb2 Merge pull request #41 from meilisearch/update-store-delete-updates
Allow users to abort pending updates
2020-12-01 14:56:00 +01:00
Clément Renault
222f2913c1 Simplify the processing_update UpdateStore method 2020-12-01 14:51:05 +01:00
Kerollmops
878b1873cd Make sure to avoid removing the first pending update as it is processed 2020-12-01 14:51:05 +01:00
Clément Renault
96f64c629e Move the UpdateStore out of the update module 2020-12-01 14:51:05 +01:00
Clément Renault
58a1f9081c Allow users to abort pending updates, one by one or all at once 2020-12-01 14:51:05 +01:00
Clément Renault
e4c2abb1d9 Merge pull request #44 from meilisearch/clippy
Fix some clippy warnings
2020-12-01 14:50:31 +01:00
Kerollmops
d0240bd9d0 Done a big clippy pass 2020-12-01 14:45:19 +01:00
Clément Renault
6e3f4e5e45 Merge pull request #43 from meilisearch/lowercase-facet-strings
Lowercase the facet string value
2020-12-01 14:44:39 +01:00
Kerollmops
844a9022fb Introduce the FacetStringOperator equal and not_equal constructors 2020-12-01 14:29:44 +01:00
Kerollmops
45877b3154 Lowercase the facet string value 2020-12-01 14:10:00 +01:00
Clément Renault
6120f6590b Merge pull request #38 from meilisearch/facet-queries
Introduce a facet filter system
2020-11-28 17:21:07 +01:00
Clément Renault
ba4ba685f9 Make the facet levels map to previous level groups and don't split them 2020-11-28 12:43:43 +01:00
Clément Renault
276c87af68 Introduce more test to the FacetCondition struct 2020-11-23 16:43:57 +01:00
Clément Renault
a50f63840f Return spanned pest error while parsing numbers in facet filters 2020-11-23 16:43:57 +01:00
Clément Renault
54d5cec582 Transform numbers into strings when faceted and necessary 2020-11-23 16:43:56 +01:00
Clément Renault
fc686aaca7 Use the De Morgan law to simplify the NOT operation 2020-11-23 16:43:56 +01:00
Clément Renault
7370ef8c5e Add two simple test to the facet FacetCondition struct construction 2020-11-23 16:43:56 +01:00
Clément Renault
fc242f6e1f Rewrite the FacetCondition Debug impl in a defensive way 2020-11-23 16:43:56 +01:00
Clément Renault
a0adfb5e8e Introduce a real pest parser and support every facet filter conditions 2020-11-23 16:43:55 +01:00
Clément Renault
c52d09d5b1 Support a basic version of the string facet query system 2020-11-23 16:43:55 +01:00
Clément Renault
498f0d8539 Output the documents count for each facet value in the infos subcommand 2020-11-23 16:43:55 +01:00
Clément Renault
278391d961 Move the facets related system into the new search module 2020-11-23 16:43:54 +01:00
Clément Renault
531bd6ddc7 Make the facet operator evaluation code generic 2020-11-23 16:43:54 +01:00
Clément Renault
d40dd3e4da Reduce the amount of duplicated code to iterate over facet values 2020-11-23 16:43:54 +01:00
Clément Renault
07a0c82790 Bump heed to 0.10.4 to be able to lazily decode roaring bitmaps 2020-11-23 16:43:53 +01:00
Clément Renault
59ca4b9fe4 Introduce a little bit of debug when deleting documents 2020-11-23 16:43:53 +01:00
Clément Renault
0694cc4916 Drastically speed up documents deletion updates 2020-11-23 16:43:53 +01:00
Clément Renault
38c76754ef Make the facet level search system generic on f64 and i64 2020-11-23 16:43:52 +01:00
Clément Renault
9e2cbe3362 Improve the FacetLevelF64 serialization 2020-11-23 16:43:52 +01:00
Clément Renault
ced0c29c56 Simplify getting the biggest level of a facet field 2020-11-23 16:43:52 +01:00
Kerollmops
7d67c9e2e7 Improve the facet search algorithm performances 2020-11-23 16:43:52 +01:00
Clément Renault
67d4a1b3fc Introduce a new update for the facet levels 2020-11-23 16:43:51 +01:00
Clément Renault
45e0feab4e Speed up the facets stats infos subcommand 2020-11-23 16:43:51 +01:00
Kerollmops
7a6e6eb5e2 Introduce a facets stats infos subcommand 2020-11-23 16:43:51 +01:00
Clément Renault
9ec95679e1 Introduce a function to retrieve the facet level range docids 2020-11-23 16:43:50 +01:00
Clément Renault
57d253aeda Improve the infos biggest-value subcommand to support facets 2020-11-23 16:43:50 +01:00
Clément Renault
fd8360deb1 Update the facet indexing facet test 2020-11-23 16:43:50 +01:00
Clément Renault
9b7e516a56 Fix the indexing process going back in time 2020-11-23 16:43:49 +01:00
Clément Renault
b255be93fa Bump heed to 0.10.3 2020-11-23 16:43:49 +01:00
Clément Renault
218eb97241 Introduce an input field for the facet filters on the http-ui 2020-11-23 16:43:49 +01:00
Clément Renault
2341b99379 Support a basic facet based query system 2020-11-23 16:43:49 +01:00
Clément Renault
1d5795d134 Merge pull request #39 from meilisearch/speedup-documents-ids-merging
Speedup documents ids merging
2020-11-22 19:32:24 +01:00
Clément Renault
05c95dfdc6 Introduce an infos subcommand that patches the external documents ids 2020-11-22 19:27:34 +01:00
Clément Renault
27f3ef5f7a Use the new ExternalDocumentsIds struct in the engine 2020-11-22 19:27:34 +01:00
Clément Renault
fe82516f9f Use the ExternalDocumentsIds in the Index struct 2020-11-22 19:27:34 +01:00
Clément Renault
415c0b86ba Introduce the ExternalDocumentsIds struct 2020-11-22 19:27:33 +01:00
Clément Renault
eded5558b2 Rename the users ids documents ids into external documents ids 2020-11-22 17:17:47 +01:00
Clément Renault
f06355b0bb Display the time it takes to merge user documents ids 2020-11-22 11:28:35 +01:00
Clément Renault
b0c5f59c07 Merge pull request #36 from meilisearch/index-facets
Index facets values and support facet numbers
2020-11-14 14:32:05 +01:00
Clément Renault
e76558b0cc Change the settings update system to reindex only one time 2020-11-14 11:17:49 +01:00
Clément Renault
f9cc12ae0f Do not try to parse empty faceted strings 2020-11-13 18:35:47 +01:00
Clément Renault
23f9a22edc Update the HTTP settings route to accept the faceted fields 2020-11-13 18:35:47 +01:00
Clément Renault
8e6efe4d87 Introduce an infos subcommand to display the facet values 2020-11-13 18:35:47 +01:00
Clément Renault
a18d9a1f87 Parse and store the faceted fields 2020-11-13 16:13:51 +01:00
Clément Renault
4e5e55c21a Simplify the merge functions 2020-11-13 14:50:30 +01:00
Clément Renault
8ae9888959 Store the field id instead of the field name in the facets database 2020-11-13 14:50:30 +01:00
Clément Renault
cf9ddd293d Simplify the facet types 2020-11-13 11:46:48 +01:00
Clément Renault
466fb601d6 Faceted fields settings must specify the facet type 2020-11-13 11:46:48 +01:00
Clément Renault
ebe7087bff Introduce the faceted fields setting 2020-11-11 17:08:18 +01:00
Clément Renault
72f18759ba Introduce getters and setters for the facet fields ids facet types 2020-11-11 16:26:22 +01:00
Clément Renault
92ec908303 Introduce the facet field id values engine database 2020-11-11 16:06:33 +01:00
Clément Renault
e0058c1125 Introduce codecs for facet types (string, f64, u64, i64) 2020-11-11 15:48:24 +01:00
Clément Renault
b4951c058b Merge pull request #35 from meilisearch/better-update-progress
Better update progress
2020-11-11 13:19:32 +01:00
Clément Renault
a71a96894d Use the new indexing progress events in the http server 2020-11-11 13:14:24 +01:00
Clément Renault
ea43080548 Make the indexing process send the new progress step events 2020-11-11 13:13:08 +01:00
Clément Renault
e78b96a657 Introduce a more detailed progress status enum 2020-11-11 12:31:59 +01:00
Clément Renault
8a4794fc51 Merge pull request #34 from meilisearch/speedup-indexing
Write the words pairs proximities directly into LMDB to speedup indexing
2020-11-11 11:30:28 +01:00
Clément Renault
535f8088d7 Write the words pairs proximities directly into LMDB to speedup indexing 2020-11-11 11:25:31 +01:00
Clément Renault
fbe8ec1fe7 Merge pull request #33 from meilisearch/speedup-CI
Avoid compiling benchmarks and speedup the CI
2020-11-11 11:20:26 +01:00
Clément Renault
a55453e634 Avoid compiling benchmarks and speedup the CI 2020-11-11 11:14:57 +01:00
Clément Renault
5a6b62e77c Merge pull request #32 from meilisearch/http-get-one-document
Introduce a route to get one document
2020-11-11 11:14:00 +01:00
Clément Renault
63fab07047 Introduce a route to retrieve a document with its id 2020-11-11 11:04:11 +01:00
Clément Renault
c00fc6f8bb Merge pull request #31 from meilisearch/improve-update-process
Improve update process
2020-11-09 17:45:19 +01:00
Clément Renault
0cfeee13ee Reduce the number of documents limit when update progress are sent 2020-11-09 17:34:52 +01:00
Clément Renault
cf8a6a042e Display a real progress bar when updates are processed 2020-11-09 17:33:36 +01:00
Clément Renault
45ae086974 Make sure pending updates are processed when restarting the UpdateStore 2020-11-09 17:33:07 +01:00
Clément Renault
8ffdfa72e3 Merge pull request #28 from meilisearch/highlight-json-value
Make the engine able to highlight any json type
2020-11-09 10:23:22 +01:00
Clément Renault
4fb138c42e Make sure we index all kind of JSON types 2020-11-06 16:35:07 +01:00
Clément Renault
640c7d748a Modify the highlight function to support any JSON type 2020-11-05 13:59:32 +01:00
Clément Renault
c94bc59d7e Introduce a function to transform an obkv into a JSON 2020-11-05 13:57:29 +01:00
Clément Renault
b220885f42 Fix the milli logo in the README 2020-11-05 11:43:47 +01:00
Clément Renault
1c2d36d8a3 Merge pull request #27 from meilisearch/split-http-ui
Move the http server into its own sub-module
2020-11-05 11:36:04 +01:00
Clément Renault
0408c9d66a Move the http server into its own sub-module 2020-11-05 11:16:39 +01:00
Clément Renault
749764f35b Merge pull request #26 from meilisearch/searchable-attributes
Introduce the searchable attributes
2020-11-04 09:40:03 +01:00
Clément Renault
a31db33e93 Introduce an optimization when the searchable attributes are ordered 2020-11-03 19:59:09 +01:00
Clément Renault
01c4f5abcd Introduce the searchable attributes setting to the settings route 2020-11-03 19:35:55 +01:00
Clément Renault
63f65bac3e Ignore the long running UpdateStore test 2020-11-03 19:12:00 +01:00
Clément Renault
a20c871ece Add more tests to the Settings searchable attributes operation 2020-11-03 18:58:19 +01:00
Clément Renault
649fb6e401 Make sure that the indexing Store only index searchable fields 2020-11-03 18:58:19 +01:00
Clément Renault
e48630da72 Introduce the searchable parameter settings to the Settings update 2020-11-03 18:58:19 +01:00
Clément Renault
68d783145b Introduce searchable fields methods on the index 2020-11-03 18:58:19 +01:00
Clément Renault
32486b5beb Merge pull request #25 from meilisearch/update-ci
Update the Github Actions settings
2020-11-03 18:53:04 +01:00
Clément Renault
a716ec61b9 Remove the fmt and clippy jobs 2020-11-03 18:52:45 +01:00
Clément Renault
c059924a8f Remove the bors config as it does not work on private repositories 2020-11-03 18:25:49 +01:00
Clément Renault
3ef031b2fe Update the CI to work on push and PRs 2020-11-03 18:25:12 +01:00
Clément Renault
58c07e7f8c Merge pull request #23 from meilisearch/update-builder-thread-pool
Allow library users to specify the rayon ThreadPool for UpdateBuilder
2020-11-02 19:11:50 +01:00
Clément Renault
7e120fc441 Allow library users to specify the rayon ThreadPool for UpdateBuilder 2020-11-02 19:11:22 +01:00
Clément Renault
87902de010 Merge pull request #22 from meilisearch/update-readme
Update the README
2020-11-02 18:28:16 +01:00
Clément Renault
1718fe3d74 Update the README to be up to date with the recent updates 2020-11-02 18:07:24 +01:00
Clément Renault
82322ddab6 Merge pull request #21 from meilisearch/displayed-attributes
Add the displayed attributes setting to an index
2020-11-02 15:50:29 +01:00
Clément Renault
3d1854ab95 Introduce an HTTP route to accept settings changes 2020-11-02 15:47:21 +01:00
Clément Renault
995d72b8c1 Introduce the Settings update operation 2020-11-02 15:31:20 +01:00
Clément Renault
0c612f08c7 Rename the indexing warp routes 2020-11-02 15:30:29 +01:00
Clément Renault
9b08f48dbd Construct the documents based on the displayed fields or fields ids order 2020-11-02 13:01:32 +01:00
Clément Renault
303c3ce89e Clean up the heed imports in the index module 2020-11-02 12:49:54 +01:00
Clément Renault
8f56753a2f Introduce displayed fields methods on the index 2020-11-02 12:49:54 +01:00
Clément Renault
4fded5bd0e Bump heed to be able to reference a RoTxn from multiple threads 2020-11-02 12:49:23 +01:00
Clément Renault
3abfe8aa22 Validate documents ids before accepting them 2020-11-01 20:55:21 +01:00
Clément Renault
0ccf4cf785 Simplify the IndexDocuments builder creation from the UpdateBuilder 2020-11-01 17:31:20 +01:00
Clément Renault
d8ff939409 Introduce bors to the project 2020-11-01 14:49:07 +01:00
Clément Renault
9047dc8163 Add a Github actions workflows 2020-11-01 14:47:44 +01:00
Clément Renault
600aa223c2 Fix a bug where generated docids were not saved when indexing JSON docs 2020-11-01 12:19:07 +01:00
Clément Renault
f0e63025b0 Update the Transform struct to support JSON stream updates 2020-11-01 12:19:06 +01:00
Kerollmops
082ad84914 Fix the benchmarks 2020-10-31 22:18:29 +01:00
Kerollmops
6d52c5b2f0 Introduce a parameter to disable the engine to autogenerate docids 2020-10-31 21:46:55 +01:00
Clément Renault
21b4d60101 Add replace/update csv/json from the HTTP server 2020-10-31 20:52:49 +01:00
Clément Renault
a4f8be7811 Support numbers and boolean when indexing JSON 2020-10-31 20:52:49 +01:00
Clément Renault
f0d028d3a4 Update the Transform struct to support JSON updates 2020-10-31 20:52:49 +01:00
Clément Renault
9d47ee52b4 Generate a uuid v4 based document id when missing 2020-10-31 15:11:06 +01:00
Clément Renault
ddbd336387 Introduce primary key methods on the index 2020-10-31 11:50:59 +01:00
Clément Renault
0d01e4854b Add a test to check that merging works correctly with CSVs 2020-10-30 13:46:56 +01:00
Clément Renault
955302fd95 Introduce an HTTP route to clear the documents 2020-10-30 13:12:55 +01:00
Clément Renault
7cc1a358f5 Fix a documents indexing bug and add a test 2020-10-30 12:14:25 +01:00
Clément Renault
99da69c85f Introduce the prepare_for_closing Index method 2020-10-30 11:46:14 +01:00
Clément Renault
222063b19d Introduce the Index path method 2020-10-30 11:46:00 +01:00
Clément Renault
085d3b9d94 Update heed to 0.10.0 2020-10-30 11:42:00 +01:00
Clément Renault
a30206a665 Prefer using the iterator put_current instead of a get/put method 2020-10-30 11:13:45 +01:00
Clément Renault
e63fdf2b22 Move the heed env into the index itself to ease the usage of the library 2020-10-30 10:56:35 +01:00
Clément Renault
b5d52b6b45 Prefer using a smallstr instead of a real String to reduce allocations 2020-10-29 14:32:32 +01:00
Clément Renault
40993a0d25 Fix an indexing process bug, where documents were not written in order 2020-10-29 14:20:03 +01:00
Clément Renault
855a251489 Enable the clear documents optimization that wasn't working due to a bug 2020-10-29 13:52:48 +01:00
Clément Renault
1228c2948d Add a comment about the ClearDocuments operation in the DeleteDocuments 2020-10-28 11:17:36 +01:00
Clément Renault
98fc24cbdf Bump heed to fix a prefix iter bug 2020-10-28 10:55:21 +01:00
Kerollmops
d6338af766 Improve documents deletion by iterating over all the word pair positions 2020-10-27 18:50:09 +01:00
Clément Renault
3889d956d9 Introduce the UpdateBuilder and use it in the HTTP routes 2020-10-27 18:47:58 +01:00
Clément Renault
5c62fbb6a8 Move the IndexDocuments update into its own module 2020-10-26 12:21:13 +01:00
Clément Renault
8f76ec97c0 Move the DeleteDocuments update into its own module 2020-10-26 11:01:00 +01:00
Clément Renault
92ef1faa97 Move the ClearDocuments update into its own module 2020-10-26 10:58:17 +01:00
Clément Renault
1e1821f002 Introduce the merge_two_obkv function to merge documents on update 2020-10-26 10:55:07 +01:00
Clément Renault
60347a5483 Move the AvailableDocumentsIds iterator into the update module 2020-10-26 10:53:23 +01:00
Clément Renault
b14cca2ad9 Introduce the UpdateBuilder type along with some update operations 2020-10-25 18:32:01 +01:00
Clément Renault
adacc7977d Make the Index return default values when values don't exist 2020-10-25 18:30:24 +01:00
Clément Renault
a7a4984175 Introduce the Transform type into the indexing system 2020-10-24 17:06:09 +02:00
Clément Renault
b44b04d25b Serialize the CSV record values as JSON strings 2020-10-24 14:43:46 +02:00
Clément Renault
656a851830 Introduce the Transform struct transforming CSVs
This allows us to:
  - Transform a CSV, a JSON or a JSON lines data type into the same
    Grenad x Obkv streamable data type and create the new FieldsIdsMap.
  - Extract all the documents' user ids in advance to be able to delete
    the existing documents before re-indexing them.
  - Keep the last document with the same user id, avoiding duplicates
    within the same request.
2020-10-24 13:37:38 +02:00
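A minimal sketch of the last rule, with assumed types rather than milli's actual Grenad/Obkv-based Transform: when several documents in the same update share a user id, keep only the last one.

```rust
use std::collections::HashMap;

/// Keep only the last document for each user id, preserving the order in
/// which the surviving documents appeared. The (id, body) String pair is a
/// stand-in for real documents.
fn keep_last_by_user_id(docs: Vec<(String, String)>) -> Vec<(String, String)> {
    // record the index of the last occurrence of each user id
    let mut last_index: HashMap<String, usize> = HashMap::new();
    for (i, (id, _)) in docs.iter().enumerate() {
        last_index.insert(id.clone(), i);
    }
    // keep a document only if it is the last occurrence of its id
    docs.into_iter()
        .enumerate()
        .filter(|(i, (id, _))| last_index[id] == *i)
        .map(|(_, doc)| doc)
        .collect()
}

fn main() {
    let docs = vec![
        ("1".to_string(), "first".to_string()),
        ("2".to_string(), "second".to_string()),
        ("1".to_string(), "first, updated".to_string()),
    ];
    let kept = keep_last_by_user_id(docs);
    assert_eq!(kept.len(), 2);
    assert_eq!(kept[1], ("1".to_string(), "first, updated".to_string()));
}
```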
Clément Renault
8d82e37ec0 Introduce the AvailableDocumentsIds iterator 2020-10-23 12:07:01 +02:00
Clément Renault
2a4cd81c86 Add documentation to the Index methods 2020-10-22 15:44:12 +02:00
Clément Renault
566a7c3039 Make the FieldsIdsMap serialization more stable by using a BTreeMap 2020-10-22 14:53:20 +02:00
Clément Renault
9133f38138 Introduce the FieldsIdsMap type 2020-10-22 12:56:35 +02:00
Clément Renault
802e925fd7 Switch to a JSON protocol for the front page 2020-10-21 18:26:29 +02:00
Clément Renault
5caf523fd9 Move the Index to its own module 2020-10-21 15:55:48 +02:00
Clément Renault
2210818114 Introduce the obkv heed codec 2020-10-21 15:51:48 +02:00
Clément Renault
f6eecb855e Send a basic progress status to the updates front page 2020-10-21 15:38:28 +02:00
Clément Renault
4eeeccb9cd Change the UpdateStore to have different processed and pending meta types 2020-10-21 13:52:15 +02:00
Clément Renault
16ab3e02a9 Change the UpdateStore internal meta serializer 2020-10-21 13:42:49 +02:00
Clément Renault
f948a03be2 Optimise the merge functions to avoid allocations 2020-10-20 16:40:50 +02:00
Clément Renault
cde8478388 Replace the panic in the merge function by actual errors 2020-10-20 16:19:07 +02:00
Clément Renault
8ed8abb9df Introduce an append-only indexing system 2020-10-20 15:00:58 +02:00
Clément Renault
a122d3d466 Export the indexing part into a module 2020-10-20 14:22:09 +02:00
Clément Renault
eb92e72e6c Updates can send progress update status 2020-10-20 12:28:10 +02:00
Clément Renault
341046c96c Remove the js map file from the filesize.js script 2020-10-20 12:20:42 +02:00
Clément Renault
3a934b7020 Split the update attributes on the updates front page 2020-10-20 12:19:48 +02:00
Clément Renault
03ca1ff634 Make the updates page interactive 2020-10-20 12:09:38 +02:00
Clément Renault
35c9a3c558 Broadcast the updates infos to every ws client 2020-10-20 11:19:34 +02:00
Clément Renault
56c3a61d83 Introduce a new updates page 2020-10-19 19:57:15 +02:00
Clément Renault
871222aebd Introduce some new routes to handle live indexing 2020-10-19 16:06:43 +02:00
Clément Renault
d3145be744 Rename the meta UpdateStore method 2020-10-19 14:00:00 +02:00
Clément Renault
8bfa43f9a7 Update the iter_metas UpdateStore method 2020-10-19 13:58:08 +02:00
Clément Renault
65e32fecb1 Move the binaries into one with subcommands 2020-10-19 13:44:17 +02:00
Clément Renault
ff389f1270 Update heed-types to 0.7.1 2020-10-19 11:52:59 +02:00
Clément Renault
5b4eda670b Add two tests for the UpdateStore 2020-10-18 18:55:09 +02:00
Clément Renault
edb8c99fbe Introduce a method to get the meta of an update on the UpdateStore 2020-10-18 17:19:04 +02:00
Clément Renault
eca49e3a03 Introduce a notification channel for the UpdateStore 2020-10-18 16:37:37 +02:00
Clément Renault
83c1db8763 Introduce the UpdateStore 2020-10-18 15:26:57 +02:00
Clément Renault
90d4c1d153 Simplify the words pair proximity computation 2020-10-15 16:18:43 +02:00
Clément Renault
9021b2dba6 Introduce the enable-chunk-fusing flag 2020-10-14 18:44:59 +02:00
Kerollmops
f980422c57 Move from oxidized-mtbl to grenad 2020-10-14 12:47:32 +02:00
Clément Renault
b342a86c15 Divide the max-memory parameter by the number of sorters in the store 2020-10-08 17:27:53 +02:00
Kerollmops
fb2c402ae1 Split the max-memory by the number of jobs 2020-10-07 14:23:22 +02:00
Kerollmops
38820bc75c Improve and simplify the query tokenizer 2020-10-07 14:23:22 +02:00
Kerollmops
4e9bd1fef5 Bump oxidized-mtbl 2020-10-07 14:23:22 +02:00
Kerollmops
a00f5850ee Add support for placeholder search for empty queries 2020-10-06 20:19:50 +02:00
Kerollmops
433d9bbc6e Use CompressionType::from_str rather than a custom function 2020-10-06 13:50:34 +02:00
Kerollmops
4b819457c9 Enable the structopt/clap wrap help feature 2020-10-06 13:06:22 +02:00
Clément Renault
a2182e68a6 Rewrite the parallel merge indexing part 2020-10-05 20:54:06 +02:00
Kerollmops
e9e03259c1 Improve the mDFS performance and return the proximity 2020-10-05 18:13:56 +02:00
Kerollmops
bb15f16d8c Merge other databases content while writing into LMDB at the same time 2020-10-05 16:35:10 +02:00
Clément Renault
9af946a306 Merging the main, word docids and words pairs proximity docids in parallel 2020-10-04 18:40:34 +02:00
Clément Renault
99705deb7d Directly use a writer for the docid word positions 2020-10-04 18:17:53 +02:00
Clément Renault
67577a3760 It is an error to merge docid word positions 2020-10-04 17:31:12 +02:00
Clément Renault
ce8e56ee18 Rewrite the indexer to use one MTBL per database
This allows us to avoid prefixing keys and appending into LMDB databases
2020-10-04 17:04:33 +02:00
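A minimal sketch of the idea behind this commit (illustrative names, plain BTreeMaps standing in for the MTBL sorters): with one sorted store per target database, keys no longer need a database-discriminating prefix, and each store already comes out in key order, so it can be appended straight into its LMDB database.

use std::collections::BTreeMap;

// Before (sketch): a single store whose keys carried a database tag.
fn prefixed_key(db_tag: u8, key: &[u8]) -> Vec<u8> {
    let mut k = Vec::with_capacity(1 + key.len());
    k.push(db_tag); // the prefix this commit removes the need for
    k.extend_from_slice(key);
    k
}

// After (sketch): one sorted store per database, bare keys.
#[derive(Default)]
struct Stores {
    word_docids: BTreeMap<Vec<u8>, Vec<u8>>,
    docid_word_positions: BTreeMap<Vec<u8>, Vec<u8>>,
}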
Clément Renault
770f29fd05 Bump the oxidized-mtbl dependency 2020-10-04 17:04:33 +02:00
Clément Renault
acd2a63879 Introduce a simple FST based chinese word segmenter 2020-10-04 17:04:33 +02:00
Clément Renault
6cc6addc2f Increase the CboRoaringBitmapCodec threshold 2020-10-02 17:06:17 +02:00
Clément Renault
e41a3822a6 Add a simple test for the CboRoaringBitmapCodec 2020-10-02 16:52:36 +02:00
Clément Renault
c4b0c57059 Reduce the default indexer max-memory parameter 2020-10-02 16:47:41 +02:00
Kerollmops
007e647462 Introduce the Mdfs Iterator that explores the proximity graph using a mana DFS 2020-10-02 16:46:07 +02:00
Kerollmops
d4e80407e5 Introduce the mana depth first search algorithm 2020-10-02 16:46:07 +02:00
Kerollmops
f6a8096720 Rename the quartiles as the 25th, 50th and 75th percentiles 2020-10-02 16:46:07 +02:00
Kerollmops
891e0188dd Introduce the database-stats infos subcommand 2020-10-02 16:46:07 +02:00
Kerollmops
079742b4d3 Clean up the stats and size of database infos subcommands 2020-10-02 16:46:06 +02:00
Kerollmops
d0c73564b1 Use the CboRoaringBitmapCodec for the word pair proximity docids 2020-10-02 16:46:06 +02:00
Kerollmops
5a6a698e1d Introduce the CboRoaringBitmapCodec 2020-10-02 16:46:06 +02:00
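A hedged sketch of what a "cbo" (conditional) bitmap codec can look like; the threshold and byte layout here are illustrative, not milli's actual ones. Below a small element count, the raw u32s are cheaper to store than a RoaringBitmap with its headers, so the codec switches representation on the length.

use roaring::RoaringBitmap;

const THRESHOLD: u64 = 7; // illustrative, not the real value

fn encode(bitmap: &RoaringBitmap) -> Vec<u8> {
    if bitmap.len() <= THRESHOLD {
        // Few values: store them raw, 4 bytes each, no bitmap header.
        bitmap.iter().flat_map(|n| n.to_ne_bytes()).collect()
    } else {
        let mut bytes = Vec::new();
        bitmap.serialize_into(&mut bytes).unwrap();
        bytes
    }
}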
Kerollmops
4eda149ffa Rename the BoRoaringBitmap codec 2020-10-02 16:46:06 +02:00
Clément Renault
ac84db2506 Move the words pairs proximities average into the stats infos subcommand 2020-10-02 16:46:06 +02:00
Kerollmops
30755e31e7 Introduce the words pairs proximities stats info subcommand 2020-10-02 16:46:06 +02:00
Clément Renault
bc35c9a598 Introduce the size_of_database infos subcommand 2020-10-02 16:46:05 +02:00
Kerollmops
c6b883289c Remove the unused fetch_keywords function 2020-09-30 15:41:23 +02:00
Kerollmops
58237bd67f Introduce the average-number-of-document-by-word-pair-proximity infos subcommand 2020-09-29 18:32:48 +02:00
Kerollmops
991be8950e Rename the subcommand into average-number-of-positions-by-word-by-doc 2020-09-29 18:15:44 +02:00
Kerollmops
54370e228a Search for documents with longer proximities until we find enough 2020-09-29 17:37:14 +02:00
Kerollmops
f277ea134f Simplify some search function by reducing the number of parameters 2020-09-29 16:08:58 +02:00
Kerollmops
68f4af7d2e Improve the display of the number of processed documents 2020-09-29 16:08:58 +02:00
Kerollmops
59a127d022 Improve the indexing process
We now store the word pair proximities in a cache and only compute the
shortest proximity between pairs of words in a document.
2020-09-29 15:09:18 +02:00
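A simplified sketch of that cache (names are illustrative, and proximity is reduced to plain position distance): for every pair of words in a document, we keep only the shortest proximity encountered.

use std::collections::HashMap;

fn shortest_proximities(
    positions: &[(String, u32)], // (word, position) pairs of one document
) -> HashMap<(String, String), u32> {
    let mut cache = HashMap::new();
    for (i, (w1, p1)) in positions.iter().enumerate() {
        for (w2, p2) in &positions[i + 1..] {
            let prox = p1.abs_diff(*p2);
            cache
                .entry((w1.clone(), w2.clone()))
                .and_modify(|best: &mut u32| *best = (*best).min(prox))
                .or_insert(prox);
        }
    }
    cache
}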
Kerollmops
6ddb3e722c Depth-first search cache the docids unions 2020-09-28 16:55:21 +02:00
Kerollmops
a3821a0b33 Introduce the depth_first_search path resolution function 2020-09-28 16:34:12 +02:00
Kerollmops
51c237f9d8 Fix the benchmarks compilation 2020-09-28 13:39:17 +02:00
Clément Renault
d8354f6f02 Fix the word_docids capacity limit detection 2020-09-27 11:52:05 +02:00
Clément Renault
25b2853b70 Move the words pairs proximities compute into the write document function 2020-09-23 15:02:40 +02:00
Clément Renault
ed05999f63 Replace the arc cache by a simple linked hash map 2020-09-23 14:50:52 +02:00
Clément Renault
4d22d80281 Display only the key on heed error 2020-09-23 14:13:51 +02:00
Clément Renault
5178b3d59d Make the search system be aware of query words typos 2020-09-23 12:01:39 +02:00
Clément Renault
b597a92487 Add a default max-memory value to the indexer 2020-09-23 12:00:36 +02:00
Clément Renault
1f6e00878d Use the words pair proximities in the search algorithm 2020-09-22 18:47:55 +02:00
Clément Renault
31224a8425 Index the word pair proximities for both orders of the pair 2020-09-22 14:49:22 +02:00
Clément Renault
a58ae5eb2a Introduce the word-pair-proximities-docids infos subcommand 2020-09-22 14:04:34 +02:00
Clément Renault
d6fa9c0414 Index the intra documents word pair proximities 2020-09-22 14:04:33 +02:00
Clément Renault
7b67ae6972 Introduce the StrStrU8 heed codec 2020-09-22 12:44:17 +02:00
Clément Renault
e34437b2d7 Move the proximity function to a module 2020-09-22 10:54:59 +02:00
Clément Renault
15208c7d3d Simplify the indexer record loop 2020-09-22 10:33:30 +02:00
Clément Renault
e5adfaade0 Replace the token filter by a filter mapper 2020-09-22 10:24:31 +02:00
Clément Renault
d21c80b865 Apply the chunk compression parameters on all the MTBL writers 2020-09-21 18:30:54 +02:00
Clément Renault
944df52e2a Simplify the indexer main loop 2020-09-21 14:59:48 +02:00
Kerollmops
3ded98e5fa Bump the roaring version that fix a deserialization bug 2020-09-10 22:37:51 +02:00
Kerollmops
d5e5baa20f Bump the oxidized-mtbl dependency 2020-09-10 13:29:12 +02:00
Kerollmops
0fb086f241 Use the crates.io roaring library 2020-09-08 15:16:04 +02:00
Kerollmops
aed0704404 Remove the temporary optimisation 2020-09-08 14:48:33 +02:00
Kerollmops
072382fa61 Sort the word docids to make intersections much faster 2020-09-07 22:38:49 +02:00
Kerollmops
ad11c5fb3f Introduce the words-docids command for the infos binary 2020-09-07 22:36:35 +02:00
Kerollmops
5664c37539 Introduce a heed codec that reduces the size of small amounts of serialized integers 2020-09-07 20:06:23 +02:00
Kerollmops
3e2250423c Introduce the average-number-of-positions infos subcommand 2020-09-07 15:26:42 +02:00
Kerollmops
ea605b499c Introduce two new infos subcommands 2020-09-07 14:56:48 +02:00
Clément Renault
bb1ab428db Use another function to define the proximity 2020-09-06 17:55:07 +02:00
Clément Renault
f928b91e9d Specify the exact rev for the near-proximity dep 2020-09-06 17:21:38 +02:00
Clément Renault
dec460ce52 Fix the infos binary and add commands 2020-09-06 17:14:20 +02:00
Clément Renault
daa3673c1c Invert the word docid positions key order 2020-09-06 10:30:53 +02:00
Clément Renault
c2405bcae2 Prefer using the word_docids db to create the words-fst 2020-09-06 10:23:56 +02:00
Kerollmops
4ca9472e02 Fix the minimum proximity len 2020-09-06 10:19:34 +02:00
Clément Renault
1c504471d3 Introduce the plane-sweep algorithm 2020-09-05 18:25:27 +02:00
Clément Renault
dc88a86259 Store the word positions under the documents 2020-09-05 18:03:06 +02:00
Kerollmops
580ed1119a Make the engine return CSV string records as documents and headers 2020-08-31 19:02:00 +02:00
Clément Renault
bad0663138 Come back to the old tokenizer 2020-08-31 13:34:38 +02:00
Kerollmops
220ba0785c Make the front-end throttle requests by 100ms 2020-08-31 13:34:35 +02:00
Clément Renault
4afc4d0751 Use the groups of four positions to speed up disjunction tests 2020-08-30 16:25:11 +02:00
Clément Renault
605f75b56f Add the words grouped by four positions in the infos binary 2020-08-29 18:23:33 +02:00
Clément Renault
ad5cafbfed Introduce a database to store docids in groups of four positions 2020-08-29 17:42:55 +02:00
Clément Renault
3db517548d Move the documents back into the LMDB database 2020-08-29 15:14:04 +02:00
Clément Renault
816db7a0aa Improve the RoaringBitmap codec to reserve enough vector space 2020-08-29 11:21:30 +02:00
Clément Renault
3fe497e129 Improve the Mtbl heed codec to only encode MTBL databases 2020-08-29 11:20:39 +02:00
Clément Renault
21aafd603c Make sure the first document is associated with the document id 0 2020-08-29 10:56:40 +02:00
Clément Renault
0a44ff86ab Put the documents MTBL back into LMDB
We make sure to write the documents into a file before
memory-mapping it and putting it into LMDB; this way we avoid
moving it to RAM
2020-08-28 15:43:24 +02:00
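A sketch of the write-then-map trick described above, assuming the memmap2 crate (the commit may have used another mmap API): the bytes go to a file first, and LMDB then copies from the memory map instead of from a heap buffer.

use std::fs::File;
use std::io::Write;
use std::path::Path;

fn persist_then_map(bytes: &[u8], path: &Path) -> std::io::Result<memmap2::Mmap> {
    let mut file = File::create(path)?;
    file.write_all(bytes)?;
    file.sync_all()?;
    let file = File::open(path)?;
    // SAFETY: the file is not mutated while the map is alive in this sketch.
    unsafe { memmap2::Mmap::map(&file) }
}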
Clément Renault
d784d87880 Remove the prefix LMDB databases 2020-08-28 14:41:43 +02:00
Clément Renault
7cde312f14 Introduce the StrBEU32Codec heed codec 2020-08-28 14:16:37 +02:00
Clément Renault
34db376ae5 Rename the RoaringBitmapCodec module 2020-08-28 13:31:16 +02:00
Kerollmops
38ddc71b83 Simplify the search algorithm 2020-08-26 15:16:41 +02:00
Kerollmops
ba2eb0d7ad Take the words-fst into account when retrieving the biggest values 2020-08-26 14:36:22 +02:00
Clément Renault
32da07ccee Introduce the word-positions-doc-ids and words-positions infos commands 2020-08-23 10:52:47 +02:00
Clément Renault
d19f394630 Make the indexer support gzipped CSV as input 2020-08-21 18:10:24 +02:00
Clément Renault
ff479c865d Replace pipe by ringtail to improve stdin read performances 2020-08-21 17:45:52 +02:00
Clément Renault
ada30c2789 Introducing more arguments to specify the different compression algorithms 2020-08-21 16:41:26 +02:00
Clément Renault
02335ee72d Introduce the biggest-value-sizes command on the infos binary 2020-08-21 14:44:42 +02:00
Clément Renault
1e3e756c19 Introduce the words-frequencies command on the infos binary 2020-08-21 14:44:42 +02:00
Kerollmops
6a230fe803 Move the contains_documents logic to a function 2020-08-21 14:44:42 +02:00
Kerollmops
e55a569629 Compress much more the documents database 2020-08-21 14:44:42 +02:00
Kerollmops
962bad3cea Introduce an infos binary to fetch stats 2020-08-17 19:41:49 +02:00
Clément Renault
8806fcd545 Introduce a better query and document lexer 2020-08-16 14:36:54 +02:00
Clément Renault
1e358e3ae8 Introduce the AstarBagIter that iterates through best paths 2020-08-15 16:24:06 +02:00
Clément Renault
7dc594ba4d Introduce the Search builder struct 2020-08-13 14:27:51 +02:00
Clément Renault
bfb46cbfbe Introduce the Criterion enum 2020-08-12 10:43:02 +02:00
Clément Renault
6d04a285dc Retrieve and display the distances of the words found 2020-08-11 15:18:02 +02:00
Clément Renault
1bd37d213a Lowercase quoted words 2020-08-10 14:49:09 +02:00
Clément Renault
883a8109c8 Show both database and documents database sizes 2020-08-10 14:37:18 +02:00
Clément Renault
a4e0f3f724 Remove the useless TransitiveArc from the serve binary 2020-08-10 14:06:27 +02:00
Clément Renault
edc06a97d6 Remove the useless stats binary 2020-08-10 13:55:02 +02:00
Clément Renault
ae77fe5a69 Introduce an option to specify the maximum database size 2020-08-10 13:53:53 +02:00
Clément Renault
394844062f Move the documents MTBL database inside the Index 2020-08-10 13:47:19 +02:00
Clément Renault
ecd2b2f217 Make the final merge done in parallel 2020-08-07 15:44:04 +02:00
Clément Renault
91282c8b6a Move the documents into another file 2020-08-07 13:11:31 +02:00
Clément Renault
fae694a102 Put the documents into an MTBL database 2020-08-07 12:14:40 +02:00
Clément Renault
d5a356902a Update oxidized-mtbl 2020-08-07 12:14:03 +02:00
Clément Renault
405a71d3a4 Accept csv from stdin 2020-08-06 13:38:21 +02:00
Clément Renault
d3b1096510 Compute the word attribute postings lists on each threads 2020-08-06 11:50:27 +02:00
Clément Renault
8d734941af Clean up some lines 2020-08-06 10:20:26 +02:00
Clément Renault
a4e3c7c37c Force the Papa parse delimiter 2020-08-05 14:11:46 +02:00
Clément Renault
6508d497ce Replace the regex highlighting by a simple algorithm 2020-08-05 13:52:27 +02:00
Clément Renault
4873abe145 Introduce option flags to toggle the indexing engine 2020-08-05 12:10:41 +02:00
Clément Renault
bd4b18541c Introduce a new indexer which uses an MTBL sorter 2020-08-04 15:44:37 +02:00
Clément Renault
3f21760d56 Update README.md 2020-08-04 15:40:37 +02:00
Clément Renault
bc3a0ac6a3 Display the milli logo and update the description 2020-08-04 15:40:02 +02:00
Kerollmops
d7d8f38fb7 Update bulma to spread the logo more 2020-07-16 23:45:02 +02:00
Kerollmops
ee305c9284 Replace the title by the milli logo 2020-07-15 23:55:28 +02:00
Kerollmops
9ade00e27b Highlight all the matching words 2020-07-14 11:53:21 +02:00
Kerollmops
085c376655 Use the regex crate to highlight "hello" 2020-07-14 11:28:40 +02:00
Kerollmops
dd385ad05b Customize the mark tag css 2020-07-14 11:03:21 +02:00
Kerollmops
aa92311d4e Add a dark theme to the dashboard 2020-07-13 23:51:41 +02:00
Kerollmops
3d144e62c4 Search for best proximities in multiple attributes 2020-07-13 19:06:56 +02:00
Kerollmops
576dd011a1 Compute the candidates but not by attribute 2020-07-13 18:16:05 +02:00
Kerollmops
6b14b20369 Introduce a method to retrieve the number of attributes of the documents 2020-07-13 17:50:16 +02:00
Kerollmops
54afec58a3 Add a fade-in/out animation while the server is processing 2020-07-12 11:34:48 +02:00
Kerollmops
92c2b1dd2d Refine the help message of the binaries 2020-07-12 11:06:45 +02:00
Kerollmops
f757df5dfd Introduce the stderr logger to the project 2020-07-12 11:04:35 +02:00
Kerollmops
12358476da Use the log crate instead of stderr 2020-07-12 10:55:09 +02:00
Kerollmops
2c62eeea3c Rename the project milli 2020-07-12 00:16:41 +02:00
Kerollmops
d31da26a51 Avoid cloning RoaringBitmaps when unnecessary 2020-07-11 23:51:32 +02:00
Kerollmops
b8a1fc0126 Clean up the CSS style custom bulma rules 2020-07-11 14:51:59 +02:00
Kerollmops
f6eae91c7d Pretty print the new dashboard numbers 2020-07-11 14:17:37 +02:00
Kerollmops
d44428fa90 Display more information on the dashboard 2020-07-11 11:51:56 +02:00
Kerollmops
11c7fef80a Implement a memory dumper
It moves the in-memory HashMaps used when indexing to a disk-based MTBL file
2020-07-07 16:48:49 +02:00
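A rough sketch of such a dumper, using a naive length-prefixed format rather than the real MTBL layout: the point is only that entries leave RAM in sorted key order, so the resulting file can be merged like any other sorted string table.

use std::collections::HashMap;
use std::fs::File;
use std::io::{BufWriter, Write};

fn dump(map: HashMap<Vec<u8>, Vec<u8>>, path: &str) -> std::io::Result<()> {
    let mut entries: Vec<_> = map.into_iter().collect();
    entries.sort_by(|(a, _), (b, _)| a.cmp(b)); // sorted key order
    let mut out = BufWriter::new(File::create(path)?);
    for (key, value) in entries {
        out.write_all(&(key.len() as u64).to_be_bytes())?;
        out.write_all(&key)?;
        out.write_all(&(value.len() as u64).to_be_bytes())?;
        out.write_all(&value)?;
    }
    out.flush()
}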
Kerollmops
b12bfcb03b Reduce the depth of the word position document ids
This helps reduce the number of allocations.
2020-07-07 12:30:05 +02:00
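One plausible reading of this change, sketched with illustrative types (the commit message does not spell out the structure): flattening a map of maps into a single map keyed by tuples removes one allocated inner container per word.

use roaring::RoaringBitmap;
use std::collections::HashMap;

// Deep (sketch): word -> position -> docids, one inner map allocated per word.
type Nested = HashMap<String, HashMap<u32, RoaringBitmap>>;

// Shallow (sketch): (word, position) -> docids, one allocation layer less.
type Flat = HashMap<(String, u32), RoaringBitmap>;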
Kerollmops
7178b6c2c4 First basic version using MTBL again 2020-07-07 11:32:33 +02:00
Kerollmops
45d0d7c3d4 Clean up the README 2020-07-06 17:38:22 +02:00
Kerollmops
adb1038b26 Add a jobs parameter to set the number of threads the indexer uses 2020-07-06 12:17:17 +02:00
Kerollmops
2a3b03138b Use heed 0.8.1 with the RwIter append method 2020-07-05 19:50:28 +02:00
Kerollmops
ec1023e790 Intersect document ids by inverse popularity of the words
This reduces our worst request, "the best of the do", from 56s down to 3s.
2020-07-05 19:33:51 +02:00
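A sketch of the intersection order this commit describes (assuming the roaring crate, which the engine uses): starting from the rarest word keeps every intermediate result as small as possible, and an empty intermediate lets us stop early.

use roaring::RoaringBitmap;

fn intersect_rarest_first(mut postings: Vec<RoaringBitmap>) -> Option<RoaringBitmap> {
    postings.sort_by_key(|bitmap| bitmap.len()); // least popular word first
    let mut iter = postings.into_iter();
    let mut acc = iter.next()?;
    for bitmap in iter {
        acc &= bitmap;
        if acc.is_empty() {
            break; // no document can match anymore
        }
    }
    Some(acc)
}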
Kerollmops
cd7e64b2b3 Allow users to set the arc cache size when indexing 2020-07-04 18:12:41 +02:00
Kerollmops
ac8353a64f Merge pre-computed word attribute documents ids 2020-07-04 17:02:27 +02:00
Kerollmops
fea7cac206 Display the time it took to compute the word attribute documents ids 2020-07-04 15:18:38 +02:00
Kerollmops
46ced5c828 Introduce the RwIter append heed API 2020-07-04 12:34:10 +02:00
Kerollmops
7e7440c431 Finalize the LMDB indexing design 2020-07-01 22:45:43 +02:00
Kerollmops
2ae3f40971 Make the indexer ignore certain words
This prepares for fully parallel indexing: each thread is only aware of
its own subset of words, which avoids postings-list conflicts between
threads
2020-07-01 17:49:46 +02:00
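A sketch of one way to assign words to threads (the hashing scheme is an assumption, not necessarily the commit's): if a word deterministically belongs to exactly one thread, no two threads ever build postings lists for the same word.

use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher};

fn belongs_to_thread(word: &str, thread_id: u64, num_threads: u64) -> bool {
    let mut hasher = DefaultHasher::new();
    word.hash(&mut hasher);
    hasher.finish() % num_threads == thread_id
}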
Kerollmops
a3ac2623d5 Introduce multiple functions to clean up the code 2020-07-01 17:24:55 +02:00
Kerollmops
ac5cc7ddad Introduce an Iterator yielding owned entries for the LruCache 2020-07-01 17:21:52 +02:00
Kerollmops
014a25697d Use only one ARC cache based on the words 2020-07-01 12:03:18 +02:00
Kerollmops
fc4013a43f Fix the ARC cache 2020-07-01 10:35:07 +02:00
Kerollmops
2fcae719ad Use another LRU impl which uses hashbrown 2020-06-29 22:26:06 +02:00
Kerollmops
f98b615bf3 Replace the LRU by an Arc cache 2020-06-29 20:48:57 +02:00
Kerollmops
07abebfc46 Introduce a (too big) LRU cache 2020-06-29 18:15:03 +02:00
Kerollmops
5f0088594b Index by writing directly into LMDB 2020-06-29 13:54:47 +02:00
Kerollmops
8453828a65 Update the README 2020-06-28 12:40:08 +02:00
Kerollmops
63cbeca64e Skip all derived words when too short 2020-06-28 12:13:12 +02:00
Kerollmops
736f0f7560 Use the proximity instead of the attributes when searching for <= 7 proximities 2020-06-28 12:13:12 +02:00
Kerollmops
fe3be8f18a Replace the HashMap by a Vec for attributes documents ids 2020-06-28 12:13:12 +02:00
Kerollmops
6a2834f2b0 Add a jobs parameter to set the number of threads the indexer uses 2020-06-28 12:13:10 +02:00
Kerollmops
7e16afbdce Ignore documents which are not part of the candidates when exploring with A* 2020-06-24 15:06:45 +02:00
Kerollmops
1c7a9a4132 Remove the found documents from the candidates list 2020-06-24 15:00:26 +02:00
Kerollmops
50169b9798 Compute the full list of ids we are willing to find by attribute 2020-06-24 14:48:04 +02:00
Kerollmops
374ec6773f Introduce a database to store all docids for a word and attribute 2020-06-22 19:24:20 +02:00
Kerollmops
a044cb6cc8 Clean up the warnings for prefix postings 2020-06-22 18:10:31 +02:00
Kerollmops
ba3e805981 Document the Index types and the internal LMDB databases 2020-06-22 18:09:22 +02:00
Kerollmops
2f0e1afd16 Introduce the roaring bitmap heed codec 2020-06-22 17:56:07 +02:00
Kerollmops
8148210860 Use the cache when retrieving the documents at the end 2020-06-21 12:25:19 +02:00
Kerollmops
1628a31efa Cache the unions of the derived words positions 2020-06-20 15:38:10 +02:00
Kerollmops
115e0142d9 Add a feature flags to enable the export of stats 2020-06-20 13:25:42 +02:00
Kerollmops
beb49b24f6 Skip looking at connections for proximity 0 2020-06-20 13:19:03 +02:00
Kerollmops
c84012d655 Accept queries from standard input when not given as an argument 2020-06-20 12:01:15 +02:00
Kerollmops
d6705d5529 Introduce the criterion dependency to bench the engine 2020-06-19 18:32:25 +02:00
Kerollmops
55a8941922 Optimize things 2020-06-19 17:48:17 +02:00
Kerollmops
a3ca80d20d Ignore every proximity bigger than or equal to 8 2020-06-18 15:42:46 +02:00
Kerollmops
3577de04b8 Reduce the number of KV lookups to the successful ones only 2020-06-16 12:58:29 +02:00
Kerollmops
e974e6b3c9 Acquire search intersections metrics 2020-06-16 12:10:23 +02:00
Kerollmops
8db16ff306 Add a cache to the contains_documents success function 2020-06-14 13:39:39 +02:00
Kerollmops
a8cda248b4 Introduce a customized A* algorithm.
This custom algorithm lazily computes the intersections between words to avoid too many set operations and database reads
2020-06-14 12:51:57 +02:00
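A sketch of the lazy part (illustrative types, assuming the roaring crate; it panics on unknown words for brevity): intersections are computed on first request and memoized, so the A* only pays for the word pairs it actually explores.

use roaring::RoaringBitmap;
use std::collections::HashMap;

struct LazyIntersections<'a> {
    postings: &'a HashMap<String, RoaringBitmap>,
    cache: HashMap<(String, String), RoaringBitmap>,
}

impl<'a> LazyIntersections<'a> {
    fn get(&mut self, a: &str, b: &str) -> &RoaringBitmap {
        let key = (a.to_string(), b.to_string());
        if !self.cache.contains_key(&key) {
            // First request for this pair: do the set operation once.
            let inter = &self.postings[a] & &self.postings[b];
            self.cache.insert(key.clone(), inter);
        }
        &self.cache[&key]
    }
}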
Kerollmops
69285b22d3 Check that an edges combination contains results 2020-06-13 11:16:02 +02:00
Kerollmops
b9cc6c10af Introduce a function to ignore useless paths 2020-06-13 00:17:43 +02:00
Kerollmops
d02c5cb023 Fix node skipping by computing the accumulated proximity 2020-06-12 14:08:46 +02:00
Kerollmops
37a48489da Reworked the best proximity algo a little bit 2020-06-12 12:53:08 +02:00
Kerollmops
302866ad73 Make the algo no longer use an A* 2020-06-11 17:43:06 +02:00
Kerollmops
0a83a86e65 Fix multiple bugs 2020-06-11 11:55:03 +02:00
Kerollmops
4e86ecf807 Retrieve the words before the intersect loops 2020-06-10 22:05:01 +02:00
Kerollmops
6ca3579cc0 Add more time debug measurements 2020-06-10 21:35:01 +02:00
Kerollmops
66a4b26811 Introduce a proximity based documents retriever 2020-06-10 16:54:28 +02:00
Kerollmops
78f27c0465 squash-me: Remove debugs 2020-06-10 16:29:46 +02:00
Kerollmops
3ad883d7c7 squash-me: Make the dijkstra work even with different attributes 2020-06-10 16:27:02 +02:00
Kerollmops
fecd8ca54a squash-me: It works! we must remove the debug after having added more tests 2020-06-10 14:20:35 +02:00
Kerollmops
13977d9338 squash-me 2020-06-09 23:06:59 +02:00
Kerollmops
5d5b827f1a Squash-me 2020-06-09 17:32:25 +02:00
Kerollmops
2a6d6a7f69 Introduce a first draft of the best_proximity algorithm 2020-06-09 10:11:43 +02:00
Kerollmops
dfdaceb410 Introduce a first basic working positions-based engine 2020-06-05 20:13:19 +02:00
Kerollmops
f51a63e4ef Store documents ids under attribute ids 2020-06-05 16:32:14 +02:00
Kerollmops
ce86a43779 Make the query tokenizer a real Iterator 2020-06-05 09:49:28 +02:00
Kerollmops
f55f4cb02a Don't fetch the cached prefix postings when prefix is disabled 2020-06-04 21:22:45 +02:00
Kerollmops
06bf03f075 Add a help message on the front page
2020-06-04 21:22:45 +02:00
Kerollmops
eefc6d7c44 Add support for quoted query phrases 2020-06-04 20:25:51 +02:00
Kerollmops
1f7035f18f Just do a little clean-up 2020-06-04 19:13:28 +02:00
Kerollmops
71dc6a3828 Disable prefix search when query is ended by a whitespace 2020-06-04 18:37:20 +02:00
Kerollmops
5d1c625b74 Change the page index texts 2020-06-04 18:20:57 +02:00
Kerollmops
c42d3c19e2 Merge the whole list of generated MTBL in one go 2020-06-04 17:38:43 +02:00
Kerollmops
3a23dc242e More efficiently merge MTBLs, more than two at a time 2020-06-04 16:17:24 +02:00
Kerollmops
1df1f88fe1 Directly write to LMDB without intermediate final MTBL 2020-06-01 21:30:39 +02:00
Kerollmops
2174042994 Merge only 3 MTBL at the same time 2020-06-01 19:49:58 +02:00
Kerollmops
5cc81a0179 Merge many MTBLs into one at the same time 2020-06-01 18:39:58 +02:00
Kerollmops
6a047519f6 Do a merge two by two 2020-06-01 18:27:26 +02:00
Kerollmops
5404776f7a Add a little bit more debug 2020-06-01 17:52:43 +02:00
Kerollmops
dff68a339a Use OnceCell to cache levenshtein builders 2020-05-31 19:27:11 +02:00
Kerollmops
dde3e01a59 Introduce prefix postings ids for better perfs 2020-05-31 18:20:49 +02:00
Kerollmops
a26553c90a Reintroduce a simple HTTP server 2020-05-31 17:48:13 +02:00
Kerollmops
2a10b2275e Support prefix typo tolerant search 2020-05-31 17:18:13 +02:00
Kerollmops
ba9527abc0 Support typos with a levenshtein automata 2020-05-31 17:01:11 +02:00
Kerollmops
6c726df9b9 Support multiple space-separated words 2020-05-31 16:09:34 +02:00
Kerollmops
24587148fd Introduce MTBL parallel merging before LMDB writing 2020-05-31 14:22:57 +02:00
Kerollmops
6762c2d08f Clean up a little bit 2020-05-31 14:22:57 +02:00
Kerollmops
3a998cf39c Far better usage of rayon to fold indexed data 2020-05-31 14:22:57 +02:00
Kerollmops
1237306ca8 Introduce a thread that write to heed 2020-05-31 14:22:57 +02:00
Kerollmops
3668627e03 Use zerocopy without bitpacking as a first step 2020-05-31 14:22:07 +02:00
Kerollmops
a81f201fad Introduce the use of RocksDB instead of sled (RAM) 2020-05-31 14:22:06 +02:00
Kerollmops
91ba938953 Initial commit 2020-05-31 14:22:06 +02:00
Clément Renault
4573f00a0d Initial commit 2020-05-31 14:21:56 +02:00
805 changed files with 65181 additions and 7759 deletions

View File

@@ -2,7 +2,6 @@
version: 2
updates:
- package-ecosystem: "github-actions"
directory: "/"
schedule:

View File

@@ -3,7 +3,7 @@
# check_tag $current_tag $file_tag $file_name
function check_tag {
if [[ "$1" != "$2" ]]; then
echo "Error: the current tag does not match the version in $3: found $2 - expected $1"
echo "Error: the current tag does not match the version in Cargo.toml: found $2 - expected $1"
ret=1
fi
}
@@ -11,12 +11,8 @@ function check_tag {
ret=0
current_tag=${GITHUB_REF#'refs/tags/v'}
toml_files='*/Cargo.toml'
for toml_file in $toml_files;
do
file_tag="$(grep '^version = ' $toml_file | cut -d '=' -f 2 | tr -d '"' | tr -d ' ')"
check_tag $current_tag $file_tag $toml_file
done
file_tag="$(grep '^version = ' Cargo.toml | cut -d '=' -f 2 | tr -d '"' | tr -d ' ')"
check_tag $current_tag $file_tag
lock_file='Cargo.lock'
lock_tag=$(grep -A 1 'name = "meilisearch-auth"' $lock_file | grep version | cut -d '=' -f 2 | tr -d '"' | tr -d ' ')

View File

@@ -1,127 +1,41 @@
#!/bin/sh
# Was used in our CIs to publish the latest Docker image. Not used anymore; will be used again when v1 and v2 are out and we want to maintain multiple stable versions.
# Returns "true" or "false" (as a string) to be used in the `if` in GHA
# Used in our CIs to publish the latest Docker image.
# Checks if the current tag should be the latest (in terms of semver and not of release date).
# Ex: previous tag -> v2.1.1
# new tag -> v1.20.3
# The new tag (v1.20.3) should NOT be the latest
# So it returns "false": the `latest` tag should not be updated for the release v1.20.3 and still needs to correspond to v2.1.1
# Checks if the current tag ($GITHUB_REF) corresponds to the latest release tag on GitHub
# Returns "true" or "false" (as a string).
# GLOBAL
GREP_SEMVER_REGEXP='v\([0-9]*\)[.]\([0-9]*\)[.]\([0-9]*\)$' # i.e. v[number].[number].[number]
GITHUB_API='https://api.github.com/repos/meilisearch/meilisearch/releases'
PNAME='meilisearch'
# FUNCTIONS
# semverParseInto and semverLT from https://github.com/cloudflare/semver_bash/blob/master/semver.sh
# usage: semverParseInto version major minor patch special
# version: the string version
# major, minor, patch, special: will be assigned by the function
semverParseInto() {
local RE='[^0-9]*\([0-9]*\)[.]\([0-9]*\)[.]\([0-9]*\)\([0-9A-Za-z-]*\)'
#MAJOR
eval $2=`echo $1 | sed -e "s#$RE#\1#"`
#MINOR
eval $3=`echo $1 | sed -e "s#$RE#\2#"`
#PATCH
eval $4=`echo $1 | sed -e "s#$RE#\3#"`
#SPECIAL
eval $5=`echo $1 | sed -e "s#$RE#\4#"`
}
# usage: semverLT version1 version2
semverLT() {
local MAJOR_A=0
local MINOR_A=0
local PATCH_A=0
local SPECIAL_A=0
local MAJOR_B=0
local MINOR_B=0
local PATCH_B=0
local SPECIAL_B=0
semverParseInto $1 MAJOR_A MINOR_A PATCH_A SPECIAL_A
semverParseInto $2 MAJOR_B MINOR_B PATCH_B SPECIAL_B
if [ $MAJOR_A -lt $MAJOR_B ]; then
return 0
fi
if [ $MAJOR_A -le $MAJOR_B ] && [ $MINOR_A -lt $MINOR_B ]; then
return 0
fi
if [ $MAJOR_A -le $MAJOR_B ] && [ $MINOR_A -le $MINOR_B ] && [ $PATCH_A -lt $PATCH_B ]; then
return 0
fi
if [ "_$SPECIAL_A" == "_" ] && [ "_$SPECIAL_B" == "_" ] ; then
return 1
fi
if [ "_$SPECIAL_A" == "_" ] && [ "_$SPECIAL_B" != "_" ] ; then
return 1
fi
if [ "_$SPECIAL_A" != "_" ] && [ "_$SPECIAL_B" == "_" ] ; then
return 0
fi
if [ "_$SPECIAL_A" < "_$SPECIAL_B" ]; then
return 0
fi
return 1
}
# Returns the tag of the latest stable release (in terms of semver and not of release date)
# Returns the version of the latest stable version of Meilisearch by setting the $latest variable.
get_latest() {
temp_file='temp_file' # temp_file needed because the grep would start before the download is over
curl -s 'https://api.github.com/repos/meilisearch/meilisearch/releases' > "$temp_file"
releases=$(cat "$temp_file" | \
grep -E "tag_name|draft|prerelease" \
| tr -d ',"' | cut -d ':' -f2 | tr -d ' ')
# Returns a list of [tag_name draft_boolean prerelease_boolean ...]
# Ex: v0.10.1 false false v0.9.1-rc.1 false true v0.9.0 false false...
# temp_file is needed because the grep would start before the download is over
temp_file=$(mktemp -q /tmp/$PNAME.XXXXXXXXX)
latest_release="$GITHUB_API/latest"
i=0
latest=""
current_tag=""
for release_info in $releases; do
if [ $i -eq 0 ]; then # Checking tag_name
if echo "$release_info" | grep -q "$GREP_SEMVER_REGEXP"; then # If it's not an alpha or beta release
current_tag=$release_info
else
current_tag=""
fi
i=1
elif [ $i -eq 1 ]; then # Checking draft boolean
if [ "$release_info" = "true" ]; then
current_tag=""
fi
i=2
elif [ $i -eq 2 ]; then # Checking prerelease boolean
if [ "$release_info" = "true" ]; then
current_tag=""
fi
i=0
if [ "$current_tag" != "" ]; then # If the current_tag is valid
if [ "$latest" = "" ]; then # If there is no latest yet
latest="$current_tag"
else
semverLT $current_tag $latest # Comparing latest and the current tag
if [ $? -eq 1 ]; then
latest="$current_tag"
fi
fi
fi
fi
done
if [ $? -ne 0 ]; then
echo "$0: Can't create temp file."
exit 1
fi
if [ -z "$GITHUB_PAT" ]; then
curl -s "$latest_release" > "$temp_file" || return 1
else
curl -H "Authorization: token $GITHUB_PAT" -s "$latest_release" > "$temp_file" || return 1
fi
latest="$(cat "$temp_file" | grep '"tag_name":' | cut -d ':' -f2 | tr -d '"' | tr -d ',' | tr -d ' ')"
rm -f "$temp_file"
echo $latest
return 0
}
# MAIN
current_tag="$(echo $GITHUB_REF | tr -d 'refs/tags/')"
latest="$(get_latest)"
get_latest
if [ "$current_tag" != "$latest" ]; then
# The current release tag is not the latest
@@ -130,3 +44,5 @@ else
# The current release tag is the latest
echo "true"
fi
exit 0

48
.github/uffizzi/Dockerfile vendored Normal file
View File

@@ -0,0 +1,48 @@
# Compile
FROM rust:alpine3.16 AS compiler
RUN apk add -q --update-cache --no-cache build-base openssl-dev
WORKDIR /meilisearch
ARG COMMIT_SHA
ARG COMMIT_DATE
ARG GIT_TAG
ENV COMMIT_SHA=${COMMIT_SHA} COMMIT_DATE=${COMMIT_DATE} VERGEN_GIT_SEMVER_LIGHTWEIGHT=${GIT_TAG}
ENV RUSTFLAGS="-C target-feature=-crt-static"
COPY . .
RUN set -eux; \
apkArch="$(apk --print-arch)"; \
if [ "$apkArch" = "aarch64" ]; then \
export JEMALLOC_SYS_WITH_LG_PAGE=16; \
fi && \
cargo build --release
# Run
FROM uffizzi/ttyd:alpine
ENV MEILI_HTTP_ADDR 0.0.0.0:7700
ENV MEILI_SERVER_PROVIDER docker
ENV MEILI_NO_ANALYTICS true
RUN apk update --quiet \
&& apk add -q --no-cache libgcc tini curl
# add meilisearch to the `/bin` so you can run it from anywhere and it's easy
# to find.
COPY --from=compiler /meilisearch/target/release/meilisearch /bin/meilisearch
# To stay compatible with the older version of the container (pre v0.27.0) we're
# going to symlink the meilisearch binary in the path to `/meilisearch`
RUN ln -s /bin/meilisearch /meilisearch
# This directory should hold all the data related to meilisearch so we're going
# to move our PWD in there.
# We don't want to put the meilisearch binary
WORKDIR /meili_data
EXPOSE 7700/tcp
ENTRYPOINT ["tini", "--"]
CMD ["ttyd", "/bin/zsh"]

View File

@@ -0,0 +1,26 @@
version: "3"
x-uffizzi:
ingress:
service: nginx
port: 8081
services:
meilisearch:
image: "${MEILISEARCH_IMAGE}"
restart: unless-stopped
ports:
- "7681:7681"
- "7700:7700"
deploy:
resources:
limits:
memory: 500M
nginx:
image: nginx:alpine
restart: unless-stopped
ports:
- "8081:8081"
volumes:
- ./.github/uffizzi/nginx:/etc/nginx

28
.github/uffizzi/nginx/nginx.conf vendored Normal file
View File

@@ -0,0 +1,28 @@
events {
worker_connections 4096; ## Default: 1024
}
http {
map $http_upgrade $connection_upgrade {
default upgrade;
'' close;
}
server {
listen 8081;
location / {
proxy_pass http://localhost:7681;
proxy_http_version 1.1;
proxy_set_header Upgrade $http_upgrade;
proxy_set_header Connection $connection_upgrade;
}
location /meilisearch/ {
# rewrite /meilisearch/(.*) /$1 break;
proxy_pass http://localhost:7700/;
}
}
}

View File

@@ -1,33 +0,0 @@
---
on:
workflow_dispatch:
name: Execute code coverage
jobs:
nightly-coverage:
runs-on: ubuntu-18.04
steps:
- uses: actions/checkout@v3
- uses: actions-rs/toolchain@v1
with:
toolchain: nightly
override: true
- uses: actions-rs/cargo@v1
with:
command: clean
- uses: actions-rs/cargo@v1
with:
command: test
args: --all-features --no-fail-fast
env:
CARGO_INCREMENTAL: "0"
RUSTFLAGS: "-Zprofile -Ccodegen-units=1 -Cinline-threshold=0 -Clink-dead-code -Coverflow-checks=off -Cpanic=unwind -Zpanic_abort_tests"
- uses: actions-rs/grcov@v0.1
- name: Upload coverage to Codecov
uses: codecov/codecov-action@v3
with:
token: ${{ secrets.CODECOV_TOKEN }}
file: ${{ steps.coverage.outputs.report }}
yml: ./codecov.yml
fail_ci_if_error: true

View File

@@ -1,6 +1,7 @@
name: Create issue to upgrade dependencies
on:
schedule:
# Run the first of the month, every 3 month
- cron: '0 0 1 */3 *'
workflow_dispatch:
@@ -15,9 +16,13 @@ jobs:
github_token: ${{ secrets.MEILI_BOT_GH_PAT }}
title: Upgrade dependencies
body: |
We need to update the dependencies of the Meilisearch repository, and, if possible, the dependencies of all the core-team repositories that Meilisearch depends on (milli, charabia, heed...).
This issue is about updating Meilisearch dependencies:
- [ ] Cargo toml dependencies of Meilisearch; but also the main engine-team repositories that Meilisearch depends on (charabia, heed...)
- [ ] If new Rust versions have been released, update the Rust version in the Clippy job of this [GitHub Action file](./.github/workflows/rust.yml)
⚠️ This issue should only be done at the beginning of the sprint!
⚠️ To avoid last minute bugs, this issue should only be done at the beginning of the sprint!
The GitHub action dependencies are managed by [Dependabot](./.github/dependabot.yml)
labels: |
dependencies
maintenance

View File

@@ -1,15 +1,32 @@
name: Look for flaky tests
on:
workflow_dispatch:
schedule:
- cron: "0 12 * * FRI" # every friday at 12:00PM
- cron: "0 12 * * FRI" # Every Friday at 12:00PM
jobs:
flaky:
runs-on: ubuntu-18.04
runs-on: ubuntu-latest
container:
# Use ubuntu-18.04 to compile with glibc 2.27, which is the production expectation
image: ubuntu:18.04
steps:
- uses: actions/checkout@v3
- name: Install needed dependencies
run: |
apt-get update && apt-get install -y curl
apt-get install build-essential -y
- uses: actions-rs/toolchain@v1
with:
toolchain: stable
override: true
- name: Install cargo-flaky
run: cargo install cargo-flaky
- name: Run cargo flaky 100 times
run: cargo flaky -i 100 --release
- name: Run cargo flaky in the dumps
run: cd dump; cargo flaky -i 100 --release
- name: Run cargo flaky in the index-scheduler
run: cd index-scheduler; cargo flaky -i 100 --release
- name: Run cargo flaky in the auth
run: cd meilisearch-auth; cargo flaky -i 100 --release
- name: Run cargo flaky in meilisearch
run: cd meilisearch; cargo flaky -i 100 --release

29
.github/workflows/latest-git-tag.yml vendored Normal file
View File

@@ -0,0 +1,29 @@
# Create or update a latest git tag when releasing a stable version of Meilisearch
name: Update latest git tag
on:
workflow_dispatch:
release:
types: [released]
jobs:
check-version:
name: Check the version validity
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- name: Check release validity
if: github.event_name == 'release'
run: bash .github/scripts/check-release.sh
update-latest-tag:
runs-on: ubuntu-latest
needs: check-version
steps:
- uses: actions/checkout@v3
- uses: rickstaa/action-create-tag@v1
with:
tag: "latest"
message: "Latest stable release of Meilisearch"
# Move the tag if `latest` already exists
force_push_tag: true
github_token: ${{ secrets.MEILI_BOT_GH_PAT }}

77
.github/workflows/manual_benchmarks.yml vendored Normal file
View File

@@ -0,0 +1,77 @@
name: Benchmarks
on:
workflow_dispatch:
inputs:
dataset_name:
description: 'The name of the dataset used to benchmark (search_songs, search_wiki, search_geo or indexing)'
required: false
default: 'search_songs'
env:
BENCH_NAME: ${{ github.event.inputs.dataset_name }}
jobs:
benchmarks:
name: Run and upload benchmarks
runs-on: benchmarks
timeout-minutes: 4320 # 72h
steps:
- uses: actions/checkout@v3
- uses: actions-rs/toolchain@v1
with:
profile: minimal
toolchain: stable
override: true
# Set variables
- name: Set current branch name
shell: bash
run: echo "name=$(echo ${GITHUB_REF#refs/heads/})" >> $GITHUB_OUTPUT
id: current_branch
- name: Set normalized current branch name # Replace `/` by `_` in branch name to avoid issues when pushing to S3
shell: bash
run: echo "name=$(echo ${GITHUB_REF#refs/heads/} | tr '/' '_')" >> $GITHUB_OUTPUT
id: normalized_current_branch
- name: Set shorter commit SHA
shell: bash
run: echo "short=$(echo $GITHUB_SHA | cut -c1-8)" >> $GITHUB_OUTPUT
id: commit_sha
- name: Set file basename with format "dataset_branch_commitSHA"
shell: bash
run: echo "basename=$(echo ${BENCH_NAME}_${{ steps.normalized_current_branch.outputs.name }}_${{ steps.commit_sha.outputs.short }})" >> $GITHUB_OUTPUT
id: file
# Run benchmarks
- name: Run benchmarks - Dataset ${BENCH_NAME} - Branch ${{ steps.current_branch.outputs.name }} - Commit ${{ steps.commit_sha.outputs.short }}
run: |
cd benchmarks
cargo bench --bench ${BENCH_NAME} -- --save-baseline ${{ steps.file.outputs.basename }}
# Generate critcmp files
- name: Install critcmp
uses: taiki-e/install-action@v2
with:
tool: critcmp
- name: Export critcmp file
run: |
critcmp --export ${{ steps.file.outputs.basename }} > ${{ steps.file.outputs.basename }}.json
# Upload benchmarks
- name: Upload ${{ steps.file.outputs.basename }}.json to DO Spaces # DigitalOcean Spaces = S3
uses: BetaHuhn/do-spaces-action@v2
with:
access_key: ${{ secrets.DO_SPACES_ACCESS_KEY }}
secret_key: ${{ secrets.DO_SPACES_SECRET_KEY }}
space_name: ${{ secrets.DO_SPACES_SPACE_NAME }}
space_region: ${{ secrets.DO_SPACES_SPACE_REGION }}
source: ${{ steps.file.outputs.basename }}.json
out_dir: critcmp_results
# Helper
- name: 'README: compare with another benchmark'
run: |
echo "${{ steps.file.outputs.basename }}.json has just been pushed."
echo 'How to compare this benchmark with another one?'
echo ' - Check the available files with: ./benchmarks/scripts/list.sh'
echo " - Run the following command: ./benchmaks/scipts/compare.sh <file-to-compare-with> ${{ steps.file.outputs.basename }}.json"

View File

@@ -3,8 +3,8 @@ name: Milestone's workflow
# /!\ No git flow is handled here
# For each Milestone created (not opened!), and if the release is NOT a patch release (only the patch changed)
# - the roadmap issue is created, see https://github.com/meilisearch/core-team/blob/main/issue-templates/roadmap-issue.md
# - the changelog issue is created, see https://github.com/meilisearch/core-team/blob/main/issue-templates/changelog-issue.md
# - the roadmap issue is created, see https://github.com/meilisearch/engine-team/blob/main/issue-templates/roadmap-issue.md
# - the changelog issue is created, see https://github.com/meilisearch/engine-team/blob/main/issue-templates/changelog-issue.md
# For each Milestone closed
# - the `release_version` label is created
@@ -31,8 +31,6 @@ jobs:
runs-on: ubuntu-latest
outputs:
is-patch: ${{ steps.check-patch.outputs.is-patch }}
env:
MILESTONE_VERSION: ${{ github.event.milestone.title }}
steps:
- uses: actions/checkout@v3
- name: Check if this release is a patch release only
@@ -41,10 +39,10 @@ jobs:
echo version: $MILESTONE_VERSION
if [[ $MILESTONE_VERSION =~ ^v[0-9]+\.[0-9]+\.0$ ]]; then
echo 'This is NOT a patch release'
echo ::set-output name=is-patch::false
echo "is-patch=false" >> $GITHUB_OUTPUT
elif [[ $MILESTONE_VERSION =~ ^v[0-9]+\.[0-9]+\.[0-9]+$ ]]; then
echo 'This is a patch release'
echo ::set-output name=is-patch::true
echo "is-patch=true" >> $GITHUB_OUTPUT
else
echo "Not a valid format of release, check the Milestone's title."
echo 'Should be vX.Y.Z'
@@ -61,7 +59,7 @@ jobs:
steps:
- uses: actions/checkout@v3
- name: Download the issue template
run: curl -s https://raw.githubusercontent.com/meilisearch/core-team/main/issue-templates/roadmap-issue.md > $ISSUE_TEMPLATE
run: curl -s https://raw.githubusercontent.com/meilisearch/engine-team/main/issue-templates/roadmap-issue.md > $ISSUE_TEMPLATE
- name: Replace all empty occurrences in the templates
run: |
# Replace all <<version>> occurrences
@@ -94,7 +92,7 @@ jobs:
steps:
- uses: actions/checkout@v3
- name: Download the issue template
run: curl -s https://raw.githubusercontent.com/meilisearch/core-team/main/issue-templates/changelog-issue.md > $ISSUE_TEMPLATE
run: curl -s https://raw.githubusercontent.com/meilisearch/engine-team/main/issue-templates/changelog-issue.md > $ISSUE_TEMPLATE
- name: Replace all empty occurrences in the templates
run: |
# Replace all <<version>> occurrences

View File

@@ -1,4 +1,5 @@
on:
workflow_dispatch:
schedule:
- cron: '0 2 * * *' # Every day at 2:00am
release:
@@ -17,69 +18,93 @@ jobs:
# If yes, it means we are publishing an official release.
# If no, we are releasing a RC, so no need to check the version.
- name: Check tag format
if: github.event_name != 'schedule'
if: github.event_name == 'release'
id: check-tag-format
run: |
escaped_tag=$(printf "%q" ${{ github.ref_name }})
if [[ $escaped_tag =~ ^v[0-9]+\.[0-9]+\.[0-9]+$ ]]; then
echo ::set-output name=stable::true
echo "stable=true" >> $GITHUB_OUTPUT
else
echo ::set-output name=stable::false
echo "stable=false" >> $GITHUB_OUTPUT
fi
- name: Check release validity
if: github.event_name != 'schedule' && steps.check-tag-format.outputs.stable == 'true'
if: github.event_name == 'release' && steps.check-tag-format.outputs.stable == 'true'
run: bash .github/scripts/check-release.sh
publish:
publish-linux:
name: Publish binary for Linux
runs-on: ubuntu-latest
needs: check-version
container:
# Use ubuntu-18.04 to compile with glibc 2.27
image: ubuntu:18.04
steps:
- uses: actions/checkout@v3
- name: Install needed dependencies
run: |
apt-get update && apt-get install -y curl
apt-get install build-essential -y
- uses: actions-rs/toolchain@v1
with:
toolchain: stable
override: true
- name: Build
run: cargo build --release --locked
# No need to upload binaries for dry run (cron)
- name: Upload binaries to release
if: github.event_name == 'release'
uses: svenstaro/upload-release-action@2.4.0
with:
repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
file: target/release/meilisearch
asset_name: meilisearch-linux-amd64
tag: ${{ github.ref }}
publish-macos-windows:
name: Publish binary for ${{ matrix.os }}
runs-on: ${{ matrix.os }}
needs: check-version
strategy:
fail-fast: false
matrix:
os: [ubuntu-18.04, macos-latest, windows-latest]
os: [macos-12, windows-2022]
include:
- os: ubuntu-18.04
artifact_name: meilisearch
asset_name: meilisearch-linux-amd64
- os: macos-latest
- os: macos-12
artifact_name: meilisearch
asset_name: meilisearch-macos-amd64
- os: windows-latest
- os: windows-2022
artifact_name: meilisearch.exe
asset_name: meilisearch-windows-amd64.exe
steps:
- uses: hecrj/setup-rust-action@master
with:
rust-version: stable
- uses: actions/checkout@v3
- uses: actions-rs/toolchain@v1
with:
toolchain: stable
override: true
- name: Build
run: cargo build --release --locked
# No need to upload binaries for dry run (cron)
- name: Upload binaries to release
if: github.event_name != 'schedule'
uses: svenstaro/upload-release-action@v1-release
if: github.event_name == 'release'
uses: svenstaro/upload-release-action@2.4.0
with:
repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
file: target/release/${{ matrix.artifact_name }}
asset_name: ${{ matrix.asset_name }}
tag: ${{ github.ref }}
publish-macos-apple-silicon:
name: Publish binary for macOS silicon
runs-on: ${{ matrix.os }}
needs: check-version
continue-on-error: false
strategy:
fail-fast: false
matrix:
include:
- os: macos-latest
- os: macos-12
target: aarch64-apple-darwin
asset_name: meilisearch-macos-apple-silicon
steps:
- name: Checkout repository
uses: actions/checkout@v3
@@ -97,8 +122,8 @@ jobs:
args: --release --target ${{ matrix.target }}
- name: Upload the binary to release
# No need to upload binaries for dry run (cron)
if: github.event_name != 'schedule'
uses: svenstaro/upload-release-action@v1-release
if: github.event_name == 'release'
uses: svenstaro/upload-release-action@2.4.0
with:
repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
file: target/${{ matrix.target }}/release/meilisearch
@@ -109,7 +134,6 @@ jobs:
name: Publish binary for aarch64
runs-on: ${{ matrix.os }}
needs: check-version
continue-on-error: false
strategy:
fail-fast: false
matrix:
@@ -120,11 +144,9 @@ jobs:
linker: gcc-aarch64-linux-gnu
use-cross: true
asset_name: meilisearch-linux-aarch64
steps:
- name: Checkout repository
uses: actions/checkout@v3
- name: Installing Rust toolchain
uses: actions-rs/toolchain@v1
with:
@@ -132,16 +154,13 @@ jobs:
profile: minimal
target: ${{ matrix.target }}
override: true
- name: APT update
run: |
sudo apt update
- name: Install target specific tools
if: matrix.use-cross
run: |
sudo apt-get install -y ${{ matrix.linker }}
- name: Configure target aarch64 GNU
if: matrix.target == 'aarch64-unknown-linux-gnu'
## Environment variable is not passed using env:
@@ -153,22 +172,18 @@ jobs:
echo '[target.aarch64-unknown-linux-gnu]' >> ~/.cargo/config
echo 'linker = "aarch64-linux-gnu-gcc"' >> ~/.cargo/config
echo 'JEMALLOC_SYS_WITH_LG_PAGE=16' >> $GITHUB_ENV
echo RUSTFLAGS="-Clink-arg=-fuse-ld=gold" >> $GITHUB_ENV
- name: Cargo build
uses: actions-rs/cargo@v1
with:
command: build
use-cross: ${{ matrix.use-cross }}
args: --release --target ${{ matrix.target }}
- name: List target output files
run: ls -lR ./target
- name: Upload the binary to release
# No need to upload binaries for dry run (cron)
if: github.event_name != 'schedule'
uses: svenstaro/upload-release-action@v1-release
if: github.event_name == 'release'
uses: svenstaro/upload-release-action@2.4.0
with:
repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
file: target/${{ matrix.target }}/release/meilisearch

View File

@@ -1,4 +1,4 @@
name: Publish deb pkg to GitHub release & APT repository & Homebrew
name: Publish to APT repository & Homebrew
on:
release:
@@ -15,19 +15,27 @@ jobs:
debian:
name: Publish debian package
runs-on: ubuntu-18.04
runs-on: ubuntu-latest
needs: check-version
container:
# Use ubuntu-18.04 to compile with glibc 2.27
image: ubuntu:18.04
steps:
- uses: hecrj/setup-rust-action@master
- name: Install needed dependencies
run: |
apt-get update && apt-get install -y curl
apt-get install build-essential -y
- uses: actions-rs/toolchain@v1
with:
rust-version: stable
toolchain: stable
override: true
- name: Install cargo-deb
run: cargo install cargo-deb
- uses: actions/checkout@v3
- name: Build deb package
run: cargo deb -p meilisearch-http -o target/debian/meilisearch.deb
run: cargo deb -p meilisearch -o target/debian/meilisearch.deb
- name: Upload debian pkg to release
uses: svenstaro/upload-release-action@v1-release
uses: svenstaro/upload-release-action@2.4.0
with:
repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
file: target/debian/meilisearch.deb
@@ -38,11 +46,11 @@ jobs:
homebrew:
name: Bump Homebrew formula
runs-on: ubuntu-18.04
runs-on: ubuntu-latest
needs: check-version
steps:
- name: Create PR to Homebrew
uses: mislav/bump-homebrew-formula-action@v1
uses: mislav/bump-homebrew-formula-action@v2
with:
formula-name: meilisearch
env:

View File

@@ -1,10 +1,16 @@
---
on:
schedule:
- cron: '0 4 * * *' # Every day at 4:00am
push:
tags:
- '*'
# Will run for every tag pushed except `latest`
# When the `latest` git tag is created with this [CI](../latest-git-tag.yml)
# we don't need to create a Docker `latest` image again.
# The `latest` Docker image push is already done in this CI when releasing a stable version of Meilisearch.
tags-ignore:
- latest
# Both `schedule` and `workflow_dispatch` build the nightly tag
schedule:
- cron: '0 23 * * *' # Every day at 11:00pm
workflow_dispatch:
name: Publish tagged images to Docker Hub
@@ -14,27 +20,43 @@ jobs:
steps:
- uses: actions/checkout@v3
# Check if the tag has the v<number>.<number>.<number> format. If yes, it means we are publishing an official release.
# If we are running a cron or manual job ('schedule' or 'workflow_dispatch' event), it means we are publishing the `nightly` tag, so not considered stable.
# If we have pushed a tag, and the tag has the v<number>.<number>.<number> format, it means we are publishing an official release, so considered stable.
# In this situation, we need to set `output.stable` to create/update the following tags (additionally to the `vX.Y.Z` Docker tag):
# - a `vX.Y` (without patch version) Docker tag
# - a `latest` Docker tag
- name: Check tag format
if: github.event_name != 'schedule'
# For any other tag pushed, this is not considered stable.
- name: Define if stable and latest release
id: check-tag-format
env:
# To avoid request limit with the .github/scripts/is-latest-release.sh script
GITHUB_PATH: ${{ secrets.MEILI_BOT_GH_PAT }}
run: |
escaped_tag=$(printf "%q" ${{ github.ref_name }})
echo "latest=false" >> $GITHUB_OUTPUT
if [[ $escaped_tag =~ ^v[0-9]+\.[0-9]+\.[0-9]+$ ]]; then
echo ::set-output name=stable::true
if [[ ${{ github.event_name }} != 'push' ]]; then
echo "stable=false" >> $GITHUB_OUTPUT
elif [[ $escaped_tag =~ ^v[0-9]+\.[0-9]+\.[0-9]+$ ]]; then
echo "stable=true" >> $GITHUB_OUTPUT
echo "latest=$(sh .github/scripts/is-latest-release.sh)" >> $GITHUB_OUTPUT
else
echo ::set-output name=stable::false
echo "stable=false" >> $GITHUB_OUTPUT
fi
# Check only the validity of the tag for official releases (not for pre-releases or other tags)
# Check only the validity of the tag for stable releases (not for pre-releases or other tags)
- name: Check release validity
if: github.event_name != 'schedule' && steps.check-tag-format.outputs.stable == 'true'
if: steps.check-tag-format.outputs.stable == 'true'
run: bash .github/scripts/check-release.sh
- name: Set build-args for Docker buildx
id: build-metadata
run: |
# Extract commit date
commit_date=$(git show -s --format=%cd --date=iso-strict ${{ github.sha }})
echo "date=$commit_date" >> $GITHUB_OUTPUT
- name: Set up QEMU
uses: docker/setup-qemu-action@v2
@@ -42,7 +64,6 @@ jobs:
uses: docker/setup-buildx-action@v2
- name: Login to Docker Hub
if: github.event_name != 'schedule'
uses: docker/login-action@v2
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
@@ -53,25 +74,30 @@ jobs:
uses: docker/metadata-action@v4
with:
images: getmeili/meilisearch
# The latest and `vX.Y` tags are only pushed for the official Meilisearch releases
# See https://github.com/docker/metadata-action#latest-tag
# Prevent `latest` to be updated for each new tag pushed.
# We need latest and `vX.Y` tags to only be pushed for the stable Meilisearch releases.
flavor: latest=false
tags: |
type=ref,event=tag
type=raw,value=nightly,enable=${{ github.event_name != 'push' }}
type=semver,pattern=v{{major}}.{{minor}},enable=${{ steps.check-tag-format.outputs.stable == 'true' }}
type=raw,value=latest,enable=${{ steps.check-tag-format.outputs.stable == 'true' }}
type=raw,value=latest,enable=${{ steps.check-tag-format.outputs.stable == 'true' && steps.check-tag-format.outputs.latest == 'true' }}
- name: Build and push
uses: docker/build-push-action@v3
uses: docker/build-push-action@v4
with:
# We do not push tags for the cron jobs, this is only for test purposes
push: ${{ github.event_name != 'schedule' }}
push: true
platforms: linux/amd64,linux/arm64
tags: ${{ steps.meta.outputs.tags }}
build-args: |
COMMIT_SHA=${{ github.sha }}
COMMIT_DATE=${{ steps.build-metadata.outputs.date }}
GIT_TAG=${{ github.ref_name }}
# /!\ Don't touch this without checking with Cloud team
- name: Send CI information to Cloud team
if: github.event_name != 'schedule'
# Do not send if nightly build (i.e. 'schedule' or 'workflow_dispatch' event)
if: github.event_name == 'push'
uses: peter-evans/repository-dispatch@v2
with:
token: ${{ secrets.MEILI_BOT_GH_PAT }}

View File

@@ -0,0 +1,79 @@
name: Benchmarks indexing (push)
on:
push:
branches:
- main
env:
INFLUX_TOKEN: ${{ secrets.INFLUX_TOKEN }}
BENCH_NAME: "indexing"
jobs:
benchmarks:
name: Run and upload benchmarks
runs-on: benchmarks
timeout-minutes: 4320 # 72h
steps:
- uses: actions/checkout@v3
- uses: actions-rs/toolchain@v1
with:
profile: minimal
toolchain: stable
override: true
# Set variables
- name: Set current branch name
shell: bash
run: echo "name=$(echo ${GITHUB_REF#refs/heads/})" >> $GITHUB_OUTPUT
id: current_branch
- name: Set normalized current branch name # Replace `/` by `_` in branch name to avoid issues when pushing to S3
shell: bash
run: echo "name=$(echo ${GITHUB_REF#refs/heads/} | tr '/' '_')" >> $GITHUB_OUTPUT
id: normalized_current_branch
- name: Set shorter commit SHA
shell: bash
run: echo "short=$(echo $GITHUB_SHA | cut -c1-8)" >> $GITHUB_OUTPUT
id: commit_sha
- name: Set file basename with format "dataset_branch_commitSHA"
shell: bash
run: echo "basename=$(echo ${BENCH_NAME}_${{ steps.normalized_current_branch.outputs.name }}_${{ steps.commit_sha.outputs.short }})" >> $GITHUB_OUTPUT
id: file
# Run benchmarks
- name: Run benchmarks - Dataset ${BENCH_NAME} - Branch ${{ steps.current_branch.outputs.name }} - Commit ${{ steps.commit_sha.outputs.short }}
run: |
cd benchmarks
cargo bench --bench ${BENCH_NAME} -- --save-baseline ${{ steps.file.outputs.basename }}
# Generate critcmp files
- name: Install critcmp
uses: taiki-e/install-action@v2
with:
tool: critcmp
- name: Export critcmp file
run: |
critcmp --export ${{ steps.file.outputs.basename }} > ${{ steps.file.outputs.basename }}.json
# Upload benchmarks
- name: Upload ${{ steps.file.outputs.basename }}.json to DO Spaces # DigitalOcean Spaces = S3
uses: BetaHuhn/do-spaces-action@v2
with:
access_key: ${{ secrets.DO_SPACES_ACCESS_KEY }}
secret_key: ${{ secrets.DO_SPACES_SECRET_KEY }}
space_name: ${{ secrets.DO_SPACES_SPACE_NAME }}
space_region: ${{ secrets.DO_SPACES_SPACE_REGION }}
source: ${{ steps.file.outputs.basename }}.json
out_dir: critcmp_results
# Upload benchmarks to influxdb
- name: Upload ${{ steps.file.outputs.basename }}.json to influxDB
run: telegraf --config https://eu-central-1-1.aws.cloud2.influxdata.com/api/v2/telegrafs/08b52e34a370b000 --once --debug
# Helper
- name: 'README: compare with another benchmark'
run: |
echo "${{ steps.file.outputs.basename }}.json has just been pushed."
echo 'How to compare this benchmark with another one?'
echo ' - Check the available files with: ./benchmarks/scripts/list.sh'
echo " - Run the following command: ./benchmaks/scipts/compare.sh <file-to-compare-with> ${{ steps.file.outputs.basename }}.json"

View File

@@ -0,0 +1,78 @@
name: Benchmarks search geo (push)
on:
push:
branches:
- main
env:
BENCH_NAME: "search_geo"
INFLUX_TOKEN: ${{ secrets.INFLUX_TOKEN }}
jobs:
benchmarks:
name: Run and upload benchmarks
runs-on: benchmarks
steps:
- uses: actions/checkout@v3
- uses: actions-rs/toolchain@v1
with:
profile: minimal
toolchain: stable
override: true
# Set variables
- name: Set current branch name
shell: bash
run: echo "name=$(echo ${GITHUB_REF#refs/heads/})" >> $GITHUB_OUTPUT
id: current_branch
- name: Set normalized current branch name # Replace `/` by `_` in branch name to avoid issues when pushing to S3
shell: bash
run: echo "name=$(echo ${GITHUB_REF#refs/heads/} | tr '/' '_')" >> $GITHUB_OUTPUT
id: normalized_current_branch
- name: Set shorter commit SHA
shell: bash
run: echo "short=$(echo $GITHUB_SHA | cut -c1-8)" >> $GITHUB_OUTPUT
id: commit_sha
- name: Set file basename with format "dataset_branch_commitSHA"
shell: bash
run: echo "basename=$(echo ${BENCH_NAME}_${{ steps.normalized_current_branch.outputs.name }}_${{ steps.commit_sha.outputs.short }})" >> $GITHUB_OUTPUT
id: file
# Run benchmarks
- name: Run benchmarks - Dataset ${BENCH_NAME} - Branch ${{ steps.current_branch.outputs.name }} - Commit ${{ steps.commit_sha.outputs.short }}
run: |
cd benchmarks
cargo bench --bench ${BENCH_NAME} -- --save-baseline ${{ steps.file.outputs.basename }}
# Generate critcmp files
- name: Install critcmp
uses: taiki-e/install-action@v2
with:
tool: critcmp
- name: Export critcmp file
run: |
critcmp --export ${{ steps.file.outputs.basename }} > ${{ steps.file.outputs.basename }}.json
# Upload benchmarks
- name: Upload ${{ steps.file.outputs.basename }}.json to DO Spaces # DigitalOcean Spaces = S3
uses: BetaHuhn/do-spaces-action@v2
with:
access_key: ${{ secrets.DO_SPACES_ACCESS_KEY }}
secret_key: ${{ secrets.DO_SPACES_SECRET_KEY }}
space_name: ${{ secrets.DO_SPACES_SPACE_NAME }}
space_region: ${{ secrets.DO_SPACES_SPACE_REGION }}
source: ${{ steps.file.outputs.basename }}.json
out_dir: critcmp_results
# Upload benchmarks to influxdb
- name: Upload ${{ steps.file.outputs.basename }}.json to influxDB
run: telegraf --config https://eu-central-1-1.aws.cloud2.influxdata.com/api/v2/telegrafs/08b52e34a370b000 --once --debug
# Helper
- name: 'README: compare with another benchmark'
run: |
echo "${{ steps.file.outputs.basename }}.json has just been pushed."
echo 'How to compare this benchmark with another one?'
echo ' - Check the available files with: ./benchmarks/scripts/list.sh'
echo " - Run the following command: ./benchmaks/scipts/compare.sh <file-to-compare-with> ${{ steps.file.outputs.basename }}.json"

View File

@@ -0,0 +1,78 @@
name: Benchmarks search songs (push)
on:
push:
branches:
- main
env:
BENCH_NAME: "search_songs"
INFLUX_TOKEN: ${{ secrets.INFLUX_TOKEN }}
jobs:
benchmarks:
name: Run and upload benchmarks
runs-on: benchmarks
steps:
- uses: actions/checkout@v3
- uses: actions-rs/toolchain@v1
with:
profile: minimal
toolchain: stable
override: true
# Set variables
- name: Set current branch name
shell: bash
run: echo "name=$(echo ${GITHUB_REF#refs/heads/})" >> $GITHUB_OUTPUT
id: current_branch
- name: Set normalized current branch name # Replace `/` by `_` in branch name to avoid issues when pushing to S3
shell: bash
run: echo "name=$(echo ${GITHUB_REF#refs/heads/} | tr '/' '_')" >> $GITHUB_OUTPUT
id: normalized_current_branch
- name: Set shorter commit SHA
shell: bash
run: echo "short=$(echo $GITHUB_SHA | cut -c1-8)" >> $GITHUB_OUTPUT
id: commit_sha
- name: Set file basename with format "dataset_branch_commitSHA"
shell: bash
run: echo "basename=$(echo ${BENCH_NAME}_${{ steps.normalized_current_branch.outputs.name }}_${{ steps.commit_sha.outputs.short }})" >> $GITHUB_OUTPUT
id: file
# Run benchmarks
- name: Run benchmarks - Dataset ${BENCH_NAME} - Branch ${{ steps.current_branch.outputs.name }} - Commit ${{ steps.commit_sha.outputs.short }}
run: |
cd benchmarks
cargo bench --bench ${BENCH_NAME} -- --save-baseline ${{ steps.file.outputs.basename }}
# Generate critcmp files
- name: Install critcmp
uses: taiki-e/install-action@v2
with:
tool: critcmp
- name: Export critcmp file
run: |
critcmp --export ${{ steps.file.outputs.basename }} > ${{ steps.file.outputs.basename }}.json
# Upload benchmarks
- name: Upload ${{ steps.file.outputs.basename }}.json to DO Spaces # DigitalOcean Spaces = S3
uses: BetaHuhn/do-spaces-action@v2
with:
access_key: ${{ secrets.DO_SPACES_ACCESS_KEY }}
secret_key: ${{ secrets.DO_SPACES_SECRET_KEY }}
space_name: ${{ secrets.DO_SPACES_SPACE_NAME }}
space_region: ${{ secrets.DO_SPACES_SPACE_REGION }}
source: ${{ steps.file.outputs.basename }}.json
out_dir: critcmp_results
# Upload benchmarks to influxdb
- name: Upload ${{ steps.file.outputs.basename }}.json to influxDB
run: telegraf --config https://eu-central-1-1.aws.cloud2.influxdata.com/api/v2/telegrafs/08b52e34a370b000 --once --debug
# Helper
- name: 'README: compare with another benchmark'
run: |
echo "${{ steps.file.outputs.basename }}.json has just been pushed."
echo 'How to compare this benchmark with another one?'
echo ' - Check the available files with: ./benchmarks/scripts/list.sh'
echo " - Run the following command: ./benchmaks/scipts/compare.sh <file-to-compare-with> ${{ steps.file.outputs.basename }}.json"


@@ -0,0 +1,78 @@
name: Benchmarks search wikipedia articles (push)
on:
push:
branches:
- main
env:
BENCH_NAME: "search_wiki"
INFLUX_TOKEN: ${{ secrets.INFLUX_TOKEN }}
jobs:
benchmarks:
name: Run and upload benchmarks
runs-on: benchmarks
steps:
- uses: actions/checkout@v3
- uses: actions-rs/toolchain@v1
with:
profile: minimal
toolchain: stable
override: true
# Set variables
- name: Set current branch name
shell: bash
run: echo "name=$(echo ${GITHUB_REF#refs/heads/})" >> $GITHUB_OUTPUT
id: current_branch
- name: Set normalized current branch name # Replace `/` by `_` in branch name to avoid issues when pushing to S3
shell: bash
run: echo "name=$(echo ${GITHUB_REF#refs/heads/} | tr '/' '_')" >> $GITHUB_OUTPUT
id: normalized_current_branch
- name: Set shorter commit SHA
shell: bash
run: echo "short=$(echo $GITHUB_SHA | cut -c1-8)" >> $GITHUB_OUTPUT
id: commit_sha
- name: Set file basename with format "dataset_branch_commitSHA"
shell: bash
run: echo "basename=$(echo ${BENCH_NAME}_${{ steps.normalized_current_branch.outputs.name }}_${{ steps.commit_sha.outputs.short }})" >> $GITHUB_OUTPUT
id: file
# Run benchmarks
- name: Run benchmarks - Dataset ${BENCH_NAME} - Branch ${{ steps.current_branch.outputs.name }} - Commit ${{ steps.commit_sha.outputs.short }}
run: |
cd benchmarks
cargo bench --bench ${BENCH_NAME} -- --save-baseline ${{ steps.file.outputs.basename }}
# Generate critcmp files
- name: Install critcmp
uses: taiki-e/install-action@v2
with:
tool: critcmp
- name: Export critcmp file
run: |
critcmp --export ${{ steps.file.outputs.basename }} > ${{ steps.file.outputs.basename }}.json
# Upload benchmarks
- name: Upload ${{ steps.file.outputs.basename }}.json to DO Spaces # DigitalOcean Spaces = S3
uses: BetaHuhn/do-spaces-action@v2
with:
access_key: ${{ secrets.DO_SPACES_ACCESS_KEY }}
secret_key: ${{ secrets.DO_SPACES_SECRET_KEY }}
space_name: ${{ secrets.DO_SPACES_SPACE_NAME }}
space_region: ${{ secrets.DO_SPACES_SPACE_REGION }}
source: ${{ steps.file.outputs.basename }}.json
out_dir: critcmp_results
# Upload benchmarks to influxdb
- name: Upload ${{ steps.file.outputs.basename }}.json to influxDB
run: telegraf --config https://eu-central-1-1.aws.cloud2.influxdata.com/api/v2/telegrafs/08b52e34a370b000 --once --debug
# Helper
- name: 'README: compare with another benchmark'
run: |
echo "${{ steps.file.outputs.basename }}.json has just been pushed."
echo 'How to compare this benchmark with another one?'
echo ' - Check the available files with: ./benchmarks/scripts/list.sh'
echo " - Run the following command: ./benchmaks/scipts/compare.sh <file-to-compare-with> ${{ steps.file.outputs.basename }}.json"


@@ -2,6 +2,9 @@ name: Rust
on:
workflow_dispatch:
schedule:
# Every day at 5:00am
- cron: '0 5 * * *'
pull_request:
push:
# trying and staging branches are for Bors config
@@ -15,78 +18,128 @@ env:
RUSTFLAGS: "-D warnings"
jobs:
tests:
test-linux:
name: Tests on ubuntu-18.04
runs-on: ubuntu-latest
container:
# Use ubuntu-18.04 to compile with glibc 2.27, which matches the production expectations
image: ubuntu:18.04
steps:
- uses: actions/checkout@v3
- name: Install needed dependencies
run: |
apt-get update && apt-get install -y curl
apt-get install build-essential -y
- name: Run test with Rust stable
if: github.event_name != 'schedule'
uses: actions-rs/toolchain@v1
with:
toolchain: stable
override: true
- name: Run test with Rust nightly
if: github.event_name == 'schedule'
uses: actions-rs/toolchain@v1
with:
toolchain: nightly
override: true
# Disable cache due to disk space issues with Windows workers in CI
# - name: Cache dependencies
# uses: Swatinem/rust-cache@v2.2.0
- name: Run cargo check without any default features
uses: actions-rs/cargo@v1
with:
command: build
args: --locked --release --no-default-features --all
- name: Run cargo test
uses: actions-rs/cargo@v1
with:
command: test
args: --locked --release --all
test-others:
name: Tests on ${{ matrix.os }}
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
os: [ubuntu-18.04, macos-latest, windows-latest]
os: [macos-12, windows-2022]
steps:
- uses: actions/checkout@v3
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.0.0
# - name: Cache dependencies
# uses: Swatinem/rust-cache@v2.2.0
- name: Run cargo check without any default features
uses: actions-rs/cargo@v1
with:
command: build
args: --locked --release --no-default-features
args: --locked --release --no-default-features --all
- name: Run cargo test
uses: actions-rs/cargo@v1
with:
command: test
args: --locked --release
args: --locked --release --all
# We also run the tests in debug, to make sure the debug_assertions are hit
test-debug:
name: Run tests in debug
runs-on: ubuntu-18.04
runs-on: ubuntu-latest
container:
# Use ubuntu-18.04 to compile with glibc 2.27, which matches the production expectations
image: ubuntu:18.04
steps:
- uses: actions/checkout@v3
- name: Install needed dependencies
run: |
apt-get update && apt-get install -y curl
apt-get install build-essential -y
- uses: actions-rs/toolchain@v1
with:
profile: minimal
toolchain: stable
override: true
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.0.0
# - name: Cache dependencies
# uses: Swatinem/rust-cache@v2.2.0
- name: Run tests in debug
uses: actions-rs/cargo@v1
with:
command: test
args: --locked
args: --locked --all
clippy:
name: Run Clippy
runs-on: ubuntu-18.04
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: actions-rs/toolchain@v1
with:
profile: minimal
toolchain: stable
toolchain: 1.67.0
override: true
components: clippy
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.0.0
# - name: Cache dependencies
# uses: Swatinem/rust-cache@v2.2.0
- name: Run cargo clippy
uses: actions-rs/cargo@v1
with:
command: clippy
args: --all-targets -- --deny warnings
# allow uninlined_format_args https://github.com/rust-lang/rust-clippy/issues/10087
args: --all-targets -- --deny warnings --allow clippy::uninlined_format_args
fmt:
name: Run Rustfmt
runs-on: ubuntu-18.04
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: actions-rs/toolchain@v1
with:
profile: minimal
toolchain: stable
toolchain: nightly
override: true
components: rustfmt
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.0.0
# - name: Cache dependencies
# uses: Swatinem/rust-cache@v2.2.0
- name: Run cargo fmt
run: cargo fmt --all -- --check
# Since we never run the `build.rs` script in the benchmark directory, one auto-generated import file is missing.
# Because we want this action to trigger (and fail) as fast as possible, instead of building the benchmark crate
# we create an empty file where rustfmt expects it.
run: |
echo -ne "\n" > benchmarks/benches/datasets_paths.rs
cargo fmt --all -- --check

.github/workflows/uffizzi-build.yml

@@ -0,0 +1,100 @@
name: Uffizzi - Build PR Image
on:
pull_request:
types: [opened,synchronize,reopened,closed]
jobs:
build-meilisearch:
name: Build and push `meilisearch`
runs-on: ubuntu-latest
outputs:
tags: ${{ steps.meta.outputs.tags }}
if: ${{ github.event.action != 'closed' }}
steps:
- name: checkout
uses: actions/checkout@v3
- name: Set up QEMU
uses: docker/setup-qemu-action@v2
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v2
- name: Generate UUID image name
id: uuid
run: echo "UUID_TAG=$(uuidgen)" >> $GITHUB_ENV
- name: Docker metadata
id: meta
uses: docker/metadata-action@v3
with:
images: registry.uffizzi.com/${{ env.UUID_TAG }}
tags: |
type=raw,value=60d
- name: Build Image
uses: docker/build-push-action@v3
with:
context: ./
file: .github/uffizzi/Dockerfile
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
push: true
cache-from: type=gha
cache-to: type=gha,mode=max
render-compose-file:
name: Render Docker Compose File
# Pass output of this workflow to another triggered by `workflow_run` event.
runs-on: ubuntu-latest
needs:
- build-meilisearch
outputs:
compose-file-cache-key: ${{ env.COMPOSE_FILE_HASH }}
steps:
- name: Checkout git repo
uses: actions/checkout@v3
- name: Render Compose File
run: |
MEILISEARCH_IMAGE=$(echo ${{ needs.build-meilisearch.outputs.tags }})
export MEILISEARCH_IMAGE
# Render simple template from environment variables.
envsubst < .github/uffizzi/docker-compose.uffizzi.yml > docker-compose.rendered.yml
cat docker-compose.rendered.yml
- name: Upload Rendered Compose File as Artifact
uses: actions/upload-artifact@v3
with:
name: preview-spec
path: docker-compose.rendered.yml
retention-days: 2
- name: Serialize PR Event to File
run: |
cat << EOF > event.json
${{ toJSON(github.event) }}
EOF
- name: Upload PR Event as Artifact
uses: actions/upload-artifact@v3
with:
name: preview-spec
path: event.json
retention-days: 2
delete-preview:
name: Call for Preview Deletion
runs-on: ubuntu-latest
if: ${{ github.event.action == 'closed' }}
steps:
# If this PR is closing, we will not render a compose file nor pass it to the next workflow.
- name: Serialize PR Event to File
run: |
cat << EOF > event.json
${{ toJSON(github.event) }}
EOF
- name: Upload PR Event as Artifact
uses: actions/upload-artifact@v3
with:
name: preview-spec
path: event.json
retention-days: 2


@@ -0,0 +1,103 @@
name: Uffizzi - Deploy Preview
on:
workflow_run:
workflows:
- "Uffizzi - Build PR Image"
types:
- completed
jobs:
cache-compose-file:
name: Cache Compose File
runs-on: ubuntu-latest
if: ${{ github.event.workflow_run.conclusion == 'success' }}
outputs:
compose-file-cache-key: ${{ env.COMPOSE_FILE_HASH }}
pr-number: ${{ env.PR_NUMBER }}
expected-url: ${{ env.EXPECTED_URL }}
steps:
- name: 'Download artifacts'
# Fetch output (zip archive) from the workflow run that triggered this workflow.
uses: actions/github-script@v6
with:
script: |
let allArtifacts = await github.rest.actions.listWorkflowRunArtifacts({
owner: context.repo.owner,
repo: context.repo.repo,
run_id: context.payload.workflow_run.id,
});
let matchArtifact = allArtifacts.data.artifacts.filter((artifact) => {
return artifact.name == "preview-spec"
})[0];
let download = await github.rest.actions.downloadArtifact({
owner: context.repo.owner,
repo: context.repo.repo,
artifact_id: matchArtifact.id,
archive_format: 'zip',
});
let fs = require('fs');
fs.writeFileSync(`${process.env.GITHUB_WORKSPACE}/preview-spec.zip`, Buffer.from(download.data));
- name: 'Unzip artifact'
run: unzip preview-spec.zip
- name: Read Event into ENV
run: |
echo 'EVENT_JSON<<EOF' >> $GITHUB_ENV
cat event.json >> $GITHUB_ENV
echo 'EOF' >> $GITHUB_ENV
- name: Hash Rendered Compose File
id: hash
# If the previous workflow was triggered by a PR close event, we will not have a compose file artifact.
if: ${{ fromJSON(env.EVENT_JSON).action != 'closed' }}
run: echo "COMPOSE_FILE_HASH=$(md5sum docker-compose.rendered.yml | awk '{ print $1 }')" >> $GITHUB_ENV
- name: Cache Rendered Compose File
if: ${{ fromJSON(env.EVENT_JSON).action != 'closed' }}
uses: actions/cache@v3
with:
path: docker-compose.rendered.yml
key: ${{ env.COMPOSE_FILE_HASH }}
- name: Read PR Number From Event Object
id: pr
run: echo "PR_NUMBER=${{ fromJSON(env.EVENT_JSON).number }}" >> $GITHUB_ENV
- name: DEBUG - Print Job Outputs
if: ${{ runner.debug }}
run: |
echo "PR number: ${{ env.PR_NUMBER }}"
echo "Compose file hash: ${{ env.COMPOSE_FILE_HASH }}"
cat event.json
- name: Add expected URL env var
if: ${{ runner.debug }}
run: |
REPO=$(echo ${{ github.repository }} | sed 's/\./+/g')
echo "EXPECTED_URL=${{ inputs.server }}/github.com/$REPO/pull/${{ env.PR_NUMBER }}" >> $GITHUB_ENV
deploy-uffizzi-preview:
name: Use Remote Workflow to Preview on Uffizzi
needs:
- cache-compose-file
uses: UffizziCloud/preview-action/.github/workflows/reusable.yaml@desc
with:
# If this workflow was triggered by a PR close event, cache-key will be an empty string
# and this reusable workflow will delete the preview deployment.
compose-file-cache-key: ${{ needs.cache-compose-file.outputs.compose-file-cache-key }}
compose-file-cache-path: docker-compose.rendered.yml
server: https://app.uffizzi.com
pr-number: ${{ needs.cache-compose-file.outputs.pr-number }}
description: |
The meilisearch preview environment contains a web terminal from where you can run the
`meilisearch` command. You should be able to access this instance of meilisearch running in
the preview from the Meilisearch Endpoint link given below.
Web Terminal Endpoint : ${{ needs.cache-compose-file.outputs.expected-url }}
Meilisearch Endpoint : ${{ needs.cache-compose-file.outputs.expected-url }}/meilisearch
permissions:
contents: read
pull-requests: write
id-token: write


@@ -13,10 +13,9 @@ env:
GH_TOKEN: ${{ secrets.MEILI_BOT_GH_PAT }}
jobs:
update-version-cargo-toml:
name: Update version in Cargo.toml files
runs-on: ubuntu-18.04
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: actions-rs/toolchain@v1
@@ -30,7 +29,7 @@ jobs:
run: |
raw_new_version=$(echo $NEW_VERSION | cut -d 'v' -f 2)
new_string="version = \"$raw_new_version\""
sd '^version = "\d+.\d+.\w+"$' "$new_string" */Cargo.toml
sd '^version = "\d+.\d+.\w+"$' "$new_string" Cargo.toml
- name: Build Meilisearch to update Cargo.lock
run: cargo build
- name: Commit and push the changes to the ${{ env.NEW_BRANCH }} branch
@@ -44,4 +43,5 @@ jobs:
--title "Update version for the next release ($NEW_VERSION) in Cargo.toml files" \
--body '⚠️ This PR is automatically generated. Check the new version is the expected one before merging.' \
--label 'skip changelog' \
--milestone $NEW_VERSION
--milestone $NEW_VERSION \
--base $GITHUB_REF_NAME

.gitignore

@@ -1,3 +1,5 @@
.idea/
.vscode/
/target
**/*.csv
**/*.json_lines
@@ -8,9 +10,11 @@
/snapshots
/dumps
# Snapshots
## ... large
*.full.snap
## ... unreviewed
*.snap.new
# Fuzzcheck data for the facet indexing fuzz test
milli/fuzz/update::facet::incremental::fuzz::fuzz/


@@ -10,24 +10,12 @@ If Meilisearch does not offer optimized support for your language, please consid
## Table of Contents
- [Hacktoberfest 2022](#hacktoberfest-2022)
- [Assumptions](#assumptions)
- [How to Contribute](#how-to-contribute)
- [Development Workflow](#development-workflow)
- [Git Guidelines](#git-guidelines)
- [Release Process (for internal team only)](#release-process-for-internal-team-only)
## Hacktoberfest 2022
It's [Hacktoberfest month](https://hacktoberfest.com)! 🥳
Thanks so much for participating with Meilisearch this year!
1. We will follow the quality standards set by the organizers of Hacktoberfest (see detail on their [website](https://hacktoberfest.com/participation/#spam)). Our reviewers will not consider any PR that doesn't match that standard.
2. PR reviews will take place from Monday to Thursday, during usual working hours, CEST time. If you submit outside of these hours, there's no need to panic; we will get around to your contribution.
3. There will be no issue assignment as we don't want people to ask to be assigned specific issues and never return, discouraging the volunteer contributors from opening a PR to fix this issue. We take the liberty to choose the PR that best fixes the issue, so we encourage you to get to it as soon as possible and do your best!
You can check out the longer, more complete guideline documentation [here](https://github.com/meilisearch/.github/blob/main/Hacktoberfest_2022_contributors_guidelines.md).
## Assumptions
1. **You're familiar with [GitHub](https://github.com) and the [Pull Requests](https://help.github.com/en/github/collaborating-with-issues-and-pull-requests/about-pull-requests)(PR) workflow.**
@@ -64,6 +52,23 @@ cargo test
This command will be triggered to each PR as a requirement for merging it.
#### Snapshot-based tests
We are using [insta](https://insta.rs) to perform snapshot-based testing.
We recommend using the insta tooling (such as `cargo-insta`) to update the snapshots if they change following a PR.
New tests should use insta where possible rather than manual `assert` statements.
Furthermore, we provide some macros on top of insta, notably a way to use snapshot hashes instead of inline snapshots, saving a lot of space in the repository.
To effectively debug snapshot-based hashes, we recommend you export the `MEILI_TEST_FULL_SNAPS` environment variable so that snapshots are fully created locally:
```
export MEILI_TEST_FULL_SNAPS=true # add this to your .bashrc, .zshrc, ...
```
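If you have never used insta, the overall shape of a test looks roughly like the following; a minimal sketch, assuming `insta` as a dev-dependency, where `render_error` is a hypothetical helper standing in for real production code:
```rust
#[cfg(test)]
mod tests {
    // Hypothetical helper standing in for real production code.
    fn render_error(delimiter: &str) -> String {
        format!("invalid csv delimiter: `{delimiter}`")
    }

    #[test]
    fn error_message_is_stable() {
        // Inline snapshot: record/accept the value after `@` with `cargo insta review`;
        // later runs fail whenever the output drifts from the recorded snapshot.
        insta::assert_snapshot!(render_error("🍰"), @"invalid csv delimiter: `🍰`");
    }
}
```
With the hash-based macros mentioned above, the recorded value is a hash rather than the full string, which is exactly what `MEILI_TEST_FULL_SNAPS` expands for debugging.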
#### Test troubleshooting
If you get a "Too many open files" error you might want to increase the open file limit using this command:
```bash
@@ -109,7 +114,35 @@ _[Read more about this](https://github.com/meilisearch/integration-guides/blob/m
### How to Publish a new Release
The full Meilisearch release process is described in [this guide](https://github.com/meilisearch/core-team/blob/main/resources/meilisearch-release.md). Please follow it carefully before doing any release.
The full Meilisearch release process is described in [this guide](https://github.com/meilisearch/engine-team/blob/main/resources/meilisearch-release.md). Please follow it carefully before doing any release.
### How to publish a prototype
Depending on the developed feature, you might need to provide a prototyped version of Meilisearch to make it easier to test by the users.
The prototype name must follow this convention: `prototype-X-Y` where
- `X` is the feature name formatted in `kebab-case`. It should not end with a single number.
- `Y` is the version of the prototype, starting from `0`.
✅ Example: `prototype-auto-resize-0`. <br/>
❌ Bad example: `auto-resize-0`: lacks the `prototype` prefix. <br/>
❌ Bad example: `prototype-auto-resize`: lacks the version suffix. <br/>
❌ Bad example: `prototype-auto-resize-0-0`: feature name ends with a single number.
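Since the convention is purely mechanical, it can be checked automatically. A minimal, purely illustrative sketch (not part of the repository):
```rust
/// Illustrative check of the `prototype-X-Y` convention described above:
/// `X` is a kebab-case feature name that must not end with a single number,
/// `Y` is a numeric version suffix. (Kebab-case charset checks are elided.)
fn is_valid_prototype_tag(tag: &str) -> bool {
    let Some(rest) = tag.strip_prefix("prototype-") else { return false };
    let Some((feature, version)) = rest.rsplit_once('-') else { return false };
    let version_ok = !version.is_empty() && version.chars().all(|c| c.is_ascii_digit());
    // Reject feature names whose last `-`-separated segment is a bare number.
    let ends_with_number = feature
        .rsplit('-')
        .next()
        .is_some_and(|s| !s.is_empty() && s.chars().all(|c| c.is_ascii_digit()));
    !feature.is_empty() && version_ok && !ends_with_number
}

fn main() {
    assert!(is_valid_prototype_tag("prototype-auto-resize-0"));
    assert!(!is_valid_prototype_tag("auto-resize-0")); // missing prefix
    assert!(!is_valid_prototype_tag("prototype-auto-resize")); // missing version
    assert!(!is_valid_prototype_tag("prototype-auto-resize-0-0")); // ends with a number
}
```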
Steps to create a prototype:
1. In your terminal, go to the last commit of your branch (the one you want to provide as a prototype).
2. Create a tag following the convention: `git tag prototype-X-Y`
3. Run Meilisearch and check that its launch summary features a line: `Prototype: prototype-X-Y` (you may need to switch branches and back after tagging for this to work).
4. Push the tag: `git push origin prototype-X-Y`
5. Check the [Docker CI](https://github.com/meilisearch/meilisearch/actions/workflows/publish-docker-images.yml) is now running.
🐳 Once the CI has finished running (~1h30), a Docker image named `prototype-X-Y` will be available on [DockerHub](https://hub.docker.com/repository/docker/getmeili/meilisearch/general). People can use it with the following command: `docker run -p 7700:7700 -v $(pwd)/meili_data:/meili_data getmeili/meilisearch:prototype-X-Y`. <br>
More information about [how to run Meilisearch with Docker](https://docs.meilisearch.com/learn/cookbooks/docker.html#download-meilisearch-with-docker).
⚙️ However, no binaries will be created. If the users do not use Docker, they can go to the `prototype-X-Y` tag in the Meilisearch repository and compile from the source code.
⚠️ When sharing a prototype with users, remind them not to use it in production. Prototypes are solely for test purposes.
### Release assets

Cargo.lock

File diff suppressed because it is too large.


@@ -1,7 +1,7 @@
[workspace]
resolver = "2"
members = [
"meilisearch-http",
"meilisearch",
"meilisearch-types",
"meilisearch-auth",
"meili-snap",
@@ -9,13 +9,50 @@ members = [
"dump",
"file-store",
"permissive-json-pointer",
"milli",
"filter-parser",
"flatten-serde-json",
"json-depth-checker",
"benchmarks"
]
[workspace.package]
version = "1.0.0"
authors = ["Quentin de Quelen <quentin@dequelen.me>", "Clément Renault <clement@meilisearch.com>"]
description = "Meilisearch HTTP server"
homepage = "https://meilisearch.com"
readme = "README.md"
edition = "2021"
license = "MIT"
[profile.release]
codegen-units = 1
[profile.dev.package.flate2]
opt-level = 3
[profile.dev.package.milli]
[profile.dev.package.grenad]
opt-level = 3
[profile.dev.package.roaring]
opt-level = 3
[profile.dev.package.lindera-ipadic-builder]
opt-level = 3
[profile.dev.package.encoding]
opt-level = 3
[profile.dev.package.yada]
opt-level = 3
[profile.release.package.lindera-ipadic-builder]
opt-level = 3
[profile.release.package.encoding]
opt-level = 3
[profile.release.package.yada]
opt-level = 3
[profile.bench.package.lindera-ipadic-builder]
opt-level = 3
[profile.bench.package.encoding]
opt-level = 3
[profile.bench.package.yada]
opt-level = 3


@@ -1,31 +1,30 @@
# Compile
FROM rust:alpine3.16 AS compiler
RUN apk add -q --update-cache --no-cache build-base openssl-dev
FROM rust:bullseye AS compiler
WORKDIR /meilisearch
ARG COMMIT_SHA
ARG COMMIT_DATE
ENV COMMIT_SHA=${COMMIT_SHA} COMMIT_DATE=${COMMIT_DATE}
ARG GIT_TAG
ENV VERGEN_GIT_SHA=${COMMIT_SHA} VERGEN_GIT_COMMIT_TIMESTAMP=${COMMIT_DATE} VERGEN_GIT_SEMVER_LIGHTWEIGHT=${GIT_TAG}
ENV RUSTFLAGS="-C target-feature=-crt-static"
COPY . .
RUN set -eux; \
apkArch="$(apk --print-arch)"; \
if [ "$apkArch" = "aarch64" ]; then \
arch="$(dpkg --print-architecture)"; \
if [ "$arch" = "aarch64" ]; then \
export JEMALLOC_SYS_WITH_LG_PAGE=16; \
fi && \
cargo build --release
# Run
FROM alpine:3.16
FROM debian:11.6
ENV MEILI_HTTP_ADDR 0.0.0.0:7700
ENV MEILI_SERVER_PROVIDER docker
RUN apk update --quiet \
&& apk add -q --no-cache libgcc tini curl
RUN apt update -q \
&& apt install -q -y tini
# add meilisearch to the `/bin` so you can run it from anywhere and it's easy
# to find.


@@ -9,11 +9,10 @@
<a href="https://blog.meilisearch.com">Blog</a> |
<a href="https://docs.meilisearch.com">Documentation</a> |
<a href="https://docs.meilisearch.com/faq/">FAQ</a> |
<a href="https://slack.meilisearch.com">Slack</a>
<a href="https://discord.meilisearch.com">Discord</a>
</h4>
<p align="center">
<a href="https://github.com/meilisearch/meilisearch/actions"><img src="https://github.com/meilisearch/meilisearch/workflows/Cargo%20test/badge.svg" alt="Build Status"></a>
<a href="https://deps.rs/repo/github/meilisearch/meilisearch"><img src="https://deps.rs/repo/github/meilisearch/meilisearch/status.svg" alt="Dependency status"></a>
<a href="https://github.com/meilisearch/meilisearch/blob/main/LICENSE"><img src="https://img.shields.io/badge/license-MIT-informational" alt="License"></a>
<a href="https://app.bors.tech/repositories/26457"><img src="https://bors.tech/images/badge_small.svg" alt="Bors enabled"></a>
@@ -34,14 +33,6 @@ Meilisearch helps you shape a delightful search experience in a snap, offering f
🔥 [**Try it!**](https://where2watch.meilisearch.com/) 🔥
## 🎃 Hacktoberfest
It's Hacktoberfest 2022 @Meilisearch
[Hacktoberfest](https://hacktoberfest.com/) is a celebration of the open-source community. This year, and for the third time in a row, Meilisearch is participating in this fantastic event.
You'd like to contribute? Don't hesitate to check out our [contributing guidelines](./CONTRIBUTING.md).
## ✨ Features
- **Search-as-you-type:** find search results in less than 50 milliseconds
@@ -69,7 +60,7 @@ You may also want to check out [Meilisearch 101](https://docs.meilisearch.com/le
## ☁️ Meilisearch cloud
Join the closed beta for Meilisearch cloud by filling out [this form](https://meilisearch.typeform.com/to/VI2cI2rv).
Let us manage your infrastructure so you can focus on integrating a great search experience. Try [Meilisearch Cloud](https://meilisearch.com/pricing) today.
## 🧰 SDKs & integration tools
@@ -77,7 +68,7 @@ Install one of our SDKs in your project for seamless integration between Meilise
Take a look at the complete [Meilisearch integration list](https://docs.meilisearch.com/learn/what_is_meilisearch/sdks.html).
![Logos belonging to different languages and frameworks supported by Meilisearch, including React, Ruby on Rails, Go, Rust, and PHP](assets/integrations.png)
[![Logos belonging to different languages and frameworks supported by Meilisearch, including React, Ruby on Rails, Go, Rust, and PHP](assets/integrations.png)](https://docs.meilisearch.com/learn/what_is_meilisearch/sdks.html)
## ⚙️ Advanced usage
@@ -105,7 +96,19 @@ Meilisearch is a search engine created by [Meili](https://www.welcometothejungle
- For feature requests, please visit our [product repository](https://github.com/meilisearch/product/discussions)
- Found a bug? Open an [issue](https://github.com/meilisearch/meilisearch/issues)!
- Want to be part of our Slack community? [Join us!](https://slack.meilisearch.com/)
- Want to be part of our Discord community? [Join us!](https://discord.gg/meilisearch)
- For everything else, please check [this page listing some of the other places where you can find us](https://docs.meilisearch.com/learn/what_is_meilisearch/contact.html)
Thank you for your support!
## 👩‍💻 Contributing
Meilisearch is, and will always be, open-source! If you want to contribute to the project, please take a look at [our contribution guidelines](CONTRIBUTING.md).
## 📦 Versioning
Meilisearch releases and their associated binaries are available [in this GitHub page](https://github.com/meilisearch/meilisearch/releases).
The binaries are versioned following [SemVer conventions](https://semver.org/). To know more, read our [versioning policy](https://github.com/meilisearch/engine-team/blob/main/resources/versioning-policy.md).
Differently from the binaries, crates in this repository are not currently available on [crates.io](https://crates.io/) and do not follow [SemVer conventions](https://semver.org).

assets/milli-logo.svg

@@ -0,0 +1,6 @@
<svg width="277" height="236" viewBox="0 0 277 236" fill="none" xmlns="http://www.w3.org/2000/svg">
<path fill-rule="evenodd" clip-rule="evenodd" d="M213.085 190L242.907 86H276.196L246.375 190H213.085Z" fill="#494949"/>
<path fill-rule="evenodd" clip-rule="evenodd" d="M0 190L29.8215 86H63.1111L33.2896 190H0Z" fill="#494949"/>
<path fill-rule="evenodd" clip-rule="evenodd" d="M124.986 0L57.5772 235.083L60.7752 236H90.6038L158.276 0H124.986Z" fill="#494949"/>
<path fill-rule="evenodd" clip-rule="evenodd" d="M195.273 0L127.601 236H160.891L228.563 0H195.273Z" fill="#494949"/>
</svg>


benchmarks/.gitignore

@@ -0,0 +1 @@
benches/datasets_paths.rs

benchmarks/Cargo.toml

@@ -0,0 +1,54 @@
[package]
name = "benchmarks"
publish = false
version.workspace = true
authors.workspace = true
description.workspace = true
homepage.workspace = true
readme.workspace = true
edition.workspace = true
license.workspace = true
[dependencies]
anyhow = "1.0.65"
csv = "1.1.6"
milli = { path = "../milli", default-features = false }
mimalloc = { version = "0.1.29", default-features = false }
serde_json = { version = "1.0.85", features = ["preserve_order"] }
[dev-dependencies]
criterion = { version = "0.4.0", features = ["html_reports"] }
rand = "0.8.5"
rand_chacha = "0.3.1"
roaring = "0.10.1"
[build-dependencies]
anyhow = "1.0.65"
bytes = "1.2.1"
convert_case = "0.6.0"
flate2 = "1.0.24"
reqwest = { version = "0.11.12", features = ["blocking", "rustls-tls"], default-features = false }
[features]
default = ["milli/default"]
[[bench]]
name = "search_songs"
harness = false
[[bench]]
name = "search_wiki"
harness = false
[[bench]]
name = "search_geo"
harness = false
[[bench]]
name = "indexing"
harness = false
[[bench]]
name = "formatting"
harness = false

benchmarks/README.md

@@ -0,0 +1,138 @@
Benchmarks
==========
## TOC
- [Run the benchmarks](#run-the-benchmarks)
- [Comparison between benchmarks](#comparison-between-benchmarks)
- [Datasets](#datasets)
## Run the benchmarks
### On our private server
The Meili team self-hosts its own GitHub runner to run benchmarks on our dedicated bare metal server.
To trigger the benchmark workflow:
- Go to the `Actions` tab of this repository.
- Select the `Benchmarks` workflow on the left.
- Click on `Run workflow` in the blue banner.
- Select the branch on which you want to run the benchmarks and select the dataset you want (default: `songs`).
- Finally, click on `Run workflow`.
This GitHub workflow will run the benchmarks and push the `critcmp` report to a DigitalOcean Space (= S3).
The name of the uploaded file is displayed in the workflow.
_[More about critcmp](https://github.com/BurntSushi/critcmp)._
💡 To compare the just-uploaded benchmark with another one, check out the [next section](#comparison-between-benchmarks).
### On your machine
To run all the benchmarks (~5h):
```bash
cargo bench
```
To run only the `search_songs` (~1h), `search_wiki` (~3h), `search_geo` (~20m) or `indexing` (~2h) benchmark:
```bash
cargo bench --bench <dataset name>
```
By default, the datasets will be downloaded and uncompressed automatically in the target directory.<br>
If you don't want to download the datasets every time you update something on the code, you can specify a custom directory with the environment variable `MILLI_BENCH_DATASETS_PATH`:
```bash
mkdir ~/datasets
MILLI_BENCH_DATASETS_PATH=~/datasets cargo bench --bench search_songs # the four datasets are downloaded
touch build.rs
MILLI_BENCH_DATASETS_PATH=~/datasets cargo bench --bench songs # the code is compiled again but the datasets are not downloaded
```
## Comparison between benchmarks
The benchmark reports we push are generated with `critcmp`. Thus, we use `critcmp` to show the result of a benchmark, or compare results between multiple benchmarks.
We provide a script to download and display the comparison report.
Requirements:
- `grep`
- `curl`
- [`critcmp`](https://github.com/BurntSushi/critcmp)
List the available files in the DO Space:
```bash
./benchmarks/scripts/list.sh
```
```bash
songs_main_09a4321.json
songs_geosearch_24ec456.json
search_songs_main_cb45a10b.json
```
Run the comparison script:
```bash
# we get the result of ONE benchmark; this gives you an idea of how much time an operation took
./benchmarks/scripts/compare.sh songs_geosearch_24ec456.json
# we compare two benchmarks
./benchmarks/scripts/compare.sh songs_main_09a4321.json songs_geosearch_24ec456.json
# we compare three benchmarks
./benchmarks/scripts/compare.sh songs_main_09a4321.json songs_geosearch_24ec456.json search_songs_main_cb45a10b.json
```
## Datasets
The benchmarks use the following datasets:
- `smol-songs`
- `smol-wiki`
- `movies`
- `smol-all-countries`
### Songs
`smol-songs` is a subset of the [`songs.csv` dataset](https://milli-benchmarks.fra1.digitaloceanspaces.com/datasets/songs.csv.gz).
It was generated with this command:
```bash
xsv sample --seed 42 1000000 songs.csv -o smol-songs.csv
```
_[Download the generated `smol-songs` dataset](https://milli-benchmarks.fra1.digitaloceanspaces.com/datasets/smol-songs.csv.gz)._
### Wiki
`smol-wiki` is a subset of the [`wikipedia-articles.csv` dataset](https://milli-benchmarks.fra1.digitaloceanspaces.com/datasets/wiki-articles.csv.gz).
It was generated with the following command:
```bash
xsv sample --seed 42 500000 wiki-articles.csv -o smol-wiki-articles.csv
```
_[Download the `smol-wiki` dataset](https://milli-benchmarks.fra1.digitaloceanspaces.com/datasets/smol-wiki-articles.csv.gz)._
### Movies
`movies` is a really small dataset we use as our example in the [getting started](https://docs.meilisearch.com/learn/getting_started/).
_[Download the `movies` dataset](https://docs.meilisearch.com/movies.json)._
### All Countries
`smol-all-countries` is a subset of the [`all-countries.csv` dataset](https://milli-benchmarks.fra1.digitaloceanspaces.com/datasets/all-countries.csv.gz).
It has been converted to jsonlines and then edited so it matches our format for the `_geo` field.
It was generated with the following command:
```bash
bat all-countries.csv.gz | gunzip | xsv sample --seed 42 1000000 | csv2json-lite | sd '"latitude":"(.*?)","longitude":"(.*?)"' '"_geo": { "lat": $1, "lng": $2 }' | sd '\[|\]|,$' '' | gzip > smol-all-countries.jsonl.gz
```
_[Download the `smol-all-countries` dataset](https://milli-benchmarks.fra1.digitaloceanspaces.com/datasets/smol-all-countries.jsonl.gz)._


@@ -0,0 +1,67 @@
use std::rc::Rc;
use criterion::{criterion_group, criterion_main};
use milli::tokenizer::TokenizerBuilder;
use milli::{FormatOptions, MatcherBuilder, MatchingWord, MatchingWords};
#[global_allocator]
static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc;
struct Conf<'a> {
name: &'a str,
text: &'a str,
matching_words: MatcherBuilder<'a, Vec<u8>>,
}
fn bench_formatting(c: &mut criterion::Criterion) {
#[rustfmt::skip]
let confs = &[
Conf {
name: "'the door d'",
text: r#"He used to do the door sounds in "Star Trek" with his mouth, phssst, phssst. The MD-11 passenger and cargo doors also tend to behave like electromagnetic apertures, because the doors do not have continuous electrical contact with the door frames around the door perimeter. But Theodor said that the doors don't work."#,
matching_words: MatcherBuilder::new(MatchingWords::new(vec![
(vec![Rc::new(MatchingWord::new("t".to_string(), 0, false).unwrap()), Rc::new(MatchingWord::new("he".to_string(), 0, false).unwrap())], vec![0]),
(vec![Rc::new(MatchingWord::new("the".to_string(), 0, false).unwrap())], vec![0]),
(vec![Rc::new(MatchingWord::new("door".to_string(), 1, false).unwrap())], vec![1]),
(vec![Rc::new(MatchingWord::new("do".to_string(), 0, false).unwrap()), Rc::new(MatchingWord::new("or".to_string(), 0, false).unwrap())], vec![0]),
(vec![Rc::new(MatchingWord::new("thedoor".to_string(), 1, false).unwrap())], vec![0, 1]),
(vec![Rc::new(MatchingWord::new("d".to_string(), 0, true).unwrap())], vec![2]),
(vec![Rc::new(MatchingWord::new("thedoord".to_string(), 1, true).unwrap())], vec![0, 1, 2]),
(vec![Rc::new(MatchingWord::new("doord".to_string(), 1, true).unwrap())], vec![1, 2]),
]
), TokenizerBuilder::default().build()),
},
];
let format_options = &[
FormatOptions { highlight: false, crop: None },
FormatOptions { highlight: true, crop: None },
FormatOptions { highlight: false, crop: Some(10) },
FormatOptions { highlight: true, crop: Some(10) },
FormatOptions { highlight: false, crop: Some(20) },
FormatOptions { highlight: true, crop: Some(20) },
];
for option in format_options {
let highlight = if option.highlight { "highlight" } else { "no-highlight" };
let name = match option.crop {
Some(size) => format!("{}-crop({})", highlight, size),
None => format!("{}-no-crop", highlight),
};
let mut group = c.benchmark_group(&name);
for conf in confs {
group.bench_function(conf.name, |b| {
b.iter(|| {
let mut matcher = conf.matching_words.build(conf.text);
matcher.format(*option);
})
});
}
group.finish();
}
}
criterion_group!(benches, bench_formatting);
criterion_main!(benches);

File diff suppressed because it is too large.


@@ -0,0 +1,122 @@
mod datasets_paths;
mod utils;
use criterion::{criterion_group, criterion_main};
use milli::update::Settings;
use utils::Conf;
#[global_allocator]
static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc;
fn base_conf(builder: &mut Settings) {
let displayed_fields =
["geonameid", "name", "asciiname", "alternatenames", "_geo", "population"]
.iter()
.map(|s| s.to_string())
.collect();
builder.set_displayed_fields(displayed_fields);
let searchable_fields =
["name", "alternatenames", "elevation"].iter().map(|s| s.to_string()).collect();
builder.set_searchable_fields(searchable_fields);
let filterable_fields =
["_geo", "population", "elevation"].iter().map(|s| s.to_string()).collect();
builder.set_filterable_fields(filterable_fields);
let sortable_fields =
["_geo", "population", "elevation"].iter().map(|s| s.to_string()).collect();
builder.set_sortable_fields(sortable_fields);
}
#[rustfmt::skip]
const BASE_CONF: Conf = Conf {
dataset: datasets_paths::SMOL_ALL_COUNTRIES,
dataset_format: "jsonl",
queries: &[
"",
],
configure: base_conf,
primary_key: Some("geonameid"),
..Conf::BASE
};
fn bench_geo(c: &mut criterion::Criterion) {
#[rustfmt::skip]
let confs = &[
// A basic placeholder with no geo
utils::Conf {
group_name: "placeholder with no geo",
..BASE_CONF
},
// Medium aglomeration: probably the most common usecase
utils::Conf {
group_name: "asc sort from Lille",
sort: Some(vec!["_geoPoint(50.62999333378238, 3.086269263384099):asc"]),
..BASE_CONF
},
utils::Conf {
group_name: "desc sort from Lille",
sort: Some(vec!["_geoPoint(50.62999333378238, 3.086269263384099):desc"]),
..BASE_CONF
},
// Big agglomeration: a lot of documents close to our point
utils::Conf {
group_name: "asc sort from Tokyo",
sort: Some(vec!["_geoPoint(35.749512532692144, 139.61664952543356):asc"]),
..BASE_CONF
},
utils::Conf {
group_name: "desc sort from Tokyo",
sort: Some(vec!["_geoPoint(35.749512532692144, 139.61664952543356):desc"]),
..BASE_CONF
},
// The furthest point from any civilization
utils::Conf {
group_name: "asc sort from Point Nemo",
sort: Some(vec!["_geoPoint(-48.87561645055408, -123.39275749319793):asc"]),
..BASE_CONF
},
utils::Conf {
group_name: "desc sort from Point Nemo",
sort: Some(vec!["_geoPoint(-48.87561645055408, -123.39275749319793):desc"]),
..BASE_CONF
},
// Filters
utils::Conf {
group_name: "filter of 100km from Lille",
filter: Some("_geoRadius(50.62999333378238, 3.086269263384099, 100000)"),
..BASE_CONF
},
utils::Conf {
group_name: "filter of 1km from Lille",
filter: Some("_geoRadius(50.62999333378238, 3.086269263384099, 1000)"),
..BASE_CONF
},
utils::Conf {
group_name: "filter of 100km from Tokyo",
filter: Some("_geoRadius(35.749512532692144, 139.61664952543356, 100000)"),
..BASE_CONF
},
utils::Conf {
group_name: "filter of 1km from Tokyo",
filter: Some("_geoRadius(35.749512532692144, 139.61664952543356, 1000)"),
..BASE_CONF
},
utils::Conf {
group_name: "filter of 100km from Point Nemo",
filter: Some("_geoRadius(-48.87561645055408, -123.39275749319793, 100000)"),
..BASE_CONF
},
utils::Conf {
group_name: "filter of 1km from Point Nemo",
filter: Some("_geoRadius(-48.87561645055408, -123.39275749319793, 1000)"),
..BASE_CONF
},
];
utils::run_benches(c, confs);
}
criterion_group!(benches, bench_geo);
criterion_main!(benches);


@@ -0,0 +1,196 @@
mod datasets_paths;
mod utils;
use criterion::{criterion_group, criterion_main};
use milli::update::Settings;
use utils::Conf;
#[global_allocator]
static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc;
fn base_conf(builder: &mut Settings) {
let displayed_fields =
["id", "title", "album", "artist", "genre", "country", "released", "duration"]
.iter()
.map(|s| s.to_string())
.collect();
builder.set_displayed_fields(displayed_fields);
let searchable_fields = ["title", "album", "artist"].iter().map(|s| s.to_string()).collect();
builder.set_searchable_fields(searchable_fields);
let faceted_fields = ["released-timestamp", "duration-float", "genre", "country", "artist"]
.iter()
.map(|s| s.to_string())
.collect();
builder.set_filterable_fields(faceted_fields);
}
#[rustfmt::skip]
const BASE_CONF: Conf = Conf {
dataset: datasets_paths::SMOL_SONGS,
queries: &[
"john ", // 9097
"david ", // 4794
"charles ", // 1957
"david bowie ", // 1200
"michael jackson ", // 600
"thelonious monk ", // 303
"charles mingus ", // 142
"marcus miller ", // 60
"tamo ", // 13
"Notstandskomitee ", // 4
],
configure: base_conf,
primary_key: Some("id"),
..Conf::BASE
};
fn bench_songs(c: &mut criterion::Criterion) {
let default_criterion: Vec<String> =
milli::default_criteria().iter().map(|criteria| criteria.to_string()).collect();
let default_criterion = default_criterion.iter().map(|s| s.as_str());
let asc_default: Vec<&str> =
std::iter::once("released-timestamp:asc").chain(default_criterion.clone()).collect();
let desc_default: Vec<&str> =
std::iter::once("released-timestamp:desc").chain(default_criterion.clone()).collect();
let basic_with_quote: Vec<String> = BASE_CONF
.queries
.iter()
.map(|s| {
s.trim().split(' ').map(|s| format!(r#""{}""#, s)).collect::<Vec<String>>().join(" ")
})
.collect();
let basic_with_quote: &[&str] =
&basic_with_quote.iter().map(|s| s.as_str()).collect::<Vec<&str>>();
#[rustfmt::skip]
let confs = &[
/* first we bench each criterion alone */
utils::Conf {
group_name: "proximity",
queries: &[
"black saint sinner lady ",
"les dangeureuses 1960 ",
"The Disneyland Sing-Along Chorus ",
"Under Great Northern Lights ",
"7000 Danses Un Jour Dans Notre Vie ",
],
criterion: Some(&["proximity"]),
optional_words: false,
..BASE_CONF
},
utils::Conf {
group_name: "typo",
queries: &[
"mongus ",
"thelonius monk ",
"Disnaylande ",
"the white striper ",
"indochie ",
"indochien ",
"klub des loopers ",
"fear of the duck ",
"michel depech ",
"stromal ",
"dire straights ",
"Arethla Franklin ",
],
criterion: Some(&["typo"]),
optional_words: false,
..BASE_CONF
},
utils::Conf {
group_name: "words",
queries: &[
"the black saint and the sinner lady and the good doggo ", // four words to pop
"les liaisons dangeureuses 1793 ", // one word to pop
"The Disneyland Children's Sing-Alone song ", // two words to pop
"seven nation mummy ", // one word to pop
"7000 Danses / Le Baiser / je me trompe de mots ", // four words to pop
"Bring Your Daughter To The Slaughter but now this is not part of the title ", // nine words to pop
"whathavenotnsuchforth and a good amount of words to pop to match the first one ", // 13
],
criterion: Some(&["words"]),
..BASE_CONF
},
utils::Conf {
group_name: "asc",
criterion: Some(&["released-timestamp:desc"]),
..BASE_CONF
},
utils::Conf {
group_name: "desc",
criterion: Some(&["released-timestamp:desc"]),
..BASE_CONF
},
/* then we bench the asc and desc criterion on top of the default criterion */
utils::Conf {
group_name: "asc + default",
criterion: Some(&asc_default[..]),
..BASE_CONF
},
utils::Conf {
group_name: "desc + default",
criterion: Some(&desc_default[..]),
..BASE_CONF
},
/* we bench the filters with the default request */
utils::Conf {
group_name: "basic filter: <=",
filter: Some("released-timestamp <= 946728000"), // year 2000
..BASE_CONF
},
utils::Conf {
group_name: "basic filter: TO",
filter: Some("released-timestamp 946728000 TO 1262347200"), // year 2000 to 2010
..BASE_CONF
},
utils::Conf {
group_name: "big filter",
filter: Some("released-timestamp != 1262347200 AND (NOT (released-timestamp = 946728000)) AND (duration-float = 1 OR (duration-float 1.1 TO 1.5 AND released-timestamp > 315576000))"),
..BASE_CONF
},
/* the we bench some global / normal search with all the default criterion in the default
* order */
utils::Conf {
group_name: "basic placeholder",
queries: &[""],
..BASE_CONF
},
utils::Conf {
group_name: "basic without quote",
queries: &BASE_CONF
.queries
.iter()
.map(|s| s.trim()) // we remove the space at the end of each request
.collect::<Vec<&str>>(),
..BASE_CONF
},
utils::Conf {
group_name: "basic with quote",
queries: basic_with_quote,
..BASE_CONF
},
utils::Conf {
group_name: "prefix search",
queries: &[
"s", // 500k+ results
"a", //
"b", //
"i", //
"x", // only 7k results
],
..BASE_CONF
},
];
utils::run_benches(c, confs);
}
criterion_group!(benches, bench_songs);
criterion_main!(benches);


@@ -0,0 +1,129 @@
mod datasets_paths;
mod utils;
use criterion::{criterion_group, criterion_main};
use milli::update::Settings;
use utils::Conf;
#[global_allocator]
static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc;
fn base_conf(builder: &mut Settings) {
let displayed_fields = ["title", "body", "url"].iter().map(|s| s.to_string()).collect();
builder.set_displayed_fields(displayed_fields);
let searchable_fields = ["title", "body"].iter().map(|s| s.to_string()).collect();
builder.set_searchable_fields(searchable_fields);
}
#[rustfmt::skip]
const BASE_CONF: Conf = Conf {
dataset: datasets_paths::SMOL_WIKI_ARTICLES,
queries: &[
"mingus ", // 46 candidates
"miles davis ", // 159
"rock and roll ", // 1007
"machine ", // 3448
"spain ", // 7002
"japan ", // 10.593
"france ", // 17.616
"film ", // 24.959
],
configure: base_conf,
..Conf::BASE
};
fn bench_songs(c: &mut criterion::Criterion) {
let basic_with_quote: Vec<String> = BASE_CONF
.queries
.iter()
.map(|s| {
s.trim().split(' ').map(|s| format!(r#""{}""#, s)).collect::<Vec<String>>().join(" ")
})
.collect();
let basic_with_quote: &[&str] =
&basic_with_quote.iter().map(|s| s.as_str()).collect::<Vec<&str>>();
#[rustfmt::skip]
let confs = &[
/* first we bench each criterion alone */
utils::Conf {
group_name: "proximity",
queries: &[
"herald sings ",
"april paris ",
"tea two ",
"diesel engine ",
],
criterion: Some(&["proximity"]),
optional_words: false,
..BASE_CONF
},
utils::Conf {
group_name: "typo",
queries: &[
"migrosoft ",
"linax ",
"Disnaylande ",
"phytogropher ",
"nympalidea ",
"aritmetric ",
"the fronce ",
"sisan ",
],
criterion: Some(&["typo"]),
optional_words: false,
..BASE_CONF
},
utils::Conf {
group_name: "words",
queries: &[
"the black saint and the sinner lady and the good doggo ", // four words to pop, 27 results
"Kameya Tokujirō mingus monk ", // two words to pop, 55
"Ulrich Hensel meilisearch milli ", // two words to pop, 306
"Idaho Bellevue pizza ", // one word to pop, 800
"Abraham machin ", // one word to pop, 1141
],
criterion: Some(&["words"]),
..BASE_CONF
},
/* the we bench some global / normal search with all the default criterion in the default
* order */
utils::Conf {
group_name: "basic placeholder",
queries: &[""],
..BASE_CONF
},
utils::Conf {
group_name: "basic without quote",
queries: &BASE_CONF
.queries
.iter()
.map(|s| s.trim()) // we remove the space at the end of each request
.collect::<Vec<&str>>(),
..BASE_CONF
},
utils::Conf {
group_name: "basic with quote",
queries: basic_with_quote,
..BASE_CONF
},
utils::Conf {
group_name: "prefix search",
queries: &[
"t", // 453k results
"c", // 405k
"g", // 318k
"j", // 227k
"q", // 71k
"x", // 17k
],
..BASE_CONF
},
];
utils::run_benches(c, confs);
}
criterion_group!(benches, bench_songs);
criterion_main!(benches);

benchmarks/benches/utils.rs

@@ -0,0 +1,256 @@
#![allow(dead_code)]
use std::fs::{create_dir_all, remove_dir_all, File};
use std::io::{self, BufRead, BufReader, Cursor, Read, Seek};
use std::num::ParseFloatError;
use std::path::Path;
use std::str::FromStr;
use criterion::BenchmarkId;
use milli::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
use milli::heed::EnvOpenOptions;
use milli::update::{
IndexDocuments, IndexDocumentsConfig, IndexDocumentsMethod, IndexerConfig, Settings,
};
use milli::{Criterion, Filter, Index, Object, TermsMatchingStrategy};
use serde_json::Value;
pub struct Conf<'a> {
/// where we are going to create our database.mmdb directory
/// each benchmark will first try to delete it and then recreate it
pub database_name: &'a str,
/// the dataset to be used; it must be uncompressed, in the format given by `dataset_format`
pub dataset: &'a str,
/// The format of the dataset
pub dataset_format: &'a str,
pub group_name: &'a str,
pub queries: &'a [&'a str],
/// here you can change which criteria are used and in which order.
/// - if you specify something, all the base configuration will be thrown out
/// - if you don't specify anything (None), the default configuration will be kept
pub criterion: Option<&'a [&'a str]>,
/// the last chance to configure your database as you want
pub configure: fn(&mut Settings),
pub filter: Option<&'a str>,
pub sort: Option<Vec<&'a str>>,
/// enable or disable the optional words on the query
pub optional_words: bool,
/// primary key; if it is None, we'll auto-generate docids for every document
pub primary_key: Option<&'a str>,
}
impl Conf<'_> {
pub const BASE: Self = Conf {
database_name: "benches.mmdb",
dataset_format: "csv",
dataset: "",
group_name: "",
queries: &[],
criterion: None,
configure: |_| (),
filter: None,
sort: None,
optional_words: true,
primary_key: None,
};
}
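/// Deletes any previous `database_name` directory, opens a fresh index capped at
/// 100 GiB, applies the settings described by the `Conf`, and indexes its dataset.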
pub fn base_setup(conf: &Conf) -> Index {
match remove_dir_all(conf.database_name) {
Ok(_) => (),
Err(e) if e.kind() == std::io::ErrorKind::NotFound => (),
Err(e) => panic!("{}", e),
}
create_dir_all(conf.database_name).unwrap();
let mut options = EnvOpenOptions::new();
options.map_size(100 * 1024 * 1024 * 1024); // 100 GB
options.max_readers(10);
let index = Index::new(options, conf.database_name).unwrap();
let config = IndexerConfig::default();
let mut wtxn = index.write_txn().unwrap();
let mut builder = Settings::new(&mut wtxn, &index, &config);
if let Some(primary_key) = conf.primary_key {
builder.set_primary_key(primary_key.to_string());
}
if let Some(criterion) = conf.criterion {
builder.reset_filterable_fields();
builder.reset_criteria();
builder.reset_stop_words();
let criterion = criterion.iter().map(|s| Criterion::from_str(s).unwrap()).collect();
builder.set_criteria(criterion);
}
(conf.configure)(&mut builder);
builder.execute(|_| (), || false).unwrap();
wtxn.commit().unwrap();
let config = IndexerConfig::default();
let mut wtxn = index.write_txn().unwrap();
let indexing_config = IndexDocumentsConfig {
autogenerate_docids: conf.primary_key.is_none(),
update_method: IndexDocumentsMethod::ReplaceDocuments,
..Default::default()
};
let builder =
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| (), || false).unwrap();
let documents = documents_from(conf.dataset, conf.dataset_format);
let (builder, user_error) = builder.add_documents(documents).unwrap();
user_error.unwrap();
builder.execute().unwrap();
wtxn.commit().unwrap();
index
}
pub fn run_benches(c: &mut criterion::Criterion, confs: &[Conf]) {
for conf in confs {
let index = base_setup(conf);
let file_name = Path::new(conf.dataset).file_name().and_then(|f| f.to_str()).unwrap();
let name = format!("{}: {}", file_name, conf.group_name);
let mut group = c.benchmark_group(&name);
for &query in conf.queries {
group.bench_with_input(BenchmarkId::from_parameter(query), &query, |b, &query| {
b.iter(|| {
let rtxn = index.read_txn().unwrap();
let mut search = index.search(&rtxn);
search.query(query).terms_matching_strategy(TermsMatchingStrategy::default());
if let Some(filter) = conf.filter {
let filter = Filter::from_str(filter).unwrap().unwrap();
search.filter(filter);
}
if let Some(sort) = &conf.sort {
let sort = sort.iter().map(|sort| sort.parse().unwrap()).collect();
search.sort_criteria(sort);
}
let _ids = search.execute().unwrap();
});
});
}
group.finish();
index.prepare_for_closing().wait();
}
}
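/// Opens the dataset at `filename` and wraps it in a `DocumentsBatchReader`,
/// dispatching on `filetype` ("csv", "json" or "jsonl").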
pub fn documents_from(filename: &str, filetype: &str) -> DocumentsBatchReader<impl BufRead + Seek> {
let reader = File::open(filename)
.unwrap_or_else(|_| panic!("could not find the dataset in: {}", filename));
let reader = BufReader::new(reader);
let documents = match filetype {
"csv" => documents_from_csv(reader).unwrap(),
"json" => documents_from_json(reader).unwrap(),
"jsonl" => documents_from_jsonl(reader).unwrap(),
otherwise => panic!("invalid update format {:?}", otherwise),
};
DocumentsBatchReader::from_reader(Cursor::new(documents)).unwrap()
}
fn documents_from_jsonl(reader: impl BufRead) -> anyhow::Result<Vec<u8>> {
let mut documents = DocumentsBatchBuilder::new(Vec::new());
for result in serde_json::Deserializer::from_reader(reader).into_iter::<Object>() {
let object = result?;
documents.append_json_object(&object)?;
}
documents.into_inner().map_err(Into::into)
}
fn documents_from_json(reader: impl BufRead) -> anyhow::Result<Vec<u8>> {
let mut documents = DocumentsBatchBuilder::new(Vec::new());
documents.append_json_array(reader)?;
documents.into_inner().map_err(Into::into)
}
fn documents_from_csv(reader: impl BufRead) -> anyhow::Result<Vec<u8>> {
let csv = csv::Reader::from_reader(reader);
let mut documents = DocumentsBatchBuilder::new(Vec::new());
documents.append_csv(csv)?;
documents.into_inner().map_err(Into::into)
}
enum AllowedType {
String,
Number,
}
fn parse_csv_header(header: &str) -> (String, AllowedType) {
// if there are several separators we only split on the last one.
match header.rsplit_once(':') {
Some((field_name, field_type)) => match field_type {
"string" => (field_name.to_string(), AllowedType::String),
"number" => (field_name.to_string(), AllowedType::Number),
// we may return an error in this case.
_otherwise => (header.to_string(), AllowedType::String),
},
None => (header.to_string(), AllowedType::String),
}
}
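// Illustrative behavior of the header parsing above (it splits on the *last* ':'):
//   "population:number" -> ("population", AllowedType::Number)
//   "a:b:string"        -> ("a:b", AllowedType::String)
//   "name"              -> ("name", AllowedType::String)

/// Streams CSV records as JSON objects, using the typed headers produced by
/// `parse_csv_header` to decide whether each field becomes a string or a number.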
struct CSVDocumentDeserializer<R>
where
R: Read,
{
documents: csv::StringRecordsIntoIter<R>,
headers: Vec<(String, AllowedType)>,
}
impl<R: Read> CSVDocumentDeserializer<R> {
fn from_reader(reader: R) -> io::Result<Self> {
let mut records = csv::Reader::from_reader(reader);
let headers = records.headers()?.into_iter().map(parse_csv_header).collect();
Ok(Self { documents: records.into_records(), headers })
}
}
impl<R: Read> Iterator for CSVDocumentDeserializer<R> {
type Item = anyhow::Result<Object>;
fn next(&mut self) -> Option<Self::Item> {
let csv_document = self.documents.next()?;
match csv_document {
Ok(csv_document) => {
let mut document = Object::new();
for ((field_name, field_type), value) in
self.headers.iter().zip(csv_document.into_iter())
{
let parsed_value: Result<Value, ParseFloatError> = match field_type {
AllowedType::Number => {
value.parse::<f64>().map(Value::from).map_err(Into::into)
}
AllowedType::String => Ok(Value::String(value.to_string())),
};
match parsed_value {
Ok(value) => drop(document.insert(field_name.to_string(), value)),
Err(_e) => {
return Some(Err(anyhow::anyhow!(
"Value '{}' is not a valid number",
value
)))
}
}
}
Some(Ok(document))
}
Err(e) => Some(Err(anyhow::anyhow!("Error parsing csv document: {}", e))),
}
}
}

benchmarks/build.rs

@@ -0,0 +1,115 @@
use std::fs::File;
use std::io::{Cursor, Read, Seek, Write};
use std::path::{Path, PathBuf};
use std::{env, fs};
use bytes::Bytes;
use convert_case::{Case, Casing};
use flate2::read::GzDecoder;
use reqwest::IntoUrl;
const BASE_URL: &str = "https://milli-benchmarks.fra1.digitaloceanspaces.com/datasets";
const DATASET_SONGS: (&str, &str) = ("smol-songs", "csv");
const DATASET_SONGS_1_2: (&str, &str) = ("smol-songs-1_2", "csv");
const DATASET_SONGS_3_4: (&str, &str) = ("smol-songs-3_4", "csv");
const DATASET_SONGS_4_4: (&str, &str) = ("smol-songs-4_4", "csv");
const DATASET_WIKI: (&str, &str) = ("smol-wiki-articles", "csv");
const DATASET_WIKI_1_2: (&str, &str) = ("smol-wiki-articles-1_2", "csv");
const DATASET_WIKI_3_4: (&str, &str) = ("smol-wiki-articles-3_4", "csv");
const DATASET_WIKI_4_4: (&str, &str) = ("smol-wiki-articles-4_4", "csv");
const DATASET_MOVIES: (&str, &str) = ("movies", "json");
const DATASET_MOVIES_1_2: (&str, &str) = ("movies-1_2", "json");
const DATASET_MOVIES_3_4: (&str, &str) = ("movies-3_4", "json");
const DATASET_MOVIES_4_4: (&str, &str) = ("movies-4_4", "json");
const DATASET_NESTED_MOVIES: (&str, &str) = ("nested_movies", "json");
const DATASET_GEO: (&str, &str) = ("smol-all-countries", "jsonl");
const ALL_DATASETS: &[(&str, &str)] = &[
DATASET_SONGS,
DATASET_SONGS_1_2,
DATASET_SONGS_3_4,
DATASET_SONGS_4_4,
DATASET_WIKI,
DATASET_WIKI_1_2,
DATASET_WIKI_3_4,
DATASET_WIKI_4_4,
DATASET_MOVIES,
DATASET_MOVIES_1_2,
DATASET_MOVIES_3_4,
DATASET_MOVIES_4_4,
DATASET_NESTED_MOVIES,
DATASET_GEO,
];
/// The name of the environment variable used to select the path
/// of the directory containing the datasets
const BASE_DATASETS_PATH_KEY: &str = "MILLI_BENCH_DATASETS_PATH";
fn main() -> anyhow::Result<()> {
let out_dir = PathBuf::from(env::var(BASE_DATASETS_PATH_KEY).unwrap_or(env::var("OUT_DIR")?));
let benches_dir = PathBuf::from(env::var("CARGO_MANIFEST_DIR")?).join("benches");
let mut manifest_paths_file = File::create(benches_dir.join("datasets_paths.rs"))?;
write!(
manifest_paths_file,
r#"//! This file is generated by the build script.
//! Do not modify by hand, use the build.rs file.
#![allow(dead_code)]
"#
)?;
writeln!(manifest_paths_file)?;
for (dataset, extension) in ALL_DATASETS {
let out_path = out_dir.join(dataset);
let out_file = out_path.with_extension(extension);
writeln!(
&mut manifest_paths_file,
r#"pub const {}: &str = {:?};"#,
dataset.to_case(Case::ScreamingSnake),
out_file.display(),
)?;
if out_file.exists() {
eprintln!(
"The dataset {} already exists on the file system and will not be downloaded again",
out_path.display(),
);
continue;
}
let url = format!("{}/{}.{}.gz", BASE_URL, dataset, extension);
eprintln!("downloading: {}", url);
let bytes = retry(|| download_dataset(url.clone()), 10)?;
eprintln!("{} downloaded successfully", url);
eprintln!("uncompressing in {}", out_file.display());
uncompress_in_file(bytes, &out_file)?;
}
Ok(())
}
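/// Calls `fun` up to `times` times, returning the first success; if every retry
/// fails, one final attempt is made and its result (or error) is returned.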
fn retry<Ok, Err>(fun: impl Fn() -> Result<Ok, Err>, times: usize) -> Result<Ok, Err> {
for _ in 0..times {
if let ok @ Ok(_) = fun() {
return ok;
}
}
fun()
}
fn download_dataset<U: IntoUrl>(url: U) -> anyhow::Result<Cursor<Bytes>> {
let bytes =
reqwest::blocking::Client::builder().timeout(None).build()?.get(url).send()?.bytes()?;
Ok(Cursor::new(bytes))
}
fn uncompress_in_file<R: Read + Seek, P: AsRef<Path>>(bytes: R, path: P) -> anyhow::Result<()> {
let path = path.as_ref();
let mut gz = GzDecoder::new(bytes);
let mut dataset = Vec::new();
gz.read_to_end(&mut dataset)?;
fs::write(path, dataset)?;
Ok(())
}
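The build script above writes one `pub const` per dataset into `benches/datasets_paths.rs`. A hypothetical sketch of how a benchmark could consume it (the module path and dataset constant are assumptions for illustration):

```rust
// Generated by build.rs; each constant holds the on-disk path of a dataset.
mod datasets_paths;

fn main() {
    // "smol-songs" becomes SMOL_SONGS via Case::ScreamingSnake in build.rs.
    let path = datasets_paths::SMOL_SONGS;
    println!("loading songs dataset from {path}");
}
```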

38
benchmarks/scripts/compare.sh Executable file
View File

@@ -0,0 +1,38 @@
#!/usr/bin/env bash
# Requirements:
# - critcmp. See: https://github.com/BurntSushi/critcmp
# - curl
# Usage
# $ bash compare.sh json_file1 json_file2
# ex: bash compare.sh songs_main_09a4321.json songs_geosearch_24ec456.json
# Checking that critcmp is installed
command -v critcmp > /dev/null 2>&1
if [[ "$?" -ne 0 ]]; then
echo 'You must install critcmp to make this script work.'
echo 'See: https://github.com/BurntSushi/critcmp'
echo ' $ cargo install critcmp'
exit 1
fi
s3_url='https://milli-benchmarks.fra1.digitaloceanspaces.com/critcmp_results'
for file in $@
do
file_s3_url="$s3_url/$file"
file_local_path="/tmp/$file"
if [[ ! -f $file_local_path ]]; then
curl $file_s3_url --output $file_local_path --silent
if [[ "$?" -ne 0 ]]; then
echo 'curl command failed.'
exit 1
fi
fi
done
path_list=$(echo " $@" | sed 's/ / \/tmp\//g')
critcmp $path_list

14
benchmarks/scripts/list.sh Executable file
View File

@@ -0,0 +1,14 @@
#!/usr/bin/env bash
# Requirements:
# - curl
# - grep
res=$(curl -s https://milli-benchmarks.fra1.digitaloceanspaces.com | grep -o '<Key>[^<]\+' | cut -c 5- | grep critcmp_results/ | cut -c 18-)
for pattern in "$@"
do
res=$(echo "$res" | grep $pattern)
done
echo "$res"

5
benchmarks/src/lib.rs Normal file
View File

@@ -0,0 +1,5 @@
//! This library is only used to isolate the benchmarks
//! from the original milli library.
//!
//! It does not include functions that are interesting for milli library
//! users, only for milli contributors.

View File

@@ -1,7 +1,7 @@
status = [
'Tests on ubuntu-18.04',
'Tests on macos-latest',
'Tests on windows-latest',
'Tests on macos-12',
'Tests on windows-2022',
'Run Clippy',
'Run Rustfmt',
'Run tests in debug',

View File

@@ -28,17 +28,9 @@ http_payload_size_limit = "100 MB"
log_level = "INFO"
# Defines how much detail should be present in Meilisearch's logs.
# Meilisearch currently supports five log levels, listed in order of increasing verbosity: `ERROR`, `WARN`, `INFO`, `DEBUG`, `TRACE`
# Meilisearch currently supports six log levels, listed in order of increasing verbosity: `OFF`, `ERROR`, `WARN`, `INFO`, `DEBUG`, `TRACE`
# https://docs.meilisearch.com/learn/configuration/instance_options.html#log-level
max_index_size = "100 GiB"
# Sets the maximum size of the index.
# https://docs.meilisearch.com/learn/configuration/instance_options.html#max-index-size
max_task_db_size = "100 GiB"
# Sets the maximum size of the task database.
# https://docs.meilisearch.com/learn/configuration/instance_options.html#max-task-db-size
# max_indexing_memory = "2 GiB"
# Sets the maximum amount of RAM Meilisearch can use when indexing.
# https://docs.meilisearch.com/learn/configuration/instance_options.html#max-indexing-memory
@@ -47,18 +39,13 @@ max_task_db_size = "100 GiB"
# Sets the maximum number of threads Meilisearch can use during indexing.
# https://docs.meilisearch.com/learn/configuration/instance_options.html#max-indexing-threads
disable_auto_batching = false
# Deactivates auto-batching when provided.
# https://docs.meilisearch.com/learn/configuration/instance_options.html#disable-auto-batching
#############
### DUMPS ###
#############
dumps_dir = "dumps/"
dump_dir = "dumps/"
# Sets the directory where Meilisearch will create dump files.
# https://docs.meilisearch.com/learn/configuration/instance_options.html#dumps-destination
# https://docs.meilisearch.com/learn/configuration/instance_options.html#dump-directory
# import_dump = "./path/to/my/file.dump"
# Imports the dump file located at the specified path. Path must point to a .dump file.
@@ -78,17 +65,15 @@ ignore_dump_if_db_exists = false
#################
schedule_snapshot = false
# Activates scheduled snapshots when provided.
# Enables scheduled snapshots when true, disables them when false (the default).
# If the value is given as an integer, then enables the scheduled snapshot with the passed value as the interval
# between each snapshot, in seconds.
# https://docs.meilisearch.com/learn/configuration/instance_options.html#schedule-snapshot-creation
snapshot_dir = "snapshots/"
# Sets the directory where Meilisearch will store snapshots.
# https://docs.meilisearch.com/learn/configuration/instance_options.html#snapshot-destination
snapshot_interval_sec = 86400
# Defines the interval between each snapshot. Value must be given in seconds.
# https://docs.meilisearch.com/learn/configuration/instance_options.html#snapshot-interval
# import_snapshot = "./path/to/my/snapshot"
# Launches Meilisearch after importing a previously-generated snapshot at the given filepath.
# https://docs.meilisearch.com/learn/configuration/instance_options.html#import-snapshot

View File

@@ -1,5 +1,8 @@
#!/bin/sh
# This script can optionally use a GitHub token to increase your request limit (for example, if using this script in a CI).
# To use a GitHub token, pass it through the GITHUB_PAT environment variable.
# GLOBALS
# Colors
@@ -10,9 +13,6 @@ DEFAULT='\033[0m'
# Project name
PNAME='meilisearch'
# Version regexp i.e. v[number].[number].[number]
GREP_SEMVER_REGEXP='v\([0-9]*\)[.]\([0-9]*\)[.]\([0-9]*\)$'
# GitHub API address
GITHUB_API='https://api.github.com/repos/meilisearch/meilisearch/releases'
# GitHub Release address
@@ -20,126 +20,26 @@ GITHUB_REL='https://github.com/meilisearch/meilisearch/releases/download/'
# FUNCTIONS
# semverParseInto and semverLT from: https://github.com/cloudflare/semver_bash/blob/master/semver.sh
# usage: semverParseInto version major minor patch special
# version: the string version
# major, minor, patch, special: will be assigned by the function
semverParseInto() {
local RE='[^0-9]*\([0-9]*\)[.]\([0-9]*\)[.]\([0-9]*\)\([0-9A-Za-z-]*\)'
# MAJOR
eval $2=`echo $1 | sed -e "s#$RE#\1#"`
# MINOR
eval $3=`echo $1 | sed -e "s#$RE#\2#"`
# PATCH
eval $4=`echo $1 | sed -e "s#$RE#\3#"`
# SPECIAL
eval $5=`echo $1 | sed -e "s#$RE#\4#"`
}
# usage: semverLT version1 version2
semverLT() {
local MAJOR_A=0
local MINOR_A=0
local PATCH_A=0
local SPECIAL_A=0
local MAJOR_B=0
local MINOR_B=0
local PATCH_B=0
local SPECIAL_B=0
semverParseInto $1 MAJOR_A MINOR_A PATCH_A SPECIAL_A
semverParseInto $2 MAJOR_B MINOR_B PATCH_B SPECIAL_B
if [ $MAJOR_A -lt $MAJOR_B ]; then
return 0
fi
if [ $MAJOR_A -le $MAJOR_B ] && [ $MINOR_A -lt $MINOR_B ]; then
return 0
fi
if [ $MAJOR_A -le $MAJOR_B ] && [ $MINOR_A -le $MINOR_B ] && [ $PATCH_A -lt $PATCH_B ]; then
return 0
fi
if [ "_$SPECIAL_A" == '_' ] && [ "_$SPECIAL_B" == '_' ] ; then
return 1
fi
if [ "_$SPECIAL_A" == '_' ] && [ "_$SPECIAL_B" != '_' ] ; then
return 1
fi
if [ "_$SPECIAL_A" != '_' ] && [ "_$SPECIAL_B" == '_' ] ; then
return 0
fi
if [ "_$SPECIAL_A" < "_$SPECIAL_B" ]; then
return 0
fi
return 1
}
# Get a token from: https://github.com/settings/tokens to increase rate limit (from 60 to 5000),
# make sure the token scope is set to 'public_repo'.
# Create GITHUB_PAT environment variable once you acquired the token to start using it.
# Returns the tag of the latest stable release (in terms of semver and not of release date).
# Gets the latest stable version of Meilisearch by setting the $latest variable.
# Returns 0 in case of success, 1 otherwise.
get_latest() {
# temp_file is needed because the grep would start before the download is over
temp_file=$(mktemp -q /tmp/$PNAME.XXXXXXXXX)
latest_release="$GITHUB_API/latest"
if [ $? -ne 0 ]; then
echo "$0: Can't create temp file, bye bye.."
echo "$0: Can't create temp file."
fetch_release_failure_usage
exit 1
fi
if [ -z "$GITHUB_PAT" ]; then
curl -s $GITHUB_API > "$temp_file" || return 1
curl -s "$latest_release" > "$temp_file" || return 1
else
curl -H "Authorization: token $GITHUB_PAT" -s $GITHUB_API > "$temp_file" || return 1
curl -H "Authorization: token $GITHUB_PAT" -s "$latest_release" > "$temp_file" || return 1
fi
releases=$(cat "$temp_file" | \
grep -E '"tag_name":|"draft":|"prerelease":' \
| tr -d ',"' | cut -d ':' -f2 | tr -d ' ')
# Returns a list of [tag_name draft_boolean prerelease_boolean ...]
# Ex: v0.10.1 false false v0.9.1-rc.1 false true v0.9.0 false false...
i=0
latest=''
current_tag=''
for release_info in $releases; do
# Checking tag_name
if [ $i -eq 0 ]; then
# If it's not an alpha or beta release
if echo "$release_info" | grep -q "$GREP_SEMVER_REGEXP"; then
current_tag=$release_info
else
current_tag=''
fi
i=1
# Checking draft boolean
elif [ $i -eq 1 ]; then
if [ "$release_info" = 'true' ]; then
current_tag=''
fi
i=2
# Checking prerelease boolean
elif [ $i -eq 2 ]; then
if [ "$release_info" = 'true' ]; then
current_tag=''
fi
i=0
# If the current_tag is valid
if [ "$current_tag" != '' ]; then
# If there is no latest yet
if [ "$latest" = '' ]; then
latest="$current_tag"
else
# Comparing latest and the current tag
semverLT $current_tag $latest
if [ $? -eq 1 ]; then
latest="$current_tag"
fi
fi
fi
fi
done
latest="$(cat "$temp_file" | grep '"tag_name":' | cut -d ':' -f2 | tr -d '"' | tr -d ',' | tr -d ' ')"
rm -f "$temp_file"
return 0
@@ -174,9 +74,9 @@ get_archi() {
archi='amd64'
;;
'arm64')
# MacOS M1
# macOS M1/M2
if [ $os = 'macos' ]; then
archi='amd64'
archi='apple-silicon'
else
archi='aarch64'
fi
@@ -210,12 +110,13 @@ fetch_release_failure_usage() {
echo ''
printf "$RED%s\n$DEFAULT" 'ERROR: Impossible to get the latest stable version of Meilisearch.'
echo 'Please let us know about this issue: https://github.com/meilisearch/meilisearch/issues/new/choose'
echo ''
echo 'In the meantime, you can manually download the appropriate binary from the GitHub release assets here: https://github.com/meilisearch/meilisearch/releases/latest'
}
fill_release_variables() {
# Fill $latest variable.
if ! get_latest; then
# TO CHANGE.
fetch_release_failure_usage
exit 1
fi
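The script now delegates version selection to GitHub's `/releases/latest` endpoint instead of re-implementing semver comparison in shell. For contrast, a minimal Rust sketch of the same strategy (assuming `reqwest` with the `blocking` and `json` features, plus `serde_json` and `anyhow`):

```rust
fn latest_stable_tag() -> anyhow::Result<String> {
    // /releases/latest already excludes drafts and prereleases, so no local
    // semver comparison is needed.
    let body: serde_json::Value = reqwest::blocking::Client::new()
        .get("https://api.github.com/repos/meilisearch/meilisearch/releases/latest")
        .header("User-Agent", "download-latest-sketch") // GitHub rejects requests without a UA
        .send()?
        .json()?;
    body["tag_name"]
        .as_str()
        .map(str::to_owned)
        .ok_or_else(|| anyhow::anyhow!("no tag_name in the GitHub response"))
}
```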

View File

@@ -1,7 +1,14 @@
[package]
name = "dump"
version = "0.30.0"
edition = "2021"
publish = false
version.workspace = true
authors.workspace = true
description.workspace = true
edition.workspace = true
homepage.workspace = true
readme.workspace = true
license.workspace = true
[dependencies]
anyhow = "1.0.65"

View File

@@ -3,8 +3,6 @@ use thiserror::Error;
#[derive(Debug, Error)]
pub enum Error {
#[error("The version 1 of the dumps is not supported anymore. You can re-export your dump from a version between 0.21 and 0.24, or start fresh from a version 0.25 onwards.")]
DumpV1Unsupported,
#[error("Bad index name.")]
BadIndexName,
#[error("Malformed task.")]
@@ -21,14 +19,14 @@ pub enum Error {
impl ErrorCode for Error {
fn error_code(&self) -> Code {
match self {
// Are these three really Internal errors?
// TODO look at that later.
Error::Io(_) => Code::Internal,
Error::Io(e) => e.error_code(),
// These errors either happen when creating a dump and don't need any error code,
// or come from an internal bad deserialization.
Error::Serde(_) => Code::Internal,
Error::Uuid(_) => Code::Internal,
// all these errors should never be raised when creating a dump, thus no error code should be associated.
Error::DumpV1Unsupported => Code::Internal,
Error::BadIndexName => Code::Internal,
Error::MalformedTask => Code::Internal,
}
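The interesting change here is `Error::Io(e) => e.error_code()`: instead of flattening every I/O failure into `Code::Internal`, the dump error now delegates to the inner error's own code. A reduced sketch of the delegation pattern (the types below are illustrative stand-ins for `meilisearch_types`, and the `WriteZero` mapping is an assumption):

```rust
#[derive(Debug)]
enum Code {
    Internal,
    NoSpaceLeftOnDevice,
}

trait ErrorCode {
    fn error_code(&self) -> Code;
}

// Stand-in for the impl provided by meilisearch_types; the real mapping
// inspects the io::ErrorKind.
impl ErrorCode for std::io::Error {
    fn error_code(&self) -> Code {
        match self.kind() {
            std::io::ErrorKind::WriteZero => Code::NoSpaceLeftOnDevice, // assumption
            _ => Code::Internal,
        }
    }
}

enum Error {
    Io(std::io::Error),
}

impl ErrorCode for Error {
    fn error_code(&self) -> Code {
        match self {
            // Delegate instead of hardcoding Code::Internal.
            Error::Io(e) => e.error_code(),
        }
    }
}
```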

View File

@@ -23,7 +23,7 @@ const CURRENT_DUMP_VERSION: Version = Version::V6;
type Result<T> = std::result::Result<T, Error>;
#[derive(Debug, PartialEq, Eq, Serialize, Deserialize)]
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct Metadata {
pub dump_version: Version,
@@ -32,7 +32,7 @@ pub struct Metadata {
pub dump_date: OffsetDateTime,
}
#[derive(Debug, PartialEq, Eq, Serialize, Deserialize)]
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct IndexMetadata {
pub uid: String,
@@ -43,7 +43,7 @@ pub struct IndexMetadata {
pub updated_at: OffsetDateTime,
}
#[derive(Debug, PartialEq, Eq, Deserialize, Serialize)]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Deserialize, Serialize)]
pub enum Version {
V1,
V2,
@@ -87,7 +87,7 @@ pub struct TaskDump {
pub finished_at: Option<OffsetDateTime>,
}
// A `Kind` specific version made for the dump. If modified you may break the dump.
// A `Kind` specific version made for the dump. If modified you may break the dump.
#[derive(Debug, PartialEq, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub enum KindDump {
@@ -125,7 +125,6 @@ pub enum KindDump {
tasks: RoaringBitmap,
},
DumpCreation {
dump_uid: String,
keys: Vec<Key>,
instance_uid: Option<InstanceUid>,
},
@@ -188,8 +187,8 @@ impl From<KindWithContent> for KindDump {
KindWithContent::TaskDeletion { query, tasks } => {
KindDump::TasksDeletion { query, tasks }
}
KindWithContent::DumpCreation { dump_uid, keys, instance_uid } => {
KindDump::DumpCreation { dump_uid, keys, instance_uid }
KindWithContent::DumpCreation { keys, instance_uid } => {
KindDump::DumpCreation { keys, instance_uid }
}
KindWithContent::SnapshotCreation => KindDump::SnapshotCreation,
}
@@ -199,17 +198,16 @@ impl From<KindWithContent> for KindDump {
#[cfg(test)]
pub(crate) mod test {
use std::fs::File;
use std::io::{Seek, SeekFrom};
use std::io::Seek;
use std::str::FromStr;
use big_s::S;
use maplit::btreeset;
use meilisearch_types::index_uid::IndexUid;
use meilisearch_types::index_uid_pattern::IndexUidPattern;
use meilisearch_types::keys::{Action, Key};
use meilisearch_types::milli::update::Setting;
use meilisearch_types::milli::{self};
use meilisearch_types::settings::{Checked, Settings};
use meilisearch_types::star_or::StarOr;
use meilisearch_types::tasks::{Details, Status};
use serde_json::{json, Map, Value};
use time::macros::datetime;
@@ -342,7 +340,7 @@ pub(crate) mod test {
name: Some(S("doggos_key")),
uid: Uuid::from_str("9f8a34da-b6b2-42f0-939b-dbd4c3448655").unwrap(),
actions: vec![Action::DocumentsAll],
indexes: vec![StarOr::Other(IndexUid::from_str("doggos").unwrap())],
indexes: vec![IndexUidPattern::from_str("doggos").unwrap()],
expires_at: Some(datetime!(4130-03-14 12:21 UTC)),
created_at: datetime!(1960-11-15 0:00 UTC),
updated_at: datetime!(2022-11-10 0:00 UTC),
@@ -352,7 +350,7 @@ pub(crate) mod test {
name: Some(S("master_key")),
uid: Uuid::from_str("4622f717-1c00-47bb-a494-39d76a49b591").unwrap(),
actions: vec![Action::All],
indexes: vec![StarOr::Star],
indexes: vec![IndexUidPattern::all()],
expires_at: None,
created_at: datetime!(0000-01-01 00:01 UTC),
updated_at: datetime!(1964-05-04 17:25 UTC),
@@ -411,13 +409,12 @@ pub(crate) mod test {
// create the dump
let mut file = tempfile::tempfile().unwrap();
dump.persist_to(&mut file).unwrap();
file.seek(SeekFrom::Start(0)).unwrap();
file.rewind().unwrap();
file
}
#[test]
#[ignore]
fn test_creating_and_read_dump() {
let mut file = create_test_dump();
let mut dump = DumpReader::open(&mut file).unwrap();
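Two hunks above are a pure simplification: the `SeekFrom` import goes away because `file.seek(SeekFrom::Start(0))` is replaced by `file.rewind()`, available on `std::io::Seek` since Rust 1.55. A self-contained demonstration of the equivalence:

```rust
use std::io::{Cursor, Read, Seek, Write};

fn main() -> std::io::Result<()> {
    let mut file = Cursor::new(Vec::new());
    file.write_all(b"dump contents")?;
    // Equivalent to file.seek(SeekFrom::Start(0))?, without the SeekFrom import.
    file.rewind()?;
    let mut contents = String::new();
    file.read_to_string(&mut contents)?;
    assert_eq!(contents, "dump contents");
    Ok(())
}
```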

View File

@@ -1,3 +1,4 @@
pub mod v1_to_v2;
pub mod v2_to_v3;
pub mod v3_to_v4;
pub mod v4_to_v5;

View File

@@ -0,0 +1,38 @@
---
source: dump/src/reader/compat/v1_to_v2.rs
expression: products.settings().unwrap()
---
{
"displayedAttributes": [
"*"
],
"searchableAttributes": [
"*"
],
"filterableAttributes": [],
"sortableAttributes": [],
"rankingRules": [
"typo",
"words",
"proximity",
"attribute",
"exactness"
],
"stopWords": [],
"synonyms": {
"android": [
"phone",
"smartphone"
],
"iphone": [
"phone",
"smartphone"
],
"phone": [
"android",
"iphone",
"smartphone"
]
},
"distinctAttribute": null
}

View File

@@ -0,0 +1,31 @@
---
source: dump/src/reader/compat/v1_to_v2.rs
expression: movies.settings().unwrap()
---
{
"displayedAttributes": [
"*"
],
"searchableAttributes": [
"*"
],
"filterableAttributes": [
"genres",
"id"
],
"sortableAttributes": [
"genres",
"id"
],
"rankingRules": [
"typo",
"words",
"proximity",
"attribute",
"exactness",
"release_date:asc"
],
"stopWords": [],
"synonyms": {},
"distinctAttribute": null
}

View File

@@ -0,0 +1,24 @@
---
source: dump/src/reader/compat/v1_to_v2.rs
expression: spells.settings().unwrap()
---
{
"displayedAttributes": [
"*"
],
"searchableAttributes": [
"*"
],
"filterableAttributes": [],
"sortableAttributes": [],
"rankingRules": [
"typo",
"words",
"proximity",
"attribute",
"exactness"
],
"stopWords": [],
"synonyms": {},
"distinctAttribute": null
}

View File

@@ -0,0 +1,23 @@
---
source: dump/src/reader/compat/v2_to_v3.rs
expression: movies2.settings().unwrap()
---
{
"displayedAttributes": [
"*"
],
"searchableAttributes": [
"*"
],
"filterableAttributes": [],
"rankingRules": [
"words",
"typo",
"proximity",
"attribute",
"exactness"
],
"stopWords": [],
"synonyms": {},
"distinctAttribute": null
}

View File

@@ -0,0 +1,23 @@
---
source: dump/src/reader/compat/v2_to_v3.rs
expression: spells.settings().unwrap()
---
{
"displayedAttributes": [
"*"
],
"searchableAttributes": [
"*"
],
"filterableAttributes": [],
"rankingRules": [
"words",
"typo",
"proximity",
"attribute",
"exactness"
],
"stopWords": [],
"synonyms": {},
"distinctAttribute": null
}

View File

@@ -0,0 +1,37 @@
---
source: dump/src/reader/compat/v2_to_v3.rs
expression: products.settings().unwrap()
---
{
"displayedAttributes": [
"*"
],
"searchableAttributes": [
"*"
],
"filterableAttributes": [],
"rankingRules": [
"words",
"typo",
"proximity",
"attribute",
"exactness"
],
"stopWords": [],
"synonyms": {
"android": [
"phone",
"smartphone"
],
"iphone": [
"phone",
"smartphone"
],
"phone": [
"android",
"iphone",
"smartphone"
]
},
"distinctAttribute": null
}

View File

@@ -0,0 +1,24 @@
---
source: dump/src/reader/compat/v2_to_v3.rs
expression: movies.settings().unwrap()
---
{
"displayedAttributes": [
"*"
],
"searchableAttributes": [
"*"
],
"filterableAttributes": [],
"rankingRules": [
"words",
"typo",
"proximity",
"attribute",
"exactness",
"release_date:asc"
],
"stopWords": [],
"synonyms": {},
"distinctAttribute": null
}

View File

@@ -0,0 +1,25 @@
---
source: dump/src/reader/compat/v3_to_v4.rs
expression: movies2.settings().unwrap()
---
{
"displayedAttributes": [
"*"
],
"searchableAttributes": [
"*"
],
"filterableAttributes": [],
"sortableAttributes": [],
"rankingRules": [
"words",
"typo",
"proximity",
"attribute",
"sort",
"exactness"
],
"stopWords": [],
"synonyms": {},
"distinctAttribute": null
}

View File

@@ -0,0 +1,25 @@
---
source: dump/src/reader/compat/v3_to_v4.rs
expression: spells.settings().unwrap()
---
{
"displayedAttributes": [
"*"
],
"searchableAttributes": [
"*"
],
"filterableAttributes": [],
"sortableAttributes": [],
"rankingRules": [
"words",
"typo",
"proximity",
"attribute",
"sort",
"exactness"
],
"stopWords": [],
"synonyms": {},
"distinctAttribute": null
}

View File

@@ -0,0 +1,39 @@
---
source: dump/src/reader/compat/v3_to_v4.rs
expression: products.settings().unwrap()
---
{
"displayedAttributes": [
"*"
],
"searchableAttributes": [
"*"
],
"filterableAttributes": [],
"sortableAttributes": [],
"rankingRules": [
"words",
"typo",
"proximity",
"attribute",
"sort",
"exactness"
],
"stopWords": [],
"synonyms": {
"android": [
"phone",
"smartphone"
],
"iphone": [
"phone",
"smartphone"
],
"phone": [
"android",
"iphone",
"smartphone"
]
},
"distinctAttribute": null
}

View File

@@ -0,0 +1,31 @@
---
source: dump/src/reader/compat/v3_to_v4.rs
expression: movies.settings().unwrap()
---
{
"displayedAttributes": [
"*"
],
"searchableAttributes": [
"*"
],
"filterableAttributes": [
"genres",
"id"
],
"sortableAttributes": [
"release_date"
],
"rankingRules": [
"words",
"typo",
"proximity",
"attribute",
"sort",
"exactness",
"release_date:asc"
],
"stopWords": [],
"synonyms": {},
"distinctAttribute": null
}

View File

@@ -0,0 +1,56 @@
---
source: dump/src/reader/compat/v4_to_v5.rs
expression: spells.settings().unwrap()
---
{
"displayedAttributes": "Reset",
"searchableAttributes": "Reset",
"filterableAttributes": {
"Set": []
},
"sortableAttributes": {
"Set": []
},
"rankingRules": {
"Set": [
"words",
"typo",
"proximity",
"attribute",
"sort",
"exactness"
]
},
"stopWords": {
"Set": []
},
"synonyms": {
"Set": {}
},
"distinctAttribute": "Reset",
"typoTolerance": {
"Set": {
"enabled": {
"Set": true
},
"minWordSizeForTypos": {
"Set": {
"oneTypo": {
"Set": 5
},
"twoTypos": {
"Set": 9
}
}
},
"disableOnWords": {
"Set": []
},
"disableOnAttributes": {
"Set": []
}
}
},
"faceting": "NotSet",
"pagination": "NotSet"
}

View File

@@ -0,0 +1,70 @@
---
source: dump/src/reader/compat/v4_to_v5.rs
expression: products.settings().unwrap()
---
{
"displayedAttributes": "Reset",
"searchableAttributes": "Reset",
"filterableAttributes": {
"Set": []
},
"sortableAttributes": {
"Set": []
},
"rankingRules": {
"Set": [
"words",
"typo",
"proximity",
"attribute",
"sort",
"exactness"
]
},
"stopWords": {
"Set": []
},
"synonyms": {
"Set": {
"android": [
"phone",
"smartphone"
],
"iphone": [
"phone",
"smartphone"
],
"phone": [
"android",
"iphone",
"smartphone"
]
}
},
"distinctAttribute": "Reset",
"typoTolerance": {
"Set": {
"enabled": {
"Set": true
},
"minWordSizeForTypos": {
"Set": {
"oneTypo": {
"Set": 5
},
"twoTypos": {
"Set": 9
}
}
},
"disableOnWords": {
"Set": []
},
"disableOnAttributes": {
"Set": []
}
}
},
"faceting": "NotSet",
"pagination": "NotSet"
}

View File

@@ -0,0 +1,62 @@
---
source: dump/src/reader/compat/v4_to_v5.rs
expression: movies.settings().unwrap()
---
{
"displayedAttributes": "Reset",
"searchableAttributes": "Reset",
"filterableAttributes": {
"Set": [
"genres",
"id"
]
},
"sortableAttributes": {
"Set": [
"release_date"
]
},
"rankingRules": {
"Set": [
"words",
"typo",
"proximity",
"attribute",
"sort",
"exactness",
"release_date:asc"
]
},
"stopWords": {
"Set": []
},
"synonyms": {
"Set": {}
},
"distinctAttribute": "Reset",
"typoTolerance": {
"Set": {
"enabled": {
"Set": true
},
"minWordSizeForTypos": {
"Set": {
"oneTypo": {
"Set": 5
},
"twoTypos": {
"Set": 9
}
}
},
"disableOnWords": {
"Set": []
},
"disableOnAttributes": {
"Set": []
}
}
},
"faceting": "NotSet",
"pagination": "NotSet"
}

View File

@@ -0,0 +1,40 @@
---
source: dump/src/reader/compat/v5_to_v6.rs
expression: spells.settings().unwrap()
---
{
"displayedAttributes": [
"*"
],
"searchableAttributes": [
"*"
],
"filterableAttributes": [],
"sortableAttributes": [],
"rankingRules": [
"words",
"typo",
"proximity",
"attribute",
"sort",
"exactness"
],
"stopWords": [],
"synonyms": {},
"distinctAttribute": null,
"typoTolerance": {
"enabled": true,
"minWordSizeForTypos": {
"oneTypo": 5,
"twoTypos": 9
},
"disableOnWords": [],
"disableOnAttributes": []
},
"faceting": {
"maxValuesPerFacet": 100
},
"pagination": {
"maxTotalHits": 1000
}
}

View File

@@ -0,0 +1,54 @@
---
source: dump/src/reader/compat/v5_to_v6.rs
expression: products.settings().unwrap()
---
{
"displayedAttributes": [
"*"
],
"searchableAttributes": [
"*"
],
"filterableAttributes": [],
"sortableAttributes": [],
"rankingRules": [
"words",
"typo",
"proximity",
"attribute",
"sort",
"exactness"
],
"stopWords": [],
"synonyms": {
"android": [
"phone",
"smartphone"
],
"iphone": [
"phone",
"smartphone"
],
"phone": [
"android",
"iphone",
"smartphone"
]
},
"distinctAttribute": null,
"typoTolerance": {
"enabled": true,
"minWordSizeForTypos": {
"oneTypo": 5,
"twoTypos": 9
},
"disableOnWords": [],
"disableOnAttributes": []
},
"faceting": {
"maxValuesPerFacet": 100
},
"pagination": {
"maxTotalHits": 1000
}
}

View File

@@ -0,0 +1,46 @@
---
source: dump/src/reader/compat/v5_to_v6.rs
expression: movies.settings().unwrap()
---
{
"displayedAttributes": [
"*"
],
"searchableAttributes": [
"*"
],
"filterableAttributes": [
"genres",
"id"
],
"sortableAttributes": [
"release_date"
],
"rankingRules": [
"words",
"typo",
"proximity",
"attribute",
"sort",
"exactness",
"release_date:asc"
],
"stopWords": [],
"synonyms": {},
"distinctAttribute": null,
"typoTolerance": {
"enabled": true,
"minWordSizeForTypos": {
"oneTypo": 5,
"twoTypos": 9
},
"disableOnWords": [],
"disableOnAttributes": []
},
"faceting": {
"maxValuesPerFacet": 100
},
"pagination": {
"maxTotalHits": 1000
}
}

View File

@@ -0,0 +1,410 @@
use std::str::FromStr;
use super::v2_to_v3::CompatV2ToV3;
use crate::reader::{v1, v2, Document};
use crate::Result;
pub struct CompatV1ToV2 {
pub from: v1::V1Reader,
}
impl CompatV1ToV2 {
pub fn new(v1: v1::V1Reader) -> Self {
Self { from: v1 }
}
pub fn to_v3(self) -> CompatV2ToV3 {
CompatV2ToV3::Compat(self)
}
pub fn version(&self) -> crate::Version {
self.from.version()
}
pub fn date(&self) -> Option<time::OffsetDateTime> {
self.from.date()
}
pub fn index_uuid(&self) -> Vec<v2::meta::IndexUuid> {
self.from
.index_uuid()
.into_iter()
.enumerate()
// we use the index of the index 😬 as UUID for the index, so that we can link the v2::Task to their index
.map(|(index, index_uuid)| v2::meta::IndexUuid {
uid: index_uuid.uid,
uuid: uuid::Uuid::from_u128(index as u128),
})
.collect()
}
pub fn indexes(&self) -> Result<impl Iterator<Item = Result<CompatIndexV1ToV2>> + '_> {
Ok(self.from.indexes()?.map(|index_reader| Ok(CompatIndexV1ToV2 { from: index_reader? })))
}
pub fn tasks(
&mut self,
) -> Box<dyn Iterator<Item = Result<(v2::Task, Option<v2::UpdateFile>)>> + '_> {
// Convert an error here to an iterator yielding the error
let indexes = match self.from.indexes() {
Ok(indexes) => indexes,
Err(err) => return Box::new(std::iter::once(Err(err))),
};
let it = indexes.enumerate().flat_map(
move |(index, index_reader)| -> Box<dyn Iterator<Item = _>> {
let index_reader = match index_reader {
Ok(index_reader) => index_reader,
Err(err) => return Box::new(std::iter::once(Err(err))),
};
Box::new(
index_reader
.tasks()
// Filter out the UpdateStatus::Customs variant that is not supported in v2
// and enqueued tasks, which don't contain the necessary update file in v1
.filter_map(move |task| -> Option<_> {
let task = match task {
Ok(task) => task,
Err(err) => return Some(Err(err)),
};
Some(Ok((
v2::Task {
uuid: uuid::Uuid::from_u128(index as u128),
update: Option::from(task)?,
},
None,
)))
}),
)
},
);
Box::new(it)
}
}
pub struct CompatIndexV1ToV2 {
pub from: v1::V1IndexReader,
}
impl CompatIndexV1ToV2 {
pub fn metadata(&self) -> &crate::IndexMetadata {
self.from.metadata()
}
pub fn documents(&mut self) -> Result<Box<dyn Iterator<Item = Result<Document>> + '_>> {
self.from.documents().map(|it| Box::new(it) as Box<dyn Iterator<Item = _>>)
}
pub fn settings(&mut self) -> Result<v2::settings::Settings<v2::settings::Checked>> {
Ok(v2::settings::Settings::<v2::settings::Unchecked>::from(self.from.settings()?).check())
}
}
impl From<v1::settings::Settings> for v2::Settings<v2::Unchecked> {
fn from(source: v1::settings::Settings) -> Self {
Self {
displayed_attributes: option_to_setting(source.displayed_attributes)
.map(|displayed| displayed.into_iter().collect()),
searchable_attributes: option_to_setting(source.searchable_attributes),
filterable_attributes: option_to_setting(source.attributes_for_faceting.clone())
.map(|filterable| filterable.into_iter().collect()),
sortable_attributes: option_to_setting(source.attributes_for_faceting)
.map(|sortable| sortable.into_iter().collect()),
ranking_rules: option_to_setting(source.ranking_rules).map(|ranking_rules| {
ranking_rules
.into_iter()
.filter_map(|ranking_rule| {
match v1::settings::RankingRule::from_str(&ranking_rule) {
Ok(ranking_rule) => {
let criterion: Option<v2::settings::Criterion> =
ranking_rule.into();
criterion.as_ref().map(ToString::to_string)
}
Err(()) => {
log::warn!(
"Could not import the following ranking rule: `{}`.",
ranking_rule
);
None
}
}
})
.collect()
}),
stop_words: option_to_setting(source.stop_words),
synonyms: option_to_setting(source.synonyms),
distinct_attribute: option_to_setting(source.distinct_attribute),
_kind: std::marker::PhantomData,
}
}
}
fn option_to_setting<T>(opt: Option<Option<T>>) -> v2::Setting<T> {
match opt {
Some(Some(t)) => v2::Setting::Set(t),
None => v2::Setting::NotSet,
Some(None) => v2::Setting::Reset,
}
}
impl From<v1::update::UpdateStatus> for Option<v2::updates::UpdateStatus> {
fn from(source: v1::update::UpdateStatus) -> Self {
use v1::update::UpdateStatus as UpdateStatusV1;
use v2::updates::UpdateStatus as UpdateStatusV2;
Some(match source {
UpdateStatusV1::Enqueued { content } => {
log::warn!(
"Cannot import task {} (importing enqueued tasks from v1 dumps is unsupported)",
content.update_id
);
log::warn!("Task will be skipped in the queue of imported tasks.");
return None;
}
UpdateStatusV1::Failed { content } => UpdateStatusV2::Failed(v2::updates::Failed {
from: v2::updates::Processing {
from: v2::updates::Enqueued {
update_id: content.update_id,
meta: Option::from(content.update_type)?,
enqueued_at: content.enqueued_at,
content: None,
},
started_processing_at: content.processed_at
- std::time::Duration::from_secs_f64(content.duration),
},
error: v2::ResponseError {
// the error code is ignored by serialization, so it is always the default in deserialized v2 dumps
// that's a good thing, because we don't have them in v1 dumps 😅
code: http::StatusCode::default(),
message: content.error.unwrap_or_default(),
// error codes are unchanged between v1 and v2
error_code: content.error_code.unwrap_or_default(),
// error types are unchanged between v1 and v2
error_type: content.error_type.unwrap_or_default(),
// error links are unchanged between v1 and v2
error_link: content.error_link.unwrap_or_default(),
},
failed_at: content.processed_at,
}),
UpdateStatusV1::Processed { content } => {
UpdateStatusV2::Processed(v2::updates::Processed {
success: match &content.update_type {
v1::update::UpdateType::ClearAll => {
v2::updates::UpdateResult::DocumentDeletion { deleted: u64::MAX }
}
v1::update::UpdateType::Customs => v2::updates::UpdateResult::Other,
v1::update::UpdateType::DocumentsAddition { number } => {
v2::updates::UpdateResult::DocumentsAddition(
v2::updates::DocumentAdditionResult { nb_documents: *number },
)
}
v1::update::UpdateType::DocumentsPartial { number } => {
v2::updates::UpdateResult::DocumentsAddition(
v2::updates::DocumentAdditionResult { nb_documents: *number },
)
}
v1::update::UpdateType::DocumentsDeletion { number } => {
v2::updates::UpdateResult::DocumentDeletion { deleted: *number as u64 }
}
v1::update::UpdateType::Settings { .. } => v2::updates::UpdateResult::Other,
},
processed_at: content.processed_at,
from: v2::updates::Processing {
from: v2::updates::Enqueued {
update_id: content.update_id,
meta: Option::from(content.update_type)?,
enqueued_at: content.enqueued_at,
content: None,
},
started_processing_at: content.processed_at
- std::time::Duration::from_secs_f64(content.duration),
},
})
}
})
}
}
impl From<v1::update::UpdateType> for Option<v2::updates::UpdateMeta> {
fn from(source: v1::update::UpdateType) -> Self {
Some(match source {
v1::update::UpdateType::ClearAll => v2::updates::UpdateMeta::ClearDocuments,
v1::update::UpdateType::Customs => {
log::warn!("Ignoring task with type 'Customs' that is no longer supported");
return None;
}
v1::update::UpdateType::DocumentsAddition { .. } => {
v2::updates::UpdateMeta::DocumentsAddition {
method: v2::updates::IndexDocumentsMethod::ReplaceDocuments,
format: v2::updates::UpdateFormat::Json,
primary_key: None,
}
}
v1::update::UpdateType::DocumentsPartial { .. } => {
v2::updates::UpdateMeta::DocumentsAddition {
method: v2::updates::IndexDocumentsMethod::UpdateDocuments,
format: v2::updates::UpdateFormat::Json,
primary_key: None,
}
}
v1::update::UpdateType::DocumentsDeletion { .. } => {
v2::updates::UpdateMeta::DeleteDocuments { ids: vec![] }
}
v1::update::UpdateType::Settings { settings } => {
v2::updates::UpdateMeta::Settings((*settings).into())
}
})
}
}
impl From<v1::settings::SettingsUpdate> for v2::Settings<v2::Unchecked> {
fn from(source: v1::settings::SettingsUpdate) -> Self {
let ranking_rules = v2::Setting::from(source.ranking_rules);
// go from the concrete types of v1 (RankingRule) to the concrete type of v2 (Criterion),
// and then back to string as this is what the settings manipulate
let ranking_rules = ranking_rules.map(|ranking_rules| {
ranking_rules
.into_iter()
// filter out the WordsPosition ranking rule that exists in v1 but not v2
.filter_map(Option::<v2::settings::Criterion>::from)
.map(|criterion| criterion.to_string())
.collect()
});
Self {
displayed_attributes: v2::Setting::from(source.displayed_attributes)
.map(|displayed_attributes| displayed_attributes.into_iter().collect()),
searchable_attributes: source.searchable_attributes.into(),
filterable_attributes: v2::Setting::from(source.attributes_for_faceting.clone())
.map(|attributes_for_faceting| attributes_for_faceting.into_iter().collect()),
sortable_attributes: v2::Setting::from(source.attributes_for_faceting)
.map(|attributes_for_faceting| attributes_for_faceting.into_iter().collect()),
ranking_rules,
stop_words: source.stop_words.into(),
synonyms: source.synonyms.into(),
distinct_attribute: source.distinct_attribute.into(),
_kind: std::marker::PhantomData,
}
}
}
impl From<v1::settings::RankingRule> for Option<v2::settings::Criterion> {
fn from(source: v1::settings::RankingRule) -> Self {
match source {
v1::settings::RankingRule::Typo => Some(v2::settings::Criterion::Typo),
v1::settings::RankingRule::Words => Some(v2::settings::Criterion::Words),
v1::settings::RankingRule::Proximity => Some(v2::settings::Criterion::Proximity),
v1::settings::RankingRule::Attribute => Some(v2::settings::Criterion::Attribute),
v1::settings::RankingRule::WordsPosition => {
log::warn!("Removing the 'WordsPosition' ranking rule that is no longer supported, please check the resulting ranking rules of your indexes");
None
}
v1::settings::RankingRule::Exactness => Some(v2::settings::Criterion::Exactness),
v1::settings::RankingRule::Asc(field_name) => {
Some(v2::settings::Criterion::Asc(field_name))
}
v1::settings::RankingRule::Desc(field_name) => {
Some(v2::settings::Criterion::Desc(field_name))
}
}
}
}
impl<T> From<v1::settings::UpdateState<T>> for v2::Setting<T> {
fn from(source: v1::settings::UpdateState<T>) -> Self {
match source {
v1::settings::UpdateState::Update(new_value) => v2::Setting::Set(new_value),
v1::settings::UpdateState::Clear => v2::Setting::Reset,
v1::settings::UpdateState::Nothing => v2::Setting::NotSet,
}
}
}
#[cfg(test)]
pub(crate) mod test {
use std::fs::File;
use std::io::BufReader;
use flate2::bufread::GzDecoder;
use meili_snap::insta;
use tempfile::TempDir;
use super::*;
#[test]
fn compat_v1_v2() {
let dump = File::open("tests/assets/v1.dump").unwrap();
let dir = TempDir::new().unwrap();
let mut dump = BufReader::new(dump);
let gz = GzDecoder::new(&mut dump);
let mut archive = tar::Archive::new(gz);
archive.unpack(dir.path()).unwrap();
let mut dump = v1::V1Reader::open(dir).unwrap().to_v2();
// top level infos
assert_eq!(dump.date(), None);
// tasks
let tasks = dump.tasks().collect::<Result<Vec<_>>>().unwrap();
let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip();
meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"2298010973ee98cf4670787314176a3a");
assert_eq!(update_files.len(), 9);
assert!(update_files[..].iter().all(|u| u.is_none())); // no update file in dumps v1
// indexes
let mut indexes = dump.indexes().unwrap().collect::<Result<Vec<_>>>().unwrap();
// the indexes are not ordered in any way by default
indexes.sort_by_key(|index| index.metadata().uid.to_string());
let mut products = indexes.pop().unwrap();
let mut movies = indexes.pop().unwrap();
let mut spells = indexes.pop().unwrap();
assert!(indexes.is_empty());
// products
insta::assert_json_snapshot!(products.metadata(), @r###"
{
"uid": "products",
"primaryKey": "sku",
"createdAt": "2022-10-02T13:23:39.976870431Z",
"updatedAt": "2022-10-02T13:27:54.353262482Z"
}
"###);
insta::assert_json_snapshot!(products.settings().unwrap());
let documents = products.documents().unwrap().collect::<Result<Vec<_>>>().unwrap();
assert_eq!(documents.len(), 10);
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"b01c8371aea4c7171af0d4d846a2bdca");
// movies
insta::assert_json_snapshot!(movies.metadata(), @r###"
{
"uid": "movies",
"primaryKey": "id",
"createdAt": "2022-10-02T13:15:29.477512777Z",
"updatedAt": "2022-10-02T13:21:12.671204856Z"
}
"###);
insta::assert_json_snapshot!(movies.settings().unwrap());
let documents = movies.documents().unwrap().collect::<Result<Vec<_>>>().unwrap();
assert_eq!(documents.len(), 10);
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"b63dbed5bbc059f3e32bc471ae699bf5");
// spells
insta::assert_json_snapshot!(spells.metadata(), @r###"
{
"uid": "dnd_spells",
"primaryKey": "index",
"createdAt": "2022-10-02T13:38:26.358882984Z",
"updatedAt": "2022-10-02T13:38:26.385609433Z"
}
"###);
insta::assert_json_snapshot!(spells.settings().unwrap());
let documents = spells.documents().unwrap().collect::<Result<Vec<_>>>().unwrap();
assert_eq!(documents.len(), 10);
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"aa24c0cfc733d66c396237ad44263bed");
}
}
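A detail worth calling out in this converter is `option_to_setting`, which maps v1's double-`Option` encoding onto v2's three-state `Setting`. A standalone sketch with the same semantics (local stand-in types, not the real v2 module):

```rust
#[derive(Debug, PartialEq)]
enum Setting<T> {
    Set(T),
    Reset,
    NotSet,
}

fn option_to_setting<T>(opt: Option<Option<T>>) -> Setting<T> {
    match opt {
        Some(Some(t)) => Setting::Set(t), // a concrete value was provided
        Some(None) => Setting::Reset,     // the field was explicitly cleared
        None => Setting::NotSet,          // the field was never mentioned
    }
}

fn main() {
    assert_eq!(option_to_setting(Some(Some(3))), Setting::Set(3));
    assert_eq!(option_to_setting::<u8>(Some(None)), Setting::Reset);
    assert_eq!(option_to_setting::<u8>(None), Setting::NotSet);
}
```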

View File

@@ -4,22 +4,28 @@ use std::str::FromStr;
use time::OffsetDateTime;
use uuid::Uuid;
use super::v1_to_v2::{CompatIndexV1ToV2, CompatV1ToV2};
use super::v3_to_v4::CompatV3ToV4;
use crate::reader::{v2, v3, Document};
use crate::Result;
pub struct CompatV2ToV3 {
pub from: v2::V2Reader,
pub enum CompatV2ToV3 {
V2(v2::V2Reader),
Compat(CompatV1ToV2),
}
impl CompatV2ToV3 {
pub fn new(v2: v2::V2Reader) -> CompatV2ToV3 {
CompatV2ToV3 { from: v2 }
CompatV2ToV3::V2(v2)
}
pub fn index_uuid(&self) -> Vec<v3::meta::IndexUuid> {
self.from
.index_uuid()
let v2_uuids = match self {
CompatV2ToV3::V2(from) => from.index_uuid(),
CompatV2ToV3::Compat(compat) => compat.index_uuid(),
};
v2_uuids
.into_iter()
.into_iter()
.map(|index| v3::meta::IndexUuid { uid: index.uid, uuid: index.uuid })
.collect()
@@ -30,11 +36,17 @@ impl CompatV2ToV3 {
}
pub fn version(&self) -> crate::Version {
self.from.version()
match self {
CompatV2ToV3::V2(from) => from.version(),
CompatV2ToV3::Compat(compat) => compat.version(),
}
}
pub fn date(&self) -> Option<time::OffsetDateTime> {
self.from.date()
match self {
CompatV2ToV3::V2(from) => from.date(),
CompatV2ToV3::Compat(compat) => compat.date(),
}
}
pub fn instance_uid(&self) -> Result<Option<uuid::Uuid>> {
@@ -42,10 +54,18 @@ impl CompatV2ToV3 {
}
pub fn indexes(&self) -> Result<impl Iterator<Item = Result<CompatIndexV2ToV3>> + '_> {
Ok(self.from.indexes()?.map(|index_reader| -> Result<_> {
let compat = CompatIndexV2ToV3::new(index_reader?);
Ok(compat)
}))
Ok(match self {
CompatV2ToV3::V2(from) => Box::new(from.indexes()?.map(|index_reader| -> Result<_> {
let compat = CompatIndexV2ToV3::new(index_reader?);
Ok(compat)
}))
as Box<dyn Iterator<Item = Result<CompatIndexV2ToV3>> + '_>,
CompatV2ToV3::Compat(compat) => Box::new(compat.indexes()?.map(|index_reader| {
let compat = CompatIndexV2ToV3::Compat(Box::new(index_reader?));
Ok(compat)
}))
as Box<dyn Iterator<Item = Result<CompatIndexV2ToV3>> + '_>,
})
}
pub fn tasks(
@@ -54,11 +74,13 @@ impl CompatV2ToV3 {
dyn Iterator<Item = Result<(v3::Task, Option<Box<dyn Iterator<Item = Result<Document>>>>)>>
+ '_,
> {
let _indexes = self.from.index_uuid.clone();
let tasks = match self {
CompatV2ToV3::V2(from) => from.tasks(),
CompatV2ToV3::Compat(compat) => compat.tasks(),
};
Box::new(
self.from
.tasks()
tasks
.map(move |task| {
task.map(|(task, content_file)| {
let task = v3::Task { uuid: task.uuid, update: task.update.into() };
@@ -76,27 +98,38 @@ impl CompatV2ToV3 {
}
}
pub struct CompatIndexV2ToV3 {
from: v2::V2IndexReader,
pub enum CompatIndexV2ToV3 {
V2(v2::V2IndexReader),
Compat(Box<CompatIndexV1ToV2>),
}
impl CompatIndexV2ToV3 {
pub fn new(v2: v2::V2IndexReader) -> CompatIndexV2ToV3 {
CompatIndexV2ToV3 { from: v2 }
CompatIndexV2ToV3::V2(v2)
}
pub fn metadata(&self) -> &crate::IndexMetadata {
self.from.metadata()
match self {
CompatIndexV2ToV3::V2(from) => from.metadata(),
CompatIndexV2ToV3::Compat(compat) => compat.metadata(),
}
}
pub fn documents(&mut self) -> Result<Box<dyn Iterator<Item = Result<Document>> + '_>> {
self.from
.documents()
.map(|iter| Box::new(iter) as Box<dyn Iterator<Item = Result<Document>> + '_>)
match self {
CompatIndexV2ToV3::V2(from) => from
.documents()
.map(|iter| Box::new(iter) as Box<dyn Iterator<Item = Result<Document>> + '_>),
CompatIndexV2ToV3::Compat(compat) => compat.documents(),
}
}
pub fn settings(&mut self) -> Result<v3::Settings<v3::Checked>> {
Ok(v3::Settings::<v3::Unchecked>::from(self.from.settings()?).check())
let settings = match self {
CompatIndexV2ToV3::V2(from) => from.settings()?,
CompatIndexV2ToV3::Compat(compat) => compat.settings()?,
};
Ok(v3::Settings::<v3::Unchecked>::from(settings).check())
}
}
@@ -328,28 +361,29 @@ impl From<String> for v3::Code {
}
}
fn option_to_setting<T>(opt: Option<Option<T>>) -> v3::Setting<T> {
match opt {
Some(Some(t)) => v3::Setting::Set(t),
None => v3::Setting::NotSet,
Some(None) => v3::Setting::Reset,
impl<A> From<v2::Setting<A>> for v3::Setting<A> {
fn from(setting: v2::Setting<A>) -> Self {
match setting {
v2::settings::Setting::Set(a) => v3::settings::Setting::Set(a),
v2::settings::Setting::Reset => v3::settings::Setting::Reset,
v2::settings::Setting::NotSet => v3::settings::Setting::NotSet,
}
}
}
impl<T> From<v2::Settings<T>> for v3::Settings<v3::Unchecked> {
fn from(settings: v2::Settings<T>) -> Self {
v3::Settings {
displayed_attributes: option_to_setting(settings.displayed_attributes),
searchable_attributes: option_to_setting(settings.searchable_attributes),
filterable_attributes: option_to_setting(settings.filterable_attributes)
.map(|f| f.into_iter().collect()),
sortable_attributes: v3::Setting::NotSet,
ranking_rules: option_to_setting(settings.ranking_rules).map(|criteria| {
displayed_attributes: settings.displayed_attributes.into(),
searchable_attributes: settings.searchable_attributes.into(),
filterable_attributes: settings.filterable_attributes.into(),
sortable_attributes: settings.sortable_attributes.into(),
ranking_rules: v3::Setting::from(settings.ranking_rules).map(|criteria| {
criteria.into_iter().map(|criterion| patch_ranking_rules(&criterion)).collect()
}),
stop_words: option_to_setting(settings.stop_words),
synonyms: option_to_setting(settings.synonyms),
distinct_attribute: option_to_setting(settings.distinct_attribute),
stop_words: settings.stop_words.into(),
synonyms: settings.synonyms.into(),
distinct_attribute: settings.distinct_attribute.into(),
_kind: std::marker::PhantomData,
}
}
@@ -361,6 +395,7 @@ fn patch_ranking_rules(ranking_rule: &str) -> String {
Ok(v2::settings::Criterion::Typo) => String::from("typo"),
Ok(v2::settings::Criterion::Proximity) => String::from("proximity"),
Ok(v2::settings::Criterion::Attribute) => String::from("attribute"),
Ok(v2::settings::Criterion::Sort) => String::from("sort"),
Ok(v2::settings::Criterion::Exactness) => String::from("exactness"),
Ok(v2::settings::Criterion::Asc(name)) => format!("{name}:asc"),
Ok(v2::settings::Criterion::Desc(name)) => format!("{name}:desc"),
@@ -381,7 +416,6 @@ pub(crate) mod test {
use super::*;
#[test]
#[ignore]
fn compat_v2_v3() {
let dump = File::open("tests/assets/v2.dump").unwrap();
let dir = TempDir::new().unwrap();
@@ -427,7 +461,7 @@ pub(crate) mod test {
}
"###);
meili_snap::snapshot_hash!(format!("{:#?}", products.settings()), @"54b3d7a0d96de35427d867fa17164a99");
insta::assert_json_snapshot!(products.settings().unwrap());
let documents = products.documents().unwrap().collect::<Result<Vec<_>>>().unwrap();
assert_eq!(documents.len(), 10);
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"548284a84de510f71e88e6cdea495cf5");
@@ -442,7 +476,7 @@ pub(crate) mod test {
}
"###);
meili_snap::snapshot_hash!(format!("{:#?}", movies.settings()), @"ae7c5ade2243a553152dab2f354e9095");
insta::assert_json_snapshot!(movies.settings().unwrap());
let documents = movies.documents().unwrap().collect::<Result<Vec<_>>>().unwrap();
assert_eq!(documents.len(), 110);
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"d153b5a81d8b3cdcbe1dec270b574022");
@@ -457,7 +491,7 @@ pub(crate) mod test {
}
"###);
meili_snap::snapshot_hash!(format!("{:#?}", movies2.settings()), @"1be82b894556d23953af557b6a328a58");
insta::assert_json_snapshot!(movies2.settings().unwrap());
let documents = movies2.documents().unwrap().collect::<Result<Vec<_>>>().unwrap();
assert_eq!(documents.len(), 0);
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"d751713988987e9331980363e24189ce");
@@ -472,7 +506,7 @@ pub(crate) mod test {
}
"###);
meili_snap::snapshot_hash!(format!("{:#?}", spells.settings()), @"1be82b894556d23953af557b6a328a58");
insta::assert_json_snapshot!(spells.settings().unwrap());
let documents = spells.documents().unwrap().collect::<Result<Vec<_>>>().unwrap();
assert_eq!(documents.len(), 10);
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"235016433dd04262c7f2da01d1e808ce");
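The structural change in this file is that `CompatV2ToV3` goes from a struct wrapping a `V2Reader` to an enum that can also hold a `CompatV1ToV2`, with every accessor matching on the variant. A minimal sketch of that enum-dispatch pattern (hypothetical reader types, not the actual dump readers):

```rust
struct NativeReader;
struct CompatReader;

impl NativeReader {
    fn date(&self) -> Option<&'static str> {
        Some("2023-02-20")
    }
}

impl CompatReader {
    fn date(&self) -> Option<&'static str> {
        None
    }
}

enum Reader {
    Native(NativeReader),
    Compat(CompatReader),
}

impl Reader {
    // Every accessor dispatches on the variant, exactly like
    // CompatV2ToV3::version/date/indexes above.
    fn date(&self) -> Option<&'static str> {
        match self {
            Reader::Native(r) => r.date(),
            Reader::Compat(r) => r.date(),
        }
    }
}

fn main() {
    assert_eq!(Reader::Native(NativeReader).date(), Some("2023-02-20"));
    assert_eq!(Reader::Compat(CompatReader).date(), None);
}
```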

View File

@@ -347,7 +347,6 @@ pub(crate) mod test {
use super::*;
#[test]
#[ignore]
fn compat_v3_v4() {
let dump = File::open("tests/assets/v3.dump").unwrap();
let dir = TempDir::new().unwrap();
@@ -397,7 +396,7 @@ pub(crate) mod test {
}
"###);
meili_snap::snapshot_hash!(format!("{:#?}", products.settings()), @"d3402aff19b90acea9e9a07c466690aa");
insta::assert_json_snapshot!(products.settings().unwrap());
let documents = products.documents().unwrap().collect::<Result<Vec<_>>>().unwrap();
assert_eq!(documents.len(), 10);
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"548284a84de510f71e88e6cdea495cf5");
@@ -412,7 +411,7 @@ pub(crate) mod test {
}
"###);
meili_snap::snapshot_hash!(format!("{:#?}", movies.settings()), @"687aaab250f01b55d57bc69aa313b581");
insta::assert_json_snapshot!(movies.settings().unwrap());
let documents = movies.documents().unwrap().collect::<Result<Vec<_>>>().unwrap();
assert_eq!(documents.len(), 110);
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"d153b5a81d8b3cdcbe1dec270b574022");
@@ -427,7 +426,7 @@ pub(crate) mod test {
}
"###);
meili_snap::snapshot_hash!(format!("{:#?}", movies2.settings()), @"cd9fedbd7e3492831a94da62c90013ea");
insta::assert_json_snapshot!(movies2.settings().unwrap());
let documents = movies2.documents().unwrap().collect::<Result<Vec<_>>>().unwrap();
assert_eq!(documents.len(), 0);
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"d751713988987e9331980363e24189ce");
@@ -442,7 +441,7 @@ pub(crate) mod test {
}
"###);
meili_snap::snapshot_hash!(format!("{:#?}", spells.settings()), @"cd9fedbd7e3492831a94da62c90013ea");
insta::assert_json_snapshot!(spells.settings().unwrap());
let documents = spells.documents().unwrap().collect::<Result<Vec<_>>>().unwrap();
assert_eq!(documents.len(), 10);
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"235016433dd04262c7f2da01d1e808ce");

View File

@@ -383,7 +383,6 @@ pub(crate) mod test {
use super::*;
#[test]
#[ignore]
fn compat_v4_v5() {
let dump = File::open("tests/assets/v4.dump").unwrap();
let dir = TempDir::new().unwrap();
@@ -430,7 +429,7 @@ pub(crate) mod test {
}
"###);
meili_snap::snapshot_hash!(format!("{:#?}", products.settings()), @"26947283836ee4cdf0974f82efcc5332");
insta::assert_json_snapshot!(products.settings().unwrap());
let documents = products.documents().unwrap().collect::<Result<Vec<_>>>().unwrap();
assert_eq!(documents.len(), 10);
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"b01c8371aea4c7171af0d4d846a2bdca");
@@ -445,7 +444,7 @@ pub(crate) mod test {
}
"###);
meili_snap::snapshot_hash!(format!("{:#?}", movies.settings()), @"156871410d17e23803d0c90ddc6a66cb");
insta::assert_json_snapshot!(movies.settings().unwrap());
let documents = movies.documents().unwrap().collect::<Result<Vec<_>>>().unwrap();
assert_eq!(documents.len(), 110);
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"786022a66ecb992c8a2a60fee070a5ab");
@@ -460,7 +459,7 @@ pub(crate) mod test {
}
"###);
meili_snap::snapshot_hash!(format!("{:#?}", spells.settings()), @"69c9916142612cf4a2da9b9ed9455e9e");
insta::assert_json_snapshot!(spells.settings().unwrap());
let documents = spells.documents().unwrap().collect::<Result<Vec<_>>>().unwrap();
assert_eq!(documents.len(), 10);
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"235016433dd04262c7f2da01d1e808ce");
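These test hunks replace opaque `snapshot_hash!` assertions with `insta::assert_json_snapshot!`, which stores the full settings JSON in a reviewable `.snap` file instead of an MD5-style hash. A minimal example of the insta pattern (assuming `insta` 1.x with the `json` feature and `serde_json`):

```rust
#[test]
fn settings_snapshot() {
    let settings = serde_json::json!({
        "rankingRules": ["words", "typo", "proximity"],
        "distinctAttribute": null,
    });
    // On first run this writes a .snap file next to the test; later runs
    // diff against it, so changes show up as readable JSON diffs.
    insta::assert_json_snapshot!(settings);
}
```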

View File

@@ -1,3 +1,5 @@
use std::str::FromStr;
use super::v4_to_v5::{CompatIndexV4ToV5, CompatV4ToV5};
use crate::reader::{v5, v6, Document, UpdateFile};
use crate::Result;
@@ -119,11 +121,10 @@ impl CompatV5ToV6 {
allow_index_creation,
settings: Box::new(settings.into()),
},
v5::tasks::TaskContent::Dump { uid } => v6::Kind::DumpCreation {
dump_uid: uid,
keys: keys.clone(),
instance_uid,
},
v5::tasks::TaskContent::Dump { uid: _ } => {
// in v6 we compute the dump_uid from the started_at processing time
v6::Kind::DumpCreation { keys: keys.clone(), instance_uid }
}
},
canceled_by: None,
details: task_view.details.map(|details| match details {
@@ -143,13 +144,15 @@ impl CompatV5ToV6 {
received_document_ids,
deleted_documents,
} => v6::Details::DocumentDeletion {
matched_documents: received_document_ids,
provided_ids: received_document_ids,
deleted_documents,
},
v5::Details::ClearAll { deleted_documents } => {
v6::Details::ClearAll { deleted_documents }
}
v5::Details::Dump { dump_uid } => v6::Details::Dump { dump_uid },
v5::Details::Dump { dump_uid } => {
v6::Details::Dump { dump_uid: Some(dump_uid) }
}
}),
error: task_view.error.map(|e| e.into()),
enqueued_at: task_view.enqueued_at,
@@ -178,10 +181,8 @@ impl CompatV5ToV6 {
.indexes
.into_iter()
.map(|index| match index {
v5::StarOr::Star => v6::StarOr::Star,
v5::StarOr::Other(uid) => {
v6::StarOr::Other(v6::IndexUid::new_unchecked(uid.as_str()))
}
v5::StarOr::Star => v6::IndexUidPattern::all(),
v5::StarOr::Other(uid) => v6::IndexUidPattern::new_unchecked(uid.as_str()),
})
.collect(),
expires_at: key.expires_at,
@@ -253,51 +254,50 @@ impl<T> From<v5::Setting<T>> for v6::Setting<T> {
impl From<v5::ResponseError> for v6::ResponseError {
fn from(error: v5::ResponseError) -> Self {
let code = match error.error_code.as_ref() {
"index_creation_failed" => v6::Code::CreateIndex,
"index_creation_failed" => v6::Code::IndexCreationFailed,
"index_already_exists" => v6::Code::IndexAlreadyExists,
"index_not_found" => v6::Code::IndexNotFound,
"invalid_index_uid" => v6::Code::InvalidIndexUid,
"invalid_min_word_length_for_typo" => v6::Code::InvalidMinWordLengthForTypo,
"invalid_min_word_length_for_typo" => v6::Code::InvalidSettingsTypoTolerance,
"invalid_state" => v6::Code::InvalidState,
"primary_key_inference_failed" => v6::Code::MissingPrimaryKey,
"index_primary_key_already_exists" => v6::Code::PrimaryKeyAlreadyPresent,
"primary_key_inference_failed" => v6::Code::IndexPrimaryKeyNoCandidateFound,
"index_primary_key_already_exists" => v6::Code::IndexPrimaryKeyAlreadyExists,
"max_fields_limit_exceeded" => v6::Code::MaxFieldsLimitExceeded,
"missing_document_id" => v6::Code::MissingDocumentId,
"invalid_document_id" => v6::Code::InvalidDocumentId,
"invalid_filter" => v6::Code::Filter,
"invalid_sort" => v6::Code::Sort,
"invalid_filter" => v6::Code::InvalidSettingsFilterableAttributes,
"invalid_sort" => v6::Code::InvalidSettingsSortableAttributes,
"bad_parameter" => v6::Code::BadParameter,
"bad_request" => v6::Code::BadRequest,
"database_size_limit_reached" => v6::Code::DatabaseSizeLimitReached,
"document_not_found" => v6::Code::DocumentNotFound,
"internal" => v6::Code::Internal,
"invalid_geo_field" => v6::Code::InvalidGeoField,
"invalid_ranking_rule" => v6::Code::InvalidRankingRule,
"invalid_store_file" => v6::Code::InvalidStore,
"invalid_api_key" => v6::Code::InvalidToken,
"invalid_geo_field" => v6::Code::InvalidDocumentGeoField,
"invalid_ranking_rule" => v6::Code::InvalidSettingsRankingRules,
"invalid_store_file" => v6::Code::InvalidStoreFile,
"invalid_api_key" => v6::Code::InvalidApiKey,
"missing_authorization_header" => v6::Code::MissingAuthorizationHeader,
"no_space_left_on_device" => v6::Code::NoSpaceLeftOnDevice,
"dump_not_found" => v6::Code::DumpNotFound,
"task_not_found" => v6::Code::TaskNotFound,
"payload_too_large" => v6::Code::PayloadTooLarge,
"unretrievable_document" => v6::Code::RetrieveDocument,
"search_error" => v6::Code::SearchDocuments,
"unretrievable_document" => v6::Code::UnretrievableDocument,
"unsupported_media_type" => v6::Code::UnsupportedMediaType,
"dump_already_processing" => v6::Code::DumpAlreadyInProgress,
"dump_already_processing" => v6::Code::DumpAlreadyProcessing,
"dump_process_failed" => v6::Code::DumpProcessFailed,
"invalid_content_type" => v6::Code::InvalidContentType,
"missing_content_type" => v6::Code::MissingContentType,
"malformed_payload" => v6::Code::MalformedPayload,
"missing_payload" => v6::Code::MissingPayload,
"api_key_not_found" => v6::Code::ApiKeyNotFound,
"missing_parameter" => v6::Code::MissingParameter,
"missing_parameter" => v6::Code::BadRequest,
"invalid_api_key_actions" => v6::Code::InvalidApiKeyActions,
"invalid_api_key_indexes" => v6::Code::InvalidApiKeyIndexes,
"invalid_api_key_expires_at" => v6::Code::InvalidApiKeyExpiresAt,
"invalid_api_key_description" => v6::Code::InvalidApiKeyDescription,
"invalid_api_key_name" => v6::Code::InvalidApiKeyName,
"invalid_api_key_uid" => v6::Code::InvalidApiKeyUid,
"immutable_field" => v6::Code::ImmutableField,
"immutable_field" => v6::Code::BadRequest,
"api_key_already_exists" => v6::Code::ApiKeyAlreadyExists,
other => {
log::warn!("Unknown error code {}", other);
@@ -315,7 +315,26 @@ impl<T> From<v5::Settings<T>> for v6::Settings<v6::Unchecked> {
searchable_attributes: settings.searchable_attributes.into(),
filterable_attributes: settings.filterable_attributes.into(),
sortable_attributes: settings.sortable_attributes.into(),
ranking_rules: settings.ranking_rules.into(),
ranking_rules: {
match settings.ranking_rules {
v5::settings::Setting::Set(ranking_rules) => {
let mut new_ranking_rules = vec![];
for rule in ranking_rules {
match v6::RankingRuleView::from_str(&rule) {
Ok(new_rule) => {
new_ranking_rules.push(new_rule);
}
Err(_) => {
log::warn!("Error while importing settings. The ranking rule `{rule}` does not exist anymore.")
}
}
}
v6::Setting::Set(new_ranking_rules)
}
v5::settings::Setting::Reset => v6::Setting::Reset,
v5::settings::Setting::NotSet => v6::Setting::NotSet,
}
},
stop_words: settings.stop_words.into(),
synonyms: settings.synonyms.into(),
distinct_attribute: settings.distinct_attribute.into(),
@@ -401,7 +420,6 @@ pub(crate) mod test {
use super::*;
#[test]
#[ignore]
fn compat_v5_v6() {
let dump = File::open("tests/assets/v5.dump").unwrap();
let dir = TempDir::new().unwrap();
@@ -419,7 +437,7 @@ pub(crate) mod test {
// tasks
let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap();
let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip();
meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"42d4200cf6d92a6449989ca48cd8e28a");
meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"41f91d3a94911b2735ec41b07540df5c");
assert_eq!(update_files.len(), 22);
assert!(update_files[0].is_none()); // the dump creation
assert!(update_files[1].is_some()); // the enqueued document addition
@@ -449,7 +467,7 @@ pub(crate) mod test {
}
"###);
meili_snap::snapshot_hash!(format!("{:#?}", products.settings()), @"8e5cadabf74aebe1160bf51c3d489efe");
insta::assert_json_snapshot!(products.settings().unwrap());
let documents = products.documents().unwrap().collect::<Result<Vec<_>>>().unwrap();
assert_eq!(documents.len(), 10);
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"b01c8371aea4c7171af0d4d846a2bdca");
@@ -464,7 +482,7 @@ pub(crate) mod test {
}
"###);
meili_snap::snapshot_hash!(format!("{:#?}", movies.settings()), @"4894ac1e74b9e1069ed5ee262b7a1aca");
insta::assert_json_snapshot!(movies.settings().unwrap());
let documents = movies.documents().unwrap().collect::<Result<Vec<_>>>().unwrap();
assert_eq!(documents.len(), 200);
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"e962baafd2fbae4cdd14e876053b0c5a");
@@ -479,7 +497,7 @@ pub(crate) mod test {
}
"###);
meili_snap::snapshot_hash!(format!("{:#?}", spells.settings()), @"054dbf08a79e08bb9becba6f5d090f13");
insta::assert_json_snapshot!(spells.settings().unwrap());
let documents = spells.documents().unwrap().collect::<Result<Vec<_>>>().unwrap();
assert_eq!(documents.len(), 10);
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"235016433dd04262c7f2da01d1e808ce");
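The ranking-rules hunk above makes the v5-to-v6 settings migration tolerant rather than fail-fast: each rule string is re-parsed, and rules that no longer exist are logged and skipped instead of aborting the import. A simplified sketch of that filter (plain string matching stands in for the real `RankingRuleView::from_str`):

```rust
fn migrate_ranking_rules(rules: Vec<String>) -> Vec<String> {
    const KNOWN: &[&str] = &["words", "typo", "proximity", "attribute", "sort", "exactness"];
    rules
        .into_iter()
        .filter(|rule| {
            let known = KNOWN.contains(&rule.as_str())
                || rule.ends_with(":asc")
                || rule.ends_with(":desc");
            if !known {
                // Mirrors the log::warn! in the converter: drop, don't fail.
                eprintln!("The ranking rule `{rule}` does not exist anymore.");
            }
            known
        })
        .collect()
}

fn main() {
    let rules = vec!["words".into(), "wordsPosition".into(), "release_date:asc".into()];
    assert_eq!(migrate_ranking_rules(rules), vec!["words", "release_date:asc"]);
}
```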

View File

@@ -1,42 +0,0 @@
use meilisearch_auth::error::AuthControllerError;
use meilisearch_types::error::{Code, ErrorCode};
use meilisearch_types::internal_error;
use crate::{index_resolver::error::IndexResolverError, tasks::error::TaskError};
pub type Result<T> = std::result::Result<T, DumpError>;
#[derive(thiserror::Error, Debug)]
pub enum DumpError {
#[error("An internal error has occurred. `{0}`.")]
Internal(Box<dyn std::error::Error + Send + Sync + 'static>),
#[error("{0}")]
IndexResolver(Box<IndexResolverError>),
}
internal_error!(
DumpError: milli::heed::Error,
std::io::Error,
tokio::task::JoinError,
tokio::sync::oneshot::error::RecvError,
serde_json::error::Error,
tempfile::PersistError,
fs_extra::error::Error,
AuthControllerError,
TaskError
);
impl From<IndexResolverError> for DumpError {
fn from(e: IndexResolverError) -> Self {
Self::IndexResolver(Box::new(e))
}
}
impl ErrorCode for DumpError {
fn error_code(&self) -> Code {
match self {
DumpError::Internal(_) => Code::Internal,
DumpError::IndexResolver(e) => e.error_code(),
}
}
}


@@ -9,11 +9,11 @@ use self::compat::v4_to_v5::CompatV4ToV5;
use self::compat::v5_to_v6::{CompatIndexV5ToV6, CompatV5ToV6};
use self::v5::V5Reader;
use self::v6::{V6IndexReader, V6Reader};
use crate::{Error, Result, Version};
use crate::{Result, Version};
mod compat;
// pub(self) mod v1;
pub(self) mod v1;
pub(self) mod v2;
pub(self) mod v3;
pub(self) mod v4;
@@ -45,8 +45,9 @@ impl DumpReader {
let MetadataVersion { dump_version } = serde_json::from_reader(&mut meta_file)?;
match dump_version {
// Version::V1 => Ok(Box::new(v1::Reader::open(path)?)),
Version::V1 => Err(Error::DumpV1Unsupported),
Version::V1 => {
Ok(v1::V1Reader::open(path)?.to_v2().to_v3().to_v4().to_v5().to_v6().into())
}
Version::V2 => Ok(v2::V2Reader::open(path)?.to_v3().to_v4().to_v5().to_v6().into()),
Version::V3 => Ok(v3::V3Reader::open(path)?.to_v4().to_v5().to_v6().into()),
Version::V4 => Ok(v4::V4Reader::open(path)?.to_v5().to_v6().into()),
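The new `Version::V1` arm above relies on the compat ladder: each reader only knows how to upgrade to its immediate successor, so opening a v1 dump composes `to_v2().to_v3().to_v4().to_v5().to_v6()`. A toy sketch of the pattern under hypothetical types (not the real dump readers):

```rust
// Each version type converts only to the next one; older versions are
// supported by chaining, never by a direct v1-to-latest converter.
struct V1;
struct V2;
struct V3;

impl V1 {
    fn to_v2(self) -> V2 {
        V2
    }
}

impl V2 {
    fn to_v3(self) -> V3 {
        V3
    }
}

fn open_oldest() -> V3 {
    // Mirrors `v1::V1Reader::open(path)?.to_v2().to_v3()...`: no reader
    // needs to know about any version other than its immediate successor.
    V1.to_v2().to_v3()
}

fn main() {
    let _latest = open_oldest();
}
```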
@@ -189,7 +190,6 @@ pub(crate) mod test {
use super::*;
#[test]
#[ignore]
fn import_dump_v5() {
let dump = File::open("tests/assets/v5.dump").unwrap();
let mut dump = DumpReader::open(dump).unwrap();
@@ -201,7 +201,7 @@ pub(crate) mod test {
// tasks
let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap();
let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip();
meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"42d4200cf6d92a6449989ca48cd8e28a");
meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"41f91d3a94911b2735ec41b07540df5c");
assert_eq!(update_files.len(), 22);
assert!(update_files[0].is_none()); // the dump creation
assert!(update_files[1].is_some()); // the enqueued document addition
@@ -222,53 +222,52 @@ pub(crate) mod test {
assert!(indexes.is_empty());
// products
insta::assert_json_snapshot!(products.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
insta::assert_json_snapshot!(products.metadata(), @r###"
{
"uid": "products",
"primaryKey": "sku",
"createdAt": "[now]",
"updatedAt": "[now]"
"createdAt": "2022-10-04T15:51:35.939396731Z",
"updatedAt": "2022-10-04T15:55:01.897325373Z"
}
"###);
meili_snap::snapshot_hash!(format!("{:#?}", products.settings()), @"8e5cadabf74aebe1160bf51c3d489efe");
insta::assert_json_snapshot!(products.settings().unwrap());
let documents = products.documents().unwrap().collect::<Result<Vec<_>>>().unwrap();
assert_eq!(documents.len(), 10);
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"b01c8371aea4c7171af0d4d846a2bdca");
// movies
insta::assert_json_snapshot!(movies.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
insta::assert_json_snapshot!(movies.metadata(), @r###"
{
"uid": "movies",
"primaryKey": "id",
"createdAt": "[now]",
"updatedAt": "[now]"
"createdAt": "2022-10-04T15:51:35.291992167Z",
"updatedAt": "2022-10-04T15:55:10.33561842Z"
}
"###);
meili_snap::snapshot_hash!(format!("{:#?}", movies.settings()), @"4894ac1e74b9e1069ed5ee262b7a1aca");
insta::assert_json_snapshot!(movies.settings().unwrap());
let documents = movies.documents().unwrap().collect::<Result<Vec<_>>>().unwrap();
assert_eq!(documents.len(), 200);
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"e962baafd2fbae4cdd14e876053b0c5a");
// spells
insta::assert_json_snapshot!(spells.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
insta::assert_json_snapshot!(spells.metadata(), @r###"
{
"uid": "dnd_spells",
"primaryKey": "index",
"createdAt": "[now]",
"updatedAt": "[now]"
"createdAt": "2022-10-04T15:51:37.381094632Z",
"updatedAt": "2022-10-04T15:55:02.394503431Z"
}
"###);
meili_snap::snapshot_hash!(format!("{:#?}", spells.settings()), @"054dbf08a79e08bb9becba6f5d090f13");
insta::assert_json_snapshot!(spells.settings().unwrap());
let documents = spells.documents().unwrap().collect::<Result<Vec<_>>>().unwrap();
assert_eq!(documents.len(), 10);
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"235016433dd04262c7f2da01d1e808ce");
}
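Note the snapshot change running through these tests: the `{ ".createdAt" => "[now]", ".updatedAt" => "[now]" }` redactions are dropped in favor of asserting the literal timestamps stored in the dump. A small sketch of the redacted form, assuming insta with its `redactions` feature enabled (values here are illustrative):

```rust
use insta::assert_json_snapshot;
use serde_json::json;

#[test]
fn metadata_snapshot() {
    let metadata = json!({
        "createdAt": "2022-10-04T15:51:35.939396731Z",
        "uid": "products",
    });

    // The selector masks the volatile field before the snapshot comparison,
    // so the assertion stays stable if the dump is ever regenerated.
    assert_json_snapshot!(metadata, { ".createdAt" => "[now]" }, @r###"
    {
      "createdAt": "[now]",
      "uid": "products"
    }
    "###);
}
```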
#[test]
#[ignore]
fn import_dump_v4() {
let dump = File::open("tests/assets/v4.dump").unwrap();
let mut dump = DumpReader::open(dump).unwrap();
@@ -280,7 +279,7 @@ pub(crate) mod test {
// tasks
let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap();
let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip();
meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"491e244a80a19fe2a900b809d310c24a");
meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"c2445ddd1785528b80f2ba534d3bd00c");
assert_eq!(update_files.len(), 10);
assert!(update_files[0].is_some()); // the enqueued document addition
assert!(update_files[1..].iter().all(|u| u.is_none())); // everything already processed
@@ -300,53 +299,52 @@ pub(crate) mod test {
assert!(indexes.is_empty());
// products
insta::assert_json_snapshot!(products.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
insta::assert_json_snapshot!(products.metadata(), @r###"
{
"uid": "products",
"primaryKey": "sku",
"createdAt": "[now]",
"updatedAt": "[now]"
"createdAt": "2022-10-06T12:53:39.360187055Z",
"updatedAt": "2022-10-06T12:53:40.603035979Z"
}
"###);
meili_snap::snapshot_hash!(format!("{:#?}", products.settings()), @"1f9da51a4518166fb440def5437eafdb");
insta::assert_json_snapshot!(products.settings().unwrap());
let documents = products.documents().unwrap().collect::<Result<Vec<_>>>().unwrap();
assert_eq!(documents.len(), 10);
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"b01c8371aea4c7171af0d4d846a2bdca");
// movies
insta::assert_json_snapshot!(movies.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
insta::assert_json_snapshot!(movies.metadata(), @r###"
{
"uid": "movies",
"primaryKey": "id",
"createdAt": "[now]",
"updatedAt": "[now]"
"createdAt": "2022-10-06T12:53:38.710611568Z",
"updatedAt": "2022-10-06T12:53:49.785862546Z"
}
"###);
meili_snap::snapshot_hash!(format!("{:#?}", movies.settings()), @"488816aba82c1bd65f1609630055c611");
insta::assert_json_snapshot!(movies.settings().unwrap());
let documents = movies.documents().unwrap().collect::<Result<Vec<_>>>().unwrap();
assert_eq!(documents.len(), 110);
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"786022a66ecb992c8a2a60fee070a5ab");
// spells
insta::assert_json_snapshot!(spells.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
insta::assert_json_snapshot!(spells.metadata(), @r###"
{
"uid": "dnd_spells",
"primaryKey": "index",
"createdAt": "[now]",
"updatedAt": "[now]"
"createdAt": "2022-10-06T12:53:40.831649057Z",
"updatedAt": "2022-10-06T12:53:41.116036186Z"
}
"###);
meili_snap::snapshot_hash!(format!("{:#?}", spells.settings()), @"7b4f66dad597dc651650f35fe34be27f");
insta::assert_json_snapshot!(spells.settings().unwrap());
let documents = spells.documents().unwrap().collect::<Result<Vec<_>>>().unwrap();
assert_eq!(documents.len(), 10);
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"235016433dd04262c7f2da01d1e808ce");
}
#[test]
#[ignore]
fn import_dump_v3() {
let dump = File::open("tests/assets/v3.dump").unwrap();
let mut dump = DumpReader::open(dump).unwrap();
@@ -358,7 +356,7 @@ pub(crate) mod test {
// tasks
let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap();
let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip();
meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"7cacce2e21702be696b866808c726946");
meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"cd12efd308fe3ed226356a727ab42ed3");
assert_eq!(update_files.len(), 10);
assert!(update_files[0].is_some()); // the enqueued document addition
assert!(update_files[1..].iter().all(|u| u.is_none())); // everything already processed
@@ -388,7 +386,7 @@ pub(crate) mod test {
}
"###);
meili_snap::snapshot_hash!(format!("{:#?}", products.settings()), @"855f3165dec609b919171ff83f82b364");
insta::assert_json_snapshot!(products.settings().unwrap());
let documents = products.documents().unwrap().collect::<Result<Vec<_>>>().unwrap();
assert_eq!(documents.len(), 10);
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"548284a84de510f71e88e6cdea495cf5");
@@ -403,7 +401,7 @@ pub(crate) mod test {
}
"###);
meili_snap::snapshot_hash!(format!("{:#?}", movies.settings()), @"43e0bf1746c3ea1d64c1e10ea544c190");
insta::assert_json_snapshot!(movies.settings().unwrap());
let documents = movies.documents().unwrap().collect::<Result<Vec<_>>>().unwrap();
assert_eq!(documents.len(), 110);
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"d153b5a81d8b3cdcbe1dec270b574022");
@@ -418,7 +416,7 @@ pub(crate) mod test {
}
"###);
meili_snap::snapshot_hash!(format!("{:#?}", movies2.settings()), @"5fd06a5038f49311600379d43412b655");
insta::assert_json_snapshot!(movies2.settings().unwrap());
let documents = movies2.documents().unwrap().collect::<Result<Vec<_>>>().unwrap();
assert_eq!(documents.len(), 0);
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"d751713988987e9331980363e24189ce");
@@ -433,14 +431,13 @@ pub(crate) mod test {
}
"###);
meili_snap::snapshot_hash!(format!("{:#?}", spells.settings()), @"5fd06a5038f49311600379d43412b655");
insta::assert_json_snapshot!(spells.settings().unwrap());
let documents = spells.documents().unwrap().collect::<Result<Vec<_>>>().unwrap();
assert_eq!(documents.len(), 10);
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"235016433dd04262c7f2da01d1e808ce");
}
#[test]
#[ignore]
fn import_dump_v2() {
let dump = File::open("tests/assets/v2.dump").unwrap();
let mut dump = DumpReader::open(dump).unwrap();
@@ -452,7 +449,7 @@ pub(crate) mod test {
// tasks
let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap();
let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip();
meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"6cabec4e252b74c8f3a2c8517622e85f");
meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"bc616290adfe7d09a624cf6065ca9069");
assert_eq!(update_files.len(), 9);
assert!(update_files[0].is_some()); // the enqueued document addition
assert!(update_files[1..].iter().all(|u| u.is_none())); // everything already processed
@@ -482,7 +479,7 @@ pub(crate) mod test {
}
"###);
meili_snap::snapshot_hash!(format!("{:#?}", products.settings()), @"b15b71f56dd082d8e8ec5182e688bf36");
insta::assert_json_snapshot!(products.settings().unwrap());
let documents = products.documents().unwrap().collect::<Result<Vec<_>>>().unwrap();
assert_eq!(documents.len(), 10);
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"548284a84de510f71e88e6cdea495cf5");
@@ -497,7 +494,7 @@ pub(crate) mod test {
}
"###);
meili_snap::snapshot_hash!(format!("{:#?}", movies.settings()), @"5389153ddf5527fa79c54b6a6e9c21f6");
insta::assert_json_snapshot!(movies.settings().unwrap());
let documents = movies.documents().unwrap().collect::<Result<Vec<_>>>().unwrap();
assert_eq!(documents.len(), 110);
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"d153b5a81d8b3cdcbe1dec270b574022");
@@ -512,7 +509,7 @@ pub(crate) mod test {
}
"###);
meili_snap::snapshot_hash!(format!("{:#?}", movies2.settings()), @"8aebab01301d266acf3e18dd449c008f");
insta::assert_json_snapshot!(movies2.settings().unwrap());
let documents = movies2.documents().unwrap().collect::<Result<Vec<_>>>().unwrap();
assert_eq!(documents.len(), 0);
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"d751713988987e9331980363e24189ce");
@@ -527,9 +524,162 @@ pub(crate) mod test {
}
"###);
meili_snap::snapshot_hash!(format!("{:#?}", spells.settings()), @"8aebab01301d266acf3e18dd449c008f");
insta::assert_json_snapshot!(spells.settings().unwrap());
let documents = spells.documents().unwrap().collect::<Result<Vec<_>>>().unwrap();
assert_eq!(documents.len(), 10);
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"235016433dd04262c7f2da01d1e808ce");
}
#[test]
fn import_dump_v2_from_meilisearch_v0_22_0_issue_3435() {
let dump = File::open("tests/assets/v2-v0.22.0.dump").unwrap();
let mut dump = DumpReader::open(dump).unwrap();
// top level infos
insta::assert_display_snapshot!(dump.date().unwrap(), @"2023-01-30 16:26:09.247261 +00:00:00");
assert_eq!(dump.instance_uid().unwrap(), None);
// tasks
let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap();
let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip();
meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"2db37756d8af1fb7623436b76e8956a6");
assert_eq!(update_files.len(), 8);
assert!(update_files[0..].iter().all(|u| u.is_none())); // everything already processed
// keys
let keys = dump.keys().unwrap().collect::<Result<Vec<_>>>().unwrap();
meili_snap::snapshot_hash!(meili_snap::json_string!(keys), @"d751713988987e9331980363e24189ce");
// indexes
let mut indexes = dump.indexes().unwrap().collect::<Result<Vec<_>>>().unwrap();
// the indexes are not ordered in any way by default
indexes.sort_by_key(|index| index.metadata().uid.to_string());
let mut products = indexes.pop().unwrap();
let mut movies = indexes.pop().unwrap();
let mut spells = indexes.pop().unwrap();
assert!(indexes.is_empty());
// products
insta::assert_json_snapshot!(products.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
{
"uid": "products",
"primaryKey": "sku",
"createdAt": "[now]",
"updatedAt": "[now]"
}
"###);
insta::assert_json_snapshot!(products.settings().unwrap());
let documents = products.documents().unwrap().collect::<Result<Vec<_>>>().unwrap();
assert_eq!(documents.len(), 10);
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"548284a84de510f71e88e6cdea495cf5");
// movies
insta::assert_json_snapshot!(movies.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
{
"uid": "movies",
"primaryKey": "id",
"createdAt": "[now]",
"updatedAt": "[now]"
}
"###);
insta::assert_json_snapshot!(movies.settings().unwrap());
let documents = movies.documents().unwrap().collect::<Result<Vec<_>>>().unwrap();
assert_eq!(documents.len(), 10);
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"0227598af846e574139ee0b80e03a720");
// spells
insta::assert_json_snapshot!(spells.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
{
"uid": "dnd_spells",
"primaryKey": "index",
"createdAt": "[now]",
"updatedAt": "[now]"
}
"###);
insta::assert_json_snapshot!(spells.settings().unwrap());
let documents = spells.documents().unwrap().collect::<Result<Vec<_>>>().unwrap();
assert_eq!(documents.len(), 10);
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"235016433dd04262c7f2da01d1e808ce");
}
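A side note on the pop order used throughout these tests: the indexes are sorted by uid ascending and then taken with `Vec::pop`, which removes from the back, so they come out reverse-alphabetically (`products`, then `movies`, then `dnd_spells`). A quick runnable check:

```rust
fn main() {
    let mut indexes = vec!["movies", "dnd_spells", "products"];
    // Sort ascending by uid, as the tests do...
    indexes.sort_by_key(|uid| uid.to_string());
    // ...then `pop` drains from the back: reverse-alphabetical order.
    assert_eq!(indexes.pop(), Some("products"));
    assert_eq!(indexes.pop(), Some("movies"));
    assert_eq!(indexes.pop(), Some("dnd_spells"));
    assert!(indexes.is_empty());
}
```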
#[test]
fn import_dump_v1() {
let dump = File::open("tests/assets/v1.dump").unwrap();
let mut dump = DumpReader::open(dump).unwrap();
// top level infos
assert_eq!(dump.date(), None);
assert_eq!(dump.instance_uid().unwrap(), None);
// tasks
let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap();
let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip();
meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"8df6eab075a44b3c1af6b726f9fd9a43");
assert_eq!(update_files.len(), 9);
assert!(update_files[..].iter().all(|u| u.is_none())); // no update file in dump v1
// keys
let keys = dump.keys().unwrap().collect::<Result<Vec<_>>>().unwrap();
meili_snap::snapshot!(meili_snap::json_string!(keys), @"[]");
meili_snap::snapshot_hash!(meili_snap::json_string!(keys), @"d751713988987e9331980363e24189ce");
// indexes
let mut indexes = dump.indexes().unwrap().collect::<Result<Vec<_>>>().unwrap();
// the indexes are not ordered in any way by default
indexes.sort_by_key(|index| index.metadata().uid.to_string());
let mut products = indexes.pop().unwrap();
let mut movies = indexes.pop().unwrap();
let mut spells = indexes.pop().unwrap();
assert!(indexes.is_empty());
// products
insta::assert_json_snapshot!(products.metadata(), @r###"
{
"uid": "products",
"primaryKey": "sku",
"createdAt": "2022-10-02T13:23:39.976870431Z",
"updatedAt": "2022-10-02T13:27:54.353262482Z"
}
"###);
insta::assert_json_snapshot!(products.settings().unwrap());
let documents = products.documents().unwrap().collect::<Result<Vec<_>>>().unwrap();
assert_eq!(documents.len(), 10);
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"b01c8371aea4c7171af0d4d846a2bdca");
// movies
insta::assert_json_snapshot!(movies.metadata(), @r###"
{
"uid": "movies",
"primaryKey": "id",
"createdAt": "2022-10-02T13:15:29.477512777Z",
"updatedAt": "2022-10-02T13:21:12.671204856Z"
}
"###);
insta::assert_json_snapshot!(movies.settings().unwrap());
let documents = movies.documents().unwrap().collect::<Result<Vec<_>>>().unwrap();
assert_eq!(documents.len(), 10);
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"b63dbed5bbc059f3e32bc471ae699bf5");
// spells
insta::assert_json_snapshot!(spells.metadata(), @r###"
{
"uid": "dnd_spells",
"primaryKey": "index",
"createdAt": "2022-10-02T13:38:26.358882984Z",
"updatedAt": "2022-10-02T13:38:26.385609433Z"
}
"###);
insta::assert_json_snapshot!(spells.settings().unwrap());
let documents = spells.documents().unwrap().collect::<Result<Vec<_>>>().unwrap();
assert_eq!(documents.len(), 10);
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"aa24c0cfc733d66c396237ad44263bed");
}
}


@@ -0,0 +1,24 @@
---
source: dump/src/reader/mod.rs
expression: spells.settings().unwrap()
---
{
"displayedAttributes": [
"*"
],
"searchableAttributes": [
"*"
],
"filterableAttributes": [],
"sortableAttributes": [],
"rankingRules": [
"typo",
"words",
"proximity",
"attribute",
"exactness"
],
"stopWords": [],
"synonyms": {},
"distinctAttribute": null
}


@@ -0,0 +1,38 @@
---
source: dump/src/reader/mod.rs
expression: products.settings().unwrap()
---
{
"displayedAttributes": [
"*"
],
"searchableAttributes": [
"*"
],
"filterableAttributes": [],
"sortableAttributes": [],
"rankingRules": [
"typo",
"words",
"proximity",
"attribute",
"exactness"
],
"stopWords": [],
"synonyms": {
"android": [
"phone",
"smartphone"
],
"iphone": [
"phone",
"smartphone"
],
"phone": [
"android",
"iphone",
"smartphone"
]
},
"distinctAttribute": null
}


@@ -0,0 +1,31 @@
---
source: dump/src/reader/mod.rs
expression: movies.settings().unwrap()
---
{
"displayedAttributes": [
"*"
],
"searchableAttributes": [
"*"
],
"filterableAttributes": [
"genres",
"id"
],
"sortableAttributes": [
"genres",
"id"
],
"rankingRules": [
"typo",
"words",
"proximity",
"attribute",
"exactness",
"release_date:asc"
],
"stopWords": [],
"synonyms": {},
"distinctAttribute": null
}


@@ -0,0 +1,23 @@
---
source: dump/src/reader/mod.rs
expression: movies2.settings().unwrap()
---
{
"displayedAttributes": [
"*"
],
"searchableAttributes": [
"*"
],
"filterableAttributes": [],
"rankingRules": [
"words",
"typo",
"proximity",
"attribute",
"exactness"
],
"stopWords": [],
"synonyms": {},
"distinctAttribute": null
}


@@ -0,0 +1,23 @@
---
source: dump/src/reader/mod.rs
expression: spells.settings().unwrap()
---
{
"displayedAttributes": [
"*"
],
"searchableAttributes": [
"*"
],
"filterableAttributes": [],
"rankingRules": [
"words",
"typo",
"proximity",
"attribute",
"exactness"
],
"stopWords": [],
"synonyms": {},
"distinctAttribute": null
}


@@ -0,0 +1,37 @@
---
source: dump/src/reader/mod.rs
expression: products.settings().unwrap()
---
{
"displayedAttributes": [
"*"
],
"searchableAttributes": [
"*"
],
"filterableAttributes": [],
"rankingRules": [
"words",
"typo",
"proximity",
"attribute",
"exactness"
],
"stopWords": [],
"synonyms": {
"android": [
"phone",
"smartphone"
],
"iphone": [
"phone",
"smartphone"
],
"phone": [
"android",
"iphone",
"smartphone"
]
},
"distinctAttribute": null
}


@@ -0,0 +1,24 @@
---
source: dump/src/reader/mod.rs
expression: movies.settings().unwrap()
---
{
"displayedAttributes": [
"*"
],
"searchableAttributes": [
"*"
],
"filterableAttributes": [],
"rankingRules": [
"words",
"typo",
"proximity",
"attribute",
"exactness",
"release_date:asc"
],
"stopWords": [],
"synonyms": {},
"distinctAttribute": null
}


@@ -0,0 +1,25 @@
---
source: dump/src/reader/mod.rs
expression: spells.settings().unwrap()
---
{
"displayedAttributes": [
"*"
],
"searchableAttributes": [
"*"
],
"filterableAttributes": [],
"sortableAttributes": [],
"rankingRules": [
"words",
"typo",
"proximity",
"attribute",
"sort",
"exactness"
],
"stopWords": [],
"synonyms": {},
"distinctAttribute": null
}


@@ -0,0 +1,39 @@
---
source: dump/src/reader/mod.rs
expression: products.settings().unwrap()
---
{
"displayedAttributes": [
"*"
],
"searchableAttributes": [
"*"
],
"filterableAttributes": [],
"sortableAttributes": [],
"rankingRules": [
"words",
"typo",
"proximity",
"attribute",
"sort",
"exactness"
],
"stopWords": [],
"synonyms": {
"android": [
"phone",
"smartphone"
],
"iphone": [
"phone",
"smartphone"
],
"phone": [
"android",
"iphone",
"smartphone"
]
},
"distinctAttribute": null
}


@@ -0,0 +1,30 @@
---
source: dump/src/reader/mod.rs
expression: movies.settings().unwrap()
---
{
"displayedAttributes": [
"*"
],
"searchableAttributes": [
"*"
],
"filterableAttributes": [
"genres",
"id"
],
"sortableAttributes": [
"release_date"
],
"rankingRules": [
"words",
"typo",
"proximity",
"attribute",
"exactness",
"release_date:asc"
],
"stopWords": [],
"synonyms": {},
"distinctAttribute": null
}


@@ -0,0 +1,25 @@
---
source: dump/src/reader/mod.rs
expression: movies2.settings().unwrap()
---
{
"displayedAttributes": [
"*"
],
"searchableAttributes": [
"*"
],
"filterableAttributes": [],
"sortableAttributes": [],
"rankingRules": [
"words",
"typo",
"proximity",
"attribute",
"sort",
"exactness"
],
"stopWords": [],
"synonyms": {},
"distinctAttribute": null
}


@@ -0,0 +1,25 @@
---
source: dump/src/reader/mod.rs
expression: spells.settings().unwrap()
---
{
"displayedAttributes": [
"*"
],
"searchableAttributes": [
"*"
],
"filterableAttributes": [],
"sortableAttributes": [],
"rankingRules": [
"words",
"typo",
"proximity",
"attribute",
"sort",
"exactness"
],
"stopWords": [],
"synonyms": {},
"distinctAttribute": null
}


@@ -0,0 +1,39 @@
---
source: dump/src/reader/mod.rs
expression: products.settings().unwrap()
---
{
"displayedAttributes": [
"*"
],
"searchableAttributes": [
"*"
],
"filterableAttributes": [],
"sortableAttributes": [],
"rankingRules": [
"words",
"typo",
"proximity",
"attribute",
"sort",
"exactness"
],
"stopWords": [],
"synonyms": {
"android": [
"phone",
"smartphone"
],
"iphone": [
"phone",
"smartphone"
],
"phone": [
"android",
"iphone",
"smartphone"
]
},
"distinctAttribute": null
}


@@ -0,0 +1,31 @@
---
source: dump/src/reader/mod.rs
expression: movies.settings().unwrap()
---
{
"displayedAttributes": [
"*"
],
"searchableAttributes": [
"*"
],
"filterableAttributes": [
"genres",
"id"
],
"sortableAttributes": [
"release_date"
],
"rankingRules": [
"words",
"typo",
"proximity",
"attribute",
"sort",
"exactness",
"release_date:asc"
],
"stopWords": [],
"synonyms": {},
"distinctAttribute": null
}


@@ -0,0 +1,34 @@
---
source: dump/src/reader/mod.rs
expression: spells.settings().unwrap()
---
{
"displayedAttributes": [
"*"
],
"searchableAttributes": [
"*"
],
"filterableAttributes": [],
"sortableAttributes": [],
"rankingRules": [
"words",
"typo",
"proximity",
"attribute",
"sort",
"exactness"
],
"stopWords": [],
"synonyms": {},
"distinctAttribute": null,
"typoTolerance": {
"enabled": true,
"minWordSizeForTypos": {
"oneTypo": 5,
"twoTypos": 9
},
"disableOnWords": [],
"disableOnAttributes": []
}
}


@@ -0,0 +1,48 @@
---
source: dump/src/reader/mod.rs
expression: products.settings().unwrap()
---
{
"displayedAttributes": [
"*"
],
"searchableAttributes": [
"*"
],
"filterableAttributes": [],
"sortableAttributes": [],
"rankingRules": [
"words",
"typo",
"proximity",
"attribute",
"sort",
"exactness"
],
"stopWords": [],
"synonyms": {
"android": [
"phone",
"smartphone"
],
"iphone": [
"phone",
"smartphone"
],
"phone": [
"android",
"iphone",
"smartphone"
]
},
"distinctAttribute": null,
"typoTolerance": {
"enabled": true,
"minWordSizeForTypos": {
"oneTypo": 5,
"twoTypos": 9
},
"disableOnWords": [],
"disableOnAttributes": []
}
}


@@ -0,0 +1,40 @@
---
source: dump/src/reader/mod.rs
expression: movies.settings().unwrap()
---
{
"displayedAttributes": [
"*"
],
"searchableAttributes": [
"*"
],
"filterableAttributes": [
"genres",
"id"
],
"sortableAttributes": [
"release_date"
],
"rankingRules": [
"words",
"typo",
"proximity",
"attribute",
"sort",
"exactness",
"release_date:asc"
],
"stopWords": [],
"synonyms": {},
"distinctAttribute": null,
"typoTolerance": {
"enabled": true,
"minWordSizeForTypos": {
"oneTypo": 5,
"twoTypos": 9
},
"disableOnWords": [],
"disableOnAttributes": []
}
}


@@ -0,0 +1,40 @@
---
source: dump/src/reader/mod.rs
expression: spells.settings().unwrap()
---
{
"displayedAttributes": [
"*"
],
"searchableAttributes": [
"*"
],
"filterableAttributes": [],
"sortableAttributes": [],
"rankingRules": [
"words",
"typo",
"proximity",
"attribute",
"sort",
"exactness"
],
"stopWords": [],
"synonyms": {},
"distinctAttribute": null,
"typoTolerance": {
"enabled": true,
"minWordSizeForTypos": {
"oneTypo": 5,
"twoTypos": 9
},
"disableOnWords": [],
"disableOnAttributes": []
},
"faceting": {
"maxValuesPerFacet": 100
},
"pagination": {
"maxTotalHits": 1000
}
}


@@ -0,0 +1,54 @@
---
source: dump/src/reader/mod.rs
expression: products.settings().unwrap()
---
{
"displayedAttributes": [
"*"
],
"searchableAttributes": [
"*"
],
"filterableAttributes": [],
"sortableAttributes": [],
"rankingRules": [
"words",
"typo",
"proximity",
"attribute",
"sort",
"exactness"
],
"stopWords": [],
"synonyms": {
"android": [
"phone",
"smartphone"
],
"iphone": [
"phone",
"smartphone"
],
"phone": [
"android",
"iphone",
"smartphone"
]
},
"distinctAttribute": null,
"typoTolerance": {
"enabled": true,
"minWordSizeForTypos": {
"oneTypo": 5,
"twoTypos": 9
},
"disableOnWords": [],
"disableOnAttributes": []
},
"faceting": {
"maxValuesPerFacet": 100
},
"pagination": {
"maxTotalHits": 1000
}
}


@@ -0,0 +1,46 @@
---
source: dump/src/reader/mod.rs
expression: movies.settings().unwrap()
---
{
"displayedAttributes": [
"*"
],
"searchableAttributes": [
"*"
],
"filterableAttributes": [
"genres",
"id"
],
"sortableAttributes": [
"release_date"
],
"rankingRules": [
"words",
"typo",
"proximity",
"attribute",
"sort",
"exactness",
"release_date:asc"
],
"stopWords": [],
"synonyms": {},
"distinctAttribute": null,
"typoTolerance": {
"enabled": true,
"minWordSizeForTypos": {
"oneTypo": 5,
"twoTypos": 9
},
"disableOnWords": [],
"disableOnAttributes": []
},
"faceting": {
"maxValuesPerFacet": 100
},
"pagination": {
"maxTotalHits": 1000
}
}


@@ -1,173 +1,262 @@
use std::{
convert::Infallible,
fs::{self, File},
io::{BufRead, BufReader},
path::Path,
};
use std::fs::{self, File};
use std::io::{BufRead, BufReader};
use std::path::{Path, PathBuf};
use serde::Deserialize;
use tempfile::TempDir;
use time::OffsetDateTime;
use self::update::UpdateStatus;
use super::{DumpReader, IndexReader};
use crate::{Error, Result, Version};
use super::compat::v1_to_v2::CompatV1ToV2;
use super::Document;
use crate::{IndexMetadata, Result, Version};
pub mod settings;
pub mod update;
pub mod v1;
pub struct V1Reader {
dump: TempDir,
metadata: v1::Metadata,
indexes: Vec<V1IndexReader>,
pub dump: TempDir,
pub db_version: String,
pub dump_version: crate::Version,
indexes: Vec<V1Index>,
}
struct V1IndexReader {
name: String,
pub struct IndexUuid {
pub name: String,
pub uid: String,
}
pub type Task = self::update::UpdateStatus;
struct V1Index {
metadata: IndexMetadataV1,
path: PathBuf,
}
impl V1Index {
pub fn new(path: PathBuf, metadata: Index) -> Self {
Self { metadata: metadata.into(), path }
}
pub fn open(&self) -> Result<V1IndexReader> {
V1IndexReader::new(&self.path, self.metadata.clone())
}
pub fn metadata(&self) -> &IndexMetadata {
&self.metadata.metadata
}
}
pub struct V1IndexReader {
metadata: IndexMetadataV1,
documents: BufReader<File>,
settings: BufReader<File>,
updates: BufReader<File>,
current_update: Option<UpdateStatus>,
}
impl V1IndexReader {
pub fn new(name: String, path: &Path) -> Result<Self> {
let mut ret = V1IndexReader {
name,
pub fn new(path: &Path, metadata: IndexMetadataV1) -> Result<Self> {
Ok(V1IndexReader {
metadata,
documents: BufReader::new(File::open(path.join("documents.jsonl"))?),
settings: BufReader::new(File::open(path.join("settings.json"))?),
updates: BufReader::new(File::open(path.join("updates.jsonl"))?),
current_update: None,
};
ret.next_update();
Ok(ret)
})
}
pub fn next_update(&mut self) -> Result<Option<UpdateStatus>> {
let current_update = if let Some(line) = self.updates.lines().next() {
Some(serde_json::from_str(&line?)?)
} else {
None
};
pub fn metadata(&self) -> &IndexMetadata {
&self.metadata.metadata
}
Ok(std::mem::replace(&mut self.current_update, current_update))
pub fn documents(&mut self) -> Result<impl Iterator<Item = Result<Document>> + '_> {
Ok((&mut self.documents)
.lines()
.map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) }))
}
pub fn settings(&mut self) -> Result<self::settings::Settings> {
Ok(serde_json::from_reader(&mut self.settings)?)
}
pub fn tasks(self) -> impl Iterator<Item = Result<Task>> {
self.updates.lines().map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) })
}
}
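The new `documents()` and `tasks()` readers above stream the dump line by line: each JSONL entry is deserialized only when the iterator is advanced, so nothing is buffered up front. A minimal sketch of the same pattern (the file name and boxed error type are illustrative, not the crate's actual `Result`):

```rust
use std::error::Error;
use std::fs::File;
use std::io::{BufRead, BufReader};

fn documents(
    path: &str,
) -> std::io::Result<impl Iterator<Item = Result<serde_json::Value, Box<dyn Error>>>> {
    let reader = BufReader::new(File::open(path)?);
    // Each line is read and parsed lazily; a malformed document surfaces as
    // an `Err` for that item instead of aborting the whole iteration.
    Ok(reader.lines().map(|line| -> Result<_, Box<dyn Error>> {
        Ok(serde_json::from_str(&line?)?)
    }))
}

fn main() -> Result<(), Box<dyn Error>> {
    // Hypothetical path, mirroring the per-index `documents.jsonl` layout.
    for doc in documents("documents.jsonl")? {
        println!("{}", doc?);
    }
    Ok(())
}
```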
impl V1Reader {
pub fn open(dump: TempDir) -> Result<Self> {
let mut meta_file = fs::read(dump.path().join("metadata.json"))?;
let metadata = serde_json::from_reader(&*meta_file)?;
let meta_file = fs::read(dump.path().join("metadata.json"))?;
let metadata: Metadata = serde_json::from_reader(&*meta_file)?;
let mut indexes = Vec::new();
let entries = fs::read_dir(dump.path())?;
for entry in entries {
let entry = entry?;
if entry.file_type()?.is_dir() {
indexes.push(V1IndexReader::new(
entry
.file_name()
.to_str()
.ok_or(Error::BadIndexName)?
.to_string(),
&entry.path(),
)?);
}
for index in metadata.indexes.into_iter() {
let index_path = dump.path().join(&index.uid);
indexes.push(V1Index::new(index_path, index));
}
Ok(V1Reader {
dump,
metadata,
indexes,
db_version: metadata.db_version,
dump_version: metadata.dump_version,
})
}
fn next_update(&mut self) -> Result<Option<UpdateStatus>> {
if let Some((idx, _)) = self
.indexes
pub fn to_v2(self) -> CompatV1ToV2 {
CompatV1ToV2 { from: self }
}
pub fn index_uuid(&self) -> Vec<IndexUuid> {
self.indexes
.iter()
.map(|index| index.current_update)
.enumerate()
.filter_map(|(idx, update)| update.map(|u| (idx, u)))
.min_by_key(|(_, update)| update.enqueued_at())
{
self.indexes[idx].next_update()
} else {
Ok(None)
.map(|index| IndexUuid {
name: index.metadata.name.to_owned(),
uid: index.metadata().uid.to_owned(),
})
.collect()
}
pub fn version(&self) -> Version {
Version::V1
}
pub fn date(&self) -> Option<OffsetDateTime> {
None
}
pub fn indexes(&self) -> Result<impl Iterator<Item = Result<V1IndexReader>> + '_> {
Ok(self.indexes.iter().map(|index| index.open()))
}
}
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct Index {
pub name: String,
pub uid: String,
#[serde(with = "time::serde::rfc3339")]
created_at: OffsetDateTime,
#[serde(with = "time::serde::rfc3339")]
updated_at: OffsetDateTime,
pub primary_key: Option<String>,
}
#[derive(Clone)]
pub struct IndexMetadataV1 {
pub name: String,
pub metadata: crate::IndexMetadata,
}
impl From<Index> for IndexMetadataV1 {
fn from(index: Index) -> Self {
IndexMetadataV1 {
name: index.name,
metadata: crate::IndexMetadata {
uid: index.uid,
primary_key: index.primary_key,
created_at: index.created_at,
updated_at: index.updated_at,
},
}
}
}
impl IndexReader for &V1IndexReader {
type Document = serde_json::Map<String, serde_json::Value>;
type Settings = settings::Settings;
fn name(&self) -> &str {
todo!()
}
fn documents(&self) -> Result<Box<dyn Iterator<Item = Result<Self::Document>>>> {
todo!()
}
fn settings(&self) -> Result<Self::Settings> {
todo!()
}
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct Metadata {
pub indexes: Vec<Index>,
pub db_version: String,
pub dump_version: crate::Version,
}
impl DumpReader for V1Reader {
type Document = serde_json::Map<String, serde_json::Value>;
type Settings = settings::Settings;
#[cfg(test)]
pub(crate) mod test {
use std::fs::File;
use std::io::BufReader;
type Task = update::UpdateStatus;
type UpdateFile = Infallible;
use flate2::bufread::GzDecoder;
use meili_snap::insta;
use tempfile::TempDir;
type Key = Infallible;
use super::*;
fn date(&self) -> Option<OffsetDateTime> {
None
}
#[test]
fn read_dump_v1() {
let dump = File::open("tests/assets/v1.dump").unwrap();
let dir = TempDir::new().unwrap();
let mut dump = BufReader::new(dump);
let gz = GzDecoder::new(&mut dump);
let mut archive = tar::Archive::new(gz);
archive.unpack(dir.path()).unwrap();
fn version(&self) -> Version {
Version::V1
}
let dump = V1Reader::open(dir).unwrap();
fn indexes(
&self,
) -> Result<
Box<
dyn Iterator<
Item = Result<
Box<
dyn super::IndexReader<
Document = Self::Document,
Settings = Self::Settings,
>,
>,
>,
>,
>,
> {
Ok(Box::new(self.indexes.iter().map(|index| {
let index = Box::new(index)
as Box<dyn IndexReader<Document = Self::Document, Settings = Self::Settings>>;
Ok(index)
})))
}
// top level infos
assert_eq!(dump.date(), None);
fn tasks(&self) -> Box<dyn Iterator<Item = Result<(Self::Task, Option<Self::UpdateFile>)>>> {
Box::new(std::iter::from_fn(|| {
self.next_update()
.transpose()
.map(|result| result.map(|task| (task, None)))
}))
}
// indexes
let mut indexes = dump.indexes().unwrap().collect::<Result<Vec<_>>>().unwrap();
fn keys(&self) -> Box<dyn Iterator<Item = Result<Self::Key>>> {
Box::new(std::iter::empty())
let mut products = indexes.pop().unwrap();
let mut movies = indexes.pop().unwrap();
let mut dnd_spells = indexes.pop().unwrap();
assert!(indexes.is_empty());
// products
insta::assert_json_snapshot!(products.metadata(), @r###"
{
"uid": "products",
"primaryKey": "sku",
"createdAt": "2022-10-02T13:23:39.976870431Z",
"updatedAt": "2022-10-02T13:27:54.353262482Z"
}
"###);
insta::assert_json_snapshot!(products.settings().unwrap());
let documents = products.documents().unwrap().collect::<Result<Vec<_>>>().unwrap();
assert_eq!(documents.len(), 10);
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"b01c8371aea4c7171af0d4d846a2bdca");
// products tasks
let tasks = products.tasks().collect::<Result<Vec<_>>>().unwrap();
meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"91de507f206ad21964584021932ba7a7");
// movies
insta::assert_json_snapshot!(movies.metadata(), @r###"
{
"uid": "movies",
"primaryKey": "id",
"createdAt": "2022-10-02T13:15:29.477512777Z",
"updatedAt": "2022-10-02T13:21:12.671204856Z"
}
"###);
insta::assert_json_snapshot!(movies.settings().unwrap());
let documents = movies.documents().unwrap().collect::<Result<Vec<_>>>().unwrap();
assert_eq!(documents.len(), 10);
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"b63dbed5bbc059f3e32bc471ae699bf5");
// movies tasks
let tasks = movies.tasks().collect::<Result<Vec<_>>>().unwrap();
meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"55eef4de2bef7e84c5ce0bee47488f56");
// spells
insta::assert_json_snapshot!(dnd_spells.metadata(), @r###"
{
"uid": "dnd_spells",
"primaryKey": "index",
"createdAt": "2022-10-02T13:38:26.358882984Z",
"updatedAt": "2022-10-02T13:38:26.385609433Z"
}
"###);
insta::assert_json_snapshot!(dnd_spells.settings().unwrap());
let documents = dnd_spells.documents().unwrap().collect::<Result<Vec<_>>>().unwrap();
assert_eq!(documents.len(), 10);
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"aa24c0cfc733d66c396237ad44263bed");
// spells tasks
let tasks = dnd_spells.tasks().collect::<Result<Vec<_>>>().unwrap();
meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"836dd7d64d5ad20ad901c44b1b161a4c");
}
}
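For context on the `read_dump_v1` test above: a `.dump` is a gzipped tarball, so the test decompresses it with `flate2` and extracts it with `tar` into a temp directory before opening the reader. A compact sketch of that unpacking step:

```rust
use std::fs::File;
use std::io::BufReader;

use flate2::bufread::GzDecoder;
use tempfile::TempDir;

fn unpack_dump(path: &str) -> std::io::Result<TempDir> {
    let dir = TempDir::new()?;
    let dump = BufReader::new(File::open(path)?);
    // A dump is a gzipped tar archive: decompress, then extract in place.
    let gz = GzDecoder::new(dump);
    let mut archive = tar::Archive::new(gz);
    archive.unpack(dir.path())?;
    Ok(dir)
}
```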


@@ -1,6 +1,9 @@
use std::collections::{BTreeMap, BTreeSet};
use std::result::Result as StdResult;
use std::str::FromStr;
use once_cell::sync::Lazy;
use regex::Regex;
use serde::{Deserialize, Deserializer, Serialize};
#[derive(Default, Clone, Serialize, Deserialize, Debug)]
@@ -53,6 +56,34 @@ pub enum RankingRule {
Desc(String),
}
static ASC_DESC_REGEX: Lazy<Regex> =
Lazy::new(|| Regex::new(r#"(asc|desc)\(([\w_-]+)\)"#).unwrap());
impl FromStr for RankingRule {
type Err = ();
fn from_str(s: &str) -> Result<Self, Self::Err> {
Ok(match s {
"typo" => Self::Typo,
"words" => Self::Words,
"proximity" => Self::Proximity,
"attribute" => Self::Attribute,
"wordsPosition" => Self::WordsPosition,
"exactness" => Self::Exactness,
text => {
let caps = ASC_DESC_REGEX.captures(text).ok_or(())?;
let order = caps.get(1).unwrap().as_str();
let field_name = caps.get(2).unwrap().as_str();
match order {
"asc" => Self::Asc(field_name.to_string()),
"desc" => Self::Desc(field_name.to_string()),
_ => return Err(()),
}
}
})
}
}
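The `FromStr` impl above handles the v1 `asc(field)`/`desc(field)` criteria with `ASC_DESC_REGEX`: group 1 captures the order keyword, group 2 the field name, and anything that matches neither a plain keyword nor the regex is rejected. A self-contained check of what the regex accepts (assuming the `regex` crate as a dependency):

```rust
use regex::Regex;

fn main() {
    let re = Regex::new(r"(asc|desc)\(([\w_-]+)\)").unwrap();

    // `asc(release_date)` splits into an order and a field name.
    let caps = re.captures("asc(release_date)").unwrap();
    assert_eq!(&caps[1], "asc");
    assert_eq!(&caps[2], "release_date");

    // Plain rule names like "typo" don't match the regex and are handled
    // by the keyword branches of the `FromStr` impl instead.
    assert!(re.captures("typo").is_none());
}
```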
// Any value that is present is considered Some value, including null.
fn deserialize_some<'de, T, D>(deserializer: D) -> StdResult<Option<T>, D::Error>
where


@@ -0,0 +1,24 @@
---
source: dump/src/reader/v1/mod.rs
expression: dnd_spells.settings().unwrap()
---
{
"rankingRules": [
"typo",
"words",
"proximity",
"attribute",
"wordsPosition",
"exactness"
],
"distinctAttribute": null,
"searchableAttributes": [
"*"
],
"displayedAttributes": [
"*"
],
"stopWords": [],
"synonyms": {},
"attributesForFaceting": []
}


@@ -0,0 +1,38 @@
---
source: dump/src/reader/v1/mod.rs
expression: products.settings().unwrap()
---
{
"rankingRules": [
"typo",
"words",
"proximity",
"attribute",
"wordsPosition",
"exactness"
],
"distinctAttribute": null,
"searchableAttributes": [
"*"
],
"displayedAttributes": [
"*"
],
"stopWords": [],
"synonyms": {
"android": [
"phone",
"smartphone"
],
"iphone": [
"phone",
"smartphone"
],
"phone": [
"android",
"iphone",
"smartphone"
]
},
"attributesForFaceting": []
}


@@ -0,0 +1,28 @@
---
source: dump/src/reader/v1/mod.rs
expression: movies.settings().unwrap()
---
{
"rankingRules": [
"typo",
"words",
"proximity",
"attribute",
"wordsPosition",
"exactness",
"asc(release_date)"
],
"distinctAttribute": null,
"searchableAttributes": [
"*"
],
"displayedAttributes": [
"*"
],
"stopWords": [],
"synonyms": {},
"attributesForFaceting": [
"id",
"genres"
]
}


@@ -1,54 +1,8 @@
use serde::{Deserialize, Serialize};
use serde_json::Value;
use time::OffsetDateTime;
use super::settings::SettingsUpdate;
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Update {
data: UpdateData,
#[serde(with = "time::serde::rfc3339")]
enqueued_at: OffsetDateTime,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum UpdateData {
ClearAll,
Customs(Vec<u8>),
// (primary key, documents)
DocumentsAddition {
primary_key: Option<String>,
documents: Vec<serde_json::Map<String, Value>>,
},
DocumentsPartial {
primary_key: Option<String>,
documents: Vec<serde_json::Map<String, Value>>,
},
DocumentsDeletion(Vec<String>),
Settings(Box<SettingsUpdate>),
}
impl UpdateData {
pub fn update_type(&self) -> UpdateType {
match self {
UpdateData::ClearAll => UpdateType::ClearAll,
UpdateData::Customs(_) => UpdateType::Customs,
UpdateData::DocumentsAddition { documents, .. } => UpdateType::DocumentsAddition {
number: documents.len(),
},
UpdateData::DocumentsPartial { documents, .. } => UpdateType::DocumentsPartial {
number: documents.len(),
},
UpdateData::DocumentsDeletion(deletion) => UpdateType::DocumentsDeletion {
number: deletion.len(),
},
UpdateData::Settings(update) => UpdateType::Settings {
settings: update.clone(),
},
}
}
}
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "name")]
pub enum UpdateType {

Some files were not shown because too many files have changed in this diff.