Compare commits

...

101 Commits

Author SHA1 Message Date
Tamo
1585b3ed2f re-implement the snapshot import at startup 2023-11-07 14:15:57 +01:00
Tamo
c48f72e6b8 fix a few warnings 2023-11-06 16:32:49 +01:00
Tamo
41178e59fc fix the leader election 2023-11-06 15:00:14 +01:00
Tamo
d1bc7ec58a fix all the bugs on the snapshots export/import pipeline 2023-11-06 12:34:48 +01:00
Tamo
c5ec817f52 WIP fix a lot of bugs: The follower do not wake up when a new snapshot is available 2023-11-02 18:21:17 +01:00
Tamo
d0a3582a56 no deadlock on start 2023-11-02 16:45:33 +01:00
Tamo
0c18962b13 it compiles, now tokio is complaining that we block the main thread (and is right), I need to patch the zookeeper client to not rely on tokio anymore 2023-11-02 12:55:16 +01:00
Tamo
366144146b WIT: it compiles but the processing of tasks and loading of snapshots is still not implemented 2023-11-02 10:47:54 +01:00
Tamo
03b510945b ICE: use a git version of my crate so rust team can pull the repo 2023-10-31 17:59:59 +01:00
Tamo
c3a8d4b7fb ICE 2023-10-31 17:39:10 +01:00
Tamo
c573261ac4 WIP: start updating the zookeeper client => leader election is missing 2023-10-31 13:04:32 +01:00
Clément Renault
8f04353b7d bump strois to a temporary version while we wait for rusty_s3 to do a new release again 2023-10-17 11:49:18 +02:00
Clément Renault
b9983a48c7 No more create the S3 Bucket 2023-10-17 10:26:03 +02:00
Tamo
b22f1260bf bump strois to a temporary version while we wait for rusty_s3 to do a new release 2023-10-13 11:04:12 +02:00
Tamo
dfb84f80da bump strois version 2023-10-10 19:25:12 +02:00
Tamo
98b67f217a move to our new S3 lib 2023-09-28 11:24:18 +02:00
Tamo
6325cda74f bump charabia 2023-09-21 15:18:44 +02:00
Tamo
c71ba72f73 fix build in release mode 2023-09-21 11:05:57 +02:00
Tamo
ecd36b15f0 exposes all the s3 arguments 2023-09-13 18:17:56 +02:00
Clément Renault
8a2e8a887f Load the latest snapshot when we start the engine 2023-09-12 18:08:24 +02:00
Clément Renault
309c33a418 Fix again the dots 2023-09-12 17:55:01 +02:00
Clément Renault
9b01506cee Move the load snapshot step into a function 2023-09-12 16:05:02 +02:00
Clément Renault
f37fdceb15 Use slashes instead of dots for the s3 paths separators 2023-09-12 15:46:15 +02:00
Clément Renault
f544cfa444 Remove tasks and content file on the s3 2023-09-12 15:19:45 +02:00
Clément Renault
c158d03337 Fix internal error 2023-09-12 14:46:13 +02:00
Tamo
b7109c0fd2 start a script to run everything 2023-09-12 11:34:59 +02:00
Kerollmops
a53a0fdb77 Store content files into the S3 2023-09-11 18:17:22 +02:00
Clément Renault
719fdd701b Fix and crash when the tasks path is unknown 2023-09-07 11:31:18 +02:00
Kerollmops
01c13c98ac Mastering minio 2023-09-06 17:54:21 +02:00
Tamo
5b89276fcc starts using s3 2023-09-05 19:25:09 +02:00
Kerollmops
41697c4d65 Introduce the zk-tasks folder 2023-09-04 18:24:34 +02:00
Kerollmops
7d85753573 Make the snapshot download work 2023-09-04 17:38:56 +02:00
Kerollmops
76657af1f9 Add the options into the IndexScheduler 2023-09-04 16:38:05 +02:00
Tamo
966cbdab69 make the tests compile again 2023-09-04 15:39:54 +02:00
Clément Renault
0c68b9ed4c WIP making the final snapshot swap 2023-08-31 15:56:42 +02:00
Clément Renault
d7233ecdb8 Make things to compile again 2023-08-31 14:55:14 +02:00
Clément Renault
95a011af13 Wrap the IndexScheduler fields into an inner struct 2023-08-31 10:36:33 +02:00
Clément Renault
e257710961 WIP fix the tests 2023-08-30 18:03:24 +02:00
Clément Renault
9dd4423054 Fix the watcher ordering of the auth/ node 2023-08-30 17:51:22 +02:00
Clément Renault
8c3ad57ef9 React to changes towards the cluster members 2023-08-30 17:40:12 +02:00
Clément Renault
2d1434da81 Keep the ZK flow when enqueuing tasks 2023-08-30 17:15:15 +02:00
Clément Renault
c488a4a351 Fixup a lot of small issues on the ZK config 2023-08-30 16:42:55 +02:00
Kerollmops
0c7d7c68bc WIP moving to the sync zookeeper API 2023-08-30 15:06:12 +02:00
Tamo
854745c670 wip: starts working on importing the snapshots 2023-08-16 18:41:05 +02:00
Tamo
777eebb759 starts creating snapshot, the import is still missing 2023-08-10 15:00:25 +02:00
Tamo
61ccfaf9bc wake up after registering a task 2023-08-10 09:39:39 +02:00
Tamo
f0c4d36ff7 implement the deletion of tasks after processing a batch
add a lot of comments and logs
2023-08-10 09:36:43 +02:00
Tamo
8c20d6e2fe fix the leader election 2023-08-09 17:23:13 +02:00
ManyTheFish
8e437ed76c Start leader election and task processing (WIP) 2023-08-09 16:52:38 +02:00
Tamo
1191ec5939 fix the register task watcher 2023-08-08 13:18:55 +02:00
Tamo
0d20d08daf fix a few warnings 2023-08-08 11:39:48 +02:00
ManyTheFish
b66bf049b5 Create a task on zookeeper side when a task is created locally 2023-08-07 17:02:51 +02:00
ManyTheFish
b2f36b9b97 Comment Meilisearch container by default 2023-08-07 17:02:00 +02:00
ManyTheFish
b311089435 Update zookeeper client 2023-08-07 14:20:01 +02:00
ManyTheFish
3d46e84d97 update docker compose as an example 2023-08-03 17:15:24 +02:00
ManyTheFish
ad7f8edff8 fix auto-synchronization with zk 2023-08-03 14:43:29 +02:00
Tamo
5ce01bcb53 add logs 2023-08-03 13:59:05 +02:00
Tamo
d5523cc6ac fix the tests 2023-08-03 12:28:08 +02:00
Tamo
fe7a312ec6 Import the already existing api keys on startup 2023-08-03 12:25:32 +02:00
Tamo
57dc4b148c implement the watcher for all kind of operations 2023-08-03 10:52:13 +02:00
Tamo
a325ddfe6a Forward the key deletions to zookeeper 2023-08-03 10:36:49 +02:00
Tamo
0cd81573b4 Forward the keys update to zookeeper 2023-08-03 10:22:34 +02:00
ManyTheFish
b0ff595f60 Event Listener: delete local key if deleted on ZK 2023-08-02 18:36:36 +02:00
ManyTheFish
3eb6f4b56f Create api keys 2023-08-02 16:52:45 +02:00
ManyTheFish
49f976c8d8 fix analitics compilation 2023-08-02 14:17:03 +02:00
Tamo
84d56f3320 send the creation of api-key to zookeeper 2023-08-02 13:57:30 +02:00
Tamo
97e3dfd99d makes zk available inside the auth-controller with config coming from the cli, it compiles 2023-08-02 13:17:40 +02:00
ManyTheFish
dc38da95c4 WIP 2023-08-02 12:00:02 +02:00
ManyTheFish
2ce8b42757 REMOVE: add docker compôse for tests 2023-08-02 11:59:54 +02:00
meili-bors[bot]
f391039a6f Merge #3967
3967: Bring back changes from `release-v1.3.0` into `main` r=ManyTheFish a=curquiza

Using a temp branch because of git conflict

Co-authored-by: Cong Chen <cong.chen@ocrlabs.com>
Co-authored-by: ManyTheFish <many@meilisearch.com>
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
Co-authored-by: meili-bors[bot] <89034592+meili-bors[bot]@users.noreply.github.com>
Co-authored-by: Tamo <tamo@meilisearch.com>
Co-authored-by: Kerollmops <clement@meilisearch.com>
Co-authored-by: Clément Renault <clement@meilisearch.com>
2023-08-01 16:22:09 +00:00
curquiza
fcdd20b533 Fix README after git conflict 2023-08-01 16:06:33 +02:00
ManyTheFish
b45c36cd71 Merge branch 'main' into tmp-release-v1.3.0 2023-08-01 15:05:17 +02:00
meili-bors[bot]
151c31c18f Merge #3963
3963: Fix the milli crate r=ManyTheFish a=irevoire

Milli was using the serde feature of either without enabling it first; thus, it wasn't working.

It was working in meilisearch, though, because `meilisearch-types` was using the feature which enables it globally for all the other crates.

## Related issue
Fixes https://github.com/meilisearch/meilisearch/issues/3962

Co-authored-by: Tamo <tamo@meilisearch.com>
2023-07-31 09:32:08 +00:00
Tamo
a8ad0902d3 Fix the milli crate
Milli was using the serde feature of either without enabling it first, thus it wasn't working
2023-07-31 11:08:27 +02:00
meili-bors[bot]
e917dbdebb Merge #3957
3957: fix: upgrade mimalloc dependency to resolve FreeBSD build r=irevoire a=ThatOneCalculator

# Pull Request

## Related issue
Fixes #3806

## What does this PR do?
- Upgrades mimalloc to 0.1.37
- Fixes build on FreeBSD

Ref: https://github.com/meilisearch/meilisearch/issues/3806#issuecomment-1653693468

Tested and working on FreeBSD 13.1-RELEASE-p5

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: ThatOneCalculator <kainoa@t1c.dev>
2023-07-31 08:49:36 +00:00
ThatOneCalculator
ba919b6123 fix: ⬆️ up mimalloc 2023-07-28 20:35:47 -07:00
meili-bors[bot]
2dfbb6813a Merge #3913
3913: Expose a Puffin server to profile the indexing process r=Kerollmops a=Kerollmops

This PR exposes a puffin HTTP server to expose the internal timing it takes to index documents, delete documents, or update the settings of an index.

<img width="1752" alt="Capture d’écran 2023-07-10 à 18 44 58" src="https://github.com/meilisearch/meilisearch/assets/3610253/a3c7a6bf-db5b-42f4-8be1-c4e31c869843">

## To be done

 - [x] Move the puffin HTTP server under a feature flag.
 - [x] Use [the `puffin::set_scopes_on` function](https://docs.rs/puffin/latest/puffin/fn.set_scopes_on.html) to toggle it (by using the feature directly).
     When this function is called with `false`, [a call to `profile_scope!` talked 1-2ns](https://docs.rs/puffin/latest/puffin/fn.set_scopes_on.html).
 - [x] Create a _PROFILING.md_ file explaining how to use it.
   - [x] Explain that merging scopes on the interface is not always useful.
 - [x] Add more info on the number of batched tasks (using the `puffin::profile_scope!` macro data).
   - I added more info, but that's more continuous work when we consider we need more info here and there.
 - [x] Clean up some scopes, and don't touch too much code to inject puffin.
   - I am not sure that the _index_documents/mod.rs_ function is that complex with the addition of the scope.
 - [x] Think about what we consider frames. One indexation operation or the wall program. When must we stop the frame, then?
   - What we consider a frame is one single `IndexScheduler::tick` execution.
   - We can change that later.

Co-authored-by: Kerollmops <clement@meilisearch.com>
Co-authored-by: Clément Renault <clement@meilisearch.com>
2023-07-19 09:44:01 +00:00
Clément Renault
8f589a5cce Introduce a PROFILING.md tutorial to profile Meilisearch 2023-07-18 17:38:13 +02:00
Clément Renault
0b8bbd8750 Toggle the puffin profiling with a feature flag 2023-07-18 17:38:13 +02:00
Kerollmops
eef95de30e First iteration on exposing puffin profiling 2023-07-18 17:38:13 +02:00
meili-bors[bot]
13a13a4862 Merge #3932
3932: Add UTM tracking to README r=gillian-meilisearch a=Strift

# Pull Request

Hi `@macraig` `@curquiza` 👋 

## Related issue

N/A

## What does this PR do?

This PR adds UTM tracking to the links in the README.

It add UTM params to:
- links in the nav
- links to where2watch
- links in the Features section
- Docs & Getting started links (cc `@guimachiavelli)`
- links in the SDKs section
- links in the Advanced usage section
- links in the Telemetry section
- links in the Get in touch section

Additionally, this PR adds a link to the Meilisearch logo (there is currently none.)

## On the UTM pattern

All links in this PR use the new convention `@gmourier` and I agreed on: 
- utm_campaign=oss
- utm_source=github
- utm_medium=meilisearch
- utm_content= where the link is in the page

It's worth considering updating the tracking link for the Cloud, which is the only one that doesn’t follow the new convention. It is currently using `utm_campaign=oss&utm_source=engine&utm_medium=meilisearch`.

Merging analytics from different UTMs is doable on Amplitude, but can't be done in Fathom. Plus, having two different conventions creates knowledge overhead, and is bound to result in corrupt analytics at some point. I suggest we change the Cloud UTM trackers too — the sooner we eat the frog, the better imo. 

## PR checklist

Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Strift <strift@Strifts-MacBook-Pro.local>
Co-authored-by: Strift <laurent@meilisearch.com>
2023-07-18 13:42:50 +00:00
Strift
e691c92ed5 Replace UTM link on Cloud 2023-07-18 14:48:00 +02:00
Strift
928ab2f9b1 Add UTM params to contact section links 2023-07-14 18:24:03 +02:00
Strift
7c18a9375f Add UTM params to telemetry section links 2023-07-14 18:19:46 +02:00
Strift
05a311f9be Add UTM params to Advanced usage links 2023-07-14 18:17:51 +02:00
Strift
9b1b9b409e Add UTM params to SDKs logos link 2023-07-14 18:17:28 +02:00
Strift
7f555f23e8 Add UTM params to SDKs section links 2023-07-14 18:15:17 +02:00
Strift
a0bfc9f63a Add UTM params to docs & getting started links 2023-07-14 18:02:21 +02:00
Strift
3155264381 Add UTM params to features links 2023-07-14 17:51:25 +02:00
Strift
42400c381e Add UTM on demo link 2023-07-14 17:43:05 +02:00
Strift
08c7dab528 Add UTM on demo gif 2023-07-14 17:40:37 +02:00
Strift
8590687515 Add UTM params to nav links 2023-07-14 17:34:45 +02:00
Strift
8f5d127b1e Add links on Meilisearch logo 2023-07-14 17:26:06 +02:00
meili-bors[bot]
2b4160ebb9 Merge #3918
3918: Update and fix the Test Suite CI r=dureuill a=Kerollmops

This Pull Request renames the _Run test with Rust_ into _Setup test with Rust_ for more clarity and `cargo update -p proc-macro2` to make the project compile with the latest Rust Nightly.

Co-authored-by: Kerollmops <clement@meilisearch.com>
2023-07-12 13:18:25 +00:00
Kerollmops
8ba1c8f88f Update proc-macro2 to compile with the latest nightly 2023-07-12 11:47:27 +02:00
Kerollmops
8e7edf8ea7 Rename the jobs in the CI for clarity 2023-07-12 11:16:01 +02:00
meili-bors[bot]
9daccdf7f0 Merge #3895
3895: Update README.md r=curquiza a=ferdi05

Adding the free-trial option

# Pull Request

## Related issue
Fixes #<issue_number>

## What does this PR do?
- ...

## PR checklist
Please check if your PR fulfills the following requirements:
- [ ] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [ ] Have you read the contributing guidelines?
- [ ] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Ferdinand Boas <ferdinand.boas@gmail.com>
2023-07-10 11:26:47 +00:00
Ferdinand Boas
437ee55c57 Update README.md
Co-authored-by: Guillaume Mourier <guillaume@meilisearch.com>
2023-07-06 12:15:52 +02:00
Ferdinand Boas
b1717865ea Update README.md
Adding the free-trial option
2023-07-06 11:52:35 +02:00
meili-bors[bot]
176f716292 Merge #3871
3871: Bump Swatinem/rust-cache from 2.4.0 to 2.5.0 r=curquiza a=dependabot[bot]

Bumps [Swatinem/rust-cache](https://github.com/swatinem/rust-cache) from 2.4.0 to 2.5.0.
<details>
<summary>Release notes</summary>
<p><em>Sourced from <a href="https://github.com/swatinem/rust-cache/releases">Swatinem/rust-cache's releases</a>.</em></p>
<blockquote>
<h2>v2.5.0</h2>
<h2>What's Changed</h2>
<ul>
<li>feat: Rm workspace crates version before caching by <a href="https://github.com/NobodyXu"><code>`@​NobodyXu</code></a>` in <a href="https://redirect.github.com/Swatinem/rust-cache/pull/147">Swatinem/rust-cache#147</a></li>
<li>feat: Add hash of <code>.cargo/config.toml</code> to key by <a href="https://github.com/NobodyXu"><code>`@​NobodyXu</code></a>` in <a href="https://redirect.github.com/Swatinem/rust-cache/pull/149">Swatinem/rust-cache#149</a></li>
</ul>
<h2>New Contributors</h2>
<ul>
<li><a href="https://github.com/NobodyXu"><code>`@​NobodyXu</code></a>` made their first contribution in <a href="https://redirect.github.com/Swatinem/rust-cache/pull/147">Swatinem/rust-cache#147</a></li>
</ul>
<p><strong>Full Changelog</strong>: <a href="https://github.com/Swatinem/rust-cache/compare/v2.4.0...v2.5.0">https://github.com/Swatinem/rust-cache/compare/v2.4.0...v2.5.0</a></p>
</blockquote>
</details>
<details>
<summary>Changelog</summary>
<p><em>Sourced from <a href="https://github.com/Swatinem/rust-cache/blob/master/CHANGELOG.md">Swatinem/rust-cache's changelog</a>.</em></p>
<blockquote>
<h1>Changelog</h1>
</blockquote>
</details>
<details>
<summary>Commits</summary>
<ul>
<li><a href="2656b87321"><code>2656b87</code></a> 2.5.0</li>
<li><a href="715970feed"><code>715970f</code></a> feat: Add hash of <code>.cargo/config.toml</code> to key (<a href="https://redirect.github.com/swatinem/rust-cache/issues/149">#149</a>)</li>
<li><a href="3d4000164d"><code>3d40001</code></a> feat: Rm workspace crates version before caching (<a href="https://redirect.github.com/swatinem/rust-cache/issues/147">#147</a>)</li>
<li>See full diff in <a href="https://github.com/swatinem/rust-cache/compare/v2.4.0...v2.5.0">compare view</a></li>
</ul>
</details>
<br />


[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=Swatinem/rust-cache&package-manager=github_actions&previous-version=2.4.0&new-version=2.5.0)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

You can trigger a rebase of this PR by commenting ``@dependabot` rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---

<details>
<summary>Dependabot commands and options</summary>
<br />

You can trigger Dependabot actions by commenting on this PR:
- ``@dependabot` rebase` will rebase this PR
- ``@dependabot` recreate` will recreate this PR, overwriting any edits that have been made to it
- ``@dependabot` merge` will merge this PR after your CI passes on it
- ``@dependabot` squash and merge` will squash and merge this PR after your CI passes on it
- ``@dependabot` cancel merge` will cancel a previously requested merge and block automerging
- ``@dependabot` reopen` will reopen this PR if it is closed
- ``@dependabot` close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
- ``@dependabot` ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
- ``@dependabot` ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
- ``@dependabot` ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)


</details>

Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2023-07-04 12:57:38 +00:00
dependabot[bot]
40ad19ba9e Bump Swatinem/rust-cache from 2.4.0 to 2.5.0
Bumps [Swatinem/rust-cache](https://github.com/swatinem/rust-cache) from 2.4.0 to 2.5.0.
- [Release notes](https://github.com/swatinem/rust-cache/releases)
- [Changelog](https://github.com/Swatinem/rust-cache/blob/master/CHANGELOG.md)
- [Commits](https://github.com/swatinem/rust-cache/compare/v2.4.0...v2.5.0)

---
updated-dependencies:
- dependency-name: Swatinem/rust-cache
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
2023-07-01 17:46:11 +00:00
65 changed files with 2346 additions and 961 deletions

View File

@@ -30,20 +30,20 @@ jobs:
run: |
apt-get update && apt-get install -y curl
apt-get install build-essential -y
- name: Run test with Rust stable
- name: Setup test with Rust stable
if: github.event_name != 'schedule'
uses: actions-rs/toolchain@v1
with:
toolchain: stable
override: true
- name: Run test with Rust nightly
- name: Setup test with Rust nightly
if: github.event_name == 'schedule'
uses: actions-rs/toolchain@v1
with:
toolchain: nightly
override: true
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.4.0
uses: Swatinem/rust-cache@v2.5.0
- name: Run cargo check without any default features
uses: actions-rs/cargo@v1
with:
@@ -65,7 +65,7 @@ jobs:
steps:
- uses: actions/checkout@v3
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.4.0
uses: Swatinem/rust-cache@v2.5.0
- name: Run cargo check without any default features
uses: actions-rs/cargo@v1
with:
@@ -117,17 +117,17 @@ jobs:
run: |
apt-get update
apt-get install --assume-yes build-essential curl
- uses: actions-rs/toolchain@v1
with:
toolchain: stable
override: true
- uses: actions-rs/toolchain@v1
with:
toolchain: stable
override: true
- name: Run cargo tree without default features and check lindera is not present
run: |
cargo tree -f '{p} {f}' -e normal --no-default-features | grep lindera -vqz
- name: Run cargo tree with default features and check lindera is pressent
run: |
cargo tree -f '{p} {f}' -e normal | grep lindera -qz
# We run tests in debug also, to make sure that the debug_assertions are hit
test-debug:
name: Run tests in debug
@@ -146,7 +146,7 @@ jobs:
toolchain: stable
override: true
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.4.0
uses: Swatinem/rust-cache@v2.5.0
- name: Run tests in debug
uses: actions-rs/cargo@v1
with:
@@ -165,7 +165,7 @@ jobs:
override: true
components: clippy
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.4.0
uses: Swatinem/rust-cache@v2.5.0
- name: Run cargo clippy
uses: actions-rs/cargo@v1
with:
@@ -184,7 +184,7 @@ jobs:
override: true
components: rustfmt
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.4.0
uses: Swatinem/rust-cache@v2.5.0
- name: Run cargo fmt
# Since we never ran the `build.rs` script in the benchmark directory we are missing one auto-generated import file.
# Since we want to trigger (and fail) this action as fast as possible, instead of building the benchmark crate

509
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@@ -37,23 +37,8 @@ opt-level = 3
[profile.dev.package.roaring]
opt-level = 3
[profile.dev.package.lindera-ipadic-builder]
opt-level = 3
[profile.dev.package.encoding]
opt-level = 3
[profile.dev.package.yada]
opt-level = 3
[patch.crates-io]
strois = { git = "https://github.com/meilisearch/strois", rev = "eeb945c" }
[profile.release.package.lindera-ipadic-builder]
opt-level = 3
[profile.release.package.encoding]
opt-level = 3
[profile.release.package.yada]
opt-level = 3
[profile.bench.package.lindera-ipadic-builder]
opt-level = 3
[profile.bench.package.encoding]
opt-level = 3
[profile.bench.package.yada]
opt-level = 3
[patch."https://github.com/irevoire/zookeeper-client-sync"]
zookeeper-client-sync = { path = "../zookeeper-client-sync" }

19
PROFILING.md Normal file
View File

@@ -0,0 +1,19 @@
# Profiling Meilisearch
Search engine technologies are complex pieces of software that require thorough profiling tools. We chose to use [Puffin](https://github.com/EmbarkStudios/puffin), which the Rust gaming industry uses extensively. You can export and import the profiling reports using the top bar's _File_ menu options.
![An example profiling with Puffin viewer](assets/profiling-example.png)
## Profiling the Indexing Process
When you enable the `profile-with-puffin` feature of Meilisearch, a Puffin HTTP server will run on Meilisearch and listen on the default _0.0.0.0:8585_ address. This server will record a "frame" whenever it executes the `IndexScheduler::tick` method.
Once your Meilisearch is running and awaits new indexation operations, you must [install and run the `puffin_viewer` tool](https://github.com/EmbarkStudios/puffin/tree/main/puffin_viewer) to see the profiling results. I advise you to run the viewer with the `RUST_LOG=puffin_http::client=debug` environment variable to see the client trying to connect to your server.
Another piece of advice on the Puffin viewer UI interface is to consider the _Merge children with same ID_ option. It can hide the exact actual timings at which events were sent. Please turn it off when you see strange gaps on the Flamegraph. It can help.
## Profiling the Search Process
We still need to take the time to profile the search side of the engine with Puffin. It would require time to profile the filtering phase, query parsing, creation, and execution. We could even profile the Actix HTTP server.
The only issue we see is the framing system. Puffin requires a global frame-based profiling phase, which collides with Meilisearch's ability to accept and answer multiple requests on different threads simultaneously.

View File

@@ -1,16 +1,20 @@
<p align="center">
<img src="assets/meilisearch-logo-light.svg?sanitize=true#gh-light-mode-only">
<img src="assets/meilisearch-logo-dark.svg?sanitize=true#gh-dark-mode-only">
<a href="https://www.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=logo#gh-light-mode-only" target="_blank">
<img src="assets/meilisearch-logo-light.svg?sanitize=true#gh-light-mode-only">
</a>
<a href="https://www.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=logo#gh-dark-mode-only" target="_blank">
<img src="assets/meilisearch-logo-dark.svg?sanitize=true#gh-dark-mode-only">
</a>
</p>
<h4 align="center">
<a href="https://www.meilisearch.com">Website</a> |
<a href="https://www.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=nav">Website</a> |
<a href="https://roadmap.meilisearch.com/tabs/1-under-consideration">Roadmap</a> |
<a href="https://www.meilisearch.com/pricing?utm_campaign=oss&utm_source=engine&utm_medium=meilisearch">Meilisearch Cloud</a> |
<a href="https://blog.meilisearch.com">Blog</a> |
<a href="https://www.meilisearch.com/docs">Documentation</a> |
<a href="https://www.meilisearch.com/docs/faq">FAQ</a> |
<a href="https://discord.meilisearch.com">Discord</a>
<a href="https://www.meilisearch.com/pricing?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=nav">Meilisearch Cloud</a> |
<a href="https://blog.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=nav">Blog</a> |
<a href="https://www.meilisearch.com/docs?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=nav">Documentation</a> |
<a href="https://www.meilisearch.com/docs/faq?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=nav">FAQ</a> |
<a href="https://discord.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=nav">Discord</a>
</h4>
<p align="center">
@@ -24,40 +28,40 @@
Meilisearch helps you shape a delightful search experience in a snap, offering features that work out-of-the-box to speed up your workflow.
<p align="center" name="demo">
<a href="https://where2watch.meilisearch.com/#gh-light-mode-only" target="_blank">
<a href="https://where2watch.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demo-gif#gh-light-mode-only" target="_blank">
<img src="assets/demo-light.gif#gh-light-mode-only" alt="A bright colored application for finding movies screening near the user">
</a>
<a href="https://where2watch.meilisearch.com/#gh-dark-mode-only" target="_blank">
<a href="https://where2watch.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demo-gif#gh-dark-mode-only" target="_blank">
<img src="assets/demo-dark.gif#gh-dark-mode-only" alt="A dark colored application for finding movies screening near the user">
</a>
</p>
🔥 [**Try it!**](https://where2watch.meilisearch.com/) 🔥
🔥 [**Try it!**](https://where2watch.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demo-link) 🔥
## ✨ Features
- **Search-as-you-type:** find search results in less than 50 milliseconds
- **[Typo tolerance](https://www.meilisearch.com/docs/learn/getting_started/customizing_relevancy#typo-tolerance):** get relevant matches even when queries contain typos and misspellings
- **[Filtering](https://www.meilisearch.com/docs/learn/fine_tuning_results/filtering) and [faceted search](https://www.meilisearch.com/docs/learn/fine_tuning_results/faceted_search):** enhance your user's search experience with custom filters and build a faceted search interface in a few lines of code
- **[Sorting](https://www.meilisearch.com/docs/learn/fine_tuning_results/sorting):** sort results based on price, date, or pretty much anything else your users need
- **[Synonym support](https://www.meilisearch.com/docs/learn/getting_started/customizing_relevancy#synonyms):** configure synonyms to include more relevant content in your search results
- **[Geosearch](https://www.meilisearch.com/docs/learn/fine_tuning_results/geosearch):** filter and sort documents based on geographic data
- **[Extensive language support](https://www.meilisearch.com/docs/learn/what_is_meilisearch/language):** search datasets in any language, with optimized support for Chinese, Japanese, Hebrew, and languages using the Latin alphabet
- **[Security management](https://www.meilisearch.com/docs/learn/security/master_api_keys):** control which users can access what data with API keys that allow fine-grained permissions handling
- **[Multi-Tenancy](https://www.meilisearch.com/docs/learn/security/tenant_tokens):** personalize search results for any number of application tenants
- **[Typo tolerance](https://www.meilisearch.com/docs/learn/getting_started/customizing_relevancy?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features#typo-tolerance):** get relevant matches even when queries contain typos and misspellings
- **[Filtering](https://www.meilisearch.com/docs/learn/fine_tuning_results/filtering?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features) and [faceted search](https://www.meilisearch.com/docs/learn/fine_tuning_results/faceted_search?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** enhance your user's search experience with custom filters and build a faceted search interface in a few lines of code
- **[Sorting](https://www.meilisearch.com/docs/learn/fine_tuning_results/sorting?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** sort results based on price, date, or pretty much anything else your users need
- **[Synonym support](https://www.meilisearch.com/docs/learn/getting_started/customizing_relevancy?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features#synonyms):** configure synonyms to include more relevant content in your search results
- **[Geosearch](https://www.meilisearch.com/docs/learn/fine_tuning_results/geosearch?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** filter and sort documents based on geographic data
- **[Extensive language support](https://www.meilisearch.com/docs/learn/what_is_meilisearch/language?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** search datasets in any language, with optimized support for Chinese, Japanese, Hebrew, and languages using the Latin alphabet
- **[Security management](https://www.meilisearch.com/docs/learn/security/master_api_keys?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** control which users can access what data with API keys that allow fine-grained permissions handling
- **[Multi-Tenancy](https://www.meilisearch.com/docs/learn/security/tenant_tokens?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** personalize search results for any number of application tenants
- **Highly Customizable:** customize Meilisearch to your specific needs or use our out-of-the-box and hassle-free presets
- **[RESTful API](https://www.meilisearch.com/docs/reference/api/overview):** integrate Meilisearch in your technical stack with our plugins and SDKs
- **[RESTful API](https://www.meilisearch.com/docs/reference/api/overview?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** integrate Meilisearch in your technical stack with our plugins and SDKs
- **Easy to install, deploy, and maintain**
## 📖 Documentation
You can consult Meilisearch's documentation at [https://www.meilisearch.com/docs](https://www.meilisearch.com/docs/).
You can consult Meilisearch's documentation at [https://www.meilisearch.com/docs](https://www.meilisearch.com/docs/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=docs).
## 🚀 Getting started
For basic instructions on how to set up Meilisearch, add documents to an index, and search for documents, take a look at our [Quick Start](https://www.meilisearch.com/docs/learn/getting_started/quick_start) guide.
For basic instructions on how to set up Meilisearch, add documents to an index, and search for documents, take a look at our [Quick Start](https://www.meilisearch.com/docs/learn/getting_started/quick_start?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=get-started) guide.
You may also want to check out [Meilisearch 101](https://www.meilisearch.com/docs/learn/getting_started/filtering_and_sorting) for an introduction to some of Meilisearch's most popular features.
You may also want to check out [Meilisearch 101](https://www.meilisearch.com/docs/learn/getting_started/filtering_and_sorting?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=get-started) for an introduction to some of Meilisearch's most popular features.
## ⚡ Supercharge your Meilisearch experience
@@ -67,29 +71,29 @@ Say goodbye to server deployment and manual updates with [Meilisearch Cloud](htt
Install one of our SDKs in your project for seamless integration between Meilisearch and your favorite language or framework!
Take a look at the complete [Meilisearch integration list](https://www.meilisearch.com/docs/learn/what_is_meilisearch/sdks).
Take a look at the complete [Meilisearch integration list](https://www.meilisearch.com/docs/learn/what_is_meilisearch/sdks?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=sdks-link).
[![Logos belonging to different languages and frameworks supported by Meilisearch, including React, Ruby on Rails, Go, Rust, and PHP](assets/integrations.png)](https://www.meilisearch.com/docs/learn/what_is_meilisearch/sdks)
[![Logos belonging to different languages and frameworks supported by Meilisearch, including React, Ruby on Rails, Go, Rust, and PHP](assets/integrations.png)](https://www.meilisearch.com/docs/learn/what_is_meilisearch/sdks?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=sdks-logos)
## ⚙️ Advanced usage
Experienced users will want to keep our [API Reference](https://www.meilisearch.com/docs/reference/api/overview) close at hand.
Experienced users will want to keep our [API Reference](https://www.meilisearch.com/docs/reference/api/overview?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=advanced) close at hand.
We also offer a wide range of dedicated guides to all Meilisearch features, such as [filtering](https://www.meilisearch.com/docs/learn/fine_tuning_results/filtering), [sorting](https://www.meilisearch.com/docs/learn/fine_tuning_results/sorting), [geosearch](https://www.meilisearch.com/docs/learn/fine_tuning_results/geosearch), [API keys](https://www.meilisearch.com/docs/learn/security/master_api_keys), and [tenant tokens](https://www.meilisearch.com/docs/learn/security/tenant_tokens).
We also offer a wide range of dedicated guides to all Meilisearch features, such as [filtering](https://www.meilisearch.com/docs/learn/fine_tuning_results/filtering?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=advanced), [sorting](https://www.meilisearch.com/docs/learn/fine_tuning_results/sorting?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=advanced), [geosearch](https://www.meilisearch.com/docs/learn/fine_tuning_results/geosearch?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=advanced), [API keys](https://www.meilisearch.com/docs/learn/security/master_api_keys?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=advanced), and [tenant tokens](https://www.meilisearch.com/docs/learn/security/tenant_tokens?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=advanced).
Finally, for more in-depth information, refer to our articles explaining fundamental Meilisearch concepts such as [documents](https://www.meilisearch.com/docs/learn/core_concepts/documents) and [indexes](https://www.meilisearch.com/docs/learn/core_concepts/indexes).
Finally, for more in-depth information, refer to our articles explaining fundamental Meilisearch concepts such as [documents](https://www.meilisearch.com/docs/learn/core_concepts/documents?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=advanced) and [indexes](https://www.meilisearch.com/docs/learn/core_concepts/indexes?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=advanced).
## 📊 Telemetry
Meilisearch collects **anonymized** data from users to help us improve our product. You can [deactivate this](https://www.meilisearch.com/docs/learn/what_is_meilisearch/telemetry#how-to-disable-data-collection) whenever you want.
Meilisearch collects **anonymized** data from users to help us improve our product. You can [deactivate this](https://www.meilisearch.com/docs/learn/what_is_meilisearch/telemetry?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=telemetry#how-to-disable-data-collection) whenever you want.
To request deletion of collected data, please write to us at [privacy@meilisearch.com](mailto:privacy@meilisearch.com). Don't forget to include your `Instance UID` in the message, as this helps us quickly find and delete your data.
To request deletion of collected data, please write to us at [privacy@meilisearch.com](mailto:privacy@meilisearch.com). Don't forget to include your `Instance UID` in the message, as this helps us quickly find and delete your data.
If you want to know more about the kind of data we collect and what we use it for, check the [telemetry section](https://www.meilisearch.com/docs/learn/what_is_meilisearch/telemetry) of our documentation.
If you want to know more about the kind of data we collect and what we use it for, check the [telemetry section](https://www.meilisearch.com/docs/learn/what_is_meilisearch/telemetry?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=telemetry#how-to-disable-data-collection) of our documentation.
## 📫 Get in touch!
Meilisearch is a search engine created by [Meili](https://www.welcometothejungle.com/en/companies/meilisearch), a software development company based in France and with team members all over the world. Want to know more about us? [Check out our blog!](https://blog.meilisearch.com/)
Meilisearch is a search engine created by [Meili](https://www.welcometothejungle.com/en/companies/meilisearch), a software development company based in France and with team members all over the world. Want to know more about us? [Check out our blog!](https://blog.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=contact)
🗞 [Subscribe to our newsletter](https://meilisearch.us2.list-manage.com/subscribe?u=27870f7b71c908a8b359599fb&id=79582d828e) if you don't want to miss any updates! We promise we won't clutter your mailbox: we only send one edition every two months.

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.2 MiB

View File

@@ -14,7 +14,7 @@ license.workspace = true
anyhow = "1.0.70"
csv = "1.2.1"
milli = { path = "../milli" }
mimalloc = { version = "0.1.36", default-features = false }
mimalloc = { version = "0.1.37", default-features = false }
serde_json = { version = "1.0.95", features = ["preserve_order"] }
[dev-dependencies]

59
docker-compose.yml Normal file
View File

@@ -0,0 +1,59 @@
version: "3.9"
services:
zk1:
container_name: zk1
hostname: zk1
image: bitnami/zookeeper:3.7.1
ports:
- 21811:2181
environment:
- ALLOW_ANONYMOUS_LOGIN=yes
- ZOO_SERVER_ID=1
- ZOO_SERVERS=0.0.0.0:2888:3888,zk2:2888:3888,zk3:2888:3888
zk2:
container_name: zk2
hostname: zk2
image: bitnami/zookeeper:3.7.1
ports:
- 21812:2181
environment:
- ALLOW_ANONYMOUS_LOGIN=yes
- ZOO_SERVER_ID=2
- ZOO_SERVERS=zk1:2888:3888,0.0.0.0:2888:3888,zk3:2888:3888
zk3:
container_name: zk3
hostname: zk3
image: bitnami/zookeeper:3.7.1
ports:
- 21813:2181
environment:
- ALLOW_ANONYMOUS_LOGIN=yes
- ZOO_SERVER_ID=3
- ZOO_SERVERS=zk1:2888:3888,zk2:2888:3888,0.0.0.0:2888:3888
zoonavigator:
container_name: zoonavigator
image: elkozmon/zoonavigator
ports:
- 9000:9000
# Meilisearch instances
# m1:
# container_name: m1
# hostname: m1
# image: getmeili/meilisearch:prototype-zookeeper-ha-0
# ports:
# - 7700:7700
# environment:
# - MEILI_ZK_URL=zk1:2181
# - MEILI_MASTER_KEY=masterkey
# restart: always
# m2:
# container_name: m2
# hostname: m2
# image: getmeili/meilisearch:prototype-zookeeper-ha-0
# ports:
# - 7701:7700
# environment:
# - MEILI_ZK_URL=zk2:2181
# - MEILI_MASTER_KEY=masterkey
# restart: always

View File

@@ -22,20 +22,6 @@ pub enum Error {
pub type Result<T> = std::result::Result<T, Error>;
impl Deref for File {
type Target = NamedTempFile;
fn deref(&self) -> &Self::Target {
&self.file
}
}
impl DerefMut for File {
fn deref_mut(&mut self) -> &mut Self::Target {
&mut self.file
}
}
#[derive(Clone, Debug)]
pub struct FileStore {
path: PathBuf,
@@ -146,6 +132,20 @@ impl File {
}
}
impl Deref for File {
type Target = NamedTempFile;
fn deref(&self) -> &Self::Target {
&self.file
}
}
impl DerefMut for File {
fn deref_mut(&mut self) -> &mut Self::Target {
&mut self.file
}
}
#[cfg(test)]
mod test {
use std::io::Write;

61
ha_test/run.sh Normal file
View File

@@ -0,0 +1,61 @@
#!/bin/bash
function is_everything_installed {
everything_ok=yes
if hash zkli 2>/dev/null; then
echo "✅ zkli installed"
else
everything_ok=no
echo "🥺 zkli is missing, please run \`cargo install zkli\`"
fi
if hash s3cmd 2>/dev/null; then
echo "✅ s3cmd installed"
else
everything_ok=no
echo "🥺 s3cmd is missing, see how to install it here https://s3tools.org/s3cmd"
fi
if [ $everything_ok = "no" ]; then
echo "Exiting..."
exit 1
fi
}
# param: addr of zookeeper
function connect_to_zookeeper {
if ! zkli -a "$1" ls > /dev/null; then
echo "zkli can't connect"
return 1
fi
}
# param: addr of the s3 bucket
function connect_to_s3 {
# S3_SECRET_KEY
# S3_ACCESS_KEY
# S3_HOST
# S3_BUCKET
s3cmd --host="$S3_HOST" --host-bucket="$S3_BUCKET" --access_key="$ACCESS_KEY" --secret_key="$S3_SECRET_KEY" ls
if $?; then
echo "s3cmd can't connect"
return 1
fi
}
is_everything_installed
ZOOKEEPER_ADDR="localhost:2181"
if ! connect_to_zookeeper $ZOOKEEPER_ADDR; then
ZOOKEEPER_ADDR="localhost:21811"
if ! connect_to_zookeeper $ZOOKEEPER_ADDR; then
echo "Can't connect to zkli"
exit 1
fi
fi
connect_to_s3

View File

@@ -22,6 +22,7 @@ log = "0.4.17"
meilisearch-auth = { path = "../meilisearch-auth" }
meilisearch-types = { path = "../meilisearch-types" }
page_size = "0.5.0"
puffin = "0.16.0"
roaring = { version = "0.10.1", features = ["serde"] }
serde = { version = "1.0.160", features = ["derive"] }
serde_json = { version = "1.0.95", features = ["preserve_order"] }
@@ -30,6 +31,10 @@ tempfile = "3.5.0"
thiserror = "1.0.40"
time = { version = "0.3.20", features = ["serde-well-known", "formatting", "parsing", "macros"] }
uuid = { version = "1.3.1", features = ["serde", "v4"] }
tokio = { version = "1.27.0", features = ["full"] }
zookeeper-client-sync = { git = "https://github.com/irevoire/zookeeper-client-sync.git" }
parking_lot = "0.12.1"
strois = "0.0.4"
[dev-dependencies]
big_s = "1.0.2"

View File

@@ -43,7 +43,7 @@ use uuid::Uuid;
use crate::autobatcher::{self, BatchKind};
use crate::utils::{self, swap_index_uid_in_task};
use crate::{Error, IndexScheduler, ProcessingTasks, Result, TaskId};
use crate::{Error, IndexSchedulerInner, ProcessingTasks, Result, TaskId};
/// Represents a combination of tasks that can all be processed at the same time.
///
@@ -198,6 +198,35 @@ impl Batch {
| IndexDocumentDeletionByFilter { index_uid, .. } => Some(index_uid),
}
}
/// Return the content fields uuids associated with this batch.
pub fn content_uuids(&self) -> Vec<Uuid> {
match self {
Batch::TaskCancelation { .. }
| Batch::TaskDeletion(_)
| Batch::Dump(_)
| Batch::IndexCreation { .. }
| Batch::IndexDocumentDeletionByFilter { .. }
| Batch::IndexUpdate { .. }
| Batch::SnapshotCreation(_)
| Batch::IndexDeletion { .. }
| Batch::IndexSwap { .. } => vec![],
Batch::IndexOperation { op, .. } => match op {
IndexOperation::DocumentOperation { operations, .. } => operations
.iter()
.flat_map(|op| match op {
DocumentOperation::Add(uuid) => Some(*uuid),
DocumentOperation::Delete(_) => None,
})
.collect(),
IndexOperation::DocumentDeletion { .. }
| IndexOperation::Settings { .. }
| IndexOperation::DocumentClear { .. }
| IndexOperation::SettingsAndDocumentOperation { .. }
| IndexOperation::DocumentClearAndSetting { .. } => vec![],
},
}
}
}
impl IndexOperation {
@@ -213,7 +242,7 @@ impl IndexOperation {
}
}
impl IndexScheduler {
impl IndexSchedulerInner {
/// Convert an [`BatchKind`](crate::autobatcher::BatchKind) into a [`Batch`].
///
/// ## Arguments
@@ -471,6 +500,8 @@ impl IndexScheduler {
#[cfg(test)]
self.maybe_fail(crate::tests::FailureLocation::InsideCreateBatch)?;
puffin::profile_function!();
let enqueued = &self.get_status(rtxn, Status::Enqueued)?;
let to_cancel = self.get_kind(rtxn, Kind::TaskCancelation)? & enqueued;
@@ -478,8 +509,7 @@ impl IndexScheduler {
if let Some(task_id) = to_cancel.max() {
// We retrieve the tasks that were processing before this tasks cancelation started.
// We must *not* reset the processing tasks before calling this method.
let ProcessingTasks { started_at, processing } =
&*self.processing_tasks.read().unwrap();
let ProcessingTasks { started_at, processing, .. } = &*self.processing_tasks.read();
return Ok(Some(Batch::TaskCancelation {
task: self.get_task(rtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?,
previous_started_at: *started_at,
@@ -575,6 +605,9 @@ impl IndexScheduler {
self.maybe_fail(crate::tests::FailureLocation::PanicInsideProcessBatch)?;
self.breakpoint(crate::Breakpoint::InsideProcessBatch);
}
puffin::profile_function!(format!("{:?}", batch));
match batch {
Batch::TaskCancelation { mut task, previous_started_at, previous_processing_tasks } => {
// 1. Retrieve the tasks that matched the query at enqueue-time.
@@ -1111,6 +1144,8 @@ impl IndexScheduler {
index: &'i Index,
operation: IndexOperation,
) -> Result<Vec<Task>> {
puffin::profile_function!();
match operation {
IndexOperation::DocumentClear { mut tasks, .. } => {
let count = milli::update::ClearDocuments::new(index_wtxn, index).execute()?;
@@ -1385,7 +1420,7 @@ impl IndexScheduler {
fn delete_matched_tasks(&self, wtxn: &mut RwTxn, matched_tasks: &RoaringBitmap) -> Result<u64> {
// 1. Remove from this list the tasks that we are not allowed to delete
let enqueued_tasks = self.get_status(wtxn, Status::Enqueued)?;
let processing_tasks = &self.processing_tasks.read().unwrap().processing.clone();
let processing_tasks = &self.processing_tasks.read().processing.clone();
let all_task_ids = self.all_task_ids(wtxn)?;
let mut to_delete_tasks = all_task_ids & matched_tasks;

View File

@@ -295,6 +295,11 @@ impl IndexMap {
"Attempt to finish deletion of an index that was being closed"
);
}
/// Returns the indexes that were opened by the `IndexMap`.
pub fn clear(&mut self) -> Vec<Index> {
self.available.clear().into_iter().map(|(_, (_, index))| index).collect()
}
}
/// Create or open an index in the specified path.
@@ -335,7 +340,8 @@ mod tests {
impl IndexMapper {
fn test() -> (Self, Env, IndexSchedulerHandle) {
let (index_scheduler, handle) = IndexScheduler::test(true, vec![]);
(index_scheduler.index_mapper, index_scheduler.env, handle)
let index_scheduler = index_scheduler.inner();
(index_scheduler.index_mapper.clone(), index_scheduler.env.clone(), handle)
}
}

View File

@@ -61,7 +61,7 @@ pub struct IndexMapper {
pub(crate) index_stats: Database<UuidCodec, SerdeJson<IndexStats>>,
/// Path to the folder where the LMDB environments of each index are.
base_path: PathBuf,
pub(crate) base_path: PathBuf,
/// The map size an index is opened with on the first time.
index_base_map_size: usize,
/// The quantity by which the map size of an index is incremented upon reopening, in bytes.
@@ -135,7 +135,7 @@ impl IndexMapper {
index_growth_amount: usize,
index_count: usize,
enable_mdb_writemap: bool,
indexer_config: IndexerConfig,
indexer_config: Arc<IndexerConfig>,
) -> Result<Self> {
let mut wtxn = env.write_txn()?;
let index_mapping = env.create_database(&mut wtxn, Some(INDEX_MAPPING))?;
@@ -150,7 +150,7 @@ impl IndexMapper {
index_base_map_size,
index_growth_amount,
enable_mdb_writemap,
indexer_config: Arc::new(indexer_config),
indexer_config,
})
}
@@ -428,6 +428,11 @@ impl IndexMapper {
Ok(())
}
/// Returns the indexes that were opened by the `IndexMapper`.
pub fn clear(&mut self) -> Vec<Index> {
self.index_map.write().unwrap().clear()
}
/// The stats of an index.
///
/// If available in the cache, they are directly returned.

View File

@@ -1,5 +1,6 @@
use std::collections::BTreeSet;
use std::fmt::Write;
use std::ops::Deref;
use meilisearch_types::heed::types::{OwnedType, SerdeBincode, SerdeJson, Str};
use meilisearch_types::heed::{Database, RoTxn};
@@ -8,12 +9,13 @@ use meilisearch_types::tasks::{Details, Task};
use roaring::RoaringBitmap;
use crate::index_mapper::IndexMapper;
use crate::{IndexScheduler, Kind, Status, BEI128};
use crate::{IndexScheduler, IndexSchedulerInner, Kind, Status, BEI128};
pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String {
scheduler.assert_internally_consistent();
let IndexScheduler {
let inner = scheduler.inner();
let IndexSchedulerInner {
autobatching_enabled,
must_stop_processing: _,
processing_tasks,
@@ -38,13 +40,15 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String {
test_breakpoint_sdr: _,
planned_failures: _,
run_loop_iteration: _,
} = scheduler;
zookeeper: _,
options: _,
} = inner.deref();
let rtxn = env.read_txn().unwrap();
let mut snap = String::new();
let processing_tasks = processing_tasks.read().unwrap().processing.clone();
let processing_tasks = processing_tasks.read().processing.clone();
snap.push_str(&format!("### Autobatching Enabled = {autobatching_enabled}\n"));
snap.push_str("### Processing Tasks:\n");
snap.push_str(&snapshot_bitmap(&processing_tasks));

File diff suppressed because it is too large Load Diff

View File

@@ -1,5 +1,6 @@
//! Thread-safe `Vec`-backend LRU cache using [`std::sync::atomic::AtomicU64`] for synchronization.
use std::mem;
use std::sync::atomic::{AtomicU64, Ordering};
/// Thread-safe `Vec`-backend LRU cache
@@ -190,6 +191,11 @@ where
}
None
}
/// Returns the generation associated to the key and values of the `LruMap`.
pub fn clear(&mut self) -> Vec<(AtomicU64, (K, V))> {
mem::take(&mut self.0.data)
}
}
/// The result of an insertion in a LRU map.

View File

@@ -10,9 +10,9 @@ use meilisearch_types::tasks::{Details, IndexSwap, Kind, KindWithContent, Status
use roaring::{MultiOps, RoaringBitmap};
use time::OffsetDateTime;
use crate::{Error, IndexScheduler, Result, Task, TaskId, BEI128};
use crate::{Error, IndexSchedulerInner, Result, Task, TaskId, BEI128};
impl IndexScheduler {
impl IndexSchedulerInner {
pub(crate) fn all_task_ids(&self, rtxn: &RoTxn) -> Result<RoaringBitmap> {
enum_iterator::all().map(|s| self.get_status(rtxn, s)).union()
}
@@ -331,11 +331,12 @@ pub fn clamp_to_page_size(size: usize) -> usize {
}
#[cfg(test)]
impl IndexScheduler {
impl crate::IndexScheduler {
/// Asserts that the index scheduler's content is internally consistent.
pub fn assert_internally_consistent(&self) {
let rtxn = self.env.read_txn().unwrap();
for task in self.all_tasks.iter(&rtxn).unwrap() {
let this = self.inner();
let rtxn = this.env.read_txn().unwrap();
for task in this.all_tasks.iter(&rtxn).unwrap() {
let (task_id, task) = task.unwrap();
let task_id = task_id.get();
@@ -354,21 +355,21 @@ impl IndexScheduler {
} = task;
assert_eq!(uid, task.uid);
if let Some(task_index_uid) = &task_index_uid {
assert!(self
assert!(this
.index_tasks
.get(&rtxn, task_index_uid.as_str())
.unwrap()
.unwrap()
.contains(task.uid));
}
let db_enqueued_at = self
let db_enqueued_at = this
.enqueued_at
.get(&rtxn, &BEI128::new(enqueued_at.unix_timestamp_nanos()))
.unwrap()
.unwrap();
assert!(db_enqueued_at.contains(task_id));
if let Some(started_at) = started_at {
let db_started_at = self
let db_started_at = this
.started_at
.get(&rtxn, &BEI128::new(started_at.unix_timestamp_nanos()))
.unwrap()
@@ -376,7 +377,7 @@ impl IndexScheduler {
assert!(db_started_at.contains(task_id));
}
if let Some(finished_at) = finished_at {
let db_finished_at = self
let db_finished_at = this
.finished_at
.get(&rtxn, &BEI128::new(finished_at.unix_timestamp_nanos()))
.unwrap()
@@ -384,9 +385,9 @@ impl IndexScheduler {
assert!(db_finished_at.contains(task_id));
}
if let Some(canceled_by) = canceled_by {
let db_canceled_tasks = self.get_status(&rtxn, Status::Canceled).unwrap();
let db_canceled_tasks = this.get_status(&rtxn, Status::Canceled).unwrap();
assert!(db_canceled_tasks.contains(uid));
let db_canceling_task = self.get_task(&rtxn, canceled_by).unwrap().unwrap();
let db_canceling_task = this.get_task(&rtxn, canceled_by).unwrap().unwrap();
assert_eq!(db_canceling_task.status, Status::Succeeded);
match db_canceling_task.kind {
KindWithContent::TaskCancelation { query: _, tasks } => {
@@ -427,7 +428,7 @@ impl IndexScheduler {
Details::IndexInfo { primary_key: pk1 } => match &kind {
KindWithContent::IndexCreation { index_uid, primary_key: pk2 }
| KindWithContent::IndexUpdate { index_uid, primary_key: pk2 } => {
self.index_tasks
this.index_tasks
.get(&rtxn, index_uid.as_str())
.unwrap()
.unwrap()
@@ -535,23 +536,23 @@ impl IndexScheduler {
}
}
assert!(self.get_status(&rtxn, status).unwrap().contains(uid));
assert!(self.get_kind(&rtxn, kind.as_kind()).unwrap().contains(uid));
assert!(this.get_status(&rtxn, status).unwrap().contains(uid));
assert!(this.get_kind(&rtxn, kind.as_kind()).unwrap().contains(uid));
if let KindWithContent::DocumentAdditionOrUpdate { content_file, .. } = kind {
match status {
Status::Enqueued | Status::Processing => {
assert!(self
assert!(this
.file_store
.all_uuids()
.unwrap()
.any(|uuid| uuid.as_ref().unwrap() == &content_file),
"Could not find uuid `{content_file}` in the file_store. Available uuids are {:?}.",
self.file_store.all_uuids().unwrap().collect::<std::result::Result<Vec<_>, file_store::Error>>().unwrap(),
this.file_store.all_uuids().unwrap().collect::<std::result::Result<Vec<_>, file_store::Error>>().unwrap(),
);
}
Status::Succeeded | Status::Failed | Status::Canceled => {
assert!(self
assert!(this
.file_store
.all_uuids()
.unwrap()

View File

@@ -14,6 +14,7 @@ license.workspace = true
base64 = "0.21.0"
enum-iterator = "1.4.0"
hmac = "0.12.1"
log = "0.4.19"
maplit = "1.0.2"
meilisearch-types = { path = "../meilisearch-types" }
rand = "0.8.5"
@@ -24,3 +25,4 @@ sha2 = "0.10.6"
thiserror = "1.0.40"
time = { version = "0.3.20", features = ["serde-well-known", "formatting", "parsing", "macros"] }
uuid = { version = "1.3.1", features = ["serde", "v4"] }
zookeeper-client-sync = { git = "https://github.com/irevoire/zookeeper-client-sync.git" }

View File

@@ -19,6 +19,7 @@ internal_error!(
AuthControllerError: meilisearch_types::milli::heed::Error,
std::io::Error,
serde_json::Error,
zookeeper_client_sync::Error,
std::str::Utf8Error
);

View File

@@ -16,22 +16,111 @@ pub use store::open_auth_store_env;
use store::{generate_key_as_hexa, HeedAuthStore};
use time::OffsetDateTime;
use uuid::Uuid;
use zookeeper_client_sync::{
Acls, AddWatchMode, CreateMode, Error as ZkError, EventType, WatchedEvent, Zookeeper,
};
#[derive(Clone)]
pub struct AuthController {
store: Arc<HeedAuthStore>,
master_key: Option<String>,
zookeeper: Option<Arc<Zookeeper>>,
}
impl AuthController {
pub fn new(db_path: impl AsRef<Path>, master_key: &Option<String>) -> Result<Self> {
pub fn new(
db_path: impl AsRef<Path>,
master_key: &Option<String>,
zookeeper: Option<Arc<Zookeeper>>,
) -> Result<Self> {
let store = HeedAuthStore::new(db_path)?;
let controller = Self { store: Arc::new(store), master_key: master_key.clone(), zookeeper };
if store.is_empty()? {
generate_default_keys(&store)?;
match controller.zookeeper {
// setup the auth zk environment, the `auth` node
Some(ref zookeeper) => {
// Zookeeper Event listener loop
let controller_clone = controller.clone();
let zkk = zookeeper.clone();
let watcher = zookeeper.watch("/auth", AddWatchMode::PersistentRecursive)?;
watcher.run_on_change(move |event| {
let WatchedEvent { event_type, path, .. } = dbg!(event);
match event_type {
EventType::NodeDeleted => {
// TODO: ugly unwraps
let uuid = path.strip_prefix("/auth/").unwrap();
let uuid = Uuid::parse_str(uuid).unwrap();
log::info!("The key {} has been deleted", uuid);
controller_clone.store.delete_api_key(uuid).unwrap();
}
EventType::NodeCreated | EventType::NodeDataChanged => {
if path.strip_prefix("/auth/").map_or(false, |s| !s.is_empty()) {
let (key, _stat) = zkk.get_data(&path).unwrap();
let key: Key = serde_json::from_slice(&key).unwrap();
log::info!("The key {} has been inserted", key.uid);
controller_clone.store.put_api_key(key).unwrap();
}
}
otherwise => panic!("Got the unexpected `{otherwise:?}` event!"),
}
});
// TODO: we should catch the potential unexpected errors here https://docs.rs/zookeeper-client/latest/zookeeper_client/struct.Client.html#method.create
// for the moment we consider that `create` only returns Error::NodeExists.
match zookeeper.create(
"/auth",
&[],
&CreateMode::Persistent.with_acls(Acls::anyone_all()),
) {
// If the store is empty, we must generate and push the default api-keys.
Ok(_) => generate_default_keys(&controller)?,
// If the node exist we should clear our DB and download all the existing api-keys
Err(ZkError::NodeExists) => {
log::warn!("Auth directory already exists, we need to clear our keys + import the one in zookeeper");
let store = controller.store.clone();
store.delete_all_keys()?;
let (children, _) = zookeeper
.get_children("/auth")
.expect("Internal, the auth directory was deleted during execution.");
log::info!("Importing {} api-keys", children.len());
for path in children {
log::info!(" Importing {}", path);
match zookeeper.get_data(&format!("/auth/{}", &path)) {
Ok((key, _stat)) => {
let key = serde_json::from_slice(&key).unwrap();
let store = controller.store.clone();
store.put_api_key(key)?;
}
Err(e) => panic!("{e}"),
}
// else the file was deleted while we were inserting the key. We ignore it.
// TODO: What happens if someone updates the files before we have the time
// to setup the watcher
}
}
e @ Err(
ZkError::NoNode | ZkError::NoChildrenForEphemerals | ZkError::InvalidAcl,
) => unreachable!("{e:?}"),
Err(e) => panic!("{e}"),
}
// TODO: Race condition above:
// What happens if two node join exactly at the same moment:
// One will create the dir
// The second one will delete its DB, load nothing and install a watcher
// The first one will push its keys and should wake up and update the second one.
// / BUT, if the second one delete its DB and the first one push its files before the second one install the watcher we're fucked
}
None => {
if controller.store.is_empty()? {
generate_default_keys(&controller)?;
}
}
}
Ok(Self { store: Arc::new(store), master_key: master_key.clone() })
Ok(controller)
}
/// Return `Ok(())` if the auth controller is able to access one of its database.
@@ -53,7 +142,23 @@ impl AuthController {
pub fn create_key(&self, create_key: CreateApiKey) -> Result<Key> {
match self.store.get_api_key(create_key.uid)? {
Some(_) => Err(AuthControllerError::ApiKeyAlreadyExists(create_key.uid.to_string())),
None => self.store.put_api_key(create_key.to_key()),
None => self.put_key(create_key.to_key()),
}
}
pub fn put_key(&self, key: Key) -> Result<Key> {
let store = self.store.clone();
match &self.zookeeper {
Some(zookeeper) => {
zookeeper.create(
&format!("/auth/{}", key.uid),
&serde_json::to_vec_pretty(&key)?,
&CreateMode::Persistent.with_acls(Acls::anyone_all()),
)?;
Ok(key)
}
None => store.put_api_key(key),
}
}
@@ -68,7 +173,20 @@ impl AuthController {
name => key.name = name.set(),
};
key.updated_at = OffsetDateTime::now_utc();
self.store.put_api_key(key)
let store = self.store.clone();
// TODO: we may commit only after zk persisted the keys
match &self.zookeeper {
Some(zookeeper) => {
zookeeper.set_data(
&format!("/auth/{}", key.uid),
&serde_json::to_vec_pretty(&key)?,
None,
)?;
Ok(key)
}
None => store.put_api_key(key),
}
}
pub fn get_key(&self, uid: Uuid) -> Result<Key> {
@@ -110,7 +228,19 @@ impl AuthController {
}
pub fn delete_key(&self, uid: Uuid) -> Result<()> {
if self.store.delete_api_key(uid)? {
let deleted = match &self.zookeeper {
Some(zookeeper) => match zookeeper.delete(&format!("/auth/{}", uid), None) {
Ok(()) => true,
Err(ZkError::NoNode) => false,
Err(e) => return Err(e.into()),
},
None => {
let store = self.store.clone();
store.delete_api_key(uid)?
}
};
if deleted {
Ok(())
} else {
Err(AuthControllerError::ApiKeyNotFound(uid.to_string()))
@@ -159,7 +289,7 @@ impl AuthController {
self.store.delete_all_keys()
}
/// Delete all the keys in the DB.
/// Insert a key in the DB without any check on its validity
pub fn raw_insert_key(&mut self, key: Key) -> Result<()> {
self.store.put_api_key(key)?;
Ok(())
@@ -304,10 +434,9 @@ pub struct IndexSearchRules {
pub filter: Option<serde_json::Value>,
}
fn generate_default_keys(store: &HeedAuthStore) -> Result<()> {
store.put_api_key(Key::default_admin())?;
store.put_api_key(Key::default_search())?;
fn generate_default_keys(controller: &AuthController) -> Result<()> {
controller.put_key(Key::default_admin())?;
controller.put_key(Key::default_search())?;
Ok(())
}

View File

@@ -1,4 +1,3 @@
use std::borrow::Borrow;
use std::fmt::{self, Debug, Display};
use std::fs::File;
use std::io::{self, Seek, Write};
@@ -42,7 +41,7 @@ impl Display for DocumentFormatError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Self::Io(e) => write!(f, "{e}"),
Self::MalformedPayload(me, b) => match me.borrow() {
Self::MalformedPayload(me, b) => match me {
Error::Json(se) => {
let mut message = match se.classify() {
Category::Data => {

View File

@@ -175,6 +175,7 @@ macro_rules! make_error_codes {
// An exhaustive list of all the error codes used by meilisearch.
make_error_codes! {
S3Error , System , INTERNAL_SERVER_ERROR;
ApiKeyAlreadyExists , InvalidRequest , CONFLICT ;
ApiKeyNotFound , InvalidRequest , NOT_FOUND ;
BadParameter , InvalidRequest , BAD_REQUEST;

View File

@@ -58,7 +58,7 @@ lazy_static = "1.4.0"
log = "0.4.17"
meilisearch-auth = { path = "../meilisearch-auth" }
meilisearch-types = { path = "../meilisearch-types" }
mimalloc = { version = "0.1.36", default-features = false }
mimalloc = { version = "0.1.37", default-features = false }
mime = "0.3.17"
num_cpus = "1.15.0"
obkv = "0.2.0"
@@ -69,6 +69,8 @@ permissive-json-pointer = { path = "../permissive-json-pointer" }
pin-project-lite = "0.2.9"
platform-dirs = "0.3.0"
prometheus = { version = "0.13.3", features = ["process"] }
puffin = "0.16.0"
puffin_http = { version = "0.13.0", optional = true }
rand = "0.8.5"
rayon = "1.7.0"
regex = "1.7.3"
@@ -78,6 +80,7 @@ reqwest = { version = "0.11.16", features = [
], default-features = false }
rustls = "0.20.8"
rustls-pemfile = "1.0.2"
strois = "0.0.4"
segment = { version = "0.2.2", optional = true }
serde = { version = "1.0.160", features = ["derive"] }
serde_json = { version = "1.0.95", features = ["preserve_order"] }
@@ -103,6 +106,7 @@ walkdir = "2.3.3"
yaup = "0.2.1"
serde_urlencoded = "0.7.1"
termcolor = "1.2.0"
zookeeper-client-sync = { git = "https://github.com/irevoire/zookeeper-client-sync.git" }
[dev-dependencies]
actix-rt = "2.8.0"
@@ -133,7 +137,18 @@ zip = { version = "0.6.4", optional = true }
[features]
default = ["analytics", "meilisearch-types/all-tokenizations", "mini-dashboard"]
analytics = ["segment"]
mini-dashboard = ["actix-web-static-files", "static-files", "anyhow", "cargo_toml", "hex", "reqwest", "sha-1", "tempfile", "zip"]
profile-with-puffin = ["dep:puffin_http"]
mini-dashboard = [
"actix-web-static-files",
"static-files",
"anyhow",
"cargo_toml",
"hex",
"reqwest",
"sha-1",
"tempfile",
"zip",
]
chinese = ["meilisearch-types/chinese"]
hebrew = ["meilisearch-types/hebrew"]
japanese = ["meilisearch-types/japanese"]

View File

@@ -312,6 +312,13 @@ impl From<Opt> for Infos {
config_file_path,
#[cfg(all(not(debug_assertions), feature = "analytics"))]
no_analytics: _,
zk_url: _,
s3_url: _,
s3_region: _,
s3_bucket: _,
s3_access_key: _,
s3_secret_key: _,
s3_security_token: _,
} = options;
let schedule_snapshot = match schedule_snapshot {

View File

@@ -33,6 +33,8 @@ pub enum MeilisearchHttpError {
.0.iter().map(|uid| format!("\"{uid}\"")).collect::<Vec<_>>().join(", "), .0.len()
)]
SwapIndexPayloadWrongLength(Vec<IndexUid>),
#[error("S3 Error: {0}")]
S3Error(#[from] strois::Error),
#[error(transparent)]
IndexUid(#[from] IndexUidFormatError),
#[error(transparent)]
@@ -65,6 +67,7 @@ impl ErrorCode for MeilisearchHttpError {
MeilisearchHttpError::InvalidExpression(_, _) => Code::InvalidSearchFilter,
MeilisearchHttpError::PayloadTooLarge(_) => Code::PayloadTooLarge,
MeilisearchHttpError::SwapIndexPayloadWrongLength(_) => Code::InvalidSwapIndexes,
MeilisearchHttpError::S3Error(_) => Code::S3Error,
MeilisearchHttpError::IndexUid(e) => e.error_code(),
MeilisearchHttpError::SerdeJson(_) => Code::Internal,
MeilisearchHttpError::HeedError(_) => Code::Internal,

View File

@@ -39,6 +39,8 @@ use meilisearch_types::versioning::{check_version_file, create_version_file};
use meilisearch_types::{compression, milli, VERSION_FILE_NAME};
pub use option::Opt;
use option::ScheduleSnapshot;
use strois::Bucket;
use zookeeper_client_sync::Zookeeper;
use crate::error::MeilisearchHttpError;
@@ -136,14 +138,17 @@ enum OnFailure {
KeepDb,
}
pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<(Arc<IndexScheduler>, Arc<AuthController>)> {
pub fn setup_meilisearch(
opt: &Opt,
zookeeper: Option<Arc<Zookeeper>>,
) -> anyhow::Result<(Arc<IndexScheduler>, Arc<AuthController>)> {
let empty_db = is_empty_db(&opt.db_path);
let (index_scheduler, auth_controller) = if let Some(ref snapshot_path) = opt.import_snapshot {
let snapshot_path_exists = snapshot_path.exists();
// the db is empty and the snapshot exists, import it
if empty_db && snapshot_path_exists {
match compression::from_tar_gz(snapshot_path, &opt.db_path) {
Ok(()) => open_or_create_database_unchecked(opt, OnFailure::RemoveDb)?,
Ok(()) => open_or_create_database_unchecked(opt, OnFailure::RemoveDb, zookeeper)?,
Err(e) => {
std::fs::remove_dir_all(&opt.db_path)?;
return Err(e);
@@ -160,14 +165,14 @@ pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<(Arc<IndexScheduler>, Arc<
bail!("snapshot doesn't exist at {}", snapshot_path.display())
// the snapshot and the db exist, and we can ignore the snapshot because of the ignore_snapshot_if_db_exists flag
} else {
open_or_create_database(opt, empty_db)?
open_or_create_database(opt, empty_db, zookeeper)?
}
} else if let Some(ref path) = opt.import_dump {
let src_path_exists = path.exists();
// the db is empty and the dump exists, import it
if empty_db && src_path_exists {
let (mut index_scheduler, mut auth_controller) =
open_or_create_database_unchecked(opt, OnFailure::RemoveDb)?;
open_or_create_database_unchecked(opt, OnFailure::RemoveDb, zookeeper)?;
match import_dump(&opt.db_path, path, &mut index_scheduler, &mut auth_controller) {
Ok(()) => (index_scheduler, auth_controller),
Err(e) => {
@@ -187,10 +192,10 @@ pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<(Arc<IndexScheduler>, Arc<
// the dump and the db exist and we can ignore the dump because of the ignore_dump_if_db_exists flag
// or, the dump is missing but we can ignore that because of the ignore_missing_dump flag
} else {
open_or_create_database(opt, empty_db)?
open_or_create_database(opt, empty_db, zookeeper)?
}
} else {
open_or_create_database(opt, empty_db)?
open_or_create_database(opt, empty_db, zookeeper)?
};
// We create a loop in a thread that registers snapshotCreation tasks
@@ -199,15 +204,12 @@ pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<(Arc<IndexScheduler>, Arc<
if let ScheduleSnapshot::Enabled(snapshot_delay) = opt.schedule_snapshot {
let snapshot_delay = Duration::from_secs(snapshot_delay);
let index_scheduler = index_scheduler.clone();
thread::Builder::new()
.name(String::from("register-snapshot-tasks"))
.spawn(move || loop {
thread::sleep(snapshot_delay);
if let Err(e) = index_scheduler.register(KindWithContent::SnapshotCreation) {
error!("Error while registering snapshot: {}", e);
}
})
.unwrap();
thread::spawn(move || loop {
thread::sleep(snapshot_delay);
if let Err(e) = index_scheduler.register(KindWithContent::SnapshotCreation) {
error!("Error while registering snapshot: {}", e);
}
});
}
Ok((index_scheduler, auth_controller))
@@ -217,34 +219,48 @@ pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<(Arc<IndexScheduler>, Arc<
fn open_or_create_database_unchecked(
opt: &Opt,
on_failure: OnFailure,
zookeeper: Option<Arc<Zookeeper>>,
) -> anyhow::Result<(IndexScheduler, AuthController)> {
// we don't want to create anything in the data.ms yet, thus we
// wrap our two builders in a closure that'll be executed later.
let auth_controller = AuthController::new(&opt.db_path, &opt.master_key);
let auth_controller = AuthController::new(&opt.db_path, &opt.master_key, zookeeper.clone());
let instance_features = opt.to_instance_features();
let index_scheduler_builder = || -> anyhow::Result<_> {
Ok(IndexScheduler::new(IndexSchedulerOptions {
version_file_path: opt.db_path.join(VERSION_FILE_NAME),
auth_path: opt.db_path.join("auth"),
tasks_path: opt.db_path.join("tasks"),
update_file_path: opt.db_path.join("update_files"),
indexes_path: opt.db_path.join("indexes"),
snapshots_path: opt.snapshot_dir.clone(),
dumps_path: opt.dump_dir.clone(),
task_db_size: opt.max_task_db_size.get_bytes() as usize,
index_base_map_size: opt.max_index_size.get_bytes() as usize,
enable_mdb_writemap: opt.experimental_reduce_indexing_memory_usage,
indexer_config: (&opt.indexer_options).try_into()?,
autobatching_enabled: true,
max_number_of_tasks: 1_000_000,
index_growth_amount: byte_unit::Byte::from_str("10GiB").unwrap().get_bytes() as usize,
index_count: DEFAULT_INDEX_COUNT,
instance_features,
})?)
};
let index_scheduler = IndexScheduler::new(Arc::new(IndexSchedulerOptions {
version_file_path: opt.db_path.join(VERSION_FILE_NAME),
auth_path: opt.db_path.join("auth"),
tasks_path: opt.db_path.join("tasks"),
update_file_path: opt.db_path.join("update_files"),
indexes_path: opt.db_path.join("indexes"),
snapshots_path: opt.snapshot_dir.clone(),
dumps_path: opt.dump_dir.clone(),
task_db_size: opt.max_task_db_size.get_bytes() as usize,
index_base_map_size: opt.max_index_size.get_bytes() as usize,
enable_mdb_writemap: opt.experimental_reduce_indexing_memory_usage,
indexer_config: (&opt.indexer_options).try_into().map(Arc::new)?,
autobatching_enabled: true,
max_number_of_tasks: 1_000_000,
index_growth_amount: byte_unit::Byte::from_str("10GiB").unwrap().get_bytes() as usize,
index_count: DEFAULT_INDEX_COUNT,
instance_features,
zookeeper: zookeeper.clone(),
s3: opt.s3_url.as_ref().map(|url| {
Arc::new(
Bucket::builder(url)
.unwrap()
.key(opt.s3_access_key.as_ref().expect("Need s3 key to work").clone())
.secret(opt.s3_secret_key.as_ref().expect("Need s3 secret to work").clone())
.maybe_token(opt.s3_security_token.clone())
.region(&opt.s3_region)
.with_url_path_style(true)
.bucket(opt.s3_bucket.as_ref().expect("Need an s3 bucket to work"))
.unwrap(),
)
}),
}))
.map_err(anyhow::Error::from);
match (
index_scheduler_builder(),
index_scheduler,
auth_controller.map_err(anyhow::Error::from),
create_version_file(&opt.db_path).map_err(anyhow::Error::from),
) {
@@ -262,12 +278,13 @@ fn open_or_create_database_unchecked(
fn open_or_create_database(
opt: &Opt,
empty_db: bool,
zookeeper: Option<Arc<Zookeeper>>,
) -> anyhow::Result<(IndexScheduler, AuthController)> {
if !empty_db {
check_version_file(&opt.db_path)?;
}
open_or_create_database_unchecked(opt, OnFailure::KeepDb)
open_or_create_database_unchecked(opt, OnFailure::KeepDb, zookeeper)
}
fn import_dump(
@@ -277,6 +294,7 @@ fn import_dump(
auth: &mut AuthController,
) -> Result<(), anyhow::Error> {
let reader = File::open(dump_path)?;
let index_scheduler = index_scheduler.inner();
let mut dump_reader = dump::DumpReader::open(reader)?;
if let Some(date) = dump_reader.date() {

View File

@@ -12,6 +12,7 @@ use meilisearch::analytics::Analytics;
use meilisearch::{analytics, create_app, prototype_name, setup_meilisearch, Opt};
use meilisearch_auth::{generate_master_key, AuthController, MASTER_KEY_MIN_SIZE};
use termcolor::{Color, ColorChoice, ColorSpec, StandardStream, WriteColor};
use zookeeper_client_sync::Zookeeper;
#[global_allocator]
static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc;
@@ -30,6 +31,10 @@ fn setup(opt: &Opt) -> anyhow::Result<()> {
async fn main() -> anyhow::Result<()> {
let (opt, config_read_from) = Opt::try_build()?;
#[cfg(feature = "profile-with-puffin")]
let _server = puffin_http::Server::new(&format!("0.0.0.0:{}", puffin_http::DEFAULT_PORT))?;
puffin::set_scopes_on(cfg!(feature = "profile-with-puffin"));
anyhow::ensure!(
!(cfg!(windows) && opt.experimental_reduce_indexing_memory_usage),
"The `experimental-reduce-indexing-memory-usage` flag is not supported on Windows"
@@ -59,7 +64,13 @@ async fn main() -> anyhow::Result<()> {
_ => (),
}
let (index_scheduler, auth_controller) = setup_meilisearch(&opt)?;
let zookeeper = match opt.zk_url {
Some(ref url) => Some(Arc::new(Zookeeper::connect(url).await.unwrap())),
None => None,
};
let optc = opt.clone();
let (index_scheduler, auth_controller) =
tokio::task::spawn_blocking(move || setup_meilisearch(&optc, zookeeper)).await.unwrap()?;
#[cfg(all(not(debug_assertions), feature = "analytics"))]
let analytics = if !opt.no_analytics {

View File

@@ -28,6 +28,13 @@ const MEILI_DB_PATH: &str = "MEILI_DB_PATH";
const MEILI_HTTP_ADDR: &str = "MEILI_HTTP_ADDR";
const MEILI_MASTER_KEY: &str = "MEILI_MASTER_KEY";
const MEILI_ENV: &str = "MEILI_ENV";
const MEILI_ZK_URL: &str = "MEILI_ZK_URL";
const MEILI_S3_URL: &str = "MEILI_S3_URL";
const MEILI_S3_BUCKET: &str = "MEILI_S3_BUCKET";
const MEILI_S3_ACCESS_KEY: &str = "MEILI_S3_ACCESS_KEY";
const MEILI_S3_SECRET_KEY: &str = "MEILI_S3_SECRET_KEY";
const MEILI_S3_SECURITY_TOKEN: &str = "MEILI_S3_SECURITY_TOKEN";
const MEILI_S3_REGION: &str = "MEILI_S3_REGION";
#[cfg(all(not(debug_assertions), feature = "analytics"))]
const MEILI_NO_ANALYTICS: &str = "MEILI_NO_ANALYTICS";
const MEILI_HTTP_PAYLOAD_SIZE_LIMIT: &str = "MEILI_HTTP_PAYLOAD_SIZE_LIMIT";
@@ -56,6 +63,7 @@ const DEFAULT_CONFIG_FILE_PATH: &str = "./config.toml";
const DEFAULT_DB_PATH: &str = "./data.ms";
const DEFAULT_HTTP_ADDR: &str = "localhost:7700";
const DEFAULT_ENV: &str = "development";
const DEFAULT_S3_REGION: &str = "eu-central-1";
const DEFAULT_HTTP_PAYLOAD_SIZE_LIMIT: &str = "100 MB";
const DEFAULT_SNAPSHOT_DIR: &str = "snapshots/";
const DEFAULT_SNAPSHOT_INTERVAL_SEC: u64 = 86400;
@@ -154,6 +162,36 @@ pub struct Opt {
#[serde(default = "default_env")]
pub env: String,
/// Sets the HTTP address and port used to communicate with the zookeeper cluster.
/// If ran locally, the default url is `http://localhost:2181/`.
#[clap(long, env = MEILI_ZK_URL)]
pub zk_url: Option<String>,
/// Sets the address and port used to communicate with the S3 bucket.
#[clap(long, env = MEILI_S3_URL)]
pub s3_url: Option<String>,
/// Sets the region used to communicate with the s3 bucket.
#[clap(long, env = MEILI_S3_REGION, default_value_t = default_s3_region())]
#[serde(default = "default_s3_region")]
pub s3_region: String,
/// Sets the S3 bucket name to use.
#[clap(long, env = MEILI_S3_BUCKET)]
pub s3_bucket: Option<String>,
/// Set the S3 access key. If used you must also set the secret key.
#[clap(long, env = MEILI_S3_ACCESS_KEY)]
pub s3_access_key: Option<String>,
/// Set the S3 secret key. If used you must also set the access key.
#[clap(long, env = MEILI_S3_SECRET_KEY)]
pub s3_secret_key: Option<String>,
/// Security token, can't be used with access key and secret key.
#[clap(long, env = MEILI_S3_SECURITY_TOKEN)]
pub s3_security_token: Option<String>,
/// Deactivates Meilisearch's built-in telemetry when provided.
///
/// Meilisearch automatically collects data from all instances that do not opt out using this flag.
@@ -368,6 +406,13 @@ impl Opt {
http_addr,
master_key,
env,
zk_url,
s3_url,
s3_region,
s3_bucket,
s3_access_key,
s3_secret_key,
s3_security_token,
max_index_size: _,
max_task_db_size: _,
http_payload_size_limit,
@@ -401,6 +446,25 @@ impl Opt {
export_to_env_if_not_present(MEILI_MASTER_KEY, master_key);
}
export_to_env_if_not_present(MEILI_ENV, env);
if let Some(zk_url) = zk_url {
export_to_env_if_not_present(MEILI_ZK_URL, zk_url);
}
if let Some(s3_url) = s3_url {
export_to_env_if_not_present(MEILI_S3_URL, s3_url);
}
export_to_env_if_not_present(MEILI_S3_REGION, s3_region);
if let Some(s3_bucket) = s3_bucket {
export_to_env_if_not_present(MEILI_S3_BUCKET, s3_bucket);
}
if let Some(s3_access_key) = s3_access_key {
export_to_env_if_not_present(MEILI_S3_ACCESS_KEY, s3_access_key);
}
if let Some(s3_secret_key) = s3_secret_key {
export_to_env_if_not_present(MEILI_S3_SECRET_KEY, s3_secret_key);
}
if let Some(s3_security_token) = s3_security_token {
export_to_env_if_not_present(MEILI_S3_SECURITY_TOKEN, s3_security_token);
}
#[cfg(all(not(debug_assertions), feature = "analytics"))]
{
export_to_env_if_not_present(MEILI_NO_ANALYTICS, no_analytics.to_string());
@@ -547,7 +611,7 @@ impl TryFrom<&IndexerOpts> for IndexerConfig {
Ok(Self {
log_every_n: Some(DEFAULT_LOG_EVERY_N),
max_memory: other.max_indexing_memory.map(|b| b.get_bytes() as usize),
thread_pool: Some(thread_pool),
thread_pool: Some(Arc::new(thread_pool)),
max_positions_per_attributes: None,
skip_index_budget: other.skip_index_budget,
..Default::default()
@@ -715,6 +779,10 @@ fn default_env() -> String {
DEFAULT_ENV.to_string()
}
fn default_s3_region() -> String {
DEFAULT_S3_REGION.to_string()
}
fn default_max_index_size() -> Byte {
Byte::from_bytes(INDEX_SIZE)
}

View File

@@ -41,14 +41,10 @@ pub async fn create_api_key(
_req: HttpRequest,
) -> Result<HttpResponse, ResponseError> {
let v = body.into_inner();
let res = tokio::task::spawn_blocking(move || -> Result<_, AuthControllerError> {
let key = auth_controller.create_key(v)?;
Ok(KeyView::from_key(key, &auth_controller))
})
.await
.map_err(|e| ResponseError::from_msg(e.to_string(), Code::Internal))??;
let key = auth_controller.create_key(v)?;
let key = KeyView::from_key(key, &auth_controller);
Ok(HttpResponse::Created().json(res))
Ok(HttpResponse::Created().json(key))
}
#[derive(Deserr, Debug, Clone, Copy)]
@@ -110,17 +106,11 @@ pub async fn patch_api_key(
) -> Result<HttpResponse, ResponseError> {
let key = path.into_inner().key;
let patch_api_key = body.into_inner();
let res = tokio::task::spawn_blocking(move || -> Result<_, AuthControllerError> {
let uid =
Uuid::parse_str(&key).or_else(|_| auth_controller.get_uid_from_encoded_key(&key))?;
let key = auth_controller.update_key(uid, patch_api_key)?;
let uid = Uuid::parse_str(&key).or_else(|_| auth_controller.get_uid_from_encoded_key(&key))?;
let key = auth_controller.update_key(uid, patch_api_key)?;
let key = KeyView::from_key(key, &auth_controller);
Ok(KeyView::from_key(key, &auth_controller))
})
.await
.map_err(|e| ResponseError::from_msg(e.to_string(), Code::Internal))??;
Ok(HttpResponse::Ok().json(res))
Ok(HttpResponse::Ok().json(key))
}
pub async fn delete_api_key(
@@ -128,13 +118,8 @@ pub async fn delete_api_key(
path: web::Path<AuthParam>,
) -> Result<HttpResponse, ResponseError> {
let key = path.into_inner().key;
tokio::task::spawn_blocking(move || {
let uid =
Uuid::parse_str(&key).or_else(|_| auth_controller.get_uid_from_encoded_key(&key))?;
auth_controller.delete_key(uid)
})
.await
.map_err(|e| ResponseError::from_msg(e.to_string(), Code::Internal))??;
let uid = Uuid::parse_str(&key).or_else(|_| auth_controller.get_uid_from_encoded_key(&key))?;
auth_controller.delete_key(uid)?;
Ok(HttpResponse::NoContent().finish())
}

View File

@@ -29,8 +29,7 @@ pub async fn create_dump(
keys: auth_controller.list_keys()?,
instance_uid: analytics.instance_uid().cloned(),
};
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
let task: SummarizedTaskView = index_scheduler.register(task)?.into();
debug!("returns: {:?}", task);
Ok(HttpResponse::Accepted().json(task))

View File

@@ -78,6 +78,6 @@ async fn patch_features(
}),
Some(&req),
);
index_scheduler.put_runtime_features(new_features)?;
index_scheduler.inner().put_runtime_features(new_features)?;
Ok(HttpResponse::Ok().json(new_features))
}

View File

@@ -1,4 +1,4 @@
use std::io::ErrorKind;
use std::io::{BufReader, ErrorKind, Seek, SeekFrom};
use actix_web::http::header::CONTENT_TYPE;
use actix_web::web::Data;
@@ -129,8 +129,7 @@ pub async fn delete_document(
index_uid: index_uid.to_string(),
documents_ids: vec![document_id],
};
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
let task: SummarizedTaskView = index_scheduler.register(task)?.into();
debug!("returns: {:?}", task);
Ok(HttpResponse::Accepted().json(task))
}
@@ -396,11 +395,12 @@ async fn document_addition(
return Err(MeilisearchHttpError::MissingPayload(format));
}
if let Err(e) = buffer.seek(std::io::SeekFrom::Start(0)).await {
if let Err(e) = buffer.seek(SeekFrom::Start(0)).await {
return Err(MeilisearchHttpError::Payload(ReceivePayload(Box::new(e))));
}
let read_file = buffer.into_inner().into_std().await;
let s3 = index_scheduler.s3.clone();
let documents_count = tokio::task::spawn_blocking(move || {
let documents_count = match format {
PayloadType::Json => read_json(&read_file, update_file.as_file_mut())?,
@@ -409,8 +409,16 @@ async fn document_addition(
}
PayloadType::Ndjson => read_ndjson(&read_file, update_file.as_file_mut())?,
};
if let Some(s3) = s3 {
update_file.seek(SeekFrom::Start(0)).unwrap();
let mut reader = BufReader::new(&*update_file);
s3.put_object_multipart(format!("update-files/{}", uuid), &mut reader)?;
}
// we NEED to persist the file here because we moved the `udpate_file` in another task.
update_file.persist()?;
Ok(documents_count)
})
.await;
@@ -445,7 +453,7 @@ async fn document_addition(
};
let scheduler = index_scheduler.clone();
let task = match tokio::task::spawn_blocking(move || scheduler.register(task)).await? {
let task = match scheduler.register(task) {
Ok(task) => task,
Err(e) => {
index_scheduler.delete_update_file(uuid)?;
@@ -476,8 +484,7 @@ pub async fn delete_documents_batch(
let task =
KindWithContent::DocumentDeletion { index_uid: index_uid.to_string(), documents_ids: ids };
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
let task: SummarizedTaskView = index_scheduler.register(task)?.into();
debug!("returns: {:?}", task);
Ok(HttpResponse::Accepted().json(task))
@@ -512,8 +519,7 @@ pub async fn delete_documents_by_filter(
.map_err(|err| ResponseError::from_msg(err.message, Code::InvalidDocumentFilter))?;
let task = KindWithContent::DocumentDeletionByFilter { index_uid, filter_expr: filter };
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
let task: SummarizedTaskView = index_scheduler.register(task)?.into();
debug!("returns: {:?}", task);
Ok(HttpResponse::Accepted().json(task))
@@ -529,8 +535,7 @@ pub async fn clear_all_documents(
analytics.delete_documents(DocumentDeletionKind::ClearAll, &req);
let task = KindWithContent::DocumentClear { index_uid: index_uid.to_string() };
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
let task: SummarizedTaskView = index_scheduler.register(task)?.into();
debug!("returns: {:?}", task);
Ok(HttpResponse::Accepted().json(task))

View File

@@ -135,8 +135,7 @@ pub async fn create_index(
);
let task = KindWithContent::IndexCreation { index_uid: uid.to_string(), primary_key };
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
let task: SummarizedTaskView = index_scheduler.register(task)?.into();
Ok(HttpResponse::Accepted().json(task))
} else {
@@ -203,8 +202,7 @@ pub async fn update_index(
primary_key: body.primary_key,
};
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
let task: SummarizedTaskView = index_scheduler.register(task)?.into();
debug!("returns: {:?}", task);
Ok(HttpResponse::Accepted().json(task))
@@ -216,8 +214,7 @@ pub async fn delete_index(
) -> Result<HttpResponse, ResponseError> {
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
let task = KindWithContent::IndexDeletion { index_uid: index_uid.into_inner() };
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
let task: SummarizedTaskView = index_scheduler.register(task)?.into();
Ok(HttpResponse::Accepted().json(task))
}

View File

@@ -55,10 +55,7 @@ macro_rules! make_setting_route {
is_deletion: true,
allow_index_creation,
};
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task))
.await??
.into();
let task: SummarizedTaskView = index_scheduler.register(task)?.into();
debug!("returns: {:?}", task);
Ok(HttpResponse::Accepted().json(task))
@@ -97,10 +94,7 @@ macro_rules! make_setting_route {
is_deletion: false,
allow_index_creation,
};
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task))
.await??
.into();
let task: SummarizedTaskView = index_scheduler.register(task)?.into();
debug!("returns: {:?}", task);
Ok(HttpResponse::Accepted().json(task))
@@ -586,8 +580,7 @@ pub async fn update_all(
is_deletion: false,
allow_index_creation,
};
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
let task: SummarizedTaskView = index_scheduler.register(task)?.into();
debug!("returns: {:?}", task);
Ok(HttpResponse::Accepted().json(task))
@@ -622,8 +615,7 @@ pub async fn delete_all(
is_deletion: true,
allow_index_creation,
};
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
let task: SummarizedTaskView = index_scheduler.register(task)?.into();
debug!("returns: {:?}", task);
Ok(HttpResponse::Accepted().json(task))

View File

@@ -18,7 +18,6 @@ use serde_json::json;
use time::format_description::well_known::Rfc3339;
use time::macros::format_description;
use time::{Date, Duration, OffsetDateTime, Time};
use tokio::task;
use super::SummarizedTaskView;
use crate::analytics::Analytics;
@@ -325,15 +324,12 @@ async fn cancel_tasks(
let query = params.into_query();
let (tasks, _) = index_scheduler.get_task_ids_from_authorized_indexes(
&index_scheduler.read_txn()?,
&query,
index_scheduler.filters(),
)?;
let (tasks, _) =
index_scheduler.get_task_ids_from_authorized_indexes(&query, index_scheduler.filters())?;
let task_cancelation =
KindWithContent::TaskCancelation { query: format!("?{}", req.query_string()), tasks };
let task = task::spawn_blocking(move || index_scheduler.register(task_cancelation)).await??;
let task = index_scheduler.register(task_cancelation)?;
let task: SummarizedTaskView = task.into();
Ok(HttpResponse::Ok().json(task))
@@ -370,15 +366,12 @@ async fn delete_tasks(
);
let query = params.into_query();
let (tasks, _) = index_scheduler.get_task_ids_from_authorized_indexes(
&index_scheduler.read_txn()?,
&query,
index_scheduler.filters(),
)?;
let (tasks, _) =
index_scheduler.get_task_ids_from_authorized_indexes(&query, index_scheduler.filters())?;
let task_deletion =
KindWithContent::TaskDeletion { query: format!("?{}", req.query_string()), tasks };
let task = task::spawn_blocking(move || index_scheduler.register(task_deletion)).await??;
let task = index_scheduler.register(task_deletion)?;
let task: SummarizedTaskView = task.into();
Ok(HttpResponse::Ok().json(task))

View File

@@ -39,7 +39,7 @@ impl Server {
let options = default_settings(dir.path());
let (index_scheduler, auth) = setup_meilisearch(&options).unwrap();
let (index_scheduler, auth) = setup_meilisearch(&options, None).await.unwrap();
let service = Service { index_scheduler, auth, options, api_key: None };
Server { service, _dir: Some(dir) }
@@ -54,7 +54,7 @@ impl Server {
options.master_key = Some("MASTER_KEY".to_string());
let (index_scheduler, auth) = setup_meilisearch(&options).unwrap();
let (index_scheduler, auth) = setup_meilisearch(&options, None).await.unwrap();
let service = Service { index_scheduler, auth, options, api_key: None };
Server { service, _dir: Some(dir) }
@@ -67,7 +67,7 @@ impl Server {
}
pub async fn new_with_options(options: Opt) -> Result<Self, anyhow::Error> {
let (index_scheduler, auth) = setup_meilisearch(&options)?;
let (index_scheduler, auth) = setup_meilisearch(&options, None).await?;
let service = Service { index_scheduler, auth, options, api_key: None };
Ok(Server { service, _dir: None })

View File

@@ -17,7 +17,7 @@ bincode = "1.3.3"
bstr = "1.4.0"
bytemuck = { version = "1.13.1", features = ["extern_crate_alloc"] }
byteorder = "1.4.3"
charabia = { version = "0.8.2", default-features = false }
charabia = { version = "0.8.4", default-features = false }
concat-arrays = "0.1.2"
crossbeam-channel = "0.5.8"
deserr = "0.5.0"
@@ -65,13 +65,16 @@ filter-parser = { path = "../filter-parser" }
# documents words self-join
itertools = "0.10.5"
# profiling
puffin = "0.16.0"
# logging
log = "0.4.17"
logging_timer = "1.1.0"
csv = "1.2.1"
[dev-dependencies]
mimalloc = { version = "0.1.29", default-features = false }
mimalloc = { version = "0.1.37", default-features = false }
big_s = "1.0.2"
insta = "1.29.0"
maplit = "1.0.2"

View File

@@ -15,6 +15,8 @@ impl<'t, 'u, 'i> ClearDocuments<'t, 'u, 'i> {
}
pub fn execute(self) -> Result<u64> {
puffin::profile_function!();
self.index.set_updated_at(self.wtxn, &OffsetDateTime::now_utc())?;
let Index {
env: _env,

View File

@@ -109,6 +109,8 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
Some(docid)
}
pub fn execute(self) -> Result<DocumentDeletionResult> {
puffin::profile_function!();
let DetailedDocumentDeletionResult { deleted_documents, remaining_documents } =
self.execute_inner()?;

View File

@@ -31,6 +31,8 @@ pub fn enrich_documents_batch<R: Read + Seek>(
autogenerate_docids: bool,
reader: DocumentsBatchReader<R>,
) -> Result<StdResult<EnrichedDocumentsBatchReader<R>, UserError>> {
puffin::profile_function!();
let (mut cursor, mut documents_batch_index) = reader.into_cursor_and_fields_index();
let mut external_ids = tempfile::tempfile().map(grenad::Writer::new)?;

View File

@@ -30,6 +30,8 @@ pub fn extract_docid_word_positions<R: io::Read + io::Seek>(
stop_words: Option<&fst::Set<&[u8]>>,
max_positions_per_attributes: Option<u32>,
) -> Result<(RoaringBitmap, grenad::Reader<File>, ScriptLanguageDocidsMap)> {
puffin::profile_function!();
let max_positions_per_attributes = max_positions_per_attributes
.map_or(MAX_POSITION_PER_ATTRIBUTE, |max| max.min(MAX_POSITION_PER_ATTRIBUTE));
let max_memory = indexer.max_memory_by_thread();

View File

@@ -20,6 +20,8 @@ pub fn extract_facet_number_docids<R: io::Read + io::Seek>(
docid_fid_facet_number: grenad::Reader<R>,
indexer: GrenadParameters,
) -> Result<grenad::Reader<File>> {
puffin::profile_function!();
let max_memory = indexer.max_memory_by_thread();
let mut facet_number_docids_sorter = create_sorter(

View File

@@ -18,6 +18,8 @@ pub fn extract_facet_string_docids<R: io::Read + io::Seek>(
docid_fid_facet_string: grenad::Reader<R>,
indexer: GrenadParameters,
) -> Result<grenad::Reader<File>> {
puffin::profile_function!();
let max_memory = indexer.max_memory_by_thread();
let mut facet_string_docids_sorter = create_sorter(

View File

@@ -34,6 +34,8 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
indexer: GrenadParameters,
faceted_fields: &HashSet<FieldId>,
) -> Result<ExtractedFacetValues> {
puffin::profile_function!();
let max_memory = indexer.max_memory_by_thread();
let mut fid_docid_facet_numbers_sorter = create_sorter(

View File

@@ -22,6 +22,8 @@ pub fn extract_fid_word_count_docids<R: io::Read + io::Seek>(
docid_word_positions: grenad::Reader<R>,
indexer: GrenadParameters,
) -> Result<grenad::Reader<File>> {
puffin::profile_function!();
let max_memory = indexer.max_memory_by_thread();
let mut fid_word_count_docids_sorter = create_sorter(

View File

@@ -19,6 +19,8 @@ pub fn extract_geo_points<R: io::Read + io::Seek>(
primary_key_id: FieldId,
(lat_fid, lng_fid): (FieldId, FieldId),
) -> Result<grenad::Reader<File>> {
puffin::profile_function!();
let mut writer = create_writer(
indexer.chunk_compression_type,
indexer.chunk_compression_level,

View File

@@ -19,6 +19,8 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
primary_key_id: FieldId,
vectors_fid: FieldId,
) -> Result<grenad::Reader<File>> {
puffin::profile_function!();
let mut writer = create_writer(
indexer.chunk_compression_type,
indexer.chunk_compression_level,

View File

@@ -27,6 +27,8 @@ pub fn extract_word_docids<R: io::Read + io::Seek>(
indexer: GrenadParameters,
exact_attributes: &HashSet<FieldId>,
) -> Result<(grenad::Reader<File>, grenad::Reader<File>)> {
puffin::profile_function!();
let max_memory = indexer.max_memory_by_thread();
let mut word_docids_sorter = create_sorter(

View File

@@ -15,6 +15,8 @@ pub fn extract_word_fid_docids<R: io::Read + io::Seek>(
docid_word_positions: grenad::Reader<R>,
indexer: GrenadParameters,
) -> Result<grenad::Reader<File>> {
puffin::profile_function!();
let max_memory = indexer.max_memory_by_thread();
let mut word_fid_docids_sorter = create_sorter(

View File

@@ -21,6 +21,8 @@ pub fn extract_word_pair_proximity_docids<R: io::Read + io::Seek>(
docid_word_positions: grenad::Reader<R>,
indexer: GrenadParameters,
) -> Result<grenad::Reader<File>> {
puffin::profile_function!();
let max_memory = indexer.max_memory_by_thread();
let mut word_pair_proximity_docids_sorter = create_sorter(

View File

@@ -18,6 +18,8 @@ pub fn extract_word_position_docids<R: io::Read + io::Seek>(
docid_word_positions: grenad::Reader<R>,
indexer: GrenadParameters,
) -> Result<grenad::Reader<File>> {
puffin::profile_function!();
let max_memory = indexer.max_memory_by_thread();
let mut word_position_docids_sorter = create_sorter(

View File

@@ -52,6 +52,8 @@ pub(crate) fn data_from_obkv_documents(
max_positions_per_attributes: Option<u32>,
exact_attributes: HashSet<FieldId>,
) -> Result<()> {
puffin::profile_function!();
original_obkv_chunks
.par_bridge()
.map(|original_documents_chunk| {
@@ -238,11 +240,13 @@ fn spawn_extraction_task<FE, FS, M>(
M::Output: Send,
{
rayon::spawn(move || {
puffin::profile_scope!("extract_multiple_chunks", name);
let chunks: Result<M> =
chunks.into_par_iter().map(|chunk| extract_fn(chunk, indexer)).collect();
rayon::spawn(move || match chunks {
Ok(chunks) => {
debug!("merge {} database", name);
puffin::profile_scope!("merge_multiple_chunks", name);
let reader = chunks.merge(merge_fn, &indexer);
let _ = lmdb_writer_sx.send(reader.map(serialize_fn));
}

View File

@@ -214,6 +214,7 @@ pub fn sorter_into_lmdb_database(
sorter: Sorter<MergeFn>,
merge: MergeFn,
) -> Result<()> {
puffin::profile_function!();
debug!("Writing MTBL sorter...");
let before = Instant::now();

View File

@@ -137,6 +137,8 @@ where
mut self,
reader: DocumentsBatchReader<R>,
) -> Result<(Self, StdResult<u64, UserError>)> {
puffin::profile_function!();
// Early return when there is no document to add
if reader.is_empty() {
return Ok((self, Ok(0)));
@@ -175,6 +177,8 @@ where
mut self,
to_delete: Vec<String>,
) -> Result<(Self, StdResult<u64, UserError>)> {
puffin::profile_function!();
// Early return when there is no document to add
if to_delete.is_empty() {
return Ok((self, Ok(0)));
@@ -194,6 +198,8 @@ where
#[logging_timer::time("IndexDocuments::{}")]
pub fn execute(mut self) -> Result<DocumentAdditionResult> {
puffin::profile_function!();
if self.added_documents == 0 {
let number_of_documents = self.index.number_of_documents(self.wtxn)?;
return Ok(DocumentAdditionResult { indexed_documents: 0, number_of_documents });
@@ -232,6 +238,8 @@ where
FP: Fn(UpdateIndexingStep) + Sync,
FA: Fn() -> bool + Sync,
{
puffin::profile_function!();
let TransformOutput {
primary_key,
fields_ids_map,
@@ -322,6 +330,7 @@ where
// Run extraction pipeline in parallel.
pool.install(|| {
puffin::profile_scope!("extract_and_send_grenad_chunks");
// split obkv file into several chunks
let original_chunk_iter =
grenad_obkv_into_chunks(original_documents, pool_params, documents_chunk_size);
@@ -477,6 +486,8 @@ where
FP: Fn(UpdateIndexingStep) + Sync,
FA: Fn() -> bool + Sync,
{
puffin::profile_function!();
// Merged databases are already been indexed, we start from this count;
let mut databases_seen = MERGED_DATABASE_COUNT;
@@ -511,26 +522,36 @@ where
return Err(Error::InternalError(InternalError::AbortedIndexation));
}
let current_prefix_fst = self.index.words_prefixes_fst(self.wtxn)?;
let current_prefix_fst;
let common_prefix_fst_words_tmp;
let common_prefix_fst_words: Vec<_>;
let new_prefix_fst_words;
let del_prefix_fst_words;
// We retrieve the common words between the previous and new prefix word fst.
let common_prefix_fst_words = fst_stream_into_vec(
previous_words_prefixes_fst.op().add(&current_prefix_fst).intersection(),
);
let common_prefix_fst_words: Vec<_> = common_prefix_fst_words
.as_slice()
.linear_group_by_key(|x| x.chars().next().unwrap())
.collect();
{
puffin::profile_scope!("compute_prefix_diffs");
// We retrieve the newly added words between the previous and new prefix word fst.
let new_prefix_fst_words = fst_stream_into_vec(
current_prefix_fst.op().add(&previous_words_prefixes_fst).difference(),
);
current_prefix_fst = self.index.words_prefixes_fst(self.wtxn)?;
// We compute the set of prefixes that are no more part of the prefix fst.
let del_prefix_fst_words = fst_stream_into_hashset(
previous_words_prefixes_fst.op().add(&current_prefix_fst).difference(),
);
// We retrieve the common words between the previous and new prefix word fst.
common_prefix_fst_words_tmp = fst_stream_into_vec(
previous_words_prefixes_fst.op().add(&current_prefix_fst).intersection(),
);
common_prefix_fst_words = common_prefix_fst_words_tmp
.as_slice()
.linear_group_by_key(|x| x.chars().next().unwrap())
.collect();
// We retrieve the newly added words between the previous and new prefix word fst.
new_prefix_fst_words = fst_stream_into_vec(
current_prefix_fst.op().add(&previous_words_prefixes_fst).difference(),
);
// We compute the set of prefixes that are no more part of the prefix fst.
del_prefix_fst_words = fst_stream_into_hashset(
previous_words_prefixes_fst.op().add(&current_prefix_fst).difference(),
);
}
databases_seen += 1;
(self.progress)(UpdateIndexingStep::MergeDataIntoFinalDatabase {
@@ -668,6 +689,8 @@ fn execute_word_prefix_docids(
common_prefix_fst_words: &[&[String]],
del_prefix_fst_words: &HashSet<Vec<u8>>,
) -> Result<()> {
puffin::profile_function!();
let cursor = reader.into_cursor()?;
let mut builder = WordPrefixDocids::new(txn, word_docids_db, word_prefix_docids_db);
builder.chunk_compression_type = indexer_config.chunk_compression_type;

View File

@@ -558,6 +558,8 @@ impl<'a, 'i> Transform<'a, 'i> {
where
F: Fn(UpdateIndexingStep) + Sync,
{
puffin::profile_function!();
let primary_key = self
.index
.primary_key(wtxn)?

View File

@@ -46,6 +46,66 @@ pub(crate) enum TypedChunk {
ScriptLanguageDocids(HashMap<(Script, Language), RoaringBitmap>),
}
impl TypedChunk {
pub fn to_debug_string(&self) -> String {
match self {
TypedChunk::FieldIdDocidFacetStrings(grenad) => {
format!("FieldIdDocidFacetStrings {{ number_of_entries: {} }}", grenad.len())
}
TypedChunk::FieldIdDocidFacetNumbers(grenad) => {
format!("FieldIdDocidFacetNumbers {{ number_of_entries: {} }}", grenad.len())
}
TypedChunk::Documents(grenad) => {
format!("Documents {{ number_of_entries: {} }}", grenad.len())
}
TypedChunk::FieldIdWordcountDocids(grenad) => {
format!("FieldIdWordcountDocids {{ number_of_entries: {} }}", grenad.len())
}
TypedChunk::NewDocumentsIds(grenad) => {
format!("NewDocumentsIds {{ number_of_entries: {} }}", grenad.len())
}
TypedChunk::WordDocids { word_docids_reader, exact_word_docids_reader } => format!(
"WordDocids {{ word_docids_reader: {}, exact_word_docids_reader: {} }}",
word_docids_reader.len(),
exact_word_docids_reader.len()
),
TypedChunk::WordPositionDocids(grenad) => {
format!("WordPositionDocids {{ number_of_entries: {} }}", grenad.len())
}
TypedChunk::WordFidDocids(grenad) => {
format!("WordFidDocids {{ number_of_entries: {} }}", grenad.len())
}
TypedChunk::WordPairProximityDocids(grenad) => {
format!("WordPairProximityDocids {{ number_of_entries: {} }}", grenad.len())
}
TypedChunk::FieldIdFacetStringDocids(grenad) => {
format!("FieldIdFacetStringDocids {{ number_of_entries: {} }}", grenad.len())
}
TypedChunk::FieldIdFacetNumberDocids(grenad) => {
format!("FieldIdFacetNumberDocids {{ number_of_entries: {} }}", grenad.len())
}
TypedChunk::FieldIdFacetExistsDocids(grenad) => {
format!("FieldIdFacetExistsDocids {{ number_of_entries: {} }}", grenad.len())
}
TypedChunk::FieldIdFacetIsNullDocids(grenad) => {
format!("FieldIdFacetIsNullDocids {{ number_of_entries: {} }}", grenad.len())
}
TypedChunk::FieldIdFacetIsEmptyDocids(grenad) => {
format!("FieldIdFacetIsEmptyDocids {{ number_of_entries: {} }}", grenad.len())
}
TypedChunk::GeoPoints(grenad) => {
format!("GeoPoints {{ number_of_entries: {} }}", grenad.len())
}
TypedChunk::VectorPoints(grenad) => {
format!("VectorPoints {{ number_of_entries: {} }}", grenad.len())
}
TypedChunk::ScriptLanguageDocids(grenad) => {
format!("ScriptLanguageDocids {{ number_of_entries: {} }}", grenad.len())
}
}
}
}
/// Write typed chunk in the corresponding LMDB database of the provided index.
/// Return new documents seen.
pub(crate) fn write_typed_chunk_into_index(
@@ -54,6 +114,8 @@ pub(crate) fn write_typed_chunk_into_index(
wtxn: &mut RwTxn,
index_is_empty: bool,
) -> Result<(RoaringBitmap, bool)> {
puffin::profile_function!(typed_chunk.to_debug_string());
let mut is_merged_database = false;
match typed_chunk {
TypedChunk::Documents(obkv_documents_iter) => {
@@ -350,6 +412,8 @@ where
FS: for<'a> Fn(&'a [u8], &'a mut Vec<u8>) -> Result<&'a [u8]>,
FM: Fn(&[u8], &[u8], &mut Vec<u8>) -> Result<()>,
{
puffin::profile_function!(format!("number of entries: {}", data.len()));
let mut buffer = Vec::new();
let database = database.remap_types::<ByteSlice, ByteSlice>();
@@ -392,6 +456,8 @@ where
FS: for<'a> Fn(&'a [u8], &'a mut Vec<u8>) -> Result<&'a [u8]>,
FM: Fn(&[u8], &[u8], &mut Vec<u8>) -> Result<()>,
{
puffin::profile_function!(format!("number of entries: {}", data.len()));
if !index_is_empty {
return write_entries_into_database(
data,

View File

@@ -1,3 +1,5 @@
use std::sync::Arc;
use grenad::CompressionType;
use rayon::ThreadPool;
@@ -9,7 +11,7 @@ pub struct IndexerConfig {
pub max_memory: Option<usize>,
pub chunk_compression_type: CompressionType,
pub chunk_compression_level: Option<u32>,
pub thread_pool: Option<ThreadPool>,
pub thread_pool: Option<Arc<ThreadPool>>,
pub max_positions_per_attributes: Option<u32>,
pub skip_index_budget: bool,
}

View File

@@ -50,6 +50,8 @@ impl<'t, 'u, 'i> PrefixWordPairsProximityDocids<'t, 'u, 'i> {
common_prefix_fst_words: &[&'a [String]],
del_prefix_fst_words: &HashSet<Vec<u8>>,
) -> Result<()> {
puffin::profile_function!();
index_word_prefix_database(
self.wtxn,
self.index.word_pair_proximity_docids,

View File

@@ -27,6 +27,8 @@ pub fn index_prefix_word_database(
chunk_compression_type: CompressionType,
chunk_compression_level: Option<u32>,
) -> Result<()> {
puffin::profile_function!();
let max_proximity = max_proximity - 1;
debug!("Computing and writing the word prefix pair proximity docids into LMDB on disk...");

View File

@@ -191,6 +191,7 @@ pub fn index_word_prefix_database(
chunk_compression_type: CompressionType,
chunk_compression_level: Option<u32>,
) -> Result<()> {
puffin::profile_function!();
debug!("Computing and writing the word prefix pair proximity docids into LMDB on disk...");
// Make a prefix trie from the common prefixes that are shorter than self.max_prefix_length

View File

@@ -303,6 +303,8 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
FP: Fn(UpdateIndexingStep) + Sync,
FA: Fn() -> bool + Sync,
{
puffin::profile_function!();
let fields_ids_map = self.index.fields_ids_map(self.wtxn)?;
// if the settings are set before any document update, we don't need to do anything, and
// will set the primary key during the first document addition.

View File

@@ -45,6 +45,8 @@ impl<'t, 'u, 'i> WordPrefixDocids<'t, 'u, 'i> {
common_prefix_fst_words: &[&[String]],
del_prefix_fst_words: &HashSet<Vec<u8>>,
) -> Result<()> {
puffin::profile_function!();
// It is forbidden to keep a mutable reference into the database
// and write into it at the same time, therefore we write into another file.
let mut prefix_docids_sorter = create_sorter(

View File

@@ -50,6 +50,7 @@ impl<'t, 'u, 'i> WordPrefixIntegerDocids<'t, 'u, 'i> {
common_prefix_fst_words: &[&[String]],
del_prefix_fst_words: &HashSet<Vec<u8>>,
) -> Result<()> {
puffin::profile_function!();
debug!("Computing and writing the word levels integers docids into LMDB on disk...");
let mut prefix_integer_docids_sorter = create_sorter(

View File

@@ -42,6 +42,8 @@ impl<'t, 'u, 'i> WordsPrefixesFst<'t, 'u, 'i> {
#[logging_timer::time("WordsPrefixesFst::{}")]
pub fn execute(self) -> Result<()> {
puffin::profile_function!();
let words_fst = self.index.words_fst(self.wtxn)?;
let mut current_prefix = vec![SmallString32::new(); self.max_prefix_length];