Compare commits

...

690 Commits

Author SHA1 Message Date
6a5a834f27 Lazily compute the FSTs during indexing 2025-04-03 16:04:35 +02:00
418fa47963 Merge pull request #5313 from barloes/fixRankingScoreThresholdRankingIssue
fix for rankingScoreThreshold changes the results' ranking
2025-04-01 13:10:55 +00:00
0656a0d515 Optimize roaring operation
Co-authored-by: Many the fish <many@meilisearch.com>
2025-04-01 14:25:27 +02:00
e36a8c50b9 Merge pull request #5478 from meilisearch/enforce-embedding-dimensions
Enforce embedding dimensions
2025-03-31 15:31:29 +00:00
08ff135ad6 Fix test 2025-03-31 15:27:49 +02:00
f729864466 Check dimension mismatch at insertion time 2025-03-31 15:27:49 +02:00
94ea263bef Add new error for dimensions mismatch during indexing 2025-03-31 15:27:49 +02:00
0e475cb5e6 fix warn and show what meilisearch understood of the vectors in the cursed test 2025-03-31 13:49:22 +02:00
62de70b73c Document problematic case in test and acknowledge PR comment 2025-03-31 13:49:22 +02:00
7707fb18dd add embedding with dimension mismatch test case 2025-03-31 13:49:22 +02:00
bb2e9419d3 Merge pull request #5468 from meilisearch/more-precise-post-processing
More Precise Post Processing
2025-03-27 10:07:09 +00:00
cf68713145 Merge pull request #5465 from meilisearch/improve-stats-perf
Improve documents stats performances
2025-03-27 09:20:14 +00:00
811143cbe9 Add more progress precision when doing post processing 2025-03-27 10:17:28 +01:00
c670e9a39b Make sure the snaps are happy 2025-03-26 20:03:35 +01:00
65f1b13475 Merge pull request #5464 from meilisearch/camel-case-database-sizes
Prefer camelCase for internal database sizes db name
2025-03-26 16:40:39 +00:00
db7ce03763 Improve the performances of computing the size of the documents database 2025-03-26 17:40:12 +01:00
7ed9adde29 Prefer camelCase for internal database sizes db name 2025-03-26 16:45:52 +01:00
9ce7ccfbe7 Merge pull request #5457 from meilisearch/show-database-sizes-changes
Show database sizes batches
2025-03-26 10:19:40 +00:00
3deb1ef78f Fix the snapshots again 2025-03-26 10:38:49 +01:00
5820d822c8 Add more details about the finalizing progress step 2025-03-26 09:49:43 +01:00
637bea0370 Compute and store the database sizes 2025-03-26 09:49:42 +01:00
fd079c6757 Add an index method to get the database sizes 2025-03-25 16:30:51 +01:00
182e5d5632 Add database sizes stats to the batches 2025-03-25 16:30:15 +01:00
82aee6a9af Merge pull request #5415 from meilisearch/isolate-word-fst-usage
Isolate word fst usage
2025-03-25 11:43:37 +00:00
fca947219f Merge pull request #5402 from meilisearch/do-not-reindex-searchable-order-change
Avoid reindexing searchable order changes
2025-03-25 07:03:14 +00:00
fb7ae9f97f Merge pull request #5454 from meilisearch/update-charabia-v0.9.3
Update Charabia v0.9.3
2025-03-24 22:34:51 +00:00
cd421fea1e Merge pull request #5456 from meilisearch/fix-CI
Fix CI to work with merge queues
2025-03-25 09:55:59 +00:00
1ad4235beb Remove the bors file 2025-03-25 10:05:41 +01:00
de6c7e551e Remove bors references from the repository 2025-03-25 10:04:38 +01:00
c0fe70c5f0 Make the CI work with merge queue grouping 2025-03-25 10:04:24 +01:00
a09d08c7b6 Avoid reindexing searchable order changes
Update settings.rs

Update settings.rs
2025-03-24 16:26:52 +01:00
2e6aa63efc Update Charabia v0.9.3 2025-03-24 14:32:21 +01:00
f9807ba32e Fix logic when results are below the threshold 2025-03-19 11:34:53 +01:00
8c8cc59a6c remove new line added by accident 2025-03-19 11:34:53 +01:00
f540a69ac3 add 1 to index so it points to correct position 2025-03-19 11:34:52 +01:00
7df2bdfb15 Merge #5436
5436: Update mini-dashboard to v0.2.19 version r=Kerollmops a=curquiza

Fixes mini dashboard to prevent the panel from popping up every time

Fixed by `@mdubus` 👍 

Co-authored-by: curquiza <clementine@meilisearch.com>
2025-03-18 16:24:31 +00:00
71f7456748 Update mini-dashboard to v0.2.19 version 2025-03-18 12:48:38 +01:00
c98b313d03 Merge #5426
5426: Bump zip from 2.2.2 to 2.3.0 r=Kerollmops a=dependabot[bot]

Bumps [zip](https://github.com/zip-rs/zip2) from 2.2.2 to 2.3.0.
<details>
<summary>Release notes</summary>
<p><em>Sourced from <a href="https://github.com/zip-rs/zip2/releases">zip's releases</a>.</em></p>
<blockquote>
<h2>v2.3.0</h2>
<h3><!-- raw HTML omitted -->🚀 Features</h3>
<ul>
<li>Add support for NTFS extra field (<a href="https://redirect.github.com/zip-rs/zip2/pull/279">#279</a>)</li>
</ul>
<h3><!-- raw HTML omitted -->🐛 Bug Fixes</h3>
<ul>
<li><em>(test)</em> Conditionalize a zip64 doctest (<a href="https://redirect.github.com/zip-rs/zip2/pull/308">#308</a>)</li>
<li>fix failing tests, remove symlink loop check</li>
<li>Canonicalize output path to avoid false negatives</li>
<li>Symlink handling in stream extraction</li>
<li>Canonicalize output paths and symlink targets, and ensure they descend from the destination</li>
</ul>
<h3><!-- raw HTML omitted -->⚙️ Miscellaneous Tasks</h3>
<ul>
<li>Fix clippy and cargo fmt warnings (<a href="https://redirect.github.com/zip-rs/zip2/pull/310">#310</a>)</li>
</ul>
<h2>v2.2.3</h2>
<h3><!-- raw HTML omitted -->🚜 Refactor</h3>
<ul>
<li>Change the inner structure of <code>DateTime</code> (<a href="https://redirect.github.com/zip-rs/zip2/issues/267">#267</a>)</li>
</ul>
<h3><!-- raw HTML omitted -->⚙️ Miscellaneous Tasks</h3>
<ul>
<li>cargo fix --edition</li>
</ul>
</blockquote>
</details>
<details>
<summary>Changelog</summary>
<p><em>Sourced from <a href="https://github.com/zip-rs/zip2/blob/master/CHANGELOG.md">zip's changelog</a>.</em></p>
<blockquote>
<h2><a href="https://github.com/zip-rs/zip2/compare/v2.2.3...v2.3.0">2.3.0</a> - 2025-03-16</h2>
<h3><!-- raw HTML omitted -->🚀 Features</h3>
<ul>
<li>Add support for NTFS extra field (<a href="https://redirect.github.com/zip-rs/zip2/pull/279">#279</a>)</li>
</ul>
<h3><!-- raw HTML omitted -->🐛 Bug Fixes</h3>
<ul>
<li><em>(test)</em> Conditionalize a zip64 doctest (<a href="https://redirect.github.com/zip-rs/zip2/pull/308">#308</a>)</li>
<li>fix failing tests, remove symlink loop check</li>
<li>Canonicalize output path to avoid false negatives</li>
<li>Symlink handling in stream extraction</li>
<li>Canonicalize output paths and symlink targets, and ensure they descend from the destination</li>
</ul>
<h3><!-- raw HTML omitted -->⚙️ Miscellaneous Tasks</h3>
<ul>
<li>Fix clippy and cargo fmt warnings (<a href="https://redirect.github.com/zip-rs/zip2/pull/310">#310</a>)</li>
</ul>
<h2><a href="https://github.com/zip-rs/zip2/compare/v2.2.2...v2.2.3">2.2.3</a> - 2025-02-26</h2>
<h3><!-- raw HTML omitted -->🚜 Refactor</h3>
<ul>
<li>Change the inner structure of <code>DateTime</code> (<a href="https://redirect.github.com/zip-rs/zip2/issues/267">#267</a>)</li>
</ul>
<h3><!-- raw HTML omitted -->⚙️ Miscellaneous Tasks</h3>
<ul>
<li>cargo fix --edition</li>
</ul>
</blockquote>
</details>
<details>
<summary>Commits</summary>
<ul>
<li><a href="6eab5f5cc6"><code>6eab5f5</code></a> chore: release v2.3.0 (<a href="https://redirect.github.com/zip-rs/zip2/issues/300">#300</a>)</li>
<li><a href="e4aee2050f"><code>e4aee20</code></a> implement <code>ZipFile::options</code> + refactor options normalization (<a href="https://redirect.github.com/zip-rs/zip2/issues/305">#305</a>)</li>
<li><a href="ea8a7bba24"><code>ea8a7bb</code></a> fix(test): Conditionalize a zip64 doctest (<a href="https://redirect.github.com/zip-rs/zip2/issues/308">#308</a>)</li>
<li><a href="365c81a39f"><code>365c81a</code></a> Use <code>xz2</code> crate instead of a custom implementation (<a href="https://redirect.github.com/zip-rs/zip2/issues/306">#306</a>)</li>
<li><a href="ae94b3452b"><code>ae94b34</code></a> chore: Fix clippy and cargo fmt warnings (<a href="https://redirect.github.com/zip-rs/zip2/issues/310">#310</a>)</li>
<li><a href="a2e062f370"><code>a2e062f</code></a> Merge commit from fork</li>
<li><a href="0199ac2cb8"><code>0199ac2</code></a> Simplify handling for symlink targets</li>
<li><a href="977bb9479d"><code>977bb94</code></a> fix failing tests, remove symlink loop check</li>
<li><a href="3cb29e70d1"><code>3cb29e7</code></a> Partial fix for tests</li>
<li><a href="2182b07686"><code>2182b07</code></a> Refactor</li>
<li>Additional commits viewable in <a href="https://github.com/zip-rs/zip2/compare/v2.2.2...v2.3.0">compare view</a></li>
</ul>
</details>
<br />


[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=zip&package-manager=cargo&previous-version=2.2.2&new-version=2.3.0)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting ``@dependabot` rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---

<details>
<summary>Dependabot commands and options</summary>
<br />

You can trigger Dependabot actions by commenting on this PR:
- ``@dependabot` rebase` will rebase this PR
- ``@dependabot` recreate` will recreate this PR, overwriting any edits that have been made to it
- ``@dependabot` merge` will merge this PR after your CI passes on it
- ``@dependabot` squash and merge` will squash and merge this PR after your CI passes on it
- ``@dependabot` cancel merge` will cancel a previously requested merge and block automerging
- ``@dependabot` reopen` will reopen this PR if it is closed
- ``@dependabot` close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
- ``@dependabot` show <dependency name> ignore conditions` will show all of the ignore conditions of the specified dependency
- ``@dependabot` ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
- ``@dependabot` ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
- ``@dependabot` ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
You can disable automated security fix PRs for this repo from the [Security Alerts page](https://github.com/meilisearch/meilisearch/network/alerts).

</details>

Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-03-18 08:57:11 +00:00
69678ed8e1 Bump zip from 2.2.2 to 2.3.0
Bumps [zip](https://github.com/zip-rs/zip2) from 2.2.2 to 2.3.0.
- [Release notes](https://github.com/zip-rs/zip2/releases)
- [Changelog](https://github.com/zip-rs/zip2/blob/master/CHANGELOG.md)
- [Commits](https://github.com/zip-rs/zip2/compare/v2.2.2...v2.3.0)

---
updated-dependencies:
- dependency-name: zip
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-03-18 00:19:49 +00:00
bf144a94d8 No more use FST to find a word without any typo 2025-03-17 14:20:10 +01:00
b0b1888ef9 Add test 2025-03-17 14:20:10 +01:00
6ec1d2b712 Merge #5423
5423: Bump ring to v0.17.14 to compile on old aarch64 r=irevoire a=Kerollmops

This PR will fix [this CI issue](https://github.com/meilisearch/meilisearch/actions/runs/13896085925/job/38876941154) where ring v0.17.13 breaks the compilation on old aarch64 machines by bumping its version to v0.17.14.

Co-authored-by: Kerollmops <clement@meilisearch.com>
2025-03-17 12:53:02 +00:00
cbdf80893d Merge #5422
5422: Add more progress levels to measure merging r=Kerollmops a=Kerollmops

I found out that Meilisearch was not correctly reporting the long indexing times in the progress and that a lot of time was spent on extracting words with all documents already extracted. The reason was that there was no step to report merging the cache and sending the entries to write to the writer thread. This PR adds these entries to the progress.

Co-authored-by: Kerollmops <clement@meilisearch.com>
2025-03-17 12:02:46 +00:00
e2156ddfc7 Simplify the IndexingStep progress enum 2025-03-17 11:40:50 +01:00
49dd50dab2 Bump ring to v0.17.14 to compile on old aarch64 2025-03-17 11:29:17 +01:00
13a88d6131 Merge #5407
5407: Geo update bug r=irevoire a=ManyTheFish

# Pull Request

## Related issue
Fixes #5380
Fixes #5399



Co-authored-by: Tamo <tamo@meilisearch.com>
Co-authored-by: ManyTheFish <many@meilisearch.com>
2025-03-17 10:24:33 +00:00
d9875b782d Merge #5421
5421: Accept total batch size in human size r=irevoire a=Kerollmops

This PR fixes the new `experimental-limit-batched-tasks-total-size` to accept human-defined sizes in bytes.

Co-authored-by: Kerollmops <clement@meilisearch.com>
2025-03-17 09:41:22 +00:00
cb16baab18 Add more progress levels to measure merging 2025-03-17 10:13:29 +01:00
2500e3c067 Merge #5414
5414: Update version for the next release (v1.14.0) in Cargo.toml r=Kerollmops a=meili-bot

⚠️ This PR is automatically generated. Check the new version is the expected one and Cargo.lock has been updated before merging. Fixes https://github.com/meilisearch/meilisearch/issues/5268.

Co-authored-by: Kerollmops <Kerollmops@users.noreply.github.com>
Co-authored-by: Kerollmops <clement@meilisearch.com>
2025-03-14 13:35:54 +00:00
d3e4b2dfe7 Accept total batch size in human size 2025-03-14 13:07:51 +01:00
2a46624e19 Merge #5420
5420: Add support for the progress API of arroy r=Kerollmops a=irevoire

# Pull Request

## Related issue
Fixes https://github.com/meilisearch/meilisearch/issues/5419

## What does this PR do?
- Convert the arroy progress to the meilisearch progress
- Use the new arroy closure to support the progress of arroy


Co-authored-by: Tamo <tamo@meilisearch.com>
2025-03-13 18:03:08 +00:00
009c36a4d0 Add support for the progress API of arroy 2025-03-13 19:00:43 +01:00
2a47e25e6d Update the upgrade path snap 2025-03-13 18:35:06 +01:00
82912e191b Merge #5418
5418: Cache embeddings in search r=Kerollmops a=dureuill

# Pull Request

## Related issue
TBD

## What does this PR do?
- Adds a cache for embeddings produced in search
- The cache is disabled by default, and can be enabled following the instructions [here](https://github.com/orgs/meilisearch/discussions/818).
- Had to accommodate the `timeout` test for openai that uses a mock that simulates a timeout on subsequent responses: since the test was reusing the same query, the cache would kick-in and no request would be made to the mock, meaning no timeout any longer and so a failing test 😅 
- `Embedder::embed_search` now accepts a reference instead of an owned `String`.

## Manual testing

- I created 4 indexes on a fresh DB with the same settings (one embedder from openai)
- I sent 1/4 of movies.json to each index
- I sent a federated search request against all 4 indexes, with the same query for each index, using the embedder of each index.

Results:

- The first call took 400ms to 1s. Before this change, it took in the 3s range.
- Any repeated call with the same query took in the range of 25ms.
- Looking at the details at trace log level, I can see that the first index that needs the embedding is taking most of the 400ms in `embed_one`. The other indexes report that the query text is found in the cache and they each take a few µs.

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2025-03-13 16:37:15 +00:00
e2d372823a Disable the cache by default and make it experimental 2025-03-13 17:22:51 +01:00
1876132172 Mutex-based implementation 2025-03-13 17:22:50 +01:00
d0b0b90d17 fixup tests, in particular foil the cache for the timeout test 2025-03-13 17:22:50 +01:00
b08544e86d Add embedding cache 2025-03-13 17:22:50 +01:00
d9111fe8ce Add lru crate to milli again 2025-03-13 17:22:50 +01:00
41d8161017 Update the versions 2025-03-13 17:22:32 +01:00
7df5715d39 Merge pull request #5406 from meilisearch/bump-heed
Bump heed to v0.22 and arroy to v0.6
2025-03-13 16:52:45 +01:00
5fe02ab5e0 Move to heed 0.22 and arroy 0.6 2025-03-13 15:48:18 +01:00
5ef7767429 Let arroy uses all the memory available instead of 50% of the 70% 2025-03-13 15:06:03 +01:00
3fad48167b remove arroy dependency in the index-scheduler 2025-03-13 14:57:56 +01:00
a92a48b9b9 Do not recompute stats on dumpless upgrade
Co-authored-by: Tamo <tamo@meilisearch.com>
2025-03-13 13:58:58 +01:00
d53225bf64 uses a random seed instead of 42 2025-03-13 12:43:31 +01:00
20896500c2 Bump arroy to the latest version 2025-03-13 12:37:10 +01:00
1af520077c Call the underlying Env::copy_to_path method 2025-03-13 11:49:25 +01:00
7e07cb9de1 Make meilitool prefer WithoutTls Env 2025-03-13 11:47:19 +01:00
a12b06d99d Merge #5369
5369: exhaustive facet search r=ManyTheFish a=ManyTheFish

Fixes #5403

This PR adds an `exhaustiveFacetCount` field to the `/facet-search` API allowing the end-user to have a better facet count when having a distinct attribute set in the index settings.

 # Usage

`POST /index/:index_uid/facet-search`
**Body:**
```json
{
  "facetQuery": "blob",
  "facetName": "genres",
  "q": "",
  "exhaustiveFacetCount": true
}
```

# Prototype Docker images

```sh
$ docker pull getmeili/meilisearch:prototype-exhaustive-facet-search-00
```

Co-authored-by: ManyTheFish <many@meilisearch.com>
2025-03-13 10:36:04 +00:00
331dc3d241 Add a comment to explain why we keep debug assertions 2025-03-13 11:29:00 +01:00
ef9d9f8481 set the memory in arroy 2025-03-13 11:29:00 +01:00
d3d22d8ed4 Prefer waiting for the task before getting the indexes 2025-03-13 11:29:00 +01:00
5e6abcf50c Prefer using WithoutTls for the auth env 2025-03-13 11:29:00 +01:00
a4aaf932ba Fix some test (invalid anyway) 2025-03-13 11:29:00 +01:00
16c962eb30 Enable debug assertions of heed 2025-03-13 11:07:49 +01:00
55ca2c4481 Avoid opening the Auth environment multiple times 2025-03-13 11:07:49 +01:00
fedb444e66 Fix the upgrade arroy calls 2025-03-13 11:07:49 +01:00
bef5954741 Use a WithoutTls env 2025-03-13 11:07:49 +01:00
ff8cf38d6b Move to the latest version of arroy 2025-03-13 11:07:48 +01:00
f8ac575ec5 Move to the latest version of arroy 2025-03-13 11:07:48 +01:00
566b4efb06 Dumpless upgrade from v1.13 to v1.14 2025-03-13 11:07:44 +01:00
1d499ed9b2 Use the new arroy upgrade method to move from 0.4 to 0.5 2025-03-13 11:07:44 +01:00
3bc62f0549 WIP: Still need to introduce a Env::copy_to_path method 2025-03-13 11:07:39 +01:00
21bbbdec76 Specify WithoutTls everywhere 2025-03-13 11:07:38 +01:00
78ebd8dba2 Fix the error variants 2025-03-13 11:07:38 +01:00
34df44a002 Open Env without TLS 2025-03-13 11:07:38 +01:00
48a27f669e Bump heed and other dependencies 2025-03-13 11:07:37 +01:00
e2d0ce52ba Merge #5384
5384: Get multiple documents by ids r=irevoire a=dureuill

# Pull Request

## Related issue
Fixes #5345 

## What does this PR do?
- Implements [public usage](https://www.notion.so/meilisearch/Get-documents-by-ID-1994b06b651f805ba273e1c6b75ce4d8)
- Slightly refactor error messages for the `/similar` route

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2025-03-12 17:26:49 +00:00
995f8962bd Merge #5398
5398: Bump ring from 0.17.8 to 0.17.13 r=Kerollmops a=dependabot[bot]

Bumps [ring](https://github.com/briansmith/ring) from 0.17.8 to 0.17.13.
<details>
<summary>Changelog</summary>
<p><em>Sourced from <a href="https://github.com/briansmith/ring/blob/main/RELEASES.md">ring's changelog</a>.</em></p>
<blockquote>
<h1>Version 0.17.13 (2025-03-06)</h1>
<p>Increased MSRV to 1.66.0 to avoid bugs in earlier versions so that we can
safely use <code>core::arch::x86_64::__cpuid</code> and <code>core::arch::x86::__cpuid</code> from
Rust in future releases.</p>
<p>AVX2-based VAES-CLMUL implementation. This will be a notable performance
improvement for most newish x86-64 systems. This will likely raise the minimum
binutils version supported for very old Linux distros.</p>
<h1>Version 0.17.12 (2025-03-05)</h1>
<p>Bug fix: <a href="https://redirect.github.com/briansmith/ring/pull/2447">briansmith/ring#2447</a> for denial of service (DoS).</p>
<ul>
<li>
<p>Fixes a panic in <code>ring::aead::quic::HeaderProtectionKey::new_mask()</code> when
integer overflow checking is enabled. In the QUIC protocol, an attacker can
induce this panic by sending a specially-crafted packet. Even unintentionally
it is likely to occur in 1 out of every 2**32 packets sent and/or received.</p>
</li>
<li>
<p>Fixes a panic on 64-bit targets in <code>ring::aead::{AES_128_GCM, AES_256_GCM}</code>
when overflow checking is enabled, when encrypting/decrypting approximately
68,719,476,700 bytes (about 64 gigabytes) of data in a single chunk. Protocols
like TLS and SSH are not affected by this because those protocols break large
amounts of data into small chunks. Similarly, most applications will not
attempt to encrypt/decrypt 64GB of data in one chunk.</p>
</li>
</ul>
<p>Overflow checking is not enabled in release mode by default, but
<code>RUSTFLAGS=&quot;-C overflow-checks&quot;</code> or <code>overflow-checks = true</code> in the Cargo.toml
profile can override this. Overflow checking is usually enabled by default in
debug mode.</p>
</blockquote>
</details>
<details>
<summary>Commits</summary>
<ul>
<li>See full diff in <a href="https://github.com/briansmith/ring/commits">compare view</a></li>
</ul>
</details>
<br />


[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=ring&package-manager=cargo&previous-version=0.17.8&new-version=0.17.13)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting ``@dependabot` rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---

<details>
<summary>Dependabot commands and options</summary>
<br />

You can trigger Dependabot actions by commenting on this PR:
- ``@dependabot` rebase` will rebase this PR
- ``@dependabot` recreate` will recreate this PR, overwriting any edits that have been made to it
- ``@dependabot` merge` will merge this PR after your CI passes on it
- ``@dependabot` squash and merge` will squash and merge this PR after your CI passes on it
- ``@dependabot` cancel merge` will cancel a previously requested merge and block automerging
- ``@dependabot` reopen` will reopen this PR if it is closed
- ``@dependabot` close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
- ``@dependabot` show <dependency name> ignore conditions` will show all of the ignore conditions of the specified dependency
- ``@dependabot` ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
- ``@dependabot` ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
- ``@dependabot` ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
You can disable automated security fix PRs for this repo from the [Security Alerts page](https://github.com/meilisearch/meilisearch/network/alerts).

</details>

Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-03-12 13:20:30 +00:00
1cd00f37c0 Merge #5413
5413: Make sure to delete useless prefixes r=ManyTheFish a=Kerollmops

We discovered a bug where the new indexer was still writing empty roaring bitmaps instead of deleting the prefix entry from the prefix database.

Co-authored-by: Kerollmops <clement@meilisearch.com>
2025-03-12 10:54:04 +00:00
1aa3375e12 Update version for the next release (v1.14.0) in Cargo.toml 2025-03-12 10:51:04 +00:00
60ff1b19a8 Searching for a document that does not exist no longer raises an error 2025-03-12 11:50:39 +01:00
7df5e3f059 Fix error message
Co-authored-by: Tamo <tamo@meilisearch.com>
2025-03-12 11:48:40 +01:00
0197dc87e0 Make sure to delete useless prefixes 2025-03-12 11:24:13 +01:00
7a172b82ca Add test 2025-03-12 11:22:59 +01:00
eb3ff325d1 Add an exhaustiveFacetCount field to the facet-search API 2025-03-12 11:22:59 +01:00
d3cd5ea689 Check if the geo fields changed additionally to the other faceted fields when reindexing facets 2025-03-12 11:20:10 +01:00
3ed43f9097 add a failing test reproducing the bug 2025-03-12 11:20:10 +01:00
a2a86ef4e2 Merge #5254
5254: Granular Filterable attribute settings r=ManyTheFish a=ManyTheFish

# Related
**Issue:** https://github.com/meilisearch/meilisearch/issues/5163
**PRD:** https://meilisearch.notion.site/API-usage-Settings-to-opt-out-indexing-features-filterableAttributes-1764b06b651f80aba8bdf359b2df3ca8

# Summary
Change the `filterableAttributes` settings to let the user choose which facet feature he wants to activate or not.
Deactivating a feature will avoid some database computation in the indexing process and save time and disk size.

# Example

`PATCH /indexes/:index_uid/settings`

```json
{
  "filterableAttributes": [
    {
      "patterns": [
        "cattos",
        "doggos.age"
      ],
      "features": {
        "facetSearch": false,
        "filter": {
          "equality": true,
          "comparison": false
        }
      }
    }
  ]
}
```

# Impact on the codebase
- Settings API:
  - `/settings`
  - `/settings/filterable-attributes`
  - OpenAPI 
  - may impact the LocalizedAttributesRules due to the AttributePatterns factorization
- Database:
  - Filterable attributes format changed
  - Faceted field_ids are no more stored in the database
  - FieldIdsMap has no more unexisting fields
- Search:
  - Search using filters
  - Facet search
  - `Attributes` ranking rule
  - Distinct attribute
  - Facet distribution
- Settings reindexing:
  - searchable
  - facet
  - vector
  - geo
- Document indexing:
  - searchable
  - facet
  - vector
  - geo
- Dump import

# Note for the reviewers
The changes are huge and have been split in different commits with a dedicated explanation, I suggest reviewing the commit 1by1

Co-authored-by: ManyTheFish <many@meilisearch.com>
2025-03-12 09:00:43 +00:00
d500c7f625 Add default deserialize value 2025-03-11 17:55:49 +01:00
ea7e299663 Update has_changed_for_fields documentation 2025-03-11 16:48:55 +01:00
a370b467fe Merge MetadataBuilder::_new into MetadataBuilder::new 2025-03-11 15:31:57 +01:00
8790880589 Fix clippy 2025-03-11 15:22:39 +01:00
7072fe9780 Fix typos in comments and messages 2025-03-11 15:22:00 +01:00
d0dda78f3d Merge #5401
5401: Make composite embedders an experimental feature r=irevoire a=dureuill

# Pull Request

## Related issue
Fixes #5343 

## What does this PR do?
- Introduce new `compositeEmbedders` experimental feature
- Guard `source = "composite"` and `searchEmbedder`, `indexingEmbedder` behind enabling the feature.
- Update tests accordingly

## Dumpless upgrade

- Adding an experimental feature is never a breaking change

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2025-03-11 14:20:36 +00:00
fa8afc5cfd Style change after review
Co-authored-by: Tamo <tamo@meilisearch.com>
2025-03-11 13:25:35 +01:00
6d52c6e711 Merge branch 'main' into granular-filterable-attributes 2025-03-11 10:05:58 +01:00
dfb8411647 Revert "Remove filter pre-check"
This reverts commit b12ffd1356.
2025-03-11 09:48:30 +01:00
6269f757ff Revert document creation in tests 2025-03-10 18:35:10 +01:00
40c5f911fd Revert metadata creation when computing the facet-distribution 2025-03-10 17:05:41 +01:00
abef655849 Revert metadata creation when computing facet search and distinct 2025-03-10 15:45:59 +01:00
b12ffd1356 Remove filter pre-check 2025-03-10 14:29:45 +01:00
c9a4c6ed96 REvert metadata creation when computing filters at search time 2025-03-10 14:29:44 +01:00
aa32b719c7 Add tests about experimentalness of the feature and fix existing 2025-03-10 14:23:22 +01:00
41d2b1e52b Analytics 2025-03-10 14:23:07 +01:00
54ee81bb09 Make composite embedders experimental 2025-03-10 14:22:47 +01:00
689e69d6d2 Take into account PR messages 2025-03-10 13:46:33 +01:00
9d9e0d4c54 Add analytics 2025-03-10 11:33:15 +01:00
19c9caed39 Fix tests 2025-03-10 11:11:48 +01:00
21c3b3957e tests: Change get_document_by_filter to fetch_documents 2025-03-10 11:11:48 +01:00
f292fc9ac0 Add ids parameter to GET documents and POST documents/fetch 2025-03-10 11:11:48 +01:00
1d3c4642a6 Don't use Deserr for ExternalDocumentId, instead convert to error afterward 2025-03-10 11:11:48 +01:00
9a282be0a2 Merge #5393
Some checks failed
Test suite / Tests on ubuntu-22.04 (push) Failing after 13s
Test suite / Tests almost all features (push) Has been skipped
Test suite / Test with Ollama (push) Failing after 11s
Test suite / Test disabled tokenization (push) Has been skipped
Test suite / Run tests in debug (push) Failing after 13s
Test suite / Run Clippy (push) Successful in 15m20s
Test suite / Run Rustfmt (push) Successful in 2m40s
Run the indexing fuzzer / Setup the action (push) Failing after 1h10m55s
Indexing bench (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of indexing (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for geo (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for songs (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for Wikipedia articles (push) / Run and upload benchmarks (push) Has been cancelled
Test suite / Tests on macos-13 (push) Has been cancelled
Test suite / Tests on windows-2022 (push) Has been cancelled
5393: Bring back changes from v1.13.3 into main r=irevoire a=Kerollmops



Co-authored-by: Kerollmops <clement@meilisearch.com>
Co-authored-by: Kerollmops <Kerollmops@users.noreply.github.com>
Co-authored-by: Strift <lau.cazanove@gmail.com>
2025-03-10 07:57:02 +00:00
bea28968a0 Bump ring from 0.17.8 to 0.17.13
Bumps [ring](https://github.com/briansmith/ring) from 0.17.8 to 0.17.13.
- [Changelog](https://github.com/briansmith/ring/blob/main/RELEASES.md)
- [Commits](https://github.com/briansmith/ring/commits)

---
updated-dependencies:
- dependency-name: ring
  dependency-type: indirect
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-03-07 17:04:57 +00:00
ed1dcbe0f7 Fix behavior change in the Attributes criterion 2025-03-06 14:18:25 +01:00
5ceddbda84 Add the max_weight of the weight map if it's lacking 2025-03-06 13:58:28 +01:00
ca41ce3bbd Old indexer document addition now check if facet search is globally activated 2025-03-06 11:43:42 +01:00
8ec0c322ea Apply PR requests related to Refactor the FieldIdMapWithMetadata 2025-03-06 11:42:53 +01:00
b88aa9cc76 Rely on FieldIdMapWithMetadata in facet search and filters 2025-03-05 18:22:12 +01:00
3fd86e8d76 Merge #5371
Some checks failed
Test suite / Tests on ubuntu-22.04 (push) Failing after 12s
Test suite / Tests almost all features (push) Has been skipped
Test suite / Test with Ollama (push) Failing after 8s
Test suite / Test disabled tokenization (push) Has been skipped
Test suite / Run tests in debug (push) Failing after 11s
Test suite / Run Clippy (push) Successful in 7m1s
Test suite / Run Rustfmt (push) Successful in 2m44s
Run the indexing fuzzer / Setup the action (push) Successful in 1h5m40s
Indexing bench (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of indexing (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for geo (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for songs (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for Wikipedia articles (push) / Run and upload benchmarks (push) Has been cancelled
Test suite / Tests on macos-13 (push) Has been cancelled
Test suite / Tests on windows-2022 (push) Has been cancelled
5371: Composite embedders r=irevoire a=dureuill

# Pull Request

## Related issue
Fixes #5343 

## What does this PR do?
- Implement [public usage](https://www.notion.so/meilisearch/Composite-embedder-usage-14a4b06b651f81859dc3df21e8cd02a0)
- Refactor the way we check if a parameter is mandatory/allowed/disallowed for a given source
- Take the "nesting context" into account for computer if a parameter is mandatory/allowed/disallowed
- Add tests checking all parameters with all sources, and made sure the results didn't change compared with v1.13

## Dumpless Upgrade

- This adds a new value for an existing parameter => compatible without change
- This adds new optional parameters => compatible without change

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2025-03-05 17:18:11 +00:00
67f7470c83 Apply PR requests related to Refactor search and facet-search 2025-03-05 18:17:42 +01:00
4fab72cbea Rename SettingsDiff::diff to SettingsDiff::apply_and_diff 2025-03-05 18:16:57 +01:00
afb4b9677f Remove Embedder:embed 2025-03-05 18:16:57 +01:00
73d2dbd60f Error handling 2025-03-05 18:16:57 +01:00
57a6beee30 Test composite embedders 2025-03-05 18:16:57 +01:00
b190b612a3 Add test on all parameters 2025-03-05 18:16:57 +01:00
111e77eff2 Bump mini-dashboard to v0.2.18 2025-03-05 15:24:53 +01:00
ba30747de3 Bump v1.13.2 to v1.13.3 in the TOMLs and snaps 2025-03-05 15:24:53 +01:00
25f0536f5a Update version for the next release (v1.13.3) in Cargo.toml 2025-03-05 15:24:52 +01:00
c8c0951c43 Update the snapshots 2025-03-05 15:24:21 +01:00
63e753bde0 Apply PR requests related to settings API 2025-03-05 12:05:40 +01:00
5fa4b5c50a Add a test on filterable attributes rules priority
**Changes:**
- Add a new test playing with filterable attributes rules priority
- Optimize the faceted field selector avoiding to match false positives
2025-03-05 09:44:52 +01:00
a7a62e5e4c Add some documentation in modules 2025-03-05 08:49:18 +01:00
683a2ac685 Merge #5379
Some checks failed
Publish binaries to GitHub release / Check the version validity (push) Failing after 57s
Publish binaries to GitHub release / Publish binary for Linux (push) Has been skipped
Publish binaries to GitHub release / Publish binary for aarch64 (meilisearch-linux-aarch64, aarch64-unknown-linux-gnu) (push) Has been skipped
Publish binaries to GitHub release / Publish binary for macos-13 (push) Has been cancelled
Publish binaries to GitHub release / Publish binary for windows-2022 (push) Has been cancelled
Publish binaries to GitHub release / Publish binary for macOS silicon (meilisearch-macos-apple-silicon, aarch64-apple-darwin) (push) Has been cancelled
Test suite / Tests on ubuntu-22.04 (push) Failing after 6s
Test suite / Tests almost all features (push) Failing after 6s
Test suite / Test with Ollama (push) Failing after 7s
Test suite / Test disabled tokenization (push) Failing after 5s
Test suite / Run tests in debug (push) Failing after 6s
Test suite / Run Clippy (push) Failing after 6s
Test suite / Run Rustfmt (push) Failing after 6s
SDKs tests / define-docker-image (push) Failing after 15s
SDKs tests / .NET SDK tests (push) Has been skipped
SDKs tests / Dart SDK tests (push) Has been skipped
SDKs tests / Go SDK tests (push) Has been skipped
SDKs tests / Java SDK tests (push) Has been skipped
SDKs tests / JS SDK tests (push) Has been skipped
SDKs tests / PHP SDK tests (push) Has been skipped
SDKs tests / Python SDK tests (push) Has been skipped
SDKs tests / Ruby SDK tests (push) Has been skipped
SDKs tests / Rust SDK tests (push) Has been skipped
SDKs tests / Swift SDK tests (push) Has been skipped
SDKs tests / meilisearch-js-plugins tests (push) Has been skipped
SDKs tests / meilisearch-rails tests (push) Has been skipped
SDKs tests / meilisearch-symfony tests (push) Has been skipped
Test suite / Tests on macos-13 (push) Has been cancelled
Test suite / Tests on windows-2022 (push) Has been cancelled
Publish images to Docker Hub / docker (push) Has been cancelled
5379: Bring back the changes from v1.13.2 into main r=dureuill a=Kerollmops



Co-authored-by: Kerollmops <Kerollmops@users.noreply.github.com>
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2025-03-04 13:24:25 +00:00
e751342dfb Merge #5370
Some checks failed
Test suite / Tests almost all features (push) Has been skipped
Test suite / Test disabled tokenization (push) Has been skipped
Test suite / Run tests in debug (push) Failing after 10s
Test suite / Test with Ollama (push) Failing after 6m46s
Test suite / Run Clippy (push) Successful in 6m23s
Test suite / Run Rustfmt (push) Failing after 16s
Test suite / Tests on ubuntu-22.04 (push) Failing after 7m19s
Run the indexing fuzzer / Setup the action (push) Successful in 1h5m28s
Indexing bench (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of indexing (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for geo (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for songs (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for Wikipedia articles (push) / Run and upload benchmarks (push) Has been cancelled
Test suite / Tests on macos-13 (push) Has been cancelled
Test suite / Tests on windows-2022 (push) Has been cancelled
5370: Introduce a CI to check milestones and branches r=curquiza a=Kerollmops



Co-authored-by: Kerollmops <clement@meilisearch.com>
2025-03-03 15:51:52 +00:00
17bf82235d Merge #5381
5381: Bump actions/checkout from 1 to 3 r=Kerollmops a=dependabot[bot]

Bumps [actions/checkout](https://github.com/actions/checkout) from 1 to 3.
<details>
<summary>Release notes</summary>
<p><em>Sourced from <a href="https://github.com/actions/checkout/releases">actions/checkout's releases</a>.</em></p>
<blockquote>
<h2>v3.0.0</h2>
<ul>
<li>Updated to the node16 runtime by default
<ul>
<li>This requires a minimum <a href="https://github.com/actions/runner/releases/tag/v2.285.0">Actions Runner</a> version of v2.285.0 to run, which is by default available in GHES 3.4 or later.</li>
</ul>
</li>
</ul>
<h2>v2.7.0</h2>
<h2>What's Changed</h2>
<ul>
<li>Add new public key for known_hosts (<a href="https://redirect.github.com/actions/checkout/issues/1237">#1237</a>) by <a href="https://github.com/TingluoHuang"><code>`@​TingluoHuang</code></a>` in <a href="https://redirect.github.com/actions/checkout/pull/1238">actions/checkout#1238</a></li>
</ul>
<p><strong>Full Changelog</strong>: <a href="https://github.com/actions/checkout/compare/v2.6.0...v2.7.0">https://github.com/actions/checkout/compare/v2.6.0...v2.7.0</a></p>
<h2>v2.6.0</h2>
<h2>What's Changed</h2>
<ul>
<li>Add backports to v2 branch by <a href="https://github.com/cory-miller"><code>`@​cory-miller</code></a>` in <a href="https://redirect.github.com/actions/checkout/pull/1040">actions/checkout#1040</a>
<ul>
<li>Includes backports from the following changes: <a href="https://redirect.github.com/actions/checkout/pull/964">actions/checkout#964</a>, <a href="https://redirect.github.com/actions/checkout/pull/1002">actions/checkout#1002</a>, <a href="https://redirect.github.com/actions/checkout/pull/1029">actions/checkout#1029</a></li>
<li>Upgraded the licensed version to match what is used in v3.</li>
</ul>
</li>
</ul>
<p><strong>Full Changelog</strong>: <a href="https://github.com/actions/checkout/compare/v2.5.0...v2.6.0">https://github.com/actions/checkout/compare/v2.5.0...v2.6.0</a></p>
<h2>v2.5.0</h2>
<h2>What's Changed</h2>
<ul>
<li>Update <code>`@​actions/core</code>` to 1.10.0 by <a href="https://github.com/rentziass"><code>`@​rentziass</code></a>` in <a href="https://redirect.github.com/actions/checkout/pull/962">actions/checkout#962</a></li>
</ul>
<p><strong>Full Changelog</strong>: <a href="https://github.com/actions/checkout/compare/v2...v2.5.0">https://github.com/actions/checkout/compare/v2...v2.5.0</a></p>
<h2>v2.4.2</h2>
<h2>What's Changed</h2>
<ul>
<li>Add set-safe-directory input to allow customers to take control. (<a href="https://redirect.github.com/actions/checkout/issues/770">#770</a>) by <a href="https://github.com/TingluoHuang"><code>`@​TingluoHuang</code></a>` in <a href="https://redirect.github.com/actions/checkout/pull/776">actions/checkout#776</a></li>
<li>Prepare changelog for v2.4.2. by <a href="https://github.com/TingluoHuang"><code>`@​TingluoHuang</code></a>` in <a href="https://redirect.github.com/actions/checkout/pull/778">actions/checkout#778</a></li>
</ul>
<p><strong>Full Changelog</strong>: <a href="https://github.com/actions/checkout/compare/v2...v2.4.2">https://github.com/actions/checkout/compare/v2...v2.4.2</a></p>
<h2>v2.4.1</h2>
<ul>
<li>Fixed an issue where checkout failed to run in container jobs due to the new git setting <code>safe.directory</code></li>
</ul>
<h2>v2.4.0</h2>
<ul>
<li>Convert SSH URLs like <code>org-&lt;ORG_ID&gt;`@github.com:</code>` to <code>https://github.com/</code> - <a href="https://redirect.github.com/actions/checkout/pull/621">pr</a></li>
</ul>
<h2>v2.3.5</h2>
<p>Update dependencies</p>
<h2>v2.3.4</h2>
<ul>
<li><a href="https://redirect.github.com/actions/checkout/pull/379">Add missing <code>await</code>s</a></li>
<li><a href="https://redirect.github.com/actions/checkout/pull/360">Swap to Environment Files</a></li>
</ul>
<h2>v2.3.3</h2>
<ul>
<li><a href="https://redirect.github.com/actions/checkout/pull/345">Remove Unneeded commit information from build logs</a></li>
<li><a href="https://redirect.github.com/actions/checkout/pull/326">Add Licensed to verify third party dependencies</a></li>
</ul>
<!-- raw HTML omitted -->
</blockquote>
<p>... (truncated)</p>
</details>
<details>
<summary>Changelog</summary>
<p><em>Sourced from <a href="https://github.com/actions/checkout/blob/main/CHANGELOG.md">actions/checkout's changelog</a>.</em></p>
<blockquote>
<h1>Changelog</h1>
<h2>v4.2.2</h2>
<ul>
<li><code>url-helper.ts</code> now leverages well-known environment variables by <a href="https://github.com/jww3"><code>`@​jww3</code></a>` in <a href="https://redirect.github.com/actions/checkout/pull/1941">actions/checkout#1941</a></li>
<li>Expand unit test coverage for <code>isGhes</code> by <a href="https://github.com/jww3"><code>`@​jww3</code></a>` in <a href="https://redirect.github.com/actions/checkout/pull/1946">actions/checkout#1946</a></li>
</ul>
<h2>v4.2.1</h2>
<ul>
<li>Check out other refs/* by commit if provided, fall back to ref by <a href="https://github.com/orhantoy"><code>`@​orhantoy</code></a>` in <a href="https://redirect.github.com/actions/checkout/pull/1924">actions/checkout#1924</a></li>
</ul>
<h2>v4.2.0</h2>
<ul>
<li>Add Ref and Commit outputs by <a href="https://github.com/lucacome"><code>`@​lucacome</code></a>` in <a href="https://redirect.github.com/actions/checkout/pull/1180">actions/checkout#1180</a></li>
<li>Dependency updates by <a href="https://github.com/dependabot"><code>`@​dependabot</code></a>-` <a href="https://redirect.github.com/actions/checkout/pull/1777">actions/checkout#1777</a>, <a href="https://redirect.github.com/actions/checkout/pull/1872">actions/checkout#1872</a></li>
</ul>
<h2>v4.1.7</h2>
<ul>
<li>Bump the minor-npm-dependencies group across 1 directory with 4 updates by <a href="https://github.com/dependabot"><code>`@​dependabot</code></a>` in <a href="https://redirect.github.com/actions/checkout/pull/1739">actions/checkout#1739</a></li>
<li>Bump actions/checkout from 3 to 4 by <a href="https://github.com/dependabot"><code>`@​dependabot</code></a>` in <a href="https://redirect.github.com/actions/checkout/pull/1697">actions/checkout#1697</a></li>
<li>Check out other refs/* by commit by <a href="https://github.com/orhantoy"><code>`@​orhantoy</code></a>` in <a href="https://redirect.github.com/actions/checkout/pull/1774">actions/checkout#1774</a></li>
<li>Pin actions/checkout's own workflows to a known, good, stable version. by <a href="https://github.com/jww3"><code>`@​jww3</code></a>` in <a href="https://redirect.github.com/actions/checkout/pull/1776">actions/checkout#1776</a></li>
</ul>
<h2>v4.1.6</h2>
<ul>
<li>Check platform to set archive extension appropriately by <a href="https://github.com/cory-miller"><code>`@​cory-miller</code></a>` in <a href="https://redirect.github.com/actions/checkout/pull/1732">actions/checkout#1732</a></li>
</ul>
<h2>v4.1.5</h2>
<ul>
<li>Update NPM dependencies by <a href="https://github.com/cory-miller"><code>`@​cory-miller</code></a>` in <a href="https://redirect.github.com/actions/checkout/pull/1703">actions/checkout#1703</a></li>
<li>Bump github/codeql-action from 2 to 3 by <a href="https://github.com/dependabot"><code>`@​dependabot</code></a>` in <a href="https://redirect.github.com/actions/checkout/pull/1694">actions/checkout#1694</a></li>
<li>Bump actions/setup-node from 1 to 4 by <a href="https://github.com/dependabot"><code>`@​dependabot</code></a>` in <a href="https://redirect.github.com/actions/checkout/pull/1696">actions/checkout#1696</a></li>
<li>Bump actions/upload-artifact from 2 to 4 by <a href="https://github.com/dependabot"><code>`@​dependabot</code></a>` in <a href="https://redirect.github.com/actions/checkout/pull/1695">actions/checkout#1695</a></li>
<li>README: Suggest <code>user.email</code> to be <code>41898282+github-actions[bot]`@users.noreply.github.com</code>` by <a href="https://github.com/cory-miller"><code>`@​cory-miller</code></a>` in <a href="https://redirect.github.com/actions/checkout/pull/1707">actions/checkout#1707</a></li>
</ul>
<h2>v4.1.4</h2>
<ul>
<li>Disable <code>extensions.worktreeConfig</code> when disabling <code>sparse-checkout</code> by <a href="https://github.com/jww3"><code>`@​jww3</code></a>` in <a href="https://redirect.github.com/actions/checkout/pull/1692">actions/checkout#1692</a></li>
<li>Add dependabot config by <a href="https://github.com/cory-miller"><code>`@​cory-miller</code></a>` in <a href="https://redirect.github.com/actions/checkout/pull/1688">actions/checkout#1688</a></li>
<li>Bump the minor-actions-dependencies group with 2 updates by <a href="https://github.com/dependabot"><code>`@​dependabot</code></a>` in <a href="https://redirect.github.com/actions/checkout/pull/1693">actions/checkout#1693</a></li>
<li>Bump word-wrap from 1.2.3 to 1.2.5 by <a href="https://github.com/dependabot"><code>`@​dependabot</code></a>` in <a href="https://redirect.github.com/actions/checkout/pull/1643">actions/checkout#1643</a></li>
</ul>
<h2>v4.1.3</h2>
<ul>
<li>Check git version before attempting to disable <code>sparse-checkout</code> by <a href="https://github.com/jww3"><code>`@​jww3</code></a>` in <a href="https://redirect.github.com/actions/checkout/pull/1656">actions/checkout#1656</a></li>
<li>Add SSH user parameter by <a href="https://github.com/cory-miller"><code>`@​cory-miller</code></a>` in <a href="https://redirect.github.com/actions/checkout/pull/1685">actions/checkout#1685</a></li>
<li>Update <code>actions/checkout</code> version in <code>update-main-version.yml</code> by <a href="https://github.com/jww3"><code>`@​jww3</code></a>` in <a href="https://redirect.github.com/actions/checkout/pull/1650">actions/checkout#1650</a></li>
</ul>
<h2>v4.1.2</h2>
<ul>
<li>Fix: Disable sparse checkout whenever <code>sparse-checkout</code> option is not present <a href="https://github.com/dscho"><code>`@​dscho</code></a>` in <a href="https://redirect.github.com/actions/checkout/pull/1598">actions/checkout#1598</a></li>
</ul>
<h2>v4.1.1</h2>
<ul>
<li>Correct link to GitHub Docs by <a href="https://github.com/peterbe"><code>`@​peterbe</code></a>` in <a href="https://redirect.github.com/actions/checkout/pull/1511">actions/checkout#1511</a></li>
<li>Link to release page from what's new section by <a href="https://github.com/cory-miller"><code>`@​cory-miller</code></a>` in <a href="https://redirect.github.com/actions/checkout/pull/1514">actions/checkout#1514</a></li>
</ul>
<h2>v4.1.0</h2>
<ul>
<li><a href="https://redirect.github.com/actions/checkout/pull/1396">Add support for partial checkout filters</a></li>
</ul>
<!-- raw HTML omitted -->
</blockquote>
<p>... (truncated)</p>
</details>
<details>
<summary>Commits</summary>
<ul>
<li><a href="f43a0e5ff2"><code>f43a0e5</code></a> Release 3.6.0 (<a href="https://redirect.github.com/actions/checkout/issues/1437">#1437</a>)</li>
<li><a href="7739b9ba2e"><code>7739b9b</code></a> Add option to fetch tags even if fetch-depth &gt; 0 (<a href="https://redirect.github.com/actions/checkout/issues/579">#579</a>)</li>
<li><a href="96f53100ba"><code>96f5310</code></a> Mark test scripts with Bash'isms to be run via Bash (<a href="https://redirect.github.com/actions/checkout/issues/1377">#1377</a>)</li>
<li><a href="c85c95e3d7"><code>c85c95e</code></a> Release v3.5.3 (<a href="https://redirect.github.com/actions/checkout/issues/1376">#1376</a>)</li>
<li><a href="d106d4669b"><code>d106d46</code></a> Add support for sparse checkouts (<a href="https://redirect.github.com/actions/checkout/issues/1369">#1369</a>)</li>
<li><a href="f095bcc56b"><code>f095bcc</code></a> Fix typos found by codespell (<a href="https://redirect.github.com/actions/checkout/issues/1287">#1287</a>)</li>
<li><a href="47fbe2df0a"><code>47fbe2d</code></a> Fix: Checkout fail in self-hosted runners when faulty submodule are checked-i...</li>
<li><a href="8e5e7e5ab8"><code>8e5e7e5</code></a> Release v3.5.2 (<a href="https://redirect.github.com/actions/checkout/issues/1291">#1291</a>)</li>
<li><a href="eb35239ec2"><code>eb35239</code></a> Fix: convert baseUrl to serverApiUrl 'formatted' (<a href="https://redirect.github.com/actions/checkout/issues/1289">#1289</a>)</li>
<li><a href="83b7061638"><code>83b7061</code></a> Release v3.5.1 (<a href="https://redirect.github.com/actions/checkout/issues/1284">#1284</a>)</li>
<li>Additional commits viewable in <a href="https://github.com/actions/checkout/compare/v1...v3">compare view</a></li>
</ul>
</details>
<br />

<details>
<summary>Most Recent Ignore Conditions Applied to This Pull Request</summary>

| Dependency Name | Ignore Conditions |
| --- | --- |
| actions/checkout | [>= 4.a, < 5] |
</details>


[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=actions/checkout&package-manager=github_actions&previous-version=1&new-version=3)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

You can trigger a rebase of this PR by commenting ``@dependabot` rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---

<details>
<summary>Dependabot commands and options</summary>
<br />

You can trigger Dependabot actions by commenting on this PR:
- ``@dependabot` rebase` will rebase this PR
- ``@dependabot` recreate` will recreate this PR, overwriting any edits that have been made to it
- ``@dependabot` merge` will merge this PR after your CI passes on it
- ``@dependabot` squash and merge` will squash and merge this PR after your CI passes on it
- ``@dependabot` cancel merge` will cancel a previously requested merge and block automerging
- ``@dependabot` reopen` will reopen this PR if it is closed
- ``@dependabot` close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
- ``@dependabot` show <dependency name> ignore conditions` will show all of the ignore conditions of the specified dependency
- ``@dependabot` ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
- ``@dependabot` ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
- ``@dependabot` ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)


</details>

Co-authored-by: Kerollmops <clement@meilisearch.com>
2025-03-03 15:14:00 +00:00
0401c4e511 Add a settings API test 2025-03-03 16:08:21 +01:00
4798c35c50 Merge #5383
5383: Skip a snapshot test on Windows r=dureuill a=Kerollmops

This PR skips the `perform_on_demand_snapshot` test on Windows, which is very flaky on this platform. Note that we keep running it on macOS and Ubuntu.

Co-authored-by: Kerollmops <clement@meilisearch.com>
2025-03-03 13:23:24 +00:00
9585950e0e Merge #5365
Some checks failed
Indexing bench (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of indexing (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for geo (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for songs (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for Wikipedia articles (push) / Run and upload benchmarks (push) Waiting to run
Run the indexing fuzzer / Setup the action (push) Failing after 5s
Publish binaries to GitHub release / Check the version validity (push) Successful in 9s
Publish binaries to GitHub release / Publish binary for Linux (push) Failing after 9s
Publish binaries to GitHub release / Publish binary for aarch64 (meilisearch-linux-aarch64, aarch64-unknown-linux-gnu) (push) Failing after 10s
Test suite / Tests on ubuntu-22.04 (push) Failing after 17s
Test suite / Tests almost all features (push) Failing after 11s
Test suite / Test with Ollama (push) Failing after 17s
Test suite / Test disabled tokenization (push) Failing after 12s
Test suite / Run tests in debug (push) Failing after 13s
Test suite / Run Rustfmt (push) Successful in 3m43s
Test suite / Run Clippy (push) Successful in 9m23s
Publish binaries to GitHub release / Publish binary for macos-13 (push) Has been cancelled
Publish binaries to GitHub release / Publish binary for windows-2022 (push) Has been cancelled
Publish binaries to GitHub release / Publish binary for macOS silicon (meilisearch-macos-apple-silicon, aarch64-apple-darwin) (push) Has been cancelled
Test suite / Tests on macos-13 (push) Has been cancelled
Test suite / Tests on windows-2022 (push) Has been cancelled
5365: Mention openAPI in CONTRIBUTING.md r=Kerollmops a=irevoire

I only referred to other documents to be sure the process is written only once and won’t get out of sync.

Co-authored-by: Tamo <tamo@meilisearch.com>
2025-03-03 11:23:51 +00:00
b8c6eb5453 Improve bors toml 2025-03-03 12:22:33 +01:00
02586e727e Introduce a CI to check milestones and branches 2025-03-03 12:22:24 +01:00
0cfc9261ba Skip a snapshot test on Windows 2025-03-03 10:44:28 +01:00
035674d56e Bump actions/checkout from 1 to 4 2025-03-03 10:37:28 +01:00
d35470e29b Update dumps
**Impact:**
- dump import
2025-03-03 10:33:39 +01:00
23e07f1a93 Attribute positions changed in snapshots
**Reason:**
Only the existing field are registered in the fieldid_map
2025-03-03 10:33:39 +01:00
f2a28a4dd7 Add and enhance tests
**Changes:**
Introduce a test_settings_documents_indexing_swapping_and_search function that run the test twice:
1) by indexing the settings before the documents then running the test
2) by indexing the documents before the settings then running the test

This helps to ensure that their is no bug coming from one or the other indexer.
2025-03-03 10:33:39 +01:00
1994494155 Update snapshot using the new filterableAttributes type 2025-03-03 10:33:39 +01:00
6dbec91d2b Index document in filterable attributes tests
**Reason:**
Because the filterable attributes are patterns now,
the fieldIdMap will only register the fields that exists in at least one document.
if a field doesn't exist in any document, it will not be registered even if it has been specified in the filterable fields.
2025-03-03 10:33:39 +01:00
9a75dc6ab3 Update tests using filterable attributes rules
**Changes:**
Replace the BTreeSet<String> by Vec<FilterableAttributesRule> without changing the test results

**Impact:**
- None
2025-03-03 10:33:34 +01:00
ae8d453868 Refactor Document indexing process (searchables)
**Changes:**
The searchable database extraction is now relying on the AttributePatterns and FieldIdMapWithMetadata to match the field to extract.
Remove the SearchableExtractor trait to make the code less complex.

**Impact:**
- Document Addition/modification searchable indexing
- Document deletion searchable indexing
2025-03-03 10:32:42 +01:00
95bccaf5f5 Refactor Document indexing process (Facets)
**Changes:**
The Documents changes now take a selector closure instead of a list of field to match the field to extract.
The seek_leaf_values_in_object function now uses a selector closure of a list of field to match the field to extract
The facet database extraction is now relying on the FilterableAttributesRule to match the field to extract.
The facet-search database extraction is now relying on the FieldIdMapWithMetadata to select the field to index.
The facet level database extraction is now relying on the FieldIdMapWithMetadata to select the field to index.

**Important:**
Because the filterable attributes are patterns now,
the fieldIdMap will only register the fields that exists in at least one document.
if a field doesn't exist in any document, it will not be registered even if it has been specified in the filterable fields.

**Impact:**
- Document Addition/modification facet indexing
- Document deletion facet indexing
2025-03-03 10:32:03 +01:00
659855c88e Refactor Settings Indexing process
**Changes:**
The transform structure is now relying on FieldIdMapWithMetadata and AttributePatterns to prepare
the obkv documents during a settings reindexing.
The InnerIndexSettingsDiff and InnerIndexSettings structs are now relying on FieldIdMapWithMetadata, FilterableAttributesRule and AttributePatterns to define the field and the databases that should be reindexed.
The faceted_fields_ids, localized_searchable_fields_ids and localized_faceted_fields_ids have been removed in favor of the FieldIdMapWithMetadata.
We are now relying on the FieldIdMapWithMetadata to retain vectors_fids from the facets and the searchables.

The searchable database computing is now relying on the FieldIdMapWithMetadata to know if a field is searchable and retrieve the locales.

The facet database computing is now relying on the FieldIdMapWithMetadata to compute the facet databases, the facet-search and retrieve the locales.

The facet level database computing is now relying on the FieldIdMapWithMetadata and the facet level database are cleared depending on the settings differences (clear_facet_levels_based_on_settings_diff).

The vector point extraction uses the FieldIdMapWithMetadata instead of FieldsIdsMapWithMetadata.

**Impact:**
- Dump import
- Settings update
2025-03-03 10:32:02 +01:00
286d310287 Fix inconsistency in attribute ranking rule computation
**Changes:**
The building of the Attributes ranking rule graph was comparing fieldids with weights
which doesn't make sense and may be bug prone, we are now comparing fieldids with fieldids.

**Impact:**
- search: Attribute ranking rule
2025-03-03 10:29:34 +01:00
4f7ece2411 Refactor the FieldIdMapWithMetadata
**Changes:**
The FieldIdMapWithMetadata structure now stores more information about fields.
The metadata_for_field function computes all the needed information relying on the user provided data instead of the enriched data (searchable/sortable)
which may solve an indexing bug on sortable attributes that was not matching the nested fields.

The FieldIdMapWithMetadata structure was duplicated in the embeddings as FieldsIdsMapWithMetadata,
so the FieldsIdsMapWithMetadata has been removed in favor of FieldIdMapWithMetadata.

The Facet distribution is now relying on the FieldIdMapWithMetadata with metadata to match is a field can be faceted.

**Impact:**
- searchable attributes matching
- searchable attributes weight computation
- sortable attributes matching
- faceted fields matching
- prompt computing
- facet distribution
2025-03-03 10:29:33 +01:00
967033579d Refactor search and facet-search
**Changes:**
The search filters are now using the FilterableAttributesFeatures from the FilterableAttributesRules to know if a field is filterable.
Moreover, the FilterableAttributesFeatures is more precise and an error will be returned if an operator is used on a field that doesn't have the related feature.
The facet-search is now checking if the feature is allowed in the FilterableAttributesFeatures and an error will be returned if the field doesn't have the related feature.

**Impact:**
- facet-search is now relying on AttributePatterns to match the locales
- search using filters is now relying on FilterableAttributesFeatures
- distinct attribute is now relying on FilterableAttributesRules
2025-03-03 10:25:32 +01:00
0200c65ebf Change the filterableAttributes setting API
**Changes:**
The filterableAttributes type has been changed from a `BTreeSet<String>` to a `Vec<FilterableAttributesRule>`,
Which is a list of rules defining patterns to match the documents' fields and a set of feature to apply on the matching fields.
The rule order given by the user is now an important information, the features applied on a filterable field will be chosen based on the rule order as we do for the LocalizedAttributesRules.
This means that the list will not be reordered anymore and will keep the user defined order,
moreover, if there are any duplicates, they will not be de-duplicated anymore.

**Impact:**
- Settings API
- the database format of the filterable attributes changed
- may impact the LocalizedAttributesRules due to the AttributePatterns factorization
- OpenAPI generator
2025-03-03 10:22:02 +01:00
c63c25a9a2 Merge #5355
Some checks failed
Look for flaky tests / flaky (push) Failing after 1s
Indexing bench (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of indexing (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for geo (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for songs (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for Wikipedia articles (push) / Run and upload benchmarks (push) Has been cancelled
Publish binaries to GitHub release / Check the version validity (push) Failing after 5s
Test suite / Tests almost all features (push) Failing after 13s
Test suite / Tests on ubuntu-22.04 (push) Failing after 19s
Test suite / Test with Ollama (push) Failing after 7s
Test suite / Test disabled tokenization (push) Failing after 10s
Test suite / Run tests in debug (push) Failing after 15s
Test suite / Run Rustfmt (push) Failing after 16s
Test suite / Run Clippy (push) Successful in 9m39s
SDKs tests / define-docker-image (push) Failing after 5s
SDKs tests / .NET SDK tests (push) Has been skipped
SDKs tests / Dart SDK tests (push) Has been skipped
SDKs tests / Go SDK tests (push) Has been skipped
SDKs tests / Java SDK tests (push) Has been skipped
SDKs tests / JS SDK tests (push) Has been skipped
SDKs tests / PHP SDK tests (push) Has been skipped
SDKs tests / Python SDK tests (push) Has been skipped
SDKs tests / Ruby SDK tests (push) Has been skipped
SDKs tests / Rust SDK tests (push) Has been skipped
SDKs tests / Swift SDK tests (push) Has been skipped
SDKs tests / meilisearch-js-plugins tests (push) Has been skipped
SDKs tests / meilisearch-rails tests (push) Has been skipped
SDKs tests / meilisearch-symfony tests (push) Has been skipped
Test suite / Tests on macos-13 (push) Has been cancelled
Test suite / Tests on windows-2022 (push) Has been cancelled
5355: Support fetching the pooling method from the model configuration r=Kerollmops a=dureuill

# Pull Request

## Related issue
Fixes #5354 

## What does this PR do?
- Fetches the pooling configuration from the model repository
- Use a pooling method that depends on the pooling configuration of that model.
- Allow overriding the pooling method with a new huggingFace embedder parameter `pooling`
  - for backward-compatibility with Meilisearch v1.13
  - for compatibility with embedders that exhibit the same behavior as Meilisearch v1.13
- Handle the default value of that new parameter
   - for compatibility, when importing a db/a dump, it should be set to `forceMean`
   - when (re)set from the settings for an embedder, it should be set to `useModel`


Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2025-02-27 14:55:13 +00:00
046bbea864 Keep old stat format to make sure the number of documents is available during dumpless upgrade 2025-02-27 15:17:23 +01:00
c5cb7d2f2c Forbid opening a db of v1.13.x from v1.13.y 2025-02-27 15:17:23 +01:00
5e7f226ac9 Support dumpless upgrade for all v1.13 patches 2025-02-27 15:17:23 +01:00
754f254a00 Update snapshots following version bump 2025-02-27 15:17:23 +01:00
39b5ad3c86 Update version for the next release (v1.13.2) in Cargo.toml 2025-02-27 15:17:22 +01:00
80adbb1bdc Merge #5338
Some checks are pending
Indexing bench (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of indexing (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for geo (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for songs (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for Wikipedia articles (push) / Run and upload benchmarks (push) Waiting to run
Run the indexing fuzzer / Setup the action (push) Successful in 1h5m23s
5338: Bump Ubuntu in the CI from 20.04 to 22.04 r=dureuill a=Kerollmops

This PR bumps the Ubuntu version we use in the CI from version 20.04 to version 22.04. This also means we are [using GLIBC version 2.35 and not version 2.28](https://gist.github.com/zchrissirhcz/ee13f604996bbbe312ba1d105954d2ed).

Note, the indentation fix is done by my IDE (Zed), sorry about that 🤦 

Fixes https://github.com/meilisearch/meilisearch/issues/5374

Co-authored-by: Kerollmops <clement@meilisearch.com>
2025-02-27 08:14:12 +00:00
4b6fa1cf41 Merge #5372
Some checks failed
Indexing bench (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of indexing (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for geo (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for songs (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for Wikipedia articles (push) / Run and upload benchmarks (push) Waiting to run
Test suite / Tests on ubuntu-20.04 (push) Failing after 1s
Test suite / Tests almost all features (push) Has been skipped
Test suite / Test disabled tokenization (push) Has been skipped
Run the indexing fuzzer / Setup the action (push) Failing after 11s
Test suite / Test with Ollama (push) Failing after 10s
Test suite / Run tests in debug (push) Failing after 13s
Test suite / Run Clippy (push) Failing after 19s
Test suite / Run Rustfmt (push) Failing after 32s
Test suite / Tests on macos-13 (push) Has been cancelled
Test suite / Tests on windows-2022 (push) Has been cancelled
5372: Bring back changes from v1.13.1 to main r=irevoire a=Kerollmops



Co-authored-by: Kerollmops <Kerollmops@users.noreply.github.com>
Co-authored-by: Kerollmops <clement@meilisearch.com>
Co-authored-by: ManyTheFish <many@meilisearch.com>
Co-authored-by: Strift <lau.cazanove@gmail.com>
Co-authored-by: Many the fish <many@meilisearch.com>
2025-02-26 17:24:51 +00:00
dc78d8e9c4 Fix the dumpless upgrade log 2025-02-26 17:02:46 +01:00
d4063c9dcd Fix fmt 2025-02-26 17:02:45 +01:00
abebc574f6 Update crates/milli/src/index.rs
Co-authored-by: Tamo <tamo@meilisearch.com>
2025-02-26 17:02:45 +01:00
f32ab67819 Update crates/milli/src/index.rs
Co-authored-by: Tamo <tamo@meilisearch.com>
2025-02-26 17:02:44 +01:00
d25953f322 fix clippy 2025-02-26 17:02:43 +01:00
405bbd04c1 Dumpless upgrade 2025-02-26 17:01:38 +01:00
5d421abdc4 Update Snapshots 2025-02-26 17:01:37 +01:00
9f3663e768 Implement Incremental document database stats computing 2025-02-26 17:01:35 +01:00
d9642ec916 Use checked_div in average computation 2025-02-26 17:01:34 +01:00
818e8b0237 Fix zero division 2025-02-26 17:01:31 +01:00
4f77a7fba5 fix clippy 2025-02-26 17:01:29 +01:00
058f08dff5 fix snapshots 2025-02-26 17:01:26 +01:00
9a6c1730aa Add document database stats 2025-02-26 17:01:25 +01:00
91a8a97045 Bump 2025-02-26 17:01:24 +01:00
15788773af Check the exact_word database when computing zero typo query 2025-02-26 17:01:22 +01:00
025b9b79bb Update the snapshots 2025-02-26 17:01:21 +01:00
1c60b17a37 Update version for the next release (v1.13.1) in Cargo.toml 2025-02-26 17:01:19 +01:00
3b2cd54b9d tests: add a check to know if a Value has an uid 2025-02-25 17:24:45 +01:00
0833cb7d34 Mention openAPI in CONTRIBUTING.md 2025-02-25 12:01:26 +01:00
b0d4f9590f Merge #5364
Some checks failed
Test suite / Tests on ubuntu-20.04 (push) Failing after 1s
Test suite / Tests almost all features (push) Has been skipped
Test suite / Test disabled tokenization (push) Has been skipped
Test suite / Test with Ollama (push) Failing after 15s
Test suite / Run tests in debug (push) Failing after 15s
Test suite / Run Rustfmt (push) Successful in 2m20s
Test suite / Run Clippy (push) Failing after 6m46s
Run the indexing fuzzer / Setup the action (push) Failing after 7m0s
Indexing bench (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of indexing (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for geo (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for songs (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for Wikipedia articles (push) / Run and upload benchmarks (push) Has been cancelled
Test suite / Tests on macos-13 (push) Has been cancelled
Test suite / Tests on windows-2022 (push) Has been cancelled
5364: Rename `callTrace` into `progressTrace` r=Kerollmops a=Kerollmops

Rename the `callTrace` field into a `progressTrace`.

Co-authored-by: Kerollmops <clement@meilisearch.com>
2025-02-25 09:34:13 +00:00
dfce20be21 Rename callTrace into progressTrace 2025-02-25 10:09:03 +01:00
24fe6cd205 Fix multiple embeddings in hf 2025-02-24 16:24:04 +01:00
e374b095a2 Fix tests 2025-02-24 14:11:26 +01:00
9f3e4801b1 Refactor settings validation and introduce SubEmbedderSettings 2025-02-24 13:58:26 +01:00
b85180fedb Error types 2025-02-24 13:58:26 +01:00
3cdcc54a9e analytics 2025-02-24 13:58:26 +01:00
294cf39cad Integrate composite embedder 2025-02-24 13:58:26 +01:00
4a2643daa2 Rename embed_one to embed_search and embed_chunks* to embed_index* 2025-02-24 13:58:26 +01:00
8d2d9066ba Add composite embedder 2025-02-24 13:58:26 +01:00
526476e168 Move settings test to its own file 2025-02-24 13:58:26 +01:00
ea7bae9a71 Merge #5356
Some checks failed
Test suite / Tests almost all features (push) Has been skipped
Test suite / Test disabled tokenization (push) Has been skipped
Test suite / Run tests in debug (push) Failing after 1s
Test suite / Test with Ollama (push) Failing after 12s
Test suite / Tests on ubuntu-20.04 (push) Failing after 19s
Test suite / Run Rustfmt (push) Failing after 17s
Test suite / Run Clippy (push) Successful in 9m21s
Run the indexing fuzzer / Setup the action (push) Successful in 1h8m47s
Test suite / Tests on macos-13 (push) Has been cancelled
Test suite / Tests on windows-2022 (push) Has been cancelled
Indexing bench (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of indexing (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for geo (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for songs (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for Wikipedia articles (push) / Run and upload benchmarks (push) Has been cancelled
5356: Display the internal indexing steps with timings on the `/batches` route r=irevoire a=Kerollmops

This PR computes the durations of each step, stores them in a map, and prints them (for now).

```
"callTrace": {
    "processing tasks > retrieving config": "185.38µs",
    "processing tasks > computing document changes > preparing update file > payload": "23.11ms",
    "processing tasks > computing document changes > preparing update file": "23.26ms",
    "processing tasks > computing document changes": "24.06ms",
    "processing tasks > indexing > extracting documents > document": "15.13ms",
    "processing tasks > indexing > extracting documents": "15.13ms",
    "processing tasks > indexing > extracting facets > document": "5.70ms",
    "processing tasks > indexing > extracting facets": "5.72ms",
    "processing tasks > indexing > extracting words > document": "597.24ms",
    "processing tasks > indexing > extracting words": "597.25ms",
    "processing tasks > indexing > extracting word proximity > document": "1.14s",
    "processing tasks > indexing > extracting word proximity": "1.15s",
    "processing tasks > indexing > tail writing to database": "430.91ms",
    "processing tasks > indexing > waiting for extractors": "52.54µs",
    "processing tasks > indexing > writing embeddings to database": "47.79µs",
    "processing tasks > indexing > post-processing facets": "476.04µs",
    "processing tasks > indexing > post-processing words": "97.82ms",
    "processing tasks > indexing > finalizing": "67.41ms",
    "processing tasks > indexing": "2.40s",
    "processing tasks": "2.43s",
    "writing tasks to disk > task": "37.71µs",
    "writing tasks to disk": "67.13µs"
},
"writeChannelCongestion": {
    "attempts": 2608482,
    "blocking_attempts": 0,
    "blocking_ratio": 0.0
}
```

## To Do
- [x] Update the batches PRD + delivery + tracking issue.
- [x] Store that in the batches to be visible from the `/batches` route.
- [x] Display the writer's congestion.
- [x] Display the info back in the logs too.
- [ ] (optional) Compute the size of each database by [using LMDB](https://docs.rs/heed/latest/heed/struct.DatabaseStat.html).
- [x] Push them in reverse order so that "processing task" is after the other sub-steps.


Co-authored-by: Kerollmops <clement@meilisearch.com>
2025-02-20 17:38:50 +00:00
76fd5d92d7 Clarify the tail writing to database 2025-02-20 17:35:23 +01:00
245a55722a Remove commented code 2025-02-20 16:48:18 +01:00
434fad5327 Fix insta tests again 2025-02-20 16:41:48 +01:00
243a5fa6a8 Log the call trace and congestion 2025-02-20 14:17:34 +01:00
9d314ace09 Fix the insta tests 2025-02-20 11:51:58 +01:00
1b1172ad16 Fix dump tests 2025-02-20 10:44:53 +01:00
1d99c8465c Hide the batch stats to make insta pass 2025-02-20 10:16:54 +01:00
05cc8c650c Expose the write channel congestion in the batches 2025-02-19 15:47:54 +01:00
14e1459bf5 Document settings 2025-02-19 15:06:22 +01:00
589bf30ec6 make clippy happy 2025-02-19 11:38:07 +01:00
b367c71ad2 fixup test 2025-02-19 11:31:17 +01:00
3ff1de0a21 Expose the call trace in the batch stats 2025-02-19 11:24:11 +01:00
1005a60fb8 Fixup dump settings 2025-02-19 11:03:48 +01:00
e9add14189 Reorder steps 2025-02-18 19:26:41 +01:00
4a058a080e Simplify the name generation 2025-02-18 18:48:44 +01:00
11a11fc870 Accumulate step durations from the progress system 2025-02-18 18:33:19 +01:00
cd0dfa3f1b Fix test cases 2025-02-18 17:21:52 +01:00
7b4ce468a6 Allow overriding pooling method 2025-02-18 17:12:23 +01:00
11759c4be4 Support pooling 2025-02-18 16:10:51 +01:00
0f1aeb8eaa Merge #5351
Some checks failed
Look for flaky tests / flaky (push) Failing after 19s
SDKs tests / define-docker-image (push) Failing after 5s
SDKs tests / .NET SDK tests (push) Has been skipped
SDKs tests / Dart SDK tests (push) Has been skipped
SDKs tests / Go SDK tests (push) Has been skipped
SDKs tests / Java SDK tests (push) Has been skipped
SDKs tests / JS SDK tests (push) Has been skipped
SDKs tests / PHP SDK tests (push) Has been skipped
SDKs tests / Python SDK tests (push) Has been skipped
SDKs tests / Ruby SDK tests (push) Has been skipped
SDKs tests / Rust SDK tests (push) Has been skipped
SDKs tests / Swift SDK tests (push) Has been skipped
SDKs tests / meilisearch-js-plugins tests (push) Has been skipped
SDKs tests / meilisearch-rails tests (push) Has been skipped
SDKs tests / meilisearch-symfony tests (push) Has been skipped
Publish binaries to GitHub release / Check the version validity (push) Successful in 9s
Publish binaries to GitHub release / Publish binary for aarch64 (meilisearch-linux-aarch64, aarch64-unknown-linux-gnu) (push) Failing after 2s
Publish binaries to GitHub release / Publish binary for Linux (push) Failing after 12s
Publish binaries to GitHub release / Publish binary for macos-13 (push) Has been cancelled
Publish binaries to GitHub release / Publish binary for windows-2022 (push) Has been cancelled
Publish binaries to GitHub release / Publish binary for macOS silicon (meilisearch-macos-apple-silicon, aarch64-apple-darwin) (push) Has been cancelled
Test suite / Tests on ubuntu-20.04 (push) Failing after 12s
Test suite / Test with Ollama (push) Failing after 7s
Test suite / Test disabled tokenization (push) Failing after 11s
Test suite / Run tests in debug (push) Failing after 11s
Test suite / Run Clippy (push) Failing after 17s
Test suite / Run Rustfmt (push) Successful in 1m51s
Test suite / Tests almost all features (push) Failing after 7m7s
Test suite / Tests on macos-13 (push) Has been cancelled
Test suite / Tests on windows-2022 (push) Has been cancelled
5351: Bring back v1.13.0 changes into main r=irevoire a=Kerollmops

This PR brings back the changes made in v1.13 into the main branch.

Co-authored-by: ManyTheFish <many@meilisearch.com>
Co-authored-by: Kerollmops <clement@meilisearch.com>
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
Co-authored-by: Clémentine <clementine@meilisearch.com>
Co-authored-by: meili-bors[bot] <89034592+meili-bors[bot]@users.noreply.github.com>
Co-authored-by: Tamo <tamo@meilisearch.com>
Co-authored-by: Clément Renault <clement@meilisearch.com>
2025-02-18 08:05:02 +00:00
5e7803632d Merge #5342
Some checks failed
Test suite / Tests on ubuntu-20.04 (push) Failing after 1s
Test suite / Tests almost all features (push) Has been skipped
Test suite / Test disabled tokenization (push) Has been skipped
Test suite / Run tests in debug (push) Failing after 14s
Test suite / Run Rustfmt (push) Successful in 1m54s
Test suite / Run Clippy (push) Failing after 6m49s
Test suite / Tests on macos-13 (push) Has been cancelled
Test suite / Tests on windows-2022 (push) Has been cancelled
5342: Fix workload sha r=dureuill a=ManyTheFish

The dataset shasum was wrong for some workloads making the `/bench workloads/*.json` crash

Co-authored-by: ManyTheFish <many@meilisearch.com>
2025-02-12 16:27:09 +00:00
885710a07b Merge #5341
5341: Embeddings stats r=ManyTheFish a=ManyTheFish

# Pull Request

## Related issue
Fixes #5321

## What does this PR do?
- Add embedding stats
- force dumpless upgrade to recompute stats
- add tests


Co-authored-by: ManyTheFish <many@meilisearch.com>
2025-02-12 15:46:37 +00:00
c55fdad2c3 Fix dumpless upgrade target version 2025-02-12 16:35:05 +01:00
1caad4c4b0 Add multiple embeddings for the same embedder in tests 2025-02-12 16:13:34 +01:00
8419ed52a1 fix clippy 2025-02-12 14:38:51 +01:00
a65c52cc97 Convert dump test into snapshots 2025-02-12 14:14:10 +01:00
49e9655c24 Update snapshots 2025-02-12 14:05:32 +01:00
fa763ca5dc Merge #5339
5339: Add back timeout from v1.11.3 r=Kerollmops a=dureuill

# Pull Request

## Related issue
Fixes #5337

## What does this PR do?
- Fix regression compared with v1.11 by reintroducing the 30s timeout on all REST API calls.

Thanks to `@migueltarga` for reporting the issue


Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2025-02-12 12:50:27 +00:00
c7aeb554b2 Add tests 2025-02-12 13:37:41 +01:00
88d9d47928 Fix benchmark sha 2025-02-12 13:27:15 +01:00
8e0d8d31f9 Add back timeout from v1.11.3 2025-02-12 11:53:00 +01:00
81a38099ec Merge #5336
5336: Meilitool Hair Dryer r=dureuill a=Kerollmops

This pull request introduces a new subcommand to hair dry a specific part of specific indexes. It is useful when [the memory-mapped pages are not hot in the cache](https://arc.net/l/quote/ixhcdwcq) and must be. Hair drying those interesting pages makes the search requests using the vector store much faster.

The previous technique used the "cat method," which consists of reading the whole LMDB data file and pipping it into the null file descriptor. By doing that, the whole LMDB data file becomes hot in the cache. However, when the database is large, at least 30% of it is free, and unused pages and many other pages don't need to be hot, e.g., raw JSON documents or uninteresting parts of the inverted index.

This new subcommand reads all the Arroy pages of a given index to make them hot, and only those. More coming...

The current algorithm is single-threaded and takes a lot of time. I am in the process of multithreading it. This is the time it takes to hair dry a 305GiB database with a single thread.

```
real    21m51.054s
user    0m3.155s
sys     0m19.393s
```

## To Do
- [ ] (optional) Do the reads in parallel.

Co-authored-by: Kerollmops <clement@meilisearch.com>
2025-02-12 10:45:16 +00:00
bd27fe7d02 force dumpless upgrade to recompute stats 2025-02-12 11:45:02 +01:00
41203f0931 Add embedders stats 2025-02-12 11:37:47 +01:00
803a699b15 Remove unsafes 2025-02-12 10:46:45 +01:00
246ad3b06e Display a progress percentage 2025-02-12 09:56:05 +01:00
a21c440274 Bump Ubuntu from 20.04 to 22.04 2025-02-12 09:49:50 +01:00
c01d26ffd7 Merge #5324
Some checks failed
Test suite / Tests almost all features (push) Has been skipped
Test suite / Test disabled tokenization (push) Has been skipped
Test suite / Test with Ollama (push) Failing after 5s
Test suite / Tests on ubuntu-20.04 (push) Failing after 11s
Test suite / Run tests in debug (push) Failing after 0s
Test suite / Run Rustfmt (push) Failing after 7s
Test suite / Run Clippy (push) Successful in 7m29s
Run the indexing fuzzer / Setup the action (push) Successful in 1h5m25s
Indexing bench (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of indexing (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for geo (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for songs (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for Wikipedia articles (push) / Run and upload benchmarks (push) Has been cancelled
Test suite / Tests on macos-13 (push) Has been cancelled
Test suite / Tests on windows-2022 (push) Has been cancelled
5324: Mention utoipa in sprint issues r=curquiza a=irevoire

Update the sprint-issue template to mention the openAPI file and utoipa.

Let me know if something is not clear or missing

Co-authored-by: Tamo <tamo@meilisearch.com>
2025-02-11 20:46:26 +00:00
225af069a9 Merge #5149
5149: Ensure the settings routes are now configurated when a new field is added to the Settings struct  r=curquiza a=MichaScant

# Pull Request
## Related issue
Fixes #5126 

## What does this PR do?
Ensures the settings routes are properly configured before a new field is added to the settings structure. Changes were made based on what was proposed in the original issue, any new field for settings struct is added in the [make_settings_route! macro list](6298db5bea/crates/meilisearch/src/routes/indexes/settings.rs (L182-L403)) 

## PR checklist
Please check if your PR fulfills the following requirements:
- [ ] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [ ] Have you read the contributing guidelines?
- [ ] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: michascant <89426143+MichaScant@users.noreply.github.com>
2025-02-11 20:10:29 +00:00
70305b9f71 Merge #5332
5332: Fix geo update r=Kerollmops a=dureuill

# Pull Request

## Related issue
Fixes #5331

## What does this PR do?
- use the merged version that contains all fields instead of the updated version that contains only updated fields
- add test that detects the problem
- As it is the second time that `changes.updated` is causing a bug, I'm changing its name to `only_changed_fields`, hopefully better communicating that old fields are not there


Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2025-02-11 18:51:33 +00:00
5dab435d13 Add more logs about read txns 2025-02-11 18:14:48 +01:00
c83c1a3c51 Introduce the Hair Dryer meilitool sucommand 2025-02-11 18:01:53 +01:00
afc6c10a2a add more info on utoipa 2025-02-11 17:45:17 +01:00
b83275c9c5 Change the updated* functions to only_new functions, hopefully better communicating what they do 2025-02-11 15:27:10 +01:00
d7f35ee3ba Use merged document instead of updated 2025-02-11 15:27:10 +01:00
1dce341bfb Add test 2025-02-11 15:27:10 +01:00
4876c1c8eb Merge #5310
Some checks failed
Test suite / Test disabled tokenization (push) Has been skipped
Test suite / Tests almost all features (push) Has been skipped
Test suite / Tests on ubuntu-20.04 (push) Failing after 18s
Test suite / Run tests in debug (push) Failing after 17s
Test suite / Run Rustfmt (push) Successful in 2m42s
Test suite / Run Clippy (push) Failing after 7m17s
Test suite / Tests on macos-13 (push) Has been cancelled
Test suite / Tests on windows-2022 (push) Has been cancelled
5310: Fix batch export/import dump r=Kerollmops a=irevoire

# Pull Request

## Related issue
Fixes https://github.com/meilisearch/meilisearch/issues/5304
Fixes https://github.com/meilisearch/meilisearch/issues/5247

## What does this PR do?
- Add the batches to the dump
- Update the tests
- Create a new dump test containing batches and an enqueued task with a document addition


Co-authored-by: Tamo <tamo@meilisearch.com>
2025-02-11 10:21:34 +00:00
43c8d54501 fix test after rebase 2025-02-11 11:19:13 +01:00
84e2a1f836 rename the atomic to something more meaningful 2025-02-11 11:14:49 +01:00
00eb47d42e use serde_json::to_writer instead of serializing + writing 2025-02-11 11:14:49 +01:00
9293e7f2c1 fix tests after rebase 2025-02-11 11:14:49 +01:00
80198aa855 add a dump test with batches and enqueued tasks 2025-02-11 11:14:49 +01:00
fa00b42c93 fix the missing batch in the dumps in meilisearch and meilitools 2025-02-11 11:14:49 +01:00
6c9409edf8 Merge #5326
Some checks failed
Test suite / Test disabled tokenization (push) Has been skipped
Test suite / Tests almost all features (push) Has been skipped
Test suite / Tests on ubuntu-20.04 (push) Failing after 15s
Test suite / Run Clippy (push) Failing after 11s
Test suite / Run tests in debug (push) Failing after 34s
Test suite / Run Rustfmt (push) Successful in 1m32s
Test suite / Tests on macos-13 (push) Has been cancelled
Test suite / Tests on windows-2022 (push) Has been cancelled
5326: Expose a route to get the file content associated with a task r=Kerollmops a=Kerollmops

This PR exposes a new `/tasks/{taskUid}/documents` route, exposing the update file associated with a task.

## To Do
- [x] (optional) Change the route to `/tasks/{taskUid}/documents` `@dureuill.`
- [x] Update Open API example.
- [x] Create [an Experimental Feature Discussion](https://github.com/orgs/meilisearch/discussions/808).
- [x] Make this route experimental and enable it via the experimental route.

Co-authored-by: Kerollmops <clement@meilisearch.com>
Co-authored-by: Clément Renault <clement@meilisearch.com>
2025-02-10 16:50:13 +00:00
acb06cb3e6 Improve the error message when missing documents
Co-authored-by: Tamo <tamo@meilisearch.com>
2025-02-10 16:53:50 +01:00
7d0d8f4445 Make the feature experimental 2025-02-10 16:11:32 +01:00
491d115c3c Change the route to get the task documents 2025-02-10 14:55:07 +01:00
55fa2dda00 Update the Open API example 2025-02-10 14:52:48 +01:00
c71eea8023 Improve error message when update file has been processed 2025-02-10 14:33:01 +01:00
df40533741 Expose a route to get the update file content of a task 2025-02-10 14:05:32 +01:00
4e819a6187 mention utoipa in sprint issues 2025-02-10 13:35:15 +01:00
0c3e7fe963 Merge #5316
Some checks failed
Test suite / Tests on ubuntu-20.04 (push) Failing after 2s
Test suite / Tests almost all features (push) Has been skipped
Test suite / Test disabled tokenization (push) Has been skipped
Test suite / Run tests in debug (push) Failing after 16s
Test suite / Run Clippy (push) Failing after 12s
Test suite / Run Rustfmt (push) Failing after 32s
Test suite / Tests on macos-13 (push) Has been cancelled
Test suite / Tests on windows-2022 (push) Has been cancelled
5316: Fix the dumpless upgrade corruption r=dureuill a=irevoire

# Pull Request

## Related issue
Fixes https://github.com/meilisearch/meilisearch/issues/5280

## What does this PR do?
- Add a test that ensure we write the version in the index-scheduler even if we have a bug while writing the VERSION file
- Do what was described in the issue


Co-authored-by: Tamo <tamo@meilisearch.com>
2025-02-10 09:53:57 +00:00
45f843ccb9 fmt 2025-02-10 10:46:42 +01:00
35b6bca598 remove the failing test 2025-02-10 10:20:14 +01:00
7f82d33597 update the version file atomically 2025-02-06 18:23:28 +01:00
f2185438ee Merge #5308
Some checks failed
Look for flaky tests / flaky (push) Failing after 13s
SDKs tests / define-docker-image (push) Failing after 7m9s
SDKs tests / .NET SDK tests (push) Has been skipped
SDKs tests / Dart SDK tests (push) Has been skipped
SDKs tests / Go SDK tests (push) Has been skipped
SDKs tests / Java SDK tests (push) Has been skipped
SDKs tests / JS SDK tests (push) Has been skipped
SDKs tests / PHP SDK tests (push) Has been skipped
SDKs tests / Python SDK tests (push) Has been skipped
SDKs tests / Ruby SDK tests (push) Has been skipped
SDKs tests / Rust SDK tests (push) Has been skipped
SDKs tests / Swift SDK tests (push) Has been skipped
SDKs tests / meilisearch-js-plugins tests (push) Has been skipped
SDKs tests / meilisearch-rails tests (push) Has been skipped
SDKs tests / meilisearch-symfony tests (push) Has been skipped
Publish binaries to GitHub release / Check the version validity (push) Successful in 12s
Publish binaries to GitHub release / Publish binary for Linux (push) Failing after 16s
Publish binaries to GitHub release / Publish binary for aarch64 (meilisearch-linux-aarch64, aarch64-unknown-linux-gnu) (push) Failing after 19s
Test suite / Tests almost all features (push) Failing after 1s
Test suite / Test with Ollama (push) Failing after 7s
Test suite / Tests on ubuntu-20.04 (push) Failing after 15s
Test suite / Test disabled tokenization (push) Failing after 9s
Test suite / Run tests in debug (push) Failing after 10s
Test suite / Run Rustfmt (push) Failing after 9s
Test suite / Run Clippy (push) Failing after 18s
Test suite / Tests on macos-13 (push) Has been cancelled
Test suite / Tests on windows-2022 (push) Has been cancelled
Publish binaries to GitHub release / Publish binary for macos-13 (push) Has been cancelled
Publish binaries to GitHub release / Publish binary for windows-2022 (push) Has been cancelled
Publish binaries to GitHub release / Publish binary for macOS silicon (meilisearch-macos-apple-silicon, aarch64-apple-darwin) (push) Has been cancelled
5308: Ollama Integration Tests r=dureuill a=Kerollmops

This PR improves test coverage of #4757 by providing a new CI to test the Ollama setup with Ollama.

## To Do
- [x] Clean up the commits
- [x] Feature gate the Ollama tests and run them only in the CI

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
Co-authored-by: Kerollmops <clement@meilisearch.com>
2025-02-06 17:21:51 +00:00
8c5856007c flush+sync the version file just in case 2025-02-06 18:04:43 +01:00
ae1d7f4d9b Improve the test and disable it on windows and linux since they don't work on the CI 2025-02-06 17:54:12 +01:00
792be63567 Merge #5323
5323: exclude network time from processingMs r=Kerollmops a=dureuill



Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2025-02-06 16:35:44 +00:00
ca1ad51564 Put the Ollama tests under a feature 2025-02-06 17:27:47 +01:00
70aac71c63 exclude network time from processingMs 2025-02-06 17:18:36 +01:00
a1d1e7c82a Setup dedicated CI to run the Ollama tests 2025-02-06 17:12:17 +01:00
56438bdea4 Introduce an Ollama integration test 2025-02-06 17:12:17 +01:00
a562d6abc1 Merge #5322
5322: Make sure arroy is using the rayon thread-pool r=dureuill a=Kerollmops

This PR fixes #5249 by ensuring arroy uses the rayon thread pool.

Co-authored-by: Kerollmops <clement@meilisearch.com>
2025-02-06 15:28:47 +00:00
33b67b82e1 fixed rustfmt errors 2025-02-06 09:57:39 -05:00
b7fdd9516c Merge #4970
4970: Create a new export documents meilitool subcommand r=dureuill a=Kerollmops

This subcommand can be useful for extracting documents from an existing database.

Co-authored-by: Kerollmops <clement@meilisearch.com>
2025-02-06 14:48:27 +00:00
5f2a1a4fd1 Skip the documents before fetching them 2025-02-06 15:40:22 +01:00
2b0e17ede0 Make sure arroy is using the rayon thread-pool 2025-02-06 15:28:10 +01:00
37092adc71 Show a bit of progress 2025-02-06 10:37:05 +01:00
86fcad788e Introduce a parameter to skip the first documents 2025-02-06 10:32:50 +01:00
2ea5c57871 Create a new export documents meilitool subcommand based on v1.12 2025-02-06 10:32:39 +01:00
7b4f2aa593 updated code 2025-02-05 22:07:32 -05:00
1fb96d3edb made changes to ensure its not allowing everything through 2025-02-05 20:37:07 -05:00
b63c64395d add a test ensuring the index-scheduler version is set when we cannot write the version file 2025-02-05 18:08:50 +01:00
628119e31e fix the dumpless upgrade potential corruption when upgrading from the v1.12 2025-02-05 18:08:50 +01:00
78867b6852 Merge #5299
Some checks failed
Test suite / Tests almost all features (push) Has been skipped
Test suite / Test disabled tokenization (push) Has been skipped
Test suite / Run tests in debug (push) Failing after 1s
Test suite / Tests on ubuntu-20.04 (push) Failing after 17s
Test suite / Tests on windows-2022 (push) Failing after 25s
Test suite / Run Rustfmt (push) Failing after 1m6s
Test suite / Run Clippy (push) Successful in 8m46s
Test suite / Tests on macos-13 (push) Has been cancelled
5299: Remote federated search r=dureuill a=dureuill

Fixes #4980 

- Usage: https://www.notion.so/meilisearch/API-usage-Remote-search-request-f64fae093abf409e9434c9b9c8fab6f3?pvs=25#1894b06b651f809a9f3dcc6b7189646e

- Changes database format:
  - Adds a new database key: the code is resilient to the case where the key is missing
  - Adds a new experimental feature: the code for experimental features is resilient to this case

Changes:

- Add experimental feature `proxySearch`
- Add network routes
- Dump support for network
- Add proxy search
- Add various tests

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2025-02-05 16:08:48 +00:00
b21b8e8f30 Remote search tests 2025-02-05 15:03:33 +01:00
4a9e5ae215 mv multi.rs -> multi/mod.rs 2025-02-05 15:03:33 +01:00
6e1865b75b network integration tests 2025-02-05 15:03:32 +01:00
64409a1de7 Test server: clear_api_key 2025-02-05 15:03:32 +01:00
1b81cab782 Add more analytics 2025-02-05 15:03:32 +01:00
88190b5602 Fix tests 2025-02-05 15:03:32 +01:00
0b27aa5138 Multi search reads header to know if it is being proxied 2025-02-05 15:03:32 +01:00
35160788d7 Proxy search requests 2025-02-05 15:03:32 +01:00
c3e5c3ba36 Allow rebuilding a SearchQueryWithIndex from its components 2025-02-05 15:03:16 +01:00
04ac0af54b Add WeightedScoreValues to be able to compare remote scores 2025-02-05 15:03:16 +01:00
9996533364 Make search types serialize and deserialize so that reading from a proxy is possible 2025-02-05 15:03:16 +01:00
3f6b334fc5 Route network 2025-02-05 15:03:16 +01:00
b30e5a7a35 Add new permissions 2025-02-05 15:03:16 +01:00
6d79cb23ba New error codes 2025-02-05 15:03:16 +01:00
e34afca6d7 Support network in dumps 2025-02-05 15:03:16 +01:00
4918b9ffb6 Network stored in DB 2025-02-05 15:03:15 +01:00
73474e7af0 Network types 2025-02-05 15:03:15 +01:00
7ae6dda03f Add new experimental feature 2025-02-05 15:01:04 +01:00
00e764b0d3 Merge #5314
5314: Activate used database size r=irevoire a=ManyTheFish

# Pull Request

make the `/stats` route return the `usedDatabaseSize` corresponding to the size used to store the "real" data in the database and not the disk size used by LMDB


Co-authored-by: ManyTheFish <many@meilisearch.com>
2025-02-05 12:51:57 +00:00
4abf0db0b4 Activate used database size 2025-02-05 13:45:47 +01:00
acc885fd0a Merge #5312
5312: Send the OSS analytics once per day instead of once per hour r=ManyTheFish a=irevoire

# Pull Request

## Related issue
Fixes https://github.com/meilisearch/meilisearch/issues/5311

## What does this PR do?
- If the instance is OSS => we send the analytics once every day
- If the instance is on the meilisearch cloud => we send the analytics every hour


Co-authored-by: Tamo <tamo@meilisearch.com>
2025-02-05 11:15:34 +00:00
61e8cfd4bc Send the OSS analytics once per day instead of once per hour 2025-02-04 15:39:00 +01:00
796acd1aee Merge #5288
Some checks failed
Test suite / Tests almost all features (push) Has been skipped
Test suite / Test disabled tokenization (push) Has been skipped
Test suite / Tests on ubuntu-20.04 (push) Failing after 13s
Test suite / Run tests in debug (push) Failing after 13s
Test suite / Run Clippy (push) Failing after 19s
Test suite / Tests on windows-2022 (push) Failing after 48s
Test suite / Run Rustfmt (push) Successful in 1m28s
Test suite / Tests on macos-13 (push) Has been cancelled
5288: Improve AI logging r=dureuill a=Kerollmops

This PR fixes #5285 and brings the changes from #5233 to simplify debugging indexation and search performance issues related to AI. The following texts can be found in the logs to debug and understand performance issues:

 - `embed_one: search` represents the time we spent waiting for the embedding generation, i.e., OpenAI, local HuggingFace, Ollama.
 - `filtered_universe: search::universe` the time spent filtering the documents.
 - ~`next_bucket: search::vector_sort` is the time spent finding the nearest neighbors (ANNs) in the vector store (arroy), locally~ was being triggered too many times.
 - `indexing::vectors` is the time arroy spends indexing the new vectors for a batch.
 - `documents::extract vectors` and `documents::merge vectors` to see the time spent generating and writing the embeddings.

Co-authored-by: Kerollmops <clement@meilisearch.com>
2025-02-04 10:20:45 +00:00
cc8df5e11f Move back the search-side logging to tracing 2025-02-04 11:16:17 +01:00
ede74ccc42 Merge #5306
Some checks failed
Test suite / Tests on ubuntu-20.04 (push) Failing after 2s
Test suite / Tests almost all features (push) Has been skipped
Test suite / Test disabled tokenization (push) Has been skipped
Test suite / Run tests in debug (push) Failing after 2s
Test suite / Tests on windows-2022 (push) Failing after 24s
Test suite / Run Rustfmt (push) Successful in 1m33s
Test suite / Run Clippy (push) Successful in 6m20s
Test suite / Tests on macos-13 (push) Has been cancelled
5306: Fix internal error when passing `documentTemplateMaxBytes` to a source that doesn't support it r=ManyTheFish a=dureuill

# Pull Request

## Related issue
Fixes #5305 

## What does this PR do?
- add `DOCUMENT_TEMPLATE_MAX_BYTES` to `allowed_sources_for_field` and `allowed_fields_for_source` to prevent a panic


Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2025-02-04 08:46:13 +00:00
e93a5719ef Merge #5293
Some checks failed
Test suite / Tests on ubuntu-20.04 (push) Failing after 1s
Test suite / Tests almost all features (push) Has been skipped
Test suite / Test disabled tokenization (push) Has been skipped
Test suite / Run tests in debug (push) Failing after 14s
Test suite / Tests on windows-2022 (push) Failing after 24s
Test suite / Run Clippy (push) Failing after 31s
Test suite / Run Rustfmt (push) Successful in 1m45s
Run the indexing fuzzer / Setup the action (push) Successful in 1h5m3s
Indexing bench (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of indexing (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for geo (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for songs (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for Wikipedia articles (push) / Run and upload benchmarks (push) Has been cancelled
Test suite / Tests on macos-13 (push) Has been cancelled
5293: Support merging update and replacement operations r=irevoire a=Kerollmops

This PR fixes #5286 by modifying the auto-batcher and how we merge documents when preparing them for the new indexer.

## To do
- [x] Make sure we can auto-batch different operation types.
- [x] Make sure the indexer correctly understands and mixes the different kinds.
- [x] Create a test to see if it mixes the documents correctly.
- [x] Modify the auto-batcher tests for the new behavior.


Co-authored-by: Kerollmops <clement@meilisearch.com>
Co-authored-by: Tamo <tamo@meilisearch.com>
2025-02-03 11:28:41 +00:00
d34f0b606c Update crates/milli/src/update/new/document_change.rs 2025-02-03 12:08:52 +01:00
6425451bbc Merge #5303
5303: Bring back changes from v1.12.8 into v1.13.0 r=Kerollmops a=Kerollmops

Fixes #5087 and other problems that you can find in the original PR #5294.

Co-authored-by: Kerollmops <clement@meilisearch.com>
2025-02-03 10:49:26 +00:00
acc400face Support merging update and replacement operations 2025-02-03 11:47:17 +01:00
fe46855462 Merge #5235
5235: Introduce a compaction subcommand in meilitool r=dureuill a=Kerollmops

This PR proposes a change to the meilitool helper, introducing the `compact-index` subcommand to reduce the size of the indexes.

While working on this tool, I discovered that the current heed `Env::copy_to_file` API is not very temp file friendly and [could be improved](https://github.com/meilisearch/heed/issues/306).

Co-authored-by: Kerollmops <clement@meilisearch.com>
Co-authored-by: Clément Renault <clement@meilisearch.com>
2025-02-03 10:11:01 +00:00
8e7d2d25f2 Only open indexes, do not create them 2025-02-03 10:50:38 +01:00
a436534515 Fix test 2025-02-03 10:36:34 +01:00
aa2327591e Add more mixing updates and replacements tests 2025-02-03 10:34:07 +01:00
a6f9e0ddf0 Fix auto batching related tests 2025-02-03 10:34:07 +01:00
60470bb647 Fix the tests to use the new replace/update documents 2025-02-03 10:34:07 +01:00
294e1ba16d Fix functions calls to use the new mixed system 2025-02-03 10:34:06 +01:00
8e6893ddbe Make sure we correctly mix different document operations 2025-02-03 10:34:06 +01:00
d018346f18 Make the auto-batcher batche replacement with updates 2025-02-03 10:34:05 +01:00
2385842537 Fix the imports 2025-02-03 10:29:09 +01:00
6a70c0ec92 Add a link to the experimental feature GitHub discussion 2025-02-03 10:24:53 +01:00
7a9382b115 Better document the rayon limitation condition 2025-02-03 10:24:53 +01:00
62dabeba5f Do not create too many rayon tasks when processing the settings 2025-02-03 10:24:52 +01:00
48812229a9 Remove a log that would log too much 2025-02-03 10:24:52 +01:00
915cc377fb Refine the env variable and the max readers 2025-02-03 10:24:52 +01:00
96544bfa43 add DOCUMENT_TEMPLATE_MAX_BYTES to allowed_sources_for_field and allowed_fields_for_source 2025-02-03 09:59:17 +01:00
09d474da63 Merge #5140
Some checks failed
Test suite / Test disabled tokenization (push) Has been skipped
Test suite / Tests almost all features (push) Has been skipped
Test suite / Run tests in debug (push) Failing after 21s
Test suite / Tests on windows-2022 (push) Failing after 26s
Test suite / Run Clippy (push) Failing after 19s
Test suite / Run Rustfmt (push) Successful in 4m7s
Test suite / Tests on ubuntu-20.04 (push) Failing after 14m22s
Test suite / Tests on macos-13 (push) Has been cancelled
5140: Fix workload inversion r=dureuill a=ManyTheFish

The used assets were inverted between `workloads/hackernews-modify-facet-numbers.json`
and `workloads/hackernews-modify-facet-strings.json`, now fixed.


Co-authored-by: ManyTheFish <many@meilisearch.com>
2025-02-03 08:22:22 +00:00
aaefbfae1f Do not create too many rayon tasks 2025-01-30 16:36:12 +01:00
97e17f52a1 Add more logs to see calls to the embedders 2025-01-30 16:36:12 +01:00
62ced0e3f1 Make cargo fmt happy 2025-01-30 11:09:54 +01:00
71bb24f17e Throw and error when the index is not found
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2025-01-30 11:07:43 +01:00
c72f114b33 Fix english in the comments
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2025-01-30 11:07:09 +01:00
8ed39f5de0 Merge #5300
5300: Improve unexpected panic message r=irevoire a=irevoire

# Pull Request

## Related issue
Fixes https://github.com/meilisearch/meilisearch/issues/5273

## What does this PR do?
- When an unexpected panic happens in the index-scheduler we catch it and rebuild an error message from the join_error
- Same when the upgrade index-scheduler fails


Co-authored-by: Tamo <tamo@meilisearch.com>
2025-01-30 09:23:17 +00:00
424c5bde40 Move the embedding computation and extraction log to debug 2025-01-29 16:40:36 +01:00
bdd3005d10 Log the progress when a batch fails 2025-01-29 16:36:23 +01:00
4224edea28 Merge #5177
Some checks failed
Test suite / Tests on ubuntu-20.04 (push) Failing after 0s
Test suite / Tests almost all features (push) Has been skipped
Test suite / Test disabled tokenization (push) Has been skipped
Test suite / Run tests in debug (push) Failing after 2s
Test suite / Tests on windows-2022 (push) Failing after 23s
Test suite / Run Rustfmt (push) Successful in 2m17s
Test suite / Run Clippy (push) Successful in 5m55s
Test suite / Tests on macos-13 (push) Has been cancelled
5177: Debug log  the channel congestion r=Kerollmops a=Kerollmops

This PR displays the congestion of the BBQueue channel and the allocated memory for the channel and the extraction. This information can be beneficial for debugging and noticing slow disks. We show three pieces of information in debug:
- The direct attempts: the number of tries to send something in the BBQueue channel,
- The blocked attempts: the number of unsuccessful attempts that must be retried,
- The congestion: The percentage of blocking attempts. The higher, the slower the receiver and, therefore, the disk.

Co-authored-by: Kerollmops <clement@meilisearch.com>
Co-authored-by: Clément Renault <clement@meilisearch.com>
2025-01-29 15:35:31 +00:00
cb1b7513af Log the memory metrics only once 2025-01-29 15:21:52 +01:00
2f89b8209f Merge #5291
Some checks failed
Test suite / Tests almost all features (push) Has been skipped
Test suite / Test disabled tokenization (push) Has been skipped
Test suite / Tests on ubuntu-20.04 (push) Failing after 12s
Test suite / Run tests in debug (push) Failing after 12s
Test suite / Run Clippy (push) Failing after 21s
Test suite / Run Rustfmt (push) Successful in 1m43s
Test suite / Tests on windows-2022 (push) Failing after 5m39s
Test suite / Tests on macos-13 (push) Has been cancelled
5291: Fix Dotnet tests in sdks-tests.yml r=irevoire a=curquiza



Co-authored-by: Clémentine <clementine@meilisearch.com>
2025-01-29 14:18:48 +00:00
a9d0f4a002 Improve english comments
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2025-01-29 15:16:40 +01:00
db032079d8 Show indexation allocated memory 2025-01-29 14:21:02 +01:00
a00796c46a Improve the naming in the log message 2025-01-29 14:21:02 +01:00
6112bd8caa Display the channel congestion 2025-01-29 14:21:02 +01:00
cec88cfc29 Measure the bbqueue congestion 2025-01-29 14:21:02 +01:00
8439aeb7cf improve error message in case of unexpected panic while processing tasks 2025-01-29 11:51:06 +01:00
42257eec53 Merge #5272
Some checks failed
Test suite / Tests on ubuntu-20.04 (push) Failing after 1s
Test suite / Tests almost all features (push) Has been skipped
Test suite / Test disabled tokenization (push) Has been skipped
Test suite / Run tests in debug (push) Failing after 0s
Test suite / Tests on windows-2022 (push) Failing after 14s
Test suite / Run Rustfmt (push) Successful in 1m59s
Test suite / Run Clippy (push) Successful in 5m48s
Test suite / Tests on macos-13 (push) Has been cancelled
5272: Fix Batches Deletion and flaky tests r=irevoire a=Kerollmops

- This issue fixes #5263 by removing the batches from the date and time databases.
- It also introduces a new `enqueued_at` field in the batch object to quickly retrieve them in the `batches.enqueued_at` database
- Finally, it probably fixes all the flaky tests of the batches: https://github.com/meilisearch/meilisearch/issues/5256

Co-authored-by: Kerollmops <clement@meilisearch.com>
Co-authored-by: Tamo <tamo@meilisearch.com>
2025-01-28 16:14:11 +00:00
1beda3b9af fix another flaky test 2025-01-28 16:53:50 +01:00
8676e94f5c fix the flaky tests 2025-01-28 16:53:50 +01:00
ef47a0d820 apply review comment 2025-01-28 16:53:50 +01:00
e0f0da57e2 make sure the batches we snapshots actually all contains an enqueued_at 2025-01-28 16:53:50 +01:00
485e3127c7 use the remove_n_tasks_datetime_earlier_than function when updating batches 2025-01-28 16:53:50 +01:00
58f90b70c7 store the enqueued at to eases the batch deletion 2025-01-28 16:53:50 +01:00
508db9020d update the snapshots 2025-01-28 16:53:50 +01:00
6ff37c6fc4 Fix the insta snapshots 2025-01-28 16:53:50 +01:00
f21ae1f5d1 Remove the batch id from the date time databases 2025-01-28 16:53:50 +01:00
483c52f07b Merge #5289
5289: Fix workload files after removing the vectorStore experimental feature r=Kerollmops a=dureuill

Running the bench [currently fails](https://github.com/meilisearch/meilisearch/actions/runs/12990029453) on embedding-related workloads, due to the call to `/experimental-features` that is used to enable the vector store:

In v1.13, `vectorStore` is no longer an experimental feature, so trying to enable it causes a 400

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2025-01-28 10:28:21 +00:00
f0d7ab81ad Fix Dotnet tests in sdks-tests.yml 2025-01-27 15:37:32 +01:00
f88f415a00 Fix workload files after removing the vectorStore experimental feature 2025-01-27 14:39:28 +01:00
19bc885b07 Fix the milli logo 2025-01-27 14:30:59 +01:00
47f70e3d79 Debug the first vector sort fill buffer 2025-01-27 14:22:29 +01:00
0f8eb3b506 Improve the logs of the search with AI 2025-01-27 14:22:22 +01:00
4a5923a55e log the time arroy took to insert embeddings 2025-01-27 14:22:17 +01:00
de98656ed1 Merge #5210
Some checks failed
Test suite / Tests almost all features (push) Has been skipped
Run the indexing fuzzer / Setup the action (push) Failing after 6s
Test suite / Test disabled tokenization (push) Has been skipped
Test suite / Tests on ubuntu-20.04 (push) Failing after 14s
Test suite / Run tests in debug (push) Failing after 2s
Test suite / Run Rustfmt (push) Failing after 8s
Test suite / Tests on windows-2022 (push) Failing after 20s
Test suite / Run Clippy (push) Successful in 5m48s
Indexing bench (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of indexing (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for geo (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for songs (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for Wikipedia articles (push) / Run and upload benchmarks (push) Has been cancelled
Test suite / Tests on macos-13 (push) Has been cancelled
5210: Improve test performance of get_index.rs  r=irevoire a=DerTimonius

# Pull Request

## Related issue
related to #4840

## What does this PR do?
This PR aims to improve the performance of the tests in `get_index.rs`.

There is a small issue though: 
the `list_multiple_indexes` test works great when ran alone, but when running with other tests it fails with a `corrupted task queue` error. I guess this has something to do with using a shared server, but I was not really able to pinpoint the issue.

Also, the `no_index_return_empty_list` does not work a shared server (as there now will always be at least one index on the server) and I was not really sure if rebuilding the whole suite for `get_and_paginate_indexes` should be viable? While waiting for feedback on the issue mentioned above, I'll try to change the `get_and_paginate_indexes` test so that it can use the shared server

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Timon Jurschitsch <timon.jurschitsch@gmail.com>
Co-authored-by: Timon Jurschitsch <103483059+DerTimonius@users.noreply.github.com>
Co-authored-by: Tamo <tamo@meilisearch.com>
2025-01-27 10:04:08 +00:00
da7469be38 removed unrelated files 2025-01-27 10:35:34 +01:00
df9d10ac44 Merge #5284
5284: Fix [5281] Removed CouldNotUpgrade from error file  r=irevoire a=manojks1999

# Pull Request

## Related issue
Fixes #5281

## What does this PR do?
- ...

## PR checklist
Please check if your PR fulfills the following requirements:
- [ * ] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [ * ] Have you read the contributing guidelines?
- [ * ] Have you made sure that the title is accurate and descriptive of the changes?


Co-authored-by: manojks1999 <9743manoj@gmail.com>
2025-01-27 09:26:39 +00:00
528d9d6d8b Removed CouldNotUpgrade from error file 2025-01-26 21:04:57 +05:30
4fb5c39b92 resolve merge conflicts 2025-01-24 14:35:54 +01:00
022205af90 Merge #5279
Some checks failed
Test suite / Tests on ubuntu-20.04 (push) Failing after 0s
Test suite / Tests almost all features (push) Has been skipped
Test suite / Test disabled tokenization (push) Has been skipped
Test suite / Run tests in debug (push) Failing after 2s
Test suite / Tests on windows-2022 (push) Failing after 22s
Test suite / Run Rustfmt (push) Successful in 2m14s
Test suite / Run Clippy (push) Successful in 5m21s
Run the indexing fuzzer / Setup the action (push) Successful in 1h4m54s
Test suite / Tests on macos-13 (push) Has been cancelled
Indexing bench (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of indexing (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for geo (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for songs (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for Wikipedia articles (push) / Run and upload benchmarks (push) Has been cancelled
5279: Bring back changes from v1.12.7 into main r=dureuill a=Kerollmops

This PR brings back v1.12.7 into main.

Co-authored-by: Kerollmops <clement@meilisearch.com>
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
Co-authored-by: Clément Renault <clement@meilisearch.com>
2025-01-24 11:48:46 +00:00
50280bf02b Support offline upgrade up to v1.12.7 2025-01-24 12:25:33 +01:00
9b579069df Comment the max grant of the bbqueue
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2025-01-24 12:18:32 +01:00
f5a4a1c8b2 Give more RAM to bbqueue.
- bbqueue buffers used to have (5% * 2%) / num_threads
- they now have 5% / num_threads
2025-01-24 12:18:32 +01:00
5ab4cdb1f3 Reduce the maximum grant possible we can store in the BBQueue 2025-01-24 12:18:32 +01:00
1f54f07f72 Merge #5264
Some checks failed
Indexing bench (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of indexing (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for geo (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for songs (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for Wikipedia articles (push) / Run and upload benchmarks (push) Waiting to run
Test suite / Tests almost all features (push) Has been skipped
Test suite / Test disabled tokenization (push) Has been skipped
Test suite / Tests on ubuntu-20.04 (push) Failing after 12s
Test suite / Run tests in debug (push) Failing after 2s
Test suite / Tests on windows-2022 (push) Failing after 21s
Test suite / Run Rustfmt (push) Failing after 8s
Test suite / Run Clippy (push) Successful in 6m30s
Run the indexing fuzzer / Setup the action (push) Successful in 1h5m21s
Test suite / Tests on macos-13 (push) Has been cancelled
5264: Dumpless upgrade r=dureuill a=irevoire

# Pull Request
Usage: https://meilisearch.notion.site/Dumpless-upgrade-fff4b06b651f81f1acafe24d4687b3f7?pvs=74

## Related issue
Fixes https://github.com/meilisearch/meilisearch/issues/5162

## What does this PR do?
- Implement the dumpless upgrade with multiple hooks:
  - In meilisearch directly before the task queue has been opened
  - In the index-scheduler while processing the task
  - In milli while upgrading the indexes
- There is no hook at search/query time to handle the old version of a database. That's left to the next person upgrading a database
- A new special type of task (`upgradeDatabase`) that can be retried has been introduced
- A new experimental cli flag has been introduced
- The version has been upgraded to the v1.13.0 in this PR otherwise it was a lot of useless work to test the dumpless upgrade
- Multiple tests have been introduced

## PR checklist
Please check if your PR fulfills the following requirements:
- [ ] Update the issue template we use for features, mentioning what we should do in case of a database upgrade
- [ ] The experimental feature discussion should be opened and updated in the PR
- [ ] Update the PRD
    - [ ] Add the new error codes
    - [ ] Add the task details
    - [ ] Add the telemetry

## Notes

The new tests introduced are not _that_ slow
![image](https://github.com/user-attachments/assets/c5884540-482f-41eb-97ef-fc995c62d666)



Co-authored-by: Tamo <tamo@meilisearch.com>
2025-01-23 16:22:37 +00:00
73d8a4eace Remove db.snapshot 2025-01-23 17:21:42 +01:00
c1e5897076 Do not assume v1.12 when there is no index-scheduler version 2025-01-23 17:16:53 +01:00
718a98fbbf remove : char from filenames 2025-01-23 17:08:35 +01:00
86bf231d29 Change to meilitool after rebase 2025-01-23 16:59:32 +01:00
182c3f4b80 Write assumed version to the index-scheduler version db when it is missing 2025-01-23 16:51:25 +01:00
c1eba66443 introduce a corruption in the v1.12 data.ms field distribution 2025-01-23 16:51:24 +01:00
7197ced673 fix the bad index version on opening 2025-01-23 16:51:24 +01:00
4f21ee6c66 update the data.ms snapshot 2025-01-23 16:51:24 +01:00
787472453d write the version of the index while upgrading it 2025-01-23 16:51:24 +01:00
8f65f35de9 rewrite part of the index-scheduler upgrade test 2025-01-23 16:51:23 +01:00
c27c923439 introduce a trait to upgrade the indexes 2025-01-23 16:51:23 +01:00
fd5649091d add the upgradeTo field in the details 2025-01-23 16:51:23 +01:00
9a57736773 fix the early exit when rewriting a batch 2025-01-23 16:51:23 +01:00
7740997ea8 reintroduce the unrecoverable error and use it where its supposed to be used 2025-01-23 16:51:22 +01:00
7eb23f73ba add the version to the index-scheduler snapshots + fix a bug when opening an index scheduler for the first time 2025-01-23 16:51:22 +01:00
b9e9fc376a add the version in the index-scheduler 2025-01-23 16:51:22 +01:00
27bf2f1298 remove the empty progress made for the upgrade database 2025-01-23 16:51:22 +01:00
d4d82fbd0c commit the index wtxn before the index-scheduler wtxn 2025-01-23 16:51:21 +01:00
eda09a54da improve the index-scheduler tests 2025-01-23 16:51:21 +01:00
b132d70413 fix the details in all cases 2025-01-23 16:51:21 +01:00
e41ebd3047 expose the number of database in the index-scheduler and rewrite the lib.rs to use the value provided in the options instead of a magic number 2025-01-23 16:51:21 +01:00
705d31e8bd apply all the comments changes 2025-01-23 16:51:21 +01:00
7d95950ce6 fix warning 2025-01-23 16:51:21 +01:00
c6b4c21c23 update the snapshots after the rebase 2025-01-23 16:51:20 +01:00
bf96fdb858 update the cli url 2025-01-23 16:51:20 +01:00
41eeffd88d fmt 2025-01-23 16:51:20 +01:00
1eb9fe8562 remove warnings 2025-01-23 16:51:20 +01:00
bac7a1623a fix the upgrade test 2025-01-23 16:51:19 +01:00
5458850d21 write a test ensuring the index-scheduler is effectively down when the upgrade task fail and try to process it when it restarts. There is a bug when deleting this task 2025-01-23 16:51:19 +01:00
20ac59c946 fix the field distribution when upgrading from the v1_12 2025-01-23 16:51:19 +01:00
cfc1e193b6 update the test with the stats 2025-01-23 16:51:19 +01:00
0cc25c7e4c add a large test importing a data.ms from the v1.12.0 2025-01-23 16:51:18 +01:00
102681e384 starts adding tests and fix the starts of meilisearch 2025-01-23 16:51:18 +01:00
3ef7a478cd move the version check to the task queue 2025-01-23 16:48:32 +01:00
e70ac35e02 fix bugs after rebase 2025-01-23 16:48:32 +01:00
d3654906bf Add the new tasks with most of the job done 2025-01-23 16:48:32 +01:00
e6295c9c5f Introduce a meilitool subcommand to compact an index 2025-01-22 16:37:00 +01:00
b15de68831 Merge #5257
Some checks failed
Indexing bench (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of indexing (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for geo (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for songs (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for Wikipedia articles (push) / Run and upload benchmarks (push) Waiting to run
Test suite / Tests on ubuntu-20.04 (push) Failing after 0s
Test suite / Tests almost all features (push) Has been skipped
Test suite / Test disabled tokenization (push) Has been skipped
Test suite / Run tests in debug (push) Failing after 11s
Test suite / Run Clippy (push) Failing after 11s
Test suite / Tests on windows-2022 (push) Failing after 43s
Test suite / Run Rustfmt (push) Successful in 2m21s
Run the indexing fuzzer / Setup the action (push) Successful in 1h5m6s
Test suite / Tests on macos-13 (push) Has been cancelled
5257: Fix ollama r=Kerollmops a=dureuill

Fix oversight in ollama embedder 

WIP Integration tests are on branch `ollama-integration-test` and will be added as a future PR.

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2025-01-22 15:36:15 +00:00
6723700fb9 Merge #5262
5262: Bring back changes from v1.12.4, v1.12.5, and v1.12.6 into main r=dureuill a=Kerollmops

This PR follows [this guideline to bring back changes after we worked on v1.12.4, v1.12.5, and v1.12.6](https://github.com/meilisearch/engine-team/blob/main/resources/meilisearch-release.md#after-the-release-bring-back-changes-to-main).

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
Co-authored-by: ManyTheFish <many@meilisearch.com>
Co-authored-by: Kerollmops <clement@meilisearch.com>
Co-authored-by: Kerollmops <Kerollmops@users.noreply.github.com>
2025-01-22 14:55:02 +00:00
2c099b7c23 Update Cargo.lock again 2025-01-22 15:53:52 +01:00
50fca8fc70 Create update files in new format 2025-01-22 15:51:21 +01:00
b9d92c481b Update version for the next release (v1.12.6) in Cargo.toml 2025-01-22 15:51:20 +01:00
d142c5e432 Do not panic when the facet string is not found 2025-01-22 15:50:43 +01:00
4d4683adb6 Add a test to check the facet casing is good 2025-01-22 15:50:42 +01:00
d6063079af Unify facet strings by their normalized value 2025-01-22 15:50:42 +01:00
2e04ab4737 Replace guards by OR patterns
Co-authored-by: Tamo <tamo@meilisearch.com>
2025-01-22 15:50:42 +01:00
d95384a636 Remove batch ids on export 2025-01-22 15:50:42 +01:00
c0690f5b9e Make offline upgrade more flexible 2025-01-22 15:50:42 +01:00
909d84447d meilitool dumps old-style dump for older DBs, otherwise new-style 2025-01-22 15:50:42 +01:00
2cf57d584e Handle empty payloads 2025-01-22 15:50:42 +01:00
59242b9c4f Fix warnings 2025-01-22 15:50:42 +01:00
6a6212d4e1 Fix warnings 2025-01-22 15:50:42 +01:00
a8006a3750 Change format of update file when importing dump 2025-01-22 15:50:41 +01:00
0e0e462f5b Also fix dump import from meilitool 2025-01-22 15:50:41 +01:00
805531c90d Do not explode on missing content file if the task has no docs 2025-01-22 15:50:41 +01:00
a6470a0c37 Improve error log 2025-01-22 15:50:41 +01:00
8a54f14b8e Demote panic to error log 2025-01-22 15:49:24 +01:00
be5e521cb0 Merge #5271
5271: Update version for the next release (v1.13.0) in Cargo.toml r=curquiza a=meili-bot

⚠️ This PR is automatically generated. Check the new version is the expected one and Cargo.lock has been updated before merging.

Co-authored-by: Kerollmops <Kerollmops@users.noreply.github.com>
2025-01-22 13:04:20 +00:00
60f20119a2 Update version for the next release (v1.13.0) in Cargo.toml 2025-01-22 10:52:47 +00:00
2f257fdc3d fix clippy error 2025-01-21 17:11:29 +01:00
0991cb0de4 change list_multiple_indexes test to single server 2025-01-21 17:01:45 +01:00
4709c638ed Swap implementations of ollama 2025-01-20 22:22:22 +01:00
0776217801 Merge #5234
Some checks failed
Test suite / Tests on ${{ matrix.os }} (macos-13) (push) Waiting to run
Run the indexing fuzzer / Setup the action (push) Failing after 15s
Test suite / Tests on ${{ matrix.os }} (windows-2022) (push) Failing after 12m54s
Indexing bench (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of indexing (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for geo (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for songs (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for Wikipedia articles (push) / Run and upload benchmarks (push) Has been cancelled
Look for flaky tests / flaky (push) Failing after 7s
Publish binaries to GitHub release / Check the version validity (push) Successful in 11s
Publish binaries to GitHub release / Publish binary for Linux (push) Failing after 1s
Publish binaries to GitHub release / Publish binary for aarch64 (meilisearch-linux-aarch64, aarch64-unknown-linux-gnu) (push) Failing after 15s
Publish binaries to GitHub release / Publish binary for windows-2022 (push) Failing after 24s
Publish binaries to GitHub release / Publish binary for macos-13 (push) Has been cancelled
Publish binaries to GitHub release / Publish binary for macOS silicon (meilisearch-macos-apple-silicon, aarch64-apple-darwin) (push) Has been cancelled
Test suite / Tests almost all features (push) Failing after 1s
Test suite / Test disabled tokenization (push) Failing after 2s
Test suite / Tests on ubuntu-20.04 (push) Failing after 12s
Test suite / Run tests in debug (push) Failing after 1s
Test suite / Tests on windows-2022 (push) Failing after 26s
Test suite / Run Clippy (push) Failing after 21s
Test suite / Run Rustfmt (push) Successful in 1m37s
Test suite / Tests on macos-13 (push) Has been cancelled
5234: Parse ollama URL to adapt configuration depending on the endpoint r=Kerollmops a=dureuill

# Pull Request

## Related issue
Fixes #5002 

## What does this PR do?
- Parses `url` parameter of `ollama` to recognize supported endpoint and adapt the REST configuration to the recognized endpoint
- Throws a new error if no endpoint is recognized
- Add a test for the various recognized endpoints


Thanks to `@Guikingone` for the original report and PR

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2025-01-20 09:51:42 +00:00
9eae36ce3e update snapshot 2025-01-16 17:17:06 +01:00
3f501c9b85 Update crates/index-scheduler/src/scheduler/test.rs
Co-authored-by: Tamo <irevoire@protonmail.ch>
2025-01-16 16:13:14 +01:00
c85146524b Merge #5232
Some checks failed
Run the indexing fuzzer / Setup the action (push) Successful in 1h5m2s
Test suite / Tests on ${{ matrix.os }} (macos-13) (push) Waiting to run
Look for flaky tests / flaky (push) Failing after 1s
Indexing bench (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of indexing (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for geo (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for songs (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for Wikipedia articles (push) / Run and upload benchmarks (push) Has been cancelled
Publish binaries to GitHub release / Publish binary for Linux (push) Has been skipped
Publish binaries to GitHub release / Publish binary for ${{ matrix.os }} (meilisearch, meilisearch-macos-amd64, macos-13) (push) Has been skipped
Publish binaries to GitHub release / Publish binary for ${{ matrix.os }} (meilisearch.exe, meilisearch-windows-amd64.exe, windows-2022) (push) Has been skipped
Publish binaries to GitHub release / Publish binary for macOS silicon (meilisearch-macos-apple-silicon, aarch64-apple-darwin) (push) Has been skipped
Publish binaries to GitHub release / Publish binary for aarch64 (meilisearch-linux-aarch64, aarch64-unknown-linux-gnu) (push) Has been skipped
Test suite / Tests almost all features (push) Failing after 2s
Test suite / Test disabled tokenization (push) Failing after 0s
Test suite / Run tests in debug (push) Failing after 1s
Test suite / Tests on ubuntu-20.04 (push) Failing after 21s
Test suite / Tests on ${{ matrix.os }} (windows-2022) (push) Failing after 24s
Test suite / Run Rustfmt (push) Failing after 17s
Test suite / Run Clippy (push) Failing after 6m47s
Publish binaries to GitHub release / Check the version validity (push) Failing after 5s
5232: Stabilize vector store feature r=Kerollmops a=dureuill

# Pull Request

## Related issue
Fixes #4733 

## What does this PR do?
- `vectorStore` feature can no longer be set or get from `/experimental-features`
- That feature has been removed, and there is no longer any check for its activation
- Always display `embedders` in the settings, even if empty
- Always hide `_vectors` in documents, unless `retrieveVectors: true`
- Make error codes consistent with the usual nomenclature
- Update tests as needed


Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2025-01-16 11:50:21 +00:00
79d192fb3f implement suggestions 2025-01-16 11:42:12 +01:00
a4ed36f0cc Merge branch 'main' of github.com:meilisearch/meilisearch into chore/update-get-index-test 2025-01-16 11:17:17 +01:00
dddb51a9ca removed trailing whitespace so cargo fmt passes 2025-01-15 13:30:10 -05:00
8f006eeaf3 Merge #5239
Some checks failed
Indexing bench (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of indexing (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for geo (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for songs (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for Wikipedia articles (push) / Run and upload benchmarks (push) Waiting to run
Test suite / Tests on ${{ matrix.os }} (macos-13) (push) Waiting to run
Test suite / Tests on ${{ matrix.os }} (windows-2022) (push) Failing after 27s
Test suite / Tests on ubuntu-20.04 (push) Failing after 20s
Test suite / Tests almost all features (push) Has been skipped
Test suite / Test disabled tokenization (push) Has been skipped
Test suite / Run tests in debug (push) Failing after 26s
Test suite / Run Clippy (push) Successful in 7m55s
Test suite / Run Rustfmt (push) Successful in 2m21s
Run the indexing fuzzer / Setup the action (push) Successful in 1h6m18s
5239: Fix corrupted task queue errors on index creation r=dureuill a=irevoire

# Pull Request

## Related issue
Fixes https://github.com/meilisearch/meilisearch/issues/5238

## What does this PR do?
- Add a test that reproduces the issue and ensure we never introduce the bug again
- Fix the bug by storing the stats of the index upon creation instead of waiting for the update index task to do it


Co-authored-by: Tamo <tamo@meilisearch.com>
2025-01-15 12:45:25 +00:00
445e5aff02 fix the corruption 2025-01-15 12:38:40 +01:00
234d0c360f Add a test reproducing the issue 2025-01-15 12:29:56 +01:00
cd181b36c3 all test cases now passing 2025-01-14 17:50:31 -05:00
4cfe0dbdd8 Merge #5237
Some checks failed
Indexing bench (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of indexing (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for geo (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for songs (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for Wikipedia articles (push) / Run and upload benchmarks (push) Waiting to run
Test suite / Tests on ${{ matrix.os }} (macos-13) (push) Waiting to run
Test suite / Tests on ubuntu-20.04 (push) Failing after 13s
Test suite / Tests almost all features (push) Has been skipped
Test suite / Test disabled tokenization (push) Has been skipped
Test suite / Tests on ${{ matrix.os }} (windows-2022) (push) Failing after 26s
Test suite / Run tests in debug (push) Failing after 11s
Test suite / Run Clippy (push) Successful in 7m58s
Test suite / Run Rustfmt (push) Successful in 2m44s
Run the indexing fuzzer / Setup the action (push) Successful in 1h5m43s
5237: Bring back v1.12.3 changes into main r=irevoire a=dureuill

This brings back the (already reviewed) changes of v1.12.3 into main:

1. fix the field distribution issue
2. improve the error message when trying to delete a non-existing key

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
Co-authored-by: Kerollmops <clement@meilisearch.com>
2025-01-14 14:04:48 +00:00
89a4ac92eb fix after rebase 2025-01-14 14:08:56 +01:00
deb90ff573 Fix tests 2025-01-14 13:55:34 +01:00
0c10063a87 PATCH experimental-features also returns the route type rather than internal type 2025-01-14 13:55:34 +01:00
87ea080c10 Fully remove vector store feature 2025-01-14 13:55:34 +01:00
6d62fa061b Fix tests 2025-01-14 13:55:34 +01:00
de6cd3ac01 Consistent error codes 2025-01-14 13:55:34 +01:00
cb8f033130 Fix tests 2025-01-14 13:55:34 +01:00
03097e65e8 Always display embedders setting 2025-01-14 13:55:34 +01:00
c32bec338f Fix tests 2025-01-14 13:55:33 +01:00
73d3d286d9 Serialize features as camelCase 2025-01-14 13:53:53 +01:00
29eeb84ce3 Add --experimental-disable-vector-store CLI flag 2025-01-14 13:53:53 +01:00
d78951feb7 vectorStore stabilization
- `vectorStore` feature is always enabled
- `vectorStore` can no longer be set in the `/experimental-features` PATCH route
- `vectorStore` status is no longer returned in the `/experimental-features` GET route
2025-01-14 13:53:53 +01:00
63c8cbae5b Improve the panic message when deleting an unknown entry 2025-01-14 10:31:44 +01:00
72ded27e98 Update after review 2025-01-14 10:24:50 +01:00
c25781f720 Skip rebuilding field distribution if not coming from v1.12 2025-01-14 10:24:28 +01:00
c3b18fede9 write stats after rebuilding facet distribution 2025-01-14 10:24:27 +01:00
4070895a21 Add support to upgrade to v1.12.3 in meilitool 2025-01-14 10:24:27 +01:00
a21711f473 Fix test 2025-01-14 10:23:59 +01:00
f0ec8cbffe Add currently failing test 2025-01-14 10:23:15 +01:00
e568dbbabb Merge #5182
Some checks failed
Indexing bench (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of indexing (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for geo (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for songs (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for Wikipedia articles (push) / Run and upload benchmarks (push) Waiting to run
Test suite / Tests on ${{ matrix.os }} (macos-13) (push) Waiting to run
Test suite / Tests on ubuntu-20.04 (push) Failing after 11s
Test suite / Tests almost all features (push) Has been skipped
Test suite / Tests on ${{ matrix.os }} (windows-2022) (push) Failing after 13s
Test suite / Test disabled tokenization (push) Has been skipped
Test suite / Run tests in debug (push) Failing after 9s
Test suite / Run Clippy (push) Successful in 6m11s
Test suite / Run Rustfmt (push) Successful in 2m38s
Run the indexing fuzzer / Setup the action (push) Successful in 1h5m8s
5182: Remove hard coded task ids to prevent flaky tests r=irevoire a=mhmoudr

# Pull Request

## Related issue
Fixes partial #4840

## What does this PR do?
- Mainly scan the test code for any hard coded task Id and replace it by the returned task Id once the action or task is performed on an index.
- PS: _PR is not split by files as it has one theme applied to all tests which make it easy to review_ 


## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Mahmoud Rawas <mhmoudr@gmail.com>
Co-authored-by: Tamo <tamo@meilisearch.com>
2025-01-13 15:18:55 +00:00
8ff15b3dfb fix the tests 2025-01-13 16:17:50 +01:00
247eaed872 Merge #5221
Some checks failed
Test suite / Tests on ${{ matrix.os }} (macos-13) (push) Waiting to run
Test suite / Tests on ${{ matrix.os }} (windows-2022) (push) Failing after 1m4s
Test suite / Tests on ubuntu-20.04 (push) Failing after 27s
Test suite / Tests almost all features (push) Has been skipped
Test suite / Test disabled tokenization (push) Has been skipped
Test suite / Run tests in debug (push) Failing after 20s
Test suite / Run Clippy (push) Successful in 8m45s
Test suite / Run Rustfmt (push) Successful in 2m31s
5221: Merge bitmaps by using `Extend::extend` r=Kerollmops a=Kerollmops

This PR tries to speed up the merging of bitmaps by using [the new `Extend::extend` implementation](https://github.com/RoaringBitmap/roaring-rs/pull/306).

Co-authored-by: Clément Renault <clement@meilisearch.com>
2025-01-13 13:43:28 +00:00
8b1fcfd7f8 Parse ollama URL to adapt configuration depending on the endpoint 2025-01-13 14:34:11 +01:00
45f289488d Add test for url checks on ollama embedders 2025-01-13 14:33:30 +01:00
b0ef7701ae Merge #5231
Some checks failed
Indexing bench (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of indexing (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for geo (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for songs (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for Wikipedia articles (push) / Run and upload benchmarks (push) Waiting to run
Run the indexing fuzzer / Setup the action (push) Successful in 1h6m28s
Publish binaries to GitHub release / Publish binary for ${{ matrix.os }} (meilisearch, meilisearch-macos-amd64, macos-13) (push) Waiting to run
Publish binaries to GitHub release / Publish binary for macOS silicon (meilisearch-macos-apple-silicon, aarch64-apple-darwin) (push) Waiting to run
Test suite / Tests on ${{ matrix.os }} (macos-13) (push) Waiting to run
Publish binaries to GitHub release / Check the version validity (push) Successful in 8s
Publish binaries to GitHub release / Publish binary for Linux (push) Failing after 8s
Publish binaries to GitHub release / Publish binary for ${{ matrix.os }} (meilisearch.exe, meilisearch-windows-amd64.exe, windows-2022) (push) Failing after 27s
Publish binaries to GitHub release / Publish binary for aarch64 (meilisearch-linux-aarch64, aarch64-unknown-linux-gnu) (push) Failing after 10s
Test suite / Tests on ${{ matrix.os }} (windows-2022) (push) Failing after 19s
Test suite / Tests on ubuntu-20.04 (push) Failing after 26s
Test suite / Tests almost all features (push) Failing after 25s
Test suite / Test disabled tokenization (push) Failing after 26s
Test suite / Run tests in debug (push) Failing after 47s
Test suite / Run Rustfmt (push) Successful in 4m40s
Test suite / Run Clippy (push) Successful in 11m19s
5231: Improve openapi r=dureuill a=irevoire

# Pull Request

## Related issue
Fixes https://github.com/meilisearch/open-api/issues/17
Fixes https://github.com/meilisearch/open-api/issues/13
Fixes https://github.com/meilisearch/open-api/issues/14
Fixes https://github.com/meilisearch/open-api/issues/16


Co-authored-by: Tamo <tamo@meilisearch.com>
2025-01-13 13:04:09 +00:00
c9fb6c48b8 Update crates/meilisearch/src/routes/features.rs
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2025-01-13 13:43:38 +01:00
0de34aa8fa avoid generating the same operationId 2025-01-13 12:36:22 +01:00
6bfcad4b05 Add the server property 2025-01-13 12:13:39 +01:00
67a0c9fff8 remove trailing slash in path 2025-01-13 11:55:59 +01:00
cc4aca78c4 Merge #5220
5220: Merge back changes of v1.12.2 in main r=dureuill a=dureuill



Co-authored-by: curquiza <curquiza@users.noreply.github.com>
Co-authored-by: Tamo <tamo@meilisearch.com>
Co-authored-by: dureuill <dureuill@users.noreply.github.com>
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2025-01-13 10:54:36 +00:00
5c7fa9b924 fix the examples of the experimental-feature route 2025-01-13 11:40:57 +01:00
9837de271d fixed majority of errors 2025-01-10 15:31:45 -05:00
fd251c37bb Merge #5225
Some checks failed
Test suite / Run tests in debug (push) Failing after 10s
Test suite / Tests on ubuntu-20.04 (push) Failing after 11s
Test suite / Tests almost all features (push) Has been skipped
Test suite / Test disabled tokenization (push) Has been skipped
Test suite / Tests on ${{ matrix.os }} (windows-2022) (push) Failing after 53s
Test suite / Run Clippy (push) Successful in 6m26s
Test suite / Run Rustfmt (push) Successful in 1m38s
Run the indexing fuzzer / Setup the action (push) Successful in 1h5m31s
Indexing bench (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of indexing (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for geo (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for songs (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for Wikipedia articles (push) / Run and upload benchmarks (push) Has been cancelled
Test suite / Tests on ${{ matrix.os }} (macos-13) (push) Has been cancelled
5225: Update license for 2025 r=curquiza a=meili-bot

_This PR is auto-generated._


Co-authored-by: meili-bot <74670311+meili-bot@users.noreply.github.com>
2025-01-10 13:28:49 +00:00
adb6bca950 Update LICENSE 2025-01-10 14:19:54 +01:00
42854c0bca Merge #5223
Some checks failed
Indexing bench (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of indexing (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for geo (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for songs (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for Wikipedia articles (push) / Run and upload benchmarks (push) Waiting to run
Test suite / Tests on ${{ matrix.os }} (macos-13) (push) Waiting to run
Test suite / Tests on ${{ matrix.os }} (windows-2022) (push) Failing after 12s
Test suite / Tests on ubuntu-20.04 (push) Failing after 11s
Test suite / Tests almost all features (push) Has been skipped
Test suite / Test disabled tokenization (push) Has been skipped
Test suite / Run tests in debug (push) Failing after 9s
Test suite / Run Clippy (push) Successful in 6m16s
Test suite / Run Rustfmt (push) Successful in 1m39s
Run the indexing fuzzer / Setup the action (push) Successful in 1h5m11s
5223: Limit batched tasks total size r=curquiza a=Kerollmops

Introduce a new engine parameter (env and config, too) to limit the maximum payload size processed by the engine in batches. You can [review the Discussion and usage on GitHub](https://github.com/orgs/meilisearch/discussions/801).

Co-authored-by: Clément Renault <clement@meilisearch.com>
2025-01-09 16:13:17 +00:00
d0bdff7b7b Make the batched tasks size limit effectively work 2025-01-09 12:06:28 +01:00
8650ee66c1 Introduce the new experimental-limit-batched-tasks-total-size argument 2025-01-09 12:06:28 +01:00
377fa09cb7 Merge pull request #5218 from meilisearch/upgrade-dependencies
Some checks failed
Indexing bench (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of indexing (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for geo (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for songs (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for Wikipedia articles (push) / Run and upload benchmarks (push) Waiting to run
Run the indexing fuzzer / Setup the action (push) Successful in 1h5m17s
Publish binaries to GitHub release / Publish binary for ${{ matrix.os }} (meilisearch, meilisearch-macos-amd64, macos-13) (push) Waiting to run
Publish binaries to GitHub release / Publish binary for macOS silicon (meilisearch-macos-apple-silicon, aarch64-apple-darwin) (push) Waiting to run
Test suite / Tests on ${{ matrix.os }} (macos-13) (push) Waiting to run
Look for flaky tests / flaky (push) Failing after 9s
Publish binaries to GitHub release / Check the version validity (push) Successful in 8s
Publish binaries to GitHub release / Publish binary for Linux (push) Failing after 8s
Publish binaries to GitHub release / Publish binary for aarch64 (meilisearch-linux-aarch64, aarch64-unknown-linux-gnu) (push) Failing after 9s
Publish binaries to GitHub release / Publish binary for ${{ matrix.os }} (meilisearch.exe, meilisearch-windows-amd64.exe, windows-2022) (push) Failing after 28s
Test suite / Tests on ubuntu-20.04 (push) Failing after 10s
Test suite / Tests on ${{ matrix.os }} (windows-2022) (push) Failing after 13s
Test suite / Tests almost all features (push) Failing after 8s
Test suite / Test disabled tokenization (push) Failing after 7s
Test suite / Run tests in debug (push) Failing after 9s
Test suite / Run Rustfmt (push) Successful in 2m26s
Test suite / Run Clippy (push) Successful in 5m40s
Upgrade dependencies
2025-01-09 11:46:44 +01:00
00a03742ff Prefer using extend when merging bitmaps than unions (less allocations) 2025-01-09 10:42:38 +01:00
d11e359244 When spilling on the next fid, no longer ignore children 2025-01-09 10:36:38 +01:00
09d45439c7 Check valid_facet_value as part of a filter of the iterator 2025-01-09 10:36:38 +01:00
5d92da0c73 No longer ignore the first child without parent 2025-01-09 10:36:38 +01:00
677bb39e73 Modernize valid_lmdb_key 2025-01-09 10:36:38 +01:00
85ea77de0b Switch to an iterative algorithm for find_changed_parents 2025-01-09 10:36:38 +01:00
03317be0bd Update after review 2025-01-09 10:36:38 +01:00
4aa7c8f7b1 Remove unused FacetFieldIdOperation 2025-01-09 10:36:37 +01:00
ce57a342a3 center groups 2025-01-09 10:36:37 +01:00
1cc6cd78e0 Fix uselessly deep stack trace 2025-01-09 10:36:37 +01:00
c204afdc79 Update snapshot 2025-01-09 10:36:37 +01:00
c14967eeac Use new incremental facet indexing and enable sanity checks in debug 2025-01-09 10:36:35 +01:00
f38db86120 Add new incremental facet indexing 2025-01-09 10:24:36 +01:00
50b155fa2d add valid_facet_value utility function 2025-01-09 10:24:36 +01:00
a533c8e041 Add sanity checks for facet values 2025-01-09 10:24:36 +01:00
e5595a05df Update version for the next release (v1.12.2) in Cargo.toml 2025-01-09 10:24:36 +01:00
908adee6fc Fix the addition of empty payload 2025-01-09 10:24:36 +01:00
7b3353252f update the test to ensure it works when specifying the primary key or not: it doesn't work 2025-01-09 10:24:35 +01:00
647a10bf18 stop skipping empty tasks when adding documents 2025-01-09 10:24:34 +01:00
f2141a894a Bump roaring to v0.10.10 2025-01-09 10:21:05 +01:00
08c332980b add a test reproducing the bug 2025-01-09 10:12:12 +01:00
7b57a44b5a Update version for the next release (v1.12.1) in Cargo.toml 2025-01-09 10:12:12 +01:00
fe2c0cc3d5 Bump rust version to v1.81 2025-01-09 09:47:08 +01:00
eecf4c53e7 updated changes 2025-01-08 15:10:09 -05:00
cf4c3c287b Make rustfmt happy 2025-01-08 18:24:39 +01:00
71e5605daa Make clippy happy 2025-01-08 18:24:39 +01:00
890a5c64dd Merge #5216
Some checks failed
Indexing bench (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of indexing (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for geo (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for songs (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for Wikipedia articles (push) / Run and upload benchmarks (push) Waiting to run
Test suite / Tests on ${{ matrix.os }} (macos-13) (push) Waiting to run
Test suite / Tests on ${{ matrix.os }} (windows-2022) (push) Failing after 22s
Test suite / Tests on ubuntu-20.04 (push) Failing after 11s
Test suite / Tests almost all features (push) Failing after 9s
Test suite / Test disabled tokenization (push) Failing after 9s
Test suite / Run tests in debug (push) Failing after 9s
Run the indexing fuzzer / Setup the action (push) Successful in 1h5m27s
Test suite / Run Rustfmt (push) Successful in 1m37s
Test suite / Run Clippy (push) Successful in 5m58s
5216: Add support for GITHUB_TOKEN authentication in installation script r=curquiza a=Sherlouk

# Pull Request

## What does this PR do?
This tweaks the install script to support detection of a "GITHUB_TOKEN" variable. This is well documented [here](https://docs.github.com/en/actions/security-for-github-actions/security-guides/automatic-token-authentication) but is useful for GitHub Actions workflows, reducing the need for users to maintain a separate PAT token. This should be more reliable.

Note: these changes have been tested on the Swift project: https://github.com/meilisearch/meilisearch-swift/pull/464.

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: James Sherlock <15193942+Sherlouk@users.noreply.github.com>
2025-01-08 17:15:10 +00:00
0ee4671a91 Fix after upgrading candle 2025-01-08 15:59:56 +01:00
68333424c6 Remove a useless script test 2025-01-08 15:59:43 +01:00
d4529d8c83 Fix after upgrading sysinfo 2025-01-08 15:59:30 +01:00
5e8144b0e1 Remove fuzzing feature 2025-01-08 15:59:03 +01:00
3e3695445f Fix after upgrading thiserror 2025-01-08 15:58:32 +01:00
091f989b72 Upgrade incompatible dependencies 2025-01-08 15:58:03 +01:00
dd28a3fd5a Bump the minimal version to 1.81 as we use std LazyLock 2025-01-08 15:31:24 +01:00
6f24b438e0 Ignore benchmarks folder 2025-01-08 15:31:24 +01:00
48a9ad4c17 Fix insta to 1.39 2025-01-08 15:18:08 +01:00
b997039a91 Upgrade compatible dependencies 2025-01-08 13:52:14 +01:00
0e6b6bd130 Merge #4867
Some checks failed
Indexing bench (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of indexing (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for geo (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for songs (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for Wikipedia articles (push) / Run and upload benchmarks (push) Waiting to run
Test suite / Tests on ${{ matrix.os }} (macos-13) (push) Waiting to run
Test suite / Tests on ubuntu-20.04 (push) Failing after 12s
Test suite / Tests almost all features (push) Has been skipped
Test suite / Test disabled tokenization (push) Has been skipped
Test suite / Run tests in debug (push) Failing after 10s
Test suite / Tests on ${{ matrix.os }} (windows-2022) (push) Failing after 48s
Test suite / Run Clippy (push) Successful in 7m0s
Test suite / Run Rustfmt (push) Successful in 1m55s
Run the indexing fuzzer / Setup the action (push) Successful in 1h5m59s
4867: Autogenerate the openAPI spec r=irevoire a=irevoire

# Pull Request

## Related issue
Fixes https://github.com/meilisearch/meilisearch/issues/5073

## What does this PR do?
- Introduce utoipa and the auto-generation of the openAPI file
- Introduce the scalar swagger when the `swagger` feature flag is enabled.

Generating the openAPI file takes between 15 and 20ms at startup time on my computer. That could be an issue if we plan to stabilize the feature.

Co-authored-by: Tamo <tamo@meilisearch.com>
2025-01-08 09:45:50 +00:00
b1b0b0b67c Merge #5168
5168: Refactor indexer r=ManyTheFish a=dureuill

# Pull Request

Split the indexer mod into multiple submodules. 

This restores the ability of rustfmt to format the file 🎉

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2025-01-08 09:12:49 +00:00
27155f845c adding back a missing task wait. 2025-01-08 15:36:10 +11:00
c6f14279d7 remove unused imports. 2025-01-08 15:11:34 +11:00
fa15356209 Add support for GITHUB_TOKEN authentication 2025-01-07 20:21:00 +00:00
99f5e09a79 fix the tests 2025-01-07 16:42:53 +01:00
a8ef6f08e0 Update crates/meilisearch-types/src/settings.rs
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2025-01-07 16:31:39 +01:00
ae5a04e85c apply review comments 2025-01-07 16:30:14 +01:00
8ebfc9fa92 Update crates/meilisearch-types/src/settings.rs
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2025-01-07 16:30:14 +01:00
21026f0ca8 move the swagger behind a feature flag 2025-01-07 16:30:14 +01:00
e579554c84 fmt 2025-01-07 16:30:12 +01:00
ff49250c1a remove useless doc 2025-01-07 16:29:09 +01:00
8b95c6ae56 improve the description of all the settings route 2025-01-07 16:29:09 +01:00
28162759a4 fix imports after rebase 2025-01-07 16:29:08 +01:00
dd128656cb fix all the tests 2025-01-07 16:28:12 +01:00
4456df5a46 fix some tests 2025-01-07 16:28:11 +01:00
0b104b3efa fix the list indexes 2025-01-07 16:26:06 +01:00
ac944f0960 review all the return type 2025-01-07 16:26:06 +01:00
5f55e88484 review all the parameters and tags 2025-01-07 16:26:06 +01:00
aab6ffec30 fix and review all the documents route 2025-01-07 16:26:06 +01:00
1dd33af8a3 add the batches 2025-01-07 16:26:06 +01:00
8a2a1e4d27 add the experimental features route 2025-01-07 16:26:06 +01:00
e2686c0fce add the swap indexes 2025-01-07 16:26:06 +01:00
9473a2a6ca add the multi-search 2025-01-07 16:26:06 +01:00
11ce3b9636 fix the settings 2025-01-07 16:26:06 +01:00
0bf4157a75 try my best to make the sub-settings routes works, it doesn't 2025-01-07 16:26:06 +01:00
4eaa626bca add the similar route 2025-01-07 16:26:06 +01:00
668b26b641 add the facet search 2025-01-07 16:26:06 +01:00
04e4586fb3 add the searches route and fix a few broken things 2025-01-07 16:26:06 +01:00
78f6f22a80 implement all the /indexes/documents route 2025-01-07 16:26:06 +01:00
13afdaf393 finish rebase and update utoipa to the latest version 2025-01-07 16:26:06 +01:00
742d0ee531 Implements the get and delete tasks route 2025-01-07 16:26:04 +01:00
4275833bab Rename compute.rs to post_process.rs 2025-01-07 15:31:20 +01:00
de7f8c4406 refactor indexer mod 2025-01-07 15:29:02 +01:00
f00a285a6d Merge #5199
Some checks failed
Indexing bench (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of indexing (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for geo (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for songs (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for Wikipedia articles (push) / Run and upload benchmarks (push) Waiting to run
Test suite / Tests on ${{ matrix.os }} (macos-13) (push) Waiting to run
Test suite / Tests on ubuntu-20.04 (push) Failing after 24s
Test suite / Tests almost all features (push) Has been skipped
Test suite / Test disabled tokenization (push) Has been skipped
Test suite / Tests on ${{ matrix.os }} (windows-2022) (push) Failing after 33s
Test suite / Run tests in debug (push) Failing after 9s
Test suite / Run Clippy (push) Successful in 6m27s
Test suite / Run Rustfmt (push) Successful in 1m44s
Run the indexing fuzzer / Setup the action (push) Successful in 1h5m26s
5199: Refactorize the index-scheduler r=Kerollmops a=irevoire

# Pull Request

## Related issue
Fixes https://github.com/meilisearch/meilisearch/issues/5115

## What does this PR do?
- Extract all the « task/batch queue » part of the `lib.rs` to a `queue` module containing:
  - The batches, and its test in another file
  - The tasks, and its test in another file
- Extract all the « scheduler » stuff to another module 
  - One file for the batch creation
  - One file for the autobatcher
  - One file for the batch process
  - The tests are a bit messier and are made by features (i.e.: All the embedder tests in one file)
- The average size of the files is around 500 loc now and R-A is way faster


Co-authored-by: Tamo <tamo@meilisearch.com>
2025-01-07 14:05:21 +00:00
43bb02e7b4 split the autobatcher in two 2025-01-07 15:02:03 +01:00
56fd4ee9bd Merge #5211
Some checks failed
Indexing bench (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of indexing (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for geo (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for songs (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for Wikipedia articles (push) / Run and upload benchmarks (push) Waiting to run
Test suite / Tests on ${{ matrix.os }} (macos-13) (push) Waiting to run
Test suite / Tests on ${{ matrix.os }} (windows-2022) (push) Failing after 46s
Test suite / Tests on ubuntu-20.04 (push) Failing after 10s
Test suite / Tests almost all features (push) Has been skipped
Test suite / Test disabled tokenization (push) Has been skipped
Test suite / Run tests in debug (push) Failing after 9s
Test suite / Run Clippy (push) Successful in 6m53s
Test suite / Run Rustfmt (push) Successful in 1m40s
Run the indexing fuzzer / Setup the action (push) Successful in 1h6m8s
5211: Update README.md with AI integrations (langchain & MCP) r=ManyTheFish a=tpayet

With AI integrations 🤖

# Pull Request

## Related issue
None

## What does this PR do?
- Update the README with AI integrations

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Thomas Payet <thomas@meilisearch.com>
2025-01-07 08:39:09 +00:00
0625d08e4e adding a function toextract batch_uid from json and modifying the get_batch interface for easier call - did not work, so falling back to hard coded batch id for now. 2025-01-07 12:07:33 +11:00
9269086fda fixing a rebase issue 2025-01-07 11:48:09 +11:00
98e3ecb86b Format fixes after running: cargo +nightly fmt 2025-01-07 11:16:37 +11:00
9af9e73c45 Update README.md
With AI integrations 🤖
2025-01-06 18:02:30 +01:00
4b107b17cb test: improve performance of get_index.rs 2025-01-06 17:38:44 +01:00
cb82b0798a Split the index-scheduler in ~500 loc modules 2025-01-06 14:08:26 +01:00
7f1071943e Merge #5198
Some checks failed
Test suite / Tests on ubuntu-20.04 (push) Failing after 24s
Test suite / Tests almost all features (push) Has been skipped
Test suite / Test disabled tokenization (push) Has been skipped
Test suite / Run tests in debug (push) Failing after 10s
Test suite / Tests on ${{ matrix.os }} (windows-2022) (push) Failing after 58s
Test suite / Run Clippy (push) Successful in 6m41s
Test suite / Run Rustfmt (push) Successful in 1m42s
Run the indexing fuzzer / Setup the action (push) Successful in 1h5m19s
Indexing bench (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of indexing (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for geo (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for songs (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for Wikipedia articles (push) / Run and upload benchmarks (push) Has been cancelled
Test suite / Tests on ${{ matrix.os }} (macos-13) (push) Has been cancelled
5198: Bump Swatinem/rust-cache from 2.7.5 to 2.7.7 r=curquiza a=dependabot[bot]

Bumps [Swatinem/rust-cache](https://github.com/swatinem/rust-cache) from 2.7.5 to 2.7.7.
<details>
<summary>Release notes</summary>
<p><em>Sourced from <a href="https://github.com/swatinem/rust-cache/releases">Swatinem/rust-cache's releases</a>.</em></p>
<blockquote>
<h2>v2.7.7</h2>
<p><strong>Full Changelog</strong>: <a href="https://github.com/Swatinem/rust-cache/compare/v2.7.6...v2.7.7">https://github.com/Swatinem/rust-cache/compare/v2.7.6...v2.7.7</a></p>
<h2>v2.7.6</h2>
<h2>What's Changed</h2>
<ul>
<li>Updated artifact upload action to v4 by <a href="https://github.com/guylamar2006"><code>`@​guylamar2006</code></a>` in <a href="https://redirect.github.com/Swatinem/rust-cache/pull/212">Swatinem/rust-cache#212</a></li>
<li>Adds an option to do lookup-only of the cache by <a href="https://github.com/danlec"><code>`@​danlec</code></a>` in <a href="https://redirect.github.com/Swatinem/rust-cache/pull/217">Swatinem/rust-cache#217</a></li>
<li>add runner OS in cache key by <a href="https://github.com/rnbguy"><code>`@​rnbguy</code></a>` in <a href="https://redirect.github.com/Swatinem/rust-cache/pull/220">Swatinem/rust-cache#220</a></li>
<li>Allow opting out of caching $CARGO_HOME/bin. by <a href="https://github.com/benjyw"><code>`@​benjyw</code></a>` in <a href="https://redirect.github.com/Swatinem/rust-cache/pull/216">Swatinem/rust-cache#216</a></li>
</ul>
<h2>New Contributors</h2>
<ul>
<li><a href="https://github.com/guylamar2006"><code>`@​guylamar2006</code></a>` made their first contribution in <a href="https://redirect.github.com/Swatinem/rust-cache/pull/212">Swatinem/rust-cache#212</a></li>
<li><a href="https://github.com/danlec"><code>`@​danlec</code></a>` made their first contribution in <a href="https://redirect.github.com/Swatinem/rust-cache/pull/217">Swatinem/rust-cache#217</a></li>
<li><a href="https://github.com/rnbguy"><code>`@​rnbguy</code></a>` made their first contribution in <a href="https://redirect.github.com/Swatinem/rust-cache/pull/220">Swatinem/rust-cache#220</a></li>
<li><a href="https://github.com/benjyw"><code>`@​benjyw</code></a>` made their first contribution in <a href="https://redirect.github.com/Swatinem/rust-cache/pull/216">Swatinem/rust-cache#216</a></li>
</ul>
<p><strong>Full Changelog</strong>: <a href="https://github.com/Swatinem/rust-cache/compare/v2.7.5...v2.7.6">https://github.com/Swatinem/rust-cache/compare/v2.7.5...v2.7.6</a></p>
</blockquote>
</details>
<details>
<summary>Commits</summary>
<ul>
<li><a href="f0deed1e0e"><code>f0deed1</code></a> 2.7.7</li>
<li><a href="008623fb83"><code>008623f</code></a> also cache <code>cargo install</code> metadata</li>
<li><a href="720f7e45cc"><code>720f7e4</code></a> 2.7.6</li>
<li><a href="4b1f006ad2"><code>4b1f006</code></a> update dependencies, in particular <code>`@actions/cache</code></li>`
<li><a href="e8e63cdbf2"><code>e8e63cd</code></a> Allow opting out of caching $CARGO_HOME/bin. (<a href="https://redirect.github.com/swatinem/rust-cache/issues/216">#216</a>)</li>
<li><a href="9a2e0d3212"><code>9a2e0d3</code></a> add runner OS in cache key (<a href="https://redirect.github.com/swatinem/rust-cache/issues/220">#220</a>)</li>
<li><a href="c00f3025ca"><code>c00f302</code></a> Adds an option to do lookup-only of the cache (<a href="https://redirect.github.com/swatinem/rust-cache/issues/217">#217</a>)</li>
<li><a href="68b3cb7503"><code>68b3cb7</code></a> Updated artifact upload action to v4 (<a href="https://redirect.github.com/swatinem/rust-cache/issues/212">#212</a>)</li>
<li>See full diff in <a href="https://github.com/swatinem/rust-cache/compare/v2.7.5...v2.7.7">compare view</a></li>
</ul>
</details>
<br />


[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=Swatinem/rust-cache&package-manager=github_actions&previous-version=2.7.5&new-version=2.7.7)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

You can trigger a rebase of this PR by commenting ``@dependabot` rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---

<details>
<summary>Dependabot commands and options</summary>
<br />

You can trigger Dependabot actions by commenting on this PR:
- ``@dependabot` rebase` will rebase this PR
- ``@dependabot` recreate` will recreate this PR, overwriting any edits that have been made to it
- ``@dependabot` merge` will merge this PR after your CI passes on it
- ``@dependabot` squash and merge` will squash and merge this PR after your CI passes on it
- ``@dependabot` cancel merge` will cancel a previously requested merge and block automerging
- ``@dependabot` reopen` will reopen this PR if it is closed
- ``@dependabot` close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
- ``@dependabot` show <dependency name> ignore conditions` will show all of the ignore conditions of the specified dependency
- ``@dependabot` ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
- ``@dependabot` ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
- ``@dependabot` ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)


</details>

Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-01-02 09:18:13 +00:00
3c1e7c7428 Bump Swatinem/rust-cache from 2.7.5 to 2.7.7
Bumps [Swatinem/rust-cache](https://github.com/swatinem/rust-cache) from 2.7.5 to 2.7.7.
- [Release notes](https://github.com/swatinem/rust-cache/releases)
- [Changelog](https://github.com/Swatinem/rust-cache/blob/master/CHANGELOG.md)
- [Commits](https://github.com/swatinem/rust-cache/compare/v2.7.5...v2.7.7)

---
updated-dependencies:
- dependency-name: Swatinem/rust-cache
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-01-01 17:44:53 +00:00
baeefa4817 Merge #5166
Some checks failed
Test suite / Tests on ${{ matrix.os }} (macos-13) (push) Waiting to run
Test suite / Tests on ubuntu-20.04 (push) Failing after 2s
Test suite / Tests almost all features (push) Has been skipped
Test suite / Test disabled tokenization (push) Has been skipped
Test suite / Run tests in debug (push) Failing after 11s
Test suite / Tests on ${{ matrix.os }} (windows-2022) (push) Failing after 53s
Test suite / Run Clippy (push) Successful in 6m35s
Test suite / Run Rustfmt (push) Successful in 1m41s
Run the indexing fuzzer / Setup the action (push) Successful in 1h5m27s
Indexing bench (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of indexing (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for geo (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for songs (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for Wikipedia articles (push) / Run and upload benchmarks (push) Has been cancelled
5166: fix list indexes r=dureuill a=irevoire

# Pull Request

### Smol benchmark on a meilisearch with 1009 indexes:

**Before** this PR on my computer, it was taking 5.5s to call the `GET /indexes` route on a cold computer where all the indexes were closed.
**After** this PR it takes 0.009s to call the route on the first 20 indexes, and 0.176 for the last 20 indexes (retrieving the first or last indexes on main has no impact on performances).

If my computations are right, that's between 61111.1% and 3125% faster on this test 😂 

## Related issue
Fixes https://github.com/meilisearch/meilisearch/issues/4694

## What does this PR do?
- Add the primary key to the cache we already have in the index-mapper
- Provide a new route to retrieve the paginated indexes straight from the cache without opening them
- Fix a bug where the cache was not computed when loading a dump and was forcing us to open the indexes to compute their stats on the fly

## Is it breaking?

Since the field I added is an `Option` I think we should consider it as non-breaking and let it update itself automatically on the next operation of this index.
I also tested to run my patch over a DB generated on release-v1.12.0 and it works. The importing a dump also works.

Co-authored-by: Tamo <tamo@meilisearch.com>
2024-12-31 10:55:22 +00:00
e8ba7833ec Update crates/meilisearch/src/routes/indexes/mod.rs
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-12-31 10:43:22 +01:00
db676aee73 Update crates/meilisearch/src/routes/indexes/mod.rs
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-12-31 10:43:12 +01:00
1a0d8810e5 Merge #5178
Some checks failed
Test suite / Tests on ubuntu-20.04 (push) Failing after 13s
Test suite / Tests almost all features (push) Has been skipped
Test suite / Test disabled tokenization (push) Has been skipped
Test suite / Run tests in debug (push) Failing after 11s
Test suite / Tests on ${{ matrix.os }} (windows-2022) (push) Failing after 52s
Test suite / Run Clippy (push) Successful in 6m45s
Test suite / Run Rustfmt (push) Successful in 1m43s
Run the indexing fuzzer / Setup the action (push) Successful in 1h5m31s
Indexing bench (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of indexing (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for geo (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for songs (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for Wikipedia articles (push) / Run and upload benchmarks (push) Has been cancelled
Test suite / Tests on ${{ matrix.os }} (macos-13) (push) Has been cancelled
5178: Add Prometheus metrics to measure task queue latency r=irevoire a=takaebato

# Pull Request

## Related issue
Fixes https://github.com/meilisearch/meilisearch/issues/5046

## What does this PR do?

- Added Prometheus metrics to measure task queue latency

(Confirmed locally that latency is measured during parallel task execution in the benchmark.)

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Takahiro Ebato <takahiro.ebato@gmail.com>
2024-12-30 14:47:15 +00:00
4615d86748 Merge #5169
5169: Replace hardcoded string with constants r=irevoire a=Gnosnay

# Pull Request

## Related issue
Fixes #5136

## What does this PR do?
- Replace all of hardcoded "_geo" to one constant string.

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Gnosnay <iamgnosnay@gmail.com>
2024-12-30 14:12:05 +00:00
525e67ba93 Fix the format and linter error 2024-12-28 20:35:55 +08:00
44eb153619 Replace hardcoded string with constants 2024-12-28 20:35:55 +08:00
195785c47f Add a task queue latency panel to the grafana dashboard 2024-12-27 23:26:20 +09:00
4eae92f411 fix list indexes 2024-12-26 18:48:25 +01:00
d7cb319217 #4840 - Partial fix - Confirm task success after waiting for it - continued, few missing cases - batch 2 2024-12-24 23:07:43 +11:00
15062e7dba #4840 - Partial fix - Confirm task success after waiting for it - continued, few missing cases. 2024-12-24 23:06:07 +11:00
bf19f86e38 #4840 - Partial fix - Confirm task success after waiting for it. 2024-12-24 23:06:07 +11:00
91c7ef8723 #4840 - Partial fix - Remove hard coded task ids to prevent flaky tests.
# Conflicts:
#	crates/meilisearch/tests/documents/add_documents.rs
#	crates/meilisearch/tests/search/facet_search.rs
#	crates/meilisearch/tests/settings/get_settings.rs
#	crates/meilisearch/tests/snapshot/mod.rs
2024-12-24 23:05:59 +11:00
fc23a0ee52 Merge #5135
Some checks failed
Test suite / Tests on ${{ matrix.os }} (windows-2022) (push) Failing after 24s
Test suite / Tests on ubuntu-20.04 (push) Failing after 16s
Test suite / Tests almost all features (push) Has been skipped
Test suite / Test disabled tokenization (push) Has been skipped
Test suite / Run tests in debug (push) Failing after 15s
Test suite / Run Clippy (push) Successful in 57m37s
Test suite / Run Rustfmt (push) Successful in 7m19s
Run the indexing fuzzer / Setup the action (push) Failing after 1h41m18s
Indexing bench (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of indexing (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for geo (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for songs (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for Wikipedia articles (push) / Run and upload benchmarks (push) Has been cancelled
Test suite / Tests on ${{ matrix.os }} (macos-13) (push) Has been cancelled
5135: Check all search filter attributes are filterable upfront r=curquiza a=jameshiew

# Pull Request

## Related issue
Fixes #5069

## What does this PR do?
- checks all `fid`s in the `Filter` tree are filterable before evaluating search query
- returns AttributeNotFilterable error if any are not

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!

Co-authored-by: James Hiew <james@hiew.net>
2024-12-24 10:09:35 +00:00
d3491851bc Merge #5187
Some checks failed
Indexing bench (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of indexing (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for geo (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for songs (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for Wikipedia articles (push) / Run and upload benchmarks (push) Waiting to run
Test suite / Tests on ${{ matrix.os }} (macos-13) (push) Waiting to run
Test suite / Tests on ${{ matrix.os }} (windows-2022) (push) Failing after 12s
Test suite / Tests on ubuntu-20.04 (push) Failing after 20s
Test suite / Tests almost all features (push) Has been skipped
Test suite / Test disabled tokenization (push) Has been skipped
Test suite / Run tests in debug (push) Failing after 16s
Test suite / Run Clippy (push) Successful in 33m58s
Test suite / Run Rustfmt (push) Successful in 11m45s
Run the indexing fuzzer / Setup the action (push) Successful in 1h10m33s
5187: Bring back v1.12.0 of pre-release changes into `main` r=irevoire a=curquiza



Co-authored-by: ManyTheFish <many@meilisearch.com>
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
Co-authored-by: Clément Renault <clement@meilisearch.com>
Co-authored-by: meili-bors[bot] <89034592+meili-bors[bot]@users.noreply.github.com>
Co-authored-by: Many the fish <many@meilisearch.com>
2024-12-23 10:59:33 +00:00
886404cc4d Merge #5184
5184: Fix typo in a comment r=curquiza a=eltociear



# Pull Request


## What does this PR do?
- formating -> formatting

## PR checklist
Please check if your PR fulfills the following requirements:
- [ ] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [ ] Have you read the contributing guidelines?
- [ ] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Ikko Eltociear Ashimine <eltociear@gmail.com>
2024-12-23 09:55:52 +00:00
75a7f0e26c chore: update mod.rs
formating -> formatting
2024-12-21 22:09:15 +09:00
47827ca5c1 Add Prometheus metrics to measure task queue latency 2024-12-21 18:29:30 +09:00
f75d74a967 removed formating issue 2024-12-20 16:28:30 -05:00
42648919c7 updated settings to pass cargo fmt check 2024-12-19 10:24:15 -05:00
6987cac1ba Merge #5174
Some checks failed
Test suite / Tests almost all features (push) Has been skipped
Test suite / Test disabled tokenization (push) Has been skipped
Test suite / Tests on ubuntu-20.04 (push) Failing after 22s
Test suite / Run tests in debug (push) Failing after 15s
Test suite / Tests on ${{ matrix.os }} (windows-2022) (push) Failing after 29s
Test suite / Run Rustfmt (push) Successful in 1h14m6s
Test suite / Run Clippy (push) Failing after 2h47m59s
Run the indexing fuzzer / Setup the action (push) Successful in 1h5m44s
Test suite / Tests on ${{ matrix.os }} (macos-13) (push) Has been cancelled
Indexing bench (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of indexing (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for geo (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for songs (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for Wikipedia articles (push) / Run and upload benchmarks (push) Has been cancelled
5174: Split tests for option crate meilisearch in a separate test file r=irevoire a=K-Kumar-01

# Pull Request
Splits the tests for meilisearch option crate in a separate testfile.

## Related issue
Partially solves #5116

## What does this PR do?
- Splits the test for `/src/option.rs` into a separate file `/src/option_test.rs` in meilisearch crate


## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Co-authored-by: Kushal Kumar <kushalkumargupta4@gmail.com>
2024-12-19 09:39:25 +00:00
082237863e Merge #5175
Some checks failed
Indexing bench (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of indexing (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for geo (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for songs (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for Wikipedia articles (push) / Run and upload benchmarks (push) Waiting to run
Test suite / Tests on ${{ matrix.os }} (macos-13) (push) Waiting to run
Test suite / Tests on ubuntu-20.04 (push) Failing after 12s
Test suite / Tests almost all features (push) Has been skipped
Test suite / Tests on ${{ matrix.os }} (windows-2022) (push) Failing after 18s
Test suite / Test disabled tokenization (push) Has been skipped
Test suite / Run tests in debug (push) Failing after 8s
Test suite / Run Clippy (push) Successful in 6m42s
Test suite / Run Rustfmt (push) Successful in 1m42s
Run the indexing fuzzer / Setup the action (push) Failing after 20m27s
5175: fix the flaky batches test r=dureuill a=irevoire

## What does this PR do?
I finally reproduced the flaky test in the CI here: https://github.com/meilisearch/meilisearch/actions/runs/12390709982/job/34586313125

I cannot reproduce it locally even with `cargo flaky --iter 2000` so I'm not 100% my fix will work. 
But what I did was definitely part of the flakyness of the tests, we were querying a batch that could in some cases not be started.
That worked well for the tasks since an enqueued task is already written on disk, but since the batch do not exist if they're not processing they were just missing.

---

I also changed what we were doing because there is no point in doing an indexing process for this test

Co-authored-by: Tamo <tamo@meilisearch.com>
2024-12-18 12:47:50 +00:00
4bcdd7a9f9 fix the flaky batches test 2024-12-18 11:51:12 +01:00
fc4b7ccb70 Merge #5173
5173: Remove obsolete test code r=irevoire a=K-Kumar-01

# Pull Request
Removes the test from the meilisearch/search/mod.rs. The tests were already split in the PR #5171 


## What does this PR do?
- Removes the obsolete tests

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?


Co-authored-by: Kushal Kumar <kushalkumargupta4@gmail.com>
2024-12-18 10:26:02 +00:00
df9ac07922 tests: split tests option crate in separate test file
Signed-off-by: Kushal Kumar <kushalkumargupta4@gmail.com>
2024-12-18 02:55:02 +05:30
ba27a09efe refactor: fmt
Signed-off-by: Kushal Kumar <kushalkumargupta4@gmail.com>
2024-12-18 02:28:02 +05:30
bc51d3a918 refactor: remove obsolete test code
Signed-off-by: Kushal Kumar <kushalkumargupta4@gmail.com>
2024-12-18 02:18:57 +05:30
b39d4e9b50 removed unused import 2024-12-17 12:01:06 -05:00
b18cd9075d Merge #5171
Some checks failed
Test suite / Tests on ${{ matrix.os }} (windows-2022) (push) Failing after 13s
Test suite / Tests on ubuntu-20.04 (push) Failing after 12s
Test suite / Tests almost all features (push) Has been skipped
Test suite / Test disabled tokenization (push) Has been skipped
Test suite / Run tests in debug (push) Failing after 10s
Test suite / Run Clippy (push) Successful in 6m34s
Test suite / Run Rustfmt (push) Successful in 1m50s
Run the indexing fuzzer / Setup the action (push) Successful in 1h5m21s
Indexing bench (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of indexing (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for geo (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for songs (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for Wikipedia articles (push) / Run and upload benchmarks (push) Has been cancelled
Test suite / Tests on ${{ matrix.os }} (macos-13) (push) Has been cancelled
5171: tests: split tests in separate file r=irevoire a=K-Kumar-01

# Pull Request
Splits the tests for meilisearch search crate in a separate testfile.

## Related issue
Partially solves #5116.

## Related Pull Requests
https://github.com/meilisearch/meilisearch/pull/5134

## What does this PR do?
- Splits the test for `/search/mod.rs` into a separate file `search/mod_test.rs` in meilisearch crate

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Kushal Kumar <kushalkumargupta4@gmail.com>
Co-authored-by: Tamo <tamo@meilisearch.com>
2024-12-17 10:42:16 +00:00
36b897858a fmt 2024-12-17 11:40:28 +01:00
a7b2f461cf fixed the cargo errors that were occuring 2024-12-16 18:01:27 -05:00
fce132a21b tests: split tests in separate file
Signed-off-by: Kushal Kumar <kushalkumargupta4@gmail.com>
2024-12-17 03:04:50 +05:30
9c857ff48f handling error where multple attributes aren't allowed to be checked, only checking single now since this is being executed in make_setting_route 2024-12-16 16:08:22 -05:00
f27b33dabe undid changes from the pull 1.12.0 branch 2024-12-16 13:27:57 -05:00
9eb4b84abd now cheecking to enusre that all the settings in the struct are listed in this macro. 2024-12-16 13:23:24 -05:00
71834787ec Merge #5134
Some checks failed
Indexing bench (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of indexing (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for geo (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for songs (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for Wikipedia articles (push) / Run and upload benchmarks (push) Waiting to run
Test suite / Tests on ${{ matrix.os }} (macos-13) (push) Waiting to run
Test suite / Tests on ubuntu-20.04 (push) Failing after 11s
Test suite / Tests almost all features (push) Has been skipped
Test suite / Test disabled tokenization (push) Has been skipped
Test suite / Tests on ${{ matrix.os }} (windows-2022) (push) Failing after 16s
Test suite / Run tests in debug (push) Failing after 10s
Test suite / Run Clippy (push) Successful in 6m16s
Test suite / Run Rustfmt (push) Successful in 1m55s
Run the indexing fuzzer / Setup the action (push) Successful in 1h5m33s
5134: Split Meilisearch Crate Tests in separate file r=irevoire a=K-Kumar-01

# Pull Request
Splits the tests for meilisearch crate in a separate file.

## Related issue
Partially solves #5116 

## What does this PR do?
- Splits the test for `/indexes/search.rs` into a separate file `indexes/search_test.rs` in meilisearch crate

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Kushal Kumar <kushalkumargupta4@gmail.com>
Co-authored-by: Tamo <tamo@meilisearch.com>
2024-12-16 15:27:14 +00:00
b004db37c7 fmt 2024-12-16 15:59:26 +01:00
0c04cd1d9f make clippy happy 2024-12-16 15:52:47 +01:00
63ea405b3e Merge branch 'release-v1.12.0' of https://github.com/meilisearch/meilisearch into configure_setting_routes_when_new_field_is_added 2024-12-13 13:08:45 -05:00
ba11121cfc Merge #5159
Some checks failed
Test suite / Tests almost all features (push) Has been skipped
Test suite / Test disabled tokenization (push) Has been skipped
Test suite / Tests on ubuntu-20.04 (push) Failing after 11s
Test suite / Run tests in debug (push) Failing after 10s
Test suite / Tests on ${{ matrix.os }} (windows-2022) (push) Failing after 22s
Test suite / Run Rustfmt (push) Successful in 1m18s
Test suite / Run Clippy (push) Successful in 5m30s
Test suite / Tests on ${{ matrix.os }} (macos-13) (push) Has been cancelled
5159: Fix the New Indexer Spilling r=irevoire a=Kerollmops

Fix two bugs in the merging of the spilled caches. Thanks to `@ManyTheFish` and `@irevoire` 👏

Co-authored-by: Kerollmops <clement@meilisearch.com>
Co-authored-by: ManyTheFish <many@meilisearch.com>
2024-12-12 17:16:53 +00:00
acdd5aa6ea Use the thread source id instead of the destination id
when filtering on the cache to merge
2024-12-12 18:12:00 +01:00
2f3cc8cdd2 Fix the merge_caches_sorted function 2024-12-12 16:15:37 +01:00
7a95fed23f Merge #5158
Some checks failed
Test suite / Tests on ${{ matrix.os }} (macos-13) (push) Waiting to run
Test suite / Tests on ${{ matrix.os }} (windows-2022) (push) Failing after 46s
Test suite / Tests on ubuntu-20.04 (push) Failing after 13s
Test suite / Tests almost all features (push) Has been skipped
Test suite / Test disabled tokenization (push) Has been skipped
Test suite / Run tests in debug (push) Failing after 15s
Test suite / Run Rustfmt (push) Successful in 9m49s
Test suite / Run Clippy (push) Successful in 46m15s
5158: Indexer edition 2024 fix facet fst r=Kerollmops a=ManyTheFish

# Pull Request
Fix a regression in the new indexer; when several filterable attributes containing strings were set, all the field IDs were shifted, and the last one was overwriting the previous FST.

## What does this PR do?
- Add a test reproducing the bug
- fix the bug

Co-authored-by: ManyTheFish <many@meilisearch.com>
2024-12-12 14:14:44 +00:00
961de4d34e Fix facet fst 2024-12-12 15:12:28 +01:00
18ce95dcbf Add test reproducing the bug 2024-12-12 14:56:45 +01:00
c177210b1b Merge #5152
5152: Make xtasks be able to use the specified binary r=dureuill a=Kerollmops

Makes it possible to specify the binary to run. It is useful to run PGO optimized binaries.

Co-authored-by: Kerollmops <clement@meilisearch.com>
Co-authored-by: Clément Renault <clement@meilisearch.com>
2024-12-12 12:28:16 +00:00
1fc90fbacb Merge #5147
5147: Batch progress r=dureuill a=irevoire

# Pull Request

## Related issue
Fixes https://github.com/meilisearch/meilisearch/issues/5068

## What does this PR do?
- ...

## PR checklist
Please check if your PR fulfills the following requirements:
- [ ] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [ ] Have you read the contributing guidelines?
- [ ] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Tamo <tamo@meilisearch.com>
2024-12-12 09:15:54 +00:00
6c72559457 Update the binary-path description
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-12-12 09:39:39 +01:00
1fdfa3f208 Change the exit code to 130 when Ctrl-Ced 2024-12-12 09:26:14 +01:00
1a01196a80 removed the method outside of macro rules, no longer needed 2024-12-11 13:06:19 -05:00
0d0c18f519 rename the Step::name into Step::current_step 2024-12-11 18:41:03 +01:00
d12364c1e0 fix the tests 2024-12-11 18:30:48 +01:00
8cd3a1aa57 fmt 2024-12-11 18:18:40 +01:00
08fd026ebd fix warning 2024-12-11 18:18:13 +01:00
75d5cea624 use a with_capacity while allocating the progress view 2024-12-11 18:17:33 +01:00
ab9213fa94 ensure we never write the progress to the db 2024-12-11 18:16:20 +01:00
45d5d4bf40 make the progressview public 2024-12-11 18:15:33 +01:00
fa885e75b4 rename the send_progress in progress 2024-12-11 18:13:12 +01:00
29fc77ee5b remove usuless print 2024-12-11 18:11:19 +01:00
ad4dc70720 rename the ComputingTheChanges to ComputingDocumentChanges in the edit document progress 2024-12-11 18:09:54 +01:00
5d682b4700 rename the ComputingTheChanges to ComputingDocumentChanges 2024-12-11 18:08:45 +01:00
f1beb60204 make the progress use payload instead of documents 2024-12-11 18:07:45 +01:00
85577e70cd reuse the enqueued 2024-12-11 18:05:34 +01:00
c5536c37b5 rename the atomic::name to unit_name 2024-12-11 18:03:06 +01:00
9245c89cfe move the macros to milli 2024-12-11 18:00:46 +01:00
f4ff722247 simplified the method in the macro 2024-12-11 12:00:39 -05:00
262b429a4c updated to fix macro error by creating one method to ensure all routes corresponding to fields adn another to ensure each field provided in settings has a corresponding route 2024-12-11 10:43:13 -05:00
eaabc1af2f Merge #5144
Some checks failed
Test suite / Tests on ${{ matrix.os }} (macos-13) (push) Waiting to run
Test suite / Tests almost all features (push) Has been skipped
Test suite / Test disabled tokenization (push) Has been skipped
Test suite / Tests on ubuntu-20.04 (push) Failing after 11s
Test suite / Run tests in debug (push) Failing after 10s
Test suite / Tests on ${{ matrix.os }} (windows-2022) (push) Failing after 47s
Test suite / Run Rustfmt (push) Successful in 1m17s
Test suite / Run Clippy (push) Successful in 5m30s
5144: Exactly 512 bytes docid fails r=Kerollmops a=dureuill

# Pull Request

## Related issue
Fixes #5050 

## What does this PR do?
- Return a user error rather than an internal one for docids of exactly 512 bytes
- Fix up error message to indicate that exactly 512 bytes long docids are not supported.
- Fix up error message to reflect that index uids are actually limited to 400 bytes in length

## Impact

- Impacts docs: 
    - update [this paragraph](https://www.meilisearch.com/docs/learn/resources/known_limitations#length-of-primary-key-values) to say 511 bytes instead of 512 

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-12-11 15:41:05 +00:00
04a24a9239 Kill Meilisearch with a TERM signal 2024-12-11 16:27:07 +01:00
1f54dfa883 update the macro to look more like an enum 2024-12-11 16:26:09 +01:00
786b0fabea implement the progress for almost all the tasks 2024-12-11 16:26:08 +01:00
26733c705d add progress for the task deletion and task cancelation 2024-12-11 16:25:02 +01:00
ab75f53efd update all snapshots 2024-12-11 16:25:02 +01:00
867e6a8f1d rename the send_progress field to progress since it s not sending anything 2024-12-11 16:25:01 +01:00
6f4823fc97 make the number of document in the document tasks more incremental 2024-12-11 16:25:01 +01:00
df9b68f8ed inital implementation of the progress 2024-12-11 16:25:01 +01:00
0a0a5f84bf added attribute name such that each verify_field_exists generated by the macro is unique 2024-12-11 10:05:08 -05:00
5bc6391700 Merge #5153
5153: Return docid in case of errors while rendering the document template r=Kerollmops a=dureuill

Improves error message:

Before: 

```
ERROR index_scheduler: Batch failed Index `mieli`: user error: missing field in document: liquid: Unknown index
  with:
    variable=doc
    requested index=title
    available indexes=by, id, kids, parent, text, time, type
```

After:

```
ERROR index_scheduler: Batch failed Index `mieli`: user error: missing field in document `11345147`: liquid: Unknown index
  with:
    variable=doc
    requested index=title
    available indexes=by, id, kids, parent, text, time, type
```

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-12-11 15:01:40 +00:00
eaa897d983 Avoid compiling when unecessary 2024-12-11 15:57:16 +01:00
c06f386ac3 specifying generic structure now for verifiy_field_exists 2024-12-11 09:36:36 -05:00
bfca54cc2c Return docid in case of errors while rendering the document template 2024-12-11 15:26:18 +01:00
04a62d2b97 Compile Meilisearch or run the dedicated binary file 2024-12-11 14:57:07 +01:00
8c19cb0a0b Merge #5146
Some checks failed
Test suite / Tests on ${{ matrix.os }} (macos-13) (push) Waiting to run
Test suite / Tests almost all features (push) Has been skipped
Test suite / Test disabled tokenization (push) Has been skipped
Test suite / Tests on ubuntu-20.04 (push) Failing after 19s
Test suite / Run tests in debug (push) Failing after 14s
Test suite / Tests on ${{ matrix.os }} (windows-2022) (push) Failing after 23s
Test suite / Run Rustfmt (push) Successful in 4m52s
Test suite / Run Clippy (push) Failing after 8m9s
5146: Offline upgrade v1.12 r=irevoire a=ManyTheFish

# Pull Request

## Related issue
Fixes #4978 

## What does this PR do?
- add v1_11_to_v1_12 function to upgrade Meilisearch from v1.11 to v1.12
- Convert the update files from OBKV to ndjson format


Co-authored-by: ManyTheFish <many@meilisearch.com>
Co-authored-by: Many the fish <many@meilisearch.com>
2024-12-11 13:39:14 +00:00
5c492031d9 Update crates/meilitool/src/upgrade/v1_12.rs
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-12-11 14:34:18 +01:00
fb1caa4724 Merge #5148
5148: Do not duplicate NDJson data when unecessary r=dureuill a=Kerollmops

This PR improves the NDJSON support. Usually, we save all of the user's document content into a temporary file, validate its content, and then convert everything into NDJSON in the file store (update files in the tasks).

It is a waste of time when users are already sending NDJSON. So, this PR removes the last copy and directly stores the user content in the file store, validating it from the file store. If an issue arises, the file will not persist and will be dropped/deleted instead.

Related to #5078.

Co-authored-by: Kerollmops <clement@meilisearch.com>
2024-12-11 13:00:50 +00:00
5622b9607d Wrap the read NDJSON pass into a tokio blocking 2024-12-11 12:18:36 +01:00
01bcc601be Use a nonrandom hasher when decoding JSON 2024-12-11 12:04:29 +01:00
93fbdc06d3 Use a nonrandom hasher when decoding NDJSON 2024-12-11 12:03:09 +01:00
69c931334f Fix the error messages categorization with invalid NDJson 2024-12-11 12:02:48 +01:00
d683f5980c Do not duplicate NDJson when unecessary 2024-12-11 12:02:48 +01:00
f8ba112f66 Merge #5150
5150: Reintroduce the Document Addition Logs r=dureuill a=Kerollmops

This PR reintroduces lost tracing logs showing some information about the number of indexed documents.

Related to #5078. Resolves [this comment](https://github.com/meilisearch/meilisearch/pull/4900/files?show-deleted-files=true&show-viewed-files=true&file-filters%5B%5D=#r1852158338) and [this other one](https://github.com/meilisearch/meilisearch/pull/4900/files?show-deleted-files=true&show-viewed-files=true&file-filters%5B%5D=#r1852159073).

Co-authored-by: Kerollmops <clement@meilisearch.com>
2024-12-11 10:48:48 +00:00
c614d0dd35 Add context when returning an error 2024-12-11 10:55:39 +01:00
479607e5dd Convert update files from OBKV to ndjson 2024-12-11 10:55:39 +01:00
bb00e70087 Reintroduce the document addition logs 2024-12-11 10:39:04 +01:00
2a04ecccc4 first commit 2024-12-11 01:43:37 -05:00
e974be9518 Merge #5145
Some checks failed
Test suite / Tests on ${{ matrix.os }} (macos-13) (push) Waiting to run
Test suite / Tests almost all features (push) Has been skipped
Test suite / Test disabled tokenization (push) Has been skipped
Test suite / Tests on ubuntu-20.04 (push) Failing after 11s
Test suite / Run tests in debug (push) Failing after 9s
Test suite / Tests on ${{ matrix.os }} (windows-2022) (push) Failing after 22s
Test suite / Run Rustfmt (push) Successful in 1m18s
Test suite / Run Clippy (push) Successful in 5m32s
5145: Use bumparaw-collections in Meilisearch/milli r=dureuill a=Kerollmops

This PR is related to #5078. It uses the now published bumparaw-collections and (soon) makes the `RawMap` hasher nonrandom.

Co-authored-by: Kerollmops <clement@meilisearch.com>
2024-12-10 15:51:01 +00:00
aeb6b74725 Make sure we use an FxHashBuilder on the Value 2024-12-10 15:52:22 +01:00
a751972c57 Prefer using a stable than a random hash builder 2024-12-10 14:25:53 +01:00
6b269795d2 Update bumparaw-collections to 0.1.2 2024-12-10 14:25:13 +01:00
d075be798a Fix tests 2024-12-10 13:39:07 +01:00
89637bcaaf Use bumparaw-collections in Meilisearch/milli 2024-12-10 11:52:20 +01:00
866ac91be3 Fix error messages 2024-12-10 11:06:58 +01:00
e610af36aa User failure for documents with docid of ==512 bytes 2024-12-10 11:06:24 +01:00
7cf6707ed3 Extend test to add the ==512 bytes case 2024-12-10 11:05:42 +01:00
34254b42b6 refactor: use test configuration on import
Signed-off-by: Kushal Kumar <kushalkumargupta4@gmail.com>
2024-12-10 00:00:43 +05:30
1995040846 Merge #5142
Some checks failed
Test suite / Tests on ${{ matrix.os }} (macos-13) (push) Waiting to run
Test suite / Tests almost all features (push) Has been skipped
Test suite / Test disabled tokenization (push) Has been skipped
Test suite / Tests on ubuntu-20.04 (push) Failing after 10s
Test suite / Run tests in debug (push) Failing after 11s
Test suite / Tests on ${{ matrix.os }} (windows-2022) (push) Failing after 22s
Test suite / Run Rustfmt (push) Successful in 1m19s
Test suite / Run Clippy (push) Successful in 5m49s
5142: Try merge optimisation r=dureuill a=ManyTheFish

![Capture_decran_2024-12-09_a_11 59 42](https://github.com/user-attachments/assets/0dfc7e30-a603-4546-98d2-791990bdfcce)

Co-authored-by: ManyTheFish <many@meilisearch.com>
2024-12-09 14:48:26 +00:00
6768e4ef75 Fix workload inversion 2024-12-09 10:20:49 +01:00
54e34beac6 Check attributes are filterable before evaluating search query 2024-12-07 21:13:13 +00:00
c0aa018c87 tests: split test in separate file
Signed-off-by: Kushal Kumar <kushalkumargupta4@gmail.com>
2024-12-08 00:32:32 +05:30
b21d7aedf9 Merge #5029
Some checks failed
Test suite / Tests on ${{ matrix.os }} (macos-13) (push) Waiting to run
Run the indexing fuzzer / Setup the action (push) Successful in 1h5m5s
Publish binaries to GitHub release / Publish binary for ${{ matrix.os }} (meilisearch, meilisearch-macos-amd64, macos-13) (push) Waiting to run
Publish binaries to GitHub release / Publish binary for macOS silicon (meilisearch-macos-apple-silicon, aarch64-apple-darwin) (push) Waiting to run
Indexing bench (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of indexing (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for geo (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for songs (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for Wikipedia articles (push) / Run and upload benchmarks (push) Has been cancelled
Create issue to upgrade dependencies / create-issue (push) Failing after 13s
Look for flaky tests / flaky (push) Failing after 9s
Test suite / Tests on ${{ matrix.os }} (windows-2022) (push) Failing after 12s
Test suite / Tests on ubuntu-20.04 (push) Failing after 11s
Test suite / Tests almost all features (push) Failing after 8s
Test suite / Test disabled tokenization (push) Failing after 7s
Test suite / Run tests in debug (push) Failing after 11s
Test suite / Run Rustfmt (push) Successful in 1m53s
Test suite / Run Clippy (push) Successful in 6m7s
Publish binaries to GitHub release / Check the version validity (push) Successful in 9s
Publish binaries to GitHub release / Publish binary for Linux (push) Failing after 9s
Publish binaries to GitHub release / Publish binary for ${{ matrix.os }} (meilisearch.exe, meilisearch-windows-amd64.exe, windows-2022) (push) Failing after 21s
Publish binaries to GitHub release / Publish binary for aarch64 (meilisearch-linux-aarch64, aarch64-unknown-linux-gnu) (push) Failing after 10s
5029: Guide people to create custom reports on the benchboard r=Kerollmops a=Kerollmops



Co-authored-by: Clément Renault <clement@meilisearch.com>
2024-12-03 10:18:11 +00:00
2f1a9105b9 Merge #5104
Some checks failed
Test suite / Tests on ${{ matrix.os }} (macos-13) (push) Waiting to run
Test suite / Tests on ${{ matrix.os }} (windows-2022) (push) Failing after 23s
Test suite / Tests on ubuntu-20.04 (push) Failing after 13s
Test suite / Tests almost all features (push) Has been skipped
Test suite / Test disabled tokenization (push) Has been skipped
Test suite / Run tests in debug (push) Failing after 12s
Test suite / Run Clippy (push) Successful in 9m3s
Test suite / Run Rustfmt (push) Successful in 2m44s
Run the indexing fuzzer / Setup the action (push) Successful in 1h6m48s
Indexing bench (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of indexing (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for geo (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for songs (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for Wikipedia articles (push) / Run and upload benchmarks (push) Has been cancelled
5104: Bump xt0rted/pull-request-comment-branch from 2 to 3 r=curquiza a=dependabot[bot]

Bumps [xt0rted/pull-request-comment-branch](https://github.com/xt0rted/pull-request-comment-branch) from 2 to 3.
<details>
<summary>Release notes</summary>
<p><em>Sourced from <a href="https://github.com/xt0rted/pull-request-comment-branch/releases">xt0rted/pull-request-comment-branch's releases</a>.</em></p>
<blockquote>
<h2>v3.0.0</h2>
<ul>
<li>Updated node runtime from 16 to 20</li>
<li>Bumped <code>`@actions/core</code>` from 1.10.0 to 1.11.1</li>
<li>Bumped <code>`@actions/github</code>` from 5.1.1 to 6.0.0</li>
<li>Bumped <code>undici</code> from 5.28.3 to 5.28.4</li>
</ul>
</blockquote>
</details>
<details>
<summary>Changelog</summary>
<p><em>Sourced from <a href="https://github.com/xt0rted/pull-request-comment-branch/blob/main/CHANGELOG.md">xt0rted/pull-request-comment-branch's changelog</a>.</em></p>
<blockquote>
<h2><a href="https://github.com/xt0rted/pull-request-comment-branch/compare/v2.0.0...v3.0.0">3.0.0</a> - 2024-11-19</h2>
<ul>
<li>Updated node runtime from 16 to 20</li>
<li>Bumped <code>`@actions/core</code>` from 1.10.0 to 1.11.1</li>
<li>Bumped <code>`@actions/github</code>` from 5.1.1 to 6.0.0</li>
<li>Bumped <code>undici</code> from 5.28.3 to 5.28.4</li>
</ul>
<h2><a href="https://github.com/xt0rted/pull-request-comment-branch/compare/v1.4.0...v2.0.0">2.0.0</a> - 2023-03-29</h2>
<ul>
<li>Updated node runtime from 12 to 16</li>
<li>Removed deprecated <code>ref</code> and <code>sha</code> outputs. If you're using these then you should switch to <code>head_ref</code> and <code>head_sha</code> respectively.</li>
</ul>
<h2><a href="https://github.com/xt0rted/pull-request-comment-branch/compare/v1.3.0...v1.4.0">1.4.0</a> - 2022-10-23</h2>
<ul>
<li>Bumped <code>`@actions/core</code>` from 1.2.7 to 1.10.0</li>
<li>Bumped <code>`@actions/github</code>` from 4.0.0 to 5.1.1</li>
<li>Bumped <code>node-fetch</code> from 2.6.1 to 2.6.7</li>
</ul>
<h2><a href="https://github.com/xt0rted/pull-request-comment-branch/compare/v1.2.0...v1.3.0">1.3.0</a> - 2021-05-09</h2>
<ul>
<li>Bumped <code>`@actions/core</code>` from 1.2.5 to 1.2.7</li>
<li>Updated the <code>repo_token</code> input so it defaults to <code>GITHUB_TOKEN</code>. If you're already using this value you can remove this setting from your workflow.</li>
</ul>
<h2><a href="https://github.com/xt0rted/pull-request-comment-branch/compare/v1.1.0...v1.2.0">1.2.0</a> - 2020-09-09</h2>
<ul>
<li>Deprecated <code>ref</code> and <code>sha</code> outputs in favor of <code>head_ref</code> and <code>head_sha</code>.</li>
<li>Added <code>base_ref</code> and <code>base_sha</code> outputs</li>
<li>Bumped <code>`@actions/core</code>` from 1.2.2 to 1.2.5</li>
<li>Bumped <code>`@actions/github</code>` from 2.1.1 to 4.0.0</li>
</ul>
<h2><a href="https://github.com/xt0rted/pull-request-comment-branch/compare/v1.0.0...v1.1.0">1.1.0</a> - 2020-02-21</h2>
<ul>
<li>Bumped <code>`@actions/github</code>` from 2.1.0 to 2.1.1</li>
</ul>
<h2><a href="https://github.com/xt0rted/pull-request-comment-branch/releases/tag/v1.0.0">1.0.0</a> - 2020-02-09</h2>
<ul>
<li>Initial release</li>
</ul>
</blockquote>
</details>
<details>
<summary>Commits</summary>
<ul>
<li><a href="e8b8daa837"><code>e8b8daa</code></a> Release v3.0.0</li>
<li><a href="bdedca277b"><code>bdedca2</code></a> v3.0.0</li>
<li><a href="4bff54f5df"><code>4bff54f</code></a> Merge pull request <a href="https://redirect.github.com/xt0rted/pull-request-comment-branch/issues/437">#437</a> from xt0rted/dependabot/npm_and_yarn/undici-5.28.4</li>
<li><a href="e0ea3daa0d"><code>e0ea3da</code></a> Update CHANGELOG.md</li>
<li><a href="3096af14cd"><code>3096af1</code></a> Bump undici from 5.28.3 to 5.28.4</li>
<li><a href="b7ffabdc5d"><code>b7ffabd</code></a> Merge pull request <a href="https://redirect.github.com/xt0rted/pull-request-comment-branch/issues/461">#461</a> from xt0rted/dependabot/npm_and_yarn/actions-e659d6d3f1</li>
<li><a href="6fc3c73d82"><code>6fc3c73</code></a> Update CHANGELOG.md</li>
<li><a href="20807fbbbc"><code>20807fb</code></a> Bump <code>`@​actions/core</code>` from 1.10.1 to 1.11.1 in the actions group</li>
<li><a href="8d51fb5346"><code>8d51fb5</code></a> Merge pull request <a href="https://redirect.github.com/xt0rted/pull-request-comment-branch/issues/463">#463</a> from xt0rted/dependabot/npm_and_yarn/typescript-5.6.3</li>
<li><a href="37c7636fab"><code>37c7636</code></a> Merge pull request <a href="https://redirect.github.com/xt0rted/pull-request-comment-branch/issues/462">#462</a> from xt0rted/dependabot/npm_and_yarn/vercel/ncc-0.38.3</li>
<li>Additional commits viewable in <a href="https://github.com/xt0rted/pull-request-comment-branch/compare/v2...v3">compare view</a></li>
</ul>
</details>
<br />


[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=xt0rted/pull-request-comment-branch&package-manager=github_actions&previous-version=2&new-version=3)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

You can trigger a rebase of this PR by commenting ``@dependabot` rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---

<details>
<summary>Dependabot commands and options</summary>
<br />

You can trigger Dependabot actions by commenting on this PR:
- ``@dependabot` rebase` will rebase this PR
- ``@dependabot` recreate` will recreate this PR, overwriting any edits that have been made to it
- ``@dependabot` merge` will merge this PR after your CI passes on it
- ``@dependabot` squash and merge` will squash and merge this PR after your CI passes on it
- ``@dependabot` cancel merge` will cancel a previously requested merge and block automerging
- ``@dependabot` reopen` will reopen this PR if it is closed
- ``@dependabot` close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
- ``@dependabot` show <dependency name> ignore conditions` will show all of the ignore conditions of the specified dependency
- ``@dependabot` ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
- ``@dependabot` ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
- ``@dependabot` ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)


</details>

Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-12-02 09:28:38 +00:00
27bb591331 Bump xt0rted/pull-request-comment-branch from 2 to 3
Bumps [xt0rted/pull-request-comment-branch](https://github.com/xt0rted/pull-request-comment-branch) from 2 to 3.
- [Release notes](https://github.com/xt0rted/pull-request-comment-branch/releases)
- [Changelog](https://github.com/xt0rted/pull-request-comment-branch/blob/main/CHANGELOG.md)
- [Commits](https://github.com/xt0rted/pull-request-comment-branch/compare/v2...v3)

---
updated-dependencies:
- dependency-name: xt0rted/pull-request-comment-branch
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
2024-12-01 17:52:21 +00:00
94a1f5a8ea First draft just for the commands 2024-10-31 16:30:05 +01:00
787 changed files with 49805 additions and 20399 deletions

View File

@ -22,6 +22,16 @@ Related product discussion:
<!---If necessary, create a list with technical/product steps-->
### Reminders when modifying the API
- [ ] Update the openAPI file with utoipa:
- [ ] If a new module has been introduced, create a new structure deriving [the OpenAPI proc-macro](https://docs.rs/utoipa/latest/utoipa/derive.OpenApi.html) and nest it in the main [openAPI structure](https://github.com/meilisearch/meilisearch/blob/f2185438eed60fa32d25b15480c5ee064f6fba4a/crates/meilisearch/src/routes/mod.rs#L64-L78).
- [ ] If a new route has been introduced, add the [path decorator](https://docs.rs/utoipa/latest/utoipa/attr.path.html) to it and add the route at the top of the file in its openAPI structure.
- [ ] If a structure which is deserialized or serialized in the API has been introduced or modified, it must derive the [`schema`](https://docs.rs/utoipa/latest/utoipa/macro.schema.html) or the [`IntoParams`](https://docs.rs/utoipa/latest/utoipa/derive.IntoParams.html) proc-macro.
If it's a **new** structure you must also add it to the big list of structures [in the main `OpenApi` structure](https://github.com/meilisearch/meilisearch/blob/f2185438eed60fa32d25b15480c5ee064f6fba4a/crates/meilisearch/src/routes/mod.rs#L88).
- [ ] Once everything is done, start Meilisearch with the swagger flag: `cargo run --features swagger`, open `http://localhost:7700/scalar` on your browser, and ensure everything works as expected.
- For more info, refer to [this presentation](https://pitch.com/v/generating-the-openapi-file-jrn3nh).
### Reminders when modifying the Setting API
<!--- Special steps to remind when adding a new index setting -->

View File

@ -18,7 +18,7 @@ jobs:
timeout-minutes: 180 # 3h
steps:
- uses: actions/checkout@v3
- uses: dtolnay/rust-toolchain@1.79
- uses: dtolnay/rust-toolchain@1.81
with:
profile: minimal

View File

@ -55,7 +55,7 @@ jobs:
reaction-type: "rocket"
repo-token: ${{ env.GH_TOKEN }}
- uses: xt0rted/pull-request-comment-branch@v2
- uses: xt0rted/pull-request-comment-branch@v3
id: comment-branch
with:
repo_token: ${{ env.GH_TOKEN }}
@ -66,7 +66,7 @@ jobs:
fetch-depth: 0 # fetch full history to be able to get main commit sha
ref: ${{ steps.comment-branch.outputs.head_ref }}
- uses: dtolnay/rust-toolchain@1.79
- uses: dtolnay/rust-toolchain@1.81
with:
profile: minimal

View File

@ -12,7 +12,7 @@ jobs:
timeout-minutes: 180 # 3h
steps:
- uses: actions/checkout@v3
- uses: dtolnay/rust-toolchain@1.79
- uses: dtolnay/rust-toolchain@1.81
with:
profile: minimal

View File

@ -18,7 +18,7 @@ jobs:
timeout-minutes: 4320 # 72h
steps:
- uses: actions/checkout@v3
- uses: dtolnay/rust-toolchain@1.79
- uses: dtolnay/rust-toolchain@1.81
with:
profile: minimal

View File

@ -44,7 +44,7 @@ jobs:
exit 1
fi
- uses: dtolnay/rust-toolchain@1.79
- uses: dtolnay/rust-toolchain@1.81
with:
profile: minimal
@ -56,7 +56,7 @@ jobs:
reaction-type: "eyes"
repo-token: ${{ env.GH_TOKEN }}
- uses: xt0rted/pull-request-comment-branch@v2
- uses: xt0rted/pull-request-comment-branch@v3
id: comment-branch
with:
repo_token: ${{ env.GH_TOKEN }}

View File

@ -16,7 +16,7 @@ jobs:
timeout-minutes: 4320 # 72h
steps:
- uses: actions/checkout@v3
- uses: dtolnay/rust-toolchain@1.79
- uses: dtolnay/rust-toolchain@1.81
with:
profile: minimal

View File

@ -15,7 +15,7 @@ jobs:
runs-on: benchmarks
steps:
- uses: actions/checkout@v3
- uses: dtolnay/rust-toolchain@1.79
- uses: dtolnay/rust-toolchain@1.81
with:
profile: minimal

View File

@ -15,7 +15,7 @@ jobs:
runs-on: benchmarks
steps:
- uses: actions/checkout@v3
- uses: dtolnay/rust-toolchain@1.79
- uses: dtolnay/rust-toolchain@1.81
with:
profile: minimal

View File

@ -15,7 +15,7 @@ jobs:
runs-on: benchmarks
steps:
- uses: actions/checkout@v3
- uses: dtolnay/rust-toolchain@1.79
- uses: dtolnay/rust-toolchain@1.81
with:
profile: minimal

View File

@ -0,0 +1,100 @@
name: PR Milestone Check
on:
pull_request:
types: [opened, reopened, edited, synchronize, milestoned, demilestoned]
branches:
- "main"
- "release-v*.*.*"
jobs:
check-milestone:
name: Check PR Milestone
runs-on: ubuntu-latest
steps:
- name: Checkout code
uses: actions/checkout@v3
- name: Validate PR milestone
uses: actions/github-script@v6
with:
github-token: ${{ secrets.GITHUB_TOKEN }}
script: |
// Get PR number directly from the event payload
const prNumber = context.payload.pull_request.number;
// Get PR details
const { data: prData } = await github.rest.pulls.get({
owner: 'meilisearch',
repo: 'meilisearch',
pull_number: prNumber
});
// Get base branch name
const baseBranch = prData.base.ref;
console.log(`Base branch: ${baseBranch}`);
// Get PR milestone
const prMilestone = prData.milestone;
if (!prMilestone) {
core.setFailed('PR must have a milestone assigned');
return;
}
console.log(`PR milestone: ${prMilestone.title}`);
// Validate milestone format: vx.y.z
const milestoneRegex = /^v\d+\.\d+\.\d+$/;
if (!milestoneRegex.test(prMilestone.title)) {
core.setFailed(`Milestone "${prMilestone.title}" does not follow the required format vx.y.z`);
return;
}
// For main branch PRs, check if the milestone is the highest one
if (baseBranch === 'main') {
// Get all milestones
const { data: milestones } = await github.rest.issues.listMilestones({
owner: 'meilisearch',
repo: 'meilisearch',
state: 'open',
sort: 'due_on',
direction: 'desc'
});
// Sort milestones by version number (vx.y.z)
const sortedMilestones = milestones
.filter(m => milestoneRegex.test(m.title))
.sort((a, b) => {
const versionA = a.title.substring(1).split('.').map(Number);
const versionB = b.title.substring(1).split('.').map(Number);
// Compare major version
if (versionA[0] !== versionB[0]) return versionB[0] - versionA[0];
// Compare minor version
if (versionA[1] !== versionB[1]) return versionB[1] - versionA[1];
// Compare patch version
return versionB[2] - versionA[2];
});
if (sortedMilestones.length === 0) {
core.setFailed('No valid milestones found in the repository. Please create at least one milestone with the format vx.y.z');
return;
}
const highestMilestone = sortedMilestones[0];
console.log(`Highest milestone: ${highestMilestone.title}`);
if (prMilestone.title !== highestMilestone.title) {
core.setFailed(`PRs targeting the main branch must use the highest milestone (${highestMilestone.title}), but this PR uses ${prMilestone.title}`);
return;
}
} else {
// For release branches, the milestone should match the branch version
const branchVersion = baseBranch.substring(8); // remove 'release-'
if (prMilestone.title !== branchVersion) {
core.setFailed(`PRs targeting release branch "${baseBranch}" must use the matching milestone "${branchVersion}", but this PR uses "${prMilestone.title}"`);
return;
}
}
console.log('PR milestone validation passed!');

View File

@ -9,22 +9,22 @@ jobs:
flaky:
runs-on: ubuntu-latest
container:
# Use ubuntu-20.04 to compile with glibc 2.28
image: ubuntu:20.04
# Use ubuntu-22.04 to compile with glibc 2.35
image: ubuntu:22.04
steps:
- uses: actions/checkout@v3
- name: Install needed dependencies
run: |
apt-get update && apt-get install -y curl
apt-get install build-essential -y
- uses: dtolnay/rust-toolchain@1.79
- name: Install cargo-flaky
run: cargo install cargo-flaky
- name: Run cargo flaky in the dumps
run: cd crates/dump; cargo flaky -i 100 --release
- name: Run cargo flaky in the index-scheduler
run: cd crates/index-scheduler; cargo flaky -i 100 --release
- name: Run cargo flaky in the auth
run: cd crates/meilisearch-auth; cargo flaky -i 100 --release
- name: Run cargo flaky in meilisearch
run: cd crates/meilisearch; cargo flaky -i 100 --release
- uses: actions/checkout@v3
- name: Install needed dependencies
run: |
apt-get update && apt-get install -y curl
apt-get install build-essential -y
- uses: dtolnay/rust-toolchain@1.81
- name: Install cargo-flaky
run: cargo install cargo-flaky
- name: Run cargo flaky in the dumps
run: cd crates/dump; cargo flaky -i 100 --release
- name: Run cargo flaky in the index-scheduler
run: cd crates/index-scheduler; cargo flaky -i 100 --release
- name: Run cargo flaky in the auth
run: cd crates/meilisearch-auth; cargo flaky -i 100 --release
- name: Run cargo flaky in meilisearch
run: cd crates/meilisearch; cargo flaky -i 100 --release

View File

@ -12,7 +12,7 @@ jobs:
timeout-minutes: 4320 # 72h
steps:
- uses: actions/checkout@v3
- uses: dtolnay/rust-toolchain@1.79
- uses: dtolnay/rust-toolchain@1.81
with:
profile: minimal

View File

@ -18,28 +18,28 @@ jobs:
runs-on: ubuntu-latest
needs: check-version
container:
# Use ubuntu-20.04 to compile with glibc 2.28
image: ubuntu:20.04
# Use ubuntu-22.04 to compile with glibc 2.35
image: ubuntu:22.04
steps:
- name: Install needed dependencies
run: |
apt-get update && apt-get install -y curl
apt-get install build-essential -y
- uses: dtolnay/rust-toolchain@1.79
- name: Install cargo-deb
run: cargo install cargo-deb
- uses: actions/checkout@v3
- name: Build deb package
run: cargo deb -p meilisearch -o target/debian/meilisearch.deb
- name: Upload debian pkg to release
uses: svenstaro/upload-release-action@2.7.0
with:
repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
file: target/debian/meilisearch.deb
asset_name: meilisearch.deb
tag: ${{ github.ref }}
- name: Upload debian pkg to apt repository
run: curl -F package=@target/debian/meilisearch.deb https://${{ secrets.GEMFURY_PUSH_TOKEN }}@push.fury.io/meilisearch/
- name: Install needed dependencies
run: |
apt-get update && apt-get install -y curl
apt-get install build-essential -y
- uses: dtolnay/rust-toolchain@1.81
- name: Install cargo-deb
run: cargo install cargo-deb
- uses: actions/checkout@v3
- name: Build deb package
run: cargo deb -p meilisearch -o target/debian/meilisearch.deb
- name: Upload debian pkg to release
uses: svenstaro/upload-release-action@2.7.0
with:
repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
file: target/debian/meilisearch.deb
asset_name: meilisearch.deb
tag: ${{ github.ref }}
- name: Upload debian pkg to apt repository
run: curl -F package=@target/debian/meilisearch.deb https://${{ secrets.GEMFURY_PUSH_TOKEN }}@push.fury.io/meilisearch/
homebrew:
name: Bump Homebrew formula

View File

@ -3,7 +3,7 @@ name: Publish binaries to GitHub release
on:
workflow_dispatch:
schedule:
- cron: '0 2 * * *' # Every day at 2:00am
- cron: "0 2 * * *" # Every day at 2:00am
release:
types: [published]
@ -37,26 +37,26 @@ jobs:
runs-on: ubuntu-latest
needs: check-version
container:
# Use ubuntu-20.04 to compile with glibc 2.28
image: ubuntu:20.04
# Use ubuntu-22.04 to compile with glibc 2.35
image: ubuntu:22.04
steps:
- uses: actions/checkout@v3
- name: Install needed dependencies
run: |
apt-get update && apt-get install -y curl
apt-get install build-essential -y
- uses: dtolnay/rust-toolchain@1.79
- name: Build
run: cargo build --release --locked
# No need to upload binaries for dry run (cron)
- name: Upload binaries to release
if: github.event_name == 'release'
uses: svenstaro/upload-release-action@2.7.0
with:
repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
file: target/release/meilisearch
asset_name: meilisearch-linux-amd64
tag: ${{ github.ref }}
- uses: actions/checkout@v3
- name: Install needed dependencies
run: |
apt-get update && apt-get install -y curl
apt-get install build-essential -y
- uses: dtolnay/rust-toolchain@1.81
- name: Build
run: cargo build --release --locked
# No need to upload binaries for dry run (cron)
- name: Upload binaries to release
if: github.event_name == 'release'
uses: svenstaro/upload-release-action@2.7.0
with:
repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
file: target/release/meilisearch
asset_name: meilisearch-linux-amd64
tag: ${{ github.ref }}
publish-macos-windows:
name: Publish binary for ${{ matrix.os }}
@ -74,19 +74,19 @@ jobs:
artifact_name: meilisearch.exe
asset_name: meilisearch-windows-amd64.exe
steps:
- uses: actions/checkout@v3
- uses: dtolnay/rust-toolchain@1.79
- name: Build
run: cargo build --release --locked
# No need to upload binaries for dry run (cron)
- name: Upload binaries to release
if: github.event_name == 'release'
uses: svenstaro/upload-release-action@2.7.0
with:
repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
file: target/release/${{ matrix.artifact_name }}
asset_name: ${{ matrix.asset_name }}
tag: ${{ github.ref }}
- uses: actions/checkout@v3
- uses: dtolnay/rust-toolchain@1.81
- name: Build
run: cargo build --release --locked
# No need to upload binaries for dry run (cron)
- name: Upload binaries to release
if: github.event_name == 'release'
uses: svenstaro/upload-release-action@2.7.0
with:
repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
file: target/release/${{ matrix.artifact_name }}
asset_name: ${{ matrix.asset_name }}
tag: ${{ github.ref }}
publish-macos-apple-silicon:
name: Publish binary for macOS silicon
@ -101,7 +101,7 @@ jobs:
- name: Checkout repository
uses: actions/checkout@v3
- name: Installing Rust toolchain
uses: dtolnay/rust-toolchain@1.79
uses: dtolnay/rust-toolchain@1.81
with:
profile: minimal
target: ${{ matrix.target }}
@ -127,8 +127,8 @@ jobs:
env:
DEBIAN_FRONTEND: noninteractive
container:
# Use ubuntu-20.04 to compile with glibc 2.28
image: ubuntu:20.04
# Use ubuntu-22.04 to compile with glibc 2.35
image: ubuntu:22.04
strategy:
matrix:
include:
@ -148,7 +148,7 @@ jobs:
add-apt-repository "deb [arch=$(dpkg --print-architecture)] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable"
apt-get update -y && apt-get install -y docker-ce
- name: Installing Rust toolchain
uses: dtolnay/rust-toolchain@1.79
uses: dtolnay/rust-toolchain@1.81
with:
profile: minimal
target: ${{ matrix.target }}

View File

@ -52,7 +52,7 @@ jobs:
- name: Setup .NET Core
uses: actions/setup-dotnet@v4
with:
dotnet-version: "6.0.x"
dotnet-version: "8.0.x"
- name: Install dependencies
run: dotnet restore
- name: Build

View File

@ -4,13 +4,9 @@ on:
workflow_dispatch:
schedule:
# Everyday at 5:00am
- cron: '0 5 * * *'
- cron: "0 5 * * *"
pull_request:
push:
# trying and staging branches are for Bors config
branches:
- trying
- staging
merge_group:
env:
CARGO_TERM_COLOR: always
@ -19,21 +15,21 @@ env:
jobs:
test-linux:
name: Tests on ubuntu-20.04
name: Tests on ubuntu-22.04
runs-on: ubuntu-latest
container:
# Use ubuntu-20.04 to compile with glibc 2.28
image: ubuntu:20.04
# Use ubuntu-22.04 to compile with glibc 2.35
image: ubuntu:22.04
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- name: Install needed dependencies
run: |
apt-get update && apt-get install -y curl
apt-get install build-essential -y
- name: Setup test with Rust stable
uses: dtolnay/rust-toolchain@1.79
uses: dtolnay/rust-toolchain@1.81
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.7.5
uses: Swatinem/rust-cache@v2.7.7
- name: Run cargo check without any default features
uses: actions-rs/cargo@v1
with:
@ -55,8 +51,8 @@ jobs:
steps:
- uses: actions/checkout@v3
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.7.5
- uses: dtolnay/rust-toolchain@1.79
uses: Swatinem/rust-cache@v2.7.7
- uses: dtolnay/rust-toolchain@1.81
- name: Run cargo check without any default features
uses: actions-rs/cargo@v1
with:
@ -72,8 +68,8 @@ jobs:
name: Tests almost all features
runs-on: ubuntu-latest
container:
# Use ubuntu-20.04 to compile with glibc 2.28
image: ubuntu:20.04
# Use ubuntu-22.04 to compile with glibc 2.35
image: ubuntu:22.04
if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
steps:
- uses: actions/checkout@v3
@ -81,19 +77,51 @@ jobs:
run: |
apt-get update
apt-get install --assume-yes build-essential curl
- uses: dtolnay/rust-toolchain@1.79
- uses: dtolnay/rust-toolchain@1.81
- name: Run cargo build with almost all features
run: |
cargo build --workspace --locked --release --features "$(cargo xtask list-features --exclude-feature cuda)"
cargo build --workspace --locked --release --features "$(cargo xtask list-features --exclude-feature cuda,test-ollama)"
- name: Run cargo test with almost all features
run: |
cargo test --workspace --locked --release --features "$(cargo xtask list-features --exclude-feature cuda)"
cargo test --workspace --locked --release --features "$(cargo xtask list-features --exclude-feature cuda,test-ollama)"
ollama-ubuntu:
name: Test with Ollama
runs-on: ubuntu-latest
env:
MEILI_TEST_OLLAMA_SERVER: "http://localhost:11434"
steps:
- uses: actions/checkout@v1
- name: Install Ollama
run: |
curl -fsSL https://ollama.com/install.sh | sudo -E sh
- name: Start serving
run: |
# Run it in the background, there is no way to daemonise at the moment
ollama serve &
# A short pause is required before the HTTP port is opened
sleep 5
# This endpoint blocks until ready
time curl -i http://localhost:11434
- name: Pull nomic-embed-text & all-minilm
run: |
ollama pull nomic-embed-text
ollama pull all-minilm
- name: Run cargo test
uses: actions-rs/cargo@v1
with:
command: test
args: --locked --release --all --features test-ollama ollama
test-disabled-tokenization:
name: Test disabled tokenization
runs-on: ubuntu-latest
container:
image: ubuntu:20.04
image: ubuntu:22.04
if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
steps:
- uses: actions/checkout@v3
@ -101,7 +129,7 @@ jobs:
run: |
apt-get update
apt-get install --assume-yes build-essential curl
- uses: dtolnay/rust-toolchain@1.79
- uses: dtolnay/rust-toolchain@1.81
- name: Run cargo tree without default features and check lindera is not present
run: |
if cargo tree -f '{p} {f}' -e normal --no-default-features | grep -qz lindera; then
@ -117,17 +145,17 @@ jobs:
name: Run tests in debug
runs-on: ubuntu-latest
container:
# Use ubuntu-20.04 to compile with glibc 2.28
image: ubuntu:20.04
# Use ubuntu-22.04 to compile with glibc 2.35
image: ubuntu:22.04
steps:
- uses: actions/checkout@v3
- name: Install needed dependencies
run: |
apt-get update && apt-get install -y curl
apt-get install build-essential -y
- uses: dtolnay/rust-toolchain@1.79
- uses: dtolnay/rust-toolchain@1.81
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.7.5
uses: Swatinem/rust-cache@v2.7.7
- name: Run tests in debug
uses: actions-rs/cargo@v1
with:
@ -139,12 +167,12 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: dtolnay/rust-toolchain@1.79
- uses: dtolnay/rust-toolchain@1.81
with:
profile: minimal
components: clippy
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.7.5
uses: Swatinem/rust-cache@v2.7.7
- name: Run cargo clippy
uses: actions-rs/cargo@v1
with:
@ -156,14 +184,14 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: dtolnay/rust-toolchain@1.79
- uses: dtolnay/rust-toolchain@1.81
with:
profile: minimal
toolchain: nightly-2024-07-09
override: true
components: rustfmt
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.7.5
uses: Swatinem/rust-cache@v2.7.7
- name: Run cargo fmt
# Since we never ran the `build.rs` script in the benchmark directory we are missing one auto-generated import file.
# Since we want to trigger (and fail) this action as fast as possible, instead of building the benchmark crate

View File

@ -18,7 +18,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: dtolnay/rust-toolchain@1.79
- uses: dtolnay/rust-toolchain@1.81
with:
profile: minimal
- name: Install sd

1
.gitignore vendored
View File

@ -10,6 +10,7 @@
/dumps
/bench
/_xtask_benchmark.ms
/benchmarks
# Snapshots
## ... large

View File

@ -48,6 +48,27 @@ cargo xtask bench --no-dashboard -- workloads/my_workload_1.json workloads/my_wo
For processing the results, look at [Looking at benchmark results/Without dashboard](#without-dashboard).
#### Sending a workload by hand
Sometimes you want to visualize the metrics of a worlkoad that comes from a custom report.
It is not quite easy to trick the benchboard in thinking that your report is legitimate but here are the commands you can run to upload your firefox report on a running benchboard.
```bash
# Name this hostname whatever you want
echo '{ "hostname": "the-best-place" }' | xh PUT 'http://127.0.0.1:9001/api/v1/machine'
# You'll receive an UUID from this command that we will call $invocation_uuid
echo '{ "commit": { "sha1": "1234567", "commit_date": "2024-09-05 12:00:12.0 +00:00:00", "message": "A cool message" }, "machine_hostname": "the-best-place", "max_workloads": 1 }' | xh PUT 'http://127.0.0.1:9001/api/v1/invocation'
# Just use UUID from the previous command
# and you'll receive another UUID that we will call $workload_uuid
echo '{ "invocation_uuid": "$invocation_uuid", "name": "toto", "max_runs": 1 }' | xh PUT 'http://127.0.0.1:9001/api/v1/workload'
# And now use your $workload_uuid and the content of your firefox report
# but don't forget to convert your firefox report from JSONLines into an object
echo '{ "workload_uuid": "$workload_uuid", "data": $REPORT_JSON_DATA }' | xh PUT 'http://127.0.0.1:9001/api/v1/run'
```
### In CI
We have dedicated runners to run workloads on CI. Currently, there are three ways of running the CI:

View File

@ -95,6 +95,11 @@ Meilisearch follows the [cargo xtask](https://github.com/matklad/cargo-xtask) wo
Run `cargo xtask --help` from the root of the repository to find out what is available.
#### Update the openAPI file if the APIchanged
To update the openAPI file in the code, see [sprint_issue.md](https://github.com/meilisearch/meilisearch/blob/main/.github/ISSUE_TEMPLATE/sprint_issue.md#reminders-when-modifying-the-api).
If you want to update the openAPI file on the [open-api repository](https://github.com/meilisearch/open-api), see [update-openapi-issue.md](https://github.com/meilisearch/engine-team/blob/main/issue-templates/update-openapi-issue.md).
### Logging
Meilisearch uses [`tracing`](https://lib.rs/crates/tracing) for logging purposes. Tracing logs are structured and can be displayed as JSON to the end user, so prefer passing arguments as fields rather than interpolating them in the message.
@ -145,7 +150,7 @@ Some notes on GitHub PRs:
- The PR title should be accurate and descriptive of the changes.
- [Convert your PR as a draft](https://help.github.com/en/github/collaborating-with-issues-and-pull-requests/changing-the-stage-of-a-pull-request) if your changes are a work in progress: no one will review it until you pass your PR as ready for review.<br>
The draft PRs are recommended when you want to show that you are working on something and make your work visible.
- The branch related to the PR must be **up-to-date with `main`** before merging. Fortunately, this project uses [Bors](https://github.com/bors-ng/bors-ng) to automatically enforce this requirement without the PR author having to rebase manually.
- The branch related to the PR must be **up-to-date with `main`** before merging. Fortunately, this project uses [GitHub Merge Queues](https://github.blog/news-insights/product-news/github-merge-queue-is-generally-available/) to automatically enforce this requirement without the PR author having to rebase manually.
## Release Process (for internal team only)
@ -153,8 +158,7 @@ Meilisearch tools follow the [Semantic Versioning Convention](https://semver.org
### Automation to rebase and Merge the PRs
This project integrates a bot that helps us manage pull requests merging.<br>
_[Read more about this](https://github.com/meilisearch/integration-guides/blob/main/resources/bors.md)._
This project uses GitHub Merge Queues that helps us manage pull requests merging.
### How to Publish a new Release

1565
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@ -22,7 +22,7 @@ members = [
]
[workspace.package]
version = "1.12.0"
version = "1.14.0"
authors = [
"Quentin de Quelen <quentin@dequelen.me>",
"Clément Renault <clement@meilisearch.com>",
@ -36,6 +36,12 @@ license = "MIT"
[profile.release]
codegen-units = 1
# We now compile heed without the NDEBUG define for better performance.
# However, we still enable debug assertions for a better detection of
# disk corruption on the cloud or in OSS.
[profile.release.package.heed]
debug-assertions = true
[profile.dev.package.flate2]
opt-level = 3

View File

@ -1,5 +1,5 @@
# Compile
FROM rust:1.79.0-alpine3.20 AS compiler
FROM rust:1.81.0-alpine3.20 AS compiler
RUN apk add -q --no-cache build-base openssl-dev

View File

@ -1,6 +1,6 @@
MIT License
Copyright (c) 2019-2024 Meili SAS
Copyright (c) 2019-2025 Meili SAS
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal

View File

@ -20,7 +20,7 @@
<p align="center">
<a href="https://deps.rs/repo/github/meilisearch/meilisearch"><img src="https://deps.rs/repo/github/meilisearch/meilisearch/status.svg" alt="Dependency status"></a>
<a href="https://github.com/meilisearch/meilisearch/blob/main/LICENSE"><img src="https://img.shields.io/badge/license-MIT-informational" alt="License"></a>
<a href="https://ms-bors.herokuapp.com/repositories/52"><img src="https://bors.tech/images/badge_small.svg" alt="Bors enabled"></a>
<a href="https://github.com/meilisearch/meilisearch/queue"><img alt="Merge Queues enabled" src="https://img.shields.io/badge/Merge_Queues-enabled-%2357cf60?logo=github"></a>
</p>
<p align="center">⚡ A lightning-fast search engine that fits effortlessly into your apps, websites, and workflow 🔍</p>
@ -58,6 +58,7 @@ See the list of all our example apps in our [demos repository](https://github.co
- **[Multi-Tenancy](https://www.meilisearch.com/docs/learn/security/multitenancy_tenant_tokens?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** personalize search results for any number of application tenants
- **Highly Customizable:** customize Meilisearch to your specific needs or use our out-of-the-box and hassle-free presets
- **[RESTful API](https://www.meilisearch.com/docs/reference/api/overview?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** integrate Meilisearch in your technical stack with our plugins and SDKs
- **AI-ready:** works out of the box with [langchain](https://www.meilisearch.com/with/langchain) and the [model context protocol](https://github.com/meilisearch/meilisearch-mcp)
- **Easy to install, deploy, and maintain**
## 📖 Documentation

View File

@ -1403,6 +1403,104 @@
"title": "Number of tasks by indexes",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 15,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green"
},
{
"color": "red",
"value": 80
}
]
},
"unit": "none"
},
"overrides": []
},
"gridPos": {
"h": 11,
"w": 12,
"x": 12,
"y": 51
},
"id": 29,
"interval": "5s",
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "right",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "8.1.4",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS}"
},
"editorMode": "builder",
"exemplar": true,
"expr": "meilisearch_task_queue_latency_seconds{instance=\"$instance\", job=\"$job\"}",
"interval": "",
"legendFormat": "{{value}} ",
"range": true,
"refId": "A"
}
],
"title": "Task queue latency",
"type": "timeseries"
},
{
"collapsed": true,
"datasource": {

View File

@ -1,11 +0,0 @@
status = [
'Tests on ubuntu-20.04',
'Tests on macos-13',
'Tests on windows-2022',
'Run Clippy',
'Run Rustfmt',
'Run tests in debug',
]
pr_status = ['Milestone Check']
# 3 hours timeout
timeout-sec = 10800

View File

@ -11,27 +11,27 @@ edition.workspace = true
license.workspace = true
[dependencies]
anyhow = "1.0.86"
anyhow = "1.0.95"
bumpalo = "3.16.0"
csv = "1.3.0"
csv = "1.3.1"
memmap2 = "0.9.5"
milli = { path = "../milli" }
mimalloc = { version = "0.1.43", default-features = false }
serde_json = { version = "1.0.120", features = ["preserve_order"] }
tempfile = "3.14.0"
serde_json = { version = "1.0.135", features = ["preserve_order"] }
tempfile = "3.15.0"
[dev-dependencies]
criterion = { version = "0.5.1", features = ["html_reports"] }
rand = "0.8.5"
rand_chacha = "0.3.1"
roaring = "0.10.7"
roaring = "0.10.10"
[build-dependencies]
anyhow = "1.0.86"
bytes = "1.6.0"
anyhow = "1.0.95"
bytes = "1.9.0"
convert_case = "0.6.0"
flate2 = "1.0.30"
reqwest = { version = "0.12.5", features = ["blocking", "rustls-tls"], default-features = false }
flate2 = "1.0.35"
reqwest = { version = "0.12.12", features = ["blocking", "rustls-tls"], default-features = false }
[features]
default = ["milli/all-tokenizations"]

View File

@ -8,10 +8,11 @@ use bumpalo::Bump;
use criterion::{criterion_group, criterion_main, Criterion};
use milli::documents::PrimaryKey;
use milli::heed::{EnvOpenOptions, RwTxn};
use milli::progress::Progress;
use milli::update::new::indexer;
use milli::update::{IndexDocumentsMethod, IndexerConfig, Settings};
use milli::update::{IndexerConfig, Settings};
use milli::vector::EmbeddingConfigs;
use milli::Index;
use milli::{FilterableAttributesRule, Index};
use rand::seq::SliceRandom;
use rand_chacha::rand_core::SeedableRng;
use roaring::RoaringBitmap;
@ -34,10 +35,11 @@ fn setup_dir(path: impl AsRef<Path>) {
fn setup_index() -> Index {
let path = "benches.mmdb";
setup_dir(path);
let mut options = EnvOpenOptions::new();
let options = EnvOpenOptions::new();
let mut options = options.read_txn_without_tls();
options.map_size(100 * 1024 * 1024 * 1024); // 100 GB
options.max_readers(100);
Index::new(options, path).unwrap()
Index::new(options, path, true).unwrap()
}
fn setup_settings<'t>(
@ -56,7 +58,8 @@ fn setup_settings<'t>(
let searchable_fields = searchable_fields.iter().map(|s| s.to_string()).collect();
builder.set_searchable_fields(searchable_fields);
let filterable_fields = filterable_fields.iter().map(|s| s.to_string()).collect();
let filterable_fields =
filterable_fields.iter().map(|s| FilterableAttributesRule::Field(s.to_string())).collect();
builder.set_filterable_fields(filterable_fields);
let sortable_fields = sortable_fields.iter().map(|s| s.to_string()).collect();
@ -137,10 +140,9 @@ fn indexing_songs_default(c: &mut Criterion) {
let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
let mut new_fields_ids_map = db_fields_ids_map.clone();
let mut indexer =
indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments);
let mut indexer = indexer::DocumentOperation::new();
let documents = utils::documents_from(datasets_paths::SMOL_SONGS, "csv");
indexer.add_documents(&documents).unwrap();
indexer.replace_documents(&documents).unwrap();
let indexer_alloc = Bump::new();
let (document_changes, _operation_stats, primary_key) = indexer
@ -151,7 +153,7 @@ fn indexing_songs_default(c: &mut Criterion) {
None,
&mut new_fields_ids_map,
&|| false,
&|_progress| (),
Progress::default(),
)
.unwrap();
@ -166,7 +168,7 @@ fn indexing_songs_default(c: &mut Criterion) {
&document_changes,
EmbeddingConfigs::default(),
&|| false,
&|_| (),
&Progress::default(),
)
.unwrap();
@ -204,10 +206,9 @@ fn reindexing_songs_default(c: &mut Criterion) {
let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
let mut new_fields_ids_map = db_fields_ids_map.clone();
let mut indexer =
indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments);
let mut indexer = indexer::DocumentOperation::new();
let documents = utils::documents_from(datasets_paths::SMOL_SONGS, "csv");
indexer.add_documents(&documents).unwrap();
indexer.replace_documents(&documents).unwrap();
let indexer_alloc = Bump::new();
let (document_changes, _operation_stats, primary_key) = indexer
@ -218,7 +219,7 @@ fn reindexing_songs_default(c: &mut Criterion) {
None,
&mut new_fields_ids_map,
&|| false,
&|_progress| (),
Progress::default(),
)
.unwrap();
@ -233,7 +234,7 @@ fn reindexing_songs_default(c: &mut Criterion) {
&document_changes,
EmbeddingConfigs::default(),
&|| false,
&|_| (),
&Progress::default(),
)
.unwrap();
@ -249,10 +250,9 @@ fn reindexing_songs_default(c: &mut Criterion) {
let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
let mut new_fields_ids_map = db_fields_ids_map.clone();
let mut indexer =
indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments);
let mut indexer = indexer::DocumentOperation::new();
let documents = utils::documents_from(datasets_paths::SMOL_SONGS, "csv");
indexer.add_documents(&documents).unwrap();
indexer.replace_documents(&documents).unwrap();
let indexer_alloc = Bump::new();
let (document_changes, _operation_stats, primary_key) = indexer
@ -263,7 +263,7 @@ fn reindexing_songs_default(c: &mut Criterion) {
None,
&mut new_fields_ids_map,
&|| false,
&|_progress| (),
Progress::default(),
)
.unwrap();
@ -278,7 +278,7 @@ fn reindexing_songs_default(c: &mut Criterion) {
&document_changes,
EmbeddingConfigs::default(),
&|| false,
&|_| (),
&Progress::default(),
)
.unwrap();
@ -318,10 +318,9 @@ fn deleting_songs_in_batches_default(c: &mut Criterion) {
let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
let mut new_fields_ids_map = db_fields_ids_map.clone();
let mut indexer =
indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments);
let mut indexer = indexer::DocumentOperation::new();
let documents = utils::documents_from(datasets_paths::SMOL_SONGS, "csv");
indexer.add_documents(&documents).unwrap();
indexer.replace_documents(&documents).unwrap();
let indexer_alloc = Bump::new();
let (document_changes, _operation_stats, primary_key) = indexer
@ -332,7 +331,7 @@ fn deleting_songs_in_batches_default(c: &mut Criterion) {
None,
&mut new_fields_ids_map,
&|| false,
&|_progress| (),
Progress::default(),
)
.unwrap();
@ -347,7 +346,7 @@ fn deleting_songs_in_batches_default(c: &mut Criterion) {
&document_changes,
EmbeddingConfigs::default(),
&|| false,
&|_| (),
&Progress::default(),
)
.unwrap();
@ -395,10 +394,9 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
let mut new_fields_ids_map = db_fields_ids_map.clone();
let mut indexer =
indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments);
let mut indexer = indexer::DocumentOperation::new();
let documents = utils::documents_from(datasets_paths::SMOL_SONGS_1_2, "csv");
indexer.add_documents(&documents).unwrap();
indexer.replace_documents(&documents).unwrap();
let indexer_alloc = Bump::new();
let (document_changes, _operation_stats, primary_key) = indexer
@ -409,7 +407,7 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
None,
&mut new_fields_ids_map,
&|| false,
&|_progress| (),
Progress::default(),
)
.unwrap();
@ -424,7 +422,7 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
&document_changes,
EmbeddingConfigs::default(),
&|| false,
&|_| (),
&Progress::default(),
)
.unwrap();
@ -440,10 +438,9 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
let mut new_fields_ids_map = db_fields_ids_map.clone();
let mut indexer =
indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments);
let mut indexer = indexer::DocumentOperation::new();
let documents = utils::documents_from(datasets_paths::SMOL_SONGS_3_4, "csv");
indexer.add_documents(&documents).unwrap();
indexer.replace_documents(&documents).unwrap();
let indexer_alloc = Bump::new();
let (document_changes, _operation_stats, primary_key) = indexer
@ -454,7 +451,7 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
None,
&mut new_fields_ids_map,
&|| false,
&|_progress| (),
Progress::default(),
)
.unwrap();
@ -469,7 +466,7 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
&document_changes,
EmbeddingConfigs::default(),
&|| false,
&|_| (),
&Progress::default(),
)
.unwrap();
@ -481,10 +478,9 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
let mut new_fields_ids_map = db_fields_ids_map.clone();
let mut indexer =
indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments);
let mut indexer = indexer::DocumentOperation::new();
let documents = utils::documents_from(datasets_paths::SMOL_SONGS_4_4, "csv");
indexer.add_documents(&documents).unwrap();
indexer.replace_documents(&documents).unwrap();
let indexer_alloc = Bump::new();
let (document_changes, _operation_stats, primary_key) = indexer
@ -495,7 +491,7 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
None,
&mut new_fields_ids_map,
&|| false,
&|_progress| (),
Progress::default(),
)
.unwrap();
@ -510,7 +506,7 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
&document_changes,
EmbeddingConfigs::default(),
&|| false,
&|_| (),
&Progress::default(),
)
.unwrap();
@ -548,11 +544,10 @@ fn indexing_songs_without_faceted_numbers(c: &mut Criterion) {
let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
let mut new_fields_ids_map = db_fields_ids_map.clone();
let mut indexer =
indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments);
let mut indexer = indexer::DocumentOperation::new();
let documents = utils::documents_from(datasets_paths::SMOL_SONGS, "csv");
indexer.add_documents(&documents).unwrap();
indexer.replace_documents(&documents).unwrap();
let indexer_alloc = Bump::new();
let (document_changes, _operation_stats, primary_key) = indexer
@ -563,7 +558,7 @@ fn indexing_songs_without_faceted_numbers(c: &mut Criterion) {
None,
&mut new_fields_ids_map,
&|| false,
&|_progress| (),
Progress::default(),
)
.unwrap();
@ -578,7 +573,7 @@ fn indexing_songs_without_faceted_numbers(c: &mut Criterion) {
&document_changes,
EmbeddingConfigs::default(),
&|| false,
&|_| (),
&Progress::default(),
)
.unwrap();
@ -616,10 +611,9 @@ fn indexing_songs_without_faceted_fields(c: &mut Criterion) {
let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
let mut new_fields_ids_map = db_fields_ids_map.clone();
let mut indexer =
indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments);
let mut indexer = indexer::DocumentOperation::new();
let documents = utils::documents_from(datasets_paths::SMOL_SONGS, "csv");
indexer.add_documents(&documents).unwrap();
indexer.replace_documents(&documents).unwrap();
let indexer_alloc = Bump::new();
let (document_changes, _operation_stats, primary_key) = indexer
@ -630,7 +624,7 @@ fn indexing_songs_without_faceted_fields(c: &mut Criterion) {
None,
&mut new_fields_ids_map,
&|| false,
&|_progress| (),
Progress::default(),
)
.unwrap();
@ -645,7 +639,7 @@ fn indexing_songs_without_faceted_fields(c: &mut Criterion) {
&document_changes,
EmbeddingConfigs::default(),
&|| false,
&|_| (),
&Progress::default(),
)
.unwrap();
@ -683,10 +677,9 @@ fn indexing_wiki(c: &mut Criterion) {
let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
let mut new_fields_ids_map = db_fields_ids_map.clone();
let mut indexer =
indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments);
let mut indexer = indexer::DocumentOperation::new();
let documents = utils::documents_from(datasets_paths::SMOL_WIKI_ARTICLES, "csv");
indexer.add_documents(&documents).unwrap();
indexer.replace_documents(&documents).unwrap();
let indexer_alloc = Bump::new();
let (document_changes, _operation_stats, primary_key) = indexer
@ -697,7 +690,7 @@ fn indexing_wiki(c: &mut Criterion) {
None,
&mut new_fields_ids_map,
&|| false,
&|_progress| (),
Progress::default(),
)
.unwrap();
@ -712,7 +705,7 @@ fn indexing_wiki(c: &mut Criterion) {
&document_changes,
EmbeddingConfigs::default(),
&|| false,
&|_| (),
&Progress::default(),
)
.unwrap();
@ -749,10 +742,9 @@ fn reindexing_wiki(c: &mut Criterion) {
let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
let mut new_fields_ids_map = db_fields_ids_map.clone();
let mut indexer =
indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments);
let mut indexer = indexer::DocumentOperation::new();
let documents = utils::documents_from(datasets_paths::SMOL_WIKI_ARTICLES, "csv");
indexer.add_documents(&documents).unwrap();
indexer.replace_documents(&documents).unwrap();
let indexer_alloc = Bump::new();
let (document_changes, _operation_stats, primary_key) = indexer
@ -763,7 +755,7 @@ fn reindexing_wiki(c: &mut Criterion) {
None,
&mut new_fields_ids_map,
&|| false,
&|_progress| (),
Progress::default(),
)
.unwrap();
@ -778,7 +770,7 @@ fn reindexing_wiki(c: &mut Criterion) {
&document_changes,
EmbeddingConfigs::default(),
&|| false,
&|_| (),
&Progress::default(),
)
.unwrap();
@ -794,10 +786,9 @@ fn reindexing_wiki(c: &mut Criterion) {
let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
let mut new_fields_ids_map = db_fields_ids_map.clone();
let mut indexer =
indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments);
let mut indexer = indexer::DocumentOperation::new();
let documents = utils::documents_from(datasets_paths::SMOL_WIKI_ARTICLES, "csv");
indexer.add_documents(&documents).unwrap();
indexer.replace_documents(&documents).unwrap();
let indexer_alloc = Bump::new();
let (document_changes, _operation_stats, primary_key) = indexer
@ -808,7 +799,7 @@ fn reindexing_wiki(c: &mut Criterion) {
None,
&mut new_fields_ids_map,
&|| false,
&|_progress| (),
Progress::default(),
)
.unwrap();
@ -823,7 +814,7 @@ fn reindexing_wiki(c: &mut Criterion) {
&document_changes,
EmbeddingConfigs::default(),
&|| false,
&|_| (),
&Progress::default(),
)
.unwrap();
@ -862,10 +853,9 @@ fn deleting_wiki_in_batches_default(c: &mut Criterion) {
let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
let mut new_fields_ids_map = db_fields_ids_map.clone();
let mut indexer =
indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments);
let mut indexer = indexer::DocumentOperation::new();
let documents = utils::documents_from(datasets_paths::SMOL_WIKI_ARTICLES, "csv");
indexer.add_documents(&documents).unwrap();
indexer.replace_documents(&documents).unwrap();
let indexer_alloc = Bump::new();
let (document_changes, _operation_stats, primary_key) = indexer
@ -876,7 +866,7 @@ fn deleting_wiki_in_batches_default(c: &mut Criterion) {
None,
&mut new_fields_ids_map,
&|| false,
&|_progress| (),
Progress::default(),
)
.unwrap();
@ -891,7 +881,7 @@ fn deleting_wiki_in_batches_default(c: &mut Criterion) {
&document_changes,
EmbeddingConfigs::default(),
&|| false,
&|_| (),
&Progress::default(),
)
.unwrap();
@ -938,11 +928,10 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
let mut new_fields_ids_map = db_fields_ids_map.clone();
let mut indexer =
indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments);
let mut indexer = indexer::DocumentOperation::new();
let documents =
utils::documents_from(datasets_paths::SMOL_WIKI_ARTICLES_1_2, "csv");
indexer.add_documents(&documents).unwrap();
indexer.replace_documents(&documents).unwrap();
let indexer_alloc = Bump::new();
let (document_changes, _operation_stats, primary_key) = indexer
@ -953,7 +942,7 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
None,
&mut new_fields_ids_map,
&|| false,
&|_progress| (),
Progress::default(),
)
.unwrap();
@ -968,7 +957,7 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
&document_changes,
EmbeddingConfigs::default(),
&|| false,
&|_| (),
&Progress::default(),
)
.unwrap();
@ -984,11 +973,10 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
let mut new_fields_ids_map = db_fields_ids_map.clone();
let mut indexer =
indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments);
let mut indexer = indexer::DocumentOperation::new();
let documents =
utils::documents_from(datasets_paths::SMOL_WIKI_ARTICLES_3_4, "csv");
indexer.add_documents(&documents).unwrap();
indexer.replace_documents(&documents).unwrap();
let indexer_alloc = Bump::new();
let (document_changes, _operation_stats, primary_key) = indexer
@ -999,7 +987,7 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
None,
&mut new_fields_ids_map,
&|| false,
&|_progress| (),
Progress::default(),
)
.unwrap();
@ -1014,7 +1002,7 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
&document_changes,
EmbeddingConfigs::default(),
&|| false,
&|_| (),
&Progress::default(),
)
.unwrap();
@ -1026,11 +1014,10 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
let mut new_fields_ids_map = db_fields_ids_map.clone();
let mut indexer =
indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments);
let mut indexer = indexer::DocumentOperation::new();
let documents =
utils::documents_from(datasets_paths::SMOL_WIKI_ARTICLES_4_4, "csv");
indexer.add_documents(&documents).unwrap();
indexer.replace_documents(&documents).unwrap();
let indexer_alloc = Bump::new();
let (document_changes, _operation_stats, primary_key) = indexer
@ -1041,7 +1028,7 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
None,
&mut new_fields_ids_map,
&|| false,
&|_progress| (),
Progress::default(),
)
.unwrap();
@ -1056,7 +1043,7 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
&document_changes,
EmbeddingConfigs::default(),
&|| false,
&|_| (),
&Progress::default(),
)
.unwrap();
@ -1094,10 +1081,9 @@ fn indexing_movies_default(c: &mut Criterion) {
let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
let mut new_fields_ids_map = db_fields_ids_map.clone();
let mut indexer =
indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments);
let mut indexer = indexer::DocumentOperation::new();
let documents = utils::documents_from(datasets_paths::MOVIES, "json");
indexer.add_documents(&documents).unwrap();
indexer.replace_documents(&documents).unwrap();
let indexer_alloc = Bump::new();
let (document_changes, _operation_stats, primary_key) = indexer
@ -1108,7 +1094,7 @@ fn indexing_movies_default(c: &mut Criterion) {
None,
&mut new_fields_ids_map,
&|| false,
&|_progress| (),
Progress::default(),
)
.unwrap();
@ -1123,7 +1109,7 @@ fn indexing_movies_default(c: &mut Criterion) {
&document_changes,
EmbeddingConfigs::default(),
&|| false,
&|_| (),
&Progress::default(),
)
.unwrap();
@ -1160,10 +1146,9 @@ fn reindexing_movies_default(c: &mut Criterion) {
let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
let mut new_fields_ids_map = db_fields_ids_map.clone();
let mut indexer =
indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments);
let mut indexer = indexer::DocumentOperation::new();
let documents = utils::documents_from(datasets_paths::MOVIES, "json");
indexer.add_documents(&documents).unwrap();
indexer.replace_documents(&documents).unwrap();
let indexer_alloc = Bump::new();
let (document_changes, _operation_stats, primary_key) = indexer
@ -1174,7 +1159,7 @@ fn reindexing_movies_default(c: &mut Criterion) {
None,
&mut new_fields_ids_map,
&|| false,
&|_progress| (),
Progress::default(),
)
.unwrap();
@ -1189,7 +1174,7 @@ fn reindexing_movies_default(c: &mut Criterion) {
&document_changes,
EmbeddingConfigs::default(),
&|| false,
&|_| (),
&Progress::default(),
)
.unwrap();
@ -1205,10 +1190,9 @@ fn reindexing_movies_default(c: &mut Criterion) {
let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
let mut new_fields_ids_map = db_fields_ids_map.clone();
let mut indexer =
indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments);
let mut indexer = indexer::DocumentOperation::new();
let documents = utils::documents_from(datasets_paths::MOVIES, "json");
indexer.add_documents(&documents).unwrap();
indexer.replace_documents(&documents).unwrap();
let indexer_alloc = Bump::new();
let (document_changes, _operation_stats, primary_key) = indexer
@ -1219,7 +1203,7 @@ fn reindexing_movies_default(c: &mut Criterion) {
None,
&mut new_fields_ids_map,
&|| false,
&|_progress| (),
Progress::default(),
)
.unwrap();
@ -1234,7 +1218,7 @@ fn reindexing_movies_default(c: &mut Criterion) {
&document_changes,
EmbeddingConfigs::default(),
&|| false,
&|_| (),
&Progress::default(),
)
.unwrap();
@ -1273,10 +1257,9 @@ fn deleting_movies_in_batches_default(c: &mut Criterion) {
let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
let mut new_fields_ids_map = db_fields_ids_map.clone();
let mut indexer =
indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments);
let mut indexer = indexer::DocumentOperation::new();
let documents = utils::documents_from(datasets_paths::MOVIES, "json");
indexer.add_documents(&documents).unwrap();
indexer.replace_documents(&documents).unwrap();
let indexer_alloc = Bump::new();
let (document_changes, _operation_stats, primary_key) = indexer
@ -1287,7 +1270,7 @@ fn deleting_movies_in_batches_default(c: &mut Criterion) {
None,
&mut new_fields_ids_map,
&|| false,
&|_progress| (),
Progress::default(),
)
.unwrap();
@ -1302,7 +1285,7 @@ fn deleting_movies_in_batches_default(c: &mut Criterion) {
&document_changes,
EmbeddingConfigs::default(),
&|| false,
&|_| (),
&Progress::default(),
)
.unwrap();
@ -1350,7 +1333,7 @@ fn delete_documents_from_ids(index: Index, document_ids_to_delete: Vec<RoaringBi
&document_changes,
EmbeddingConfigs::default(),
&|| false,
&|_| (),
&Progress::default(),
)
.unwrap();
@ -1386,10 +1369,9 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
let mut new_fields_ids_map = db_fields_ids_map.clone();
let mut indexer =
indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments);
let mut indexer = indexer::DocumentOperation::new();
let documents = utils::documents_from(datasets_paths::MOVIES_1_2, "json");
indexer.add_documents(&documents).unwrap();
indexer.replace_documents(&documents).unwrap();
let indexer_alloc = Bump::new();
let (document_changes, _operation_stats, primary_key) = indexer
@ -1400,7 +1382,7 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
None,
&mut new_fields_ids_map,
&|| false,
&|_progress| (),
Progress::default(),
)
.unwrap();
@ -1415,7 +1397,7 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
&document_changes,
EmbeddingConfigs::default(),
&|| false,
&|_| (),
&Progress::default(),
)
.unwrap();
@ -1431,10 +1413,9 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
let mut new_fields_ids_map = db_fields_ids_map.clone();
let mut indexer =
indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments);
let mut indexer = indexer::DocumentOperation::new();
let documents = utils::documents_from(datasets_paths::MOVIES_3_4, "json");
indexer.add_documents(&documents).unwrap();
indexer.replace_documents(&documents).unwrap();
let indexer_alloc = Bump::new();
let (document_changes, _operation_stats, primary_key) = indexer
@ -1445,7 +1426,7 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
None,
&mut new_fields_ids_map,
&|| false,
&|_progress| (),
Progress::default(),
)
.unwrap();
@ -1460,7 +1441,7 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
&document_changes,
EmbeddingConfigs::default(),
&|| false,
&|_| (),
&Progress::default(),
)
.unwrap();
@ -1472,10 +1453,9 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
let mut new_fields_ids_map = db_fields_ids_map.clone();
let mut indexer =
indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments);
let mut indexer = indexer::DocumentOperation::new();
let documents = utils::documents_from(datasets_paths::MOVIES_4_4, "json");
indexer.add_documents(&documents).unwrap();
indexer.replace_documents(&documents).unwrap();
let indexer_alloc = Bump::new();
let (document_changes, _operation_stats, primary_key) = indexer
@ -1486,7 +1466,7 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
None,
&mut new_fields_ids_map,
&|| false,
&|_progress| (),
Progress::default(),
)
.unwrap();
@ -1501,7 +1481,7 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
&document_changes,
EmbeddingConfigs::default(),
&|| false,
&|_| (),
&Progress::default(),
)
.unwrap();
@ -1562,10 +1542,9 @@ fn indexing_nested_movies_default(c: &mut Criterion) {
let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
let mut new_fields_ids_map = db_fields_ids_map.clone();
let mut indexer =
indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments);
let mut indexer = indexer::DocumentOperation::new();
let documents = utils::documents_from(datasets_paths::NESTED_MOVIES, "json");
indexer.add_documents(&documents).unwrap();
indexer.replace_documents(&documents).unwrap();
let indexer_alloc = Bump::new();
let (document_changes, _operation_stats, primary_key) = indexer
@ -1576,7 +1555,7 @@ fn indexing_nested_movies_default(c: &mut Criterion) {
None,
&mut new_fields_ids_map,
&|| false,
&|_progress| (),
Progress::default(),
)
.unwrap();
@ -1591,7 +1570,7 @@ fn indexing_nested_movies_default(c: &mut Criterion) {
&document_changes,
EmbeddingConfigs::default(),
&|| false,
&|_| (),
&Progress::default(),
)
.unwrap();
@ -1653,10 +1632,9 @@ fn deleting_nested_movies_in_batches_default(c: &mut Criterion) {
let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
let mut new_fields_ids_map = db_fields_ids_map.clone();
let mut indexer =
indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments);
let mut indexer = indexer::DocumentOperation::new();
let documents = utils::documents_from(datasets_paths::NESTED_MOVIES, "json");
indexer.add_documents(&documents).unwrap();
indexer.replace_documents(&documents).unwrap();
let indexer_alloc = Bump::new();
let (document_changes, _operation_stats, primary_key) = indexer
@ -1667,7 +1645,7 @@ fn deleting_nested_movies_in_batches_default(c: &mut Criterion) {
None,
&mut new_fields_ids_map,
&|| false,
&|_progress| (),
Progress::default(),
)
.unwrap();
@ -1682,7 +1660,7 @@ fn deleting_nested_movies_in_batches_default(c: &mut Criterion) {
&document_changes,
EmbeddingConfigs::default(),
&|| false,
&|_| (),
&Progress::default(),
)
.unwrap();
@ -1736,10 +1714,9 @@ fn indexing_nested_movies_without_faceted_fields(c: &mut Criterion) {
let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
let mut new_fields_ids_map = db_fields_ids_map.clone();
let mut indexer =
indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments);
let mut indexer = indexer::DocumentOperation::new();
let documents = utils::documents_from(datasets_paths::NESTED_MOVIES, "json");
indexer.add_documents(&documents).unwrap();
indexer.replace_documents(&documents).unwrap();
let indexer_alloc = Bump::new();
let (document_changes, _operation_stats, primary_key) = indexer
@ -1750,7 +1727,7 @@ fn indexing_nested_movies_without_faceted_fields(c: &mut Criterion) {
None,
&mut new_fields_ids_map,
&|| false,
&|_progress| (),
Progress::default(),
)
.unwrap();
@ -1765,7 +1742,7 @@ fn indexing_nested_movies_without_faceted_fields(c: &mut Criterion) {
&document_changes,
EmbeddingConfigs::default(),
&|| false,
&|_| (),
&Progress::default(),
)
.unwrap();
@ -1803,10 +1780,9 @@ fn indexing_geo(c: &mut Criterion) {
let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
let mut new_fields_ids_map = db_fields_ids_map.clone();
let mut indexer =
indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments);
let mut indexer = indexer::DocumentOperation::new();
let documents = utils::documents_from(datasets_paths::SMOL_ALL_COUNTRIES, "jsonl");
indexer.add_documents(&documents).unwrap();
indexer.replace_documents(&documents).unwrap();
let indexer_alloc = Bump::new();
let (document_changes, _operation_stats, primary_key) = indexer
@ -1817,7 +1793,7 @@ fn indexing_geo(c: &mut Criterion) {
None,
&mut new_fields_ids_map,
&|| false,
&|_progress| (),
Progress::default(),
)
.unwrap();
@ -1832,7 +1808,7 @@ fn indexing_geo(c: &mut Criterion) {
&document_changes,
EmbeddingConfigs::default(),
&|| false,
&|_| (),
&Progress::default(),
)
.unwrap();
@ -1869,10 +1845,9 @@ fn reindexing_geo(c: &mut Criterion) {
let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
let mut new_fields_ids_map = db_fields_ids_map.clone();
let mut indexer =
indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments);
let mut indexer = indexer::DocumentOperation::new();
let documents = utils::documents_from(datasets_paths::SMOL_ALL_COUNTRIES, "jsonl");
indexer.add_documents(&documents).unwrap();
indexer.replace_documents(&documents).unwrap();
let indexer_alloc = Bump::new();
let (document_changes, _operation_stats, primary_key) = indexer
@ -1883,7 +1858,7 @@ fn reindexing_geo(c: &mut Criterion) {
None,
&mut new_fields_ids_map,
&|| false,
&|_progress| (),
Progress::default(),
)
.unwrap();
@ -1898,7 +1873,7 @@ fn reindexing_geo(c: &mut Criterion) {
&document_changes,
EmbeddingConfigs::default(),
&|| false,
&|_| (),
&Progress::default(),
)
.unwrap();
@ -1914,10 +1889,9 @@ fn reindexing_geo(c: &mut Criterion) {
let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
let mut new_fields_ids_map = db_fields_ids_map.clone();
let mut indexer =
indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments);
let mut indexer = indexer::DocumentOperation::new();
let documents = utils::documents_from(datasets_paths::SMOL_ALL_COUNTRIES, "jsonl");
indexer.add_documents(&documents).unwrap();
indexer.replace_documents(&documents).unwrap();
let indexer_alloc = Bump::new();
let (document_changes, _operation_stats, primary_key) = indexer
@ -1928,7 +1902,7 @@ fn reindexing_geo(c: &mut Criterion) {
None,
&mut new_fields_ids_map,
&|| false,
&|_progress| (),
Progress::default(),
)
.unwrap();
@ -1943,7 +1917,7 @@ fn reindexing_geo(c: &mut Criterion) {
&document_changes,
EmbeddingConfigs::default(),
&|| false,
&|_| (),
&Progress::default(),
)
.unwrap();
@ -1982,10 +1956,9 @@ fn deleting_geo_in_batches_default(c: &mut Criterion) {
let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
let mut new_fields_ids_map = db_fields_ids_map.clone();
let mut indexer =
indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments);
let mut indexer = indexer::DocumentOperation::new();
let documents = utils::documents_from(datasets_paths::SMOL_ALL_COUNTRIES, "jsonl");
indexer.add_documents(&documents).unwrap();
indexer.replace_documents(&documents).unwrap();
let indexer_alloc = Bump::new();
let (document_changes, _operation_stats, primary_key) = indexer
@ -1996,7 +1969,7 @@ fn deleting_geo_in_batches_default(c: &mut Criterion) {
None,
&mut new_fields_ids_map,
&|| false,
&|_progress| (),
Progress::default(),
)
.unwrap();
@ -2011,7 +1984,7 @@ fn deleting_geo_in_batches_default(c: &mut Criterion) {
&document_changes,
EmbeddingConfigs::default(),
&|| false,
&|_| (),
&Progress::default(),
)
.unwrap();

View File

@ -2,7 +2,7 @@ mod datasets_paths;
mod utils;
use criterion::{criterion_group, criterion_main};
use milli::update::Settings;
use milli::{update::Settings, FilterableAttributesRule};
use utils::Conf;
#[cfg(not(windows))]
@ -21,8 +21,10 @@ fn base_conf(builder: &mut Settings) {
["name", "alternatenames", "elevation"].iter().map(|s| s.to_string()).collect();
builder.set_searchable_fields(searchable_fields);
let filterable_fields =
["_geo", "population", "elevation"].iter().map(|s| s.to_string()).collect();
let filterable_fields = ["_geo", "population", "elevation"]
.iter()
.map(|s| FilterableAttributesRule::Field(s.to_string()))
.collect();
builder.set_filterable_fields(filterable_fields);
let sortable_fields =

View File

@ -2,7 +2,7 @@ mod datasets_paths;
mod utils;
use criterion::{criterion_group, criterion_main};
use milli::update::Settings;
use milli::{update::Settings, FilterableAttributesRule};
use utils::Conf;
#[cfg(not(windows))]
@ -22,7 +22,7 @@ fn base_conf(builder: &mut Settings) {
let faceted_fields = ["released-timestamp", "duration-float", "genre", "country", "artist"]
.iter()
.map(|s| s.to_string())
.map(|s| FilterableAttributesRule::Field(s.to_string()))
.collect();
builder.set_filterable_fields(faceted_fields);
}

View File

@ -10,8 +10,9 @@ use bumpalo::Bump;
use criterion::BenchmarkId;
use memmap2::Mmap;
use milli::heed::EnvOpenOptions;
use milli::progress::Progress;
use milli::update::new::indexer;
use milli::update::{IndexDocumentsMethod, IndexerConfig, Settings};
use milli::update::{IndexerConfig, Settings};
use milli::vector::EmbeddingConfigs;
use milli::{Criterion, Filter, Index, Object, TermsMatchingStrategy};
use serde_json::Value;
@ -64,10 +65,11 @@ pub fn base_setup(conf: &Conf) -> Index {
}
create_dir_all(conf.database_name).unwrap();
let mut options = EnvOpenOptions::new();
let options = EnvOpenOptions::new();
let mut options = options.read_txn_without_tls();
options.map_size(100 * 1024 * 1024 * 1024); // 100 GB
options.max_readers(100);
let index = Index::new(options, conf.database_name).unwrap();
let index = Index::new(options, conf.database_name, true).unwrap();
let config = IndexerConfig::default();
let mut wtxn = index.write_txn().unwrap();
@ -98,8 +100,8 @@ pub fn base_setup(conf: &Conf) -> Index {
let mut new_fields_ids_map = db_fields_ids_map.clone();
let documents = documents_from(conf.dataset, conf.dataset_format);
let mut indexer = indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments);
indexer.add_documents(&documents).unwrap();
let mut indexer = indexer::DocumentOperation::new();
indexer.replace_documents(&documents).unwrap();
let indexer_alloc = Bump::new();
let (document_changes, _operation_stats, primary_key) = indexer
@ -110,7 +112,7 @@ pub fn base_setup(conf: &Conf) -> Index {
None,
&mut new_fields_ids_map,
&|| false,
&|_progress| (),
Progress::default(),
)
.unwrap();
@ -125,7 +127,7 @@ pub fn base_setup(conf: &Conf) -> Index {
&document_changes,
EmbeddingConfigs::default(),
&|| false,
&|_| (),
&Progress::default(),
)
.unwrap();

View File

@ -11,8 +11,8 @@ license.workspace = true
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
time = { version = "0.3.36", features = ["parsing"] }
time = { version = "0.3.37", features = ["parsing"] }
[build-dependencies]
anyhow = "1.0.86"
vergen-git2 = "1.0.0"
anyhow = "1.0.95"
vergen-git2 = "1.0.2"

View File

@ -11,21 +11,21 @@ readme.workspace = true
license.workspace = true
[dependencies]
anyhow = "1.0.86"
flate2 = "1.0.30"
http = "1.1.0"
anyhow = "1.0.95"
flate2 = "1.0.35"
http = "1.2.0"
meilisearch-types = { path = "../meilisearch-types" }
once_cell = "1.19.0"
regex = "1.10.5"
roaring = { version = "0.10.7", features = ["serde"] }
serde = { version = "1.0.204", features = ["derive"] }
serde_json = { version = "1.0.120", features = ["preserve_order"] }
tar = "0.4.41"
tempfile = "3.10.1"
thiserror = "1.0.61"
time = { version = "0.3.36", features = ["serde-well-known", "formatting", "parsing", "macros"] }
tracing = "0.1.40"
uuid = { version = "1.10.0", features = ["serde", "v4"] }
once_cell = "1.20.2"
regex = "1.11.1"
roaring = { version = "0.10.10", features = ["serde"] }
serde = { version = "1.0.217", features = ["derive"] }
serde_json = { version = "1.0.135", features = ["preserve_order"] }
tar = "0.4.43"
tempfile = "3.15.0"
thiserror = "2.0.9"
time = { version = "0.3.37", features = ["serde-well-known", "formatting", "parsing", "macros"] }
tracing = "0.1.41"
uuid = { version = "1.11.0", features = ["serde", "v4"] }
[dev-dependencies]
big_s = "1.0.2"

View File

@ -10,8 +10,10 @@ dump
├── instance-uid.uuid
├── keys.jsonl
├── metadata.json
── tasks
├── update_files
│ └── [task_id].jsonl
── tasks
├── update_files
│ └── [task_id].jsonl
│ └── queue.jsonl
└── batches
└── queue.jsonl
```
```

View File

@ -141,6 +141,9 @@ pub enum KindDump {
instance_uid: Option<InstanceUid>,
},
SnapshotCreation,
UpgradeDatabase {
from: (u32, u32, u32),
},
}
impl From<Task> for TaskDump {
@ -210,6 +213,9 @@ impl From<KindWithContent> for KindDump {
KindDump::DumpCreation { keys, instance_uid }
}
KindWithContent::SnapshotCreation => KindDump::SnapshotCreation,
KindWithContent::UpgradeDatabase { from: version } => {
KindDump::UpgradeDatabase { from: version }
}
}
}
}
@ -222,14 +228,16 @@ pub(crate) mod test {
use big_s::S;
use maplit::{btreemap, btreeset};
use meilisearch_types::batches::{Batch, BatchEnqueuedAt, BatchStats};
use meilisearch_types::facet_values_sort::FacetValuesSort;
use meilisearch_types::features::RuntimeTogglableFeatures;
use meilisearch_types::features::{Network, Remote, RuntimeTogglableFeatures};
use meilisearch_types::index_uid_pattern::IndexUidPattern;
use meilisearch_types::keys::{Action, Key};
use meilisearch_types::milli;
use meilisearch_types::milli::update::Setting;
use meilisearch_types::milli::{self, FilterableAttributesRule};
use meilisearch_types::settings::{Checked, FacetingSettings, Settings};
use meilisearch_types::tasks::{Details, Status};
use meilisearch_types::task_view::DetailsView;
use meilisearch_types::tasks::{Details, Kind, Status};
use serde_json::{json, Map, Value};
use time::macros::datetime;
use uuid::Uuid;
@ -271,7 +279,10 @@ pub(crate) mod test {
let settings = Settings {
displayed_attributes: Setting::Set(vec![S("race"), S("name")]).into(),
searchable_attributes: Setting::Set(vec![S("name"), S("race")]).into(),
filterable_attributes: Setting::Set(btreeset! { S("race"), S("age") }),
filterable_attributes: Setting::Set(vec![
FilterableAttributesRule::Field(S("race")),
FilterableAttributesRule::Field(S("age")),
]),
sortable_attributes: Setting::Set(btreeset! { S("age") }),
ranking_rules: Setting::NotSet,
stop_words: Setting::NotSet,
@ -299,6 +310,33 @@ pub(crate) mod test {
settings.check()
}
pub fn create_test_batches() -> Vec<Batch> {
vec![Batch {
uid: 0,
details: DetailsView {
received_documents: Some(12),
indexed_documents: Some(Some(10)),
..DetailsView::default()
},
progress: None,
stats: BatchStats {
total_nb_tasks: 1,
status: maplit::btreemap! { Status::Succeeded => 1 },
types: maplit::btreemap! { Kind::DocumentAdditionOrUpdate => 1 },
index_uids: maplit::btreemap! { "doggo".to_string() => 1 },
progress_trace: Default::default(),
write_channel_congestion: None,
internal_database_sizes: Default::default(),
},
enqueued_at: Some(BatchEnqueuedAt {
earliest: datetime!(2022-11-11 0:00 UTC),
oldest: datetime!(2022-11-11 0:00 UTC),
}),
started_at: datetime!(2022-11-20 0:00 UTC),
finished_at: Some(datetime!(2022-11-21 0:00 UTC)),
}]
}
pub fn create_test_tasks() -> Vec<(TaskDump, Option<Vec<Document>>)> {
vec![
(
@ -421,6 +459,15 @@ pub(crate) mod test {
index.flush().unwrap();
index.settings(&settings).unwrap();
// ========== pushing the batch queue
let batches = create_test_batches();
let mut batch_queue = dump.create_batches_queue().unwrap();
for batch in &batches {
batch_queue.push_batch(batch).unwrap();
}
batch_queue.flush().unwrap();
// ========== pushing the task queue
let tasks = create_test_tasks();
@ -449,6 +496,10 @@ pub(crate) mod test {
dump.create_experimental_features(features).unwrap();
// ========== network
let network = create_test_network();
dump.create_network(network).unwrap();
// create the dump
let mut file = tempfile::tempfile().unwrap();
dump.persist_to(&mut file).unwrap();
@ -458,7 +509,14 @@ pub(crate) mod test {
}
fn create_test_features() -> RuntimeTogglableFeatures {
RuntimeTogglableFeatures { vector_store: true, ..Default::default() }
RuntimeTogglableFeatures::default()
}
fn create_test_network() -> Network {
Network {
local: Some("myself".to_string()),
remotes: maplit::btreemap! {"other".to_string() => Remote { url: "http://test".to_string(), search_api_key: Some("apiKey".to_string()) }},
}
}
#[test]
@ -509,5 +567,9 @@ pub(crate) mod test {
// ==== checking the features
let expected = create_test_features();
assert_eq!(dump.features().unwrap().unwrap(), expected);
// ==== checking the network
let expected = create_test_network();
assert_eq!(&expected, dump.network().unwrap().unwrap());
}
}

View File

@ -196,6 +196,10 @@ impl CompatV5ToV6 {
pub fn features(&self) -> Result<Option<v6::RuntimeTogglableFeatures>> {
Ok(None)
}
pub fn network(&self) -> Result<Option<&v6::Network>> {
Ok(None)
}
}
pub enum CompatIndexV5ToV6 {
@ -318,7 +322,16 @@ impl<T> From<v5::Settings<T>> for v6::Settings<v6::Unchecked> {
v6::Settings {
displayed_attributes: v6::Setting::from(settings.displayed_attributes).into(),
searchable_attributes: v6::Setting::from(settings.searchable_attributes).into(),
filterable_attributes: settings.filterable_attributes.into(),
filterable_attributes: match settings.filterable_attributes {
v5::settings::Setting::Set(filterable_attributes) => v6::Setting::Set(
filterable_attributes
.into_iter()
.map(v6::FilterableAttributesRule::Field)
.collect(),
),
v5::settings::Setting::Reset => v6::Setting::Reset,
v5::settings::Setting::NotSet => v6::Setting::NotSet,
},
sortable_attributes: settings.sortable_attributes.into(),
ranking_rules: {
match settings.ranking_rules {

View File

@ -23,6 +23,7 @@ mod v6;
pub type Document = serde_json::Map<String, serde_json::Value>;
pub type UpdateFile = dyn Iterator<Item = Result<Document>>;
#[allow(clippy::large_enum_variant)]
pub enum DumpReader {
Current(V6Reader),
Compat(CompatV5ToV6),
@ -101,6 +102,13 @@ impl DumpReader {
}
}
pub fn batches(&mut self) -> Result<Box<dyn Iterator<Item = Result<v6::Batch>> + '_>> {
match self {
DumpReader::Current(current) => Ok(current.batches()),
DumpReader::Compat(_compat) => Ok(Box::new(std::iter::empty())),
}
}
pub fn keys(&mut self) -> Result<Box<dyn Iterator<Item = Result<v6::Key>> + '_>> {
match self {
DumpReader::Current(current) => Ok(current.keys()),
@ -114,6 +122,13 @@ impl DumpReader {
DumpReader::Compat(compat) => compat.features(),
}
}
pub fn network(&self) -> Result<Option<&v6::Network>> {
match self {
DumpReader::Current(current) => Ok(current.network()),
DumpReader::Compat(compat) => compat.network(),
}
}
}
impl From<V6Reader> for DumpReader {
@ -219,6 +234,10 @@ pub(crate) mod test {
insta::assert_snapshot!(dump.date().unwrap(), @"2024-05-16 15:51:34.151044 +00:00:00");
insta::assert_debug_snapshot!(dump.instance_uid().unwrap(), @"None");
// batches didn't exists at the time
let batches = dump.batches().unwrap().collect::<Result<Vec<_>>>().unwrap();
meili_snap::snapshot!(meili_snap::json_string!(batches), @"[]");
// tasks
let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap();
let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip();
@ -327,10 +346,8 @@ pub(crate) mod test {
}
}
assert_eq!(
dump.features().unwrap().unwrap(),
RuntimeTogglableFeatures { vector_store: true, ..Default::default() }
);
assert_eq!(dump.features().unwrap().unwrap(), RuntimeTogglableFeatures::default());
assert_eq!(dump.network().unwrap(), None);
}
#[test]
@ -342,6 +359,10 @@ pub(crate) mod test {
insta::assert_snapshot!(dump.date().unwrap(), @"2023-07-06 7:10:27.21958 +00:00:00");
insta::assert_debug_snapshot!(dump.instance_uid().unwrap(), @"None");
// batches didn't exists at the time
let batches = dump.batches().unwrap().collect::<Result<Vec<_>>>().unwrap();
meili_snap::snapshot!(meili_snap::json_string!(batches), @"[]");
// tasks
let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap();
let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip();
@ -373,10 +394,28 @@ pub(crate) mod test {
assert_eq!(test.documents().unwrap().count(), 1);
assert_eq!(
dump.features().unwrap().unwrap(),
RuntimeTogglableFeatures { vector_store: true, ..Default::default() }
);
assert_eq!(dump.features().unwrap().unwrap(), RuntimeTogglableFeatures::default());
}
#[test]
fn import_dump_v6_network() {
let dump = File::open("tests/assets/v6-with-network.dump").unwrap();
let dump = DumpReader::open(dump).unwrap();
// top level infos
insta::assert_snapshot!(dump.date().unwrap(), @"2025-01-29 15:45:32.738676 +00:00:00");
insta::assert_debug_snapshot!(dump.instance_uid().unwrap(), @"None");
// network
let network = dump.network().unwrap().unwrap();
insta::assert_snapshot!(network.local.as_ref().unwrap(), @"ms-0");
insta::assert_snapshot!(network.remotes.get("ms-0").as_ref().unwrap().url, @"http://localhost:7700");
insta::assert_snapshot!(network.remotes.get("ms-0").as_ref().unwrap().search_api_key.is_none(), @"true");
insta::assert_snapshot!(network.remotes.get("ms-1").as_ref().unwrap().url, @"http://localhost:7701");
insta::assert_snapshot!(network.remotes.get("ms-1").as_ref().unwrap().search_api_key.is_none(), @"true");
insta::assert_snapshot!(network.remotes.get("ms-2").as_ref().unwrap().url, @"http://ms-5679.example.meilisearch.io");
insta::assert_snapshot!(network.remotes.get("ms-2").as_ref().unwrap().search_api_key.as_ref().unwrap(), @"foo");
}
#[test]
@ -388,6 +427,10 @@ pub(crate) mod test {
insta::assert_snapshot!(dump.date().unwrap(), @"2022-10-04 15:55:10.344982459 +00:00:00");
insta::assert_snapshot!(dump.instance_uid().unwrap().unwrap(), @"9e15e977-f2ae-4761-943f-1eaf75fd736d");
// batches didn't exists at the time
let batches = dump.batches().unwrap().collect::<Result<Vec<_>>>().unwrap();
meili_snap::snapshot!(meili_snap::json_string!(batches), @"[]");
// tasks
let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap();
let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip();
@ -468,6 +511,10 @@ pub(crate) mod test {
insta::assert_snapshot!(dump.date().unwrap(), @"2022-10-06 12:53:49.131989609 +00:00:00");
insta::assert_snapshot!(dump.instance_uid().unwrap().unwrap(), @"9e15e977-f2ae-4761-943f-1eaf75fd736d");
// batches didn't exists at the time
let batches = dump.batches().unwrap().collect::<Result<Vec<_>>>().unwrap();
meili_snap::snapshot!(meili_snap::json_string!(batches), @"[]");
// tasks
let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap();
let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip();
@ -545,6 +592,10 @@ pub(crate) mod test {
insta::assert_snapshot!(dump.date().unwrap(), @"2022-10-07 11:39:03.709153554 +00:00:00");
assert_eq!(dump.instance_uid().unwrap(), None);
// batches didn't exists at the time
let batches = dump.batches().unwrap().collect::<Result<Vec<_>>>().unwrap();
meili_snap::snapshot!(meili_snap::json_string!(batches), @"[]");
// tasks
let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap();
let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip();
@ -638,6 +689,10 @@ pub(crate) mod test {
insta::assert_snapshot!(dump.date().unwrap(), @"2022-10-09 20:27:59.904096267 +00:00:00");
assert_eq!(dump.instance_uid().unwrap(), None);
// batches didn't exists at the time
let batches = dump.batches().unwrap().collect::<Result<Vec<_>>>().unwrap();
meili_snap::snapshot!(meili_snap::json_string!(batches), @"[]");
// tasks
let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap();
let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip();
@ -731,6 +786,10 @@ pub(crate) mod test {
insta::assert_snapshot!(dump.date().unwrap(), @"2023-01-30 16:26:09.247261 +00:00:00");
assert_eq!(dump.instance_uid().unwrap(), None);
// batches didn't exists at the time
let batches = dump.batches().unwrap().collect::<Result<Vec<_>>>().unwrap();
meili_snap::snapshot!(meili_snap::json_string!(batches), @"[]");
// tasks
let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap();
let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip();
@ -807,6 +866,10 @@ pub(crate) mod test {
assert_eq!(dump.date(), None);
assert_eq!(dump.instance_uid().unwrap(), None);
// batches didn't exists at the time
let batches = dump.batches().unwrap().collect::<Result<Vec<_>>>().unwrap();
meili_snap::snapshot!(meili_snap::json_string!(batches), @"[]");
// tasks
let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap();
let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip();

View File

@ -1,5 +1,5 @@
---
source: dump/src/reader/mod.rs
source: crates/dump/src/reader/mod.rs
expression: vector_index.settings().unwrap()
---
{
@ -49,6 +49,7 @@ expression: vector_index.settings().unwrap()
"source": "huggingFace",
"model": "BAAI/bge-base-en-v1.5",
"revision": "617ca489d9e86b49b8167676d8220688b99db36e",
"pooling": "forceMean",
"documentTemplate": "{% for field in fields %} {{ field.name }}: {{ field.value }}\n{% endfor %}"
}
},

View File

@ -3,6 +3,7 @@ use std::io::{BufRead, BufReader, ErrorKind};
use std::path::Path;
pub use meilisearch_types::milli;
use meilisearch_types::milli::vector::hf::OverridePooling;
use tempfile::TempDir;
use time::OffsetDateTime;
use tracing::debug;
@ -18,8 +19,10 @@ pub type Checked = meilisearch_types::settings::Checked;
pub type Unchecked = meilisearch_types::settings::Unchecked;
pub type Task = crate::TaskDump;
pub type Batch = meilisearch_types::batches::Batch;
pub type Key = meilisearch_types::keys::Key;
pub type RuntimeTogglableFeatures = meilisearch_types::features::RuntimeTogglableFeatures;
pub type Network = meilisearch_types::features::Network;
// ===== Other types to clarify the code of the compat module
// everything related to the tasks
@ -43,13 +46,17 @@ pub type ResponseError = meilisearch_types::error::ResponseError;
pub type Code = meilisearch_types::error::Code;
pub type RankingRuleView = meilisearch_types::settings::RankingRuleView;
pub type FilterableAttributesRule = meilisearch_types::milli::FilterableAttributesRule;
pub struct V6Reader {
dump: TempDir,
instance_uid: Option<Uuid>,
metadata: Metadata,
tasks: BufReader<File>,
batches: Option<BufReader<File>>,
keys: BufReader<File>,
features: Option<RuntimeTogglableFeatures>,
network: Option<Network>,
}
impl V6Reader {
@ -77,13 +84,38 @@ impl V6Reader {
} else {
None
};
let batches = match File::open(dump.path().join("batches").join("queue.jsonl")) {
Ok(file) => Some(BufReader::new(file)),
// The batch file was only introduced during the v1.13, anything prior to that won't have batches
Err(err) if err.kind() == ErrorKind::NotFound => None,
Err(e) => return Err(e.into()),
};
let network_file = match fs::read(dump.path().join("network.json")) {
Ok(network_file) => Some(network_file),
Err(error) => match error.kind() {
// Allows the file to be missing, this will only result in all experimental features disabled.
ErrorKind::NotFound => {
debug!("`network.json` not found in dump");
None
}
_ => return Err(error.into()),
},
};
let network = if let Some(network_file) = network_file {
Some(serde_json::from_reader(&*network_file)?)
} else {
None
};
Ok(V6Reader {
metadata: serde_json::from_reader(&*meta_file)?,
instance_uid,
tasks: BufReader::new(File::open(dump.path().join("tasks").join("queue.jsonl"))?),
batches,
keys: BufReader::new(File::open(dump.path().join("keys.jsonl"))?),
features,
network,
dump,
})
}
@ -124,7 +156,7 @@ impl V6Reader {
&mut self,
) -> Box<dyn Iterator<Item = Result<(Task, Option<Box<super::UpdateFile>>)>> + '_> {
Box::new((&mut self.tasks).lines().map(|line| -> Result<_> {
let task: Task = serde_json::from_str(&line?).unwrap();
let task: Task = serde_json::from_str(&line?)?;
let update_file_path = self
.dump
@ -136,8 +168,7 @@ impl V6Reader {
if update_file_path.exists() {
Ok((
task,
Some(Box::new(UpdateFile::new(&update_file_path).unwrap())
as Box<super::UpdateFile>),
Some(Box::new(UpdateFile::new(&update_file_path)?) as Box<super::UpdateFile>),
))
} else {
Ok((task, None))
@ -145,6 +176,16 @@ impl V6Reader {
}))
}
pub fn batches(&mut self) -> Box<dyn Iterator<Item = Result<Batch>> + '_> {
match self.batches.as_mut() {
Some(batches) => Box::new((batches).lines().map(|line| -> Result<_> {
let batch = serde_json::from_str(&line?)?;
Ok(batch)
})),
None => Box::new(std::iter::empty()) as Box<dyn Iterator<Item = Result<Batch>> + '_>,
}
}
pub fn keys(&mut self) -> Box<dyn Iterator<Item = Result<Key>> + '_> {
Box::new(
(&mut self.keys).lines().map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) }),
@ -154,6 +195,10 @@ impl V6Reader {
pub fn features(&self) -> Option<RuntimeTogglableFeatures> {
self.features
}
pub fn network(&self) -> Option<&Network> {
self.network.as_ref()
}
}
pub struct UpdateFile {
@ -210,7 +255,29 @@ impl V6IndexReader {
}
pub fn settings(&mut self) -> Result<Settings<Checked>> {
let settings: Settings<Unchecked> = serde_json::from_reader(&mut self.settings)?;
let mut settings: Settings<Unchecked> = serde_json::from_reader(&mut self.settings)?;
patch_embedders(&mut settings);
Ok(settings.check())
}
}
fn patch_embedders(settings: &mut Settings<Unchecked>) {
if let Setting::Set(embedders) = &mut settings.embedders {
for settings in embedders.values_mut() {
let Setting::Set(settings) = &mut settings.inner else {
continue;
};
if settings.source != Setting::Set(milli::vector::settings::EmbedderSource::HuggingFace)
{
continue;
}
settings.pooling = match settings.pooling {
Setting::Set(pooling) => Setting::Set(pooling),
// if the pooling for a hugging face embedder is not set, force it to `forceMean`
// for backward compatibility with v1.13
// dumps created in v1.14 and up will have the setting set for hugging face embedders
Setting::Reset | Setting::NotSet => Setting::Set(OverridePooling::ForceMean),
};
}
}
}

View File

@ -4,7 +4,8 @@ use std::path::PathBuf;
use flate2::write::GzEncoder;
use flate2::Compression;
use meilisearch_types::features::RuntimeTogglableFeatures;
use meilisearch_types::batches::Batch;
use meilisearch_types::features::{Network, RuntimeTogglableFeatures};
use meilisearch_types::keys::Key;
use meilisearch_types::settings::{Checked, Settings};
use serde_json::{Map, Value};
@ -54,6 +55,10 @@ impl DumpWriter {
TaskWriter::new(self.dir.path().join("tasks"))
}
pub fn create_batches_queue(&self) -> Result<BatchWriter> {
BatchWriter::new(self.dir.path().join("batches"))
}
pub fn create_experimental_features(&self, features: RuntimeTogglableFeatures) -> Result<()> {
Ok(std::fs::write(
self.dir.path().join("experimental-features.json"),
@ -61,6 +66,10 @@ impl DumpWriter {
)?)
}
pub fn create_network(&self, network: Network) -> Result<()> {
Ok(std::fs::write(self.dir.path().join("network.json"), serde_json::to_string(&network)?)?)
}
pub fn persist_to(self, mut writer: impl Write) -> Result<()> {
let gz_encoder = GzEncoder::new(&mut writer, Compression::default());
let mut tar_encoder = tar::Builder::new(gz_encoder);
@ -84,7 +93,7 @@ impl KeyWriter {
}
pub fn push_key(&mut self, key: &Key) -> Result<()> {
self.keys.write_all(&serde_json::to_vec(key)?)?;
serde_json::to_writer(&mut self.keys, &key)?;
self.keys.write_all(b"\n")?;
Ok(())
}
@ -114,7 +123,7 @@ impl TaskWriter {
/// Pushes tasks in the dump.
/// If the tasks has an associated `update_file` it'll use the `task_id` as its name.
pub fn push_task(&mut self, task: &TaskDump) -> Result<UpdateFile> {
self.queue.write_all(&serde_json::to_vec(task)?)?;
serde_json::to_writer(&mut self.queue, &task)?;
self.queue.write_all(b"\n")?;
Ok(UpdateFile::new(self.update_files.join(format!("{}.jsonl", task.uid))))
@ -126,6 +135,30 @@ impl TaskWriter {
}
}
pub struct BatchWriter {
queue: BufWriter<File>,
}
impl BatchWriter {
pub(crate) fn new(path: PathBuf) -> Result<Self> {
std::fs::create_dir(&path)?;
let queue = File::create(path.join("queue.jsonl"))?;
Ok(BatchWriter { queue: BufWriter::new(queue) })
}
/// Pushes batches in the dump.
pub fn push_batch(&mut self, batch: &Batch) -> Result<()> {
serde_json::to_writer(&mut self.queue, &batch)?;
self.queue.write_all(b"\n")?;
Ok(())
}
pub fn flush(mut self) -> Result<()> {
self.queue.flush()?;
Ok(())
}
}
pub struct UpdateFile {
path: PathBuf,
writer: Option<BufWriter<File>>,
@ -137,8 +170,8 @@ impl UpdateFile {
}
pub fn push_document(&mut self, document: &Document) -> Result<()> {
if let Some(writer) = self.writer.as_mut() {
writer.write_all(&serde_json::to_vec(document)?)?;
if let Some(mut writer) = self.writer.as_mut() {
serde_json::to_writer(&mut writer, &document)?;
writer.write_all(b"\n")?;
} else {
let file = File::create(&self.path).unwrap();
@ -205,8 +238,8 @@ pub(crate) mod test {
use super::*;
use crate::reader::Document;
use crate::test::{
create_test_api_keys, create_test_documents, create_test_dump, create_test_instance_uid,
create_test_settings, create_test_tasks,
create_test_api_keys, create_test_batches, create_test_documents, create_test_dump,
create_test_instance_uid, create_test_settings, create_test_tasks,
};
fn create_directory_hierarchy(dir: &Path) -> String {
@ -281,8 +314,10 @@ pub(crate) mod test {
let dump_path = dump.path();
// ==== checking global file hierarchy (we want to be sure there isn't too many files or too few)
insta::assert_snapshot!(create_directory_hierarchy(dump_path), @r###"
insta::assert_snapshot!(create_directory_hierarchy(dump_path), @r"
.
├---- batches/
│ └---- queue.jsonl
├---- indexes/
│ └---- doggos/
│ │ ├---- documents.jsonl
@ -295,8 +330,9 @@ pub(crate) mod test {
├---- experimental-features.json
├---- instance_uid.uuid
├---- keys.jsonl
---- metadata.json
"###);
---- metadata.json
└---- network.json
");
// ==== checking the top level infos
let metadata = fs::read_to_string(dump_path.join("metadata.json")).unwrap();
@ -349,6 +385,16 @@ pub(crate) mod test {
}
}
// ==== checking the batch queue
let batches_queue = fs::read_to_string(dump_path.join("batches/queue.jsonl")).unwrap();
for (batch, expected) in batches_queue.lines().zip(create_test_batches()) {
let mut batch = serde_json::from_str::<Batch>(batch).unwrap();
if batch.details.settings == Some(Box::new(Settings::<Unchecked>::default())) {
batch.details.settings = None;
}
assert_eq!(batch, expected, "{batch:#?}{expected:#?}");
}
// ==== checking the keys
let keys = fs::read_to_string(dump_path.join("keys.jsonl")).unwrap();
for (key, expected) in keys.lines().zip(create_test_api_keys()) {

Binary file not shown.

View File

@ -11,7 +11,7 @@ edition.workspace = true
license.workspace = true
[dependencies]
tempfile = "3.10.1"
thiserror = "1.0.61"
tracing = "0.1.40"
uuid = { version = "1.10.0", features = ["serde", "v4"] }
tempfile = "3.15.0"
thiserror = "2.0.9"
tracing = "0.1.41"
uuid = { version = "1.11.0", features = ["serde", "v4"] }

View File

@ -136,6 +136,14 @@ pub struct File {
}
impl File {
pub fn from_parts(path: PathBuf, file: Option<NamedTempFile>) -> Self {
Self { path, file }
}
pub fn into_parts(self) -> (PathBuf, Option<NamedTempFile>) {
(self.path, self.file)
}
pub fn dry_file() -> Result<Self> {
Ok(Self { path: PathBuf::new(), file: None })
}

View File

@ -17,4 +17,5 @@ nom_locate = "4.2.0"
unescaper = "0.1.5"
[dev-dependencies]
insta = "1.39.0"
# fixed version due to format breakages in v1.40
insta = "=1.39.0"

View File

@ -30,6 +30,25 @@ pub enum Condition<'a> {
StartsWith { keyword: Token<'a>, word: Token<'a> },
}
impl Condition<'_> {
pub fn operator(&self) -> &str {
match self {
Condition::GreaterThan(_) => ">",
Condition::GreaterThanOrEqual(_) => ">=",
Condition::Equal(_) => "=",
Condition::NotEqual(_) => "!=",
Condition::Null => "IS NULL",
Condition::Empty => "IS EMPTY",
Condition::Exists => "EXISTS",
Condition::LowerThan(_) => "<",
Condition::LowerThanOrEqual(_) => "<=",
Condition::Between { .. } => "TO",
Condition::Contains { .. } => "CONTAINS",
Condition::StartsWith { .. } => "STARTS WITH",
}
}
}
/// condition = value ("==" | ">" ...) value
pub fn parse_condition(input: Span) -> IResult<FilterCondition> {
let operator = alt((tag("<="), tag(">="), tag("!="), tag("<"), tag(">"), tag("=")));

View File

@ -179,6 +179,26 @@ impl<'a> FilterCondition<'a> {
}
}
pub fn fids(&self, depth: usize) -> Box<dyn Iterator<Item = &Token> + '_> {
if depth == 0 {
return Box::new(std::iter::empty());
}
match self {
FilterCondition::Condition { fid, .. } | FilterCondition::In { fid, .. } => {
Box::new(std::iter::once(fid))
}
FilterCondition::Not(filter) => {
let depth = depth.saturating_sub(1);
filter.fids(depth)
}
FilterCondition::And(subfilters) | FilterCondition::Or(subfilters) => {
let depth = depth.saturating_sub(1);
Box::new(subfilters.iter().flat_map(move |f| f.fids(depth)))
}
_ => Box::new(std::iter::empty()),
}
}
/// Returns the first token found at the specified depth, `None` if no token at this depth.
pub fn token_at_depth(&self, depth: usize) -> Option<&Token> {
match self {
@ -978,6 +998,43 @@ pub mod tests {
assert!(filter.token_at_depth(3).is_none());
}
#[test]
fn fids() {
let filter = Fc::parse("field = value").unwrap().unwrap();
let fids: Vec<_> = filter.fids(MAX_FILTER_DEPTH).collect();
assert_eq!(fids.len(), 1);
assert_eq!(fids[0].value(), "field");
let filter = Fc::parse("field IN [1, 2, 3]").unwrap().unwrap();
let fids: Vec<_> = filter.fids(MAX_FILTER_DEPTH).collect();
assert_eq!(fids.len(), 1);
assert_eq!(fids[0].value(), "field");
let filter = Fc::parse("field != value").unwrap().unwrap();
let fids: Vec<_> = filter.fids(MAX_FILTER_DEPTH).collect();
assert_eq!(fids.len(), 1);
assert_eq!(fids[0].value(), "field");
let filter = Fc::parse("field1 = value1 AND field2 = value2").unwrap().unwrap();
let fids: Vec<_> = filter.fids(MAX_FILTER_DEPTH).collect();
assert_eq!(fids.len(), 2);
assert!(fids[0].value() == "field1");
assert!(fids[1].value() == "field2");
let filter = Fc::parse("field1 = value1 OR field2 = value2").unwrap().unwrap();
let fids: Vec<_> = filter.fids(MAX_FILTER_DEPTH).collect();
assert_eq!(fids.len(), 2);
assert!(fids[0].value() == "field1");
assert!(fids[1].value() == "field2");
let depth = 2;
let filter =
Fc::parse("field1 = value1 AND (field2 = value2 OR field3 = value3)").unwrap().unwrap();
let fids: Vec<_> = filter.fids(depth).collect();
assert_eq!(fids.len(), 1);
assert_eq!(fids[0].value(), "field1");
}
#[test]
fn token_from_str() {
let s = "test string that should not be parsed";

View File

@ -11,12 +11,12 @@ edition.workspace = true
license.workspace = true
[dependencies]
arbitrary = { version = "1.3.2", features = ["derive"] }
arbitrary = { version = "1.4.1", features = ["derive"] }
bumpalo = "3.16.0"
clap = { version = "4.5.9", features = ["derive"] }
clap = { version = "4.5.24", features = ["derive"] }
either = "1.13.0"
fastrand = "2.1.0"
fastrand = "2.3.0"
milli = { path = "../milli" }
serde = { version = "1.0.204", features = ["derive"] }
serde_json = { version = "1.0.120", features = ["preserve_order"] }
tempfile = "3.10.1"
serde = { version = "1.0.217", features = ["derive"] }
serde_json = { version = "1.0.135", features = ["preserve_order"] }
tempfile = "3.15.0"

View File

@ -10,8 +10,9 @@ use either::Either;
use fuzzers::Operation;
use milli::documents::mmap_from_objects;
use milli::heed::EnvOpenOptions;
use milli::progress::Progress;
use milli::update::new::indexer;
use milli::update::{IndexDocumentsMethod, IndexerConfig};
use milli::update::IndexerConfig;
use milli::vector::EmbeddingConfigs;
use milli::Index;
use serde_json::Value;
@ -56,13 +57,14 @@ fn main() {
let opt = opt.clone();
let handle = std::thread::spawn(move || {
let mut options = EnvOpenOptions::new();
let options = EnvOpenOptions::new();
let mut options = options.read_txn_without_tls();
options.map_size(1024 * 1024 * 1024 * 1024);
let tempdir = match opt.path {
Some(path) => TempDir::new_in(path).unwrap(),
None => TempDir::new().unwrap(),
};
let index = Index::new(options, tempdir.path()).unwrap();
let index = Index::new(options, tempdir.path(), true).unwrap();
let indexer_config = IndexerConfig::default();
std::thread::scope(|s| {
@ -88,9 +90,7 @@ fn main() {
let indexer_alloc = Bump::new();
let embedders = EmbeddingConfigs::default();
let mut indexer = indexer::DocumentOperation::new(
IndexDocumentsMethod::ReplaceDocuments,
);
let mut indexer = indexer::DocumentOperation::new();
let mut operations = Vec::new();
for op in batch.0 {
@ -114,7 +114,7 @@ fn main() {
for op in &operations {
match op {
Either::Left(documents) => {
indexer.add_documents(documents).unwrap()
indexer.replace_documents(documents).unwrap()
}
Either::Right(ids) => indexer.delete_documents(ids),
}
@ -128,7 +128,7 @@ fn main() {
None,
&mut new_fields_ids_map,
&|| false,
&|_progress| (),
Progress::default(),
)
.unwrap();
@ -143,7 +143,7 @@ fn main() {
&document_changes,
embedders,
&|| false,
&|_| (),
&Progress::default(),
)
.unwrap();

View File

@ -11,41 +11,44 @@ edition.workspace = true
license.workspace = true
[dependencies]
anyhow = "1.0.86"
anyhow = "1.0.95"
bincode = "1.3.3"
csv = "1.3.0"
derive_builder = "0.20.0"
byte-unit = "5.1.6"
bumpalo = "3.16.0"
bumparaw-collections = "0.1.4"
convert_case = "0.6.0"
csv = "1.3.1"
derive_builder = "0.20.2"
dump = { path = "../dump" }
enum-iterator = "2.1.0"
file-store = { path = "../file-store" }
flate2 = "1.0.30"
flate2 = "1.0.35"
indexmap = "2.7.0"
meilisearch-auth = { path = "../meilisearch-auth" }
meilisearch-types = { path = "../meilisearch-types" }
memmap2 = "0.9.5"
page_size = "0.6.0"
raw-collections = { git = "https://github.com/meilisearch/raw-collections.git", version = "0.1.0" }
rayon = "1.10.0"
roaring = { version = "0.10.7", features = ["serde"] }
serde = { version = "1.0.204", features = ["derive"] }
serde_json = { version = "1.0.120", features = ["preserve_order"] }
roaring = { version = "0.10.10", features = ["serde"] }
serde = { version = "1.0.217", features = ["derive"] }
serde_json = { version = "1.0.138", features = ["preserve_order"] }
synchronoise = "1.0.1"
tempfile = "3.10.1"
thiserror = "1.0.61"
memmap2 = "0.9.4"
time = { version = "0.3.36", features = [
tempfile = "3.15.0"
thiserror = "2.0.9"
time = { version = "0.3.37", features = [
"serde-well-known",
"formatting",
"parsing",
"macros",
] }
tracing = "0.1.40"
ureq = "2.10.0"
uuid = { version = "1.10.0", features = ["serde", "v4"] }
bumpalo = "3.16.0"
tracing = "0.1.41"
ureq = "2.12.1"
uuid = { version = "1.11.0", features = ["serde", "v4"] }
[dev-dependencies]
arroy = "0.5.0"
big_s = "1.0.2"
crossbeam-channel = "0.5.13"
insta = { version = "1.39.0", features = ["json", "redactions"] }
crossbeam-channel = "0.5.14"
# fixed version due to format breakages in v1.40
insta = { version = "=1.39.0", features = ["json", "redactions"] }
maplit = "1.0.2"
meili-snap = { path = "../meili-snap" }

View File

@ -1,901 +0,0 @@
/*!
The autobatcher is responsible for combining the next enqueued
tasks affecting a single index into a [batch](crate::batch::Batch).
The main function of the autobatcher is [`next_autobatch`].
*/
use std::ops::ControlFlow::{self, Break, Continue};
use meilisearch_types::milli::update::IndexDocumentsMethod::{
self, ReplaceDocuments, UpdateDocuments,
};
use meilisearch_types::tasks::TaskId;
use crate::KindWithContent;
/// Succinctly describes a task's [`Kind`](meilisearch_types::tasks::Kind)
/// for the purpose of simplifying the implementation of the autobatcher.
///
/// Only the non-prioritised tasks that can be grouped in a batch have a corresponding [`AutobatchKind`]
enum AutobatchKind {
DocumentImport {
method: IndexDocumentsMethod,
allow_index_creation: bool,
primary_key: Option<String>,
},
DocumentEdition,
DocumentDeletion {
by_filter: bool,
},
DocumentClear,
Settings {
allow_index_creation: bool,
},
IndexCreation,
IndexDeletion,
IndexUpdate,
IndexSwap,
}
impl AutobatchKind {
#[rustfmt::skip]
fn allow_index_creation(&self) -> Option<bool> {
match self {
AutobatchKind::DocumentImport { allow_index_creation, .. }
| AutobatchKind::Settings { allow_index_creation, .. } => Some(*allow_index_creation),
_ => None,
}
}
fn primary_key(&self) -> Option<Option<&str>> {
match self {
AutobatchKind::DocumentImport { primary_key, .. } => Some(primary_key.as_deref()),
_ => None,
}
}
}
impl From<KindWithContent> for AutobatchKind {
fn from(kind: KindWithContent) -> Self {
match kind {
KindWithContent::DocumentAdditionOrUpdate {
method,
allow_index_creation,
primary_key,
..
} => AutobatchKind::DocumentImport { method, allow_index_creation, primary_key },
KindWithContent::DocumentEdition { .. } => AutobatchKind::DocumentEdition,
KindWithContent::DocumentDeletion { .. } => {
AutobatchKind::DocumentDeletion { by_filter: false }
}
KindWithContent::DocumentClear { .. } => AutobatchKind::DocumentClear,
KindWithContent::DocumentDeletionByFilter { .. } => {
AutobatchKind::DocumentDeletion { by_filter: true }
}
KindWithContent::SettingsUpdate { allow_index_creation, is_deletion, .. } => {
AutobatchKind::Settings {
allow_index_creation: allow_index_creation && !is_deletion,
}
}
KindWithContent::IndexDeletion { .. } => AutobatchKind::IndexDeletion,
KindWithContent::IndexCreation { .. } => AutobatchKind::IndexCreation,
KindWithContent::IndexUpdate { .. } => AutobatchKind::IndexUpdate,
KindWithContent::IndexSwap { .. } => AutobatchKind::IndexSwap,
KindWithContent::TaskCancelation { .. }
| KindWithContent::TaskDeletion { .. }
| KindWithContent::DumpCreation { .. }
| KindWithContent::SnapshotCreation => {
panic!("The autobatcher should never be called with tasks that don't apply to an index.")
}
}
}
}
#[derive(Debug)]
pub enum BatchKind {
DocumentClear {
ids: Vec<TaskId>,
},
DocumentOperation {
method: IndexDocumentsMethod,
allow_index_creation: bool,
primary_key: Option<String>,
operation_ids: Vec<TaskId>,
},
DocumentEdition {
id: TaskId,
},
DocumentDeletion {
deletion_ids: Vec<TaskId>,
includes_by_filter: bool,
},
ClearAndSettings {
other: Vec<TaskId>,
allow_index_creation: bool,
settings_ids: Vec<TaskId>,
},
Settings {
allow_index_creation: bool,
settings_ids: Vec<TaskId>,
},
IndexDeletion {
ids: Vec<TaskId>,
},
IndexCreation {
id: TaskId,
},
IndexUpdate {
id: TaskId,
},
IndexSwap {
id: TaskId,
},
}
impl BatchKind {
#[rustfmt::skip]
fn allow_index_creation(&self) -> Option<bool> {
match self {
BatchKind::DocumentOperation { allow_index_creation, .. }
| BatchKind::ClearAndSettings { allow_index_creation, .. }
| BatchKind::Settings { allow_index_creation, .. } => Some(*allow_index_creation),
_ => None,
}
}
fn primary_key(&self) -> Option<Option<&str>> {
match self {
BatchKind::DocumentOperation { primary_key, .. } => Some(primary_key.as_deref()),
_ => None,
}
}
}
impl BatchKind {
/// Returns a `ControlFlow::Break` if you must stop right now.
/// The boolean tell you if an index has been created by the batched task.
/// To ease the writing of the code. `true` can be returned when you don't need to create an index
/// but false can't be returned if you needs to create an index.
// TODO use an AutoBatchKind as input
pub fn new(
task_id: TaskId,
kind: KindWithContent,
primary_key: Option<&str>,
) -> (ControlFlow<BatchKind, BatchKind>, bool) {
use AutobatchKind as K;
match AutobatchKind::from(kind) {
K::IndexCreation => (Break(BatchKind::IndexCreation { id: task_id }), true),
K::IndexDeletion => (Break(BatchKind::IndexDeletion { ids: vec![task_id] }), false),
K::IndexUpdate => (Break(BatchKind::IndexUpdate { id: task_id }), false),
K::IndexSwap => (Break(BatchKind::IndexSwap { id: task_id }), false),
K::DocumentClear => (Continue(BatchKind::DocumentClear { ids: vec![task_id] }), false),
K::DocumentImport { method, allow_index_creation, primary_key: pk }
if primary_key.is_none() || pk.is_none() || primary_key == pk.as_deref() =>
{
(
Continue(BatchKind::DocumentOperation {
method,
allow_index_creation,
primary_key: pk,
operation_ids: vec![task_id],
}),
allow_index_creation,
)
}
// if the primary key set in the task was different than ours we should stop and make this batch fail asap.
K::DocumentImport { method, allow_index_creation, primary_key } => (
Break(BatchKind::DocumentOperation {
method,
allow_index_creation,
primary_key,
operation_ids: vec![task_id],
}),
allow_index_creation,
),
K::DocumentEdition => (Break(BatchKind::DocumentEdition { id: task_id }), false),
K::DocumentDeletion { by_filter: includes_by_filter } => (
Continue(BatchKind::DocumentDeletion {
deletion_ids: vec![task_id],
includes_by_filter,
}),
false,
),
K::Settings { allow_index_creation } => (
Continue(BatchKind::Settings { allow_index_creation, settings_ids: vec![task_id] }),
allow_index_creation,
),
}
}
/// Returns a `ControlFlow::Break` if you must stop right now.
/// The boolean tell you if an index has been created by the batched task.
/// To ease the writing of the code. `true` can be returned when you don't need to create an index
/// but false can't be returned if you needs to create an index.
#[rustfmt::skip]
fn accumulate(self, id: TaskId, kind: AutobatchKind, index_already_exists: bool, primary_key: Option<&str>) -> ControlFlow<BatchKind, BatchKind> {
use AutobatchKind as K;
match (self, kind) {
// We don't batch any of these operations
(this, K::IndexCreation | K::IndexUpdate | K::IndexSwap | K::DocumentEdition) => Break(this),
// We must not batch tasks that don't have the same index creation rights if the index doesn't already exists.
(this, kind) if !index_already_exists && this.allow_index_creation() == Some(false) && kind.allow_index_creation() == Some(true) => {
Break(this)
},
// NOTE: We need to negate the whole condition since we're checking if we need to break instead of continue.
// I wrote it this way because it's easier to understand than the other way around.
(this, kind) if !(
// 1. If both task don't interact with primary key -> we can continue
(this.primary_key().is_none() && kind.primary_key().is_none()) ||
// 2. Else ->
(
// 2.1 If we already have a primary-key ->
(
primary_key.is_some() &&
// 2.1.1 If the task we're trying to accumulate have a pk it must be equal to our primary key
// 2.1.2 If the task don't have a primary-key -> we can continue
kind.primary_key().map_or(true, |pk| pk == primary_key)
) ||
// 2.2 If we don't have a primary-key ->
(
// 2.2.1 If both the batch and the task have a primary key they should be equal
// 2.2.2 If the batch is set to Some(None), the task should be too
// 2.2.3 If the batch is set to None -> we can continue
this.primary_key().zip(kind.primary_key()).map_or(true, |(this, kind)| this == kind)
)
)
) // closing the negation
=> {
Break(this)
},
// The index deletion can batch with everything but must stop after
(
BatchKind::DocumentClear { mut ids }
| BatchKind::DocumentDeletion { deletion_ids: mut ids, includes_by_filter: _ }
| BatchKind::DocumentOperation { method: _, allow_index_creation: _, primary_key: _, operation_ids: mut ids }
| BatchKind::Settings { allow_index_creation: _, settings_ids: mut ids },
K::IndexDeletion,
) => {
ids.push(id);
Break(BatchKind::IndexDeletion { ids })
}
(
BatchKind::ClearAndSettings { settings_ids: mut ids, allow_index_creation: _, mut other },
K::IndexDeletion,
) => {
ids.push(id);
ids.append(&mut other);
Break(BatchKind::IndexDeletion { ids })
}
(
BatchKind::DocumentClear { mut ids },
K::DocumentClear | K::DocumentDeletion { by_filter: _ },
) => {
ids.push(id);
Continue(BatchKind::DocumentClear { ids })
}
(
this @ BatchKind::DocumentClear { .. },
K::DocumentImport { .. } | K::Settings { .. },
) => Break(this),
(
BatchKind::DocumentOperation { method: _, allow_index_creation: _, primary_key: _, mut operation_ids },
K::DocumentClear,
) => {
operation_ids.push(id);
Continue(BatchKind::DocumentClear { ids: operation_ids })
}
// we can autobatch the same kind of document additions / updates
(
BatchKind::DocumentOperation { method: ReplaceDocuments, allow_index_creation, primary_key: _, mut operation_ids },
K::DocumentImport { method: ReplaceDocuments, primary_key: pk, .. },
) => {
operation_ids.push(id);
Continue(BatchKind::DocumentOperation {
method: ReplaceDocuments,
allow_index_creation,
operation_ids,
primary_key: pk,
})
}
(
BatchKind::DocumentOperation { method: UpdateDocuments, allow_index_creation, primary_key: _, mut operation_ids },
K::DocumentImport { method: UpdateDocuments, primary_key: pk, .. },
) => {
operation_ids.push(id);
Continue(BatchKind::DocumentOperation {
method: UpdateDocuments,
allow_index_creation,
primary_key: pk,
operation_ids,
})
}
(
BatchKind::DocumentOperation { method, allow_index_creation, primary_key, mut operation_ids },
K::DocumentDeletion { by_filter: false },
) => {
operation_ids.push(id);
Continue(BatchKind::DocumentOperation {
method,
allow_index_creation,
primary_key,
operation_ids,
})
}
// We can't batch a document operation with a delete by filter
(
this @ BatchKind::DocumentOperation { .. },
K::DocumentDeletion { by_filter: true },
) => {
Break(this)
}
// but we can't autobatch documents if it's not the same kind
// this match branch MUST be AFTER the previous one
(
this @ BatchKind::DocumentOperation { .. },
K::DocumentImport { .. },
) => Break(this),
(
this @ BatchKind::DocumentOperation { .. },
K::Settings { .. },
) => Break(this),
(BatchKind::DocumentDeletion { mut deletion_ids, includes_by_filter: _ }, K::DocumentClear) => {
deletion_ids.push(id);
Continue(BatchKind::DocumentClear { ids: deletion_ids })
}
// we can't autobatch the deletion and import if the document deletion contained a filter
(
this @ BatchKind::DocumentDeletion { deletion_ids: _, includes_by_filter: true },
K::DocumentImport { .. }
) => Break(this),
// we can autobatch the deletion and import if the index already exists
(
BatchKind::DocumentDeletion { mut deletion_ids, includes_by_filter: false },
K::DocumentImport { method, allow_index_creation, primary_key }
) if index_already_exists => {
deletion_ids.push(id);
Continue(BatchKind::DocumentOperation {
method,
allow_index_creation,
primary_key,
operation_ids: deletion_ids,
})
}
// we can autobatch the deletion and import if both can't create an index
(
BatchKind::DocumentDeletion { mut deletion_ids, includes_by_filter: false },
K::DocumentImport { method, allow_index_creation, primary_key }
) if !allow_index_creation => {
deletion_ids.push(id);
Continue(BatchKind::DocumentOperation {
method,
allow_index_creation,
primary_key,
operation_ids: deletion_ids,
})
}
// we can't autobatch a deletion and an import if the index does not exists but would be created by an addition
(
this @ BatchKind::DocumentDeletion { .. },
K::DocumentImport { .. }
) => {
Break(this)
}
(BatchKind::DocumentDeletion { mut deletion_ids, includes_by_filter }, K::DocumentDeletion { by_filter }) => {
deletion_ids.push(id);
Continue(BatchKind::DocumentDeletion { deletion_ids, includes_by_filter: includes_by_filter | by_filter })
}
(this @ BatchKind::DocumentDeletion { .. }, K::Settings { .. }) => Break(this),
(
BatchKind::Settings { settings_ids, allow_index_creation },
K::DocumentClear,
) => Continue(BatchKind::ClearAndSettings {
settings_ids,
allow_index_creation,
other: vec![id],
}),
(
this @ BatchKind::Settings { .. },
K::DocumentImport { .. } | K::DocumentDeletion { .. },
) => Break(this),
(
BatchKind::Settings { mut settings_ids, allow_index_creation },
K::Settings { .. },
) => {
settings_ids.push(id);
Continue(BatchKind::Settings {
allow_index_creation,
settings_ids,
})
}
(
BatchKind::ClearAndSettings { mut other, settings_ids, allow_index_creation },
K::DocumentClear,
) => {
other.push(id);
Continue(BatchKind::ClearAndSettings {
other,
settings_ids,
allow_index_creation,
})
}
(this @ BatchKind::ClearAndSettings { .. }, K::DocumentImport { .. }) => Break(this),
(
BatchKind::ClearAndSettings {
mut other,
settings_ids,
allow_index_creation,
},
K::DocumentDeletion { .. },
) => {
other.push(id);
Continue(BatchKind::ClearAndSettings {
other,
settings_ids,
allow_index_creation,
})
}
(
BatchKind::ClearAndSettings { mut settings_ids, other, allow_index_creation },
K::Settings { .. },
) => {
settings_ids.push(id);
Continue(BatchKind::ClearAndSettings {
other,
settings_ids,
allow_index_creation,
})
}
(
BatchKind::IndexCreation { .. }
| BatchKind::IndexDeletion { .. }
| BatchKind::IndexUpdate { .. }
| BatchKind::IndexSwap { .. }
| BatchKind::DocumentEdition { .. },
_,
) => {
unreachable!()
}
}
}
}
/// Create a batch from an ordered list of tasks.
///
/// ## Preconditions
/// 1. The tasks must be enqueued and given in the order in which they were enqueued
/// 2. The tasks must not be prioritised tasks (e.g. task cancellation, dump, snapshot, task deletion)
/// 3. The tasks must all be related to the same index
///
/// ## Return
/// `None` if the list of tasks is empty. Otherwise, an [`AutoBatch`] that represents
/// a subset of the given tasks.
pub fn autobatch(
enqueued: Vec<(TaskId, KindWithContent)>,
index_already_exists: bool,
primary_key: Option<&str>,
) -> Option<(BatchKind, bool)> {
let mut enqueued = enqueued.into_iter();
let (id, kind) = enqueued.next()?;
// index_exist will keep track of if the index should exist at this point after the tasks we batched.
let mut index_exist = index_already_exists;
let (mut acc, must_create_index) = match BatchKind::new(id, kind, primary_key) {
(Continue(acc), create) => (acc, create),
(Break(acc), create) => return Some((acc, create)),
};
// if an index has been created in the previous step we can consider it as existing.
index_exist |= must_create_index;
for (id, kind) in enqueued {
acc = match acc.accumulate(id, kind.into(), index_exist, primary_key) {
Continue(acc) => acc,
Break(acc) => return Some((acc, must_create_index)),
};
}
Some((acc, must_create_index))
}
#[cfg(test)]
mod tests {
use meilisearch_types::tasks::IndexSwap;
use uuid::Uuid;
use super::*;
use crate::debug_snapshot;
fn autobatch_from(
index_already_exists: bool,
primary_key: Option<&str>,
input: impl IntoIterator<Item = KindWithContent>,
) -> Option<(BatchKind, bool)> {
autobatch(
input.into_iter().enumerate().map(|(id, kind)| (id as TaskId, kind)).collect(),
index_already_exists,
primary_key,
)
}
fn doc_imp(
method: IndexDocumentsMethod,
allow_index_creation: bool,
primary_key: Option<&str>,
) -> KindWithContent {
KindWithContent::DocumentAdditionOrUpdate {
index_uid: String::from("doggo"),
primary_key: primary_key.map(|pk| pk.to_string()),
method,
content_file: Uuid::new_v4(),
documents_count: 0,
allow_index_creation,
}
}
fn doc_del() -> KindWithContent {
KindWithContent::DocumentDeletion {
index_uid: String::from("doggo"),
documents_ids: Vec::new(),
}
}
fn doc_del_fil() -> KindWithContent {
KindWithContent::DocumentDeletionByFilter {
index_uid: String::from("doggo"),
filter_expr: serde_json::json!("cuteness > 100"),
}
}
fn doc_clr() -> KindWithContent {
KindWithContent::DocumentClear { index_uid: String::from("doggo") }
}
fn settings(allow_index_creation: bool) -> KindWithContent {
KindWithContent::SettingsUpdate {
index_uid: String::from("doggo"),
new_settings: Default::default(),
is_deletion: false,
allow_index_creation,
}
}
fn idx_create() -> KindWithContent {
KindWithContent::IndexCreation { index_uid: String::from("doggo"), primary_key: None }
}
fn idx_update() -> KindWithContent {
KindWithContent::IndexUpdate { index_uid: String::from("doggo"), primary_key: None }
}
fn idx_del() -> KindWithContent {
KindWithContent::IndexDeletion { index_uid: String::from("doggo") }
}
fn idx_swap() -> KindWithContent {
KindWithContent::IndexSwap {
swaps: vec![IndexSwap { indexes: (String::from("doggo"), String::from("catto")) }],
}
}
#[test]
fn autobatch_simple_operation_together() {
// we can autobatch one or multiple `ReplaceDocuments` together.
// if the index exists.
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp( ReplaceDocuments, true , None), doc_imp(ReplaceDocuments, true , None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1, 2] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), doc_imp( ReplaceDocuments, false , None), doc_imp(ReplaceDocuments, false , None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1, 2] }, false))");
// if it doesn't exists.
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, true, None), doc_imp( ReplaceDocuments, true , None), doc_imp(ReplaceDocuments, true , None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1, 2] }, true))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None), doc_imp( ReplaceDocuments, true , None), doc_imp(ReplaceDocuments, true , None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
// we can autobatch one or multiple `UpdateDocuments` together.
// if the index exists.
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_imp(UpdateDocuments, true, None), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1, 2] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, None), doc_imp(UpdateDocuments, false, None), doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1, 2] }, false))");
// if it doesn't exists.
debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, true, None), doc_imp(UpdateDocuments, true, None), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1, 2] }, true))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, false, None), doc_imp(UpdateDocuments, false, None), doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1, 2] }, false))");
// we can autobatch one or multiple DocumentDeletion together
debug_snapshot!(autobatch_from(true, None, [doc_del()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: false }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_del(), doc_del()]), @"Some((DocumentDeletion { deletion_ids: [0, 1, 2], includes_by_filter: false }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_del()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: false }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_del(), doc_del(), doc_del()]), @"Some((DocumentDeletion { deletion_ids: [0, 1, 2], includes_by_filter: false }, false))");
// we can autobatch one or multiple DocumentDeletionByFilter together
debug_snapshot!(autobatch_from(true, None, [doc_del_fil()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_del_fil(), doc_del_fil()]), @"Some((DocumentDeletion { deletion_ids: [0, 1, 2], includes_by_filter: true }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_del_fil()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_del_fil(), doc_del_fil(), doc_del_fil()]), @"Some((DocumentDeletion { deletion_ids: [0, 1, 2], includes_by_filter: true }, false))");
// we can autobatch one or multiple Settings together
debug_snapshot!(autobatch_from(true, None, [settings(true)]), @"Some((Settings { allow_index_creation: true, settings_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [settings(true), settings(true), settings(true)]), @"Some((Settings { allow_index_creation: true, settings_ids: [0, 1, 2] }, true))");
debug_snapshot!(autobatch_from(true, None, [settings(false)]), @"Some((Settings { allow_index_creation: false, settings_ids: [0] }, false))");
debug_snapshot!(autobatch_from(true, None, [settings(false), settings(false), settings(false)]), @"Some((Settings { allow_index_creation: false, settings_ids: [0, 1, 2] }, false))");
debug_snapshot!(autobatch_from(false,None, [settings(true)]), @"Some((Settings { allow_index_creation: true, settings_ids: [0] }, true))");
debug_snapshot!(autobatch_from(false,None, [settings(true), settings(true), settings(true)]), @"Some((Settings { allow_index_creation: true, settings_ids: [0, 1, 2] }, true))");
debug_snapshot!(autobatch_from(false,None, [settings(false)]), @"Some((Settings { allow_index_creation: false, settings_ids: [0] }, false))");
debug_snapshot!(autobatch_from(false,None, [settings(false), settings(false), settings(false)]), @"Some((Settings { allow_index_creation: false, settings_ids: [0, 1, 2] }, false))");
// We can autobatch document addition with document deletion
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_del()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_del()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), doc_del()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, None), doc_del()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0, 1] }, true))"###);
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0, 1] }, true))"###);
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, true, None), doc_del()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, true, None), doc_del()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, false, None), doc_del()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, false, None), doc_del()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, true, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0, 1] }, true))"###);
debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, true, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0, 1] }, true))"###);
debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, false, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, false, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
// And the other way around
debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(ReplaceDocuments, true, Some("catto"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(UpdateDocuments, true, Some("catto"))]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(ReplaceDocuments, false, Some("catto"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(UpdateDocuments, false, Some("catto"))]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(ReplaceDocuments, false, Some("catto"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(UpdateDocuments, false, Some("catto"))]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
// But we can't autobatch document addition with document deletion by filter
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_del_fil()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_del_fil()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), doc_del_fil()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, None), doc_del_fil()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, Some("catto")), doc_del_fil()]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, Some("catto")), doc_del_fil()]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, Some("catto")), doc_del_fil()]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0] }, false))"###);
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, Some("catto")), doc_del_fil()]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0] }, false))"###);
debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, true, None), doc_del_fil()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, true, None), doc_del_fil()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, false, None), doc_del_fil()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, false, None), doc_del_fil()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, true, Some("catto")), doc_del_fil()]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, true, Some("catto")), doc_del_fil()]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, false, Some("catto")), doc_del_fil()]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0] }, false))"###);
debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, false, Some("catto")), doc_del_fil()]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0] }, false))"###);
// And the other way around
debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(ReplaceDocuments, true, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(UpdateDocuments, true, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(ReplaceDocuments, false, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(UpdateDocuments, false, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
debug_snapshot!(autobatch_from(false, None, [doc_del_fil(), doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
debug_snapshot!(autobatch_from(false, None, [doc_del_fil(), doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
debug_snapshot!(autobatch_from(false, None, [doc_del_fil(), doc_imp(ReplaceDocuments, false, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
debug_snapshot!(autobatch_from(false, None, [doc_del_fil(), doc_imp(UpdateDocuments, false, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
}
#[test]
fn simple_document_operation_dont_autobatch_with_other() {
// addition, updates and deletion by filter can't batch together
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_del_fil()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_del_fil()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), idx_create()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), idx_create()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_del(), idx_create()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: false }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), idx_create()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), idx_update()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), idx_update()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_del(), idx_update()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: false }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), idx_update()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), idx_swap()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), idx_swap()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_del(), idx_swap()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: false }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), idx_swap()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
}
#[test]
fn document_addition_doesnt_batch_with_settings() {
// simple case
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
// multiple settings and doc addition
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None), settings(true), settings(true)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None), settings(true), settings(true)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
// addition and setting unordered
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), doc_imp(ReplaceDocuments, true, None), settings(true)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), doc_imp(UpdateDocuments, true, None), settings(true)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
// Doesn't batch with other forbidden operations
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), doc_del()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), doc_del()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), idx_create()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), idx_create()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), idx_update()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), idx_update()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), idx_swap()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), idx_swap()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
}
#[test]
fn clear_and_additions() {
// these two doesn't need to batch
debug_snapshot!(autobatch_from(true, None, [doc_clr(), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentClear { ids: [0] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_clr(), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentClear { ids: [0] }, false))");
// Basic use case
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None), doc_clr()]), @"Some((DocumentClear { ids: [0, 1, 2] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_imp(UpdateDocuments, true, None), doc_clr()]), @"Some((DocumentClear { ids: [0, 1, 2] }, true))");
// This batch kind doesn't mix with other document addition
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None), doc_clr(), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentClear { ids: [0, 1, 2] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_imp(UpdateDocuments, true, None), doc_clr(), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentClear { ids: [0, 1, 2] }, true))");
// But you can batch multiple clear together
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None), doc_clr(), doc_clr(), doc_clr()]), @"Some((DocumentClear { ids: [0, 1, 2, 3, 4] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_imp(UpdateDocuments, true, None), doc_clr(), doc_clr(), doc_clr()]), @"Some((DocumentClear { ids: [0, 1, 2, 3, 4] }, true))");
}
#[test]
fn clear_and_additions_and_settings() {
// A clear don't need to autobatch the settings that happens AFTER there is no documents
debug_snapshot!(autobatch_from(true, None, [doc_clr(), settings(true)]), @"Some((DocumentClear { ids: [0] }, false))");
debug_snapshot!(autobatch_from(true, None, [settings(true), doc_clr(), settings(true)]), @"Some((ClearAndSettings { other: [1], allow_index_creation: true, settings_ids: [0, 2] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), doc_clr()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), doc_clr()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
}
#[test]
fn anything_and_index_deletion() {
// The `IndexDeletion` doesn't batch with anything that happens AFTER.
debug_snapshot!(autobatch_from(true, None, [idx_del(), doc_imp(ReplaceDocuments, true, None)]), @"Some((IndexDeletion { ids: [0] }, false))");
debug_snapshot!(autobatch_from(true, None, [idx_del(), doc_imp(UpdateDocuments, true, None)]), @"Some((IndexDeletion { ids: [0] }, false))");
debug_snapshot!(autobatch_from(true, None, [idx_del(), doc_imp(ReplaceDocuments, false, None)]), @"Some((IndexDeletion { ids: [0] }, false))");
debug_snapshot!(autobatch_from(true, None, [idx_del(), doc_imp(UpdateDocuments, false, None)]), @"Some((IndexDeletion { ids: [0] }, false))");
debug_snapshot!(autobatch_from(true, None, [idx_del(), doc_del()]), @"Some((IndexDeletion { ids: [0] }, false))");
debug_snapshot!(autobatch_from(true, None, [idx_del(), doc_del_fil()]), @"Some((IndexDeletion { ids: [0] }, false))");
debug_snapshot!(autobatch_from(true, None, [idx_del(), doc_clr()]), @"Some((IndexDeletion { ids: [0] }, false))");
debug_snapshot!(autobatch_from(true, None, [idx_del(), settings(true)]), @"Some((IndexDeletion { ids: [0] }, false))");
debug_snapshot!(autobatch_from(true, None, [idx_del(), settings(false)]), @"Some((IndexDeletion { ids: [0] }, false))");
debug_snapshot!(autobatch_from(false,None, [idx_del(), doc_imp(ReplaceDocuments, true, None)]), @"Some((IndexDeletion { ids: [0] }, false))");
debug_snapshot!(autobatch_from(false,None, [idx_del(), doc_imp(UpdateDocuments, true, None)]), @"Some((IndexDeletion { ids: [0] }, false))");
debug_snapshot!(autobatch_from(false,None, [idx_del(), doc_imp(ReplaceDocuments, false, None)]), @"Some((IndexDeletion { ids: [0] }, false))");
debug_snapshot!(autobatch_from(false,None, [idx_del(), doc_imp(UpdateDocuments, false, None)]), @"Some((IndexDeletion { ids: [0] }, false))");
debug_snapshot!(autobatch_from(false,None, [idx_del(), doc_del()]), @"Some((IndexDeletion { ids: [0] }, false))");
debug_snapshot!(autobatch_from(false,None, [idx_del(), doc_del_fil()]), @"Some((IndexDeletion { ids: [0] }, false))");
debug_snapshot!(autobatch_from(false,None, [idx_del(), doc_clr()]), @"Some((IndexDeletion { ids: [0] }, false))");
debug_snapshot!(autobatch_from(false,None, [idx_del(), settings(true)]), @"Some((IndexDeletion { ids: [0] }, false))");
debug_snapshot!(autobatch_from(false,None, [idx_del(), settings(false)]), @"Some((IndexDeletion { ids: [0] }, false))");
// The index deletion can accept almost any type of `BatchKind` and transform it to an `IndexDeletion`.
// First, the basic cases
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, None), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_del(), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(true, None, [settings(true), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, true))");
debug_snapshot!(autobatch_from(true, None, [settings(false), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, true, None), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, true))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, true, None), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, true))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, false, None), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_del(), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_del_fil(), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(false,None, [settings(true), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, true))");
debug_snapshot!(autobatch_from(false,None, [settings(false), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))");
}
#[test]
fn allowed_and_disallowed_index_creation() {
// `DocumentImport` can't be mixed with those disallowed to do so except if the index already exists.
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), settings(true)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None), doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, true, None), settings(true)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None), settings(true)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
// batch deletion and addition
debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(ReplaceDocuments, true, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: false }, false))");
debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(UpdateDocuments, true, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: false }, false))");
debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: false }, false))");
debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: false }, false))");
}
#[test]
fn autobatch_primary_key() {
// ==> If I have a pk
// With a single update
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), operation_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("other"), operation_ids: [0] }, true))"###);
// With a multiple updates
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), operation_ids: [0, 1] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), operation_ids: [0, 1] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, Some("other"))]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("id"))]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), operation_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), operation_ids: [0, 1] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), operation_ids: [0, 1] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, Some("other"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), operation_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), operation_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), operation_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("other"), operation_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("other"), operation_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("other"), operation_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("other"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("other"), operation_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("other"), operation_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("other"), operation_ids: [0] }, true))"###);
// ==> If I don't have a pk
// With a single update
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), operation_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, Some("other"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("other"), operation_ids: [0] }, true))"###);
// With a multiple updates
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, Some("id"))]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), operation_ids: [0] }, true))"###);
}
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,304 @@
use std::collections::HashMap;
use std::io;
use dump::{KindDump, TaskDump, UpdateFile};
use meilisearch_types::batches::{Batch, BatchId};
use meilisearch_types::heed::RwTxn;
use meilisearch_types::milli;
use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task};
use roaring::RoaringBitmap;
use uuid::Uuid;
use crate::{utils, Error, IndexScheduler, Result};
pub struct Dump<'a> {
index_scheduler: &'a IndexScheduler,
wtxn: RwTxn<'a>,
batch_to_task_mapping: HashMap<BatchId, RoaringBitmap>,
indexes: HashMap<String, RoaringBitmap>,
statuses: HashMap<Status, RoaringBitmap>,
kinds: HashMap<Kind, RoaringBitmap>,
batch_indexes: HashMap<String, RoaringBitmap>,
batch_statuses: HashMap<Status, RoaringBitmap>,
batch_kinds: HashMap<Kind, RoaringBitmap>,
}
impl<'a> Dump<'a> {
pub(crate) fn new(index_scheduler: &'a mut IndexScheduler) -> Result<Self> {
// While loading a dump no one should be able to access the scheduler thus I can block everything.
let wtxn = index_scheduler.env.write_txn()?;
Ok(Dump {
index_scheduler,
wtxn,
batch_to_task_mapping: HashMap::new(),
indexes: HashMap::new(),
statuses: HashMap::new(),
kinds: HashMap::new(),
batch_indexes: HashMap::new(),
batch_statuses: HashMap::new(),
batch_kinds: HashMap::new(),
})
}
/// Register a new batch coming from a dump in the scheduler.
/// By taking a mutable ref we're pretty sure no one will ever import a dump while actix is running.
pub fn register_dumped_batch(&mut self, batch: Batch) -> Result<()> {
self.index_scheduler.queue.batches.all_batches.put(&mut self.wtxn, &batch.uid, &batch)?;
if let Some(enqueued_at) = batch.enqueued_at {
utils::insert_task_datetime(
&mut self.wtxn,
self.index_scheduler.queue.batches.enqueued_at,
enqueued_at.earliest,
batch.uid,
)?;
utils::insert_task_datetime(
&mut self.wtxn,
self.index_scheduler.queue.batches.enqueued_at,
enqueued_at.oldest,
batch.uid,
)?;
}
utils::insert_task_datetime(
&mut self.wtxn,
self.index_scheduler.queue.batches.started_at,
batch.started_at,
batch.uid,
)?;
if let Some(finished_at) = batch.finished_at {
utils::insert_task_datetime(
&mut self.wtxn,
self.index_scheduler.queue.batches.finished_at,
finished_at,
batch.uid,
)?;
}
for index in batch.stats.index_uids.keys() {
match self.batch_indexes.get_mut(index) {
Some(bitmap) => {
bitmap.insert(batch.uid);
}
None => {
let mut bitmap = RoaringBitmap::new();
bitmap.insert(batch.uid);
self.batch_indexes.insert(index.to_string(), bitmap);
}
};
}
for status in batch.stats.status.keys() {
self.batch_statuses.entry(*status).or_default().insert(batch.uid);
}
for kind in batch.stats.types.keys() {
self.batch_kinds.entry(*kind).or_default().insert(batch.uid);
}
Ok(())
}
/// Register a new task coming from a dump in the scheduler.
/// By taking a mutable ref we're pretty sure no one will ever import a dump while actix is running.
pub fn register_dumped_task(
&mut self,
task: TaskDump,
content_file: Option<Box<UpdateFile>>,
) -> Result<Task> {
let task_has_no_docs = matches!(task.kind, KindDump::DocumentImport { documents_count, .. } if documents_count == 0);
let content_uuid = match content_file {
Some(content_file) if task.status == Status::Enqueued => {
let (uuid, file) = self.index_scheduler.queue.create_update_file(false)?;
let mut writer = io::BufWriter::new(file);
for doc in content_file {
let doc = doc?;
serde_json::to_writer(&mut writer, &doc).map_err(|e| {
Error::from_milli(milli::InternalError::SerdeJson(e).into(), None)
})?;
}
let file = writer.into_inner().map_err(|e| e.into_error())?;
file.persist()?;
Some(uuid)
}
// If the task isn't `Enqueued` then just generate a recognisable `Uuid`
// in case we try to open it later.
_ if task.status != Status::Enqueued => Some(Uuid::nil()),
None if task.status == Status::Enqueued && task_has_no_docs => {
let (uuid, file) = self.index_scheduler.queue.create_update_file(false)?;
file.persist()?;
Some(uuid)
}
_ => None,
};
let task = Task {
uid: task.uid,
batch_uid: task.batch_uid,
enqueued_at: task.enqueued_at,
started_at: task.started_at,
finished_at: task.finished_at,
error: task.error,
canceled_by: task.canceled_by,
details: task.details,
status: task.status,
kind: match task.kind {
KindDump::DocumentImport {
primary_key,
method,
documents_count,
allow_index_creation,
} => KindWithContent::DocumentAdditionOrUpdate {
index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
primary_key,
method,
content_file: content_uuid.ok_or(Error::CorruptedDump)?,
documents_count,
allow_index_creation,
},
KindDump::DocumentDeletion { documents_ids } => KindWithContent::DocumentDeletion {
documents_ids,
index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
},
KindDump::DocumentDeletionByFilter { filter } => {
KindWithContent::DocumentDeletionByFilter {
filter_expr: filter,
index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
}
}
KindDump::DocumentEdition { filter, context, function } => {
KindWithContent::DocumentEdition {
index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
filter_expr: filter,
context,
function,
}
}
KindDump::DocumentClear => KindWithContent::DocumentClear {
index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
},
KindDump::Settings { settings, is_deletion, allow_index_creation } => {
KindWithContent::SettingsUpdate {
index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
new_settings: settings,
is_deletion,
allow_index_creation,
}
}
KindDump::IndexDeletion => KindWithContent::IndexDeletion {
index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
},
KindDump::IndexCreation { primary_key } => KindWithContent::IndexCreation {
index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
primary_key,
},
KindDump::IndexUpdate { primary_key } => KindWithContent::IndexUpdate {
index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
primary_key,
},
KindDump::IndexSwap { swaps } => KindWithContent::IndexSwap { swaps },
KindDump::TaskCancelation { query, tasks } => {
KindWithContent::TaskCancelation { query, tasks }
}
KindDump::TasksDeletion { query, tasks } => {
KindWithContent::TaskDeletion { query, tasks }
}
KindDump::DumpCreation { keys, instance_uid } => {
KindWithContent::DumpCreation { keys, instance_uid }
}
KindDump::SnapshotCreation => KindWithContent::SnapshotCreation,
KindDump::UpgradeDatabase { from } => KindWithContent::UpgradeDatabase { from },
},
};
self.index_scheduler.queue.tasks.all_tasks.put(&mut self.wtxn, &task.uid, &task)?;
if let Some(batch_id) = task.batch_uid {
self.batch_to_task_mapping.entry(batch_id).or_default().insert(task.uid);
}
for index in task.indexes() {
match self.indexes.get_mut(index) {
Some(bitmap) => {
bitmap.insert(task.uid);
}
None => {
let mut bitmap = RoaringBitmap::new();
bitmap.insert(task.uid);
self.indexes.insert(index.to_string(), bitmap);
}
};
}
utils::insert_task_datetime(
&mut self.wtxn,
self.index_scheduler.queue.tasks.enqueued_at,
task.enqueued_at,
task.uid,
)?;
// we can't override the started_at & finished_at, so we must only set it if the tasks is finished and won't change
if matches!(task.status, Status::Succeeded | Status::Failed | Status::Canceled) {
if let Some(started_at) = task.started_at {
utils::insert_task_datetime(
&mut self.wtxn,
self.index_scheduler.queue.tasks.started_at,
started_at,
task.uid,
)?;
}
if let Some(finished_at) = task.finished_at {
utils::insert_task_datetime(
&mut self.wtxn,
self.index_scheduler.queue.tasks.finished_at,
finished_at,
task.uid,
)?;
}
}
self.statuses.entry(task.status).or_default().insert(task.uid);
self.kinds.entry(task.kind.as_kind()).or_default().insert(task.uid);
Ok(task)
}
/// Commit all the changes and exit the importing dump state
pub fn finish(mut self) -> Result<()> {
for (batch_id, task_ids) in self.batch_to_task_mapping {
self.index_scheduler.queue.batch_to_tasks_mapping.put(
&mut self.wtxn,
&batch_id,
&task_ids,
)?;
}
for (index, bitmap) in self.indexes {
self.index_scheduler.queue.tasks.index_tasks.put(&mut self.wtxn, &index, &bitmap)?;
}
for (status, bitmap) in self.statuses {
self.index_scheduler.queue.tasks.put_status(&mut self.wtxn, status, &bitmap)?;
}
for (kind, bitmap) in self.kinds {
self.index_scheduler.queue.tasks.put_kind(&mut self.wtxn, kind, &bitmap)?;
}
for (index, bitmap) in self.batch_indexes {
self.index_scheduler.queue.batches.index_tasks.put(&mut self.wtxn, &index, &bitmap)?;
}
for (status, bitmap) in self.batch_statuses {
self.index_scheduler.queue.batches.put_status(&mut self.wtxn, status, &bitmap)?;
}
for (kind, bitmap) in self.batch_kinds {
self.index_scheduler.queue.batches.put_kind(&mut self.wtxn, kind, &bitmap)?;
}
self.wtxn.commit()?;
self.index_scheduler.scheduler.wake_up.signal();
Ok(())
}
}

View File

@ -1,12 +1,13 @@
use std::fmt::Display;
use crate::TaskId;
use meilisearch_types::batches::BatchId;
use meilisearch_types::error::{Code, ErrorCode};
use meilisearch_types::tasks::{Kind, Status};
use meilisearch_types::{heed, milli};
use thiserror::Error;
use crate::TaskId;
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum DateField {
BeforeEnqueuedAt,
@ -103,11 +104,13 @@ pub enum Error {
)]
InvalidTaskCanceledBy { canceled_by: String },
#[error(
"{index_uid} is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_), and can not be more than 512 bytes."
"{index_uid} is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_), and can not be more than 400 bytes."
)]
InvalidIndexUid { index_uid: String },
#[error("Task `{0}` not found.")]
TaskNotFound(TaskId),
#[error("Task `{0}` does not contain any documents. Only `documentAdditionOrUpdate` tasks with the statuses `enqueued` or `processing` contain documents")]
TaskFileNotFound(TaskId),
#[error("Batch `{0}` not found.")]
BatchNotFound(BatchId),
#[error("Query parameters to filter the tasks to delete are missing. Available query parameters are: `uids`, `indexUids`, `statuses`, `types`, `canceledBy`, `beforeEnqueuedAt`, `afterEnqueuedAt`, `beforeStartedAt`, `afterStartedAt`, `beforeFinishedAt`, `afterFinishedAt`.")]
@ -126,8 +129,8 @@ pub enum Error {
_ => format!("{error}")
})]
Milli { error: milli::Error, index_uid: Option<String> },
#[error("An unexpected crash occurred when processing the task.")]
ProcessBatchPanicked,
#[error("An unexpected crash occurred when processing the task: {0}")]
ProcessBatchPanicked(String),
#[error(transparent)]
FileStore(#[from] file_store::Error),
#[error(transparent)]
@ -146,7 +149,9 @@ pub enum Error {
#[error("Corrupted task queue.")]
CorruptedTaskQueue,
#[error(transparent)]
TaskDatabaseUpdate(Box<Self>),
DatabaseUpgrade(Box<Self>),
#[error(transparent)]
UnrecoverableError(Box<Self>),
#[error(transparent)]
HeedTransaction(heed::Error),
@ -186,6 +191,7 @@ impl Error {
| Error::InvalidTaskCanceledBy { .. }
| Error::InvalidIndexUid { .. }
| Error::TaskNotFound(_)
| Error::TaskFileNotFound(_)
| Error::BatchNotFound(_)
| Error::TaskDeletionWithEmptyQuery
| Error::TaskCancelationWithEmptyQuery
@ -193,7 +199,7 @@ impl Error {
| Error::Dump(_)
| Error::Heed(_)
| Error::Milli { .. }
| Error::ProcessBatchPanicked
| Error::ProcessBatchPanicked(_)
| Error::FileStore(_)
| Error::IoError(_)
| Error::Persist(_)
@ -201,7 +207,8 @@ impl Error {
| Error::Anyhow(_) => true,
Error::CreateBatch(_)
| Error::CorruptedTaskQueue
| Error::TaskDatabaseUpdate(_)
| Error::DatabaseUpgrade(_)
| Error::UnrecoverableError(_)
| Error::HeedTransaction(_) => false,
#[cfg(test)]
Error::PlannedFailure => false,
@ -246,6 +253,7 @@ impl ErrorCode for Error {
Error::InvalidTaskCanceledBy { .. } => Code::InvalidTaskCanceledBy,
Error::InvalidIndexUid { .. } => Code::InvalidIndexUid,
Error::TaskNotFound(_) => Code::TaskNotFound,
Error::TaskFileNotFound(_) => Code::TaskFileNotFound,
Error::BatchNotFound(_) => Code::BatchNotFound,
Error::TaskDeletionWithEmptyQuery => Code::MissingTaskFilters,
Error::TaskCancelationWithEmptyQuery => Code::MissingTaskFilters,
@ -253,7 +261,7 @@ impl ErrorCode for Error {
Error::NoSpaceLeftInTaskQueue => Code::NoSpaceLeftOnDevice,
Error::Dump(e) => e.error_code(),
Error::Milli { error, .. } => error.error_code(),
Error::ProcessBatchPanicked => Code::Internal,
Error::ProcessBatchPanicked(_) => Code::Internal,
Error::Heed(e) => e.error_code(),
Error::HeedTransaction(e) => e.error_code(),
Error::FileStore(e) => e.error_code(),
@ -265,7 +273,8 @@ impl ErrorCode for Error {
Error::Anyhow(_) => Code::Internal,
Error::CorruptedTaskQueue => Code::Internal,
Error::CorruptedDump => Code::Internal,
Error::TaskDatabaseUpdate(_) => Code::Internal,
Error::DatabaseUpgrade(_) => Code::Internal,
Error::UnrecoverableError(_) => Code::Internal,
Error::CreateBatch(_) => Code::Internal,
// This one should never be seen by the end user

View File

@ -1,18 +1,29 @@
use std::sync::{Arc, RwLock};
use meilisearch_types::features::{InstanceTogglableFeatures, RuntimeTogglableFeatures};
use meilisearch_types::features::{InstanceTogglableFeatures, Network, RuntimeTogglableFeatures};
use meilisearch_types::heed::types::{SerdeJson, Str};
use meilisearch_types::heed::{Database, Env, RwTxn};
use meilisearch_types::heed::{Database, Env, RwTxn, WithoutTls};
use crate::error::FeatureNotEnabledError;
use crate::Result;
const EXPERIMENTAL_FEATURES: &str = "experimental-features";
/// The number of database used by features
const NUMBER_OF_DATABASES: u32 = 1;
/// Database const names for the `FeatureData`.
mod db_name {
pub const EXPERIMENTAL_FEATURES: &str = "experimental-features";
}
mod db_keys {
pub const EXPERIMENTAL_FEATURES: &str = "experimental-features";
pub const NETWORK: &str = "network";
}
#[derive(Clone)]
pub(crate) struct FeatureData {
persisted: Database<Str, SerdeJson<RuntimeTogglableFeatures>>,
runtime: Arc<RwLock<RuntimeTogglableFeatures>>,
network: Arc<RwLock<Network>>,
}
#[derive(Debug, Clone, Copy)]
@ -56,19 +67,6 @@ impl RoFeatures {
}
}
pub fn check_vector(&self, disabled_action: &'static str) -> Result<()> {
if self.runtime.vector_store {
Ok(())
} else {
Err(FeatureNotEnabledError {
disabled_action,
feature: "vector store",
issue_link: "https://github.com/meilisearch/product/discussions/677",
}
.into())
}
}
pub fn check_edit_documents_by_function(&self, disabled_action: &'static str) -> Result<()> {
if self.runtime.edit_documents_by_function {
Ok(())
@ -94,17 +92,62 @@ impl RoFeatures {
.into())
}
}
pub fn check_network(&self, disabled_action: &'static str) -> Result<()> {
if self.runtime.network {
Ok(())
} else {
Err(FeatureNotEnabledError {
disabled_action,
feature: "network",
issue_link: "https://github.com/orgs/meilisearch/discussions/805",
}
.into())
}
}
pub fn check_get_task_documents_route(&self) -> Result<()> {
if self.runtime.get_task_documents_route {
Ok(())
} else {
Err(FeatureNotEnabledError {
disabled_action: "Getting the documents of an enqueued task",
feature: "get task documents route",
issue_link: "https://github.com/orgs/meilisearch/discussions/808",
}
.into())
}
}
pub fn check_composite_embedders(&self, disabled_action: &'static str) -> Result<()> {
if self.runtime.composite_embedders {
Ok(())
} else {
Err(FeatureNotEnabledError {
disabled_action,
feature: "composite embedders",
issue_link: "https://github.com/orgs/meilisearch/discussions/816",
}
.into())
}
}
}
impl FeatureData {
pub fn new(env: &Env, instance_features: InstanceTogglableFeatures) -> Result<Self> {
let mut wtxn = env.write_txn()?;
let runtime_features_db = env.create_database(&mut wtxn, Some(EXPERIMENTAL_FEATURES))?;
wtxn.commit()?;
pub(crate) const fn nb_db() -> u32 {
NUMBER_OF_DATABASES
}
pub fn new(
env: &Env<WithoutTls>,
wtxn: &mut RwTxn,
instance_features: InstanceTogglableFeatures,
) -> Result<Self> {
let runtime_features_db =
env.create_database(wtxn, Some(db_name::EXPERIMENTAL_FEATURES))?;
let txn = env.read_txn()?;
let persisted_features: RuntimeTogglableFeatures =
runtime_features_db.get(&txn, EXPERIMENTAL_FEATURES)?.unwrap_or_default();
runtime_features_db.get(wtxn, db_keys::EXPERIMENTAL_FEATURES)?.unwrap_or_default();
let InstanceTogglableFeatures { metrics, logs_route, contains_filter } = instance_features;
let runtime = Arc::new(RwLock::new(RuntimeTogglableFeatures {
metrics: metrics || persisted_features.metrics,
@ -113,7 +156,14 @@ impl FeatureData {
..persisted_features
}));
Ok(Self { persisted: runtime_features_db, runtime })
let network_db = runtime_features_db.remap_data_type::<SerdeJson<Network>>();
let network: Network = network_db.get(wtxn, db_keys::NETWORK)?.unwrap_or_default();
Ok(Self {
persisted: runtime_features_db,
runtime,
network: Arc::new(RwLock::new(network)),
})
}
pub fn put_runtime_features(
@ -121,7 +171,7 @@ impl FeatureData {
mut wtxn: RwTxn,
features: RuntimeTogglableFeatures,
) -> Result<()> {
self.persisted.put(&mut wtxn, EXPERIMENTAL_FEATURES, &features)?;
self.persisted.put(&mut wtxn, db_keys::EXPERIMENTAL_FEATURES, &features)?;
wtxn.commit()?;
// safe to unwrap, the lock will only fail if:
@ -142,4 +192,21 @@ impl FeatureData {
pub fn features(&self) -> RoFeatures {
RoFeatures::new(self)
}
pub fn put_network(&self, mut wtxn: RwTxn, new_network: Network) -> Result<()> {
self.persisted.remap_data_type::<SerdeJson<Network>>().put(
&mut wtxn,
db_keys::NETWORK,
&new_network,
)?;
wtxn.commit()?;
let mut network = self.network.write().unwrap();
*network = new_network;
Ok(())
}
pub fn network(&self) -> Network {
Network::clone(&*self.network.read().unwrap())
}
}

View File

@ -1,5 +1,7 @@
use std::collections::BTreeMap;
use std::env::VarError;
use std::path::Path;
use std::str::FromStr;
use std::time::Duration;
use meilisearch_types::heed::{EnvClosingEvent, EnvFlags, EnvOpenOptions};
@ -102,7 +104,7 @@ impl ReopenableIndex {
return Ok(());
}
map.unavailable.remove(&self.uuid);
map.create(&self.uuid, path, None, self.enable_mdb_writemap, self.map_size)?;
map.create(&self.uuid, path, None, self.enable_mdb_writemap, self.map_size, false)?;
}
Ok(())
}
@ -171,11 +173,12 @@ impl IndexMap {
date: Option<(OffsetDateTime, OffsetDateTime)>,
enable_mdb_writemap: bool,
map_size: usize,
creation: bool,
) -> Result<Index> {
if !matches!(self.get_unavailable(uuid), Missing) {
panic!("Attempt to open an index that was unavailable");
}
let index = create_or_open_index(path, date, enable_mdb_writemap, map_size)?;
let index = create_or_open_index(path, date, enable_mdb_writemap, map_size, creation)?;
match self.available.insert(*uuid, index.clone()) {
InsertionOutcome::InsertedNew => (),
InsertionOutcome::Evicted(evicted_uuid, evicted_index) => {
@ -299,18 +302,31 @@ fn create_or_open_index(
date: Option<(OffsetDateTime, OffsetDateTime)>,
enable_mdb_writemap: bool,
map_size: usize,
creation: bool,
) -> Result<Index> {
let mut options = EnvOpenOptions::new();
let options = EnvOpenOptions::new();
let mut options = options.read_txn_without_tls();
options.map_size(clamp_to_page_size(map_size));
options.max_readers(1024);
// You can find more details about this experimental
// environment variable on the following GitHub discussion:
// <https://github.com/orgs/meilisearch/discussions/806>
let max_readers = match std::env::var("MEILI_EXPERIMENTAL_INDEX_MAX_READERS") {
Ok(value) => u32::from_str(&value).unwrap(),
Err(VarError::NotPresent) => 1024,
Err(VarError::NotUnicode(value)) => panic!(
"Invalid unicode for the `MEILI_EXPERIMENTAL_INDEX_MAX_READERS` env var: {value:?}"
),
};
options.max_readers(max_readers);
if enable_mdb_writemap {
unsafe { options.flags(EnvFlags::WRITE_MAP) };
}
if let Some((created, updated)) = date {
Ok(Index::new_with_creation_dates(options, path, created, updated)?)
Ok(Index::new_with_creation_dates(options, path, created, updated, creation)?)
} else {
Ok(Index::new(options, path)?)
Ok(Index::new(options, path, creation)?)
}
}
@ -318,17 +334,17 @@ fn create_or_open_index(
#[cfg(test)]
mod tests {
use meilisearch_types::heed::Env;
use meilisearch_types::heed::{Env, WithoutTls};
use meilisearch_types::Index;
use uuid::Uuid;
use super::super::IndexMapper;
use crate::tests::IndexSchedulerHandle;
use crate::test_utils::IndexSchedulerHandle;
use crate::utils::clamp_to_page_size;
use crate::IndexScheduler;
impl IndexMapper {
fn test() -> (Self, Env, IndexSchedulerHandle) {
fn test() -> (Self, Env<WithoutTls>, IndexSchedulerHandle) {
let (index_scheduler, handle) = IndexScheduler::test(true, vec![]);
(index_scheduler.index_mapper, index_scheduler.env, handle)
}

View File

@ -3,13 +3,10 @@ use std::sync::{Arc, RwLock};
use std::time::Duration;
use std::{fs, thread};
use self::index_map::IndexMap;
use self::IndexStatus::{Available, BeingDeleted, Closing, Missing};
use crate::uuid_codec::UuidCodec;
use crate::{Error, Result};
use meilisearch_types::heed::types::{SerdeJson, Str};
use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn};
use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn, WithoutTls};
use meilisearch_types::milli;
use meilisearch_types::milli::database_stats::DatabaseStats;
use meilisearch_types::milli::update::IndexerConfig;
use meilisearch_types::milli::{FieldDistribution, Index};
use serde::{Deserialize, Serialize};
@ -17,10 +14,20 @@ use time::OffsetDateTime;
use tracing::error;
use uuid::Uuid;
use self::index_map::IndexMap;
use self::IndexStatus::{Available, BeingDeleted, Closing, Missing};
use crate::uuid_codec::UuidCodec;
use crate::{Error, IndexBudget, IndexSchedulerOptions, Result};
mod index_map;
const INDEX_MAPPING: &str = "index-mapping";
const INDEX_STATS: &str = "index-stats";
/// The number of database used by index mapper
const NUMBER_OF_DATABASES: u32 = 2;
/// Database const names for the `IndexMapper`.
mod db_name {
pub const INDEX_MAPPING: &str = "index-mapping";
pub const INDEX_STATS: &str = "index-stats";
}
/// Structure managing meilisearch's indexes.
///
@ -92,19 +99,32 @@ pub enum IndexStatus {
/// The statistics that can be computed from an `Index` object.
#[derive(Serialize, Deserialize, Debug)]
pub struct IndexStats {
/// Number of documents in the index.
pub number_of_documents: u64,
/// Stats of the documents database.
#[serde(default)]
pub documents_database_stats: DatabaseStats,
#[serde(default, skip_serializing)]
pub number_of_documents: Option<u64>,
/// Size taken up by the index' DB, in bytes.
///
/// This includes the size taken by both the used and free pages of the DB, and as the free pages
/// are not returned to the disk after a deletion, this number is typically larger than
/// `used_database_size` that only includes the size of the used pages.
pub database_size: u64,
/// Number of embeddings in the index.
/// Option: retrocompatible with the stats of the pre-v1.13.0 versions of meilisearch
pub number_of_embeddings: Option<u64>,
/// Number of embedded documents in the index.
/// Option: retrocompatible with the stats of the pre-v1.13.0 versions of meilisearch
pub number_of_embedded_documents: Option<u64>,
/// Size taken by the used pages of the index' DB, in bytes.
///
/// As the DB backend does not return to the disk the pages that are not currently used by the DB,
/// this value is typically smaller than `database_size`.
pub used_database_size: u64,
/// The primary key of the index
pub primary_key: Option<String>,
/// Association of every field name with the number of times it occurs in the documents.
pub field_distribution: FieldDistribution,
/// Creation date of the index.
@ -122,10 +142,15 @@ impl IndexStats {
///
/// - rtxn: a RO transaction for the index, obtained from `Index::read_txn()`.
pub fn new(index: &Index, rtxn: &RoTxn) -> milli::Result<Self> {
let arroy_stats = index.arroy_stats(rtxn)?;
Ok(IndexStats {
number_of_documents: index.number_of_documents(rtxn)?,
number_of_embeddings: Some(arroy_stats.number_of_embeddings),
number_of_embedded_documents: Some(arroy_stats.documents.len()),
documents_database_stats: index.documents_stats(rtxn)?.unwrap_or_default(),
number_of_documents: None,
database_size: index.on_disk_size()?,
used_database_size: index.used_size()?,
primary_key: index.primary_key(rtxn)?.map(|s| s.to_string()),
field_distribution: index.field_distribution(rtxn)?,
created_at: index.created_at(rtxn)?,
updated_at: index.updated_at(rtxn)?,
@ -134,29 +159,25 @@ impl IndexStats {
}
impl IndexMapper {
pub fn new(
env: &Env,
base_path: PathBuf,
index_base_map_size: usize,
index_growth_amount: usize,
index_count: usize,
enable_mdb_writemap: bool,
indexer_config: IndexerConfig,
) -> Result<Self> {
let mut wtxn = env.write_txn()?;
let index_mapping = env.create_database(&mut wtxn, Some(INDEX_MAPPING))?;
let index_stats = env.create_database(&mut wtxn, Some(INDEX_STATS))?;
wtxn.commit()?;
pub(crate) const fn nb_db() -> u32 {
NUMBER_OF_DATABASES
}
pub fn new(
env: &Env<WithoutTls>,
wtxn: &mut RwTxn,
options: &IndexSchedulerOptions,
budget: IndexBudget,
) -> Result<Self> {
Ok(Self {
index_map: Arc::new(RwLock::new(IndexMap::new(index_count))),
index_mapping,
index_stats,
base_path,
index_base_map_size,
index_growth_amount,
enable_mdb_writemap,
indexer_config: Arc::new(indexer_config),
index_map: Arc::new(RwLock::new(IndexMap::new(budget.index_count))),
index_mapping: env.create_database(wtxn, Some(db_name::INDEX_MAPPING))?,
index_stats: env.create_database(wtxn, Some(db_name::INDEX_STATS))?,
base_path: options.indexes_path.clone(),
index_base_map_size: budget.map_size,
index_growth_amount: options.index_growth_amount,
enable_mdb_writemap: options.enable_mdb_writemap,
indexer_config: options.indexer_config.clone(),
currently_updating_index: Default::default(),
})
}
@ -193,8 +214,14 @@ impl IndexMapper {
date,
self.enable_mdb_writemap,
self.index_base_map_size,
true,
)
.map_err(|e| Error::from_milli(e, Some(uuid.to_string())))?;
let index_rtxn = index.read_txn()?;
let stats = crate::index_mapper::IndexStats::new(&index, &index_rtxn)
.map_err(|e| Error::from_milli(e, Some(name.to_string())))?;
self.store_stats_of(&mut wtxn, name, &stats)?;
drop(index_rtxn);
wtxn.commit()?;
@ -386,6 +413,7 @@ impl IndexMapper {
None,
self.enable_mdb_writemap,
self.index_base_map_size,
false,
)
.map_err(|e| Error::from_milli(e, Some(uuid.to_string())))?;
}

View File

@ -1,15 +1,16 @@
use std::collections::BTreeSet;
use std::fmt::Write;
use meilisearch_types::batches::Batch;
use meilisearch_types::batches::{Batch, BatchEnqueuedAt, BatchStats};
use meilisearch_types::heed::types::{SerdeBincode, SerdeJson, Str};
use meilisearch_types::heed::{Database, RoTxn};
use meilisearch_types::milli::{CboRoaringBitmapCodec, RoaringBitmapCodec, BEU32};
use meilisearch_types::tasks::{Details, Task};
use meilisearch_types::tasks::{Details, Kind, Status, Task};
use meilisearch_types::versioning;
use roaring::RoaringBitmap;
use crate::index_mapper::IndexMapper;
use crate::{IndexScheduler, Kind, Status, BEI128};
use crate::{IndexScheduler, BEI128};
pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String {
// Since we'll snapshot the index right afterward, we don't need to ensure it's internally consistent for every run.
@ -18,41 +19,15 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String {
scheduler.assert_internally_consistent();
let IndexScheduler {
autobatching_enabled,
cleanup_enabled: _,
must_stop_processing: _,
processing_tasks,
file_store,
env,
all_tasks,
all_batches,
batch_to_tasks_mapping,
// task reverse index
status,
kind,
index_tasks,
canceled_by,
enqueued_at,
started_at,
finished_at,
// batch reverse index
batch_status,
batch_kind,
batch_index_tasks,
batch_enqueued_at,
batch_started_at,
batch_finished_at,
version,
queue,
scheduler,
index_mapper,
features: _,
max_number_of_tasks: _,
max_number_of_batched_tasks: _,
wake_up: _,
dumps_path: _,
snapshots_path: _,
auth_path: _,
version_file_path: _,
webhook_url: _,
webhook_authorization_header: _,
test_breakpoint_sdr: _,
@ -65,8 +40,18 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String {
let mut snap = String::new();
let indx_sched_version = version.get_version(&rtxn).unwrap();
let latest_version = (
versioning::VERSION_MAJOR.parse().unwrap(),
versioning::VERSION_MINOR.parse().unwrap(),
versioning::VERSION_PATCH.parse().unwrap(),
);
if indx_sched_version != Some(latest_version) {
snap.push_str(&format!("index scheduler running on version {indx_sched_version:?}\n"));
}
let processing = processing_tasks.read().unwrap().clone();
snap.push_str(&format!("### Autobatching Enabled = {autobatching_enabled}\n"));
snap.push_str(&format!("### Autobatching Enabled = {}\n", scheduler.autobatching_enabled));
snap.push_str(&format!(
"### Processing batch {:?}:\n",
processing.batch.as_ref().map(|batch| batch.uid)
@ -79,19 +64,19 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String {
snap.push_str("\n----------------------------------------------------------------------\n");
snap.push_str("### All Tasks:\n");
snap.push_str(&snapshot_all_tasks(&rtxn, *all_tasks));
snap.push_str(&snapshot_all_tasks(&rtxn, queue.tasks.all_tasks));
snap.push_str("----------------------------------------------------------------------\n");
snap.push_str("### Status:\n");
snap.push_str(&snapshot_status(&rtxn, *status));
snap.push_str(&snapshot_status(&rtxn, queue.tasks.status));
snap.push_str("----------------------------------------------------------------------\n");
snap.push_str("### Kind:\n");
snap.push_str(&snapshot_kind(&rtxn, *kind));
snap.push_str(&snapshot_kind(&rtxn, queue.tasks.kind));
snap.push_str("----------------------------------------------------------------------\n");
snap.push_str("### Index Tasks:\n");
snap.push_str(&snapshot_index_tasks(&rtxn, *index_tasks));
snap.push_str(&snapshot_index_tasks(&rtxn, queue.tasks.index_tasks));
snap.push_str("----------------------------------------------------------------------\n");
snap.push_str("### Index Mapper:\n");
@ -99,55 +84,55 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String {
snap.push_str("\n----------------------------------------------------------------------\n");
snap.push_str("### Canceled By:\n");
snap.push_str(&snapshot_canceled_by(&rtxn, *canceled_by));
snap.push_str(&snapshot_canceled_by(&rtxn, queue.tasks.canceled_by));
snap.push_str("\n----------------------------------------------------------------------\n");
snap.push_str("### Enqueued At:\n");
snap.push_str(&snapshot_date_db(&rtxn, *enqueued_at));
snap.push_str(&snapshot_date_db(&rtxn, queue.tasks.enqueued_at));
snap.push_str("----------------------------------------------------------------------\n");
snap.push_str("### Started At:\n");
snap.push_str(&snapshot_date_db(&rtxn, *started_at));
snap.push_str(&snapshot_date_db(&rtxn, queue.tasks.started_at));
snap.push_str("----------------------------------------------------------------------\n");
snap.push_str("### Finished At:\n");
snap.push_str(&snapshot_date_db(&rtxn, *finished_at));
snap.push_str(&snapshot_date_db(&rtxn, queue.tasks.finished_at));
snap.push_str("----------------------------------------------------------------------\n");
snap.push_str("### All Batches:\n");
snap.push_str(&snapshot_all_batches(&rtxn, *all_batches));
snap.push_str(&snapshot_all_batches(&rtxn, queue.batches.all_batches));
snap.push_str("----------------------------------------------------------------------\n");
snap.push_str("### Batch to tasks mapping:\n");
snap.push_str(&snapshot_batches_to_tasks_mappings(&rtxn, *batch_to_tasks_mapping));
snap.push_str(&snapshot_batches_to_tasks_mappings(&rtxn, queue.batch_to_tasks_mapping));
snap.push_str("----------------------------------------------------------------------\n");
snap.push_str("### Batches Status:\n");
snap.push_str(&snapshot_status(&rtxn, *batch_status));
snap.push_str(&snapshot_status(&rtxn, queue.batches.status));
snap.push_str("----------------------------------------------------------------------\n");
snap.push_str("### Batches Kind:\n");
snap.push_str(&snapshot_kind(&rtxn, *batch_kind));
snap.push_str(&snapshot_kind(&rtxn, queue.batches.kind));
snap.push_str("----------------------------------------------------------------------\n");
snap.push_str("### Batches Index Tasks:\n");
snap.push_str(&snapshot_index_tasks(&rtxn, *batch_index_tasks));
snap.push_str(&snapshot_index_tasks(&rtxn, queue.batches.index_tasks));
snap.push_str("----------------------------------------------------------------------\n");
snap.push_str("### Batches Enqueued At:\n");
snap.push_str(&snapshot_date_db(&rtxn, *batch_enqueued_at));
snap.push_str(&snapshot_date_db(&rtxn, queue.batches.enqueued_at));
snap.push_str("----------------------------------------------------------------------\n");
snap.push_str("### Batches Started At:\n");
snap.push_str(&snapshot_date_db(&rtxn, *batch_started_at));
snap.push_str(&snapshot_date_db(&rtxn, queue.batches.started_at));
snap.push_str("----------------------------------------------------------------------\n");
snap.push_str("### Batches Finished At:\n");
snap.push_str(&snapshot_date_db(&rtxn, *batch_finished_at));
snap.push_str(&snapshot_date_db(&rtxn, queue.batches.finished_at));
snap.push_str("----------------------------------------------------------------------\n");
snap.push_str("### File Store:\n");
snap.push_str(&snapshot_file_store(file_store));
snap.push_str(&snapshot_file_store(&queue.file_store));
snap.push_str("\n----------------------------------------------------------------------\n");
snap
@ -306,6 +291,9 @@ fn snapshot_details(d: &Details) -> String {
Details::IndexSwap { swaps } => {
format!("{{ swaps: {swaps:?} }}")
}
Details::UpgradeDatabase { from, to } => {
format!("{{ from: {from:?}, to: {to:?} }}")
}
}
}
@ -353,14 +341,24 @@ pub fn snapshot_canceled_by(rtxn: &RoTxn, db: Database<BEU32, RoaringBitmapCodec
pub fn snapshot_batch(batch: &Batch) -> String {
let mut snap = String::new();
let Batch { uid, details, stats, started_at, finished_at } = batch;
let Batch { uid, details, stats, started_at, finished_at, progress: _, enqueued_at } = batch;
let stats = BatchStats {
progress_trace: Default::default(),
internal_database_sizes: Default::default(),
write_channel_congestion: None,
..stats.clone()
};
if let Some(finished_at) = finished_at {
assert!(finished_at > started_at);
}
let BatchEnqueuedAt { earliest, oldest } = enqueued_at.unwrap();
assert!(*started_at > earliest);
assert!(earliest >= oldest);
snap.push('{');
snap.push_str(&format!("uid: {uid}, "));
snap.push_str(&format!("details: {}, ", serde_json::to_string(details).unwrap()));
snap.push_str(&format!("stats: {}, ", serde_json::to_string(stats).unwrap()));
snap.push_str(&format!("stats: {}, ", serde_json::to_string(&stats).unwrap()));
snap.push('}');
snap
}
@ -373,7 +371,8 @@ pub fn snapshot_index_mapper(rtxn: &RoTxn, mapper: &IndexMapper) -> String {
let stats = mapper.stats_of(rtxn, &name).unwrap();
s.push_str(&format!(
"{name}: {{ number_of_documents: {}, field_distribution: {:?} }}\n",
stats.number_of_documents, stats.field_distribution
stats.documents_database_stats.number_of_entries(),
stats.field_distribution
));
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,296 @@
use std::sync::Arc;
use meilisearch_types::milli::progress::{AtomicSubStep, NamedStep, Progress, ProgressView};
use meilisearch_types::milli::{make_atomic_progress, make_enum_progress};
use roaring::RoaringBitmap;
use crate::utils::ProcessingBatch;
#[derive(Clone, Default)]
pub struct ProcessingTasks {
pub batch: Option<Arc<ProcessingBatch>>,
/// The list of tasks ids that are currently running.
pub processing: Arc<RoaringBitmap>,
/// The progress on processing tasks
pub progress: Option<Progress>,
}
impl ProcessingTasks {
/// Creates an empty `ProcessingAt` struct.
pub fn new() -> ProcessingTasks {
ProcessingTasks::default()
}
pub fn get_progress_view(&self) -> Option<ProgressView> {
Some(self.progress.as_ref()?.as_progress_view())
}
/// Stores the currently processing tasks, and the date time at which it started.
pub fn start_processing(
&mut self,
processing_batch: ProcessingBatch,
processing: RoaringBitmap,
) -> Progress {
self.batch = Some(Arc::new(processing_batch));
self.processing = Arc::new(processing);
let progress = Progress::default();
progress.update_progress(BatchProgress::ProcessingTasks);
self.progress = Some(progress.clone());
progress
}
/// Set the processing tasks to an empty list
pub fn stop_processing(&mut self) -> Self {
self.progress = None;
Self {
batch: std::mem::take(&mut self.batch),
processing: std::mem::take(&mut self.processing),
progress: None,
}
}
/// Returns `true` if there, at least, is one task that is currently processing that we must stop.
pub fn must_cancel_processing_tasks(&self, canceled_tasks: &RoaringBitmap) -> bool {
!self.processing.is_disjoint(canceled_tasks)
}
}
make_enum_progress! {
pub enum BatchProgress {
ProcessingTasks,
WritingTasksToDisk,
}
}
make_enum_progress! {
pub enum FinalizingIndexStep {
Committing,
ComputingStats,
}
}
make_enum_progress! {
pub enum TaskCancelationProgress {
RetrievingTasks,
UpdatingTasks,
}
}
make_enum_progress! {
pub enum TaskDeletionProgress {
DeletingTasksDateTime,
DeletingTasksMetadata,
DeletingTasks,
DeletingBatches,
}
}
make_enum_progress! {
pub enum SnapshotCreationProgress {
StartTheSnapshotCreation,
SnapshotTheIndexScheduler,
SnapshotTheUpdateFiles,
SnapshotTheIndexes,
SnapshotTheApiKeys,
CreateTheTarball,
}
}
make_enum_progress! {
pub enum DumpCreationProgress {
StartTheDumpCreation,
DumpTheApiKeys,
DumpTheTasks,
DumpTheBatches,
DumpTheIndexes,
DumpTheExperimentalFeatures,
CompressTheDump,
}
}
make_enum_progress! {
pub enum CreateIndexProgress {
CreatingTheIndex,
}
}
make_enum_progress! {
pub enum UpdateIndexProgress {
UpdatingTheIndex,
}
}
make_enum_progress! {
pub enum DeleteIndexProgress {
DeletingTheIndex,
}
}
make_enum_progress! {
pub enum SwappingTheIndexes {
EnsuringCorrectnessOfTheSwap,
SwappingTheIndexes,
}
}
make_enum_progress! {
pub enum InnerSwappingTwoIndexes {
RetrieveTheTasks,
UpdateTheTasks,
UpdateTheIndexesMetadata,
}
}
make_enum_progress! {
pub enum DocumentOperationProgress {
RetrievingConfig,
ComputingDocumentChanges,
Indexing,
}
}
make_enum_progress! {
pub enum DocumentEditionProgress {
RetrievingConfig,
ComputingDocumentChanges,
Indexing,
}
}
make_enum_progress! {
pub enum DocumentDeletionProgress {
RetrievingConfig,
DeleteDocuments,
Indexing,
}
}
make_enum_progress! {
pub enum SettingsProgress {
RetrievingAndMergingTheSettings,
ApplyTheSettings,
}
}
make_atomic_progress!(Task alias AtomicTaskStep => "task" );
make_atomic_progress!(Document alias AtomicDocumentStep => "document" );
make_atomic_progress!(Batch alias AtomicBatchStep => "batch" );
make_atomic_progress!(UpdateFile alias AtomicUpdateFileStep => "update file" );
#[cfg(test)]
mod test {
use std::sync::atomic::Ordering;
use meili_snap::{json_string, snapshot};
use super::*;
#[test]
fn one_level() {
let mut processing = ProcessingTasks::new();
processing.start_processing(ProcessingBatch::new(0), RoaringBitmap::new());
snapshot!(json_string!(processing.get_progress_view()), @r#"
{
"steps": [
{
"currentStep": "processing tasks",
"finished": 0,
"total": 2
}
],
"percentage": 0.0
}
"#);
processing.progress.as_ref().unwrap().update_progress(BatchProgress::WritingTasksToDisk);
snapshot!(json_string!(processing.get_progress_view()), @r#"
{
"steps": [
{
"currentStep": "writing tasks to disk",
"finished": 1,
"total": 2
}
],
"percentage": 50.0
}
"#);
}
#[test]
fn task_progress() {
let mut processing = ProcessingTasks::new();
processing.start_processing(ProcessingBatch::new(0), RoaringBitmap::new());
let (atomic, tasks) = AtomicTaskStep::new(10);
processing.progress.as_ref().unwrap().update_progress(tasks);
snapshot!(json_string!(processing.get_progress_view()), @r#"
{
"steps": [
{
"currentStep": "processing tasks",
"finished": 0,
"total": 2
},
{
"currentStep": "task",
"finished": 0,
"total": 10
}
],
"percentage": 0.0
}
"#);
atomic.fetch_add(6, Ordering::Relaxed);
snapshot!(json_string!(processing.get_progress_view()), @r#"
{
"steps": [
{
"currentStep": "processing tasks",
"finished": 0,
"total": 2
},
{
"currentStep": "task",
"finished": 6,
"total": 10
}
],
"percentage": 30.000002
}
"#);
processing.progress.as_ref().unwrap().update_progress(BatchProgress::WritingTasksToDisk);
snapshot!(json_string!(processing.get_progress_view()), @r#"
{
"steps": [
{
"currentStep": "writing tasks to disk",
"finished": 1,
"total": 2
}
],
"percentage": 50.0
}
"#);
let (atomic, tasks) = AtomicTaskStep::new(5);
processing.progress.as_ref().unwrap().update_progress(tasks);
atomic.fetch_add(4, Ordering::Relaxed);
snapshot!(json_string!(processing.get_progress_view()), @r#"
{
"steps": [
{
"currentStep": "writing tasks to disk",
"finished": 1,
"total": 2
},
{
"currentStep": "task",
"finished": 4,
"total": 5
}
],
"percentage": 90.0
}
"#);
}
}

View File

@ -0,0 +1,603 @@
use std::collections::HashSet;
use std::ops::{Bound, RangeBounds};
use meilisearch_types::batches::{Batch, BatchId};
use meilisearch_types::heed::types::{DecodeIgnore, SerdeBincode, SerdeJson, Str};
use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn, WithoutTls};
use meilisearch_types::milli::{CboRoaringBitmapCodec, RoaringBitmapCodec, BEU32};
use meilisearch_types::tasks::{Kind, Status};
use roaring::{MultiOps, RoaringBitmap};
use time::OffsetDateTime;
use super::{Query, Queue};
use crate::processing::ProcessingTasks;
use crate::utils::{
insert_task_datetime, keep_ids_within_datetimes, map_bound,
remove_n_tasks_datetime_earlier_than, remove_task_datetime, ProcessingBatch,
};
use crate::{Error, Result, BEI128};
/// The number of database used by the batch queue
const NUMBER_OF_DATABASES: u32 = 7;
/// Database const names for the `IndexScheduler`.
mod db_name {
pub const ALL_BATCHES: &str = "all-batches";
pub const BATCH_STATUS: &str = "batch-status";
pub const BATCH_KIND: &str = "batch-kind";
pub const BATCH_INDEX_TASKS: &str = "batch-index-tasks";
pub const BATCH_ENQUEUED_AT: &str = "batch-enqueued-at";
pub const BATCH_STARTED_AT: &str = "batch-started-at";
pub const BATCH_FINISHED_AT: &str = "batch-finished-at";
}
pub struct BatchQueue {
/// Contains all the batches accessible by their Id.
pub(crate) all_batches: Database<BEU32, SerdeJson<Batch>>,
/// All the batches containing a task matching the selected status.
pub(crate) status: Database<SerdeBincode<Status>, RoaringBitmapCodec>,
/// All the batches ids grouped by the kind of their task.
pub(crate) kind: Database<SerdeBincode<Kind>, RoaringBitmapCodec>,
/// Store the batches associated to an index.
pub(crate) index_tasks: Database<Str, RoaringBitmapCodec>,
/// Store the batches containing tasks which were enqueued at a specific date
pub(crate) enqueued_at: Database<BEI128, CboRoaringBitmapCodec>,
/// Store the batches containing finished tasks started at a specific date
pub(crate) started_at: Database<BEI128, CboRoaringBitmapCodec>,
/// Store the batches containing tasks finished at a specific date
pub(crate) finished_at: Database<BEI128, CboRoaringBitmapCodec>,
}
impl BatchQueue {
pub(crate) fn private_clone(&self) -> BatchQueue {
BatchQueue {
all_batches: self.all_batches,
status: self.status,
kind: self.kind,
index_tasks: self.index_tasks,
enqueued_at: self.enqueued_at,
started_at: self.started_at,
finished_at: self.finished_at,
}
}
pub(crate) const fn nb_db() -> u32 {
NUMBER_OF_DATABASES
}
pub(super) fn new(env: &Env<WithoutTls>, wtxn: &mut RwTxn) -> Result<Self> {
Ok(Self {
all_batches: env.create_database(wtxn, Some(db_name::ALL_BATCHES))?,
status: env.create_database(wtxn, Some(db_name::BATCH_STATUS))?,
kind: env.create_database(wtxn, Some(db_name::BATCH_KIND))?,
index_tasks: env.create_database(wtxn, Some(db_name::BATCH_INDEX_TASKS))?,
enqueued_at: env.create_database(wtxn, Some(db_name::BATCH_ENQUEUED_AT))?,
started_at: env.create_database(wtxn, Some(db_name::BATCH_STARTED_AT))?,
finished_at: env.create_database(wtxn, Some(db_name::BATCH_FINISHED_AT))?,
})
}
pub(crate) fn all_batch_ids(&self, rtxn: &RoTxn) -> Result<RoaringBitmap> {
enum_iterator::all().map(|s| self.get_status(rtxn, s)).union()
}
pub(crate) fn next_batch_id(&self, rtxn: &RoTxn) -> Result<BatchId> {
Ok(self
.all_batches
.remap_data_type::<DecodeIgnore>()
.last(rtxn)?
.map(|(k, _)| k + 1)
.unwrap_or_default())
}
pub(crate) fn get_batch(&self, rtxn: &RoTxn, batch_id: BatchId) -> Result<Option<Batch>> {
Ok(self.all_batches.get(rtxn, &batch_id)?)
}
/// Returns the whole set of batches that belongs to this index.
pub(crate) fn index_batches(&self, rtxn: &RoTxn, index: &str) -> Result<RoaringBitmap> {
Ok(self.index_tasks.get(rtxn, index)?.unwrap_or_default())
}
pub(crate) fn update_index(
&self,
wtxn: &mut RwTxn,
index: &str,
f: impl Fn(&mut RoaringBitmap),
) -> Result<()> {
let mut batches = self.index_batches(wtxn, index)?;
f(&mut batches);
if batches.is_empty() {
self.index_tasks.delete(wtxn, index)?;
} else {
self.index_tasks.put(wtxn, index, &batches)?;
}
Ok(())
}
pub(crate) fn get_status(&self, rtxn: &RoTxn, status: Status) -> Result<RoaringBitmap> {
Ok(self.status.get(rtxn, &status)?.unwrap_or_default())
}
pub(crate) fn put_status(
&self,
wtxn: &mut RwTxn,
status: Status,
bitmap: &RoaringBitmap,
) -> Result<()> {
Ok(self.status.put(wtxn, &status, bitmap)?)
}
pub(crate) fn update_status(
&self,
wtxn: &mut RwTxn,
status: Status,
f: impl Fn(&mut RoaringBitmap),
) -> Result<()> {
let mut tasks = self.get_status(wtxn, status)?;
f(&mut tasks);
self.put_status(wtxn, status, &tasks)?;
Ok(())
}
pub(crate) fn get_kind(&self, rtxn: &RoTxn, kind: Kind) -> Result<RoaringBitmap> {
Ok(self.kind.get(rtxn, &kind)?.unwrap_or_default())
}
pub(crate) fn put_kind(
&self,
wtxn: &mut RwTxn,
kind: Kind,
bitmap: &RoaringBitmap,
) -> Result<()> {
Ok(self.kind.put(wtxn, &kind, bitmap)?)
}
pub(crate) fn update_kind(
&self,
wtxn: &mut RwTxn,
kind: Kind,
f: impl Fn(&mut RoaringBitmap),
) -> Result<()> {
let mut tasks = self.get_kind(wtxn, kind)?;
f(&mut tasks);
self.put_kind(wtxn, kind, &tasks)?;
Ok(())
}
pub(crate) fn write_batch(&self, wtxn: &mut RwTxn, batch: ProcessingBatch) -> Result<()> {
let old_batch = self.all_batches.get(wtxn, &batch.uid)?;
self.all_batches.put(
wtxn,
&batch.uid,
&Batch {
uid: batch.uid,
progress: None,
details: batch.details,
stats: batch.stats,
started_at: batch.started_at,
finished_at: batch.finished_at,
enqueued_at: batch.enqueued_at,
},
)?;
// Update the statuses
if let Some(ref old_batch) = old_batch {
for status in old_batch.stats.status.keys() {
self.update_status(wtxn, *status, |bitmap| {
bitmap.remove(batch.uid);
})?;
}
}
for status in batch.statuses {
self.update_status(wtxn, status, |bitmap| {
bitmap.insert(batch.uid);
})?;
}
// Update the kinds / types
if let Some(ref old_batch) = old_batch {
let kinds: HashSet<_> = old_batch.stats.types.keys().cloned().collect();
for kind in kinds.difference(&batch.kinds) {
self.update_kind(wtxn, *kind, |bitmap| {
bitmap.remove(batch.uid);
})?;
}
}
for kind in batch.kinds {
self.update_kind(wtxn, kind, |bitmap| {
bitmap.insert(batch.uid);
})?;
}
// Update the indexes
if let Some(ref old_batch) = old_batch {
let indexes: HashSet<_> = old_batch.stats.index_uids.keys().cloned().collect();
for index in indexes.difference(&batch.indexes) {
self.update_index(wtxn, index, |bitmap| {
bitmap.remove(batch.uid);
})?;
}
}
for index in batch.indexes {
self.update_index(wtxn, &index, |bitmap| {
bitmap.insert(batch.uid);
})?;
}
// Update the enqueued_at: we cannot retrieve the previous enqueued at from the previous batch, and
// must instead go through the db looking for it. We cannot look at the task contained in this batch either
// because they may have been removed.
// What we know, though, is that the task date is from before the enqueued_at, and max two timestamps have been written
// to the DB per batches.
if let Some(ref old_batch) = old_batch {
if let Some(enqueued_at) = old_batch.enqueued_at {
remove_task_datetime(wtxn, self.enqueued_at, enqueued_at.earliest, old_batch.uid)?;
remove_task_datetime(wtxn, self.enqueued_at, enqueued_at.oldest, old_batch.uid)?;
} else {
// If we don't have the enqueued at in the batch it means the database comes from the v1.12
// and we still need to find the date by scrolling the database
remove_n_tasks_datetime_earlier_than(
wtxn,
self.enqueued_at,
old_batch.started_at,
old_batch.stats.total_nb_tasks.clamp(1, 2) as usize,
old_batch.uid,
)?;
}
}
// A finished batch MUST contains at least one task and have an enqueued_at
let enqueued_at = batch.enqueued_at.as_ref().unwrap();
insert_task_datetime(wtxn, self.enqueued_at, enqueued_at.earliest, batch.uid)?;
insert_task_datetime(wtxn, self.enqueued_at, enqueued_at.oldest, batch.uid)?;
// Update the started at and finished at
if let Some(ref old_batch) = old_batch {
remove_task_datetime(wtxn, self.started_at, old_batch.started_at, old_batch.uid)?;
if let Some(finished_at) = old_batch.finished_at {
remove_task_datetime(wtxn, self.finished_at, finished_at, old_batch.uid)?;
}
}
insert_task_datetime(wtxn, self.started_at, batch.started_at, batch.uid)?;
insert_task_datetime(wtxn, self.finished_at, batch.finished_at.unwrap(), batch.uid)?;
Ok(())
}
/// Convert an iterator to a `Vec` of batches. The batches MUST exist or a
/// `CorruptedTaskQueue` error will be thrown.
pub(crate) fn get_existing_batches(
&self,
rtxn: &RoTxn,
tasks: impl IntoIterator<Item = BatchId>,
processing: &ProcessingTasks,
) -> Result<Vec<Batch>> {
tasks
.into_iter()
.map(|batch_id| {
if Some(batch_id) == processing.batch.as_ref().map(|batch| batch.uid) {
let mut batch = processing.batch.as_ref().unwrap().to_batch();
batch.progress = processing.get_progress_view();
Ok(batch)
} else {
self.get_batch(rtxn, batch_id)
.and_then(|task| task.ok_or(Error::CorruptedTaskQueue))
}
})
.collect::<Result<_>>()
}
}
impl Queue {
/// Return the batch ids matched by the given query from the index scheduler's point of view.
pub(crate) fn get_batch_ids(
&self,
rtxn: &RoTxn,
query: &Query,
processing: &ProcessingTasks,
) -> Result<RoaringBitmap> {
let Query {
limit,
from,
reverse,
uids,
batch_uids,
statuses,
types,
index_uids,
canceled_by,
before_enqueued_at,
after_enqueued_at,
before_started_at,
after_started_at,
before_finished_at,
after_finished_at,
} = query;
let mut batches = self.batches.all_batch_ids(rtxn)?;
if let Some(batch_id) = processing.batch.as_ref().map(|batch| batch.uid) {
batches.insert(batch_id);
}
if let Some(from) = from {
let range = if reverse.unwrap_or_default() {
u32::MIN..*from
} else {
from.saturating_add(1)..u32::MAX
};
batches.remove_range(range);
}
if let Some(batch_uids) = &batch_uids {
let batches_uids = RoaringBitmap::from_iter(batch_uids);
batches &= batches_uids;
}
if let Some(status) = &statuses {
let mut status_batches = RoaringBitmap::new();
for status in status {
match status {
// special case for Processing batches
Status::Processing => {
if let Some(batch_id) = processing.batch.as_ref().map(|batch| batch.uid) {
status_batches.insert(batch_id);
}
}
// Enqueued tasks are not stored in batches
Status::Enqueued => (),
status => status_batches |= &self.batches.get_status(rtxn, *status)?,
};
}
if !status.contains(&Status::Processing) {
if let Some(ref batch) = processing.batch {
batches.remove(batch.uid);
}
}
batches &= status_batches;
}
if let Some(task_uids) = &uids {
let mut batches_by_task_uids = RoaringBitmap::new();
for task_uid in task_uids {
if let Some(task) = self.tasks.get_task(rtxn, *task_uid)? {
if let Some(batch_uid) = task.batch_uid {
batches_by_task_uids.insert(batch_uid);
}
}
}
batches &= batches_by_task_uids;
}
// There is no database for this query, we must retrieve the task queried by the client and ensure it's valid
if let Some(canceled_by) = &canceled_by {
let mut all_canceled_batches = RoaringBitmap::new();
for cancel_uid in canceled_by {
if let Some(task) = self.tasks.get_task(rtxn, *cancel_uid)? {
if task.kind.as_kind() == Kind::TaskCancelation
&& task.status == Status::Succeeded
{
if let Some(batch_uid) = task.batch_uid {
all_canceled_batches.insert(batch_uid);
}
}
}
}
// if the canceled_by has been specified but no batch
// matches then we prefer matching zero than all batches.
if all_canceled_batches.is_empty() {
return Ok(RoaringBitmap::new());
} else {
batches &= all_canceled_batches;
}
}
if let Some(kind) = &types {
let mut kind_batches = RoaringBitmap::new();
for kind in kind {
kind_batches |= self.batches.get_kind(rtxn, *kind)?;
if let Some(uid) = processing
.batch
.as_ref()
.and_then(|batch| batch.kinds.contains(kind).then_some(batch.uid))
{
kind_batches.insert(uid);
}
}
batches &= &kind_batches;
}
if let Some(index) = &index_uids {
let mut index_batches = RoaringBitmap::new();
for index in index {
index_batches |= self.batches.index_batches(rtxn, index)?;
if let Some(uid) = processing
.batch
.as_ref()
.and_then(|batch| batch.indexes.contains(index).then_some(batch.uid))
{
index_batches.insert(uid);
}
}
batches &= &index_batches;
}
// For the started_at filter, we need to treat the part of the batches that are processing from the part of the
// batches that are not processing. The non-processing ones are filtered normally while the processing ones
// are entirely removed unless the in-memory startedAt variable falls within the date filter.
// Once we have filtered the two subsets, we put them back together and assign it back to `batches`.
batches = {
let (mut filtered_non_processing_batches, mut filtered_processing_batches) =
(&batches - &*processing.processing, &batches & &*processing.processing);
// special case for Processing batches
// A closure that clears the filtered_processing_batches if their started_at date falls outside the given bounds
let mut clear_filtered_processing_batches =
|start: Bound<OffsetDateTime>, end: Bound<OffsetDateTime>| {
let start = map_bound(start, |b| b.unix_timestamp_nanos());
let end = map_bound(end, |b| b.unix_timestamp_nanos());
let is_within_dates = RangeBounds::contains(
&(start, end),
&processing
.batch
.as_ref()
.map_or_else(OffsetDateTime::now_utc, |batch| batch.started_at)
.unix_timestamp_nanos(),
);
if !is_within_dates {
filtered_processing_batches.clear();
}
};
match (after_started_at, before_started_at) {
(None, None) => (),
(None, Some(before)) => {
clear_filtered_processing_batches(Bound::Unbounded, Bound::Excluded(*before))
}
(Some(after), None) => {
clear_filtered_processing_batches(Bound::Excluded(*after), Bound::Unbounded)
}
(Some(after), Some(before)) => clear_filtered_processing_batches(
Bound::Excluded(*after),
Bound::Excluded(*before),
),
};
keep_ids_within_datetimes(
rtxn,
&mut filtered_non_processing_batches,
self.batches.started_at,
*after_started_at,
*before_started_at,
)?;
filtered_non_processing_batches | filtered_processing_batches
};
keep_ids_within_datetimes(
rtxn,
&mut batches,
self.batches.enqueued_at,
*after_enqueued_at,
*before_enqueued_at,
)?;
keep_ids_within_datetimes(
rtxn,
&mut batches,
self.batches.finished_at,
*after_finished_at,
*before_finished_at,
)?;
if let Some(limit) = limit {
batches = if query.reverse.unwrap_or_default() {
batches.into_iter().take(*limit as usize).collect()
} else {
batches.into_iter().rev().take(*limit as usize).collect()
};
}
Ok(batches)
}
/// Return the batch ids matching the query along with the total number of batches
/// by ignoring the from and limit parameters from the user's point of view.
///
/// There are two differences between an internal query and a query executed by
/// the user.
///
/// 1. IndexSwap tasks are not publicly associated with any index, but they are associated
/// with many indexes internally.
/// 2. The user may not have the rights to access the tasks (internally) associated with all indexes.
pub(crate) fn get_batch_ids_from_authorized_indexes(
&self,
rtxn: &RoTxn,
query: &Query,
filters: &meilisearch_auth::AuthFilter,
processing: &ProcessingTasks,
) -> Result<(RoaringBitmap, u64)> {
// compute all batches matching the filter by ignoring the limits, to find the number of batches matching
// the filter.
// As this causes us to compute the filter twice it is slightly inefficient, but doing it this way spares
// us from modifying the underlying implementation, and the performance remains sufficient.
// Should this change, we would modify `get_batch_ids` to directly return the number of matching batches.
let total_batches =
self.get_batch_ids(rtxn, &query.clone().without_limits(), processing)?;
let mut batches = self.get_batch_ids(rtxn, query, processing)?;
// If the query contains a list of index uid or there is a finite list of authorized indexes,
// then we must exclude all the batches that only contains tasks associated to multiple indexes.
// This works because we don't autobatch tasks associated to multiple indexes with tasks associated
// to a single index. e.g: IndexSwap cannot be batched with IndexCreation.
if query.index_uids.is_some() || !filters.all_indexes_authorized() {
for kind in enum_iterator::all::<Kind>().filter(|kind| !kind.related_to_one_index()) {
batches -= self.tasks.get_kind(rtxn, kind)?;
if let Some(batch) = processing.batch.as_ref() {
if batch.kinds.contains(&kind) {
batches.remove(batch.uid);
}
}
}
}
// Any batch that is internally associated with at least one authorized index
// must be returned.
if !filters.all_indexes_authorized() {
let mut valid_indexes = RoaringBitmap::new();
let mut forbidden_indexes = RoaringBitmap::new();
let all_indexes_iter = self.batches.index_tasks.iter(rtxn)?;
for result in all_indexes_iter {
let (index, index_tasks) = result?;
if filters.is_index_authorized(index) {
valid_indexes |= index_tasks;
} else {
forbidden_indexes |= index_tasks;
}
}
if let Some(batch) = processing.batch.as_ref() {
for index in &batch.indexes {
if filters.is_index_authorized(index) {
valid_indexes.insert(batch.uid);
} else {
forbidden_indexes.insert(batch.uid);
}
}
}
// If a batch had ONE valid task then it should be returned
let invalid_batches = forbidden_indexes - valid_indexes;
batches -= invalid_batches;
}
Ok((batches, total_batches.len()))
}
pub(crate) fn get_batches_from_authorized_indexes(
&self,
rtxn: &RoTxn,
query: &Query,
filters: &meilisearch_auth::AuthFilter,
processing: &ProcessingTasks,
) -> Result<(Vec<Batch>, u64)> {
let (batches, total) =
self.get_batch_ids_from_authorized_indexes(rtxn, query, filters, processing)?;
let batches = if query.reverse.unwrap_or_default() {
Box::new(batches.into_iter()) as Box<dyn Iterator<Item = u32>>
} else {
Box::new(batches.into_iter().rev()) as Box<dyn Iterator<Item = u32>>
};
let batches = self.batches.get_existing_batches(
rtxn,
batches.take(query.limit.unwrap_or(u32::MAX) as usize),
processing,
)?;
Ok((batches, total))
}
}

View File

@ -0,0 +1,476 @@
use meili_snap::snapshot;
use meilisearch_auth::AuthFilter;
use meilisearch_types::index_uid_pattern::IndexUidPattern;
use meilisearch_types::tasks::{IndexSwap, KindWithContent, Status};
use time::{Duration, OffsetDateTime};
use crate::insta_snapshot::{snapshot_bitmap, snapshot_index_scheduler};
use crate::test_utils::Breakpoint::*;
use crate::test_utils::{index_creation_task, FailureLocation};
use crate::{IndexScheduler, Query};
#[test]
fn query_batches_from_and_limit() {
let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]);
let kind = index_creation_task("doggo", "bone");
let _task = index_scheduler.register(kind, None, false).unwrap();
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task");
let kind = index_creation_task("whalo", "plankton");
let _task = index_scheduler.register(kind, None, false).unwrap();
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_second_task");
let kind = index_creation_task("catto", "his_own_vomit");
let _task = index_scheduler.register(kind, None, false).unwrap();
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_third_task");
handle.advance_n_successful_batches(3);
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "processed_all_tasks");
let proc = index_scheduler.processing_tasks.read().unwrap().clone();
let rtxn = index_scheduler.env.read_txn().unwrap();
let query = Query { limit: Some(0), ..Default::default() };
let (batches, _) = index_scheduler
.queue
.get_batch_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &proc)
.unwrap();
snapshot!(snapshot_bitmap(&batches), @"[]");
let query = Query { limit: Some(1), ..Default::default() };
let (batches, _) = index_scheduler
.queue
.get_batch_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &proc)
.unwrap();
snapshot!(snapshot_bitmap(&batches), @"[2,]");
let query = Query { limit: Some(2), ..Default::default() };
let (batches, _) = index_scheduler
.queue
.get_batch_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &proc)
.unwrap();
snapshot!(snapshot_bitmap(&batches), @"[1,2,]");
let query = Query { from: Some(1), ..Default::default() };
let (batches, _) = index_scheduler
.queue
.get_batch_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &proc)
.unwrap();
snapshot!(snapshot_bitmap(&batches), @"[0,1,]");
let query = Query { from: Some(2), ..Default::default() };
let (batches, _) = index_scheduler
.queue
.get_batch_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &proc)
.unwrap();
snapshot!(snapshot_bitmap(&batches), @"[0,1,2,]");
let query = Query { from: Some(1), limit: Some(1), ..Default::default() };
let (batches, _) = index_scheduler
.queue
.get_batch_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &proc)
.unwrap();
snapshot!(snapshot_bitmap(&batches), @"[1,]");
let query = Query { from: Some(1), limit: Some(2), ..Default::default() };
let (batches, _) = index_scheduler
.queue
.get_batch_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &proc)
.unwrap();
snapshot!(snapshot_bitmap(&batches), @"[0,1,]");
}
#[test]
fn query_batches_simple() {
let start_time = OffsetDateTime::now_utc();
let (index_scheduler, mut handle) =
IndexScheduler::test(true, vec![(3, FailureLocation::InsideProcessBatch)]);
let kind = index_creation_task("catto", "mouse");
let _task = index_scheduler.register(kind, None, false).unwrap();
let kind = index_creation_task("doggo", "sheep");
let _task = index_scheduler.register(kind, None, false).unwrap();
let kind = index_creation_task("whalo", "fish");
let _task = index_scheduler.register(kind, None, false).unwrap();
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "start");
handle.advance_till([Start, BatchCreated]);
let query = Query { statuses: Some(vec![Status::Processing]), ..Default::default() };
let (mut batches, _) = index_scheduler
.get_batches_from_authorized_indexes(&query, &AuthFilter::default())
.unwrap();
assert_eq!(batches.len(), 1);
batches[0].started_at = OffsetDateTime::UNIX_EPOCH;
assert!(batches[0].enqueued_at.is_some());
batches[0].enqueued_at = None;
// Insta cannot snapshot our batches because the batch stats contains an enum as key: https://github.com/mitsuhiko/insta/issues/689
let batch = serde_json::to_string_pretty(&batches[0]).unwrap();
snapshot!(batch, @r#"
{
"uid": 0,
"details": {
"primaryKey": "mouse"
},
"stats": {
"totalNbTasks": 1,
"status": {
"processing": 1
},
"types": {
"indexCreation": 1
},
"indexUids": {
"catto": 1
}
},
"startedAt": "1970-01-01T00:00:00Z",
"finishedAt": null,
"enqueuedAt": null
}
"#);
let query = Query { statuses: Some(vec![Status::Enqueued]), ..Default::default() };
let (batches, _) = index_scheduler
.get_batch_ids_from_authorized_indexes(&query, &AuthFilter::default())
.unwrap();
snapshot!(snapshot_bitmap(&batches), @"[]"); // The batches don't contains any enqueued tasks
let query =
Query { statuses: Some(vec![Status::Enqueued, Status::Processing]), ..Default::default() };
let (batches, _) = index_scheduler
.get_batch_ids_from_authorized_indexes(&query, &AuthFilter::default())
.unwrap();
snapshot!(snapshot_bitmap(&batches), @"[0,]"); // both enqueued and processing tasks in the first tick
let query = Query {
statuses: Some(vec![Status::Enqueued, Status::Processing]),
after_started_at: Some(start_time),
..Default::default()
};
let (batches, _) = index_scheduler
.get_batch_ids_from_authorized_indexes(&query, &AuthFilter::default())
.unwrap();
// both enqueued and processing tasks in the first tick, but limited to those with a started_at
// that comes after the start of the test, which should excludes the enqueued tasks
snapshot!(snapshot_bitmap(&batches), @"[0,]");
let query = Query {
statuses: Some(vec![Status::Enqueued, Status::Processing]),
before_started_at: Some(start_time),
..Default::default()
};
let (batches, _) = index_scheduler
.get_batch_ids_from_authorized_indexes(&query, &AuthFilter::default())
.unwrap();
// both enqueued and processing tasks in the first tick, but limited to those with a started_at
// that comes before the start of the test, which should excludes all of them
snapshot!(snapshot_bitmap(&batches), @"[]");
let query = Query {
statuses: Some(vec![Status::Enqueued, Status::Processing]),
after_started_at: Some(start_time),
before_started_at: Some(start_time + Duration::minutes(1)),
..Default::default()
};
let (batches, _) = index_scheduler
.get_batch_ids_from_authorized_indexes(&query, &AuthFilter::default())
.unwrap();
// both enqueued and processing tasks in the first tick, but limited to those with a started_at
// that comes after the start of the test and before one minute after the start of the test,
// which should exclude the enqueued tasks and include the only processing task
snapshot!(snapshot_bitmap(&batches), @"[0,]");
handle.advance_till([
InsideProcessBatch,
InsideProcessBatch,
ProcessBatchSucceeded,
AfterProcessing,
Start,
BatchCreated,
]);
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after-advancing-a-bit");
let second_start_time = OffsetDateTime::now_utc();
let query = Query {
statuses: Some(vec![Status::Succeeded, Status::Processing]),
after_started_at: Some(start_time),
before_started_at: Some(start_time + Duration::minutes(1)),
..Default::default()
};
let (batches, _) = index_scheduler
.get_batch_ids_from_authorized_indexes(&query, &AuthFilter::default())
.unwrap();
// both succeeded and processing tasks in the first tick, but limited to those with a started_at
// that comes after the start of the test and before one minute after the start of the test,
// which should include all tasks
snapshot!(snapshot_bitmap(&batches), @"[0,1,]");
let query = Query {
statuses: Some(vec![Status::Succeeded, Status::Processing]),
before_started_at: Some(start_time),
..Default::default()
};
let (batches, _) = index_scheduler
.get_batch_ids_from_authorized_indexes(&query, &AuthFilter::default())
.unwrap();
// both succeeded and processing tasks in the first tick, but limited to those with a started_at
// that comes before the start of the test, which should exclude all tasks
snapshot!(snapshot_bitmap(&batches), @"[]");
let query = Query {
statuses: Some(vec![Status::Enqueued, Status::Succeeded, Status::Processing]),
after_started_at: Some(second_start_time),
before_started_at: Some(second_start_time + Duration::minutes(1)),
..Default::default()
};
let (batches, _) = index_scheduler
.get_batch_ids_from_authorized_indexes(&query, &AuthFilter::default())
.unwrap();
// both succeeded and processing tasks in the first tick, but limited to those with a started_at
// that comes after the start of the second part of the test and before one minute after the
// second start of the test, which should exclude all tasks
snapshot!(snapshot_bitmap(&batches), @"[]");
// now we make one more batch, the started_at field of the new tasks will be past `second_start_time`
handle.advance_till([
InsideProcessBatch,
InsideProcessBatch,
ProcessBatchSucceeded,
AfterProcessing,
Start,
BatchCreated,
]);
let (batches, _) = index_scheduler
.get_batch_ids_from_authorized_indexes(&query, &AuthFilter::default())
.unwrap();
// we run the same query to verify that, and indeed find that the last task is matched
snapshot!(snapshot_bitmap(&batches), @"[2,]");
let query = Query {
statuses: Some(vec![Status::Enqueued, Status::Succeeded, Status::Processing]),
after_started_at: Some(second_start_time),
before_started_at: Some(second_start_time + Duration::minutes(1)),
..Default::default()
};
let (batches, _) = index_scheduler
.get_batch_ids_from_authorized_indexes(&query, &AuthFilter::default())
.unwrap();
// enqueued, succeeded, or processing tasks started after the second part of the test, should
// again only return the last task
snapshot!(snapshot_bitmap(&batches), @"[2,]");
handle.advance_till([ProcessBatchFailed, AfterProcessing]);
// now the last task should have failed
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "end");
let (batches, _) = index_scheduler
.get_batch_ids_from_authorized_indexes(&query, &AuthFilter::default())
.unwrap();
// so running the last query should return nothing
snapshot!(snapshot_bitmap(&batches), @"[]");
let query = Query {
statuses: Some(vec![Status::Failed]),
after_started_at: Some(second_start_time),
before_started_at: Some(second_start_time + Duration::minutes(1)),
..Default::default()
};
let (batches, _) = index_scheduler
.get_batch_ids_from_authorized_indexes(&query, &AuthFilter::default())
.unwrap();
// but the same query on failed tasks should return the last task
snapshot!(snapshot_bitmap(&batches), @"[2,]");
let query = Query {
statuses: Some(vec![Status::Failed]),
after_started_at: Some(second_start_time),
before_started_at: Some(second_start_time + Duration::minutes(1)),
..Default::default()
};
let (batches, _) = index_scheduler
.get_batch_ids_from_authorized_indexes(&query, &AuthFilter::default())
.unwrap();
// but the same query on failed tasks should return the last task
snapshot!(snapshot_bitmap(&batches), @"[2,]");
let query = Query {
statuses: Some(vec![Status::Failed]),
uids: Some(vec![1]),
after_started_at: Some(second_start_time),
before_started_at: Some(second_start_time + Duration::minutes(1)),
..Default::default()
};
let (batches, _) = index_scheduler
.get_batch_ids_from_authorized_indexes(&query, &AuthFilter::default())
.unwrap();
// same query but with an invalid uid
snapshot!(snapshot_bitmap(&batches), @"[]");
let query = Query {
statuses: Some(vec![Status::Failed]),
uids: Some(vec![2]),
after_started_at: Some(second_start_time),
before_started_at: Some(second_start_time + Duration::minutes(1)),
..Default::default()
};
let (batches, _) = index_scheduler
.get_batch_ids_from_authorized_indexes(&query, &AuthFilter::default())
.unwrap();
// same query but with a valid uid
snapshot!(snapshot_bitmap(&batches), @"[2,]");
}
#[test]
fn query_batches_special_rules() {
let (index_scheduler, mut handle) =
IndexScheduler::test(true, vec![(3, FailureLocation::InsideProcessBatch)]);
let kind = index_creation_task("catto", "mouse");
let _task = index_scheduler.register(kind, None, false).unwrap();
let kind = index_creation_task("doggo", "sheep");
let _task = index_scheduler.register(kind, None, false).unwrap();
let kind = KindWithContent::IndexSwap {
swaps: vec![IndexSwap { indexes: ("catto".to_owned(), "doggo".to_owned()) }],
};
let _task = index_scheduler.register(kind, None, false).unwrap();
let kind = KindWithContent::IndexSwap {
swaps: vec![IndexSwap { indexes: ("catto".to_owned(), "whalo".to_owned()) }],
};
let _task = index_scheduler.register(kind, None, false).unwrap();
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "start");
handle.advance_till([Start, BatchCreated]);
let rtxn = index_scheduler.env.read_txn().unwrap();
let proc = index_scheduler.processing_tasks.read().unwrap().clone();
let query = Query { index_uids: Some(vec!["catto".to_owned()]), ..Default::default() };
let (batches, _) = index_scheduler
.queue
.get_batch_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &proc)
.unwrap();
// only the first task associated with catto is returned, the indexSwap tasks are excluded!
snapshot!(snapshot_bitmap(&batches), @"[0,]");
let query = Query { index_uids: Some(vec!["catto".to_owned()]), ..Default::default() };
let (batches, _) = index_scheduler
.queue
.get_batch_ids_from_authorized_indexes(
&rtxn,
&query,
&AuthFilter::with_allowed_indexes(
vec![IndexUidPattern::new_unchecked("doggo")].into_iter().collect(),
),
&proc,
)
.unwrap();
// we have asked for only the tasks associated with catto, but are only authorized to retrieve the tasks
// associated with doggo -> empty result
snapshot!(snapshot_bitmap(&batches), @"[]");
drop(rtxn);
// We're going to advance and process all the batches for the next query to actually hit the db
handle.advance_till([
InsideProcessBatch,
InsideProcessBatch,
ProcessBatchSucceeded,
AfterProcessing,
]);
handle.advance_one_successful_batch();
handle.advance_n_failed_batches(2);
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after-processing-everything");
let rtxn = index_scheduler.env.read_txn().unwrap();
let query = Query::default();
let (batches, _) = index_scheduler
.queue
.get_batch_ids_from_authorized_indexes(
&rtxn,
&query,
&AuthFilter::with_allowed_indexes(
vec![IndexUidPattern::new_unchecked("doggo")].into_iter().collect(),
),
&proc,
)
.unwrap();
// we asked for all the tasks, but we are only authorized to retrieve the doggo tasks
// -> only the index creation of doggo should be returned
snapshot!(snapshot_bitmap(&batches), @"[1,]");
let query = Query::default();
let (batches, _) = index_scheduler
.queue
.get_batch_ids_from_authorized_indexes(
&rtxn,
&query,
&AuthFilter::with_allowed_indexes(
vec![
IndexUidPattern::new_unchecked("catto"),
IndexUidPattern::new_unchecked("doggo"),
]
.into_iter()
.collect(),
),
&proc,
)
.unwrap();
// we asked for all the tasks, but we are only authorized to retrieve the doggo and catto tasks
// -> all tasks except the swap of catto with whalo are returned
snapshot!(snapshot_bitmap(&batches), @"[0,1,]");
let query = Query::default();
let (batches, _) = index_scheduler
.queue
.get_batch_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &proc)
.unwrap();
// we asked for all the tasks with all index authorized -> all tasks returned
snapshot!(snapshot_bitmap(&batches), @"[0,1,2,3,]");
}
#[test]
fn query_batches_canceled_by() {
let (index_scheduler, mut handle) =
IndexScheduler::test(true, vec![(3, FailureLocation::InsideProcessBatch)]);
let kind = index_creation_task("catto", "mouse");
let _ = index_scheduler.register(kind, None, false).unwrap();
let kind = index_creation_task("doggo", "sheep");
let _ = index_scheduler.register(kind, None, false).unwrap();
let kind = KindWithContent::IndexSwap {
swaps: vec![IndexSwap { indexes: ("catto".to_owned(), "doggo".to_owned()) }],
};
let _task = index_scheduler.register(kind, None, false).unwrap();
handle.advance_n_successful_batches(1);
let kind = KindWithContent::TaskCancelation {
query: "test_query".to_string(),
tasks: [0, 1, 2, 3].into_iter().collect(),
};
let task_cancelation = index_scheduler.register(kind, None, false).unwrap();
handle.advance_n_successful_batches(1);
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "start");
let query = Query { canceled_by: Some(vec![task_cancelation.uid]), ..Query::default() };
let (batches, _) = index_scheduler
.get_batch_ids_from_authorized_indexes(&query, &AuthFilter::default())
.unwrap();
// The batch zero was the index creation task, the 1 is the task cancellation
snapshot!(snapshot_bitmap(&batches), @"[1,]");
let query = Query { canceled_by: Some(vec![task_cancelation.uid]), ..Query::default() };
let (batches, _) = index_scheduler
.get_batch_ids_from_authorized_indexes(
&query,
&AuthFilter::with_allowed_indexes(
vec![IndexUidPattern::new_unchecked("doggo")].into_iter().collect(),
),
)
.unwrap();
// Return only 1 because the user is not authorized to see task 2
snapshot!(snapshot_bitmap(&batches), @"[1,]");
}

View File

@ -0,0 +1,391 @@
mod batches;
#[cfg(test)]
mod batches_test;
mod tasks;
#[cfg(test)]
mod tasks_test;
#[cfg(test)]
mod test;
use std::collections::BTreeMap;
use std::fs::File as StdFile;
use std::time::Duration;
use file_store::FileStore;
use meilisearch_types::batches::BatchId;
use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn, WithoutTls};
use meilisearch_types::milli::{CboRoaringBitmapCodec, BEU32};
use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task};
use roaring::RoaringBitmap;
use time::format_description::well_known::Rfc3339;
use time::OffsetDateTime;
use uuid::Uuid;
pub(crate) use self::batches::BatchQueue;
pub(crate) use self::tasks::TaskQueue;
use crate::processing::ProcessingTasks;
use crate::utils::{
check_index_swap_validity, filter_out_references_to_newer_tasks, ProcessingBatch,
};
use crate::{Error, IndexSchedulerOptions, Result, TaskId};
/// The number of database used by queue itself
const NUMBER_OF_DATABASES: u32 = 1;
/// Database const names for the `IndexScheduler`.
mod db_name {
pub const BATCH_TO_TASKS_MAPPING: &str = "batch-to-tasks-mapping";
}
/// Defines a subset of tasks to be retrieved from the [`IndexScheduler`].
///
/// An empty/default query (where each field is set to `None`) matches all tasks.
/// Each non-null field restricts the set of tasks further.
#[derive(Default, Debug, Clone, PartialEq, Eq)]
pub struct Query {
/// The maximum number of tasks to be matched
pub limit: Option<u32>,
/// The minimum [task id](`meilisearch_types::tasks::Task::uid`) to be matched
pub from: Option<u32>,
/// The order used to return the tasks. By default the newest tasks are returned first and the boolean is `false`.
pub reverse: Option<bool>,
/// The [task ids](`meilisearch_types::tasks::Task::uid`) to be matched
pub uids: Option<Vec<TaskId>>,
/// The [batch ids](`meilisearch_types::batches::Batch::uid`) to be matched
pub batch_uids: Option<Vec<BatchId>>,
/// The allowed [statuses](`meilisearch_types::tasks::Task::status`) of the matched tasls
pub statuses: Option<Vec<Status>>,
/// The allowed [kinds](meilisearch_types::tasks::Kind) of the matched tasks.
///
/// The kind of a task is given by:
/// ```
/// # use meilisearch_types::tasks::{Task, Kind};
/// # fn doc_func(task: Task) -> Kind {
/// task.kind.as_kind()
/// # }
/// ```
pub types: Option<Vec<Kind>>,
/// The allowed [index ids](meilisearch_types::tasks::Task::index_uid) of the matched tasks
pub index_uids: Option<Vec<String>>,
/// The [task ids](`meilisearch_types::tasks::Task::uid`) of the [`TaskCancelation`](meilisearch_types::tasks::Task::Kind::TaskCancelation) tasks
/// that canceled the matched tasks.
pub canceled_by: Option<Vec<TaskId>>,
/// Exclusive upper bound of the matched tasks' [`enqueued_at`](meilisearch_types::tasks::Task::enqueued_at) field.
pub before_enqueued_at: Option<OffsetDateTime>,
/// Exclusive lower bound of the matched tasks' [`enqueued_at`](meilisearch_types::tasks::Task::enqueued_at) field.
pub after_enqueued_at: Option<OffsetDateTime>,
/// Exclusive upper bound of the matched tasks' [`started_at`](meilisearch_types::tasks::Task::started_at) field.
pub before_started_at: Option<OffsetDateTime>,
/// Exclusive lower bound of the matched tasks' [`started_at`](meilisearch_types::tasks::Task::started_at) field.
pub after_started_at: Option<OffsetDateTime>,
/// Exclusive upper bound of the matched tasks' [`finished_at`](meilisearch_types::tasks::Task::finished_at) field.
pub before_finished_at: Option<OffsetDateTime>,
/// Exclusive lower bound of the matched tasks' [`finished_at`](meilisearch_types::tasks::Task::finished_at) field.
pub after_finished_at: Option<OffsetDateTime>,
}
impl Query {
/// Return `true` if every field of the query is set to `None`, such that the query
/// matches all tasks.
pub fn is_empty(&self) -> bool {
matches!(
self,
Query {
limit: None,
from: None,
reverse: None,
uids: None,
batch_uids: None,
statuses: None,
types: None,
index_uids: None,
canceled_by: None,
before_enqueued_at: None,
after_enqueued_at: None,
before_started_at: None,
after_started_at: None,
before_finished_at: None,
after_finished_at: None,
}
)
}
/// Add an [index id](meilisearch_types::tasks::Task::index_uid) to the list of permitted indexes.
pub fn with_index(self, index_uid: String) -> Self {
let mut index_vec = self.index_uids.unwrap_or_default();
index_vec.push(index_uid);
Self { index_uids: Some(index_vec), ..self }
}
// Removes the `from` and `limit` restrictions from the query.
// Useful to get the total number of tasks matching a filter.
pub fn without_limits(self) -> Self {
Query { limit: None, from: None, ..self }
}
}
/// Structure which holds meilisearch's indexes and schedules the tasks
/// to be performed on them.
pub struct Queue {
pub(crate) tasks: tasks::TaskQueue,
pub(crate) batches: batches::BatchQueue,
/// Matches a batch id with the associated task ids.
pub(crate) batch_to_tasks_mapping: Database<BEU32, CboRoaringBitmapCodec>,
/// The list of files referenced by the tasks.
pub(crate) file_store: FileStore,
/// The max number of tasks allowed before the scheduler starts to delete
/// the finished tasks automatically.
pub(crate) max_number_of_tasks: usize,
}
impl Queue {
pub(crate) fn private_clone(&self) -> Queue {
Queue {
tasks: self.tasks.private_clone(),
batches: self.batches.private_clone(),
batch_to_tasks_mapping: self.batch_to_tasks_mapping,
file_store: self.file_store.clone(),
max_number_of_tasks: self.max_number_of_tasks,
}
}
pub(crate) const fn nb_db() -> u32 {
tasks::TaskQueue::nb_db() + batches::BatchQueue::nb_db() + NUMBER_OF_DATABASES
}
/// Create an index scheduler and start its run loop.
pub(crate) fn new(
env: &Env<WithoutTls>,
wtxn: &mut RwTxn,
options: &IndexSchedulerOptions,
) -> Result<Self> {
// allow unreachable_code to get rids of the warning in the case of a test build.
Ok(Self {
file_store: FileStore::new(&options.update_file_path)?,
batch_to_tasks_mapping: env
.create_database(wtxn, Some(db_name::BATCH_TO_TASKS_MAPPING))?,
tasks: TaskQueue::new(env, wtxn)?,
batches: BatchQueue::new(env, wtxn)?,
max_number_of_tasks: options.max_number_of_tasks,
})
}
/// Returns the whole set of tasks that belongs to this batch.
pub(crate) fn tasks_in_batch(&self, rtxn: &RoTxn, batch_id: BatchId) -> Result<RoaringBitmap> {
Ok(self.batch_to_tasks_mapping.get(rtxn, &batch_id)?.unwrap_or_default())
}
/// Convert an iterator to a `Vec` of tasks and edit the `ProcessingBatch` to add the given tasks.
///
/// The tasks MUST exist, or a `CorruptedTaskQueue` error will be thrown.
pub(crate) fn get_existing_tasks_for_processing_batch(
&self,
rtxn: &RoTxn,
processing_batch: &mut ProcessingBatch,
tasks: impl IntoIterator<Item = TaskId>,
) -> Result<Vec<Task>> {
tasks
.into_iter()
.map(|task_id| {
let mut task = self
.tasks
.get_task(rtxn, task_id)
.and_then(|task| task.ok_or(Error::CorruptedTaskQueue));
processing_batch.processing(&mut task);
task
})
.collect::<Result<_>>()
}
pub(crate) fn write_batch(
&self,
wtxn: &mut RwTxn,
batch: ProcessingBatch,
tasks: &RoaringBitmap,
) -> Result<()> {
self.batch_to_tasks_mapping.put(wtxn, &batch.uid, tasks)?;
self.batches.write_batch(wtxn, batch)?;
Ok(())
}
pub(crate) fn delete_persisted_task_data(&self, task: &Task) -> Result<()> {
match task.content_uuid() {
Some(content_file) => self.delete_update_file(content_file),
None => Ok(()),
}
}
/// Open and returns the task's content File.
pub fn update_file(&self, uuid: Uuid) -> file_store::Result<StdFile> {
self.file_store.get_update(uuid)
}
/// Delete a file from the index scheduler.
///
/// Counterpart to the [`create_update_file`](IndexScheduler::create_update_file) method.
pub fn delete_update_file(&self, uuid: Uuid) -> Result<()> {
Ok(self.file_store.delete(uuid)?)
}
/// Create a file and register it in the index scheduler.
///
/// The returned file and uuid can be used to associate
/// some data to a task. The file will be kept until
/// the task has been fully processed.
pub fn create_update_file(&self, dry_run: bool) -> Result<(Uuid, file_store::File)> {
if dry_run {
Ok((Uuid::nil(), file_store::File::dry_file()?))
} else {
Ok(self.file_store.new_update()?)
}
}
#[cfg(test)]
pub fn create_update_file_with_uuid(&self, uuid: u128) -> Result<(Uuid, file_store::File)> {
Ok(self.file_store.new_update_with_uuid(uuid)?)
}
/// The size on disk taken by all the updates files contained in the `IndexScheduler`, in bytes.
pub fn compute_update_file_size(&self) -> Result<u64> {
Ok(self.file_store.compute_total_size()?)
}
pub fn register(
&self,
wtxn: &mut RwTxn,
kind: &KindWithContent,
task_id: Option<TaskId>,
dry_run: bool,
) -> Result<Task> {
let next_task_id = self.tasks.next_task_id(wtxn)?;
if let Some(uid) = task_id {
if uid < next_task_id {
return Err(Error::BadTaskId { received: uid, expected: next_task_id });
}
}
let mut task = Task {
uid: task_id.unwrap_or(next_task_id),
// The batch is defined once we starts processing the task
batch_uid: None,
enqueued_at: OffsetDateTime::now_utc(),
started_at: None,
finished_at: None,
error: None,
canceled_by: None,
details: kind.default_details(),
status: Status::Enqueued,
kind: kind.clone(),
};
// For deletion and cancelation tasks, we want to make extra sure that they
// don't attempt to delete/cancel tasks that are newer than themselves.
filter_out_references_to_newer_tasks(&mut task);
// If the register task is an index swap task, verify that it is well-formed
// (that it does not contain duplicate indexes).
check_index_swap_validity(&task)?;
// At this point the task is going to be registered and no further checks will be done
if dry_run {
return Ok(task);
}
// Get rid of the mutability.
let task = task;
self.tasks.register(wtxn, &task)?;
Ok(task)
}
/// Register a task to cleanup the task queue if needed
pub fn cleanup_task_queue(&self, wtxn: &mut RwTxn) -> Result<()> {
let nb_tasks = self.tasks.all_task_ids(wtxn)?.len();
// if we have less than 1M tasks everything is fine
if nb_tasks < self.max_number_of_tasks as u64 {
return Ok(());
}
let finished = self.tasks.status.get(wtxn, &Status::Succeeded)?.unwrap_or_default()
| self.tasks.status.get(wtxn, &Status::Failed)?.unwrap_or_default()
| self.tasks.status.get(wtxn, &Status::Canceled)?.unwrap_or_default();
let to_delete = RoaringBitmap::from_iter(finished.into_iter().rev().take(100_000));
// /!\ the len must be at least 2 or else we might enter an infinite loop where we only delete
// the deletion tasks we enqueued ourselves.
if to_delete.len() < 2 {
tracing::warn!("The task queue is almost full, but no task can be deleted yet.");
// the only thing we can do is hope that the user tasks are going to finish
return Ok(());
}
tracing::info!(
"The task queue is almost full. Deleting the oldest {} finished tasks.",
to_delete.len()
);
// it's safe to unwrap here because we checked the len above
let newest_task_id = to_delete.iter().last().unwrap();
let last_task_to_delete =
self.tasks.get_task(wtxn, newest_task_id)?.ok_or(Error::CorruptedTaskQueue)?;
// increase time by one nanosecond so that the enqueuedAt of the last task to delete is also lower than that date.
let delete_before = last_task_to_delete.enqueued_at + Duration::from_nanos(1);
self.register(
wtxn,
&KindWithContent::TaskDeletion {
query: format!(
"?beforeEnqueuedAt={}&statuses=succeeded,failed,canceled",
delete_before.format(&Rfc3339).map_err(|_| Error::CorruptedTaskQueue)?,
),
tasks: to_delete,
},
None,
false,
)?;
Ok(())
}
pub fn get_stats(
&self,
rtxn: &RoTxn,
processing: &ProcessingTasks,
) -> Result<BTreeMap<String, BTreeMap<String, u64>>> {
let mut res = BTreeMap::new();
let processing_tasks = processing.processing.len();
res.insert(
"statuses".to_string(),
enum_iterator::all::<Status>()
.map(|s| {
let tasks = self.tasks.get_status(rtxn, s)?.len();
match s {
Status::Enqueued => Ok((s.to_string(), tasks - processing_tasks)),
Status::Processing => Ok((s.to_string(), processing_tasks)),
s => Ok((s.to_string(), tasks)),
}
})
.collect::<Result<BTreeMap<String, u64>>>()?,
);
res.insert(
"types".to_string(),
enum_iterator::all::<Kind>()
.map(|s| Ok((s.to_string(), self.tasks.get_kind(rtxn, s)?.len())))
.collect::<Result<BTreeMap<String, u64>>>()?,
);
res.insert(
"indexes".to_string(),
self.tasks
.index_tasks
.iter(rtxn)?
.map(|res| Ok(res.map(|(name, bitmap)| (name.to_string(), bitmap.len()))?))
.collect::<Result<BTreeMap<String, u64>>>()?,
);
Ok(res)
}
}

View File

@ -1,5 +1,5 @@
---
source: crates/index-scheduler/src/lib.rs
source: crates/index-scheduler/src/queue/batches_test.rs
snapshot_kind: text
---
### Autobatching Enabled = true

View File

@ -1,5 +1,5 @@
---
source: crates/index-scheduler/src/lib.rs
source: crates/index-scheduler/src/queue/batches_test.rs
snapshot_kind: text
---
### Autobatching Enabled = true

View File

@ -1,5 +1,5 @@
---
source: crates/index-scheduler/src/lib.rs
source: crates/index-scheduler/src/queue/batches_test.rs
snapshot_kind: text
---
### Autobatching Enabled = true

View File

@ -1,5 +1,5 @@
---
source: crates/index-scheduler/src/lib.rs
source: crates/index-scheduler/src/queue/batches_test.rs
snapshot_kind: text
---
### Autobatching Enabled = true

View File

@ -1,5 +1,5 @@
---
source: crates/index-scheduler/src/lib.rs
source: crates/index-scheduler/src/queue/batches_test.rs
snapshot_kind: text
---
### Autobatching Enabled = true

View File

@ -1,5 +1,5 @@
---
source: crates/index-scheduler/src/lib.rs
source: crates/index-scheduler/src/queue/batches_test.rs
snapshot_kind: text
---
### Autobatching Enabled = true

View File

@ -1,5 +1,5 @@
---
source: crates/index-scheduler/src/lib.rs
source: crates/index-scheduler/src/queue/batches_test.rs
snapshot_kind: text
---
### Autobatching Enabled = true

View File

@ -1,5 +1,5 @@
---
source: crates/index-scheduler/src/lib.rs
source: crates/index-scheduler/src/queue/batches_test.rs
snapshot_kind: text
---
### Autobatching Enabled = true

View File

@ -1,5 +1,5 @@
---
source: crates/index-scheduler/src/lib.rs
source: crates/index-scheduler/src/queue/batches_test.rs
snapshot_kind: text
---
### Autobatching Enabled = true

View File

@ -1,5 +1,5 @@
---
source: crates/index-scheduler/src/lib.rs
source: crates/index-scheduler/src/queue/batches_test.rs
snapshot_kind: text
---
### Autobatching Enabled = true

View File

@ -1,5 +1,5 @@
---
source: crates/index-scheduler/src/lib.rs
source: crates/index-scheduler/src/queue/tasks_test.rs
snapshot_kind: text
---
### Autobatching Enabled = true

View File

@ -1,5 +1,5 @@
---
source: crates/index-scheduler/src/lib.rs
source: crates/index-scheduler/src/queue/tasks_test.rs
snapshot_kind: text
---
### Autobatching Enabled = true

View File

@ -1,5 +1,5 @@
---
source: crates/index-scheduler/src/lib.rs
source: crates/index-scheduler/src/queue/tasks_test.rs
snapshot_kind: text
---
### Autobatching Enabled = true

View File

@ -1,5 +1,5 @@
---
source: crates/index-scheduler/src/lib.rs
source: crates/index-scheduler/src/queue/tasks_test.rs
snapshot_kind: text
---
### Autobatching Enabled = true

View File

@ -1,5 +1,5 @@
---
source: crates/index-scheduler/src/lib.rs
source: crates/index-scheduler/src/queue/tasks_test.rs
snapshot_kind: text
---
### Autobatching Enabled = true

View File

@ -1,5 +1,5 @@
---
source: crates/index-scheduler/src/lib.rs
source: crates/index-scheduler/src/queue/tasks_test.rs
snapshot_kind: text
---
### Autobatching Enabled = true

View File

@ -1,5 +1,5 @@
---
source: crates/index-scheduler/src/lib.rs
source: crates/index-scheduler/src/queue/tasks_test.rs
snapshot_kind: text
---
### Autobatching Enabled = true

View File

@ -1,5 +1,5 @@
---
source: crates/index-scheduler/src/lib.rs
source: crates/index-scheduler/src/queue/tasks_test.rs
snapshot_kind: text
---
### Autobatching Enabled = true

View File

@ -1,5 +1,5 @@
---
source: crates/index-scheduler/src/lib.rs
source: crates/index-scheduler/src/queue/test.rs
snapshot_kind: text
---
### Autobatching Enabled = true

View File

@ -1,5 +1,6 @@
---
source: crates/index-scheduler/src/lib.rs
source: crates/index-scheduler/src/queue/test.rs
snapshot_kind: text
---
[
{

View File

@ -1,5 +1,6 @@
---
source: crates/index-scheduler/src/lib.rs
source: crates/index-scheduler/src/queue/test.rs
snapshot_kind: text
---
[
{

View File

@ -1,5 +1,6 @@
---
source: crates/index-scheduler/src/lib.rs
source: crates/index-scheduler/src/queue/test.rs
snapshot_kind: text
---
[
{

View File

@ -1,5 +1,6 @@
---
source: crates/index-scheduler/src/lib.rs
source: crates/index-scheduler/src/queue/test.rs
snapshot_kind: text
---
[
{

View File

@ -0,0 +1,541 @@
use std::ops::{Bound, RangeBounds};
use meilisearch_types::heed::types::{DecodeIgnore, SerdeBincode, SerdeJson, Str};
use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn, WithoutTls};
use meilisearch_types::milli::{CboRoaringBitmapCodec, RoaringBitmapCodec, BEU32};
use meilisearch_types::tasks::{Kind, Status, Task};
use roaring::{MultiOps, RoaringBitmap};
use time::OffsetDateTime;
use super::{Query, Queue};
use crate::processing::ProcessingTasks;
use crate::utils::{
self, insert_task_datetime, keep_ids_within_datetimes, map_bound, remove_task_datetime,
};
use crate::{Error, Result, TaskId, BEI128};
/// The number of database used by the task queue
const NUMBER_OF_DATABASES: u32 = 8;
/// Database const names for the `IndexScheduler`.
mod db_name {
pub const ALL_TASKS: &str = "all-tasks";
pub const STATUS: &str = "status";
pub const KIND: &str = "kind";
pub const INDEX_TASKS: &str = "index-tasks";
pub const CANCELED_BY: &str = "canceled_by";
pub const ENQUEUED_AT: &str = "enqueued-at";
pub const STARTED_AT: &str = "started-at";
pub const FINISHED_AT: &str = "finished-at";
}
pub struct TaskQueue {
/// The main database, it contains all the tasks accessible by their Id.
pub(crate) all_tasks: Database<BEU32, SerdeJson<Task>>,
/// All the tasks ids grouped by their status.
// TODO we should not be able to serialize a `Status::Processing` in this database.
pub(crate) status: Database<SerdeBincode<Status>, RoaringBitmapCodec>,
/// All the tasks ids grouped by their kind.
pub(crate) kind: Database<SerdeBincode<Kind>, RoaringBitmapCodec>,
/// Store the tasks associated to an index.
pub(crate) index_tasks: Database<Str, RoaringBitmapCodec>,
/// Store the tasks that were canceled by a task uid
pub(crate) canceled_by: Database<BEU32, RoaringBitmapCodec>,
/// Store the task ids of tasks which were enqueued at a specific date
pub(crate) enqueued_at: Database<BEI128, CboRoaringBitmapCodec>,
/// Store the task ids of finished tasks which started being processed at a specific date
pub(crate) started_at: Database<BEI128, CboRoaringBitmapCodec>,
/// Store the task ids of tasks which finished at a specific date
pub(crate) finished_at: Database<BEI128, CboRoaringBitmapCodec>,
}
impl TaskQueue {
pub(crate) fn private_clone(&self) -> TaskQueue {
TaskQueue {
all_tasks: self.all_tasks,
status: self.status,
kind: self.kind,
index_tasks: self.index_tasks,
canceled_by: self.canceled_by,
enqueued_at: self.enqueued_at,
started_at: self.started_at,
finished_at: self.finished_at,
}
}
pub(crate) const fn nb_db() -> u32 {
NUMBER_OF_DATABASES
}
pub(crate) fn new(env: &Env<WithoutTls>, wtxn: &mut RwTxn) -> Result<Self> {
Ok(Self {
all_tasks: env.create_database(wtxn, Some(db_name::ALL_TASKS))?,
status: env.create_database(wtxn, Some(db_name::STATUS))?,
kind: env.create_database(wtxn, Some(db_name::KIND))?,
index_tasks: env.create_database(wtxn, Some(db_name::INDEX_TASKS))?,
canceled_by: env.create_database(wtxn, Some(db_name::CANCELED_BY))?,
enqueued_at: env.create_database(wtxn, Some(db_name::ENQUEUED_AT))?,
started_at: env.create_database(wtxn, Some(db_name::STARTED_AT))?,
finished_at: env.create_database(wtxn, Some(db_name::FINISHED_AT))?,
})
}
pub(crate) fn last_task_id(&self, rtxn: &RoTxn) -> Result<Option<TaskId>> {
Ok(self.all_tasks.remap_data_type::<DecodeIgnore>().last(rtxn)?.map(|(k, _)| k + 1))
}
pub(crate) fn next_task_id(&self, rtxn: &RoTxn) -> Result<TaskId> {
Ok(self.last_task_id(rtxn)?.unwrap_or_default())
}
pub(crate) fn all_task_ids(&self, rtxn: &RoTxn) -> Result<RoaringBitmap> {
enum_iterator::all().map(|s| self.get_status(rtxn, s)).union()
}
pub(crate) fn get_task(&self, rtxn: &RoTxn, task_id: TaskId) -> Result<Option<Task>> {
Ok(self.all_tasks.get(rtxn, &task_id)?)
}
pub(crate) fn update_task(&self, wtxn: &mut RwTxn, task: &Task) -> Result<()> {
let old_task = self.get_task(wtxn, task.uid)?.ok_or(Error::CorruptedTaskQueue)?;
let reprocessing = old_task.status != Status::Enqueued;
debug_assert!(old_task != *task);
debug_assert_eq!(old_task.uid, task.uid);
// If we're processing a task that failed it may already contains a batch_uid
debug_assert!(
reprocessing || (old_task.batch_uid.is_none() && task.batch_uid.is_some()),
"\n==> old: {old_task:?}\n==> new: {task:?}"
);
if old_task.status != task.status {
self.update_status(wtxn, old_task.status, |bitmap| {
bitmap.remove(task.uid);
})?;
self.update_status(wtxn, task.status, |bitmap| {
bitmap.insert(task.uid);
})?;
}
if old_task.kind.as_kind() != task.kind.as_kind() {
self.update_kind(wtxn, old_task.kind.as_kind(), |bitmap| {
bitmap.remove(task.uid);
})?;
self.update_kind(wtxn, task.kind.as_kind(), |bitmap| {
bitmap.insert(task.uid);
})?;
}
assert_eq!(
old_task.enqueued_at, task.enqueued_at,
"Cannot update a task's enqueued_at time"
);
if old_task.started_at != task.started_at {
assert!(
reprocessing || old_task.started_at.is_none(),
"Cannot update a task's started_at time"
);
if let Some(started_at) = old_task.started_at {
remove_task_datetime(wtxn, self.started_at, started_at, task.uid)?;
}
if let Some(started_at) = task.started_at {
insert_task_datetime(wtxn, self.started_at, started_at, task.uid)?;
}
}
if old_task.finished_at != task.finished_at {
assert!(
reprocessing || old_task.finished_at.is_none(),
"Cannot update a task's finished_at time"
);
if let Some(finished_at) = old_task.finished_at {
remove_task_datetime(wtxn, self.finished_at, finished_at, task.uid)?;
}
if let Some(finished_at) = task.finished_at {
insert_task_datetime(wtxn, self.finished_at, finished_at, task.uid)?;
}
}
self.all_tasks.put(wtxn, &task.uid, task)?;
Ok(())
}
/// Returns the whole set of tasks that belongs to this index.
pub(crate) fn index_tasks(&self, rtxn: &RoTxn, index: &str) -> Result<RoaringBitmap> {
Ok(self.index_tasks.get(rtxn, index)?.unwrap_or_default())
}
pub(crate) fn update_index(
&self,
wtxn: &mut RwTxn,
index: &str,
f: impl Fn(&mut RoaringBitmap),
) -> Result<()> {
let mut tasks = self.index_tasks(wtxn, index)?;
f(&mut tasks);
if tasks.is_empty() {
self.index_tasks.delete(wtxn, index)?;
} else {
self.index_tasks.put(wtxn, index, &tasks)?;
}
Ok(())
}
pub(crate) fn get_status(&self, rtxn: &RoTxn, status: Status) -> Result<RoaringBitmap> {
Ok(self.status.get(rtxn, &status)?.unwrap_or_default())
}
pub(crate) fn put_status(
&self,
wtxn: &mut RwTxn,
status: Status,
bitmap: &RoaringBitmap,
) -> Result<()> {
Ok(self.status.put(wtxn, &status, bitmap)?)
}
pub(crate) fn update_status(
&self,
wtxn: &mut RwTxn,
status: Status,
f: impl Fn(&mut RoaringBitmap),
) -> Result<()> {
let mut tasks = self.get_status(wtxn, status)?;
f(&mut tasks);
self.put_status(wtxn, status, &tasks)?;
Ok(())
}
pub(crate) fn get_kind(&self, rtxn: &RoTxn, kind: Kind) -> Result<RoaringBitmap> {
Ok(self.kind.get(rtxn, &kind)?.unwrap_or_default())
}
pub(crate) fn put_kind(
&self,
wtxn: &mut RwTxn,
kind: Kind,
bitmap: &RoaringBitmap,
) -> Result<()> {
Ok(self.kind.put(wtxn, &kind, bitmap)?)
}
pub(crate) fn update_kind(
&self,
wtxn: &mut RwTxn,
kind: Kind,
f: impl Fn(&mut RoaringBitmap),
) -> Result<()> {
let mut tasks = self.get_kind(wtxn, kind)?;
f(&mut tasks);
self.put_kind(wtxn, kind, &tasks)?;
Ok(())
}
/// Convert an iterator to a `Vec` of tasks. The tasks MUST exist or a
/// `CorruptedTaskQueue` error will be thrown.
pub(crate) fn get_existing_tasks(
&self,
rtxn: &RoTxn,
tasks: impl IntoIterator<Item = TaskId>,
) -> Result<Vec<Task>> {
tasks
.into_iter()
.map(|task_id| {
self.get_task(rtxn, task_id).and_then(|task| task.ok_or(Error::CorruptedTaskQueue))
})
.collect::<Result<_>>()
}
pub(crate) fn register(&self, wtxn: &mut RwTxn, task: &Task) -> Result<()> {
self.all_tasks.put(wtxn, &task.uid, task)?;
for index in task.indexes() {
self.update_index(wtxn, index, |bitmap| {
bitmap.insert(task.uid);
})?;
}
self.update_status(wtxn, Status::Enqueued, |bitmap| {
bitmap.insert(task.uid);
})?;
self.update_kind(wtxn, task.kind.as_kind(), |bitmap| {
bitmap.insert(task.uid);
})?;
utils::insert_task_datetime(wtxn, self.enqueued_at, task.enqueued_at, task.uid)?;
Ok(())
}
}
impl Queue {
/// Return the task ids matched by the given query from the index scheduler's point of view.
pub(crate) fn get_task_ids(
&self,
rtxn: &RoTxn,
query: &Query,
processing_tasks: &ProcessingTasks,
) -> Result<RoaringBitmap> {
let ProcessingTasks { batch: processing_batch, processing: processing_tasks, progress: _ } =
processing_tasks;
let Query {
limit,
from,
reverse,
uids,
batch_uids,
statuses,
types,
index_uids,
canceled_by,
before_enqueued_at,
after_enqueued_at,
before_started_at,
after_started_at,
before_finished_at,
after_finished_at,
} = query;
let mut tasks = self.tasks.all_task_ids(rtxn)?;
if let Some(from) = from {
let range = if reverse.unwrap_or_default() {
u32::MIN..*from
} else {
from.saturating_add(1)..u32::MAX
};
tasks.remove_range(range);
}
if let Some(batch_uids) = batch_uids {
let mut batch_tasks = RoaringBitmap::new();
for batch_uid in batch_uids {
if processing_batch.as_ref().map_or(false, |batch| batch.uid == *batch_uid) {
batch_tasks |= &**processing_tasks;
} else {
batch_tasks |= self.tasks_in_batch(rtxn, *batch_uid)?;
}
}
tasks &= batch_tasks;
}
if let Some(status) = statuses {
let mut status_tasks = RoaringBitmap::new();
for status in status {
match status {
// special case for Processing tasks
Status::Processing => {
status_tasks |= &**processing_tasks;
}
status => status_tasks |= &self.tasks.get_status(rtxn, *status)?,
};
}
if !status.contains(&Status::Processing) {
tasks -= &**processing_tasks;
}
tasks &= status_tasks;
}
if let Some(uids) = uids {
let uids = RoaringBitmap::from_iter(uids);
tasks &= &uids;
}
if let Some(canceled_by) = canceled_by {
let mut all_canceled_tasks = RoaringBitmap::new();
for cancel_task_uid in canceled_by {
if let Some(canceled_by_uid) = self.tasks.canceled_by.get(rtxn, cancel_task_uid)? {
all_canceled_tasks |= canceled_by_uid;
}
}
// if the canceled_by has been specified but no task
// matches then we prefer matching zero than all tasks.
if all_canceled_tasks.is_empty() {
return Ok(RoaringBitmap::new());
} else {
tasks &= all_canceled_tasks;
}
}
if let Some(kind) = types {
let mut kind_tasks = RoaringBitmap::new();
for kind in kind {
kind_tasks |= self.tasks.get_kind(rtxn, *kind)?;
}
tasks &= &kind_tasks;
}
if let Some(index) = index_uids {
let mut index_tasks = RoaringBitmap::new();
for index in index {
index_tasks |= self.tasks.index_tasks(rtxn, index)?;
}
tasks &= &index_tasks;
}
// For the started_at filter, we need to treat the part of the tasks that are processing from the part of the
// tasks that are not processing. The non-processing ones are filtered normally while the processing ones
// are entirely removed unless the in-memory startedAt variable falls within the date filter.
// Once we have filtered the two subsets, we put them back together and assign it back to `tasks`.
tasks = {
let (mut filtered_non_processing_tasks, mut filtered_processing_tasks) =
(&tasks - &**processing_tasks, &tasks & &**processing_tasks);
// special case for Processing tasks
// A closure that clears the filtered_processing_tasks if their started_at date falls outside the given bounds
let mut clear_filtered_processing_tasks =
|start: Bound<OffsetDateTime>, end: Bound<OffsetDateTime>| {
let start = map_bound(start, |b| b.unix_timestamp_nanos());
let end = map_bound(end, |b| b.unix_timestamp_nanos());
let is_within_dates = RangeBounds::contains(
&(start, end),
&processing_batch
.as_ref()
.map_or_else(OffsetDateTime::now_utc, |batch| batch.started_at)
.unix_timestamp_nanos(),
);
if !is_within_dates {
filtered_processing_tasks.clear();
}
};
match (after_started_at, before_started_at) {
(None, None) => (),
(None, Some(before)) => {
clear_filtered_processing_tasks(Bound::Unbounded, Bound::Excluded(*before))
}
(Some(after), None) => {
clear_filtered_processing_tasks(Bound::Excluded(*after), Bound::Unbounded)
}
(Some(after), Some(before)) => clear_filtered_processing_tasks(
Bound::Excluded(*after),
Bound::Excluded(*before),
),
};
keep_ids_within_datetimes(
rtxn,
&mut filtered_non_processing_tasks,
self.tasks.started_at,
*after_started_at,
*before_started_at,
)?;
filtered_non_processing_tasks | filtered_processing_tasks
};
keep_ids_within_datetimes(
rtxn,
&mut tasks,
self.tasks.enqueued_at,
*after_enqueued_at,
*before_enqueued_at,
)?;
keep_ids_within_datetimes(
rtxn,
&mut tasks,
self.tasks.finished_at,
*after_finished_at,
*before_finished_at,
)?;
if let Some(limit) = limit {
tasks = if query.reverse.unwrap_or_default() {
tasks.into_iter().take(*limit as usize).collect()
} else {
tasks.into_iter().rev().take(*limit as usize).collect()
};
}
Ok(tasks)
}
pub(crate) fn get_task_ids_from_authorized_indexes(
&self,
rtxn: &RoTxn,
query: &Query,
filters: &meilisearch_auth::AuthFilter,
processing_tasks: &ProcessingTasks,
) -> Result<(RoaringBitmap, u64)> {
// compute all tasks matching the filter by ignoring the limits, to find the number of tasks matching
// the filter.
// As this causes us to compute the filter twice it is slightly inefficient, but doing it this way spares
// us from modifying the underlying implementation, and the performance remains sufficient.
// Should this change, we would modify `get_task_ids` to directly return the number of matching tasks.
let total_tasks =
self.get_task_ids(rtxn, &query.clone().without_limits(), processing_tasks)?;
let mut tasks = self.get_task_ids(rtxn, query, processing_tasks)?;
// If the query contains a list of index uid or there is a finite list of authorized indexes,
// then we must exclude all the kinds that aren't associated to one and only one index.
if query.index_uids.is_some() || !filters.all_indexes_authorized() {
for kind in enum_iterator::all::<Kind>().filter(|kind| !kind.related_to_one_index()) {
tasks -= self.tasks.get_kind(rtxn, kind)?;
}
}
// Any task that is internally associated with a non-authorized index
// must be discarded.
if !filters.all_indexes_authorized() {
let all_indexes_iter = self.tasks.index_tasks.iter(rtxn)?;
for result in all_indexes_iter {
let (index, index_tasks) = result?;
if !filters.is_index_authorized(index) {
tasks -= index_tasks;
}
}
}
Ok((tasks, total_tasks.len()))
}
pub(crate) fn get_tasks_from_authorized_indexes(
&self,
rtxn: &RoTxn,
query: &Query,
filters: &meilisearch_auth::AuthFilter,
processing_tasks: &ProcessingTasks,
) -> Result<(Vec<Task>, u64)> {
let (tasks, total) =
self.get_task_ids_from_authorized_indexes(rtxn, query, filters, processing_tasks)?;
let tasks = if query.reverse.unwrap_or_default() {
Box::new(tasks.into_iter()) as Box<dyn Iterator<Item = u32>>
} else {
Box::new(tasks.into_iter().rev()) as Box<dyn Iterator<Item = u32>>
};
let tasks = self
.tasks
.get_existing_tasks(rtxn, tasks.take(query.limit.unwrap_or(u32::MAX) as usize))?;
let ProcessingTasks { batch, processing, progress: _ } = processing_tasks;
let ret = tasks.into_iter();
if processing.is_empty() || batch.is_none() {
Ok((ret.collect(), total))
} else {
// Safe because we ensured there was a batch in the previous branch
let batch = batch.as_ref().unwrap();
Ok((
ret.map(|task| {
if processing.contains(task.uid) {
Task {
status: Status::Processing,
batch_uid: Some(batch.uid),
started_at: Some(batch.started_at),
..task
}
} else {
task
}
})
.collect(),
total,
))
}
}
}

View File

@ -0,0 +1,441 @@
use meili_snap::snapshot;
use meilisearch_auth::AuthFilter;
use meilisearch_types::index_uid_pattern::IndexUidPattern;
use meilisearch_types::tasks::{IndexSwap, KindWithContent, Status};
use time::{Duration, OffsetDateTime};
use crate::insta_snapshot::{snapshot_bitmap, snapshot_index_scheduler};
use crate::test_utils::Breakpoint::*;
use crate::test_utils::{index_creation_task, FailureLocation};
use crate::{IndexScheduler, Query};
#[test]
fn query_tasks_from_and_limit() {
let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]);
let kind = index_creation_task("doggo", "bone");
let _task = index_scheduler.register(kind, None, false).unwrap();
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task");
let kind = index_creation_task("whalo", "plankton");
let _task = index_scheduler.register(kind, None, false).unwrap();
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_second_task");
let kind = index_creation_task("catto", "his_own_vomit");
let _task = index_scheduler.register(kind, None, false).unwrap();
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_third_task");
handle.advance_n_successful_batches(3);
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "processed_all_tasks");
let rtxn = index_scheduler.env.read_txn().unwrap();
let processing = index_scheduler.processing_tasks.read().unwrap();
let query = Query { limit: Some(0), ..Default::default() };
let (tasks, _) = index_scheduler
.queue
.get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &processing)
.unwrap();
snapshot!(snapshot_bitmap(&tasks), @"[]");
let query = Query { limit: Some(1), ..Default::default() };
let (tasks, _) = index_scheduler
.queue
.get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &processing)
.unwrap();
snapshot!(snapshot_bitmap(&tasks), @"[2,]");
let query = Query { limit: Some(2), ..Default::default() };
let (tasks, _) = index_scheduler
.queue
.get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &processing)
.unwrap();
snapshot!(snapshot_bitmap(&tasks), @"[1,2,]");
let query = Query { from: Some(1), ..Default::default() };
let (tasks, _) = index_scheduler
.queue
.get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &processing)
.unwrap();
snapshot!(snapshot_bitmap(&tasks), @"[0,1,]");
let query = Query { from: Some(2), ..Default::default() };
let (tasks, _) = index_scheduler
.queue
.get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &processing)
.unwrap();
snapshot!(snapshot_bitmap(&tasks), @"[0,1,2,]");
let query = Query { from: Some(1), limit: Some(1), ..Default::default() };
let (tasks, _) = index_scheduler
.queue
.get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &processing)
.unwrap();
snapshot!(snapshot_bitmap(&tasks), @"[1,]");
let query = Query { from: Some(1), limit: Some(2), ..Default::default() };
let (tasks, _) = index_scheduler
.queue
.get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &processing)
.unwrap();
snapshot!(snapshot_bitmap(&tasks), @"[0,1,]");
}
#[test]
fn query_tasks_simple() {
let start_time = OffsetDateTime::now_utc();
let (index_scheduler, mut handle) =
IndexScheduler::test(true, vec![(3, FailureLocation::InsideProcessBatch)]);
let kind = index_creation_task("catto", "mouse");
let _task = index_scheduler.register(kind, None, false).unwrap();
let kind = index_creation_task("doggo", "sheep");
let _task = index_scheduler.register(kind, None, false).unwrap();
let kind = index_creation_task("whalo", "fish");
let _task = index_scheduler.register(kind, None, false).unwrap();
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "start");
handle.advance_till([Start, BatchCreated]);
let query = Query { statuses: Some(vec![Status::Processing]), ..Default::default() };
let (tasks, _) = index_scheduler
.get_task_ids_from_authorized_indexes(&query, &AuthFilter::default())
.unwrap();
snapshot!(snapshot_bitmap(&tasks), @"[0,]"); // only the processing tasks in the first tick
let query = Query { statuses: Some(vec![Status::Enqueued]), ..Default::default() };
let (tasks, _) = index_scheduler
.get_task_ids_from_authorized_indexes(&query, &AuthFilter::default())
.unwrap();
snapshot!(snapshot_bitmap(&tasks), @"[1,2,]"); // only the enqueued tasks in the first tick
let query =
Query { statuses: Some(vec![Status::Enqueued, Status::Processing]), ..Default::default() };
let (tasks, _) = index_scheduler
.get_task_ids_from_authorized_indexes(&query, &AuthFilter::default())
.unwrap();
snapshot!(snapshot_bitmap(&tasks), @"[0,1,2,]"); // both enqueued and processing tasks in the first tick
let query = Query {
statuses: Some(vec![Status::Enqueued, Status::Processing]),
after_started_at: Some(start_time),
..Default::default()
};
let (tasks, _) = index_scheduler
.get_task_ids_from_authorized_indexes(&query, &AuthFilter::default())
.unwrap();
// both enqueued and processing tasks in the first tick, but limited to those with a started_at
// that comes after the start of the test, which should excludes the enqueued tasks
snapshot!(snapshot_bitmap(&tasks), @"[0,]");
let query = Query {
statuses: Some(vec![Status::Enqueued, Status::Processing]),
before_started_at: Some(start_time),
..Default::default()
};
let (tasks, _) = index_scheduler
.get_task_ids_from_authorized_indexes(&query, &AuthFilter::default())
.unwrap();
// both enqueued and processing tasks in the first tick, but limited to those with a started_at
// that comes before the start of the test, which should excludes all of them
snapshot!(snapshot_bitmap(&tasks), @"[]");
let query = Query {
statuses: Some(vec![Status::Enqueued, Status::Processing]),
after_started_at: Some(start_time),
before_started_at: Some(start_time + Duration::minutes(1)),
..Default::default()
};
let (tasks, _) = index_scheduler
.get_task_ids_from_authorized_indexes(&query, &AuthFilter::default())
.unwrap();
// both enqueued and processing tasks in the first tick, but limited to those with a started_at
// that comes after the start of the test and before one minute after the start of the test,
// which should exclude the enqueued tasks and include the only processing task
snapshot!(snapshot_bitmap(&tasks), @"[0,]");
handle.advance_till([
InsideProcessBatch,
InsideProcessBatch,
ProcessBatchSucceeded,
AfterProcessing,
Start,
BatchCreated,
]);
let second_start_time = OffsetDateTime::now_utc();
let query = Query {
statuses: Some(vec![Status::Succeeded, Status::Processing]),
after_started_at: Some(start_time),
before_started_at: Some(start_time + Duration::minutes(1)),
..Default::default()
};
let (tasks, _) = index_scheduler
.get_task_ids_from_authorized_indexes(&query, &AuthFilter::default())
.unwrap();
// both succeeded and processing tasks in the first tick, but limited to those with a started_at
// that comes after the start of the test and before one minute after the start of the test,
// which should include all tasks
snapshot!(snapshot_bitmap(&tasks), @"[0,1,]");
let query = Query {
statuses: Some(vec![Status::Succeeded, Status::Processing]),
before_started_at: Some(start_time),
..Default::default()
};
let (tasks, _) = index_scheduler
.get_task_ids_from_authorized_indexes(&query, &AuthFilter::default())
.unwrap();
// both succeeded and processing tasks in the first tick, but limited to those with a started_at
// that comes before the start of the test, which should exclude all tasks
snapshot!(snapshot_bitmap(&tasks), @"[]");
let query = Query {
statuses: Some(vec![Status::Enqueued, Status::Succeeded, Status::Processing]),
after_started_at: Some(second_start_time),
before_started_at: Some(second_start_time + Duration::minutes(1)),
..Default::default()
};
let (tasks, _) = index_scheduler
.get_task_ids_from_authorized_indexes(&query, &AuthFilter::default())
.unwrap();
// both succeeded and processing tasks in the first tick, but limited to those with a started_at
// that comes after the start of the second part of the test and before one minute after the
// second start of the test, which should exclude all tasks
snapshot!(snapshot_bitmap(&tasks), @"[]");
// now we make one more batch, the started_at field of the new tasks will be past `second_start_time`
handle.advance_till([
InsideProcessBatch,
InsideProcessBatch,
ProcessBatchSucceeded,
AfterProcessing,
Start,
BatchCreated,
]);
let (tasks, _) = index_scheduler
.get_task_ids_from_authorized_indexes(&query, &AuthFilter::default())
.unwrap();
// we run the same query to verify that, and indeed find that the last task is matched
snapshot!(snapshot_bitmap(&tasks), @"[2,]");
let query = Query {
statuses: Some(vec![Status::Enqueued, Status::Succeeded, Status::Processing]),
after_started_at: Some(second_start_time),
before_started_at: Some(second_start_time + Duration::minutes(1)),
..Default::default()
};
let (tasks, _) = index_scheduler
.get_task_ids_from_authorized_indexes(&query, &AuthFilter::default())
.unwrap();
// enqueued, succeeded, or processing tasks started after the second part of the test, should
// again only return the last task
snapshot!(snapshot_bitmap(&tasks), @"[2,]");
handle.advance_till([ProcessBatchFailed, AfterProcessing]);
// now the last task should have failed
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "end");
let (tasks, _) = index_scheduler
.get_task_ids_from_authorized_indexes(&query, &AuthFilter::default())
.unwrap();
// so running the last query should return nothing
snapshot!(snapshot_bitmap(&tasks), @"[]");
let query = Query {
statuses: Some(vec![Status::Failed]),
after_started_at: Some(second_start_time),
before_started_at: Some(second_start_time + Duration::minutes(1)),
..Default::default()
};
let (tasks, _) = index_scheduler
.get_task_ids_from_authorized_indexes(&query, &AuthFilter::default())
.unwrap();
// but the same query on failed tasks should return the last task
snapshot!(snapshot_bitmap(&tasks), @"[2,]");
let query = Query {
statuses: Some(vec![Status::Failed]),
after_started_at: Some(second_start_time),
before_started_at: Some(second_start_time + Duration::minutes(1)),
..Default::default()
};
let (tasks, _) = index_scheduler
.get_task_ids_from_authorized_indexes(&query, &AuthFilter::default())
.unwrap();
// but the same query on failed tasks should return the last task
snapshot!(snapshot_bitmap(&tasks), @"[2,]");
let query = Query {
statuses: Some(vec![Status::Failed]),
uids: Some(vec![1]),
after_started_at: Some(second_start_time),
before_started_at: Some(second_start_time + Duration::minutes(1)),
..Default::default()
};
let (tasks, _) = index_scheduler
.get_task_ids_from_authorized_indexes(&query, &AuthFilter::default())
.unwrap();
// same query but with an invalid uid
snapshot!(snapshot_bitmap(&tasks), @"[]");
let query = Query {
statuses: Some(vec![Status::Failed]),
uids: Some(vec![2]),
after_started_at: Some(second_start_time),
before_started_at: Some(second_start_time + Duration::minutes(1)),
..Default::default()
};
let (tasks, _) = index_scheduler
.get_task_ids_from_authorized_indexes(&query, &AuthFilter::default())
.unwrap();
// same query but with a valid uid
snapshot!(snapshot_bitmap(&tasks), @"[2,]");
}
#[test]
fn query_tasks_special_rules() {
let (index_scheduler, mut handle) =
IndexScheduler::test(true, vec![(3, FailureLocation::InsideProcessBatch)]);
let kind = index_creation_task("catto", "mouse");
let _task = index_scheduler.register(kind, None, false).unwrap();
let kind = index_creation_task("doggo", "sheep");
let _task = index_scheduler.register(kind, None, false).unwrap();
let kind = KindWithContent::IndexSwap {
swaps: vec![IndexSwap { indexes: ("catto".to_owned(), "doggo".to_owned()) }],
};
let _task = index_scheduler.register(kind, None, false).unwrap();
let kind = KindWithContent::IndexSwap {
swaps: vec![IndexSwap { indexes: ("catto".to_owned(), "whalo".to_owned()) }],
};
let _task = index_scheduler.register(kind, None, false).unwrap();
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "start");
handle.advance_till([Start, BatchCreated]);
let rtxn = index_scheduler.env.read_txn().unwrap();
let proc = index_scheduler.processing_tasks.read().unwrap();
let query = Query { index_uids: Some(vec!["catto".to_owned()]), ..Default::default() };
let (tasks, _) = index_scheduler
.queue
.get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &proc)
.unwrap();
// only the first task associated with catto is returned, the indexSwap tasks are excluded!
snapshot!(snapshot_bitmap(&tasks), @"[0,]");
let query = Query { index_uids: Some(vec!["catto".to_owned()]), ..Default::default() };
let (tasks, _) = index_scheduler
.queue
.get_task_ids_from_authorized_indexes(
&rtxn,
&query,
&AuthFilter::with_allowed_indexes(
vec![IndexUidPattern::new_unchecked("doggo")].into_iter().collect(),
),
&proc,
)
.unwrap();
// we have asked for only the tasks associated with catto, but are only authorized to retrieve the tasks
// associated with doggo -> empty result
snapshot!(snapshot_bitmap(&tasks), @"[]");
let query = Query::default();
let (tasks, _) = index_scheduler
.queue
.get_task_ids_from_authorized_indexes(
&rtxn,
&query,
&AuthFilter::with_allowed_indexes(
vec![IndexUidPattern::new_unchecked("doggo")].into_iter().collect(),
),
&proc,
)
.unwrap();
// we asked for all the tasks, but we are only authorized to retrieve the doggo tasks
// -> only the index creation of doggo should be returned
snapshot!(snapshot_bitmap(&tasks), @"[1,]");
let query = Query::default();
let (tasks, _) = index_scheduler
.queue
.get_task_ids_from_authorized_indexes(
&rtxn,
&query,
&AuthFilter::with_allowed_indexes(
vec![
IndexUidPattern::new_unchecked("catto"),
IndexUidPattern::new_unchecked("doggo"),
]
.into_iter()
.collect(),
),
&proc,
)
.unwrap();
// we asked for all the tasks, but we are only authorized to retrieve the doggo and catto tasks
// -> all tasks except the swap of catto with whalo are returned
snapshot!(snapshot_bitmap(&tasks), @"[0,1,]");
let query = Query::default();
let (tasks, _) = index_scheduler
.queue
.get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &proc)
.unwrap();
// we asked for all the tasks with all index authorized -> all tasks returned
snapshot!(snapshot_bitmap(&tasks), @"[0,1,2,3,]");
}
#[test]
fn query_tasks_canceled_by() {
let (index_scheduler, mut handle) =
IndexScheduler::test(true, vec![(3, FailureLocation::InsideProcessBatch)]);
let kind = index_creation_task("catto", "mouse");
let _ = index_scheduler.register(kind, None, false).unwrap();
let kind = index_creation_task("doggo", "sheep");
let _ = index_scheduler.register(kind, None, false).unwrap();
let kind = KindWithContent::IndexSwap {
swaps: vec![IndexSwap { indexes: ("catto".to_owned(), "doggo".to_owned()) }],
};
let _task = index_scheduler.register(kind, None, false).unwrap();
handle.advance_n_successful_batches(1);
let kind = KindWithContent::TaskCancelation {
query: "test_query".to_string(),
tasks: [0, 1, 2, 3].into_iter().collect(),
};
let task_cancelation = index_scheduler.register(kind, None, false).unwrap();
handle.advance_n_successful_batches(1);
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "start");
let rtxn = index_scheduler.read_txn().unwrap();
let proc = index_scheduler.processing_tasks.read().unwrap();
let query = Query { canceled_by: Some(vec![task_cancelation.uid]), ..Query::default() };
let (tasks, _) = index_scheduler
.queue
.get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default(), &proc)
.unwrap();
// 0 is not returned because it was not canceled, 3 is not returned because it is the uid of the
// taskCancelation itself
snapshot!(snapshot_bitmap(&tasks), @"[1,2,]");
let query = Query { canceled_by: Some(vec![task_cancelation.uid]), ..Query::default() };
let (tasks, _) = index_scheduler
.queue
.get_task_ids_from_authorized_indexes(
&rtxn,
&query,
&AuthFilter::with_allowed_indexes(
vec![IndexUidPattern::new_unchecked("doggo")].into_iter().collect(),
),
&proc,
)
.unwrap();
// Return only 1 because the user is not authorized to see task 2
snapshot!(snapshot_bitmap(&tasks), @"[1,]");
}

View File

@ -0,0 +1,398 @@
use big_s::S;
use meili_snap::{json_string, snapshot};
use meilisearch_types::error::ErrorCode;
use meilisearch_types::tasks::{KindWithContent, Status};
use roaring::RoaringBitmap;
use crate::insta_snapshot::snapshot_index_scheduler;
use crate::test_utils::Breakpoint::*;
use crate::test_utils::{index_creation_task, replace_document_import_task};
use crate::{IndexScheduler, Query};
#[test]
fn register() {
// In this test, the handle doesn't make any progress, we only check that the tasks are registered
let (index_scheduler, mut _handle) = IndexScheduler::test(true, vec![]);
let kinds = [
index_creation_task("catto", "mouse"),
replace_document_import_task("catto", None, 0, 12),
replace_document_import_task("catto", None, 1, 50),
replace_document_import_task("doggo", Some("bone"), 2, 5000),
];
let (_, file) = index_scheduler.queue.create_update_file_with_uuid(0).unwrap();
file.persist().unwrap();
let (_, file) = index_scheduler.queue.create_update_file_with_uuid(1).unwrap();
file.persist().unwrap();
let (_, file) = index_scheduler.queue.create_update_file_with_uuid(2).unwrap();
file.persist().unwrap();
for (idx, kind) in kinds.into_iter().enumerate() {
let k = kind.as_kind();
let task = index_scheduler.register(kind, None, false).unwrap();
index_scheduler.assert_internally_consistent();
assert_eq!(task.uid, idx as u32);
assert_eq!(task.status, Status::Enqueued);
assert_eq!(task.kind.as_kind(), k);
}
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "everything_is_successfully_registered");
}
#[test]
fn dry_run() {
let (index_scheduler, _handle) = IndexScheduler::test(true, vec![]);
let kind = KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None };
let task = index_scheduler.register(kind, None, true).unwrap();
snapshot!(task.uid, @"0");
snapshot!(snapshot_index_scheduler(&index_scheduler), @r"
### Autobatching Enabled = true
### Processing batch None:
[]
----------------------------------------------------------------------
### All Tasks:
----------------------------------------------------------------------
### Status:
----------------------------------------------------------------------
### Kind:
----------------------------------------------------------------------
### Index Tasks:
----------------------------------------------------------------------
### Index Mapper:
----------------------------------------------------------------------
### Canceled By:
----------------------------------------------------------------------
### Enqueued At:
----------------------------------------------------------------------
### Started At:
----------------------------------------------------------------------
### Finished At:
----------------------------------------------------------------------
### All Batches:
----------------------------------------------------------------------
### Batch to tasks mapping:
----------------------------------------------------------------------
### Batches Status:
----------------------------------------------------------------------
### Batches Kind:
----------------------------------------------------------------------
### Batches Index Tasks:
----------------------------------------------------------------------
### Batches Enqueued At:
----------------------------------------------------------------------
### Batches Started At:
----------------------------------------------------------------------
### Batches Finished At:
----------------------------------------------------------------------
### File Store:
----------------------------------------------------------------------
");
let kind = KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None };
let task = index_scheduler.register(kind, Some(12), true).unwrap();
snapshot!(task.uid, @"12");
snapshot!(snapshot_index_scheduler(&index_scheduler), @r"
### Autobatching Enabled = true
### Processing batch None:
[]
----------------------------------------------------------------------
### All Tasks:
----------------------------------------------------------------------
### Status:
----------------------------------------------------------------------
### Kind:
----------------------------------------------------------------------
### Index Tasks:
----------------------------------------------------------------------
### Index Mapper:
----------------------------------------------------------------------
### Canceled By:
----------------------------------------------------------------------
### Enqueued At:
----------------------------------------------------------------------
### Started At:
----------------------------------------------------------------------
### Finished At:
----------------------------------------------------------------------
### All Batches:
----------------------------------------------------------------------
### Batch to tasks mapping:
----------------------------------------------------------------------
### Batches Status:
----------------------------------------------------------------------
### Batches Kind:
----------------------------------------------------------------------
### Batches Index Tasks:
----------------------------------------------------------------------
### Batches Enqueued At:
----------------------------------------------------------------------
### Batches Started At:
----------------------------------------------------------------------
### Batches Finished At:
----------------------------------------------------------------------
### File Store:
----------------------------------------------------------------------
");
}
#[test]
fn basic_set_taskid() {
let (index_scheduler, _handle) = IndexScheduler::test(true, vec![]);
let kind = KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None };
let task = index_scheduler.register(kind, None, false).unwrap();
snapshot!(task.uid, @"0");
let kind = KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None };
let task = index_scheduler.register(kind, Some(12), false).unwrap();
snapshot!(task.uid, @"12");
let kind = KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None };
let error = index_scheduler.register(kind, Some(5), false).unwrap_err();
snapshot!(error, @"Received bad task id: 5 should be >= to 13.");
}
#[test]
fn test_disable_auto_deletion_of_tasks() {
let (index_scheduler, mut handle) = IndexScheduler::test_with_custom_config(vec![], |config| {
config.cleanup_enabled = false;
config.max_number_of_tasks = 2;
None
});
index_scheduler
.register(
KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None },
None,
false,
)
.unwrap();
handle.advance_one_successful_batch();
index_scheduler
.register(
KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None },
None,
false,
)
.unwrap();
handle.advance_one_failed_batch();
// at this point the max number of tasks is reached
// we can still enqueue multiple tasks
index_scheduler
.register(
KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None },
None,
false,
)
.unwrap();
index_scheduler
.register(
KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None },
None,
false,
)
.unwrap();
let rtxn = index_scheduler.env.read_txn().unwrap();
let proc = index_scheduler.processing_tasks.read().unwrap();
let tasks =
index_scheduler.queue.get_task_ids(&rtxn, &Query { ..Default::default() }, &proc).unwrap();
let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap();
snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]" }), name: "task_queue_is_full");
drop(rtxn);
drop(proc);
// now we're above the max number of tasks
// and if we try to advance in the tick function no new task deletion should be enqueued
handle.advance_till([Start, BatchCreated]);
let rtxn = index_scheduler.env.read_txn().unwrap();
let proc = index_scheduler.processing_tasks.read().unwrap();
let tasks =
index_scheduler.queue.get_task_ids(&rtxn, &Query { ..Default::default() }, &proc).unwrap();
let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap();
snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]", ".**.original_filter" => "[filter]", ".**.query" => "[query]" }), name: "task_deletion_have_not_been_enqueued");
drop(rtxn);
drop(proc);
}
#[test]
fn test_auto_deletion_of_tasks() {
let (index_scheduler, mut handle) = IndexScheduler::test_with_custom_config(vec![], |config| {
config.max_number_of_tasks = 2;
None
});
index_scheduler
.register(
KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None },
None,
false,
)
.unwrap();
handle.advance_one_successful_batch();
index_scheduler
.register(
KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None },
None,
false,
)
.unwrap();
handle.advance_one_failed_batch();
// at this point the max number of tasks is reached
// we can still enqueue multiple tasks
index_scheduler
.register(
KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None },
None,
false,
)
.unwrap();
index_scheduler
.register(
KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None },
None,
false,
)
.unwrap();
let rtxn = index_scheduler.env.read_txn().unwrap();
let proc = index_scheduler.processing_tasks.read().unwrap();
let tasks =
index_scheduler.queue.get_task_ids(&rtxn, &Query { ..Default::default() }, &proc).unwrap();
let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap();
snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]" }), name: "task_queue_is_full");
drop(rtxn);
drop(proc);
// now we're above the max number of tasks
// and if we try to advance in the tick function a new task deletion should be enqueued
handle.advance_till([Start, BatchCreated]);
let rtxn = index_scheduler.env.read_txn().unwrap();
let proc = index_scheduler.processing_tasks.read().unwrap();
let tasks =
index_scheduler.queue.get_task_ids(&rtxn, &Query { ..Default::default() }, &proc).unwrap();
let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap();
snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]", ".**.original_filter" => "[filter]", ".**.query" => "[query]" }), name: "task_deletion_have_been_enqueued");
drop(rtxn);
drop(proc);
handle.advance_till([InsideProcessBatch, ProcessBatchSucceeded, AfterProcessing]);
let rtxn = index_scheduler.env.read_txn().unwrap();
let proc = index_scheduler.processing_tasks.read().unwrap();
let tasks =
index_scheduler.queue.get_task_ids(&rtxn, &Query { ..Default::default() }, &proc).unwrap();
let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap();
snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]", ".**.original_filter" => "[filter]", ".**.query" => "[query]" }), name: "task_deletion_have_been_processed");
drop(rtxn);
drop(proc);
handle.advance_one_failed_batch();
// a new task deletion has been enqueued
handle.advance_one_successful_batch();
let rtxn = index_scheduler.env.read_txn().unwrap();
let proc = index_scheduler.processing_tasks.read().unwrap();
let tasks =
index_scheduler.queue.get_task_ids(&rtxn, &Query { ..Default::default() }, &proc).unwrap();
let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap();
snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]", ".**.original_filter" => "[filter]", ".**.query" => "[query]" }), name: "after_the_second_task_deletion");
drop(rtxn);
drop(proc);
handle.advance_one_failed_batch();
handle.advance_one_successful_batch();
let rtxn = index_scheduler.env.read_txn().unwrap();
let proc = index_scheduler.processing_tasks.read().unwrap();
let tasks =
index_scheduler.queue.get_task_ids(&rtxn, &Query { ..Default::default() }, &proc).unwrap();
let tasks = index_scheduler.queue.tasks.get_existing_tasks(&rtxn, tasks).unwrap();
snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]", ".**.original_filter" => "[filter]", ".**.query" => "[query]" }), name: "everything_has_been_processed");
drop(rtxn);
drop(proc);
}
#[test]
fn test_task_queue_is_full() {
let (index_scheduler, mut handle) = IndexScheduler::test_with_custom_config(vec![], |config| {
// that's the minimum map size possible
config.task_db_size = 1048576 * 3;
None
});
index_scheduler
.register(
KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None },
None,
false,
)
.unwrap();
handle.advance_one_successful_batch();
// on average this task takes ~600 bytes
loop {
let result = index_scheduler.register(
KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None },
None,
false,
);
if result.is_err() {
break;
}
handle.advance_one_failed_batch();
}
index_scheduler.assert_internally_consistent();
// at this point the task DB shoud have reached its limit and we should not be able to register new tasks
let result = index_scheduler
.register(
KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None },
None,
false,
)
.unwrap_err();
snapshot!(result, @"Meilisearch cannot receive write operations because the limit of the task database has been reached. Please delete tasks to continue performing write operations.");
// we won't be able to test this error in an integration test thus as a best effort test I still ensure the error return the expected error code
snapshot!(format!("{:?}", result.error_code()), @"NoSpaceLeftOnDevice");
// Even the task deletion that doesn't delete anything shouldn't be accepted
let result = index_scheduler
.register(
KindWithContent::TaskDeletion { query: S("test"), tasks: RoaringBitmap::new() },
None,
false,
)
.unwrap_err();
snapshot!(result, @"Meilisearch cannot receive write operations because the limit of the task database has been reached. Please delete tasks to continue performing write operations.");
// we won't be able to test this error in an integration test thus as a best effort test I still ensure the error return the expected error code
snapshot!(format!("{:?}", result.error_code()), @"NoSpaceLeftOnDevice");
// But a task deletion that delete something should works
index_scheduler
.register(
KindWithContent::TaskDeletion { query: S("test"), tasks: (0..100).collect() },
None,
false,
)
.unwrap();
handle.advance_one_successful_batch();
// Now we should be able to enqueue a few tasks again
index_scheduler
.register(
KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None },
None,
false,
)
.unwrap();
handle.advance_one_failed_batch();
}

View File

@ -0,0 +1,474 @@
/*!
The autobatcher is responsible for combining the next enqueued
tasks affecting a single index into a [batch](crate::batch::Batch).
The main function of the autobatcher is [`next_autobatch`].
*/
use meilisearch_types::tasks::TaskId;
use std::ops::ControlFlow::{self, Break, Continue};
use crate::KindWithContent;
/// Succinctly describes a task's [`Kind`](meilisearch_types::tasks::Kind)
/// for the purpose of simplifying the implementation of the autobatcher.
///
/// Only the non-prioritised tasks that can be grouped in a batch have a corresponding [`AutobatchKind`]
enum AutobatchKind {
DocumentImport { allow_index_creation: bool, primary_key: Option<String> },
DocumentEdition,
DocumentDeletion { by_filter: bool },
DocumentClear,
Settings { allow_index_creation: bool },
IndexCreation,
IndexDeletion,
IndexUpdate,
IndexSwap,
}
impl AutobatchKind {
#[rustfmt::skip]
fn allow_index_creation(&self) -> Option<bool> {
match self {
AutobatchKind::DocumentImport { allow_index_creation, .. }
| AutobatchKind::Settings { allow_index_creation, .. } => Some(*allow_index_creation),
_ => None,
}
}
fn primary_key(&self) -> Option<Option<&str>> {
match self {
AutobatchKind::DocumentImport { primary_key, .. } => Some(primary_key.as_deref()),
_ => None,
}
}
}
impl From<KindWithContent> for AutobatchKind {
fn from(kind: KindWithContent) -> Self {
match kind {
KindWithContent::DocumentAdditionOrUpdate {
allow_index_creation, primary_key, ..
} => AutobatchKind::DocumentImport { allow_index_creation, primary_key },
KindWithContent::DocumentEdition { .. } => AutobatchKind::DocumentEdition,
KindWithContent::DocumentDeletion { .. } => {
AutobatchKind::DocumentDeletion { by_filter: false }
}
KindWithContent::DocumentClear { .. } => AutobatchKind::DocumentClear,
KindWithContent::DocumentDeletionByFilter { .. } => {
AutobatchKind::DocumentDeletion { by_filter: true }
}
KindWithContent::SettingsUpdate { allow_index_creation, is_deletion, .. } => {
AutobatchKind::Settings {
allow_index_creation: allow_index_creation && !is_deletion,
}
}
KindWithContent::IndexDeletion { .. } => AutobatchKind::IndexDeletion,
KindWithContent::IndexCreation { .. } => AutobatchKind::IndexCreation,
KindWithContent::IndexUpdate { .. } => AutobatchKind::IndexUpdate,
KindWithContent::IndexSwap { .. } => AutobatchKind::IndexSwap,
KindWithContent::TaskCancelation { .. }
| KindWithContent::TaskDeletion { .. }
| KindWithContent::DumpCreation { .. }
| KindWithContent::UpgradeDatabase { .. }
| KindWithContent::SnapshotCreation => {
panic!("The autobatcher should never be called with tasks that don't apply to an index.")
}
}
}
}
#[derive(Debug)]
pub enum BatchKind {
DocumentClear {
ids: Vec<TaskId>,
},
DocumentOperation {
allow_index_creation: bool,
primary_key: Option<String>,
operation_ids: Vec<TaskId>,
},
DocumentEdition {
id: TaskId,
},
DocumentDeletion {
deletion_ids: Vec<TaskId>,
includes_by_filter: bool,
},
ClearAndSettings {
other: Vec<TaskId>,
allow_index_creation: bool,
settings_ids: Vec<TaskId>,
},
Settings {
allow_index_creation: bool,
settings_ids: Vec<TaskId>,
},
IndexDeletion {
ids: Vec<TaskId>,
},
IndexCreation {
id: TaskId,
},
IndexUpdate {
id: TaskId,
},
IndexSwap {
id: TaskId,
},
}
impl BatchKind {
#[rustfmt::skip]
fn allow_index_creation(&self) -> Option<bool> {
match self {
BatchKind::DocumentOperation { allow_index_creation, .. }
| BatchKind::ClearAndSettings { allow_index_creation, .. }
| BatchKind::Settings { allow_index_creation, .. } => Some(*allow_index_creation),
_ => None,
}
}
fn primary_key(&self) -> Option<Option<&str>> {
match self {
BatchKind::DocumentOperation { primary_key, .. } => Some(primary_key.as_deref()),
_ => None,
}
}
}
impl BatchKind {
/// Returns a `ControlFlow::Break` if you must stop right now.
/// The boolean tell you if an index has been created by the batched task.
/// To ease the writing of the code. `true` can be returned when you don't need to create an index
/// but false can't be returned if you needs to create an index.
// TODO use an AutoBatchKind as input
pub fn new(
task_id: TaskId,
kind: KindWithContent,
primary_key: Option<&str>,
) -> (ControlFlow<BatchKind, BatchKind>, bool) {
use AutobatchKind as K;
match AutobatchKind::from(kind) {
K::IndexCreation => (Break(BatchKind::IndexCreation { id: task_id }), true),
K::IndexDeletion => (Break(BatchKind::IndexDeletion { ids: vec![task_id] }), false),
K::IndexUpdate => (Break(BatchKind::IndexUpdate { id: task_id }), false),
K::IndexSwap => (Break(BatchKind::IndexSwap { id: task_id }), false),
K::DocumentClear => (Continue(BatchKind::DocumentClear { ids: vec![task_id] }), false),
K::DocumentImport { allow_index_creation, primary_key: pk }
if primary_key.is_none() || pk.is_none() || primary_key == pk.as_deref() =>
{
(
Continue(BatchKind::DocumentOperation {
allow_index_creation,
primary_key: pk,
operation_ids: vec![task_id],
}),
allow_index_creation,
)
}
// if the primary key set in the task was different than ours we should stop and make this batch fail asap.
K::DocumentImport { allow_index_creation, primary_key } => (
Break(BatchKind::DocumentOperation {
allow_index_creation,
primary_key,
operation_ids: vec![task_id],
}),
allow_index_creation,
),
K::DocumentEdition => (Break(BatchKind::DocumentEdition { id: task_id }), false),
K::DocumentDeletion { by_filter: includes_by_filter } => (
Continue(BatchKind::DocumentDeletion {
deletion_ids: vec![task_id],
includes_by_filter,
}),
false,
),
K::Settings { allow_index_creation } => (
Continue(BatchKind::Settings { allow_index_creation, settings_ids: vec![task_id] }),
allow_index_creation,
),
}
}
/// Returns a `ControlFlow::Break` if you must stop right now.
/// The boolean tell you if an index has been created by the batched task.
/// To ease the writing of the code. `true` can be returned when you don't need to create an index
/// but false can't be returned if you needs to create an index.
#[rustfmt::skip]
fn accumulate(self, id: TaskId, kind: AutobatchKind, index_already_exists: bool, primary_key: Option<&str>) -> ControlFlow<BatchKind, BatchKind> {
use AutobatchKind as K;
match (self, kind) {
// We don't batch any of these operations
(this, K::IndexCreation | K::IndexUpdate | K::IndexSwap | K::DocumentEdition) => Break(this),
// We must not batch tasks that don't have the same index creation rights if the index doesn't already exists.
(this, kind) if !index_already_exists && this.allow_index_creation() == Some(false) && kind.allow_index_creation() == Some(true) => {
Break(this)
},
// NOTE: We need to negate the whole condition since we're checking if we need to break instead of continue.
// I wrote it this way because it's easier to understand than the other way around.
(this, kind) if !(
// 1. If both task don't interact with primary key -> we can continue
(this.primary_key().is_none() && kind.primary_key().is_none()) ||
// 2. Else ->
(
// 2.1 If we already have a primary-key ->
(
primary_key.is_some() &&
// 2.1.1 If the task we're trying to accumulate have a pk it must be equal to our primary key
// 2.1.2 If the task don't have a primary-key -> we can continue
kind.primary_key().map_or(true, |pk| pk == primary_key)
) ||
// 2.2 If we don't have a primary-key ->
(
// 2.2.1 If both the batch and the task have a primary key they should be equal
// 2.2.2 If the batch is set to Some(None), the task should be too
// 2.2.3 If the batch is set to None -> we can continue
this.primary_key().zip(kind.primary_key()).map_or(true, |(this, kind)| this == kind)
)
)
) // closing the negation
=> {
Break(this)
},
// The index deletion can batch with everything but must stop after
(
BatchKind::DocumentClear { mut ids }
| BatchKind::DocumentDeletion { deletion_ids: mut ids, includes_by_filter: _ }
| BatchKind::DocumentOperation { allow_index_creation: _, primary_key: _, operation_ids: mut ids }
| BatchKind::Settings { allow_index_creation: _, settings_ids: mut ids },
K::IndexDeletion,
) => {
ids.push(id);
Break(BatchKind::IndexDeletion { ids })
}
(
BatchKind::ClearAndSettings { settings_ids: mut ids, allow_index_creation: _, mut other },
K::IndexDeletion,
) => {
ids.push(id);
ids.append(&mut other);
Break(BatchKind::IndexDeletion { ids })
}
(
BatchKind::DocumentClear { mut ids },
K::DocumentClear | K::DocumentDeletion { by_filter: _ },
) => {
ids.push(id);
Continue(BatchKind::DocumentClear { ids })
}
(
this @ BatchKind::DocumentClear { .. },
K::DocumentImport { .. } | K::Settings { .. },
) => Break(this),
(
BatchKind::DocumentOperation { allow_index_creation: _, primary_key: _, mut operation_ids },
K::DocumentClear,
) => {
operation_ids.push(id);
Continue(BatchKind::DocumentClear { ids: operation_ids })
}
// we can autobatch different kind of document operations and mix replacements with updates
(
BatchKind::DocumentOperation { allow_index_creation, primary_key: _, mut operation_ids },
K::DocumentImport { primary_key: pk, .. },
) => {
operation_ids.push(id);
Continue(BatchKind::DocumentOperation {
allow_index_creation,
operation_ids,
primary_key: pk,
})
}
(
BatchKind::DocumentOperation { allow_index_creation, primary_key, mut operation_ids },
K::DocumentDeletion { by_filter: false },
) => {
operation_ids.push(id);
Continue(BatchKind::DocumentOperation {
allow_index_creation,
primary_key,
operation_ids,
})
}
// We can't batch a document operation with a delete by filter
(
this @ BatchKind::DocumentOperation { .. },
K::DocumentDeletion { by_filter: true },
) => {
Break(this)
}
(
this @ BatchKind::DocumentOperation { .. },
K::Settings { .. },
) => Break(this),
(BatchKind::DocumentDeletion { mut deletion_ids, includes_by_filter: _ }, K::DocumentClear) => {
deletion_ids.push(id);
Continue(BatchKind::DocumentClear { ids: deletion_ids })
}
// we can't autobatch the deletion and import if the document deletion contained a filter
(
this @ BatchKind::DocumentDeletion { deletion_ids: _, includes_by_filter: true },
K::DocumentImport { .. }
) => Break(this),
// we can autobatch the deletion and import if the index already exists
(
BatchKind::DocumentDeletion { mut deletion_ids, includes_by_filter: false },
K::DocumentImport { allow_index_creation, primary_key }
) if index_already_exists => {
deletion_ids.push(id);
Continue(BatchKind::DocumentOperation {
allow_index_creation,
primary_key,
operation_ids: deletion_ids,
})
}
// we can autobatch the deletion and import if both can't create an index
(
BatchKind::DocumentDeletion { mut deletion_ids, includes_by_filter: false },
K::DocumentImport { allow_index_creation, primary_key }
) if !allow_index_creation => {
deletion_ids.push(id);
Continue(BatchKind::DocumentOperation {
allow_index_creation,
primary_key,
operation_ids: deletion_ids,
})
}
// we can't autobatch a deletion and an import if the index does not exists but would be created by an addition
(
this @ BatchKind::DocumentDeletion { .. },
K::DocumentImport { .. }
) => {
Break(this)
}
(BatchKind::DocumentDeletion { mut deletion_ids, includes_by_filter }, K::DocumentDeletion { by_filter }) => {
deletion_ids.push(id);
Continue(BatchKind::DocumentDeletion { deletion_ids, includes_by_filter: includes_by_filter | by_filter })
}
(this @ BatchKind::DocumentDeletion { .. }, K::Settings { .. }) => Break(this),
(
BatchKind::Settings { settings_ids, allow_index_creation },
K::DocumentClear,
) => Continue(BatchKind::ClearAndSettings {
settings_ids,
allow_index_creation,
other: vec![id],
}),
(
this @ BatchKind::Settings { .. },
K::DocumentImport { .. } | K::DocumentDeletion { .. },
) => Break(this),
(
BatchKind::Settings { mut settings_ids, allow_index_creation },
K::Settings { .. },
) => {
settings_ids.push(id);
Continue(BatchKind::Settings {
allow_index_creation,
settings_ids,
})
}
(
BatchKind::ClearAndSettings { mut other, settings_ids, allow_index_creation },
K::DocumentClear,
) => {
other.push(id);
Continue(BatchKind::ClearAndSettings {
other,
settings_ids,
allow_index_creation,
})
}
(this @ BatchKind::ClearAndSettings { .. }, K::DocumentImport { .. }) => Break(this),
(
BatchKind::ClearAndSettings {
mut other,
settings_ids,
allow_index_creation,
},
K::DocumentDeletion { .. },
) => {
other.push(id);
Continue(BatchKind::ClearAndSettings {
other,
settings_ids,
allow_index_creation,
})
}
(
BatchKind::ClearAndSettings { mut settings_ids, other, allow_index_creation },
K::Settings { .. },
) => {
settings_ids.push(id);
Continue(BatchKind::ClearAndSettings {
other,
settings_ids,
allow_index_creation,
})
}
(
BatchKind::IndexCreation { .. }
| BatchKind::IndexDeletion { .. }
| BatchKind::IndexUpdate { .. }
| BatchKind::IndexSwap { .. }
| BatchKind::DocumentEdition { .. },
_,
) => {
unreachable!()
}
}
}
}
/// Create a batch from an ordered list of tasks.
///
/// ## Preconditions
/// 1. The tasks must be enqueued and given in the order in which they were enqueued
/// 2. The tasks must not be prioritised tasks (e.g. task cancellation, dump, snapshot, task deletion)
/// 3. The tasks must all be related to the same index
///
/// ## Return
/// `None` if the list of tasks is empty. Otherwise, an [`AutoBatch`] that represents
/// a subset of the given tasks.
pub fn autobatch(
enqueued: Vec<(TaskId, KindWithContent)>,
index_already_exists: bool,
primary_key: Option<&str>,
) -> Option<(BatchKind, bool)> {
let mut enqueued = enqueued.into_iter();
let (id, kind) = enqueued.next()?;
// index_exist will keep track of if the index should exist at this point after the tasks we batched.
let mut index_exist = index_already_exists;
let (mut acc, must_create_index) = match BatchKind::new(id, kind, primary_key) {
(Continue(acc), create) => (acc, create),
(Break(acc), create) => return Some((acc, create)),
};
// if an index has been created in the previous step we can consider it as existing.
index_exist |= must_create_index;
for (id, kind) in enqueued {
acc = match acc.accumulate(id, kind.into(), index_exist, primary_key) {
Continue(acc) => acc,
Break(acc) => return Some((acc, must_create_index)),
};
}
Some((acc, must_create_index))
}

View File

@ -0,0 +1,395 @@
use meilisearch_types::milli::update::IndexDocumentsMethod::{
self, ReplaceDocuments, UpdateDocuments,
};
use meilisearch_types::tasks::{IndexSwap, KindWithContent};
use uuid::Uuid;
use self::autobatcher::{autobatch, BatchKind};
use super::*;
use crate::TaskId;
#[macro_export]
macro_rules! debug_snapshot {
($value:expr, @$snapshot:literal) => {{
let value = format!("{:?}", $value);
meili_snap::snapshot!(value, @$snapshot);
}};
}
fn autobatch_from(
index_already_exists: bool,
primary_key: Option<&str>,
input: impl IntoIterator<Item = KindWithContent>,
) -> Option<(BatchKind, bool)> {
autobatch(
input.into_iter().enumerate().map(|(id, kind)| (id as TaskId, kind)).collect(),
index_already_exists,
primary_key,
)
}
fn doc_imp(
method: IndexDocumentsMethod,
allow_index_creation: bool,
primary_key: Option<&str>,
) -> KindWithContent {
KindWithContent::DocumentAdditionOrUpdate {
index_uid: String::from("doggo"),
primary_key: primary_key.map(|pk| pk.to_string()),
method,
content_file: Uuid::new_v4(),
documents_count: 0,
allow_index_creation,
}
}
fn doc_del() -> KindWithContent {
KindWithContent::DocumentDeletion {
index_uid: String::from("doggo"),
documents_ids: Vec::new(),
}
}
fn doc_del_fil() -> KindWithContent {
KindWithContent::DocumentDeletionByFilter {
index_uid: String::from("doggo"),
filter_expr: serde_json::json!("cuteness > 100"),
}
}
fn doc_clr() -> KindWithContent {
KindWithContent::DocumentClear { index_uid: String::from("doggo") }
}
fn settings(allow_index_creation: bool) -> KindWithContent {
KindWithContent::SettingsUpdate {
index_uid: String::from("doggo"),
new_settings: Default::default(),
is_deletion: false,
allow_index_creation,
}
}
fn idx_create() -> KindWithContent {
KindWithContent::IndexCreation { index_uid: String::from("doggo"), primary_key: None }
}
fn idx_update() -> KindWithContent {
KindWithContent::IndexUpdate { index_uid: String::from("doggo"), primary_key: None }
}
fn idx_del() -> KindWithContent {
KindWithContent::IndexDeletion { index_uid: String::from("doggo") }
}
fn idx_swap() -> KindWithContent {
KindWithContent::IndexSwap {
swaps: vec![IndexSwap { indexes: (String::from("doggo"), String::from("catto")) }],
}
}
#[test]
fn autobatch_simple_operation_together() {
// we can autobatch one or multiple `ReplaceDocuments` together.
// if the index exists.
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentOperation { allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp( ReplaceDocuments, true , None), doc_imp(ReplaceDocuments, true , None)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0, 1, 2] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), doc_imp( ReplaceDocuments, false , None), doc_imp(ReplaceDocuments, false , None)]), @"Some((DocumentOperation { allow_index_creation: false, primary_key: None, operation_ids: [0, 1, 2] }, false))");
// if it doesn't exists.
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentOperation { allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, true, None), doc_imp( ReplaceDocuments, true , None), doc_imp(ReplaceDocuments, true , None)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0, 1, 2] }, true))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None), doc_imp( ReplaceDocuments, true , None), doc_imp(ReplaceDocuments, true , None)]), @"Some((DocumentOperation { allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
// we can autobatch one or multiple `UpdateDocuments` together.
// if the index exists.
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_imp(UpdateDocuments, true, None), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0, 1, 2] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentOperation { allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, None), doc_imp(UpdateDocuments, false, None), doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentOperation { allow_index_creation: false, primary_key: None, operation_ids: [0, 1, 2] }, false))");
// if it doesn't exists.
debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, true, None), doc_imp(UpdateDocuments, true, None), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0, 1, 2] }, true))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentOperation { allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, false, None), doc_imp(UpdateDocuments, false, None), doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentOperation { allow_index_creation: false, primary_key: None, operation_ids: [0, 1, 2] }, false))");
// we can autobatch one or multiple DocumentDeletion together
debug_snapshot!(autobatch_from(true, None, [doc_del()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: false }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_del(), doc_del()]), @"Some((DocumentDeletion { deletion_ids: [0, 1, 2], includes_by_filter: false }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_del()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: false }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_del(), doc_del(), doc_del()]), @"Some((DocumentDeletion { deletion_ids: [0, 1, 2], includes_by_filter: false }, false))");
// we can autobatch one or multiple DocumentDeletionByFilter together
debug_snapshot!(autobatch_from(true, None, [doc_del_fil()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_del_fil(), doc_del_fil()]), @"Some((DocumentDeletion { deletion_ids: [0, 1, 2], includes_by_filter: true }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_del_fil()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_del_fil(), doc_del_fil(), doc_del_fil()]), @"Some((DocumentDeletion { deletion_ids: [0, 1, 2], includes_by_filter: true }, false))");
// we can autobatch one or multiple Settings together
debug_snapshot!(autobatch_from(true, None, [settings(true)]), @"Some((Settings { allow_index_creation: true, settings_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [settings(true), settings(true), settings(true)]), @"Some((Settings { allow_index_creation: true, settings_ids: [0, 1, 2] }, true))");
debug_snapshot!(autobatch_from(true, None, [settings(false)]), @"Some((Settings { allow_index_creation: false, settings_ids: [0] }, false))");
debug_snapshot!(autobatch_from(true, None, [settings(false), settings(false), settings(false)]), @"Some((Settings { allow_index_creation: false, settings_ids: [0, 1, 2] }, false))");
debug_snapshot!(autobatch_from(false,None, [settings(true)]), @"Some((Settings { allow_index_creation: true, settings_ids: [0] }, true))");
debug_snapshot!(autobatch_from(false,None, [settings(true), settings(true), settings(true)]), @"Some((Settings { allow_index_creation: true, settings_ids: [0, 1, 2] }, true))");
debug_snapshot!(autobatch_from(false,None, [settings(false)]), @"Some((Settings { allow_index_creation: false, settings_ids: [0] }, false))");
debug_snapshot!(autobatch_from(false,None, [settings(false), settings(false), settings(false)]), @"Some((Settings { allow_index_creation: false, settings_ids: [0, 1, 2] }, false))");
// We can autobatch document addition with document deletion
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_del()]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_del()]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), doc_del()]), @"Some((DocumentOperation { allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, None), doc_del()]), @"Some((DocumentOperation { allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0, 1] }, true))"###);
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0, 1] }, true))"###);
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, true, None), doc_del()]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, true, None), doc_del()]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, false, None), doc_del()]), @"Some((DocumentOperation { allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, false, None), doc_del()]), @"Some((DocumentOperation { allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, true, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0, 1] }, true))"###);
debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, true, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0, 1] }, true))"###);
debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, false, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, false, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
// And the other way around
debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentOperation { allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentOperation { allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(ReplaceDocuments, true, Some("catto"))]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(UpdateDocuments, true, Some("catto"))]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(ReplaceDocuments, false, Some("catto"))]), @r###"Some((DocumentOperation { allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(UpdateDocuments, false, Some("catto"))]), @r###"Some((DocumentOperation { allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentOperation { allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentOperation { allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(ReplaceDocuments, false, Some("catto"))]), @r###"Some((DocumentOperation { allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(UpdateDocuments, false, Some("catto"))]), @r###"Some((DocumentOperation { allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
// But we can't autobatch document addition with document deletion by filter
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_del_fil()]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_del_fil()]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), doc_del_fil()]), @"Some((DocumentOperation { allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, None), doc_del_fil()]), @"Some((DocumentOperation { allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, Some("catto")), doc_del_fil()]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, Some("catto")), doc_del_fil()]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, Some("catto")), doc_del_fil()]), @r###"Some((DocumentOperation { allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0] }, false))"###);
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, Some("catto")), doc_del_fil()]), @r###"Some((DocumentOperation { allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0] }, false))"###);
debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, true, None), doc_del_fil()]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, true, None), doc_del_fil()]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, false, None), doc_del_fil()]), @"Some((DocumentOperation { allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, false, None), doc_del_fil()]), @"Some((DocumentOperation { allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, true, Some("catto")), doc_del_fil()]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, true, Some("catto")), doc_del_fil()]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, false, Some("catto")), doc_del_fil()]), @r###"Some((DocumentOperation { allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0] }, false))"###);
debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, false, Some("catto")), doc_del_fil()]), @r###"Some((DocumentOperation { allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0] }, false))"###);
// And the other way around
debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(ReplaceDocuments, true, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(UpdateDocuments, true, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(ReplaceDocuments, false, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(UpdateDocuments, false, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
debug_snapshot!(autobatch_from(false, None, [doc_del_fil(), doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
debug_snapshot!(autobatch_from(false, None, [doc_del_fil(), doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
debug_snapshot!(autobatch_from(false, None, [doc_del_fil(), doc_imp(ReplaceDocuments, false, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
debug_snapshot!(autobatch_from(false, None, [doc_del_fil(), doc_imp(UpdateDocuments, false, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
}
#[test]
fn simple_different_document_operations_autobatch_together() {
// addition and updates with deletion by filter can't batch together
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_del_fil()]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_del_fil()]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), idx_create()]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), idx_create()]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_del(), idx_create()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: false }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), idx_create()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), idx_update()]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), idx_update()]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_del(), idx_update()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: false }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), idx_update()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), idx_swap()]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), idx_swap()]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_del(), idx_swap()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: false }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), idx_swap()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))");
}
#[test]
fn document_addition_doesnt_batch_with_settings() {
// simple case
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
// multiple settings and doc addition
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None), settings(true), settings(true)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None), settings(true), settings(true)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
// addition and setting unordered
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), doc_imp(ReplaceDocuments, true, None), settings(true)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), doc_imp(UpdateDocuments, true, None), settings(true)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
// Doesn't batch with other forbidden operations
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), doc_del()]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), doc_del()]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), idx_create()]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), idx_create()]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), idx_update()]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), idx_update()]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), idx_swap()]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), idx_swap()]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
}
#[test]
fn clear_and_additions() {
// these two doesn't need to batch
debug_snapshot!(autobatch_from(true, None, [doc_clr(), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentClear { ids: [0] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_clr(), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentClear { ids: [0] }, false))");
// Basic use case
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None), doc_clr()]), @"Some((DocumentClear { ids: [0, 1, 2] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_imp(UpdateDocuments, true, None), doc_clr()]), @"Some((DocumentClear { ids: [0, 1, 2] }, true))");
// This batch kind doesn't mix with other document addition
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None), doc_clr(), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentClear { ids: [0, 1, 2] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_imp(UpdateDocuments, true, None), doc_clr(), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentClear { ids: [0, 1, 2] }, true))");
// But you can batch multiple clear together
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None), doc_clr(), doc_clr(), doc_clr()]), @"Some((DocumentClear { ids: [0, 1, 2, 3, 4] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_imp(UpdateDocuments, true, None), doc_clr(), doc_clr(), doc_clr()]), @"Some((DocumentClear { ids: [0, 1, 2, 3, 4] }, true))");
}
#[test]
fn clear_and_additions_and_settings() {
// A clear don't need to autobatch the settings that happens AFTER there is no documents
debug_snapshot!(autobatch_from(true, None, [doc_clr(), settings(true)]), @"Some((DocumentClear { ids: [0] }, false))");
debug_snapshot!(autobatch_from(true, None, [settings(true), doc_clr(), settings(true)]), @"Some((ClearAndSettings { other: [1], allow_index_creation: true, settings_ids: [0, 2] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), doc_clr()]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), doc_clr()]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
}
#[test]
fn anything_and_index_deletion() {
// The `IndexDeletion` doesn't batch with anything that happens AFTER.
debug_snapshot!(autobatch_from(true, None, [idx_del(), doc_imp(ReplaceDocuments, true, None)]), @"Some((IndexDeletion { ids: [0] }, false))");
debug_snapshot!(autobatch_from(true, None, [idx_del(), doc_imp(UpdateDocuments, true, None)]), @"Some((IndexDeletion { ids: [0] }, false))");
debug_snapshot!(autobatch_from(true, None, [idx_del(), doc_imp(ReplaceDocuments, false, None)]), @"Some((IndexDeletion { ids: [0] }, false))");
debug_snapshot!(autobatch_from(true, None, [idx_del(), doc_imp(UpdateDocuments, false, None)]), @"Some((IndexDeletion { ids: [0] }, false))");
debug_snapshot!(autobatch_from(true, None, [idx_del(), doc_del()]), @"Some((IndexDeletion { ids: [0] }, false))");
debug_snapshot!(autobatch_from(true, None, [idx_del(), doc_del_fil()]), @"Some((IndexDeletion { ids: [0] }, false))");
debug_snapshot!(autobatch_from(true, None, [idx_del(), doc_clr()]), @"Some((IndexDeletion { ids: [0] }, false))");
debug_snapshot!(autobatch_from(true, None, [idx_del(), settings(true)]), @"Some((IndexDeletion { ids: [0] }, false))");
debug_snapshot!(autobatch_from(true, None, [idx_del(), settings(false)]), @"Some((IndexDeletion { ids: [0] }, false))");
debug_snapshot!(autobatch_from(false,None, [idx_del(), doc_imp(ReplaceDocuments, true, None)]), @"Some((IndexDeletion { ids: [0] }, false))");
debug_snapshot!(autobatch_from(false,None, [idx_del(), doc_imp(UpdateDocuments, true, None)]), @"Some((IndexDeletion { ids: [0] }, false))");
debug_snapshot!(autobatch_from(false,None, [idx_del(), doc_imp(ReplaceDocuments, false, None)]), @"Some((IndexDeletion { ids: [0] }, false))");
debug_snapshot!(autobatch_from(false,None, [idx_del(), doc_imp(UpdateDocuments, false, None)]), @"Some((IndexDeletion { ids: [0] }, false))");
debug_snapshot!(autobatch_from(false,None, [idx_del(), doc_del()]), @"Some((IndexDeletion { ids: [0] }, false))");
debug_snapshot!(autobatch_from(false,None, [idx_del(), doc_del_fil()]), @"Some((IndexDeletion { ids: [0] }, false))");
debug_snapshot!(autobatch_from(false,None, [idx_del(), doc_clr()]), @"Some((IndexDeletion { ids: [0] }, false))");
debug_snapshot!(autobatch_from(false,None, [idx_del(), settings(true)]), @"Some((IndexDeletion { ids: [0] }, false))");
debug_snapshot!(autobatch_from(false,None, [idx_del(), settings(false)]), @"Some((IndexDeletion { ids: [0] }, false))");
// The index deletion can accept almost any type of `BatchKind` and transform it to an `IndexDeletion`.
// First, the basic cases
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, None), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_del(), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(true, None, [settings(true), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, true))");
debug_snapshot!(autobatch_from(true, None, [settings(false), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, true, None), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, true))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, true, None), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, true))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, false, None), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_del(), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_del_fil(), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(false,None, [settings(true), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, true))");
debug_snapshot!(autobatch_from(false,None, [settings(false), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))");
}
#[test]
fn allowed_and_disallowed_index_creation() {
// `DocumentImport` can't be mixed with those disallowed to do so except if the index already exists.
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentOperation { allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), settings(true)]), @"Some((DocumentOperation { allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None), doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentOperation { allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, true, None), settings(true)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None), settings(true)]), @"Some((DocumentOperation { allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
// batch deletion and addition
debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(ReplaceDocuments, true, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: false }, false))");
debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(UpdateDocuments, true, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: false }, false))");
debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: false }, false))");
debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: false }, false))");
}
#[test]
fn autobatch_primary_key() {
// ==> If I have a pk
// With a single update
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("id"), operation_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other"))]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("other"), operation_ids: [0] }, true))"###);
// With a multiple updates
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("id"), operation_ids: [0, 1] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("id"), operation_ids: [0, 1] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, Some("other"))]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("id"))]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("id"), operation_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("id"), operation_ids: [0, 1] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("id"), operation_ids: [0, 1] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, Some("other"))]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("id"), operation_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("id"), operation_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("id"), operation_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("other"), operation_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("other"), operation_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("other"), operation_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("other"))]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("other"), operation_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("other"), operation_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("other"), operation_ids: [0] }, true))"###);
// ==> If I don't have a pk
// With a single update
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("id"), operation_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, Some("other"))]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("other"), operation_ids: [0] }, true))"###);
// With a multiple updates
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, Some("id"))]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("id"), operation_ids: [0] }, true))"###);
}

View File

@ -0,0 +1,567 @@
use std::fmt;
use meilisearch_types::heed::RoTxn;
use meilisearch_types::milli::update::IndexDocumentsMethod;
use meilisearch_types::settings::{Settings, Unchecked};
use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task};
use roaring::RoaringBitmap;
use uuid::Uuid;
use super::autobatcher::{self, BatchKind};
use crate::utils::ProcessingBatch;
use crate::{Error, IndexScheduler, Result};
/// Represents a combination of tasks that can all be processed at the same time.
///
/// A batch contains the set of tasks that it represents (accessible through
/// [`self.ids()`](Batch::ids)), as well as additional information on how to
/// be processed.
#[derive(Debug)]
pub(crate) enum Batch {
TaskCancelation {
/// The task cancelation itself.
task: Task,
},
TaskDeletions(Vec<Task>),
SnapshotCreation(Vec<Task>),
Dump(Task),
IndexOperation {
op: IndexOperation,
must_create_index: bool,
},
IndexCreation {
index_uid: String,
primary_key: Option<String>,
task: Task,
},
IndexUpdate {
index_uid: String,
primary_key: Option<String>,
task: Task,
},
IndexDeletion {
index_uid: String,
tasks: Vec<Task>,
index_has_been_created: bool,
},
IndexSwap {
task: Task,
},
UpgradeDatabase {
tasks: Vec<Task>,
},
}
#[derive(Debug)]
pub(crate) enum DocumentOperation {
Replace(Uuid),
Update(Uuid),
Delete(Vec<String>),
}
/// A [batch](Batch) that combines multiple tasks operating on an index.
#[derive(Debug)]
pub(crate) enum IndexOperation {
DocumentOperation {
index_uid: String,
primary_key: Option<String>,
operations: Vec<DocumentOperation>,
tasks: Vec<Task>,
},
DocumentEdition {
index_uid: String,
task: Task,
},
DocumentDeletion {
index_uid: String,
tasks: Vec<Task>,
},
DocumentClear {
index_uid: String,
tasks: Vec<Task>,
},
Settings {
index_uid: String,
// The boolean indicates if it's a settings deletion or creation.
settings: Vec<(bool, Settings<Unchecked>)>,
tasks: Vec<Task>,
},
DocumentClearAndSetting {
index_uid: String,
cleared_tasks: Vec<Task>,
// The boolean indicates if it's a settings deletion or creation.
settings: Vec<(bool, Settings<Unchecked>)>,
settings_tasks: Vec<Task>,
},
}
impl Batch {
/// Return the task ids associated with this batch.
pub fn ids(&self) -> RoaringBitmap {
match self {
Batch::TaskCancelation { task, .. }
| Batch::Dump(task)
| Batch::IndexCreation { task, .. }
| Batch::IndexUpdate { task, .. } => {
RoaringBitmap::from_sorted_iter(std::iter::once(task.uid)).unwrap()
}
Batch::SnapshotCreation(tasks)
| Batch::TaskDeletions(tasks)
| Batch::UpgradeDatabase { tasks }
| Batch::IndexDeletion { tasks, .. } => {
RoaringBitmap::from_iter(tasks.iter().map(|task| task.uid))
}
Batch::IndexOperation { op, .. } => match op {
IndexOperation::DocumentOperation { tasks, .. }
| IndexOperation::Settings { tasks, .. }
| IndexOperation::DocumentDeletion { tasks, .. }
| IndexOperation::DocumentClear { tasks, .. } => {
RoaringBitmap::from_iter(tasks.iter().map(|task| task.uid))
}
IndexOperation::DocumentEdition { task, .. } => {
RoaringBitmap::from_sorted_iter(std::iter::once(task.uid)).unwrap()
}
IndexOperation::DocumentClearAndSetting {
cleared_tasks: tasks,
settings_tasks: other,
..
} => RoaringBitmap::from_iter(tasks.iter().chain(other).map(|task| task.uid)),
},
Batch::IndexSwap { task } => {
RoaringBitmap::from_sorted_iter(std::iter::once(task.uid)).unwrap()
}
}
}
/// Return the index UID associated with this batch
pub fn index_uid(&self) -> Option<&str> {
use Batch::*;
match self {
TaskCancelation { .. }
| TaskDeletions(_)
| SnapshotCreation(_)
| Dump(_)
| UpgradeDatabase { .. }
| IndexSwap { .. } => None,
IndexOperation { op, .. } => Some(op.index_uid()),
IndexCreation { index_uid, .. }
| IndexUpdate { index_uid, .. }
| IndexDeletion { index_uid, .. } => Some(index_uid),
}
}
}
impl fmt::Display for Batch {
/// A text used when we debug the profiling reports.
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let index_uid = self.index_uid();
let tasks = self.ids();
match self {
Batch::TaskCancelation { .. } => f.write_str("TaskCancelation")?,
Batch::TaskDeletions(_) => f.write_str("TaskDeletion")?,
Batch::SnapshotCreation(_) => f.write_str("SnapshotCreation")?,
Batch::Dump(_) => f.write_str("Dump")?,
Batch::IndexOperation { op, .. } => write!(f, "{op}")?,
Batch::IndexCreation { .. } => f.write_str("IndexCreation")?,
Batch::IndexUpdate { .. } => f.write_str("IndexUpdate")?,
Batch::IndexDeletion { .. } => f.write_str("IndexDeletion")?,
Batch::IndexSwap { .. } => f.write_str("IndexSwap")?,
Batch::UpgradeDatabase { .. } => f.write_str("UpgradeDatabase")?,
};
match index_uid {
Some(name) => f.write_fmt(format_args!(" on {name:?} from tasks: {tasks:?}")),
None => f.write_fmt(format_args!(" from tasks: {tasks:?}")),
}
}
}
impl IndexOperation {
pub fn index_uid(&self) -> &str {
match self {
IndexOperation::DocumentOperation { index_uid, .. }
| IndexOperation::DocumentEdition { index_uid, .. }
| IndexOperation::DocumentDeletion { index_uid, .. }
| IndexOperation::DocumentClear { index_uid, .. }
| IndexOperation::Settings { index_uid, .. }
| IndexOperation::DocumentClearAndSetting { index_uid, .. } => index_uid,
}
}
}
impl fmt::Display for IndexOperation {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
IndexOperation::DocumentOperation { .. } => {
f.write_str("IndexOperation::DocumentOperation")
}
IndexOperation::DocumentEdition { .. } => {
f.write_str("IndexOperation::DocumentEdition")
}
IndexOperation::DocumentDeletion { .. } => {
f.write_str("IndexOperation::DocumentDeletion")
}
IndexOperation::DocumentClear { .. } => f.write_str("IndexOperation::DocumentClear"),
IndexOperation::Settings { .. } => f.write_str("IndexOperation::Settings"),
IndexOperation::DocumentClearAndSetting { .. } => {
f.write_str("IndexOperation::DocumentClearAndSetting")
}
}
}
}
impl IndexScheduler {
/// Convert an [`BatchKind`](crate::autobatcher::BatchKind) into a [`Batch`].
///
/// ## Arguments
/// - `rtxn`: read transaction
/// - `index_uid`: name of the index affected by the operations of the autobatch
/// - `batch`: the result of the autobatcher
pub(crate) fn create_next_batch_index(
&self,
rtxn: &RoTxn,
index_uid: String,
batch: BatchKind,
current_batch: &mut ProcessingBatch,
must_create_index: bool,
) -> Result<Option<Batch>> {
match batch {
BatchKind::DocumentClear { ids } => Ok(Some(Batch::IndexOperation {
op: IndexOperation::DocumentClear {
tasks: self.queue.get_existing_tasks_for_processing_batch(
rtxn,
current_batch,
ids,
)?,
index_uid,
},
must_create_index,
})),
BatchKind::DocumentEdition { id } => {
let mut task =
self.queue.tasks.get_task(rtxn, id)?.ok_or(Error::CorruptedTaskQueue)?;
current_batch.processing(Some(&mut task));
match &task.kind {
KindWithContent::DocumentEdition { index_uid, .. } => {
Ok(Some(Batch::IndexOperation {
op: IndexOperation::DocumentEdition {
index_uid: index_uid.clone(),
task,
},
must_create_index: false,
}))
}
_ => unreachable!(),
}
}
BatchKind::DocumentOperation { operation_ids, .. } => {
let tasks = self.queue.get_existing_tasks_for_processing_batch(
rtxn,
current_batch,
operation_ids,
)?;
let primary_key = tasks
.iter()
.find_map(|task| match task.kind {
KindWithContent::DocumentAdditionOrUpdate { ref primary_key, .. } => {
// we want to stop on the first document addition
Some(primary_key.clone())
}
KindWithContent::DocumentDeletion { .. } => None,
_ => unreachable!(),
})
.flatten();
let mut operations = Vec::new();
for task in tasks.iter() {
match task.kind {
KindWithContent::DocumentAdditionOrUpdate {
content_file, method, ..
} => match method {
IndexDocumentsMethod::ReplaceDocuments => {
operations.push(DocumentOperation::Replace(content_file))
}
IndexDocumentsMethod::UpdateDocuments => {
operations.push(DocumentOperation::Update(content_file))
}
_ => unreachable!("Unknown document merging method"),
},
KindWithContent::DocumentDeletion { ref documents_ids, .. } => {
operations.push(DocumentOperation::Delete(documents_ids.clone()));
}
_ => unreachable!(),
}
}
Ok(Some(Batch::IndexOperation {
op: IndexOperation::DocumentOperation {
index_uid,
primary_key,
operations,
tasks,
},
must_create_index,
}))
}
BatchKind::DocumentDeletion { deletion_ids, includes_by_filter: _ } => {
let tasks = self.queue.get_existing_tasks_for_processing_batch(
rtxn,
current_batch,
deletion_ids,
)?;
Ok(Some(Batch::IndexOperation {
op: IndexOperation::DocumentDeletion { index_uid, tasks },
must_create_index,
}))
}
BatchKind::Settings { settings_ids, .. } => {
let tasks = self.queue.get_existing_tasks_for_processing_batch(
rtxn,
current_batch,
settings_ids,
)?;
let mut settings = Vec::new();
for task in &tasks {
match task.kind {
KindWithContent::SettingsUpdate {
ref new_settings, is_deletion, ..
} => settings.push((is_deletion, *new_settings.clone())),
_ => unreachable!(),
}
}
Ok(Some(Batch::IndexOperation {
op: IndexOperation::Settings { index_uid, settings, tasks },
must_create_index,
}))
}
BatchKind::ClearAndSettings { other, settings_ids, allow_index_creation } => {
let (index_uid, settings, settings_tasks) = match self
.create_next_batch_index(
rtxn,
index_uid,
BatchKind::Settings { settings_ids, allow_index_creation },
current_batch,
must_create_index,
)?
.unwrap()
{
Batch::IndexOperation {
op: IndexOperation::Settings { index_uid, settings, tasks, .. },
..
} => (index_uid, settings, tasks),
_ => unreachable!(),
};
let (index_uid, cleared_tasks) = match self
.create_next_batch_index(
rtxn,
index_uid,
BatchKind::DocumentClear { ids: other },
current_batch,
must_create_index,
)?
.unwrap()
{
Batch::IndexOperation {
op: IndexOperation::DocumentClear { index_uid, tasks },
..
} => (index_uid, tasks),
_ => unreachable!(),
};
Ok(Some(Batch::IndexOperation {
op: IndexOperation::DocumentClearAndSetting {
index_uid,
cleared_tasks,
settings,
settings_tasks,
},
must_create_index,
}))
}
BatchKind::IndexCreation { id } => {
let mut task =
self.queue.tasks.get_task(rtxn, id)?.ok_or(Error::CorruptedTaskQueue)?;
current_batch.processing(Some(&mut task));
let (index_uid, primary_key) = match &task.kind {
KindWithContent::IndexCreation { index_uid, primary_key } => {
(index_uid.clone(), primary_key.clone())
}
_ => unreachable!(),
};
Ok(Some(Batch::IndexCreation { index_uid, primary_key, task }))
}
BatchKind::IndexUpdate { id } => {
let mut task =
self.queue.tasks.get_task(rtxn, id)?.ok_or(Error::CorruptedTaskQueue)?;
current_batch.processing(Some(&mut task));
let primary_key = match &task.kind {
KindWithContent::IndexUpdate { primary_key, .. } => primary_key.clone(),
_ => unreachable!(),
};
Ok(Some(Batch::IndexUpdate { index_uid, primary_key, task }))
}
BatchKind::IndexDeletion { ids } => Ok(Some(Batch::IndexDeletion {
index_uid,
index_has_been_created: must_create_index,
tasks: self.queue.get_existing_tasks_for_processing_batch(
rtxn,
current_batch,
ids,
)?,
})),
BatchKind::IndexSwap { id } => {
let mut task =
self.queue.tasks.get_task(rtxn, id)?.ok_or(Error::CorruptedTaskQueue)?;
current_batch.processing(Some(&mut task));
Ok(Some(Batch::IndexSwap { task }))
}
}
}
/// Create the next batch to be processed;
/// 1. We get the *last* task to cancel.
/// 2. We get the *next* task to delete.
/// 3. We get the *next* snapshot to process.
/// 4. We get the *next* dump to process.
/// 5. We get the *next* tasks to process for a specific index.
#[tracing::instrument(level = "trace", skip(self, rtxn), target = "indexing::scheduler")]
pub(crate) fn create_next_batch(
&self,
rtxn: &RoTxn,
) -> Result<Option<(Batch, ProcessingBatch)>> {
#[cfg(test)]
self.maybe_fail(crate::test_utils::FailureLocation::InsideCreateBatch)?;
let batch_id = self.queue.batches.next_batch_id(rtxn)?;
let mut current_batch = ProcessingBatch::new(batch_id);
let enqueued = &self.queue.tasks.get_status(rtxn, Status::Enqueued)?;
let failed = &self.queue.tasks.get_status(rtxn, Status::Failed)?;
// 0. The priority over everything is to upgrade the instance
// There shouldn't be multiple upgrade tasks but just in case we're going to batch all of them at the same time
let upgrade = self.queue.tasks.get_kind(rtxn, Kind::UpgradeDatabase)? & (enqueued | failed);
if !upgrade.is_empty() {
let mut tasks = self.queue.tasks.get_existing_tasks(rtxn, upgrade)?;
// In the case of an upgrade database batch, we want to find back the original batch that tried processing it
// and re-use its id
if let Some(batch_uid) = tasks.last().unwrap().batch_uid {
current_batch.uid = batch_uid;
}
current_batch.processing(&mut tasks);
return Ok(Some((Batch::UpgradeDatabase { tasks }, current_batch)));
}
// 1. we get the last task to cancel.
let to_cancel = self.queue.tasks.get_kind(rtxn, Kind::TaskCancelation)? & enqueued;
if let Some(task_id) = to_cancel.max() {
let mut task =
self.queue.tasks.get_task(rtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?;
current_batch.processing(Some(&mut task));
return Ok(Some((Batch::TaskCancelation { task }, current_batch)));
}
// 2. we get the next task to delete
let to_delete = self.queue.tasks.get_kind(rtxn, Kind::TaskDeletion)? & enqueued;
if !to_delete.is_empty() {
let mut tasks = self.queue.tasks.get_existing_tasks(rtxn, to_delete)?;
current_batch.processing(&mut tasks);
return Ok(Some((Batch::TaskDeletions(tasks), current_batch)));
}
// 3. we batch the snapshot.
let to_snapshot = self.queue.tasks.get_kind(rtxn, Kind::SnapshotCreation)? & enqueued;
if !to_snapshot.is_empty() {
let mut tasks = self.queue.tasks.get_existing_tasks(rtxn, to_snapshot)?;
current_batch.processing(&mut tasks);
return Ok(Some((Batch::SnapshotCreation(tasks), current_batch)));
}
// 4. we batch the dumps.
let to_dump = self.queue.tasks.get_kind(rtxn, Kind::DumpCreation)? & enqueued;
if let Some(to_dump) = to_dump.min() {
let mut task =
self.queue.tasks.get_task(rtxn, to_dump)?.ok_or(Error::CorruptedTaskQueue)?;
current_batch.processing(Some(&mut task));
return Ok(Some((Batch::Dump(task), current_batch)));
}
// 5. We make a batch from the unprioritised tasks. Start by taking the next enqueued task.
let task_id = if let Some(task_id) = enqueued.min() { task_id } else { return Ok(None) };
let mut task =
self.queue.tasks.get_task(rtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?;
// If the task is not associated with any index, verify that it is an index swap and
// create the batch directly. Otherwise, get the index name associated with the task
// and use the autobatcher to batch the enqueued tasks associated with it
let index_name = if let Some(&index_name) = task.indexes().first() {
index_name
} else {
assert!(matches!(&task.kind, KindWithContent::IndexSwap { swaps } if swaps.is_empty()));
current_batch.processing(Some(&mut task));
return Ok(Some((Batch::IndexSwap { task }, current_batch)));
};
let index_already_exists = self.index_mapper.exists(rtxn, index_name)?;
let mut primary_key = None;
if index_already_exists {
let index = self.index_mapper.index(rtxn, index_name)?;
let rtxn = index.read_txn()?;
primary_key = index.primary_key(&rtxn)?.map(|pk| pk.to_string());
}
let index_tasks = self.queue.tasks.index_tasks(rtxn, index_name)? & enqueued;
// If autobatching is disabled we only take one task at a time.
// Otherwise, we take only a maximum of tasks to create batches.
let tasks_limit = if self.scheduler.autobatching_enabled {
self.scheduler.max_number_of_batched_tasks
} else {
1
};
let mut enqueued = Vec::new();
let mut total_size: u64 = 0;
for task_id in index_tasks.into_iter().take(tasks_limit) {
let task = self
.queue
.tasks
.get_task(rtxn, task_id)
.and_then(|task| task.ok_or(Error::CorruptedTaskQueue))?;
if let Some(uuid) = task.content_uuid() {
let content_size = self.queue.file_store.compute_size(uuid)?;
total_size = total_size.saturating_add(content_size);
}
if total_size > self.scheduler.batched_tasks_size_limit && !enqueued.is_empty() {
break;
}
enqueued.push((task.uid, task.kind));
}
if let Some((batchkind, create_index)) =
autobatcher::autobatch(enqueued, index_already_exists, primary_key.as_deref())
{
return Ok(self
.create_next_batch_index(
rtxn,
index_name.to_string(),
batchkind,
&mut current_batch,
create_index,
)?
.map(|batch| (batch, current_batch)));
}
// If we found no tasks then we were notified for something that got autobatched
// somehow and there is nothing to do.
Ok(None)
}
}

View File

@ -0,0 +1,453 @@
mod autobatcher;
#[cfg(test)]
mod autobatcher_test;
mod create_batch;
mod process_batch;
mod process_dump_creation;
mod process_index_operation;
mod process_snapshot_creation;
mod process_upgrade;
#[cfg(test)]
mod test;
#[cfg(test)]
mod test_document_addition;
#[cfg(test)]
mod test_embedders;
#[cfg(test)]
mod test_failure;
use std::path::PathBuf;
use std::sync::atomic::{AtomicBool, AtomicU32, Ordering};
use std::sync::Arc;
use convert_case::{Case, Casing as _};
use meilisearch_types::error::ResponseError;
use meilisearch_types::heed::{Env, WithoutTls};
use meilisearch_types::milli;
use meilisearch_types::tasks::Status;
use process_batch::ProcessBatchInfo;
use rayon::current_num_threads;
use rayon::iter::{IntoParallelIterator, ParallelIterator};
use roaring::RoaringBitmap;
use synchronoise::SignalEvent;
use crate::processing::{AtomicTaskStep, BatchProgress};
use crate::{Error, IndexScheduler, IndexSchedulerOptions, Result, TickOutcome};
#[derive(Default, Clone, Debug)]
pub struct MustStopProcessing(Arc<AtomicBool>);
impl MustStopProcessing {
pub fn get(&self) -> bool {
self.0.load(Ordering::Relaxed)
}
pub fn must_stop(&self) {
self.0.store(true, Ordering::Relaxed);
}
pub fn reset(&self) {
self.0.store(false, Ordering::Relaxed);
}
}
pub struct Scheduler {
/// A boolean that can be set to true to stop the currently processing tasks.
pub must_stop_processing: MustStopProcessing,
/// Get a signal when a batch needs to be processed.
pub(crate) wake_up: Arc<SignalEvent>,
/// Whether auto-batching is enabled or not.
pub(crate) autobatching_enabled: bool,
/// The maximum number of tasks that will be batched together.
pub(crate) max_number_of_batched_tasks: usize,
/// The maximum size, in bytes, of tasks in a batch.
pub(crate) batched_tasks_size_limit: u64,
/// The path used to create the dumps.
pub(crate) dumps_path: PathBuf,
/// The path used to create the snapshots.
pub(crate) snapshots_path: PathBuf,
/// The path to the folder containing the auth LMDB env.
pub(crate) auth_env: Env<WithoutTls>,
/// The path to the version file of Meilisearch.
pub(crate) version_file_path: PathBuf,
/// The maximal number of entries in the search query cache of an embedder.
///
/// 0 disables the cache.
pub(crate) embedding_cache_cap: usize,
}
impl Scheduler {
pub(crate) fn private_clone(&self) -> Scheduler {
Scheduler {
must_stop_processing: self.must_stop_processing.clone(),
wake_up: self.wake_up.clone(),
autobatching_enabled: self.autobatching_enabled,
max_number_of_batched_tasks: self.max_number_of_batched_tasks,
batched_tasks_size_limit: self.batched_tasks_size_limit,
dumps_path: self.dumps_path.clone(),
snapshots_path: self.snapshots_path.clone(),
auth_env: self.auth_env.clone(),
version_file_path: self.version_file_path.clone(),
embedding_cache_cap: self.embedding_cache_cap,
}
}
pub fn new(options: &IndexSchedulerOptions, auth_env: Env<WithoutTls>) -> Scheduler {
Scheduler {
must_stop_processing: MustStopProcessing::default(),
// we want to start the loop right away in case meilisearch was ctrl+Ced while processing things
wake_up: Arc::new(SignalEvent::auto(true)),
autobatching_enabled: options.autobatching_enabled,
max_number_of_batched_tasks: options.max_number_of_batched_tasks,
batched_tasks_size_limit: options.batched_tasks_size_limit,
dumps_path: options.dumps_path.clone(),
snapshots_path: options.snapshots_path.clone(),
auth_env,
version_file_path: options.version_file_path.clone(),
embedding_cache_cap: options.embedding_cache_cap,
}
}
}
impl IndexScheduler {
/// Perform one iteration of the run loop.
///
/// 1. See if we need to cleanup the task queue
/// 2. Find the next batch of tasks to be processed.
/// 3. Update the information of these tasks following the start of their processing.
/// 4. Update the in-memory list of processed tasks accordingly.
/// 5. Process the batch:
/// - perform the actions of each batched task
/// - update the information of each batched task following the end
/// of their processing.
/// 6. Reset the in-memory list of processed tasks.
///
/// Returns the number of processed tasks.
pub(crate) fn tick(&self) -> Result<TickOutcome> {
#[cfg(test)]
{
*self.run_loop_iteration.write().unwrap() += 1;
self.breakpoint(crate::test_utils::Breakpoint::Start);
}
if self.cleanup_enabled {
let mut wtxn = self.env.write_txn()?;
self.queue.cleanup_task_queue(&mut wtxn)?;
wtxn.commit()?;
}
let rtxn = self.env.read_txn().map_err(Error::HeedTransaction)?;
let (batch, mut processing_batch) =
match self.create_next_batch(&rtxn).map_err(|e| Error::CreateBatch(Box::new(e)))? {
Some(batch) => batch,
None => return Ok(TickOutcome::WaitForSignal),
};
let index_uid = batch.index_uid().map(ToOwned::to_owned);
drop(rtxn);
// 1. store the starting date with the bitmap of processing tasks.
let mut ids = batch.ids();
let processed_tasks = ids.len();
// We reset the must_stop flag to be sure that we don't stop processing tasks
self.scheduler.must_stop_processing.reset();
let progress = self
.processing_tasks
.write()
.unwrap()
// We can clone the processing batch here because we don't want its modification to affect the view of the processing batches
.start_processing(processing_batch.clone(), ids.clone());
#[cfg(test)]
self.breakpoint(crate::test_utils::Breakpoint::BatchCreated);
// 2. Process the tasks
let res = {
let cloned_index_scheduler = self.private_clone();
let processing_batch = &mut processing_batch;
let progress = progress.clone();
std::thread::scope(|s| {
let p = progress.clone();
let handle = std::thread::Builder::new()
.name(String::from("batch-operation"))
.spawn_scoped(s, move || {
cloned_index_scheduler.process_batch(batch, processing_batch, p)
})
.unwrap();
match handle.join() {
Ok(ret) => {
if ret.is_err() {
if let Ok(progress_view) =
serde_json::to_string(&progress.as_progress_view())
{
tracing::warn!("Batch failed while doing: {progress_view}")
}
}
ret
}
Err(panic) => {
if let Ok(progress_view) =
serde_json::to_string(&progress.as_progress_view())
{
tracing::warn!("Batch failed while doing: {progress_view}")
}
let msg = match panic.downcast_ref::<&'static str>() {
Some(s) => *s,
None => match panic.downcast_ref::<String>() {
Some(s) => &s[..],
None => "Box<dyn Any>",
},
};
Err(Error::ProcessBatchPanicked(msg.to_string()))
}
}
})
};
// Reset the currently updating index to relinquish the index handle
self.index_mapper.set_currently_updating_index(None);
#[cfg(test)]
self.maybe_fail(crate::test_utils::FailureLocation::AcquiringWtxn)?;
progress.update_progress(BatchProgress::WritingTasksToDisk);
processing_batch.finished();
let mut stop_scheduler_forever = false;
let mut wtxn = self.env.write_txn().map_err(Error::HeedTransaction)?;
let mut canceled = RoaringBitmap::new();
let mut process_batch_info = ProcessBatchInfo::default();
match res {
Ok((tasks, info)) => {
#[cfg(test)]
self.breakpoint(crate::test_utils::Breakpoint::ProcessBatchSucceeded);
let (task_progress, task_progress_obj) = AtomicTaskStep::new(tasks.len() as u32);
progress.update_progress(task_progress_obj);
process_batch_info = info;
let mut success = 0;
let mut failure = 0;
let mut canceled_by = None;
#[allow(unused_variables)]
for (i, mut task) in tasks.into_iter().enumerate() {
task_progress.fetch_add(1, Ordering::Relaxed);
processing_batch.update(&mut task);
if task.status == Status::Canceled {
canceled.insert(task.uid);
canceled_by = task.canceled_by;
}
#[cfg(test)]
self.maybe_fail(
crate::test_utils::FailureLocation::UpdatingTaskAfterProcessBatchSuccess {
task_uid: i as u32,
},
)?;
match task.error {
Some(_) => failure += 1,
None => success += 1,
}
self.queue
.tasks
.update_task(&mut wtxn, &task)
.map_err(|e| Error::UnrecoverableError(Box::new(e)))?;
}
if let Some(canceled_by) = canceled_by {
self.queue.tasks.canceled_by.put(&mut wtxn, &canceled_by, &canceled)?;
}
tracing::info!("A batch of tasks was successfully completed with {success} successful tasks and {failure} failed tasks.");
}
// If we have an abortion error we must stop the tick here and re-schedule tasks.
Err(Error::Milli {
error: milli::Error::InternalError(milli::InternalError::AbortedIndexation),
..
})
| Err(Error::AbortedTask) => {
#[cfg(test)]
self.breakpoint(crate::test_utils::Breakpoint::AbortedIndexation);
wtxn.abort();
tracing::info!("A batch of tasks was aborted.");
// We make sure that we don't call `stop_processing` on the `processing_tasks`,
// this is because we want to let the next tick call `create_next_batch` and keep
// the `started_at` date times and `processings` of the current processing tasks.
// This date time is used by the task cancelation to store the right `started_at`
// date in the task on disk.
return Ok(TickOutcome::TickAgain(0));
}
// If an index said it was full, we need to:
// 1. identify which index is full
// 2. close the associated environment
// 3. resize it
// 4. re-schedule tasks
Err(Error::Milli {
error: milli::Error::UserError(milli::UserError::MaxDatabaseSizeReached),
..
}) if index_uid.is_some() => {
// fixme: add index_uid to match to avoid the unwrap
let index_uid = index_uid.unwrap();
// fixme: handle error more gracefully? not sure when this could happen
self.index_mapper.resize_index(&wtxn, &index_uid)?;
wtxn.abort();
tracing::info!("The max database size was reached. Resizing the index.");
return Ok(TickOutcome::TickAgain(0));
}
// In case of a failure we must get back and patch all the tasks with the error.
Err(err) => {
#[cfg(test)]
self.breakpoint(crate::test_utils::Breakpoint::ProcessBatchFailed);
let (task_progress, task_progress_obj) = AtomicTaskStep::new(ids.len() as u32);
progress.update_progress(task_progress_obj);
if matches!(err, Error::DatabaseUpgrade(_)) {
tracing::error!(
"Upgrade task failed, tasks won't be processed until the following issue is fixed: {err}"
);
stop_scheduler_forever = true;
}
let error: ResponseError = err.into();
for id in ids.iter() {
task_progress.fetch_add(1, Ordering::Relaxed);
let mut task = self
.queue
.tasks
.get_task(&wtxn, id)
.map_err(|e| Error::UnrecoverableError(Box::new(e)))?
.ok_or(Error::CorruptedTaskQueue)?;
task.status = Status::Failed;
task.error = Some(error.clone());
task.details = task.details.map(|d| d.to_failed());
processing_batch.update(&mut task);
#[cfg(test)]
self.maybe_fail(
crate::test_utils::FailureLocation::UpdatingTaskAfterProcessBatchFailure,
)?;
tracing::error!("Batch failed {}", error);
self.queue
.tasks
.update_task(&mut wtxn, &task)
.map_err(|e| Error::UnrecoverableError(Box::new(e)))?;
}
}
}
// We must re-add the canceled task so they're part of the same batch.
ids |= canceled;
let ProcessBatchInfo { congestion, pre_commit_dabases_sizes, post_commit_dabases_sizes } =
process_batch_info;
processing_batch.stats.progress_trace =
progress.accumulated_durations().into_iter().map(|(k, v)| (k, v.into())).collect();
processing_batch.stats.write_channel_congestion = congestion.map(|congestion| {
let mut congestion_info = serde_json::Map::new();
congestion_info.insert("attempts".into(), congestion.attempts.into());
congestion_info.insert("blocking_attempts".into(), congestion.blocking_attempts.into());
congestion_info.insert("blocking_ratio".into(), congestion.congestion_ratio().into());
congestion_info
});
processing_batch.stats.internal_database_sizes = pre_commit_dabases_sizes
.iter()
.flat_map(|(dbname, pre_size)| {
post_commit_dabases_sizes
.get(dbname)
.map(|post_size| {
use byte_unit::{Byte, UnitType::Binary};
use std::cmp::Ordering::{Equal, Greater, Less};
let post = Byte::from_u64(*post_size as u64).get_appropriate_unit(Binary);
let diff_size = post_size.abs_diff(*pre_size) as u64;
let diff = Byte::from_u64(diff_size).get_appropriate_unit(Binary);
let sign = match post_size.cmp(pre_size) {
Equal => return None,
Greater => "+",
Less => "-",
};
Some((
dbname.to_case(Case::Camel),
format!("{post:#.2} ({sign}{diff:#.2})").into(),
))
})
.into_iter()
.flatten()
})
.collect();
if let Some(congestion) = congestion {
tracing::debug!(
"Channel congestion metrics - Attempts: {}, Blocked attempts: {} ({:.1}% congestion)",
congestion.attempts,
congestion.blocking_attempts,
congestion.congestion_ratio(),
);
}
tracing::debug!("call trace: {:?}", progress.accumulated_durations());
self.queue.write_batch(&mut wtxn, processing_batch, &ids)?;
#[cfg(test)]
self.maybe_fail(crate::test_utils::FailureLocation::CommittingWtxn)?;
wtxn.commit().map_err(Error::HeedTransaction)?;
// We should stop processing AFTER everything is processed and written to disk otherwise, a batch (which only lives in RAM) may appear in the processing task
// and then become « not found » for some time until the commit everything is written and the final commit is made.
self.processing_tasks.write().unwrap().stop_processing();
// Once the tasks are committed, we should delete all the update files associated ASAP to avoid leaking files in case of a restart
tracing::debug!("Deleting the update files");
//We take one read transaction **per thread**. Then, every thread is going to pull out new IDs from the roaring bitmap with the help of an atomic shared index into the bitmap
let idx = AtomicU32::new(0);
(0..current_num_threads()).into_par_iter().try_for_each(|_| -> Result<()> {
let rtxn = self.read_txn()?;
while let Some(id) = ids.select(idx.fetch_add(1, Ordering::Relaxed)) {
let task = self
.queue
.tasks
.get_task(&rtxn, id)
.map_err(|e| Error::UnrecoverableError(Box::new(e)))?
.ok_or(Error::CorruptedTaskQueue)?;
if let Err(e) = self.queue.delete_persisted_task_data(&task) {
tracing::error!(
"Failure to delete the content files associated with task {}. Error: {e}",
task.uid
);
}
}
Ok(())
})?;
// We shouldn't crash the tick function if we can't send data to the webhook.
let _ = self.notify_webhook(&ids);
#[cfg(test)]
self.breakpoint(crate::test_utils::Breakpoint::AfterProcessing);
if stop_scheduler_forever {
Ok(TickOutcome::StopProcessingForever)
} else {
Ok(TickOutcome::TickAgain(processed_tasks))
}
}
}

View File

@ -0,0 +1,687 @@
use std::collections::{BTreeSet, HashMap, HashSet};
use std::panic::{catch_unwind, AssertUnwindSafe};
use std::sync::atomic::Ordering;
use meilisearch_types::batches::{BatchEnqueuedAt, BatchId};
use meilisearch_types::heed::{RoTxn, RwTxn};
use meilisearch_types::milli::progress::{Progress, VariableNameStep};
use meilisearch_types::milli::{self, ChannelCongestion};
use meilisearch_types::tasks::{Details, IndexSwap, KindWithContent, Status, Task};
use milli::update::Settings as MilliSettings;
use roaring::RoaringBitmap;
use super::create_batch::Batch;
use crate::processing::{
AtomicBatchStep, AtomicTaskStep, CreateIndexProgress, DeleteIndexProgress, FinalizingIndexStep,
InnerSwappingTwoIndexes, SwappingTheIndexes, TaskCancelationProgress, TaskDeletionProgress,
UpdateIndexProgress,
};
use crate::utils::{
self, remove_n_tasks_datetime_earlier_than, remove_task_datetime, swap_index_uid_in_task,
ProcessingBatch,
};
use crate::{Error, IndexScheduler, Result, TaskId};
#[derive(Debug, Default)]
pub struct ProcessBatchInfo {
/// The write channel congestion. None when unavailable: settings update.
pub congestion: Option<ChannelCongestion>,
/// The sizes of the different databases before starting the indexation.
pub pre_commit_dabases_sizes: indexmap::IndexMap<&'static str, usize>,
/// The sizes of the different databases after commiting the indexation.
pub post_commit_dabases_sizes: indexmap::IndexMap<&'static str, usize>,
}
impl IndexScheduler {
/// Apply the operation associated with the given batch.
///
/// ## Return
/// The list of tasks that were processed. The metadata of each task in the returned
/// list is updated accordingly, with the exception of the its date fields
/// [`finished_at`](meilisearch_types::tasks::Task::finished_at) and [`started_at`](meilisearch_types::tasks::Task::started_at).
#[tracing::instrument(level = "trace", skip(self, batch, progress), target = "indexing::scheduler", fields(batch=batch.to_string()))]
pub(crate) fn process_batch(
&self,
batch: Batch,
current_batch: &mut ProcessingBatch,
progress: Progress,
) -> Result<(Vec<Task>, ProcessBatchInfo)> {
#[cfg(test)]
{
self.maybe_fail(crate::test_utils::FailureLocation::InsideProcessBatch)?;
self.maybe_fail(crate::test_utils::FailureLocation::PanicInsideProcessBatch)?;
self.breakpoint(crate::test_utils::Breakpoint::InsideProcessBatch);
}
match batch {
Batch::TaskCancelation { mut task } => {
// 1. Retrieve the tasks that matched the query at enqueue-time.
let matched_tasks =
if let KindWithContent::TaskCancelation { tasks, query: _ } = &task.kind {
tasks
} else {
unreachable!()
};
let rtxn = self.env.read_txn()?;
let mut canceled_tasks = self.cancel_matched_tasks(
&rtxn,
task.uid,
current_batch,
matched_tasks,
&progress,
)?;
task.status = Status::Succeeded;
match &mut task.details {
Some(Details::TaskCancelation {
matched_tasks: _,
canceled_tasks: canceled_tasks_details,
original_filter: _,
}) => {
*canceled_tasks_details = Some(canceled_tasks.len() as u64);
}
_ => unreachable!(),
}
canceled_tasks.push(task);
Ok((canceled_tasks, ProcessBatchInfo::default()))
}
Batch::TaskDeletions(mut tasks) => {
// 1. Retrieve the tasks that matched the query at enqueue-time.
let mut matched_tasks = RoaringBitmap::new();
for task in tasks.iter() {
if let KindWithContent::TaskDeletion { tasks, query: _ } = &task.kind {
matched_tasks |= tasks;
} else {
unreachable!()
}
}
let mut wtxn = self.env.write_txn()?;
let mut deleted_tasks =
self.delete_matched_tasks(&mut wtxn, &matched_tasks, &progress)?;
wtxn.commit()?;
for task in tasks.iter_mut() {
task.status = Status::Succeeded;
let KindWithContent::TaskDeletion { tasks, query: _ } = &task.kind else {
unreachable!()
};
let deleted_tasks_count = deleted_tasks.intersection_len(tasks);
deleted_tasks -= tasks;
match &mut task.details {
Some(Details::TaskDeletion {
matched_tasks: _,
deleted_tasks,
original_filter: _,
}) => {
*deleted_tasks = Some(deleted_tasks_count);
}
_ => unreachable!(),
}
}
Ok((tasks, ProcessBatchInfo::default()))
}
Batch::SnapshotCreation(tasks) => self
.process_snapshot(progress, tasks)
.map(|tasks| (tasks, ProcessBatchInfo::default())),
Batch::Dump(task) => self
.process_dump_creation(progress, task)
.map(|tasks| (tasks, ProcessBatchInfo::default())),
Batch::IndexOperation { op, must_create_index } => {
let index_uid = op.index_uid().to_string();
let index = if must_create_index {
// create the index if it doesn't already exist
let wtxn = self.env.write_txn()?;
self.index_mapper.create_index(wtxn, &index_uid, None)?
} else {
let rtxn = self.env.read_txn()?;
self.index_mapper.index(&rtxn, &index_uid)?
};
// the index operation can take a long time, so save this handle to make it available to the search for the duration of the tick
self.index_mapper
.set_currently_updating_index(Some((index_uid.clone(), index.clone())));
let mut index_wtxn = index.write_txn()?;
let pre_commit_dabases_sizes = index.database_sizes(&index_wtxn)?;
let (tasks, congestion) =
self.apply_index_operation(&mut index_wtxn, &index, op, &progress)?;
{
progress.update_progress(FinalizingIndexStep::Committing);
let span = tracing::trace_span!(target: "indexing::scheduler", "commit");
let _entered = span.enter();
index_wtxn.commit()?;
}
// if the update processed successfully, we're going to store the new
// stats of the index. Since the tasks have already been processed and
// this is a non-critical operation. If it fails, we should not fail
// the entire batch.
let mut post_commit_dabases_sizes = None;
let res = || -> Result<()> {
progress.update_progress(FinalizingIndexStep::ComputingStats);
let index_rtxn = index.read_txn()?;
let stats = crate::index_mapper::IndexStats::new(&index, &index_rtxn)
.map_err(|e| Error::from_milli(e, Some(index_uid.to_string())))?;
let mut wtxn = self.env.write_txn()?;
self.index_mapper.store_stats_of(&mut wtxn, &index_uid, &stats)?;
post_commit_dabases_sizes = Some(index.database_sizes(&index_rtxn)?);
wtxn.commit()?;
Ok(())
}();
match res {
Ok(_) => (),
Err(e) => tracing::error!(
error = &e as &dyn std::error::Error,
"Could not write the stats of the index"
),
}
let info = ProcessBatchInfo {
congestion,
// In case we fail to the get post-commit sizes we decide
// that nothing changed and use the pre-commit sizes.
post_commit_dabases_sizes: post_commit_dabases_sizes
.unwrap_or_else(|| pre_commit_dabases_sizes.clone()),
pre_commit_dabases_sizes,
};
Ok((tasks, info))
}
Batch::IndexCreation { index_uid, primary_key, task } => {
progress.update_progress(CreateIndexProgress::CreatingTheIndex);
let wtxn = self.env.write_txn()?;
if self.index_mapper.exists(&wtxn, &index_uid)? {
return Err(Error::IndexAlreadyExists(index_uid));
}
self.index_mapper.create_index(wtxn, &index_uid, None)?;
self.process_batch(
Batch::IndexUpdate { index_uid, primary_key, task },
current_batch,
progress,
)
}
Batch::IndexUpdate { index_uid, primary_key, mut task } => {
progress.update_progress(UpdateIndexProgress::UpdatingTheIndex);
let rtxn = self.env.read_txn()?;
let index = self.index_mapper.index(&rtxn, &index_uid)?;
if let Some(primary_key) = primary_key.clone() {
let mut index_wtxn = index.write_txn()?;
let mut builder = MilliSettings::new(
&mut index_wtxn,
&index,
self.index_mapper.indexer_config(),
);
builder.set_primary_key(primary_key);
let must_stop_processing = self.scheduler.must_stop_processing.clone();
builder
.execute(
|indexing_step| tracing::debug!(update = ?indexing_step),
|| must_stop_processing.get(),
)
.map_err(|e| Error::from_milli(e, Some(index_uid.to_string())))?;
index_wtxn.commit()?;
}
// drop rtxn before starting a new wtxn on the same db
rtxn.commit()?;
task.status = Status::Succeeded;
task.details = Some(Details::IndexInfo { primary_key });
// if the update processed successfully, we're going to store the new
// stats of the index. Since the tasks have already been processed and
// this is a non-critical operation. If it fails, we should not fail
// the entire batch.
let res = || -> Result<()> {
let mut wtxn = self.env.write_txn()?;
let index_rtxn = index.read_txn()?;
let stats = crate::index_mapper::IndexStats::new(&index, &index_rtxn)
.map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?;
self.index_mapper.store_stats_of(&mut wtxn, &index_uid, &stats)?;
wtxn.commit()?;
Ok(())
}();
match res {
Ok(_) => (),
Err(e) => tracing::error!(
error = &e as &dyn std::error::Error,
"Could not write the stats of the index"
),
}
Ok((vec![task], ProcessBatchInfo::default()))
}
Batch::IndexDeletion { index_uid, index_has_been_created, mut tasks } => {
progress.update_progress(DeleteIndexProgress::DeletingTheIndex);
let wtxn = self.env.write_txn()?;
// it's possible that the index doesn't exist
let number_of_documents = || -> Result<u64> {
let index = self.index_mapper.index(&wtxn, &index_uid)?;
let index_rtxn = index.read_txn()?;
index
.number_of_documents(&index_rtxn)
.map_err(|e| Error::from_milli(e, Some(index_uid.to_string())))
}()
.unwrap_or_default();
// The write transaction is directly owned and committed inside.
match self.index_mapper.delete_index(wtxn, &index_uid) {
Ok(()) => (),
Err(Error::IndexNotFound(_)) if index_has_been_created => (),
Err(e) => return Err(e),
}
// We set all the tasks details to the default value.
for task in &mut tasks {
task.status = Status::Succeeded;
task.details = match &task.kind {
KindWithContent::IndexDeletion { .. } => {
Some(Details::ClearAll { deleted_documents: Some(number_of_documents) })
}
otherwise => otherwise.default_finished_details(),
};
}
// Here we could also show that all the internal database sizes goes to 0
// but it would mean opening the index and that's costly.
Ok((tasks, ProcessBatchInfo::default()))
}
Batch::IndexSwap { mut task } => {
progress.update_progress(SwappingTheIndexes::EnsuringCorrectnessOfTheSwap);
let mut wtxn = self.env.write_txn()?;
let swaps = if let KindWithContent::IndexSwap { swaps } = &task.kind {
swaps
} else {
unreachable!()
};
let mut not_found_indexes = BTreeSet::new();
for IndexSwap { indexes: (lhs, rhs) } in swaps {
for index in [lhs, rhs] {
let index_exists = self.index_mapper.index_exists(&wtxn, index)?;
if !index_exists {
not_found_indexes.insert(index);
}
}
}
if !not_found_indexes.is_empty() {
if not_found_indexes.len() == 1 {
return Err(Error::SwapIndexNotFound(
not_found_indexes.into_iter().next().unwrap().clone(),
));
} else {
return Err(Error::SwapIndexesNotFound(
not_found_indexes.into_iter().cloned().collect(),
));
}
}
progress.update_progress(SwappingTheIndexes::SwappingTheIndexes);
for (step, swap) in swaps.iter().enumerate() {
progress.update_progress(VariableNameStep::<SwappingTheIndexes>::new(
format!("swapping index {} and {}", swap.indexes.0, swap.indexes.1),
step as u32,
swaps.len() as u32,
));
self.apply_index_swap(
&mut wtxn,
&progress,
task.uid,
&swap.indexes.0,
&swap.indexes.1,
)?;
}
wtxn.commit()?;
task.status = Status::Succeeded;
Ok((vec![task], ProcessBatchInfo::default()))
}
Batch::UpgradeDatabase { mut tasks } => {
let KindWithContent::UpgradeDatabase { from } = tasks.last().unwrap().kind else {
unreachable!();
};
let ret = catch_unwind(AssertUnwindSafe(|| self.process_upgrade(from, progress)));
match ret {
Ok(Ok(())) => (),
Ok(Err(e)) => return Err(Error::DatabaseUpgrade(Box::new(e))),
Err(e) => {
let msg = match e.downcast_ref::<&'static str>() {
Some(s) => *s,
None => match e.downcast_ref::<String>() {
Some(s) => &s[..],
None => "Box<dyn Any>",
},
};
return Err(Error::DatabaseUpgrade(Box::new(Error::ProcessBatchPanicked(
msg.to_string(),
))));
}
}
for task in tasks.iter_mut() {
task.status = Status::Succeeded;
// Since this task can be retried we must reset its error status
task.error = None;
}
Ok((tasks, ProcessBatchInfo::default()))
}
}
}
/// Swap the index `lhs` with the index `rhs`.
fn apply_index_swap(
&self,
wtxn: &mut RwTxn,
progress: &Progress,
task_id: u32,
lhs: &str,
rhs: &str,
) -> Result<()> {
progress.update_progress(InnerSwappingTwoIndexes::RetrieveTheTasks);
// 1. Verify that both lhs and rhs are existing indexes
let index_lhs_exists = self.index_mapper.index_exists(wtxn, lhs)?;
if !index_lhs_exists {
return Err(Error::IndexNotFound(lhs.to_owned()));
}
let index_rhs_exists = self.index_mapper.index_exists(wtxn, rhs)?;
if !index_rhs_exists {
return Err(Error::IndexNotFound(rhs.to_owned()));
}
// 2. Get the task set for index = name that appeared before the index swap task
let mut index_lhs_task_ids = self.queue.tasks.index_tasks(wtxn, lhs)?;
index_lhs_task_ids.remove_range(task_id..);
let mut index_rhs_task_ids = self.queue.tasks.index_tasks(wtxn, rhs)?;
index_rhs_task_ids.remove_range(task_id..);
// 3. before_name -> new_name in the task's KindWithContent
progress.update_progress(InnerSwappingTwoIndexes::UpdateTheTasks);
let tasks_to_update = &index_lhs_task_ids | &index_rhs_task_ids;
let (atomic, task_progress) = AtomicTaskStep::new(tasks_to_update.len() as u32);
progress.update_progress(task_progress);
for task_id in tasks_to_update {
let mut task =
self.queue.tasks.get_task(wtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?;
swap_index_uid_in_task(&mut task, (lhs, rhs));
self.queue.tasks.all_tasks.put(wtxn, &task_id, &task)?;
atomic.fetch_add(1, Ordering::Relaxed);
}
// 4. remove the task from indexuid = before_name
// 5. add the task to indexuid = after_name
progress.update_progress(InnerSwappingTwoIndexes::UpdateTheIndexesMetadata);
self.queue.tasks.update_index(wtxn, lhs, |lhs_tasks| {
*lhs_tasks -= &index_lhs_task_ids;
*lhs_tasks |= &index_rhs_task_ids;
})?;
self.queue.tasks.update_index(wtxn, rhs, |rhs_tasks| {
*rhs_tasks -= &index_rhs_task_ids;
*rhs_tasks |= &index_lhs_task_ids;
})?;
// 6. Swap in the index mapper
self.index_mapper.swap(wtxn, lhs, rhs)?;
Ok(())
}
/// Delete each given task from all the databases (if it is deleteable).
///
/// Return the number of tasks that were actually deleted.
fn delete_matched_tasks(
&self,
wtxn: &mut RwTxn,
matched_tasks: &RoaringBitmap,
progress: &Progress,
) -> Result<RoaringBitmap> {
progress.update_progress(TaskDeletionProgress::DeletingTasksDateTime);
// 1. Remove from this list the tasks that we are not allowed to delete
let enqueued_tasks = self.queue.tasks.get_status(wtxn, Status::Enqueued)?;
let processing_tasks = &self.processing_tasks.read().unwrap().processing.clone();
let all_task_ids = self.queue.tasks.all_task_ids(wtxn)?;
let mut to_delete_tasks = all_task_ids & matched_tasks;
to_delete_tasks -= &**processing_tasks;
to_delete_tasks -= &enqueued_tasks;
// 2. We now have a list of tasks to delete, delete them
let mut affected_indexes = HashSet::new();
let mut affected_statuses = HashSet::new();
let mut affected_kinds = HashSet::new();
let mut affected_canceled_by = RoaringBitmap::new();
// The tasks that have been removed *per batches*.
let mut affected_batches: HashMap<BatchId, RoaringBitmap> = HashMap::new();
let (atomic_progress, task_progress) = AtomicTaskStep::new(to_delete_tasks.len() as u32);
progress.update_progress(task_progress);
for task_id in to_delete_tasks.iter() {
let task =
self.queue.tasks.get_task(wtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?;
affected_indexes.extend(task.indexes().into_iter().map(|x| x.to_owned()));
affected_statuses.insert(task.status);
affected_kinds.insert(task.kind.as_kind());
// Note: don't delete the persisted task data since
// we can only delete succeeded, failed, and canceled tasks.
// In each of those cases, the persisted data is supposed to
// have been deleted already.
utils::remove_task_datetime(
wtxn,
self.queue.tasks.enqueued_at,
task.enqueued_at,
task.uid,
)?;
if let Some(started_at) = task.started_at {
utils::remove_task_datetime(
wtxn,
self.queue.tasks.started_at,
started_at,
task.uid,
)?;
}
if let Some(finished_at) = task.finished_at {
utils::remove_task_datetime(
wtxn,
self.queue.tasks.finished_at,
finished_at,
task.uid,
)?;
}
if let Some(canceled_by) = task.canceled_by {
affected_canceled_by.insert(canceled_by);
}
if let Some(batch_uid) = task.batch_uid {
affected_batches.entry(batch_uid).or_default().insert(task_id);
}
atomic_progress.fetch_add(1, Ordering::Relaxed);
}
progress.update_progress(TaskDeletionProgress::DeletingTasksMetadata);
let (atomic_progress, task_progress) = AtomicTaskStep::new(
(affected_indexes.len() + affected_statuses.len() + affected_kinds.len()) as u32,
);
progress.update_progress(task_progress);
for index in affected_indexes.iter() {
self.queue.tasks.update_index(wtxn, index, |bitmap| *bitmap -= &to_delete_tasks)?;
atomic_progress.fetch_add(1, Ordering::Relaxed);
}
for status in affected_statuses.iter() {
self.queue.tasks.update_status(wtxn, *status, |bitmap| *bitmap -= &to_delete_tasks)?;
atomic_progress.fetch_add(1, Ordering::Relaxed);
}
for kind in affected_kinds.iter() {
self.queue.tasks.update_kind(wtxn, *kind, |bitmap| *bitmap -= &to_delete_tasks)?;
atomic_progress.fetch_add(1, Ordering::Relaxed);
}
progress.update_progress(TaskDeletionProgress::DeletingTasks);
let (atomic_progress, task_progress) = AtomicTaskStep::new(to_delete_tasks.len() as u32);
progress.update_progress(task_progress);
for task in to_delete_tasks.iter() {
self.queue.tasks.all_tasks.delete(wtxn, &task)?;
atomic_progress.fetch_add(1, Ordering::Relaxed);
}
for canceled_by in affected_canceled_by {
if let Some(mut tasks) = self.queue.tasks.canceled_by.get(wtxn, &canceled_by)? {
tasks -= &to_delete_tasks;
if tasks.is_empty() {
self.queue.tasks.canceled_by.delete(wtxn, &canceled_by)?;
} else {
self.queue.tasks.canceled_by.put(wtxn, &canceled_by, &tasks)?;
}
}
}
progress.update_progress(TaskDeletionProgress::DeletingBatches);
let (atomic_progress, batch_progress) = AtomicBatchStep::new(affected_batches.len() as u32);
progress.update_progress(batch_progress);
for (batch_id, to_delete_tasks) in affected_batches {
if let Some(mut tasks) = self.queue.batch_to_tasks_mapping.get(wtxn, &batch_id)? {
tasks -= &to_delete_tasks;
// We must remove the batch entirely
if tasks.is_empty() {
if let Some(batch) = self.queue.batches.get_batch(wtxn, batch_id)? {
if let Some(BatchEnqueuedAt { earliest, oldest }) = batch.enqueued_at {
remove_task_datetime(
wtxn,
self.queue.batches.enqueued_at,
earliest,
batch_id,
)?;
remove_task_datetime(
wtxn,
self.queue.batches.enqueued_at,
oldest,
batch_id,
)?;
} else {
// If we don't have the enqueued at in the batch it means the database comes from the v1.12
// and we still need to find the date by scrolling the database
remove_n_tasks_datetime_earlier_than(
wtxn,
self.queue.batches.enqueued_at,
batch.started_at,
batch.stats.total_nb_tasks.clamp(1, 2) as usize,
batch_id,
)?;
}
remove_task_datetime(
wtxn,
self.queue.batches.started_at,
batch.started_at,
batch_id,
)?;
if let Some(finished_at) = batch.finished_at {
remove_task_datetime(
wtxn,
self.queue.batches.finished_at,
finished_at,
batch_id,
)?;
}
self.queue.batches.all_batches.delete(wtxn, &batch_id)?;
self.queue.batch_to_tasks_mapping.delete(wtxn, &batch_id)?;
}
}
// Anyway, we must remove the batch from all its reverse indexes.
// The only way to do that is to check
for index in affected_indexes.iter() {
let index_tasks = self.queue.tasks.index_tasks(wtxn, index)?;
let remaining_index_tasks = index_tasks & &tasks;
if remaining_index_tasks.is_empty() {
self.queue.batches.update_index(wtxn, index, |bitmap| {
bitmap.remove(batch_id);
})?;
}
}
for status in affected_statuses.iter() {
let status_tasks = self.queue.tasks.get_status(wtxn, *status)?;
let remaining_status_tasks = status_tasks & &tasks;
if remaining_status_tasks.is_empty() {
self.queue.batches.update_status(wtxn, *status, |bitmap| {
bitmap.remove(batch_id);
})?;
}
}
for kind in affected_kinds.iter() {
let kind_tasks = self.queue.tasks.get_kind(wtxn, *kind)?;
let remaining_kind_tasks = kind_tasks & &tasks;
if remaining_kind_tasks.is_empty() {
self.queue.batches.update_kind(wtxn, *kind, |bitmap| {
bitmap.remove(batch_id);
})?;
}
}
}
atomic_progress.fetch_add(1, Ordering::Relaxed);
}
Ok(to_delete_tasks)
}
/// Cancel each given task from all the databases (if it is cancelable).
///
/// Returns the list of tasks that matched the filter and must be written in the database.
fn cancel_matched_tasks(
&self,
rtxn: &RoTxn,
cancel_task_id: TaskId,
current_batch: &mut ProcessingBatch,
matched_tasks: &RoaringBitmap,
progress: &Progress,
) -> Result<Vec<Task>> {
progress.update_progress(TaskCancelationProgress::RetrievingTasks);
// 1. Remove from this list the tasks that we are not allowed to cancel
// Notice that only the _enqueued_ ones are cancelable and we should
// have already aborted the indexation of the _processing_ ones
let cancelable_tasks = self.queue.tasks.get_status(rtxn, Status::Enqueued)?;
let tasks_to_cancel = cancelable_tasks & matched_tasks;
let (task_progress, progress_obj) = AtomicTaskStep::new(tasks_to_cancel.len() as u32);
progress.update_progress(progress_obj);
// 2. We now have a list of tasks to cancel, cancel them
let mut tasks = self.queue.tasks.get_existing_tasks(
rtxn,
tasks_to_cancel.iter().inspect(|_| {
task_progress.fetch_add(1, Ordering::Relaxed);
}),
)?;
progress.update_progress(TaskCancelationProgress::UpdatingTasks);
let (task_progress, progress_obj) = AtomicTaskStep::new(tasks_to_cancel.len() as u32);
progress.update_progress(progress_obj);
for task in tasks.iter_mut() {
task.status = Status::Canceled;
task.canceled_by = Some(cancel_task_id);
task.details = task.details.as_ref().map(|d| d.to_failed());
current_batch.processing(Some(task));
task_progress.fetch_add(1, Ordering::Relaxed);
}
Ok(tasks)
}
}

View File

@ -0,0 +1,278 @@
use std::collections::BTreeMap;
use std::fs::File;
use std::io::BufWriter;
use std::sync::atomic::Ordering;
use dump::IndexMetadata;
use meilisearch_types::milli::constants::RESERVED_VECTORS_FIELD_NAME;
use meilisearch_types::milli::progress::{Progress, VariableNameStep};
use meilisearch_types::milli::vector::parsed_vectors::{ExplicitVectors, VectorOrArrayOfVectors};
use meilisearch_types::milli::{self};
use meilisearch_types::tasks::{Details, KindWithContent, Status, Task};
use time::macros::format_description;
use time::OffsetDateTime;
use crate::processing::{
AtomicBatchStep, AtomicDocumentStep, AtomicTaskStep, DumpCreationProgress,
};
use crate::{Error, IndexScheduler, Result};
impl IndexScheduler {
pub(super) fn process_dump_creation(
&self,
progress: Progress,
mut task: Task,
) -> Result<Vec<Task>> {
progress.update_progress(DumpCreationProgress::StartTheDumpCreation);
let started_at = OffsetDateTime::now_utc();
let (keys, instance_uid) =
if let KindWithContent::DumpCreation { keys, instance_uid } = &task.kind {
(keys, instance_uid)
} else {
unreachable!();
};
let dump = dump::DumpWriter::new(*instance_uid)?;
// 1. dump the keys
progress.update_progress(DumpCreationProgress::DumpTheApiKeys);
let mut dump_keys = dump.create_keys()?;
for key in keys {
dump_keys.push_key(key)?;
}
dump_keys.flush()?;
let rtxn = self.env.read_txn()?;
// 2. dump the tasks
progress.update_progress(DumpCreationProgress::DumpTheTasks);
let mut dump_tasks = dump.create_tasks_queue()?;
let (atomic, update_task_progress) =
AtomicTaskStep::new(self.queue.tasks.all_tasks.len(&rtxn)? as u32);
progress.update_progress(update_task_progress);
for ret in self.queue.tasks.all_tasks.iter(&rtxn)? {
if self.scheduler.must_stop_processing.get() {
return Err(Error::AbortedTask);
}
let (_, mut t) = ret?;
let status = t.status;
let content_file = t.content_uuid();
// In the case we're dumping ourselves we want to be marked as finished
// to not loop over ourselves indefinitely.
if t.uid == task.uid {
let finished_at = OffsetDateTime::now_utc();
// We're going to fake the date because we don't know if everything is going to go well.
// But we need to dump the task as finished and successful.
// If something fail everything will be set appropriately in the end.
t.status = Status::Succeeded;
t.started_at = Some(started_at);
t.finished_at = Some(finished_at);
}
// Patch the task to remove the batch uid, because as of v1.12.5 batches are not persisted.
// This prevent from referencing *future* batches not actually associated with the task.
//
// See <https://github.com/meilisearch/meilisearch/issues/5247> for details.
t.batch_uid = None;
let mut dump_content_file = dump_tasks.push_task(&t.into())?;
// 2.1. Dump the `content_file` associated with the task if there is one and the task is not finished yet.
if let Some(content_file) = content_file {
if self.scheduler.must_stop_processing.get() {
return Err(Error::AbortedTask);
}
if status == Status::Enqueued {
let content_file = self.queue.file_store.get_update(content_file)?;
for document in
serde_json::de::Deserializer::from_reader(content_file).into_iter()
{
let document = document.map_err(|e| {
Error::from_milli(milli::InternalError::SerdeJson(e).into(), None)
})?;
dump_content_file.push_document(&document)?;
}
dump_content_file.flush()?;
}
}
atomic.fetch_add(1, Ordering::Relaxed);
}
dump_tasks.flush()?;
// 3. dump the batches
progress.update_progress(DumpCreationProgress::DumpTheBatches);
let mut dump_batches = dump.create_batches_queue()?;
let (atomic_batch_progress, update_batch_progress) =
AtomicBatchStep::new(self.queue.batches.all_batches.len(&rtxn)? as u32);
progress.update_progress(update_batch_progress);
for ret in self.queue.batches.all_batches.iter(&rtxn)? {
if self.scheduler.must_stop_processing.get() {
return Err(Error::AbortedTask);
}
let (_, mut b) = ret?;
// In the case we're dumping ourselves we want to be marked as finished
// to not loop over ourselves indefinitely.
if b.uid == task.uid {
let finished_at = OffsetDateTime::now_utc();
// We're going to fake the date because we don't know if everything is going to go well.
// But we need to dump the task as finished and successful.
// If something fail everything will be set appropriately in the end.
let mut statuses = BTreeMap::new();
statuses.insert(Status::Succeeded, b.stats.total_nb_tasks);
b.stats.status = statuses;
b.finished_at = Some(finished_at);
}
dump_batches.push_batch(&b)?;
atomic_batch_progress.fetch_add(1, Ordering::Relaxed);
}
dump_batches.flush()?;
// 4. Dump the indexes
progress.update_progress(DumpCreationProgress::DumpTheIndexes);
let nb_indexes = self.index_mapper.index_mapping.len(&rtxn)? as u32;
let mut count = 0;
let () = self.index_mapper.try_for_each_index(&rtxn, |uid, index| -> Result<()> {
progress.update_progress(VariableNameStep::<DumpCreationProgress>::new(
uid.to_string(),
count,
nb_indexes,
));
count += 1;
let rtxn = index.read_txn()?;
let metadata = IndexMetadata {
uid: uid.to_owned(),
primary_key: index.primary_key(&rtxn)?.map(String::from),
created_at: index
.created_at(&rtxn)
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?,
updated_at: index
.updated_at(&rtxn)
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?,
};
let mut index_dumper = dump.create_index(uid, &metadata)?;
let fields_ids_map = index.fields_ids_map(&rtxn)?;
let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();
let embedding_configs = index
.embedding_configs(&rtxn)
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
let nb_documents = index
.number_of_documents(&rtxn)
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?
as u32;
let (atomic, update_document_progress) = AtomicDocumentStep::new(nb_documents);
progress.update_progress(update_document_progress);
let documents = index
.all_documents(&rtxn)
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
// 4.1. Dump the documents
for ret in documents {
if self.scheduler.must_stop_processing.get() {
return Err(Error::AbortedTask);
}
let (id, doc) = ret.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
let mut document = milli::obkv_to_json(&all_fields, &fields_ids_map, doc)
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
'inject_vectors: {
let embeddings = index
.embeddings(&rtxn, id)
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
if embeddings.is_empty() {
break 'inject_vectors;
}
let vectors = document
.entry(RESERVED_VECTORS_FIELD_NAME.to_owned())
.or_insert(serde_json::Value::Object(Default::default()));
let serde_json::Value::Object(vectors) = vectors else {
let user_err =
milli::Error::UserError(milli::UserError::InvalidVectorsMapType {
document_id: {
if let Ok(Some(Ok(index))) = index
.external_id_of(&rtxn, std::iter::once(id))
.map(|it| it.into_iter().next())
{
index
} else {
format!("internal docid={id}")
}
},
value: vectors.clone(),
});
return Err(Error::from_milli(user_err, Some(uid.to_string())));
};
for (embedder_name, embeddings) in embeddings {
let user_provided = embedding_configs
.iter()
.find(|conf| conf.name == embedder_name)
.is_some_and(|conf| conf.user_provided.contains(id));
let embeddings = ExplicitVectors {
embeddings: Some(VectorOrArrayOfVectors::from_array_of_vectors(
embeddings,
)),
regenerate: !user_provided,
};
vectors.insert(embedder_name, serde_json::to_value(embeddings).unwrap());
}
}
index_dumper.push_document(&document)?;
atomic.fetch_add(1, Ordering::Relaxed);
}
// 4.2. Dump the settings
let settings = meilisearch_types::settings::settings(
index,
&rtxn,
meilisearch_types::settings::SecretPolicy::RevealSecrets,
)
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
index_dumper.settings(&settings)?;
Ok(())
})?;
// 5. Dump experimental feature settings
progress.update_progress(DumpCreationProgress::DumpTheExperimentalFeatures);
let features = self.features().runtime_features();
dump.create_experimental_features(features)?;
let network = self.network();
dump.create_network(network)?;
let dump_uid = started_at.format(format_description!(
"[year repr:full][month repr:numerical][day padding:zero]-[hour padding:zero][minute padding:zero][second padding:zero][subsecond digits:3]"
)).unwrap();
if self.scheduler.must_stop_processing.get() {
return Err(Error::AbortedTask);
}
progress.update_progress(DumpCreationProgress::CompressTheDump);
let path = self.scheduler.dumps_path.join(format!("{}.dump", dump_uid));
let file = File::create(path)?;
dump.persist_to(BufWriter::new(file))?;
// if we reached this step we can tell the scheduler we succeeded to dump ourselves.
task.status = Status::Succeeded;
task.details = Some(Details::Dump { dump_uid: Some(dump_uid) });
Ok(vec![task])
}
}

View File

@ -0,0 +1,539 @@
use bumpalo::collections::CollectIn;
use bumpalo::Bump;
use meilisearch_types::heed::RwTxn;
use meilisearch_types::milli::documents::PrimaryKey;
use meilisearch_types::milli::progress::Progress;
use meilisearch_types::milli::update::new::indexer::{self, UpdateByFunction};
use meilisearch_types::milli::update::DocumentAdditionResult;
use meilisearch_types::milli::{self, ChannelCongestion, Filter, ThreadPoolNoAbortBuilder};
use meilisearch_types::settings::apply_settings_to_builder;
use meilisearch_types::tasks::{Details, KindWithContent, Status, Task};
use meilisearch_types::Index;
use roaring::RoaringBitmap;
use super::create_batch::{DocumentOperation, IndexOperation};
use crate::processing::{
DocumentDeletionProgress, DocumentEditionProgress, DocumentOperationProgress, SettingsProgress,
};
use crate::{Error, IndexScheduler, Result};
impl IndexScheduler {
/// Process the index operation on the given index.
///
/// ## Return
/// The list of processed tasks.
#[tracing::instrument(
level = "trace",
skip(self, index_wtxn, index, progress),
target = "indexing::scheduler"
)]
pub(crate) fn apply_index_operation<'i>(
&self,
index_wtxn: &mut RwTxn<'i>,
index: &'i Index,
operation: IndexOperation,
progress: &Progress,
) -> Result<(Vec<Task>, Option<ChannelCongestion>)> {
let indexer_alloc = Bump::new();
let started_processing_at = std::time::Instant::now();
let must_stop_processing = self.scheduler.must_stop_processing.clone();
match operation {
IndexOperation::DocumentClear { index_uid, mut tasks } => {
let count = milli::update::ClearDocuments::new(index_wtxn, index)
.execute()
.map_err(|e| Error::from_milli(e, Some(index_uid)))?;
let mut first_clear_found = false;
for task in &mut tasks {
task.status = Status::Succeeded;
// The first document clear will effectively delete every documents
// in the database but the next ones will clear 0 documents.
task.details = match &task.kind {
KindWithContent::DocumentClear { .. } => {
let count = if first_clear_found { 0 } else { count };
first_clear_found = true;
Some(Details::ClearAll { deleted_documents: Some(count) })
}
otherwise => otherwise.default_details(),
};
}
Ok((tasks, None))
}
IndexOperation::DocumentOperation { index_uid, primary_key, operations, mut tasks } => {
progress.update_progress(DocumentOperationProgress::RetrievingConfig);
// TODO: at some point, for better efficiency we might want to reuse the bumpalo for successive batches.
// this is made difficult by the fact we're doing private clones of the index scheduler and sending it
// to a fresh thread.
let mut content_files = Vec::new();
for operation in &operations {
match operation {
DocumentOperation::Replace(content_uuid)
| DocumentOperation::Update(content_uuid) => {
let content_file = self.queue.file_store.get_update(*content_uuid)?;
let mmap = unsafe { memmap2::Mmap::map(&content_file)? };
content_files.push(mmap);
}
_ => (),
}
}
let rtxn = index.read_txn()?;
let db_fields_ids_map = index.fields_ids_map(&rtxn)?;
let mut new_fields_ids_map = db_fields_ids_map.clone();
let mut content_files_iter = content_files.iter();
let mut indexer = indexer::DocumentOperation::new();
let embedders = index
.embedding_configs(index_wtxn)
.map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?;
let embedders = self.embedders(index_uid.clone(), embedders)?;
for operation in operations {
match operation {
DocumentOperation::Replace(_content_uuid) => {
let mmap = content_files_iter.next().unwrap();
indexer
.replace_documents(mmap)
.map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?;
}
DocumentOperation::Update(_content_uuid) => {
let mmap = content_files_iter.next().unwrap();
indexer
.update_documents(mmap)
.map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?;
}
DocumentOperation::Delete(document_ids) => {
let document_ids: bumpalo::collections::vec::Vec<_> = document_ids
.iter()
.map(|s| &*indexer_alloc.alloc_str(s))
.collect_in(&indexer_alloc);
indexer.delete_documents(document_ids.into_bump_slice());
}
}
}
let local_pool;
let indexer_config = self.index_mapper.indexer_config();
let pool = match &indexer_config.thread_pool {
Some(pool) => pool,
None => {
local_pool = ThreadPoolNoAbortBuilder::new()
.thread_name(|i| format!("indexing-thread-{i}"))
.build()
.unwrap();
&local_pool
}
};
progress.update_progress(DocumentOperationProgress::ComputingDocumentChanges);
let (document_changes, operation_stats, primary_key) = indexer
.into_changes(
&indexer_alloc,
index,
&rtxn,
primary_key.as_deref(),
&mut new_fields_ids_map,
&|| must_stop_processing.get(),
progress.clone(),
)
.map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?;
let mut candidates_count = 0;
for (stats, task) in operation_stats.into_iter().zip(&mut tasks) {
candidates_count += stats.document_count;
match stats.error {
Some(error) => {
task.status = Status::Failed;
task.error = Some(milli::Error::UserError(error).into());
}
None => task.status = Status::Succeeded,
}
task.details = match task.details {
Some(Details::DocumentAdditionOrUpdate { received_documents, .. }) => {
Some(Details::DocumentAdditionOrUpdate {
received_documents,
indexed_documents: Some(stats.document_count),
})
}
Some(Details::DocumentDeletion { provided_ids, .. }) => {
Some(Details::DocumentDeletion {
provided_ids,
deleted_documents: Some(stats.document_count),
})
}
_ => {
// In the case of a `documentAdditionOrUpdate` or `DocumentDeletion`
// the details MUST be set to either addition or deletion
unreachable!();
}
}
}
progress.update_progress(DocumentOperationProgress::Indexing);
let mut congestion = None;
if tasks.iter().any(|res| res.error.is_none()) {
congestion = Some(
indexer::index(
index_wtxn,
index,
pool,
indexer_config.grenad_parameters(),
&db_fields_ids_map,
new_fields_ids_map,
primary_key,
&document_changes,
embedders,
&|| must_stop_processing.get(),
progress,
)
.map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?,
);
let addition = DocumentAdditionResult {
indexed_documents: candidates_count,
number_of_documents: index
.number_of_documents(index_wtxn)
.map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?,
};
tracing::info!(indexing_result = ?addition, processed_in = ?started_processing_at.elapsed(), "document indexing done");
}
Ok((tasks, congestion))
}
IndexOperation::DocumentEdition { index_uid, mut task } => {
progress.update_progress(DocumentEditionProgress::RetrievingConfig);
let (filter, code) = if let KindWithContent::DocumentEdition {
filter_expr,
context: _,
function,
..
} = &task.kind
{
(filter_expr, function)
} else {
unreachable!()
};
let candidates = match filter.as_ref().map(Filter::from_json) {
Some(Ok(Some(filter))) => filter
.evaluate(index_wtxn, index)
.map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?,
None | Some(Ok(None)) => index.documents_ids(index_wtxn)?,
Some(Err(e)) => return Err(Error::from_milli(e, Some(index_uid.clone()))),
};
let (original_filter, context, function) = if let Some(Details::DocumentEdition {
original_filter,
context,
function,
..
}) = task.details
{
(original_filter, context, function)
} else {
// In the case of a `documentEdition` the details MUST be set
unreachable!();
};
if candidates.is_empty() {
task.status = Status::Succeeded;
task.details = Some(Details::DocumentEdition {
original_filter,
context,
function,
deleted_documents: Some(0),
edited_documents: Some(0),
});
return Ok((vec![task], None));
}
let rtxn = index.read_txn()?;
let db_fields_ids_map = index.fields_ids_map(&rtxn)?;
let mut new_fields_ids_map = db_fields_ids_map.clone();
// candidates not empty => index not empty => a primary key is set
let primary_key = index.primary_key(&rtxn)?.unwrap();
let primary_key =
PrimaryKey::new_or_insert(primary_key, &mut new_fields_ids_map)
.map_err(|err| Error::from_milli(err.into(), Some(index_uid.clone())))?;
let result_count = Ok((candidates.len(), candidates.len())) as Result<_>;
let mut congestion = None;
if task.error.is_none() {
let local_pool;
let indexer_config = self.index_mapper.indexer_config();
let pool = match &indexer_config.thread_pool {
Some(pool) => pool,
None => {
local_pool = ThreadPoolNoAbortBuilder::new()
.thread_name(|i| format!("indexing-thread-{i}"))
.build()
.unwrap();
&local_pool
}
};
let candidates_count = candidates.len();
progress.update_progress(DocumentEditionProgress::ComputingDocumentChanges);
let indexer = UpdateByFunction::new(candidates, context.clone(), code.clone());
let document_changes = pool
.install(|| {
indexer
.into_changes(&primary_key)
.map_err(|err| Error::from_milli(err, Some(index_uid.clone())))
})
.unwrap()?;
let embedders = index
.embedding_configs(index_wtxn)
.map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?;
let embedders = self.embedders(index_uid.clone(), embedders)?;
progress.update_progress(DocumentEditionProgress::Indexing);
congestion = Some(
indexer::index(
index_wtxn,
index,
pool,
indexer_config.grenad_parameters(),
&db_fields_ids_map,
new_fields_ids_map,
None, // cannot change primary key in DocumentEdition
&document_changes,
embedders,
&|| must_stop_processing.get(),
progress,
)
.map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?,
);
let addition = DocumentAdditionResult {
indexed_documents: candidates_count,
number_of_documents: index
.number_of_documents(index_wtxn)
.map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?,
};
tracing::info!(indexing_result = ?addition, processed_in = ?started_processing_at.elapsed(), "document indexing done");
}
match result_count {
Ok((deleted_documents, edited_documents)) => {
task.status = Status::Succeeded;
task.details = Some(Details::DocumentEdition {
original_filter,
context,
function,
deleted_documents: Some(deleted_documents),
edited_documents: Some(edited_documents),
});
}
Err(e) => {
task.status = Status::Failed;
task.details = Some(Details::DocumentEdition {
original_filter,
context,
function,
deleted_documents: Some(0),
edited_documents: Some(0),
});
task.error = Some(e.into());
}
}
Ok((vec![task], congestion))
}
IndexOperation::DocumentDeletion { mut tasks, index_uid } => {
progress.update_progress(DocumentDeletionProgress::RetrievingConfig);
let mut to_delete = RoaringBitmap::new();
let external_documents_ids = index.external_documents_ids();
for task in tasks.iter_mut() {
let before = to_delete.len();
task.status = Status::Succeeded;
match &task.kind {
KindWithContent::DocumentDeletion { index_uid: _, documents_ids } => {
for id in documents_ids {
if let Some(id) = external_documents_ids.get(index_wtxn, id)? {
to_delete.insert(id);
}
}
let will_be_removed = to_delete.len() - before;
task.details = Some(Details::DocumentDeletion {
provided_ids: documents_ids.len(),
deleted_documents: Some(will_be_removed),
});
}
KindWithContent::DocumentDeletionByFilter { index_uid, filter_expr } => {
let before = to_delete.len();
let filter = match Filter::from_json(filter_expr) {
Ok(filter) => filter,
Err(err) => {
// theorically, this should be catched by deserr before reaching the index-scheduler and cannot happens
task.status = Status::Failed;
task.error = Some(
Error::from_milli(err, Some(index_uid.clone())).into(),
);
None
}
};
if let Some(filter) = filter {
let candidates = filter
.evaluate(index_wtxn, index)
.map_err(|err| Error::from_milli(err, Some(index_uid.clone())));
match candidates {
Ok(candidates) => to_delete |= candidates,
Err(err) => {
task.status = Status::Failed;
task.error = Some(err.into());
}
};
}
let will_be_removed = to_delete.len() - before;
if let Some(Details::DocumentDeletionByFilter {
original_filter: _,
deleted_documents,
}) = &mut task.details
{
*deleted_documents = Some(will_be_removed);
} else {
// In the case of a `documentDeleteByFilter` the details MUST be set
unreachable!()
}
}
_ => unreachable!(),
}
}
if to_delete.is_empty() {
return Ok((tasks, None));
}
let rtxn = index.read_txn()?;
let db_fields_ids_map = index.fields_ids_map(&rtxn)?;
let mut new_fields_ids_map = db_fields_ids_map.clone();
// to_delete not empty => index not empty => primary key set
let primary_key = index.primary_key(&rtxn)?.unwrap();
let primary_key =
PrimaryKey::new_or_insert(primary_key, &mut new_fields_ids_map)
.map_err(|err| Error::from_milli(err.into(), Some(index_uid.clone())))?;
let mut congestion = None;
if !tasks.iter().all(|res| res.error.is_some()) {
let local_pool;
let indexer_config = self.index_mapper.indexer_config();
let pool = match &indexer_config.thread_pool {
Some(pool) => pool,
None => {
local_pool = ThreadPoolNoAbortBuilder::new()
.thread_name(|i| format!("indexing-thread-{i}"))
.build()
.unwrap();
&local_pool
}
};
progress.update_progress(DocumentDeletionProgress::DeleteDocuments);
let mut indexer = indexer::DocumentDeletion::new();
let candidates_count = to_delete.len();
indexer.delete_documents_by_docids(to_delete);
let document_changes = indexer.into_changes(&indexer_alloc, primary_key);
let embedders = index
.embedding_configs(index_wtxn)
.map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?;
let embedders = self.embedders(index_uid.clone(), embedders)?;
progress.update_progress(DocumentDeletionProgress::Indexing);
congestion = Some(
indexer::index(
index_wtxn,
index,
pool,
indexer_config.grenad_parameters(),
&db_fields_ids_map,
new_fields_ids_map,
None, // document deletion never changes primary key
&document_changes,
embedders,
&|| must_stop_processing.get(),
progress,
)
.map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?,
);
let addition = DocumentAdditionResult {
indexed_documents: candidates_count,
number_of_documents: index
.number_of_documents(index_wtxn)
.map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?,
};
tracing::info!(indexing_result = ?addition, processed_in = ?started_processing_at.elapsed(), "document indexing done");
}
Ok((tasks, congestion))
}
IndexOperation::Settings { index_uid, settings, mut tasks } => {
progress.update_progress(SettingsProgress::RetrievingAndMergingTheSettings);
let indexer_config = self.index_mapper.indexer_config();
let mut builder = milli::update::Settings::new(index_wtxn, index, indexer_config);
for (task, (_, settings)) in tasks.iter_mut().zip(settings) {
let checked_settings = settings.clone().check();
task.details = Some(Details::SettingsUpdate { settings: Box::new(settings) });
apply_settings_to_builder(&checked_settings, &mut builder);
// We can apply the status right now and if an update fail later
// the whole batch will be marked as failed.
task.status = Status::Succeeded;
}
progress.update_progress(SettingsProgress::ApplyTheSettings);
builder
.execute(
|indexing_step| tracing::debug!(update = ?indexing_step),
|| must_stop_processing.get(),
)
.map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?;
Ok((tasks, None))
}
IndexOperation::DocumentClearAndSetting {
index_uid,
cleared_tasks,
settings,
settings_tasks,
} => {
let (mut import_tasks, _congestion) = self.apply_index_operation(
index_wtxn,
index,
IndexOperation::DocumentClear {
index_uid: index_uid.clone(),
tasks: cleared_tasks,
},
progress,
)?;
let (settings_tasks, _congestion) = self.apply_index_operation(
index_wtxn,
index,
IndexOperation::Settings { index_uid, settings, tasks: settings_tasks },
progress,
)?;
let mut tasks = settings_tasks;
tasks.append(&mut import_tasks);
Ok((tasks, None))
}
}
}
}

Some files were not shown because too many files have changed in this diff Show More