Compare commits

...

342 Commits

Author SHA1 Message Date
934b73142d Reduce the number of computed prefixes 2025-03-27 17:57:57 +01:00
bb2e9419d3 Merge pull request #5468 from meilisearch/more-precise-post-processing
More Precise Post Processing
2025-03-27 10:07:09 +00:00
cf68713145 Merge pull request #5465 from meilisearch/improve-stats-perf
Improve documents stats performance
2025-03-27 09:20:14 +00:00
811143cbe9 Add more progress precision when doing post processing 2025-03-27 10:17:28 +01:00
c670e9a39b Make sure the snaps are happy 2025-03-26 20:03:35 +01:00
65f1b13475 Merge pull request #5464 from meilisearch/camel-case-database-sizes
Prefer camelCase for internal database sizes db name
2025-03-26 16:40:39 +00:00
db7ce03763 Improve the performances of computing the size of the documents database 2025-03-26 17:40:12 +01:00
7ed9adde29 Prefer camelCase for internal database sizes db name 2025-03-26 16:45:52 +01:00
9ce7ccfbe7 Merge pull request #5457 from meilisearch/show-database-sizes-changes
Show database sizes batches
2025-03-26 10:19:40 +00:00
3deb1ef78f Fix the snapshots again 2025-03-26 10:38:49 +01:00
5820d822c8 Add more details about the finalizing progress step 2025-03-26 09:49:43 +01:00
637bea0370 Compute and store the database sizes 2025-03-26 09:49:42 +01:00
fd079c6757 Add an index method to get the database sizes 2025-03-25 16:30:51 +01:00
182e5d5632 Add database sizes stats to the batches 2025-03-25 16:30:15 +01:00
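Taken together, the commits above ("Add database sizes stats to the batches", "Compute and store the database sizes", "Prefer camelCase for internal database sizes db name") suggest that batch objects now report per-database sizes under a camelCase key. The payload is not shown in this log; the following is a hypothetical sketch, with made-up key names and values:

```json
{
  "uid": 0,
  "stats": {
    "internalDatabaseSizes": {
      "documents": "12.5 MiB",
      "wordDocids": "4.2 MiB"
    }
  }
}
```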
82aee6a9af Merge pull request #5415 from meilisearch/isolate-word-fst-usage
Isolate word fst usage
2025-03-25 11:43:37 +00:00
fca947219f Merge pull request #5402 from meilisearch/do-not-reindex-searchable-order-change
Avoid reindexing searchable order changes
2025-03-25 07:03:14 +00:00
fb7ae9f97f Merge pull request #5454 from meilisearch/update-charabia-v0.9.3
Update Charabia v0.9.3
2025-03-24 22:34:51 +00:00
cd421fea1e Merge pull request #5456 from meilisearch/fix-CI
Fix CI to work with merge queues
2025-03-25 09:55:59 +00:00
1ad4235beb Remove the bors file 2025-03-25 10:05:41 +01:00
de6c7e551e Remove bors references from the repository 2025-03-25 10:04:38 +01:00
c0fe70c5f0 Make the CI work with merge queue grouping 2025-03-25 10:04:24 +01:00
a09d08c7b6 Avoid reindexing searchable order changes
2025-03-24 16:26:52 +01:00
2e6aa63efc Update Charabia v0.9.3 2025-03-24 14:32:21 +01:00
7df2bdfb15 Merge #5436
5436: Update mini-dashboard to v0.2.19 version r=Kerollmops a=curquiza

Fixes mini dashboard to prevent the panel from popping up every time

Fixed by `@mdubus` 👍 

Co-authored-by: curquiza <clementine@meilisearch.com>
2025-03-18 16:24:31 +00:00
71f7456748 Update mini-dashboard to v0.2.19 version 2025-03-18 12:48:38 +01:00
c98b313d03 Merge #5426
5426: Bump zip from 2.2.2 to 2.3.0 r=Kerollmops a=dependabot[bot]

Bumps [zip](https://github.com/zip-rs/zip2) from 2.2.2 to 2.3.0.
Release notes, sourced from [zip's releases](https://github.com/zip-rs/zip2/releases) (the entries in [zip's changelog](https://github.com/zip-rs/zip2/blob/master/CHANGELOG.md) are identical):

**2.3.0 (2025-03-16)**
- 🚀 Features: add support for NTFS extra field ([#279](https://redirect.github.com/zip-rs/zip2/pull/279))
- 🐛 Bug fixes: conditionalize a zip64 doctest ([#308](https://redirect.github.com/zip-rs/zip2/pull/308)); fix failing tests and remove the symlink loop check; canonicalize output paths to avoid false negatives; fix symlink handling in stream extraction; canonicalize output paths and symlink targets, and ensure they descend from the destination
- ⚙️ Miscellaneous: fix clippy and cargo fmt warnings ([#310](https://redirect.github.com/zip-rs/zip2/pull/310))

**2.2.3 (2025-02-26)**
- 🚜 Refactor: change the inner structure of `DateTime` ([#267](https://redirect.github.com/zip-rs/zip2/issues/267))
- ⚙️ Miscellaneous: `cargo fix --edition`

Commits:
- [`6eab5f5`](6eab5f5cc6) chore: release v2.3.0 ([#300](https://redirect.github.com/zip-rs/zip2/issues/300))
- [`e4aee20`](e4aee2050f) implement `ZipFile::options` + refactor options normalization ([#305](https://redirect.github.com/zip-rs/zip2/issues/305))
- [`ea8a7bb`](ea8a7bba24) fix(test): Conditionalize a zip64 doctest ([#308](https://redirect.github.com/zip-rs/zip2/issues/308))
- [`365c81a`](365c81a39f) Use `xz2` crate instead of a custom implementation ([#306](https://redirect.github.com/zip-rs/zip2/issues/306))
- [`ae94b34`](ae94b3452b) chore: Fix clippy and cargo fmt warnings ([#310](https://redirect.github.com/zip-rs/zip2/issues/310))
- [`a2e062f`](a2e062f370) Merge commit from fork
- [`0199ac2`](0199ac2cb8) Simplify handling for symlink targets
- [`977bb94`](977bb9479d) fix failing tests, remove symlink loop check
- [`3cb29e7`](3cb29e70d1) Partial fix for tests
- [`2182b07`](2182b07686) Refactor
- Additional commits viewable in the [compare view](https://github.com/zip-rs/zip2/compare/v2.2.2...v2.3.0)


[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=zip&package-manager=cargo&previous-version=2.2.2&new-version=2.3.0)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`.


Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-03-18 08:57:11 +00:00
69678ed8e1 Bump zip from 2.2.2 to 2.3.0
Bumps [zip](https://github.com/zip-rs/zip2) from 2.2.2 to 2.3.0.
- [Release notes](https://github.com/zip-rs/zip2/releases)
- [Changelog](https://github.com/zip-rs/zip2/blob/master/CHANGELOG.md)
- [Commits](https://github.com/zip-rs/zip2/compare/v2.2.2...v2.3.0)

---
updated-dependencies:
- dependency-name: zip
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-03-18 00:19:49 +00:00
bf144a94d8 No longer use the FST to find a word without any typo 2025-03-17 14:20:10 +01:00
b0b1888ef9 Add test 2025-03-17 14:20:10 +01:00
6ec1d2b712 Merge #5423
5423: Bump ring to v0.17.14 to compile on old aarch64 r=irevoire a=Kerollmops

This PR will fix [this CI issue](https://github.com/meilisearch/meilisearch/actions/runs/13896085925/job/38876941154) where ring v0.17.13 breaks the compilation on old aarch64 machines by bumping its version to v0.17.14.

Co-authored-by: Kerollmops <clement@meilisearch.com>
2025-03-17 12:53:02 +00:00
cbdf80893d Merge #5422
5422: Add more progress levels to measure merging r=Kerollmops a=Kerollmops

I found out that Meilisearch was not correctly reporting long indexing times in the progress, and that a lot of time appeared to be spent on extracting words even after all documents had already been extracted. The reason was that there was no step to report merging the cache and sending the entries to the writer thread. This PR adds these steps to the progress.

Co-authored-by: Kerollmops <clement@meilisearch.com>
2025-03-17 12:02:46 +00:00
e2156ddfc7 Simplify the IndexingStep progress enum 2025-03-17 11:40:50 +01:00
49dd50dab2 Bump ring to v0.17.14 to compile on old aarch64 2025-03-17 11:29:17 +01:00
13a88d6131 Merge #5407
5407: Geo update bug r=irevoire a=ManyTheFish

# Pull Request

## Related issue
Fixes #5380
Fixes #5399



Co-authored-by: Tamo <tamo@meilisearch.com>
Co-authored-by: ManyTheFish <many@meilisearch.com>
2025-03-17 10:24:33 +00:00
d9875b782d Merge #5421
5421: Accept total batch size in human size r=irevoire a=Kerollmops

This PR fixes the new `experimental-limit-batched-tasks-total-size` to accept human-defined sizes in bytes.

Co-authored-by: Kerollmops <clement@meilisearch.com>
2025-03-17 09:41:22 +00:00
cb16baab18 Add more progress levels to measure merging 2025-03-17 10:13:29 +01:00
2500e3c067 Merge #5414
5414: Update version for the next release (v1.14.0) in Cargo.toml r=Kerollmops a=meili-bot

⚠️ This PR is automatically generated. Check the new version is the expected one and Cargo.lock has been updated before merging. Fixes https://github.com/meilisearch/meilisearch/issues/5268.

Co-authored-by: Kerollmops <Kerollmops@users.noreply.github.com>
Co-authored-by: Kerollmops <clement@meilisearch.com>
2025-03-14 13:35:54 +00:00
d3e4b2dfe7 Accept total batch size in human size 2025-03-14 13:07:51 +01:00
2a46624e19 Merge #5420
5420: Add support for the progress API of arroy r=Kerollmops a=irevoire

# Pull Request

## Related issue
Fixes https://github.com/meilisearch/meilisearch/issues/5419

## What does this PR do?
- Convert the arroy progress to the meilisearch progress
- Use the new arroy closure to support the progress of arroy


Co-authored-by: Tamo <tamo@meilisearch.com>
2025-03-13 18:03:08 +00:00
009c36a4d0 Add support for the progress API of arroy 2025-03-13 19:00:43 +01:00
2a47e25e6d Update the upgrade path snap 2025-03-13 18:35:06 +01:00
82912e191b Merge #5418
5418: Cache embeddings in search r=Kerollmops a=dureuill

# Pull Request

## Related issue
TBD

## What does this PR do?
- Adds a cache for embeddings produced in search
- The cache is disabled by default, and can be enabled following the instructions [here](https://github.com/orgs/meilisearch/discussions/818).
- Had to accommodate the `timeout` test for openai, which uses a mock that simulates a timeout on subsequent responses: since the test was reusing the same query, the cache would kick in and no request would be made to the mock, meaning no timeout any longer and thus a failing test 😅
- `Embedder::embed_search` now accepts a reference instead of an owned `String`.

## Manual testing

- I created 4 indexes on a fresh DB with the same settings (one embedder from openai)
- I sent 1/4 of movies.json to each index
- I sent a federated search request against all 4 indexes, with the same query for each index, using the embedder of each index.

Results:

- The first call took 400ms to 1s. Before this change, it took in the 3s range.
- Any repeated call with the same query took in the range of 25ms.
- Looking at the details at trace log level, I can see that the first index that needs the embedding is taking most of the 400ms in `embed_one`. The other indexes report that the query text is found in the cache and they each take a few µs.

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2025-03-13 16:37:15 +00:00
e2d372823a Disable the cache by default and make it experimental 2025-03-13 17:22:51 +01:00
1876132172 Mutex-based implementation 2025-03-13 17:22:50 +01:00
d0b0b90d17 fixup tests, in particular foil the cache for the timeout test 2025-03-13 17:22:50 +01:00
b08544e86d Add embedding cache 2025-03-13 17:22:50 +01:00
d9111fe8ce Add lru crate to milli again 2025-03-13 17:22:50 +01:00
41d8161017 Update the versions 2025-03-13 17:22:32 +01:00
7df5715d39 Merge pull request #5406 from meilisearch/bump-heed
Bump heed to v0.22 and arroy to v0.6
2025-03-13 16:52:45 +01:00
5fe02ab5e0 Move to heed 0.22 and arroy 0.6 2025-03-13 15:48:18 +01:00
5ef7767429 Let arroy use all the available memory instead of 50% of the 70% 2025-03-13 15:06:03 +01:00
3fad48167b remove arroy dependency in the index-scheduler 2025-03-13 14:57:56 +01:00
a92a48b9b9 Do not recompute stats on dumpless upgrade
Co-authored-by: Tamo <tamo@meilisearch.com>
2025-03-13 13:58:58 +01:00
d53225bf64 uses a random seed instead of 42 2025-03-13 12:43:31 +01:00
20896500c2 Bump arroy to the latest version 2025-03-13 12:37:10 +01:00
1af520077c Call the underlying Env::copy_to_path method 2025-03-13 11:49:25 +01:00
7e07cb9de1 Make meilitool prefer WithoutTls Env 2025-03-13 11:47:19 +01:00
a12b06d99d Merge #5369
5369: exhaustive facet search r=ManyTheFish a=ManyTheFish

Fixes #5403

This PR adds an `exhaustiveFacetCount` field to the `/facet-search` API, allowing the end user to get a more accurate facet count when a distinct attribute is set in the index settings.

# Usage

`POST /index/:index_uid/facet-search`
**Body:**
```json
{
  "facetQuery": "blob",
  "facetName": "genres",
  "q": "",
  "exhaustiveFacetCount": true
}
```

# Prototype Docker images

```sh
$ docker pull getmeili/meilisearch:prototype-exhaustive-facet-search-00
```

Co-authored-by: ManyTheFish <many@meilisearch.com>
2025-03-13 10:36:04 +00:00
331dc3d241 Add a comment to explain why we keep debug assertions 2025-03-13 11:29:00 +01:00
ef9d9f8481 set the memory in arroy 2025-03-13 11:29:00 +01:00
d3d22d8ed4 Prefer waiting for the task before getting the indexes 2025-03-13 11:29:00 +01:00
5e6abcf50c Prefer using WithoutTls for the auth env 2025-03-13 11:29:00 +01:00
a4aaf932ba Fix some test (invalid anyway) 2025-03-13 11:29:00 +01:00
16c962eb30 Enable debug assertions of heed 2025-03-13 11:07:49 +01:00
55ca2c4481 Avoid opening the Auth environment multiple times 2025-03-13 11:07:49 +01:00
fedb444e66 Fix the upgrade arroy calls 2025-03-13 11:07:49 +01:00
bef5954741 Use a WithoutTls env 2025-03-13 11:07:49 +01:00
ff8cf38d6b Move to the latest version of arroy 2025-03-13 11:07:48 +01:00
f8ac575ec5 Move to the latest version of arroy 2025-03-13 11:07:48 +01:00
566b4efb06 Dumpless upgrade from v1.13 to v1.14 2025-03-13 11:07:44 +01:00
1d499ed9b2 Use the new arroy upgrade method to move from 0.4 to 0.5 2025-03-13 11:07:44 +01:00
3bc62f0549 WIP: Still need to introduce a Env::copy_to_path method 2025-03-13 11:07:39 +01:00
21bbbdec76 Specify WithoutTls everywhere 2025-03-13 11:07:38 +01:00
78ebd8dba2 Fix the error variants 2025-03-13 11:07:38 +01:00
34df44a002 Open Env without TLS 2025-03-13 11:07:38 +01:00
48a27f669e Bump heed and other dependencies 2025-03-13 11:07:37 +01:00
e2d0ce52ba Merge #5384
5384: Get multiple documents by ids r=irevoire a=dureuill

# Pull Request

## Related issue
Fixes #5345 

## What does this PR do?
- Implements [public usage](https://www.notion.so/meilisearch/Get-documents-by-ID-1994b06b651f805ba273e1c6b75ce4d8)
- Slightly refactor error messages for the `/similar` route

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2025-03-12 17:26:49 +00:00
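Combined with the commit "Add ids parameter to GET documents and POST documents/fetch" further down, the public usage presumably allows fetching a batch of documents by their ids. A minimal sketch, assuming the parameter is a plain array of document ids (the exact payload shape is an assumption, not taken from this log):

`POST /indexes/:index_uid/documents/fetch`
```json
{
  "ids": ["574", "575", "576"]
}
```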
995f8962bd Merge #5398
5398: Bump ring from 0.17.8 to 0.17.13 r=Kerollmops a=dependabot[bot]

Bumps [ring](https://github.com/briansmith/ring) from 0.17.8 to 0.17.13.
Changelog, sourced from [ring's changelog](https://github.com/briansmith/ring/blob/main/RELEASES.md):

**Version 0.17.13 (2025-03-06)**
- Increased MSRV to 1.66.0 to avoid bugs in earlier versions, so that `core::arch::x86_64::__cpuid` and `core::arch::x86::__cpuid` can safely be used from Rust in future releases.
- AVX2-based VAES-CLMUL implementation. This will be a notable performance improvement for most newish x86-64 systems, and will likely raise the minimum binutils version supported for very old Linux distros.

**Version 0.17.12 (2025-03-05)**
Bug fix: [briansmith/ring#2447](https://redirect.github.com/briansmith/ring/pull/2447) for denial of service (DoS).
- Fixes a panic in `ring::aead::quic::HeaderProtectionKey::new_mask()` when integer overflow checking is enabled. In the QUIC protocol, an attacker can induce this panic by sending a specially-crafted packet. Even unintentionally, it is likely to occur in 1 out of every 2**32 packets sent and/or received.
- Fixes a panic on 64-bit targets in `ring::aead::{AES_128_GCM, AES_256_GCM}` when overflow checking is enabled, when encrypting/decrypting approximately 68,719,476,700 bytes (about 64 gigabytes) of data in a single chunk. Protocols like TLS and SSH are not affected by this because those protocols break large amounts of data into small chunks. Similarly, most applications will not attempt to encrypt/decrypt 64GB of data in one chunk.

Overflow checking is not enabled in release mode by default, but `RUSTFLAGS="-C overflow-checks"` or `overflow-checks = true` in the Cargo.toml profile can override this. Overflow checking is usually enabled by default in debug mode.

Commits: see the full diff in the [compare view](https://github.com/briansmith/ring/commits).


[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=ring&package-manager=cargo&previous-version=0.17.8&new-version=0.17.13)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`.


Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-03-12 13:20:30 +00:00
1cd00f37c0 Merge #5413
5413: Make sure to delete useless prefixes r=ManyTheFish a=Kerollmops

We discovered a bug where the new indexer was still writing empty roaring bitmaps instead of deleting the prefix entry from the prefix database.

Co-authored-by: Kerollmops <clement@meilisearch.com>
2025-03-12 10:54:04 +00:00
1aa3375e12 Update version for the next release (v1.14.0) in Cargo.toml 2025-03-12 10:51:04 +00:00
60ff1b19a8 Searching for a document that does not exist no longer raises an error 2025-03-12 11:50:39 +01:00
7df5e3f059 Fix error message
Co-authored-by: Tamo <tamo@meilisearch.com>
2025-03-12 11:48:40 +01:00
0197dc87e0 Make sure to delete useless prefixes 2025-03-12 11:24:13 +01:00
7a172b82ca Add test 2025-03-12 11:22:59 +01:00
eb3ff325d1 Add an exhaustiveFacetCount field to the facet-search API 2025-03-12 11:22:59 +01:00
d3cd5ea689 Check if the geo fields changed in addition to the other faceted fields when reindexing facets 2025-03-12 11:20:10 +01:00
3ed43f9097 add a failing test reproducing the bug 2025-03-12 11:20:10 +01:00
a2a86ef4e2 Merge #5254
5254: Granular Filterable attribute settings r=ManyTheFish a=ManyTheFish

# Related
**Issue:** https://github.com/meilisearch/meilisearch/issues/5163
**PRD:** https://meilisearch.notion.site/API-usage-Settings-to-opt-out-indexing-features-filterableAttributes-1764b06b651f80aba8bdf359b2df3ca8

# Summary
Change the `filterableAttributes` settings to let the user choose which facet features to activate.
Deactivating a feature avoids some database computation during indexing, saving time and disk space.

# Example

`PATCH /indexes/:index_uid/settings`

```json
{
  "filterableAttributes": [
    {
      "patterns": [
        "cattos",
        "doggos.age"
      ],
      "features": {
        "facetSearch": false,
        "filter": {
          "equality": true,
          "comparison": false
        }
      }
    }
  ]
}
```

# Impact on the codebase
- Settings API:
  - `/settings`
  - `/settings/filterable-attributes`
  - OpenAPI 
  - may impact the LocalizedAttributesRules due to the AttributePatterns factorization
- Database:
  - Filterable attributes format changed
  - Faceted field_ids are no more stored in the database
  - FieldIdsMap no longer contains nonexistent fields
- Search:
  - Search using filters
  - Facet search
  - `Attributes` ranking rule
  - Distinct attribute
  - Facet distribution
- Settings reindexing:
  - searchable
  - facet
  - vector
  - geo
- Document indexing:
  - searchable
  - facet
  - vector
  - geo
- Dump import

# Note for the reviewers
The changes are large and have been split into different commits, each with a dedicated explanation; I suggest reviewing the commits one by one.

Co-authored-by: ManyTheFish <many@meilisearch.com>
2025-03-12 09:00:43 +00:00
d500c7f625 Add default deserialize value 2025-03-11 17:55:49 +01:00
ea7e299663 Update has_changed_for_fields documentation 2025-03-11 16:48:55 +01:00
a370b467fe Merge MetadataBuilder::_new into MetadataBuilder::new 2025-03-11 15:31:57 +01:00
8790880589 Fix clippy 2025-03-11 15:22:39 +01:00
7072fe9780 Fix typos in comments and messages 2025-03-11 15:22:00 +01:00
d0dda78f3d Merge #5401
5401: Make composite embedders an experimental feature r=irevoire a=dureuill

# Pull Request

## Related issue
Fixes #5343 

## What does this PR do?
- Introduce new `compositeEmbedders` experimental feature
- Guard `source = "composite"` and `searchEmbedder`, `indexingEmbedder` behind enabling the feature.
- Update tests accordingly

## Dumpless upgrade

- Adding an experimental feature is never a breaking change

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2025-03-11 14:20:36 +00:00
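Since the PR introduces a `compositeEmbedders` experimental feature, enabling it presumably goes through the runtime experimental-features route like other instance features. A hedged sketch; the route and the exact flag spelling are assumptions based on the feature name mentioned above:

`PATCH /experimental-features`
```json
{
  "compositeEmbedders": true
}
```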
fa8afc5cfd Style change after review
Co-authored-by: Tamo <tamo@meilisearch.com>
2025-03-11 13:25:35 +01:00
6d52c6e711 Merge branch 'main' into granular-filterable-attributes 2025-03-11 10:05:58 +01:00
dfb8411647 Revert "Remove filter pre-check"
This reverts commit b12ffd1356.
2025-03-11 09:48:30 +01:00
6269f757ff Revert document creation in tests 2025-03-10 18:35:10 +01:00
40c5f911fd Revert metadata creation when computing the facet-distribution 2025-03-10 17:05:41 +01:00
abef655849 Revert metadata creation when computing facet search and distinct 2025-03-10 15:45:59 +01:00
b12ffd1356 Remove filter pre-check 2025-03-10 14:29:45 +01:00
c9a4c6ed96 Revert metadata creation when computing filters at search time 2025-03-10 14:29:44 +01:00
aa32b719c7 Add tests about experimentalness of the feature and fix existing 2025-03-10 14:23:22 +01:00
41d2b1e52b Analytics 2025-03-10 14:23:07 +01:00
54ee81bb09 Make composite embedders experimental 2025-03-10 14:22:47 +01:00
689e69d6d2 Take into account PR messages 2025-03-10 13:46:33 +01:00
9d9e0d4c54 Add analytics 2025-03-10 11:33:15 +01:00
19c9caed39 Fix tests 2025-03-10 11:11:48 +01:00
21c3b3957e tests: Change get_document_by_filter to fetch_documents 2025-03-10 11:11:48 +01:00
f292fc9ac0 Add ids parameter to GET documents and POST documents/fetch 2025-03-10 11:11:48 +01:00
1d3c4642a6 Don't use Deserr for ExternalDocumentId, instead convert to error afterward 2025-03-10 11:11:48 +01:00
9a282be0a2 Merge #5393
5393: Bring back changes from v1.13.3 into main r=irevoire a=Kerollmops



Co-authored-by: Kerollmops <clement@meilisearch.com>
Co-authored-by: Kerollmops <Kerollmops@users.noreply.github.com>
Co-authored-by: Strift <lau.cazanove@gmail.com>
2025-03-10 07:57:02 +00:00
bea28968a0 Bump ring from 0.17.8 to 0.17.13
Bumps [ring](https://github.com/briansmith/ring) from 0.17.8 to 0.17.13.
- [Changelog](https://github.com/briansmith/ring/blob/main/RELEASES.md)
- [Commits](https://github.com/briansmith/ring/commits)

---
updated-dependencies:
- dependency-name: ring
  dependency-type: indirect
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-03-07 17:04:57 +00:00
ed1dcbe0f7 Fix behavior change in the Attributes criterion 2025-03-06 14:18:25 +01:00
5ceddbda84 Add the max_weight of the weight map if it's lacking 2025-03-06 13:58:28 +01:00
ca41ce3bbd Old indexer document addition now checks if facet search is globally activated 2025-03-06 11:43:42 +01:00
8ec0c322ea Apply PR requests related to Refactor the FieldIdMapWithMetadata 2025-03-06 11:42:53 +01:00
b88aa9cc76 Rely on FieldIdMapWithMetadata in facet search and filters 2025-03-05 18:22:12 +01:00
3fd86e8d76 Merge #5371
5371: Composite embedders r=irevoire a=dureuill

# Pull Request

## Related issue
Fixes #5343 

## What does this PR do?
- Implement [public usage](https://www.notion.so/meilisearch/Composite-embedder-usage-14a4b06b651f81859dc3df21e8cd02a0)
- Refactor the way we check if a parameter is mandatory/allowed/disallowed for a given source
- Take the "nesting context" into account when computing whether a parameter is mandatory/allowed/disallowed
- Add tests checking all parameters with all sources, and make sure the results didn't change compared with v1.13

## Dumpless Upgrade

- This adds a new value for an existing parameter => compatible without change
- This adds new optional parameters => compatible without change

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2025-03-05 17:18:11 +00:00
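The PR text names `source = "composite"` together with the `searchEmbedder` and `indexingEmbedder` parameters. A hypothetical settings payload combining those names; the embedder name `default` and the inner `source` values are placeholders, not taken from this log:

`PATCH /indexes/:index_uid/settings`
```json
{
  "embedders": {
    "default": {
      "source": "composite",
      "searchEmbedder": { "source": "huggingFace" },
      "indexingEmbedder": { "source": "huggingFace" }
    }
  }
}
```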
67f7470c83 Apply PR requests related to Refactor search and facet-search 2025-03-05 18:17:42 +01:00
4fab72cbea Rename SettingsDiff::diff to SettingsDiff::apply_and_diff 2025-03-05 18:16:57 +01:00
afb4b9677f Remove Embedder:embed 2025-03-05 18:16:57 +01:00
73d2dbd60f Error handling 2025-03-05 18:16:57 +01:00
57a6beee30 Test composite embedders 2025-03-05 18:16:57 +01:00
b190b612a3 Add test on all parameters 2025-03-05 18:16:57 +01:00
111e77eff2 Bump mini-dashboard to v0.2.18 2025-03-05 15:24:53 +01:00
ba30747de3 Bump v1.13.2 to v1.13.3 in the TOMLs and snaps 2025-03-05 15:24:53 +01:00
25f0536f5a Update version for the next release (v1.13.3) in Cargo.toml 2025-03-05 15:24:52 +01:00
c8c0951c43 Update the snapshots 2025-03-05 15:24:21 +01:00
63e753bde0 Apply PR requests related to settings API 2025-03-05 12:05:40 +01:00
5fa4b5c50a Add a test on filterable attributes rules priority
**Changes:**
- Add a new test playing with filterable attributes rules priority
- Optimize the faceted field selector to avoid matching false positives
2025-03-05 09:44:52 +01:00
a7a62e5e4c Add some documentation in modules 2025-03-05 08:49:18 +01:00
683a2ac685 Merge #5379
5379: Bring back the changes from v1.13.2 into main r=dureuill a=Kerollmops



Co-authored-by: Kerollmops <Kerollmops@users.noreply.github.com>
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2025-03-04 13:24:25 +00:00
e751342dfb Merge #5370
5370: Introduce a CI to check milestones and branches r=curquiza a=Kerollmops



Co-authored-by: Kerollmops <clement@meilisearch.com>
2025-03-03 15:51:52 +00:00
17bf82235d Merge #5381
5381: Bump actions/checkout from 1 to 3 r=Kerollmops a=dependabot[bot]

Bumps [actions/checkout](https://github.com/actions/checkout) from 1 to 3.
Release notes, sourced from [actions/checkout's releases](https://github.com/actions/checkout/releases) (truncated):

- **v3.0.0**: Updated to the node16 runtime by default; this requires a minimum [Actions Runner](https://github.com/actions/runner/releases/tag/v2.285.0) version of v2.285.0, which is by default available in GHES 3.4 or later.
- **v2.7.0**: Add new public key for known_hosts ([#1237](https://redirect.github.com/actions/checkout/issues/1237)) by @TingluoHuang in [actions/checkout#1238](https://redirect.github.com/actions/checkout/pull/1238).
- **v2.6.0**: Add backports to the v2 branch by @cory-miller in [actions/checkout#1040](https://redirect.github.com/actions/checkout/pull/1040); includes backports from [#964](https://redirect.github.com/actions/checkout/pull/964), [#1002](https://redirect.github.com/actions/checkout/pull/1002), and [#1029](https://redirect.github.com/actions/checkout/pull/1029), and upgrades the licensed version to match what is used in v3.
- **v2.5.0**: Update `@actions/core` to 1.10.0 by @rentziass in [actions/checkout#962](https://redirect.github.com/actions/checkout/pull/962).
- **v2.4.2**: Add set-safe-directory input to allow customers to take control ([#770](https://redirect.github.com/actions/checkout/issues/770)) by @TingluoHuang in [actions/checkout#776](https://redirect.github.com/actions/checkout/pull/776); prepare changelog for v2.4.2 in [actions/checkout#778](https://redirect.github.com/actions/checkout/pull/778).
- **v2.4.1**: Fixed an issue where checkout failed to run in container jobs due to the new git setting `safe.directory`.
- **v2.4.0**: Convert SSH URLs like `org-<ORG_ID>@github.com:` to `https://github.com/` ([pr](https://redirect.github.com/actions/checkout/pull/621)).
- **v2.3.5**: Update dependencies.
- **v2.3.4**: [Add missing `await`s](https://redirect.github.com/actions/checkout/pull/379); [swap to Environment Files](https://redirect.github.com/actions/checkout/pull/360).
- **v2.3.3**: [Remove unneeded commit information from build logs](https://redirect.github.com/actions/checkout/pull/345); [add Licensed to verify third party dependencies](https://redirect.github.com/actions/checkout/pull/326).

Changelog, sourced from [actions/checkout's changelog](https://github.com/actions/checkout/blob/main/CHANGELOG.md) (truncated):

- **v4.2.2**: `url-helper.ts` now leverages well-known environment variables by @jww3 in [#1941](https://redirect.github.com/actions/checkout/pull/1941); expand unit test coverage for `isGhes` by @jww3 in [#1946](https://redirect.github.com/actions/checkout/pull/1946).
- **v4.2.1**: Check out other refs/* by commit if provided, fall back to ref, by @orhantoy in [#1924](https://redirect.github.com/actions/checkout/pull/1924).
- **v4.2.0**: Add Ref and Commit outputs by @lucacome in [#1180](https://redirect.github.com/actions/checkout/pull/1180); dependency updates by @dependabot in [#1777](https://redirect.github.com/actions/checkout/pull/1777) and [#1872](https://redirect.github.com/actions/checkout/pull/1872).
- **v4.1.7**: Bump the minor-npm-dependencies group across 1 directory with 4 updates by @dependabot in [#1739](https://redirect.github.com/actions/checkout/pull/1739); bump actions/checkout from 3 to 4 in [#1697](https://redirect.github.com/actions/checkout/pull/1697); check out other refs/* by commit by @orhantoy in [#1774](https://redirect.github.com/actions/checkout/pull/1774); pin actions/checkout's own workflows to a known, good, stable version by @jww3 in [#1776](https://redirect.github.com/actions/checkout/pull/1776).
- **v4.1.6**: Check platform to set archive extension appropriately by @cory-miller in [#1732](https://redirect.github.com/actions/checkout/pull/1732).
- **v4.1.5**: Update NPM dependencies by @cory-miller in [#1703](https://redirect.github.com/actions/checkout/pull/1703); bump github/codeql-action from 2 to 3 in [#1694](https://redirect.github.com/actions/checkout/pull/1694); bump actions/setup-node from 1 to 4 in [#1696](https://redirect.github.com/actions/checkout/pull/1696); bump actions/upload-artifact from 2 to 4 in [#1695](https://redirect.github.com/actions/checkout/pull/1695); README: suggest `user.email` to be `41898282+github-actions[bot]@users.noreply.github.com` by @cory-miller in [#1707](https://redirect.github.com/actions/checkout/pull/1707).
- **v4.1.4**: Disable `extensions.worktreeConfig` when disabling `sparse-checkout` by @jww3 in [#1692](https://redirect.github.com/actions/checkout/pull/1692); add dependabot config by @cory-miller in [#1688](https://redirect.github.com/actions/checkout/pull/1688); bump the minor-actions-dependencies group with 2 updates in [#1693](https://redirect.github.com/actions/checkout/pull/1693); bump word-wrap from 1.2.3 to 1.2.5 in [#1643](https://redirect.github.com/actions/checkout/pull/1643).
- **v4.1.3**: Check git version before attempting to disable `sparse-checkout` by @jww3 in [#1656](https://redirect.github.com/actions/checkout/pull/1656); add SSH user parameter by @cory-miller in [#1685](https://redirect.github.com/actions/checkout/pull/1685); update `actions/checkout` version in `update-main-version.yml` by @jww3 in [#1650](https://redirect.github.com/actions/checkout/pull/1650).
- **v4.1.2**: Fix: disable sparse checkout whenever the `sparse-checkout` option is not present, by @dscho in [#1598](https://redirect.github.com/actions/checkout/pull/1598).
- **v4.1.1**: Correct link to GitHub Docs by @peterbe in [#1511](https://redirect.github.com/actions/checkout/pull/1511); link to the release page from the what's new section by @cory-miller in [#1514](https://redirect.github.com/actions/checkout/pull/1514).
- **v4.1.0**: [Add support for partial checkout filters](https://redirect.github.com/actions/checkout/pull/1396).

Commits:
- [`f43a0e5`](f43a0e5ff2) Release 3.6.0 ([#1437](https://redirect.github.com/actions/checkout/issues/1437))
- [`7739b9b`](7739b9ba2e) Add option to fetch tags even if fetch-depth > 0 ([#579](https://redirect.github.com/actions/checkout/issues/579))
- [`96f5310`](96f53100ba) Mark test scripts with Bash'isms to be run via Bash ([#1377](https://redirect.github.com/actions/checkout/issues/1377))
- [`c85c95e`](c85c95e3d7) Release v3.5.3 ([#1376](https://redirect.github.com/actions/checkout/issues/1376))
- [`d106d46`](d106d4669b) Add support for sparse checkouts ([#1369](https://redirect.github.com/actions/checkout/issues/1369))
- [`f095bcc`](f095bcc56b) Fix typos found by codespell ([#1287](https://redirect.github.com/actions/checkout/issues/1287))
- [`47fbe2d`](47fbe2df0a) Fix: Checkout fail in self-hosted runners when faulty submodule are checked-i...
- [`8e5e7e5`](8e5e7e5ab8) Release v3.5.2 ([#1291](https://redirect.github.com/actions/checkout/issues/1291))
- [`eb35239`](eb35239ec2) Fix: convert baseUrl to serverApiUrl 'formatted' ([#1289](https://redirect.github.com/actions/checkout/issues/1289))
- [`83b7061`](83b7061638) Release v3.5.1 ([#1284](https://redirect.github.com/actions/checkout/issues/1284))
- Additional commits viewable in the [compare view](https://github.com/actions/checkout/compare/v1...v3)

Most recent ignore conditions applied to this pull request:

| Dependency Name | Ignore Conditions |
| --- | --- |
| actions/checkout | [>= 4.a, < 5] |


[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=actions/checkout&package-manager=github_actions&previous-version=1&new-version=3)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

You can trigger a rebase of this PR by commenting `@dependabot rebase`.


Co-authored-by: Kerollmops <clement@meilisearch.com>
2025-03-03 15:14:00 +00:00
0401c4e511 Add a settings API test 2025-03-03 16:08:21 +01:00
4798c35c50 Merge #5383
5383: Skip a snapshot test on Windows r=dureuill a=Kerollmops

This PR skips the `perform_on_demand_snapshot` test on Windows, which is very flaky on this platform. Note that we keep running it on macOS and Ubuntu.

Co-authored-by: Kerollmops <clement@meilisearch.com>
2025-03-03 13:23:24 +00:00
9585950e0e Merge #5365
5365: Mention openAPI in CONTRIBUTING.md r=Kerollmops a=irevoire

I only referred to other documents to be sure the process is written only once and won’t get out of sync.

Co-authored-by: Tamo <tamo@meilisearch.com>
2025-03-03 11:23:51 +00:00
b8c6eb5453 Improve bors toml 2025-03-03 12:22:33 +01:00
02586e727e Introduce a CI to check milestones and branches 2025-03-03 12:22:24 +01:00
0cfc9261ba Skip a snapshot test on Windows 2025-03-03 10:44:28 +01:00
035674d56e Bump actions/checkout from 1 to 4 2025-03-03 10:37:28 +01:00
d35470e29b Update dumps
**Impact:**
- dump import
2025-03-03 10:33:39 +01:00
23e07f1a93 Attribute positions changed in snapshots
**Reason:**
Only the existing fields are registered in the fieldid_map
2025-03-03 10:33:39 +01:00
f2a28a4dd7 Add and enhance tests
**Changes:**
Introduce a test_settings_documents_indexing_swapping_and_search function that runs the test twice:
1) by indexing the settings before the documents, then running the test
2) by indexing the documents before the settings, then running the test

This helps ensure that there is no bug coming from one or the other indexer.
2025-03-03 10:33:39 +01:00
1994494155 Update snapshot using the new filterableAttributes type 2025-03-03 10:33:39 +01:00
6dbec91d2b Index document in filterable attributes tests
**Reason:**
Because the filterable attributes are patterns now,
the fieldIdMap will only register the fields that exist in at least one document.
If a field doesn't exist in any document, it will not be registered even if it has been specified in the filterable fields.
2025-03-03 10:33:39 +01:00
9a75dc6ab3 Update tests using filterable attributes rules
**Changes:**
Replace the BTreeSet<String> by Vec<FilterableAttributesRule> without changing the test results

**Impact:**
- None
2025-03-03 10:33:34 +01:00
ae8d453868 Refactor Document indexing process (searchables)
**Changes:**
The searchable database extraction is now relying on the AttributePatterns and FieldIdMapWithMetadata to match the field to extract.
Remove the SearchableExtractor trait to make the code less complex.

**Impact:**
- Document Addition/modification searchable indexing
- Document deletion searchable indexing
2025-03-03 10:32:42 +01:00
95bccaf5f5 Refactor Document indexing process (Facets)
**Changes:**
The document changes now take a selector closure instead of a list of fields to match the fields to extract.
The seek_leaf_values_in_object function now uses a selector closure instead of a list of fields to match the fields to extract.
The facet database extraction is now relying on the FilterableAttributesRule to match the field to extract.
The facet-search database extraction is now relying on the FieldIdMapWithMetadata to select the field to index.
The facet level database extraction is now relying on the FieldIdMapWithMetadata to select the field to index.

**Important:**
Because the filterable attributes are patterns now,
the fieldIdMap will only register the fields that exist in at least one document.
If a field doesn't exist in any document, it will not be registered even if it has been specified in the filterable fields.

**Impact:**
- Document Addition/modification facet indexing
- Document deletion facet indexing
2025-03-03 10:32:03 +01:00
659855c88e Refactor Settings Indexing process
**Changes:**
The transform structure is now relying on FieldIdMapWithMetadata and AttributePatterns to prepare
the obkv documents during a settings reindexing.
The InnerIndexSettingsDiff and InnerIndexSettings structs are now relying on FieldIdMapWithMetadata, FilterableAttributesRule and AttributePatterns to define the field and the databases that should be reindexed.
The faceted_fields_ids, localized_searchable_fields_ids and localized_faceted_fields_ids have been removed in favor of the FieldIdMapWithMetadata.
We are now relying on the FieldIdMapWithMetadata to retain vectors_fids from the facets and the searchables.

The searchable database computing is now relying on the FieldIdMapWithMetadata to know if a field is searchable and retrieve the locales.

The facet database computing is now relying on the FieldIdMapWithMetadata to compute the facet databases, the facet-search and retrieve the locales.

The facet level database computing is now relying on the FieldIdMapWithMetadata and the facet level database are cleared depending on the settings differences (clear_facet_levels_based_on_settings_diff).

The vector point extraction uses the FieldIdMapWithMetadata instead of FieldsIdsMapWithMetadata.

**Impact:**
- Dump import
- Settings update
2025-03-03 10:32:02 +01:00
286d310287 Fix inconsistency in attribute ranking rule computation
**Changes:**
The building of the Attributes ranking rule graph was comparing fieldids with weights,
which doesn't make sense and is bug-prone; we now compare fieldids with fieldids.

**Impact:**
- search: Attribute ranking rule
2025-03-03 10:29:34 +01:00
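To illustrate why the old comparison could compile yet still be wrong (assuming both ids and weights are plain integer aliases, which is what makes the mix-up easy to miss):

```rust
type FieldId = u16;
type Weight = u16;

// Before: compiles because both sides are u16, but relates unrelated values.
fn same_attribute_buggy(fid: FieldId, weight: Weight) -> bool {
    fid == weight
}

// After: field ids are compared with field ids.
fn same_attribute_fixed(fid: FieldId, other: FieldId) -> bool {
    fid == other
}
```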
4f7ece2411 Refactor the FieldIdMapWithMetadata
**Changes:**
The FieldIdMapWithMetadata structure now stores more information about fields.
The metadata_for_field function computes all the needed information from the user-provided data instead of the enriched data (searchable/sortable),
which may solve an indexing bug where sortable attributes were not matching nested fields.

The FieldIdMapWithMetadata structure was duplicated in the embeddings as FieldsIdsMapWithMetadata,
so the FieldsIdsMapWithMetadata has been removed in favor of FieldIdMapWithMetadata.

The facet distribution now relies on the FieldIdMapWithMetadata to determine whether a field can be faceted.

**Impact:**
- searchable attributes matching
- searchable attributes weight computation
- sortable attributes matching
- faceted fields matching
- prompt computing
- facet distribution
2025-03-03 10:29:33 +01:00
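A minimal sketch of the point about matching on user-declared patterns: nested fields like `address.city` resolve the same way in every code path because every piece of metadata is derived from the same patterns. The `Pattern` type and the wildcard rule below are simplified stand-ins for milli's AttributePatterns:

```rust
// Stand-in pattern type; the real matching rules are richer than this.
struct Pattern(String);

impl Pattern {
    fn match_str(&self, field: &str) -> bool {
        self.0 == field
            || self.0.strip_suffix(".*").is_some_and(|prefix| field.starts_with(prefix))
    }
}

struct Metadata {
    searchable: bool,
    sortable: bool,
}

// All metadata comes from the user-declared patterns, not enriched data.
fn metadata_for_field(field: &str, searchable: &[Pattern], sortable: &[Pattern]) -> Metadata {
    Metadata {
        searchable: searchable.iter().any(|p| p.match_str(field)),
        sortable: sortable.iter().any(|p| p.match_str(field)),
    }
}
```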
967033579d Refactor search and facet-search
**Changes:**
The search filters now use the FilterableAttributesFeatures from the FilterableAttributesRules to know if a field is filterable.
Moreover, the FilterableAttributesFeatures is more precise, and an error is returned if an operator is used on a field that doesn't have the related feature.
The facet-search now checks that the feature is allowed in the FilterableAttributesFeatures and returns an error if the field doesn't have the related feature.

**Impact:**
- facet-search is now relying on AttributePatterns to match the locales
- search using filters is now relying on FilterableAttributesFeatures
- distinct attribute is now relying on FilterableAttributesRules
2025-03-03 10:25:32 +01:00
0200c65ebf Change the filterableAttributes setting API
**Changes:**
The filterableAttributes type has been changed from a `BTreeSet<String>` to a `Vec<FilterableAttributesRule>`,
which is a list of rules defining patterns to match the documents' fields and a set of features to apply to the matching fields.
The order of the rules given by the user now matters: the features applied to a filterable field are chosen based on the rule order, as for the LocalizedAttributesRules.
This means that the list is no longer reordered and keeps the user-defined order;
moreover, any duplicates are no longer de-duplicated.

**Impact:**
- Settings API
- the database format of the filterable attributes changed
- may impact the LocalizedAttributesRules due to the AttributePatterns factorization
- OpenAPI generator
2025-03-03 10:22:02 +01:00
c63c25a9a2 Merge #5355
Some checks failed
Look for flaky tests / flaky (push) Failing after 1s
Indexing bench (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of indexing (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for geo (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for songs (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for Wikipedia articles (push) / Run and upload benchmarks (push) Has been cancelled
Publish binaries to GitHub release / Check the version validity (push) Failing after 5s
Test suite / Tests almost all features (push) Failing after 13s
Test suite / Tests on ubuntu-22.04 (push) Failing after 19s
Test suite / Test with Ollama (push) Failing after 7s
Test suite / Test disabled tokenization (push) Failing after 10s
Test suite / Run tests in debug (push) Failing after 15s
Test suite / Run Rustfmt (push) Failing after 16s
Test suite / Run Clippy (push) Successful in 9m39s
SDKs tests / define-docker-image (push) Failing after 5s
SDKs tests / .NET SDK tests (push) Has been skipped
SDKs tests / Dart SDK tests (push) Has been skipped
SDKs tests / Go SDK tests (push) Has been skipped
SDKs tests / Java SDK tests (push) Has been skipped
SDKs tests / JS SDK tests (push) Has been skipped
SDKs tests / PHP SDK tests (push) Has been skipped
SDKs tests / Python SDK tests (push) Has been skipped
SDKs tests / Ruby SDK tests (push) Has been skipped
SDKs tests / Rust SDK tests (push) Has been skipped
SDKs tests / Swift SDK tests (push) Has been skipped
SDKs tests / meilisearch-js-plugins tests (push) Has been skipped
SDKs tests / meilisearch-rails tests (push) Has been skipped
SDKs tests / meilisearch-symfony tests (push) Has been skipped
Test suite / Tests on macos-13 (push) Has been cancelled
Test suite / Tests on windows-2022 (push) Has been cancelled
5355: Support fetching the pooling method from the model configuration r=Kerollmops a=dureuill

# Pull Request

## Related issue
Fixes #5354 

## What does this PR do?
- Fetches the pooling configuration from the model repository
- Uses a pooling method that depends on the pooling configuration of that model.
- Allows overriding the pooling method with a new huggingFace embedder parameter `pooling`
  - for backward-compatibility with Meilisearch v1.13
  - for compatibility with embedders that exhibit the same behavior as Meilisearch v1.13
- Handle the default value of that new parameter
   - for compatibility, when importing a db/a dump, it should be set to `forceMean`
   - when (re)set from the settings for an embedder, it should be set to `useModel`


Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2025-02-27 14:55:13 +00:00
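Roughly, the new parameter sits next to the other huggingFace embedder options; the model name below is a placeholder. Per the PR, `forceMean` reproduces the v1.13 behavior, while `useModel` defers to the model's own pooling configuration:

```json
{
  "embedders": {
    "default": {
      "source": "huggingFace",
      "model": "BAAI/bge-base-en-v1.5",
      "pooling": "useModel"
    }
  }
}
```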
046bbea864 Keep old stat format to make sure the number of documents is available during dumpless upgrade 2025-02-27 15:17:23 +01:00
c5cb7d2f2c Forbid opening a db of v1.13.x from v1.13.y 2025-02-27 15:17:23 +01:00
5e7f226ac9 Support dumpless upgrade for all v1.13 patches 2025-02-27 15:17:23 +01:00
754f254a00 Update snapshots following version bump 2025-02-27 15:17:23 +01:00
39b5ad3c86 Update version for the next release (v1.13.2) in Cargo.toml 2025-02-27 15:17:22 +01:00
80adbb1bdc Merge #5338
Some checks are pending
Indexing bench (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of indexing (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for geo (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for songs (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for Wikipedia articles (push) / Run and upload benchmarks (push) Waiting to run
Run the indexing fuzzer / Setup the action (push) Successful in 1h5m23s
5338: Bump Ubuntu in the CI from 20.04 to 22.04 r=dureuill a=Kerollmops

This PR bumps the Ubuntu version we use in the CI from version 20.04 to version 22.04. This also means we are [using GLIBC version 2.35 and not version 2.28](https://gist.github.com/zchrissirhcz/ee13f604996bbbe312ba1d105954d2ed).

Note, the indentation fix is done by my IDE (Zed), sorry about that 🤦 

Fixes https://github.com/meilisearch/meilisearch/issues/5374

Co-authored-by: Kerollmops <clement@meilisearch.com>
2025-02-27 08:14:12 +00:00
4b6fa1cf41 Merge #5372
Some checks failed
Indexing bench (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of indexing (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for geo (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for songs (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for Wikipedia articles (push) / Run and upload benchmarks (push) Waiting to run
Test suite / Tests on ubuntu-20.04 (push) Failing after 1s
Test suite / Tests almost all features (push) Has been skipped
Test suite / Test disabled tokenization (push) Has been skipped
Run the indexing fuzzer / Setup the action (push) Failing after 11s
Test suite / Test with Ollama (push) Failing after 10s
Test suite / Run tests in debug (push) Failing after 13s
Test suite / Run Clippy (push) Failing after 19s
Test suite / Run Rustfmt (push) Failing after 32s
Test suite / Tests on macos-13 (push) Has been cancelled
Test suite / Tests on windows-2022 (push) Has been cancelled
5372: Bring back changes from v1.13.1 to main r=irevoire a=Kerollmops



Co-authored-by: Kerollmops <Kerollmops@users.noreply.github.com>
Co-authored-by: Kerollmops <clement@meilisearch.com>
Co-authored-by: ManyTheFish <many@meilisearch.com>
Co-authored-by: Strift <lau.cazanove@gmail.com>
Co-authored-by: Many the fish <many@meilisearch.com>
2025-02-26 17:24:51 +00:00
dc78d8e9c4 Fix the dumpless upgrade log 2025-02-26 17:02:46 +01:00
d4063c9dcd Fix fmt 2025-02-26 17:02:45 +01:00
abebc574f6 Update crates/milli/src/index.rs
Co-authored-by: Tamo <tamo@meilisearch.com>
2025-02-26 17:02:45 +01:00
f32ab67819 Update crates/milli/src/index.rs
Co-authored-by: Tamo <tamo@meilisearch.com>
2025-02-26 17:02:44 +01:00
d25953f322 fix clippy 2025-02-26 17:02:43 +01:00
405bbd04c1 Dumpless upgrade 2025-02-26 17:01:38 +01:00
5d421abdc4 Update Snapshots 2025-02-26 17:01:37 +01:00
9f3663e768 Implement Incremental document database stats computing 2025-02-26 17:01:35 +01:00
d9642ec916 Use checked_div in average computation 2025-02-26 17:01:34 +01:00
818e8b0237 Fix zero division 2025-02-26 17:01:31 +01:00
4f77a7fba5 fix clippy 2025-02-26 17:01:29 +01:00
058f08dff5 fix snapshots 2025-02-26 17:01:26 +01:00
9a6c1730aa Add document database stats 2025-02-26 17:01:25 +01:00
91a8a97045 Bump 2025-02-26 17:01:24 +01:00
15788773af Check the exact_word database when computing zero typo query 2025-02-26 17:01:22 +01:00
025b9b79bb Update the snapshots 2025-02-26 17:01:21 +01:00
1c60b17a37 Update version for the next release (v1.13.1) in Cargo.toml 2025-02-26 17:01:19 +01:00
3b2cd54b9d tests: add a check to know if a Value has an uid 2025-02-25 17:24:45 +01:00
0833cb7d34 Mention openAPI in CONTRIBUTING.md 2025-02-25 12:01:26 +01:00
b0d4f9590f Merge #5364
Some checks failed
Test suite / Tests on ubuntu-20.04 (push) Failing after 1s
Test suite / Tests almost all features (push) Has been skipped
Test suite / Test disabled tokenization (push) Has been skipped
Test suite / Test with Ollama (push) Failing after 15s
Test suite / Run tests in debug (push) Failing after 15s
Test suite / Run Rustfmt (push) Successful in 2m20s
Test suite / Run Clippy (push) Failing after 6m46s
Run the indexing fuzzer / Setup the action (push) Failing after 7m0s
Indexing bench (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of indexing (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for geo (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for songs (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for Wikipedia articles (push) / Run and upload benchmarks (push) Has been cancelled
Test suite / Tests on macos-13 (push) Has been cancelled
Test suite / Tests on windows-2022 (push) Has been cancelled
5364: Rename `callTrace` into `progressTrace` r=Kerollmops a=Kerollmops

Rename the `callTrace` field into a `progressTrace`.

Co-authored-by: Kerollmops <clement@meilisearch.com>
2025-02-25 09:34:13 +00:00
dfce20be21 Rename callTrace into progressTrace 2025-02-25 10:09:03 +01:00
24fe6cd205 Fix multiple embeddings in hf 2025-02-24 16:24:04 +01:00
e374b095a2 Fix tests 2025-02-24 14:11:26 +01:00
9f3e4801b1 Refactor settings validation and introduce SubEmbedderSettings 2025-02-24 13:58:26 +01:00
b85180fedb Error types 2025-02-24 13:58:26 +01:00
3cdcc54a9e analytics 2025-02-24 13:58:26 +01:00
294cf39cad Integrate composite embedder 2025-02-24 13:58:26 +01:00
4a2643daa2 Rename embed_one to embed_search and embed_chunks* to embed_index* 2025-02-24 13:58:26 +01:00
8d2d9066ba Add composite embedder 2025-02-24 13:58:26 +01:00
526476e168 Move settings test to its own file 2025-02-24 13:58:26 +01:00
ea7bae9a71 Merge #5356
Some checks failed
Test suite / Tests almost all features (push) Has been skipped
Test suite / Test disabled tokenization (push) Has been skipped
Test suite / Run tests in debug (push) Failing after 1s
Test suite / Test with Ollama (push) Failing after 12s
Test suite / Tests on ubuntu-20.04 (push) Failing after 19s
Test suite / Run Rustfmt (push) Failing after 17s
Test suite / Run Clippy (push) Successful in 9m21s
Run the indexing fuzzer / Setup the action (push) Successful in 1h8m47s
Test suite / Tests on macos-13 (push) Has been cancelled
Test suite / Tests on windows-2022 (push) Has been cancelled
Indexing bench (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of indexing (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for geo (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for songs (push) / Run and upload benchmarks (push) Has been cancelled
Benchmarks of search for Wikipedia articles (push) / Run and upload benchmarks (push) Has been cancelled
5356: Display the internal indexing steps with timings on the `/batches` route r=irevoire a=Kerollmops

This PR computes the durations of each step, stores them in a map, and prints them (for now).

```
"callTrace": {
    "processing tasks > retrieving config": "185.38µs",
    "processing tasks > computing document changes > preparing update file > payload": "23.11ms",
    "processing tasks > computing document changes > preparing update file": "23.26ms",
    "processing tasks > computing document changes": "24.06ms",
    "processing tasks > indexing > extracting documents > document": "15.13ms",
    "processing tasks > indexing > extracting documents": "15.13ms",
    "processing tasks > indexing > extracting facets > document": "5.70ms",
    "processing tasks > indexing > extracting facets": "5.72ms",
    "processing tasks > indexing > extracting words > document": "597.24ms",
    "processing tasks > indexing > extracting words": "597.25ms",
    "processing tasks > indexing > extracting word proximity > document": "1.14s",
    "processing tasks > indexing > extracting word proximity": "1.15s",
    "processing tasks > indexing > tail writing to database": "430.91ms",
    "processing tasks > indexing > waiting for extractors": "52.54µs",
    "processing tasks > indexing > writing embeddings to database": "47.79µs",
    "processing tasks > indexing > post-processing facets": "476.04µs",
    "processing tasks > indexing > post-processing words": "97.82ms",
    "processing tasks > indexing > finalizing": "67.41ms",
    "processing tasks > indexing": "2.40s",
    "processing tasks": "2.43s",
    "writing tasks to disk > task": "37.71µs",
    "writing tasks to disk": "67.13µs"
},
"writeChannelCongestion": {
    "attempts": 2608482,
    "blocking_attempts": 0,
    "blocking_ratio": 0.0
}
```

## To Do
- [x] Update the batches PRD + delivery + tracking issue.
- [x] Store that in the batches to be visible from the `/batches` route.
- [x] Display the writer's congestion.
- [x] Display the info back in the logs too.
- [ ] (optional) Compute the size of each database by [using LMDB](https://docs.rs/heed/latest/heed/struct.DatabaseStat.html).
- [x] Push them in reverse order so that "processing task" is after the other sub-steps.


Co-authored-by: Kerollmops <clement@meilisearch.com>
2025-02-20 17:38:50 +00:00
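A minimal sketch of how such a trace can be accumulated from enter/exit progress events; the real progress API differs, and the names here are illustrative:

```rust
use std::collections::BTreeMap;
use std::time::{Duration, Instant};

// Accumulate "a > b > c"-style step durations, as in the map shown above.
#[derive(Default)]
struct ProgressTrace {
    open: Vec<(String, Instant)>,          // stack of currently running steps
    durations: BTreeMap<String, Duration>, // full step path -> accumulated time
}

impl ProgressTrace {
    fn enter(&mut self, name: &str) {
        self.open.push((name.to_string(), Instant::now()));
    }

    fn exit(&mut self) {
        if let Some((name, begin)) = self.open.pop() {
            let mut path: Vec<&str> = self.open.iter().map(|(n, _)| n.as_str()).collect();
            path.push(name.as_str());
            *self.durations.entry(path.join(" > ")).or_default() += begin.elapsed();
        }
    }
}
```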
76fd5d92d7 Clarify the tail writing to database 2025-02-20 17:35:23 +01:00
245a55722a Remove commented code 2025-02-20 16:48:18 +01:00
434fad5327 Fix insta tests again 2025-02-20 16:41:48 +01:00
243a5fa6a8 Log the call trace and congestion 2025-02-20 14:17:34 +01:00
9d314ace09 Fix the insta tests 2025-02-20 11:51:58 +01:00
1b1172ad16 Fix dump tests 2025-02-20 10:44:53 +01:00
1d99c8465c Hide the batch stats to make insta pass 2025-02-20 10:16:54 +01:00
05cc8c650c Expose the write channel congestion in the batches 2025-02-19 15:47:54 +01:00
14e1459bf5 Document settings 2025-02-19 15:06:22 +01:00
589bf30ec6 make clippy happy 2025-02-19 11:38:07 +01:00
b367c71ad2 fixup test 2025-02-19 11:31:17 +01:00
3ff1de0a21 Expose the call trace in the batch stats 2025-02-19 11:24:11 +01:00
1005a60fb8 Fixup dump settings 2025-02-19 11:03:48 +01:00
e9add14189 Reorder steps 2025-02-18 19:26:41 +01:00
4a058a080e Simplify the name generation 2025-02-18 18:48:44 +01:00
11a11fc870 Accumulate step durations from the progress system 2025-02-18 18:33:19 +01:00
cd0dfa3f1b Fix test cases 2025-02-18 17:21:52 +01:00
7b4ce468a6 Allow overriding pooling method 2025-02-18 17:12:23 +01:00
11759c4be4 Support pooling 2025-02-18 16:10:51 +01:00
0f1aeb8eaa Merge #5351
Some checks failed
Look for flaky tests / flaky (push) Failing after 19s
SDKs tests / define-docker-image (push) Failing after 5s
SDKs tests / .NET SDK tests (push) Has been skipped
SDKs tests / Dart SDK tests (push) Has been skipped
SDKs tests / Go SDK tests (push) Has been skipped
SDKs tests / Java SDK tests (push) Has been skipped
SDKs tests / JS SDK tests (push) Has been skipped
SDKs tests / PHP SDK tests (push) Has been skipped
SDKs tests / Python SDK tests (push) Has been skipped
SDKs tests / Ruby SDK tests (push) Has been skipped
SDKs tests / Rust SDK tests (push) Has been skipped
SDKs tests / Swift SDK tests (push) Has been skipped
SDKs tests / meilisearch-js-plugins tests (push) Has been skipped
SDKs tests / meilisearch-rails tests (push) Has been skipped
SDKs tests / meilisearch-symfony tests (push) Has been skipped
Publish binaries to GitHub release / Check the version validity (push) Successful in 9s
Publish binaries to GitHub release / Publish binary for aarch64 (meilisearch-linux-aarch64, aarch64-unknown-linux-gnu) (push) Failing after 2s
Publish binaries to GitHub release / Publish binary for Linux (push) Failing after 12s
Publish binaries to GitHub release / Publish binary for macos-13 (push) Has been cancelled
Publish binaries to GitHub release / Publish binary for windows-2022 (push) Has been cancelled
Publish binaries to GitHub release / Publish binary for macOS silicon (meilisearch-macos-apple-silicon, aarch64-apple-darwin) (push) Has been cancelled
Test suite / Tests on ubuntu-20.04 (push) Failing after 12s
Test suite / Test with Ollama (push) Failing after 7s
Test suite / Test disabled tokenization (push) Failing after 11s
Test suite / Run tests in debug (push) Failing after 11s
Test suite / Run Clippy (push) Failing after 17s
Test suite / Run Rustfmt (push) Successful in 1m51s
Test suite / Tests almost all features (push) Failing after 7m7s
Test suite / Tests on macos-13 (push) Has been cancelled
Test suite / Tests on windows-2022 (push) Has been cancelled
5351: Bring back v1.13.0 changes into main r=irevoire a=Kerollmops

This PR brings back the changes made in v1.13 into the main branch.

Co-authored-by: ManyTheFish <many@meilisearch.com>
Co-authored-by: Kerollmops <clement@meilisearch.com>
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
Co-authored-by: Clémentine <clementine@meilisearch.com>
Co-authored-by: meili-bors[bot] <89034592+meili-bors[bot]@users.noreply.github.com>
Co-authored-by: Tamo <tamo@meilisearch.com>
Co-authored-by: Clément Renault <clement@meilisearch.com>
2025-02-18 08:05:02 +00:00
5e7803632d Merge #5342
Some checks failed
Test suite / Tests on ubuntu-20.04 (push) Failing after 1s
Test suite / Tests almost all features (push) Has been skipped
Test suite / Test disabled tokenization (push) Has been skipped
Test suite / Run tests in debug (push) Failing after 14s
Test suite / Run Rustfmt (push) Successful in 1m54s
Test suite / Run Clippy (push) Failing after 6m49s
Test suite / Tests on macos-13 (push) Has been cancelled
Test suite / Tests on windows-2022 (push) Has been cancelled
5342: Fix workload sha r=dureuill a=ManyTheFish

The dataset shasum was wrong for some workloads making the `/bench workloads/*.json` crash

Co-authored-by: ManyTheFish <many@meilisearch.com>
2025-02-12 16:27:09 +00:00
885710a07b Merge #5341
5341: Embeddings stats r=ManyTheFish a=ManyTheFish

# Pull Request

## Related issue
Fixes #5321

## What does this PR do?
- Add embedding stats
- force dumpless upgrade to recompute stats
- add tests


Co-authored-by: ManyTheFish <many@meilisearch.com>
2025-02-12 15:46:37 +00:00
c55fdad2c3 Fix dumpless upgrade target version 2025-02-12 16:35:05 +01:00
1caad4c4b0 Add multiple embeddings for the same embedder in tests 2025-02-12 16:13:34 +01:00
8419ed52a1 fix clippy 2025-02-12 14:38:51 +01:00
a65c52cc97 Convert dump test into snapshots 2025-02-12 14:14:10 +01:00
49e9655c24 Update snapshots 2025-02-12 14:05:32 +01:00
fa763ca5dc Merge #5339
5339: Add back timeout from v1.11.3 r=Kerollmops a=dureuill

# Pull Request

## Related issue
Fixes #5337

## What does this PR do?
- Fix regression compared with v1.11 by reintroducing the 30s timeout on all REST API calls.

Thanks to `@migueltarga` for reporting the issue


Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2025-02-12 12:50:27 +00:00
c7aeb554b2 Add tests 2025-02-12 13:37:41 +01:00
88d9d47928 Fix benchmark sha 2025-02-12 13:27:15 +01:00
8e0d8d31f9 Add back timeout from v1.11.3 2025-02-12 11:53:00 +01:00
81a38099ec Merge #5336
5336: Meilitool Hair Dryer r=dureuill a=Kerollmops

This pull request introduces a new subcommand to hair dry a specific part of specific indexes. It is useful when [the memory-mapped pages are not hot in the cache](https://arc.net/l/quote/ixhcdwcq) and must be. Hair drying those interesting pages makes the search requests using the vector store much faster.

The previous technique used the "cat method," which consists of reading the whole LMDB data file and piping it into the null file descriptor. By doing that, the whole LMDB data file becomes hot in the cache. However, when the database is large, at least 30% of it is free or unused pages, and many other pages don't need to be hot, e.g., raw JSON documents or uninteresting parts of the inverted index.

This new subcommand reads all the Arroy pages of a given index to make them hot, and only those. More coming...

The current algorithm is single-threaded and takes a lot of time. I am in the process of multithreading it. This is the time it takes to hair dry a 305GiB database with a single thread.

```
real    21m51.054s
user    0m3.155s
sys     0m19.393s
```

## To Do
- [ ] (optional) Do the reads in parallel.

Co-authored-by: Kerollmops <clement@meilisearch.com>
2025-02-12 10:45:16 +00:00
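A rough sketch of the "hair drying" idea under stated assumptions: iterate the vector-store databases so the OS faults their pages into the cache, without reading the rest of the LMDB file. The real subcommand resolves the arroy databases per index; this is a simplification:

```rust
use heed::types::Bytes;

// Touch every page of a database so the page cache loads it.
fn hair_dry(rtxn: &heed::RoTxn, db: heed::Database<Bytes, Bytes>) -> heed::Result<u64> {
    let mut bytes_touched = 0u64;
    for entry in db.iter(rtxn)? {
        let (key, value) = entry?;
        // Reading the slices faults the pages in; summing their lengths
        // keeps the loop from being optimized away.
        bytes_touched += (key.len() + value.len()) as u64;
    }
    Ok(bytes_touched)
}
```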
bd27fe7d02 force dumpless upgrade to recompute stats 2025-02-12 11:45:02 +01:00
41203f0931 Add embedders stats 2025-02-12 11:37:47 +01:00
803a699b15 Remove unsafes 2025-02-12 10:46:45 +01:00
246ad3b06e Display a progress percentage 2025-02-12 09:56:05 +01:00
a21c440274 Bump Ubuntu from 20.04 to 22.04 2025-02-12 09:49:50 +01:00
70305b9f71 Merge #5332
5332: Fix geo update r=Kerollmops a=dureuill

# Pull Request

## Related issue
Fixes #5331

## What does this PR do?
- use the merged version that contains all fields instead of the updated version that contains only updated fields
- add test that detects the problem
- As it is the second time that `changes.updated` is causing a bug, I'm changing its name to `only_changed_fields`, hopefully better communicating that old fields are not there


Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2025-02-11 18:51:33 +00:00
5dab435d13 Add more logs about read txns 2025-02-11 18:14:48 +01:00
c83c1a3c51 Introduce the Hair Dryer meilitool subcommand 2025-02-11 18:01:53 +01:00
b83275c9c5 Change the updated* functions to only_new functions, hopefully better communicating what they do 2025-02-11 15:27:10 +01:00
d7f35ee3ba Use merged document instead of updated 2025-02-11 15:27:10 +01:00
1dce341bfb Add test 2025-02-11 15:27:10 +01:00
4876c1c8eb Merge #5310
Some checks failed
Test suite / Test disabled tokenization (push) Has been skipped
Test suite / Tests almost all features (push) Has been skipped
Test suite / Tests on ubuntu-20.04 (push) Failing after 18s
Test suite / Run tests in debug (push) Failing after 17s
Test suite / Run Rustfmt (push) Successful in 2m42s
Test suite / Run Clippy (push) Failing after 7m17s
Test suite / Tests on macos-13 (push) Has been cancelled
Test suite / Tests on windows-2022 (push) Has been cancelled
5310: Fix batch export/import dump r=Kerollmops a=irevoire

# Pull Request

## Related issue
Fixes https://github.com/meilisearch/meilisearch/issues/5304
Fixes https://github.com/meilisearch/meilisearch/issues/5247

## What does this PR do?
- Add the batches to the dump
- Update the tests
- Create a new dump test containing batches and an enqueued task with a document addition


Co-authored-by: Tamo <tamo@meilisearch.com>
2025-02-11 10:21:34 +00:00
43c8d54501 fix test after rebase 2025-02-11 11:19:13 +01:00
84e2a1f836 rename the atomic to something more meaningful 2025-02-11 11:14:49 +01:00
00eb47d42e use serde_json::to_writer instead of serializing + writing 2025-02-11 11:14:49 +01:00
9293e7f2c1 fix tests after rebase 2025-02-11 11:14:49 +01:00
80198aa855 add a dump test with batches and enqueued tasks 2025-02-11 11:14:49 +01:00
fa00b42c93 fix the missing batch in the dumps in meilisearch and meilitools 2025-02-11 11:14:49 +01:00
6c9409edf8 Merge #5326
Some checks failed
Test suite / Test disabled tokenization (push) Has been skipped
Test suite / Tests almost all features (push) Has been skipped
Test suite / Tests on ubuntu-20.04 (push) Failing after 15s
Test suite / Run Clippy (push) Failing after 11s
Test suite / Run tests in debug (push) Failing after 34s
Test suite / Run Rustfmt (push) Successful in 1m32s
Test suite / Tests on macos-13 (push) Has been cancelled
Test suite / Tests on windows-2022 (push) Has been cancelled
5326: Expose a route to get the file content associated with a task r=Kerollmops a=Kerollmops

This PR exposes a new `/tasks/{taskUid}/documents` route, exposing the update file associated with a task.

## To Do
- [x] (optional) Change the route to `/tasks/{taskUid}/documents` `@dureuill.`
- [x] Update Open API example.
- [x] Create [an Experimental Feature Discussion](https://github.com/orgs/meilisearch/discussions/808).
- [x] Make this route experimental and enable it via the experimental route.

Co-authored-by: Kerollmops <clement@meilisearch.com>
Co-authored-by: Clément Renault <clement@meilisearch.com>
2025-02-10 16:50:13 +00:00
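Once the experimental feature is enabled, fetching the payload is a single call; the task uid and key below are placeholders:

```
GET /tasks/42/documents
Authorization: Bearer <MASTER_KEY>
```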
acb06cb3e6 Improve the error message when missing documents
Co-authored-by: Tamo <tamo@meilisearch.com>
2025-02-10 16:53:50 +01:00
7d0d8f4445 Make the feature experimental 2025-02-10 16:11:32 +01:00
491d115c3c Change the route to get the task documents 2025-02-10 14:55:07 +01:00
55fa2dda00 Update the Open API example 2025-02-10 14:52:48 +01:00
c71eea8023 Improve error message when update file has been processed 2025-02-10 14:33:01 +01:00
df40533741 Expose a route to get the update file content of a task 2025-02-10 14:05:32 +01:00
0c3e7fe963 Merge #5316
Some checks failed
Test suite / Tests on ubuntu-20.04 (push) Failing after 2s
Test suite / Tests almost all features (push) Has been skipped
Test suite / Test disabled tokenization (push) Has been skipped
Test suite / Run tests in debug (push) Failing after 16s
Test suite / Run Clippy (push) Failing after 12s
Test suite / Run Rustfmt (push) Failing after 32s
Test suite / Tests on macos-13 (push) Has been cancelled
Test suite / Tests on windows-2022 (push) Has been cancelled
5316: Fix the dumpless upgrade corruption r=dureuill a=irevoire

# Pull Request

## Related issue
Fixes https://github.com/meilisearch/meilisearch/issues/5280

## What does this PR do?
- Add a test that ensure we write the version in the index-scheduler even if we have a bug while writing the VERSION file
- Do what was described in the issue


Co-authored-by: Tamo <tamo@meilisearch.com>
2025-02-10 09:53:57 +00:00
45f843ccb9 fmt 2025-02-10 10:46:42 +01:00
35b6bca598 remove the failing test 2025-02-10 10:20:14 +01:00
7f82d33597 update the version file atomically 2025-02-06 18:23:28 +01:00
8c5856007c flush+sync the version file just in case 2025-02-06 18:04:43 +01:00
ae1d7f4d9b Improve the test and disable it on windows and linux since they don't work on the CI 2025-02-06 17:54:12 +01:00
792be63567 Merge #5323
5323: exclude network time from processingMs r=Kerollmops a=dureuill



Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2025-02-06 16:35:44 +00:00
70aac71c63 exclude network time from processingMs 2025-02-06 17:18:36 +01:00
a562d6abc1 Merge #5322
5322: Make sure arroy is using the rayon thread-pool r=dureuill a=Kerollmops

This PR fixes #5249 by ensuring arroy uses the rayon thread pool.

Co-authored-by: Kerollmops <clement@meilisearch.com>
2025-02-06 15:28:47 +00:00
b7fdd9516c Merge #4970
4970: Create a new export documents meilitool subcommand r=dureuill a=Kerollmops

This subcommand can be useful for extracting documents from an existing database.

Co-authored-by: Kerollmops <clement@meilisearch.com>
2025-02-06 14:48:27 +00:00
5f2a1a4fd1 Skip the documents before fetching them 2025-02-06 15:40:22 +01:00
2b0e17ede0 Make sure arroy is using the rayon thread-pool 2025-02-06 15:28:10 +01:00
37092adc71 Show a bit of progress 2025-02-06 10:37:05 +01:00
86fcad788e Introduce a parameter to skip the first documents 2025-02-06 10:32:50 +01:00
2ea5c57871 Create a new export documents meilitool subcommand based on v1.12 2025-02-06 10:32:39 +01:00
b63c64395d add a test ensuring the index-scheduler version is set when we cannot write the version file 2025-02-05 18:08:50 +01:00
628119e31e fix the dumpless upgrade potential corruption when upgrading from the v1.12 2025-02-05 18:08:50 +01:00
78867b6852 Merge #5299
Some checks failed
Test suite / Tests almost all features (push) Has been skipped
Test suite / Test disabled tokenization (push) Has been skipped
Test suite / Run tests in debug (push) Failing after 1s
Test suite / Tests on ubuntu-20.04 (push) Failing after 17s
Test suite / Tests on windows-2022 (push) Failing after 25s
Test suite / Run Rustfmt (push) Failing after 1m6s
Test suite / Run Clippy (push) Successful in 8m46s
Test suite / Tests on macos-13 (push) Has been cancelled
5299: Remote federated search r=dureuill a=dureuill

Fixes #4980 

- Usage: https://www.notion.so/meilisearch/API-usage-Remote-search-request-f64fae093abf409e9434c9b9c8fab6f3?pvs=25#1894b06b651f809a9f3dcc6b7189646e

- Changes database format:
  - Adds a new database key: the code is resilient to the case where the key is missing
  - Adds a new experimental feature: the code for experimental features is resilient to this case

Changes:

- Add experimental feature `proxySearch`
- Add network routes
- Dump support for network
- Add proxy search
- Add various tests

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2025-02-05 16:08:48 +00:00
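For context, the network object added by this PR is configured roughly like this; instance names, URLs, and keys are placeholders, and the exact shape is documented in the usage link above:

```
PATCH /network
{
  "self": "ms-0",
  "remotes": {
    "ms-1": { "url": "http://ms-1.example.com:7700", "searchApiKey": "<search-key>" }
  }
}
```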
b21b8e8f30 Remote search tests 2025-02-05 15:03:33 +01:00
4a9e5ae215 mv multi.rs -> multi/mod.rs 2025-02-05 15:03:33 +01:00
6e1865b75b network integration tests 2025-02-05 15:03:32 +01:00
64409a1de7 Test server: clear_api_key 2025-02-05 15:03:32 +01:00
1b81cab782 Add more analytics 2025-02-05 15:03:32 +01:00
88190b5602 Fix tests 2025-02-05 15:03:32 +01:00
0b27aa5138 Multi search reads header to know if it is being proxied 2025-02-05 15:03:32 +01:00
35160788d7 Proxy search requests 2025-02-05 15:03:32 +01:00
c3e5c3ba36 Allow rebuilding a SearchQueryWithIndex from its components 2025-02-05 15:03:16 +01:00
04ac0af54b Add WeightedScoreValues to be able to compare remote scores 2025-02-05 15:03:16 +01:00
9996533364 Make search types serialize and deserialize so that reading from a proxy is possible 2025-02-05 15:03:16 +01:00
3f6b334fc5 Route network 2025-02-05 15:03:16 +01:00
b30e5a7a35 Add new permissions 2025-02-05 15:03:16 +01:00
6d79cb23ba New error codes 2025-02-05 15:03:16 +01:00
e34afca6d7 Support network in dumps 2025-02-05 15:03:16 +01:00
4918b9ffb6 Network stored in DB 2025-02-05 15:03:15 +01:00
73474e7af0 Network types 2025-02-05 15:03:15 +01:00
7ae6dda03f Add new experimental feature 2025-02-05 15:01:04 +01:00
00e764b0d3 Merge #5314
5314: Activate used database size r=irevoire a=ManyTheFish

# Pull Request

make the `/stats` route return the `usedDatabaseSize` corresponding to the size used to store the "real" data in the database and not the disk size used by LMDB


Co-authored-by: ManyTheFish <many@meilisearch.com>
2025-02-05 12:51:57 +00:00
4abf0db0b4 Activate used database size 2025-02-05 13:45:47 +01:00
acc885fd0a Merge #5312
5312: Send the OSS analytics once per day instead of once per hour r=ManyTheFish a=irevoire

# Pull Request

## Related issue
Fixes https://github.com/meilisearch/meilisearch/issues/5311

## What does this PR do?
- If the instance is OSS => we send the analytics once every day
- If the instance is on the meilisearch cloud => we send the analytics every hour


Co-authored-by: Tamo <tamo@meilisearch.com>
2025-02-05 11:15:34 +00:00
61e8cfd4bc Send the OSS analytics once per day instead of once per hour 2025-02-04 15:39:00 +01:00
796acd1aee Merge #5288
Some checks failed
Test suite / Tests almost all features (push) Has been skipped
Test suite / Test disabled tokenization (push) Has been skipped
Test suite / Tests on ubuntu-20.04 (push) Failing after 13s
Test suite / Run tests in debug (push) Failing after 13s
Test suite / Run Clippy (push) Failing after 19s
Test suite / Tests on windows-2022 (push) Failing after 48s
Test suite / Run Rustfmt (push) Successful in 1m28s
Test suite / Tests on macos-13 (push) Has been cancelled
5288: Improve AI logging r=dureuill a=Kerollmops

This PR fixes #5285 and brings the changes from #5233 to simplify debugging indexation and search performance issues related to AI. The following texts can be found in the logs to debug and understand performance issues:

 - `embed_one: search` represents the time we spent waiting for the embedding generation, i.e., OpenAI, local HuggingFace, Ollama.
 - `filtered_universe: search::universe` the time spent filtering the documents.
 - ~`next_bucket: search::vector_sort` is the time spent finding the nearest neighbors (ANNs) in the vector store (arroy), locally~ was being triggered too many times.
 - `indexing::vectors` is the time arroy spends indexing the new vectors for a batch.
 - `documents::extract vectors` and `documents::merge vectors` to see the time spent generating and writing the embeddings.

Co-authored-by: Kerollmops <clement@meilisearch.com>
2025-02-04 10:20:45 +00:00
cc8df5e11f Move back the search-side logging to tracing 2025-02-04 11:16:17 +01:00
ede74ccc42 Merge #5306
Some checks failed
Test suite / Tests on ubuntu-20.04 (push) Failing after 2s
Test suite / Tests almost all features (push) Has been skipped
Test suite / Test disabled tokenization (push) Has been skipped
Test suite / Run tests in debug (push) Failing after 2s
Test suite / Tests on windows-2022 (push) Failing after 24s
Test suite / Run Rustfmt (push) Successful in 1m33s
Test suite / Run Clippy (push) Successful in 6m20s
Test suite / Tests on macos-13 (push) Has been cancelled
5306: Fix internal error when passing `documentTemplateMaxBytes` to a source that doesn't support it r=ManyTheFish a=dureuill

# Pull Request

## Related issue
Fixes #5305 

## What does this PR do?
- add `DOCUMENT_TEMPLATE_MAX_BYTES` to `allowed_sources_for_field` and `allowed_fields_for_source` to prevent a panic


Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2025-02-04 08:46:13 +00:00
6425451bbc Merge #5303
5303: Bring back changes from v1.12.8 into v1.13.0 r=Kerollmops a=Kerollmops

Fixes #5087 and other problems that you can find in the original PR #5294.

Co-authored-by: Kerollmops <clement@meilisearch.com>
2025-02-03 10:49:26 +00:00
fe46855462 Merge #5235
5235: Introduce a compaction subcommand in meilitool r=dureuill a=Kerollmops

This PR proposes a change to the meilitool helper, introducing the `compact-index` subcommand to reduce the size of the indexes.

While working on this tool, I discovered that the current heed `Env::copy_to_file` API is not very temp file friendly and [could be improved](https://github.com/meilisearch/heed/issues/306).

Co-authored-by: Kerollmops <clement@meilisearch.com>
Co-authored-by: Clément Renault <clement@meilisearch.com>
2025-02-03 10:11:01 +00:00
8e7d2d25f2 Only open indexes, do not create them 2025-02-03 10:50:38 +01:00
a436534515 Fix test 2025-02-03 10:36:34 +01:00
2385842537 Fix the imports 2025-02-03 10:29:09 +01:00
6a70c0ec92 Add a link to the experimental feature GitHub discussion 2025-02-03 10:24:53 +01:00
7a9382b115 Better document the rayon limitation condition 2025-02-03 10:24:53 +01:00
62dabeba5f Do not create too many rayon tasks when processing the settings 2025-02-03 10:24:52 +01:00
48812229a9 Remove a log that would log too much 2025-02-03 10:24:52 +01:00
915cc377fb Refine the env variable and the max readers 2025-02-03 10:24:52 +01:00
96544bfa43 add DOCUMENT_TEMPLATE_MAX_BYTES to allowed_sources_for_field and allowed_fields_for_source 2025-02-03 09:59:17 +01:00
09d474da63 Merge #5140
Some checks failed
Test suite / Test disabled tokenization (push) Has been skipped
Test suite / Tests almost all features (push) Has been skipped
Test suite / Run tests in debug (push) Failing after 21s
Test suite / Tests on windows-2022 (push) Failing after 26s
Test suite / Run Clippy (push) Failing after 19s
Test suite / Run Rustfmt (push) Successful in 4m7s
Test suite / Tests on ubuntu-20.04 (push) Failing after 14m22s
Test suite / Tests on macos-13 (push) Has been cancelled
5140: Fix workload inversion r=dureuill a=ManyTheFish

The used assets were inverted between `workloads/hackernews-modify-facet-numbers.json`
and `workloads/hackernews-modify-facet-strings.json`, now fixed.


Co-authored-by: ManyTheFish <many@meilisearch.com>
2025-02-03 08:22:22 +00:00
aaefbfae1f Do not create too many rayon tasks 2025-01-30 16:36:12 +01:00
97e17f52a1 Add more logs to see calls to the embedders 2025-01-30 16:36:12 +01:00
62ced0e3f1 Make cargo fmt happy 2025-01-30 11:09:54 +01:00
71bb24f17e Throw an error when the index is not found
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2025-01-30 11:07:43 +01:00
c72f114b33 Fix english in the comments
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2025-01-30 11:07:09 +01:00
8ed39f5de0 Merge #5300
5300: Improve unexpected panic message r=irevoire a=irevoire

# Pull Request

## Related issue
Fixes https://github.com/meilisearch/meilisearch/issues/5273

## What does this PR do?
- When an unexpected panic happens in the index-scheduler we catch it and rebuild an error message from the join_error
- Same when the upgrade index-scheduler fails


Co-authored-by: Tamo <tamo@meilisearch.com>
2025-01-30 09:23:17 +00:00
424c5bde40 Move the embedding computation and extraction log to debug 2025-01-29 16:40:36 +01:00
bdd3005d10 Log the progress when a batch fails 2025-01-29 16:36:23 +01:00
4224edea28 Merge #5177
Some checks failed
Test suite / Tests on ubuntu-20.04 (push) Failing after 0s
Test suite / Tests almost all features (push) Has been skipped
Test suite / Test disabled tokenization (push) Has been skipped
Test suite / Run tests in debug (push) Failing after 2s
Test suite / Tests on windows-2022 (push) Failing after 23s
Test suite / Run Rustfmt (push) Successful in 2m17s
Test suite / Run Clippy (push) Successful in 5m55s
Test suite / Tests on macos-13 (push) Has been cancelled
5177: Debug log  the channel congestion r=Kerollmops a=Kerollmops

This PR displays the congestion of the BBQueue channel and the allocated memory for the channel and the extraction. This information can be beneficial for debugging and noticing slow disks. We show three pieces of information in debug:
- The direct attempts: the number of tries to send something in the BBQueue channel,
- The blocked attempts: the number of unsuccessful attempts that must be retried,
- The congestion: The percentage of blocking attempts. The higher, the slower the receiver and, therefore, the disk.

Co-authored-by: Kerollmops <clement@meilisearch.com>
Co-authored-by: Clément Renault <clement@meilisearch.com>
2025-01-29 15:35:31 +00:00
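The congestion figure is just the ratio of blocked to total attempts, matching the `writeChannelCongestion` object shown earlier in this log:

```rust
// Counters kept while sending entries into the BBQueue ring buffer.
struct ChannelCongestion {
    attempts: u64,          // every try to reserve space in the buffer
    blocking_attempts: u64, // tries that found the buffer full and had to retry
}

impl ChannelCongestion {
    // The higher the ratio, the slower the receiver (and thus the disk).
    fn blocking_ratio(&self) -> f64 {
        if self.attempts == 0 {
            0.0
        } else {
            self.blocking_attempts as f64 / self.attempts as f64
        }
    }
}
```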
cb1b7513af Log the memory metrics only once 2025-01-29 15:21:52 +01:00
2f89b8209f Merge #5291
Some checks failed
Test suite / Tests almost all features (push) Has been skipped
Test suite / Test disabled tokenization (push) Has been skipped
Test suite / Tests on ubuntu-20.04 (push) Failing after 12s
Test suite / Run tests in debug (push) Failing after 12s
Test suite / Run Clippy (push) Failing after 21s
Test suite / Run Rustfmt (push) Successful in 1m43s
Test suite / Tests on windows-2022 (push) Failing after 5m39s
Test suite / Tests on macos-13 (push) Has been cancelled
5291: Fix Dotnet tests in sdks-tests.yml r=irevoire a=curquiza



Co-authored-by: Clémentine <clementine@meilisearch.com>
2025-01-29 14:18:48 +00:00
a9d0f4a002 Improve english comments
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2025-01-29 15:16:40 +01:00
db032079d8 Show indexation allocated memory 2025-01-29 14:21:02 +01:00
a00796c46a Improve the naming in the log message 2025-01-29 14:21:02 +01:00
6112bd8caa Display the channel congestion 2025-01-29 14:21:02 +01:00
cec88cfc29 Measure the bbqueue congestion 2025-01-29 14:21:02 +01:00
8439aeb7cf improve error message in case of unexpected panic while processing tasks 2025-01-29 11:51:06 +01:00
42257eec53 Merge #5272
Some checks failed
Test suite / Tests on ubuntu-20.04 (push) Failing after 1s
Test suite / Tests almost all features (push) Has been skipped
Test suite / Test disabled tokenization (push) Has been skipped
Test suite / Run tests in debug (push) Failing after 0s
Test suite / Tests on windows-2022 (push) Failing after 14s
Test suite / Run Rustfmt (push) Successful in 1m59s
Test suite / Run Clippy (push) Successful in 5m48s
Test suite / Tests on macos-13 (push) Has been cancelled
5272: Fix Batches Deletion and flaky tests r=irevoire a=Kerollmops

- This issue fixes #5263 by removing the batches from the date and time databases.
- It also introduces a new `enqueued_at` field in the batch object to quickly retrieve them in the `batches.enqueued_at` database
- Finally, it probably fixes all the flaky tests of the batches: https://github.com/meilisearch/meilisearch/issues/5256

Co-authored-by: Kerollmops <clement@meilisearch.com>
Co-authored-by: Tamo <tamo@meilisearch.com>
2025-01-28 16:14:11 +00:00
1beda3b9af fix another flaky test 2025-01-28 16:53:50 +01:00
8676e94f5c fix the flaky tests 2025-01-28 16:53:50 +01:00
ef47a0d820 apply review comment 2025-01-28 16:53:50 +01:00
e0f0da57e2 make sure the batches we snapshots actually all contains an enqueued_at 2025-01-28 16:53:50 +01:00
485e3127c7 use the remove_n_tasks_datetime_earlier_than function when updating batches 2025-01-28 16:53:50 +01:00
58f90b70c7 store the enqueued at to eases the batch deletion 2025-01-28 16:53:50 +01:00
508db9020d update the snapshots 2025-01-28 16:53:50 +01:00
6ff37c6fc4 Fix the insta snapshots 2025-01-28 16:53:50 +01:00
f21ae1f5d1 Remove the batch id from the date time databases 2025-01-28 16:53:50 +01:00
483c52f07b Merge #5289
5289: Fix workload files after removing the vectorStore experimental feature r=Kerollmops a=dureuill

Running the bench [currently fails](https://github.com/meilisearch/meilisearch/actions/runs/12990029453) on embedding-related workloads, due to the call to `/experimental-features` that is used to enable the vector store:

In v1.13, `vectorStore` is no longer an experimental feature, so trying to enable it causes a 400

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2025-01-28 10:28:21 +00:00
f0d7ab81ad Fix Dotnet tests in sdks-tests.yml 2025-01-27 15:37:32 +01:00
f88f415a00 Fix workload files after removing the vectorStore experimental feature 2025-01-27 14:39:28 +01:00
19bc885b07 Fix the milli logo 2025-01-27 14:30:59 +01:00
47f70e3d79 Debug the first vector sort fill buffer 2025-01-27 14:22:29 +01:00
0f8eb3b506 Improve the logs of the search with AI 2025-01-27 14:22:22 +01:00
4a5923a55e log the time arroy took to insert embeddings 2025-01-27 14:22:17 +01:00
e6295c9c5f Introduce a meilitool subcommand to compact an index 2025-01-22 16:37:00 +01:00
6768e4ef75 Fix workload inversion 2024-12-09 10:20:49 +01:00
421 changed files with 22172 additions and 5701 deletions

View File

@@ -0,0 +1,100 @@
name: PR Milestone Check

on:
  pull_request:
    types: [opened, reopened, edited, synchronize, milestoned, demilestoned]
    branches:
      - "main"
      - "release-v*.*.*"

jobs:
  check-milestone:
    name: Check PR Milestone
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v3
      - name: Validate PR milestone
        uses: actions/github-script@v6
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}
          script: |
            // Get PR number directly from the event payload
            const prNumber = context.payload.pull_request.number;

            // Get PR details
            const { data: prData } = await github.rest.pulls.get({
              owner: 'meilisearch',
              repo: 'meilisearch',
              pull_number: prNumber
            });

            // Get base branch name
            const baseBranch = prData.base.ref;
            console.log(`Base branch: ${baseBranch}`);

            // Get PR milestone
            const prMilestone = prData.milestone;
            if (!prMilestone) {
              core.setFailed('PR must have a milestone assigned');
              return;
            }
            console.log(`PR milestone: ${prMilestone.title}`);

            // Validate milestone format: vx.y.z
            const milestoneRegex = /^v\d+\.\d+\.\d+$/;
            if (!milestoneRegex.test(prMilestone.title)) {
              core.setFailed(`Milestone "${prMilestone.title}" does not follow the required format vx.y.z`);
              return;
            }

            // For main branch PRs, check if the milestone is the highest one
            if (baseBranch === 'main') {
              // Get all milestones
              const { data: milestones } = await github.rest.issues.listMilestones({
                owner: 'meilisearch',
                repo: 'meilisearch',
                state: 'open',
                sort: 'due_on',
                direction: 'desc'
              });

              // Sort milestones by version number (vx.y.z)
              const sortedMilestones = milestones
                .filter(m => milestoneRegex.test(m.title))
                .sort((a, b) => {
                  const versionA = a.title.substring(1).split('.').map(Number);
                  const versionB = b.title.substring(1).split('.').map(Number);
                  // Compare major version
                  if (versionA[0] !== versionB[0]) return versionB[0] - versionA[0];
                  // Compare minor version
                  if (versionA[1] !== versionB[1]) return versionB[1] - versionA[1];
                  // Compare patch version
                  return versionB[2] - versionA[2];
                });

              if (sortedMilestones.length === 0) {
                core.setFailed('No valid milestones found in the repository. Please create at least one milestone with the format vx.y.z');
                return;
              }

              const highestMilestone = sortedMilestones[0];
              console.log(`Highest milestone: ${highestMilestone.title}`);

              if (prMilestone.title !== highestMilestone.title) {
                core.setFailed(`PRs targeting the main branch must use the highest milestone (${highestMilestone.title}), but this PR uses ${prMilestone.title}`);
                return;
              }
            } else {
              // For release branches, the milestone should match the branch version
              const branchVersion = baseBranch.substring(8); // remove 'release-'
              if (prMilestone.title !== branchVersion) {
                core.setFailed(`PRs targeting release branch "${baseBranch}" must use the matching milestone "${branchVersion}", but this PR uses "${prMilestone.title}"`);
                return;
              }
            }

            console.log('PR milestone validation passed!');

View File

@@ -9,22 +9,22 @@ jobs:
   flaky:
     runs-on: ubuntu-latest
     container:
-      # Use ubuntu-20.04 to compile with glibc 2.28
-      image: ubuntu:20.04
+      # Use ubuntu-22.04 to compile with glibc 2.35
+      image: ubuntu:22.04
     steps:
       - uses: actions/checkout@v3
       - name: Install needed dependencies
         run: |
           apt-get update && apt-get install -y curl
           apt-get install build-essential -y
       - uses: dtolnay/rust-toolchain@1.81
       - name: Install cargo-flaky
         run: cargo install cargo-flaky
       - name: Run cargo flaky in the dumps
         run: cd crates/dump; cargo flaky -i 100 --release
       - name: Run cargo flaky in the index-scheduler
         run: cd crates/index-scheduler; cargo flaky -i 100 --release
       - name: Run cargo flaky in the auth
         run: cd crates/meilisearch-auth; cargo flaky -i 100 --release
       - name: Run cargo flaky in meilisearch
         run: cd crates/meilisearch; cargo flaky -i 100 --release

View File

@@ -18,28 +18,28 @@ jobs:
     runs-on: ubuntu-latest
     needs: check-version
     container:
-      # Use ubuntu-20.04 to compile with glibc 2.28
-      image: ubuntu:20.04
+      # Use ubuntu-22.04 to compile with glibc 2.35
+      image: ubuntu:22.04
     steps:
       - name: Install needed dependencies
         run: |
           apt-get update && apt-get install -y curl
           apt-get install build-essential -y
       - uses: dtolnay/rust-toolchain@1.81
       - name: Install cargo-deb
         run: cargo install cargo-deb
       - uses: actions/checkout@v3
       - name: Build deb package
         run: cargo deb -p meilisearch -o target/debian/meilisearch.deb
       - name: Upload debian pkg to release
         uses: svenstaro/upload-release-action@2.7.0
         with:
           repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
           file: target/debian/meilisearch.deb
           asset_name: meilisearch.deb
           tag: ${{ github.ref }}
       - name: Upload debian pkg to apt repository
         run: curl -F package=@target/debian/meilisearch.deb https://${{ secrets.GEMFURY_PUSH_TOKEN }}@push.fury.io/meilisearch/

   homebrew:
     name: Bump Homebrew formula

View File

@@ -3,7 +3,7 @@ name: Publish binaries to GitHub release
 on:
   workflow_dispatch:
   schedule:
-    - cron: '0 2 * * *' # Every day at 2:00am
+    - cron: "0 2 * * *" # Every day at 2:00am
   release:
     types: [published]

@@ -37,26 +37,26 @@ jobs:
     runs-on: ubuntu-latest
     needs: check-version
     container:
-      # Use ubuntu-20.04 to compile with glibc 2.28
-      image: ubuntu:20.04
+      # Use ubuntu-22.04 to compile with glibc 2.35
+      image: ubuntu:22.04
     steps:
       - uses: actions/checkout@v3
       - name: Install needed dependencies
         run: |
           apt-get update && apt-get install -y curl
           apt-get install build-essential -y
       - uses: dtolnay/rust-toolchain@1.81
       - name: Build
         run: cargo build --release --locked
       # No need to upload binaries for dry run (cron)
       - name: Upload binaries to release
         if: github.event_name == 'release'
         uses: svenstaro/upload-release-action@2.7.0
         with:
           repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
           file: target/release/meilisearch
           asset_name: meilisearch-linux-amd64
           tag: ${{ github.ref }}

   publish-macos-windows:
     name: Publish binary for ${{ matrix.os }}

@@ -74,19 +74,19 @@ jobs:
           artifact_name: meilisearch.exe
           asset_name: meilisearch-windows-amd64.exe
     steps:
       - uses: actions/checkout@v3
       - uses: dtolnay/rust-toolchain@1.81
       - name: Build
         run: cargo build --release --locked
       # No need to upload binaries for dry run (cron)
       - name: Upload binaries to release
         if: github.event_name == 'release'
         uses: svenstaro/upload-release-action@2.7.0
         with:
           repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
           file: target/release/${{ matrix.artifact_name }}
           asset_name: ${{ matrix.asset_name }}
           tag: ${{ github.ref }}

   publish-macos-apple-silicon:
     name: Publish binary for macOS silicon

@@ -127,8 +127,8 @@ jobs:
     env:
       DEBIAN_FRONTEND: noninteractive
     container:
-      # Use ubuntu-20.04 to compile with glibc 2.28
-      image: ubuntu:20.04
+      # Use ubuntu-22.04 to compile with glibc 2.35
+      image: ubuntu:22.04
     strategy:
       matrix:
         include:

View File

@@ -52,7 +52,7 @@ jobs:
       - name: Setup .NET Core
         uses: actions/setup-dotnet@v4
         with:
-          dotnet-version: "6.0.x"
+          dotnet-version: "8.0.x"
       - name: Install dependencies
         run: dotnet restore
       - name: Build

View File

@@ -6,11 +6,7 @@ on:
     # Everyday at 5:00am
     - cron: "0 5 * * *"
   pull_request:
-  push:
-    # trying and staging branches are for Bors config
-    branches:
-      - trying
-      - staging
+  merge_group:

 env:
   CARGO_TERM_COLOR: always

@@ -19,13 +15,13 @@ env:
 jobs:
   test-linux:
-    name: Tests on ubuntu-20.04
+    name: Tests on ubuntu-22.04
     runs-on: ubuntu-latest
     container:
-      # Use ubuntu-20.04 to compile with glibc 2.28
-      image: ubuntu:20.04
+      # Use ubuntu-22.04 to compile with glibc 2.35
+      image: ubuntu:22.04
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
       - name: Install needed dependencies
         run: |
           apt-get update && apt-get install -y curl

@@ -72,8 +68,8 @@ jobs:
     name: Tests almost all features
     runs-on: ubuntu-latest
     container:
-      # Use ubuntu-20.04 to compile with glibc 2.28
-      image: ubuntu:20.04
+      # Use ubuntu-22.04 to compile with glibc 2.35
+      image: ubuntu:22.04
     if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
     steps:
       - uses: actions/checkout@v3

@@ -125,7 +121,7 @@ jobs:
     name: Test disabled tokenization
     runs-on: ubuntu-latest
     container:
-      image: ubuntu:20.04
+      image: ubuntu:22.04
     if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
     steps:
       - uses: actions/checkout@v3

@@ -149,8 +145,8 @@ jobs:
     name: Run tests in debug
     runs-on: ubuntu-latest
     container:
-      # Use ubuntu-20.04 to compile with glibc 2.28
-      image: ubuntu:20.04
+      # Use ubuntu-22.04 to compile with glibc 2.35
+      image: ubuntu:22.04
     steps:
       - uses: actions/checkout@v3
       - name: Install needed dependencies

View File

@@ -95,6 +95,11 @@ Meilisearch follows the [cargo xtask](https://github.com/matklad/cargo-xtask) wo
 Run `cargo xtask --help` from the root of the repository to find out what is available.
+#### Update the openAPI file if the API changed
+To update the openAPI file in the code, see [sprint_issue.md](https://github.com/meilisearch/meilisearch/blob/main/.github/ISSUE_TEMPLATE/sprint_issue.md#reminders-when-modifying-the-api).
+If you want to update the openAPI file on the [open-api repository](https://github.com/meilisearch/open-api), see [update-openapi-issue.md](https://github.com/meilisearch/engine-team/blob/main/issue-templates/update-openapi-issue.md).
 ### Logging
 Meilisearch uses [`tracing`](https://lib.rs/crates/tracing) for logging purposes. Tracing logs are structured and can be displayed as JSON to the end user, so prefer passing arguments as fields rather than interpolating them in the message.
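As a concrete illustration of that guideline, here is a minimal sketch (the event and field names are invented for the example, not taken from the codebase):

```rust
use tracing::info;

fn main() {
    let index_uid = "movies";
    let document_count = 42;
    // Preferred: structured fields that a JSON subscriber can emit as keys.
    info!(index_uid, document_count, "documents indexed");
    // Discouraged: interpolation buries the values inside an opaque string.
    info!("indexed {document_count} documents into {index_uid}");
}
```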
@@ -145,7 +150,7 @@ Some notes on GitHub PRs:
 - The PR title should be accurate and descriptive of the changes.
 - [Convert your PR as a draft](https://help.github.com/en/github/collaborating-with-issues-and-pull-requests/changing-the-stage-of-a-pull-request) if your changes are a work in progress: no one will review it until you pass your PR as ready for review.<br>
   The draft PRs are recommended when you want to show that you are working on something and make your work visible.
-- The branch related to the PR must be **up-to-date with `main`** before merging. Fortunately, this project uses [Bors](https://github.com/bors-ng/bors-ng) to automatically enforce this requirement without the PR author having to rebase manually.
+- The branch related to the PR must be **up-to-date with `main`** before merging. Fortunately, this project uses [GitHub Merge Queues](https://github.blog/news-insights/product-news/github-merge-queue-is-generally-available/) to automatically enforce this requirement without the PR author having to rebase manually.
 ## Release Process (for internal team only)
@@ -153,8 +158,7 @@ Meilisearch tools follow the [Semantic Versioning Convention](https://semver.org
 ### Automation to rebase and Merge the PRs
-This project integrates a bot that helps us manage pull requests merging.<br>
-_[Read more about this](https://github.com/meilisearch/integration-guides/blob/main/resources/bors.md)._
+This project uses GitHub Merge Queues that help us manage pull request merging.
 ### How to Publish a new Release

Cargo.lock: 366 changed lines (generated)
View File

@@ -47,7 +47,7 @@ dependencies = [
  "actix-utils",
  "ahash 0.8.11",
  "base64 0.22.1",
- "bitflags 2.6.0",
+ "bitflags 2.9.0",
  "brotli",
  "bytes",
  "bytestring",
@@ -258,7 +258,7 @@ version = "0.7.8"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "891477e0c6a8957309ee5c45a6368af3ae14bb510732d2684ffa19af310920f9"
 dependencies = [
- "getrandom",
+ "getrandom 0.2.15",
  "once_cell",
  "version_check",
 ]
@@ -271,7 +271,7 @@ checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011"
 dependencies = [
  "cfg-if",
  "const-random",
- "getrandom",
+ "getrandom 0.2.15",
  "once_cell",
  "version_check",
  "zerocopy",
@@ -393,41 +393,24 @@ checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711"
 [[package]]
 name = "arroy"
-version = "0.5.0"
+version = "0.6.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "dfc5f272f38fa063bbff0a7ab5219404e221493de005e2b4078c62d626ef567e"
+checksum = "08e6111f351d004bd13e95ab540721272136fd3218b39d3ec95a2ea1c4e6a0a6"
 dependencies = [
  "bytemuck",
  "byteorder",
- "enum-iterator",
  "heed",
- "log",
  "memmap2",
  "nohash",
  "ordered-float",
- "page_size",
  "rand",
  "rayon",
  "roaring",
  "tempfile",
- "thiserror 1.0.69",
-]
-
-[[package]]
-name = "arroy"
-version = "0.5.0"
-source = "git+https://github.com/meilisearch/arroy/?tag=DO-NOT-DELETE-upgrade-v04-to-v05#053807bf38dc079f25b003f19fc30fbf3613f6e7"
-dependencies = [
- "bytemuck",
- "byteorder",
- "heed",
- "log",
- "memmap2",
- "nohash",
- "ordered-float",
- "rand",
- "rayon",
- "roaring",
- "tempfile",
- "thiserror 1.0.69",
+ "thiserror 2.0.9",
+ "tracing",
 ]
 [[package]]
@@ -503,7 +486,7 @@ source = "git+https://github.com/meilisearch/bbqueue#cbb87cc707b5af415ef203bdaf2
 [[package]]
 name = "benchmarks"
-version = "1.13.0"
+version = "1.14.0"
 dependencies = [
  "anyhow",
  "bumpalo",
@@ -553,7 +536,7 @@ version = "0.70.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "f49d8fed880d473ea71efb9bf597651e77201bdd4893efe54c9e5d65ae04ce6f"
 dependencies = [
- "bitflags 2.6.0",
+ "bitflags 2.9.0",
  "cexpr",
  "clang-sys",
  "itertools 0.13.0",
@@ -599,9 +582,9 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
 [[package]]
 name = "bitflags"
-version = "2.6.0"
+version = "2.9.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de"
+checksum = "5c8214115b7bf84099f1309324e63141d4c5d7cc26862f97a0a857dbefe165bd"
 dependencies = [
  "serde",
 ]
@@ -694,7 +677,7 @@ dependencies = [
 [[package]]
 name = "build-info"
-version = "1.13.0"
+version = "1.14.0"
 dependencies = [
  "anyhow",
  "time",
@@ -807,22 +790,20 @@ dependencies = [
 [[package]]
 name = "bzip2"
-version = "0.4.4"
+version = "0.5.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bdb116a6ef3f6c3698828873ad02c3014b3c85cadb88496095628e3ef1e347f8"
+checksum = "49ecfb22d906f800d4fe833b6282cf4dc1c298f5057ca0b5445e5c209735ca47"
 dependencies = [
  "bzip2-sys",
- "libc",
 ]
 [[package]]
 name = "bzip2-sys"
-version = "0.1.11+1.0.8"
+version = "0.1.13+1.0.8"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "736a955f3fa7875102d57c82b8cac37ec45224a07fd32d58f9f7a186b6cd4cdc"
+checksum = "225bff33b2141874fe80d71e07d6eec4f85c5c216453dd96388240f96e1acc14"
 dependencies = [
  "cc",
- "libc",
  "pkg-config",
 ]
@@ -944,13 +925,13 @@ checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5"
 [[package]]
 name = "cc"
-version = "1.0.104"
+version = "1.2.16"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "74b6a57f98764a267ff415d50a25e6e166f3831a5071af4995296ea97d210490"
+checksum = "be714c154be609ec7f5dad223a33bf1482fff90472de28f7362806e6d4832b8c"
 dependencies = [
  "jobserver",
  "libc",
- "once_cell",
+ "shlex",
 ]
 [[package]]
@@ -995,9 +976,9 @@ dependencies = [
 [[package]]
 name = "charabia"
-version = "0.9.2"
+version = "0.9.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "cf8921fe4d53ab8f9e8f9b72ce6f91726cfc40fffab1243d27db406b5e2e9cc2"
+checksum = "650d52f87a36472ea1c803dee49d6bfd23d426efa9363e2f4c4a0e6a236d3407"
 dependencies = [
  "aho-corasick",
  "csv",
@@ -1160,7 +1141,7 @@ version = "0.1.16"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "f9d839f2a20b0aee515dc581a6172f2321f96cab76c1a38a4c584a194955390e"
 dependencies = [
- "getrandom",
+ "getrandom 0.2.15",
  "once_cell",
  "tiny-keccak",
 ]
@@ -1671,7 +1652,7 @@ dependencies = [
 [[package]]
 name = "dump"
-version = "1.13.0"
+version = "1.14.0"
 dependencies = [
  "anyhow",
  "big_s",
@@ -1873,7 +1854,7 @@ checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be"
 [[package]]
 name = "file-store"
-version = "1.13.0"
+version = "1.14.0"
 dependencies = [
  "tempfile",
  "thiserror 2.0.9",
@@ -1895,7 +1876,7 @@ dependencies = [
 [[package]]
 name = "filter-parser"
-version = "1.13.0"
+version = "1.14.0"
 dependencies = [
  "insta",
  "nom",
@@ -1915,7 +1896,7 @@ dependencies = [
 [[package]]
 name = "flatten-serde-json"
-version = "1.13.0"
+version = "1.14.0"
 dependencies = [
  "criterion",
  "serde_json",
@@ -2054,7 +2035,7 @@ dependencies = [
 [[package]]
 name = "fuzzers"
-version = "1.13.0"
+version = "1.14.0"
 dependencies = [
  "arbitrary",
  "bumpalo",
@@ -2082,7 +2063,7 @@ version = "0.7.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "ce20bbb48248608ba4908b45fe36e17e40f56f8c6bb385ecf5d3c4a1e8b05a22"
 dependencies = [
- "bitflags 2.6.0",
+ "bitflags 2.9.0",
  "debugid",
  "fxhash",
  "serde",
@@ -2233,10 +2214,24 @@ dependencies = [
  "cfg-if",
  "js-sys",
  "libc",
- "wasi",
+ "wasi 0.11.0+wasi-snapshot-preview1",
  "wasm-bindgen",
 ]
+[[package]]
+name = "getrandom"
+version = "0.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "43a49c392881ce6d5c3b8cb70f98717b7c07aabbdff06687b9030dbfbe2725f8"
+dependencies = [
+ "cfg-if",
+ "js-sys",
+ "libc",
+ "wasi 0.13.3+wasi-0.2.2",
+ "wasm-bindgen",
+ "windows-targets 0.52.6",
+]
 [[package]]
 name = "gimli"
 version = "0.27.3"
@@ -2249,7 +2244,7 @@ version = "0.19.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "b903b73e45dc0c6c596f2d37eccece7c1c8bb6e4407b001096387c63d0d93724"
 dependencies = [
- "bitflags 2.6.0",
+ "bitflags 2.9.0",
  "libc",
  "libgit2-sys",
  "log",
@@ -2397,11 +2392,11 @@ checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
 [[package]]
 name = "heed"
-version = "0.20.5"
+version = "0.22.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7d4f449bab7320c56003d37732a917e18798e2f1709d80263face2b4f9436ddb"
+checksum = "6a56c94661ddfb51aa9cdfbf102cfcc340aa69267f95ebccc4af08d7c530d393"
 dependencies = [
- "bitflags 2.6.0",
+ "bitflags 2.9.0",
  "byteorder",
  "heed-traits",
  "heed-types",
@@ -2421,9 +2416,9 @@ checksum = "eb3130048d404c57ce5a1ac61a903696e8fcde7e8c2991e9fcfc1f27c3ef74ff"
 [[package]]
 name = "heed-types"
-version = "0.20.1"
+version = "0.21.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9d3f528b053a6d700b2734eabcd0fd49cb8230647aa72958467527b0b7917114"
+checksum = "13c255bdf46e07fb840d120a36dcc81f385140d7191c76a7391672675c01a55d"
 dependencies = [
  "bincode",
  "byteorder",
@@ -2743,14 +2738,14 @@ checksum = "206ca75c9c03ba3d4ace2460e57b189f39f43de612c2f85836e65c929701bb2d"
 [[package]]
 name = "index-scheduler"
-version = "1.13.0"
+version = "1.14.0"
 dependencies = [
  "anyhow",
- "arroy 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "big_s",
  "bincode",
  "bumpalo",
  "bumparaw-collections",
+ "byte-unit",
  "convert_case 0.6.0",
  "crossbeam-channel",
  "csv",
@@ -2759,6 +2754,7 @@ dependencies = [
  "enum-iterator",
  "file-store",
  "flate2",
+ "indexmap",
  "insta",
  "maplit",
  "meili-snap",
@@ -2941,16 +2937,17 @@ dependencies = [
 [[package]]
 name = "js-sys"
-version = "0.3.69"
+version = "0.3.77"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "29c15563dc2726973df627357ce0c9ddddbea194836909d655df6a75d2cf296d"
+checksum = "1cfaf33c695fc6e08064efbc1f72ec937429614f25eef83af942d0e227c3a28f"
 dependencies = [
+ "once_cell",
  "wasm-bindgen",
 ]
 [[package]]
 name = "json-depth-checker"
-version = "1.13.0"
+version = "1.14.0"
 dependencies = [
  "criterion",
  "serde_json",
@@ -3013,9 +3010,9 @@ dependencies = [
 [[package]]
 name = "libc"
-version = "0.2.169"
+version = "0.2.171"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b5aba8db14291edd000dfcc4d620c7ebfb122c613afb886ca8803fa4e128a20a"
+checksum = "c19937216e9d3aa9956d9bb8dfc0b0c8beb6058fc4f7a4dc4d850edf86a237d6"
 [[package]]
 name = "libgit2-sys"
@@ -3080,9 +3077,9 @@ dependencies = [
 [[package]]
 name = "lindera"
-version = "0.32.2"
+version = "0.32.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c6cbc1aad631a7da0a7e9bc4b8669fa92ac9ca8eeb7b35a807376dd3034443ff"
+checksum = "832c220475557e3b44a46cad1862b57f010f0c6e93d771d0e628e08689c068b1"
 dependencies = [
  "lindera-analyzer",
  "lindera-core",
@@ -3093,9 +3090,9 @@ dependencies = [
 [[package]]
 name = "lindera-analyzer"
-version = "0.32.2"
+version = "0.32.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "74508ffbb24e36905d1718b261460e378a748029b07bcd7e06f0d18500b8194c"
+checksum = "a8e26651714abf5167e6b6a80f5cdaa0cad41c5fcb84d8ba96bebafcb9029339"
 dependencies = [
  "anyhow",
  "bincode",
@@ -3123,9 +3120,9 @@ dependencies = [
 [[package]]
 name = "lindera-assets"
-version = "0.32.2"
+version = "0.32.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6a677c371ecb3bd02b751be306ea09876cd47cf426303ad5f10a3fd6f9a4ded6"
+checksum = "ebb01f1ca53c1e642234c6c7fdb9ac664ad0c1ab9502f33e4200201bac7e6ce7"
 dependencies = [
  "encoding",
  "flate2",
@@ -3136,9 +3133,9 @@ dependencies = [
 [[package]]
 name = "lindera-cc-cedict"
-version = "0.32.2"
+version = "0.32.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c35944000d05a177e981f037b5f0805f283b32f05a0c35713003bef136ca8cb4"
+checksum = "5f7618d9aa947fdd7c38eae2b79f0fd237ecb5067608f1363610ba20d20ab5a8"
 dependencies = [
  "bincode",
  "byteorder",
@@ -3150,9 +3147,9 @@ dependencies = [
 [[package]]
 name = "lindera-cc-cedict-builder"
-version = "0.32.2"
+version = "0.32.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "85b8f642bc9c9130682569975772a17336c6aab26d11fc0f823f3e663167ace6"
+checksum = "efdbcb809d81428935d601a78c94bfb39500749213f7320705f427a7a1d31aec"
 dependencies = [
  "anyhow",
  "lindera-core",
@@ -3162,9 +3159,9 @@ dependencies = [
 [[package]]
 name = "lindera-compress"
-version = "0.32.2"
+version = "0.32.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a7825d8d63592aa5727d67bd209170ac82df56c369533efbf0ddbac277bb68ec"
+checksum = "eac178afa2456dac469d3b1a2d7fbaf3e1ea796a1f52321e8ac29545a53c239c"
 dependencies = [
  "anyhow",
  "flate2",
@@ -3173,9 +3170,9 @@ dependencies = [
 [[package]]
 name = "lindera-core"
-version = "0.32.2"
+version = "0.32.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0c28191456debc98af6aa5f7db77872471983e9fa2a737b1c232b6ef543aed62"
+checksum = "649777465f48147ce593ab6db347e235e3af8f693a23f4437be94a1cdbdf5fdf"
 dependencies = [
  "anyhow",
  "bincode",
@@ -3190,9 +3187,9 @@ dependencies = [
 [[package]]
 name = "lindera-decompress"
-version = "0.32.2"
+version = "0.32.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4788a1ead2f63f3fc2888109272921dedd86a87b7d0bf05e9daab46600daac51"
+checksum = "9e3faaceb85e43ac250021866c6db3cdc9997b44b3d3ea498594d04edc91fc45"
 dependencies = [
  "anyhow",
  "flate2",
@@ -3201,9 +3198,9 @@ dependencies = [
 [[package]]
 name = "lindera-dictionary"
-version = "0.32.2"
+version = "0.32.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bdf5f91725e32b9a21b1656baa7030766c9bafc4de4b4ddeb8ffdde7224dd2f6"
+checksum = "31e15b2d2d8a4ad45f2e373a084931cf3dfbde15f124044e2436bb920af3366c"
 dependencies = [
  "anyhow",
  "bincode",
@@ -3226,9 +3223,9 @@ dependencies = [
 [[package]]
 name = "lindera-dictionary-builder"
-version = "0.32.2"
+version = "0.32.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e41f00ba7ac541b0ffd8c30e7a73f2dd197546cc5780462ec4f2e4782945a780"
+checksum = "59802949110545b59b663917ed3fd55dc3b3a8cde6bd20137d7fe24372cfb9aa"
 dependencies = [
  "anyhow",
  "bincode",
@@ -3248,9 +3245,9 @@ dependencies = [
 [[package]]
 name = "lindera-filter"
-version = "0.32.2"
+version = "0.32.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "273d27e01e1377e2647314a4a5b9bdca4b52a867b319069ebae8c10191146eca"
+checksum = "1320f118c3fc9e897f4ebfc16864e5ef8c0b06ba769c0a50e53f193f9d682bf8"
 dependencies = [
  "anyhow",
  "csv",
@@ -3273,9 +3270,9 @@ dependencies = [
 [[package]]
 name = "lindera-ipadic"
-version = "0.32.2"
+version = "0.32.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b97a52ff0af5acb700093badaf7078051ab9ffd9071859724445a60193995f1f"
+checksum = "5b4731bf3730f1f38266d7ee9bca7d460cd336645c9dfd4e6a1082e58ab1e993"
 dependencies = [
  "bincode",
  "byteorder",
@@ -3287,9 +3284,9 @@ dependencies = [
 [[package]]
 name = "lindera-ipadic-builder"
-version = "0.32.2"
+version = "0.32.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bf5031c52686128db13f774b2c5a8abfd52b4cc1f904041d8411aa19d630ce4d"
+checksum = "309966c12e682f67205c3cd3c8dc55bbdcd1eb3b5c7c5cb41fb8acd18906d340"
 dependencies = [
  "anyhow",
  "lindera-core",
@@ -3299,9 +3296,9 @@ dependencies = [
 [[package]]
 name = "lindera-ipadic-neologd"
-version = "0.32.2"
+version = "0.32.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d6b36764b27b169aa11d24888141f206a6c246a5b195c1e67127485bac512fb6"
+checksum = "e90e919b4cfb9962d24ee1e1d50a7c163bbf356376495ad66d1996e20b9f9e44"
 dependencies = [
  "bincode",
  "byteorder",
@@ -3313,9 +3310,9 @@ dependencies = [
 [[package]]
 name = "lindera-ipadic-neologd-builder"
-version = "0.32.2"
+version = "0.32.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "abf36e40ace904741efdd883ed5c4dba6425f65156a0fb5d3f73a386335950dc"
+checksum = "7e517df0d501f9f8bf3126da20fc8cb9a5e37921e0eec1824d7a62f096463e02"
 dependencies = [
  "anyhow",
  "lindera-core",
@@ -3325,9 +3322,9 @@ dependencies = [
 [[package]]
 name = "lindera-ko-dic"
-version = "0.32.2"
+version = "0.32.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4c92a1a3564b531953f0238cbcea392f2905f7b27b449978cf9e702a80e1086d"
+checksum = "e9c6da4e68bc8b452a54b96d65361ebdceb4b6f36ecf262425c0e1f77960ae82"
 dependencies = [
  "bincode",
  "byteorder",
@@ -3340,9 +3337,9 @@ dependencies = [
 [[package]]
 name = "lindera-ko-dic-builder"
-version = "0.32.2"
+version = "0.32.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9f2c60425abc1548570c2568858f74a1f042105ecd89faa39c651b4315350fd9"
+checksum = "afc95884cc8f6dfb176caf5991043a4acf94c359215bbd039ea765e00454f271"
 dependencies = [
  "anyhow",
  "lindera-core",
@@ -3352,9 +3349,9 @@ dependencies = [
 [[package]]
 name = "lindera-tokenizer"
-version = "0.32.2"
+version = "0.32.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "903e558981bcb6f59870aa7d6b4bcb09e8f7db778886a6a70f67fd74c9fa2ca3"
+checksum = "d122042e1232a55c3604692445952a134e523822e9b4b9ab32a53ff890037ad4"
 dependencies = [
  "bincode",
  "lindera-core",
@@ -3366,9 +3363,9 @@ dependencies = [
 [[package]]
 name = "lindera-unidic"
-version = "0.32.2"
+version = "0.32.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d227c3ce9cbd905f865c46c65a0470fd04e89b71104d7f92baa71a212ffe1d4b"
+checksum = "cbffae1fb2f2614abdcb50f99b138476dbac19862ffa57bfdc9c7b5d5b22a90c"
 dependencies = [
  "bincode",
  "byteorder",
@@ -3381,9 +3378,9 @@ dependencies = [
 [[package]]
 name = "lindera-unidic-builder"
-version = "0.32.2"
+version = "0.32.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "99e2c50015c242e02c451acb6748667ac6fd1d3d667cd7db48cd89e2f2d2377e"
+checksum = "fe50055327712ebd1bcc74b657cf78c728a78b9586e3f99d5dd0b6a0be221c5d"
 dependencies = [
  "anyhow",
  "lindera-core",
@@ -3468,9 +3465,9 @@ checksum = "4ee93343901ab17bd981295f2cf0026d4ad018c7c31ba84549a4ddbb47a45104"
 [[package]]
 name = "lmdb-master-sys"
-version = "0.2.4"
+version = "0.2.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "472c3760e2a8d0f61f322fb36788021bb36d573c502b50fa3e2bcaac3ec326c9"
+checksum = "864808e0b19fb6dd3b70ba94ee671b82fce17554cf80aeb0a155c65bb08027df"
 dependencies = [
  "cc",
  "doxygen-rs",
@@ -3513,9 +3510,18 @@ checksum = "9374ef4228402d4b7e403e5838cb880d9ee663314b0a900d5a6aabf0c213552e"
 [[package]]
 name = "log"
-version = "0.4.21"
+version = "0.4.26"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c"
+checksum = "30bde2b3dc3671ae49d8e2e9f044c7c005836e7a023ee57cffa25ab82764bb9e"
+[[package]]
+name = "lru"
+version = "0.13.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "227748d55f2f0ab4735d87fd623798cb6b664512fe979705f829c9f81c934465"
+dependencies = [
+ "hashbrown 0.15.2",
+]
 [[package]]
 name = "lzma-rs"
@@ -3527,6 +3533,17 @@ dependencies = [
  "crc",
 ]
+[[package]]
+name = "lzma-sys"
+version = "0.1.20"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5fda04ab3764e6cde78b9974eec4f779acaba7c4e84b36eca3cf77c581b85d27"
+dependencies = [
+ "cc",
+ "libc",
+ "pkg-config",
+]
 [[package]]
 name = "macro_rules_attribute"
 version = "0.2.0"
@@ -3569,7 +3586,7 @@ checksum = "490cc448043f947bae3cbee9c203358d62dbee0db12107a74be5c30ccfd09771"
 [[package]]
 name = "meili-snap"
-version = "1.13.0"
+version = "1.14.0"
 dependencies = [
  "insta",
  "md5",
@@ -3578,7 +3595,7 @@ dependencies = [
 [[package]]
 name = "meilisearch"
-version = "1.13.0"
+version = "1.14.0"
 dependencies = [
  "actix-cors",
  "actix-http",
@@ -3665,12 +3682,12 @@ dependencies = [
  "uuid",
  "wiremock",
  "yaup",
- "zip 2.2.2",
+ "zip 2.3.0",
 ]
 [[package]]
 name = "meilisearch-auth"
-version = "1.13.0"
+version = "1.14.0"
 dependencies = [
  "base64 0.22.1",
  "enum-iterator",
@@ -3689,7 +3706,7 @@ dependencies = [
 [[package]]
 name = "meilisearch-types"
-version = "1.13.0"
+version = "1.14.0"
 dependencies = [
  "actix-web",
  "anyhow",
@@ -3723,10 +3740,9 @@ dependencies = [
 [[package]]
 name = "meilitool"
-version = "1.13.0"
+version = "1.14.0"
 dependencies = [
  "anyhow",
- "arroy 0.5.0 (git+https://github.com/meilisearch/arroy/?tag=DO-NOT-DELETE-upgrade-v04-to-v05)",
  "clap",
  "dump",
  "file-store",
@@ -3758,10 +3774,10 @@ dependencies = [
 [[package]]
 name = "milli"
-version = "1.13.0"
+version = "1.14.0"
 dependencies = [
  "allocator-api2",
- "arroy 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)",
+ "arroy",
  "bbqueue",
  "big_s",
  "bimap",
@@ -3798,6 +3814,7 @@ dependencies = [
  "json-depth-checker",
  "levenshtein_automata",
  "liquid",
+ "lru",
  "maplit",
  "md5",
  "meili-snap",
@@ -3891,7 +3908,7 @@ checksum = "a4a650543ca06a924e8b371db273b2756685faae30f8487da1b56505a8f78b0c"
 dependencies = [
  "libc",
  "log",
- "wasi",
+ "wasi 0.11.0+wasi-snapshot-preview1",
  "windows-sys 0.48.0",
 ]
@@ -3902,7 +3919,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "2886843bf800fba2e3377cff24abf6379b4c4d5c6681eaf9ea5b0d15090450bd"
 dependencies = [
  "libc",
- "wasi",
+ "wasi 0.11.0+wasi-snapshot-preview1",
  "windows-sys 0.52.0",
 ]
@@ -4129,9 +4146,9 @@ checksum = "ae4512a8f418ac322335255a72361b9ac927e106f4d7fe6ab4d8ac59cb01f7a9"
 [[package]]
 name = "once_cell"
-version = "1.20.2"
+version = "1.21.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1261fe7e33c73b354eab43b1273a57c8f967d0391e80353e51f764ac02cf6775"
+checksum = "cde51589ab56b20a6f686b2c68f7a0bd6add753d697abf720d63f8db3ab7b1ad"
 [[package]]
 name = "onig"
@@ -4270,7 +4287,7 @@ checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e"
 [[package]]
 name = "permissive-json-pointer"
-version = "1.13.0"
+version = "1.14.0"
 dependencies = [
  "big_s",
  "serde_json",
@@ -4518,7 +4535,7 @@ version = "0.16.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "731e0d9356b0c25f16f33b5be79b1c57b562f141ebfcdb0ad8ac2c13a24293b4"
 dependencies = [
- "bitflags 2.6.0",
+ "bitflags 2.9.0",
  "hex",
  "lazy_static",
  "procfs-core",
@@ -4531,7 +4548,7 @@ version = "0.16.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "2d3554923a69f4ce04c4a754260c338f505ce22642d3830e049a399fc2059a29"
 dependencies = [
- "bitflags 2.6.0",
+ "bitflags 2.9.0",
  "hex",
 ]
@@ -4679,7 +4696,7 @@ version = "0.6.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c"
 dependencies = [
- "getrandom",
+ "getrandom 0.2.15",
 ]
@@ -4771,7 +4788,7 @@ version = "0.4.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "b033d837a7cf162d7993aded9304e30a83213c648b6e389db233191f891e5c2b"
 dependencies = [
- "getrandom",
+ "getrandom 0.2.15",
  "redox_syscall 0.2.16",
  "thiserror 1.0.69",
 ]
@@ -4872,7 +4889,7 @@ version = "1.20.0"
 source = "git+https://github.com/rhaiscript/rhai?rev=ef3df63121d27aacd838f366f2b83fd65f20a1e4#ef3df63121d27aacd838f366f2b83fd65f20a1e4"
 dependencies = [
  "ahash 0.8.11",
- "bitflags 2.6.0",
+ "bitflags 2.9.0",
  "instant",
  "num-traits",
  "once_cell",
@@ -4895,15 +4912,14 @@ dependencies = [
 [[package]]
 name = "ring"
-version = "0.17.8"
+version = "0.17.14"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c17fa4cb658e3583423e915b9f3acc01cceaee1860e33d59ebae66adc3a2dc0d"
+checksum = "a4689e6c2294d81e88dc6261c768b63bc4fcdb852be6d1352498b114f61383b7"
 dependencies = [
  "cc",
  "cfg-if",
- "getrandom",
+ "getrandom 0.2.15",
  "libc",
- "spin",
  "untrusted",
  "windows-sys 0.52.0",
 ]
@@ -5009,7 +5025,7 @@ version = "0.38.41"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "d7f649912bc1495e167a6edee79151c84b1bad49748cb4f1f1167f459f6224f6"
 dependencies = [
- "bitflags 2.6.0",
+ "bitflags 2.9.0",
  "errno",
  "libc",
  "linux-raw-sys",
@@ -5131,9 +5147,9 @@ checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4"
 [[package]]
 name = "serde"
-version = "1.0.217"
+version = "1.0.219"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "02fc4265df13d6fa1d00ecff087228cc0a2b5f3c0e87e258d8b94a156e984c70"
+checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6"
 dependencies = [
  "serde_derive",
 ]
@@ -5149,9 +5165,9 @@ dependencies = [
 [[package]]
 name = "serde_derive"
-version = "1.0.217"
+version = "1.0.219"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5a9bf7cf98d04a2b28aead066b7496853d4779c9cc183c440dbac457641e19a0"
+checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00"
 dependencies = [
  "proc-macro2",
  "quote",
@@ -5160,9 +5176,9 @@ dependencies = [
 [[package]]
 name = "serde_json"
-version = "1.0.135"
+version = "1.0.140"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2b0d7ba2887406110130a978386c4e1befb98c674b4fba677954e4db976630d9"
+checksum = "20068b6e96dc6c9bd23e01df8827e6c7e1f2fddd43c21810382803c136b99373"
 dependencies = [
  "indexmap",
  "itoa",
@@ -5530,7 +5546,7 @@ version = "0.5.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "ec7dddc5f0fee506baf8b9fdb989e242f17e4b11c61dfbb0635b705217199eea"
 dependencies = [
- "bitflags 2.6.0",
+ "bitflags 2.9.0",
  "byteorder",
  "enum-as-inner",
  "libc",
@@ -5586,7 +5602,7 @@ checksum = "9a8a559c81686f576e8cd0290cd2a24a2a9ad80c98b3478856500fcbd7acd704"
 dependencies = [
  "cfg-if",
  "fastrand",
- "getrandom",
+ "getrandom 0.2.15",
  "once_cell",
  "rustix",
  "windows-sys 0.52.0",
@@ -5761,7 +5777,7 @@ dependencies = [
  "aho-corasick",
  "derive_builder 0.12.0",
  "esaxx-rs",
- "getrandom",
+ "getrandom 0.2.15",
  "itertools 0.12.1",
  "lazy_static",
  "log",
@@ -6104,9 +6120,9 @@ checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"
 [[package]]
 name = "unicode-normalization"
-version = "0.1.23"
+version = "0.1.24"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a56d1686db2308d901306f92a263857ef59ea39678a5458e7cb17f01415101f5"
+checksum = "5033c97c4262335cded6d6fc3e5c18ab755e1a3dc96376350f3d8e9f009ad956"
 dependencies = [
  "tinyvec",
 ]
@@ -6248,7 +6264,7 @@ version = "1.11.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "f8c5f0a0af699448548ad1a2fbf920fb4bee257eae39953ba95cb84891a0446a"
 dependencies = [
- "getrandom",
+ "getrandom 0.2.15",
  "serde",
 ]
@@ -6345,24 +6361,34 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
 [[package]]
-name = "wasm-bindgen"
-version = "0.2.92"
+name = "wasi"
+version = "0.13.3+wasi-0.2.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4be2531df63900aeb2bca0daaaddec08491ee64ceecbee5076636a3b026795a8"
+checksum = "26816d2e1a4a36a2940b96c5296ce403917633dff8f3440e9b236ed6f6bacad2"
+dependencies = [
+ "wit-bindgen-rt",
+]
+[[package]]
+name = "wasm-bindgen"
+version = "0.2.100"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1edc8929d7499fc4e8f0be2262a241556cfc54a0bea223790e71446f2aab1ef5"
 dependencies = [
  "cfg-if",
+ "once_cell",
+ "rustversion",
  "wasm-bindgen-macro",
 ]
 [[package]]
 name = "wasm-bindgen-backend"
-version = "0.2.92"
+version = "0.2.100"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "614d787b966d3989fa7bb98a654e369c762374fd3213d212cfc0251257e747da"
+checksum = "2f0a0651a5c2bc21487bde11ee802ccaf4c51935d0d3d42a6101f98161700bc6"
 dependencies = [
  "bumpalo",
  "log",
- "once_cell",
  "proc-macro2",
  "quote",
  "syn 2.0.87",
@@ -6383,9 +6409,9 @@ dependencies = [
 [[package]]
 name = "wasm-bindgen-macro"
-version = "0.2.92"
+version = "0.2.100"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a1f8823de937b71b9460c0c34e25f3da88250760bec0ebac694b49997550d726"
+checksum = "7fe63fc6d09ed3792bd0897b314f53de8e16568c2b3f7982f468c0bf9bd0b407"
 dependencies = [
  "quote",
  "wasm-bindgen-macro-support",
@@ -6393,9 +6419,9 @@ dependencies = [
 [[package]]
 name = "wasm-bindgen-macro-support"
-version = "0.2.92"
+version = "0.2.100"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7"
+checksum = "8ae87ea40c9f689fc23f209965b6fb8a99ad69aeeb0231408be24920604395de"
 dependencies = [
  "proc-macro2",
  "quote",
@@ -6406,9 +6432,12 @@ dependencies = [
 [[package]]
 name = "wasm-bindgen-shared"
-version = "0.2.92"
+version = "0.2.100"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "af190c94f2773fdb3729c55b007a722abb5384da03bc0986df4c289bf5567e96"
+checksum = "1a05d73b933a847d6cccdda8f838a22ff101ad9bf93e33684f39c1f5f0eece3d"
+dependencies = [
+ "unicode-ident",
+]
 [[package]]
 name = "wasm-streams"
@@ -6813,6 +6842,15 @@ dependencies = [
  "url",
 ]
+[[package]]
+name = "wit-bindgen-rt"
+version = "0.33.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3268f3d866458b787f390cf61f4bbb563b922d091359f9608842999eaee3943c"
+dependencies = [
+ "bitflags 2.9.0",
+]
 [[package]]
 name = "write16"
 version = "1.0.0"
@@ -6847,7 +6885,7 @@ dependencies = [
 [[package]]
 name = "xtask"
-version = "1.13.0"
+version = "1.14.0"
 dependencies = [
  "anyhow",
  "build-info",
@@ -6868,6 +6906,15 @@ dependencies = [
  "uuid",
 ]
+[[package]]
+name = "xz2"
+version = "0.1.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "388c44dc09d76f1536602ead6d325eb532f5c122f17782bd57fb47baeeb767e2"
+dependencies = [
+ "lzma-sys",
+]
 [[package]]
 name = "yada"
 version = "0.5.1"
@@ -7009,9 +7056,9 @@ dependencies = [
 [[package]]
 name = "zip"
-version = "2.2.2"
+version = "2.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ae9c1ea7b3a5e1f4b922ff856a129881167511563dc219869afe3787fc0c1a45"
+checksum = "84e9a772a54b54236b9b744aaaf8d7be01b4d6e99725523cb82cb32d1c81b1d7"
 dependencies = [
  "aes",
  "arbitrary",
@@ -7022,15 +7069,16 @@ dependencies = [
  "deflate64",
  "displaydoc",
  "flate2",
+ "getrandom 0.3.1",
  "hmac",
  "indexmap",
  "lzma-rs",
  "memchr",
  "pbkdf2",
- "rand",
  "sha1",
  "thiserror 2.0.9",
  "time",
+ "xz2",
  "zeroize",
  "zopfli",
  "zstd",

View File

@@ -22,7 +22,7 @@ members = [
 ]
 [workspace.package]
-version = "1.13.0"
+version = "1.14.0"
 authors = [
   "Quentin de Quelen <quentin@dequelen.me>",
   "Clément Renault <clement@meilisearch.com>",
@@ -36,6 +36,12 @@ license = "MIT"
 [profile.release]
 codegen-units = 1
+# We now compile heed without the NDEBUG define for better performance.
+# However, we still enable debug assertions for a better detection of
+# disk corruption on the cloud or in OSS.
+[profile.release.package.heed]
+debug-assertions = true
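To make the effect of this override concrete, here is a tiny standalone sketch (not heed code, just an illustration of `debug-assertions` semantics):

```rust
fn main() {
    let page_size: usize = 4096;
    // With `debug-assertions = true` set for a package, checks like this stay
    // active even in `cargo build --release`; by default the release profile
    // compiles them out entirely.
    debug_assert!(page_size.is_power_of_two(), "corrupted page size: {page_size}");
    println!("page size looks sane");
}
```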
 [profile.dev.package.flate2]
 opt-level = 3

View File

@@ -20,7 +20,7 @@
 <p align="center">
   <a href="https://deps.rs/repo/github/meilisearch/meilisearch"><img src="https://deps.rs/repo/github/meilisearch/meilisearch/status.svg" alt="Dependency status"></a>
   <a href="https://github.com/meilisearch/meilisearch/blob/main/LICENSE"><img src="https://img.shields.io/badge/license-MIT-informational" alt="License"></a>
-  <a href="https://ms-bors.herokuapp.com/repositories/52"><img src="https://bors.tech/images/badge_small.svg" alt="Bors enabled"></a>
+  <a href="https://github.com/meilisearch/meilisearch/queue"><img alt="Merge Queues enabled" src="https://img.shields.io/badge/Merge_Queues-enabled-%2357cf60?logo=github"></a>
 </p>
 <p align="center">⚡ A lightning-fast search engine that fits effortlessly into your apps, websites, and workflow 🔍</p>

View File

@@ -1,11 +0,0 @@
-status = [
-  'Tests on ubuntu-20.04',
-  'Tests on macos-13',
-  'Tests on windows-2022',
-  'Run Clippy',
-  'Run Rustfmt',
-  'Run tests in debug',
-]
-pr_status = ['Milestone Check']
-# 3 hours timeout
-timeout-sec = 10800

View File

@@ -12,7 +12,7 @@ use milli::progress::Progress;
 use milli::update::new::indexer;
 use milli::update::{IndexerConfig, Settings};
 use milli::vector::EmbeddingConfigs;
-use milli::Index;
+use milli::{FilterableAttributesRule, Index};
 use rand::seq::SliceRandom;
 use rand_chacha::rand_core::SeedableRng;
 use roaring::RoaringBitmap;
@@ -35,7 +35,8 @@ fn setup_dir(path: impl AsRef<Path>) {
 fn setup_index() -> Index {
     let path = "benches.mmdb";
     setup_dir(path);
-    let mut options = EnvOpenOptions::new();
+    let options = EnvOpenOptions::new();
+    let mut options = options.read_txn_without_tls();
     options.map_size(100 * 1024 * 1024 * 1024); // 100 GB
     options.max_readers(100);
     Index::new(options, path, true).unwrap()
@@ -57,7 +58,8 @@ fn setup_settings<'t>(
     let searchable_fields = searchable_fields.iter().map(|s| s.to_string()).collect();
     builder.set_searchable_fields(searchable_fields);
-    let filterable_fields = filterable_fields.iter().map(|s| s.to_string()).collect();
+    let filterable_fields =
+        filterable_fields.iter().map(|s| FilterableAttributesRule::Field(s.to_string())).collect();
     builder.set_filterable_fields(filterable_fields);
     let sortable_fields = sortable_fields.iter().map(|s| s.to_string()).collect();
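Put together, the new open sequence looks roughly like the sketch below (assembled from the hunk above; the `Index::new` signature and the exact semantics of `read_txn_without_tls` are assumed from the diff and heed 0.22's split builder):

```rust
use heed::EnvOpenOptions;
use milli::Index;

fn open_bench_index(path: &str) -> Index {
    // heed 0.22 makes the TLS behaviour explicit: `read_txn_without_tls()`
    // consumes the default builder and returns one whose read transactions
    // are not tied to thread-local storage.
    let options = EnvOpenOptions::new();
    let mut options = options.read_txn_without_tls();
    options.map_size(100 * 1024 * 1024 * 1024); // 100 GiB
    options.max_readers(100);
    Index::new(options, path, true).unwrap()
}
```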

View File

@@ -2,7 +2,7 @@ mod datasets_paths;
 mod utils;
 use criterion::{criterion_group, criterion_main};
-use milli::update::Settings;
+use milli::{update::Settings, FilterableAttributesRule};
 use utils::Conf;
 #[cfg(not(windows))]
@@ -21,8 +21,10 @@ fn base_conf(builder: &mut Settings) {
     ["name", "alternatenames", "elevation"].iter().map(|s| s.to_string()).collect();
     builder.set_searchable_fields(searchable_fields);
-    let filterable_fields =
-        ["_geo", "population", "elevation"].iter().map(|s| s.to_string()).collect();
+    let filterable_fields = ["_geo", "population", "elevation"]
+        .iter()
+        .map(|s| FilterableAttributesRule::Field(s.to_string()))
+        .collect();
     builder.set_filterable_fields(filterable_fields);
     let sortable_fields =

View File

@@ -2,7 +2,7 @@ mod datasets_paths;
 mod utils;
 use criterion::{criterion_group, criterion_main};
-use milli::update::Settings;
+use milli::{update::Settings, FilterableAttributesRule};
 use utils::Conf;
 #[cfg(not(windows))]
@@ -22,7 +22,7 @@ fn base_conf(builder: &mut Settings) {
     let faceted_fields = ["released-timestamp", "duration-float", "genre", "country", "artist"]
         .iter()
-        .map(|s| s.to_string())
+        .map(|s| FilterableAttributesRule::Field(s.to_string()))
         .collect();
     builder.set_filterable_fields(faceted_fields);
 }
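The pattern is the same across these benchmark hunks: plain field names are wrapped in an explicit rule variant before being handed to the settings builder. A condensed, self-contained sketch (the `Vec` target of the `collect()` is an assumption; the builder call in the comment mirrors the diffs above):

```rust
use milli::FilterableAttributesRule;

fn main() {
    // Before this change, filterable attributes were plain `String`s; now
    // each one becomes an explicit `Field` rule.
    let filterable: Vec<FilterableAttributesRule> = ["_geo", "population", "elevation"]
        .iter()
        .map(|s| FilterableAttributesRule::Field(s.to_string()))
        .collect();
    println!("{} filterable rules", filterable.len());
    // `builder.set_filterable_fields(filterable)` would then receive rules
    // instead of raw field names.
}
```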

View File

@@ -65,7 +65,8 @@ pub fn base_setup(conf: &Conf) -> Index {
 }
 create_dir_all(conf.database_name).unwrap();
-let mut options = EnvOpenOptions::new();
+let options = EnvOpenOptions::new();
+let mut options = options.read_txn_without_tls();
 options.map_size(100 * 1024 * 1024 * 1024); // 100 GB
 options.max_readers(100);
 let index = Index::new(options, conf.database_name, true).unwrap();

View File

@@ -10,8 +10,10 @@ dump
 ├── instance-uid.uuid
 ├── keys.jsonl
 ├── metadata.json
 ├── tasks
 │   ├── update_files
 │   │   └── [task_id].jsonl
+│   └── queue.jsonl
+└── batches
     └── queue.jsonl
 ```

View File

@@ -228,14 +228,16 @@ pub(crate) mod test {
 use big_s::S;
 use maplit::{btreemap, btreeset};
+use meilisearch_types::batches::{Batch, BatchEnqueuedAt, BatchStats};
 use meilisearch_types::facet_values_sort::FacetValuesSort;
-use meilisearch_types::features::RuntimeTogglableFeatures;
+use meilisearch_types::features::{Network, Remote, RuntimeTogglableFeatures};
 use meilisearch_types::index_uid_pattern::IndexUidPattern;
 use meilisearch_types::keys::{Action, Key};
-use meilisearch_types::milli;
 use meilisearch_types::milli::update::Setting;
+use meilisearch_types::milli::{self, FilterableAttributesRule};
 use meilisearch_types::settings::{Checked, FacetingSettings, Settings};
-use meilisearch_types::tasks::{Details, Status};
+use meilisearch_types::task_view::DetailsView;
+use meilisearch_types::tasks::{Details, Kind, Status};
 use serde_json::{json, Map, Value};
 use time::macros::datetime;
 use uuid::Uuid;
@@ -277,7 +279,10 @@
 let settings = Settings {
     displayed_attributes: Setting::Set(vec![S("race"), S("name")]).into(),
     searchable_attributes: Setting::Set(vec![S("name"), S("race")]).into(),
-    filterable_attributes: Setting::Set(btreeset! { S("race"), S("age") }),
+    filterable_attributes: Setting::Set(vec![
+        FilterableAttributesRule::Field(S("race")),
+        FilterableAttributesRule::Field(S("age")),
+    ]),
     sortable_attributes: Setting::Set(btreeset! { S("age") }),
     ranking_rules: Setting::NotSet,
     stop_words: Setting::NotSet,
@@ -305,6 +310,33 @@
 settings.check()
 }
+pub fn create_test_batches() -> Vec<Batch> {
+    vec![Batch {
+        uid: 0,
+        details: DetailsView {
+            received_documents: Some(12),
+            indexed_documents: Some(Some(10)),
+            ..DetailsView::default()
+        },
+        progress: None,
+        stats: BatchStats {
+            total_nb_tasks: 1,
+            status: maplit::btreemap! { Status::Succeeded => 1 },
+            types: maplit::btreemap! { Kind::DocumentAdditionOrUpdate => 1 },
+            index_uids: maplit::btreemap! { "doggo".to_string() => 1 },
+            progress_trace: Default::default(),
+            write_channel_congestion: None,
+            internal_database_sizes: Default::default(),
+        },
+        enqueued_at: Some(BatchEnqueuedAt {
+            earliest: datetime!(2022-11-11 0:00 UTC),
+            oldest: datetime!(2022-11-11 0:00 UTC),
+        }),
+        started_at: datetime!(2022-11-20 0:00 UTC),
+        finished_at: Some(datetime!(2022-11-21 0:00 UTC)),
+    }]
+}
 pub fn create_test_tasks() -> Vec<(TaskDump, Option<Vec<Document>>)> {
     vec![
         (
@@ -427,6 +459,15 @@
     index.flush().unwrap();
     index.settings(&settings).unwrap();
+    // ========== pushing the batch queue
+    let batches = create_test_batches();
+    let mut batch_queue = dump.create_batches_queue().unwrap();
+    for batch in &batches {
+        batch_queue.push_batch(batch).unwrap();
+    }
+    batch_queue.flush().unwrap();
     // ========== pushing the task queue
     let tasks = create_test_tasks();
@@ -455,6 +496,10 @@
     dump.create_experimental_features(features).unwrap();
+    // ========== network
+    let network = create_test_network();
+    dump.create_network(network).unwrap();
     // create the dump
     let mut file = tempfile::tempfile().unwrap();
     dump.persist_to(&mut file).unwrap();
@@ -467,6 +512,13 @@
     RuntimeTogglableFeatures::default()
 }
+fn create_test_network() -> Network {
+    Network {
+        local: Some("myself".to_string()),
+        remotes: maplit::btreemap! {"other".to_string() => Remote { url: "http://test".to_string(), search_api_key: Some("apiKey".to_string()) }},
+    }
+}
 #[test]
 fn test_creating_and_read_dump() {
     let mut file = create_test_dump();
@@ -515,5 +567,9 @@
     // ==== checking the features
     let expected = create_test_features();
     assert_eq!(dump.features().unwrap().unwrap(), expected);
+    // ==== checking the network
+    let expected = create_test_network();
+    assert_eq!(&expected, dump.network().unwrap().unwrap());
 }
 }

View File

@@ -196,6 +196,10 @@ impl CompatV5ToV6 {
     pub fn features(&self) -> Result<Option<v6::RuntimeTogglableFeatures>> {
         Ok(None)
     }
+    pub fn network(&self) -> Result<Option<&v6::Network>> {
+        Ok(None)
+    }
 }
 pub enum CompatIndexV5ToV6 {
@@ -318,7 +322,16 @@ impl<T> From<v5::Settings<T>> for v6::Settings<v6::Unchecked> {
     v6::Settings {
         displayed_attributes: v6::Setting::from(settings.displayed_attributes).into(),
         searchable_attributes: v6::Setting::from(settings.searchable_attributes).into(),
-        filterable_attributes: settings.filterable_attributes.into(),
+        filterable_attributes: match settings.filterable_attributes {
+            v5::settings::Setting::Set(filterable_attributes) => v6::Setting::Set(
+                filterable_attributes
+                    .into_iter()
+                    .map(v6::FilterableAttributesRule::Field)
+                    .collect(),
+            ),
+            v5::settings::Setting::Reset => v6::Setting::Reset,
+            v5::settings::Setting::NotSet => v6::Setting::NotSet,
+        },
         sortable_attributes: settings.sortable_attributes.into(),
         ranking_rules: {
             match settings.ranking_rules {

View File

@ -23,6 +23,7 @@ mod v6;
pub type Document = serde_json::Map<String, serde_json::Value>; pub type Document = serde_json::Map<String, serde_json::Value>;
pub type UpdateFile = dyn Iterator<Item = Result<Document>>; pub type UpdateFile = dyn Iterator<Item = Result<Document>>;
#[allow(clippy::large_enum_variant)]
pub enum DumpReader { pub enum DumpReader {
Current(V6Reader), Current(V6Reader),
Compat(CompatV5ToV6), Compat(CompatV5ToV6),
@ -101,6 +102,13 @@ impl DumpReader {
} }
} }
pub fn batches(&mut self) -> Result<Box<dyn Iterator<Item = Result<v6::Batch>> + '_>> {
match self {
DumpReader::Current(current) => Ok(current.batches()),
DumpReader::Compat(_compat) => Ok(Box::new(std::iter::empty())),
}
}
pub fn keys(&mut self) -> Result<Box<dyn Iterator<Item = Result<v6::Key>> + '_>> {
match self {
DumpReader::Current(current) => Ok(current.keys()),
@ -114,6 +122,13 @@ impl DumpReader {
DumpReader::Compat(compat) => compat.features(),
}
}
pub fn network(&self) -> Result<Option<&v6::Network>> {
match self {
DumpReader::Current(current) => Ok(current.network()),
DumpReader::Compat(compat) => compat.network(),
}
}
}

impl From<V6Reader> for DumpReader {
@ -219,6 +234,10 @@ pub(crate) mod test {
insta::assert_snapshot!(dump.date().unwrap(), @"2024-05-16 15:51:34.151044 +00:00:00"); insta::assert_snapshot!(dump.date().unwrap(), @"2024-05-16 15:51:34.151044 +00:00:00");
insta::assert_debug_snapshot!(dump.instance_uid().unwrap(), @"None"); insta::assert_debug_snapshot!(dump.instance_uid().unwrap(), @"None");
// batches didn't exists at the time
let batches = dump.batches().unwrap().collect::<Result<Vec<_>>>().unwrap();
meili_snap::snapshot!(meili_snap::json_string!(batches), @"[]");
// tasks // tasks
let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap(); let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap();
let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip(); let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip();
@ -328,6 +347,7 @@ pub(crate) mod test {
}
assert_eq!(dump.features().unwrap().unwrap(), RuntimeTogglableFeatures::default());
assert_eq!(dump.network().unwrap(), None);
}
#[test]
@ -339,6 +359,10 @@ pub(crate) mod test {
insta::assert_snapshot!(dump.date().unwrap(), @"2023-07-06 7:10:27.21958 +00:00:00"); insta::assert_snapshot!(dump.date().unwrap(), @"2023-07-06 7:10:27.21958 +00:00:00");
insta::assert_debug_snapshot!(dump.instance_uid().unwrap(), @"None"); insta::assert_debug_snapshot!(dump.instance_uid().unwrap(), @"None");
// batches didn't exists at the time
let batches = dump.batches().unwrap().collect::<Result<Vec<_>>>().unwrap();
meili_snap::snapshot!(meili_snap::json_string!(batches), @"[]");
// tasks // tasks
let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap(); let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap();
let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip(); let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip();
@ -373,6 +397,27 @@ pub(crate) mod test {
assert_eq!(dump.features().unwrap().unwrap(), RuntimeTogglableFeatures::default());
}
#[test]
fn import_dump_v6_network() {
let dump = File::open("tests/assets/v6-with-network.dump").unwrap();
let dump = DumpReader::open(dump).unwrap();
// top level infos
insta::assert_snapshot!(dump.date().unwrap(), @"2025-01-29 15:45:32.738676 +00:00:00");
insta::assert_debug_snapshot!(dump.instance_uid().unwrap(), @"None");
// network
let network = dump.network().unwrap().unwrap();
insta::assert_snapshot!(network.local.as_ref().unwrap(), @"ms-0");
insta::assert_snapshot!(network.remotes.get("ms-0").as_ref().unwrap().url, @"http://localhost:7700");
insta::assert_snapshot!(network.remotes.get("ms-0").as_ref().unwrap().search_api_key.is_none(), @"true");
insta::assert_snapshot!(network.remotes.get("ms-1").as_ref().unwrap().url, @"http://localhost:7701");
insta::assert_snapshot!(network.remotes.get("ms-1").as_ref().unwrap().search_api_key.is_none(), @"true");
insta::assert_snapshot!(network.remotes.get("ms-2").as_ref().unwrap().url, @"http://ms-5679.example.meilisearch.io");
insta::assert_snapshot!(network.remotes.get("ms-2").as_ref().unwrap().search_api_key.as_ref().unwrap(), @"foo");
}
#[test]
fn import_dump_v5() {
let dump = File::open("tests/assets/v5.dump").unwrap();
@ -382,6 +427,10 @@ pub(crate) mod test {
insta::assert_snapshot!(dump.date().unwrap(), @"2022-10-04 15:55:10.344982459 +00:00:00"); insta::assert_snapshot!(dump.date().unwrap(), @"2022-10-04 15:55:10.344982459 +00:00:00");
insta::assert_snapshot!(dump.instance_uid().unwrap().unwrap(), @"9e15e977-f2ae-4761-943f-1eaf75fd736d"); insta::assert_snapshot!(dump.instance_uid().unwrap().unwrap(), @"9e15e977-f2ae-4761-943f-1eaf75fd736d");
// batches didn't exists at the time
let batches = dump.batches().unwrap().collect::<Result<Vec<_>>>().unwrap();
meili_snap::snapshot!(meili_snap::json_string!(batches), @"[]");
// tasks // tasks
let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap(); let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap();
let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip(); let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip();
@ -462,6 +511,10 @@ pub(crate) mod test {
insta::assert_snapshot!(dump.date().unwrap(), @"2022-10-06 12:53:49.131989609 +00:00:00"); insta::assert_snapshot!(dump.date().unwrap(), @"2022-10-06 12:53:49.131989609 +00:00:00");
insta::assert_snapshot!(dump.instance_uid().unwrap().unwrap(), @"9e15e977-f2ae-4761-943f-1eaf75fd736d"); insta::assert_snapshot!(dump.instance_uid().unwrap().unwrap(), @"9e15e977-f2ae-4761-943f-1eaf75fd736d");
// batches didn't exists at the time
let batches = dump.batches().unwrap().collect::<Result<Vec<_>>>().unwrap();
meili_snap::snapshot!(meili_snap::json_string!(batches), @"[]");
// tasks // tasks
let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap(); let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap();
let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip(); let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip();
@ -539,6 +592,10 @@ pub(crate) mod test {
insta::assert_snapshot!(dump.date().unwrap(), @"2022-10-07 11:39:03.709153554 +00:00:00"); insta::assert_snapshot!(dump.date().unwrap(), @"2022-10-07 11:39:03.709153554 +00:00:00");
assert_eq!(dump.instance_uid().unwrap(), None); assert_eq!(dump.instance_uid().unwrap(), None);
// batches didn't exists at the time
let batches = dump.batches().unwrap().collect::<Result<Vec<_>>>().unwrap();
meili_snap::snapshot!(meili_snap::json_string!(batches), @"[]");
// tasks // tasks
let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap(); let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap();
let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip(); let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip();
@ -632,6 +689,10 @@ pub(crate) mod test {
insta::assert_snapshot!(dump.date().unwrap(), @"2022-10-09 20:27:59.904096267 +00:00:00"); insta::assert_snapshot!(dump.date().unwrap(), @"2022-10-09 20:27:59.904096267 +00:00:00");
assert_eq!(dump.instance_uid().unwrap(), None); assert_eq!(dump.instance_uid().unwrap(), None);
// batches didn't exists at the time
let batches = dump.batches().unwrap().collect::<Result<Vec<_>>>().unwrap();
meili_snap::snapshot!(meili_snap::json_string!(batches), @"[]");
// tasks // tasks
let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap(); let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap();
let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip(); let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip();
@ -725,6 +786,10 @@ pub(crate) mod test {
insta::assert_snapshot!(dump.date().unwrap(), @"2023-01-30 16:26:09.247261 +00:00:00"); insta::assert_snapshot!(dump.date().unwrap(), @"2023-01-30 16:26:09.247261 +00:00:00");
assert_eq!(dump.instance_uid().unwrap(), None); assert_eq!(dump.instance_uid().unwrap(), None);
// batches didn't exists at the time
let batches = dump.batches().unwrap().collect::<Result<Vec<_>>>().unwrap();
meili_snap::snapshot!(meili_snap::json_string!(batches), @"[]");
// tasks // tasks
let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap(); let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap();
let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip(); let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip();
@ -801,6 +866,10 @@ pub(crate) mod test {
assert_eq!(dump.date(), None);
assert_eq!(dump.instance_uid().unwrap(), None);

// batches didn't exist at the time
let batches = dump.batches().unwrap().collect::<Result<Vec<_>>>().unwrap();
meili_snap::snapshot!(meili_snap::json_string!(batches), @"[]");

// tasks
let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap();
let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip();
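Taken together, the reader now exposes batches and the network topology next to tasks and keys. A minimal sketch of consuming the new accessors; the path and the printed output are illustrative, only `DumpReader`, `batches()`, and `network()` are taken from the hunks above:

use std::fs::File;

fn inspect_dump() {
    let file = File::open("tests/assets/v6-with-network.dump").unwrap();
    let mut dump = DumpReader::open(file).unwrap();

    // Dumps created before v1.13 yield an empty batch iterator instead of an error.
    for batch in dump.batches().unwrap() {
        println!("batch uid: {}", batch.unwrap().uid);
    }

    // `network()` returns `Ok(None)` when the dump contains no `network.json`.
    if let Some(network) = dump.network().unwrap() {
        println!("local node: {:?}", network.local);
    }
}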

View File

@ -1,5 +1,5 @@
---
source: crates/dump/src/reader/mod.rs
expression: vector_index.settings().unwrap()
---
{
@ -49,6 +49,7 @@ expression: vector_index.settings().unwrap()
"source": "huggingFace", "source": "huggingFace",
"model": "BAAI/bge-base-en-v1.5", "model": "BAAI/bge-base-en-v1.5",
"revision": "617ca489d9e86b49b8167676d8220688b99db36e", "revision": "617ca489d9e86b49b8167676d8220688b99db36e",
"pooling": "forceMean",
"documentTemplate": "{% for field in fields %} {{ field.name }}: {{ field.value }}\n{% endfor %}" "documentTemplate": "{% for field in fields %} {{ field.name }}: {{ field.value }}\n{% endfor %}"
} }
}, },

View File

@ -3,6 +3,7 @@ use std::io::{BufRead, BufReader, ErrorKind};
use std::path::Path;

pub use meilisearch_types::milli;
use meilisearch_types::milli::vector::hf::OverridePooling;
use tempfile::TempDir;
use time::OffsetDateTime;
use tracing::debug;
@ -18,8 +19,10 @@ pub type Checked = meilisearch_types::settings::Checked;
pub type Unchecked = meilisearch_types::settings::Unchecked;
pub type Task = crate::TaskDump;
pub type Batch = meilisearch_types::batches::Batch;
pub type Key = meilisearch_types::keys::Key;
pub type RuntimeTogglableFeatures = meilisearch_types::features::RuntimeTogglableFeatures;
pub type Network = meilisearch_types::features::Network;
// ===== Other types to clarify the code of the compat module
// everything related to the tasks
@ -43,13 +46,17 @@ pub type ResponseError = meilisearch_types::error::ResponseError;
pub type Code = meilisearch_types::error::Code;
pub type RankingRuleView = meilisearch_types::settings::RankingRuleView;
pub type FilterableAttributesRule = meilisearch_types::milli::FilterableAttributesRule;
pub struct V6Reader {
dump: TempDir,
instance_uid: Option<Uuid>,
metadata: Metadata,
tasks: BufReader<File>,
batches: Option<BufReader<File>>,
keys: BufReader<File>,
features: Option<RuntimeTogglableFeatures>,
network: Option<Network>,
}

impl V6Reader {
@ -77,13 +84,38 @@ impl V6Reader {
} else {
None
};
let batches = match File::open(dump.path().join("batches").join("queue.jsonl")) {
Ok(file) => Some(BufReader::new(file)),
// The batches file was only introduced in v1.13; dumps from anything prior won't have batches
Err(err) if err.kind() == ErrorKind::NotFound => None,
Err(e) => return Err(e.into()),
};
let network_file = match fs::read(dump.path().join("network.json")) {
Ok(network_file) => Some(network_file),
Err(error) => match error.kind() {
// Allow the file to be missing; this only results in no network configuration being set.
ErrorKind::NotFound => {
debug!("`network.json` not found in dump");
None
}
_ => return Err(error.into()),
},
};
let network = if let Some(network_file) = network_file {
Some(serde_json::from_reader(&*network_file)?)
} else {
None
};
Ok(V6Reader {
metadata: serde_json::from_reader(&*meta_file)?,
instance_uid,
tasks: BufReader::new(File::open(dump.path().join("tasks").join("queue.jsonl"))?),
batches,
keys: BufReader::new(File::open(dump.path().join("keys.jsonl"))?),
features,
network,
dump,
})
}
@ -124,7 +156,7 @@ impl V6Reader {
&mut self,
) -> Box<dyn Iterator<Item = Result<(Task, Option<Box<super::UpdateFile>>)>> + '_> {
Box::new((&mut self.tasks).lines().map(|line| -> Result<_> {
let task: Task = serde_json::from_str(&line?)?;

let update_file_path = self
.dump
@ -136,8 +168,7 @@ impl V6Reader {
if update_file_path.exists() {
Ok((
task,
Some(Box::new(UpdateFile::new(&update_file_path)?) as Box<super::UpdateFile>),
))
} else {
Ok((task, None))
@ -145,6 +176,16 @@ impl V6Reader {
}))
}
pub fn batches(&mut self) -> Box<dyn Iterator<Item = Result<Batch>> + '_> {
match self.batches.as_mut() {
Some(batches) => Box::new((batches).lines().map(|line| -> Result<_> {
let batch = serde_json::from_str(&line?)?;
Ok(batch)
})),
None => Box::new(std::iter::empty()) as Box<dyn Iterator<Item = Result<Batch>> + '_>,
}
}
pub fn keys(&mut self) -> Box<dyn Iterator<Item = Result<Key>> + '_> {
Box::new(
(&mut self.keys).lines().map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) }),
@ -154,6 +195,10 @@ impl V6Reader {
pub fn features(&self) -> Option<RuntimeTogglableFeatures> {
self.features
}
pub fn network(&self) -> Option<&Network> {
self.network.as_ref()
}
}

pub struct UpdateFile {
@ -210,7 +255,29 @@ impl V6IndexReader {
}

pub fn settings(&mut self) -> Result<Settings<Checked>> {
let mut settings: Settings<Unchecked> = serde_json::from_reader(&mut self.settings)?;
patch_embedders(&mut settings);
Ok(settings.check())
}
}
fn patch_embedders(settings: &mut Settings<Unchecked>) {
if let Setting::Set(embedders) = &mut settings.embedders {
for settings in embedders.values_mut() {
let Setting::Set(settings) = &mut settings.inner else {
continue;
};
if settings.source != Setting::Set(milli::vector::settings::EmbedderSource::HuggingFace)
{
continue;
}
settings.pooling = match settings.pooling {
Setting::Set(pooling) => Setting::Set(pooling),
// if the pooling for a hugging face embedder is not set, force it to `forceMean`
// for backward compatibility with v1.13
// dumps created in v1.14 and up will have the setting set for hugging face embedders
Setting::Reset | Setting::NotSet => Setting::Set(OverridePooling::ForceMean),
};
}
}
}

View File

@ -4,7 +4,8 @@ use std::path::PathBuf;
use flate2::write::GzEncoder;
use flate2::Compression;
use meilisearch_types::batches::Batch;
use meilisearch_types::features::{Network, RuntimeTogglableFeatures};
use meilisearch_types::keys::Key;
use meilisearch_types::settings::{Checked, Settings};
use serde_json::{Map, Value};
@ -54,6 +55,10 @@ impl DumpWriter {
TaskWriter::new(self.dir.path().join("tasks"))
}
pub fn create_batches_queue(&self) -> Result<BatchWriter> {
BatchWriter::new(self.dir.path().join("batches"))
}
pub fn create_experimental_features(&self, features: RuntimeTogglableFeatures) -> Result<()> {
Ok(std::fs::write(
self.dir.path().join("experimental-features.json"),
@ -61,6 +66,10 @@ impl DumpWriter {
)?)
}
pub fn create_network(&self, network: Network) -> Result<()> {
Ok(std::fs::write(self.dir.path().join("network.json"), serde_json::to_string(&network)?)?)
}
pub fn persist_to(self, mut writer: impl Write) -> Result<()> {
let gz_encoder = GzEncoder::new(&mut writer, Compression::default());
let mut tar_encoder = tar::Builder::new(gz_encoder);
@ -84,7 +93,7 @@ impl KeyWriter {
}

pub fn push_key(&mut self, key: &Key) -> Result<()> {
serde_json::to_writer(&mut self.keys, &key)?;
self.keys.write_all(b"\n")?;
Ok(())
}
@ -114,7 +123,7 @@ impl TaskWriter {
/// Pushes tasks in the dump.
/// If the task has an associated `update_file` it'll use the `task_id` as its name.
pub fn push_task(&mut self, task: &TaskDump) -> Result<UpdateFile> {
serde_json::to_writer(&mut self.queue, &task)?;
self.queue.write_all(b"\n")?;
Ok(UpdateFile::new(self.update_files.join(format!("{}.jsonl", task.uid))))
@ -126,6 +135,30 @@ impl TaskWriter {
}
}
pub struct BatchWriter {
queue: BufWriter<File>,
}
impl BatchWriter {
pub(crate) fn new(path: PathBuf) -> Result<Self> {
std::fs::create_dir(&path)?;
let queue = File::create(path.join("queue.jsonl"))?;
Ok(BatchWriter { queue: BufWriter::new(queue) })
}
/// Pushes batches in the dump.
pub fn push_batch(&mut self, batch: &Batch) -> Result<()> {
serde_json::to_writer(&mut self.queue, &batch)?;
self.queue.write_all(b"\n")?;
Ok(())
}
pub fn flush(mut self) -> Result<()> {
self.queue.flush()?;
Ok(())
}
}
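A minimal sketch of the writer side end to end; `DumpWriter::new` and the prebuilt `Batch`/`Network` values are assumptions, only the methods shown in this diff are taken as given:

fn write_dump(batches: Vec<Batch>, network: Network) -> Result<()> {
    let dump = DumpWriter::new(None)?; // constructor signature assumed

    // Batches get their own `batches/queue.jsonl`, mirroring the task queue.
    let mut batch_writer = dump.create_batches_queue()?;
    for batch in &batches {
        batch_writer.push_batch(batch)?;
    }
    batch_writer.flush()?;

    // The network topology lands in a single `network.json` at the dump root.
    dump.create_network(network)?;

    let mut file = tempfile::tempfile().unwrap();
    dump.persist_to(&mut file)?;
    Ok(())
}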
pub struct UpdateFile {
path: PathBuf,
writer: Option<BufWriter<File>>,
@ -137,8 +170,8 @@ impl UpdateFile {
}

pub fn push_document(&mut self, document: &Document) -> Result<()> {
if let Some(mut writer) = self.writer.as_mut() {
serde_json::to_writer(&mut writer, &document)?;
writer.write_all(b"\n")?;
} else {
let file = File::create(&self.path).unwrap();
@ -205,8 +238,8 @@ pub(crate) mod test {
use super::*;
use crate::reader::Document;
use crate::test::{
create_test_api_keys, create_test_batches, create_test_documents, create_test_dump,
create_test_instance_uid, create_test_settings, create_test_tasks,
};

fn create_directory_hierarchy(dir: &Path) -> String {
@ -281,8 +314,10 @@ pub(crate) mod test {
let dump_path = dump.path();

// ==== checking global file hierarchy (we want to be sure there aren't too many files or too few)
insta::assert_snapshot!(create_directory_hierarchy(dump_path), @r"
.
├---- batches/
│ └---- queue.jsonl
├---- indexes/
│ └---- doggos/
│ │ ├---- documents.jsonl
@ -295,8 +330,9 @@ pub(crate) mod test {
├---- experimental-features.json
├---- instance_uid.uuid
├---- keys.jsonl
├---- metadata.json
└---- network.json
");

// ==== checking the top level infos
let metadata = fs::read_to_string(dump_path.join("metadata.json")).unwrap();
@ -349,6 +385,16 @@ pub(crate) mod test {
}
}
// ==== checking the batch queue
let batches_queue = fs::read_to_string(dump_path.join("batches/queue.jsonl")).unwrap();
for (batch, expected) in batches_queue.lines().zip(create_test_batches()) {
let mut batch = serde_json::from_str::<Batch>(batch).unwrap();
if batch.details.settings == Some(Box::new(Settings::<Unchecked>::default())) {
batch.details.settings = None;
}
assert_eq!(batch, expected, "{batch:#?}{expected:#?}");
}
// ==== checking the keys
let keys = fs::read_to_string(dump_path.join("keys.jsonl")).unwrap();
for (key, expected) in keys.lines().zip(create_test_api_keys()) {

Binary file not shown.

View File

@ -30,6 +30,25 @@ pub enum Condition<'a> {
StartsWith { keyword: Token<'a>, word: Token<'a> },
}
impl Condition<'_> {
pub fn operator(&self) -> &str {
match self {
Condition::GreaterThan(_) => ">",
Condition::GreaterThanOrEqual(_) => ">=",
Condition::Equal(_) => "=",
Condition::NotEqual(_) => "!=",
Condition::Null => "IS NULL",
Condition::Empty => "IS EMPTY",
Condition::Exists => "EXISTS",
Condition::LowerThan(_) => "<",
Condition::LowerThanOrEqual(_) => "<=",
Condition::Between { .. } => "TO",
Condition::Contains { .. } => "CONTAINS",
Condition::StartsWith { .. } => "STARTS WITH",
}
}
}
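A small usage sketch for the new helper; the function and diagnostic text are illustrative, only `operator()` itself comes from the diff:

fn unsupported(cond: &Condition<'_>) -> String {
    // Echo the user-facing operator back in an error message.
    format!("the `{}` operator is not supported here", cond.operator())
}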
/// condition = value ("==" | ">" ...) value
pub fn parse_condition(input: Span) -> IResult<FilterCondition> {
let operator = alt((tag("<="), tag(">="), tag("!="), tag("<"), tag(">"), tag("=")));

View File

@ -57,7 +57,8 @@ fn main() {
let opt = opt.clone();
let handle = std::thread::spawn(move || {
let options = EnvOpenOptions::new();
let mut options = options.read_txn_without_tls();
options.map_size(1024 * 1024 * 1024 * 1024);
let tempdir = match opt.path {
Some(path) => TempDir::new_in(path).unwrap(),

View File

@ -13,6 +13,7 @@ license.workspace = true
[dependencies]
anyhow = "1.0.95"
bincode = "1.3.3"
byte-unit = "5.1.6"
bumpalo = "3.16.0"
bumparaw-collections = "0.1.4"
convert_case = "0.6.0"
@ -22,6 +23,7 @@ dump = { path = "../dump" }
enum-iterator = "2.1.0"
file-store = { path = "../file-store" }
flate2 = "1.0.35"
indexmap = "2.7.0"
meilisearch-auth = { path = "../meilisearch-auth" }
meilisearch-types = { path = "../meilisearch-types" }
memmap2 = "0.9.5"
@ -29,7 +31,7 @@ page_size = "0.6.0"
rayon = "1.10.0"
roaring = { version = "0.10.10", features = ["serde"] }
serde = { version = "1.0.217", features = ["derive"] }
serde_json = { version = "1.0.138", features = ["preserve_order"] }
synchronoise = "1.0.1"
tempfile = "3.15.0"
thiserror = "2.0.9"
@ -44,7 +46,6 @@ ureq = "2.12.1"
uuid = { version = "1.11.0", features = ["serde", "v4"] }

[dev-dependencies]
arroy = "0.5.0"
big_s = "1.0.2"
crossbeam-channel = "0.5.14"
# fixed version due to format breakages in v1.40

View File

@ -2,6 +2,7 @@ use std::collections::HashMap;
use std::io;

use dump::{KindDump, TaskDump, UpdateFile};
use meilisearch_types::batches::{Batch, BatchId};
use meilisearch_types::heed::RwTxn;
use meilisearch_types::milli;
use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task};
@ -14,9 +15,15 @@ pub struct Dump<'a> {
index_scheduler: &'a IndexScheduler,
wtxn: RwTxn<'a>,

batch_to_task_mapping: HashMap<BatchId, RoaringBitmap>,

indexes: HashMap<String, RoaringBitmap>,
statuses: HashMap<Status, RoaringBitmap>,
kinds: HashMap<Kind, RoaringBitmap>,

batch_indexes: HashMap<String, RoaringBitmap>,
batch_statuses: HashMap<Status, RoaringBitmap>,
batch_kinds: HashMap<Kind, RoaringBitmap>,
}
impl<'a> Dump<'a> {
@ -27,12 +34,72 @@ impl<'a> Dump<'a> {
Ok(Dump {
index_scheduler,
wtxn,
batch_to_task_mapping: HashMap::new(),
indexes: HashMap::new(),
statuses: HashMap::new(),
kinds: HashMap::new(),
batch_indexes: HashMap::new(),
batch_statuses: HashMap::new(),
batch_kinds: HashMap::new(),
})
}
/// Register a new batch coming from a dump in the scheduler.
/// By taking a mutable ref we're pretty sure no one will ever import a dump while actix is running.
pub fn register_dumped_batch(&mut self, batch: Batch) -> Result<()> {
self.index_scheduler.queue.batches.all_batches.put(&mut self.wtxn, &batch.uid, &batch)?;
if let Some(enqueued_at) = batch.enqueued_at {
utils::insert_task_datetime(
&mut self.wtxn,
self.index_scheduler.queue.batches.enqueued_at,
enqueued_at.earliest,
batch.uid,
)?;
utils::insert_task_datetime(
&mut self.wtxn,
self.index_scheduler.queue.batches.enqueued_at,
enqueued_at.oldest,
batch.uid,
)?;
}
utils::insert_task_datetime(
&mut self.wtxn,
self.index_scheduler.queue.batches.started_at,
batch.started_at,
batch.uid,
)?;
if let Some(finished_at) = batch.finished_at {
utils::insert_task_datetime(
&mut self.wtxn,
self.index_scheduler.queue.batches.finished_at,
finished_at,
batch.uid,
)?;
}
for index in batch.stats.index_uids.keys() {
match self.batch_indexes.get_mut(index) {
Some(bitmap) => {
bitmap.insert(batch.uid);
}
None => {
let mut bitmap = RoaringBitmap::new();
bitmap.insert(batch.uid);
self.batch_indexes.insert(index.to_string(), bitmap);
}
};
}
for status in batch.stats.status.keys() {
self.batch_statuses.entry(*status).or_default().insert(batch.uid);
}
for kind in batch.stats.types.keys() {
self.batch_kinds.entry(*kind).or_default().insert(batch.uid);
}
Ok(())
}
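As an aside, the `match` on `get_mut` above and the `entry` calls right after it are two spellings of the same upsert; a sketch of the shorter form for the index bitmaps (same behavior, purely illustrative):

self.batch_indexes.entry(index.to_string()).or_default().insert(batch.uid);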
/// Register a new task coming from a dump in the scheduler.
/// By taking a mutable ref we're pretty sure no one will ever import a dump while actix is running.
pub fn register_dumped_task(
@ -149,6 +216,9 @@ impl<'a> Dump<'a> {
};
self.index_scheduler.queue.tasks.all_tasks.put(&mut self.wtxn, &task.uid, &task)?;

if let Some(batch_id) = task.batch_uid {
self.batch_to_task_mapping.entry(batch_id).or_default().insert(task.uid);
}

for index in task.indexes() {
match self.indexes.get_mut(index) {
@ -198,6 +268,14 @@ impl<'a> Dump<'a> {
/// Commit all the changes and exit the importing dump state
pub fn finish(mut self) -> Result<()> {
for (batch_id, task_ids) in self.batch_to_task_mapping {
self.index_scheduler.queue.batch_to_tasks_mapping.put(
&mut self.wtxn,
&batch_id,
&task_ids,
)?;
}

for (index, bitmap) in self.indexes {
self.index_scheduler.queue.tasks.index_tasks.put(&mut self.wtxn, &index, &bitmap)?;
}
@ -208,6 +286,16 @@ impl<'a> Dump<'a> {
self.index_scheduler.queue.tasks.put_kind(&mut self.wtxn, kind, &bitmap)?;
}
for (index, bitmap) in self.batch_indexes {
self.index_scheduler.queue.batches.index_tasks.put(&mut self.wtxn, &index, &bitmap)?;
}
for (status, bitmap) in self.batch_statuses {
self.index_scheduler.queue.batches.put_status(&mut self.wtxn, status, &bitmap)?;
}
for (kind, bitmap) in self.batch_kinds {
self.index_scheduler.queue.batches.put_kind(&mut self.wtxn, kind, &bitmap)?;
}
self.wtxn.commit()?;
self.index_scheduler.scheduler.wake_up.signal();
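A sketch of the import flow these pieces slot into; `Dump::new` and the exact `register_dumped_task` signature are assumptions, the rest comes from the hunks above:

let mut dump = Dump::new(&index_scheduler)?; // constructor assumed

// Batches are registered alongside tasks, then everything is committed once.
for batch in dump_reader.batches()? {
    dump.register_dumped_batch(batch?)?;
}
for task in dump_reader.tasks()? {
    let (task, file) = task?;
    dump.register_dumped_task(task, file)?; // signature assumed
}
dump.finish()?;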

View File

@ -109,6 +109,8 @@ pub enum Error {
InvalidIndexUid { index_uid: String },
#[error("Task `{0}` not found.")]
TaskNotFound(TaskId),
#[error("Task `{0}` does not contain any documents. Only `documentAdditionOrUpdate` tasks with the statuses `enqueued` or `processing` contain documents")]
TaskFileNotFound(TaskId),
#[error("Batch `{0}` not found.")] #[error("Batch `{0}` not found.")]
BatchNotFound(BatchId), BatchNotFound(BatchId),
#[error("Query parameters to filter the tasks to delete are missing. Available query parameters are: `uids`, `indexUids`, `statuses`, `types`, `canceledBy`, `beforeEnqueuedAt`, `afterEnqueuedAt`, `beforeStartedAt`, `afterStartedAt`, `beforeFinishedAt`, `afterFinishedAt`.")] #[error("Query parameters to filter the tasks to delete are missing. Available query parameters are: `uids`, `indexUids`, `statuses`, `types`, `canceledBy`, `beforeEnqueuedAt`, `afterEnqueuedAt`, `beforeStartedAt`, `afterStartedAt`, `beforeFinishedAt`, `afterFinishedAt`.")]
@ -127,8 +129,8 @@ pub enum Error {
_ => format!("{error}") _ => format!("{error}")
})] })]
Milli { error: milli::Error, index_uid: Option<String> }, Milli { error: milli::Error, index_uid: Option<String> },
#[error("An unexpected crash occurred when processing the task.")] #[error("An unexpected crash occurred when processing the task: {0}")]
ProcessBatchPanicked, ProcessBatchPanicked(String),
#[error(transparent)] #[error(transparent)]
FileStore(#[from] file_store::Error), FileStore(#[from] file_store::Error),
#[error(transparent)] #[error(transparent)]
@ -189,6 +191,7 @@ impl Error {
| Error::InvalidTaskCanceledBy { .. }
| Error::InvalidIndexUid { .. }
| Error::TaskNotFound(_)
| Error::TaskFileNotFound(_)
| Error::BatchNotFound(_)
| Error::TaskDeletionWithEmptyQuery
| Error::TaskCancelationWithEmptyQuery
@ -196,7 +199,7 @@ impl Error {
| Error::Dump(_)
| Error::Heed(_)
| Error::Milli { .. }
| Error::ProcessBatchPanicked(_)
| Error::FileStore(_)
| Error::IoError(_)
| Error::Persist(_)
@ -250,6 +253,7 @@ impl ErrorCode for Error {
Error::InvalidTaskCanceledBy { .. } => Code::InvalidTaskCanceledBy,
Error::InvalidIndexUid { .. } => Code::InvalidIndexUid,
Error::TaskNotFound(_) => Code::TaskNotFound,
Error::TaskFileNotFound(_) => Code::TaskFileNotFound,
Error::BatchNotFound(_) => Code::BatchNotFound,
Error::TaskDeletionWithEmptyQuery => Code::MissingTaskFilters,
Error::TaskCancelationWithEmptyQuery => Code::MissingTaskFilters,
@ -257,7 +261,7 @@ impl ErrorCode for Error {
Error::NoSpaceLeftInTaskQueue => Code::NoSpaceLeftOnDevice,
Error::Dump(e) => e.error_code(),
Error::Milli { error, .. } => error.error_code(),
Error::ProcessBatchPanicked(_) => Code::Internal,
Error::Heed(e) => e.error_code(),
Error::HeedTransaction(e) => e.error_code(),
Error::FileStore(e) => e.error_code(),
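With `ProcessBatchPanicked` now carrying a message, the catch site can recover the panic payload; a sketch assuming the scheduler wraps batch processing in `catch_unwind` (the `process` closure is illustrative):

use std::panic::{catch_unwind, AssertUnwindSafe};

fn run(process: impl FnOnce()) -> Result<()> {
    if let Err(payload) = catch_unwind(AssertUnwindSafe(process)) {
        // Panic payloads are almost always a `&str` or a `String`.
        let msg = payload
            .downcast_ref::<&str>()
            .map(|s| s.to_string())
            .or_else(|| payload.downcast_ref::<String>().cloned())
            .unwrap_or_else(|| "unknown panic".to_string());
        return Err(Error::ProcessBatchPanicked(msg));
    }
    Ok(())
}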

View File

@ -1,8 +1,8 @@
use std::sync::{Arc, RwLock};

use meilisearch_types::features::{InstanceTogglableFeatures, Network, RuntimeTogglableFeatures};
use meilisearch_types::heed::types::{SerdeJson, Str};
use meilisearch_types::heed::{Database, Env, RwTxn, WithoutTls};

use crate::error::FeatureNotEnabledError;
use crate::Result;
@ -14,10 +14,16 @@ mod db_name {
pub const EXPERIMENTAL_FEATURES: &str = "experimental-features";
}
mod db_keys {
pub const EXPERIMENTAL_FEATURES: &str = "experimental-features";
pub const NETWORK: &str = "network";
}
#[derive(Clone)]
pub(crate) struct FeatureData {
persisted: Database<Str, SerdeJson<RuntimeTogglableFeatures>>,
runtime: Arc<RwLock<RuntimeTogglableFeatures>>,
network: Arc<RwLock<Network>>,
}

#[derive(Debug, Clone, Copy)]
@ -86,6 +92,45 @@ impl RoFeatures {
.into())
}
}
pub fn check_network(&self, disabled_action: &'static str) -> Result<()> {
if self.runtime.network {
Ok(())
} else {
Err(FeatureNotEnabledError {
disabled_action,
feature: "network",
issue_link: "https://github.com/orgs/meilisearch/discussions/805",
}
.into())
}
}
pub fn check_get_task_documents_route(&self) -> Result<()> {
if self.runtime.get_task_documents_route {
Ok(())
} else {
Err(FeatureNotEnabledError {
disabled_action: "Getting the documents of an enqueued task",
feature: "get task documents route",
issue_link: "https://github.com/orgs/meilisearch/discussions/808",
}
.into())
}
}
pub fn check_composite_embedders(&self, disabled_action: &'static str) -> Result<()> {
if self.runtime.composite_embedders {
Ok(())
} else {
Err(FeatureNotEnabledError {
disabled_action,
feature: "composite embedders",
issue_link: "https://github.com/orgs/meilisearch/discussions/816",
}
.into())
}
}
}

impl FeatureData {
@ -94,7 +139,7 @@ impl FeatureData {
}

pub fn new(
env: &Env<WithoutTls>,
wtxn: &mut RwTxn,
instance_features: InstanceTogglableFeatures,
) -> Result<Self> {
@ -102,7 +147,7 @@ impl FeatureData {
env.create_database(wtxn, Some(db_name::EXPERIMENTAL_FEATURES))?;
let persisted_features: RuntimeTogglableFeatures =
runtime_features_db.get(wtxn, db_keys::EXPERIMENTAL_FEATURES)?.unwrap_or_default();
let InstanceTogglableFeatures { metrics, logs_route, contains_filter } = instance_features;
let runtime = Arc::new(RwLock::new(RuntimeTogglableFeatures {
metrics: metrics || persisted_features.metrics,
@ -111,7 +156,14 @@ impl FeatureData {
..persisted_features
}));

let network_db = runtime_features_db.remap_data_type::<SerdeJson<Network>>();
let network: Network = network_db.get(wtxn, db_keys::NETWORK)?.unwrap_or_default();
Ok(Self {
persisted: runtime_features_db,
runtime,
network: Arc::new(RwLock::new(network)),
})
}

pub fn put_runtime_features(
@ -119,7 +171,7 @@ impl FeatureData {
mut wtxn: RwTxn,
features: RuntimeTogglableFeatures,
) -> Result<()> {
self.persisted.put(&mut wtxn, db_keys::EXPERIMENTAL_FEATURES, &features)?;
wtxn.commit()?;

// safe to unwrap, the lock will only fail if:
@ -140,4 +192,21 @@ impl FeatureData {
pub fn features(&self) -> RoFeatures {
RoFeatures::new(self)
}
pub fn put_network(&self, mut wtxn: RwTxn, new_network: Network) -> Result<()> {
self.persisted.remap_data_type::<SerdeJson<Network>>().put(
&mut wtxn,
db_keys::NETWORK,
&new_network,
)?;
wtxn.commit()?;
let mut network = self.network.write().unwrap();
*network = new_network;
Ok(())
}
pub fn network(&self) -> Network {
Network::clone(&*self.network.read().unwrap())
}
}

View File

@ -1,5 +1,7 @@
use std::collections::BTreeMap;
use std::env::VarError;
use std::path::Path;
use std::str::FromStr;
use std::time::Duration;

use meilisearch_types::heed::{EnvClosingEvent, EnvFlags, EnvOpenOptions};
@ -302,9 +304,21 @@ fn create_or_open_index(
map_size: usize,
creation: bool,
) -> Result<Index> {
let options = EnvOpenOptions::new();
let mut options = options.read_txn_without_tls();
options.map_size(clamp_to_page_size(map_size));

// You can find more details about this experimental
// environment variable on the following GitHub discussion:
// <https://github.com/orgs/meilisearch/discussions/806>
let max_readers = match std::env::var("MEILI_EXPERIMENTAL_INDEX_MAX_READERS") {
Ok(value) => u32::from_str(&value).unwrap(),
Err(VarError::NotPresent) => 1024,
Err(VarError::NotUnicode(value)) => panic!(
"Invalid unicode for the `MEILI_EXPERIMENTAL_INDEX_MAX_READERS` env var: {value:?}"
),
};
options.max_readers(max_readers);
if enable_mdb_writemap {
unsafe { options.flags(EnvFlags::WRITE_MAP) };
}
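For illustration, the cap can also be raised programmatically before the index environments are opened, e.g. from a test harness (the value and call site are illustrative, equivalent to exporting the variable in the shell):

std::env::set_var("MEILI_EXPERIMENTAL_INDEX_MAX_READERS", "4096");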
@ -320,7 +334,7 @@ fn create_or_open_index(
#[cfg(test)]
mod tests {
use meilisearch_types::heed::{Env, WithoutTls};
use meilisearch_types::Index;
use uuid::Uuid;
@ -330,7 +344,7 @@ mod tests {
use crate::IndexScheduler;

impl IndexMapper {
fn test() -> (Self, Env<WithoutTls>, IndexSchedulerHandle) {
let (index_scheduler, handle) = IndexScheduler::test(true, vec![]);
(index_scheduler.index_mapper, index_scheduler.env, handle)
}

View File

@ -4,8 +4,9 @@ use std::time::Duration;
use std::{fs, thread};

use meilisearch_types::heed::types::{SerdeJson, Str};
use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn, WithoutTls};
use meilisearch_types::milli;
use meilisearch_types::milli::database_stats::DatabaseStats;
use meilisearch_types::milli::update::IndexerConfig;
use meilisearch_types::milli::{FieldDistribution, Index};
use serde::{Deserialize, Serialize};
@ -98,14 +99,25 @@ pub enum IndexStatus {
/// The statistics that can be computed from an `Index` object.
#[derive(Serialize, Deserialize, Debug)]
pub struct IndexStats {
/// Stats of the documents database.
#[serde(default)]
pub documents_database_stats: DatabaseStats,

#[serde(default, skip_serializing)]
pub number_of_documents: Option<u64>,

/// Size taken up by the index' DB, in bytes.
///
/// This includes the size taken by both the used and free pages of the DB, and as the free pages
/// are not returned to the disk after a deletion, this number is typically larger than
/// `used_database_size` that only includes the size of the used pages.
pub database_size: u64,
/// Number of embeddings in the index.
/// `Option` kept for backward compatibility with the stats of pre-v1.13.0 versions of Meilisearch
pub number_of_embeddings: Option<u64>,
/// Number of embedded documents in the index.
/// `Option` kept for backward compatibility with the stats of pre-v1.13.0 versions of Meilisearch
pub number_of_embedded_documents: Option<u64>,
/// Size taken by the used pages of the index' DB, in bytes.
///
/// As the DB backend does not return to the disk the pages that are not currently used by the DB,
@ -130,8 +142,12 @@ impl IndexStats {
///
/// - rtxn: a RO transaction for the index, obtained from `Index::read_txn()`.
pub fn new(index: &Index, rtxn: &RoTxn) -> milli::Result<Self> {
let arroy_stats = index.arroy_stats(rtxn)?;
Ok(IndexStats {
number_of_embeddings: Some(arroy_stats.number_of_embeddings),
number_of_embedded_documents: Some(arroy_stats.documents.len()),
documents_database_stats: index.documents_stats(rtxn)?.unwrap_or_default(),
number_of_documents: None,
database_size: index.on_disk_size()?,
used_database_size: index.used_size()?,
primary_key: index.primary_key(rtxn)?.map(|s| s.to_string()),
@ -148,7 +164,7 @@ impl IndexMapper {
}

pub fn new(
env: &Env<WithoutTls>,
wtxn: &mut RwTxn,
options: &IndexSchedulerOptions,
budget: IndexBudget,

View File

@ -1,7 +1,7 @@
use std::collections::BTreeSet;
use std::fmt::Write;

use meilisearch_types::batches::{Batch, BatchEnqueuedAt, BatchStats};
use meilisearch_types::heed::types::{SerdeBincode, SerdeJson, Str};
use meilisearch_types::heed::{Database, RoTxn};
use meilisearch_types::milli::{CboRoaringBitmapCodec, RoaringBitmapCodec, BEU32};
@ -341,14 +341,24 @@ pub fn snapshot_canceled_by(rtxn: &RoTxn, db: Database<BEU32, RoaringBitmapCodec
pub fn snapshot_batch(batch: &Batch) -> String {
let mut snap = String::new();
let Batch { uid, details, stats, started_at, finished_at, progress: _, enqueued_at } = batch;
let stats = BatchStats {
progress_trace: Default::default(),
internal_database_sizes: Default::default(),
write_channel_congestion: None,
..stats.clone()
};
if let Some(finished_at) = finished_at {
assert!(finished_at > started_at);
}
let BatchEnqueuedAt { earliest, oldest } = enqueued_at.unwrap();
assert!(*started_at > earliest);
assert!(earliest >= oldest);
snap.push('{');
snap.push_str(&format!("uid: {uid}, "));
snap.push_str(&format!("details: {}, ", serde_json::to_string(details).unwrap()));
snap.push_str(&format!("stats: {}, ", serde_json::to_string(&stats).unwrap()));
snap.push('}');
snap
}
@ -361,7 +371,8 @@ pub fn snapshot_index_mapper(rtxn: &RoTxn, mapper: &IndexMapper) -> String {
let stats = mapper.stats_of(rtxn, &name).unwrap();
s.push_str(&format!(
"{name}: {{ number_of_documents: {}, field_distribution: {:?} }}\n",
stats.documents_database_stats.number_of_entries(),
stats.field_distribution
));
}

View File

@ -33,7 +33,7 @@ mod test_utils;
pub mod upgrade;
mod utils;
pub mod uuid_codec;
pub mod versioning;

pub type Result<T, E = Error> = std::result::Result<T, E>;
pub type TaskId = u32;
@ -51,10 +51,10 @@ pub use features::RoFeatures;
use flate2::bufread::GzEncoder;
use flate2::Compression;
use meilisearch_types::batches::Batch;
use meilisearch_types::features::{InstanceTogglableFeatures, Network, RuntimeTogglableFeatures};
use meilisearch_types::heed::byteorder::BE;
use meilisearch_types::heed::types::I128;
use meilisearch_types::heed::{self, Env, RoTxn, WithoutTls};
use meilisearch_types::milli::index::IndexEmbeddingConfig;
use meilisearch_types::milli::update::IndexerConfig;
use meilisearch_types::milli::vector::{Embedder, EmbedderOptions, EmbeddingConfigs};
@ -125,13 +125,17 @@ pub struct IndexSchedulerOptions {
pub instance_features: InstanceTogglableFeatures,
/// The experimental features enabled for this instance.
pub auto_upgrade: bool,
/// The maximal number of entries in the search query cache of an embedder.
///
/// 0 disables the cache.
pub embedding_cache_cap: usize,
}

/// Structure which holds meilisearch's indexes and schedules the tasks
/// to be performed on them.
pub struct IndexScheduler {
/// The LMDB environment which the DBs are associated with.
pub(crate) env: Env<WithoutTls>,

/// The list of tasks currently processing
pub(crate) processing_tasks: Arc<RwLock<ProcessingTasks>>,
@ -156,6 +160,11 @@ pub struct IndexScheduler {
/// The Authorization header to send to the webhook URL.
pub(crate) webhook_authorization_header: Option<String>,
/// A map to retrieve the runtime representation of an embedder depending on its configuration.
///
/// This map may return the same embedder object for two different indexes or embedder settings,
/// but it will only do this if the embedder configuration options are the same, leading
/// to the same embeddings for the same input text.
embedders: Arc<RwLock<HashMap<EmbedderOptions, Arc<Embedder>>>>,

// ================= test
@ -209,6 +218,7 @@ impl IndexScheduler {
#[allow(private_interfaces)] // because test_utils is private
pub fn new(
options: IndexSchedulerOptions,
auth_env: Env<WithoutTls>,
from_db_version: (u32, u32, u32),
#[cfg(test)] test_breakpoint_sdr: crossbeam_channel::Sender<(test_utils::Breakpoint, bool)>,
#[cfg(test)] planned_failures: Vec<(usize, test_utils::FailureLocation)>,
@ -240,7 +250,9 @@ impl IndexScheduler {
};

let env = unsafe {
let env_options = heed::EnvOpenOptions::new();
let mut env_options = env_options.read_txn_without_tls();
env_options
.max_dbs(Self::nb_db())
.map_size(budget.task_db_size)
.open(&options.tasks_path)
@ -260,7 +272,7 @@ impl IndexScheduler {
processing_tasks: Arc::new(RwLock::new(ProcessingTasks::new())),
version,
queue,
scheduler: Scheduler::new(&options, auth_env),
index_mapper,
env,
@ -358,7 +370,7 @@ impl IndexScheduler {
}
}
pub fn read_txn(&self) -> Result<RoTxn<WithoutTls>> {
self.env.read_txn().map_err(|e| e.into())
}
@ -427,12 +439,14 @@ impl IndexScheduler {
/// If you need to fetch information from or perform an action on all indexes,
/// see the `try_for_each_index` function.
pub fn index(&self, name: &str) -> Result<Index> {
let rtxn = self.env.read_txn()?;
self.index_mapper.index(&rtxn, name)
}

/// Returns whether the index exists.
pub fn index_exists(&self, name: &str) -> Result<bool> {
let rtxn = self.env.read_txn()?;
self.index_mapper.index_exists(&rtxn, name)
}
/// Return the name of all indexes without opening them.
@ -507,7 +521,8 @@ impl IndexScheduler {
/// 2. The name of the specific data related to the property can be `enqueued` for the `statuses`, `settingsUpdate` for the `types`, or the name of the index for the `indexes`, for example.
/// 3. The number of times the properties appeared.
pub fn get_stats(&self) -> Result<BTreeMap<String, BTreeMap<String, u64>>> {
let rtxn = self.read_txn()?;
self.queue.get_stats(&rtxn, &self.processing_tasks.read().unwrap())
}
// Return true if there is at least one task that is processing.
@ -770,7 +785,16 @@ impl IndexScheduler {
Ok(())
}

pub fn put_network(&self, network: Network) -> Result<()> {
let wtxn = self.env.write_txn().map_err(Error::HeedTransaction)?;
self.features.put_network(wtxn, network)?;
Ok(())
}

pub fn network(&self) -> Network {
self.features.network()
}
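A sketch of the new round-trip through the scheduler, assuming `Network` keeps deriving `Clone` and `PartialEq` as the dump tests above suggest:

fn roundtrip(index_scheduler: &IndexScheduler) -> Result<()> {
    let network = Network { local: Some("ms-0".to_string()), remotes: Default::default() };
    index_scheduler.put_network(network.clone())?;
    assert_eq!(index_scheduler.network(), network);
    Ok(())
}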
pub fn embedders(
&self,
index_uid: String,
@ -803,7 +827,7 @@ impl IndexScheduler {
// add missing embedder
let embedder = Arc::new(
Embedder::new(embedder_options.clone(), self.scheduler.embedding_cache_cap)
.map_err(meilisearch_types::milli::vector::Error::from)
.map_err(|err| {
Error::from_milli(err.into(), Some(index_uid.clone()))

View File

@ -64,6 +64,13 @@ make_enum_progress! {
}
}
make_enum_progress! {
pub enum FinalizingIndexStep {
Committing,
ComputingStats,
}
}
make_enum_progress! {
pub enum TaskCancelationProgress {
RetrievingTasks,
@ -96,6 +103,7 @@ make_enum_progress! {
StartTheDumpCreation,
DumpTheApiKeys,
DumpTheTasks,
DumpTheBatches,
DumpTheIndexes,
DumpTheExperimentalFeatures,
CompressTheDump,

View File

@@ -3,7 +3,7 @@ use std::ops::{Bound, RangeBounds};

 use meilisearch_types::batches::{Batch, BatchId};
 use meilisearch_types::heed::types::{DecodeIgnore, SerdeBincode, SerdeJson, Str};
-use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn};
+use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn, WithoutTls};
 use meilisearch_types::milli::{CboRoaringBitmapCodec, RoaringBitmapCodec, BEU32};
 use meilisearch_types::tasks::{Kind, Status};
 use roaring::{MultiOps, RoaringBitmap};
@@ -12,8 +12,8 @@ use time::OffsetDateTime;
 use super::{Query, Queue};
 use crate::processing::ProcessingTasks;
 use crate::utils::{
-    insert_task_datetime, keep_ids_within_datetimes, map_bound, remove_task_datetime,
-    ProcessingBatch,
+    insert_task_datetime, keep_ids_within_datetimes, map_bound,
+    remove_n_tasks_datetime_earlier_than, remove_task_datetime, ProcessingBatch,
 };
 use crate::{Error, Result, BEI128};
@@ -66,7 +66,7 @@ impl BatchQueue {
         NUMBER_OF_DATABASES
     }

-    pub(super) fn new(env: &Env, wtxn: &mut RwTxn) -> Result<Self> {
+    pub(super) fn new(env: &Env<WithoutTls>, wtxn: &mut RwTxn) -> Result<Self> {
         Ok(Self {
             all_batches: env.create_database(wtxn, Some(db_name::ALL_BATCHES))?,
             status: env.create_database(wtxn, Some(db_name::BATCH_STATUS))?,
@@ -181,6 +181,7 @@ impl BatchQueue {
                 stats: batch.stats,
                 started_at: batch.started_at,
                 finished_at: batch.finished_at,
+                enqueued_at: batch.enqueued_at,
             },
         )?;
@@ -234,34 +235,25 @@ impl BatchQueue {
         // What we know, though, is that the task date is from before the enqueued_at, and max two timestamps have been written
         // to the DB per batches.
         if let Some(ref old_batch) = old_batch {
-            let started_at = old_batch.started_at.unix_timestamp_nanos();
-
-            // We have either one or two enqueued at to remove
-            let mut exit = old_batch.stats.total_nb_tasks.clamp(0, 2);
-            let mut iterator = self.enqueued_at.rev_iter_mut(wtxn)?;
-            while let Some(entry) = iterator.next() {
-                let (key, mut value) = entry?;
-                if key > started_at {
-                    continue;
-                }
-                if value.remove(old_batch.uid) {
-                    exit = exit.saturating_sub(1);
-                    // Safe because the key and value are owned
-                    unsafe {
-                        iterator.put_current(&key, &value)?;
-                    }
-                    if exit == 0 {
-                        break;
-                    }
-                }
-            }
+            if let Some(enqueued_at) = old_batch.enqueued_at {
+                remove_task_datetime(wtxn, self.enqueued_at, enqueued_at.earliest, old_batch.uid)?;
+                remove_task_datetime(wtxn, self.enqueued_at, enqueued_at.oldest, old_batch.uid)?;
+            } else {
+                // If we don't have the enqueued at in the batch it means the database comes from the v1.12
+                // and we still need to find the date by scrolling the database
+                remove_n_tasks_datetime_earlier_than(
+                    wtxn,
+                    self.enqueued_at,
+                    old_batch.started_at,
+                    old_batch.stats.total_nb_tasks.clamp(1, 2) as usize,
+                    old_batch.uid,
+                )?;
+            }
         }
-        if let Some(enqueued_at) = batch.oldest_enqueued_at {
-            insert_task_datetime(wtxn, self.enqueued_at, enqueued_at, batch.uid)?;
-        }
-        if let Some(enqueued_at) = batch.earliest_enqueued_at {
-            insert_task_datetime(wtxn, self.enqueued_at, enqueued_at, batch.uid)?;
-        }
+        // A finished batch MUST contains at least one task and have an enqueued_at
+        let enqueued_at = batch.enqueued_at.as_ref().unwrap();
+        insert_task_datetime(wtxn, self.enqueued_at, enqueued_at.earliest, batch.uid)?;
+        insert_task_datetime(wtxn, self.enqueued_at, enqueued_at.oldest, batch.uid)?;

         // Update the started at and finished at
         if let Some(ref old_batch) = old_batch {
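The rewrite above works because a batch now persists its `BatchEnqueuedAt { earliest, oldest }` pair, so at most two entries of the timestamp reverse index ever need removal, instead of scanning backwards through the database. A small sketch of that invariant over a plain map (the real index is an LMDB database of `i128` timestamps to roaring bitmaps):

```rust
use std::collections::BTreeMap;

// Stand-in for the reverse index: datetime (as nanos) -> batch ids.
type EnqueuedAtIndex = BTreeMap<i128, Vec<u32>>;

struct BatchEnqueuedAt {
    earliest: i128,
    oldest: i128,
}

// Only the two boundary timestamps of a batch are ever indexed, so removing
// a batch touches at most two keys.
fn remove_batch(index: &mut EnqueuedAtIndex, uid: u32, enqueued_at: &BatchEnqueuedAt) {
    for key in [enqueued_at.earliest, enqueued_at.oldest] {
        if let Some(ids) = index.get_mut(&key) {
            ids.retain(|&id| id != uid);
            if ids.is_empty() {
                index.remove(&key);
            }
        }
    }
}
```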


@@ -102,30 +102,33 @@ fn query_batches_simple() {
         .unwrap();
     assert_eq!(batches.len(), 1);
     batches[0].started_at = OffsetDateTime::UNIX_EPOCH;
+    assert!(batches[0].enqueued_at.is_some());
+    batches[0].enqueued_at = None;
     // Insta cannot snapshot our batches because the batch stats contains an enum as key: https://github.com/mitsuhiko/insta/issues/689
     let batch = serde_json::to_string_pretty(&batches[0]).unwrap();
     snapshot!(batch, @r#"
     {
       "uid": 0,
       "details": {
         "primaryKey": "mouse"
       },
       "stats": {
         "totalNbTasks": 1,
         "status": {
           "processing": 1
         },
         "types": {
           "indexCreation": 1
         },
         "indexUids": {
           "catto": 1
         }
       },
       "startedAt": "1970-01-01T00:00:00Z",
-      "finishedAt": null
+      "finishedAt": null,
+      "enqueuedAt": null
     }
     "#);

     let query = Query { statuses: Some(vec![Status::Enqueued]), ..Default::default() };
     let (batches, _) = index_scheduler


@@ -8,11 +8,12 @@ mod tasks_test;
 mod test;

 use std::collections::BTreeMap;
+use std::fs::File as StdFile;
 use std::time::Duration;

 use file_store::FileStore;
 use meilisearch_types::batches::BatchId;
-use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn};
+use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn, WithoutTls};
 use meilisearch_types::milli::{CboRoaringBitmapCodec, BEU32};
 use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task};
 use roaring::RoaringBitmap;
@@ -156,7 +157,7 @@ impl Queue {

     /// Create an index scheduler and start its run loop.
     pub(crate) fn new(
-        env: &Env,
+        env: &Env<WithoutTls>,
         wtxn: &mut RwTxn,
         options: &IndexSchedulerOptions,
     ) -> Result<Self> {
@@ -216,6 +217,11 @@ impl Queue {
         }
     }

+    /// Open and returns the task's content File.
+    pub fn update_file(&self, uuid: Uuid) -> file_store::Result<StdFile> {
+        self.file_store.get_update(uuid)
+    }
+
     /// Delete a file from the index scheduler.
     ///
     /// Counterpart to the [`create_update_file`](IndexScheduler::create_update_file) method.
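A hedged usage sketch of the new `update_file` accessor, using the `Queue` from this module; the caller and the uuid source are assumptions:

```rust
use std::io::Read;

// Hypothetical caller: fetch a task's raw update payload by its content uuid
// and read it into memory (update files are plain files on disk).
fn read_update_payload(queue: &Queue, uuid: uuid::Uuid) -> std::io::Result<String> {
    let mut file = queue.update_file(uuid).expect("content file should exist");
    let mut buf = String::new();
    file.read_to_string(&mut buf)?;
    Ok(buf)
}
```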


@@ -1,7 +1,7 @@
 use std::ops::{Bound, RangeBounds};

 use meilisearch_types::heed::types::{DecodeIgnore, SerdeBincode, SerdeJson, Str};
-use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn};
+use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn, WithoutTls};
 use meilisearch_types::milli::{CboRoaringBitmapCodec, RoaringBitmapCodec, BEU32};
 use meilisearch_types::tasks::{Kind, Status, Task};
 use roaring::{MultiOps, RoaringBitmap};
@@ -68,7 +68,7 @@ impl TaskQueue {
         NUMBER_OF_DATABASES
     }

-    pub(crate) fn new(env: &Env, wtxn: &mut RwTxn) -> Result<Self> {
+    pub(crate) fn new(env: &Env<WithoutTls>, wtxn: &mut RwTxn) -> Result<Self> {
         Ok(Self {
             all_tasks: env.create_database(wtxn, Some(db_name::ALL_TASKS))?,
             status: env.create_database(wtxn, Some(db_name::STATUS))?,


@@ -326,7 +326,7 @@ fn test_auto_deletion_of_tasks() {
 fn test_task_queue_is_full() {
     let (index_scheduler, mut handle) = IndexScheduler::test_with_custom_config(vec![], |config| {
         // that's the minimum map size possible
-        config.task_db_size = 1048576;
+        config.task_db_size = 1048576 * 3;
         None
     });


@@ -20,9 +20,12 @@ use std::path::PathBuf;
 use std::sync::atomic::{AtomicBool, AtomicU32, Ordering};
 use std::sync::Arc;

+use convert_case::{Case, Casing as _};
 use meilisearch_types::error::ResponseError;
+use meilisearch_types::heed::{Env, WithoutTls};
 use meilisearch_types::milli;
 use meilisearch_types::tasks::Status;
+use process_batch::ProcessBatchInfo;
 use rayon::current_num_threads;
 use rayon::iter::{IntoParallelIterator, ParallelIterator};
 use roaring::RoaringBitmap;
@@ -71,10 +74,15 @@ pub struct Scheduler {
     pub(crate) snapshots_path: PathBuf,

     /// The path to the folder containing the auth LMDB env.
-    pub(crate) auth_path: PathBuf,
+    pub(crate) auth_env: Env<WithoutTls>,

     /// The path to the version file of Meilisearch.
     pub(crate) version_file_path: PathBuf,
+
+    /// The maximal number of entries in the search query cache of an embedder.
+    ///
+    /// 0 disables the cache.
+    pub(crate) embedding_cache_cap: usize,
 }

 impl Scheduler {
@@ -87,12 +95,13 @@ impl Scheduler {
             batched_tasks_size_limit: self.batched_tasks_size_limit,
             dumps_path: self.dumps_path.clone(),
             snapshots_path: self.snapshots_path.clone(),
-            auth_path: self.auth_path.clone(),
+            auth_env: self.auth_env.clone(),
             version_file_path: self.version_file_path.clone(),
+            embedding_cache_cap: self.embedding_cache_cap,
         }
     }

-    pub fn new(options: &IndexSchedulerOptions) -> Scheduler {
+    pub fn new(options: &IndexSchedulerOptions, auth_env: Env<WithoutTls>) -> Scheduler {
         Scheduler {
             must_stop_processing: MustStopProcessing::default(),
             // we want to start the loop right away in case meilisearch was ctrl+Ced while processing things
@@ -102,8 +111,9 @@ impl Scheduler {
             batched_tasks_size_limit: options.batched_tasks_size_limit,
             dumps_path: options.dumps_path.clone(),
             snapshots_path: options.snapshots_path.clone(),
-            auth_path: options.auth_path.clone(),
+            auth_env,
             version_file_path: options.version_file_path.clone(),
+            embedding_cache_cap: options.embedding_cache_cap,
         }
     }
 }
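`embedding_cache_cap` bounds a per-embedder search query cache, with `0` meaning no caching at all. A minimal sketch of those semantics; the real cache lives inside `Embedder` and is more elaborate (this map-based version with arbitrary eviction is an assumption):

```rust
use std::collections::HashMap;

// Toy bounded cache illustrating the `cap == 0` convention.
struct EmbeddingCache {
    cap: usize,
    entries: HashMap<String, Vec<f32>>,
}

impl EmbeddingCache {
    fn new(cap: usize) -> Self {
        Self { cap, entries: HashMap::new() }
    }

    fn insert(&mut self, query: String, embedding: Vec<f32>) {
        if self.cap == 0 {
            return; // 0 disables the cache entirely
        }
        if self.entries.len() >= self.cap {
            // Evict an arbitrary entry to stay under the cap (a real cache
            // would evict by recency).
            if let Some(key) = self.entries.keys().next().cloned() {
                self.entries.remove(&key);
            }
        }
        self.entries.insert(query, embedding);
    }

    fn get(&self, query: &str) -> Option<&Vec<f32>> {
        self.entries.get(query)
    }
}
```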
@@ -166,13 +176,41 @@ impl IndexScheduler {
                 let processing_batch = &mut processing_batch;
                 let progress = progress.clone();
                 std::thread::scope(|s| {
+                    let p = progress.clone();
                     let handle = std::thread::Builder::new()
                         .name(String::from("batch-operation"))
                         .spawn_scoped(s, move || {
-                            cloned_index_scheduler.process_batch(batch, processing_batch, progress)
+                            cloned_index_scheduler.process_batch(batch, processing_batch, p)
                         })
                         .unwrap();
-                    handle.join().unwrap_or(Err(Error::ProcessBatchPanicked))
+
+                    match handle.join() {
+                        Ok(ret) => {
+                            if ret.is_err() {
+                                if let Ok(progress_view) =
+                                    serde_json::to_string(&progress.as_progress_view())
+                                {
+                                    tracing::warn!("Batch failed while doing: {progress_view}")
+                                }
+                            }
+                            ret
+                        }
+                        Err(panic) => {
+                            if let Ok(progress_view) =
+                                serde_json::to_string(&progress.as_progress_view())
+                            {
+                                tracing::warn!("Batch failed while doing: {progress_view}")
+                            }
+                            let msg = match panic.downcast_ref::<&'static str>() {
+                                Some(s) => *s,
+                                None => match panic.downcast_ref::<String>() {
+                                    Some(s) => &s[..],
+                                    None => "Box<dyn Any>",
+                                },
+                            };
+                            Err(Error::ProcessBatchPanicked(msg.to_string()))
+                        }
+                    }
                 })
             };
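The panic payload returned by `JoinHandle::join` is a `Box<dyn Any + Send>`: a `panic!` with a string literal carries a `&'static str`, while a formatted `panic!` carries a `String`, hence the two-step downcast above. A self-contained sketch of the same ladder:

```rust
use std::any::Any;
use std::thread;

// Extract a human-readable message from a panic payload, falling back to a
// placeholder when the payload is neither a &str nor a String.
fn panic_message(panic: Box<dyn Any + Send>) -> String {
    match panic.downcast_ref::<&'static str>() {
        Some(s) => (*s).to_string(),
        None => match panic.downcast_ref::<String>() {
            Some(s) => s.clone(),
            None => "Box<dyn Any>".to_string(),
        },
    }
}

fn main() {
    let handle = thread::spawn(|| panic!("boom: {}", 42)); // panics with a String
    if let Err(panic) = handle.join() {
        assert_eq!(panic_message(panic), "boom: 42");
    }
}
```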
@@ -187,14 +225,16 @@ impl IndexScheduler {
         let mut stop_scheduler_forever = false;
         let mut wtxn = self.env.write_txn().map_err(Error::HeedTransaction)?;
         let mut canceled = RoaringBitmap::new();
+        let mut process_batch_info = ProcessBatchInfo::default();

         match res {
-            Ok(tasks) => {
+            Ok((tasks, info)) => {
                 #[cfg(test)]
                 self.breakpoint(crate::test_utils::Breakpoint::ProcessBatchSucceeded);

                 let (task_progress, task_progress_obj) = AtomicTaskStep::new(tasks.len() as u32);
                 progress.update_progress(task_progress_obj);
+                process_batch_info = info;

                 let mut success = 0;
                 let mut failure = 0;
                 let mut canceled_by = None;
@@ -311,6 +351,58 @@ impl IndexScheduler {
         // We must re-add the canceled task so they're part of the same batch.
         ids |= canceled;

+        let ProcessBatchInfo { congestion, pre_commit_dabases_sizes, post_commit_dabases_sizes } =
+            process_batch_info;
+
+        processing_batch.stats.progress_trace =
+            progress.accumulated_durations().into_iter().map(|(k, v)| (k, v.into())).collect();
+        processing_batch.stats.write_channel_congestion = congestion.map(|congestion| {
+            let mut congestion_info = serde_json::Map::new();
+            congestion_info.insert("attempts".into(), congestion.attempts.into());
+            congestion_info.insert("blocking_attempts".into(), congestion.blocking_attempts.into());
+            congestion_info.insert("blocking_ratio".into(), congestion.congestion_ratio().into());
+            congestion_info
+        });
+        processing_batch.stats.internal_database_sizes = pre_commit_dabases_sizes
+            .iter()
+            .flat_map(|(dbname, pre_size)| {
+                post_commit_dabases_sizes
+                    .get(dbname)
+                    .map(|post_size| {
+                        use byte_unit::{Byte, UnitType::Binary};
+                        use std::cmp::Ordering::{Equal, Greater, Less};
+
+                        let post = Byte::from_u64(*post_size as u64).get_appropriate_unit(Binary);
+                        let diff_size = post_size.abs_diff(*pre_size) as u64;
+                        let diff = Byte::from_u64(diff_size).get_appropriate_unit(Binary);
+                        let sign = match post_size.cmp(pre_size) {
+                            Equal => return None,
+                            Greater => "+",
+                            Less => "-",
+                        };
+
+                        Some((
+                            dbname.to_case(Case::Camel),
+                            format!("{post:#.2} ({sign}{diff:#.2})").into(),
+                        ))
+                    })
+                    .into_iter()
+                    .flatten()
+            })
+            .collect();
+
+        if let Some(congestion) = congestion {
+            tracing::debug!(
+                "Channel congestion metrics - Attempts: {}, Blocked attempts: {} ({:.1}% congestion)",
+                congestion.attempts,
+                congestion.blocking_attempts,
+                congestion.congestion_ratio(),
+            );
+        }
+
+        tracing::debug!("call trace: {:?}", progress.accumulated_durations());
+
         self.queue.write_batch(&mut wtxn, processing_batch, &ids)?;

         #[cfg(test)]
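The size report above leans on the `byte_unit` crate: `get_appropriate_unit(Binary)` picks a KiB/MiB/GiB-style unit and the `{:#.2}` format renders it with two decimals. A small sketch of the same diff formatting in isolation (crate version and exact rendering are assumptions):

```rust
use byte_unit::{Byte, UnitType::Binary};

// Render "new size (signed delta)" for one database, mirroring the logic above;
// unchanged sizes are omitted.
fn size_change(pre: u64, post: u64) -> Option<String> {
    let sign = match post.cmp(&pre) {
        std::cmp::Ordering::Equal => return None,
        std::cmp::Ordering::Greater => "+",
        std::cmp::Ordering::Less => "-",
    };
    let post_fmt = Byte::from_u64(post).get_appropriate_unit(Binary);
    let diff_fmt = Byte::from_u64(post.abs_diff(pre)).get_appropriate_unit(Binary);
    Some(format!("{post_fmt:#.2} ({sign}{diff_fmt:#.2})"))
}

fn main() {
    // Prints something like: Some("2.00 MiB (+1.00 MiB)")
    println!("{:?}", size_change(1 << 20, 2 << 20));
}
```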


@@ -2,23 +2,36 @@ use std::collections::{BTreeSet, HashMap, HashSet};
 use std::panic::{catch_unwind, AssertUnwindSafe};
 use std::sync::atomic::Ordering;

-use meilisearch_types::batches::BatchId;
+use meilisearch_types::batches::{BatchEnqueuedAt, BatchId};
 use meilisearch_types::heed::{RoTxn, RwTxn};
 use meilisearch_types::milli::progress::{Progress, VariableNameStep};
-use meilisearch_types::milli::{self};
+use meilisearch_types::milli::{self, ChannelCongestion};
 use meilisearch_types::tasks::{Details, IndexSwap, KindWithContent, Status, Task};
 use milli::update::Settings as MilliSettings;
 use roaring::RoaringBitmap;

 use super::create_batch::Batch;
 use crate::processing::{
-    AtomicBatchStep, AtomicTaskStep, CreateIndexProgress, DeleteIndexProgress,
+    AtomicBatchStep, AtomicTaskStep, CreateIndexProgress, DeleteIndexProgress, FinalizingIndexStep,
     InnerSwappingTwoIndexes, SwappingTheIndexes, TaskCancelationProgress, TaskDeletionProgress,
     UpdateIndexProgress,
 };
-use crate::utils::{self, swap_index_uid_in_task, ProcessingBatch};
+use crate::utils::{
+    self, remove_n_tasks_datetime_earlier_than, remove_task_datetime, swap_index_uid_in_task,
+    ProcessingBatch,
+};
 use crate::{Error, IndexScheduler, Result, TaskId};

+#[derive(Debug, Default)]
+pub struct ProcessBatchInfo {
+    /// The write channel congestion. None when unavailable: settings update.
+    pub congestion: Option<ChannelCongestion>,
+    /// The sizes of the different databases before starting the indexation.
+    pub pre_commit_dabases_sizes: indexmap::IndexMap<&'static str, usize>,
+    /// The sizes of the different databases after commiting the indexation.
+    pub post_commit_dabases_sizes: indexmap::IndexMap<&'static str, usize>,
+}
+
 impl IndexScheduler {
     /// Apply the operation associated with the given batch.
     ///
@@ -32,7 +45,7 @@ impl IndexScheduler {
         batch: Batch,
         current_batch: &mut ProcessingBatch,
         progress: Progress,
-    ) -> Result<Vec<Task>> {
+    ) -> Result<(Vec<Task>, ProcessBatchInfo)> {
         #[cfg(test)]
         {
             self.maybe_fail(crate::test_utils::FailureLocation::InsideProcessBatch)?;
@@ -73,7 +86,7 @@ impl IndexScheduler {
                 canceled_tasks.push(task);

-                Ok(canceled_tasks)
+                Ok((canceled_tasks, ProcessBatchInfo::default()))
             }
             Batch::TaskDeletions(mut tasks) => {
                 // 1. Retrieve the tasks that matched the query at enqueue-time.
@@ -112,10 +125,14 @@ impl IndexScheduler {
                         _ => unreachable!(),
                     }
                 }
-                Ok(tasks)
+                Ok((tasks, ProcessBatchInfo::default()))
             }
-            Batch::SnapshotCreation(tasks) => self.process_snapshot(progress, tasks),
-            Batch::Dump(task) => self.process_dump_creation(progress, task),
+            Batch::SnapshotCreation(tasks) => self
+                .process_snapshot(progress, tasks)
+                .map(|tasks| (tasks, ProcessBatchInfo::default())),
+            Batch::Dump(task) => self
+                .process_dump_creation(progress, task)
+                .map(|tasks| (tasks, ProcessBatchInfo::default())),
             Batch::IndexOperation { op, must_create_index } => {
                 let index_uid = op.index_uid().to_string();
                 let index = if must_create_index {
@@ -132,9 +149,12 @@ impl IndexScheduler {
                     .set_currently_updating_index(Some((index_uid.clone(), index.clone())));

                 let mut index_wtxn = index.write_txn()?;
-                let tasks = self.apply_index_operation(&mut index_wtxn, &index, op, progress)?;
+                let pre_commit_dabases_sizes = index.database_sizes(&index_wtxn)?;
+                let (tasks, congestion) =
+                    self.apply_index_operation(&mut index_wtxn, &index, op, &progress)?;

                 {
+                    progress.update_progress(FinalizingIndexStep::Committing);
                     let span = tracing::trace_span!(target: "indexing::scheduler", "commit");
                     let _entered = span.enter();
@@ -145,12 +165,15 @@ impl IndexScheduler {
                 // stats of the index. Since the tasks have already been processed and
                 // this is a non-critical operation. If it fails, we should not fail
                 // the entire batch.
+                let mut post_commit_dabases_sizes = None;
                 let res = || -> Result<()> {
+                    progress.update_progress(FinalizingIndexStep::ComputingStats);
                     let index_rtxn = index.read_txn()?;
                     let stats = crate::index_mapper::IndexStats::new(&index, &index_rtxn)
                         .map_err(|e| Error::from_milli(e, Some(index_uid.to_string())))?;
                     let mut wtxn = self.env.write_txn()?;
                     self.index_mapper.store_stats_of(&mut wtxn, &index_uid, &stats)?;
+                    post_commit_dabases_sizes = Some(index.database_sizes(&index_rtxn)?);
                     wtxn.commit()?;
                     Ok(())
                 }();
@@ -163,7 +186,16 @@ impl IndexScheduler {
                     ),
                 }

-                Ok(tasks)
+                let info = ProcessBatchInfo {
+                    congestion,
+                    // In case we fail to the get post-commit sizes we decide
+                    // that nothing changed and use the pre-commit sizes.
+                    post_commit_dabases_sizes: post_commit_dabases_sizes
+                        .unwrap_or_else(|| pre_commit_dabases_sizes.clone()),
+                    pre_commit_dabases_sizes,
+                };
+
+                Ok((tasks, info))
             }
             Batch::IndexCreation { index_uid, primary_key, task } => {
                 progress.update_progress(CreateIndexProgress::CreatingTheIndex);
@@ -231,7 +263,7 @@ impl IndexScheduler {
                     ),
                 }

-                Ok(vec![task])
+                Ok((vec![task], ProcessBatchInfo::default()))
             }
             Batch::IndexDeletion { index_uid, index_has_been_created, mut tasks } => {
                 progress.update_progress(DeleteIndexProgress::DeletingTheIndex);
@@ -265,7 +297,9 @@ impl IndexScheduler {
                     };
                 }

-                Ok(tasks)
+                // Here we could also show that all the internal database sizes goes to 0
+                // but it would mean opening the index and that's costly.
+                Ok((tasks, ProcessBatchInfo::default()))
             }
             Batch::IndexSwap { mut task } => {
                 progress.update_progress(SwappingTheIndexes::EnsuringCorrectnessOfTheSwap);
@@ -313,7 +347,7 @@ impl IndexScheduler {
                 }
                 wtxn.commit()?;
                 task.status = Status::Succeeded;
-                Ok(vec![task])
+                Ok((vec![task], ProcessBatchInfo::default()))
             }
             Batch::UpgradeDatabase { mut tasks } => {
                 let KindWithContent::UpgradeDatabase { from } = tasks.last().unwrap().kind else {
@@ -323,8 +357,17 @@ impl IndexScheduler {
                 match ret {
                     Ok(Ok(())) => (),
                     Ok(Err(e)) => return Err(Error::DatabaseUpgrade(Box::new(e))),
-                    Err(_e) => {
-                        return Err(Error::DatabaseUpgrade(Box::new(Error::ProcessBatchPanicked)));
+                    Err(e) => {
+                        let msg = match e.downcast_ref::<&'static str>() {
+                            Some(s) => *s,
+                            None => match e.downcast_ref::<String>() {
+                                Some(s) => &s[..],
+                                None => "Box<dyn Any>",
+                            },
+                        };
+                        return Err(Error::DatabaseUpgrade(Box::new(Error::ProcessBatchPanicked(
+                            msg.to_string(),
+                        ))));
                     }
                 }
@@ -334,7 +377,7 @@ impl IndexScheduler {
                     task.error = None;
                 }

-                Ok(tasks)
+                Ok((tasks, ProcessBatchInfo::default()))
             }
         }
     }
@@ -418,7 +461,6 @@ impl IndexScheduler {
         to_delete_tasks -= &enqueued_tasks;

         // 2. We now have a list of tasks to delete, delete them
-
         let mut affected_indexes = HashSet::new();
         let mut affected_statuses = HashSet::new();
         let mut affected_kinds = HashSet::new();
@@ -515,9 +557,51 @@ impl IndexScheduler {
                 tasks -= &to_delete_tasks;
                 // We must remove the batch entirely
                 if tasks.is_empty() {
-                    self.queue.batches.all_batches.delete(wtxn, &batch_id)?;
-                    self.queue.batch_to_tasks_mapping.delete(wtxn, &batch_id)?;
+                    if let Some(batch) = self.queue.batches.get_batch(wtxn, batch_id)? {
+                        if let Some(BatchEnqueuedAt { earliest, oldest }) = batch.enqueued_at {
+                            remove_task_datetime(
+                                wtxn,
+                                self.queue.batches.enqueued_at,
+                                earliest,
+                                batch_id,
+                            )?;
+                            remove_task_datetime(
+                                wtxn,
+                                self.queue.batches.enqueued_at,
+                                oldest,
+                                batch_id,
+                            )?;
+                        } else {
+                            // If we don't have the enqueued at in the batch it means the database comes from the v1.12
+                            // and we still need to find the date by scrolling the database
+                            remove_n_tasks_datetime_earlier_than(
+                                wtxn,
+                                self.queue.batches.enqueued_at,
+                                batch.started_at,
+                                batch.stats.total_nb_tasks.clamp(1, 2) as usize,
+                                batch_id,
+                            )?;
+                        }
+                        remove_task_datetime(
+                            wtxn,
+                            self.queue.batches.started_at,
+                            batch.started_at,
+                            batch_id,
+                        )?;
+                        if let Some(finished_at) = batch.finished_at {
+                            remove_task_datetime(
+                                wtxn,
+                                self.queue.batches.finished_at,
+                                finished_at,
+                                batch_id,
+                            )?;
+                        }
+
+                        self.queue.batches.all_batches.delete(wtxn, &batch_id)?;
+                        self.queue.batch_to_tasks_mapping.delete(wtxn, &batch_id)?;
+                    }
                 }

                 // Anyway, we must remove the batch from all its reverse indexes.
                 // The only way to do that is to check
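For pre-v1.13 batches that never stored an `enqueued_at`, the fallback scans the timestamp index backwards from `started_at` and removes at most `n` occurrences of the batch id. A sketch of that contract over a plain map (the real `remove_n_tasks_datetime_earlier_than` walks an LMDB range; the map version is an assumption):

```rust
use std::collections::BTreeMap;

// Remove up to `n` occurrences of `uid` from entries keyed strictly earlier
// than `before`, scanning from the most recent entry backwards.
fn remove_n_earlier_than(
    index: &mut BTreeMap<i128, Vec<u32>>,
    before: i128,
    mut n: usize,
    uid: u32,
) {
    if n == 0 {
        return;
    }
    let keys: Vec<i128> = index.range(..before).rev().map(|(k, _)| *k).collect();
    for key in keys {
        let ids = index.get_mut(&key).unwrap();
        if let Some(pos) = ids.iter().position(|&id| id == uid) {
            ids.remove(pos);
            if ids.is_empty() {
                index.remove(&key);
            }
            n -= 1;
            if n == 0 {
                break;
            }
        }
    }
}
```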


@@ -1,3 +1,4 @@
+use std::collections::BTreeMap;
 use std::fs::File;
 use std::io::BufWriter;
 use std::sync::atomic::Ordering;
@@ -11,7 +12,9 @@ use meilisearch_types::tasks::{Details, KindWithContent, Status, Task};
 use time::macros::format_description;
 use time::OffsetDateTime;

-use crate::processing::{AtomicDocumentStep, AtomicTaskStep, DumpCreationProgress};
+use crate::processing::{
+    AtomicBatchStep, AtomicDocumentStep, AtomicTaskStep, DumpCreationProgress,
+};
 use crate::{Error, IndexScheduler, Result};

 impl IndexScheduler {
@@ -102,7 +105,40 @@ impl IndexScheduler {
         }
         dump_tasks.flush()?;

-        // 3. Dump the indexes
+        // 3. dump the batches
+        progress.update_progress(DumpCreationProgress::DumpTheBatches);
+        let mut dump_batches = dump.create_batches_queue()?;
+
+        let (atomic_batch_progress, update_batch_progress) =
+            AtomicBatchStep::new(self.queue.batches.all_batches.len(&rtxn)? as u32);
+        progress.update_progress(update_batch_progress);
+
+        for ret in self.queue.batches.all_batches.iter(&rtxn)? {
+            if self.scheduler.must_stop_processing.get() {
+                return Err(Error::AbortedTask);
+            }
+
+            let (_, mut b) = ret?;
+            // In the case we're dumping ourselves we want to be marked as finished
+            // to not loop over ourselves indefinitely.
+            if b.uid == task.uid {
+                let finished_at = OffsetDateTime::now_utc();
+
+                // We're going to fake the date because we don't know if everything is going to go well.
+                // But we need to dump the task as finished and successful.
+                // If something fail everything will be set appropriately in the end.
+                let mut statuses = BTreeMap::new();
+                statuses.insert(Status::Succeeded, b.stats.total_nb_tasks);
+                b.stats.status = statuses;
+                b.finished_at = Some(finished_at);
+            }
+
+            dump_batches.push_batch(&b)?;
+            atomic_batch_progress.fetch_add(1, Ordering::Relaxed);
+        }
+        dump_batches.flush()?;
+
+        // 4. Dump the indexes
         progress.update_progress(DumpCreationProgress::DumpTheIndexes);
         let nb_indexes = self.index_mapper.index_mapping.len(&rtxn)? as u32;
         let mut count = 0;
@@ -142,7 +178,7 @@ impl IndexScheduler {
                 let documents = index
                     .all_documents(&rtxn)
                     .map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
-                // 3.1. Dump the documents
+                // 4.1. Dump the documents
                 for ret in documents {
                     if self.scheduler.must_stop_processing.get() {
                         return Err(Error::AbortedTask);
@@ -204,7 +240,7 @@ impl IndexScheduler {
                     atomic.fetch_add(1, Ordering::Relaxed);
                 }

-                // 3.2. Dump the settings
+                // 4.2. Dump the settings
                 let settings = meilisearch_types::settings::settings(
                     index,
                     &rtxn,
@@ -215,10 +251,12 @@ impl IndexScheduler {
             Ok(())
         })?;

-        // 4. Dump experimental feature settings
+        // 5. Dump experimental feature settings
         progress.update_progress(DumpCreationProgress::DumpTheExperimentalFeatures);
         let features = self.features().runtime_features();
         dump.create_experimental_features(features)?;
+        let network = self.network();
+        dump.create_network(network)?;

         let dump_uid = started_at.format(format_description!(
             "[year repr:full][month repr:numerical][day padding:zero]-[hour padding:zero][minute padding:zero][second padding:zero][subsecond digits:3]"


@@ -5,7 +5,7 @@ use meilisearch_types::milli::documents::PrimaryKey;
 use meilisearch_types::milli::progress::Progress;
 use meilisearch_types::milli::update::new::indexer::{self, UpdateByFunction};
 use meilisearch_types::milli::update::DocumentAdditionResult;
-use meilisearch_types::milli::{self, Filter, ThreadPoolNoAbortBuilder};
+use meilisearch_types::milli::{self, ChannelCongestion, Filter, ThreadPoolNoAbortBuilder};
 use meilisearch_types::settings::apply_settings_to_builder;
 use meilisearch_types::tasks::{Details, KindWithContent, Status, Task};
 use meilisearch_types::Index;
@@ -32,10 +32,9 @@ impl IndexScheduler {
         index_wtxn: &mut RwTxn<'i>,
         index: &'i Index,
         operation: IndexOperation,
-        progress: Progress,
-    ) -> Result<Vec<Task>> {
+        progress: &Progress,
+    ) -> Result<(Vec<Task>, Option<ChannelCongestion>)> {
         let indexer_alloc = Bump::new();
-
         let started_processing_at = std::time::Instant::now();
         let must_stop_processing = self.scheduler.must_stop_processing.clone();
@@ -60,7 +59,7 @@ impl IndexScheduler {
                     };
                 }

-                Ok(tasks)
+                Ok((tasks, None))
             }
             IndexOperation::DocumentOperation { index_uid, primary_key, operations, mut tasks } => {
                 progress.update_progress(DocumentOperationProgress::RetrievingConfig);
@@ -173,21 +172,24 @@ impl IndexScheduler {
                 }

                 progress.update_progress(DocumentOperationProgress::Indexing);
+                let mut congestion = None;
                 if tasks.iter().any(|res| res.error.is_none()) {
-                    indexer::index(
-                        index_wtxn,
-                        index,
-                        pool,
-                        indexer_config.grenad_parameters(),
-                        &db_fields_ids_map,
-                        new_fields_ids_map,
-                        primary_key,
-                        &document_changes,
-                        embedders,
-                        &|| must_stop_processing.get(),
-                        &progress,
-                    )
-                    .map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?;
+                    congestion = Some(
+                        indexer::index(
+                            index_wtxn,
+                            index,
+                            pool,
+                            indexer_config.grenad_parameters(),
+                            &db_fields_ids_map,
+                            new_fields_ids_map,
+                            primary_key,
+                            &document_changes,
+                            embedders,
+                            &|| must_stop_processing.get(),
+                            progress,
+                        )
+                        .map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?,
+                    );

                     let addition = DocumentAdditionResult {
                         indexed_documents: candidates_count,
@@ -199,7 +201,7 @@ impl IndexScheduler {
                     tracing::info!(indexing_result = ?addition, processed_in = ?started_processing_at.elapsed(), "document indexing done");
                 }

-                Ok(tasks)
+                Ok((tasks, congestion))
             }
             IndexOperation::DocumentEdition { index_uid, mut task } => {
                 progress.update_progress(DocumentEditionProgress::RetrievingConfig);
@@ -247,7 +249,7 @@ impl IndexScheduler {
                         edited_documents: Some(0),
                     });

-                    return Ok(vec![task]);
+                    return Ok((vec![task], None));
                 }

                 let rtxn = index.read_txn()?;
@@ -262,6 +264,7 @@ impl IndexScheduler {
                 let result_count = Ok((candidates.len(), candidates.len())) as Result<_>;

+                let mut congestion = None;
                 if task.error.is_none() {
                     let local_pool;
                     let indexer_config = self.index_mapper.indexer_config();
@@ -292,20 +295,22 @@ impl IndexScheduler {
                     let embedders = self.embedders(index_uid.clone(), embedders)?;

                     progress.update_progress(DocumentEditionProgress::Indexing);
-                    indexer::index(
-                        index_wtxn,
-                        index,
-                        pool,
-                        indexer_config.grenad_parameters(),
-                        &db_fields_ids_map,
-                        new_fields_ids_map,
-                        None, // cannot change primary key in DocumentEdition
-                        &document_changes,
-                        embedders,
-                        &|| must_stop_processing.get(),
-                        &progress,
-                    )
-                    .map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?;
+                    congestion = Some(
+                        indexer::index(
+                            index_wtxn,
+                            index,
+                            pool,
+                            indexer_config.grenad_parameters(),
+                            &db_fields_ids_map,
+                            new_fields_ids_map,
+                            None, // cannot change primary key in DocumentEdition
+                            &document_changes,
+                            embedders,
+                            &|| must_stop_processing.get(),
+                            progress,
+                        )
+                        .map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?,
+                    );

                     let addition = DocumentAdditionResult {
                         indexed_documents: candidates_count,
@@ -341,7 +346,7 @@ impl IndexScheduler {
                     }
                 }

-                Ok(vec![task])
+                Ok((vec![task], congestion))
             }
             IndexOperation::DocumentDeletion { mut tasks, index_uid } => {
                 progress.update_progress(DocumentDeletionProgress::RetrievingConfig);
@@ -408,7 +413,7 @@ impl IndexScheduler {
                 }

                 if to_delete.is_empty() {
-                    return Ok(tasks);
+                    return Ok((tasks, None));
                 }

                 let rtxn = index.read_txn()?;
@@ -422,6 +427,7 @@ impl IndexScheduler {
                     PrimaryKey::new_or_insert(primary_key, &mut new_fields_ids_map)
                         .map_err(|err| Error::from_milli(err.into(), Some(index_uid.clone())))?;

+                let mut congestion = None;
                 if !tasks.iter().all(|res| res.error.is_some()) {
                     let local_pool;
                     let indexer_config = self.index_mapper.indexer_config();
@@ -447,20 +453,22 @@ impl IndexScheduler {
                     let embedders = self.embedders(index_uid.clone(), embedders)?;

                     progress.update_progress(DocumentDeletionProgress::Indexing);
-                    indexer::index(
-                        index_wtxn,
-                        index,
-                        pool,
-                        indexer_config.grenad_parameters(),
-                        &db_fields_ids_map,
-                        new_fields_ids_map,
-                        None, // document deletion never changes primary key
-                        &document_changes,
-                        embedders,
-                        &|| must_stop_processing.get(),
-                        &progress,
-                    )
-                    .map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?;
+                    congestion = Some(
+                        indexer::index(
+                            index_wtxn,
+                            index,
+                            pool,
+                            indexer_config.grenad_parameters(),
+                            &db_fields_ids_map,
+                            new_fields_ids_map,
+                            None, // document deletion never changes primary key
+                            &document_changes,
+                            embedders,
+                            &|| must_stop_processing.get(),
+                            progress,
+                        )
+                        .map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?,
+                    );

                     let addition = DocumentAdditionResult {
                         indexed_documents: candidates_count,
@@ -472,7 +480,7 @@ impl IndexScheduler {
                     tracing::info!(indexing_result = ?addition, processed_in = ?started_processing_at.elapsed(), "document indexing done");
                 }

-                Ok(tasks)
+                Ok((tasks, congestion))
             }
             IndexOperation::Settings { index_uid, settings, mut tasks } => {
                 progress.update_progress(SettingsProgress::RetrievingAndMergingTheSettings);
@@ -497,7 +505,7 @@ impl IndexScheduler {
                 )
                 .map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?;

-                Ok(tasks)
+                Ok((tasks, None))
             }
             IndexOperation::DocumentClearAndSetting {
                 index_uid,
@@ -505,17 +513,17 @@ impl IndexScheduler {
                 settings,
                 settings_tasks,
             } => {
-                let mut import_tasks = self.apply_index_operation(
+                let (mut import_tasks, _congestion) = self.apply_index_operation(
                     index_wtxn,
                     index,
                     IndexOperation::DocumentClear {
                         index_uid: index_uid.clone(),
                         tasks: cleared_tasks,
                     },
-                    progress.clone(),
+                    progress,
                 )?;

-                let settings_tasks = self.apply_index_operation(
+                let (settings_tasks, _congestion) = self.apply_index_operation(
                     index_wtxn,
                     index,
                     IndexOperation::Settings { index_uid, settings, tasks: settings_tasks },
@@ -524,7 +532,7 @@ impl IndexScheduler {
                 let mut tasks = settings_tasks;
                 tasks.append(&mut import_tasks);

-                Ok(tasks)
+                Ok((tasks, None))
             }
         }
     }


@@ -4,7 +4,6 @@ use std::sync::atomic::Ordering;

 use meilisearch_types::heed::CompactionOption;
 use meilisearch_types::milli::progress::{Progress, VariableNameStep};
-use meilisearch_types::milli::{self};
 use meilisearch_types::tasks::{Status, Task};
 use meilisearch_types::{compression, VERSION_FILE_NAME};
@@ -28,7 +27,7 @@ impl IndexScheduler {

         // 2. Snapshot the index-scheduler LMDB env
         //
-        // When we call copy_to_file, LMDB opens a read transaction by itself,
+        // When we call copy_to_path, LMDB opens a read transaction by itself,
         // we can't provide our own. It is an issue as we would like to know
         // the update files to copy but new ones can be enqueued between the copy
         // of the env and the new transaction we open to retrieve the enqueued tasks.
@@ -42,7 +41,7 @@ impl IndexScheduler {
         progress.update_progress(SnapshotCreationProgress::SnapshotTheIndexScheduler);
         let dst = temp_snapshot_dir.path().join("tasks");
         fs::create_dir_all(&dst)?;
-        self.env.copy_to_file(dst.join("data.mdb"), CompactionOption::Enabled)?;
+        self.env.copy_to_path(dst.join("data.mdb"), CompactionOption::Enabled)?;

         // 2.2 Create a read transaction on the index-scheduler
         let rtxn = self.env.read_txn()?;
@@ -81,7 +80,7 @@ impl IndexScheduler {
             let dst = temp_snapshot_dir.path().join("indexes").join(uuid.to_string());
             fs::create_dir_all(&dst)?;
             index
-                .copy_to_file(dst.join("data.mdb"), CompactionOption::Enabled)
+                .copy_to_path(dst.join("data.mdb"), CompactionOption::Enabled)
                 .map_err(|e| Error::from_milli(e, Some(name.to_string())))?;
         }

@@ -91,14 +90,7 @@ impl IndexScheduler {
         progress.update_progress(SnapshotCreationProgress::SnapshotTheApiKeys);
         let dst = temp_snapshot_dir.path().join("auth");
         fs::create_dir_all(&dst)?;
-        // TODO We can't use the open_auth_store_env function here but we should
-        let auth = unsafe {
-            milli::heed::EnvOpenOptions::new()
-                .map_size(1024 * 1024 * 1024) // 1 GiB
-                .max_dbs(2)
-                .open(&self.scheduler.auth_path)
-        }?;
-        auth.copy_to_file(dst.join("data.mdb"), CompactionOption::Enabled)?;
+        self.scheduler.auth_env.copy_to_path(dst.join("data.mdb"), CompactionOption::Enabled)?;

         // 5. Copy and tarball the flat snapshot
         progress.update_progress(SnapshotCreationProgress::CreateTheTarball);


@@ -1,12 +1,12 @@
 ---
 source: crates/index-scheduler/src/scheduler/test_embedders.rs
 expression: simple_hf_config.embedder_options
-snapshot_kind: text
 ---
 {
   "HuggingFace": {
     "model": "sentence-transformers/all-MiniLM-L6-v2",
     "revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e",
-    "distribution": null
+    "distribution": null,
+    "pooling": "useModel"
   }
 }


@@ -56,16 +56,13 @@ succeeded [1,]
 ### Batches Index Tasks:
 ----------------------------------------------------------------------
 ### Batches Enqueued At:
-[timestamp] [0,]
 [timestamp] [1,]
 [timestamp] [1,]
 ----------------------------------------------------------------------
 ### Batches Started At:
-[timestamp] [0,]
 [timestamp] [1,]
 ----------------------------------------------------------------------
 ### Batches Finished At:
-[timestamp] [0,]
 [timestamp] [1,]
 ----------------------------------------------------------------------
 ### File Store:


@@ -54,15 +54,12 @@ succeeded [1,]
 ### Batches Index Tasks:
 ----------------------------------------------------------------------
 ### Batches Enqueued At:
-[timestamp] [0,]
 [timestamp] [1,]
 ----------------------------------------------------------------------
 ### Batches Started At:
-[timestamp] [0,]
 [timestamp] [1,]
 ----------------------------------------------------------------------
 ### Batches Finished At:
-[timestamp] [0,]
 [timestamp] [1,]
 ----------------------------------------------------------------------
 ### File Store:


@@ -1,13 +1,12 @@
 ---
 source: crates/index-scheduler/src/scheduler/test.rs
-snapshot_kind: text
 ---
 ### Autobatching Enabled = true
 ### Processing batch None:
 []
 ----------------------------------------------------------------------
 ### All Tasks:
-0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
+0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
 ----------------------------------------------------------------------
 ### Status:
 enqueued [0,]


@@ -1,13 +1,12 @@
 ---
 source: crates/index-scheduler/src/scheduler/test.rs
-snapshot_kind: text
 ---
 ### Autobatching Enabled = true
 ### Processing batch None:
 []
 ----------------------------------------------------------------------
 ### All Tasks:
-0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
+0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
 ----------------------------------------------------------------------
 ### Status:
 enqueued []


@@ -1,13 +1,12 @@
 ---
 source: crates/index-scheduler/src/scheduler/test_embedders.rs
-snapshot_kind: text
 ---
 ### Autobatching Enabled = true
 ### Processing batch None:
 []
 ----------------------------------------------------------------------
 ### All Tasks:
-0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
+0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
 1 {uid: 1, batch_uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
 2 {uid: 2, batch_uid: 2, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: None, method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }}
 ----------------------------------------------------------------------


@@ -1,13 +1,12 @@
 ---
 source: crates/index-scheduler/src/scheduler/test_embedders.rs
-snapshot_kind: text
 ---
 ### Autobatching Enabled = true
 ### Processing batch None:
 []
 ----------------------------------------------------------------------
 ### All Tasks:
-0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
+0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
 1 {uid: 1, batch_uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
 2 {uid: 2, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: None, method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }}
 ----------------------------------------------------------------------


@@ -1,13 +1,12 @@
 ---
 source: crates/index-scheduler/src/scheduler/test_embedders.rs
-snapshot_kind: text
 ---
 ### Autobatching Enabled = true
 ### Processing batch None:
 []
 ----------------------------------------------------------------------
 ### All Tasks:
-0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
+0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
 1 {uid: 1, batch_uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
 ----------------------------------------------------------------------
 ### Status:


@@ -1,13 +1,12 @@
 ---
 source: crates/index-scheduler/src/scheduler/test_embedders.rs
-snapshot_kind: text
 ---
 ### Autobatching Enabled = true
 ### Processing batch None:
 []
 ----------------------------------------------------------------------
 ### All Tasks:
-0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
+0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
 1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
 ----------------------------------------------------------------------
 ### Status:


@@ -1,13 +1,12 @@
 ---
 source: crates/index-scheduler/src/scheduler/test_embedders.rs
-snapshot_kind: text
 ---
 ### Autobatching Enabled = true
 ### Processing batch None:
 []
 ----------------------------------------------------------------------
 ### All Tasks:
-0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
+0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
 ----------------------------------------------------------------------
 ### Status:
 enqueued [0,]


@@ -1,13 +1,12 @@
 ---
 source: crates/index-scheduler/src/scheduler/test_embedders.rs
-snapshot_kind: text
 ---
 ### Autobatching Enabled = true
 ### Processing batch None:
 []
 ----------------------------------------------------------------------
 ### All Tasks:
-0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
+0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
 ----------------------------------------------------------------------
 ### Status:
 enqueued []


@@ -1,13 +1,12 @@
 ---
 source: crates/index-scheduler/src/scheduler/test_failure.rs
-snapshot_kind: text
 ---
 ### Autobatching Enabled = true
 ### Processing batch None:
 []
 ----------------------------------------------------------------------
 ### All Tasks:
-0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
+0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set([Field("catto")]), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set([Field("catto")]), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
 1 {uid: 1, batch_uid: 1, status: succeeded, details: { received_documents: 3, indexed_documents: Some(3) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }}
 ----------------------------------------------------------------------
 ### Status:
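
In the `test_failure.rs` snapshots, `filterable_attributes` changes from a plain set of names (`Set({"catto"})`) to a list of rule variants (`Set([Field("catto")])`), i.e. each filterable attribute is now wrapped in an enum. A hypothetical sketch of that shape (the `FilterableAttributesRule` name mirrors the snapshot output, but the `Pattern` variant and the `matches` helper are assumptions for illustration, not the real API):

```rust
// Hypothetical rule type matching the `Field("catto")` shape in the new
// snapshots: a filterable attribute is either an exact field name or a
// pattern-based rule (the `Pattern` variant is assumed for illustration).
#[derive(Debug, Clone, PartialEq)]
enum FilterableAttributesRule {
    Field(String),
    Pattern(String),
}

impl FilterableAttributesRule {
    /// Does this rule make `field` filterable? (illustrative semantics)
    fn matches(&self, field: &str) -> bool {
        match self {
            FilterableAttributesRule::Field(name) => name == field,
            FilterableAttributesRule::Pattern(pat) => match pat.strip_suffix('*') {
                // Naive '*' suffix matching, purely illustrative.
                Some(prefix) => field.starts_with(prefix),
                None => pat == field,
            },
        }
    }
}

fn main() {
    // `Set([Field("catto")])` from the snapshot, as a rule list:
    let rules = vec![FilterableAttributesRule::Field("catto".into())];
    assert!(rules.iter().any(|r| r.matches("catto")));
    // `id` matches no rule, mirroring the failed `id = 2` filter below.
    assert!(!rules.iter().any(|r| r.matches("id")));
}
```

Wrapping names in an enum leaves room for pattern rules alongside exact field names, which is consistent with the reworded error message further down.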


@@ -1,13 +1,12 @@
 ---
 source: crates/index-scheduler/src/scheduler/test_failure.rs
-snapshot_kind: text
 ---
 ### Autobatching Enabled = true
 ### Processing batch None:
 []
 ----------------------------------------------------------------------
 ### All Tasks:
-0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
+0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set([Field("catto")]), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set([Field("catto")]), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
 1 {uid: 1, status: enqueued, details: { received_documents: 3, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }}
 ----------------------------------------------------------------------
 ### Status:


@@ -1,17 +1,16 @@
 ---
 source: crates/index-scheduler/src/scheduler/test_failure.rs
-snapshot_kind: text
 ---
 ### Autobatching Enabled = true
 ### Processing batch None:
 []
 ----------------------------------------------------------------------
 ### All Tasks:
-0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
+0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set([Field("catto")]), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set([Field("catto")]), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
 1 {uid: 1, batch_uid: 1, status: succeeded, details: { received_documents: 3, indexed_documents: Some(3) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }}
 2 {uid: 2, batch_uid: 2, status: succeeded, details: { received_document_ids: 1, deleted_documents: Some(1) }, kind: DocumentDeletion { index_uid: "doggos", documents_ids: ["1"] }}
 3 {uid: 3, batch_uid: 2, status: failed, error: ResponseError { code: 200, message: "Index `doggos`: Invalid type for filter subexpression: expected: String, Array, found: true.", error_code: "invalid_document_filter", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#invalid_document_filter" }, details: { original_filter: true, deleted_documents: Some(0) }, kind: DocumentDeletionByFilter { index_uid: "doggos", filter_expr: Bool(true) }}
-4 {uid: 4, batch_uid: 2, status: failed, error: ResponseError { code: 200, message: "Index `doggos`: Attribute `id` is not filterable. Available filterable attributes are: `catto`.\n1:3 id = 2", error_code: "invalid_document_filter", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#invalid_document_filter" }, details: { original_filter: "id = 2", deleted_documents: Some(0) }, kind: DocumentDeletionByFilter { index_uid: "doggos", filter_expr: String("id = 2") }}
+4 {uid: 4, batch_uid: 2, status: failed, error: ResponseError { code: 200, message: "Index `doggos`: Attribute `id` is not filterable. Available filterable attribute patterns are: `catto`.\n1:3 id = 2", error_code: "invalid_document_filter", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#invalid_document_filter" }, details: { original_filter: "id = 2", deleted_documents: Some(0) }, kind: DocumentDeletionByFilter { index_uid: "doggos", filter_expr: String("id = 2") }}
 5 {uid: 5, batch_uid: 2, status: succeeded, details: { original_filter: "catto EXISTS", deleted_documents: Some(1) }, kind: DocumentDeletionByFilter { index_uid: "doggos", filter_expr: String("catto EXISTS") }}
 ----------------------------------------------------------------------
 ### Status:


@ -1,13 +1,12 @@
--- ---
source: crates/index-scheduler/src/scheduler/test_failure.rs source: crates/index-scheduler/src/scheduler/test_failure.rs
snapshot_kind: text
--- ---
### Autobatching Enabled = true ### Autobatching Enabled = true
### Processing batch None: ### Processing batch None:
[] []
---------------------------------------------------------------------- ----------------------------------------------------------------------
### All Tasks: ### All Tasks:
0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }} 0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set([Field("catto")]), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set([Field("catto")]), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
1 {uid: 1, batch_uid: 1, status: succeeded, details: { received_documents: 3, indexed_documents: Some(3) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }} 1 {uid: 1, batch_uid: 1, status: succeeded, details: { received_documents: 3, indexed_documents: Some(3) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }}
2 {uid: 2, status: enqueued, details: { received_document_ids: 1, deleted_documents: None }, kind: DocumentDeletion { index_uid: "doggos", documents_ids: ["1"] }} 2 {uid: 2, status: enqueued, details: { received_document_ids: 1, deleted_documents: None }, kind: DocumentDeletion { index_uid: "doggos", documents_ids: ["1"] }}
3 {uid: 3, status: enqueued, details: { original_filter: true, deleted_documents: None }, kind: DocumentDeletionByFilter { index_uid: "doggos", filter_expr: Bool(true) }} 3 {uid: 3, status: enqueued, details: { original_filter: true, deleted_documents: None }, kind: DocumentDeletionByFilter { index_uid: "doggos", filter_expr: Bool(true) }}


@ -1,13 +1,12 @@
--- ---
source: crates/index-scheduler/src/scheduler/test_failure.rs source: crates/index-scheduler/src/scheduler/test_failure.rs
snapshot_kind: text
--- ---
### Autobatching Enabled = true ### Autobatching Enabled = true
### Processing batch None: ### Processing batch None:
[] []
---------------------------------------------------------------------- ----------------------------------------------------------------------
### All Tasks: ### All Tasks:
0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }} 0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set([Field("catto")]), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set([Field("catto")]), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
1 {uid: 1, status: enqueued, details: { received_documents: 3, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }} 1 {uid: 1, status: enqueued, details: { received_documents: 3, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }}
---------------------------------------------------------------------- ----------------------------------------------------------------------
### Status: ### Status:


@ -7,7 +7,7 @@ snapshot_kind: text
[] []
---------------------------------------------------------------------- ----------------------------------------------------------------------
### All Tasks: ### All Tasks:
0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "An unexpected crash occurred when processing the task.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }} 0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "An unexpected crash occurred when processing the task: simulated panic", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
---------------------------------------------------------------------- ----------------------------------------------------------------------
### Status: ### Status:
enqueued [] enqueued []


@ -1,13 +1,12 @@
--- ---
source: crates/index-scheduler/src/scheduler/test_failure.rs source: crates/index-scheduler/src/scheduler/test_failure.rs
snapshot_kind: text
--- ---
### Autobatching Enabled = true ### Autobatching Enabled = true
### Processing batch None: ### Processing batch None:
[] []
---------------------------------------------------------------------- ----------------------------------------------------------------------
### All Tasks: ### All Tasks:
0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: (1, 13, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }} 0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: (1, 14, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
1 {uid: 1, batch_uid: 1, status: succeeded, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }} 1 {uid: 1, batch_uid: 1, status: succeeded, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
2 {uid: 2, batch_uid: 2, status: succeeded, details: { primary_key: Some("bone") }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }} 2 {uid: 2, batch_uid: 2, status: succeeded, details: { primary_key: Some("bone") }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
3 {uid: 3, batch_uid: 3, status: failed, error: ResponseError { code: 200, message: "Index `doggo` already exists.", error_code: "index_already_exists", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#index_already_exists" }, details: { primary_key: Some("bone") }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }} 3 {uid: 3, batch_uid: 3, status: failed, error: ResponseError { code: 200, message: "Index `doggo` already exists.", error_code: "index_already_exists", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#index_already_exists" }, details: { primary_key: Some("bone") }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
@ -58,7 +57,7 @@ girafo: { number_of_documents: 0, field_distribution: {} }
[timestamp] [4,] [timestamp] [4,]
---------------------------------------------------------------------- ----------------------------------------------------------------------
### All Batches: ### All Batches:
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.13.0"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, } 0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.14.0"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, }
1 {uid: 1, details: {"primaryKey":"mouse"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"catto":1}}, } 1 {uid: 1, details: {"primaryKey":"mouse"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"catto":1}}, }
2 {uid: 2, details: {"primaryKey":"bone"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"doggo":1}}, } 2 {uid: 2, details: {"primaryKey":"bone"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"doggo":1}}, }
3 {uid: 3, details: {"primaryKey":"bone"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"indexCreation":1},"indexUids":{"doggo":1}}, } 3 {uid: 3, details: {"primaryKey":"bone"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"indexCreation":1},"indexUids":{"doggo":1}}, }


@ -87,7 +87,6 @@ doggo [2,3,]
girafo [4,] girafo [4,]
---------------------------------------------------------------------- ----------------------------------------------------------------------
### Batches Enqueued At: ### Batches Enqueued At:
[timestamp] [0,]
[timestamp] [1,] [timestamp] [1,]
[timestamp] [2,] [timestamp] [2,]
[timestamp] [3,] [timestamp] [3,]
@ -95,7 +94,6 @@ girafo [4,]
[timestamp] [5,] [timestamp] [5,]
---------------------------------------------------------------------- ----------------------------------------------------------------------
### Batches Started At: ### Batches Started At:
[timestamp] [0,]
[timestamp] [1,] [timestamp] [1,]
[timestamp] [2,] [timestamp] [2,]
[timestamp] [3,] [timestamp] [3,]
@ -103,7 +101,6 @@ girafo [4,]
[timestamp] [5,] [timestamp] [5,]
---------------------------------------------------------------------- ----------------------------------------------------------------------
### Batches Finished At: ### Batches Finished At:
[timestamp] [0,]
[timestamp] [1,] [timestamp] [1,]
[timestamp] [2,] [timestamp] [2,]
[timestamp] [3,] [timestamp] [3,]


@ -1,13 +1,12 @@
--- ---
source: crates/index-scheduler/src/scheduler/test_failure.rs source: crates/index-scheduler/src/scheduler/test_failure.rs
snapshot_kind: text
--- ---
### Autobatching Enabled = true ### Autobatching Enabled = true
### Processing batch None: ### Processing batch None:
[] []
---------------------------------------------------------------------- ----------------------------------------------------------------------
### All Tasks: ### All Tasks:
0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: (1, 13, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }} 0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: (1, 14, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
---------------------------------------------------------------------- ----------------------------------------------------------------------
### Status: ### Status:
enqueued [0,] enqueued [0,]


@ -1,13 +1,12 @@
--- ---
source: crates/index-scheduler/src/scheduler/test_failure.rs source: crates/index-scheduler/src/scheduler/test_failure.rs
snapshot_kind: text
--- ---
### Autobatching Enabled = true ### Autobatching Enabled = true
### Processing batch None: ### Processing batch None:
[] []
---------------------------------------------------------------------- ----------------------------------------------------------------------
### All Tasks: ### All Tasks:
0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: (1, 13, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }} 0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: (1, 14, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }} 1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
---------------------------------------------------------------------- ----------------------------------------------------------------------
### Status: ### Status:


@ -1,13 +1,12 @@
--- ---
source: crates/index-scheduler/src/scheduler/test_failure.rs source: crates/index-scheduler/src/scheduler/test_failure.rs
snapshot_kind: text
--- ---
### Autobatching Enabled = true ### Autobatching Enabled = true
### Processing batch None: ### Processing batch None:
[] []
---------------------------------------------------------------------- ----------------------------------------------------------------------
### All Tasks: ### All Tasks:
0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: (1, 13, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }} 0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: (1, 14, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }} 1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
---------------------------------------------------------------------- ----------------------------------------------------------------------
### Status: ### Status:
@ -38,7 +37,7 @@ catto [1,]
[timestamp] [0,] [timestamp] [0,]
---------------------------------------------------------------------- ----------------------------------------------------------------------
### All Batches: ### All Batches:
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.13.0"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, } 0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.14.0"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, }
---------------------------------------------------------------------- ----------------------------------------------------------------------
### Batch to tasks mapping: ### Batch to tasks mapping:
0 [0,] 0 [0,]


@ -1,13 +1,12 @@
--- ---
source: crates/index-scheduler/src/scheduler/test_failure.rs source: crates/index-scheduler/src/scheduler/test_failure.rs
snapshot_kind: text
--- ---
### Autobatching Enabled = true ### Autobatching Enabled = true
### Processing batch None: ### Processing batch None:
[] []
---------------------------------------------------------------------- ----------------------------------------------------------------------
### All Tasks: ### All Tasks:
0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: (1, 13, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }} 0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: (1, 14, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }} 1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
2 {uid: 2, status: enqueued, details: { primary_key: Some("bone") }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }} 2 {uid: 2, status: enqueued, details: { primary_key: Some("bone") }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
---------------------------------------------------------------------- ----------------------------------------------------------------------
@ -41,7 +40,7 @@ doggo [2,]
[timestamp] [0,] [timestamp] [0,]
---------------------------------------------------------------------- ----------------------------------------------------------------------
### All Batches: ### All Batches:
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.13.0"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, } 0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.14.0"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, }
---------------------------------------------------------------------- ----------------------------------------------------------------------
### Batch to tasks mapping: ### Batch to tasks mapping:
0 [0,] 0 [0,]


@ -1,13 +1,12 @@
--- ---
source: crates/index-scheduler/src/scheduler/test_failure.rs source: crates/index-scheduler/src/scheduler/test_failure.rs
snapshot_kind: text
--- ---
### Autobatching Enabled = true ### Autobatching Enabled = true
### Processing batch None: ### Processing batch None:
[] []
---------------------------------------------------------------------- ----------------------------------------------------------------------
### All Tasks: ### All Tasks:
0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: (1, 13, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }} 0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: (1, 14, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }} 1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
2 {uid: 2, status: enqueued, details: { primary_key: Some("bone") }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }} 2 {uid: 2, status: enqueued, details: { primary_key: Some("bone") }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
3 {uid: 3, status: enqueued, details: { primary_key: Some("bone") }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }} 3 {uid: 3, status: enqueued, details: { primary_key: Some("bone") }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
@ -44,7 +43,7 @@ doggo [2,3,]
[timestamp] [0,] [timestamp] [0,]
---------------------------------------------------------------------- ----------------------------------------------------------------------
### All Batches: ### All Batches:
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.13.0"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, } 0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.14.0"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, }
---------------------------------------------------------------------- ----------------------------------------------------------------------
### Batch to tasks mapping: ### Batch to tasks mapping:
0 [0,] 0 [0,]


@ -903,15 +903,21 @@ fn create_and_list_index() {
index_scheduler.index("kefir").unwrap(); index_scheduler.index("kefir").unwrap();
let list = index_scheduler.get_paginated_indexes_stats(&AuthFilter::default(), 0, 20).unwrap(); let list = index_scheduler.get_paginated_indexes_stats(&AuthFilter::default(), 0, 20).unwrap();
snapshot!(json_string!(list, { "[1][0][1].created_at" => "[date]", "[1][0][1].updated_at" => "[date]", "[1][0][1].used_database_size" => "[bytes]", "[1][0][1].database_size" => "[bytes]" }), @r#" snapshot!(json_string!(list, { "[1][0][1].created_at" => "[date]", "[1][0][1].updated_at" => "[date]", "[1][0][1].used_database_size" => "[bytes]", "[1][0][1].database_size" => "[bytes]" }), @r###"
[ [
1, 1,
[ [
[ [
"kefir", "kefir",
{ {
"number_of_documents": 0, "documents_database_stats": {
"numberOfEntries": 0,
"totalKeySize": 0,
"totalValueSize": 0
},
"database_size": "[bytes]", "database_size": "[bytes]",
"number_of_embeddings": 0,
"number_of_embedded_documents": 0,
"used_database_size": "[bytes]", "used_database_size": "[bytes]",
"primary_key": null, "primary_key": null,
"field_distribution": {}, "field_distribution": {},
@ -921,5 +927,5 @@ fn create_and_list_index() {
] ]
] ]
] ]
"#); "###);
} }


@ -104,9 +104,9 @@ fn import_vectors() {
let configs = index_scheduler.embedders("doggos".to_string(), configs).unwrap(); let configs = index_scheduler.embedders("doggos".to_string(), configs).unwrap();
let (hf_embedder, _, _) = configs.get(&simple_hf_name).unwrap(); let (hf_embedder, _, _) = configs.get(&simple_hf_name).unwrap();
let beagle_embed = hf_embedder.embed_one(S("Intel the beagle best doggo"), None).unwrap(); let beagle_embed = hf_embedder.embed_search("Intel the beagle best doggo", None).unwrap();
let lab_embed = hf_embedder.embed_one(S("Max the lab best doggo"), None).unwrap(); let lab_embed = hf_embedder.embed_search("Max the lab best doggo", None).unwrap();
let patou_embed = hf_embedder.embed_one(S("kefir the patou best doggo"), None).unwrap(); let patou_embed = hf_embedder.embed_search("kefir the patou best doggo", None).unwrap();
(fakerest_name, simple_hf_name, beagle_embed, lab_embed, patou_embed) (fakerest_name, simple_hf_name, beagle_embed, lab_embed, patou_embed)
}; };
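`embed_one` becomes `embed_search`, making the search-time entry point explicit and taking a plain `&str` instead of an owned `String`. A toy sketch of that call shape; the trait and the meaning of the trailing `None` argument are assumptions for illustration, not Meilisearch's real embedder API:

```rust
// Illustrative only: a minimal search-side embedding entry point mirroring
// the `embed_search("...", None)` call shape in the hunk above. The trailing
// Option is assumed to be an optional deadline/config; that is a guess.
trait SearchEmbedder {
    fn embed_search(&self, query: &str, deadline: Option<()>) -> Vec<f32>;
}

struct DummyEmbedder;

impl SearchEmbedder for DummyEmbedder {
    fn embed_search(&self, query: &str, _deadline: Option<()>) -> Vec<f32> {
        // Toy embedding (byte sum), just to keep the sketch runnable.
        vec![query.bytes().map(f32::from).sum()]
    }
}

fn main() {
    let embedder = DummyEmbedder;
    println!("{:?}", embedder.embed_search("Intel the beagle best doggo", None));
}
```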
@ -404,31 +404,32 @@ fn import_vectors_first_and_embedder_later() {
// even though we specified the vector for the ID 3, it shouldn't be marked // even though we specified the vector for the ID 3, it shouldn't be marked
// as user provided since we explicitly marked it as NOT user provided. // as user provided since we explicitly marked it as NOT user provided.
snapshot!(format!("{conf:#?}"), @r###" snapshot!(format!("{conf:#?}"), @r###"
[ [
IndexEmbeddingConfig { IndexEmbeddingConfig {
name: "my_doggo_embedder", name: "my_doggo_embedder",
config: EmbeddingConfig { config: EmbeddingConfig {
embedder_options: HuggingFace( embedder_options: HuggingFace(
EmbedderOptions { EmbedderOptions {
model: "sentence-transformers/all-MiniLM-L6-v2", model: "sentence-transformers/all-MiniLM-L6-v2",
revision: Some( revision: Some(
"e4ce9877abf3edfe10b0d82785e83bdcb973e22e", "e4ce9877abf3edfe10b0d82785e83bdcb973e22e",
),
distribution: None,
},
),
prompt: PromptData {
template: "{{doc.doggo}}",
max_bytes: Some(
400,
), ),
distribution: None,
pooling: UseModel,
}, },
quantized: None, ),
prompt: PromptData {
template: "{{doc.doggo}}",
max_bytes: Some(
400,
),
}, },
user_provided: RoaringBitmap<[1, 2]>, quantized: None,
}, },
] user_provided: RoaringBitmap<[1, 2]>,
"###); },
]
"###);
let docid = index.external_documents_ids.get(&rtxn, "0").unwrap().unwrap(); let docid = index.external_documents_ids.get(&rtxn, "0").unwrap().unwrap();
let embeddings = index.embeddings(&rtxn, docid).unwrap(); let embeddings = index.embeddings(&rtxn, docid).unwrap();
let embedding = &embeddings["my_doggo_embedder"]; let embedding = &embeddings["my_doggo_embedder"];


@ -1,13 +1,12 @@
use std::time::Instant; use std::time::Instant;
use big_s::S; use big_s::S;
use maplit::btreeset;
use meili_snap::snapshot; use meili_snap::snapshot;
use meilisearch_types::milli::obkv_to_json; use meilisearch_types::milli::obkv_to_json;
use meilisearch_types::milli::update::IndexDocumentsMethod::*; use meilisearch_types::milli::update::IndexDocumentsMethod::*;
use meilisearch_types::milli::update::Setting; use meilisearch_types::milli::update::Setting;
use meilisearch_types::tasks::Kind; use meilisearch_types::milli::FilterableAttributesRule;
use meilisearch_types::tasks::KindWithContent; use meilisearch_types::tasks::{Kind, KindWithContent};
use crate::insta_snapshot::snapshot_index_scheduler; use crate::insta_snapshot::snapshot_index_scheduler;
use crate::test_utils::Breakpoint::*; use crate::test_utils::Breakpoint::*;
@ -128,7 +127,8 @@ fn fail_in_process_batch_for_document_deletion() {
use meilisearch_types::settings::{Settings, Unchecked}; use meilisearch_types::settings::{Settings, Unchecked};
let mut new_settings: Box<Settings<Unchecked>> = Box::default(); let mut new_settings: Box<Settings<Unchecked>> = Box::default();
new_settings.filterable_attributes = Setting::Set(btreeset!(S("catto"))); new_settings.filterable_attributes =
Setting::Set(vec![FilterableAttributesRule::Field(S("catto"))]);
index_scheduler index_scheduler
.register( .register(
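Filterable attributes change from a plain set of field names to an ordered list of rules, which is why the snapshots earlier in this diff now render `Set([Field("catto")])` instead of `Set({"catto"})`. A self-contained sketch of the new call shape, with stand-in definitions for `Setting` and `FilterableAttributesRule` (the real types live in `meilisearch_types::milli`):

```rust
// Stand-ins mirroring the call shape in the hunk above; the real
// `Setting` and `FilterableAttributesRule` live in meilisearch_types/milli.
#[allow(dead_code)]
#[derive(Debug)]
enum Setting<T> {
    Set(T),
    NotSet,
}

#[derive(Debug)]
enum FilterableAttributesRule {
    // A rule targeting one concrete field; the snapshots above render it
    // as `Field("catto")` inside the settings details.
    Field(String),
}

fn main() {
    // Before: Setting::Set(btreeset!(S("catto"))), a flat set of names.
    // After: a Vec of rules, leaving room for patterns and per-field options.
    let filterable = Setting::Set(vec![FilterableAttributesRule::Field(
        "catto".to_string(),
    )]);
    println!("{filterable:?}");
}
```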


@ -5,6 +5,7 @@ use std::time::Duration;
use big_s::S; use big_s::S;
use crossbeam_channel::RecvTimeoutError; use crossbeam_channel::RecvTimeoutError;
use file_store::File; use file_store::File;
use meilisearch_auth::open_auth_store_env;
use meilisearch_types::document_formats::DocumentFormatError; use meilisearch_types::document_formats::DocumentFormatError;
use meilisearch_types::milli::update::IndexDocumentsMethod::ReplaceDocuments; use meilisearch_types::milli::update::IndexDocumentsMethod::ReplaceDocuments;
use meilisearch_types::milli::update::IndexerConfig; use meilisearch_types::milli::update::IndexerConfig;
@ -111,6 +112,7 @@ impl IndexScheduler {
batched_tasks_size_limit: u64::MAX, batched_tasks_size_limit: u64::MAX,
instance_features: Default::default(), instance_features: Default::default(),
auto_upgrade: true, // Doesn't cost much and ensures the happy path works auto_upgrade: true, // Doesn't cost much and ensures the happy path works
embedding_cache_cap: 10,
}; };
let version = configuration(&mut options).unwrap_or_else(|| { let version = configuration(&mut options).unwrap_or_else(|| {
( (
@ -120,7 +122,10 @@ impl IndexScheduler {
) )
}); });
let index_scheduler = Self::new(options, version, sender, planned_failures).unwrap(); std::fs::create_dir_all(&options.auth_path).unwrap();
let auth_env = open_auth_store_env(&options.auth_path).unwrap();
let index_scheduler =
Self::new(options, auth_env, version, sender, planned_failures).unwrap();
// To be 100% consistent between all tests we're going to start the scheduler right now // To be 100% consistent between all tests we're going to start the scheduler right now
// and ensure it's in the expected starting state. // and ensure it's in the expected starting state.


@ -1,5 +1,5 @@
use anyhow::bail; use anyhow::bail;
use meilisearch_types::heed::{Env, RwTxn}; use meilisearch_types::heed::{Env, RwTxn, WithoutTls};
use meilisearch_types::tasks::{Details, KindWithContent, Status, Task}; use meilisearch_types::tasks::{Details, KindWithContent, Status, Task};
use meilisearch_types::versioning::{VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH}; use meilisearch_types::versioning::{VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH};
use time::OffsetDateTime; use time::OffsetDateTime;
@ -9,13 +9,17 @@ use crate::queue::TaskQueue;
use crate::versioning::Versioning; use crate::versioning::Versioning;
trait UpgradeIndexScheduler { trait UpgradeIndexScheduler {
fn upgrade(&self, env: &Env, wtxn: &mut RwTxn, original: (u32, u32, u32)) fn upgrade(
-> anyhow::Result<()>; &self,
env: &Env<WithoutTls>,
wtxn: &mut RwTxn,
original: (u32, u32, u32),
) -> anyhow::Result<()>;
fn target_version(&self) -> (u32, u32, u32); fn target_version(&self) -> (u32, u32, u32);
} }
pub fn upgrade_index_scheduler( pub fn upgrade_index_scheduler(
env: &Env, env: &Env<WithoutTls>,
versioning: &Versioning, versioning: &Versioning,
from: (u32, u32, u32), from: (u32, u32, u32),
to: (u32, u32, u32), to: (u32, u32, u32),
@ -24,10 +28,12 @@ pub fn upgrade_index_scheduler(
let current_minor = to.1; let current_minor = to.1;
let current_patch = to.2; let current_patch = to.2;
let upgrade_functions: &[&dyn UpgradeIndexScheduler] = &[&V1_12_ToCurrent {}]; let upgrade_functions: &[&dyn UpgradeIndexScheduler] = &[&ToCurrentNoOp {}];
let start = match from { let start = match from {
(1, 12, _) => 0, (1, 12, _) => 0,
(1, 13, _) => 0,
(1, 14, _) => 0,
(major, minor, patch) => { (major, minor, patch) => {
if major > current_major if major > current_major
|| (major == current_major && minor > current_minor) || (major == current_major && minor > current_minor)
@ -46,20 +52,19 @@ pub fn upgrade_index_scheduler(
} }
}; };
let mut current_version = from;
info!("Upgrading the task queue"); info!("Upgrading the task queue");
let mut local_from = from;
for upgrade in upgrade_functions[start..].iter() { for upgrade in upgrade_functions[start..].iter() {
let target = upgrade.target_version(); let target = upgrade.target_version();
info!( info!(
"Upgrading from v{}.{}.{} to v{}.{}.{}", "Upgrading from v{}.{}.{} to v{}.{}.{}",
from.0, from.1, from.2, current_version.0, current_version.1, current_version.2 local_from.0, local_from.1, local_from.2, target.0, target.1, target.2
); );
let mut wtxn = env.write_txn()?; let mut wtxn = env.write_txn()?;
upgrade.upgrade(env, &mut wtxn, from)?; upgrade.upgrade(env, &mut wtxn, local_from)?;
versioning.set_version(&mut wtxn, target)?; versioning.set_version(&mut wtxn, target)?;
wtxn.commit()?; wtxn.commit()?;
current_version = target; local_from = target;
} }
let mut wtxn = env.write_txn()?; let mut wtxn = env.write_txn()?;
@ -86,12 +91,12 @@ pub fn upgrade_index_scheduler(
} }
#[allow(non_camel_case_types)] #[allow(non_camel_case_types)]
struct V1_12_ToCurrent {} struct ToCurrentNoOp {}
impl UpgradeIndexScheduler for V1_12_ToCurrent { impl UpgradeIndexScheduler for ToCurrentNoOp {
fn upgrade( fn upgrade(
&self, &self,
_env: &Env, _env: &Env<WithoutTls>,
_wtxn: &mut RwTxn, _wtxn: &mut RwTxn,
_original: (u32, u32, u32), _original: (u32, u32, u32),
) -> anyhow::Result<()> { ) -> anyhow::Result<()> {
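The rewritten loop threads the version forward between upgrade steps: each iteration logs and upgrades from `local_from` to that step's `target_version`, then adopts the target as the new starting point, where the old code kept using the original `from`. A minimal std-only sketch of the corrected pattern (plain tuples instead of heed and the real scheduler types):

```rust
// Std-only sketch of the chained-upgrade pattern from the hunk above:
// every step starts from wherever the previous step left off.
type Version = (u32, u32, u32);

trait UpgradeStep {
    fn target_version(&self) -> Version;
    fn upgrade(&self, from: Version) -> Result<(), String>;
}

struct NoOpStep(Version);

impl UpgradeStep for NoOpStep {
    fn target_version(&self) -> Version {
        self.0
    }
    fn upgrade(&self, _from: Version) -> Result<(), String> {
        Ok(())
    }
}

fn run_upgrades(steps: &[&dyn UpgradeStep], from: Version) -> Result<Version, String> {
    let mut local_from = from;
    for step in steps {
        let target = step.target_version();
        println!(
            "Upgrading from v{}.{}.{} to v{}.{}.{}",
            local_from.0, local_from.1, local_from.2, target.0, target.1, target.2
        );
        step.upgrade(local_from)?;
        // The fix above: without this assignment, every step would keep
        // upgrading (and logging) from the original `from` version.
        local_from = target;
    }
    Ok(local_from)
}

fn main() {
    let step_a = NoOpStep((1, 13, 0));
    let step_b = NoOpStep((1, 14, 0));
    let end = run_upgrades(&[&step_a, &step_b], (1, 12, 0)).unwrap();
    assert_eq!(end, (1, 14, 0));
}
```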


@ -3,7 +3,7 @@
use std::collections::{BTreeSet, HashSet}; use std::collections::{BTreeSet, HashSet};
use std::ops::Bound; use std::ops::Bound;
use meilisearch_types::batches::{Batch, BatchId, BatchStats}; use meilisearch_types::batches::{Batch, BatchEnqueuedAt, BatchId, BatchStats};
use meilisearch_types::heed::{Database, RoTxn, RwTxn}; use meilisearch_types::heed::{Database, RoTxn, RwTxn};
use meilisearch_types::milli::CboRoaringBitmapCodec; use meilisearch_types::milli::CboRoaringBitmapCodec;
use meilisearch_types::task_view::DetailsView; use meilisearch_types::task_view::DetailsView;
@ -30,8 +30,7 @@ pub struct ProcessingBatch {
pub kinds: HashSet<Kind>, pub kinds: HashSet<Kind>,
pub indexes: HashSet<String>, pub indexes: HashSet<String>,
pub canceled_by: HashSet<TaskId>, pub canceled_by: HashSet<TaskId>,
pub oldest_enqueued_at: Option<OffsetDateTime>, pub enqueued_at: Option<BatchEnqueuedAt>,
pub earliest_enqueued_at: Option<OffsetDateTime>,
pub started_at: OffsetDateTime, pub started_at: OffsetDateTime,
pub finished_at: Option<OffsetDateTime>, pub finished_at: Option<OffsetDateTime>,
} }
@ -51,8 +50,7 @@ impl ProcessingBatch {
kinds: HashSet::default(), kinds: HashSet::default(),
indexes: HashSet::default(), indexes: HashSet::default(),
canceled_by: HashSet::default(), canceled_by: HashSet::default(),
oldest_enqueued_at: None, enqueued_at: None,
earliest_enqueued_at: None,
started_at: OffsetDateTime::now_utc(), started_at: OffsetDateTime::now_utc(),
finished_at: None, finished_at: None,
} }
@ -80,14 +78,18 @@ impl ProcessingBatch {
if let Some(canceled_by) = task.canceled_by { if let Some(canceled_by) = task.canceled_by {
self.canceled_by.insert(canceled_by); self.canceled_by.insert(canceled_by);
} }
self.oldest_enqueued_at = match self.enqueued_at.as_mut() {
Some(self.oldest_enqueued_at.map_or(task.enqueued_at, |oldest_enqueued_at| { Some(BatchEnqueuedAt { earliest, oldest }) => {
task.enqueued_at.min(oldest_enqueued_at) *oldest = task.enqueued_at.min(*oldest);
})); *earliest = task.enqueued_at.max(*earliest);
self.earliest_enqueued_at = }
Some(self.earliest_enqueued_at.map_or(task.enqueued_at, |earliest_enqueued_at| { None => {
task.enqueued_at.max(earliest_enqueued_at) self.enqueued_at = Some(BatchEnqueuedAt {
})); earliest: task.enqueued_at,
oldest: task.enqueued_at,
});
}
}
} }
} }
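The two separate `Option<OffsetDateTime>` bounds collapse into one `Option<BatchEnqueuedAt>` updated in a single match: the first task initializes both bounds, and every later task widens them with `min`/`max`. A std-only sketch of the same fold, with `u64` timestamps standing in for `OffsetDateTime`:

```rust
// Sketch of the enqueued-at bounds fold from the hunk above, with u64
// timestamps standing in for OffsetDateTime. Note the naming: `oldest`
// tracks the minimum and `earliest` the maximum, matching the assertions
// added further down (oldest <= task.enqueued_at <= earliest).
#[derive(Debug, PartialEq)]
struct BatchEnqueuedAt {
    earliest: u64,
    oldest: u64,
}

#[derive(Default, Debug)]
struct ProcessingBatch {
    enqueued_at: Option<BatchEnqueuedAt>,
}

impl ProcessingBatch {
    fn track(&mut self, task_enqueued_at: u64) {
        match self.enqueued_at.as_mut() {
            Some(BatchEnqueuedAt { earliest, oldest }) => {
                *oldest = task_enqueued_at.min(*oldest);
                *earliest = task_enqueued_at.max(*earliest);
            }
            None => {
                self.enqueued_at = Some(BatchEnqueuedAt {
                    earliest: task_enqueued_at,
                    oldest: task_enqueued_at,
                });
            }
        }
    }
}

fn main() {
    let mut batch = ProcessingBatch::default();
    for t in [30, 10, 20] {
        batch.track(t);
    }
    assert_eq!(batch.enqueued_at, Some(BatchEnqueuedAt { earliest: 30, oldest: 10 }));
}
```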
@ -138,6 +140,7 @@ impl ProcessingBatch {
stats: self.stats.clone(), stats: self.stats.clone(),
started_at: self.started_at, started_at: self.started_at,
finished_at: self.finished_at, finished_at: self.finished_at,
enqueued_at: self.enqueued_at,
} }
} }
} }
@ -174,6 +177,33 @@ pub(crate) fn remove_task_datetime(
Ok(()) Ok(())
} }
pub(crate) fn remove_n_tasks_datetime_earlier_than(
wtxn: &mut RwTxn,
database: Database<BEI128, CboRoaringBitmapCodec>,
earlier_than: OffsetDateTime,
mut count: usize,
task_id: TaskId,
) -> Result<()> {
let earlier_than = earlier_than.unix_timestamp_nanos();
let mut iter = database.rev_range_mut(wtxn, &(..earlier_than))?;
while let Some((current, mut existing)) = iter.next().transpose()? {
count -= existing.remove(task_id) as usize;
if existing.is_empty() {
// safety: We don't keep references to the database
unsafe { iter.del_current()? };
} else {
// safety: We don't keep references to the database
unsafe { iter.put_current(&current, &existing)? };
}
if count == 0 {
break;
}
}
Ok(())
}
pub(crate) fn keep_ids_within_datetimes( pub(crate) fn keep_ids_within_datetimes(
rtxn: &RoTxn, rtxn: &RoTxn,
ids: &mut RoaringBitmap, ids: &mut RoaringBitmap,
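The new `remove_n_tasks_datetime_earlier_than` helper walks the datetime index in reverse from the cutoff, removes the task id from each bitmap, deletes entries that become empty, and stops once the expected number of removals is reached. The same control flow over std collections, with a `BTreeMap<i128, BTreeSet<u32>>` standing in for the heed database and its roaring bitmaps:

```rust
use std::collections::{BTreeMap, BTreeSet};

// Std-only analogue of `remove_n_tasks_datetime_earlier_than` above.
fn remove_n_tasks_earlier_than(
    index: &mut BTreeMap<i128, BTreeSet<u32>>,
    earlier_than: i128,
    mut count: usize,
    task_id: u32,
) {
    // Visit newest-first among entries strictly before the cutoff,
    // mirroring `rev_range_mut(wtxn, &(..earlier_than))`.
    let keys: Vec<i128> = index.range(..earlier_than).rev().map(|(k, _)| *k).collect();
    for key in keys {
        let entry = index.get_mut(&key).unwrap();
        count -= entry.remove(&task_id) as usize;
        if entry.is_empty() {
            // Mirrors `iter.del_current()`: drop the now-empty entry.
            index.remove(&key);
        }
        if count == 0 {
            break;
        }
    }
}

fn main() {
    let mut index = BTreeMap::new();
    index.insert(1, BTreeSet::from([7]));
    index.insert(2, BTreeSet::from([7, 8]));
    remove_n_tasks_earlier_than(&mut index, 10, 2, 7);
    assert!(!index.contains_key(&1)); // emptied and deleted
    assert_eq!(index[&2], BTreeSet::from([8]));
}
```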
@ -329,14 +359,27 @@ impl crate::IndexScheduler {
kind, kind,
} = task; } = task;
assert_eq!(uid, task.uid); assert_eq!(uid, task.uid);
if let Some(ref batch) = batch_uid { if task.status != Status::Enqueued {
let batch_uid = batch_uid.expect("All non enqueued tasks must be part of a batch");
assert!(self assert!(self
.queue .queue
.batch_to_tasks_mapping .batch_to_tasks_mapping
.get(&rtxn, batch) .get(&rtxn, &batch_uid)
.unwrap() .unwrap()
.unwrap() .unwrap()
.contains(uid)); .contains(uid));
let batch = self.queue.batches.get_batch(&rtxn, batch_uid).unwrap().unwrap();
assert_eq!(batch.uid, batch_uid);
if task.status == Status::Processing {
assert!(batch.progress.is_some());
} else {
assert!(batch.progress.is_none());
}
assert_eq!(batch.started_at, task.started_at.unwrap());
assert_eq!(batch.finished_at, task.finished_at);
let enqueued_at = batch.enqueued_at.unwrap();
assert!(task.enqueued_at >= enqueued_at.oldest);
assert!(task.enqueued_at <= enqueued_at.earliest);
} }
if let Some(task_index_uid) = &task_index_uid { if let Some(task_index_uid) = &task_index_uid {
assert!(self assert!(self


@ -1,9 +1,10 @@
use crate::{upgrade::upgrade_index_scheduler, Result}; use meilisearch_types::heed::types::Str;
use meilisearch_types::{ use meilisearch_types::heed::{self, Database, Env, RoTxn, RwTxn, WithoutTls};
heed::{types::Str, Database, Env, RoTxn, RwTxn}, use meilisearch_types::milli::heed_codec::version::VersionCodec;
milli::heed_codec::version::VersionCodec, use meilisearch_types::versioning;
versioning,
}; use crate::upgrade::upgrade_index_scheduler;
use crate::Result;
/// The number of databases used by the queue itself /// The number of databases used by the queue itself
const NUMBER_OF_DATABASES: u32 = 1; const NUMBER_OF_DATABASES: u32 = 1;
@ -21,30 +22,38 @@ pub struct Versioning {
} }
impl Versioning { impl Versioning {
pub(crate) const fn nb_db() -> u32 { pub const fn nb_db() -> u32 {
NUMBER_OF_DATABASES NUMBER_OF_DATABASES
} }
pub fn get_version(&self, rtxn: &RoTxn) -> Result<Option<(u32, u32, u32)>> { pub fn get_version(&self, rtxn: &RoTxn) -> Result<Option<(u32, u32, u32)>, heed::Error> {
Ok(self.version.get(rtxn, entry_name::MAIN)?) self.version.get(rtxn, entry_name::MAIN)
} }
pub fn set_version(&self, wtxn: &mut RwTxn, version: (u32, u32, u32)) -> Result<()> { pub fn set_version(
Ok(self.version.put(wtxn, entry_name::MAIN, &version)?) &self,
wtxn: &mut RwTxn,
version: (u32, u32, u32),
) -> Result<(), heed::Error> {
self.version.put(wtxn, entry_name::MAIN, &version)
} }
pub fn set_current_version(&self, wtxn: &mut RwTxn) -> Result<()> { pub fn set_current_version(&self, wtxn: &mut RwTxn) -> Result<(), heed::Error> {
let major = versioning::VERSION_MAJOR.parse().unwrap(); let major = versioning::VERSION_MAJOR.parse().unwrap();
let minor = versioning::VERSION_MINOR.parse().unwrap(); let minor = versioning::VERSION_MINOR.parse().unwrap();
let patch = versioning::VERSION_PATCH.parse().unwrap(); let patch = versioning::VERSION_PATCH.parse().unwrap();
self.set_version(wtxn, (major, minor, patch)) self.set_version(wtxn, (major, minor, patch))
} }
/// Create an index scheduler and start its run loop. /// Return `Self` without checking anything about the version
pub(crate) fn new(env: &Env, db_version: (u32, u32, u32)) -> Result<Self> { pub fn raw_new(env: &Env<WithoutTls>, wtxn: &mut RwTxn) -> Result<Self, heed::Error> {
let version = env.create_database(wtxn, Some(db_name::VERSION))?;
Ok(Self { version })
}
pub(crate) fn new(env: &Env<WithoutTls>, db_version: (u32, u32, u32)) -> Result<Self> {
let mut wtxn = env.write_txn()?; let mut wtxn = env.write_txn()?;
let version = env.create_database(&mut wtxn, Some(db_name::VERSION))?; let this = Self::raw_new(env, &mut wtxn)?;
let this = Self { version };
let from = match this.get_version(&wtxn)? { let from = match this.get_version(&wtxn)? {
Some(version) => version, Some(version) => version,
// fresh DB: use the db version // fresh DB: use the db version


@ -2,6 +2,7 @@ use std::fs::File;
use std::io::{BufReader, Write}; use std::io::{BufReader, Write};
use std::path::Path; use std::path::Path;
use meilisearch_types::heed::{Env, WithoutTls};
use serde_json::Deserializer; use serde_json::Deserializer;
use crate::{AuthController, HeedAuthStore, Result}; use crate::{AuthController, HeedAuthStore, Result};
@ -9,11 +10,8 @@ use crate::{AuthController, HeedAuthStore, Result};
const KEYS_PATH: &str = "keys"; const KEYS_PATH: &str = "keys";
impl AuthController { impl AuthController {
pub fn dump(src: impl AsRef<Path>, dst: impl AsRef<Path>) -> Result<()> { pub fn dump(auth_env: Env<WithoutTls>, dst: impl AsRef<Path>) -> Result<()> {
let mut store = HeedAuthStore::new(&src)?; let store = HeedAuthStore::new(auth_env)?;
// do not attempt to close the database on drop!
store.set_drop_on_close(false);
let keys_file_path = dst.as_ref().join(KEYS_PATH); let keys_file_path = dst.as_ref().join(KEYS_PATH);
@ -27,8 +25,8 @@ impl AuthController {
Ok(()) Ok(())
} }
pub fn load_dump(src: impl AsRef<Path>, dst: impl AsRef<Path>) -> Result<()> { pub fn load_dump(src: impl AsRef<Path>, auth_env: Env<WithoutTls>) -> Result<()> {
let store = HeedAuthStore::new(&dst)?; let store = HeedAuthStore::new(auth_env)?;
let keys_file_path = src.as_ref().join(KEYS_PATH); let keys_file_path = src.as_ref().join(KEYS_PATH);


@ -3,11 +3,10 @@ pub mod error;
mod store; mod store;
use std::collections::{HashMap, HashSet}; use std::collections::{HashMap, HashSet};
use std::path::Path;
use std::sync::Arc;
use error::{AuthControllerError, Result}; use error::{AuthControllerError, Result};
use maplit::hashset; use maplit::hashset;
use meilisearch_types::heed::{Env, WithoutTls};
use meilisearch_types::index_uid_pattern::IndexUidPattern; use meilisearch_types::index_uid_pattern::IndexUidPattern;
use meilisearch_types::keys::{Action, CreateApiKey, Key, PatchApiKey}; use meilisearch_types::keys::{Action, CreateApiKey, Key, PatchApiKey};
use meilisearch_types::milli::update::Setting; use meilisearch_types::milli::update::Setting;
@ -19,19 +18,19 @@ use uuid::Uuid;
#[derive(Clone)] #[derive(Clone)]
pub struct AuthController { pub struct AuthController {
store: Arc<HeedAuthStore>, store: HeedAuthStore,
master_key: Option<String>, master_key: Option<String>,
} }
impl AuthController { impl AuthController {
pub fn new(db_path: impl AsRef<Path>, master_key: &Option<String>) -> Result<Self> { pub fn new(auth_env: Env<WithoutTls>, master_key: &Option<String>) -> Result<Self> {
let store = HeedAuthStore::new(db_path)?; let store = HeedAuthStore::new(auth_env)?;
if store.is_empty()? { if store.is_empty()? {
generate_default_keys(&store)?; generate_default_keys(&store)?;
} }
Ok(Self { store: Arc::new(store), master_key: master_key.clone() }) Ok(Self { store, master_key: master_key.clone() })
} }
/// Return `Ok(())` if the auth controller is able to access one of its databases. /// Return `Ok(())` if the auth controller is able to access one of its databases.
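`AuthController` previously wrapped its store in an `Arc` so that clones shared one store; now that `HeedAuthStore` holds only an env handle plus database handles and derives `Clone` itself, the extra `Arc` layer is redundant. A stand-in sketch of why the clone stays cheap, assuming (as the diff suggests) that the env handle is itself `Arc`-like:

```rust
use std::sync::Arc;

// Stand-ins only: `EnvHandle` models a heed Env, assumed to clone like
// an Arc; before this change the controller held Arc<Store> instead.
#[derive(Clone)]
struct EnvHandle(Arc<()>);

#[derive(Clone)]
struct Store {
    env: EnvHandle,
}

#[derive(Clone)]
#[allow(dead_code)]
struct AuthController {
    store: Store, // previously: Arc<Store>
    master_key: Option<String>,
}

fn main() {
    let controller = AuthController {
        store: Store { env: EnvHandle(Arc::new(())) },
        master_key: None,
    };
    let clone = controller.clone();
    // Both controllers share one underlying environment handle.
    assert_eq!(Arc::strong_count(&controller.store.env.0), 2);
    drop(clone);
    assert_eq!(Arc::strong_count(&controller.store.env.0), 1);
}
```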


@ -1,18 +1,16 @@
use std::borrow::Cow; use std::borrow::Cow;
use std::cmp::Reverse; use std::cmp::Reverse;
use std::collections::HashSet; use std::collections::HashSet;
use std::fs::create_dir_all;
use std::path::Path; use std::path::Path;
use std::result::Result as StdResult; use std::result::Result as StdResult;
use std::str; use std::str;
use std::str::FromStr; use std::str::FromStr;
use std::sync::Arc;
use hmac::{Hmac, Mac}; use hmac::{Hmac, Mac};
use meilisearch_types::heed::BoxedError; use meilisearch_types::heed::{BoxedError, WithoutTls};
use meilisearch_types::index_uid_pattern::IndexUidPattern; use meilisearch_types::index_uid_pattern::IndexUidPattern;
use meilisearch_types::keys::KeyId; use meilisearch_types::keys::KeyId;
use meilisearch_types::milli; use meilisearch_types::milli::heed;
use meilisearch_types::milli::heed::types::{Bytes, DecodeIgnore, SerdeJson}; use meilisearch_types::milli::heed::types::{Bytes, DecodeIgnore, SerdeJson};
use meilisearch_types::milli::heed::{Database, Env, EnvOpenOptions, RwTxn}; use meilisearch_types::milli::heed::{Database, Env, EnvOpenOptions, RwTxn};
use sha2::Sha256; use sha2::Sha256;
@ -25,44 +23,32 @@ use super::error::{AuthControllerError, Result};
use super::{Action, Key}; use super::{Action, Key};
const AUTH_STORE_SIZE: usize = 1_073_741_824; //1GiB const AUTH_STORE_SIZE: usize = 1_073_741_824; //1GiB
const AUTH_DB_PATH: &str = "auth";
const KEY_DB_NAME: &str = "api-keys"; const KEY_DB_NAME: &str = "api-keys";
const KEY_ID_ACTION_INDEX_EXPIRATION_DB_NAME: &str = "keyid-action-index-expiration"; const KEY_ID_ACTION_INDEX_EXPIRATION_DB_NAME: &str = "keyid-action-index-expiration";
#[derive(Clone)] #[derive(Clone)]
pub struct HeedAuthStore { pub struct HeedAuthStore {
env: Arc<Env>, env: Env<WithoutTls>,
keys: Database<Bytes, SerdeJson<Key>>, keys: Database<Bytes, SerdeJson<Key>>,
action_keyid_index_expiration: Database<KeyIdActionCodec, SerdeJson<Option<OffsetDateTime>>>, action_keyid_index_expiration: Database<KeyIdActionCodec, SerdeJson<Option<OffsetDateTime>>>,
should_close_on_drop: bool,
} }
impl Drop for HeedAuthStore { pub fn open_auth_store_env(path: &Path) -> heed::Result<Env<WithoutTls>> {
fn drop(&mut self) { let options = EnvOpenOptions::new();
if self.should_close_on_drop && Arc::strong_count(&self.env) == 1 { let mut options = options.read_txn_without_tls();
self.env.as_ref().clone().prepare_for_closing();
}
}
}
pub fn open_auth_store_env(path: &Path) -> milli::heed::Result<milli::heed::Env> {
let mut options = EnvOpenOptions::new();
options.map_size(AUTH_STORE_SIZE); // 1GB options.map_size(AUTH_STORE_SIZE); // 1GB
options.max_dbs(2); options.max_dbs(2);
unsafe { options.open(path) } unsafe { options.open(path) }
} }
impl HeedAuthStore { impl HeedAuthStore {
pub fn new(path: impl AsRef<Path>) -> Result<Self> { pub fn new(env: Env<WithoutTls>) -> Result<Self> {
let path = path.as_ref().join(AUTH_DB_PATH);
create_dir_all(&path)?;
let env = Arc::new(open_auth_store_env(path.as_ref())?);
let mut wtxn = env.write_txn()?; let mut wtxn = env.write_txn()?;
let keys = env.create_database(&mut wtxn, Some(KEY_DB_NAME))?; let keys = env.create_database(&mut wtxn, Some(KEY_DB_NAME))?;
let action_keyid_index_expiration = let action_keyid_index_expiration =
env.create_database(&mut wtxn, Some(KEY_ID_ACTION_INDEX_EXPIRATION_DB_NAME))?; env.create_database(&mut wtxn, Some(KEY_ID_ACTION_INDEX_EXPIRATION_DB_NAME))?;
wtxn.commit()?; wtxn.commit()?;
Ok(Self { env, keys, action_keyid_index_expiration, should_close_on_drop: true }) Ok(Self { env, keys, action_keyid_index_expiration })
} }
/// Return `Ok(())` if the auth store is able to access one of its databases. /// Return `Ok(())` if the auth store is able to access one of its databases.
@ -82,10 +68,6 @@ impl HeedAuthStore {
Ok(self.env.non_free_pages_size()?) Ok(self.env.non_free_pages_size()?)
} }
pub fn set_drop_on_close(&mut self, v: bool) {
self.should_close_on_drop = v;
}
pub fn is_empty(&self) -> Result<bool> { pub fn is_empty(&self) -> Result<bool> {
let rtxn = self.env.read_txn()?; let rtxn = self.env.read_txn()?;
@ -293,7 +275,7 @@ impl HeedAuthStore {
/// optionally on a specific index, for a given key. /// optionally on a specific index, for a given key.
pub struct KeyIdActionCodec; pub struct KeyIdActionCodec;
impl<'a> milli::heed::BytesDecode<'a> for KeyIdActionCodec { impl<'a> heed::BytesDecode<'a> for KeyIdActionCodec {
type DItem = (KeyId, Action, Option<&'a [u8]>); type DItem = (KeyId, Action, Option<&'a [u8]>);
fn bytes_decode(bytes: &'a [u8]) -> StdResult<Self::DItem, BoxedError> { fn bytes_decode(bytes: &'a [u8]) -> StdResult<Self::DItem, BoxedError> {
@ -310,7 +292,7 @@ impl<'a> milli::heed::BytesDecode<'a> for KeyIdActionCodec {
} }
} }
impl<'a> milli::heed::BytesEncode<'a> for KeyIdActionCodec { impl<'a> heed::BytesEncode<'a> for KeyIdActionCodec {
type EItem = (&'a KeyId, &'a Action, Option<&'a [u8]>); type EItem = (&'a KeyId, &'a Action, Option<&'a [u8]>);
fn bytes_encode((key_id, action, index): &Self::EItem) -> StdResult<Cow<[u8]>, BoxedError> { fn bytes_encode((key_id, action, index): &Self::EItem) -> StdResult<Cow<[u8]>, BoxedError> {
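With `read_txn_without_tls`, the auth environment is opened once, up front, and handed to every consumer (store, dumper, test scheduler) instead of being created and owned inside the store. A sketch of the open-then-share pattern, assuming a heed version that exposes `read_txn_without_tls`, `Env<WithoutTls>`, and `non_free_pages_size` as used in the hunks above:

```rust
use std::path::Path;

use heed::{Env, EnvOpenOptions, WithoutTls};

const AUTH_STORE_SIZE: usize = 1_073_741_824; // 1 GiB

fn open_auth_store_env(path: &Path) -> heed::Result<Env<WithoutTls>> {
    let options = EnvOpenOptions::new();
    // Read transactions won't use thread-local storage, which relaxes
    // heed's usual per-thread constraints on read txns.
    let mut options = options.read_txn_without_tls();
    options.map_size(AUTH_STORE_SIZE);
    options.max_dbs(2);
    // Safety: callers must not open the same env twice with different
    // options (heed's usual contract for `open`).
    unsafe { options.open(path) }
}

fn main() -> Result<(), Box<dyn std::error::Error>> {
    std::fs::create_dir_all("auth")?;
    let env = open_auth_store_env(Path::new("auth"))?;
    // The env handle is now passed to the store, the dumper, and the
    // test scheduler alike, instead of each of them opening their own.
    println!("non-free pages: {} bytes", env.non_free_pages_size()?);
    Ok(())
}
```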


@ -14,6 +14,7 @@ license.workspace = true
actix-web = { version = "4.9.0", default-features = false } actix-web = { version = "4.9.0", default-features = false }
anyhow = "1.0.95" anyhow = "1.0.95"
bumpalo = "3.16.0" bumpalo = "3.16.0"
bumparaw-collections = "0.1.4"
convert_case = "0.6.0" convert_case = "0.6.0"
csv = "1.3.1" csv = "1.3.1"
deserr = { version = "0.6.3", features = ["actix-web"] } deserr = { version = "0.6.3", features = ["actix-web"] }
@ -24,12 +25,11 @@ flate2 = "1.0.35"
fst = "0.4.7" fst = "0.4.7"
memmap2 = "0.9.5" memmap2 = "0.9.5"
milli = { path = "../milli" } milli = { path = "../milli" }
bumparaw-collections = "0.1.4"
roaring = { version = "0.10.10", features = ["serde"] } roaring = { version = "0.10.10", features = ["serde"] }
rustc-hash = "2.1.0" rustc-hash = "2.1.0"
serde = { version = "1.0.217", features = ["derive"] } serde = { version = "1.0.217", features = ["derive"] }
serde-cs = "0.2.4" serde-cs = "0.2.4"
serde_json = "1.0.135" serde_json = { version = "1.0.135", features = ["preserve_order"] }
tar = "0.4.43" tar = "0.4.43"
tempfile = "3.15.0" tempfile = "3.15.0"
thiserror = "2.0.9" thiserror = "2.0.9"
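One subtle Cargo.toml change: serde_json gains the `preserve_order` feature, which swaps its `Map` backing store from a sorted `BTreeMap` to an insertion-ordered map. That matters once batch stats carry raw `serde_json::Map` payloads such as `progress_trace`. A small sketch of the observable difference:

```rust
fn main() {
    // With serde_json's `preserve_order` feature (added in the Cargo.toml
    // hunk above), object keys round-trip in insertion order; without it,
    // the default BTreeMap-backed Map sorts them alphabetically.
    let v: serde_json::Value =
        serde_json::from_str(r#"{"zebra":1,"apple":2,"mango":3}"#).unwrap();
    // preserve_order: {"zebra":1,"apple":2,"mango":3}
    // default:        {"apple":2,"mango":3,"zebra":1}
    println!("{v}");
}
```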


@ -24,9 +24,35 @@ pub struct Batch {
pub started_at: OffsetDateTime, pub started_at: OffsetDateTime,
#[serde(with = "time::serde::rfc3339::option")] #[serde(with = "time::serde::rfc3339::option")]
pub finished_at: Option<OffsetDateTime>, pub finished_at: Option<OffsetDateTime>,
// Enqueued at is never displayed and is only required when removing a batch.
// It's always Some except when upgrading from a database pre-v1.12
pub enqueued_at: Option<BatchEnqueuedAt>,
} }
#[derive(Default, Debug, Clone, Serialize, Deserialize, ToSchema)] impl PartialEq for Batch {
fn eq(&self, other: &Self) -> bool {
let Self { uid, progress, details, stats, started_at, finished_at, enqueued_at } = self;
*uid == other.uid
&& progress.is_none() == other.progress.is_none()
&& details == &other.details
&& stats == &other.stats
&& started_at == &other.started_at
&& finished_at == &other.finished_at
&& enqueued_at == &other.enqueued_at
}
}
#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub struct BatchEnqueuedAt {
#[serde(with = "time::serde::rfc3339")]
pub earliest: OffsetDateTime,
#[serde(with = "time::serde::rfc3339")]
pub oldest: OffsetDateTime,
}
#[derive(Default, Debug, Clone, PartialEq, Eq, Serialize, Deserialize, ToSchema)]
#[serde(rename_all = "camelCase")] #[serde(rename_all = "camelCase")]
#[schema(rename_all = "camelCase")] #[schema(rename_all = "camelCase")]
pub struct BatchStats { pub struct BatchStats {
@ -34,4 +60,10 @@ pub struct BatchStats {
pub status: BTreeMap<Status, u32>, pub status: BTreeMap<Status, u32>,
pub types: BTreeMap<Kind, u32>, pub types: BTreeMap<Kind, u32>,
pub index_uids: BTreeMap<String, u32>, pub index_uids: BTreeMap<String, u32>,
#[serde(default, skip_serializing_if = "serde_json::Map::is_empty")]
pub progress_trace: serde_json::Map<String, serde_json::Value>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub write_channel_congestion: Option<serde_json::Map<String, serde_json::Value>>,
#[serde(default, skip_serializing_if = "serde_json::Map::is_empty")]
pub internal_database_sizes: serde_json::Map<String, serde_json::Value>,
} }
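Two details worth noting in this hunk: `BatchEnqueuedAt` serializes both bounds as RFC 3339 strings via `time::serde::rfc3339`, and the hand-written `PartialEq for Batch` compares `progress` only by presence, so two views of the same batch stay equal while progress advances. A compilable sketch of both, assuming `serde`, `serde_json`, and `time` (with its serde/formatting features) as dependencies:

```rust
use serde::{Deserialize, Serialize};
use time::OffsetDateTime;

#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)]
struct BatchEnqueuedAt {
    // Both bounds serialize as RFC 3339 strings, as in the hunk above.
    #[serde(with = "time::serde::rfc3339")]
    earliest: OffsetDateTime,
    #[serde(with = "time::serde::rfc3339")]
    oldest: OffsetDateTime,
}

#[derive(Debug)]
struct Batch {
    uid: u32,
    progress: Option<String>, // stand-in for the real progress view
    enqueued_at: Option<BatchEnqueuedAt>,
}

impl PartialEq for Batch {
    fn eq(&self, other: &Self) -> bool {
        // Destructuring makes this impl fail to compile if a field is
        // added but forgotten here; `progress` is compared by presence
        // only, so a batch stays equal to itself as progress advances.
        let Self { uid, progress, enqueued_at } = self;
        *uid == other.uid
            && progress.is_none() == other.progress.is_none()
            && enqueued_at == &other.enqueued_at
    }
}

fn main() {
    let now = OffsetDateTime::now_utc();
    let at = Some(BatchEnqueuedAt { earliest: now, oldest: now });
    let a = Batch { uid: 0, progress: Some("indexing".into()), enqueued_at: at };
    let b = Batch { uid: 0, progress: Some("finalizing".into()), enqueued_at: at };
    assert_eq!(a, b); // different progress contents, still equal
    println!("{}", serde_json::to_string(&at).unwrap());
}
```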


@@ -193,6 +193,8 @@ merge_with_error_impl_take_error_message!(ParseTaskKindError);
 merge_with_error_impl_take_error_message!(ParseTaskStatusError);
 merge_with_error_impl_take_error_message!(IndexUidFormatError);
 merge_with_error_impl_take_error_message!(InvalidMultiSearchWeight);
+merge_with_error_impl_take_error_message!(InvalidNetworkUrl);
+merge_with_error_impl_take_error_message!(InvalidNetworkSearchApiKey);
 merge_with_error_impl_take_error_message!(InvalidSearchSemanticRatio);
 merge_with_error_impl_take_error_message!(InvalidSearchRankingScoreThreshold);
 merge_with_error_impl_take_error_message!(InvalidSimilarRankingScoreThreshold);

View File

@@ -241,6 +241,7 @@ InvalidDocumentGeoField , InvalidRequest , BAD_REQUEST ;
 InvalidVectorDimensions , InvalidRequest , BAD_REQUEST ;
 InvalidVectorsType , InvalidRequest , BAD_REQUEST ;
 InvalidDocumentId , InvalidRequest , BAD_REQUEST ;
+InvalidDocumentIds , InvalidRequest , BAD_REQUEST ;
 InvalidDocumentLimit , InvalidRequest , BAD_REQUEST ;
 InvalidDocumentOffset , InvalidRequest , BAD_REQUEST ;
 InvalidSearchEmbedder , InvalidRequest , BAD_REQUEST ;
@@ -260,7 +261,13 @@ InvalidMultiSearchMergeFacets , InvalidRequest , BAD_REQUEST ;
 InvalidMultiSearchQueryFacets , InvalidRequest , BAD_REQUEST ;
 InvalidMultiSearchQueryPagination , InvalidRequest , BAD_REQUEST ;
 InvalidMultiSearchQueryRankingRules , InvalidRequest , BAD_REQUEST ;
+InvalidMultiSearchQueryPosition , InvalidRequest , BAD_REQUEST ;
+InvalidMultiSearchRemote , InvalidRequest , BAD_REQUEST ;
 InvalidMultiSearchWeight , InvalidRequest , BAD_REQUEST ;
+InvalidNetworkRemotes , InvalidRequest , BAD_REQUEST ;
+InvalidNetworkSelf , InvalidRequest , BAD_REQUEST ;
+InvalidNetworkSearchApiKey , InvalidRequest , BAD_REQUEST ;
+InvalidNetworkUrl , InvalidRequest , BAD_REQUEST ;
 InvalidSearchAttributesToSearchOn , InvalidRequest , BAD_REQUEST ;
 InvalidSearchAttributesToCrop , InvalidRequest , BAD_REQUEST ;
 InvalidSearchAttributesToHighlight , InvalidRequest , BAD_REQUEST ;
@@ -275,6 +282,7 @@ InvalidSearchCropMarker , InvalidRequest , BAD_REQUEST ;
 InvalidSearchFacets , InvalidRequest , BAD_REQUEST ;
 InvalidSearchSemanticRatio , InvalidRequest , BAD_REQUEST ;
 InvalidSearchLocales , InvalidRequest , BAD_REQUEST ;
+InvalidFacetSearchExhaustiveFacetCount, InvalidRequest , BAD_REQUEST ;
 InvalidFacetSearchFacetName , InvalidRequest , BAD_REQUEST ;
 InvalidSimilarId , InvalidRequest , BAD_REQUEST ;
 InvalidSearchFilter , InvalidRequest , BAD_REQUEST ;
@@ -351,14 +359,22 @@ MissingDocumentId , InvalidRequest , BAD_REQUEST ;
 MissingFacetSearchFacetName , InvalidRequest , BAD_REQUEST ;
 MissingIndexUid , InvalidRequest , BAD_REQUEST ;
 MissingMasterKey , Auth , UNAUTHORIZED ;
+MissingNetworkUrl , InvalidRequest , BAD_REQUEST ;
 MissingPayload , InvalidRequest , BAD_REQUEST ;
 MissingSearchHybrid , InvalidRequest , BAD_REQUEST ;
 MissingSwapIndexes , InvalidRequest , BAD_REQUEST ;
 MissingTaskFilters , InvalidRequest , BAD_REQUEST ;
 NoSpaceLeftOnDevice , System , UNPROCESSABLE_ENTITY;
 PayloadTooLarge , InvalidRequest , PAYLOAD_TOO_LARGE ;
+RemoteBadResponse , System , BAD_GATEWAY ;
+RemoteBadRequest , InvalidRequest , BAD_REQUEST ;
+RemoteCouldNotSendRequest , System , BAD_GATEWAY ;
+RemoteInvalidApiKey , Auth , FORBIDDEN ;
+RemoteRemoteError , System , BAD_GATEWAY ;
+RemoteTimeout , System , BAD_GATEWAY ;
 TooManySearchRequests , System , SERVICE_UNAVAILABLE ;
 TaskNotFound , InvalidRequest , NOT_FOUND ;
+TaskFileNotFound , InvalidRequest , NOT_FOUND ;
 BatchNotFound , InvalidRequest , NOT_FOUND ;
 TooManyOpenFiles , System , UNPROCESSABLE_ENTITY ;
 TooManyVectors , InvalidRequest , BAD_REQUEST ;
@@ -391,7 +407,7 @@ impl ErrorCode for milli::Error {
         match error {
             // TODO: wait for spec for new error codes.
             UserError::SerdeJson(_)
-            | UserError::InvalidLmdbOpenOptions
+            | UserError::EnvAlreadyOpened
             | UserError::DocumentLimitReached
             | UserError::UnknownInternalDocumentId { .. } => Code::Internal,
             UserError::InvalidStoreFile => Code::InvalidStoreFile,
@@ -400,6 +416,7 @@ impl ErrorCode for milli::Error {
             UserError::AttributeLimitReached => Code::MaxFieldsLimitExceeded,
             UserError::InvalidFilter(_) => Code::InvalidSearchFilter,
             UserError::InvalidFilterExpression(..) => Code::InvalidSearchFilter,
+            UserError::FilterOperatorNotAllowed { .. } => Code::InvalidSearchFilter,
             UserError::MissingDocumentId { .. } => Code::MissingDocumentId,
             UserError::InvalidDocumentId { .. } | UserError::TooManyDocumentIds { .. } => {
                 Code::InvalidDocumentId
@@ -414,9 +431,10 @@ impl ErrorCode for milli::Error {
             | UserError::InvalidUrl { .. }
             | UserError::InvalidSettingsDocumentTemplateMaxBytes { .. }
             | UserError::InvalidPrompt(_)
-            | UserError::InvalidDisableBinaryQuantization { .. } => {
-                Code::InvalidSettingsEmbedders
-            }
+            | UserError::InvalidDisableBinaryQuantization { .. }
+            | UserError::InvalidSourceForNested { .. }
+            | UserError::MissingSourceForNested { .. }
+            | UserError::InvalidSettingsEmbedder { .. } => Code::InvalidSettingsEmbedders,
             UserError::TooManyEmbedders(_) => Code::InvalidSettingsEmbedders,
             UserError::InvalidPromptForEmbeddings(..) => Code::InvalidSettingsEmbedders,
             UserError::NoPrimaryKeyCandidateFound => Code::IndexPrimaryKeyNoCandidateFound,
@@ -486,8 +504,7 @@ impl ErrorCode for HeedError {
             HeedError::Mdb(_)
             | HeedError::Encoding(_)
             | HeedError::Decoding(_)
-            | HeedError::DatabaseClosing
-            | HeedError::BadOpenOptions { .. } => Code::Internal,
+            | HeedError::EnvAlreadyOpened => Code::Internal,
         }
     }
 }
@@ -583,6 +600,18 @@ impl fmt::Display for deserr_codes::InvalidSimilarRankingScoreThreshold {
     }
 }

+impl fmt::Display for deserr_codes::InvalidNetworkUrl {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "the value of `url` is invalid, expected a string.")
+    }
+}
+
+impl fmt::Display for deserr_codes::InvalidNetworkSearchApiKey {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "the value of `searchApiKey` is invalid, expected a string.")
+    }
+}
+
 #[macro_export]
 macro_rules! internal_error {
     ($target:ty : $($other:path), *) => {

View File

@@ -1,3 +1,5 @@
+use std::collections::BTreeMap;
+
 use serde::{Deserialize, Serialize};

 #[derive(Serialize, Deserialize, Debug, Clone, Copy, Default, PartialEq, Eq)]
@@ -7,6 +9,9 @@ pub struct RuntimeTogglableFeatures {
     pub logs_route: bool,
     pub edit_documents_by_function: bool,
     pub contains_filter: bool,
+    pub network: bool,
+    pub get_task_documents_route: bool,
+    pub composite_embedders: bool,
 }

 #[derive(Default, Debug, Clone, Copy)]
@@ -15,3 +20,20 @@ pub struct InstanceTogglableFeatures {
     pub logs_route: bool,
     pub contains_filter: bool,
 }
+
+#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
+#[serde(rename_all = "camelCase")]
+pub struct Remote {
+    pub url: String,
+    #[serde(default)]
+    pub search_api_key: Option<String>,
+}
+
+#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, Default)]
+#[serde(rename_all = "camelCase")]
+pub struct Network {
+    #[serde(default, rename = "self")]
+    pub local: Option<String>,
+    #[serde(default)]
+    pub remotes: BTreeMap<String, Remote>,
+}
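Since `Network` renames `local` to `self` on the wire and camel-cases `search_api_key`, the JSON shape is easy to get wrong. A hedged serialization sketch follows; the struct definitions are copied from the diff, but the instance values are made up:

use std::collections::BTreeMap;
use serde::{Deserialize, Serialize};

#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
#[serde(rename_all = "camelCase")]
struct Remote {
    url: String,
    #[serde(default)]
    search_api_key: Option<String>,
}

#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, Default)]
#[serde(rename_all = "camelCase")]
struct Network {
    #[serde(default, rename = "self")]
    local: Option<String>,
    #[serde(default)]
    remotes: BTreeMap<String, Remote>,
}

fn main() -> serde_json::Result<()> {
    let mut remotes = BTreeMap::new();
    remotes.insert(
        "ms-1".to_string(),
        Remote { url: "http://localhost:7701".to_string(), search_api_key: None },
    );
    let network = Network { local: Some("ms-0".to_string()), remotes };
    // `local` serializes as "self"; `search_api_key` as "searchApiKey".
    println!("{}", serde_json::to_string_pretty(&network)?);
    Ok(())
}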

View File

@@ -4,13 +4,14 @@ use std::fmt;
 use std::str::FromStr;

 use deserr::Deserr;
+use serde::Serialize;
 use utoipa::ToSchema;

 use crate::error::{Code, ErrorCode};

 /// An index uid is composed of only ascii alphanumeric characters, - and _, between 1 and 400
 /// bytes long
-#[derive(Debug, Clone, PartialEq, Eq, Deserr, PartialOrd, Ord, ToSchema)]
+#[derive(Debug, Clone, PartialEq, Eq, Deserr, PartialOrd, Ord, Serialize, ToSchema)]
 #[deserr(try_from(String) = IndexUid::try_from -> IndexUidFormatError)]
 #[schema(value_type = String, example = "movies")]
 pub struct IndexUid(String);

View File

@@ -302,6 +302,12 @@ pub enum Action {
     #[serde(rename = "experimental.update")]
     #[deserr(rename = "experimental.update")]
     ExperimentalFeaturesUpdate,
+    #[serde(rename = "network.get")]
+    #[deserr(rename = "network.get")]
+    NetworkGet,
+    #[serde(rename = "network.update")]
+    #[deserr(rename = "network.update")]
+    NetworkUpdate,
 }

 impl Action {
@@ -341,6 +347,8 @@ impl Action {
             KEYS_DELETE => Some(Self::KeysDelete),
             EXPERIMENTAL_FEATURES_GET => Some(Self::ExperimentalFeaturesGet),
             EXPERIMENTAL_FEATURES_UPDATE => Some(Self::ExperimentalFeaturesUpdate),
+            NETWORK_GET => Some(Self::NetworkGet),
+            NETWORK_UPDATE => Some(Self::NetworkUpdate),
             _otherwise => None,
         }
     }
@@ -386,4 +394,7 @@ pub mod actions {
     pub const KEYS_DELETE: u8 = KeysDelete.repr();
     pub const EXPERIMENTAL_FEATURES_GET: u8 = ExperimentalFeaturesGet.repr();
     pub const EXPERIMENTAL_FEATURES_UPDATE: u8 = ExperimentalFeaturesUpdate.repr();
+
+    pub const NETWORK_GET: u8 = NetworkGet.repr();
+    pub const NETWORK_UPDATE: u8 = NetworkUpdate.repr();
 }

View File

@@ -1,5 +1,5 @@
 use deserr::Deserr;
-use milli::LocalizedAttributesRule;
+use milli::{AttributePatterns, LocalizedAttributesRule};
 use serde::{Deserialize, Serialize};
 use utoipa::ToSchema;
@@ -7,7 +7,7 @@ use utoipa::ToSchema;
 #[deserr(rename_all = camelCase)]
 #[serde(rename_all = "camelCase")]
 pub struct LocalizedAttributesRuleView {
-    pub attribute_patterns: Vec<String>,
+    pub attribute_patterns: AttributePatterns,
     pub locales: Vec<Locale>,
 }

View File

@@ -11,7 +11,7 @@ use fst::IntoStreamer;
 use milli::index::{IndexEmbeddingConfig, PrefixSearch};
 use milli::proximity::ProximityPrecision;
 use milli::update::Setting;
-use milli::{Criterion, CriterionError, Index, DEFAULT_VALUES_PER_FACET};
+use milli::{Criterion, CriterionError, FilterableAttributesRule, Index, DEFAULT_VALUES_PER_FACET};
 use serde::{Deserialize, Serialize, Serializer};
 use utoipa::ToSchema;
@@ -202,8 +202,8 @@ pub struct Settings<T> {
     /// Attributes to use for faceting and filtering. See [Filtering and Faceted Search](https://www.meilisearch.com/docs/learn/filtering_and_sorting/search_with_facet_filters).
     #[serde(default, skip_serializing_if = "Setting::is_not_set")]
     #[deserr(default, error = DeserrJsonError<InvalidSettingsFilterableAttributes>)]
-    #[schema(value_type = Option<Vec<String>>, example = json!(["release_date", "genre"]))]
-    pub filterable_attributes: Setting<BTreeSet<String>>,
+    #[schema(value_type = Option<Vec<FilterableAttributesRule>>, example = json!(["release_date", "genre"]))]
+    pub filterable_attributes: Setting<Vec<FilterableAttributesRule>>,
     /// Attributes to use when sorting search results.
     #[serde(default, skip_serializing_if = "Setting::is_not_set")]
     #[deserr(default, error = DeserrJsonError<InvalidSettingsSortableAttributes>)]
@@ -791,7 +791,7 @@ pub fn settings(
         .user_defined_searchable_fields(rtxn)?
         .map(|fields| fields.into_iter().map(String::from).collect());

-    let filterable_attributes = index.filterable_fields(rtxn)?.into_iter().collect();
+    let filterable_attributes = index.filterable_attributes_rules(rtxn)?.into_iter().collect();

     let sortable_attributes = index.sortable_fields(rtxn)?.into_iter().collect();

View File

@@ -1,7 +1,10 @@
 use std::fs;
-use std::io::{self, ErrorKind};
+use std::io::{ErrorKind, Write};
 use std::path::Path;

+use milli::heed;
+use tempfile::NamedTempFile;
+
 /// The name of the file that contains the version of the database.
 pub const VERSION_FILE_NAME: &str = "VERSION";
@@ -10,37 +13,7 @@ pub static VERSION_MINOR: &str = env!("CARGO_PKG_VERSION_MINOR");
 pub static VERSION_PATCH: &str = env!("CARGO_PKG_VERSION_PATCH");

 /// Persists the version of the current Meilisearch binary to a VERSION file
-pub fn update_version_file_for_dumpless_upgrade(
-    db_path: &Path,
-    from: (u32, u32, u32),
-    to: (u32, u32, u32),
-) -> Result<(), VersionFileError> {
-    let (from_major, from_minor, from_patch) = from;
-    let (to_major, to_minor, to_patch) = to;
-
-    if from_major > to_major
-        || (from_major == to_major && from_minor > to_minor)
-        || (from_major == to_major && from_minor == to_minor && from_patch > to_patch)
-    {
-        Err(VersionFileError::DowngradeNotSupported {
-            major: from_major,
-            minor: from_minor,
-            patch: from_patch,
-        })
-    } else if from_major < 1 || (from_major == to_major && from_minor < 12) {
-        Err(VersionFileError::TooOldForAutomaticUpgrade {
-            major: from_major,
-            minor: from_minor,
-            patch: from_patch,
-        })
-    } else {
-        create_current_version_file(db_path)?;
-        Ok(())
-    }
-}
-
-/// Persists the version of the current Meilisearch binary to a VERSION file
-pub fn create_current_version_file(db_path: &Path) -> io::Result<()> {
+pub fn create_current_version_file(db_path: &Path) -> anyhow::Result<()> {
     create_version_file(db_path, VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH)
 }
@@ -49,9 +22,14 @@ pub fn create_version_file(
     major: &str,
     minor: &str,
     patch: &str,
-) -> io::Result<()> {
+) -> anyhow::Result<()> {
     let version_path = db_path.join(VERSION_FILE_NAME);
-    fs::write(version_path, format!("{}.{}.{}", major, minor, patch))
+    // In order to persist the file later we must create it in the `data.ms` and not in `/tmp`
+    let mut file = NamedTempFile::new_in(db_path)?;
+    file.write_all(format!("{}.{}.{}", major, minor, patch).as_bytes())?;
+    file.flush()?;
+    file.persist(version_path)?;
+    Ok(())
 }

 pub fn get_version(db_path: &Path) -> Result<(u32, u32, u32), VersionFileError> {
@@ -61,7 +39,7 @@ pub fn get_version(db_path: &Path) -> Result<(u32, u32, u32), VersionFileError>
         Ok(version) => parse_version(&version),
         Err(error) => match error.kind() {
             ErrorKind::NotFound => Err(VersionFileError::MissingVersionFile),
-            _ => Err(error.into()),
+            _ => Err(anyhow::Error::from(error).into()),
         },
     }
 }
@@ -112,7 +90,9 @@ pub enum VersionFileError {
     DowngradeNotSupported { major: u32, minor: u32, patch: u32 },
     #[error("Database version {major}.{minor}.{patch} is too old for the experimental dumpless upgrade feature. Please generate a dump using the v{major}.{minor}.{patch} and import it in the v{VERSION_MAJOR}.{VERSION_MINOR}.{VERSION_PATCH}")]
     TooOldForAutomaticUpgrade { major: u32, minor: u32, patch: u32 },
+    #[error("Error while modifying the database: {0}")]
+    ErrorWhileModifyingTheDatabase(#[from] heed::Error),
     #[error(transparent)]
-    IoError(#[from] std::io::Error),
+    AnyhowError(#[from] anyhow::Error),
 }
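The rewritten `create_version_file` is a standard atomic-write: create the temporary file in the destination directory, write, flush, then `persist` (a rename) over the final path, so readers never observe a half-written VERSION file. A standalone sketch of the pattern, assuming only the `tempfile` and `anyhow` crates (`write_atomically` is a hypothetical helper, not part of the diff):

use std::io::Write;
use std::path::Path;

use tempfile::NamedTempFile;

fn write_atomically(dir: &Path, name: &str, contents: &str) -> anyhow::Result<()> {
    // Create the temp file next to its final location so the final rename
    // stays on one filesystem (renames across mount points are not atomic).
    let mut file = NamedTempFile::new_in(dir)?;
    file.write_all(contents.as_bytes())?;
    file.flush()?;
    // `persist` renames the temp file over the target in one step.
    file.persist(dir.join(name))?;
    Ok(())
}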

View File

@@ -30,11 +30,7 @@ actix-web = { version = "4.9.0", default-features = false, features = [
 anyhow = { version = "1.0.95", features = ["backtrace"] }
 async-trait = "0.1.85"
 bstr = "1.11.3"
-byte-unit = { version = "5.1.6", default-features = false, features = [
-    "std",
-    "byte",
-    "serde",
-] }
+byte-unit = { version = "5.1.6", features = ["serde"] }
 bytes = "1.9.0"
 clap = { version = "4.5.24", features = ["derive", "env"] }
 crossbeam-channel = "0.5.14"
@@ -140,7 +136,7 @@ reqwest = { version = "0.12.12", features = [
 sha-1 = { version = "0.10.1", optional = true }
 static-files = { version = "0.2.4", optional = true }
 tempfile = { version = "3.15.0", optional = true }
-zip = { version = "2.2.2", optional = true }
+zip = { version = "2.3.0", optional = true }

 [features]
 default = ["meilisearch-types/all-tokenizations", "mini-dashboard"]
@@ -170,5 +166,5 @@ german = ["meilisearch-types/german"]
 turkish = ["meilisearch-types/turkish"]

 [package.metadata.mini-dashboard]
-assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.16/build.zip"
-sha1 = "68f83438a114aabbe76bc9fe480071e741996662"
+assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.19/build.zip"
+sha1 = "7974430d5277c97f67cf6e95eec6faaac2788834"

View File

@@ -31,6 +31,7 @@ use crate::routes::{create_all_stats, Stats};
 use crate::Opt;

 const ANALYTICS_HEADER: &str = "X-Meilisearch-Client";
+const MEILI_SERVER_PROVIDER: &str = "MEILI_SERVER_PROVIDER";

 /// Write the instance-uid in the `data.ms` and in `~/.config/MeiliSearch/path-to-db-instance-uid`. Ignore the errors.
 fn write_user_id(db_path: &Path, user_id: &InstanceUid) {
@@ -195,6 +196,10 @@ struct Infos {
     experimental_reduce_indexing_memory_usage: bool,
     experimental_max_number_of_batched_tasks: usize,
     experimental_limit_batched_tasks_total_size: u64,
+    experimental_network: bool,
+    experimental_get_task_documents_route: bool,
+    experimental_composite_embedders: bool,
+    experimental_embedding_cache_entries: usize,
     gpu_enabled: bool,
     db_path: bool,
     import_dump: bool,
@@ -242,6 +247,7 @@ impl Infos {
             experimental_reduce_indexing_memory_usage,
             experimental_max_number_of_batched_tasks,
             experimental_limit_batched_tasks_total_size,
+            experimental_embedding_cache_entries,
             http_addr,
             master_key: _,
             env,
@@ -285,6 +291,9 @@ impl Infos {
             logs_route,
             edit_documents_by_function,
             contains_filter,
+            network,
+            get_task_documents_route,
+            composite_embedders,
         } = features;

         // We're going to override every sensible information.
@@ -302,6 +311,10 @@ impl Infos {
             experimental_replication_parameters,
             experimental_enable_logs_route: experimental_enable_logs_route | logs_route,
             experimental_reduce_indexing_memory_usage,
+            experimental_network: network,
+            experimental_get_task_documents_route: get_task_documents_route,
+            experimental_composite_embedders: composite_embedders,
+            experimental_embedding_cache_entries,
             gpu_enabled: meilisearch_types::milli::vector::is_cuda_enabled(),
             db_path: db_path != PathBuf::from("./data.ms"),
             import_dump: import_dump.is_some(),
@@ -316,7 +329,8 @@ impl Infos {
             http_addr: http_addr != default_http_addr(),
             http_payload_size_limit,
             experimental_max_number_of_batched_tasks,
-            experimental_limit_batched_tasks_total_size,
+            experimental_limit_batched_tasks_total_size:
+                experimental_limit_batched_tasks_total_size.into(),
             task_queue_webhook: task_webhook_url.is_some(),
             task_webhook_authorization_header: task_webhook_authorization_header.is_some(),
             log_level: log_level.to_string(),
@@ -357,7 +371,7 @@ impl Segment {
                 "cores": sys.cpus().len(),
                 "ram_size": sys.total_memory(),
                 "disk_size": disks.iter().map(|disk| disk.total_space()).max(),
-                "server_provider": std::env::var("MEILI_SERVER_PROVIDER").ok(),
+                "server_provider": std::env::var(MEILI_SERVER_PROVIDER).ok(),
             })
         });
         let number_of_documents =
@@ -380,10 +394,18 @@ impl Segment {
         index_scheduler: Arc<IndexScheduler>,
         auth_controller: Arc<AuthController>,
     ) {
-        const INTERVAL: Duration = Duration::from_secs(60 * 60); // one hour
-        // The first batch must be sent after one hour.
+        let interval: Duration = match std::env::var(MEILI_SERVER_PROVIDER) {
+            Ok(provider) if provider.starts_with("meili_cloud:") => {
+                Duration::from_secs(60 * 60) // one hour
+            }
+            _ => {
+                // We're an open source instance
+                Duration::from_secs(60 * 60 * 24) // one day
+            }
+        };
         let mut interval =
-            tokio::time::interval_at(tokio::time::Instant::now() + INTERVAL, INTERVAL);
+            tokio::time::interval_at(tokio::time::Instant::now() + interval, interval);

         loop {
             select! {
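The `interval_at(now + interval, interval)` construction keeps the old behaviour of delaying the first analytics batch by one full period; only the period now depends on `MEILI_SERVER_PROVIDER`. A small sketch of that first-tick behaviour (durations shortened for the example; not part of the diff):

use std::time::Duration;

#[tokio::main]
async fn main() {
    let period = Duration::from_millis(50);
    let began = tokio::time::Instant::now();
    // First tick fires at `began + period`, not immediately.
    let mut interval = tokio::time::interval_at(began + period, period);
    interval.tick().await;
    assert!(began.elapsed() >= period);
}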

View File

@@ -32,18 +32,18 @@ use analytics::Analytics;
 use anyhow::bail;
 use error::PayloadError;
 use extractors::payload::PayloadConfig;
+use index_scheduler::versioning::Versioning;
 use index_scheduler::{IndexScheduler, IndexSchedulerOptions};
-use meilisearch_auth::AuthController;
+use meilisearch_auth::{open_auth_store_env, AuthController};
 use meilisearch_types::milli::constants::VERSION_MAJOR;
 use meilisearch_types::milli::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
 use meilisearch_types::milli::update::{IndexDocumentsConfig, IndexDocumentsMethod};
 use meilisearch_types::settings::apply_settings_to_builder;
 use meilisearch_types::tasks::KindWithContent;
 use meilisearch_types::versioning::{
-    create_current_version_file, get_version, update_version_file_for_dumpless_upgrade,
-    VersionFileError, VERSION_MINOR, VERSION_PATCH,
+    create_current_version_file, get_version, VersionFileError, VERSION_MINOR, VERSION_PATCH,
 };
-use meilisearch_types::{compression, milli, VERSION_FILE_NAME};
+use meilisearch_types::{compression, heed, milli, VERSION_FILE_NAME};
 pub use option::Opt;
 use option::ScheduleSnapshot;
 use search_queue::SearchQueue;
@@ -228,11 +228,12 @@ pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<(Arc<IndexScheduler>, Arc<AuthController>)> {
         cleanup_enabled: !opt.experimental_replication_parameters,
         max_number_of_tasks: 1_000_000,
         max_number_of_batched_tasks: opt.experimental_max_number_of_batched_tasks,
-        batched_tasks_size_limit: opt.experimental_limit_batched_tasks_total_size,
+        batched_tasks_size_limit: opt.experimental_limit_batched_tasks_total_size.into(),
         index_growth_amount: byte_unit::Byte::from_str("10GiB").unwrap().as_u64() as usize,
         index_count: DEFAULT_INDEX_COUNT,
         instance_features: opt.to_instance_features(),
         auto_upgrade: opt.experimental_dumpless_upgrade,
+        embedding_cache_cap: opt.experimental_embedding_cache_entries,
     };
     let bin_major: u32 = VERSION_MAJOR.parse().unwrap();
     let bin_minor: u32 = VERSION_MINOR.parse().unwrap();
@@ -335,9 +336,12 @@ fn open_or_create_database_unchecked(
 ) -> anyhow::Result<(IndexScheduler, AuthController)> {
     // we don't want to create anything in the data.ms yet, thus we
     // wrap our two builders in a closure that'll be executed later.
-    let auth_controller = AuthController::new(&opt.db_path, &opt.master_key);
-    let index_scheduler_builder =
-        || -> anyhow::Result<_> { Ok(IndexScheduler::new(index_scheduler_opt, version)?) };
+    std::fs::create_dir_all(&index_scheduler_opt.auth_path)?;
+    let auth_env = open_auth_store_env(&index_scheduler_opt.auth_path).unwrap();
+    let auth_controller = AuthController::new(auth_env.clone(), &opt.master_key);
+    let index_scheduler_builder = || -> anyhow::Result<_> {
+        Ok(IndexScheduler::new(index_scheduler_opt, auth_env, version)?)
+    };

     match (
         index_scheduler_builder(),
@@ -356,14 +360,19 @@ fn open_or_create_database_unchecked(
 /// Ensures Meilisearch version is compatible with the database, returns an error in case of version mismatch.
 /// Returns the version that was contained in the version file
-fn check_version(opt: &Opt, binary_version: (u32, u32, u32)) -> anyhow::Result<(u32, u32, u32)> {
+fn check_version(
+    opt: &Opt,
+    index_scheduler_opt: &IndexSchedulerOptions,
+    binary_version: (u32, u32, u32),
+) -> anyhow::Result<(u32, u32, u32)> {
     let (bin_major, bin_minor, bin_patch) = binary_version;
     let (db_major, db_minor, db_patch) = get_version(&opt.db_path)?;

-    if db_major != bin_major || db_minor != bin_minor || db_patch > bin_patch {
+    if db_major != bin_major || db_minor != bin_minor || db_patch != bin_patch {
         if opt.experimental_dumpless_upgrade {
             update_version_file_for_dumpless_upgrade(
-                &opt.db_path,
+                opt,
+                index_scheduler_opt,
                 (db_major, db_minor, db_patch),
                 (bin_major, bin_minor, bin_patch),
             )?;
@@ -380,6 +389,58 @@ fn check_version(opt: &Opt, binary_version: (u32, u32, u32)) -> anyhow::Result<(u32, u32, u32)> {
     Ok((db_major, db_minor, db_patch))
 }

+/// Persists the version of the current Meilisearch binary to a VERSION file
+pub fn update_version_file_for_dumpless_upgrade(
+    opt: &Opt,
+    index_scheduler_opt: &IndexSchedulerOptions,
+    from: (u32, u32, u32),
+    to: (u32, u32, u32),
+) -> Result<(), VersionFileError> {
+    let (from_major, from_minor, from_patch) = from;
+    let (to_major, to_minor, to_patch) = to;
+
+    // Early exit in case of error
+    if from_major > to_major
+        || (from_major == to_major && from_minor > to_minor)
+        || (from_major == to_major && from_minor == to_minor && from_patch > to_patch)
+    {
+        return Err(VersionFileError::DowngradeNotSupported {
+            major: from_major,
+            minor: from_minor,
+            patch: from_patch,
+        });
+    } else if from_major < 1 || (from_major == to_major && from_minor < 12) {
+        return Err(VersionFileError::TooOldForAutomaticUpgrade {
+            major: from_major,
+            minor: from_minor,
+            patch: from_patch,
+        });
+    }
+
+    // In the case of v1.12, the index-scheduler didn't store its internal version at the time.
+    // => We must write it immediately **in the index-scheduler** otherwise we'll update the version file
+    //    there is a risk of DB corruption if a restart happens after writing the version file but before
+    //    writing the version in the index-scheduler. See <https://github.com/meilisearch/meilisearch/issues/5280>
+    if from_major == 1 && from_minor == 12 {
+        let env = unsafe {
+            heed::EnvOpenOptions::new()
+                .read_txn_without_tls()
+                .max_dbs(Versioning::nb_db())
+                .map_size(index_scheduler_opt.task_db_size)
+                .open(&index_scheduler_opt.tasks_path)
+        }?;
+        let mut wtxn = env.write_txn()?;
+        let versioning = Versioning::raw_new(&env, &mut wtxn)?;
+        versioning.set_version(&mut wtxn, (from_major, from_minor, from_patch))?;
+        wtxn.commit()?;
+        // Should be instant since we're the only one using the env
+        env.prepare_for_closing().wait();
+    }
+
+    create_current_version_file(&opt.db_path)?;
+    Ok(())
+}
+
 /// Ensure you're in a valid state and open the IndexScheduler + AuthController for you.
 fn open_or_create_database(
     opt: &Opt,
@@ -387,7 +448,11 @@ fn open_or_create_database(
     empty_db: bool,
     binary_version: (u32, u32, u32),
 ) -> anyhow::Result<(IndexScheduler, AuthController)> {
-    let version = if !empty_db { check_version(opt, binary_version)? } else { binary_version };
+    let version = if !empty_db {
+        check_version(opt, &index_scheduler_opt, binary_version)?
+    } else {
+        binary_version
+    };

     open_or_create_database_unchecked(opt, index_scheduler_opt, OnFailure::KeepDb, version)
 }
@@ -431,10 +496,13 @@ fn import_dump(
         keys.push(key);
     }

-    // 3. Import the runtime features.
+    // 3. Import the runtime features and network
     let features = dump_reader.features()?.unwrap_or_default();
     index_scheduler.put_runtime_features(features)?;

+    let network = dump_reader.network()?.cloned().unwrap_or_default();
+    index_scheduler.put_network(network)?;
+
     let indexer_config = index_scheduler.indexer_config();

     // /!\ The tasks must be imported AFTER importing the indexes or else the scheduler might
@@ -508,9 +576,15 @@ fn import_dump(
         index_scheduler.refresh_index_stats(&uid)?;
     }

+    // 5. Import the queue
     let mut index_scheduler_dump = index_scheduler.register_dumped_task()?;
+    // 5.1. Import the batches
+    for ret in dump_reader.batches()? {
+        let batch = ret?;
+        index_scheduler_dump.register_dumped_batch(batch)?;
+    }

-    // 5. Import the tasks.
+    // 5.2. Import the tasks
     for ret in dump_reader.tasks()? {
         let (task, file) = ret?;
         index_scheduler_dump.register_dumped_task(task, file)?;
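The downgrade guard in `update_version_file_for_dumpless_upgrade` spells out the major/minor/patch comparison; since Rust tuples order lexicographically, the same test can be read as a plain tuple comparison. A sketch of the equivalence (illustrative only, not code from the diff):

// Lexicographic tuple ordering is exactly the spelled-out check above:
// compare majors, then minors on a tie, then patches.
fn is_downgrade(from: (u32, u32, u32), to: (u32, u32, u32)) -> bool {
    from > to
}

fn main() {
    assert!(is_downgrade((1, 14, 0), (1, 13, 3)));
    assert!(!is_downgrade((1, 12, 7), (1, 13, 0)));
    assert!(!is_downgrade((1, 13, 0), (1, 13, 0))); // equal versions are not a downgrade
}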

View File

@@ -63,7 +63,8 @@ const MEILI_EXPERIMENTAL_MAX_NUMBER_OF_BATCHED_TASKS: &str =
     "MEILI_EXPERIMENTAL_MAX_NUMBER_OF_BATCHED_TASKS";
 const MEILI_EXPERIMENTAL_LIMIT_BATCHED_TASKS_TOTAL_SIZE: &str =
     "MEILI_EXPERIMENTAL_LIMIT_BATCHED_TASKS_SIZE";
+const MEILI_EXPERIMENTAL_EMBEDDING_CACHE_ENTRIES: &str =
+    "MEILI_EXPERIMENTAL_EMBEDDING_CACHE_ENTRIES";
 const DEFAULT_CONFIG_FILE_PATH: &str = "./config.toml";
 const DEFAULT_DB_PATH: &str = "./data.ms";
 const DEFAULT_HTTP_ADDR: &str = "localhost:7700";
@@ -444,7 +445,15 @@ pub struct Opt {
     /// see: <https://github.com/orgs/meilisearch/discussions/801>
     #[clap(long, env = MEILI_EXPERIMENTAL_LIMIT_BATCHED_TASKS_TOTAL_SIZE, default_value_t = default_limit_batched_tasks_total_size())]
     #[serde(default = "default_limit_batched_tasks_total_size")]
-    pub experimental_limit_batched_tasks_total_size: u64,
+    pub experimental_limit_batched_tasks_total_size: Byte,
+
+    /// Enables experimental caching of search query embeddings. The value represents the maximal number of entries in the cache of each
+    /// distinct embedder.
+    ///
+    /// For more information, see <https://github.com/orgs/meilisearch/discussions/818>.
+    #[clap(long, env = MEILI_EXPERIMENTAL_EMBEDDING_CACHE_ENTRIES, default_value_t = default_embedding_cache_entries())]
+    #[serde(default = "default_embedding_cache_entries")]
+    pub experimental_embedding_cache_entries: usize,

     #[serde(flatten)]
     #[clap(flatten)]
@@ -549,6 +558,7 @@ impl Opt {
             experimental_reduce_indexing_memory_usage,
             experimental_max_number_of_batched_tasks,
             experimental_limit_batched_tasks_total_size,
+            experimental_embedding_cache_entries,
         } = self;
         export_to_env_if_not_present(MEILI_DB_PATH, db_path);
         export_to_env_if_not_present(MEILI_HTTP_ADDR, http_addr);
@@ -641,6 +651,10 @@ impl Opt {
             MEILI_EXPERIMENTAL_LIMIT_BATCHED_TASKS_TOTAL_SIZE,
             experimental_limit_batched_tasks_total_size.to_string(),
         );
+        export_to_env_if_not_present(
+            MEILI_EXPERIMENTAL_EMBEDDING_CACHE_ENTRIES,
+            experimental_embedding_cache_entries.to_string(),
+        );
         indexer_options.export_to_env();
     }
@@ -944,8 +958,12 @@ fn default_limit_batched_tasks() -> usize {
     usize::MAX
 }

-fn default_limit_batched_tasks_total_size() -> u64 {
-    u64::MAX
+fn default_limit_batched_tasks_total_size() -> Byte {
+    Byte::from_u64(u64::MAX)
+}
+
+fn default_embedding_cache_entries() -> usize {
+    0
 }

 fn default_snapshot_dir() -> PathBuf {
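With `experimental_limit_batched_tasks_total_size` now a `byte_unit::Byte`, the flag accepts human-readable sizes while the default still means "no limit". A quick sketch of the parsing, using only `Byte` APIs that already appear in this diff:

use std::str::FromStr;

use byte_unit::Byte;

fn main() {
    // `Byte` parses human-readable sizes, which is what the option now accepts.
    let limit = Byte::from_str("10GiB").unwrap();
    assert_eq!(limit.as_u64(), 10 * 1024 * 1024 * 1024);
    // The unset default keeps the previous "no limit" behaviour.
    let unlimited = Byte::from_u64(u64::MAX);
    assert_eq!(unlimited.as_u64(), u64::MAX);
}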

View File

@@ -50,6 +50,9 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
             logs_route: Some(false),
             edit_documents_by_function: Some(false),
             contains_filter: Some(false),
+            network: Some(false),
+            get_task_documents_route: Some(false),
+            composite_embedders: Some(false),
         })),
         (status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!(
             {
@@ -88,6 +91,12 @@ pub struct RuntimeTogglableFeatures {
     pub edit_documents_by_function: Option<bool>,
     #[deserr(default)]
     pub contains_filter: Option<bool>,
+    #[deserr(default)]
+    pub network: Option<bool>,
+    #[deserr(default)]
+    pub get_task_documents_route: Option<bool>,
+    #[deserr(default)]
+    pub composite_embedders: Option<bool>,
 }

 impl From<meilisearch_types::features::RuntimeTogglableFeatures> for RuntimeTogglableFeatures {
@@ -97,6 +106,9 @@ impl From<meilisearch_types::features::RuntimeTogglableFeatures> for RuntimeTogglableFeatures {
             logs_route,
             edit_documents_by_function,
             contains_filter,
+            network,
+            get_task_documents_route,
+            composite_embedders,
         } = value;

         Self {
@@ -104,6 +116,9 @@ impl From<meilisearch_types::features::RuntimeTogglableFeatures> for RuntimeTogglableFeatures {
             logs_route: Some(logs_route),
             edit_documents_by_function: Some(edit_documents_by_function),
             contains_filter: Some(contains_filter),
+            network: Some(network),
+            get_task_documents_route: Some(get_task_documents_route),
+            composite_embedders: Some(composite_embedders),
         }
     }
 }
@@ -114,6 +129,9 @@ pub struct PatchExperimentalFeatureAnalytics {
     logs_route: bool,
     edit_documents_by_function: bool,
     contains_filter: bool,
+    network: bool,
+    get_task_documents_route: bool,
+    composite_embedders: bool,
 }

 impl Aggregate for PatchExperimentalFeatureAnalytics {
@@ -127,6 +145,9 @@ impl Aggregate for PatchExperimentalFeatureAnalytics {
             logs_route: new.logs_route,
             edit_documents_by_function: new.edit_documents_by_function,
             contains_filter: new.contains_filter,
+            network: new.network,
+            get_task_documents_route: new.get_task_documents_route,
+            composite_embedders: new.composite_embedders,
         })
     }
@@ -149,6 +170,9 @@ impl Aggregate for PatchExperimentalFeatureAnalytics {
             logs_route: Some(false),
             edit_documents_by_function: Some(false),
             contains_filter: Some(false),
+            network: Some(false),
+            get_task_documents_route: Some(false),
+            composite_embedders: Some(false),
         })),
         (status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!(
             {
@@ -181,16 +205,28 @@ async fn patch_features(
             .edit_documents_by_function
             .unwrap_or(old_features.edit_documents_by_function),
         contains_filter: new_features.0.contains_filter.unwrap_or(old_features.contains_filter),
+        network: new_features.0.network.unwrap_or(old_features.network),
+        get_task_documents_route: new_features
+            .0
+            .get_task_documents_route
+            .unwrap_or(old_features.get_task_documents_route),
+        composite_embedders: new_features
+            .0
+            .composite_embedders
+            .unwrap_or(old_features.composite_embedders),
     };

     // explicitly destructure for analytics rather than using the `Serialize` implementation, because
-    // the it renames to camelCase, which we don't want for analytics.
+    // it renames to camelCase, which we don't want for analytics.
     // **Do not** ignore fields with `..` or `_` here, because we want to add them in the future.
     let meilisearch_types::features::RuntimeTogglableFeatures {
         metrics,
         logs_route,
         edit_documents_by_function,
         contains_filter,
+        network,
+        get_task_documents_route,
+        composite_embedders,
     } = new_features;

     analytics.publish(
@@ -199,6 +235,9 @@ async fn patch_features(
             logs_route,
             edit_documents_by_function,
             contains_filter,
+            network,
+            get_task_documents_route,
+            composite_embedders,
         },
         &req,
     );
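For reference, the three new toggles ride the same PATCH /experimental-features route as the existing ones, and deserr's camelCase rename means the JSON keys are `network`, `getTaskDocumentsRoute` and `compositeEmbedders`. A hypothetical request body (sketch only, not taken from the diff):

fn main() {
    // Hypothetical payload for PATCH /experimental-features; field names follow
    // the camelCase renames declared on RuntimeTogglableFeatures above.
    let body = serde_json::json!({
        "network": true,
        "getTaskDocumentsRoute": false,
        "compositeEmbedders": false
    });
    println!("{body:#}");
}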

View File

@@ -20,11 +20,13 @@ use meilisearch_types::index_uid::IndexUid;
 use meilisearch_types::milli::update::IndexDocumentsMethod;
 use meilisearch_types::milli::vector::parsed_vectors::ExplicitVectors;
 use meilisearch_types::milli::DocumentId;
+use meilisearch_types::serde_cs::vec::CS;
 use meilisearch_types::star_or::OptionStarOrList;
 use meilisearch_types::tasks::KindWithContent;
 use meilisearch_types::{milli, Document, Index};
 use mime::Mime;
 use once_cell::sync::Lazy;
+use roaring::RoaringBitmap;
 use serde::{Deserialize, Serialize};
 use serde_json::Value;
 use tempfile::tempfile;
@@ -43,7 +45,7 @@ use crate::extractors::sequential_extractor::SeqHandler;
 use crate::routes::{
     get_task_id, is_dry_run, PaginationView, SummarizedTaskView, PAGINATION_DEFAULT_LIMIT,
 };
-use crate::search::{parse_filter, RetrieveVectors};
+use crate::search::{parse_filter, ExternalDocumentId, RetrieveVectors};
 use crate::{aggregate_methods, Opt};

 static ACCEPTED_CONTENT_TYPE: Lazy<Vec<String>> = Lazy::new(|| {
@@ -137,6 +139,9 @@ pub struct DocumentsFetchAggregator<Method: AggregateMethod> {
     #[serde(rename = "vector.retrieve_vectors")]
     retrieve_vectors: bool,

+    // maximum size of `ids` array. 0 if always empty or `null`
+    max_document_ids: usize,
+
     // pagination
     #[serde(rename = "pagination.max_limit")]
     max_limit: usize,
@@ -149,7 +154,7 @@ pub struct DocumentsFetchAggregator<Method: AggregateMethod> {
 #[derive(Copy, Clone, Debug, PartialEq, Eq)]
 pub enum DocumentFetchKind {
     PerDocumentId { retrieve_vectors: bool },
-    Normal { with_filter: bool, limit: usize, offset: usize, retrieve_vectors: bool },
+    Normal { with_filter: bool, limit: usize, offset: usize, retrieve_vectors: bool, ids: usize },
 }

 impl<Method: AggregateMethod> DocumentsFetchAggregator<Method> {
@@ -161,12 +166,18 @@ impl<Method: AggregateMethod> DocumentsFetchAggregator<Method> {
             }
         };

+        let ids = match query {
+            DocumentFetchKind::Normal { ids, .. } => *ids,
+            DocumentFetchKind::PerDocumentId { .. } => 0,
+        };
+
         Self {
             per_document_id: matches!(query, DocumentFetchKind::PerDocumentId { .. }),
             per_filter: matches!(query, DocumentFetchKind::Normal { with_filter, .. } if *with_filter),
             max_limit: limit,
             max_offset: offset,
             retrieve_vectors,
+            max_document_ids: ids,

             marker: PhantomData,
         }
@@ -185,6 +196,7 @@ impl<Method: AggregateMethod> Aggregate for DocumentsFetchAggregator<Method> {
             retrieve_vectors: self.retrieve_vectors | new.retrieve_vectors,
             max_limit: self.max_limit.max(new.max_limit),
             max_offset: self.max_offset.max(new.max_offset),
+            max_document_ids: self.max_document_ids.max(new.max_document_ids),
             marker: PhantomData,
         })
     }
@@ -266,6 +278,7 @@ pub async fn get_document(
             per_filter: false,
             max_limit: 0,
             max_offset: 0,
+            max_document_ids: 0,
             marker: PhantomData,
         },
         &req,
@@ -387,6 +400,9 @@ pub struct BrowseQueryGet {
     #[param(default, value_type = Option<bool>)]
     #[deserr(default, error = DeserrQueryParamError<InvalidDocumentRetrieveVectors>)]
     retrieve_vectors: Param<bool>,
+    #[param(default, value_type = Option<Vec<String>>)]
+    #[deserr(default, error = DeserrQueryParamError<InvalidDocumentIds>)]
+    ids: Option<CS<String>>,
     #[param(default, value_type = Option<String>, example = "popularity > 1000")]
     #[deserr(default, error = DeserrQueryParamError<InvalidDocumentFilter>)]
     filter: Option<String>,
@@ -408,6 +424,9 @@ pub struct BrowseQuery {
     #[schema(default, example = true)]
     #[deserr(default, error = DeserrJsonError<InvalidDocumentRetrieveVectors>)]
     retrieve_vectors: bool,
+    #[schema(value_type = Option<Vec<String>>, example = json!(["cody", "finn", "brandy", "gambit"]))]
+    #[deserr(default, error = DeserrJsonError<InvalidDocumentIds>)]
+    ids: Option<Vec<serde_json::Value>>,
     #[schema(default, value_type = Option<Value>, example = "popularity > 1000")]
     #[deserr(default, error = DeserrJsonError<InvalidDocumentFilter>)]
     filter: Option<Value>,
@@ -479,6 +498,7 @@ pub async fn documents_by_query_post(
             retrieve_vectors: body.retrieve_vectors,
             max_limit: body.limit,
             max_offset: body.offset,
+            max_document_ids: body.ids.as_ref().map(Vec::len).unwrap_or_default(),
             per_document_id: false,
             marker: PhantomData,
         },
@@ -551,7 +571,8 @@ pub async fn get_documents(
 ) -> Result<HttpResponse, ResponseError> {
     debug!(parameters = ?params, "Get documents GET");

-    let BrowseQueryGet { limit, offset, fields, retrieve_vectors, filter } = params.into_inner();
+    let BrowseQueryGet { limit, offset, fields, retrieve_vectors, filter, ids } =
+        params.into_inner();

     let filter = match filter {
         Some(f) => match serde_json::from_str(&f) {
@@ -561,12 +582,15 @@ pub async fn get_documents(
         None => None,
     };

+    let ids = ids.map(|ids| ids.into_iter().map(Into::into).collect());
+
     let query = BrowseQuery {
         offset: offset.0,
         limit: limit.0,
         fields: fields.merge_star_and_none(),
         retrieve_vectors: retrieve_vectors.0,
         filter,
+        ids,
     };

     analytics.publish(
@@ -575,6 +599,7 @@ pub async fn get_documents(
             retrieve_vectors: query.retrieve_vectors,
             max_limit: query.limit,
             max_offset: query.offset,
+            max_document_ids: query.ids.as_ref().map(Vec::len).unwrap_or_default(),
             per_document_id: false,
             marker: PhantomData,
         },
@@ -590,15 +615,30 @@ fn documents_by_query(
     query: BrowseQuery,
 ) -> Result<HttpResponse, ResponseError> {
     let index_uid = IndexUid::try_from(index_uid.into_inner())?;
-    let BrowseQuery { offset, limit, fields, retrieve_vectors, filter } = query;
+    let BrowseQuery { offset, limit, fields, retrieve_vectors, filter, ids } = query;

     let retrieve_vectors = RetrieveVectors::new(retrieve_vectors);

+    let ids = if let Some(ids) = ids {
+        let mut parsed_ids = Vec::with_capacity(ids.len());
+        for (index, id) in ids.into_iter().enumerate() {
+            let id = id.try_into().map_err(|error| {
+                let msg = format!("In `.ids[{index}]`: {error}");
+                ResponseError::from_msg(msg, Code::InvalidDocumentIds)
+            })?;
+            parsed_ids.push(id)
+        }
+        Some(parsed_ids)
+    } else {
+        None
+    };
+
     let index = index_scheduler.index(&index_uid)?;
     let (total, documents) = retrieve_documents(
         &index,
         offset,
         limit,
+        ids,
         filter,
         fields,
         retrieve_vectors,
@@ -1451,10 +1491,12 @@ fn some_documents<'a, 't: 'a>(
         }))
 }

+#[allow(clippy::too_many_arguments)]
 fn retrieve_documents<S: AsRef<str>>(
     index: &Index,
     offset: usize,
     limit: usize,
+    ids: Option<Vec<ExternalDocumentId>>,
     filter: Option<Value>,
     attributes_to_retrieve: Option<Vec<S>>,
     retrieve_vectors: RetrieveVectors,
@@ -1468,16 +1510,28 @@ fn retrieve_documents<S: AsRef<str>>(
         None
     };

-    let candidates = if let Some(filter) = filter {
-        filter.evaluate(&rtxn, index).map_err(|err| match err {
+    let mut candidates = if let Some(ids) = ids {
+        let external_document_ids = index.external_documents_ids();
+        let mut candidates = RoaringBitmap::new();
+        for id in ids.iter() {
+            let Some(docid) = external_document_ids.get(&rtxn, id)? else {
+                continue;
+            };
+            candidates.insert(docid);
+        }
+        candidates
+    } else {
+        index.documents_ids(&rtxn)?
+    };
+
+    if let Some(filter) = filter {
+        candidates &= filter.evaluate(&rtxn, index).map_err(|err| match err {
             milli::Error::UserError(milli::UserError::InvalidFilter(_)) => {
                 ResponseError::from_msg(err.to_string(), Code::InvalidDocumentFilter)
             }
             e => e.into(),
         })?
-    } else {
-        index.documents_ids(&rtxn)?
-    };
+    }

     let (it, number_of_documents) = {
         let number_of_documents = candidates.len();
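The `ids` path above resolves each external document id to an internal docid and collects them into a `RoaringBitmap`; the filter then narrows that set with `&=`. A standalone sketch of those intersection semantics (the bitmap values below are made up):

use roaring::RoaringBitmap;

fn main() {
    // Candidates resolved from the requested external ids.
    let mut candidates = RoaringBitmap::new();
    candidates.insert(2);
    candidates.insert(5);
    candidates.insert(9);

    // Documents matching the filter.
    let mut filtered = RoaringBitmap::new();
    filtered.insert(5);
    filtered.insert(9);
    filtered.insert(12);

    // `&=` keeps only documents satisfying both the ids list and the filter,
    // mirroring `candidates &= filter.evaluate(...)` above.
    candidates &= filtered;
    assert_eq!(candidates.iter().collect::<Vec<_>>(), vec![5, 9]);
}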

View File

@@ -68,6 +68,8 @@ pub struct FacetSearchQuery {
     pub ranking_score_threshold: Option<RankingScoreThreshold>,
     #[deserr(default, error = DeserrJsonError<InvalidSearchLocales>, default)]
     pub locales: Option<Vec<Locale>>,
+    #[deserr(default, error = DeserrJsonError<InvalidFacetSearchExhaustiveFacetCount>, default)]
+    pub exhaustive_facet_count: Option<bool>,
 }

 #[derive(Default)]
@@ -98,6 +100,7 @@ impl FacetSearchAggregator {
             hybrid,
             ranking_score_threshold,
             locales,
+            exhaustive_facet_count,
         } = query;

         Self {
@@ -110,7 +113,8 @@
                 || attributes_to_search_on.is_some()
                 || hybrid.is_some()
                 || ranking_score_threshold.is_some()
-                || locales.is_some(),
+                || locales.is_some()
+                || exhaustive_facet_count.is_some(),
             ..Default::default()
         }
     }
@@ -293,13 +297,24 @@ impl From<FacetSearchQuery> for SearchQuery {
             hybrid,
             ranking_score_threshold,
             locales,
+            exhaustive_facet_count,
         } = value;

+        // If exhaustive_facet_count is true, we need to set the page to 0
+        // because the facet search is not exhaustive by default.
+        let page = if exhaustive_facet_count.map_or(false, |exhaustive| exhaustive) {
+            // setting the page to 0 will force the search to be exhaustive when computing the number of hits,
+            // but it will skip the bucket sort saving time.
+            Some(0)
+        } else {
+            None
+        };
+
         SearchQuery {
             q,
             offset: DEFAULT_SEARCH_OFFSET(),
             limit: DEFAULT_SEARCH_LIMIT(),
-            page: None,
+            page,
             hits_per_page: None,
             attributes_to_retrieve: None,
             retrieve_vectors: false,
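The `page` trick above deserves a note: Meilisearch computes an exhaustive hit count when finite pagination is requested, and page `0` asks for no hits at all, so the conversion buys an exhaustive facet count without paying for the bucket sort. A reduced sketch of just the flag-to-page mapping (everything else in the conversion is unchanged):

```rust
/// Reduced sketch of the flag-to-page mapping used in the conversion above.
fn page_for(exhaustive_facet_count: Option<bool>) -> Option<usize> {
    // `None` and `Some(false)` keep the default (fast, non-exhaustive) path;
    // `Some(true)` switches to page 0: exhaustive hit counting, zero hits returned.
    if exhaustive_facet_count.map_or(false, |exhaustive| exhaustive) {
        Some(0)
    } else {
        None
    }
}

fn main() {
    assert_eq!(page_for(None), None);
    assert_eq!(page_for(Some(false)), None);
    assert_eq!(page_for(Some(true)), Some(0));
}
```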

View File

@@ -494,8 +494,18 @@ pub async fn delete_index(
 pub struct IndexStats {
     /// Number of documents in the index
     pub number_of_documents: u64,
+    /// Size of the documents database, in bytes.
+    pub raw_document_db_size: u64,
+    /// Average size of a document in the documents database.
+    pub avg_document_size: u64,
     /// Whether or not the index is currently ingesting document
     pub is_indexing: bool,
+    /// Number of embeddings in the index
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub number_of_embeddings: Option<u64>,
+    /// Number of embedded documents in the index
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub number_of_embedded_documents: Option<u64>,
     /// Association of every field name with the number of times it occurs in the documents.
     #[schema(value_type = HashMap<String, u64>)]
     pub field_distribution: FieldDistribution,
@@ -504,8 +514,15 @@ pub struct IndexStats {
 impl From<index_scheduler::IndexStats> for IndexStats {
     fn from(stats: index_scheduler::IndexStats) -> Self {
         IndexStats {
-            number_of_documents: stats.inner_stats.number_of_documents,
+            number_of_documents: stats
+                .inner_stats
+                .number_of_documents
+                .unwrap_or(stats.inner_stats.documents_database_stats.number_of_entries()),
+            raw_document_db_size: stats.inner_stats.documents_database_stats.total_size(),
+            avg_document_size: stats.inner_stats.documents_database_stats.average_value_size(),
             is_indexing: stats.is_indexing,
+            number_of_embeddings: stats.inner_stats.number_of_embeddings,
+            number_of_embedded_documents: stats.inner_stats.number_of_embedded_documents,
             field_distribution: stats.inner_stats.field_distribution,
         }
     }
@@ -524,6 +541,10 @@ impl From<index_scheduler::IndexStats> for IndexStats {
         (status = OK, description = "The stats of the index", body = IndexStats, content_type = "application/json", example = json!(
             {
                 "numberOfDocuments": 10,
+                "rawDocumentDbSize": 10,
+                "avgDocumentSize": 10,
+                "numberOfEmbeddings": 10,
+                "numberOfEmbeddedDocuments": 10,
                 "isIndexing": true,
                 "fieldDistribution": {
                     "genre": 10,

View File

@@ -291,7 +291,7 @@ make_setting_routes!(
     {
         route: "/filterable-attributes",
         update_verb: put,
-        value_type: std::collections::BTreeSet<String>,
+        value_type: Vec<meilisearch_types::milli::FilterableAttributesRule>,
         err_type: meilisearch_types::deserr::DeserrJsonError<
             meilisearch_types::error::deserr_codes::InvalidSettingsFilterableAttributes,
         >,
@@ -716,7 +716,30 @@ pub async fn delete_all(
 fn validate_settings(
     settings: Settings<Unchecked>,
-    _index_scheduler: &IndexScheduler,
+    index_scheduler: &IndexScheduler,
 ) -> Result<Settings<Unchecked>, ResponseError> {
+    use meilisearch_types::milli::update::Setting;
+    use meilisearch_types::milli::vector::settings::EmbedderSource;
+
+    let features = index_scheduler.features();
+    if let Setting::Set(embedders) = &settings.embedders {
+        for SettingEmbeddingSettings { inner: embedder } in embedders.values() {
+            let Setting::Set(embedder) = embedder else {
+                continue;
+            };
+
+            if matches!(embedder.source, Setting::Set(EmbedderSource::Composite)) {
+                features.check_composite_embedders("using `\"composite\"` as source")?;
+            }
+
+            if matches!(embedder.search_embedder, Setting::Set(_)) {
+                features.check_composite_embedders("setting `searchEmbedder`")?;
+            }
+
+            if matches!(embedder.indexing_embedder, Setting::Set(_)) {
+                features.check_composite_embedders("setting `indexingEmbedder`")?;
+            }
+        }
+    }
+
     Ok(settings.validate()?)
 }
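`validate_settings` now takes a live `IndexScheduler` so it can consult runtime feature flags: every configured embedder is scanned, and any composite-specific knob fails fast at settings time instead of surfacing later during indexing. A generic sketch of that gate pattern (the `Features` type and error text here are illustrative, not meilisearch's actual API):

```rust
/// Illustrative feature gate: errors out when a gated capability is used while disabled.
struct Features {
    composite_embedders: bool,
}

impl Features {
    fn check_composite_embedders(&self, action: &str) -> Result<(), String> {
        if self.composite_embedders {
            Ok(())
        } else {
            Err(format!(
                "{action} requires enabling the `compositeEmbedders` experimental feature"
            ))
        }
    }
}

fn main() {
    let features = Features { composite_embedders: false };
    // Same shape as the validation above: check before accepting the setting.
    let result = features.check_composite_embedders("using `\"composite\"` as source");
    assert!(result.is_err());
    println!("{result:?}");
}
```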

View File

@@ -8,6 +8,7 @@ use std::collections::{BTreeMap, BTreeSet, HashSet};
 use meilisearch_types::facet_values_sort::FacetValuesSort;
 use meilisearch_types::locales::{Locale, LocalizedAttributesRuleView};
 use meilisearch_types::milli::update::Setting;
+use meilisearch_types::milli::FilterableAttributesRule;
 use meilisearch_types::settings::{
     FacetingSettings, PaginationSettings, PrefixSearchSettings, ProximityPrecisionView,
     RankingRuleView, SettingEmbeddingSettings, TypoSettings,
@@ -89,6 +90,10 @@ impl Aggregate for SettingsAnalytics {
             filterable_attributes: FilterableAttributesAnalytics {
                 total: new.filterable_attributes.total.or(self.filterable_attributes.total),
                 has_geo: new.filterable_attributes.has_geo.or(self.filterable_attributes.has_geo),
+                has_patterns: new
+                    .filterable_attributes
+                    .has_patterns
+                    .or(self.filterable_attributes.has_patterns),
             },
             distinct_attribute: DistinctAttributeAnalytics {
                 set: self.distinct_attribute.set | new.distinct_attribute.set,
@@ -328,13 +333,19 @@ impl SortableAttributesAnalytics {
 pub struct FilterableAttributesAnalytics {
     pub total: Option<usize>,
     pub has_geo: Option<bool>,
+    pub has_patterns: Option<bool>,
 }

 impl FilterableAttributesAnalytics {
-    pub fn new(setting: Option<&BTreeSet<String>>) -> Self {
+    pub fn new(setting: Option<&Vec<FilterableAttributesRule>>) -> Self {
         Self {
             total: setting.as_ref().map(|filter| filter.len()),
-            has_geo: setting.as_ref().map(|filter| filter.contains("_geo")),
+            has_geo: setting
+                .as_ref()
+                .map(|filter| filter.iter().any(FilterableAttributesRule::has_geo)),
+            has_patterns: setting.as_ref().map(|filter| {
+                filter.iter().any(|rule| matches!(rule, FilterableAttributesRule::Pattern(_)))
+            }),
         }
     }
@@ -512,6 +523,7 @@ impl EmbeddersAnalytics {
                     EmbedderSource::UserProvided => sources.insert("userProvided".to_string()),
                     EmbedderSource::Ollama => sources.insert("ollama".to_string()),
                     EmbedderSource::Rest => sources.insert("rest".to_string()),
+                    EmbedderSource::Composite => sources.insert("composite".to_string()),
                 };
             }
         };
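Because filterable attributes are now a `Vec` of rules instead of a `BTreeSet<String>`, the analytics probes switch from set membership (`contains("_geo")`) to a linear scan with `Iterator::any`. A self-contained sketch of that detection pattern over a simplified rule enum (the real `FilterableAttributesRule` carries more data per variant):

```rust
/// Simplified stand-in for milli's FilterableAttributesRule.
#[derive(Debug)]
enum Rule {
    Field(String),
    Pattern(String),
}

impl Rule {
    /// Whether this rule targets the reserved `_geo` field.
    fn has_geo(&self) -> bool {
        matches!(self, Rule::Field(name) if name == "_geo")
    }
}

fn main() {
    let rules = vec![Rule::Field("_geo".into()), Rule::Pattern("attr_*".into())];

    // Same shape as the analytics probes: one linear scan per question.
    let has_geo = rules.iter().any(Rule::has_geo);
    let has_patterns = rules.iter().any(|rule| matches!(rule, Rule::Pattern(_)));

    assert!(has_geo);
    assert!(has_patterns);
    println!("has_geo={has_geo}, has_patterns={has_patterns}");
}
```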

View File

@@ -5,7 +5,7 @@ use index_scheduler::IndexScheduler;
 use meilisearch_types::deserr::query_params::Param;
 use meilisearch_types::deserr::{DeserrJsonError, DeserrQueryParamError};
 use meilisearch_types::error::deserr_codes::*;
-use meilisearch_types::error::{ErrorCode as _, ResponseError};
+use meilisearch_types::error::ResponseError;
 use meilisearch_types::index_uid::IndexUid;
 use meilisearch_types::keys::actions;
 use meilisearch_types::serde_cs::vec::CS;
@@ -111,7 +111,7 @@ pub async fn similar_get(
 ) -> Result<HttpResponse, ResponseError> {
     let index_uid = IndexUid::try_from(index_uid.into_inner())?;

-    let query = params.0.try_into()?;
+    let query = params.0.into();

     let mut aggregate = SimilarAggregator::<SimilarGET>::from_query(&query);
@@ -295,10 +293,8 @@ impl std::convert::TryFrom<String> for RankingScoreThresholdGet {
     }
 }

-impl TryFrom<SimilarQueryGet> for SimilarQuery {
-    type Error = ResponseError;
-
-    fn try_from(
+impl From<SimilarQueryGet> for SimilarQuery {
+    fn from(
         SimilarQueryGet {
             id,
             offset,
@@ -311,7 +309,7 @@ impl TryFrom<SimilarQueryGet> for SimilarQuery {
             embedder,
             ranking_score_threshold,
         }: SimilarQueryGet,
-    ) -> Result<Self, Self::Error> {
+    ) -> Self {
         let filter = match filter {
             Some(f) => match serde_json::from_str(&f) {
                 Ok(v) => Some(v),
@@ -320,10 +318,8 @@ impl TryFrom<SimilarQueryGet> for SimilarQuery {
             None => None,
         };

-        Ok(SimilarQuery {
-            id: id.0.try_into().map_err(|code: InvalidSimilarId| {
-                ResponseError::from_msg(code.to_string(), code.error_code())
-            })?,
+        SimilarQuery {
+            id: serde_json::Value::String(id.0),
             offset: offset.0,
             limit: limit.0,
             filter,
@@ -333,6 +329,6 @@ impl TryFrom<SimilarQueryGet> for SimilarQuery {
             show_ranking_score: show_ranking_score.0,
             show_ranking_score_details: show_ranking_score_details.0,
             ranking_score_threshold: ranking_score_threshold.map(|x| x.0),
-        })
+        }
     }
 }
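The switch from `TryFrom` to `From` is possible because the conversion's only fallible step is gone: instead of validating the id into a document id during conversion, the raw string is wrapped in a `serde_json::Value` and validated further downstream, where the error can carry better context. A reduced sketch of that refactor shape (`QueryGet`, `Query`, and `validate_id` are hypothetical stand-ins):

```rust
// Before: conversion itself validated and could fail (TryFrom).
// After: conversion is infallible (From); validation happens downstream.
struct QueryGet {
    id: String,
}

struct Query {
    id: serde_json::Value,
}

impl From<QueryGet> for Query {
    fn from(QueryGet { id }: QueryGet) -> Self {
        // No `?` possible here anymore: just carry the raw value along.
        Query { id: serde_json::Value::String(id) }
    }
}

/// Hypothetical downstream validation with richer error context.
fn validate_id(id: &serde_json::Value) -> Result<&str, String> {
    id.as_str().filter(|s| !s.is_empty()).ok_or_else(|| format!("invalid document id: {id}"))
}

fn main() {
    let query: Query = QueryGet { id: "movie-42".into() }.into();
    assert_eq!(validate_id(&query.id), Ok("movie-42"));
}
```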

View File

@@ -9,6 +9,10 @@ use meilisearch_types::batches::BatchStats;
 use meilisearch_types::error::{Code, ErrorType, ResponseError};
 use meilisearch_types::index_uid::IndexUid;
 use meilisearch_types::keys::CreateApiKey;
+use meilisearch_types::milli::{
+    AttributePatterns, FilterFeatures, FilterableAttributesFeatures, FilterableAttributesPatterns,
+    FilterableAttributesRule,
+};
 use meilisearch_types::settings::{
     Checked, FacetingSettings, MinWordSizeTyposSetting, PaginationSettings, Settings, TypoSettings,
     Unchecked,
@@ -34,6 +38,7 @@ use crate::routes::features::RuntimeTogglableFeatures;
 use crate::routes::indexes::documents::{DocumentDeletionByFilter, DocumentEditionByFunction};
 use crate::routes::indexes::IndexView;
 use crate::routes::multi_search::SearchResults;
+use crate::routes::network::{Network, Remote};
 use crate::routes::swap_indexes::SwapIndexesPayload;
 use crate::search::{
     FederatedSearch, FederatedSearchResult, Federation, FederationOptions, MergeFacets,
@@ -54,6 +59,7 @@ mod logs;
 mod metrics;
 mod multi_search;
 mod multi_search_analytics;
+pub mod network;
 mod open_api_utils;
 mod snapshot;
 mod swap_indexes;
@@ -75,6 +81,7 @@ pub mod tasks;
         (path = "/multi-search", api = multi_search::MultiSearchApi),
         (path = "/swap-indexes", api = swap_indexes::SwapIndexesApi),
         (path = "/experimental-features", api = features::ExperimentalFeaturesApi),
+        (path = "/network", api = network::NetworkApi),
     ),
     paths(get_health, get_version, get_stats),
     tags(
@@ -85,7 +92,7 @@ pub mod tasks;
         url = "/",
         description = "Local server",
     )),
-    components(schemas(PaginationView<KeyView>, PaginationView<IndexView>, IndexView, DocumentDeletionByFilter, AllBatches, BatchStats, ProgressStepView, ProgressView, BatchView, RuntimeTogglableFeatures, SwapIndexesPayload, DocumentEditionByFunction, MergeFacets, FederationOptions, SearchQueryWithIndex, Federation, FederatedSearch, FederatedSearchResult, SearchResults, SearchResultWithIndex, SimilarQuery, SimilarResult, PaginationView<serde_json::Value>, BrowseQuery, UpdateIndexRequest, IndexUid, IndexCreateRequest, KeyView, Action, CreateApiKey, UpdateStderrLogs, LogMode, GetLogs, IndexStats, Stats, HealthStatus, HealthResponse, VersionResponse, Code, ErrorType, AllTasks, TaskView, Status, DetailsView, ResponseError, Settings<Unchecked>, Settings<Checked>, TypoSettings, MinWordSizeTyposSetting, FacetingSettings, PaginationSettings, SummarizedTaskView, Kind))
+    components(schemas(PaginationView<KeyView>, PaginationView<IndexView>, IndexView, DocumentDeletionByFilter, AllBatches, BatchStats, ProgressStepView, ProgressView, BatchView, RuntimeTogglableFeatures, SwapIndexesPayload, DocumentEditionByFunction, MergeFacets, FederationOptions, SearchQueryWithIndex, Federation, FederatedSearch, FederatedSearchResult, SearchResults, SearchResultWithIndex, SimilarQuery, SimilarResult, PaginationView<serde_json::Value>, BrowseQuery, UpdateIndexRequest, IndexUid, IndexCreateRequest, KeyView, Action, CreateApiKey, UpdateStderrLogs, LogMode, GetLogs, IndexStats, Stats, HealthStatus, HealthResponse, VersionResponse, Code, ErrorType, AllTasks, TaskView, Status, DetailsView, ResponseError, Settings<Unchecked>, Settings<Checked>, TypoSettings, MinWordSizeTyposSetting, FacetingSettings, PaginationSettings, SummarizedTaskView, Kind, Network, Remote, FilterableAttributesRule, FilterableAttributesPatterns, AttributePatterns, FilterableAttributesFeatures, FilterFeatures))
 )]
 pub struct MeilisearchApi;
@@ -103,7 +110,8 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
         .service(web::scope("/multi-search").configure(multi_search::configure))
         .service(web::scope("/swap-indexes").configure(swap_indexes::configure))
         .service(web::scope("/metrics").configure(metrics::configure))
-        .service(web::scope("/experimental-features").configure(features::configure));
+        .service(web::scope("/experimental-features").configure(features::configure))
+        .service(web::scope("/network").configure(network::configure));

     #[cfg(feature = "swagger")]
     {
@@ -359,9 +367,9 @@ pub async fn running() -> HttpResponse {
 #[derive(Serialize, Debug, ToSchema)]
 #[serde(rename_all = "camelCase")]
 pub struct Stats {
-    /// The size of the database, in bytes.
+    /// The disk space used by the database, in bytes.
     pub database_size: u64,
-    #[serde(skip)]
+    /// The size of the database, in bytes.
     pub used_database_size: u64,
     /// The date of the last update in the RFC 3339 formats. Can be `null` if no update has ever been processed.
     #[serde(serialize_with = "time::serde::rfc3339::option::serialize")]
@@ -383,10 +391,14 @@ pub struct Stats {
     (status = 200, description = "The stats of the instance", body = Stats, content_type = "application/json", example = json!(
         {
             "databaseSize": 567,
+            "usedDatabaseSize": 456,
             "lastUpdate": "2019-11-20T09:40:33.711324Z",
             "indexes": {
                 "movies": {
                     "numberOfDocuments": 10,
+                    "rawDocumentDbSize": 100,
+                    "maxDocumentSize": 16,
+                    "avgDocumentSize": 10,
                     "isIndexing": true,
                     "fieldDistribution": {
                         "genre": 10,

Some files were not shown because too many files have changed in this diff.