Compare commits


111 Commits

Author SHA1 Message Date
6a5a834f27 Lazily compute the FSTs during indexing 2025-04-03 16:04:35 +02:00
418fa47963 Merge pull request #5313 from barloes/fixRankingScoreThresholdRankingIssue
fix for rankingScoreThreshold changes the results' ranking
2025-04-01 13:10:55 +00:00
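
The fix above concerns where the result list is cut when a `rankingScoreThreshold` is set (see the follow-up commits "Fix logic when results are below the threshold" and "add 1 to index so it points to correct position" further down). A minimal sketch of that kind of cutoff, assuming scores sorted in descending order — an illustration only, not Meilisearch's actual code:

```rust
/// Minimal sketch (not Meilisearch's implementation): keep every hit whose
/// ranking score is >= threshold, assuming `scores` is sorted descending.
fn cutoff_index(scores: &[f64], threshold: f64) -> usize {
    // partition_point returns the index of the first element for which the
    // predicate is false, i.e. one past the last score still above the threshold.
    scores.partition_point(|&s| s >= threshold)
}

fn main() {
    let scores = [0.92, 0.81, 0.40, 0.13];
    // With a 0.5 threshold only the first two hits survive.
    assert_eq!(cutoff_index(&scores, 0.5), 2);
    // Truncating at `index` (not `index - 1`) keeps the last qualifying hit,
    // which is the kind of off-by-one the fix addresses.
    println!("{:?}", &scores[..cutoff_index(&scores, 0.5)]);
}
```
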
0656a0d515 Optimize roaring operation
Co-authored-by: Many the fish <many@meilisearch.com>
2025-04-01 14:25:27 +02:00
e36a8c50b9 Merge pull request #5478 from meilisearch/enforce-embedding-dimensions
Enforce embedding dimensions
2025-03-31 15:31:29 +00:00
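
The commits below add a check and a dedicated error for embeddings whose dimensions do not match the embedder's configured dimensions. A hedged sketch of such a check at insertion time — the type and function names are hypothetical, not Meilisearch's real ones:

```rust
/// Hypothetical sketch of a dimension check at insertion time; the error and
/// function names are illustrative, not Meilisearch's actual types.
#[derive(Debug)]
struct DimensionMismatch {
    expected: usize,
    received: usize,
}

fn check_embedding(expected_dims: usize, vector: &[f32]) -> Result<(), DimensionMismatch> {
    if vector.len() != expected_dims {
        // Rejecting here, while the document is being indexed, surfaces the
        // problem immediately instead of storing an unusable vector.
        return Err(DimensionMismatch { expected: expected_dims, received: vector.len() });
    }
    Ok(())
}

fn main() {
    assert!(check_embedding(3, &[0.1, 0.2, 0.3]).is_ok());
    assert!(check_embedding(3, &[0.1, 0.2]).is_err());
}
```
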
08ff135ad6 Fix test 2025-03-31 15:27:49 +02:00
f729864466 Check dimension mismatch at insertion time 2025-03-31 15:27:49 +02:00
94ea263bef Add new error for dimensions mismatch during indexing 2025-03-31 15:27:49 +02:00
0e475cb5e6 fix warn and show what meilisearch understood of the vectors in the cursed test 2025-03-31 13:49:22 +02:00
62de70b73c Document problematic case in test and acknowledge PR comment 2025-03-31 13:49:22 +02:00
7707fb18dd add embedding with dimension mismatch test case 2025-03-31 13:49:22 +02:00
bb2e9419d3 Merge pull request #5468 from meilisearch/more-precise-post-processing
More Precise Post Processing
2025-03-27 10:07:09 +00:00
cf68713145 Merge pull request #5465 from meilisearch/improve-stats-perf
Improve documents stats performances
2025-03-27 09:20:14 +00:00
811143cbe9 Add more progress precision when doing post processing 2025-03-27 10:17:28 +01:00
c670e9a39b Make sure the snaps are happy 2025-03-26 20:03:35 +01:00
65f1b13475 Merge pull request #5464 from meilisearch/camel-case-database-sizes
Prefer camelCase for internal database sizes db name
2025-03-26 16:40:39 +00:00
db7ce03763 Improve the performances of computing the size of the documents database 2025-03-26 17:40:12 +01:00
7ed9adde29 Prefer camelCase for internal database sizes db name 2025-03-26 16:45:52 +01:00
9ce7ccfbe7 Merge pull request #5457 from meilisearch/show-database-sizes-changes
Show database sizes batches
2025-03-26 10:19:40 +00:00
3deb1ef78f Fix the snapshots again 2025-03-26 10:38:49 +01:00
5820d822c8 Add more details about the finalizing progress step 2025-03-26 09:49:43 +01:00
637bea0370 Compute and store the database sizes 2025-03-26 09:49:42 +01:00
fd079c6757 Add an index method to get the database sizes 2025-03-25 16:30:51 +01:00
182e5d5632 Add database sizes stats to the batches 2025-03-25 16:30:15 +01:00
82aee6a9af Merge pull request #5415 from meilisearch/isolate-word-fst-usage
Isolate word fst usage
2025-03-25 11:43:37 +00:00
fca947219f Merge pull request #5402 from meilisearch/do-not-reindex-searchable-order-change
Avoid reindexing searchable order changes
2025-03-25 07:03:14 +00:00
fb7ae9f97f Merge pull request #5454 from meilisearch/update-charabia-v0.9.3
Update Charabia v0.9.3
2025-03-24 22:34:51 +00:00
cd421fea1e Merge pull request #5456 from meilisearch/fix-CI
Fix CI to work with merge queues
2025-03-25 09:55:59 +00:00
1ad4235beb Remove the bors file 2025-03-25 10:05:41 +01:00
de6c7e551e Remove bors references from the repository 2025-03-25 10:04:38 +01:00
c0fe70c5f0 Make the CI work with merge queue grouping 2025-03-25 10:04:24 +01:00
a09d08c7b6 Avoid reindexing searchable order changes
Update settings.rs

Update settings.rs
2025-03-24 16:26:52 +01:00
2e6aa63efc Update Charabia v0.9.3 2025-03-24 14:32:21 +01:00
f9807ba32e Fix logic when results are below the threshold 2025-03-19 11:34:53 +01:00
8c8cc59a6c remove new line added by accident 2025-03-19 11:34:53 +01:00
f540a69ac3 add 1 to index so it points to correct position 2025-03-19 11:34:52 +01:00
7df2bdfb15 Merge #5436
5436: Update mini-dashboard to v0.2.19 version r=Kerollmops a=curquiza

Fixes mini dashboard to prevent the panel from popping up every time

Fixed by `@mdubus` 👍 

Co-authored-by: curquiza <clementine@meilisearch.com>
2025-03-18 16:24:31 +00:00
71f7456748 Update mini-dashboard to v0.2.19 version 2025-03-18 12:48:38 +01:00
c98b313d03 Merge #5426
5426: Bump zip from 2.2.2 to 2.3.0 r=Kerollmops a=dependabot[bot]

Bumps [zip](https://github.com/zip-rs/zip2) from 2.2.2 to 2.3.0.
Release notes (sourced from [zip's releases](https://github.com/zip-rs/zip2/releases)):

**v2.3.0**

🚀 Features
- Add support for NTFS extra field ([#279](https://redirect.github.com/zip-rs/zip2/pull/279))

🐛 Bug Fixes
- *(test)* Conditionalize a zip64 doctest ([#308](https://redirect.github.com/zip-rs/zip2/pull/308))
- fix failing tests, remove symlink loop check
- Canonicalize output path to avoid false negatives
- Symlink handling in stream extraction
- Canonicalize output paths and symlink targets, and ensure they descend from the destination

⚙️ Miscellaneous Tasks
- Fix clippy and cargo fmt warnings ([#310](https://redirect.github.com/zip-rs/zip2/pull/310))

**v2.2.3**

🚜 Refactor
- Change the inner structure of `DateTime` ([#267](https://redirect.github.com/zip-rs/zip2/issues/267))

⚙️ Miscellaneous Tasks
- cargo fix --edition

The [changelog](https://github.com/zip-rs/zip2/blob/master/CHANGELOG.md) lists the same entries for [2.3.0](https://github.com/zip-rs/zip2/compare/v2.2.3...v2.3.0) (2025-03-16) and [2.2.3](https://github.com/zip-rs/zip2/compare/v2.2.2...v2.2.3) (2025-02-26).

Commits:
- [`6eab5f5`](6eab5f5cc6) chore: release v2.3.0 ([#300](https://redirect.github.com/zip-rs/zip2/issues/300))
- [`e4aee20`](e4aee2050f) implement `ZipFile::options` + refactor options normalization ([#305](https://redirect.github.com/zip-rs/zip2/issues/305))
- [`ea8a7bb`](ea8a7bba24) fix(test): Conditionalize a zip64 doctest ([#308](https://redirect.github.com/zip-rs/zip2/issues/308))
- [`365c81a`](365c81a39f) Use `xz2` crate instead of a custom implementation ([#306](https://redirect.github.com/zip-rs/zip2/issues/306))
- [`ae94b34`](ae94b3452b) chore: Fix clippy and cargo fmt warnings ([#310](https://redirect.github.com/zip-rs/zip2/issues/310))
- [`a2e062f`](a2e062f370) Merge commit from fork
- [`0199ac2`](0199ac2cb8) Simplify handling for symlink targets
- [`977bb94`](977bb9479d) fix failing tests, remove symlink loop check
- [`3cb29e7`](3cb29e70d1) Partial fix for tests
- [`2182b07`](2182b07686) Refactor
- Additional commits viewable in [compare view](https://github.com/zip-rs/zip2/compare/v2.2.2...v2.3.0)


[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=zip&package-manager=cargo&previous-version=2.2.2&new-version=2.3.0)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`.


---

Dependabot commands and options (trigger Dependabot actions by commenting on this PR):
- `@dependabot rebase` will rebase this PR
- `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it
- `@dependabot merge` will merge this PR after your CI passes on it
- `@dependabot squash and merge` will squash and merge this PR after your CI passes on it
- `@dependabot cancel merge` will cancel a previously requested merge and block automerging
- `@dependabot reopen` will reopen this PR if it is closed
- `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
- `@dependabot show <dependency name> ignore conditions` will show all of the ignore conditions of the specified dependency
- `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)

You can disable automated security fix PRs for this repo from the [Security Alerts page](https://github.com/meilisearch/meilisearch/network/alerts).

Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-03-18 08:57:11 +00:00
69678ed8e1 Bump zip from 2.2.2 to 2.3.0
Bumps [zip](https://github.com/zip-rs/zip2) from 2.2.2 to 2.3.0.
- [Release notes](https://github.com/zip-rs/zip2/releases)
- [Changelog](https://github.com/zip-rs/zip2/blob/master/CHANGELOG.md)
- [Commits](https://github.com/zip-rs/zip2/compare/v2.2.2...v2.3.0)

---
updated-dependencies:
- dependency-name: zip
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-03-18 00:19:49 +00:00
bf144a94d8 No more use FST to find a word without any typo 2025-03-17 14:20:10 +01:00
b0b1888ef9 Add test 2025-03-17 14:20:10 +01:00
6ec1d2b712 Merge #5423
5423: Bump ring to v0.17.14 to compile on old aarch64 r=irevoire a=Kerollmops

This PR will fix [this CI issue](https://github.com/meilisearch/meilisearch/actions/runs/13896085925/job/38876941154) where ring v0.17.13 breaks the compilation on old aarch64 machines by bumping its version to v0.17.14.

Co-authored-by: Kerollmops <clement@meilisearch.com>
2025-03-17 12:53:02 +00:00
cbdf80893d Merge #5422
5422: Add more progress levels to measure merging r=Kerollmops a=Kerollmops

I found out that Meilisearch was not correctly reporting the long indexing times in the progress and that a lot of time was spent on extracting words with all documents already extracted. The reason was that there was no step to report merging the cache and sending the entries to write to the writer thread. This PR adds these entries to the progress.

Co-authored-by: Kerollmops <clement@meilisearch.com>
2025-03-17 12:02:46 +00:00
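
The PR above explains that merging the caches and sending entries to the writer thread had no progress step of their own, so a long merge phase looked like a stalled extraction phase. An illustrative sketch of the idea, with made-up variant names rather than the actual `IndexingStep` enum mentioned in the next commit:

```rust
/// Illustrative sketch only: a progress enum that includes explicit steps for
/// merging caches and writing to the database, which is the gap the PR
/// describes. Variant names are assumptions, not Meilisearch's enum.
#[derive(Debug, Clone, Copy)]
enum IndexingStep {
    ExtractingWords,
    MergingWordCaches,
    WritingToDatabase,
}

fn report(step: IndexingStep) {
    // In the real code this feeds the progress API; here we just log it.
    println!("progress: {step:?}");
}

fn main() {
    report(IndexingStep::ExtractingWords);
    // Without the two steps below, a long merge phase would look like the
    // extraction step was stuck even though all documents were extracted.
    report(IndexingStep::MergingWordCaches);
    report(IndexingStep::WritingToDatabase);
}
```
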
e2156ddfc7 Simplify the IndexingStep progress enum 2025-03-17 11:40:50 +01:00
49dd50dab2 Bump ring to v0.17.14 to compile on old aarch64 2025-03-17 11:29:17 +01:00
13a88d6131 Merge #5407
5407: Geo update bug r=irevoire a=ManyTheFish

# Pull Request

## Related issue
Fixes #5380
Fixes #5399



Co-authored-by: Tamo <tamo@meilisearch.com>
Co-authored-by: ManyTheFish <many@meilisearch.com>
2025-03-17 10:24:33 +00:00
d9875b782d Merge #5421
5421: Accept total batch size in human size r=irevoire a=Kerollmops

This PR fixes the new `experimental-limit-batched-tasks-total-size` to accept human-defined sizes in bytes.

Co-authored-by: Kerollmops <clement@meilisearch.com>
2025-03-17 09:41:22 +00:00
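
The PR above makes `experimental-limit-batched-tasks-total-size` accept human-readable sizes. The hand-rolled parser below only illustrates the idea of turning values such as `"2.5 GiB"` into a byte count; the real option almost certainly delegates to an existing crate and may accept different unit spellings:

```rust
/// Rough sketch of accepting a human-readable size; the accepted formats here
/// are assumptions made for illustration.
fn parse_human_size(input: &str) -> Option<u64> {
    let input = input.trim();
    // Split the numeric prefix from the unit suffix.
    let digits_end = input
        .find(|c: char| !c.is_ascii_digit() && c != '.')
        .unwrap_or(input.len());
    let value: f64 = input[..digits_end].parse().ok()?;
    let multiplier = match input[digits_end..].trim().to_ascii_lowercase().as_str() {
        "" | "b" => 1.0,
        "kb" => 1e3,
        "mb" => 1e6,
        "gb" => 1e9,
        "kib" => 1024.0,
        "mib" => 1024.0 * 1024.0,
        "gib" => 1024.0 * 1024.0 * 1024.0,
        _ => return None,
    };
    Some((value * multiplier) as u64)
}

fn main() {
    assert_eq!(parse_human_size("500"), Some(500));
    assert_eq!(parse_human_size("2.5 GiB"), Some(2_684_354_560));
}
```
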
cb16baab18 Add more progress levels to measure merging 2025-03-17 10:13:29 +01:00
2500e3c067 Merge #5414
5414: Update version for the next release (v1.14.0) in Cargo.toml r=Kerollmops a=meili-bot

⚠️ This PR is automatically generated. Check the new version is the expected one and Cargo.lock has been updated before merging. Fixes https://github.com/meilisearch/meilisearch/issues/5268.

Co-authored-by: Kerollmops <Kerollmops@users.noreply.github.com>
Co-authored-by: Kerollmops <clement@meilisearch.com>
2025-03-14 13:35:54 +00:00
d3e4b2dfe7 Accept total batch size in human size 2025-03-14 13:07:51 +01:00
2a46624e19 Merge #5420
5420: Add support for the progress API of arroy r=Kerollmops a=irevoire

# Pull Request

## Related issue
Fixes https://github.com/meilisearch/meilisearch/issues/5419

## What does this PR do?
- Convert the arroy progress to the meilisearch progress
- Use the new arroy closure to support the progress of arroy


Co-authored-by: Tamo <tamo@meilisearch.com>
2025-03-13 18:03:08 +00:00
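
The PR above converts arroy's progress reports into Meilisearch's progress through a closure handed to arroy. The sketch below uses entirely made-up types to show that callback-conversion pattern; it is not arroy's or Meilisearch's API:

```rust
/// Illustrative sketch with made-up types (not arroy's or Meilisearch's API):
/// a library reports progress through a closure, and the caller converts that
/// report into its own progress representation.
struct LibraryProgress { done: u32, total: u32 }

#[derive(Debug)]
struct AppProgress { percent: f32, step: &'static str }

fn build_index(mut on_progress: impl FnMut(LibraryProgress)) {
    for done in 0..=4 {
        // The library calls back with its own progress type...
        on_progress(LibraryProgress { done, total: 4 });
    }
}

fn main() {
    // ...and the caller converts it to the application's progress type.
    build_index(|p| {
        let app = AppProgress {
            percent: 100.0 * p.done as f32 / p.total as f32,
            step: "building vector index",
        };
        println!("{app:?}");
    });
}
```
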
009c36a4d0 Add support for the progress API of arroy 2025-03-13 19:00:43 +01:00
2a47e25e6d Update the upgrade path snap 2025-03-13 18:35:06 +01:00
82912e191b Merge #5418
5418: Cache embeddings in search r=Kerollmops a=dureuill

# Pull Request

## Related issue
TBD

## What does this PR do?
- Adds a cache for embeddings produced in search
- The cache is disabled by default, and can be enabled following the instructions [here](https://github.com/orgs/meilisearch/discussions/818).
- Had to accommodate the `timeout` test for openai that uses a mock that simulates a timeout on subsequent responses: since the test was reusing the same query, the cache would kick-in and no request would be made to the mock, meaning no timeout any longer and so a failing test 😅 
- `Embedder::embed_search` now accepts a reference instead of an owned `String`.

## Manual testing

- I created 4 indexes on a fresh DB with the same settings (one embedder from openai)
- I sent 1/4 of movies.json to each index
- I sent a federated search request against all 4 indexes, with the same query for each index, using the embedder of each index.

Results:

- The first call took 400ms to 1s. Before this change, it took in the 3s range.
- Any repeated call with the same query took in the range of 25ms.
- Looking at the details at trace log level, I can see that the first index that needs the embedding is taking most of the 400ms in `embed_one`. The other indexes report that the query text is found in the cache and they each take a few µs.

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2025-03-13 16:37:15 +00:00
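
The PR above and the commits below describe a mutex-based LRU cache for search-time embeddings, using the `lru` crate added to milli. A minimal sketch of that shape, assuming a cache keyed by the query text (the real implementation, its capacity, and how it is wired into search differ):

```rust
use std::num::NonZeroUsize;
use std::sync::Mutex;

use lru::LruCache; // the `lru` crate the PR adds to milli

/// Minimal sketch of a mutex-protected embedding cache keyed by query text.
struct EmbeddingCache {
    inner: Mutex<LruCache<String, Vec<f32>>>,
}

impl EmbeddingCache {
    fn new(capacity: NonZeroUsize) -> Self {
        Self { inner: Mutex::new(LruCache::new(capacity)) }
    }

    fn get_or_embed(&self, query: &str, embed: impl FnOnce(&str) -> Vec<f32>) -> Vec<f32> {
        // For simplicity this sketch holds the lock while embedding; the real
        // code has to be more careful about blocking concurrent searches.
        let mut cache = self.inner.lock().unwrap();
        if let Some(hit) = cache.get(query) {
            // Repeated searches with the same query skip the embedder entirely,
            // which is where the reported 3s -> 25ms improvement comes from.
            return hit.clone();
        }
        let vector = embed(query);
        cache.put(query.to_string(), vector.clone());
        vector
    }
}

fn main() {
    let cache = EmbeddingCache::new(NonZeroUsize::new(100).unwrap());
    let v1 = cache.get_or_embed("hello", |_| vec![0.1, 0.2, 0.3]); // miss: calls the embedder
    let v2 = cache.get_or_embed("hello", |_| unreachable!());      // hit: served from cache
    assert_eq!(v1, v2);
}
```
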
e2d372823a Disable the cache by default and make it experimental 2025-03-13 17:22:51 +01:00
1876132172 Mutex-based implementation 2025-03-13 17:22:50 +01:00
d0b0b90d17 fixup tests, in particular foil the cache for the timeout test 2025-03-13 17:22:50 +01:00
b08544e86d Add embedding cache 2025-03-13 17:22:50 +01:00
d9111fe8ce Add lru crate to milli again 2025-03-13 17:22:50 +01:00
41d8161017 Update the versions 2025-03-13 17:22:32 +01:00
7df5715d39 Merge pull request #5406 from meilisearch/bump-heed
Bump heed to v0.22 and arroy to v0.6
2025-03-13 16:52:45 +01:00
5fe02ab5e0 Move to heed 0.22 and arroy 0.6 2025-03-13 15:48:18 +01:00
5ef7767429 Let arroy uses all the memory available instead of 50% of the 70% 2025-03-13 15:06:03 +01:00
3fad48167b remove arroy dependency in the index-scheduler 2025-03-13 14:57:56 +01:00
a92a48b9b9 Do not recompute stats on dumpless upgrade
Co-authored-by: Tamo <tamo@meilisearch.com>
2025-03-13 13:58:58 +01:00
d53225bf64 uses a random seed instead of 42 2025-03-13 12:43:31 +01:00
20896500c2 Bump arroy to the latest version 2025-03-13 12:37:10 +01:00
1af520077c Call the underlying Env::copy_to_path method 2025-03-13 11:49:25 +01:00
7e07cb9de1 Make meilitool prefer WithoutTls Env 2025-03-13 11:47:19 +01:00
a12b06d99d Merge #5369
5369: exhaustive facet search r=ManyTheFish a=ManyTheFish

Fixes #5403

This PR adds an `exhaustiveFacetCount` field to the `/facet-search` API allowing the end-user to have a better facet count when having a distinct attribute set in the index settings.

 # Usage

`POST /index/:index_uid/facet-search`
**Body:**
```json
{
  "facetQuery": "blob",
  "facetName": "genres",
  "q": "",
  "exhaustiveFacetCount": true
}
```

# Prototype Docker images

```sh
$ docker pull getmeili/meilisearch:prototype-exhaustive-facet-search-00
```

Co-authored-by: ManyTheFish <many@meilisearch.com>
2025-03-13 10:36:04 +00:00
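
The PR above adds `exhaustiveFacetCount` to `/facet-search` for indexes with a distinct attribute. The sketch below is only an interpretation of why a distinct attribute changes facet counts — counting every matching document versus counting each distinct group once — and not Meilisearch's actual counting algorithm:

```rust
use std::collections::{HashMap, HashSet};

/// Hedged illustration: a naive count tallies every matching document, while a
/// count that respects the distinct attribute tallies each distinct group once.
struct Doc { genre: &'static str, franchise: &'static str }

fn main() {
    let docs = [
        Doc { genre: "action", franchise: "alien" },
        Doc { genre: "action", franchise: "alien" },  // same franchise: deduplicated by distinct
        Doc { genre: "action", franchise: "matrix" },
    ];

    let mut naive: HashMap<&str, u64> = HashMap::new();
    let mut per_distinct: HashMap<&str, HashSet<&str>> = HashMap::new();
    for doc in &docs {
        *naive.entry(doc.genre).or_default() += 1;
        per_distinct.entry(doc.genre).or_default().insert(doc.franchise);
    }

    assert_eq!(naive["action"], 3);              // counts every document
    assert_eq!(per_distinct["action"].len(), 2); // counts distinct groups once
}
```
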
331dc3d241 Add a comment to explain why we keep debug assertions 2025-03-13 11:29:00 +01:00
ef9d9f8481 set the memory in arroy 2025-03-13 11:29:00 +01:00
d3d22d8ed4 Prefer waiting for the task before getting the indexes 2025-03-13 11:29:00 +01:00
5e6abcf50c Prefer using WithoutTls for the auth env 2025-03-13 11:29:00 +01:00
a4aaf932ba Fix some test (invalid anyway) 2025-03-13 11:29:00 +01:00
16c962eb30 Enable debug assertions of heed 2025-03-13 11:07:49 +01:00
55ca2c4481 Avoid opening the Auth environment multiple times 2025-03-13 11:07:49 +01:00
fedb444e66 Fix the upgrade arroy calls 2025-03-13 11:07:49 +01:00
bef5954741 Use a WithoutTls env 2025-03-13 11:07:49 +01:00
ff8cf38d6b Move to the latest version of arroy 2025-03-13 11:07:48 +01:00
f8ac575ec5 Move to the latest version of arroy 2025-03-13 11:07:48 +01:00
566b4efb06 Dumpless upgrade from v1.13 to v1.14 2025-03-13 11:07:44 +01:00
1d499ed9b2 Use the new arroy upgrade method to move from 0.4 to 0.5 2025-03-13 11:07:44 +01:00
3bc62f0549 WIP: Still need to introduce a Env::copy_to_path method 2025-03-13 11:07:39 +01:00
21bbbdec76 Specify WithoutTls everywhere 2025-03-13 11:07:38 +01:00
78ebd8dba2 Fix the error variants 2025-03-13 11:07:38 +01:00
34df44a002 Open Env without TLS 2025-03-13 11:07:38 +01:00
48a27f669e Bump heed and other dependencies 2025-03-13 11:07:37 +01:00
e2d0ce52ba Merge #5384
5384: Get multiple documents by ids r=irevoire a=dureuill

# Pull Request

## Related issue
Fixes #5345 

## What does this PR do?
- Implements [public usage](https://www.notion.so/meilisearch/Get-documents-by-ID-1994b06b651f805ba273e1c6b75ce4d8)
- Slightly refactor error messages for the `/similar` route

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2025-03-12 17:26:49 +00:00
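
The PR above (together with the later commit "Add ids parameter to GET documents and POST documents/fetch") lets clients fetch several documents by id. A hedged sketch of what such a request body could look like, built with `serde_json`; apart from `ids`, the field names and accepted shapes are assumptions based on the route's usual pagination parameters:

```rust
use serde_json::json;

fn main() {
    // Hypothetical body for `POST /indexes/:uid/documents/fetch` with the new
    // `ids` parameter; the exact shape may differ from the real API.
    let body = json!({
        "ids": ["movie-100", "movie-205", "movie-317"],
        "fields": ["title", "genres"],
        "limit": 3,
    });
    println!("{}", serde_json::to_string_pretty(&body).unwrap());
}
```
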
995f8962bd Merge #5398
5398: Bump ring from 0.17.8 to 0.17.13 r=Kerollmops a=dependabot[bot]

Bumps [ring](https://github.com/briansmith/ring) from 0.17.8 to 0.17.13.
Changelog (sourced from [ring's changelog](https://github.com/briansmith/ring/blob/main/RELEASES.md)):

**Version 0.17.13 (2025-03-06)**

Increased MSRV to 1.66.0 to avoid bugs in earlier versions so that we can safely use `core::arch::x86_64::__cpuid` and `core::arch::x86::__cpuid` from Rust in future releases.

AVX2-based VAES-CLMUL implementation. This will be a notable performance improvement for most newish x86-64 systems. This will likely raise the minimum binutils version supported for very old Linux distros.

**Version 0.17.12 (2025-03-05)**

Bug fix: [briansmith/ring#2447](https://redirect.github.com/briansmith/ring/pull/2447) for denial of service (DoS).

- Fixes a panic in `ring::aead::quic::HeaderProtectionKey::new_mask()` when integer overflow checking is enabled. In the QUIC protocol, an attacker can induce this panic by sending a specially-crafted packet. Even unintentionally it is likely to occur in 1 out of every 2**32 packets sent and/or received.

- Fixes a panic on 64-bit targets in `ring::aead::{AES_128_GCM, AES_256_GCM}` when overflow checking is enabled, when encrypting/decrypting approximately 68,719,476,700 bytes (about 64 gigabytes) of data in a single chunk. Protocols like TLS and SSH are not affected by this because those protocols break large amounts of data into small chunks. Similarly, most applications will not attempt to encrypt/decrypt 64GB of data in one chunk.

Overflow checking is not enabled in release mode by default, but `RUSTFLAGS="-C overflow-checks"` or `overflow-checks = true` in the Cargo.toml profile can override this. Overflow checking is usually enabled by default in debug mode.

Commits: see full diff in the [compare view](https://github.com/briansmith/ring/commits).


[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=ring&package-manager=cargo&previous-version=0.17.8&new-version=0.17.13)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`.


---


Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-03-12 13:20:30 +00:00
1cd00f37c0 Merge #5413
5413: Make sure to delete useless prefixes r=ManyTheFish a=Kerollmops

We discovered a bug where the new indexer was still writing empty roaring bitmaps instead of deleting the prefix entry from the prefix database.

Co-authored-by: Kerollmops <clement@meilisearch.com>
2025-03-12 10:54:04 +00:00
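
The PR above fixes the new indexer writing empty roaring bitmaps instead of deleting the prefix entry. A minimal sketch of the rule being enforced, with a `HashMap` standing in for the LMDB prefix database:

```rust
use std::collections::HashMap;

use roaring::RoaringBitmap;

/// Minimal sketch of the rule the fix enforces: an empty bitmap means the
/// prefix entry must be deleted, not overwritten with an empty value.
fn write_prefix_docids(
    db: &mut HashMap<String, RoaringBitmap>,
    prefix: &str,
    docids: RoaringBitmap,
) {
    if docids.is_empty() {
        // Keeping empty bitmaps around wastes space and makes the prefix look
        // like it still matches documents.
        db.remove(prefix);
    } else {
        db.insert(prefix.to_string(), docids);
    }
}

fn main() {
    let mut db = HashMap::new();
    let mut docids = RoaringBitmap::new();
    docids.insert(42);
    write_prefix_docids(&mut db, "hel", docids);
    assert!(db.contains_key("hel"));

    // After the last matching document disappears, the entry is removed.
    write_prefix_docids(&mut db, "hel", RoaringBitmap::new());
    assert!(!db.contains_key("hel"));
}
```
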
1aa3375e12 Update version for the next release (v1.14.0) in Cargo.toml 2025-03-12 10:51:04 +00:00
60ff1b19a8 Searching for a document that does not exist no longer raises an error 2025-03-12 11:50:39 +01:00
7df5e3f059 Fix error message
Co-authored-by: Tamo <tamo@meilisearch.com>
2025-03-12 11:48:40 +01:00
0197dc87e0 Make sure to delete useless prefixes 2025-03-12 11:24:13 +01:00
7a172b82ca Add test 2025-03-12 11:22:59 +01:00
eb3ff325d1 Add an exhaustiveFacetCount field to the facet-search API 2025-03-12 11:22:59 +01:00
d3cd5ea689 Check if the geo fields changed additionally to the other faceted fields when reindexing facets 2025-03-12 11:20:10 +01:00
3ed43f9097 add a failing test reproducing the bug 2025-03-12 11:20:10 +01:00
a2a86ef4e2 Merge #5254
5254: Granular Filterable attribute settings r=ManyTheFish a=ManyTheFish

# Related
**Issue:** https://github.com/meilisearch/meilisearch/issues/5163
**PRD:** https://meilisearch.notion.site/API-usage-Settings-to-opt-out-indexing-features-filterableAttributes-1764b06b651f80aba8bdf359b2df3ca8

# Summary
Change the `filterableAttributes` settings to let the user choose which facet feature he wants to activate or not.
Deactivating a feature will avoid some database computation in the indexing process and save time and disk size.

# Example

`PATCH /indexes/:index_uid/settings`

```json
{
  "filterableAttributes": [
    {
      "patterns": [
        "cattos",
        "doggos.age"
      ],
      "features": {
        "facetSearch": false,
        "filter": {
          "equality": true,
          "comparison": false
        }
      }
    }
  ]
}
```

# Impact on the codebase
- Settings API:
  - `/settings`
  - `/settings/filterable-attributes`
  - OpenAPI 
  - may impact the LocalizedAttributesRules due to the AttributePatterns factorization
- Database:
  - Filterable attributes format changed
  - Faceted field_ids are no more stored in the database
  - FieldIdsMap has no more unexisting fields
- Search:
  - Search using filters
  - Facet search
  - `Attributes` ranking rule
  - Distinct attribute
  - Facet distribution
- Settings reindexing:
  - searchable
  - facet
  - vector
  - geo
- Document indexing:
  - searchable
  - facet
  - vector
  - geo
- Dump import

# Note for the reviewers
The changes are huge and have been split in different commits with a dedicated explanation, I suggest reviewing the commit 1by1

Co-authored-by: ManyTheFish <many@meilisearch.com>
2025-03-12 09:00:43 +00:00
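
The JSON example in the PR above shows the new granular `filterableAttributes` rules. The serde sketch below is a hedged approximation of that shape (names and defaults may differ from Meilisearch's real types); it shows how a plain attribute name and a pattern rule with feature toggles can coexist in the same setting:

```rust
use serde::Deserialize;

/// Hedged approximation of the `filterableAttributes` shape shown in the PR body.
#[derive(Debug, Deserialize)]
#[serde(untagged)]
enum FilterableAttributesRule {
    /// Plain attribute name, as in the previous format.
    Field(String),
    /// Pattern-based rule with per-feature toggles.
    Patterns { patterns: Vec<String>, features: Features },
}

#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
struct Features {
    facet_search: bool,
    filter: FilterFeatures,
}

#[derive(Debug, Deserialize)]
struct FilterFeatures {
    equality: bool,
    comparison: bool,
}

fn main() {
    let json = r#"[
        "title",
        { "patterns": ["cattos", "doggos.age"],
          "features": { "facetSearch": false,
                        "filter": { "equality": true, "comparison": false } } }
    ]"#;
    let rules: Vec<FilterableAttributesRule> = serde_json::from_str(json).unwrap();
    println!("{rules:#?}");
}
```
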
d0dda78f3d Merge #5401
5401: Make composite embedders an experimental feature r=irevoire a=dureuill

# Pull Request

## Related issue
Fixes #5343 

## What does this PR do?
- Introduce new `compositeEmbedders` experimental feature
- Guard `source = "composite"` and `searchEmbedder`, `indexingEmbedder` behind enabling the feature.
- Update tests accordingly

## Dumpless upgrade

- Adding an experimental feature is never a breaking change

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2025-03-11 14:20:36 +00:00
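
The PR above guards `source = "composite"` behind a new `compositeEmbedders` experimental feature. An illustrative sketch of that kind of runtime gate, with hypothetical names rather than Meilisearch's actual types:

```rust
/// Illustrative sketch of feature-gating a setting: if the experimental flag is
/// off, a config that requests the composite source is rejected.
#[derive(Default)]
struct RuntimeFeatures {
    composite_embedders: bool,
}

#[derive(Debug, PartialEq)]
enum EmbedderSource {
    OpenAi,
    Composite,
}

fn check_embedder_source(
    features: &RuntimeFeatures,
    source: &EmbedderSource,
) -> Result<(), String> {
    if *source == EmbedderSource::Composite && !features.composite_embedders {
        // Surfacing the gate as a settings error keeps the feature opt-in.
        return Err("using `source: composite` requires enabling the `compositeEmbedders` experimental feature".into());
    }
    Ok(())
}

fn main() {
    let disabled = RuntimeFeatures::default();
    assert!(check_embedder_source(&disabled, &EmbedderSource::Composite).is_err());

    let enabled = RuntimeFeatures { composite_embedders: true };
    assert!(check_embedder_source(&enabled, &EmbedderSource::Composite).is_ok());
    assert!(check_embedder_source(&enabled, &EmbedderSource::OpenAi).is_ok());
}
```
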
fa8afc5cfd Style change after review
Co-authored-by: Tamo <tamo@meilisearch.com>
2025-03-11 13:25:35 +01:00
aa32b719c7 Add tests about experimentalness of the feature and fix existing 2025-03-10 14:23:22 +01:00
41d2b1e52b Analytics 2025-03-10 14:23:07 +01:00
54ee81bb09 Make composite embedders experimental 2025-03-10 14:22:47 +01:00
9d9e0d4c54 Add analytics 2025-03-10 11:33:15 +01:00
19c9caed39 Fix tests 2025-03-10 11:11:48 +01:00
21c3b3957e tests: Change get_document_by_filter to fetch_documents 2025-03-10 11:11:48 +01:00
f292fc9ac0 Add ids parameter to GET documents and POST documents/fetch 2025-03-10 11:11:48 +01:00
1d3c4642a6 Don't use Deserr for ExternalDocumentId, instead convert to error afterward 2025-03-10 11:11:48 +01:00
bea28968a0 Bump ring from 0.17.8 to 0.17.13
Bumps [ring](https://github.com/briansmith/ring) from 0.17.8 to 0.17.13.
- [Changelog](https://github.com/briansmith/ring/blob/main/RELEASES.md)
- [Commits](https://github.com/briansmith/ring/commits)

---
updated-dependencies:
- dependency-name: ring
  dependency-type: indirect
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-03-07 17:04:57 +00:00
122 changed files with 2909 additions and 1049 deletions


@@ -6,11 +6,7 @@ on:
   # Everyday at 5:00am
   - cron: "0 5 * * *"
   pull_request:
-  push:
-    # trying and staging branches are for Bors config
-    branches:
-      - trying
-      - staging
+  merge_group:

 env:
   CARGO_TERM_COLOR: always


@@ -150,7 +150,7 @@ Some notes on GitHub PRs:
 - The PR title should be accurate and descriptive of the changes.
 - [Convert your PR as a draft](https://help.github.com/en/github/collaborating-with-issues-and-pull-requests/changing-the-stage-of-a-pull-request) if your changes are a work in progress: no one will review it until you pass your PR as ready for review.<br>
   The draft PRs are recommended when you want to show that you are working on something and make your work visible.
-- The branch related to the PR must be **up-to-date with `main`** before merging. Fortunately, this project uses [Bors](https://github.com/bors-ng/bors-ng) to automatically enforce this requirement without the PR author having to rebase manually.
+- The branch related to the PR must be **up-to-date with `main`** before merging. Fortunately, this project uses [GitHub Merge Queues](https://github.blog/news-insights/product-news/github-merge-queue-is-generally-available/) to automatically enforce this requirement without the PR author having to rebase manually.

 ## Release Process (for internal team only)
@@ -158,8 +158,7 @@ Meilisearch tools follow the [Semantic Versioning Convention](https://semver.org
 ### Automation to rebase and Merge the PRs

-This project integrates a bot that helps us manage pull requests merging.<br>
-_[Read more about this](https://github.com/meilisearch/integration-guides/blob/main/resources/bors.md)._
+This project uses GitHub Merge Queues that helps us manage pull requests merging.

 ### How to Publish a new Release

Cargo.lock (generated) — 366 changed lines. Only the version changes visible in the flattened diff are summarized here:

- All workspace crates move from 1.13.3 to 1.14.0: benchmarks, build-info, dump, file-store, filter-parser, flatten-serde-json, fuzzers, index-scheduler, json-depth-checker, meili-snap, meilisearch, meilisearch-auth, meilisearch-types, meilitool, milli, permissive-json-pointer.
- arroy 0.5.0 → 0.6.1 (now pulling `tracing` instead of `log`, and `thiserror` 2.0.9); the duplicate arroy 0.5.0 entry pinned to the `DO-NOT-DELETE-upgrade-v04-to-v05` git tag disappears, and meilitool and index-scheduler no longer depend on arroy directly.
- heed 0.20.5 → 0.22.0, heed-types 0.20.1 → 0.21.0, lmdb-master-sys 0.2.4 → 0.2.5.
- charabia 0.9.2 → 0.9.3 and the lindera crates 0.32.2 → 0.32.3.
- zip 2.2.2 → 2.3.0 in meilisearch's dependencies.
- New entries: lru 0.13.0 (added to milli), lzma-sys 0.1.20, getrandom 0.3.1 (alongside getrandom 0.2.15) and wasi 0.13.3+wasi-0.2.2; index-scheduler additionally gains byte-unit and indexmap.
- Assorted transitive bumps: bitflags 2.6.0 → 2.9.0, bzip2 0.4.4 → 0.5.2, bzip2-sys 0.1.11 → 0.1.13, cc 1.0.104 → 1.2.16, js-sys 0.3.69 → 0.3.77, libc 0.2.169 → 0.2.171, log 0.4.21 → 0.4.26, once_cell 1.20.2 → 1.21.0.
name = "ring" name = "ring"
version = "0.17.8" version = "0.17.14"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c17fa4cb658e3583423e915b9f3acc01cceaee1860e33d59ebae66adc3a2dc0d" checksum = "a4689e6c2294d81e88dc6261c768b63bc4fcdb852be6d1352498b114f61383b7"
dependencies = [ dependencies = [
"cc", "cc",
"cfg-if", "cfg-if",
"getrandom", "getrandom 0.2.15",
"libc", "libc",
"spin",
"untrusted", "untrusted",
"windows-sys 0.52.0", "windows-sys 0.52.0",
] ]
@ -5009,7 +5025,7 @@ version = "0.38.41"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d7f649912bc1495e167a6edee79151c84b1bad49748cb4f1f1167f459f6224f6" checksum = "d7f649912bc1495e167a6edee79151c84b1bad49748cb4f1f1167f459f6224f6"
dependencies = [ dependencies = [
"bitflags 2.6.0", "bitflags 2.9.0",
"errno", "errno",
"libc", "libc",
"linux-raw-sys", "linux-raw-sys",
@ -5131,9 +5147,9 @@ checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4"
[[package]] [[package]]
name = "serde" name = "serde"
version = "1.0.217" version = "1.0.219"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "02fc4265df13d6fa1d00ecff087228cc0a2b5f3c0e87e258d8b94a156e984c70" checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6"
dependencies = [ dependencies = [
"serde_derive", "serde_derive",
] ]
@ -5149,9 +5165,9 @@ dependencies = [
[[package]] [[package]]
name = "serde_derive" name = "serde_derive"
version = "1.0.217" version = "1.0.219"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5a9bf7cf98d04a2b28aead066b7496853d4779c9cc183c440dbac457641e19a0" checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00"
dependencies = [ dependencies = [
"proc-macro2", "proc-macro2",
"quote", "quote",
@ -5160,9 +5176,9 @@ dependencies = [
[[package]] [[package]]
name = "serde_json" name = "serde_json"
version = "1.0.138" version = "1.0.140"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d434192e7da787e94a6ea7e9670b26a036d0ca41e0b7efb2676dd32bae872949" checksum = "20068b6e96dc6c9bd23e01df8827e6c7e1f2fddd43c21810382803c136b99373"
dependencies = [ dependencies = [
"indexmap", "indexmap",
"itoa", "itoa",
@ -5530,7 +5546,7 @@ version = "0.5.5"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ec7dddc5f0fee506baf8b9fdb989e242f17e4b11c61dfbb0635b705217199eea" checksum = "ec7dddc5f0fee506baf8b9fdb989e242f17e4b11c61dfbb0635b705217199eea"
dependencies = [ dependencies = [
"bitflags 2.6.0", "bitflags 2.9.0",
"byteorder", "byteorder",
"enum-as-inner", "enum-as-inner",
"libc", "libc",
@ -5586,7 +5602,7 @@ checksum = "9a8a559c81686f576e8cd0290cd2a24a2a9ad80c98b3478856500fcbd7acd704"
dependencies = [ dependencies = [
"cfg-if", "cfg-if",
"fastrand", "fastrand",
"getrandom", "getrandom 0.2.15",
"once_cell", "once_cell",
"rustix", "rustix",
"windows-sys 0.52.0", "windows-sys 0.52.0",
@ -5761,7 +5777,7 @@ dependencies = [
"aho-corasick", "aho-corasick",
"derive_builder 0.12.0", "derive_builder 0.12.0",
"esaxx-rs", "esaxx-rs",
"getrandom", "getrandom 0.2.15",
"itertools 0.12.1", "itertools 0.12.1",
"lazy_static", "lazy_static",
"log", "log",
@ -6104,9 +6120,9 @@ checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"
[[package]] [[package]]
name = "unicode-normalization" name = "unicode-normalization"
version = "0.1.23" version = "0.1.24"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a56d1686db2308d901306f92a263857ef59ea39678a5458e7cb17f01415101f5" checksum = "5033c97c4262335cded6d6fc3e5c18ab755e1a3dc96376350f3d8e9f009ad956"
dependencies = [ dependencies = [
"tinyvec", "tinyvec",
] ]
@ -6248,7 +6264,7 @@ version = "1.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f8c5f0a0af699448548ad1a2fbf920fb4bee257eae39953ba95cb84891a0446a" checksum = "f8c5f0a0af699448548ad1a2fbf920fb4bee257eae39953ba95cb84891a0446a"
dependencies = [ dependencies = [
"getrandom", "getrandom 0.2.15",
"serde", "serde",
] ]
@ -6345,24 +6361,34 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
[[package]] [[package]]
name = "wasm-bindgen" name = "wasi"
version = "0.2.92" version = "0.13.3+wasi-0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4be2531df63900aeb2bca0daaaddec08491ee64ceecbee5076636a3b026795a8" checksum = "26816d2e1a4a36a2940b96c5296ce403917633dff8f3440e9b236ed6f6bacad2"
dependencies = [
"wit-bindgen-rt",
]
[[package]]
name = "wasm-bindgen"
version = "0.2.100"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1edc8929d7499fc4e8f0be2262a241556cfc54a0bea223790e71446f2aab1ef5"
dependencies = [ dependencies = [
"cfg-if", "cfg-if",
"once_cell",
"rustversion",
"wasm-bindgen-macro", "wasm-bindgen-macro",
] ]
[[package]] [[package]]
name = "wasm-bindgen-backend" name = "wasm-bindgen-backend"
version = "0.2.92" version = "0.2.100"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "614d787b966d3989fa7bb98a654e369c762374fd3213d212cfc0251257e747da" checksum = "2f0a0651a5c2bc21487bde11ee802ccaf4c51935d0d3d42a6101f98161700bc6"
dependencies = [ dependencies = [
"bumpalo", "bumpalo",
"log", "log",
"once_cell",
"proc-macro2", "proc-macro2",
"quote", "quote",
"syn 2.0.87", "syn 2.0.87",
@ -6383,9 +6409,9 @@ dependencies = [
[[package]] [[package]]
name = "wasm-bindgen-macro" name = "wasm-bindgen-macro"
version = "0.2.92" version = "0.2.100"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a1f8823de937b71b9460c0c34e25f3da88250760bec0ebac694b49997550d726" checksum = "7fe63fc6d09ed3792bd0897b314f53de8e16568c2b3f7982f468c0bf9bd0b407"
dependencies = [ dependencies = [
"quote", "quote",
"wasm-bindgen-macro-support", "wasm-bindgen-macro-support",
@ -6393,9 +6419,9 @@ dependencies = [
[[package]] [[package]]
name = "wasm-bindgen-macro-support" name = "wasm-bindgen-macro-support"
version = "0.2.92" version = "0.2.100"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7" checksum = "8ae87ea40c9f689fc23f209965b6fb8a99ad69aeeb0231408be24920604395de"
dependencies = [ dependencies = [
"proc-macro2", "proc-macro2",
"quote", "quote",
@ -6406,9 +6432,12 @@ dependencies = [
[[package]] [[package]]
name = "wasm-bindgen-shared" name = "wasm-bindgen-shared"
version = "0.2.92" version = "0.2.100"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "af190c94f2773fdb3729c55b007a722abb5384da03bc0986df4c289bf5567e96" checksum = "1a05d73b933a847d6cccdda8f838a22ff101ad9bf93e33684f39c1f5f0eece3d"
dependencies = [
"unicode-ident",
]
[[package]] [[package]]
name = "wasm-streams" name = "wasm-streams"
@ -6813,6 +6842,15 @@ dependencies = [
"url", "url",
] ]
[[package]]
name = "wit-bindgen-rt"
version = "0.33.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3268f3d866458b787f390cf61f4bbb563b922d091359f9608842999eaee3943c"
dependencies = [
"bitflags 2.9.0",
]
[[package]] [[package]]
name = "write16" name = "write16"
version = "1.0.0" version = "1.0.0"
@ -6847,7 +6885,7 @@ dependencies = [
[[package]] [[package]]
name = "xtask" name = "xtask"
version = "1.13.3" version = "1.14.0"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"build-info", "build-info",
@ -6868,6 +6906,15 @@ dependencies = [
"uuid", "uuid",
] ]
[[package]]
name = "xz2"
version = "0.1.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "388c44dc09d76f1536602ead6d325eb532f5c122f17782bd57fb47baeeb767e2"
dependencies = [
"lzma-sys",
]
[[package]] [[package]]
name = "yada" name = "yada"
version = "0.5.1" version = "0.5.1"
@ -7009,9 +7056,9 @@ dependencies = [
[[package]] [[package]]
name = "zip" name = "zip"
version = "2.2.2" version = "2.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ae9c1ea7b3a5e1f4b922ff856a129881167511563dc219869afe3787fc0c1a45" checksum = "84e9a772a54b54236b9b744aaaf8d7be01b4d6e99725523cb82cb32d1c81b1d7"
dependencies = [ dependencies = [
"aes", "aes",
"arbitrary", "arbitrary",
@ -7022,15 +7069,16 @@ dependencies = [
"deflate64", "deflate64",
"displaydoc", "displaydoc",
"flate2", "flate2",
"getrandom 0.3.1",
"hmac", "hmac",
"indexmap", "indexmap",
"lzma-rs", "lzma-rs",
"memchr", "memchr",
"pbkdf2", "pbkdf2",
"rand",
"sha1", "sha1",
"thiserror 2.0.9", "thiserror 2.0.9",
"time", "time",
"xz2",
"zeroize", "zeroize",
"zopfli", "zopfli",
"zstd", "zstd",

View File

@ -22,7 +22,7 @@ members = [
] ]
[workspace.package] [workspace.package]
version = "1.13.3" version = "1.14.0"
authors = [ authors = [
"Quentin de Quelen <quentin@dequelen.me>", "Quentin de Quelen <quentin@dequelen.me>",
"Clément Renault <clement@meilisearch.com>", "Clément Renault <clement@meilisearch.com>",
@ -36,6 +36,12 @@ license = "MIT"
[profile.release] [profile.release]
codegen-units = 1 codegen-units = 1
# We now compile heed without the NDEBUG define for better performance.
# However, we still enable debug assertions for a better detection of
# disk corruption on the cloud or in OSS.
[profile.release.package.heed]
debug-assertions = true
[profile.dev.package.flate2] [profile.dev.package.flate2]
opt-level = 3 opt-level = 3

View File

@ -20,7 +20,7 @@
<p align="center"> <p align="center">
<a href="https://deps.rs/repo/github/meilisearch/meilisearch"><img src="https://deps.rs/repo/github/meilisearch/meilisearch/status.svg" alt="Dependency status"></a> <a href="https://deps.rs/repo/github/meilisearch/meilisearch"><img src="https://deps.rs/repo/github/meilisearch/meilisearch/status.svg" alt="Dependency status"></a>
<a href="https://github.com/meilisearch/meilisearch/blob/main/LICENSE"><img src="https://img.shields.io/badge/license-MIT-informational" alt="License"></a> <a href="https://github.com/meilisearch/meilisearch/blob/main/LICENSE"><img src="https://img.shields.io/badge/license-MIT-informational" alt="License"></a>
<a href="https://ms-bors.herokuapp.com/repositories/52"><img src="https://bors.tech/images/badge_small.svg" alt="Bors enabled"></a> <a href="https://github.com/meilisearch/meilisearch/queue"><img alt="Merge Queues enabled" src="https://img.shields.io/badge/Merge_Queues-enabled-%2357cf60?logo=github"></a>
</p> </p>
<p align="center">⚡ A lightning-fast search engine that fits effortlessly into your apps, websites, and workflow 🔍</p> <p align="center">⚡ A lightning-fast search engine that fits effortlessly into your apps, websites, and workflow 🔍</p>

View File

@ -1,10 +0,0 @@
status = [
'Tests on ubuntu-22.04',
'Tests on macos-13',
'Tests on windows-2022',
'Run Clippy',
'Run Rustfmt',
'Run tests in debug',
]
# 3 hours timeout
timeout-sec = 10800

View File

@ -35,7 +35,8 @@ fn setup_dir(path: impl AsRef<Path>) {
fn setup_index() -> Index { fn setup_index() -> Index {
let path = "benches.mmdb"; let path = "benches.mmdb";
setup_dir(path); setup_dir(path);
let mut options = EnvOpenOptions::new(); let options = EnvOpenOptions::new();
let mut options = options.read_txn_without_tls();
options.map_size(100 * 1024 * 1024 * 1024); // 100 GB options.map_size(100 * 1024 * 1024 * 1024); // 100 GB
options.max_readers(100); options.max_readers(100);
Index::new(options, path, true).unwrap() Index::new(options, path, true).unwrap()
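The two-line change above (build the options, then switch them with read_txn_without_tls) is the same migration applied to every LMDB environment opened in this diff. A minimal sketch of the resulting open pattern, assuming the heed re-export paths used elsewhere in the codebase; the path, map size, and reader count are placeholders:

use meilisearch_types::heed::{Env, EnvOpenOptions, WithoutTls};

// Open an LMDB environment whose read transactions are not bound to
// thread-local storage, mirroring the pattern introduced in this diff.
fn open_env(path: &str) -> Result<Env<WithoutTls>, meilisearch_types::heed::Error> {
    let options = EnvOpenOptions::new();
    let mut options = options.read_txn_without_tls();
    options.map_size(100 * 1024 * 1024 * 1024); // 100 GiB, as in the benches
    options.max_readers(100);
    // Opening an env is unsafe in heed: the caller must guarantee the same
    // environment is not opened twice in the same process.
    unsafe { options.open(path) }
}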

View File

@ -65,7 +65,8 @@ pub fn base_setup(conf: &Conf) -> Index {
} }
create_dir_all(conf.database_name).unwrap(); create_dir_all(conf.database_name).unwrap();
let mut options = EnvOpenOptions::new(); let options = EnvOpenOptions::new();
let mut options = options.read_txn_without_tls();
options.map_size(100 * 1024 * 1024 * 1024); // 100 GB options.map_size(100 * 1024 * 1024 * 1024); // 100 GB
options.max_readers(100); options.max_readers(100);
let index = Index::new(options, conf.database_name, true).unwrap(); let index = Index::new(options, conf.database_name, true).unwrap();

View File

@ -326,6 +326,7 @@ pub(crate) mod test {
index_uids: maplit::btreemap! { "doggo".to_string() => 1 }, index_uids: maplit::btreemap! { "doggo".to_string() => 1 },
progress_trace: Default::default(), progress_trace: Default::default(),
write_channel_congestion: None, write_channel_congestion: None,
internal_database_sizes: Default::default(),
}, },
enqueued_at: Some(BatchEnqueuedAt { enqueued_at: Some(BatchEnqueuedAt {
earliest: datetime!(2022-11-11 0:00 UTC), earliest: datetime!(2022-11-11 0:00 UTC),

View File

@ -57,7 +57,8 @@ fn main() {
let opt = opt.clone(); let opt = opt.clone();
let handle = std::thread::spawn(move || { let handle = std::thread::spawn(move || {
let mut options = EnvOpenOptions::new(); let options = EnvOpenOptions::new();
let mut options = options.read_txn_without_tls();
options.map_size(1024 * 1024 * 1024 * 1024); options.map_size(1024 * 1024 * 1024 * 1024);
let tempdir = match opt.path { let tempdir = match opt.path {
Some(path) => TempDir::new_in(path).unwrap(), Some(path) => TempDir::new_in(path).unwrap(),

View File

@ -13,6 +13,7 @@ license.workspace = true
[dependencies] [dependencies]
anyhow = "1.0.95" anyhow = "1.0.95"
bincode = "1.3.3" bincode = "1.3.3"
byte-unit = "5.1.6"
bumpalo = "3.16.0" bumpalo = "3.16.0"
bumparaw-collections = "0.1.4" bumparaw-collections = "0.1.4"
convert_case = "0.6.0" convert_case = "0.6.0"
@ -22,6 +23,7 @@ dump = { path = "../dump" }
enum-iterator = "2.1.0" enum-iterator = "2.1.0"
file-store = { path = "../file-store" } file-store = { path = "../file-store" }
flate2 = "1.0.35" flate2 = "1.0.35"
indexmap = "2.7.0"
meilisearch-auth = { path = "../meilisearch-auth" } meilisearch-auth = { path = "../meilisearch-auth" }
meilisearch-types = { path = "../meilisearch-types" } meilisearch-types = { path = "../meilisearch-types" }
memmap2 = "0.9.5" memmap2 = "0.9.5"
@ -44,7 +46,6 @@ ureq = "2.12.1"
uuid = { version = "1.11.0", features = ["serde", "v4"] } uuid = { version = "1.11.0", features = ["serde", "v4"] }
[dev-dependencies] [dev-dependencies]
arroy = "0.5.0"
big_s = "1.0.2" big_s = "1.0.2"
crossbeam-channel = "0.5.14" crossbeam-channel = "0.5.14"
# fixed version due to format breakages in v1.40 # fixed version due to format breakages in v1.40

View File

@ -2,7 +2,7 @@ use std::sync::{Arc, RwLock};
use meilisearch_types::features::{InstanceTogglableFeatures, Network, RuntimeTogglableFeatures}; use meilisearch_types::features::{InstanceTogglableFeatures, Network, RuntimeTogglableFeatures};
use meilisearch_types::heed::types::{SerdeJson, Str}; use meilisearch_types::heed::types::{SerdeJson, Str};
use meilisearch_types::heed::{Database, Env, RwTxn}; use meilisearch_types::heed::{Database, Env, RwTxn, WithoutTls};
use crate::error::FeatureNotEnabledError; use crate::error::FeatureNotEnabledError;
use crate::Result; use crate::Result;
@ -118,6 +118,19 @@ impl RoFeatures {
.into()) .into())
} }
} }
pub fn check_composite_embedders(&self, disabled_action: &'static str) -> Result<()> {
if self.runtime.composite_embedders {
Ok(())
} else {
Err(FeatureNotEnabledError {
disabled_action,
feature: "composite embedders",
issue_link: "https://github.com/orgs/meilisearch/discussions/816",
}
.into())
}
}
} }
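A hypothetical call site for the new guard, following the shape of the other check_* methods on RoFeatures; the import path, the &RoFeatures parameter, and the action string are assumptions for illustration only:

use index_scheduler::{Error, RoFeatures};

// Return the standard "feature not enabled" error (carrying the discussion
// link defined above) unless composite embedders are enabled on this instance.
fn guard_composite_embedders(features: &RoFeatures) -> Result<(), Error> {
    features.check_composite_embedders("setting an embedder `source` to `composite`")
}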
impl FeatureData { impl FeatureData {
@ -126,7 +139,7 @@ impl FeatureData {
} }
pub fn new( pub fn new(
env: &Env, env: &Env<WithoutTls>,
wtxn: &mut RwTxn, wtxn: &mut RwTxn,
instance_features: InstanceTogglableFeatures, instance_features: InstanceTogglableFeatures,
) -> Result<Self> { ) -> Result<Self> {

View File

@ -304,7 +304,8 @@ fn create_or_open_index(
map_size: usize, map_size: usize,
creation: bool, creation: bool,
) -> Result<Index> { ) -> Result<Index> {
let mut options = EnvOpenOptions::new(); let options = EnvOpenOptions::new();
let mut options = options.read_txn_without_tls();
options.map_size(clamp_to_page_size(map_size)); options.map_size(clamp_to_page_size(map_size));
// You can find more details about this experimental // You can find more details about this experimental
@ -333,7 +334,7 @@ fn create_or_open_index(
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use meilisearch_types::heed::Env; use meilisearch_types::heed::{Env, WithoutTls};
use meilisearch_types::Index; use meilisearch_types::Index;
use uuid::Uuid; use uuid::Uuid;
@ -343,7 +344,7 @@ mod tests {
use crate::IndexScheduler; use crate::IndexScheduler;
impl IndexMapper { impl IndexMapper {
fn test() -> (Self, Env, IndexSchedulerHandle) { fn test() -> (Self, Env<WithoutTls>, IndexSchedulerHandle) {
let (index_scheduler, handle) = IndexScheduler::test(true, vec![]); let (index_scheduler, handle) = IndexScheduler::test(true, vec![]);
(index_scheduler.index_mapper, index_scheduler.env, handle) (index_scheduler.index_mapper, index_scheduler.env, handle)
} }

View File

@ -4,7 +4,7 @@ use std::time::Duration;
use std::{fs, thread}; use std::{fs, thread};
use meilisearch_types::heed::types::{SerdeJson, Str}; use meilisearch_types::heed::types::{SerdeJson, Str};
use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn}; use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn, WithoutTls};
use meilisearch_types::milli; use meilisearch_types::milli;
use meilisearch_types::milli::database_stats::DatabaseStats; use meilisearch_types::milli::database_stats::DatabaseStats;
use meilisearch_types::milli::update::IndexerConfig; use meilisearch_types::milli::update::IndexerConfig;
@ -164,7 +164,7 @@ impl IndexMapper {
} }
pub fn new( pub fn new(
env: &Env, env: &Env<WithoutTls>,
wtxn: &mut RwTxn, wtxn: &mut RwTxn,
options: &IndexSchedulerOptions, options: &IndexSchedulerOptions,
budget: IndexBudget, budget: IndexBudget,

View File

@ -344,6 +344,7 @@ pub fn snapshot_batch(batch: &Batch) -> String {
let Batch { uid, details, stats, started_at, finished_at, progress: _, enqueued_at } = batch; let Batch { uid, details, stats, started_at, finished_at, progress: _, enqueued_at } = batch;
let stats = BatchStats { let stats = BatchStats {
progress_trace: Default::default(), progress_trace: Default::default(),
internal_database_sizes: Default::default(),
write_channel_congestion: None, write_channel_congestion: None,
..stats.clone() ..stats.clone()
}; };

View File

@ -54,7 +54,7 @@ use meilisearch_types::batches::Batch;
use meilisearch_types::features::{InstanceTogglableFeatures, Network, RuntimeTogglableFeatures}; use meilisearch_types::features::{InstanceTogglableFeatures, Network, RuntimeTogglableFeatures};
use meilisearch_types::heed::byteorder::BE; use meilisearch_types::heed::byteorder::BE;
use meilisearch_types::heed::types::I128; use meilisearch_types::heed::types::I128;
use meilisearch_types::heed::{self, Env, RoTxn}; use meilisearch_types::heed::{self, Env, RoTxn, WithoutTls};
use meilisearch_types::milli::index::IndexEmbeddingConfig; use meilisearch_types::milli::index::IndexEmbeddingConfig;
use meilisearch_types::milli::update::IndexerConfig; use meilisearch_types::milli::update::IndexerConfig;
use meilisearch_types::milli::vector::{Embedder, EmbedderOptions, EmbeddingConfigs}; use meilisearch_types::milli::vector::{Embedder, EmbedderOptions, EmbeddingConfigs};
@ -125,13 +125,17 @@ pub struct IndexSchedulerOptions {
pub instance_features: InstanceTogglableFeatures, pub instance_features: InstanceTogglableFeatures,
/// The experimental features enabled for this instance. /// The experimental features enabled for this instance.
pub auto_upgrade: bool, pub auto_upgrade: bool,
/// The maximal number of entries in the search query cache of an embedder.
///
/// 0 disables the cache.
pub embedding_cache_cap: usize,
} }
/// Structure which holds meilisearch's indexes and schedules the tasks /// Structure which holds meilisearch's indexes and schedules the tasks
/// to be performed on them. /// to be performed on them.
pub struct IndexScheduler { pub struct IndexScheduler {
/// The LMDB environment which the DBs are associated with. /// The LMDB environment which the DBs are associated with.
pub(crate) env: Env, pub(crate) env: Env<WithoutTls>,
/// The list of tasks currently processing /// The list of tasks currently processing
pub(crate) processing_tasks: Arc<RwLock<ProcessingTasks>>, pub(crate) processing_tasks: Arc<RwLock<ProcessingTasks>>,
@ -156,6 +160,11 @@ pub struct IndexScheduler {
/// The Authorization header to send to the webhook URL. /// The Authorization header to send to the webhook URL.
pub(crate) webhook_authorization_header: Option<String>, pub(crate) webhook_authorization_header: Option<String>,
/// A map to retrieve the runtime representation of an embedder depending on its configuration.
///
/// This map may return the same embedder object for two different indexes or embedder settings,
/// but it will only do this if the embedder configuration options are the same, leading
/// to the same embeddings for the same input text.
embedders: Arc<RwLock<HashMap<EmbedderOptions, Arc<Embedder>>>>, embedders: Arc<RwLock<HashMap<EmbedderOptions, Arc<Embedder>>>>,
// ================= test // ================= test
@ -209,6 +218,7 @@ impl IndexScheduler {
#[allow(private_interfaces)] // because test_utils is private #[allow(private_interfaces)] // because test_utils is private
pub fn new( pub fn new(
options: IndexSchedulerOptions, options: IndexSchedulerOptions,
auth_env: Env<WithoutTls>,
from_db_version: (u32, u32, u32), from_db_version: (u32, u32, u32),
#[cfg(test)] test_breakpoint_sdr: crossbeam_channel::Sender<(test_utils::Breakpoint, bool)>, #[cfg(test)] test_breakpoint_sdr: crossbeam_channel::Sender<(test_utils::Breakpoint, bool)>,
#[cfg(test)] planned_failures: Vec<(usize, test_utils::FailureLocation)>, #[cfg(test)] planned_failures: Vec<(usize, test_utils::FailureLocation)>,
@ -240,7 +250,9 @@ impl IndexScheduler {
}; };
let env = unsafe { let env = unsafe {
heed::EnvOpenOptions::new() let env_options = heed::EnvOpenOptions::new();
let mut env_options = env_options.read_txn_without_tls();
env_options
.max_dbs(Self::nb_db()) .max_dbs(Self::nb_db())
.map_size(budget.task_db_size) .map_size(budget.task_db_size)
.open(&options.tasks_path) .open(&options.tasks_path)
@ -260,7 +272,7 @@ impl IndexScheduler {
processing_tasks: Arc::new(RwLock::new(ProcessingTasks::new())), processing_tasks: Arc::new(RwLock::new(ProcessingTasks::new())),
version, version,
queue, queue,
scheduler: Scheduler::new(&options), scheduler: Scheduler::new(&options, auth_env),
index_mapper, index_mapper,
env, env,
@ -358,7 +370,7 @@ impl IndexScheduler {
} }
} }
pub fn read_txn(&self) -> Result<RoTxn> { pub fn read_txn(&self) -> Result<RoTxn<WithoutTls>> {
self.env.read_txn().map_err(|e| e.into()) self.env.read_txn().map_err(|e| e.into())
} }
@ -427,12 +439,14 @@ impl IndexScheduler {
/// If you need to fetch information from or perform an action on all indexes, /// If you need to fetch information from or perform an action on all indexes,
/// see the `try_for_each_index` function. /// see the `try_for_each_index` function.
pub fn index(&self, name: &str) -> Result<Index> { pub fn index(&self, name: &str) -> Result<Index> {
self.index_mapper.index(&self.env.read_txn()?, name) let rtxn = self.env.read_txn()?;
self.index_mapper.index(&rtxn, name)
} }
/// Return the boolean referring if index exists. /// Return the boolean referring if index exists.
pub fn index_exists(&self, name: &str) -> Result<bool> { pub fn index_exists(&self, name: &str) -> Result<bool> {
self.index_mapper.index_exists(&self.env.read_txn()?, name) let rtxn = self.env.read_txn()?;
self.index_mapper.index_exists(&rtxn, name)
} }
/// Return the name of all indexes without opening them. /// Return the name of all indexes without opening them.
@ -507,7 +521,8 @@ impl IndexScheduler {
/// 2. The name of the specific data related to the property can be `enqueued` for the `statuses`, `settingsUpdate` for the `types`, or the name of the index for the `indexes`, for example. /// 2. The name of the specific data related to the property can be `enqueued` for the `statuses`, `settingsUpdate` for the `types`, or the name of the index for the `indexes`, for example.
/// 3. The number of times the properties appeared. /// 3. The number of times the properties appeared.
pub fn get_stats(&self) -> Result<BTreeMap<String, BTreeMap<String, u64>>> { pub fn get_stats(&self) -> Result<BTreeMap<String, BTreeMap<String, u64>>> {
self.queue.get_stats(&self.read_txn()?, &self.processing_tasks.read().unwrap()) let rtxn = self.read_txn()?;
self.queue.get_stats(&rtxn, &self.processing_tasks.read().unwrap())
} }
// Return true if there is at least one task that is processing. // Return true if there is at least one task that is processing.
@ -812,7 +827,7 @@ impl IndexScheduler {
// add missing embedder // add missing embedder
let embedder = Arc::new( let embedder = Arc::new(
Embedder::new(embedder_options.clone()) Embedder::new(embedder_options.clone(), self.scheduler.embedding_cache_cap)
.map_err(meilisearch_types::milli::vector::Error::from) .map_err(meilisearch_types::milli::vector::Error::from)
.map_err(|err| { .map_err(|err| {
Error::from_milli(err.into(), Some(index_uid.clone())) Error::from_milli(err.into(), Some(index_uid.clone()))
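Embedder::new now receives the scheduler's embedding_cache_cap as a second argument. The milli side is not part of this excerpt, but the lru crate added to Cargo.lock above suggests a capacity-capped query cache; a sketch under that assumption, with all names hypothetical, where a cap of 0 disables caching as documented on IndexSchedulerOptions:

use std::num::NonZeroUsize;
use lru::LruCache;

/// Hypothetical search-query embedding cache, not the actual milli type.
struct EmbeddingCache {
    /// `None` means the cache is disabled (cap == 0).
    inner: Option<LruCache<String, Vec<f32>>>,
}

impl EmbeddingCache {
    fn new(cap: usize) -> Self {
        EmbeddingCache { inner: NonZeroUsize::new(cap).map(LruCache::new) }
    }

    fn get(&mut self, query: &str) -> Option<Vec<f32>> {
        self.inner.as_mut().and_then(|lru| lru.get(query).cloned())
    }

    fn put(&mut self, query: String, embedding: Vec<f32>) {
        if let Some(lru) = self.inner.as_mut() {
            lru.put(query, embedding);
        }
    }
}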

View File

@ -64,6 +64,13 @@ make_enum_progress! {
} }
} }
make_enum_progress! {
pub enum FinalizingIndexStep {
Committing,
ComputingStats,
}
}
make_enum_progress! { make_enum_progress! {
pub enum TaskCancelationProgress { pub enum TaskCancelationProgress {
RetrievingTasks, RetrievingTasks,

View File

@ -3,7 +3,7 @@ use std::ops::{Bound, RangeBounds};
use meilisearch_types::batches::{Batch, BatchId}; use meilisearch_types::batches::{Batch, BatchId};
use meilisearch_types::heed::types::{DecodeIgnore, SerdeBincode, SerdeJson, Str}; use meilisearch_types::heed::types::{DecodeIgnore, SerdeBincode, SerdeJson, Str};
use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn}; use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn, WithoutTls};
use meilisearch_types::milli::{CboRoaringBitmapCodec, RoaringBitmapCodec, BEU32}; use meilisearch_types::milli::{CboRoaringBitmapCodec, RoaringBitmapCodec, BEU32};
use meilisearch_types::tasks::{Kind, Status}; use meilisearch_types::tasks::{Kind, Status};
use roaring::{MultiOps, RoaringBitmap}; use roaring::{MultiOps, RoaringBitmap};
@ -66,7 +66,7 @@ impl BatchQueue {
NUMBER_OF_DATABASES NUMBER_OF_DATABASES
} }
pub(super) fn new(env: &Env, wtxn: &mut RwTxn) -> Result<Self> { pub(super) fn new(env: &Env<WithoutTls>, wtxn: &mut RwTxn) -> Result<Self> {
Ok(Self { Ok(Self {
all_batches: env.create_database(wtxn, Some(db_name::ALL_BATCHES))?, all_batches: env.create_database(wtxn, Some(db_name::ALL_BATCHES))?,
status: env.create_database(wtxn, Some(db_name::BATCH_STATUS))?, status: env.create_database(wtxn, Some(db_name::BATCH_STATUS))?,

View File

@ -13,7 +13,7 @@ use std::time::Duration;
use file_store::FileStore; use file_store::FileStore;
use meilisearch_types::batches::BatchId; use meilisearch_types::batches::BatchId;
use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn}; use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn, WithoutTls};
use meilisearch_types::milli::{CboRoaringBitmapCodec, BEU32}; use meilisearch_types::milli::{CboRoaringBitmapCodec, BEU32};
use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task}; use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task};
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
@ -157,7 +157,7 @@ impl Queue {
/// Create an index scheduler and start its run loop. /// Create an index scheduler and start its run loop.
pub(crate) fn new( pub(crate) fn new(
env: &Env, env: &Env<WithoutTls>,
wtxn: &mut RwTxn, wtxn: &mut RwTxn,
options: &IndexSchedulerOptions, options: &IndexSchedulerOptions,
) -> Result<Self> { ) -> Result<Self> {

View File

@ -1,7 +1,7 @@
use std::ops::{Bound, RangeBounds}; use std::ops::{Bound, RangeBounds};
use meilisearch_types::heed::types::{DecodeIgnore, SerdeBincode, SerdeJson, Str}; use meilisearch_types::heed::types::{DecodeIgnore, SerdeBincode, SerdeJson, Str};
use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn}; use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn, WithoutTls};
use meilisearch_types::milli::{CboRoaringBitmapCodec, RoaringBitmapCodec, BEU32}; use meilisearch_types::milli::{CboRoaringBitmapCodec, RoaringBitmapCodec, BEU32};
use meilisearch_types::tasks::{Kind, Status, Task}; use meilisearch_types::tasks::{Kind, Status, Task};
use roaring::{MultiOps, RoaringBitmap}; use roaring::{MultiOps, RoaringBitmap};
@ -68,7 +68,7 @@ impl TaskQueue {
NUMBER_OF_DATABASES NUMBER_OF_DATABASES
} }
pub(crate) fn new(env: &Env, wtxn: &mut RwTxn) -> Result<Self> { pub(crate) fn new(env: &Env<WithoutTls>, wtxn: &mut RwTxn) -> Result<Self> {
Ok(Self { Ok(Self {
all_tasks: env.create_database(wtxn, Some(db_name::ALL_TASKS))?, all_tasks: env.create_database(wtxn, Some(db_name::ALL_TASKS))?,
status: env.create_database(wtxn, Some(db_name::STATUS))?, status: env.create_database(wtxn, Some(db_name::STATUS))?,

View File

@ -20,9 +20,12 @@ use std::path::PathBuf;
use std::sync::atomic::{AtomicBool, AtomicU32, Ordering}; use std::sync::atomic::{AtomicBool, AtomicU32, Ordering};
use std::sync::Arc; use std::sync::Arc;
use convert_case::{Case, Casing as _};
use meilisearch_types::error::ResponseError; use meilisearch_types::error::ResponseError;
use meilisearch_types::heed::{Env, WithoutTls};
use meilisearch_types::milli; use meilisearch_types::milli;
use meilisearch_types::tasks::Status; use meilisearch_types::tasks::Status;
use process_batch::ProcessBatchInfo;
use rayon::current_num_threads; use rayon::current_num_threads;
use rayon::iter::{IntoParallelIterator, ParallelIterator}; use rayon::iter::{IntoParallelIterator, ParallelIterator};
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
@ -71,10 +74,15 @@ pub struct Scheduler {
pub(crate) snapshots_path: PathBuf, pub(crate) snapshots_path: PathBuf,
/// The path to the folder containing the auth LMDB env. /// The path to the folder containing the auth LMDB env.
pub(crate) auth_path: PathBuf, pub(crate) auth_env: Env<WithoutTls>,
/// The path to the version file of Meilisearch. /// The path to the version file of Meilisearch.
pub(crate) version_file_path: PathBuf, pub(crate) version_file_path: PathBuf,
/// The maximal number of entries in the search query cache of an embedder.
///
/// 0 disables the cache.
pub(crate) embedding_cache_cap: usize,
} }
impl Scheduler { impl Scheduler {
@ -87,12 +95,13 @@ impl Scheduler {
batched_tasks_size_limit: self.batched_tasks_size_limit, batched_tasks_size_limit: self.batched_tasks_size_limit,
dumps_path: self.dumps_path.clone(), dumps_path: self.dumps_path.clone(),
snapshots_path: self.snapshots_path.clone(), snapshots_path: self.snapshots_path.clone(),
auth_path: self.auth_path.clone(), auth_env: self.auth_env.clone(),
version_file_path: self.version_file_path.clone(), version_file_path: self.version_file_path.clone(),
embedding_cache_cap: self.embedding_cache_cap,
} }
} }
pub fn new(options: &IndexSchedulerOptions) -> Scheduler { pub fn new(options: &IndexSchedulerOptions, auth_env: Env<WithoutTls>) -> Scheduler {
Scheduler { Scheduler {
must_stop_processing: MustStopProcessing::default(), must_stop_processing: MustStopProcessing::default(),
// we want to start the loop right away in case meilisearch was ctrl+Ced while processing things // we want to start the loop right away in case meilisearch was ctrl+Ced while processing things
@ -102,8 +111,9 @@ impl Scheduler {
batched_tasks_size_limit: options.batched_tasks_size_limit, batched_tasks_size_limit: options.batched_tasks_size_limit,
dumps_path: options.dumps_path.clone(), dumps_path: options.dumps_path.clone(),
snapshots_path: options.snapshots_path.clone(), snapshots_path: options.snapshots_path.clone(),
auth_path: options.auth_path.clone(), auth_env,
version_file_path: options.version_file_path.clone(), version_file_path: options.version_file_path.clone(),
embedding_cache_cap: options.embedding_cache_cap,
} }
} }
} }
@ -215,16 +225,16 @@ impl IndexScheduler {
let mut stop_scheduler_forever = false; let mut stop_scheduler_forever = false;
let mut wtxn = self.env.write_txn().map_err(Error::HeedTransaction)?; let mut wtxn = self.env.write_txn().map_err(Error::HeedTransaction)?;
let mut canceled = RoaringBitmap::new(); let mut canceled = RoaringBitmap::new();
let mut congestion = None; let mut process_batch_info = ProcessBatchInfo::default();
match res { match res {
Ok((tasks, cong)) => { Ok((tasks, info)) => {
#[cfg(test)] #[cfg(test)]
self.breakpoint(crate::test_utils::Breakpoint::ProcessBatchSucceeded); self.breakpoint(crate::test_utils::Breakpoint::ProcessBatchSucceeded);
let (task_progress, task_progress_obj) = AtomicTaskStep::new(tasks.len() as u32); let (task_progress, task_progress_obj) = AtomicTaskStep::new(tasks.len() as u32);
progress.update_progress(task_progress_obj); progress.update_progress(task_progress_obj);
congestion = cong; process_batch_info = info;
let mut success = 0; let mut success = 0;
let mut failure = 0; let mut failure = 0;
let mut canceled_by = None; let mut canceled_by = None;
@ -342,6 +352,9 @@ impl IndexScheduler {
// We must re-add the canceled task so they're part of the same batch. // We must re-add the canceled task so they're part of the same batch.
ids |= canceled; ids |= canceled;
let ProcessBatchInfo { congestion, pre_commit_dabases_sizes, post_commit_dabases_sizes } =
process_batch_info;
processing_batch.stats.progress_trace = processing_batch.stats.progress_trace =
progress.accumulated_durations().into_iter().map(|(k, v)| (k, v.into())).collect(); progress.accumulated_durations().into_iter().map(|(k, v)| (k, v.into())).collect();
processing_batch.stats.write_channel_congestion = congestion.map(|congestion| { processing_batch.stats.write_channel_congestion = congestion.map(|congestion| {
@ -351,6 +364,33 @@ impl IndexScheduler {
congestion_info.insert("blocking_ratio".into(), congestion.congestion_ratio().into()); congestion_info.insert("blocking_ratio".into(), congestion.congestion_ratio().into());
congestion_info congestion_info
}); });
processing_batch.stats.internal_database_sizes = pre_commit_dabases_sizes
.iter()
.flat_map(|(dbname, pre_size)| {
post_commit_dabases_sizes
.get(dbname)
.map(|post_size| {
use byte_unit::{Byte, UnitType::Binary};
use std::cmp::Ordering::{Equal, Greater, Less};
let post = Byte::from_u64(*post_size as u64).get_appropriate_unit(Binary);
let diff_size = post_size.abs_diff(*pre_size) as u64;
let diff = Byte::from_u64(diff_size).get_appropriate_unit(Binary);
let sign = match post_size.cmp(pre_size) {
Equal => return None,
Greater => "+",
Less => "-",
};
Some((
dbname.to_case(Case::Camel),
format!("{post:#.2} ({sign}{diff:#.2})").into(),
))
})
.into_iter()
.flatten()
})
.collect();
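The statement above reduces, per database, to printing the post-commit size plus a signed delta, both rendered with an automatically chosen binary unit. A standalone sketch of that computation for one entry (the sizes are made up; the exact rendering depends on byte_unit's Display flags):

use byte_unit::{Byte, UnitType::Binary};

fn main() {
    // Hypothetical pre/post-commit sizes, in bytes, for one internal database.
    let (pre_size, post_size): (usize, usize) = (1_048_576, 1_572_864);

    let post = Byte::from_u64(post_size as u64).get_appropriate_unit(Binary);
    let diff = Byte::from_u64(post_size.abs_diff(pre_size) as u64).get_appropriate_unit(Binary);
    let sign = if post_size >= pre_size { "+" } else { "-" };

    // Same format string as the scheduler uses; prints something like
    // "1.5 MiB (+512 KiB)".
    println!("{post:#.2} ({sign}{diff:#.2})");
}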
if let Some(congestion) = congestion { if let Some(congestion) = congestion {
tracing::debug!( tracing::debug!(

View File

@ -12,7 +12,7 @@ use roaring::RoaringBitmap;
use super::create_batch::Batch; use super::create_batch::Batch;
use crate::processing::{ use crate::processing::{
AtomicBatchStep, AtomicTaskStep, CreateIndexProgress, DeleteIndexProgress, AtomicBatchStep, AtomicTaskStep, CreateIndexProgress, DeleteIndexProgress, FinalizingIndexStep,
InnerSwappingTwoIndexes, SwappingTheIndexes, TaskCancelationProgress, TaskDeletionProgress, InnerSwappingTwoIndexes, SwappingTheIndexes, TaskCancelationProgress, TaskDeletionProgress,
UpdateIndexProgress, UpdateIndexProgress,
}; };
@ -22,6 +22,16 @@ use crate::utils::{
}; };
use crate::{Error, IndexScheduler, Result, TaskId}; use crate::{Error, IndexScheduler, Result, TaskId};
#[derive(Debug, Default)]
pub struct ProcessBatchInfo {
/// The write channel congestion. `None` when unavailable, e.g. for a settings update.
pub congestion: Option<ChannelCongestion>,
/// The sizes of the different databases before starting the indexation.
pub pre_commit_dabases_sizes: indexmap::IndexMap<&'static str, usize>,
/// The sizes of the different databases after committing the indexation.
pub post_commit_dabases_sizes: indexmap::IndexMap<&'static str, usize>,
}
impl IndexScheduler { impl IndexScheduler {
/// Apply the operation associated with the given batch. /// Apply the operation associated with the given batch.
/// ///
@ -35,7 +45,7 @@ impl IndexScheduler {
batch: Batch, batch: Batch,
current_batch: &mut ProcessingBatch, current_batch: &mut ProcessingBatch,
progress: Progress, progress: Progress,
) -> Result<(Vec<Task>, Option<ChannelCongestion>)> { ) -> Result<(Vec<Task>, ProcessBatchInfo)> {
#[cfg(test)] #[cfg(test)]
{ {
self.maybe_fail(crate::test_utils::FailureLocation::InsideProcessBatch)?; self.maybe_fail(crate::test_utils::FailureLocation::InsideProcessBatch)?;
@ -76,7 +86,7 @@ impl IndexScheduler {
canceled_tasks.push(task); canceled_tasks.push(task);
Ok((canceled_tasks, None)) Ok((canceled_tasks, ProcessBatchInfo::default()))
} }
Batch::TaskDeletions(mut tasks) => { Batch::TaskDeletions(mut tasks) => {
// 1. Retrieve the tasks that matched the query at enqueue-time. // 1. Retrieve the tasks that matched the query at enqueue-time.
@ -115,14 +125,14 @@ impl IndexScheduler {
_ => unreachable!(), _ => unreachable!(),
} }
} }
Ok((tasks, None)) Ok((tasks, ProcessBatchInfo::default()))
}
Batch::SnapshotCreation(tasks) => {
self.process_snapshot(progress, tasks).map(|tasks| (tasks, None))
}
Batch::Dump(task) => {
self.process_dump_creation(progress, task).map(|tasks| (tasks, None))
} }
Batch::SnapshotCreation(tasks) => self
.process_snapshot(progress, tasks)
.map(|tasks| (tasks, ProcessBatchInfo::default())),
Batch::Dump(task) => self
.process_dump_creation(progress, task)
.map(|tasks| (tasks, ProcessBatchInfo::default())),
Batch::IndexOperation { op, must_create_index } => { Batch::IndexOperation { op, must_create_index } => {
let index_uid = op.index_uid().to_string(); let index_uid = op.index_uid().to_string();
let index = if must_create_index { let index = if must_create_index {
@ -139,10 +149,12 @@ impl IndexScheduler {
.set_currently_updating_index(Some((index_uid.clone(), index.clone()))); .set_currently_updating_index(Some((index_uid.clone(), index.clone())));
let mut index_wtxn = index.write_txn()?; let mut index_wtxn = index.write_txn()?;
let pre_commit_dabases_sizes = index.database_sizes(&index_wtxn)?;
let (tasks, congestion) = let (tasks, congestion) =
self.apply_index_operation(&mut index_wtxn, &index, op, progress)?; self.apply_index_operation(&mut index_wtxn, &index, op, &progress)?;
{ {
progress.update_progress(FinalizingIndexStep::Committing);
let span = tracing::trace_span!(target: "indexing::scheduler", "commit"); let span = tracing::trace_span!(target: "indexing::scheduler", "commit");
let _entered = span.enter(); let _entered = span.enter();
@ -153,12 +165,15 @@ impl IndexScheduler {
// stats of the index. Since the tasks have already been processed and // stats of the index. Since the tasks have already been processed and
// this is a non-critical operation. If it fails, we should not fail // this is a non-critical operation. If it fails, we should not fail
// the entire batch. // the entire batch.
let mut post_commit_dabases_sizes = None;
let res = || -> Result<()> { let res = || -> Result<()> {
progress.update_progress(FinalizingIndexStep::ComputingStats);
let index_rtxn = index.read_txn()?; let index_rtxn = index.read_txn()?;
let stats = crate::index_mapper::IndexStats::new(&index, &index_rtxn) let stats = crate::index_mapper::IndexStats::new(&index, &index_rtxn)
.map_err(|e| Error::from_milli(e, Some(index_uid.to_string())))?; .map_err(|e| Error::from_milli(e, Some(index_uid.to_string())))?;
let mut wtxn = self.env.write_txn()?; let mut wtxn = self.env.write_txn()?;
self.index_mapper.store_stats_of(&mut wtxn, &index_uid, &stats)?; self.index_mapper.store_stats_of(&mut wtxn, &index_uid, &stats)?;
post_commit_dabases_sizes = Some(index.database_sizes(&index_rtxn)?);
wtxn.commit()?; wtxn.commit()?;
Ok(()) Ok(())
}(); }();
@ -171,7 +186,16 @@ impl IndexScheduler {
), ),
} }
Ok((tasks, congestion)) let info = ProcessBatchInfo {
congestion,
// In case we fail to get the post-commit sizes we decide
// that nothing changed and use the pre-commit sizes.
post_commit_dabases_sizes: post_commit_dabases_sizes
.unwrap_or_else(|| pre_commit_dabases_sizes.clone()),
pre_commit_dabases_sizes,
};
Ok((tasks, info))
} }
Batch::IndexCreation { index_uid, primary_key, task } => { Batch::IndexCreation { index_uid, primary_key, task } => {
progress.update_progress(CreateIndexProgress::CreatingTheIndex); progress.update_progress(CreateIndexProgress::CreatingTheIndex);
@ -239,7 +263,7 @@ impl IndexScheduler {
), ),
} }
Ok((vec![task], None)) Ok((vec![task], ProcessBatchInfo::default()))
} }
Batch::IndexDeletion { index_uid, index_has_been_created, mut tasks } => { Batch::IndexDeletion { index_uid, index_has_been_created, mut tasks } => {
progress.update_progress(DeleteIndexProgress::DeletingTheIndex); progress.update_progress(DeleteIndexProgress::DeletingTheIndex);
@ -273,7 +297,9 @@ impl IndexScheduler {
}; };
} }
Ok((tasks, None)) // Here we could also show that all the internal database sizes go to 0
// but it would mean opening the index and that's costly.
Ok((tasks, ProcessBatchInfo::default()))
} }
Batch::IndexSwap { mut task } => { Batch::IndexSwap { mut task } => {
progress.update_progress(SwappingTheIndexes::EnsuringCorrectnessOfTheSwap); progress.update_progress(SwappingTheIndexes::EnsuringCorrectnessOfTheSwap);
@ -321,7 +347,7 @@ impl IndexScheduler {
} }
wtxn.commit()?; wtxn.commit()?;
task.status = Status::Succeeded; task.status = Status::Succeeded;
Ok((vec![task], None)) Ok((vec![task], ProcessBatchInfo::default()))
} }
Batch::UpgradeDatabase { mut tasks } => { Batch::UpgradeDatabase { mut tasks } => {
let KindWithContent::UpgradeDatabase { from } = tasks.last().unwrap().kind else { let KindWithContent::UpgradeDatabase { from } = tasks.last().unwrap().kind else {
@ -351,7 +377,7 @@ impl IndexScheduler {
task.error = None; task.error = None;
} }
Ok((tasks, None)) Ok((tasks, ProcessBatchInfo::default()))
} }
} }
} }

View File

@ -32,7 +32,7 @@ impl IndexScheduler {
index_wtxn: &mut RwTxn<'i>, index_wtxn: &mut RwTxn<'i>,
index: &'i Index, index: &'i Index,
operation: IndexOperation, operation: IndexOperation,
progress: Progress, progress: &Progress,
) -> Result<(Vec<Task>, Option<ChannelCongestion>)> { ) -> Result<(Vec<Task>, Option<ChannelCongestion>)> {
let indexer_alloc = Bump::new(); let indexer_alloc = Bump::new();
let started_processing_at = std::time::Instant::now(); let started_processing_at = std::time::Instant::now();
@ -186,7 +186,7 @@ impl IndexScheduler {
&document_changes, &document_changes,
embedders, embedders,
&|| must_stop_processing.get(), &|| must_stop_processing.get(),
&progress, progress,
) )
.map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?, .map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?,
); );
@ -307,7 +307,7 @@ impl IndexScheduler {
&document_changes, &document_changes,
embedders, embedders,
&|| must_stop_processing.get(), &|| must_stop_processing.get(),
&progress, progress,
) )
.map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?, .map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?,
); );
@ -465,7 +465,7 @@ impl IndexScheduler {
&document_changes, &document_changes,
embedders, embedders,
&|| must_stop_processing.get(), &|| must_stop_processing.get(),
&progress, progress,
) )
.map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?, .map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?,
); );
@ -520,7 +520,7 @@ impl IndexScheduler {
index_uid: index_uid.clone(), index_uid: index_uid.clone(),
tasks: cleared_tasks, tasks: cleared_tasks,
}, },
progress.clone(), progress,
)?; )?;
let (settings_tasks, _congestion) = self.apply_index_operation( let (settings_tasks, _congestion) = self.apply_index_operation(

View File

@ -4,7 +4,6 @@ use std::sync::atomic::Ordering;
use meilisearch_types::heed::CompactionOption; use meilisearch_types::heed::CompactionOption;
use meilisearch_types::milli::progress::{Progress, VariableNameStep}; use meilisearch_types::milli::progress::{Progress, VariableNameStep};
use meilisearch_types::milli::{self};
use meilisearch_types::tasks::{Status, Task}; use meilisearch_types::tasks::{Status, Task};
use meilisearch_types::{compression, VERSION_FILE_NAME}; use meilisearch_types::{compression, VERSION_FILE_NAME};
@ -28,7 +27,7 @@ impl IndexScheduler {
// 2. Snapshot the index-scheduler LMDB env // 2. Snapshot the index-scheduler LMDB env
// //
// When we call copy_to_file, LMDB opens a read transaction by itself, // When we call copy_to_path, LMDB opens a read transaction by itself,
// we can't provide our own. It is an issue as we would like to know // we can't provide our own. It is an issue as we would like to know
// the update files to copy but new ones can be enqueued between the copy // the update files to copy but new ones can be enqueued between the copy
// of the env and the new transaction we open to retrieve the enqueued tasks. // of the env and the new transaction we open to retrieve the enqueued tasks.
@ -42,7 +41,7 @@ impl IndexScheduler {
progress.update_progress(SnapshotCreationProgress::SnapshotTheIndexScheduler); progress.update_progress(SnapshotCreationProgress::SnapshotTheIndexScheduler);
let dst = temp_snapshot_dir.path().join("tasks"); let dst = temp_snapshot_dir.path().join("tasks");
fs::create_dir_all(&dst)?; fs::create_dir_all(&dst)?;
self.env.copy_to_file(dst.join("data.mdb"), CompactionOption::Enabled)?; self.env.copy_to_path(dst.join("data.mdb"), CompactionOption::Enabled)?;
// 2.2 Create a read transaction on the index-scheduler // 2.2 Create a read transaction on the index-scheduler
let rtxn = self.env.read_txn()?; let rtxn = self.env.read_txn()?;
@ -81,7 +80,7 @@ impl IndexScheduler {
let dst = temp_snapshot_dir.path().join("indexes").join(uuid.to_string()); let dst = temp_snapshot_dir.path().join("indexes").join(uuid.to_string());
fs::create_dir_all(&dst)?; fs::create_dir_all(&dst)?;
index index
.copy_to_file(dst.join("data.mdb"), CompactionOption::Enabled) .copy_to_path(dst.join("data.mdb"), CompactionOption::Enabled)
.map_err(|e| Error::from_milli(e, Some(name.to_string())))?; .map_err(|e| Error::from_milli(e, Some(name.to_string())))?;
} }
@ -91,14 +90,7 @@ impl IndexScheduler {
progress.update_progress(SnapshotCreationProgress::SnapshotTheApiKeys); progress.update_progress(SnapshotCreationProgress::SnapshotTheApiKeys);
let dst = temp_snapshot_dir.path().join("auth"); let dst = temp_snapshot_dir.path().join("auth");
fs::create_dir_all(&dst)?; fs::create_dir_all(&dst)?;
// TODO We can't use the open_auth_store_env function here but we should self.scheduler.auth_env.copy_to_path(dst.join("data.mdb"), CompactionOption::Enabled)?;
let auth = unsafe {
milli::heed::EnvOpenOptions::new()
.map_size(1024 * 1024 * 1024) // 1 GiB
.max_dbs(2)
.open(&self.scheduler.auth_path)
}?;
auth.copy_to_file(dst.join("data.mdb"), CompactionOption::Enabled)?;
// 5. Copy and tarball the flat snapshot // 5. Copy and tarball the flat snapshot
progress.update_progress(SnapshotCreationProgress::CreateTheTarball); progress.update_progress(SnapshotCreationProgress::CreateTheTarball);

View File

@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
[] []
---------------------------------------------------------------------- ----------------------------------------------------------------------
### All Tasks: ### All Tasks:
0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: (1, 13, 3) }, kind: UpgradeDatabase { from: (1, 12, 0) }} 0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: (1, 14, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
1 {uid: 1, batch_uid: 1, status: succeeded, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }} 1 {uid: 1, batch_uid: 1, status: succeeded, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
2 {uid: 2, batch_uid: 2, status: succeeded, details: { primary_key: Some("bone") }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }} 2 {uid: 2, batch_uid: 2, status: succeeded, details: { primary_key: Some("bone") }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
3 {uid: 3, batch_uid: 3, status: failed, error: ResponseError { code: 200, message: "Index `doggo` already exists.", error_code: "index_already_exists", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#index_already_exists" }, details: { primary_key: Some("bone") }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }} 3 {uid: 3, batch_uid: 3, status: failed, error: ResponseError { code: 200, message: "Index `doggo` already exists.", error_code: "index_already_exists", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#index_already_exists" }, details: { primary_key: Some("bone") }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
@ -57,7 +57,7 @@ girafo: { number_of_documents: 0, field_distribution: {} }
[timestamp] [4,] [timestamp] [4,]
---------------------------------------------------------------------- ----------------------------------------------------------------------
### All Batches: ### All Batches:
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.13.3"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, } 0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.14.0"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, }
1 {uid: 1, details: {"primaryKey":"mouse"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"catto":1}}, } 1 {uid: 1, details: {"primaryKey":"mouse"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"catto":1}}, }
2 {uid: 2, details: {"primaryKey":"bone"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"doggo":1}}, } 2 {uid: 2, details: {"primaryKey":"bone"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"doggo":1}}, }
3 {uid: 3, details: {"primaryKey":"bone"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"indexCreation":1},"indexUids":{"doggo":1}}, } 3 {uid: 3, details: {"primaryKey":"bone"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"indexCreation":1},"indexUids":{"doggo":1}}, }

View File

@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
[] []
---------------------------------------------------------------------- ----------------------------------------------------------------------
### All Tasks: ### All Tasks:
0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: (1, 13, 3) }, kind: UpgradeDatabase { from: (1, 12, 0) }} 0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: (1, 14, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
---------------------------------------------------------------------- ----------------------------------------------------------------------
### Status: ### Status:
enqueued [0,] enqueued [0,]

View File

@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
[] []
---------------------------------------------------------------------- ----------------------------------------------------------------------
### All Tasks: ### All Tasks:
0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: (1, 13, 3) }, kind: UpgradeDatabase { from: (1, 12, 0) }} 0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: (1, 14, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }} 1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
---------------------------------------------------------------------- ----------------------------------------------------------------------
### Status: ### Status:


@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
[] []
---------------------------------------------------------------------- ----------------------------------------------------------------------
### All Tasks: ### All Tasks:
0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: (1, 13, 3) }, kind: UpgradeDatabase { from: (1, 12, 0) }} 0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: (1, 14, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }} 1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
---------------------------------------------------------------------- ----------------------------------------------------------------------
### Status: ### Status:
@ -37,7 +37,7 @@ catto [1,]
[timestamp] [0,] [timestamp] [0,]
---------------------------------------------------------------------- ----------------------------------------------------------------------
### All Batches: ### All Batches:
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.13.3"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, } 0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.14.0"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, }
---------------------------------------------------------------------- ----------------------------------------------------------------------
### Batch to tasks mapping: ### Batch to tasks mapping:
0 [0,] 0 [0,]


@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
[] []
---------------------------------------------------------------------- ----------------------------------------------------------------------
### All Tasks: ### All Tasks:
0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: (1, 13, 3) }, kind: UpgradeDatabase { from: (1, 12, 0) }} 0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: (1, 14, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }} 1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
2 {uid: 2, status: enqueued, details: { primary_key: Some("bone") }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }} 2 {uid: 2, status: enqueued, details: { primary_key: Some("bone") }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
---------------------------------------------------------------------- ----------------------------------------------------------------------
@ -40,7 +40,7 @@ doggo [2,]
[timestamp] [0,] [timestamp] [0,]
---------------------------------------------------------------------- ----------------------------------------------------------------------
### All Batches: ### All Batches:
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.13.3"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, } 0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.14.0"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, }
---------------------------------------------------------------------- ----------------------------------------------------------------------
### Batch to tasks mapping: ### Batch to tasks mapping:
0 [0,] 0 [0,]


@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
[] []
---------------------------------------------------------------------- ----------------------------------------------------------------------
### All Tasks: ### All Tasks:
0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: (1, 13, 3) }, kind: UpgradeDatabase { from: (1, 12, 0) }} 0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: (1, 14, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }} 1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
2 {uid: 2, status: enqueued, details: { primary_key: Some("bone") }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }} 2 {uid: 2, status: enqueued, details: { primary_key: Some("bone") }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
3 {uid: 3, status: enqueued, details: { primary_key: Some("bone") }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }} 3 {uid: 3, status: enqueued, details: { primary_key: Some("bone") }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
@ -43,7 +43,7 @@ doggo [2,3,]
[timestamp] [0,] [timestamp] [0,]
---------------------------------------------------------------------- ----------------------------------------------------------------------
### All Batches: ### All Batches:
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.13.3"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, } 0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.14.0"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, }
---------------------------------------------------------------------- ----------------------------------------------------------------------
### Batch to tasks mapping: ### Batch to tasks mapping:
0 [0,] 0 [0,]


@ -104,10 +104,9 @@ fn import_vectors() {
let configs = index_scheduler.embedders("doggos".to_string(), configs).unwrap(); let configs = index_scheduler.embedders("doggos".to_string(), configs).unwrap();
let (hf_embedder, _, _) = configs.get(&simple_hf_name).unwrap(); let (hf_embedder, _, _) = configs.get(&simple_hf_name).unwrap();
let beagle_embed = let beagle_embed = hf_embedder.embed_search("Intel the beagle best doggo", None).unwrap();
hf_embedder.embed_search(S("Intel the beagle best doggo"), None).unwrap(); let lab_embed = hf_embedder.embed_search("Max the lab best doggo", None).unwrap();
let lab_embed = hf_embedder.embed_search(S("Max the lab best doggo"), None).unwrap(); let patou_embed = hf_embedder.embed_search("kefir the patou best doggo", None).unwrap();
let patou_embed = hf_embedder.embed_search(S("kefir the patou best doggo"), None).unwrap();
(fakerest_name, simple_hf_name, beagle_embed, lab_embed, patou_embed) (fakerest_name, simple_hf_name, beagle_embed, lab_embed, patou_embed)
}; };


@ -5,6 +5,7 @@ use std::time::Duration;
use big_s::S; use big_s::S;
use crossbeam_channel::RecvTimeoutError; use crossbeam_channel::RecvTimeoutError;
use file_store::File; use file_store::File;
use meilisearch_auth::open_auth_store_env;
use meilisearch_types::document_formats::DocumentFormatError; use meilisearch_types::document_formats::DocumentFormatError;
use meilisearch_types::milli::update::IndexDocumentsMethod::ReplaceDocuments; use meilisearch_types::milli::update::IndexDocumentsMethod::ReplaceDocuments;
use meilisearch_types::milli::update::IndexerConfig; use meilisearch_types::milli::update::IndexerConfig;
@ -111,6 +112,7 @@ impl IndexScheduler {
batched_tasks_size_limit: u64::MAX, batched_tasks_size_limit: u64::MAX,
instance_features: Default::default(), instance_features: Default::default(),
auto_upgrade: true, // Don't cost much and will ensure the happy path works auto_upgrade: true, // Don't cost much and will ensure the happy path works
embedding_cache_cap: 10,
}; };
let version = configuration(&mut options).unwrap_or_else(|| { let version = configuration(&mut options).unwrap_or_else(|| {
( (
@ -120,7 +122,10 @@ impl IndexScheduler {
) )
}); });
let index_scheduler = Self::new(options, version, sender, planned_failures).unwrap(); std::fs::create_dir_all(&options.auth_path).unwrap();
let auth_env = open_auth_store_env(&options.auth_path).unwrap();
let index_scheduler =
Self::new(options, auth_env, version, sender, planned_failures).unwrap();
// To be 100% consistent between all test we're going to start the scheduler right now // To be 100% consistent between all test we're going to start the scheduler right now
// and ensure it's in the expected starting state. // and ensure it's in the expected starting state.


@ -1,5 +1,5 @@
use anyhow::bail; use anyhow::bail;
use meilisearch_types::heed::{Env, RwTxn}; use meilisearch_types::heed::{Env, RwTxn, WithoutTls};
use meilisearch_types::tasks::{Details, KindWithContent, Status, Task}; use meilisearch_types::tasks::{Details, KindWithContent, Status, Task};
use meilisearch_types::versioning::{VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH}; use meilisearch_types::versioning::{VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH};
use time::OffsetDateTime; use time::OffsetDateTime;
@ -9,13 +9,17 @@ use crate::queue::TaskQueue;
use crate::versioning::Versioning; use crate::versioning::Versioning;
trait UpgradeIndexScheduler { trait UpgradeIndexScheduler {
fn upgrade(&self, env: &Env, wtxn: &mut RwTxn, original: (u32, u32, u32)) fn upgrade(
-> anyhow::Result<()>; &self,
env: &Env<WithoutTls>,
wtxn: &mut RwTxn,
original: (u32, u32, u32),
) -> anyhow::Result<()>;
fn target_version(&self) -> (u32, u32, u32); fn target_version(&self) -> (u32, u32, u32);
} }
pub fn upgrade_index_scheduler( pub fn upgrade_index_scheduler(
env: &Env, env: &Env<WithoutTls>,
versioning: &Versioning, versioning: &Versioning,
from: (u32, u32, u32), from: (u32, u32, u32),
to: (u32, u32, u32), to: (u32, u32, u32),
@ -29,6 +33,7 @@ pub fn upgrade_index_scheduler(
let start = match from { let start = match from {
(1, 12, _) => 0, (1, 12, _) => 0,
(1, 13, _) => 0, (1, 13, _) => 0,
(1, 14, _) => 0,
(major, minor, patch) => { (major, minor, patch) => {
if major > current_major if major > current_major
|| (major == current_major && minor > current_minor) || (major == current_major && minor > current_minor)
@ -91,7 +96,7 @@ struct ToCurrentNoOp {}
impl UpgradeIndexScheduler for ToCurrentNoOp { impl UpgradeIndexScheduler for ToCurrentNoOp {
fn upgrade( fn upgrade(
&self, &self,
_env: &Env, _env: &Env<WithoutTls>,
_wtxn: &mut RwTxn, _wtxn: &mut RwTxn,
_original: (u32, u32, u32), _original: (u32, u32, u32),
) -> anyhow::Result<()> { ) -> anyhow::Result<()> {
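The version match near the top of this file decides where an index-scheduler upgrade starts; a minimal, self-contained sketch of that dispatch (stand-in names, real error types elided) illustrates why the new `(1, 14, _)` arm keeps a v1.14 database on the same no-op path as v1.12/v1.13:

```rust
// Sketch only: stand-in for the dispatch in `upgrade_index_scheduler`, not the
// real scheduler code. Known versions map to the index of the first migration
// to run; databases written by a newer binary are refused.
fn first_upgrade_step(from: (u32, u32, u32), current: (u32, u32, u32)) -> Result<usize, String> {
    let (current_major, current_minor, _) = current;
    match from {
        // v1.12, v1.13 and now v1.14 all start at the first (no-op) step.
        (1, 12, _) | (1, 13, _) | (1, 14, _) => Ok(0),
        (major, minor, patch) => {
            if major > current_major || (major == current_major && minor > current_minor) {
                // The real code rejects databases from a newer binary; the
                // exact error construction is elided in this sketch.
                Err(format!("cannot open a v{major}.{minor}.{patch} database with this binary"))
            } else {
                Err(format!("no upgrade path known from v{major}.{minor}.{patch}"))
            }
        }
    }
}
```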


@ -1,5 +1,5 @@
use meilisearch_types::heed::types::Str; use meilisearch_types::heed::types::Str;
use meilisearch_types::heed::{self, Database, Env, RoTxn, RwTxn}; use meilisearch_types::heed::{self, Database, Env, RoTxn, RwTxn, WithoutTls};
use meilisearch_types::milli::heed_codec::version::VersionCodec; use meilisearch_types::milli::heed_codec::version::VersionCodec;
use meilisearch_types::versioning; use meilisearch_types::versioning;
@ -46,12 +46,12 @@ impl Versioning {
} }
/// Return `Self` without checking anything about the version /// Return `Self` without checking anything about the version
pub fn raw_new(env: &Env, wtxn: &mut RwTxn) -> Result<Self, heed::Error> { pub fn raw_new(env: &Env<WithoutTls>, wtxn: &mut RwTxn) -> Result<Self, heed::Error> {
let version = env.create_database(wtxn, Some(db_name::VERSION))?; let version = env.create_database(wtxn, Some(db_name::VERSION))?;
Ok(Self { version }) Ok(Self { version })
} }
pub(crate) fn new(env: &Env, db_version: (u32, u32, u32)) -> Result<Self> { pub(crate) fn new(env: &Env<WithoutTls>, db_version: (u32, u32, u32)) -> Result<Self> {
let mut wtxn = env.write_txn()?; let mut wtxn = env.write_txn()?;
let this = Self::raw_new(env, &mut wtxn)?; let this = Self::raw_new(env, &mut wtxn)?;
let from = match this.get_version(&wtxn)? { let from = match this.get_version(&wtxn)? {


@ -2,6 +2,7 @@ use std::fs::File;
use std::io::{BufReader, Write}; use std::io::{BufReader, Write};
use std::path::Path; use std::path::Path;
use meilisearch_types::heed::{Env, WithoutTls};
use serde_json::Deserializer; use serde_json::Deserializer;
use crate::{AuthController, HeedAuthStore, Result}; use crate::{AuthController, HeedAuthStore, Result};
@ -9,11 +10,8 @@ use crate::{AuthController, HeedAuthStore, Result};
const KEYS_PATH: &str = "keys"; const KEYS_PATH: &str = "keys";
impl AuthController { impl AuthController {
pub fn dump(src: impl AsRef<Path>, dst: impl AsRef<Path>) -> Result<()> { pub fn dump(auth_env: Env<WithoutTls>, dst: impl AsRef<Path>) -> Result<()> {
let mut store = HeedAuthStore::new(&src)?; let store = HeedAuthStore::new(auth_env)?;
// do not attempt to close the database on drop!
store.set_drop_on_close(false);
let keys_file_path = dst.as_ref().join(KEYS_PATH); let keys_file_path = dst.as_ref().join(KEYS_PATH);
@ -27,8 +25,8 @@ impl AuthController {
Ok(()) Ok(())
} }
pub fn load_dump(src: impl AsRef<Path>, dst: impl AsRef<Path>) -> Result<()> { pub fn load_dump(src: impl AsRef<Path>, auth_env: Env<WithoutTls>) -> Result<()> {
let store = HeedAuthStore::new(&dst)?; let store = HeedAuthStore::new(auth_env)?;
let keys_file_path = src.as_ref().join(KEYS_PATH); let keys_file_path = src.as_ref().join(KEYS_PATH);


@ -3,11 +3,10 @@ pub mod error;
mod store; mod store;
use std::collections::{HashMap, HashSet}; use std::collections::{HashMap, HashSet};
use std::path::Path;
use std::sync::Arc;
use error::{AuthControllerError, Result}; use error::{AuthControllerError, Result};
use maplit::hashset; use maplit::hashset;
use meilisearch_types::heed::{Env, WithoutTls};
use meilisearch_types::index_uid_pattern::IndexUidPattern; use meilisearch_types::index_uid_pattern::IndexUidPattern;
use meilisearch_types::keys::{Action, CreateApiKey, Key, PatchApiKey}; use meilisearch_types::keys::{Action, CreateApiKey, Key, PatchApiKey};
use meilisearch_types::milli::update::Setting; use meilisearch_types::milli::update::Setting;
@ -19,19 +18,19 @@ use uuid::Uuid;
#[derive(Clone)] #[derive(Clone)]
pub struct AuthController { pub struct AuthController {
store: Arc<HeedAuthStore>, store: HeedAuthStore,
master_key: Option<String>, master_key: Option<String>,
} }
impl AuthController { impl AuthController {
pub fn new(db_path: impl AsRef<Path>, master_key: &Option<String>) -> Result<Self> { pub fn new(auth_env: Env<WithoutTls>, master_key: &Option<String>) -> Result<Self> {
let store = HeedAuthStore::new(db_path)?; let store = HeedAuthStore::new(auth_env)?;
if store.is_empty()? { if store.is_empty()? {
generate_default_keys(&store)?; generate_default_keys(&store)?;
} }
Ok(Self { store: Arc::new(store), master_key: master_key.clone() }) Ok(Self { store, master_key: master_key.clone() })
} }
/// Return `Ok(())` if the auth controller is able to access one of its database. /// Return `Ok(())` if the auth controller is able to access one of its database.
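With this change the auth store no longer opens or owns its LMDB environment: callers open one `Env<WithoutTls>` and hand clones to every consumer. A minimal sketch of the resulting call pattern; the `open_auth_store_env` and `AuthController::new` signatures match the hunks in this diff, while the `anyhow` error plumbing is an assumption of the sketch:

```rust
use std::path::Path;

use meilisearch_auth::{open_auth_store_env, AuthController};

fn build_auth(auth_path: &Path, master_key: Option<String>) -> anyhow::Result<AuthController> {
    // The directory must exist before the environment is opened; the store no
    // longer calls `create_dir_all` itself.
    std::fs::create_dir_all(auth_path)?;
    let auth_env = open_auth_store_env(auth_path)?;
    // Cloning an `Env` is cheap and shares the underlying LMDB handle, so the
    // same environment can also be passed to the index scheduler (see the
    // `setup_meilisearch` hunk further down).
    Ok(AuthController::new(auth_env.clone(), &master_key)?)
}
```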


@ -1,18 +1,16 @@
use std::borrow::Cow; use std::borrow::Cow;
use std::cmp::Reverse; use std::cmp::Reverse;
use std::collections::HashSet; use std::collections::HashSet;
use std::fs::create_dir_all;
use std::path::Path; use std::path::Path;
use std::result::Result as StdResult; use std::result::Result as StdResult;
use std::str; use std::str;
use std::str::FromStr; use std::str::FromStr;
use std::sync::Arc;
use hmac::{Hmac, Mac}; use hmac::{Hmac, Mac};
use meilisearch_types::heed::BoxedError; use meilisearch_types::heed::{BoxedError, WithoutTls};
use meilisearch_types::index_uid_pattern::IndexUidPattern; use meilisearch_types::index_uid_pattern::IndexUidPattern;
use meilisearch_types::keys::KeyId; use meilisearch_types::keys::KeyId;
use meilisearch_types::milli; use meilisearch_types::milli::heed;
use meilisearch_types::milli::heed::types::{Bytes, DecodeIgnore, SerdeJson}; use meilisearch_types::milli::heed::types::{Bytes, DecodeIgnore, SerdeJson};
use meilisearch_types::milli::heed::{Database, Env, EnvOpenOptions, RwTxn}; use meilisearch_types::milli::heed::{Database, Env, EnvOpenOptions, RwTxn};
use sha2::Sha256; use sha2::Sha256;
@ -25,44 +23,32 @@ use super::error::{AuthControllerError, Result};
use super::{Action, Key}; use super::{Action, Key};
const AUTH_STORE_SIZE: usize = 1_073_741_824; //1GiB const AUTH_STORE_SIZE: usize = 1_073_741_824; //1GiB
const AUTH_DB_PATH: &str = "auth";
const KEY_DB_NAME: &str = "api-keys"; const KEY_DB_NAME: &str = "api-keys";
const KEY_ID_ACTION_INDEX_EXPIRATION_DB_NAME: &str = "keyid-action-index-expiration"; const KEY_ID_ACTION_INDEX_EXPIRATION_DB_NAME: &str = "keyid-action-index-expiration";
#[derive(Clone)] #[derive(Clone)]
pub struct HeedAuthStore { pub struct HeedAuthStore {
env: Arc<Env>, env: Env<WithoutTls>,
keys: Database<Bytes, SerdeJson<Key>>, keys: Database<Bytes, SerdeJson<Key>>,
action_keyid_index_expiration: Database<KeyIdActionCodec, SerdeJson<Option<OffsetDateTime>>>, action_keyid_index_expiration: Database<KeyIdActionCodec, SerdeJson<Option<OffsetDateTime>>>,
should_close_on_drop: bool,
} }
impl Drop for HeedAuthStore { pub fn open_auth_store_env(path: &Path) -> heed::Result<Env<WithoutTls>> {
fn drop(&mut self) { let options = EnvOpenOptions::new();
if self.should_close_on_drop && Arc::strong_count(&self.env) == 1 { let mut options = options.read_txn_without_tls();
self.env.as_ref().clone().prepare_for_closing();
}
}
}
pub fn open_auth_store_env(path: &Path) -> milli::heed::Result<milli::heed::Env> {
let mut options = EnvOpenOptions::new();
options.map_size(AUTH_STORE_SIZE); // 1GB options.map_size(AUTH_STORE_SIZE); // 1GB
options.max_dbs(2); options.max_dbs(2);
unsafe { options.open(path) } unsafe { options.open(path) }
} }
impl HeedAuthStore { impl HeedAuthStore {
pub fn new(path: impl AsRef<Path>) -> Result<Self> { pub fn new(env: Env<WithoutTls>) -> Result<Self> {
let path = path.as_ref().join(AUTH_DB_PATH);
create_dir_all(&path)?;
let env = Arc::new(open_auth_store_env(path.as_ref())?);
let mut wtxn = env.write_txn()?; let mut wtxn = env.write_txn()?;
let keys = env.create_database(&mut wtxn, Some(KEY_DB_NAME))?; let keys = env.create_database(&mut wtxn, Some(KEY_DB_NAME))?;
let action_keyid_index_expiration = let action_keyid_index_expiration =
env.create_database(&mut wtxn, Some(KEY_ID_ACTION_INDEX_EXPIRATION_DB_NAME))?; env.create_database(&mut wtxn, Some(KEY_ID_ACTION_INDEX_EXPIRATION_DB_NAME))?;
wtxn.commit()?; wtxn.commit()?;
Ok(Self { env, keys, action_keyid_index_expiration, should_close_on_drop: true }) Ok(Self { env, keys, action_keyid_index_expiration })
} }
/// Return `Ok(())` if the auth store is able to access one of its database. /// Return `Ok(())` if the auth store is able to access one of its database.
@ -82,10 +68,6 @@ impl HeedAuthStore {
Ok(self.env.non_free_pages_size()?) Ok(self.env.non_free_pages_size()?)
} }
pub fn set_drop_on_close(&mut self, v: bool) {
self.should_close_on_drop = v;
}
pub fn is_empty(&self) -> Result<bool> { pub fn is_empty(&self) -> Result<bool> {
let rtxn = self.env.read_txn()?; let rtxn = self.env.read_txn()?;
@ -293,7 +275,7 @@ impl HeedAuthStore {
/// optionally on a specific index, for a given key. /// optionally on a specific index, for a given key.
pub struct KeyIdActionCodec; pub struct KeyIdActionCodec;
impl<'a> milli::heed::BytesDecode<'a> for KeyIdActionCodec { impl<'a> heed::BytesDecode<'a> for KeyIdActionCodec {
type DItem = (KeyId, Action, Option<&'a [u8]>); type DItem = (KeyId, Action, Option<&'a [u8]>);
fn bytes_decode(bytes: &'a [u8]) -> StdResult<Self::DItem, BoxedError> { fn bytes_decode(bytes: &'a [u8]) -> StdResult<Self::DItem, BoxedError> {
@ -310,7 +292,7 @@ impl<'a> milli::heed::BytesDecode<'a> for KeyIdActionCodec {
} }
} }
impl<'a> milli::heed::BytesEncode<'a> for KeyIdActionCodec { impl<'a> heed::BytesEncode<'a> for KeyIdActionCodec {
type EItem = (&'a KeyId, &'a Action, Option<&'a [u8]>); type EItem = (&'a KeyId, &'a Action, Option<&'a [u8]>);
fn bytes_encode((key_id, action, index): &Self::EItem) -> StdResult<Cow<[u8]>, BoxedError> { fn bytes_encode((key_id, action, index): &Self::EItem) -> StdResult<Cow<[u8]>, BoxedError> {


@ -64,4 +64,6 @@ pub struct BatchStats {
pub progress_trace: serde_json::Map<String, serde_json::Value>, pub progress_trace: serde_json::Map<String, serde_json::Value>,
#[serde(default, skip_serializing_if = "Option::is_none")] #[serde(default, skip_serializing_if = "Option::is_none")]
pub write_channel_congestion: Option<serde_json::Map<String, serde_json::Value>>, pub write_channel_congestion: Option<serde_json::Map<String, serde_json::Value>>,
#[serde(default, skip_serializing_if = "serde_json::Map::is_empty")]
pub internal_database_sizes: serde_json::Map<String, serde_json::Value>,
} }
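A small, self-contained serde sketch of how the new field behaves on the wire: with `skip_serializing_if = "serde_json::Map::is_empty"` the map is omitted from batch stats until something actually fills it. The struct name and the sample key/value below are illustrative, not taken from this diff:

```rust
use serde::{Deserialize, Serialize};

#[derive(Debug, Default, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
struct BatchStatsSketch {
    #[serde(default, skip_serializing_if = "serde_json::Map::is_empty")]
    internal_database_sizes: serde_json::Map<String, serde_json::Value>,
}

fn main() {
    // An empty map is skipped entirely, so existing batch payloads stay unchanged.
    let empty = BatchStatsSketch::default();
    assert_eq!(serde_json::to_string(&empty).unwrap(), "{}");

    // Hypothetical content: the real keys and values come from the index's
    // internal databases and are not shown in this diff.
    let mut sizes = serde_json::Map::new();
    sizes.insert("wordDocids".into(), serde_json::json!(2_621_440));
    let stats = BatchStatsSketch { internal_database_sizes: sizes };
    // -> {"internalDatabaseSizes":{"wordDocids":2621440}}
    println!("{}", serde_json::to_string(&stats).unwrap());
}
```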


@ -241,6 +241,7 @@ InvalidDocumentGeoField , InvalidRequest , BAD_REQUEST ;
InvalidVectorDimensions , InvalidRequest , BAD_REQUEST ; InvalidVectorDimensions , InvalidRequest , BAD_REQUEST ;
InvalidVectorsType , InvalidRequest , BAD_REQUEST ; InvalidVectorsType , InvalidRequest , BAD_REQUEST ;
InvalidDocumentId , InvalidRequest , BAD_REQUEST ; InvalidDocumentId , InvalidRequest , BAD_REQUEST ;
InvalidDocumentIds , InvalidRequest , BAD_REQUEST ;
InvalidDocumentLimit , InvalidRequest , BAD_REQUEST ; InvalidDocumentLimit , InvalidRequest , BAD_REQUEST ;
InvalidDocumentOffset , InvalidRequest , BAD_REQUEST ; InvalidDocumentOffset , InvalidRequest , BAD_REQUEST ;
InvalidSearchEmbedder , InvalidRequest , BAD_REQUEST ; InvalidSearchEmbedder , InvalidRequest , BAD_REQUEST ;
@ -281,6 +282,7 @@ InvalidSearchCropMarker , InvalidRequest , BAD_REQUEST ;
InvalidSearchFacets , InvalidRequest , BAD_REQUEST ; InvalidSearchFacets , InvalidRequest , BAD_REQUEST ;
InvalidSearchSemanticRatio , InvalidRequest , BAD_REQUEST ; InvalidSearchSemanticRatio , InvalidRequest , BAD_REQUEST ;
InvalidSearchLocales , InvalidRequest , BAD_REQUEST ; InvalidSearchLocales , InvalidRequest , BAD_REQUEST ;
InvalidFacetSearchExhaustiveFacetCount, InvalidRequest , BAD_REQUEST ;
InvalidFacetSearchFacetName , InvalidRequest , BAD_REQUEST ; InvalidFacetSearchFacetName , InvalidRequest , BAD_REQUEST ;
InvalidSimilarId , InvalidRequest , BAD_REQUEST ; InvalidSimilarId , InvalidRequest , BAD_REQUEST ;
InvalidSearchFilter , InvalidRequest , BAD_REQUEST ; InvalidSearchFilter , InvalidRequest , BAD_REQUEST ;
@ -405,7 +407,7 @@ impl ErrorCode for milli::Error {
match error { match error {
// TODO: wait for spec for new error codes. // TODO: wait for spec for new error codes.
UserError::SerdeJson(_) UserError::SerdeJson(_)
| UserError::InvalidLmdbOpenOptions | UserError::EnvAlreadyOpened
| UserError::DocumentLimitReached | UserError::DocumentLimitReached
| UserError::UnknownInternalDocumentId { .. } => Code::Internal, | UserError::UnknownInternalDocumentId { .. } => Code::Internal,
UserError::InvalidStoreFile => Code::InvalidStoreFile, UserError::InvalidStoreFile => Code::InvalidStoreFile,
@ -452,7 +454,10 @@ impl ErrorCode for milli::Error {
} }
UserError::CriterionError(_) => Code::InvalidSettingsRankingRules, UserError::CriterionError(_) => Code::InvalidSettingsRankingRules,
UserError::InvalidGeoField { .. } => Code::InvalidDocumentGeoField, UserError::InvalidGeoField { .. } => Code::InvalidDocumentGeoField,
UserError::InvalidVectorDimensions { .. } => Code::InvalidVectorDimensions, UserError::InvalidVectorDimensions { .. }
| UserError::InvalidIndexingVectorDimensions { .. } => {
Code::InvalidVectorDimensions
}
UserError::InvalidVectorsMapType { .. } UserError::InvalidVectorsMapType { .. }
| UserError::InvalidVectorsEmbedderConf { .. } => Code::InvalidVectorsType, | UserError::InvalidVectorsEmbedderConf { .. } => Code::InvalidVectorsType,
UserError::TooManyVectors(_, _) => Code::TooManyVectors, UserError::TooManyVectors(_, _) => Code::TooManyVectors,
@ -502,8 +507,7 @@ impl ErrorCode for HeedError {
HeedError::Mdb(_) HeedError::Mdb(_)
| HeedError::Encoding(_) | HeedError::Encoding(_)
| HeedError::Decoding(_) | HeedError::Decoding(_)
| HeedError::DatabaseClosing | HeedError::EnvAlreadyOpened => Code::Internal,
| HeedError::BadOpenOptions { .. } => Code::Internal,
} }
} }
} }


@ -11,6 +11,7 @@ pub struct RuntimeTogglableFeatures {
pub contains_filter: bool, pub contains_filter: bool,
pub network: bool, pub network: bool,
pub get_task_documents_route: bool, pub get_task_documents_route: bool,
pub composite_embedders: bool,
} }
#[derive(Default, Debug, Clone, Copy)] #[derive(Default, Debug, Clone, Copy)]


@ -30,11 +30,7 @@ actix-web = { version = "4.9.0", default-features = false, features = [
anyhow = { version = "1.0.95", features = ["backtrace"] } anyhow = { version = "1.0.95", features = ["backtrace"] }
async-trait = "0.1.85" async-trait = "0.1.85"
bstr = "1.11.3" bstr = "1.11.3"
byte-unit = { version = "5.1.6", default-features = false, features = [ byte-unit = { version = "5.1.6", features = ["serde"] }
"std",
"byte",
"serde",
] }
bytes = "1.9.0" bytes = "1.9.0"
clap = { version = "4.5.24", features = ["derive", "env"] } clap = { version = "4.5.24", features = ["derive", "env"] }
crossbeam-channel = "0.5.14" crossbeam-channel = "0.5.14"
@ -140,7 +136,7 @@ reqwest = { version = "0.12.12", features = [
sha-1 = { version = "0.10.1", optional = true } sha-1 = { version = "0.10.1", optional = true }
static-files = { version = "0.2.4", optional = true } static-files = { version = "0.2.4", optional = true }
tempfile = { version = "3.15.0", optional = true } tempfile = { version = "3.15.0", optional = true }
zip = { version = "2.2.2", optional = true } zip = { version = "2.3.0", optional = true }
[features] [features]
default = ["meilisearch-types/all-tokenizations", "mini-dashboard"] default = ["meilisearch-types/all-tokenizations", "mini-dashboard"]
@ -170,5 +166,5 @@ german = ["meilisearch-types/german"]
turkish = ["meilisearch-types/turkish"] turkish = ["meilisearch-types/turkish"]
[package.metadata.mini-dashboard] [package.metadata.mini-dashboard]
assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.18/build.zip" assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.19/build.zip"
sha1 = "b408a30dcb6e20cddb0c153c23385bcac4c8e912" sha1 = "7974430d5277c97f67cf6e95eec6faaac2788834"


@ -198,6 +198,8 @@ struct Infos {
experimental_limit_batched_tasks_total_size: u64, experimental_limit_batched_tasks_total_size: u64,
experimental_network: bool, experimental_network: bool,
experimental_get_task_documents_route: bool, experimental_get_task_documents_route: bool,
experimental_composite_embedders: bool,
experimental_embedding_cache_entries: usize,
gpu_enabled: bool, gpu_enabled: bool,
db_path: bool, db_path: bool,
import_dump: bool, import_dump: bool,
@ -245,6 +247,7 @@ impl Infos {
experimental_reduce_indexing_memory_usage, experimental_reduce_indexing_memory_usage,
experimental_max_number_of_batched_tasks, experimental_max_number_of_batched_tasks,
experimental_limit_batched_tasks_total_size, experimental_limit_batched_tasks_total_size,
experimental_embedding_cache_entries,
http_addr, http_addr,
master_key: _, master_key: _,
env, env,
@ -290,6 +293,7 @@ impl Infos {
contains_filter, contains_filter,
network, network,
get_task_documents_route, get_task_documents_route,
composite_embedders,
} = features; } = features;
// We're going to override every sensible information. // We're going to override every sensible information.
@ -309,6 +313,8 @@ impl Infos {
experimental_reduce_indexing_memory_usage, experimental_reduce_indexing_memory_usage,
experimental_network: network, experimental_network: network,
experimental_get_task_documents_route: get_task_documents_route, experimental_get_task_documents_route: get_task_documents_route,
experimental_composite_embedders: composite_embedders,
experimental_embedding_cache_entries,
gpu_enabled: meilisearch_types::milli::vector::is_cuda_enabled(), gpu_enabled: meilisearch_types::milli::vector::is_cuda_enabled(),
db_path: db_path != PathBuf::from("./data.ms"), db_path: db_path != PathBuf::from("./data.ms"),
import_dump: import_dump.is_some(), import_dump: import_dump.is_some(),
@ -323,7 +329,8 @@ impl Infos {
http_addr: http_addr != default_http_addr(), http_addr: http_addr != default_http_addr(),
http_payload_size_limit, http_payload_size_limit,
experimental_max_number_of_batched_tasks, experimental_max_number_of_batched_tasks,
experimental_limit_batched_tasks_total_size, experimental_limit_batched_tasks_total_size:
experimental_limit_batched_tasks_total_size.into(),
task_queue_webhook: task_webhook_url.is_some(), task_queue_webhook: task_webhook_url.is_some(),
task_webhook_authorization_header: task_webhook_authorization_header.is_some(), task_webhook_authorization_header: task_webhook_authorization_header.is_some(),
log_level: log_level.to_string(), log_level: log_level.to_string(),


@ -34,7 +34,7 @@ use error::PayloadError;
use extractors::payload::PayloadConfig; use extractors::payload::PayloadConfig;
use index_scheduler::versioning::Versioning; use index_scheduler::versioning::Versioning;
use index_scheduler::{IndexScheduler, IndexSchedulerOptions}; use index_scheduler::{IndexScheduler, IndexSchedulerOptions};
use meilisearch_auth::AuthController; use meilisearch_auth::{open_auth_store_env, AuthController};
use meilisearch_types::milli::constants::VERSION_MAJOR; use meilisearch_types::milli::constants::VERSION_MAJOR;
use meilisearch_types::milli::documents::{DocumentsBatchBuilder, DocumentsBatchReader}; use meilisearch_types::milli::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
use meilisearch_types::milli::update::{IndexDocumentsConfig, IndexDocumentsMethod}; use meilisearch_types::milli::update::{IndexDocumentsConfig, IndexDocumentsMethod};
@ -228,11 +228,12 @@ pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<(Arc<IndexScheduler>, Arc<
cleanup_enabled: !opt.experimental_replication_parameters, cleanup_enabled: !opt.experimental_replication_parameters,
max_number_of_tasks: 1_000_000, max_number_of_tasks: 1_000_000,
max_number_of_batched_tasks: opt.experimental_max_number_of_batched_tasks, max_number_of_batched_tasks: opt.experimental_max_number_of_batched_tasks,
batched_tasks_size_limit: opt.experimental_limit_batched_tasks_total_size, batched_tasks_size_limit: opt.experimental_limit_batched_tasks_total_size.into(),
index_growth_amount: byte_unit::Byte::from_str("10GiB").unwrap().as_u64() as usize, index_growth_amount: byte_unit::Byte::from_str("10GiB").unwrap().as_u64() as usize,
index_count: DEFAULT_INDEX_COUNT, index_count: DEFAULT_INDEX_COUNT,
instance_features: opt.to_instance_features(), instance_features: opt.to_instance_features(),
auto_upgrade: opt.experimental_dumpless_upgrade, auto_upgrade: opt.experimental_dumpless_upgrade,
embedding_cache_cap: opt.experimental_embedding_cache_entries,
}; };
let bin_major: u32 = VERSION_MAJOR.parse().unwrap(); let bin_major: u32 = VERSION_MAJOR.parse().unwrap();
let bin_minor: u32 = VERSION_MINOR.parse().unwrap(); let bin_minor: u32 = VERSION_MINOR.parse().unwrap();
@ -335,9 +336,12 @@ fn open_or_create_database_unchecked(
) -> anyhow::Result<(IndexScheduler, AuthController)> { ) -> anyhow::Result<(IndexScheduler, AuthController)> {
// we don't want to create anything in the data.ms yet, thus we // we don't want to create anything in the data.ms yet, thus we
// wrap our two builders in a closure that'll be executed later. // wrap our two builders in a closure that'll be executed later.
let auth_controller = AuthController::new(&opt.db_path, &opt.master_key); std::fs::create_dir_all(&index_scheduler_opt.auth_path)?;
let index_scheduler_builder = let auth_env = open_auth_store_env(&index_scheduler_opt.auth_path).unwrap();
|| -> anyhow::Result<_> { Ok(IndexScheduler::new(index_scheduler_opt, version)?) }; let auth_controller = AuthController::new(auth_env.clone(), &opt.master_key);
let index_scheduler_builder = || -> anyhow::Result<_> {
Ok(IndexScheduler::new(index_scheduler_opt, auth_env, version)?)
};
match ( match (
index_scheduler_builder(), index_scheduler_builder(),
@ -420,6 +424,7 @@ pub fn update_version_file_for_dumpless_upgrade(
if from_major == 1 && from_minor == 12 { if from_major == 1 && from_minor == 12 {
let env = unsafe { let env = unsafe {
heed::EnvOpenOptions::new() heed::EnvOpenOptions::new()
.read_txn_without_tls()
.max_dbs(Versioning::nb_db()) .max_dbs(Versioning::nb_db())
.map_size(index_scheduler_opt.task_db_size) .map_size(index_scheduler_opt.task_db_size)
.open(&index_scheduler_opt.tasks_path) .open(&index_scheduler_opt.tasks_path)


@ -63,7 +63,8 @@ const MEILI_EXPERIMENTAL_MAX_NUMBER_OF_BATCHED_TASKS: &str =
"MEILI_EXPERIMENTAL_MAX_NUMBER_OF_BATCHED_TASKS"; "MEILI_EXPERIMENTAL_MAX_NUMBER_OF_BATCHED_TASKS";
const MEILI_EXPERIMENTAL_LIMIT_BATCHED_TASKS_TOTAL_SIZE: &str = const MEILI_EXPERIMENTAL_LIMIT_BATCHED_TASKS_TOTAL_SIZE: &str =
"MEILI_EXPERIMENTAL_LIMIT_BATCHED_TASKS_SIZE"; "MEILI_EXPERIMENTAL_LIMIT_BATCHED_TASKS_SIZE";
const MEILI_EXPERIMENTAL_EMBEDDING_CACHE_ENTRIES: &str =
"MEILI_EXPERIMENTAL_EMBEDDING_CACHE_ENTRIES";
const DEFAULT_CONFIG_FILE_PATH: &str = "./config.toml"; const DEFAULT_CONFIG_FILE_PATH: &str = "./config.toml";
const DEFAULT_DB_PATH: &str = "./data.ms"; const DEFAULT_DB_PATH: &str = "./data.ms";
const DEFAULT_HTTP_ADDR: &str = "localhost:7700"; const DEFAULT_HTTP_ADDR: &str = "localhost:7700";
@ -444,7 +445,15 @@ pub struct Opt {
/// see: <https://github.com/orgs/meilisearch/discussions/801> /// see: <https://github.com/orgs/meilisearch/discussions/801>
#[clap(long, env = MEILI_EXPERIMENTAL_LIMIT_BATCHED_TASKS_TOTAL_SIZE, default_value_t = default_limit_batched_tasks_total_size())] #[clap(long, env = MEILI_EXPERIMENTAL_LIMIT_BATCHED_TASKS_TOTAL_SIZE, default_value_t = default_limit_batched_tasks_total_size())]
#[serde(default = "default_limit_batched_tasks_total_size")] #[serde(default = "default_limit_batched_tasks_total_size")]
pub experimental_limit_batched_tasks_total_size: u64, pub experimental_limit_batched_tasks_total_size: Byte,
/// Enables experimental caching of search query embeddings. The value represents the maximal number of entries in the cache of each
/// distinct embedder.
///
/// For more information, see <https://github.com/orgs/meilisearch/discussions/818>.
#[clap(long, env = MEILI_EXPERIMENTAL_EMBEDDING_CACHE_ENTRIES, default_value_t = default_embedding_cache_entries())]
#[serde(default = "default_embedding_cache_entries")]
pub experimental_embedding_cache_entries: usize,
#[serde(flatten)] #[serde(flatten)]
#[clap(flatten)] #[clap(flatten)]
@ -549,6 +558,7 @@ impl Opt {
experimental_reduce_indexing_memory_usage, experimental_reduce_indexing_memory_usage,
experimental_max_number_of_batched_tasks, experimental_max_number_of_batched_tasks,
experimental_limit_batched_tasks_total_size, experimental_limit_batched_tasks_total_size,
experimental_embedding_cache_entries,
} = self; } = self;
export_to_env_if_not_present(MEILI_DB_PATH, db_path); export_to_env_if_not_present(MEILI_DB_PATH, db_path);
export_to_env_if_not_present(MEILI_HTTP_ADDR, http_addr); export_to_env_if_not_present(MEILI_HTTP_ADDR, http_addr);
@ -641,6 +651,10 @@ impl Opt {
MEILI_EXPERIMENTAL_LIMIT_BATCHED_TASKS_TOTAL_SIZE, MEILI_EXPERIMENTAL_LIMIT_BATCHED_TASKS_TOTAL_SIZE,
experimental_limit_batched_tasks_total_size.to_string(), experimental_limit_batched_tasks_total_size.to_string(),
); );
export_to_env_if_not_present(
MEILI_EXPERIMENTAL_EMBEDDING_CACHE_ENTRIES,
experimental_embedding_cache_entries.to_string(),
);
indexer_options.export_to_env(); indexer_options.export_to_env();
} }
@ -944,8 +958,12 @@ fn default_limit_batched_tasks() -> usize {
usize::MAX usize::MAX
} }
fn default_limit_batched_tasks_total_size() -> u64 { fn default_limit_batched_tasks_total_size() -> Byte {
u64::MAX Byte::from_u64(u64::MAX)
}
fn default_embedding_cache_entries() -> usize {
0
} }
fn default_snapshot_dir() -> PathBuf { fn default_snapshot_dir() -> PathBuf {
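A short sketch of what the switch from `u64` to `byte_unit::Byte` buys for the batched-tasks size limit: the option now parses human-readable sizes while still converting to the raw byte count the scheduler expects. Only `from_u64`, `FromStr`, `as_u64` and the `Into<u64>` conversion already used elsewhere in this diff are relied on:

```rust
use std::str::FromStr;

use byte_unit::Byte;

fn main() {
    // The default stays "unlimited", now expressed as a `Byte` value.
    let default_limit = Byte::from_u64(u64::MAX);
    let raw: u64 = default_limit.into(); // same conversion the scheduler options use
    assert_eq!(raw, u64::MAX);

    // A human-readable value such as `10GiB` parses through `FromStr`, exactly
    // like the `"10GiB"` literal used for `index_growth_amount` in
    // `setup_meilisearch`, and converts back to a raw byte count.
    let limit = Byte::from_str("10GiB").unwrap();
    assert_eq!(limit.as_u64(), 10 * 1024 * 1024 * 1024);
}
```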


@ -52,6 +52,7 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
contains_filter: Some(false), contains_filter: Some(false),
network: Some(false), network: Some(false),
get_task_documents_route: Some(false), get_task_documents_route: Some(false),
composite_embedders: Some(false),
})), })),
(status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!( (status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!(
{ {
@ -94,6 +95,8 @@ pub struct RuntimeTogglableFeatures {
pub network: Option<bool>, pub network: Option<bool>,
#[deserr(default)] #[deserr(default)]
pub get_task_documents_route: Option<bool>, pub get_task_documents_route: Option<bool>,
#[deserr(default)]
pub composite_embedders: Option<bool>,
} }
impl From<meilisearch_types::features::RuntimeTogglableFeatures> for RuntimeTogglableFeatures { impl From<meilisearch_types::features::RuntimeTogglableFeatures> for RuntimeTogglableFeatures {
@ -105,6 +108,7 @@ impl From<meilisearch_types::features::RuntimeTogglableFeatures> for RuntimeTogg
contains_filter, contains_filter,
network, network,
get_task_documents_route, get_task_documents_route,
composite_embedders,
} = value; } = value;
Self { Self {
@ -114,6 +118,7 @@ impl From<meilisearch_types::features::RuntimeTogglableFeatures> for RuntimeTogg
contains_filter: Some(contains_filter), contains_filter: Some(contains_filter),
network: Some(network), network: Some(network),
get_task_documents_route: Some(get_task_documents_route), get_task_documents_route: Some(get_task_documents_route),
composite_embedders: Some(composite_embedders),
} }
} }
} }
@ -126,6 +131,7 @@ pub struct PatchExperimentalFeatureAnalytics {
contains_filter: bool, contains_filter: bool,
network: bool, network: bool,
get_task_documents_route: bool, get_task_documents_route: bool,
composite_embedders: bool,
} }
impl Aggregate for PatchExperimentalFeatureAnalytics { impl Aggregate for PatchExperimentalFeatureAnalytics {
@ -141,6 +147,7 @@ impl Aggregate for PatchExperimentalFeatureAnalytics {
contains_filter: new.contains_filter, contains_filter: new.contains_filter,
network: new.network, network: new.network,
get_task_documents_route: new.get_task_documents_route, get_task_documents_route: new.get_task_documents_route,
composite_embedders: new.composite_embedders,
}) })
} }
@ -165,6 +172,7 @@ impl Aggregate for PatchExperimentalFeatureAnalytics {
contains_filter: Some(false), contains_filter: Some(false),
network: Some(false), network: Some(false),
get_task_documents_route: Some(false), get_task_documents_route: Some(false),
composite_embedders: Some(false),
})), })),
(status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!( (status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!(
{ {
@ -202,6 +210,10 @@ async fn patch_features(
.0 .0
.get_task_documents_route .get_task_documents_route
.unwrap_or(old_features.get_task_documents_route), .unwrap_or(old_features.get_task_documents_route),
composite_embedders: new_features
.0
.composite_embedders
.unwrap_or(old_features.composite_embedders),
}; };
// explicitly destructure for analytics rather than using the `Serialize` implementation, because // explicitly destructure for analytics rather than using the `Serialize` implementation, because
@ -214,6 +226,7 @@ async fn patch_features(
contains_filter, contains_filter,
network, network,
get_task_documents_route, get_task_documents_route,
composite_embedders,
} = new_features; } = new_features;
analytics.publish( analytics.publish(
@ -224,6 +237,7 @@ async fn patch_features(
contains_filter, contains_filter,
network, network,
get_task_documents_route, get_task_documents_route,
composite_embedders,
}, },
&req, &req,
); );
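The PATCH handler above merges the payload field by field; a minimal stand-in sketch of that rule (none of these types are the real ones) shows that an absent field keeps the stored value while `compositeEmbedders: true` flips only that flag:

```rust
#[derive(Clone, Copy, Debug, PartialEq)]
struct Features {
    composite_embedders: bool,
    network: bool,
}

#[derive(Default)]
struct Patch {
    composite_embedders: Option<bool>,
    network: Option<bool>,
}

fn apply(old: Features, patch: Patch) -> Features {
    Features {
        // Each optional field falls back to the previously stored value.
        composite_embedders: patch.composite_embedders.unwrap_or(old.composite_embedders),
        network: patch.network.unwrap_or(old.network),
    }
}

fn main() {
    let old = Features { composite_embedders: false, network: true };
    let patched = apply(old, Patch { composite_embedders: Some(true), ..Default::default() });
    assert_eq!(patched, Features { composite_embedders: true, network: true });
}
```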


@ -20,11 +20,13 @@ use meilisearch_types::index_uid::IndexUid;
use meilisearch_types::milli::update::IndexDocumentsMethod; use meilisearch_types::milli::update::IndexDocumentsMethod;
use meilisearch_types::milli::vector::parsed_vectors::ExplicitVectors; use meilisearch_types::milli::vector::parsed_vectors::ExplicitVectors;
use meilisearch_types::milli::DocumentId; use meilisearch_types::milli::DocumentId;
use meilisearch_types::serde_cs::vec::CS;
use meilisearch_types::star_or::OptionStarOrList; use meilisearch_types::star_or::OptionStarOrList;
use meilisearch_types::tasks::KindWithContent; use meilisearch_types::tasks::KindWithContent;
use meilisearch_types::{milli, Document, Index}; use meilisearch_types::{milli, Document, Index};
use mime::Mime; use mime::Mime;
use once_cell::sync::Lazy; use once_cell::sync::Lazy;
use roaring::RoaringBitmap;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use serde_json::Value; use serde_json::Value;
use tempfile::tempfile; use tempfile::tempfile;
@ -43,7 +45,7 @@ use crate::extractors::sequential_extractor::SeqHandler;
use crate::routes::{ use crate::routes::{
get_task_id, is_dry_run, PaginationView, SummarizedTaskView, PAGINATION_DEFAULT_LIMIT, get_task_id, is_dry_run, PaginationView, SummarizedTaskView, PAGINATION_DEFAULT_LIMIT,
}; };
use crate::search::{parse_filter, RetrieveVectors}; use crate::search::{parse_filter, ExternalDocumentId, RetrieveVectors};
use crate::{aggregate_methods, Opt}; use crate::{aggregate_methods, Opt};
static ACCEPTED_CONTENT_TYPE: Lazy<Vec<String>> = Lazy::new(|| { static ACCEPTED_CONTENT_TYPE: Lazy<Vec<String>> = Lazy::new(|| {
@ -137,6 +139,9 @@ pub struct DocumentsFetchAggregator<Method: AggregateMethod> {
#[serde(rename = "vector.retrieve_vectors")] #[serde(rename = "vector.retrieve_vectors")]
retrieve_vectors: bool, retrieve_vectors: bool,
// maximum size of `ids` array. 0 if always empty or `null`
max_document_ids: usize,
// pagination // pagination
#[serde(rename = "pagination.max_limit")] #[serde(rename = "pagination.max_limit")]
max_limit: usize, max_limit: usize,
@ -149,7 +154,7 @@ pub struct DocumentsFetchAggregator<Method: AggregateMethod> {
#[derive(Copy, Clone, Debug, PartialEq, Eq)] #[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum DocumentFetchKind { pub enum DocumentFetchKind {
PerDocumentId { retrieve_vectors: bool }, PerDocumentId { retrieve_vectors: bool },
Normal { with_filter: bool, limit: usize, offset: usize, retrieve_vectors: bool }, Normal { with_filter: bool, limit: usize, offset: usize, retrieve_vectors: bool, ids: usize },
} }
impl<Method: AggregateMethod> DocumentsFetchAggregator<Method> { impl<Method: AggregateMethod> DocumentsFetchAggregator<Method> {
@ -161,12 +166,18 @@ impl<Method: AggregateMethod> DocumentsFetchAggregator<Method> {
} }
}; };
let ids = match query {
DocumentFetchKind::Normal { ids, .. } => *ids,
DocumentFetchKind::PerDocumentId { .. } => 0,
};
Self { Self {
per_document_id: matches!(query, DocumentFetchKind::PerDocumentId { .. }), per_document_id: matches!(query, DocumentFetchKind::PerDocumentId { .. }),
per_filter: matches!(query, DocumentFetchKind::Normal { with_filter, .. } if *with_filter), per_filter: matches!(query, DocumentFetchKind::Normal { with_filter, .. } if *with_filter),
max_limit: limit, max_limit: limit,
max_offset: offset, max_offset: offset,
retrieve_vectors, retrieve_vectors,
max_document_ids: ids,
marker: PhantomData, marker: PhantomData,
} }
@ -185,6 +196,7 @@ impl<Method: AggregateMethod> Aggregate for DocumentsFetchAggregator<Method> {
retrieve_vectors: self.retrieve_vectors | new.retrieve_vectors, retrieve_vectors: self.retrieve_vectors | new.retrieve_vectors,
max_limit: self.max_limit.max(new.max_limit), max_limit: self.max_limit.max(new.max_limit),
max_offset: self.max_offset.max(new.max_offset), max_offset: self.max_offset.max(new.max_offset),
max_document_ids: self.max_document_ids.max(new.max_document_ids),
marker: PhantomData, marker: PhantomData,
}) })
} }
@ -266,6 +278,7 @@ pub async fn get_document(
per_filter: false, per_filter: false,
max_limit: 0, max_limit: 0,
max_offset: 0, max_offset: 0,
max_document_ids: 0,
marker: PhantomData, marker: PhantomData,
}, },
&req, &req,
@ -387,6 +400,9 @@ pub struct BrowseQueryGet {
#[param(default, value_type = Option<bool>)] #[param(default, value_type = Option<bool>)]
#[deserr(default, error = DeserrQueryParamError<InvalidDocumentRetrieveVectors>)] #[deserr(default, error = DeserrQueryParamError<InvalidDocumentRetrieveVectors>)]
retrieve_vectors: Param<bool>, retrieve_vectors: Param<bool>,
#[param(default, value_type = Option<Vec<String>>)]
#[deserr(default, error = DeserrQueryParamError<InvalidDocumentIds>)]
ids: Option<CS<String>>,
#[param(default, value_type = Option<String>, example = "popularity > 1000")] #[param(default, value_type = Option<String>, example = "popularity > 1000")]
#[deserr(default, error = DeserrQueryParamError<InvalidDocumentFilter>)] #[deserr(default, error = DeserrQueryParamError<InvalidDocumentFilter>)]
filter: Option<String>, filter: Option<String>,
@ -408,6 +424,9 @@ pub struct BrowseQuery {
#[schema(default, example = true)] #[schema(default, example = true)]
#[deserr(default, error = DeserrJsonError<InvalidDocumentRetrieveVectors>)] #[deserr(default, error = DeserrJsonError<InvalidDocumentRetrieveVectors>)]
retrieve_vectors: bool, retrieve_vectors: bool,
#[schema(value_type = Option<Vec<String>>, example = json!(["cody", "finn", "brandy", "gambit"]))]
#[deserr(default, error = DeserrJsonError<InvalidDocumentIds>)]
ids: Option<Vec<serde_json::Value>>,
#[schema(default, value_type = Option<Value>, example = "popularity > 1000")] #[schema(default, value_type = Option<Value>, example = "popularity > 1000")]
#[deserr(default, error = DeserrJsonError<InvalidDocumentFilter>)] #[deserr(default, error = DeserrJsonError<InvalidDocumentFilter>)]
filter: Option<Value>, filter: Option<Value>,
@ -479,6 +498,7 @@ pub async fn documents_by_query_post(
retrieve_vectors: body.retrieve_vectors, retrieve_vectors: body.retrieve_vectors,
max_limit: body.limit, max_limit: body.limit,
max_offset: body.offset, max_offset: body.offset,
max_document_ids: body.ids.as_ref().map(Vec::len).unwrap_or_default(),
per_document_id: false, per_document_id: false,
marker: PhantomData, marker: PhantomData,
}, },
@ -551,7 +571,8 @@ pub async fn get_documents(
) -> Result<HttpResponse, ResponseError> { ) -> Result<HttpResponse, ResponseError> {
debug!(parameters = ?params, "Get documents GET"); debug!(parameters = ?params, "Get documents GET");
let BrowseQueryGet { limit, offset, fields, retrieve_vectors, filter } = params.into_inner(); let BrowseQueryGet { limit, offset, fields, retrieve_vectors, filter, ids } =
params.into_inner();
let filter = match filter { let filter = match filter {
Some(f) => match serde_json::from_str(&f) { Some(f) => match serde_json::from_str(&f) {
@ -561,12 +582,15 @@ pub async fn get_documents(
None => None, None => None,
}; };
let ids = ids.map(|ids| ids.into_iter().map(Into::into).collect());
let query = BrowseQuery { let query = BrowseQuery {
offset: offset.0, offset: offset.0,
limit: limit.0, limit: limit.0,
fields: fields.merge_star_and_none(), fields: fields.merge_star_and_none(),
retrieve_vectors: retrieve_vectors.0, retrieve_vectors: retrieve_vectors.0,
filter, filter,
ids,
}; };
analytics.publish( analytics.publish(
@ -575,6 +599,7 @@ pub async fn get_documents(
retrieve_vectors: query.retrieve_vectors, retrieve_vectors: query.retrieve_vectors,
max_limit: query.limit, max_limit: query.limit,
max_offset: query.offset, max_offset: query.offset,
max_document_ids: query.ids.as_ref().map(Vec::len).unwrap_or_default(),
per_document_id: false, per_document_id: false,
marker: PhantomData, marker: PhantomData,
}, },
@ -590,15 +615,30 @@ fn documents_by_query(
query: BrowseQuery, query: BrowseQuery,
) -> Result<HttpResponse, ResponseError> { ) -> Result<HttpResponse, ResponseError> {
let index_uid = IndexUid::try_from(index_uid.into_inner())?; let index_uid = IndexUid::try_from(index_uid.into_inner())?;
let BrowseQuery { offset, limit, fields, retrieve_vectors, filter } = query; let BrowseQuery { offset, limit, fields, retrieve_vectors, filter, ids } = query;
let retrieve_vectors = RetrieveVectors::new(retrieve_vectors); let retrieve_vectors = RetrieveVectors::new(retrieve_vectors);
let ids = if let Some(ids) = ids {
let mut parsed_ids = Vec::with_capacity(ids.len());
for (index, id) in ids.into_iter().enumerate() {
let id = id.try_into().map_err(|error| {
let msg = format!("In `.ids[{index}]`: {error}");
ResponseError::from_msg(msg, Code::InvalidDocumentIds)
})?;
parsed_ids.push(id)
}
Some(parsed_ids)
} else {
None
};
let index = index_scheduler.index(&index_uid)?; let index = index_scheduler.index(&index_uid)?;
let (total, documents) = retrieve_documents( let (total, documents) = retrieve_documents(
&index, &index,
offset, offset,
limit, limit,
ids,
filter, filter,
fields, fields,
retrieve_vectors, retrieve_vectors,
@ -1451,10 +1491,12 @@ fn some_documents<'a, 't: 'a>(
})) }))
} }
#[allow(clippy::too_many_arguments)]
fn retrieve_documents<S: AsRef<str>>( fn retrieve_documents<S: AsRef<str>>(
index: &Index, index: &Index,
offset: usize, offset: usize,
limit: usize, limit: usize,
ids: Option<Vec<ExternalDocumentId>>,
filter: Option<Value>, filter: Option<Value>,
attributes_to_retrieve: Option<Vec<S>>, attributes_to_retrieve: Option<Vec<S>>,
retrieve_vectors: RetrieveVectors, retrieve_vectors: RetrieveVectors,
@ -1468,16 +1510,28 @@ fn retrieve_documents<S: AsRef<str>>(
None None
}; };
let candidates = if let Some(filter) = filter { let mut candidates = if let Some(ids) = ids {
filter.evaluate(&rtxn, index).map_err(|err| match err { let external_document_ids = index.external_documents_ids();
let mut candidates = RoaringBitmap::new();
for id in ids.iter() {
let Some(docid) = external_document_ids.get(&rtxn, id)? else {
continue;
};
candidates.insert(docid);
}
candidates
} else {
index.documents_ids(&rtxn)?
};
if let Some(filter) = filter {
candidates &= filter.evaluate(&rtxn, index).map_err(|err| match err {
milli::Error::UserError(milli::UserError::InvalidFilter(_)) => { milli::Error::UserError(milli::UserError::InvalidFilter(_)) => {
ResponseError::from_msg(err.to_string(), Code::InvalidDocumentFilter) ResponseError::from_msg(err.to_string(), Code::InvalidDocumentFilter)
} }
e => e.into(), e => e.into(),
})? })?
} else { }
index.documents_ids(&rtxn)?
};
let (it, number_of_documents) = { let (it, number_of_documents) = {
let number_of_documents = candidates.len(); let number_of_documents = candidates.len();
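A self-contained sketch of the candidate-selection logic added above, using plain `roaring` and a `HashMap` as a stand-in for `index.external_documents_ids()`: requested external ids are resolved to internal ids first, then the optional filter narrows the set with a bitmap intersection:

```rust
use std::collections::HashMap;

use roaring::RoaringBitmap;

/// Stand-in for the lookups normally done through `index.external_documents_ids()`
/// and `filter.evaluate(..)`; only the candidate-selection shape is real.
fn candidates(
    external_to_internal: &HashMap<String, u32>,
    requested_ids: Option<&[String]>,
    filtered: Option<&RoaringBitmap>,
    all_documents: &RoaringBitmap,
) -> RoaringBitmap {
    // Resolve the requested external ids first; unknown ids are silently
    // skipped, matching the `else { continue }` branch above.
    let mut candidates: RoaringBitmap = match requested_ids {
        Some(ids) => ids.iter().filter_map(|id| external_to_internal.get(id).copied()).collect(),
        None => all_documents.clone(),
    };
    // The optional filter then narrows the set with a bitmap intersection.
    if let Some(filtered) = filtered {
        candidates &= filtered;
    }
    candidates
}
```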


@ -68,6 +68,8 @@ pub struct FacetSearchQuery {
pub ranking_score_threshold: Option<RankingScoreThreshold>, pub ranking_score_threshold: Option<RankingScoreThreshold>,
#[deserr(default, error = DeserrJsonError<InvalidSearchLocales>, default)] #[deserr(default, error = DeserrJsonError<InvalidSearchLocales>, default)]
pub locales: Option<Vec<Locale>>, pub locales: Option<Vec<Locale>>,
#[deserr(default, error = DeserrJsonError<InvalidFacetSearchExhaustiveFacetCount>, default)]
pub exhaustive_facet_count: Option<bool>,
} }
#[derive(Default)] #[derive(Default)]
@ -98,6 +100,7 @@ impl FacetSearchAggregator {
hybrid, hybrid,
ranking_score_threshold, ranking_score_threshold,
locales, locales,
exhaustive_facet_count,
} = query; } = query;
Self { Self {
@ -110,7 +113,8 @@ impl FacetSearchAggregator {
|| attributes_to_search_on.is_some() || attributes_to_search_on.is_some()
|| hybrid.is_some() || hybrid.is_some()
|| ranking_score_threshold.is_some() || ranking_score_threshold.is_some()
|| locales.is_some(), || locales.is_some()
|| exhaustive_facet_count.is_some(),
..Default::default() ..Default::default()
} }
} }
@ -293,13 +297,24 @@ impl From<FacetSearchQuery> for SearchQuery {
hybrid, hybrid,
ranking_score_threshold, ranking_score_threshold,
locales, locales,
exhaustive_facet_count,
} = value; } = value;
// If exhaustive_facet_count is true, we need to set the page to 0
// because the facet search is not exhaustive by default.
let page = if exhaustive_facet_count.map_or(false, |exhaustive| exhaustive) {
// setting the page to 0 will force the search to be exhaustive when computing the number of hits,
// but it will skip the bucket sort saving time.
Some(0)
} else {
None
};
SearchQuery { SearchQuery {
q, q,
offset: DEFAULT_SEARCH_OFFSET(), offset: DEFAULT_SEARCH_OFFSET(),
limit: DEFAULT_SEARCH_LIMIT(), limit: DEFAULT_SEARCH_LIMIT(),
page: None, page,
hits_per_page: None, hits_per_page: None,
attributes_to_retrieve: None, attributes_to_retrieve: None,
retrieve_vectors: false, retrieve_vectors: false,
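A minimal sketch of the page selection introduced above: asking for `exhaustiveFacetCount` maps to `page = Some(0)`, which, per the comment in this hunk, forces an exhaustive hit count while skipping the bucket sort; otherwise the default `None` keeps the faster, non-exhaustive path:

```rust
// Equivalent to the `map_or(false, |exhaustive| exhaustive)` check above.
fn facet_search_page(exhaustive_facet_count: Option<bool>) -> Option<usize> {
    if exhaustive_facet_count.unwrap_or(false) {
        Some(0)
    } else {
        None
    }
}

fn main() {
    assert_eq!(facet_search_page(Some(true)), Some(0));
    assert_eq!(facet_search_page(Some(false)), None);
    assert_eq!(facet_search_page(None), None);
}
```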


@ -518,7 +518,7 @@ impl From<index_scheduler::IndexStats> for IndexStats {
.inner_stats .inner_stats
.number_of_documents .number_of_documents
.unwrap_or(stats.inner_stats.documents_database_stats.number_of_entries()), .unwrap_or(stats.inner_stats.documents_database_stats.number_of_entries()),
raw_document_db_size: stats.inner_stats.documents_database_stats.total_value_size(), raw_document_db_size: stats.inner_stats.documents_database_stats.total_size(),
avg_document_size: stats.inner_stats.documents_database_stats.average_value_size(), avg_document_size: stats.inner_stats.documents_database_stats.average_value_size(),
is_indexing: stats.is_indexing, is_indexing: stats.is_indexing,
number_of_embeddings: stats.inner_stats.number_of_embeddings, number_of_embeddings: stats.inner_stats.number_of_embeddings,


@ -716,7 +716,30 @@ pub async fn delete_all(
fn validate_settings( fn validate_settings(
settings: Settings<Unchecked>, settings: Settings<Unchecked>,
_index_scheduler: &IndexScheduler, index_scheduler: &IndexScheduler,
) -> Result<Settings<Unchecked>, ResponseError> { ) -> Result<Settings<Unchecked>, ResponseError> {
use meilisearch_types::milli::update::Setting;
use meilisearch_types::milli::vector::settings::EmbedderSource;
let features = index_scheduler.features();
if let Setting::Set(embedders) = &settings.embedders {
for SettingEmbeddingSettings { inner: embedder } in embedders.values() {
let Setting::Set(embedder) = embedder else {
continue;
};
if matches!(embedder.source, Setting::Set(EmbedderSource::Composite)) {
features.check_composite_embedders("using `\"composite\"` as source")?;
}
if matches!(embedder.search_embedder, Setting::Set(_)) {
features.check_composite_embedders("setting `searchEmbedder`")?;
}
if matches!(embedder.indexing_embedder, Setting::Set(_)) {
features.check_composite_embedders("setting `indexingEmbedder`")?;
}
}
}
Ok(settings.validate()?) Ok(settings.validate()?)
} }
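A stand-in sketch (not the real feature-flag types) of the gating pattern used in `validate_settings`: every composite-embedder related setting goes through a check that errors out while the experimental feature is disabled:

```rust
struct Features {
    composite_embedders: bool,
}

impl Features {
    fn check_composite_embedders(&self, action: &str) -> Result<(), String> {
        if self.composite_embedders {
            Ok(())
        } else {
            // The real error message and type come from the feature-flag
            // machinery; this string is illustrative only.
            Err(format!("{action} requires enabling the `compositeEmbedders` experimental feature"))
        }
    }
}

fn main() {
    let features = Features { composite_embedders: false };
    assert!(features.check_composite_embedders("using `\"composite\"` as source").is_err());
}
```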


@ -5,7 +5,7 @@ use index_scheduler::IndexScheduler;
use meilisearch_types::deserr::query_params::Param; use meilisearch_types::deserr::query_params::Param;
use meilisearch_types::deserr::{DeserrJsonError, DeserrQueryParamError}; use meilisearch_types::deserr::{DeserrJsonError, DeserrQueryParamError};
use meilisearch_types::error::deserr_codes::*; use meilisearch_types::error::deserr_codes::*;
use meilisearch_types::error::{ErrorCode as _, ResponseError}; use meilisearch_types::error::ResponseError;
use meilisearch_types::index_uid::IndexUid; use meilisearch_types::index_uid::IndexUid;
use meilisearch_types::keys::actions; use meilisearch_types::keys::actions;
use meilisearch_types::serde_cs::vec::CS; use meilisearch_types::serde_cs::vec::CS;
@ -111,7 +111,7 @@ pub async fn similar_get(
) -> Result<HttpResponse, ResponseError> { ) -> Result<HttpResponse, ResponseError> {
let index_uid = IndexUid::try_from(index_uid.into_inner())?; let index_uid = IndexUid::try_from(index_uid.into_inner())?;
let query = params.0.try_into()?; let query = params.0.into();
let mut aggregate = SimilarAggregator::<SimilarGET>::from_query(&query); let mut aggregate = SimilarAggregator::<SimilarGET>::from_query(&query);
@ -295,10 +295,8 @@ impl std::convert::TryFrom<String> for RankingScoreThresholdGet {
} }
} }
impl TryFrom<SimilarQueryGet> for SimilarQuery { impl From<SimilarQueryGet> for SimilarQuery {
type Error = ResponseError; fn from(
fn try_from(
SimilarQueryGet { SimilarQueryGet {
id, id,
offset, offset,
@ -311,7 +309,7 @@ impl TryFrom<SimilarQueryGet> for SimilarQuery {
embedder, embedder,
ranking_score_threshold, ranking_score_threshold,
}: SimilarQueryGet, }: SimilarQueryGet,
) -> Result<Self, Self::Error> { ) -> Self {
let filter = match filter { let filter = match filter {
Some(f) => match serde_json::from_str(&f) { Some(f) => match serde_json::from_str(&f) {
Ok(v) => Some(v), Ok(v) => Some(v),
@ -320,10 +318,8 @@ impl TryFrom<SimilarQueryGet> for SimilarQuery {
None => None, None => None,
}; };
Ok(SimilarQuery { SimilarQuery {
id: id.0.try_into().map_err(|code: InvalidSimilarId| { id: serde_json::Value::String(id.0),
ResponseError::from_msg(code.to_string(), code.error_code())
})?,
offset: offset.0, offset: offset.0,
limit: limit.0, limit: limit.0,
filter, filter,
@ -333,6 +329,6 @@ impl TryFrom<SimilarQueryGet> for SimilarQuery {
show_ranking_score: show_ranking_score.0, show_ranking_score: show_ranking_score.0,
show_ranking_score_details: show_ranking_score_details.0, show_ranking_score_details: show_ranking_score_details.0,
ranking_score_threshold: ranking_score_threshold.map(|x| x.0), ranking_score_threshold: ranking_score_threshold.map(|x| x.0),
}) }
} }
} }
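Editor's note: turning `TryFrom<SimilarQueryGet>` into `From` makes the GET-to-internal conversion infallible: the raw `id` is wrapped as a JSON string to be validated later, and only the optional `filter` string still needs parsing. A reduced sketch of that conversion with stand-in structs; the hunk truncates the filter error branch, so falling back to the raw string below is an assumption, not a statement about the real code.

```rust
use serde_json::Value;

// Stand-ins for the route structs, not the actual Meilisearch types.
struct SimilarQueryGet {
    id: String,
    filter: Option<String>,
    limit: usize,
}

struct SimilarQuery {
    id: Value, // validated later, when the query is executed
    filter: Option<Value>,
    limit: usize,
}

impl From<SimilarQueryGet> for SimilarQuery {
    fn from(SimilarQueryGet { id, filter, limit }: SimilarQueryGet) -> Self {
        // A filter that does not parse as JSON is kept as a plain string here
        // (assumption: the truncated hunk hides what the real error arm does).
        let filter = filter.map(|f| match serde_json::from_str::<Value>(&f) {
            Ok(value) => value,
            Err(_) => Value::String(f),
        });
        SimilarQuery { id: Value::String(id), filter, limit }
    }
}

fn main() {
    let get = SimilarQueryGet { id: "doggo-1".into(), filter: Some("color = blue".into()), limit: 20 };
    let query = SimilarQuery::from(get);
    println!("id = {}, filter = {:?}, limit = {}", query.id, query.filter, query.limit);
}
```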


@ -340,7 +340,8 @@ impl SearchKind {
vector_len: Option<usize>, vector_len: Option<usize>,
route: Route, route: Route,
) -> Result<(String, Arc<Embedder>, bool), ResponseError> { ) -> Result<(String, Arc<Embedder>, bool), ResponseError> {
let embedder_configs = index.embedding_configs(&index.read_txn()?)?; let rtxn = index.read_txn()?;
let embedder_configs = index.embedding_configs(&rtxn)?;
let embedders = index_scheduler.embedders(index_uid, embedder_configs)?; let embedders = index_scheduler.embedders(index_uid, embedder_configs)?;
let (embedder, _, quantized) = embedders let (embedder, _, quantized) = embedders
@ -635,7 +636,7 @@ impl SearchQueryWithIndex {
pub struct SimilarQuery { pub struct SimilarQuery {
#[deserr(error = DeserrJsonError<InvalidSimilarId>)] #[deserr(error = DeserrJsonError<InvalidSimilarId>)]
#[schema(value_type = String)] #[schema(value_type = String)]
pub id: ExternalDocumentId, pub id: serde_json::Value,
#[deserr(default = DEFAULT_SEARCH_OFFSET(), error = DeserrJsonError<InvalidSimilarOffset>)] #[deserr(default = DEFAULT_SEARCH_OFFSET(), error = DeserrJsonError<InvalidSimilarOffset>)]
pub offset: usize, pub offset: usize,
#[deserr(default = DEFAULT_SEARCH_LIMIT(), error = DeserrJsonError<InvalidSimilarLimit>)] #[deserr(default = DEFAULT_SEARCH_LIMIT(), error = DeserrJsonError<InvalidSimilarLimit>)]
@ -657,8 +658,7 @@ pub struct SimilarQuery {
pub ranking_score_threshold: Option<RankingScoreThresholdSimilar>, pub ranking_score_threshold: Option<RankingScoreThresholdSimilar>,
} }
#[derive(Debug, Clone, PartialEq, Deserr)] #[derive(Debug, Clone, PartialEq)]
#[deserr(try_from(Value) = TryFrom::try_from -> InvalidSimilarId)]
pub struct ExternalDocumentId(String); pub struct ExternalDocumentId(String);
impl AsRef<str> for ExternalDocumentId { impl AsRef<str> for ExternalDocumentId {
@ -674,7 +674,7 @@ impl ExternalDocumentId {
} }
impl TryFrom<String> for ExternalDocumentId { impl TryFrom<String> for ExternalDocumentId {
type Error = InvalidSimilarId; type Error = milli::UserError;
fn try_from(value: String) -> Result<Self, Self::Error> { fn try_from(value: String) -> Result<Self, Self::Error> {
serde_json::Value::String(value).try_into() serde_json::Value::String(value).try_into()
@ -682,10 +682,10 @@ impl TryFrom<String> for ExternalDocumentId {
} }
impl TryFrom<Value> for ExternalDocumentId { impl TryFrom<Value> for ExternalDocumentId {
type Error = InvalidSimilarId; type Error = milli::UserError;
fn try_from(value: Value) -> Result<Self, Self::Error> { fn try_from(value: Value) -> Result<Self, Self::Error> {
Ok(Self(milli::documents::validate_document_id_value(value).map_err(|_| InvalidSimilarId)?)) Ok(Self(milli::documents::validate_document_id_value(value)?))
} }
} }
@ -916,7 +916,7 @@ fn prepare_search<'t>(
let deadline = std::time::Instant::now() + std::time::Duration::from_secs(10); let deadline = std::time::Instant::now() + std::time::Duration::from_secs(10);
embedder embedder
.embed_search(query.q.clone().unwrap(), Some(deadline)) .embed_search(query.q.as_ref().unwrap(), Some(deadline))
.map_err(milli::vector::Error::from) .map_err(milli::vector::Error::from)
.map_err(milli::Error::from)? .map_err(milli::Error::from)?
} }
@ -1598,6 +1598,11 @@ pub fn perform_similar(
ranking_score_threshold, ranking_score_threshold,
} = query; } = query;
let id: ExternalDocumentId = id.try_into().map_err(|error| {
let msg = format!("Invalid value at `.id`: {error}");
ResponseError::from_msg(msg, Code::InvalidSimilarId)
})?;
// using let-else rather than `?` so that the borrow checker identifies we're always returning here, // using let-else rather than `?` so that the borrow checker identifies we're always returning here,
// preventing a use-after-move // preventing a use-after-move
let Some(internal_id) = index.external_documents_ids().get(&rtxn, &id)? else { let Some(internal_id) = index.external_documents_ids().get(&rtxn, &id)? else {
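Editor's note: with the infallible conversion above, the document id is only validated inside `perform_similar`, where a failure surfaces as `Invalid value at `.id`: ...` with the `invalid_similar_id` code. A stand-alone sketch of that deferred validation using the rule quoted by the error messages in this diff (alphanumeric characters, `-`, `_`, at most 511 bytes); the error type, message wording and number handling below are simplifications, not the milli implementation.

```rust
use std::convert::TryFrom;

use serde_json::Value;

struct ExternalDocumentId(String);

impl TryFrom<Value> for ExternalDocumentId {
    type Error = String;

    fn try_from(value: Value) -> Result<Self, Self::Error> {
        let id = match value {
            Value::String(s) => s,
            // Numbers are accepted and rendered in decimal here; the real
            // validation is stricter about floats (assumption).
            Value::Number(n) => n.to_string(),
            other => return Err(format!("Document identifier `{other}` is invalid.")),
        };
        let well_formed = !id.is_empty()
            && id.len() <= 511
            && id.bytes().all(|b| b.is_ascii_alphanumeric() || b == b'-' || b == b'_');
        if well_formed {
            Ok(ExternalDocumentId(id))
        } else {
            Err(format!("Document identifier `\"{id}\"` is invalid."))
        }
    }
}

fn main() {
    // Validation happens at query time, so the handler can prefix the path.
    let raw = Value::String("http://invalid-docid/".into());
    match ExternalDocumentId::try_from(raw) {
        Ok(id) => println!("looking up `{}`", id.0),
        Err(error) => println!("Invalid value at `.id`: {error}"),
    }
}
```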


@ -281,7 +281,8 @@ async fn test_summarized_document_addition_or_update() {
".startedAt" => "[date]", ".startedAt" => "[date]",
".finishedAt" => "[date]", ".finishedAt" => "[date]",
".stats.progressTrace" => "[progressTrace]", ".stats.progressTrace" => "[progressTrace]",
".stats.writeChannelCongestion" => "[writeChannelCongestion]" ".stats.writeChannelCongestion" => "[writeChannelCongestion]",
".stats.internalDatabaseSizes" => "[internalDatabaseSizes]"
}, },
@r###" @r###"
{ {
@ -303,7 +304,8 @@ async fn test_summarized_document_addition_or_update() {
"test": 1 "test": 1
}, },
"progressTrace": "[progressTrace]", "progressTrace": "[progressTrace]",
"writeChannelCongestion": "[writeChannelCongestion]" "writeChannelCongestion": "[writeChannelCongestion]",
"internalDatabaseSizes": "[internalDatabaseSizes]"
}, },
"duration": "[duration]", "duration": "[duration]",
"startedAt": "[date]", "startedAt": "[date]",
@ -322,7 +324,8 @@ async fn test_summarized_document_addition_or_update() {
".startedAt" => "[date]", ".startedAt" => "[date]",
".finishedAt" => "[date]", ".finishedAt" => "[date]",
".stats.progressTrace" => "[progressTrace]", ".stats.progressTrace" => "[progressTrace]",
".stats.writeChannelCongestion" => "[writeChannelCongestion]" ".stats.writeChannelCongestion" => "[writeChannelCongestion]",
".stats.internalDatabaseSizes" => "[internalDatabaseSizes]"
}, },
@r###" @r###"
{ {
@ -407,7 +410,8 @@ async fn test_summarized_delete_documents_by_batch() {
".startedAt" => "[date]", ".startedAt" => "[date]",
".finishedAt" => "[date]", ".finishedAt" => "[date]",
".stats.progressTrace" => "[progressTrace]", ".stats.progressTrace" => "[progressTrace]",
".stats.writeChannelCongestion" => "[writeChannelCongestion]" ".stats.writeChannelCongestion" => "[writeChannelCongestion]",
".stats.internalDatabaseSizes" => "[internalDatabaseSizes]"
}, },
@r###" @r###"
{ {
@ -495,7 +499,8 @@ async fn test_summarized_delete_documents_by_filter() {
".startedAt" => "[date]", ".startedAt" => "[date]",
".finishedAt" => "[date]", ".finishedAt" => "[date]",
".stats.progressTrace" => "[progressTrace]", ".stats.progressTrace" => "[progressTrace]",
".stats.writeChannelCongestion" => "[writeChannelCongestion]" ".stats.writeChannelCongestion" => "[writeChannelCongestion]",
".stats.internalDatabaseSizes" => "[internalDatabaseSizes]"
}, },
@r###" @r###"
{ {
@ -537,7 +542,8 @@ async fn test_summarized_delete_documents_by_filter() {
".startedAt" => "[date]", ".startedAt" => "[date]",
".finishedAt" => "[date]", ".finishedAt" => "[date]",
".stats.progressTrace" => "[progressTrace]", ".stats.progressTrace" => "[progressTrace]",
".stats.writeChannelCongestion" => "[writeChannelCongestion]" ".stats.writeChannelCongestion" => "[writeChannelCongestion]",
".stats.internalDatabaseSizes" => "[internalDatabaseSizes]"
}, },
@r#" @r#"
{ {
@ -623,7 +629,8 @@ async fn test_summarized_delete_document_by_id() {
".startedAt" => "[date]", ".startedAt" => "[date]",
".finishedAt" => "[date]", ".finishedAt" => "[date]",
".stats.progressTrace" => "[progressTrace]", ".stats.progressTrace" => "[progressTrace]",
".stats.writeChannelCongestion" => "[writeChannelCongestion]" ".stats.writeChannelCongestion" => "[writeChannelCongestion]",
".stats.internalDatabaseSizes" => "[internalDatabaseSizes]"
}, },
@r#" @r#"
{ {
@ -679,7 +686,8 @@ async fn test_summarized_settings_update() {
".startedAt" => "[date]", ".startedAt" => "[date]",
".finishedAt" => "[date]", ".finishedAt" => "[date]",
".stats.progressTrace" => "[progressTrace]", ".stats.progressTrace" => "[progressTrace]",
".stats.writeChannelCongestion" => "[writeChannelCongestion]" ".stats.writeChannelCongestion" => "[writeChannelCongestion]",
".stats.internalDatabaseSizes" => "[internalDatabaseSizes]"
}, },
@r###" @r###"
{ {
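Editor's note: these snapshot updates redact the new `.stats.internalDatabaseSizes` field the same way `progressTrace` and `writeChannelCongestion` already are, because raw byte counts are not stable across runs. The tests do this through meili_snap's `json_string!` selectors; below is a minimal stand-alone equivalent using serde_json's JSON Pointer API, with made-up sample values.

```rust
use serde_json::{json, Value};

/// Replace the value at `pointer` (RFC 6901 JSON Pointer) with a stable
/// placeholder so the rest of the document can be compared verbatim.
fn redact(doc: &mut Value, pointer: &str, placeholder: &str) {
    if let Some(slot) = doc.pointer_mut(pointer) {
        *slot = Value::String(placeholder.to_string());
    }
}

fn main() {
    // Sample values are invented for the sketch.
    let mut batch = json!({
        "stats": {
            "progressTrace": { "processing the tasks": "12.3ms" },
            "writeChannelCongestion": { "attempts": 3 },
            "internalDatabaseSizes": { "documents": 163840 }
        }
    });
    for (pointer, tag) in [
        ("/stats/progressTrace", "[progressTrace]"),
        ("/stats/writeChannelCongestion", "[writeChannelCongestion]"),
        ("/stats/internalDatabaseSizes", "[internalDatabaseSizes]"),
    ] {
        redact(&mut batch, pointer, tag);
    }
    assert_eq!(batch["stats"]["internalDatabaseSizes"], json!("[internalDatabaseSizes]"));
    println!("{batch:#}");
}
```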


@ -411,7 +411,7 @@ impl<State> Index<'_, State> {
self.service.get(url).await self.service.get(url).await
} }
pub async fn get_document_by_filter(&self, payload: Value) -> (Value, StatusCode) { pub async fn fetch_documents(&self, payload: Value) -> (Value, StatusCode) {
let url = format!("/indexes/{}/documents/fetch", urlencode(self.uid.as_ref())); let url = format!("/indexes/{}/documents/fetch", urlencode(self.uid.as_ref()));
self.service.post(url, payload).await self.service.post(url, payload).await
} }
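Editor's note: the helper is renamed from `get_document_by_filter` to `fetch_documents` because the POST `/documents/fetch` route now carries more than a filter (ids, fields, pagination, and nested filters the GET query string cannot express). A hedged sketch of calling that route directly; it assumes `reqwest` (with the `json` feature), `tokio` and `serde_json` as dependencies, and the host and API key are placeholders for your own deployment.

```rust
use serde_json::json;

#[tokio::main]
async fn main() -> Result<(), reqwest::Error> {
    let client = reqwest::Client::new();
    // POST /indexes/{uid}/documents/fetch accepts the same options as the GET
    // route, plus shapes the query string cannot express (nested filter arrays).
    let response = client
        .post("http://localhost:7700/indexes/movies/documents/fetch")
        .header("Authorization", "Bearer MASTER_KEY") // placeholder key; drop if unprotected
        .json(&json!({
            "ids": [0, 1, 2, 3],
            "filter": [["color = blue", "color = red"]],
            "limit": 2,
            "fields": ["color"]
        }))
        .send()
        .await?;
    println!("{}", response.status());
    println!("{}", response.text().await?);
    Ok(())
}
```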


@ -1897,11 +1897,11 @@ async fn update_documents_with_geo_field() {
}, },
{ {
"id": "3", "id": "3",
"_geo": { "lat": 1, "lng": 1 }, "_geo": { "lat": 3, "lng": 0 },
}, },
{ {
"id": "4", "id": "4",
"_geo": { "lat": "1", "lng": "1" }, "_geo": { "lat": "4", "lng": "0" },
}, },
]); ]);
@ -1928,9 +1928,7 @@ async fn update_documents_with_geo_field() {
} }
"###); "###);
let (response, code) = index let (response, code) = index.search_post(json!({"sort": ["_geoPoint(10,0):asc"]})).await;
.search_post(json!({"sort": ["_geoPoint(50.629973371633746,3.0569447399419567):desc"]}))
.await;
snapshot!(code, @"200 OK"); snapshot!(code, @"200 OK");
// we are expecting docs 4 and 3 first as they have geo // we are expecting docs 4 and 3 first as they have geo
snapshot!(json_string!(response, { ".processingTimeMs" => "[time]" }), snapshot!(json_string!(response, { ".processingTimeMs" => "[time]" }),
@ -1940,18 +1938,18 @@ async fn update_documents_with_geo_field() {
{ {
"id": "4", "id": "4",
"_geo": { "_geo": {
"lat": "1", "lat": "4",
"lng": "1" "lng": "0"
}, },
"_geoDistance": 5522018 "_geoDistance": 667170
}, },
{ {
"id": "3", "id": "3",
"_geo": { "_geo": {
"lat": 1, "lat": 3,
"lng": 1 "lng": 0
}, },
"_geoDistance": 5522018 "_geoDistance": 778364
}, },
{ {
"id": "1" "id": "1"
@ -1969,10 +1967,13 @@ async fn update_documents_with_geo_field() {
} }
"###); "###);
let updated_documents = json!([{ let updated_documents = json!([
"id": "3", {
"doggo": "kefir", "id": "3",
}]); "doggo": "kefir",
"_geo": { "lat": 5, "lng": 0 },
}
]);
let (task, _status_code) = index.update_documents(updated_documents, None).await; let (task, _status_code) = index.update_documents(updated_documents, None).await;
let response = index.wait_task(task.uid()).await; let response = index.wait_task(task.uid()).await;
snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }), snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }),
@ -2012,16 +2013,16 @@ async fn update_documents_with_geo_field() {
{ {
"id": "3", "id": "3",
"_geo": { "_geo": {
"lat": 1, "lat": 5,
"lng": 1 "lng": 0
}, },
"doggo": "kefir" "doggo": "kefir"
}, },
{ {
"id": "4", "id": "4",
"_geo": { "_geo": {
"lat": "1", "lat": "4",
"lng": "1" "lng": "0"
} }
} }
], ],
@ -2031,31 +2032,29 @@ async fn update_documents_with_geo_field() {
} }
"###); "###);
let (response, code) = index let (response, code) = index.search_post(json!({"sort": ["_geoPoint(10,0):asc"]})).await;
.search_post(json!({"sort": ["_geoPoint(50.629973371633746,3.0569447399419567):desc"]}))
.await;
snapshot!(code, @"200 OK"); snapshot!(code, @"200 OK");
// the search response should not have changed: we are expecting docs 4 and 3 first as they have geo // the search response should not have changed: we are expecting docs 4 and 3 first as they have geo
snapshot!(json_string!(response, { ".processingTimeMs" => "[time]" }), snapshot!(json_string!(response, { ".processingTimeMs" => "[time]" }),
@r###" @r###"
{ {
"hits": [ "hits": [
{
"id": "4",
"_geo": {
"lat": "1",
"lng": "1"
},
"_geoDistance": 5522018
},
{ {
"id": "3", "id": "3",
"_geo": { "_geo": {
"lat": 1, "lat": 5,
"lng": 1 "lng": 0
}, },
"doggo": "kefir", "doggo": "kefir",
"_geoDistance": 5522018 "_geoDistance": 555975
},
{
"id": "4",
"_geo": {
"lat": "4",
"lng": "0"
},
"_geoDistance": 667170
}, },
{ {
"id": "1" "id": "1"


@ -157,11 +157,14 @@ async fn delete_document_by_filter() {
index.wait_task(task.uid()).await.succeeded(); index.wait_task(task.uid()).await.succeeded();
let (stats, _) = index.stats().await; let (stats, _) = index.stats().await;
snapshot!(json_string!(stats), @r###" snapshot!(json_string!(stats, {
".rawDocumentDbSize" => "[size]",
".avgDocumentSize" => "[size]",
}), @r###"
{ {
"numberOfDocuments": 4, "numberOfDocuments": 4,
"rawDocumentDbSize": 42, "rawDocumentDbSize": "[size]",
"avgDocumentSize": 10, "avgDocumentSize": "[size]",
"isIndexing": false, "isIndexing": false,
"numberOfEmbeddings": 0, "numberOfEmbeddings": 0,
"numberOfEmbeddedDocuments": 0, "numberOfEmbeddedDocuments": 0,
@ -208,11 +211,14 @@ async fn delete_document_by_filter() {
"###); "###);
let (stats, _) = index.stats().await; let (stats, _) = index.stats().await;
snapshot!(json_string!(stats), @r###" snapshot!(json_string!(stats, {
".rawDocumentDbSize" => "[size]",
".avgDocumentSize" => "[size]",
}), @r###"
{ {
"numberOfDocuments": 2, "numberOfDocuments": 2,
"rawDocumentDbSize": 16, "rawDocumentDbSize": "[size]",
"avgDocumentSize": 8, "avgDocumentSize": "[size]",
"isIndexing": false, "isIndexing": false,
"numberOfEmbeddings": 0, "numberOfEmbeddings": 0,
"numberOfEmbeddedDocuments": 0, "numberOfEmbeddedDocuments": 0,
@ -278,11 +284,14 @@ async fn delete_document_by_filter() {
"###); "###);
let (stats, _) = index.stats().await; let (stats, _) = index.stats().await;
snapshot!(json_string!(stats), @r###" snapshot!(json_string!(stats, {
".rawDocumentDbSize" => "[size]",
".avgDocumentSize" => "[size]",
}), @r###"
{ {
"numberOfDocuments": 1, "numberOfDocuments": 1,
"rawDocumentDbSize": 12, "rawDocumentDbSize": "[size]",
"avgDocumentSize": 12, "avgDocumentSize": "[size]",
"isIndexing": false, "isIndexing": false,
"numberOfEmbeddings": 0, "numberOfEmbeddings": 0,
"numberOfEmbeddedDocuments": 0, "numberOfEmbeddedDocuments": 0,


@ -667,7 +667,7 @@ async fn fetch_document_by_filter() {
.await; .await;
index.wait_task(task.uid()).await.succeeded(); index.wait_task(task.uid()).await.succeeded();
let (response, code) = index.get_document_by_filter(json!(null)).await; let (response, code) = index.fetch_documents(json!(null)).await;
snapshot!(code, @"400 Bad Request"); snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###" snapshot!(response, @r###"
{ {
@ -678,7 +678,7 @@ async fn fetch_document_by_filter() {
} }
"###); "###);
let (response, code) = index.get_document_by_filter(json!({ "offset": "doggo" })).await; let (response, code) = index.fetch_documents(json!({ "offset": "doggo" })).await;
snapshot!(code, @"400 Bad Request"); snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###" snapshot!(response, @r###"
{ {
@ -689,7 +689,7 @@ async fn fetch_document_by_filter() {
} }
"###); "###);
let (response, code) = index.get_document_by_filter(json!({ "limit": "doggo" })).await; let (response, code) = index.fetch_documents(json!({ "limit": "doggo" })).await;
snapshot!(code, @"400 Bad Request"); snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###" snapshot!(response, @r###"
{ {
@ -700,7 +700,7 @@ async fn fetch_document_by_filter() {
} }
"###); "###);
let (response, code) = index.get_document_by_filter(json!({ "fields": "doggo" })).await; let (response, code) = index.fetch_documents(json!({ "fields": "doggo" })).await;
snapshot!(code, @"400 Bad Request"); snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###" snapshot!(response, @r###"
{ {
@ -711,7 +711,7 @@ async fn fetch_document_by_filter() {
} }
"###); "###);
let (response, code) = index.get_document_by_filter(json!({ "filter": true })).await; let (response, code) = index.fetch_documents(json!({ "filter": true })).await;
snapshot!(code, @"400 Bad Request"); snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###" snapshot!(response, @r###"
{ {
@ -722,7 +722,7 @@ async fn fetch_document_by_filter() {
} }
"###); "###);
let (response, code) = index.get_document_by_filter(json!({ "filter": "cool doggo" })).await; let (response, code) = index.fetch_documents(json!({ "filter": "cool doggo" })).await;
snapshot!(code, @"400 Bad Request"); snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###" snapshot!(response, @r###"
{ {
@ -733,8 +733,7 @@ async fn fetch_document_by_filter() {
} }
"###); "###);
let (response, code) = let (response, code) = index.fetch_documents(json!({ "filter": "doggo = bernese" })).await;
index.get_document_by_filter(json!({ "filter": "doggo = bernese" })).await;
snapshot!(code, @"400 Bad Request"); snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###" snapshot!(response, @r###"
{ {
@ -762,8 +761,7 @@ async fn retrieve_vectors() {
"###); "###);
// FETCHALL DOCUMENTS BY POST // FETCHALL DOCUMENTS BY POST
let (response, _code) = let (response, _code) = index.fetch_documents(json!({ "retrieveVectors": "tamo" })).await;
index.get_document_by_filter(json!({ "retrieveVectors": "tamo" })).await;
snapshot!(response, @r###" snapshot!(response, @r###"
{ {
"message": "Invalid value type at `.retrieveVectors`: expected a boolean, but found a string: `\"tamo\"`", "message": "Invalid value type at `.retrieveVectors`: expected a boolean, but found a string: `\"tamo\"`",


@ -371,7 +371,7 @@ async fn get_document_by_filter() {
.await; .await;
index.wait_task(task.uid()).await.succeeded(); index.wait_task(task.uid()).await.succeeded();
let (response, code) = index.get_document_by_filter(json!({})).await; let (response, code) = index.fetch_documents(json!({})).await;
let (response2, code2) = index.get_all_documents_raw("").await; let (response2, code2) = index.get_all_documents_raw("").await;
snapshot!(code, @"200 OK"); snapshot!(code, @"200 OK");
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###" snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
@ -401,7 +401,7 @@ async fn get_document_by_filter() {
assert_eq!(code, code2); assert_eq!(code, code2);
assert_eq!(response, response2); assert_eq!(response, response2);
let (response, code) = index.get_document_by_filter(json!({ "filter": "color = blue" })).await; let (response, code) = index.fetch_documents(json!({ "filter": "color = blue" })).await;
let (response2, code2) = index.get_all_documents_raw("?filter=color=blue").await; let (response2, code2) = index.get_all_documents_raw("?filter=color=blue").await;
snapshot!(code, @"200 OK"); snapshot!(code, @"200 OK");
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###" snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
@ -424,9 +424,8 @@ async fn get_document_by_filter() {
assert_eq!(code, code2); assert_eq!(code, code2);
assert_eq!(response, response2); assert_eq!(response, response2);
let (response, code) = index let (response, code) =
.get_document_by_filter(json!({ "offset": 1, "limit": 1, "filter": "color != blue" })) index.fetch_documents(json!({ "offset": 1, "limit": 1, "filter": "color != blue" })).await;
.await;
let (response2, code2) = let (response2, code2) =
index.get_all_documents_raw("?filter=color!=blue&offset=1&limit=1").await; index.get_all_documents_raw("?filter=color!=blue&offset=1&limit=1").await;
snapshot!(code, @"200 OK"); snapshot!(code, @"200 OK");
@ -446,9 +445,7 @@ async fn get_document_by_filter() {
assert_eq!(response, response2); assert_eq!(response, response2);
let (response, code) = index let (response, code) = index
.get_document_by_filter( .fetch_documents(json!({ "limit": 1, "filter": "color != blue", "fields": ["color"] }))
json!({ "limit": 1, "filter": "color != blue", "fields": ["color"] }),
)
.await; .await;
let (response2, code2) = let (response2, code2) =
index.get_all_documents_raw("?limit=1&filter=color!=blue&fields=color").await; index.get_all_documents_raw("?limit=1&filter=color!=blue&fields=color").await;
@ -471,7 +468,7 @@ async fn get_document_by_filter() {
// Now testing more complex filter that the get route can't represent // Now testing more complex filter that the get route can't represent
let (response, code) = let (response, code) =
index.get_document_by_filter(json!({ "filter": [["color = blue", "color = red"]] })).await; index.fetch_documents(json!({ "filter": [["color = blue", "color = red"]] })).await;
snapshot!(code, @"200 OK"); snapshot!(code, @"200 OK");
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###" snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
{ {
@ -495,9 +492,8 @@ async fn get_document_by_filter() {
} }
"###); "###);
let (response, code) = index let (response, code) =
.get_document_by_filter(json!({ "filter": [["color != blue"], "color EXISTS"] })) index.fetch_documents(json!({ "filter": [["color != blue"], "color EXISTS"] })).await;
.await;
snapshot!(code, @"200 OK"); snapshot!(code, @"200 OK");
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###" snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
{ {
@ -514,6 +510,326 @@ async fn get_document_by_filter() {
"###); "###);
} }
#[actix_rt::test]
async fn get_document_by_ids() {
let server = Server::new_shared();
let index = server.unique_index();
let (task, _code) = index
.add_documents(
json!([
{ "id": 0, "color": "red" },
{ "id": 1, "color": "blue" },
{ "id": 2, "color": "blue" },
{ "id": 3 },
]),
Some("id"),
)
.await;
index.wait_task(task.uid()).await.succeeded();
let (response, code) = index
.fetch_documents(json!({
"ids": ["0", 1, 2, 3]
}))
.await;
let (response2, code2) = index.get_all_documents_raw("?ids=0,1,2,3").await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
{
"results": [
{
"id": 0,
"color": "red"
},
{
"id": 1,
"color": "blue"
},
{
"id": 2,
"color": "blue"
},
{
"id": 3
}
],
"offset": 0,
"limit": 20,
"total": 4
}
"###);
assert_eq!(code, code2);
assert_eq!(response, response2);
let (response, code) = index.fetch_documents(json!({ "ids": [2, "1"] })).await;
let (response2, code2) = index.get_all_documents_raw("?ids=2,1").await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
{
"results": [
{
"id": 1,
"color": "blue"
},
{
"id": 2,
"color": "blue"
}
],
"offset": 0,
"limit": 20,
"total": 2
}
"###);
assert_eq!(code, code2);
assert_eq!(response, response2);
let (response, code) =
index.fetch_documents(json!({ "offset": 1, "limit": 1, "ids": ["0", 0, 3] })).await;
let (response2, code2) = index.get_all_documents_raw("?ids=3,0&offset=1&limit=1").await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
{
"results": [
{
"id": 3
}
],
"offset": 1,
"limit": 1,
"total": 2
}
"###);
assert_eq!(code, code2);
assert_eq!(response, response2);
let (response, code) =
index.fetch_documents(json!({ "limit": 1, "ids": [0, 3], "fields": ["color"] })).await;
let (response2, code2) = index.get_all_documents_raw("?limit=1&ids=0,3&fields=color").await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
{
"results": [
{
"color": "red"
}
],
"offset": 0,
"limit": 1,
"total": 2
}
"###);
assert_eq!(code, code2);
assert_eq!(response, response2);
// Now testing more complex requests that the get route can't represent
let (response, code) = index.fetch_documents(json!({ "ids": [] })).await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
{
"results": [],
"offset": 0,
"limit": 20,
"total": 0
}
"###);
}
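Editor's note: the new `ids` tests pin down a few semantics worth calling out: `total` counts every requested id that exists (missing ids are silently dropped rather than reported), results come back in document order rather than request order, and `offset`/`limit` apply after the id filter. A toy sketch of those rules; this is my own helper over invented data, not the production code, and doc 3 gets a placeholder colour because it has none in the test documents.

```rust
use std::collections::BTreeMap;

/// Returns (page, total): `total` is the number of matching documents before
/// pagination; ids that do not exist are simply ignored.
fn fetch_by_ids(
    docs: &BTreeMap<u32, &'static str>,
    ids: &[u32],
    offset: usize,
    limit: usize,
) -> (Vec<(u32, &'static str)>, usize) {
    let mut matching: Vec<(u32, &'static str)> = Vec::new();
    // BTreeMap iteration keeps document order, not the order ids were requested in.
    for (&id, &color) in docs.iter() {
        if ids.contains(&id) {
            matching.push((id, color));
        }
    }
    let total = matching.len();
    (matching.into_iter().skip(offset).take(limit).collect(), total)
}

fn main() {
    let docs = BTreeMap::from([(0, "red"), (1, "blue"), (2, "blue"), (3, "none")]);

    // Mirrors `{"offset": 1, "limit": 1, "ids": ["0", 0, 3]}`: total 2, one hit.
    let (page, total) = fetch_by_ids(&docs, &[0, 3], 1, 1);
    assert_eq!(total, 2);
    assert_eq!(page, vec![(3, "none")]);

    // Mirrors `{"ids": ["0", 3, 42]}`: the unknown id 42 is simply absent.
    let (page, total) = fetch_by_ids(&docs, &[0, 3, 42], 0, 20);
    assert_eq!(total, 2);
    println!("{page:?}");
}
```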
#[actix_rt::test]
async fn get_document_invalid_ids() {
let server = Server::new_shared();
let index = server.unique_index();
let (task, _code) = index
.add_documents(
json!([
{ "id": 0, "color": "red" },
{ "id": 1, "color": "blue" },
{ "id": 2, "color": "blue" },
{ "id": 3 },
]),
Some("id"),
)
.await;
index.wait_task(task.uid()).await.succeeded();
let (response, code) = index.fetch_documents(json!({"ids": ["0", "illegal/docid"] })).await;
let (response2, code2) = index.get_all_documents_raw("?ids=0,illegal/docid").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
{
"message": "In `.ids[1]`: Document identifier `\"illegal/docid\"` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), and can not be more than 511 bytes.",
"code": "invalid_document_ids",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_ids"
}
"###);
assert_eq!(code, code2);
assert_eq!(response, response2);
}
#[actix_rt::test]
async fn get_document_not_found_ids() {
let server = Server::new_shared();
let index = server.unique_index();
let (task, _code) = index
.add_documents(
json!([
{ "id": 0, "color": "red" },
{ "id": 1, "color": "blue" },
{ "id": 2, "color": "blue" },
{ "id": 3 },
]),
Some("id"),
)
.await;
index.wait_task(task.uid()).await.succeeded();
let (response, code) = index.fetch_documents(json!({"ids": ["0", 3, 42] })).await;
let (response2, code2) = index.get_all_documents_raw("?ids=0,3,42").await;
// the document with id 42 is not in the results since it doesn't exist
// however, no error is raised
snapshot!(code, @"200 OK");
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
{
"results": [
{
"id": 0,
"color": "red"
},
{
"id": 3
}
],
"offset": 0,
"limit": 20,
"total": 2
}
"###);
assert_eq!(code, code2);
assert_eq!(response, response2);
}
#[actix_rt::test]
async fn get_document_by_ids_and_filter() {
let server = Server::new_shared();
let index = server.unique_index();
index.update_settings_filterable_attributes(json!(["color"])).await;
let (task, _code) = index
.add_documents(
json!([
{ "id": 0, "color": "red" },
{ "id": 1, "color": "blue" },
{ "id": 2, "color": "blue" },
{ "id": 3 },
]),
Some("id"),
)
.await;
index.wait_task(task.uid()).await.succeeded();
let (response, code) =
index.fetch_documents(json!({"ids": [2], "filter": "color = blue" })).await;
let (response2, code2) = index.get_all_documents_raw("?ids=2&filter=color=blue").await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
{
"results": [
{
"id": 2,
"color": "blue"
}
],
"offset": 0,
"limit": 20,
"total": 1
}
"###);
assert_eq!(code, code2);
assert_eq!(response, response2);
let (response, code) = index
.fetch_documents(
json!({ "offset": 1, "limit": 1, "ids": [0, 1, 2, 3], "filter": "color != blue" }),
)
.await;
let (response2, code2) =
index.get_all_documents_raw("?ids=0,1,2,3&filter=color!=blue&offset=1&limit=1").await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
{
"results": [
{
"id": 3
}
],
"offset": 1,
"limit": 1,
"total": 2
}
"###);
assert_eq!(code, code2);
assert_eq!(response, response2);
let (response, code) = index
.fetch_documents(json!({ "limit": 1, "ids": [0, 1, 2,3], "filter": "color != blue", "fields": ["color"] }))
.await;
let (response2, code2) =
index.get_all_documents_raw("?ids=0,1,2,3&limit=1&filter=color!=blue&fields=color").await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
{
"results": [
{
"color": "red"
}
],
"offset": 0,
"limit": 1,
"total": 2
}
"###);
assert_eq!(code, code2);
assert_eq!(response, response2);
// Now testing more complex filter that the get route can't represent
let (response, code) = index
.fetch_documents(json!({ "ids": [0, "2"], "filter": [["color = blue", "color = red"]] }))
.await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
{
"results": [
{
"id": 0,
"color": "red"
},
{
"id": 2,
"color": "blue"
}
],
"offset": 0,
"limit": 20,
"total": 2
}
"###);
let (response, code) = index
.fetch_documents(json!({ "filter": [["color != blue"], "color EXISTS"], "ids": [1, 2, 3] }))
.await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
{
"results": [],
"offset": 0,
"limit": 20,
"total": 0
}
"###);
}
#[actix_rt::test] #[actix_rt::test]
async fn get_document_with_vectors() { async fn get_document_with_vectors() {
let server = Server::new().await; let server = Server::new().await;


@ -28,12 +28,15 @@ async fn import_dump_v1_movie_raw() {
let (stats, code) = index.stats().await; let (stats, code) = index.stats().await;
snapshot!(code, @"200 OK"); snapshot!(code, @"200 OK");
snapshot!( snapshot!(
json_string!(stats), json_string!(stats, {
".rawDocumentDbSize" => "[size]",
".avgDocumentSize" => "[size]",
}),
@r###" @r###"
{ {
"numberOfDocuments": 53, "numberOfDocuments": 53,
"rawDocumentDbSize": 21965, "rawDocumentDbSize": "[size]",
"avgDocumentSize": 414, "avgDocumentSize": "[size]",
"isIndexing": false, "isIndexing": false,
"numberOfEmbeddings": 0, "numberOfEmbeddings": 0,
"numberOfEmbeddedDocuments": 0, "numberOfEmbeddedDocuments": 0,
@ -185,12 +188,15 @@ async fn import_dump_v1_movie_with_settings() {
let (stats, code) = index.stats().await; let (stats, code) = index.stats().await;
snapshot!(code, @"200 OK"); snapshot!(code, @"200 OK");
snapshot!( snapshot!(
json_string!(stats), json_string!(stats, {
".rawDocumentDbSize" => "[size]",
".avgDocumentSize" => "[size]",
}),
@r###" @r###"
{ {
"numberOfDocuments": 53, "numberOfDocuments": 53,
"rawDocumentDbSize": 21965, "rawDocumentDbSize": "[size]",
"avgDocumentSize": 414, "avgDocumentSize": "[size]",
"isIndexing": false, "isIndexing": false,
"numberOfEmbeddings": 0, "numberOfEmbeddings": 0,
"numberOfEmbeddedDocuments": 0, "numberOfEmbeddedDocuments": 0,
@ -355,12 +361,15 @@ async fn import_dump_v1_rubygems_with_settings() {
let (stats, code) = index.stats().await; let (stats, code) = index.stats().await;
snapshot!(code, @"200 OK"); snapshot!(code, @"200 OK");
snapshot!( snapshot!(
json_string!(stats), json_string!(stats, {
".rawDocumentDbSize" => "[size]",
".avgDocumentSize" => "[size]",
}),
@r###" @r###"
{ {
"numberOfDocuments": 53, "numberOfDocuments": 53,
"rawDocumentDbSize": 8606, "rawDocumentDbSize": "[size]",
"avgDocumentSize": 162, "avgDocumentSize": "[size]",
"isIndexing": false, "isIndexing": false,
"numberOfEmbeddings": 0, "numberOfEmbeddings": 0,
"numberOfEmbeddedDocuments": 0, "numberOfEmbeddedDocuments": 0,
@ -522,12 +531,15 @@ async fn import_dump_v2_movie_raw() {
let (stats, code) = index.stats().await; let (stats, code) = index.stats().await;
snapshot!(code, @"200 OK"); snapshot!(code, @"200 OK");
snapshot!( snapshot!(
json_string!(stats), json_string!(stats, {
".rawDocumentDbSize" => "[size]",
".avgDocumentSize" => "[size]",
}),
@r###" @r###"
{ {
"numberOfDocuments": 53, "numberOfDocuments": 53,
"rawDocumentDbSize": 21965, "rawDocumentDbSize": "[size]",
"avgDocumentSize": 414, "avgDocumentSize": "[size]",
"isIndexing": false, "isIndexing": false,
"numberOfEmbeddings": 0, "numberOfEmbeddings": 0,
"numberOfEmbeddedDocuments": 0, "numberOfEmbeddedDocuments": 0,
@ -679,12 +691,15 @@ async fn import_dump_v2_movie_with_settings() {
let (stats, code) = index.stats().await; let (stats, code) = index.stats().await;
snapshot!(code, @"200 OK"); snapshot!(code, @"200 OK");
snapshot!( snapshot!(
json_string!(stats), json_string!(stats, {
".rawDocumentDbSize" => "[size]",
".avgDocumentSize" => "[size]",
}),
@r###" @r###"
{ {
"numberOfDocuments": 53, "numberOfDocuments": 53,
"rawDocumentDbSize": 21965, "rawDocumentDbSize": "[size]",
"avgDocumentSize": 414, "avgDocumentSize": "[size]",
"isIndexing": false, "isIndexing": false,
"numberOfEmbeddings": 0, "numberOfEmbeddings": 0,
"numberOfEmbeddedDocuments": 0, "numberOfEmbeddedDocuments": 0,
@ -846,12 +861,15 @@ async fn import_dump_v2_rubygems_with_settings() {
let (stats, code) = index.stats().await; let (stats, code) = index.stats().await;
snapshot!(code, @"200 OK"); snapshot!(code, @"200 OK");
snapshot!( snapshot!(
json_string!(stats), json_string!(stats, {
".rawDocumentDbSize" => "[size]",
".avgDocumentSize" => "[size]",
}),
@r###" @r###"
{ {
"numberOfDocuments": 53, "numberOfDocuments": 53,
"rawDocumentDbSize": 8606, "rawDocumentDbSize": "[size]",
"avgDocumentSize": 162, "avgDocumentSize": "[size]",
"isIndexing": false, "isIndexing": false,
"numberOfEmbeddings": 0, "numberOfEmbeddings": 0,
"numberOfEmbeddedDocuments": 0, "numberOfEmbeddedDocuments": 0,
@ -1010,12 +1028,15 @@ async fn import_dump_v3_movie_raw() {
let (stats, code) = index.stats().await; let (stats, code) = index.stats().await;
snapshot!(code, @"200 OK"); snapshot!(code, @"200 OK");
snapshot!( snapshot!(
json_string!(stats), json_string!(stats, {
".rawDocumentDbSize" => "[size]",
".avgDocumentSize" => "[size]",
}),
@r###" @r###"
{ {
"numberOfDocuments": 53, "numberOfDocuments": 53,
"rawDocumentDbSize": 21965, "rawDocumentDbSize": "[size]",
"avgDocumentSize": 414, "avgDocumentSize": "[size]",
"isIndexing": false, "isIndexing": false,
"numberOfEmbeddings": 0, "numberOfEmbeddings": 0,
"numberOfEmbeddedDocuments": 0, "numberOfEmbeddedDocuments": 0,
@ -1167,12 +1188,15 @@ async fn import_dump_v3_movie_with_settings() {
let (stats, code) = index.stats().await; let (stats, code) = index.stats().await;
snapshot!(code, @"200 OK"); snapshot!(code, @"200 OK");
snapshot!( snapshot!(
json_string!(stats), json_string!(stats, {
".rawDocumentDbSize" => "[size]",
".avgDocumentSize" => "[size]",
}),
@r###" @r###"
{ {
"numberOfDocuments": 53, "numberOfDocuments": 53,
"rawDocumentDbSize": 21965, "rawDocumentDbSize": "[size]",
"avgDocumentSize": 414, "avgDocumentSize": "[size]",
"isIndexing": false, "isIndexing": false,
"numberOfEmbeddings": 0, "numberOfEmbeddings": 0,
"numberOfEmbeddedDocuments": 0, "numberOfEmbeddedDocuments": 0,
@ -1334,12 +1358,15 @@ async fn import_dump_v3_rubygems_with_settings() {
let (stats, code) = index.stats().await; let (stats, code) = index.stats().await;
snapshot!(code, @"200 OK"); snapshot!(code, @"200 OK");
snapshot!( snapshot!(
json_string!(stats), json_string!(stats, {
".rawDocumentDbSize" => "[size]",
".avgDocumentSize" => "[size]",
}),
@r###" @r###"
{ {
"numberOfDocuments": 53, "numberOfDocuments": 53,
"rawDocumentDbSize": 8606, "rawDocumentDbSize": "[size]",
"avgDocumentSize": 162, "avgDocumentSize": "[size]",
"isIndexing": false, "isIndexing": false,
"numberOfEmbeddings": 0, "numberOfEmbeddings": 0,
"numberOfEmbeddedDocuments": 0, "numberOfEmbeddedDocuments": 0,
@ -1498,12 +1525,15 @@ async fn import_dump_v4_movie_raw() {
let (stats, code) = index.stats().await; let (stats, code) = index.stats().await;
snapshot!(code, @"200 OK"); snapshot!(code, @"200 OK");
snapshot!( snapshot!(
json_string!(stats), json_string!(stats, {
".rawDocumentDbSize" => "[size]",
".avgDocumentSize" => "[size]",
}),
@r###" @r###"
{ {
"numberOfDocuments": 53, "numberOfDocuments": 53,
"rawDocumentDbSize": 21965, "rawDocumentDbSize": "[size]",
"avgDocumentSize": 414, "avgDocumentSize": "[size]",
"isIndexing": false, "isIndexing": false,
"numberOfEmbeddings": 0, "numberOfEmbeddings": 0,
"numberOfEmbeddedDocuments": 0, "numberOfEmbeddedDocuments": 0,
@ -1655,12 +1685,15 @@ async fn import_dump_v4_movie_with_settings() {
let (stats, code) = index.stats().await; let (stats, code) = index.stats().await;
snapshot!(code, @"200 OK"); snapshot!(code, @"200 OK");
snapshot!( snapshot!(
json_string!(stats), json_string!(stats, {
".rawDocumentDbSize" => "[size]",
".avgDocumentSize" => "[size]",
}),
@r###" @r###"
{ {
"numberOfDocuments": 53, "numberOfDocuments": 53,
"rawDocumentDbSize": 21965, "rawDocumentDbSize": "[size]",
"avgDocumentSize": 414, "avgDocumentSize": "[size]",
"isIndexing": false, "isIndexing": false,
"numberOfEmbeddings": 0, "numberOfEmbeddings": 0,
"numberOfEmbeddedDocuments": 0, "numberOfEmbeddedDocuments": 0,
@ -1822,12 +1855,15 @@ async fn import_dump_v4_rubygems_with_settings() {
let (stats, code) = index.stats().await; let (stats, code) = index.stats().await;
snapshot!(code, @"200 OK"); snapshot!(code, @"200 OK");
snapshot!( snapshot!(
json_string!(stats), json_string!(stats, {
".rawDocumentDbSize" => "[size]",
".avgDocumentSize" => "[size]",
}),
@r###" @r###"
{ {
"numberOfDocuments": 53, "numberOfDocuments": 53,
"rawDocumentDbSize": 8606, "rawDocumentDbSize": "[size]",
"avgDocumentSize": 162, "avgDocumentSize": "[size]",
"isIndexing": false, "isIndexing": false,
"numberOfEmbeddings": 0, "numberOfEmbeddings": 0,
"numberOfEmbeddedDocuments": 0, "numberOfEmbeddedDocuments": 0,
@ -1994,11 +2030,14 @@ async fn import_dump_v5() {
let (stats, code) = index1.stats().await; let (stats, code) = index1.stats().await;
snapshot!(code, @"200 OK"); snapshot!(code, @"200 OK");
snapshot!(json_string!(stats), @r###" snapshot!(json_string!(stats, {
".rawDocumentDbSize" => "[size]",
".avgDocumentSize" => "[size]",
}), @r###"
{ {
"numberOfDocuments": 10, "numberOfDocuments": 10,
"rawDocumentDbSize": 6782, "rawDocumentDbSize": "[size]",
"avgDocumentSize": 678, "avgDocumentSize": "[size]",
"isIndexing": false, "isIndexing": false,
"numberOfEmbeddings": 0, "numberOfEmbeddings": 0,
"numberOfEmbeddedDocuments": 0, "numberOfEmbeddedDocuments": 0,
@ -2031,12 +2070,15 @@ async fn import_dump_v5() {
let (stats, code) = index2.stats().await; let (stats, code) = index2.stats().await;
snapshot!(code, @"200 OK"); snapshot!(code, @"200 OK");
snapshot!( snapshot!(
json_string!(stats), json_string!(stats, {
".rawDocumentDbSize" => "[size]",
".avgDocumentSize" => "[size]",
}),
@r###" @r###"
{ {
"numberOfDocuments": 10, "numberOfDocuments": 10,
"rawDocumentDbSize": 6782, "rawDocumentDbSize": "[size]",
"avgDocumentSize": 678, "avgDocumentSize": "[size]",
"isIndexing": false, "isIndexing": false,
"numberOfEmbeddings": 0, "numberOfEmbeddings": 0,
"numberOfEmbeddedDocuments": 0, "numberOfEmbeddedDocuments": 0,
@ -2132,7 +2174,8 @@ async fn import_dump_v6_containing_experimental_features() {
"editDocumentsByFunction": false, "editDocumentsByFunction": false,
"containsFilter": false, "containsFilter": false,
"network": false, "network": false,
"getTaskDocumentsRoute": false "getTaskDocumentsRoute": false,
"compositeEmbedders": false
} }
"###); "###);
@ -2236,6 +2279,7 @@ async fn import_dump_v6_containing_batches_and_enqueued_tasks() {
".results[0].duration" => "[date]", ".results[0].duration" => "[date]",
".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.progressTrace" => "[progressTrace]",
".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]",
".results[0].stats.internalDatabaseSizes" => "[internalDatabaseSizes]",
}), name: "batches"); }), name: "batches");
let (indexes, code) = server.list_indexes(None, None).await; let (indexes, code) = server.list_indexes(None, None).await;
@ -2254,7 +2298,8 @@ async fn import_dump_v6_containing_batches_and_enqueued_tasks() {
"editDocumentsByFunction": false, "editDocumentsByFunction": false,
"containsFilter": false, "containsFilter": false,
"network": false, "network": false,
"getTaskDocumentsRoute": false "getTaskDocumentsRoute": false,
"compositeEmbedders": false
} }
"###); "###);
@ -2358,7 +2403,8 @@ async fn generate_and_import_dump_containing_vectors() {
"editDocumentsByFunction": false, "editDocumentsByFunction": false,
"containsFilter": false, "containsFilter": false,
"network": false, "network": false,
"getTaskDocumentsRoute": false "getTaskDocumentsRoute": false,
"compositeEmbedders": false
} }
"###); "###);


@ -23,7 +23,8 @@ async fn experimental_features() {
"editDocumentsByFunction": false, "editDocumentsByFunction": false,
"containsFilter": false, "containsFilter": false,
"network": false, "network": false,
"getTaskDocumentsRoute": false "getTaskDocumentsRoute": false,
"compositeEmbedders": false
} }
"###); "###);
@ -37,7 +38,8 @@ async fn experimental_features() {
"editDocumentsByFunction": false, "editDocumentsByFunction": false,
"containsFilter": false, "containsFilter": false,
"network": false, "network": false,
"getTaskDocumentsRoute": false "getTaskDocumentsRoute": false,
"compositeEmbedders": false
} }
"###); "###);
@ -51,7 +53,8 @@ async fn experimental_features() {
"editDocumentsByFunction": false, "editDocumentsByFunction": false,
"containsFilter": false, "containsFilter": false,
"network": false, "network": false,
"getTaskDocumentsRoute": false "getTaskDocumentsRoute": false,
"compositeEmbedders": false
} }
"###); "###);
@ -66,7 +69,8 @@ async fn experimental_features() {
"editDocumentsByFunction": false, "editDocumentsByFunction": false,
"containsFilter": false, "containsFilter": false,
"network": false, "network": false,
"getTaskDocumentsRoute": false "getTaskDocumentsRoute": false,
"compositeEmbedders": false
} }
"###); "###);
@ -81,7 +85,8 @@ async fn experimental_features() {
"editDocumentsByFunction": false, "editDocumentsByFunction": false,
"containsFilter": false, "containsFilter": false,
"network": false, "network": false,
"getTaskDocumentsRoute": false "getTaskDocumentsRoute": false,
"compositeEmbedders": false
} }
"###); "###);
} }
@ -103,7 +108,8 @@ async fn experimental_feature_metrics() {
"editDocumentsByFunction": false, "editDocumentsByFunction": false,
"containsFilter": false, "containsFilter": false,
"network": false, "network": false,
"getTaskDocumentsRoute": false "getTaskDocumentsRoute": false,
"compositeEmbedders": false
} }
"###); "###);
@ -138,14 +144,6 @@ async fn experimental_feature_metrics() {
let (response, code) = server.get_metrics().await; let (response, code) = server.get_metrics().await;
meili_snap::snapshot!(code, @"200 OK"); meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(response, @"null"); meili_snap::snapshot!(response, @"null");
// startup without flag respects persisted metrics value
let disable_metrics =
Opt { experimental_enable_metrics: false, ..default_settings(dir.path()) };
let server_no_flag = Server::new_with_options(disable_metrics).await.unwrap();
let (response, code) = server_no_flag.get_metrics().await;
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(response, @"null");
} }
#[actix_rt::test] #[actix_rt::test]
@ -158,7 +156,7 @@ async fn errors() {
meili_snap::snapshot!(code, @"400 Bad Request"); meili_snap::snapshot!(code, @"400 Bad Request");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###" meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{ {
"message": "Unknown field `NotAFeature`: expected one of `metrics`, `logsRoute`, `editDocumentsByFunction`, `containsFilter`, `network`, `getTaskDocumentsRoute`", "message": "Unknown field `NotAFeature`: expected one of `metrics`, `logsRoute`, `editDocumentsByFunction`, `containsFilter`, `network`, `getTaskDocumentsRoute`, `compositeEmbedders`",
"code": "bad_request", "code": "bad_request",
"type": "invalid_request", "type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#bad_request" "link": "https://docs.meilisearch.com/errors#bad_request"


@ -615,3 +615,336 @@ async fn facet_search_with_filterable_attributes_rules_errors() {
}, },
).await; ).await;
} }
#[actix_rt::test]
async fn distinct_facet_search_on_movies() {
let server = Server::new().await;
let index = server.index("test");
let documents = json!([
{
"id": 1,
"title": "Carol",
"genres": ["Romance", "Drama", "Blob"],
"color": "crimson"
},
{
"id": 2,
"title": "Wonder Woman",
"genres": ["Action", "Adventure", "Blob"],
"color": "emerald"
},
{
"id": 3,
"title": "Life of Pi",
"genres": ["Adventure", "Drama", "Blob"],
"color": "azure"
},
{
"id": 4,
"title": "Mad Max: Fury Road",
"genres": ["Adventure", "Science Fiction", "Blob"],
"color": "scarlet"
},
{
"id": 5,
"title": "Moana",
"genres": ["Fantasy", "Action", "Blob"],
"color": "coral"
},
{
"id": 6,
"title": "Philadelphia",
"genres": ["Drama", "Blob"],
"color": "navy"
},
{
"id": 7,
"title": "The Matrix",
"genres": ["Science Fiction", "Action", "Blob"],
"color": "onyx"
},
{
"id": 8,
"title": "Inception",
"genres": ["Science Fiction", "Thriller", "Blob"],
"color": "cerulean"
},
{
"id": 9,
"title": "The Shawshank Redemption",
"genres": ["Drama", "Blob"],
"color": "slate"
},
{
"id": 10,
"title": "Pulp Fiction",
"genres": ["Crime", "Drama", "Blob"],
"color": "gold"
},
{
"id": 11,
"title": "The Dark Knight",
"genres": ["Action", "Crime", "Blob"],
"color": "obsidian"
},
{
"id": 12,
"title": "Forrest Gump",
"genres": ["Drama", "Romance", "Blob"],
"color": "jade"
},
{
"id": 13,
"title": "The Godfather",
"genres": ["Crime", "Drama", "Blob"],
"color": "sepia"
},
{
"id": 14,
"title": "Fight Club",
"genres": ["Drama", "Thriller", "Blob"],
"color": "ruby"
},
{
"id": 15,
"title": "Goodfellas",
"genres": ["Crime", "Biography", "Blob"],
"color": "charcoal"
},
{
"id": 16,
"title": "The Silence of the Lambs",
"genres": ["Crime", "Thriller", "Blob"],
"color": "amethyst"
},
{
"id": 17,
"title": "Schindler's List",
"genres": ["Biography", "Drama", "Blob"],
"color": "ebony"
},
{
"id": 18,
"title": "The Lord of the Rings",
"genres": ["Adventure", "Fantasy", "Blob"],
"color": "forest"
},
{
"id": 19,
"title": "Star Wars",
"genres": ["Science Fiction", "Adventure", "Blob"],
"color": "amber"
},
{
"id": 20,
"title": "Jurassic Park",
"genres": ["Adventure", "Science Fiction", "Blob"],
"color": "lime"
},
{
"id": 21,
"title": "Titanic",
"genres": ["Drama", "Romance", "Blob"],
"color": "sapphire"
},
{
"id": 22,
"title": "The Avengers",
"genres": ["Action", "Science Fiction", "Blob"],
"color": "burgundy"
},
{
"id": 23,
"title": "Avatar",
"genres": ["Science Fiction", "Adventure", "Blob"],
"color": "turquoise"
},
{
"id": 24,
"title": "The Green Mile",
"genres": ["Crime", "Fantasy", "Blob"],
"color": "emerald"
},
{
"id": 25,
"title": "Gladiator",
"genres": ["Action", "Drama", "Blob"],
"color": "sepia"
},
{
"id": 26,
"title": "The Departed",
"genres": ["Crime", "Thriller", "Blob"],
"color": "crimson"
},
{
"id": 27,
"title": "Saving Private Ryan",
"genres": ["Drama", "War", "Blob"],
"color": "slate"
},
{
"id": 28,
"title": "Interstellar",
"genres": ["Science Fiction", "Adventure", "Blob"],
"color": "azure"
},
{
"id": 29,
"title": "The Pianist",
"genres": ["Biography", "Drama", "Blob"],
"color": "onyx"
},
{
"id": 30,
"title": "The Usual Suspects",
"genres": ["Crime", "Mystery", "Blob"],
"color": "charcoal"
},
{
"id": 31,
"title": "The Sixth Sense",
"genres": ["Mystery", "Thriller", "Blob"],
"color": "amethyst"
},
{
"id": 32,
"title": "The Princess Bride",
"genres": ["Adventure", "Romance", "Blob"],
"color": "ruby"
},
{
"id": 33,
"title": "Blade Runner",
"genres": ["Science Fiction", "Noir", "Blob"],
"color": "sapphire"
},
{
"id": 34,
"title": "The Big Lebowski",
"genres": ["Comedy", "Crime", "Blob"],
"color": "gold"
},
{
"id": 35,
"title": "Good Will Hunting",
"genres": ["Drama", "Romance", "Blob"],
"color": "turquoise"
},
{
"id": 36,
"title": "The Terminator",
"genres": ["Action", "Science Fiction", "Blob"],
"color": "obsidian"
},
{
"id": 37,
"title": "Casablanca",
"genres": ["Drama", "Romance", "Blob"],
"color": "jade"
},
{
"id": 38,
"title": "The Exorcist",
"genres": ["Horror", "Thriller", "Blob"],
"color": "burgundy"
},
{
"id": 39,
"title": "Apocalypse Now",
"genres": ["Drama", "War", "Blob"],
"color": "forest"
},
{
"id": 40,
"title": "Back to the Future",
"genres": ["Adventure", "Comedy", "Blob"],
"color": "amber"
},
{
"id": 41,
"title": "The Graduate",
"genres": ["Comedy", "Drama", "Blob"],
"color": "azure"
},
{
"id": 42,
"title": "Alien",
"genres": ["Horror", "Science Fiction", "Blob"],
"color": "obsidian"
},
{
"id": 43,
"title": "The Breakfast Club",
"genres": ["Drama", "Comedy", "Blob"],
"color": "coral"
},
{
"id": 44,
"title": "Die Hard",
"genres": ["Action", "Thriller", "Blob"],
"color": "scarlet"
},
{
"id": 45,
"title": "The Sound of Music",
"genres": ["Drama", "Musical", "Blob"],
"color": "emerald"
},
{
"id": 46,
"title": "Jaws",
"genres": ["Horror", "Thriller", "Blob"],
"color": "navy"
},
{
"id": 47,
"title": "Rocky",
"genres": ["Drama", "Sport", "Blob"],
"color": "burgundy"
},
{
"id": 48,
"title": "E.T. the Extra-Terrestrial",
"genres": ["Adventure", "Science Fiction", "Blob"],
"color": "amber"
},
{
"id": 49,
"title": "The Godfather Part II",
"genres": ["Crime", "Drama", "Blob"],
"color": "sepia"
},
{
"id": 50,
"title": "One Flew Over the Cuckoo's Nest",
"genres": ["Drama", "Blob"],
"color": "slate"
}
]);
let (response, code) =
index.update_settings_filterable_attributes(json!(["genres", "color"])).await;
assert_eq!(202, code, "{:?}", response);
index.wait_task(response.uid()).await;
let (response, code) = index.update_settings_distinct_attribute(json!("color")).await;
assert_eq!(202, code, "{:?}", response);
index.wait_task(response.uid()).await;
let (response, _code) = index.add_documents(documents, None).await;
index.wait_task(response.uid()).await;
let (response, code) =
index.facet_search(json!({"facetQuery": "blob", "facetName": "genres", "q": "" })).await;
// the non-exhaustive facet count reports 27 documents for the facet query "blob", but those documents only have 23 distinct colors.
assert_eq!(code, 200, "{}", response);
snapshot!(response["facetHits"], @r###"[{"value":"Blob","count":27}]"###);
let (response, code) =
index.facet_search(json!({"facetQuery": "blob", "facetName": "genres", "q": "", "exhaustiveFacetCount": true })).await;
// the exhaustive facet count reports 23 documents for the facet query "blob", which is the number of distinct colors.
assert_eq!(code, 200, "{}", response);
snapshot!(response["facetHits"], @r###"[{"value":"Blob","count":23}]"###);
}
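Editor's note: the comments in this new test carry the key point: with `color` as the distinct attribute, the default facet count for `"blob"` is a non-exhaustive estimate (27 here), while `exhaustiveFacetCount: true` collapses documents that share a colour and returns 23, the number of distinct colours in the dataset. A toy sketch of the exhaustive side of that computation, over invented data rather than the milli implementation.

```rust
use std::collections::HashSet;

struct Movie {
    genres: &'static [&'static str],
    color: &'static str, // the distinct attribute
}

/// Exhaustive facet count under a distinct attribute: documents sharing the
/// same `color` are only counted once for the matching facet value.
fn exhaustive_facet_count(movies: &[Movie], facet: &str) -> usize {
    movies
        .iter()
        .filter(|movie| movie.genres.iter().any(|&genre| genre == facet))
        .map(|movie| movie.color)
        .collect::<HashSet<_>>()
        .len()
}

fn main() {
    let movies = [
        Movie { genres: &["Romance", "Drama", "Blob"], color: "crimson" },
        Movie { genres: &["Action", "Adventure", "Blob"], color: "emerald" },
        Movie { genres: &["Crime", "Fantasy", "Blob"], color: "emerald" },
        Movie { genres: &["Drama", "Blob"], color: "slate" },
    ];
    // Four documents carry the "Blob" genre, but only three distinct colors.
    assert_eq!(exhaustive_facet_count(&movies, "Blob"), 3);
    println!("ok");
}
```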


@ -1783,6 +1783,146 @@ async fn test_nested_fields() {
.await; .await;
} }
#[actix_rt::test]
async fn test_typo_settings() {
let documents = json!([
{
"id": 0,
"title": "The zeroth document",
},
{
"id": 1,
"title": "The first document",
"nested": {
"object": "field",
"machin": "bidule",
},
},
{
"id": 2,
"title": "The second document",
"nested": [
"array",
{
"object": "field",
},
{
"prout": "truc",
"machin": "lol",
},
],
},
{
"id": 3,
"title": "The third document",
"nested": "I lied",
},
]);
test_settings_documents_indexing_swapping_and_search(
&documents,
&json!({
"searchableAttributes": ["title", "nested.object", "nested.machin"],
"typoTolerance": {
"enabled": true,
"disableOnAttributes": ["title"]
}
}),
&json!({"q": "document"}),
|response, code| {
assert_eq!(code, 200, "{}", response);
snapshot!(json_string!(response["hits"]), @r###"
[
{
"id": 0,
"title": "The zeroth document"
},
{
"id": 1,
"title": "The first document",
"nested": {
"object": "field",
"machin": "bidule"
}
},
{
"id": 2,
"title": "The second document",
"nested": [
"array",
{
"object": "field"
},
{
"prout": "truc",
"machin": "lol"
}
]
},
{
"id": 3,
"title": "The third document",
"nested": "I lied"
}
]
"###);
},
)
.await;
// Test prefix search
test_settings_documents_indexing_swapping_and_search(
&documents,
&json!({
"searchableAttributes": ["title", "nested.object", "nested.machin"],
"typoTolerance": {
"enabled": true,
"disableOnAttributes": ["title"]
}
}),
&json!({"q": "docume"}),
|response, code| {
assert_eq!(code, 200, "{}", response);
snapshot!(json_string!(response["hits"]), @r###"
[
{
"id": 0,
"title": "The zeroth document"
},
{
"id": 1,
"title": "The first document",
"nested": {
"object": "field",
"machin": "bidule"
}
},
{
"id": 2,
"title": "The second document",
"nested": [
"array",
{
"object": "field"
},
{
"prout": "truc",
"machin": "lol"
}
]
},
{
"id": 3,
"title": "The third document",
"nested": "I lied"
}
]
"###);
},
)
.await;
}
/// Modifying facets with different casing should work correctly /// Modifying facets with different casing should work correctly
#[actix_rt::test] #[actix_rt::test]
async fn change_facet_casing() { async fn change_facet_casing() {


@ -55,11 +55,11 @@ async fn similar_bad_id() {
snapshot!(code, @"202 Accepted"); snapshot!(code, @"202 Accepted");
server.wait_task(response.uid()).await; server.wait_task(response.uid()).await;
let (response, code) = index.similar_post(json!({"id": ["doggo"]})).await; let (response, code) = index.similar_post(json!({"id": ["doggo"], "embedder": "manual"})).await;
snapshot!(code, @"400 Bad Request"); snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###" snapshot!(json_string!(response), @r###"
{ {
"message": "Invalid value at `.id`: the value of `id` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), and can not be more than 511 bytes.", "message": "Invalid value at `.id`: Document identifier `[\"doggo\"]` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), and can not be more than 511 bytes.",
"code": "invalid_similar_id", "code": "invalid_similar_id",
"type": "invalid_request", "type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_similar_id" "link": "https://docs.meilisearch.com/errors#invalid_similar_id"
@ -145,11 +145,12 @@ async fn similar_invalid_id() {
snapshot!(code, @"202 Accepted"); snapshot!(code, @"202 Accepted");
server.wait_task(response.uid()).await; server.wait_task(response.uid()).await;
let (response, code) = index.similar_post(json!({"id": "http://invalid-docid/"})).await; let (response, code) =
index.similar_post(json!({"id": "http://invalid-docid/", "embedder": "manual"})).await;
snapshot!(code, @"400 Bad Request"); snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###" snapshot!(json_string!(response), @r###"
{ {
"message": "Invalid value at `.id`: the value of `id` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), and can not be more than 511 bytes.", "message": "Invalid value at `.id`: Document identifier `\"http://invalid-docid/\"` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), and can not be more than 511 bytes.",
"code": "invalid_similar_id", "code": "invalid_similar_id",
"type": "invalid_request", "type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_similar_id" "link": "https://docs.meilisearch.com/errors#invalid_similar_id"


@ -110,11 +110,14 @@ async fn add_remove_embeddings() {
index.wait_task(response.uid()).await.succeeded(); index.wait_task(response.uid()).await.succeeded();
let (stats, _code) = index.stats().await; let (stats, _code) = index.stats().await;
snapshot!(json_string!(stats), @r###" snapshot!(json_string!(stats, {
".rawDocumentDbSize" => "[size]",
".avgDocumentSize" => "[size]",
}), @r###"
{ {
"numberOfDocuments": 2, "numberOfDocuments": 2,
"rawDocumentDbSize": 27, "rawDocumentDbSize": "[size]",
"avgDocumentSize": 13, "avgDocumentSize": "[size]",
"isIndexing": false, "isIndexing": false,
"numberOfEmbeddings": 5, "numberOfEmbeddings": 5,
"numberOfEmbeddedDocuments": 2, "numberOfEmbeddedDocuments": 2,
@ -135,11 +138,14 @@ async fn add_remove_embeddings() {
index.wait_task(response.uid()).await.succeeded(); index.wait_task(response.uid()).await.succeeded();
let (stats, _code) = index.stats().await; let (stats, _code) = index.stats().await;
snapshot!(json_string!(stats), @r###" snapshot!(json_string!(stats, {
".rawDocumentDbSize" => "[size]",
".avgDocumentSize" => "[size]",
}), @r###"
{ {
"numberOfDocuments": 2, "numberOfDocuments": 2,
"rawDocumentDbSize": 27, "rawDocumentDbSize": "[size]",
"avgDocumentSize": 13, "avgDocumentSize": "[size]",
"isIndexing": false, "isIndexing": false,
"numberOfEmbeddings": 3, "numberOfEmbeddings": 3,
"numberOfEmbeddedDocuments": 2, "numberOfEmbeddedDocuments": 2,
@ -160,11 +166,14 @@ async fn add_remove_embeddings() {
index.wait_task(response.uid()).await.succeeded(); index.wait_task(response.uid()).await.succeeded();
let (stats, _code) = index.stats().await; let (stats, _code) = index.stats().await;
snapshot!(json_string!(stats), @r###" snapshot!(json_string!(stats, {
".rawDocumentDbSize" => "[size]",
".avgDocumentSize" => "[size]",
}), @r###"
{ {
"numberOfDocuments": 2, "numberOfDocuments": 2,
"rawDocumentDbSize": 27, "rawDocumentDbSize": "[size]",
"avgDocumentSize": 13, "avgDocumentSize": "[size]",
"isIndexing": false, "isIndexing": false,
"numberOfEmbeddings": 2, "numberOfEmbeddings": 2,
"numberOfEmbeddedDocuments": 2, "numberOfEmbeddedDocuments": 2,
@ -186,11 +195,14 @@ async fn add_remove_embeddings() {
index.wait_task(response.uid()).await.succeeded(); index.wait_task(response.uid()).await.succeeded();
let (stats, _code) = index.stats().await; let (stats, _code) = index.stats().await;
snapshot!(json_string!(stats), @r###" snapshot!(json_string!(stats, {
".rawDocumentDbSize" => "[size]",
".avgDocumentSize" => "[size]",
}), @r###"
{ {
"numberOfDocuments": 2, "numberOfDocuments": 2,
"rawDocumentDbSize": 27, "rawDocumentDbSize": "[size]",
"avgDocumentSize": 13, "avgDocumentSize": "[size]",
"isIndexing": false, "isIndexing": false,
"numberOfEmbeddings": 2, "numberOfEmbeddings": 2,
"numberOfEmbeddedDocuments": 1, "numberOfEmbeddedDocuments": 1,
@ -236,11 +248,14 @@ async fn add_remove_embedded_documents() {
index.wait_task(response.uid()).await.succeeded(); index.wait_task(response.uid()).await.succeeded();
let (stats, _code) = index.stats().await; let (stats, _code) = index.stats().await;
snapshot!(json_string!(stats), @r###" snapshot!(json_string!(stats, {
".rawDocumentDbSize" => "[size]",
".avgDocumentSize" => "[size]",
}), @r###"
{ {
"numberOfDocuments": 2, "numberOfDocuments": 2,
"rawDocumentDbSize": 27, "rawDocumentDbSize": "[size]",
"avgDocumentSize": 13, "avgDocumentSize": "[size]",
"isIndexing": false, "isIndexing": false,
"numberOfEmbeddings": 5, "numberOfEmbeddings": 5,
"numberOfEmbeddedDocuments": 2, "numberOfEmbeddedDocuments": 2,
@ -257,11 +272,14 @@ async fn add_remove_embedded_documents() {
index.wait_task(response.uid()).await.succeeded(); index.wait_task(response.uid()).await.succeeded();
let (stats, _code) = index.stats().await; let (stats, _code) = index.stats().await;
snapshot!(json_string!(stats), @r###" snapshot!(json_string!(stats, {
".rawDocumentDbSize" => "[size]",
".avgDocumentSize" => "[size]",
}), @r###"
{ {
"numberOfDocuments": 1, "numberOfDocuments": 1,
"rawDocumentDbSize": 13, "rawDocumentDbSize": "[size]",
"avgDocumentSize": 13, "avgDocumentSize": "[size]",
"isIndexing": false, "isIndexing": false,
"numberOfEmbeddings": 3, "numberOfEmbeddings": 3,
"numberOfEmbeddedDocuments": 1, "numberOfEmbeddedDocuments": 1,
@ -290,11 +308,14 @@ async fn update_embedder_settings() {
index.wait_task(response.uid()).await.succeeded(); index.wait_task(response.uid()).await.succeeded();
let (stats, _code) = index.stats().await; let (stats, _code) = index.stats().await;
snapshot!(json_string!(stats), @r###" snapshot!(json_string!(stats, {
".rawDocumentDbSize" => "[size]",
".avgDocumentSize" => "[size]",
}), @r###"
{ {
"numberOfDocuments": 2, "numberOfDocuments": 2,
"rawDocumentDbSize": 108, "rawDocumentDbSize": "[size]",
"avgDocumentSize": 54, "avgDocumentSize": "[size]",
"isIndexing": false, "isIndexing": false,
"numberOfEmbeddings": 0, "numberOfEmbeddings": 0,
"numberOfEmbeddedDocuments": 0, "numberOfEmbeddedDocuments": 0,
@ -326,11 +347,14 @@ async fn update_embedder_settings() {
server.wait_task(response.uid()).await.succeeded(); server.wait_task(response.uid()).await.succeeded();
let (stats, _code) = index.stats().await; let (stats, _code) = index.stats().await;
snapshot!(json_string!(stats), @r###" snapshot!(json_string!(stats, {
".rawDocumentDbSize" => "[size]",
".avgDocumentSize" => "[size]",
}), @r###"
{ {
"numberOfDocuments": 2, "numberOfDocuments": 2,
"rawDocumentDbSize": 108, "rawDocumentDbSize": "[size]",
"avgDocumentSize": 54, "avgDocumentSize": "[size]",
"isIndexing": false, "isIndexing": false,
"numberOfEmbeddings": 3, "numberOfEmbeddings": 3,
"numberOfEmbeddedDocuments": 2, "numberOfEmbeddedDocuments": 2,


@ -43,7 +43,7 @@ async fn version_too_old() {
std::fs::write(db_path.join("VERSION"), "1.11.9999").unwrap(); std::fs::write(db_path.join("VERSION"), "1.11.9999").unwrap();
let options = Opt { experimental_dumpless_upgrade: true, ..default_settings }; let options = Opt { experimental_dumpless_upgrade: true, ..default_settings };
let err = Server::new_with_options(options).await.map(|_| ()).unwrap_err(); let err = Server::new_with_options(options).await.map(|_| ()).unwrap_err();
snapshot!(err, @"Database version 1.11.9999 is too old for the experimental dumpless upgrade feature. Please generate a dump using the v1.11.9999 and import it in the v1.13.3"); snapshot!(err, @"Database version 1.11.9999 is too old for the experimental dumpless upgrade feature. Please generate a dump using the v1.11.9999 and import it in the v1.14.0");
} }
#[actix_rt::test] #[actix_rt::test]
@ -58,15 +58,12 @@ async fn version_requires_downgrade() {
std::fs::write(db_path.join("VERSION"), format!("{major}.{minor}.{patch}")).unwrap(); std::fs::write(db_path.join("VERSION"), format!("{major}.{minor}.{patch}")).unwrap();
let options = Opt { experimental_dumpless_upgrade: true, ..default_settings }; let options = Opt { experimental_dumpless_upgrade: true, ..default_settings };
let err = Server::new_with_options(options).await.map(|_| ()).unwrap_err(); let err = Server::new_with_options(options).await.map(|_| ()).unwrap_err();
snapshot!(err, @"Database version 1.13.4 is higher than the Meilisearch version 1.13.3. Downgrade is not supported"); snapshot!(err, @"Database version 1.14.1 is higher than the Meilisearch version 1.14.0. Downgrade is not supported");
} }
#[actix_rt::test] #[actix_rt::test]
async fn upgrade_to_the_current_version() { async fn upgrade_to_the_current_version() {
let temp = tempfile::tempdir().unwrap(); let temp = tempfile::tempdir().unwrap();
let server = Server::new_with_options(default_settings(temp.path())).await.unwrap();
drop(server);
let server = Server::new_with_options(Opt { let server = Server::new_with_options(Opt {
experimental_dumpless_upgrade: true, experimental_dumpless_upgrade: true,
..default_settings(temp.path()) ..default_settings(temp.path())


@ -8,7 +8,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
"progress": null, "progress": null,
"details": { "details": {
"upgradeFrom": "v1.12.0", "upgradeFrom": "v1.12.0",
"upgradeTo": "v1.13.3" "upgradeTo": "v1.14.0"
}, },
"stats": { "stats": {
"totalNbTasks": 1, "totalNbTasks": 1,


@ -8,7 +8,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
"progress": null, "progress": null,
"details": { "details": {
"upgradeFrom": "v1.12.0", "upgradeFrom": "v1.12.0",
"upgradeTo": "v1.13.3" "upgradeTo": "v1.14.0"
}, },
"stats": { "stats": {
"totalNbTasks": 1, "totalNbTasks": 1,


@ -8,7 +8,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
"progress": null, "progress": null,
"details": { "details": {
"upgradeFrom": "v1.12.0", "upgradeFrom": "v1.12.0",
"upgradeTo": "v1.13.3" "upgradeTo": "v1.14.0"
}, },
"stats": { "stats": {
"totalNbTasks": 1, "totalNbTasks": 1,


@ -12,7 +12,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
"canceledBy": null, "canceledBy": null,
"details": { "details": {
"upgradeFrom": "v1.12.0", "upgradeFrom": "v1.12.0",
"upgradeTo": "v1.13.3" "upgradeTo": "v1.14.0"
}, },
"error": null, "error": null,
"duration": "[duration]", "duration": "[duration]",


@ -12,7 +12,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
"canceledBy": null, "canceledBy": null,
"details": { "details": {
"upgradeFrom": "v1.12.0", "upgradeFrom": "v1.12.0",
"upgradeTo": "v1.13.3" "upgradeTo": "v1.14.0"
}, },
"error": null, "error": null,
"duration": "[duration]", "duration": "[duration]",


@ -12,7 +12,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
"canceledBy": null, "canceledBy": null,
"details": { "details": {
"upgradeFrom": "v1.12.0", "upgradeFrom": "v1.12.0",
"upgradeTo": "v1.13.3" "upgradeTo": "v1.14.0"
}, },
"error": null, "error": null,
"duration": "[duration]", "duration": "[duration]",


@ -8,7 +8,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
"progress": null, "progress": null,
"details": { "details": {
"upgradeFrom": "v1.12.0", "upgradeFrom": "v1.12.0",
"upgradeTo": "v1.13.3" "upgradeTo": "v1.14.0"
}, },
"stats": { "stats": {
"totalNbTasks": 1, "totalNbTasks": 1,


@ -12,7 +12,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
"canceledBy": null, "canceledBy": null,
"details": { "details": {
"upgradeFrom": "v1.12.0", "upgradeFrom": "v1.12.0",
"upgradeTo": "v1.13.3" "upgradeTo": "v1.14.0"
}, },
"error": null, "error": null,
"duration": "[duration]", "duration": "[duration]",


@ -108,6 +108,10 @@ async fn check_the_keys(server: &Server) {
/// 5.2. Enqueue a new task /// 5.2. Enqueue a new task
/// 5.3. Create an index /// 5.3. Create an index
async fn check_the_index_scheduler(server: &Server) { async fn check_the_index_scheduler(server: &Server) {
// Wait until the upgrade has been applied to all indexes to avoid flakyness
let (tasks, _) = server.tasks_filter("types=upgradeDatabase&limit=1").await;
server.wait_task(Value(tasks["results"][0].clone()).uid()).await.succeeded();
// All the indexes are still present // All the indexes are still present
let (indexes, _) = server.list_indexes(None, None).await; let (indexes, _) = server.list_indexes(None, None).await;
snapshot!(indexes, @r#" snapshot!(indexes, @r#"
@ -129,7 +133,9 @@ async fn check_the_index_scheduler(server: &Server) {
let (stats, _) = server.stats().await; let (stats, _) = server.stats().await;
assert_json_snapshot!(stats, { assert_json_snapshot!(stats, {
".databaseSize" => "[bytes]", ".databaseSize" => "[bytes]",
".usedDatabaseSize" => "[bytes]" ".usedDatabaseSize" => "[bytes]",
".indexes.kefir.rawDocumentDbSize" => "[bytes]",
".indexes.kefir.avgDocumentSize" => "[bytes]",
}, },
@r###" @r###"
{ {
@ -139,8 +145,8 @@ async fn check_the_index_scheduler(server: &Server) {
"indexes": { "indexes": {
"kefir": { "kefir": {
"numberOfDocuments": 1, "numberOfDocuments": 1,
"rawDocumentDbSize": 109, "rawDocumentDbSize": "[bytes]",
"avgDocumentSize": 109, "avgDocumentSize": "[bytes]",
"isIndexing": false, "isIndexing": false,
"numberOfEmbeddings": 0, "numberOfEmbeddings": 0,
"numberOfEmbeddedDocuments": 0, "numberOfEmbeddedDocuments": 0,
@ -156,10 +162,6 @@ async fn check_the_index_scheduler(server: &Server) {
} }
"###); "###);
// Wait until the upgrade has been applied to all indexes to avoid flakyness
let (tasks, _) = server.tasks_filter("types=upgradeDatabase&limit=1").await;
server.wait_task(Value(tasks["results"][0].clone()).uid()).await.succeeded();
// Tasks and batches should still work // Tasks and batches should still work
// We rewrite the first task for all calls because it may be the upgrade database with unknown dates and duration. // We rewrite the first task for all calls because it may be the upgrade database with unknown dates and duration.
// The other tasks should NOT change // The other tasks should NOT change
@ -193,31 +195,33 @@ async fn check_the_index_scheduler(server: &Server) {
// Tests all the batches query parameters // Tests all the batches query parameters
let (batches, _) = server.batches_filter("uids=10").await; let (batches, _) = server.batches_filter("uids=10").await;
snapshot!(json_string!(batches, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_uids_equal_10"); snapshot!(json_string!(batches, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.internalDatabaseSizes" => "[internalDatabaseSizes]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_uids_equal_10");
let (batches, _) = server.batches_filter("batchUids=10").await; let (batches, _) = server.batches_filter("batchUids=10").await;
snapshot!(json_string!(batches, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_batchUids_equal_10"); snapshot!(json_string!(batches, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.internalDatabaseSizes" => "[internalDatabaseSizes]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_batchUids_equal_10");
let (batches, _) = server.batches_filter("statuses=canceled").await; let (batches, _) = server.batches_filter("statuses=canceled").await;
snapshot!(json_string!(batches, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_statuses_equal_canceled"); snapshot!(json_string!(batches, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.internalDatabaseSizes" => "[internalDatabaseSizes]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_statuses_equal_canceled");
// types has already been tested above to retrieve the upgrade database // types has already been tested above to retrieve the upgrade database
let (batches, _) = server.batches_filter("canceledBy=19").await; let (batches, _) = server.batches_filter("canceledBy=19").await;
snapshot!(json_string!(batches, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_canceledBy_equal_19"); snapshot!(json_string!(batches, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.internalDatabaseSizes" => "[internalDatabaseSizes]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_canceledBy_equal_19");
let (batches, _) = server.batches_filter("beforeEnqueuedAt=2025-01-16T16:47:41Z").await; let (batches, _) = server.batches_filter("beforeEnqueuedAt=2025-01-16T16:47:41Z").await;
snapshot!(json_string!(batches, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_beforeEnqueuedAt_equal_2025-01-16T16_47_41"); snapshot!(json_string!(batches, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.internalDatabaseSizes" => "[internalDatabaseSizes]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_beforeEnqueuedAt_equal_2025-01-16T16_47_41");
let (batches, _) = server.batches_filter("afterEnqueuedAt=2025-01-16T16:47:41Z").await; let (batches, _) = server.batches_filter("afterEnqueuedAt=2025-01-16T16:47:41Z").await;
snapshot!(json_string!(batches, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_afterEnqueuedAt_equal_2025-01-16T16_47_41"); snapshot!(json_string!(batches, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.internalDatabaseSizes" => "[internalDatabaseSizes]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_afterEnqueuedAt_equal_2025-01-16T16_47_41");
let (batches, _) = server.batches_filter("beforeStartedAt=2025-01-16T16:47:41Z").await; let (batches, _) = server.batches_filter("beforeStartedAt=2025-01-16T16:47:41Z").await;
snapshot!(json_string!(batches, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_beforeStartedAt_equal_2025-01-16T16_47_41"); snapshot!(json_string!(batches, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.internalDatabaseSizes" => "[internalDatabaseSizes]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_beforeStartedAt_equal_2025-01-16T16_47_41");
let (batches, _) = server.batches_filter("afterStartedAt=2025-01-16T16:47:41Z").await; let (batches, _) = server.batches_filter("afterStartedAt=2025-01-16T16:47:41Z").await;
snapshot!(json_string!(batches, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_afterStartedAt_equal_2025-01-16T16_47_41"); snapshot!(json_string!(batches, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.internalDatabaseSizes" => "[internalDatabaseSizes]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_afterStartedAt_equal_2025-01-16T16_47_41");
let (batches, _) = server.batches_filter("beforeFinishedAt=2025-01-16T16:47:41Z").await; let (batches, _) = server.batches_filter("beforeFinishedAt=2025-01-16T16:47:41Z").await;
snapshot!(json_string!(batches, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_beforeFinishedAt_equal_2025-01-16T16_47_41"); snapshot!(json_string!(batches, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.internalDatabaseSizes" => "[internalDatabaseSizes]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_beforeFinishedAt_equal_2025-01-16T16_47_41");
let (batches, _) = server.batches_filter("afterFinishedAt=2025-01-16T16:47:41Z").await; let (batches, _) = server.batches_filter("afterFinishedAt=2025-01-16T16:47:41Z").await;
snapshot!(json_string!(batches, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_afterFinishedAt_equal_2025-01-16T16_47_41"); snapshot!(json_string!(batches, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.internalDatabaseSizes" => "[internalDatabaseSizes]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_afterFinishedAt_equal_2025-01-16T16_47_41");
let (stats, _) = server.stats().await; let (stats, _) = server.stats().await;
assert_json_snapshot!(stats, { assert_json_snapshot!(stats, {
".databaseSize" => "[bytes]", ".databaseSize" => "[bytes]",
".usedDatabaseSize" => "[bytes]" ".usedDatabaseSize" => "[bytes]",
".indexes.kefir.rawDocumentDbSize" => "[bytes]",
".indexes.kefir.avgDocumentSize" => "[bytes]",
}, },
@r###" @r###"
{ {
@ -227,8 +231,8 @@ async fn check_the_index_scheduler(server: &Server) {
"indexes": { "indexes": {
"kefir": { "kefir": {
"numberOfDocuments": 1, "numberOfDocuments": 1,
"rawDocumentDbSize": 109, "rawDocumentDbSize": "[bytes]",
"avgDocumentSize": 109, "avgDocumentSize": "[bytes]",
"isIndexing": false, "isIndexing": false,
"numberOfEmbeddings": 0, "numberOfEmbeddings": 0,
"numberOfEmbeddedDocuments": 0, "numberOfEmbeddedDocuments": 0,
@ -245,11 +249,14 @@ async fn check_the_index_scheduler(server: &Server) {
"###); "###);
let index = server.index("kefir"); let index = server.index("kefir");
let (stats, _) = index.stats().await; let (stats, _) = index.stats().await;
snapshot!(stats, @r###" snapshot!(json_string!(stats, {
".rawDocumentDbSize" => "[bytes]",
".avgDocumentSize" => "[bytes]",
}), @r###"
{ {
"numberOfDocuments": 1, "numberOfDocuments": 1,
"rawDocumentDbSize": 109, "rawDocumentDbSize": "[bytes]",
"avgDocumentSize": 109, "avgDocumentSize": "[bytes]",
"isIndexing": false, "isIndexing": false,
"numberOfEmbeddings": 0, "numberOfEmbeddings": 0,
"numberOfEmbeddedDocuments": 0, "numberOfEmbeddedDocuments": 0,


@ -164,6 +164,87 @@ async fn add_remove_user_provided() {
"###); "###);
} }
#[actix_rt::test]
async fn user_provide_mismatched_embedding_dimension() {
let server = Server::new().await;
let index = server.index("doggo");
let (response, code) = index
.update_settings(json!({
"embedders": {
"manual": {
"source": "userProvided",
"dimensions": 3,
}
},
}))
.await;
snapshot!(code, @"202 Accepted");
server.wait_task(response.uid()).await.succeeded();
let documents = json!([
{"id": 0, "name": "kefir", "_vectors": { "manual": [0, 0] }},
]);
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let task = index.wait_task(value.uid()).await;
snapshot!(task, @r###"
{
"uid": "[uid]",
"batchUid": "[batch_uid]",
"indexUid": "doggo",
"status": "failed",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 0
},
"error": {
"message": "Index `doggo`: Invalid vector dimensions in document with id `0` in `._vectors.manual`.\n - note: embedding #0 has dimensions 2\n - note: embedder `manual` requires 3",
"code": "invalid_vector_dimensions",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_vector_dimensions"
},
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
let new_document = json!([
{"id": 0, "name": "kefir", "_vectors": { "manual": [[0, 0], [1, 1], [2, 2]] }},
]);
let (response, code) = index.add_documents(new_document, None).await;
snapshot!(code, @"202 Accepted");
let task = index.wait_task(response.uid()).await;
snapshot!(task, @r###"
{
"uid": "[uid]",
"batchUid": "[batch_uid]",
"indexUid": "doggo",
"status": "failed",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 0
},
"error": {
"message": "Index `doggo`: Invalid vector dimensions in document with id `0` in `._vectors.manual`.\n - note: embedding #0 has dimensions 2\n - note: embedder `manual` requires 3",
"code": "invalid_vector_dimensions",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_vector_dimensions"
},
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
}
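Aside (not part of the diff): for contrast with the two failing payloads in the test above, a hedged sketch of documents whose embeddings match the declared `dimensions: 3`, written against the same hypothetical test harness:

// Hypothetical companion payload: every embedding has exactly 3 dimensions, so
// the same `manual` embedder (dimensions: 3) should accept it at indexing time.
let documents = json!([
    {"id": 0, "name": "kefir", "_vectors": { "manual": [0, 0, 0] }},
    {"id": 1, "name": "echo", "_vectors": { "manual": [[1, 1, 1], [2, 2, 2]] }},
]);
let (response, code) = index.add_documents(documents, None).await;
// Expected under this assumption: 202 Accepted, and the task succeeds instead
// of failing with `invalid_vector_dimensions`.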
async fn generate_default_user_provided_documents(server: &Server) -> Index { async fn generate_default_user_provided_documents(server: &Server) -> Index {
let index = server.index("doggo"); let index = server.index("doggo");


@ -1995,7 +1995,7 @@ async fn timeout() {
let (response, code) = index let (response, code) = index
.search_post(json!({ .search_post(json!({
"q": "grand chien de berger des montagnes", "q": "grand chien de berger des montagnes foil the cache",
"hybrid": {"semanticRatio": 0.99, "embedder": "default"} "hybrid": {"semanticRatio": 0.99, "embedder": "default"}
})) }))
.await; .await;


@ -412,6 +412,117 @@ async fn ollama_url_checks() {
async fn composite_checks() { async fn composite_checks() {
let server = Server::new().await; let server = Server::new().await;
let index = server.index("test"); let index = server.index("test");
// feature not enabled, using source
let (response, _code) = index
.update_settings(json!({
"embedders": {
"test": null
}
}))
.await;
server.wait_task(response.uid()).await;
let (response, code) = index
.update_settings(json!({
"embedders": {
"test": {
"source": "composite",
"searchEmbedder": {
"source": "huggingFace",
"model": "sentence-transformers/all-MiniLM-L6-v2",
"revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e",
},
"indexingEmbedder": {
"source": "huggingFace",
"model": "sentence-transformers/all-MiniLM-L6-v2",
"revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e",
},
}
}
}))
.await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "using `\"composite\"` as source requires enabling the `composite embedders` experimental feature. See https://github.com/orgs/meilisearch/discussions/816",
"code": "feature_not_enabled",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#feature_not_enabled"
}
"###);
// feature not enabled, using search embedder
let (response, _code) = index
.update_settings(json!({
"embedders": {
"test": null
}
}))
.await;
server.wait_task(response.uid()).await;
let (response, code) = index
.update_settings(json!({
"embedders": {
"test": {
"source": "userProvided",
"searchEmbedder": {
"source": "huggingFace",
"model": "sentence-transformers/all-MiniLM-L6-v2",
"revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e",
}
}
}
}))
.await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "setting `searchEmbedder` requires enabling the `composite embedders` experimental feature. See https://github.com/orgs/meilisearch/discussions/816",
"code": "feature_not_enabled",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#feature_not_enabled"
}
"###);
// feature not enabled, using indexing embedder
let (response, _code) = index
.update_settings(json!({
"embedders": {
"test": null
}
}))
.await;
server.wait_task(response.uid()).await;
let (response, code) = index
.update_settings(json!({
"embedders": {
"test": {
"source": "userProvided",
"indexingEmbedder": {
"source": "huggingFace",
"model": "sentence-transformers/all-MiniLM-L6-v2",
"revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e",
}
}
}
}))
.await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "setting `indexingEmbedder` requires enabling the `composite embedders` experimental feature. See https://github.com/orgs/meilisearch/discussions/816",
"code": "feature_not_enabled",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#feature_not_enabled"
}
"###);
// enable feature
let (_, code) = server.set_features(json!({"compositeEmbedders": true})).await;
snapshot!(code, @"200 OK");
// inner distribution // inner distribution
let (response, _code) = index let (response, _code) = index
.update_settings(json!({ .update_settings(json!({
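Aside (not part of the diff): once `compositeEmbedders` is enabled above, the `composite` settings payload rejected in the first check is expected to be accepted. A hedged sketch reusing the exact shapes from this test:

// Hypothetical follow-up to the checks above: with the experimental flag on,
// the composite definition should be accepted rather than rejected with
// `feature_not_enabled`.
let (response, code) = index
    .update_settings(json!({
        "embedders": {
            "test": {
                "source": "composite",
                "searchEmbedder": {
                    "source": "huggingFace",
                    "model": "sentence-transformers/all-MiniLM-L6-v2",
                    "revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e",
                },
                "indexingEmbedder": {
                    "source": "huggingFace",
                    "model": "sentence-transformers/all-MiniLM-L6-v2",
                    "revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e",
                },
            }
        }
    }))
    .await;
snapshot!(code, @"202 Accepted");
server.wait_task(response.uid()).await;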


@ -10,7 +10,6 @@ license.workspace = true
[dependencies] [dependencies]
anyhow = "1.0.95" anyhow = "1.0.95"
arroy_v04_to_v05 = { package = "arroy", git = "https://github.com/meilisearch/arroy/", tag = "DO-NOT-DELETE-upgrade-v04-to-v05" }
clap = { version = "4.5.24", features = ["derive"] } clap = { version = "4.5.24", features = ["derive"] }
dump = { path = "../dump" } dump = { path = "../dump" }
file-store = { path = "../file-store" } file-store = { path = "../file-store" }


@ -7,11 +7,11 @@ use anyhow::{bail, Context};
use clap::{Parser, Subcommand, ValueEnum}; use clap::{Parser, Subcommand, ValueEnum};
use dump::{DumpWriter, IndexMetadata}; use dump::{DumpWriter, IndexMetadata};
use file_store::FileStore; use file_store::FileStore;
use meilisearch_auth::AuthController; use meilisearch_auth::{open_auth_store_env, AuthController};
use meilisearch_types::batches::Batch; use meilisearch_types::batches::Batch;
use meilisearch_types::heed::types::{Bytes, SerdeJson, Str}; use meilisearch_types::heed::types::{Bytes, SerdeJson, Str};
use meilisearch_types::heed::{ use meilisearch_types::heed::{
CompactionOption, Database, Env, EnvOpenOptions, RoTxn, RwTxn, Unspecified, CompactionOption, Database, Env, EnvOpenOptions, RoTxn, RwTxn, Unspecified, WithoutTls,
}; };
use meilisearch_types::milli::constants::RESERVED_VECTORS_FIELD_NAME; use meilisearch_types::milli::constants::RESERVED_VECTORS_FIELD_NAME;
use meilisearch_types::milli::documents::{obkv_to_object, DocumentsBatchReader}; use meilisearch_types::milli::documents::{obkv_to_object, DocumentsBatchReader};
@ -172,7 +172,7 @@ fn main() -> anyhow::Result<()> {
/// Clears the task queue located at `db_path`. /// Clears the task queue located at `db_path`.
fn clear_task_queue(db_path: PathBuf) -> anyhow::Result<()> { fn clear_task_queue(db_path: PathBuf) -> anyhow::Result<()> {
let path = db_path.join("tasks"); let path = db_path.join("tasks");
let env = unsafe { EnvOpenOptions::new().max_dbs(100).open(&path) } let env = unsafe { EnvOpenOptions::new().read_txn_without_tls().max_dbs(100).open(&path) }
.with_context(|| format!("While trying to open {:?}", path.display()))?; .with_context(|| format!("While trying to open {:?}", path.display()))?;
eprintln!("Deleting tasks from the database..."); eprintln!("Deleting tasks from the database...");
@ -225,7 +225,7 @@ fn clear_task_queue(db_path: PathBuf) -> anyhow::Result<()> {
} }
fn try_opening_database<KC: 'static, DC: 'static>( fn try_opening_database<KC: 'static, DC: 'static>(
env: &Env, env: &Env<WithoutTls>,
rtxn: &RoTxn, rtxn: &RoTxn,
db_name: &str, db_name: &str,
) -> anyhow::Result<Database<KC, DC>> { ) -> anyhow::Result<Database<KC, DC>> {
@ -235,7 +235,7 @@ fn try_opening_database<KC: 'static, DC: 'static>(
} }
fn try_opening_poly_database( fn try_opening_poly_database(
env: &Env, env: &Env<WithoutTls>,
rtxn: &RoTxn, rtxn: &RoTxn,
db_name: &str, db_name: &str,
) -> anyhow::Result<Database<Unspecified, Unspecified>> { ) -> anyhow::Result<Database<Unspecified, Unspecified>> {
@ -284,13 +284,18 @@ fn export_a_dump(
FileStore::new(db_path.join("update_files")).context("While opening the FileStore")?; FileStore::new(db_path.join("update_files")).context("While opening the FileStore")?;
let index_scheduler_path = db_path.join("tasks"); let index_scheduler_path = db_path.join("tasks");
let env = unsafe { EnvOpenOptions::new().max_dbs(100).open(&index_scheduler_path) } let env = unsafe {
.with_context(|| format!("While trying to open {:?}", index_scheduler_path.display()))?; EnvOpenOptions::new().read_txn_without_tls().max_dbs(100).open(&index_scheduler_path)
}
.with_context(|| format!("While trying to open {:?}", index_scheduler_path.display()))?;
eprintln!("Dumping the keys..."); eprintln!("Dumping the keys...");
// 2. dump the keys // 2. dump the keys
let auth_store = AuthController::new(&db_path, &None) let auth_path = db_path.join("auth");
std::fs::create_dir_all(&auth_path).context("While creating the auth directory")?;
let auth_env = open_auth_store_env(&auth_path).context("While opening the auth store")?;
let auth_store = AuthController::new(auth_env, &None)
.with_context(|| format!("While opening the auth store at {}", db_path.display()))?; .with_context(|| format!("While opening the auth store at {}", db_path.display()))?;
let mut dump_keys = dump.create_keys()?; let mut dump_keys = dump.create_keys()?;
let mut count = 0; let mut count = 0;
@ -386,9 +391,10 @@ fn export_a_dump(
for result in index_mapping.iter(&rtxn)? { for result in index_mapping.iter(&rtxn)? {
let (uid, uuid) = result?; let (uid, uuid) = result?;
let index_path = db_path.join("indexes").join(uuid.to_string()); let index_path = db_path.join("indexes").join(uuid.to_string());
let index = Index::new(EnvOpenOptions::new(), &index_path, false).with_context(|| { let index = Index::new(EnvOpenOptions::new().read_txn_without_tls(), &index_path, false)
format!("While trying to open the index at path {:?}", index_path.display()) .with_context(|| {
})?; format!("While trying to open the index at path {:?}", index_path.display())
})?;
let rtxn = index.read_txn()?; let rtxn = index.read_txn()?;
let metadata = IndexMetadata { let metadata = IndexMetadata {
@ -438,8 +444,10 @@ fn export_a_dump(
fn compact_index(db_path: PathBuf, index_name: &str) -> anyhow::Result<()> { fn compact_index(db_path: PathBuf, index_name: &str) -> anyhow::Result<()> {
let index_scheduler_path = db_path.join("tasks"); let index_scheduler_path = db_path.join("tasks");
let env = unsafe { EnvOpenOptions::new().max_dbs(100).open(&index_scheduler_path) } let env = unsafe {
.with_context(|| format!("While trying to open {:?}", index_scheduler_path.display()))?; EnvOpenOptions::new().read_txn_without_tls().max_dbs(100).open(&index_scheduler_path)
}
.with_context(|| format!("While trying to open {:?}", index_scheduler_path.display()))?;
let rtxn = env.read_txn()?; let rtxn = env.read_txn()?;
let index_mapping: Database<Str, UuidCodec> = let index_mapping: Database<Str, UuidCodec> =
@ -456,9 +464,10 @@ fn compact_index(db_path: PathBuf, index_name: &str) -> anyhow::Result<()> {
} }
let index_path = db_path.join("indexes").join(uuid.to_string()); let index_path = db_path.join("indexes").join(uuid.to_string());
let index = Index::new(EnvOpenOptions::new(), &index_path, false).with_context(|| { let index = Index::new(EnvOpenOptions::new().read_txn_without_tls(), &index_path, false)
format!("While trying to open the index at path {:?}", index_path.display()) .with_context(|| {
})?; format!("While trying to open the index at path {:?}", index_path.display())
})?;
eprintln!("Awaiting for a mutable transaction..."); eprintln!("Awaiting for a mutable transaction...");
let _wtxn = index.write_txn().context("While awaiting for a write transaction")?; let _wtxn = index.write_txn().context("While awaiting for a write transaction")?;
@ -470,7 +479,7 @@ fn compact_index(db_path: PathBuf, index_name: &str) -> anyhow::Result<()> {
eprintln!("Compacting the index..."); eprintln!("Compacting the index...");
let before_compaction = Instant::now(); let before_compaction = Instant::now();
let new_file = index let new_file = index
.copy_to_file(&compacted_index_file_path, CompactionOption::Enabled) .copy_to_path(&compacted_index_file_path, CompactionOption::Enabled)
.with_context(|| format!("While compacting {}", compacted_index_file_path.display()))?; .with_context(|| format!("While compacting {}", compacted_index_file_path.display()))?;
let after_size = new_file.metadata()?.len(); let after_size = new_file.metadata()?.len();
@ -514,8 +523,10 @@ fn export_documents(
offset: Option<usize>, offset: Option<usize>,
) -> anyhow::Result<()> { ) -> anyhow::Result<()> {
let index_scheduler_path = db_path.join("tasks"); let index_scheduler_path = db_path.join("tasks");
let env = unsafe { EnvOpenOptions::new().max_dbs(100).open(&index_scheduler_path) } let env = unsafe {
.with_context(|| format!("While trying to open {:?}", index_scheduler_path.display()))?; EnvOpenOptions::new().read_txn_without_tls().max_dbs(100).open(&index_scheduler_path)
}
.with_context(|| format!("While trying to open {:?}", index_scheduler_path.display()))?;
let rtxn = env.read_txn()?; let rtxn = env.read_txn()?;
let index_mapping: Database<Str, UuidCodec> = let index_mapping: Database<Str, UuidCodec> =
@ -526,9 +537,10 @@ fn export_documents(
if uid == index_name { if uid == index_name {
let index_path = db_path.join("indexes").join(uuid.to_string()); let index_path = db_path.join("indexes").join(uuid.to_string());
let index = let index =
Index::new(EnvOpenOptions::new(), &index_path, false).with_context(|| { Index::new(EnvOpenOptions::new().read_txn_without_tls(), &index_path, false)
format!("While trying to open the index at path {:?}", index_path.display()) .with_context(|| {
})?; format!("While trying to open the index at path {:?}", index_path.display())
})?;
let rtxn = index.read_txn()?; let rtxn = index.read_txn()?;
let fields_ids_map = index.fields_ids_map(&rtxn)?; let fields_ids_map = index.fields_ids_map(&rtxn)?;
@ -616,8 +628,10 @@ fn hair_dryer(
index_parts: &[IndexPart], index_parts: &[IndexPart],
) -> anyhow::Result<()> { ) -> anyhow::Result<()> {
let index_scheduler_path = db_path.join("tasks"); let index_scheduler_path = db_path.join("tasks");
let env = unsafe { EnvOpenOptions::new().max_dbs(100).open(&index_scheduler_path) } let env = unsafe {
.with_context(|| format!("While trying to open {:?}", index_scheduler_path.display()))?; EnvOpenOptions::new().read_txn_without_tls().max_dbs(100).open(&index_scheduler_path)
}
.with_context(|| format!("While trying to open {:?}", index_scheduler_path.display()))?;
eprintln!("Trying to get a read transaction on the index scheduler..."); eprintln!("Trying to get a read transaction on the index scheduler...");
@ -630,9 +644,10 @@ fn hair_dryer(
if index_names.iter().any(|i| i == uid) { if index_names.iter().any(|i| i == uid) {
let index_path = db_path.join("indexes").join(uuid.to_string()); let index_path = db_path.join("indexes").join(uuid.to_string());
let index = let index =
Index::new(EnvOpenOptions::new(), &index_path, false).with_context(|| { Index::new(EnvOpenOptions::new().read_txn_without_tls(), &index_path, false)
format!("While trying to open the index at path {:?}", index_path.display()) .with_context(|| {
})?; format!("While trying to open the index at path {:?}", index_path.display())
})?;
eprintln!("Trying to get a read transaction on the {uid} index..."); eprintln!("Trying to get a read transaction on the {uid} index...");


@ -2,7 +2,9 @@ use std::path::Path;
use anyhow::{bail, Context}; use anyhow::{bail, Context};
use meilisearch_types::heed::types::{SerdeJson, Str}; use meilisearch_types::heed::types::{SerdeJson, Str};
use meilisearch_types::heed::{Database, Env, EnvOpenOptions, RoTxn, RwTxn, Unspecified}; use meilisearch_types::heed::{
Database, Env, EnvOpenOptions, RoTxn, RwTxn, Unspecified, WithoutTls,
};
use meilisearch_types::milli::index::{db_name, main_key}; use meilisearch_types::milli::index::{db_name, main_key};
use super::v1_9; use super::v1_9;
@ -92,7 +94,7 @@ fn update_index_stats(
fn update_date_format( fn update_date_format(
index_uid: &str, index_uid: &str,
index_env: &Env, index_env: &Env<WithoutTls>,
index_wtxn: &mut RwTxn, index_wtxn: &mut RwTxn,
) -> anyhow::Result<()> { ) -> anyhow::Result<()> {
let main = try_opening_poly_database(index_env, index_wtxn, db_name::MAIN) let main = try_opening_poly_database(index_env, index_wtxn, db_name::MAIN)
@ -106,7 +108,7 @@ fn update_date_format(
fn find_rest_embedders( fn find_rest_embedders(
index_uid: &str, index_uid: &str,
index_env: &Env, index_env: &Env<WithoutTls>,
index_txn: &RoTxn, index_txn: &RoTxn,
) -> anyhow::Result<Vec<String>> { ) -> anyhow::Result<Vec<String>> {
let main = try_opening_poly_database(index_env, index_txn, db_name::MAIN) let main = try_opening_poly_database(index_env, index_txn, db_name::MAIN)
@ -164,8 +166,10 @@ pub fn v1_9_to_v1_10(
// 2. REST embedders. We don't support this case right now, so bail // 2. REST embedders. We don't support this case right now, so bail
let index_scheduler_path = db_path.join("tasks"); let index_scheduler_path = db_path.join("tasks");
let env = unsafe { EnvOpenOptions::new().max_dbs(100).open(&index_scheduler_path) } let env = unsafe {
.with_context(|| format!("While trying to open {:?}", index_scheduler_path.display()))?; EnvOpenOptions::new().read_txn_without_tls().max_dbs(100).open(&index_scheduler_path)
}
.with_context(|| format!("While trying to open {:?}", index_scheduler_path.display()))?;
let mut sched_wtxn = env.write_txn()?; let mut sched_wtxn = env.write_txn()?;
@ -205,9 +209,13 @@ pub fn v1_9_to_v1_10(
let index_env = unsafe { let index_env = unsafe {
// FIXME: fetch the 25 magic number from the index file // FIXME: fetch the 25 magic number from the index file
EnvOpenOptions::new().max_dbs(25).open(&index_path).with_context(|| { EnvOpenOptions::new()
format!("while opening index {uid} at '{}'", index_path.display()) .read_txn_without_tls()
})? .max_dbs(25)
.open(&index_path)
.with_context(|| {
format!("while opening index {uid} at '{}'", index_path.display())
})?
}; };
let index_txn = index_env.read_txn().with_context(|| { let index_txn = index_env.read_txn().with_context(|| {
@ -252,9 +260,13 @@ pub fn v1_9_to_v1_10(
let index_env = unsafe { let index_env = unsafe {
// FIXME: fetch the 25 magic number from the index file // FIXME: fetch the 25 magic number from the index file
EnvOpenOptions::new().max_dbs(25).open(&index_path).with_context(|| { EnvOpenOptions::new()
format!("while opening index {uid} at '{}'", index_path.display()) .read_txn_without_tls()
})? .max_dbs(25)
.open(&index_path)
.with_context(|| {
format!("while opening index {uid} at '{}'", index_path.display())
})?
}; };
let mut index_wtxn = index_env.write_txn().with_context(|| { let mut index_wtxn = index_env.write_txn().with_context(|| {


@ -23,8 +23,10 @@ pub fn v1_10_to_v1_11(
println!("Upgrading from v1.10.0 to v1.11.0"); println!("Upgrading from v1.10.0 to v1.11.0");
let index_scheduler_path = db_path.join("tasks"); let index_scheduler_path = db_path.join("tasks");
let env = unsafe { EnvOpenOptions::new().max_dbs(100).open(&index_scheduler_path) } let env = unsafe {
.with_context(|| format!("While trying to open {:?}", index_scheduler_path.display()))?; EnvOpenOptions::new().read_txn_without_tls().max_dbs(100).open(&index_scheduler_path)
}
.with_context(|| format!("While trying to open {:?}", index_scheduler_path.display()))?;
let sched_rtxn = env.read_txn()?; let sched_rtxn = env.read_txn()?;
@ -50,9 +52,13 @@ pub fn v1_10_to_v1_11(
); );
let index_env = unsafe { let index_env = unsafe {
EnvOpenOptions::new().max_dbs(25).open(&index_path).with_context(|| { EnvOpenOptions::new()
format!("while opening index {uid} at '{}'", index_path.display()) .read_txn_without_tls()
})? .max_dbs(25)
.open(&index_path)
.with_context(|| {
format!("while opening index {uid} at '{}'", index_path.display())
})?
}; };
let index_rtxn = index_env.read_txn().with_context(|| { let index_rtxn = index_env.read_txn().with_context(|| {
@ -76,11 +82,11 @@ pub fn v1_10_to_v1_11(
try_opening_poly_database(&index_env, &index_wtxn, db_name::VECTOR_ARROY) try_opening_poly_database(&index_env, &index_wtxn, db_name::VECTOR_ARROY)
.with_context(|| format!("while updating date format for index `{uid}`"))?; .with_context(|| format!("while updating date format for index `{uid}`"))?;
arroy_v04_to_v05::ugrade_from_prev_version( meilisearch_types::milli::arroy::upgrade::cosine_from_0_4_to_0_5(
&index_rtxn, &index_rtxn,
index_read_database, index_read_database.remap_types(),
&mut index_wtxn, &mut index_wtxn,
index_write_database, index_write_database.remap_types(),
)?; )?;
index_wtxn.commit()?; index_wtxn.commit()?;


@ -115,8 +115,10 @@ fn convert_update_files(db_path: &Path) -> anyhow::Result<()> {
/// Rebuild field distribution as it was wrongly computed in v1.12.x if x < 3 /// Rebuild field distribution as it was wrongly computed in v1.12.x if x < 3
fn rebuild_field_distribution(db_path: &Path) -> anyhow::Result<()> { fn rebuild_field_distribution(db_path: &Path) -> anyhow::Result<()> {
let index_scheduler_path = db_path.join("tasks"); let index_scheduler_path = db_path.join("tasks");
let env = unsafe { EnvOpenOptions::new().max_dbs(100).open(&index_scheduler_path) } let env = unsafe {
.with_context(|| format!("While trying to open {:?}", index_scheduler_path.display()))?; EnvOpenOptions::new().read_txn_without_tls().max_dbs(100).open(&index_scheduler_path)
}
.with_context(|| format!("While trying to open {:?}", index_scheduler_path.display()))?;
let mut sched_wtxn = env.write_txn()?; let mut sched_wtxn = env.write_txn()?;
@ -173,11 +175,12 @@ fn rebuild_field_distribution(db_path: &Path) -> anyhow::Result<()> {
println!("\t- Rebuilding field distribution"); println!("\t- Rebuilding field distribution");
let index = let index = meilisearch_types::milli::Index::new(
meilisearch_types::milli::Index::new(EnvOpenOptions::new(), &index_path, false) EnvOpenOptions::new().read_txn_without_tls(),
.with_context(|| { &index_path,
format!("while opening index {uid} at '{}'", index_path.display()) false,
})?; )
.with_context(|| format!("while opening index {uid} at '{}'", index_path.display()))?;
let mut index_txn = index.write_txn()?; let mut index_txn = index.write_txn()?;


@ -18,7 +18,7 @@ bincode = "1.3.3"
bstr = "1.11.3" bstr = "1.11.3"
bytemuck = { version = "1.21.0", features = ["extern_crate_alloc"] } bytemuck = { version = "1.21.0", features = ["extern_crate_alloc"] }
byteorder = "1.5.0" byteorder = "1.5.0"
charabia = { version = "0.9.2", default-features = false } charabia = { version = "0.9.3", default-features = false }
concat-arrays = "0.1.2" concat-arrays = "0.1.2"
convert_case = "0.6.0" convert_case = "0.6.0"
crossbeam-channel = "0.5.14" crossbeam-channel = "0.5.14"
@ -28,11 +28,13 @@ flatten-serde-json = { path = "../flatten-serde-json" }
fst = "0.4.7" fst = "0.4.7"
fxhash = "0.2.1" fxhash = "0.2.1"
geoutils = "0.5.1" geoutils = "0.5.1"
grenad = { version = "0.5.0", default-features = false, features = ["rayon", "tempfile"] } grenad = { version = "0.5.0", default-features = false, features = [
heed = { version = "0.20.5", default-features = false, features = [ "rayon",
"tempfile",
] }
heed = { version = "0.22.0", default-features = false, features = [
"serde-json", "serde-json",
"serde-bincode", "serde-bincode",
"read-txn-no-tls",
] } ] }
indexmap = { version = "2.7.0", features = ["serde"] } indexmap = { version = "2.7.0", features = ["serde"] }
json-depth-checker = { path = "../json-depth-checker" } json-depth-checker = { path = "../json-depth-checker" }
@ -85,7 +87,7 @@ rhai = { git = "https://github.com/rhaiscript/rhai", rev = "ef3df63121d27aacd838
"no_time", "no_time",
"sync", "sync",
] } ] }
arroy = "0.5.0" arroy = "0.6.1"
rand = "0.8.5" rand = "0.8.5"
tracing = "0.1.41" tracing = "0.1.41"
ureq = { version = "2.12.1", features = ["json"] } ureq = { version = "2.12.1", features = ["json"] }
@ -101,7 +103,14 @@ uell = "0.1.0"
enum-iterator = "2.1.0" enum-iterator = "2.1.0"
bbqueue = { git = "https://github.com/meilisearch/bbqueue" } bbqueue = { git = "https://github.com/meilisearch/bbqueue" }
flume = { version = "0.11.1", default-features = false } flume = { version = "0.11.1", default-features = false }
utoipa = { version = "5.3.1", features = ["non_strict_integers", "preserve_order", "uuid", "time", "openapi_extensions"] } utoipa = { version = "5.3.1", features = [
"non_strict_integers",
"preserve_order",
"uuid",
"time",
"openapi_extensions",
] }
lru = "0.13.0"
[dev-dependencies] [dev-dependencies]
mimalloc = { version = "0.1.43", default-features = false } mimalloc = { version = "0.1.43", default-features = false }
@ -113,9 +122,7 @@ meili-snap = { path = "../meili-snap" }
rand = { version = "0.8.5", features = ["small_rng"] } rand = { version = "0.8.5", features = ["small_rng"] }
[features] [features]
all-tokenizations = [ all-tokenizations = ["charabia/default"]
"charabia/default",
]
# Use POSIX semaphores instead of SysV semaphores in LMDB # Use POSIX semaphores instead of SysV semaphores in LMDB
# For more information on this feature, see heed's Cargo.toml # For more information on this feature, see heed's Cargo.toml


@ -1,8 +1,13 @@
use heed::types::Bytes; use std::mem;
use heed::Database; use heed::Database;
use heed::DatabaseStat;
use heed::RoTxn; use heed::RoTxn;
use heed::Unspecified;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use crate::BEU32;
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Default)] #[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Default)]
#[serde(rename_all = "camelCase")] #[serde(rename_all = "camelCase")]
/// The stats of a database. /// The stats of a database.
@ -20,58 +25,24 @@ impl DatabaseStats {
/// ///
/// This function iterates over the whole database and computes the stats. /// This function iterates over the whole database and computes the stats.
/// It is not efficient and should be cached somewhere. /// It is not efficient and should be cached somewhere.
pub(crate) fn new(database: Database<Bytes, Bytes>, rtxn: &RoTxn<'_>) -> heed::Result<Self> { pub(crate) fn new(
let mut database_stats = database: Database<BEU32, Unspecified>,
Self { number_of_entries: 0, total_key_size: 0, total_value_size: 0 }; rtxn: &RoTxn<'_>,
) -> heed::Result<Self> {
let DatabaseStat { page_size, depth: _, branch_pages, leaf_pages, overflow_pages, entries } =
database.stat(rtxn)?;
let mut iter = database.iter(rtxn)?; // We first take the total size without overflow pages as the overflow pages contains the values and only that.
while let Some((key, value)) = iter.next().transpose()? { let total_size = (branch_pages + leaf_pages + overflow_pages) * page_size as usize;
let key_size = key.len() as u64; // We compute an estimated size for the keys.
let value_size = value.len() as u64; let total_key_size = entries * (mem::size_of::<u32>() + 4);
database_stats.total_key_size += key_size; let total_value_size = total_size - total_key_size;
database_stats.total_value_size += value_size;
}
database_stats.number_of_entries = database.len(rtxn)?; Ok(Self {
number_of_entries: entries as u64,
Ok(database_stats) total_key_size: total_key_size as u64,
} total_value_size: total_value_size as u64,
})
/// Recomputes the stats of the database and returns the new stats.
///
/// This function is used to update the stats of the database when some keys are modified.
/// It is more efficient than the `new` function because it does not iterate over the whole database but only the modified keys comparing the before and after states.
pub(crate) fn recompute<I, K>(
mut stats: Self,
database: Database<Bytes, Bytes>,
before_rtxn: &RoTxn<'_>,
after_rtxn: &RoTxn<'_>,
modified_keys: I,
) -> heed::Result<Self>
where
I: IntoIterator<Item = K>,
K: AsRef<[u8]>,
{
for key in modified_keys {
let key = key.as_ref();
if let Some(value) = database.get(after_rtxn, key)? {
let key_size = key.len() as u64;
let value_size = value.len() as u64;
stats.total_key_size = stats.total_key_size.saturating_add(key_size);
stats.total_value_size = stats.total_value_size.saturating_add(value_size);
}
if let Some(value) = database.get(before_rtxn, key)? {
let key_size = key.len() as u64;
let value_size = value.len() as u64;
stats.total_key_size = stats.total_key_size.saturating_sub(key_size);
stats.total_value_size = stats.total_value_size.saturating_sub(value_size);
}
}
stats.number_of_entries = database.len(after_rtxn)?;
Ok(stats)
} }
pub fn average_key_size(&self) -> u64 { pub fn average_key_size(&self) -> u64 {
@ -86,6 +57,10 @@ impl DatabaseStats {
self.number_of_entries self.number_of_entries
} }
pub fn total_size(&self) -> u64 {
self.total_key_size + self.total_value_size
}
pub fn total_key_size(&self) -> u64 { pub fn total_key_size(&self) -> u64 {
self.total_key_size self.total_key_size
} }
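Aside (not part of the diff): the new `DatabaseStats::new` above no longer iterates over every entry; it derives estimates from LMDB's page statistics. A standalone sketch of that arithmetic, with the counters from `heed::DatabaseStat` passed in as plain integers (names are illustrative):

// Returns (number_of_entries, estimated_total_key_size, estimated_total_value_size).
fn estimate_stats(
    page_size: u32,
    branch_pages: usize,
    leaf_pages: usize,
    overflow_pages: usize,
    entries: usize,
) -> (u64, u64, u64) {
    // Whole on-disk footprint of the database, counted in pages.
    let total_size = (branch_pages + leaf_pages + overflow_pages) * page_size as usize;
    // Keys are `BEU32` document ids plus a small fixed per-entry overhead, so
    // their cumulative size is estimated rather than measured.
    let total_key_size = entries * (std::mem::size_of::<u32>() + 4);
    // Everything not attributed to keys counts as value bytes (saturating here,
    // for safety, in case the key estimate overshoots a tiny database).
    let total_value_size = total_size.saturating_sub(total_key_size);
    (entries as u64, total_key_size as u64, total_value_size as u64)
}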


@ -80,9 +80,13 @@ impl DocumentsBatchIndex {
let mut map = Object::new(); let mut map = Object::new();
for (k, v) in document.iter() { for (k, v) in document.iter() {
// TODO: TAMO: update the error type let key = self
let key = .0
self.0.get_by_left(&k).ok_or(crate::error::InternalError::DatabaseClosing)?.clone(); .get_by_left(&k)
.ok_or(crate::error::InternalError::FieldIdMapMissingEntry(
FieldIdMapMissingEntry::FieldId { field_id: k, process: "recreate_json" },
))?
.clone();
let value = serde_json::from_slice::<serde_json::Value>(v) let value = serde_json::from_slice::<serde_json::Value>(v)
.map_err(crate::error::InternalError::SerdeJson)?; .map_err(crate::error::InternalError::SerdeJson)?;
map.insert(key, value); map.insert(key, value);


@ -33,8 +33,6 @@ pub enum Error {
#[derive(Error, Debug)]
pub enum InternalError {
-#[error("{}", HeedError::DatabaseClosing)]
-DatabaseClosing,
#[error("missing {} in the {db_name} database", key.unwrap_or("key"))]
DatabaseMissingEntry { db_name: &'static str, key: Option<&'static str> },
#[error("missing {key} in the fieldids weights mapping")]
@ -131,6 +129,14 @@ and can not be more than 511 bytes.", .document_id.to_string()
InvalidGeoField(#[from] GeoError),
#[error("Invalid vector dimensions: expected: `{}`, found: `{}`.", .expected, .found)]
InvalidVectorDimensions { expected: usize, found: usize },
+#[error("Invalid vector dimensions in document with id `{document_id}` in `._vectors.{embedder_name}`.\n - note: embedding #{embedding_index} has dimensions {found}\n - note: embedder `{embedder_name}` requires {expected}")]
+InvalidIndexingVectorDimensions {
+embedder_name: String,
+document_id: String,
+embedding_index: usize,
+expected: usize,
+found: usize,
+},
#[error("The `_vectors` field in the document with id: `{document_id}` is not an object. Was expecting an object with a key for each embedder with manually provided vectors, but instead got `{value}`")]
InvalidVectorsMapType { document_id: String, value: Value },
#[error("Bad embedder configuration in the document with id: `{document_id}`. {error}")]
@ -197,8 +203,8 @@ and can not be more than 511 bytes.", .document_id.to_string()
valid_fields: BTreeSet<String>,
hidden_fields: bool,
},
-#[error("an environment is already opened with different options")]
-InvalidLmdbOpenOptions,
+#[error("An LMDB environment is already opened")]
+EnvAlreadyOpened,
#[error("You must specify where `sort` is listed in the rankingRules setting to use the sort parameter at search time.")]
SortRankingRuleMissing,
#[error("The database file is in an invalid state.")]
@ -362,7 +368,8 @@ impl From<arroy::Error> for Error {
| arroy::Error::UnmatchingDistance { .. }
| arroy::Error::NeedBuild(_)
| arroy::Error::MissingKey { .. }
-| arroy::Error::MissingMetadata(_) => {
+| arroy::Error::MissingMetadata(_)
+| arroy::Error::CannotDecodeKeyMode { .. } => {
Error::InternalError(InternalError::ArroyError(value))
}
}
@ -516,8 +523,7 @@ impl From<HeedError> for Error {
// TODO use the encoding
HeedError::Encoding(_) => InternalError(Serialization(Encoding { db_name: None })),
HeedError::Decoding(_) => InternalError(Serialization(Decoding { db_name: None })),
-HeedError::DatabaseClosing => InternalError(DatabaseClosing),
-HeedError::BadOpenOptions { .. } => UserError(InvalidLmdbOpenOptions),
+HeedError::EnvAlreadyOpened { .. } => UserError(EnvAlreadyOpened),
}
}
}

View File

@ -3,8 +3,9 @@ use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet};
use std::fs::File;
use std::path::Path;
-use heed::types::*;
+use heed::{types::*, DatabaseStat, WithoutTls};
use heed::{CompactionOption, Database, RoTxn, RwTxn, Unspecified};
+use indexmap::IndexMap;
use roaring::RoaringBitmap;
use rstar::RTree;
use serde::{Deserialize, Serialize};
@ -110,7 +111,7 @@ pub mod db_name {
#[derive(Clone)]
pub struct Index {
/// The LMDB environment which this index is associated with.
-pub(crate) env: heed::Env,
+pub(crate) env: heed::Env<WithoutTls>,
/// Contains many different types (e.g. the fields ids map).
pub(crate) main: Database<Unspecified, Unspecified>,
@ -177,7 +178,7 @@ pub struct Index {
impl Index {
pub fn new_with_creation_dates<P: AsRef<Path>>(
-mut options: heed::EnvOpenOptions,
+mut options: heed::EnvOpenOptions<WithoutTls>,
path: P,
created_at: time::OffsetDateTime,
updated_at: time::OffsetDateTime,
@ -275,7 +276,7 @@ impl Index {
}
pub fn new<P: AsRef<Path>>(
-options: heed::EnvOpenOptions,
+options: heed::EnvOpenOptions<WithoutTls>,
path: P,
creation: bool,
) -> Result<Index> {
@ -284,7 +285,7 @@ impl Index {
}
fn set_creation_dates(
-env: &heed::Env,
+env: &heed::Env<WithoutTls>,
main: Database<Unspecified, Unspecified>,
created_at: time::OffsetDateTime,
updated_at: time::OffsetDateTime,
@ -306,12 +307,12 @@ impl Index {
}
/// Create a read transaction to be able to read the index.
-pub fn read_txn(&self) -> heed::Result<RoTxn<'_>> {
+pub fn read_txn(&self) -> heed::Result<RoTxn<'_, WithoutTls>> {
self.env.read_txn()
}
/// Create a static read transaction to be able to read the index without keeping a reference to it.
-pub fn static_read_txn(&self) -> heed::Result<RoTxn<'static>> {
+pub fn static_read_txn(&self) -> heed::Result<RoTxn<'static, WithoutTls>> {
self.env.clone().static_read_txn()
}
@ -340,8 +341,12 @@ impl Index {
self.env.info().map_size
}
-pub fn copy_to_file<P: AsRef<Path>>(&self, path: P, option: CompactionOption) -> Result<File> {
-self.env.copy_to_file(path, option).map_err(Into::into)
+pub fn copy_to_file(&self, file: &mut File, option: CompactionOption) -> Result<()> {
+self.env.copy_to_file(file, option).map_err(Into::into)
+}
+pub fn copy_to_path<P: AsRef<Path>>(&self, path: P, option: CompactionOption) -> Result<File> {
+self.env.copy_to_path(path, option).map_err(Into::into)
}
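A minimal usage sketch of the two copy methods above, assuming the crate is `milli` and that an opened `Index` is at hand (both assumptions, not part of the diff): `copy_to_file` now streams the compacted copy into a caller-owned handle, while `copy_to_path` keeps the old create-the-file-at-a-path behaviour.

use std::fs::File;
use heed::CompactionOption;
use milli::Index; // assumption: crate name

fn snapshot_index(index: &Index) -> milli::Result<()> {
    // Stream a compacted copy into a file handle the caller owns...
    let mut file = File::create("/tmp/index.snapshot.mdb").expect("create snapshot file");
    index.copy_to_file(&mut file, CompactionOption::Enabled)?;
    // ...or let the index create the target file from a path, as before.
    let _created: File = index.copy_to_path("/tmp/index.copy.mdb", CompactionOption::Enabled)?;
    Ok(())
}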
/// Returns an `EnvClosingEvent` that can be used to wait for the closing event,
@ -406,38 +411,6 @@ impl Index {
Ok(count.unwrap_or_default())
}
/// Updates the stats of the documents database based on the previous stats and the modified docids.
pub fn update_documents_stats(
&self,
wtxn: &mut RwTxn<'_>,
modified_docids: roaring::RoaringBitmap,
) -> Result<()> {
let before_rtxn = self.read_txn()?;
let document_stats = match self.documents_stats(&before_rtxn)? {
Some(before_stats) => DatabaseStats::recompute(
before_stats,
self.documents.remap_types(),
&before_rtxn,
wtxn,
modified_docids.iter().map(|docid| docid.to_be_bytes()),
)?,
None => {
// This should never happen when there are already documents in the index, the documents stats should be present.
// If it happens, it means that the index was not properly initialized/upgraded.
debug_assert_eq!(
self.documents.len(&before_rtxn)?,
0,
"The documents stats should be present when there are documents in the index"
);
tracing::warn!("No documents stats found, creating new ones");
DatabaseStats::new(self.documents.remap_types(), &*wtxn)?
}
};
self.put_documents_stats(wtxn, document_stats)?;
Ok(())
}
/// Writes the stats of the documents database.
pub fn put_documents_stats(
&self,
@ -1751,6 +1724,122 @@ impl Index {
}
Ok(stats)
}
/// Check if the word is indexed in the index.
///
/// This function checks if the word is indexed in the index by looking at the word_docids and exact_word_docids.
///
/// # Arguments
///
/// * `rtxn`: The read transaction.
/// * `word`: The word to check.
pub fn contains_word(&self, rtxn: &RoTxn<'_>, word: &str) -> Result<bool> {
Ok(self.word_docids.remap_data_type::<DecodeIgnore>().get(rtxn, word)?.is_some()
|| self.exact_word_docids.remap_data_type::<DecodeIgnore>().get(rtxn, word)?.is_some())
}
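A minimal usage sketch for `contains_word`, assuming the crate is `milli` and an `index: &Index` is available (hypothetical caller code):

use milli::Index; // assumption: crate name

fn is_indexed(index: &Index) -> milli::Result<bool> {
    let rtxn = index.read_txn()?;
    // True when the word appears in either `word_docids` or `exact_word_docids`.
    index.contains_word(&rtxn, "meilisearch")
}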
/// Returns the sizes in bytes of each of the index database at the given rtxn.
pub fn database_sizes(&self, rtxn: &RoTxn<'_>) -> heed::Result<IndexMap<&'static str, usize>> {
let Self {
env: _,
main,
external_documents_ids,
word_docids,
exact_word_docids,
word_prefix_docids,
exact_word_prefix_docids,
word_pair_proximity_docids,
word_position_docids,
word_fid_docids,
word_prefix_position_docids,
word_prefix_fid_docids,
field_id_word_count_docids,
facet_id_f64_docids,
facet_id_string_docids,
facet_id_normalized_string_strings,
facet_id_string_fst,
facet_id_exists_docids,
facet_id_is_null_docids,
facet_id_is_empty_docids,
field_id_docid_facet_f64s,
field_id_docid_facet_strings,
vector_arroy,
embedder_category_id,
documents,
} = self;
fn compute_size(stats: DatabaseStat) -> usize {
let DatabaseStat {
page_size,
depth: _,
branch_pages,
leaf_pages,
overflow_pages,
entries: _,
} = stats;
(branch_pages + leaf_pages + overflow_pages) * page_size as usize
}
let mut sizes = IndexMap::new();
sizes.insert("main", main.stat(rtxn).map(compute_size)?);
sizes
.insert("external_documents_ids", external_documents_ids.stat(rtxn).map(compute_size)?);
sizes.insert("word_docids", word_docids.stat(rtxn).map(compute_size)?);
sizes.insert("exact_word_docids", exact_word_docids.stat(rtxn).map(compute_size)?);
sizes.insert("word_prefix_docids", word_prefix_docids.stat(rtxn).map(compute_size)?);
sizes.insert(
"exact_word_prefix_docids",
exact_word_prefix_docids.stat(rtxn).map(compute_size)?,
);
sizes.insert(
"word_pair_proximity_docids",
word_pair_proximity_docids.stat(rtxn).map(compute_size)?,
);
sizes.insert("word_position_docids", word_position_docids.stat(rtxn).map(compute_size)?);
sizes.insert("word_fid_docids", word_fid_docids.stat(rtxn).map(compute_size)?);
sizes.insert(
"word_prefix_position_docids",
word_prefix_position_docids.stat(rtxn).map(compute_size)?,
);
sizes
.insert("word_prefix_fid_docids", word_prefix_fid_docids.stat(rtxn).map(compute_size)?);
sizes.insert(
"field_id_word_count_docids",
field_id_word_count_docids.stat(rtxn).map(compute_size)?,
);
sizes.insert("facet_id_f64_docids", facet_id_f64_docids.stat(rtxn).map(compute_size)?);
sizes
.insert("facet_id_string_docids", facet_id_string_docids.stat(rtxn).map(compute_size)?);
sizes.insert(
"facet_id_normalized_string_strings",
facet_id_normalized_string_strings.stat(rtxn).map(compute_size)?,
);
sizes.insert("facet_id_string_fst", facet_id_string_fst.stat(rtxn).map(compute_size)?);
sizes
.insert("facet_id_exists_docids", facet_id_exists_docids.stat(rtxn).map(compute_size)?);
sizes.insert(
"facet_id_is_null_docids",
facet_id_is_null_docids.stat(rtxn).map(compute_size)?,
);
sizes.insert(
"facet_id_is_empty_docids",
facet_id_is_empty_docids.stat(rtxn).map(compute_size)?,
);
sizes.insert(
"field_id_docid_facet_f64s",
field_id_docid_facet_f64s.stat(rtxn).map(compute_size)?,
);
sizes.insert(
"field_id_docid_facet_strings",
field_id_docid_facet_strings.stat(rtxn).map(compute_size)?,
);
sizes.insert("vector_arroy", vector_arroy.stat(rtxn).map(compute_size)?);
sizes.insert("embedder_category_id", embedder_category_id.stat(rtxn).map(compute_size)?);
sizes.insert("documents", documents.stat(rtxn).map(compute_size)?);
Ok(sizes)
}
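A hedged sketch of how these sizes could be consumed (the crate name `milli` and the caller code are assumptions). The `compute_size` helper above is plain page arithmetic: a database with 3 branch pages, 120 leaf pages, 2 overflow pages and a 4096-byte page size accounts for 125 * 4096 = 512000 bytes.

use milli::Index; // assumption: crate name

fn print_database_sizes(index: &Index) -> milli::Result<()> {
    let rtxn = index.read_txn()?;
    // Each entry maps an internal database name to its size in bytes,
    // computed as (branch_pages + leaf_pages + overflow_pages) * page_size.
    for (db_name, size_in_bytes) in index.database_sizes(&rtxn)? {
        println!("{db_name}: {size_in_bytes} B");
    }
    Ok(())
}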
}
#[derive(Debug, Deserialize, Serialize)]
@ -1825,7 +1914,8 @@ pub(crate) mod tests {
impl TempIndex {
/// Creates a temporary index
pub fn new_with_map_size(size: usize) -> Self {
-let mut options = EnvOpenOptions::new();
+let options = EnvOpenOptions::new();
+let mut options = options.read_txn_without_tls();
options.map_size(size);
let _tempdir = TempDir::new_in(".").unwrap();
let inner = Index::new(options, _tempdir.path(), true).unwrap();

View File

@ -83,6 +83,8 @@ pub use self::search::{
};
pub use self::update::ChannelCongestion;
+pub use arroy;
pub type Result<T> = std::result::Result<T, error::Error>;
pub type Attribute = u32;

View File

@ -1,3 +1,4 @@
use enum_iterator::Sequence;
use std::any::TypeId;
use std::borrow::Cow;
use std::marker::PhantomData;
@ -76,6 +77,14 @@ impl Progress {
durations.drain(..).map(|(name, duration)| (name, format!("{duration:.2?}"))).collect()
}
// TODO: ideally we should expose the progress in a way that let arroy use it directly
pub(crate) fn update_progress_from_arroy(&self, progress: arroy::WriterProgress) {
self.update_progress(progress.main);
if let Some(sub) = progress.sub {
self.update_progress(sub);
}
}
}
/// Generate the names associated with the durations and push them.
@ -181,8 +190,18 @@ macro_rules! make_atomic_progress {
};
}
-make_atomic_progress!(Document alias AtomicDocumentStep => "document" );
+make_atomic_progress!(Document alias AtomicDocumentStep => "document");
-make_atomic_progress!(Payload alias AtomicPayloadStep => "payload" );
+make_atomic_progress!(Payload alias AtomicPayloadStep => "payload");
make_enum_progress! {
pub enum MergingWordCache {
WordDocids,
WordFieldIdDocids,
ExactWordDocids,
WordPositionDocids,
FieldIdWordCountDocids,
}
}
#[derive(Debug, Serialize, Clone, ToSchema)]
#[serde(rename_all = "camelCase")]
@ -238,3 +257,44 @@ impl<U: Send + Sync + 'static> Step for VariableNameStep<U> {
self.total
}
}
impl Step for arroy::MainStep {
fn name(&self) -> Cow<'static, str> {
match self {
arroy::MainStep::PreProcessingTheItems => "pre processing the items",
arroy::MainStep::WritingTheDescendantsAndMetadata => {
"writing the descendants and metadata"
}
arroy::MainStep::RetrieveTheUpdatedItems => "retrieve the updated items",
arroy::MainStep::RetrievingTheTreeAndItemNodes => "retrieving the tree and item nodes",
arroy::MainStep::UpdatingTheTrees => "updating the trees",
arroy::MainStep::CreateNewTrees => "create new trees",
arroy::MainStep::WritingNodesToDatabase => "writing nodes to database",
arroy::MainStep::DeleteExtraneousTrees => "delete extraneous trees",
arroy::MainStep::WriteTheMetadata => "write the metadata",
}
.into()
}
fn current(&self) -> u32 {
*self as u32
}
fn total(&self) -> u32 {
Self::CARDINALITY as u32
}
}
impl Step for arroy::SubStep {
fn name(&self) -> Cow<'static, str> {
self.unit.into()
}
fn current(&self) -> u32 {
self.current.load(Ordering::Relaxed)
}
fn total(&self) -> u32 {
self.max
}
}

View File

@ -203,7 +203,7 @@ impl<'a> Search<'a> {
let deadline = std::time::Instant::now() + std::time::Duration::from_secs(3);
-match embedder.embed_search(query, Some(deadline)) {
+match embedder.embed_search(&query, Some(deadline)) {
Ok(embedding) => embedding,
Err(error) => {
tracing::error!(error=%error, "Embedding failed");

View File

@ -173,16 +173,18 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
ranking_rule_scores.push(ScoreDetails::Skipped);
// remove candidates from the universe without adding them to result if their score is below the threshold
-if let Some(ranking_score_threshold) = ranking_score_threshold {
-let current_score = ScoreDetails::global_score(ranking_rule_scores.iter());
-if current_score < ranking_score_threshold {
-all_candidates -= bucket | &ranking_rule_universes[cur_ranking_rule_index];
-back!();
-continue;
-}
-}
-maybe_add_to_results!(bucket);
+let is_below_threshold =
+ranking_score_threshold.is_some_and(|ranking_score_threshold| {
+let current_score = ScoreDetails::global_score(ranking_rule_scores.iter());
+current_score < ranking_score_threshold
+});
+if is_below_threshold {
+all_candidates -= &bucket;
+all_candidates -= &ranking_rule_universes[cur_ranking_rule_index];
+} else {
+maybe_add_to_results!(bucket);
+}
ranking_rule_scores.pop();
@ -237,23 +239,24 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
);
// remove candidates from the universe without adding them to result if their score is below the threshold
-if let Some(ranking_score_threshold) = ranking_score_threshold {
-let current_score = ScoreDetails::global_score(ranking_rule_scores.iter());
-if current_score < ranking_score_threshold {
-all_candidates -=
-next_bucket.candidates | &ranking_rule_universes[cur_ranking_rule_index];
-back!();
-continue;
-}
-}
+let is_below_threshold = ranking_score_threshold.is_some_and(|ranking_score_threshold| {
+let current_score = ScoreDetails::global_score(ranking_rule_scores.iter());
+current_score < ranking_score_threshold
+});
ranking_rule_universes[cur_ranking_rule_index] -= &next_bucket.candidates;
if cur_ranking_rule_index == ranking_rules_len - 1
|| (scoring_strategy == ScoringStrategy::Skip && next_bucket.candidates.len() <= 1)
|| cur_offset + (next_bucket.candidates.len() as usize) < from
+|| is_below_threshold
{
-maybe_add_to_results!(next_bucket.candidates);
+if is_below_threshold {
+all_candidates -= &next_bucket.candidates;
+all_candidates -= &ranking_rule_universes[cur_ranking_rule_index];
+} else {
+maybe_add_to_results!(next_bucket.candidates);
+}
ranking_rule_scores.pop();
continue;
}
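A standalone sketch of the threshold test used in both branches above, assuming scores have already been reduced to a single global value (the function and variable names here are made up):

fn is_below_threshold(global_score: f64, ranking_score_threshold: Option<f64>) -> bool {
    // With no threshold configured nothing is filtered out; otherwise the bucket
    // is subtracted from all_candidates instead of being pushed to the results.
    ranking_score_threshold.is_some_and(|threshold| global_score < threshold)
}

fn main() {
    assert!(is_below_threshold(0.2, Some(0.5)));  // dropped
    assert!(!is_below_threshold(0.8, Some(0.5))); // kept
    assert!(!is_below_threshold(0.2, None));      // threshold disabled
}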

View File

@ -1,10 +1,12 @@
use std::borrow::Cow;
+use std::cmp::Ordering;
use std::collections::BTreeSet;
use std::ops::ControlFlow;
use fst::automaton::Str;
-use fst::{Automaton, IntoStreamer, Streamer};
+use fst::{IntoStreamer, Streamer};
use heed::types::DecodeIgnore;
+use itertools::{merge_join_by, EitherOrBoth};
use super::{OneTypoTerm, Phrase, QueryTerm, ZeroTypoTerm};
use crate::search::fst_utils::{Complement, Intersection, StartsWith, Union};
@ -16,16 +18,10 @@ use crate::{Result, MAX_WORD_LENGTH};
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum NumberOfTypos {
-Zero,
One,
Two,
}
-pub enum ZeroOrOneTypo {
-Zero,
-One,
-}
impl Interned<QueryTerm> {
pub fn compute_fully_if_needed(self, ctx: &mut SearchContext<'_>) -> Result<()> {
let s = ctx.term_interner.get_mut(self);
@ -47,34 +43,45 @@ impl Interned<QueryTerm> {
}
fn find_zero_typo_prefix_derivations(
+ctx: &mut SearchContext<'_>,
word_interned: Interned<String>,
-fst: fst::Set<Cow<'_, [u8]>>,
-word_interner: &mut DedupInterner<String>,
mut visit: impl FnMut(Interned<String>) -> Result<ControlFlow<()>>,
) -> Result<()> {
-let word = word_interner.get(word_interned).to_owned();
+let word = ctx.word_interner.get(word_interned).to_owned();
let word = word.as_str();
-let prefix = Str::new(word).starts_with();
-let mut stream = fst.search(prefix).into_stream();
-while let Some(derived_word) = stream.next() {
-let derived_word = std::str::from_utf8(derived_word)?.to_owned();
-let derived_word_interned = word_interner.insert(derived_word);
-if derived_word_interned != word_interned {
-let cf = visit(derived_word_interned)?;
-if cf.is_break() {
-break;
-}
-}
-}
+let words =
+ctx.index.word_docids.remap_data_type::<DecodeIgnore>().prefix_iter(ctx.txn, word)?;
+let exact_words =
+ctx.index.exact_word_docids.remap_data_type::<DecodeIgnore>().prefix_iter(ctx.txn, word)?;
+for eob in merge_join_by(words, exact_words, |lhs, rhs| match (lhs, rhs) {
+(Ok((word, _)), Ok((exact_word, _))) => word.cmp(exact_word),
+(Err(_), _) | (_, Err(_)) => Ordering::Equal,
+}) {
+match eob {
+EitherOrBoth::Both(kv, _) | EitherOrBoth::Left(kv) | EitherOrBoth::Right(kv) => {
+let (derived_word, _) = kv?;
+let derived_word = derived_word.to_string();
+let derived_word_interned = ctx.word_interner.insert(derived_word);
+if derived_word_interned != word_interned {
+let cf = visit(derived_word_interned)?;
+if cf.is_break() {
+break;
+}
+}
+}
+}
+}
Ok(())
}
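The rewrite above walks the `word_docids` and `exact_word_docids` prefix iterators in lockstep; a self-contained sketch of the same `itertools::merge_join_by` pattern over plain sorted word lists (the data is made up):

use itertools::{merge_join_by, EitherOrBoth};

fn main() {
    let word_docids = ["apple", "apricot", "banana"];
    let exact_word_docids = ["apricot", "blueberry"];

    // Both inputs are sorted, so the merge yields each distinct word once,
    // whichever side(s) it came from.
    for eob in merge_join_by(word_docids, exact_word_docids, |l, r| l.cmp(r)) {
        match eob {
            EitherOrBoth::Both(w, _) | EitherOrBoth::Left(w) | EitherOrBoth::Right(w) => {
                println!("derived prefix candidate: {w}");
            }
        }
    }
}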
-fn find_zero_one_typo_derivations(
+fn find_one_typo_derivations(
ctx: &mut SearchContext<'_>,
word_interned: Interned<String>,
is_prefix: bool,
-mut visit: impl FnMut(Interned<String>, ZeroOrOneTypo) -> Result<ControlFlow<()>>,
+mut visit: impl FnMut(Interned<String>) -> Result<ControlFlow<()>>,
) -> Result<()> {
let fst = ctx.get_words_fst()?;
let word = ctx.word_interner.get(word_interned).to_owned();
@ -89,16 +96,9 @@ fn find_zero_one_typo_derivations(
let derived_word = ctx.word_interner.insert(derived_word.to_owned());
let d = dfa.distance(state.1);
match d.to_u8() {
-0 => {
-if derived_word != word_interned {
-let cf = visit(derived_word, ZeroOrOneTypo::Zero)?;
-if cf.is_break() {
-break;
-}
-}
-}
+0 => (),
1 => {
-let cf = visit(derived_word, ZeroOrOneTypo::One)?;
+let cf = visit(derived_word)?;
if cf.is_break() {
break;
}
@ -111,7 +111,7 @@ fn find_zero_one_typo_derivations(
Ok(())
}
-fn find_zero_one_two_typo_derivations(
+fn find_one_two_typo_derivations(
word_interned: Interned<String>,
is_prefix: bool,
fst: fst::Set<Cow<'_, [u8]>>,
@ -144,14 +144,7 @@ fn find_zero_one_two_typo_derivations(
// correct distance
let d = second_dfa.distance((state.1).0);
match d.to_u8() {
-0 => {
-if derived_word_interned != word_interned {
-let cf = visit(derived_word_interned, NumberOfTypos::Zero)?;
-if cf.is_break() {
-break;
-}
-}
-}
+0 => (),
1 => {
let cf = visit(derived_word_interned, NumberOfTypos::One)?;
if cf.is_break() {
@ -194,8 +187,6 @@ pub fn partially_initialized_term_from_word(
});
}
-let fst = ctx.index.words_fst(ctx.txn)?;
let use_prefix_db = is_prefix
&& (ctx
.index
@ -215,24 +206,19 @@ pub fn partially_initialized_term_from_word(
let mut zero_typo = None;
let mut prefix_of = BTreeSet::new();
-if fst.contains(word) || ctx.index.exact_word_docids.get(ctx.txn, word)?.is_some() {
+if ctx.index.contains_word(ctx.txn, word)? {
zero_typo = Some(word_interned);
}
if is_prefix && use_prefix_db.is_none() {
-find_zero_typo_prefix_derivations(
-word_interned,
-fst,
-&mut ctx.word_interner,
-|derived_word| {
-if prefix_of.len() < limits::MAX_PREFIX_COUNT {
-prefix_of.insert(derived_word);
-Ok(ControlFlow::Continue(()))
-} else {
-Ok(ControlFlow::Break(()))
-}
-},
-)?;
+find_zero_typo_prefix_derivations(ctx, word_interned, |derived_word| {
+if prefix_of.len() < limits::MAX_PREFIX_COUNT {
+prefix_of.insert(derived_word);
+Ok(ControlFlow::Continue(()))
+} else {
+Ok(ControlFlow::Break(()))
+}
+})?;
}
let synonyms = ctx.index.synonyms(ctx.txn)?;
let mut synonym_word_count = 0;
@ -295,18 +281,13 @@ impl Interned<QueryTerm> {
let mut one_typo_words = BTreeSet::new();
if *max_nbr_typos > 0 {
-find_zero_one_typo_derivations(ctx, original, is_prefix, |derived_word, nbr_typos| {
-match nbr_typos {
-ZeroOrOneTypo::Zero => {}
-ZeroOrOneTypo::One => {
-if one_typo_words.len() < limits::MAX_ONE_TYPO_COUNT {
-one_typo_words.insert(derived_word);
-} else {
-return Ok(ControlFlow::Break(()));
-}
-}
-}
-Ok(ControlFlow::Continue(()))
+find_one_typo_derivations(ctx, original, is_prefix, |derived_word| {
+if one_typo_words.len() < limits::MAX_ONE_TYPO_COUNT {
+one_typo_words.insert(derived_word);
+Ok(ControlFlow::Continue(()))
+} else {
+Ok(ControlFlow::Break(()))
+}
})?;
}
@ -357,7 +338,7 @@ impl Interned<QueryTerm> {
let mut two_typo_words = BTreeSet::new();
if *max_nbr_typos > 0 {
-find_zero_one_two_typo_derivations(
+find_one_two_typo_derivations(
*original,
*is_prefix,
ctx.index.words_fst(ctx.txn)?,
@ -370,7 +351,6 @@ impl Interned<QueryTerm> {
return Ok(ControlFlow::Break(()));
}
match nbr_typos {
-NumberOfTypos::Zero => {}
NumberOfTypos::One => {
if one_typo_words.len() < limits::MAX_ONE_TYPO_COUNT {
one_typo_words.insert(derived_word);

View File

@ -15,7 +15,8 @@ use crate::constants::RESERVED_GEO_FIELD_NAME;
pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index {
let path = tempfile::tempdir().unwrap();
-let mut options = EnvOpenOptions::new();
+let options = EnvOpenOptions::new();
+let mut options = options.read_txn_without_tls();
options.map_size(10 * 1024 * 1024); // 10 MB
let index = Index::new(options, &path, true).unwrap();

View File

@ -352,7 +352,7 @@ pub(crate) mod test_helpers {
use grenad::MergerBuilder;
use heed::types::Bytes;
-use heed::{BytesDecode, BytesEncode, Env, RoTxn, RwTxn};
+use heed::{BytesDecode, BytesEncode, Env, RoTxn, RwTxn, WithoutTls};
use roaring::RoaringBitmap;
use super::bulk::FacetsUpdateBulkInner;
@ -390,7 +390,7 @@
for<'a> BoundCodec:
BytesEncode<'a> + BytesDecode<'a, DItem = <BoundCodec as BytesEncode<'a>>::EItem>,
{
-pub env: Env,
+pub env: Env<WithoutTls>,
pub content: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
pub group_size: Cell<u8>,
pub min_level_size: Cell<u8>,
@ -412,7 +412,8 @@
let group_size = group_size.clamp(2, 127);
let max_group_size = std::cmp::min(127, std::cmp::max(group_size * 2, max_group_size)); // 2*group_size <= x <= 127
let min_level_size = std::cmp::max(1, min_level_size); // 1 <= x <= inf
-let mut options = heed::EnvOpenOptions::new();
+let options = heed::EnvOpenOptions::new();
+let mut options = options.read_txn_without_tls();
let options = options.map_size(4096 * 4 * 1000 * 100);
let tempdir = tempfile::TempDir::new().unwrap();
let env = unsafe { options.open(tempdir.path()) }.unwrap();

View File

@ -28,9 +28,11 @@ pub use self::helpers::*;
pub use self::transform::{Transform, TransformOutput};
use super::facet::clear_facet_levels_based_on_settings_diff;
use super::new::StdResult;
+use crate::database_stats::DatabaseStats;
use crate::documents::{obkv_to_object, DocumentsBatchReader};
use crate::error::{Error, InternalError};
use crate::index::{PrefixSearch, PrefixSettings};
+use crate::progress::Progress;
use crate::thread_pool_no_abort::ThreadPoolNoAbortBuilder;
pub use crate::update::index_documents::helpers::CursorClonableMmap;
use crate::update::{
@ -475,7 +477,8 @@ where
if !settings_diff.settings_update_only {
// Update the stats of the documents database when there is a document update.
-self.index.update_documents_stats(self.wtxn, modified_docids)?;
+let stats = DatabaseStats::new(self.index.documents.remap_data_type(), self.wtxn)?;
+self.index.put_documents_stats(self.wtxn, stats)?;
}
// We write the field distribution into the main database
self.index.put_field_distribution(self.wtxn, &field_distribution)?;
@ -520,7 +523,16 @@ where
pool.install(|| {
let mut writer = ArroyWrapper::new(vector_arroy, embedder_index, was_quantized);
-writer.build_and_quantize(wtxn, &mut rng, dimension, is_quantizing, cancel)?;
+writer.build_and_quantize(
+wtxn,
+// In the settings we don't have any progress to share
+&Progress::default(),
+&mut rng,
+dimension,
+is_quantizing,
+self.indexer_config.max_memory,
+cancel,
+)?;
Result::Ok(())
})
.map_err(InternalError::from)??;
@ -2799,8 +2811,9 @@ mod tests {
embedding_configs.pop().unwrap();
insta::assert_snapshot!(embedder_name, @"manual");
insta::assert_debug_snapshot!(user_provided, @"RoaringBitmap<[0, 1, 2]>");
-let embedder =
-std::sync::Arc::new(crate::vector::Embedder::new(embedder.embedder_options).unwrap());
+let embedder = std::sync::Arc::new(
+crate::vector::Embedder::new(embedder.embedder_options, 0).unwrap(),
+);
let res = index
.search(&rtxn)
.semantic(embedder_name, embedder, false, Some([0.0, 1.0, 2.0].to_vec()))

View File

@ -1,5 +1,6 @@
use bumpalo::Bump;
use heed::RoTxn;
+use serde_json::Value;
use super::document::{
Document as _, DocumentFromDb, DocumentFromVersions, MergedDocument, Versions,
@ -10,7 +11,7 @@ use super::vector_document::{
use crate::attribute_patterns::PatternMatch;
use crate::documents::FieldIdMapper;
use crate::vector::EmbeddingConfigs;
-use crate::{DocumentId, Index, Result};
+use crate::{DocumentId, Index, InternalError, Result};
pub enum DocumentChange<'doc> {
Deletion(Deletion<'doc>),
@ -243,6 +244,29 @@ impl<'doc> Update<'doc> {
Ok(has_deleted_fields)
}
/// Returns `true` if the geo fields have changed.
pub fn has_changed_for_geo_fields<'t, Mapper: FieldIdMapper>(
&self,
rtxn: &'t RoTxn,
index: &'t Index,
mapper: &'t Mapper,
) -> Result<bool> {
let current = self.current(rtxn, index, mapper)?;
let current_geo = current.geo_field()?;
let updated_geo = self.only_changed_fields().geo_field()?;
match (current_geo, updated_geo) {
(Some(current_geo), Some(updated_geo)) => {
let current: Value =
serde_json::from_str(current_geo.get()).map_err(InternalError::SerdeJson)?;
let updated: Value =
serde_json::from_str(updated_geo.get()).map_err(InternalError::SerdeJson)?;
Ok(current != updated)
}
(None, None) => Ok(false),
_ => Ok(true),
}
}
pub fn only_changed_vectors(
&self,
doc_alloc: &'doc Bump,

View File

@ -117,7 +117,7 @@ impl FacetedDocidsExtractor {
},
),
DocumentChange::Update(inner) => {
-if !inner.has_changed_for_fields(
+let has_changed = inner.has_changed_for_fields(
&mut |field_name| {
match_faceted_field(
field_name,
@ -130,7 +130,10 @@
rtxn,
index,
context.db_fields_ids_map,
-)? {
+)?;
+let has_changed_for_geo_fields =
+inner.has_changed_for_geo_fields(rtxn, index, context.db_fields_ids_map)?;
+if !has_changed && !has_changed_for_geo_fields {
return Ok(());
}

View File

@ -121,6 +121,7 @@ impl<'a, 'b, 'extractor> Extractor<'extractor> for EmbeddingExtractor<'a, 'b> {
// do we have set embeddings?
if let Some(embeddings) = new_vectors.embeddings {
chunks.set_vectors(
+update.external_document_id(),
update.docid(),
embeddings
.into_vec(&context.doc_alloc, embedder_name)
@ -128,7 +129,7 @@
document_id: update.external_document_id().to_string(),
error: error.to_string(),
})?,
-);
+)?;
} else if new_vectors.regenerate {
let new_rendered = prompt.render_document(
update.external_document_id(),
@ -209,6 +210,7 @@
chunks.set_regenerate(insertion.docid(), new_vectors.regenerate);
if let Some(embeddings) = new_vectors.embeddings {
chunks.set_vectors(
+insertion.external_document_id(),
insertion.docid(),
embeddings
.into_vec(&context.doc_alloc, embedder_name)
@ -218,7 +220,7 @@
.to_string(),
error: error.to_string(),
})?,
-);
+)?;
} else if new_vectors.regenerate {
let rendered = prompt.render_document(
insertion.external_document_id(),
@ -273,6 +275,7 @@ struct Chunks<'a, 'b, 'extractor> {
embedder: &'a Embedder,
embedder_id: u8,
embedder_name: &'a str,
+dimensions: usize,
prompt: &'a Prompt,
possible_embedding_mistakes: &'a PossibleEmbeddingMistakes,
user_provided: &'a RefCell<EmbeddingExtractorData<'extractor>>,
@ -297,6 +300,7 @@
let capacity = embedder.prompt_count_in_chunk_hint() * embedder.chunk_count_hint();
let texts = BVec::with_capacity_in(capacity, doc_alloc);
let ids = BVec::with_capacity_in(capacity, doc_alloc);
+let dimensions = embedder.dimensions();
Self {
texts,
ids,
@ -309,6 +313,7 @@
embedder_name,
user_provided,
has_manual_generation: None,
+dimensions,
}
}
@ -490,7 +495,25 @@
}
}
-fn set_vectors(&self, docid: DocumentId, embeddings: Vec<Embedding>) {
+fn set_vectors(
&self,
external_docid: &'a str,
docid: DocumentId,
embeddings: Vec<Embedding>,
) -> Result<()> {
for (embedding_index, embedding) in embeddings.iter().enumerate() {
if embedding.len() != self.dimensions {
return Err(UserError::InvalidIndexingVectorDimensions {
expected: self.dimensions,
found: embedding.len(),
embedder_name: self.embedder_name.to_string(),
document_id: external_docid.to_string(),
embedding_index,
}
.into());
}
}
self.sender.set_vectors(docid, self.embedder_id, embeddings).unwrap();
Ok(())
}
}
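For illustration only (the document id, embedder name, and sizes are made up), a 2-dimensional vector handed to an embedder configured for 3 dimensions would now be rejected at indexing time with the `InvalidIndexingVectorDimensions` message defined earlier in this diff, rendered roughly as:

Invalid vector dimensions in document with id `doc-1` in `._vectors.manual`.
 - note: embedding #0 has dimensions 2
 - note: embedder `manual` requires 3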

View File

@ -144,7 +144,7 @@ impl<'indexer> FacetSearchBuilder<'indexer> {
let mut merger_iter = builder.build().into_stream_merger_iter()?;
let mut current_field_id = None;
let mut fst;
-let mut fst_merger_builder: Option<FstMergerBuilder> = None;
+let mut fst_merger_builder: Option<FstMergerBuilder<_>> = None;
while let Some((key, deladd)) = merger_iter.next()? {
let (field_id, normalized_facet_string) =
BEU16StrCodec::bytes_decode(key).map_err(heed::Error::Encoding)?;
@ -153,12 +153,13 @@
if let (Some(current_field_id), Some(fst_merger_builder)) =
(current_field_id, fst_merger_builder)
{
-let mmap = fst_merger_builder.build(&mut callback)?;
+if let Some(mmap) = fst_merger_builder.build(&mut callback)? {
index.facet_id_string_fst.remap_data_type::<Bytes>().put(
wtxn,
&current_field_id,
&mmap,
)?;
+}
}
fst = index.facet_id_string_fst.get(rtxn, &field_id)?;
@ -209,8 +210,9 @@
}
if let (Some(field_id), Some(fst_merger_builder)) = (current_field_id, fst_merger_builder) {
-let mmap = fst_merger_builder.build(&mut callback)?;
+if let Some(mmap) = fst_merger_builder.build(&mut callback)? {
index.facet_id_string_fst.remap_data_type::<Bytes>().put(wtxn, &field_id, &mmap)?;
+}
}
Ok(())

View File

@ -1,25 +1,27 @@
use std::fs::File;
use std::io::BufWriter;
-use fst::{Set, SetBuilder, Streamer};
+use fst::{IntoStreamer, Set, SetBuilder, Streamer};
use memmap2::Mmap;
use tempfile::tempfile;
use crate::update::del_add::DelAdd;
use crate::{InternalError, Result};
-pub struct FstMergerBuilder<'a> {
+pub struct FstMergerBuilder<'a, D: AsRef<[u8]>> {
+fst: Option<&'a Set<D>>,
stream: Option<fst::set::Stream<'a>>,
-fst_builder: SetBuilder<BufWriter<File>>,
+fst_builder: Option<SetBuilder<BufWriter<File>>>,
last: Option<Vec<u8>>,
inserted_words: usize,
}
-impl<'a> FstMergerBuilder<'a> {
-pub fn new<D: AsRef<[u8]>>(fst: Option<&'a Set<D>>) -> Result<Self> {
+impl<'a, D: AsRef<[u8]>> FstMergerBuilder<'a, D> {
+pub fn new(fst: Option<&'a Set<D>>) -> Result<Self> {
Ok(Self {
+fst,
stream: fst.map(|fst| fst.stream()),
-fst_builder: SetBuilder::new(BufWriter::new(tempfile()?))?,
+fst_builder: None,
last: None,
inserted_words: 0,
})
@ -110,11 +112,17 @@
is_modified: bool,
insertion_callback: &mut impl FnMut(&[u8], DelAdd, bool) -> Result<()>,
) -> Result<()> {
-// Addition: We insert the word
-// Deletion: We delete the word by not inserting it
-if deladd == DelAdd::Addition {
-self.inserted_words += 1;
-self.fst_builder.insert(bytes)?;
+if is_modified && self.fst_builder.is_none() {
+self.build_new_fst(bytes)?;
+}
+if let Some(fst_builder) = self.fst_builder.as_mut() {
+// Addition: We insert the word
+// Deletion: We delete the word by not inserting it
+if deladd == DelAdd::Addition {
+self.inserted_words += 1;
+fst_builder.insert(bytes)?;
+}
}
insertion_callback(bytes, deladd, is_modified)?;
@ -122,6 +130,19 @@ impl<'a> FstMergerBuilder<'a> {
Ok(())
}
// Lazily build the new fst
fn build_new_fst(&mut self, bytes: &[u8]) -> Result<()> {
let mut fst_builder = SetBuilder::new(BufWriter::new(tempfile()?))?;
if let Some(fst) = self.fst {
fst_builder.extend_stream(fst.range().lt(bytes).into_stream())?;
}
self.fst_builder = Some(fst_builder);
Ok(())
}
fn drain_stream(
&mut self,
insertion_callback: &mut impl FnMut(&[u8], DelAdd, bool) -> Result<()>,
@ -142,16 +163,20 @@ impl<'a> FstMergerBuilder<'a> {
pub fn build(
mut self,
insertion_callback: &mut impl FnMut(&[u8], DelAdd, bool) -> Result<()>,
-) -> Result<Mmap> {
+) -> Result<Option<Mmap>> {
self.drain_stream(insertion_callback)?;
-let fst_file = self
-.fst_builder
-.into_inner()?
-.into_inner()
-.map_err(|_| InternalError::IndexingMergingKeys { process: "building-fst" })?;
-let fst_mmap = unsafe { Mmap::map(&fst_file)? };
-Ok(fst_mmap)
+match self.fst_builder {
+Some(fst_builder) => {
+let fst_file = fst_builder
+.into_inner()?
+.into_inner()
+.map_err(|_| InternalError::IndexingMergingKeys { process: "building-fst" })?;
+let fst_mmap = unsafe { Mmap::map(&fst_file)? };
+Ok(Some(fst_mmap))
+}
+None => Ok(None),
+}
}
}
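The lazy path above seeds the new FST with every existing entry strictly below the first modified key before diverging; a tiny standalone sketch of that `fst` crate pattern, using in-memory sets and made-up words:

use fst::{IntoStreamer, Set, SetBuilder};

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let existing = Set::from_iter(["apple", "banana", "cherry"])?;

    // Copy everything < "cherry" into a fresh builder, then diverge from there.
    let mut builder = SetBuilder::memory();
    builder.extend_stream(existing.range().lt("cherry").into_stream())?;
    builder.insert("cactus")?;
    builder.insert("cherry")?;

    let rebuilt = builder.into_set();
    assert!(rebuilt.contains("cactus"));
    Ok(())
}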

View File

@ -3,7 +3,7 @@ use std::sync::atomic::Ordering;
use std::sync::{Arc, RwLock};
use bumpalo::Bump;
-use heed::RoTxn;
+use heed::{RoTxn, WithoutTls};
use rayon::iter::IndexedParallelIterator;
use super::super::document_change::DocumentChange;
@ -28,7 +28,7 @@ pub struct DocumentChangeContext<
/// inside of the DB.
pub db_fields_ids_map: &'indexer FieldsIdsMap,
/// A transaction providing data from the DB before all indexing operations
-pub rtxn: RoTxn<'indexer>,
+pub rtxn: RoTxn<'indexer, WithoutTls>,
/// Global field id map that is up to date with the current state of the indexing process.
///

Some files were not shown because too many files have changed in this diff.