Compare commits

...

87 Commits

Author SHA1 Message Date
dfca4b6219 Make sure to recover from missing update file 2025-07-15 18:46:29 +02:00
8d9eb2a7c4 Use a NonZeroUsize to store the prefix buffer length 2025-04-21 18:50:28 +02:00
44586e089d Fix the retrieval of the doc in scope 2025-04-21 12:52:42 +02:00
c8b7822d0d Allow users to delete documents 2025-04-19 12:22:37 +02:00
446b9c142c Erase a document if the AST is set 2025-04-16 23:12:31 +02:00
e755e25847 Use the script in the edit documents 2025-04-16 22:53:21 +02:00
81419935f2 Introduce a new executeAfterUpdate index setting 2025-04-16 22:50:33 +02:00
51acd7a381 Seems to work great, still need to read function from settings 2025-04-16 22:01:50 +02:00
3ec5b9d488 Merge pull request #5487 from HDT3213/bugfix/geosort
fix ranking rules after _geo do not work
2025-04-15 13:29:07 +00:00
55adbac2dd Apply suggestions from code review 2025-04-15 14:43:07 +02:00
fd7fbfa9eb Refactor geo_max_bucket_size injection 2025-04-15 20:24:04 +08:00
3a93f88ba6 Merge pull request #5498 from meilisearch/snapshot-no-compaction
Stop compacting the snapshot
2025-04-15 08:30:40 +00:00
7c1c4f9c26 fix test_geo_sort_reached_max_bucket_size 2025-04-15 08:19:22 +08:00
1f5412003d optimize test suite 2025-04-15 07:17:47 +08:00
5da92a3d53 test geo sort reached max_bucket_size 2025-04-14 23:14:17 +08:00
c4a8b84dc0 code style 2025-04-14 23:04:17 +08:00
ffe3faeca7 cargo fmt 2025-04-14 23:04:17 +08:00
0f07cfed14 GeoSort support max_bucket_size and distance_error_margin configuration 2025-04-14 23:04:17 +08:00
326a728434 fix code style 2025-04-14 23:04:17 +08:00
e4733dcd42 fix ranking rules after _geo do not work 2025-04-14 23:04:17 +08:00
a500fa053c Merge pull request #5509 from meilisearch/release-v1.14.0-tmp
Bring back changes from v1.14.0 to main
2025-04-14 13:59:23 +00:00
61db56f785 remove duplicated test 2025-04-14 14:55:57 +02:00
235556d699 Merge pull request #5485 from meilisearch/dependabot/github_actions/actions/checkout-3
Bump actions/checkout from 1 to 3
2025-04-14 11:40:37 +00:00
a3a1065c16 Merge pull request #5497 from meilisearch/dependabot/cargo/tokio-1.43.1
Bump tokio from 1.42.0 to 1.43.1
2025-04-14 11:40:13 +00:00
b025f1bcf1 Merge branch 'main' into release-v1.14.0-tmp 2025-04-14 12:35:47 +02:00
707d106a24 Merge pull request #5482 from meilisearch/dependabot/github_actions/actions/github-script-7
Bump actions/github-script from 6 to 7
2025-04-14 09:53:41 +00:00
97d6726291 Merge pull request #5483 from meilisearch/dependabot/github_actions/Swatinem/rust-cache-2.7.8
Bump Swatinem/rust-cache from 2.7.7 to 2.7.8
2025-04-14 09:53:32 +00:00
82fa571ef7 Merge pull request #5503 from meilisearch/dependabot/cargo/crossbeam-channel-0.5.15
Bump crossbeam-channel from 0.5.14 to 0.5.15
2025-04-14 09:53:03 +00:00
5d453e6049 Bump crossbeam-channel from 0.5.14 to 0.5.15
Bumps [crossbeam-channel](https://github.com/crossbeam-rs/crossbeam) from 0.5.14 to 0.5.15.
- [Release notes](https://github.com/crossbeam-rs/crossbeam/releases)
- [Changelog](https://github.com/crossbeam-rs/crossbeam/blob/master/CHANGELOG.md)
- [Commits](https://github.com/crossbeam-rs/crossbeam/compare/crossbeam-channel-0.5.14...crossbeam-channel-0.5.15)

---
updated-dependencies:
- dependency-name: crossbeam-channel
  dependency-version: 0.5.15
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-04-10 14:44:12 +00:00
9e7d7beb4a stop compacting the snapshot 2025-04-08 14:53:58 +02:00
a225ab2637 Bump tokio from 1.42.0 to 1.43.1
Bumps [tokio](https://github.com/tokio-rs/tokio) from 1.42.0 to 1.43.1.
- [Release notes](https://github.com/tokio-rs/tokio/releases)
- [Commits](https://github.com/tokio-rs/tokio/compare/tokio-1.42.0...tokio-1.43.1)

---
updated-dependencies:
- dependency-name: tokio
  dependency-version: 1.43.1
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-04-08 02:13:40 +00:00
94b43001db Merge pull request #5492 from meilisearch/accept-cancelation-tasks-when-disk-full
make meilisearch accept cancelation tasks even when the disk is full
2025-04-03 15:46:46 +00:00
796a325972 Fix typos
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
2025-04-03 15:53:42 +02:00
1db550ec7f make meilisearch accept cancelation tasks even when the disk is full 2025-04-03 15:47:56 +02:00
a10efedd2f Bump actions/checkout from 1 to 3
Bumps [actions/checkout](https://github.com/actions/checkout) from 1 to 3.
- [Release notes](https://github.com/actions/checkout/releases)
- [Changelog](https://github.com/actions/checkout/blob/main/CHANGELOG.md)
- [Commits](https://github.com/actions/checkout/compare/v1...v3)

---
updated-dependencies:
- dependency-name: actions/checkout
  dependency-version: '3'
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-04-01 17:31:28 +00:00
55ec96d31a Bump Swatinem/rust-cache from 2.7.7 to 2.7.8
Bumps [Swatinem/rust-cache](https://github.com/swatinem/rust-cache) from 2.7.7 to 2.7.8.
- [Release notes](https://github.com/swatinem/rust-cache/releases)
- [Changelog](https://github.com/Swatinem/rust-cache/blob/master/CHANGELOG.md)
- [Commits](https://github.com/swatinem/rust-cache/compare/v2.7.7...v2.7.8)

---
updated-dependencies:
- dependency-name: Swatinem/rust-cache
  dependency-version: 2.7.8
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-04-01 17:31:18 +00:00
4249630791 Bump actions/github-script from 6 to 7
Bumps [actions/github-script](https://github.com/actions/github-script) from 6 to 7.
- [Release notes](https://github.com/actions/github-script/releases)
- [Commits](https://github.com/actions/github-script/compare/v6...v7)

---
updated-dependencies:
- dependency-name: actions/github-script
  dependency-version: '7'
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-04-01 17:31:14 +00:00
418fa47963 Merge pull request #5313 from barloes/fixRankingScoreThresholdRankingIssue
fix for rankingScoreThreshold changes the results' ranking
2025-04-01 13:10:55 +00:00
0656a0d515 Optimize roaring operation
Co-authored-by: Many the fish <many@meilisearch.com>
2025-04-01 14:25:27 +02:00
e36a8c50b9 Merge pull request #5478 from meilisearch/enforce-embedding-dimensions
Enforce embedding dimensions
2025-03-31 15:31:29 +00:00
08ff135ad6 Fix test 2025-03-31 15:27:49 +02:00
f729864466 Check dimension mismatch at insertion time 2025-03-31 15:27:49 +02:00
94ea263bef Add new error for dimensions mismatch during indexing 2025-03-31 15:27:49 +02:00
0e475cb5e6 fix warn and show what meilisearch understood of the vectors in the cursed test 2025-03-31 13:49:22 +02:00
62de70b73c Document problematic case in test and acknowledge PR comment 2025-03-31 13:49:22 +02:00
7707fb18dd add embedding with dimension mismatch test case 2025-03-31 13:49:22 +02:00
bb2e9419d3 Merge pull request #5468 from meilisearch/more-precise-post-processing
More Precise Post Processing
2025-03-27 10:07:09 +00:00
cf68713145 Merge pull request #5465 from meilisearch/improve-stats-perf
Improve documents stats performances
2025-03-27 09:20:14 +00:00
811143cbe9 Add more progress precision when doing post processing 2025-03-27 10:17:28 +01:00
c670e9a39b Make sure the snaps are happy 2025-03-26 20:03:35 +01:00
65f1b13475 Merge pull request #5464 from meilisearch/camel-case-database-sizes
Prefer camelCase for internal database sizes db name
2025-03-26 16:40:39 +00:00
db7ce03763 Improve the performances of computing the size of the documents database 2025-03-26 17:40:12 +01:00
7ed9adde29 Prefer camelCase for internal database sizes db name 2025-03-26 16:45:52 +01:00
9ce7ccfbe7 Merge pull request #5457 from meilisearch/show-database-sizes-changes
Show database sizes batches
2025-03-26 10:19:40 +00:00
3deb1ef78f Fix the snapshots again 2025-03-26 10:38:49 +01:00
5820d822c8 Add more details about the finalizing progress step 2025-03-26 09:49:43 +01:00
637bea0370 Compute and store the database sizes 2025-03-26 09:49:42 +01:00
fd079c6757 Add an index method to get the database sizes 2025-03-25 16:30:51 +01:00
182e5d5632 Add database sizes stats to the batches 2025-03-25 16:30:15 +01:00
82aee6a9af Merge pull request #5415 from meilisearch/isolate-word-fst-usage
Isolate word fst usage
2025-03-25 11:43:37 +00:00
fca947219f Merge pull request #5402 from meilisearch/do-not-reindex-searchable-order-change
Avoid reindexing searchable order changes
2025-03-25 07:03:14 +00:00
fb7ae9f97f Merge pull request #5454 from meilisearch/update-charabia-v0.9.3
Update Charabia v0.9.3
2025-03-24 22:34:51 +00:00
cd421fea1e Merge pull request #5456 from meilisearch/fix-CI
Fix CI to work with merge queues
2025-03-25 09:55:59 +00:00
1ad4235beb Remove the bors file 2025-03-25 10:05:41 +01:00
de6c7e551e Remove bors references from the repository 2025-03-25 10:04:38 +01:00
c0fe70c5f0 Make the CI work with merge queue grouping 2025-03-25 10:04:24 +01:00
a09d08c7b6 Avoid reindexing searchable order changes
Update settings.rs

Update settings.rs
2025-03-24 16:26:52 +01:00
2e6aa63efc Update Charabia v0.9.3 2025-03-24 14:32:21 +01:00
f9807ba32e Fix logic when results are below the threshold 2025-03-19 11:34:53 +01:00
8c8cc59a6c remove new line added by accident 2025-03-19 11:34:53 +01:00
f540a69ac3 add 1 to index so it points to correct position 2025-03-19 11:34:52 +01:00
7df2bdfb15 Merge #5436
5436: Update mini-dashboard to v0.2.19 version r=Kerollmops a=curquiza

Fixes mini dashboard to prevent the panel from popping up every time

Fixed by `@mdubus` 👍 

Co-authored-by: curquiza <clementine@meilisearch.com>
2025-03-18 16:24:31 +00:00
71f7456748 Update mini-dashboard to v0.2.19 version 2025-03-18 12:48:38 +01:00
c98b313d03 Merge #5426
5426: Bump zip from 2.2.2 to 2.3.0 r=Kerollmops a=dependabot[bot]

Bumps [zip](https://github.com/zip-rs/zip2) from 2.2.2 to 2.3.0.
<details>
<summary>Release notes</summary>
<p><em>Sourced from <a href="https://github.com/zip-rs/zip2/releases">zip's releases</a>.</em></p>
<blockquote>
<h2>v2.3.0</h2>
<h3><!-- raw HTML omitted -->🚀 Features</h3>
<ul>
<li>Add support for NTFS extra field (<a href="https://redirect.github.com/zip-rs/zip2/pull/279">#279</a>)</li>
</ul>
<h3><!-- raw HTML omitted -->🐛 Bug Fixes</h3>
<ul>
<li><em>(test)</em> Conditionalize a zip64 doctest (<a href="https://redirect.github.com/zip-rs/zip2/pull/308">#308</a>)</li>
<li>fix failing tests, remove symlink loop check</li>
<li>Canonicalize output path to avoid false negatives</li>
<li>Symlink handling in stream extraction</li>
<li>Canonicalize output paths and symlink targets, and ensure they descend from the destination</li>
</ul>
<h3><!-- raw HTML omitted -->⚙️ Miscellaneous Tasks</h3>
<ul>
<li>Fix clippy and cargo fmt warnings (<a href="https://redirect.github.com/zip-rs/zip2/pull/310">#310</a>)</li>
</ul>
<h2>v2.2.3</h2>
<h3><!-- raw HTML omitted -->🚜 Refactor</h3>
<ul>
<li>Change the inner structure of <code>DateTime</code> (<a href="https://redirect.github.com/zip-rs/zip2/issues/267">#267</a>)</li>
</ul>
<h3><!-- raw HTML omitted -->⚙️ Miscellaneous Tasks</h3>
<ul>
<li>cargo fix --edition</li>
</ul>
</blockquote>
</details>
<details>
<summary>Changelog</summary>
<p><em>Sourced from <a href="https://github.com/zip-rs/zip2/blob/master/CHANGELOG.md">zip's changelog</a>.</em></p>
<blockquote>
<h2><a href="https://github.com/zip-rs/zip2/compare/v2.2.3...v2.3.0">2.3.0</a> - 2025-03-16</h2>
<h3><!-- raw HTML omitted -->🚀 Features</h3>
<ul>
<li>Add support for NTFS extra field (<a href="https://redirect.github.com/zip-rs/zip2/pull/279">#279</a>)</li>
</ul>
<h3><!-- raw HTML omitted -->🐛 Bug Fixes</h3>
<ul>
<li><em>(test)</em> Conditionalize a zip64 doctest (<a href="https://redirect.github.com/zip-rs/zip2/pull/308">#308</a>)</li>
<li>fix failing tests, remove symlink loop check</li>
<li>Canonicalize output path to avoid false negatives</li>
<li>Symlink handling in stream extraction</li>
<li>Canonicalize output paths and symlink targets, and ensure they descend from the destination</li>
</ul>
<h3><!-- raw HTML omitted -->⚙️ Miscellaneous Tasks</h3>
<ul>
<li>Fix clippy and cargo fmt warnings (<a href="https://redirect.github.com/zip-rs/zip2/pull/310">#310</a>)</li>
</ul>
<h2><a href="https://github.com/zip-rs/zip2/compare/v2.2.2...v2.2.3">2.2.3</a> - 2025-02-26</h2>
<h3><!-- raw HTML omitted -->🚜 Refactor</h3>
<ul>
<li>Change the inner structure of <code>DateTime</code> (<a href="https://redirect.github.com/zip-rs/zip2/issues/267">#267</a>)</li>
</ul>
<h3><!-- raw HTML omitted -->⚙️ Miscellaneous Tasks</h3>
<ul>
<li>cargo fix --edition</li>
</ul>
</blockquote>
</details>
<details>
<summary>Commits</summary>
<ul>
<li><a href="6eab5f5cc6"><code>6eab5f5</code></a> chore: release v2.3.0 (<a href="https://redirect.github.com/zip-rs/zip2/issues/300">#300</a>)</li>
<li><a href="e4aee2050f"><code>e4aee20</code></a> implement <code>ZipFile::options</code> + refactor options normalization (<a href="https://redirect.github.com/zip-rs/zip2/issues/305">#305</a>)</li>
<li><a href="ea8a7bba24"><code>ea8a7bb</code></a> fix(test): Conditionalize a zip64 doctest (<a href="https://redirect.github.com/zip-rs/zip2/issues/308">#308</a>)</li>
<li><a href="365c81a39f"><code>365c81a</code></a> Use <code>xz2</code> crate instead of a custom implementation (<a href="https://redirect.github.com/zip-rs/zip2/issues/306">#306</a>)</li>
<li><a href="ae94b3452b"><code>ae94b34</code></a> chore: Fix clippy and cargo fmt warnings (<a href="https://redirect.github.com/zip-rs/zip2/issues/310">#310</a>)</li>
<li><a href="a2e062f370"><code>a2e062f</code></a> Merge commit from fork</li>
<li><a href="0199ac2cb8"><code>0199ac2</code></a> Simplify handling for symlink targets</li>
<li><a href="977bb9479d"><code>977bb94</code></a> fix failing tests, remove symlink loop check</li>
<li><a href="3cb29e70d1"><code>3cb29e7</code></a> Partial fix for tests</li>
<li><a href="2182b07686"><code>2182b07</code></a> Refactor</li>
<li>Additional commits viewable in <a href="https://github.com/zip-rs/zip2/compare/v2.2.2...v2.3.0">compare view</a></li>
</ul>
</details>
<br />


[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=zip&package-manager=cargo&previous-version=2.2.2&new-version=2.3.0)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting ``@dependabot` rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---

<details>
<summary>Dependabot commands and options</summary>
<br />

You can trigger Dependabot actions by commenting on this PR:
- ``@dependabot` rebase` will rebase this PR
- ``@dependabot` recreate` will recreate this PR, overwriting any edits that have been made to it
- ``@dependabot` merge` will merge this PR after your CI passes on it
- ``@dependabot` squash and merge` will squash and merge this PR after your CI passes on it
- ``@dependabot` cancel merge` will cancel a previously requested merge and block automerging
- ``@dependabot` reopen` will reopen this PR if it is closed
- ``@dependabot` close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
- ``@dependabot` show <dependency name> ignore conditions` will show all of the ignore conditions of the specified dependency
- ``@dependabot` ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
- ``@dependabot` ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
- ``@dependabot` ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
You can disable automated security fix PRs for this repo from the [Security Alerts page](https://github.com/meilisearch/meilisearch/network/alerts).

</details>

Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-03-18 08:57:11 +00:00
69678ed8e1 Bump zip from 2.2.2 to 2.3.0
Bumps [zip](https://github.com/zip-rs/zip2) from 2.2.2 to 2.3.0.
- [Release notes](https://github.com/zip-rs/zip2/releases)
- [Changelog](https://github.com/zip-rs/zip2/blob/master/CHANGELOG.md)
- [Commits](https://github.com/zip-rs/zip2/compare/v2.2.2...v2.3.0)

---
updated-dependencies:
- dependency-name: zip
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-03-18 00:19:49 +00:00
bf144a94d8 No more use FST to find a word without any typo 2025-03-17 14:20:10 +01:00
b0b1888ef9 Add test 2025-03-17 14:20:10 +01:00
6ec1d2b712 Merge #5423
5423: Bump ring to v0.17.14 to compile on old aarch64 r=irevoire a=Kerollmops

This PR will fix [this CI issue](https://github.com/meilisearch/meilisearch/actions/runs/13896085925/job/38876941154) where ring v0.17.13 breaks the compilation on old aarch64 machines by bumping its version to v0.17.14.

Co-authored-by: Kerollmops <clement@meilisearch.com>
2025-03-17 12:53:02 +00:00
cbdf80893d Merge #5422
5422: Add more progress levels to measure merging r=Kerollmops a=Kerollmops

I found out that Meilisearch was not correctly reporting the long indexing times in the progress and that a lot of time was spent on extracting words with all documents already extracted. The reason was that there was no step to report merging the cache and sending the entries to write to the writer thread. This PR adds these entries to the progress.

Co-authored-by: Kerollmops <clement@meilisearch.com>
2025-03-17 12:02:46 +00:00
e2156ddfc7 Simplify the IndexingStep progress enum 2025-03-17 11:40:50 +01:00
49dd50dab2 Bump ring to v0.17.14 to compile on old aarch64 2025-03-17 11:29:17 +01:00
13a88d6131 Merge #5407
5407: Geo update bug r=irevoire a=ManyTheFish

# Pull Request

## Related issue
Fixes #5380
Fixes #5399



Co-authored-by: Tamo <tamo@meilisearch.com>
Co-authored-by: ManyTheFish <many@meilisearch.com>
2025-03-17 10:24:33 +00:00
d9875b782d Merge #5421
5421: Accept total batch size in human size r=irevoire a=Kerollmops

This PR fixes the new `experimental-limit-batched-tasks-total-size` to accept human-defined sizes in bytes.

Co-authored-by: Kerollmops <clement@meilisearch.com>
2025-03-17 09:41:22 +00:00
cb16baab18 Add more progress levels to measure merging 2025-03-17 10:13:29 +01:00
d3e4b2dfe7 Accept total batch size in human size 2025-03-14 13:07:51 +01:00
d3cd5ea689 Check if the geo fields changed additionally to the other faceted fields when reindexing facets 2025-03-12 11:20:10 +01:00
3ed43f9097 add a failing test reproducing the bug 2025-03-12 11:20:10 +01:00
71 changed files with 2308 additions and 619 deletions

View File

@ -17,7 +17,7 @@ jobs:
uses: actions/checkout@v3
- name: Validate PR milestone
uses: actions/github-script@v6
uses: actions/github-script@v7
with:
github-token: ${{ secrets.GITHUB_TOKEN }}
script: |

View File

@ -44,7 +44,7 @@ jobs:
if: github.event.label.name == 'db change'
steps:
- name: Add comment
uses: actions/github-script@v6
uses: actions/github-script@v7
with:
github-token: ${{ secrets.GITHUB_TOKEN }}
script: |

View File

@ -12,7 +12,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Checkout code
uses: actions/checkout@v2
uses: actions/checkout@v3
- name: Check db change labels
id: check_labels
run: |

View File

@ -22,7 +22,7 @@ jobs:
outputs:
docker-image: ${{ steps.define-image.outputs.docker-image }}
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v3
- name: Define the Docker image we need to use
id: define-image
run: |
@ -46,7 +46,7 @@ jobs:
MEILISEARCH_VERSION: ${{ needs.define-docker-image.outputs.docker-image }}
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v3
with:
repository: meilisearch/meilisearch-dotnet
- name: Setup .NET Core
@ -75,7 +75,7 @@ jobs:
ports:
- '7700:7700'
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v3
with:
repository: meilisearch/meilisearch-dart
- uses: dart-lang/setup-dart@v1
@ -103,7 +103,7 @@ jobs:
uses: actions/setup-go@v5
with:
go-version: stable
- uses: actions/checkout@v4
- uses: actions/checkout@v3
with:
repository: meilisearch/meilisearch-go
- name: Get dependencies
@ -129,7 +129,7 @@ jobs:
ports:
- '7700:7700'
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v3
with:
repository: meilisearch/meilisearch-java
- name: Set up Java
@ -156,7 +156,7 @@ jobs:
ports:
- '7700:7700'
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v3
with:
repository: meilisearch/meilisearch-js
- name: Setup node
@ -191,7 +191,7 @@ jobs:
ports:
- '7700:7700'
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v3
with:
repository: meilisearch/meilisearch-php
- name: Install PHP
@ -220,7 +220,7 @@ jobs:
ports:
- '7700:7700'
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v3
with:
repository: meilisearch/meilisearch-python
- name: Set up Python
@ -245,7 +245,7 @@ jobs:
ports:
- '7700:7700'
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v3
with:
repository: meilisearch/meilisearch-ruby
- name: Set up Ruby 3
@ -270,7 +270,7 @@ jobs:
ports:
- '7700:7700'
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v3
with:
repository: meilisearch/meilisearch-rust
- name: Build
@ -291,7 +291,7 @@ jobs:
ports:
- '7700:7700'
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v3
with:
repository: meilisearch/meilisearch-swift
- name: Run tests
@ -314,7 +314,7 @@ jobs:
ports:
- '7700:7700'
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v3
with:
repository: meilisearch/meilisearch-js-plugins
- name: Setup node
@ -345,7 +345,7 @@ jobs:
ports:
- '7700:7700'
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v3
with:
repository: meilisearch/meilisearch-rails
- name: Set up Ruby 3
@ -369,7 +369,7 @@ jobs:
ports:
- '7700:7700'
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v3
with:
repository: meilisearch/meilisearch-symfony
- name: Install PHP

View File

@ -21,7 +21,7 @@ jobs:
# Use ubuntu-22.04 to compile with glibc 2.35
image: ubuntu:22.04
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v3
- name: Install needed dependencies
run: |
apt-get update && apt-get install -y curl
@ -29,7 +29,7 @@ jobs:
- name: Setup test with Rust stable
uses: dtolnay/rust-toolchain@1.85
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.7.7
uses: Swatinem/rust-cache@v2.7.8
- name: Run cargo check without any default features
uses: actions-rs/cargo@v1
with:
@ -51,7 +51,7 @@ jobs:
steps:
- uses: actions/checkout@v3
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.7.7
uses: Swatinem/rust-cache@v2.7.8
- uses: dtolnay/rust-toolchain@1.85
- name: Run cargo check without any default features
uses: actions-rs/cargo@v1
@ -91,7 +91,7 @@ jobs:
env:
MEILI_TEST_OLLAMA_SERVER: "http://localhost:11434"
steps:
- uses: actions/checkout@v1
- uses: actions/checkout@v3
- name: Install Ollama
run: |
curl -fsSL https://ollama.com/install.sh | sudo -E sh
@ -155,7 +155,7 @@ jobs:
apt-get install build-essential -y
- uses: dtolnay/rust-toolchain@1.85
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.7.7
uses: Swatinem/rust-cache@v2.7.8
- name: Run tests in debug
uses: actions-rs/cargo@v1
with:
@ -172,7 +172,7 @@ jobs:
profile: minimal
components: clippy
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.7.7
uses: Swatinem/rust-cache@v2.7.8
- name: Run cargo clippy
uses: actions-rs/cargo@v1
with:
@ -191,7 +191,7 @@ jobs:
override: true
components: rustfmt
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.7.7
uses: Swatinem/rust-cache@v2.7.8
- name: Run cargo fmt
# Since we never ran the `build.rs` script in the benchmark directory we are missing one auto-generated import file.
# Since we want to trigger (and fail) this action as fast as possible, instead of building the benchmark crate

146
Cargo.lock generated
View File

@ -258,7 +258,7 @@ version = "0.7.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "891477e0c6a8957309ee5c45a6368af3ae14bb510732d2684ffa19af310920f9"
dependencies = [
"getrandom",
"getrandom 0.2.15",
"once_cell",
"version_check",
]
@ -271,7 +271,7 @@ checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011"
dependencies = [
"cfg-if",
"const-random",
"getrandom",
"getrandom 0.2.15",
"once_cell",
"version_check",
"zerocopy",
@ -790,22 +790,20 @@ dependencies = [
[[package]]
name = "bzip2"
version = "0.4.4"
version = "0.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bdb116a6ef3f6c3698828873ad02c3014b3c85cadb88496095628e3ef1e347f8"
checksum = "49ecfb22d906f800d4fe833b6282cf4dc1c298f5057ca0b5445e5c209735ca47"
dependencies = [
"bzip2-sys",
"libc",
]
[[package]]
name = "bzip2-sys"
version = "0.1.11+1.0.8"
version = "0.1.13+1.0.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "736a955f3fa7875102d57c82b8cac37ec45224a07fd32d58f9f7a186b6cd4cdc"
checksum = "225bff33b2141874fe80d71e07d6eec4f85c5c216453dd96388240f96e1acc14"
dependencies = [
"cc",
"libc",
"pkg-config",
]
@ -1143,7 +1141,7 @@ version = "0.1.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f9d839f2a20b0aee515dc581a6172f2321f96cab76c1a38a4c584a194955390e"
dependencies = [
"getrandom",
"getrandom 0.2.15",
"once_cell",
"tiny-keccak",
]
@ -1257,9 +1255,9 @@ dependencies = [
[[package]]
name = "crossbeam-channel"
version = "0.5.14"
version = "0.5.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "06ba6d68e24814cb8de6bb986db8222d3a027d15872cabc0d18817bc3c0e4471"
checksum = "82b8f8f868b36967f9606790d1903570de9ceaf870a7bf9fbbd3016d636a2cb2"
dependencies = [
"crossbeam-utils",
]
@ -2216,10 +2214,24 @@ dependencies = [
"cfg-if",
"js-sys",
"libc",
"wasi",
"wasi 0.11.0+wasi-snapshot-preview1",
"wasm-bindgen",
]
[[package]]
name = "getrandom"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "43a49c392881ce6d5c3b8cb70f98717b7c07aabbdff06687b9030dbfbe2725f8"
dependencies = [
"cfg-if",
"js-sys",
"libc",
"wasi 0.13.3+wasi-0.2.2",
"wasm-bindgen",
"windows-targets 0.52.6",
]
[[package]]
name = "gimli"
version = "0.27.3"
@ -2733,6 +2745,7 @@ dependencies = [
"bincode",
"bumpalo",
"bumparaw-collections",
"byte-unit",
"convert_case 0.6.0",
"crossbeam-channel",
"csv",
@ -2741,6 +2754,7 @@ dependencies = [
"enum-iterator",
"file-store",
"flate2",
"indexmap",
"insta",
"maplit",
"meili-snap",
@ -2923,10 +2937,11 @@ dependencies = [
[[package]]
name = "js-sys"
version = "0.3.69"
version = "0.3.77"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "29c15563dc2726973df627357ce0c9ddddbea194836909d655df6a75d2cf296d"
checksum = "1cfaf33c695fc6e08064efbc1f72ec937429614f25eef83af942d0e227c3a28f"
dependencies = [
"once_cell",
"wasm-bindgen",
]
@ -3518,6 +3533,17 @@ dependencies = [
"crc",
]
[[package]]
name = "lzma-sys"
version = "0.1.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5fda04ab3764e6cde78b9974eec4f779acaba7c4e84b36eca3cf77c581b85d27"
dependencies = [
"cc",
"libc",
"pkg-config",
]
[[package]]
name = "macro_rules_attribute"
version = "0.2.0"
@ -3656,7 +3682,7 @@ dependencies = [
"uuid",
"wiremock",
"yaup",
"zip 2.2.2",
"zip 2.3.0",
]
[[package]]
@ -3882,7 +3908,7 @@ checksum = "a4a650543ca06a924e8b371db273b2756685faae30f8487da1b56505a8f78b0c"
dependencies = [
"libc",
"log",
"wasi",
"wasi 0.11.0+wasi-snapshot-preview1",
"windows-sys 0.48.0",
]
@ -3893,7 +3919,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2886843bf800fba2e3377cff24abf6379b4c4d5c6681eaf9ea5b0d15090450bd"
dependencies = [
"libc",
"wasi",
"wasi 0.11.0+wasi-snapshot-preview1",
"windows-sys 0.52.0",
]
@ -4670,7 +4696,7 @@ version = "0.6.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c"
dependencies = [
"getrandom",
"getrandom 0.2.15",
]
[[package]]
@ -4762,7 +4788,7 @@ version = "0.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b033d837a7cf162d7993aded9304e30a83213c648b6e389db233191f891e5c2b"
dependencies = [
"getrandom",
"getrandom 0.2.15",
"redox_syscall 0.2.16",
"thiserror 1.0.69",
]
@ -4886,13 +4912,13 @@ dependencies = [
[[package]]
name = "ring"
version = "0.17.13"
version = "0.17.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "70ac5d832aa16abd7d1def883a8545280c20a60f523a370aa3a9617c2b8550ee"
checksum = "a4689e6c2294d81e88dc6261c768b63bc4fcdb852be6d1352498b114f61383b7"
dependencies = [
"cc",
"cfg-if",
"getrandom",
"getrandom 0.2.15",
"libc",
"untrusted",
"windows-sys 0.52.0",
@ -5576,7 +5602,7 @@ checksum = "9a8a559c81686f576e8cd0290cd2a24a2a9ad80c98b3478856500fcbd7acd704"
dependencies = [
"cfg-if",
"fastrand",
"getrandom",
"getrandom 0.2.15",
"once_cell",
"rustix",
"windows-sys 0.52.0",
@ -5751,7 +5777,7 @@ dependencies = [
"aho-corasick",
"derive_builder 0.12.0",
"esaxx-rs",
"getrandom",
"getrandom 0.2.15",
"itertools 0.12.1",
"lazy_static",
"log",
@ -5775,9 +5801,9 @@ dependencies = [
[[package]]
name = "tokio"
version = "1.42.0"
version = "1.43.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5cec9b21b0450273377fc97bd4c33a8acffc8c996c987a7c5b319a0083707551"
checksum = "492a604e2fd7f814268a378409e6c92b5525d747d10db9a229723f55a417958c"
dependencies = [
"backtrace",
"bytes",
@ -5793,9 +5819,9 @@ dependencies = [
[[package]]
name = "tokio-macros"
version = "2.4.0"
version = "2.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "693d596312e88961bc67d7f1f97af8a70227d9f90c31bba5806eec004978d752"
checksum = "6e06d43f1345a3bcd39f6a56dbb7dcab2ba47e68e8ac134855e7e2bdbaf8cab8"
dependencies = [
"proc-macro2",
"quote",
@ -6238,7 +6264,7 @@ version = "1.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f8c5f0a0af699448548ad1a2fbf920fb4bee257eae39953ba95cb84891a0446a"
dependencies = [
"getrandom",
"getrandom 0.2.15",
"serde",
]
@ -6335,24 +6361,34 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
[[package]]
name = "wasm-bindgen"
version = "0.2.92"
name = "wasi"
version = "0.13.3+wasi-0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4be2531df63900aeb2bca0daaaddec08491ee64ceecbee5076636a3b026795a8"
checksum = "26816d2e1a4a36a2940b96c5296ce403917633dff8f3440e9b236ed6f6bacad2"
dependencies = [
"wit-bindgen-rt",
]
[[package]]
name = "wasm-bindgen"
version = "0.2.100"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1edc8929d7499fc4e8f0be2262a241556cfc54a0bea223790e71446f2aab1ef5"
dependencies = [
"cfg-if",
"once_cell",
"rustversion",
"wasm-bindgen-macro",
]
[[package]]
name = "wasm-bindgen-backend"
version = "0.2.92"
version = "0.2.100"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "614d787b966d3989fa7bb98a654e369c762374fd3213d212cfc0251257e747da"
checksum = "2f0a0651a5c2bc21487bde11ee802ccaf4c51935d0d3d42a6101f98161700bc6"
dependencies = [
"bumpalo",
"log",
"once_cell",
"proc-macro2",
"quote",
"syn 2.0.87",
@ -6373,9 +6409,9 @@ dependencies = [
[[package]]
name = "wasm-bindgen-macro"
version = "0.2.92"
version = "0.2.100"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a1f8823de937b71b9460c0c34e25f3da88250760bec0ebac694b49997550d726"
checksum = "7fe63fc6d09ed3792bd0897b314f53de8e16568c2b3f7982f468c0bf9bd0b407"
dependencies = [
"quote",
"wasm-bindgen-macro-support",
@ -6383,9 +6419,9 @@ dependencies = [
[[package]]
name = "wasm-bindgen-macro-support"
version = "0.2.92"
version = "0.2.100"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7"
checksum = "8ae87ea40c9f689fc23f209965b6fb8a99ad69aeeb0231408be24920604395de"
dependencies = [
"proc-macro2",
"quote",
@ -6396,9 +6432,12 @@ dependencies = [
[[package]]
name = "wasm-bindgen-shared"
version = "0.2.92"
version = "0.2.100"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "af190c94f2773fdb3729c55b007a722abb5384da03bc0986df4c289bf5567e96"
checksum = "1a05d73b933a847d6cccdda8f838a22ff101ad9bf93e33684f39c1f5f0eece3d"
dependencies = [
"unicode-ident",
]
[[package]]
name = "wasm-streams"
@ -6803,6 +6842,15 @@ dependencies = [
"url",
]
[[package]]
name = "wit-bindgen-rt"
version = "0.33.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3268f3d866458b787f390cf61f4bbb563b922d091359f9608842999eaee3943c"
dependencies = [
"bitflags 2.9.0",
]
[[package]]
name = "write16"
version = "1.0.0"
@ -6858,6 +6906,15 @@ dependencies = [
"uuid",
]
[[package]]
name = "xz2"
version = "0.1.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "388c44dc09d76f1536602ead6d325eb532f5c122f17782bd57fb47baeeb767e2"
dependencies = [
"lzma-sys",
]
[[package]]
name = "yada"
version = "0.5.1"
@ -6999,9 +7056,9 @@ dependencies = [
[[package]]
name = "zip"
version = "2.2.2"
version = "2.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ae9c1ea7b3a5e1f4b922ff856a129881167511563dc219869afe3787fc0c1a45"
checksum = "84e9a772a54b54236b9b744aaaf8d7be01b4d6e99725523cb82cb32d1c81b1d7"
dependencies = [
"aes",
"arbitrary",
@ -7012,15 +7069,16 @@ dependencies = [
"deflate64",
"displaydoc",
"flate2",
"getrandom 0.3.1",
"hmac",
"indexmap",
"lzma-rs",
"memchr",
"pbkdf2",
"rand",
"sha1",
"thiserror 2.0.9",
"time",
"xz2",
"zeroize",
"zopfli",
"zstd",

View File

@ -305,6 +305,7 @@ pub(crate) mod test {
localized_attributes: Setting::NotSet,
facet_search: Setting::NotSet,
prefix_search: Setting::NotSet,
execute_after_update: Setting::NotSet,
_kind: std::marker::PhantomData,
};
settings.check()
@ -326,6 +327,7 @@ pub(crate) mod test {
index_uids: maplit::btreemap! { "doggo".to_string() => 1 },
progress_trace: Default::default(),
write_channel_congestion: None,
internal_database_sizes: Default::default(),
},
enqueued_at: Some(BatchEnqueuedAt {
earliest: datetime!(2022-11-11 0:00 UTC),

View File

@ -397,6 +397,7 @@ impl<T> From<v5::Settings<T>> for v6::Settings<v6::Unchecked> {
search_cutoff_ms: v6::Setting::NotSet,
facet_search: v6::Setting::NotSet,
prefix_search: v6::Setting::NotSet,
execute_after_update: v6::Setting::NotSet,
_kind: std::marker::PhantomData,
}
}

View File

@ -13,6 +13,7 @@ license.workspace = true
[dependencies]
anyhow = "1.0.95"
bincode = "1.3.3"
byte-unit = "5.1.6"
bumpalo = "3.16.0"
bumparaw-collections = "0.1.4"
convert_case = "0.6.0"
@ -22,6 +23,7 @@ dump = { path = "../dump" }
enum-iterator = "2.1.0"
file-store = { path = "../file-store" }
flate2 = "1.0.35"
indexmap = "2.7.0"
meilisearch-auth = { path = "../meilisearch-auth" }
meilisearch-types = { path = "../meilisearch-types" }
memmap2 = "0.9.5"
@ -45,7 +47,7 @@ uuid = { version = "1.11.0", features = ["serde", "v4"] }
[dev-dependencies]
big_s = "1.0.2"
crossbeam-channel = "0.5.14"
crossbeam-channel = "0.5.15"
# fixed version due to format breakages in v1.40
insta = { version = "=1.39.0", features = ["json", "redactions"] }
maplit = "1.0.2"

View File

@ -344,6 +344,7 @@ pub fn snapshot_batch(batch: &Batch) -> String {
let Batch { uid, details, stats, started_at, finished_at, progress: _, enqueued_at } = batch;
let stats = BatchStats {
progress_trace: Default::default(),
internal_database_sizes: Default::default(),
write_channel_congestion: None,
..stats.clone()
};

View File

@ -625,8 +625,8 @@ impl IndexScheduler {
task_id: Option<TaskId>,
dry_run: bool,
) -> Result<Task> {
// if the task doesn't delete anything and 50% of the task queue is full, we must refuse to enqueue the incomming task
if !matches!(&kind, KindWithContent::TaskDeletion { tasks, .. } if !tasks.is_empty())
// if the task doesn't delete or cancel anything and 40% of the task queue is full, we must refuse to enqueue the incoming task
if !matches!(&kind, KindWithContent::TaskDeletion { tasks, .. } | KindWithContent::TaskCancelation { tasks, .. } if !tasks.is_empty())
&& (self.env.non_free_pages_size()? * 100) / self.env.info().map_size as u64 > 40
{
return Err(Error::NoSpaceLeftInTaskQueue);

View File

@ -64,6 +64,13 @@ make_enum_progress! {
}
}
make_enum_progress! {
pub enum FinalizingIndexStep {
Committing,
ComputingStats,
}
}
make_enum_progress! {
pub enum TaskCancelationProgress {
RetrievingTasks,

View File

@ -292,8 +292,6 @@ impl Queue {
return Ok(task);
}
// Get rid of the mutability.
let task = task;
self.tasks.register(wtxn, &task)?;
Ok(task)

View File

@ -364,7 +364,7 @@ fn test_task_queue_is_full() {
// we won't be able to test this error in an integration test thus as a best effort test I still ensure the error return the expected error code
snapshot!(format!("{:?}", result.error_code()), @"NoSpaceLeftOnDevice");
// Even the task deletion that doesn't delete anything shouldn't be accepted
// Even the task deletion and cancelation that don't delete anything should be refused
let result = index_scheduler
.register(
KindWithContent::TaskDeletion { query: S("test"), tasks: RoaringBitmap::new() },
@ -373,10 +373,39 @@ fn test_task_queue_is_full() {
)
.unwrap_err();
snapshot!(result, @"Meilisearch cannot receive write operations because the limit of the task database has been reached. Please delete tasks to continue performing write operations.");
let result = index_scheduler
.register(
KindWithContent::TaskCancelation { query: S("test"), tasks: RoaringBitmap::new() },
None,
false,
)
.unwrap_err();
snapshot!(result, @"Meilisearch cannot receive write operations because the limit of the task database has been reached. Please delete tasks to continue performing write operations.");
// we won't be able to test this error in an integration test thus as a best effort test I still ensure the error return the expected error code
snapshot!(format!("{:?}", result.error_code()), @"NoSpaceLeftOnDevice");
// But a task deletion that delete something should works
// But a task cancelation that cancel something should work
index_scheduler
.register(
KindWithContent::TaskCancelation { query: S("test"), tasks: (0..100).collect() },
None,
false,
)
.unwrap();
handle.advance_one_successful_batch();
// But we should still be forbidden from enqueuing new tasks
let result = index_scheduler
.register(
KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None },
None,
false,
)
.unwrap_err();
snapshot!(result, @"Meilisearch cannot receive write operations because the limit of the task database has been reached. Please delete tasks to continue performing write operations.");
// And a task deletion that delete something should works
index_scheduler
.register(
KindWithContent::TaskDeletion { query: S("test"), tasks: (0..100).collect() },

View File

@ -1,4 +1,5 @@
use std::fmt;
use std::io::ErrorKind;
use meilisearch_types::heed::RoTxn;
use meilisearch_types::milli::update::IndexDocumentsMethod;
@ -535,7 +536,11 @@ impl IndexScheduler {
.and_then(|task| task.ok_or(Error::CorruptedTaskQueue))?;
if let Some(uuid) = task.content_uuid() {
let content_size = self.queue.file_store.compute_size(uuid)?;
let content_size = match self.queue.file_store.compute_size(uuid) {
Ok(content_size) => content_size,
Err(file_store::Error::IoError(err)) if err.kind() == ErrorKind::NotFound => 0,
Err(otherwise) => return Err(otherwise.into()),
};
total_size = total_size.saturating_add(content_size);
}

View File

@ -20,10 +20,12 @@ use std::path::PathBuf;
use std::sync::atomic::{AtomicBool, AtomicU32, Ordering};
use std::sync::Arc;
use convert_case::{Case, Casing as _};
use meilisearch_types::error::ResponseError;
use meilisearch_types::heed::{Env, WithoutTls};
use meilisearch_types::milli;
use meilisearch_types::tasks::Status;
use process_batch::ProcessBatchInfo;
use rayon::current_num_threads;
use rayon::iter::{IntoParallelIterator, ParallelIterator};
use roaring::RoaringBitmap;
@ -223,16 +225,16 @@ impl IndexScheduler {
let mut stop_scheduler_forever = false;
let mut wtxn = self.env.write_txn().map_err(Error::HeedTransaction)?;
let mut canceled = RoaringBitmap::new();
let mut congestion = None;
let mut process_batch_info = ProcessBatchInfo::default();
match res {
Ok((tasks, cong)) => {
Ok((tasks, info)) => {
#[cfg(test)]
self.breakpoint(crate::test_utils::Breakpoint::ProcessBatchSucceeded);
let (task_progress, task_progress_obj) = AtomicTaskStep::new(tasks.len() as u32);
progress.update_progress(task_progress_obj);
congestion = cong;
process_batch_info = info;
let mut success = 0;
let mut failure = 0;
let mut canceled_by = None;
@ -350,6 +352,9 @@ impl IndexScheduler {
// We must re-add the canceled task so they're part of the same batch.
ids |= canceled;
let ProcessBatchInfo { congestion, pre_commit_dabases_sizes, post_commit_dabases_sizes } =
process_batch_info;
processing_batch.stats.progress_trace =
progress.accumulated_durations().into_iter().map(|(k, v)| (k, v.into())).collect();
processing_batch.stats.write_channel_congestion = congestion.map(|congestion| {
@ -359,6 +364,33 @@ impl IndexScheduler {
congestion_info.insert("blocking_ratio".into(), congestion.congestion_ratio().into());
congestion_info
});
processing_batch.stats.internal_database_sizes = pre_commit_dabases_sizes
.iter()
.flat_map(|(dbname, pre_size)| {
post_commit_dabases_sizes
.get(dbname)
.map(|post_size| {
use byte_unit::{Byte, UnitType::Binary};
use std::cmp::Ordering::{Equal, Greater, Less};
let post = Byte::from_u64(*post_size as u64).get_appropriate_unit(Binary);
let diff_size = post_size.abs_diff(*pre_size) as u64;
let diff = Byte::from_u64(diff_size).get_appropriate_unit(Binary);
let sign = match post_size.cmp(pre_size) {
Equal => return None,
Greater => "+",
Less => "-",
};
Some((
dbname.to_case(Case::Camel),
format!("{post:#.2} ({sign}{diff:#.2})").into(),
))
})
.into_iter()
.flatten()
})
.collect();
if let Some(congestion) = congestion {
tracing::debug!(

View File

@ -12,7 +12,7 @@ use roaring::RoaringBitmap;
use super::create_batch::Batch;
use crate::processing::{
AtomicBatchStep, AtomicTaskStep, CreateIndexProgress, DeleteIndexProgress,
AtomicBatchStep, AtomicTaskStep, CreateIndexProgress, DeleteIndexProgress, FinalizingIndexStep,
InnerSwappingTwoIndexes, SwappingTheIndexes, TaskCancelationProgress, TaskDeletionProgress,
UpdateIndexProgress,
};
@ -22,6 +22,16 @@ use crate::utils::{
};
use crate::{Error, IndexScheduler, Result, TaskId};
#[derive(Debug, Default)]
pub struct ProcessBatchInfo {
/// The write channel congestion. None when unavailable: settings update.
pub congestion: Option<ChannelCongestion>,
/// The sizes of the different databases before starting the indexation.
pub pre_commit_dabases_sizes: indexmap::IndexMap<&'static str, usize>,
/// The sizes of the different databases after commiting the indexation.
pub post_commit_dabases_sizes: indexmap::IndexMap<&'static str, usize>,
}
impl IndexScheduler {
/// Apply the operation associated with the given batch.
///
@ -35,7 +45,7 @@ impl IndexScheduler {
batch: Batch,
current_batch: &mut ProcessingBatch,
progress: Progress,
) -> Result<(Vec<Task>, Option<ChannelCongestion>)> {
) -> Result<(Vec<Task>, ProcessBatchInfo)> {
#[cfg(test)]
{
self.maybe_fail(crate::test_utils::FailureLocation::InsideProcessBatch)?;
@ -76,7 +86,7 @@ impl IndexScheduler {
canceled_tasks.push(task);
Ok((canceled_tasks, None))
Ok((canceled_tasks, ProcessBatchInfo::default()))
}
Batch::TaskDeletions(mut tasks) => {
// 1. Retrieve the tasks that matched the query at enqueue-time.
@ -115,14 +125,14 @@ impl IndexScheduler {
_ => unreachable!(),
}
}
Ok((tasks, None))
}
Batch::SnapshotCreation(tasks) => {
self.process_snapshot(progress, tasks).map(|tasks| (tasks, None))
}
Batch::Dump(task) => {
self.process_dump_creation(progress, task).map(|tasks| (tasks, None))
Ok((tasks, ProcessBatchInfo::default()))
}
Batch::SnapshotCreation(tasks) => self
.process_snapshot(progress, tasks)
.map(|tasks| (tasks, ProcessBatchInfo::default())),
Batch::Dump(task) => self
.process_dump_creation(progress, task)
.map(|tasks| (tasks, ProcessBatchInfo::default())),
Batch::IndexOperation { op, must_create_index } => {
let index_uid = op.index_uid().to_string();
let index = if must_create_index {
@ -139,10 +149,12 @@ impl IndexScheduler {
.set_currently_updating_index(Some((index_uid.clone(), index.clone())));
let mut index_wtxn = index.write_txn()?;
let pre_commit_dabases_sizes = index.database_sizes(&index_wtxn)?;
let (tasks, congestion) =
self.apply_index_operation(&mut index_wtxn, &index, op, progress)?;
self.apply_index_operation(&mut index_wtxn, &index, op, &progress)?;
{
progress.update_progress(FinalizingIndexStep::Committing);
let span = tracing::trace_span!(target: "indexing::scheduler", "commit");
let _entered = span.enter();
@ -153,12 +165,15 @@ impl IndexScheduler {
// stats of the index. Since the tasks have already been processed and
// this is a non-critical operation. If it fails, we should not fail
// the entire batch.
let mut post_commit_dabases_sizes = None;
let res = || -> Result<()> {
progress.update_progress(FinalizingIndexStep::ComputingStats);
let index_rtxn = index.read_txn()?;
let stats = crate::index_mapper::IndexStats::new(&index, &index_rtxn)
.map_err(|e| Error::from_milli(e, Some(index_uid.to_string())))?;
let mut wtxn = self.env.write_txn()?;
self.index_mapper.store_stats_of(&mut wtxn, &index_uid, &stats)?;
post_commit_dabases_sizes = Some(index.database_sizes(&index_rtxn)?);
wtxn.commit()?;
Ok(())
}();
@ -171,7 +186,16 @@ impl IndexScheduler {
),
}
Ok((tasks, congestion))
let info = ProcessBatchInfo {
congestion,
// In case we fail to the get post-commit sizes we decide
// that nothing changed and use the pre-commit sizes.
post_commit_dabases_sizes: post_commit_dabases_sizes
.unwrap_or_else(|| pre_commit_dabases_sizes.clone()),
pre_commit_dabases_sizes,
};
Ok((tasks, info))
}
Batch::IndexCreation { index_uid, primary_key, task } => {
progress.update_progress(CreateIndexProgress::CreatingTheIndex);
@ -239,7 +263,7 @@ impl IndexScheduler {
),
}
Ok((vec![task], None))
Ok((vec![task], ProcessBatchInfo::default()))
}
Batch::IndexDeletion { index_uid, index_has_been_created, mut tasks } => {
progress.update_progress(DeleteIndexProgress::DeletingTheIndex);
@ -273,7 +297,9 @@ impl IndexScheduler {
};
}
Ok((tasks, None))
// Here we could also show that all the internal database sizes goes to 0
// but it would mean opening the index and that's costly.
Ok((tasks, ProcessBatchInfo::default()))
}
Batch::IndexSwap { mut task } => {
progress.update_progress(SwappingTheIndexes::EnsuringCorrectnessOfTheSwap);
@ -321,7 +347,7 @@ impl IndexScheduler {
}
wtxn.commit()?;
task.status = Status::Succeeded;
Ok((vec![task], None))
Ok((vec![task], ProcessBatchInfo::default()))
}
Batch::UpgradeDatabase { mut tasks } => {
let KindWithContent::UpgradeDatabase { from } = tasks.last().unwrap().kind else {
@ -351,7 +377,7 @@ impl IndexScheduler {
task.error = None;
}
Ok((tasks, None))
Ok((tasks, ProcessBatchInfo::default()))
}
}
}

View File

@ -32,7 +32,7 @@ impl IndexScheduler {
index_wtxn: &mut RwTxn<'i>,
index: &'i Index,
operation: IndexOperation,
progress: Progress,
progress: &Progress,
) -> Result<(Vec<Task>, Option<ChannelCongestion>)> {
let indexer_alloc = Bump::new();
let started_processing_at = std::time::Instant::now();
@ -186,7 +186,7 @@ impl IndexScheduler {
&document_changes,
embedders,
&|| must_stop_processing.get(),
&progress,
progress,
)
.map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?,
);
@ -307,7 +307,7 @@ impl IndexScheduler {
&document_changes,
embedders,
&|| must_stop_processing.get(),
&progress,
progress,
)
.map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?,
);
@ -465,7 +465,7 @@ impl IndexScheduler {
&document_changes,
embedders,
&|| must_stop_processing.get(),
&progress,
progress,
)
.map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?,
);
@ -520,7 +520,7 @@ impl IndexScheduler {
index_uid: index_uid.clone(),
tasks: cleared_tasks,
},
progress.clone(),
progress,
)?;
let (settings_tasks, _congestion) = self.apply_index_operation(

View File

@ -41,7 +41,7 @@ impl IndexScheduler {
progress.update_progress(SnapshotCreationProgress::SnapshotTheIndexScheduler);
let dst = temp_snapshot_dir.path().join("tasks");
fs::create_dir_all(&dst)?;
self.env.copy_to_path(dst.join("data.mdb"), CompactionOption::Enabled)?;
self.env.copy_to_path(dst.join("data.mdb"), CompactionOption::Disabled)?;
// 2.2 Create a read transaction on the index-scheduler
let rtxn = self.env.read_txn()?;
@ -80,7 +80,7 @@ impl IndexScheduler {
let dst = temp_snapshot_dir.path().join("indexes").join(uuid.to_string());
fs::create_dir_all(&dst)?;
index
.copy_to_path(dst.join("data.mdb"), CompactionOption::Enabled)
.copy_to_path(dst.join("data.mdb"), CompactionOption::Disabled)
.map_err(|e| Error::from_milli(e, Some(name.to_string())))?;
}
@ -90,7 +90,7 @@ impl IndexScheduler {
progress.update_progress(SnapshotCreationProgress::SnapshotTheApiKeys);
let dst = temp_snapshot_dir.path().join("auth");
fs::create_dir_all(&dst)?;
self.scheduler.auth_env.copy_to_path(dst.join("data.mdb"), CompactionOption::Enabled)?;
self.scheduler.auth_env.copy_to_path(dst.join("data.mdb"), CompactionOption::Disabled)?;
// 5. Copy and tarball the flat snapshot
progress.update_progress(SnapshotCreationProgress::CreateTheTarball);

View File

@ -39,7 +39,7 @@ time = { version = "0.3.37", features = [
"parsing",
"macros",
] }
tokio = "1.42"
tokio = "1.43"
utoipa = { version = "5.3.1", features = ["macros"] }
uuid = { version = "1.11.0", features = ["serde", "v4"] }

View File

@ -64,4 +64,6 @@ pub struct BatchStats {
pub progress_trace: serde_json::Map<String, serde_json::Value>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub write_channel_congestion: Option<serde_json::Map<String, serde_json::Value>>,
#[serde(default, skip_serializing_if = "serde_json::Map::is_empty")]
pub internal_database_sizes: serde_json::Map<String, serde_json::Value>,
}

View File

@ -312,6 +312,7 @@ InvalidSettingsDisplayedAttributes , InvalidRequest , BAD_REQUEST ;
InvalidSettingsDistinctAttribute , InvalidRequest , BAD_REQUEST ;
InvalidSettingsProximityPrecision , InvalidRequest , BAD_REQUEST ;
InvalidSettingsFacetSearch , InvalidRequest , BAD_REQUEST ;
InvalidSettingsexecuteAfterUpdate , InvalidRequest , BAD_REQUEST ;
InvalidSettingsPrefixSearch , InvalidRequest , BAD_REQUEST ;
InvalidSettingsFaceting , InvalidRequest , BAD_REQUEST ;
InvalidSettingsFilterableAttributes , InvalidRequest , BAD_REQUEST ;
@ -454,7 +455,10 @@ impl ErrorCode for milli::Error {
}
UserError::CriterionError(_) => Code::InvalidSettingsRankingRules,
UserError::InvalidGeoField { .. } => Code::InvalidDocumentGeoField,
UserError::InvalidVectorDimensions { .. } => Code::InvalidVectorDimensions,
UserError::InvalidVectorDimensions { .. }
| UserError::InvalidIndexingVectorDimensions { .. } => {
Code::InvalidVectorDimensions
}
UserError::InvalidVectorsMapType { .. }
| UserError::InvalidVectorsEmbedderConf { .. } => Code::InvalidVectorsType,
UserError::TooManyVectors(_, _) => Code::TooManyVectors,

View File

@ -289,6 +289,12 @@ pub struct Settings<T> {
#[schema(value_type = Option<PrefixSearchSettings>, example = json!("Hemlo"))]
pub prefix_search: Setting<PrefixSearchSettings>,
/// Function to execute after an update
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
#[deserr(default, error = DeserrJsonError<InvalidSettingsexecuteAfterUpdate>)]
#[schema(value_type = Option<String>, example = json!("doc.likes += 1"))]
pub execute_after_update: Setting<String>,
#[serde(skip)]
#[deserr(skip)]
pub _kind: PhantomData<T>,
@ -354,6 +360,7 @@ impl Settings<Checked> {
localized_attributes: Setting::Reset,
facet_search: Setting::Reset,
prefix_search: Setting::Reset,
execute_after_update: Setting::Reset,
_kind: PhantomData,
}
}
@ -380,6 +387,7 @@ impl Settings<Checked> {
localized_attributes: localized_attributes_rules,
facet_search,
prefix_search,
execute_after_update,
_kind,
} = self;
@ -404,6 +412,7 @@ impl Settings<Checked> {
localized_attributes: localized_attributes_rules,
facet_search,
prefix_search,
execute_after_update,
_kind: PhantomData,
}
}
@ -454,6 +463,7 @@ impl Settings<Unchecked> {
localized_attributes: self.localized_attributes,
facet_search: self.facet_search,
prefix_search: self.prefix_search,
execute_after_update: self.execute_after_update,
_kind: PhantomData,
}
}
@ -530,6 +540,10 @@ impl Settings<Unchecked> {
},
prefix_search: other.prefix_search.or(self.prefix_search),
facet_search: other.facet_search.or(self.facet_search),
execute_after_update: other
.execute_after_update
.clone()
.or(self.execute_after_update.clone()),
_kind: PhantomData,
}
}
@ -568,6 +582,7 @@ pub fn apply_settings_to_builder(
localized_attributes: localized_attributes_rules,
facet_search,
prefix_search,
execute_after_update,
_kind,
} = settings;
@ -772,6 +787,14 @@ pub fn apply_settings_to_builder(
Setting::Reset => builder.reset_facet_search(),
Setting::NotSet => (),
}
match execute_after_update {
Setting::Set(execute_after_update) => {
builder.set_execute_after_update(execute_after_update.clone())
}
Setting::Reset => builder.reset_execute_after_update(),
Setting::NotSet => (),
}
}
pub enum SecretPolicy {
@ -867,14 +890,11 @@ pub fn settings(
})
.collect();
let embedders = Setting::Set(embedders);
let search_cutoff_ms = index.search_cutoff(rtxn)?;
let localized_attributes_rules = index.localized_attributes_rules(rtxn)?;
let prefix_search = index.prefix_search(rtxn)?.map(PrefixSearchSettings::from);
let facet_search = index.facet_search(rtxn)?;
let execute_after_update = index.execute_after_update(rtxn)?;
let mut settings = Settings {
displayed_attributes: match displayed_attributes {
@ -914,6 +934,10 @@ pub fn settings(
},
prefix_search: Setting::Set(prefix_search.unwrap_or_default()),
facet_search: Setting::Set(facet_search),
execute_after_update: match execute_after_update {
Some(function) => Setting::Set(function.to_string()),
None => Setting::NotSet,
},
_kind: PhantomData,
};
@ -1141,6 +1165,7 @@ pub(crate) mod test {
search_cutoff_ms: Setting::NotSet,
facet_search: Setting::NotSet,
prefix_search: Setting::NotSet,
execute_after_update: Setting::NotSet,
_kind: PhantomData::<Unchecked>,
};
@ -1172,6 +1197,7 @@ pub(crate) mod test {
search_cutoff_ms: Setting::NotSet,
facet_search: Setting::NotSet,
prefix_search: Setting::NotSet,
execute_after_update: Setting::NotSet,
_kind: PhantomData::<Unchecked>,
};

View File

@ -30,14 +30,10 @@ actix-web = { version = "4.9.0", default-features = false, features = [
anyhow = { version = "1.0.95", features = ["backtrace"] }
async-trait = "0.1.85"
bstr = "1.11.3"
byte-unit = { version = "5.1.6", default-features = false, features = [
"std",
"byte",
"serde",
] }
byte-unit = { version = "5.1.6", features = ["serde"] }
bytes = "1.9.0"
clap = { version = "4.5.24", features = ["derive", "env"] }
crossbeam-channel = "0.5.14"
crossbeam-channel = "0.5.15"
deserr = { version = "0.6.3", features = ["actix-web"] }
dump = { path = "../dump" }
either = "1.13.0"
@ -92,7 +88,7 @@ time = { version = "0.3.37", features = [
"parsing",
"macros",
] }
tokio = { version = "1.42.0", features = ["full"] }
tokio = { version = "1.43.1", features = ["full"] }
toml = "0.8.19"
uuid = { version = "1.11.0", features = ["serde", "v4"] }
serde_urlencoded = "0.7.1"
@ -140,7 +136,7 @@ reqwest = { version = "0.12.12", features = [
sha-1 = { version = "0.10.1", optional = true }
static-files = { version = "0.2.4", optional = true }
tempfile = { version = "3.15.0", optional = true }
zip = { version = "2.2.2", optional = true }
zip = { version = "2.3.0", optional = true }
[features]
default = ["meilisearch-types/all-tokenizations", "mini-dashboard"]
@ -170,5 +166,5 @@ german = ["meilisearch-types/german"]
turkish = ["meilisearch-types/turkish"]
[package.metadata.mini-dashboard]
assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.18/build.zip"
sha1 = "b408a30dcb6e20cddb0c153c23385bcac4c8e912"
assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.19/build.zip"
sha1 = "7974430d5277c97f67cf6e95eec6faaac2788834"

View File

@ -329,7 +329,8 @@ impl Infos {
http_addr: http_addr != default_http_addr(),
http_payload_size_limit,
experimental_max_number_of_batched_tasks,
experimental_limit_batched_tasks_total_size,
experimental_limit_batched_tasks_total_size:
experimental_limit_batched_tasks_total_size.into(),
task_queue_webhook: task_webhook_url.is_some(),
task_webhook_authorization_header: task_webhook_authorization_header.is_some(),
log_level: log_level.to_string(),

View File

@ -228,7 +228,7 @@ pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<(Arc<IndexScheduler>, Arc<
cleanup_enabled: !opt.experimental_replication_parameters,
max_number_of_tasks: 1_000_000,
max_number_of_batched_tasks: opt.experimental_max_number_of_batched_tasks,
batched_tasks_size_limit: opt.experimental_limit_batched_tasks_total_size,
batched_tasks_size_limit: opt.experimental_limit_batched_tasks_total_size.into(),
index_growth_amount: byte_unit::Byte::from_str("10GiB").unwrap().as_u64() as usize,
index_count: DEFAULT_INDEX_COUNT,
instance_features: opt.to_instance_features(),

View File

@ -445,7 +445,7 @@ pub struct Opt {
/// see: <https://github.com/orgs/meilisearch/discussions/801>
#[clap(long, env = MEILI_EXPERIMENTAL_LIMIT_BATCHED_TASKS_TOTAL_SIZE, default_value_t = default_limit_batched_tasks_total_size())]
#[serde(default = "default_limit_batched_tasks_total_size")]
pub experimental_limit_batched_tasks_total_size: u64,
pub experimental_limit_batched_tasks_total_size: Byte,
/// Enables experimental caching of search query embeddings. The value represents the maximal number of entries in the cache of each
/// distinct embedder.
@ -968,8 +968,8 @@ fn default_limit_batched_tasks() -> usize {
usize::MAX
}
fn default_limit_batched_tasks_total_size() -> u64 {
u64::MAX
fn default_limit_batched_tasks_total_size() -> Byte {
Byte::from_u64(u64::MAX)
}
fn default_embedding_cache_entries() -> usize {

View File

@ -518,7 +518,7 @@ impl From<index_scheduler::IndexStats> for IndexStats {
.inner_stats
.number_of_documents
.unwrap_or(stats.inner_stats.documents_database_stats.number_of_entries()),
raw_document_db_size: stats.inner_stats.documents_database_stats.total_value_size(),
raw_document_db_size: stats.inner_stats.documents_database_stats.total_size(),
avg_document_size: stats.inner_stats.documents_database_stats.average_value_size(),
is_indexing: stats.is_indexing,
number_of_embeddings: stats.inner_stats.number_of_embeddings,

View File

@ -497,6 +497,17 @@ make_setting_routes!(
camelcase_attr: "facetSearch",
analytics: FacetSearchAnalytics
},
{
route: "/execute-after-update",
update_verb: put,
value_type: String,
err_type: meilisearch_types::deserr::DeserrJsonError<
meilisearch_types::error::deserr_codes::InvalidSettingsexecuteAfterUpdate,
>,
attr: execute_after_update,
camelcase_attr: "executeAfterUpdate",
analytics: ExecuteAfterUpdateAnalytics
},
{
route: "/prefix-search",
update_verb: put,
@ -596,6 +607,9 @@ pub async fn update_all(
new_settings.non_separator_tokens.as_ref().set(),
),
facet_search: FacetSearchAnalytics::new(new_settings.facet_search.as_ref().set()),
execute_after_update: ExecuteAfterUpdateAnalytics::new(
new_settings.execute_after_update.as_ref().set(),
),
prefix_search: PrefixSearchAnalytics::new(new_settings.prefix_search.as_ref().set()),
},
&req,

View File

@ -39,6 +39,7 @@ pub struct SettingsAnalytics {
pub non_separator_tokens: NonSeparatorTokensAnalytics,
pub facet_search: FacetSearchAnalytics,
pub prefix_search: PrefixSearchAnalytics,
pub execute_after_update: ExecuteAfterUpdateAnalytics,
}
impl Aggregate for SettingsAnalytics {
@ -194,6 +195,9 @@ impl Aggregate for SettingsAnalytics {
set: new.facet_search.set | self.facet_search.set,
value: new.facet_search.value.or(self.facet_search.value),
},
execute_after_update: ExecuteAfterUpdateAnalytics {
set: new.execute_after_update.set | self.execute_after_update.set,
},
prefix_search: PrefixSearchAnalytics {
set: new.prefix_search.set | self.prefix_search.set,
value: new.prefix_search.value.or(self.prefix_search.value),
@ -659,6 +663,21 @@ impl FacetSearchAnalytics {
}
}
#[derive(Serialize, Default)]
pub struct ExecuteAfterUpdateAnalytics {
pub set: bool,
}
impl ExecuteAfterUpdateAnalytics {
pub fn new(distinct: Option<&String>) -> Self {
Self { set: distinct.is_some() }
}
pub fn into_settings(self) -> SettingsAnalytics {
SettingsAnalytics { execute_after_update: self, ..Default::default() }
}
}
#[derive(Serialize, Default)]
pub struct PrefixSearchAnalytics {
pub set: bool,

View File

@ -281,7 +281,8 @@ async fn test_summarized_document_addition_or_update() {
".startedAt" => "[date]",
".finishedAt" => "[date]",
".stats.progressTrace" => "[progressTrace]",
".stats.writeChannelCongestion" => "[writeChannelCongestion]"
".stats.writeChannelCongestion" => "[writeChannelCongestion]",
".stats.internalDatabaseSizes" => "[internalDatabaseSizes]"
},
@r###"
{
@ -303,7 +304,8 @@ async fn test_summarized_document_addition_or_update() {
"test": 1
},
"progressTrace": "[progressTrace]",
"writeChannelCongestion": "[writeChannelCongestion]"
"writeChannelCongestion": "[writeChannelCongestion]",
"internalDatabaseSizes": "[internalDatabaseSizes]"
},
"duration": "[duration]",
"startedAt": "[date]",
@ -322,7 +324,8 @@ async fn test_summarized_document_addition_or_update() {
".startedAt" => "[date]",
".finishedAt" => "[date]",
".stats.progressTrace" => "[progressTrace]",
".stats.writeChannelCongestion" => "[writeChannelCongestion]"
".stats.writeChannelCongestion" => "[writeChannelCongestion]",
".stats.internalDatabaseSizes" => "[internalDatabaseSizes]"
},
@r###"
{
@ -407,7 +410,8 @@ async fn test_summarized_delete_documents_by_batch() {
".startedAt" => "[date]",
".finishedAt" => "[date]",
".stats.progressTrace" => "[progressTrace]",
".stats.writeChannelCongestion" => "[writeChannelCongestion]"
".stats.writeChannelCongestion" => "[writeChannelCongestion]",
".stats.internalDatabaseSizes" => "[internalDatabaseSizes]"
},
@r###"
{
@ -495,7 +499,8 @@ async fn test_summarized_delete_documents_by_filter() {
".startedAt" => "[date]",
".finishedAt" => "[date]",
".stats.progressTrace" => "[progressTrace]",
".stats.writeChannelCongestion" => "[writeChannelCongestion]"
".stats.writeChannelCongestion" => "[writeChannelCongestion]",
".stats.internalDatabaseSizes" => "[internalDatabaseSizes]"
},
@r###"
{
@ -537,7 +542,8 @@ async fn test_summarized_delete_documents_by_filter() {
".startedAt" => "[date]",
".finishedAt" => "[date]",
".stats.progressTrace" => "[progressTrace]",
".stats.writeChannelCongestion" => "[writeChannelCongestion]"
".stats.writeChannelCongestion" => "[writeChannelCongestion]",
".stats.internalDatabaseSizes" => "[internalDatabaseSizes]"
},
@r#"
{
@ -623,7 +629,8 @@ async fn test_summarized_delete_document_by_id() {
".startedAt" => "[date]",
".finishedAt" => "[date]",
".stats.progressTrace" => "[progressTrace]",
".stats.writeChannelCongestion" => "[writeChannelCongestion]"
".stats.writeChannelCongestion" => "[writeChannelCongestion]",
".stats.internalDatabaseSizes" => "[internalDatabaseSizes]"
},
@r#"
{
@ -679,7 +686,8 @@ async fn test_summarized_settings_update() {
".startedAt" => "[date]",
".finishedAt" => "[date]",
".stats.progressTrace" => "[progressTrace]",
".stats.writeChannelCongestion" => "[writeChannelCongestion]"
".stats.writeChannelCongestion" => "[writeChannelCongestion]",
".stats.internalDatabaseSizes" => "[internalDatabaseSizes]"
},
@r###"
{

View File

@ -1777,7 +1777,7 @@ async fn add_documents_with_geo_field() {
},
{
"id": "4",
"_geo": { "lat": "1", "lng": "1" },
"_geo": { "lat": "2", "lng": "2" },
},
]);
@ -1828,8 +1828,8 @@ async fn add_documents_with_geo_field() {
{
"id": "4",
"_geo": {
"lat": "1",
"lng": "1"
"lat": "2",
"lng": "2"
}
}
],
@ -1848,14 +1848,6 @@ async fn add_documents_with_geo_field() {
@r###"
{
"hits": [
{
"id": "4",
"_geo": {
"lat": "1",
"lng": "1"
},
"_geoDistance": 5522018
},
{
"id": "3",
"_geo": {
@ -1864,6 +1856,14 @@ async fn add_documents_with_geo_field() {
},
"_geoDistance": 5522018
},
{
"id": "4",
"_geo": {
"lat": "2",
"lng": "2"
},
"_geoDistance": 5408322
},
{
"id": "1"
},
@ -1897,11 +1897,11 @@ async fn update_documents_with_geo_field() {
},
{
"id": "3",
"_geo": { "lat": 1, "lng": 1 },
"_geo": { "lat": 3, "lng": 0 },
},
{
"id": "4",
"_geo": { "lat": "1", "lng": "1" },
"_geo": { "lat": "4", "lng": "0" },
},
]);
@ -1928,9 +1928,7 @@ async fn update_documents_with_geo_field() {
}
"###);
let (response, code) = index
.search_post(json!({"sort": ["_geoPoint(50.629973371633746,3.0569447399419567):desc"]}))
.await;
let (response, code) = index.search_post(json!({"sort": ["_geoPoint(10,0):asc"]})).await;
snapshot!(code, @"200 OK");
// we are expecting docs 4 and 3 first as they have geo
snapshot!(json_string!(response, { ".processingTimeMs" => "[time]" }),
@ -1940,18 +1938,18 @@ async fn update_documents_with_geo_field() {
{
"id": "4",
"_geo": {
"lat": "1",
"lng": "1"
"lat": "4",
"lng": "0"
},
"_geoDistance": 5522018
"_geoDistance": 667170
},
{
"id": "3",
"_geo": {
"lat": 1,
"lng": 1
"lat": 3,
"lng": 0
},
"_geoDistance": 5522018
"_geoDistance": 778364
},
{
"id": "1"
@ -1969,10 +1967,13 @@ async fn update_documents_with_geo_field() {
}
"###);
let updated_documents = json!([{
"id": "3",
"doggo": "kefir",
}]);
let updated_documents = json!([
{
"id": "3",
"doggo": "kefir",
"_geo": { "lat": 5, "lng": 0 },
}
]);
let (task, _status_code) = index.update_documents(updated_documents, None).await;
let response = index.wait_task(task.uid()).await;
snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }),
@ -2012,16 +2013,16 @@ async fn update_documents_with_geo_field() {
{
"id": "3",
"_geo": {
"lat": 1,
"lng": 1
"lat": 5,
"lng": 0
},
"doggo": "kefir"
},
{
"id": "4",
"_geo": {
"lat": "1",
"lng": "1"
"lat": "4",
"lng": "0"
}
}
],
@ -2031,31 +2032,29 @@ async fn update_documents_with_geo_field() {
}
"###);
let (response, code) = index
.search_post(json!({"sort": ["_geoPoint(50.629973371633746,3.0569447399419567):desc"]}))
.await;
let (response, code) = index.search_post(json!({"sort": ["_geoPoint(10,0):asc"]})).await;
snapshot!(code, @"200 OK");
// the search response should not have changed: we are expecting docs 4 and 3 first as they have geo
snapshot!(json_string!(response, { ".processingTimeMs" => "[time]" }),
@r###"
{
"hits": [
{
"id": "4",
"_geo": {
"lat": "1",
"lng": "1"
},
"_geoDistance": 5522018
},
{
"id": "3",
"_geo": {
"lat": 1,
"lng": 1
"lat": 5,
"lng": 0
},
"doggo": "kefir",
"_geoDistance": 5522018
"_geoDistance": 555975
},
{
"id": "4",
"_geo": {
"lat": "4",
"lng": "0"
},
"_geoDistance": 667170
},
{
"id": "1"

View File

@ -157,11 +157,14 @@ async fn delete_document_by_filter() {
index.wait_task(task.uid()).await.succeeded();
let (stats, _) = index.stats().await;
snapshot!(json_string!(stats), @r###"
snapshot!(json_string!(stats, {
".rawDocumentDbSize" => "[size]",
".avgDocumentSize" => "[size]",
}), @r###"
{
"numberOfDocuments": 4,
"rawDocumentDbSize": 42,
"avgDocumentSize": 10,
"rawDocumentDbSize": "[size]",
"avgDocumentSize": "[size]",
"isIndexing": false,
"numberOfEmbeddings": 0,
"numberOfEmbeddedDocuments": 0,
@ -208,11 +211,14 @@ async fn delete_document_by_filter() {
"###);
let (stats, _) = index.stats().await;
snapshot!(json_string!(stats), @r###"
snapshot!(json_string!(stats, {
".rawDocumentDbSize" => "[size]",
".avgDocumentSize" => "[size]",
}), @r###"
{
"numberOfDocuments": 2,
"rawDocumentDbSize": 16,
"avgDocumentSize": 8,
"rawDocumentDbSize": "[size]",
"avgDocumentSize": "[size]",
"isIndexing": false,
"numberOfEmbeddings": 0,
"numberOfEmbeddedDocuments": 0,
@ -278,11 +284,14 @@ async fn delete_document_by_filter() {
"###);
let (stats, _) = index.stats().await;
snapshot!(json_string!(stats), @r###"
snapshot!(json_string!(stats, {
".rawDocumentDbSize" => "[size]",
".avgDocumentSize" => "[size]",
}), @r###"
{
"numberOfDocuments": 1,
"rawDocumentDbSize": 12,
"avgDocumentSize": 12,
"rawDocumentDbSize": "[size]",
"avgDocumentSize": "[size]",
"isIndexing": false,
"numberOfEmbeddings": 0,
"numberOfEmbeddedDocuments": 0,

View File

@ -28,12 +28,15 @@ async fn import_dump_v1_movie_raw() {
let (stats, code) = index.stats().await;
snapshot!(code, @"200 OK");
snapshot!(
json_string!(stats),
json_string!(stats, {
".rawDocumentDbSize" => "[size]",
".avgDocumentSize" => "[size]",
}),
@r###"
{
"numberOfDocuments": 53,
"rawDocumentDbSize": 21965,
"avgDocumentSize": 414,
"rawDocumentDbSize": "[size]",
"avgDocumentSize": "[size]",
"isIndexing": false,
"numberOfEmbeddings": 0,
"numberOfEmbeddedDocuments": 0,
@ -185,12 +188,15 @@ async fn import_dump_v1_movie_with_settings() {
let (stats, code) = index.stats().await;
snapshot!(code, @"200 OK");
snapshot!(
json_string!(stats),
json_string!(stats, {
".rawDocumentDbSize" => "[size]",
".avgDocumentSize" => "[size]",
}),
@r###"
{
"numberOfDocuments": 53,
"rawDocumentDbSize": 21965,
"avgDocumentSize": 414,
"rawDocumentDbSize": "[size]",
"avgDocumentSize": "[size]",
"isIndexing": false,
"numberOfEmbeddings": 0,
"numberOfEmbeddedDocuments": 0,
@ -355,12 +361,15 @@ async fn import_dump_v1_rubygems_with_settings() {
let (stats, code) = index.stats().await;
snapshot!(code, @"200 OK");
snapshot!(
json_string!(stats),
json_string!(stats, {
".rawDocumentDbSize" => "[size]",
".avgDocumentSize" => "[size]",
}),
@r###"
{
"numberOfDocuments": 53,
"rawDocumentDbSize": 8606,
"avgDocumentSize": 162,
"rawDocumentDbSize": "[size]",
"avgDocumentSize": "[size]",
"isIndexing": false,
"numberOfEmbeddings": 0,
"numberOfEmbeddedDocuments": 0,
@ -522,12 +531,15 @@ async fn import_dump_v2_movie_raw() {
let (stats, code) = index.stats().await;
snapshot!(code, @"200 OK");
snapshot!(
json_string!(stats),
json_string!(stats, {
".rawDocumentDbSize" => "[size]",
".avgDocumentSize" => "[size]",
}),
@r###"
{
"numberOfDocuments": 53,
"rawDocumentDbSize": 21965,
"avgDocumentSize": 414,
"rawDocumentDbSize": "[size]",
"avgDocumentSize": "[size]",
"isIndexing": false,
"numberOfEmbeddings": 0,
"numberOfEmbeddedDocuments": 0,
@ -679,12 +691,15 @@ async fn import_dump_v2_movie_with_settings() {
let (stats, code) = index.stats().await;
snapshot!(code, @"200 OK");
snapshot!(
json_string!(stats),
json_string!(stats, {
".rawDocumentDbSize" => "[size]",
".avgDocumentSize" => "[size]",
}),
@r###"
{
"numberOfDocuments": 53,
"rawDocumentDbSize": 21965,
"avgDocumentSize": 414,
"rawDocumentDbSize": "[size]",
"avgDocumentSize": "[size]",
"isIndexing": false,
"numberOfEmbeddings": 0,
"numberOfEmbeddedDocuments": 0,
@ -846,12 +861,15 @@ async fn import_dump_v2_rubygems_with_settings() {
let (stats, code) = index.stats().await;
snapshot!(code, @"200 OK");
snapshot!(
json_string!(stats),
json_string!(stats, {
".rawDocumentDbSize" => "[size]",
".avgDocumentSize" => "[size]",
}),
@r###"
{
"numberOfDocuments": 53,
"rawDocumentDbSize": 8606,
"avgDocumentSize": 162,
"rawDocumentDbSize": "[size]",
"avgDocumentSize": "[size]",
"isIndexing": false,
"numberOfEmbeddings": 0,
"numberOfEmbeddedDocuments": 0,
@ -1010,12 +1028,15 @@ async fn import_dump_v3_movie_raw() {
let (stats, code) = index.stats().await;
snapshot!(code, @"200 OK");
snapshot!(
json_string!(stats),
json_string!(stats, {
".rawDocumentDbSize" => "[size]",
".avgDocumentSize" => "[size]",
}),
@r###"
{
"numberOfDocuments": 53,
"rawDocumentDbSize": 21965,
"avgDocumentSize": 414,
"rawDocumentDbSize": "[size]",
"avgDocumentSize": "[size]",
"isIndexing": false,
"numberOfEmbeddings": 0,
"numberOfEmbeddedDocuments": 0,
@ -1167,12 +1188,15 @@ async fn import_dump_v3_movie_with_settings() {
let (stats, code) = index.stats().await;
snapshot!(code, @"200 OK");
snapshot!(
json_string!(stats),
json_string!(stats, {
".rawDocumentDbSize" => "[size]",
".avgDocumentSize" => "[size]",
}),
@r###"
{
"numberOfDocuments": 53,
"rawDocumentDbSize": 21965,
"avgDocumentSize": 414,
"rawDocumentDbSize": "[size]",
"avgDocumentSize": "[size]",
"isIndexing": false,
"numberOfEmbeddings": 0,
"numberOfEmbeddedDocuments": 0,
@ -1334,12 +1358,15 @@ async fn import_dump_v3_rubygems_with_settings() {
let (stats, code) = index.stats().await;
snapshot!(code, @"200 OK");
snapshot!(
json_string!(stats),
json_string!(stats, {
".rawDocumentDbSize" => "[size]",
".avgDocumentSize" => "[size]",
}),
@r###"
{
"numberOfDocuments": 53,
"rawDocumentDbSize": 8606,
"avgDocumentSize": 162,
"rawDocumentDbSize": "[size]",
"avgDocumentSize": "[size]",
"isIndexing": false,
"numberOfEmbeddings": 0,
"numberOfEmbeddedDocuments": 0,
@ -1498,12 +1525,15 @@ async fn import_dump_v4_movie_raw() {
let (stats, code) = index.stats().await;
snapshot!(code, @"200 OK");
snapshot!(
json_string!(stats),
json_string!(stats, {
".rawDocumentDbSize" => "[size]",
".avgDocumentSize" => "[size]",
}),
@r###"
{
"numberOfDocuments": 53,
"rawDocumentDbSize": 21965,
"avgDocumentSize": 414,
"rawDocumentDbSize": "[size]",
"avgDocumentSize": "[size]",
"isIndexing": false,
"numberOfEmbeddings": 0,
"numberOfEmbeddedDocuments": 0,
@ -1655,12 +1685,15 @@ async fn import_dump_v4_movie_with_settings() {
let (stats, code) = index.stats().await;
snapshot!(code, @"200 OK");
snapshot!(
json_string!(stats),
json_string!(stats, {
".rawDocumentDbSize" => "[size]",
".avgDocumentSize" => "[size]",
}),
@r###"
{
"numberOfDocuments": 53,
"rawDocumentDbSize": 21965,
"avgDocumentSize": 414,
"rawDocumentDbSize": "[size]",
"avgDocumentSize": "[size]",
"isIndexing": false,
"numberOfEmbeddings": 0,
"numberOfEmbeddedDocuments": 0,
@ -1822,12 +1855,15 @@ async fn import_dump_v4_rubygems_with_settings() {
let (stats, code) = index.stats().await;
snapshot!(code, @"200 OK");
snapshot!(
json_string!(stats),
json_string!(stats, {
".rawDocumentDbSize" => "[size]",
".avgDocumentSize" => "[size]",
}),
@r###"
{
"numberOfDocuments": 53,
"rawDocumentDbSize": 8606,
"avgDocumentSize": 162,
"rawDocumentDbSize": "[size]",
"avgDocumentSize": "[size]",
"isIndexing": false,
"numberOfEmbeddings": 0,
"numberOfEmbeddedDocuments": 0,
@ -1994,11 +2030,14 @@ async fn import_dump_v5() {
let (stats, code) = index1.stats().await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(stats), @r###"
snapshot!(json_string!(stats, {
".rawDocumentDbSize" => "[size]",
".avgDocumentSize" => "[size]",
}), @r###"
{
"numberOfDocuments": 10,
"rawDocumentDbSize": 6782,
"avgDocumentSize": 678,
"rawDocumentDbSize": "[size]",
"avgDocumentSize": "[size]",
"isIndexing": false,
"numberOfEmbeddings": 0,
"numberOfEmbeddedDocuments": 0,
@ -2031,12 +2070,15 @@ async fn import_dump_v5() {
let (stats, code) = index2.stats().await;
snapshot!(code, @"200 OK");
snapshot!(
json_string!(stats),
json_string!(stats, {
".rawDocumentDbSize" => "[size]",
".avgDocumentSize" => "[size]",
}),
@r###"
{
"numberOfDocuments": 10,
"rawDocumentDbSize": 6782,
"avgDocumentSize": 678,
"rawDocumentDbSize": "[size]",
"avgDocumentSize": "[size]",
"isIndexing": false,
"numberOfEmbeddings": 0,
"numberOfEmbeddedDocuments": 0,
@ -2237,6 +2279,7 @@ async fn import_dump_v6_containing_batches_and_enqueued_tasks() {
".results[0].duration" => "[date]",
".results[0].stats.progressTrace" => "[progressTrace]",
".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]",
".results[0].stats.internalDatabaseSizes" => "[internalDatabaseSizes]",
}), name: "batches");
let (indexes, code) = server.list_indexes(None, None).await;

View File

@ -1783,6 +1783,146 @@ async fn test_nested_fields() {
.await;
}
#[actix_rt::test]
async fn test_typo_settings() {
let documents = json!([
{
"id": 0,
"title": "The zeroth document",
},
{
"id": 1,
"title": "The first document",
"nested": {
"object": "field",
"machin": "bidule",
},
},
{
"id": 2,
"title": "The second document",
"nested": [
"array",
{
"object": "field",
},
{
"prout": "truc",
"machin": "lol",
},
],
},
{
"id": 3,
"title": "The third document",
"nested": "I lied",
},
]);
test_settings_documents_indexing_swapping_and_search(
&documents,
&json!({
"searchableAttributes": ["title", "nested.object", "nested.machin"],
"typoTolerance": {
"enabled": true,
"disableOnAttributes": ["title"]
}
}),
&json!({"q": "document"}),
|response, code| {
assert_eq!(code, 200, "{}", response);
snapshot!(json_string!(response["hits"]), @r###"
[
{
"id": 0,
"title": "The zeroth document"
},
{
"id": 1,
"title": "The first document",
"nested": {
"object": "field",
"machin": "bidule"
}
},
{
"id": 2,
"title": "The second document",
"nested": [
"array",
{
"object": "field"
},
{
"prout": "truc",
"machin": "lol"
}
]
},
{
"id": 3,
"title": "The third document",
"nested": "I lied"
}
]
"###);
},
)
.await;
// Test prefix search
test_settings_documents_indexing_swapping_and_search(
&documents,
&json!({
"searchableAttributes": ["title", "nested.object", "nested.machin"],
"typoTolerance": {
"enabled": true,
"disableOnAttributes": ["title"]
}
}),
&json!({"q": "docume"}),
|response, code| {
assert_eq!(code, 200, "{}", response);
snapshot!(json_string!(response["hits"]), @r###"
[
{
"id": 0,
"title": "The zeroth document"
},
{
"id": 1,
"title": "The first document",
"nested": {
"object": "field",
"machin": "bidule"
}
},
{
"id": 2,
"title": "The second document",
"nested": [
"array",
{
"object": "field"
},
{
"prout": "truc",
"machin": "lol"
}
]
},
{
"id": 3,
"title": "The third document",
"nested": "I lied"
}
]
"###);
},
)
.await;
}
/// Modifying facets with different casing should work correctly
#[actix_rt::test]
async fn change_facet_casing() {

View File

@ -110,11 +110,14 @@ async fn add_remove_embeddings() {
index.wait_task(response.uid()).await.succeeded();
let (stats, _code) = index.stats().await;
snapshot!(json_string!(stats), @r###"
snapshot!(json_string!(stats, {
".rawDocumentDbSize" => "[size]",
".avgDocumentSize" => "[size]",
}), @r###"
{
"numberOfDocuments": 2,
"rawDocumentDbSize": 27,
"avgDocumentSize": 13,
"rawDocumentDbSize": "[size]",
"avgDocumentSize": "[size]",
"isIndexing": false,
"numberOfEmbeddings": 5,
"numberOfEmbeddedDocuments": 2,
@ -135,11 +138,14 @@ async fn add_remove_embeddings() {
index.wait_task(response.uid()).await.succeeded();
let (stats, _code) = index.stats().await;
snapshot!(json_string!(stats), @r###"
snapshot!(json_string!(stats, {
".rawDocumentDbSize" => "[size]",
".avgDocumentSize" => "[size]",
}), @r###"
{
"numberOfDocuments": 2,
"rawDocumentDbSize": 27,
"avgDocumentSize": 13,
"rawDocumentDbSize": "[size]",
"avgDocumentSize": "[size]",
"isIndexing": false,
"numberOfEmbeddings": 3,
"numberOfEmbeddedDocuments": 2,
@ -160,11 +166,14 @@ async fn add_remove_embeddings() {
index.wait_task(response.uid()).await.succeeded();
let (stats, _code) = index.stats().await;
snapshot!(json_string!(stats), @r###"
snapshot!(json_string!(stats, {
".rawDocumentDbSize" => "[size]",
".avgDocumentSize" => "[size]",
}), @r###"
{
"numberOfDocuments": 2,
"rawDocumentDbSize": 27,
"avgDocumentSize": 13,
"rawDocumentDbSize": "[size]",
"avgDocumentSize": "[size]",
"isIndexing": false,
"numberOfEmbeddings": 2,
"numberOfEmbeddedDocuments": 2,
@ -186,11 +195,14 @@ async fn add_remove_embeddings() {
index.wait_task(response.uid()).await.succeeded();
let (stats, _code) = index.stats().await;
snapshot!(json_string!(stats), @r###"
snapshot!(json_string!(stats, {
".rawDocumentDbSize" => "[size]",
".avgDocumentSize" => "[size]",
}), @r###"
{
"numberOfDocuments": 2,
"rawDocumentDbSize": 27,
"avgDocumentSize": 13,
"rawDocumentDbSize": "[size]",
"avgDocumentSize": "[size]",
"isIndexing": false,
"numberOfEmbeddings": 2,
"numberOfEmbeddedDocuments": 1,
@ -236,11 +248,14 @@ async fn add_remove_embedded_documents() {
index.wait_task(response.uid()).await.succeeded();
let (stats, _code) = index.stats().await;
snapshot!(json_string!(stats), @r###"
snapshot!(json_string!(stats, {
".rawDocumentDbSize" => "[size]",
".avgDocumentSize" => "[size]",
}), @r###"
{
"numberOfDocuments": 2,
"rawDocumentDbSize": 27,
"avgDocumentSize": 13,
"rawDocumentDbSize": "[size]",
"avgDocumentSize": "[size]",
"isIndexing": false,
"numberOfEmbeddings": 5,
"numberOfEmbeddedDocuments": 2,
@ -257,11 +272,14 @@ async fn add_remove_embedded_documents() {
index.wait_task(response.uid()).await.succeeded();
let (stats, _code) = index.stats().await;
snapshot!(json_string!(stats), @r###"
snapshot!(json_string!(stats, {
".rawDocumentDbSize" => "[size]",
".avgDocumentSize" => "[size]",
}), @r###"
{
"numberOfDocuments": 1,
"rawDocumentDbSize": 13,
"avgDocumentSize": 13,
"rawDocumentDbSize": "[size]",
"avgDocumentSize": "[size]",
"isIndexing": false,
"numberOfEmbeddings": 3,
"numberOfEmbeddedDocuments": 1,
@ -290,11 +308,14 @@ async fn update_embedder_settings() {
index.wait_task(response.uid()).await.succeeded();
let (stats, _code) = index.stats().await;
snapshot!(json_string!(stats), @r###"
snapshot!(json_string!(stats, {
".rawDocumentDbSize" => "[size]",
".avgDocumentSize" => "[size]",
}), @r###"
{
"numberOfDocuments": 2,
"rawDocumentDbSize": 108,
"avgDocumentSize": 54,
"rawDocumentDbSize": "[size]",
"avgDocumentSize": "[size]",
"isIndexing": false,
"numberOfEmbeddings": 0,
"numberOfEmbeddedDocuments": 0,
@ -326,11 +347,14 @@ async fn update_embedder_settings() {
server.wait_task(response.uid()).await.succeeded();
let (stats, _code) = index.stats().await;
snapshot!(json_string!(stats), @r###"
snapshot!(json_string!(stats, {
".rawDocumentDbSize" => "[size]",
".avgDocumentSize" => "[size]",
}), @r###"
{
"numberOfDocuments": 2,
"rawDocumentDbSize": 108,
"avgDocumentSize": 54,
"rawDocumentDbSize": "[size]",
"avgDocumentSize": "[size]",
"isIndexing": false,
"numberOfEmbeddings": 3,
"numberOfEmbeddedDocuments": 2,

View File

@ -133,7 +133,9 @@ async fn check_the_index_scheduler(server: &Server) {
let (stats, _) = server.stats().await;
assert_json_snapshot!(stats, {
".databaseSize" => "[bytes]",
".usedDatabaseSize" => "[bytes]"
".usedDatabaseSize" => "[bytes]",
".indexes.kefir.rawDocumentDbSize" => "[bytes]",
".indexes.kefir.avgDocumentSize" => "[bytes]",
},
@r###"
{
@ -143,8 +145,8 @@ async fn check_the_index_scheduler(server: &Server) {
"indexes": {
"kefir": {
"numberOfDocuments": 1,
"rawDocumentDbSize": 109,
"avgDocumentSize": 109,
"rawDocumentDbSize": "[bytes]",
"avgDocumentSize": "[bytes]",
"isIndexing": false,
"numberOfEmbeddings": 0,
"numberOfEmbeddedDocuments": 0,
@ -193,31 +195,33 @@ async fn check_the_index_scheduler(server: &Server) {
// Tests all the batches query parameters
let (batches, _) = server.batches_filter("uids=10").await;
snapshot!(json_string!(batches, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_uids_equal_10");
snapshot!(json_string!(batches, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.internalDatabaseSizes" => "[internalDatabaseSizes]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_uids_equal_10");
let (batches, _) = server.batches_filter("batchUids=10").await;
snapshot!(json_string!(batches, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_batchUids_equal_10");
snapshot!(json_string!(batches, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.internalDatabaseSizes" => "[internalDatabaseSizes]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_batchUids_equal_10");
let (batches, _) = server.batches_filter("statuses=canceled").await;
snapshot!(json_string!(batches, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_statuses_equal_canceled");
snapshot!(json_string!(batches, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.internalDatabaseSizes" => "[internalDatabaseSizes]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_statuses_equal_canceled");
// types has already been tested above to retrieve the upgrade database
let (batches, _) = server.batches_filter("canceledBy=19").await;
snapshot!(json_string!(batches, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_canceledBy_equal_19");
snapshot!(json_string!(batches, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.internalDatabaseSizes" => "[internalDatabaseSizes]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_canceledBy_equal_19");
let (batches, _) = server.batches_filter("beforeEnqueuedAt=2025-01-16T16:47:41Z").await;
snapshot!(json_string!(batches, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_beforeEnqueuedAt_equal_2025-01-16T16_47_41");
snapshot!(json_string!(batches, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.internalDatabaseSizes" => "[internalDatabaseSizes]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_beforeEnqueuedAt_equal_2025-01-16T16_47_41");
let (batches, _) = server.batches_filter("afterEnqueuedAt=2025-01-16T16:47:41Z").await;
snapshot!(json_string!(batches, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_afterEnqueuedAt_equal_2025-01-16T16_47_41");
snapshot!(json_string!(batches, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.internalDatabaseSizes" => "[internalDatabaseSizes]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_afterEnqueuedAt_equal_2025-01-16T16_47_41");
let (batches, _) = server.batches_filter("beforeStartedAt=2025-01-16T16:47:41Z").await;
snapshot!(json_string!(batches, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_beforeStartedAt_equal_2025-01-16T16_47_41");
snapshot!(json_string!(batches, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.internalDatabaseSizes" => "[internalDatabaseSizes]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_beforeStartedAt_equal_2025-01-16T16_47_41");
let (batches, _) = server.batches_filter("afterStartedAt=2025-01-16T16:47:41Z").await;
snapshot!(json_string!(batches, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_afterStartedAt_equal_2025-01-16T16_47_41");
snapshot!(json_string!(batches, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.internalDatabaseSizes" => "[internalDatabaseSizes]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_afterStartedAt_equal_2025-01-16T16_47_41");
let (batches, _) = server.batches_filter("beforeFinishedAt=2025-01-16T16:47:41Z").await;
snapshot!(json_string!(batches, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_beforeFinishedAt_equal_2025-01-16T16_47_41");
snapshot!(json_string!(batches, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.internalDatabaseSizes" => "[internalDatabaseSizes]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_beforeFinishedAt_equal_2025-01-16T16_47_41");
let (batches, _) = server.batches_filter("afterFinishedAt=2025-01-16T16:47:41Z").await;
snapshot!(json_string!(batches, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_afterFinishedAt_equal_2025-01-16T16_47_41");
snapshot!(json_string!(batches, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.internalDatabaseSizes" => "[internalDatabaseSizes]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_afterFinishedAt_equal_2025-01-16T16_47_41");
let (stats, _) = server.stats().await;
assert_json_snapshot!(stats, {
".databaseSize" => "[bytes]",
".usedDatabaseSize" => "[bytes]"
".usedDatabaseSize" => "[bytes]",
".indexes.kefir.rawDocumentDbSize" => "[bytes]",
".indexes.kefir.avgDocumentSize" => "[bytes]",
},
@r###"
{
@ -227,8 +231,8 @@ async fn check_the_index_scheduler(server: &Server) {
"indexes": {
"kefir": {
"numberOfDocuments": 1,
"rawDocumentDbSize": 109,
"avgDocumentSize": 109,
"rawDocumentDbSize": "[bytes]",
"avgDocumentSize": "[bytes]",
"isIndexing": false,
"numberOfEmbeddings": 0,
"numberOfEmbeddedDocuments": 0,
@ -245,11 +249,14 @@ async fn check_the_index_scheduler(server: &Server) {
"###);
let index = server.index("kefir");
let (stats, _) = index.stats().await;
snapshot!(stats, @r###"
snapshot!(json_string!(stats, {
".rawDocumentDbSize" => "[bytes]",
".avgDocumentSize" => "[bytes]",
}), @r###"
{
"numberOfDocuments": 1,
"rawDocumentDbSize": 109,
"avgDocumentSize": 109,
"rawDocumentDbSize": "[bytes]",
"avgDocumentSize": "[bytes]",
"isIndexing": false,
"numberOfEmbeddings": 0,
"numberOfEmbeddedDocuments": 0,

View File

@ -188,7 +188,7 @@ async fn user_provide_mismatched_embedding_dimension() {
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let task = index.wait_task(value.uid()).await;
snapshot!(task, @r#"
snapshot!(task, @r###"
{
"uid": "[uid]",
"batchUid": "[batch_uid]",
@ -201,7 +201,7 @@ async fn user_provide_mismatched_embedding_dimension() {
"indexedDocuments": 0
},
"error": {
"message": "Index `doggo`: Invalid vector dimensions: expected: `3`, found: `2`.",
"message": "Index `doggo`: Invalid vector dimensions in document with id `0` in `._vectors.manual`.\n - note: embedding #0 has dimensions 2\n - note: embedder `manual` requires 3",
"code": "invalid_vector_dimensions",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_vector_dimensions"
@ -211,46 +211,36 @@ async fn user_provide_mismatched_embedding_dimension() {
"startedAt": "[date]",
"finishedAt": "[date]"
}
"#);
"###);
// FIXME: /!\ Case where number of embeddings is divisor of `dimensions` would still pass
let new_document = json!([
{"id": 0, "name": "kefir", "_vectors": { "manual": [[0, 0], [1, 1], [2, 2]] }},
]);
let (response, code) = index.add_documents(new_document, None).await;
snapshot!(code, @"202 Accepted");
index.wait_task(response.uid()).await.succeeded();
let (documents, _code) = index
.get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() })
.await;
snapshot!(json_string!(documents), @r###"
let task = index.wait_task(response.uid()).await;
snapshot!(task, @r###"
{
"results": [
{
"id": 0,
"name": "kefir",
"_vectors": {
"manual": {
"embeddings": [
[
0.0,
0.0,
1.0
],
[
1.0,
2.0,
2.0
]
],
"regenerate": false
}
}
}
],
"offset": 0,
"limit": 20,
"total": 1
"uid": "[uid]",
"batchUid": "[batch_uid]",
"indexUid": "doggo",
"status": "failed",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 0
},
"error": {
"message": "Index `doggo`: Invalid vector dimensions in document with id `0` in `._vectors.manual`.\n - note: embedding #0 has dimensions 2\n - note: embedder `manual` requires 3",
"code": "invalid_vector_dimensions",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_vector_dimensions"
},
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
}

View File

@ -21,7 +21,7 @@ byteorder = "1.5.0"
charabia = { version = "0.9.3", default-features = false }
concat-arrays = "0.1.2"
convert_case = "0.6.0"
crossbeam-channel = "0.5.14"
crossbeam-channel = "0.5.15"
deserr = "0.6.3"
either = { version = "1.13.0", features = ["serde"] }
flatten-serde-json = { path = "../flatten-serde-json" }

View File

@ -1,8 +1,13 @@
use heed::types::Bytes;
use std::mem;
use heed::Database;
use heed::DatabaseStat;
use heed::RoTxn;
use heed::Unspecified;
use serde::{Deserialize, Serialize};
use crate::BEU32;
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Default)]
#[serde(rename_all = "camelCase")]
/// The stats of a database.
@ -20,58 +25,24 @@ impl DatabaseStats {
///
/// This function iterates over the whole database and computes the stats.
/// It is not efficient and should be cached somewhere.
pub(crate) fn new(database: Database<Bytes, Bytes>, rtxn: &RoTxn<'_>) -> heed::Result<Self> {
let mut database_stats =
Self { number_of_entries: 0, total_key_size: 0, total_value_size: 0 };
pub(crate) fn new(
database: Database<BEU32, Unspecified>,
rtxn: &RoTxn<'_>,
) -> heed::Result<Self> {
let DatabaseStat { page_size, depth: _, branch_pages, leaf_pages, overflow_pages, entries } =
database.stat(rtxn)?;
let mut iter = database.iter(rtxn)?;
while let Some((key, value)) = iter.next().transpose()? {
let key_size = key.len() as u64;
let value_size = value.len() as u64;
database_stats.total_key_size += key_size;
database_stats.total_value_size += value_size;
}
// We first take the total size without overflow pages as the overflow pages contains the values and only that.
let total_size = (branch_pages + leaf_pages + overflow_pages) * page_size as usize;
// We compute an estimated size for the keys.
let total_key_size = entries * (mem::size_of::<u32>() + 4);
let total_value_size = total_size - total_key_size;
database_stats.number_of_entries = database.len(rtxn)?;
Ok(database_stats)
}
/// Recomputes the stats of the database and returns the new stats.
///
/// This function is used to update the stats of the database when some keys are modified.
/// It is more efficient than the `new` function because it does not iterate over the whole database but only the modified keys comparing the before and after states.
pub(crate) fn recompute<I, K>(
mut stats: Self,
database: Database<Bytes, Bytes>,
before_rtxn: &RoTxn<'_>,
after_rtxn: &RoTxn<'_>,
modified_keys: I,
) -> heed::Result<Self>
where
I: IntoIterator<Item = K>,
K: AsRef<[u8]>,
{
for key in modified_keys {
let key = key.as_ref();
if let Some(value) = database.get(after_rtxn, key)? {
let key_size = key.len() as u64;
let value_size = value.len() as u64;
stats.total_key_size = stats.total_key_size.saturating_add(key_size);
stats.total_value_size = stats.total_value_size.saturating_add(value_size);
}
if let Some(value) = database.get(before_rtxn, key)? {
let key_size = key.len() as u64;
let value_size = value.len() as u64;
stats.total_key_size = stats.total_key_size.saturating_sub(key_size);
stats.total_value_size = stats.total_value_size.saturating_sub(value_size);
}
}
stats.number_of_entries = database.len(after_rtxn)?;
Ok(stats)
Ok(Self {
number_of_entries: entries as u64,
total_key_size: total_key_size as u64,
total_value_size: total_value_size as u64,
})
}
pub fn average_key_size(&self) -> u64 {
@ -86,6 +57,10 @@ impl DatabaseStats {
self.number_of_entries
}
pub fn total_size(&self) -> u64 {
self.total_key_size + self.total_value_size
}
pub fn total_key_size(&self) -> u64 {
self.total_key_size
}

View File

@ -154,6 +154,14 @@ and can not be more than 511 bytes.", .document_id.to_string()
InvalidGeoField(#[from] Box<GeoError>),
#[error("Invalid vector dimensions: expected: `{}`, found: `{}`.", .expected, .found)]
InvalidVectorDimensions { expected: usize, found: usize },
#[error("Invalid vector dimensions in document with id `{document_id}` in `._vectors.{embedder_name}`.\n - note: embedding #{embedding_index} has dimensions {found}\n - note: embedder `{embedder_name}` requires {expected}")]
InvalidIndexingVectorDimensions {
embedder_name: String,
document_id: String,
embedding_index: usize,
expected: usize,
found: usize,
},
#[error("The `_vectors` field in the document with id: `{document_id}` is not an object. Was expecting an object with a key for each embedder with manually provided vectors, but instead got `{value}`")]
InvalidVectorsMapType { document_id: String, value: Value },
#[error("Bad embedder configuration in the document with id: `{document_id}`. {error}")]

View File

@ -3,8 +3,9 @@ use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet};
use std::fs::File;
use std::path::Path;
use heed::{types::*, WithoutTls};
use heed::{types::*, DatabaseStat, WithoutTls};
use heed::{CompactionOption, Database, RoTxn, RwTxn, Unspecified};
use indexmap::IndexMap;
use roaring::RoaringBitmap;
use rstar::RTree;
use serde::{Deserialize, Serialize};
@ -75,6 +76,7 @@ pub mod main_key {
pub const SEARCH_CUTOFF: &str = "search_cutoff";
pub const LOCALIZED_ATTRIBUTES_RULES: &str = "localized_attributes_rules";
pub const FACET_SEARCH: &str = "facet_search";
pub const EXECUTE_AFTER_UPDATE: &str = "execute-after-update";
pub const PREFIX_SEARCH: &str = "prefix_search";
pub const DOCUMENTS_STATS: &str = "documents_stats";
}
@ -410,38 +412,6 @@ impl Index {
Ok(count.unwrap_or_default())
}
/// Updates the stats of the documents database based on the previous stats and the modified docids.
pub fn update_documents_stats(
&self,
wtxn: &mut RwTxn<'_>,
modified_docids: roaring::RoaringBitmap,
) -> Result<()> {
let before_rtxn = self.read_txn()?;
let document_stats = match self.documents_stats(&before_rtxn)? {
Some(before_stats) => DatabaseStats::recompute(
before_stats,
self.documents.remap_types(),
&before_rtxn,
wtxn,
modified_docids.iter().map(|docid| docid.to_be_bytes()),
)?,
None => {
// This should never happen when there are already documents in the index, the documents stats should be present.
// If it happens, it means that the index was not properly initialized/upgraded.
debug_assert_eq!(
self.documents.len(&before_rtxn)?,
0,
"The documents stats should be present when there are documents in the index"
);
tracing::warn!("No documents stats found, creating new ones");
DatabaseStats::new(self.documents.remap_types(), &*wtxn)?
}
};
self.put_documents_stats(wtxn, document_stats)?;
Ok(())
}
/// Writes the stats of the documents database.
pub fn put_documents_stats(
&self,
@ -1654,6 +1624,22 @@ impl Index {
self.main.remap_key_type::<Str>().delete(txn, main_key::FACET_SEARCH)
}
pub fn execute_after_update<'t>(&self, txn: &'t RoTxn<'_>) -> heed::Result<Option<&'t str>> {
self.main.remap_types::<Str, Str>().get(txn, main_key::EXECUTE_AFTER_UPDATE)
}
pub(crate) fn put_execute_after_update(
&self,
txn: &mut RwTxn<'_>,
val: &str,
) -> heed::Result<()> {
self.main.remap_types::<Str, Str>().put(txn, main_key::EXECUTE_AFTER_UPDATE, &val)
}
pub(crate) fn delete_execute_after_update(&self, txn: &mut RwTxn<'_>) -> heed::Result<bool> {
self.main.remap_key_type::<Str>().delete(txn, main_key::EXECUTE_AFTER_UPDATE)
}
pub fn localized_attributes_rules(
&self,
rtxn: &RoTxn<'_>,
@ -1755,6 +1741,122 @@ impl Index {
}
Ok(stats)
}
/// Check if the word is indexed in the index.
///
/// This function checks if the word is indexed in the index by looking at the word_docids and exact_word_docids.
///
/// # Arguments
///
/// * `rtxn`: The read transaction.
/// * `word`: The word to check.
pub fn contains_word(&self, rtxn: &RoTxn<'_>, word: &str) -> Result<bool> {
Ok(self.word_docids.remap_data_type::<DecodeIgnore>().get(rtxn, word)?.is_some()
|| self.exact_word_docids.remap_data_type::<DecodeIgnore>().get(rtxn, word)?.is_some())
}
/// Returns the sizes in bytes of each of the index database at the given rtxn.
pub fn database_sizes(&self, rtxn: &RoTxn<'_>) -> heed::Result<IndexMap<&'static str, usize>> {
let Self {
env: _,
main,
external_documents_ids,
word_docids,
exact_word_docids,
word_prefix_docids,
exact_word_prefix_docids,
word_pair_proximity_docids,
word_position_docids,
word_fid_docids,
word_prefix_position_docids,
word_prefix_fid_docids,
field_id_word_count_docids,
facet_id_f64_docids,
facet_id_string_docids,
facet_id_normalized_string_strings,
facet_id_string_fst,
facet_id_exists_docids,
facet_id_is_null_docids,
facet_id_is_empty_docids,
field_id_docid_facet_f64s,
field_id_docid_facet_strings,
vector_arroy,
embedder_category_id,
documents,
} = self;
fn compute_size(stats: DatabaseStat) -> usize {
let DatabaseStat {
page_size,
depth: _,
branch_pages,
leaf_pages,
overflow_pages,
entries: _,
} = stats;
(branch_pages + leaf_pages + overflow_pages) * page_size as usize
}
let mut sizes = IndexMap::new();
sizes.insert("main", main.stat(rtxn).map(compute_size)?);
sizes
.insert("external_documents_ids", external_documents_ids.stat(rtxn).map(compute_size)?);
sizes.insert("word_docids", word_docids.stat(rtxn).map(compute_size)?);
sizes.insert("exact_word_docids", exact_word_docids.stat(rtxn).map(compute_size)?);
sizes.insert("word_prefix_docids", word_prefix_docids.stat(rtxn).map(compute_size)?);
sizes.insert(
"exact_word_prefix_docids",
exact_word_prefix_docids.stat(rtxn).map(compute_size)?,
);
sizes.insert(
"word_pair_proximity_docids",
word_pair_proximity_docids.stat(rtxn).map(compute_size)?,
);
sizes.insert("word_position_docids", word_position_docids.stat(rtxn).map(compute_size)?);
sizes.insert("word_fid_docids", word_fid_docids.stat(rtxn).map(compute_size)?);
sizes.insert(
"word_prefix_position_docids",
word_prefix_position_docids.stat(rtxn).map(compute_size)?,
);
sizes
.insert("word_prefix_fid_docids", word_prefix_fid_docids.stat(rtxn).map(compute_size)?);
sizes.insert(
"field_id_word_count_docids",
field_id_word_count_docids.stat(rtxn).map(compute_size)?,
);
sizes.insert("facet_id_f64_docids", facet_id_f64_docids.stat(rtxn).map(compute_size)?);
sizes
.insert("facet_id_string_docids", facet_id_string_docids.stat(rtxn).map(compute_size)?);
sizes.insert(
"facet_id_normalized_string_strings",
facet_id_normalized_string_strings.stat(rtxn).map(compute_size)?,
);
sizes.insert("facet_id_string_fst", facet_id_string_fst.stat(rtxn).map(compute_size)?);
sizes
.insert("facet_id_exists_docids", facet_id_exists_docids.stat(rtxn).map(compute_size)?);
sizes.insert(
"facet_id_is_null_docids",
facet_id_is_null_docids.stat(rtxn).map(compute_size)?,
);
sizes.insert(
"facet_id_is_empty_docids",
facet_id_is_empty_docids.stat(rtxn).map(compute_size)?,
);
sizes.insert(
"field_id_docid_facet_f64s",
field_id_docid_facet_f64s.stat(rtxn).map(compute_size)?,
);
sizes.insert(
"field_id_docid_facet_strings",
field_id_docid_facet_strings.stat(rtxn).map(compute_size)?,
);
sizes.insert("vector_arroy", vector_arroy.stat(rtxn).map(compute_size)?);
sizes.insert("embedder_category_id", embedder_category_id.stat(rtxn).map(compute_size)?);
sizes.insert("documents", documents.stat(rtxn).map(compute_size)?);
Ok(sizes)
}
}
#[derive(Debug, Deserialize, Serialize)]

View File

@ -190,8 +190,18 @@ macro_rules! make_atomic_progress {
};
}
make_atomic_progress!(Document alias AtomicDocumentStep => "document" );
make_atomic_progress!(Payload alias AtomicPayloadStep => "payload" );
make_atomic_progress!(Document alias AtomicDocumentStep => "document");
make_atomic_progress!(Payload alias AtomicPayloadStep => "payload");
make_enum_progress! {
pub enum MergingWordCache {
WordDocids,
WordFieldIdDocids,
ExactWordDocids,
WordPositionDocids,
FieldIdWordCountDocids,
}
}
#[derive(Debug, Serialize, Clone, ToSchema)]
#[serde(rename_all = "camelCase")]

View File

@ -164,7 +164,7 @@ impl Search<'_> {
sort_criteria: self.sort_criteria.clone(),
distinct: self.distinct.clone(),
searchable_attributes: self.searchable_attributes,
geo_strategy: self.geo_strategy,
geo_param: self.geo_param,
terms_matching_strategy: self.terms_matching_strategy,
scoring_strategy: ScoringStrategy::Detailed,
words_limit: self.words_limit,

View File

@ -45,7 +45,7 @@ pub struct Search<'a> {
sort_criteria: Option<Vec<AscDesc>>,
distinct: Option<String>,
searchable_attributes: Option<&'a [String]>,
geo_strategy: new::GeoSortStrategy,
geo_param: new::GeoSortParameter,
terms_matching_strategy: TermsMatchingStrategy,
scoring_strategy: ScoringStrategy,
words_limit: usize,
@ -68,7 +68,7 @@ impl<'a> Search<'a> {
sort_criteria: None,
distinct: None,
searchable_attributes: None,
geo_strategy: new::GeoSortStrategy::default(),
geo_param: new::GeoSortParameter::default(),
terms_matching_strategy: TermsMatchingStrategy::default(),
scoring_strategy: Default::default(),
exhaustive_number_hits: false,
@ -145,7 +145,13 @@ impl<'a> Search<'a> {
#[cfg(test)]
pub fn geo_sort_strategy(&mut self, strategy: new::GeoSortStrategy) -> &mut Search<'a> {
self.geo_strategy = strategy;
self.geo_param.strategy = strategy;
self
}
#[cfg(test)]
pub fn geo_max_bucket_size(&mut self, max_size: u64) -> &mut Search<'a> {
self.geo_param.max_bucket_size = max_size;
self
}
@ -232,7 +238,7 @@ impl<'a> Search<'a> {
universe,
&self.sort_criteria,
&self.distinct,
self.geo_strategy,
self.geo_param,
self.offset,
self.limit,
embedder_name,
@ -251,7 +257,7 @@ impl<'a> Search<'a> {
universe,
&self.sort_criteria,
&self.distinct,
self.geo_strategy,
self.geo_param,
self.offset,
self.limit,
Some(self.words_limit),
@ -290,7 +296,7 @@ impl fmt::Debug for Search<'_> {
sort_criteria,
distinct,
searchable_attributes,
geo_strategy: _,
geo_param: _,
terms_matching_strategy,
scoring_strategy,
words_limit,

View File

@ -173,16 +173,18 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
ranking_rule_scores.push(ScoreDetails::Skipped);
// remove candidates from the universe without adding them to result if their score is below the threshold
if let Some(ranking_score_threshold) = ranking_score_threshold {
let current_score = ScoreDetails::global_score(ranking_rule_scores.iter());
if current_score < ranking_score_threshold {
all_candidates -= bucket | &ranking_rule_universes[cur_ranking_rule_index];
back!();
continue;
}
}
let is_below_threshold =
ranking_score_threshold.is_some_and(|ranking_score_threshold| {
let current_score = ScoreDetails::global_score(ranking_rule_scores.iter());
current_score < ranking_score_threshold
});
maybe_add_to_results!(bucket);
if is_below_threshold {
all_candidates -= &bucket;
all_candidates -= &ranking_rule_universes[cur_ranking_rule_index];
} else {
maybe_add_to_results!(bucket);
}
ranking_rule_scores.pop();
@ -237,23 +239,24 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
);
// remove candidates from the universe without adding them to result if their score is below the threshold
if let Some(ranking_score_threshold) = ranking_score_threshold {
let is_below_threshold = ranking_score_threshold.is_some_and(|ranking_score_threshold| {
let current_score = ScoreDetails::global_score(ranking_rule_scores.iter());
if current_score < ranking_score_threshold {
all_candidates -=
next_bucket.candidates | &ranking_rule_universes[cur_ranking_rule_index];
back!();
continue;
}
}
current_score < ranking_score_threshold
});
ranking_rule_universes[cur_ranking_rule_index] -= &next_bucket.candidates;
if cur_ranking_rule_index == ranking_rules_len - 1
|| (scoring_strategy == ScoringStrategy::Skip && next_bucket.candidates.len() <= 1)
|| cur_offset + (next_bucket.candidates.len() as usize) < from
|| is_below_threshold
{
maybe_add_to_results!(next_bucket.candidates);
if is_below_threshold {
all_candidates -= &next_bucket.candidates;
all_candidates -= &ranking_rule_universes[cur_ranking_rule_index];
} else {
maybe_add_to_results!(next_bucket.candidates);
}
ranking_rule_scores.pop();
continue;
}

View File

@ -1,10 +1,8 @@
use std::collections::VecDeque;
use std::iter::FromIterator;
use heed::types::{Bytes, Unit};
use heed::{RoPrefix, RoTxn};
use roaring::RoaringBitmap;
use rstar::RTree;
use std::collections::VecDeque;
use super::facet_string_values;
use super::ranking_rules::{RankingRule, RankingRuleOutput, RankingRuleQueryTrait};
@ -41,6 +39,21 @@ fn facet_number_values<'a>(
Ok(iter)
}
#[derive(Debug, Clone, Copy)]
pub struct Parameter {
// Define the strategy used by the geo sort
pub strategy: Strategy,
// Limit the number of docs in a single bucket to avoid unexpectedly large overhead
pub max_bucket_size: u64,
// Considering the errors of GPS and geographical calculations, distances less than distance_error_margin will be treated as equal
pub distance_error_margin: f64,
}
impl Default for Parameter {
fn default() -> Self {
Self { strategy: Strategy::default(), max_bucket_size: 1000, distance_error_margin: 1.0 }
}
}
/// Define the strategy used by the geo sort.
/// The parameter represents the cache size, and, in the case of the Dynamic strategy,
/// the point where we move from using the iterative strategy to the rtree.
@ -84,15 +97,21 @@ pub struct GeoSort<Q: RankingRuleQueryTrait> {
cached_sorted_docids: VecDeque<(u32, [f64; 2])>,
geo_candidates: RoaringBitmap,
// Limit the number of docs in a single bucket to avoid unexpectedly large overhead
max_bucket_size: u64,
// Considering the errors of GPS and geographical calculations, distances less than distance_error_margin will be treated as equal
distance_error_margin: f64,
}
impl<Q: RankingRuleQueryTrait> GeoSort<Q> {
pub fn new(
strategy: Strategy,
parameter: Parameter,
geo_faceted_docids: RoaringBitmap,
point: [f64; 2],
ascending: bool,
) -> Result<Self> {
let Parameter { strategy, max_bucket_size, distance_error_margin } = parameter;
Ok(Self {
query: None,
strategy,
@ -102,6 +121,8 @@ impl<Q: RankingRuleQueryTrait> GeoSort<Q> {
field_ids: None,
rtree: None,
cached_sorted_docids: VecDeque::new(),
max_bucket_size,
distance_error_margin,
})
}
@ -240,12 +261,12 @@ impl<'ctx, Q: RankingRuleQueryTrait> RankingRule<'ctx, Q> for GeoSort<Q> {
fn next_bucket(
&mut self,
ctx: &mut SearchContext<'ctx>,
logger: &mut dyn SearchLogger<Q>,
_logger: &mut dyn SearchLogger<Q>,
universe: &RoaringBitmap,
) -> Result<Option<RankingRuleOutput<Q>>> {
let query = self.query.as_ref().unwrap().clone();
let geo_candidates = &self.geo_candidates & universe;
let mut geo_candidates = &self.geo_candidates & universe;
if geo_candidates.is_empty() {
return Ok(Some(RankingRuleOutput {
@ -267,24 +288,102 @@ impl<'ctx, Q: RankingRuleQueryTrait> RankingRule<'ctx, Q> for GeoSort<Q> {
cache.pop_back()
}
};
while let Some((id, point)) = next(&mut self.cached_sorted_docids) {
if geo_candidates.contains(id) {
return Ok(Some(RankingRuleOutput {
query,
candidates: RoaringBitmap::from_iter([id]),
score: ScoreDetails::GeoSort(score_details::GeoSort {
target_point: self.point,
ascending: self.ascending,
value: Some(point),
}),
}));
let put_back = |cache: &mut VecDeque<_>, x: _| {
if ascending {
cache.push_front(x)
} else {
cache.push_back(x)
}
};
let mut current_bucket = RoaringBitmap::new();
// current_distance stores the first point and distance in current bucket
let mut current_distance: Option<([f64; 2], f64)> = None;
loop {
// The loop will only exit when we have found all points with equal distance or have exhausted the candidates.
if let Some((id, point)) = next(&mut self.cached_sorted_docids) {
if geo_candidates.contains(id) {
let distance = distance_between_two_points(&self.point, &point);
if let Some((point0, bucket_distance)) = current_distance.as_ref() {
if (bucket_distance - distance).abs() > self.distance_error_margin {
// different distance, point belongs to next bucket
put_back(&mut self.cached_sorted_docids, (id, point));
return Ok(Some(RankingRuleOutput {
query,
candidates: current_bucket,
score: ScoreDetails::GeoSort(score_details::GeoSort {
target_point: self.point,
ascending: self.ascending,
value: Some(point0.to_owned()),
}),
}));
} else {
// same distance, point belongs to current bucket
current_bucket.insert(id);
// remove from cadidates to prevent it from being added to the cache again
geo_candidates.remove(id);
// current bucket size reaches limit, force return
if current_bucket.len() == self.max_bucket_size {
return Ok(Some(RankingRuleOutput {
query,
candidates: current_bucket,
score: ScoreDetails::GeoSort(score_details::GeoSort {
target_point: self.point,
ascending: self.ascending,
value: Some(point0.to_owned()),
}),
}));
}
}
} else {
// first doc in current bucket
current_distance = Some((point, distance));
current_bucket.insert(id);
geo_candidates.remove(id);
// current bucket size reaches limit, force return
if current_bucket.len() == self.max_bucket_size {
return Ok(Some(RankingRuleOutput {
query,
candidates: current_bucket,
score: ScoreDetails::GeoSort(score_details::GeoSort {
target_point: self.point,
ascending: self.ascending,
value: Some(point.to_owned()),
}),
}));
}
}
}
} else {
// cache exhausted, we need to refill it
self.fill_buffer(ctx, &geo_candidates)?;
if self.cached_sorted_docids.is_empty() {
// candidates exhausted, exit
if let Some((point0, _)) = current_distance.as_ref() {
return Ok(Some(RankingRuleOutput {
query,
candidates: current_bucket,
score: ScoreDetails::GeoSort(score_details::GeoSort {
target_point: self.point,
ascending: self.ascending,
value: Some(point0.to_owned()),
}),
}));
} else {
return Ok(Some(RankingRuleOutput {
query,
candidates: universe.clone(),
score: ScoreDetails::GeoSort(score_details::GeoSort {
target_point: self.point,
ascending: self.ascending,
value: None,
}),
}));
}
}
}
}
// if we got out of this loop it means we've exhausted our cache.
// we need to refill it and run the function again.
self.fill_buffer(ctx, &geo_candidates)?;
self.next_bucket(ctx, logger, universe)
}
#[tracing::instrument(level = "trace", skip_all, target = "search::geo_sort")]

View File

@ -513,7 +513,7 @@ mod tests {
universe,
&None,
&None,
crate::search::new::GeoSortStrategy::default(),
crate::search::new::GeoSortParameter::default(),
0,
100,
Some(10),

View File

@ -45,6 +45,7 @@ use sort::Sort;
use self::distinct::facet_string_values;
use self::geo_sort::GeoSort;
pub use self::geo_sort::Parameter as GeoSortParameter;
pub use self::geo_sort::Strategy as GeoSortStrategy;
use self::graph_based_ranking_rule::Words;
use self::interner::Interned;
@ -274,7 +275,7 @@ fn resolve_negative_phrases(
fn get_ranking_rules_for_placeholder_search<'ctx>(
ctx: &SearchContext<'ctx>,
sort_criteria: &Option<Vec<AscDesc>>,
geo_strategy: geo_sort::Strategy,
geo_param: geo_sort::Parameter,
) -> Result<Vec<BoxRankingRule<'ctx, PlaceholderQuery>>> {
let mut sort = false;
let mut sorted_fields = HashSet::new();
@ -299,7 +300,7 @@ fn get_ranking_rules_for_placeholder_search<'ctx>(
&mut ranking_rules,
&mut sorted_fields,
&mut geo_sorted,
geo_strategy,
geo_param,
)?;
sort = true;
}
@ -326,7 +327,7 @@ fn get_ranking_rules_for_placeholder_search<'ctx>(
fn get_ranking_rules_for_vector<'ctx>(
ctx: &SearchContext<'ctx>,
sort_criteria: &Option<Vec<AscDesc>>,
geo_strategy: geo_sort::Strategy,
geo_param: geo_sort::Parameter,
limit_plus_offset: usize,
target: &[f32],
embedder_name: &str,
@ -375,7 +376,7 @@ fn get_ranking_rules_for_vector<'ctx>(
&mut ranking_rules,
&mut sorted_fields,
&mut geo_sorted,
geo_strategy,
geo_param,
)?;
sort = true;
}
@ -403,7 +404,7 @@ fn get_ranking_rules_for_vector<'ctx>(
fn get_ranking_rules_for_query_graph_search<'ctx>(
ctx: &SearchContext<'ctx>,
sort_criteria: &Option<Vec<AscDesc>>,
geo_strategy: geo_sort::Strategy,
geo_param: geo_sort::Parameter,
terms_matching_strategy: TermsMatchingStrategy,
) -> Result<Vec<BoxRankingRule<'ctx, QueryGraph>>> {
// query graph search
@ -477,7 +478,7 @@ fn get_ranking_rules_for_query_graph_search<'ctx>(
&mut ranking_rules,
&mut sorted_fields,
&mut geo_sorted,
geo_strategy,
geo_param,
)?;
sort = true;
}
@ -514,7 +515,7 @@ fn resolve_sort_criteria<'ctx, Query: RankingRuleQueryTrait>(
ranking_rules: &mut Vec<BoxRankingRule<'ctx, Query>>,
sorted_fields: &mut HashSet<String>,
geo_sorted: &mut bool,
geo_strategy: geo_sort::Strategy,
geo_param: geo_sort::Parameter,
) -> Result<()> {
let sort_criteria = sort_criteria.clone().unwrap_or_default();
ranking_rules.reserve(sort_criteria.len());
@ -540,7 +541,7 @@ fn resolve_sort_criteria<'ctx, Query: RankingRuleQueryTrait>(
}
let geo_faceted_docids = ctx.index.geo_faceted_documents_ids(ctx.txn)?;
ranking_rules.push(Box::new(GeoSort::new(
geo_strategy,
geo_param,
geo_faceted_docids,
point,
true,
@ -552,7 +553,7 @@ fn resolve_sort_criteria<'ctx, Query: RankingRuleQueryTrait>(
}
let geo_faceted_docids = ctx.index.geo_faceted_documents_ids(ctx.txn)?;
ranking_rules.push(Box::new(GeoSort::new(
geo_strategy,
geo_param,
geo_faceted_docids,
point,
false,
@ -584,7 +585,7 @@ pub fn execute_vector_search(
universe: RoaringBitmap,
sort_criteria: &Option<Vec<AscDesc>>,
distinct: &Option<String>,
geo_strategy: geo_sort::Strategy,
geo_param: geo_sort::Parameter,
from: usize,
length: usize,
embedder_name: &str,
@ -600,7 +601,7 @@ pub fn execute_vector_search(
let ranking_rules = get_ranking_rules_for_vector(
ctx,
sort_criteria,
geo_strategy,
geo_param,
from + length,
vector,
embedder_name,
@ -647,7 +648,7 @@ pub fn execute_search(
mut universe: RoaringBitmap,
sort_criteria: &Option<Vec<AscDesc>>,
distinct: &Option<String>,
geo_strategy: geo_sort::Strategy,
geo_param: geo_sort::Parameter,
from: usize,
length: usize,
words_limit: Option<usize>,
@ -761,7 +762,7 @@ pub fn execute_search(
let ranking_rules = get_ranking_rules_for_query_graph_search(
ctx,
sort_criteria,
geo_strategy,
geo_param,
terms_matching_strategy,
)?;
@ -783,7 +784,7 @@ pub fn execute_search(
)?
} else {
let ranking_rules =
get_ranking_rules_for_placeholder_search(ctx, sort_criteria, geo_strategy)?;
get_ranking_rules_for_placeholder_search(ctx, sort_criteria, geo_param)?;
bucket_sort(
ctx,
ranking_rules,

View File

@ -1,10 +1,12 @@
use std::borrow::Cow;
use std::cmp::Ordering;
use std::collections::BTreeSet;
use std::ops::ControlFlow;
use fst::automaton::Str;
use fst::{Automaton, IntoStreamer, Streamer};
use fst::{IntoStreamer, Streamer};
use heed::types::DecodeIgnore;
use itertools::{merge_join_by, EitherOrBoth};
use super::{OneTypoTerm, Phrase, QueryTerm, ZeroTypoTerm};
use crate::search::fst_utils::{Complement, Intersection, StartsWith, Union};
@ -16,16 +18,10 @@ use crate::{Result, MAX_WORD_LENGTH};
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum NumberOfTypos {
Zero,
One,
Two,
}
pub enum ZeroOrOneTypo {
Zero,
One,
}
impl Interned<QueryTerm> {
pub fn compute_fully_if_needed(self, ctx: &mut SearchContext<'_>) -> Result<()> {
let s = ctx.term_interner.get_mut(self);
@ -47,34 +43,45 @@ impl Interned<QueryTerm> {
}
fn find_zero_typo_prefix_derivations(
ctx: &mut SearchContext<'_>,
word_interned: Interned<String>,
fst: fst::Set<Cow<'_, [u8]>>,
word_interner: &mut DedupInterner<String>,
mut visit: impl FnMut(Interned<String>) -> Result<ControlFlow<()>>,
) -> Result<()> {
let word = word_interner.get(word_interned).to_owned();
let word = ctx.word_interner.get(word_interned).to_owned();
let word = word.as_str();
let prefix = Str::new(word).starts_with();
let mut stream = fst.search(prefix).into_stream();
while let Some(derived_word) = stream.next() {
let derived_word = std::str::from_utf8(derived_word)?.to_owned();
let derived_word_interned = word_interner.insert(derived_word);
if derived_word_interned != word_interned {
let cf = visit(derived_word_interned)?;
if cf.is_break() {
break;
let words =
ctx.index.word_docids.remap_data_type::<DecodeIgnore>().prefix_iter(ctx.txn, word)?;
let exact_words =
ctx.index.exact_word_docids.remap_data_type::<DecodeIgnore>().prefix_iter(ctx.txn, word)?;
for eob in merge_join_by(words, exact_words, |lhs, rhs| match (lhs, rhs) {
(Ok((word, _)), Ok((exact_word, _))) => word.cmp(exact_word),
(Err(_), _) | (_, Err(_)) => Ordering::Equal,
}) {
match eob {
EitherOrBoth::Both(kv, _) | EitherOrBoth::Left(kv) | EitherOrBoth::Right(kv) => {
let (derived_word, _) = kv?;
let derived_word = derived_word.to_string();
let derived_word_interned = ctx.word_interner.insert(derived_word);
if derived_word_interned != word_interned {
let cf = visit(derived_word_interned)?;
if cf.is_break() {
break;
}
}
}
}
}
Ok(())
}
fn find_zero_one_typo_derivations(
fn find_one_typo_derivations(
ctx: &mut SearchContext<'_>,
word_interned: Interned<String>,
is_prefix: bool,
mut visit: impl FnMut(Interned<String>, ZeroOrOneTypo) -> Result<ControlFlow<()>>,
mut visit: impl FnMut(Interned<String>) -> Result<ControlFlow<()>>,
) -> Result<()> {
let fst = ctx.get_words_fst()?;
let word = ctx.word_interner.get(word_interned).to_owned();
@ -89,16 +96,9 @@ fn find_zero_one_typo_derivations(
let derived_word = ctx.word_interner.insert(derived_word.to_owned());
let d = dfa.distance(state.1);
match d.to_u8() {
0 => {
if derived_word != word_interned {
let cf = visit(derived_word, ZeroOrOneTypo::Zero)?;
if cf.is_break() {
break;
}
}
}
0 => (),
1 => {
let cf = visit(derived_word, ZeroOrOneTypo::One)?;
let cf = visit(derived_word)?;
if cf.is_break() {
break;
}
@ -111,7 +111,7 @@ fn find_zero_one_typo_derivations(
Ok(())
}
fn find_zero_one_two_typo_derivations(
fn find_one_two_typo_derivations(
word_interned: Interned<String>,
is_prefix: bool,
fst: fst::Set<Cow<'_, [u8]>>,
@ -144,14 +144,7 @@ fn find_zero_one_two_typo_derivations(
// correct distance
let d = second_dfa.distance((state.1).0);
match d.to_u8() {
0 => {
if derived_word_interned != word_interned {
let cf = visit(derived_word_interned, NumberOfTypos::Zero)?;
if cf.is_break() {
break;
}
}
}
0 => (),
1 => {
let cf = visit(derived_word_interned, NumberOfTypos::One)?;
if cf.is_break() {
@ -194,8 +187,6 @@ pub fn partially_initialized_term_from_word(
});
}
let fst = ctx.index.words_fst(ctx.txn)?;
let use_prefix_db = is_prefix
&& (ctx
.index
@ -215,24 +206,19 @@ pub fn partially_initialized_term_from_word(
let mut zero_typo = None;
let mut prefix_of = BTreeSet::new();
if fst.contains(word) || ctx.index.exact_word_docids.get(ctx.txn, word)?.is_some() {
if ctx.index.contains_word(ctx.txn, word)? {
zero_typo = Some(word_interned);
}
if is_prefix && use_prefix_db.is_none() {
find_zero_typo_prefix_derivations(
word_interned,
fst,
&mut ctx.word_interner,
|derived_word| {
if prefix_of.len() < limits::MAX_PREFIX_COUNT {
prefix_of.insert(derived_word);
Ok(ControlFlow::Continue(()))
} else {
Ok(ControlFlow::Break(()))
}
},
)?;
find_zero_typo_prefix_derivations(ctx, word_interned, |derived_word| {
if prefix_of.len() < limits::MAX_PREFIX_COUNT {
prefix_of.insert(derived_word);
Ok(ControlFlow::Continue(()))
} else {
Ok(ControlFlow::Break(()))
}
})?;
}
let synonyms = ctx.index.synonyms(ctx.txn)?;
let mut synonym_word_count = 0;
@ -295,18 +281,13 @@ impl Interned<QueryTerm> {
let mut one_typo_words = BTreeSet::new();
if *max_nbr_typos > 0 {
find_zero_one_typo_derivations(ctx, original, is_prefix, |derived_word, nbr_typos| {
match nbr_typos {
ZeroOrOneTypo::Zero => {}
ZeroOrOneTypo::One => {
if one_typo_words.len() < limits::MAX_ONE_TYPO_COUNT {
one_typo_words.insert(derived_word);
} else {
return Ok(ControlFlow::Break(()));
}
}
find_one_typo_derivations(ctx, original, is_prefix, |derived_word| {
if one_typo_words.len() < limits::MAX_ONE_TYPO_COUNT {
one_typo_words.insert(derived_word);
Ok(ControlFlow::Continue(()))
} else {
Ok(ControlFlow::Break(()))
}
Ok(ControlFlow::Continue(()))
})?;
}
@ -357,7 +338,7 @@ impl Interned<QueryTerm> {
let mut two_typo_words = BTreeSet::new();
if *max_nbr_typos > 0 {
find_zero_one_two_typo_derivations(
find_one_two_typo_derivations(
*original,
*is_prefix,
ctx.index.words_fst(ctx.txn)?,
@ -370,7 +351,6 @@ impl Interned<QueryTerm> {
return Ok(ControlFlow::Break(()));
}
match nbr_typos {
NumberOfTypos::Zero => {}
NumberOfTypos::One => {
if one_typo_words.len() < limits::MAX_ONE_TYPO_COUNT {
one_typo_words.insert(derived_word);

View File

@ -4,6 +4,7 @@ This module tests the `geo_sort` ranking rule
use big_s::S;
use heed::RoTxn;
use itertools::Itertools;
use maplit::hashset;
use crate::constants::RESERVED_GEO_FIELD_NAME;
@ -18,7 +19,7 @@ fn create_index() -> TempIndex {
index
.update_settings(|s| {
s.set_primary_key("id".to_owned());
s.set_sortable_fields(hashset! { S(RESERVED_GEO_FIELD_NAME) });
s.set_sortable_fields(hashset! { S(RESERVED_GEO_FIELD_NAME), S("score") });
s.set_criteria(vec![Criterion::Words, Criterion::Sort]);
})
.unwrap();
@ -95,6 +96,112 @@ fn test_geo_sort() {
insta::assert_snapshot!(format!("{scores:#?}"));
}
#[test]
fn test_geo_sort_with_following_ranking_rules() {
let index = create_index();
index
.add_documents(documents!([
{ "id": 1 }, { "id": 4 }, { "id": 3 }, { "id": 2 }, { "id": 5 },
{ "id": 6, RESERVED_GEO_FIELD_NAME: { "lat": 2, "lng": 2 }, "score": 10 },
{ "id": 7, RESERVED_GEO_FIELD_NAME: { "lat": 2, "lng": 2 }, "score": 9 },
{ "id": 8, RESERVED_GEO_FIELD_NAME: { "lat": 2, "lng": 2 }, "score": 8 },
{ "id": 9, RESERVED_GEO_FIELD_NAME: { "lat": 2, "lng": 2 }, "score": 7 },
{ "id": 10, RESERVED_GEO_FIELD_NAME: { "lat": 2, "lng": 2 }, "score":6 },
{ "id": 11, RESERVED_GEO_FIELD_NAME: { "lat": 2, "lng": 2 }, "score": 5 },
{ "id": 12, RESERVED_GEO_FIELD_NAME: { "lat": 5, "lng": 5 }, "score": 10 },
{ "id": 13, RESERVED_GEO_FIELD_NAME: { "lat": 5, "lng": 5 }, "score": 9 },
{ "id": 14, RESERVED_GEO_FIELD_NAME: { "lat": 5, "lng": 5 }, "score": 8 },
{ "id": 15, RESERVED_GEO_FIELD_NAME: { "lat": 5, "lng": 5 }, "score": 7 },
]))
.unwrap();
let rtxn = index.read_txn().unwrap();
let mut s = Search::new(&rtxn, &index);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.sort_criteria(vec![
AscDesc::Asc(Member::Geo([0., 0.])),
AscDesc::Desc(Member::Field("score".to_string())),
]);
let (ids, scores) = execute_iterative_and_rtree_returns_the_same(&rtxn, &index, &mut s);
insta::assert_snapshot!(format!("{ids:?}"), @"[6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 4, 3, 2, 5]");
insta::assert_snapshot!(format!("{scores:#?}"));
s.sort_criteria(vec![
AscDesc::Desc(Member::Geo([0., 0.])),
AscDesc::Desc(Member::Field("score".to_string())),
]);
let (ids, scores) = execute_iterative_and_rtree_returns_the_same(&rtxn, &index, &mut s);
insta::assert_snapshot!(format!("{ids:?}"), @"[12, 13, 14, 15, 6, 7, 8, 9, 10, 11, 1, 4, 3, 2, 5]");
insta::assert_snapshot!(format!("{scores:#?}"));
}
#[test]
fn test_geo_sort_reached_max_bucket_size() {
let index = create_index();
index
.add_documents(documents!([
{ "id": 1 }, { "id": 4 }, { "id": 3 }, { "id": 2 }, { "id": 5 },
{ "id": 6, RESERVED_GEO_FIELD_NAME: { "lat": 2, "lng": 2 }, "score": 10 },
{ "id": 7, RESERVED_GEO_FIELD_NAME: { "lat": 2, "lng": 2 }, "score": 9 },
{ "id": 8, RESERVED_GEO_FIELD_NAME: { "lat": 2, "lng": 2 }, "score": 8 },
{ "id": 9, RESERVED_GEO_FIELD_NAME: { "lat": 2, "lng": 2 }, "score": 7 },
{ "id": 10, RESERVED_GEO_FIELD_NAME: { "lat": 2, "lng": 2 }, "score":6 },
{ "id": 11, RESERVED_GEO_FIELD_NAME: { "lat": 2, "lng": 2 }, "score": 5 },
{ "id": 12, RESERVED_GEO_FIELD_NAME: { "lat": 5, "lng": 5 }, "score": 10 },
{ "id": 13, RESERVED_GEO_FIELD_NAME: { "lat": 5, "lng": 5 }, "score": 9 },
{ "id": 14, RESERVED_GEO_FIELD_NAME: { "lat": 5, "lng": 5 }, "score": 8 },
{ "id": 15, RESERVED_GEO_FIELD_NAME: { "lat": 5, "lng": 5 }, "score": 7 },
]))
.unwrap();
let rtxn = index.read_txn().unwrap();
let mut s = Search::new(&rtxn, &index);
s.geo_max_bucket_size(2);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.sort_criteria(vec![
AscDesc::Asc(Member::Geo([0., 0.])),
AscDesc::Desc(Member::Field("score".to_string())),
]);
/* We should not expect the results to obey the following ranking rules when the bucket size limit is reached,
* nor should we expect Iteration and rtree to give exactly the same order for the same bucket in this case.*/
s.geo_sort_strategy(GeoSortStrategy::AlwaysIterative(1000));
let SearchResult { documents_ids, .. } = s.execute().unwrap();
let iterative_ids = collect_field_values(&index, &rtxn, "id", &documents_ids);
assert_eq!(iterative_ids.len(), 15);
for id_str in &iterative_ids[0..6] {
let id = id_str.parse::<u32>().unwrap();
assert!((6..=11).contains(&id))
}
for id_str in &iterative_ids[6..10] {
let id = id_str.parse::<u32>().unwrap();
assert!((12..=15).contains(&id))
}
let no_geo_ids = iterative_ids[10..].iter().collect_vec();
insta::assert_snapshot!(format!("{no_geo_ids:?}"), @r#"["1", "4", "3", "2", "5"]"#);
s.geo_sort_strategy(GeoSortStrategy::AlwaysRtree(1000));
let SearchResult { documents_ids, .. } = s.execute().unwrap();
let rtree_ids = collect_field_values(&index, &rtxn, "id", &documents_ids);
assert_eq!(rtree_ids.len(), 15);
for id_str in &rtree_ids[0..6] {
let id = id_str.parse::<u32>().unwrap();
assert!((6..=11).contains(&id))
}
for id_str in &rtree_ids[6..10] {
let id = id_str.parse::<u32>().unwrap();
assert!((12..=15).contains(&id))
}
let no_geo_ids = rtree_ids[10..].iter().collect_vec();
insta::assert_snapshot!(format!("{no_geo_ids:?}"), @r#"["1", "4", "3", "2", "5"]"#);
}
#[test]
fn test_geo_sort_around_the_edge_of_the_flat_earth() {
let index = create_index();

View File

@ -0,0 +1,356 @@
---
source: crates/milli/src/search/new/tests/geo_sort.rs
expression: "format!(\"{scores:#?}\")"
---
[
[
GeoSort(
GeoSort {
target_point: [
0.0,
0.0,
],
ascending: true,
value: Some(
[
2.0,
2.0,
],
),
},
),
Sort(
Sort {
field_name: "score",
ascending: false,
redacted: false,
value: Number(10.0),
},
),
],
[
GeoSort(
GeoSort {
target_point: [
0.0,
0.0,
],
ascending: true,
value: Some(
[
2.0,
2.0,
],
),
},
),
Sort(
Sort {
field_name: "score",
ascending: false,
redacted: false,
value: Number(9.0),
},
),
],
[
GeoSort(
GeoSort {
target_point: [
0.0,
0.0,
],
ascending: true,
value: Some(
[
2.0,
2.0,
],
),
},
),
Sort(
Sort {
field_name: "score",
ascending: false,
redacted: false,
value: Number(8.0),
},
),
],
[
GeoSort(
GeoSort {
target_point: [
0.0,
0.0,
],
ascending: true,
value: Some(
[
2.0,
2.0,
],
),
},
),
Sort(
Sort {
field_name: "score",
ascending: false,
redacted: false,
value: Number(7.0),
},
),
],
[
GeoSort(
GeoSort {
target_point: [
0.0,
0.0,
],
ascending: true,
value: Some(
[
2.0,
2.0,
],
),
},
),
Sort(
Sort {
field_name: "score",
ascending: false,
redacted: false,
value: Number(6.0),
},
),
],
[
GeoSort(
GeoSort {
target_point: [
0.0,
0.0,
],
ascending: true,
value: Some(
[
2.0,
2.0,
],
),
},
),
Sort(
Sort {
field_name: "score",
ascending: false,
redacted: false,
value: Number(5.0),
},
),
],
[
GeoSort(
GeoSort {
target_point: [
0.0,
0.0,
],
ascending: true,
value: Some(
[
5.0,
5.0,
],
),
},
),
Sort(
Sort {
field_name: "score",
ascending: false,
redacted: false,
value: Number(10.0),
},
),
],
[
GeoSort(
GeoSort {
target_point: [
0.0,
0.0,
],
ascending: true,
value: Some(
[
5.0,
5.0,
],
),
},
),
Sort(
Sort {
field_name: "score",
ascending: false,
redacted: false,
value: Number(9.0),
},
),
],
[
GeoSort(
GeoSort {
target_point: [
0.0,
0.0,
],
ascending: true,
value: Some(
[
5.0,
5.0,
],
),
},
),
Sort(
Sort {
field_name: "score",
ascending: false,
redacted: false,
value: Number(8.0),
},
),
],
[
GeoSort(
GeoSort {
target_point: [
0.0,
0.0,
],
ascending: true,
value: Some(
[
5.0,
5.0,
],
),
},
),
Sort(
Sort {
field_name: "score",
ascending: false,
redacted: false,
value: Number(7.0),
},
),
],
[
GeoSort(
GeoSort {
target_point: [
0.0,
0.0,
],
ascending: true,
value: None,
},
),
Sort(
Sort {
field_name: "score",
ascending: false,
redacted: false,
value: Null,
},
),
],
[
GeoSort(
GeoSort {
target_point: [
0.0,
0.0,
],
ascending: true,
value: None,
},
),
Sort(
Sort {
field_name: "score",
ascending: false,
redacted: false,
value: Null,
},
),
],
[
GeoSort(
GeoSort {
target_point: [
0.0,
0.0,
],
ascending: true,
value: None,
},
),
Sort(
Sort {
field_name: "score",
ascending: false,
redacted: false,
value: Null,
},
),
],
[
GeoSort(
GeoSort {
target_point: [
0.0,
0.0,
],
ascending: true,
value: None,
},
),
Sort(
Sort {
field_name: "score",
ascending: false,
redacted: false,
value: Null,
},
),
],
[
GeoSort(
GeoSort {
target_point: [
0.0,
0.0,
],
ascending: true,
value: None,
},
),
Sort(
Sort {
field_name: "score",
ascending: false,
redacted: false,
value: Null,
},
),
],
]

View File

@ -0,0 +1,356 @@
---
source: crates/milli/src/search/new/tests/geo_sort.rs
expression: "format!(\"{scores:#?}\")"
---
[
[
GeoSort(
GeoSort {
target_point: [
0.0,
0.0,
],
ascending: false,
value: Some(
[
5.0,
5.0,
],
),
},
),
Sort(
Sort {
field_name: "score",
ascending: false,
redacted: false,
value: Number(10.0),
},
),
],
[
GeoSort(
GeoSort {
target_point: [
0.0,
0.0,
],
ascending: false,
value: Some(
[
5.0,
5.0,
],
),
},
),
Sort(
Sort {
field_name: "score",
ascending: false,
redacted: false,
value: Number(9.0),
},
),
],
[
GeoSort(
GeoSort {
target_point: [
0.0,
0.0,
],
ascending: false,
value: Some(
[
5.0,
5.0,
],
),
},
),
Sort(
Sort {
field_name: "score",
ascending: false,
redacted: false,
value: Number(8.0),
},
),
],
[
GeoSort(
GeoSort {
target_point: [
0.0,
0.0,
],
ascending: false,
value: Some(
[
5.0,
5.0,
],
),
},
),
Sort(
Sort {
field_name: "score",
ascending: false,
redacted: false,
value: Number(7.0),
},
),
],
[
GeoSort(
GeoSort {
target_point: [
0.0,
0.0,
],
ascending: false,
value: Some(
[
2.0,
2.0,
],
),
},
),
Sort(
Sort {
field_name: "score",
ascending: false,
redacted: false,
value: Number(10.0),
},
),
],
[
GeoSort(
GeoSort {
target_point: [
0.0,
0.0,
],
ascending: false,
value: Some(
[
2.0,
2.0,
],
),
},
),
Sort(
Sort {
field_name: "score",
ascending: false,
redacted: false,
value: Number(9.0),
},
),
],
[
GeoSort(
GeoSort {
target_point: [
0.0,
0.0,
],
ascending: false,
value: Some(
[
2.0,
2.0,
],
),
},
),
Sort(
Sort {
field_name: "score",
ascending: false,
redacted: false,
value: Number(8.0),
},
),
],
[
GeoSort(
GeoSort {
target_point: [
0.0,
0.0,
],
ascending: false,
value: Some(
[
2.0,
2.0,
],
),
},
),
Sort(
Sort {
field_name: "score",
ascending: false,
redacted: false,
value: Number(7.0),
},
),
],
[
GeoSort(
GeoSort {
target_point: [
0.0,
0.0,
],
ascending: false,
value: Some(
[
2.0,
2.0,
],
),
},
),
Sort(
Sort {
field_name: "score",
ascending: false,
redacted: false,
value: Number(6.0),
},
),
],
[
GeoSort(
GeoSort {
target_point: [
0.0,
0.0,
],
ascending: false,
value: Some(
[
2.0,
2.0,
],
),
},
),
Sort(
Sort {
field_name: "score",
ascending: false,
redacted: false,
value: Number(5.0),
},
),
],
[
GeoSort(
GeoSort {
target_point: [
0.0,
0.0,
],
ascending: false,
value: None,
},
),
Sort(
Sort {
field_name: "score",
ascending: false,
redacted: false,
value: Null,
},
),
],
[
GeoSort(
GeoSort {
target_point: [
0.0,
0.0,
],
ascending: false,
value: None,
},
),
Sort(
Sort {
field_name: "score",
ascending: false,
redacted: false,
value: Null,
},
),
],
[
GeoSort(
GeoSort {
target_point: [
0.0,
0.0,
],
ascending: false,
value: None,
},
),
Sort(
Sort {
field_name: "score",
ascending: false,
redacted: false,
value: Null,
},
),
],
[
GeoSort(
GeoSort {
target_point: [
0.0,
0.0,
],
ascending: false,
value: None,
},
),
Sort(
Sort {
field_name: "score",
ascending: false,
redacted: false,
value: Null,
},
),
],
[
GeoSort(
GeoSort {
target_point: [
0.0,
0.0,
],
ascending: false,
value: None,
},
),
Sort(
Sort {
field_name: "score",
ascending: false,
redacted: false,
value: Null,
},
),
],
]

View File

@ -28,6 +28,7 @@ pub use self::helpers::*;
pub use self::transform::{Transform, TransformOutput};
use super::facet::clear_facet_levels_based_on_settings_diff;
use super::new::StdResult;
use crate::database_stats::DatabaseStats;
use crate::documents::{obkv_to_object, DocumentsBatchReader};
use crate::error::{Error, InternalError};
use crate::index::{PrefixSearch, PrefixSettings};
@ -476,7 +477,8 @@ where
if !settings_diff.settings_update_only {
// Update the stats of the documents database when there is a document update.
self.index.update_documents_stats(self.wtxn, modified_docids)?;
let stats = DatabaseStats::new(self.index.documents.remap_data_type(), self.wtxn)?;
self.index.put_documents_stats(self.wtxn, stats)?;
}
// We write the field distribution into the main database
self.index.put_field_distribution(self.wtxn, &field_distribution)?;

View File

@ -406,6 +406,71 @@ impl<'doc> Versions<'doc> {
Ok(Some(Self::single(data)))
}
pub fn multiple_with_edits(
doc: Option<rhai::Map>,
mut versions: impl Iterator<Item = Result<RawMap<'doc, FxBuildHasher>>>,
engine: &rhai::Engine,
edit_function: &rhai::AST,
doc_alloc: &'doc bumpalo::Bump,
) -> Result<Option<Option<Self>>> {
let Some(data) = versions.next() else { return Ok(None) };
let mut doc = doc.unwrap_or_default();
let mut data = data?;
for version in versions {
let version = version?;
for (field, value) in version {
data.insert(field, value);
}
let mut scope = rhai::Scope::new();
data.iter().for_each(|(k, v)| {
doc.insert(k.into(), serde_json::from_str(v.get()).unwrap());
});
scope.push("doc", doc.clone());
let _ = engine.eval_ast_with_scope::<rhai::Dynamic>(&mut scope, edit_function).unwrap();
data = RawMap::with_hasher_in(FxBuildHasher, doc_alloc);
match scope.get_value::<rhai::Map>("doc") {
Some(map) => {
for (key, value) in map {
let mut vec = bumpalo::collections::Vec::new_in(doc_alloc);
serde_json::to_writer(&mut vec, &value).unwrap();
let key = doc_alloc.alloc_str(key.as_str());
let raw_value = serde_json::from_slice(vec.into_bump_slice()).unwrap();
data.insert(key, raw_value);
}
}
// In case the deletes the document and it's not the last change
// we simply set the document to an empty one and await the next change.
None => (),
}
}
// We must also run the code after the last change
let mut scope = rhai::Scope::new();
data.iter().for_each(|(k, v)| {
doc.insert(k.into(), serde_json::from_str(v.get()).unwrap());
});
scope.push("doc", doc);
let _ = engine.eval_ast_with_scope::<rhai::Dynamic>(&mut scope, edit_function).unwrap();
data = RawMap::with_hasher_in(FxBuildHasher, doc_alloc);
match scope.get_value::<rhai::Map>("doc") {
Some(map) => {
for (key, value) in map {
let mut vec = bumpalo::collections::Vec::new_in(doc_alloc);
serde_json::to_writer(&mut vec, &value).unwrap();
let key = doc_alloc.alloc_str(key.as_str());
let raw_value = serde_json::from_slice(vec.into_bump_slice()).unwrap();
data.insert(key, raw_value);
}
Ok(Some(Some(Self::single(data))))
}
None => Ok(Some(None)),
}
}
pub fn single(version: RawMap<'doc, FxBuildHasher>) -> Self {
Self { data: version }
}

View File

@ -1,5 +1,6 @@
use bumpalo::Bump;
use heed::RoTxn;
use serde_json::Value;
use super::document::{
Document as _, DocumentFromDb, DocumentFromVersions, MergedDocument, Versions,
@ -10,7 +11,7 @@ use super::vector_document::{
use crate::attribute_patterns::PatternMatch;
use crate::documents::FieldIdMapper;
use crate::vector::EmbeddingConfigs;
use crate::{DocumentId, Index, Result};
use crate::{DocumentId, Index, InternalError, Result};
pub enum DocumentChange<'doc> {
Deletion(Deletion<'doc>),
@ -243,6 +244,29 @@ impl<'doc> Update<'doc> {
Ok(has_deleted_fields)
}
/// Returns `true` if the geo fields have changed.
pub fn has_changed_for_geo_fields<'t, Mapper: FieldIdMapper>(
&self,
rtxn: &'t RoTxn,
index: &'t Index,
mapper: &'t Mapper,
) -> Result<bool> {
let current = self.current(rtxn, index, mapper)?;
let current_geo = current.geo_field()?;
let updated_geo = self.only_changed_fields().geo_field()?;
match (current_geo, updated_geo) {
(Some(current_geo), Some(updated_geo)) => {
let current: Value =
serde_json::from_str(current_geo.get()).map_err(InternalError::SerdeJson)?;
let updated: Value =
serde_json::from_str(updated_geo.get()).map_err(InternalError::SerdeJson)?;
Ok(current != updated)
}
(None, None) => Ok(false),
_ => Ok(true),
}
}
pub fn only_changed_vectors(
&self,
doc_alloc: &'doc Bump,

View File

@ -117,7 +117,7 @@ impl FacetedDocidsExtractor {
},
),
DocumentChange::Update(inner) => {
if !inner.has_changed_for_fields(
let has_changed = inner.has_changed_for_fields(
&mut |field_name| {
match_faceted_field(
field_name,
@ -130,7 +130,10 @@ impl FacetedDocidsExtractor {
rtxn,
index,
context.db_fields_ids_map,
)? {
)?;
let has_changed_for_geo_fields =
inner.has_changed_for_geo_fields(rtxn, index, context.db_fields_ids_map)?;
if !has_changed && !has_changed_for_geo_fields {
return Ok(());
}

View File

@ -121,6 +121,7 @@ impl<'extractor> Extractor<'extractor> for EmbeddingExtractor<'_, '_> {
// do we have set embeddings?
if let Some(embeddings) = new_vectors.embeddings {
chunks.set_vectors(
update.external_document_id(),
update.docid(),
embeddings
.into_vec(&context.doc_alloc, embedder_name)
@ -128,7 +129,7 @@ impl<'extractor> Extractor<'extractor> for EmbeddingExtractor<'_, '_> {
document_id: update.external_document_id().to_string(),
error: error.to_string(),
})?,
);
)?;
} else if new_vectors.regenerate {
let new_rendered = prompt.render_document(
update.external_document_id(),
@ -209,6 +210,7 @@ impl<'extractor> Extractor<'extractor> for EmbeddingExtractor<'_, '_> {
chunks.set_regenerate(insertion.docid(), new_vectors.regenerate);
if let Some(embeddings) = new_vectors.embeddings {
chunks.set_vectors(
insertion.external_document_id(),
insertion.docid(),
embeddings
.into_vec(&context.doc_alloc, embedder_name)
@ -218,7 +220,7 @@ impl<'extractor> Extractor<'extractor> for EmbeddingExtractor<'_, '_> {
.to_string(),
error: error.to_string(),
})?,
);
)?;
} else if new_vectors.regenerate {
let rendered = prompt.render_document(
insertion.external_document_id(),
@ -273,6 +275,7 @@ struct Chunks<'a, 'b, 'extractor> {
embedder: &'a Embedder,
embedder_id: u8,
embedder_name: &'a str,
dimensions: usize,
prompt: &'a Prompt,
possible_embedding_mistakes: &'a PossibleEmbeddingMistakes,
user_provided: &'a RefCell<EmbeddingExtractorData<'extractor>>,
@ -297,6 +300,7 @@ impl<'a, 'b, 'extractor> Chunks<'a, 'b, 'extractor> {
let capacity = embedder.prompt_count_in_chunk_hint() * embedder.chunk_count_hint();
let texts = BVec::with_capacity_in(capacity, doc_alloc);
let ids = BVec::with_capacity_in(capacity, doc_alloc);
let dimensions = embedder.dimensions();
Self {
texts,
ids,
@ -309,6 +313,7 @@ impl<'a, 'b, 'extractor> Chunks<'a, 'b, 'extractor> {
embedder_name,
user_provided,
has_manual_generation: None,
dimensions,
}
}
@ -490,7 +495,25 @@ impl<'a, 'b, 'extractor> Chunks<'a, 'b, 'extractor> {
}
}
fn set_vectors(&self, docid: DocumentId, embeddings: Vec<Embedding>) {
fn set_vectors(
&self,
external_docid: &'a str,
docid: DocumentId,
embeddings: Vec<Embedding>,
) -> Result<()> {
for (embedding_index, embedding) in embeddings.iter().enumerate() {
if embedding.len() != self.dimensions {
return Err(UserError::InvalidIndexingVectorDimensions {
expected: self.dimensions,
found: embedding.len(),
embedder_name: self.embedder_name.to_string(),
document_id: external_docid.to_string(),
embedding_index,
}
.into());
}
}
self.sender.set_vectors(docid, self.embedder_id, embeddings).unwrap();
Ok(())
}
}

View File

@ -17,6 +17,7 @@ use super::guess_primary_key::retrieve_or_guess_primary_key;
use crate::documents::PrimaryKey;
use crate::progress::{AtomicPayloadStep, Progress};
use crate::update::new::document::Versions;
use crate::update::new::indexer::update_by_function::obkv_to_rhaimap;
use crate::update::new::steps::IndexingStep;
use crate::update::new::thread_local::MostlySend;
use crate::update::new::{Deletion, Insertion, Update};
@ -157,7 +158,16 @@ impl<'pl> DocumentOperation<'pl> {
.sort_unstable_by_key(|(_, po)| first_update_pointer(&po.operations).unwrap_or(0));
let docids_version_offsets = docids_version_offsets.into_bump_slice();
Ok((DocumentOperationChanges { docids_version_offsets }, operations_stats, primary_key))
let engine = rhai::Engine::new();
// Make sure to correctly setup the engine and remove all settings
let ast = index.execute_after_update(rtxn)?.map(|f| engine.compile(f).unwrap());
let fidmap = index.fields_ids_map(rtxn)?;
Ok((
DocumentOperationChanges { docids_version_offsets, engine, ast, fidmap },
operations_stats,
primary_key,
))
}
}
@ -418,7 +428,15 @@ impl<'pl> DocumentChanges<'pl> for DocumentOperationChanges<'pl> {
'pl: 'doc,
{
let (external_doc, payload_operations) = item;
payload_operations.merge(external_doc, &context.doc_alloc)
payload_operations.merge(
&context.rtxn,
context.index,
&self.fidmap,
&self.engine,
self.ast.as_ref(),
external_doc,
&context.doc_alloc,
)
}
fn len(&self) -> usize {
@ -427,6 +445,9 @@ impl<'pl> DocumentChanges<'pl> for DocumentOperationChanges<'pl> {
}
pub struct DocumentOperationChanges<'pl> {
engine: rhai::Engine,
ast: Option<rhai::AST>,
fidmap: FieldsIdsMap,
docids_version_offsets: &'pl [(&'pl str, PayloadOperations<'pl>)],
}
@ -489,10 +510,14 @@ impl<'pl> PayloadOperations<'pl> {
}
/// Returns only the most recent version of a document based on the updates from the payloads.
///
/// This function is only meant to be used when doing a replacement and not an update.
#[allow(clippy::too_many_arguments)]
fn merge<'doc>(
&self,
rtxn: &heed::RoTxn,
index: &Index,
fidmap: &FieldsIdsMap,
engine: &rhai::Engine,
ast: Option<&rhai::AST>,
external_doc: &'doc str,
doc_alloc: &'doc Bump,
) -> Result<Option<DocumentChange<'doc>>>
@ -556,9 +581,34 @@ impl<'pl> PayloadOperations<'pl> {
Ok(document)
});
let Some(versions) = Versions::multiple(versions)? else { return Ok(None) };
let versions = match ast {
Some(ast) => {
let doc = index
.documents
.get(rtxn, &self.docid)?
.map(|obkv| obkv_to_rhaimap(obkv, fidmap))
.transpose()?;
match Versions::multiple_with_edits(doc, versions, engine, ast, doc_alloc)?
{
Some(Some(versions)) => Some(versions),
Some(None) if self.is_new => return Ok(None),
Some(None) => {
return Ok(Some(DocumentChange::Deletion(Deletion::create(
self.docid,
external_doc,
))));
}
None => None,
}
}
None => Versions::multiple(versions)?,
};
if self.is_new {
let Some(versions) = versions else {
return Ok(None);
};
if self.is_new || ast.is_some() {
Ok(Some(DocumentChange::Insertion(Insertion::create(
self.docid,
external_doc,

View File

@ -13,6 +13,7 @@ use super::super::thread_local::{FullySend, ThreadLocal};
use super::super::FacetFieldIdsDelta;
use super::document_changes::{extract, DocumentChanges, IndexingContext};
use crate::index::IndexEmbeddingConfig;
use crate::progress::MergingWordCache;
use crate::proximity::ProximityPrecision;
use crate::update::new::extract::EmbeddingExtractor;
use crate::update::new::merger::merge_and_send_rtree;
@ -96,6 +97,7 @@ where
{
let span = tracing::trace_span!(target: "indexing::documents::merge", parent: &indexer_span, "faceted");
let _entered = span.enter();
indexing_context.progress.update_progress(IndexingStep::MergingFacetCaches);
facet_field_ids_delta = merge_and_send_facet_docids(
caches,
@ -117,7 +119,6 @@ where
} = {
let span = tracing::trace_span!(target: "indexing::documents::extract", "word_docids");
let _entered = span.enter();
WordDocidsExtractors::run_extraction(
document_changes,
indexing_context,
@ -126,9 +127,13 @@ where
)?
};
indexing_context.progress.update_progress(IndexingStep::MergingWordCaches);
{
let span = tracing::trace_span!(target: "indexing::documents::merge", "word_docids");
let _entered = span.enter();
indexing_context.progress.update_progress(MergingWordCache::WordDocids);
merge_and_send_docids(
word_docids,
index.word_docids.remap_types(),
@ -142,6 +147,8 @@ where
let span =
tracing::trace_span!(target: "indexing::documents::merge", "word_fid_docids");
let _entered = span.enter();
indexing_context.progress.update_progress(MergingWordCache::WordFieldIdDocids);
merge_and_send_docids(
word_fid_docids,
index.word_fid_docids.remap_types(),
@ -155,6 +162,8 @@ where
let span =
tracing::trace_span!(target: "indexing::documents::merge", "exact_word_docids");
let _entered = span.enter();
indexing_context.progress.update_progress(MergingWordCache::ExactWordDocids);
merge_and_send_docids(
exact_word_docids,
index.exact_word_docids.remap_types(),
@ -168,6 +177,8 @@ where
let span =
tracing::trace_span!(target: "indexing::documents::merge", "word_position_docids");
let _entered = span.enter();
indexing_context.progress.update_progress(MergingWordCache::WordPositionDocids);
merge_and_send_docids(
word_position_docids,
index.word_position_docids.remap_types(),
@ -181,6 +192,8 @@ where
let span =
tracing::trace_span!(target: "indexing::documents::merge", "fid_word_count_docids");
let _entered = span.enter();
indexing_context.progress.update_progress(MergingWordCache::FieldIdWordCountDocids);
merge_and_send_docids(
fid_word_count_docids,
index.field_id_word_count_docids.remap_types(),
@ -210,6 +223,7 @@ where
{
let span = tracing::trace_span!(target: "indexing::documents::merge", "word_pair_proximity_docids");
let _entered = span.enter();
indexing_context.progress.update_progress(IndexingStep::MergingWordProximity);
merge_and_send_docids(
caches,

View File

@ -234,7 +234,6 @@ where
embedders,
field_distribution,
document_ids,
modified_docids,
)?;
Ok(congestion)

View File

@ -7,12 +7,13 @@ use itertools::{merge_join_by, EitherOrBoth};
use super::document_changes::IndexingContext;
use crate::facet::FacetType;
use crate::index::main_key::{WORDS_FST_KEY, WORDS_PREFIXES_FST_KEY};
use crate::progress::Progress;
use crate::update::del_add::DelAdd;
use crate::update::facet::new_incremental::FacetsUpdateIncremental;
use crate::update::facet::{FACET_GROUP_SIZE, FACET_MAX_GROUP_SIZE, FACET_MIN_LEVEL_SIZE};
use crate::update::new::facet_search_builder::FacetSearchBuilder;
use crate::update::new::merger::FacetFieldIdDelta;
use crate::update::new::steps::IndexingStep;
use crate::update::new::steps::{IndexingStep, PostProcessingFacets, PostProcessingWords};
use crate::update::new::word_fst_builder::{PrefixData, PrefixDelta, WordFstBuilder};
use crate::update::new::words_prefix_docids::{
compute_exact_word_prefix_docids, compute_word_prefix_docids, compute_word_prefix_fid_docids,
@ -33,11 +34,23 @@ where
{
let index = indexing_context.index;
indexing_context.progress.update_progress(IndexingStep::PostProcessingFacets);
compute_facet_level_database(index, wtxn, facet_field_ids_delta, &mut global_fields_ids_map)?;
compute_facet_search_database(index, wtxn, global_fields_ids_map)?;
compute_facet_level_database(
index,
wtxn,
facet_field_ids_delta,
&mut global_fields_ids_map,
indexing_context.progress,
)?;
compute_facet_search_database(index, wtxn, global_fields_ids_map, indexing_context.progress)?;
indexing_context.progress.update_progress(IndexingStep::PostProcessingWords);
if let Some(prefix_delta) = compute_word_fst(index, wtxn)? {
compute_prefix_database(index, wtxn, prefix_delta, indexing_context.grenad_parameters)?;
if let Some(prefix_delta) = compute_word_fst(index, wtxn, indexing_context.progress)? {
compute_prefix_database(
index,
wtxn,
prefix_delta,
indexing_context.grenad_parameters,
indexing_context.progress,
)?;
};
Ok(())
}
@ -48,21 +61,32 @@ fn compute_prefix_database(
wtxn: &mut RwTxn,
prefix_delta: PrefixDelta,
grenad_parameters: &GrenadParameters,
progress: &Progress,
) -> Result<()> {
let PrefixDelta { modified, deleted } = prefix_delta;
// Compute word prefix docids
progress.update_progress(PostProcessingWords::WordPrefixDocids);
compute_word_prefix_docids(wtxn, index, &modified, &deleted, grenad_parameters)?;
// Compute exact word prefix docids
progress.update_progress(PostProcessingWords::ExactWordPrefixDocids);
compute_exact_word_prefix_docids(wtxn, index, &modified, &deleted, grenad_parameters)?;
// Compute word prefix fid docids
progress.update_progress(PostProcessingWords::WordPrefixFieldIdDocids);
compute_word_prefix_fid_docids(wtxn, index, &modified, &deleted, grenad_parameters)?;
// Compute word prefix position docids
progress.update_progress(PostProcessingWords::WordPrefixPositionDocids);
compute_word_prefix_position_docids(wtxn, index, &modified, &deleted, grenad_parameters)
}
#[tracing::instrument(level = "trace", skip_all, target = "indexing")]
fn compute_word_fst(index: &Index, wtxn: &mut RwTxn) -> Result<Option<PrefixDelta>> {
fn compute_word_fst(
index: &Index,
wtxn: &mut RwTxn,
progress: &Progress,
) -> Result<Option<PrefixDelta>> {
let rtxn = index.read_txn()?;
progress.update_progress(PostProcessingWords::WordFst);
let words_fst = index.words_fst(&rtxn)?;
let mut word_fst_builder = WordFstBuilder::new(&words_fst)?;
let prefix_settings = index.prefix_settings(&rtxn)?;
@ -112,8 +136,10 @@ fn compute_facet_search_database(
index: &Index,
wtxn: &mut RwTxn,
global_fields_ids_map: GlobalFieldsIdsMap,
progress: &Progress,
) -> Result<()> {
let rtxn = index.read_txn()?;
progress.update_progress(PostProcessingFacets::FacetSearch);
// if the facet search is not enabled, we can skip the rest of the function
if !index.facet_search(wtxn)? {
@ -171,10 +197,16 @@ fn compute_facet_level_database(
wtxn: &mut RwTxn,
mut facet_field_ids_delta: FacetFieldIdsDelta,
global_fields_ids_map: &mut GlobalFieldsIdsMap,
progress: &Progress,
) -> Result<()> {
let rtxn = index.read_txn()?;
let filterable_attributes_rules = index.filterable_attributes_rules(&rtxn)?;
for (fid, delta) in facet_field_ids_delta.consume_facet_string_delta() {
let mut deltas: Vec<_> = facet_field_ids_delta.consume_facet_string_delta().collect();
// We move all bulks at the front and incrementals (others) at the end.
deltas.sort_by_key(|(_, delta)| if let FacetFieldIdDelta::Bulk = delta { 0 } else { 1 });
for (fid, delta) in deltas {
// skip field ids that should not be facet leveled
let Some(metadata) = global_fields_ids_map.metadata(fid) else {
continue;
@ -187,11 +219,13 @@ fn compute_facet_level_database(
let _entered = span.enter();
match delta {
FacetFieldIdDelta::Bulk => {
progress.update_progress(PostProcessingFacets::StringsBulk);
tracing::debug!(%fid, "bulk string facet processing");
FacetsUpdateBulk::new_not_updating_level_0(index, vec![fid], FacetType::String)
.execute(wtxn)?
}
FacetFieldIdDelta::Incremental(delta_data) => {
progress.update_progress(PostProcessingFacets::StringsIncremental);
tracing::debug!(%fid, len=%delta_data.len(), "incremental string facet processing");
FacetsUpdateIncremental::new(
index,
@ -207,16 +241,22 @@ fn compute_facet_level_database(
}
}
for (fid, delta) in facet_field_ids_delta.consume_facet_number_delta() {
let mut deltas: Vec<_> = facet_field_ids_delta.consume_facet_number_delta().collect();
// We move all bulks at the front and incrementals (others) at the end.
deltas.sort_by_key(|(_, delta)| if let FacetFieldIdDelta::Bulk = delta { 0 } else { 1 });
for (fid, delta) in deltas {
let span = tracing::trace_span!(target: "indexing::facet_field_ids", "number");
let _entered = span.enter();
match delta {
FacetFieldIdDelta::Bulk => {
progress.update_progress(PostProcessingFacets::NumbersBulk);
tracing::debug!(%fid, "bulk number facet processing");
FacetsUpdateBulk::new_not_updating_level_0(index, vec![fid], FacetType::Number)
.execute(wtxn)?
}
FacetFieldIdDelta::Incremental(delta_data) => {
progress.update_progress(PostProcessingFacets::NumbersIncremental);
tracing::debug!(%fid, len=%delta_data.len(), "incremental number facet processing");
FacetsUpdateIncremental::new(
index,

View File

@ -189,7 +189,7 @@ impl<'index> DocumentChanges<'index> for UpdateByFunctionChanges<'index> {
}
}
fn obkv_to_rhaimap(obkv: &KvReaderFieldId, fields_ids_map: &FieldsIdsMap) -> Result<rhai::Map> {
pub fn obkv_to_rhaimap(obkv: &KvReaderFieldId, fields_ids_map: &FieldsIdsMap) -> Result<rhai::Map> {
let all_keys = obkv.iter().map(|(k, _v)| k).collect::<Vec<_>>();
let map: Result<rhai::Map> = all_keys
.iter()

View File

@ -7,6 +7,7 @@ use rand::SeedableRng as _;
use time::OffsetDateTime;
use super::super::channel::*;
use crate::database_stats::DatabaseStats;
use crate::documents::PrimaryKey;
use crate::fields_ids_map::metadata::FieldIdMapWithMetadata;
use crate::index::IndexEmbeddingConfig;
@ -142,7 +143,6 @@ pub(super) fn update_index(
embedders: EmbeddingConfigs,
field_distribution: std::collections::BTreeMap<String, u64>,
document_ids: roaring::RoaringBitmap,
modified_docids: roaring::RoaringBitmap,
) -> Result<()> {
index.put_fields_ids_map(wtxn, new_fields_ids_map.as_fields_ids_map())?;
if let Some(new_primary_key) = new_primary_key {
@ -153,7 +153,8 @@ pub(super) fn update_index(
index.put_field_distribution(wtxn, &field_distribution)?;
index.put_documents_ids(wtxn, &document_ids)?;
index.set_updated_at(wtxn, &OffsetDateTime::now_utc())?;
index.update_documents_stats(wtxn, modified_docids)?;
let stats = DatabaseStats::new(index.documents.remap_data_type(), wtxn)?;
index.put_documents_stats(wtxn, stats)?;
Ok(())
}

View File

@ -82,14 +82,8 @@ where
merge_caches_sorted(frozen, |key, DelAddRoaringBitmap { del, add }| {
let current = database.get(&rtxn, key)?;
match merge_cbo_bitmaps(current, del, add)? {
Operation::Write(bitmap) => {
docids_sender.write(key, &bitmap)?;
Ok(())
}
Operation::Delete => {
docids_sender.delete(key)?;
Ok(())
}
Operation::Write(bitmap) => docids_sender.write(key, &bitmap),
Operation::Delete => docids_sender.delete(key),
Operation::Ignore => Ok(()),
}
})
@ -130,7 +124,6 @@ pub fn merge_and_send_facet_docids<'extractor>(
Operation::Ignore => Ok(()),
}
})?;
Ok(facet_field_ids_delta)
})
.reduce(

View File

@ -1,52 +1,42 @@
use std::borrow::Cow;
use crate::make_enum_progress;
use enum_iterator::Sequence;
use crate::progress::Step;
#[derive(Debug, Clone, Copy, PartialEq, Eq, Sequence)]
#[repr(u8)]
pub enum IndexingStep {
PreparingPayloads,
ExtractingDocuments,
ExtractingFacets,
ExtractingWords,
ExtractingWordProximity,
ExtractingEmbeddings,
WritingGeoPoints,
WaitingForDatabaseWrites,
WaitingForExtractors,
WritingEmbeddingsToDatabase,
PostProcessingFacets,
PostProcessingWords,
Finalizing,
}
impl Step for IndexingStep {
fn name(&self) -> Cow<'static, str> {
match self {
IndexingStep::PreparingPayloads => "preparing update file",
IndexingStep::ExtractingDocuments => "extracting documents",
IndexingStep::ExtractingFacets => "extracting facets",
IndexingStep::ExtractingWords => "extracting words",
IndexingStep::ExtractingWordProximity => "extracting word proximity",
IndexingStep::ExtractingEmbeddings => "extracting embeddings",
IndexingStep::WritingGeoPoints => "writing geo points",
IndexingStep::WaitingForDatabaseWrites => "waiting for database writes",
IndexingStep::WaitingForExtractors => "waiting for extractors",
IndexingStep::WritingEmbeddingsToDatabase => "writing embeddings to database",
IndexingStep::PostProcessingFacets => "post-processing facets",
IndexingStep::PostProcessingWords => "post-processing words",
IndexingStep::Finalizing => "finalizing",
}
.into()
}
fn current(&self) -> u32 {
*self as u32
}
fn total(&self) -> u32 {
Self::CARDINALITY as u32
make_enum_progress! {
pub enum IndexingStep {
PreparingPayloads,
ExtractingDocuments,
ExtractingFacets,
ExtractingWords,
ExtractingWordProximity,
ExtractingEmbeddings,
MergingFacetCaches,
MergingWordCaches,
MergingWordProximity,
WritingGeoPoints,
WaitingForDatabaseWrites,
WaitingForExtractors,
WritingEmbeddingsToDatabase,
PostProcessingFacets,
PostProcessingWords,
Finalizing,
}
}
make_enum_progress! {
pub enum PostProcessingFacets {
StringsBulk,
StringsIncremental,
NumbersBulk,
NumbersIncremental,
FacetSearch,
}
}
make_enum_progress! {
pub enum PostProcessingWords {
WordFst,
WordPrefixDocids,
ExactWordPrefixDocids,
WordPrefixFieldIdDocids,
WordPrefixPositionDocids,
}
}

View File

@ -1,6 +1,7 @@
use std::cell::RefCell;
use std::collections::BTreeSet;
use std::io::{BufReader, BufWriter, Read, Seek, Write};
use std::num::NonZeroUsize;
use hashbrown::HashMap;
use heed::types::Bytes;
@ -217,7 +218,7 @@ impl WordPrefixIntegerDocids {
index.push(PrefixIntegerEntry {
prefix,
pos,
serialized_length: Some(buffer.len()),
serialized_length: NonZeroUsize::new(buffer.len()),
});
file.write_all(buffer)?;
}
@ -243,7 +244,7 @@ impl WordPrefixIntegerDocids {
key_buffer.extend_from_slice(&pos.to_be_bytes());
match serialized_length {
Some(serialized_length) => {
buffer.resize(serialized_length, 0);
buffer.resize(serialized_length.get(), 0);
file.read_exact(&mut buffer)?;
self.prefix_database.remap_data_type::<Bytes>().put(
wtxn,
@ -266,7 +267,7 @@ impl WordPrefixIntegerDocids {
struct PrefixIntegerEntry<'a> {
prefix: &'a str,
pos: u16,
serialized_length: Option<usize>,
serialized_length: Option<NonZeroUsize>,
}
/// TODO doc

View File

@ -183,6 +183,7 @@ pub struct Settings<'a, 't, 'i> {
localized_attributes_rules: Setting<Vec<LocalizedAttributesRule>>,
prefix_search: Setting<PrefixSearch>,
facet_search: Setting<bool>,
execute_after_update: Setting<String>,
}
impl<'a, 't, 'i> Settings<'a, 't, 'i> {
@ -220,6 +221,7 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
localized_attributes_rules: Setting::NotSet,
prefix_search: Setting::NotSet,
facet_search: Setting::NotSet,
execute_after_update: Setting::NotSet,
indexer_config,
}
}
@ -442,6 +444,14 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
self.facet_search = Setting::Reset;
}
pub fn set_execute_after_update(&mut self, value: String) {
self.execute_after_update = Setting::Set(value);
}
pub fn reset_execute_after_update(&mut self) {
self.execute_after_update = Setting::Reset;
}
#[tracing::instrument(
level = "trace"
skip(self, progress_callback, should_abort, settings_diff),
@ -994,6 +1004,18 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
Ok(changed)
}
fn update_execute_after_update(&mut self) -> Result<()> {
match self.execute_after_update.as_ref() {
Setting::Set(new) => {
self.index.put_execute_after_update(self.wtxn, &new).map_err(Into::into)
}
Setting::Reset => {
self.index.delete_execute_after_update(self.wtxn).map(drop).map_err(Into::into)
}
Setting::NotSet => Ok(()),
}
}
fn update_embedding_configs(&mut self) -> Result<BTreeMap<String, EmbedderAction>> {
match std::mem::take(&mut self.embedder_settings) {
Setting::Set(configs) => self.update_embedding_configs_set(configs),
@ -1245,6 +1267,7 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
self.update_proximity_precision()?;
self.update_prefix_search()?;
self.update_facet_search()?;
self.update_execute_after_update()?;
self.update_localized_attributes_rules()?;
let embedding_config_updates = self.update_embedding_configs()?;
@ -1331,8 +1354,21 @@ impl InnerIndexSettingsDiff {
let cache_exact_attributes = old_settings.exact_attributes != new_settings.exact_attributes;
let cache_user_defined_searchables = old_settings.user_defined_searchable_attributes
!= new_settings.user_defined_searchable_attributes;
// Check if any searchable field has been added or removed form the list,
// Changing the order should not be considered as a change for reindexing.
let cache_user_defined_searchables = match (
&old_settings.user_defined_searchable_attributes,
&new_settings.user_defined_searchable_attributes,
) {
(Some(old), Some(new)) => {
let old: BTreeSet<_> = old.iter().collect();
let new: BTreeSet<_> = new.iter().collect();
old != new
}
(None, None) => false,
_otherwise => true,
};
// if the user-defined searchables changed, then we need to reindex prompts.
if cache_user_defined_searchables {

View File

@ -896,6 +896,7 @@ fn test_correct_settings_init() {
localized_attributes_rules,
prefix_search,
facet_search,
execute_after_update,
} = settings;
assert!(matches!(searchable_fields, Setting::NotSet));
assert!(matches!(displayed_fields, Setting::NotSet));
@ -923,6 +924,7 @@ fn test_correct_settings_init() {
assert!(matches!(localized_attributes_rules, Setting::NotSet));
assert!(matches!(prefix_search, Setting::NotSet));
assert!(matches!(facet_search, Setting::NotSet));
assert!(matches!(execute_after_update, Setting::NotSet));
})
.unwrap();
}

View File

@ -18,7 +18,7 @@ byte-unit = { version = "5.1.6", default-features = false, features = [
"byte",
"serde",
] }
tokio = { version = "1.42.0", features = ["sync"] }
tokio = { version = "1.43.1", features = ["sync"] }
[target.'cfg(any(target_os = "linux", target_os = "macos"))'.dependencies]
libproc = "0.14.10"

View File

@ -31,7 +31,7 @@ time = { version = "0.3.37", features = [
"serde-human-readable",
"macros",
] }
tokio = { version = "1.42.0", features = [
tokio = { version = "1.43.1", features = [
"rt",
"net",
"time",