Compare commits

...

35 Commits

Author SHA1 Message Date
Clément Renault
7f8a1ac0be Remove useless heed path 2025-10-01 16:19:58 +02:00
Clément Renault
1a67163ee8 Use git cellulite in case 2025-10-01 16:02:07 +02:00
Clément Renault
38141de68d Use local heed in case 2025-10-01 16:01:58 +02:00
Clément Renault
7a98b80687 Use temporary git repo for hannoy and arroy in nested-rtxns pre-version 2025-10-01 15:28:36 +02:00
Kerollmops
229a12c8e6 Multithread word prefix position docids 2025-10-01 15:18:21 +02:00
Kerollmops
2fdfe79400 Make clippy happy 2025-10-01 15:09:59 +02:00
Kerollmops
9184b12a26 Fix the algorithm 2025-10-01 15:09:59 +02:00
Kerollmops
742378d8e1 Multi-thread the facet bulk processing 2025-10-01 15:09:59 +02:00
Kerollmops
6dcd739a8b Patch heed to create multiple nested RoTxns 2025-10-01 15:09:59 +02:00
Many the fish
c29bdcae23 Merge pull request #5913 from meilisearch/dependabot/github_actions/actions/setup-python-6
Bump actions/setup-python from 5 to 6
2025-09-29 14:58:45 +00:00
Many the fish
75219181a3 Merge pull request #5834 from meilisearch/fix-openapi-ci
Minor improvement in OpenAPI CI
2025-09-29 13:55:12 +00:00
Many the fish
a5b5cf7cd1 Merge pull request #5916 from meilisearch/dependabot/github_actions/sigstore/cosign-installer-3.10.0
Bump sigstore/cosign-installer from 3.9.2 to 3.10.0
2025-09-29 13:52:31 +00:00
Many the fish
142ba8ea00 Merge pull request #5915 from meilisearch/dependabot/github_actions/actions/setup-node-5
Bump actions/setup-node from 4 to 5
2025-09-29 13:52:28 +00:00
Many the fish
4bc823e07c Merge pull request #5914 from meilisearch/dependabot/github_actions/actions/setup-dotnet-5
Bump actions/setup-dotnet from 4 to 5
2025-09-29 13:52:10 +00:00
Many the fish
db06ca7138 Merge pull request #5912 from meilisearch/dependabot/github_actions/actions/setup-go-6
Bump actions/setup-go from 5 to 6
2025-09-29 13:52:06 +00:00
Clément Renault
95595a768e Merge pull request #5911 from EazyAl/main
Update README.md to fix newsletter link
2025-09-29 13:10:16 +00:00
dependabot[bot]
36f649768e Bump sigstore/cosign-installer from 3.9.2 to 3.10.0
Bumps [sigstore/cosign-installer](https://github.com/sigstore/cosign-installer) from 3.9.2 to 3.10.0.
- [Release notes](https://github.com/sigstore/cosign-installer/releases)
- [Commits](d58896d6a1...d7543c93d8)

---
updated-dependencies:
- dependency-name: sigstore/cosign-installer
  dependency-version: 3.10.0
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-09-25 18:01:14 +00:00
dependabot[bot]
0c6fc243f2 Bump actions/setup-node from 4 to 5
Bumps [actions/setup-node](https://github.com/actions/setup-node) from 4 to 5.
- [Release notes](https://github.com/actions/setup-node/releases)
- [Commits](https://github.com/actions/setup-node/compare/v4...v5)

---
updated-dependencies:
- dependency-name: actions/setup-node
  dependency-version: '5'
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-09-25 18:01:11 +00:00
dependabot[bot]
dfc46d5627 Bump actions/setup-dotnet from 4 to 5
Bumps [actions/setup-dotnet](https://github.com/actions/setup-dotnet) from 4 to 5.
- [Release notes](https://github.com/actions/setup-dotnet/releases)
- [Commits](https://github.com/actions/setup-dotnet/compare/v4...v5)

---
updated-dependencies:
- dependency-name: actions/setup-dotnet
  dependency-version: '5'
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-09-25 18:01:08 +00:00
dependabot[bot]
11d55f2121 Bump actions/setup-python from 5 to 6
Bumps [actions/setup-python](https://github.com/actions/setup-python) from 5 to 6.
- [Release notes](https://github.com/actions/setup-python/releases)
- [Commits](https://github.com/actions/setup-python/compare/v5...v6)

---
updated-dependencies:
- dependency-name: actions/setup-python
  dependency-version: '6'
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-09-25 18:01:03 +00:00
dependabot[bot]
014da57cf6 Bump actions/setup-go from 5 to 6
Bumps [actions/setup-go](https://github.com/actions/setup-go) from 5 to 6.
- [Release notes](https://github.com/actions/setup-go/releases)
- [Commits](https://github.com/actions/setup-go/compare/v5...v6)

---
updated-dependencies:
- dependency-name: actions/setup-go
  dependency-version: '6'
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-09-25 18:01:00 +00:00
Clément Renault
70a0ff4a8f Merge pull request #5900 from meilisearch/show-dependencies
Show Dependabot dependency upgrade in the changelog
2025-09-25 16:04:03 +00:00
Clément Renault
dd0d5e4b90 Merge pull request #5910 from meilisearch/curquiza-patch-1
Change Java version in SDK CI
2025-09-25 14:32:16 +00:00
Ali Imran
15b3bb1700 Update README.md to fix newsletter link 2025-09-25 16:07:08 +02:00
Louis Dureuil
077ec2ab11 Merge pull request #5908 from meilisearch/update-version
Update version
2025-09-25 13:10:34 +00:00
Clémentine
f25db0795e Change Java version in SDK CI
Updated Java version and distribution in workflow.
2025-09-25 15:03:50 +02:00
Tamo
c50a337c29 bump version for 1.22.1 2025-09-25 13:44:44 +02:00
Tamo
efeae09ce1 Merge pull request #5906 from meilisearch/task-deletion-strategy
Delete oldest tasks first
2025-09-25 10:11:33 +00:00
Tamo
ad55b48664 Merge pull request #5907 from meilisearch/fix-geojson-bug
use the latest version of zerometry that supports collection, lines and multi-lines
2025-09-25 09:56:01 +00:00
Tamo
94eabd34e6 fmt 2025-09-25 11:01:53 +02:00
Tamo
6935589f74 use the latest version of zerometry that supports collection, lines and multi-lines 2025-09-25 10:31:07 +02:00
Louis Dureuil
4beb452027 Optimize by using from_sorted_iter
Co-authored-by: Tamo <tamo@meilisearch.com>
2025-09-25 10:16:30 +02:00
Louis Dureuil
b722da303a Do not start from the end of the finished tasks when selecting the tasks to delete 2025-09-25 09:54:58 +02:00
curquiza
6f0d26c22c Show dependency upgrade in the changelog for full transparency 2025-09-22 18:30:34 +02:00
curquiza
d52c7dcc94 Add needs: check-version 2025-08-12 20:47:43 +02:00
31 changed files with 452 additions and 131 deletions

View File

@@ -7,6 +7,5 @@ updates:
schedule:
interval: "monthly"
labels:
- 'skip changelog'
- 'dependencies'
rebase-strategy: disabled

View File

@@ -18,6 +18,7 @@ categories:
label: 'security'
- title: '⚙️ Maintenance/misc'
label:
- 'dependencies'
- 'maintenance'
- 'documentation'
template: |
@@ -26,8 +27,3 @@ template: |
❤️ Huge thanks to our contributors: $CONTRIBUTORS.
no-changes-template: 'Changes are coming soon 😎'
sort-direction: 'ascending'
replacers:
- search: '/(?:and )?@dependabot-preview(?:\[bot\])?,?/g'
replace: ''
- search: '/(?:and )?@dependabot(?:\[bot\])?,?/g'
replace: ''

View File

@@ -65,7 +65,7 @@ jobs:
uses: docker/setup-buildx-action@v3
- name: Install cosign
uses: sigstore/cosign-installer@d58896d6a1865668819e1d91763c7751a165e159 # tag=v3.9.2
uses: sigstore/cosign-installer@d7543c93d881b35a8faa02e8e3605f69b7a1ce62 # tag=v3.10.0
- name: Login to Docker Hub
uses: docker/login-action@v3

View File

@@ -11,7 +11,7 @@ jobs:
check-version:
name: Check the version validity
runs-on: ubuntu-latest
# No need to check the version for dry run (cron)
# No need to check the version for dry run (cron or workflow_dispatch)
steps:
- uses: actions/checkout@v5
# Check if the tag has the v<number>.<number>.<number> format.
@@ -48,7 +48,7 @@ jobs:
- uses: dtolnay/rust-toolchain@1.89
- name: Build
run: cargo build --release --locked
# No need to upload binaries for dry run (cron)
# No need to upload binaries for dry run (cron or workflow_dispatch)
- name: Upload binaries to release
if: github.event_name == 'release'
uses: svenstaro/upload-release-action@2.11.2
@@ -78,7 +78,7 @@ jobs:
- uses: dtolnay/rust-toolchain@1.89
- name: Build
run: cargo build --release --locked
# No need to upload binaries for dry run (cron)
# No need to upload binaries for dry run (cron or workflow_dispatch)
- name: Upload binaries to release
if: github.event_name == 'release'
uses: svenstaro/upload-release-action@2.11.2
@@ -111,7 +111,7 @@ jobs:
command: build
args: --release --target ${{ matrix.target }}
- name: Upload the binary to release
# No need to upload binaries for dry run (cron)
# No need to upload binaries for dry run (cron or workflow_dispatch)
if: github.event_name == 'release'
uses: svenstaro/upload-release-action@2.11.2
with:
@@ -176,7 +176,7 @@ jobs:
- name: List target output files
run: ls -lR ./target
- name: Upload the binary to release
# No need to upload binaries for dry run (cron)
# No need to upload binaries for dry run (cron or workflow_dispatch)
if: github.event_name == 'release'
uses: svenstaro/upload-release-action@2.11.2
with:
@@ -187,6 +187,7 @@ jobs:
publish-openapi-file:
name: Publish OpenAPI file
needs: check-version
runs-on: ubuntu-latest
steps:
- name: Checkout code
@@ -201,7 +202,7 @@ jobs:
cd crates/openapi-generator
cargo run --release -- --pretty --output ../../meilisearch.json
- name: Upload OpenAPI to Release
# No need to upload for dry run (cron)
# No need to upload for dry run (cron or workflow_dispatch)
if: github.event_name == 'release'
uses: svenstaro/upload-release-action@2.11.2
with:

View File

@@ -50,7 +50,7 @@ jobs:
with:
repository: meilisearch/meilisearch-dotnet
- name: Setup .NET Core
uses: actions/setup-dotnet@v4
uses: actions/setup-dotnet@v5
with:
dotnet-version: "8.0.x"
- name: Install dependencies
@@ -100,7 +100,7 @@ jobs:
- '7700:7700'
steps:
- name: Set up Go
uses: actions/setup-go@v5
uses: actions/setup-go@v6
with:
go-version: stable
- uses: actions/checkout@v5
@@ -135,13 +135,13 @@ jobs:
- name: Set up Java
uses: actions/setup-java@v5
with:
java-version: 8
distribution: 'zulu'
java-version: 17
distribution: 'temurin'
cache: gradle
- name: Grant execute permission for gradlew
run: chmod +x gradlew
- name: Build and run unit and integration tests
run: ./gradlew build integrationTest
run: ./gradlew build integrationTest --info
meilisearch-js-tests:
needs: define-docker-image
@@ -160,7 +160,7 @@ jobs:
with:
repository: meilisearch/meilisearch-js
- name: Setup node
uses: actions/setup-node@v4
uses: actions/setup-node@v5
with:
cache: 'yarn'
- name: Install dependencies
@@ -224,7 +224,7 @@ jobs:
with:
repository: meilisearch/meilisearch-python
- name: Set up Python
uses: actions/setup-python@v5
uses: actions/setup-python@v6
- name: Install pipenv
uses: dschep/install-pipenv-action@v1
- name: Install dependencies
@@ -318,7 +318,7 @@ jobs:
with:
repository: meilisearch/meilisearch-js-plugins
- name: Setup node
uses: actions/setup-node@v4
uses: actions/setup-node@v5
with:
cache: yarn
- name: Install dependencies

89
Cargo.lock generated
View File

@@ -453,9 +453,8 @@ checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50"
[[package]]
name = "arroy"
version = "0.6.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8578a72223dfa13dfd9fc144d15260d134361789ebdea9b16e85a511edc73c7d"
version = "0.6.4-nested-rtxns"
source = "git+https://github.com/meilisearch/arroy?branch=use-heed-nested-rtxns#61c8f4f0addeff968e80438018d0aee2c1eb8d67"
dependencies = [
"bytemuck",
"byteorder",
@@ -589,7 +588,7 @@ source = "git+https://github.com/meilisearch/bbqueue#cbb87cc707b5af415ef203bdaf2
[[package]]
name = "benchmarks"
version = "1.22.0"
version = "1.22.1"
dependencies = [
"anyhow",
"bumpalo",
@@ -799,7 +798,7 @@ dependencies = [
[[package]]
name = "build-info"
version = "1.22.0"
version = "1.22.1"
dependencies = [
"anyhow",
"time",
@@ -1075,9 +1074,8 @@ dependencies = [
[[package]]
name = "cellulite"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "71a41aa2cd021bc3f23d97cc1e645848ca8c279fc757d1570ba7fe7ddc021290"
version = "0.3.1-nested-rtxns"
source = "git+https://github.com/meilisearch/cellulite?branch=use-heed-nested-rtxns#9fb1866cc49277d26f606769112fa704944ccc61"
dependencies = [
"crossbeam",
"geo",
@@ -1092,7 +1090,7 @@ dependencies = [
"steppe",
"thiserror 2.0.16",
"thread_local",
"zerometry 0.3.0",
"zerometry",
]
[[package]]
@@ -1829,7 +1827,7 @@ dependencies = [
[[package]]
name = "dump"
version = "1.22.0"
version = "1.22.1"
dependencies = [
"anyhow",
"big_s",
@@ -2072,7 +2070,7 @@ checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be"
[[package]]
name = "file-store"
version = "1.22.0"
version = "1.22.1"
dependencies = [
"tempfile",
"thiserror 2.0.16",
@@ -2094,7 +2092,7 @@ dependencies = [
[[package]]
name = "filter-parser"
version = "1.22.0"
version = "1.22.1"
dependencies = [
"insta",
"levenshtein_automata",
@@ -2122,7 +2120,7 @@ dependencies = [
[[package]]
name = "flatten-serde-json"
version = "1.22.0"
version = "1.22.1"
dependencies = [
"criterion",
"serde_json",
@@ -2279,7 +2277,7 @@ dependencies = [
[[package]]
name = "fuzzers"
version = "1.22.0"
version = "1.22.1"
dependencies = [
"arbitrary",
"bumpalo",
@@ -2577,7 +2575,6 @@ dependencies = [
"num-traits",
"robust",
"rstar",
"spade",
]
[[package]]
@@ -2588,7 +2585,6 @@ checksum = "75a4dcd69d35b2c87a7c83bce9af69fd65c9d68d3833a0ded568983928f3fc99"
dependencies = [
"approx",
"num-traits",
"rayon",
"rstar",
"serde",
]
@@ -2760,9 +2756,8 @@ dependencies = [
[[package]]
name = "hannoy"
version = "0.0.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0dba13a271c49a119a97862ebf0a74131d879832868400d9fcd937b790058fdd"
version = "0.0.9-nested-rtxns"
source = "git+https://github.com/nnethercott/hannoy?branch=use-heed-nested-rtxns#d4ca5454eff6539e9fc2119f07113abebbda0a39"
dependencies = [
"bytemuck",
"byteorder",
@@ -2840,9 +2835,9 @@ checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
[[package]]
name = "heed"
version = "0.22.0"
version = "0.22.1-nested-rtxns"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6a56c94661ddfb51aa9cdfbf102cfcc340aa69267f95ebccc4af08d7c530d393"
checksum = "0ff115ba5712b1f1fc7617b195f5c2f139e29c397ff79da040cd19db75ccc240"
dependencies = [
"bitflags 2.9.4",
"byteorder",
@@ -2852,7 +2847,6 @@ dependencies = [
"lmdb-master-sys",
"once_cell",
"page_size",
"serde",
"synchronoise",
"url",
]
@@ -3060,7 +3054,6 @@ dependencies = [
"i_key_sort",
"i_shape",
"i_tree",
"rayon",
]
[[package]]
@@ -3237,7 +3230,7 @@ dependencies = [
[[package]]
name = "index-scheduler"
version = "1.22.0"
version = "1.22.1"
dependencies = [
"anyhow",
"backoff",
@@ -3491,7 +3484,7 @@ dependencies = [
[[package]]
name = "json-depth-checker"
version = "1.22.0"
version = "1.22.1"
dependencies = [
"criterion",
"serde_json",
@@ -3892,9 +3885,9 @@ checksum = "241eaef5fd12c88705a01fc1066c48c4b36e0dd4377dcdc7ec3942cea7a69956"
[[package]]
name = "lmdb-master-sys"
version = "0.2.5"
version = "0.2.6-nested-rtxns"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "864808e0b19fb6dd3b70ba94ee671b82fce17554cf80aeb0a155c65bb08027df"
checksum = "f4ff85130e3c994b36877045fbbb138d521dea7197bfc19dc3d5d95101a8e20a"
dependencies = [
"cc",
"doxygen-rs",
@@ -4000,7 +3993,7 @@ checksum = "490cc448043f947bae3cbee9c203358d62dbee0db12107a74be5c30ccfd09771"
[[package]]
name = "meili-snap"
version = "1.22.0"
version = "1.22.1"
dependencies = [
"insta",
"md5",
@@ -4011,7 +4004,7 @@ dependencies = [
[[package]]
name = "meilisearch"
version = "1.22.0"
version = "1.22.1"
dependencies = [
"actix-cors",
"actix-http",
@@ -4108,7 +4101,7 @@ dependencies = [
[[package]]
name = "meilisearch-auth"
version = "1.22.0"
version = "1.22.1"
dependencies = [
"base64 0.22.1",
"enum-iterator",
@@ -4127,7 +4120,7 @@ dependencies = [
[[package]]
name = "meilisearch-types"
version = "1.22.0"
version = "1.22.1"
dependencies = [
"actix-web",
"anyhow",
@@ -4162,7 +4155,7 @@ dependencies = [
[[package]]
name = "meilitool"
version = "1.22.0"
version = "1.22.1"
dependencies = [
"anyhow",
"clap",
@@ -4196,7 +4189,7 @@ dependencies = [
[[package]]
name = "milli"
version = "1.22.0"
version = "1.22.1"
dependencies = [
"allocator-api2 0.3.1",
"arroy",
@@ -4275,7 +4268,7 @@ dependencies = [
"url",
"utoipa",
"uuid",
"zerometry 0.1.0",
"zerometry",
]
[[package]]
@@ -4777,7 +4770,7 @@ checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220"
[[package]]
name = "permissive-json-pointer"
version = "1.22.0"
version = "1.22.1"
dependencies = [
"big_s",
"serde_json",
@@ -6145,18 +6138,6 @@ dependencies = [
"winapi",
]
[[package]]
name = "spade"
version = "2.15.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fb313e1c8afee5b5647e00ee0fe6855e3d529eb863a0fdae1d60006c4d1e9990"
dependencies = [
"hashbrown 0.15.5",
"num-traits",
"robust",
"smallvec",
]
[[package]]
name = "spin"
version = "0.5.2"
@@ -7836,7 +7817,7 @@ dependencies = [
[[package]]
name = "xtask"
version = "1.22.0"
version = "1.22.1"
dependencies = [
"anyhow",
"build-info",
@@ -7983,18 +7964,6 @@ dependencies = [
"syn 2.0.106",
]
[[package]]
name = "zerometry"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "681f08f3f4ef27d3021a128eb6d8df1cd781e4c9c797c3971c1f85316374f977"
dependencies = [
"bytemuck",
"byteorder",
"geo",
"geo-types",
]
[[package]]
name = "zerometry"
version = "0.3.0"

View File

@@ -23,7 +23,7 @@ members = [
]
[workspace.package]
version = "1.22.0"
version = "1.22.1"
authors = [
"Quentin de Quelen <quentin@dequelen.me>",
"Clément Renault <clement@meilisearch.com>",

View File

@@ -121,7 +121,7 @@ If you want to know more about the kind of data we collect and what we use it fo
Meilisearch is a search engine created by [Meili](https://www.meilisearch.com/careers), a software development company headquartered in France and with team members all over the world. Want to know more about us? [Check out our blog!](https://blog.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=contact)
🗞 [Subscribe to our newsletter](https://meilisearch.us2.list-manage.com/subscribe?u=27870f7b71c908a8b359599fb&id=79582d828e) if you don't want to miss any updates! We promise we won't clutter your mailbox: we only send one edition every two months.
🗞 [Subscribe to our newsletter](https://share-eu1.hsforms.com/1LN5N0x_GQgq7ss7tXmSykwfg3aq) if you don't want to miss any updates! We promise we won't clutter your mailbox: we only send one edition every two months.
💌 Want to make a suggestion or give feedback? Here are some of the channels where you can reach us:

View File

@@ -310,7 +310,8 @@ impl Queue {
| self.tasks.status.get(wtxn, &Status::Failed)?.unwrap_or_default()
| self.tasks.status.get(wtxn, &Status::Canceled)?.unwrap_or_default();
let to_delete = RoaringBitmap::from_iter(finished.into_iter().rev().take(100_000));
let to_delete =
RoaringBitmap::from_sorted_iter(finished.into_iter().take(100_000)).unwrap();
// /!\ the len must be at least 2 or else we might enter an infinite loop where we only delete
// the deletion tasks we enqueued ourselves.

View File

@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: (1, 22, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: (1, 22, 1) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
1 {uid: 1, batch_uid: 1, status: succeeded, details: { primary_key: Some("mouse"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
2 {uid: 2, batch_uid: 2, status: succeeded, details: { primary_key: Some("bone"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
3 {uid: 3, batch_uid: 3, status: failed, error: ResponseError { code: 200, message: "Index `doggo` already exists.", error_code: "index_already_exists", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#index_already_exists" }, details: { primary_key: Some("bone"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
@@ -57,7 +57,7 @@ girafo: { number_of_documents: 0, field_distribution: {} }
[timestamp] [4,]
----------------------------------------------------------------------
### All Batches:
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.22.0"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.22.1"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
1 {uid: 1, details: {"primaryKey":"mouse"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"catto":1}}, stop reason: "created batch containing only task with id 1 of type `indexCreation` that cannot be batched with any other task.", }
2 {uid: 2, details: {"primaryKey":"bone"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"doggo":1}}, stop reason: "created batch containing only task with id 2 of type `indexCreation` that cannot be batched with any other task.", }
3 {uid: 3, details: {"primaryKey":"bone"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"indexCreation":1},"indexUids":{"doggo":1}}, stop reason: "created batch containing only task with id 3 of type `indexCreation` that cannot be batched with any other task.", }

View File

@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: (1, 22, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: (1, 22, 1) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
----------------------------------------------------------------------
### Status:
enqueued [0,]

View File

@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: (1, 22, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: (1, 22, 1) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
----------------------------------------------------------------------
### Status:

View File

@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: (1, 22, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: (1, 22, 1) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
----------------------------------------------------------------------
### Status:
@@ -37,7 +37,7 @@ catto [1,]
[timestamp] [0,]
----------------------------------------------------------------------
### All Batches:
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.22.0"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.22.1"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
----------------------------------------------------------------------
### Batch to tasks mapping:
0 [0,]

View File

@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: (1, 22, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: (1, 22, 1) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
2 {uid: 2, status: enqueued, details: { primary_key: Some("bone"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
----------------------------------------------------------------------
@@ -40,7 +40,7 @@ doggo [2,]
[timestamp] [0,]
----------------------------------------------------------------------
### All Batches:
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.22.0"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.22.1"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
----------------------------------------------------------------------
### Batch to tasks mapping:
0 [0,]

View File

@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: (1, 22, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: (1, 22, 1) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
2 {uid: 2, status: enqueued, details: { primary_key: Some("bone"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
3 {uid: 3, status: enqueued, details: { primary_key: Some("bone"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
@@ -43,7 +43,7 @@ doggo [2,3,]
[timestamp] [0,]
----------------------------------------------------------------------
### All Batches:
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.22.0"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.22.1"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
----------------------------------------------------------------------
### Batch to tasks mapping:
0 [0,]

View File

@@ -45,6 +45,7 @@ pub fn upgrade_index_scheduler(
(1, 19, _) => 0,
(1, 20, _) => 0,
(1, 21, _) => 0,
(1, 22, _) => 0,
(major, minor, patch) => {
if major > current_major
|| (major == current_major && minor > current_minor)

View File

@@ -418,3 +418,36 @@ async fn geo_radius() {
}
"#);
}
/// Regression test for issue #5904.
///
/// Marks `_geojson` as filterable, then indexes a single document whose
/// `_geojson` field is a `FeatureCollection` wrapping one `Point` feature,
/// and expects both the settings task and the document addition task to
/// succeed.
#[actix_rt::test]
async fn bug_5904() {
    // https://github.com/meilisearch/meilisearch/issues/5904
    let server = Server::new_shared();
    let index = server.unique_index();

    // Step 1: make `_geojson` filterable and wait for the settings task.
    let settings = json!({"filterableAttributes": ["_geojson"]});
    let (settings_task, _code) = index.update_settings(settings).await;
    server.wait_task(settings_task.uid()).await.succeeded();

    // Step 2: send a document whose geometry is nested inside a feature collection.
    let document = json!({
        "id": 1,
        "_geojson": {
            "type": "FeatureCollection",
            "features": [
                {
                    "type": "Feature",
                    "geometry": {
                        "type": "Point",
                        "coordinates": [
                            4.23914,
                            48.382893
                        ]
                    },
                    "properties": {}
                }
            ]
        }
    });
    let (addition_task, _code) = index.add_documents(document, Some("id")).await;
    server.wait_task(addition_task.uid()).await.succeeded();
}

View File

@@ -43,7 +43,7 @@ async fn version_too_old() {
std::fs::write(db_path.join("VERSION"), "1.11.9999").unwrap();
let options = Opt { experimental_dumpless_upgrade: true, ..default_settings };
let err = Server::new_with_options(options).await.map(|_| ()).unwrap_err();
snapshot!(err, @"Database version 1.11.9999 is too old for the experimental dumpless upgrade feature. Please generate a dump using the v1.11.9999 and import it in the v1.22.0");
snapshot!(err, @"Database version 1.11.9999 is too old for the experimental dumpless upgrade feature. Please generate a dump using the v1.11.9999 and import it in the v1.22.1");
}
#[actix_rt::test]
@@ -58,7 +58,7 @@ async fn version_requires_downgrade() {
std::fs::write(db_path.join("VERSION"), format!("{major}.{minor}.{patch}")).unwrap();
let options = Opt { experimental_dumpless_upgrade: true, ..default_settings };
let err = Server::new_with_options(options).await.map(|_| ()).unwrap_err();
snapshot!(err, @"Database version 1.22.1 is higher than the Meilisearch version 1.22.0. Downgrade is not supported");
snapshot!(err, @"Database version 1.22.2 is higher than the Meilisearch version 1.22.1. Downgrade is not supported");
}
#[actix_rt::test]

View File

@@ -8,7 +8,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
"progress": null,
"details": {
"upgradeFrom": "v1.12.0",
"upgradeTo": "v1.22.0"
"upgradeTo": "v1.22.1"
},
"stats": {
"totalNbTasks": 1,

View File

@@ -8,7 +8,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
"progress": null,
"details": {
"upgradeFrom": "v1.12.0",
"upgradeTo": "v1.22.0"
"upgradeTo": "v1.22.1"
},
"stats": {
"totalNbTasks": 1,

View File

@@ -8,7 +8,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
"progress": null,
"details": {
"upgradeFrom": "v1.12.0",
"upgradeTo": "v1.22.0"
"upgradeTo": "v1.22.1"
},
"stats": {
"totalNbTasks": 1,

View File

@@ -12,7 +12,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
"canceledBy": null,
"details": {
"upgradeFrom": "v1.12.0",
"upgradeTo": "v1.22.0"
"upgradeTo": "v1.22.1"
},
"error": null,
"duration": "[duration]",

View File

@@ -12,7 +12,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
"canceledBy": null,
"details": {
"upgradeFrom": "v1.12.0",
"upgradeTo": "v1.22.0"
"upgradeTo": "v1.22.1"
},
"error": null,
"duration": "[duration]",

View File

@@ -12,7 +12,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
"canceledBy": null,
"details": {
"upgradeFrom": "v1.12.0",
"upgradeTo": "v1.22.0"
"upgradeTo": "v1.22.1"
},
"error": null,
"duration": "[duration]",

View File

@@ -8,7 +8,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
"progress": null,
"details": {
"upgradeFrom": "v1.12.0",
"upgradeTo": "v1.22.0"
"upgradeTo": "v1.22.1"
},
"stats": {
"totalNbTasks": 1,

View File

@@ -12,7 +12,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
"canceledBy": null,
"details": {
"upgradeFrom": "v1.12.0",
"upgradeTo": "v1.22.0"
"upgradeTo": "v1.22.1"
},
"error": null,
"duration": "[duration]",

View File

@@ -19,7 +19,7 @@ bstr = "1.12.0"
bytemuck = { version = "1.23.1", features = ["extern_crate_alloc"] }
byteorder = "1.5.0"
charabia = { version = "0.9.7", default-features = false }
cellulite = "0.3.0"
cellulite = { version = "0.3.1-nested-rtxns", git = "https://github.com/meilisearch/cellulite", "branch" = "use-heed-nested-rtxns" }
concat-arrays = "0.1.2"
convert_case = "0.8.0"
crossbeam-channel = "0.5.15"
@@ -34,7 +34,7 @@ grenad = { version = "0.5.0", default-features = false, features = [
"rayon",
"tempfile",
] }
heed = { version = "0.22.0", default-features = false, features = [
heed = { version = "0.22.1-nested-rtxns", default-features = false, features = [
"serde-json",
"serde-bincode",
] }
@@ -89,8 +89,8 @@ rhai = { version = "1.22.2", features = [
"no_time",
"sync",
] }
arroy = "0.6.3"
hannoy = { version = "0.0.8", features = ["arroy"] }
arroy = { version = "0.6.4-nested-rtxns", git = "https://github.com/meilisearch/arroy", "branch" = "use-heed-nested-rtxns" }
hannoy = { version = "0.0.9-nested-rtxns", git = "https://github.com/nnethercott/hannoy", "branch" = "use-heed-nested-rtxns", features = ["arroy"] }
rand = "0.8.5"
tracing = "0.1.41"
ureq = { version = "2.12.1", features = ["json"] }
@@ -119,7 +119,7 @@ twox-hash = { version = "2.1.1", default-features = false, features = [
"xxhash64",
] }
geo-types = "0.7.16"
zerometry = "0.1.0"
zerometry = "0.3.0"
[dev-dependencies]
mimalloc = { version = "0.1.47", default-features = false }

View File

@@ -180,12 +180,15 @@ where
})
.unwrap()?;
post_processing::post_process(
indexing_context,
wtxn,
global_fields_ids_map,
facet_field_ids_delta,
)?;
pool.install(|| {
post_processing::post_process(
indexing_context,
wtxn,
global_fields_ids_map,
facet_field_ids_delta,
)
})
.unwrap()?;
indexing_context.progress.update_progress(IndexingStep::BuildingGeoJson);
index.cellulite.build(

View File

@@ -0,0 +1,164 @@
use std::fs::File;
use std::io::BufReader;
use std::{iter, mem};
use grenad::CompressionType;
use heed::types::{Bytes, LazyDecode};
use heed::{Database, RwTxn};
use rayon::prelude::*;
use roaring::MultiOps;
use tempfile::tempfile;
use crate::facet::FacetType;
use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec};
use crate::heed_codec::BytesRefCodec;
use crate::update::facet::{FACET_GROUP_SIZE, FACET_MIN_LEVEL_SIZE};
use crate::update::{create_writer, writer_into_reader};
use crate::{CboRoaringBitmapCodec, FieldId, Index};
/// Rebuilds the facet group levels above level 0 for a single field.
///
/// The upper levels already stored for `field_id` are cleared first. Then,
/// starting from level 0, each level `n + 1` is computed from level `n` and
/// written into the facet database selected by `facet_type`. At least one
/// pass is always executed; generation stops as soon as the level that was
/// just written has fewer than `FACET_MIN_LEVEL_SIZE` groups of
/// `FACET_GROUP_SIZE` entries.
///
/// # Panics
///
/// Panics if the number of the next level does not fit in a `u8`.
pub fn generate_facet_levels(
    index: &Index,
    wtxn: &mut RwTxn,
    field_id: FieldId,
    facet_type: FacetType,
) -> crate::Result<()> {
    // Both facet databases are viewed through the same raw-bytes key codec,
    // the values are only decoded on demand.
    let keyed_db = match facet_type {
        FacetType::String => {
            index.facet_id_string_docids.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>()
        }
        FacetType::Number => {
            index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>()
        }
    };
    let db = keyed_db.lazily_decode_data();

    clear_levels(db, wtxn, field_id)?;

    let mut base_level: u8 = 0;
    loop {
        let readers = compute_level(index, wtxn, db, field_id, base_level)?;
        let level = base_level.checked_add(1).unwrap();

        // Copy every group computed by the worker threads into the database
        // and count how many entries the new level contains.
        let mut level_size = 0;
        for reader in readers {
            let mut cursor = reader.into_cursor()?;
            while let Some((left_bound, facet_group_value)) = cursor.move_on_next()? {
                let key = FacetGroupKey { field_id, level, left_bound };
                debug_assert!(
                    db.get(wtxn, &key).transpose().is_none(),
                    "entry must not be there and must have already been deleted: {key:?}"
                );
                db.remap_data_type::<Bytes>().put(wtxn, &key, facet_group_value)?;
                level_size += 1;
            }
        }

        base_level = level;

        // Stop when the next level would not reach the minimum required number of groups.
        let next_level_groups = level_size / FACET_GROUP_SIZE as usize;
        if next_level_groups < FACET_MIN_LEVEL_SIZE as usize {
            return Ok(());
        }
    }
}
/// Computes the groups of the level above `base_level` and returns them as
/// one grenad reader per worker.
///
/// One nested read transaction is opened on `wtxn` per rayon thread. Every
/// worker walks all the entries of `base_level` for `field_id`, but only
/// decodes and merges the groups assigned to it: consecutive runs of
/// `FACET_GROUP_SIZE` entries are distributed round-robin between workers.
///
/// Each emitted entry is keyed by the left bound of the first entry of its
/// group. Its value is one byte holding the number of merged entries followed
/// by the serialized union of their bitmaps.
///
/// # Panics
///
/// Panics if `base_level + 1` does not fit in a `u8`.
fn compute_level(
    index: &Index,
    wtxn: &heed::RwTxn,
    db: Database<FacetGroupKeyCodec<BytesRefCodec>, LazyDecode<FacetGroupValueCodec>>,
    field_id: FieldId,
    base_level: u8,
) -> Result<Vec<grenad::Reader<BufReader<File>>>, crate::Error> {
    let thread_count = rayon::current_num_threads();
    let group_size = FACET_GROUP_SIZE as usize;

    let mut rtxns = Vec::with_capacity(thread_count);
    for _ in 0..thread_count {
        rtxns.push(index.env.nested_read_txn(wtxn)?);
    }

    // From the first possible key of the base level up to the first
    // possible key of the next level, *excluded*.
    let lower = FacetGroupKey::<&[u8]> { field_id, level: base_level, left_bound: &[] };
    let upper = FacetGroupKey::<&[u8]> {
        field_id,
        level: base_level.checked_add(1).unwrap(),
        left_bound: &[],
    };
    let range = lower..upper;

    rtxns
        .into_par_iter()
        .enumerate()
        .map(|(thread_id, rtxn)| {
            let mut writer = create_writer(CompressionType::None, None, tempfile()?);
            let mut pending_bound = None;
            let mut pending_bitmaps = Vec::new();
            let mut ser_buffer = Vec::new();

            // A trailing `None` marks the end of the range so that the last
            // pending group is flushed by the same code path as the others.
            let entries = db.range(&rtxn, &range)?.map(Some).chain(iter::once(None));
            for (i, entry) in entries.enumerate() {
                let entry = entry.transpose()?;
                let opens_group = i % group_size == 0;
                let owned_by_thread = (i / group_size) % thread_count == thread_id;

                let closes_pending_group = match entry {
                    Some(_) => owned_by_thread && opens_group,
                    None => true,
                };

                if closes_pending_group {
                    if let Some(left_bound) = pending_bound.take() {
                        // The bitmaps are gathered in a Vec so that the union
                        // goes through MultiOps. The value starts with the
                        // number of entries merged in this group.
                        ser_buffer.clear();
                        let group_len: u8 = pending_bitmaps.len().try_into().unwrap();
                        ser_buffer.push(group_len);
                        let docids = mem::take(&mut pending_bitmaps).into_iter().union();
                        CboRoaringBitmapCodec::serialize_into_vec(&docids, &mut ser_buffer);
                        writer.insert(left_bound, &ser_buffer)?;
                    }
                }

                if let Some((key, lazy_value)) = entry {
                    if owned_by_thread {
                        if opens_group {
                            pending_bound = Some(key.left_bound);
                        }
                        // Only the bitmaps this worker is in charge of are decoded.
                        let value = lazy_value.decode().map_err(heed::Error::Decoding)?;
                        pending_bitmaps.push(value.bitmap);
                    }
                }
            }

            writer_into_reader(writer)
        })
        .collect()
}
/// Deletes the upper levels of `field_id`, leaving its level 0 untouched.
///
/// The deleted range goes from the first possible key of level 1 up to, and
/// including, the key of level `u8::MAX` with an empty left bound.
fn clear_levels(
    db: Database<FacetGroupKeyCodec<BytesRefCodec>, LazyDecode<FacetGroupValueCodec>>,
    wtxn: &mut RwTxn<'_>,
    field_id: FieldId,
) -> heed::Result<()> {
    let first_key_of = |level| FacetGroupKey::<&[u8]> { field_id, level, left_bound: &[] };
    let upper_levels = first_key_of(1)..=first_key_of(u8::MAX);
    // The number of deleted entries is of no use to the callers.
    db.delete_range(wtxn, &upper_levels)?;
    Ok(())
}

View File

@@ -1,5 +1,6 @@
use std::cmp::Ordering;
use facet_bulk::generate_facet_levels;
use heed::types::{Bytes, DecodeIgnore, Str};
use heed::RwTxn;
use itertools::{merge_join_by, EitherOrBoth};
@@ -23,6 +24,8 @@ use crate::update::new::FacetFieldIdsDelta;
use crate::update::{FacetsUpdateBulk, GrenadParameters};
use crate::{GlobalFieldsIdsMap, Index, Result};
mod facet_bulk;
pub(super) fn post_process<MSP>(
indexing_context: IndexingContext<MSP>,
wtxn: &mut RwTxn<'_>,
@@ -239,9 +242,8 @@ fn compute_facet_level_database(
match delta {
FacetFieldIdDelta::Bulk => {
progress.update_progress(PostProcessingFacets::StringsBulk);
tracing::debug!(%fid, "bulk string facet processing");
FacetsUpdateBulk::new_not_updating_level_0(index, vec![fid], FacetType::String)
.execute(wtxn)?
tracing::debug!(%fid, "bulk string facet processing in parallel");
generate_facet_levels(index, wtxn, fid, FacetType::String)?
}
FacetFieldIdDelta::Incremental(delta_data) => {
progress.update_progress(PostProcessingFacets::StringsIncremental);

View File

@@ -1,11 +1,12 @@
use std::cell::RefCell;
use std::collections::BTreeSet;
use std::io::{BufReader, BufWriter, Read, Seek, Write};
use std::iter;
use hashbrown::HashMap;
use heed::types::Bytes;
use heed::{BytesDecode, Database, Error, RoTxn, RwTxn};
use rayon::iter::{IntoParallelIterator, ParallelIterator as _};
use rayon::iter::{IndexedParallelIterator as _, IntoParallelIterator, ParallelIterator as _};
use roaring::MultiOps;
use tempfile::spooled_tempfile;
use thread_local::ThreadLocal;
@@ -151,25 +152,35 @@ impl<'a, 'rtxn> FrozenPrefixBitmaps<'a, 'rtxn> {
unsafe impl Sync for FrozenPrefixBitmaps<'_, '_> {}
struct WordPrefixIntegerDocids {
struct WordPrefixIntegerDocids<'i> {
index: &'i Index,
database: Database<Bytes, CboRoaringBitmapCodec>,
prefix_database: Database<Bytes, CboRoaringBitmapCodec>,
max_memory_by_thread: Option<usize>,
read_uncommitted_in_parallel: bool,
}
impl WordPrefixIntegerDocids {
impl<'i> WordPrefixIntegerDocids<'i> {
fn new(
index: &'i Index,
database: Database<Bytes, CboRoaringBitmapCodec>,
prefix_database: Database<Bytes, CboRoaringBitmapCodec>,
grenad_parameters: &GrenadParameters,
) -> WordPrefixIntegerDocids {
grenad_parameters: &'_ GrenadParameters,
) -> WordPrefixIntegerDocids<'i> {
WordPrefixIntegerDocids {
index,
database,
prefix_database,
max_memory_by_thread: grenad_parameters.max_memory_by_thread(),
read_uncommitted_in_parallel: false,
}
}
/// Use an experimental LMDB feature to read uncommitted data in parallel.
///
/// When `value` is `true`, `execute` recomputes the prefixes with
/// `recompute_modified_prefixes_no_frozen` instead of
/// `recompute_modified_prefixes`. The flag is `false` on a freshly
/// created builder.
fn read_uncommitted_in_parallel(&mut self, value: bool) {
self.read_uncommitted_in_parallel = value;
}
fn execute(
self,
wtxn: &mut heed::RwTxn,
@@ -177,7 +188,144 @@ impl WordPrefixIntegerDocids {
prefix_to_delete: &BTreeSet<Prefix>,
) -> Result<()> {
delete_prefixes(wtxn, &self.prefix_database, prefix_to_delete)?;
self.recompute_modified_prefixes(wtxn, prefix_to_compute)
if self.read_uncommitted_in_parallel {
self.recompute_modified_prefixes_no_frozen(wtxn, prefix_to_compute)
} else {
self.recompute_modified_prefixes(wtxn, prefix_to_compute)
}
}
/// Computes the same as `recompute_modified_prefixes`.
///
/// ...but without aggregating the prefixes mmap pointers into a static HashMap
/// beforehand and rather use an experimental LMDB feature to read the subset
/// of prefixes in parallel from the uncommitted transaction.
///
/// One nested read transaction is opened on `wtxn` per rayon thread and the
/// prefixes are distributed round-robin between the threads. For each prefix
/// a thread gathers the bitmaps of every matching entry **grouped by
/// position**, serializes one union per `(prefix, position)` pair into a
/// spooled temporary file and records its length. Once every thread is done,
/// the files are read back sequentially and written into the prefix database
/// through `wtxn`.
///
/// The entries returned by the prefix iteration are ordered by word first and
/// by position second, so the entries sharing a position are generally *not*
/// contiguous. This is why the bitmaps are collected in a map keyed by the
/// position rather than by detecting runs of equal positions.
#[tracing::instrument(level = "trace", skip_all, target = "indexing::prefix")]
fn recompute_modified_prefixes_no_frozen(
    &self,
    wtxn: &mut RwTxn,
    prefixes: &BTreeSet<Prefix>,
) -> Result<()> {
    let thread_count = rayon::current_num_threads();
    let rtxns = iter::repeat_with(|| self.index.env.nested_read_txn(wtxn))
        .take(thread_count)
        .collect::<heed::Result<Vec<_>>>()?;

    let outputs = rtxns
        .into_par_iter()
        .enumerate()
        .map(|(thread_id, rtxn)| {
            // `indexes` describes, in order, the entries serialized in `file`.
            let mut indexes = Vec::new();
            let mut file = BufWriter::new(spooled_tempfile(
                self.max_memory_by_thread.unwrap_or(usize::MAX),
            ));

            let mut buffer = Vec::new();
            for (prefix_index, prefix) in prefixes.iter().enumerate() {
                // Is prefix for another thread?
                if prefix_index % thread_count != thread_id {
                    continue;
                }

                // Gather the raw bitmaps of all the words starting with this
                // prefix, grouped by the position found in their key.
                let mut bitmaps_by_pos = std::collections::BTreeMap::<u16, Vec<&[u8]>>::new();
                for result in self
                    .database
                    .remap_data_type::<Bytes>()
                    .prefix_iter(&rtxn, prefix.as_bytes())?
                {
                    let (key, current_bitmap_bytes) = result?;
                    let (_word, pos) =
                        StrBEU16Codec::bytes_decode(key).map_err(Error::Decoding)?;
                    bitmaps_by_pos.entry(pos).or_default().push(current_bitmap_bytes);
                }

                // Every position present in the map has at least one bitmap,
                // so each of them produces exactly one serialized union.
                for (pos, bitmaps_bytes) in bitmaps_by_pos {
                    let output = bitmaps_bytes
                        .iter()
                        .map(|bytes| CboRoaringBitmapCodec::deserialize_from(bytes))
                        .union()?;
                    buffer.clear();
                    CboRoaringBitmapCodec::serialize_into_vec(&output, &mut buffer);
                    indexes.push(PrefixIntegerEntry {
                        prefix,
                        pos,
                        serialized_length: Some(buffer.len()),
                    });
                    file.write_all(&buffer)?;
                }
            }

            Ok((indexes, file))
        })
        .collect::<Result<Vec<_>>>()?;

    // We iterate over all the collected and serialized bitmaps through
    // the files and entries to eventually put them in the final database.
    let mut key_buffer = Vec::new();
    let mut buffer = Vec::new();
    for (index, file) in outputs {
        let mut file = file.into_inner().map_err(|e| e.into_error())?;
        file.rewind()?;
        let mut file = BufReader::new(file);
        for PrefixIntegerEntry { prefix, pos, serialized_length } in index {
            // The key is the prefix, a zero byte and the big-endian position.
            key_buffer.clear();
            key_buffer.extend_from_slice(prefix.as_bytes());
            key_buffer.push(0);
            key_buffer.extend_from_slice(&pos.to_be_bytes());
            match serialized_length {
                Some(serialized_length) => {
                    buffer.resize(serialized_length, 0);
                    file.read_exact(&mut buffer)?;
                    self.prefix_database.remap_data_type::<Bytes>().put(
                        wtxn,
                        &key_buffer,
                        &buffer,
                    )?;
                }
                None => {
                    self.prefix_database.delete(wtxn, &key_buffer)?;
                }
            }
        }
    }

    Ok(())
}
#[tracing::instrument(level = "trace", skip_all, target = "indexing::prefix")]
@@ -262,7 +410,7 @@ impl WordPrefixIntegerDocids {
}
}
/// Represents a prefix and the lenght the bitmap takes on disk.
/// Represents a prefix and the length the bitmap takes on disk.
struct PrefixIntegerEntry<'a> {
prefix: &'a str,
pos: u16,
@@ -362,12 +510,14 @@ pub fn compute_word_prefix_fid_docids(
prefix_to_delete: &BTreeSet<Prefix>,
grenad_parameters: &GrenadParameters,
) -> Result<()> {
WordPrefixIntegerDocids::new(
let mut builder = WordPrefixIntegerDocids::new(
index,
index.word_fid_docids.remap_key_type(),
index.word_prefix_fid_docids.remap_key_type(),
grenad_parameters,
)
.execute(wtxn, prefix_to_compute, prefix_to_delete)
);
builder.read_uncommitted_in_parallel(true);
builder.execute(wtxn, prefix_to_compute, prefix_to_delete)
}
#[tracing::instrument(level = "trace", skip_all, target = "indexing::prefix")]
@@ -378,10 +528,12 @@ pub fn compute_word_prefix_position_docids(
prefix_to_delete: &BTreeSet<Prefix>,
grenad_parameters: &GrenadParameters,
) -> Result<()> {
WordPrefixIntegerDocids::new(
let mut builder = WordPrefixIntegerDocids::new(
index,
index.word_position_docids.remap_key_type(),
index.word_prefix_position_docids.remap_key_type(),
grenad_parameters,
)
.execute(wtxn, prefix_to_compute, prefix_to_delete)
);
builder.read_uncommitted_in_parallel(true);
builder.execute(wtxn, prefix_to_compute, prefix_to_delete)
}