mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-11-24 05:26:57 +00:00
Compare commits
35 Commits
prototype-
...
prototype-
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
7f8a1ac0be | ||
|
|
1a67163ee8 | ||
|
|
38141de68d | ||
|
|
7a98b80687 | ||
|
|
229a12c8e6 | ||
|
|
2fdfe79400 | ||
|
|
9184b12a26 | ||
|
|
742378d8e1 | ||
|
|
6dcd739a8b | ||
|
|
c29bdcae23 | ||
|
|
75219181a3 | ||
|
|
a5b5cf7cd1 | ||
|
|
142ba8ea00 | ||
|
|
4bc823e07c | ||
|
|
db06ca7138 | ||
|
|
95595a768e | ||
|
|
36f649768e | ||
|
|
0c6fc243f2 | ||
|
|
dfc46d5627 | ||
|
|
11d55f2121 | ||
|
|
014da57cf6 | ||
|
|
70a0ff4a8f | ||
|
|
dd0d5e4b90 | ||
|
|
15b3bb1700 | ||
|
|
077ec2ab11 | ||
|
|
f25db0795e | ||
|
|
c50a337c29 | ||
|
|
efeae09ce1 | ||
|
|
ad55b48664 | ||
|
|
94eabd34e6 | ||
|
|
6935589f74 | ||
|
|
4beb452027 | ||
|
|
b722da303a | ||
|
|
6f0d26c22c | ||
|
|
d52c7dcc94 |
1
.github/dependabot.yml
vendored
1
.github/dependabot.yml
vendored
@@ -7,6 +7,5 @@ updates:
|
||||
schedule:
|
||||
interval: "monthly"
|
||||
labels:
|
||||
- 'skip changelog'
|
||||
- 'dependencies'
|
||||
rebase-strategy: disabled
|
||||
|
||||
6
.github/release-draft-template.yml
vendored
6
.github/release-draft-template.yml
vendored
@@ -18,6 +18,7 @@ categories:
|
||||
label: 'security'
|
||||
- title: '⚙️ Maintenance/misc'
|
||||
label:
|
||||
- 'dependencies'
|
||||
- 'maintenance'
|
||||
- 'documentation'
|
||||
template: |
|
||||
@@ -26,8 +27,3 @@ template: |
|
||||
❤️ Huge thanks to our contributors: $CONTRIBUTORS.
|
||||
no-changes-template: 'Changes are coming soon 😎'
|
||||
sort-direction: 'ascending'
|
||||
replacers:
|
||||
- search: '/(?:and )?@dependabot-preview(?:\[bot\])?,?/g'
|
||||
replace: ''
|
||||
- search: '/(?:and )?@dependabot(?:\[bot\])?,?/g'
|
||||
replace: ''
|
||||
|
||||
2
.github/workflows/publish-docker-images.yml
vendored
2
.github/workflows/publish-docker-images.yml
vendored
@@ -65,7 +65,7 @@ jobs:
|
||||
uses: docker/setup-buildx-action@v3
|
||||
|
||||
- name: Install cosign
|
||||
uses: sigstore/cosign-installer@d58896d6a1865668819e1d91763c7751a165e159 # tag=v3.9.2
|
||||
uses: sigstore/cosign-installer@d7543c93d881b35a8faa02e8e3605f69b7a1ce62 # tag=v3.10.0
|
||||
|
||||
- name: Login to Docker Hub
|
||||
uses: docker/login-action@v3
|
||||
|
||||
13
.github/workflows/publish-release-assets.yml
vendored
13
.github/workflows/publish-release-assets.yml
vendored
@@ -11,7 +11,7 @@ jobs:
|
||||
check-version:
|
||||
name: Check the version validity
|
||||
runs-on: ubuntu-latest
|
||||
# No need to check the version for dry run (cron)
|
||||
# No need to check the version for dry run (cron or workflow_dispatch)
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
# Check if the tag has the v<number>.<number>.<number> format.
|
||||
@@ -48,7 +48,7 @@ jobs:
|
||||
- uses: dtolnay/rust-toolchain@1.89
|
||||
- name: Build
|
||||
run: cargo build --release --locked
|
||||
# No need to upload binaries for dry run (cron)
|
||||
# No need to upload binaries for dry run (cron or workflow_dispatch)
|
||||
- name: Upload binaries to release
|
||||
if: github.event_name == 'release'
|
||||
uses: svenstaro/upload-release-action@2.11.2
|
||||
@@ -78,7 +78,7 @@ jobs:
|
||||
- uses: dtolnay/rust-toolchain@1.89
|
||||
- name: Build
|
||||
run: cargo build --release --locked
|
||||
# No need to upload binaries for dry run (cron)
|
||||
# No need to upload binaries for dry run (cron or workflow_dispatch)
|
||||
- name: Upload binaries to release
|
||||
if: github.event_name == 'release'
|
||||
uses: svenstaro/upload-release-action@2.11.2
|
||||
@@ -111,7 +111,7 @@ jobs:
|
||||
command: build
|
||||
args: --release --target ${{ matrix.target }}
|
||||
- name: Upload the binary to release
|
||||
# No need to upload binaries for dry run (cron)
|
||||
# No need to upload binaries for dry run (cron or workflow_dispatch)
|
||||
if: github.event_name == 'release'
|
||||
uses: svenstaro/upload-release-action@2.11.2
|
||||
with:
|
||||
@@ -176,7 +176,7 @@ jobs:
|
||||
- name: List target output files
|
||||
run: ls -lR ./target
|
||||
- name: Upload the binary to release
|
||||
# No need to upload binaries for dry run (cron)
|
||||
# No need to upload binaries for dry run (cron or workflow_dispatch)
|
||||
if: github.event_name == 'release'
|
||||
uses: svenstaro/upload-release-action@2.11.2
|
||||
with:
|
||||
@@ -187,6 +187,7 @@ jobs:
|
||||
|
||||
publish-openapi-file:
|
||||
name: Publish OpenAPI file
|
||||
needs: check-version
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout code
|
||||
@@ -201,7 +202,7 @@ jobs:
|
||||
cd crates/openapi-generator
|
||||
cargo run --release -- --pretty --output ../../meilisearch.json
|
||||
- name: Upload OpenAPI to Release
|
||||
# No need to upload for dry run (cron)
|
||||
# No need to upload for dry run (cron or workflow_dispatch)
|
||||
if: github.event_name == 'release'
|
||||
uses: svenstaro/upload-release-action@2.11.2
|
||||
with:
|
||||
|
||||
16
.github/workflows/sdks-tests.yml
vendored
16
.github/workflows/sdks-tests.yml
vendored
@@ -50,7 +50,7 @@ jobs:
|
||||
with:
|
||||
repository: meilisearch/meilisearch-dotnet
|
||||
- name: Setup .NET Core
|
||||
uses: actions/setup-dotnet@v4
|
||||
uses: actions/setup-dotnet@v5
|
||||
with:
|
||||
dotnet-version: "8.0.x"
|
||||
- name: Install dependencies
|
||||
@@ -100,7 +100,7 @@ jobs:
|
||||
- '7700:7700'
|
||||
steps:
|
||||
- name: Set up Go
|
||||
uses: actions/setup-go@v5
|
||||
uses: actions/setup-go@v6
|
||||
with:
|
||||
go-version: stable
|
||||
- uses: actions/checkout@v5
|
||||
@@ -135,13 +135,13 @@ jobs:
|
||||
- name: Set up Java
|
||||
uses: actions/setup-java@v5
|
||||
with:
|
||||
java-version: 8
|
||||
distribution: 'zulu'
|
||||
java-version: 17
|
||||
distribution: 'temurin'
|
||||
cache: gradle
|
||||
- name: Grant execute permission for gradlew
|
||||
run: chmod +x gradlew
|
||||
- name: Build and run unit and integration tests
|
||||
run: ./gradlew build integrationTest
|
||||
run: ./gradlew build integrationTest --info
|
||||
|
||||
meilisearch-js-tests:
|
||||
needs: define-docker-image
|
||||
@@ -160,7 +160,7 @@ jobs:
|
||||
with:
|
||||
repository: meilisearch/meilisearch-js
|
||||
- name: Setup node
|
||||
uses: actions/setup-node@v4
|
||||
uses: actions/setup-node@v5
|
||||
with:
|
||||
cache: 'yarn'
|
||||
- name: Install dependencies
|
||||
@@ -224,7 +224,7 @@ jobs:
|
||||
with:
|
||||
repository: meilisearch/meilisearch-python
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v5
|
||||
uses: actions/setup-python@v6
|
||||
- name: Install pipenv
|
||||
uses: dschep/install-pipenv-action@v1
|
||||
- name: Install dependencies
|
||||
@@ -318,7 +318,7 @@ jobs:
|
||||
with:
|
||||
repository: meilisearch/meilisearch-js-plugins
|
||||
- name: Setup node
|
||||
uses: actions/setup-node@v4
|
||||
uses: actions/setup-node@v5
|
||||
with:
|
||||
cache: yarn
|
||||
- name: Install dependencies
|
||||
|
||||
89
Cargo.lock
generated
89
Cargo.lock
generated
@@ -453,9 +453,8 @@ checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50"
|
||||
|
||||
[[package]]
|
||||
name = "arroy"
|
||||
version = "0.6.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8578a72223dfa13dfd9fc144d15260d134361789ebdea9b16e85a511edc73c7d"
|
||||
version = "0.6.4-nested-rtxns"
|
||||
source = "git+https://github.com/meilisearch/arroy?branch=use-heed-nested-rtxns#61c8f4f0addeff968e80438018d0aee2c1eb8d67"
|
||||
dependencies = [
|
||||
"bytemuck",
|
||||
"byteorder",
|
||||
@@ -589,7 +588,7 @@ source = "git+https://github.com/meilisearch/bbqueue#cbb87cc707b5af415ef203bdaf2
|
||||
|
||||
[[package]]
|
||||
name = "benchmarks"
|
||||
version = "1.22.0"
|
||||
version = "1.22.1"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"bumpalo",
|
||||
@@ -799,7 +798,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "build-info"
|
||||
version = "1.22.0"
|
||||
version = "1.22.1"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"time",
|
||||
@@ -1075,9 +1074,8 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "cellulite"
|
||||
version = "0.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "71a41aa2cd021bc3f23d97cc1e645848ca8c279fc757d1570ba7fe7ddc021290"
|
||||
version = "0.3.1-nested-rtxns"
|
||||
source = "git+https://github.com/meilisearch/cellulite?branch=use-heed-nested-rtxns#9fb1866cc49277d26f606769112fa704944ccc61"
|
||||
dependencies = [
|
||||
"crossbeam",
|
||||
"geo",
|
||||
@@ -1092,7 +1090,7 @@ dependencies = [
|
||||
"steppe",
|
||||
"thiserror 2.0.16",
|
||||
"thread_local",
|
||||
"zerometry 0.3.0",
|
||||
"zerometry",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -1829,7 +1827,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "dump"
|
||||
version = "1.22.0"
|
||||
version = "1.22.1"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"big_s",
|
||||
@@ -2072,7 +2070,7 @@ checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be"
|
||||
|
||||
[[package]]
|
||||
name = "file-store"
|
||||
version = "1.22.0"
|
||||
version = "1.22.1"
|
||||
dependencies = [
|
||||
"tempfile",
|
||||
"thiserror 2.0.16",
|
||||
@@ -2094,7 +2092,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "filter-parser"
|
||||
version = "1.22.0"
|
||||
version = "1.22.1"
|
||||
dependencies = [
|
||||
"insta",
|
||||
"levenshtein_automata",
|
||||
@@ -2122,7 +2120,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "flatten-serde-json"
|
||||
version = "1.22.0"
|
||||
version = "1.22.1"
|
||||
dependencies = [
|
||||
"criterion",
|
||||
"serde_json",
|
||||
@@ -2279,7 +2277,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "fuzzers"
|
||||
version = "1.22.0"
|
||||
version = "1.22.1"
|
||||
dependencies = [
|
||||
"arbitrary",
|
||||
"bumpalo",
|
||||
@@ -2577,7 +2575,6 @@ dependencies = [
|
||||
"num-traits",
|
||||
"robust",
|
||||
"rstar",
|
||||
"spade",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -2588,7 +2585,6 @@ checksum = "75a4dcd69d35b2c87a7c83bce9af69fd65c9d68d3833a0ded568983928f3fc99"
|
||||
dependencies = [
|
||||
"approx",
|
||||
"num-traits",
|
||||
"rayon",
|
||||
"rstar",
|
||||
"serde",
|
||||
]
|
||||
@@ -2760,9 +2756,8 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "hannoy"
|
||||
version = "0.0.8"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0dba13a271c49a119a97862ebf0a74131d879832868400d9fcd937b790058fdd"
|
||||
version = "0.0.9-nested-rtxns"
|
||||
source = "git+https://github.com/nnethercott/hannoy?branch=use-heed-nested-rtxns#d4ca5454eff6539e9fc2119f07113abebbda0a39"
|
||||
dependencies = [
|
||||
"bytemuck",
|
||||
"byteorder",
|
||||
@@ -2840,9 +2835,9 @@ checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
|
||||
|
||||
[[package]]
|
||||
name = "heed"
|
||||
version = "0.22.0"
|
||||
version = "0.22.1-nested-rtxns"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6a56c94661ddfb51aa9cdfbf102cfcc340aa69267f95ebccc4af08d7c530d393"
|
||||
checksum = "0ff115ba5712b1f1fc7617b195f5c2f139e29c397ff79da040cd19db75ccc240"
|
||||
dependencies = [
|
||||
"bitflags 2.9.4",
|
||||
"byteorder",
|
||||
@@ -2852,7 +2847,6 @@ dependencies = [
|
||||
"lmdb-master-sys",
|
||||
"once_cell",
|
||||
"page_size",
|
||||
"serde",
|
||||
"synchronoise",
|
||||
"url",
|
||||
]
|
||||
@@ -3060,7 +3054,6 @@ dependencies = [
|
||||
"i_key_sort",
|
||||
"i_shape",
|
||||
"i_tree",
|
||||
"rayon",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -3237,7 +3230,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "index-scheduler"
|
||||
version = "1.22.0"
|
||||
version = "1.22.1"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"backoff",
|
||||
@@ -3491,7 +3484,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "json-depth-checker"
|
||||
version = "1.22.0"
|
||||
version = "1.22.1"
|
||||
dependencies = [
|
||||
"criterion",
|
||||
"serde_json",
|
||||
@@ -3892,9 +3885,9 @@ checksum = "241eaef5fd12c88705a01fc1066c48c4b36e0dd4377dcdc7ec3942cea7a69956"
|
||||
|
||||
[[package]]
|
||||
name = "lmdb-master-sys"
|
||||
version = "0.2.5"
|
||||
version = "0.2.6-nested-rtxns"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "864808e0b19fb6dd3b70ba94ee671b82fce17554cf80aeb0a155c65bb08027df"
|
||||
checksum = "f4ff85130e3c994b36877045fbbb138d521dea7197bfc19dc3d5d95101a8e20a"
|
||||
dependencies = [
|
||||
"cc",
|
||||
"doxygen-rs",
|
||||
@@ -4000,7 +3993,7 @@ checksum = "490cc448043f947bae3cbee9c203358d62dbee0db12107a74be5c30ccfd09771"
|
||||
|
||||
[[package]]
|
||||
name = "meili-snap"
|
||||
version = "1.22.0"
|
||||
version = "1.22.1"
|
||||
dependencies = [
|
||||
"insta",
|
||||
"md5",
|
||||
@@ -4011,7 +4004,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "meilisearch"
|
||||
version = "1.22.0"
|
||||
version = "1.22.1"
|
||||
dependencies = [
|
||||
"actix-cors",
|
||||
"actix-http",
|
||||
@@ -4108,7 +4101,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "meilisearch-auth"
|
||||
version = "1.22.0"
|
||||
version = "1.22.1"
|
||||
dependencies = [
|
||||
"base64 0.22.1",
|
||||
"enum-iterator",
|
||||
@@ -4127,7 +4120,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "meilisearch-types"
|
||||
version = "1.22.0"
|
||||
version = "1.22.1"
|
||||
dependencies = [
|
||||
"actix-web",
|
||||
"anyhow",
|
||||
@@ -4162,7 +4155,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "meilitool"
|
||||
version = "1.22.0"
|
||||
version = "1.22.1"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"clap",
|
||||
@@ -4196,7 +4189,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "milli"
|
||||
version = "1.22.0"
|
||||
version = "1.22.1"
|
||||
dependencies = [
|
||||
"allocator-api2 0.3.1",
|
||||
"arroy",
|
||||
@@ -4275,7 +4268,7 @@ dependencies = [
|
||||
"url",
|
||||
"utoipa",
|
||||
"uuid",
|
||||
"zerometry 0.1.0",
|
||||
"zerometry",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -4777,7 +4770,7 @@ checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220"
|
||||
|
||||
[[package]]
|
||||
name = "permissive-json-pointer"
|
||||
version = "1.22.0"
|
||||
version = "1.22.1"
|
||||
dependencies = [
|
||||
"big_s",
|
||||
"serde_json",
|
||||
@@ -6145,18 +6138,6 @@ dependencies = [
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "spade"
|
||||
version = "2.15.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fb313e1c8afee5b5647e00ee0fe6855e3d529eb863a0fdae1d60006c4d1e9990"
|
||||
dependencies = [
|
||||
"hashbrown 0.15.5",
|
||||
"num-traits",
|
||||
"robust",
|
||||
"smallvec",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "spin"
|
||||
version = "0.5.2"
|
||||
@@ -7836,7 +7817,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "xtask"
|
||||
version = "1.22.0"
|
||||
version = "1.22.1"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"build-info",
|
||||
@@ -7983,18 +7964,6 @@ dependencies = [
|
||||
"syn 2.0.106",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "zerometry"
|
||||
version = "0.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "681f08f3f4ef27d3021a128eb6d8df1cd781e4c9c797c3971c1f85316374f977"
|
||||
dependencies = [
|
||||
"bytemuck",
|
||||
"byteorder",
|
||||
"geo",
|
||||
"geo-types",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "zerometry"
|
||||
version = "0.3.0"
|
||||
|
||||
@@ -23,7 +23,7 @@ members = [
|
||||
]
|
||||
|
||||
[workspace.package]
|
||||
version = "1.22.0"
|
||||
version = "1.22.1"
|
||||
authors = [
|
||||
"Quentin de Quelen <quentin@dequelen.me>",
|
||||
"Clément Renault <clement@meilisearch.com>",
|
||||
|
||||
@@ -121,7 +121,7 @@ If you want to know more about the kind of data we collect and what we use it fo
|
||||
|
||||
Meilisearch is a search engine created by [Meili](https://www.meilisearch.com/careers), a software development company headquartered in France and with team members all over the world. Want to know more about us? [Check out our blog!](https://blog.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=contact)
|
||||
|
||||
🗞 [Subscribe to our newsletter](https://meilisearch.us2.list-manage.com/subscribe?u=27870f7b71c908a8b359599fb&id=79582d828e) if you don't want to miss any updates! We promise we won't clutter your mailbox: we only send one edition every two months.
|
||||
🗞 [Subscribe to our newsletter](https://share-eu1.hsforms.com/1LN5N0x_GQgq7ss7tXmSykwfg3aq) if you don't want to miss any updates! We promise we won't clutter your mailbox: we only send one edition every two months.
|
||||
|
||||
💌 Want to make a suggestion or give feedback? Here are some of the channels where you can reach us:
|
||||
|
||||
|
||||
@@ -310,7 +310,8 @@ impl Queue {
|
||||
| self.tasks.status.get(wtxn, &Status::Failed)?.unwrap_or_default()
|
||||
| self.tasks.status.get(wtxn, &Status::Canceled)?.unwrap_or_default();
|
||||
|
||||
let to_delete = RoaringBitmap::from_iter(finished.into_iter().rev().take(100_000));
|
||||
let to_delete =
|
||||
RoaringBitmap::from_sorted_iter(finished.into_iter().take(100_000)).unwrap();
|
||||
|
||||
// /!\ the len must be at least 2 or else we might enter an infinite loop where we only delete
|
||||
// the deletion tasks we enqueued ourselves.
|
||||
|
||||
@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
|
||||
[]
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: (1, 22, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
|
||||
0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: (1, 22, 1) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
|
||||
1 {uid: 1, batch_uid: 1, status: succeeded, details: { primary_key: Some("mouse"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
|
||||
2 {uid: 2, batch_uid: 2, status: succeeded, details: { primary_key: Some("bone"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
|
||||
3 {uid: 3, batch_uid: 3, status: failed, error: ResponseError { code: 200, message: "Index `doggo` already exists.", error_code: "index_already_exists", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#index_already_exists" }, details: { primary_key: Some("bone"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
|
||||
@@ -57,7 +57,7 @@ girafo: { number_of_documents: 0, field_distribution: {} }
|
||||
[timestamp] [4,]
|
||||
----------------------------------------------------------------------
|
||||
### All Batches:
|
||||
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.22.0"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
|
||||
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.22.1"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
|
||||
1 {uid: 1, details: {"primaryKey":"mouse"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"catto":1}}, stop reason: "created batch containing only task with id 1 of type `indexCreation` that cannot be batched with any other task.", }
|
||||
2 {uid: 2, details: {"primaryKey":"bone"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"doggo":1}}, stop reason: "created batch containing only task with id 2 of type `indexCreation` that cannot be batched with any other task.", }
|
||||
3 {uid: 3, details: {"primaryKey":"bone"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"indexCreation":1},"indexUids":{"doggo":1}}, stop reason: "created batch containing only task with id 3 of type `indexCreation` that cannot be batched with any other task.", }
|
||||
|
||||
@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
|
||||
[]
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: (1, 22, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
|
||||
0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: (1, 22, 1) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
|
||||
----------------------------------------------------------------------
|
||||
### Status:
|
||||
enqueued [0,]
|
||||
|
||||
@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
|
||||
[]
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: (1, 22, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
|
||||
0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: (1, 22, 1) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
|
||||
1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
|
||||
----------------------------------------------------------------------
|
||||
### Status:
|
||||
|
||||
@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
|
||||
[]
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: (1, 22, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
|
||||
0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: (1, 22, 1) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
|
||||
1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
|
||||
----------------------------------------------------------------------
|
||||
### Status:
|
||||
@@ -37,7 +37,7 @@ catto [1,]
|
||||
[timestamp] [0,]
|
||||
----------------------------------------------------------------------
|
||||
### All Batches:
|
||||
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.22.0"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
|
||||
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.22.1"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
|
||||
----------------------------------------------------------------------
|
||||
### Batch to tasks mapping:
|
||||
0 [0,]
|
||||
|
||||
@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
|
||||
[]
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: (1, 22, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
|
||||
0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: (1, 22, 1) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
|
||||
1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
|
||||
2 {uid: 2, status: enqueued, details: { primary_key: Some("bone"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
|
||||
----------------------------------------------------------------------
|
||||
@@ -40,7 +40,7 @@ doggo [2,]
|
||||
[timestamp] [0,]
|
||||
----------------------------------------------------------------------
|
||||
### All Batches:
|
||||
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.22.0"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
|
||||
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.22.1"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
|
||||
----------------------------------------------------------------------
|
||||
### Batch to tasks mapping:
|
||||
0 [0,]
|
||||
|
||||
@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
|
||||
[]
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: (1, 22, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
|
||||
0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: (1, 22, 1) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
|
||||
1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
|
||||
2 {uid: 2, status: enqueued, details: { primary_key: Some("bone"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
|
||||
3 {uid: 3, status: enqueued, details: { primary_key: Some("bone"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
|
||||
@@ -43,7 +43,7 @@ doggo [2,3,]
|
||||
[timestamp] [0,]
|
||||
----------------------------------------------------------------------
|
||||
### All Batches:
|
||||
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.22.0"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
|
||||
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.22.1"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
|
||||
----------------------------------------------------------------------
|
||||
### Batch to tasks mapping:
|
||||
0 [0,]
|
||||
|
||||
@@ -45,6 +45,7 @@ pub fn upgrade_index_scheduler(
|
||||
(1, 19, _) => 0,
|
||||
(1, 20, _) => 0,
|
||||
(1, 21, _) => 0,
|
||||
(1, 22, _) => 0,
|
||||
(major, minor, patch) => {
|
||||
if major > current_major
|
||||
|| (major == current_major && minor > current_minor)
|
||||
|
||||
@@ -418,3 +418,36 @@ async fn geo_radius() {
|
||||
}
|
||||
"#);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn bug_5904() {
|
||||
// https://github.com/meilisearch/meilisearch/issues/5904
|
||||
|
||||
let server = Server::new_shared();
|
||||
let index = server.unique_index();
|
||||
let (response, _code) =
|
||||
index.update_settings(json!({"filterableAttributes": ["_geojson"]})).await;
|
||||
server.wait_task(response.uid()).await.succeeded();
|
||||
|
||||
let geojson = json!({
|
||||
"id": 1,
|
||||
"_geojson": {
|
||||
"type": "FeatureCollection",
|
||||
"features": [
|
||||
{
|
||||
"type": "Feature",
|
||||
"geometry": {
|
||||
"type": "Point",
|
||||
"coordinates": [
|
||||
4.23914,
|
||||
48.382893
|
||||
]
|
||||
},
|
||||
"properties": {}
|
||||
}
|
||||
]
|
||||
}
|
||||
});
|
||||
let (response, _code) = index.add_documents(geojson, Some("id")).await;
|
||||
server.wait_task(response.uid()).await.succeeded();
|
||||
}
|
||||
|
||||
@@ -43,7 +43,7 @@ async fn version_too_old() {
|
||||
std::fs::write(db_path.join("VERSION"), "1.11.9999").unwrap();
|
||||
let options = Opt { experimental_dumpless_upgrade: true, ..default_settings };
|
||||
let err = Server::new_with_options(options).await.map(|_| ()).unwrap_err();
|
||||
snapshot!(err, @"Database version 1.11.9999 is too old for the experimental dumpless upgrade feature. Please generate a dump using the v1.11.9999 and import it in the v1.22.0");
|
||||
snapshot!(err, @"Database version 1.11.9999 is too old for the experimental dumpless upgrade feature. Please generate a dump using the v1.11.9999 and import it in the v1.22.1");
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
@@ -58,7 +58,7 @@ async fn version_requires_downgrade() {
|
||||
std::fs::write(db_path.join("VERSION"), format!("{major}.{minor}.{patch}")).unwrap();
|
||||
let options = Opt { experimental_dumpless_upgrade: true, ..default_settings };
|
||||
let err = Server::new_with_options(options).await.map(|_| ()).unwrap_err();
|
||||
snapshot!(err, @"Database version 1.22.1 is higher than the Meilisearch version 1.22.0. Downgrade is not supported");
|
||||
snapshot!(err, @"Database version 1.22.2 is higher than the Meilisearch version 1.22.1. Downgrade is not supported");
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
|
||||
@@ -8,7 +8,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
|
||||
"progress": null,
|
||||
"details": {
|
||||
"upgradeFrom": "v1.12.0",
|
||||
"upgradeTo": "v1.22.0"
|
||||
"upgradeTo": "v1.22.1"
|
||||
},
|
||||
"stats": {
|
||||
"totalNbTasks": 1,
|
||||
|
||||
@@ -8,7 +8,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
|
||||
"progress": null,
|
||||
"details": {
|
||||
"upgradeFrom": "v1.12.0",
|
||||
"upgradeTo": "v1.22.0"
|
||||
"upgradeTo": "v1.22.1"
|
||||
},
|
||||
"stats": {
|
||||
"totalNbTasks": 1,
|
||||
|
||||
@@ -8,7 +8,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
|
||||
"progress": null,
|
||||
"details": {
|
||||
"upgradeFrom": "v1.12.0",
|
||||
"upgradeTo": "v1.22.0"
|
||||
"upgradeTo": "v1.22.1"
|
||||
},
|
||||
"stats": {
|
||||
"totalNbTasks": 1,
|
||||
|
||||
@@ -12,7 +12,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
|
||||
"canceledBy": null,
|
||||
"details": {
|
||||
"upgradeFrom": "v1.12.0",
|
||||
"upgradeTo": "v1.22.0"
|
||||
"upgradeTo": "v1.22.1"
|
||||
},
|
||||
"error": null,
|
||||
"duration": "[duration]",
|
||||
|
||||
@@ -12,7 +12,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
|
||||
"canceledBy": null,
|
||||
"details": {
|
||||
"upgradeFrom": "v1.12.0",
|
||||
"upgradeTo": "v1.22.0"
|
||||
"upgradeTo": "v1.22.1"
|
||||
},
|
||||
"error": null,
|
||||
"duration": "[duration]",
|
||||
|
||||
@@ -12,7 +12,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
|
||||
"canceledBy": null,
|
||||
"details": {
|
||||
"upgradeFrom": "v1.12.0",
|
||||
"upgradeTo": "v1.22.0"
|
||||
"upgradeTo": "v1.22.1"
|
||||
},
|
||||
"error": null,
|
||||
"duration": "[duration]",
|
||||
|
||||
@@ -8,7 +8,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
|
||||
"progress": null,
|
||||
"details": {
|
||||
"upgradeFrom": "v1.12.0",
|
||||
"upgradeTo": "v1.22.0"
|
||||
"upgradeTo": "v1.22.1"
|
||||
},
|
||||
"stats": {
|
||||
"totalNbTasks": 1,
|
||||
|
||||
@@ -12,7 +12,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
|
||||
"canceledBy": null,
|
||||
"details": {
|
||||
"upgradeFrom": "v1.12.0",
|
||||
"upgradeTo": "v1.22.0"
|
||||
"upgradeTo": "v1.22.1"
|
||||
},
|
||||
"error": null,
|
||||
"duration": "[duration]",
|
||||
|
||||
@@ -19,7 +19,7 @@ bstr = "1.12.0"
|
||||
bytemuck = { version = "1.23.1", features = ["extern_crate_alloc"] }
|
||||
byteorder = "1.5.0"
|
||||
charabia = { version = "0.9.7", default-features = false }
|
||||
cellulite = "0.3.0"
|
||||
cellulite = { version = "0.3.1-nested-rtxns", git = "https://github.com/meilisearch/cellulite", "branch" = "use-heed-nested-rtxns" }
|
||||
concat-arrays = "0.1.2"
|
||||
convert_case = "0.8.0"
|
||||
crossbeam-channel = "0.5.15"
|
||||
@@ -34,7 +34,7 @@ grenad = { version = "0.5.0", default-features = false, features = [
|
||||
"rayon",
|
||||
"tempfile",
|
||||
] }
|
||||
heed = { version = "0.22.0", default-features = false, features = [
|
||||
heed = { version = "0.22.1-nested-rtxns", default-features = false, features = [
|
||||
"serde-json",
|
||||
"serde-bincode",
|
||||
] }
|
||||
@@ -89,8 +89,8 @@ rhai = { version = "1.22.2", features = [
|
||||
"no_time",
|
||||
"sync",
|
||||
] }
|
||||
arroy = "0.6.3"
|
||||
hannoy = { version = "0.0.8", features = ["arroy"] }
|
||||
arroy = { version = "0.6.4-nested-rtxns", git = "https://github.com/meilisearch/arroy", "branch" = "use-heed-nested-rtxns" }
|
||||
hannoy = { version = "0.0.9-nested-rtxns", git = "https://github.com/nnethercott/hannoy", "branch" = "use-heed-nested-rtxns", features = ["arroy"] }
|
||||
rand = "0.8.5"
|
||||
tracing = "0.1.41"
|
||||
ureq = { version = "2.12.1", features = ["json"] }
|
||||
@@ -119,7 +119,7 @@ twox-hash = { version = "2.1.1", default-features = false, features = [
|
||||
"xxhash64",
|
||||
] }
|
||||
geo-types = "0.7.16"
|
||||
zerometry = "0.1.0"
|
||||
zerometry = "0.3.0"
|
||||
|
||||
[dev-dependencies]
|
||||
mimalloc = { version = "0.1.47", default-features = false }
|
||||
|
||||
@@ -180,12 +180,15 @@ where
|
||||
})
|
||||
.unwrap()?;
|
||||
|
||||
post_processing::post_process(
|
||||
indexing_context,
|
||||
wtxn,
|
||||
global_fields_ids_map,
|
||||
facet_field_ids_delta,
|
||||
)?;
|
||||
pool.install(|| {
|
||||
post_processing::post_process(
|
||||
indexing_context,
|
||||
wtxn,
|
||||
global_fields_ids_map,
|
||||
facet_field_ids_delta,
|
||||
)
|
||||
})
|
||||
.unwrap()?;
|
||||
|
||||
indexing_context.progress.update_progress(IndexingStep::BuildingGeoJson);
|
||||
index.cellulite.build(
|
||||
|
||||
@@ -0,0 +1,164 @@
|
||||
use std::fs::File;
|
||||
use std::io::BufReader;
|
||||
use std::{iter, mem};
|
||||
|
||||
use grenad::CompressionType;
|
||||
use heed::types::{Bytes, LazyDecode};
|
||||
use heed::{Database, RwTxn};
|
||||
use rayon::prelude::*;
|
||||
use roaring::MultiOps;
|
||||
use tempfile::tempfile;
|
||||
|
||||
use crate::facet::FacetType;
|
||||
use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec};
|
||||
use crate::heed_codec::BytesRefCodec;
|
||||
use crate::update::facet::{FACET_GROUP_SIZE, FACET_MIN_LEVEL_SIZE};
|
||||
use crate::update::{create_writer, writer_into_reader};
|
||||
use crate::{CboRoaringBitmapCodec, FieldId, Index};
|
||||
|
||||
/// Generate the facet level based on the level 0.
|
||||
///
|
||||
/// The function will generate all the group levels from
|
||||
/// the group 1 to the level n until the number of group
|
||||
/// is smaller than the minimum required size.
|
||||
pub fn generate_facet_levels(
|
||||
index: &Index,
|
||||
wtxn: &mut RwTxn,
|
||||
field_id: FieldId,
|
||||
facet_type: FacetType,
|
||||
) -> crate::Result<()> {
|
||||
let db = match facet_type {
|
||||
FacetType::String => index
|
||||
.facet_id_string_docids
|
||||
.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>()
|
||||
.lazily_decode_data(),
|
||||
FacetType::Number => index
|
||||
.facet_id_f64_docids
|
||||
.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>()
|
||||
.lazily_decode_data(),
|
||||
};
|
||||
|
||||
clear_levels(db, wtxn, field_id)?;
|
||||
|
||||
let mut base_level = 0;
|
||||
// That's a do-while loop
|
||||
while {
|
||||
let mut level_size = 0;
|
||||
for reader in compute_level(index, wtxn, db, field_id, base_level)? {
|
||||
let mut cursor = reader.into_cursor()?;
|
||||
while let Some((left_bound, facet_group_value)) = cursor.move_on_next()? {
|
||||
level_size += 1;
|
||||
let level = base_level.checked_add(1).unwrap();
|
||||
let key = FacetGroupKey { field_id, level, left_bound };
|
||||
debug_assert!(
|
||||
db.get(wtxn, &key).transpose().is_none(),
|
||||
"entry must not be there and must have already been deleted: {key:?}"
|
||||
);
|
||||
db.remap_data_type::<Bytes>().put(wtxn, &key, facet_group_value)?;
|
||||
}
|
||||
}
|
||||
|
||||
base_level += 1;
|
||||
|
||||
// If the next level will have the minimum required groups, continue.
|
||||
(level_size / FACET_GROUP_SIZE as usize) >= FACET_MIN_LEVEL_SIZE as usize
|
||||
} {}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Compute the groups of facets from the provided base level
|
||||
/// and write the content into different grenad files.
|
||||
fn compute_level(
|
||||
index: &Index,
|
||||
wtxn: &heed::RwTxn,
|
||||
db: Database<FacetGroupKeyCodec<BytesRefCodec>, LazyDecode<FacetGroupValueCodec>>,
|
||||
field_id: FieldId,
|
||||
base_level: u8,
|
||||
) -> Result<Vec<grenad::Reader<BufReader<File>>>, crate::Error> {
|
||||
let thread_count = rayon::current_num_threads();
|
||||
let rtxns = iter::repeat_with(|| index.env.nested_read_txn(wtxn))
|
||||
.take(thread_count)
|
||||
.collect::<heed::Result<Vec<_>>>()?;
|
||||
|
||||
let range = {
|
||||
// Based on the first possible value for the base level up to
|
||||
// the first possible value for the next level *excluded*.
|
||||
let left = FacetGroupKey::<&[u8]> { field_id, level: base_level, left_bound: &[] };
|
||||
let right = FacetGroupKey::<&[u8]> {
|
||||
field_id,
|
||||
level: base_level.checked_add(1).unwrap(),
|
||||
left_bound: &[],
|
||||
};
|
||||
left..right
|
||||
};
|
||||
|
||||
rtxns
|
||||
.into_par_iter()
|
||||
.enumerate()
|
||||
.map(|(thread_id, rtxn)| {
|
||||
let mut writer = tempfile().map(|f| create_writer(CompressionType::None, None, f))?;
|
||||
|
||||
let mut left_bound = None;
|
||||
let mut group_docids = Vec::new();
|
||||
let mut ser_buffer = Vec::new();
|
||||
for (i, result) in db.range(&rtxn, &range)?.enumerate() {
|
||||
let (key, lazy_value) = result?;
|
||||
|
||||
let start_of_group = i % FACET_GROUP_SIZE as usize == 0;
|
||||
let group_index = i / FACET_GROUP_SIZE as usize;
|
||||
let group_for_thread = group_index % thread_count == thread_id;
|
||||
|
||||
if group_for_thread {
|
||||
if start_of_group {
|
||||
if let Some(left_bound) = left_bound.take() {
|
||||
// We store the bitmaps in a Vec this way we can use
|
||||
// the MultiOps operations that tends to be more efficient
|
||||
// for unions. The Vec is empty after the operation.
|
||||
//
|
||||
// We also don't forget to store the group size corresponding
|
||||
// to the number of entries merged in this group.
|
||||
ser_buffer.clear();
|
||||
let group_len: u8 = group_docids.len().try_into().unwrap();
|
||||
ser_buffer.push(group_len);
|
||||
let group_docids = mem::take(&mut group_docids);
|
||||
let docids = group_docids.into_iter().union();
|
||||
CboRoaringBitmapCodec::serialize_into_vec(&docids, &mut ser_buffer);
|
||||
writer.insert(left_bound, &ser_buffer)?;
|
||||
}
|
||||
left_bound = Some(key.left_bound);
|
||||
}
|
||||
|
||||
// Lazily decode the bitmaps we are interested in.
|
||||
let value = lazy_value.decode().map_err(heed::Error::Decoding)?;
|
||||
group_docids.push(value.bitmap);
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(left_bound) = left_bound.take() {
|
||||
ser_buffer.clear();
|
||||
// We don't forget to store the group size corresponding
|
||||
// to the number of entries merged in this group.
|
||||
let group_len: u8 = group_docids.len().try_into().unwrap();
|
||||
ser_buffer.push(group_len);
|
||||
let group_docids = group_docids.into_iter().union();
|
||||
CboRoaringBitmapCodec::serialize_into_vec(&group_docids, &mut ser_buffer);
|
||||
writer.insert(left_bound, &ser_buffer)?;
|
||||
}
|
||||
|
||||
writer_into_reader(writer)
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Clears all the levels and only keeps the level 0 of the specified field id.
|
||||
fn clear_levels(
|
||||
db: Database<FacetGroupKeyCodec<BytesRefCodec>, LazyDecode<FacetGroupValueCodec>>,
|
||||
wtxn: &mut RwTxn<'_>,
|
||||
field_id: FieldId,
|
||||
) -> heed::Result<()> {
|
||||
let left = FacetGroupKey::<&[u8]> { field_id, level: 1, left_bound: &[] };
|
||||
let right = FacetGroupKey::<&[u8]> { field_id, level: u8::MAX, left_bound: &[] };
|
||||
let range = left..=right;
|
||||
db.delete_range(wtxn, &range).map(drop)
|
||||
}
|
||||
@@ -1,5 +1,6 @@
|
||||
use std::cmp::Ordering;
|
||||
|
||||
use facet_bulk::generate_facet_levels;
|
||||
use heed::types::{Bytes, DecodeIgnore, Str};
|
||||
use heed::RwTxn;
|
||||
use itertools::{merge_join_by, EitherOrBoth};
|
||||
@@ -23,6 +24,8 @@ use crate::update::new::FacetFieldIdsDelta;
|
||||
use crate::update::{FacetsUpdateBulk, GrenadParameters};
|
||||
use crate::{GlobalFieldsIdsMap, Index, Result};
|
||||
|
||||
mod facet_bulk;
|
||||
|
||||
pub(super) fn post_process<MSP>(
|
||||
indexing_context: IndexingContext<MSP>,
|
||||
wtxn: &mut RwTxn<'_>,
|
||||
@@ -239,9 +242,8 @@ fn compute_facet_level_database(
|
||||
match delta {
|
||||
FacetFieldIdDelta::Bulk => {
|
||||
progress.update_progress(PostProcessingFacets::StringsBulk);
|
||||
tracing::debug!(%fid, "bulk string facet processing");
|
||||
FacetsUpdateBulk::new_not_updating_level_0(index, vec![fid], FacetType::String)
|
||||
.execute(wtxn)?
|
||||
tracing::debug!(%fid, "bulk string facet processing in parallel");
|
||||
generate_facet_levels(index, wtxn, fid, FacetType::String)?
|
||||
}
|
||||
FacetFieldIdDelta::Incremental(delta_data) => {
|
||||
progress.update_progress(PostProcessingFacets::StringsIncremental);
|
||||
@@ -1,11 +1,12 @@
|
||||
use std::cell::RefCell;
|
||||
use std::collections::BTreeSet;
|
||||
use std::io::{BufReader, BufWriter, Read, Seek, Write};
|
||||
use std::iter;
|
||||
|
||||
use hashbrown::HashMap;
|
||||
use heed::types::Bytes;
|
||||
use heed::{BytesDecode, Database, Error, RoTxn, RwTxn};
|
||||
use rayon::iter::{IntoParallelIterator, ParallelIterator as _};
|
||||
use rayon::iter::{IndexedParallelIterator as _, IntoParallelIterator, ParallelIterator as _};
|
||||
use roaring::MultiOps;
|
||||
use tempfile::spooled_tempfile;
|
||||
use thread_local::ThreadLocal;
|
||||
@@ -151,25 +152,35 @@ impl<'a, 'rtxn> FrozenPrefixBitmaps<'a, 'rtxn> {
|
||||
|
||||
unsafe impl Sync for FrozenPrefixBitmaps<'_, '_> {}
|
||||
|
||||
struct WordPrefixIntegerDocids {
|
||||
struct WordPrefixIntegerDocids<'i> {
|
||||
index: &'i Index,
|
||||
database: Database<Bytes, CboRoaringBitmapCodec>,
|
||||
prefix_database: Database<Bytes, CboRoaringBitmapCodec>,
|
||||
max_memory_by_thread: Option<usize>,
|
||||
read_uncommitted_in_parallel: bool,
|
||||
}
|
||||
|
||||
impl WordPrefixIntegerDocids {
|
||||
impl<'i> WordPrefixIntegerDocids<'i> {
|
||||
fn new(
|
||||
index: &'i Index,
|
||||
database: Database<Bytes, CboRoaringBitmapCodec>,
|
||||
prefix_database: Database<Bytes, CboRoaringBitmapCodec>,
|
||||
grenad_parameters: &GrenadParameters,
|
||||
) -> WordPrefixIntegerDocids {
|
||||
grenad_parameters: &'_ GrenadParameters,
|
||||
) -> WordPrefixIntegerDocids<'i> {
|
||||
WordPrefixIntegerDocids {
|
||||
index,
|
||||
database,
|
||||
prefix_database,
|
||||
max_memory_by_thread: grenad_parameters.max_memory_by_thread(),
|
||||
read_uncommitted_in_parallel: false,
|
||||
}
|
||||
}
|
||||
|
||||
/// Use an experimental LMDB feature to read uncommitted data in parallel.
///
/// When `value` is `true`, `execute` recomputes the prefixes with
/// `recompute_modified_prefixes_no_frozen`; otherwise it uses
/// `recompute_modified_prefixes`. `new` initializes this flag to `false`.
|
||||
fn read_uncommitted_in_parallel(&mut self, value: bool) {
|
||||
self.read_uncommitted_in_parallel = value;
|
||||
}
|
||||
|
||||
fn execute(
|
||||
self,
|
||||
wtxn: &mut heed::RwTxn,
|
||||
@@ -177,7 +188,144 @@ impl WordPrefixIntegerDocids {
|
||||
prefix_to_delete: &BTreeSet<Prefix>,
|
||||
) -> Result<()> {
|
||||
delete_prefixes(wtxn, &self.prefix_database, prefix_to_delete)?;
|
||||
self.recompute_modified_prefixes(wtxn, prefix_to_compute)
|
||||
if self.read_uncommitted_in_parallel {
|
||||
self.recompute_modified_prefixes_no_frozen(wtxn, prefix_to_compute)
|
||||
} else {
|
||||
self.recompute_modified_prefixes(wtxn, prefix_to_compute)
|
||||
}
|
||||
}
|
||||
|
||||
/// Computes the same as `recompute_modified_prefixes`.
|
||||
///
|
||||
/// ...but without aggregating the prefixes mmap pointers into a static HashMap
|
||||
/// beforehand and rather use an experimental LMDB feature to read the subset
|
||||
/// of prefixes in parallel from the uncommitted transaction.
|
||||
#[tracing::instrument(level = "trace", skip_all, target = "indexing::prefix")]
|
||||
fn recompute_modified_prefixes_no_frozen(
|
||||
&self,
|
||||
wtxn: &mut RwTxn,
|
||||
prefixes: &BTreeSet<Prefix>,
|
||||
) -> Result<()> {
|
||||
let thread_count = rayon::current_num_threads();
|
||||
let rtxns = iter::repeat_with(|| self.index.env.nested_read_txn(wtxn))
|
||||
.take(thread_count)
|
||||
.collect::<heed::Result<Vec<_>>>()?;
|
||||
|
||||
let outputs = rtxns
|
||||
.into_par_iter()
|
||||
.enumerate()
|
||||
.map(|(thread_id, rtxn)| {
|
||||
// `indexes` represent offsets at which prefixes computations were stored in the `file`.
|
||||
let mut indexes = Vec::new();
|
||||
let mut file = BufWriter::new(spooled_tempfile(
|
||||
self.max_memory_by_thread.unwrap_or(usize::MAX),
|
||||
));
|
||||
|
||||
let mut buffer = Vec::new();
|
||||
for (prefix_index, prefix) in prefixes.iter().enumerate() {
|
||||
// Is prefix for another thread?
|
||||
if prefix_index % thread_count != thread_id {
|
||||
continue;
|
||||
}
|
||||
|
||||
let mut bitmaps_bytes = Vec::<&[u8]>::new();
|
||||
let mut prev_pos = None;
|
||||
for result in self
|
||||
.database
|
||||
.remap_data_type::<Bytes>()
|
||||
.prefix_iter(&rtxn, prefix.as_bytes())?
|
||||
{
|
||||
let (key, current_bitmap_bytes) = result?;
|
||||
let (_word, pos) =
|
||||
StrBEU16Codec::bytes_decode(key).map_err(Error::Decoding)?;
|
||||
|
||||
if prev_pos.is_some_and(|p| p != pos) {
|
||||
if bitmaps_bytes.is_empty() {
|
||||
indexes.push(PrefixIntegerEntry {
|
||||
prefix,
|
||||
pos,
|
||||
serialized_length: None,
|
||||
});
|
||||
} else {
|
||||
let output = bitmaps_bytes
|
||||
.iter()
|
||||
.map(|bytes| CboRoaringBitmapCodec::deserialize_from(bytes))
|
||||
.union()?;
|
||||
buffer.clear();
|
||||
CboRoaringBitmapCodec::serialize_into_vec(&output, &mut buffer);
|
||||
indexes.push(PrefixIntegerEntry {
|
||||
prefix,
|
||||
pos,
|
||||
serialized_length: Some(buffer.len()),
|
||||
});
|
||||
file.write_all(&buffer)?;
|
||||
bitmaps_bytes.clear();
|
||||
}
|
||||
}
|
||||
|
||||
bitmaps_bytes.push(current_bitmap_bytes);
|
||||
prev_pos = Some(pos);
|
||||
}
|
||||
|
||||
if let Some(pos) = prev_pos {
|
||||
if bitmaps_bytes.is_empty() {
|
||||
indexes.push(PrefixIntegerEntry {
|
||||
prefix,
|
||||
pos,
|
||||
serialized_length: None,
|
||||
});
|
||||
} else {
|
||||
let output = bitmaps_bytes
|
||||
.iter()
|
||||
.map(|bytes| CboRoaringBitmapCodec::deserialize_from(bytes))
|
||||
.union()?;
|
||||
buffer.clear();
|
||||
CboRoaringBitmapCodec::serialize_into_vec(&output, &mut buffer);
|
||||
indexes.push(PrefixIntegerEntry {
|
||||
prefix,
|
||||
pos,
|
||||
serialized_length: Some(buffer.len()),
|
||||
});
|
||||
file.write_all(&buffer)?;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok((indexes, file))
|
||||
})
|
||||
.collect::<Result<Vec<_>>>()?;
|
||||
|
||||
// We iterate over all the collected and serialized bitmaps through
|
||||
// the files and entries to eventually put them in the final database.
|
||||
let mut key_buffer = Vec::new();
|
||||
let mut buffer = Vec::new();
|
||||
for (index, file) in outputs {
|
||||
let mut file = file.into_inner().map_err(|e| e.into_error())?;
|
||||
file.rewind()?;
|
||||
let mut file = BufReader::new(file);
|
||||
for PrefixIntegerEntry { prefix, pos, serialized_length } in index {
|
||||
key_buffer.clear();
|
||||
key_buffer.extend_from_slice(prefix.as_bytes());
|
||||
key_buffer.push(0);
|
||||
key_buffer.extend_from_slice(&pos.to_be_bytes());
|
||||
match serialized_length {
|
||||
Some(serialized_length) => {
|
||||
buffer.resize(serialized_length, 0);
|
||||
file.read_exact(&mut buffer)?;
|
||||
self.prefix_database.remap_data_type::<Bytes>().put(
|
||||
wtxn,
|
||||
&key_buffer,
|
||||
&buffer,
|
||||
)?;
|
||||
}
|
||||
None => {
|
||||
self.prefix_database.delete(wtxn, &key_buffer)?;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tracing::instrument(level = "trace", skip_all, target = "indexing::prefix")]
|
||||
@@ -262,7 +410,7 @@ impl WordPrefixIntegerDocids {
|
||||
}
|
||||
}
|
||||
|
||||
/// Represents a prefix and the lenght the bitmap takes on disk.
|
||||
/// Represents a prefix and the length the bitmap takes on disk.
|
||||
struct PrefixIntegerEntry<'a> {
|
||||
prefix: &'a str,
|
||||
pos: u16,
|
||||
@@ -362,12 +510,14 @@ pub fn compute_word_prefix_fid_docids(
|
||||
prefix_to_delete: &BTreeSet<Prefix>,
|
||||
grenad_parameters: &GrenadParameters,
|
||||
) -> Result<()> {
|
||||
WordPrefixIntegerDocids::new(
|
||||
let mut builder = WordPrefixIntegerDocids::new(
|
||||
index,
|
||||
index.word_fid_docids.remap_key_type(),
|
||||
index.word_prefix_fid_docids.remap_key_type(),
|
||||
grenad_parameters,
|
||||
)
|
||||
.execute(wtxn, prefix_to_compute, prefix_to_delete)
|
||||
);
|
||||
builder.read_uncommitted_in_parallel(true);
|
||||
builder.execute(wtxn, prefix_to_compute, prefix_to_delete)
|
||||
}
|
||||
|
||||
#[tracing::instrument(level = "trace", skip_all, target = "indexing::prefix")]
|
||||
@@ -378,10 +528,12 @@ pub fn compute_word_prefix_position_docids(
|
||||
prefix_to_delete: &BTreeSet<Prefix>,
|
||||
grenad_parameters: &GrenadParameters,
|
||||
) -> Result<()> {
|
||||
WordPrefixIntegerDocids::new(
|
||||
let mut builder = WordPrefixIntegerDocids::new(
|
||||
index,
|
||||
index.word_position_docids.remap_key_type(),
|
||||
index.word_prefix_position_docids.remap_key_type(),
|
||||
grenad_parameters,
|
||||
)
|
||||
.execute(wtxn, prefix_to_compute, prefix_to_delete)
|
||||
);
|
||||
builder.read_uncommitted_in_parallel(true);
|
||||
builder.execute(wtxn, prefix_to_compute, prefix_to_delete)
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user