Compare commits

...

95 Commits

Author SHA1 Message Date
Louis Dureuil
c885171029 process: add cancelling points in process snapshot 2025-10-02 17:16:05 +02:00
Louis Dureuil
3870a374af Compression: implement cancellation and change env copy method 2025-10-02 16:56:25 +02:00
Louis Dureuil
d41716d8f0 Add MustStopProcessing::as_lambda 2025-10-02 16:50:44 +02:00
Louis Dureuil
43a6505435 Use PipedArchiveBuilder to process snapshots without compaction 2025-10-02 11:18:54 +02:00
Louis Dureuil
467e15d9c0 WIP: Add PipedArchiveBuilder 2025-10-02 11:18:13 +02:00
Louis Dureuil
91275adb76 Add necessary accessors 2025-10-02 11:12:51 +02:00
Many the fish
c29bdcae23 Merge pull request #5913 from meilisearch/dependabot/github_actions/actions/setup-python-6
Bump actions/setup-python from 5 to 6
2025-09-29 14:58:45 +00:00
Many the fish
75219181a3 Merge pull request #5834 from meilisearch/fix-openapi-ci
Minor improvement in OpenAPI CI
2025-09-29 13:55:12 +00:00
Many the fish
a5b5cf7cd1 Merge pull request #5916 from meilisearch/dependabot/github_actions/sigstore/cosign-installer-3.10.0
Bump sigstore/cosign-installer from 3.9.2 to 3.10.0
2025-09-29 13:52:31 +00:00
Many the fish
142ba8ea00 Merge pull request #5915 from meilisearch/dependabot/github_actions/actions/setup-node-5
Bump actions/setup-node from 4 to 5
2025-09-29 13:52:28 +00:00
Many the fish
4bc823e07c Merge pull request #5914 from meilisearch/dependabot/github_actions/actions/setup-dotnet-5
Bump actions/setup-dotnet from 4 to 5
2025-09-29 13:52:10 +00:00
Many the fish
db06ca7138 Merge pull request #5912 from meilisearch/dependabot/github_actions/actions/setup-go-6
Bump actions/setup-go from 5 to 6
2025-09-29 13:52:06 +00:00
Clément Renault
95595a768e Merge pull request #5911 from EazyAl/main
Update README.md to fix newsletter link
2025-09-29 13:10:16 +00:00
dependabot[bot]
36f649768e Bump sigstore/cosign-installer from 3.9.2 to 3.10.0
Bumps [sigstore/cosign-installer](https://github.com/sigstore/cosign-installer) from 3.9.2 to 3.10.0.
- [Release notes](https://github.com/sigstore/cosign-installer/releases)
- [Commits](d58896d6a1...d7543c93d8)

---
updated-dependencies:
- dependency-name: sigstore/cosign-installer
  dependency-version: 3.10.0
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-09-25 18:01:14 +00:00
dependabot[bot]
0c6fc243f2 Bump actions/setup-node from 4 to 5
Bumps [actions/setup-node](https://github.com/actions/setup-node) from 4 to 5.
- [Release notes](https://github.com/actions/setup-node/releases)
- [Commits](https://github.com/actions/setup-node/compare/v4...v5)

---
updated-dependencies:
- dependency-name: actions/setup-node
  dependency-version: '5'
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-09-25 18:01:11 +00:00
dependabot[bot]
dfc46d5627 Bump actions/setup-dotnet from 4 to 5
Bumps [actions/setup-dotnet](https://github.com/actions/setup-dotnet) from 4 to 5.
- [Release notes](https://github.com/actions/setup-dotnet/releases)
- [Commits](https://github.com/actions/setup-dotnet/compare/v4...v5)

---
updated-dependencies:
- dependency-name: actions/setup-dotnet
  dependency-version: '5'
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-09-25 18:01:08 +00:00
dependabot[bot]
11d55f2121 Bump actions/setup-python from 5 to 6
Bumps [actions/setup-python](https://github.com/actions/setup-python) from 5 to 6.
- [Release notes](https://github.com/actions/setup-python/releases)
- [Commits](https://github.com/actions/setup-python/compare/v5...v6)

---
updated-dependencies:
- dependency-name: actions/setup-python
  dependency-version: '6'
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-09-25 18:01:03 +00:00
dependabot[bot]
014da57cf6 Bump actions/setup-go from 5 to 6
Bumps [actions/setup-go](https://github.com/actions/setup-go) from 5 to 6.
- [Release notes](https://github.com/actions/setup-go/releases)
- [Commits](https://github.com/actions/setup-go/compare/v5...v6)

---
updated-dependencies:
- dependency-name: actions/setup-go
  dependency-version: '6'
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-09-25 18:01:00 +00:00
Clément Renault
70a0ff4a8f Merge pull request #5900 from meilisearch/show-dependencies
Show Dependabot dependency upgrade in the changelog
2025-09-25 16:04:03 +00:00
Clément Renault
dd0d5e4b90 Merge pull request #5910 from meilisearch/curquiza-patch-1
Change Java version in SDK CI
2025-09-25 14:32:16 +00:00
Ali Imran
15b3bb1700 Update README.md to fix newsletter link 2025-09-25 16:07:08 +02:00
Louis Dureuil
077ec2ab11 Merge pull request #5908 from meilisearch/update-version
Update version
2025-09-25 13:10:34 +00:00
Clémentine
f25db0795e Change Java version in SDK CI
Updated Java version and distribution in workflow.
2025-09-25 15:03:50 +02:00
Tamo
c50a337c29 bump version for 1.22.1 2025-09-25 13:44:44 +02:00
Tamo
efeae09ce1 Merge pull request #5906 from meilisearch/task-deletion-strategy
Delete oldest tasks first
2025-09-25 10:11:33 +00:00
Tamo
ad55b48664 Merge pull request #5907 from meilisearch/fix-geojson-bug
use the latest version of zerometry that supports collection, lines and multi-lines
2025-09-25 09:56:01 +00:00
Tamo
94eabd34e6 fmt 2025-09-25 11:01:53 +02:00
Tamo
6935589f74 use the latest version of zerometry that supports collection, lines and multi-lines 2025-09-25 10:31:07 +02:00
Louis Dureuil
4beb452027 Optimize by using from_sorted_iter
Co-authored-by: Tamo <tamo@meilisearch.com>
2025-09-25 10:16:30 +02:00
Louis Dureuil
b722da303a Do not start from the end of the finished tasks when selecting the tasks to delete 2025-09-25 09:54:58 +02:00
Louis Dureuil
ad39263b94 Merge pull request #5902 from meilisearch/bump-version
bump the version of meilisearch
2025-09-24 07:23:39 +00:00
Tamo
0ffb08b112 bump the version of meilisearch 2025-09-23 17:37:31 +02:00
Clément Renault
ff80b4d0ff Merge pull request #5891 from nnethercott/fix-hannoy-arroy-conversion
Bump `hannoy` to v0.0.8
2025-09-23 13:26:54 +00:00
Louis Dureuil
7fb4404928 Merge pull request #5758 from meilisearch/cellulite
Cellulite integration
2025-09-23 12:48:13 +00:00
Tamo
8405f0bf9c fmt 2025-09-23 13:55:36 +02:00
Tamo
3a7f9b56fe update cellulite 2025-09-23 13:55:36 +02:00
Louis Dureuil
61034e2e2e write geojson in obkv 2025-09-23 13:55:36 +02:00
Tamo
108d6d3344 remove a bunch of useless logs 2025-09-23 13:55:36 +02:00
Tamo
35bd00f6a1 continue previous commit 2025-09-23 13:55:36 +02:00
Tamo
69059d67ef stop returning the geojson field when iterating on the fields 2025-09-23 13:55:36 +02:00
Tamo
e13783103f use the CELLULITE constant 2025-09-23 13:55:36 +02:00
Tamo
f719665c4e update the filter-parser after updating its error messages 2025-09-23 13:55:36 +02:00
Tamo
638f284614 densify the shapes before storing them 2025-09-23 13:55:36 +02:00
Tamo
32ac98ed95 style improvement 2025-09-23 13:55:36 +02:00
Tamo
46aee695ca review the filters errors 2025-09-23 13:55:36 +02:00
Tamo
716c67f858 review and fix all error codes 2025-09-23 13:55:36 +02:00
Tamo
fec10bb2d6 update cellulite to the latest version 2025-09-23 13:55:36 +02:00
Mubelotix
3dac2cf73e Update tests 2025-09-23 13:55:36 +02:00
Mubelotix
03eca800e6 Support _geoRadius 2025-09-23 13:55:36 +02:00
Mubelotix
28fa2e960e Tolerate trailing comma 2025-09-23 13:55:36 +02:00
Mubelotix
a3b9220f84 Improve error message 2025-09-23 13:55:36 +02:00
Mubelotix
c09d48edf2 Fix coordinates order in filters 2025-09-23 13:55:36 +02:00
Mubelotix
ae4ab0ebbb Improve filter parser errors 2025-09-23 13:55:36 +02:00
Mubelotix
900a9a6d59 Reduce identations 2025-09-23 13:55:36 +02:00
Mubelotix
fc560e6730 Improve geo polygon errors 2025-09-23 13:55:36 +02:00
Mubelotix
e2a06470b7 Update tests 2025-09-23 13:55:36 +02:00
Mubelotix
ada27323f2 Rename file 2025-09-23 13:55:36 +02:00
Mubelotix
607a1c2395 Add geo bounding box filter 2025-09-23 13:55:36 +02:00
Mubelotix
b56956ea0c Optimize geojson channels 2025-09-23 13:55:36 +02:00
Mubelotix
3d21290f7f Add cellulite database sizes 2025-09-23 13:55:36 +02:00
Mubelotix
4edd4c06bc Fix trivial clippy warnings 2025-09-23 13:55:36 +02:00
Mubelotix
566baddc6b Optimize points removed serialization 2025-09-23 13:55:36 +02:00
Tamo
febe3186ce improve deletion 2025-09-23 13:55:36 +02:00
Tamo
5dd42c1871 remove useless log 2025-09-23 13:55:36 +02:00
Tamo
8670793e6e fix the cellulite spilling bug 2025-09-23 13:55:36 +02:00
Tamo
41a04aa3ab fix the cellulite integration 2025-09-23 13:55:36 +02:00
Tamo
88f841bc05 plug in the document deletion in cellulite 2025-09-23 13:55:36 +02:00
Tamo
d19892d2ea update to the latest version of cellulite and steppe 2025-09-23 13:55:36 +02:00
Tamo
c0905d6650 add the deletion in the new indexer 2025-09-23 13:55:36 +02:00
Tamo
576d7d94b1 fix the old indexer 2025-09-23 13:55:36 +02:00
Tamo
f4f1334b62 add a new _geoPolygon filter to query the cellulite database 2025-09-23 13:55:36 +02:00
Tamo
aaff6c3685 fmt 2025-09-23 13:55:36 +02:00
Tamo
42d2af4c84 finish plugin cellulite to the new indexer 2025-09-23 13:55:36 +02:00
Tamo
6be91c824c Cellulite is almost in the new indexer. We must add the documentID to the geojson pipeline 2025-09-23 13:55:36 +02:00
Tamo
6ee0537db8 add an extractor for cellulite in the new pipeline 2025-09-23 13:55:36 +02:00
Tamo
3fbeff4308 add cellulite to the old pipeline, it probably doesn't works 2025-09-23 13:55:36 +02:00
Tamo
375546b61a add a few helpers 2025-09-23 13:55:36 +02:00
Tamo
25a1d50763 add cellulite to the index 2025-09-23 13:55:36 +02:00
curquiza
6f0d26c22c Show dependency upgrade in the changelog for full transparency 2025-09-22 18:30:34 +02:00
Louis Dureuil
4fe073cc1a Merge pull request #5896 from meilisearch/fix-doc-template
Document template: Correctly render when indexing first item in array
2025-09-22 07:20:38 +00:00
Clément Renault
5cd3d36d20 Merge pull request #5897 from meilisearch/improve-prom
improve the prometheus content type we return
2025-09-18 16:18:16 +00:00
Tamo
d7ad76ea1e improve the prometheus content type we return 2025-09-18 17:04:13 +02:00
Louis Dureuil
e82bb93221 Fix indexing bug 2025-09-18 16:57:20 +02:00
Clément Renault
000cb93aad Merge pull request #5895 from meilisearch/fix-ci
Update the dtolnay action to 1.89
2025-09-18 14:56:45 +00:00
Tamo
ad4f5514b9 update the dtolnay action to 1.89 2025-09-18 15:52:39 +02:00
Louis Dureuil
8d29a29867 Merge pull request #5894 from meilisearch/fix-hannoy-unreachable-items
Bump Hannoy to fix unreachable documents
2025-09-18 13:33:34 +00:00
Kerollmops
d7de819d11 Bump Hannoy to fix unreachable documents 2025-09-18 14:26:13 +02:00
nnethercott
7a6cf30cb2 bump hannoy to 0.0.8 2025-09-18 11:23:57 +02:00
Tamo
e43d67591c Merge pull request #5892 from meilisearch/increase-msrv
increase rust version from 1.85 to 1.89
2025-09-17 08:26:08 +00:00
Tamo
134237d1eb update the toolchain for rustfmt 2025-09-16 17:45:49 +02:00
Tamo
26d9070aa7 increase rust version from 1.85 to 1.89 2025-09-16 17:21:33 +02:00
nnethercott
f9ffb8ada5 bump from hannoy 0.0.6 to 0.0.7 2025-09-16 12:00:36 +02:00
nnethercott
a47888f02c bump hannoy to 0.6 2025-09-16 11:02:46 +02:00
nnethercott
5bef2f4d86 Update arroy-hannoy conversion internals 2025-09-15 16:10:56 +02:00
curquiza
d52c7dcc94 Add needs: check-version 2025-08-12 20:47:43 +02:00
128 changed files with 3794 additions and 927 deletions


@@ -7,6 +7,5 @@ updates:
schedule:
interval: "monthly"
labels:
-      - 'skip changelog'
- 'dependencies'
rebase-strategy: disabled


@@ -18,6 +18,7 @@ categories:
label: 'security'
- title: '⚙️ Maintenance/misc'
label:
+      - 'dependencies'
- 'maintenance'
- 'documentation'
template: |
@@ -26,8 +27,3 @@ template: |
❤️ Huge thanks to our contributors: $CONTRIBUTORS.
no-changes-template: 'Changes are coming soon 😎'
sort-direction: 'ascending'
-replacers:
-  - search: '/(?:and )?@dependabot-preview(?:\[bot\])?,?/g'
-    replace: ''
-  - search: '/(?:and )?@dependabot(?:\[bot\])?,?/g'
-    replace: ''


@@ -18,7 +18,7 @@ jobs:
timeout-minutes: 180 # 3h
steps:
- uses: actions/checkout@v5
-      - uses: dtolnay/rust-toolchain@1.85
+      - uses: dtolnay/rust-toolchain@1.89
with:
profile: minimal


@@ -66,7 +66,7 @@ jobs:
fetch-depth: 0 # fetch full history to be able to get main commit sha
ref: ${{ steps.comment-branch.outputs.head_ref }}
-      - uses: dtolnay/rust-toolchain@1.85
+      - uses: dtolnay/rust-toolchain@1.89
with:
profile: minimal


@@ -12,7 +12,7 @@ jobs:
timeout-minutes: 180 # 3h
steps:
- uses: actions/checkout@v5
-      - uses: dtolnay/rust-toolchain@1.85
+      - uses: dtolnay/rust-toolchain@1.89
with:
profile: minimal


@@ -18,7 +18,7 @@ jobs:
timeout-minutes: 4320 # 72h
steps:
- uses: actions/checkout@v5
-      - uses: dtolnay/rust-toolchain@1.85
+      - uses: dtolnay/rust-toolchain@1.89
with:
profile: minimal


@@ -44,7 +44,7 @@ jobs:
exit 1
fi
-      - uses: dtolnay/rust-toolchain@1.85
+      - uses: dtolnay/rust-toolchain@1.89
with:
profile: minimal


@@ -16,7 +16,7 @@ jobs:
timeout-minutes: 4320 # 72h
steps:
- uses: actions/checkout@v5
-      - uses: dtolnay/rust-toolchain@1.85
+      - uses: dtolnay/rust-toolchain@1.89
with:
profile: minimal


@@ -15,7 +15,7 @@ jobs:
runs-on: benchmarks
steps:
- uses: actions/checkout@v5
-      - uses: dtolnay/rust-toolchain@1.85
+      - uses: dtolnay/rust-toolchain@1.89
with:
profile: minimal


@@ -15,7 +15,7 @@ jobs:
runs-on: benchmarks
steps:
- uses: actions/checkout@v5
-      - uses: dtolnay/rust-toolchain@1.85
+      - uses: dtolnay/rust-toolchain@1.89
with:
profile: minimal


@@ -15,7 +15,7 @@ jobs:
runs-on: benchmarks
steps:
- uses: actions/checkout@v5
-      - uses: dtolnay/rust-toolchain@1.85
+      - uses: dtolnay/rust-toolchain@1.89
with:
profile: minimal


@@ -17,7 +17,7 @@ jobs:
run: |
apt-get update && apt-get install -y curl
apt-get install build-essential -y
-      - uses: dtolnay/rust-toolchain@1.85
+      - uses: dtolnay/rust-toolchain@1.89
- name: Install cargo-flaky
run: cargo install cargo-flaky
- name: Run cargo flaky in the dumps


@@ -12,7 +12,7 @@ jobs:
timeout-minutes: 4320 # 72h
steps:
- uses: actions/checkout@v5
-      - uses: dtolnay/rust-toolchain@1.85
+      - uses: dtolnay/rust-toolchain@1.89
with:
profile: minimal


@@ -25,7 +25,7 @@ jobs:
run: |
apt-get update && apt-get install -y curl
apt-get install build-essential -y
-      - uses: dtolnay/rust-toolchain@1.85
+      - uses: dtolnay/rust-toolchain@1.89
- name: Install cargo-deb
run: cargo install cargo-deb
- uses: actions/checkout@v5


@@ -65,7 +65,7 @@ jobs:
uses: docker/setup-buildx-action@v3
- name: Install cosign
-        uses: sigstore/cosign-installer@d58896d6a1865668819e1d91763c7751a165e159 # tag=v3.9.2
+        uses: sigstore/cosign-installer@d7543c93d881b35a8faa02e8e3605f69b7a1ce62 # tag=v3.10.0
- name: Login to Docker Hub
uses: docker/login-action@v3


@@ -11,7 +11,7 @@ jobs:
check-version:
name: Check the version validity
runs-on: ubuntu-latest
-    # No need to check the version for dry run (cron)
+    # No need to check the version for dry run (cron or workflow_dispatch)
steps:
- uses: actions/checkout@v5
# Check if the tag has the v<nmumber>.<number>.<number> format.
@@ -45,10 +45,10 @@ jobs:
run: |
apt-get update && apt-get install -y curl
apt-get install build-essential -y
-      - uses: dtolnay/rust-toolchain@1.85
+      - uses: dtolnay/rust-toolchain@1.89
- name: Build
run: cargo build --release --locked
-      # No need to upload binaries for dry run (cron)
+      # No need to upload binaries for dry run (cron or workflow_dispatch)
- name: Upload binaries to release
if: github.event_name == 'release'
uses: svenstaro/upload-release-action@2.11.2
@@ -75,10 +75,10 @@ jobs:
asset_name: meilisearch-windows-amd64.exe
steps:
- uses: actions/checkout@v5
-      - uses: dtolnay/rust-toolchain@1.85
+      - uses: dtolnay/rust-toolchain@1.89
- name: Build
run: cargo build --release --locked
-      # No need to upload binaries for dry run (cron)
+      # No need to upload binaries for dry run (cron or workflow_dispatch)
- name: Upload binaries to release
if: github.event_name == 'release'
uses: svenstaro/upload-release-action@2.11.2
@@ -101,7 +101,7 @@ jobs:
- name: Checkout repository
uses: actions/checkout@v5
- name: Installing Rust toolchain
-        uses: dtolnay/rust-toolchain@1.85
+        uses: dtolnay/rust-toolchain@1.89
with:
profile: minimal
target: ${{ matrix.target }}
@@ -111,7 +111,7 @@ jobs:
command: build
args: --release --target ${{ matrix.target }}
- name: Upload the binary to release
-      # No need to upload binaries for dry run (cron)
+      # No need to upload binaries for dry run (cron or workflow_dispatch)
if: github.event_name == 'release'
uses: svenstaro/upload-release-action@2.11.2
with:
@@ -148,7 +148,7 @@ jobs:
add-apt-repository "deb [arch=$(dpkg --print-architecture)] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable"
apt-get update -y && apt-get install -y docker-ce
- name: Installing Rust toolchain
-        uses: dtolnay/rust-toolchain@1.85
+        uses: dtolnay/rust-toolchain@1.89
with:
profile: minimal
target: ${{ matrix.target }}
@@ -176,7 +176,7 @@ jobs:
- name: List target output files
run: ls -lR ./target
- name: Upload the binary to release
-      # No need to upload binaries for dry run (cron)
+      # No need to upload binaries for dry run (cron or workflow_dispatch)
if: github.event_name == 'release'
uses: svenstaro/upload-release-action@2.11.2
with:
@@ -187,6 +187,7 @@ jobs:
publish-openapi-file:
name: Publish OpenAPI file
+    needs: check-version
runs-on: ubuntu-latest
steps:
- name: Checkout code
@@ -201,7 +202,7 @@ jobs:
cd crates/openapi-generator
cargo run --release -- --pretty --output ../../meilisearch.json
- name: Upload OpenAPI to Release
-      # No need to upload for dry run (cron)
+      # No need to upload for dry run (cron or workflow_dispatch)
if: github.event_name == 'release'
uses: svenstaro/upload-release-action@2.11.2
with:


@@ -50,7 +50,7 @@ jobs:
with:
repository: meilisearch/meilisearch-dotnet
- name: Setup .NET Core
-        uses: actions/setup-dotnet@v4
+        uses: actions/setup-dotnet@v5
with:
dotnet-version: "8.0.x"
- name: Install dependencies
@@ -100,7 +100,7 @@ jobs:
- '7700:7700'
steps:
- name: Set up Go
-        uses: actions/setup-go@v5
+        uses: actions/setup-go@v6
with:
go-version: stable
- uses: actions/checkout@v5
@@ -135,13 +135,13 @@ jobs:
- name: Set up Java
uses: actions/setup-java@v5
with:
-          java-version: 8
-          distribution: 'zulu'
+          java-version: 17
+          distribution: 'temurin'
cache: gradle
- name: Grant execute permission for gradlew
run: chmod +x gradlew
- name: Build and run unit and integration tests
-        run: ./gradlew build integrationTest
+        run: ./gradlew build integrationTest --info
meilisearch-js-tests:
needs: define-docker-image
@@ -160,7 +160,7 @@ jobs:
with:
repository: meilisearch/meilisearch-js
- name: Setup node
-        uses: actions/setup-node@v4
+        uses: actions/setup-node@v5
with:
cache: 'yarn'
- name: Install dependencies
@@ -224,7 +224,7 @@ jobs:
with:
repository: meilisearch/meilisearch-python
- name: Set up Python
-        uses: actions/setup-python@v5
+        uses: actions/setup-python@v6
- name: Install pipenv
uses: dschep/install-pipenv-action@v1
- name: Install dependencies
@@ -318,7 +318,7 @@ jobs:
with:
repository: meilisearch/meilisearch-js-plugins
- name: Setup node
-        uses: actions/setup-node@v4
+        uses: actions/setup-node@v5
with:
cache: yarn
- name: Install dependencies


@@ -27,7 +27,7 @@ jobs:
apt-get update && apt-get install -y curl
apt-get install build-essential -y
- name: Setup test with Rust stable
-        uses: dtolnay/rust-toolchain@1.85
+        uses: dtolnay/rust-toolchain@1.89
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.8.0
- name: Run cargo check without any default features
@@ -52,7 +52,7 @@ jobs:
- uses: actions/checkout@v5
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.8.0
-      - uses: dtolnay/rust-toolchain@1.85
+      - uses: dtolnay/rust-toolchain@1.89
- name: Run cargo check without any default features
uses: actions-rs/cargo@v1
with:
@@ -77,7 +77,7 @@ jobs:
run: |
apt-get update
apt-get install --assume-yes build-essential curl
-      - uses: dtolnay/rust-toolchain@1.85
+      - uses: dtolnay/rust-toolchain@1.89
- name: Run cargo build with almost all features
run: |
cargo build --workspace --locked --release --features "$(cargo xtask list-features --exclude-feature cuda,test-ollama)"
@@ -129,7 +129,7 @@ jobs:
run: |
apt-get update
apt-get install --assume-yes build-essential curl
-      - uses: dtolnay/rust-toolchain@1.85
+      - uses: dtolnay/rust-toolchain@1.89
- name: Run cargo tree without default features and check lindera is not present
run: |
if cargo tree -f '{p} {f}' -e normal --no-default-features | grep -qz lindera; then
@@ -153,7 +153,7 @@ jobs:
run: |
apt-get update && apt-get install -y curl
apt-get install build-essential -y
-      - uses: dtolnay/rust-toolchain@1.85
+      - uses: dtolnay/rust-toolchain@1.89
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.8.0
- name: Run tests in debug
@@ -167,7 +167,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v5
-      - uses: dtolnay/rust-toolchain@1.85
+      - uses: dtolnay/rust-toolchain@1.89
with:
profile: minimal
components: clippy
@@ -184,7 +184,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v5
-      - uses: dtolnay/rust-toolchain@1.85
+      - uses: dtolnay/rust-toolchain@1.89
with:
profile: minimal
toolchain: nightly-2024-07-09


@@ -18,7 +18,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v5
-      - uses: dtolnay/rust-toolchain@1.85
+      - uses: dtolnay/rust-toolchain@1.89
with:
profile: minimal
- name: Install sd

Cargo.lock (generated, 1095 changed lines): diff suppressed because it is too large.


@@ -23,7 +23,7 @@ members = [
]
[workspace.package]
version = "1.21.0"
version = "1.22.1"
authors = [
"Quentin de Quelen <quentin@dequelen.me>",
"Clément Renault <clement@meilisearch.com>",


@@ -1,5 +1,5 @@
# Compile
-FROM rust:1.85-alpine3.20 AS compiler
+FROM rust:1.89-alpine3.20 AS compiler
RUN apk add -q --no-cache build-base openssl-dev


@@ -121,7 +121,7 @@ If you want to know more about the kind of data we collect and what we use it fo
Meilisearch is a search engine created by [Meili](https://www.meilisearch.com/careers), a software development company headquartered in France and with team members all over the world. Want to know more about us? [Check out our blog!](https://blog.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=contact)
-🗞 [Subscribe to our newsletter](https://meilisearch.us2.list-manage.com/subscribe?u=27870f7b71c908a8b359599fb&id=79582d828e) if you don't want to miss any updates! We promise we won't clutter your mailbox: we only send one edition every two months.
+🗞 [Subscribe to our newsletter](https://share-eu1.hsforms.com/1LN5N0x_GQgq7ss7tXmSykwfg3aq) if you don't want to miss any updates! We promise we won't clutter your mailbox: we only send one edition every two months.
💌 Want to make a suggestion or give feedback? Here are some of the channels where you can reach us:


@@ -97,6 +97,7 @@ impl CompatV2ToV3 {
}
}
+#[allow(clippy::large_enum_variant)]
pub enum CompatIndexV2ToV3 {
V2(v2::V2IndexReader),
Compat(Box<CompatIndexV1ToV2>),


@@ -33,6 +33,10 @@ impl FileStore {
std::fs::create_dir_all(&path)?;
Ok(FileStore { path })
}
pub fn path(&self) -> &Path {
&self.path
}
}
impl FileStore {


@@ -7,23 +7,14 @@
use nom::branch::alt;
use nom::bytes::complete::tag;
-use nom::character::complete::char;
-use nom::character::complete::multispace0;
-use nom::character::complete::multispace1;
-use nom::combinator::cut;
-use nom::combinator::map;
-use nom::combinator::value;
-use nom::sequence::preceded;
-use nom::sequence::{terminated, tuple};
+use nom::character::complete::{char, multispace0, multispace1};
+use nom::combinator::{cut, map, value};
+use nom::sequence::{preceded, terminated, tuple};
use Condition::*;
+use crate::error::IResultExt;
-use crate::value::parse_vector_value;
-use crate::value::parse_vector_value_cut;
-use crate::Error;
-use crate::ErrorKind;
-use crate::VectorFilter;
-use crate::{parse_value, FilterCondition, IResult, Span, Token};
+use crate::value::{parse_vector_value, parse_vector_value_cut};
+use crate::{parse_value, Error, ErrorKind, FilterCondition, IResult, Span, Token, VectorFilter};
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Condition<'a> {
@@ -124,7 +115,7 @@ pub fn parse_not_exists(input: Span) -> IResult<FilterCondition> {
Ok((input, FilterCondition::Not(Box::new(FilterCondition::Condition { fid: key, op: Exists }))))
}
-fn parse_vectors(input: Span) -> IResult<(Token, Option<Token>, VectorFilter<'_>)> {
+fn parse_vectors(input: Span) -> IResult<(Token, Option<Token>, VectorFilter)> {
let (input, _) = multispace0(input)?;
let (input, fid) = tag("_vectors")(input)?;


@@ -75,7 +75,11 @@ pub enum ExpectedValueKind {
pub enum ErrorKind<'a> {
ReservedGeo(&'a str),
GeoRadius,
+    GeoRadiusArgumentCount(usize),
GeoBoundingBox,
+    GeoPolygon,
+    GeoPolygonNotEnoughPoints(usize),
+    GeoCoordinatesNotPair(usize),
MisusedGeoRadius,
MisusedGeoBoundingBox,
VectorFilterLeftover,
@@ -189,7 +193,7 @@ impl Display for Error<'_> {
}
ErrorKind::InvalidPrimary => {
let text = if input.trim().is_empty() { "but instead got nothing.".to_string() } else { format!("at `{}`.", escaped_input) };
writeln!(f, "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` {}", text)?
writeln!(f, "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, `_geoBoundingBox` or `_geoPolygon` {text}")?
}
ErrorKind::InvalidEscapedNumber => {
writeln!(f, "Found an invalid escaped sequence number: `{}`.", escaped_input)?
@@ -198,11 +202,23 @@ impl Display for Error<'_> {
writeln!(f, "Found unexpected characters at the end of the filter: `{}`. You probably forgot an `OR` or an `AND` rule.", escaped_input)?
}
ErrorKind::GeoRadius => {
writeln!(f, "The `_geoRadius` filter expects three arguments: `_geoRadius(latitude, longitude, radius)`.")?
writeln!(f, "The `_geoRadius` filter must be in the form: `_geoRadius(latitude, longitude, radius, optionalResolution)`.")?
}
+            ErrorKind::GeoRadiusArgumentCount(count) => {
+                writeln!(f, "Was expecting 3 or 4 arguments for `_geoRadius`, but instead found {count}.")?
+            }
ErrorKind::GeoBoundingBox => {
writeln!(f, "The `_geoBoundingBox` filter expects two pairs of arguments: `_geoBoundingBox([latitude, longitude], [latitude, longitude])`.")?
}
+            ErrorKind::GeoPolygon => {
+                writeln!(f, "The `_geoPolygon` filter doesn't match the expected format: `_geoPolygon([latitude, longitude], [latitude, longitude])`.")?
+            }
+            ErrorKind::GeoPolygonNotEnoughPoints(n) => {
+                writeln!(f, "The `_geoPolygon` filter expects at least 3 points but only {n} were specified")?;
+            }
+            ErrorKind::GeoCoordinatesNotPair(number) => {
+                writeln!(f, "Was expecting 2 coordinates but instead found {number}.")?
+            }
ErrorKind::ReservedGeo(name) => {
writeln!(f, "`{}` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.", name.escape_debug())?
}


@@ -19,6 +19,7 @@
//! word = (alphanumeric | _ | - | .)+
//! geoRadius = "_geoRadius(" WS* float WS* "," WS* float WS* "," float WS* ")"
//! geoBoundingBox = "_geoBoundingBox([" WS * float WS* "," WS* float WS* "], [" WS* float WS* "," WS* float WS* "]")
+//! geoPolygon = "_geoPolygon([[" WS* float WS* "," WS* float WS* "],+])"
//! ```
//!
//! Other BNF grammar used to handle some specific errors:
@@ -116,7 +117,7 @@ impl<'a> Token<'a> {
self.span
}
pub fn parse_finite_float(&self) -> Result<f64, Error> {
pub fn parse_finite_float(&self) -> Result<f64, Error<'a>> {
let value: f64 = self.value().parse().map_err(|e| self.as_external_error(e))?;
if value.is_finite() {
Ok(value)
@@ -156,8 +157,9 @@ pub enum FilterCondition<'a> {
Or(Vec<Self>),
And(Vec<Self>),
VectorExists { fid: Token<'a>, embedder: Option<Token<'a>>, filter: VectorFilter<'a> },
GeoLowerThan { point: [Token<'a>; 2], radius: Token<'a> },
GeoLowerThan { point: [Token<'a>; 2], radius: Token<'a>, resolution: Option<Token<'a>> },
GeoBoundingBox { top_right_point: [Token<'a>; 2], bottom_left_point: [Token<'a>; 2] },
GeoPolygon { points: Vec<[Token<'a>; 2]> },
}
pub enum TraversedElement<'a> {
@@ -166,7 +168,7 @@ pub enum TraversedElement<'a> {
}
impl<'a> FilterCondition<'a> {
pub fn use_contains_operator(&self) -> Option<&Token> {
pub fn use_contains_operator(&self) -> Option<&Token<'a>> {
match self {
FilterCondition::Condition { fid: _, op } => match op {
Condition::GreaterThan(_)
@@ -189,11 +191,12 @@ impl<'a> FilterCondition<'a> {
FilterCondition::VectorExists { .. }
| FilterCondition::GeoLowerThan { .. }
| FilterCondition::GeoBoundingBox { .. }
+            | FilterCondition::GeoPolygon { .. }
| FilterCondition::In { .. } => None,
}
}
pub fn use_vector_filter(&self) -> Option<&Token> {
pub fn use_vector_filter(&self) -> Option<&Token<'a>> {
match self {
FilterCondition::Condition { .. } => None,
FilterCondition::Not(this) => this.use_vector_filter(),
@@ -202,12 +205,13 @@ impl<'a> FilterCondition<'a> {
}
FilterCondition::GeoLowerThan { .. }
| FilterCondition::GeoBoundingBox { .. }
+            | FilterCondition::GeoPolygon { .. }
| FilterCondition::In { .. } => None,
FilterCondition::VectorExists { fid, .. } => Some(fid),
}
}
pub fn fids(&self, depth: usize) -> Box<dyn Iterator<Item = &Token> + '_> {
pub fn fids(&self, depth: usize) -> Box<dyn Iterator<Item = &Token<'a>> + '_> {
if depth == 0 {
return Box::new(std::iter::empty());
}
@@ -228,7 +232,7 @@ impl<'a> FilterCondition<'a> {
}
/// Returns the first token found at the specified depth, `None` if no token at this depth.
pub fn token_at_depth(&self, depth: usize) -> Option<&Token> {
pub fn token_at_depth(&self, depth: usize) -> Option<&Token<'a>> {
match self {
FilterCondition::Condition { fid, .. } if depth == 0 => Some(fid),
FilterCondition::Or(subfilters) => {
@@ -396,23 +400,27 @@ fn parse_not(input: Span, depth: usize) -> IResult<FilterCondition> {
/// If we parse `_geoRadius` we MUST parse the rest of the expression.
fn parse_geo_radius(input: Span) -> IResult<FilterCondition> {
// we want to allow space BEFORE the _geoRadius but not after
-    let parsed = preceded(
-        tuple((multispace0, word_exact("_geoRadius"))),
-        // if we were able to parse `_geoRadius` and can't parse the rest of the input we return a failure
-        cut(delimited(char('('), separated_list1(tag(","), ws(recognize_float)), char(')'))),
-    )(input)
-    .map_err(|e| e.map(|_| Error::new_from_kind(input, ErrorKind::GeoRadius)));
+    let (input, _) = tuple((multispace0, word_exact("_geoRadius")))(input)?;
+    // if we were able to parse `_geoRadius` and can't parse the rest of the input we return a failure
+    let parsed =
+        delimited(char('('), separated_list1(tag(","), ws(recognize_float)), char(')'))(input)
+            .map_cut(ErrorKind::GeoRadius);
let (input, args) = parsed?;
-    if args.len() != 3 {
-        return Err(nom::Err::Failure(Error::new_from_kind(input, ErrorKind::GeoRadius)));
+    if !(3..=4).contains(&args.len()) {
+        return Err(Error::failure_from_kind(input, ErrorKind::GeoRadiusArgumentCount(args.len())));
}
let res = FilterCondition::GeoLowerThan {
point: [args[0].into(), args[1].into()],
radius: args[2].into(),
+        resolution: args.get(3).cloned().map(Token::from),
};
Ok((input, res))
}
@@ -420,26 +428,33 @@ fn parse_geo_radius(input: Span) -> IResult<FilterCondition> {
/// If we parse `_geoBoundingBox` we MUST parse the rest of the expression.
fn parse_geo_bounding_box(input: Span) -> IResult<FilterCondition> {
// we want to allow space BEFORE the _geoBoundingBox but not after
-    let parsed = preceded(
-        tuple((multispace0, word_exact("_geoBoundingBox"))),
-        // if we were able to parse `_geoBoundingBox` and can't parse the rest of the input we return a failure
-        cut(delimited(
-            char('('),
-            separated_list1(
-                tag(","),
-                ws(delimited(char('['), separated_list1(tag(","), ws(recognize_float)), char(']'))),
-            ),
-            char(')'),
-        )),
+    let (input, _) = tuple((multispace0, word_exact("_geoBoundingBox")))(input)?;
+    // if we were able to parse `_geoBoundingBox` and can't parse the rest of the input we return a failure
+    let (input, args) = delimited(
+        char('('),
+        separated_list1(
+            tag(","),
+            ws(delimited(char('['), separated_list1(tag(","), ws(recognize_float)), char(']'))),
+        ),
+        char(')'),
    )(input)
-    .map_err(|e| e.map(|_| Error::new_from_kind(input, ErrorKind::GeoBoundingBox)));
+    .map_cut(ErrorKind::GeoBoundingBox)?;
-    let (input, args) = parsed?;
-    if args.len() != 2 || args[0].len() != 2 || args[1].len() != 2 {
+    if args.len() != 2 {
return Err(Error::failure_from_kind(input, ErrorKind::GeoBoundingBox));
}
+    if let Some(offending) = args.iter().find(|a| a.len() != 2) {
+        let context = offending.first().unwrap_or(&input);
+        return Err(Error::failure_from_kind(
+            *context,
+            ErrorKind::GeoCoordinatesNotPair(offending.len()),
+        ));
+    }
let res = FilterCondition::GeoBoundingBox {
top_right_point: [args[0][0].into(), args[0][1].into()],
bottom_left_point: [args[1][0].into(), args[1][1].into()],
@@ -447,6 +462,47 @@ fn parse_geo_bounding_box(input: Span) -> IResult<FilterCondition> {
Ok((input, res))
}
+/// geoPolygon = "_geoPolygon([[" WS* float WS* "," WS* float WS* "],+])"
+/// If we parse `_geoPolygon` we MUST parse the rest of the expression.
+fn parse_geo_polygon(input: Span) -> IResult<FilterCondition> {
+    // we want to allow space BEFORE the _geoPolygon but not after
+    let (input, _) = tuple((multispace0, word_exact("_geoPolygon")))(input)?;
+    // if we were able to parse `_geoPolygon` and can't parse the rest of the input we return a failure
+    let (input, args): (_, Vec<Vec<LocatedSpan<_, _>>>) = delimited(
+        char('('),
+        separated_list1(
+            tag(","),
+            ws(delimited(char('['), separated_list1(tag(","), ws(recognize_float)), char(']'))),
+        ),
+        preceded(opt(ws(char(','))), char(')')), // Tolerate trailing comma
+    )(input)
+    .map_cut(ErrorKind::GeoPolygon)?;
+    if args.len() < 3 {
+        let context = args.last().and_then(|a| a.last()).unwrap_or(&input);
+        return Err(Error::failure_from_kind(
+            *context,
+            ErrorKind::GeoPolygonNotEnoughPoints(args.len()),
+        ));
+    }
+    if let Some(offending) = args.iter().find(|a| a.len() != 2) {
+        let context = offending.first().unwrap_or(&input);
+        return Err(Error::failure_from_kind(
+            *context,
+            ErrorKind::GeoCoordinatesNotPair(offending.len()),
+        ));
+    }
+    let res = FilterCondition::GeoPolygon {
+        points: args.into_iter().map(|a| [a[0].into(), a[1].into()]).collect(),
+    };
+    Ok((input, res))
+}
/// geoPoint = WS* "_geoPoint(float WS* "," WS* float WS* "," WS* float)
fn parse_geo_point(input: Span) -> IResult<FilterCondition> {
// we want to forbid space BEFORE the _geoPoint but not after
@@ -516,8 +572,8 @@ fn parse_primary(input: Span, depth: usize) -> IResult<FilterCondition> {
Error::new_from_kind(input, ErrorKind::MissingClosingDelimiter(c.char()))
}),
),
-        parse_geo_radius,
-        parse_geo_bounding_box,
+        // Made a random block of functions because we reached the maximum number of elements per alt
+        alt((parse_geo_radius, parse_geo_bounding_box, parse_geo_polygon)),
parse_in,
parse_not_in,
parse_condition,
@@ -597,9 +653,12 @@ impl std::fmt::Display for FilterCondition<'_> {
}
write!(f, " EXISTS")
}
-            FilterCondition::GeoLowerThan { point, radius } => {
+            FilterCondition::GeoLowerThan { point, radius, resolution: None } => {
write!(f, "_geoRadius({}, {}, {})", point[0], point[1], radius)
}
+            FilterCondition::GeoLowerThan { point, radius, resolution: Some(resolution) } => {
+                write!(f, "_geoRadius({}, {}, {}, {})", point[0], point[1], radius, resolution)
+            }
FilterCondition::GeoBoundingBox {
top_right_point: top_left_point,
bottom_left_point: bottom_right_point,
@@ -613,6 +672,13 @@ impl std::fmt::Display for FilterCondition<'_> {
bottom_right_point[1]
)
}
+            FilterCondition::GeoPolygon { points } => {
+                write!(f, "_geoPolygon([")?;
+                for point in points {
+                    write!(f, "[{}, {}], ", point[0], point[1])?;
+                }
+                write!(f, "])")
+            }
}
}
}
@@ -651,7 +717,7 @@ pub mod tests {
/// Create a raw [Token]. You must specify the string that appear BEFORE your element followed by your element
pub fn rtok<'a>(before: &'a str, value: &'a str) -> Token<'a> {
// if the string is empty we still need to return 1 for the line number
-        let lines = before.is_empty().then_some(1).unwrap_or_else(|| before.lines().count());
+        let lines = if before.is_empty() { 1 } else { before.lines().count() };
let offset = before.chars().count();
// the extra field is not checked in the tests so we can set it to nothing
unsafe { Span::new_from_raw_offset(offset, lines as u32, value, "") }.into()
@@ -776,12 +842,17 @@ pub mod tests {
insta::assert_snapshot!(p("_geoRadius(12, 13, 14)"), @"_geoRadius({12}, {13}, {14})");
insta::assert_snapshot!(p("NOT _geoRadius(12, 13, 14)"), @"NOT (_geoRadius({12}, {13}, {14}))");
insta::assert_snapshot!(p("_geoRadius(12,13,14)"), @"_geoRadius({12}, {13}, {14})");
insta::assert_snapshot!(p("_geoRadius(12,13,14,1000)"), @"_geoRadius({12}, {13}, {14}, {1000})");
// Test geo bounding box
insta::assert_snapshot!(p("_geoBoundingBox([12, 13], [14, 15])"), @"_geoBoundingBox([{12}, {13}], [{14}, {15}])");
insta::assert_snapshot!(p("NOT _geoBoundingBox([12, 13], [14, 15])"), @"NOT (_geoBoundingBox([{12}, {13}], [{14}, {15}]))");
insta::assert_snapshot!(p("_geoBoundingBox([12,13],[14,15])"), @"_geoBoundingBox([{12}, {13}], [{14}, {15}])");
+        // Test geo polygon
+        insta::assert_snapshot!(p("_geoPolygon([12, 13], [14, 15], [16, 17])"), @"_geoPolygon([[{12}, {13}], [{14}, {15}], [{16}, {17}], ])");
+        insta::assert_snapshot!(p("_geoPolygon([12, 13], [14, 15], [-1.2,2939.2], [1,1])"), @"_geoPolygon([[{12}, {13}], [{14}, {15}], [{-1.2}, {2939.2}], [{1}, {1}], ])");
// Test OR + AND
insta::assert_snapshot!(p("channel = ponce AND 'dog race' != 'bernese mountain'"), @"AND[{channel} = {ponce}, {dog race} != {bernese mountain}, ]");
insta::assert_snapshot!(p("channel = ponce OR 'dog race' != 'bernese mountain'"), @"OR[{channel} = {ponce}, {dog race} != {bernese mountain}, ]");
@@ -838,50 +909,80 @@ pub mod tests {
11:12 channel = 🐻 AND followers < 100
"###);
insta::assert_snapshot!(p("'OR'"), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `\'OR\'`.
insta::assert_snapshot!(p("'OR'"), @r"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, `_geoBoundingBox` or `_geoPolygon` at `\'OR\'`.
1:5 'OR'
"###);
");
insta::assert_snapshot!(p("OR"), @r###"
Was expecting a value but instead got `OR`, which is a reserved keyword. To use `OR` as a field name or a value, surround it by quotes.
1:3 OR
"###);
insta::assert_snapshot!(p("channel Ponce"), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `channel Ponce`.
insta::assert_snapshot!(p("channel Ponce"), @r"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, `_geoBoundingBox` or `_geoPolygon` at `channel Ponce`.
1:14 channel Ponce
"###);
");
insta::assert_snapshot!(p("channel = Ponce OR"), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` but instead got nothing.
insta::assert_snapshot!(p("channel = Ponce OR"), @r"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, `_geoBoundingBox` or `_geoPolygon` but instead got nothing.
19:19 channel = Ponce OR
"###);
");
insta::assert_snapshot!(p("_geoRadius"), @r###"
The `_geoRadius` filter expects three arguments: `_geoRadius(latitude, longitude, radius)`.
1:11 _geoRadius
"###);
insta::assert_snapshot!(p("_geoRadius"), @r"
The `_geoRadius` filter must be in the form: `_geoRadius(latitude, longitude, radius, optionalResolution)`.
11:11 _geoRadius
");
insta::assert_snapshot!(p("_geoRadius = 12"), @r###"
The `_geoRadius` filter expects three arguments: `_geoRadius(latitude, longitude, radius)`.
1:16 _geoRadius = 12
"###);
insta::assert_snapshot!(p("_geoRadius = 12"), @r"
The `_geoRadius` filter must be in the form: `_geoRadius(latitude, longitude, radius, optionalResolution)`.
11:16 _geoRadius = 12
");
insta::assert_snapshot!(p("_geoBoundingBox"), @r###"
insta::assert_snapshot!(p("_geoBoundingBox"), @r"
The `_geoBoundingBox` filter expects two pairs of arguments: `_geoBoundingBox([latitude, longitude], [latitude, longitude])`.
1:16 _geoBoundingBox
"###);
16:16 _geoBoundingBox
");
insta::assert_snapshot!(p("_geoBoundingBox = 12"), @r###"
insta::assert_snapshot!(p("_geoBoundingBox = 12"), @r"
The `_geoBoundingBox` filter expects two pairs of arguments: `_geoBoundingBox([latitude, longitude], [latitude, longitude])`.
1:21 _geoBoundingBox = 12
"###);
16:21 _geoBoundingBox = 12
");
insta::assert_snapshot!(p("_geoBoundingBox(1.0, 1.0)"), @r###"
insta::assert_snapshot!(p("_geoBoundingBox(1.0, 1.0)"), @r"
The `_geoBoundingBox` filter expects two pairs of arguments: `_geoBoundingBox([latitude, longitude], [latitude, longitude])`.
1:26 _geoBoundingBox(1.0, 1.0)
"###);
17:26 _geoBoundingBox(1.0, 1.0)
");
insta::assert_snapshot!(p("_geoPolygon([1,2,3])"), @r"
The `_geoPolygon` filter expects at least 3 points but only 1 were specified
18:19 _geoPolygon([1,2,3])
");
insta::assert_snapshot!(p("_geoPolygon(1,2,3)"), @r"
The `_geoPolygon` filter doesn't match the expected format: `_geoPolygon([latitude, longitude], [latitude, longitude])`.
13:19 _geoPolygon(1,2,3)
");
insta::assert_snapshot!(p("_geoPolygon([1,2],[1,2],[1,2,3])"), @r"
Was expecting 2 coordinates but instead found 3.
26:27 _geoPolygon([1,2],[1,2],[1,2,3])
");
insta::assert_snapshot!(p("_geoPolygon([1,2],[1,2,3])"), @r"
The `_geoPolygon` filter expects at least 3 points but only 2 were specified
24:25 _geoPolygon([1,2],[1,2,3])
");
insta::assert_snapshot!(p("_geoPolygon(1)"), @r"
The `_geoPolygon` filter doesn't match the expected format: `_geoPolygon([latitude, longitude], [latitude, longitude])`.
13:15 _geoPolygon(1)
");
insta::assert_snapshot!(p("_geoPolygon([1,2)"), @r"
The `_geoPolygon` filter doesn't match the expected format: `_geoPolygon([latitude, longitude], [latitude, longitude])`.
17:18 _geoPolygon([1,2)
");
insta::assert_snapshot!(p("_geoPoint(12, 13, 14)"), @r###"
`_geoPoint` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.
@@ -938,15 +1039,15 @@ pub mod tests {
34:35 channel = mv OR followers >= 1000)
"###);
insta::assert_snapshot!(p("colour NOT EXIST"), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `colour NOT EXIST`.
insta::assert_snapshot!(p("colour NOT EXIST"), @r"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, `_geoBoundingBox` or `_geoPolygon` at `colour NOT EXIST`.
1:17 colour NOT EXIST
"###);
");
insta::assert_snapshot!(p("subscribers 100 TO1000"), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `subscribers 100 TO1000`.
insta::assert_snapshot!(p("subscribers 100 TO1000"), @r"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, `_geoBoundingBox` or `_geoPolygon` at `subscribers 100 TO1000`.
1:23 subscribers 100 TO1000
"###);
");
insta::assert_snapshot!(p("channel = ponce ORdog != 'bernese mountain'"), @r###"
Found unexpected characters at the end of the filter: `ORdog != \'bernese mountain\'`. You probably forgot an `OR` or an `AND` rule.
@@ -1071,38 +1172,38 @@ pub mod tests {
5:7 NOT OR EXISTS AND EXISTS NOT EXISTS
"###);
insta::assert_snapshot!(p(r#"value NULL"#), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `value NULL`.
insta::assert_snapshot!(p(r#"value NULL"#), @r"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, `_geoBoundingBox` or `_geoPolygon` at `value NULL`.
1:11 value NULL
"###);
insta::assert_snapshot!(p(r#"value NOT NULL"#), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `value NOT NULL`.
");
insta::assert_snapshot!(p(r#"value NOT NULL"#), @r"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, `_geoBoundingBox` or `_geoPolygon` at `value NOT NULL`.
1:15 value NOT NULL
"###);
insta::assert_snapshot!(p(r#"value EMPTY"#), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `value EMPTY`.
");
insta::assert_snapshot!(p(r#"value EMPTY"#), @r"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, `_geoBoundingBox` or `_geoPolygon` at `value EMPTY`.
1:12 value EMPTY
"###);
insta::assert_snapshot!(p(r#"value NOT EMPTY"#), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `value NOT EMPTY`.
");
insta::assert_snapshot!(p(r#"value NOT EMPTY"#), @r"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, `_geoBoundingBox` or `_geoPolygon` at `value NOT EMPTY`.
1:16 value NOT EMPTY
"###);
insta::assert_snapshot!(p(r#"value IS"#), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `value IS`.
");
insta::assert_snapshot!(p(r#"value IS"#), @r"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, `_geoBoundingBox` or `_geoPolygon` at `value IS`.
1:9 value IS
"###);
insta::assert_snapshot!(p(r#"value IS NOT"#), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `value IS NOT`.
");
insta::assert_snapshot!(p(r#"value IS NOT"#), @r"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, `_geoBoundingBox` or `_geoPolygon` at `value IS NOT`.
1:13 value IS NOT
"###);
insta::assert_snapshot!(p(r#"value IS EXISTS"#), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `value IS EXISTS`.
");
insta::assert_snapshot!(p(r#"value IS EXISTS"#), @r"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, `_geoBoundingBox` or `_geoPolygon` at `value IS EXISTS`.
1:16 value IS EXISTS
"###);
insta::assert_snapshot!(p(r#"value IS NOT EXISTS"#), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `value IS NOT EXISTS`.
");
insta::assert_snapshot!(p(r#"value IS NOT EXISTS"#), @r"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, `_geoBoundingBox` or `_geoPolygon` at `value IS NOT EXISTS`.
1:20 value IS NOT EXISTS
"###);
");
}
#[test]
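
Note: taken together, the filter-parser changes above extend the geo filter grammar. Some illustrative expressions the new grammar accepts (coordinates invented for the example):

    _geoRadius(45.47, 9.18, 2000)            // unchanged three-argument form
    _geoRadius(45.47, 9.18, 2000, 1000)      // new optional fourth argument: the resolution
    _geoBoundingBox([45.5, 9.2], [45.4, 9.1])
    _geoPolygon([45.5, 9.1], [45.5, 9.2], [45.4, 9.2],)  // new filter: at least 3 [lat, lng] pairs, trailing comma tolerated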


@@ -1,3 +1,5 @@
+#![allow(clippy::result_large_err)]
use std::collections::HashMap;
use std::io;


@@ -1,4 +1,4 @@
-use std::path::PathBuf;
+use std::path::{Path, PathBuf};
use std::sync::{Arc, RwLock};
use std::time::Duration;
use std::{fs, thread};
@@ -591,4 +591,8 @@ impl IndexMapper {
pub fn set_currently_updating_index(&self, index: Option<(String, Index)>) {
*self.currently_updating_index.write().unwrap() = index;
}
pub fn base_path(&self) -> &Path {
&self.base_path
}
}


@@ -1,3 +1,6 @@
+// The main Error type is large and boxing the large variant make the pattern matching fails
+#![allow(clippy::result_large_err)]
/*!
This crate defines the index scheduler, which is responsible for:
1. Keeping references to meilisearch's indexes and mapping them to their
@@ -344,7 +347,7 @@ impl IndexScheduler {
Ok(this)
}
fn read_txn(&self) -> Result<RoTxn<WithoutTls>> {
fn read_txn(&self) -> Result<RoTxn<'_, WithoutTls>> {
self.env.read_txn().map_err(|e| e.into())
}
@@ -757,7 +760,7 @@ impl IndexScheduler {
/// Register a new task coming from a dump in the scheduler.
/// By taking a mutable ref we're pretty sure no one will ever import a dump while actix is running.
pub fn register_dumped_task(&mut self) -> Result<Dump> {
pub fn register_dumped_task(&mut self) -> Result<Dump<'_>> {
Dump::new(self)
}
@@ -806,10 +809,8 @@ impl IndexScheduler {
.queue
.tasks
.get_task(self.rtxn, task_id)
-            .map_err(|err| io::Error::new(io::ErrorKind::Other, err))?
-            .ok_or_else(|| {
-                io::Error::new(io::ErrorKind::Other, Error::CorruptedTaskQueue)
-            })?;
+            .map_err(io::Error::other)?
+            .ok_or_else(|| io::Error::other(Error::CorruptedTaskQueue))?;
serde_json::to_writer(&mut self.buffer, &TaskView::from_task(&task))?;
self.buffer.push(b'\n');
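
Note: besides the `io::Error::other` cleanup, this hunk spells out previously elided lifetimes (`RoTxn<'_, WithoutTls>`, `Dump<'_>`), presumably to satisfy the stricter elided-lifetime lints of the newer 1.89 toolchain. A minimal sketch of the same signature change, with illustrative types:

    struct Txn<'env>(&'env str);

    struct Env(String);

    impl Env {
        // Before: `fn read_txn(&self) -> Txn` hides that the result borrows self.
        // After: `'_` makes the borrow visible at the signature.
        fn read_txn(&self) -> Txn<'_> {
            Txn(&self.0)
        }
    }

    fn main() {
        let env = Env("data".into());
        let txn = env.read_txn();
        println!("{}", txn.0);
    }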


@@ -310,7 +310,8 @@ impl Queue {
| self.tasks.status.get(wtxn, &Status::Failed)?.unwrap_or_default()
| self.tasks.status.get(wtxn, &Status::Canceled)?.unwrap_or_default();
-        let to_delete = RoaringBitmap::from_iter(finished.into_iter().rev().take(100_000));
+        let to_delete =
+            RoaringBitmap::from_sorted_iter(finished.into_iter().take(100_000)).unwrap();
// /!\ the len must be at least 2 or else we might enter an infinite loop where we only delete
// the deletion tasks we enqueued ourselves.
@@ -326,7 +327,7 @@ impl Queue {
);
// it's safe to unwrap here because we checked the len above
-        let newest_task_id = to_delete.iter().last().unwrap();
+        let newest_task_id = to_delete.iter().next_back().unwrap();
let last_task_to_delete =
self.tasks.get_task(wtxn, newest_task_id)?.ok_or(Error::CorruptedTaskQueue)?;
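
Note: the deletion task now targets the oldest finished tasks: the ascending id iterator is consumed from the front instead of being reversed, and `RoaringBitmap::from_sorted_iter` (which requires ascending input and returns a `Result`, hence the `unwrap`) replaces the generic `from_iter`. A small sketch with the `roaring` crate:

    use roaring::RoaringBitmap;

    fn main() {
        // Finished task ids; RoaringBitmap always iterates in ascending order.
        let finished = RoaringBitmap::from_iter([1u32, 2, 3, 10, 42]);

        // New behaviour: oldest first, built via the sorted fast path.
        let oldest = RoaringBitmap::from_sorted_iter(finished.iter().take(2)).unwrap();
        assert_eq!(oldest.iter().collect::<Vec<_>>(), vec![1, 2]);

        // Old behaviour: newest first, by reversing the iterator.
        let newest = RoaringBitmap::from_iter(finished.iter().rev().take(2));
        assert_eq!(newest.iter().collect::<Vec<_>>(), vec![10, 42]);
    }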


@@ -66,6 +66,7 @@ pub(crate) enum DocumentOperation {
/// A [batch](Batch) that combines multiple tasks operating on an index.
#[derive(Debug)]
+#[allow(clippy::large_enum_variant)]
pub(crate) enum IndexOperation {
DocumentOperation {
index_uid: String,


@@ -50,6 +50,11 @@ impl MustStopProcessing {
pub fn reset(&self) {
self.0.store(false, Ordering::Relaxed);
}
pub fn as_lambda(&self) -> impl Fn() -> bool + Send + Sync + 'static {
let clone = self.clone();
move || clone.get()
}
}
pub struct Scheduler {
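
Note: `as_lambda` turns the shared stop flag into a plain `Fn() -> bool` closure, so consumers such as the new `PipedArchiveBuilder` can poll for cancellation without knowing about the scheduler. A minimal sketch, assuming `MustStopProcessing` wraps an `Arc<AtomicBool>` as its `get`/`reset` methods suggest:

    use std::sync::atomic::{AtomicBool, Ordering};
    use std::sync::Arc;

    #[derive(Clone, Default)]
    struct MustStopProcessing(Arc<AtomicBool>);

    impl MustStopProcessing {
        fn get(&self) -> bool {
            self.0.load(Ordering::Relaxed)
        }
        fn stop(&self) {
            self.0.store(true, Ordering::Relaxed);
        }
        // Clone the Arc into a 'static closure: callers only see `Fn() -> bool`.
        fn as_lambda(&self) -> impl Fn() -> bool + Send + Sync + 'static {
            let clone = self.clone();
            move || clone.get()
        }
    }

    fn main() {
        let flag = MustStopProcessing::default();
        let must_stop = flag.as_lambda(); // can be moved into another thread or builder
        assert!(!must_stop());
        flag.stop();
        assert!(must_stop());
    }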


@@ -370,7 +370,7 @@ fn ureq_error_into_error(error: ureq::Error) -> Error {
}
Err(e) => e.into(),
},
-        ureq::Error::Transport(transport) => io::Error::new(io::ErrorKind::Other, transport).into(),
+        ureq::Error::Transport(transport) => io::Error::other(transport).into(),
}
}
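
Note: this and several hunks above replace `io::Error::new(io::ErrorKind::Other, e)` with `io::Error::other(e)`, a std shorthand (stable since Rust 1.74, well within the new 1.89 MSRV). The two forms are equivalent:

    use std::io;

    fn main() {
        let verbose = io::Error::new(io::ErrorKind::Other, "transport failed");
        let terse = io::Error::other("transport failed");
        assert_eq!(verbose.kind(), terse.kind());
    }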


@@ -4,6 +4,7 @@ use std::sync::atomic::Ordering;
use meilisearch_types::heed::CompactionOption;
use meilisearch_types::milli::progress::{Progress, VariableNameStep};
+use meilisearch_types::milli::InternalError;
use meilisearch_types::tasks::{Status, Task};
use meilisearch_types::{compression, VERSION_FILE_NAME};
@@ -76,6 +77,22 @@ unsafe fn remove_tasks(
impl IndexScheduler {
pub(super) fn process_snapshot(
&self,
progress: Progress,
tasks: Vec<Task>,
) -> Result<Vec<Task>> {
let compaction_option = if self.scheduler.experimental_no_snapshot_compaction {
CompactionOption::Disabled
} else {
CompactionOption::Enabled
};
match compaction_option {
CompactionOption::Enabled => self.process_snapshot_with_temp(progress, tasks),
CompactionOption::Disabled => self.process_snapshot_with_pipe(progress, tasks),
}
}
fn process_snapshot_with_temp(
&self,
progress: Progress,
mut tasks: Vec<Task>,
@@ -105,12 +122,8 @@ impl IndexScheduler {
progress.update_progress(SnapshotCreationProgress::SnapshotTheIndexScheduler);
let dst = temp_snapshot_dir.path().join("tasks");
fs::create_dir_all(&dst)?;
-        let compaction_option = if self.scheduler.experimental_no_snapshot_compaction {
-            CompactionOption::Disabled
-        } else {
-            CompactionOption::Enabled
-        };
-        self.env.copy_to_path(dst.join("data.mdb"), compaction_option)?;
+        self.env.copy_to_path(dst.join("data.mdb"), CompactionOption::Enabled)?;
// 2.2 Remove the current snapshot tasks
//
@@ -161,7 +174,7 @@ impl IndexScheduler {
let dst = temp_snapshot_dir.path().join("indexes").join(uuid.to_string());
fs::create_dir_all(&dst)?;
index
.copy_to_path(dst.join("data.mdb"), compaction_option)
.copy_to_path(dst.join("data.mdb"), CompactionOption::Enabled)
.map_err(|e| Error::from_milli(e, Some(name.to_string())))?;
}
@@ -171,7 +184,7 @@ impl IndexScheduler {
progress.update_progress(SnapshotCreationProgress::SnapshotTheApiKeys);
let dst = temp_snapshot_dir.path().join("auth");
fs::create_dir_all(&dst)?;
-        self.scheduler.auth_env.copy_to_path(dst.join("data.mdb"), compaction_option)?;
+        self.scheduler.auth_env.copy_to_path(dst.join("data.mdb"), CompactionOption::Enabled)?;
// 5. Copy and tarball the flat snapshot
progress.update_progress(SnapshotCreationProgress::CreateTheTarball);
@@ -206,4 +219,139 @@ impl IndexScheduler {
Ok(tasks)
}
fn process_snapshot_with_pipe(
&self,
progress: Progress,
mut tasks: Vec<Task>,
) -> Result<Vec<Task>> {
progress.update_progress(SnapshotCreationProgress::StartTheSnapshotCreation);
let must_stop_processing = &self.scheduler.must_stop_processing;
// a closure, so each early-return site below can build its own `Error` value
let abort_no_index = || Err(Error::from_milli(InternalError::AbortedIndexation.into(), None));
fs::create_dir_all(&self.scheduler.snapshots_path)?;
// 1. Find the base path and original name of the database
// TODO find a better way to get this path
let mut base_path = self.env.path().to_owned();
base_path.pop();
let base_path = base_path;
let db_name = base_path.file_name().and_then(OsStr::to_str).unwrap_or("data.ms");
// 2. Start the tarball builder. The tarball will be created on another thread from piped data.
let mut builder = compression::PipedArchiveBuilder::new(
self.scheduler.snapshots_path.clone(),
format!("{db_name}.snapshot"),
base_path,
must_stop_processing.as_lambda(),
);
// 3. Snapshot the VERSION file
builder.add_file_to_archive(self.scheduler.version_file_path.clone())?;
if must_stop_processing.get() {
return abort_no_index();
}
// 4. Snapshot the index-scheduler LMDB env
//
// When we call copy_to_path, LMDB opens a read transaction by itself and
// we can't provide our own. This is an issue because we would like to know
// which update files to copy, but new ones can be enqueued between the copy
// of the env and the transaction we later open to retrieve the enqueued tasks.
// So we prefer to open a new transaction after copying the env, and to copy
// too many update files rather than too few.
//
// Note that there cannot be any update files deleted between those
// two read operations as the task processing is synchronous.
// 4.1 First copy the LMDB env of the index-scheduler
progress.update_progress(SnapshotCreationProgress::SnapshotTheIndexScheduler);
builder.add_env_to_archive(&self.env)?;
if must_stop_processing.get() {
return abort_no_index();
}
// 4.2 Create a read transaction on the index-scheduler
let rtxn = self.env.read_txn()?;
// 4.3 Only copy the update files of the enqueued tasks
progress.update_progress(SnapshotCreationProgress::SnapshotTheUpdateFiles);
builder.add_dir_to_archive(self.queue.file_store.path().to_path_buf())?;
let enqueued = self.queue.tasks.get_status(&rtxn, Status::Enqueued)?;
let (atomic, update_file_progress) = AtomicUpdateFileStep::new(enqueued.len() as u32);
progress.update_progress(update_file_progress);
for task_id in enqueued {
if must_stop_processing.get() {
return abort_no_index();
}
let task =
self.queue.tasks.get_task(&rtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?;
if let Some(content_uuid) = task.content_uuid() {
let src = self.queue.file_store.get_update_path(content_uuid);
builder.add_file_to_archive(src)?;
}
atomic.fetch_add(1, Ordering::Relaxed);
}
// 5. Snapshot every index
progress.update_progress(SnapshotCreationProgress::SnapshotTheIndexes);
builder.add_dir_to_archive(self.index_mapper.base_path().to_path_buf())?;
let index_mapping = self.index_mapper.index_mapping;
let nb_indexes = index_mapping.len(&rtxn)? as u32;
for (i, result) in index_mapping.iter(&rtxn)?.enumerate() {
let (name, _) = result?;
let abort_index = || {
Err(Error::from_milli(
InternalError::AbortedIndexation.into(),
Some(name.to_string()), // the closure defers this allocation until an abort actually happens
))
};
if must_stop_processing.get() {
return abort_index();
}
progress.update_progress(VariableNameStep::<SnapshotCreationProgress>::new(
name, i as u32, nb_indexes,
));
let index = self.index_mapper.index(&rtxn, name)?;
builder.add_env_to_archive(index.raw_env())?;
}
drop(rtxn);
if must_stop_processing.get() {
return abort_no_index();
}
// 6. Snapshot the auth LMDB env
progress.update_progress(SnapshotCreationProgress::SnapshotTheApiKeys);
builder.add_env_to_archive(&self.scheduler.auth_env)?;
// 7. Finalize the tarball
progress.update_progress(SnapshotCreationProgress::CreateTheTarball);
let file = builder.finish()?;
// 8. Change the permission to make the snapshot readonly
let mut permissions = file.metadata()?.permissions();
permissions.set_readonly(true);
#[cfg(unix)]
{
use std::os::unix::fs::PermissionsExt;
#[allow(clippy::non_octal_unix_permissions)]
// r--r--r-- (0o444)
permissions.set_mode(0b100_100_100);
}
file.set_permissions(permissions)?;
for task in &mut tasks {
task.status = Status::Succeeded;
}
Ok(tasks)
}
}

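A subtlety in step 8: `0b100_100_100` is the binary spelling of `0o444`, i.e. read-only for owner, group, and others, which matches the `set_readonly(true)` call used on non-Unix platforms (and explains the `non_octal_unix_permissions` allow). A quick check of the equivalence:

fn main() {
    // Each `100` triple is `r--`: owner, group, and others are read-only.
    assert_eq!(0b100_100_100, 0o444);

    // For contrast, a world-writable `rwxrwxrwx` mode would be:
    assert_eq!(0b111_111_111, 0o777);
}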
View File

@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: (1, 21, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: (1, 22, 1) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
1 {uid: 1, batch_uid: 1, status: succeeded, details: { primary_key: Some("mouse"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
2 {uid: 2, batch_uid: 2, status: succeeded, details: { primary_key: Some("bone"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
3 {uid: 3, batch_uid: 3, status: failed, error: ResponseError { code: 200, message: "Index `doggo` already exists.", error_code: "index_already_exists", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#index_already_exists" }, details: { primary_key: Some("bone"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
@@ -57,7 +57,7 @@ girafo: { number_of_documents: 0, field_distribution: {} }
[timestamp] [4,]
----------------------------------------------------------------------
### All Batches:
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.21.0"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.22.1"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
1 {uid: 1, details: {"primaryKey":"mouse"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"catto":1}}, stop reason: "created batch containing only task with id 1 of type `indexCreation` that cannot be batched with any other task.", }
2 {uid: 2, details: {"primaryKey":"bone"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"doggo":1}}, stop reason: "created batch containing only task with id 2 of type `indexCreation` that cannot be batched with any other task.", }
3 {uid: 3, details: {"primaryKey":"bone"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"indexCreation":1},"indexUids":{"doggo":1}}, stop reason: "created batch containing only task with id 3 of type `indexCreation` that cannot be batched with any other task.", }

View File

@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: (1, 21, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: (1, 22, 1) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
----------------------------------------------------------------------
### Status:
enqueued [0,]

View File

@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: (1, 21, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: (1, 22, 1) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
----------------------------------------------------------------------
### Status:

View File

@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: (1, 21, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: (1, 22, 1) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
----------------------------------------------------------------------
### Status:
@@ -37,7 +37,7 @@ catto [1,]
[timestamp] [0,]
----------------------------------------------------------------------
### All Batches:
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.21.0"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.22.1"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
----------------------------------------------------------------------
### Batch to tasks mapping:
0 [0,]

View File

@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: (1, 21, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: (1, 22, 1) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
2 {uid: 2, status: enqueued, details: { primary_key: Some("bone"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
----------------------------------------------------------------------
@@ -40,7 +40,7 @@ doggo [2,]
[timestamp] [0,]
----------------------------------------------------------------------
### All Batches:
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.21.0"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.22.1"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
----------------------------------------------------------------------
### Batch to tasks mapping:
0 [0,]

View File

@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: (1, 21, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: (1, 22, 1) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
2 {uid: 2, status: enqueued, details: { primary_key: Some("bone"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
3 {uid: 3, status: enqueued, details: { primary_key: Some("bone"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
@@ -43,7 +43,7 @@ doggo [2,3,]
[timestamp] [0,]
----------------------------------------------------------------------
### All Batches:
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.21.0"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.22.1"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
----------------------------------------------------------------------
### Batch to tasks mapping:
0 [0,]

View File

@@ -45,6 +45,7 @@ pub fn upgrade_index_scheduler(
(1, 19, _) => 0,
(1, 20, _) => 0,
(1, 21, _) => 0,
(1, 22, _) => 0,
(major, minor, patch) => {
if major > current_major
|| (major == current_major && minor > current_minor)

View File

@@ -17,7 +17,7 @@ impl<'a> BytesDecode<'a> for UuidCodec {
impl BytesEncode<'_> for UuidCodec {
type EItem = Uuid;
fn bytes_encode(item: &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
fn bytes_encode(item: &Self::EItem) -> Result<Cow<'_, [u8]>, BoxedError> {
Ok(Cow::Borrowed(item.as_bytes()))
}
}

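This hunk, like the `KeyIdActionCodec` one below, only spells out a previously elided lifetime: `Cow<[u8]>` silently borrows from the input, and `Cow<'_, [u8]>` makes that borrow visible, which recent rustc and Clippy warnings about hidden lifetime parameters push for. A reduced sketch of the same signature change:

use std::borrow::Cow;

// The `'_` documents that the returned Cow borrows from `item`;
// behaviour is identical to the elided form.
fn bytes_encode(item: &[u8; 16]) -> Cow<'_, [u8]> {
    Cow::Borrowed(item)
}

fn main() {
    let uuid_bytes = [7u8; 16];
    assert!(matches!(bytes_encode(&uuid_bytes), Cow::Borrowed(_)));
}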
View File

@@ -271,9 +271,10 @@ macro_rules! json_string {
#[cfg(test)]
mod tests {
use uuid::Uuid;
use crate as meili_snap;
use crate::UUID_IN_MESSAGE_RE;
use uuid::Uuid;
#[test]
fn snap() {

View File

@@ -315,7 +315,9 @@ impl<'a> heed::BytesDecode<'a> for KeyIdActionCodec {
impl<'a> heed::BytesEncode<'a> for KeyIdActionCodec {
type EItem = (&'a KeyId, &'a Action, Option<&'a [u8]>);
fn bytes_encode((key_id, action, index): &Self::EItem) -> StdResult<Cow<[u8]>, BoxedError> {
fn bytes_encode(
(key_id, action, index): &'_ Self::EItem,
) -> StdResult<Cow<'_, [u8]>, BoxedError> {
let mut bytes = Vec::new();
bytes.extend_from_slice(key_id.as_bytes());

View File

@@ -1,11 +1,17 @@
use std::fs::{create_dir_all, File};
use std::io::Write;
use std::path::Path;
use std::io::{PipeWriter, Read, Write};
use std::mem::ManuallyDrop;
use std::ops::DerefMut;
use std::os::fd::{AsRawFd, FromRawFd};
use std::path::{Path, PathBuf};
use std::sync::mpsc::{Receiver, RecvTimeoutError, Sender};
use std::thread::JoinHandle;
use flate2::read::GzDecoder;
use flate2::write::GzEncoder;
use flate2::Compression;
use tar::{Archive, Builder};
use milli::heed::Env;
use tar::{Archive, Builder, Header};
pub fn to_tar_gz(src: impl AsRef<Path>, dest: impl AsRef<Path>) -> anyhow::Result<()> {
let mut f = File::create(dest)?;
@@ -26,3 +32,197 @@ pub fn from_tar_gz(src: impl AsRef<Path>, dest: impl AsRef<Path>) -> anyhow::Res
ar.unpack(&dest)?;
Ok(())
}
pub struct PipedArchiveBuilder {
send_compression: Sender<CompressionMessage>,
send_cancellation: Sender<CancellationMessage>,
processing_thread: JoinHandle<anyhow::Result<File>>,
cancellation_thread: JoinHandle<()>,
}
enum CompressionMessage {
Env { path: PathBuf, reader: std::io::PipeReader },
File { path: PathBuf },
Dir { path: PathBuf },
}
impl PipedArchiveBuilder {
pub fn new<F>(
dest_dir: PathBuf,
dest_filename: String,
base_path: PathBuf,
must_stop_processing: F,
) -> Self
where
F: Fn() -> bool + Send + 'static,
{
let (send_compression, recv) = std::sync::mpsc::channel();
let processing_thread = std::thread::Builder::new()
.name("piped-archive-builder".into())
.spawn(|| Self::run_processing(dest_dir, dest_filename, recv, base_path))
.unwrap();
let (send_cancellation, recv) = std::sync::mpsc::channel();
let cancellation_thread = std::thread::Builder::new()
.name("piped-archive-builder-cancellation".into())
.spawn(|| Self::run_cancellation(must_stop_processing, recv))
.unwrap();
Self { send_compression, send_cancellation, processing_thread, cancellation_thread }
}
pub fn add_env_to_archive<T>(&mut self, env: &Env<T>) -> anyhow::Result<()> {
let (reader, writer) = std::io::pipe()?;
let path = env.path().to_path_buf();
// make sure that the environment cannot change while it is being added to the archive,
// as any concurrent change would corrupt the copy.
let env_wtxn = env.write_txn()?;
// SAFETY: only the cancellation thread has the actual responsibility of closing the pipe since
// the clone is `ManuallyDrop`.
let mut cloned_writer = unsafe {
let writer_raw_fd = writer.as_raw_fd();
ManuallyDrop::new(PipeWriter::from_raw_fd(writer_raw_fd))
};
self.send_cancellation.send(CancellationMessage::OpenedPipe { pipe: writer });
self.send_compression.send(CompressionMessage::Env { path, reader });
let mdb_path = env.path().join("data.mdb");
let mut file = std::fs::File::open(&mdb_path)?;
std::io::copy(&mut file, cloned_writer.deref_mut())?;
self.send_cancellation.send(CancellationMessage::ClosingPipe);
// there are no changes we might want to commit
env_wtxn.abort();
Ok(())
}
pub fn add_file_to_archive(&mut self, path: PathBuf) -> anyhow::Result<()> {
self.send_compression.send(CompressionMessage::File { path });
Ok(())
}
pub fn add_dir_to_archive(&mut self, path: PathBuf) -> anyhow::Result<()> {
self.send_compression.send(CompressionMessage::Dir { path });
Ok(())
}
pub fn finish(self) -> anyhow::Result<File> {
drop(self.send_cancellation);
drop(self.send_compression);
// FIXME: catch panics from the processing thread instead of unwrapping the join result
let file = self.processing_thread.join().unwrap()?;
self.cancellation_thread.join().unwrap();
Ok(file)
}
fn run_processing(
dest_dir: PathBuf,
dest_filename: String,
recv: Receiver<CompressionMessage>,
base_path: PathBuf,
) -> anyhow::Result<File> {
let mut temp_archive = tempfile::NamedTempFile::new_in(&dest_dir)?;
let gz_encoder = GzEncoder::new(&mut temp_archive, Compression::default());
let mut tar_encoder = Builder::new(gz_encoder);
let base_path_in_archive = PathInArchive::from_absolute_and_base(&base_path, &base_path);
// add the root
tar_encoder.append_dir(base_path_in_archive.as_path(), &base_path)?;
while let Ok(message) = recv.recv() {
match message {
CompressionMessage::Env { path, reader } => {
let dir_path_in_archive =
PathInArchive::from_absolute_and_base(&path, &base_path);
tar_encoder.append_dir(dir_path_in_archive.as_path(), &path)?;
let path = path.join("data.mdb");
Self::add_to_archive(&mut tar_encoder, &path, &base_path, reader)?;
}
CompressionMessage::File { path } => {
let path_in_archive = PathInArchive::from_absolute_and_base(&path, &base_path);
tar_encoder.append_path_with_name(&path, path_in_archive.as_path())?;
}
CompressionMessage::Dir { path } => {
let path_in_archive = PathInArchive::from_absolute_and_base(&path, &base_path);
tar_encoder.append_dir(path_in_archive.as_path(), &path)?;
}
}
}
let gz_encoder = tar_encoder.into_inner()?;
gz_encoder.finish()?;
temp_archive.flush()?;
let archive = temp_archive.persist(dest_dir.join(dest_filename))?;
Ok(archive)
}
fn run_cancellation<F>(must_stop_processing: F, recv: Receiver<CancellationMessage>)
where
F: Fn() -> bool + Send + 'static,
{
let mut current_pipe = None;
loop {
let next_message = match recv.recv_timeout(std::time::Duration::from_secs(60)) {
Ok(message) => message,
Err(RecvTimeoutError::Disconnected) => break,
Err(RecvTimeoutError::Timeout) => {
if must_stop_processing() {
break;
}
continue;
}
};
match next_message {
CancellationMessage::OpenedPipe { pipe } => current_pipe = Some(pipe),
CancellationMessage::ClosingPipe => current_pipe = None,
}
}
drop(current_pipe);
}
fn add_to_archive(
tar_encoder: &mut Builder<impl Write>,
path: &Path,
base: &Path,
reader: impl Read,
) -> anyhow::Result<()> {
let stats = path.metadata()?;
let mut header = Header::new_gnu();
header.set_metadata_in_mode(&stats, tar::HeaderMode::Complete);
let path_in_archive = PathInArchive::from_absolute_and_base(path, base);
tar_encoder.append_data(&mut header, path_in_archive.as_path(), reader)?;
Ok(())
}
}
enum CancellationMessage {
OpenedPipe { pipe: PipeWriter },
ClosingPipe,
}
struct PathInArchive(PathBuf);
impl PathInArchive {
pub fn from_absolute_and_base(absolute: &Path, base: &Path) -> Self {
// FIXME: canonicalize can fail; handle the error instead of unwrapping
let canonical = absolute.canonicalize().unwrap();
let relative = match canonical.strip_prefix(base) {
Ok(stripped) => Path::new(&".").join(stripped),
Err(_) => absolute.to_path_buf(),
};
Self(relative)
}
pub fn as_path(&self) -> &Path {
self.0.as_path()
}
}

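The builder is built around `std::io::pipe` (stabilized in Rust 1.87): `add_env_to_archive` streams `data.mdb` into the `PipeWriter` while the processing thread drains the matching `PipeReader` into the tarball, and closing the write end, which the cancellation thread does when asked to stop, is what terminates the stream. A minimal sketch of that producer/consumer shape, without the `ManuallyDrop`/raw-fd handling of the real code:

use std::io::{pipe, Read, Write};
use std::thread;

fn main() -> std::io::Result<()> {
    let (mut reader, mut writer) = pipe()?;

    // Stand-in for the processing thread: drain the pipe into a buffer.
    let consumer = thread::spawn(move || {
        let mut buf = Vec::new();
        reader.read_to_end(&mut buf).unwrap();
        buf
    });

    // Stand-in for add_env_to_archive: stream bytes through the pipe.
    writer.write_all(b"contents of data.mdb")?;
    drop(writer); // closing the write end lets the reader reach EOF

    assert_eq!(consumer.join().unwrap(), b"contents of data.mdb");
    Ok(())
}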
View File

@@ -5,6 +5,7 @@ use actix_web::{self as aweb, HttpResponseBuilder};
use aweb::http::header;
use aweb::rt::task::JoinError;
use convert_case::Casing;
use milli::cellulite;
use milli::heed::{Error as HeedError, MdbError};
use serde::{Deserialize, Serialize};
use utoipa::ToSchema;
@@ -239,6 +240,7 @@ InconsistentDocumentChangeHeaders , InvalidRequest , BAD_REQU
InvalidDocumentFilter , InvalidRequest , BAD_REQUEST ;
InvalidDocumentSort , InvalidRequest , BAD_REQUEST ;
InvalidDocumentGeoField , InvalidRequest , BAD_REQUEST ;
InvalidDocumentGeojsonField , InvalidRequest , BAD_REQUEST ;
InvalidHeaderValue , InvalidRequest , BAD_REQUEST ;
InvalidVectorDimensions , InvalidRequest , BAD_REQUEST ;
InvalidVectorsType , InvalidRequest , BAD_REQUEST ;
@@ -501,7 +503,9 @@ impl ErrorCode for milli::Error {
Code::InvalidFacetSearchFacetName
}
UserError::CriterionError(_) => Code::InvalidSettingsRankingRules,
UserError::InvalidGeoField { .. } => Code::InvalidDocumentGeoField,
UserError::InvalidGeoField { .. } | UserError::GeoJsonError(_) => {
Code::InvalidDocumentGeoField
}
UserError::InvalidVectorDimensions { .. }
| UserError::InvalidIndexingVectorDimensions { .. } => {
Code::InvalidVectorDimensions
@@ -525,6 +529,17 @@ impl ErrorCode for milli::Error {
| UserError::DocumentEditionCompilationError(_) => {
Code::EditDocumentsByFunctionError
}
UserError::CelluliteError(err) => match err {
cellulite::Error::BuildCanceled
| cellulite::Error::VersionMismatchOnBuild(_)
| cellulite::Error::DatabaseDoesntExists
| cellulite::Error::Heed(_)
| cellulite::Error::InvalidGeometry(_)
| cellulite::Error::InternalDocIdMissing(_, _)
| cellulite::Error::CannotConvertLineToCell(_, _, _) => Code::Internal,
cellulite::Error::InvalidGeoJson(_) => Code::InvalidDocumentGeojsonField,
},
UserError::MalformedGeojson(_) => Code::InvalidDocumentGeojsonField,
}
}
}

View File

@@ -1,3 +1,5 @@
#![allow(clippy::result_large_err)]
pub mod batch_view;
pub mod batches;
pub mod compression;

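`clippy::result_large_err` fires when the error variant of a returned `Result` is large (the default threshold is 128 bytes), because every call site then reserves that much stack space; the crate-level `allow` opts out instead of boxing the error types. A sketch of what the lint reacts to, with a hypothetical oversized error:

// Hypothetical error big enough to trip clippy::result_large_err.
#[derive(Debug)]
enum BigError {
    Inline([u8; 256]), // large inline payload
}

fn fallible(fail: bool) -> Result<(), BigError> {
    if fail {
        Err(BigError::Inline([0; 256]))
    } else {
        Ok(())
    }
}

fn main() {
    assert!(fallible(false).is_ok());
    // The whole Result is at least as large as its biggest variant:
    assert!(std::mem::size_of::<Result<(), BigError>>() >= 256);
}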
View File

@@ -12,6 +12,7 @@ use tokio::task::JoinError;
use crate::routes::indexes::{PROXY_ORIGIN_REMOTE_HEADER, PROXY_ORIGIN_TASK_UID_HEADER};
#[derive(Debug, thiserror::Error)]
#[allow(clippy::large_enum_variant)]
pub enum MeilisearchHttpError {
#[error("A Content-Type header is missing. Accepted values for the Content-Type header are: {}",
.0.iter().map(|s| format!("`{}`", s)).collect::<Vec<_>>().join(", "))]

View File

@@ -1,4 +1,6 @@
#![allow(clippy::result_large_err)]
#![allow(rustdoc::private_intra_doc_links)]
#[macro_use]
pub mod error;
pub mod analytics;

View File

@@ -180,12 +180,6 @@ pub async fn get_metrics(
let response = String::from_utf8(buffer).expect("Failed to convert bytes to string");
// We cannot specify the version with ContentType(TEXT_PLAIN_UTF_8) so we have to write everything by hand :(
// see the following for what should be returned: https://prometheus.io/docs/instrumenting/content_negotiation/#content-type-response
let content_type = ("content-type", "text/plain; version=0.0.4; charset=utf-8");
Ok(HttpResponse::Ok()
// .insert_header(header::ContentType(mime::TEXT_PLAIN_UTF_8))
.insert_header(content_type)
.body(response))
let content_type = ("content-type", prometheus::TEXT_FORMAT);
Ok(HttpResponse::Ok().insert_header(content_type).body(response))
}

View File

@@ -1,3 +1,4 @@
use core::convert::Infallible;
use std::collections::BTreeMap;
use std::str::FromStr;
@@ -7,7 +8,6 @@ use actix_http::header::{
};
use actix_web::web::{self, Data, Path};
use actix_web::{HttpRequest, HttpResponse};
use core::convert::Infallible;
use deserr::actix_web::AwebJson;
use deserr::{DeserializeError, Deserr, ValuePointerRef};
use index_scheduler::IndexScheduler;
@@ -24,12 +24,12 @@ use tracing::debug;
use url::Url;
use utoipa::{OpenApi, ToSchema};
use uuid::Uuid;
use WebhooksError::*;
use crate::analytics::{Aggregate, Analytics};
use crate::extractors::authentication::policies::ActionPolicy;
use crate::extractors::authentication::GuardedData;
use crate::extractors::sequential_extractor::SeqHandler;
use WebhooksError::*;
#[derive(OpenApi)]
#[openapi(

View File

@@ -219,7 +219,7 @@ struct SearchResultByQueryIterItem<'a> {
fn merge_index_local_results(
results_by_query: Vec<SearchResultByQuery<'_>>,
) -> impl Iterator<Item = SearchResultByQueryIterItem> + '_ {
) -> impl Iterator<Item = SearchResultByQueryIterItem<'_>> + '_ {
itertools::kmerge_by(
results_by_query.into_iter().map(SearchResultByQueryIter::new),
|left: &SearchResultByQueryIterItem, right: &SearchResultByQueryIterItem| {

View File

@@ -2080,7 +2080,7 @@ pub(crate) fn parse_filter(
facets: &Value,
filter_parsing_error_code: Code,
features: RoFeatures,
) -> Result<Option<Filter>, ResponseError> {
) -> Result<Option<Filter<'_>>, ResponseError> {
let filter = match facets {
Value::String(expr) => Filter::from_str(expr).map_err(|e| e.into()),
Value::Array(arr) => parse_filter_array(arr).map_err(|e| e.into()),
@@ -2117,7 +2117,7 @@ pub(crate) fn parse_filter(
Ok(filter)
}
fn parse_filter_array(arr: &[Value]) -> Result<Option<Filter>, MeilisearchHttpError> {
fn parse_filter_array(arr: &'_ [Value]) -> Result<Option<Filter<'_>>, MeilisearchHttpError> {
let mut ands = Vec::new();
for value in arr {
match value {

View File

@@ -13,9 +13,9 @@
//! What is going to happen at this point is that you're going to send a oneshot::Sender over an async mpsc channel.
//! Then, the queue/scheduler is going to either:
//! - Drop your oneshot channel => that means there are too many searches going on, and yours won't be executed.
//! You should exit and free all the RAM you use ASAP.
//! You should exit and free all the RAM you use ASAP.
//! - Sends you a Permit => that will unlock the method, and you will be able to process your search.
//! And should drop the Permit only once you have freed all the RAM consumed by the method.
//! And should drop the Permit only once you have freed all the RAM consumed by the method.
use std::num::NonZeroUsize;
use std::sync::atomic::{AtomicUsize, Ordering};

View File

@@ -1040,7 +1040,7 @@ async fn error_single_search_forbidden_token() {
];
let failed_query_indexes: Vec<_> =
std::iter::repeat(Some(0)).take(5).chain(std::iter::repeat(None).take(6)).collect();
std::iter::repeat_n(Some(0), 5).chain(std::iter::repeat_n(None, 6)).collect();
let failed_query_indexes = vec![failed_query_indexes; ACCEPTED_KEYS_SINGLE.len()];
@@ -1118,10 +1118,9 @@ async fn error_multi_search_forbidden_token() {
},
];
let failed_query_indexes: Vec<_> = std::iter::repeat(Some(0))
.take(5)
.chain(std::iter::repeat(Some(1)).take(5))
.chain(std::iter::repeat(None).take(6))
let failed_query_indexes: Vec<_> = std::iter::repeat_n(Some(0), 5)
.chain(std::iter::repeat_n(Some(1), 5))
.chain(std::iter::repeat_n(None, 6))
.collect();
let failed_query_indexes = vec![failed_query_indexes; ACCEPTED_KEYS_BOTH.len()];

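`std::iter::repeat_n`, stable since Rust 1.82, states the repetition count up front instead of pairing an unbounded `repeat` with `take`; the rewrites above are the mechanical replacement. Both spellings produce the same sequence:

fn main() {
    let with_take: Vec<_> = std::iter::repeat(Some(0)).take(5).collect();
    let with_repeat_n: Vec<_> = std::iter::repeat_n(Some(0), 5).collect();
    assert_eq!(with_take, with_repeat_n);

    // Chaining composes the same way as in the test above.
    let indexes: Vec<Option<u32>> =
        std::iter::repeat_n(Some(0), 2).chain(std::iter::repeat_n(None, 3)).collect();
    assert_eq!(indexes, vec![Some(0), Some(0), None, None, None]);
}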
View File

@@ -522,6 +522,26 @@ pub async fn shared_index_with_geo_documents() -> &'static Index<'static, Shared
.await
}
pub async fn shared_index_geojson_documents() -> &'static Index<'static, Shared> {
static INDEX: OnceCell<Index<'static, Shared>> = OnceCell::const_new();
INDEX
.get_or_init(|| async {
// Retrieved from https://gitlab-forge.din.developpement-durable.gouv.fr/pub/geomatique/descartes/d-map/-/blob/main/demo/examples/commons/countries.geojson?ref_type=heads
let server = Server::new_shared();
let index = server._index("SHARED_GEOJSON_DOCUMENTS").to_shared();
let countries = include_str!("../documents/geojson/assets/countries.json");
let countries = serde_json::from_str::<serde_json::Value>(countries).unwrap();
let (response, _code) = index._add_documents(Value(countries), Some("name")).await;
server.wait_task(response.uid()).await.succeeded();
let (response, _code) =
index._update_settings(json!({"filterableAttributes": ["_geojson"]})).await;
server.wait_task(response.uid()).await.succeeded();
index
})
.await
}
pub async fn shared_index_for_fragments() -> Index<'static, Shared> {
static INDEX: OnceCell<(Server<Shared>, String)> = OnceCell::const_new();
let (server, uid) = INDEX

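The new `shared_index_geojson_documents` helper follows the crate's shared-fixture pattern: a `tokio::sync::OnceCell` runs the async initializer exactly once, and every later test awaits the cached index. A reduced sketch of the pattern, with a plain `u32` in place of an `Index`:

use tokio::sync::OnceCell;

static VALUE: OnceCell<u32> = OnceCell::const_new();

async fn shared_value() -> &'static u32 {
    VALUE
        .get_or_init(|| async {
            // Expensive setup (index creation, document upload) runs once.
            42
        })
        .await
}

#[tokio::main]
async fn main() {
    assert_eq!(*shared_value().await, 42);
    assert_eq!(*shared_value().await, 42); // second call reuses the cached value
}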
View File

@@ -1,6 +1,3 @@
use crate::common::encoder::Encoder;
use crate::common::{default_settings, GetAllDocumentsOptions, Server, Value};
use crate::json;
use actix_web::test;
use meili_snap::{json_string, snapshot};
use meilisearch::Opt;
@@ -8,6 +5,10 @@ use time::format_description::well_known::Rfc3339;
use time::OffsetDateTime;
use uuid::Uuid;
use crate::common::encoder::Encoder;
use crate::common::{default_settings, GetAllDocumentsOptions, Server, Value};
use crate::json;
/// This is the basic usage of our API and every other tests uses the content-type application/json
#[actix_rt::test]
async fn add_documents_test_json_content_types() {

View File

@@ -134,14 +134,14 @@ async fn get_all_documents_bad_filter() {
let (response, code) = index.get_all_documents_raw("?filter=doggo").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
snapshot!(json_string!(response), @r#"
{
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `doggo`.\n1:6 doggo",
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, `_geoBoundingBox` or `_geoPolygon` at `doggo`.\n1:6 doggo",
"code": "invalid_document_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_filter"
}
"###);
"#);
let (response, code) = index.get_all_documents_raw("?filter=doggo=bernese").await;
snapshot!(code, @"400 Bad Request");
@@ -523,14 +523,14 @@ async fn delete_document_by_filter() {
// send bad filter
let (response, code) = index.delete_document_by_filter(json!({ "filter": "hello"})).await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
snapshot!(response, @r#"
{
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `hello`.\n1:6 hello",
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, `_geoBoundingBox` or `_geoPolygon` at `hello`.\n1:6 hello",
"code": "invalid_document_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_filter"
}
"###);
"#);
// send empty filter
let (response, code) = index.delete_document_by_filter(json!({ "filter": ""})).await;
@@ -724,14 +724,14 @@ async fn fetch_document_by_filter() {
let (response, code) = index.fetch_documents(json!({ "filter": "cool doggo" })).await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
snapshot!(response, @r#"
{
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `cool doggo`.\n1:11 cool doggo",
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, `_geoBoundingBox` or `_geoPolygon` at `cool doggo`.\n1:11 cool doggo",
"code": "invalid_document_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_filter"
}
"###);
"#);
let (response, code) = index.fetch_documents(json!({ "filter": "doggo = bernese" })).await;
snapshot!(code, @"400 Bad Request");

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,547 @@
{
"type": "Polygon",
"coordinates": [
[
[
3.11681,
50.63646
],
[
3.11945,
50.63488
],
[
3.12134,
50.63504
],
[
3.12064,
50.63127
],
[
3.12203,
50.62785
],
[
3.12389,
50.6262
],
[
3.12161,
50.62358
],
[
3.12547,
50.62114
],
[
3.12447,
50.61874
],
[
3.12288,
50.61988
],
[
3.12054,
50.61846
],
[
3.11846,
50.61754
],
[
3.11482,
50.6207
],
[
3.11232,
50.6188
],
[
3.10936,
50.61727
],
[
3.10822,
50.61765
],
[
3.10603,
50.61536
],
[
3.1041,
50.61596
],
[
3.10017,
50.6186
],
[
3.09688,
50.61714
],
[
3.09575,
50.61795
],
[
3.0891,
50.61532
],
[
3.08625,
50.61792
],
[
3.07948,
50.61428
],
[
3.07146,
50.6066
],
[
3.06819,
50.60918
],
[
3.06502,
50.61046
],
[
3.06223,
50.61223
],
[
3.05925,
50.60659
],
[
3.05463,
50.60077
],
[
3.04906,
50.6008
],
[
3.04726,
50.6035
],
[
3.04328,
50.60667
],
[
3.04155,
50.60417
],
[
3.03767,
50.60456
],
[
3.03528,
50.60538
],
[
3.03239,
50.60725
],
[
3.0254,
50.6111
],
[
3.02387,
50.6125
],
[
3.0248,
50.61344
],
[
3.02779,
50.61418
],
[
3.02414,
50.6169
],
[
3.02312,
50.61975
],
[
3.02172,
50.62082
],
[
3.01953,
50.62484
],
[
3.01811,
50.62529
],
[
3.01313,
50.62558
],
[
3.01385,
50.62695
],
[
3.00844,
50.62717
],
[
3.0056,
50.6267
],
[
3.00229,
50.62557
],
[
3.00119,
50.62723
],
[
2.99769,
50.62901
],
[
2.99391,
50.62732
],
[
2.98971,
50.63036
],
[
2.9862,
50.63328
],
[
2.98178,
50.63404
],
[
2.97917,
50.63499
],
[
2.97284,
50.63429
],
[
2.97174,
50.63365
],
[
2.97002,
50.63366
],
[
2.96956,
50.63506
],
[
2.97046,
50.6365
],
[
2.96878,
50.63833
],
[
2.97039,
50.6395
],
[
2.97275,
50.64183
],
[
2.97225,
50.64381
],
[
2.9745,
50.64442
],
[
2.97474,
50.64648
],
[
2.97091,
50.65108
],
[
2.96975,
50.65361
],
[
2.97061,
50.65513
],
[
2.96929,
50.65739
],
[
2.97072,
50.6581
],
[
2.97973,
50.66048
],
[
2.98369,
50.66123
],
[
2.9865,
50.65959
],
[
2.9896,
50.65845
],
[
2.9963,
50.65666
],
[
2.99903,
50.65552
],
[
3.00274,
50.65235
],
[
3.00714,
50.64887
],
[
3.01088,
50.64845
],
[
3.01318,
50.64541
],
[
3.01974,
50.63972
],
[
3.02317,
50.63813
],
[
3.02639,
50.63613
],
[
3.029,
50.63521
],
[
3.03414,
50.6382
],
[
3.03676,
50.63888
],
[
3.03686,
50.64147
],
[
3.03791,
50.64379
],
[
3.0409,
50.64577
],
[
3.04582,
50.64807
],
[
3.05132,
50.64866
],
[
3.05055,
50.64949
],
[
3.05244,
50.65055
],
[
3.05784,
50.64927
],
[
3.0596,
50.65105
],
[
3.06414,
50.65041
],
[
3.06705,
50.64936
],
[
3.07023,
50.64706
],
[
3.07203,
50.64355
],
[
3.07526,
50.64188
],
[
3.0758,
50.64453
],
[
3.07753,
50.64381
],
[
3.07861,
50.64542
],
[
3.08299,
50.64725
],
[
3.08046,
50.64912
],
[
3.08349,
50.65082
],
[
3.08354,
50.65155
],
[
3.08477,
50.65312
],
[
3.08542,
50.65654
],
[
3.08753,
50.65687
],
[
3.09032,
50.65602
],
[
3.09018,
50.65142
],
[
3.09278,
50.65086
],
[
3.09402,
50.64982
],
[
3.09908,
50.65146
],
[
3.10316,
50.65227
],
[
3.09726,
50.64723
],
[
3.09387,
50.64358
],
[
3.09357,
50.64095
],
[
3.09561,
50.64133
],
[
3.09675,
50.64018
],
[
3.09454,
50.63891
],
[
3.09627,
50.63693
],
[
3.09795,
50.63713
],
[
3.09919,
50.63576
],
[
3.10324,
50.6351
],
[
3.10613,
50.63532
],
[
3.10649,
50.63434
],
[
3.1109,
50.63525
],
[
3.11502,
50.63504
],
[
3.11681,
50.63646
]
]
]
}

View File

@@ -0,0 +1,453 @@
use meili_snap::{json_string, snapshot};
use crate::common::{shared_index_geojson_documents, Server};
use crate::json;
const LILLE: &str = include_str!("assets/lille.geojson");
#[actix_rt::test]
async fn basic_add_settings_and_geojson_documents() {
let server = Server::new_shared();
let index = server.unique_index();
let (task, _status_code) =
index.update_settings(json!({"filterableAttributes": ["_geojson"]})).await;
server.wait_task(task.uid()).await.succeeded();
let (response, _) = index.search_get("?filter=_geoPolygon([0,0],[0,2],[2,2],[2,0])").await;
snapshot!(response,
@r#"
{
"hits": [],
"query": "",
"processingTimeMs": "[duration]",
"limit": 20,
"offset": 0,
"estimatedTotalHits": 0
}
"#);
let lille: serde_json::Value = serde_json::from_str(LILLE).unwrap();
let documents = json!([
{
"id": "missing",
},
{
"id": "point",
"_geojson": { "type": "Point", "coordinates": [1, 1] },
},
{
"id": "lille",
"_geojson": lille,
},
]);
let (task, _status_code) = index.add_documents(documents, None).await;
let response = server.wait_task(task.uid()).await.succeeded();
snapshot!(json_string!(response, { ".uid" => "[uid]", ".batchUid" => "[batch_uid]", ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }),
@r#"
{
"uid": "[uid]",
"batchUid": "[batch_uid]",
"indexUid": "[uuid]",
"status": "succeeded",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 3,
"indexedDocuments": 3
},
"error": null,
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"#);
let (response, code) = index.get_all_documents_raw("?ids=missing,point").await;
snapshot!(code, @"200 OK");
snapshot!(response,
@r#"
{
"results": [
{
"id": "missing"
},
{
"id": "point",
"_geojson": {
"type": "Point",
"coordinates": [
1,
1
]
}
}
],
"offset": 0,
"limit": 20,
"total": 2
}
"#);
let (response, _code) = index.search_get("?filter=_geoPolygon([0,0],[0,2],[2,2],[2,0])").await;
snapshot!(response,
@r#"
{
"hits": [
{
"id": "point",
"_geojson": {
"type": "Point",
"coordinates": [
1,
1
]
}
}
],
"query": "",
"processingTimeMs": "[duration]",
"limit": 20,
"offset": 0,
"estimatedTotalHits": 1
}
"#);
}
#[actix_rt::test]
async fn basic_add_geojson_documents_and_settings() {
let server = Server::new_shared();
let index = server.unique_index();
let lille: serde_json::Value = serde_json::from_str(LILLE).unwrap();
let documents = json!([
{
"id": "missing",
},
{
"id": "point",
"_geojson": { "type": "Point", "coordinates": [1, 1] },
},
{
"id": "lille",
"_geojson": lille,
},
]);
let (task, _status_code) = index.add_documents(documents, None).await;
let response = server.wait_task(task.uid()).await.succeeded();
snapshot!(response,
@r#"
{
"uid": "[uid]",
"batchUid": "[batch_uid]",
"indexUid": "[uuid]",
"status": "succeeded",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 3,
"indexedDocuments": 3
},
"error": null,
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"#);
let (response, _code) = index.search_get("?filter=_geoPolygon([0,0],[0,2],[2,2],[2,0])").await;
snapshot!(response,
@r#"
{
"message": "Index `[uuid]`: Attribute `_geojson` is not filterable. This index does not have configured filterable attributes.\n14:15 _geoPolygon([0,0],[0,2],[2,2],[2,0])",
"code": "invalid_search_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_filter"
}
"#);
let (task, _status_code) =
index.update_settings(json!({"filterableAttributes": ["_geojson"]})).await;
server.wait_task(task.uid()).await.succeeded();
let (response, _code) = index.search_get("?filter=_geoPolygon([0,0],[0,2],[2,2],[2,0])").await;
snapshot!(response,
@r#"
{
"hits": [
{
"id": "point",
"_geojson": {
"type": "Point",
"coordinates": [
1,
1
]
}
}
],
"query": "",
"processingTimeMs": "[duration]",
"limit": 20,
"offset": 0,
"estimatedTotalHits": 1
}
"#);
}
#[actix_rt::test]
async fn add_and_remove_geojson() {
let server = Server::new_shared();
let index = server.unique_index();
index.update_settings(json!({"filterableAttributes": ["_geojson"]})).await;
let documents = json!([
{
"id": "missing",
},
{
"id": 0,
"_geojson": { "type": "Point", "coordinates": [1, 1] },
}
]);
let (task, _status_code) = index.add_documents(documents, None).await;
server.wait_task(task.uid()).await.succeeded();
let (response, _code) =
index.search_get("?filter=_geoPolygon([0,0],[0,0.9],[0.9,0.9],[0.9,0])").await;
assert_eq!(response.get("hits").unwrap().as_array().unwrap().len(), 0);
let (response, _code) = index.search_get("?filter=_geoPolygon([0,0],[0,2],[2,2],[2,0])").await;
assert_eq!(response.get("hits").unwrap().as_array().unwrap().len(), 1);
let (task, _) = index.delete_document(0).await;
server.wait_task(task.uid()).await.succeeded();
let (response, _code) =
index.search_get("?filter=_geoPolygon([0,0],[0,0.9],[0.9,0.9],[0.9,0])").await;
assert_eq!(response.get("hits").unwrap().as_array().unwrap().len(), 0);
let (response, _code) = index.search_get("?filter=_geoPolygon([0,0],[0,2],[2,2],[2,0])").await;
assert_eq!(response.get("hits").unwrap().as_array().unwrap().len(), 0);
// add it back
let documents = json!([
{
"id": 0,
"_geojson": { "type": "Point", "coordinates": [1, 1] },
}
]);
let (task, _status_code) = index.add_documents(documents, None).await;
server.wait_task(task.uid()).await.succeeded();
let (response, _code) =
index.search_get("?filter=_geoPolygon([0,0],[0,0.9],[0.9,0.9],[0.9,0])").await;
assert_eq!(response.get("hits").unwrap().as_array().unwrap().len(), 0);
let (response, _code) = index.search_get("?filter=_geoPolygon([0,0],[0,2],[2,2],[2,0])").await;
assert_eq!(response.get("hits").unwrap().as_array().unwrap().len(), 1);
}
#[actix_rt::test]
async fn partial_update_geojson() {
let server = Server::new_shared();
let index = server.unique_index();
let (task, _) = index.update_settings(json!({"filterableAttributes": ["_geojson"]})).await;
server.wait_task(task.uid()).await.succeeded();
let documents = json!([
{
"id": 0,
"_geojson": { "type": "Point", "coordinates": [1, 1] },
}
]);
let (task, _status_code) = index.add_documents(documents, None).await;
server.wait_task(task.uid()).await.succeeded();
let (response, _code) =
index.search_get("?filter=_geoPolygon([0,0],[0,0.9],[0.9,0.9],[0.9,0])").await;
assert_eq!(response.get("hits").unwrap().as_array().unwrap().len(), 0);
let (response, _code) = index.search_get("?filter=_geoPolygon([0,0],[0,2],[2,2],[2,0])").await;
assert_eq!(response.get("hits").unwrap().as_array().unwrap().len(), 1);
let documents = json!([
{
"id": 0,
"_geojson": { "type": "Point", "coordinates": [0.5, 0.5] },
}
]);
let (task, _status_code) = index.update_documents(documents, None).await;
server.wait_task(task.uid()).await.succeeded();
let (response, _code) =
index.search_get("?filter=_geoPolygon([0,0],[0,0.9],[0.9,0.9],[0.9,0])").await;
assert_eq!(response.get("hits").unwrap().as_array().unwrap().len(), 1);
let (response, _code) = index.search_get("?filter=_geoPolygon([0,0],[0,2],[2,2],[2,0])").await;
assert_eq!(response.get("hits").unwrap().as_array().unwrap().len(), 1);
let (response, _code) =
index.search_get("?filter=_geoPolygon([0.9,0.9],[0.9,2],[2,2],[2,0.9])").await;
assert_eq!(response.get("hits").unwrap().as_array().unwrap().len(), 0);
}
#[actix_rt::test]
async fn geo_bounding_box() {
let index = shared_index_geojson_documents().await;
// The bounding box is a polygon over middle Europe
let (response, code) =
index.search_get("?filter=_geoBoundingBox([50.53987503447863,21.43443989912143],[43.76393151539099,0.54979129195425])&attributesToRetrieve=name").await;
snapshot!(code, @"200 OK");
snapshot!(response, @r#"
{
"hits": [
{
"name": "Austria"
},
{
"name": "Belgium"
},
{
"name": "Bosnia_and_Herzegovina"
},
{
"name": "Switzerland"
},
{
"name": "Czech_Republic"
},
{
"name": "Germany"
},
{
"name": "France"
},
{
"name": "Croatia"
},
{
"name": "Hungary"
},
{
"name": "Italy"
},
{
"name": "Luxembourg"
},
{
"name": "Netherlands"
},
{
"name": "Poland"
},
{
"name": "Romania"
},
{
"name": "Republic_of_Serbia"
},
{
"name": "Slovakia"
},
{
"name": "Slovenia"
}
],
"query": "",
"processingTimeMs": "[duration]",
"limit": 20,
"offset": 0,
"estimatedTotalHits": 17
}
"#);
// Between Russia and Alaska
let (response, code) = index
.search_get("?filter=_geoBoundingBox([70,-148],[63,152])&attributesToRetrieve=name")
.await;
snapshot!(code, @"200 OK");
snapshot!(response, @r#"
{
"hits": [
{
"name": "Canada"
},
{
"name": "Russia"
},
{
"name": "United_States_of_America"
}
],
"query": "",
"processingTimeMs": "[duration]",
"limit": 20,
"offset": 0,
"estimatedTotalHits": 3
}
"#);
}
#[actix_rt::test]
async fn geo_radius() {
let index = shared_index_geojson_documents().await;
// 200km around Luxembourg
let (response, code) = index
.search_get("?filter=_geoRadius(49.4369862,6.5576591,200000)&attributesToRetrieve=name")
.await;
snapshot!(code, @"200 OK");
snapshot!(response, @r#"
{
"hits": [
{
"name": "Belgium"
},
{
"name": "Germany"
},
{
"name": "France"
},
{
"name": "Luxembourg"
},
{
"name": "Netherlands"
}
],
"query": "",
"processingTimeMs": "[duration]",
"limit": 20,
"offset": 0,
"estimatedTotalHits": 5
}
"#);
}
#[actix_rt::test]
async fn bug_5904() {
// https://github.com/meilisearch/meilisearch/issues/5904
let server = Server::new_shared();
let index = server.unique_index();
let (response, _code) =
index.update_settings(json!({"filterableAttributes": ["_geojson"]})).await;
server.wait_task(response.uid()).await.succeeded();
let geojson = json!({
"id": 1,
"_geojson": {
"type": "FeatureCollection",
"features": [
{
"type": "Feature",
"geometry": {
"type": "Point",
"coordinates": [
4.23914,
48.382893
]
},
"properties": {}
}
]
}
});
let (response, _code) = index.add_documents(geojson, Some("id")).await;
server.wait_task(response.uid()).await.succeeded();
}

View File

@@ -1,5 +1,6 @@
mod add_documents;
mod delete_documents;
mod errors;
mod geojson;
mod get_documents;
mod update_documents;

View File

@@ -1,5 +1,4 @@
use meili_snap::snapshot;
use time::format_description::well_known::Rfc3339;
use time::OffsetDateTime;

View File

@@ -642,14 +642,14 @@ async fn filter_invalid_syntax_object() {
&json!({"filterableAttributes": ["title"]}),
&json!({"filter": "title & Glass"}),
|response, code| {
snapshot!(response, @r###"
snapshot!(response, @r#"
{
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass",
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, `_geoBoundingBox` or `_geoPolygon` at `title & Glass`.\n1:14 title & Glass",
"code": "invalid_search_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_filter"
}
"###);
"#);
snapshot!(code, @"400 Bad Request");
},
)
@@ -663,14 +663,14 @@ async fn filter_invalid_syntax_array() {
&json!({"filterableAttributes": ["title"]}),
&json!({"filter": ["title & Glass"]}),
|response, code| {
snapshot!(response, @r###"
snapshot!(response, @r#"
{
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass",
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, `_geoBoundingBox` or `_geoPolygon` at `title & Glass`.\n1:14 title & Glass",
"code": "invalid_search_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_filter"
}
"###);
"#);
snapshot!(code, @"400 Bad Request");
},
)

View File

@@ -2,8 +2,7 @@ use std::sync::Arc;
use actix_http::StatusCode;
use meili_snap::{json_string, snapshot};
use wiremock::matchers::method;
use wiremock::matchers::{path, AnyMatcher};
use wiremock::matchers::{method, path, AnyMatcher};
use wiremock::{Mock, MockServer, Request, ResponseTemplate};
use crate::common::{Server, Value, SCORE_DOCUMENTS};

View File

@@ -1,7 +1,8 @@
use meili_snap::{json_string, snapshot};
use super::shared_index_with_documents;
use crate::common::Server;
use crate::json;
use meili_snap::{json_string, snapshot};
#[actix_rt::test]
async fn default_search_should_return_estimated_total_hit() {

View File

@@ -1,6 +1,7 @@
use meili_snap::{json_string, snapshot};
use crate::common::Server;
use crate::json;
use meili_snap::{json_string, snapshot};
#[actix_rt::test]
async fn set_reset_chat_issue_5772() {

View File

@@ -339,14 +339,14 @@ async fn filter_invalid_syntax_object() {
index
.similar(json!({"id": 287947, "filter": "title & Glass", "embedder": "manual"}), |response, code| {
snapshot!(response, @r###"
snapshot!(response, @r#"
{
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass",
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, `_geoBoundingBox` or `_geoPolygon` at `title & Glass`.\n1:14 title & Glass",
"code": "invalid_similar_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_similar_filter"
}
"###);
"#);
snapshot!(code, @"400 Bad Request");
})
.await;
@@ -377,14 +377,14 @@ async fn filter_invalid_syntax_array() {
index
.similar(json!({"id": 287947, "filter": ["title & Glass"], "embedder": "manual"}), |response, code| {
snapshot!(response, @r###"
snapshot!(response, @r#"
{
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass",
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, `_geoBoundingBox` or `_geoPolygon` at `title & Glass`.\n1:14 title & Glass",
"code": "invalid_similar_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_similar_filter"
}
"###);
"#);
snapshot!(code, @"400 Bad Request");
})
.await;

View File

@@ -43,7 +43,7 @@ async fn version_too_old() {
std::fs::write(db_path.join("VERSION"), "1.11.9999").unwrap();
let options = Opt { experimental_dumpless_upgrade: true, ..default_settings };
let err = Server::new_with_options(options).await.map(|_| ()).unwrap_err();
snapshot!(err, @"Database version 1.11.9999 is too old for the experimental dumpless upgrade feature. Please generate a dump using the v1.11.9999 and import it in the v1.21.0");
snapshot!(err, @"Database version 1.11.9999 is too old for the experimental dumpless upgrade feature. Please generate a dump using the v1.11.9999 and import it in the v1.22.1");
}
#[actix_rt::test]
@@ -58,7 +58,7 @@ async fn version_requires_downgrade() {
std::fs::write(db_path.join("VERSION"), format!("{major}.{minor}.{patch}")).unwrap();
let options = Opt { experimental_dumpless_upgrade: true, ..default_settings };
let err = Server::new_with_options(options).await.map(|_| ()).unwrap_err();
snapshot!(err, @"Database version 1.21.1 is higher than the Meilisearch version 1.21.0. Downgrade is not supported");
snapshot!(err, @"Database version 1.22.2 is higher than the Meilisearch version 1.22.1. Downgrade is not supported");
}
#[actix_rt::test]

View File

@@ -8,7 +8,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
"progress": null,
"details": {
"upgradeFrom": "v1.12.0",
"upgradeTo": "v1.21.0"
"upgradeTo": "v1.22.1"
},
"stats": {
"totalNbTasks": 1,

View File

@@ -8,7 +8,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
"progress": null,
"details": {
"upgradeFrom": "v1.12.0",
"upgradeTo": "v1.21.0"
"upgradeTo": "v1.22.1"
},
"stats": {
"totalNbTasks": 1,

View File

@@ -8,7 +8,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
"progress": null,
"details": {
"upgradeFrom": "v1.12.0",
"upgradeTo": "v1.21.0"
"upgradeTo": "v1.22.1"
},
"stats": {
"totalNbTasks": 1,

View File

@@ -12,7 +12,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
"canceledBy": null,
"details": {
"upgradeFrom": "v1.12.0",
"upgradeTo": "v1.21.0"
"upgradeTo": "v1.22.1"
},
"error": null,
"duration": "[duration]",

View File

@@ -12,7 +12,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
"canceledBy": null,
"details": {
"upgradeFrom": "v1.12.0",
"upgradeTo": "v1.21.0"
"upgradeTo": "v1.22.1"
},
"error": null,
"duration": "[duration]",

View File

@@ -12,7 +12,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
"canceledBy": null,
"details": {
"upgradeFrom": "v1.12.0",
"upgradeTo": "v1.21.0"
"upgradeTo": "v1.22.1"
},
"error": null,
"duration": "[duration]",

View File

@@ -8,7 +8,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
"progress": null,
"details": {
"upgradeFrom": "v1.12.0",
"upgradeTo": "v1.21.0"
"upgradeTo": "v1.22.1"
},
"stats": {
"totalNbTasks": 1,

View File

@@ -12,7 +12,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
"canceledBy": null,
"details": {
"upgradeFrom": "v1.12.0",
"upgradeTo": "v1.21.0"
"upgradeTo": "v1.22.1"
},
"error": null,
"duration": "[duration]",

View File

@@ -249,7 +249,7 @@ async fn user_provide_mismatched_embedding_dimension() {
"###);
}
async fn generate_default_user_provided_documents(server: &Server) -> Index {
async fn generate_default_user_provided_documents(server: &Server) -> Index<'_> {
let index = server.index("doggo");
let (response, code) = index

View File

@@ -1,3 +1,5 @@
#![allow(clippy::result_large_err)]
use std::fs::{read_dir, read_to_string, remove_file, File};
use std::io::{BufWriter, Write as _};
use std::path::PathBuf;

View File

@@ -17,7 +17,7 @@ impl<'a> BytesDecode<'a> for UuidCodec {
impl BytesEncode<'_> for UuidCodec {
type EItem = Uuid;
fn bytes_encode(item: &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
fn bytes_encode(item: &'_ Self::EItem) -> Result<Cow<'_, [u8]>, BoxedError> {
Ok(Cow::Borrowed(item.as_bytes()))
}
}

View File

@@ -19,6 +19,7 @@ bstr = "1.12.0"
bytemuck = { version = "1.23.1", features = ["extern_crate_alloc"] }
byteorder = "1.5.0"
charabia = { version = "0.9.7", default-features = false }
cellulite = "0.3.0"
concat-arrays = "0.1.2"
convert_case = "0.8.0"
crossbeam-channel = "0.5.15"
@@ -27,6 +28,7 @@ either = { version = "1.15.0", features = ["serde"] }
flatten-serde-json = { path = "../flatten-serde-json" }
fst = "0.4.7"
fxhash = "0.2.1"
geojson = "0.24.2"
geoutils = "0.5.1"
grenad = { version = "0.5.0", default-features = false, features = [
"rayon",
@@ -88,7 +90,7 @@ rhai = { version = "1.22.2", features = [
"sync",
] }
arroy = "0.6.3"
hannoy = "0.0.5"
hannoy = { version = "0.0.8", features = ["arroy"] }
rand = "0.8.5"
tracing = "0.1.41"
ureq = { version = "2.12.1", features = ["json"] }
@@ -96,7 +98,7 @@ url = "2.5.4"
hashbrown = "0.15.4"
bumpalo = "3.18.1"
bumparaw-collections = "0.1.4"
steppe = { version = "0.4.0", default-features = false }
steppe = { version = "0.4", default-features = false }
thread_local = "1.1.9"
allocator-api2 = "0.3.0"
rustc-hash = "2.1.1"
@@ -116,6 +118,8 @@ twox-hash = { version = "2.1.1", default-features = false, features = [
"xxhash3_64",
"xxhash64",
] }
geo-types = "0.7.16"
zerometry = "0.3.0"
[dev-dependencies]
mimalloc = { version = "0.1.47", default-features = false }

View File

@@ -11,3 +11,4 @@ const fn parse_u32(s: &str) -> u32 {
pub const RESERVED_VECTORS_FIELD_NAME: &str = "_vectors";
pub const RESERVED_GEO_FIELD_NAME: &str = "_geo";
pub const RESERVED_GEOJSON_FIELD_NAME: &str = "_geojson";

View File

@@ -48,6 +48,7 @@ pub enum PrimaryKey<'a> {
Nested { name: &'a str },
}
#[allow(clippy::large_enum_variant)]
pub enum DocumentIdExtractionError {
InvalidDocumentId(UserError),
MissingDocumentId,

View File

@@ -10,17 +10,26 @@ use rhai::EvalAltResult;
use serde_json::Value;
use thiserror::Error;
use crate::constants::RESERVED_GEO_FIELD_NAME;
use crate::constants::{RESERVED_GEOJSON_FIELD_NAME, RESERVED_GEO_FIELD_NAME};
use crate::documents::{self, DocumentsBatchCursorError};
use crate::thread_pool_no_abort::PanicCatched;
use crate::vector::settings::EmbeddingSettings;
use crate::{CriterionError, DocumentId, FieldId, Object, SortError};
pub fn is_reserved_keyword(keyword: &str) -> bool {
[RESERVED_GEO_FIELD_NAME, "_geoDistance", "_geoPoint", "_geoRadius", "_geoBoundingBox"]
.contains(&keyword)
[
RESERVED_GEO_FIELD_NAME,
RESERVED_GEOJSON_FIELD_NAME,
"_geoDistance",
"_geoPoint",
"_geoRadius",
"_geoBoundingBox",
"_geoPolygon",
]
.contains(&keyword)
}
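With the expanded list, both geo roots and the new polygon operator are rejected as user field names. A quick check, assuming the function exactly as defined above:

assert!(is_reserved_keyword("_geojson"));
assert!(is_reserved_keyword("_geoPolygon"));
assert!(!is_reserved_keyword("title"));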
#[allow(clippy::large_enum_variant)]
#[derive(Error, Debug)]
pub enum Error {
#[error("internal: {0}.")]
@@ -80,6 +89,8 @@ pub enum InternalError {
#[error(transparent)]
HannoyError(#[from] hannoy::Error),
#[error(transparent)]
CelluliteError(#[from] cellulite::Error),
#[error(transparent)]
VectorEmbeddingError(#[from] crate::vector::Error),
}
@@ -99,6 +110,12 @@ pub enum SerializationError {
InvalidNumberSerialization,
}
impl From<cellulite::Error> for Error {
fn from(error: cellulite::Error) -> Self {
Self::UserError(UserError::CelluliteError(error))
}
}
#[derive(Error, Debug)]
pub enum FieldIdMapMissingEntry {
#[error("unknown field id {field_id} coming from the {process} process")]
@@ -107,8 +124,13 @@ pub enum FieldIdMapMissingEntry {
FieldName { field_name: String, process: &'static str },
}
#[allow(clippy::large_enum_variant)]
#[derive(Error, Debug)]
pub enum UserError {
#[error(transparent)]
CelluliteError(#[from] cellulite::Error),
#[error("Malformed geojson: {0}")]
MalformedGeojson(serde_json::Error),
#[error("A document cannot contain more than 65,535 fields.")]
AttributeLimitReached,
#[error(transparent)]
@@ -153,6 +175,8 @@ and can not be more than 511 bytes.", .document_id.to_string()
},
#[error(transparent)]
InvalidGeoField(#[from] Box<GeoError>),
#[error(transparent)]
GeoJsonError(#[from] geojson::Error),
#[error("Invalid vector dimensions: expected: `{}`, found: `{}`.", .expected, .found)]
InvalidVectorDimensions { expected: usize, found: usize },
#[error("Invalid vector dimensions in document with id `{document_id}` in `._vectors.{embedder_name}`.\n - note: embedding #{embedding_index} has dimensions {found}\n - note: embedder `{embedder_name}` requires {expected}")]
@@ -621,7 +645,7 @@ impl From<HeedError> for Error {
// TODO use the encoding
HeedError::Encoding(_) => InternalError(Serialization(Encoding { db_name: None })),
HeedError::Decoding(_) => InternalError(Serialization(Decoding { db_name: None })),
HeedError::EnvAlreadyOpened { .. } => UserError(EnvAlreadyOpened),
HeedError::EnvAlreadyOpened => UserError(EnvAlreadyOpened),
}
}
}
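Note the error plumbing in this file: #[error(transparent)] CelluliteError(#[from] cellulite::Error) derives From<cellulite::Error> for UserError, and the manual impl above lifts the same error one level higher so `?` works in functions returning the crate-wide Error. A reduced sketch of the two-level pattern with thiserror, using stand-in types:

use thiserror::Error;

#[derive(Error, Debug)]
#[error("backend failure")]
struct BackendError; // stand-in for cellulite::Error

#[derive(Error, Debug)]
enum UserError {
    #[error(transparent)]
    Backend(#[from] BackendError), // derives From<BackendError> for UserError
}

#[derive(Error, Debug)]
enum Error {
    #[error(transparent)]
    User(#[from] UserError),
}

// Manual lift so `?` on a BackendError converts straight to Error.
impl From<BackendError> for Error {
    fn from(e: BackendError) -> Self {
        Self::User(UserError::Backend(e))
    }
}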

View File

@@ -6,7 +6,9 @@ use heed::RoTxn;
use super::FieldsIdsMap;
use crate::attribute_patterns::{match_field_legacy, PatternMatch};
use crate::constants::{RESERVED_GEO_FIELD_NAME, RESERVED_VECTORS_FIELD_NAME};
use crate::constants::{
RESERVED_GEOJSON_FIELD_NAME, RESERVED_GEO_FIELD_NAME, RESERVED_VECTORS_FIELD_NAME,
};
use crate::{
is_faceted_by, FieldId, FilterableAttributesFeatures, FilterableAttributesRule, Index,
LocalizedAttributesRule, Result, Weight,
@@ -24,6 +26,8 @@ pub struct Metadata {
pub asc_desc: bool,
/// The field is a geo field (`_geo`, `_geo.lat`, `_geo.lng`).
pub geo: bool,
/// The field is a geo json field (`_geojson`).
pub geo_json: bool,
/// The id of the localized attributes rule if the field is localized.
pub localized_attributes_rule_id: Option<NonZeroU16>,
/// The id of the filterable attributes rule if the field is filterable.
@@ -269,6 +273,7 @@ impl MetadataBuilder {
distinct: false,
asc_desc: false,
geo: false,
geo_json: false,
localized_attributes_rule_id: None,
filterable_attributes_rule_id: None,
};
@@ -295,6 +300,20 @@ impl MetadataBuilder {
distinct: false,
asc_desc: false,
geo: true,
geo_json: false,
localized_attributes_rule_id: None,
filterable_attributes_rule_id,
};
}
if match_field_legacy(RESERVED_GEOJSON_FIELD_NAME, field) == PatternMatch::Match {
debug_assert!(!sortable, "geojson fields should not be sortable");
return Metadata {
searchable: None,
sortable,
distinct: false,
asc_desc: false,
geo: false,
geo_json: true,
localized_attributes_rule_id: None,
filterable_attributes_rule_id,
};
@@ -328,6 +347,7 @@ impl MetadataBuilder {
distinct,
asc_desc,
geo: false,
geo_json: false,
localized_attributes_rule_id,
filterable_attributes_rule_id,
}

View File

@@ -5,7 +5,7 @@ use serde::{Deserialize, Serialize};
use utoipa::ToSchema;
use crate::attribute_patterns::{match_distinct_field, match_field_legacy, PatternMatch};
use crate::constants::RESERVED_GEO_FIELD_NAME;
use crate::constants::{RESERVED_GEOJSON_FIELD_NAME, RESERVED_GEO_FIELD_NAME};
use crate::AttributePatterns;
#[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug, ToSchema)]
@@ -34,6 +34,10 @@ impl FilterableAttributesRule {
matches!(self, FilterableAttributesRule::Field(field_name) if field_name == RESERVED_GEO_FIELD_NAME)
}
pub fn has_geojson(&self) -> bool {
matches!(self, FilterableAttributesRule::Field(field_name) if field_name == RESERVED_GEOJSON_FIELD_NAME)
}
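A usage note: the predicate matches only an exact-field rule, not a pattern rule, so `_geojson` must be listed verbatim among the filterable attributes to enable the feature. Assuming Field wraps a String:

let rule = FilterableAttributesRule::Field("_geojson".to_string());
assert!(rule.has_geojson());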
/// Get the features of the rule.
pub fn features(&self) -> FilterableAttributesFeatures {
match self {

View File

@@ -19,14 +19,14 @@ impl RoaringBitmapLenCodec {
if cookie == SERIAL_COOKIE_NO_RUNCONTAINER {
(bytes.read_u32::<LittleEndian>()? as usize, true)
} else if (cookie as u16) == SERIAL_COOKIE {
return Err(io::Error::new(io::ErrorKind::Other, "run containers are unsupported"));
return Err(io::Error::other("run containers are unsupported"));
} else {
return Err(io::Error::new(io::ErrorKind::Other, "unknown cookie value"));
return Err(io::Error::other("unknown cookie value"));
}
};
if size > u16::MAX as usize + 1 {
return Err(io::Error::new(io::ErrorKind::Other, "size is greater than supported"));
return Err(io::Error::other("size is greater than supported"));
}
let mut description_bytes = vec![0u8; size * 4];
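io::Error::other(msg), stable since Rust 1.74, is shorthand for io::Error::new(io::ErrorKind::Other, msg); the produced error is identical:

use std::io;

let e = io::Error::other("unknown cookie value");
assert_eq!(e.kind(), io::ErrorKind::Other);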

View File

@@ -5,6 +5,7 @@ use std::fmt;
use std::fs::File;
use std::path::Path;
use cellulite::Cellulite;
use deserr::Deserr;
use heed::types::*;
use heed::{CompactionOption, Database, DatabaseStat, RoTxn, RwTxn, Unspecified, WithoutTls};
@@ -115,9 +116,10 @@ pub mod db_name {
pub const FIELD_ID_DOCID_FACET_STRINGS: &str = "field-id-docid-facet-strings";
pub const VECTOR_EMBEDDER_CATEGORY_ID: &str = "vector-embedder-category-id";
pub const VECTOR_STORE: &str = "vector-arroy";
pub const CELLULITE: &str = "cellulite";
pub const DOCUMENTS: &str = "documents";
}
const NUMBER_OF_DBS: u32 = 25;
const NUMBER_OF_DBS: u32 = 25 + Cellulite::nb_dbs();
#[derive(Clone)]
pub struct Index {
@@ -183,6 +185,9 @@ pub struct Index {
/// Vector store based on hannoy™.
pub vector_store: hannoy::Database<Unspecified>,
/// Geo store based on cellulite™.
pub cellulite: Cellulite,
/// Maps the document id to the document as an obkv store.
pub(crate) documents: Database<BEU32, ObkvCodec>,
}
@@ -239,6 +244,7 @@ impl Index {
let embedder_category_id =
env.create_database(&mut wtxn, Some(VECTOR_EMBEDDER_CATEGORY_ID))?;
let vector_store = env.create_database(&mut wtxn, Some(VECTOR_STORE))?;
let cellulite = cellulite::Cellulite::create_from_env(&env, &mut wtxn, CELLULITE)?;
let documents = env.create_database(&mut wtxn, Some(DOCUMENTS))?;
@@ -267,6 +273,7 @@ impl Index {
field_id_docid_facet_strings,
vector_store,
embedder_category_id,
cellulite,
documents,
};
if this.get_version(&wtxn)?.is_none() && creation {
@@ -1052,6 +1059,13 @@ impl Index {
Ok(geo_filter)
}
/// Returns true if the geojson filtering feature is enabled.
pub fn is_geojson_filtering_enabled(&self, rtxn: &RoTxn<'_>) -> Result<bool> {
let geojson_filter =
self.filterable_attributes_rules(rtxn)?.iter().any(|field| field.has_geojson());
Ok(geojson_filter)
}
pub fn asc_desc_fields(&self, rtxn: &RoTxn<'_>) -> Result<HashSet<String>> {
let asc_desc_fields = self
.criteria(rtxn)?
@@ -1882,6 +1896,7 @@ impl Index {
field_id_docid_facet_strings,
vector_store,
embedder_category_id,
cellulite,
documents,
} = self;
@@ -1955,8 +1970,24 @@ impl Index {
sizes.insert("embedder_category_id", embedder_category_id.stat(rtxn).map(compute_size)?);
sizes.insert("documents", documents.stat(rtxn).map(compute_size)?);
// Cellulite
const _CELLULITE_DB_CHECK: () = {
if Cellulite::nb_dbs() != 4 {
panic!("Cellulite database count has changed, please update the code accordingly.")
}
};
sizes.insert("cellulite_item", cellulite.item_db_stats(rtxn).map(compute_size)?);
sizes.insert("cellulite_cell", cellulite.cell_db_stats(rtxn).map(compute_size)?);
sizes.insert("cellulite_update", cellulite.update_db_stats(rtxn).map(compute_size)?);
sizes.insert("cellulite_metadata", cellulite.metadata_db_stats(rtxn).map(compute_size)?);
Ok(sizes)
}
/// The underlying env for raw access
pub fn raw_env(&self) -> &heed::Env<WithoutTls> {
&self.env
}
}
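The _CELLULITE_DB_CHECK constant is a compile-time assertion: the block is evaluated during constant evaluation, so a change to Cellulite::nb_dbs() fails the build instead of surfacing as a runtime stats mismatch. The idiom in isolation:

const fn nb_dbs() -> u32 { 4 }

// Compilation fails with the panic message if the condition is violated.
const _DB_COUNT_CHECK: () = {
    if nb_dbs() != 4 {
        panic!("database count has changed, update the stats code accordingly");
    }
};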
pub struct EmbeddingsWithMetadata {

View File

@@ -1,4 +1,5 @@
#![allow(clippy::type_complexity)]
#![allow(clippy::result_large_err)]
#[cfg(not(windows))]
#[cfg(test)]
@@ -53,7 +54,7 @@ pub use search::new::{
};
use serde_json::Value;
pub use thread_pool_no_abort::{PanicCatched, ThreadPoolNoAbort, ThreadPoolNoAbortBuilder};
pub use {arroy, charabia as tokenizer, hannoy, heed, rhai};
pub use {arroy, cellulite, charabia as tokenizer, hannoy, heed, rhai};
pub use self::asc_desc::{AscDesc, AscDescError, Member, SortError};
pub use self::attribute_patterns::{AttributePatterns, PatternMatch};
@@ -86,7 +87,7 @@ pub use self::search::{
};
pub use self::update::ChannelCongestion;
pub type Result<T> = std::result::Result<T, error::Error>;
pub type Result<T, E = error::Error> = std::result::Result<T, E>;
pub type Attribute = u32;
pub type BEU16 = heed::types::U16<heed::byteorder::BE>;
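The widened alias keeps every existing Result<T> call site compiling while letting new code override the error type, thanks to the default type parameter. A sketch, assuming the crate's Error type:

pub type Result<T, E = Error> = std::result::Result<T, E>;

fn parse() -> Result<u32> { Ok(1) }                // defaults to Error
fn read() -> Result<u32, std::io::Error> { Ok(2) } // overrides the default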

View File

@@ -278,30 +278,6 @@ impl<U: Send + Sync + 'static> Step for VariableNameStep<U> {
}
}
// Integration with steppe
impl steppe::Progress for Progress {
fn update(&self, sub_progress: impl steppe::Step) {
self.update_progress(Compat(sub_progress));
}
}
struct Compat<T: steppe::Step>(T);
impl<T: steppe::Step> Step for Compat<T> {
fn name(&self) -> Cow<'static, str> {
self.0.name()
}
fn current(&self) -> u32 {
self.0.current().try_into().unwrap_or(u32::MAX)
}
fn total(&self) -> u32 {
self.0.total().try_into().unwrap_or(u32::MAX)
}
}
impl Step for arroy::MainStep {
fn name(&self) -> Cow<'static, str> {
match self {
@@ -343,3 +319,27 @@ impl Step for arroy::SubStep {
self.max
}
}
// Integration with steppe
impl steppe::Progress for Progress {
fn update(&self, sub_progress: impl steppe::Step) {
self.update_progress(Compat(sub_progress));
}
}
struct Compat<T: steppe::Step>(T);
impl<T: steppe::Step> Step for Compat<T> {
fn name(&self) -> Cow<'static, str> {
self.0.name()
}
fn current(&self) -> u32 {
self.0.current().try_into().unwrap_or(u32::MAX)
}
fn total(&self) -> u32 {
self.0.total().try_into().unwrap_or(u32::MAX)
}
}
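Moving this block does not change behaviour; Compat is the usual newtype bridge between two foreign traits (steppe::Step and the local Step), which the orphan rule would otherwise forbid implementing for one another. The shape of the idiom, with stand-in traits:

trait LocalStep { fn name(&self) -> String; }     // stand-in for the local Step
trait ExternalStep { fn label(&self) -> String; } // stand-in for steppe::Step

struct Compat<T: ExternalStep>(T);

// The wrapper type is local, so implementing the local trait for it is allowed.
impl<T: ExternalStep> LocalStep for Compat<T> {
    fn name(&self) -> String { self.0.label() }
}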

View File

@@ -304,7 +304,7 @@ impl ArrayView for ParseableArray<'_> {
fn get(&self, index: i64) -> Option<&dyn ValueView> {
let index = convert_index(index, self.size());
if index <= 0 {
if index < 0 {
return None;
}
let v = self.0.get(index as usize)?;
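The `<=` was an off-by-one: index 0 is a valid first element, and only indices still negative after end-relative conversion are out of range. A sketch, assuming convert_index maps negative indices from the end as Liquid does:

fn convert_index(index: i64, size: i64) -> i64 {
    if index < 0 { size + index } else { index }
}

let size = 3;
assert_eq!(convert_index(-1, size), 2);  // last element
assert_eq!(convert_index(0, size), 0);   // first element: rejected by the old `<= 0` check
assert_eq!(convert_index(-4, size), -1); // still negative => correctly rejected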

View File

@@ -38,7 +38,7 @@ where
let highest_level = get_highest_level(rtxn, db, field_id)?;
if let Some(first_bound) = get_first_facet_value::<BytesRefCodec, _>(rtxn, db, field_id)? {
fd.iterate(candidates, highest_level, first_bound, usize::MAX)?;
let _ = fd.iterate(candidates, highest_level, first_bound, usize::MAX)?;
Ok(())
} else {
Ok(())

View File

@@ -12,7 +12,9 @@ use roaring::{MultiOps, RoaringBitmap};
use serde_json::Value;
use super::facet_range_search;
use crate::constants::{RESERVED_GEO_FIELD_NAME, RESERVED_VECTORS_FIELD_NAME};
use crate::constants::{
RESERVED_GEOJSON_FIELD_NAME, RESERVED_GEO_FIELD_NAME, RESERVED_VECTORS_FIELD_NAME,
};
use crate::error::{Error, UserError};
use crate::filterable_attributes_rules::{filtered_matching_patterns, matching_features};
use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec};
@@ -36,6 +38,7 @@ pub struct Filter<'a> {
pub enum BadGeoError {
Lat(f64),
Lng(f64),
InvalidResolution(usize),
BoundingBoxTopIsBelowBottom(f64, f64),
}
@@ -47,16 +50,23 @@ impl Display for BadGeoError {
Self::BoundingBoxTopIsBelowBottom(top, bottom) => {
write!(f, "The top latitude `{top}` is below the bottom latitude `{bottom}`.")
}
Self::InvalidResolution(resolution) => write!(
f,
"Invalid resolution `{resolution}`. Resolution must be between 3 and 1000."
),
Self::Lat(lat) => write!(
f,
"Bad latitude `{}`. Latitude must be contained between -90 and 90 degrees. ",
"Bad latitude `{}`. Latitude must be contained between -90 and 90 degrees.",
lat
),
Self::Lng(lng) => write!(
f,
"Bad longitude `{}`. Longitude must be contained between -180 and 180 degrees. ",
lng
),
Self::Lng(lng) => {
let normalized = (lng + 180.0).rem_euclid(360.0) - 180.0;
write!(
f,
"Bad longitude `{}`. Longitude must be contained between -180 and 180 degrees. Hint: try using `{normalized}` instead.",
lng
)
}
}
}
}
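The hint wraps the longitude into [-180, 180): rem_euclid keeps the intermediate value non-negative, so the same formula handles overshoots in both directions. Worked through:

fn normalize_lng(lng: f64) -> f64 {
    (lng + 180.0).rem_euclid(360.0) - 180.0
}

assert_eq!(normalize_lng(250.0), -110.0); // as in the updated test snapshots below
assert_eq!(normalize_lng(-200.0), 160.0); // negative overshoot wraps forward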
@@ -225,11 +235,11 @@ impl<'a> Filter<'a> {
Ok(Some(Self { condition }))
}
pub fn use_contains_operator(&self) -> Option<&Token> {
pub fn use_contains_operator(&self) -> Option<&Token<'_>> {
self.condition.use_contains_operator()
}
pub fn use_vector_filter(&self) -> Option<&Token> {
pub fn use_vector_filter(&self) -> Option<&Token<'_>> {
self.condition.use_vector_filter()
}
}
@@ -612,50 +622,61 @@ impl<'a> Filter<'a> {
.union(),
FilterCondition::And(subfilters) => {
let mut subfilters_iter = subfilters.iter();
if let Some(first_subfilter) = subfilters_iter.next() {
let mut bitmap = Self::inner_evaluate(
&(first_subfilter.clone()).into(),
let Some(first_subfilter) = subfilters_iter.next() else {
return Ok(RoaringBitmap::new());
};
let mut bitmap = Self::inner_evaluate(
&(first_subfilter.clone()).into(),
rtxn,
index,
field_ids_map,
filterable_attribute_rules,
universe,
)?;
for f in subfilters_iter {
if bitmap.is_empty() {
return Ok(bitmap);
}
// TODO We are doing the intersections two times,
// it could be more efficient
// Can't I just replace this `&=` by an `=`?
bitmap &= Self::inner_evaluate(
&(f.clone()).into(),
rtxn,
index,
field_ids_map,
filterable_attribute_rules,
universe,
Some(&bitmap),
)?;
for f in subfilters_iter {
if bitmap.is_empty() {
return Ok(bitmap);
}
// TODO We are doing the intersections two times,
// it could be more efficient
// Can't I just replace this `&=` by an `=`?
bitmap &= Self::inner_evaluate(
&(f.clone()).into(),
rtxn,
index,
field_ids_map,
filterable_attribute_rules,
Some(&bitmap),
)?;
}
Ok(bitmap)
} else {
Ok(RoaringBitmap::new())
}
Ok(bitmap)
}
FilterCondition::VectorExists { fid: _, embedder, filter } => {
super::filter_vector::evaluate(rtxn, index, universe, embedder.clone(), filter)
}
FilterCondition::GeoLowerThan { point, radius } => {
FilterCondition::GeoLowerThan { point, radius, resolution: res_token } => {
let base_point: [f64; 2] =
[point[0].parse_finite_float()?, point[1].parse_finite_float()?];
if !(-90.0..=90.0).contains(&base_point[0]) {
return Err(point[0].as_external_error(BadGeoError::Lat(base_point[0])))?;
}
if !(-180.0..=180.0).contains(&base_point[1]) {
return Err(point[1].as_external_error(BadGeoError::Lng(base_point[1])))?;
}
let radius = radius.parse_finite_float()?;
let mut resolution = 125;
if let Some(res_token) = res_token {
resolution = res_token.parse_finite_float()? as usize;
if !(3..=1000).contains(&resolution) {
return Err(
res_token.as_external_error(BadGeoError::InvalidResolution(resolution))
)?;
}
}
let mut r1 = None;
if index.is_geo_filtering_enabled(rtxn)? {
let base_point: [f64; 2] =
[point[0].parse_finite_float()?, point[1].parse_finite_float()?];
if !(-90.0..=90.0).contains(&base_point[0]) {
return Err(point[0].as_external_error(BadGeoError::Lat(base_point[0])))?;
}
if !(-180.0..=180.0).contains(&base_point[1]) {
return Err(point[1].as_external_error(BadGeoError::Lng(base_point[1])))?;
}
let radius = radius.parse_finite_float()?;
let rtree = match index.geo_rtree(rtxn)? {
Some(rtree) => rtree,
None => return Ok(RoaringBitmap::new()),
@@ -671,52 +692,72 @@ impl<'a> Filter<'a> {
})
.map(|point| point.data.0)
.collect();
r1 = Some(result);
}
Ok(result)
} else {
Err(point[0].as_external_error(FilterError::AttributeNotFilterable {
attribute: RESERVED_GEO_FIELD_NAME,
filterable_patterns: filtered_matching_patterns(
filterable_attribute_rules,
&|features| features.is_filterable(),
),
}))?
let mut r2 = None;
if index.is_geojson_filtering_enabled(rtxn)? {
let point = geo_types::Point::new(base_point[1], base_point[0]);
let result = index.cellulite.in_circle(rtxn, point, radius, resolution)?;
r2 = Some(RoaringBitmap::from_iter(result)); // TODO: Remove once we update roaring in meilisearch
}
match (r1, r2) {
(Some(r1), Some(r2)) => Ok(r1 | r2),
(Some(r1), None) => Ok(r1),
(None, Some(r2)) => Ok(r2),
(None, None) => {
Err(point[0].as_external_error(FilterError::AttributeNotFilterable {
attribute: &format!(
"{RESERVED_GEO_FIELD_NAME}/{RESERVED_GEOJSON_FIELD_NAME}"
),
filterable_patterns: filtered_matching_patterns(
filterable_attribute_rules,
&|features| features.is_filterable(),
),
}))?
}
}
}
FilterCondition::GeoBoundingBox { top_right_point, bottom_left_point } => {
if index.is_geo_filtering_enabled(rtxn)? {
let top_right: [f64; 2] = [
top_right_point[0].parse_finite_float()?,
top_right_point[1].parse_finite_float()?,
];
let bottom_left: [f64; 2] = [
bottom_left_point[0].parse_finite_float()?,
bottom_left_point[1].parse_finite_float()?,
];
if !(-90.0..=90.0).contains(&top_right[0]) {
return Err(
top_right_point[0].as_external_error(BadGeoError::Lat(top_right[0]))
)?;
}
if !(-180.0..=180.0).contains(&top_right[1]) {
return Err(
top_right_point[1].as_external_error(BadGeoError::Lng(top_right[1]))
)?;
}
if !(-90.0..=90.0).contains(&bottom_left[0]) {
return Err(bottom_left_point[0]
.as_external_error(BadGeoError::Lat(bottom_left[0])))?;
}
if !(-180.0..=180.0).contains(&bottom_left[1]) {
return Err(bottom_left_point[1]
.as_external_error(BadGeoError::Lng(bottom_left[1])))?;
}
if top_right[0] < bottom_left[0] {
return Err(bottom_left_point[1].as_external_error(
BadGeoError::BoundingBoxTopIsBelowBottom(top_right[0], bottom_left[0]),
))?;
}
let top_right: [f64; 2] = [
top_right_point[0].parse_finite_float()?,
top_right_point[1].parse_finite_float()?,
];
let bottom_left: [f64; 2] = [
bottom_left_point[0].parse_finite_float()?,
bottom_left_point[1].parse_finite_float()?,
];
if !(-90.0..=90.0).contains(&top_right[0]) {
return Err(
top_right_point[0].as_external_error(BadGeoError::Lat(top_right[0]))
)?;
}
if !(-180.0..=180.0).contains(&top_right[1]) {
return Err(
top_right_point[1].as_external_error(BadGeoError::Lng(top_right[1]))
)?;
}
if !(-90.0..=90.0).contains(&bottom_left[0]) {
return Err(
bottom_left_point[0].as_external_error(BadGeoError::Lat(bottom_left[0]))
)?;
}
if !(-180.0..=180.0).contains(&bottom_left[1]) {
return Err(
bottom_left_point[1].as_external_error(BadGeoError::Lng(bottom_left[1]))
)?;
}
if top_right[0] < bottom_left[0] {
return Err(bottom_left_point[1].as_external_error(
BadGeoError::BoundingBoxTopIsBelowBottom(top_right[0], bottom_left[0]),
))?;
}
let mut r1 = None;
if index.is_geo_filtering_enabled(rtxn)? {
// Instead of writing a custom `GeoBoundingBox` filter we're simply going to re-use the range
// filter to create the following filter;
// `_geo.lat {top_right[0]} TO {bottom_left[0]} AND _geo.lng {top_right[1]} TO {bottom_left[1]}`
@@ -811,19 +852,76 @@ impl<'a> Filter<'a> {
)?
};
Ok(selected_lat & selected_lng)
} else {
Err(top_right_point[0].as_external_error(
r1 = Some(selected_lat & selected_lng);
}
let mut r2 = None;
if index.is_geojson_filtering_enabled(rtxn)? {
let polygon = geo_types::Polygon::new(
geo_types::LineString(vec![
geo_types::Coord { x: top_right[1], y: top_right[0] },
geo_types::Coord { x: bottom_left[1], y: top_right[0] },
geo_types::Coord { x: bottom_left[1], y: bottom_left[0] },
geo_types::Coord { x: top_right[1], y: bottom_left[0] },
]),
Vec::new(),
);
let result = index.cellulite.in_shape(rtxn, &polygon)?;
r2 = Some(RoaringBitmap::from_iter(result)); // TODO: Remove once we update roaring in meilisearch
}
match (r1, r2) {
(Some(r1), Some(r2)) => Ok(r1 | r2),
(Some(r1), None) => Ok(r1),
(None, Some(r2)) => Ok(r2),
(None, None) => Err(top_right_point[0].as_external_error(
FilterError::AttributeNotFilterable {
attribute: RESERVED_GEO_FIELD_NAME,
attribute: &format!(
"{RESERVED_GEO_FIELD_NAME}/{RESERVED_GEOJSON_FIELD_NAME}"
),
filterable_patterns: filtered_matching_patterns(
filterable_attribute_rules,
&|features| features.is_filterable(),
),
},
))?
))?,
}
}
FilterCondition::GeoPolygon { points } => {
if !index.is_geojson_filtering_enabled(rtxn)? {
return Err(points[0][0].as_external_error(
FilterError::AttributeNotFilterable {
attribute: RESERVED_GEOJSON_FIELD_NAME,
filterable_patterns: filtered_matching_patterns(
filterable_attribute_rules,
&|features| features.is_filterable(),
),
},
))?;
}
let mut coords = Vec::new();
for [lat_token, lng_token] in points {
let lat = lat_token.parse_finite_float()?;
let lng = lng_token.parse_finite_float()?;
if !(-90.0..=90.0).contains(&lat) {
return Err(lat_token.as_external_error(BadGeoError::Lat(lat)))?;
}
if !(-180.0..=180.0).contains(&lng) {
return Err(lng_token.as_external_error(BadGeoError::Lng(lng)))?;
}
coords.push(geo_types::Coord { x: lng, y: lat });
}
let polygon = geo_types::Polygon::new(geo_types::LineString(coords), Vec::new());
let result = index.cellulite.in_shape(rtxn, &polygon)?;
let result = roaring::RoaringBitmap::from_iter(result); // TODO: Remove once we update roaring in meilisearch
Ok(result)
}
}
}
}
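All three geo arms now follow the same merge shape: each backend (the `_geo` r-tree, the `_geojson` cellulite store) contributes an optional bitmap, the results are unioned, and an AttributeNotFilterable error is raised only when neither feature is enabled. The merge in isolation, as a hypothetical helper:

use roaring::RoaringBitmap;

// Union two optional result sets; None means that backend is disabled.
fn union_opt(r1: Option<RoaringBitmap>, r2: Option<RoaringBitmap>) -> Option<RoaringBitmap> {
    match (r1, r2) {
        (Some(a), Some(b)) => Some(a | b),
        (a, b) => a.or(b),
    }
}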
@@ -962,17 +1060,17 @@ mod tests {
let rtxn = index.read_txn().unwrap();
let filter = Filter::from_str("_geoRadius(42, 150, 10)").unwrap().unwrap();
let error = filter.evaluate(&rtxn, &index).unwrap_err();
snapshot!(error.to_string(), @r###"
Attribute `_geo` is not filterable. This index does not have configured filterable attributes.
snapshot!(error.to_string(), @r"
Attribute `_geo/_geojson` is not filterable. This index does not have configured filterable attributes.
12:14 _geoRadius(42, 150, 10)
"###);
");
let filter = Filter::from_str("_geoBoundingBox([42, 150], [30, 10])").unwrap().unwrap();
let error = filter.evaluate(&rtxn, &index).unwrap_err();
snapshot!(error.to_string(), @r###"
Attribute `_geo` is not filterable. This index does not have configured filterable attributes.
snapshot!(error.to_string(), @r"
Attribute `_geo/_geojson` is not filterable. This index does not have configured filterable attributes.
18:20 _geoBoundingBox([42, 150], [30, 10])
"###);
");
let filter = Filter::from_str("dog = \"bernese mountain\"").unwrap().unwrap();
let error = filter.evaluate(&rtxn, &index).unwrap_err();
@@ -993,19 +1091,19 @@ mod tests {
let rtxn = index.read_txn().unwrap();
let filter = Filter::from_str("_geoRadius(-100, 150, 10)").unwrap().unwrap();
let filter = Filter::from_str("_geoRadius(-90, 150, 10)").unwrap().unwrap();
let error = filter.evaluate(&rtxn, &index).unwrap_err();
snapshot!(error.to_string(), @r###"
Attribute `_geo` is not filterable. Available filterable attribute patterns are: `title`.
12:16 _geoRadius(-100, 150, 10)
"###);
snapshot!(error.to_string(), @r"
Attribute `_geo/_geojson` is not filterable. Available filterable attribute patterns are: `title`.
12:15 _geoRadius(-90, 150, 10)
");
let filter = Filter::from_str("_geoBoundingBox([42, 150], [30, 10])").unwrap().unwrap();
let error = filter.evaluate(&rtxn, &index).unwrap_err();
snapshot!(error.to_string(), @r###"
Attribute `_geo` is not filterable. Available filterable attribute patterns are: `title`.
snapshot!(error.to_string(), @r"
Attribute `_geo/_geojson` is not filterable. Available filterable attribute patterns are: `title`.
18:20 _geoBoundingBox([42, 150], [30, 10])
"###);
");
let filter = Filter::from_str("name = 12").unwrap().unwrap();
let error = filter.evaluate(&rtxn, &index).unwrap_err();
@@ -1153,38 +1251,34 @@ mod tests {
// georadius have a bad latitude
let filter = Filter::from_str("_geoRadius(-100, 150, 10)").unwrap().unwrap();
let error = filter.evaluate(&rtxn, &index).unwrap_err();
assert!(
error.to_string().starts_with(
"Bad latitude `-100`. Latitude must be contained between -90 and 90 degrees."
),
"{}",
error.to_string()
);
snapshot!(error.to_string(), @r"
Bad latitude `-100`. Latitude must be contained between -90 and 90 degrees.
12:16 _geoRadius(-100, 150, 10)
");
// georadius have a bad latitude
let filter = Filter::from_str("_geoRadius(-90.0000001, 150, 10)").unwrap().unwrap();
let error = filter.evaluate(&rtxn, &index).unwrap_err();
assert!(error.to_string().contains(
"Bad latitude `-90.0000001`. Latitude must be contained between -90 and 90 degrees."
));
snapshot!(error.to_string(), @r"
Bad latitude `-90.0000001`. Latitude must be contained between -90 and 90 degrees.
12:23 _geoRadius(-90.0000001, 150, 10)
");
// georadius have a bad longitude
let filter = Filter::from_str("_geoRadius(-10, 250, 10)").unwrap().unwrap();
let error = filter.evaluate(&rtxn, &index).unwrap_err();
assert!(
error.to_string().contains(
"Bad longitude `250`. Longitude must be contained between -180 and 180 degrees."
),
"{}",
error.to_string(),
);
snapshot!(error.to_string(), @r"
Bad longitude `250`. Longitude must be contained between -180 and 180 degrees. Hint: try using `-110` instead.
17:20 _geoRadius(-10, 250, 10)
");
// georadius have a bad longitude
let filter = Filter::from_str("_geoRadius(-10, 180.000001, 10)").unwrap().unwrap();
let error = filter.evaluate(&rtxn, &index).unwrap_err();
assert!(error.to_string().contains(
"Bad longitude `180.000001`. Longitude must be contained between -180 and 180 degrees."
));
snapshot!(error.to_string(), @r"
Bad longitude `180.000001`. Longitude must be contained between -180 and 180 degrees. Hint: try using `-179.999999` instead.
17:27 _geoRadius(-10, 180.000001, 10)
");
}
#[test]
@@ -1207,73 +1301,73 @@ mod tests {
let filter =
Filter::from_str("_geoBoundingBox([-90.0000001, 150], [30, 10])").unwrap().unwrap();
let error = filter.evaluate(&rtxn, &index).unwrap_err();
assert!(
error.to_string().starts_with(
"Bad latitude `-90.0000001`. Latitude must be contained between -90 and 90 degrees."
),
"{}",
error.to_string()
);
snapshot!(error.to_string(), @r"
Bad latitude `-90.0000001`. Latitude must be contained between -90 and 90 degrees.
18:29 _geoBoundingBox([-90.0000001, 150], [30, 10])
");
// geoboundingbox top left coord have a bad latitude
let filter =
Filter::from_str("_geoBoundingBox([90.0000001, 150], [30, 10])").unwrap().unwrap();
let error = filter.evaluate(&rtxn, &index).unwrap_err();
assert!(
error.to_string().starts_with(
"Bad latitude `90.0000001`. Latitude must be contained between -90 and 90 degrees."
),
"{}",
error.to_string()
);
snapshot!(error.to_string(), @r"
Bad latitude `90.0000001`. Latitude must be contained between -90 and 90 degrees.
18:28 _geoBoundingBox([90.0000001, 150], [30, 10])
");
// geoboundingbox bottom right coord have a bad latitude
let filter =
Filter::from_str("_geoBoundingBox([30, 10], [-90.0000001, 150])").unwrap().unwrap();
let error = filter.evaluate(&rtxn, &index).unwrap_err();
assert!(error.to_string().contains(
"Bad latitude `-90.0000001`. Latitude must be contained between -90 and 90 degrees."
));
snapshot!(error.to_string(), @r"
Bad latitude `-90.0000001`. Latitude must be contained between -90 and 90 degrees.
28:39 _geoBoundingBox([30, 10], [-90.0000001, 150])
");
// geoboundingbox bottom right coord have a bad latitude
let filter =
Filter::from_str("_geoBoundingBox([30, 10], [90.0000001, 150])").unwrap().unwrap();
let error = filter.evaluate(&rtxn, &index).unwrap_err();
assert!(error.to_string().contains(
"Bad latitude `90.0000001`. Latitude must be contained between -90 and 90 degrees."
));
snapshot!(error.to_string(), @r"
Bad latitude `90.0000001`. Latitude must be contained between -90 and 90 degrees.
28:38 _geoBoundingBox([30, 10], [90.0000001, 150])
");
// geoboundingbox top left coord have a bad longitude
let filter =
Filter::from_str("_geoBoundingBox([-10, 180.000001], [30, 10])").unwrap().unwrap();
let error = filter.evaluate(&rtxn, &index).unwrap_err();
assert!(error.to_string().contains(
"Bad longitude `180.000001`. Longitude must be contained between -180 and 180 degrees."
));
snapshot!(error.to_string(), @r"
Bad longitude `180.000001`. Longitude must be contained between -180 and 180 degrees. Hint: try using `-179.999999` instead.
23:33 _geoBoundingBox([-10, 180.000001], [30, 10])
");
// geoboundingbox top left coord have a bad longitude
let filter =
Filter::from_str("_geoBoundingBox([-10, -180.000001], [30, 10])").unwrap().unwrap();
let error = filter.evaluate(&rtxn, &index).unwrap_err();
assert!(error.to_string().contains(
"Bad longitude `-180.000001`. Longitude must be contained between -180 and 180 degrees."
));
snapshot!(error.to_string(), @r"
Bad longitude `-180.000001`. Longitude must be contained between -180 and 180 degrees. Hint: try using `179.999999` instead.
23:34 _geoBoundingBox([-10, -180.000001], [30, 10])
");
// geoboundingbox bottom right coord have a bad longitude
let filter =
Filter::from_str("_geoBoundingBox([30, 10], [-10, -180.000001])").unwrap().unwrap();
let error = filter.evaluate(&rtxn, &index).unwrap_err();
assert!(error.to_string().contains(
"Bad longitude `-180.000001`. Longitude must be contained between -180 and 180 degrees."
));
snapshot!(error.to_string(), @r"
Bad longitude `-180.000001`. Longitude must be contained between -180 and 180 degrees. Hint: try using `179.999999` instead.
33:44 _geoBoundingBox([30, 10], [-10, -180.000001])
");
// geoboundingbox bottom right coord have a bad longitude
let filter =
Filter::from_str("_geoBoundingBox([30, 10], [-10, 180.000001])").unwrap().unwrap();
let error = filter.evaluate(&rtxn, &index).unwrap_err();
assert!(error.to_string().contains(
"Bad longitude `180.000001`. Longitude must be contained between -180 and 180 degrees."
));
snapshot!(error.to_string(), @r"
Bad longitude `180.000001`. Longitude must be contained between -180 and 180 degrees. Hint: try using `-179.999999` instead.
33:43 _geoBoundingBox([30, 10], [-10, 180.000001])
");
}
#[test]

View File

@@ -137,7 +137,7 @@ impl<'a> SearchForFacetValues<'a> {
let exact_words_fst = self.search_query.index.exact_words(rtxn)?;
if exact_words_fst.is_some_and(|fst| fst.contains(query)) {
if fst.contains(query) {
self.fetch_original_facets_using_normalized(
let _ = self.fetch_original_facets_using_normalized(
fid,
query,
query,
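`let _ =` makes the discarded return value explicit, which satisfies must_use-style lints without changing behaviour:

#[must_use]
fn hits() -> usize { 42 }

let _ = hits(); // intentionally ignored; no unused_must_use warning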

View File

@@ -354,15 +354,14 @@ fn maybe_add_to_results<'ctx, Q: RankingRuleQueryTrait>(
logger.add_to_results(&candidates);
valid_docids.extend_from_slice(&candidates);
valid_scores
.extend(std::iter::repeat(ranking_rule_scores.to_owned()).take(candidates.len()));
.extend(std::iter::repeat_n(ranking_rule_scores.to_owned(), candidates.len()));
}
} else {
// if we have passed the offset already, add some of the documents (up to the limit)
let candidates = candidates.iter().take(length - valid_docids.len()).collect::<Vec<u32>>();
logger.add_to_results(&candidates);
valid_docids.extend_from_slice(&candidates);
valid_scores
.extend(std::iter::repeat(ranking_rule_scores.to_owned()).take(candidates.len()));
valid_scores.extend(std::iter::repeat_n(ranking_rule_scores.to_owned(), candidates.len()));
}
*cur_offset += candidates.len() as usize;
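std::iter::repeat_n, stable since Rust 1.82, replaces the repeat(..).take(..) pairing and reports an exact length up front, which lets extend pre-reserve:

let scores: Vec<u32> = std::iter::repeat_n(7, 3).collect();
assert_eq!(scores, vec![7, 7, 7]);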

View File

@@ -193,6 +193,7 @@ impl<G: RankingRuleGraphTrait> VisitorState<G> {
visit: VisitFn<'_, G>,
ctx: &mut VisitorContext<'_, G>,
) -> Result<ControlFlow<(), bool>> {
#[allow(clippy::manual_contains)] // there is no method contains on mapped interner
if !ctx
.all_costs_from_node
.get(dest_node)
@@ -243,6 +244,8 @@ impl<G: RankingRuleGraphTrait> VisitorState<G> {
// Checking that from the destination node, there is at least
// one cost that we can visit that corresponds to our remaining budget.
#[allow(clippy::manual_contains)] // there is no contains on MappedInterner
if !ctx
.all_costs_from_node
.get(dest_node)
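clippy::manual_contains suggests slice.contains(&x) over an iterator-based equality scan; the allow is needed here because the mapped interner only exposes iteration. The lint's target shape, for reference:

let costs = vec![1u64, 2, 3];
let found = costs.iter().any(|&c| c == 2); // lint: use costs.contains(&2)
assert!(found && costs.contains(&2));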

View File

@@ -401,7 +401,7 @@ impl Iterator for SmallBitmapInternalIter<'_> {
*cur &= *cur - 1;
Some(idx + *base)
} else if next.is_empty() {
return None;
None
} else {
*base += 64;
*cur = next[0];
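In a match arm the tail expression already yields the value, so the explicit `return` was redundant (clippy::needless_return):

fn sign(x: i32) -> &'static str {
    match x {
        0 => "zero", // not `return "zero";`
        _ if x > 0 => "positive",
        _ => "negative",
    }
}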

View File

@@ -79,7 +79,7 @@ impl<'ctx, Query> Sort<'ctx, Query> {
return Ok(false);
};
Ok(!displayed_fields.iter().any(|&field| field == field_name))
Ok(!displayed_fields.contains(&field_name))
}
}
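Same lint family as above: slice::contains replaces the manual equality scan one-for-one:

let displayed = ["title", "overview"];
assert!(displayed.contains(&"title"));
assert_eq!(displayed.contains(&"title"),
           displayed.iter().any(|&f| f == "title")); // the form it replaces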

Some files were not shown because too many files have changed in this diff.