mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-11-27 16:30:31 +00:00
Compare commits
59 Commits
prototype-
...
prototype-
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
00caf25ea9 | ||
|
|
dc2546af21 | ||
|
|
077797b50a | ||
|
|
cde36923c5 | ||
|
|
01bacf500e | ||
|
|
ec0ecb5515 | ||
|
|
5cfb066b0a | ||
|
|
a5f44a5ceb | ||
|
|
bc25f378e8 | ||
|
|
6a068fe36a | ||
|
|
8d826e478f | ||
|
|
4d308d5237 | ||
|
|
b4c01581cd | ||
|
|
67fd3b08ef | ||
|
|
1690aec7f1 | ||
|
|
f267bed352 | ||
|
|
597d57bf1d | ||
|
|
01ac8344ad | ||
|
|
3508ba2f20 | ||
|
|
1e6fe71a67 | ||
|
|
0fba08cd72 | ||
|
|
189d4c3b70 | ||
|
|
2fff6f7f23 | ||
|
|
fddfb37f1f | ||
|
|
52b4090286 | ||
|
|
3cabfb448b | ||
|
|
77cf5b3787 | ||
|
|
3acc5bbb15 | ||
|
|
114436926f | ||
|
|
0f7904fb38 | ||
|
|
590b1d8fb7 | ||
|
|
be9741eb8a | ||
|
|
0177d66149 | ||
|
|
1861c69964 | ||
|
|
cb2b5eb38e | ||
|
|
53aa0a1b54 | ||
|
|
7871d12025 | ||
|
|
3fb67f94f7 | ||
|
|
cf5145b542 | ||
|
|
31bb61ba99 | ||
|
|
e7994cdeb3 | ||
|
|
70c906d4b4 | ||
|
|
0f33a65468 | ||
|
|
7c9a8b1e1b | ||
|
|
f45daf8031 | ||
|
|
7935bef4cd | ||
|
|
eddefb0e0f | ||
|
|
c5f22be6e1 | ||
|
|
febc8d1b52 | ||
|
|
df3986cd83 | ||
|
|
34ed6518ae | ||
|
|
22219fd88f | ||
|
|
a9e17ab8c6 | ||
|
|
2dd948a4a1 | ||
|
|
76cf1bff87 | ||
|
|
c0d8eb295d | ||
|
|
bcd3f6054a | ||
|
|
3a0314f9de | ||
|
|
fa4d8b8348 |
3
.github/ISSUE_TEMPLATE/bug_report.md
vendored
3
.github/ISSUE_TEMPLATE/bug_report.md
vendored
@@ -23,7 +23,8 @@ A clear and concise description of what you expected to happen.
|
||||
**Screenshots**
|
||||
If applicable, add screenshots to help explain your problem.
|
||||
|
||||
**Meilisearch version:** [e.g. v0.20.0]
|
||||
**Meilisearch version:**
|
||||
[e.g. v0.20.0]
|
||||
|
||||
**Additional context**
|
||||
Additional information that may be relevant to the issue.
|
||||
|
||||
34
.github/ISSUE_TEMPLATE/sprint_issue.md
vendored
Normal file
34
.github/ISSUE_TEMPLATE/sprint_issue.md
vendored
Normal file
@@ -0,0 +1,34 @@
|
||||
---
|
||||
name: New sprint issue
|
||||
about: ⚠️ Should only be used by the engine team ⚠️
|
||||
title: ''
|
||||
labels: ''
|
||||
assignees: ''
|
||||
|
||||
---
|
||||
|
||||
Related product team resources: [roadmap card]() (_internal only_) and [PRD]() (_internal only_)
|
||||
Related product discussion:
|
||||
Related spec: WIP
|
||||
|
||||
## Motivation
|
||||
|
||||
<!---Copy/paste the information in the roadmap resources or briefly detail the product motivation. Ask product team if any hesitation.-->
|
||||
|
||||
## Usage
|
||||
|
||||
<!---Write a quick description of the usage if the usage has already been defined-->
|
||||
|
||||
Refer to the final spec to know the details and the final decisions about the usage.
|
||||
|
||||
## TODO
|
||||
|
||||
<!---Feel free to adapt this list with more technical/product steps-->
|
||||
|
||||
- [ ] Release a prototype
|
||||
- [ ] If prototype validated, merge changes into `main`
|
||||
- [ ] Update the spec
|
||||
|
||||
## Impacted teams
|
||||
|
||||
<!---Ping the related teams. Ask for the engine manager if any hesitation-->
|
||||
@@ -1,4 +1,4 @@
|
||||
name: Benchmarks
|
||||
name: Benchmarks (manual)
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
@@ -1,4 +1,4 @@
|
||||
name: Benchmarks indexing (push)
|
||||
name: Benchmarks of indexing (push)
|
||||
|
||||
on:
|
||||
push:
|
||||
@@ -1,4 +1,4 @@
|
||||
name: Benchmarks search geo (push)
|
||||
name: Benchmarks of search for geo (push)
|
||||
|
||||
on:
|
||||
push:
|
||||
@@ -1,4 +1,4 @@
|
||||
name: Benchmarks search songs (push)
|
||||
name: Benchmarks of search for songs (push)
|
||||
|
||||
on:
|
||||
push:
|
||||
@@ -1,4 +1,4 @@
|
||||
name: Benchmarks search wikipedia articles (push)
|
||||
name: Benchmarks of search for Wikipedia articles (push)
|
||||
|
||||
on:
|
||||
push:
|
||||
28
.github/workflows/create-issue-dependencies.yml
vendored
28
.github/workflows/create-issue-dependencies.yml
vendored
@@ -1,28 +0,0 @@
|
||||
name: Create issue to upgrade dependencies
|
||||
on:
|
||||
schedule:
|
||||
# Run the first of the month, every 3 month
|
||||
- cron: '0 0 1 */3 *'
|
||||
workflow_dispatch:
|
||||
|
||||
jobs:
|
||||
create-issue:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- name: Create an issue
|
||||
uses: actions-ecosystem/action-create-issue@v1
|
||||
with:
|
||||
github_token: ${{ secrets.MEILI_BOT_GH_PAT }}
|
||||
title: Upgrade dependencies
|
||||
body: |
|
||||
This issue is about updating Meilisearch dependencies:
|
||||
- [ ] Cargo toml dependencies of Meilisearch; but also the main engine-team repositories that Meilisearch depends on (charabia, heed...)
|
||||
- [ ] If new Rust versions have been released, update the Rust version in the Clippy job of this [GitHub Action file](./.github/workflows/rust.yml)
|
||||
|
||||
⚠️ To avoid last minute bugs, this issue should only be done at the beginning of the sprint!
|
||||
|
||||
The GitHub action dependencies are managed by [Dependabot](./.github/dependabot.yml)
|
||||
labels: |
|
||||
dependencies
|
||||
maintenance
|
||||
24
.github/workflows/dependency-issue.yml
vendored
Normal file
24
.github/workflows/dependency-issue.yml
vendored
Normal file
@@ -0,0 +1,24 @@
|
||||
name: Create issue to upgrade dependencies
|
||||
|
||||
on:
|
||||
schedule:
|
||||
# Run the first of the month, every 3 month
|
||||
- cron: '0 0 1 */3 *'
|
||||
workflow_dispatch:
|
||||
|
||||
jobs:
|
||||
create-issue:
|
||||
runs-on: ubuntu-latest
|
||||
env:
|
||||
ISSUE_TEMPLATE: issue-template.md
|
||||
GH_TOKEN: ${{ secrets.MEILI_BOT_GH_PAT }}
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- name: Download the issue template
|
||||
run: curl -s https://raw.githubusercontent.com/meilisearch/engine-team/main/issue-templates/dependency-issue.md > $ISSUE_TEMPLATE
|
||||
- name: Create issue
|
||||
run: |
|
||||
gh issue create \
|
||||
--title 'Upgrade dependencies' \
|
||||
--label 'dependencies,maintenance' \
|
||||
--body-file $ISSUE_TEMPLATE
|
||||
@@ -1,4 +1,4 @@
|
||||
name: Publish to APT repository & Homebrew
|
||||
name: Publish to APT & Homebrew
|
||||
|
||||
on:
|
||||
release:
|
||||
@@ -35,7 +35,7 @@ jobs:
|
||||
- name: Build deb package
|
||||
run: cargo deb -p meilisearch -o target/debian/meilisearch.deb
|
||||
- name: Upload debian pkg to release
|
||||
uses: svenstaro/upload-release-action@2.4.0
|
||||
uses: svenstaro/upload-release-action@2.5.0
|
||||
with:
|
||||
repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
|
||||
file: target/debian/meilisearch.deb
|
||||
12
.github/workflows/publish-binaries.yml
vendored
12
.github/workflows/publish-binaries.yml
vendored
@@ -1,3 +1,5 @@
|
||||
name: Publish binaries to GitHub release
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
schedule:
|
||||
@@ -5,8 +7,6 @@ on:
|
||||
release:
|
||||
types: [published]
|
||||
|
||||
name: Publish binaries to release
|
||||
|
||||
jobs:
|
||||
check-version:
|
||||
name: Check the version validity
|
||||
@@ -54,7 +54,7 @@ jobs:
|
||||
# No need to upload binaries for dry run (cron)
|
||||
- name: Upload binaries to release
|
||||
if: github.event_name == 'release'
|
||||
uses: svenstaro/upload-release-action@2.4.0
|
||||
uses: svenstaro/upload-release-action@2.5.0
|
||||
with:
|
||||
repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
|
||||
file: target/release/meilisearch
|
||||
@@ -87,7 +87,7 @@ jobs:
|
||||
# No need to upload binaries for dry run (cron)
|
||||
- name: Upload binaries to release
|
||||
if: github.event_name == 'release'
|
||||
uses: svenstaro/upload-release-action@2.4.0
|
||||
uses: svenstaro/upload-release-action@2.5.0
|
||||
with:
|
||||
repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
|
||||
file: target/release/${{ matrix.artifact_name }}
|
||||
@@ -121,7 +121,7 @@ jobs:
|
||||
- name: Upload the binary to release
|
||||
# No need to upload binaries for dry run (cron)
|
||||
if: github.event_name == 'release'
|
||||
uses: svenstaro/upload-release-action@2.4.0
|
||||
uses: svenstaro/upload-release-action@2.5.0
|
||||
with:
|
||||
repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
|
||||
file: target/${{ matrix.target }}/release/meilisearch
|
||||
@@ -183,7 +183,7 @@ jobs:
|
||||
- name: Upload the binary to release
|
||||
# No need to upload binaries for dry run (cron)
|
||||
if: github.event_name == 'release'
|
||||
uses: svenstaro/upload-release-action@2.4.0
|
||||
uses: svenstaro/upload-release-action@2.5.0
|
||||
with:
|
||||
repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
|
||||
file: target/${{ matrix.target }}/release/meilisearch
|
||||
|
||||
5
.github/workflows/publish-docker-images.yml
vendored
5
.github/workflows/publish-docker-images.yml
vendored
@@ -1,4 +1,5 @@
|
||||
---
|
||||
name: Publish images to Docker Hub
|
||||
|
||||
on:
|
||||
push:
|
||||
# Will run for every tag pushed except `latest`
|
||||
@@ -12,8 +13,6 @@ on:
|
||||
- cron: '0 23 * * *' # Every day at 11:00pm
|
||||
workflow_dispatch:
|
||||
|
||||
name: Publish tagged images to Docker Hub
|
||||
|
||||
jobs:
|
||||
docker:
|
||||
runs-on: docker
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
name: Rust
|
||||
name: Test suite
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
@@ -25,36 +25,35 @@ jobs:
|
||||
# Use ubuntu-18.04 to compile with glibc 2.27, which are the production expectations
|
||||
image: ubuntu:18.04
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- name: Install needed dependencies
|
||||
run: |
|
||||
apt-get update && apt-get install -y curl
|
||||
apt-get install build-essential -y
|
||||
- name: Run test with Rust stable
|
||||
if: github.event_name != 'schedule'
|
||||
uses: actions-rs/toolchain@v1
|
||||
with:
|
||||
toolchain: stable
|
||||
override: true
|
||||
- name: Run test with Rust nightly
|
||||
if: github.event_name == 'schedule'
|
||||
uses: actions-rs/toolchain@v1
|
||||
with:
|
||||
toolchain: nightly
|
||||
override: true
|
||||
# Disable cache due to disk space issues with Windows workers in CI
|
||||
# - name: Cache dependencies
|
||||
# uses: Swatinem/rust-cache@v2.2.0
|
||||
- name: Run cargo check without any default features
|
||||
uses: actions-rs/cargo@v1
|
||||
with:
|
||||
command: build
|
||||
args: --locked --release --no-default-features --all
|
||||
- name: Run cargo test
|
||||
uses: actions-rs/cargo@v1
|
||||
with:
|
||||
command: test
|
||||
args: --locked --release --all
|
||||
- uses: actions/checkout@v3
|
||||
- name: Install needed dependencies
|
||||
run: |
|
||||
apt-get update && apt-get install -y curl
|
||||
apt-get install build-essential -y
|
||||
- name: Run test with Rust stable
|
||||
if: github.event_name != 'schedule'
|
||||
uses: actions-rs/toolchain@v1
|
||||
with:
|
||||
toolchain: stable
|
||||
override: true
|
||||
- name: Run test with Rust nightly
|
||||
if: github.event_name == 'schedule'
|
||||
uses: actions-rs/toolchain@v1
|
||||
with:
|
||||
toolchain: nightly
|
||||
override: true
|
||||
- name: Cache dependencies
|
||||
uses: Swatinem/rust-cache@v2.2.1
|
||||
- name: Run cargo check without any default features
|
||||
uses: actions-rs/cargo@v1
|
||||
with:
|
||||
command: build
|
||||
args: --locked --release --no-default-features --all
|
||||
- name: Run cargo test
|
||||
uses: actions-rs/cargo@v1
|
||||
with:
|
||||
command: test
|
||||
args: --locked --release --all
|
||||
|
||||
test-others:
|
||||
name: Tests on ${{ matrix.os }}
|
||||
@@ -64,19 +63,47 @@ jobs:
|
||||
matrix:
|
||||
os: [macos-12, windows-2022]
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
# - name: Cache dependencies
|
||||
# uses: Swatinem/rust-cache@v2.2.0
|
||||
- name: Run cargo check without any default features
|
||||
uses: actions-rs/cargo@v1
|
||||
with:
|
||||
command: build
|
||||
args: --locked --release --no-default-features --all
|
||||
- name: Run cargo test
|
||||
uses: actions-rs/cargo@v1
|
||||
with:
|
||||
command: test
|
||||
args: --locked --release --all
|
||||
- uses: actions/checkout@v3
|
||||
- name: Cache dependencies
|
||||
uses: Swatinem/rust-cache@v2.2.1
|
||||
- name: Run cargo check without any default features
|
||||
uses: actions-rs/cargo@v1
|
||||
with:
|
||||
command: build
|
||||
args: --locked --release --no-default-features --all
|
||||
- name: Run cargo test
|
||||
uses: actions-rs/cargo@v1
|
||||
with:
|
||||
command: test
|
||||
args: --locked --release --all
|
||||
|
||||
test-all-features:
|
||||
name: Tests all features on cron schedule only
|
||||
runs-on: ubuntu-latest
|
||||
container:
|
||||
# Use ubuntu-18.04 to compile with glibc 2.27, which are the production expectations
|
||||
image: ubuntu:18.04
|
||||
if: github.event_name == 'schedule'
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- name: Install needed dependencies
|
||||
run: |
|
||||
apt-get update
|
||||
apt-get install --assume-yes build-essential curl
|
||||
- uses: actions-rs/toolchain@v1
|
||||
with:
|
||||
toolchain: stable
|
||||
override: true
|
||||
- name: Run cargo build with all features
|
||||
uses: actions-rs/cargo@v1
|
||||
with:
|
||||
command: build
|
||||
args: --workspace --locked --release --all-features
|
||||
- name: Run cargo test with all features
|
||||
uses: actions-rs/cargo@v1
|
||||
with:
|
||||
command: test
|
||||
args: --workspace --locked --release --all-features
|
||||
|
||||
# We run tests in debug also, to make sure that the debug_assertions are hit
|
||||
test-debug:
|
||||
@@ -95,8 +122,8 @@ jobs:
|
||||
with:
|
||||
toolchain: stable
|
||||
override: true
|
||||
# - name: Cache dependencies
|
||||
# uses: Swatinem/rust-cache@v2.2.0
|
||||
- name: Cache dependencies
|
||||
uses: Swatinem/rust-cache@v2.2.1
|
||||
- name: Run tests in debug
|
||||
uses: actions-rs/cargo@v1
|
||||
with:
|
||||
@@ -114,8 +141,8 @@ jobs:
|
||||
toolchain: 1.67.0
|
||||
override: true
|
||||
components: clippy
|
||||
# - name: Cache dependencies
|
||||
# uses: Swatinem/rust-cache@v2.2.0
|
||||
- name: Cache dependencies
|
||||
uses: Swatinem/rust-cache@v2.2.1
|
||||
- name: Run cargo clippy
|
||||
uses: actions-rs/cargo@v1
|
||||
with:
|
||||
@@ -134,8 +161,8 @@ jobs:
|
||||
toolchain: nightly
|
||||
override: true
|
||||
components: rustfmt
|
||||
# - name: Cache dependencies
|
||||
# uses: Swatinem/rust-cache@v2.2.0
|
||||
- name: Cache dependencies
|
||||
uses: Swatinem/rust-cache@v2.2.1
|
||||
- name: Run cargo fmt
|
||||
# Since we never ran the `build.rs` script in the benchmark directory we are missing one auto-generated import file.
|
||||
# Since we want to trigger (and fail) this action as fast as possible, instead of building the benchmark crate
|
||||
6
.github/workflows/uffizzi-build.yml
vendored
6
.github/workflows/uffizzi-build.yml
vendored
@@ -23,7 +23,7 @@ jobs:
|
||||
target: x86_64-unknown-linux-musl
|
||||
|
||||
- name: Cache dependencies
|
||||
uses: Swatinem/rust-cache@v2.2.0
|
||||
uses: Swatinem/rust-cache@v2.2.1
|
||||
|
||||
- name: Run cargo check without any default features
|
||||
uses: actions-rs/cargo@v1
|
||||
@@ -46,14 +46,14 @@ jobs:
|
||||
|
||||
- name: Docker metadata
|
||||
id: meta
|
||||
uses: docker/metadata-action@v3
|
||||
uses: docker/metadata-action@v4
|
||||
with:
|
||||
images: registry.uffizzi.com/${{ env.UUID_TAG }}
|
||||
tags: |
|
||||
type=raw,value=60d
|
||||
|
||||
- name: Build Image
|
||||
uses: docker/build-push-action@v3
|
||||
uses: docker/build-push-action@v4
|
||||
with:
|
||||
context: ./
|
||||
file: .github/uffizzi/Dockerfile
|
||||
|
||||
@@ -120,29 +120,9 @@ The full Meilisearch release process is described in [this guide](https://github
|
||||
|
||||
Depending on the developed feature, you might need to provide a prototyped version of Meilisearch to make it easier to test by the users.
|
||||
|
||||
The prototype name must follow this convention: `prototype-X-Y` where
|
||||
- `X` is the feature name formatted in `kebab-case`. It should not end with a single number.
|
||||
- `Y` is the version of the prototype, starting from `0`.
|
||||
|
||||
✅ Example: `prototype-auto-resize-0`. </br>
|
||||
❌ Bad example: `auto-resize-0`: lacks the `prototype` prefix. </br>
|
||||
❌ Bad example: `prototype-auto-resize`: lacks the version suffix. </br>
|
||||
❌ Bad example: `prototype-auto-resize-0-0`: feature name ends with a single number.
|
||||
|
||||
Steps to create a prototype:
|
||||
|
||||
1. In your terminal, go to the last commit of your branch (the one you want to provide as a prototype).
|
||||
2. Create a tag following the convention: `git tag prototype-X-Y`
|
||||
3. Run Meilisearch and check that its launch summary features a line: `Prototype: prototype-X-Y` (you may need to switch branches and back after tagging for this to work).
|
||||
3. Push the tag: `git push origin prototype-X-Y`
|
||||
4. Check the [Docker CI](https://github.com/meilisearch/meilisearch/actions/workflows/publish-docker-images.yml) is now running.
|
||||
|
||||
🐳 Once the CI has finished to run (~1h30), a Docker image named `prototype-X-Y` will be available on [DockerHub](https://hub.docker.com/repository/docker/getmeili/meilisearch/general). People can use it with the following command: `docker run -p 7700:7700 -v $(pwd)/meili_data:/meili_data getmeili/meilisearch:prototype-X-Y`. <br>
|
||||
More information about [how to run Meilisearch with Docker](https://docs.meilisearch.com/learn/cookbooks/docker.html#download-meilisearch-with-docker).
|
||||
|
||||
⚙️ However, no binaries will be created. If the users do not use Docker, they can go to the `prototype-X-Y` tag in the Meilisearch repository and compile from the source code.
|
||||
|
||||
⚠️ When sharing a prototype with users, remind them to not use it in production. Prototypes are solely for test purposes.
|
||||
This happens in two steps:
|
||||
- [Release the prototype](https://github.com/meilisearch/engine-team/blob/main/resources/prototypes.md#how-to-publish-a-prototype)
|
||||
- [Communicate about it](https://github.com/meilisearch/engine-team/blob/main/resources/prototypes.md#communication)
|
||||
|
||||
### Release assets
|
||||
|
||||
|
||||
43
README.md
43
README.md
@@ -7,8 +7,8 @@
|
||||
<a href="https://www.meilisearch.com">Website</a> |
|
||||
<a href="https://roadmap.meilisearch.com/tabs/1-under-consideration">Roadmap</a> |
|
||||
<a href="https://blog.meilisearch.com">Blog</a> |
|
||||
<a href="https://docs.meilisearch.com">Documentation</a> |
|
||||
<a href="https://docs.meilisearch.com/faq/">FAQ</a> |
|
||||
<a href="https://meilisearch.com/docs">Documentation</a> |
|
||||
<a href="https://meilisearch.com/docs/faq">FAQ</a> |
|
||||
<a href="https://discord.meilisearch.com">Discord</a>
|
||||
</h4>
|
||||
|
||||
@@ -36,27 +36,27 @@ Meilisearch helps you shape a delightful search experience in a snap, offering f
|
||||
## ✨ Features
|
||||
|
||||
- **Search-as-you-type:** find search results in less than 50 milliseconds
|
||||
- **[Typo tolerance](https://docs.meilisearch.com/learn/getting_started/customizing_relevancy.html#typo-tolerance):** get relevant matches even when queries contain typos and misspellings
|
||||
- **[Filtering and faceted search](https://docs.meilisearch.com/learn/advanced/filtering_and_faceted_search.html):** enhance your user's search experience with custom filters and build a faceted search interface in a few lines of code
|
||||
- **[Sorting](https://docs.meilisearch.com/learn/advanced/sorting.html):** sort results based on price, date, or pretty much anything else your users need
|
||||
- **[Synonym support](https://docs.meilisearch.com/learn/getting_started/customizing_relevancy.html#synonyms):** configure synonyms to include more relevant content in your search results
|
||||
- **[Geosearch](https://docs.meilisearch.com/learn/advanced/geosearch.html):** filter and sort documents based on geographic data
|
||||
- **[Extensive language support](https://docs.meilisearch.com/learn/what_is_meilisearch/language.html):** search datasets in any language, with optimized support for Chinese, Japanese, Hebrew, and languages using the Latin alphabet
|
||||
- **[Security management](https://docs.meilisearch.com/learn/security/master_api_keys.html):** control which users can access what data with API keys that allow fine-grained permissions handling
|
||||
- **[Multi-Tenancy](https://docs.meilisearch.com/learn/security/tenant_tokens.html):** personalize search results for any number of application tenants
|
||||
- **[Typo tolerance](https://meilisearch.com/docs/learn/getting_started/customizing_relevancy#typo-tolerance):** get relevant matches even when queries contain typos and misspellings
|
||||
- **[Filtering](https://meilisearch.com/docs/learn/advanced/filtering) and [faceted search](https://meilisearch.com/docs/learn/advanced/faceted_search):** enhance your user's search experience with custom filters and build a faceted search interface in a few lines of code
|
||||
- **[Sorting](https://meilisearch.com/docs/learn/advanced/sorting):** sort results based on price, date, or pretty much anything else your users need
|
||||
- **[Synonym support](https://meilisearch.com/docs/learn/getting_started/customizing_relevancy#synonyms):** configure synonyms to include more relevant content in your search results
|
||||
- **[Geosearch](https://meilisearch.com/docs/learn/advanced/geosearch):** filter and sort documents based on geographic data
|
||||
- **[Extensive language support](https://meilisearch.com/docs/learn/what_is_meilisearch/language):** search datasets in any language, with optimized support for Chinese, Japanese, Hebrew, and languages using the Latin alphabet
|
||||
- **[Security management](https://meilisearch.com/docs/learn/security/master_api_keys):** control which users can access what data with API keys that allow fine-grained permissions handling
|
||||
- **[Multi-Tenancy](https://meilisearch.com/docs/learn/security/tenant_tokens):** personalize search results for any number of application tenants
|
||||
- **Highly Customizable:** customize Meilisearch to your specific needs or use our out-of-the-box and hassle-free presets
|
||||
- **[RESTful API](https://docs.meilisearch.com/reference/api/overview.html):** integrate Meilisearch in your technical stack with our plugins and SDKs
|
||||
- **[RESTful API](https://meilisearch.com/docs/reference/api/overview):** integrate Meilisearch in your technical stack with our plugins and SDKs
|
||||
- **Easy to install, deploy, and maintain**
|
||||
|
||||
## 📖 Documentation
|
||||
|
||||
You can consult Meilisearch's documentation at [https://docs.meilisearch.com](https://docs.meilisearch.com/).
|
||||
You can consult Meilisearch's documentation at [https://meilisearch.com/docs](https://meilisearch.com/docs/).
|
||||
|
||||
## 🚀 Getting started
|
||||
|
||||
For basic instructions on how to set up Meilisearch, add documents to an index, and search for documents, take a look at our [Quick Start](https://docs.meilisearch.com/learn/getting_started/quick_start.html) guide.
|
||||
For basic instructions on how to set up Meilisearch, add documents to an index, and search for documents, take a look at our [Quick Start](https://meilisearch.com/docs/learn/getting_started/quick_start) guide.
|
||||
|
||||
You may also want to check out [Meilisearch 101](https://docs.meilisearch.com/learn/getting_started/filtering_and_sorting.html) for an introduction to some of Meilisearch's most popular features.
|
||||
You may also want to check out [Meilisearch 101](https://meilisearch.com/docs/learn/getting_started/filtering_and_sorting) for an introduction to some of Meilisearch's most popular features.
|
||||
|
||||
## ☁️ Meilisearch cloud
|
||||
|
||||
@@ -66,25 +66,25 @@ Let us manage your infrastructure so you can focus on integrating a great search
|
||||
|
||||
Install one of our SDKs in your project for seamless integration between Meilisearch and your favorite language or framework!
|
||||
|
||||
Take a look at the complete [Meilisearch integration list](https://docs.meilisearch.com/learn/what_is_meilisearch/sdks.html).
|
||||
Take a look at the complete [Meilisearch integration list](https://meilisearch.com/docs/learn/what_is_meilisearch/sdks).
|
||||
|
||||
[](https://docs.meilisearch.com/learn/what_is_meilisearch/sdks.html)
|
||||
[](https://meilisearch.com/docs/learn/what_is_meilisearch/sdks.html)
|
||||
|
||||
## ⚙️ Advanced usage
|
||||
|
||||
Experienced users will want to keep our [API Reference](https://docs.meilisearch.com/reference/api) close at hand.
|
||||
Experienced users will want to keep our [API Reference](https://meilisearch.com/docs/reference/api) close at hand.
|
||||
|
||||
We also offer a wide range of dedicated guides to all Meilisearch features, such as [filtering](https://docs.meilisearch.com/learn/advanced/filtering_and_faceted_search.html), [sorting](https://docs.meilisearch.com/learn/advanced/sorting.html), [geosearch](https://docs.meilisearch.com/learn/advanced/geosearch.html), [API keys](https://docs.meilisearch.com/learn/security/master_api_keys.html), and [tenant tokens](https://docs.meilisearch.com/learn/security/tenant_tokens.html).
|
||||
We also offer a wide range of dedicated guides to all Meilisearch features, such as [filtering](https://meilisearch.com/docs/learn/advanced/filtering), [sorting](https://meilisearch.com/docs/learn/advanced/sorting), [geosearch](https://meilisearch.com/docs/learn/advanced/geosearch), [API keys](https://meilisearch.com/docs/learn/security/master_api_keys), and [tenant tokens](https://meilisearch.com/docs/learn/security/tenant_tokens).
|
||||
|
||||
Finally, for more in-depth information, refer to our articles explaining fundamental Meilisearch concepts such as [documents](https://docs.meilisearch.com/learn/core_concepts/documents.html) and [indexes](https://docs.meilisearch.com/learn/core_concepts/indexes.html).
|
||||
Finally, for more in-depth information, refer to our articles explaining fundamental Meilisearch concepts such as [documents](https://meilisearch.com/docs/learn/core_concepts/documents) and [indexes](https://meilisearch.com/docs/learn/core_concepts/indexes).
|
||||
|
||||
## 📊 Telemetry
|
||||
|
||||
Meilisearch collects **anonymized** data from users to help us improve our product. You can [deactivate this](https://docs.meilisearch.com/learn/what_is_meilisearch/telemetry.html#how-to-disable-data-collection) whenever you want.
|
||||
Meilisearch collects **anonymized** data from users to help us improve our product. You can [deactivate this](https://meilisearch.com/docs/learn/what_is_meilisearch/telemetry#how-to-disable-data-collection) whenever you want.
|
||||
|
||||
To request deletion of collected data, please write to us at [privacy@meilisearch.com](mailto:privacy@meilisearch.com). Don't forget to include your `Instance UID` in the message, as this helps us quickly find and delete your data.
|
||||
|
||||
If you want to know more about the kind of data we collect and what we use it for, check the [telemetry section](https://docs.meilisearch.com/learn/what_is_meilisearch/telemetry.html) of our documentation.
|
||||
If you want to know more about the kind of data we collect and what we use it for, check the [telemetry section](https://meilisearch.com/docs/learn/what_is_meilisearch/telemetry) of our documentation.
|
||||
|
||||
## 📫 Get in touch!
|
||||
|
||||
@@ -97,7 +97,6 @@ Meilisearch is a search engine created by [Meili](https://www.welcometothejungle
|
||||
- For feature requests, please visit our [product repository](https://github.com/meilisearch/product/discussions)
|
||||
- Found a bug? Open an [issue](https://github.com/meilisearch/meilisearch/issues)!
|
||||
- Want to be part of our Discord community? [Join us!](https://discord.gg/meilisearch)
|
||||
- For everything else, please check [this page listing some of the other places where you can find us](https://docs.meilisearch.com/learn/what_is_meilisearch/contact.html)
|
||||
|
||||
Thank you for your support!
|
||||
|
||||
|
||||
@@ -159,7 +159,7 @@ impl<'a> Display for Error<'a> {
|
||||
writeln!(f, "The `_geoBoundingBox` filter expects two pairs of arguments: `_geoBoundingBox([latitude, longitude], [latitude, longitude])`.")?
|
||||
}
|
||||
ErrorKind::ReservedGeo(name) => {
|
||||
writeln!(f, "`{}` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance), or _geoBoundingBox([latitude, longitude], [latitude, longitude]) built-in rules to filter on `_geo` coordinates.", name.escape_debug())?
|
||||
writeln!(f, "`{}` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.", name.escape_debug())?
|
||||
}
|
||||
ErrorKind::MisusedGeoRadius => {
|
||||
writeln!(f, "The `_geoRadius` filter is an operation and can't be used as a value.")?
|
||||
|
||||
@@ -382,6 +382,34 @@ fn parse_geo_point(input: Span) -> IResult<FilterCondition> {
|
||||
Err(nom::Err::Failure(Error::new_from_kind(input, ErrorKind::ReservedGeo("_geoPoint"))))
|
||||
}
|
||||
|
||||
/// geoPoint = WS* "_geoDistance(float WS* "," WS* float WS* "," WS* float)
|
||||
fn parse_geo_distance(input: Span) -> IResult<FilterCondition> {
|
||||
// we want to forbid space BEFORE the _geoDistance but not after
|
||||
tuple((
|
||||
multispace0,
|
||||
tag("_geoDistance"),
|
||||
// if we were able to parse `_geoDistance` we are going to return a Failure whatever happens next.
|
||||
cut(delimited(char('('), separated_list1(tag(","), ws(recognize_float)), char(')'))),
|
||||
))(input)
|
||||
.map_err(|e| e.map(|_| Error::new_from_kind(input, ErrorKind::ReservedGeo("_geoDistance"))))?;
|
||||
// if we succeeded we still return a `Failure` because `geoDistance` filters are not allowed
|
||||
Err(nom::Err::Failure(Error::new_from_kind(input, ErrorKind::ReservedGeo("_geoDistance"))))
|
||||
}
|
||||
|
||||
/// geo = WS* "_geo(float WS* "," WS* float WS* "," WS* float)
|
||||
fn parse_geo(input: Span) -> IResult<FilterCondition> {
|
||||
// we want to forbid space BEFORE the _geo but not after
|
||||
tuple((
|
||||
multispace0,
|
||||
word_exact("_geo"),
|
||||
// if we were able to parse `_geo` we are going to return a Failure whatever happens next.
|
||||
cut(delimited(char('('), separated_list1(tag(","), ws(recognize_float)), char(')'))),
|
||||
))(input)
|
||||
.map_err(|e| e.map(|_| Error::new_from_kind(input, ErrorKind::ReservedGeo("_geo"))))?;
|
||||
// if we succeeded we still return a `Failure` because `_geo` filter is not allowed
|
||||
Err(nom::Err::Failure(Error::new_from_kind(input, ErrorKind::ReservedGeo("_geo"))))
|
||||
}
|
||||
|
||||
fn parse_error_reserved_keyword(input: Span) -> IResult<FilterCondition> {
|
||||
match parse_condition(input) {
|
||||
Ok(result) => Ok(result),
|
||||
@@ -418,6 +446,8 @@ fn parse_primary(input: Span, depth: usize) -> IResult<FilterCondition> {
|
||||
parse_not_exists,
|
||||
parse_to,
|
||||
// the next lines are only for error handling and are written at the end to have the less possible performance impact
|
||||
parse_geo,
|
||||
parse_geo_distance,
|
||||
parse_geo_point,
|
||||
parse_error_reserved_keyword,
|
||||
))(input)
|
||||
@@ -621,15 +651,35 @@ pub mod tests {
|
||||
"###);
|
||||
|
||||
insta::assert_display_snapshot!(p("_geoPoint(12, 13, 14)"), @r###"
|
||||
`_geoPoint` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance), or _geoBoundingBox([latitude, longitude], [latitude, longitude]) built-in rules to filter on `_geo` coordinates.
|
||||
`_geoPoint` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.
|
||||
1:22 _geoPoint(12, 13, 14)
|
||||
"###);
|
||||
|
||||
insta::assert_display_snapshot!(p("position <= _geoPoint(12, 13, 14)"), @r###"
|
||||
`_geoPoint` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance), or _geoBoundingBox([latitude, longitude], [latitude, longitude]) built-in rules to filter on `_geo` coordinates.
|
||||
`_geoPoint` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.
|
||||
13:34 position <= _geoPoint(12, 13, 14)
|
||||
"###);
|
||||
|
||||
insta::assert_display_snapshot!(p("_geoDistance(12, 13, 14)"), @r###"
|
||||
`_geoDistance` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.
|
||||
1:25 _geoDistance(12, 13, 14)
|
||||
"###);
|
||||
|
||||
insta::assert_display_snapshot!(p("position <= _geoDistance(12, 13, 14)"), @r###"
|
||||
`_geoDistance` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.
|
||||
13:37 position <= _geoDistance(12, 13, 14)
|
||||
"###);
|
||||
|
||||
insta::assert_display_snapshot!(p("_geo(12, 13, 14)"), @r###"
|
||||
`_geo` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.
|
||||
1:17 _geo(12, 13, 14)
|
||||
"###);
|
||||
|
||||
insta::assert_display_snapshot!(p("position <= _geo(12, 13, 14)"), @r###"
|
||||
`_geo` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.
|
||||
13:29 position <= _geo(12, 13, 14)
|
||||
"###);
|
||||
|
||||
insta::assert_display_snapshot!(p("position <= _geoRadius(12, 13, 14)"), @r###"
|
||||
The `_geoRadius` filter is an operation and can't be used as a value.
|
||||
13:35 position <= _geoRadius(12, 13, 14)
|
||||
|
||||
@@ -7,8 +7,8 @@ use nom::{InputIter, InputLength, InputTake, Slice};
|
||||
|
||||
use crate::error::{ExpectedValueKind, NomErrorExt};
|
||||
use crate::{
|
||||
parse_geo_bounding_box, parse_geo_point, parse_geo_radius, Error, ErrorKind, IResult, Span,
|
||||
Token,
|
||||
parse_geo, parse_geo_bounding_box, parse_geo_distance, parse_geo_point, parse_geo_radius,
|
||||
Error, ErrorKind, IResult, Span, Token,
|
||||
};
|
||||
|
||||
/// This function goes through all characters in the [Span] if it finds any escaped character (`\`).
|
||||
@@ -88,11 +88,16 @@ pub fn parse_value(input: Span) -> IResult<Token> {
|
||||
// then, we want to check if the user is misusing a geo expression
|
||||
// This expression can’t finish without error.
|
||||
// We want to return an error in case of failure.
|
||||
if let Err(err) = parse_geo_point(input) {
|
||||
if err.is_failure() {
|
||||
return Err(err);
|
||||
let geo_reserved_parse_functions = [parse_geo_point, parse_geo_distance, parse_geo];
|
||||
|
||||
for parser in geo_reserved_parse_functions {
|
||||
if let Err(err) = parser(input) {
|
||||
if err.is_failure() {
|
||||
return Err(err);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
match parse_geo_radius(input) {
|
||||
Ok(_) => {
|
||||
return Err(nom::Err::Failure(Error::new_from_kind(input, ErrorKind::MisusedGeoRadius)))
|
||||
|
||||
@@ -31,6 +31,7 @@ mod uuid_codec;
|
||||
pub type Result<T> = std::result::Result<T, Error>;
|
||||
pub type TaskId = u32;
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::ops::{Bound, RangeBounds};
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::sync::atomic::AtomicBool;
|
||||
@@ -43,7 +44,7 @@ pub use error::Error;
|
||||
use file_store::FileStore;
|
||||
use meilisearch_types::error::ResponseError;
|
||||
use meilisearch_types::heed::types::{OwnedType, SerdeBincode, SerdeJson, Str};
|
||||
use meilisearch_types::heed::{self, Database, Env, RoTxn};
|
||||
use meilisearch_types::heed::{self, Database, Env, RoTxn, RwTxn};
|
||||
use meilisearch_types::milli::documents::DocumentsBatchBuilder;
|
||||
use meilisearch_types::milli::update::IndexerConfig;
|
||||
use meilisearch_types::milli::{self, CboRoaringBitmapCodec, Index, RoaringBitmapCodec, BEU32};
|
||||
@@ -428,6 +429,13 @@ impl IndexScheduler {
|
||||
Ok(this)
|
||||
}
|
||||
|
||||
/// Return `Ok(())` if the index scheduler is able to access one of its database.
|
||||
pub fn health(&self) -> Result<()> {
|
||||
let rtxn = self.env.read_txn()?;
|
||||
self.all_tasks.first(&rtxn)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn index_budget(
|
||||
tasks_path: &Path,
|
||||
base_map_size: usize,
|
||||
@@ -882,127 +890,8 @@ impl IndexScheduler {
|
||||
|
||||
/// Register a new task coming from a dump in the scheduler.
|
||||
/// By taking a mutable ref we're pretty sure no one will ever import a dump while actix is running.
|
||||
pub fn register_dumped_task(
|
||||
&mut self,
|
||||
task: TaskDump,
|
||||
content_file: Option<Box<UpdateFile>>,
|
||||
) -> Result<Task> {
|
||||
// Currently we don't need to access the tasks queue while loading a dump thus I can block everything.
|
||||
let mut wtxn = self.env.write_txn()?;
|
||||
|
||||
let content_uuid = match content_file {
|
||||
Some(content_file) if task.status == Status::Enqueued => {
|
||||
let (uuid, mut file) = self.create_update_file()?;
|
||||
let mut builder = DocumentsBatchBuilder::new(file.as_file_mut());
|
||||
for doc in content_file {
|
||||
builder.append_json_object(&doc?)?;
|
||||
}
|
||||
builder.into_inner()?;
|
||||
file.persist()?;
|
||||
|
||||
Some(uuid)
|
||||
}
|
||||
// If the task isn't `Enqueued` then just generate a recognisable `Uuid`
|
||||
// in case we try to open it later.
|
||||
_ if task.status != Status::Enqueued => Some(Uuid::nil()),
|
||||
_ => None,
|
||||
};
|
||||
|
||||
let task = Task {
|
||||
uid: task.uid,
|
||||
enqueued_at: task.enqueued_at,
|
||||
started_at: task.started_at,
|
||||
finished_at: task.finished_at,
|
||||
error: task.error,
|
||||
canceled_by: task.canceled_by,
|
||||
details: task.details,
|
||||
status: task.status,
|
||||
kind: match task.kind {
|
||||
KindDump::DocumentImport {
|
||||
primary_key,
|
||||
method,
|
||||
documents_count,
|
||||
allow_index_creation,
|
||||
} => KindWithContent::DocumentAdditionOrUpdate {
|
||||
index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
|
||||
primary_key,
|
||||
method,
|
||||
content_file: content_uuid.ok_or(Error::CorruptedDump)?,
|
||||
documents_count,
|
||||
allow_index_creation,
|
||||
},
|
||||
KindDump::DocumentDeletion { documents_ids } => KindWithContent::DocumentDeletion {
|
||||
documents_ids,
|
||||
index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
|
||||
},
|
||||
KindDump::DocumentClear => KindWithContent::DocumentClear {
|
||||
index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
|
||||
},
|
||||
KindDump::Settings { settings, is_deletion, allow_index_creation } => {
|
||||
KindWithContent::SettingsUpdate {
|
||||
index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
|
||||
new_settings: settings,
|
||||
is_deletion,
|
||||
allow_index_creation,
|
||||
}
|
||||
}
|
||||
KindDump::IndexDeletion => KindWithContent::IndexDeletion {
|
||||
index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
|
||||
},
|
||||
KindDump::IndexCreation { primary_key } => KindWithContent::IndexCreation {
|
||||
index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
|
||||
primary_key,
|
||||
},
|
||||
KindDump::IndexUpdate { primary_key } => KindWithContent::IndexUpdate {
|
||||
index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
|
||||
primary_key,
|
||||
},
|
||||
KindDump::IndexSwap { swaps } => KindWithContent::IndexSwap { swaps },
|
||||
KindDump::TaskCancelation { query, tasks } => {
|
||||
KindWithContent::TaskCancelation { query, tasks }
|
||||
}
|
||||
KindDump::TasksDeletion { query, tasks } => {
|
||||
KindWithContent::TaskDeletion { query, tasks }
|
||||
}
|
||||
KindDump::DumpCreation { keys, instance_uid } => {
|
||||
KindWithContent::DumpCreation { keys, instance_uid }
|
||||
}
|
||||
KindDump::SnapshotCreation => KindWithContent::SnapshotCreation,
|
||||
},
|
||||
};
|
||||
|
||||
self.all_tasks.put(&mut wtxn, &BEU32::new(task.uid), &task)?;
|
||||
|
||||
for index in task.indexes() {
|
||||
self.update_index(&mut wtxn, index, |bitmap| {
|
||||
bitmap.insert(task.uid);
|
||||
})?;
|
||||
}
|
||||
|
||||
self.update_status(&mut wtxn, task.status, |bitmap| {
|
||||
bitmap.insert(task.uid);
|
||||
})?;
|
||||
|
||||
self.update_kind(&mut wtxn, task.kind.as_kind(), |bitmap| {
|
||||
(bitmap.insert(task.uid));
|
||||
})?;
|
||||
|
||||
utils::insert_task_datetime(&mut wtxn, self.enqueued_at, task.enqueued_at, task.uid)?;
|
||||
|
||||
// we can't override the started_at & finished_at, so we must only set it if the tasks is finished and won't change
|
||||
if matches!(task.status, Status::Succeeded | Status::Failed | Status::Canceled) {
|
||||
if let Some(started_at) = task.started_at {
|
||||
utils::insert_task_datetime(&mut wtxn, self.started_at, started_at, task.uid)?;
|
||||
}
|
||||
if let Some(finished_at) = task.finished_at {
|
||||
utils::insert_task_datetime(&mut wtxn, self.finished_at, finished_at, task.uid)?;
|
||||
}
|
||||
}
|
||||
|
||||
wtxn.commit()?;
|
||||
self.wake_up.signal();
|
||||
|
||||
Ok(task)
|
||||
pub fn register_dumped_task(&mut self) -> Result<Dump> {
|
||||
Dump::new(self)
|
||||
}
|
||||
|
||||
/// Create a new index without any associated task.
|
||||
@@ -1237,6 +1126,184 @@ impl IndexScheduler {
|
||||
}
|
||||
}
|
||||
|
||||
pub struct Dump<'a> {
|
||||
index_scheduler: &'a IndexScheduler,
|
||||
wtxn: RwTxn<'a, 'a>,
|
||||
|
||||
indexes: HashMap<String, RoaringBitmap>,
|
||||
statuses: HashMap<Status, RoaringBitmap>,
|
||||
kinds: HashMap<Kind, RoaringBitmap>,
|
||||
}
|
||||
|
||||
impl<'a> Dump<'a> {
|
||||
pub(crate) fn new(index_scheduler: &'a mut IndexScheduler) -> Result<Self> {
|
||||
// While loading a dump no one should be able to access the scheduler thus I can block everything.
|
||||
let wtxn = index_scheduler.env.write_txn()?;
|
||||
|
||||
Ok(Dump {
|
||||
index_scheduler,
|
||||
wtxn,
|
||||
indexes: HashMap::new(),
|
||||
statuses: HashMap::new(),
|
||||
kinds: HashMap::new(),
|
||||
})
|
||||
}
|
||||
|
||||
/// Register a new task coming from a dump in the scheduler.
|
||||
/// By taking a mutable ref we're pretty sure no one will ever import a dump while actix is running.
|
||||
pub fn register_dumped_task(
|
||||
&mut self,
|
||||
task: TaskDump,
|
||||
content_file: Option<Box<UpdateFile>>,
|
||||
) -> Result<Task> {
|
||||
let content_uuid = match content_file {
|
||||
Some(content_file) if task.status == Status::Enqueued => {
|
||||
let (uuid, mut file) = self.index_scheduler.create_update_file()?;
|
||||
let mut builder = DocumentsBatchBuilder::new(file.as_file_mut());
|
||||
for doc in content_file {
|
||||
builder.append_json_object(&doc?)?;
|
||||
}
|
||||
builder.into_inner()?;
|
||||
file.persist()?;
|
||||
|
||||
Some(uuid)
|
||||
}
|
||||
// If the task isn't `Enqueued` then just generate a recognisable `Uuid`
|
||||
// in case we try to open it later.
|
||||
_ if task.status != Status::Enqueued => Some(Uuid::nil()),
|
||||
_ => None,
|
||||
};
|
||||
|
||||
let task = Task {
|
||||
uid: task.uid,
|
||||
enqueued_at: task.enqueued_at,
|
||||
started_at: task.started_at,
|
||||
finished_at: task.finished_at,
|
||||
error: task.error,
|
||||
canceled_by: task.canceled_by,
|
||||
details: task.details,
|
||||
status: task.status,
|
||||
kind: match task.kind {
|
||||
KindDump::DocumentImport {
|
||||
primary_key,
|
||||
method,
|
||||
documents_count,
|
||||
allow_index_creation,
|
||||
} => KindWithContent::DocumentAdditionOrUpdate {
|
||||
index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
|
||||
primary_key,
|
||||
method,
|
||||
content_file: content_uuid.ok_or(Error::CorruptedDump)?,
|
||||
documents_count,
|
||||
allow_index_creation,
|
||||
},
|
||||
KindDump::DocumentDeletion { documents_ids } => KindWithContent::DocumentDeletion {
|
||||
documents_ids,
|
||||
index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
|
||||
},
|
||||
KindDump::DocumentClear => KindWithContent::DocumentClear {
|
||||
index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
|
||||
},
|
||||
KindDump::Settings { settings, is_deletion, allow_index_creation } => {
|
||||
KindWithContent::SettingsUpdate {
|
||||
index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
|
||||
new_settings: settings,
|
||||
is_deletion,
|
||||
allow_index_creation,
|
||||
}
|
||||
}
|
||||
KindDump::IndexDeletion => KindWithContent::IndexDeletion {
|
||||
index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
|
||||
},
|
||||
KindDump::IndexCreation { primary_key } => KindWithContent::IndexCreation {
|
||||
index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
|
||||
primary_key,
|
||||
},
|
||||
KindDump::IndexUpdate { primary_key } => KindWithContent::IndexUpdate {
|
||||
index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
|
||||
primary_key,
|
||||
},
|
||||
KindDump::IndexSwap { swaps } => KindWithContent::IndexSwap { swaps },
|
||||
KindDump::TaskCancelation { query, tasks } => {
|
||||
KindWithContent::TaskCancelation { query, tasks }
|
||||
}
|
||||
KindDump::TasksDeletion { query, tasks } => {
|
||||
KindWithContent::TaskDeletion { query, tasks }
|
||||
}
|
||||
KindDump::DumpCreation { keys, instance_uid } => {
|
||||
KindWithContent::DumpCreation { keys, instance_uid }
|
||||
}
|
||||
KindDump::SnapshotCreation => KindWithContent::SnapshotCreation,
|
||||
},
|
||||
};
|
||||
|
||||
self.index_scheduler.all_tasks.put(&mut self.wtxn, &BEU32::new(task.uid), &task)?;
|
||||
|
||||
for index in task.indexes() {
|
||||
match self.indexes.get_mut(index) {
|
||||
Some(bitmap) => {
|
||||
bitmap.insert(task.uid);
|
||||
}
|
||||
None => {
|
||||
let mut bitmap = RoaringBitmap::new();
|
||||
bitmap.insert(task.uid);
|
||||
self.indexes.insert(index.to_string(), bitmap);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
utils::insert_task_datetime(
|
||||
&mut self.wtxn,
|
||||
self.index_scheduler.enqueued_at,
|
||||
task.enqueued_at,
|
||||
task.uid,
|
||||
)?;
|
||||
|
||||
// we can't override the started_at & finished_at, so we must only set it if the tasks is finished and won't change
|
||||
if matches!(task.status, Status::Succeeded | Status::Failed | Status::Canceled) {
|
||||
if let Some(started_at) = task.started_at {
|
||||
utils::insert_task_datetime(
|
||||
&mut self.wtxn,
|
||||
self.index_scheduler.started_at,
|
||||
started_at,
|
||||
task.uid,
|
||||
)?;
|
||||
}
|
||||
if let Some(finished_at) = task.finished_at {
|
||||
utils::insert_task_datetime(
|
||||
&mut self.wtxn,
|
||||
self.index_scheduler.finished_at,
|
||||
finished_at,
|
||||
task.uid,
|
||||
)?;
|
||||
}
|
||||
}
|
||||
|
||||
self.statuses.entry(task.status).or_insert(RoaringBitmap::new()).insert(task.uid);
|
||||
self.kinds.entry(task.kind.as_kind()).or_insert(RoaringBitmap::new()).insert(task.uid);
|
||||
|
||||
Ok(task)
|
||||
}
|
||||
|
||||
/// Commit all the changes and exit the importing dump state
|
||||
pub fn finish(mut self) -> Result<()> {
|
||||
for (index, bitmap) in self.indexes {
|
||||
self.index_scheduler.index_tasks.put(&mut self.wtxn, &index, &bitmap)?;
|
||||
}
|
||||
for (status, bitmap) in self.statuses {
|
||||
self.index_scheduler.put_status(&mut self.wtxn, status, &bitmap)?;
|
||||
}
|
||||
for (kind, bitmap) in self.kinds {
|
||||
self.index_scheduler.put_kind(&mut self.wtxn, kind, &bitmap)?;
|
||||
}
|
||||
|
||||
self.wtxn.commit()?;
|
||||
self.index_scheduler.wake_up.signal();
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// The outcome of calling the [`IndexScheduler::tick`] function.
|
||||
pub enum TickOutcome {
|
||||
/// The scheduler should immediately attempt another `tick`.
|
||||
|
||||
@@ -34,6 +34,12 @@ impl AuthController {
|
||||
Ok(Self { store: Arc::new(store), master_key: master_key.clone() })
|
||||
}
|
||||
|
||||
/// Return `Ok(())` if the auth controller is able to access one of its database.
|
||||
pub fn health(&self) -> Result<()> {
|
||||
self.store.health()?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Return the size of the `AuthController` database in bytes.
|
||||
pub fn size(&self) -> Result<u64> {
|
||||
self.store.size()
|
||||
|
||||
@@ -61,6 +61,13 @@ impl HeedAuthStore {
|
||||
Ok(Self { env, keys, action_keyid_index_expiration, should_close_on_drop: true })
|
||||
}
|
||||
|
||||
/// Return `Ok(())` if the auth store is able to access one of its database.
|
||||
pub fn health(&self) -> Result<()> {
|
||||
let rtxn = self.env.read_txn()?;
|
||||
self.keys.first(&rtxn)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Return the size in bytes of database
|
||||
pub fn size(&self) -> Result<u64> {
|
||||
Ok(self.env.real_disk_size()?)
|
||||
|
||||
@@ -86,7 +86,7 @@ impl SegmentAnalytics {
|
||||
pub async fn new(
|
||||
opt: &Opt,
|
||||
index_scheduler: Arc<IndexScheduler>,
|
||||
auth_controller: AuthController,
|
||||
auth_controller: Arc<AuthController>,
|
||||
) -> Arc<dyn Analytics> {
|
||||
let instance_uid = super::find_user_id(&opt.db_path);
|
||||
let first_time_run = instance_uid.is_none();
|
||||
@@ -376,7 +376,11 @@ impl Segment {
|
||||
})
|
||||
}
|
||||
|
||||
async fn run(mut self, index_scheduler: Arc<IndexScheduler>, auth_controller: AuthController) {
|
||||
async fn run(
|
||||
mut self,
|
||||
index_scheduler: Arc<IndexScheduler>,
|
||||
auth_controller: Arc<AuthController>,
|
||||
) {
|
||||
const INTERVAL: Duration = Duration::from_secs(60 * 60); // one hour
|
||||
// The first batch must be sent after one hour.
|
||||
let mut interval =
|
||||
@@ -408,10 +412,10 @@ impl Segment {
|
||||
async fn tick(
|
||||
&mut self,
|
||||
index_scheduler: Arc<IndexScheduler>,
|
||||
auth_controller: AuthController,
|
||||
auth_controller: Arc<AuthController>,
|
||||
) {
|
||||
if let Ok(stats) =
|
||||
create_all_stats(index_scheduler.into(), auth_controller, &AuthFilter::default())
|
||||
create_all_stats(index_scheduler.into(), auth_controller.into(), &AuthFilter::default())
|
||||
{
|
||||
// Replace the version number with the prototype name if any.
|
||||
let version = if let Some(prototype) = crate::prototype_name() {
|
||||
|
||||
@@ -4,6 +4,7 @@ use std::marker::PhantomData;
|
||||
use std::ops::Deref;
|
||||
use std::pin::Pin;
|
||||
|
||||
use actix_web::web::Data;
|
||||
use actix_web::FromRequest;
|
||||
pub use error::AuthenticationError;
|
||||
use futures::future::err;
|
||||
@@ -23,7 +24,7 @@ impl<P, D> GuardedData<P, D> {
|
||||
}
|
||||
|
||||
async fn auth_bearer(
|
||||
auth: AuthController,
|
||||
auth: Data<AuthController>,
|
||||
token: String,
|
||||
index: Option<String>,
|
||||
data: Option<D>,
|
||||
@@ -43,7 +44,7 @@ impl<P, D> GuardedData<P, D> {
|
||||
}
|
||||
}
|
||||
|
||||
async fn auth_token(auth: AuthController, data: Option<D>) -> Result<Self, ResponseError>
|
||||
async fn auth_token(auth: Data<AuthController>, data: Option<D>) -> Result<Self, ResponseError>
|
||||
where
|
||||
P: Policy + 'static,
|
||||
{
|
||||
@@ -60,7 +61,7 @@ impl<P, D> GuardedData<P, D> {
|
||||
}
|
||||
|
||||
async fn authenticate(
|
||||
auth: AuthController,
|
||||
auth: Data<AuthController>,
|
||||
token: String,
|
||||
index: Option<String>,
|
||||
) -> Result<Option<AuthFilter>, ResponseError>
|
||||
@@ -90,7 +91,7 @@ impl<P: Policy + 'static, D: 'static + Clone> FromRequest for GuardedData<P, D>
|
||||
req: &actix_web::HttpRequest,
|
||||
_payload: &mut actix_web::dev::Payload,
|
||||
) -> Self::Future {
|
||||
match req.app_data::<AuthController>().cloned() {
|
||||
match req.app_data::<Data<AuthController>>().cloned() {
|
||||
Some(auth) => match req
|
||||
.headers()
|
||||
.get("Authorization")
|
||||
@@ -122,10 +123,15 @@ impl<P: Policy + 'static, D: 'static + Clone> FromRequest for GuardedData<P, D>
|
||||
}
|
||||
|
||||
pub trait Policy {
|
||||
fn authenticate(auth: AuthController, token: &str, index: Option<&str>) -> Option<AuthFilter>;
|
||||
fn authenticate(
|
||||
auth: Data<AuthController>,
|
||||
token: &str,
|
||||
index: Option<&str>,
|
||||
) -> Option<AuthFilter>;
|
||||
}
|
||||
|
||||
pub mod policies {
|
||||
use actix_web::web::Data;
|
||||
use jsonwebtoken::{decode, Algorithm, DecodingKey, Validation};
|
||||
use meilisearch_auth::{AuthController, AuthFilter, SearchRules};
|
||||
// reexport actions in policies in order to be used in routes configuration.
|
||||
@@ -178,7 +184,7 @@ pub mod policies {
|
||||
/// Otherwise, returns an object containing the generated permissions: the search filters to add to a search, and the list of allowed indexes
|
||||
/// (that may contain more indexes than requested).
|
||||
fn authenticate(
|
||||
auth: AuthController,
|
||||
auth: Data<AuthController>,
|
||||
token: &str,
|
||||
index: Option<&str>,
|
||||
) -> Option<AuthFilter> {
|
||||
|
||||
@@ -88,7 +88,7 @@ fn is_empty_db(db_path: impl AsRef<Path>) -> bool {
|
||||
|
||||
pub fn create_app(
|
||||
index_scheduler: Data<IndexScheduler>,
|
||||
auth_controller: AuthController,
|
||||
auth_controller: Data<AuthController>,
|
||||
opt: Opt,
|
||||
analytics: Arc<dyn Analytics>,
|
||||
enable_dashboard: bool,
|
||||
@@ -136,7 +136,7 @@ enum OnFailure {
|
||||
KeepDb,
|
||||
}
|
||||
|
||||
pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<(Arc<IndexScheduler>, AuthController)> {
|
||||
pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<(Arc<IndexScheduler>, Arc<AuthController>)> {
|
||||
let empty_db = is_empty_db(&opt.db_path);
|
||||
let (index_scheduler, auth_controller) = if let Some(ref snapshot_path) = opt.import_snapshot {
|
||||
let snapshot_path_exists = snapshot_path.exists();
|
||||
@@ -195,6 +195,7 @@ pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<(Arc<IndexScheduler>, Auth
|
||||
|
||||
// We create a loop in a thread that registers snapshotCreation tasks
|
||||
let index_scheduler = Arc::new(index_scheduler);
|
||||
let auth_controller = Arc::new(auth_controller);
|
||||
if let ScheduleSnapshot::Enabled(snapshot_delay) = opt.schedule_snapshot {
|
||||
let snapshot_delay = Duration::from_secs(snapshot_delay);
|
||||
let index_scheduler = index_scheduler.clone();
|
||||
@@ -367,18 +368,20 @@ fn import_dump(
|
||||
log::info!("All documents successfully imported.");
|
||||
}
|
||||
|
||||
let mut index_scheduler_dump = index_scheduler.register_dumped_task()?;
|
||||
|
||||
// 4. Import the tasks.
|
||||
for ret in dump_reader.tasks()? {
|
||||
let (task, file) = ret?;
|
||||
index_scheduler.register_dumped_task(task, file)?;
|
||||
index_scheduler_dump.register_dumped_task(task, file)?;
|
||||
}
|
||||
Ok(())
|
||||
Ok(index_scheduler_dump.finish()?)
|
||||
}
|
||||
|
||||
pub fn configure_data(
|
||||
config: &mut web::ServiceConfig,
|
||||
index_scheduler: Data<IndexScheduler>,
|
||||
auth: AuthController,
|
||||
auth: Data<AuthController>,
|
||||
opt: &Opt,
|
||||
analytics: Arc<dyn Analytics>,
|
||||
) {
|
||||
|
||||
@@ -74,13 +74,14 @@ async fn main() -> anyhow::Result<()> {
|
||||
|
||||
async fn run_http(
|
||||
index_scheduler: Arc<IndexScheduler>,
|
||||
auth_controller: AuthController,
|
||||
auth_controller: Arc<AuthController>,
|
||||
opt: Opt,
|
||||
analytics: Arc<dyn Analytics>,
|
||||
) -> anyhow::Result<()> {
|
||||
let enable_dashboard = &opt.env == "development";
|
||||
let opt_clone = opt.clone();
|
||||
let index_scheduler = Data::from(index_scheduler);
|
||||
let auth_controller = Data::from(auth_controller);
|
||||
|
||||
let http_server = HttpServer::new(move || {
|
||||
create_app(
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
use std::str;
|
||||
|
||||
use actix_web::web::Data;
|
||||
use actix_web::{web, HttpRequest, HttpResponse};
|
||||
use deserr::actix_web::{AwebJson, AwebQueryParameter};
|
||||
use deserr::Deserr;
|
||||
@@ -35,7 +36,7 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
|
||||
}
|
||||
|
||||
pub async fn create_api_key(
|
||||
auth_controller: GuardedData<ActionPolicy<{ actions::KEYS_CREATE }>, AuthController>,
|
||||
auth_controller: GuardedData<ActionPolicy<{ actions::KEYS_CREATE }>, Data<AuthController>>,
|
||||
body: AwebJson<CreateApiKey, DeserrJsonError>,
|
||||
_req: HttpRequest,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
@@ -66,7 +67,7 @@ impl ListApiKeys {
|
||||
}
|
||||
|
||||
pub async fn list_api_keys(
|
||||
auth_controller: GuardedData<ActionPolicy<{ actions::KEYS_GET }>, AuthController>,
|
||||
auth_controller: GuardedData<ActionPolicy<{ actions::KEYS_GET }>, Data<AuthController>>,
|
||||
list_api_keys: AwebQueryParameter<ListApiKeys, DeserrQueryParamError>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let paginate = list_api_keys.into_inner().as_pagination();
|
||||
@@ -84,7 +85,7 @@ pub async fn list_api_keys(
|
||||
}
|
||||
|
||||
pub async fn get_api_key(
|
||||
auth_controller: GuardedData<ActionPolicy<{ actions::KEYS_GET }>, AuthController>,
|
||||
auth_controller: GuardedData<ActionPolicy<{ actions::KEYS_GET }>, Data<AuthController>>,
|
||||
path: web::Path<AuthParam>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let key = path.into_inner().key;
|
||||
@@ -103,7 +104,7 @@ pub async fn get_api_key(
|
||||
}
|
||||
|
||||
pub async fn patch_api_key(
|
||||
auth_controller: GuardedData<ActionPolicy<{ actions::KEYS_UPDATE }>, AuthController>,
|
||||
auth_controller: GuardedData<ActionPolicy<{ actions::KEYS_UPDATE }>, Data<AuthController>>,
|
||||
body: AwebJson<PatchApiKey, DeserrJsonError>,
|
||||
path: web::Path<AuthParam>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
@@ -123,7 +124,7 @@ pub async fn patch_api_key(
|
||||
}
|
||||
|
||||
pub async fn delete_api_key(
|
||||
auth_controller: GuardedData<ActionPolicy<{ actions::KEYS_DELETE }>, AuthController>,
|
||||
auth_controller: GuardedData<ActionPolicy<{ actions::KEYS_DELETE }>, Data<AuthController>>,
|
||||
path: web::Path<AuthParam>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let key = path.into_inner().key;
|
||||
|
||||
@@ -19,7 +19,7 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
|
||||
|
||||
pub async fn create_dump(
|
||||
index_scheduler: GuardedData<ActionPolicy<{ actions::DUMPS_CREATE }>, Data<IndexScheduler>>,
|
||||
auth_controller: GuardedData<ActionPolicy<{ actions::DUMPS_CREATE }>, AuthController>,
|
||||
auth_controller: GuardedData<ActionPolicy<{ actions::DUMPS_CREATE }>, Data<AuthController>>,
|
||||
req: HttpRequest,
|
||||
analytics: web::Data<dyn Analytics>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
|
||||
178
meilisearch/src/routes/indexes/facet_search.rs
Normal file
178
meilisearch/src/routes/indexes/facet_search.rs
Normal file
@@ -0,0 +1,178 @@
|
||||
use std::collections::{BTreeSet, HashSet};
|
||||
|
||||
use actix_web::web::Data;
|
||||
use actix_web::{web, HttpRequest, HttpResponse};
|
||||
use deserr::actix_web::{AwebJson, AwebQueryParameter};
|
||||
use index_scheduler::IndexScheduler;
|
||||
use log::debug;
|
||||
use meilisearch_types::deserr::query_params::Param;
|
||||
use meilisearch_types::deserr::{DeserrJsonError, DeserrQueryParamError};
|
||||
use meilisearch_types::error::deserr_codes::*;
|
||||
use meilisearch_types::error::ResponseError;
|
||||
use meilisearch_types::index_uid::IndexUid;
|
||||
use meilisearch_types::milli::facet;
|
||||
use meilisearch_types::serde_cs::vec::CS;
|
||||
use serde_json::Value;
|
||||
|
||||
use crate::analytics::{Analytics, SearchAggregator};
|
||||
use crate::extractors::authentication::policies::*;
|
||||
use crate::extractors::authentication::GuardedData;
|
||||
use crate::search::{
|
||||
add_search_rules, perform_facet_search, MatchingStrategy, SearchQuery, DEFAULT_CROP_LENGTH,
|
||||
DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG,
|
||||
DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET,
|
||||
};
|
||||
|
||||
pub fn configure(cfg: &mut web::ServiceConfig) {
|
||||
cfg.service(web::resource("").route(web::post().to(search)));
|
||||
}
|
||||
|
||||
// #[derive(Debug, deserr::Deserr)]
|
||||
// #[deserr(error = DeserrQueryParamError, rename_all = camelCase, deny_unknown_fields)]
|
||||
// pub struct FacetSearchQuery {
|
||||
// #[deserr(default, error = DeserrQueryParamError<InvalidSearchQ>)]
|
||||
// facetQuery: Option<String>,
|
||||
// #[deserr(default = Param(DEFAULT_SEARCH_OFFSET()), error = DeserrQueryParamError<InvalidSearchOffset>)]
|
||||
// offset: Param<usize>,
|
||||
// #[deserr(default = Param(DEFAULT_SEARCH_LIMIT()), error = DeserrQueryParamError<InvalidSearchLimit>)]
|
||||
// limit: Param<usize>,
|
||||
// #[deserr(default, error = DeserrQueryParamError<InvalidSearchPage>)]
|
||||
// page: Option<Param<usize>>,
|
||||
// #[deserr(default, error = DeserrQueryParamError<InvalidSearchHitsPerPage>)]
|
||||
// hits_per_page: Option<Param<usize>>,
|
||||
// #[deserr(default, error = DeserrQueryParamError<InvalidSearchAttributesToRetrieve>)]
|
||||
// attributes_to_retrieve: Option<CS<String>>,
|
||||
// #[deserr(default, error = DeserrQueryParamError<InvalidSearchAttributesToCrop>)]
|
||||
// attributes_to_crop: Option<CS<String>>,
|
||||
// #[deserr(default = Param(DEFAULT_CROP_LENGTH()), error = DeserrQueryParamError<InvalidSearchCropLength>)]
|
||||
// crop_length: Param<usize>,
|
||||
// #[deserr(default, error = DeserrQueryParamError<InvalidSearchAttributesToHighlight>)]
|
||||
// attributes_to_highlight: Option<CS<String>>,
|
||||
// #[deserr(default, error = DeserrQueryParamError<InvalidSearchFilter>)]
|
||||
// filter: Option<String>,
|
||||
// #[deserr(default, error = DeserrQueryParamError<InvalidSearchSort>)]
|
||||
// sort: Option<String>,
|
||||
// #[deserr(default, error = DeserrQueryParamError<InvalidSearchShowMatchesPosition>)]
|
||||
// show_matches_position: Param<bool>,
|
||||
// #[deserr(default, error = DeserrQueryParamError<InvalidSearchFacets>)]
|
||||
// facets: Option<CS<String>>,
|
||||
// #[deserr( default = DEFAULT_HIGHLIGHT_PRE_TAG(), error = DeserrQueryParamError<InvalidSearchHighlightPreTag>)]
|
||||
// highlight_pre_tag: String,
|
||||
// #[deserr( default = DEFAULT_HIGHLIGHT_POST_TAG(), error = DeserrQueryParamError<InvalidSearchHighlightPostTag>)]
|
||||
// highlight_post_tag: String,
|
||||
// #[deserr(default = DEFAULT_CROP_MARKER(), error = DeserrQueryParamError<InvalidSearchCropMarker>)]
|
||||
// crop_marker: String,
|
||||
// #[deserr(default, error = DeserrQueryParamError<InvalidSearchMatchingStrategy>)]
|
||||
// matching_strategy: MatchingStrategy,
|
||||
// }
|
||||
|
||||
// TODO improve the error messages
|
||||
#[derive(Debug, Clone, Default, PartialEq, Eq, deserr::Deserr)]
|
||||
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
|
||||
pub struct FacetSearchQuery {
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSearchQ>)]
|
||||
pub facet_query: Option<String>,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSearchQ>)]
|
||||
pub facet_name: String,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSearchQ>)]
|
||||
pub q: Option<String>,
|
||||
#[deserr(default = DEFAULT_SEARCH_OFFSET(), error = DeserrJsonError<InvalidSearchOffset>)]
|
||||
pub offset: usize,
|
||||
#[deserr(default = DEFAULT_SEARCH_LIMIT(), error = DeserrJsonError<InvalidSearchLimit>)]
|
||||
pub limit: usize,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSearchPage>)]
|
||||
pub page: Option<usize>,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSearchHitsPerPage>)]
|
||||
pub hits_per_page: Option<usize>,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSearchAttributesToRetrieve>)]
|
||||
pub attributes_to_retrieve: Option<BTreeSet<String>>,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSearchAttributesToCrop>)]
|
||||
pub attributes_to_crop: Option<Vec<String>>,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSearchCropLength>, default = DEFAULT_CROP_LENGTH())]
|
||||
pub crop_length: usize,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSearchAttributesToHighlight>)]
|
||||
pub attributes_to_highlight: Option<HashSet<String>>,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSearchShowMatchesPosition>, default)]
|
||||
pub show_matches_position: bool,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSearchFilter>)]
|
||||
pub filter: Option<Value>,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSearchSort>)]
|
||||
pub sort: Option<Vec<String>>,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSearchFacets>)]
|
||||
pub facets: Option<Vec<String>>,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSearchHighlightPreTag>, default = DEFAULT_HIGHLIGHT_PRE_TAG())]
|
||||
pub highlight_pre_tag: String,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSearchHighlightPostTag>, default = DEFAULT_HIGHLIGHT_POST_TAG())]
|
||||
pub highlight_post_tag: String,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSearchCropMarker>, default = DEFAULT_CROP_MARKER())]
|
||||
pub crop_marker: String,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSearchMatchingStrategy>, default)]
|
||||
pub matching_strategy: MatchingStrategy,
|
||||
}
|
||||
|
||||
pub async fn search(
|
||||
index_scheduler: GuardedData<ActionPolicy<{ actions::SEARCH }>, Data<IndexScheduler>>,
|
||||
index_uid: web::Path<String>,
|
||||
params: AwebJson<FacetSearchQuery, DeserrJsonError>,
|
||||
req: HttpRequest,
|
||||
analytics: web::Data<dyn Analytics>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
|
||||
|
||||
let query = params.into_inner();
|
||||
debug!("facet search called with params: {:?}", query);
|
||||
|
||||
let facet_query = query.facet_query.clone();
|
||||
let facet_name = query.facet_name.clone();
|
||||
let mut search_query = SearchQuery::from(query);
|
||||
|
||||
// Tenant token search_rules.
|
||||
if let Some(search_rules) = index_scheduler.filters().get_index_search_rules(&index_uid) {
|
||||
add_search_rules(&mut search_query, search_rules);
|
||||
}
|
||||
|
||||
// TODO log stuff
|
||||
// let mut aggregate = SearchAggregator::from_query(&query, &req);
|
||||
|
||||
let index = index_scheduler.index(&index_uid)?;
|
||||
let search_result = tokio::task::spawn_blocking(move || {
|
||||
perform_facet_search(&index, search_query, facet_query, facet_name)
|
||||
})
|
||||
.await?;
|
||||
|
||||
// TODO log stuff
|
||||
// if let Ok(ref search_result) = search_result {
|
||||
// aggregate.succeed(search_result);
|
||||
// }
|
||||
// TODO analytics
|
||||
// analytics.post_search(aggregate);
|
||||
|
||||
let search_result = search_result?;
|
||||
|
||||
debug!("returns: {:?}", search_result);
|
||||
Ok(HttpResponse::Ok().json(search_result))
|
||||
}
|
||||
|
||||
impl From<FacetSearchQuery> for SearchQuery {
|
||||
fn from(value: FacetSearchQuery) -> Self {
|
||||
SearchQuery {
|
||||
q: value.q,
|
||||
offset: value.offset,
|
||||
limit: value.limit,
|
||||
page: value.page,
|
||||
hits_per_page: value.hits_per_page,
|
||||
attributes_to_retrieve: value.attributes_to_retrieve,
|
||||
attributes_to_crop: value.attributes_to_crop,
|
||||
crop_length: value.crop_length,
|
||||
attributes_to_highlight: value.attributes_to_highlight,
|
||||
show_matches_position: value.show_matches_position,
|
||||
filter: value.filter,
|
||||
sort: value.sort,
|
||||
facets: value.facets,
|
||||
highlight_pre_tag: value.highlight_pre_tag,
|
||||
highlight_post_tag: value.highlight_post_tag,
|
||||
crop_marker: value.crop_marker,
|
||||
matching_strategy: value.matching_strategy,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -24,6 +24,7 @@ use crate::extractors::authentication::{AuthenticationError, GuardedData};
|
||||
use crate::extractors::sequential_extractor::SeqHandler;
|
||||
|
||||
pub mod documents;
|
||||
pub mod facet_search;
|
||||
pub mod search;
|
||||
pub mod settings;
|
||||
|
||||
@@ -44,6 +45,7 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
|
||||
.service(web::resource("/stats").route(web::get().to(SeqHandler(get_index_stats))))
|
||||
.service(web::scope("/documents").configure(documents::configure))
|
||||
.service(web::scope("/search").configure(search::configure))
|
||||
.service(web::scope("/facet-search").configure(facet_search::configure))
|
||||
.service(web::scope("/settings").configure(settings::configure)),
|
||||
);
|
||||
}
|
||||
|
||||
@@ -17,7 +17,7 @@ pub fn configure(config: &mut web::ServiceConfig) {
|
||||
|
||||
pub async fn get_metrics(
|
||||
index_scheduler: GuardedData<ActionPolicy<{ actions::METRICS_GET }>, Data<IndexScheduler>>,
|
||||
auth_controller: GuardedData<ActionPolicy<{ actions::METRICS_GET }>, AuthController>,
|
||||
auth_controller: GuardedData<ActionPolicy<{ actions::METRICS_GET }>, Data<AuthController>>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let auth_filters = index_scheduler.filters();
|
||||
if !auth_filters.all_indexes_authorized() {
|
||||
|
||||
@@ -238,7 +238,7 @@ pub struct Stats {
|
||||
|
||||
async fn get_stats(
|
||||
index_scheduler: GuardedData<ActionPolicy<{ actions::STATS_GET }>, Data<IndexScheduler>>,
|
||||
auth_controller: GuardedData<ActionPolicy<{ actions::STATS_GET }>, AuthController>,
|
||||
auth_controller: GuardedData<ActionPolicy<{ actions::STATS_GET }>, Data<AuthController>>,
|
||||
req: HttpRequest,
|
||||
analytics: web::Data<dyn Analytics>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
@@ -253,7 +253,7 @@ async fn get_stats(
|
||||
|
||||
pub fn create_all_stats(
|
||||
index_scheduler: Data<IndexScheduler>,
|
||||
auth_controller: AuthController,
|
||||
auth_controller: Data<AuthController>,
|
||||
filters: &meilisearch_auth::AuthFilter,
|
||||
) -> Result<Stats, ResponseError> {
|
||||
let mut last_task: Option<OffsetDateTime> = None;
|
||||
@@ -318,9 +318,14 @@ struct KeysResponse {
|
||||
|
||||
pub async fn get_health(
|
||||
req: HttpRequest,
|
||||
index_scheduler: Data<IndexScheduler>,
|
||||
auth_controller: Data<AuthController>,
|
||||
analytics: web::Data<dyn Analytics>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
analytics.health_seen(&req);
|
||||
|
||||
index_scheduler.health().unwrap();
|
||||
auth_controller.health().unwrap();
|
||||
|
||||
Ok(HttpResponse::Ok().json(serde_json::json!({ "status": "available" })))
|
||||
}
|
||||
|
||||
@@ -3,12 +3,15 @@ use std::collections::{BTreeMap, BTreeSet, HashSet};
|
||||
use std::str::FromStr;
|
||||
use std::time::Instant;
|
||||
|
||||
use actix_http::header::q;
|
||||
use deserr::Deserr;
|
||||
use either::Either;
|
||||
use meilisearch_auth::IndexSearchRules;
|
||||
use meilisearch_types::deserr::DeserrJsonError;
|
||||
use meilisearch_types::error::deserr_codes::*;
|
||||
use meilisearch_types::heed::RoTxn;
|
||||
use meilisearch_types::index_uid::IndexUid;
|
||||
use meilisearch_types::milli::{facet, FacetSearchResult, SearchForFacetValue};
|
||||
use meilisearch_types::settings::DEFAULT_PAGINATION_MAX_TOTAL_HITS;
|
||||
use meilisearch_types::{milli, Document};
|
||||
use milli::tokenizer::TokenizerBuilder;
|
||||
@@ -21,6 +24,7 @@ use serde::Serialize;
|
||||
use serde_json::{json, Value};
|
||||
|
||||
use crate::error::MeilisearchHttpError;
|
||||
use crate::routes::indexes::facet_search::FacetSearchQuery;
|
||||
|
||||
type MatchesPosition = BTreeMap<String, Vec<MatchBounds>>;
|
||||
|
||||
@@ -170,7 +174,7 @@ impl SearchQueryWithIndex {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Deserr)]
|
||||
#[derive(Debug, Copy, Clone, PartialEq, Eq, Deserr)]
|
||||
#[deserr(rename_all = camelCase)]
|
||||
pub enum MatchingStrategy {
|
||||
/// Remove query words from last to first
|
||||
@@ -261,13 +265,11 @@ pub fn add_search_rules(query: &mut SearchQuery, rules: IndexSearchRules) {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn perform_search(
|
||||
index: &Index,
|
||||
query: SearchQuery,
|
||||
) -> Result<SearchResult, MeilisearchHttpError> {
|
||||
let before_search = Instant::now();
|
||||
let rtxn = index.read_txn()?;
|
||||
|
||||
fn prepare_search<'t>(
|
||||
index: &'t Index,
|
||||
rtxn: &'t RoTxn,
|
||||
query: &'t SearchQuery,
|
||||
) -> Result<(milli::Search<'t>, bool, usize, usize), MeilisearchHttpError> {
|
||||
let mut search = index.search(&rtxn);
|
||||
|
||||
if let Some(ref query) = query.q {
|
||||
@@ -320,6 +322,19 @@ pub fn perform_search(
|
||||
search.sort_criteria(sort);
|
||||
}
|
||||
|
||||
Ok((search, is_finite_pagination, max_total_hits, offset))
|
||||
}
|
||||
|
||||
pub fn perform_search(
|
||||
index: &Index,
|
||||
query: SearchQuery,
|
||||
) -> Result<SearchResult, MeilisearchHttpError> {
|
||||
let before_search = Instant::now();
|
||||
let rtxn = index.read_txn()?;
|
||||
|
||||
let (search, is_finite_pagination, max_total_hits, offset) =
|
||||
prepare_search(index, &rtxn, &query)?;
|
||||
|
||||
let milli::SearchResult { documents_ids, matching_words, candidates, .. } = search.execute()?;
|
||||
|
||||
let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||
@@ -473,6 +488,24 @@ pub fn perform_search(
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
pub fn perform_facet_search(
|
||||
index: &Index,
|
||||
search_query: SearchQuery,
|
||||
mut facet_query: Option<String>,
|
||||
facet_name: String,
|
||||
) -> Result<Vec<FacetSearchResult>, MeilisearchHttpError> {
|
||||
let before_search = Instant::now();
|
||||
let rtxn = index.read_txn()?;
|
||||
|
||||
let (search, _, _, _) = prepare_search(index, &rtxn, &search_query)?;
|
||||
let mut facet_search = SearchForFacetValue::new(facet_name, search);
|
||||
if let Some(facet_query) = facet_query.take() {
|
||||
facet_search.query(facet_query);
|
||||
}
|
||||
|
||||
facet_search.execute().map_err(Into::into)
|
||||
}
|
||||
|
||||
fn insert_geo_distance(sorts: &[String], document: &mut Document) {
|
||||
lazy_static::lazy_static! {
|
||||
static ref GEO_REGEX: Regex =
|
||||
|
||||
@@ -82,7 +82,7 @@ impl Server {
|
||||
> {
|
||||
actix_web::test::init_service(create_app(
|
||||
self.service.index_scheduler.clone().into(),
|
||||
self.service.auth.clone(),
|
||||
self.service.auth.clone().into(),
|
||||
self.service.options.clone(),
|
||||
analytics::MockAnalytics::new(&self.service.options),
|
||||
true,
|
||||
|
||||
@@ -13,7 +13,7 @@ use crate::common::encoder::Encoder;
|
||||
|
||||
pub struct Service {
|
||||
pub index_scheduler: Arc<IndexScheduler>,
|
||||
pub auth: AuthController,
|
||||
pub auth: Arc<AuthController>,
|
||||
pub options: Opt,
|
||||
pub api_key: Option<String>,
|
||||
}
|
||||
@@ -107,7 +107,7 @@ impl Service {
|
||||
pub async fn request(&self, mut req: test::TestRequest) -> (Value, StatusCode) {
|
||||
let app = test::init_service(create_app(
|
||||
self.index_scheduler.clone().into(),
|
||||
self.auth.clone(),
|
||||
self.auth.clone().into(),
|
||||
self.options.clone(),
|
||||
analytics::MockAnalytics::new(&self.options),
|
||||
true,
|
||||
|
||||
@@ -279,6 +279,81 @@ async fn add_csv_document() {
|
||||
"###);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn add_csv_document_with_types() {
|
||||
let server = Server::new().await;
|
||||
let index = server.index("pets");
|
||||
|
||||
let document = "#id:number,name:string,race:string,age:number,cute:boolean
|
||||
0,jean,bernese mountain,2.5,true
|
||||
1,,,,
|
||||
2,lilou,pug,-2,false";
|
||||
|
||||
let (response, code) = index.raw_update_documents(document, Some("text/csv"), "").await;
|
||||
snapshot!(code, @"202 Accepted");
|
||||
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
|
||||
{
|
||||
"taskUid": 0,
|
||||
"indexUid": "pets",
|
||||
"status": "enqueued",
|
||||
"type": "documentAdditionOrUpdate",
|
||||
"enqueuedAt": "[date]"
|
||||
}
|
||||
"###);
|
||||
let response = index.wait_task(response["taskUid"].as_u64().unwrap()).await;
|
||||
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]", ".duration" => "[duration]" }), @r###"
|
||||
{
|
||||
"uid": 0,
|
||||
"indexUid": "pets",
|
||||
"status": "succeeded",
|
||||
"type": "documentAdditionOrUpdate",
|
||||
"canceledBy": null,
|
||||
"details": {
|
||||
"receivedDocuments": 3,
|
||||
"indexedDocuments": 3
|
||||
},
|
||||
"error": null,
|
||||
"duration": "[duration]",
|
||||
"enqueuedAt": "[date]",
|
||||
"startedAt": "[date]",
|
||||
"finishedAt": "[date]"
|
||||
}
|
||||
"###);
|
||||
|
||||
let (documents, code) = index.get_all_documents(GetAllDocumentsOptions::default()).await;
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(documents), @r###"
|
||||
{
|
||||
"results": [
|
||||
{
|
||||
"#id": 0,
|
||||
"name": "jean",
|
||||
"race": "bernese mountain",
|
||||
"age": 2.5,
|
||||
"cute": true
|
||||
},
|
||||
{
|
||||
"#id": 1,
|
||||
"name": null,
|
||||
"race": null,
|
||||
"age": null,
|
||||
"cute": null
|
||||
},
|
||||
{
|
||||
"#id": 2,
|
||||
"name": "lilou",
|
||||
"race": "pug",
|
||||
"age": -2,
|
||||
"cute": false
|
||||
}
|
||||
],
|
||||
"offset": 0,
|
||||
"limit": 20,
|
||||
"total": 3
|
||||
}
|
||||
"###);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn add_csv_document_with_custom_delimiter() {
|
||||
let server = Server::new().await;
|
||||
@@ -343,6 +418,40 @@ async fn add_csv_document_with_custom_delimiter() {
|
||||
"###);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn add_csv_document_with_types_error() {
|
||||
let server = Server::new().await;
|
||||
let index = server.index("pets");
|
||||
|
||||
let document = "#id:number,a:boolean,b:number
|
||||
0,doggo,1";
|
||||
|
||||
let (response, code) = index.raw_update_documents(document, Some("text/csv"), "").await;
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
|
||||
{
|
||||
"message": "The `csv` payload provided is malformed: `Error parsing boolean \"doggo\" at line 1: provided string was not `true` or `false``.",
|
||||
"code": "malformed_payload",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#malformed_payload"
|
||||
}
|
||||
"###);
|
||||
|
||||
let document = "#id:number,a:boolean,b:number
|
||||
0,true,doggo";
|
||||
|
||||
let (response, code) = index.raw_update_documents(document, Some("text/csv"), "").await;
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
|
||||
{
|
||||
"message": "The `csv` payload provided is malformed: `Error parsing number \"doggo\" at line 1: invalid float literal`.",
|
||||
"code": "malformed_payload",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#malformed_payload"
|
||||
}
|
||||
"###);
|
||||
}
|
||||
|
||||
/// any other content-type is must be refused
|
||||
#[actix_rt::test]
|
||||
async fn error_add_documents_test_bad_content_types() {
|
||||
|
||||
@@ -1121,6 +1121,12 @@ async fn import_dump_v5() {
|
||||
assert_eq!(indexes["results"][1]["uid"], json!("test2"));
|
||||
assert_eq!(indexes["results"][0]["primaryKey"], json!("id"));
|
||||
|
||||
// before doing anything we're going to wait until all the tasks in the dump have finished processing
|
||||
let result = server.tasks_filter("statuses=enqueued,processing").await.0;
|
||||
for task in result["results"].as_array().unwrap() {
|
||||
server.wait_task(task["uid"].as_u64().unwrap()).await;
|
||||
}
|
||||
|
||||
let expected_stats = json!({
|
||||
"numberOfDocuments": 10,
|
||||
"isIndexing": false,
|
||||
|
||||
@@ -672,7 +672,7 @@ async fn filter_reserved_geo_attribute_array() {
|
||||
index.wait_task(1).await;
|
||||
|
||||
let expected_response = json!({
|
||||
"message": "`_geo` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` field coordinates.\n1:5 _geo = Glass",
|
||||
"message": "`_geo` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.\n1:13 _geo = Glass",
|
||||
"code": "invalid_search_filter",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_search_filter"
|
||||
@@ -697,7 +697,7 @@ async fn filter_reserved_geo_attribute_string() {
|
||||
index.wait_task(1).await;
|
||||
|
||||
let expected_response = json!({
|
||||
"message": "`_geo` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` field coordinates.\n1:5 _geo = Glass",
|
||||
"message": "`_geo` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.\n1:13 _geo = Glass",
|
||||
"code": "invalid_search_filter",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_search_filter"
|
||||
@@ -722,7 +722,7 @@ async fn filter_reserved_attribute_array() {
|
||||
index.wait_task(1).await;
|
||||
|
||||
let expected_response = json!({
|
||||
"message": "`_geoDistance` is a reserved keyword and thus can't be used as a filter expression.\n1:13 _geoDistance = Glass",
|
||||
"message": "`_geoDistance` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.\n1:21 _geoDistance = Glass",
|
||||
"code": "invalid_search_filter",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_search_filter"
|
||||
@@ -747,7 +747,7 @@ async fn filter_reserved_attribute_string() {
|
||||
index.wait_task(1).await;
|
||||
|
||||
let expected_response = json!({
|
||||
"message": "`_geoDistance` is a reserved keyword and thus can't be used as a filter expression.\n1:13 _geoDistance = Glass",
|
||||
"message": "`_geoDistance` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.\n1:21 _geoDistance = Glass",
|
||||
"code": "invalid_search_filter",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_search_filter"
|
||||
@@ -760,6 +760,56 @@ async fn filter_reserved_attribute_string() {
|
||||
.await;
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn filter_reserved_geo_point_array() {
|
||||
let server = Server::new().await;
|
||||
let index = server.index("test");
|
||||
|
||||
index.update_settings(json!({"filterableAttributes": ["title"]})).await;
|
||||
|
||||
let documents = DOCUMENTS.clone();
|
||||
index.add_documents(documents, None).await;
|
||||
index.wait_task(1).await;
|
||||
|
||||
let expected_response = json!({
|
||||
"message": "`_geoPoint` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.\n1:18 _geoPoint = Glass",
|
||||
"code": "invalid_search_filter",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_search_filter"
|
||||
});
|
||||
index
|
||||
.search(json!({"filter": ["_geoPoint = Glass"]}), |response, code| {
|
||||
assert_eq!(response, expected_response);
|
||||
assert_eq!(code, 400);
|
||||
})
|
||||
.await;
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn filter_reserved_geo_point_string() {
|
||||
let server = Server::new().await;
|
||||
let index = server.index("test");
|
||||
|
||||
index.update_settings(json!({"filterableAttributes": ["title"]})).await;
|
||||
|
||||
let documents = DOCUMENTS.clone();
|
||||
index.add_documents(documents, None).await;
|
||||
index.wait_task(1).await;
|
||||
|
||||
let expected_response = json!({
|
||||
"message": "`_geoPoint` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.\n1:18 _geoPoint = Glass",
|
||||
"code": "invalid_search_filter",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_search_filter"
|
||||
});
|
||||
index
|
||||
.search(json!({"filter": "_geoPoint = Glass"}), |response, code| {
|
||||
assert_eq!(response, expected_response);
|
||||
assert_eq!(code, 400);
|
||||
})
|
||||
.await;
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn sort_geo_reserved_attribute() {
|
||||
let server = Server::new().await;
|
||||
|
||||
@@ -81,6 +81,8 @@ impl FromStr for Member {
|
||||
if is_reserved_keyword(text)
|
||||
|| text.starts_with("_geoRadius(")
|
||||
|| text.starts_with("_geoBoundingBox(")
|
||||
|| text.starts_with("_geo(")
|
||||
|| text.starts_with("_geoDistance(")
|
||||
{
|
||||
return Err(AscDescError::ReservedKeyword { name: text.to_string() })?;
|
||||
}
|
||||
@@ -265,6 +267,13 @@ mod tests {
|
||||
("_geoPoint(0, -180.000001):desc", GeoError(BadGeoError::Lng(-180.000001))),
|
||||
("_geoPoint(159.256, 130):asc", GeoError(BadGeoError::Lat(159.256))),
|
||||
("_geoPoint(12, -2021):desc", GeoError(BadGeoError::Lng(-2021.))),
|
||||
("_geo(12, -2021):asc", ReservedKeyword { name: S("_geo(12, -2021)") }),
|
||||
("_geo(12, -2021):desc", ReservedKeyword { name: S("_geo(12, -2021)") }),
|
||||
("_geoDistance(12, -2021):asc", ReservedKeyword { name: S("_geoDistance(12, -2021)") }),
|
||||
(
|
||||
"_geoDistance(12, -2021):desc",
|
||||
ReservedKeyword { name: S("_geoDistance(12, -2021)") },
|
||||
),
|
||||
];
|
||||
|
||||
for (req, expected_error) in invalid_req {
|
||||
|
||||
@@ -114,14 +114,15 @@ impl<W: Write> DocumentsBatchBuilder<W> {
|
||||
self.value_buffer.clear();
|
||||
|
||||
let value = &record[*i];
|
||||
let trimmed_value = value.trim();
|
||||
match type_ {
|
||||
AllowedType::Number => {
|
||||
if value.trim().is_empty() {
|
||||
if trimmed_value.is_empty() {
|
||||
to_writer(&mut self.value_buffer, &Value::Null)?;
|
||||
} else if let Ok(integer) = value.trim().parse::<i64>() {
|
||||
} else if let Ok(integer) = trimmed_value.parse::<i64>() {
|
||||
to_writer(&mut self.value_buffer, &integer)?;
|
||||
} else {
|
||||
match value.trim().parse::<f64>() {
|
||||
match trimmed_value.parse::<f64>() {
|
||||
Ok(float) => {
|
||||
to_writer(&mut self.value_buffer, &float)?;
|
||||
}
|
||||
@@ -135,6 +136,24 @@ impl<W: Write> DocumentsBatchBuilder<W> {
|
||||
}
|
||||
}
|
||||
}
|
||||
AllowedType::Boolean => {
|
||||
if trimmed_value.is_empty() {
|
||||
to_writer(&mut self.value_buffer, &Value::Null)?;
|
||||
} else {
|
||||
match trimmed_value.parse::<bool>() {
|
||||
Ok(bool) => {
|
||||
to_writer(&mut self.value_buffer, &bool)?;
|
||||
}
|
||||
Err(error) => {
|
||||
return Err(Error::ParseBool {
|
||||
error,
|
||||
line,
|
||||
value: value.to_string(),
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
AllowedType::String => {
|
||||
if value.is_empty() {
|
||||
to_writer(&mut self.value_buffer, &Value::Null)?;
|
||||
@@ -173,6 +192,7 @@ impl<W: Write> DocumentsBatchBuilder<W> {
|
||||
#[derive(Debug)]
|
||||
enum AllowedType {
|
||||
String,
|
||||
Boolean,
|
||||
Number,
|
||||
}
|
||||
|
||||
@@ -181,6 +201,7 @@ fn parse_csv_header(header: &str) -> (&str, AllowedType) {
|
||||
match header.rsplit_once(':') {
|
||||
Some((field_name, field_type)) => match field_type {
|
||||
"string" => (field_name, AllowedType::String),
|
||||
"boolean" => (field_name, AllowedType::Boolean),
|
||||
"number" => (field_name, AllowedType::Number),
|
||||
// if the pattern isn't reconized, we keep the whole field.
|
||||
_otherwise => (header, AllowedType::String),
|
||||
|
||||
@@ -3,7 +3,7 @@ mod enriched;
|
||||
mod reader;
|
||||
mod serde_impl;
|
||||
|
||||
use std::fmt::{self, Debug};
|
||||
use std::fmt::Debug;
|
||||
use std::io;
|
||||
use std::str::Utf8Error;
|
||||
|
||||
@@ -87,71 +87,30 @@ impl DocumentsBatchIndex {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
pub enum Error {
|
||||
#[error("Error parsing number {value:?} at line {line}: {error}")]
|
||||
ParseFloat { error: std::num::ParseFloatError, line: usize, value: String },
|
||||
#[error("Error parsing boolean {value:?} at line {line}: {error}")]
|
||||
ParseBool { error: std::str::ParseBoolError, line: usize, value: String },
|
||||
#[error("Invalid document addition format, missing the documents batch index.")]
|
||||
InvalidDocumentFormat,
|
||||
#[error("Invalid enriched data.")]
|
||||
InvalidEnrichedData,
|
||||
InvalidUtf8(Utf8Error),
|
||||
Csv(csv::Error),
|
||||
Json(serde_json::Error),
|
||||
#[error(transparent)]
|
||||
InvalidUtf8(#[from] Utf8Error),
|
||||
#[error(transparent)]
|
||||
Csv(#[from] csv::Error),
|
||||
#[error(transparent)]
|
||||
Json(#[from] serde_json::Error),
|
||||
#[error(transparent)]
|
||||
Serialize(serde_json::Error),
|
||||
Grenad(grenad::Error),
|
||||
Io(io::Error),
|
||||
#[error(transparent)]
|
||||
Grenad(#[from] grenad::Error),
|
||||
#[error(transparent)]
|
||||
Io(#[from] io::Error),
|
||||
}
|
||||
|
||||
impl From<csv::Error> for Error {
|
||||
fn from(e: csv::Error) -> Self {
|
||||
Self::Csv(e)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<io::Error> for Error {
|
||||
fn from(other: io::Error) -> Self {
|
||||
Self::Io(other)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<serde_json::Error> for Error {
|
||||
fn from(other: serde_json::Error) -> Self {
|
||||
Self::Json(other)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<grenad::Error> for Error {
|
||||
fn from(other: grenad::Error) -> Self {
|
||||
Self::Grenad(other)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<Utf8Error> for Error {
|
||||
fn from(other: Utf8Error) -> Self {
|
||||
Self::InvalidUtf8(other)
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for Error {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
match self {
|
||||
Error::ParseFloat { error, line, value } => {
|
||||
write!(f, "Error parsing number {:?} at line {}: {}", value, line, error)
|
||||
}
|
||||
Error::InvalidDocumentFormat => {
|
||||
f.write_str("Invalid document addition format, missing the documents batch index.")
|
||||
}
|
||||
Error::InvalidEnrichedData => f.write_str("Invalid enriched data."),
|
||||
Error::InvalidUtf8(e) => write!(f, "{}", e),
|
||||
Error::Io(e) => write!(f, "{}", e),
|
||||
Error::Serialize(e) => write!(f, "{}", e),
|
||||
Error::Grenad(e) => write!(f, "{}", e),
|
||||
Error::Csv(e) => write!(f, "{}", e),
|
||||
Error::Json(e) => write!(f, "{}", e),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl std::error::Error for Error {}
|
||||
|
||||
#[cfg(test)]
|
||||
pub fn objects_from_json_value(json: serde_json::Value) -> Vec<crate::Object> {
|
||||
let documents = match json {
|
||||
@@ -274,6 +233,19 @@ mod test {
|
||||
]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn csv_types_dont_panic() {
|
||||
let csv1_content =
|
||||
"id:number,b:boolean,c,d:number\n1,,,\n2,true,doggo,2\n3,false,the best doggo,-2\n4,,\"Hello, World!\",2.5";
|
||||
let csv1 = csv::Reader::from_reader(Cursor::new(csv1_content));
|
||||
|
||||
let mut builder = DocumentsBatchBuilder::new(Vec::new());
|
||||
builder.append_csv(csv1).unwrap();
|
||||
let vector = builder.into_inner().unwrap();
|
||||
|
||||
DocumentsBatchReader::from_reader(Cursor::new(vector)).unwrap();
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn out_of_order_csv_fields() {
|
||||
let csv1_content = "id:number,b\n1,0";
|
||||
|
||||
23
milli/src/heed_codec/fst_set_codec.rs
Normal file
23
milli/src/heed_codec/fst_set_codec.rs
Normal file
@@ -0,0 +1,23 @@
|
||||
use fst::Set;
|
||||
use std::borrow::Cow;
|
||||
|
||||
use heed::{BytesDecode, BytesEncode};
|
||||
|
||||
/// A codec for values of type `Set<&[u8]>`.
|
||||
pub struct FstSetCodec;
|
||||
|
||||
impl<'a> BytesEncode<'a> for FstSetCodec {
|
||||
type EItem = Set<Vec<u8>>;
|
||||
|
||||
fn bytes_encode(item: &'a Self::EItem) -> Option<Cow<'a, [u8]>> {
|
||||
Some(Cow::Borrowed(item.as_fst().as_bytes()))
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> BytesDecode<'a> for FstSetCodec {
|
||||
type DItem = Set<&'a [u8]>;
|
||||
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
||||
Some(Set::new(bytes).ok()?)
|
||||
}
|
||||
}
|
||||
@@ -2,6 +2,7 @@ mod beu32_str_codec;
|
||||
mod byte_slice_ref;
|
||||
pub mod facet;
|
||||
mod field_id_word_count_codec;
|
||||
mod fst_set_codec;
|
||||
mod obkv_codec;
|
||||
mod roaring_bitmap;
|
||||
mod roaring_bitmap_length;
|
||||
@@ -15,6 +16,7 @@ pub use str_ref::StrRefCodec;
|
||||
|
||||
pub use self::beu32_str_codec::BEU32StrCodec;
|
||||
pub use self::field_id_word_count_codec::FieldIdWordCountCodec;
|
||||
pub use self::fst_set_codec::FstSetCodec;
|
||||
pub use self::obkv_codec::ObkvCodec;
|
||||
pub use self::roaring_bitmap::{BoRoaringBitmapCodec, CboRoaringBitmapCodec, RoaringBitmapCodec};
|
||||
pub use self::roaring_bitmap_length::{
|
||||
|
||||
@@ -19,7 +19,7 @@ use crate::heed_codec::facet::{
|
||||
FacetGroupKeyCodec, FacetGroupValueCodec, FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec,
|
||||
FieldIdCodec, OrderedF64Codec,
|
||||
};
|
||||
use crate::heed_codec::{ScriptLanguageCodec, StrRefCodec};
|
||||
use crate::heed_codec::{FstSetCodec, ScriptLanguageCodec, StrRefCodec};
|
||||
use crate::{
|
||||
default_criteria, BEU32StrCodec, BoRoaringBitmapCodec, CboRoaringBitmapCodec, Criterion,
|
||||
DocumentId, ExternalDocumentsIds, FacetDistribution, FieldDistribution, FieldId,
|
||||
@@ -81,6 +81,7 @@ pub mod db_name {
|
||||
pub const FACET_ID_F64_DOCIDS: &str = "facet-id-f64-docids";
|
||||
pub const FACET_ID_EXISTS_DOCIDS: &str = "facet-id-exists-docids";
|
||||
pub const FACET_ID_STRING_DOCIDS: &str = "facet-id-string-docids";
|
||||
pub const FACET_ID_STRING_FST: &str = "facet-id-string-fst";
|
||||
pub const FIELD_ID_DOCID_FACET_F64S: &str = "field-id-docid-facet-f64s";
|
||||
pub const FIELD_ID_DOCID_FACET_STRINGS: &str = "field-id-docid-facet-strings";
|
||||
pub const DOCUMENTS: &str = "documents";
|
||||
@@ -134,6 +135,8 @@ pub struct Index {
|
||||
pub facet_id_f64_docids: Database<FacetGroupKeyCodec<OrderedF64Codec>, FacetGroupValueCodec>,
|
||||
/// Maps the facet field id and ranges of strings with the docids that corresponds to them.
|
||||
pub facet_id_string_docids: Database<FacetGroupKeyCodec<StrRefCodec>, FacetGroupValueCodec>,
|
||||
/// Maps the facet field id of the string facets with an FST containing all the facets values.
|
||||
pub facet_id_string_fst: Database<OwnedType<BEU16>, FstSetCodec>,
|
||||
|
||||
/// Maps the document id, the facet field id and the numbers.
|
||||
pub field_id_docid_facet_f64s: Database<FieldDocIdFacetF64Codec, Unit>,
|
||||
@@ -153,7 +156,7 @@ impl Index {
|
||||
) -> Result<Index> {
|
||||
use db_name::*;
|
||||
|
||||
options.max_dbs(19);
|
||||
options.max_dbs(20);
|
||||
unsafe { options.flag(Flags::MdbAlwaysFreePages) };
|
||||
|
||||
let env = options.open(path)?;
|
||||
@@ -174,6 +177,7 @@ impl Index {
|
||||
let word_prefix_position_docids = env.create_database(Some(WORD_PREFIX_POSITION_DOCIDS))?;
|
||||
let facet_id_f64_docids = env.create_database(Some(FACET_ID_F64_DOCIDS))?;
|
||||
let facet_id_string_docids = env.create_database(Some(FACET_ID_STRING_DOCIDS))?;
|
||||
let facet_id_string_fst = env.create_database(Some(FACET_ID_STRING_FST))?;
|
||||
let facet_id_exists_docids = env.create_database(Some(FACET_ID_EXISTS_DOCIDS))?;
|
||||
|
||||
let field_id_docid_facet_f64s = env.create_database(Some(FIELD_ID_DOCID_FACET_F64S))?;
|
||||
@@ -200,6 +204,7 @@ impl Index {
|
||||
field_id_word_count_docids,
|
||||
facet_id_f64_docids,
|
||||
facet_id_string_docids,
|
||||
facet_id_string_fst,
|
||||
facet_id_exists_docids,
|
||||
field_id_docid_facet_f64s,
|
||||
field_id_docid_facet_strings,
|
||||
|
||||
@@ -43,9 +43,9 @@ pub use self::heed_codec::{
|
||||
};
|
||||
pub use self::index::Index;
|
||||
pub use self::search::{
|
||||
CriterionImplementationStrategy, FacetDistribution, Filter, FormatOptions, MatchBounds,
|
||||
MatcherBuilder, MatchingWord, MatchingWords, Search, SearchResult, TermsMatchingStrategy,
|
||||
DEFAULT_VALUES_PER_FACET,
|
||||
CriterionImplementationStrategy, FacetDistribution, FacetSearchResult, Filter, FormatOptions,
|
||||
MatchBounds, MatcherBuilder, MatchingWord, MatchingWords, Search, SearchForFacetValue,
|
||||
SearchResult, TermsMatchingStrategy, DEFAULT_VALUES_PER_FACET,
|
||||
};
|
||||
|
||||
pub type Result<T> = std::result::Result<T, error::Error>;
|
||||
|
||||
@@ -54,8 +54,6 @@ impl Display for BadGeoError {
|
||||
enum FilterError<'a> {
|
||||
AttributeNotFilterable { attribute: &'a str, filterable_fields: HashSet<String> },
|
||||
ParseGeoError(BadGeoError),
|
||||
ReservedGeo(&'a str),
|
||||
Reserved(&'a str),
|
||||
TooDeep,
|
||||
}
|
||||
impl<'a> std::error::Error for FilterError<'a> {}
|
||||
@@ -96,12 +94,6 @@ impl<'a> Display for FilterError<'a> {
|
||||
"Too many filter conditions, can't process more than {} filters.",
|
||||
MAX_FILTER_DEPTH
|
||||
),
|
||||
Self::ReservedGeo(keyword) => write!(f, "`{}` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` field coordinates.", keyword),
|
||||
Self::Reserved(keyword) => write!(
|
||||
f,
|
||||
"`{}` is a reserved keyword and thus can't be used as a filter expression.",
|
||||
keyword
|
||||
),
|
||||
Self::ParseGeoError(error) => write!(f, "{}", error),
|
||||
}
|
||||
}
|
||||
@@ -332,23 +324,10 @@ impl<'a> Filter<'a> {
|
||||
Ok(RoaringBitmap::new())
|
||||
}
|
||||
} else {
|
||||
match fid.value() {
|
||||
attribute @ "_geo" => {
|
||||
Err(fid.as_external_error(FilterError::ReservedGeo(attribute)))?
|
||||
}
|
||||
attribute if attribute.starts_with("_geoPoint(") => {
|
||||
Err(fid.as_external_error(FilterError::ReservedGeo("_geoPoint")))?
|
||||
}
|
||||
attribute @ "_geoDistance" => {
|
||||
Err(fid.as_external_error(FilterError::Reserved(attribute)))?
|
||||
}
|
||||
attribute => {
|
||||
Err(fid.as_external_error(FilterError::AttributeNotFilterable {
|
||||
attribute,
|
||||
filterable_fields: filterable_fields.clone(),
|
||||
}))?
|
||||
}
|
||||
}
|
||||
Err(fid.as_external_error(FilterError::AttributeNotFilterable {
|
||||
attribute: fid.value(),
|
||||
filterable_fields: filterable_fields.clone(),
|
||||
}))?
|
||||
}
|
||||
}
|
||||
FilterCondition::Or(subfilters) => {
|
||||
|
||||
@@ -22,15 +22,20 @@ pub use self::matches::{
|
||||
};
|
||||
use self::query_tree::QueryTreeBuilder;
|
||||
use crate::error::UserError;
|
||||
use crate::heed_codec::facet::{FacetGroupKey, FacetGroupValue};
|
||||
use crate::index::db_name::EXACT_WORD_DOCIDS;
|
||||
use crate::search::criteria::r#final::{Final, FinalResult};
|
||||
use crate::search::criteria::InitialCandidates;
|
||||
use crate::{AscDesc, Criterion, DocumentId, Index, Member, Result};
|
||||
use crate::{fields_ids_map, AscDesc, Criterion, DocumentId, Index, Member, Result, BEU16};
|
||||
|
||||
// Building these factories is not free.
|
||||
static LEVDIST0: Lazy<LevBuilder> = Lazy::new(|| LevBuilder::new(0, true));
|
||||
static LEVDIST1: Lazy<LevBuilder> = Lazy::new(|| LevBuilder::new(1, true));
|
||||
static LEVDIST2: Lazy<LevBuilder> = Lazy::new(|| LevBuilder::new(2, true));
|
||||
|
||||
/// The maximum number of facets returned by the facet search route.
|
||||
const MAX_NUMBER_OF_FACETS: usize = 1000;
|
||||
|
||||
mod criteria;
|
||||
mod distinct;
|
||||
pub mod facet;
|
||||
@@ -446,6 +451,114 @@ pub fn build_dfa(word: &str, typos: u8, is_prefix: bool) -> DFA {
|
||||
}
|
||||
}
|
||||
|
||||
pub struct SearchForFacetValue<'a> {
|
||||
query: Option<String>,
|
||||
facet: String,
|
||||
search_query: Search<'a>,
|
||||
}
|
||||
|
||||
impl<'a> SearchForFacetValue<'a> {
|
||||
pub fn new(facet: String, search_query: Search<'a>) -> SearchForFacetValue<'a> {
|
||||
SearchForFacetValue { query: None, facet, search_query }
|
||||
}
|
||||
|
||||
pub fn query(&mut self, query: impl Into<String>) -> &mut Self {
|
||||
self.query = Some(query.into());
|
||||
self
|
||||
}
|
||||
|
||||
pub fn execute(&self) -> Result<Vec<FacetSearchResult>> {
|
||||
let index = self.search_query.index;
|
||||
let rtxn = self.search_query.rtxn;
|
||||
|
||||
let filterable_fields = index.filterable_fields(rtxn)?;
|
||||
if !filterable_fields.contains(&self.facet) {
|
||||
// TODO create a new type of error
|
||||
return Err(UserError::InvalidSortableAttribute {
|
||||
field: self.facet.clone(),
|
||||
valid_fields: filterable_fields.into_iter().collect(),
|
||||
})?;
|
||||
}
|
||||
|
||||
let fields_ids_map = index.fields_ids_map(rtxn)?;
|
||||
let (field_id, fst) = match fields_ids_map.id(&self.facet) {
|
||||
Some(fid) => {
|
||||
match self.search_query.index.facet_id_string_fst.get(rtxn, &BEU16::new(fid))? {
|
||||
Some(fst) => (fid, fst),
|
||||
None => todo!("return an error, is the user trying to search in numbers?"),
|
||||
}
|
||||
}
|
||||
None => todo!("return an internal error bug"),
|
||||
};
|
||||
|
||||
let search_candidates = self.search_query.execute()?.candidates;
|
||||
|
||||
match self.query.as_ref() {
|
||||
Some(query) => {
|
||||
let is_prefix = true;
|
||||
let starts = StartsWith(Str::new(get_first(query)));
|
||||
let first = Intersection(build_dfa(query, 1, is_prefix), Complement(&starts));
|
||||
let second_dfa = build_dfa(query, 2, is_prefix);
|
||||
let second = Intersection(&second_dfa, &starts);
|
||||
let automaton = Union(first, &second);
|
||||
|
||||
let mut stream = fst.search(automaton).into_stream();
|
||||
let mut result = vec![];
|
||||
let mut length = 0;
|
||||
while let Some(facet_value) = stream.next() {
|
||||
let value = std::str::from_utf8(facet_value)?;
|
||||
let key = FacetGroupKey { field_id, level: 0, left_bound: value };
|
||||
let docids = match index.facet_id_string_docids.get(rtxn, &key)? {
|
||||
Some(FacetGroupValue { bitmap, .. }) => bitmap,
|
||||
None => todo!("return an internal error"),
|
||||
};
|
||||
let count = search_candidates.intersection_len(&docids);
|
||||
if count != 0 {
|
||||
result.push(FacetSearchResult { value: value.to_string(), count });
|
||||
length += 1;
|
||||
}
|
||||
if length >= MAX_NUMBER_OF_FACETS {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(result)
|
||||
}
|
||||
None => {
|
||||
let mut stream = fst.stream();
|
||||
let mut result = vec![];
|
||||
let mut length = 0;
|
||||
while let Some(facet_value) = stream.next() {
|
||||
let value = std::str::from_utf8(facet_value)?;
|
||||
let key = FacetGroupKey { field_id, level: 0, left_bound: value };
|
||||
let docids = match index.facet_id_string_docids.get(rtxn, &key)? {
|
||||
Some(FacetGroupValue { bitmap, .. }) => bitmap,
|
||||
None => todo!("return an internal error"),
|
||||
};
|
||||
let count = search_candidates.intersection_len(&docids);
|
||||
if count != 0 {
|
||||
result.push(FacetSearchResult { value: value.to_string(), count });
|
||||
length += 1;
|
||||
}
|
||||
if length >= MAX_NUMBER_OF_FACETS {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(result)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, serde::Serialize)]
|
||||
pub struct FacetSearchResult {
|
||||
/// The original facet value
|
||||
pub value: String,
|
||||
/// The number of documents associated to this facet
|
||||
pub count: u64,
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
|
||||
@@ -33,6 +33,7 @@ impl<'t, 'u, 'i> ClearDocuments<'t, 'u, 'i> {
|
||||
script_language_docids,
|
||||
facet_id_f64_docids,
|
||||
facet_id_string_docids,
|
||||
facet_id_string_fst: _,
|
||||
facet_id_exists_docids,
|
||||
field_id_docid_facet_f64s,
|
||||
field_id_docid_facet_strings,
|
||||
|
||||
@@ -241,6 +241,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
|
||||
word_prefix_position_docids,
|
||||
facet_id_f64_docids: _,
|
||||
facet_id_string_docids: _,
|
||||
facet_id_string_fst: _,
|
||||
field_id_docid_facet_f64s: _,
|
||||
field_id_docid_facet_strings: _,
|
||||
script_language_docids,
|
||||
|
||||
@@ -78,15 +78,16 @@ pub const FACET_MIN_LEVEL_SIZE: u8 = 5;
|
||||
|
||||
use std::fs::File;
|
||||
|
||||
use heed::types::DecodeIgnore;
|
||||
use log::debug;
|
||||
use time::OffsetDateTime;
|
||||
|
||||
use self::incremental::FacetsUpdateIncremental;
|
||||
use super::FacetsUpdateBulk;
|
||||
use crate::facet::FacetType;
|
||||
use crate::heed_codec::facet::{FacetGroupKeyCodec, FacetGroupValueCodec};
|
||||
use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec};
|
||||
use crate::heed_codec::ByteSliceRefCodec;
|
||||
use crate::{Index, Result};
|
||||
use crate::{Index, Result, BEU16};
|
||||
|
||||
pub mod bulk;
|
||||
pub mod delete;
|
||||
@@ -157,6 +158,43 @@ impl<'i> FacetsUpdate<'i> {
|
||||
);
|
||||
incremental_update.execute(wtxn)?;
|
||||
}
|
||||
|
||||
// We compute one FST by string facet
|
||||
let mut text_fsts = vec![];
|
||||
let mut current_fst: Option<(u16, fst::SetBuilder<Vec<u8>>)> = None;
|
||||
let database = self.index.facet_id_string_docids.remap_data_type::<DecodeIgnore>();
|
||||
for result in database.iter(&wtxn)? {
|
||||
let (facet_group_key, _) = result?;
|
||||
if let FacetGroupKey { field_id, level: 0, left_bound } = facet_group_key {
|
||||
current_fst = match current_fst.take() {
|
||||
Some((fid, fst_builder)) if fid != field_id => {
|
||||
let fst = fst_builder.into_set();
|
||||
text_fsts.push((field_id, fst));
|
||||
Some((field_id, fst::SetBuilder::memory()))
|
||||
}
|
||||
Some((field_id, fst_builder)) => Some((field_id, fst_builder)),
|
||||
None => Some((field_id, fst::SetBuilder::memory())),
|
||||
};
|
||||
|
||||
if let Some((_, fst_builder)) = current_fst.as_mut() {
|
||||
fst_builder.insert(left_bound)?;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if let Some((field_id, fst_builder)) = current_fst {
|
||||
let fst = fst_builder.into_set();
|
||||
text_fsts.push((field_id, fst));
|
||||
}
|
||||
|
||||
// We remove all of the previous FSTs that were in this database
|
||||
self.index.facet_id_string_fst.clear(wtxn)?;
|
||||
|
||||
// We write those FSTs in LMDB now
|
||||
for (field_id, fst) in text_fsts {
|
||||
self.index.facet_id_string_fst.put(wtxn, &BEU16::new(field_id), &fst)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1 +0,0 @@
|
||||
|
||||
@@ -565,8 +565,12 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
||||
self.index.put_primary_key(self.wtxn, primary_key)?;
|
||||
Ok(())
|
||||
} else {
|
||||
let primary_key = self.index.primary_key(self.wtxn)?.unwrap();
|
||||
Err(UserError::PrimaryKeyCannotBeChanged(primary_key.to_string()).into())
|
||||
let curr_primary_key = self.index.primary_key(self.wtxn)?.unwrap().to_string();
|
||||
if primary_key == &curr_primary_key {
|
||||
Ok(())
|
||||
} else {
|
||||
Err(UserError::PrimaryKeyCannotBeChanged(curr_primary_key).into())
|
||||
}
|
||||
}
|
||||
}
|
||||
Setting::Reset => {
|
||||
@@ -1332,6 +1336,17 @@ mod tests {
|
||||
.unwrap();
|
||||
wtxn.commit().unwrap();
|
||||
|
||||
// Updating settings with the same primary key should do nothing
|
||||
let mut wtxn = index.write_txn().unwrap();
|
||||
index
|
||||
.update_settings_using_wtxn(&mut wtxn, |settings| {
|
||||
settings.set_primary_key(S("mykey"));
|
||||
})
|
||||
.unwrap();
|
||||
assert_eq!(index.primary_key(&wtxn).unwrap(), Some("mykey"));
|
||||
wtxn.commit().unwrap();
|
||||
|
||||
// Updating the settings with a different (or no) primary key causes an error
|
||||
let mut wtxn = index.write_txn().unwrap();
|
||||
let error = index
|
||||
.update_settings_using_wtxn(&mut wtxn, |settings| {
|
||||
|
||||
Reference in New Issue
Block a user