Compare commits

..

22 Commits

Author SHA1 Message Date
Clément Renault
f7e0a3edaf Use the minWordSizeForTypos index settings 2023-05-04 15:09:17 +02:00
Clément Renault
a669368f2d Format the code 2023-05-04 14:02:22 +02:00
Clément Renault
2e7ba2b092 Increase the number of dbs 2023-05-04 13:53:08 +02:00
Clément Renault
f983bbe532 Fix compilation issues 2023-05-04 12:27:19 +02:00
Clément Renault
3cbcbad71c Fix a snap test 2023-05-04 12:22:09 +02:00
Clément Renault
e83d9680ba Simplify the placeholder search of the facet-search route 2023-05-04 12:22:09 +02:00
Clément Renault
29153417c4 Use the disableOnAttributes parameter on the facet-search route 2023-05-04 12:22:09 +02:00
Clément Renault
57892e3d76 Use the disableOnWords parameter on the facet-search route 2023-05-04 12:22:09 +02:00
Clément Renault
13d4aee912 Support the typoTolerant.enabled parameter 2023-05-04 12:22:09 +02:00
Clément Renault
ba5db80afa Log an error when a facet value is missing from the database 2023-05-04 12:22:09 +02:00
Clément Renault
f8432dff5d Rename the SearchForFacetValues struct 2023-05-04 12:21:55 +02:00
Clément Renault
ae3dad5b63 Return an internal error when a field id is missing 2023-05-04 12:19:25 +02:00
Clément Renault
938ab16799 Make clippy happy 2023-05-04 12:19:25 +02:00
Clément Renault
8252f3331f Improve the returned errors from the facet-search route 2023-05-04 12:19:25 +02:00
Clément Renault
5b745cc8a2 Fix the max number of facets to be returned to 100 2023-05-04 12:19:25 +02:00
Clément Renault
19c3ef64bd Return the correct response JSON object from the facet-search route 2023-05-04 12:19:24 +02:00
Clément Renault
3b346aac42 Send analytics about the facet-search route 2023-05-04 12:18:10 +02:00
Clément Renault
72a2469178 Make the search for facet work 2023-05-04 12:18:09 +02:00
Kerollmops
d7d085ef1e Introduce the facet search route 2023-05-04 12:18:09 +02:00
Kerollmops
1e61870c2b Restrict the number of facet search results to 1000 2023-05-04 12:18:09 +02:00
Kerollmops
4d2860a692 Introduce the SearchForFacetValue struct 2023-05-04 12:17:52 +02:00
Clément Renault
51ca77726d Store the facet string values in multiple FSTs 2023-05-04 11:40:41 +02:00
196 changed files with 2913 additions and 14608 deletions

View File

@@ -2,3 +2,4 @@ target
Dockerfile
.dockerignore
.gitignore
**/.git

View File

@@ -35,7 +35,7 @@ jobs:
- name: Build deb package
run: cargo deb -p meilisearch -o target/debian/meilisearch.deb
- name: Upload debian pkg to release
uses: svenstaro/upload-release-action@2.6.1
uses: svenstaro/upload-release-action@2.5.0
with:
repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
file: target/debian/meilisearch.deb

View File

@@ -54,7 +54,7 @@ jobs:
# No need to upload binaries for dry run (cron)
- name: Upload binaries to release
if: github.event_name == 'release'
uses: svenstaro/upload-release-action@2.6.1
uses: svenstaro/upload-release-action@2.5.0
with:
repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
file: target/release/meilisearch
@@ -87,7 +87,7 @@ jobs:
# No need to upload binaries for dry run (cron)
- name: Upload binaries to release
if: github.event_name == 'release'
uses: svenstaro/upload-release-action@2.6.1
uses: svenstaro/upload-release-action@2.5.0
with:
repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
file: target/release/${{ matrix.artifact_name }}
@@ -121,7 +121,7 @@ jobs:
- name: Upload the binary to release
# No need to upload binaries for dry run (cron)
if: github.event_name == 'release'
uses: svenstaro/upload-release-action@2.6.1
uses: svenstaro/upload-release-action@2.5.0
with:
repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
file: target/${{ matrix.target }}/release/meilisearch
@@ -183,7 +183,7 @@ jobs:
- name: Upload the binary to release
# No need to upload binaries for dry run (cron)
if: github.event_name == 'release'
uses: svenstaro/upload-release-action@2.6.1
uses: svenstaro/upload-release-action@2.5.0
with:
repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
file: target/${{ matrix.target }}/release/meilisearch

View File

@@ -58,9 +58,13 @@ jobs:
- name: Set up QEMU
uses: docker/setup-qemu-action@v2
with:
platforms: linux/amd64,linux/arm64
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v2
with:
platforms: linux/amd64,linux/arm64
- name: Login to Docker Hub
uses: docker/login-action@v2
@@ -88,10 +92,13 @@ jobs:
push: true
platforms: linux/amd64,linux/arm64
tags: ${{ steps.meta.outputs.tags }}
builder: ${{ steps.buildx.outputs.name }}
build-args: |
COMMIT_SHA=${{ github.sha }}
COMMIT_DATE=${{ steps.build-metadata.outputs.date }}
GIT_TAG=${{ github.ref_name }}
cache-from: type=gha
cache-to: type=gha,mode=max
# /!\ Don't touch this without checking with Cloud team
- name: Send CI information to Cloud team

View File

@@ -3,11 +3,6 @@ name: SDKs tests
on:
workflow_dispatch:
inputs:
docker_image:
description: 'The Meilisearch Docker image used'
required: false
default: nightly
schedule:
- cron: "0 6 * * MON" # Every Monday at 6:00AM
@@ -16,28 +11,13 @@ env:
MEILI_NO_ANALYTICS: 'true'
jobs:
define-docker-image:
runs-on: ubuntu-latest
outputs:
docker-image: ${{ steps.define-image.outputs.docker-image }}
steps:
- uses: actions/checkout@v3
- name: Define the Docker image we need to use
id: define-image
run: |
event=${{ github.event.action }}
echo "docker-image=nightly" >> $GITHUB_OUTPUT
if [[ $event == 'workflow_dispatch' ]]; then
echo "docker-image=${{ github.event.inputs.docker_image }}" >> $GITHUB_OUTPUT
fi
meilisearch-js-tests:
needs: define-docker-image
name: JS SDK tests
runs-on: ubuntu-latest
services:
meilisearch:
image: getmeili/meilisearch:${{ github.event.inputs.docker_image }}
image: getmeili/meilisearch:nightly
env:
MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }}
MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }}
@@ -67,12 +47,11 @@ jobs:
run: yarn test:env:browser
instant-meilisearch-tests:
needs: define-docker-image
name: instant-meilisearch tests
runs-on: ubuntu-latest
services:
meilisearch:
image: getmeili/meilisearch:${{ github.event.inputs.docker_image }}
image: getmeili/meilisearch:nightly
env:
MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }}
MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }}
@@ -94,12 +73,11 @@ jobs:
run: yarn build
meilisearch-php-tests:
needs: define-docker-image
name: PHP SDK tests
runs-on: ubuntu-latest
services:
meilisearch:
image: getmeili/meilisearch:${{ github.event.inputs.docker_image }}
image: getmeili/meilisearch:nightly
env:
MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }}
MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }}
@@ -125,12 +103,11 @@ jobs:
composer remove --dev guzzlehttp/guzzle http-interop/http-factory-guzzle
meilisearch-python-tests:
needs: define-docker-image
name: Python SDK tests
runs-on: ubuntu-latest
services:
meilisearch:
image: getmeili/meilisearch:${{ github.event.inputs.docker_image }}
image: getmeili/meilisearch:nightly
env:
MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }}
MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }}
@@ -150,12 +127,11 @@ jobs:
run: pipenv run pytest
meilisearch-go-tests:
needs: define-docker-image
name: Go SDK tests
runs-on: ubuntu-latest
services:
meilisearch:
image: getmeili/meilisearch:${{ github.event.inputs.docker_image }}
image: getmeili/meilisearch:nightly
env:
MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }}
MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }}
@@ -163,7 +139,7 @@ jobs:
- '7700:7700'
steps:
- name: Set up Go
uses: actions/setup-go@v4
uses: actions/setup-go@v3
with:
go-version: stable
- uses: actions/checkout@v3
@@ -180,12 +156,11 @@ jobs:
run: go test -v ./...
meilisearch-ruby-tests:
needs: define-docker-image
name: Ruby SDK tests
runs-on: ubuntu-latest
services:
meilisearch:
image: getmeili/meilisearch:${{ github.event.inputs.docker_image }}
image: getmeili/meilisearch:nightly
env:
MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }}
MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }}
@@ -205,12 +180,11 @@ jobs:
run: bundle exec rspec
meilisearch-rust-tests:
needs: define-docker-image
name: Rust SDK tests
runs-on: ubuntu-latest
services:
meilisearch:
image: getmeili/meilisearch:${{ github.event.inputs.docker_image }}
image: getmeili/meilisearch:nightly
env:
MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }}
MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }}

View File

@@ -43,7 +43,7 @@ jobs:
toolchain: nightly
override: true
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.4.0
uses: Swatinem/rust-cache@v2.2.1
- name: Run cargo check without any default features
uses: actions-rs/cargo@v1
with:
@@ -65,7 +65,7 @@ jobs:
steps:
- uses: actions/checkout@v3
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.4.0
uses: Swatinem/rust-cache@v2.2.1
- name: Run cargo check without any default features
uses: actions-rs/cargo@v1
with:
@@ -105,29 +105,6 @@ jobs:
command: test
args: --workspace --locked --release --all-features
test-disabled-tokenization:
name: Test disabled tokenization
runs-on: ubuntu-latest
container:
image: ubuntu:18.04
if: github.event_name == 'schedule'
steps:
- uses: actions/checkout@v3
- name: Install needed dependencies
run: |
apt-get update
apt-get install --assume-yes build-essential curl
- uses: actions-rs/toolchain@v1
with:
toolchain: stable
override: true
- name: Run cargo tree without default features and check lindera is not present
run: |
cargo tree -f '{p} {f}' -e normal --no-default-features | grep lindera -vqz
- name: Run cargo tree with default features and check lindera is pressent
run: |
cargo tree -f '{p} {f}' -e normal | grep lindera -qz
# We run tests in debug also, to make sure that the debug_assertions are hit
test-debug:
name: Run tests in debug
@@ -146,7 +123,7 @@ jobs:
toolchain: stable
override: true
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.4.0
uses: Swatinem/rust-cache@v2.2.1
- name: Run tests in debug
uses: actions-rs/cargo@v1
with:
@@ -165,7 +142,7 @@ jobs:
override: true
components: clippy
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.4.0
uses: Swatinem/rust-cache@v2.2.1
- name: Run cargo clippy
uses: actions-rs/cargo@v1
with:
@@ -184,7 +161,7 @@ jobs:
override: true
components: rustfmt
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.4.0
uses: Swatinem/rust-cache@v2.2.1
- name: Run cargo fmt
# Since we never ran the `build.rs` script in the benchmark directory we are missing one auto-generated import file.
# Since we want to trigger (and fail) this action as fast as possible, instead of building the benchmark crate

View File

@@ -19,8 +19,8 @@ If Meilisearch does not offer optimized support for your language, please consid
## Assumptions
1. **You're familiar with [GitHub](https://github.com) and the [Pull Requests (PR)](https://help.github.com/en/github/collaborating-with-issues-and-pull-requests/about-pull-requests) workflow.**
2. **You've read the Meilisearch [documentation](https://www.meilisearch.com/docs).**
3. **You know about the [Meilisearch community on Discord](https://discord.meilisearch.com).
2. **You've read the Meilisearch [documentation](https://docs.meilisearch.com).**
3. **You know about the [Meilisearch community](https://docs.meilisearch.com/learn/what_is_meilisearch/contact.html).
Please use this for help.**
## How to Contribute

32
Cargo.lock generated
View File

@@ -463,7 +463,7 @@ checksum = "b645a089122eccb6111b4f81cbc1a49f5900ac4666bb93ac027feaecf15607bf"
[[package]]
name = "benchmarks"
version = "1.2.0"
version = "1.1.1"
dependencies = [
"anyhow",
"bytes",
@@ -1209,7 +1209,7 @@ dependencies = [
[[package]]
name = "dump"
version = "1.2.0"
version = "1.1.1"
dependencies = [
"anyhow",
"big_s",
@@ -1428,7 +1428,7 @@ dependencies = [
[[package]]
name = "file-store"
version = "1.2.0"
version = "1.1.1"
dependencies = [
"faux",
"tempfile",
@@ -1450,7 +1450,7 @@ dependencies = [
[[package]]
name = "filter-parser"
version = "1.2.0"
version = "1.1.1"
dependencies = [
"insta",
"nom",
@@ -1476,7 +1476,7 @@ dependencies = [
[[package]]
name = "flatten-serde-json"
version = "1.2.0"
version = "1.1.1"
dependencies = [
"criterion",
"serde_json",
@@ -1794,7 +1794,7 @@ checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8"
[[package]]
name = "heed"
version = "0.12.5"
source = "git+https://github.com/meilisearch/heed?tag=v0.12.6#8c5b94225fc949c02bb7b900cc50ffaf6b584b1e"
source = "git+https://github.com/meilisearch/heed?tag=v0.12.5#4158a6c484752afaaf9e2530a6ee0e7ab0f24ee8"
dependencies = [
"byteorder",
"heed-traits",
@@ -1811,12 +1811,12 @@ dependencies = [
[[package]]
name = "heed-traits"
version = "0.7.0"
source = "git+https://github.com/meilisearch/heed?tag=v0.12.6#8c5b94225fc949c02bb7b900cc50ffaf6b584b1e"
source = "git+https://github.com/meilisearch/heed?tag=v0.12.5#4158a6c484752afaaf9e2530a6ee0e7ab0f24ee8"
[[package]]
name = "heed-types"
version = "0.7.2"
source = "git+https://github.com/meilisearch/heed?tag=v0.12.6#8c5b94225fc949c02bb7b900cc50ffaf6b584b1e"
source = "git+https://github.com/meilisearch/heed?tag=v0.12.5#4158a6c484752afaaf9e2530a6ee0e7ab0f24ee8"
dependencies = [
"bincode",
"heed-traits",
@@ -1959,7 +1959,7 @@ dependencies = [
[[package]]
name = "index-scheduler"
version = "1.2.0"
version = "1.1.1"
dependencies = [
"anyhow",
"big_s",
@@ -2113,7 +2113,7 @@ dependencies = [
[[package]]
name = "json-depth-checker"
version = "1.2.0"
version = "1.1.1"
dependencies = [
"criterion",
"serde_json",
@@ -2539,7 +2539,7 @@ checksum = "490cc448043f947bae3cbee9c203358d62dbee0db12107a74be5c30ccfd09771"
[[package]]
name = "meili-snap"
version = "1.2.0"
version = "1.1.1"
dependencies = [
"insta",
"md5",
@@ -2548,7 +2548,7 @@ dependencies = [
[[package]]
name = "meilisearch"
version = "1.2.0"
version = "1.1.1"
dependencies = [
"actix-cors",
"actix-http",
@@ -2636,7 +2636,7 @@ dependencies = [
[[package]]
name = "meilisearch-auth"
version = "1.2.0"
version = "1.1.1"
dependencies = [
"base64 0.21.0",
"enum-iterator",
@@ -2655,7 +2655,7 @@ dependencies = [
[[package]]
name = "meilisearch-types"
version = "1.2.0"
version = "1.1.1"
dependencies = [
"actix-web",
"anyhow",
@@ -2709,7 +2709,7 @@ dependencies = [
[[package]]
name = "milli"
version = "1.2.0"
version = "1.1.1"
dependencies = [
"big_s",
"bimap",
@@ -3064,7 +3064,7 @@ checksum = "478c572c3d73181ff3c2539045f6eb99e5491218eae919370993b890cdbdd98e"
[[package]]
name = "permissive-json-pointer"
version = "1.2.0"
version = "1.1.1"
dependencies = [
"big_s",
"serde_json",

View File

@@ -17,7 +17,7 @@ members = [
]
[workspace.package]
version = "1.2.0"
version = "1.1.1"
authors = ["Quentin de Quelen <quentin@dequelen.me>", "Clément Renault <clement@meilisearch.com>"]
description = "Meilisearch HTTP server"
homepage = "https://meilisearch.com"

View File

@@ -1,3 +1,4 @@
# syntax=docker/dockerfile:1.4
# Compile
FROM rust:alpine3.16 AS compiler
@@ -11,7 +12,7 @@ ARG GIT_TAG
ENV VERGEN_GIT_SHA=${COMMIT_SHA} VERGEN_GIT_COMMIT_TIMESTAMP=${COMMIT_DATE} VERGEN_GIT_SEMVER_LIGHTWEIGHT=${GIT_TAG}
ENV RUSTFLAGS="-C target-feature=-crt-static"
COPY . .
COPY --link . .
RUN set -eux; \
apkArch="$(apk --print-arch)"; \
if [ "$apkArch" = "aarch64" ]; then \
@@ -30,7 +31,7 @@ RUN apk update --quiet \
# add meilisearch to the `/bin` so you can run it from anywhere and it's easy
# to find.
COPY --from=compiler /meilisearch/target/release/meilisearch /bin/meilisearch
COPY --from=compiler --link /meilisearch/target/release/meilisearch /bin/meilisearch
# To stay compatible with the older version of the container (pre v0.27.0) we're
# going to symlink the meilisearch binary in the path to `/meilisearch`
RUN ln -s /bin/meilisearch /meilisearch

View File

@@ -7,8 +7,8 @@
<a href="https://www.meilisearch.com">Website</a> |
<a href="https://roadmap.meilisearch.com/tabs/1-under-consideration">Roadmap</a> |
<a href="https://blog.meilisearch.com">Blog</a> |
<a href="https://www.meilisearch.com/docs">Documentation</a> |
<a href="https://www.meilisearch.com/docs/faq">FAQ</a> |
<a href="https://meilisearch.com/docs">Documentation</a> |
<a href="https://meilisearch.com/docs/faq">FAQ</a> |
<a href="https://discord.meilisearch.com">Discord</a>
</h4>
@@ -36,27 +36,27 @@ Meilisearch helps you shape a delightful search experience in a snap, offering f
## ✨ Features
- **Search-as-you-type:** find search results in less than 50 milliseconds
- **[Typo tolerance](https://www.meilisearch.com/docs/learn/getting_started/customizing_relevancy#typo-tolerance):** get relevant matches even when queries contain typos and misspellings
- **[Filtering](https://www.meilisearch.com/docs/learn/advanced/filtering) and [faceted search](https://www.meilisearch.com/docs/learn/advanced/faceted_search):** enhance your user's search experience with custom filters and build a faceted search interface in a few lines of code
- **[Sorting](https://www.meilisearch.com/docs/learn/advanced/sorting):** sort results based on price, date, or pretty much anything else your users need
- **[Synonym support](https://www.meilisearch.com/docs/learn/getting_started/customizing_relevancy#synonyms):** configure synonyms to include more relevant content in your search results
- **[Geosearch](https://www.meilisearch.com/docs/learn/advanced/geosearch):** filter and sort documents based on geographic data
- **[Extensive language support](https://www.meilisearch.com/docs/learn/what_is_meilisearch/language):** search datasets in any language, with optimized support for Chinese, Japanese, Hebrew, and languages using the Latin alphabet
- **[Security management](https://www.meilisearch.com/docs/learn/security/master_api_keys):** control which users can access what data with API keys that allow fine-grained permissions handling
- **[Multi-Tenancy](https://www.meilisearch.com/docs/learn/security/tenant_tokens):** personalize search results for any number of application tenants
- **[Typo tolerance](https://meilisearch.com/docs/learn/getting_started/customizing_relevancy#typo-tolerance):** get relevant matches even when queries contain typos and misspellings
- **[Filtering](https://meilisearch.com/docs/learn/advanced/filtering) and [faceted search](https://meilisearch.com/docs/learn/advanced/faceted_search):** enhance your user's search experience with custom filters and build a faceted search interface in a few lines of code
- **[Sorting](https://meilisearch.com/docs/learn/advanced/sorting):** sort results based on price, date, or pretty much anything else your users need
- **[Synonym support](https://meilisearch.com/docs/learn/getting_started/customizing_relevancy#synonyms):** configure synonyms to include more relevant content in your search results
- **[Geosearch](https://meilisearch.com/docs/learn/advanced/geosearch):** filter and sort documents based on geographic data
- **[Extensive language support](https://meilisearch.com/docs/learn/what_is_meilisearch/language):** search datasets in any language, with optimized support for Chinese, Japanese, Hebrew, and languages using the Latin alphabet
- **[Security management](https://meilisearch.com/docs/learn/security/master_api_keys):** control which users can access what data with API keys that allow fine-grained permissions handling
- **[Multi-Tenancy](https://meilisearch.com/docs/learn/security/tenant_tokens):** personalize search results for any number of application tenants
- **Highly Customizable:** customize Meilisearch to your specific needs or use our out-of-the-box and hassle-free presets
- **[RESTful API](https://www.meilisearch.com/docs/reference/api/overview):** integrate Meilisearch in your technical stack with our plugins and SDKs
- **[RESTful API](https://meilisearch.com/docs/reference/api/overview):** integrate Meilisearch in your technical stack with our plugins and SDKs
- **Easy to install, deploy, and maintain**
## 📖 Documentation
You can consult Meilisearch's documentation at [https://www.meilisearch.com/docs](https://www.meilisearch.com/docs/).
You can consult Meilisearch's documentation at [https://meilisearch.com/docs](https://meilisearch.com/docs/).
## 🚀 Getting started
For basic instructions on how to set up Meilisearch, add documents to an index, and search for documents, take a look at our [Quick Start](https://www.meilisearch.com/docs/learn/getting_started/quick_start) guide.
For basic instructions on how to set up Meilisearch, add documents to an index, and search for documents, take a look at our [Quick Start](https://meilisearch.com/docs/learn/getting_started/quick_start) guide.
You may also want to check out [Meilisearch 101](https://www.meilisearch.com/docs/learn/getting_started/filtering_and_sorting) for an introduction to some of Meilisearch's most popular features.
You may also want to check out [Meilisearch 101](https://meilisearch.com/docs/learn/getting_started/filtering_and_sorting) for an introduction to some of Meilisearch's most popular features.
## ☁️ Meilisearch cloud
@@ -66,7 +66,7 @@ Let us manage your infrastructure so you can focus on integrating a great search
Install one of our SDKs in your project for seamless integration between Meilisearch and your favorite language or framework!
Take a look at the complete [Meilisearch integration list](https://www.meilisearch.com/docs/learn/what_is_meilisearch/sdks).
Take a look at the complete [Meilisearch integration list](https://meilisearch.com/docs/learn/what_is_meilisearch/sdks).
[![Logos belonging to different languages and frameworks supported by Meilisearch, including React, Ruby on Rails, Go, Rust, and PHP](assets/integrations.png)](https://www.meilisearch.com/docs/learn/what_is_meilisearch/sdks)
@@ -74,17 +74,17 @@ Take a look at the complete [Meilisearch integration list](https://www.meilisear
Experienced users will want to keep our [API Reference](https://www.meilisearch.com/docs/reference/api/overview) close at hand.
We also offer a wide range of dedicated guides to all Meilisearch features, such as [filtering](https://www.meilisearch.com/docs/learn/advanced/filtering), [sorting](https://www.meilisearch.com/docs/learn/advanced/sorting), [geosearch](https://www.meilisearch.com/docs/learn/advanced/geosearch), [API keys](https://www.meilisearch.com/docs/learn/security/master_api_keys), and [tenant tokens](https://www.meilisearch.com/docs/learn/security/tenant_tokens).
We also offer a wide range of dedicated guides to all Meilisearch features, such as [filtering](https://meilisearch.com/docs/learn/advanced/filtering), [sorting](https://meilisearch.com/docs/learn/advanced/sorting), [geosearch](https://meilisearch.com/docs/learn/advanced/geosearch), [API keys](https://meilisearch.com/docs/learn/security/master_api_keys), and [tenant tokens](https://meilisearch.com/docs/learn/security/tenant_tokens).
Finally, for more in-depth information, refer to our articles explaining fundamental Meilisearch concepts such as [documents](https://www.meilisearch.com/docs/learn/core_concepts/documents) and [indexes](https://www.meilisearch.com/docs/learn/core_concepts/indexes).
Finally, for more in-depth information, refer to our articles explaining fundamental Meilisearch concepts such as [documents](https://meilisearch.com/docs/learn/core_concepts/documents) and [indexes](https://meilisearch.com/docs/learn/core_concepts/indexes).
## 📊 Telemetry
Meilisearch collects **anonymized** data from users to help us improve our product. You can [deactivate this](https://www.meilisearch.com/docs/learn/what_is_meilisearch/telemetry#how-to-disable-data-collection) whenever you want.
Meilisearch collects **anonymized** data from users to help us improve our product. You can [deactivate this](https://meilisearch.com/docs/learn/what_is_meilisearch/telemetry#how-to-disable-data-collection) whenever you want.
To request deletion of collected data, please write to us at [privacy@meilisearch.com](mailto:privacy@meilisearch.com). Don't forget to include your `Instance UID` in the message, as this helps us quickly find and delete your data.
If you want to know more about the kind of data we collect and what we use it for, check the [telemetry section](https://www.meilisearch.com/docs/learn/what_is_meilisearch/telemetry) of our documentation.
If you want to know more about the kind of data we collect and what we use it for, check the [telemetry section](https://meilisearch.com/docs/learn/what_is_meilisearch/telemetry) of our documentation.
## 📫 Get in touch!

File diff suppressed because it is too large Load Diff

View File

@@ -1,19 +0,0 @@
global:
scrape_interval: 15s # By default, scrape targets every 15 seconds.
# Attach these labels to any time series or alerts when communicating with
# external systems (federation, remote storage, Alertmanager).
external_labels:
monitor: 'codelab-monitor'
# A scrape configuration containing exactly one endpoint to scrape:
# Here it's Prometheus itself.
scrape_configs:
# The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
- job_name: 'meilisearch'
# Override the global default and scrape targets from this job every 5 seconds.
scrape_interval: 5s
static_configs:
- targets: ['localhost:7700']

View File

@@ -13,7 +13,7 @@ license.workspace = true
[dependencies]
anyhow = "1.0.70"
csv = "1.2.1"
milli = { path = "../milli" }
milli = { path = "../milli", default-features = false }
mimalloc = { version = "0.1.36", default-features = false }
serde_json = { version = "1.0.95", features = ["preserve_order"] }
@@ -31,7 +31,7 @@ flate2 = "1.0.25"
reqwest = { version = "0.11.16", features = ["blocking", "rustls-tls"], default-features = false }
[features]
default = ["milli/all-tokenizations"]
default = ["milli/default"]
[[bench]]
name = "search_songs"

View File

@@ -119,9 +119,9 @@ _[Download the `smol-wiki` dataset](https://milli-benchmarks.fra1.digitaloceansp
### Movies
`movies` is a really small dataset we uses as our example in the [getting started](https://www.meilisearch.com/docs/learn/getting_started/quick_start)
`movies` is a really small dataset we uses as our example in the [getting started](https://docs.meilisearch.com/learn/getting_started/)
_[Download the `movies` dataset](https://www.meilisearch.com/movies.json)._
_[Download the `movies` dataset](https://docs.meilisearch.com/movies.json)._
### All Countries

View File

@@ -1,131 +1,130 @@
# This file shows the default configuration of Meilisearch.
# All variables are defined here: https://www.meilisearch.com/docs/learn/configuration/instance_options#environment-variables
# All variables are defined here: https://docs.meilisearch.com/learn/configuration/instance_options.html#environment-variables
# Designates the location where database files will be created and retrieved.
# https://www.meilisearch.com/docs/learn/configuration/instance_options#database-path
db_path = "./data.ms"
# Designates the location where database files will be created and retrieved.
# https://docs.meilisearch.com/learn/configuration/instance_options.html#database-path
# Configures the instance's environment. Value must be either `production` or `development`.
# https://www.meilisearch.com/docs/learn/configuration/instance_options#environment
env = "development"
# Configures the instance's environment. Value must be either `production` or `development`.
# https://docs.meilisearch.com/learn/configuration/instance_options.html#environment
# The address on which the HTTP server will listen.
http_addr = "localhost:7700"
# The address on which the HTTP server will listen.
# Sets the instance's master key, automatically protecting all routes except GET /health.
# https://www.meilisearch.com/docs/learn/configuration/instance_options#master-key
# master_key = "YOUR_MASTER_KEY_VALUE"
# Sets the instance's master key, automatically protecting all routes except GET /health.
# https://docs.meilisearch.com/learn/configuration/instance_options.html#master-key
# no_analytics = true
# Deactivates Meilisearch's built-in telemetry when provided.
# Meilisearch automatically collects data from all instances that do not opt out using this flag.
# All gathered data is used solely for the purpose of improving Meilisearch, and can be deleted at any time.
# https://www.meilisearch.com/docs/learn/configuration/instance_options#disable-analytics
# no_analytics = true
# https://docs.meilisearch.com/learn/configuration/instance_options.html#disable-analytics
# Sets the maximum size of accepted payloads.
# https://www.meilisearch.com/docs/learn/configuration/instance_options#payload-limit-size
http_payload_size_limit = "100 MB"
# Sets the maximum size of accepted payloads.
# https://docs.meilisearch.com/learn/configuration/instance_options.html#payload-limit-size
log_level = "INFO"
# Defines how much detail should be present in Meilisearch's logs.
# Meilisearch currently supports six log levels, listed in order of increasing verbosity: `OFF`, `ERROR`, `WARN`, `INFO`, `DEBUG`, `TRACE`
# https://www.meilisearch.com/docs/learn/configuration/instance_options#log-level
log_level = "INFO"
# https://docs.meilisearch.com/learn/configuration/instance_options.html#log-level
# Sets the maximum amount of RAM Meilisearch can use when indexing.
# https://www.meilisearch.com/docs/learn/configuration/instance_options#max-indexing-memory
# max_indexing_memory = "2 GiB"
# Sets the maximum amount of RAM Meilisearch can use when indexing.
# https://docs.meilisearch.com/learn/configuration/instance_options.html#max-indexing-memory
# Sets the maximum number of threads Meilisearch can use during indexing.
# https://www.meilisearch.com/docs/learn/configuration/instance_options#max-indexing-threads
# max_indexing_threads = 4
# Sets the maximum number of threads Meilisearch can use during indexing.
# https://docs.meilisearch.com/learn/configuration/instance_options.html#max-indexing-threads
#############
### DUMPS ###
#############
# Sets the directory where Meilisearch will create dump files.
# https://www.meilisearch.com/docs/learn/configuration/instance_options#dump-directory
dump_dir = "dumps/"
# Sets the directory where Meilisearch will create dump files.
# https://docs.meilisearch.com/learn/configuration/instance_options.html#dump-directory
# Imports the dump file located at the specified path. Path must point to a .dump file.
# https://www.meilisearch.com/docs/learn/configuration/instance_options#import-dump
# import_dump = "./path/to/my/file.dump"
# Imports the dump file located at the specified path. Path must point to a .dump file.
# https://docs.meilisearch.com/learn/configuration/instance_options.html#import-dump
# Prevents Meilisearch from throwing an error when `import_dump` does not point to a valid dump file.
# https://www.meilisearch.com/docs/learn/configuration/instance_options#ignore-missing-dump
ignore_missing_dump = false
# Prevents Meilisearch from throwing an error when `import_dump` does not point to a valid dump file.
# https://docs.meilisearch.com/learn/configuration/instance_options.html#ignore-missing-dump
# Prevents a Meilisearch instance with an existing database from throwing an error when using `import_dump`.
# https://www.meilisearch.com/docs/learn/configuration/instance_options#ignore-dump-if-db-exists
ignore_dump_if_db_exists = false
# Prevents a Meilisearch instance with an existing database from throwing an error when using `import_dump`.
# https://docs.meilisearch.com/learn/configuration/instance_options.html#ignore-dump-if-db-exists
#################
### SNAPSHOTS ###
#################
schedule_snapshot = false
# Enables scheduled snapshots when true, disable when false (the default).
# If the value is given as an integer, then enables the scheduled snapshot with the passed value as the interval
# between each snapshot, in seconds.
# https://www.meilisearch.com/docs/learn/configuration/instance_options#schedule-snapshot-creation
schedule_snapshot = false
# https://docs.meilisearch.com/learn/configuration/instance_options.html#schedule-snapshot-creation
# Sets the directory where Meilisearch will store snapshots.
# https://www.meilisearch.com/docs/learn/configuration/instance_options#snapshot-destination
snapshot_dir = "snapshots/"
# Sets the directory where Meilisearch will store snapshots.
# https://docs.meilisearch.com/learn/configuration/instance_options.html#snapshot-destination
# Launches Meilisearch after importing a previously-generated snapshot at the given filepath.
# https://www.meilisearch.com/docs/learn/configuration/instance_options#import-snapshot
# import_snapshot = "./path/to/my/snapshot"
# Launches Meilisearch after importing a previously-generated snapshot at the given filepath.
# https://docs.meilisearch.com/learn/configuration/instance_options.html#import-snapshot
# Prevents a Meilisearch instance from throwing an error when `import_snapshot` does not point to a valid snapshot file.
# https://www.meilisearch.com/docs/learn/configuration/instance_options#ignore-missing-snapshot
ignore_missing_snapshot = false
# Prevents a Meilisearch instance from throwing an error when `import_snapshot` does not point to a valid snapshot file.
# https://docs.meilisearch.com/learn/configuration/instance_options.html#ignore-missing-snapshot
# Prevents a Meilisearch instance with an existing database from throwing an error when using `import_snapshot`.
# https://www.meilisearch.com/docs/learn/configuration/instance_options#ignore-snapshot-if-db-exists
ignore_snapshot_if_db_exists = false
# Prevents a Meilisearch instance with an existing database from throwing an error when using `import_snapshot`.
# https://docs.meilisearch.com/learn/configuration/instance_options.html#ignore-snapshot-if-db-exists
###########
### SSL ###
###########
# Enables client authentication in the specified path.
# https://www.meilisearch.com/docs/learn/configuration/instance_options#ssl-authentication-path
# ssl_auth_path = "./path/to/root"
# Enables client authentication in the specified path.
# https://docs.meilisearch.com/learn/configuration/instance_options.html#ssl-authentication-path
# Sets the server's SSL certificates.
# https://www.meilisearch.com/docs/learn/configuration/instance_options#ssl-certificates-path
# ssl_cert_path = "./path/to/certfile"
# Sets the server's SSL certificates.
# https://docs.meilisearch.com/learn/configuration/instance_options.html#ssl-certificates-path
# Sets the server's SSL key files.
# https://www.meilisearch.com/docs/learn/configuration/instance_options#ssl-key-path
# ssl_key_path = "./path/to/private-key"
# Sets the server's SSL key files.
# https://docs.meilisearch.com/learn/configuration/instance_options.html#ssl-key-path
# Sets the server's OCSP file.
# https://www.meilisearch.com/docs/learn/configuration/instance_options#ssl-ocsp-path
# ssl_ocsp_path = "./path/to/ocsp-file"
# Sets the server's OCSP file.
# https://docs.meilisearch.com/learn/configuration/instance_options.html#ssl-ocsp-path
# Makes SSL authentication mandatory.
# https://www.meilisearch.com/docs/learn/configuration/instance_options#ssl-require-auth
ssl_require_auth = false
# Makes SSL authentication mandatory.
# https://docs.meilisearch.com/learn/configuration/instance_options.html#ssl-require-auth
# Activates SSL session resumption.
# https://www.meilisearch.com/docs/learn/configuration/instance_options#ssl-resumption
ssl_resumption = false
# Activates SSL session resumption.
# https://docs.meilisearch.com/learn/configuration/instance_options.html#ssl-resumption
# Activates SSL tickets.
# https://www.meilisearch.com/docs/learn/configuration/instance_options#ssl-tickets
ssl_tickets = false
# Activates SSL tickets.
# https://docs.meilisearch.com/learn/configuration/instance_options.html#ssl-tickets
#############################
### Experimental features ###
#############################
experimental_enable_metrics = false
# Experimental metrics feature. For more information, see: <https://github.com/meilisearch/meilisearch/discussions/3518>
# Enables the Prometheus metrics on the `GET /metrics` endpoint.
experimental_enable_metrics = false
# Experimental RAM reduction during indexing, do not use in production, see: <https://github.com/meilisearch/product/discussions/652>
experimental_reduce_indexing_memory_usage = false

View File

@@ -101,9 +101,6 @@ pub enum KindDump {
documents_ids: Vec<String>,
},
DocumentClear,
DocumentDeletionByFilter {
filter: serde_json::Value,
},
Settings {
settings: Box<meilisearch_types::settings::Settings<Unchecked>>,
is_deletion: bool,
@@ -169,9 +166,6 @@ impl From<KindWithContent> for KindDump {
KindWithContent::DocumentDeletion { documents_ids, .. } => {
KindDump::DocumentDeletion { documents_ids }
}
KindWithContent::DocumentDeletionByFilter { filter_expr, .. } => {
KindDump::DocumentDeletionByFilter { filter: filter_expr }
}
KindWithContent::DocumentClear { .. } => KindDump::DocumentClear,
KindWithContent::SettingsUpdate {
new_settings,

File diff suppressed because it is too large Load Diff

View File

@@ -25,7 +25,6 @@ enum AutobatchKind {
primary_key: Option<String>,
},
DocumentDeletion,
DocumentDeletionByFilter,
DocumentClear,
Settings {
allow_index_creation: bool,
@@ -65,9 +64,6 @@ impl From<KindWithContent> for AutobatchKind {
} => AutobatchKind::DocumentImport { method, allow_index_creation, primary_key },
KindWithContent::DocumentDeletion { .. } => AutobatchKind::DocumentDeletion,
KindWithContent::DocumentClear { .. } => AutobatchKind::DocumentClear,
KindWithContent::DocumentDeletionByFilter { .. } => {
AutobatchKind::DocumentDeletionByFilter
}
KindWithContent::SettingsUpdate { allow_index_creation, is_deletion, .. } => {
AutobatchKind::Settings {
allow_index_creation: allow_index_creation && !is_deletion,
@@ -101,9 +97,6 @@ pub enum BatchKind {
DocumentDeletion {
deletion_ids: Vec<TaskId>,
},
DocumentDeletionByFilter {
id: TaskId,
},
ClearAndSettings {
other: Vec<TaskId>,
allow_index_creation: bool,
@@ -202,9 +195,6 @@ impl BatchKind {
K::DocumentDeletion => {
(Continue(BatchKind::DocumentDeletion { deletion_ids: vec![task_id] }), false)
}
K::DocumentDeletionByFilter => {
(Break(BatchKind::DocumentDeletionByFilter { id: task_id }), false)
}
K::Settings { allow_index_creation } => (
Continue(BatchKind::Settings { allow_index_creation, settings_ids: vec![task_id] }),
allow_index_creation,
@@ -222,7 +212,7 @@ impl BatchKind {
match (self, kind) {
// We don't batch any of these operations
(this, K::IndexCreation | K::IndexUpdate | K::IndexSwap | K::DocumentDeletionByFilter) => Break(this),
(this, K::IndexCreation | K::IndexUpdate | K::IndexSwap) => Break(this),
// We must not batch tasks that don't have the same index creation rights if the index doesn't already exists.
(this, kind) if !index_already_exists && this.allow_index_creation() == Some(false) && kind.allow_index_creation() == Some(true) => {
Break(this)
@@ -481,8 +471,7 @@ impl BatchKind {
BatchKind::IndexCreation { .. }
| BatchKind::IndexDeletion { .. }
| BatchKind::IndexUpdate { .. }
| BatchKind::IndexSwap { .. }
| BatchKind::DocumentDeletionByFilter { .. },
| BatchKind::IndexSwap { .. },
_,
) => {
unreachable!()

View File

@@ -24,15 +24,13 @@ use std::io::BufWriter;
use dump::IndexMetadata;
use log::{debug, error, info};
use meilisearch_types::error::Code;
use meilisearch_types::heed::{RoTxn, RwTxn};
use meilisearch_types::milli::documents::{obkv_to_object, DocumentsBatchReader};
use meilisearch_types::milli::heed::CompactionOption;
use meilisearch_types::milli::update::{
DeleteDocuments, DocumentDeletionResult, IndexDocumentsConfig, IndexDocumentsMethod,
Settings as MilliSettings,
DocumentDeletionResult, IndexDocumentsConfig, IndexDocumentsMethod, Settings as MilliSettings,
};
use meilisearch_types::milli::{self, Filter, BEU32};
use meilisearch_types::milli::{self, BEU32};
use meilisearch_types::settings::{apply_settings_to_builder, Settings, Unchecked};
use meilisearch_types::tasks::{Details, IndexSwap, Kind, KindWithContent, Status, Task};
use meilisearch_types::{compression, Index, VERSION_FILE_NAME};
@@ -67,10 +65,6 @@ pub(crate) enum Batch {
op: IndexOperation,
must_create_index: bool,
},
IndexDocumentDeletionByFilter {
index_uid: String,
task: Task,
},
IndexCreation {
index_uid: String,
primary_key: Option<String>,
@@ -155,7 +149,6 @@ impl Batch {
| Batch::TaskDeletion(task)
| Batch::Dump(task)
| Batch::IndexCreation { task, .. }
| Batch::IndexDocumentDeletionByFilter { task, .. }
| Batch::IndexUpdate { task, .. } => vec![task.uid],
Batch::SnapshotCreation(tasks) | Batch::IndexDeletion { tasks, .. } => {
tasks.iter().map(|task| task.uid).collect()
@@ -194,8 +187,7 @@ impl Batch {
IndexOperation { op, .. } => Some(op.index_uid()),
IndexCreation { index_uid, .. }
| IndexUpdate { index_uid, .. }
| IndexDeletion { index_uid, .. }
| IndexDocumentDeletionByFilter { index_uid, .. } => Some(index_uid),
| IndexDeletion { index_uid, .. } => Some(index_uid),
}
}
}
@@ -235,18 +227,6 @@ impl IndexScheduler {
},
must_create_index,
})),
BatchKind::DocumentDeletionByFilter { id } => {
let task = self.get_task(rtxn, id)?.ok_or(Error::CorruptedTaskQueue)?;
match &task.kind {
KindWithContent::DocumentDeletionByFilter { index_uid, .. } => {
Ok(Some(Batch::IndexDocumentDeletionByFilter {
index_uid: index_uid.clone(),
task,
}))
}
_ => unreachable!(),
}
}
BatchKind::DocumentOperation { method, operation_ids, .. } => {
let tasks = self.get_existing_tasks(rtxn, operation_ids)?;
let primary_key = tasks
@@ -887,51 +867,6 @@ impl IndexScheduler {
Ok(tasks)
}
Batch::IndexDocumentDeletionByFilter { mut task, index_uid: _ } => {
let (index_uid, filter) =
if let KindWithContent::DocumentDeletionByFilter { index_uid, filter_expr } =
&task.kind
{
(index_uid, filter_expr)
} else {
unreachable!()
};
let index = {
let rtxn = self.env.read_txn()?;
self.index_mapper.index(&rtxn, index_uid)?
};
let deleted_documents = delete_document_by_filter(filter, index);
let original_filter = if let Some(Details::DocumentDeletionByFilter {
original_filter,
deleted_documents: _,
}) = task.details
{
original_filter
} else {
// In the case of a `documentDeleteByFilter` the details MUST be set
unreachable!();
};
match deleted_documents {
Ok(deleted_documents) => {
task.status = Status::Succeeded;
task.details = Some(Details::DocumentDeletionByFilter {
original_filter,
deleted_documents: Some(deleted_documents),
});
}
Err(e) => {
task.status = Status::Failed;
task.details = Some(Details::DocumentDeletionByFilter {
original_filter,
deleted_documents: Some(0),
});
task.error = Some(e.into());
}
}
Ok(vec![task])
}
Batch::IndexCreation { index_uid, primary_key, task } => {
let wtxn = self.env.write_txn()?;
if self.index_mapper.exists(&wtxn, &index_uid)? {
@@ -1486,25 +1421,3 @@ impl IndexScheduler {
Ok(content_files_to_delete)
}
}
fn delete_document_by_filter(filter: &serde_json::Value, index: Index) -> Result<u64> {
let filter = Filter::from_json(filter)?;
Ok(if let Some(filter) = filter {
let mut wtxn = index.write_txn()?;
let candidates = filter.evaluate(&wtxn, &index).map_err(|err| match err {
milli::Error::UserError(milli::UserError::InvalidFilter(_)) => {
Error::from(err).with_custom_error_code(Code::InvalidDocumentFilter)
}
e => e.into(),
})?;
let mut delete_operation = DeleteDocuments::new(&mut wtxn, &index)?;
delete_operation.delete_documents(&candidates);
let deleted_documents =
delete_operation.execute().map(|result| result.deleted_documents)?;
wtxn.commit()?;
deleted_documents
} else {
0
})
}

View File

@@ -46,8 +46,6 @@ impl From<DateField> for Code {
#[allow(clippy::large_enum_variant)]
#[derive(Error, Debug)]
pub enum Error {
#[error("{1}")]
WithCustomErrorCode(Code, Box<Self>),
#[error("Index `{0}` not found.")]
IndexNotFound(String),
#[error("Index `{0}` already exists.")]
@@ -136,59 +134,11 @@ pub enum Error {
TaskDatabaseUpdate(Box<Self>),
#[error(transparent)]
HeedTransaction(heed::Error),
#[cfg(test)]
#[error("Planned failure for tests.")]
PlannedFailure,
}
impl Error {
pub fn is_recoverable(&self) -> bool {
match self {
Error::IndexNotFound(_)
| Error::WithCustomErrorCode(_, _)
| Error::IndexAlreadyExists(_)
| Error::SwapDuplicateIndexFound(_)
| Error::SwapDuplicateIndexesFound(_)
| Error::SwapIndexNotFound(_)
| Error::NoSpaceLeftInTaskQueue
| Error::SwapIndexesNotFound(_)
| Error::CorruptedDump
| Error::InvalidTaskDate { .. }
| Error::InvalidTaskUids { .. }
| Error::InvalidTaskStatuses { .. }
| Error::InvalidTaskTypes { .. }
| Error::InvalidTaskCanceledBy { .. }
| Error::InvalidIndexUid { .. }
| Error::TaskNotFound(_)
| Error::TaskDeletionWithEmptyQuery
| Error::TaskCancelationWithEmptyQuery
| Error::Dump(_)
| Error::Heed(_)
| Error::Milli(_)
| Error::ProcessBatchPanicked
| Error::FileStore(_)
| Error::IoError(_)
| Error::Persist(_)
| Error::Anyhow(_) => true,
Error::CreateBatch(_)
| Error::CorruptedTaskQueue
| Error::TaskDatabaseUpdate(_)
| Error::HeedTransaction(_) => false,
#[cfg(test)]
Error::PlannedFailure => false,
}
}
pub fn with_custom_error_code(self, code: Code) -> Self {
Self::WithCustomErrorCode(code, Box::new(self))
}
}
impl ErrorCode for Error {
fn error_code(&self) -> Code {
match self {
Error::WithCustomErrorCode(code, _) => *code,
Error::IndexNotFound(_) => Code::IndexNotFound,
Error::IndexAlreadyExists(_) => Code::IndexAlreadyExists,
Error::SwapDuplicateIndexesFound(_) => Code::InvalidSwapDuplicateIndexFound,
@@ -221,9 +171,6 @@ impl ErrorCode for Error {
Error::CorruptedDump => Code::Internal,
Error::TaskDatabaseUpdate(_) => Code::Internal,
Error::CreateBatch(_) => Code::Internal,
#[cfg(test)]
Error::PlannedFailure => Code::Internal,
}
}
}

View File

@@ -5,7 +5,6 @@ use std::collections::BTreeMap;
use std::path::Path;
use std::time::Duration;
use meilisearch_types::heed::flags::Flags;
use meilisearch_types::heed::{EnvClosingEvent, EnvOpenOptions};
use meilisearch_types::milli::Index;
use time::OffsetDateTime;
@@ -54,7 +53,6 @@ pub struct IndexMap {
pub struct ClosingIndex {
uuid: Uuid,
closing_event: EnvClosingEvent,
enable_mdb_writemap: bool,
map_size: usize,
generation: usize,
}
@@ -70,7 +68,6 @@ impl ClosingIndex {
pub fn wait_timeout(self, timeout: Duration) -> Option<ReopenableIndex> {
self.closing_event.wait_timeout(timeout).then_some(ReopenableIndex {
uuid: self.uuid,
enable_mdb_writemap: self.enable_mdb_writemap,
map_size: self.map_size,
generation: self.generation,
})
@@ -79,7 +76,6 @@ impl ClosingIndex {
pub struct ReopenableIndex {
uuid: Uuid,
enable_mdb_writemap: bool,
map_size: usize,
generation: usize,
}
@@ -107,7 +103,7 @@ impl ReopenableIndex {
return Ok(());
}
map.unavailable.remove(&self.uuid);
map.create(&self.uuid, path, None, self.enable_mdb_writemap, self.map_size)?;
map.create(&self.uuid, path, None, self.map_size)?;
}
Ok(())
}
@@ -174,17 +170,16 @@ impl IndexMap {
uuid: &Uuid,
path: &Path,
date: Option<(OffsetDateTime, OffsetDateTime)>,
enable_mdb_writemap: bool,
map_size: usize,
) -> Result<Index> {
if !matches!(self.get_unavailable(uuid), Missing) {
panic!("Attempt to open an index that was unavailable");
}
let index = create_or_open_index(path, date, enable_mdb_writemap, map_size)?;
let index = create_or_open_index(path, date, map_size)?;
match self.available.insert(*uuid, index.clone()) {
InsertionOutcome::InsertedNew => (),
InsertionOutcome::Evicted(evicted_uuid, evicted_index) => {
self.close(evicted_uuid, evicted_index, enable_mdb_writemap, 0);
self.close(evicted_uuid, evicted_index, 0);
}
InsertionOutcome::Replaced(_) => {
panic!("Attempt to open an index that was already opened")
@@ -217,30 +212,17 @@ impl IndexMap {
/// | Closing | Closing |
/// | Available | Closing |
///
pub fn close_for_resize(
&mut self,
uuid: &Uuid,
enable_mdb_writemap: bool,
map_size_growth: usize,
) {
pub fn close_for_resize(&mut self, uuid: &Uuid, map_size_growth: usize) {
let Some(index) = self.available.remove(uuid) else { return; };
self.close(*uuid, index, enable_mdb_writemap, map_size_growth);
self.close(*uuid, index, map_size_growth);
}
fn close(
&mut self,
uuid: Uuid,
index: Index,
enable_mdb_writemap: bool,
map_size_growth: usize,
) {
fn close(&mut self, uuid: Uuid, index: Index, map_size_growth: usize) {
let map_size = index.map_size().unwrap_or(DEFAULT_MAP_SIZE) + map_size_growth;
let closing_event = index.prepare_for_closing();
let generation = self.next_generation();
self.unavailable.insert(
uuid,
Some(ClosingIndex { uuid, closing_event, enable_mdb_writemap, map_size, generation }),
);
self.unavailable
.insert(uuid, Some(ClosingIndex { uuid, closing_event, map_size, generation }));
}
/// Attempts to delete and index.
@@ -300,15 +282,11 @@ impl IndexMap {
fn create_or_open_index(
path: &Path,
date: Option<(OffsetDateTime, OffsetDateTime)>,
enable_mdb_writemap: bool,
map_size: usize,
) -> Result<Index> {
let mut options = EnvOpenOptions::new();
options.map_size(clamp_to_page_size(map_size));
options.max_readers(1024);
if enable_mdb_writemap {
unsafe { options.flag(Flags::MdbWriteMap) };
}
if let Some((created, updated)) = date {
Ok(Index::new_with_creation_dates(options, path, created, updated)?)

View File

@@ -66,8 +66,6 @@ pub struct IndexMapper {
index_base_map_size: usize,
/// The quantity by which the map size of an index is incremented upon reopening, in bytes.
index_growth_amount: usize,
/// Whether we open a meilisearch index with the MDB_WRITEMAP option or not.
enable_mdb_writemap: bool,
pub indexer_config: Arc<IndexerConfig>,
}
@@ -90,17 +88,8 @@ pub enum IndexStatus {
pub struct IndexStats {
/// Number of documents in the index.
pub number_of_documents: u64,
/// Size taken up by the index' DB, in bytes.
///
/// This includes the size taken by both the used and free pages of the DB, and as the free pages
/// are not returned to the disk after a deletion, this number is typically larger than
/// `used_database_size` that only includes the size of the used pages.
/// Size of the index' DB, in bytes.
pub database_size: u64,
/// Size taken by the used pages of the index' DB, in bytes.
///
/// As the DB backend does not return to the disk the pages that are not currently used by the DB,
/// this value is typically smaller than `database_size`.
pub used_database_size: u64,
/// Association of every field name with the number of times it occurs in the documents.
pub field_distribution: FieldDistribution,
/// Creation date of the index.
@@ -116,10 +105,10 @@ impl IndexStats {
///
/// - rtxn: a RO transaction for the index, obtained from `Index::read_txn()`.
pub fn new(index: &Index, rtxn: &RoTxn) -> Result<Self> {
let database_size = index.on_disk_size()?;
Ok(IndexStats {
number_of_documents: index.number_of_documents(rtxn)?,
database_size: index.on_disk_size()?,
used_database_size: index.used_size()?,
database_size,
field_distribution: index.field_distribution(rtxn)?,
created_at: index.created_at(rtxn)?,
updated_at: index.updated_at(rtxn)?,
@@ -134,22 +123,15 @@ impl IndexMapper {
index_base_map_size: usize,
index_growth_amount: usize,
index_count: usize,
enable_mdb_writemap: bool,
indexer_config: IndexerConfig,
) -> Result<Self> {
let mut wtxn = env.write_txn()?;
let index_mapping = env.create_database(&mut wtxn, Some(INDEX_MAPPING))?;
let index_stats = env.create_database(&mut wtxn, Some(INDEX_STATS))?;
wtxn.commit()?;
Ok(Self {
index_map: Arc::new(RwLock::new(IndexMap::new(index_count))),
index_mapping,
index_stats,
index_mapping: env.create_database(Some(INDEX_MAPPING))?,
index_stats: env.create_database(Some(INDEX_STATS))?,
base_path,
index_base_map_size,
index_growth_amount,
enable_mdb_writemap,
indexer_config: Arc::new(indexer_config),
})
}
@@ -180,7 +162,6 @@ impl IndexMapper {
&uuid,
&index_path,
date,
self.enable_mdb_writemap,
self.index_base_map_size,
)?;
@@ -292,11 +273,7 @@ impl IndexMapper {
.ok_or_else(|| Error::IndexNotFound(name.to_string()))?;
// We remove the index from the in-memory index map.
self.index_map.write().unwrap().close_for_resize(
&uuid,
self.enable_mdb_writemap,
self.index_growth_amount,
);
self.index_map.write().unwrap().close_for_resize(&uuid, self.index_growth_amount);
Ok(())
}
@@ -361,7 +338,6 @@ impl IndexMapper {
&uuid,
&index_path,
None,
self.enable_mdb_writemap,
self.index_base_map_size,
)?;
}

View File

@@ -184,9 +184,6 @@ fn snapshot_details(d: &Details) -> String {
provided_ids: received_document_ids,
deleted_documents,
} => format!("{{ received_document_ids: {received_document_ids}, deleted_documents: {deleted_documents:?} }}"),
Details::DocumentDeletionByFilter { original_filter, deleted_documents } => format!(
"{{ original_filter: {original_filter}, deleted_documents: {deleted_documents:?} }}"
),
Details::ClearAll { deleted_documents } => {
format!("{{ deleted_documents: {deleted_documents:?} }}")
},

View File

@@ -31,7 +31,7 @@ mod uuid_codec;
pub type Result<T> = std::result::Result<T, Error>;
pub type TaskId = u32;
use std::collections::{BTreeMap, HashMap};
use std::collections::HashMap;
use std::ops::{Bound, RangeBounds};
use std::path::{Path, PathBuf};
use std::sync::atomic::AtomicBool;
@@ -233,8 +233,6 @@ pub struct IndexSchedulerOptions {
pub task_db_size: usize,
/// The size, in bytes, with which a meilisearch index is opened the first time of each meilisearch index.
pub index_base_map_size: usize,
/// Whether we open a meilisearch index with the MDB_WRITEMAP option or not.
pub enable_mdb_writemap: bool,
/// The size, in bytes, by which the map size of an index is increased when it resized due to being full.
pub index_growth_amount: usize,
/// The number of indexes that can be concurrently opened in memory.
@@ -376,11 +374,6 @@ impl IndexScheduler {
std::fs::create_dir_all(&options.indexes_path)?;
std::fs::create_dir_all(&options.dumps_path)?;
if cfg!(windows) && options.enable_mdb_writemap {
// programmer error if this happens: in normal use passing the option on Windows is an error in main
panic!("Windows doesn't support the MDB_WRITEMAP LMDB option");
}
let task_db_size = clamp_to_page_size(options.task_db_size);
let budget = if options.indexer_config.skip_index_budget {
IndexBudget {
@@ -403,37 +396,25 @@ impl IndexScheduler {
.open(options.tasks_path)?;
let file_store = FileStore::new(&options.update_file_path)?;
let mut wtxn = env.write_txn()?;
let all_tasks = env.create_database(&mut wtxn, Some(db_name::ALL_TASKS))?;
let status = env.create_database(&mut wtxn, Some(db_name::STATUS))?;
let kind = env.create_database(&mut wtxn, Some(db_name::KIND))?;
let index_tasks = env.create_database(&mut wtxn, Some(db_name::INDEX_TASKS))?;
let canceled_by = env.create_database(&mut wtxn, Some(db_name::CANCELED_BY))?;
let enqueued_at = env.create_database(&mut wtxn, Some(db_name::ENQUEUED_AT))?;
let started_at = env.create_database(&mut wtxn, Some(db_name::STARTED_AT))?;
let finished_at = env.create_database(&mut wtxn, Some(db_name::FINISHED_AT))?;
wtxn.commit()?;
// allow unreachable_code to get rids of the warning in the case of a test build.
let this = Self {
must_stop_processing: MustStopProcessing::default(),
processing_tasks: Arc::new(RwLock::new(ProcessingTasks::new())),
file_store,
all_tasks,
status,
kind,
index_tasks,
canceled_by,
enqueued_at,
started_at,
finished_at,
all_tasks: env.create_database(Some(db_name::ALL_TASKS))?,
status: env.create_database(Some(db_name::STATUS))?,
kind: env.create_database(Some(db_name::KIND))?,
index_tasks: env.create_database(Some(db_name::INDEX_TASKS))?,
canceled_by: env.create_database(Some(db_name::CANCELED_BY))?,
enqueued_at: env.create_database(Some(db_name::ENQUEUED_AT))?,
started_at: env.create_database(Some(db_name::STARTED_AT))?,
finished_at: env.create_database(Some(db_name::FINISHED_AT))?,
index_mapper: IndexMapper::new(
&env,
options.indexes_path,
budget.map_size,
options.index_growth_amount,
budget.index_count,
options.enable_mdb_writemap,
options.indexer_config,
)?,
env,
@@ -559,7 +540,13 @@ impl IndexScheduler {
Err(e) => {
log::error!("{}", e);
// Wait one second when an irrecoverable error occurs.
if !e.is_recoverable() {
if matches!(
e,
Error::CorruptedTaskQueue
| Error::TaskDatabaseUpdate(_)
| Error::HeedTransaction(_)
| Error::CreateBatch(_)
) {
std::thread::sleep(Duration::from_secs(1));
}
}
@@ -573,16 +560,10 @@ impl IndexScheduler {
&self.index_mapper.indexer_config
}
/// Return the real database size (i.e.: The size **with** the free pages)
pub fn size(&self) -> Result<u64> {
Ok(self.env.real_disk_size()?)
}
/// Return the used database size (i.e.: The size **without** the free pages)
pub fn used_size(&self) -> Result<u64> {
Ok(self.env.non_free_pages_size()?)
}
/// Return the index corresponding to the name.
///
/// * If the index wasn't opened before, the index will be opened.
@@ -762,38 +743,6 @@ impl IndexScheduler {
Ok(tasks)
}
/// The returned structure contains:
/// 1. The name of the property being observed can be `statuses`, `types`, or `indexes`.
/// 2. The name of the specific data related to the property can be `enqueued` for the `statuses`, `settingsUpdate` for the `types`, or the name of the index for the `indexes`, for example.
/// 3. The number of times the properties appeared.
pub fn get_stats(&self) -> Result<BTreeMap<String, BTreeMap<String, u64>>> {
let rtxn = self.read_txn()?;
let mut res = BTreeMap::new();
res.insert(
"statuses".to_string(),
enum_iterator::all::<Status>()
.map(|s| Ok((s.to_string(), self.get_status(&rtxn, s)?.len())))
.collect::<Result<BTreeMap<String, u64>>>()?,
);
res.insert(
"types".to_string(),
enum_iterator::all::<Kind>()
.map(|s| Ok((s.to_string(), self.get_kind(&rtxn, s)?.len())))
.collect::<Result<BTreeMap<String, u64>>>()?,
);
res.insert(
"indexes".to_string(),
self.index_tasks
.iter(&rtxn)?
.map(|res| Ok(res.map(|(name, bitmap)| (name.to_string(), bitmap.len()))?))
.collect::<Result<BTreeMap<String, u64>>>()?,
);
Ok(res)
}
/// Return true iff there is at least one task associated with this index
/// that is processing.
pub fn is_index_processing(&self, index: &str) -> Result<bool> {
@@ -1321,12 +1270,6 @@ impl<'a> Dump<'a> {
documents_ids,
index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
},
KindDump::DocumentDeletionByFilter { filter } => {
KindWithContent::DocumentDeletionByFilter {
filter_expr: filter,
index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
}
}
KindDump::DocumentClear => KindWithContent::DocumentClear {
index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
},
@@ -1528,7 +1471,6 @@ mod tests {
dumps_path: tempdir.path().join("dumps"),
task_db_size: 1000 * 1000, // 1 MB, we don't use MiB on purpose.
index_base_map_size: 1000 * 1000, // 1 MB, we don't use MiB on purpose.
enable_mdb_writemap: false,
index_growth_amount: 1000 * 1000, // 1 MB
index_count: 5,
indexer_config,
@@ -1558,7 +1500,7 @@ mod tests {
(index_scheduler, index_scheduler_handle)
}
/// Return a [`PlannedFailure`](Error::PlannedFailure) error if a failure is planned
/// Return a [`CorruptedTaskQueue`](Error::CorruptedTaskQueue) error if a failure is planned
/// for the given location and current run loop iteration.
pub fn maybe_fail(&self, location: FailureLocation) -> Result<()> {
if self.planned_failures.contains(&(*self.run_loop_iteration.read().unwrap(), location))
@@ -1567,7 +1509,7 @@ mod tests {
FailureLocation::PanicInsideProcessBatch => {
panic!("simulated panic")
}
_ => Err(Error::PlannedFailure),
_ => Err(Error::CorruptedTaskQueue),
}
} else {
Ok(())

View File

@@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { received_documents: 1, indexed_documents: Some(0) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
0 {uid: 0, status: failed, error: ResponseError { code: 200, message: "Corrupted task queue.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { received_documents: 1, indexed_documents: Some(0) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
----------------------------------------------------------------------
### Status:
enqueued []

View File

@@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
0 {uid: 0, status: failed, error: ResponseError { code: 200, message: "Corrupted task queue.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
----------------------------------------------------------------------
### Status:
enqueued []

View File

@@ -8,7 +8,7 @@ source: index-scheduler/src/lib.rs
### All Tasks:
0 {uid: 0, status: succeeded, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
1 {uid: 1, status: succeeded, details: { primary_key: Some("sheep") }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("sheep") }}
2 {uid: 2, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { primary_key: Some("fish") }, kind: IndexCreation { index_uid: "whalo", primary_key: Some("fish") }}
2 {uid: 2, status: failed, error: ResponseError { code: 200, message: "Corrupted task queue.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { primary_key: Some("fish") }, kind: IndexCreation { index_uid: "whalo", primary_key: Some("fish") }}
----------------------------------------------------------------------
### Status:
enqueued []

View File

@@ -239,7 +239,6 @@ pub fn swap_index_uid_in_task(task: &mut Task, swap: (&str, &str)) {
match &mut task.kind {
K::DocumentAdditionOrUpdate { index_uid, .. } => index_uids.push(index_uid),
K::DocumentDeletion { index_uid, .. } => index_uids.push(index_uid),
K::DocumentDeletionByFilter { index_uid, .. } => index_uids.push(index_uid),
K::DocumentClear { index_uid } => index_uids.push(index_uid),
K::SettingsUpdate { index_uid, .. } => index_uids.push(index_uid),
K::IndexDeletion { index_uid } => index_uids.push(index_uid),
@@ -465,29 +464,6 @@ impl IndexScheduler {
}
}
}
Details::DocumentDeletionByFilter { deleted_documents, original_filter: _ } => {
assert_eq!(kind.as_kind(), Kind::DocumentDeletion);
let (index_uid, _) = if let KindWithContent::DocumentDeletionByFilter {
ref index_uid,
ref filter_expr,
} = kind
{
(index_uid, filter_expr)
} else {
unreachable!()
};
assert_eq!(&task_index_uid.unwrap(), index_uid);
match status {
Status::Enqueued | Status::Processing => (),
Status::Succeeded => {
assert!(deleted_documents.is_some());
}
Status::Failed | Status::Canceled => {
assert!(deleted_documents == Some(0));
}
}
}
Details::ClearAll { deleted_documents } => {
assert!(matches!(
kind.as_kind(),

View File

@@ -45,11 +45,6 @@ impl AuthController {
self.store.size()
}
/// Return the used size of the `AuthController` database in bytes.
pub fn used_size(&self) -> Result<u64> {
self.store.used_size()
}
pub fn create_key(&self, create_key: CreateApiKey) -> Result<Key> {
match self.store.get_api_key(create_key.uid)? {
Some(_) => Err(AuthControllerError::ApiKeyAlreadyExists(create_key.uid.to_string())),

View File

@@ -55,11 +55,9 @@ impl HeedAuthStore {
let path = path.as_ref().join(AUTH_DB_PATH);
create_dir_all(&path)?;
let env = Arc::new(open_auth_store_env(path.as_ref())?);
let mut wtxn = env.write_txn()?;
let keys = env.create_database(&mut wtxn, Some(KEY_DB_NAME))?;
let keys = env.create_database(Some(KEY_DB_NAME))?;
let action_keyid_index_expiration =
env.create_database(&mut wtxn, Some(KEY_ID_ACTION_INDEX_EXPIRATION_DB_NAME))?;
wtxn.commit()?;
env.create_database(Some(KEY_ID_ACTION_INDEX_EXPIRATION_DB_NAME))?;
Ok(Self { env, keys, action_keyid_index_expiration, should_close_on_drop: true })
}
@@ -75,11 +73,6 @@ impl HeedAuthStore {
Ok(self.env.real_disk_size()?)
}
/// Return the number of bytes actually used in the database
pub fn used_size(&self) -> Result<u64> {
Ok(self.env.non_free_pages_size()?)
}
pub fn set_drop_on_close(&mut self, v: bool) {
self.should_close_on_drop = v;
}

View File

@@ -22,7 +22,7 @@ file-store = { path = "../file-store" }
flate2 = "1.0.25"
fst = "0.4.7"
memmap2 = "0.5.10"
milli = { path = "../milli" }
milli = { path = "../milli", default-features = false }
roaring = { version = "0.10.1", features = ["serde"] }
serde = { version = "1.0.160", features = ["derive"] }
serde-cs = "0.2.4"
@@ -40,7 +40,7 @@ meili-snap = { path = "../meili-snap" }
[features]
# all specialized tokenizations
all-tokenizations = ["milli/all-tokenizations"]
default = ["milli/default"]
# chinese specialized tokenization
chinese = ["milli/chinese"]

View File

@@ -150,7 +150,6 @@ make_missing_field_convenience_builder!(MissingApiKeyActions, missing_api_key_ac
make_missing_field_convenience_builder!(MissingApiKeyExpiresAt, missing_api_key_expires_at);
make_missing_field_convenience_builder!(MissingApiKeyIndexes, missing_api_key_indexes);
make_missing_field_convenience_builder!(MissingSwapIndexes, missing_swap_indexes);
make_missing_field_convenience_builder!(MissingDocumentFilter, missing_document_filter);
// Integrate a sub-error into a [`DeserrError`] by taking its error message but using
// the default error code (C) from `Self`

View File

@@ -214,8 +214,6 @@ InvalidApiKeyUid , InvalidRequest , BAD_REQUEST ;
InvalidContentType , InvalidRequest , UNSUPPORTED_MEDIA_TYPE ;
InvalidDocumentCsvDelimiter , InvalidRequest , BAD_REQUEST ;
InvalidDocumentFields , InvalidRequest , BAD_REQUEST ;
MissingDocumentFilter , InvalidRequest , BAD_REQUEST ;
InvalidDocumentFilter , InvalidRequest , BAD_REQUEST ;
InvalidDocumentGeoField , InvalidRequest , BAD_REQUEST ;
InvalidDocumentId , InvalidRequest , BAD_REQUEST ;
InvalidDocumentLimit , InvalidRequest , BAD_REQUEST ;
@@ -239,10 +237,11 @@ InvalidSearchMatchingStrategy , InvalidRequest , BAD_REQUEST ;
InvalidSearchOffset , InvalidRequest , BAD_REQUEST ;
InvalidSearchPage , InvalidRequest , BAD_REQUEST ;
InvalidSearchQ , InvalidRequest , BAD_REQUEST ;
InvalidFacetSearchQuery , InvalidRequest , BAD_REQUEST ;
InvalidFacetSearchName , InvalidRequest , BAD_REQUEST ;
InvalidSearchShowMatchesPosition , InvalidRequest , BAD_REQUEST ;
InvalidSearchShowRankingScore , InvalidRequest , BAD_REQUEST ;
InvalidSearchShowRankingScoreDetails , InvalidRequest , BAD_REQUEST ;
InvalidSearchSort , InvalidRequest , BAD_REQUEST ;
InvalidSearchFacet , InvalidRequest , BAD_REQUEST ;
InvalidSettingsDisplayedAttributes , InvalidRequest , BAD_REQUEST ;
InvalidSettingsDistinctAttribute , InvalidRequest , BAD_REQUEST ;
InvalidSettingsFaceting , InvalidRequest , BAD_REQUEST ;
@@ -319,7 +318,6 @@ impl ErrorCode for milli::Error {
UserError::MaxDatabaseSizeReached => Code::DatabaseSizeLimitReached,
UserError::AttributeLimitReached => Code::MaxFieldsLimitExceeded,
UserError::InvalidFilter(_) => Code::InvalidSearchFilter,
UserError::InvalidFilterExpression(..) => Code::InvalidSearchFilter,
UserError::MissingDocumentId { .. } => Code::MissingDocumentId,
UserError::InvalidDocumentId { .. } | UserError::TooManyDocumentIds { .. } => {
Code::InvalidDocumentId
@@ -332,6 +330,7 @@ impl ErrorCode for milli::Error {
UserError::SortRankingRuleMissing => Code::InvalidSearchSort,
UserError::InvalidFacetsDistribution { .. } => Code::InvalidSearchFacets,
UserError::InvalidSortableAttribute { .. } => Code::InvalidSearchSort,
UserError::InvalidSearchFacet { .. } => Code::InvalidSearchFacet,
UserError::CriterionError(_) => Code::InvalidSettingsRankingRules,
UserError::InvalidGeoField { .. } => Code::InvalidDocumentGeoField,
UserError::SortError(_) => Code::InvalidSearchSort,

View File

@@ -49,7 +49,6 @@ impl Task {
| IndexSwap { .. } => None,
DocumentAdditionOrUpdate { index_uid, .. }
| DocumentDeletion { index_uid, .. }
| DocumentDeletionByFilter { index_uid, .. }
| DocumentClear { index_uid }
| SettingsUpdate { index_uid, .. }
| IndexCreation { index_uid, .. }
@@ -68,7 +67,6 @@ impl Task {
match self.kind {
KindWithContent::DocumentAdditionOrUpdate { content_file, .. } => Some(content_file),
KindWithContent::DocumentDeletion { .. }
| KindWithContent::DocumentDeletionByFilter { .. }
| KindWithContent::DocumentClear { .. }
| KindWithContent::SettingsUpdate { .. }
| KindWithContent::IndexDeletion { .. }
@@ -98,10 +96,6 @@ pub enum KindWithContent {
index_uid: String,
documents_ids: Vec<String>,
},
DocumentDeletionByFilter {
index_uid: String,
filter_expr: serde_json::Value,
},
DocumentClear {
index_uid: String,
},
@@ -151,7 +145,6 @@ impl KindWithContent {
match self {
KindWithContent::DocumentAdditionOrUpdate { .. } => Kind::DocumentAdditionOrUpdate,
KindWithContent::DocumentDeletion { .. } => Kind::DocumentDeletion,
KindWithContent::DocumentDeletionByFilter { .. } => Kind::DocumentDeletion,
KindWithContent::DocumentClear { .. } => Kind::DocumentDeletion,
KindWithContent::SettingsUpdate { .. } => Kind::SettingsUpdate,
KindWithContent::IndexCreation { .. } => Kind::IndexCreation,
@@ -175,7 +168,6 @@ impl KindWithContent {
| TaskDeletion { .. } => vec![],
DocumentAdditionOrUpdate { index_uid, .. }
| DocumentDeletion { index_uid, .. }
| DocumentDeletionByFilter { index_uid, .. }
| DocumentClear { index_uid }
| SettingsUpdate { index_uid, .. }
| IndexCreation { index_uid, .. }
@@ -208,12 +200,6 @@ impl KindWithContent {
deleted_documents: None,
})
}
KindWithContent::DocumentDeletionByFilter { index_uid: _, filter_expr } => {
Some(Details::DocumentDeletionByFilter {
original_filter: filter_expr.to_string(),
deleted_documents: None,
})
}
KindWithContent::DocumentClear { .. } | KindWithContent::IndexDeletion { .. } => {
Some(Details::ClearAll { deleted_documents: None })
}
@@ -256,12 +242,6 @@ impl KindWithContent {
deleted_documents: Some(0),
})
}
KindWithContent::DocumentDeletionByFilter { index_uid: _, filter_expr } => {
Some(Details::DocumentDeletionByFilter {
original_filter: filter_expr.to_string(),
deleted_documents: Some(0),
})
}
KindWithContent::DocumentClear { .. } => {
Some(Details::ClearAll { deleted_documents: None })
}
@@ -302,7 +282,6 @@ impl From<&KindWithContent> for Option<Details> {
})
}
KindWithContent::DocumentDeletion { .. } => None,
KindWithContent::DocumentDeletionByFilter { .. } => None,
KindWithContent::DocumentClear { .. } => None,
KindWithContent::SettingsUpdate { new_settings, .. } => {
Some(Details::SettingsUpdate { settings: new_settings.clone() })
@@ -499,7 +478,6 @@ pub enum Details {
SettingsUpdate { settings: Box<Settings<Unchecked>> },
IndexInfo { primary_key: Option<String> },
DocumentDeletion { provided_ids: usize, deleted_documents: Option<u64> },
DocumentDeletionByFilter { original_filter: String, deleted_documents: Option<u64> },
ClearAll { deleted_documents: Option<u64> },
TaskCancelation { matched_tasks: u64, canceled_tasks: Option<u64>, original_filter: String },
TaskDeletion { matched_tasks: u64, deleted_tasks: Option<u64>, original_filter: String },
@@ -515,9 +493,6 @@ impl Details {
*indexed_documents = Some(0)
}
Self::DocumentDeletion { deleted_documents, .. } => *deleted_documents = Some(0),
Self::DocumentDeletionByFilter { deleted_documents, .. } => {
*deleted_documents = Some(0)
}
Self::ClearAll { deleted_documents } => *deleted_documents = Some(0),
Self::TaskCancelation { canceled_tasks, .. } => *canceled_tasks = Some(0),
Self::TaskDeletion { deleted_tasks, .. } => *deleted_tasks = Some(0),

View File

@@ -106,7 +106,7 @@ vergen = { version = "7.5.1", default-features = false, features = ["git"] }
zip = { version = "0.6.4", optional = true }
[features]
default = ["analytics", "meilisearch-types/all-tokenizations", "mini-dashboard"]
default = ["analytics", "meilisearch-types/default", "mini-dashboard"]
analytics = ["segment"]
mini-dashboard = ["actix-web-static-files", "static-files", "anyhow", "cargo_toml", "hex", "reqwest", "sha-1", "tempfile", "zip"]
chinese = ["meilisearch-types/chinese"]

View File

@@ -5,7 +5,7 @@ use actix_web::HttpRequest;
use meilisearch_types::InstanceUid;
use serde_json::Value;
use super::{find_user_id, Analytics, DocumentDeletionKind, DocumentFetchKind};
use super::{find_user_id, Analytics, DocumentDeletionKind};
use crate::routes::indexes::documents::UpdateDocumentsQuery;
use crate::routes::tasks::TasksFilterQuery;
use crate::Opt;
@@ -38,6 +38,18 @@ impl MultiSearchAggregator {
pub fn succeed(&mut self) {}
}
#[derive(Default)]
pub struct FacetSearchAggregator;
#[allow(dead_code)]
impl FacetSearchAggregator {
pub fn from_query(_: &dyn Any, _: &dyn Any) -> Self {
Self::default()
}
pub fn succeed(&mut self, _: &dyn Any) {}
}
impl MockAnalytics {
#[allow(clippy::new_ret_no_self)]
pub fn new(opt: &Opt) -> Arc<dyn Analytics> {
@@ -56,6 +68,7 @@ impl Analytics for MockAnalytics {
fn get_search(&self, _aggregate: super::SearchAggregator) {}
fn post_search(&self, _aggregate: super::SearchAggregator) {}
fn post_multi_search(&self, _aggregate: super::MultiSearchAggregator) {}
fn post_facet_search(&self, _aggregate: super::FacetSearchAggregator) {}
fn add_documents(
&self,
_documents_query: &UpdateDocumentsQuery,
@@ -71,8 +84,6 @@ impl Analytics for MockAnalytics {
_request: &HttpRequest,
) {
}
fn get_fetch_documents(&self, _documents_query: &DocumentFetchKind, _request: &HttpRequest) {}
fn post_fetch_documents(&self, _documents_query: &DocumentFetchKind, _request: &HttpRequest) {}
fn get_tasks(&self, _query: &TasksFilterQuery, _request: &HttpRequest) {}
fn health_seen(&self, _request: &HttpRequest) {}
}

View File

@@ -25,6 +25,8 @@ pub type SegmentAnalytics = mock_analytics::MockAnalytics;
pub type SearchAggregator = mock_analytics::SearchAggregator;
#[cfg(any(debug_assertions, not(feature = "analytics")))]
pub type MultiSearchAggregator = mock_analytics::MultiSearchAggregator;
#[cfg(any(debug_assertions, not(feature = "analytics")))]
pub type FacetSearchAggregator = mock_analytics::FacetSearchAggregator;
// if we are in release mode and the feature analytics was enabled
// we use the real analytics
@@ -34,6 +36,8 @@ pub type SegmentAnalytics = segment_analytics::SegmentAnalytics;
pub type SearchAggregator = segment_analytics::SearchAggregator;
#[cfg(all(not(debug_assertions), feature = "analytics"))]
pub type MultiSearchAggregator = segment_analytics::MultiSearchAggregator;
#[cfg(all(not(debug_assertions), feature = "analytics"))]
pub type FacetSearchAggregator = segment_analytics::FacetSearchAggregator;
/// The Meilisearch config dir:
/// `~/.config/Meilisearch` on *NIX or *BSD.
@@ -64,13 +68,6 @@ pub enum DocumentDeletionKind {
PerDocumentId,
ClearAll,
PerBatch,
PerFilter,
}
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum DocumentFetchKind {
PerDocumentId,
Normal { with_filter: bool, limit: usize, offset: usize },
}
pub trait Analytics: Sync + Send {
@@ -88,6 +85,9 @@ pub trait Analytics: Sync + Send {
/// This method should be called to aggregate a post array of searches
fn post_multi_search(&self, aggregate: MultiSearchAggregator);
/// This method should be called to aggregate post facet values searches
fn post_facet_search(&self, aggregate: FacetSearchAggregator);
// this method should be called to aggregate a add documents request
fn add_documents(
&self,
@@ -96,12 +96,6 @@ pub trait Analytics: Sync + Send {
request: &HttpRequest,
);
// this method should be called to aggregate a fetch documents request
fn get_fetch_documents(&self, documents_query: &DocumentFetchKind, request: &HttpRequest);
// this method should be called to aggregate a fetch documents request
fn post_fetch_documents(&self, documents_query: &DocumentFetchKind, request: &HttpRequest);
// this method should be called to aggregate a add documents request
fn delete_documents(&self, kind: DocumentDeletionKind, request: &HttpRequest);

View File

@@ -1,5 +1,6 @@
use std::collections::{BinaryHeap, HashMap, HashSet};
use std::fs;
use std::mem::take;
use std::path::{Path, PathBuf};
use std::sync::Arc;
use std::time::{Duration, Instant};
@@ -23,17 +24,17 @@ use tokio::select;
use tokio::sync::mpsc::{self, Receiver, Sender};
use uuid::Uuid;
use super::{
config_user_id_path, DocumentDeletionKind, DocumentFetchKind, MEILISEARCH_CONFIG_PATH,
};
use super::{config_user_id_path, DocumentDeletionKind, MEILISEARCH_CONFIG_PATH};
use crate::analytics::Analytics;
use crate::option::{default_http_addr, IndexerOpts, MaxMemory, MaxThreads, ScheduleSnapshot};
use crate::routes::indexes::documents::UpdateDocumentsQuery;
use crate::routes::indexes::facet_search::FacetSearchQuery;
use crate::routes::tasks::TasksFilterQuery;
use crate::routes::{create_all_stats, Stats};
use crate::search::{
SearchQuery, SearchQueryWithIndex, SearchResult, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER,
DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT,
FacetSearchResult, MatchingStrategy, SearchQuery, SearchQueryWithIndex, SearchResult,
DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG,
DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET,
};
use crate::Opt;
@@ -71,11 +72,10 @@ pub enum AnalyticsMsg {
AggregateGetSearch(SearchAggregator),
AggregatePostSearch(SearchAggregator),
AggregatePostMultiSearch(MultiSearchAggregator),
AggregatePostFacetSearch(FacetSearchAggregator),
AggregateAddDocuments(DocumentsAggregator),
AggregateDeleteDocuments(DocumentsDeletionAggregator),
AggregateUpdateDocuments(DocumentsAggregator),
AggregateGetFetchDocuments(DocumentsFetchAggregator),
AggregatePostFetchDocuments(DocumentsFetchAggregator),
AggregateTasks(TasksAggregator),
AggregateHealth(HealthAggregator),
}
@@ -139,12 +139,11 @@ impl SegmentAnalytics {
batcher,
post_search_aggregator: SearchAggregator::default(),
post_multi_search_aggregator: MultiSearchAggregator::default(),
post_facet_search_aggregator: FacetSearchAggregator::default(),
get_search_aggregator: SearchAggregator::default(),
add_documents_aggregator: DocumentsAggregator::default(),
delete_documents_aggregator: DocumentsDeletionAggregator::default(),
update_documents_aggregator: DocumentsAggregator::default(),
get_fetch_documents_aggregator: DocumentsFetchAggregator::default(),
post_fetch_documents_aggregator: DocumentsFetchAggregator::default(),
get_tasks_aggregator: TasksAggregator::default(),
health_aggregator: HealthAggregator::default(),
});
@@ -182,6 +181,10 @@ impl super::Analytics for SegmentAnalytics {
let _ = self.sender.try_send(AnalyticsMsg::AggregatePostSearch(aggregate));
}
fn post_facet_search(&self, aggregate: FacetSearchAggregator) {
let _ = self.sender.try_send(AnalyticsMsg::AggregatePostFacetSearch(aggregate));
}
fn post_multi_search(&self, aggregate: MultiSearchAggregator) {
let _ = self.sender.try_send(AnalyticsMsg::AggregatePostMultiSearch(aggregate));
}
@@ -211,16 +214,6 @@ impl super::Analytics for SegmentAnalytics {
let _ = self.sender.try_send(AnalyticsMsg::AggregateUpdateDocuments(aggregate));
}
fn get_fetch_documents(&self, documents_query: &DocumentFetchKind, request: &HttpRequest) {
let aggregate = DocumentsFetchAggregator::from_query(documents_query, request);
let _ = self.sender.try_send(AnalyticsMsg::AggregateGetFetchDocuments(aggregate));
}
fn post_fetch_documents(&self, documents_query: &DocumentFetchKind, request: &HttpRequest) {
let aggregate = DocumentsFetchAggregator::from_query(documents_query, request);
let _ = self.sender.try_send(AnalyticsMsg::AggregatePostFetchDocuments(aggregate));
}
fn get_tasks(&self, query: &TasksFilterQuery, request: &HttpRequest) {
let aggregate = TasksAggregator::from_query(query, request);
let _ = self.sender.try_send(AnalyticsMsg::AggregateTasks(aggregate));
@@ -241,7 +234,6 @@ impl super::Analytics for SegmentAnalytics {
struct Infos {
env: String,
experimental_enable_metrics: bool,
experimental_reduce_indexing_memory_usage: bool,
db_path: bool,
import_dump: bool,
dump_dir: bool,
@@ -275,7 +267,6 @@ impl From<Opt> for Infos {
let Opt {
db_path,
experimental_enable_metrics,
experimental_reduce_indexing_memory_usage,
http_addr,
master_key: _,
env,
@@ -318,7 +309,6 @@ impl From<Opt> for Infos {
Self {
env,
experimental_enable_metrics,
experimental_reduce_indexing_memory_usage,
db_path: db_path != PathBuf::from("./data.ms"),
import_dump: import_dump.is_some(),
dump_dir: dump_dir != PathBuf::from("dumps/"),
@@ -354,11 +344,10 @@ pub struct Segment {
get_search_aggregator: SearchAggregator,
post_search_aggregator: SearchAggregator,
post_multi_search_aggregator: MultiSearchAggregator,
post_facet_search_aggregator: FacetSearchAggregator,
add_documents_aggregator: DocumentsAggregator,
delete_documents_aggregator: DocumentsDeletionAggregator,
update_documents_aggregator: DocumentsAggregator,
get_fetch_documents_aggregator: DocumentsFetchAggregator,
post_fetch_documents_aggregator: DocumentsFetchAggregator,
get_tasks_aggregator: TasksAggregator,
health_aggregator: HealthAggregator,
}
@@ -418,11 +407,10 @@ impl Segment {
Some(AnalyticsMsg::AggregateGetSearch(agreg)) => self.get_search_aggregator.aggregate(agreg),
Some(AnalyticsMsg::AggregatePostSearch(agreg)) => self.post_search_aggregator.aggregate(agreg),
Some(AnalyticsMsg::AggregatePostMultiSearch(agreg)) => self.post_multi_search_aggregator.aggregate(agreg),
Some(AnalyticsMsg::AggregatePostFacetSearch(agreg)) => self.post_facet_search_aggregator.aggregate(agreg),
Some(AnalyticsMsg::AggregateAddDocuments(agreg)) => self.add_documents_aggregator.aggregate(agreg),
Some(AnalyticsMsg::AggregateDeleteDocuments(agreg)) => self.delete_documents_aggregator.aggregate(agreg),
Some(AnalyticsMsg::AggregateUpdateDocuments(agreg)) => self.update_documents_aggregator.aggregate(agreg),
Some(AnalyticsMsg::AggregateGetFetchDocuments(agreg)) => self.get_fetch_documents_aggregator.aggregate(agreg),
Some(AnalyticsMsg::AggregatePostFetchDocuments(agreg)) => self.post_fetch_documents_aggregator.aggregate(agreg),
Some(AnalyticsMsg::AggregateTasks(agreg)) => self.get_tasks_aggregator.aggregate(agreg),
Some(AnalyticsMsg::AggregateHealth(agreg)) => self.health_aggregator.aggregate(agreg),
None => (),
@@ -461,55 +449,62 @@ impl Segment {
})
.await;
}
let get_search = std::mem::take(&mut self.get_search_aggregator)
.into_event(&self.user, "Documents Searched GET");
let post_search = std::mem::take(&mut self.post_search_aggregator)
.into_event(&self.user, "Documents Searched POST");
let post_multi_search = std::mem::take(&mut self.post_multi_search_aggregator)
.into_event(&self.user, "Documents Searched by Multi-Search POST");
let add_documents = std::mem::take(&mut self.add_documents_aggregator)
.into_event(&self.user, "Documents Added");
let delete_documents = std::mem::take(&mut self.delete_documents_aggregator)
.into_event(&self.user, "Documents Deleted");
let update_documents = std::mem::take(&mut self.update_documents_aggregator)
.into_event(&self.user, "Documents Updated");
let get_fetch_documents = std::mem::take(&mut self.get_fetch_documents_aggregator)
.into_event(&self.user, "Documents Fetched GET");
let post_fetch_documents = std::mem::take(&mut self.post_fetch_documents_aggregator)
.into_event(&self.user, "Documents Fetched POST");
let get_tasks =
std::mem::take(&mut self.get_tasks_aggregator).into_event(&self.user, "Tasks Seen");
let health =
std::mem::take(&mut self.health_aggregator).into_event(&self.user, "Health Seen");
if let Some(get_search) = get_search {
let Segment {
inbox: _,
opt: _,
batcher: _,
user,
get_search_aggregator,
post_search_aggregator,
post_multi_search_aggregator,
post_facet_search_aggregator,
add_documents_aggregator,
delete_documents_aggregator,
update_documents_aggregator,
get_tasks_aggregator,
health_aggregator,
} = self;
if let Some(get_search) =
take(get_search_aggregator).into_event(&user, "Documents Searched GET")
{
let _ = self.batcher.push(get_search).await;
}
if let Some(post_search) = post_search {
if let Some(post_search) =
take(post_search_aggregator).into_event(&user, "Documents Searched POST")
{
let _ = self.batcher.push(post_search).await;
}
if let Some(post_multi_search) = post_multi_search {
if let Some(post_multi_search) = take(post_multi_search_aggregator)
.into_event(&user, "Documents Searched by Multi-Search POST")
{
let _ = self.batcher.push(post_multi_search).await;
}
if let Some(add_documents) = add_documents {
if let Some(post_facet_search) = take(post_facet_search_aggregator)
.into_event(&user, "Documents Searched by Facet-Search POST")
{
let _ = self.batcher.push(post_facet_search).await;
}
if let Some(add_documents) =
take(add_documents_aggregator).into_event(&user, "Documents Added")
{
let _ = self.batcher.push(add_documents).await;
}
if let Some(delete_documents) = delete_documents {
if let Some(delete_documents) =
take(delete_documents_aggregator).into_event(&user, "Documents Deleted")
{
let _ = self.batcher.push(delete_documents).await;
}
if let Some(update_documents) = update_documents {
if let Some(update_documents) =
take(update_documents_aggregator).into_event(&user, "Documents Updated")
{
let _ = self.batcher.push(update_documents).await;
}
if let Some(get_fetch_documents) = get_fetch_documents {
let _ = self.batcher.push(get_fetch_documents).await;
}
if let Some(post_fetch_documents) = post_fetch_documents {
let _ = self.batcher.push(post_fetch_documents).await;
}
if let Some(get_tasks) = get_tasks {
if let Some(get_tasks) = take(get_tasks_aggregator).into_event(&user, "Tasks Seen") {
let _ = self.batcher.push(get_tasks).await;
}
if let Some(health) = health {
if let Some(health) = take(health_aggregator).into_event(&user, "Health Seen") {
let _ = self.batcher.push(health).await;
}
let _ = self.batcher.flush().await;
@@ -886,6 +881,144 @@ impl MultiSearchAggregator {
}
}
#[derive(Default)]
pub struct FacetSearchAggregator {
timestamp: Option<OffsetDateTime>,
// context
user_agents: HashSet<String>,
// requests
total_received: usize,
total_succeeded: usize,
time_spent: BinaryHeap<usize>,
// The set of all facetNames that were used
facet_names: HashSet<String>,
// As there been any other parameter than the facetName or facetQuery ones?
additional_search_parameters_provided: bool,
}
impl FacetSearchAggregator {
pub fn from_query(query: &FacetSearchQuery, request: &HttpRequest) -> Self {
let FacetSearchQuery {
facet_query: _,
facet_name,
q,
offset,
limit,
page,
hits_per_page,
attributes_to_retrieve,
attributes_to_crop,
crop_length,
attributes_to_highlight,
show_matches_position,
filter,
sort,
facets,
highlight_pre_tag,
highlight_post_tag,
crop_marker,
matching_strategy,
} = query;
let mut ret = Self::default();
ret.timestamp = Some(OffsetDateTime::now_utc());
ret.total_received = 1;
ret.user_agents = extract_user_agents(request).into_iter().collect();
ret.facet_names = Some(facet_name.clone()).into_iter().collect();
ret.additional_search_parameters_provided = q.is_some()
|| *offset != DEFAULT_SEARCH_OFFSET()
|| *limit != DEFAULT_SEARCH_LIMIT()
|| page.is_some()
|| hits_per_page.is_some()
|| attributes_to_retrieve.is_some()
|| attributes_to_crop.is_some()
|| *crop_length != DEFAULT_CROP_LENGTH()
|| attributes_to_highlight.is_some()
|| *show_matches_position
|| filter.is_some()
|| sort.is_some()
|| facets.is_some()
|| *highlight_pre_tag != DEFAULT_HIGHLIGHT_PRE_TAG()
|| *highlight_post_tag != DEFAULT_HIGHLIGHT_POST_TAG()
|| *crop_marker != DEFAULT_CROP_MARKER()
|| *matching_strategy != MatchingStrategy::default();
ret
}
pub fn succeed(&mut self, result: &FacetSearchResult) {
self.total_succeeded = self.total_succeeded.saturating_add(1);
self.time_spent.push(result.processing_time_ms as usize);
}
/// Aggregate one [SearchAggregator] into another.
pub fn aggregate(&mut self, mut other: Self) {
if self.timestamp.is_none() {
self.timestamp = other.timestamp;
}
// context
for user_agent in other.user_agents.into_iter() {
self.user_agents.insert(user_agent);
}
// request
self.total_received = self.total_received.saturating_add(other.total_received);
self.total_succeeded = self.total_succeeded.saturating_add(other.total_succeeded);
self.time_spent.append(&mut other.time_spent);
// facet_names
for facet_name in other.facet_names.into_iter() {
self.facet_names.insert(facet_name);
}
// additional_search_parameters_provided
self.additional_search_parameters_provided = self.additional_search_parameters_provided
| other.additional_search_parameters_provided;
}
pub fn into_event(self, user: &User, event_name: &str) -> Option<Track> {
if self.total_received == 0 {
None
} else {
// the index of the 99th percentage of value
let percentile_99th = 0.99 * (self.total_succeeded as f64 - 1.) + 1.;
// we get all the values in a sorted manner
let time_spent = self.time_spent.into_sorted_vec();
// We are only interested by the slowest value of the 99th fastest results
let time_spent = time_spent.get(percentile_99th as usize);
let properties = json!({
"user-agent": self.user_agents,
"requests": {
"99th_response_time": time_spent.map(|t| format!("{:.2}", t)),
"total_succeeded": self.total_succeeded,
"total_failed": self.total_received.saturating_sub(self.total_succeeded), // just to be sure we never panics
"total_received": self.total_received,
},
"facets": {
"total_distinct_facet_count": self.facet_names.len(),
},
"additional_search_parameters_provided": self.additional_search_parameters_provided,
});
Some(Track {
timestamp: self.timestamp,
user: user.clone(),
event: event_name.to_string(),
properties,
..Default::default()
})
}
}
}
#[derive(Default)]
pub struct DocumentsAggregator {
timestamp: Option<OffsetDateTime>,
@@ -982,7 +1115,6 @@ pub struct DocumentsDeletionAggregator {
per_document_id: bool,
clear_all: bool,
per_batch: bool,
per_filter: bool,
}
impl DocumentsDeletionAggregator {
@@ -996,7 +1128,6 @@ impl DocumentsDeletionAggregator {
DocumentDeletionKind::PerDocumentId => ret.per_document_id = true,
DocumentDeletionKind::ClearAll => ret.clear_all = true,
DocumentDeletionKind::PerBatch => ret.per_batch = true,
DocumentDeletionKind::PerFilter => ret.per_filter = true,
}
ret
@@ -1016,7 +1147,6 @@ impl DocumentsDeletionAggregator {
self.per_document_id |= other.per_document_id;
self.clear_all |= other.clear_all;
self.per_batch |= other.per_batch;
self.per_filter |= other.per_filter;
}
pub fn into_event(self, user: &User, event_name: &str) -> Option<Track> {
@@ -1168,76 +1298,3 @@ impl HealthAggregator {
})
}
}
#[derive(Default, Serialize)]
pub struct DocumentsFetchAggregator {
#[serde(skip)]
timestamp: Option<OffsetDateTime>,
// context
#[serde(rename = "user-agent")]
user_agents: HashSet<String>,
#[serde(rename = "requests.max_limit")]
total_received: usize,
// a call on ../documents/:doc_id
per_document_id: bool,
// if a filter was used
per_filter: bool,
// pagination
#[serde(rename = "pagination.max_limit")]
max_limit: usize,
#[serde(rename = "pagination.max_offset")]
max_offset: usize,
}
impl DocumentsFetchAggregator {
pub fn from_query(query: &DocumentFetchKind, request: &HttpRequest) -> Self {
let (limit, offset) = match query {
DocumentFetchKind::PerDocumentId => (1, 0),
DocumentFetchKind::Normal { limit, offset, .. } => (*limit, *offset),
};
Self {
timestamp: Some(OffsetDateTime::now_utc()),
user_agents: extract_user_agents(request).into_iter().collect(),
total_received: 1,
per_document_id: matches!(query, DocumentFetchKind::PerDocumentId),
per_filter: matches!(query, DocumentFetchKind::Normal { with_filter, .. } if *with_filter),
max_limit: limit,
max_offset: offset,
}
}
/// Aggregate one [DocumentsFetchAggregator] into another.
pub fn aggregate(&mut self, other: Self) {
if self.timestamp.is_none() {
self.timestamp = other.timestamp;
}
for user_agent in other.user_agents {
self.user_agents.insert(user_agent);
}
self.total_received = self.total_received.saturating_add(other.total_received);
self.per_document_id |= other.per_document_id;
self.per_filter |= other.per_filter;
self.max_limit = self.max_limit.max(other.max_limit);
self.max_offset = self.max_offset.max(other.max_offset);
}
pub fn into_event(self, user: &User, event_name: &str) -> Option<Track> {
// if we had no timestamp it means we never encountered any events and
// thus we don't need to send this event.
let timestamp = self.timestamp?;
Some(Track {
timestamp: Some(timestamp),
user: user.clone(),
event: event_name.to_string(),
properties: serde_json::to_value(self).ok()?,
..Default::default()
})
}
}

View File

@@ -1,6 +1,5 @@
use actix_web as aweb;
use aweb::error::{JsonPayloadError, QueryPayloadError};
use byte_unit::Byte;
use meilisearch_types::document_formats::{DocumentFormatError, PayloadType};
use meilisearch_types::error::{Code, ErrorCode, ResponseError};
use meilisearch_types::index_uid::{IndexUid, IndexUidFormatError};
@@ -21,14 +20,12 @@ pub enum MeilisearchHttpError {
InvalidContentType(String, Vec<String>),
#[error("Document `{0}` not found.")]
DocumentNotFound(String),
#[error("Sending an empty filter is forbidden.")]
EmptyFilter,
#[error("Invalid syntax for the filter parameter: `expected {}, found: {1}`.", .0.join(", "))]
InvalidExpression(&'static [&'static str], Value),
#[error("A {0} payload is missing.")]
MissingPayload(PayloadType),
#[error("The provided payload reached the size limit. The maximum accepted payload size is {}.", Byte::from_bytes(*.0 as u64).get_appropriate_unit(true))]
PayloadTooLarge(usize),
#[error("The provided payload reached the size limit.")]
PayloadTooLarge,
#[error("Two indexes must be given for each swap. The list `[{}]` contains {} indexes.",
.0.iter().map(|uid| format!("\"{uid}\"")).collect::<Vec<_>>().join(", "), .0.len()
)]
@@ -61,9 +58,8 @@ impl ErrorCode for MeilisearchHttpError {
MeilisearchHttpError::MissingPayload(_) => Code::MissingPayload,
MeilisearchHttpError::InvalidContentType(_, _) => Code::InvalidContentType,
MeilisearchHttpError::DocumentNotFound(_) => Code::DocumentNotFound,
MeilisearchHttpError::EmptyFilter => Code::InvalidDocumentFilter,
MeilisearchHttpError::InvalidExpression(_, _) => Code::InvalidSearchFilter,
MeilisearchHttpError::PayloadTooLarge(_) => Code::PayloadTooLarge,
MeilisearchHttpError::PayloadTooLarge => Code::PayloadTooLarge,
MeilisearchHttpError::SwapIndexPayloadWrongLength(_) => Code::InvalidSwapIndexes,
MeilisearchHttpError::IndexUid(e) => e.error_code(),
MeilisearchHttpError::SerdeJson(_) => Code::Internal,

View File

@@ -11,7 +11,6 @@ use crate::error::MeilisearchHttpError;
pub struct Payload {
payload: Decompress<dev::Payload>,
limit: usize,
remaining: usize,
}
pub struct PayloadConfig {
@@ -44,7 +43,6 @@ impl FromRequest for Payload {
ready(Ok(Payload {
payload: Decompress::from_headers(payload.take(), req.headers()),
limit,
remaining: limit,
}))
}
}
@@ -56,14 +54,12 @@ impl Stream for Payload {
fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
match Pin::new(&mut self.payload).poll_next(cx) {
Poll::Ready(Some(result)) => match result {
Ok(bytes) => match self.remaining.checked_sub(bytes.len()) {
Ok(bytes) => match self.limit.checked_sub(bytes.len()) {
Some(new_limit) => {
self.remaining = new_limit;
self.limit = new_limit;
Poll::Ready(Some(Ok(bytes)))
}
None => {
Poll::Ready(Some(Err(MeilisearchHttpError::PayloadTooLarge(self.limit))))
}
None => Poll::Ready(Some(Err(MeilisearchHttpError::PayloadTooLarge))),
},
x => Poll::Ready(Some(x.map_err(MeilisearchHttpError::from))),
},

View File

@@ -232,7 +232,6 @@ fn open_or_create_database_unchecked(
dumps_path: opt.dump_dir.clone(),
task_db_size: opt.max_task_db_size.get_bytes() as usize,
index_base_map_size: opt.max_index_size.get_bytes() as usize,
enable_mdb_writemap: opt.experimental_reduce_indexing_memory_usage,
indexer_config: (&opt.indexer_options).try_into()?,
autobatching_enabled: true,
max_number_of_tasks: 1_000_000,

View File

@@ -29,11 +29,6 @@ fn setup(opt: &Opt) -> anyhow::Result<()> {
async fn main() -> anyhow::Result<()> {
let (opt, config_read_from) = Opt::try_build()?;
anyhow::ensure!(
!(cfg!(windows) && opt.experimental_reduce_indexing_memory_usage),
"The `experimental-reduce-indexing-memory-usage` flag is not supported on Windows"
);
setup(&opt)?;
match (opt.env.as_ref(), &opt.master_key) {
@@ -186,9 +181,9 @@ Anonymous telemetry:\t\"Enabled\""
}
eprintln!();
eprintln!("Documentation:\t\thttps://www.meilisearch.com/docs");
eprintln!("Documentation:\t\thttps://docs.meilisearch.com");
eprintln!("Source code:\t\thttps://github.com/meilisearch/meilisearch");
eprintln!("Discord:\t\thttps://discord.meilisearch.com");
eprintln!("Contact:\t\thttps://docs.meilisearch.com/resources/contact.html");
eprintln!();
}

View File

@@ -4,32 +4,20 @@ use prometheus::{
register_int_gauge_vec, HistogramVec, IntCounterVec, IntGauge, IntGaugeVec,
};
/// Create evenly distributed buckets
fn create_buckets() -> [f64; 29] {
(0..10)
.chain((10..100).step_by(10))
.chain((100..=1000).step_by(100))
.map(|i| i as f64 / 1000.)
.collect::<Vec<_>>()
.try_into()
.unwrap()
}
const HTTP_RESPONSE_TIME_CUSTOM_BUCKETS: &[f64; 14] = &[
0.0005, 0.0008, 0.00085, 0.0009, 0.00095, 0.001, 0.00105, 0.0011, 0.00115, 0.0012, 0.0015,
0.002, 0.003, 1.0,
];
lazy_static! {
pub static ref HTTP_RESPONSE_TIME_CUSTOM_BUCKETS: [f64; 29] = create_buckets();
pub static ref MEILISEARCH_HTTP_REQUESTS_TOTAL: IntCounterVec = register_int_counter_vec!(
opts!("meilisearch_http_requests_total", "Meilisearch HTTP requests total"),
pub static ref HTTP_REQUESTS_TOTAL: IntCounterVec = register_int_counter_vec!(
opts!("http_requests_total", "HTTP requests total"),
&["method", "path"]
)
.expect("Can't create a metric");
pub static ref MEILISEARCH_DB_SIZE_BYTES: IntGauge =
register_int_gauge!(opts!("meilisearch_db_size_bytes", "Meilisearch DB Size In Bytes"))
register_int_gauge!(opts!("meilisearch_db_size_bytes", "Meilisearch Db Size In Bytes"))
.expect("Can't create a metric");
pub static ref MEILISEARCH_USED_DB_SIZE_BYTES: IntGauge = register_int_gauge!(opts!(
"meilisearch_used_db_size_bytes",
"Meilisearch Used DB Size In Bytes"
))
.expect("Can't create a metric");
pub static ref MEILISEARCH_INDEX_COUNT: IntGauge =
register_int_gauge!(opts!("meilisearch_index_count", "Meilisearch Index Count"))
.expect("Can't create a metric");
@@ -38,16 +26,11 @@ lazy_static! {
&["index"]
)
.expect("Can't create a metric");
pub static ref MEILISEARCH_HTTP_RESPONSE_TIME_SECONDS: HistogramVec = register_histogram_vec!(
pub static ref HTTP_RESPONSE_TIME_SECONDS: HistogramVec = register_histogram_vec!(
"http_response_time_seconds",
"HTTP response times",
&["method", "path"],
HTTP_RESPONSE_TIME_CUSTOM_BUCKETS.to_vec()
)
.expect("Can't create a metric");
pub static ref MEILISEARCH_NB_TASKS: IntGaugeVec = register_int_gauge_vec!(
opts!("meilisearch_nb_tasks", "Meilisearch Number of tasks"),
&["kind", "value"]
)
.expect("Can't create a metric");
}

View File

@@ -52,11 +52,11 @@ where
if is_registered_resource {
let request_method = req.method().to_string();
histogram_timer = Some(
crate::metrics::MEILISEARCH_HTTP_RESPONSE_TIME_SECONDS
crate::metrics::HTTP_RESPONSE_TIME_SECONDS
.with_label_values(&[&request_method, request_path])
.start_timer(),
);
crate::metrics::MEILISEARCH_HTTP_REQUESTS_TOTAL
crate::metrics::HTTP_REQUESTS_TOTAL
.with_label_values(&[&request_method, request_path])
.inc();
}

View File

@@ -48,8 +48,6 @@ const MEILI_IGNORE_DUMP_IF_DB_EXISTS: &str = "MEILI_IGNORE_DUMP_IF_DB_EXISTS";
const MEILI_DUMP_DIR: &str = "MEILI_DUMP_DIR";
const MEILI_LOG_LEVEL: &str = "MEILI_LOG_LEVEL";
const MEILI_EXPERIMENTAL_ENABLE_METRICS: &str = "MEILI_EXPERIMENTAL_ENABLE_METRICS";
const MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE: &str =
"MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE";
const DEFAULT_CONFIG_FILE_PATH: &str = "./config.toml";
const DEFAULT_DB_PATH: &str = "./data.ms";
@@ -295,11 +293,6 @@ pub struct Opt {
#[serde(default)]
pub experimental_enable_metrics: bool,
/// Experimental RAM reduction during indexing, do not use in production, see: <https://github.com/meilisearch/product/discussions/652>
#[clap(long, env = MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE)]
#[serde(default)]
pub experimental_reduce_indexing_memory_usage: bool,
#[serde(flatten)]
#[clap(flatten)]
pub indexer_options: IndexerOpts,
@@ -392,7 +385,6 @@ impl Opt {
#[cfg(all(not(debug_assertions), feature = "analytics"))]
no_analytics,
experimental_enable_metrics: enable_metrics_route,
experimental_reduce_indexing_memory_usage: reduce_indexing_memory_usage,
} = self;
export_to_env_if_not_present(MEILI_DB_PATH, db_path);
export_to_env_if_not_present(MEILI_HTTP_ADDR, http_addr);
@@ -434,10 +426,6 @@ impl Opt {
MEILI_EXPERIMENTAL_ENABLE_METRICS,
enable_metrics_route.to_string(),
);
export_to_env_if_not_present(
MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE,
reduce_indexing_memory_usage.to_string(),
);
indexer_options.export_to_env();
}

View File

@@ -4,20 +4,19 @@ use actix_web::http::header::CONTENT_TYPE;
use actix_web::web::Data;
use actix_web::{web, HttpMessage, HttpRequest, HttpResponse};
use bstr::ByteSlice;
use deserr::actix_web::{AwebJson, AwebQueryParameter};
use deserr::actix_web::AwebQueryParameter;
use deserr::Deserr;
use futures::StreamExt;
use index_scheduler::IndexScheduler;
use log::debug;
use meilisearch_types::deserr::query_params::Param;
use meilisearch_types::deserr::{DeserrJsonError, DeserrQueryParamError};
use meilisearch_types::deserr::DeserrQueryParamError;
use meilisearch_types::document_formats::{read_csv, read_json, read_ndjson, PayloadType};
use meilisearch_types::error::deserr_codes::*;
use meilisearch_types::error::{Code, ResponseError};
use meilisearch_types::heed::RoTxn;
use meilisearch_types::index_uid::IndexUid;
use meilisearch_types::milli::update::IndexDocumentsMethod;
use meilisearch_types::milli::DocumentId;
use meilisearch_types::star_or::OptionStarOrList;
use meilisearch_types::tasks::KindWithContent;
use meilisearch_types::{milli, Document, Index};
@@ -29,7 +28,7 @@ use tempfile::tempfile;
use tokio::fs::File;
use tokio::io::{AsyncSeekExt, AsyncWriteExt, BufWriter};
use crate::analytics::{Analytics, DocumentDeletionKind, DocumentFetchKind};
use crate::analytics::{Analytics, DocumentDeletionKind};
use crate::error::MeilisearchHttpError;
use crate::error::PayloadError::ReceivePayload;
use crate::extractors::authentication::policies::*;
@@ -37,7 +36,6 @@ use crate::extractors::authentication::GuardedData;
use crate::extractors::payload::Payload;
use crate::extractors::sequential_extractor::SeqHandler;
use crate::routes::{PaginationView, SummarizedTaskView, PAGINATION_DEFAULT_LIMIT};
use crate::search::parse_filter;
static ACCEPTED_CONTENT_TYPE: Lazy<Vec<String>> = Lazy::new(|| {
vec!["application/json".to_string(), "application/x-ndjson".to_string(), "text/csv".to_string()]
@@ -68,17 +66,13 @@ pub struct DocumentParam {
pub fn configure(cfg: &mut web::ServiceConfig) {
cfg.service(
web::resource("")
.route(web::get().to(SeqHandler(get_documents)))
.route(web::get().to(SeqHandler(get_all_documents)))
.route(web::post().to(SeqHandler(replace_documents)))
.route(web::put().to(SeqHandler(update_documents)))
.route(web::delete().to(SeqHandler(clear_all_documents))),
)
// these routes need to be before the /documents/{document_id} to match properly
.service(
web::resource("/delete-batch").route(web::post().to(SeqHandler(delete_documents_batch))),
)
.service(web::resource("/delete").route(web::post().to(SeqHandler(delete_documents_by_filter))))
.service(web::resource("/fetch").route(web::post().to(SeqHandler(documents_by_query_post))))
// this route needs to be before the /documents/{document_id} to match properly
.service(web::resource("/delete-batch").route(web::post().to(SeqHandler(delete_documents))))
.service(
web::resource("/{document_id}")
.route(web::get().to(SeqHandler(get_document)))
@@ -97,14 +91,10 @@ pub async fn get_document(
index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_GET }>, Data<IndexScheduler>>,
document_param: web::Path<DocumentParam>,
params: AwebQueryParameter<GetDocument, DeserrQueryParamError>,
req: HttpRequest,
analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
let DocumentParam { index_uid, document_id } = document_param.into_inner();
let index_uid = IndexUid::try_from(index_uid)?;
analytics.get_fetch_documents(&DocumentFetchKind::PerDocumentId, &req);
let GetDocument { fields } = params.into_inner();
let attributes_to_retrieve = fields.merge_star_and_none();
@@ -137,103 +127,29 @@ pub async fn delete_document(
#[derive(Debug, Deserr)]
#[deserr(error = DeserrQueryParamError, rename_all = camelCase, deny_unknown_fields)]
pub struct BrowseQueryGet {
pub struct BrowseQuery {
#[deserr(default, error = DeserrQueryParamError<InvalidDocumentOffset>)]
offset: Param<usize>,
#[deserr(default = Param(PAGINATION_DEFAULT_LIMIT), error = DeserrQueryParamError<InvalidDocumentLimit>)]
limit: Param<usize>,
#[deserr(default, error = DeserrQueryParamError<InvalidDocumentFields>)]
fields: OptionStarOrList<String>,
#[deserr(default, error = DeserrQueryParamError<InvalidDocumentFilter>)]
filter: Option<String>,
}
#[derive(Debug, Deserr)]
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
pub struct BrowseQuery {
#[deserr(default, error = DeserrJsonError<InvalidDocumentOffset>)]
offset: usize,
#[deserr(default = PAGINATION_DEFAULT_LIMIT, error = DeserrJsonError<InvalidDocumentLimit>)]
limit: usize,
#[deserr(default, error = DeserrJsonError<InvalidDocumentFields>)]
fields: Option<Vec<String>>,
#[deserr(default, error = DeserrJsonError<InvalidDocumentFilter>)]
filter: Option<Value>,
}
pub async fn documents_by_query_post(
pub async fn get_all_documents(
index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_GET }>, Data<IndexScheduler>>,
index_uid: web::Path<String>,
body: AwebJson<BrowseQuery, DeserrJsonError>,
req: HttpRequest,
analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
debug!("called with body: {:?}", body);
let body = body.into_inner();
analytics.post_fetch_documents(
&DocumentFetchKind::Normal {
with_filter: body.filter.is_some(),
limit: body.limit,
offset: body.offset,
},
&req,
);
documents_by_query(&index_scheduler, index_uid, body)
}
pub async fn get_documents(
index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_GET }>, Data<IndexScheduler>>,
index_uid: web::Path<String>,
params: AwebQueryParameter<BrowseQueryGet, DeserrQueryParamError>,
req: HttpRequest,
analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
debug!("called with params: {:?}", params);
let BrowseQueryGet { limit, offset, fields, filter } = params.into_inner();
let filter = match filter {
Some(f) => match serde_json::from_str(&f) {
Ok(v) => Some(v),
_ => Some(Value::String(f)),
},
None => None,
};
let query = BrowseQuery {
offset: offset.0,
limit: limit.0,
fields: fields.merge_star_and_none(),
filter,
};
analytics.get_fetch_documents(
&DocumentFetchKind::Normal {
with_filter: query.filter.is_some(),
limit: query.limit,
offset: query.offset,
},
&req,
);
documents_by_query(&index_scheduler, index_uid, query)
}
fn documents_by_query(
index_scheduler: &IndexScheduler,
index_uid: web::Path<String>,
query: BrowseQuery,
params: AwebQueryParameter<BrowseQuery, DeserrQueryParamError>,
) -> Result<HttpResponse, ResponseError> {
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
let BrowseQuery { offset, limit, fields, filter } = query;
debug!("called with params: {:?}", params);
let BrowseQuery { limit, offset, fields } = params.into_inner();
let attributes_to_retrieve = fields.merge_star_and_none();
let index = index_scheduler.index(&index_uid)?;
let (total, documents) = retrieve_documents(&index, offset, limit, filter, fields)?;
let (total, documents) = retrieve_documents(&index, offset.0, limit.0, attributes_to_retrieve)?;
let ret = PaginationView::new(offset, limit, total as usize, documents);
let ret = PaginationView::new(offset.0, limit.0, total as usize, documents);
debug!("returns: {:?}", ret);
Ok(HttpResponse::Ok().json(ret))
@@ -457,7 +373,7 @@ async fn document_addition(
Ok(task.into())
}
pub async fn delete_documents_batch(
pub async fn delete_documents(
index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_DELETE }>, Data<IndexScheduler>>,
index_uid: web::Path<String>,
body: web::Json<Vec<Value>>,
@@ -483,42 +399,6 @@ pub async fn delete_documents_batch(
Ok(HttpResponse::Accepted().json(task))
}
#[derive(Debug, Deserr)]
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
pub struct DocumentDeletionByFilter {
#[deserr(error = DeserrJsonError<InvalidDocumentFilter>, missing_field_error = DeserrJsonError::missing_document_filter)]
filter: Value,
}
pub async fn delete_documents_by_filter(
index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_DELETE }>, Data<IndexScheduler>>,
index_uid: web::Path<String>,
body: AwebJson<DocumentDeletionByFilter, DeserrJsonError>,
req: HttpRequest,
analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
debug!("called with params: {:?}", body);
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
let index_uid = index_uid.into_inner();
let filter = body.into_inner().filter;
analytics.delete_documents(DocumentDeletionKind::PerFilter, &req);
// we ensure the filter is well formed before enqueuing it
|| -> Result<_, ResponseError> {
Ok(crate::search::parse_filter(&filter)?.ok_or(MeilisearchHttpError::EmptyFilter)?)
}()
// and whatever was the error, the error code should always be an InvalidDocumentFilter
.map_err(|err| ResponseError::from_msg(err.message, Code::InvalidDocumentFilter))?;
let task = KindWithContent::DocumentDeletionByFilter { index_uid, filter_expr: filter };
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
debug!("returns: {:?}", task);
Ok(HttpResponse::Accepted().json(task))
}
pub async fn clear_all_documents(
index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_DELETE }>, Data<IndexScheduler>>,
index_uid: web::Path<String>,
@@ -536,15 +416,14 @@ pub async fn clear_all_documents(
Ok(HttpResponse::Accepted().json(task))
}
fn some_documents<'a, 't: 'a>(
index: &'a Index,
rtxn: &'t RoTxn,
doc_ids: impl IntoIterator<Item = DocumentId> + 'a,
fn all_documents<'a>(
index: &Index,
rtxn: &'a RoTxn,
) -> Result<impl Iterator<Item = Result<Document, ResponseError>> + 'a, ResponseError> {
let fields_ids_map = index.fields_ids_map(rtxn)?;
let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();
Ok(index.iter_documents(rtxn, doc_ids)?.map(move |ret| {
Ok(index.all_documents(rtxn)?.map(move |ret| {
ret.map_err(ResponseError::from).and_then(|(_key, document)| -> Result<_, ResponseError> {
Ok(milli::obkv_to_json(&all_fields, &fields_ids_map, document)?)
})
@@ -555,50 +434,24 @@ fn retrieve_documents<S: AsRef<str>>(
index: &Index,
offset: usize,
limit: usize,
filter: Option<Value>,
attributes_to_retrieve: Option<Vec<S>>,
) -> Result<(u64, Vec<Document>), ResponseError> {
let rtxn = index.read_txn()?;
let filter = &filter;
let filter = if let Some(filter) = filter {
parse_filter(filter)
.map_err(|err| ResponseError::from_msg(err.to_string(), Code::InvalidDocumentFilter))?
} else {
None
};
let candidates = if let Some(filter) = filter {
filter.evaluate(&rtxn, index).map_err(|err| match err {
milli::Error::UserError(milli::UserError::InvalidFilter(_)) => {
ResponseError::from_msg(err.to_string(), Code::InvalidDocumentFilter)
}
e => e.into(),
})?
} else {
index.documents_ids(&rtxn)?
};
let mut documents = Vec::new();
for document in all_documents(index, &rtxn)?.skip(offset).take(limit) {
let document = match &attributes_to_retrieve {
Some(attributes_to_retrieve) => permissive_json_pointer::select_values(
&document?,
attributes_to_retrieve.iter().map(|s| s.as_ref()),
),
None => document?,
};
documents.push(document);
}
let (it, number_of_documents) = {
let number_of_documents = candidates.len();
(
some_documents(index, &rtxn, candidates.into_iter().skip(offset).take(limit))?,
number_of_documents,
)
};
let documents: Result<Vec<_>, ResponseError> = it
.map(|document| {
Ok(match &attributes_to_retrieve {
Some(attributes_to_retrieve) => permissive_json_pointer::select_values(
&document?,
attributes_to_retrieve.iter().map(|s| s.as_ref()),
),
None => document?,
})
})
.collect();
Ok((number_of_documents, documents?))
let number_of_documents = index.number_of_documents(&rtxn)?;
Ok((number_of_documents, documents))
}
fn retrieve_document<S: AsRef<str>>(

View File

@@ -0,0 +1,133 @@
use std::collections::{BTreeSet, HashSet};
use actix_web::web::Data;
use actix_web::{web, HttpRequest, HttpResponse};
use deserr::actix_web::AwebJson;
use index_scheduler::IndexScheduler;
use log::debug;
use meilisearch_types::deserr::DeserrJsonError;
use meilisearch_types::error::deserr_codes::*;
use meilisearch_types::error::ResponseError;
use meilisearch_types::index_uid::IndexUid;
use serde_json::Value;
use crate::analytics::{Analytics, FacetSearchAggregator};
use crate::extractors::authentication::policies::*;
use crate::extractors::authentication::GuardedData;
use crate::search::{
add_search_rules, perform_facet_search, MatchingStrategy, SearchQuery, DEFAULT_CROP_LENGTH,
DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG,
DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET,
};
pub fn configure(cfg: &mut web::ServiceConfig) {
cfg.service(web::resource("").route(web::post().to(search)));
}
// TODO improve the error messages
#[derive(Debug, Clone, Default, PartialEq, Eq, deserr::Deserr)]
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
pub struct FacetSearchQuery {
#[deserr(default, error = DeserrJsonError<InvalidFacetSearchQuery>)]
pub facet_query: Option<String>,
#[deserr(error = DeserrJsonError<InvalidFacetSearchName>)]
pub facet_name: String,
#[deserr(default, error = DeserrJsonError<InvalidSearchQ>)]
pub q: Option<String>,
#[deserr(default = DEFAULT_SEARCH_OFFSET(), error = DeserrJsonError<InvalidSearchOffset>)]
pub offset: usize,
#[deserr(default = DEFAULT_SEARCH_LIMIT(), error = DeserrJsonError<InvalidSearchLimit>)]
pub limit: usize,
#[deserr(default, error = DeserrJsonError<InvalidSearchPage>)]
pub page: Option<usize>,
#[deserr(default, error = DeserrJsonError<InvalidSearchHitsPerPage>)]
pub hits_per_page: Option<usize>,
#[deserr(default, error = DeserrJsonError<InvalidSearchAttributesToRetrieve>)]
pub attributes_to_retrieve: Option<BTreeSet<String>>,
#[deserr(default, error = DeserrJsonError<InvalidSearchAttributesToCrop>)]
pub attributes_to_crop: Option<Vec<String>>,
#[deserr(default, error = DeserrJsonError<InvalidSearchCropLength>, default = DEFAULT_CROP_LENGTH())]
pub crop_length: usize,
#[deserr(default, error = DeserrJsonError<InvalidSearchAttributesToHighlight>)]
pub attributes_to_highlight: Option<HashSet<String>>,
#[deserr(default, error = DeserrJsonError<InvalidSearchShowMatchesPosition>, default)]
pub show_matches_position: bool,
#[deserr(default, error = DeserrJsonError<InvalidSearchFilter>)]
pub filter: Option<Value>,
#[deserr(default, error = DeserrJsonError<InvalidSearchSort>)]
pub sort: Option<Vec<String>>,
#[deserr(default, error = DeserrJsonError<InvalidSearchFacets>)]
pub facets: Option<Vec<String>>,
#[deserr(default, error = DeserrJsonError<InvalidSearchHighlightPreTag>, default = DEFAULT_HIGHLIGHT_PRE_TAG())]
pub highlight_pre_tag: String,
#[deserr(default, error = DeserrJsonError<InvalidSearchHighlightPostTag>, default = DEFAULT_HIGHLIGHT_POST_TAG())]
pub highlight_post_tag: String,
#[deserr(default, error = DeserrJsonError<InvalidSearchCropMarker>, default = DEFAULT_CROP_MARKER())]
pub crop_marker: String,
#[deserr(default, error = DeserrJsonError<InvalidSearchMatchingStrategy>, default)]
pub matching_strategy: MatchingStrategy,
}
pub async fn search(
index_scheduler: GuardedData<ActionPolicy<{ actions::SEARCH }>, Data<IndexScheduler>>,
index_uid: web::Path<String>,
params: AwebJson<FacetSearchQuery, DeserrJsonError>,
req: HttpRequest,
analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
let query = params.into_inner();
debug!("facet search called with params: {:?}", query);
let mut aggregate = FacetSearchAggregator::from_query(&query, &req);
let facet_query = query.facet_query.clone();
let facet_name = query.facet_name.clone();
let mut search_query = SearchQuery::from(query);
// Tenant token search_rules.
if let Some(search_rules) = index_scheduler.filters().get_index_search_rules(&index_uid) {
add_search_rules(&mut search_query, search_rules);
}
let index = index_scheduler.index(&index_uid)?;
let search_result = tokio::task::spawn_blocking(move || {
perform_facet_search(&index, search_query, facet_query, facet_name)
})
.await?;
if let Ok(ref search_result) = search_result {
aggregate.succeed(search_result);
}
analytics.post_facet_search(aggregate);
let search_result = search_result?;
debug!("returns: {:?}", search_result);
Ok(HttpResponse::Ok().json(search_result))
}
impl From<FacetSearchQuery> for SearchQuery {
fn from(value: FacetSearchQuery) -> Self {
SearchQuery {
q: value.q,
offset: value.offset,
limit: value.limit,
page: value.page,
hits_per_page: value.hits_per_page,
attributes_to_retrieve: value.attributes_to_retrieve,
attributes_to_crop: value.attributes_to_crop,
crop_length: value.crop_length,
attributes_to_highlight: value.attributes_to_highlight,
show_matches_position: value.show_matches_position,
filter: value.filter,
sort: value.sort,
facets: value.facets,
highlight_pre_tag: value.highlight_pre_tag,
highlight_post_tag: value.highlight_post_tag,
crop_marker: value.crop_marker,
matching_strategy: value.matching_strategy,
}
}
}

View File

@@ -24,6 +24,7 @@ use crate::extractors::authentication::{AuthenticationError, GuardedData};
use crate::extractors::sequential_extractor::SeqHandler;
pub mod documents;
pub mod facet_search;
pub mod search;
pub mod settings;
@@ -44,6 +45,7 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
.service(web::resource("/stats").route(web::get().to(SeqHandler(get_index_stats))))
.service(web::scope("/documents").configure(documents::configure))
.service(web::scope("/search").configure(search::configure))
.service(web::scope("/facet-search").configure(facet_search::configure))
.service(web::scope("/settings").configure(settings::configure)),
);
}

View File

@@ -56,10 +56,6 @@ pub struct SearchQueryGet {
sort: Option<String>,
#[deserr(default, error = DeserrQueryParamError<InvalidSearchShowMatchesPosition>)]
show_matches_position: Param<bool>,
#[deserr(default, error = DeserrQueryParamError<InvalidSearchShowRankingScore>)]
show_ranking_score: Param<bool>,
#[deserr(default, error = DeserrQueryParamError<InvalidSearchShowRankingScoreDetails>)]
show_ranking_score_details: Param<bool>,
#[deserr(default, error = DeserrQueryParamError<InvalidSearchFacets>)]
facets: Option<CS<String>>,
#[deserr( default = DEFAULT_HIGHLIGHT_PRE_TAG(), error = DeserrQueryParamError<InvalidSearchHighlightPreTag>)]
@@ -95,8 +91,6 @@ impl From<SearchQueryGet> for SearchQuery {
filter,
sort: other.sort.map(|attr| fix_sort_query_parameters(&attr)),
show_matches_position: other.show_matches_position.0,
show_ranking_score: other.show_ranking_score.0,
show_ranking_score_details: other.show_ranking_score_details.0,
facets: other.facets.map(|o| o.into_iter().collect()),
highlight_pre_tag: other.highlight_pre_tag,
highlight_post_tag: other.highlight_post_tag,

View File

@@ -17,7 +17,7 @@ pub fn configure(config: &mut web::ServiceConfig) {
pub async fn get_metrics(
index_scheduler: GuardedData<ActionPolicy<{ actions::METRICS_GET }>, Data<IndexScheduler>>,
auth_controller: Data<AuthController>,
auth_controller: GuardedData<ActionPolicy<{ actions::METRICS_GET }>, Data<AuthController>>,
) -> Result<HttpResponse, ResponseError> {
let auth_filters = index_scheduler.filters();
if !auth_filters.all_indexes_authorized() {
@@ -28,10 +28,10 @@ pub async fn get_metrics(
return Err(error);
}
let response = create_all_stats((*index_scheduler).clone(), auth_controller, auth_filters)?;
let response =
create_all_stats((*index_scheduler).clone(), (*auth_controller).clone(), auth_filters)?;
crate::metrics::MEILISEARCH_DB_SIZE_BYTES.set(response.database_size as i64);
crate::metrics::MEILISEARCH_USED_DB_SIZE_BYTES.set(response.used_database_size as i64);
crate::metrics::MEILISEARCH_INDEX_COUNT.set(response.indexes.len() as i64);
for (index, value) in response.indexes.iter() {
@@ -40,14 +40,6 @@ pub async fn get_metrics(
.set(value.number_of_documents as i64);
}
for (kind, value) in index_scheduler.get_stats()? {
for (value, count) in value {
crate::metrics::MEILISEARCH_NB_TASKS
.with_label_values(&[&kind, &value])
.set(count as i64);
}
}
let encoder = TextEncoder::new();
let mut buffer = vec![];
encoder.encode(&prometheus::gather(), &mut buffer).expect("Failed to encode metrics");

View File

@@ -231,8 +231,6 @@ pub async fn running() -> HttpResponse {
#[serde(rename_all = "camelCase")]
pub struct Stats {
pub database_size: u64,
#[serde(skip)]
pub used_database_size: u64,
#[serde(serialize_with = "time::serde::rfc3339::option::serialize")]
pub last_update: Option<OffsetDateTime>,
pub indexes: BTreeMap<String, indexes::IndexStats>,
@@ -261,7 +259,6 @@ pub fn create_all_stats(
let mut last_task: Option<OffsetDateTime> = None;
let mut indexes = BTreeMap::new();
let mut database_size = 0;
let mut used_database_size = 0;
for index_uid in index_scheduler.index_names()? {
// Accumulate the size of all indexes, even unauthorized ones, so
@@ -269,7 +266,6 @@ pub fn create_all_stats(
// See <https://github.com/meilisearch/meilisearch/pull/3541#discussion_r1126747643> for context.
let stats = index_scheduler.index_stats(&index_uid)?;
database_size += stats.inner_stats.database_size;
used_database_size += stats.inner_stats.used_database_size;
if !filters.is_index_authorized(&index_uid) {
continue;
@@ -282,14 +278,10 @@ pub fn create_all_stats(
}
database_size += index_scheduler.size()?;
used_database_size += index_scheduler.used_size()?;
database_size += auth_controller.size()?;
used_database_size += auth_controller.used_size()?;
let update_file_size = index_scheduler.compute_update_file_size()?;
database_size += update_file_size;
used_database_size += update_file_size;
database_size += index_scheduler.compute_update_file_size()?;
let stats = Stats { database_size, used_database_size, last_update: last_task, indexes };
let stats = Stats { database_size, last_update: last_task, indexes };
Ok(stats)
}

View File

@@ -99,7 +99,7 @@ pub struct DetailsView {
#[serde(skip_serializing_if = "Option::is_none")]
pub deleted_tasks: Option<Option<u64>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub original_filter: Option<Option<String>>,
pub original_filter: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub dump_uid: Option<Option<String>>,
#[serde(skip_serializing_if = "Option::is_none")]
@@ -131,17 +131,8 @@ impl From<Details> for DetailsView {
} => DetailsView {
provided_ids: Some(received_document_ids),
deleted_documents: Some(deleted_documents),
original_filter: Some(None),
..DetailsView::default()
},
Details::DocumentDeletionByFilter { original_filter, deleted_documents } => {
DetailsView {
provided_ids: Some(0),
original_filter: Some(Some(original_filter)),
deleted_documents: Some(deleted_documents),
..DetailsView::default()
}
}
Details::ClearAll { deleted_documents } => {
DetailsView { deleted_documents: Some(deleted_documents), ..DetailsView::default() }
}
@@ -149,7 +140,7 @@ impl From<Details> for DetailsView {
DetailsView {
matched_tasks: Some(matched_tasks),
canceled_tasks: Some(canceled_tasks),
original_filter: Some(Some(original_filter)),
original_filter: Some(original_filter),
..DetailsView::default()
}
}
@@ -157,7 +148,7 @@ impl From<Details> for DetailsView {
DetailsView {
matched_tasks: Some(matched_tasks),
deleted_tasks: Some(deleted_tasks),
original_filter: Some(Some(original_filter)),
original_filter: Some(original_filter),
..DetailsView::default()
}
}

View File

@@ -8,8 +8,9 @@ use either::Either;
use meilisearch_auth::IndexSearchRules;
use meilisearch_types::deserr::DeserrJsonError;
use meilisearch_types::error::deserr_codes::*;
use meilisearch_types::heed::RoTxn;
use meilisearch_types::index_uid::IndexUid;
use meilisearch_types::milli::score_details::{ScoreDetails, ScoringStrategy};
use meilisearch_types::milli::{FacetValueHit, SearchForFacetValues};
use meilisearch_types::settings::DEFAULT_PAGINATION_MAX_TOTAL_HITS;
use meilisearch_types::{milli, Document};
use milli::tokenizer::TokenizerBuilder;
@@ -55,10 +56,6 @@ pub struct SearchQuery {
pub attributes_to_highlight: Option<HashSet<String>>,
#[deserr(default, error = DeserrJsonError<InvalidSearchShowMatchesPosition>, default)]
pub show_matches_position: bool,
#[deserr(default, error = DeserrJsonError<InvalidSearchShowRankingScore>, default)]
pub show_ranking_score: bool,
#[deserr(default, error = DeserrJsonError<InvalidSearchShowRankingScoreDetails>, default)]
pub show_ranking_score_details: bool,
#[deserr(default, error = DeserrJsonError<InvalidSearchFilter>)]
pub filter: Option<Value>,
#[deserr(default, error = DeserrJsonError<InvalidSearchSort>)]
@@ -108,10 +105,6 @@ pub struct SearchQueryWithIndex {
pub crop_length: usize,
#[deserr(default, error = DeserrJsonError<InvalidSearchAttributesToHighlight>)]
pub attributes_to_highlight: Option<HashSet<String>>,
#[deserr(default, error = DeserrJsonError<InvalidSearchShowRankingScore>, default)]
pub show_ranking_score: bool,
#[deserr(default, error = DeserrJsonError<InvalidSearchShowRankingScoreDetails>, default)]
pub show_ranking_score_details: bool,
#[deserr(default, error = DeserrJsonError<InvalidSearchShowMatchesPosition>, default)]
pub show_matches_position: bool,
#[deserr(default, error = DeserrJsonError<InvalidSearchFilter>)]
@@ -143,8 +136,6 @@ impl SearchQueryWithIndex {
attributes_to_crop,
crop_length,
attributes_to_highlight,
show_ranking_score,
show_ranking_score_details,
show_matches_position,
filter,
sort,
@@ -166,8 +157,6 @@ impl SearchQueryWithIndex {
attributes_to_crop,
crop_length,
attributes_to_highlight,
show_ranking_score,
show_ranking_score_details,
show_matches_position,
filter,
sort,
@@ -183,7 +172,7 @@ impl SearchQueryWithIndex {
}
}
#[derive(Debug, Clone, PartialEq, Eq, Deserr)]
#[derive(Debug, Copy, Clone, PartialEq, Eq, Deserr)]
#[deserr(rename_all = camelCase)]
pub enum MatchingStrategy {
/// Remove query words from last to first
@@ -207,7 +196,7 @@ impl From<MatchingStrategy> for TermsMatchingStrategy {
}
}
#[derive(Debug, Clone, Serialize, PartialEq)]
#[derive(Debug, Clone, Serialize, PartialEq, Eq)]
pub struct SearchHit {
#[serde(flatten)]
pub document: Document,
@@ -215,10 +204,6 @@ pub struct SearchHit {
pub formatted: Document,
#[serde(rename = "_matchesPosition", skip_serializing_if = "Option::is_none")]
pub matches_position: Option<MatchesPosition>,
#[serde(rename = "_rankingScore", skip_serializing_if = "Option::is_none")]
pub ranking_score: Option<f64>,
#[serde(rename = "_rankingScoreDetails", skip_serializing_if = "Option::is_none")]
pub ranking_score_details: Option<serde_json::Map<String, serde_json::Value>>,
}
#[derive(Serialize, Debug, Clone, PartialEq)]
@@ -258,6 +243,14 @@ pub struct FacetStats {
pub max: f64,
}
#[derive(Serialize, Debug, Clone, PartialEq)]
#[serde(rename_all = "camelCase")]
pub struct FacetSearchResult {
pub hits: Vec<FacetValueHit>,
pub query: Option<String>,
pub processing_time_ms: u128,
}
/// Incorporate search rules in search query
pub fn add_search_rules(query: &mut SearchQuery, rules: IndexSearchRules) {
query.filter = match (query.filter.take(), rules.filter) {
@@ -278,14 +271,12 @@ pub fn add_search_rules(query: &mut SearchQuery, rules: IndexSearchRules) {
}
}
pub fn perform_search(
index: &Index,
query: SearchQuery,
) -> Result<SearchResult, MeilisearchHttpError> {
let before_search = Instant::now();
let rtxn = index.read_txn()?;
let mut search = index.search(&rtxn);
fn prepare_search<'t>(
index: &'t Index,
rtxn: &'t RoTxn,
query: &'t SearchQuery,
) -> Result<(milli::Search<'t>, bool, usize, usize), MeilisearchHttpError> {
let mut search = index.search(rtxn);
if let Some(ref query) = query.q {
search.query(query);
@@ -295,16 +286,11 @@ pub fn perform_search(
search.terms_matching_strategy(query.matching_strategy.into());
let max_total_hits = index
.pagination_max_total_hits(&rtxn)
.pagination_max_total_hits(rtxn)
.map_err(milli::Error::from)?
.unwrap_or(DEFAULT_PAGINATION_MAX_TOTAL_HITS);
search.exhaustive_number_hits(is_finite_pagination);
search.scoring_strategy(if query.show_ranking_score || query.show_ranking_score_details {
ScoringStrategy::Detailed
} else {
ScoringStrategy::Skip
});
// compute the offset on the limit depending on the pagination mode.
let (offset, limit) = if is_finite_pagination {
@@ -342,8 +328,20 @@ pub fn perform_search(
search.sort_criteria(sort);
}
let milli::SearchResult { documents_ids, matching_words, candidates, document_scores, .. } =
search.execute()?;
Ok((search, is_finite_pagination, max_total_hits, offset))
}
pub fn perform_search(
index: &Index,
query: SearchQuery,
) -> Result<SearchResult, MeilisearchHttpError> {
let before_search = Instant::now();
let rtxn = index.read_txn()?;
let (search, is_finite_pagination, max_total_hits, offset) =
prepare_search(index, &rtxn, &query)?;
let milli::SearchResult { documents_ids, matching_words, candidates, .. } = search.execute()?;
let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
@@ -415,7 +413,7 @@ pub fn perform_search(
let documents_iter = index.documents(&rtxn, documents_ids)?;
for ((_id, obkv), score) in documents_iter.into_iter().zip(document_scores.into_iter()) {
for (_id, obkv) in documents_iter {
// First generate a document with all the displayed fields
let displayed_document = make_document(&displayed_ids, &fields_ids_map, obkv)?;
@@ -439,18 +437,7 @@ pub fn perform_search(
insert_geo_distance(sort, &mut document);
}
let ranking_score =
query.show_ranking_score.then(|| ScoreDetails::global_score(score.iter()));
let ranking_score_details =
query.show_ranking_score_details.then(|| ScoreDetails::to_json_map(score.iter()));
let hit = SearchHit {
document,
formatted,
matches_position,
ranking_score_details,
ranking_score,
};
let hit = SearchHit { document, formatted, matches_position };
documents.push(hit);
}
@@ -507,6 +494,30 @@ pub fn perform_search(
Ok(result)
}
pub fn perform_facet_search(
index: &Index,
search_query: SearchQuery,
facet_query: Option<String>,
facet_name: String,
) -> Result<FacetSearchResult, MeilisearchHttpError> {
let before_search = Instant::now();
let rtxn = index.read_txn()?;
let (search, _, _, _) = prepare_search(index, &rtxn, &search_query)?;
let mut facet_search = SearchForFacetValues::new(facet_name, search);
if let Some(facet_query) = &facet_query {
facet_search.query(facet_query);
}
let hits = facet_search.execute()?;
Ok(FacetSearchResult {
hits,
query: facet_query,
processing_time_ms: before_search.elapsed().as_millis(),
})
}
fn insert_geo_distance(sorts: &[String], document: &mut Document) {
lazy_static::lazy_static! {
static ref GEO_REGEX: Regex =
@@ -779,7 +790,7 @@ fn format_value<A: AsRef<[u8]>>(
}
}
pub(crate) fn parse_filter(facets: &Value) -> Result<Option<Filter>, MeilisearchHttpError> {
fn parse_filter(facets: &Value) -> Result<Option<Filter>, MeilisearchHttpError> {
match facets {
Value::String(expr) => {
let condition = Filter::from_str(expr)?;

View File

@@ -16,11 +16,8 @@ pub static AUTHORIZATIONS: Lazy<HashMap<(&'static str, &'static str), HashSet<&'
("GET", "/indexes/products/search") => hashset!{"search", "*"},
("POST", "/indexes/products/documents") => hashset!{"documents.add", "documents.*", "*"},
("GET", "/indexes/products/documents") => hashset!{"documents.get", "documents.*", "*"},
("POST", "/indexes/products/documents/fetch") => hashset!{"documents.get", "documents.*", "*"},
("GET", "/indexes/products/documents/0") => hashset!{"documents.get", "documents.*", "*"},
("DELETE", "/indexes/products/documents/0") => hashset!{"documents.delete", "documents.*", "*"},
("POST", "/indexes/products/documents/delete-batch") => hashset!{"documents.delete", "documents.*", "*"},
("POST", "/indexes/products/documents/delete") => hashset!{"documents.delete", "documents.*", "*"},
("GET", "/tasks") => hashset!{"tasks.get", "tasks.*", "*"},
("DELETE", "/tasks") => hashset!{"tasks.delete", "tasks.*", "*"},
("GET", "/tasks?indexUid=products") => hashset!{"tasks.get", "tasks.*", "*"},

View File

@@ -198,11 +198,6 @@ impl Index<'_> {
self.service.get(url).await
}
pub async fn get_document_by_filter(&self, payload: Value) -> (Value, StatusCode) {
let url = format!("/indexes/{}/documents/fetch", urlencode(self.uid.as_ref()));
self.service.post(url, payload).await
}
pub async fn get_all_documents_raw(&self, options: &str) -> (Value, StatusCode) {
let url = format!("/indexes/{}/documents{}", urlencode(self.uid.as_ref()), options);
self.service.get(url).await
@@ -230,11 +225,6 @@ impl Index<'_> {
self.service.delete(url).await
}
pub async fn delete_document_by_filter(&self, body: Value) -> (Value, StatusCode) {
let url = format!("/indexes/{}/documents/delete", urlencode(self.uid.as_ref()));
self.service.post_encoded(url, body, self.encoder).await
}
pub async fn clear_all_documents(&self) -> (Value, StatusCode) {
let url = format!("/indexes/{}/documents", urlencode(self.uid.as_ref()));
self.service.delete(url).await

View File

@@ -1781,7 +1781,7 @@ async fn error_add_documents_payload_size() {
snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }),
@r###"
{
"message": "The provided payload reached the size limit. The maximum accepted payload size is 10.00 MiB.",
"message": "The provided payload reached the size limit.",
"code": "payload_too_large",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#payload_too_large"

View File

@@ -1,4 +1,3 @@
use meili_snap::{json_string, snapshot};
use serde_json::json;
use crate::common::{GetAllDocumentsOptions, Server};
@@ -136,254 +135,3 @@ async fn delete_no_document_batch() {
assert_eq!(code, 200);
assert_eq!(response["results"].as_array().unwrap().len(), 3);
}
#[actix_rt::test]
async fn delete_document_by_filter() {
let server = Server::new().await;
let index = server.index("doggo");
index.update_settings_filterable_attributes(json!(["color"])).await;
index
.add_documents(
json!([
{ "id": 0, "color": "red" },
{ "id": 1, "color": "blue" },
{ "id": 2, "color": "blue" },
{ "id": 3 },
]),
Some("id"),
)
.await;
index.wait_task(1).await;
let (response, code) =
index.delete_document_by_filter(json!({ "filter": "color = blue"})).await;
snapshot!(code, @"202 Accepted");
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
{
"taskUid": 2,
"indexUid": "doggo",
"status": "enqueued",
"type": "documentDeletion",
"enqueuedAt": "[date]"
}
"###);
let response = index.wait_task(2).await;
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]", ".duration" => "[duration]" }), @r###"
{
"uid": 2,
"indexUid": "doggo",
"status": "succeeded",
"type": "documentDeletion",
"canceledBy": null,
"details": {
"providedIds": 0,
"deletedDocuments": 2,
"originalFilter": "\"color = blue\""
},
"error": null,
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
let (documents, code) = index.get_all_documents(GetAllDocumentsOptions::default()).await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(documents), @r###"
{
"results": [
{
"id": 0,
"color": "red"
},
{
"id": 3
}
],
"offset": 0,
"limit": 20,
"total": 2
}
"###);
let (response, code) =
index.delete_document_by_filter(json!({ "filter": "color NOT EXISTS"})).await;
snapshot!(code, @"202 Accepted");
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]", ".duration" => "[duration]" }), @r###"
{
"taskUid": 3,
"indexUid": "doggo",
"status": "enqueued",
"type": "documentDeletion",
"enqueuedAt": "[date]"
}
"###);
let response = index.wait_task(3).await;
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]", ".duration" => "[duration]" }), @r###"
{
"uid": 3,
"indexUid": "doggo",
"status": "succeeded",
"type": "documentDeletion",
"canceledBy": null,
"details": {
"providedIds": 0,
"deletedDocuments": 1,
"originalFilter": "\"color NOT EXISTS\""
},
"error": null,
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
let (documents, code) = index.get_all_documents(GetAllDocumentsOptions::default()).await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(documents), @r###"
{
"results": [
{
"id": 0,
"color": "red"
}
],
"offset": 0,
"limit": 20,
"total": 1
}
"###);
}
#[actix_rt::test]
async fn delete_document_by_complex_filter() {
let server = Server::new().await;
let index = server.index("doggo");
index.update_settings_filterable_attributes(json!(["color"])).await;
index
.add_documents(
json!([
{ "id": 0, "color": "red" },
{ "id": 1, "color": "blue" },
{ "id": 2, "color": "blue" },
{ "id": 3, "color": "green" },
{ "id": 4 },
]),
Some("id"),
)
.await;
index.wait_task(1).await;
let (response, code) = index
.delete_document_by_filter(
json!({ "filter": ["color != red", "color != green", "color EXISTS"] }),
)
.await;
snapshot!(code, @"202 Accepted");
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
{
"taskUid": 2,
"indexUid": "doggo",
"status": "enqueued",
"type": "documentDeletion",
"enqueuedAt": "[date]"
}
"###);
let response = index.wait_task(2).await;
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]", ".duration" => "[duration]" }), @r###"
{
"uid": 2,
"indexUid": "doggo",
"status": "succeeded",
"type": "documentDeletion",
"canceledBy": null,
"details": {
"providedIds": 0,
"deletedDocuments": 2,
"originalFilter": "[\"color != red\",\"color != green\",\"color EXISTS\"]"
},
"error": null,
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
let (documents, code) = index.get_all_documents(GetAllDocumentsOptions::default()).await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(documents), @r###"
{
"results": [
{
"id": 0,
"color": "red"
},
{
"id": 3,
"color": "green"
},
{
"id": 4
}
],
"offset": 0,
"limit": 20,
"total": 3
}
"###);
let (response, code) = index
.delete_document_by_filter(json!({ "filter": [["color = green", "color NOT EXISTS"]] }))
.await;
snapshot!(code, @"202 Accepted");
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]", ".duration" => "[duration]" }), @r###"
{
"taskUid": 3,
"indexUid": "doggo",
"status": "enqueued",
"type": "documentDeletion",
"enqueuedAt": "[date]"
}
"###);
let response = index.wait_task(3).await;
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]", ".duration" => "[duration]" }), @r###"
{
"uid": 3,
"indexUid": "doggo",
"status": "succeeded",
"type": "documentDeletion",
"canceledBy": null,
"details": {
"providedIds": 0,
"deletedDocuments": 4,
"originalFilter": "[[\"color = green\",\"color NOT EXISTS\"]]"
},
"error": null,
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
let (documents, code) = index.get_all_documents(GetAllDocumentsOptions::default()).await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(documents), @r###"
{
"results": [
{
"id": 0,
"color": "red"
}
],
"offset": 0,
"limit": 20,
"total": 1
}
"###);
}

View File

@@ -82,111 +82,6 @@ async fn get_all_documents_bad_limit() {
"###);
}
#[actix_rt::test]
async fn get_all_documents_bad_filter() {
let server = Server::new().await;
let index = server.index("test");
// Since the filter can't be parsed automatically by deserr, we have the wrong error message
// if the index does not exist: we could expect to get an error message about the invalid filter before
// the existence of the index is checked, but it is not the case.
let (response, code) = index.get_all_documents_raw("?filter").await;
snapshot!(code, @"404 Not Found");
snapshot!(json_string!(response), @r###"
{
"message": "Index `test` not found.",
"code": "index_not_found",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#index_not_found"
}
"###);
let (response, code) = index.get_all_documents_raw("?filter=doggo").await;
snapshot!(code, @"404 Not Found");
snapshot!(json_string!(response), @r###"
{
"message": "Index `test` not found.",
"code": "index_not_found",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#index_not_found"
}
"###);
let (response, code) = index.get_all_documents_raw("?filter=doggo=bernese").await;
snapshot!(code, @"404 Not Found");
snapshot!(json_string!(response), @r###"
{
"message": "Index `test` not found.",
"code": "index_not_found",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#index_not_found"
}
"###);
let (response, code) = index.create(None).await;
snapshot!(code, @"202 Accepted");
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
{
"taskUid": 0,
"indexUid": "test",
"status": "enqueued",
"type": "indexCreation",
"enqueuedAt": "[date]"
}
"###);
let response = server.wait_task(0).await;
snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }), @r###"
{
"uid": 0,
"indexUid": "test",
"status": "succeeded",
"type": "indexCreation",
"canceledBy": null,
"details": {
"primaryKey": null
},
"error": null,
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
let (response, code) = index.get_all_documents_raw("?filter").await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(response), @r###"
{
"results": [],
"offset": 0,
"limit": 20,
"total": 0
}
"###);
let (response, code) = index.get_all_documents_raw("?filter=doggo").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `doggo`.\n1:6 doggo",
"code": "invalid_document_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_filter"
}
"###);
let (response, code) = index.get_all_documents_raw("?filter=doggo=bernese").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Attribute `doggo` is not filterable. This index does not have configured filterable attributes.\n1:6 doggo=bernese",
"code": "invalid_document_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_filter"
}
"###);
}
#[actix_rt::test]
async fn delete_documents_batch() {
let server = Server::new().await;
@@ -523,264 +418,3 @@ async fn update_documents_csv_delimiter_with_bad_content_type() {
}
"###);
}
#[actix_rt::test]
async fn delete_document_by_filter() {
let server = Server::new().await;
let index = server.index("doggo");
// send a bad payload type
let (response, code) = index.delete_document_by_filter(json!("hello")).await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Invalid value type: expected an object, but found a string: `\"hello\"`",
"code": "bad_request",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#bad_request"
}
"###);
// send bad payload type
let (response, code) = index.delete_document_by_filter(json!({ "filter": true })).await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Invalid syntax for the filter parameter: `expected String, Array, found: true`.",
"code": "invalid_document_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_filter"
}
"###);
// send bad filter
let (response, code) = index.delete_document_by_filter(json!({ "filter": "hello"})).await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `hello`.\n1:6 hello",
"code": "invalid_document_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_filter"
}
"###);
// send empty filter
let (response, code) = index.delete_document_by_filter(json!({ "filter": ""})).await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Sending an empty filter is forbidden.",
"code": "invalid_document_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_filter"
}
"###);
// do not send any filter
let (response, code) = index.delete_document_by_filter(json!({})).await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Missing field `filter`",
"code": "missing_document_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#missing_document_filter"
}
"###);
// index does not exists
let (response, code) =
index.delete_document_by_filter(json!({ "filter": "doggo = bernese"})).await;
snapshot!(code, @"202 Accepted");
let response = server.wait_task(response["taskUid"].as_u64().unwrap()).await;
snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]"}), @r###"
{
"uid": 0,
"indexUid": "doggo",
"status": "failed",
"type": "documentDeletion",
"canceledBy": null,
"details": {
"providedIds": 0,
"deletedDocuments": 0,
"originalFilter": "\"doggo = bernese\""
},
"error": {
"message": "Index `doggo` not found.",
"code": "index_not_found",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#index_not_found"
},
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
let (response, code) = index.create(None).await;
snapshot!(code, @"202 Accepted");
server.wait_task(response["taskUid"].as_u64().unwrap()).await;
// no filterable are set
let (response, code) =
index.delete_document_by_filter(json!({ "filter": "doggo = bernese"})).await;
snapshot!(code, @"202 Accepted");
let response = server.wait_task(response["taskUid"].as_u64().unwrap()).await;
snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]"}), @r###"
{
"uid": 2,
"indexUid": "doggo",
"status": "failed",
"type": "documentDeletion",
"canceledBy": null,
"details": {
"providedIds": 0,
"deletedDocuments": 0,
"originalFilter": "\"doggo = bernese\""
},
"error": {
"message": "Attribute `doggo` is not filterable. This index does not have configured filterable attributes.\n1:6 doggo = bernese",
"code": "invalid_document_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_filter"
},
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
let (response, code) = index.update_settings_filterable_attributes(json!(["doggo"])).await;
snapshot!(code, @"202 Accepted");
server.wait_task(response["taskUid"].as_u64().unwrap()).await;
// not filterable while there is a filterable attribute
let (response, code) =
index.delete_document_by_filter(json!({ "filter": "catto = jorts"})).await;
snapshot!(code, @"202 Accepted");
let response = server.wait_task(response["taskUid"].as_u64().unwrap()).await;
snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]"}), @r###"
{
"uid": 4,
"indexUid": "doggo",
"status": "failed",
"type": "documentDeletion",
"canceledBy": null,
"details": {
"providedIds": 0,
"deletedDocuments": 0,
"originalFilter": "\"catto = jorts\""
},
"error": {
"message": "Attribute `catto` is not filterable. Available filterable attributes are: `doggo`.\n1:6 catto = jorts",
"code": "invalid_document_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_filter"
},
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
}
#[actix_rt::test]
async fn fetch_document_by_filter() {
let server = Server::new().await;
let index = server.index("doggo");
index.update_settings_filterable_attributes(json!(["color"])).await;
index
.add_documents(
json!([
{ "id": 0, "color": "red" },
{ "id": 1, "color": "blue" },
{ "id": 2, "color": "blue" },
{ "id": 3 },
]),
Some("id"),
)
.await;
index.wait_task(1).await;
let (response, code) = index.get_document_by_filter(json!(null)).await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Invalid value type: expected an object, but found null",
"code": "bad_request",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#bad_request"
}
"###);
let (response, code) = index.get_document_by_filter(json!({ "offset": "doggo" })).await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Invalid value type at `.offset`: expected a positive integer, but found a string: `\"doggo\"`",
"code": "invalid_document_offset",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_offset"
}
"###);
let (response, code) = index.get_document_by_filter(json!({ "limit": "doggo" })).await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Invalid value type at `.limit`: expected a positive integer, but found a string: `\"doggo\"`",
"code": "invalid_document_limit",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_limit"
}
"###);
let (response, code) = index.get_document_by_filter(json!({ "fields": "doggo" })).await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Invalid value type at `.fields`: expected an array, but found a string: `\"doggo\"`",
"code": "invalid_document_fields",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_fields"
}
"###);
let (response, code) = index.get_document_by_filter(json!({ "filter": true })).await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Invalid syntax for the filter parameter: `expected String, Array, found: true`.",
"code": "invalid_document_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_filter"
}
"###);
let (response, code) = index.get_document_by_filter(json!({ "filter": "cool doggo" })).await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `cool doggo`.\n1:11 cool doggo",
"code": "invalid_document_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_filter"
}
"###);
let (response, code) =
index.get_document_by_filter(json!({ "filter": "doggo = bernese" })).await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Attribute `doggo` is not filterable. Available filterable attributes are: `color`.\n1:6 doggo = bernese",
"code": "invalid_document_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_filter"
}
"###);
}

View File

@@ -1,6 +1,5 @@
use actix_web::test;
use http::header::ACCEPT_ENCODING;
use meili_snap::*;
use serde_json::{json, Value};
use urlencoding::encode as urlencode;
@@ -379,164 +378,3 @@ async fn get_documents_displayed_attributes_is_ignored() {
assert_eq!(response.as_object().unwrap().keys().count(), 16);
assert!(response.as_object().unwrap().get("gender").is_some());
}
#[actix_rt::test]
async fn get_document_by_filter() {
let server = Server::new().await;
let index = server.index("doggo");
index.update_settings_filterable_attributes(json!(["color"])).await;
index
.add_documents(
json!([
{ "id": 0, "color": "red" },
{ "id": 1, "color": "blue" },
{ "id": 2, "color": "blue" },
{ "id": 3 },
]),
Some("id"),
)
.await;
index.wait_task(1).await;
let (response, code) = index.get_document_by_filter(json!({})).await;
let (response2, code2) = index.get_all_documents_raw("").await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
{
"results": [
{
"id": 0,
"color": "red"
},
{
"id": 1,
"color": "blue"
},
{
"id": 2,
"color": "blue"
},
{
"id": 3
}
],
"offset": 0,
"limit": 20,
"total": 4
}
"###);
assert_eq!(code, code2);
assert_eq!(response, response2);
let (response, code) = index.get_document_by_filter(json!({ "filter": "color = blue" })).await;
let (response2, code2) = index.get_all_documents_raw("?filter=color=blue").await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
{
"results": [
{
"id": 1,
"color": "blue"
},
{
"id": 2,
"color": "blue"
}
],
"offset": 0,
"limit": 20,
"total": 2
}
"###);
assert_eq!(code, code2);
assert_eq!(response, response2);
let (response, code) = index
.get_document_by_filter(json!({ "offset": 1, "limit": 1, "filter": "color != blue" }))
.await;
let (response2, code2) =
index.get_all_documents_raw("?filter=color!=blue&offset=1&limit=1").await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
{
"results": [
{
"id": 3
}
],
"offset": 1,
"limit": 1,
"total": 2
}
"###);
assert_eq!(code, code2);
assert_eq!(response, response2);
let (response, code) = index
.get_document_by_filter(
json!({ "limit": 1, "filter": "color != blue", "fields": ["color"] }),
)
.await;
let (response2, code2) =
index.get_all_documents_raw("?limit=1&filter=color!=blue&fields=color").await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
{
"results": [
{
"color": "red"
}
],
"offset": 0,
"limit": 1,
"total": 2
}
"###);
assert_eq!(code, code2);
assert_eq!(response, response2);
// Now testing more complex filter that the get route can't represent
let (response, code) =
index.get_document_by_filter(json!({ "filter": [["color = blue", "color = red"]] })).await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
{
"results": [
{
"id": 0,
"color": "red"
},
{
"id": 1,
"color": "blue"
},
{
"id": 2,
"color": "blue"
}
],
"offset": 0,
"limit": 20,
"total": 3
}
"###);
let (response, code) = index
.get_document_by_filter(json!({ "filter": [["color != blue"], "color EXISTS"] }))
.await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
{
"results": [
{
"id": 0,
"color": "red"
}
],
"offset": 0,
"limit": 20,
"total": 1
}
"###);
}

View File

@@ -946,7 +946,7 @@ async fn sort_unset_ranking_rule() {
index.wait_task(1).await;
let expected_response = json!({
"message": "You must specify where `sort` is listed in the rankingRules setting to use the sort parameter at search time.",
"message": "The sort ranking rule must be specified in the ranking rules settings to use the sort parameter at search time.",
"code": "invalid_search_sort",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_sort"

View File

@@ -1,4 +1,3 @@
use insta::{allow_duplicates, assert_json_snapshot};
use serde_json::json;
use super::*;
@@ -19,43 +18,30 @@ async fn formatted_contain_wildcard() {
|response, code|
{
assert_eq!(code, 200, "{}", response);
allow_duplicates! {
assert_json_snapshot!(response["hits"][0],
{ "._rankingScore" => "[score]" },
@r###"
{
"_formatted": {
"id": "852",
"cattos": "<em>pésti</em>"
},
"_matchesPosition": {
"cattos": [
{
"start": 0,
"length": 5
}
]
}
}
"###);
}
}
assert_eq!(
response["hits"][0],
json!({
"_formatted": {
"id": "852",
"cattos": "<em>pésti</em>",
},
"_matchesPosition": {"cattos": [{"start": 0, "length": 5}]},
})
);
}
)
.await;
index
.search(json!({ "q": "pésti", "attributesToRetrieve": ["*"] }), |response, code| {
assert_eq!(code, 200, "{}", response);
allow_duplicates! {
assert_json_snapshot!(response["hits"][0],
{ "._rankingScore" => "[score]" },
@r###"
{
"id": 852,
"cattos": "pésti"
}
"###)
}
assert_eq!(
response["hits"][0],
json!({
"id": 852,
"cattos": "pésti",
})
);
})
.await;
@@ -64,29 +50,20 @@ async fn formatted_contain_wildcard() {
json!({ "q": "pésti", "attributesToRetrieve": ["*"], "attributesToHighlight": ["id"], "showMatchesPosition": true }),
|response, code| {
assert_eq!(code, 200, "{}", response);
allow_duplicates! {
assert_json_snapshot!(response["hits"][0],
{ "._rankingScore" => "[score]" },
@r###"
{
"id": 852,
"cattos": "pésti",
"_formatted": {
"id": "852",
"cattos": "pésti"
},
"_matchesPosition": {
"cattos": [
{
"start": 0,
"length": 5
}
]
}
}
"###)
}
})
assert_eq!(
response["hits"][0],
json!({
"id": 852,
"cattos": "pésti",
"_formatted": {
"id": "852",
"cattos": "pésti",
},
"_matchesPosition": {"cattos": [{"start": 0, "length": 5}]},
})
);
}
)
.await;
index
@@ -94,20 +71,17 @@ async fn formatted_contain_wildcard() {
json!({ "q": "pésti", "attributesToRetrieve": ["*"], "attributesToCrop": ["*"] }),
|response, code| {
assert_eq!(code, 200, "{}", response);
allow_duplicates! {
assert_json_snapshot!(response["hits"][0],
{ "._rankingScore" => "[score]" },
@r###"
{
"id": 852,
"cattos": "pésti",
"_formatted": {
"id": "852",
"cattos": "pésti"
}
}
"###);
}
assert_eq!(
response["hits"][0],
json!({
"id": 852,
"cattos": "pésti",
"_formatted": {
"id": "852",
"cattos": "pésti",
}
})
);
},
)
.await;
@@ -115,20 +89,17 @@ async fn formatted_contain_wildcard() {
index
.search(json!({ "q": "pésti", "attributesToCrop": ["*"] }), |response, code| {
assert_eq!(code, 200, "{}", response);
allow_duplicates! {
assert_json_snapshot!(response["hits"][0],
{ "._rankingScore" => "[score]" },
@r###"
{
"id": 852,
"cattos": "pésti",
"_formatted": {
"id": "852",
"cattos": "pésti"
}
}
"###)
}
assert_eq!(
response["hits"][0],
json!({
"id": 852,
"cattos": "pésti",
"_formatted": {
"id": "852",
"cattos": "pésti",
}
})
);
})
.await;
}
@@ -145,24 +116,21 @@ async fn format_nested() {
index
.search(json!({ "q": "pésti", "attributesToRetrieve": ["doggos"] }), |response, code| {
assert_eq!(code, 200, "{}", response);
allow_duplicates! {
assert_json_snapshot!(response["hits"][0],
{ "._rankingScore" => "[score]" },
@r###"
{
"doggos": [
{
"name": "bobby",
"age": 2
},
{
"name": "buddy",
"age": 4
}
]
}
"###)
}
assert_eq!(
response["hits"][0],
json!({
"doggos": [
{
"name": "bobby",
"age": 2,
},
{
"name": "buddy",
"age": 4,
},
],
})
);
})
.await;
@@ -171,22 +139,19 @@ async fn format_nested() {
json!({ "q": "pésti", "attributesToRetrieve": ["doggos.name"] }),
|response, code| {
assert_eq!(code, 200, "{}", response);
allow_duplicates! {
assert_json_snapshot!(response["hits"][0],
{ "._rankingScore" => "[score]" },
@r###"
{
"doggos": [
{
"name": "bobby"
},
{
"name": "buddy"
}
]
}
"###)
}
assert_eq!(
response["hits"][0],
json!({
"doggos": [
{
"name": "bobby",
},
{
"name": "buddy",
},
],
})
);
},
)
.await;
@@ -196,30 +161,20 @@ async fn format_nested() {
json!({ "q": "bobby", "attributesToRetrieve": ["doggos.name"], "showMatchesPosition": true }),
|response, code| {
assert_eq!(code, 200, "{}", response);
allow_duplicates! {
assert_json_snapshot!(response["hits"][0],
{ "._rankingScore" => "[score]" },
@r###"
{
"doggos": [
{
"name": "bobby"
},
{
"name": "buddy"
}
],
"_matchesPosition": {
"doggos.name": [
{
"start": 0,
"length": 5
}
]
}
}
"###)
}
assert_eq!(
response["hits"][0],
json!({
"doggos": [
{
"name": "bobby",
},
{
"name": "buddy",
},
],
"_matchesPosition": {"doggos.name": [{"start": 0, "length": 5}]},
})
);
}
)
.await;
@@ -228,24 +183,21 @@ async fn format_nested() {
.search(json!({ "q": "pésti", "attributesToRetrieve": [], "attributesToHighlight": ["doggos.name"] }),
|response, code| {
assert_eq!(code, 200, "{}", response);
allow_duplicates! {
assert_json_snapshot!(response["hits"][0],
{ "._rankingScore" => "[score]" },
@r###"
{
"_formatted": {
"doggos": [
{
"name": "bobby"
},
{
"name": "buddy"
}
]
}
}
"###)
}
assert_eq!(
response["hits"][0],
json!({
"_formatted": {
"doggos": [
{
"name": "bobby",
},
{
"name": "buddy",
},
],
},
})
);
})
.await;
@@ -253,24 +205,21 @@ async fn format_nested() {
.search(json!({ "q": "pésti", "attributesToRetrieve": [], "attributesToCrop": ["doggos.name"] }),
|response, code| {
assert_eq!(code, 200, "{}", response);
allow_duplicates! {
assert_json_snapshot!(response["hits"][0],
{ "._rankingScore" => "[score]" },
@r###"
{
"_formatted": {
"doggos": [
{
"name": "bobby"
},
{
"name": "buddy"
}
]
}
}
"###)
}
assert_eq!(
response["hits"][0],
json!({
"_formatted": {
"doggos": [
{
"name": "bobby",
},
{
"name": "buddy",
},
],
},
})
);
})
.await;
@@ -278,61 +227,55 @@ async fn format_nested() {
.search(json!({ "q": "pésti", "attributesToRetrieve": ["doggos.name"], "attributesToHighlight": ["doggos.age"] }),
|response, code| {
assert_eq!(code, 200, "{}", response);
allow_duplicates! {
assert_json_snapshot!(response["hits"][0],
{ "._rankingScore" => "[score]" },
@r###"
{
"doggos": [
{
"name": "bobby"
},
{
"name": "buddy"
}
],
"_formatted": {
assert_eq!(
response["hits"][0],
json!({
"doggos": [
{
"name": "bobby",
"age": "2"
},
{
"name": "buddy",
"age": "4"
}
]
}
}
"###)
}
})
{
"name": "bobby",
},
{
"name": "buddy",
},
],
"_formatted": {
"doggos": [
{
"name": "bobby",
"age": "2",
},
{
"name": "buddy",
"age": "4",
},
],
},
})
);
})
.await;
index
.search(json!({ "q": "pésti", "attributesToRetrieve": [], "attributesToHighlight": ["doggos.age"], "attributesToCrop": ["doggos.name"] }),
|response, code| {
assert_eq!(code, 200, "{}", response);
allow_duplicates! {
assert_json_snapshot!(response["hits"][0],
{ "._rankingScore" => "[score]" },
@r###"
assert_eq!(
response["hits"][0],
json!({
"_formatted": {
"doggos": [
{
"_formatted": {
"doggos": [
{
"name": "bobby",
"age": "2"
},
{
"name": "buddy",
"age": "4"
}
]
}
}
"###)
}
"name": "bobby",
"age": "2",
},
{
"name": "buddy",
"age": "4",
},
],
},
})
);
}
)
.await;
@@ -354,66 +297,54 @@ async fn displayedattr_2_smol() {
.search(json!({ "attributesToRetrieve": ["father", "id"], "attributesToHighlight": ["mother"], "attributesToCrop": ["cattos"] }),
|response, code| {
assert_eq!(code, 200, "{}", response);
allow_duplicates! {
assert_json_snapshot!(response["hits"][0],
{ "._rankingScore" => "[score]" },
@r###"
{
"id": 852
}
"###)
}
assert_eq!(
response["hits"][0],
json!({
"id": 852,
})
);
})
.await;
index
.search(json!({ "attributesToRetrieve": ["id"] }), |response, code| {
assert_eq!(code, 200, "{}", response);
allow_duplicates! {
assert_json_snapshot!(response["hits"][0],
{ "._rankingScore" => "[score]" },
@r###"
{
"id": 852
}
"###)
}
assert_eq!(
response["hits"][0],
json!({
"id": 852,
})
);
})
.await;
index
.search(json!({ "attributesToHighlight": ["id"] }), |response, code| {
assert_eq!(code, 200, "{}", response);
allow_duplicates! {
assert_json_snapshot!(response["hits"][0],
{ "._rankingScore" => "[score]" },
@r###"
{
"id": 852,
"_formatted": {
"id": "852"
}
}
"###)
}
assert_eq!(
response["hits"][0],
json!({
"id": 852,
"_formatted": {
"id": "852",
}
})
);
})
.await;
index
.search(json!({ "attributesToCrop": ["id"] }), |response, code| {
assert_eq!(code, 200, "{}", response);
allow_duplicates! {
assert_json_snapshot!(response["hits"][0],
{ "._rankingScore" => "[score]" },
@r###"
{
"id": 852,
"_formatted": {
"id": "852"
}
}
"###)
}
assert_eq!(
response["hits"][0],
json!({
"id": 852,
"_formatted": {
"id": "852",
}
})
);
})
.await;
@@ -422,18 +353,15 @@ async fn displayedattr_2_smol() {
json!({ "attributesToHighlight": ["id"], "attributesToCrop": ["id"] }),
|response, code| {
assert_eq!(code, 200, "{}", response);
allow_duplicates! {
assert_json_snapshot!(response["hits"][0],
{ "._rankingScore" => "[score]" },
@r###"
{
"id": 852,
"_formatted": {
"id": "852"
}
}
"###)
}
assert_eq!(
response["hits"][0],
json!({
"id": 852,
"_formatted": {
"id": "852",
}
})
);
},
)
.await;
@@ -441,41 +369,31 @@ async fn displayedattr_2_smol() {
index
.search(json!({ "attributesToHighlight": ["cattos"] }), |response, code| {
assert_eq!(code, 200, "{}", response);
allow_duplicates! {
assert_json_snapshot!(response["hits"][0],
{ "._rankingScore" => "[score]" },
@r###"
{
"id": 852
}
"###)
}
assert_eq!(
response["hits"][0],
json!({
"id": 852,
})
);
})
.await;
index
.search(json!({ "attributesToCrop": ["cattos"] }), |response, code| {
assert_eq!(code, 200, "{}", response);
allow_duplicates! {
assert_json_snapshot!(response["hits"][0],
{ "._rankingScore" => "[score]" },
@r###"
{
"id": 852
}
"###)
}
assert_eq!(
response["hits"][0],
json!({
"id": 852,
})
);
})
.await;
index
.search(json!({ "attributesToRetrieve": ["cattos"] }), |response, code| {
assert_eq!(code, 200, "{}", response);
allow_duplicates! {
assert_json_snapshot!(response["hits"][0],
{ "._rankingScore" => "[score]" },
@"{}")
}
assert_eq!(response["hits"][0], json!({}));
})
.await;
@@ -484,11 +402,7 @@ async fn displayedattr_2_smol() {
json!({ "attributesToRetrieve": ["cattos"], "attributesToHighlight": ["cattos"], "attributesToCrop": ["cattos"] }),
|response, code| {
assert_eq!(code, 200, "{}", response);
allow_duplicates! {
assert_json_snapshot!(response["hits"][0],
{ "._rankingScore" => "[score]" },
@"{}")
}
assert_eq!(response["hits"][0], json!({}));
}
)
@@ -499,17 +413,14 @@ async fn displayedattr_2_smol() {
json!({ "attributesToRetrieve": ["cattos"], "attributesToHighlight": ["id"] }),
|response, code| {
assert_eq!(code, 200, "{}", response);
allow_duplicates! {
assert_json_snapshot!(response["hits"][0],
{ "._rankingScore" => "[score]" },
@r###"
{
"_formatted": {
"id": "852"
}
}
"###)
}
assert_eq!(
response["hits"][0],
json!({
"_formatted": {
"id": "852",
}
})
);
},
)
.await;
@@ -519,17 +430,14 @@ async fn displayedattr_2_smol() {
json!({ "attributesToRetrieve": ["cattos"], "attributesToCrop": ["id"] }),
|response, code| {
assert_eq!(code, 200, "{}", response);
allow_duplicates! {
assert_json_snapshot!(response["hits"][0],
{ "._rankingScore" => "[score]" },
@r###"
{
"_formatted": {
"id": "852"
}
}
"###)
}
assert_eq!(
response["hits"][0],
json!({
"_formatted": {
"id": "852",
}
})
);
},
)
.await;

View File

@@ -65,7 +65,7 @@ async fn simple_search_single_index() {
]}))
.await;
snapshot!(code, @"200 OK");
insta::assert_json_snapshot!(response["results"], { "[].processingTimeMs" => "[time]", ".**._rankingScore" => "[score]" }, @r###"
insta::assert_json_snapshot!(response["results"], { "[].processingTimeMs" => "[time]" }, @r###"
[
{
"indexUid": "test",
@@ -170,7 +170,7 @@ async fn simple_search_two_indexes() {
]}))
.await;
snapshot!(code, @"200 OK");
insta::assert_json_snapshot!(response["results"], { "[].processingTimeMs" => "[time]", ".**._rankingScore" => "[score]" }, @r###"
insta::assert_json_snapshot!(response["results"], { "[].processingTimeMs" => "[time]" }, @r###"
[
{
"indexUid": "test",

View File

@@ -413,7 +413,7 @@ async fn test_summarized_document_addition_or_update() {
}
#[actix_web::test]
async fn test_summarized_delete_documents_by_batch() {
async fn test_summarized_delete_batch() {
let server = Server::new().await;
let index = server.index("test");
index.delete_batch(vec![1, 2, 3]).await;
@@ -430,8 +430,7 @@ async fn test_summarized_delete_documents_by_batch() {
"canceledBy": null,
"details": {
"providedIds": 3,
"deletedDocuments": 0,
"originalFilter": null
"deletedDocuments": 0
},
"error": {
"message": "Index `test` not found.",
@@ -461,8 +460,7 @@ async fn test_summarized_delete_documents_by_batch() {
"canceledBy": null,
"details": {
"providedIds": 1,
"deletedDocuments": 0,
"originalFilter": null
"deletedDocuments": 0
},
"error": null,
"duration": "[duration]",
@@ -474,100 +472,7 @@ async fn test_summarized_delete_documents_by_batch() {
}
#[actix_web::test]
async fn test_summarized_delete_documents_by_filter() {
let server = Server::new().await;
let index = server.index("test");
index.delete_document_by_filter(json!({ "filter": "doggo = bernese" })).await;
index.wait_task(0).await;
let (task, _) = index.get_task(0).await;
assert_json_snapshot!(task,
{ ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" },
@r###"
{
"uid": 0,
"indexUid": "test",
"status": "failed",
"type": "documentDeletion",
"canceledBy": null,
"details": {
"providedIds": 0,
"deletedDocuments": 0,
"originalFilter": "\"doggo = bernese\""
},
"error": {
"message": "Index `test` not found.",
"code": "index_not_found",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#index_not_found"
},
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
index.create(None).await;
index.delete_document_by_filter(json!({ "filter": "doggo = bernese" })).await;
index.wait_task(2).await;
let (task, _) = index.get_task(2).await;
assert_json_snapshot!(task,
{ ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" },
@r###"
{
"uid": 2,
"indexUid": "test",
"status": "failed",
"type": "documentDeletion",
"canceledBy": null,
"details": {
"providedIds": 0,
"deletedDocuments": 0,
"originalFilter": "\"doggo = bernese\""
},
"error": {
"message": "Attribute `doggo` is not filterable. This index does not have configured filterable attributes.\n1:6 doggo = bernese",
"code": "invalid_document_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_filter"
},
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
index.update_settings(json!({ "filterableAttributes": ["doggo"] })).await;
index.delete_document_by_filter(json!({ "filter": "doggo = bernese" })).await;
index.wait_task(4).await;
let (task, _) = index.get_task(4).await;
assert_json_snapshot!(task,
{ ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" },
@r###"
{
"uid": 4,
"indexUid": "test",
"status": "succeeded",
"type": "documentDeletion",
"canceledBy": null,
"details": {
"providedIds": 0,
"deletedDocuments": 0,
"originalFilter": "\"doggo = bernese\""
},
"error": null,
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
}
#[actix_web::test]
async fn test_summarized_delete_document_by_id() {
async fn test_summarized_delete_document() {
let server = Server::new().await;
let index = server.index("test");
index.delete_document(1).await;
@@ -584,8 +489,7 @@ async fn test_summarized_delete_document_by_id() {
"canceledBy": null,
"details": {
"providedIds": 1,
"deletedDocuments": 0,
"originalFilter": null
"deletedDocuments": 0
},
"error": {
"message": "Index `test` not found.",
@@ -615,8 +519,7 @@ async fn test_summarized_delete_document_by_id() {
"canceledBy": null,
"details": {
"providedIds": 1,
"deletedDocuments": 0,
"originalFilter": null
"deletedDocuments": 0
},
"error": null,
"duration": "[duration]",

View File

@@ -25,13 +25,8 @@ flatten-serde-json = { path = "../flatten-serde-json" }
fst = "0.4.7"
fxhash = "0.2.1"
geoutils = "0.5.1"
grenad = { version = "0.4.4", default-features = false, features = [
"tempfile",
] }
heed = { git = "https://github.com/meilisearch/heed", tag = "v0.12.6", default-features = false, features = [
"lmdb",
"sync-read-txn",
] }
grenad = { version = "0.4.4", default-features = false, features = ["tempfile"] }
heed = { git = "https://github.com/meilisearch/heed", tag = "v0.12.5", default-features = false, features = ["lmdb", "sync-read-txn"] }
json-depth-checker = { path = "../json-depth-checker" }
levenshtein_automata = { version = "0.2.1", features = ["fst_automaton"] }
memmap2 = "0.5.10"
@@ -44,17 +39,12 @@ rstar = { version = "0.10.0", features = ["serde"] }
serde = { version = "1.0.160", features = ["derive"] }
serde_json = { version = "1.0.95", features = ["preserve_order"] }
slice-group-by = "0.3.0"
smallstr = { version = "0.3.0", features = ["serde"] }
smallstr = { version = "0.3.0", features = ["serde"] }
smallvec = "1.10.0"
smartstring = "1.0.1"
tempfile = "3.5.0"
thiserror = "1.0.40"
time = { version = "0.3.20", features = [
"serde-well-known",
"formatting",
"parsing",
"macros",
] }
time = { version = "0.3.20", features = ["serde-well-known", "formatting", "parsing", "macros"] }
uuid = { version = "1.3.1", features = ["v4"] }
filter-parser = { path = "../filter-parser" }
@@ -73,13 +63,13 @@ big_s = "1.0.2"
insta = "1.29.0"
maplit = "1.0.2"
md5 = "0.7.0"
rand = { version = "0.8.5", features = ["small_rng"] }
rand = {version = "0.8.5", features = ["small_rng"] }
[target.'cfg(fuzzing)'.dev-dependencies]
fuzzcheck = "0.12.1"
[features]
all-tokenizations = ["charabia/default"]
default = [ "charabia/default" ]
# Use POSIX semaphores instead of SysV semaphores in LMDB
# For more information on this feature, see heed's Cargo.toml

View File

@@ -53,7 +53,6 @@ fn main() -> Result<(), Box<dyn Error>> {
&mut ctx,
&(!query.trim().is_empty()).then(|| query.trim().to_owned()),
TermsMatchingStrategy::Last,
milli::score_details::ScoringStrategy::Skip,
false,
&None,
&None,

View File

@@ -112,8 +112,6 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco
InvalidGeoField(#[from] GeoError),
#[error("{0}")]
InvalidFilter(String),
#[error("Invalid type for filter subexpression: `expected {}, found: {1}`.", .0.join(", "))]
InvalidFilterExpression(&'static [&'static str], Value),
#[error("Attribute `{}` is not sortable. {}",
.field,
match .valid_fields.is_empty() {
@@ -124,9 +122,19 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco
}
)]
InvalidSortableAttribute { field: String, valid_fields: BTreeSet<String> },
#[error("Attribute `{}` is not filterable. {}",
.field,
match .valid_fields.is_empty() {
true => "This index does not have configured filterable attributes.".to_string(),
false => format!("Available filterable attributes are: `{}`.",
valid_fields.iter().map(AsRef::as_ref).collect::<Vec<&str>>().join(", ")
),
}
)]
InvalidSearchFacet { field: String, valid_fields: BTreeSet<String> },
#[error("{}", HeedError::BadOpenOptions)]
InvalidLmdbOpenOptions,
#[error("You must specify where `sort` is listed in the rankingRules setting to use the sort parameter at search time.")]
#[error("The sort ranking rule must be specified in the ranking rules settings to use the sort parameter at search time.")]
SortRankingRuleMissing,
#[error("The database file is in an invalid state.")]
InvalidStoreFile,

View File

@@ -0,0 +1,23 @@
use std::borrow::Cow;
use fst::Set;
use heed::{BytesDecode, BytesEncode};
/// A codec for values of type `Set<&[u8]>`.
pub struct FstSetCodec;
impl<'a> BytesEncode<'a> for FstSetCodec {
type EItem = Set<Vec<u8>>;
fn bytes_encode(item: &'a Self::EItem) -> Option<Cow<'a, [u8]>> {
Some(Cow::Borrowed(item.as_fst().as_bytes()))
}
}
impl<'a> BytesDecode<'a> for FstSetCodec {
type DItem = Set<&'a [u8]>;
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
Set::new(bytes).ok()
}
}

View File

@@ -2,6 +2,7 @@ mod beu32_str_codec;
mod byte_slice_ref;
pub mod facet;
mod field_id_word_count_codec;
mod fst_set_codec;
mod obkv_codec;
mod roaring_bitmap;
mod roaring_bitmap_length;
@@ -15,6 +16,7 @@ pub use str_ref::StrRefCodec;
pub use self::beu32_str_codec::BEU32StrCodec;
pub use self::field_id_word_count_codec::FieldIdWordCountCodec;
pub use self::fst_set_codec::FstSetCodec;
pub use self::obkv_codec::ObkvCodec;
pub use self::roaring_bitmap::{BoRoaringBitmapCodec, CboRoaringBitmapCodec, RoaringBitmapCodec};
pub use self::roaring_bitmap_length::{

View File

@@ -49,7 +49,7 @@ impl CboRoaringBitmapCodec {
} else {
// Otherwise, it means we used the classic RoaringBitmapCodec and
// that the header takes threshold integers.
RoaringBitmap::deserialize_unchecked_from(bytes)
RoaringBitmap::deserialize_from(bytes)
}
}
@@ -69,7 +69,7 @@ impl CboRoaringBitmapCodec {
vec.push(integer);
}
} else {
roaring |= RoaringBitmap::deserialize_unchecked_from(bytes.as_ref())?;
roaring |= RoaringBitmap::deserialize_from(bytes.as_ref())?;
}
}

View File

@@ -8,7 +8,7 @@ impl heed::BytesDecode<'_> for RoaringBitmapCodec {
type DItem = RoaringBitmap;
fn bytes_decode(bytes: &[u8]) -> Option<Self::DItem> {
RoaringBitmap::deserialize_unchecked_from(bytes).ok()
RoaringBitmap::deserialize_from(bytes).ok()
}
}

View File

@@ -19,11 +19,12 @@ use crate::heed_codec::facet::{
FacetGroupKeyCodec, FacetGroupValueCodec, FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec,
FieldIdCodec, OrderedF64Codec,
};
use crate::heed_codec::{ScriptLanguageCodec, StrBEU16Codec, StrRefCodec};
use crate::heed_codec::{FstSetCodec, ScriptLanguageCodec, StrBEU16Codec, StrRefCodec};
use crate::{
default_criteria, CboRoaringBitmapCodec, Criterion, DocumentId, ExternalDocumentsIds,
FacetDistribution, FieldDistribution, FieldId, FieldIdWordCountCodec, GeoPoint, ObkvCodec,
Result, RoaringBitmapCodec, RoaringBitmapLenCodec, Search, U8StrStrCodec, BEU16, BEU32,
default_criteria, BEU32StrCodec, BoRoaringBitmapCodec, CboRoaringBitmapCodec, Criterion,
DocumentId, ExternalDocumentsIds, FacetDistribution, FieldDistribution, FieldId,
FieldIdWordCountCodec, GeoPoint, ObkvCodec, Result, RoaringBitmapCodec, RoaringBitmapLenCodec,
Search, U8StrStrCodec, BEU16, BEU32,
};
pub const DEFAULT_MIN_WORD_LEN_ONE_TYPO: u8 = 5;
@@ -84,6 +85,7 @@ pub mod db_name {
pub const FACET_ID_IS_NULL_DOCIDS: &str = "facet-id-is-null-docids";
pub const FACET_ID_IS_EMPTY_DOCIDS: &str = "facet-id-is-empty-docids";
pub const FACET_ID_STRING_DOCIDS: &str = "facet-id-string-docids";
pub const FACET_ID_STRING_FST: &str = "facet-id-string-fst";
pub const FIELD_ID_DOCID_FACET_F64S: &str = "field-id-docid-facet-f64s";
pub const FIELD_ID_DOCID_FACET_STRINGS: &str = "field-id-docid-facet-strings";
pub const DOCUMENTS: &str = "documents";
@@ -110,6 +112,9 @@ pub struct Index {
/// A prefix of word and all the documents ids containing this prefix, from attributes for which typos are not allowed.
pub exact_word_prefix_docids: Database<Str, RoaringBitmapCodec>,
/// Maps a word and a document id (u32) to all the positions where the given word appears.
pub docid_word_positions: Database<BEU32StrCodec, BoRoaringBitmapCodec>,
/// Maps the proximity between a pair of words with all the docids where this relation appears.
pub word_pair_proximity_docids: Database<U8StrStrCodec, CboRoaringBitmapCodec>,
/// Maps the proximity between a pair of word and prefix with all the docids where this relation appears.
@@ -143,6 +148,8 @@ pub struct Index {
pub facet_id_f64_docids: Database<FacetGroupKeyCodec<OrderedF64Codec>, FacetGroupValueCodec>,
/// Maps the facet field id and ranges of strings with the docids that corresponds to them.
pub facet_id_string_docids: Database<FacetGroupKeyCodec<StrRefCodec>, FacetGroupValueCodec>,
/// Maps the facet field id of the string facets with an FST containing all the facets values.
pub facet_id_string_fst: Database<OwnedType<BEU16>, FstSetCodec>,
/// Maps the document id, the facet field id and the numbers.
pub field_id_docid_facet_f64s: Database<FieldDocIdFacetF64Codec, Unit>,
@@ -162,49 +169,38 @@ impl Index {
) -> Result<Index> {
use db_name::*;
options.max_dbs(23);
options.max_dbs(24);
unsafe { options.flag(Flags::MdbAlwaysFreePages) };
let env = options.open(path)?;
let mut wtxn = env.write_txn()?;
let main = env.create_poly_database(&mut wtxn, Some(MAIN))?;
let word_docids = env.create_database(&mut wtxn, Some(WORD_DOCIDS))?;
let exact_word_docids = env.create_database(&mut wtxn, Some(EXACT_WORD_DOCIDS))?;
let word_prefix_docids = env.create_database(&mut wtxn, Some(WORD_PREFIX_DOCIDS))?;
let exact_word_prefix_docids =
env.create_database(&mut wtxn, Some(EXACT_WORD_PREFIX_DOCIDS))?;
let word_pair_proximity_docids =
env.create_database(&mut wtxn, Some(WORD_PAIR_PROXIMITY_DOCIDS))?;
let script_language_docids =
env.create_database(&mut wtxn, Some(SCRIPT_LANGUAGE_DOCIDS))?;
let main = env.create_poly_database(Some(MAIN))?;
let word_docids = env.create_database(Some(WORD_DOCIDS))?;
let exact_word_docids = env.create_database(Some(EXACT_WORD_DOCIDS))?;
let word_prefix_docids = env.create_database(Some(WORD_PREFIX_DOCIDS))?;
let exact_word_prefix_docids = env.create_database(Some(EXACT_WORD_PREFIX_DOCIDS))?;
let docid_word_positions = env.create_database(Some(DOCID_WORD_POSITIONS))?;
let word_pair_proximity_docids = env.create_database(Some(WORD_PAIR_PROXIMITY_DOCIDS))?;
let script_language_docids = env.create_database(Some(SCRIPT_LANGUAGE_DOCIDS))?;
let word_prefix_pair_proximity_docids =
env.create_database(&mut wtxn, Some(WORD_PREFIX_PAIR_PROXIMITY_DOCIDS))?;
env.create_database(Some(WORD_PREFIX_PAIR_PROXIMITY_DOCIDS))?;
let prefix_word_pair_proximity_docids =
env.create_database(&mut wtxn, Some(PREFIX_WORD_PAIR_PROXIMITY_DOCIDS))?;
let word_position_docids = env.create_database(&mut wtxn, Some(WORD_POSITION_DOCIDS))?;
let word_fid_docids = env.create_database(&mut wtxn, Some(WORD_FIELD_ID_DOCIDS))?;
let field_id_word_count_docids =
env.create_database(&mut wtxn, Some(FIELD_ID_WORD_COUNT_DOCIDS))?;
let word_prefix_position_docids =
env.create_database(&mut wtxn, Some(WORD_PREFIX_POSITION_DOCIDS))?;
let word_prefix_fid_docids =
env.create_database(&mut wtxn, Some(WORD_PREFIX_FIELD_ID_DOCIDS))?;
let facet_id_f64_docids = env.create_database(&mut wtxn, Some(FACET_ID_F64_DOCIDS))?;
let facet_id_string_docids =
env.create_database(&mut wtxn, Some(FACET_ID_STRING_DOCIDS))?;
let facet_id_exists_docids =
env.create_database(&mut wtxn, Some(FACET_ID_EXISTS_DOCIDS))?;
let facet_id_is_null_docids =
env.create_database(&mut wtxn, Some(FACET_ID_IS_NULL_DOCIDS))?;
let facet_id_is_empty_docids =
env.create_database(&mut wtxn, Some(FACET_ID_IS_EMPTY_DOCIDS))?;
env.create_database(Some(PREFIX_WORD_PAIR_PROXIMITY_DOCIDS))?;
let word_position_docids = env.create_database(Some(WORD_POSITION_DOCIDS))?;
let word_fid_docids = env.create_database(Some(WORD_FIELD_ID_DOCIDS))?;
let field_id_word_count_docids = env.create_database(Some(FIELD_ID_WORD_COUNT_DOCIDS))?;
let word_prefix_position_docids = env.create_database(Some(WORD_PREFIX_POSITION_DOCIDS))?;
let word_prefix_fid_docids = env.create_database(Some(WORD_PREFIX_FIELD_ID_DOCIDS))?;
let facet_id_f64_docids = env.create_database(Some(FACET_ID_F64_DOCIDS))?;
let facet_id_string_docids = env.create_database(Some(FACET_ID_STRING_DOCIDS))?;
let facet_id_string_fst = env.create_database(Some(FACET_ID_STRING_FST))?;
let facet_id_exists_docids = env.create_database(Some(FACET_ID_EXISTS_DOCIDS))?;
let facet_id_is_null_docids = env.create_database(Some(FACET_ID_IS_NULL_DOCIDS))?;
let facet_id_is_empty_docids = env.create_database(Some(FACET_ID_IS_EMPTY_DOCIDS))?;
let field_id_docid_facet_f64s =
env.create_database(&mut wtxn, Some(FIELD_ID_DOCID_FACET_F64S))?;
let field_id_docid_facet_f64s = env.create_database(Some(FIELD_ID_DOCID_FACET_F64S))?;
let field_id_docid_facet_strings =
env.create_database(&mut wtxn, Some(FIELD_ID_DOCID_FACET_STRINGS))?;
let documents = env.create_database(&mut wtxn, Some(DOCUMENTS))?;
wtxn.commit()?;
env.create_database(Some(FIELD_ID_DOCID_FACET_STRINGS))?;
let documents = env.create_database(Some(DOCUMENTS))?;
Index::set_creation_dates(&env, main, created_at, updated_at)?;
@@ -215,6 +211,7 @@ impl Index {
exact_word_docids,
word_prefix_docids,
exact_word_prefix_docids,
docid_word_positions,
word_pair_proximity_docids,
script_language_docids,
word_prefix_pair_proximity_docids,
@@ -226,6 +223,7 @@ impl Index {
field_id_word_count_docids,
facet_id_f64_docids,
facet_id_string_docids,
facet_id_string_fst,
facet_id_exists_docids,
facet_id_is_null_docids,
facet_id_is_empty_docids,
@@ -1039,15 +1037,16 @@ impl Index {
/* documents */
/// Returns an iterator over the requested documents. The next item will be an error if a document is missing.
pub fn iter_documents<'a, 't: 'a>(
&'a self,
/// Returns a [`Vec`] of the requested documents. Returns an error if a document is missing.
pub fn documents<'t>(
&self,
rtxn: &'t RoTxn,
ids: impl IntoIterator<Item = DocumentId> + 'a,
) -> Result<impl Iterator<Item = Result<(DocumentId, obkv::KvReaderU16<'t>)>> + 'a> {
ids: impl IntoIterator<Item = DocumentId>,
) -> Result<Vec<(DocumentId, obkv::KvReaderU16<'t>)>> {
let soft_deleted_documents = self.soft_deleted_documents_ids(rtxn)?;
let mut documents = Vec::new();
Ok(ids.into_iter().map(move |id| {
for id in ids {
if soft_deleted_documents.contains(id) {
return Err(UserError::AccessingSoftDeletedDocument { document_id: id })?;
}
@@ -1055,25 +1054,27 @@ impl Index {
.documents
.get(rtxn, &BEU32::new(id))?
.ok_or(UserError::UnknownInternalDocumentId { document_id: id })?;
Ok((id, kv))
}))
}
documents.push((id, kv));
}
/// Returns a [`Vec`] of the requested documents. Returns an error if a document is missing.
pub fn documents<'t>(
&self,
rtxn: &'t RoTxn,
ids: impl IntoIterator<Item = DocumentId>,
) -> Result<Vec<(DocumentId, obkv::KvReaderU16<'t>)>> {
self.iter_documents(rtxn, ids)?.collect()
Ok(documents)
}
/// Returns an iterator over all the documents in the index.
pub fn all_documents<'a, 't: 'a>(
&'a self,
pub fn all_documents<'t>(
&self,
rtxn: &'t RoTxn,
) -> Result<impl Iterator<Item = Result<(DocumentId, obkv::KvReaderU16<'t>)>> + 'a> {
self.iter_documents(rtxn, self.documents_ids(rtxn)?)
) -> Result<impl Iterator<Item = heed::Result<(DocumentId, obkv::KvReaderU16<'t>)>>> {
let soft_deleted_docids = self.soft_deleted_documents_ids(rtxn)?;
Ok(self
.documents
.iter(rtxn)?
// we cast the BEU32 to a DocumentId
.map(|document| document.map(|(id, obkv)| (id.get(), obkv)))
.filter(move |document| {
document.as_ref().map_or(true, |(id, _)| !soft_deleted_docids.contains(*id))
}))
}
pub fn facets_distribution<'a>(&'a self, rtxn: &'a RoTxn) -> FacetDistribution<'a> {
@@ -1465,11 +1466,11 @@ pub(crate) mod tests {
db_snap!(index, field_distribution);
db_snap!(index, field_distribution,
@r###"
age 1 |
id 2 |
name 2 |
"###
@"
age 1
id 2
name 2
"
);
// snapshot_index!(&index, "1", include: "^field_distribution$");
@@ -1486,10 +1487,10 @@ pub(crate) mod tests {
db_snap!(index, field_distribution,
@r###"
age 1 |
id 2 |
name 2 |
"###
age 1
id 2
name 2
"###
);
// then we update a document by removing one field and another by adding one field
@@ -1502,10 +1503,10 @@ pub(crate) mod tests {
db_snap!(index, field_distribution,
@r###"
has_dog 1 |
id 2 |
name 2 |
"###
has_dog 1
id 2
name 2
"###
);
}
@@ -2488,12 +2489,8 @@ pub(crate) mod tests {
let rtxn = index.read_txn().unwrap();
let search = Search::new(&rtxn, &index);
let SearchResult {
matching_words: _,
candidates: _,
document_scores: _,
mut documents_ids,
} = search.execute().unwrap();
let SearchResult { matching_words: _, candidates: _, mut documents_ids } =
search.execute().unwrap();
let primary_key_id = index.fields_ids_map(&rtxn).unwrap().id("primary_key").unwrap();
documents_ids.sort_unstable();
let docs = index.documents(&rtxn, documents_ids).unwrap();

View File

@@ -5,6 +5,52 @@
#[global_allocator]
pub static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc;
// #[cfg(test)]
// pub mod allocator {
// use std::alloc::{GlobalAlloc, System};
// use std::sync::atomic::{self, AtomicI64};
// #[global_allocator]
// pub static ALLOC: CountingAlloc = CountingAlloc {
// max_resident: AtomicI64::new(0),
// resident: AtomicI64::new(0),
// allocated: AtomicI64::new(0),
// };
// pub struct CountingAlloc {
// pub max_resident: AtomicI64,
// pub resident: AtomicI64,
// pub allocated: AtomicI64,
// }
// unsafe impl GlobalAlloc for CountingAlloc {
// unsafe fn alloc(&self, layout: std::alloc::Layout) -> *mut u8 {
// self.allocated.fetch_add(layout.size() as i64, atomic::Ordering::SeqCst);
// let old_resident =
// self.resident.fetch_add(layout.size() as i64, atomic::Ordering::SeqCst);
// let resident = old_resident + layout.size() as i64;
// self.max_resident.fetch_max(resident, atomic::Ordering::SeqCst);
// // if layout.size() > 1_000_000 {
// // eprintln!(
// // "allocating {} with new resident size: {resident}",
// // layout.size() / 1_000_000
// // );
// // // let trace = std::backtrace::Backtrace::capture();
// // // let t = trace.to_string();
// // // eprintln!("{t}");
// // }
// System.alloc(layout)
// }
// unsafe fn dealloc(&self, ptr: *mut u8, layout: std::alloc::Layout) {
// self.resident.fetch_sub(layout.size() as i64, atomic::Ordering::Relaxed);
// System.dealloc(ptr, layout)
// }
// }
// }
#[macro_use]
pub mod documents;
@@ -17,7 +63,6 @@ mod fields_ids_map;
pub mod heed_codec;
pub mod index;
pub mod proximity;
pub mod score_details;
mod search;
pub mod update;
@@ -54,8 +99,9 @@ pub use self::heed_codec::{
};
pub use self::index::Index;
pub use self::search::{
FacetDistribution, Filter, FormatOptions, MatchBounds, MatcherBuilder, MatchingWords, Search,
SearchResult, TermsMatchingStrategy, DEFAULT_VALUES_PER_FACET,
FacetDistribution, FacetValueHit, Filter, FormatOptions, MatchBounds, MatcherBuilder,
MatchingWords, Search, SearchForFacetValues, SearchResult, TermsMatchingStrategy,
DEFAULT_VALUES_PER_FACET,
};
pub type Result<T> = std::result::Result<T, error::Error>;

View File

@@ -1,316 +0,0 @@
use serde::Serialize;
use crate::distance_between_two_points;
#[derive(Debug, Clone, PartialEq)]
pub enum ScoreDetails {
Words(Words),
Typo(Typo),
Proximity(Rank),
Fid(Rank),
Position(Rank),
ExactAttribute(ExactAttribute),
Exactness(Rank),
Sort(Sort),
GeoSort(GeoSort),
}
impl ScoreDetails {
pub fn local_score(&self) -> Option<f64> {
self.rank().map(Rank::local_score)
}
pub fn rank(&self) -> Option<Rank> {
match self {
ScoreDetails::Words(details) => Some(details.rank()),
ScoreDetails::Typo(details) => Some(details.rank()),
ScoreDetails::Proximity(details) => Some(*details),
ScoreDetails::Fid(details) => Some(*details),
ScoreDetails::Position(details) => Some(*details),
ScoreDetails::ExactAttribute(details) => Some(details.rank()),
ScoreDetails::Exactness(details) => Some(*details),
ScoreDetails::Sort(_) => None,
ScoreDetails::GeoSort(_) => None,
}
}
pub fn global_score<'a>(details: impl Iterator<Item = &'a Self>) -> f64 {
Rank::global_score(details.filter_map(Self::rank))
}
/// Panics
///
/// - If Position is not preceded by Fid
/// - If Exactness is not preceded by ExactAttribute
pub fn to_json_map<'a>(
details: impl Iterator<Item = &'a Self>,
) -> serde_json::Map<String, serde_json::Value> {
let mut order = 0;
let mut fid_details = None;
let mut details_map = serde_json::Map::default();
for details in details {
match details {
ScoreDetails::Words(words) => {
let words_details = serde_json::json!({
"order": order,
"matchingWords": words.matching_words,
"maxMatchingWords": words.max_matching_words,
"score": words.rank().local_score(),
});
details_map.insert("words".into(), words_details);
order += 1;
}
ScoreDetails::Typo(typo) => {
let typo_details = serde_json::json!({
"order": order,
"typoCount": typo.typo_count,
"maxTypoCount": typo.max_typo_count,
"score": typo.rank().local_score(),
});
details_map.insert("typo".into(), typo_details);
order += 1;
}
ScoreDetails::Proximity(proximity) => {
let proximity_details = serde_json::json!({
"order": order,
"score": proximity.local_score(),
});
details_map.insert("proximity".into(), proximity_details);
order += 1;
}
ScoreDetails::Fid(fid) => {
// copy the rank for future use in Position.
fid_details = Some(*fid);
// For now, fid is a virtual rule always followed by the "position" rule
let fid_details = serde_json::json!({
"order": order,
"attributes_ranking_order": fid.local_score(),
});
details_map.insert("attribute".into(), fid_details);
order += 1;
}
ScoreDetails::Position(position) => {
// For now, position is a virtual rule always preceded by the "fid" rule
let attribute_details = details_map
.get_mut("attribute")
.expect("position not preceded by attribute");
let attribute_details = attribute_details
.as_object_mut()
.expect("attribute details was not an object");
let Some(fid_details) = fid_details
else {
panic!("position not preceded by attribute");
};
attribute_details.insert(
"attributes_query_word_order".into(),
position.local_score().into(),
);
let score = Rank::global_score([fid_details, *position].iter().copied());
attribute_details.insert("score".into(), score.into());
// do not update the order since this was already done by fid
}
ScoreDetails::ExactAttribute(exact_attribute) => {
let exactness_details = serde_json::json!({
"order": order,
"matchType": exact_attribute,
"score": exact_attribute.rank().local_score(),
});
details_map.insert("exactness".into(), exactness_details);
order += 1;
}
ScoreDetails::Exactness(details) => {
// For now, exactness is a virtual rule always preceded by the "ExactAttribute" rule
let exactness_details = details_map
.get_mut("exactness")
.expect("Exactness not preceded by exactAttribute");
let exactness_details = exactness_details
.as_object_mut()
.expect("exactness details was not an object");
if exactness_details.get("matchType").expect("missing 'matchType'")
== &serde_json::json!(ExactAttribute::NoExactMatch)
{
let score = Rank::global_score(
[ExactAttribute::NoExactMatch.rank(), *details].iter().copied(),
);
*exactness_details.get_mut("score").expect("missing score") = score.into();
}
// do not update the order since this was already done by exactAttribute
}
ScoreDetails::Sort(details) => {
let sort = if details.redacted {
format!("<hidden-rule-{order}>")
} else {
format!(
"{}:{}",
details.field_name,
if details.ascending { "asc" } else { "desc" }
)
};
let value =
if details.redacted { "<hidden>".into() } else { details.value.clone() };
let sort_details = serde_json::json!({
"order": order,
"value": value,
});
details_map.insert(sort, sort_details);
order += 1;
}
ScoreDetails::GeoSort(details) => {
let sort = format!(
"_geoPoint({}, {}):{}",
details.target_point[0],
details.target_point[1],
if details.ascending { "asc" } else { "desc" }
);
let point = if let Some(value) = details.value {
serde_json::json!({ "lat": value[0], "lng": value[1]})
} else {
serde_json::Value::Null
};
let sort_details = serde_json::json!({
"order": order,
"value": point,
"distance": details.distance(),
});
details_map.insert(sort, sort_details);
order += 1;
}
}
}
details_map
}
}
/// The strategy to compute scores.
///
/// It makes sense to pass down this strategy to the internals of the search, because
/// some optimizations (today, mainly skipping ranking rules for universes of a single document)
/// are not correct to do when computing the scores.
///
/// This strategy could feasibly be extended to differentiate between the normalized score and the
/// detailed scores, but it is not useful today as the normalized score is *derived from* the
/// detailed scores.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum ScoringStrategy {
/// Don't compute scores
#[default]
Skip,
/// Compute detailed scores
Detailed,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Words {
pub matching_words: u32,
pub max_matching_words: u32,
}
impl Words {
pub fn rank(&self) -> Rank {
Rank { rank: self.matching_words, max_rank: self.max_matching_words }
}
pub(crate) fn from_rank(rank: Rank) -> Words {
Words { matching_words: rank.rank, max_matching_words: rank.max_rank }
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Typo {
pub typo_count: u32,
pub max_typo_count: u32,
}
impl Typo {
pub fn rank(&self) -> Rank {
Rank {
rank: self.max_typo_count - self.typo_count + 1,
max_rank: (self.max_typo_count + 1),
}
}
// max_rank = max_typo + 1
// max_typo = max_rank - 1
//
// rank = max_typo - typo + 1
// rank = max_rank - 1 - typo + 1
// rank + typo = max_rank
// typo = max_rank - rank
pub fn from_rank(rank: Rank) -> Typo {
Typo { typo_count: rank.max_rank - rank.rank, max_typo_count: rank.max_rank - 1 }
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Rank {
/// The ordinal rank, such that `max_rank` is the first rank, and 0 is the last rank.
///
/// The higher the better. Documents with a rank of 0 have a score of 0 and are typically never returned
/// (they don't match the query).
pub rank: u32,
/// The maximum possible rank. Documents with this rank have a score of 1.
///
/// The max rank should not be 0.
pub max_rank: u32,
}
impl Rank {
pub fn local_score(self) -> f64 {
self.rank as f64 / self.max_rank as f64
}
pub fn global_score(details: impl Iterator<Item = Self>) -> f64 {
let mut rank = Rank { rank: 1, max_rank: 1 };
for inner_rank in details {
rank.rank -= 1;
rank.rank *= inner_rank.max_rank;
rank.max_rank *= inner_rank.max_rank;
rank.rank += inner_rank.rank;
}
rank.local_score()
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize)]
#[serde(rename_all = "camelCase")]
pub enum ExactAttribute {
ExactMatch,
MatchesStart,
NoExactMatch,
}
impl ExactAttribute {
pub fn rank(&self) -> Rank {
let rank = match self {
ExactAttribute::ExactMatch => 3,
ExactAttribute::MatchesStart => 2,
ExactAttribute::NoExactMatch => 1,
};
Rank { rank, max_rank: 3 }
}
}
#[derive(Debug, Clone, PartialEq)]
pub struct Sort {
pub field_name: String,
pub ascending: bool,
pub redacted: bool,
pub value: serde_json::Value,
}
#[derive(Debug, Clone, Copy, PartialEq, PartialOrd)]
pub struct GeoSort {
pub target_point: [f64; 2],
pub ascending: bool,
pub value: Option<[f64; 2]>,
}
impl GeoSort {
pub fn distance(&self) -> Option<f64> {
self.value.map(|value| distance_between_two_points(&self.target_point, &value))
}
}

View File

@@ -5,7 +5,6 @@ use std::ops::Bound::{self, Excluded, Included};
use either::Either;
pub use filter_parser::{Condition, Error as FPError, FilterCondition, Span, Token};
use roaring::RoaringBitmap;
use serde_json::Value;
use super::facet_range_search;
use crate::error::{Error, UserError};
@@ -113,52 +112,6 @@ impl<'a> From<Filter<'a>> for FilterCondition<'a> {
}
impl<'a> Filter<'a> {
pub fn from_json(facets: &'a Value) -> Result<Option<Self>> {
match facets {
Value::String(expr) => {
let condition = Filter::from_str(expr)?;
Ok(condition)
}
Value::Array(arr) => Self::parse_filter_array(arr),
v => Err(Error::UserError(UserError::InvalidFilterExpression(
&["String", "Array"],
v.clone(),
))),
}
}
fn parse_filter_array(arr: &'a [Value]) -> Result<Option<Self>> {
let mut ands = Vec::new();
for value in arr {
match value {
Value::String(s) => ands.push(Either::Right(s.as_str())),
Value::Array(arr) => {
let mut ors = Vec::new();
for value in arr {
match value {
Value::String(s) => ors.push(s.as_str()),
v => {
return Err(Error::UserError(UserError::InvalidFilterExpression(
&["String"],
v.clone(),
)))
}
}
}
ands.push(Either::Left(ors));
}
v => {
return Err(Error::UserError(UserError::InvalidFilterExpression(
&["String", "[String]"],
v.clone(),
)))
}
}
}
Filter::from_array(ands)
}
pub fn from_array<I, J>(array: I) -> Result<Option<Self>>
where
I: IntoIterator<Item = Either<J, &'a str>>,

View File

@@ -1,15 +1,20 @@
use std::fmt;
use fst::automaton::{Automaton, Str};
use fst::{IntoStreamer, Streamer};
use levenshtein_automata::{LevenshteinAutomatonBuilder as LevBuilder, DFA};
use log::error;
use once_cell::sync::Lazy;
use roaring::bitmap::RoaringBitmap;
pub use self::facet::{FacetDistribution, Filter, DEFAULT_VALUES_PER_FACET};
pub use self::new::matches::{FormatOptions, MatchBounds, Matcher, MatcherBuilder, MatchingWords};
use self::new::PartialSearchResult;
use crate::score_details::{ScoreDetails, ScoringStrategy};
use crate::error::UserError;
use crate::heed_codec::facet::{FacetGroupKey, FacetGroupValue};
use crate::{
execute_search, AscDesc, DefaultSearchLogger, DocumentId, Index, Result, SearchContext,
execute_search, AscDesc, DefaultSearchLogger, DocumentId, FieldIdMapMissingEntry, Index,
Result, SearchContext, BEU16,
};
// Building these factories is not free.
@@ -17,6 +22,9 @@ static LEVDIST0: Lazy<LevBuilder> = Lazy::new(|| LevBuilder::new(0, true));
static LEVDIST1: Lazy<LevBuilder> = Lazy::new(|| LevBuilder::new(1, true));
static LEVDIST2: Lazy<LevBuilder> = Lazy::new(|| LevBuilder::new(2, true));
/// The maximum number of facets returned by the facet search route.
const MAX_NUMBER_OF_FACETS: usize = 100;
pub mod facet;
mod fst_utils;
pub mod new;
@@ -30,7 +38,6 @@ pub struct Search<'a> {
sort_criteria: Option<Vec<AscDesc>>,
geo_strategy: new::GeoSortStrategy,
terms_matching_strategy: TermsMatchingStrategy,
scoring_strategy: ScoringStrategy,
words_limit: usize,
exhaustive_number_hits: bool,
rtxn: &'a heed::RoTxn<'a>,
@@ -47,7 +54,6 @@ impl<'a> Search<'a> {
sort_criteria: None,
geo_strategy: new::GeoSortStrategy::default(),
terms_matching_strategy: TermsMatchingStrategy::default(),
scoring_strategy: Default::default(),
exhaustive_number_hits: false,
words_limit: 10,
rtxn,
@@ -80,11 +86,6 @@ impl<'a> Search<'a> {
self
}
pub fn scoring_strategy(&mut self, value: ScoringStrategy) -> &mut Search<'a> {
self.scoring_strategy = value;
self
}
pub fn words_limit(&mut self, value: usize) -> &mut Search<'a> {
self.words_limit = value;
self
@@ -101,7 +102,7 @@ impl<'a> Search<'a> {
self
}
/// Forces the search to exhaustively compute the number of candidates,
/// Force the search to exhastivelly compute the number of candidates,
/// this will increase the search time but allows finite pagination.
pub fn exhaustive_number_hits(&mut self, exhaustive_number_hits: bool) -> &mut Search<'a> {
self.exhaustive_number_hits = exhaustive_number_hits;
@@ -110,12 +111,11 @@ impl<'a> Search<'a> {
pub fn execute(&self) -> Result<SearchResult> {
let mut ctx = SearchContext::new(self.index, self.rtxn);
let PartialSearchResult { located_query_terms, candidates, documents_ids, document_scores } =
let PartialSearchResult { located_query_terms, candidates, documents_ids } =
execute_search(
&mut ctx,
&self.query,
self.terms_matching_strategy,
self.scoring_strategy,
self.exhaustive_number_hits,
&self.filter,
&self.sort_criteria,
@@ -133,7 +133,7 @@ impl<'a> Search<'a> {
None => MatchingWords::default(),
};
Ok(SearchResult { matching_words, candidates, document_scores, documents_ids })
Ok(SearchResult { matching_words, candidates, documents_ids })
}
}
@@ -147,7 +147,6 @@ impl fmt::Debug for Search<'_> {
sort_criteria,
geo_strategy: _,
terms_matching_strategy,
scoring_strategy,
words_limit,
exhaustive_number_hits,
rtxn: _,
@@ -160,7 +159,6 @@ impl fmt::Debug for Search<'_> {
.field("limit", limit)
.field("sort_criteria", sort_criteria)
.field("terms_matching_strategy", terms_matching_strategy)
.field("scoring_strategy", scoring_strategy)
.field("exhaustive_number_hits", exhaustive_number_hits)
.field("words_limit", words_limit)
.finish()
@@ -171,8 +169,8 @@ impl fmt::Debug for Search<'_> {
pub struct SearchResult {
pub matching_words: MatchingWords,
pub candidates: RoaringBitmap,
// TODO those documents ids should be associated with their criteria scores.
pub documents_ids: Vec<DocumentId>,
pub document_scores: Vec<Vec<ScoreDetails>>,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
@@ -210,16 +208,182 @@ pub fn build_dfa(word: &str, typos: u8, is_prefix: bool) -> DFA {
}
}
pub struct SearchForFacetValues<'a> {
query: Option<String>,
facet: String,
search_query: Search<'a>,
}
impl<'a> SearchForFacetValues<'a> {
pub fn new(facet: String, search_query: Search<'a>) -> SearchForFacetValues<'a> {
SearchForFacetValues { query: None, facet, search_query }
}
pub fn query(&mut self, query: impl Into<String>) -> &mut Self {
self.query = Some(query.into());
self
}
pub fn execute(&self) -> Result<Vec<FacetValueHit>> {
let index = self.search_query.index;
let rtxn = self.search_query.rtxn;
let filterable_fields = index.filterable_fields(rtxn)?;
if !filterable_fields.contains(&self.facet) {
return Err(UserError::InvalidSearchFacet {
field: self.facet.clone(),
valid_fields: filterable_fields.into_iter().collect(),
}
.into());
}
let fields_ids_map = index.fields_ids_map(rtxn)?;
let fid = match fields_ids_map.id(&self.facet) {
Some(fid) => fid,
None => {
return Err(FieldIdMapMissingEntry::FieldName {
field_name: self.facet.clone(),
process: "search for facet values",
}
.into());
}
};
let fst = match self.search_query.index.facet_id_string_fst.get(rtxn, &BEU16::new(fid))? {
Some(fst) => fst,
None => return Ok(vec![]),
};
let search_candidates = self.search_query.execute()?.candidates;
match self.query.as_ref() {
Some(query) => {
let authorize_typos = self.search_query.index.authorize_typos(rtxn)?;
let field_authorizes_typos =
!self.search_query.index.exact_attributes_ids(rtxn)?.contains(&fid);
if authorize_typos && field_authorizes_typos {
let mut results = vec![];
let exact_words_fst = self.search_query.index.exact_words(rtxn)?;
if exact_words_fst.map_or(false, |fst| fst.contains(query)) {
let key =
FacetGroupKey { field_id: fid, level: 0, left_bound: query.as_ref() };
if let Some(FacetGroupValue { bitmap, .. }) =
index.facet_id_string_docids.get(rtxn, &key)?
{
let count = search_candidates.intersection_len(&bitmap);
if count != 0 {
results.push(FacetValueHit { value: query.to_string(), count });
}
}
} else {
let one_typo = self.search_query.index.min_word_len_one_typo(rtxn)?;
let two_typos = self.search_query.index.min_word_len_two_typos(rtxn)?;
let is_prefix = true;
let automaton = if query.len() < one_typo as usize {
build_dfa(query, 0, is_prefix)
} else if query.len() < two_typos as usize {
build_dfa(query, 1, is_prefix)
} else {
build_dfa(query, 2, is_prefix)
};
let mut stream = fst.search(automaton).into_stream();
let mut length = 0;
while let Some(facet_value) = stream.next() {
let value = std::str::from_utf8(facet_value)?;
let key = FacetGroupKey { field_id: fid, level: 0, left_bound: value };
let docids = match index.facet_id_string_docids.get(rtxn, &key)? {
Some(FacetGroupValue { bitmap, .. }) => bitmap,
None => {
error!(
"the facet value is missing from the facet database: {key:?}"
);
continue;
}
};
let count = search_candidates.intersection_len(&docids);
if count != 0 {
results.push(FacetValueHit { value: value.to_string(), count });
length += 1;
}
if length >= MAX_NUMBER_OF_FACETS {
break;
}
}
}
Ok(results)
} else {
let automaton = Str::new(query).starts_with();
let mut stream = fst.search(automaton).into_stream();
let mut results = vec![];
let mut length = 0;
while let Some(facet_value) = stream.next() {
let value = std::str::from_utf8(facet_value)?;
let key = FacetGroupKey { field_id: fid, level: 0, left_bound: value };
let docids = match index.facet_id_string_docids.get(rtxn, &key)? {
Some(FacetGroupValue { bitmap, .. }) => bitmap,
None => {
error!(
"the facet value is missing from the facet database: {key:?}"
);
continue;
}
};
let count = search_candidates.intersection_len(&docids);
if count != 0 {
results.push(FacetValueHit { value: value.to_string(), count });
length += 1;
}
if length >= MAX_NUMBER_OF_FACETS {
break;
}
}
Ok(results)
}
}
None => {
let mut results = vec![];
let mut length = 0;
let prefix = FacetGroupKey { field_id: fid, level: 0, left_bound: "" };
for result in index.facet_id_string_docids.prefix_iter(rtxn, &prefix)? {
let (FacetGroupKey { left_bound, .. }, FacetGroupValue { bitmap, .. }) =
result?;
let count = search_candidates.intersection_len(&bitmap);
if count != 0 {
results.push(FacetValueHit { value: left_bound.to_string(), count });
length += 1;
}
if length >= MAX_NUMBER_OF_FACETS {
break;
}
}
Ok(results)
}
}
}
}
#[derive(Debug, Clone, serde::Serialize, PartialEq)]
pub struct FacetValueHit {
/// The original facet value
pub value: String,
/// The number of documents associated to this facet
pub count: u64,
}
#[cfg(test)]
mod test {
#[allow(unused_imports)]
use super::*;
use crate::index::tests::TempIndex;
#[cfg(feature = "japanese")]
#[cfg(feature = "default")]
#[test]
fn test_kanji_language_detection() {
use crate::index::tests::TempIndex;
let index = TempIndex::new();
index

View File

@@ -3,18 +3,14 @@ use roaring::RoaringBitmap;
use super::logger::SearchLogger;
use super::ranking_rules::{BoxRankingRule, RankingRuleQueryTrait};
use super::SearchContext;
use crate::score_details::{ScoreDetails, ScoringStrategy};
use crate::search::new::distinct::{apply_distinct_rule, distinct_single_docid, DistinctOutput};
use crate::Result;
pub struct BucketSortOutput {
pub docids: Vec<u32>,
pub scores: Vec<Vec<ScoreDetails>>,
pub all_candidates: RoaringBitmap,
}
// TODO: would probably be good to regroup some of these inside of a struct?
#[allow(clippy::too_many_arguments)]
pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
ctx: &mut SearchContext<'ctx>,
mut ranking_rules: Vec<BoxRankingRule<'ctx, Q>>,
@@ -22,7 +18,6 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
universe: &RoaringBitmap,
from: usize,
length: usize,
scoring_strategy: ScoringStrategy,
logger: &mut dyn SearchLogger<Q>,
) -> Result<BucketSortOutput> {
logger.initial_query(query);
@@ -36,11 +31,7 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
};
if universe.len() < from as u64 {
return Ok(BucketSortOutput {
docids: vec![],
scores: vec![],
all_candidates: universe.clone(),
});
return Ok(BucketSortOutput { docids: vec![], all_candidates: universe.clone() });
}
if ranking_rules.is_empty() {
if let Some(distinct_fid) = distinct_fid {
@@ -58,32 +49,22 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
}
let mut all_candidates = universe - excluded;
all_candidates.extend(results.iter().copied());
return Ok(BucketSortOutput {
scores: vec![Default::default(); results.len()],
docids: results,
all_candidates,
});
return Ok(BucketSortOutput { docids: results, all_candidates });
} else {
let docids: Vec<u32> = universe.iter().skip(from).take(length).collect();
return Ok(BucketSortOutput {
scores: vec![Default::default(); docids.len()],
docids,
all_candidates: universe.clone(),
});
let docids = universe.iter().skip(from).take(length).collect();
return Ok(BucketSortOutput { docids, all_candidates: universe.clone() });
};
}
let ranking_rules_len = ranking_rules.len();
logger.start_iteration_ranking_rule(0, ranking_rules[0].as_ref(), query, universe);
ranking_rules[0].start_iteration(ctx, logger, universe, query)?;
let mut ranking_rule_scores: Vec<ScoreDetails> = vec![];
let mut ranking_rule_universes: Vec<RoaringBitmap> =
vec![RoaringBitmap::default(); ranking_rules_len];
ranking_rule_universes[0] = universe.clone();
let mut cur_ranking_rule_index = 0;
/// Finish iterating over the current ranking rule, yielding
@@ -108,16 +89,11 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
} else {
cur_ranking_rule_index -= 1;
}
// FIXME: check off by one
if ranking_rule_scores.len() > cur_ranking_rule_index {
ranking_rule_scores.pop();
}
};
}
let mut all_candidates = universe.clone();
let mut valid_docids = vec![];
let mut valid_scores = vec![];
let mut cur_offset = 0usize;
macro_rules! maybe_add_to_results {
@@ -128,26 +104,21 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
length,
logger,
&mut valid_docids,
&mut valid_scores,
&mut all_candidates,
&mut ranking_rule_universes,
&mut ranking_rules,
cur_ranking_rule_index,
&mut cur_offset,
distinct_fid,
&ranking_rule_scores,
$candidates,
)?;
};
}
while valid_docids.len() < length {
// The universe for this bucket is zero, so we don't need to sort
// anything, just go back to the parent ranking rule.
if ranking_rule_universes[cur_ranking_rule_index].is_empty()
|| (scoring_strategy == ScoringStrategy::Skip
&& ranking_rule_universes[cur_ranking_rule_index].len() == 1)
{
// The universe for this bucket is zero or one element, so we don't need to sort
// anything, just extend the results and go back to the parent ranking rule.
if ranking_rule_universes[cur_ranking_rule_index].len() <= 1 {
let bucket = std::mem::take(&mut ranking_rule_universes[cur_ranking_rule_index]);
maybe_add_to_results!(bucket);
back!();
@@ -159,8 +130,6 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
continue;
};
ranking_rule_scores.push(next_bucket.score);
logger.next_bucket_ranking_rule(
cur_ranking_rule_index,
ranking_rules[cur_ranking_rule_index].as_ref(),
@@ -174,12 +143,10 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
ranking_rule_universes[cur_ranking_rule_index] -= &next_bucket.candidates;
if cur_ranking_rule_index == ranking_rules_len - 1
|| (scoring_strategy == ScoringStrategy::Skip && next_bucket.candidates.len() <= 1)
|| next_bucket.candidates.len() <= 1
|| cur_offset + (next_bucket.candidates.len() as usize) < from
{
maybe_add_to_results!(next_bucket.candidates);
// FIXME: use index based logic like all the other rules so that you don't have to maintain the pop/push?
ranking_rule_scores.pop();
continue;
}
@@ -199,7 +166,7 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
)?;
}
Ok(BucketSortOutput { docids: valid_docids, scores: valid_scores, all_candidates })
Ok(BucketSortOutput { docids: valid_docids, all_candidates })
}
/// Add the candidates to the results. Take `distinct`, `from`, `length`, and `cur_offset`
@@ -212,18 +179,14 @@ fn maybe_add_to_results<'ctx, Q: RankingRuleQueryTrait>(
logger: &mut dyn SearchLogger<Q>,
valid_docids: &mut Vec<u32>,
valid_scores: &mut Vec<Vec<ScoreDetails>>,
all_candidates: &mut RoaringBitmap,
ranking_rule_universes: &mut [RoaringBitmap],
ranking_rules: &mut [BoxRankingRule<'ctx, Q>],
cur_ranking_rule_index: usize,
cur_offset: &mut usize,
distinct_fid: Option<u16>,
ranking_rule_scores: &[ScoreDetails],
candidates: RoaringBitmap,
) -> Result<()> {
// First apply the distinct rule on the candidates, reducing the universes if necessary
@@ -268,17 +231,13 @@ fn maybe_add_to_results<'ctx, Q: RankingRuleQueryTrait>(
let candidates =
candidates.iter().take(length - valid_docids.len()).copied().collect::<Vec<_>>();
logger.add_to_results(&candidates);
valid_docids.extend_from_slice(&candidates);
valid_scores
.extend(std::iter::repeat(ranking_rule_scores.to_owned()).take(candidates.len()));
valid_docids.extend(&candidates);
}
} else {
// if we have passed the offset already, add some of the documents (up to the limit)
let candidates = candidates.iter().take(length - valid_docids.len()).collect::<Vec<u32>>();
logger.add_to_results(&candidates);
valid_docids.extend_from_slice(&candidates);
valid_scores
.extend(std::iter::repeat(ranking_rule_scores.to_owned()).take(candidates.len()));
valid_docids.extend(&candidates);
}
*cur_offset += candidates.len() as usize;

View File

@@ -2,7 +2,6 @@ use roaring::{MultiOps, RoaringBitmap};
use super::query_graph::QueryGraph;
use super::ranking_rules::{RankingRule, RankingRuleOutput};
use crate::score_details::{self, ScoreDetails};
use crate::search::new::query_graph::QueryNodeData;
use crate::search::new::query_term::ExactTerm;
use crate::{Result, SearchContext, SearchLogger};
@@ -245,13 +244,7 @@ impl State {
candidates &= universe;
(
State::AttributeStarts(query_graph.clone(), candidates_per_attribute),
Some(RankingRuleOutput {
query: query_graph,
candidates,
score: ScoreDetails::ExactAttribute(
score_details::ExactAttribute::ExactMatch,
),
}),
Some(RankingRuleOutput { query: query_graph, candidates }),
)
}
State::AttributeStarts(query_graph, candidates_per_attribute) => {
@@ -264,24 +257,12 @@ impl State {
candidates &= universe;
(
State::Empty(query_graph.clone()),
Some(RankingRuleOutput {
query: query_graph,
candidates,
score: ScoreDetails::ExactAttribute(
score_details::ExactAttribute::MatchesStart,
),
}),
Some(RankingRuleOutput { query: query_graph, candidates }),
)
}
State::Empty(query_graph) => (
State::Empty(query_graph.clone()),
Some(RankingRuleOutput {
query: query_graph,
candidates: universe.clone(),
score: ScoreDetails::ExactAttribute(
score_details::ExactAttribute::NoExactMatch,
),
}),
Some(RankingRuleOutput { query: query_graph, candidates: universe.clone() }),
),
};
(state, output)

View File

@@ -8,7 +8,6 @@ use rstar::RTree;
use super::ranking_rules::{RankingRule, RankingRuleOutput, RankingRuleQueryTrait};
use crate::heed_codec::facet::{FieldDocIdFacetCodec, OrderedF64Codec};
use crate::score_details::{self, ScoreDetails};
use crate::{
distance_between_two_points, lat_lng_to_xyz, GeoPoint, Index, Result, SearchContext,
SearchLogger,
@@ -81,7 +80,7 @@ pub struct GeoSort<Q: RankingRuleQueryTrait> {
field_ids: Option<[u16; 2]>,
rtree: Option<RTree<GeoPoint>>,
cached_sorted_docids: VecDeque<(u32, [f64; 2])>,
cached_sorted_docids: VecDeque<u32>,
geo_candidates: RoaringBitmap,
}
@@ -131,7 +130,7 @@ impl<Q: RankingRuleQueryTrait> GeoSort<Q> {
let point = lat_lng_to_xyz(&self.point);
for point in rtree.nearest_neighbor_iter(&point) {
if self.geo_candidates.contains(point.data.0) {
self.cached_sorted_docids.push_back(point.data);
self.cached_sorted_docids.push_back(point.data.0);
if self.cached_sorted_docids.len() >= cache_size {
break;
}
@@ -143,7 +142,7 @@ impl<Q: RankingRuleQueryTrait> GeoSort<Q> {
let point = lat_lng_to_xyz(&opposite_of(self.point));
for point in rtree.nearest_neighbor_iter(&point) {
if self.geo_candidates.contains(point.data.0) {
self.cached_sorted_docids.push_front(point.data);
self.cached_sorted_docids.push_front(point.data.0);
if self.cached_sorted_docids.len() >= cache_size {
break;
}
@@ -178,7 +177,7 @@ impl<Q: RankingRuleQueryTrait> GeoSort<Q> {
// computing the distance between two points is expensive thus we cache the result
documents
.sort_by_cached_key(|(_, p)| distance_between_two_points(&self.point, p) as usize);
self.cached_sorted_docids.extend(documents.into_iter());
self.cached_sorted_docids.extend(documents.into_iter().map(|(doc_id, _)| doc_id));
};
Ok(())
@@ -221,19 +220,12 @@ impl<'ctx, Q: RankingRuleQueryTrait> RankingRule<'ctx, Q> for GeoSort<Q> {
logger: &mut dyn SearchLogger<Q>,
universe: &RoaringBitmap,
) -> Result<Option<RankingRuleOutput<Q>>> {
assert!(universe.len() > 1);
let query = self.query.as_ref().unwrap().clone();
self.geo_candidates &= universe;
if self.geo_candidates.is_empty() {
return Ok(Some(RankingRuleOutput {
query,
candidates: universe.clone(),
score: ScoreDetails::GeoSort(score_details::GeoSort {
target_point: self.point,
ascending: self.ascending,
value: None,
}),
}));
return Ok(Some(RankingRuleOutput { query, candidates: universe.clone() }));
}
let ascending = self.ascending;
@@ -244,16 +236,11 @@ impl<'ctx, Q: RankingRuleQueryTrait> RankingRule<'ctx, Q> for GeoSort<Q> {
cache.pop_back()
}
};
while let Some((id, point)) = next(&mut self.cached_sorted_docids) {
while let Some(id) = next(&mut self.cached_sorted_docids) {
if self.geo_candidates.contains(id) {
return Ok(Some(RankingRuleOutput {
query,
candidates: RoaringBitmap::from_iter([id]),
score: ScoreDetails::GeoSort(score_details::GeoSort {
target_point: self.point,
ascending: self.ascending,
value: Some(point),
}),
}));
}
}

View File

@@ -46,21 +46,14 @@ use super::logger::SearchLogger;
use super::query_graph::QueryNode;
use super::ranking_rule_graph::{
ConditionDocIdsCache, DeadEndsCache, ExactnessGraph, FidGraph, PositionGraph, ProximityGraph,
RankingRuleGraph, RankingRuleGraphTrait, TypoGraph, WordsGraph,
RankingRuleGraph, RankingRuleGraphTrait, TypoGraph,
};
use super::small_bitmap::SmallBitmap;
use super::{QueryGraph, RankingRule, RankingRuleOutput, SearchContext};
use crate::score_details::Rank;
use crate::search::new::query_term::LocatedQueryTermSubset;
use crate::search::new::ranking_rule_graph::PathVisitor;
use crate::{Result, TermsMatchingStrategy};
pub type Words = GraphBasedRankingRule<WordsGraph>;
impl GraphBasedRankingRule<WordsGraph> {
pub fn new(terms_matching_strategy: TermsMatchingStrategy) -> Self {
Self::new_with_id("words".to_owned(), Some(terms_matching_strategy))
}
}
pub type Proximity = GraphBasedRankingRule<ProximityGraph>;
impl GraphBasedRankingRule<ProximityGraph> {
pub fn new(terms_matching_strategy: Option<TermsMatchingStrategy>) -> Self {
@@ -119,8 +112,6 @@ pub struct GraphBasedRankingRuleState<G: RankingRuleGraphTrait> {
all_costs: MappedInterner<QueryNode, Vec<u64>>,
/// An index in the first element of `all_distances`, giving the cost of the next bucket
cur_cost: u64,
/// One above the highest possible cost for this rule
next_max_cost: u64,
}
impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBasedRankingRule<G> {
@@ -134,20 +125,7 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase
_universe: &RoaringBitmap,
query_graph: &QueryGraph,
) -> Result<()> {
// the `next_max_cost` is the successor integer to the maximum cost of the paths in the graph.
//
// When there is a matching strategy, it also factors the additional costs of:
// 1. The words that are matched in phrases
// 2. Skipping words (by adding them to the paths with a cost)
let mut next_max_cost = 1;
let removal_cost = if let Some(terms_matching_strategy) = self.terms_matching_strategy {
// add the cost of the phrase to the next_max_cost
next_max_cost += query_graph
.words_in_phrases_count(ctx)
// remove 1 from the words in phrases count, because when there is a phrase we can now have a document
// where only the phrase is matching, and none of the non-phrase words.
// With the `1` that `next_max_cost` is initialized with, this gets counted twice.
.saturating_sub(1) as u64;
match terms_matching_strategy {
TermsMatchingStrategy::Last => {
let removal_order =
@@ -155,12 +133,13 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase
let mut forbidden_nodes =
SmallBitmap::for_interned_values_in(&query_graph.nodes);
let mut costs = query_graph.nodes.map(|_| None);
// FIXME: this works because only words uses termsmatchingstrategy at the moment.
let mut cost = 100;
for ns in removal_order {
for n in ns.iter() {
*costs.get_mut(n) = Some((1, forbidden_nodes.clone()));
*costs.get_mut(n) = Some((cost, forbidden_nodes.clone()));
}
forbidden_nodes.union(&ns);
cost += 100;
}
costs
}
@@ -177,16 +156,12 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase
// Then pre-compute the cost of all paths from each node to the end node
let all_costs = graph.find_all_costs_to_end();
next_max_cost +=
all_costs.get(graph.query_graph.root_node).iter().copied().max().unwrap_or(0);
let state = GraphBasedRankingRuleState {
graph,
conditions_cache: condition_docids_cache,
dead_ends_cache,
all_costs,
cur_cost: 0,
next_max_cost,
};
self.state = Some(state);
@@ -200,13 +175,17 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase
logger: &mut dyn SearchLogger<QueryGraph>,
universe: &RoaringBitmap,
) -> Result<Option<RankingRuleOutput<QueryGraph>>> {
// If universe.len() <= 1, the bucket sort algorithm
// should not have called this function.
assert!(universe.len() > 1);
// Will crash if `next_bucket` is called before `start_iteration` or after `end_iteration`,
// should never happen
let mut state = self.state.take().unwrap();
let all_costs = state.all_costs.get(state.graph.query_graph.root_node);
// Retrieve the cost of the paths to compute
let Some(&cost) = all_costs
let Some(&cost) = state
.all_costs
.get(state.graph.query_graph.root_node)
.iter()
.find(|c| **c >= state.cur_cost) else {
self.state = None;
@@ -222,12 +201,8 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase
dead_ends_cache,
all_costs,
cur_cost: _,
next_max_cost,
} = &mut state;
let rank = *next_max_cost - cost;
let score = G::rank_to_score(Rank { rank: rank as u32, max_rank: *next_max_cost as u32 });
let mut universe = universe.clone();
let mut used_conditions = SmallBitmap::for_interned_values_in(&graph.conditions_interner);
@@ -314,6 +289,8 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase
// We modify the next query graph so that it only contains the subgraph
// that was used to compute this bucket
// But we only do it in case the bucket length is >1, because otherwise
// we know the child ranking rule won't be called anyway
let paths: Vec<Vec<(Option<LocatedQueryTermSubset>, LocatedQueryTermSubset)>> = good_paths
.into_iter()
@@ -342,7 +319,7 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase
self.state = Some(state);
Ok(Some(RankingRuleOutput { query: next_query_graph, candidates: bucket, score }))
Ok(Some(RankingRuleOutput { query: next_query_graph, candidates: bucket }))
}
fn end_iteration(

View File

@@ -4,6 +4,7 @@ use std::io::{BufWriter, Write};
use std::path::{Path, PathBuf};
use std::time::Instant;
// use rand::random;
use roaring::RoaringBitmap;
use crate::search::new::interner::Interned;
@@ -12,7 +13,6 @@ use crate::search::new::query_term::LocatedQueryTermSubset;
use crate::search::new::ranking_rule_graph::{
Edge, FidCondition, FidGraph, PositionCondition, PositionGraph, ProximityCondition,
ProximityGraph, RankingRuleGraph, RankingRuleGraphTrait, TypoCondition, TypoGraph,
WordsCondition, WordsGraph,
};
use crate::search::new::ranking_rules::BoxRankingRule;
use crate::search::new::{QueryGraph, QueryNode, RankingRule, SearchContext, SearchLogger};
@@ -24,12 +24,11 @@ pub enum SearchEvents {
RankingRuleSkipBucket { ranking_rule_idx: usize, bucket_len: u64 },
RankingRuleEndIteration { ranking_rule_idx: usize, universe_len: u64 },
ExtendResults { new: Vec<u32> },
WordsGraph { query_graph: QueryGraph },
ProximityGraph { graph: RankingRuleGraph<ProximityGraph> },
ProximityPaths { paths: Vec<Vec<Interned<ProximityCondition>>> },
TypoGraph { graph: RankingRuleGraph<TypoGraph> },
TypoPaths { paths: Vec<Vec<Interned<TypoCondition>>> },
WordsGraph { graph: RankingRuleGraph<WordsGraph> },
WordsPaths { paths: Vec<Vec<Interned<WordsCondition>>> },
FidGraph { graph: RankingRuleGraph<FidGraph> },
FidPaths { paths: Vec<Vec<Interned<FidCondition>>> },
PositionGraph { graph: RankingRuleGraph<PositionGraph> },
@@ -140,11 +139,8 @@ impl SearchLogger<QueryGraph> for VisualSearchLogger {
let Some(location) = self.location.last() else { return };
match location {
Location::Words => {
if let Some(graph) = state.downcast_ref::<RankingRuleGraph<WordsGraph>>() {
self.events.push(SearchEvents::WordsGraph { graph: graph.clone() });
}
if let Some(paths) = state.downcast_ref::<Vec<Vec<Interned<WordsCondition>>>>() {
self.events.push(SearchEvents::WordsPaths { paths: paths.clone() });
if let Some(query_graph) = state.downcast_ref::<QueryGraph>() {
self.events.push(SearchEvents::WordsGraph { query_graph: query_graph.clone() });
}
}
Location::Typo => {
@@ -333,6 +329,7 @@ impl<'ctx> DetailedLoggerFinish<'ctx> {
SearchEvents::ExtendResults { new } => {
self.write_extend_results(new)?;
}
SearchEvents::WordsGraph { query_graph } => self.write_words_graph(query_graph)?,
SearchEvents::ProximityGraph { graph } => self.write_rr_graph(&graph)?,
SearchEvents::ProximityPaths { paths } => {
self.write_rr_graph_paths::<ProximityGraph>(paths)?;
@@ -341,10 +338,6 @@ impl<'ctx> DetailedLoggerFinish<'ctx> {
SearchEvents::TypoPaths { paths } => {
self.write_rr_graph_paths::<TypoGraph>(paths)?;
}
SearchEvents::WordsGraph { graph } => self.write_rr_graph(&graph)?,
SearchEvents::WordsPaths { paths } => {
self.write_rr_graph_paths::<WordsGraph>(paths)?;
}
SearchEvents::FidGraph { graph } => self.write_rr_graph(&graph)?,
SearchEvents::FidPaths { paths } => {
self.write_rr_graph_paths::<FidGraph>(paths)?;
@@ -462,7 +455,7 @@ fill: \"#B6E2D3\"
shape: class
max_nbr_typo: {}",
term_subset.description(ctx),
term_subset.max_typo_cost(ctx)
term_subset.max_nbr_typos(ctx)
)?;
for w in term_subset.all_single_words_except_prefix_db(ctx)? {
@@ -489,6 +482,13 @@ fill: \"#B6E2D3\"
}
Ok(())
}
fn write_words_graph(&mut self, qg: QueryGraph) -> Result<()> {
self.make_new_file_for_internal_state_if_needed()?;
self.write_query_graph(&qg)?;
Ok(())
}
fn write_rr_graph<R: RankingRuleGraphTrait>(
&mut self,
graph: &RankingRuleGraph<R>,

View File

@@ -52,7 +52,7 @@ impl MatchingWords {
words.push(LocatedMatchingWords {
value: matching_words,
positions: located_term.positions.clone(),
is_prefix: term.is_prefix(),
is_prefix: term.is_cached_prefix(),
original_char_count: term.original_word(&ctx).chars().count(),
});
}
@@ -244,8 +244,6 @@ pub(crate) mod tests {
temp_index
.add_documents(documents!([
{ "id": 1, "name": "split this world westfali westfalia the Ŵôřlḑôle" },
{ "id": 2, "name": "Westfália" },
{ "id": 3, "name": "Ŵôřlḑôle" },
]))
.unwrap();
temp_index
@@ -307,7 +305,7 @@ pub(crate) mod tests {
..Default::default()
})
.next(),
Some(MatchType::Full { char_len: 5, ids: &(2..=2) })
None
);
assert_eq!(
matching_words

View File

@@ -499,37 +499,17 @@ mod tests {
use charabia::TokenizerBuilder;
use matching_words::tests::temp_index_with_documents;
use super::super::located_query_terms_from_tokens;
use super::*;
use crate::index::tests::TempIndex;
use crate::{execute_search, SearchContext};
use crate::SearchContext;
impl<'a> MatcherBuilder<'a, &[u8]> {
fn new_test(rtxn: &'a heed::RoTxn, index: &'a TempIndex, query: &str) -> Self {
let mut ctx = SearchContext::new(index, rtxn);
let crate::search::PartialSearchResult { located_query_terms, .. } = execute_search(
&mut ctx,
&Some(query.to_string()),
crate::TermsMatchingStrategy::default(),
crate::score_details::ScoringStrategy::Skip,
false,
&None,
&None,
crate::search::new::GeoSortStrategy::default(),
0,
100,
Some(10),
&mut crate::DefaultSearchLogger,
&mut crate::DefaultSearchLogger,
)
.unwrap();
// consume context and located_query_terms to build MatchingWords.
let matching_words = match located_query_terms {
Some(located_query_terms) => MatchingWords::new(ctx, located_query_terms),
None => MatchingWords::default(),
};
MatcherBuilder::new(matching_words, TokenizerBuilder::new().build())
pub fn new_test(mut ctx: SearchContext, query: &'a str) -> Self {
let tokenizer = TokenizerBuilder::new().build();
let tokens = tokenizer.tokenize(query);
let query_terms = located_query_terms_from_tokens(&mut ctx, tokens, None).unwrap();
let matching_words = MatchingWords::new(ctx, query_terms);
Self::new(matching_words, TokenizerBuilder::new().build())
}
}
@@ -537,7 +517,8 @@ mod tests {
fn format_identity() {
let temp_index = temp_index_with_documents();
let rtxn = temp_index.read_txn().unwrap();
let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "split the world");
let ctx = SearchContext::new(&temp_index, &rtxn);
let builder = MatcherBuilder::new_test(ctx, "split the world");
let format_options = FormatOptions { highlight: false, crop: None };
@@ -564,7 +545,8 @@ mod tests {
fn format_highlight() {
let temp_index = temp_index_with_documents();
let rtxn = temp_index.read_txn().unwrap();
let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "split the world");
let ctx = SearchContext::new(&temp_index, &rtxn);
let builder = MatcherBuilder::new_test(ctx, "split the world");
let format_options = FormatOptions { highlight: true, crop: None };
@@ -607,7 +589,8 @@ mod tests {
fn highlight_unicode() {
let temp_index = temp_index_with_documents();
let rtxn = temp_index.read_txn().unwrap();
let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "world");
let ctx = SearchContext::new(&temp_index, &rtxn);
let builder = MatcherBuilder::new_test(ctx, "world");
let format_options = FormatOptions { highlight: true, crop: None };
// Text containing prefix match.
@@ -616,7 +599,7 @@ mod tests {
// no crop should return complete text with highlighted matches.
insta::assert_snapshot!(
matcher.format(format_options),
@"<em>Ŵôřlḑ</em>ôle"
@"<em>Ŵôřlḑôle</em>"
);
// Text containing unicode match.
@@ -628,7 +611,8 @@ mod tests {
@"<em>Ŵôřlḑ</em>"
);
let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "westfali");
let ctx = SearchContext::new(&temp_index, &rtxn);
let builder = MatcherBuilder::new_test(ctx, "westfali");
let format_options = FormatOptions { highlight: true, crop: None };
// Text containing unicode match.
@@ -637,7 +621,7 @@ mod tests {
// no crop should return complete text with highlighted matches.
insta::assert_snapshot!(
matcher.format(format_options),
@"<em>Westfáli</em>a"
@"<em>Westfália</em>"
);
}
@@ -645,7 +629,8 @@ mod tests {
fn format_crop() {
let temp_index = temp_index_with_documents();
let rtxn = temp_index.read_txn().unwrap();
let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "split the world");
let ctx = SearchContext::new(&temp_index, &rtxn);
let builder = MatcherBuilder::new_test(ctx, "split the world");
let format_options = FormatOptions { highlight: false, crop: Some(10) };
@@ -742,7 +727,8 @@ mod tests {
fn format_highlight_crop() {
let temp_index = temp_index_with_documents();
let rtxn = temp_index.read_txn().unwrap();
let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "split the world");
let ctx = SearchContext::new(&temp_index, &rtxn);
let builder = MatcherBuilder::new_test(ctx, "split the world");
let format_options = FormatOptions { highlight: true, crop: Some(10) };
@@ -804,7 +790,8 @@ mod tests {
//! testing: https://github.com/meilisearch/specifications/pull/120#discussion_r836536295
let temp_index = temp_index_with_documents();
let rtxn = temp_index.read_txn().unwrap();
let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "split the world");
let ctx = SearchContext::new(&temp_index, &rtxn);
let builder = MatcherBuilder::new_test(ctx, "split the world");
let text = "void void split the world void void.";
@@ -840,8 +827,8 @@ mod tests {
fn partial_matches() {
let temp_index = temp_index_with_documents();
let rtxn = temp_index.read_txn().unwrap();
let mut builder =
MatcherBuilder::new_test(&rtxn, &temp_index, "the \"t he\" door \"do or\"");
let ctx = SearchContext::new(&temp_index, &rtxn);
let mut builder = MatcherBuilder::new_test(ctx, "the \"t he\" door \"do or\"");
builder.highlight_prefix("_".to_string());
builder.highlight_suffix("_".to_string());

View File

@@ -15,7 +15,11 @@ mod resolve_query_graph;
mod small_bitmap;
mod exact_attribute;
// TODO: documentation + comments
// implementation is currently an adaptation of the previous implementation to fit with the new model
mod sort;
// TODO: documentation + comments
mod words;
#[cfg(test)]
mod tests;
@@ -39,12 +43,11 @@ use ranking_rules::{
use resolve_query_graph::{compute_query_graph_docids, PhraseDocIdsCache};
use roaring::RoaringBitmap;
use sort::Sort;
use words::Words;
use self::geo_sort::GeoSort;
pub use self::geo_sort::Strategy as GeoSortStrategy;
use self::graph_based_ranking_rule::Words;
use self::interner::Interned;
use crate::score_details::{ScoreDetails, ScoringStrategy};
use crate::search::new::distinct::apply_distinct_rule;
use crate::{AscDesc, DocumentId, Filter, Index, Member, Result, TermsMatchingStrategy, UserError};
@@ -199,11 +202,6 @@ fn get_ranking_rules_for_query_graph_search<'ctx>(
let mut sorted_fields = HashSet::new();
let mut geo_sorted = false;
// Don't add the `words` ranking rule if the term matching strategy is `All`
if matches!(terms_matching_strategy, TermsMatchingStrategy::All) {
words = true;
}
let mut ranking_rules: Vec<BoxRankingRule<QueryGraph>> = vec![];
let settings_ranking_rules = ctx.index.criteria(ctx.txn)?;
for rr in settings_ranking_rules {
@@ -351,7 +349,6 @@ pub fn execute_search(
ctx: &mut SearchContext,
query: &Option<String>,
terms_matching_strategy: TermsMatchingStrategy,
scoring_strategy: ScoringStrategy,
exhaustive_number_hits: bool,
filters: &Option<Filter>,
sort_criteria: &Option<Vec<AscDesc>>,
@@ -400,8 +397,8 @@ pub fn execute_search(
None
};
let bucket_sort_output = if let Some(query_terms) = query_terms {
let (graph, new_located_query_terms) = QueryGraph::from_query(ctx, &query_terms)?;
located_query_terms = Some(new_located_query_terms);
let graph = QueryGraph::from_query(ctx, &query_terms)?;
located_query_terms = Some(query_terms);
let ranking_rules = get_ranking_rules_for_query_graph_search(
ctx,
@@ -413,16 +410,7 @@ pub fn execute_search(
universe =
resolve_universe(ctx, &universe, &graph, terms_matching_strategy, query_graph_logger)?;
bucket_sort(
ctx,
ranking_rules,
&graph,
&universe,
from,
length,
scoring_strategy,
query_graph_logger,
)?
bucket_sort(ctx, ranking_rules, &graph, &universe, from, length, query_graph_logger)?
} else {
let ranking_rules =
get_ranking_rules_for_placeholder_search(ctx, sort_criteria, geo_strategy)?;
@@ -433,20 +421,17 @@ pub fn execute_search(
&universe,
from,
length,
scoring_strategy,
placeholder_search_logger,
)?
};
let BucketSortOutput { docids, scores, mut all_candidates } = bucket_sort_output;
let fields_ids_map = ctx.index.fields_ids_map(ctx.txn)?;
let BucketSortOutput { docids, mut all_candidates } = bucket_sort_output;
// The candidates is the universe unless the exhaustive number of hits
// is requested and a distinct attribute is set.
if exhaustive_number_hits {
if let Some(f) = ctx.index.distinct_field(ctx.txn)? {
if let Some(distinct_fid) = fields_ids_map.id(f) {
if let Some(distinct_fid) = ctx.index.fields_ids_map(ctx.txn)?.id(f) {
all_candidates = apply_distinct_rule(ctx, distinct_fid, &all_candidates)?.remaining;
}
}
@@ -454,7 +439,6 @@ pub fn execute_search(
Ok(PartialSearchResult {
candidates: all_candidates,
document_scores: scores,
documents_ids: docids,
located_query_terms,
})
@@ -506,5 +490,4 @@ pub struct PartialSearchResult {
pub located_query_terms: Option<Vec<LocatedQueryTerm>>,
pub candidates: RoaringBitmap,
pub documents_ids: Vec<DocumentId>,
pub document_scores: Vec<Vec<ScoreDetails>>,
}

View File

@@ -88,15 +88,12 @@ pub struct QueryGraph {
}
impl QueryGraph {
/// Build the query graph from the parsed user search query, return an updated list of the located query terms
/// which contains ngrams.
/// Build the query graph from the parsed user search query.
pub fn from_query(
ctx: &mut SearchContext,
// NOTE: the terms here must be consecutive
terms: &[LocatedQueryTerm],
) -> Result<(QueryGraph, Vec<LocatedQueryTerm>)> {
let mut new_located_query_terms = terms.to_vec();
) -> Result<QueryGraph> {
let nbr_typos = number_of_typos_allowed(ctx)?;
let mut nodes_data: Vec<QueryNodeData> = vec![QueryNodeData::Start, QueryNodeData::End];
@@ -110,11 +107,10 @@ impl QueryGraph {
let original_terms_len = terms.len();
for term_idx in 0..original_terms_len {
let mut new_nodes = vec![];
let new_node_idx = add_node(
&mut nodes_data,
QueryNodeData::Term(LocatedQueryTermSubset {
term_subset: QueryTermSubset::full(terms[term_idx].value),
term_subset: QueryTermSubset::full(Interned::from_raw(term_idx as u16)),
positions: terms[term_idx].positions.clone(),
term_ids: term_idx as u8..=term_idx as u8,
}),
@@ -125,7 +121,6 @@ impl QueryGraph {
if let Some(ngram) =
query_term::make_ngram(ctx, &terms[term_idx - 1..=term_idx], &nbr_typos)?
{
new_located_query_terms.push(ngram.clone());
let ngram_idx = add_node(
&mut nodes_data,
QueryNodeData::Term(LocatedQueryTermSubset {
@@ -141,7 +136,6 @@ impl QueryGraph {
if let Some(ngram) =
query_term::make_ngram(ctx, &terms[term_idx - 2..=term_idx], &nbr_typos)?
{
new_located_query_terms.push(ngram.clone());
let ngram_idx = add_node(
&mut nodes_data,
QueryNodeData::Term(LocatedQueryTermSubset {
@@ -173,7 +167,7 @@ impl QueryGraph {
let mut graph = QueryGraph { root_node, end_node, nodes };
graph.build_initial_edges();
Ok((graph, new_located_query_terms))
Ok(graph)
}
/// Remove the given nodes, connecting all their predecessors to all their successors.
@@ -342,25 +336,6 @@ impl QueryGraph {
}
res
}
/// Number of words in the phrases in this query graph
pub(crate) fn words_in_phrases_count(&self, ctx: &SearchContext) -> usize {
let mut word_count = 0;
for (_, node) in self.nodes.iter() {
match &node.data {
QueryNodeData::Term(term) => {
let Some(phrase) = term.term_subset.original_phrase(ctx)
else {
continue
};
let phrase = ctx.phrase_interner.get(phrase);
word_count += phrase.words.iter().copied().filter(|a| a.is_some()).count()
}
_ => continue,
}
}
word_count
}
}
fn add_node(nodes_data: &mut Vec<QueryNodeData>, node_data: QueryNodeData) -> u16 {

View File

@@ -28,14 +28,16 @@ pub enum ZeroOrOneTypo {
impl Interned<QueryTerm> {
pub fn compute_fully_if_needed(self, ctx: &mut SearchContext) -> Result<()> {
let s = ctx.term_interner.get_mut(self);
if s.max_levenshtein_distance <= 1 && s.one_typo.is_uninit() {
if s.max_nbr_typos == 0 {
s.one_typo = Lazy::Init(OneTypoTerm::default());
s.two_typo = Lazy::Init(TwoTypoTerm::default());
} else if s.max_nbr_typos == 1 && s.one_typo.is_uninit() {
assert!(s.two_typo.is_uninit());
// Initialize one_typo subterm even if max_nbr_typo is 0 because of split words
self.initialize_one_typo_subterm(ctx)?;
let s = ctx.term_interner.get_mut(self);
assert!(s.one_typo.is_init());
s.two_typo = Lazy::Init(TwoTypoTerm::default());
} else if s.max_levenshtein_distance > 1 && s.two_typo.is_uninit() {
} else if s.max_nbr_typos > 1 && s.two_typo.is_uninit() {
assert!(s.two_typo.is_uninit());
self.initialize_one_and_two_typo_subterm(ctx)?;
let s = ctx.term_interner.get_mut(self);
@@ -185,7 +187,7 @@ pub fn partially_initialized_term_from_word(
original: ctx.word_interner.insert(word.to_owned()),
ngram_words: None,
is_prefix: false,
max_levenshtein_distance: 0,
max_nbr_typos: 0,
zero_typo: <_>::default(),
one_typo: Lazy::Init(<_>::default()),
two_typo: Lazy::Init(<_>::default()),
@@ -256,7 +258,7 @@ pub fn partially_initialized_term_from_word(
Ok(QueryTerm {
original: word_interned,
ngram_words: None,
max_levenshtein_distance: max_typo,
max_nbr_typos: max_typo,
is_prefix,
zero_typo,
one_typo: Lazy::Uninit,
@@ -275,16 +277,7 @@ fn find_split_words(ctx: &mut SearchContext, word: &str) -> Result<Option<Intern
impl Interned<QueryTerm> {
fn initialize_one_typo_subterm(self, ctx: &mut SearchContext) -> Result<()> {
let self_mut = ctx.term_interner.get_mut(self);
let allows_split_words = self_mut.allows_split_words();
let QueryTerm {
original,
is_prefix,
one_typo,
max_levenshtein_distance: max_nbr_typos,
..
} = self_mut;
let QueryTerm { original, is_prefix, one_typo, .. } = self_mut;
let original = *original;
let is_prefix = *is_prefix;
// let original_str = ctx.word_interner.get(*original).to_owned();
@@ -293,33 +286,26 @@ impl Interned<QueryTerm> {
}
let mut one_typo_words = BTreeSet::new();
if *max_nbr_typos > 0 {
find_zero_one_typo_derivations(ctx, original, is_prefix, |derived_word, nbr_typos| {
match nbr_typos {
ZeroOrOneTypo::Zero => {}
ZeroOrOneTypo::One => {
if one_typo_words.len() < limits::MAX_ONE_TYPO_COUNT {
one_typo_words.insert(derived_word);
} else {
return Ok(ControlFlow::Break(()));
}
find_zero_one_typo_derivations(ctx, original, is_prefix, |derived_word, nbr_typos| {
match nbr_typos {
ZeroOrOneTypo::Zero => {}
ZeroOrOneTypo::One => {
if one_typo_words.len() < limits::MAX_ONE_TYPO_COUNT {
one_typo_words.insert(derived_word);
} else {
return Ok(ControlFlow::Break(()));
}
}
Ok(ControlFlow::Continue(()))
})?;
}
let split_words = if allows_split_words {
let original_str = ctx.word_interner.get(original).to_owned();
find_split_words(ctx, original_str.as_str())?
} else {
None
};
}
Ok(ControlFlow::Continue(()))
})?;
let original_str = ctx.word_interner.get(original).to_owned();
let split_words = find_split_words(ctx, original_str.as_str())?;
let self_mut = ctx.term_interner.get_mut(self);
// Only add the split words to the derivations if:
// 1. the term is neither an ngram nor a phrase; OR
// 1. the term is not an ngram; OR
// 2. the term is an ngram, but the split words are different from the ngram's component words
let split_words = if let Some((ngram_words, split_words)) =
self_mut.ngram_words.as_ref().zip(split_words.as_ref())
@@ -341,13 +327,7 @@ impl Interned<QueryTerm> {
}
fn initialize_one_and_two_typo_subterm(self, ctx: &mut SearchContext) -> Result<()> {
let self_mut = ctx.term_interner.get_mut(self);
let QueryTerm {
original,
is_prefix,
two_typo,
max_levenshtein_distance: max_nbr_typos,
..
} = self_mut;
let QueryTerm { original, is_prefix, two_typo, .. } = self_mut;
let original_str = ctx.word_interner.get(*original).to_owned();
if two_typo.is_init() {
return Ok(());
@@ -355,37 +335,34 @@ impl Interned<QueryTerm> {
let mut one_typo_words = BTreeSet::new();
let mut two_typo_words = BTreeSet::new();
if *max_nbr_typos > 0 {
find_zero_one_two_typo_derivations(
*original,
*is_prefix,
ctx.index.words_fst(ctx.txn)?,
&mut ctx.word_interner,
|derived_word, nbr_typos| {
if one_typo_words.len() >= limits::MAX_ONE_TYPO_COUNT
&& two_typo_words.len() >= limits::MAX_TWO_TYPOS_COUNT
{
// No chance we will add either one- or two-typo derivations anymore, stop iterating.
return Ok(ControlFlow::Break(()));
}
match nbr_typos {
NumberOfTypos::Zero => {}
NumberOfTypos::One => {
if one_typo_words.len() < limits::MAX_ONE_TYPO_COUNT {
one_typo_words.insert(derived_word);
}
}
NumberOfTypos::Two => {
if two_typo_words.len() < limits::MAX_TWO_TYPOS_COUNT {
two_typo_words.insert(derived_word);
}
find_zero_one_two_typo_derivations(
*original,
*is_prefix,
ctx.index.words_fst(ctx.txn)?,
&mut ctx.word_interner,
|derived_word, nbr_typos| {
if one_typo_words.len() >= limits::MAX_ONE_TYPO_COUNT
&& two_typo_words.len() >= limits::MAX_TWO_TYPOS_COUNT
{
// No chance we will add either one- or two-typo derivations anymore, stop iterating.
return Ok(ControlFlow::Break(()));
}
match nbr_typos {
NumberOfTypos::Zero => {}
NumberOfTypos::One => {
if one_typo_words.len() < limits::MAX_ONE_TYPO_COUNT {
one_typo_words.insert(derived_word);
}
}
Ok(ControlFlow::Continue(()))
},
)?;
}
NumberOfTypos::Two => {
if two_typo_words.len() < limits::MAX_TWO_TYPOS_COUNT {
two_typo_words.insert(derived_word);
}
}
}
Ok(ControlFlow::Continue(()))
},
)?;
let split_words = find_split_words(ctx, original_str.as_str())?;
let self_mut = ctx.term_interner.get_mut(self);

View File

@@ -43,7 +43,7 @@ pub struct QueryTermSubset {
pub struct QueryTerm {
original: Interned<String>,
ngram_words: Option<Vec<Interned<String>>>,
max_levenshtein_distance: u8,
max_nbr_typos: u8,
is_prefix: bool,
zero_typo: ZeroTypoTerm,
// May not be computed yet
@@ -342,16 +342,10 @@ impl QueryTermSubset {
}
None
}
pub fn max_typo_cost(&self, ctx: &SearchContext) -> u8 {
pub fn max_nbr_typos(&self, ctx: &SearchContext) -> u8 {
let t = ctx.term_interner.get(self.original);
match t.max_levenshtein_distance {
0 => {
if t.allows_split_words() {
1
} else {
0
}
}
match t.max_nbr_typos {
0 => 0,
1 => {
if self.one_typo_subset.is_empty() {
0
@@ -444,9 +438,6 @@ impl QueryTerm {
self.zero_typo.is_empty() && one_typo.is_empty() && two_typo.is_empty()
}
fn allows_split_words(&self) -> bool {
self.zero_typo.phrase.is_none()
}
}
impl Interned<QueryTerm> {
@@ -479,9 +470,6 @@ impl QueryTerm {
pub fn is_cached_prefix(&self) -> bool {
self.zero_typo.use_prefix_db.is_some()
}
pub fn is_prefix(&self) -> bool {
self.is_prefix
}
pub fn original_word(&self, ctx: &SearchContext) -> String {
ctx.word_interner.get(self.original).clone()
}

View File

@@ -77,9 +77,13 @@ pub fn located_query_terms_from_tokens(
}
}
TokenKind::Separator(separator_kind) => {
// add penalty for hard separators
if let SeparatorKind::Hard = separator_kind {
position = position.wrapping_add(7);
match separator_kind {
SeparatorKind::Hard => {
position += 1;
}
SeparatorKind::Soft => {
position += 0;
}
}
phrase = 'phrase: {
@@ -213,7 +217,7 @@ pub fn make_ngram(
original: ngram_str_interned,
ngram_words: Some(words_interned),
is_prefix,
max_levenshtein_distance: max_nbr_typos,
max_nbr_typos,
zero_typo: term.zero_typo,
one_typo: Lazy::Uninit,
two_typo: Lazy::Uninit,
@@ -267,7 +271,7 @@ impl PhraseBuilder {
QueryTerm {
original: ctx.word_interner.insert(phrase_desc),
ngram_words: None,
max_levenshtein_distance: 0,
max_nbr_typos: 0,
is_prefix: false,
zero_typo: ZeroTypoTerm {
phrase: Some(phrase),
@@ -284,36 +288,3 @@ impl PhraseBuilder {
})
}
}
#[cfg(test)]
mod tests {
use charabia::TokenizerBuilder;
use super::*;
use crate::index::tests::TempIndex;
fn temp_index_with_documents() -> TempIndex {
let temp_index = TempIndex::new();
temp_index
.add_documents(documents!([
{ "id": 1, "name": "split this world westfali westfalia the Ŵôřlḑôle" },
{ "id": 2, "name": "Westfália" },
{ "id": 3, "name": "Ŵôřlḑôle" },
]))
.unwrap();
temp_index
}
#[test]
fn start_with_hard_separator() -> Result<()> {
let tokenizer = TokenizerBuilder::new().build();
let tokens = tokenizer.tokenize(".");
let index = temp_index_with_documents();
let rtxn = index.read_txn()?;
let mut ctx = SearchContext::new(&index, &rtxn);
// panics with `attempt to add with overflow` before <https://github.com/meilisearch/meilisearch/issues/3785>
let located_query_terms = located_query_terms_from_tokens(&mut ctx, tokens, None)?;
assert!(located_query_terms.is_empty());
Ok(())
}
}

View File

@@ -49,15 +49,10 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
if let Some((cost_of_ignoring, forbidden_nodes)) =
cost_of_ignoring_node.get(dest_idx)
{
let dest = graph_nodes.get(dest_idx);
let dest_size = match &dest.data {
QueryNodeData::Term(term) => term.term_ids.len(),
_ => panic!(),
};
let new_edge_id = edges_store.insert(Some(Edge {
source_node: source_id,
dest_node: dest_idx,
cost: *cost_of_ignoring * dest_size as u32,
cost: *cost_of_ignoring,
condition: None,
nodes_to_skip: forbidden_nodes.clone(),
}));

View File

@@ -205,12 +205,18 @@ impl<G: RankingRuleGraphTrait> VisitorState<G> {
impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
pub fn find_all_costs_to_end(&self) -> MappedInterner<QueryNode, Vec<u64>> {
let mut costs_to_end = self.query_graph.nodes.map(|_| vec![]);
let mut enqueued = SmallBitmap::new(self.query_graph.nodes.len());
self.traverse_breadth_first_backward(self.query_graph.end_node, |cur_node| {
if cur_node == self.query_graph.end_node {
*costs_to_end.get_mut(self.query_graph.end_node) = vec![0];
return;
}
let mut node_stack = VecDeque::new();
*costs_to_end.get_mut(self.query_graph.end_node) = vec![0];
for prev_node in self.query_graph.nodes.get(self.query_graph.end_node).predecessors.iter() {
node_stack.push_back(prev_node);
enqueued.insert(prev_node);
}
while let Some(cur_node) = node_stack.pop_front() {
let mut self_costs = Vec::<u64>::new();
let cur_node_edges = &self.edges_of_node.get(cur_node);
@@ -226,7 +232,13 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
self_costs.dedup();
*costs_to_end.get_mut(cur_node) = self_costs;
});
for prev_node in self.query_graph.nodes.get(cur_node).predecessors.iter() {
if !enqueued.contains(prev_node) {
node_stack.push_back(prev_node);
enqueued.insert(prev_node);
}
}
}
costs_to_end
}
@@ -235,12 +247,17 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
node_with_removed_outgoing_conditions: Interned<QueryNode>,
costs: &mut MappedInterner<QueryNode, Vec<u64>>,
) {
// Traverse the graph backward from the target node, recomputing the cost for each of its predecessors.
// We first check that no other node is contributing the same total cost to a predecessor before removing
// the cost from the predecessor.
self.traverse_breadth_first_backward(node_with_removed_outgoing_conditions, |cur_node| {
let mut enqueued = SmallBitmap::new(self.query_graph.nodes.len());
let mut node_stack = VecDeque::new();
enqueued.insert(node_with_removed_outgoing_conditions);
node_stack.push_back(node_with_removed_outgoing_conditions);
'main_loop: while let Some(cur_node) = node_stack.pop_front() {
let mut costs_to_remove = FxHashSet::default();
costs_to_remove.extend(costs.get(cur_node).iter().copied());
for c in costs.get(cur_node) {
costs_to_remove.insert(*c);
}
let cur_node_edges = &self.edges_of_node.get(cur_node);
for edge_idx in cur_node_edges.iter() {
@@ -248,75 +265,22 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
for cost in costs.get(edge.dest_node).iter() {
costs_to_remove.remove(&(*cost + edge.cost as u64));
if costs_to_remove.is_empty() {
return;
continue 'main_loop;
}
}
}
if costs_to_remove.is_empty() {
return;
continue 'main_loop;
}
let mut new_costs = BTreeSet::from_iter(costs.get(cur_node).iter().copied());
for c in costs_to_remove {
new_costs.remove(&c);
}
*costs.get_mut(cur_node) = new_costs.into_iter().collect();
});
}
/// Traverse the graph backwards from the given node such that every time
/// a node is visited, we are guaranteed that all its successors either:
/// 1. have already been visited; OR
/// 2. were not reachable from the given node
pub fn traverse_breadth_first_backward(
&self,
from: Interned<QueryNode>,
mut visit: impl FnMut(Interned<QueryNode>),
) {
let mut reachable = SmallBitmap::for_interned_values_in(&self.query_graph.nodes);
{
// go backward to get the set of all reachable nodes from the given node
// the nodes that are not reachable will be set as `visited`
let mut stack = VecDeque::new();
let mut enqueued = SmallBitmap::for_interned_values_in(&self.query_graph.nodes);
enqueued.insert(from);
stack.push_back(from);
while let Some(n) = stack.pop_front() {
if reachable.contains(n) {
continue;
}
reachable.insert(n);
for prev_node in self.query_graph.nodes.get(n).predecessors.iter() {
if !enqueued.contains(prev_node) && !reachable.contains(prev_node) {
stack.push_back(prev_node);
enqueued.insert(prev_node);
}
}
}
};
let mut unreachable_or_visited =
SmallBitmap::for_interned_values_in(&self.query_graph.nodes);
for (n, _) in self.query_graph.nodes.iter() {
if !reachable.contains(n) {
unreachable_or_visited.insert(n);
}
}
let mut enqueued = SmallBitmap::for_interned_values_in(&self.query_graph.nodes);
let mut stack = VecDeque::new();
enqueued.insert(from);
stack.push_back(from);
while let Some(cur_node) = stack.pop_front() {
if !self.query_graph.nodes.get(cur_node).successors.is_subset(&unreachable_or_visited) {
stack.push_back(cur_node);
continue;
}
unreachable_or_visited.insert(cur_node);
visit(cur_node);
for prev_node in self.query_graph.nodes.get(cur_node).predecessors.iter() {
if !enqueued.contains(prev_node) && !unreachable_or_visited.contains(prev_node) {
stack.push_back(prev_node);
if !enqueued.contains(prev_node) {
node_stack.push_back(prev_node);
enqueued.insert(prev_node);
}
}

View File

@@ -1,7 +1,6 @@
use roaring::RoaringBitmap;
use super::{ComputedCondition, RankingRuleGraphTrait};
use crate::score_details::{Rank, ScoreDetails};
use crate::search::new::interner::{DedupInterner, Interned};
use crate::search::new::query_term::{ExactTerm, LocatedQueryTermSubset};
use crate::search::new::resolve_query_graph::compute_query_term_subset_docids;
@@ -85,8 +84,4 @@ impl RankingRuleGraphTrait for ExactnessGraph {
Ok(vec![(0, exact_condition), (dest_node.term_ids.len() as u32, skip_condition)])
}
fn rank_to_score(rank: Rank) -> ScoreDetails {
ScoreDetails::Exactness(rank)
}
}

View File

@@ -2,7 +2,6 @@ use fxhash::FxHashSet;
use roaring::RoaringBitmap;
use super::{ComputedCondition, RankingRuleGraphTrait};
use crate::score_details::{Rank, ScoreDetails};
use crate::search::new::interner::{DedupInterner, Interned};
use crate::search::new::query_term::LocatedQueryTermSubset;
use crate::search::new::resolve_query_graph::compute_query_term_subset_docids_within_field_id;
@@ -69,7 +68,7 @@ impl RankingRuleGraphTrait for FidGraph {
}
let mut edges = vec![];
for fid in all_fields.iter().copied() {
for fid in all_fields {
// TODO: We can improve performances and relevancy by storing
// the term subsets associated to each field ids fetched.
edges.push((
@@ -81,35 +80,6 @@ impl RankingRuleGraphTrait for FidGraph {
));
}
// always lookup the max_fid if we don't already and add an artificial condition for max scoring
let max_fid: Option<u16> = {
if let Some(max_fid) = ctx
.index
.searchable_fields_ids(ctx.txn)?
.map(|field_ids| field_ids.into_iter().max())
{
max_fid
} else {
ctx.index.fields_ids_map(ctx.txn)?.ids().max()
}
};
if let Some(max_fid) = max_fid {
if !all_fields.contains(&max_fid) {
edges.push((
max_fid as u32 * term.term_ids.len() as u32, // TODO improve the fid score i.e. fid^10.
conditions_interner.insert(FidCondition {
term: term.clone(), // TODO remove this ugly clone
fid: max_fid,
}),
));
}
}
Ok(edges)
}
fn rank_to_score(rank: Rank) -> ScoreDetails {
ScoreDetails::Fid(rank)
}
}

View File

@@ -20,8 +20,6 @@ mod position;
mod proximity;
/// Implementation of the `typo` ranking rule
mod typo;
/// Implementation of the `words` ranking rule
mod words;
use std::collections::BTreeSet;
use std::hash::Hash;
@@ -35,13 +33,11 @@ pub use position::{PositionCondition, PositionGraph};
pub use proximity::{ProximityCondition, ProximityGraph};
use roaring::RoaringBitmap;
pub use typo::{TypoCondition, TypoGraph};
pub use words::{WordsCondition, WordsGraph};
use super::interner::{DedupInterner, FixedSizeInterner, Interned, MappedInterner};
use super::query_term::LocatedQueryTermSubset;
use super::small_bitmap::SmallBitmap;
use super::{QueryGraph, QueryNode, SearchContext};
use crate::score_details::{Rank, ScoreDetails};
use crate::Result;
pub struct ComputedCondition {
@@ -111,9 +107,6 @@ pub trait RankingRuleGraphTrait: Sized + 'static {
source_node: Option<&LocatedQueryTermSubset>,
dest_node: &LocatedQueryTermSubset,
) -> Result<Vec<(u32, Interned<Self::Condition>)>>;
/// Convert the rank of a path to its corresponding score for the ranking rule
fn rank_to_score(rank: Rank) -> ScoreDetails;
}
/// The graph used by graph-based ranking rules.

View File

@@ -2,7 +2,6 @@ use fxhash::{FxHashMap, FxHashSet};
use roaring::RoaringBitmap;
use super::{ComputedCondition, RankingRuleGraphTrait};
use crate::score_details::{Rank, ScoreDetails};
use crate::search::new::interner::{DedupInterner, Interned};
use crate::search::new::query_term::LocatedQueryTermSubset;
use crate::search::new::resolve_query_graph::compute_query_term_subset_docids_within_position;
@@ -78,8 +77,6 @@ impl RankingRuleGraphTrait for PositionGraph {
let mut positions_for_costs = FxHashMap::<u32, Vec<u16>>::default();
for position in all_positions {
// FIXME: bucketed position???
let distance = position.abs_diff(*term.positions.start());
let cost = {
let mut cost = 0;
for i in 0..term.term_ids.len() {
@@ -87,17 +84,15 @@ impl RankingRuleGraphTrait for PositionGraph {
// Because if two words are in the same bucketed position (e.g. 32) and consecutive,
// then their position cost will be 32+32=64, but an ngram of these two words at the
// same position will have a cost of 32+32+1=65
cost += cost_from_distance(distance as u32 + i as u32);
cost += cost_from_position(position as u32 + i as u32);
}
cost
};
positions_for_costs.entry(cost).or_default().push(position);
}
let max_cost = term.term_ids.len() as u32 * 10;
let max_cost_exists = positions_for_costs.contains_key(&max_cost);
let mut edges = vec![];
for (cost, positions) in positions_for_costs {
// TODO: We can improve performances and relevancy by storing
// the term subsets associated to each position fetched
@@ -110,35 +105,29 @@ impl RankingRuleGraphTrait for PositionGraph {
));
}
if !max_cost_exists {
// artificial empty condition for computing max cost
edges.push((
max_cost,
conditions_interner
.insert(PositionCondition { term: term.clone(), positions: Vec::default() }),
));
}
Ok(edges)
}
fn rank_to_score(rank: Rank) -> ScoreDetails {
ScoreDetails::Position(rank)
}
}
fn cost_from_distance(distance: u32) -> u32 {
match distance {
0 => 0,
1 => 1,
2..=4 => 2,
5..=7 => 3,
8..=11 => 4,
12..=16 => 5,
17..=24 => 6,
25..=64 => 7,
65..=256 => 8,
257..=1024 => 9,
_ => 10,
fn cost_from_position(sum_positions: u32) -> u32 {
match sum_positions {
0 | 1 | 2 | 3 => sum_positions,
4 | 5 => 4,
6 | 7 => 5,
8 | 9 => 6,
10 | 11 => 7,
12 | 13 => 8,
14 | 15 => 9,
16 | 17..=24 => 10,
25..=32 => 11,
33..=64 => 12,
65..=128 => 13,
129..=256 => 14,
257..=512 => 15,
513..=1024 => 16,
1025..=2048 => 17,
2049..=4096 => 18,
4097..=8192 => 19,
_ => 20,
}
}

View File

@@ -12,11 +12,11 @@ pub fn build_edges(
left_term: Option<&LocatedQueryTermSubset>,
right_term: &LocatedQueryTermSubset,
) -> Result<Vec<(u32, Interned<ProximityCondition>)>> {
let right_ngram_max = right_term.term_ids.len().saturating_sub(1);
let right_ngram_length = right_term.term_ids.len();
let Some(left_term) = left_term else {
return Ok(vec![(
right_ngram_max as u32,
(right_ngram_length - 1) as u32,
conditions_interner.insert(ProximityCondition::Term { term: right_term.clone() }),
)])
};
@@ -29,25 +29,25 @@ pub fn build_edges(
// The remaining query graph represents `the sun .. are beautiful`
// but `sun` and `are` have no proximity condition between them
return Ok(vec![(
right_ngram_max as u32,
(right_ngram_length - 1) as u32,
conditions_interner.insert(ProximityCondition::Term { term: right_term.clone() }),
)]);
}
let mut conditions = vec![];
for cost in right_ngram_max..(7 + right_ngram_max) {
for cost in right_ngram_length..(7 + right_ngram_length) {
conditions.push((
cost as u32,
conditions_interner.insert(ProximityCondition::Uninit {
left_term: left_term.clone(),
right_term: right_term.clone(),
cost: (cost + 1) as u8,
cost: cost as u8,
}),
))
}
conditions.push((
(7 + right_ngram_max) as u32,
(7 + right_ngram_length) as u32,
conditions_interner.insert(ProximityCondition::Term { term: right_term.clone() }),
));

View File

@@ -4,7 +4,6 @@ pub mod compute_docids;
use roaring::RoaringBitmap;
use super::{ComputedCondition, RankingRuleGraphTrait};
use crate::score_details::{Rank, ScoreDetails};
use crate::search::new::interner::{DedupInterner, Interned};
use crate::search::new::query_term::LocatedQueryTermSubset;
use crate::search::new::SearchContext;
@@ -37,8 +36,4 @@ impl RankingRuleGraphTrait for ProximityGraph {
) -> Result<Vec<(u32, Interned<Self::Condition>)>> {
build::build_edges(ctx, conditions_interner, source_term, dest_term)
}
fn rank_to_score(rank: Rank) -> ScoreDetails {
ScoreDetails::Proximity(rank)
}
}

View File

@@ -1,7 +1,6 @@
use roaring::RoaringBitmap;
use super::{ComputedCondition, RankingRuleGraphTrait};
use crate::score_details::{self, Rank, ScoreDetails};
use crate::search::new::interner::{DedupInterner, Interned};
use crate::search::new::query_term::LocatedQueryTermSubset;
use crate::search::new::resolve_query_graph::compute_query_term_subset_docids;
@@ -51,7 +50,7 @@ impl RankingRuleGraphTrait for TypoGraph {
// 3-gram -> equivalent to 2 typos
let base_cost = if term.term_ids.len() == 1 { 0 } else { term.term_ids.len() as u32 };
for nbr_typos in 0..=term.term_subset.max_typo_cost(ctx) {
for nbr_typos in 0..=term.term_subset.max_nbr_typos(ctx) {
let mut term = term.clone();
match nbr_typos {
0 => {
@@ -76,8 +75,4 @@ impl RankingRuleGraphTrait for TypoGraph {
}
Ok(edges)
}
fn rank_to_score(rank: Rank) -> ScoreDetails {
ScoreDetails::Typo(score_details::Typo::from_rank(rank))
}
}

View File

@@ -1,51 +0,0 @@
use roaring::RoaringBitmap;
use super::{ComputedCondition, RankingRuleGraphTrait};
use crate::score_details::{self, Rank, ScoreDetails};
use crate::search::new::interner::{DedupInterner, Interned};
use crate::search::new::query_term::LocatedQueryTermSubset;
use crate::search::new::resolve_query_graph::compute_query_term_subset_docids;
use crate::search::new::SearchContext;
use crate::Result;
#[derive(Clone, PartialEq, Eq, Hash)]
pub struct WordsCondition {
term: LocatedQueryTermSubset,
}
pub enum WordsGraph {}
impl RankingRuleGraphTrait for WordsGraph {
type Condition = WordsCondition;
fn resolve_condition(
ctx: &mut SearchContext,
condition: &Self::Condition,
universe: &RoaringBitmap,
) -> Result<ComputedCondition> {
let WordsCondition { term, .. } = condition;
// maybe compute_query_term_subset_docids should accept a universe as argument
let mut docids = compute_query_term_subset_docids(ctx, &term.term_subset)?;
docids &= universe;
Ok(ComputedCondition {
docids,
universe_len: universe.len(),
start_term_subset: None,
end_term_subset: term.clone(),
})
}
fn build_edges(
_ctx: &mut SearchContext,
conditions_interner: &mut DedupInterner<Self::Condition>,
_from: Option<&LocatedQueryTermSubset>,
to_term: &LocatedQueryTermSubset,
) -> Result<Vec<(u32, Interned<Self::Condition>)>> {
Ok(vec![(0, conditions_interner.insert(WordsCondition { term: to_term.clone() }))])
}
fn rank_to_score(rank: Rank) -> ScoreDetails {
ScoreDetails::Words(score_details::Words::from_rank(rank))
}
}

View File

@@ -2,7 +2,6 @@ use roaring::RoaringBitmap;
use super::logger::SearchLogger;
use super::{QueryGraph, SearchContext};
use crate::score_details::ScoreDetails;
use crate::Result;
/// An internal trait implemented by only [`PlaceholderQuery`] and [`QueryGraph`]
@@ -67,6 +66,4 @@ pub struct RankingRuleOutput<Q> {
pub query: Q,
/// The allowed candidates for the child ranking rule
pub candidates: RoaringBitmap,
/// The score for the candidates of the current bucket
pub score: ScoreDetails,
}

View File

@@ -1,11 +1,9 @@
use heed::BytesDecode;
use roaring::RoaringBitmap;
use super::logger::SearchLogger;
use super::{RankingRule, RankingRuleOutput, RankingRuleQueryTrait, SearchContext};
use crate::heed_codec::facet::{FacetGroupKeyCodec, OrderedF64Codec};
use crate::heed_codec::{ByteSliceRefCodec, StrRefCodec};
use crate::score_details::{self, ScoreDetails};
use crate::heed_codec::facet::FacetGroupKeyCodec;
use crate::heed_codec::ByteSliceRefCodec;
use crate::search::facet::{ascending_facet_sort, descending_facet_sort};
use crate::{FieldId, Index, Result};
@@ -51,7 +49,6 @@ pub struct Sort<'ctx, Query> {
is_ascending: bool,
original_query: Option<Query>,
iter: Option<RankingRuleOutputIterWrapper<'ctx, Query>>,
must_redact: bool,
}
impl<'ctx, Query> Sort<'ctx, Query> {
pub fn new(
@@ -62,30 +59,15 @@ impl<'ctx, Query> Sort<'ctx, Query> {
) -> Result<Self> {
let fields_ids_map = index.fields_ids_map(rtxn)?;
let field_id = fields_ids_map.id(&field_name);
let must_redact = Self::must_redact(index, rtxn, &field_name)?;
Ok(Self {
field_name,
field_id,
is_ascending,
original_query: None,
iter: None,
must_redact,
})
}
fn must_redact(index: &Index, rtxn: &'ctx heed::RoTxn, field_name: &str) -> Result<bool> {
let Some(displayed_fields) = index.displayed_fields(rtxn)?
else { return Ok(false); };
Ok(!displayed_fields.iter().any(|&field| field == field_name))
Ok(Self { field_name, field_id, is_ascending, original_query: None, iter: None })
}
}
impl<'ctx, Query: RankingRuleQueryTrait> RankingRule<'ctx, Query> for Sort<'ctx, Query> {
fn id(&self) -> String {
let Self { field_name, is_ascending, .. } = self;
format!("{field_name}:{}", if *is_ascending { "asc" } else { "desc" })
format!("{field_name}:{}", if *is_ascending { "asc" } else { "desc " })
}
fn start_iteration(
&mut self,
@@ -136,45 +118,12 @@ impl<'ctx, Query: RankingRuleQueryTrait> RankingRule<'ctx, Query> for Sort<'ctx,
(itertools::Either::Right(number_iter), itertools::Either::Right(string_iter))
};
let number_iter = number_iter.map(|r| -> Result<_> {
let (docids, bytes) = r?;
Ok((
docids,
serde_json::Value::Number(
serde_json::Number::from_f64(
OrderedF64Codec::bytes_decode(bytes).expect("some number"),
)
.expect("too big float"),
),
))
});
let string_iter = string_iter.map(|r| -> Result<_> {
let (docids, bytes) = r?;
Ok((
docids,
serde_json::Value::String(
StrRefCodec::bytes_decode(bytes).expect("some string").to_owned(),
),
))
});
let query_graph = parent_query.clone();
let ascending = self.is_ascending;
let field_name = self.field_name.clone();
let must_redact = self.must_redact;
RankingRuleOutputIterWrapper::new(Box::new(number_iter.chain(string_iter).map(
move |r| {
let (docids, value) = r?;
Ok(RankingRuleOutput {
query: query_graph.clone(),
candidates: docids,
score: ScoreDetails::Sort(score_details::Sort {
field_name: field_name.clone(),
ascending,
redacted: must_redact,
value,
}),
})
let (docids, _) = r?;
Ok(RankingRuleOutput { query: query_graph.clone(), candidates: docids })
},
)))
}
@@ -201,16 +150,7 @@ impl<'ctx, Query: RankingRuleQueryTrait> RankingRule<'ctx, Query> for Sort<'ctx,
Ok(Some(bucket))
} else {
let query = self.original_query.as_ref().unwrap().clone();
Ok(Some(RankingRuleOutput {
query,
candidates: universe.clone(),
score: ScoreDetails::Sort(score_details::Sort {
field_name: self.field_name.clone(),
ascending: self.is_ascending,
redacted: self.must_redact,
value: serde_json::Value::Null,
}),
}))
Ok(Some(RankingRuleOutput { query, candidates: universe.clone() }))
}
}

Some files were not shown because too many files have changed in this diff Show More