mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-07-18 04:11:07 +00:00
Compare commits
374 Commits
prototype-
...
reduce-mx-
Author | SHA1 | Date | |
---|---|---|---|
17f6d65840 | |||
daa4138df9 | |||
e4035ff3ec | |||
3625389057 | |||
eace6df91b | |||
83ab8cf4e5 | |||
cd2573fcc3 | |||
9f7981df28 | |||
e615fa5ec6 | |||
13f1277637 | |||
4919774f2e | |||
a3da680ce6 | |||
11e394dba1 | |||
469d2f2a9c | |||
ce6507d20c | |||
b92da5d15a | |||
ed3dfbe729 | |||
441641397b | |||
a35d3fc708 | |||
745c1a2668 | |||
a95128df6b | |||
e0537c3870 | |||
da220294f6 | |||
78e611f282 | |||
d8381eb790 | |||
b212aef5db | |||
6bf66f35be | |||
52ab114f6c | |||
dcbfecf42c | |||
9ca6f59546 | |||
aa7537a11e | |||
972bb2831c | |||
f9ddd32545 | |||
d5059520aa | |||
1c3642c9b2 | |||
d2d2bacaf2 | |||
30edba3497 | |||
84e7bd9342 | |||
2b74e4d116 | |||
b5fe0b2b07 | |||
0f0cd2d929 | |||
fc8c1d118d | |||
0548ab9038 | |||
143acb9cdc | |||
4b92f1b269 | |||
c12a1cd956 | |||
8af8aa5a33 | |||
6df2ba93a9 | |||
3680a6bf1e | |||
732c52093d | |||
05cc463fbc | |||
1afde4fea5 | |||
f8f190cd40 | |||
3a408e8287 | |||
d3e5b10e23 | |||
1aaf24ccbf | |||
90bc230820 | |||
342c4ff85d | |||
c85392ce40 | |||
8875d24a48 | |||
c470b67fa2 | |||
c0e081cd98 | |||
b60840ebff | |||
fdc1763838 | |||
75819bc940 | |||
7b8cc25625 | |||
2be641f373 | |||
ddcb661c19 | |||
d09b771bce | |||
d89d2efb7e | |||
f284a9c0dd | |||
134e7fc433 | |||
0cba919228 | |||
aa63091752 | |||
58735d6d8f | |||
1b514517f5 | |||
11f814821d | |||
30fb1153cc | |||
3b2c8b9f25 | |||
2a7f9adf78 | |||
608ceea440 | |||
79001b9c97 | |||
59b12fca87 | |||
48f5bb1693 | |||
93188b3c88 | |||
bc4efca611 | |||
feaf25a95d | |||
414b3fae89 | |||
899baa0ea5 | |||
374095d42c | |||
dd007dceca | |||
3ae587205c | |||
1bf2694604 | |||
ed9cc1af55 | |||
b41a6cbd7a | |||
c8af572697 | |||
249053e514 | |||
ff2cf2a5ae | |||
b448aca49c | |||
55bad07c16 | |||
380469665f | |||
3421125a55 | |||
0b2200e6e7 | |||
0fd5ab9fcc | |||
14293f6c8f | |||
d3a94e8b25 | |||
1944077a7f | |||
8195d366fa | |||
cfd1b2cc97 | |||
19b044b4e6 | |||
e0730b55b3 | |||
729fa3770d | |||
9cbc85b2f9 | |||
a3cf104736 | |||
a109802d45 | |||
2d8060df80 | |||
47b66e49b8 | |||
8f2e971879 | |||
654a3a9e19 | |||
d1fdbb63da | |||
fb9d9239b2 | |||
a7a0891210 | |||
84d9c731f8 | |||
11f4724957 | |||
85182497ab | |||
3e4a356638 | |||
dfd9c384aa | |||
f0b4046c43 | |||
4b953d62fb | |||
c2f4b6ced0 | |||
1e6cbcaf12 | |||
066c6bd875 | |||
fd583501d7 | |||
bff4bde0ce | |||
cd45d21d6e | |||
f9960be115 | |||
bd9aba4d77 | |||
8edad8291b | |||
b3f60ee805 | |||
5acf953298 | |||
d9cebff61c | |||
30f7bd03f6 | |||
df0d9bb878 | |||
5230ddb3ea | |||
d6a7c28e4d | |||
e55efc419e | |||
644e136aee | |||
ec0ecb5515 | |||
b4fabce36d | |||
9350a7b017 | |||
be69ab320d | |||
d59d75c9cd | |||
38b7b31beb | |||
7a01f20df7 | |||
c20c38a7fa | |||
5ab46324c4 | |||
325f17488a | |||
e7ff987c46 | |||
244003e36f | |||
1f813a6f3b | |||
96183e804a | |||
5cfb066b0a | |||
a5f44a5ceb | |||
7ab48ed8c7 | |||
a94e78ffb0 | |||
e7bb8c940f | |||
8cb85294ef | |||
d0e9d65025 | |||
540a396e49 | |||
a81165f0d8 | |||
d6585eb10b | |||
f7d90ad19f | |||
bc25f378e8 | |||
31630c85d0 | |||
ab09dc0167 | |||
618c54915d | |||
130d2061bd | |||
66ddee4390 | |||
90a6c01495 | |||
e58426109a | |||
f513cf930a | |||
8a13ed7e3f | |||
1b8e4d0301 | |||
996619b22a | |||
2c9822a337 | |||
7276deee0a | |||
6a068fe36a | |||
f7e7f438f8 | |||
8d826e478f | |||
ba8dcc2d78 | |||
4d308d5237 | |||
7ca91ebb71 | |||
1ba8a40d61 | |||
47f6a3ad3d | |||
ae17c62e24 | |||
a1148c09c2 | |||
9c5f64769a | |||
ebe23b04c9 | |||
13b7c826c1 | |||
5440f43fd3 | |||
d9460a76f4 | |||
d1ddaa223d | |||
f7ecea142e | |||
337e75b0e4 | |||
b5691802a3 | |||
6e50f23896 | |||
4c8a0179ba | |||
c69cbec64a | |||
ce328c329d | |||
959e4607bb | |||
4b4ffb8ec9 | |||
3951fe22ab | |||
4d5bc9df4c | |||
ec2f8e8040 | |||
406b8bd248 | |||
62b9c6fbee | |||
b439d36807 | |||
faceb661e3 | |||
4129d657e2 | |||
3f13608002 | |||
4708d9b016 | |||
0d2e7bcc13 | |||
55fbfb6124 | |||
58fe260c72 | |||
24e5f6f7a9 | |||
9b87c36200 | |||
12b26cd54e | |||
061b1e6d7c | |||
0d6e8b5c31 | |||
d48cdc67a0 | |||
35c16ad047 | |||
2997d1f186 | |||
2a5997fb20 | |||
ee8a9e0bad | |||
3b0737a092 | |||
fdd02105ac | |||
aa9592455c | |||
01e24dd630 | |||
ae6bb1ce17 | |||
5fd28620cd | |||
728710d63a | |||
fa81381865 | |||
b96a682f16 | |||
d0f048c068 | |||
223e82a10d | |||
9507ff5e31 | |||
c2b025946a | |||
3a818c5e87 | |||
d74134ce3a | |||
5ac129bfa1 | |||
abb4522f76 | |||
ef084ef042 | |||
3524bd1257 | |||
d4f6216966 | |||
77acafe534 | |||
53afda3237 | |||
abb19d368d | |||
b4a52a622e | |||
8d7d8cdc2f | |||
626a93b348 | |||
af65fe201a | |||
9b83b1deb0 | |||
e9eb271499 | |||
3281a88d08 | |||
5a644054ab | |||
16fefd364e | |||
00bad8c716 | |||
862714a18b | |||
d18ebe4f3a | |||
7169d85115 | |||
f5f5f03ec0 | |||
9b2653427d | |||
56b7209f26 | |||
9b1f439a91 | |||
01c7d2de8f | |||
a86aeba411 | |||
384fdc2df4 | |||
83e5b4ed0d | |||
272cd7ebbd | |||
c63c7377e6 | |||
9259cdb12e | |||
5b50e49522 | |||
65474c8de5 | |||
fbb1ba3de0 | |||
a59ca28e2c | |||
825f742000 | |||
dd491320e5 | |||
c6ff97a220 | |||
49240c367a | |||
1e6e624078 | |||
8b4e07e1a3 | |||
2853009987 | |||
aa59c3bc2c | |||
7b1d8f4c6d | |||
a49ddec9df | |||
05fe856e6e | |||
c0cdaf9f53 | |||
e9cf58d584 | |||
31628c5cd4 | |||
3004e281d7 | |||
14e8d0aaa2 | |||
1c58cf8426 | |||
5155fd2bf1 | |||
9ec9c204d3 | |||
78b9304d52 | |||
0465ba4a05 | |||
2099991dd1 | |||
c232cdabf5 | |||
4e266211bf | |||
57fa689131 | |||
10626dddfc | |||
9051065c22 | |||
e8c76cf7bf | |||
3f1729a17f | |||
cab2b6bcda | |||
c4979a2fda | |||
23931f8a4f | |||
aa414565bb | |||
1db152046e | |||
c27ea2677f | |||
caa1e1b923 | |||
71f18e4379 | |||
600e3dd1c5 | |||
362eb0de86 | |||
998d46ac10 | |||
6c85c0d95e | |||
0e1fbbf7c6 | |||
6806640ef0 | |||
173e37584c | |||
6ba4d5e987 | |||
dd12d44134 | |||
a61495d660 | |||
c8e251bf24 | |||
a938fbde4a | |||
dcf3f1d18a | |||
66d0c63694 | |||
132191360b | |||
345c99d5bd | |||
89d696c1e3 | |||
c645853529 | |||
a70ab8b072 | |||
48aae76b15 | |||
23bf572dea | |||
864f6410ed | |||
c9bf6bb2fa | |||
46249ea901 | |||
ce0d1e0e13 | |||
5065d8b0c1 | |||
a83007c013 | |||
79e0a6dd4e | |||
2d88089129 | |||
1d937f831b | |||
6c659dc12f | |||
a8531053a0 | |||
cf34d1c95f | |||
1a9c58a7ab | |||
64571c8288 | |||
72123c458b | |||
d5881519cb | |||
ea016d97af | |||
fa2ea4a379 | |||
030263caa3 | |||
c25779afba | |||
df48ac8803 | |||
ff86073288 | |||
0ad53784e7 | |||
e064c52544 | |||
e106b16148 | |||
b1d61f5a02 | |||
7dc04747fd | |||
7c0cd7172d | |||
43ff236df8 | |||
19ab4d1a15 | |||
9287858997 |
@ -2,3 +2,4 @@ target
|
||||
Dockerfile
|
||||
.dockerignore
|
||||
.gitignore
|
||||
**/.git
|
||||
|
19
.github/uffizzi/Dockerfile
vendored
19
.github/uffizzi/Dockerfile
vendored
@ -1,19 +0,0 @@
|
||||
# Run
|
||||
FROM uffizzi/ttyd:alpine
|
||||
|
||||
ENV MEILI_HTTP_ADDR 0.0.0.0:7700
|
||||
ENV MEILI_SERVER_PROVIDER docker
|
||||
ENV MEILI_NO_ANALYTICS true
|
||||
|
||||
RUN apk update --quiet \
|
||||
&& apk add -q --no-cache libgcc tini curl
|
||||
|
||||
COPY target/x86_64-unknown-linux-musl/release/meilisearch /bin/meilisearch
|
||||
RUN ln -s /bin/meilisearch /meilisearch
|
||||
|
||||
WORKDIR /meili_data
|
||||
|
||||
EXPOSE 7700/tcp
|
||||
|
||||
ENTRYPOINT ["tini", "--"]
|
||||
CMD ["ttyd", "/bin/zsh"]
|
26
.github/uffizzi/docker-compose.uffizzi.yml
vendored
26
.github/uffizzi/docker-compose.uffizzi.yml
vendored
@ -1,26 +0,0 @@
|
||||
version: "3"
|
||||
|
||||
x-uffizzi:
|
||||
ingress:
|
||||
service: nginx
|
||||
port: 8081
|
||||
|
||||
services:
|
||||
meilisearch:
|
||||
image: "${MEILISEARCH_IMAGE}"
|
||||
restart: unless-stopped
|
||||
ports:
|
||||
- "7681:7681"
|
||||
- "7700:7700"
|
||||
deploy:
|
||||
resources:
|
||||
limits:
|
||||
memory: 500M
|
||||
|
||||
nginx:
|
||||
image: nginx:alpine
|
||||
restart: unless-stopped
|
||||
ports:
|
||||
- "8081:8081"
|
||||
volumes:
|
||||
- ./.github/uffizzi/nginx:/etc/nginx
|
28
.github/uffizzi/nginx/nginx.conf
vendored
28
.github/uffizzi/nginx/nginx.conf
vendored
@ -1,28 +0,0 @@
|
||||
|
||||
events {
|
||||
worker_connections 4096; ## Default: 1024
|
||||
}
|
||||
|
||||
http {
|
||||
map $http_upgrade $connection_upgrade {
|
||||
default upgrade;
|
||||
'' close;
|
||||
}
|
||||
|
||||
server {
|
||||
listen 8081;
|
||||
|
||||
location / {
|
||||
proxy_pass http://localhost:7681;
|
||||
proxy_http_version 1.1;
|
||||
proxy_set_header Upgrade $http_upgrade;
|
||||
proxy_set_header Connection $connection_upgrade;
|
||||
}
|
||||
|
||||
location /meilisearch/ {
|
||||
# rewrite /meilisearch/(.*) /$1 break;
|
||||
proxy_pass http://localhost:7700/;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
7
.github/workflows/publish-docker-images.yml
vendored
7
.github/workflows/publish-docker-images.yml
vendored
@ -58,9 +58,13 @@ jobs:
|
||||
|
||||
- name: Set up QEMU
|
||||
uses: docker/setup-qemu-action@v2
|
||||
with:
|
||||
platforms: linux/amd64,linux/arm64
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v2
|
||||
with:
|
||||
platforms: linux/amd64,linux/arm64
|
||||
|
||||
- name: Login to Docker Hub
|
||||
uses: docker/login-action@v2
|
||||
@ -88,10 +92,13 @@ jobs:
|
||||
push: true
|
||||
platforms: linux/amd64,linux/arm64
|
||||
tags: ${{ steps.meta.outputs.tags }}
|
||||
builder: ${{ steps.buildx.outputs.name }}
|
||||
build-args: |
|
||||
COMMIT_SHA=${{ github.sha }}
|
||||
COMMIT_DATE=${{ steps.build-metadata.outputs.date }}
|
||||
GIT_TAG=${{ github.ref_name }}
|
||||
cache-from: type=gha
|
||||
cache-to: type=gha,mode=max
|
||||
|
||||
# /!\ Don't touch this without checking with Cloud team
|
||||
- name: Send CI information to Cloud team
|
||||
|
200
.github/workflows/sdks-tests.yml
vendored
Normal file
200
.github/workflows/sdks-tests.yml
vendored
Normal file
@ -0,0 +1,200 @@
|
||||
# If any test fails, the engine team should ensure the "breaking" changes are expected and contact the integration team
|
||||
name: SDKs tests
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
schedule:
|
||||
- cron: "0 6 * * MON" # Every Monday at 6:00AM
|
||||
|
||||
env:
|
||||
MEILI_MASTER_KEY: 'masterKey'
|
||||
MEILI_NO_ANALYTICS: 'true'
|
||||
|
||||
jobs:
|
||||
|
||||
meilisearch-js-tests:
|
||||
name: JS SDK tests
|
||||
runs-on: ubuntu-latest
|
||||
services:
|
||||
meilisearch:
|
||||
image: getmeili/meilisearch:nightly
|
||||
env:
|
||||
MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }}
|
||||
MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }}
|
||||
ports:
|
||||
- '7700:7700'
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
with:
|
||||
repository: meilisearch/meilisearch-js
|
||||
- name: Setup node
|
||||
uses: actions/setup-node@v3
|
||||
with:
|
||||
cache: 'yarn'
|
||||
- name: Install dependencies
|
||||
run: yarn --dev
|
||||
- name: Run tests
|
||||
run: yarn test
|
||||
- name: Build project
|
||||
run: yarn build
|
||||
- name: Run ESM env
|
||||
run: yarn test:env:esm
|
||||
- name: Run Node.js env
|
||||
run: yarn test:env:nodejs
|
||||
- name: Run node typescript env
|
||||
run: yarn test:env:node-ts
|
||||
- name: Run Browser env
|
||||
run: yarn test:env:browser
|
||||
|
||||
instant-meilisearch-tests:
|
||||
name: instant-meilisearch tests
|
||||
runs-on: ubuntu-latest
|
||||
services:
|
||||
meilisearch:
|
||||
image: getmeili/meilisearch:nightly
|
||||
env:
|
||||
MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }}
|
||||
MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }}
|
||||
ports:
|
||||
- '7700:7700'
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
with:
|
||||
repository: meilisearch/instant-meilisearch
|
||||
- name: Setup node
|
||||
uses: actions/setup-node@v3
|
||||
with:
|
||||
cache: yarn
|
||||
- name: Install dependencies
|
||||
run: yarn install
|
||||
- name: Run tests
|
||||
run: yarn test
|
||||
- name: Build all the playgrounds and the packages
|
||||
run: yarn build
|
||||
|
||||
meilisearch-php-tests:
|
||||
name: PHP SDK tests
|
||||
runs-on: ubuntu-latest
|
||||
services:
|
||||
meilisearch:
|
||||
image: getmeili/meilisearch:nightly
|
||||
env:
|
||||
MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }}
|
||||
MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }}
|
||||
ports:
|
||||
- '7700:7700'
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
with:
|
||||
repository: meilisearch/meilisearch-php
|
||||
- name: Install PHP
|
||||
uses: shivammathur/setup-php@v2
|
||||
with:
|
||||
coverage: none
|
||||
- name: Validate composer.json and composer.lock
|
||||
run: composer validate
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
composer remove --dev friendsofphp/php-cs-fixer --no-update --no-interaction
|
||||
composer update --prefer-dist --no-progress
|
||||
- name: Run test suite - default HTTP client (Guzzle 7)
|
||||
run: |
|
||||
sh scripts/tests.sh
|
||||
composer remove --dev guzzlehttp/guzzle http-interop/http-factory-guzzle
|
||||
|
||||
meilisearch-python-tests:
|
||||
name: Python SDK tests
|
||||
runs-on: ubuntu-latest
|
||||
services:
|
||||
meilisearch:
|
||||
image: getmeili/meilisearch:nightly
|
||||
env:
|
||||
MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }}
|
||||
MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }}
|
||||
ports:
|
||||
- '7700:7700'
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
with:
|
||||
repository: meilisearch/meilisearch-python
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v4
|
||||
- name: Install pipenv
|
||||
uses: dschep/install-pipenv-action@v1
|
||||
- name: Install dependencies
|
||||
run: pipenv install --dev --python=${{ matrix.python-version }}
|
||||
- name: Test with pytest
|
||||
run: pipenv run pytest
|
||||
|
||||
meilisearch-go-tests:
|
||||
name: Go SDK tests
|
||||
runs-on: ubuntu-latest
|
||||
services:
|
||||
meilisearch:
|
||||
image: getmeili/meilisearch:nightly
|
||||
env:
|
||||
MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }}
|
||||
MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }}
|
||||
ports:
|
||||
- '7700:7700'
|
||||
steps:
|
||||
- name: Set up Go
|
||||
uses: actions/setup-go@v3
|
||||
with:
|
||||
go-version: stable
|
||||
- uses: actions/checkout@v3
|
||||
with:
|
||||
repository: meilisearch/meilisearch-go
|
||||
- name: Get dependencies
|
||||
run: |
|
||||
go get -v -t -d ./...
|
||||
if [ -f Gopkg.toml ]; then
|
||||
curl https://raw.githubusercontent.com/golang/dep/master/install.sh | sh
|
||||
dep ensure
|
||||
fi
|
||||
- name: Run integration tests
|
||||
run: go test -v ./...
|
||||
|
||||
meilisearch-ruby-tests:
|
||||
name: Ruby SDK tests
|
||||
runs-on: ubuntu-latest
|
||||
services:
|
||||
meilisearch:
|
||||
image: getmeili/meilisearch:nightly
|
||||
env:
|
||||
MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }}
|
||||
MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }}
|
||||
ports:
|
||||
- '7700:7700'
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
with:
|
||||
repository: meilisearch/meilisearch-ruby
|
||||
- name: Set up Ruby 3
|
||||
uses: ruby/setup-ruby@v1
|
||||
with:
|
||||
ruby-version: 3
|
||||
- name: Install ruby dependencies
|
||||
run: bundle install --with test
|
||||
- name: Run test suite
|
||||
run: bundle exec rspec
|
||||
|
||||
meilisearch-rust-tests:
|
||||
name: Rust SDK tests
|
||||
runs-on: ubuntu-latest
|
||||
services:
|
||||
meilisearch:
|
||||
image: getmeili/meilisearch:nightly
|
||||
env:
|
||||
MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }}
|
||||
MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }}
|
||||
ports:
|
||||
- '7700:7700'
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
with:
|
||||
repository: meilisearch/meilisearch-rust
|
||||
- name: Build
|
||||
run: cargo build --verbose
|
||||
- name: Run tests
|
||||
run: cargo test --verbose
|
5
.github/workflows/test-suite.yml
vendored
5
.github/workflows/test-suite.yml
vendored
@ -138,7 +138,7 @@ jobs:
|
||||
- uses: actions-rs/toolchain@v1
|
||||
with:
|
||||
profile: minimal
|
||||
toolchain: 1.67.0
|
||||
toolchain: 1.69.0
|
||||
override: true
|
||||
components: clippy
|
||||
- name: Cache dependencies
|
||||
@ -147,8 +147,7 @@ jobs:
|
||||
uses: actions-rs/cargo@v1
|
||||
with:
|
||||
command: clippy
|
||||
# allow unlined_format_args https://github.com/rust-lang/rust-clippy/issues/10087
|
||||
args: --all-targets -- --deny warnings --allow clippy::uninlined_format_args
|
||||
args: --all-targets -- --deny warnings
|
||||
|
||||
fmt:
|
||||
name: Run Rustfmt
|
||||
|
120
.github/workflows/uffizzi-build.yml
vendored
120
.github/workflows/uffizzi-build.yml
vendored
@ -1,120 +0,0 @@
|
||||
name: Uffizzi - Build PR Image
|
||||
on:
|
||||
pull_request:
|
||||
types: [opened,synchronize,reopened,closed]
|
||||
|
||||
jobs:
|
||||
build-meilisearch:
|
||||
name: Build and push `meilisearch`
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
tags: ${{ steps.meta.outputs.tags }}
|
||||
if: ${{ github.event.action != 'closed' }}
|
||||
steps:
|
||||
- name: checkout
|
||||
uses: actions/checkout@v3
|
||||
|
||||
- run: sudo apt-get install musl-tools
|
||||
|
||||
- uses: actions-rs/toolchain@v1
|
||||
with:
|
||||
toolchain: stable
|
||||
override: true
|
||||
target: x86_64-unknown-linux-musl
|
||||
|
||||
- name: Cache dependencies
|
||||
uses: Swatinem/rust-cache@v2.2.1
|
||||
|
||||
- name: Run cargo check without any default features
|
||||
uses: actions-rs/cargo@v1
|
||||
with:
|
||||
command: build
|
||||
args: --target x86_64-unknown-linux-musl --release
|
||||
|
||||
- name: Remove dockerignore so we can use the target folder in our docker build
|
||||
run: rm -f .dockerignore
|
||||
|
||||
- name: Set up QEMU
|
||||
uses: docker/setup-qemu-action@v2
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v2
|
||||
|
||||
- name: Generate UUID image name
|
||||
id: uuid
|
||||
run: echo "UUID_TAG=$(uuidgen)" >> $GITHUB_ENV
|
||||
|
||||
- name: Docker metadata
|
||||
id: meta
|
||||
uses: docker/metadata-action@v4
|
||||
with:
|
||||
images: registry.uffizzi.com/${{ env.UUID_TAG }}
|
||||
tags: |
|
||||
type=raw,value=60d
|
||||
|
||||
- name: Build Image
|
||||
uses: docker/build-push-action@v4
|
||||
with:
|
||||
context: ./
|
||||
file: .github/uffizzi/Dockerfile
|
||||
tags: ${{ steps.meta.outputs.tags }}
|
||||
labels: ${{ steps.meta.outputs.labels }}
|
||||
push: true
|
||||
cache-from: type=gha
|
||||
cache-to: type=gha,mode=max
|
||||
|
||||
render-compose-file:
|
||||
name: Render Docker Compose File
|
||||
# Pass output of this workflow to another triggered by `workflow_run` event.
|
||||
runs-on: ubuntu-latest
|
||||
needs:
|
||||
- build-meilisearch
|
||||
outputs:
|
||||
compose-file-cache-key: ${{ env.COMPOSE_FILE_HASH }}
|
||||
steps:
|
||||
- name: Checkout git repo
|
||||
uses: actions/checkout@v3
|
||||
- name: Render Compose File
|
||||
run: |
|
||||
MEILISEARCH_IMAGE=$(echo ${{ needs.build-meilisearch.outputs.tags }})
|
||||
export MEILISEARCH_IMAGE
|
||||
# Render simple template from environment variables.
|
||||
envsubst < .github/uffizzi/docker-compose.uffizzi.yml > docker-compose.rendered.yml
|
||||
cat docker-compose.rendered.yml
|
||||
- name: Upload Rendered Compose File as Artifact
|
||||
uses: actions/upload-artifact@v3
|
||||
with:
|
||||
name: preview-spec
|
||||
path: docker-compose.rendered.yml
|
||||
retention-days: 2
|
||||
- name: Serialize PR Event to File
|
||||
run: |
|
||||
cat << EOF > event.json
|
||||
${{ toJSON(github.event) }}
|
||||
|
||||
EOF
|
||||
- name: Upload PR Event as Artifact
|
||||
uses: actions/upload-artifact@v3
|
||||
with:
|
||||
name: preview-spec
|
||||
path: event.json
|
||||
retention-days: 2
|
||||
|
||||
delete-preview:
|
||||
name: Call for Preview Deletion
|
||||
runs-on: ubuntu-latest
|
||||
if: ${{ github.event.action == 'closed' }}
|
||||
steps:
|
||||
# If this PR is closing, we will not render a compose file nor pass it to the next workflow.
|
||||
- name: Serialize PR Event to File
|
||||
run: |
|
||||
cat << EOF > event.json
|
||||
${{ toJSON(github.event) }}
|
||||
|
||||
EOF
|
||||
- name: Upload PR Event as Artifact
|
||||
uses: actions/upload-artifact@v3
|
||||
with:
|
||||
name: preview-spec
|
||||
path: event.json
|
||||
retention-days: 2
|
103
.github/workflows/uffizzi-preview-deploy.yml
vendored
103
.github/workflows/uffizzi-preview-deploy.yml
vendored
@ -1,103 +0,0 @@
|
||||
name: Uffizzi - Deploy Preview
|
||||
|
||||
on:
|
||||
workflow_run:
|
||||
workflows:
|
||||
- "Uffizzi - Build PR Image"
|
||||
types:
|
||||
- completed
|
||||
|
||||
jobs:
|
||||
cache-compose-file:
|
||||
name: Cache Compose File
|
||||
runs-on: ubuntu-latest
|
||||
if: ${{ github.event.workflow_run.conclusion == 'success' }}
|
||||
outputs:
|
||||
compose-file-cache-key: ${{ env.COMPOSE_FILE_HASH }}
|
||||
pr-number: ${{ env.PR_NUMBER }}
|
||||
expected-url: ${{ env.EXPECTED_URL }}
|
||||
steps:
|
||||
- name: 'Download artifacts'
|
||||
# Fetch output (zip archive) from the workflow run that triggered this workflow.
|
||||
uses: actions/github-script@v6
|
||||
with:
|
||||
script: |
|
||||
let allArtifacts = await github.rest.actions.listWorkflowRunArtifacts({
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
run_id: context.payload.workflow_run.id,
|
||||
});
|
||||
let matchArtifact = allArtifacts.data.artifacts.filter((artifact) => {
|
||||
return artifact.name == "preview-spec"
|
||||
})[0];
|
||||
let download = await github.rest.actions.downloadArtifact({
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
artifact_id: matchArtifact.id,
|
||||
archive_format: 'zip',
|
||||
});
|
||||
let fs = require('fs');
|
||||
fs.writeFileSync(`${process.env.GITHUB_WORKSPACE}/preview-spec.zip`, Buffer.from(download.data));
|
||||
|
||||
- name: 'Unzip artifact'
|
||||
run: unzip preview-spec.zip
|
||||
|
||||
- name: Read Event into ENV
|
||||
run: |
|
||||
echo 'EVENT_JSON<<EOF' >> $GITHUB_ENV
|
||||
cat event.json >> $GITHUB_ENV
|
||||
echo 'EOF' >> $GITHUB_ENV
|
||||
|
||||
- name: Hash Rendered Compose File
|
||||
id: hash
|
||||
# If the previous workflow was triggered by a PR close event, we will not have a compose file artifact.
|
||||
if: ${{ fromJSON(env.EVENT_JSON).action != 'closed' }}
|
||||
run: echo "COMPOSE_FILE_HASH=$(md5sum docker-compose.rendered.yml | awk '{ print $1 }')" >> $GITHUB_ENV
|
||||
|
||||
- name: Cache Rendered Compose File
|
||||
if: ${{ fromJSON(env.EVENT_JSON).action != 'closed' }}
|
||||
uses: actions/cache@v3
|
||||
with:
|
||||
path: docker-compose.rendered.yml
|
||||
key: ${{ env.COMPOSE_FILE_HASH }}
|
||||
|
||||
- name: Read PR Number From Event Object
|
||||
id: pr
|
||||
run: echo "PR_NUMBER=${{ fromJSON(env.EVENT_JSON).number }}" >> $GITHUB_ENV
|
||||
|
||||
- name: DEBUG - Print Job Outputs
|
||||
if: ${{ runner.debug }}
|
||||
run: |
|
||||
echo "PR number: ${{ env.PR_NUMBER }}"
|
||||
echo "Compose file hash: ${{ env.COMPOSE_FILE_HASH }}"
|
||||
cat event.json
|
||||
|
||||
- name: Add expected URL env var
|
||||
if: ${{ runner.debug }}
|
||||
run: |
|
||||
REPO=$(echo ${{ github.repository }} | sed 's/\./+/g')
|
||||
echo "EXPECTED_URL=${{ inputs.server }}/github.com/$REPO/pull/${{ env.PR_NUMBER }}" >> $GITHUB_ENV
|
||||
|
||||
deploy-uffizzi-preview:
|
||||
name: Use Remote Workflow to Preview on Uffizzi
|
||||
needs:
|
||||
- cache-compose-file
|
||||
uses: UffizziCloud/preview-action/.github/workflows/reusable.yaml@v2
|
||||
with:
|
||||
# If this workflow was triggered by a PR close event, cache-key will be an empty string
|
||||
# and this reusable workflow will delete the preview deployment.
|
||||
compose-file-cache-key: ${{ needs.cache-compose-file.outputs.compose-file-cache-key }}
|
||||
compose-file-cache-path: docker-compose.rendered.yml
|
||||
server: https://app.uffizzi.com
|
||||
pr-number: ${{ needs.cache-compose-file.outputs.pr-number }}
|
||||
description: |
|
||||
The meilisearch preview environment contains a web terminal from where you can run the
|
||||
`meilisearch` command. You should be able to access this instance of meilisearch running in
|
||||
the preview from the link Meilisearch Endpoint link given below.
|
||||
|
||||
Web Terminal Endpoint : <uffizzi-url>
|
||||
Meilisearch Endpoint : <uffizzi-url>/meilisearch
|
||||
permissions:
|
||||
contents: read
|
||||
pull-requests: write
|
||||
id-token: write
|
@ -18,9 +18,9 @@ If Meilisearch does not offer optimized support for your language, please consid
|
||||
|
||||
## Assumptions
|
||||
|
||||
1. **You're familiar with [GitHub](https://github.com) and the [Pull Requests](https://help.github.com/en/github/collaborating-with-issues-and-pull-requests/about-pull-requests)(PR) workflow.**
|
||||
2. **You've read the Meilisearch [documentation](https://docs.meilisearch.com).**
|
||||
3. **You know about the [Meilisearch community](https://docs.meilisearch.com/learn/what_is_meilisearch/contact.html).
|
||||
1. **You're familiar with [GitHub](https://github.com) and the [Pull Requests (PR)](https://help.github.com/en/github/collaborating-with-issues-and-pull-requests/about-pull-requests) workflow.**
|
||||
2. **You've read the Meilisearch [documentation](https://www.meilisearch.com/docs).**
|
||||
3. **You know about the [Meilisearch community on Discord](https://discord.meilisearch.com).
|
||||
Please use this for help.**
|
||||
|
||||
## How to Contribute
|
||||
@ -120,29 +120,9 @@ The full Meilisearch release process is described in [this guide](https://github
|
||||
|
||||
Depending on the developed feature, you might need to provide a prototyped version of Meilisearch to make it easier to test by the users.
|
||||
|
||||
The prototype name must follow this convention: `prototype-X-Y` where
|
||||
- `X` is the feature name formatted in `kebab-case`. It should not end with a single number.
|
||||
- `Y` is the version of the prototype, starting from `0`.
|
||||
|
||||
✅ Example: `prototype-auto-resize-0`. </br>
|
||||
❌ Bad example: `auto-resize-0`: lacks the `prototype` prefix. </br>
|
||||
❌ Bad example: `prototype-auto-resize`: lacks the version suffix. </br>
|
||||
❌ Bad example: `prototype-auto-resize-0-0`: feature name ends with a single number.
|
||||
|
||||
Steps to create a prototype:
|
||||
|
||||
1. In your terminal, go to the last commit of your branch (the one you want to provide as a prototype).
|
||||
2. Create a tag following the convention: `git tag prototype-X-Y`
|
||||
3. Run Meilisearch and check that its launch summary features a line: `Prototype: prototype-X-Y` (you may need to switch branches and back after tagging for this to work).
|
||||
3. Push the tag: `git push origin prototype-X-Y`
|
||||
4. Check the [Docker CI](https://github.com/meilisearch/meilisearch/actions/workflows/publish-docker-images.yml) is now running.
|
||||
|
||||
🐳 Once the CI has finished to run (~1h30), a Docker image named `prototype-X-Y` will be available on [DockerHub](https://hub.docker.com/repository/docker/getmeili/meilisearch/general). People can use it with the following command: `docker run -p 7700:7700 -v $(pwd)/meili_data:/meili_data getmeili/meilisearch:prototype-X-Y`. <br>
|
||||
More information about [how to run Meilisearch with Docker](https://docs.meilisearch.com/learn/cookbooks/docker.html#download-meilisearch-with-docker).
|
||||
|
||||
⚙️ However, no binaries will be created. If the users do not use Docker, they can go to the `prototype-X-Y` tag in the Meilisearch repository and compile from the source code.
|
||||
|
||||
⚠️ When sharing a prototype with users, remind them to not use it in production. Prototypes are solely for test purposes.
|
||||
This happens in two steps:
|
||||
- [Release the prototype](https://github.com/meilisearch/engine-team/blob/main/resources/prototypes.md#how-to-publish-a-prototype)
|
||||
- [Communicate about it](https://github.com/meilisearch/engine-team/blob/main/resources/prototypes.md#communication)
|
||||
|
||||
### Release assets
|
||||
|
||||
|
1142
Cargo.lock
generated
1142
Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
@ -17,7 +17,7 @@ members = [
|
||||
]
|
||||
|
||||
[workspace.package]
|
||||
version = "1.1.0"
|
||||
version = "1.1.1"
|
||||
authors = ["Quentin de Quelen <quentin@dequelen.me>", "Clément Renault <clement@meilisearch.com>"]
|
||||
description = "Meilisearch HTTP server"
|
||||
homepage = "https://meilisearch.com"
|
||||
|
@ -1,3 +1,4 @@
|
||||
# syntax=docker/dockerfile:1.4
|
||||
# Compile
|
||||
FROM rust:alpine3.16 AS compiler
|
||||
|
||||
@ -11,7 +12,7 @@ ARG GIT_TAG
|
||||
ENV VERGEN_GIT_SHA=${COMMIT_SHA} VERGEN_GIT_COMMIT_TIMESTAMP=${COMMIT_DATE} VERGEN_GIT_SEMVER_LIGHTWEIGHT=${GIT_TAG}
|
||||
ENV RUSTFLAGS="-C target-feature=-crt-static"
|
||||
|
||||
COPY . .
|
||||
COPY --link . .
|
||||
RUN set -eux; \
|
||||
apkArch="$(apk --print-arch)"; \
|
||||
if [ "$apkArch" = "aarch64" ]; then \
|
||||
@ -30,7 +31,7 @@ RUN apk update --quiet \
|
||||
|
||||
# add meilisearch to the `/bin` so you can run it from anywhere and it's easy
|
||||
# to find.
|
||||
COPY --from=compiler /meilisearch/target/release/meilisearch /bin/meilisearch
|
||||
COPY --from=compiler --link /meilisearch/target/release/meilisearch /bin/meilisearch
|
||||
# To stay compatible with the older version of the container (pre v0.27.0) we're
|
||||
# going to symlink the meilisearch binary in the path to `/meilisearch`
|
||||
RUN ln -s /bin/meilisearch /meilisearch
|
||||
|
45
README.md
45
README.md
@ -7,15 +7,15 @@
|
||||
<a href="https://www.meilisearch.com">Website</a> |
|
||||
<a href="https://roadmap.meilisearch.com/tabs/1-under-consideration">Roadmap</a> |
|
||||
<a href="https://blog.meilisearch.com">Blog</a> |
|
||||
<a href="https://docs.meilisearch.com">Documentation</a> |
|
||||
<a href="https://docs.meilisearch.com/faq/">FAQ</a> |
|
||||
<a href="https://www.meilisearch.com/docs">Documentation</a> |
|
||||
<a href="https://www.meilisearch.com/docs/faq">FAQ</a> |
|
||||
<a href="https://discord.meilisearch.com">Discord</a>
|
||||
</h4>
|
||||
|
||||
<p align="center">
|
||||
<a href="https://deps.rs/repo/github/meilisearch/meilisearch"><img src="https://deps.rs/repo/github/meilisearch/meilisearch/status.svg" alt="Dependency status"></a>
|
||||
<a href="https://github.com/meilisearch/meilisearch/blob/main/LICENSE"><img src="https://img.shields.io/badge/license-MIT-informational" alt="License"></a>
|
||||
<a href="https://app.bors.tech/repositories/26457"><img src="https://bors.tech/images/badge_small.svg" alt="Bors enabled"></a>
|
||||
<a href="https://ms-bors.herokuapp.com/repositories/52"><img src="https://bors.tech/images/badge_small.svg" alt="Bors enabled"></a>
|
||||
</p>
|
||||
|
||||
<p align="center">⚡ A lightning-fast search engine that fits effortlessly into your apps, websites, and workflow 🔍</p>
|
||||
@ -36,27 +36,27 @@ Meilisearch helps you shape a delightful search experience in a snap, offering f
|
||||
## ✨ Features
|
||||
|
||||
- **Search-as-you-type:** find search results in less than 50 milliseconds
|
||||
- **[Typo tolerance](https://docs.meilisearch.com/learn/getting_started/customizing_relevancy.html#typo-tolerance):** get relevant matches even when queries contain typos and misspellings
|
||||
- **[Filtering and faceted search](https://docs.meilisearch.com/learn/advanced/filtering_and_faceted_search.html):** enhance your user's search experience with custom filters and build a faceted search interface in a few lines of code
|
||||
- **[Sorting](https://docs.meilisearch.com/learn/advanced/sorting.html):** sort results based on price, date, or pretty much anything else your users need
|
||||
- **[Synonym support](https://docs.meilisearch.com/learn/getting_started/customizing_relevancy.html#synonyms):** configure synonyms to include more relevant content in your search results
|
||||
- **[Geosearch](https://docs.meilisearch.com/learn/advanced/geosearch.html):** filter and sort documents based on geographic data
|
||||
- **[Extensive language support](https://docs.meilisearch.com/learn/what_is_meilisearch/language.html):** search datasets in any language, with optimized support for Chinese, Japanese, Hebrew, and languages using the Latin alphabet
|
||||
- **[Security management](https://docs.meilisearch.com/learn/security/master_api_keys.html):** control which users can access what data with API keys that allow fine-grained permissions handling
|
||||
- **[Multi-Tenancy](https://docs.meilisearch.com/learn/security/tenant_tokens.html):** personalize search results for any number of application tenants
|
||||
- **[Typo tolerance](https://www.meilisearch.com/docs/learn/getting_started/customizing_relevancy#typo-tolerance):** get relevant matches even when queries contain typos and misspellings
|
||||
- **[Filtering](https://www.meilisearch.com/docs/learn/advanced/filtering) and [faceted search](https://www.meilisearch.com/docs/learn/advanced/faceted_search):** enhance your user's search experience with custom filters and build a faceted search interface in a few lines of code
|
||||
- **[Sorting](https://www.meilisearch.com/docs/learn/advanced/sorting):** sort results based on price, date, or pretty much anything else your users need
|
||||
- **[Synonym support](https://www.meilisearch.com/docs/learn/getting_started/customizing_relevancy#synonyms):** configure synonyms to include more relevant content in your search results
|
||||
- **[Geosearch](https://www.meilisearch.com/docs/learn/advanced/geosearch):** filter and sort documents based on geographic data
|
||||
- **[Extensive language support](https://www.meilisearch.com/docs/learn/what_is_meilisearch/language):** search datasets in any language, with optimized support for Chinese, Japanese, Hebrew, and languages using the Latin alphabet
|
||||
- **[Security management](https://www.meilisearch.com/docs/learn/security/master_api_keys):** control which users can access what data with API keys that allow fine-grained permissions handling
|
||||
- **[Multi-Tenancy](https://www.meilisearch.com/docs/learn/security/tenant_tokens):** personalize search results for any number of application tenants
|
||||
- **Highly Customizable:** customize Meilisearch to your specific needs or use our out-of-the-box and hassle-free presets
|
||||
- **[RESTful API](https://docs.meilisearch.com/reference/api/overview.html):** integrate Meilisearch in your technical stack with our plugins and SDKs
|
||||
- **[RESTful API](https://www.meilisearch.com/docs/reference/api/overview):** integrate Meilisearch in your technical stack with our plugins and SDKs
|
||||
- **Easy to install, deploy, and maintain**
|
||||
|
||||
## 📖 Documentation
|
||||
|
||||
You can consult Meilisearch's documentation at [https://docs.meilisearch.com](https://docs.meilisearch.com/).
|
||||
You can consult Meilisearch's documentation at [https://www.meilisearch.com/docs](https://www.meilisearch.com/docs/).
|
||||
|
||||
## 🚀 Getting started
|
||||
|
||||
For basic instructions on how to set up Meilisearch, add documents to an index, and search for documents, take a look at our [Quick Start](https://docs.meilisearch.com/learn/getting_started/quick_start.html) guide.
|
||||
For basic instructions on how to set up Meilisearch, add documents to an index, and search for documents, take a look at our [Quick Start](https://www.meilisearch.com/docs/learn/getting_started/quick_start) guide.
|
||||
|
||||
You may also want to check out [Meilisearch 101](https://docs.meilisearch.com/learn/getting_started/filtering_and_sorting.html) for an introduction to some of Meilisearch's most popular features.
|
||||
You may also want to check out [Meilisearch 101](https://www.meilisearch.com/docs/learn/getting_started/filtering_and_sorting) for an introduction to some of Meilisearch's most popular features.
|
||||
|
||||
## ☁️ Meilisearch cloud
|
||||
|
||||
@ -66,25 +66,25 @@ Let us manage your infrastructure so you can focus on integrating a great search
|
||||
|
||||
Install one of our SDKs in your project for seamless integration between Meilisearch and your favorite language or framework!
|
||||
|
||||
Take a look at the complete [Meilisearch integration list](https://docs.meilisearch.com/learn/what_is_meilisearch/sdks.html).
|
||||
Take a look at the complete [Meilisearch integration list](https://www.meilisearch.com/docs/learn/what_is_meilisearch/sdks).
|
||||
|
||||
[](https://docs.meilisearch.com/learn/what_is_meilisearch/sdks.html)
|
||||
[](https://www.meilisearch.com/docs/learn/what_is_meilisearch/sdks)
|
||||
|
||||
## ⚙️ Advanced usage
|
||||
|
||||
Experienced users will want to keep our [API Reference](https://docs.meilisearch.com/reference/api) close at hand.
|
||||
Experienced users will want to keep our [API Reference](https://www.meilisearch.com/docs/reference/api/overview) close at hand.
|
||||
|
||||
We also offer a wide range of dedicated guides to all Meilisearch features, such as [filtering](https://docs.meilisearch.com/learn/advanced/filtering_and_faceted_search.html), [sorting](https://docs.meilisearch.com/learn/advanced/sorting.html), [geosearch](https://docs.meilisearch.com/learn/advanced/geosearch.html), [API keys](https://docs.meilisearch.com/learn/security/master_api_keys.html), and [tenant tokens](https://docs.meilisearch.com/learn/security/tenant_tokens.html).
|
||||
We also offer a wide range of dedicated guides to all Meilisearch features, such as [filtering](https://www.meilisearch.com/docs/learn/advanced/filtering), [sorting](https://www.meilisearch.com/docs/learn/advanced/sorting), [geosearch](https://www.meilisearch.com/docs/learn/advanced/geosearch), [API keys](https://www.meilisearch.com/docs/learn/security/master_api_keys), and [tenant tokens](https://www.meilisearch.com/docs/learn/security/tenant_tokens).
|
||||
|
||||
Finally, for more in-depth information, refer to our articles explaining fundamental Meilisearch concepts such as [documents](https://docs.meilisearch.com/learn/core_concepts/documents.html) and [indexes](https://docs.meilisearch.com/learn/core_concepts/indexes.html).
|
||||
Finally, for more in-depth information, refer to our articles explaining fundamental Meilisearch concepts such as [documents](https://www.meilisearch.com/docs/learn/core_concepts/documents) and [indexes](https://www.meilisearch.com/docs/learn/core_concepts/indexes).
|
||||
|
||||
## 📊 Telemetry
|
||||
|
||||
Meilisearch collects **anonymized** data from users to help us improve our product. You can [deactivate this](https://docs.meilisearch.com/learn/what_is_meilisearch/telemetry.html#how-to-disable-data-collection) whenever you want.
|
||||
Meilisearch collects **anonymized** data from users to help us improve our product. You can [deactivate this](https://www.meilisearch.com/docs/learn/what_is_meilisearch/telemetry#how-to-disable-data-collection) whenever you want.
|
||||
|
||||
To request deletion of collected data, please write to us at [privacy@meilisearch.com](mailto:privacy@meilisearch.com). Don't forget to include your `Instance UID` in the message, as this helps us quickly find and delete your data.
|
||||
|
||||
If you want to know more about the kind of data we collect and what we use it for, check the [telemetry section](https://docs.meilisearch.com/learn/what_is_meilisearch/telemetry.html) of our documentation.
|
||||
If you want to know more about the kind of data we collect and what we use it for, check the [telemetry section](https://www.meilisearch.com/docs/learn/what_is_meilisearch/telemetry) of our documentation.
|
||||
|
||||
## 📫 Get in touch!
|
||||
|
||||
@ -97,7 +97,6 @@ Meilisearch is a search engine created by [Meili](https://www.welcometothejungle
|
||||
- For feature requests, please visit our [product repository](https://github.com/meilisearch/product/discussions)
|
||||
- Found a bug? Open an [issue](https://github.com/meilisearch/meilisearch/issues)!
|
||||
- Want to be part of our Discord community? [Join us!](https://discord.gg/meilisearch)
|
||||
- For everything else, please check [this page listing some of the other places where you can find us](https://docs.meilisearch.com/learn/what_is_meilisearch/contact.html)
|
||||
|
||||
Thank you for your support!
|
||||
|
||||
|
@ -11,11 +11,11 @@ edition.workspace = true
|
||||
license.workspace = true
|
||||
|
||||
[dependencies]
|
||||
anyhow = "1.0.65"
|
||||
csv = "1.1.6"
|
||||
milli = { path = "../milli", default-features = false }
|
||||
mimalloc = { version = "0.1.29", default-features = false }
|
||||
serde_json = { version = "1.0.85", features = ["preserve_order"] }
|
||||
anyhow = "1.0.70"
|
||||
csv = "1.2.1"
|
||||
milli = { path = "../milli" }
|
||||
mimalloc = { version = "0.1.36", default-features = false }
|
||||
serde_json = { version = "1.0.95", features = ["preserve_order"] }
|
||||
|
||||
[dev-dependencies]
|
||||
criterion = { version = "0.4.0", features = ["html_reports"] }
|
||||
@ -24,14 +24,14 @@ rand_chacha = "0.3.1"
|
||||
roaring = "0.10.1"
|
||||
|
||||
[build-dependencies]
|
||||
anyhow = "1.0.65"
|
||||
bytes = "1.2.1"
|
||||
anyhow = "1.0.70"
|
||||
bytes = "1.4.0"
|
||||
convert_case = "0.6.0"
|
||||
flate2 = "1.0.24"
|
||||
reqwest = { version = "0.11.12", features = ["blocking", "rustls-tls"], default-features = false }
|
||||
flate2 = "1.0.25"
|
||||
reqwest = { version = "0.11.16", features = ["blocking", "rustls-tls"], default-features = false }
|
||||
|
||||
[features]
|
||||
default = ["milli/default"]
|
||||
default = ["milli/all-tokenizations"]
|
||||
|
||||
[[bench]]
|
||||
name = "search_songs"
|
||||
@ -48,7 +48,3 @@ harness = false
|
||||
[[bench]]
|
||||
name = "indexing"
|
||||
harness = false
|
||||
|
||||
[[bench]]
|
||||
name = "formatting"
|
||||
harness = false
|
||||
|
@ -119,9 +119,9 @@ _[Download the `smol-wiki` dataset](https://milli-benchmarks.fra1.digitaloceansp
|
||||
|
||||
### Movies
|
||||
|
||||
`movies` is a really small dataset we uses as our example in the [getting started](https://docs.meilisearch.com/learn/getting_started/)
|
||||
`movies` is a really small dataset we uses as our example in the [getting started](https://www.meilisearch.com/docs/learn/getting_started/quick_start)
|
||||
|
||||
_[Download the `movies` dataset](https://docs.meilisearch.com/movies.json)._
|
||||
_[Download the `movies` dataset](https://www.meilisearch.com/movies.json)._
|
||||
|
||||
|
||||
### All Countries
|
||||
|
@ -1,67 +0,0 @@
|
||||
use std::rc::Rc;
|
||||
|
||||
use criterion::{criterion_group, criterion_main};
|
||||
use milli::tokenizer::TokenizerBuilder;
|
||||
use milli::{FormatOptions, MatcherBuilder, MatchingWord, MatchingWords};
|
||||
|
||||
#[global_allocator]
|
||||
static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc;
|
||||
|
||||
struct Conf<'a> {
|
||||
name: &'a str,
|
||||
text: &'a str,
|
||||
matching_words: MatcherBuilder<'a, Vec<u8>>,
|
||||
}
|
||||
|
||||
fn bench_formatting(c: &mut criterion::Criterion) {
|
||||
#[rustfmt::skip]
|
||||
let confs = &[
|
||||
Conf {
|
||||
name: "'the door d'",
|
||||
text: r#"He used to do the door sounds in "Star Trek" with his mouth, phssst, phssst. The MD-11 passenger and cargo doors also tend to behave like electromagnetic apertures, because the doors do not have continuous electrical contact with the door frames around the door perimeter. But Theodor said that the doors don't work."#,
|
||||
matching_words: MatcherBuilder::new(MatchingWords::new(vec![
|
||||
(vec![Rc::new(MatchingWord::new("t".to_string(), 0, false).unwrap()), Rc::new(MatchingWord::new("he".to_string(), 0, false).unwrap())], vec![0]),
|
||||
(vec![Rc::new(MatchingWord::new("the".to_string(), 0, false).unwrap())], vec![0]),
|
||||
(vec![Rc::new(MatchingWord::new("door".to_string(), 1, false).unwrap())], vec![1]),
|
||||
(vec![Rc::new(MatchingWord::new("do".to_string(), 0, false).unwrap()), Rc::new(MatchingWord::new("or".to_string(), 0, false).unwrap())], vec![0]),
|
||||
(vec![Rc::new(MatchingWord::new("thedoor".to_string(), 1, false).unwrap())], vec![0, 1]),
|
||||
(vec![Rc::new(MatchingWord::new("d".to_string(), 0, true).unwrap())], vec![2]),
|
||||
(vec![Rc::new(MatchingWord::new("thedoord".to_string(), 1, true).unwrap())], vec![0, 1, 2]),
|
||||
(vec![Rc::new(MatchingWord::new("doord".to_string(), 1, true).unwrap())], vec![1, 2]),
|
||||
]
|
||||
).unwrap(), TokenizerBuilder::default().build()),
|
||||
},
|
||||
];
|
||||
|
||||
let format_options = &[
|
||||
FormatOptions { highlight: false, crop: None },
|
||||
FormatOptions { highlight: true, crop: None },
|
||||
FormatOptions { highlight: false, crop: Some(10) },
|
||||
FormatOptions { highlight: true, crop: Some(10) },
|
||||
FormatOptions { highlight: false, crop: Some(20) },
|
||||
FormatOptions { highlight: true, crop: Some(20) },
|
||||
];
|
||||
|
||||
for option in format_options {
|
||||
let highlight = if option.highlight { "highlight" } else { "no-highlight" };
|
||||
|
||||
let name = match option.crop {
|
||||
Some(size) => format!("{}-crop({})", highlight, size),
|
||||
None => format!("{}-no-crop", highlight),
|
||||
};
|
||||
|
||||
let mut group = c.benchmark_group(&name);
|
||||
for conf in confs {
|
||||
group.bench_function(conf.name, |b| {
|
||||
b.iter(|| {
|
||||
let mut matcher = conf.matching_words.build(conf.text);
|
||||
matcher.format(*option);
|
||||
})
|
||||
});
|
||||
}
|
||||
group.finish();
|
||||
}
|
||||
}
|
||||
|
||||
criterion_group!(benches, bench_formatting);
|
||||
criterion_main!(benches);
|
52
config.toml
52
config.toml
@ -1,43 +1,43 @@
|
||||
# This file shows the default configuration of Meilisearch.
|
||||
# All variables are defined here: https://docs.meilisearch.com/learn/configuration/instance_options.html#environment-variables
|
||||
# All variables are defined here: https://www.meilisearch.com/docs/learn/configuration/instance_options#environment-variables
|
||||
|
||||
db_path = "./data.ms"
|
||||
# Designates the location where database files will be created and retrieved.
|
||||
# https://docs.meilisearch.com/learn/configuration/instance_options.html#database-path
|
||||
# https://www.meilisearch.com/docs/learn/configuration/instance_options#database-path
|
||||
|
||||
env = "development"
|
||||
# Configures the instance's environment. Value must be either `production` or `development`.
|
||||
# https://docs.meilisearch.com/learn/configuration/instance_options.html#environment
|
||||
# https://www.meilisearch.com/docs/learn/configuration/instance_options#environment
|
||||
|
||||
http_addr = "localhost:7700"
|
||||
# The address on which the HTTP server will listen.
|
||||
|
||||
# master_key = "YOUR_MASTER_KEY_VALUE"
|
||||
# Sets the instance's master key, automatically protecting all routes except GET /health.
|
||||
# https://docs.meilisearch.com/learn/configuration/instance_options.html#master-key
|
||||
# https://www.meilisearch.com/docs/learn/configuration/instance_options#master-key
|
||||
|
||||
# no_analytics = true
|
||||
# Deactivates Meilisearch's built-in telemetry when provided.
|
||||
# Meilisearch automatically collects data from all instances that do not opt out using this flag.
|
||||
# All gathered data is used solely for the purpose of improving Meilisearch, and can be deleted at any time.
|
||||
# https://docs.meilisearch.com/learn/configuration/instance_options.html#disable-analytics
|
||||
# https://www.meilisearch.com/docs/learn/configuration/instance_options#disable-analytics
|
||||
|
||||
http_payload_size_limit = "100 MB"
|
||||
# Sets the maximum size of accepted payloads.
|
||||
# https://docs.meilisearch.com/learn/configuration/instance_options.html#payload-limit-size
|
||||
# https://www.meilisearch.com/docs/learn/configuration/instance_options#payload-limit-size
|
||||
|
||||
log_level = "INFO"
|
||||
# Defines how much detail should be present in Meilisearch's logs.
|
||||
# Meilisearch currently supports six log levels, listed in order of increasing verbosity: `OFF`, `ERROR`, `WARN`, `INFO`, `DEBUG`, `TRACE`
|
||||
# https://docs.meilisearch.com/learn/configuration/instance_options.html#log-level
|
||||
# https://www.meilisearch.com/docs/learn/configuration/instance_options#log-level
|
||||
|
||||
# max_indexing_memory = "2 GiB"
|
||||
# Sets the maximum amount of RAM Meilisearch can use when indexing.
|
||||
# https://docs.meilisearch.com/learn/configuration/instance_options.html#max-indexing-memory
|
||||
# https://www.meilisearch.com/docs/learn/configuration/instance_options#max-indexing-memory
|
||||
|
||||
# max_indexing_threads = 4
|
||||
# Sets the maximum number of threads Meilisearch can use during indexing.
|
||||
# https://docs.meilisearch.com/learn/configuration/instance_options.html#max-indexing-threads
|
||||
# https://www.meilisearch.com/docs/learn/configuration/instance_options#max-indexing-threads
|
||||
|
||||
#############
|
||||
### DUMPS ###
|
||||
@ -45,19 +45,19 @@ log_level = "INFO"
|
||||
|
||||
dump_dir = "dumps/"
|
||||
# Sets the directory where Meilisearch will create dump files.
|
||||
# https://docs.meilisearch.com/learn/configuration/instance_options.html#dump-directory
|
||||
# https://www.meilisearch.com/docs/learn/configuration/instance_options#dump-directory
|
||||
|
||||
# import_dump = "./path/to/my/file.dump"
|
||||
# Imports the dump file located at the specified path. Path must point to a .dump file.
|
||||
# https://docs.meilisearch.com/learn/configuration/instance_options.html#import-dump
|
||||
# https://www.meilisearch.com/docs/learn/configuration/instance_options#import-dump
|
||||
|
||||
ignore_missing_dump = false
|
||||
# Prevents Meilisearch from throwing an error when `import_dump` does not point to a valid dump file.
|
||||
# https://docs.meilisearch.com/learn/configuration/instance_options.html#ignore-missing-dump
|
||||
# https://www.meilisearch.com/docs/learn/configuration/instance_options#ignore-missing-dump
|
||||
|
||||
ignore_dump_if_db_exists = false
|
||||
# Prevents a Meilisearch instance with an existing database from throwing an error when using `import_dump`.
|
||||
# https://docs.meilisearch.com/learn/configuration/instance_options.html#ignore-dump-if-db-exists
|
||||
# https://www.meilisearch.com/docs/learn/configuration/instance_options#ignore-dump-if-db-exists
|
||||
|
||||
|
||||
#################
|
||||
@ -68,23 +68,23 @@ schedule_snapshot = false
|
||||
# Enables scheduled snapshots when true, disable when false (the default).
|
||||
# If the value is given as an integer, then enables the scheduled snapshot with the passed value as the interval
|
||||
# between each snapshot, in seconds.
|
||||
# https://docs.meilisearch.com/learn/configuration/instance_options.html#schedule-snapshot-creation
|
||||
# https://www.meilisearch.com/docs/learn/configuration/instance_options#schedule-snapshot-creation
|
||||
|
||||
snapshot_dir = "snapshots/"
|
||||
# Sets the directory where Meilisearch will store snapshots.
|
||||
# https://docs.meilisearch.com/learn/configuration/instance_options.html#snapshot-destination
|
||||
# https://www.meilisearch.com/docs/learn/configuration/instance_options#snapshot-destination
|
||||
|
||||
# import_snapshot = "./path/to/my/snapshot"
|
||||
# Launches Meilisearch after importing a previously-generated snapshot at the given filepath.
|
||||
# https://docs.meilisearch.com/learn/configuration/instance_options.html#import-snapshot
|
||||
# https://www.meilisearch.com/docs/learn/configuration/instance_options#import-snapshot
|
||||
|
||||
ignore_missing_snapshot = false
|
||||
# Prevents a Meilisearch instance from throwing an error when `import_snapshot` does not point to a valid snapshot file.
|
||||
# https://docs.meilisearch.com/learn/configuration/instance_options.html#ignore-missing-snapshot
|
||||
# https://www.meilisearch.com/docs/learn/configuration/instance_options#ignore-missing-snapshot
|
||||
|
||||
ignore_snapshot_if_db_exists = false
|
||||
# Prevents a Meilisearch instance with an existing database from throwing an error when using `import_snapshot`.
|
||||
# https://docs.meilisearch.com/learn/configuration/instance_options.html#ignore-snapshot-if-db-exists
|
||||
# https://www.meilisearch.com/docs/learn/configuration/instance_options#ignore-snapshot-if-db-exists
|
||||
|
||||
|
||||
###########
|
||||
@ -93,31 +93,31 @@ ignore_snapshot_if_db_exists = false
|
||||
|
||||
# ssl_auth_path = "./path/to/root"
|
||||
# Enables client authentication in the specified path.
|
||||
# https://docs.meilisearch.com/learn/configuration/instance_options.html#ssl-authentication-path
|
||||
# https://www.meilisearch.com/docs/learn/configuration/instance_options#ssl-authentication-path
|
||||
|
||||
# ssl_cert_path = "./path/to/certfile"
|
||||
# Sets the server's SSL certificates.
|
||||
# https://docs.meilisearch.com/learn/configuration/instance_options.html#ssl-certificates-path
|
||||
# https://www.meilisearch.com/docs/learn/configuration/instance_options#ssl-certificates-path
|
||||
|
||||
# ssl_key_path = "./path/to/private-key"
|
||||
# Sets the server's SSL key files.
|
||||
# https://docs.meilisearch.com/learn/configuration/instance_options.html#ssl-key-path
|
||||
# https://www.meilisearch.com/docs/learn/configuration/instance_options#ssl-key-path
|
||||
|
||||
# ssl_ocsp_path = "./path/to/ocsp-file"
|
||||
# Sets the server's OCSP file.
|
||||
# https://docs.meilisearch.com/learn/configuration/instance_options.html#ssl-ocsp-path
|
||||
# https://www.meilisearch.com/docs/learn/configuration/instance_options#ssl-ocsp-path
|
||||
|
||||
ssl_require_auth = false
|
||||
# Makes SSL authentication mandatory.
|
||||
# https://docs.meilisearch.com/learn/configuration/instance_options.html#ssl-require-auth
|
||||
# https://www.meilisearch.com/docs/learn/configuration/instance_options#ssl-require-auth
|
||||
|
||||
ssl_resumption = false
|
||||
# Activates SSL session resumption.
|
||||
# https://docs.meilisearch.com/learn/configuration/instance_options.html#ssl-resumption
|
||||
# https://www.meilisearch.com/docs/learn/configuration/instance_options#ssl-resumption
|
||||
|
||||
ssl_tickets = false
|
||||
# Activates SSL tickets.
|
||||
# https://docs.meilisearch.com/learn/configuration/instance_options.html#ssl-tickets
|
||||
# https://www.meilisearch.com/docs/learn/configuration/instance_options#ssl-tickets
|
||||
|
||||
#############################
|
||||
### Experimental features ###
|
||||
@ -126,5 +126,3 @@ ssl_tickets = false
|
||||
experimental_enable_metrics = false
|
||||
# Experimental metrics feature. For more information, see: <https://github.com/meilisearch/meilisearch/discussions/3518>
|
||||
# Enables the Prometheus metrics on the `GET /metrics` endpoint.
|
||||
|
||||
|
||||
|
@ -103,7 +103,7 @@ not_available_failure_usage() {
|
||||
printf "$RED%s\n$DEFAULT" 'ERROR: Meilisearch binary is not available for your OS distribution or your architecture yet.'
|
||||
echo ''
|
||||
echo 'However, you can easily compile the binary from the source files.'
|
||||
echo 'Follow the steps at the page ("Source" tab): https://docs.meilisearch.com/learn/getting_started/installation.html'
|
||||
echo 'Follow the steps at the page ("Source" tab): https://www.meilisearch.com/docs/learn/getting_started/installation'
|
||||
}
|
||||
|
||||
fetch_release_failure_usage() {
|
||||
|
@ -11,22 +11,22 @@ readme.workspace = true
|
||||
license.workspace = true
|
||||
|
||||
[dependencies]
|
||||
anyhow = "1.0.65"
|
||||
flate2 = "1.0.22"
|
||||
http = "0.2.8"
|
||||
anyhow = "1.0.70"
|
||||
flate2 = "1.0.25"
|
||||
http = "0.2.9"
|
||||
log = "0.4.17"
|
||||
meilisearch-auth = { path = "../meilisearch-auth" }
|
||||
meilisearch-types = { path = "../meilisearch-types" }
|
||||
once_cell = "1.15.0"
|
||||
regex = "1.6.0"
|
||||
roaring = { version = "0.10.0", features = ["serde"] }
|
||||
serde = { version = "1.0.136", features = ["derive"] }
|
||||
serde_json = { version = "1.0.85", features = ["preserve_order"] }
|
||||
once_cell = "1.17.1"
|
||||
regex = "1.7.3"
|
||||
roaring = { version = "0.10.1", features = ["serde"] }
|
||||
serde = { version = "1.0.160", features = ["derive"] }
|
||||
serde_json = { version = "1.0.95", features = ["preserve_order"] }
|
||||
tar = "0.4.38"
|
||||
tempfile = "3.3.0"
|
||||
thiserror = "1.0.30"
|
||||
time = { version = "0.3.7", features = ["serde-well-known", "formatting", "parsing", "macros"] }
|
||||
uuid = { version = "1.1.2", features = ["serde", "v4"] }
|
||||
tempfile = "3.5.0"
|
||||
thiserror = "1.0.40"
|
||||
time = { version = "0.3.20", features = ["serde-well-known", "formatting", "parsing", "macros"] }
|
||||
uuid = { version = "1.3.1", features = ["serde", "v4"] }
|
||||
|
||||
[dev-dependencies]
|
||||
big_s = "1.0.2"
|
||||
|
@ -101,6 +101,9 @@ pub enum KindDump {
|
||||
documents_ids: Vec<String>,
|
||||
},
|
||||
DocumentClear,
|
||||
DocumentDeletionByFilter {
|
||||
filter: serde_json::Value,
|
||||
},
|
||||
Settings {
|
||||
settings: Box<meilisearch_types::settings::Settings<Unchecked>>,
|
||||
is_deletion: bool,
|
||||
@ -166,6 +169,9 @@ impl From<KindWithContent> for KindDump {
|
||||
KindWithContent::DocumentDeletion { documents_ids, .. } => {
|
||||
KindDump::DocumentDeletion { documents_ids }
|
||||
}
|
||||
KindWithContent::DocumentDeletionByFilter { filter_expr, .. } => {
|
||||
KindDump::DocumentDeletionByFilter { filter: filter_expr }
|
||||
}
|
||||
KindWithContent::DocumentClear { .. } => KindDump::DocumentClear,
|
||||
KindWithContent::SettingsUpdate {
|
||||
new_settings,
|
||||
|
@ -25,7 +25,6 @@ impl CompatV2ToV3 {
|
||||
CompatV2ToV3::Compat(compat) => compat.index_uuid(),
|
||||
};
|
||||
v2_uuids
|
||||
.into_iter()
|
||||
.into_iter()
|
||||
.map(|index| v3::meta::IndexUuid { uid: index.uid, uuid: index.uuid })
|
||||
.collect()
|
||||
|
@ -11,9 +11,9 @@ edition.workspace = true
|
||||
license.workspace = true
|
||||
|
||||
[dependencies]
|
||||
tempfile = "3.3.0"
|
||||
thiserror = "1.0.30"
|
||||
uuid = { version = "1.1.2", features = ["serde", "v4"] }
|
||||
tempfile = "3.5.0"
|
||||
thiserror = "1.0.40"
|
||||
uuid = { version = "1.3.1", features = ["serde", "v4"] }
|
||||
|
||||
[dev-dependencies]
|
||||
faux = "0.1.8"
|
||||
faux = "0.1.9"
|
||||
|
@ -12,8 +12,8 @@ edition.workspace = true
|
||||
license.workspace = true
|
||||
|
||||
[dependencies]
|
||||
nom = "7.1.1"
|
||||
nom_locate = "4.0.0"
|
||||
nom = "7.1.3"
|
||||
nom_locate = "4.1.0"
|
||||
|
||||
[dev-dependencies]
|
||||
insta = "1.21.0"
|
||||
insta = "1.29.0"
|
||||
|
@ -20,6 +20,8 @@ pub enum Condition<'a> {
|
||||
GreaterThanOrEqual(Token<'a>),
|
||||
Equal(Token<'a>),
|
||||
NotEqual(Token<'a>),
|
||||
Null,
|
||||
Empty,
|
||||
Exists,
|
||||
LowerThan(Token<'a>),
|
||||
LowerThanOrEqual(Token<'a>),
|
||||
@ -44,6 +46,38 @@ pub fn parse_condition(input: Span) -> IResult<FilterCondition> {
|
||||
Ok((input, condition))
|
||||
}
|
||||
|
||||
/// null = value "IS" WS+ "NULL"
|
||||
pub fn parse_is_null(input: Span) -> IResult<FilterCondition> {
|
||||
let (input, key) = parse_value(input)?;
|
||||
|
||||
let (input, _) = tuple((tag("IS"), multispace1, tag("NULL")))(input)?;
|
||||
Ok((input, FilterCondition::Condition { fid: key, op: Null }))
|
||||
}
|
||||
|
||||
/// null = value "IS" WS+ "NOT" WS+ "NULL"
|
||||
pub fn parse_is_not_null(input: Span) -> IResult<FilterCondition> {
|
||||
let (input, key) = parse_value(input)?;
|
||||
|
||||
let (input, _) = tuple((tag("IS"), multispace1, tag("NOT"), multispace1, tag("NULL")))(input)?;
|
||||
Ok((input, FilterCondition::Not(Box::new(FilterCondition::Condition { fid: key, op: Null }))))
|
||||
}
|
||||
|
||||
/// empty = value "IS" WS+ "EMPTY"
|
||||
pub fn parse_is_empty(input: Span) -> IResult<FilterCondition> {
|
||||
let (input, key) = parse_value(input)?;
|
||||
|
||||
let (input, _) = tuple((tag("IS"), multispace1, tag("EMPTY")))(input)?;
|
||||
Ok((input, FilterCondition::Condition { fid: key, op: Empty }))
|
||||
}
|
||||
|
||||
/// empty = value "IS" WS+ "NOT" WS+ "EMPTY"
|
||||
pub fn parse_is_not_empty(input: Span) -> IResult<FilterCondition> {
|
||||
let (input, key) = parse_value(input)?;
|
||||
|
||||
let (input, _) = tuple((tag("IS"), multispace1, tag("NOT"), multispace1, tag("EMPTY")))(input)?;
|
||||
Ok((input, FilterCondition::Not(Box::new(FilterCondition::Condition { fid: key, op: Empty }))))
|
||||
}
|
||||
|
||||
/// exist = value "EXISTS"
|
||||
pub fn parse_exists(input: Span) -> IResult<FilterCondition> {
|
||||
let (input, key) = terminated(parse_value, tag("EXISTS"))(input)?;
|
||||
|
@ -143,11 +143,9 @@ impl<'a> Display for Error<'a> {
|
||||
ErrorKind::MissingClosingDelimiter(c) => {
|
||||
writeln!(f, "Expression `{}` is missing the following closing delimiter: `{}`.", escaped_input, c)?
|
||||
}
|
||||
ErrorKind::InvalidPrimary if input.trim().is_empty() => {
|
||||
writeln!(f, "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `_geoRadius`, or `_geoBoundingBox` but instead got nothing.")?
|
||||
}
|
||||
ErrorKind::InvalidPrimary => {
|
||||
writeln!(f, "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `_geoRadius`, or `_geoBoundingBox` at `{}`.", escaped_input)?
|
||||
let text = if input.trim().is_empty() { "but instead got nothing.".to_string() } else { format!("at `{}`.", escaped_input) };
|
||||
writeln!(f, "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` {}", text)?
|
||||
}
|
||||
ErrorKind::ExpectedEof => {
|
||||
writeln!(f, "Found unexpected characters at the end of the filter: `{}`. You probably forgot an `OR` or an `AND` rule.", escaped_input)?
|
||||
|
@ -47,7 +47,10 @@ mod value;
|
||||
use std::fmt::Debug;
|
||||
|
||||
pub use condition::{parse_condition, parse_to, Condition};
|
||||
use condition::{parse_exists, parse_not_exists};
|
||||
use condition::{
|
||||
parse_exists, parse_is_empty, parse_is_not_empty, parse_is_not_null, parse_is_null,
|
||||
parse_not_exists,
|
||||
};
|
||||
use error::{cut_with_err, ExpectedValueKind, NomErrorExt};
|
||||
pub use error::{Error, ErrorKind};
|
||||
use nom::branch::alt;
|
||||
@ -442,6 +445,10 @@ fn parse_primary(input: Span, depth: usize) -> IResult<FilterCondition> {
|
||||
parse_in,
|
||||
parse_not_in,
|
||||
parse_condition,
|
||||
parse_is_null,
|
||||
parse_is_not_null,
|
||||
parse_is_empty,
|
||||
parse_is_not_empty,
|
||||
parse_exists,
|
||||
parse_not_exists,
|
||||
parse_to,
|
||||
@ -526,14 +533,30 @@ pub mod tests {
|
||||
insta::assert_display_snapshot!(p("subscribers <= 1000"), @"{subscribers} <= {1000}");
|
||||
insta::assert_display_snapshot!(p("subscribers 100 TO 1000"), @"{subscribers} {100} TO {1000}");
|
||||
|
||||
// Test NOT + EXISTS
|
||||
insta::assert_display_snapshot!(p("subscribers EXISTS"), @"{subscribers} EXISTS");
|
||||
// Test NOT
|
||||
insta::assert_display_snapshot!(p("NOT subscribers < 1000"), @"NOT ({subscribers} < {1000})");
|
||||
insta::assert_display_snapshot!(p("NOT subscribers 100 TO 1000"), @"NOT ({subscribers} {100} TO {1000})");
|
||||
|
||||
// Test NULL + NOT NULL
|
||||
insta::assert_display_snapshot!(p("subscribers IS NULL"), @"{subscribers} IS NULL");
|
||||
insta::assert_display_snapshot!(p("NOT subscribers IS NULL"), @"NOT ({subscribers} IS NULL)");
|
||||
insta::assert_display_snapshot!(p("subscribers IS NOT NULL"), @"NOT ({subscribers} IS NULL)");
|
||||
insta::assert_display_snapshot!(p("NOT subscribers IS NOT NULL"), @"{subscribers} IS NULL");
|
||||
insta::assert_display_snapshot!(p("subscribers IS NOT NULL"), @"NOT ({subscribers} IS NULL)");
|
||||
|
||||
// Test EMPTY + NOT EMPTY
|
||||
insta::assert_display_snapshot!(p("subscribers IS EMPTY"), @"{subscribers} IS EMPTY");
|
||||
insta::assert_display_snapshot!(p("NOT subscribers IS EMPTY"), @"NOT ({subscribers} IS EMPTY)");
|
||||
insta::assert_display_snapshot!(p("subscribers IS NOT EMPTY"), @"NOT ({subscribers} IS EMPTY)");
|
||||
insta::assert_display_snapshot!(p("NOT subscribers IS NOT EMPTY"), @"{subscribers} IS EMPTY");
|
||||
insta::assert_display_snapshot!(p("subscribers IS NOT EMPTY"), @"NOT ({subscribers} IS EMPTY)");
|
||||
|
||||
// Test EXISTS + NOT EXITS
|
||||
insta::assert_display_snapshot!(p("subscribers EXISTS"), @"{subscribers} EXISTS");
|
||||
insta::assert_display_snapshot!(p("NOT subscribers EXISTS"), @"NOT ({subscribers} EXISTS)");
|
||||
insta::assert_display_snapshot!(p("subscribers NOT EXISTS"), @"NOT ({subscribers} EXISTS)");
|
||||
insta::assert_display_snapshot!(p("NOT subscribers NOT EXISTS"), @"{subscribers} EXISTS");
|
||||
insta::assert_display_snapshot!(p("subscribers NOT EXISTS"), @"NOT ({subscribers} EXISTS)");
|
||||
insta::assert_display_snapshot!(p("NOT subscribers 100 TO 1000"), @"NOT ({subscribers} {100} TO {1000})");
|
||||
|
||||
// Test nested NOT
|
||||
insta::assert_display_snapshot!(p("NOT NOT NOT NOT x = 5"), @"{x} = {5}");
|
||||
@ -606,7 +629,7 @@ pub mod tests {
|
||||
"###);
|
||||
|
||||
insta::assert_display_snapshot!(p("'OR'"), @r###"
|
||||
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `_geoRadius`, or `_geoBoundingBox` at `\'OR\'`.
|
||||
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `\'OR\'`.
|
||||
1:5 'OR'
|
||||
"###);
|
||||
|
||||
@ -616,12 +639,12 @@ pub mod tests {
|
||||
"###);
|
||||
|
||||
insta::assert_display_snapshot!(p("channel Ponce"), @r###"
|
||||
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `_geoRadius`, or `_geoBoundingBox` at `channel Ponce`.
|
||||
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `channel Ponce`.
|
||||
1:14 channel Ponce
|
||||
"###);
|
||||
|
||||
insta::assert_display_snapshot!(p("channel = Ponce OR"), @r###"
|
||||
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `_geoRadius`, or `_geoBoundingBox` but instead got nothing.
|
||||
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` but instead got nothing.
|
||||
19:19 channel = Ponce OR
|
||||
"###);
|
||||
|
||||
@ -706,12 +729,12 @@ pub mod tests {
|
||||
"###);
|
||||
|
||||
insta::assert_display_snapshot!(p("colour NOT EXIST"), @r###"
|
||||
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `_geoRadius`, or `_geoBoundingBox` at `colour NOT EXIST`.
|
||||
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `colour NOT EXIST`.
|
||||
1:17 colour NOT EXIST
|
||||
"###);
|
||||
|
||||
insta::assert_display_snapshot!(p("subscribers 100 TO1000"), @r###"
|
||||
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `_geoRadius`, or `_geoBoundingBox` at `subscribers 100 TO1000`.
|
||||
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `subscribers 100 TO1000`.
|
||||
1:23 subscribers 100 TO1000
|
||||
"###);
|
||||
|
||||
@ -772,6 +795,39 @@ pub mod tests {
|
||||
Was expecting a value but instead got `OR`, which is a reserved keyword. To use `OR` as a field name or a value, surround it by quotes.
|
||||
5:7 NOT OR EXISTS AND EXISTS NOT EXISTS
|
||||
"###);
|
||||
|
||||
insta::assert_display_snapshot!(p(r#"value NULL"#), @r###"
|
||||
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `value NULL`.
|
||||
1:11 value NULL
|
||||
"###);
|
||||
insta::assert_display_snapshot!(p(r#"value NOT NULL"#), @r###"
|
||||
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `value NOT NULL`.
|
||||
1:15 value NOT NULL
|
||||
"###);
|
||||
insta::assert_display_snapshot!(p(r#"value EMPTY"#), @r###"
|
||||
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `value EMPTY`.
|
||||
1:12 value EMPTY
|
||||
"###);
|
||||
insta::assert_display_snapshot!(p(r#"value NOT EMPTY"#), @r###"
|
||||
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `value NOT EMPTY`.
|
||||
1:16 value NOT EMPTY
|
||||
"###);
|
||||
insta::assert_display_snapshot!(p(r#"value IS"#), @r###"
|
||||
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `value IS`.
|
||||
1:9 value IS
|
||||
"###);
|
||||
insta::assert_display_snapshot!(p(r#"value IS NOT"#), @r###"
|
||||
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `value IS NOT`.
|
||||
1:13 value IS NOT
|
||||
"###);
|
||||
insta::assert_display_snapshot!(p(r#"value IS EXISTS"#), @r###"
|
||||
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `value IS EXISTS`.
|
||||
1:16 value IS EXISTS
|
||||
"###);
|
||||
insta::assert_display_snapshot!(p(r#"value IS NOT EXISTS"#), @r###"
|
||||
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `value IS NOT EXISTS`.
|
||||
1:20 value IS NOT EXISTS
|
||||
"###);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@ -853,6 +909,8 @@ impl<'a> std::fmt::Display for Condition<'a> {
|
||||
Condition::GreaterThanOrEqual(token) => write!(f, ">= {token}"),
|
||||
Condition::Equal(token) => write!(f, "= {token}"),
|
||||
Condition::NotEqual(token) => write!(f, "!= {token}"),
|
||||
Condition::Null => write!(f, "IS NULL"),
|
||||
Condition::Empty => write!(f, "IS EMPTY"),
|
||||
Condition::Exists => write!(f, "EXISTS"),
|
||||
Condition::LowerThan(token) => write!(f, "< {token}"),
|
||||
Condition::LowerThanOrEqual(token) => write!(f, "<= {token}"),
|
||||
|
@ -183,7 +183,20 @@ fn is_syntax_component(c: char) -> bool {
|
||||
}
|
||||
|
||||
fn is_keyword(s: &str) -> bool {
|
||||
matches!(s, "AND" | "OR" | "IN" | "NOT" | "TO" | "EXISTS" | "_geoRadius" | "_geoBoundingBox")
|
||||
matches!(
|
||||
s,
|
||||
"AND"
|
||||
| "OR"
|
||||
| "IN"
|
||||
| "NOT"
|
||||
| "TO"
|
||||
| "EXISTS"
|
||||
| "IS"
|
||||
| "NULL"
|
||||
| "EMPTY"
|
||||
| "_geoRadius"
|
||||
| "_geoBoundingBox"
|
||||
)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
@ -4,51 +4,56 @@ use serde_json::{Map, Value};
|
||||
|
||||
pub fn flatten(json: &Map<String, Value>) -> Map<String, Value> {
|
||||
let mut obj = Map::new();
|
||||
let mut all_keys = vec![];
|
||||
insert_object(&mut obj, None, json, &mut all_keys);
|
||||
for key in all_keys {
|
||||
obj.entry(key).or_insert(Value::Array(vec![]));
|
||||
let mut all_entries = vec![];
|
||||
insert_object(&mut obj, None, json, &mut all_entries);
|
||||
for (key, old_val) in all_entries {
|
||||
obj.entry(key).or_insert(old_val.clone());
|
||||
}
|
||||
obj
|
||||
}
|
||||
|
||||
fn insert_object(
|
||||
fn insert_object<'a>(
|
||||
base_json: &mut Map<String, Value>,
|
||||
base_key: Option<&str>,
|
||||
object: &Map<String, Value>,
|
||||
all_keys: &mut Vec<String>,
|
||||
object: &'a Map<String, Value>,
|
||||
all_entries: &mut Vec<(String, &'a Value)>,
|
||||
) {
|
||||
for (key, value) in object {
|
||||
let new_key = base_key.map_or_else(|| key.clone(), |base_key| format!("{base_key}.{key}"));
|
||||
all_keys.push(new_key.clone());
|
||||
all_entries.push((new_key.clone(), value));
|
||||
if let Some(array) = value.as_array() {
|
||||
insert_array(base_json, &new_key, array, all_keys);
|
||||
insert_array(base_json, &new_key, array, all_entries);
|
||||
} else if let Some(object) = value.as_object() {
|
||||
insert_object(base_json, Some(&new_key), object, all_keys);
|
||||
insert_object(base_json, Some(&new_key), object, all_entries);
|
||||
} else {
|
||||
insert_value(base_json, &new_key, value.clone());
|
||||
insert_value(base_json, &new_key, value.clone(), false);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn insert_array(
|
||||
fn insert_array<'a>(
|
||||
base_json: &mut Map<String, Value>,
|
||||
base_key: &str,
|
||||
array: &Vec<Value>,
|
||||
all_keys: &mut Vec<String>,
|
||||
array: &'a Vec<Value>,
|
||||
all_entries: &mut Vec<(String, &'a Value)>,
|
||||
) {
|
||||
for value in array {
|
||||
if let Some(object) = value.as_object() {
|
||||
insert_object(base_json, Some(base_key), object, all_keys);
|
||||
insert_object(base_json, Some(base_key), object, all_entries);
|
||||
} else if let Some(sub_array) = value.as_array() {
|
||||
insert_array(base_json, base_key, sub_array, all_keys);
|
||||
insert_array(base_json, base_key, sub_array, all_entries);
|
||||
} else {
|
||||
insert_value(base_json, base_key, value.clone());
|
||||
insert_value(base_json, base_key, value.clone(), true);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn insert_value(base_json: &mut Map<String, Value>, key: &str, to_insert: Value) {
|
||||
fn insert_value(
|
||||
base_json: &mut Map<String, Value>,
|
||||
key: &str,
|
||||
to_insert: Value,
|
||||
came_from_array: bool,
|
||||
) {
|
||||
debug_assert!(!to_insert.is_object());
|
||||
debug_assert!(!to_insert.is_array());
|
||||
|
||||
@ -63,6 +68,8 @@ fn insert_value(base_json: &mut Map<String, Value>, key: &str, to_insert: Value)
|
||||
base_json[key] = Value::Array(vec![value, to_insert]);
|
||||
}
|
||||
// if it does not exist we can push the value untouched
|
||||
} else if came_from_array {
|
||||
base_json.insert(key.to_string(), Value::Array(vec![to_insert]));
|
||||
} else {
|
||||
base_json.insert(key.to_string(), to_insert);
|
||||
}
|
||||
@ -113,7 +120,11 @@ mod tests {
|
||||
assert_eq!(
|
||||
&flat,
|
||||
json!({
|
||||
"a": [],
|
||||
"a": {
|
||||
"b": "c",
|
||||
"d": "e",
|
||||
"f": "g"
|
||||
},
|
||||
"a.b": "c",
|
||||
"a.d": "e",
|
||||
"a.f": "g"
|
||||
@ -164,7 +175,7 @@ mod tests {
|
||||
assert_eq!(
|
||||
&flat,
|
||||
json!({
|
||||
"a": 42,
|
||||
"a": [42],
|
||||
"a.b": ["c", "d", "e"],
|
||||
})
|
||||
.as_object()
|
||||
@ -186,7 +197,7 @@ mod tests {
|
||||
assert_eq!(
|
||||
&flat,
|
||||
json!({
|
||||
"a": null,
|
||||
"a": [null],
|
||||
"a.b": ["c", "d", "e"],
|
||||
})
|
||||
.as_object()
|
||||
@ -208,7 +219,9 @@ mod tests {
|
||||
assert_eq!(
|
||||
&flat,
|
||||
json!({
|
||||
"a": [],
|
||||
"a": {
|
||||
"b": "c"
|
||||
},
|
||||
"a.b": ["c", "d"],
|
||||
})
|
||||
.as_object()
|
||||
@ -234,7 +247,7 @@ mod tests {
|
||||
json!({
|
||||
"a.b": ["c", "d", "f"],
|
||||
"a.c": "e",
|
||||
"a": 35,
|
||||
"a": [35],
|
||||
})
|
||||
.as_object()
|
||||
.unwrap()
|
||||
@ -302,4 +315,53 @@ mod tests {
|
||||
.unwrap()
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn flatten_nested_values_keep_original_values() {
|
||||
let mut base: Value = json!({
|
||||
"tags": {
|
||||
"t1": "v1"
|
||||
},
|
||||
"prices": {
|
||||
"p1": [null],
|
||||
"p1000": {"tamo": {"le": {}}}
|
||||
},
|
||||
"kiki": [[]]
|
||||
});
|
||||
let json = std::mem::take(base.as_object_mut().unwrap());
|
||||
let flat = flatten(&json);
|
||||
|
||||
println!("{}", serde_json::to_string_pretty(&flat).unwrap());
|
||||
|
||||
assert_eq!(
|
||||
&flat,
|
||||
json!({
|
||||
"prices": {
|
||||
"p1": [null],
|
||||
"p1000": {
|
||||
"tamo": {
|
||||
"le": {}
|
||||
}
|
||||
}
|
||||
},
|
||||
"prices.p1": [null],
|
||||
"prices.p1000": {
|
||||
"tamo": {
|
||||
"le": {}
|
||||
}
|
||||
},
|
||||
"prices.p1000.tamo": {
|
||||
"le": {}
|
||||
},
|
||||
"prices.p1000.tamo.le": {},
|
||||
"tags": {
|
||||
"t1": "v1"
|
||||
},
|
||||
"tags.t1": "v1",
|
||||
"kiki": [[]]
|
||||
})
|
||||
.as_object()
|
||||
.unwrap()
|
||||
);
|
||||
}
|
||||
}
|
||||
|
@ -11,29 +11,29 @@ edition.workspace = true
|
||||
license.workspace = true
|
||||
|
||||
[dependencies]
|
||||
anyhow = "1.0.64"
|
||||
anyhow = "1.0.70"
|
||||
bincode = "1.3.3"
|
||||
csv = "1.1.6"
|
||||
derive_builder = "0.11.2"
|
||||
csv = "1.2.1"
|
||||
derive_builder = "0.12.0"
|
||||
dump = { path = "../dump" }
|
||||
enum-iterator = "1.1.3"
|
||||
enum-iterator = "1.4.0"
|
||||
file-store = { path = "../file-store" }
|
||||
log = "0.4.14"
|
||||
log = "0.4.17"
|
||||
meilisearch-auth = { path = "../meilisearch-auth" }
|
||||
meilisearch-types = { path = "../meilisearch-types" }
|
||||
page_size = "0.5.0"
|
||||
roaring = { version = "0.10.0", features = ["serde"] }
|
||||
serde = { version = "1.0.136", features = ["derive"] }
|
||||
serde_json = { version = "1.0.85", features = ["preserve_order"] }
|
||||
roaring = { version = "0.10.1", features = ["serde"] }
|
||||
serde = { version = "1.0.160", features = ["derive"] }
|
||||
serde_json = { version = "1.0.95", features = ["preserve_order"] }
|
||||
synchronoise = "1.0.1"
|
||||
tempfile = "3.3.0"
|
||||
thiserror = "1.0.30"
|
||||
time = { version = "0.3.7", features = ["serde-well-known", "formatting", "parsing", "macros"] }
|
||||
uuid = { version = "1.1.2", features = ["serde", "v4"] }
|
||||
tempfile = "3.5.0"
|
||||
thiserror = "1.0.40"
|
||||
time = { version = "0.3.20", features = ["serde-well-known", "formatting", "parsing", "macros"] }
|
||||
uuid = { version = "1.3.1", features = ["serde", "v4"] }
|
||||
|
||||
[dev-dependencies]
|
||||
big_s = "1.0.2"
|
||||
crossbeam = "0.8.2"
|
||||
insta = { version = "1.19.1", features = ["json", "redactions"] }
|
||||
insta = { version = "1.29.0", features = ["json", "redactions"] }
|
||||
meili-snap = { path = "../meili-snap" }
|
||||
nelson = { git = "https://github.com/meilisearch/nelson.git", rev = "675f13885548fb415ead8fbb447e9e6d9314000a"}
|
||||
|
@ -25,6 +25,7 @@ enum AutobatchKind {
|
||||
primary_key: Option<String>,
|
||||
},
|
||||
DocumentDeletion,
|
||||
DocumentDeletionByFilter,
|
||||
DocumentClear,
|
||||
Settings {
|
||||
allow_index_creation: bool,
|
||||
@ -64,6 +65,9 @@ impl From<KindWithContent> for AutobatchKind {
|
||||
} => AutobatchKind::DocumentImport { method, allow_index_creation, primary_key },
|
||||
KindWithContent::DocumentDeletion { .. } => AutobatchKind::DocumentDeletion,
|
||||
KindWithContent::DocumentClear { .. } => AutobatchKind::DocumentClear,
|
||||
KindWithContent::DocumentDeletionByFilter { .. } => {
|
||||
AutobatchKind::DocumentDeletionByFilter
|
||||
}
|
||||
KindWithContent::SettingsUpdate { allow_index_creation, is_deletion, .. } => {
|
||||
AutobatchKind::Settings {
|
||||
allow_index_creation: allow_index_creation && !is_deletion,
|
||||
@ -97,6 +101,9 @@ pub enum BatchKind {
|
||||
DocumentDeletion {
|
||||
deletion_ids: Vec<TaskId>,
|
||||
},
|
||||
DocumentDeletionByFilter {
|
||||
id: TaskId,
|
||||
},
|
||||
ClearAndSettings {
|
||||
other: Vec<TaskId>,
|
||||
allow_index_creation: bool,
|
||||
@ -195,6 +202,9 @@ impl BatchKind {
|
||||
K::DocumentDeletion => {
|
||||
(Continue(BatchKind::DocumentDeletion { deletion_ids: vec![task_id] }), false)
|
||||
}
|
||||
K::DocumentDeletionByFilter => {
|
||||
(Break(BatchKind::DocumentDeletionByFilter { id: task_id }), false)
|
||||
}
|
||||
K::Settings { allow_index_creation } => (
|
||||
Continue(BatchKind::Settings { allow_index_creation, settings_ids: vec![task_id] }),
|
||||
allow_index_creation,
|
||||
@ -212,7 +222,7 @@ impl BatchKind {
|
||||
|
||||
match (self, kind) {
|
||||
// We don't batch any of these operations
|
||||
(this, K::IndexCreation | K::IndexUpdate | K::IndexSwap) => Break(this),
|
||||
(this, K::IndexCreation | K::IndexUpdate | K::IndexSwap | K::DocumentDeletionByFilter) => Break(this),
|
||||
// We must not batch tasks that don't have the same index creation rights if the index doesn't already exists.
|
||||
(this, kind) if !index_already_exists && this.allow_index_creation() == Some(false) && kind.allow_index_creation() == Some(true) => {
|
||||
Break(this)
|
||||
@ -311,18 +321,9 @@ impl BatchKind {
|
||||
})
|
||||
}
|
||||
(
|
||||
BatchKind::DocumentOperation { method, allow_index_creation, primary_key, mut operation_ids },
|
||||
this @ BatchKind::DocumentOperation { .. },
|
||||
K::DocumentDeletion,
|
||||
) => {
|
||||
operation_ids.push(id);
|
||||
|
||||
Continue(BatchKind::DocumentOperation {
|
||||
method,
|
||||
allow_index_creation,
|
||||
primary_key,
|
||||
operation_ids,
|
||||
})
|
||||
}
|
||||
) => Break(this),
|
||||
// but we can't autobatch documents if it's not the same kind
|
||||
// this match branch MUST be AFTER the previous one
|
||||
(
|
||||
@ -345,35 +346,7 @@ impl BatchKind {
|
||||
deletion_ids.push(id);
|
||||
Continue(BatchKind::DocumentClear { ids: deletion_ids })
|
||||
}
|
||||
// we can autobatch the deletion and import if the index already exists
|
||||
(
|
||||
BatchKind::DocumentDeletion { mut deletion_ids },
|
||||
K::DocumentImport { method, allow_index_creation, primary_key }
|
||||
) if index_already_exists => {
|
||||
deletion_ids.push(id);
|
||||
|
||||
Continue(BatchKind::DocumentOperation {
|
||||
method,
|
||||
allow_index_creation,
|
||||
primary_key,
|
||||
operation_ids: deletion_ids,
|
||||
})
|
||||
}
|
||||
// we can autobatch the deletion and import if both can't create an index
|
||||
(
|
||||
BatchKind::DocumentDeletion { mut deletion_ids },
|
||||
K::DocumentImport { method, allow_index_creation, primary_key }
|
||||
) if !allow_index_creation => {
|
||||
deletion_ids.push(id);
|
||||
|
||||
Continue(BatchKind::DocumentOperation {
|
||||
method,
|
||||
allow_index_creation,
|
||||
primary_key,
|
||||
operation_ids: deletion_ids,
|
||||
})
|
||||
}
|
||||
// we can't autobatch a deletion and an import if the index does not exists but would be created by an addition
|
||||
// we can't autobatch a deletion and an import
|
||||
(
|
||||
this @ BatchKind::DocumentDeletion { .. },
|
||||
K::DocumentImport { .. }
|
||||
@ -508,7 +481,8 @@ impl BatchKind {
|
||||
BatchKind::IndexCreation { .. }
|
||||
| BatchKind::IndexDeletion { .. }
|
||||
| BatchKind::IndexUpdate { .. }
|
||||
| BatchKind::IndexSwap { .. },
|
||||
| BatchKind::IndexSwap { .. }
|
||||
| BatchKind::DocumentDeletionByFilter { .. },
|
||||
_,
|
||||
) => {
|
||||
unreachable!()
|
||||
@ -674,36 +648,36 @@ mod tests {
|
||||
debug_snapshot!(autobatch_from(false,None, [settings(false)]), @"Some((Settings { allow_index_creation: false, settings_ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(false,None, [settings(false), settings(false), settings(false)]), @"Some((Settings { allow_index_creation: false, settings_ids: [0, 1, 2] }, false))");
|
||||
|
||||
// We can autobatch document addition with document deletion
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_del()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_del()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), doc_del()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, None), doc_del()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0, 1] }, true))"###);
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0, 1] }, true))"###);
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, true, None), doc_del()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, true, None), doc_del()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, false, None), doc_del()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, false, None), doc_del()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, true, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0, 1] }, true))"###);
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, true, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0, 1] }, true))"###);
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, false, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, false, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
|
||||
// And the other way around
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(ReplaceDocuments, true, Some("catto"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(UpdateDocuments, true, Some("catto"))]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(ReplaceDocuments, false, Some("catto"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(UpdateDocuments, false, Some("catto"))]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(ReplaceDocuments, false, Some("catto"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(UpdateDocuments, false, Some("catto"))]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
|
||||
// We can't autobatch document addition with document deletion
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_del()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_del()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), doc_del()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, None), doc_del()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0] }, true))"###);
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0] }, true))"###);
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0] }, false))"###);
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0] }, false))"###);
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, true, None), doc_del()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, true, None), doc_del()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, false, None), doc_del()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, false, None), doc_del()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, true, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0] }, true))"###);
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, true, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0] }, true))"###);
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, false, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0] }, false))"###);
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, false, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0] }, false))"###);
|
||||
// we also can't do the only way around
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(ReplaceDocuments, true, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(UpdateDocuments, true, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(ReplaceDocuments, false, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(UpdateDocuments, false, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(ReplaceDocuments, false, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))");
|
||||
debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(UpdateDocuments, false, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))");
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
@ -28,9 +28,10 @@ use meilisearch_types::heed::{RoTxn, RwTxn};
|
||||
use meilisearch_types::milli::documents::{obkv_to_object, DocumentsBatchReader};
|
||||
use meilisearch_types::milli::heed::CompactionOption;
|
||||
use meilisearch_types::milli::update::{
|
||||
DocumentDeletionResult, IndexDocumentsConfig, IndexDocumentsMethod, Settings as MilliSettings,
|
||||
DeleteDocuments, DocumentDeletionResult, IndexDocumentsConfig, IndexDocumentsMethod,
|
||||
Settings as MilliSettings,
|
||||
};
|
||||
use meilisearch_types::milli::{self, BEU32};
|
||||
use meilisearch_types::milli::{self, Filter, BEU32};
|
||||
use meilisearch_types::settings::{apply_settings_to_builder, Settings, Unchecked};
|
||||
use meilisearch_types::tasks::{Details, IndexSwap, Kind, KindWithContent, Status, Task};
|
||||
use meilisearch_types::{compression, Index, VERSION_FILE_NAME};
|
||||
@ -65,6 +66,10 @@ pub(crate) enum Batch {
|
||||
op: IndexOperation,
|
||||
must_create_index: bool,
|
||||
},
|
||||
IndexDocumentDeletionByFilter {
|
||||
index_uid: String,
|
||||
task: Task,
|
||||
},
|
||||
IndexCreation {
|
||||
index_uid: String,
|
||||
primary_key: Option<String>,
|
||||
@ -149,6 +154,7 @@ impl Batch {
|
||||
| Batch::TaskDeletion(task)
|
||||
| Batch::Dump(task)
|
||||
| Batch::IndexCreation { task, .. }
|
||||
| Batch::IndexDocumentDeletionByFilter { task, .. }
|
||||
| Batch::IndexUpdate { task, .. } => vec![task.uid],
|
||||
Batch::SnapshotCreation(tasks) | Batch::IndexDeletion { tasks, .. } => {
|
||||
tasks.iter().map(|task| task.uid).collect()
|
||||
@ -187,7 +193,8 @@ impl Batch {
|
||||
IndexOperation { op, .. } => Some(op.index_uid()),
|
||||
IndexCreation { index_uid, .. }
|
||||
| IndexUpdate { index_uid, .. }
|
||||
| IndexDeletion { index_uid, .. } => Some(index_uid),
|
||||
| IndexDeletion { index_uid, .. }
|
||||
| IndexDocumentDeletionByFilter { index_uid, .. } => Some(index_uid),
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -227,6 +234,18 @@ impl IndexScheduler {
|
||||
},
|
||||
must_create_index,
|
||||
})),
|
||||
BatchKind::DocumentDeletionByFilter { id } => {
|
||||
let task = self.get_task(rtxn, id)?.ok_or(Error::CorruptedTaskQueue)?;
|
||||
match &task.kind {
|
||||
KindWithContent::DocumentDeletionByFilter { index_uid, .. } => {
|
||||
Ok(Some(Batch::IndexDocumentDeletionByFilter {
|
||||
index_uid: index_uid.clone(),
|
||||
task,
|
||||
}))
|
||||
}
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
BatchKind::DocumentOperation { method, operation_ids, .. } => {
|
||||
let tasks = self.get_existing_tasks(rtxn, operation_ids)?;
|
||||
let primary_key = tasks
|
||||
@ -867,6 +886,51 @@ impl IndexScheduler {
|
||||
|
||||
Ok(tasks)
|
||||
}
|
||||
Batch::IndexDocumentDeletionByFilter { mut task, index_uid: _ } => {
|
||||
let (index_uid, filter) =
|
||||
if let KindWithContent::DocumentDeletionByFilter { index_uid, filter_expr } =
|
||||
&task.kind
|
||||
{
|
||||
(index_uid, filter_expr)
|
||||
} else {
|
||||
unreachable!()
|
||||
};
|
||||
let index = {
|
||||
let rtxn = self.env.read_txn()?;
|
||||
self.index_mapper.index(&rtxn, index_uid)?
|
||||
};
|
||||
let deleted_documents = delete_document_by_filter(filter, index);
|
||||
let original_filter = if let Some(Details::DocumentDeletionByFilter {
|
||||
original_filter,
|
||||
deleted_documents: _,
|
||||
}) = task.details
|
||||
{
|
||||
original_filter
|
||||
} else {
|
||||
// In the case of a `documentDeleteByFilter` the details MUST be set
|
||||
unreachable!();
|
||||
};
|
||||
|
||||
match deleted_documents {
|
||||
Ok(deleted_documents) => {
|
||||
task.status = Status::Succeeded;
|
||||
task.details = Some(Details::DocumentDeletionByFilter {
|
||||
original_filter,
|
||||
deleted_documents: Some(deleted_documents),
|
||||
});
|
||||
}
|
||||
Err(e) => {
|
||||
task.status = Status::Failed;
|
||||
task.details = Some(Details::DocumentDeletionByFilter {
|
||||
original_filter,
|
||||
deleted_documents: Some(0),
|
||||
});
|
||||
task.error = Some(e.into());
|
||||
}
|
||||
}
|
||||
|
||||
Ok(vec![task])
|
||||
}
|
||||
Batch::IndexCreation { index_uid, primary_key, task } => {
|
||||
let wtxn = self.env.write_txn()?;
|
||||
if self.index_mapper.exists(&wtxn, &index_uid)? {
|
||||
@ -1421,3 +1485,20 @@ impl IndexScheduler {
|
||||
Ok(content_files_to_delete)
|
||||
}
|
||||
}
|
||||
|
||||
fn delete_document_by_filter(filter: &serde_json::Value, index: Index) -> Result<u64> {
|
||||
let filter = Filter::from_json(filter)?;
|
||||
Ok(if let Some(filter) = filter {
|
||||
let mut wtxn = index.write_txn()?;
|
||||
|
||||
let candidates = filter.evaluate(&wtxn, &index)?;
|
||||
let mut delete_operation = DeleteDocuments::new(&mut wtxn, &index)?;
|
||||
delete_operation.delete_documents(&candidates);
|
||||
let deleted_documents =
|
||||
delete_operation.execute().map(|result| result.deleted_documents)?;
|
||||
wtxn.commit()?;
|
||||
deleted_documents
|
||||
} else {
|
||||
0
|
||||
})
|
||||
}
|
||||
|
@ -61,6 +61,8 @@ pub enum Error {
|
||||
SwapDuplicateIndexesFound(Vec<String>),
|
||||
#[error("Index `{0}` not found.")]
|
||||
SwapIndexNotFound(String),
|
||||
#[error("Meilisearch cannot receive write operations because the limit of the task database has been reached. Please delete tasks to continue performing write operations.")]
|
||||
NoSpaceLeftInTaskQueue,
|
||||
#[error(
|
||||
"Indexes {} not found.",
|
||||
.0.iter().map(|s| format!("`{}`", s)).collect::<Vec<_>>().join(", ")
|
||||
@ -132,6 +134,48 @@ pub enum Error {
|
||||
TaskDatabaseUpdate(Box<Self>),
|
||||
#[error(transparent)]
|
||||
HeedTransaction(heed::Error),
|
||||
|
||||
#[cfg(test)]
|
||||
#[error("Planned failure for tests.")]
|
||||
PlannedFailure,
|
||||
}
|
||||
|
||||
impl Error {
|
||||
pub fn is_recoverable(&self) -> bool {
|
||||
match self {
|
||||
Error::IndexNotFound(_)
|
||||
| Error::IndexAlreadyExists(_)
|
||||
| Error::SwapDuplicateIndexFound(_)
|
||||
| Error::SwapDuplicateIndexesFound(_)
|
||||
| Error::SwapIndexNotFound(_)
|
||||
| Error::NoSpaceLeftInTaskQueue
|
||||
| Error::SwapIndexesNotFound(_)
|
||||
| Error::CorruptedDump
|
||||
| Error::InvalidTaskDate { .. }
|
||||
| Error::InvalidTaskUids { .. }
|
||||
| Error::InvalidTaskStatuses { .. }
|
||||
| Error::InvalidTaskTypes { .. }
|
||||
| Error::InvalidTaskCanceledBy { .. }
|
||||
| Error::InvalidIndexUid { .. }
|
||||
| Error::TaskNotFound(_)
|
||||
| Error::TaskDeletionWithEmptyQuery
|
||||
| Error::TaskCancelationWithEmptyQuery
|
||||
| Error::Dump(_)
|
||||
| Error::Heed(_)
|
||||
| Error::Milli(_)
|
||||
| Error::ProcessBatchPanicked
|
||||
| Error::FileStore(_)
|
||||
| Error::IoError(_)
|
||||
| Error::Persist(_)
|
||||
| Error::Anyhow(_) => true,
|
||||
Error::CreateBatch(_)
|
||||
| Error::CorruptedTaskQueue
|
||||
| Error::TaskDatabaseUpdate(_)
|
||||
| Error::HeedTransaction(_) => false,
|
||||
#[cfg(test)]
|
||||
Error::PlannedFailure => false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl ErrorCode for Error {
|
||||
@ -152,6 +196,8 @@ impl ErrorCode for Error {
|
||||
Error::TaskNotFound(_) => Code::TaskNotFound,
|
||||
Error::TaskDeletionWithEmptyQuery => Code::MissingTaskFilters,
|
||||
Error::TaskCancelationWithEmptyQuery => Code::MissingTaskFilters,
|
||||
// TODO: not sure of the Code to use
|
||||
Error::NoSpaceLeftInTaskQueue => Code::NoSpaceLeftOnDevice,
|
||||
Error::Dump(e) => e.error_code(),
|
||||
Error::Milli(e) => e.error_code(),
|
||||
Error::ProcessBatchPanicked => Code::Internal,
|
||||
@ -167,6 +213,9 @@ impl ErrorCode for Error {
|
||||
Error::CorruptedDump => Code::Internal,
|
||||
Error::TaskDatabaseUpdate(_) => Code::Internal,
|
||||
Error::CreateBatch(_) => Code::Internal,
|
||||
|
||||
#[cfg(test)]
|
||||
Error::PlannedFailure => Code::Internal,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -28,6 +28,7 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String {
|
||||
started_at,
|
||||
finished_at,
|
||||
index_mapper,
|
||||
max_number_of_tasks: _,
|
||||
wake_up: _,
|
||||
dumps_path: _,
|
||||
snapshots_path: _,
|
||||
@ -183,6 +184,9 @@ fn snapshot_details(d: &Details) -> String {
|
||||
provided_ids: received_document_ids,
|
||||
deleted_documents,
|
||||
} => format!("{{ received_document_ids: {received_document_ids}, deleted_documents: {deleted_documents:?} }}"),
|
||||
Details::DocumentDeletionByFilter { original_filter, deleted_documents } => format!(
|
||||
"{{ original_filter: {original_filter}, deleted_documents: {deleted_documents:?} }}"
|
||||
),
|
||||
Details::ClearAll { deleted_documents } => {
|
||||
format!("{{ deleted_documents: {deleted_documents:?} }}")
|
||||
},
|
||||
|
@ -51,6 +51,7 @@ use meilisearch_types::milli::{self, CboRoaringBitmapCodec, Index, RoaringBitmap
|
||||
use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task};
|
||||
use roaring::RoaringBitmap;
|
||||
use synchronoise::SignalEvent;
|
||||
use time::format_description::well_known::Rfc3339;
|
||||
use time::OffsetDateTime;
|
||||
use utils::{filter_out_references_to_newer_tasks, keep_tasks_within_datetimes, map_bound};
|
||||
use uuid::Uuid;
|
||||
@ -241,6 +242,9 @@ pub struct IndexSchedulerOptions {
|
||||
/// Set to `true` iff the index scheduler is allowed to automatically
|
||||
/// batch tasks together, to process multiple tasks at once.
|
||||
pub autobatching_enabled: bool,
|
||||
/// The maximum number of tasks stored in the task queue before starting
|
||||
/// to auto schedule task deletions.
|
||||
pub max_number_of_tasks: usize,
|
||||
}
|
||||
|
||||
/// Structure which holds meilisearch's indexes and schedules the tasks
|
||||
@ -290,6 +294,10 @@ pub struct IndexScheduler {
|
||||
/// Whether auto-batching is enabled or not.
|
||||
pub(crate) autobatching_enabled: bool,
|
||||
|
||||
/// The max number of tasks allowed before the scheduler starts to delete
|
||||
/// the finished tasks automatically.
|
||||
pub(crate) max_number_of_tasks: usize,
|
||||
|
||||
/// The path used to create the dumps.
|
||||
pub(crate) dumps_path: PathBuf,
|
||||
|
||||
@ -339,6 +347,7 @@ impl IndexScheduler {
|
||||
index_mapper: self.index_mapper.clone(),
|
||||
wake_up: self.wake_up.clone(),
|
||||
autobatching_enabled: self.autobatching_enabled,
|
||||
max_number_of_tasks: self.max_number_of_tasks,
|
||||
snapshots_path: self.snapshots_path.clone(),
|
||||
dumps_path: self.dumps_path.clone(),
|
||||
auth_path: self.auth_path.clone(),
|
||||
@ -412,6 +421,7 @@ impl IndexScheduler {
|
||||
// we want to start the loop right away in case meilisearch was ctrl+Ced while processing things
|
||||
wake_up: Arc::new(SignalEvent::auto(true)),
|
||||
autobatching_enabled: options.autobatching_enabled,
|
||||
max_number_of_tasks: options.max_number_of_tasks,
|
||||
dumps_path: options.dumps_path,
|
||||
snapshots_path: options.snapshots_path,
|
||||
auth_path: options.auth_path,
|
||||
@ -429,6 +439,13 @@ impl IndexScheduler {
|
||||
Ok(this)
|
||||
}
|
||||
|
||||
/// Return `Ok(())` if the index scheduler is able to access one of its database.
|
||||
pub fn health(&self) -> Result<()> {
|
||||
let rtxn = self.env.read_txn()?;
|
||||
self.all_tasks.first(&rtxn)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn index_budget(
|
||||
tasks_path: &Path,
|
||||
base_map_size: usize,
|
||||
@ -523,13 +540,7 @@ impl IndexScheduler {
|
||||
Err(e) => {
|
||||
log::error!("{}", e);
|
||||
// Wait one second when an irrecoverable error occurs.
|
||||
if matches!(
|
||||
e,
|
||||
Error::CorruptedTaskQueue
|
||||
| Error::TaskDatabaseUpdate(_)
|
||||
| Error::HeedTransaction(_)
|
||||
| Error::CreateBatch(_)
|
||||
) {
|
||||
if !e.is_recoverable() {
|
||||
std::thread::sleep(Duration::from_secs(1));
|
||||
}
|
||||
}
|
||||
@ -821,6 +832,13 @@ impl IndexScheduler {
|
||||
pub fn register(&self, kind: KindWithContent) -> Result<Task> {
|
||||
let mut wtxn = self.env.write_txn()?;
|
||||
|
||||
// if the task doesn't delete anything and 50% of the task queue is full, we must refuse to enqueue the incomming task
|
||||
if !matches!(&kind, KindWithContent::TaskDeletion { tasks, .. } if !tasks.is_empty())
|
||||
&& (self.env.non_free_pages_size()? * 100) / self.env.map_size()? as u64 > 50
|
||||
{
|
||||
return Err(Error::NoSpaceLeftInTaskQueue);
|
||||
}
|
||||
|
||||
let mut task = Task {
|
||||
uid: self.next_task_id(&wtxn)?,
|
||||
enqueued_at: OffsetDateTime::now_utc(),
|
||||
@ -926,14 +944,15 @@ impl IndexScheduler {
|
||||
|
||||
/// Perform one iteration of the run loop.
|
||||
///
|
||||
/// 1. Find the next batch of tasks to be processed.
|
||||
/// 2. Update the information of these tasks following the start of their processing.
|
||||
/// 3. Update the in-memory list of processed tasks accordingly.
|
||||
/// 4. Process the batch:
|
||||
/// 1. See if we need to cleanup the task queue
|
||||
/// 2. Find the next batch of tasks to be processed.
|
||||
/// 3. Update the information of these tasks following the start of their processing.
|
||||
/// 4. Update the in-memory list of processed tasks accordingly.
|
||||
/// 5. Process the batch:
|
||||
/// - perform the actions of each batched task
|
||||
/// - update the information of each batched task following the end
|
||||
/// of their processing.
|
||||
/// 5. Reset the in-memory list of processed tasks.
|
||||
/// 6. Reset the in-memory list of processed tasks.
|
||||
///
|
||||
/// Returns the number of processed tasks.
|
||||
fn tick(&self) -> Result<TickOutcome> {
|
||||
@ -943,6 +962,8 @@ impl IndexScheduler {
|
||||
self.breakpoint(Breakpoint::Start);
|
||||
}
|
||||
|
||||
self.cleanup_task_queue()?;
|
||||
|
||||
let rtxn = self.env.read_txn().map_err(Error::HeedTransaction)?;
|
||||
let batch =
|
||||
match self.create_next_batch(&rtxn).map_err(|e| Error::CreateBatch(Box::new(e)))? {
|
||||
@ -1079,6 +1100,55 @@ impl IndexScheduler {
|
||||
Ok(TickOutcome::TickAgain(processed_tasks))
|
||||
}
|
||||
|
||||
/// Register a task to cleanup the task queue if needed
|
||||
fn cleanup_task_queue(&self) -> Result<()> {
|
||||
let rtxn = self.env.read_txn().map_err(Error::HeedTransaction)?;
|
||||
|
||||
let nb_tasks = self.all_task_ids(&rtxn)?.len();
|
||||
// if we have less than 1M tasks everything is fine
|
||||
if nb_tasks < self.max_number_of_tasks as u64 {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let finished = self.status.get(&rtxn, &Status::Succeeded)?.unwrap_or_default()
|
||||
| self.status.get(&rtxn, &Status::Failed)?.unwrap_or_default()
|
||||
| self.status.get(&rtxn, &Status::Canceled)?.unwrap_or_default();
|
||||
|
||||
let to_delete = RoaringBitmap::from_iter(finished.into_iter().rev().take(100_000));
|
||||
|
||||
// /!\ the len must be at least 2 or else we might enter an infinite loop where we only delete
|
||||
// the deletion tasks we enqueued ourselves.
|
||||
if to_delete.len() < 2 {
|
||||
log::warn!("The task queue is almost full, but no task can be deleted yet.");
|
||||
// the only thing we can do is hope that the user tasks are going to finish
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
log::info!(
|
||||
"The task queue is almost full. Deleting the oldest {} finished tasks.",
|
||||
to_delete.len()
|
||||
);
|
||||
|
||||
// it's safe to unwrap here because we checked the len above
|
||||
let newest_task_id = to_delete.iter().last().unwrap();
|
||||
let last_task_to_delete =
|
||||
self.get_task(&rtxn, newest_task_id)?.ok_or(Error::CorruptedTaskQueue)?;
|
||||
drop(rtxn);
|
||||
|
||||
// increase time by one nanosecond so that the enqueuedAt of the last task to delete is also lower than that date.
|
||||
let delete_before = last_task_to_delete.enqueued_at + Duration::from_nanos(1);
|
||||
|
||||
self.register(KindWithContent::TaskDeletion {
|
||||
query: format!(
|
||||
"?beforeEnqueuedAt={}&statuses=succeeded,failed,canceled",
|
||||
delete_before.format(&Rfc3339).map_err(|_| Error::CorruptedTaskQueue)?,
|
||||
),
|
||||
tasks: to_delete,
|
||||
})?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn index_stats(&self, index_uid: &str) -> Result<IndexStats> {
|
||||
let is_indexing = self.is_index_processing(index_uid)?;
|
||||
let rtxn = self.read_txn()?;
|
||||
@ -1194,6 +1264,12 @@ impl<'a> Dump<'a> {
|
||||
documents_ids,
|
||||
index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
|
||||
},
|
||||
KindDump::DocumentDeletionByFilter { filter } => {
|
||||
KindWithContent::DocumentDeletionByFilter {
|
||||
filter_expr: filter,
|
||||
index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
|
||||
}
|
||||
}
|
||||
KindDump::DocumentClear => KindWithContent::DocumentClear {
|
||||
index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
|
||||
},
|
||||
@ -1336,9 +1412,10 @@ mod tests {
|
||||
use big_s::S;
|
||||
use crossbeam::channel::RecvTimeoutError;
|
||||
use file_store::File;
|
||||
use meili_snap::snapshot;
|
||||
use meili_snap::{json_string, snapshot};
|
||||
use meilisearch_auth::AuthFilter;
|
||||
use meilisearch_types::document_formats::DocumentFormatError;
|
||||
use meilisearch_types::error::ErrorCode;
|
||||
use meilisearch_types::index_uid_pattern::IndexUidPattern;
|
||||
use meilisearch_types::milli::obkv_to_json;
|
||||
use meilisearch_types::milli::update::IndexDocumentsMethod::{
|
||||
@ -1369,13 +1446,22 @@ mod tests {
|
||||
pub fn test(
|
||||
autobatching_enabled: bool,
|
||||
planned_failures: Vec<(usize, FailureLocation)>,
|
||||
) -> (Self, IndexSchedulerHandle) {
|
||||
Self::test_with_custom_config(planned_failures, |config| {
|
||||
config.autobatching_enabled = autobatching_enabled;
|
||||
})
|
||||
}
|
||||
|
||||
pub fn test_with_custom_config(
|
||||
planned_failures: Vec<(usize, FailureLocation)>,
|
||||
configuration: impl Fn(&mut IndexSchedulerOptions),
|
||||
) -> (Self, IndexSchedulerHandle) {
|
||||
let tempdir = TempDir::new().unwrap();
|
||||
let (sender, receiver) = crossbeam::channel::bounded(0);
|
||||
|
||||
let indexer_config = IndexerConfig { skip_index_budget: true, ..Default::default() };
|
||||
|
||||
let options = IndexSchedulerOptions {
|
||||
let mut options = IndexSchedulerOptions {
|
||||
version_file_path: tempdir.path().join(VERSION_FILE_NAME),
|
||||
auth_path: tempdir.path().join("auth"),
|
||||
tasks_path: tempdir.path().join("db_path"),
|
||||
@ -1388,8 +1474,10 @@ mod tests {
|
||||
index_growth_amount: 1000 * 1000, // 1 MB
|
||||
index_count: 5,
|
||||
indexer_config,
|
||||
autobatching_enabled,
|
||||
autobatching_enabled: true,
|
||||
max_number_of_tasks: 1_000_000,
|
||||
};
|
||||
configuration(&mut options);
|
||||
|
||||
let index_scheduler = Self::new(options, sender, planned_failures).unwrap();
|
||||
|
||||
@ -1412,7 +1500,7 @@ mod tests {
|
||||
(index_scheduler, index_scheduler_handle)
|
||||
}
|
||||
|
||||
/// Return a [`CorruptedTaskQueue`](Error::CorruptedTaskQueue) error if a failure is planned
|
||||
/// Return a [`PlannedFailure`](Error::PlannedFailure) error if a failure is planned
|
||||
/// for the given location and current run loop iteration.
|
||||
pub fn maybe_fail(&self, location: FailureLocation) -> Result<()> {
|
||||
if self.planned_failures.contains(&(*self.run_loop_iteration.read().unwrap(), location))
|
||||
@ -1421,7 +1509,7 @@ mod tests {
|
||||
FailureLocation::PanicInsideProcessBatch => {
|
||||
panic!("simulated panic")
|
||||
}
|
||||
_ => Err(Error::CorruptedTaskQueue),
|
||||
_ => Err(Error::PlannedFailure),
|
||||
}
|
||||
} else {
|
||||
Ok(())
|
||||
@ -1929,105 +2017,6 @@ mod tests {
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "both_task_succeeded");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn document_addition_and_document_deletion() {
|
||||
let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]);
|
||||
|
||||
let content = r#"[
|
||||
{ "id": 1, "doggo": "jean bob" },
|
||||
{ "id": 2, "catto": "jorts" },
|
||||
{ "id": 3, "doggo": "bork" }
|
||||
]"#;
|
||||
|
||||
let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(0).unwrap();
|
||||
let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap();
|
||||
file.persist().unwrap();
|
||||
index_scheduler
|
||||
.register(KindWithContent::DocumentAdditionOrUpdate {
|
||||
index_uid: S("doggos"),
|
||||
primary_key: Some(S("id")),
|
||||
method: ReplaceDocuments,
|
||||
content_file: uuid,
|
||||
documents_count,
|
||||
allow_index_creation: true,
|
||||
})
|
||||
.unwrap();
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task");
|
||||
index_scheduler
|
||||
.register(KindWithContent::DocumentDeletion {
|
||||
index_uid: S("doggos"),
|
||||
documents_ids: vec![S("1"), S("2")],
|
||||
})
|
||||
.unwrap();
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_second_task");
|
||||
|
||||
handle.advance_one_successful_batch(); // The addition AND deletion should've been batched together
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_processing_the_batch");
|
||||
|
||||
let index = index_scheduler.index("doggos").unwrap();
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
|
||||
let documents = index
|
||||
.all_documents(&rtxn)
|
||||
.unwrap()
|
||||
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
|
||||
.collect::<Vec<_>>();
|
||||
snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn document_deletion_and_document_addition() {
|
||||
let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]);
|
||||
index_scheduler
|
||||
.register(KindWithContent::DocumentDeletion {
|
||||
index_uid: S("doggos"),
|
||||
documents_ids: vec![S("1"), S("2")],
|
||||
})
|
||||
.unwrap();
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task");
|
||||
|
||||
let content = r#"[
|
||||
{ "id": 1, "doggo": "jean bob" },
|
||||
{ "id": 2, "catto": "jorts" },
|
||||
{ "id": 3, "doggo": "bork" }
|
||||
]"#;
|
||||
|
||||
let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(0).unwrap();
|
||||
let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap();
|
||||
file.persist().unwrap();
|
||||
index_scheduler
|
||||
.register(KindWithContent::DocumentAdditionOrUpdate {
|
||||
index_uid: S("doggos"),
|
||||
primary_key: Some(S("id")),
|
||||
method: ReplaceDocuments,
|
||||
content_file: uuid,
|
||||
documents_count,
|
||||
allow_index_creation: true,
|
||||
})
|
||||
.unwrap();
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_second_task");
|
||||
|
||||
// The deletion should have failed because it can't create an index
|
||||
handle.advance_one_failed_batch();
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_failing_the_deletion");
|
||||
|
||||
// The addition should works
|
||||
handle.advance_one_successful_batch();
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_last_successful_addition");
|
||||
|
||||
let index = index_scheduler.index("doggos").unwrap();
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
|
||||
let documents = index
|
||||
.all_documents(&rtxn)
|
||||
.unwrap()
|
||||
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
|
||||
.collect::<Vec<_>>();
|
||||
snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn do_not_batch_task_of_different_indexes() {
|
||||
let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]);
|
||||
@ -3803,4 +3792,127 @@ mod tests {
|
||||
// No matter what happens in process_batch, the index_scheduler should be internally consistent
|
||||
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "index_creation_failed");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_task_queue_is_full() {
|
||||
let (index_scheduler, mut handle) =
|
||||
IndexScheduler::test_with_custom_config(vec![], |config| {
|
||||
// that's the minimum map size possible
|
||||
config.task_db_size = 1048576;
|
||||
});
|
||||
|
||||
index_scheduler
|
||||
.register(KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None })
|
||||
.unwrap();
|
||||
handle.advance_one_successful_batch();
|
||||
// on average this task takes ~600 bytes
|
||||
loop {
|
||||
let result = index_scheduler.register(KindWithContent::IndexCreation {
|
||||
index_uid: S("doggo"),
|
||||
primary_key: None,
|
||||
});
|
||||
if result.is_err() {
|
||||
break;
|
||||
}
|
||||
handle.advance_one_failed_batch();
|
||||
}
|
||||
index_scheduler.assert_internally_consistent();
|
||||
|
||||
// at this point the task DB shoud have reached its limit and we should not be able to register new tasks
|
||||
let result = index_scheduler
|
||||
.register(KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None })
|
||||
.unwrap_err();
|
||||
snapshot!(result, @"Meilisearch cannot receive write operations because the limit of the task database has been reached. Please delete tasks to continue performing write operations.");
|
||||
// we won't be able to test this error in an integration test thus as a best effort test I still ensure the error return the expected error code
|
||||
snapshot!(format!("{:?}", result.error_code()), @"NoSpaceLeftOnDevice");
|
||||
|
||||
// Even the task deletion that doesn't delete anything shouldn't be accepted
|
||||
let result = index_scheduler
|
||||
.register(KindWithContent::TaskDeletion {
|
||||
query: S("test"),
|
||||
tasks: RoaringBitmap::new(),
|
||||
})
|
||||
.unwrap_err();
|
||||
snapshot!(result, @"Meilisearch cannot receive write operations because the limit of the task database has been reached. Please delete tasks to continue performing write operations.");
|
||||
// we won't be able to test this error in an integration test thus as a best effort test I still ensure the error return the expected error code
|
||||
snapshot!(format!("{:?}", result.error_code()), @"NoSpaceLeftOnDevice");
|
||||
|
||||
// But a task deletion that delete something should works
|
||||
index_scheduler
|
||||
.register(KindWithContent::TaskDeletion { query: S("test"), tasks: (0..100).collect() })
|
||||
.unwrap();
|
||||
handle.advance_one_successful_batch();
|
||||
|
||||
// Now we should be able to enqueue a few tasks again
|
||||
index_scheduler
|
||||
.register(KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None })
|
||||
.unwrap();
|
||||
handle.advance_one_failed_batch();
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_auto_deletion_of_tasks() {
|
||||
let (index_scheduler, mut handle) =
|
||||
IndexScheduler::test_with_custom_config(vec![], |config| {
|
||||
config.max_number_of_tasks = 2;
|
||||
});
|
||||
|
||||
index_scheduler
|
||||
.register(KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None })
|
||||
.unwrap();
|
||||
handle.advance_one_successful_batch();
|
||||
|
||||
index_scheduler
|
||||
.register(KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None })
|
||||
.unwrap();
|
||||
handle.advance_one_failed_batch();
|
||||
|
||||
// at this point the max number of tasks is reached
|
||||
// we can still enqueue multiple tasks
|
||||
index_scheduler
|
||||
.register(KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None })
|
||||
.unwrap();
|
||||
index_scheduler
|
||||
.register(KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None })
|
||||
.unwrap();
|
||||
|
||||
let rtxn = index_scheduler.env.read_txn().unwrap();
|
||||
let tasks = index_scheduler.get_task_ids(&rtxn, &Query { ..Default::default() }).unwrap();
|
||||
let tasks = index_scheduler.get_existing_tasks(&rtxn, tasks).unwrap();
|
||||
snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]" }), name: "task_queue_is_full");
|
||||
drop(rtxn);
|
||||
|
||||
// now we're above the max number of tasks
|
||||
// and if we try to advance in the tick function a new task deletion should be enqueued
|
||||
handle.advance_till([Start, BatchCreated]);
|
||||
let rtxn = index_scheduler.env.read_txn().unwrap();
|
||||
let tasks = index_scheduler.get_task_ids(&rtxn, &Query { ..Default::default() }).unwrap();
|
||||
let tasks = index_scheduler.get_existing_tasks(&rtxn, tasks).unwrap();
|
||||
snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]", ".**.original_filter" => "[filter]", ".**.query" => "[query]" }), name: "task_deletion_have_been_enqueued");
|
||||
drop(rtxn);
|
||||
|
||||
handle.advance_till([InsideProcessBatch, ProcessBatchSucceeded, AfterProcessing]);
|
||||
let rtxn = index_scheduler.env.read_txn().unwrap();
|
||||
let tasks = index_scheduler.get_task_ids(&rtxn, &Query { ..Default::default() }).unwrap();
|
||||
let tasks = index_scheduler.get_existing_tasks(&rtxn, tasks).unwrap();
|
||||
snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]", ".**.original_filter" => "[filter]", ".**.query" => "[query]" }), name: "task_deletion_have_been_processed");
|
||||
drop(rtxn);
|
||||
|
||||
handle.advance_one_failed_batch();
|
||||
// a new task deletion has been enqueued
|
||||
handle.advance_one_successful_batch();
|
||||
let rtxn = index_scheduler.env.read_txn().unwrap();
|
||||
let tasks = index_scheduler.get_task_ids(&rtxn, &Query { ..Default::default() }).unwrap();
|
||||
let tasks = index_scheduler.get_existing_tasks(&rtxn, tasks).unwrap();
|
||||
snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]", ".**.original_filter" => "[filter]", ".**.query" => "[query]" }), name: "after_the_second_task_deletion");
|
||||
drop(rtxn);
|
||||
|
||||
handle.advance_one_failed_batch();
|
||||
handle.advance_one_successful_batch();
|
||||
let rtxn = index_scheduler.env.read_txn().unwrap();
|
||||
let tasks = index_scheduler.get_task_ids(&rtxn, &Query { ..Default::default() }).unwrap();
|
||||
let tasks = index_scheduler.get_existing_tasks(&rtxn, tasks).unwrap();
|
||||
snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]", ".**.original_filter" => "[filter]", ".**.query" => "[query]" }), name: "everything_has_been_processed");
|
||||
drop(rtxn);
|
||||
}
|
||||
}
|
||||
|
@ -1,43 +0,0 @@
|
||||
---
|
||||
source: index-scheduler/src/lib.rs
|
||||
---
|
||||
### Autobatching Enabled = true
|
||||
### Processing Tasks:
|
||||
[]
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, status: succeeded, details: { received_documents: 3, indexed_documents: Some(3) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }}
|
||||
1 {uid: 1, status: succeeded, details: { received_document_ids: 2, deleted_documents: Some(2) }, kind: DocumentDeletion { index_uid: "doggos", documents_ids: ["1", "2"] }}
|
||||
----------------------------------------------------------------------
|
||||
### Status:
|
||||
enqueued []
|
||||
succeeded [0,1,]
|
||||
----------------------------------------------------------------------
|
||||
### Kind:
|
||||
"documentAdditionOrUpdate" [0,]
|
||||
"documentDeletion" [1,]
|
||||
----------------------------------------------------------------------
|
||||
### Index Tasks:
|
||||
doggos [0,1,]
|
||||
----------------------------------------------------------------------
|
||||
### Index Mapper:
|
||||
doggos: { number_of_documents: 1, field_distribution: {"doggo": 1, "id": 1} }
|
||||
|
||||
----------------------------------------------------------------------
|
||||
### Canceled By:
|
||||
|
||||
----------------------------------------------------------------------
|
||||
### Enqueued At:
|
||||
[timestamp] [0,]
|
||||
[timestamp] [1,]
|
||||
----------------------------------------------------------------------
|
||||
### Started At:
|
||||
[timestamp] [0,1,]
|
||||
----------------------------------------------------------------------
|
||||
### Finished At:
|
||||
[timestamp] [0,1,]
|
||||
----------------------------------------------------------------------
|
||||
### File Store:
|
||||
|
||||
----------------------------------------------------------------------
|
||||
|
@ -1,9 +0,0 @@
|
||||
---
|
||||
source: index-scheduler/src/lib.rs
|
||||
---
|
||||
[
|
||||
{
|
||||
"id": 3,
|
||||
"doggo": "bork"
|
||||
}
|
||||
]
|
@ -1,37 +0,0 @@
|
||||
---
|
||||
source: index-scheduler/src/lib.rs
|
||||
---
|
||||
### Autobatching Enabled = true
|
||||
### Processing Tasks:
|
||||
[]
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, status: enqueued, details: { received_documents: 3, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }}
|
||||
----------------------------------------------------------------------
|
||||
### Status:
|
||||
enqueued [0,]
|
||||
----------------------------------------------------------------------
|
||||
### Kind:
|
||||
"documentAdditionOrUpdate" [0,]
|
||||
----------------------------------------------------------------------
|
||||
### Index Tasks:
|
||||
doggos [0,]
|
||||
----------------------------------------------------------------------
|
||||
### Index Mapper:
|
||||
|
||||
----------------------------------------------------------------------
|
||||
### Canceled By:
|
||||
|
||||
----------------------------------------------------------------------
|
||||
### Enqueued At:
|
||||
[timestamp] [0,]
|
||||
----------------------------------------------------------------------
|
||||
### Started At:
|
||||
----------------------------------------------------------------------
|
||||
### Finished At:
|
||||
----------------------------------------------------------------------
|
||||
### File Store:
|
||||
00000000-0000-0000-0000-000000000000
|
||||
|
||||
----------------------------------------------------------------------
|
||||
|
@ -1,40 +0,0 @@
|
||||
---
|
||||
source: index-scheduler/src/lib.rs
|
||||
---
|
||||
### Autobatching Enabled = true
|
||||
### Processing Tasks:
|
||||
[]
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, status: enqueued, details: { received_documents: 3, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }}
|
||||
1 {uid: 1, status: enqueued, details: { received_document_ids: 2, deleted_documents: None }, kind: DocumentDeletion { index_uid: "doggos", documents_ids: ["1", "2"] }}
|
||||
----------------------------------------------------------------------
|
||||
### Status:
|
||||
enqueued [0,1,]
|
||||
----------------------------------------------------------------------
|
||||
### Kind:
|
||||
"documentAdditionOrUpdate" [0,]
|
||||
"documentDeletion" [1,]
|
||||
----------------------------------------------------------------------
|
||||
### Index Tasks:
|
||||
doggos [0,1,]
|
||||
----------------------------------------------------------------------
|
||||
### Index Mapper:
|
||||
|
||||
----------------------------------------------------------------------
|
||||
### Canceled By:
|
||||
|
||||
----------------------------------------------------------------------
|
||||
### Enqueued At:
|
||||
[timestamp] [0,]
|
||||
[timestamp] [1,]
|
||||
----------------------------------------------------------------------
|
||||
### Started At:
|
||||
----------------------------------------------------------------------
|
||||
### Finished At:
|
||||
----------------------------------------------------------------------
|
||||
### File Store:
|
||||
00000000-0000-0000-0000-000000000000
|
||||
|
||||
----------------------------------------------------------------------
|
||||
|
@ -1,43 +0,0 @@
|
||||
---
|
||||
source: index-scheduler/src/lib.rs
|
||||
---
|
||||
### Autobatching Enabled = true
|
||||
### Processing Tasks:
|
||||
[]
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, status: failed, error: ResponseError { code: 200, message: "Index `doggos` not found.", error_code: "index_not_found", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#index_not_found" }, details: { received_document_ids: 2, deleted_documents: Some(0) }, kind: DocumentDeletion { index_uid: "doggos", documents_ids: ["1", "2"] }}
|
||||
1 {uid: 1, status: enqueued, details: { received_documents: 3, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }}
|
||||
----------------------------------------------------------------------
|
||||
### Status:
|
||||
enqueued [1,]
|
||||
failed [0,]
|
||||
----------------------------------------------------------------------
|
||||
### Kind:
|
||||
"documentAdditionOrUpdate" [1,]
|
||||
"documentDeletion" [0,]
|
||||
----------------------------------------------------------------------
|
||||
### Index Tasks:
|
||||
doggos [0,1,]
|
||||
----------------------------------------------------------------------
|
||||
### Index Mapper:
|
||||
|
||||
----------------------------------------------------------------------
|
||||
### Canceled By:
|
||||
|
||||
----------------------------------------------------------------------
|
||||
### Enqueued At:
|
||||
[timestamp] [0,]
|
||||
[timestamp] [1,]
|
||||
----------------------------------------------------------------------
|
||||
### Started At:
|
||||
[timestamp] [0,]
|
||||
----------------------------------------------------------------------
|
||||
### Finished At:
|
||||
[timestamp] [0,]
|
||||
----------------------------------------------------------------------
|
||||
### File Store:
|
||||
00000000-0000-0000-0000-000000000000
|
||||
|
||||
----------------------------------------------------------------------
|
||||
|
@ -1,46 +0,0 @@
|
||||
---
|
||||
source: index-scheduler/src/lib.rs
|
||||
---
|
||||
### Autobatching Enabled = true
|
||||
### Processing Tasks:
|
||||
[]
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, status: failed, error: ResponseError { code: 200, message: "Index `doggos` not found.", error_code: "index_not_found", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#index_not_found" }, details: { received_document_ids: 2, deleted_documents: Some(0) }, kind: DocumentDeletion { index_uid: "doggos", documents_ids: ["1", "2"] }}
|
||||
1 {uid: 1, status: succeeded, details: { received_documents: 3, indexed_documents: Some(3) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }}
|
||||
----------------------------------------------------------------------
|
||||
### Status:
|
||||
enqueued []
|
||||
succeeded [1,]
|
||||
failed [0,]
|
||||
----------------------------------------------------------------------
|
||||
### Kind:
|
||||
"documentAdditionOrUpdate" [1,]
|
||||
"documentDeletion" [0,]
|
||||
----------------------------------------------------------------------
|
||||
### Index Tasks:
|
||||
doggos [0,1,]
|
||||
----------------------------------------------------------------------
|
||||
### Index Mapper:
|
||||
doggos: { number_of_documents: 3, field_distribution: {"catto": 1, "doggo": 2, "id": 3} }
|
||||
|
||||
----------------------------------------------------------------------
|
||||
### Canceled By:
|
||||
|
||||
----------------------------------------------------------------------
|
||||
### Enqueued At:
|
||||
[timestamp] [0,]
|
||||
[timestamp] [1,]
|
||||
----------------------------------------------------------------------
|
||||
### Started At:
|
||||
[timestamp] [0,]
|
||||
[timestamp] [1,]
|
||||
----------------------------------------------------------------------
|
||||
### Finished At:
|
||||
[timestamp] [0,]
|
||||
[timestamp] [1,]
|
||||
----------------------------------------------------------------------
|
||||
### File Store:
|
||||
|
||||
----------------------------------------------------------------------
|
||||
|
@ -1,17 +0,0 @@
|
||||
---
|
||||
source: index-scheduler/src/lib.rs
|
||||
---
|
||||
[
|
||||
{
|
||||
"id": 1,
|
||||
"doggo": "jean bob"
|
||||
},
|
||||
{
|
||||
"id": 2,
|
||||
"catto": "jorts"
|
||||
},
|
||||
{
|
||||
"id": 3,
|
||||
"doggo": "bork"
|
||||
}
|
||||
]
|
@ -1,36 +0,0 @@
|
||||
---
|
||||
source: index-scheduler/src/lib.rs
|
||||
---
|
||||
### Autobatching Enabled = true
|
||||
### Processing Tasks:
|
||||
[]
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, status: enqueued, details: { received_document_ids: 2, deleted_documents: None }, kind: DocumentDeletion { index_uid: "doggos", documents_ids: ["1", "2"] }}
|
||||
----------------------------------------------------------------------
|
||||
### Status:
|
||||
enqueued [0,]
|
||||
----------------------------------------------------------------------
|
||||
### Kind:
|
||||
"documentDeletion" [0,]
|
||||
----------------------------------------------------------------------
|
||||
### Index Tasks:
|
||||
doggos [0,]
|
||||
----------------------------------------------------------------------
|
||||
### Index Mapper:
|
||||
|
||||
----------------------------------------------------------------------
|
||||
### Canceled By:
|
||||
|
||||
----------------------------------------------------------------------
|
||||
### Enqueued At:
|
||||
[timestamp] [0,]
|
||||
----------------------------------------------------------------------
|
||||
### Started At:
|
||||
----------------------------------------------------------------------
|
||||
### Finished At:
|
||||
----------------------------------------------------------------------
|
||||
### File Store:
|
||||
|
||||
----------------------------------------------------------------------
|
||||
|
@ -1,40 +0,0 @@
|
||||
---
|
||||
source: index-scheduler/src/lib.rs
|
||||
---
|
||||
### Autobatching Enabled = true
|
||||
### Processing Tasks:
|
||||
[]
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, status: enqueued, details: { received_document_ids: 2, deleted_documents: None }, kind: DocumentDeletion { index_uid: "doggos", documents_ids: ["1", "2"] }}
|
||||
1 {uid: 1, status: enqueued, details: { received_documents: 3, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }}
|
||||
----------------------------------------------------------------------
|
||||
### Status:
|
||||
enqueued [0,1,]
|
||||
----------------------------------------------------------------------
|
||||
### Kind:
|
||||
"documentAdditionOrUpdate" [1,]
|
||||
"documentDeletion" [0,]
|
||||
----------------------------------------------------------------------
|
||||
### Index Tasks:
|
||||
doggos [0,1,]
|
||||
----------------------------------------------------------------------
|
||||
### Index Mapper:
|
||||
|
||||
----------------------------------------------------------------------
|
||||
### Canceled By:
|
||||
|
||||
----------------------------------------------------------------------
|
||||
### Enqueued At:
|
||||
[timestamp] [0,]
|
||||
[timestamp] [1,]
|
||||
----------------------------------------------------------------------
|
||||
### Started At:
|
||||
----------------------------------------------------------------------
|
||||
### Finished At:
|
||||
----------------------------------------------------------------------
|
||||
### File Store:
|
||||
00000000-0000-0000-0000-000000000000
|
||||
|
||||
----------------------------------------------------------------------
|
||||
|
@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs
|
||||
[]
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, status: failed, error: ResponseError { code: 200, message: "Corrupted task queue.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { received_documents: 1, indexed_documents: Some(0) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
|
||||
0 {uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { received_documents: 1, indexed_documents: Some(0) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
|
||||
----------------------------------------------------------------------
|
||||
### Status:
|
||||
enqueued []
|
||||
|
@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs
|
||||
[]
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, status: failed, error: ResponseError { code: 200, message: "Corrupted task queue.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
|
||||
0 {uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
|
||||
----------------------------------------------------------------------
|
||||
### Status:
|
||||
enqueued []
|
||||
|
@ -8,7 +8,7 @@ source: index-scheduler/src/lib.rs
|
||||
### All Tasks:
|
||||
0 {uid: 0, status: succeeded, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
|
||||
1 {uid: 1, status: succeeded, details: { primary_key: Some("sheep") }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("sheep") }}
|
||||
2 {uid: 2, status: failed, error: ResponseError { code: 200, message: "Corrupted task queue.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { primary_key: Some("fish") }, kind: IndexCreation { index_uid: "whalo", primary_key: Some("fish") }}
|
||||
2 {uid: 2, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { primary_key: Some("fish") }, kind: IndexCreation { index_uid: "whalo", primary_key: Some("fish") }}
|
||||
----------------------------------------------------------------------
|
||||
### Status:
|
||||
enqueued []
|
||||
|
@ -0,0 +1,68 @@
|
||||
---
|
||||
source: index-scheduler/src/lib.rs
|
||||
---
|
||||
[
|
||||
{
|
||||
"uid": 3,
|
||||
"enqueuedAt": "[date]",
|
||||
"startedAt": "[date]",
|
||||
"finishedAt": "[date]",
|
||||
"error": null,
|
||||
"canceledBy": null,
|
||||
"details": {
|
||||
"IndexInfo": {
|
||||
"primary_key": null
|
||||
}
|
||||
},
|
||||
"status": "enqueued",
|
||||
"kind": {
|
||||
"indexCreation": {
|
||||
"index_uid": "doggo",
|
||||
"primary_key": null
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"uid": 5,
|
||||
"enqueuedAt": "[date]",
|
||||
"startedAt": "[date]",
|
||||
"finishedAt": "[date]",
|
||||
"error": null,
|
||||
"canceledBy": null,
|
||||
"details": {
|
||||
"TaskDeletion": {
|
||||
"matched_tasks": 2,
|
||||
"deleted_tasks": 2,
|
||||
"original_filter": "[filter]"
|
||||
}
|
||||
},
|
||||
"status": "succeeded",
|
||||
"kind": {
|
||||
"taskDeletion": {
|
||||
"query": "[query]",
|
||||
"tasks": [
|
||||
58,
|
||||
48,
|
||||
0,
|
||||
0,
|
||||
1,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
1,
|
||||
0,
|
||||
16,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
2,
|
||||
0,
|
||||
4,
|
||||
0
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
@ -0,0 +1,48 @@
|
||||
---
|
||||
source: index-scheduler/src/lib.rs
|
||||
---
|
||||
[
|
||||
{
|
||||
"uid": 6,
|
||||
"enqueuedAt": "[date]",
|
||||
"startedAt": "[date]",
|
||||
"finishedAt": "[date]",
|
||||
"error": null,
|
||||
"canceledBy": null,
|
||||
"details": {
|
||||
"TaskDeletion": {
|
||||
"matched_tasks": 2,
|
||||
"deleted_tasks": 2,
|
||||
"original_filter": "[filter]"
|
||||
}
|
||||
},
|
||||
"status": "succeeded",
|
||||
"kind": {
|
||||
"taskDeletion": {
|
||||
"query": "[query]",
|
||||
"tasks": [
|
||||
58,
|
||||
48,
|
||||
0,
|
||||
0,
|
||||
1,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
1,
|
||||
0,
|
||||
16,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
3,
|
||||
0,
|
||||
5,
|
||||
0
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
@ -0,0 +1,133 @@
|
||||
---
|
||||
source: index-scheduler/src/lib.rs
|
||||
---
|
||||
[
|
||||
{
|
||||
"uid": 0,
|
||||
"enqueuedAt": "[date]",
|
||||
"startedAt": "[date]",
|
||||
"finishedAt": "[date]",
|
||||
"error": null,
|
||||
"canceledBy": null,
|
||||
"details": {
|
||||
"IndexInfo": {
|
||||
"primary_key": null
|
||||
}
|
||||
},
|
||||
"status": "succeeded",
|
||||
"kind": {
|
||||
"indexCreation": {
|
||||
"index_uid": "doggo",
|
||||
"primary_key": null
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"uid": 1,
|
||||
"enqueuedAt": "[date]",
|
||||
"startedAt": "[date]",
|
||||
"finishedAt": "[date]",
|
||||
"error": {
|
||||
"message": "Index `doggo` already exists.",
|
||||
"code": "index_already_exists",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#index_already_exists"
|
||||
},
|
||||
"canceledBy": null,
|
||||
"details": {
|
||||
"IndexInfo": {
|
||||
"primary_key": null
|
||||
}
|
||||
},
|
||||
"status": "failed",
|
||||
"kind": {
|
||||
"indexCreation": {
|
||||
"index_uid": "doggo",
|
||||
"primary_key": null
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"uid": 2,
|
||||
"enqueuedAt": "[date]",
|
||||
"startedAt": "[date]",
|
||||
"finishedAt": "[date]",
|
||||
"error": null,
|
||||
"canceledBy": null,
|
||||
"details": {
|
||||
"IndexInfo": {
|
||||
"primary_key": null
|
||||
}
|
||||
},
|
||||
"status": "enqueued",
|
||||
"kind": {
|
||||
"indexCreation": {
|
||||
"index_uid": "doggo",
|
||||
"primary_key": null
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"uid": 3,
|
||||
"enqueuedAt": "[date]",
|
||||
"startedAt": "[date]",
|
||||
"finishedAt": "[date]",
|
||||
"error": null,
|
||||
"canceledBy": null,
|
||||
"details": {
|
||||
"IndexInfo": {
|
||||
"primary_key": null
|
||||
}
|
||||
},
|
||||
"status": "enqueued",
|
||||
"kind": {
|
||||
"indexCreation": {
|
||||
"index_uid": "doggo",
|
||||
"primary_key": null
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"uid": 4,
|
||||
"enqueuedAt": "[date]",
|
||||
"startedAt": "[date]",
|
||||
"finishedAt": "[date]",
|
||||
"error": null,
|
||||
"canceledBy": null,
|
||||
"details": {
|
||||
"TaskDeletion": {
|
||||
"matched_tasks": 2,
|
||||
"deleted_tasks": null,
|
||||
"original_filter": "[filter]"
|
||||
}
|
||||
},
|
||||
"status": "enqueued",
|
||||
"kind": {
|
||||
"taskDeletion": {
|
||||
"query": "[query]",
|
||||
"tasks": [
|
||||
58,
|
||||
48,
|
||||
0,
|
||||
0,
|
||||
1,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
1,
|
||||
0,
|
||||
16,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
1,
|
||||
0
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
@ -0,0 +1,88 @@
|
||||
---
|
||||
source: index-scheduler/src/lib.rs
|
||||
---
|
||||
[
|
||||
{
|
||||
"uid": 2,
|
||||
"enqueuedAt": "[date]",
|
||||
"startedAt": "[date]",
|
||||
"finishedAt": "[date]",
|
||||
"error": null,
|
||||
"canceledBy": null,
|
||||
"details": {
|
||||
"IndexInfo": {
|
||||
"primary_key": null
|
||||
}
|
||||
},
|
||||
"status": "enqueued",
|
||||
"kind": {
|
||||
"indexCreation": {
|
||||
"index_uid": "doggo",
|
||||
"primary_key": null
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"uid": 3,
|
||||
"enqueuedAt": "[date]",
|
||||
"startedAt": "[date]",
|
||||
"finishedAt": "[date]",
|
||||
"error": null,
|
||||
"canceledBy": null,
|
||||
"details": {
|
||||
"IndexInfo": {
|
||||
"primary_key": null
|
||||
}
|
||||
},
|
||||
"status": "enqueued",
|
||||
"kind": {
|
||||
"indexCreation": {
|
||||
"index_uid": "doggo",
|
||||
"primary_key": null
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"uid": 4,
|
||||
"enqueuedAt": "[date]",
|
||||
"startedAt": "[date]",
|
||||
"finishedAt": "[date]",
|
||||
"error": null,
|
||||
"canceledBy": null,
|
||||
"details": {
|
||||
"TaskDeletion": {
|
||||
"matched_tasks": 2,
|
||||
"deleted_tasks": 2,
|
||||
"original_filter": "[filter]"
|
||||
}
|
||||
},
|
||||
"status": "succeeded",
|
||||
"kind": {
|
||||
"taskDeletion": {
|
||||
"query": "[query]",
|
||||
"tasks": [
|
||||
58,
|
||||
48,
|
||||
0,
|
||||
0,
|
||||
1,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
1,
|
||||
0,
|
||||
16,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
1,
|
||||
0
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
@ -0,0 +1,90 @@
|
||||
---
|
||||
source: index-scheduler/src/lib.rs
|
||||
---
|
||||
[
|
||||
{
|
||||
"uid": 0,
|
||||
"enqueuedAt": "[date]",
|
||||
"startedAt": "[date]",
|
||||
"finishedAt": "[date]",
|
||||
"error": null,
|
||||
"canceledBy": null,
|
||||
"details": {
|
||||
"IndexInfo": {
|
||||
"primary_key": null
|
||||
}
|
||||
},
|
||||
"status": "succeeded",
|
||||
"kind": {
|
||||
"indexCreation": {
|
||||
"index_uid": "doggo",
|
||||
"primary_key": null
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"uid": 1,
|
||||
"enqueuedAt": "[date]",
|
||||
"startedAt": "[date]",
|
||||
"finishedAt": "[date]",
|
||||
"error": {
|
||||
"message": "Index `doggo` already exists.",
|
||||
"code": "index_already_exists",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#index_already_exists"
|
||||
},
|
||||
"canceledBy": null,
|
||||
"details": {
|
||||
"IndexInfo": {
|
||||
"primary_key": null
|
||||
}
|
||||
},
|
||||
"status": "failed",
|
||||
"kind": {
|
||||
"indexCreation": {
|
||||
"index_uid": "doggo",
|
||||
"primary_key": null
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"uid": 2,
|
||||
"enqueuedAt": "[date]",
|
||||
"startedAt": "[date]",
|
||||
"finishedAt": "[date]",
|
||||
"error": null,
|
||||
"canceledBy": null,
|
||||
"details": {
|
||||
"IndexInfo": {
|
||||
"primary_key": null
|
||||
}
|
||||
},
|
||||
"status": "enqueued",
|
||||
"kind": {
|
||||
"indexCreation": {
|
||||
"index_uid": "doggo",
|
||||
"primary_key": null
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"uid": 3,
|
||||
"enqueuedAt": "[date]",
|
||||
"startedAt": "[date]",
|
||||
"finishedAt": "[date]",
|
||||
"error": null,
|
||||
"canceledBy": null,
|
||||
"details": {
|
||||
"IndexInfo": {
|
||||
"primary_key": null
|
||||
}
|
||||
},
|
||||
"status": "enqueued",
|
||||
"kind": {
|
||||
"indexCreation": {
|
||||
"index_uid": "doggo",
|
||||
"primary_key": null
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
@ -239,6 +239,7 @@ pub fn swap_index_uid_in_task(task: &mut Task, swap: (&str, &str)) {
|
||||
match &mut task.kind {
|
||||
K::DocumentAdditionOrUpdate { index_uid, .. } => index_uids.push(index_uid),
|
||||
K::DocumentDeletion { index_uid, .. } => index_uids.push(index_uid),
|
||||
K::DocumentDeletionByFilter { index_uid, .. } => index_uids.push(index_uid),
|
||||
K::DocumentClear { index_uid } => index_uids.push(index_uid),
|
||||
K::SettingsUpdate { index_uid, .. } => index_uids.push(index_uid),
|
||||
K::IndexDeletion { index_uid } => index_uids.push(index_uid),
|
||||
@ -464,6 +465,29 @@ impl IndexScheduler {
|
||||
}
|
||||
}
|
||||
}
|
||||
Details::DocumentDeletionByFilter { deleted_documents, original_filter: _ } => {
|
||||
assert_eq!(kind.as_kind(), Kind::DocumentDeletionByFilter);
|
||||
let (index_uid, _) = if let KindWithContent::DocumentDeletionByFilter {
|
||||
ref index_uid,
|
||||
ref filter_expr,
|
||||
} = kind
|
||||
{
|
||||
(index_uid, filter_expr)
|
||||
} else {
|
||||
unreachable!()
|
||||
};
|
||||
assert_eq!(&task_index_uid.unwrap(), index_uid);
|
||||
|
||||
match status {
|
||||
Status::Enqueued | Status::Processing => (),
|
||||
Status::Succeeded => {
|
||||
assert!(deleted_documents.is_some());
|
||||
}
|
||||
Status::Failed | Status::Canceled => {
|
||||
assert!(deleted_documents == Some(0));
|
||||
}
|
||||
}
|
||||
}
|
||||
Details::ClearAll { deleted_documents } => {
|
||||
assert!(matches!(
|
||||
kind.as_kind(),
|
||||
|
@ -11,6 +11,6 @@ edition.workspace = true
|
||||
license.workspace = true
|
||||
|
||||
[dependencies]
|
||||
insta = { version = "^1.19.1", features = ["json", "redactions"] }
|
||||
insta = { version = "^1.29.0", features = ["json", "redactions"] }
|
||||
md5 = "0.7.0"
|
||||
once_cell = "1.15"
|
||||
once_cell = "1.17"
|
||||
|
@ -11,16 +11,16 @@ edition.workspace = true
|
||||
license.workspace = true
|
||||
|
||||
[dependencies]
|
||||
base64 = "0.13.1"
|
||||
enum-iterator = "1.1.3"
|
||||
base64 = "0.21.0"
|
||||
enum-iterator = "1.4.0"
|
||||
hmac = "0.12.1"
|
||||
maplit = "1.0.2"
|
||||
meilisearch-types = { path = "../meilisearch-types" }
|
||||
rand = "0.8.5"
|
||||
roaring = { version = "0.10.0", features = ["serde"] }
|
||||
serde = { version = "1.0.145", features = ["derive"] }
|
||||
serde_json = { version = "1.0.85", features = ["preserve_order"] }
|
||||
roaring = { version = "0.10.1", features = ["serde"] }
|
||||
serde = { version = "1.0.160", features = ["derive"] }
|
||||
serde_json = { version = "1.0.95", features = ["preserve_order"] }
|
||||
sha2 = "0.10.6"
|
||||
thiserror = "1.0.37"
|
||||
time = { version = "0.3.15", features = ["serde-well-known", "formatting", "parsing", "macros"] }
|
||||
uuid = { version = "1.1.2", features = ["serde", "v4"] }
|
||||
thiserror = "1.0.40"
|
||||
time = { version = "0.3.20", features = ["serde-well-known", "formatting", "parsing", "macros"] }
|
||||
uuid = { version = "1.3.1", features = ["serde", "v4"] }
|
||||
|
@ -34,6 +34,12 @@ impl AuthController {
|
||||
Ok(Self { store: Arc::new(store), master_key: master_key.clone() })
|
||||
}
|
||||
|
||||
/// Return `Ok(())` if the auth controller is able to access one of its database.
|
||||
pub fn health(&self) -> Result<()> {
|
||||
self.store.health()?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Return the size of the `AuthController` database in bytes.
|
||||
pub fn size(&self) -> Result<u64> {
|
||||
self.store.size()
|
||||
@ -304,6 +310,7 @@ pub const MASTER_KEY_MIN_SIZE: usize = 16;
|
||||
const MASTER_KEY_GEN_SIZE: usize = 32;
|
||||
|
||||
pub fn generate_master_key() -> String {
|
||||
use base64::Engine;
|
||||
use rand::rngs::OsRng;
|
||||
use rand::RngCore;
|
||||
|
||||
@ -314,5 +321,5 @@ pub fn generate_master_key() -> String {
|
||||
|
||||
// let's encode the random bytes to base64 to make them human-readable and not too long.
|
||||
// We're using the URL_SAFE alphabet that will produce keys without =, / or other unusual characters.
|
||||
base64::encode_config(buf, base64::URL_SAFE_NO_PAD)
|
||||
base64::engine::general_purpose::URL_SAFE_NO_PAD.encode(buf)
|
||||
}
|
||||
|
@ -61,6 +61,13 @@ impl HeedAuthStore {
|
||||
Ok(Self { env, keys, action_keyid_index_expiration, should_close_on_drop: true })
|
||||
}
|
||||
|
||||
/// Return `Ok(())` if the auth store is able to access one of its database.
|
||||
pub fn health(&self) -> Result<()> {
|
||||
let rtxn = self.env.read_txn()?;
|
||||
self.keys.first(&rtxn)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Return the size in bytes of database
|
||||
pub fn size(&self) -> Result<u64> {
|
||||
Ok(self.env.real_disk_size()?)
|
||||
|
@ -11,36 +11,36 @@ edition.workspace = true
|
||||
license.workspace = true
|
||||
|
||||
[dependencies]
|
||||
actix-web = { version = "4.2.1", default-features = false }
|
||||
anyhow = "1.0.65"
|
||||
actix-web = { version = "4.3.1", default-features = false }
|
||||
anyhow = "1.0.70"
|
||||
convert_case = "0.6.0"
|
||||
csv = "1.1.6"
|
||||
csv = "1.2.1"
|
||||
deserr = "0.5.0"
|
||||
either = { version = "1.6.1", features = ["serde"] }
|
||||
enum-iterator = "1.1.3"
|
||||
either = { version = "1.8.1", features = ["serde"] }
|
||||
enum-iterator = "1.4.0"
|
||||
file-store = { path = "../file-store" }
|
||||
flate2 = "1.0.24"
|
||||
flate2 = "1.0.25"
|
||||
fst = "0.4.7"
|
||||
memmap2 = "0.5.7"
|
||||
milli = { path = "../milli", default-features = false }
|
||||
roaring = { version = "0.10.0", features = ["serde"] }
|
||||
serde = { version = "1.0.145", features = ["derive"] }
|
||||
memmap2 = "0.5.10"
|
||||
milli = { path = "../milli" }
|
||||
roaring = { version = "0.10.1", features = ["serde"] }
|
||||
serde = { version = "1.0.160", features = ["derive"] }
|
||||
serde-cs = "0.2.4"
|
||||
serde_json = "1.0.85"
|
||||
serde_json = "1.0.95"
|
||||
tar = "0.4.38"
|
||||
tempfile = "3.3.0"
|
||||
thiserror = "1.0.30"
|
||||
time = { version = "0.3.7", features = ["serde-well-known", "formatting", "parsing", "macros"] }
|
||||
tokio = "1.24"
|
||||
uuid = { version = "1.1.2", features = ["serde", "v4"] }
|
||||
tempfile = "3.5.0"
|
||||
thiserror = "1.0.40"
|
||||
time = { version = "0.3.20", features = ["serde-well-known", "formatting", "parsing", "macros"] }
|
||||
tokio = "1.27"
|
||||
uuid = { version = "1.3.1", features = ["serde", "v4"] }
|
||||
|
||||
[dev-dependencies]
|
||||
insta = "1.19.1"
|
||||
insta = "1.29.0"
|
||||
meili-snap = { path = "../meili-snap" }
|
||||
|
||||
[features]
|
||||
# all specialized tokenizations
|
||||
default = ["milli/default"]
|
||||
all-tokenizations = ["milli/all-tokenizations"]
|
||||
|
||||
# chinese specialized tokenization
|
||||
chinese = ["milli/chinese"]
|
||||
@ -50,3 +50,6 @@ hebrew = ["milli/hebrew"]
|
||||
japanese = ["milli/japanese"]
|
||||
# thai specialized tokenization
|
||||
thai = ["milli/thai"]
|
||||
|
||||
# allow greek specialized tokenization
|
||||
greek = ["milli/greek"]
|
||||
|
@ -214,10 +214,12 @@ InvalidApiKeyUid , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidContentType , InvalidRequest , UNSUPPORTED_MEDIA_TYPE ;
|
||||
InvalidDocumentCsvDelimiter , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidDocumentFields , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidDocumentFilter , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidDocumentGeoField , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidDocumentId , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidDocumentLimit , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidDocumentOffset , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidDocumentDeleteFilter , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidIndexLimit , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidIndexOffset , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidIndexPrimaryKey , InvalidRequest , BAD_REQUEST ;
|
||||
@ -315,6 +317,7 @@ impl ErrorCode for milli::Error {
|
||||
UserError::MaxDatabaseSizeReached => Code::DatabaseSizeLimitReached,
|
||||
UserError::AttributeLimitReached => Code::MaxFieldsLimitExceeded,
|
||||
UserError::InvalidFilter(_) => Code::InvalidSearchFilter,
|
||||
UserError::InvalidFilterExpression(..) => Code::InvalidSearchFilter,
|
||||
UserError::MissingDocumentId { .. } => Code::MissingDocumentId,
|
||||
UserError::InvalidDocumentId { .. } | UserError::TooManyDocumentIds { .. } => {
|
||||
Code::InvalidDocumentId
|
||||
|
@ -49,6 +49,7 @@ impl Task {
|
||||
| IndexSwap { .. } => None,
|
||||
DocumentAdditionOrUpdate { index_uid, .. }
|
||||
| DocumentDeletion { index_uid, .. }
|
||||
| DocumentDeletionByFilter { index_uid, .. }
|
||||
| DocumentClear { index_uid }
|
||||
| SettingsUpdate { index_uid, .. }
|
||||
| IndexCreation { index_uid, .. }
|
||||
@ -67,6 +68,7 @@ impl Task {
|
||||
match self.kind {
|
||||
KindWithContent::DocumentAdditionOrUpdate { content_file, .. } => Some(content_file),
|
||||
KindWithContent::DocumentDeletion { .. }
|
||||
| KindWithContent::DocumentDeletionByFilter { .. }
|
||||
| KindWithContent::DocumentClear { .. }
|
||||
| KindWithContent::SettingsUpdate { .. }
|
||||
| KindWithContent::IndexDeletion { .. }
|
||||
@ -96,6 +98,10 @@ pub enum KindWithContent {
|
||||
index_uid: String,
|
||||
documents_ids: Vec<String>,
|
||||
},
|
||||
DocumentDeletionByFilter {
|
||||
index_uid: String,
|
||||
filter_expr: serde_json::Value,
|
||||
},
|
||||
DocumentClear {
|
||||
index_uid: String,
|
||||
},
|
||||
@ -145,6 +151,7 @@ impl KindWithContent {
|
||||
match self {
|
||||
KindWithContent::DocumentAdditionOrUpdate { .. } => Kind::DocumentAdditionOrUpdate,
|
||||
KindWithContent::DocumentDeletion { .. } => Kind::DocumentDeletion,
|
||||
KindWithContent::DocumentDeletionByFilter { .. } => Kind::DocumentDeletion,
|
||||
KindWithContent::DocumentClear { .. } => Kind::DocumentDeletion,
|
||||
KindWithContent::SettingsUpdate { .. } => Kind::SettingsUpdate,
|
||||
KindWithContent::IndexCreation { .. } => Kind::IndexCreation,
|
||||
@ -168,6 +175,7 @@ impl KindWithContent {
|
||||
| TaskDeletion { .. } => vec![],
|
||||
DocumentAdditionOrUpdate { index_uid, .. }
|
||||
| DocumentDeletion { index_uid, .. }
|
||||
| DocumentDeletionByFilter { index_uid, .. }
|
||||
| DocumentClear { index_uid }
|
||||
| SettingsUpdate { index_uid, .. }
|
||||
| IndexCreation { index_uid, .. }
|
||||
@ -200,6 +208,12 @@ impl KindWithContent {
|
||||
deleted_documents: None,
|
||||
})
|
||||
}
|
||||
KindWithContent::DocumentDeletionByFilter { index_uid: _, filter_expr } => {
|
||||
Some(Details::DocumentDeletionByFilter {
|
||||
original_filter: filter_expr.to_string(),
|
||||
deleted_documents: None,
|
||||
})
|
||||
}
|
||||
KindWithContent::DocumentClear { .. } | KindWithContent::IndexDeletion { .. } => {
|
||||
Some(Details::ClearAll { deleted_documents: None })
|
||||
}
|
||||
@ -242,6 +256,12 @@ impl KindWithContent {
|
||||
deleted_documents: Some(0),
|
||||
})
|
||||
}
|
||||
KindWithContent::DocumentDeletionByFilter { index_uid: _, filter_expr } => {
|
||||
Some(Details::DocumentDeletionByFilter {
|
||||
original_filter: filter_expr.to_string(),
|
||||
deleted_documents: Some(0),
|
||||
})
|
||||
}
|
||||
KindWithContent::DocumentClear { .. } => {
|
||||
Some(Details::ClearAll { deleted_documents: None })
|
||||
}
|
||||
@ -282,6 +302,7 @@ impl From<&KindWithContent> for Option<Details> {
|
||||
})
|
||||
}
|
||||
KindWithContent::DocumentDeletion { .. } => None,
|
||||
KindWithContent::DocumentDeletionByFilter { .. } => None,
|
||||
KindWithContent::DocumentClear { .. } => None,
|
||||
KindWithContent::SettingsUpdate { new_settings, .. } => {
|
||||
Some(Details::SettingsUpdate { settings: new_settings.clone() })
|
||||
@ -374,6 +395,7 @@ impl std::error::Error for ParseTaskStatusError {}
|
||||
pub enum Kind {
|
||||
DocumentAdditionOrUpdate,
|
||||
DocumentDeletion,
|
||||
DocumentDeletionByFilter,
|
||||
SettingsUpdate,
|
||||
IndexCreation,
|
||||
IndexDeletion,
|
||||
@ -390,6 +412,7 @@ impl Kind {
|
||||
match self {
|
||||
Kind::DocumentAdditionOrUpdate
|
||||
| Kind::DocumentDeletion
|
||||
| Kind::DocumentDeletionByFilter
|
||||
| Kind::SettingsUpdate
|
||||
| Kind::IndexCreation
|
||||
| Kind::IndexDeletion
|
||||
@ -407,6 +430,7 @@ impl Display for Kind {
|
||||
match self {
|
||||
Kind::DocumentAdditionOrUpdate => write!(f, "documentAdditionOrUpdate"),
|
||||
Kind::DocumentDeletion => write!(f, "documentDeletion"),
|
||||
Kind::DocumentDeletionByFilter => write!(f, "documentDeletionByFilter"),
|
||||
Kind::SettingsUpdate => write!(f, "settingsUpdate"),
|
||||
Kind::IndexCreation => write!(f, "indexCreation"),
|
||||
Kind::IndexDeletion => write!(f, "indexDeletion"),
|
||||
@ -478,6 +502,7 @@ pub enum Details {
|
||||
SettingsUpdate { settings: Box<Settings<Unchecked>> },
|
||||
IndexInfo { primary_key: Option<String> },
|
||||
DocumentDeletion { provided_ids: usize, deleted_documents: Option<u64> },
|
||||
DocumentDeletionByFilter { original_filter: String, deleted_documents: Option<u64> },
|
||||
ClearAll { deleted_documents: Option<u64> },
|
||||
TaskCancelation { matched_tasks: u64, canceled_tasks: Option<u64>, original_filter: String },
|
||||
TaskDeletion { matched_tasks: u64, deleted_tasks: Option<u64>, original_filter: String },
|
||||
@ -493,6 +518,9 @@ impl Details {
|
||||
*indexed_documents = Some(0)
|
||||
}
|
||||
Self::DocumentDeletion { deleted_documents, .. } => *deleted_documents = Some(0),
|
||||
Self::DocumentDeletionByFilter { deleted_documents, .. } => {
|
||||
*deleted_documents = Some(0)
|
||||
}
|
||||
Self::ClearAll { deleted_documents } => *deleted_documents = Some(0),
|
||||
Self::TaskCancelation { canceled_tasks, .. } => *canceled_tasks = Some(0),
|
||||
Self::TaskDeletion { deleted_tasks, .. } => *deleted_tasks = Some(0),
|
||||
|
@ -46,7 +46,7 @@ pub fn check_version_file(db_path: &Path) -> anyhow::Result<()> {
|
||||
pub enum VersionFileError {
|
||||
#[error(
|
||||
"Meilisearch (v{}) failed to infer the version of the database.
|
||||
To update Meilisearch please follow our guide on https://docs.meilisearch.com/learn/update_and_migration/updating.html.",
|
||||
To update Meilisearch please follow our guide on https://www.meilisearch.com/docs/learn/update_and_migration/updating.",
|
||||
env!("CARGO_PKG_VERSION").to_string()
|
||||
)]
|
||||
MissingVersionFile,
|
||||
@ -54,7 +54,7 @@ pub enum VersionFileError {
|
||||
MalformedVersionFile,
|
||||
#[error(
|
||||
"Your database version ({major}.{minor}.{patch}) is incompatible with your current engine version ({}).\n\
|
||||
To migrate data between Meilisearch versions, please follow our guide on https://docs.meilisearch.com/learn/update_and_migration/updating.html.",
|
||||
To migrate data between Meilisearch versions, please follow our guide on https://www.meilisearch.com/docs/learn/update_and_migration/updating.",
|
||||
env!("CARGO_PKG_VERSION").to_string()
|
||||
)]
|
||||
VersionMismatch { major: String, minor: String, patch: String },
|
||||
|
@ -13,106 +13,107 @@ license.workspace = true
|
||||
default-run = "meilisearch"
|
||||
|
||||
[dependencies]
|
||||
actix-cors = "0.6.3"
|
||||
actix-http = { version = "3.2.2", default-features = false, features = ["compress-brotli", "compress-gzip", "rustls"] }
|
||||
actix-web = { version = "4.2.1", default-features = false, features = ["macros", "compress-brotli", "compress-gzip", "cookies", "rustls"] }
|
||||
actix-cors = "0.6.4"
|
||||
actix-http = { version = "3.3.1", default-features = false, features = ["compress-brotli", "compress-gzip", "rustls"] }
|
||||
actix-web = { version = "4.3.1", default-features = false, features = ["macros", "compress-brotli", "compress-gzip", "cookies", "rustls"] }
|
||||
actix-web-static-files = { git = "https://github.com/kilork/actix-web-static-files.git", rev = "2d3b6160", optional = true }
|
||||
anyhow = { version = "1.0.65", features = ["backtrace"] }
|
||||
async-stream = "0.3.3"
|
||||
async-trait = "0.1.57"
|
||||
bstr = "1.0.1"
|
||||
byte-unit = { version = "4.0.14", default-features = false, features = ["std", "serde"] }
|
||||
bytes = "1.2.1"
|
||||
clap = { version = "4.0.9", features = ["derive", "env"] }
|
||||
crossbeam-channel = "0.5.6"
|
||||
anyhow = { version = "1.0.70", features = ["backtrace"] }
|
||||
async-stream = "0.3.5"
|
||||
async-trait = "0.1.68"
|
||||
bstr = "1.4.0"
|
||||
byte-unit = { version = "4.0.19", default-features = false, features = ["std", "serde"] }
|
||||
bytes = "1.4.0"
|
||||
clap = { version = "4.2.1", features = ["derive", "env"] }
|
||||
crossbeam-channel = "0.5.8"
|
||||
deserr = "0.5.0"
|
||||
dump = { path = "../dump" }
|
||||
either = "1.8.0"
|
||||
env_logger = "0.9.1"
|
||||
either = "1.8.1"
|
||||
env_logger = "0.10.0"
|
||||
file-store = { path = "../file-store" }
|
||||
flate2 = "1.0.24"
|
||||
flate2 = "1.0.25"
|
||||
fst = "0.4.7"
|
||||
futures = "0.3.24"
|
||||
futures-util = "0.3.24"
|
||||
http = "0.2.8"
|
||||
futures = "0.3.28"
|
||||
futures-util = "0.3.28"
|
||||
http = "0.2.9"
|
||||
index-scheduler = { path = "../index-scheduler" }
|
||||
indexmap = { version = "1.9.1", features = ["serde-1"] }
|
||||
indexmap = { version = "1.9.3", features = ["serde-1"] }
|
||||
itertools = "0.10.5"
|
||||
jsonwebtoken = "8.1.1"
|
||||
jsonwebtoken = "8.3.0"
|
||||
lazy_static = "1.4.0"
|
||||
log = "0.4.17"
|
||||
meilisearch-auth = { path = "../meilisearch-auth" }
|
||||
meilisearch-types = { path = "../meilisearch-types" }
|
||||
mimalloc = { version = "0.1.29", default-features = false }
|
||||
mime = "0.3.16"
|
||||
num_cpus = "1.13.1"
|
||||
mimalloc = { version = "0.1.36", default-features = false }
|
||||
mime = "0.3.17"
|
||||
num_cpus = "1.15.0"
|
||||
obkv = "0.2.0"
|
||||
once_cell = "1.15.0"
|
||||
once_cell = "1.17.1"
|
||||
parking_lot = "0.12.1"
|
||||
permissive-json-pointer = { path = "../permissive-json-pointer" }
|
||||
pin-project-lite = "0.2.9"
|
||||
platform-dirs = "0.3.0"
|
||||
prometheus = { version = "0.13.2", features = ["process"] }
|
||||
prometheus = { version = "0.13.3", features = ["process"] }
|
||||
rand = "0.8.5"
|
||||
rayon = "1.5.3"
|
||||
regex = "1.6.0"
|
||||
reqwest = { version = "0.11.12", features = ["rustls-tls", "json"], default-features = false }
|
||||
rustls = "0.20.6"
|
||||
rustls-pemfile = "1.0.1"
|
||||
segment = { version = "0.2.1", optional = true }
|
||||
serde = { version = "1.0.145", features = ["derive"] }
|
||||
serde_json = { version = "1.0.85", features = ["preserve_order"] }
|
||||
rayon = "1.7.0"
|
||||
regex = "1.7.3"
|
||||
reqwest = { version = "0.11.16", features = ["rustls-tls", "json"], default-features = false }
|
||||
rustls = "0.20.8"
|
||||
rustls-pemfile = "1.0.2"
|
||||
segment = { version = "0.2.2", optional = true }
|
||||
serde = { version = "1.0.160", features = ["derive"] }
|
||||
serde_json = { version = "1.0.95", features = ["preserve_order"] }
|
||||
sha2 = "0.10.6"
|
||||
siphasher = "0.3.10"
|
||||
slice-group-by = "0.3.0"
|
||||
static-files = { version = "0.2.3", optional = true }
|
||||
sysinfo = "0.26.4"
|
||||
sysinfo = "0.28.4"
|
||||
tar = "0.4.38"
|
||||
tempfile = "3.3.0"
|
||||
thiserror = "1.0.37"
|
||||
time = { version = "0.3.15", features = ["serde-well-known", "formatting", "parsing", "macros"] }
|
||||
tokio = { version = "1.24.2", features = ["full"] }
|
||||
tokio-stream = "0.1.10"
|
||||
toml = "0.5.9"
|
||||
uuid = { version = "1.1.2", features = ["serde", "v4"] }
|
||||
walkdir = "2.3.2"
|
||||
yaup = "0.2.0"
|
||||
tempfile = "3.5.0"
|
||||
thiserror = "1.0.40"
|
||||
time = { version = "0.3.20", features = ["serde-well-known", "formatting", "parsing", "macros"] }
|
||||
tokio = { version = "1.27.0", features = ["full"] }
|
||||
tokio-stream = "0.1.12"
|
||||
toml = "0.7.3"
|
||||
uuid = { version = "1.3.1", features = ["serde", "v4"] }
|
||||
walkdir = "2.3.3"
|
||||
yaup = "0.2.1"
|
||||
serde_urlencoded = "0.7.1"
|
||||
actix-utils = "3.0.1"
|
||||
atty = "0.2.14"
|
||||
termcolor = "1.1.3"
|
||||
termcolor = "1.2.0"
|
||||
|
||||
[dev-dependencies]
|
||||
actix-rt = "2.7.0"
|
||||
actix-rt = "2.8.0"
|
||||
assert-json-diff = "2.0.2"
|
||||
brotli = "3.3.4"
|
||||
insta = "1.19.1"
|
||||
insta = "1.29.0"
|
||||
manifest-dir-macros = "0.1.16"
|
||||
maplit = "1.0.2"
|
||||
meili-snap = {path = "../meili-snap"}
|
||||
temp-env = "0.3.1"
|
||||
temp-env = "0.3.3"
|
||||
urlencoding = "2.1.2"
|
||||
yaup = "0.2.1"
|
||||
|
||||
[build-dependencies]
|
||||
anyhow = { version = "1.0.65", optional = true }
|
||||
cargo_toml = { version = "0.14.0", optional = true }
|
||||
anyhow = { version = "1.0.70", optional = true }
|
||||
cargo_toml = { version = "0.15.2", optional = true }
|
||||
hex = { version = "0.4.3", optional = true }
|
||||
reqwest = { version = "0.11.12", features = ["blocking", "rustls-tls"], default-features = false, optional = true }
|
||||
sha-1 = { version = "0.10.0", optional = true }
|
||||
reqwest = { version = "0.11.16", features = ["blocking", "rustls-tls"], default-features = false, optional = true }
|
||||
sha-1 = { version = "0.10.1", optional = true }
|
||||
static-files = { version = "0.2.3", optional = true }
|
||||
tempfile = { version = "3.3.0", optional = true }
|
||||
vergen = { version = "7.4.2", default-features = false, features = ["git"] }
|
||||
zip = { version = "0.6.2", optional = true }
|
||||
tempfile = { version = "3.5.0", optional = true }
|
||||
vergen = { version = "7.5.1", default-features = false, features = ["git"] }
|
||||
zip = { version = "0.6.4", optional = true }
|
||||
|
||||
[features]
|
||||
default = ["analytics", "meilisearch-types/default", "mini-dashboard"]
|
||||
default = ["analytics", "meilisearch-types/all-tokenizations", "mini-dashboard"]
|
||||
analytics = ["segment"]
|
||||
mini-dashboard = ["actix-web-static-files", "static-files", "anyhow", "cargo_toml", "hex", "reqwest", "sha-1", "tempfile", "zip"]
|
||||
chinese = ["meilisearch-types/chinese"]
|
||||
hebrew = ["meilisearch-types/hebrew"]
|
||||
japanese = ["meilisearch-types/japanese"]
|
||||
thai = ["meilisearch-types/thai"]
|
||||
greek = ["meilisearch-types/greek"]
|
||||
|
||||
[package.metadata.mini-dashboard]
|
||||
assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.7/build.zip"
|
||||
|
@ -64,6 +64,7 @@ pub enum DocumentDeletionKind {
|
||||
PerDocumentId,
|
||||
ClearAll,
|
||||
PerBatch,
|
||||
PerFilter,
|
||||
}
|
||||
|
||||
pub trait Analytics: Sync + Send {
|
||||
|
@ -86,7 +86,7 @@ impl SegmentAnalytics {
|
||||
pub async fn new(
|
||||
opt: &Opt,
|
||||
index_scheduler: Arc<IndexScheduler>,
|
||||
auth_controller: AuthController,
|
||||
auth_controller: Arc<AuthController>,
|
||||
) -> Arc<dyn Analytics> {
|
||||
let instance_uid = super::find_user_id(&opt.db_path);
|
||||
let first_time_run = instance_uid.is_none();
|
||||
@ -376,7 +376,11 @@ impl Segment {
|
||||
})
|
||||
}
|
||||
|
||||
async fn run(mut self, index_scheduler: Arc<IndexScheduler>, auth_controller: AuthController) {
|
||||
async fn run(
|
||||
mut self,
|
||||
index_scheduler: Arc<IndexScheduler>,
|
||||
auth_controller: Arc<AuthController>,
|
||||
) {
|
||||
const INTERVAL: Duration = Duration::from_secs(60 * 60); // one hour
|
||||
// The first batch must be sent after one hour.
|
||||
let mut interval =
|
||||
@ -408,10 +412,10 @@ impl Segment {
|
||||
async fn tick(
|
||||
&mut self,
|
||||
index_scheduler: Arc<IndexScheduler>,
|
||||
auth_controller: AuthController,
|
||||
auth_controller: Arc<AuthController>,
|
||||
) {
|
||||
if let Ok(stats) =
|
||||
create_all_stats(index_scheduler.into(), auth_controller, &AuthFilter::default())
|
||||
create_all_stats(index_scheduler.into(), auth_controller.into(), &AuthFilter::default())
|
||||
{
|
||||
// Replace the version number with the prototype name if any.
|
||||
let version = if let Some(prototype) = crate::prototype_name() {
|
||||
@ -945,6 +949,7 @@ pub struct DocumentsDeletionAggregator {
|
||||
per_document_id: bool,
|
||||
clear_all: bool,
|
||||
per_batch: bool,
|
||||
per_filter: bool,
|
||||
}
|
||||
|
||||
impl DocumentsDeletionAggregator {
|
||||
@ -958,6 +963,7 @@ impl DocumentsDeletionAggregator {
|
||||
DocumentDeletionKind::PerDocumentId => ret.per_document_id = true,
|
||||
DocumentDeletionKind::ClearAll => ret.clear_all = true,
|
||||
DocumentDeletionKind::PerBatch => ret.per_batch = true,
|
||||
DocumentDeletionKind::PerFilter => ret.per_filter = true,
|
||||
}
|
||||
|
||||
ret
|
||||
@ -977,6 +983,7 @@ impl DocumentsDeletionAggregator {
|
||||
self.per_document_id |= other.per_document_id;
|
||||
self.clear_all |= other.clear_all;
|
||||
self.per_batch |= other.per_batch;
|
||||
self.per_filter |= other.per_filter;
|
||||
}
|
||||
|
||||
pub fn into_event(self, user: &User, event_name: &str) -> Option<Track> {
|
||||
|
@ -20,6 +20,8 @@ pub enum MeilisearchHttpError {
|
||||
InvalidContentType(String, Vec<String>),
|
||||
#[error("Document `{0}` not found.")]
|
||||
DocumentNotFound(String),
|
||||
#[error("Sending an empty filter is forbidden.")]
|
||||
EmptyFilter,
|
||||
#[error("Invalid syntax for the filter parameter: `expected {}, found: {1}`.", .0.join(", "))]
|
||||
InvalidExpression(&'static [&'static str], Value),
|
||||
#[error("A {0} payload is missing.")]
|
||||
@ -58,6 +60,7 @@ impl ErrorCode for MeilisearchHttpError {
|
||||
MeilisearchHttpError::MissingPayload(_) => Code::MissingPayload,
|
||||
MeilisearchHttpError::InvalidContentType(_, _) => Code::InvalidContentType,
|
||||
MeilisearchHttpError::DocumentNotFound(_) => Code::DocumentNotFound,
|
||||
MeilisearchHttpError::EmptyFilter => Code::InvalidDocumentDeleteFilter,
|
||||
MeilisearchHttpError::InvalidExpression(_, _) => Code::InvalidSearchFilter,
|
||||
MeilisearchHttpError::PayloadTooLarge => Code::PayloadTooLarge,
|
||||
MeilisearchHttpError::SwapIndexPayloadWrongLength(_) => Code::InvalidSwapIndexes,
|
||||
|
@ -4,6 +4,7 @@ use std::marker::PhantomData;
|
||||
use std::ops::Deref;
|
||||
use std::pin::Pin;
|
||||
|
||||
use actix_web::web::Data;
|
||||
use actix_web::FromRequest;
|
||||
pub use error::AuthenticationError;
|
||||
use futures::future::err;
|
||||
@ -23,7 +24,7 @@ impl<P, D> GuardedData<P, D> {
|
||||
}
|
||||
|
||||
async fn auth_bearer(
|
||||
auth: AuthController,
|
||||
auth: Data<AuthController>,
|
||||
token: String,
|
||||
index: Option<String>,
|
||||
data: Option<D>,
|
||||
@ -43,7 +44,7 @@ impl<P, D> GuardedData<P, D> {
|
||||
}
|
||||
}
|
||||
|
||||
async fn auth_token(auth: AuthController, data: Option<D>) -> Result<Self, ResponseError>
|
||||
async fn auth_token(auth: Data<AuthController>, data: Option<D>) -> Result<Self, ResponseError>
|
||||
where
|
||||
P: Policy + 'static,
|
||||
{
|
||||
@ -60,7 +61,7 @@ impl<P, D> GuardedData<P, D> {
|
||||
}
|
||||
|
||||
async fn authenticate(
|
||||
auth: AuthController,
|
||||
auth: Data<AuthController>,
|
||||
token: String,
|
||||
index: Option<String>,
|
||||
) -> Result<Option<AuthFilter>, ResponseError>
|
||||
@ -90,7 +91,7 @@ impl<P: Policy + 'static, D: 'static + Clone> FromRequest for GuardedData<P, D>
|
||||
req: &actix_web::HttpRequest,
|
||||
_payload: &mut actix_web::dev::Payload,
|
||||
) -> Self::Future {
|
||||
match req.app_data::<AuthController>().cloned() {
|
||||
match req.app_data::<Data<AuthController>>().cloned() {
|
||||
Some(auth) => match req
|
||||
.headers()
|
||||
.get("Authorization")
|
||||
@ -122,10 +123,15 @@ impl<P: Policy + 'static, D: 'static + Clone> FromRequest for GuardedData<P, D>
|
||||
}
|
||||
|
||||
pub trait Policy {
|
||||
fn authenticate(auth: AuthController, token: &str, index: Option<&str>) -> Option<AuthFilter>;
|
||||
fn authenticate(
|
||||
auth: Data<AuthController>,
|
||||
token: &str,
|
||||
index: Option<&str>,
|
||||
) -> Option<AuthFilter>;
|
||||
}
|
||||
|
||||
pub mod policies {
|
||||
use actix_web::web::Data;
|
||||
use jsonwebtoken::{decode, Algorithm, DecodingKey, Validation};
|
||||
use meilisearch_auth::{AuthController, AuthFilter, SearchRules};
|
||||
// reexport actions in policies in order to be used in routes configuration.
|
||||
@ -178,7 +184,7 @@ pub mod policies {
|
||||
/// Otherwise, returns an object containing the generated permissions: the search filters to add to a search, and the list of allowed indexes
|
||||
/// (that may contain more indexes than requested).
|
||||
fn authenticate(
|
||||
auth: AuthController,
|
||||
auth: Data<AuthController>,
|
||||
token: &str,
|
||||
index: Option<&str>,
|
||||
) -> Option<AuthFilter> {
|
||||
|
@ -88,7 +88,7 @@ fn is_empty_db(db_path: impl AsRef<Path>) -> bool {
|
||||
|
||||
pub fn create_app(
|
||||
index_scheduler: Data<IndexScheduler>,
|
||||
auth_controller: AuthController,
|
||||
auth_controller: Data<AuthController>,
|
||||
opt: Opt,
|
||||
analytics: Arc<dyn Analytics>,
|
||||
enable_dashboard: bool,
|
||||
@ -136,7 +136,7 @@ enum OnFailure {
|
||||
KeepDb,
|
||||
}
|
||||
|
||||
pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<(Arc<IndexScheduler>, AuthController)> {
|
||||
pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<(Arc<IndexScheduler>, Arc<AuthController>)> {
|
||||
let empty_db = is_empty_db(&opt.db_path);
|
||||
let (index_scheduler, auth_controller) = if let Some(ref snapshot_path) = opt.import_snapshot {
|
||||
let snapshot_path_exists = snapshot_path.exists();
|
||||
@ -195,6 +195,7 @@ pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<(Arc<IndexScheduler>, Auth
|
||||
|
||||
// We create a loop in a thread that registers snapshotCreation tasks
|
||||
let index_scheduler = Arc::new(index_scheduler);
|
||||
let auth_controller = Arc::new(auth_controller);
|
||||
if let ScheduleSnapshot::Enabled(snapshot_delay) = opt.schedule_snapshot {
|
||||
let snapshot_delay = Duration::from_secs(snapshot_delay);
|
||||
let index_scheduler = index_scheduler.clone();
|
||||
@ -233,6 +234,7 @@ fn open_or_create_database_unchecked(
|
||||
index_base_map_size: opt.max_index_size.get_bytes() as usize,
|
||||
indexer_config: (&opt.indexer_options).try_into()?,
|
||||
autobatching_enabled: true,
|
||||
max_number_of_tasks: 1_000_000,
|
||||
index_growth_amount: byte_unit::Byte::from_str("10GiB").unwrap().get_bytes() as usize,
|
||||
index_count: DEFAULT_INDEX_COUNT,
|
||||
})?)
|
||||
@ -380,7 +382,7 @@ fn import_dump(
|
||||
pub fn configure_data(
|
||||
config: &mut web::ServiceConfig,
|
||||
index_scheduler: Data<IndexScheduler>,
|
||||
auth: AuthController,
|
||||
auth: Data<AuthController>,
|
||||
opt: &Opt,
|
||||
analytics: Arc<dyn Analytics>,
|
||||
) {
|
||||
|
@ -74,13 +74,14 @@ async fn main() -> anyhow::Result<()> {
|
||||
|
||||
async fn run_http(
|
||||
index_scheduler: Arc<IndexScheduler>,
|
||||
auth_controller: AuthController,
|
||||
auth_controller: Arc<AuthController>,
|
||||
opt: Opt,
|
||||
analytics: Arc<dyn Analytics>,
|
||||
) -> anyhow::Result<()> {
|
||||
let enable_dashboard = &opt.env == "development";
|
||||
let opt_clone = opt.clone();
|
||||
let index_scheduler = Data::from(index_scheduler);
|
||||
let auth_controller = Data::from(auth_controller);
|
||||
|
||||
let http_server = HttpServer::new(move || {
|
||||
create_app(
|
||||
@ -148,7 +149,7 @@ pub fn print_launch_resume(
|
||||
"
|
||||
Thank you for using Meilisearch!
|
||||
|
||||
\nWe collect anonymized analytics to improve our product and your experience. To learn more, including how to turn off analytics, visit our dedicated documentation page: https://docs.meilisearch.com/learn/what_is_meilisearch/telemetry.html
|
||||
\nWe collect anonymized analytics to improve our product and your experience. To learn more, including how to turn off analytics, visit our dedicated documentation page: https://www.meilisearch.com/docs/learn/what_is_meilisearch/telemetry
|
||||
|
||||
Anonymous telemetry:\t\"Enabled\""
|
||||
);
|
||||
@ -180,9 +181,9 @@ Anonymous telemetry:\t\"Enabled\""
|
||||
}
|
||||
|
||||
eprintln!();
|
||||
eprintln!("Documentation:\t\thttps://docs.meilisearch.com");
|
||||
eprintln!("Documentation:\t\thttps://www.meilisearch.com/docs");
|
||||
eprintln!("Source code:\t\thttps://github.com/meilisearch/meilisearch");
|
||||
eprintln!("Contact:\t\thttps://docs.meilisearch.com/resources/contact.html");
|
||||
eprintln!("Discord:\t\thttps://discord.meilisearch.com");
|
||||
eprintln!();
|
||||
}
|
||||
|
||||
|
@ -68,7 +68,7 @@ const DEFAULT_LOG_EVERY_N: usize = 100_000;
|
||||
// The actual size of the virtual address space is computed at startup to determine how many 2TiB indexes can be
|
||||
// opened simultaneously.
|
||||
pub const INDEX_SIZE: u64 = 2 * 1024 * 1024 * 1024 * 1024; // 2 TiB
|
||||
pub const TASK_DB_SIZE: u64 = 10 * 1024 * 1024 * 1024; // 10 GiB
|
||||
pub const TASK_DB_SIZE: u64 = 20 * 1024 * 1024 * 1024; // 20 GiB
|
||||
|
||||
#[derive(Debug, Default, Clone, Copy, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "UPPERCASE")]
|
||||
@ -323,10 +323,10 @@ impl Opt {
|
||||
.clone()
|
||||
.unwrap_or_else(|| PathBuf::from(DEFAULT_CONFIG_FILE_PATH));
|
||||
|
||||
match std::fs::read(&config_file_path) {
|
||||
match std::fs::read_to_string(&config_file_path) {
|
||||
Ok(config) => {
|
||||
// If the file is successfully read, we deserialize it with `toml`.
|
||||
let opt_from_config = toml::from_slice::<Opt>(&config)?;
|
||||
let opt_from_config = toml::from_str::<Opt>(&config)?;
|
||||
// Return an error if config file contains 'config_file_path'
|
||||
// Using that key in the config file doesn't make sense bc it creates a logical loop (config file referencing itself)
|
||||
if opt_from_config.config_file_path.is_some() {
|
||||
|
@ -1,5 +1,6 @@
|
||||
use std::str;
|
||||
|
||||
use actix_web::web::Data;
|
||||
use actix_web::{web, HttpRequest, HttpResponse};
|
||||
use deserr::actix_web::{AwebJson, AwebQueryParameter};
|
||||
use deserr::Deserr;
|
||||
@ -35,7 +36,7 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
|
||||
}
|
||||
|
||||
pub async fn create_api_key(
|
||||
auth_controller: GuardedData<ActionPolicy<{ actions::KEYS_CREATE }>, AuthController>,
|
||||
auth_controller: GuardedData<ActionPolicy<{ actions::KEYS_CREATE }>, Data<AuthController>>,
|
||||
body: AwebJson<CreateApiKey, DeserrJsonError>,
|
||||
_req: HttpRequest,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
@ -66,7 +67,7 @@ impl ListApiKeys {
|
||||
}
|
||||
|
||||
pub async fn list_api_keys(
|
||||
auth_controller: GuardedData<ActionPolicy<{ actions::KEYS_GET }>, AuthController>,
|
||||
auth_controller: GuardedData<ActionPolicy<{ actions::KEYS_GET }>, Data<AuthController>>,
|
||||
list_api_keys: AwebQueryParameter<ListApiKeys, DeserrQueryParamError>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let paginate = list_api_keys.into_inner().as_pagination();
|
||||
@ -84,7 +85,7 @@ pub async fn list_api_keys(
|
||||
}
|
||||
|
||||
pub async fn get_api_key(
|
||||
auth_controller: GuardedData<ActionPolicy<{ actions::KEYS_GET }>, AuthController>,
|
||||
auth_controller: GuardedData<ActionPolicy<{ actions::KEYS_GET }>, Data<AuthController>>,
|
||||
path: web::Path<AuthParam>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let key = path.into_inner().key;
|
||||
@ -103,7 +104,7 @@ pub async fn get_api_key(
|
||||
}
|
||||
|
||||
pub async fn patch_api_key(
|
||||
auth_controller: GuardedData<ActionPolicy<{ actions::KEYS_UPDATE }>, AuthController>,
|
||||
auth_controller: GuardedData<ActionPolicy<{ actions::KEYS_UPDATE }>, Data<AuthController>>,
|
||||
body: AwebJson<PatchApiKey, DeserrJsonError>,
|
||||
path: web::Path<AuthParam>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
@ -123,7 +124,7 @@ pub async fn patch_api_key(
|
||||
}
|
||||
|
||||
pub async fn delete_api_key(
|
||||
auth_controller: GuardedData<ActionPolicy<{ actions::KEYS_DELETE }>, AuthController>,
|
||||
auth_controller: GuardedData<ActionPolicy<{ actions::KEYS_DELETE }>, Data<AuthController>>,
|
||||
path: web::Path<AuthParam>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let key = path.into_inner().key;
|
||||
|
@ -19,7 +19,7 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
|
||||
|
||||
pub async fn create_dump(
|
||||
index_scheduler: GuardedData<ActionPolicy<{ actions::DUMPS_CREATE }>, Data<IndexScheduler>>,
|
||||
auth_controller: GuardedData<ActionPolicy<{ actions::DUMPS_CREATE }>, AuthController>,
|
||||
auth_controller: GuardedData<ActionPolicy<{ actions::DUMPS_CREATE }>, Data<AuthController>>,
|
||||
req: HttpRequest,
|
||||
analytics: web::Data<dyn Analytics>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
|
@ -4,19 +4,20 @@ use actix_web::http::header::CONTENT_TYPE;
|
||||
use actix_web::web::Data;
|
||||
use actix_web::{web, HttpMessage, HttpRequest, HttpResponse};
|
||||
use bstr::ByteSlice;
|
||||
use deserr::actix_web::AwebQueryParameter;
|
||||
use deserr::actix_web::{AwebJson, AwebQueryParameter};
|
||||
use deserr::Deserr;
|
||||
use futures::StreamExt;
|
||||
use index_scheduler::IndexScheduler;
|
||||
use log::debug;
|
||||
use meilisearch_types::deserr::query_params::Param;
|
||||
use meilisearch_types::deserr::DeserrQueryParamError;
|
||||
use meilisearch_types::deserr::{DeserrJsonError, DeserrQueryParamError};
|
||||
use meilisearch_types::document_formats::{read_csv, read_json, read_ndjson, PayloadType};
|
||||
use meilisearch_types::error::deserr_codes::*;
|
||||
use meilisearch_types::error::{Code, ResponseError};
|
||||
use meilisearch_types::heed::RoTxn;
|
||||
use meilisearch_types::index_uid::IndexUid;
|
||||
use meilisearch_types::milli::update::IndexDocumentsMethod;
|
||||
use meilisearch_types::milli::DocumentId;
|
||||
use meilisearch_types::star_or::OptionStarOrList;
|
||||
use meilisearch_types::tasks::KindWithContent;
|
||||
use meilisearch_types::{milli, Document, Index};
|
||||
@ -36,6 +37,7 @@ use crate::extractors::authentication::GuardedData;
|
||||
use crate::extractors::payload::Payload;
|
||||
use crate::extractors::sequential_extractor::SeqHandler;
|
||||
use crate::routes::{PaginationView, SummarizedTaskView, PAGINATION_DEFAULT_LIMIT};
|
||||
use crate::search::parse_filter;
|
||||
|
||||
static ACCEPTED_CONTENT_TYPE: Lazy<Vec<String>> = Lazy::new(|| {
|
||||
vec!["application/json".to_string(), "application/x-ndjson".to_string(), "text/csv".to_string()]
|
||||
@ -66,13 +68,17 @@ pub struct DocumentParam {
|
||||
pub fn configure(cfg: &mut web::ServiceConfig) {
|
||||
cfg.service(
|
||||
web::resource("")
|
||||
.route(web::get().to(SeqHandler(get_all_documents)))
|
||||
.route(web::get().to(SeqHandler(get_documents)))
|
||||
.route(web::post().to(SeqHandler(replace_documents)))
|
||||
.route(web::put().to(SeqHandler(update_documents)))
|
||||
.route(web::delete().to(SeqHandler(clear_all_documents))),
|
||||
)
|
||||
// this route needs to be before the /documents/{document_id} to match properly
|
||||
.service(web::resource("/delete-batch").route(web::post().to(SeqHandler(delete_documents))))
|
||||
// these routes need to be before the /documents/{document_id} to match properly
|
||||
.service(
|
||||
web::resource("/delete-batch").route(web::post().to(SeqHandler(delete_documents_batch))),
|
||||
)
|
||||
.service(web::resource("/delete").route(web::post().to(SeqHandler(delete_documents_by_filter))))
|
||||
.service(web::resource("/fetch").route(web::post().to(SeqHandler(documents_by_query_post))))
|
||||
.service(
|
||||
web::resource("/{document_id}")
|
||||
.route(web::get().to(SeqHandler(get_document)))
|
||||
@ -127,29 +133,79 @@ pub async fn delete_document(
|
||||
|
||||
#[derive(Debug, Deserr)]
|
||||
#[deserr(error = DeserrQueryParamError, rename_all = camelCase, deny_unknown_fields)]
|
||||
pub struct BrowseQuery {
|
||||
pub struct BrowseQueryGet {
|
||||
#[deserr(default, error = DeserrQueryParamError<InvalidDocumentOffset>)]
|
||||
offset: Param<usize>,
|
||||
#[deserr(default = Param(PAGINATION_DEFAULT_LIMIT), error = DeserrQueryParamError<InvalidDocumentLimit>)]
|
||||
limit: Param<usize>,
|
||||
#[deserr(default, error = DeserrQueryParamError<InvalidDocumentFields>)]
|
||||
fields: OptionStarOrList<String>,
|
||||
#[deserr(default, error = DeserrQueryParamError<InvalidDocumentFilter>)]
|
||||
filter: Option<String>,
|
||||
}
|
||||
|
||||
pub async fn get_all_documents(
|
||||
#[derive(Debug, Deserr)]
|
||||
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
|
||||
pub struct BrowseQuery {
|
||||
#[deserr(default, error = DeserrJsonError<InvalidDocumentOffset>)]
|
||||
offset: usize,
|
||||
#[deserr(default = PAGINATION_DEFAULT_LIMIT, error = DeserrJsonError<InvalidDocumentLimit>)]
|
||||
limit: usize,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidDocumentFields>)]
|
||||
fields: Option<Vec<String>>,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidDocumentFilter>)]
|
||||
filter: Option<Value>,
|
||||
}
|
||||
|
||||
pub async fn documents_by_query_post(
|
||||
index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_GET }>, Data<IndexScheduler>>,
|
||||
index_uid: web::Path<String>,
|
||||
params: AwebQueryParameter<BrowseQuery, DeserrQueryParamError>,
|
||||
body: AwebJson<BrowseQuery, DeserrJsonError>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
debug!("called with body: {:?}", body);
|
||||
|
||||
documents_by_query(&index_scheduler, index_uid, body.into_inner())
|
||||
}
|
||||
|
||||
pub async fn get_documents(
|
||||
index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_GET }>, Data<IndexScheduler>>,
|
||||
index_uid: web::Path<String>,
|
||||
params: AwebQueryParameter<BrowseQueryGet, DeserrQueryParamError>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
debug!("called with params: {:?}", params);
|
||||
|
||||
let BrowseQueryGet { limit, offset, fields, filter } = params.into_inner();
|
||||
|
||||
let filter = match filter {
|
||||
Some(f) => match serde_json::from_str(&f) {
|
||||
Ok(v) => Some(v),
|
||||
_ => Some(Value::String(f)),
|
||||
},
|
||||
None => None,
|
||||
};
|
||||
|
||||
let query = BrowseQuery {
|
||||
offset: offset.0,
|
||||
limit: limit.0,
|
||||
fields: fields.merge_star_and_none(),
|
||||
filter,
|
||||
};
|
||||
|
||||
documents_by_query(&index_scheduler, index_uid, query)
|
||||
}
|
||||
|
||||
fn documents_by_query(
|
||||
index_scheduler: &IndexScheduler,
|
||||
index_uid: web::Path<String>,
|
||||
query: BrowseQuery,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
|
||||
debug!("called with params: {:?}", params);
|
||||
let BrowseQuery { limit, offset, fields } = params.into_inner();
|
||||
let attributes_to_retrieve = fields.merge_star_and_none();
|
||||
let BrowseQuery { offset, limit, fields, filter } = query;
|
||||
|
||||
let index = index_scheduler.index(&index_uid)?;
|
||||
let (total, documents) = retrieve_documents(&index, offset.0, limit.0, attributes_to_retrieve)?;
|
||||
let (total, documents) = retrieve_documents(&index, offset, limit, filter, fields)?;
|
||||
|
||||
let ret = PaginationView::new(offset.0, limit.0, total as usize, documents);
|
||||
let ret = PaginationView::new(offset, limit, total as usize, documents);
|
||||
|
||||
debug!("returns: {:?}", ret);
|
||||
Ok(HttpResponse::Ok().json(ret))
|
||||
@ -373,7 +429,7 @@ async fn document_addition(
|
||||
Ok(task.into())
|
||||
}
|
||||
|
||||
pub async fn delete_documents(
|
||||
pub async fn delete_documents_batch(
|
||||
index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_DELETE }>, Data<IndexScheduler>>,
|
||||
index_uid: web::Path<String>,
|
||||
body: web::Json<Vec<Value>>,
|
||||
@ -399,6 +455,42 @@ pub async fn delete_documents(
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserr)]
|
||||
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
|
||||
pub struct DocumentDeletionByFilter {
|
||||
#[deserr(error = DeserrJsonError<InvalidDocumentDeleteFilter>)]
|
||||
filter: Value,
|
||||
}
|
||||
|
||||
pub async fn delete_documents_by_filter(
|
||||
index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_DELETE }>, Data<IndexScheduler>>,
|
||||
index_uid: web::Path<String>,
|
||||
body: AwebJson<DocumentDeletionByFilter, DeserrJsonError>,
|
||||
req: HttpRequest,
|
||||
analytics: web::Data<dyn Analytics>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
debug!("called with params: {:?}", body);
|
||||
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
|
||||
let index_uid = index_uid.into_inner();
|
||||
let filter = body.into_inner().filter;
|
||||
|
||||
analytics.delete_documents(DocumentDeletionKind::PerFilter, &req);
|
||||
|
||||
// we ensure the filter is well formed before enqueuing it
|
||||
|| -> Result<_, ResponseError> {
|
||||
Ok(crate::search::parse_filter(&filter)?.ok_or(MeilisearchHttpError::EmptyFilter)?)
|
||||
}()
|
||||
// and whatever was the error, the error code should always be an InvalidDocumentDeleteFilter
|
||||
.map_err(|err| ResponseError::from_msg(err.message, Code::InvalidDocumentDeleteFilter))?;
|
||||
let task = KindWithContent::DocumentDeletionByFilter { index_uid, filter_expr: filter };
|
||||
|
||||
let task: SummarizedTaskView =
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
|
||||
|
||||
debug!("returns: {:?}", task);
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
}
|
||||
|
||||
pub async fn clear_all_documents(
|
||||
index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_DELETE }>, Data<IndexScheduler>>,
|
||||
index_uid: web::Path<String>,
|
||||
@ -416,14 +508,15 @@ pub async fn clear_all_documents(
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
}
|
||||
|
||||
fn all_documents<'a>(
|
||||
index: &Index,
|
||||
rtxn: &'a RoTxn,
|
||||
fn some_documents<'a, 't: 'a>(
|
||||
index: &'a Index,
|
||||
rtxn: &'t RoTxn,
|
||||
doc_ids: impl IntoIterator<Item = DocumentId> + 'a,
|
||||
) -> Result<impl Iterator<Item = Result<Document, ResponseError>> + 'a, ResponseError> {
|
||||
let fields_ids_map = index.fields_ids_map(rtxn)?;
|
||||
let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();
|
||||
|
||||
Ok(index.all_documents(rtxn)?.map(move |ret| {
|
||||
Ok(index.iter_documents(rtxn, doc_ids)?.map(move |ret| {
|
||||
ret.map_err(ResponseError::from).and_then(|(_key, document)| -> Result<_, ResponseError> {
|
||||
Ok(milli::obkv_to_json(&all_fields, &fields_ids_map, document)?)
|
||||
})
|
||||
@ -434,24 +527,45 @@ fn retrieve_documents<S: AsRef<str>>(
|
||||
index: &Index,
|
||||
offset: usize,
|
||||
limit: usize,
|
||||
filter: Option<Value>,
|
||||
attributes_to_retrieve: Option<Vec<S>>,
|
||||
) -> Result<(u64, Vec<Document>), ResponseError> {
|
||||
let rtxn = index.read_txn()?;
|
||||
let filter = &filter;
|
||||
let filter = if let Some(filter) = filter {
|
||||
parse_filter(filter)
|
||||
.map_err(|err| ResponseError::from_msg(err.to_string(), Code::InvalidDocumentFilter))?
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
let mut documents = Vec::new();
|
||||
for document in all_documents(index, &rtxn)?.skip(offset).take(limit) {
|
||||
let document = match &attributes_to_retrieve {
|
||||
Some(attributes_to_retrieve) => permissive_json_pointer::select_values(
|
||||
&document?,
|
||||
attributes_to_retrieve.iter().map(|s| s.as_ref()),
|
||||
),
|
||||
None => document?,
|
||||
};
|
||||
documents.push(document);
|
||||
}
|
||||
let candidates = if let Some(filter) = filter {
|
||||
filter.evaluate(&rtxn, index)?
|
||||
} else {
|
||||
index.documents_ids(&rtxn)?
|
||||
};
|
||||
|
||||
let number_of_documents = index.number_of_documents(&rtxn)?;
|
||||
Ok((number_of_documents, documents))
|
||||
let (it, number_of_documents) = {
|
||||
let number_of_documents = candidates.len();
|
||||
(
|
||||
some_documents(index, &rtxn, candidates.into_iter().skip(offset).take(limit))?,
|
||||
number_of_documents,
|
||||
)
|
||||
};
|
||||
|
||||
let documents: Result<Vec<_>, ResponseError> = it
|
||||
.map(|document| {
|
||||
Ok(match &attributes_to_retrieve {
|
||||
Some(attributes_to_retrieve) => permissive_json_pointer::select_values(
|
||||
&document?,
|
||||
attributes_to_retrieve.iter().map(|s| s.as_ref()),
|
||||
),
|
||||
None => document?,
|
||||
})
|
||||
})
|
||||
.collect();
|
||||
|
||||
Ok((number_of_documents, documents?))
|
||||
}
|
||||
|
||||
fn retrieve_document<S: AsRef<str>>(
|
||||
|
@ -17,7 +17,7 @@ pub fn configure(config: &mut web::ServiceConfig) {
|
||||
|
||||
pub async fn get_metrics(
|
||||
index_scheduler: GuardedData<ActionPolicy<{ actions::METRICS_GET }>, Data<IndexScheduler>>,
|
||||
auth_controller: GuardedData<ActionPolicy<{ actions::METRICS_GET }>, AuthController>,
|
||||
auth_controller: GuardedData<ActionPolicy<{ actions::METRICS_GET }>, Data<AuthController>>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let auth_filters = index_scheduler.filters();
|
||||
if !auth_filters.all_indexes_authorized() {
|
||||
|
@ -238,7 +238,7 @@ pub struct Stats {
|
||||
|
||||
async fn get_stats(
|
||||
index_scheduler: GuardedData<ActionPolicy<{ actions::STATS_GET }>, Data<IndexScheduler>>,
|
||||
auth_controller: GuardedData<ActionPolicy<{ actions::STATS_GET }>, AuthController>,
|
||||
auth_controller: GuardedData<ActionPolicy<{ actions::STATS_GET }>, Data<AuthController>>,
|
||||
req: HttpRequest,
|
||||
analytics: web::Data<dyn Analytics>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
@ -253,7 +253,7 @@ async fn get_stats(
|
||||
|
||||
pub fn create_all_stats(
|
||||
index_scheduler: Data<IndexScheduler>,
|
||||
auth_controller: AuthController,
|
||||
auth_controller: Data<AuthController>,
|
||||
filters: &meilisearch_auth::AuthFilter,
|
||||
) -> Result<Stats, ResponseError> {
|
||||
let mut last_task: Option<OffsetDateTime> = None;
|
||||
@ -318,9 +318,14 @@ struct KeysResponse {
|
||||
|
||||
pub async fn get_health(
|
||||
req: HttpRequest,
|
||||
index_scheduler: Data<IndexScheduler>,
|
||||
auth_controller: Data<AuthController>,
|
||||
analytics: web::Data<dyn Analytics>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
analytics.health_seen(&req);
|
||||
|
||||
index_scheduler.health().unwrap();
|
||||
auth_controller.health().unwrap();
|
||||
|
||||
Ok(HttpResponse::Ok().json(serde_json::json!({ "status": "available" })))
|
||||
}
|
||||
|
@ -133,6 +133,14 @@ impl From<Details> for DetailsView {
|
||||
deleted_documents: Some(deleted_documents),
|
||||
..DetailsView::default()
|
||||
},
|
||||
Details::DocumentDeletionByFilter { original_filter, deleted_documents } => {
|
||||
DetailsView {
|
||||
provided_ids: Some(0),
|
||||
original_filter: Some(original_filter),
|
||||
deleted_documents: Some(deleted_documents),
|
||||
..DetailsView::default()
|
||||
}
|
||||
}
|
||||
Details::ClearAll { deleted_documents } => {
|
||||
DetailsView { deleted_documents: Some(deleted_documents), ..DetailsView::default() }
|
||||
}
|
||||
@ -721,7 +729,7 @@ mod tests {
|
||||
let err = deserr_query_params::<TaskDeletionOrCancelationQuery>(params).unwrap_err();
|
||||
snapshot!(meili_snap::json_string!(err), @r###"
|
||||
{
|
||||
"message": "Invalid value in parameter `types`: `createIndex` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentDeletion`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`.",
|
||||
"message": "Invalid value in parameter `types`: `createIndex` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentDeletion`, `documentDeletionByFilter`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`.",
|
||||
"code": "invalid_task_types",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_task_types"
|
||||
|
@ -745,7 +745,7 @@ fn format_value<A: AsRef<[u8]>>(
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_filter(facets: &Value) -> Result<Option<Filter>, MeilisearchHttpError> {
|
||||
pub(crate) fn parse_filter(facets: &Value) -> Result<Option<Filter>, MeilisearchHttpError> {
|
||||
match facets {
|
||||
Value::String(expr) => {
|
||||
let condition = Filter::from_str(expr)?;
|
||||
|
@ -60,7 +60,7 @@ async fn create_api_key_bad_uid() {
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"message": "Invalid value at `.uid`: invalid character: expected an optional prefix of `urn:uuid:` followed by [0-9a-zA-Z], found `o` at 2",
|
||||
"message": "Invalid value at `.uid`: invalid character: expected an optional prefix of `urn:uuid:` followed by [0-9a-fA-F-], found `o` at 2",
|
||||
"code": "invalid_api_key_uid",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_api_key_uid"
|
||||
|
@ -198,6 +198,11 @@ impl Index<'_> {
|
||||
self.service.get(url).await
|
||||
}
|
||||
|
||||
pub async fn get_document_by_filter(&self, payload: Value) -> (Value, StatusCode) {
|
||||
let url = format!("/indexes/{}/documents/fetch", urlencode(self.uid.as_ref()));
|
||||
self.service.post(url, payload).await
|
||||
}
|
||||
|
||||
pub async fn get_all_documents_raw(&self, options: &str) -> (Value, StatusCode) {
|
||||
let url = format!("/indexes/{}/documents{}", urlencode(self.uid.as_ref()), options);
|
||||
self.service.get(url).await
|
||||
@ -225,6 +230,11 @@ impl Index<'_> {
|
||||
self.service.delete(url).await
|
||||
}
|
||||
|
||||
pub async fn delete_document_by_filter(&self, body: Value) -> (Value, StatusCode) {
|
||||
let url = format!("/indexes/{}/documents/delete", urlencode(self.uid.as_ref()));
|
||||
self.service.post_encoded(url, body, self.encoder).await
|
||||
}
|
||||
|
||||
pub async fn clear_all_documents(&self) -> (Value, StatusCode) {
|
||||
let url = format!("/indexes/{}/documents", urlencode(self.uid.as_ref()));
|
||||
self.service.delete(url).await
|
||||
|
@ -82,7 +82,7 @@ impl Server {
|
||||
> {
|
||||
actix_web::test::init_service(create_app(
|
||||
self.service.index_scheduler.clone().into(),
|
||||
self.service.auth.clone(),
|
||||
self.service.auth.clone().into(),
|
||||
self.service.options.clone(),
|
||||
analytics::MockAnalytics::new(&self.service.options),
|
||||
true,
|
||||
|
@ -13,7 +13,7 @@ use crate::common::encoder::Encoder;
|
||||
|
||||
pub struct Service {
|
||||
pub index_scheduler: Arc<IndexScheduler>,
|
||||
pub auth: AuthController,
|
||||
pub auth: Arc<AuthController>,
|
||||
pub options: Opt,
|
||||
pub api_key: Option<String>,
|
||||
}
|
||||
@ -107,7 +107,7 @@ impl Service {
|
||||
pub async fn request(&self, mut req: test::TestRequest) -> (Value, StatusCode) {
|
||||
let app = test::init_service(create_app(
|
||||
self.index_scheduler.clone().into(),
|
||||
self.auth.clone(),
|
||||
self.auth.clone().into(),
|
||||
self.options.clone(),
|
||||
analytics::MockAnalytics::new(&self.options),
|
||||
true,
|
||||
|
@ -1773,7 +1773,7 @@ async fn error_add_documents_payload_size() {
|
||||
"content": "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Donec metus erat, consequat in blandit venenatis, ultricies eu ipsum. Etiam luctus elit et mollis ultrices. Nam turpis risus, dictum non eros in, eleifend feugiat elit. Morbi non dolor pulvinar, sagittis mi sed, ultricies lorem. Nulla ultricies sem metus. Donec at suscipit quam, sed elementum mi. Suspendisse potenti. Fusce pharetra turpis tortor, sed eleifend odio dapibus ut. Nulla facilisi. Suspendisse elementum, dui eget aliquet dignissim, ex tellus aliquam nisl, at eleifend nisl metus tempus diam. Mauris fermentum sollicitudin efficitur. Donec dignissim est vitae elit finibus faucibus"
|
||||
}
|
||||
);
|
||||
let documents: Vec<_> = (0..16000).into_iter().map(|_| document.clone()).collect();
|
||||
let documents: Vec<_> = (0..16000).map(|_| document.clone()).collect();
|
||||
let documents = json!(documents);
|
||||
let (response, code) = index.add_documents(documents, None).await;
|
||||
|
||||
@ -1934,7 +1934,6 @@ async fn batch_several_documents_addition() {
|
||||
let index = server.index("test");
|
||||
|
||||
let mut documents: Vec<_> = (0..150usize)
|
||||
.into_iter()
|
||||
.map(|id| {
|
||||
json!(
|
||||
{
|
||||
|
@ -1,3 +1,4 @@
|
||||
use meili_snap::{json_string, snapshot};
|
||||
use serde_json::json;
|
||||
|
||||
use crate::common::{GetAllDocumentsOptions, Server};
|
||||
@ -135,3 +136,254 @@ async fn delete_no_document_batch() {
|
||||
assert_eq!(code, 200);
|
||||
assert_eq!(response["results"].as_array().unwrap().len(), 3);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn delete_document_by_filter() {
|
||||
let server = Server::new().await;
|
||||
let index = server.index("doggo");
|
||||
index.update_settings_filterable_attributes(json!(["color"])).await;
|
||||
index
|
||||
.add_documents(
|
||||
json!([
|
||||
{ "id": 0, "color": "red" },
|
||||
{ "id": 1, "color": "blue" },
|
||||
{ "id": 2, "color": "blue" },
|
||||
{ "id": 3 },
|
||||
]),
|
||||
Some("id"),
|
||||
)
|
||||
.await;
|
||||
index.wait_task(1).await;
|
||||
let (response, code) =
|
||||
index.delete_document_by_filter(json!({ "filter": "color = blue"})).await;
|
||||
snapshot!(code, @"202 Accepted");
|
||||
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
|
||||
{
|
||||
"taskUid": 2,
|
||||
"indexUid": "doggo",
|
||||
"status": "enqueued",
|
||||
"type": "documentDeletion",
|
||||
"enqueuedAt": "[date]"
|
||||
}
|
||||
"###);
|
||||
|
||||
let response = index.wait_task(2).await;
|
||||
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]", ".duration" => "[duration]" }), @r###"
|
||||
{
|
||||
"uid": 2,
|
||||
"indexUid": "doggo",
|
||||
"status": "succeeded",
|
||||
"type": "documentDeletion",
|
||||
"canceledBy": null,
|
||||
"details": {
|
||||
"providedIds": 0,
|
||||
"deletedDocuments": 2,
|
||||
"originalFilter": "\"color = blue\""
|
||||
},
|
||||
"error": null,
|
||||
"duration": "[duration]",
|
||||
"enqueuedAt": "[date]",
|
||||
"startedAt": "[date]",
|
||||
"finishedAt": "[date]"
|
||||
}
|
||||
"###);
|
||||
|
||||
let (documents, code) = index.get_all_documents(GetAllDocumentsOptions::default()).await;
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(documents), @r###"
|
||||
{
|
||||
"results": [
|
||||
{
|
||||
"id": 0,
|
||||
"color": "red"
|
||||
},
|
||||
{
|
||||
"id": 3
|
||||
}
|
||||
],
|
||||
"offset": 0,
|
||||
"limit": 20,
|
||||
"total": 2
|
||||
}
|
||||
"###);
|
||||
|
||||
let (response, code) =
|
||||
index.delete_document_by_filter(json!({ "filter": "color NOT EXISTS"})).await;
|
||||
snapshot!(code, @"202 Accepted");
|
||||
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]", ".duration" => "[duration]" }), @r###"
|
||||
{
|
||||
"taskUid": 3,
|
||||
"indexUid": "doggo",
|
||||
"status": "enqueued",
|
||||
"type": "documentDeletion",
|
||||
"enqueuedAt": "[date]"
|
||||
}
|
||||
"###);
|
||||
|
||||
let response = index.wait_task(3).await;
|
||||
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]", ".duration" => "[duration]" }), @r###"
|
||||
{
|
||||
"uid": 3,
|
||||
"indexUid": "doggo",
|
||||
"status": "succeeded",
|
||||
"type": "documentDeletion",
|
||||
"canceledBy": null,
|
||||
"details": {
|
||||
"providedIds": 0,
|
||||
"deletedDocuments": 1,
|
||||
"originalFilter": "\"color NOT EXISTS\""
|
||||
},
|
||||
"error": null,
|
||||
"duration": "[duration]",
|
||||
"enqueuedAt": "[date]",
|
||||
"startedAt": "[date]",
|
||||
"finishedAt": "[date]"
|
||||
}
|
||||
"###);
|
||||
|
||||
let (documents, code) = index.get_all_documents(GetAllDocumentsOptions::default()).await;
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(documents), @r###"
|
||||
{
|
||||
"results": [
|
||||
{
|
||||
"id": 0,
|
||||
"color": "red"
|
||||
}
|
||||
],
|
||||
"offset": 0,
|
||||
"limit": 20,
|
||||
"total": 1
|
||||
}
|
||||
"###);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn delete_document_by_complex_filter() {
|
||||
let server = Server::new().await;
|
||||
let index = server.index("doggo");
|
||||
index.update_settings_filterable_attributes(json!(["color"])).await;
|
||||
index
|
||||
.add_documents(
|
||||
json!([
|
||||
{ "id": 0, "color": "red" },
|
||||
{ "id": 1, "color": "blue" },
|
||||
{ "id": 2, "color": "blue" },
|
||||
{ "id": 3, "color": "green" },
|
||||
{ "id": 4 },
|
||||
]),
|
||||
Some("id"),
|
||||
)
|
||||
.await;
|
||||
index.wait_task(1).await;
|
||||
let (response, code) = index
|
||||
.delete_document_by_filter(
|
||||
json!({ "filter": ["color != red", "color != green", "color EXISTS"] }),
|
||||
)
|
||||
.await;
|
||||
snapshot!(code, @"202 Accepted");
|
||||
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
|
||||
{
|
||||
"taskUid": 2,
|
||||
"indexUid": "doggo",
|
||||
"status": "enqueued",
|
||||
"type": "documentDeletion",
|
||||
"enqueuedAt": "[date]"
|
||||
}
|
||||
"###);
|
||||
|
||||
let response = index.wait_task(2).await;
|
||||
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]", ".duration" => "[duration]" }), @r###"
|
||||
{
|
||||
"uid": 2,
|
||||
"indexUid": "doggo",
|
||||
"status": "succeeded",
|
||||
"type": "documentDeletion",
|
||||
"canceledBy": null,
|
||||
"details": {
|
||||
"providedIds": 0,
|
||||
"deletedDocuments": 2,
|
||||
"originalFilter": "[\"color != red\",\"color != green\",\"color EXISTS\"]"
|
||||
},
|
||||
"error": null,
|
||||
"duration": "[duration]",
|
||||
"enqueuedAt": "[date]",
|
||||
"startedAt": "[date]",
|
||||
"finishedAt": "[date]"
|
||||
}
|
||||
"###);
|
||||
|
||||
let (documents, code) = index.get_all_documents(GetAllDocumentsOptions::default()).await;
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(documents), @r###"
|
||||
{
|
||||
"results": [
|
||||
{
|
||||
"id": 0,
|
||||
"color": "red"
|
||||
},
|
||||
{
|
||||
"id": 3,
|
||||
"color": "green"
|
||||
},
|
||||
{
|
||||
"id": 4
|
||||
}
|
||||
],
|
||||
"offset": 0,
|
||||
"limit": 20,
|
||||
"total": 3
|
||||
}
|
||||
"###);
|
||||
|
||||
let (response, code) = index
|
||||
.delete_document_by_filter(json!({ "filter": [["color = green", "color NOT EXISTS"]] }))
|
||||
.await;
|
||||
snapshot!(code, @"202 Accepted");
|
||||
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]", ".duration" => "[duration]" }), @r###"
|
||||
{
|
||||
"taskUid": 3,
|
||||
"indexUid": "doggo",
|
||||
"status": "enqueued",
|
||||
"type": "documentDeletion",
|
||||
"enqueuedAt": "[date]"
|
||||
}
|
||||
"###);
|
||||
|
||||
let response = index.wait_task(3).await;
|
||||
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]", ".duration" => "[duration]" }), @r###"
|
||||
{
|
||||
"uid": 3,
|
||||
"indexUid": "doggo",
|
||||
"status": "succeeded",
|
||||
"type": "documentDeletion",
|
||||
"canceledBy": null,
|
||||
"details": {
|
||||
"providedIds": 0,
|
||||
"deletedDocuments": 4,
|
||||
"originalFilter": "[[\"color = green\",\"color NOT EXISTS\"]]"
|
||||
},
|
||||
"error": null,
|
||||
"duration": "[duration]",
|
||||
"enqueuedAt": "[date]",
|
||||
"startedAt": "[date]",
|
||||
"finishedAt": "[date]"
|
||||
}
|
||||
"###);
|
||||
|
||||
let (documents, code) = index.get_all_documents(GetAllDocumentsOptions::default()).await;
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(documents), @r###"
|
||||
{
|
||||
"results": [
|
||||
{
|
||||
"id": 0,
|
||||
"color": "red"
|
||||
}
|
||||
],
|
||||
"offset": 0,
|
||||
"limit": 20,
|
||||
"total": 1
|
||||
}
|
||||
"###);
|
||||
}
|
||||
|
@ -82,6 +82,111 @@ async fn get_all_documents_bad_limit() {
|
||||
"###);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn get_all_documents_bad_filter() {
|
||||
let server = Server::new().await;
|
||||
let index = server.index("test");
|
||||
|
||||
// Since the filter can't be parsed automatically by deserr, we have the wrong error message
|
||||
// if the index does not exist: we could expect to get an error message about the invalid filter before
|
||||
// the existence of the index is checked, but it is not the case.
|
||||
let (response, code) = index.get_all_documents_raw("?filter").await;
|
||||
snapshot!(code, @"404 Not Found");
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"message": "Index `test` not found.",
|
||||
"code": "index_not_found",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#index_not_found"
|
||||
}
|
||||
"###);
|
||||
|
||||
let (response, code) = index.get_all_documents_raw("?filter=doggo").await;
|
||||
snapshot!(code, @"404 Not Found");
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"message": "Index `test` not found.",
|
||||
"code": "index_not_found",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#index_not_found"
|
||||
}
|
||||
"###);
|
||||
|
||||
let (response, code) = index.get_all_documents_raw("?filter=doggo=bernese").await;
|
||||
snapshot!(code, @"404 Not Found");
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"message": "Index `test` not found.",
|
||||
"code": "index_not_found",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#index_not_found"
|
||||
}
|
||||
"###);
|
||||
|
||||
let (response, code) = index.create(None).await;
|
||||
snapshot!(code, @"202 Accepted");
|
||||
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
|
||||
{
|
||||
"taskUid": 0,
|
||||
"indexUid": "test",
|
||||
"status": "enqueued",
|
||||
"type": "indexCreation",
|
||||
"enqueuedAt": "[date]"
|
||||
}
|
||||
"###);
|
||||
let response = server.wait_task(0).await;
|
||||
snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }), @r###"
|
||||
{
|
||||
"uid": 0,
|
||||
"indexUid": "test",
|
||||
"status": "succeeded",
|
||||
"type": "indexCreation",
|
||||
"canceledBy": null,
|
||||
"details": {
|
||||
"primaryKey": null
|
||||
},
|
||||
"error": null,
|
||||
"duration": "[duration]",
|
||||
"enqueuedAt": "[date]",
|
||||
"startedAt": "[date]",
|
||||
"finishedAt": "[date]"
|
||||
}
|
||||
"###);
|
||||
|
||||
let (response, code) = index.get_all_documents_raw("?filter").await;
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"results": [],
|
||||
"offset": 0,
|
||||
"limit": 20,
|
||||
"total": 0
|
||||
}
|
||||
"###);
|
||||
|
||||
let (response, code) = index.get_all_documents_raw("?filter=doggo").await;
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `doggo`.\n1:6 doggo",
|
||||
"code": "invalid_document_filter",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_document_filter"
|
||||
}
|
||||
"###);
|
||||
|
||||
let (response, code) = index.get_all_documents_raw("?filter=doggo=bernese").await;
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"message": "Attribute `doggo` is not filterable. This index does not have configured filterable attributes.\n1:6 doggo=bernese",
|
||||
"code": "invalid_search_filter",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_search_filter"
|
||||
}
|
||||
"###);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn delete_documents_batch() {
|
||||
let server = Server::new().await;
|
||||
@ -418,3 +523,229 @@ async fn update_documents_csv_delimiter_with_bad_content_type() {
|
||||
}
|
||||
"###);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn delete_document_by_filter() {
|
||||
let server = Server::new().await;
|
||||
let index = server.index("doggo");
|
||||
|
||||
// send a bad payload type
|
||||
let (response, code) = index.delete_document_by_filter(json!("hello")).await;
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"message": "Invalid value type: expected an object, but found a string: `\"hello\"`",
|
||||
"code": "bad_request",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#bad_request"
|
||||
}
|
||||
"###);
|
||||
|
||||
// send bad payload type
|
||||
let (response, code) = index.delete_document_by_filter(json!({ "filter": true })).await;
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"message": "Invalid syntax for the filter parameter: `expected String, Array, found: true`.",
|
||||
"code": "invalid_document_delete_filter",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_document_delete_filter"
|
||||
}
|
||||
"###);
|
||||
|
||||
// send bad filter
|
||||
let (response, code) = index.delete_document_by_filter(json!({ "filter": "hello"})).await;
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `hello`.\n1:6 hello",
|
||||
"code": "invalid_document_delete_filter",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_document_delete_filter"
|
||||
}
|
||||
"###);
|
||||
|
||||
// send empty filter
|
||||
let (response, code) = index.delete_document_by_filter(json!({ "filter": ""})).await;
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"message": "Sending an empty filter is forbidden.",
|
||||
"code": "invalid_document_delete_filter",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_document_delete_filter"
|
||||
}
|
||||
"###);
|
||||
|
||||
// index does not exists
|
||||
let (response, code) =
|
||||
index.delete_document_by_filter(json!({ "filter": "doggo = bernese"})).await;
|
||||
snapshot!(code, @"202 Accepted");
|
||||
let response = server.wait_task(response["taskUid"].as_u64().unwrap()).await;
|
||||
snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]"}), @r###"
|
||||
{
|
||||
"uid": 0,
|
||||
"indexUid": "doggo",
|
||||
"status": "failed",
|
||||
"type": "documentDeletion",
|
||||
"canceledBy": null,
|
||||
"details": {
|
||||
"providedIds": 0,
|
||||
"deletedDocuments": 0,
|
||||
"originalFilter": "\"doggo = bernese\""
|
||||
},
|
||||
"error": {
|
||||
"message": "Index `doggo` not found.",
|
||||
"code": "index_not_found",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#index_not_found"
|
||||
},
|
||||
"duration": "[duration]",
|
||||
"enqueuedAt": "[date]",
|
||||
"startedAt": "[date]",
|
||||
"finishedAt": "[date]"
|
||||
}
|
||||
"###);
|
||||
|
||||
let (response, code) = index.create(None).await;
|
||||
snapshot!(code, @"202 Accepted");
|
||||
server.wait_task(response["taskUid"].as_u64().unwrap()).await;
|
||||
|
||||
// no filterable are set
|
||||
let (response, code) =
|
||||
index.delete_document_by_filter(json!({ "filter": "doggo = bernese"})).await;
|
||||
snapshot!(code, @"202 Accepted");
|
||||
let response = server.wait_task(response["taskUid"].as_u64().unwrap()).await;
|
||||
snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]"}), @r###"
|
||||
{
|
||||
"uid": 2,
|
||||
"indexUid": "doggo",
|
||||
"status": "failed",
|
||||
"type": "documentDeletion",
|
||||
"canceledBy": null,
|
||||
"details": {
|
||||
"providedIds": 0,
|
||||
"deletedDocuments": 0,
|
||||
"originalFilter": "\"doggo = bernese\""
|
||||
},
|
||||
"error": {
|
||||
"message": "Attribute `doggo` is not filterable. This index does not have configured filterable attributes.\n1:6 doggo = bernese",
|
||||
"code": "invalid_search_filter",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_search_filter"
|
||||
},
|
||||
"duration": "[duration]",
|
||||
"enqueuedAt": "[date]",
|
||||
"startedAt": "[date]",
|
||||
"finishedAt": "[date]"
|
||||
}
|
||||
"###);
|
||||
|
||||
let (response, code) = index.update_settings_filterable_attributes(json!(["doggo"])).await;
|
||||
snapshot!(code, @"202 Accepted");
|
||||
server.wait_task(response["taskUid"].as_u64().unwrap()).await;
|
||||
|
||||
// not filterable while there is a filterable attribute
|
||||
let (response, code) =
|
||||
index.delete_document_by_filter(json!({ "filter": "catto = jorts"})).await;
|
||||
snapshot!(code, @"202 Accepted");
|
||||
let response = server.wait_task(response["taskUid"].as_u64().unwrap()).await;
|
||||
snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]"}), @r###"
|
||||
{
|
||||
"uid": 4,
|
||||
"indexUid": "doggo",
|
||||
"status": "failed",
|
||||
"type": "documentDeletion",
|
||||
"canceledBy": null,
|
||||
"details": {
|
||||
"providedIds": 0,
|
||||
"deletedDocuments": 0,
|
||||
"originalFilter": "\"catto = jorts\""
|
||||
},
|
||||
"error": {
|
||||
"message": "Attribute `catto` is not filterable. Available filterable attributes are: `doggo`.\n1:6 catto = jorts",
|
||||
"code": "invalid_search_filter",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_search_filter"
|
||||
},
|
||||
"duration": "[duration]",
|
||||
"enqueuedAt": "[date]",
|
||||
"startedAt": "[date]",
|
||||
"finishedAt": "[date]"
|
||||
}
|
||||
"###);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn fetch_document_by_filter() {
|
||||
let server = Server::new().await;
|
||||
let index = server.index("doggo");
|
||||
index.update_settings_filterable_attributes(json!(["color"])).await;
|
||||
index
|
||||
.add_documents(
|
||||
json!([
|
||||
{ "id": 0, "color": "red" },
|
||||
{ "id": 1, "color": "blue" },
|
||||
{ "id": 2, "color": "blue" },
|
||||
{ "id": 3 },
|
||||
]),
|
||||
Some("id"),
|
||||
)
|
||||
.await;
|
||||
index.wait_task(1).await;
|
||||
|
||||
let (response, code) = index.get_document_by_filter(json!(null)).await;
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"message": "Invalid value type: expected an object, but found null",
|
||||
"code": "bad_request",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#bad_request"
|
||||
}
|
||||
"###);
|
||||
|
||||
let (response, code) = index.get_document_by_filter(json!({ "offset": "doggo" })).await;
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"message": "Invalid value type at `.offset`: expected a positive integer, but found a string: `\"doggo\"`",
|
||||
"code": "invalid_document_offset",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_document_offset"
|
||||
}
|
||||
"###);
|
||||
|
||||
let (response, code) = index.get_document_by_filter(json!({ "limit": "doggo" })).await;
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"message": "Invalid value type at `.limit`: expected a positive integer, but found a string: `\"doggo\"`",
|
||||
"code": "invalid_document_limit",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_document_limit"
|
||||
}
|
||||
"###);
|
||||
|
||||
let (response, code) = index.get_document_by_filter(json!({ "fields": "doggo" })).await;
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"message": "Invalid value type at `.fields`: expected an array, but found a string: `\"doggo\"`",
|
||||
"code": "invalid_document_fields",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_document_fields"
|
||||
}
|
||||
"###);
|
||||
|
||||
let (response, code) = index.get_document_by_filter(json!({ "filter": true })).await;
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"message": "Invalid syntax for the filter parameter: `expected String, Array, found: true`.",
|
||||
"code": "invalid_document_filter",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_document_filter"
|
||||
}
|
||||
"###);
|
||||
}
|
||||
|
@ -1,5 +1,6 @@
|
||||
use actix_web::test;
|
||||
use http::header::ACCEPT_ENCODING;
|
||||
use meili_snap::*;
|
||||
use serde_json::{json, Value};
|
||||
use urlencoding::encode as urlencode;
|
||||
|
||||
@ -378,3 +379,164 @@ async fn get_documents_displayed_attributes_is_ignored() {
|
||||
assert_eq!(response.as_object().unwrap().keys().count(), 16);
|
||||
assert!(response.as_object().unwrap().get("gender").is_some());
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn get_document_by_filter() {
|
||||
let server = Server::new().await;
|
||||
let index = server.index("doggo");
|
||||
index.update_settings_filterable_attributes(json!(["color"])).await;
|
||||
index
|
||||
.add_documents(
|
||||
json!([
|
||||
{ "id": 0, "color": "red" },
|
||||
{ "id": 1, "color": "blue" },
|
||||
{ "id": 2, "color": "blue" },
|
||||
{ "id": 3 },
|
||||
]),
|
||||
Some("id"),
|
||||
)
|
||||
.await;
|
||||
index.wait_task(1).await;
|
||||
|
||||
let (response, code) = index.get_document_by_filter(json!({})).await;
|
||||
let (response2, code2) = index.get_all_documents_raw("").await;
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
|
||||
{
|
||||
"results": [
|
||||
{
|
||||
"id": 0,
|
||||
"color": "red"
|
||||
},
|
||||
{
|
||||
"id": 1,
|
||||
"color": "blue"
|
||||
},
|
||||
{
|
||||
"id": 2,
|
||||
"color": "blue"
|
||||
},
|
||||
{
|
||||
"id": 3
|
||||
}
|
||||
],
|
||||
"offset": 0,
|
||||
"limit": 20,
|
||||
"total": 4
|
||||
}
|
||||
"###);
|
||||
assert_eq!(code, code2);
|
||||
assert_eq!(response, response2);
|
||||
|
||||
let (response, code) = index.get_document_by_filter(json!({ "filter": "color = blue" })).await;
|
||||
let (response2, code2) = index.get_all_documents_raw("?filter=color=blue").await;
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
|
||||
{
|
||||
"results": [
|
||||
{
|
||||
"id": 1,
|
||||
"color": "blue"
|
||||
},
|
||||
{
|
||||
"id": 2,
|
||||
"color": "blue"
|
||||
}
|
||||
],
|
||||
"offset": 0,
|
||||
"limit": 20,
|
||||
"total": 2
|
||||
}
|
||||
"###);
|
||||
assert_eq!(code, code2);
|
||||
assert_eq!(response, response2);
|
||||
|
||||
let (response, code) = index
|
||||
.get_document_by_filter(json!({ "offset": 1, "limit": 1, "filter": "color != blue" }))
|
||||
.await;
|
||||
let (response2, code2) =
|
||||
index.get_all_documents_raw("?filter=color!=blue&offset=1&limit=1").await;
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
|
||||
{
|
||||
"results": [
|
||||
{
|
||||
"id": 3
|
||||
}
|
||||
],
|
||||
"offset": 1,
|
||||
"limit": 1,
|
||||
"total": 2
|
||||
}
|
||||
"###);
|
||||
assert_eq!(code, code2);
|
||||
assert_eq!(response, response2);
|
||||
|
||||
let (response, code) = index
|
||||
.get_document_by_filter(
|
||||
json!({ "limit": 1, "filter": "color != blue", "fields": ["color"] }),
|
||||
)
|
||||
.await;
|
||||
let (response2, code2) =
|
||||
index.get_all_documents_raw("?limit=1&filter=color!=blue&fields=color").await;
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
|
||||
{
|
||||
"results": [
|
||||
{
|
||||
"color": "red"
|
||||
}
|
||||
],
|
||||
"offset": 0,
|
||||
"limit": 1,
|
||||
"total": 2
|
||||
}
|
||||
"###);
|
||||
assert_eq!(code, code2);
|
||||
assert_eq!(response, response2);
|
||||
|
||||
// Now testing more complex filter that the get route can't represent
|
||||
|
||||
let (response, code) =
|
||||
index.get_document_by_filter(json!({ "filter": [["color = blue", "color = red"]] })).await;
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
|
||||
{
|
||||
"results": [
|
||||
{
|
||||
"id": 0,
|
||||
"color": "red"
|
||||
},
|
||||
{
|
||||
"id": 1,
|
||||
"color": "blue"
|
||||
},
|
||||
{
|
||||
"id": 2,
|
||||
"color": "blue"
|
||||
}
|
||||
],
|
||||
"offset": 0,
|
||||
"limit": 20,
|
||||
"total": 3
|
||||
}
|
||||
"###);
|
||||
|
||||
let (response, code) = index
|
||||
.get_document_by_filter(json!({ "filter": [["color != blue"], "color EXISTS"] }))
|
||||
.await;
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
|
||||
{
|
||||
"results": [
|
||||
{
|
||||
"id": 0,
|
||||
"color": "red"
|
||||
}
|
||||
],
|
||||
"offset": 0,
|
||||
"limit": 20,
|
||||
"total": 1
|
||||
}
|
||||
"###);
|
||||
}
|
||||
|
@ -547,7 +547,7 @@ async fn filter_invalid_syntax_object() {
|
||||
index.wait_task(1).await;
|
||||
|
||||
let expected_response = json!({
|
||||
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass",
|
||||
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass",
|
||||
"code": "invalid_search_filter",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_search_filter"
|
||||
@ -572,7 +572,7 @@ async fn filter_invalid_syntax_array() {
|
||||
index.wait_task(1).await;
|
||||
|
||||
let expected_response = json!({
|
||||
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass",
|
||||
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass",
|
||||
"code": "invalid_search_filter",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_search_filter"
|
||||
|
@ -97,7 +97,7 @@ async fn task_bad_types() {
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"message": "Invalid value in parameter `types`: `doggo` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentDeletion`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`.",
|
||||
"message": "Invalid value in parameter `types`: `doggo` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentDeletion`, `documentDeletionByFilter`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`.",
|
||||
"code": "invalid_task_types",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_task_types"
|
||||
@ -108,7 +108,7 @@ async fn task_bad_types() {
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"message": "Invalid value in parameter `types`: `doggo` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentDeletion`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`.",
|
||||
"message": "Invalid value in parameter `types`: `doggo` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentDeletion`, `documentDeletionByFilter`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`.",
|
||||
"code": "invalid_task_types",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_task_types"
|
||||
@ -119,7 +119,7 @@ async fn task_bad_types() {
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"message": "Invalid value in parameter `types`: `doggo` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentDeletion`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`.",
|
||||
"message": "Invalid value in parameter `types`: `doggo` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentDeletion`, `documentDeletionByFilter`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`.",
|
||||
"code": "invalid_task_types",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_task_types"
|
||||
|
@ -12,40 +12,40 @@ readme.workspace = true
|
||||
license.workspace = true
|
||||
|
||||
[dependencies]
|
||||
bimap = { version = "0.6.2", features = ["serde"] }
|
||||
bimap = { version = "0.6.3", features = ["serde"] }
|
||||
bincode = "1.3.3"
|
||||
bstr = "1.0.1"
|
||||
bstr = "1.4.0"
|
||||
byteorder = "1.4.3"
|
||||
charabia = { version = "0.7.1", default-features = false }
|
||||
charabia = { version = "0.7.2", default-features = false }
|
||||
concat-arrays = "0.1.2"
|
||||
crossbeam-channel = "0.5.6"
|
||||
crossbeam-channel = "0.5.8"
|
||||
deserr = "0.5.0"
|
||||
either = "1.8.0"
|
||||
either = "1.8.1"
|
||||
flatten-serde-json = { path = "../flatten-serde-json" }
|
||||
fst = "0.4.7"
|
||||
fxhash = "0.2.1"
|
||||
geoutils = "0.5.1"
|
||||
grenad = { version = "0.4.3", default-features = false, features = ["tempfile"] }
|
||||
grenad = { version = "0.4.4", default-features = false, features = ["tempfile"] }
|
||||
heed = { git = "https://github.com/meilisearch/heed", tag = "v0.12.5", default-features = false, features = ["lmdb", "sync-read-txn"] }
|
||||
json-depth-checker = { path = "../json-depth-checker" }
|
||||
levenshtein_automata = { version = "0.2.1", features = ["fst_automaton"] }
|
||||
memmap2 = "0.5.7"
|
||||
memmap2 = "0.5.10"
|
||||
obkv = "0.2.0"
|
||||
once_cell = "1.15.0"
|
||||
ordered-float = "3.2.0"
|
||||
rayon = "1.5.3"
|
||||
once_cell = "1.17.1"
|
||||
ordered-float = "3.6.0"
|
||||
rayon = "1.7.0"
|
||||
roaring = "0.10.1"
|
||||
rstar = { version = "0.9.3", features = ["serde"] }
|
||||
serde = { version = "1.0.145", features = ["derive"] }
|
||||
serde_json = { version = "1.0.85", features = ["preserve_order"] }
|
||||
rstar = { version = "0.10.0", features = ["serde"] }
|
||||
serde = { version = "1.0.160", features = ["derive"] }
|
||||
serde_json = { version = "1.0.95", features = ["preserve_order"] }
|
||||
slice-group-by = "0.3.0"
|
||||
smallstr = { version = "0.3.0", features = ["serde"] }
|
||||
smallvec = "1.10.0"
|
||||
smartstring = "1.0.1"
|
||||
tempfile = "3.3.0"
|
||||
thiserror = "1.0.37"
|
||||
time = { version = "0.3.15", features = ["serde-well-known", "formatting", "parsing", "macros"] }
|
||||
uuid = { version = "1.1.2", features = ["v4"] }
|
||||
tempfile = "3.5.0"
|
||||
thiserror = "1.0.40"
|
||||
time = { version = "0.3.20", features = ["serde-well-known", "formatting", "parsing", "macros"] }
|
||||
uuid = { version = "1.3.1", features = ["v4"] }
|
||||
|
||||
filter-parser = { path = "../filter-parser" }
|
||||
|
||||
@ -55,11 +55,12 @@ itertools = "0.10.5"
|
||||
# logging
|
||||
log = "0.4.17"
|
||||
logging_timer = "1.1.0"
|
||||
csv = "1.1.6"
|
||||
csv = "1.2.1"
|
||||
|
||||
[dev-dependencies]
|
||||
mimalloc = { version = "0.1.29", default-features = false }
|
||||
big_s = "1.0.2"
|
||||
insta = "1.21.0"
|
||||
insta = "1.29.0"
|
||||
maplit = "1.0.2"
|
||||
md5 = "0.7.0"
|
||||
rand = {version = "0.8.5", features = ["small_rng"] }
|
||||
@ -68,7 +69,7 @@ rand = {version = "0.8.5", features = ["small_rng"] }
|
||||
fuzzcheck = "0.12.1"
|
||||
|
||||
[features]
|
||||
default = [ "charabia/default" ]
|
||||
all-tokenizations = [ "charabia/default" ]
|
||||
|
||||
# Use POSIX semaphores instead of SysV semaphores in LMDB
|
||||
# For more information on this feature, see heed's Cargo.toml
|
||||
@ -89,3 +90,6 @@ korean = ["charabia/korean"]
|
||||
|
||||
# allow thai specialized tokenization
|
||||
thai = ["charabia/thai"]
|
||||
|
||||
# allow greek specialized tokenization
|
||||
greek = ["charabia/greek"]
|
||||
|
114
milli/examples/index.rs
Normal file
114
milli/examples/index.rs
Normal file
@ -0,0 +1,114 @@
|
||||
use std::error::Error;
|
||||
use std::fs::File;
|
||||
use std::io::{BufRead, BufReader, Cursor, Seek};
|
||||
use std::path::Path;
|
||||
|
||||
use heed::EnvOpenOptions;
|
||||
use milli::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
|
||||
use milli::update::{IndexDocuments, IndexDocumentsConfig, IndexerConfig, Settings};
|
||||
use milli::{Index, Object};
|
||||
|
||||
fn usage(error: &str, program_name: &str) -> String {
|
||||
format!(
|
||||
"{}. Usage: {} <PATH-TO-INDEX> <PATH-TO-DATASET> [searchable_fields] [filterable_fields]",
|
||||
error, program_name
|
||||
)
|
||||
}
|
||||
|
||||
fn main() -> Result<(), Box<dyn Error>> {
|
||||
let mut args = std::env::args();
|
||||
let program_name = args.next().expect("No program name");
|
||||
let index_path =
|
||||
args.next().unwrap_or_else(|| panic!("{}", usage("Missing path to index.", &program_name)));
|
||||
let dataset_path = args
|
||||
.next()
|
||||
.unwrap_or_else(|| panic!("{}", usage("Missing path to source dataset.", &program_name)));
|
||||
// let primary_key = args.next().unwrap_or_else(|| "id".into());
|
||||
// "title overview"
|
||||
let searchable_fields: Vec<String> = args
|
||||
.next()
|
||||
.map(|arg| arg.split_whitespace().map(ToString::to_string).collect())
|
||||
.unwrap_or_default();
|
||||
|
||||
println!("{searchable_fields:?}");
|
||||
// "release_date genres"
|
||||
let filterable_fields: Vec<String> = args
|
||||
.next()
|
||||
.map(|arg| arg.split_whitespace().map(ToString::to_string).collect())
|
||||
.unwrap_or_default();
|
||||
|
||||
let mut options = EnvOpenOptions::new();
|
||||
options.map_size(100 * 1024 * 1024 * 1024); // 100 GB
|
||||
|
||||
std::fs::create_dir_all(&index_path).unwrap();
|
||||
let index = Index::new(options, index_path).unwrap();
|
||||
let mut wtxn = index.write_txn().unwrap();
|
||||
|
||||
let config = IndexerConfig::default();
|
||||
let mut builder = Settings::new(&mut wtxn, &index, &config);
|
||||
// builder.set_primary_key(primary_key);
|
||||
let searchable_fields = searchable_fields.iter().map(|s| s.to_string()).collect();
|
||||
builder.set_searchable_fields(searchable_fields);
|
||||
let filterable_fields = filterable_fields.iter().map(|s| s.to_string()).collect();
|
||||
builder.set_filterable_fields(filterable_fields);
|
||||
|
||||
builder.execute(|_| (), || false).unwrap();
|
||||
|
||||
let config = IndexerConfig::default();
|
||||
let indexing_config = IndexDocumentsConfig::default();
|
||||
|
||||
let builder =
|
||||
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| (), || false).unwrap();
|
||||
|
||||
let documents = documents_from(
|
||||
&dataset_path,
|
||||
Path::new(&dataset_path).extension().unwrap_or_default().to_str().unwrap_or_default(),
|
||||
);
|
||||
let (builder, user_error) = builder.add_documents(documents).unwrap();
|
||||
user_error.unwrap();
|
||||
builder.execute().unwrap();
|
||||
wtxn.commit().unwrap();
|
||||
|
||||
index.prepare_for_closing().wait();
|
||||
Ok(())
|
||||
}
|
||||
fn documents_from(filename: &str, filetype: &str) -> DocumentsBatchReader<impl BufRead + Seek> {
|
||||
let reader = File::open(filename)
|
||||
.unwrap_or_else(|_| panic!("could not find the dataset in: {}", filename));
|
||||
let reader = BufReader::new(reader);
|
||||
let documents = match filetype {
|
||||
"csv" => documents_from_csv(reader).unwrap(),
|
||||
"json" => documents_from_json(reader).unwrap(),
|
||||
"jsonl" => documents_from_jsonl(reader).unwrap(),
|
||||
otherwise => panic!("invalid update format {:?}", otherwise),
|
||||
};
|
||||
DocumentsBatchReader::from_reader(Cursor::new(documents)).unwrap()
|
||||
}
|
||||
|
||||
fn documents_from_jsonl(reader: impl BufRead) -> milli::Result<Vec<u8>> {
|
||||
let mut documents = DocumentsBatchBuilder::new(Vec::new());
|
||||
|
||||
for result in serde_json::Deserializer::from_reader(reader).into_iter::<Object>() {
|
||||
let object = result.unwrap();
|
||||
documents.append_json_object(&object)?;
|
||||
}
|
||||
|
||||
documents.into_inner().map_err(Into::into)
|
||||
}
|
||||
|
||||
fn documents_from_json(reader: impl BufRead) -> milli::Result<Vec<u8>> {
|
||||
let mut documents = DocumentsBatchBuilder::new(Vec::new());
|
||||
|
||||
documents.append_json_array(reader)?;
|
||||
|
||||
documents.into_inner().map_err(Into::into)
|
||||
}
|
||||
|
||||
fn documents_from_csv(reader: impl BufRead) -> milli::Result<Vec<u8>> {
|
||||
let csv = csv::Reader::from_reader(reader);
|
||||
|
||||
let mut documents = DocumentsBatchBuilder::new(Vec::new());
|
||||
documents.append_csv(csv)?;
|
||||
|
||||
documents.into_inner().map_err(Into::into)
|
||||
}
|
117
milli/examples/search.rs
Normal file
117
milli/examples/search.rs
Normal file
@ -0,0 +1,117 @@
|
||||
use std::error::Error;
|
||||
use std::io::stdin;
|
||||
use std::path::Path;
|
||||
use std::time::Instant;
|
||||
|
||||
use heed::EnvOpenOptions;
|
||||
use milli::{
|
||||
execute_search, DefaultSearchLogger, GeoSortStrategy, Index, SearchContext, SearchLogger,
|
||||
TermsMatchingStrategy,
|
||||
};
|
||||
|
||||
#[global_allocator]
|
||||
static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc;
|
||||
|
||||
fn main() -> Result<(), Box<dyn Error>> {
|
||||
let mut args = std::env::args();
|
||||
let program_name = args.next().expect("No program name");
|
||||
let dataset = args.next().unwrap_or_else(|| {
|
||||
panic!(
|
||||
"Missing path to index. Usage: {} <PATH-TO-INDEX> [<logger-dir>] [print-documents]",
|
||||
program_name
|
||||
)
|
||||
});
|
||||
let detailed_logger_dir = args.next();
|
||||
let print_documents: bool =
|
||||
if let Some(arg) = args.next() { arg == "print-documents" } else { false };
|
||||
|
||||
let mut options = EnvOpenOptions::new();
|
||||
options.map_size(100 * 1024 * 1024 * 1024); // 100 GB
|
||||
|
||||
let index = Index::new(options, dataset)?;
|
||||
let txn = index.read_txn()?;
|
||||
let mut query = String::new();
|
||||
while stdin().read_line(&mut query)? > 0 {
|
||||
for _ in 0..2 {
|
||||
let mut default_logger = DefaultSearchLogger;
|
||||
// FIXME: consider resetting the state of the logger between search executions as otherwise panics are possible.
|
||||
// Workaround'd here by recreating the logger on each iteration of the loop
|
||||
let mut detailed_logger = detailed_logger_dir
|
||||
.as_ref()
|
||||
.map(|logger_dir| (milli::VisualSearchLogger::default(), logger_dir));
|
||||
let logger: &mut dyn SearchLogger<_> =
|
||||
if let Some((detailed_logger, _)) = detailed_logger.as_mut() {
|
||||
detailed_logger
|
||||
} else {
|
||||
&mut default_logger
|
||||
};
|
||||
|
||||
let start = Instant::now();
|
||||
|
||||
let mut ctx = SearchContext::new(&index, &txn);
|
||||
let docs = execute_search(
|
||||
&mut ctx,
|
||||
&(!query.trim().is_empty()).then(|| query.trim().to_owned()),
|
||||
TermsMatchingStrategy::Last,
|
||||
false,
|
||||
&None,
|
||||
&None,
|
||||
GeoSortStrategy::default(),
|
||||
0,
|
||||
20,
|
||||
None,
|
||||
&mut DefaultSearchLogger,
|
||||
logger,
|
||||
)?;
|
||||
if let Some((logger, dir)) = detailed_logger {
|
||||
logger.finish(&mut ctx, Path::new(dir))?;
|
||||
}
|
||||
let elapsed = start.elapsed();
|
||||
println!("new: {}us, docids: {:?}", elapsed.as_micros(), docs.documents_ids);
|
||||
if print_documents {
|
||||
let documents = index
|
||||
.documents(&txn, docs.documents_ids.iter().copied())
|
||||
.unwrap()
|
||||
.into_iter()
|
||||
.map(|(id, obkv)| {
|
||||
let mut object = serde_json::Map::default();
|
||||
for (fid, fid_name) in index.fields_ids_map(&txn).unwrap().iter() {
|
||||
let value = obkv.get(fid).unwrap();
|
||||
let value: serde_json::Value = serde_json::from_slice(value).unwrap();
|
||||
object.insert(fid_name.to_owned(), value);
|
||||
}
|
||||
(id, serde_json::to_string_pretty(&object).unwrap())
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
for (id, document) in documents {
|
||||
println!("{id}:");
|
||||
println!("{document}");
|
||||
}
|
||||
|
||||
let documents = index
|
||||
.documents(&txn, docs.documents_ids.iter().copied())
|
||||
.unwrap()
|
||||
.into_iter()
|
||||
.map(|(id, obkv)| {
|
||||
let mut object = serde_json::Map::default();
|
||||
for (fid, fid_name) in index.fields_ids_map(&txn).unwrap().iter() {
|
||||
let value = obkv.get(fid).unwrap();
|
||||
let value: serde_json::Value = serde_json::from_slice(value).unwrap();
|
||||
object.insert(fid_name.to_owned(), value);
|
||||
}
|
||||
(id, serde_json::to_string_pretty(&object).unwrap())
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
println!("{}us: {:?}", elapsed.as_micros(), docs.documents_ids);
|
||||
for (id, document) in documents {
|
||||
println!("{id}:");
|
||||
println!("{document}");
|
||||
}
|
||||
}
|
||||
}
|
||||
query.clear();
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
33
milli/examples/settings.rs
Normal file
33
milli/examples/settings.rs
Normal file
@ -0,0 +1,33 @@
|
||||
// use big_s::S;
|
||||
use heed::EnvOpenOptions;
|
||||
// use maplit::hashset;
|
||||
use milli::{
|
||||
update::{IndexerConfig, Settings},
|
||||
Criterion, Index,
|
||||
};
|
||||
|
||||
fn main() {
|
||||
let mut options = EnvOpenOptions::new();
|
||||
options.map_size(100 * 1024 * 1024 * 1024); // 100 GB
|
||||
|
||||
let index = Index::new(options, "data_movies.ms").unwrap();
|
||||
let mut wtxn = index.write_txn().unwrap();
|
||||
|
||||
let config = IndexerConfig::default();
|
||||
let mut builder = Settings::new(&mut wtxn, &index, &config);
|
||||
|
||||
// builder.set_min_word_len_one_typo(5);
|
||||
// builder.set_min_word_len_two_typos(7);
|
||||
// builder.set_sortable_fields(hashset! { S("release_date") });
|
||||
builder.set_criteria(vec![
|
||||
Criterion::Words,
|
||||
Criterion::Typo,
|
||||
Criterion::Proximity,
|
||||
Criterion::Attribute,
|
||||
Criterion::Sort,
|
||||
Criterion::Exactness,
|
||||
]);
|
||||
|
||||
builder.execute(|_| (), || false).unwrap();
|
||||
wtxn.commit().unwrap();
|
||||
}
|
@ -112,6 +112,8 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco
|
||||
InvalidGeoField(#[from] GeoError),
|
||||
#[error("{0}")]
|
||||
InvalidFilter(String),
|
||||
#[error("Invalid type for filter subexpression: `expected {}, found: {1}`.", .0.join(", "))]
|
||||
InvalidFilterExpression(&'static [&'static str], Value),
|
||||
#[error("Attribute `{}` is not sortable. {}",
|
||||
.field,
|
||||
match .valid_fields.is_empty() {
|
||||
|
@ -21,5 +21,5 @@ pub use self::roaring_bitmap_length::{
|
||||
BoRoaringBitmapLenCodec, CboRoaringBitmapLenCodec, RoaringBitmapLenCodec,
|
||||
};
|
||||
pub use self::script_language_codec::ScriptLanguageCodec;
|
||||
pub use self::str_beu32_codec::StrBEU32Codec;
|
||||
pub use self::str_beu32_codec::{StrBEU16Codec, StrBEU32Codec};
|
||||
pub use self::str_str_u8_codec::{U8StrStrCodec, UncheckedU8StrStrCodec};
|
||||
|
@ -36,3 +36,39 @@ impl<'a> heed::BytesEncode<'a> for StrBEU32Codec {
|
||||
Some(Cow::Owned(bytes))
|
||||
}
|
||||
}
|
||||
|
||||
pub struct StrBEU16Codec;
|
||||
|
||||
impl<'a> heed::BytesDecode<'a> for StrBEU16Codec {
|
||||
type DItem = (&'a str, u16);
|
||||
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
||||
let footer_len = size_of::<u16>();
|
||||
|
||||
if bytes.len() < footer_len + 1 {
|
||||
return None;
|
||||
}
|
||||
|
||||
let (word_plus_nul_byte, bytes) = bytes.split_at(bytes.len() - footer_len);
|
||||
let (_, word) = word_plus_nul_byte.split_last()?;
|
||||
let word = str::from_utf8(word).ok()?;
|
||||
let pos = bytes.try_into().map(u16::from_be_bytes).ok()?;
|
||||
|
||||
Some((word, pos))
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> heed::BytesEncode<'a> for StrBEU16Codec {
|
||||
type EItem = (&'a str, u16);
|
||||
|
||||
fn bytes_encode((word, pos): &Self::EItem) -> Option<Cow<[u8]>> {
|
||||
let pos = pos.to_be_bytes();
|
||||
|
||||
let mut bytes = Vec::with_capacity(word.len() + 1 + pos.len());
|
||||
bytes.extend_from_slice(word.as_bytes());
|
||||
bytes.push(0);
|
||||
bytes.extend_from_slice(&pos[..]);
|
||||
|
||||
Some(Cow::Owned(bytes))
|
||||
}
|
||||
}
|
||||
|
@ -19,12 +19,12 @@ use crate::heed_codec::facet::{
|
||||
FacetGroupKeyCodec, FacetGroupValueCodec, FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec,
|
||||
FieldIdCodec, OrderedF64Codec,
|
||||
};
|
||||
use crate::heed_codec::{ScriptLanguageCodec, StrRefCodec};
|
||||
use crate::heed_codec::{ScriptLanguageCodec, StrBEU16Codec, StrRefCodec};
|
||||
use crate::{
|
||||
default_criteria, BEU32StrCodec, BoRoaringBitmapCodec, CboRoaringBitmapCodec, Criterion,
|
||||
DocumentId, ExternalDocumentsIds, FacetDistribution, FieldDistribution, FieldId,
|
||||
FieldIdWordCountCodec, GeoPoint, ObkvCodec, Result, RoaringBitmapCodec, RoaringBitmapLenCodec,
|
||||
Search, StrBEU32Codec, U8StrStrCodec, BEU16, BEU32,
|
||||
Search, U8StrStrCodec, BEU16, BEU32,
|
||||
};
|
||||
|
||||
pub const DEFAULT_MIN_WORD_LEN_ONE_TYPO: u8 = 5;
|
||||
@ -76,10 +76,14 @@ pub mod db_name {
|
||||
pub const WORD_PREFIX_PAIR_PROXIMITY_DOCIDS: &str = "word-prefix-pair-proximity-docids";
|
||||
pub const PREFIX_WORD_PAIR_PROXIMITY_DOCIDS: &str = "prefix-word-pair-proximity-docids";
|
||||
pub const WORD_POSITION_DOCIDS: &str = "word-position-docids";
|
||||
pub const WORD_FIELD_ID_DOCIDS: &str = "word-field-id-docids";
|
||||
pub const WORD_PREFIX_POSITION_DOCIDS: &str = "word-prefix-position-docids";
|
||||
pub const WORD_PREFIX_FIELD_ID_DOCIDS: &str = "word-prefix-field-id-docids";
|
||||
pub const FIELD_ID_WORD_COUNT_DOCIDS: &str = "field-id-word-count-docids";
|
||||
pub const FACET_ID_F64_DOCIDS: &str = "facet-id-f64-docids";
|
||||
pub const FACET_ID_EXISTS_DOCIDS: &str = "facet-id-exists-docids";
|
||||
pub const FACET_ID_IS_NULL_DOCIDS: &str = "facet-id-is-null-docids";
|
||||
pub const FACET_ID_IS_EMPTY_DOCIDS: &str = "facet-id-is-empty-docids";
|
||||
pub const FACET_ID_STRING_DOCIDS: &str = "facet-id-string-docids";
|
||||
pub const FIELD_ID_DOCID_FACET_F64S: &str = "field-id-docid-facet-f64s";
|
||||
pub const FIELD_ID_DOCID_FACET_STRINGS: &str = "field-id-docid-facet-strings";
|
||||
@ -118,17 +122,26 @@ pub struct Index {
|
||||
pub prefix_word_pair_proximity_docids: Database<U8StrStrCodec, CboRoaringBitmapCodec>,
|
||||
|
||||
/// Maps the word and the position with the docids that corresponds to it.
|
||||
pub word_position_docids: Database<StrBEU32Codec, CboRoaringBitmapCodec>,
|
||||
pub word_position_docids: Database<StrBEU16Codec, CboRoaringBitmapCodec>,
|
||||
/// Maps the word and the field id with the docids that corresponds to it.
|
||||
pub word_fid_docids: Database<StrBEU16Codec, CboRoaringBitmapCodec>,
|
||||
|
||||
/// Maps the field id and the word count with the docids that corresponds to it.
|
||||
pub field_id_word_count_docids: Database<FieldIdWordCountCodec, CboRoaringBitmapCodec>,
|
||||
/// Maps the position of a word prefix with all the docids where this prefix appears.
|
||||
pub word_prefix_position_docids: Database<StrBEU32Codec, CboRoaringBitmapCodec>,
|
||||
/// Maps the word prefix and a position with all the docids where the prefix appears at the position.
|
||||
pub word_prefix_position_docids: Database<StrBEU16Codec, CboRoaringBitmapCodec>,
|
||||
/// Maps the word prefix and a field id with all the docids where the prefix appears inside the field
|
||||
pub word_prefix_fid_docids: Database<StrBEU16Codec, CboRoaringBitmapCodec>,
|
||||
|
||||
/// Maps the script and language with all the docids that corresponds to it.
|
||||
pub script_language_docids: Database<ScriptLanguageCodec, RoaringBitmapCodec>,
|
||||
|
||||
/// Maps the facet field id and the docids for which this field exists
|
||||
pub facet_id_exists_docids: Database<FieldIdCodec, CboRoaringBitmapCodec>,
|
||||
/// Maps the facet field id and the docids for which this field is set as null
|
||||
pub facet_id_is_null_docids: Database<FieldIdCodec, CboRoaringBitmapCodec>,
|
||||
/// Maps the facet field id and the docids for which this field is considered empty
|
||||
pub facet_id_is_empty_docids: Database<FieldIdCodec, CboRoaringBitmapCodec>,
|
||||
|
||||
/// Maps the facet field id and ranges of numbers with the docids that corresponds to them.
|
||||
pub facet_id_f64_docids: Database<FacetGroupKeyCodec<OrderedF64Codec>, FacetGroupValueCodec>,
|
||||
@ -153,7 +166,7 @@ impl Index {
|
||||
) -> Result<Index> {
|
||||
use db_name::*;
|
||||
|
||||
options.max_dbs(19);
|
||||
options.max_dbs(23);
|
||||
unsafe { options.flag(Flags::MdbAlwaysFreePages) };
|
||||
|
||||
let env = options.open(path)?;
|
||||
@ -170,11 +183,15 @@ impl Index {
|
||||
let prefix_word_pair_proximity_docids =
|
||||
env.create_database(Some(PREFIX_WORD_PAIR_PROXIMITY_DOCIDS))?;
|
||||
let word_position_docids = env.create_database(Some(WORD_POSITION_DOCIDS))?;
|
||||
let word_fid_docids = env.create_database(Some(WORD_FIELD_ID_DOCIDS))?;
|
||||
let field_id_word_count_docids = env.create_database(Some(FIELD_ID_WORD_COUNT_DOCIDS))?;
|
||||
let word_prefix_position_docids = env.create_database(Some(WORD_PREFIX_POSITION_DOCIDS))?;
|
||||
let word_prefix_fid_docids = env.create_database(Some(WORD_PREFIX_FIELD_ID_DOCIDS))?;
|
||||
let facet_id_f64_docids = env.create_database(Some(FACET_ID_F64_DOCIDS))?;
|
||||
let facet_id_string_docids = env.create_database(Some(FACET_ID_STRING_DOCIDS))?;
|
||||
let facet_id_exists_docids = env.create_database(Some(FACET_ID_EXISTS_DOCIDS))?;
|
||||
let facet_id_is_null_docids = env.create_database(Some(FACET_ID_IS_NULL_DOCIDS))?;
|
||||
let facet_id_is_empty_docids = env.create_database(Some(FACET_ID_IS_EMPTY_DOCIDS))?;
|
||||
|
||||
let field_id_docid_facet_f64s = env.create_database(Some(FIELD_ID_DOCID_FACET_F64S))?;
|
||||
let field_id_docid_facet_strings =
|
||||
@ -196,11 +213,15 @@ impl Index {
|
||||
word_prefix_pair_proximity_docids,
|
||||
prefix_word_pair_proximity_docids,
|
||||
word_position_docids,
|
||||
word_fid_docids,
|
||||
word_prefix_position_docids,
|
||||
word_prefix_fid_docids,
|
||||
field_id_word_count_docids,
|
||||
facet_id_f64_docids,
|
||||
facet_id_string_docids,
|
||||
facet_id_exists_docids,
|
||||
facet_id_is_null_docids,
|
||||
facet_id_is_empty_docids,
|
||||
field_id_docid_facet_f64s,
|
||||
field_id_docid_facet_strings,
|
||||
documents,
|
||||
@ -833,6 +854,30 @@ impl Index {
|
||||
}
|
||||
}
|
||||
|
||||
/// Retrieve all the documents which contain this field id set as null
|
||||
pub fn null_faceted_documents_ids(
|
||||
&self,
|
||||
rtxn: &RoTxn,
|
||||
field_id: FieldId,
|
||||
) -> heed::Result<RoaringBitmap> {
|
||||
match self.facet_id_is_null_docids.get(rtxn, &BEU16::new(field_id))? {
|
||||
Some(docids) => Ok(docids),
|
||||
None => Ok(RoaringBitmap::new()),
|
||||
}
|
||||
}
|
||||
|
||||
/// Retrieve all the documents which contain this field id and that is considered empty
|
||||
pub fn empty_faceted_documents_ids(
|
||||
&self,
|
||||
rtxn: &RoTxn,
|
||||
field_id: FieldId,
|
||||
) -> heed::Result<RoaringBitmap> {
|
||||
match self.facet_id_is_empty_docids.get(rtxn, &BEU16::new(field_id))? {
|
||||
Some(docids) => Ok(docids),
|
||||
None => Ok(RoaringBitmap::new()),
|
||||
}
|
||||
}
|
||||
|
||||
/// Retrieve all the documents which contain this field id
|
||||
pub fn exists_faceted_documents_ids(
|
||||
&self,
|
||||
@ -987,16 +1032,15 @@ impl Index {
|
||||
|
||||
/* documents */
|
||||
|
||||
/// Returns a [`Vec`] of the requested documents. Returns an error if a document is missing.
|
||||
pub fn documents<'t>(
|
||||
&self,
|
||||
/// Returns an iterator over the requested documents. The next item will be an error if a document is missing.
|
||||
pub fn iter_documents<'a, 't: 'a>(
|
||||
&'a self,
|
||||
rtxn: &'t RoTxn,
|
||||
ids: impl IntoIterator<Item = DocumentId>,
|
||||
) -> Result<Vec<(DocumentId, obkv::KvReaderU16<'t>)>> {
|
||||
ids: impl IntoIterator<Item = DocumentId> + 'a,
|
||||
) -> Result<impl Iterator<Item = Result<(DocumentId, obkv::KvReaderU16<'t>)>> + 'a> {
|
||||
let soft_deleted_documents = self.soft_deleted_documents_ids(rtxn)?;
|
||||
let mut documents = Vec::new();
|
||||
|
||||
for id in ids {
|
||||
Ok(ids.into_iter().map(move |id| {
|
||||
if soft_deleted_documents.contains(id) {
|
||||
return Err(UserError::AccessingSoftDeletedDocument { document_id: id })?;
|
||||
}
|
||||
@ -1004,27 +1048,25 @@ impl Index {
|
||||
.documents
|
||||
.get(rtxn, &BEU32::new(id))?
|
||||
.ok_or(UserError::UnknownInternalDocumentId { document_id: id })?;
|
||||
documents.push((id, kv));
|
||||
}
|
||||
Ok((id, kv))
|
||||
}))
|
||||
}
|
||||
|
||||
Ok(documents)
|
||||
/// Returns a [`Vec`] of the requested documents. Returns an error if a document is missing.
|
||||
pub fn documents<'t>(
|
||||
&self,
|
||||
rtxn: &'t RoTxn,
|
||||
ids: impl IntoIterator<Item = DocumentId>,
|
||||
) -> Result<Vec<(DocumentId, obkv::KvReaderU16<'t>)>> {
|
||||
self.iter_documents(rtxn, ids)?.collect()
|
||||
}
|
||||
|
||||
/// Returns an iterator over all the documents in the index.
|
||||
pub fn all_documents<'t>(
|
||||
&self,
|
||||
pub fn all_documents<'a, 't: 'a>(
|
||||
&'a self,
|
||||
rtxn: &'t RoTxn,
|
||||
) -> Result<impl Iterator<Item = heed::Result<(DocumentId, obkv::KvReaderU16<'t>)>>> {
|
||||
let soft_deleted_docids = self.soft_deleted_documents_ids(rtxn)?;
|
||||
|
||||
Ok(self
|
||||
.documents
|
||||
.iter(rtxn)?
|
||||
// we cast the BEU32 to a DocumentId
|
||||
.map(|document| document.map(|(id, obkv)| (id.get(), obkv)))
|
||||
.filter(move |document| {
|
||||
document.as_ref().map_or(true, |(id, _)| !soft_deleted_docids.contains(*id))
|
||||
}))
|
||||
) -> Result<impl Iterator<Item = Result<(DocumentId, obkv::KvReaderU16<'t>)>> + 'a> {
|
||||
self.iter_documents(rtxn, self.documents_ids(rtxn)?)
|
||||
}
|
||||
|
||||
pub fn facets_distribution<'a>(&'a self, rtxn: &'a RoTxn) -> FacetDistribution<'a> {
|
||||
@ -1284,10 +1326,10 @@ pub(crate) mod tests {
|
||||
let index_documents_config = IndexDocumentsConfig::default();
|
||||
Self { inner, indexer_config, index_documents_config, _tempdir }
|
||||
}
|
||||
/// Creates a temporary index, with a default `4096 * 1000` size. This should be enough for
|
||||
/// Creates a temporary index, with a default `4096 * 2000` size. This should be enough for
|
||||
/// most tests.
|
||||
pub fn new() -> Self {
|
||||
Self::new_with_map_size(4096 * 1000)
|
||||
Self::new_with_map_size(4096 * 2000)
|
||||
}
|
||||
pub fn add_documents_using_wtxn<'t, R>(
|
||||
&'t self,
|
||||
@ -1416,11 +1458,11 @@ pub(crate) mod tests {
|
||||
db_snap!(index, field_distribution);
|
||||
|
||||
db_snap!(index, field_distribution,
|
||||
@"
|
||||
age 1
|
||||
id 2
|
||||
name 2
|
||||
"
|
||||
@r###"
|
||||
age 1
|
||||
id 2
|
||||
name 2
|
||||
"###
|
||||
);
|
||||
|
||||
// snapshot_index!(&index, "1", include: "^field_distribution$");
|
||||
@ -1437,10 +1479,10 @@ pub(crate) mod tests {
|
||||
|
||||
db_snap!(index, field_distribution,
|
||||
@r###"
|
||||
age 1
|
||||
id 2
|
||||
name 2
|
||||
"###
|
||||
age 1
|
||||
id 2
|
||||
name 2
|
||||
"###
|
||||
);
|
||||
|
||||
// then we update a document by removing one field and another by adding one field
|
||||
@ -1453,10 +1495,10 @@ pub(crate) mod tests {
|
||||
|
||||
db_snap!(index, field_distribution,
|
||||
@r###"
|
||||
has_dog 1
|
||||
id 2
|
||||
name 2
|
||||
"###
|
||||
has_dog 1
|
||||
id 2
|
||||
name 2
|
||||
"###
|
||||
);
|
||||
}
|
||||
|
||||
|
@ -1,4 +1,56 @@
|
||||
#![cfg_attr(all(test, fuzzing), feature(no_coverage))]
|
||||
#![allow(clippy::type_complexity)]
|
||||
|
||||
#[cfg(test)]
|
||||
#[global_allocator]
|
||||
pub static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc;
|
||||
|
||||
// #[cfg(test)]
|
||||
// pub mod allocator {
|
||||
// use std::alloc::{GlobalAlloc, System};
|
||||
// use std::sync::atomic::{self, AtomicI64};
|
||||
|
||||
// #[global_allocator]
|
||||
// pub static ALLOC: CountingAlloc = CountingAlloc {
|
||||
// max_resident: AtomicI64::new(0),
|
||||
// resident: AtomicI64::new(0),
|
||||
// allocated: AtomicI64::new(0),
|
||||
// };
|
||||
|
||||
// pub struct CountingAlloc {
|
||||
// pub max_resident: AtomicI64,
|
||||
// pub resident: AtomicI64,
|
||||
// pub allocated: AtomicI64,
|
||||
// }
|
||||
// unsafe impl GlobalAlloc for CountingAlloc {
|
||||
// unsafe fn alloc(&self, layout: std::alloc::Layout) -> *mut u8 {
|
||||
// self.allocated.fetch_add(layout.size() as i64, atomic::Ordering::SeqCst);
|
||||
// let old_resident =
|
||||
// self.resident.fetch_add(layout.size() as i64, atomic::Ordering::SeqCst);
|
||||
|
||||
// let resident = old_resident + layout.size() as i64;
|
||||
// self.max_resident.fetch_max(resident, atomic::Ordering::SeqCst);
|
||||
|
||||
// // if layout.size() > 1_000_000 {
|
||||
// // eprintln!(
|
||||
// // "allocating {} with new resident size: {resident}",
|
||||
// // layout.size() / 1_000_000
|
||||
// // );
|
||||
// // // let trace = std::backtrace::Backtrace::capture();
|
||||
// // // let t = trace.to_string();
|
||||
// // // eprintln!("{t}");
|
||||
// // }
|
||||
|
||||
// System.alloc(layout)
|
||||
// }
|
||||
|
||||
// unsafe fn dealloc(&self, ptr: *mut u8, layout: std::alloc::Layout) {
|
||||
// self.resident.fetch_sub(layout.size() as i64, atomic::Ordering::Relaxed);
|
||||
// System.dealloc(ptr, layout)
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
|
||||
#[macro_use]
|
||||
pub mod documents;
|
||||
|
||||
@ -26,6 +78,10 @@ use charabia::normalizer::{CharNormalizer, CompatibilityDecompositionNormalizer}
|
||||
pub use filter_parser::{Condition, FilterCondition, Span, Token};
|
||||
use fxhash::{FxHasher32, FxHasher64};
|
||||
pub use grenad::CompressionType;
|
||||
pub use search::new::{
|
||||
execute_search, DefaultSearchLogger, GeoSortStrategy, SearchContext, SearchLogger,
|
||||
VisualSearchLogger,
|
||||
};
|
||||
use serde_json::Value;
|
||||
pub use {charabia as tokenizer, heed};
|
||||
|
||||
@ -43,9 +99,8 @@ pub use self::heed_codec::{
|
||||
};
|
||||
pub use self::index::Index;
|
||||
pub use self::search::{
|
||||
CriterionImplementationStrategy, FacetDistribution, Filter, FormatOptions, MatchBounds,
|
||||
MatcherBuilder, MatchingWord, MatchingWords, Search, SearchResult, TermsMatchingStrategy,
|
||||
DEFAULT_VALUES_PER_FACET,
|
||||
FacetDistribution, Filter, FormatOptions, MatchBounds, MatcherBuilder, MatchingWords, Search,
|
||||
SearchResult, TermsMatchingStrategy, DEFAULT_VALUES_PER_FACET,
|
||||
};
|
||||
|
||||
pub type Result<T> = std::result::Result<T, error::Error>;
|
||||
@ -100,6 +155,23 @@ pub fn relative_from_absolute_position(absolute: Position) -> (FieldId, Relative
|
||||
pub fn absolute_from_relative_position(field_id: FieldId, relative: RelativePosition) -> Position {
|
||||
(field_id as u32) << 16 | (relative as u32)
|
||||
}
|
||||
// TODO: this is wrong, but will do for now
|
||||
/// Compute the "bucketed" absolute position from the field id and relative position in the field.
|
||||
///
|
||||
/// In a bucketed position, the accuracy of the relative position is reduced exponentially as it gets larger.
|
||||
pub fn bucketed_position(relative: u16) -> u16 {
|
||||
// The first few relative positions are kept intact.
|
||||
if relative < 16 {
|
||||
relative
|
||||
} else if relative < 24 {
|
||||
// Relative positions between 16 and 24 all become equal to 24
|
||||
24
|
||||
} else {
|
||||
// Then, groups of positions that have the same base-2 logarithm are reduced to
|
||||
// the same relative position: the smallest power of 2 that is greater than them
|
||||
(relative as f64).log2().ceil().exp2() as u16
|
||||
}
|
||||
}
|
||||
|
||||
/// Transform a raw obkv store into a JSON Object.
|
||||
pub fn obkv_to_json(
|
||||
|
@ -2,7 +2,7 @@ use std::cmp;
|
||||
|
||||
use crate::{relative_from_absolute_position, Position};
|
||||
|
||||
pub const MAX_DISTANCE: u32 = 8;
|
||||
pub const MAX_DISTANCE: u32 = 4;
|
||||
|
||||
pub fn index_proximity(lhs: u32, rhs: u32) -> u32 {
|
||||
if lhs <= rhs {
|
||||
|
@ -1,569 +0,0 @@
|
||||
use std::mem::take;
|
||||
|
||||
use heed::BytesDecode;
|
||||
use itertools::Itertools;
|
||||
use log::debug;
|
||||
use ordered_float::OrderedFloat;
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use super::{Criterion, CriterionParameters, CriterionResult};
|
||||
use crate::facet::FacetType;
|
||||
use crate::heed_codec::facet::{FacetGroupKeyCodec, OrderedF64Codec};
|
||||
use crate::heed_codec::ByteSliceRefCodec;
|
||||
use crate::search::criteria::{resolve_query_tree, CriteriaBuilder, InitialCandidates};
|
||||
use crate::search::facet::{ascending_facet_sort, descending_facet_sort};
|
||||
use crate::search::query_tree::Operation;
|
||||
use crate::search::CriterionImplementationStrategy;
|
||||
use crate::{FieldId, Index, Result};
|
||||
|
||||
/// Threshold on the number of candidates that will make
|
||||
/// the system to choose between one algorithm or another.
|
||||
const CANDIDATES_THRESHOLD: u64 = 1000;
|
||||
|
||||
pub struct AscDesc<'t> {
|
||||
index: &'t Index,
|
||||
rtxn: &'t heed::RoTxn<'t>,
|
||||
field_name: String,
|
||||
field_id: Option<FieldId>,
|
||||
is_ascending: bool,
|
||||
query_tree: Option<Operation>,
|
||||
candidates: Box<dyn Iterator<Item = heed::Result<RoaringBitmap>> + 't>,
|
||||
allowed_candidates: RoaringBitmap,
|
||||
initial_candidates: InitialCandidates,
|
||||
faceted_candidates: RoaringBitmap,
|
||||
implementation_strategy: CriterionImplementationStrategy,
|
||||
parent: Box<dyn Criterion + 't>,
|
||||
}
|
||||
|
||||
impl<'t> AscDesc<'t> {
|
||||
pub fn asc(
|
||||
index: &'t Index,
|
||||
rtxn: &'t heed::RoTxn,
|
||||
parent: Box<dyn Criterion + 't>,
|
||||
field_name: String,
|
||||
implementation_strategy: CriterionImplementationStrategy,
|
||||
) -> Result<Self> {
|
||||
Self::new(index, rtxn, parent, field_name, true, implementation_strategy)
|
||||
}
|
||||
|
||||
pub fn desc(
|
||||
index: &'t Index,
|
||||
rtxn: &'t heed::RoTxn,
|
||||
parent: Box<dyn Criterion + 't>,
|
||||
field_name: String,
|
||||
implementation_strategy: CriterionImplementationStrategy,
|
||||
) -> Result<Self> {
|
||||
Self::new(index, rtxn, parent, field_name, false, implementation_strategy)
|
||||
}
|
||||
|
||||
fn new(
|
||||
index: &'t Index,
|
||||
rtxn: &'t heed::RoTxn,
|
||||
parent: Box<dyn Criterion + 't>,
|
||||
field_name: String,
|
||||
is_ascending: bool,
|
||||
implementation_strategy: CriterionImplementationStrategy,
|
||||
) -> Result<Self> {
|
||||
let fields_ids_map = index.fields_ids_map(rtxn)?;
|
||||
let field_id = fields_ids_map.id(&field_name);
|
||||
let faceted_candidates = match field_id {
|
||||
Some(field_id) => {
|
||||
let number_faceted =
|
||||
index.faceted_documents_ids(rtxn, field_id, FacetType::Number)?;
|
||||
let string_faceted =
|
||||
index.faceted_documents_ids(rtxn, field_id, FacetType::String)?;
|
||||
number_faceted | string_faceted
|
||||
}
|
||||
None => RoaringBitmap::default(),
|
||||
};
|
||||
|
||||
Ok(AscDesc {
|
||||
index,
|
||||
rtxn,
|
||||
field_name,
|
||||
field_id,
|
||||
is_ascending,
|
||||
query_tree: None,
|
||||
candidates: Box::new(std::iter::empty()),
|
||||
allowed_candidates: RoaringBitmap::new(),
|
||||
faceted_candidates,
|
||||
initial_candidates: InitialCandidates::Estimated(RoaringBitmap::new()),
|
||||
implementation_strategy,
|
||||
parent,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl<'t> Criterion for AscDesc<'t> {
|
||||
#[logging_timer::time("AscDesc::{}")]
|
||||
fn next(&mut self, params: &mut CriterionParameters) -> Result<Option<CriterionResult>> {
|
||||
// remove excluded candidates when next is called, instead of doing it in the loop.
|
||||
self.allowed_candidates -= params.excluded_candidates;
|
||||
|
||||
loop {
|
||||
debug!(
|
||||
"Facet {}({}) iteration",
|
||||
if self.is_ascending { "Asc" } else { "Desc" },
|
||||
self.field_name
|
||||
);
|
||||
|
||||
match self.candidates.next().transpose()? {
|
||||
None if !self.allowed_candidates.is_empty() => {
|
||||
return Ok(Some(CriterionResult {
|
||||
query_tree: self.query_tree.clone(),
|
||||
candidates: Some(take(&mut self.allowed_candidates)),
|
||||
filtered_candidates: None,
|
||||
initial_candidates: Some(self.initial_candidates.take()),
|
||||
}));
|
||||
}
|
||||
None => match self.parent.next(params)? {
|
||||
Some(CriterionResult {
|
||||
query_tree,
|
||||
candidates,
|
||||
filtered_candidates,
|
||||
initial_candidates,
|
||||
}) => {
|
||||
self.query_tree = query_tree;
|
||||
let mut candidates = match (&self.query_tree, candidates) {
|
||||
(_, Some(candidates)) => candidates,
|
||||
(Some(qt), None) => {
|
||||
let context = CriteriaBuilder::new(self.rtxn, self.index)?;
|
||||
resolve_query_tree(&context, qt, params.wdcache)?
|
||||
}
|
||||
(None, None) => self.index.documents_ids(self.rtxn)?,
|
||||
};
|
||||
|
||||
if let Some(filtered_candidates) = filtered_candidates {
|
||||
candidates &= filtered_candidates;
|
||||
}
|
||||
|
||||
match initial_candidates {
|
||||
Some(initial_candidates) => {
|
||||
self.initial_candidates |= initial_candidates
|
||||
}
|
||||
None => self.initial_candidates.map_inplace(|c| c | &candidates),
|
||||
}
|
||||
|
||||
if candidates.is_empty() {
|
||||
continue;
|
||||
}
|
||||
|
||||
self.allowed_candidates = &candidates - params.excluded_candidates;
|
||||
self.candidates = match self.field_id {
|
||||
Some(field_id) => facet_ordered(
|
||||
self.index,
|
||||
self.rtxn,
|
||||
field_id,
|
||||
self.is_ascending,
|
||||
candidates & &self.faceted_candidates,
|
||||
self.implementation_strategy,
|
||||
)?,
|
||||
None => Box::new(std::iter::empty()),
|
||||
};
|
||||
}
|
||||
None => return Ok(None),
|
||||
},
|
||||
Some(mut candidates) => {
|
||||
candidates -= params.excluded_candidates;
|
||||
self.allowed_candidates -= &candidates;
|
||||
return Ok(Some(CriterionResult {
|
||||
query_tree: self.query_tree.clone(),
|
||||
candidates: Some(candidates),
|
||||
filtered_candidates: None,
|
||||
initial_candidates: Some(self.initial_candidates.take()),
|
||||
}));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn facet_ordered_iterative<'t>(
|
||||
index: &'t Index,
|
||||
rtxn: &'t heed::RoTxn,
|
||||
field_id: FieldId,
|
||||
is_ascending: bool,
|
||||
candidates: RoaringBitmap,
|
||||
) -> Result<Box<dyn Iterator<Item = heed::Result<RoaringBitmap>> + 't>> {
|
||||
let number_iter = iterative_facet_number_ordered_iter(
|
||||
index,
|
||||
rtxn,
|
||||
field_id,
|
||||
is_ascending,
|
||||
candidates.clone(),
|
||||
)?;
|
||||
let string_iter =
|
||||
iterative_facet_string_ordered_iter(index, rtxn, field_id, is_ascending, candidates)?;
|
||||
Ok(Box::new(number_iter.chain(string_iter).map(Ok)) as Box<dyn Iterator<Item = _>>)
|
||||
}
|
||||
|
||||
fn facet_extreme_value<'t>(
|
||||
mut extreme_it: impl Iterator<Item = heed::Result<(RoaringBitmap, &'t [u8])>> + 't,
|
||||
) -> Result<Option<f64>> {
|
||||
let extreme_value =
|
||||
if let Some(extreme_value) = extreme_it.next() { extreme_value } else { return Ok(None) };
|
||||
let (_, extreme_value) = extreme_value?;
|
||||
|
||||
Ok(OrderedF64Codec::bytes_decode(extreme_value))
|
||||
}
|
||||
|
||||
pub fn facet_min_value<'t>(
|
||||
index: &'t Index,
|
||||
rtxn: &'t heed::RoTxn,
|
||||
field_id: FieldId,
|
||||
candidates: RoaringBitmap,
|
||||
) -> Result<Option<f64>> {
|
||||
let db = index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>();
|
||||
let it = ascending_facet_sort(rtxn, db, field_id, candidates)?;
|
||||
facet_extreme_value(it)
|
||||
}
|
||||
|
||||
pub fn facet_max_value<'t>(
|
||||
index: &'t Index,
|
||||
rtxn: &'t heed::RoTxn,
|
||||
field_id: FieldId,
|
||||
candidates: RoaringBitmap,
|
||||
) -> Result<Option<f64>> {
|
||||
let db = index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>();
|
||||
let it = descending_facet_sort(rtxn, db, field_id, candidates)?;
|
||||
facet_extreme_value(it)
|
||||
}
|
||||
|
||||
fn facet_ordered_set_based<'t>(
|
||||
index: &'t Index,
|
||||
rtxn: &'t heed::RoTxn,
|
||||
field_id: FieldId,
|
||||
is_ascending: bool,
|
||||
candidates: RoaringBitmap,
|
||||
) -> Result<Box<dyn Iterator<Item = heed::Result<RoaringBitmap>> + 't>> {
|
||||
let number_db =
|
||||
index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>();
|
||||
let string_db =
|
||||
index.facet_id_string_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>();
|
||||
|
||||
let (number_iter, string_iter) = if is_ascending {
|
||||
let number_iter = ascending_facet_sort(rtxn, number_db, field_id, candidates.clone())?;
|
||||
let string_iter = ascending_facet_sort(rtxn, string_db, field_id, candidates)?;
|
||||
|
||||
(itertools::Either::Left(number_iter), itertools::Either::Left(string_iter))
|
||||
} else {
|
||||
let number_iter = descending_facet_sort(rtxn, number_db, field_id, candidates.clone())?;
|
||||
let string_iter = descending_facet_sort(rtxn, string_db, field_id, candidates)?;
|
||||
|
||||
(itertools::Either::Right(number_iter), itertools::Either::Right(string_iter))
|
||||
};
|
||||
|
||||
Ok(Box::new(number_iter.chain(string_iter).map(|res| res.map(|(doc_ids, _)| doc_ids))))
|
||||
}
|
||||
|
||||
/// Returns an iterator over groups of the given candidates in ascending or descending order.
|
||||
///
|
||||
/// It will either use an iterative or a recursive method on the whole facet database depending
|
||||
/// on the number of candidates to rank.
|
||||
fn facet_ordered<'t>(
|
||||
index: &'t Index,
|
||||
rtxn: &'t heed::RoTxn,
|
||||
field_id: FieldId,
|
||||
is_ascending: bool,
|
||||
candidates: RoaringBitmap,
|
||||
implementation_strategy: CriterionImplementationStrategy,
|
||||
) -> Result<Box<dyn Iterator<Item = heed::Result<RoaringBitmap>> + 't>> {
|
||||
match implementation_strategy {
|
||||
CriterionImplementationStrategy::OnlyIterative => {
|
||||
facet_ordered_iterative(index, rtxn, field_id, is_ascending, candidates)
|
||||
}
|
||||
CriterionImplementationStrategy::OnlySetBased => {
|
||||
facet_ordered_set_based(index, rtxn, field_id, is_ascending, candidates)
|
||||
}
|
||||
CriterionImplementationStrategy::Dynamic => {
|
||||
if candidates.len() <= CANDIDATES_THRESHOLD {
|
||||
facet_ordered_iterative(index, rtxn, field_id, is_ascending, candidates)
|
||||
} else {
|
||||
facet_ordered_set_based(index, rtxn, field_id, is_ascending, candidates)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Fetch the whole list of candidates facet number values one by one and order them by it.
|
||||
///
|
||||
/// This function is fast when the amount of candidates to rank is small.
|
||||
fn iterative_facet_number_ordered_iter<'t>(
|
||||
index: &'t Index,
|
||||
rtxn: &'t heed::RoTxn,
|
||||
field_id: FieldId,
|
||||
is_ascending: bool,
|
||||
candidates: RoaringBitmap,
|
||||
) -> Result<impl Iterator<Item = RoaringBitmap> + 't> {
|
||||
let mut docids_values = Vec::with_capacity(candidates.len() as usize);
|
||||
for docid in candidates.iter() {
|
||||
let left = (field_id, docid, f64::MIN);
|
||||
let right = (field_id, docid, f64::MAX);
|
||||
let mut iter = index.field_id_docid_facet_f64s.range(rtxn, &(left..=right))?;
|
||||
let entry = if is_ascending { iter.next() } else { iter.last() };
|
||||
if let Some(((_, _, value), ())) = entry.transpose()? {
|
||||
docids_values.push((docid, OrderedFloat(value)));
|
||||
}
|
||||
}
|
||||
docids_values.sort_unstable_by_key(|(_, v)| *v);
|
||||
let iter = docids_values.into_iter();
|
||||
let iter = if is_ascending {
|
||||
Box::new(iter) as Box<dyn Iterator<Item = _>>
|
||||
} else {
|
||||
Box::new(iter.rev())
|
||||
};
|
||||
|
||||
// The itertools GroupBy iterator doesn't provide an owned version, we are therefore
|
||||
// required to collect the result into an owned collection (a Vec).
|
||||
// https://github.com/rust-itertools/itertools/issues/499
|
||||
#[allow(clippy::needless_collect)]
|
||||
let vec: Vec<_> = iter
|
||||
.group_by(|(_, v)| *v)
|
||||
.into_iter()
|
||||
.map(|(_, ids)| ids.map(|(id, _)| id).collect())
|
||||
.collect();
|
||||
|
||||
Ok(vec.into_iter())
|
||||
}
|
||||
|
||||
/// Fetch the whole list of candidates facet string values one by one and order them by it.
|
||||
///
|
||||
/// This function is fast when the amount of candidates to rank is small.
|
||||
fn iterative_facet_string_ordered_iter<'t>(
|
||||
index: &'t Index,
|
||||
rtxn: &'t heed::RoTxn,
|
||||
field_id: FieldId,
|
||||
is_ascending: bool,
|
||||
candidates: RoaringBitmap,
|
||||
) -> Result<impl Iterator<Item = RoaringBitmap> + 't> {
|
||||
let mut docids_values = Vec::with_capacity(candidates.len() as usize);
|
||||
for docid in candidates.iter() {
|
||||
let left = (field_id, docid, "");
|
||||
let right = (field_id, docid.saturating_add(1), "");
|
||||
// FIXME Doing this means that it will never be possible to retrieve
|
||||
// the document with id 2^32, not sure this is a real problem.
|
||||
let mut iter = index.field_id_docid_facet_strings.range(rtxn, &(left..right))?;
|
||||
let entry = if is_ascending { iter.next() } else { iter.last() };
|
||||
if let Some(((_, _, value), _)) = entry.transpose()? {
|
||||
docids_values.push((docid, value));
|
||||
}
|
||||
}
|
||||
docids_values.sort_unstable_by_key(|(_, v)| *v);
|
||||
let iter = docids_values.into_iter();
|
||||
let iter = if is_ascending {
|
||||
Box::new(iter) as Box<dyn Iterator<Item = _>>
|
||||
} else {
|
||||
Box::new(iter.rev())
|
||||
};
|
||||
|
||||
// The itertools GroupBy iterator doesn't provide an owned version, we are therefore
|
||||
// required to collect the result into an owned collection (a Vec).
|
||||
// https://github.com/rust-itertools/itertools/issues/499
|
||||
#[allow(clippy::needless_collect)]
|
||||
let vec: Vec<_> = iter
|
||||
.group_by(|(_, v)| *v)
|
||||
.into_iter()
|
||||
.map(|(_, ids)| ids.map(|(id, _)| id).collect())
|
||||
.collect();
|
||||
|
||||
Ok(vec.into_iter())
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::str::FromStr;
|
||||
|
||||
use big_s::S;
|
||||
use maplit::hashset;
|
||||
|
||||
use crate::index::tests::TempIndex;
|
||||
use crate::{AscDesc, Criterion, Filter, Search, SearchResult};
|
||||
|
||||
// Note that in this test, only the iterative sort algorithms are used. Set the CANDIDATES_THESHOLD
|
||||
// constant to 0 to ensure that the other sort algorithms are also correct.
|
||||
#[test]
|
||||
fn sort_criterion_placeholder() {
|
||||
let index = TempIndex::new();
|
||||
|
||||
index
|
||||
.update_settings(|settings| {
|
||||
settings.set_primary_key("id".to_owned());
|
||||
settings
|
||||
.set_sortable_fields(maplit::hashset! { S("id"), S("mod_10"), S("mod_20") });
|
||||
settings.set_criteria(vec![Criterion::Sort]);
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
let mut docs = vec![];
|
||||
for i in 0..100 {
|
||||
docs.push(
|
||||
serde_json::json!({ "id": i, "mod_10": format!("{}", i % 10), "mod_20": i % 20 }),
|
||||
);
|
||||
}
|
||||
|
||||
index.add_documents(documents!(docs)).unwrap();
|
||||
|
||||
let all_ids = (0..100).collect::<Vec<_>>();
|
||||
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
|
||||
let mut search = Search::new(&rtxn, &index);
|
||||
search.sort_criteria(vec![AscDesc::from_str("mod_10:desc").unwrap()]);
|
||||
search.limit(100);
|
||||
|
||||
let SearchResult { mut documents_ids, .. } = search.execute().unwrap();
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[9, 19, 29, 39, 49, 59, 69, 79, 89, 99, 8, 18, 28, 38, 48, 58, 68, 78, 88, 98, 7, 17, 27, 37, 47, 57, 67, 77, 87, 97, 6, 16, 26, 36, 46, 56, 66, 76, 86, 96, 5, 15, 25, 35, 45, 55, 65, 75, 85, 95, 4, 14, 24, 34, 44, 54, 64, 74, 84, 94, 3, 13, 23, 33, 43, 53, 63, 73, 83, 93, 2, 12, 22, 32, 42, 52, 62, 72, 82, 92, 1, 11, 21, 31, 41, 51, 61, 71, 81, 91, 0, 10, 20, 30, 40, 50, 60, 70, 80, 90]");
|
||||
documents_ids.sort();
|
||||
assert_eq!(all_ids, documents_ids);
|
||||
|
||||
let mut search = Search::new(&rtxn, &index);
|
||||
search.sort_criteria(vec![
|
||||
AscDesc::from_str("mod_10:desc").unwrap(),
|
||||
AscDesc::from_str("id:desc").unwrap(),
|
||||
]);
|
||||
search.limit(100);
|
||||
|
||||
let SearchResult { mut documents_ids, .. } = search.execute().unwrap();
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[99, 89, 79, 69, 59, 49, 39, 29, 19, 9, 98, 88, 78, 68, 58, 48, 38, 28, 18, 8, 97, 87, 77, 67, 57, 47, 37, 27, 17, 7, 96, 86, 76, 66, 56, 46, 36, 26, 16, 6, 95, 85, 75, 65, 55, 45, 35, 25, 15, 5, 94, 84, 74, 64, 54, 44, 34, 24, 14, 4, 93, 83, 73, 63, 53, 43, 33, 23, 13, 3, 92, 82, 72, 62, 52, 42, 32, 22, 12, 2, 91, 81, 71, 61, 51, 41, 31, 21, 11, 1, 90, 80, 70, 60, 50, 40, 30, 20, 10, 0]");
|
||||
documents_ids.sort();
|
||||
assert_eq!(all_ids, documents_ids);
|
||||
|
||||
let mut search = Search::new(&rtxn, &index);
|
||||
search.sort_criteria(vec![
|
||||
AscDesc::from_str("mod_10:desc").unwrap(),
|
||||
AscDesc::from_str("mod_20:asc").unwrap(),
|
||||
]);
|
||||
search.limit(100);
|
||||
|
||||
let SearchResult { mut documents_ids, .. } = search.execute().unwrap();
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[9, 29, 49, 69, 89, 19, 39, 59, 79, 99, 8, 28, 48, 68, 88, 18, 38, 58, 78, 98, 7, 27, 47, 67, 87, 17, 37, 57, 77, 97, 6, 26, 46, 66, 86, 16, 36, 56, 76, 96, 5, 25, 45, 65, 85, 15, 35, 55, 75, 95, 4, 24, 44, 64, 84, 14, 34, 54, 74, 94, 3, 23, 43, 63, 83, 13, 33, 53, 73, 93, 2, 22, 42, 62, 82, 12, 32, 52, 72, 92, 1, 21, 41, 61, 81, 11, 31, 51, 71, 91, 0, 20, 40, 60, 80, 10, 30, 50, 70, 90]");
|
||||
documents_ids.sort();
|
||||
assert_eq!(all_ids, documents_ids);
|
||||
|
||||
let mut search = Search::new(&rtxn, &index);
|
||||
search.sort_criteria(vec![
|
||||
AscDesc::from_str("mod_10:desc").unwrap(),
|
||||
AscDesc::from_str("mod_20:desc").unwrap(),
|
||||
]);
|
||||
search.limit(100);
|
||||
|
||||
let SearchResult { mut documents_ids, .. } = search.execute().unwrap();
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[19, 39, 59, 79, 99, 9, 29, 49, 69, 89, 18, 38, 58, 78, 98, 8, 28, 48, 68, 88, 17, 37, 57, 77, 97, 7, 27, 47, 67, 87, 16, 36, 56, 76, 96, 6, 26, 46, 66, 86, 15, 35, 55, 75, 95, 5, 25, 45, 65, 85, 14, 34, 54, 74, 94, 4, 24, 44, 64, 84, 13, 33, 53, 73, 93, 3, 23, 43, 63, 83, 12, 32, 52, 72, 92, 2, 22, 42, 62, 82, 11, 31, 51, 71, 91, 1, 21, 41, 61, 81, 10, 30, 50, 70, 90, 0, 20, 40, 60, 80]");
|
||||
documents_ids.sort();
|
||||
assert_eq!(all_ids, documents_ids);
|
||||
|
||||
let mut search = Search::new(&rtxn, &index);
|
||||
search.sort_criteria(vec![
|
||||
AscDesc::from_str("mod_10:desc").unwrap(),
|
||||
AscDesc::from_str("mod_20:desc").unwrap(),
|
||||
AscDesc::from_str("id:desc").unwrap(),
|
||||
]);
|
||||
search.limit(100);
|
||||
|
||||
let SearchResult { mut documents_ids, .. } = search.execute().unwrap();
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[99, 79, 59, 39, 19, 89, 69, 49, 29, 9, 98, 78, 58, 38, 18, 88, 68, 48, 28, 8, 97, 77, 57, 37, 17, 87, 67, 47, 27, 7, 96, 76, 56, 36, 16, 86, 66, 46, 26, 6, 95, 75, 55, 35, 15, 85, 65, 45, 25, 5, 94, 74, 54, 34, 14, 84, 64, 44, 24, 4, 93, 73, 53, 33, 13, 83, 63, 43, 23, 3, 92, 72, 52, 32, 12, 82, 62, 42, 22, 2, 91, 71, 51, 31, 11, 81, 61, 41, 21, 1, 90, 70, 50, 30, 10, 80, 60, 40, 20, 0]");
|
||||
documents_ids.sort();
|
||||
assert_eq!(all_ids, documents_ids);
|
||||
}
|
||||
|
||||
// Note that in this test, only the iterative sort algorithms are used. Set the CANDIDATES_THESHOLD
|
||||
// constant to 0 to ensure that the other sort algorithms are also correct.
|
||||
#[test]
|
||||
fn sort_criterion_non_placeholder() {
|
||||
let index = TempIndex::new();
|
||||
|
||||
index
|
||||
.update_settings(|settings| {
|
||||
settings.set_primary_key("id".to_owned());
|
||||
settings.set_filterable_fields(hashset! { S("id"), S("mod_10"), S("mod_20") });
|
||||
settings.set_sortable_fields(hashset! { S("id"), S("mod_10"), S("mod_20") });
|
||||
settings.set_criteria(vec![Criterion::Sort]);
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
let mut docs = vec![];
|
||||
for i in 0..100 {
|
||||
docs.push(
|
||||
serde_json::json!({ "id": i, "mod_10": format!("{}", i % 10), "mod_20": i % 20 }),
|
||||
);
|
||||
}
|
||||
|
||||
index.add_documents(documents!(docs)).unwrap();
|
||||
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
|
||||
let mut search = Search::new(&rtxn, &index);
|
||||
search.filter(
|
||||
Filter::from_str("mod_10 IN [1, 0, 2] OR mod_20 IN [10, 13] OR id IN [5, 6]")
|
||||
.unwrap()
|
||||
.unwrap(),
|
||||
);
|
||||
search.sort_criteria(vec![
|
||||
AscDesc::from_str("mod_10:desc").unwrap(),
|
||||
AscDesc::from_str("mod_20:asc").unwrap(),
|
||||
AscDesc::from_str("id:desc").unwrap(),
|
||||
]);
|
||||
search.limit(100);
|
||||
|
||||
let SearchResult { mut documents_ids, .. } = search.execute().unwrap();
|
||||
// The order should be in increasing value of the id modulo 10, followed by increasing value of the id modulo 20, followed by decreasing value of the id
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[6, 5, 93, 73, 53, 33, 13, 82, 62, 42, 22, 2, 92, 72, 52, 32, 12, 81, 61, 41, 21, 1, 91, 71, 51, 31, 11, 80, 60, 40, 20, 0, 90, 70, 50, 30, 10]");
|
||||
let expected_ids = (0..100)
|
||||
.filter(|id| {
|
||||
[1, 0, 2].contains(&(id % 10))
|
||||
|| [10, 13].contains(&(id % 20))
|
||||
|| [5, 6].contains(id)
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
documents_ids.sort();
|
||||
assert_eq!(expected_ids, documents_ids);
|
||||
|
||||
let mut search = Search::new(&rtxn, &index);
|
||||
search.filter(
|
||||
Filter::from_str("mod_10 IN [7, 8, 0] OR mod_20 IN [1, 15, 16] OR id IN [0, 4]")
|
||||
.unwrap()
|
||||
.unwrap(),
|
||||
);
|
||||
search.sort_criteria(vec![
|
||||
AscDesc::from_str("mod_10:asc").unwrap(),
|
||||
AscDesc::from_str("mod_20:asc").unwrap(),
|
||||
AscDesc::from_str("id:desc").unwrap(),
|
||||
]);
|
||||
search.limit(100);
|
||||
|
||||
let SearchResult { mut documents_ids, .. } = search.execute().unwrap();
|
||||
// The order should be in increasing value of the id modulo 10, followed by increasing value of the id modulo 20, followed by decreasing value of the id
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[80, 60, 40, 20, 0, 90, 70, 50, 30, 10, 81, 61, 41, 21, 1, 4, 95, 75, 55, 35, 15, 96, 76, 56, 36, 16, 87, 67, 47, 27, 7, 97, 77, 57, 37, 17, 88, 68, 48, 28, 8, 98, 78, 58, 38, 18]");
|
||||
let expected_ids = (0..100)
|
||||
.filter(|id| {
|
||||
[7, 8, 0].contains(&(id % 10))
|
||||
|| [1, 15, 16].contains(&(id % 20))
|
||||
|| [0, 4].contains(id)
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
documents_ids.sort();
|
||||
assert_eq!(expected_ids, documents_ids);
|
||||
|
||||
let mut search = Search::new(&rtxn, &index);
|
||||
search.filter(
|
||||
Filter::from_str("mod_10 IN [1, 0, 2] OR mod_20 IN [10, 13] OR id IN [5, 6]")
|
||||
.unwrap()
|
||||
.unwrap(),
|
||||
);
|
||||
search.sort_criteria(vec![AscDesc::from_str("id:desc").unwrap()]);
|
||||
search.limit(100);
|
||||
|
||||
let SearchResult { documents_ids, .. } = search.execute().unwrap();
|
||||
// The order should be in decreasing value of the id
|
||||
let mut expected_ids = (0..100)
|
||||
.filter(|id| {
|
||||
[1, 0, 2].contains(&(id % 10))
|
||||
|| [10, 13].contains(&(id % 20))
|
||||
|| [5, 6].contains(id)
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
expected_ids.sort();
|
||||
expected_ids.reverse();
|
||||
assert_eq!(expected_ids, documents_ids);
|
||||
}
|
||||
}
|
@ -1,709 +0,0 @@
|
||||
use std::cmp::{self, Ordering};
|
||||
use std::collections::binary_heap::PeekMut;
|
||||
use std::collections::{btree_map, BTreeMap, BinaryHeap, HashMap};
|
||||
use std::iter::Peekable;
|
||||
use std::mem::take;
|
||||
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use super::{resolve_query_tree, Context, Criterion, CriterionParameters, CriterionResult};
|
||||
use crate::search::criteria::{InitialCandidates, Query};
|
||||
use crate::search::query_tree::{Operation, QueryKind};
|
||||
use crate::search::{
|
||||
build_dfa, word_derivations, CriterionImplementationStrategy, WordDerivationsCache,
|
||||
};
|
||||
use crate::Result;
|
||||
|
||||
/// To be able to divide integers by the number of words in the query
|
||||
/// we want to find a multiplier that allow us to divide by any number between 1 and 10.
|
||||
/// We chose the LCM of all numbers between 1 and 10 as the multiplier (https://en.wikipedia.org/wiki/Least_common_multiple).
|
||||
const LCM_10_FIRST_NUMBERS: u32 = 2520;
|
||||
|
||||
/// Threshold on the number of candidates that will make
|
||||
/// the system to choose between one algorithm or another.
|
||||
const CANDIDATES_THRESHOLD: u64 = 500;
|
||||
|
||||
type FlattenedQueryTree = Vec<Vec<Vec<Query>>>;
|
||||
|
||||
pub struct Attribute<'t> {
|
||||
ctx: &'t dyn Context<'t>,
|
||||
state: Option<(Operation, FlattenedQueryTree, RoaringBitmap)>,
|
||||
initial_candidates: InitialCandidates,
|
||||
parent: Box<dyn Criterion + 't>,
|
||||
linear_buckets: Option<btree_map::IntoIter<u64, RoaringBitmap>>,
|
||||
set_buckets: Option<BinaryHeap<Branch<'t>>>,
|
||||
implementation_strategy: CriterionImplementationStrategy,
|
||||
}
|
||||
|
||||
impl<'t> Attribute<'t> {
|
||||
pub fn new(
|
||||
ctx: &'t dyn Context<'t>,
|
||||
parent: Box<dyn Criterion + 't>,
|
||||
implementation_strategy: CriterionImplementationStrategy,
|
||||
) -> Self {
|
||||
Attribute {
|
||||
ctx,
|
||||
state: None,
|
||||
initial_candidates: InitialCandidates::Estimated(RoaringBitmap::new()),
|
||||
parent,
|
||||
linear_buckets: None,
|
||||
set_buckets: None,
|
||||
implementation_strategy,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'t> Criterion for Attribute<'t> {
|
||||
#[logging_timer::time("Attribute::{}")]
|
||||
fn next(&mut self, params: &mut CriterionParameters) -> Result<Option<CriterionResult>> {
|
||||
// remove excluded candidates when next is called, instead of doing it in the loop.
|
||||
if let Some((_, _, allowed_candidates)) = self.state.as_mut() {
|
||||
*allowed_candidates -= params.excluded_candidates;
|
||||
}
|
||||
|
||||
loop {
|
||||
match self.state.take() {
|
||||
Some((query_tree, _, allowed_candidates)) if allowed_candidates.is_empty() => {
|
||||
return Ok(Some(CriterionResult {
|
||||
query_tree: Some(query_tree),
|
||||
candidates: Some(RoaringBitmap::new()),
|
||||
filtered_candidates: None,
|
||||
initial_candidates: Some(self.initial_candidates.take()),
|
||||
}));
|
||||
}
|
||||
Some((query_tree, flattened_query_tree, mut allowed_candidates)) => {
|
||||
let found_candidates = if matches!(
|
||||
self.implementation_strategy,
|
||||
CriterionImplementationStrategy::OnlyIterative
|
||||
) || (matches!(
|
||||
self.implementation_strategy,
|
||||
CriterionImplementationStrategy::Dynamic
|
||||
) && allowed_candidates.len()
|
||||
< CANDIDATES_THRESHOLD)
|
||||
{
|
||||
let linear_buckets = match self.linear_buckets.as_mut() {
|
||||
Some(linear_buckets) => linear_buckets,
|
||||
None => {
|
||||
let new_buckets = initialize_linear_buckets(
|
||||
self.ctx,
|
||||
&flattened_query_tree,
|
||||
&allowed_candidates,
|
||||
)?;
|
||||
self.linear_buckets.get_or_insert(new_buckets.into_iter())
|
||||
}
|
||||
};
|
||||
|
||||
match linear_buckets.next() {
|
||||
Some((_score, candidates)) => candidates,
|
||||
None => {
|
||||
return Ok(Some(CriterionResult {
|
||||
query_tree: Some(query_tree),
|
||||
candidates: Some(RoaringBitmap::new()),
|
||||
filtered_candidates: None,
|
||||
initial_candidates: Some(self.initial_candidates.take()),
|
||||
}));
|
||||
}
|
||||
}
|
||||
} else {
|
||||
let set_buckets = match self.set_buckets.as_mut() {
|
||||
Some(set_buckets) => set_buckets,
|
||||
None => {
|
||||
let new_buckets = initialize_set_buckets(
|
||||
self.ctx,
|
||||
&flattened_query_tree,
|
||||
&allowed_candidates,
|
||||
params.wdcache,
|
||||
)?;
|
||||
self.set_buckets.get_or_insert(new_buckets)
|
||||
}
|
||||
};
|
||||
|
||||
match set_compute_candidates(set_buckets, &allowed_candidates)? {
|
||||
Some((_score, candidates)) => candidates,
|
||||
None => {
|
||||
return Ok(Some(CriterionResult {
|
||||
query_tree: Some(query_tree),
|
||||
candidates: Some(allowed_candidates),
|
||||
filtered_candidates: None,
|
||||
initial_candidates: Some(self.initial_candidates.take()),
|
||||
}));
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
allowed_candidates -= &found_candidates;
|
||||
|
||||
self.state =
|
||||
Some((query_tree.clone(), flattened_query_tree, allowed_candidates));
|
||||
|
||||
return Ok(Some(CriterionResult {
|
||||
query_tree: Some(query_tree),
|
||||
candidates: Some(found_candidates),
|
||||
filtered_candidates: None,
|
||||
initial_candidates: Some(self.initial_candidates.take()),
|
||||
}));
|
||||
}
|
||||
None => match self.parent.next(params)? {
|
||||
Some(CriterionResult {
|
||||
query_tree: Some(query_tree),
|
||||
candidates,
|
||||
filtered_candidates,
|
||||
initial_candidates,
|
||||
}) => {
|
||||
let mut candidates = match candidates {
|
||||
Some(candidates) => candidates,
|
||||
None => {
|
||||
resolve_query_tree(self.ctx, &query_tree, params.wdcache)?
|
||||
- params.excluded_candidates
|
||||
}
|
||||
};
|
||||
|
||||
if let Some(filtered_candidates) = filtered_candidates {
|
||||
candidates &= filtered_candidates;
|
||||
}
|
||||
|
||||
let flattened_query_tree = flatten_query_tree(&query_tree);
|
||||
|
||||
match initial_candidates {
|
||||
Some(initial_candidates) => {
|
||||
self.initial_candidates |= initial_candidates
|
||||
}
|
||||
None => self.initial_candidates.map_inplace(|c| c | &candidates),
|
||||
}
|
||||
|
||||
self.state = Some((query_tree, flattened_query_tree, candidates));
|
||||
self.linear_buckets = None;
|
||||
}
|
||||
Some(CriterionResult {
|
||||
query_tree: None,
|
||||
candidates,
|
||||
filtered_candidates,
|
||||
initial_candidates,
|
||||
}) => {
|
||||
return Ok(Some(CriterionResult {
|
||||
query_tree: None,
|
||||
candidates,
|
||||
filtered_candidates,
|
||||
initial_candidates,
|
||||
}));
|
||||
}
|
||||
None => return Ok(None),
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// QueryPositionIterator is an Iterator over positions of a Query,
|
||||
/// It contains iterators over words positions.
|
||||
struct QueryPositionIterator<'t> {
|
||||
#[allow(clippy::type_complexity)]
|
||||
inner:
|
||||
Vec<Peekable<Box<dyn Iterator<Item = heed::Result<((&'t str, u32), RoaringBitmap)>> + 't>>>,
|
||||
}
|
||||
|
||||
impl<'t> QueryPositionIterator<'t> {
|
||||
fn new(
|
||||
ctx: &'t dyn Context<'t>,
|
||||
queries: &[Query],
|
||||
wdcache: &mut WordDerivationsCache,
|
||||
) -> Result<Self> {
|
||||
let mut inner = Vec::with_capacity(queries.len());
|
||||
for query in queries {
|
||||
let in_prefix_cache = query.prefix && ctx.in_prefix_cache(query.kind.word());
|
||||
match &query.kind {
|
||||
QueryKind::Exact { word, .. } => {
|
||||
if !query.prefix || in_prefix_cache {
|
||||
let word = query.kind.word();
|
||||
let iter = ctx.word_position_iterator(word, in_prefix_cache)?;
|
||||
inner.push(iter.peekable());
|
||||
} else {
|
||||
for (word, _) in word_derivations(word, true, 0, ctx.words_fst(), wdcache)?
|
||||
{
|
||||
let iter = ctx.word_position_iterator(word, in_prefix_cache)?;
|
||||
inner.push(iter.peekable());
|
||||
}
|
||||
}
|
||||
}
|
||||
QueryKind::Tolerant { typo, word } => {
|
||||
for (word, _) in
|
||||
word_derivations(word, query.prefix, *typo, ctx.words_fst(), wdcache)?
|
||||
{
|
||||
let iter = ctx.word_position_iterator(word, in_prefix_cache)?;
|
||||
inner.push(iter.peekable());
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
Ok(Self { inner })
|
||||
}
|
||||
}
|
||||
|
||||
impl<'t> Iterator for QueryPositionIterator<'t> {
|
||||
type Item = heed::Result<(u32, RoaringBitmap)>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
// sort inner words from the closest next position to the farthest next position.
|
||||
let expected_pos = self
|
||||
.inner
|
||||
.iter_mut()
|
||||
.filter_map(|wli| match wli.peek() {
|
||||
Some(Ok(((_, pos), _))) => Some(*pos),
|
||||
_ => None,
|
||||
})
|
||||
.min()?;
|
||||
|
||||
let mut candidates = None;
|
||||
for wli in self.inner.iter_mut() {
|
||||
if let Some(Ok(((_, pos), _))) = wli.peek() {
|
||||
if *pos > expected_pos {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
match wli.next() {
|
||||
Some(Ok((_, docids))) => {
|
||||
candidates = match candidates.take() {
|
||||
Some(candidates) => Some(candidates | docids),
|
||||
None => Some(docids),
|
||||
}
|
||||
}
|
||||
Some(Err(e)) => return Some(Err(e)),
|
||||
None => continue,
|
||||
}
|
||||
}
|
||||
|
||||
candidates.map(|candidates| Ok((expected_pos, candidates)))
|
||||
}
|
||||
}
|
||||
|
||||
/// A Branch is represent a possible alternative of the original query and is build with the Query Tree,
|
||||
/// This branch allows us to iterate over meta-interval of positions.
|
||||
struct Branch<'t> {
|
||||
query_level_iterator: Vec<(u32, RoaringBitmap, Peekable<QueryPositionIterator<'t>>)>,
|
||||
last_result: (u32, RoaringBitmap),
|
||||
branch_size: u32,
|
||||
}
|
||||
|
||||
impl<'t> Branch<'t> {
|
||||
fn new(
|
||||
ctx: &'t dyn Context<'t>,
|
||||
flatten_branch: &[Vec<Query>],
|
||||
wdcache: &mut WordDerivationsCache,
|
||||
allowed_candidates: &RoaringBitmap,
|
||||
) -> Result<Self> {
|
||||
let mut query_level_iterator = Vec::new();
|
||||
for queries in flatten_branch {
|
||||
let mut qli = QueryPositionIterator::new(ctx, queries, wdcache)?.peekable();
|
||||
let (pos, docids) = qli.next().transpose()?.unwrap_or((0, RoaringBitmap::new()));
|
||||
query_level_iterator.push((pos, docids & allowed_candidates, qli));
|
||||
}
|
||||
|
||||
let mut branch = Self {
|
||||
query_level_iterator,
|
||||
last_result: (0, RoaringBitmap::new()),
|
||||
branch_size: flatten_branch.len() as u32,
|
||||
};
|
||||
|
||||
branch.update_last_result();
|
||||
|
||||
Ok(branch)
|
||||
}
|
||||
|
||||
/// return the next meta-interval of the branch,
|
||||
/// and update inner interval in order to be ranked by the BinaryHeap.
|
||||
fn next(&mut self, allowed_candidates: &RoaringBitmap) -> heed::Result<bool> {
|
||||
// update the first query.
|
||||
let index = self.lowest_iterator_index();
|
||||
match self.query_level_iterator.get_mut(index) {
|
||||
Some((cur_pos, cur_docids, qli)) => match qli.next().transpose()? {
|
||||
Some((next_pos, next_docids)) => {
|
||||
*cur_pos = next_pos;
|
||||
*cur_docids |= next_docids & allowed_candidates;
|
||||
self.update_last_result();
|
||||
Ok(true)
|
||||
}
|
||||
None => Ok(false),
|
||||
},
|
||||
None => Ok(false),
|
||||
}
|
||||
}
|
||||
|
||||
fn lowest_iterator_index(&mut self) -> usize {
|
||||
let (index, _) = self
|
||||
.query_level_iterator
|
||||
.iter_mut()
|
||||
.map(|(pos, docids, qli)| {
|
||||
if docids.is_empty() {
|
||||
0
|
||||
} else {
|
||||
match qli.peek() {
|
||||
Some(result) => {
|
||||
result.as_ref().map(|(next_pos, _)| *next_pos - *pos).unwrap_or(0)
|
||||
}
|
||||
None => u32::MAX,
|
||||
}
|
||||
}
|
||||
})
|
||||
.enumerate()
|
||||
.min_by_key(|(_, diff)| *diff)
|
||||
.unwrap_or((0, 0));
|
||||
|
||||
index
|
||||
}
|
||||
|
||||
fn update_last_result(&mut self) {
|
||||
let mut result_pos = 0;
|
||||
let mut result_docids = None;
|
||||
|
||||
for (pos, docids, _qli) in self.query_level_iterator.iter() {
|
||||
result_pos += pos;
|
||||
result_docids = result_docids
|
||||
.take()
|
||||
.map_or_else(|| Some(docids.clone()), |candidates| Some(candidates & docids));
|
||||
}
|
||||
|
||||
// remove last result docids from inner iterators
|
||||
if let Some(docids) = result_docids.as_ref() {
|
||||
for (_, query_docids, _) in self.query_level_iterator.iter_mut() {
|
||||
*query_docids -= docids;
|
||||
}
|
||||
}
|
||||
|
||||
self.last_result = (result_pos, result_docids.unwrap_or_default());
|
||||
}
|
||||
|
||||
/// return the score of the current inner interval.
|
||||
fn compute_rank(&self) -> u32 {
|
||||
// we compute a rank from the position.
|
||||
let (pos, _) = self.last_result;
|
||||
pos.saturating_sub((0..self.branch_size).sum()) * LCM_10_FIRST_NUMBERS / self.branch_size
|
||||
}
|
||||
|
||||
fn cmp(&self, other: &Self) -> Ordering {
|
||||
let self_rank = self.compute_rank();
|
||||
let other_rank = other.compute_rank();
|
||||
|
||||
// lower rank is better, and because BinaryHeap give the higher ranked branch, we reverse it.
|
||||
self_rank.cmp(&other_rank).reverse()
|
||||
}
|
||||
}
|
||||
|
||||
impl<'t> Ord for Branch<'t> {
|
||||
fn cmp(&self, other: &Self) -> Ordering {
|
||||
self.cmp(other)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'t> PartialOrd for Branch<'t> {
|
||||
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
|
||||
Some(self.cmp(other))
|
||||
}
|
||||
}
|
||||
|
||||
impl<'t> PartialEq for Branch<'t> {
|
||||
fn eq(&self, other: &Self) -> bool {
|
||||
self.cmp(other) == Ordering::Equal
|
||||
}
|
||||
}
|
||||
|
||||
impl<'t> Eq for Branch<'t> {}
|
||||
|
||||
fn initialize_set_buckets<'t>(
|
||||
ctx: &'t dyn Context<'t>,
|
||||
branches: &FlattenedQueryTree,
|
||||
allowed_candidates: &RoaringBitmap,
|
||||
wdcache: &mut WordDerivationsCache,
|
||||
) -> Result<BinaryHeap<Branch<'t>>> {
|
||||
let mut heap = BinaryHeap::new();
|
||||
for flatten_branch in branches {
|
||||
let branch = Branch::new(ctx, flatten_branch, wdcache, allowed_candidates)?;
|
||||
heap.push(branch);
|
||||
}
|
||||
|
||||
Ok(heap)
|
||||
}
|
||||
|
||||
fn set_compute_candidates(
|
||||
branches_heap: &mut BinaryHeap<Branch>,
|
||||
allowed_candidates: &RoaringBitmap,
|
||||
) -> Result<Option<(u32, RoaringBitmap)>> {
|
||||
let mut final_candidates: Option<(u32, RoaringBitmap)> = None;
|
||||
let mut allowed_candidates = allowed_candidates.clone();
|
||||
|
||||
while let Some(mut branch) = branches_heap.peek_mut() {
|
||||
// if current is worst than best we break to return
|
||||
// candidates that correspond to the best rank
|
||||
let branch_rank = branch.compute_rank();
|
||||
if let Some((best_rank, _)) = final_candidates {
|
||||
if branch_rank > best_rank {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
let candidates = take(&mut branch.last_result.1);
|
||||
if candidates.is_empty() {
|
||||
// we don't have candidates, get next interval.
|
||||
if !branch.next(&allowed_candidates)? {
|
||||
PeekMut::pop(branch);
|
||||
}
|
||||
} else {
|
||||
allowed_candidates -= &candidates;
|
||||
final_candidates = match final_candidates.take() {
|
||||
// we add current candidates to best candidates
|
||||
Some((best_rank, mut best_candidates)) => {
|
||||
best_candidates |= candidates;
|
||||
branch.next(&allowed_candidates)?;
|
||||
Some((best_rank, best_candidates))
|
||||
}
|
||||
// we take current candidates as best candidates
|
||||
None => {
|
||||
branch.next(&allowed_candidates)?;
|
||||
Some((branch_rank, candidates))
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
Ok(final_candidates)
|
||||
}
|
||||
|
||||
fn initialize_linear_buckets(
|
||||
ctx: &dyn Context,
|
||||
branches: &FlattenedQueryTree,
|
||||
allowed_candidates: &RoaringBitmap,
|
||||
) -> Result<BTreeMap<u64, RoaringBitmap>> {
|
||||
fn compute_candidate_rank(
|
||||
branches: &FlattenedQueryTree,
|
||||
words_positions: HashMap<String, RoaringBitmap>,
|
||||
) -> u64 {
|
||||
let mut min_rank = u64::max_value();
|
||||
for branch in branches {
|
||||
let branch_len = branch.len();
|
||||
let mut branch_rank = Vec::with_capacity(branch_len);
|
||||
for derivates in branch {
|
||||
let mut position = None;
|
||||
for Query { prefix, kind } in derivates {
|
||||
// find the best position of the current word in the document.
|
||||
let current_position = match kind {
|
||||
QueryKind::Exact { word, .. } => {
|
||||
if *prefix {
|
||||
word_derivations(word, true, 0, &words_positions)
|
||||
.flat_map(|positions| positions.iter().next())
|
||||
.min()
|
||||
} else {
|
||||
words_positions
|
||||
.get(word)
|
||||
.and_then(|positions| positions.iter().next())
|
||||
}
|
||||
}
|
||||
QueryKind::Tolerant { typo, word } => {
|
||||
word_derivations(word, *prefix, *typo, &words_positions)
|
||||
.flat_map(|positions| positions.iter().next())
|
||||
.min()
|
||||
}
|
||||
};
|
||||
|
||||
match (position, current_position) {
|
||||
(Some(p), Some(cp)) => position = Some(cmp::min(p, cp)),
|
||||
(None, Some(cp)) => position = Some(cp),
|
||||
_ => (),
|
||||
}
|
||||
}
|
||||
|
||||
// if a position is found, we add it to the branch score,
|
||||
// otherwise the branch is considered as unfindable in this document and we break.
|
||||
if let Some(position) = position {
|
||||
branch_rank.push(position as u64);
|
||||
} else {
|
||||
branch_rank.clear();
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if !branch_rank.is_empty() {
|
||||
branch_rank.sort_unstable();
|
||||
// because several words in same query can't match all a the position 0,
|
||||
// we substract the word index to the position.
|
||||
let branch_rank: u64 =
|
||||
branch_rank.into_iter().enumerate().map(|(i, r)| r - i as u64).sum();
|
||||
// here we do the means of the words of the branch
|
||||
min_rank =
|
||||
min_rank.min(branch_rank * LCM_10_FIRST_NUMBERS as u64 / branch_len as u64);
|
||||
}
|
||||
}
|
||||
|
||||
min_rank
|
||||
}
|
||||
|
||||
fn word_derivations<'a>(
|
||||
word: &str,
|
||||
is_prefix: bool,
|
||||
max_typo: u8,
|
||||
words_positions: &'a HashMap<String, RoaringBitmap>,
|
||||
) -> impl Iterator<Item = &'a RoaringBitmap> {
|
||||
let dfa = build_dfa(word, max_typo, is_prefix);
|
||||
words_positions.iter().filter_map(move |(document_word, positions)| {
|
||||
use levenshtein_automata::Distance;
|
||||
match dfa.eval(document_word) {
|
||||
Distance::Exact(_) => Some(positions),
|
||||
Distance::AtLeast(_) => None,
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
let mut candidates = BTreeMap::new();
|
||||
for docid in allowed_candidates {
|
||||
let words_positions = ctx.docid_words_positions(docid)?;
|
||||
let rank = compute_candidate_rank(branches, words_positions);
|
||||
candidates.entry(rank).or_insert_with(RoaringBitmap::new).insert(docid);
|
||||
}
|
||||
|
||||
Ok(candidates)
|
||||
}
|
||||
|
||||
// TODO can we keep refs of Query
|
||||
fn flatten_query_tree(query_tree: &Operation) -> FlattenedQueryTree {
|
||||
use crate::search::criteria::Operation::{And, Or, Phrase};
|
||||
|
||||
fn and_recurse(head: &Operation, tail: &[Operation]) -> FlattenedQueryTree {
|
||||
match tail.split_first() {
|
||||
Some((thead, tail)) => {
|
||||
let tail = and_recurse(thead, tail);
|
||||
let mut out = Vec::new();
|
||||
for array in recurse(head) {
|
||||
for tail_array in &tail {
|
||||
let mut array = array.clone();
|
||||
array.extend(tail_array.iter().cloned());
|
||||
out.push(array);
|
||||
}
|
||||
}
|
||||
out
|
||||
}
|
||||
None => recurse(head),
|
||||
}
|
||||
}
|
||||
|
||||
fn recurse(op: &Operation) -> FlattenedQueryTree {
|
||||
match op {
|
||||
And(ops) => ops.split_first().map_or_else(Vec::new, |(h, t)| and_recurse(h, t)),
|
||||
Or(_, ops) => {
|
||||
if ops.iter().all(|op| op.query().is_some()) {
|
||||
vec![vec![ops.iter().flat_map(|op| op.query()).cloned().collect()]]
|
||||
} else {
|
||||
ops.iter().flat_map(recurse).collect()
|
||||
}
|
||||
}
|
||||
Phrase(words) => {
|
||||
let queries = words
|
||||
.iter()
|
||||
.filter_map(|w| w.as_ref())
|
||||
.map(|word| vec![Query { prefix: false, kind: QueryKind::exact(word.clone()) }])
|
||||
.collect();
|
||||
vec![queries]
|
||||
}
|
||||
Operation::Query(query) => vec![vec![vec![query.clone()]]],
|
||||
}
|
||||
}
|
||||
|
||||
recurse(query_tree)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use big_s::S;
|
||||
|
||||
use super::*;
|
||||
use crate::search::criteria::QueryKind;
|
||||
|
||||
#[test]
|
||||
fn simple_flatten_query_tree() {
|
||||
let query_tree = Operation::Or(
|
||||
false,
|
||||
vec![
|
||||
Operation::Query(Query { prefix: false, kind: QueryKind::exact(S("manythefish")) }),
|
||||
Operation::And(vec![
|
||||
Operation::Query(Query { prefix: false, kind: QueryKind::exact(S("manythe")) }),
|
||||
Operation::Query(Query { prefix: false, kind: QueryKind::exact(S("fish")) }),
|
||||
]),
|
||||
Operation::And(vec![
|
||||
Operation::Query(Query { prefix: false, kind: QueryKind::exact(S("many")) }),
|
||||
Operation::Or(
|
||||
false,
|
||||
vec![
|
||||
Operation::Query(Query {
|
||||
prefix: false,
|
||||
kind: QueryKind::exact(S("thefish")),
|
||||
}),
|
||||
Operation::And(vec![
|
||||
Operation::Query(Query {
|
||||
prefix: false,
|
||||
kind: QueryKind::exact(S("the")),
|
||||
}),
|
||||
Operation::Query(Query {
|
||||
prefix: false,
|
||||
kind: QueryKind::exact(S("fish")),
|
||||
}),
|
||||
]),
|
||||
],
|
||||
),
|
||||
]),
|
||||
],
|
||||
);
|
||||
let result = flatten_query_tree(&query_tree);
|
||||
|
||||
insta::assert_debug_snapshot!(result, @r###"
|
||||
[
|
||||
[
|
||||
[
|
||||
Exact {
|
||||
word: "manythefish",
|
||||
},
|
||||
],
|
||||
],
|
||||
[
|
||||
[
|
||||
Exact {
|
||||
word: "manythe",
|
||||
},
|
||||
],
|
||||
[
|
||||
Exact {
|
||||
word: "fish",
|
||||
},
|
||||
],
|
||||
],
|
||||
[
|
||||
[
|
||||
Exact {
|
||||
word: "many",
|
||||
},
|
||||
],
|
||||
[
|
||||
Exact {
|
||||
word: "thefish",
|
||||
},
|
||||
],
|
||||
],
|
||||
[
|
||||
[
|
||||
Exact {
|
||||
word: "many",
|
||||
},
|
||||
],
|
||||
[
|
||||
Exact {
|
||||
word: "the",
|
||||
},
|
||||
],
|
||||
[
|
||||
Exact {
|
||||
word: "fish",
|
||||
},
|
||||
],
|
||||
],
|
||||
]
|
||||
"###);
|
||||
}
|
||||
}
|
@ -1,766 +0,0 @@
|
||||
use std::collections::btree_map::Entry;
|
||||
use std::collections::BTreeMap;
|
||||
use std::convert::TryFrom;
|
||||
use std::mem::take;
|
||||
|
||||
use log::debug;
|
||||
use roaring::{MultiOps, RoaringBitmap};
|
||||
|
||||
use crate::search::criteria::{
|
||||
resolve_phrase, resolve_query_tree, Context, Criterion, CriterionParameters, CriterionResult,
|
||||
InitialCandidates,
|
||||
};
|
||||
use crate::search::query_tree::{Operation, PrimitiveQueryPart};
|
||||
use crate::{absolute_from_relative_position, FieldId, Result};
|
||||
|
||||
pub struct Exactness<'t> {
|
||||
ctx: &'t dyn Context<'t>,
|
||||
query_tree: Option<Operation>,
|
||||
state: Option<State>,
|
||||
initial_candidates: InitialCandidates,
|
||||
parent: Box<dyn Criterion + 't>,
|
||||
query: Vec<ExactQueryPart>,
|
||||
cache: Option<ExactWordsCombinationCache>,
|
||||
}
|
||||
|
||||
impl<'t> Exactness<'t> {
|
||||
pub fn new(
|
||||
ctx: &'t dyn Context<'t>,
|
||||
parent: Box<dyn Criterion + 't>,
|
||||
primitive_query: &[PrimitiveQueryPart],
|
||||
) -> heed::Result<Self> {
|
||||
let mut query: Vec<_> = Vec::with_capacity(primitive_query.len());
|
||||
for part in primitive_query {
|
||||
query.push(ExactQueryPart::from_primitive_query_part(ctx, part)?);
|
||||
}
|
||||
|
||||
Ok(Exactness {
|
||||
ctx,
|
||||
query_tree: None,
|
||||
state: None,
|
||||
initial_candidates: InitialCandidates::Estimated(RoaringBitmap::new()),
|
||||
parent,
|
||||
query,
|
||||
cache: None,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl<'t> Criterion for Exactness<'t> {
|
||||
#[logging_timer::time("Exactness::{}")]
|
||||
fn next(&mut self, params: &mut CriterionParameters) -> Result<Option<CriterionResult>> {
|
||||
// remove excluded candidates when next is called, instead of doing it in the loop.
|
||||
if let Some(state) = self.state.as_mut() {
|
||||
state.difference_with(params.excluded_candidates);
|
||||
}
|
||||
loop {
|
||||
debug!("Exactness at state {:?}", self.state);
|
||||
|
||||
match self.state.as_mut() {
|
||||
Some(state) if state.is_empty() => {
|
||||
// reset state
|
||||
self.state = None;
|
||||
self.query_tree = None;
|
||||
// we don't need to reset the combinations cache since it only depends on
|
||||
// the primitive query, which does not change
|
||||
}
|
||||
Some(state) => {
|
||||
let (candidates, state) =
|
||||
resolve_state(self.ctx, take(state), &self.query, &mut self.cache)?;
|
||||
self.state = state;
|
||||
|
||||
return Ok(Some(CriterionResult {
|
||||
query_tree: self.query_tree.clone(),
|
||||
candidates: Some(candidates),
|
||||
filtered_candidates: None,
|
||||
initial_candidates: Some(self.initial_candidates.take()),
|
||||
}));
|
||||
}
|
||||
None => match self.parent.next(params)? {
|
||||
Some(CriterionResult {
|
||||
query_tree: Some(query_tree),
|
||||
candidates,
|
||||
filtered_candidates,
|
||||
initial_candidates,
|
||||
}) => {
|
||||
let mut candidates = match candidates {
|
||||
Some(candidates) => candidates,
|
||||
None => {
|
||||
resolve_query_tree(self.ctx, &query_tree, params.wdcache)?
|
||||
- params.excluded_candidates
|
||||
}
|
||||
};
|
||||
|
||||
if let Some(filtered_candidates) = filtered_candidates {
|
||||
candidates &= filtered_candidates;
|
||||
}
|
||||
|
||||
match initial_candidates {
|
||||
Some(initial_candidates) => {
|
||||
self.initial_candidates |= initial_candidates
|
||||
}
|
||||
None => self.initial_candidates.map_inplace(|c| c | &candidates),
|
||||
}
|
||||
|
||||
self.state = Some(State::new(candidates));
|
||||
self.query_tree = Some(query_tree);
|
||||
}
|
||||
Some(CriterionResult {
|
||||
query_tree: None,
|
||||
candidates,
|
||||
filtered_candidates,
|
||||
initial_candidates,
|
||||
}) => {
|
||||
return Ok(Some(CriterionResult {
|
||||
query_tree: None,
|
||||
candidates,
|
||||
filtered_candidates,
|
||||
initial_candidates,
|
||||
}));
|
||||
}
|
||||
None => return Ok(None),
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
enum State {
|
||||
/// Extract the documents that have an attribute that contains exactly the query.
|
||||
ExactAttribute(RoaringBitmap),
|
||||
/// Extract the documents that have an attribute that starts with exactly the query.
|
||||
AttributeStartsWith(RoaringBitmap),
|
||||
/// Rank the remaining documents by the number of exact words contained.
|
||||
ExactWords(RoaringBitmap),
|
||||
Remainings(Vec<RoaringBitmap>),
|
||||
}
|
||||
|
||||
impl State {
|
||||
fn new(candidates: RoaringBitmap) -> Self {
|
||||
Self::ExactAttribute(candidates)
|
||||
}
|
||||
|
||||
fn difference_with(&mut self, lhs: &RoaringBitmap) {
|
||||
match self {
|
||||
Self::ExactAttribute(candidates)
|
||||
| Self::AttributeStartsWith(candidates)
|
||||
| Self::ExactWords(candidates) => *candidates -= lhs,
|
||||
Self::Remainings(candidates_array) => {
|
||||
candidates_array.iter_mut().for_each(|candidates| *candidates -= lhs);
|
||||
candidates_array.retain(|candidates| !candidates.is_empty());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn is_empty(&self) -> bool {
|
||||
match self {
|
||||
Self::ExactAttribute(candidates)
|
||||
| Self::AttributeStartsWith(candidates)
|
||||
| Self::ExactWords(candidates) => candidates.is_empty(),
|
||||
Self::Remainings(candidates_array) => {
|
||||
candidates_array.iter().all(RoaringBitmap::is_empty)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for State {
|
||||
fn default() -> Self {
|
||||
Self::Remainings(vec![])
|
||||
}
|
||||
}
|
||||
#[logging_timer::time("Exactness::{}")]
|
||||
fn resolve_state(
|
||||
ctx: &dyn Context,
|
||||
state: State,
|
||||
query: &[ExactQueryPart],
|
||||
cache: &mut Option<ExactWordsCombinationCache>,
|
||||
) -> Result<(RoaringBitmap, Option<State>)> {
|
||||
use State::*;
|
||||
match state {
|
||||
ExactAttribute(mut allowed_candidates) => {
|
||||
let mut candidates = RoaringBitmap::new();
|
||||
if let Ok(query_len) = u8::try_from(query.len()) {
|
||||
let attributes_ids = ctx.searchable_fields_ids()?;
|
||||
for id in attributes_ids {
|
||||
if let Some(attribute_allowed_docids) =
|
||||
ctx.field_id_word_count_docids(id, query_len)?
|
||||
{
|
||||
let mut attribute_candidates_array =
|
||||
attribute_start_with_docids(ctx, id, query)?;
|
||||
attribute_candidates_array.push(attribute_allowed_docids);
|
||||
|
||||
candidates |= MultiOps::intersection(attribute_candidates_array);
|
||||
}
|
||||
}
|
||||
|
||||
// only keep allowed candidates
|
||||
candidates &= &allowed_candidates;
|
||||
// remove current candidates from allowed candidates
|
||||
allowed_candidates -= &candidates;
|
||||
}
|
||||
|
||||
Ok((candidates, Some(AttributeStartsWith(allowed_candidates))))
|
||||
}
|
||||
AttributeStartsWith(mut allowed_candidates) => {
|
||||
let mut candidates = RoaringBitmap::new();
|
||||
let attributes_ids = ctx.searchable_fields_ids()?;
|
||||
for id in attributes_ids {
|
||||
let attribute_candidates_array = attribute_start_with_docids(ctx, id, query)?;
|
||||
candidates |= MultiOps::intersection(attribute_candidates_array);
|
||||
}
|
||||
|
||||
// only keep allowed candidates
|
||||
candidates &= &allowed_candidates;
|
||||
// remove current candidates from allowed candidates
|
||||
allowed_candidates -= &candidates;
|
||||
Ok((candidates, Some(ExactWords(allowed_candidates))))
|
||||
}
|
||||
ExactWords(allowed_candidates) => {
|
||||
// Retrieve the cache if it already exist, otherwise create it.
|
||||
let owned_cache = if let Some(cache) = cache.take() {
|
||||
cache
|
||||
} else {
|
||||
compute_combinations(ctx, query)?
|
||||
};
|
||||
// The cache contains the sets of documents which contain exactly 1,2,3,.. exact words
|
||||
// from the query. It cannot be empty. All the candidates in it are disjoint.
|
||||
|
||||
let mut candidates_array = owned_cache.combinations.clone();
|
||||
for candidates in candidates_array.iter_mut() {
|
||||
*candidates &= &allowed_candidates;
|
||||
}
|
||||
*cache = Some(owned_cache);
|
||||
|
||||
let best_candidates = candidates_array.pop().unwrap();
|
||||
|
||||
candidates_array.insert(0, allowed_candidates);
|
||||
Ok((best_candidates, Some(Remainings(candidates_array))))
|
||||
}
|
||||
// pop remainings candidates until the emptiness
|
||||
Remainings(mut candidates_array) => {
|
||||
let candidates = candidates_array.pop().unwrap_or_default();
|
||||
if !candidates_array.is_empty() {
|
||||
Ok((candidates, Some(Remainings(candidates_array))))
|
||||
} else {
|
||||
Ok((candidates, None))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn attribute_start_with_docids(
|
||||
ctx: &dyn Context,
|
||||
attribute_id: FieldId,
|
||||
query: &[ExactQueryPart],
|
||||
) -> heed::Result<Vec<RoaringBitmap>> {
|
||||
let mut attribute_candidates_array = Vec::new();
|
||||
// start from attribute first position
|
||||
let mut pos = absolute_from_relative_position(attribute_id, 0);
|
||||
for part in query {
|
||||
use ExactQueryPart::*;
|
||||
match part {
|
||||
Synonyms(synonyms) => {
|
||||
let mut synonyms_candidates = RoaringBitmap::new();
|
||||
for word in synonyms {
|
||||
let wc = ctx.word_position_docids(word, pos)?;
|
||||
if let Some(word_candidates) = wc {
|
||||
synonyms_candidates |= word_candidates;
|
||||
}
|
||||
}
|
||||
attribute_candidates_array.push(synonyms_candidates);
|
||||
pos += 1;
|
||||
}
|
||||
Phrase(phrase) => {
|
||||
for word in phrase {
|
||||
if let Some(word) = word {
|
||||
let wc = ctx.word_position_docids(word, pos)?;
|
||||
if let Some(word_candidates) = wc {
|
||||
attribute_candidates_array.push(word_candidates);
|
||||
}
|
||||
}
|
||||
pos += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(attribute_candidates_array)
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum ExactQueryPart {
|
||||
Phrase(Vec<Option<String>>),
|
||||
Synonyms(Vec<String>),
|
||||
}
|
||||
|
||||
impl ExactQueryPart {
|
||||
fn from_primitive_query_part(
|
||||
ctx: &dyn Context,
|
||||
part: &PrimitiveQueryPart,
|
||||
) -> heed::Result<Self> {
|
||||
let part = match part {
|
||||
PrimitiveQueryPart::Word(word, _) => {
|
||||
match ctx.synonyms(word)? {
|
||||
Some(synonyms) => {
|
||||
let mut synonyms: Vec<_> = synonyms
|
||||
.into_iter()
|
||||
.filter_map(|mut array| {
|
||||
// keep 1 word synonyms only.
|
||||
match array.pop() {
|
||||
Some(word) if array.is_empty() => Some(word),
|
||||
_ => None,
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
synonyms.push(word.clone());
|
||||
ExactQueryPart::Synonyms(synonyms)
|
||||
}
|
||||
None => ExactQueryPart::Synonyms(vec![word.clone()]),
|
||||
}
|
||||
}
|
||||
PrimitiveQueryPart::Phrase(phrase) => ExactQueryPart::Phrase(phrase.clone()),
|
||||
};
|
||||
|
||||
Ok(part)
|
||||
}
|
||||
}
|
||||
|
||||
struct ExactWordsCombinationCache {
|
||||
// index 0 is only 1 word
|
||||
combinations: Vec<RoaringBitmap>,
|
||||
}
|
||||
|
||||
fn compute_combinations(
|
||||
ctx: &dyn Context,
|
||||
query: &[ExactQueryPart],
|
||||
) -> Result<ExactWordsCombinationCache> {
|
||||
let number_of_part = query.len();
|
||||
let mut parts_candidates_array = Vec::with_capacity(number_of_part);
|
||||
for part in query {
|
||||
let mut candidates = RoaringBitmap::new();
|
||||
use ExactQueryPart::*;
|
||||
match part {
|
||||
Synonyms(synonyms) => {
|
||||
for synonym in synonyms {
|
||||
if let Some(synonym_candidates) = ctx.word_docids(synonym)? {
|
||||
candidates |= synonym_candidates;
|
||||
}
|
||||
}
|
||||
}
|
||||
// compute intersection on pair of words with a proximity of 0.
|
||||
Phrase(phrase) => {
|
||||
candidates |= resolve_phrase(ctx, phrase)?;
|
||||
}
|
||||
}
|
||||
parts_candidates_array.push(candidates);
|
||||
}
|
||||
let combinations = create_disjoint_combinations(parts_candidates_array);
|
||||
|
||||
Ok(ExactWordsCombinationCache { combinations })
|
||||
}
|
||||
|
||||
/// Given a list of bitmaps `b0,b1,...,bn` , compute the list of bitmaps `X0,X1,...,Xn`
|
||||
/// such that `Xi` contains all the elements that are contained in **at least** `i+1` bitmaps among `b0,b1,...,bn`.
|
||||
///
|
||||
/// The returned vector is guaranteed to be of length `n`. It is equal to `vec![X0, X1, ..., Xn]`.
|
||||
///
|
||||
/// ## Implementation
|
||||
///
|
||||
/// We do so by iteratively building a map containing the union of all the different ways to intersect `J` bitmaps among `b0,b1,...,bn`.
|
||||
/// - The key of the map is the index `i` of the last bitmap in the intersections
|
||||
/// - The value is the union of all the possible intersections of J bitmaps such that the last bitmap in the intersection is `bi`
|
||||
///
|
||||
/// For example, with the bitmaps `b0,b1,b2,b3`, this map should look like this
|
||||
/// ```text
|
||||
/// Map 0: (first iteration, contains all the combinations of 1 bitmap)
|
||||
/// // What follows are unions of intersection of bitmaps asscociated with the index of their last component
|
||||
/// 0: [b0]
|
||||
/// 1: [b1]
|
||||
/// 2: [b2]
|
||||
/// 3: [b3]
|
||||
/// Map 1: (second iteration, combinations of 2 bitmaps)
|
||||
/// 1: [b0&b1]
|
||||
/// 2: [b0&b2 | b1&b2]
|
||||
/// 3: [b0&b3 | b1&b3 | b2&b3]
|
||||
/// Map 2: (third iteration, combinations of 3 bitmaps)
|
||||
/// 2: [b0&b1&b2]
|
||||
/// 3: [b0&b2&b3 | b1&b2&b3]
|
||||
/// Map 3: (fourth iteration, combinations of 4 bitmaps)
|
||||
/// 3: [b0&b1&b2&b3]
|
||||
/// ```
|
||||
///
|
||||
/// These maps are built one by one from the content of the preceding map.
|
||||
/// For example, to create Map 2, we look at each line of Map 1, for example:
|
||||
/// ```text
|
||||
/// 2: [b0&b2 | b1&b2]
|
||||
/// ```
|
||||
/// And then for each i > 2, we compute `(b0&b2 | b1&b2) & bi = b0&b2&bi | b1&b2&bi`
|
||||
/// and then add it the new map (Map 3) under the key `i` (if it is not empty):
|
||||
/// ```text
|
||||
/// 3: [b0&b2&b3 | b1&b2&b3]
|
||||
/// 4: [b0&b2&b4 | b1&b2&b4]
|
||||
/// 5: [b0&b2&b5 | b1&b2&b5]
|
||||
/// etc.
|
||||
/// ```
|
||||
/// We only keep two maps in memory at any one point. As soon as Map J is built, we flatten Map J-1 into
|
||||
/// a single bitmap by taking the union of all of its values. This union gives us Xj-1.
|
||||
///
|
||||
/// ## Memory Usage
|
||||
/// This function is expected to be called on a maximum of 10 bitmaps. The worst case thus happens when
|
||||
/// 10 identical large bitmaps are given.
|
||||
///
|
||||
/// In the context of Meilisearch, let's imagine that we are given 10 bitmaps containing all
|
||||
/// the document ids. If the dataset contains 16 million documents, then each bitmap will take
|
||||
/// around 2MB of memory.
|
||||
///
|
||||
/// When creating Map 3, we will have, in memory:
|
||||
/// 1. The 10 original bitmaps (20MB)
|
||||
/// 2. X0 : 2MB
|
||||
/// 3. Map 1, containing 9 bitmaps: 18MB
|
||||
/// 4. Map 2, containing 8 bitmaps: 16MB
|
||||
/// 5. X1: 2MB
|
||||
/// for a total of around 60MB of memory. This roughly represents the maximum memory usage of this function.
|
||||
///
|
||||
/// ## Time complexity
|
||||
/// Let N be the size of the given list of bitmaps and M the length of each individual bitmap.
|
||||
///
|
||||
/// We need to create N new bitmaps. The most expensive one to create is the second one, where we need to
|
||||
/// iterate over the N keys of Map 1, and for each of those keys `k_i`, we perform `N-k_i` bitmap unions.
|
||||
/// Unioning two bitmaps is O(M), and we need to do it O(N^2) times.
|
||||
///
|
||||
/// Therefore the time complexity is O(N^3 * M).
|
||||
fn create_non_disjoint_combinations(bitmaps: Vec<RoaringBitmap>) -> Vec<RoaringBitmap> {
|
||||
let nbr_parts = bitmaps.len();
|
||||
if nbr_parts == 1 {
|
||||
return bitmaps;
|
||||
}
|
||||
let mut flattened_levels = vec![];
|
||||
let mut last_level: BTreeMap<usize, RoaringBitmap> =
|
||||
bitmaps.clone().into_iter().enumerate().collect();
|
||||
|
||||
for _ in 2..=nbr_parts {
|
||||
let mut new_level = BTreeMap::new();
|
||||
for (last_part_index, base_combination) in last_level.iter() {
|
||||
#[allow(clippy::needless_range_loop)]
|
||||
for new_last_part_index in last_part_index + 1..nbr_parts {
|
||||
let new_combination = base_combination & &bitmaps[new_last_part_index];
|
||||
if !new_combination.is_empty() {
|
||||
match new_level.entry(new_last_part_index) {
|
||||
Entry::Occupied(mut b) => {
|
||||
*b.get_mut() |= new_combination;
|
||||
}
|
||||
Entry::Vacant(entry) => {
|
||||
entry.insert(new_combination);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// Now flatten the last level to save memory
|
||||
let flattened_last_level = MultiOps::union(last_level.into_values());
|
||||
flattened_levels.push(flattened_last_level);
|
||||
last_level = new_level;
|
||||
}
|
||||
// Flatten the last level
|
||||
let flattened_last_level = MultiOps::union(last_level.into_values());
|
||||
flattened_levels.push(flattened_last_level);
|
||||
flattened_levels
|
||||
}
|
||||
|
||||
/// Given a list of bitmaps `b0,b1,...,bn` , compute the list of bitmaps `X0,X1,...,Xn`
|
||||
/// such that `Xi` contains all the elements that are contained in **exactly** `i+1` bitmaps among `b0,b1,...,bn`.
|
||||
///
|
||||
/// The returned vector is guaranteed to be of length `n`. It is equal to `vec![X0, X1, ..., Xn]`.
|
||||
fn create_disjoint_combinations(parts_candidates_array: Vec<RoaringBitmap>) -> Vec<RoaringBitmap> {
|
||||
let non_disjoint_combinations = create_non_disjoint_combinations(parts_candidates_array);
|
||||
let mut disjoint_combinations = vec![];
|
||||
let mut combinations = non_disjoint_combinations.into_iter().peekable();
|
||||
while let Some(mut combination) = combinations.next() {
|
||||
if let Some(forbidden) = combinations.peek() {
|
||||
combination -= forbidden;
|
||||
}
|
||||
disjoint_combinations.push(combination)
|
||||
}
|
||||
|
||||
disjoint_combinations
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use big_s::S;
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use crate::index::tests::TempIndex;
|
||||
use crate::search::criteria::exactness::{
|
||||
create_disjoint_combinations, create_non_disjoint_combinations,
|
||||
};
|
||||
use crate::snapshot_tests::display_bitmap;
|
||||
use crate::{Criterion, SearchResult};
|
||||
|
||||
#[test]
|
||||
fn test_exact_words_subcriterion() {
|
||||
let index = TempIndex::new();
|
||||
|
||||
index
|
||||
.update_settings(|settings| {
|
||||
settings.set_primary_key(S("id"));
|
||||
settings.set_criteria(vec![Criterion::Exactness]);
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
index
|
||||
.add_documents(documents!([
|
||||
// not relevant
|
||||
{ "id": "0", "text": "cat good dog bad" },
|
||||
// 1 exact word
|
||||
{ "id": "1", "text": "they said: cats arebetter thandogs" },
|
||||
// 3 exact words
|
||||
{ "id": "2", "text": "they said: cats arebetter than dogs" },
|
||||
// 5 exact words
|
||||
{ "id": "3", "text": "they said: cats are better than dogs" },
|
||||
// attribute starts with the exact words
|
||||
{ "id": "4", "text": "cats are better than dogs except on Saturday" },
|
||||
// attribute equal to the exact words
|
||||
{ "id": "5", "text": "cats are better than dogs" },
|
||||
]))
|
||||
.unwrap();
|
||||
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
|
||||
let SearchResult { matching_words: _, candidates: _, documents_ids } =
|
||||
index.search(&rtxn).query("cats are better than dogs").execute().unwrap();
|
||||
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[5, 4, 3, 2, 1]");
|
||||
}
|
||||
|
||||
fn print_combinations(rbs: &[RoaringBitmap]) -> String {
|
||||
let mut s = String::new();
|
||||
for rb in rbs {
|
||||
s.push_str(&format!("{}\n", &display_bitmap(rb)));
|
||||
}
|
||||
s
|
||||
}
|
||||
|
||||
// In these unit tests, the test bitmaps always contain all the multiple of a certain number.
|
||||
// This makes it easy to check the validity of the results of `create_disjoint_combinations` by
|
||||
// counting the number of dividers of elements in the returned bitmaps.
|
||||
fn assert_correct_combinations(combinations: &[RoaringBitmap], dividers: &[u32]) {
|
||||
for (i, set) in combinations.iter().enumerate() {
|
||||
let expected_nbr_dividers = i + 1;
|
||||
for el in set {
|
||||
let nbr_dividers = dividers.iter().map(|d| usize::from(el % d == 0)).sum::<usize>();
|
||||
assert_eq!(
|
||||
nbr_dividers, expected_nbr_dividers,
|
||||
"{el} is divisible by {nbr_dividers} elements, not {expected_nbr_dividers}."
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn compute_combinations_1() {
|
||||
let b0: RoaringBitmap = (0..).into_iter().map(|x| 2 * x).take_while(|x| *x < 150).collect();
|
||||
|
||||
let parts_candidates = vec![b0];
|
||||
|
||||
let combinations = create_disjoint_combinations(parts_candidates);
|
||||
insta::assert_snapshot!(print_combinations(&combinations), @r###"
|
||||
[0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62, 64, 66, 68, 70, 72, 74, 76, 78, 80, 82, 84, 86, 88, 90, 92, 94, 96, 98, 100, 102, 104, 106, 108, 110, 112, 114, 116, 118, 120, 122, 124, 126, 128, 130, 132, 134, 136, 138, 140, 142, 144, 146, 148, ]
|
||||
"###);
|
||||
|
||||
assert_correct_combinations(&combinations, &[2]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn compute_combinations_2() {
|
||||
let b0: RoaringBitmap = (0..).into_iter().map(|x| 2 * x).take_while(|x| *x < 150).collect();
|
||||
let b1: RoaringBitmap = (0..).into_iter().map(|x| 3 * x).take_while(|x| *x < 150).collect();
|
||||
|
||||
let parts_candidates = vec![b0, b1];
|
||||
|
||||
let combinations = create_disjoint_combinations(parts_candidates);
|
||||
insta::assert_snapshot!(print_combinations(&combinations), @r###"
|
||||
[2, 3, 4, 8, 9, 10, 14, 15, 16, 20, 21, 22, 26, 27, 28, 32, 33, 34, 38, 39, 40, 44, 45, 46, 50, 51, 52, 56, 57, 58, 62, 63, 64, 68, 69, 70, 74, 75, 76, 80, 81, 82, 86, 87, 88, 92, 93, 94, 98, 99, 100, 104, 105, 106, 110, 111, 112, 116, 117, 118, 122, 123, 124, 128, 129, 130, 134, 135, 136, 140, 141, 142, 146, 147, 148, ]
|
||||
[0, 6, 12, 18, 24, 30, 36, 42, 48, 54, 60, 66, 72, 78, 84, 90, 96, 102, 108, 114, 120, 126, 132, 138, 144, ]
|
||||
"###);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn compute_combinations_4() {
|
||||
let b0: RoaringBitmap = (0..).into_iter().map(|x| 2 * x).take_while(|x| *x < 150).collect();
|
||||
let b1: RoaringBitmap = (0..).into_iter().map(|x| 3 * x).take_while(|x| *x < 150).collect();
|
||||
let b2: RoaringBitmap = (0..).into_iter().map(|x| 5 * x).take_while(|x| *x < 150).collect();
|
||||
let b3: RoaringBitmap = (0..).into_iter().map(|x| 7 * x).take_while(|x| *x < 150).collect();
|
||||
|
||||
let parts_candidates = vec![b0, b1, b2, b3];
|
||||
|
||||
let combinations = create_disjoint_combinations(parts_candidates);
|
||||
|
||||
insta::assert_snapshot!(print_combinations(&combinations), @r###"
|
||||
[2, 3, 4, 5, 7, 8, 9, 16, 22, 25, 26, 27, 32, 33, 34, 38, 39, 44, 46, 49, 51, 52, 55, 57, 58, 62, 64, 65, 68, 69, 74, 76, 77, 81, 82, 85, 86, 87, 88, 91, 92, 93, 94, 95, 99, 104, 106, 111, 115, 116, 117, 118, 119, 122, 123, 124, 125, 128, 129, 133, 134, 136, 141, 142, 145, 146, 148, ]
|
||||
[6, 10, 12, 14, 15, 18, 20, 21, 24, 28, 35, 36, 40, 45, 48, 50, 54, 56, 63, 66, 72, 75, 78, 80, 96, 98, 100, 102, 108, 110, 112, 114, 130, 132, 135, 138, 144, 147, ]
|
||||
[30, 42, 60, 70, 84, 90, 105, 120, 126, 140, ]
|
||||
[0, ]
|
||||
"###);
|
||||
|
||||
// But we also check it programmatically
|
||||
assert_correct_combinations(&combinations, &[2, 3, 5, 7]);
|
||||
}
|
||||
#[test]
|
||||
fn compute_combinations_4_with_empty_results_at_end() {
|
||||
let b0: RoaringBitmap = (1..).into_iter().map(|x| 2 * x).take_while(|x| *x < 150).collect();
|
||||
let b1: RoaringBitmap = (1..).into_iter().map(|x| 3 * x).take_while(|x| *x < 150).collect();
|
||||
let b2: RoaringBitmap = (1..).into_iter().map(|x| 5 * x).take_while(|x| *x < 150).collect();
|
||||
let b3: RoaringBitmap = (1..).into_iter().map(|x| 7 * x).take_while(|x| *x < 150).collect();
|
||||
|
||||
let parts_candidates = vec![b0, b1, b2, b3];
|
||||
|
||||
let combinations = create_disjoint_combinations(parts_candidates);
|
||||
|
||||
insta::assert_snapshot!(print_combinations(&combinations), @r###"
|
||||
[2, 3, 4, 5, 7, 8, 9, 16, 22, 25, 26, 27, 32, 33, 34, 38, 39, 44, 46, 49, 51, 52, 55, 57, 58, 62, 64, 65, 68, 69, 74, 76, 77, 81, 82, 85, 86, 87, 88, 91, 92, 93, 94, 95, 99, 104, 106, 111, 115, 116, 117, 118, 119, 122, 123, 124, 125, 128, 129, 133, 134, 136, 141, 142, 145, 146, 148, ]
|
||||
[6, 10, 12, 14, 15, 18, 20, 21, 24, 28, 35, 36, 40, 45, 48, 50, 54, 56, 63, 66, 72, 75, 78, 80, 96, 98, 100, 102, 108, 110, 112, 114, 130, 132, 135, 138, 144, 147, ]
|
||||
[30, 42, 60, 70, 84, 90, 105, 120, 126, 140, ]
|
||||
[]
|
||||
"###);
|
||||
|
||||
// But we also check it programmatically
|
||||
assert_correct_combinations(&combinations, &[2, 3, 5, 7]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn compute_combinations_4_with_some_equal_bitmaps() {
|
||||
let b0: RoaringBitmap = (0..).into_iter().map(|x| 2 * x).take_while(|x| *x < 150).collect();
|
||||
let b1: RoaringBitmap = (0..).into_iter().map(|x| 3 * x).take_while(|x| *x < 150).collect();
|
||||
let b2: RoaringBitmap = (0..).into_iter().map(|x| 5 * x).take_while(|x| *x < 150).collect();
|
||||
// b3 == b1
|
||||
let b3: RoaringBitmap = (0..).into_iter().map(|x| 3 * x).take_while(|x| *x < 150).collect();
|
||||
|
||||
let parts_candidates = vec![b0, b1, b2, b3];
|
||||
|
||||
let combinations = create_disjoint_combinations(parts_candidates);
|
||||
|
||||
insta::assert_snapshot!(print_combinations(&combinations), @r###"
|
||||
[2, 4, 5, 8, 14, 16, 22, 25, 26, 28, 32, 34, 35, 38, 44, 46, 52, 55, 56, 58, 62, 64, 65, 68, 74, 76, 82, 85, 86, 88, 92, 94, 95, 98, 104, 106, 112, 115, 116, 118, 122, 124, 125, 128, 134, 136, 142, 145, 146, 148, ]
|
||||
[3, 9, 10, 20, 21, 27, 33, 39, 40, 50, 51, 57, 63, 69, 70, 80, 81, 87, 93, 99, 100, 110, 111, 117, 123, 129, 130, 140, 141, 147, ]
|
||||
[6, 12, 15, 18, 24, 36, 42, 45, 48, 54, 66, 72, 75, 78, 84, 96, 102, 105, 108, 114, 126, 132, 135, 138, 144, ]
|
||||
[0, 30, 60, 90, 120, ]
|
||||
"###);
|
||||
|
||||
// But we also check it programmatically
|
||||
assert_correct_combinations(&combinations, &[2, 3, 5, 3]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn compute_combinations_10() {
|
||||
let dividers = [2, 3, 5, 7, 11, 6, 15, 35, 18, 14];
|
||||
let parts_candidates: Vec<RoaringBitmap> = dividers
|
||||
.iter()
|
||||
.map(|÷r| {
|
||||
(0..).into_iter().map(|x| divider * x).take_while(|x| *x <= 210).collect()
|
||||
})
|
||||
.collect();
|
||||
|
||||
let combinations = create_disjoint_combinations(parts_candidates);
|
||||
insta::assert_snapshot!(print_combinations(&combinations), @r###"
|
||||
[2, 3, 4, 5, 7, 8, 9, 11, 16, 25, 26, 27, 32, 34, 38, 39, 46, 49, 51, 52, 57, 58, 62, 64, 65, 68, 69, 74, 76, 81, 82, 85, 86, 87, 91, 92, 93, 94, 95, 104, 106, 111, 115, 116, 117, 118, 119, 121, 122, 123, 124, 125, 128, 129, 133, 134, 136, 141, 142, 143, 145, 146, 148, 152, 153, 155, 158, 159, 161, 164, 166, 171, 172, 177, 178, 183, 184, 185, 187, 188, 194, 201, 202, 203, 205, 206, 207, 208, 209, ]
|
||||
[10, 20, 21, 22, 33, 40, 44, 50, 55, 63, 77, 80, 88, 99, 100, 130, 147, 160, 170, 176, 189, 190, 200, ]
|
||||
[6, 12, 14, 15, 24, 28, 35, 45, 48, 56, 75, 78, 96, 98, 102, 110, 112, 114, 135, 138, 156, 174, 175, 182, 186, 192, 195, 196, 204, ]
|
||||
[18, 36, 54, 66, 72, 108, 132, 144, 154, 162, 165, ]
|
||||
[30, 42, 60, 70, 84, 105, 120, 140, 150, 168, 198, ]
|
||||
[90, 126, 180, ]
|
||||
[]
|
||||
[210, ]
|
||||
[]
|
||||
[0, ]
|
||||
"###);
|
||||
|
||||
assert_correct_combinations(&combinations, ÷rs);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn compute_combinations_30() {
|
||||
let dividers: [u32; 30] = [
|
||||
1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4,
|
||||
5,
|
||||
];
|
||||
let parts_candidates: Vec<RoaringBitmap> = dividers
|
||||
.iter()
|
||||
.map(|divider| {
|
||||
(0..).into_iter().map(|x| divider * x).take_while(|x| *x <= 100).collect()
|
||||
})
|
||||
.collect();
|
||||
|
||||
let combinations = create_non_disjoint_combinations(parts_candidates.clone());
|
||||
insta::assert_snapshot!(print_combinations(&combinations), @r###"
|
||||
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, ]
|
||||
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, ]
|
||||
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, ]
|
||||
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, ]
|
||||
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, ]
|
||||
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, ]
|
||||
[0, 2, 3, 4, 5, 6, 8, 9, 10, 12, 14, 15, 16, 18, 20, 21, 22, 24, 25, 26, 27, 28, 30, 32, 33, 34, 35, 36, 38, 39, 40, 42, 44, 45, 46, 48, 50, 51, 52, 54, 55, 56, 57, 58, 60, 62, 63, 64, 65, 66, 68, 69, 70, 72, 74, 75, 76, 78, 80, 81, 82, 84, 85, 86, 87, 88, 90, 92, 93, 94, 95, 96, 98, 99, 100, ]
|
||||
[0, 2, 3, 4, 5, 6, 8, 9, 10, 12, 14, 15, 16, 18, 20, 21, 22, 24, 25, 26, 27, 28, 30, 32, 33, 34, 35, 36, 38, 39, 40, 42, 44, 45, 46, 48, 50, 51, 52, 54, 55, 56, 57, 58, 60, 62, 63, 64, 65, 66, 68, 69, 70, 72, 74, 75, 76, 78, 80, 81, 82, 84, 85, 86, 87, 88, 90, 92, 93, 94, 95, 96, 98, 99, 100, ]
|
||||
[0, 2, 3, 4, 5, 6, 8, 9, 10, 12, 14, 15, 16, 18, 20, 21, 22, 24, 25, 26, 27, 28, 30, 32, 33, 34, 35, 36, 38, 39, 40, 42, 44, 45, 46, 48, 50, 51, 52, 54, 55, 56, 57, 58, 60, 62, 63, 64, 65, 66, 68, 69, 70, 72, 74, 75, 76, 78, 80, 81, 82, 84, 85, 86, 87, 88, 90, 92, 93, 94, 95, 96, 98, 99, 100, ]
|
||||
[0, 2, 3, 4, 5, 6, 8, 9, 10, 12, 14, 15, 16, 18, 20, 21, 22, 24, 25, 26, 27, 28, 30, 32, 33, 34, 35, 36, 38, 39, 40, 42, 44, 45, 46, 48, 50, 51, 52, 54, 55, 56, 57, 58, 60, 62, 63, 64, 65, 66, 68, 69, 70, 72, 74, 75, 76, 78, 80, 81, 82, 84, 85, 86, 87, 88, 90, 92, 93, 94, 95, 96, 98, 99, 100, ]
|
||||
[0, 2, 3, 4, 5, 6, 8, 9, 10, 12, 14, 15, 16, 18, 20, 21, 22, 24, 25, 26, 27, 28, 30, 32, 33, 34, 35, 36, 38, 39, 40, 42, 44, 45, 46, 48, 50, 51, 52, 54, 55, 56, 57, 58, 60, 62, 63, 64, 65, 66, 68, 69, 70, 72, 74, 75, 76, 78, 80, 81, 82, 84, 85, 86, 87, 88, 90, 92, 93, 94, 95, 96, 98, 99, 100, ]
|
||||
[0, 2, 3, 4, 5, 6, 8, 9, 10, 12, 14, 15, 16, 18, 20, 21, 22, 24, 25, 26, 27, 28, 30, 32, 33, 34, 35, 36, 38, 39, 40, 42, 44, 45, 46, 48, 50, 51, 52, 54, 55, 56, 57, 58, 60, 62, 63, 64, 65, 66, 68, 69, 70, 72, 74, 75, 76, 78, 80, 81, 82, 84, 85, 86, 87, 88, 90, 92, 93, 94, 95, 96, 98, 99, 100, ]
|
||||
[0, 4, 6, 8, 10, 12, 15, 16, 18, 20, 24, 28, 30, 32, 36, 40, 42, 44, 45, 48, 50, 52, 54, 56, 60, 64, 66, 68, 70, 72, 75, 76, 78, 80, 84, 88, 90, 92, 96, 100, ]
|
||||
[0, 4, 6, 8, 10, 12, 15, 16, 18, 20, 24, 28, 30, 32, 36, 40, 42, 44, 45, 48, 50, 52, 54, 56, 60, 64, 66, 68, 70, 72, 75, 76, 78, 80, 84, 88, 90, 92, 96, 100, ]
|
||||
[0, 4, 6, 8, 10, 12, 15, 16, 18, 20, 24, 28, 30, 32, 36, 40, 42, 44, 45, 48, 50, 52, 54, 56, 60, 64, 66, 68, 70, 72, 75, 76, 78, 80, 84, 88, 90, 92, 96, 100, ]
|
||||
[0, 4, 6, 8, 10, 12, 15, 16, 18, 20, 24, 28, 30, 32, 36, 40, 42, 44, 45, 48, 50, 52, 54, 56, 60, 64, 66, 68, 70, 72, 75, 76, 78, 80, 84, 88, 90, 92, 96, 100, ]
|
||||
[0, 4, 6, 8, 10, 12, 15, 16, 18, 20, 24, 28, 30, 32, 36, 40, 42, 44, 45, 48, 50, 52, 54, 56, 60, 64, 66, 68, 70, 72, 75, 76, 78, 80, 84, 88, 90, 92, 96, 100, ]
|
||||
[0, 4, 6, 8, 10, 12, 15, 16, 18, 20, 24, 28, 30, 32, 36, 40, 42, 44, 45, 48, 50, 52, 54, 56, 60, 64, 66, 68, 70, 72, 75, 76, 78, 80, 84, 88, 90, 92, 96, 100, ]
|
||||
[0, 12, 20, 24, 30, 36, 40, 48, 60, 72, 80, 84, 90, 96, 100, ]
|
||||
[0, 12, 20, 24, 30, 36, 40, 48, 60, 72, 80, 84, 90, 96, 100, ]
|
||||
[0, 12, 20, 24, 30, 36, 40, 48, 60, 72, 80, 84, 90, 96, 100, ]
|
||||
[0, 12, 20, 24, 30, 36, 40, 48, 60, 72, 80, 84, 90, 96, 100, ]
|
||||
[0, 12, 20, 24, 30, 36, 40, 48, 60, 72, 80, 84, 90, 96, 100, ]
|
||||
[0, 12, 20, 24, 30, 36, 40, 48, 60, 72, 80, 84, 90, 96, 100, ]
|
||||
[0, 60, ]
|
||||
[0, 60, ]
|
||||
[0, 60, ]
|
||||
[0, 60, ]
|
||||
[0, 60, ]
|
||||
[0, 60, ]
|
||||
"###);
|
||||
|
||||
let combinations = create_disjoint_combinations(parts_candidates);
|
||||
insta::assert_snapshot!(print_combinations(&combinations), @r###"
|
||||
[]
|
||||
[]
|
||||
[]
|
||||
[]
|
||||
[]
|
||||
[1, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 49, 53, 59, 61, 67, 71, 73, 77, 79, 83, 89, 91, 97, ]
|
||||
[]
|
||||
[]
|
||||
[]
|
||||
[]
|
||||
[]
|
||||
[2, 3, 5, 9, 14, 21, 22, 25, 26, 27, 33, 34, 35, 38, 39, 46, 51, 55, 57, 58, 62, 63, 65, 69, 74, 81, 82, 85, 86, 87, 93, 94, 95, 98, 99, ]
|
||||
[]
|
||||
[]
|
||||
[]
|
||||
[]
|
||||
[]
|
||||
[4, 6, 8, 10, 15, 16, 18, 28, 32, 42, 44, 45, 50, 52, 54, 56, 64, 66, 68, 70, 75, 76, 78, 88, 92, ]
|
||||
[]
|
||||
[]
|
||||
[]
|
||||
[]
|
||||
[]
|
||||
[12, 20, 24, 30, 36, 40, 48, 72, 80, 84, 90, 96, 100, ]
|
||||
[]
|
||||
[]
|
||||
[]
|
||||
[]
|
||||
[]
|
||||
[0, 60, ]
|
||||
"###);
|
||||
|
||||
assert_correct_combinations(&combinations, ÷rs);
|
||||
}
|
||||
}
|
@ -1,77 +0,0 @@
|
||||
use log::debug;
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use super::{resolve_query_tree, Context, Criterion, CriterionParameters, CriterionResult};
|
||||
use crate::search::criteria::InitialCandidates;
|
||||
use crate::search::query_tree::Operation;
|
||||
use crate::search::WordDerivationsCache;
|
||||
use crate::Result;
|
||||
|
||||
/// The result of a call to the fetcher.
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
pub struct FinalResult {
|
||||
/// The query tree corresponding to the current bucket of the last criterion.
|
||||
pub query_tree: Option<Operation>,
|
||||
/// The candidates of the current bucket of the last criterion.
|
||||
pub candidates: RoaringBitmap,
|
||||
/// Candidates that comes from the current bucket of the initial criterion.
|
||||
pub initial_candidates: InitialCandidates,
|
||||
}
|
||||
|
||||
pub struct Final<'t> {
|
||||
ctx: &'t dyn Context<'t>,
|
||||
parent: Box<dyn Criterion + 't>,
|
||||
wdcache: WordDerivationsCache,
|
||||
returned_candidates: RoaringBitmap,
|
||||
}
|
||||
|
||||
impl<'t> Final<'t> {
|
||||
pub fn new(ctx: &'t dyn Context<'t>, parent: Box<dyn Criterion + 't>) -> Final<'t> {
|
||||
Final {
|
||||
ctx,
|
||||
parent,
|
||||
wdcache: WordDerivationsCache::new(),
|
||||
returned_candidates: RoaringBitmap::new(),
|
||||
}
|
||||
}
|
||||
|
||||
#[logging_timer::time("Final::{}")]
|
||||
pub fn next(&mut self, excluded_candidates: &RoaringBitmap) -> Result<Option<FinalResult>> {
|
||||
debug!("Final iteration");
|
||||
let excluded_candidates = &self.returned_candidates | excluded_candidates;
|
||||
let mut criterion_parameters = CriterionParameters {
|
||||
wdcache: &mut self.wdcache,
|
||||
// returned_candidates is merged with excluded_candidates to avoid duplicas
|
||||
excluded_candidates: &excluded_candidates,
|
||||
};
|
||||
|
||||
match self.parent.next(&mut criterion_parameters)? {
|
||||
Some(CriterionResult {
|
||||
query_tree,
|
||||
candidates,
|
||||
filtered_candidates,
|
||||
initial_candidates,
|
||||
}) => {
|
||||
let mut candidates = match (candidates, query_tree.as_ref()) {
|
||||
(Some(candidates), _) => candidates,
|
||||
(None, Some(qt)) => {
|
||||
resolve_query_tree(self.ctx, qt, &mut self.wdcache)? - excluded_candidates
|
||||
}
|
||||
(None, None) => self.ctx.documents_ids()? - excluded_candidates,
|
||||
};
|
||||
|
||||
if let Some(filtered_candidates) = filtered_candidates {
|
||||
candidates &= filtered_candidates;
|
||||
}
|
||||
|
||||
let initial_candidates = initial_candidates
|
||||
.unwrap_or_else(|| InitialCandidates::Estimated(candidates.clone()));
|
||||
|
||||
self.returned_candidates |= &candidates;
|
||||
|
||||
Ok(Some(FinalResult { query_tree, candidates, initial_candidates }))
|
||||
}
|
||||
None => Ok(None),
|
||||
}
|
||||
}
|
||||
}
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user