Mirror of https://github.com/meilisearch/meilisearch.git (synced 2025-12-04 19:55:43 +00:00)

Compare commits: v1.1.1...prototype- (387 commits)
Commits (SHA1):

f7e0a3edaf, a669368f2d, 2e7ba2b092, f983bbe532, 3cbcbad71c, e83d9680ba, 29153417c4, 57892e3d76,
13d4aee912, ba5db80afa, f8432dff5d, ae3dad5b63, 938ab16799, 8252f3331f, 5b745cc8a2, 19c3ef64bd,
3b346aac42, 72a2469178, d7d085ef1e, 1e61870c2b, 4d2860a692, 51ca77726d, 78e611f282, d8381eb790,
b212aef5db, 6bf66f35be, 52ab114f6c, dcbfecf42c, 9ca6f59546, aa7537a11e, 972bb2831c, f9ddd32545,
1afde4fea5, f8f190cd40, 3a408e8287, d3e5b10e23, 1aaf24ccbf, 90bc230820, 342c4ff85d, c85392ce40,
8875d24a48, c470b67fa2, c0e081cd98, b60840ebff, fdc1763838, 75819bc940, 7b8cc25625, 2be641f373,
ddcb661c19, d89d2efb7e, f284a9c0dd, 134e7fc433, 0cba919228, aa63091752, 58735d6d8f, 1b514517f5,
11f814821d, 30fb1153cc, 3b2c8b9f25, 2a7f9adf78, 608ceea440, 79001b9c97, 59b12fca87, 48f5bb1693,
93188b3c88, bc4efca611, feaf25a95d, 414b3fae89, 899baa0ea5, 374095d42c, dd007dceca, 3ae587205c,
1bf2694604, ed9cc1af55, b41a6cbd7a, c8af572697, 249053e514, ff2cf2a5ae, b448aca49c, 55bad07c16,
380469665f, 3421125a55, 0b2200e6e7, 0fd5ab9fcc, 14293f6c8f, d3a94e8b25, 1944077a7f, 8195d366fa,
cfd1b2cc97, 19b044b4e6, e0730b55b3, 729fa3770d, 9cbc85b2f9, a3cf104736, a109802d45, 2d8060df80,
47b66e49b8, 8f2e971879, 654a3a9e19, d1fdbb63da, fb9d9239b2, a7a0891210, 84d9c731f8, 11f4724957,
85182497ab, 3e4a356638, dfd9c384aa, f0b4046c43, bd9aba4d77, 8edad8291b, 5acf953298, d9cebff61c,
30f7bd03f6, df0d9bb878, 5230ddb3ea, d6a7c28e4d, e55efc419e, 644e136aee, ec0ecb5515, 38b7b31beb,
7a01f20df7, c20c38a7fa, 5ab46324c4, 325f17488a, e7ff987c46, 244003e36f, 1f813a6f3b, 96183e804a,
5cfb066b0a, a5f44a5ceb, 7ab48ed8c7, e7bb8c940f, 8cb85294ef, d0e9d65025, 540a396e49, a81165f0d8,
d6585eb10b, f7d90ad19f, bc25f378e8, 31630c85d0, ab09dc0167, 618c54915d, 130d2061bd, 66ddee4390,
90a6c01495, e58426109a, f513cf930a, 8a13ed7e3f, 1b8e4d0301, 996619b22a, 2c9822a337, 7276deee0a,
6a068fe36a, f7e7f438f8, 8d826e478f, ba8dcc2d78, 4d308d5237, 7ca91ebb71, 1ba8a40d61, 47f6a3ad3d,
b4c01581cd, ae17c62e24, a1148c09c2, 9c5f64769a, ebe23b04c9, 13b7c826c1, 67fd3b08ef, 5440f43fd3,
d9460a76f4, d1ddaa223d, f7ecea142e, 337e75b0e4, b5691802a3, 1690aec7f1, f267bed352, 6e50f23896,
597d57bf1d, 4c8a0179ba, c69cbec64a, 01ac8344ad, 3508ba2f20, ce328c329d, 959e4607bb, 4b4ffb8ec9,
3951fe22ab, 4d5bc9df4c, ec2f8e8040, 406b8bd248, 62b9c6fbee, b439d36807, faceb661e3, 4129d657e2,
1e6fe71a67, 0fba08cd72, 189d4c3b70, 2fff6f7f23, fddfb37f1f, 52b4090286, 3cabfb448b, 77cf5b3787,
3acc5bbb15, 114436926f, 0f7904fb38, 3f13608002, 590b1d8fb7, 4708d9b016, 0d2e7bcc13, 55fbfb6124,
be9741eb8a, 58fe260c72, 24e5f6f7a9, 0177d66149, 9b87c36200, 1861c69964, cb2b5eb38e, 53aa0a1b54,
12b26cd54e, 061b1e6d7c, 0d6e8b5c31, d48cdc67a0, 35c16ad047, 2997d1f186, 2a5997fb20, ee8a9e0bad,
3b0737a092, fdd02105ac, aa9592455c, 01e24dd630, ae6bb1ce17, 5fd28620cd, 728710d63a, fa81381865,
b96a682f16, d0f048c068, 223e82a10d, 9507ff5e31, c2b025946a, 3a818c5e87, 7871d12025, d74134ce3a,
5ac129bfa1, 3fb67f94f7, cf5145b542, 31bb61ba99, abb4522f76, ef084ef042, 3524bd1257, d4f6216966,
77acafe534, 53afda3237, abb19d368d, b4a52a622e, 8d7d8cdc2f, 626a93b348, af65fe201a, 9b83b1deb0,
e9eb271499, 3281a88d08, 5a644054ab, 16fefd364e, e7994cdeb3, 00bad8c716, 862714a18b, d18ebe4f3a,
7169d85115, f5f5f03ec0, 9b2653427d, 56b7209f26, 9b1f439a91, 01c7d2de8f, a86aeba411, 384fdc2df4,
83e5b4ed0d, 272cd7ebbd, c63c7377e6, 9259cdb12e, 5b50e49522, 65474c8de5, fbb1ba3de0, a59ca28e2c,
825f742000, dd491320e5, c6ff97a220, 49240c367a, 1e6e624078, 8b4e07e1a3, 2853009987, aa59c3bc2c,
7b1d8f4c6d, a49ddec9df, 05fe856e6e, c0cdaf9f53, e9cf58d584, 31628c5cd4, 3004e281d7, 14e8d0aaa2,
1c58cf8426, 5155fd2bf1, 9ec9c204d3, 78b9304d52, 0465ba4a05, 2099991dd1, c232cdabf5, 4e266211bf,
57fa689131, 10626dddfc, 9051065c22, e8c76cf7bf, 3f1729a17f, cab2b6bcda, c4979a2fda, 23931f8a4f,
aa414565bb, 1db152046e, c27ea2677f, caa1e1b923, 71f18e4379, 600e3dd1c5, 362eb0de86, 998d46ac10,
6c85c0d95e, 0e1fbbf7c6, 6806640ef0, 173e37584c, 6ba4d5e987, dd12d44134, a61495d660, c8e251bf24,
a938fbde4a, dcf3f1d18a, 66d0c63694, 132191360b, 345c99d5bd, 89d696c1e3, c645853529, a70ab8b072,
48aae76b15, 23bf572dea, 864f6410ed, c9bf6bb2fa, 46249ea901, ce0d1e0e13, 5065d8b0c1, a83007c013,
79e0a6dd4e, 2d88089129, 1d937f831b, 6c659dc12f, a8531053a0, cf34d1c95f, 1a9c58a7ab, 64571c8288,
72123c458b, d5881519cb, ea016d97af, 70c906d4b4, fa2ea4a379, 030263caa3, c25779afba, 0f33a65468,
7c9a8b1e1b, f45daf8031, df48ac8803, ff86073288, 0ad53784e7, 7935bef4cd, e064c52544, e106b16148,
eddefb0e0f, c5f22be6e1, b1d61f5a02, febc8d1b52, 7dc04747fd, 7c0cd7172d, 43ff236df8, 19ab4d1a15,
9287858997, df3986cd83, 34ed6518ae, 22219fd88f, a9e17ab8c6, 2dd948a4a1, 76cf1bff87, c0d8eb295d,
bcd3f6054a, 3a0314f9de, fa4d8b8348
.dockerignore

@@ -2,3 +2,4 @@ target
 Dockerfile
 .dockerignore
 .gitignore
+**/.git
.github/ISSUE_TEMPLATE/bug_report.md (vendored, 3 changes)

@@ -23,7 +23,8 @@ A clear and concise description of what you expected to happen.
 **Screenshots**
 If applicable, add screenshots to help explain your problem.
 
-**Meilisearch version:** [e.g. v0.20.0]
+**Meilisearch version:**
+[e.g. v0.20.0]
 
 **Additional context**
 Additional information that may be relevant to the issue.
.github/ISSUE_TEMPLATE/sprint_issue.md (vendored, new file, 34 lines)

@@ -0,0 +1,34 @@
+---
+name: New sprint issue
+about: ⚠️ Should only be used by the engine team ⚠️
+title: ''
+labels: ''
+assignees: ''
+
+---
+
+Related product team resources: [roadmap card]() (_internal only_) and [PRD]() (_internal only_)
+Related product discussion:
+Related spec: WIP
+
+## Motivation
+
+<!---Copy/paste the information in the roadmap resources or briefly detail the product motivation. Ask product team if any hesitation.-->
+
+## Usage
+
+<!---Write a quick description of the usage if the usage has already been defined-->
+
+Refer to the final spec to know the details and the final decisions about the usage.
+
+## TODO
+
+<!---Feel free to adapt this list with more technical/product steps-->
+
+- [ ] Release a prototype
+- [ ] If prototype validated, merge changes into `main`
+- [ ] Update the spec
+
+## Impacted teams
+
+<!---Ping the related teams. Ask for the engine manager if any hesitation-->
.github/uffizzi/Dockerfile (vendored, 19 lines deleted)

@@ -1,19 +0,0 @@
-# Run
-FROM uffizzi/ttyd:alpine
-
-ENV MEILI_HTTP_ADDR 0.0.0.0:7700
-ENV MEILI_SERVER_PROVIDER docker
-ENV MEILI_NO_ANALYTICS true
-
-RUN apk update --quiet \
-  && apk add -q --no-cache libgcc tini curl
-
-COPY target/x86_64-unknown-linux-musl/release/meilisearch /bin/meilisearch
-RUN ln -s /bin/meilisearch /meilisearch
-
-WORKDIR /meili_data
-
-EXPOSE 7700/tcp
-
-ENTRYPOINT ["tini", "--"]
-CMD ["ttyd", "/bin/zsh"]
.github/uffizzi/docker-compose.uffizzi.yml (vendored, 26 lines deleted)

@@ -1,26 +0,0 @@
-version: "3"
-
-x-uffizzi:
-  ingress:
-    service: nginx
-    port: 8081
-
-services:
-  meilisearch:
-    image: "${MEILISEARCH_IMAGE}"
-    restart: unless-stopped
-    ports:
-      - "7681:7681"
-      - "7700:7700"
-    deploy:
-      resources:
-        limits:
-          memory: 500M
-
-  nginx:
-    image: nginx:alpine
-    restart: unless-stopped
-    ports:
-      - "8081:8081"
-    volumes:
-      - ./.github/uffizzi/nginx:/etc/nginx
.github/uffizzi/nginx/nginx.conf (vendored, 28 lines deleted)

@@ -1,28 +0,0 @@
-
-events {
-  worker_connections 4096;  ## Default: 1024
-}
-
-http {
-  map $http_upgrade $connection_upgrade {
-    default upgrade;
-    ''      close;
-  }
-
-  server {
-    listen 8081;
-
-    location / {
-      proxy_pass http://localhost:7681;
-      proxy_http_version 1.1;
-      proxy_set_header Upgrade $http_upgrade;
-      proxy_set_header Connection $connection_upgrade;
-    }
-
-    location /meilisearch/ {
-      # rewrite /meilisearch/(.*) /$1 break;
-      proxy_pass http://localhost:7700/;
-    }
-  }
-}
-
@@ -1,4 +1,4 @@
-name: Benchmarks
+name: Benchmarks (manual)
 
 on:
   workflow_dispatch:

@@ -1,4 +1,4 @@
-name: Benchmarks indexing (push)
+name: Benchmarks of indexing (push)
 
 on:
   push:

@@ -1,4 +1,4 @@
-name: Benchmarks search geo (push)
+name: Benchmarks of search for geo (push)
 
 on:
   push:

@@ -1,4 +1,4 @@
-name: Benchmarks search songs (push)
+name: Benchmarks of search for songs (push)
 
 on:
   push:

@@ -1,4 +1,4 @@
-name: Benchmarks search wikipedia articles (push)
+name: Benchmarks of search for Wikipedia articles (push)
 
 on:
   push:
.github/workflows/create-issue-dependencies.yml (vendored, 28 lines deleted)

@@ -1,28 +0,0 @@
-name: Create issue to upgrade dependencies
-on:
-  schedule:
-    # Run the first of the month, every 3 month
-    - cron: '0 0 1 */3 *'
-  workflow_dispatch:
-
-jobs:
-  create-issue:
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v3
-      - name: Create an issue
-        uses: actions-ecosystem/action-create-issue@v1
-        with:
-          github_token: ${{ secrets.MEILI_BOT_GH_PAT }}
-          title: Upgrade dependencies
-          body: |
-            This issue is about updating Meilisearch dependencies:
-            - [ ] Cargo toml dependencies of Meilisearch; but also the main engine-team repositories that Meilisearch depends on (charabia, heed...)
-            - [ ] If new Rust versions have been released, update the Rust version in the Clippy job of this [GitHub Action file](./.github/workflows/rust.yml)
-
-            ⚠️ To avoid last minute bugs, this issue should only be done at the beginning of the sprint!
-
-            The GitHub action dependencies are managed by [Dependabot](./.github/dependabot.yml)
-          labels: |
-            dependencies
-            maintenance
.github/workflows/dependency-issue.yml (vendored, new file, 24 lines)

@@ -0,0 +1,24 @@
+name: Create issue to upgrade dependencies
+
+on:
+  schedule:
+    # Run the first of the month, every 3 month
+    - cron: '0 0 1 */3 *'
+  workflow_dispatch:
+
+jobs:
+  create-issue:
+    runs-on: ubuntu-latest
+    env:
+      ISSUE_TEMPLATE: issue-template.md
+      GH_TOKEN: ${{ secrets.MEILI_BOT_GH_PAT }}
+    steps:
+      - uses: actions/checkout@v3
+      - name: Download the issue template
+        run: curl -s https://raw.githubusercontent.com/meilisearch/engine-team/main/issue-templates/dependency-issue.md > $ISSUE_TEMPLATE
+      - name: Create issue
+        run: |
+          gh issue create \
+            --title 'Upgrade dependencies' \
+            --label 'dependencies,maintenance' \
+            --body-file $ISSUE_TEMPLATE
@@ -1,4 +1,4 @@
-name: Publish to APT repository & Homebrew
+name: Publish to APT & Homebrew
 
 on:
   release:

@@ -35,7 +35,7 @@ jobs:
       - name: Build deb package
         run: cargo deb -p meilisearch -o target/debian/meilisearch.deb
       - name: Upload debian pkg to release
-        uses: svenstaro/upload-release-action@2.4.0
+        uses: svenstaro/upload-release-action@2.5.0
         with:
           repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
           file: target/debian/meilisearch.deb
.github/workflows/publish-binaries.yml (vendored, 12 changes)

@@ -1,3 +1,5 @@
+name: Publish binaries to GitHub release
+
 on:
   workflow_dispatch:
   schedule:

@@ -5,8 +7,6 @@ on:
   release:
     types: [published]
 
-name: Publish binaries to release
-
 jobs:
   check-version:
     name: Check the version validity

@@ -54,7 +54,7 @@ jobs:
       # No need to upload binaries for dry run (cron)
       - name: Upload binaries to release
         if: github.event_name == 'release'
-        uses: svenstaro/upload-release-action@2.4.0
+        uses: svenstaro/upload-release-action@2.5.0
         with:
           repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
           file: target/release/meilisearch

@@ -87,7 +87,7 @@ jobs:
       # No need to upload binaries for dry run (cron)
       - name: Upload binaries to release
         if: github.event_name == 'release'
-        uses: svenstaro/upload-release-action@2.4.0
+        uses: svenstaro/upload-release-action@2.5.0
         with:
           repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
           file: target/release/${{ matrix.artifact_name }}

@@ -121,7 +121,7 @@ jobs:
       - name: Upload the binary to release
         # No need to upload binaries for dry run (cron)
         if: github.event_name == 'release'
-        uses: svenstaro/upload-release-action@2.4.0
+        uses: svenstaro/upload-release-action@2.5.0
         with:
           repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
           file: target/${{ matrix.target }}/release/meilisearch

@@ -183,7 +183,7 @@ jobs:
       - name: Upload the binary to release
         # No need to upload binaries for dry run (cron)
         if: github.event_name == 'release'
-        uses: svenstaro/upload-release-action@2.4.0
+        uses: svenstaro/upload-release-action@2.5.0
         with:
           repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
           file: target/${{ matrix.target }}/release/meilisearch
.github/workflows/publish-docker-images.yml (vendored, 12 changes)

@@ -1,4 +1,5 @@
 ---
+name: Publish images to Docker Hub
 on:
   push:
     # Will run for every tag pushed except `latest`

@@ -12,8 +13,6 @@ on:
     - cron: '0 23 * * *' # Every day at 11:00pm
   workflow_dispatch:
 
-name: Publish tagged images to Docker Hub
-
 jobs:
   docker:
     runs-on: docker

@@ -59,9 +58,13 @@ jobs:
 
       - name: Set up QEMU
         uses: docker/setup-qemu-action@v2
+        with:
+          platforms: linux/amd64,linux/arm64
 
       - name: Set up Docker Buildx
         uses: docker/setup-buildx-action@v2
+        with:
+          platforms: linux/amd64,linux/arm64
 
       - name: Login to Docker Hub
         uses: docker/login-action@v2

@@ -89,10 +92,13 @@ jobs:
           push: true
           platforms: linux/amd64,linux/arm64
           tags: ${{ steps.meta.outputs.tags }}
           builder: ${{ steps.buildx.outputs.name }}
           build-args: |
             COMMIT_SHA=${{ github.sha }}
             COMMIT_DATE=${{ steps.build-metadata.outputs.date }}
+            GIT_TAG=${{ github.ref_name }}
+          cache-from: type=gha
+          cache-to: type=gha,mode=max
 
       # /!\ Don't touch this without checking with Cloud team
       - name: Send CI information to Cloud team
.github/workflows/rust.yml (vendored, 145 lines deleted)

@@ -1,145 +0,0 @@
-name: Rust
-
-on:
-  workflow_dispatch:
-  schedule:
-    # Everyday at 5:00am
-    - cron: '0 5 * * *'
-  pull_request:
-  push:
-    # trying and staging branches are for Bors config
-    branches:
-      - trying
-      - staging
-
-env:
-  CARGO_TERM_COLOR: always
-  RUST_BACKTRACE: 1
-  RUSTFLAGS: "-D warnings"
-
-jobs:
-  test-linux:
-    name: Tests on ubuntu-18.04
-    runs-on: ubuntu-latest
-    container:
-      # Use ubuntu-18.04 to compile with glibc 2.27, which are the production expectations
-      image: ubuntu:18.04
-    steps:
-      - uses: actions/checkout@v3
-      - name: Install needed dependencies
-        run: |
-          apt-get update && apt-get install -y curl
-          apt-get install build-essential -y
-      - name: Run test with Rust stable
-        if: github.event_name != 'schedule'
-        uses: actions-rs/toolchain@v1
-        with:
-          toolchain: stable
-          override: true
-      - name: Run test with Rust nightly
-        if: github.event_name == 'schedule'
-        uses: actions-rs/toolchain@v1
-        with:
-          toolchain: nightly
-          override: true
-      # Disable cache due to disk space issues with Windows workers in CI
-      # - name: Cache dependencies
-      #   uses: Swatinem/rust-cache@v2.2.0
-      - name: Run cargo check without any default features
-        uses: actions-rs/cargo@v1
-        with:
-          command: build
-          args: --locked --release --no-default-features --all
-      - name: Run cargo test
-        uses: actions-rs/cargo@v1
-        with:
-          command: test
-          args: --locked --release --all
-
-  test-others:
-    name: Tests on ${{ matrix.os }}
-    runs-on: ${{ matrix.os }}
-    strategy:
-      fail-fast: false
-      matrix:
-        os: [macos-12, windows-2022]
-    steps:
-      - uses: actions/checkout@v3
-      # - name: Cache dependencies
-      #   uses: Swatinem/rust-cache@v2.2.0
-      - name: Run cargo check without any default features
-        uses: actions-rs/cargo@v1
-        with:
-          command: build
-          args: --locked --release --no-default-features --all
-      - name: Run cargo test
-        uses: actions-rs/cargo@v1
-        with:
-          command: test
-          args: --locked --release --all
-
-  # We run tests in debug also, to make sure that the debug_assertions are hit
-  test-debug:
-    name: Run tests in debug
-    runs-on: ubuntu-latest
-    container:
-      # Use ubuntu-18.04 to compile with glibc 2.27, which are the production expectations
-      image: ubuntu:18.04
-    steps:
-      - uses: actions/checkout@v3
-      - name: Install needed dependencies
-        run: |
-          apt-get update && apt-get install -y curl
-          apt-get install build-essential -y
-      - uses: actions-rs/toolchain@v1
-        with:
-          toolchain: stable
-          override: true
-      # - name: Cache dependencies
-      #   uses: Swatinem/rust-cache@v2.2.0
-      - name: Run tests in debug
-        uses: actions-rs/cargo@v1
-        with:
-          command: test
-          args: --locked --all
-
-  clippy:
-    name: Run Clippy
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v3
-      - uses: actions-rs/toolchain@v1
-        with:
-          profile: minimal
-          toolchain: 1.67.0
-          override: true
-          components: clippy
-      # - name: Cache dependencies
-      #   uses: Swatinem/rust-cache@v2.2.0
-      - name: Run cargo clippy
-        uses: actions-rs/cargo@v1
-        with:
-          command: clippy
-          # allow unlined_format_args https://github.com/rust-lang/rust-clippy/issues/10087
-          args: --all-targets -- --deny warnings --allow clippy::uninlined_format_args
-
-  fmt:
-    name: Run Rustfmt
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v3
-      - uses: actions-rs/toolchain@v1
-        with:
-          profile: minimal
-          toolchain: nightly
-          override: true
-          components: rustfmt
-      # - name: Cache dependencies
-      #   uses: Swatinem/rust-cache@v2.2.0
-      - name: Run cargo fmt
-        # Since we never ran the `build.rs` script in the benchmark directory we are missing one auto-generated import file.
-        # Since we want to trigger (and fail) this action as fast as possible, instead of building the benchmark crate
-        # we are going to create an empty file where rustfmt expects it.
-        run: |
-          echo -ne "\n" > benchmarks/benches/datasets_paths.rs
-          cargo fmt --all -- --check
.github/workflows/sdks-tests.yml (vendored, new file, 200 lines)

@@ -0,0 +1,200 @@
+# If any test fails, the engine team should ensure the "breaking" changes are expected and contact the integration team
+name: SDKs tests
+
+on:
+  workflow_dispatch:
+  schedule:
+    - cron: "0 6 * * MON" # Every Monday at 6:00AM
+
+env:
+  MEILI_MASTER_KEY: 'masterKey'
+  MEILI_NO_ANALYTICS: 'true'
+
+jobs:
+
+  meilisearch-js-tests:
+    name: JS SDK tests
+    runs-on: ubuntu-latest
+    services:
+      meilisearch:
+        image: getmeili/meilisearch:nightly
+        env:
+          MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }}
+          MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }}
+        ports:
+          - '7700:7700'
+    steps:
+      - uses: actions/checkout@v3
+        with:
+          repository: meilisearch/meilisearch-js
+      - name: Setup node
+        uses: actions/setup-node@v3
+        with:
+          cache: 'yarn'
+      - name: Install dependencies
+        run: yarn --dev
+      - name: Run tests
+        run: yarn test
+      - name: Build project
+        run: yarn build
+      - name: Run ESM env
+        run: yarn test:env:esm
+      - name: Run Node.js env
+        run: yarn test:env:nodejs
+      - name: Run node typescript env
+        run: yarn test:env:node-ts
+      - name: Run Browser env
+        run: yarn test:env:browser
+
+  instant-meilisearch-tests:
+    name: instant-meilisearch tests
+    runs-on: ubuntu-latest
+    services:
+      meilisearch:
+        image: getmeili/meilisearch:nightly
+        env:
+          MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }}
+          MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }}
+        ports:
+          - '7700:7700'
+    steps:
+      - uses: actions/checkout@v3
+        with:
+          repository: meilisearch/instant-meilisearch
+      - name: Setup node
+        uses: actions/setup-node@v3
+        with:
+          cache: yarn
+      - name: Install dependencies
+        run: yarn install
+      - name: Run tests
+        run: yarn test
+      - name: Build all the playgrounds and the packages
+        run: yarn build
+
+  meilisearch-php-tests:
+    name: PHP SDK tests
+    runs-on: ubuntu-latest
+    services:
+      meilisearch:
+        image: getmeili/meilisearch:nightly
+        env:
+          MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }}
+          MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }}
+        ports:
+          - '7700:7700'
+    steps:
+      - uses: actions/checkout@v3
+        with:
+          repository: meilisearch/meilisearch-php
+      - name: Install PHP
+        uses: shivammathur/setup-php@v2
+        with:
+          coverage: none
+      - name: Validate composer.json and composer.lock
+        run: composer validate
+      - name: Install dependencies
+        run: |
+          composer remove --dev friendsofphp/php-cs-fixer --no-update --no-interaction
+          composer update --prefer-dist --no-progress
+      - name: Run test suite - default HTTP client (Guzzle 7)
+        run: |
+          sh scripts/tests.sh
+          composer remove --dev guzzlehttp/guzzle http-interop/http-factory-guzzle
+
+  meilisearch-python-tests:
+    name: Python SDK tests
+    runs-on: ubuntu-latest
+    services:
+      meilisearch:
+        image: getmeili/meilisearch:nightly
+        env:
+          MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }}
+          MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }}
+        ports:
+          - '7700:7700'
+    steps:
+      - uses: actions/checkout@v3
+        with:
+          repository: meilisearch/meilisearch-python
+      - name: Set up Python
+        uses: actions/setup-python@v4
+      - name: Install pipenv
+        uses: dschep/install-pipenv-action@v1
+      - name: Install dependencies
+        run: pipenv install --dev --python=${{ matrix.python-version }}
+      - name: Test with pytest
+        run: pipenv run pytest
+
+  meilisearch-go-tests:
+    name: Go SDK tests
+    runs-on: ubuntu-latest
+    services:
+      meilisearch:
+        image: getmeili/meilisearch:nightly
+        env:
+          MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }}
+          MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }}
+        ports:
+          - '7700:7700'
+    steps:
+      - name: Set up Go
+        uses: actions/setup-go@v3
+        with:
+          go-version: stable
+      - uses: actions/checkout@v3
+        with:
+          repository: meilisearch/meilisearch-go
+      - name: Get dependencies
+        run: |
+          go get -v -t -d ./...
+          if [ -f Gopkg.toml ]; then
+              curl https://raw.githubusercontent.com/golang/dep/master/install.sh | sh
+              dep ensure
+          fi
+      - name: Run integration tests
+        run: go test -v ./...
+
+  meilisearch-ruby-tests:
+    name: Ruby SDK tests
+    runs-on: ubuntu-latest
+    services:
+      meilisearch:
+        image: getmeili/meilisearch:nightly
+        env:
+          MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }}
+          MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }}
+        ports:
+          - '7700:7700'
+    steps:
+      - uses: actions/checkout@v3
+        with:
+          repository: meilisearch/meilisearch-ruby
+      - name: Set up Ruby 3
+        uses: ruby/setup-ruby@v1
+        with:
+          ruby-version: 3
+      - name: Install ruby dependencies
+        run: bundle install --with test
+      - name: Run test suite
+        run: bundle exec rspec
+
+  meilisearch-rust-tests:
+    name: Rust SDK tests
+    runs-on: ubuntu-latest
+    services:
+      meilisearch:
+        image: getmeili/meilisearch:nightly
+        env:
+          MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }}
+          MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }}
+        ports:
+          - '7700:7700'
+    steps:
+      - uses: actions/checkout@v3
+        with:
+          repository: meilisearch/meilisearch-rust
+      - name: Build
+        run: cargo build --verbose
+      - name: Run tests
+        run: cargo test --verbose
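Every job above runs Meilisearch as a GitHub Actions service container. For reproducing an SDK test run locally, a rough stand-in for that `services.meilisearch` block (a sketch reusing the same image, port, and env values as the workflow) would be:

```sh
# Approximate local equivalent of the workflow's `services.meilisearch` block
docker run -d --rm -p 7700:7700 \
  -e MEILI_MASTER_KEY=masterKey \
  -e MEILI_NO_ANALYTICS=true \
  getmeili/meilisearch:nightly
```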
.github/workflows/test-suite.yml (vendored, new file, 171 lines)

@@ -0,0 +1,171 @@
+name: Test suite
+
+on:
+  workflow_dispatch:
+  schedule:
+    # Everyday at 5:00am
+    - cron: '0 5 * * *'
+  pull_request:
+  push:
+    # trying and staging branches are for Bors config
+    branches:
+      - trying
+      - staging
+
+env:
+  CARGO_TERM_COLOR: always
+  RUST_BACKTRACE: 1
+  RUSTFLAGS: "-D warnings"
+
+jobs:
+  test-linux:
+    name: Tests on ubuntu-18.04
+    runs-on: ubuntu-latest
+    container:
+      # Use ubuntu-18.04 to compile with glibc 2.27, which are the production expectations
+      image: ubuntu:18.04
+    steps:
+      - uses: actions/checkout@v3
+      - name: Install needed dependencies
+        run: |
+          apt-get update && apt-get install -y curl
+          apt-get install build-essential -y
+      - name: Run test with Rust stable
+        if: github.event_name != 'schedule'
+        uses: actions-rs/toolchain@v1
+        with:
+          toolchain: stable
+          override: true
+      - name: Run test with Rust nightly
+        if: github.event_name == 'schedule'
+        uses: actions-rs/toolchain@v1
+        with:
+          toolchain: nightly
+          override: true
+      - name: Cache dependencies
+        uses: Swatinem/rust-cache@v2.2.1
+      - name: Run cargo check without any default features
+        uses: actions-rs/cargo@v1
+        with:
+          command: build
+          args: --locked --release --no-default-features --all
+      - name: Run cargo test
+        uses: actions-rs/cargo@v1
+        with:
+          command: test
+          args: --locked --release --all
+
+  test-others:
+    name: Tests on ${{ matrix.os }}
+    runs-on: ${{ matrix.os }}
+    strategy:
+      fail-fast: false
+      matrix:
+        os: [macos-12, windows-2022]
+    steps:
+      - uses: actions/checkout@v3
+      - name: Cache dependencies
+        uses: Swatinem/rust-cache@v2.2.1
+      - name: Run cargo check without any default features
+        uses: actions-rs/cargo@v1
+        with:
+          command: build
+          args: --locked --release --no-default-features --all
+      - name: Run cargo test
+        uses: actions-rs/cargo@v1
+        with:
+          command: test
+          args: --locked --release --all
+
+  test-all-features:
+    name: Tests all features on cron schedule only
+    runs-on: ubuntu-latest
+    container:
+      # Use ubuntu-18.04 to compile with glibc 2.27, which are the production expectations
+      image: ubuntu:18.04
+    if: github.event_name == 'schedule'
+    steps:
+      - uses: actions/checkout@v3
+      - name: Install needed dependencies
+        run: |
+          apt-get update
+          apt-get install --assume-yes build-essential curl
+      - uses: actions-rs/toolchain@v1
+        with:
+          toolchain: stable
+          override: true
+      - name: Run cargo build with all features
+        uses: actions-rs/cargo@v1
+        with:
+          command: build
+          args: --workspace --locked --release --all-features
+      - name: Run cargo test with all features
+        uses: actions-rs/cargo@v1
+        with:
+          command: test
+          args: --workspace --locked --release --all-features
+
+  # We run tests in debug also, to make sure that the debug_assertions are hit
+  test-debug:
+    name: Run tests in debug
+    runs-on: ubuntu-latest
+    container:
+      # Use ubuntu-18.04 to compile with glibc 2.27, which are the production expectations
+      image: ubuntu:18.04
+    steps:
+      - uses: actions/checkout@v3
+      - name: Install needed dependencies
+        run: |
+          apt-get update && apt-get install -y curl
+          apt-get install build-essential -y
+      - uses: actions-rs/toolchain@v1
+        with:
+          toolchain: stable
+          override: true
+      - name: Cache dependencies
+        uses: Swatinem/rust-cache@v2.2.1
+      - name: Run tests in debug
+        uses: actions-rs/cargo@v1
+        with:
+          command: test
+          args: --locked --all
+
+  clippy:
+    name: Run Clippy
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3
+      - uses: actions-rs/toolchain@v1
+        with:
+          profile: minimal
+          toolchain: 1.69.0
+          override: true
+          components: clippy
+      - name: Cache dependencies
+        uses: Swatinem/rust-cache@v2.2.1
+      - name: Run cargo clippy
+        uses: actions-rs/cargo@v1
+        with:
+          command: clippy
+          args: --all-targets -- --deny warnings
+
+  fmt:
+    name: Run Rustfmt
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3
+      - uses: actions-rs/toolchain@v1
+        with:
+          profile: minimal
+          toolchain: nightly
+          override: true
+          components: rustfmt
+      - name: Cache dependencies
+        uses: Swatinem/rust-cache@v2.2.1
+      - name: Run cargo fmt
+        # Since we never ran the `build.rs` script in the benchmark directory we are missing one auto-generated import file.
+        # Since we want to trigger (and fail) this action as fast as possible, instead of building the benchmark crate
+        # we are going to create an empty file where rustfmt expects it.
+        run: |
+          echo -ne "\n" > benchmarks/benches/datasets_paths.rs
+          cargo fmt --all -- --check
.github/workflows/uffizzi-build.yml (vendored, 120 lines deleted)

@@ -1,120 +0,0 @@
-name: Uffizzi - Build PR Image
-on:
-  pull_request:
-    types: [opened,synchronize,reopened,closed]
-
-jobs:
-  build-meilisearch:
-    name: Build and push `meilisearch`
-    runs-on: ubuntu-latest
-    outputs:
-      tags: ${{ steps.meta.outputs.tags }}
-    if: ${{ github.event.action != 'closed' }}
-    steps:
-      - name: checkout
-        uses: actions/checkout@v3
-
-      - run: sudo apt-get install musl-tools
-
-      - uses: actions-rs/toolchain@v1
-        with:
-          toolchain: stable
-          override: true
-          target: x86_64-unknown-linux-musl
-
-      - name: Cache dependencies
-        uses: Swatinem/rust-cache@v2.2.0
-
-      - name: Run cargo check without any default features
-        uses: actions-rs/cargo@v1
-        with:
-          command: build
-          args: --target x86_64-unknown-linux-musl --release
-
-      - name: Remove dockerignore so we can use the target folder in our docker build
-        run: rm -f .dockerignore
-
-      - name: Set up QEMU
-        uses: docker/setup-qemu-action@v2
-
-      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v2
-
-      - name: Generate UUID image name
-        id: uuid
-        run: echo "UUID_TAG=$(uuidgen)" >> $GITHUB_ENV
-
-      - name: Docker metadata
-        id: meta
-        uses: docker/metadata-action@v3
-        with:
-          images: registry.uffizzi.com/${{ env.UUID_TAG }}
-          tags: |
-            type=raw,value=60d
-
-      - name: Build Image
-        uses: docker/build-push-action@v3
-        with:
-          context: ./
-          file: .github/uffizzi/Dockerfile
-          tags: ${{ steps.meta.outputs.tags }}
-          labels: ${{ steps.meta.outputs.labels }}
-          push: true
-          cache-from: type=gha
-          cache-to: type=gha,mode=max
-
-  render-compose-file:
-    name: Render Docker Compose File
-    # Pass output of this workflow to another triggered by `workflow_run` event.
-    runs-on: ubuntu-latest
-    needs:
-      - build-meilisearch
-    outputs:
-      compose-file-cache-key: ${{ env.COMPOSE_FILE_HASH }}
-    steps:
-      - name: Checkout git repo
-        uses: actions/checkout@v3
-      - name: Render Compose File
-        run: |
-          MEILISEARCH_IMAGE=$(echo ${{ needs.build-meilisearch.outputs.tags }})
-          export MEILISEARCH_IMAGE
-          # Render simple template from environment variables.
-          envsubst < .github/uffizzi/docker-compose.uffizzi.yml > docker-compose.rendered.yml
-          cat docker-compose.rendered.yml
-      - name: Upload Rendered Compose File as Artifact
-        uses: actions/upload-artifact@v3
-        with:
-          name: preview-spec
-          path: docker-compose.rendered.yml
-          retention-days: 2
-      - name: Serialize PR Event to File
-        run: |
-          cat << EOF > event.json
-          ${{ toJSON(github.event) }}
-
-          EOF
-      - name: Upload PR Event as Artifact
-        uses: actions/upload-artifact@v3
-        with:
-          name: preview-spec
-          path: event.json
-          retention-days: 2
-
-  delete-preview:
-    name: Call for Preview Deletion
-    runs-on: ubuntu-latest
-    if: ${{ github.event.action == 'closed' }}
-    steps:
-      # If this PR is closing, we will not render a compose file nor pass it to the next workflow.
-      - name: Serialize PR Event to File
-        run: |
-          cat << EOF > event.json
-          ${{ toJSON(github.event) }}
-
-          EOF
-      - name: Upload PR Event as Artifact
-        uses: actions/upload-artifact@v3
-        with:
-          name: preview-spec
-          path: event.json
-          retention-days: 2
.github/workflows/uffizzi-preview-deploy.yml (vendored, 103 lines deleted)

@@ -1,103 +0,0 @@
-name: Uffizzi - Deploy Preview
-
-on:
-  workflow_run:
-    workflows:
-      - "Uffizzi - Build PR Image"
-    types:
-      - completed
-
-jobs:
-  cache-compose-file:
-    name: Cache Compose File
-    runs-on: ubuntu-latest
-    if: ${{ github.event.workflow_run.conclusion == 'success' }}
-    outputs:
-      compose-file-cache-key: ${{ env.COMPOSE_FILE_HASH }}
-      pr-number: ${{ env.PR_NUMBER }}
-      expected-url: ${{ env.EXPECTED_URL }}
-    steps:
-      - name: 'Download artifacts'
-        # Fetch output (zip archive) from the workflow run that triggered this workflow.
-        uses: actions/github-script@v6
-        with:
-          script: |
-            let allArtifacts = await github.rest.actions.listWorkflowRunArtifacts({
-              owner: context.repo.owner,
-              repo: context.repo.repo,
-              run_id: context.payload.workflow_run.id,
-            });
-            let matchArtifact = allArtifacts.data.artifacts.filter((artifact) => {
-              return artifact.name == "preview-spec"
-            })[0];
-            let download = await github.rest.actions.downloadArtifact({
-              owner: context.repo.owner,
-              repo: context.repo.repo,
-              artifact_id: matchArtifact.id,
-              archive_format: 'zip',
-            });
-            let fs = require('fs');
-            fs.writeFileSync(`${process.env.GITHUB_WORKSPACE}/preview-spec.zip`, Buffer.from(download.data));
-
-      - name: 'Unzip artifact'
-        run: unzip preview-spec.zip
-
-      - name: Read Event into ENV
-        run: |
-          echo 'EVENT_JSON<<EOF' >> $GITHUB_ENV
-          cat event.json >> $GITHUB_ENV
-          echo 'EOF' >> $GITHUB_ENV
-
-      - name: Hash Rendered Compose File
-        id: hash
-        # If the previous workflow was triggered by a PR close event, we will not have a compose file artifact.
-        if: ${{ fromJSON(env.EVENT_JSON).action != 'closed' }}
-        run: echo "COMPOSE_FILE_HASH=$(md5sum docker-compose.rendered.yml | awk '{ print $1 }')" >> $GITHUB_ENV
-
-      - name: Cache Rendered Compose File
-        if: ${{ fromJSON(env.EVENT_JSON).action != 'closed' }}
-        uses: actions/cache@v3
-        with:
-          path: docker-compose.rendered.yml
-          key: ${{ env.COMPOSE_FILE_HASH }}
-
-      - name: Read PR Number From Event Object
-        id: pr
-        run: echo "PR_NUMBER=${{ fromJSON(env.EVENT_JSON).number }}" >> $GITHUB_ENV
-
-      - name: DEBUG - Print Job Outputs
-        if: ${{ runner.debug }}
-        run: |
-          echo "PR number: ${{ env.PR_NUMBER }}"
-          echo "Compose file hash: ${{ env.COMPOSE_FILE_HASH }}"
-          cat event.json
-
-      - name: Add expected URL env var
-        if: ${{ runner.debug }}
-        run: |
-          REPO=$(echo ${{ github.repository }} | sed 's/\./+/g')
-          echo "EXPECTED_URL=${{ inputs.server }}/github.com/$REPO/pull/${{ env.PR_NUMBER }}" >> $GITHUB_ENV
-
-  deploy-uffizzi-preview:
-    name: Use Remote Workflow to Preview on Uffizzi
-    needs:
-      - cache-compose-file
-    uses: UffizziCloud/preview-action/.github/workflows/reusable.yaml@v2
-    with:
-      # If this workflow was triggered by a PR close event, cache-key will be an empty string
-      # and this reusable workflow will delete the preview deployment.
-      compose-file-cache-key: ${{ needs.cache-compose-file.outputs.compose-file-cache-key }}
-      compose-file-cache-path: docker-compose.rendered.yml
-      server: https://app.uffizzi.com
-      pr-number: ${{ needs.cache-compose-file.outputs.pr-number }}
-      description: |
-        The meilisearch preview environment contains a web terminal from where you can run the
-        `meilisearch` command. You should be able to access this instance of meilisearch running in
-        the preview from the link Meilisearch Endpoint link given below.
-
-        Web Terminal Endpoint : <uffizzi-url>
-        Meilisearch Endpoint : <uffizzi-url>/meilisearch
-    permissions:
-      contents: read
-      pull-requests: write
-      id-token: write
CONTRIBUTING.md

@@ -18,7 +18,7 @@ If Meilisearch does not offer optimized support for your language, please consid
 
 ## Assumptions
 
-1. **You're familiar with [GitHub](https://github.com) and the [Pull Requests](https://help.github.com/en/github/collaborating-with-issues-and-pull-requests/about-pull-requests)(PR) workflow.**
+1. **You're familiar with [GitHub](https://github.com) and the [Pull Requests (PR)](https://help.github.com/en/github/collaborating-with-issues-and-pull-requests/about-pull-requests) workflow.**
 2. **You've read the Meilisearch [documentation](https://docs.meilisearch.com).**
 3. **You know about the [Meilisearch community](https://docs.meilisearch.com/learn/what_is_meilisearch/contact.html).
    Please use this for help.**

@@ -120,29 +120,9 @@ The full Meilisearch release process is described in [this guide](https://github
 
 Depending on the developed feature, you might need to provide a prototyped version of Meilisearch to make it easier to test by the users.
 
-The prototype name must follow this convention: `prototype-X-Y` where
-- `X` is the feature name formatted in `kebab-case`. It should not end with a single number.
-- `Y` is the version of the prototype, starting from `0`.
-
-✅ Example: `prototype-auto-resize-0`. </br>
-❌ Bad example: `auto-resize-0`: lacks the `prototype` prefix. </br>
-❌ Bad example: `prototype-auto-resize`: lacks the version suffix. </br>
-❌ Bad example: `prototype-auto-resize-0-0`: feature name ends with a single number.
-
-Steps to create a prototype:
-
-1. In your terminal, go to the last commit of your branch (the one you want to provide as a prototype).
-2. Create a tag following the convention: `git tag prototype-X-Y`
-3. Run Meilisearch and check that its launch summary features a line: `Prototype: prototype-X-Y` (you may need to switch branches and back after tagging for this to work).
-3. Push the tag: `git push origin prototype-X-Y`
-4. Check the [Docker CI](https://github.com/meilisearch/meilisearch/actions/workflows/publish-docker-images.yml) is now running.
-
-🐳 Once the CI has finished to run (~1h30), a Docker image named `prototype-X-Y` will be available on [DockerHub](https://hub.docker.com/repository/docker/getmeili/meilisearch/general). People can use it with the following command: `docker run -p 7700:7700 -v $(pwd)/meili_data:/meili_data getmeili/meilisearch:prototype-X-Y`. <br>
-More information about [how to run Meilisearch with Docker](https://docs.meilisearch.com/learn/cookbooks/docker.html#download-meilisearch-with-docker).
-
-⚙️ However, no binaries will be created. If the users do not use Docker, they can go to the `prototype-X-Y` tag in the Meilisearch repository and compile from the source code.
-
-⚠️ When sharing a prototype with users, remind them to not use it in production. Prototypes are solely for test purposes.
+This happens in two steps:
+- [Release the prototype](https://github.com/meilisearch/engine-team/blob/main/resources/prototypes.md#how-to-publish-a-prototype)
+- [Communicate about it](https://github.com/meilisearch/engine-team/blob/main/resources/prototypes.md#communication)
 
 ### Release assets
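For quick reference, the tagging workflow in the removed hunk above reduces to a handful of commands. This is a sketch: `my-feature-branch` is a hypothetical branch name, and `prototype-auto-resize-0` is the example tag taken from the removed text.

```sh
git checkout my-feature-branch          # hypothetical branch; use the one you want to share
git tag prototype-auto-resize-0         # tag the last commit, following `prototype-X-Y`
git push origin prototype-auto-resize-0
# After the Docker CI finishes (~1h30), users can try the prototype:
docker run -p 7700:7700 -v $(pwd)/meili_data:/meili_data getmeili/meilisearch:prototype-auto-resize-0
```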
Cargo.lock (generated, 1116 changes)

File diff suppressed because it is too large.
Dockerfile

@@ -1,3 +1,4 @@
+# syntax=docker/dockerfile:1.4
 # Compile
 FROM rust:alpine3.16 AS compiler
 

@@ -11,7 +12,7 @@ ARG GIT_TAG
 ENV VERGEN_GIT_SHA=${COMMIT_SHA} VERGEN_GIT_COMMIT_TIMESTAMP=${COMMIT_DATE} VERGEN_GIT_SEMVER_LIGHTWEIGHT=${GIT_TAG}
 ENV RUSTFLAGS="-C target-feature=-crt-static"
 
-COPY . .
+COPY --link . .
 RUN set -eux; \
     apkArch="$(apk --print-arch)"; \
     if [ "$apkArch" = "aarch64" ]; then \

@@ -30,7 +31,7 @@ RUN apk update --quiet \
 
 # add meilisearch to the `/bin` so you can run it from anywhere and it's easy
 # to find.
-COPY --from=compiler /meilisearch/target/release/meilisearch /bin/meilisearch
+COPY --from=compiler --link /meilisearch/target/release/meilisearch /bin/meilisearch
 # To stay compatible with the older version of the container (pre v0.27.0) we're
 # going to symlink the meilisearch binary in the path to `/meilisearch`
 RUN ln -s /bin/meilisearch /meilisearch
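The `VERGEN_*` variables in the hunk above are fed from the `COMMIT_SHA`, `COMMIT_DATE`, and `GIT_TAG` build arguments that the Docker publish workflow passes in. A local build could supply them the same way; this is a sketch, and the locally derived values and the `meilisearch:local` tag are assumptions, not what CI passes verbatim.

```sh
docker build \
  --build-arg COMMIT_SHA="$(git rev-parse HEAD)" \
  --build-arg COMMIT_DATE="$(git show -s --format=%cI HEAD)" \
  --build-arg GIT_TAG="$(git describe --tags --abbrev=0)" \
  -t meilisearch:local .   # hypothetical local image tag
```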
README.md (45 changes)

@@ -7,15 +7,15 @@
   <a href="https://www.meilisearch.com">Website</a> |
   <a href="https://roadmap.meilisearch.com/tabs/1-under-consideration">Roadmap</a> |
   <a href="https://blog.meilisearch.com">Blog</a> |
-  <a href="https://docs.meilisearch.com">Documentation</a> |
-  <a href="https://docs.meilisearch.com/faq/">FAQ</a> |
+  <a href="https://meilisearch.com/docs">Documentation</a> |
+  <a href="https://meilisearch.com/docs/faq">FAQ</a> |
   <a href="https://discord.meilisearch.com">Discord</a>
 </h4>
 
 <p align="center">
   <a href="https://deps.rs/repo/github/meilisearch/meilisearch"><img src="https://deps.rs/repo/github/meilisearch/meilisearch/status.svg" alt="Dependency status"></a>
   <a href="https://github.com/meilisearch/meilisearch/blob/main/LICENSE"><img src="https://img.shields.io/badge/license-MIT-informational" alt="License"></a>
-  <a href="https://app.bors.tech/repositories/26457"><img src="https://bors.tech/images/badge_small.svg" alt="Bors enabled"></a>
+  <a href="https://ms-bors.herokuapp.com/repositories/52"><img src="https://bors.tech/images/badge_small.svg" alt="Bors enabled"></a>
 </p>
 
 <p align="center">⚡ A lightning-fast search engine that fits effortlessly into your apps, websites, and workflow 🔍</p>

@@ -36,27 +36,27 @@ Meilisearch helps you shape a delightful search experience in a snap, offering f
 ## ✨ Features
 
 - **Search-as-you-type:** find search results in less than 50 milliseconds
-- **[Typo tolerance](https://docs.meilisearch.com/learn/getting_started/customizing_relevancy.html#typo-tolerance):** get relevant matches even when queries contain typos and misspellings
-- **[Filtering and faceted search](https://docs.meilisearch.com/learn/advanced/filtering_and_faceted_search.html):** enhance your user's search experience with custom filters and build a faceted search interface in a few lines of code
-- **[Sorting](https://docs.meilisearch.com/learn/advanced/sorting.html):** sort results based on price, date, or pretty much anything else your users need
-- **[Synonym support](https://docs.meilisearch.com/learn/getting_started/customizing_relevancy.html#synonyms):** configure synonyms to include more relevant content in your search results
-- **[Geosearch](https://docs.meilisearch.com/learn/advanced/geosearch.html):** filter and sort documents based on geographic data
-- **[Extensive language support](https://docs.meilisearch.com/learn/what_is_meilisearch/language.html):** search datasets in any language, with optimized support for Chinese, Japanese, Hebrew, and languages using the Latin alphabet
-- **[Security management](https://docs.meilisearch.com/learn/security/master_api_keys.html):** control which users can access what data with API keys that allow fine-grained permissions handling
-- **[Multi-Tenancy](https://docs.meilisearch.com/learn/security/tenant_tokens.html):** personalize search results for any number of application tenants
+- **[Typo tolerance](https://meilisearch.com/docs/learn/getting_started/customizing_relevancy#typo-tolerance):** get relevant matches even when queries contain typos and misspellings
+- **[Filtering](https://meilisearch.com/docs/learn/advanced/filtering) and [faceted search](https://meilisearch.com/docs/learn/advanced/faceted_search):** enhance your user's search experience with custom filters and build a faceted search interface in a few lines of code
+- **[Sorting](https://meilisearch.com/docs/learn/advanced/sorting):** sort results based on price, date, or pretty much anything else your users need
+- **[Synonym support](https://meilisearch.com/docs/learn/getting_started/customizing_relevancy#synonyms):** configure synonyms to include more relevant content in your search results
+- **[Geosearch](https://meilisearch.com/docs/learn/advanced/geosearch):** filter and sort documents based on geographic data
+- **[Extensive language support](https://meilisearch.com/docs/learn/what_is_meilisearch/language):** search datasets in any language, with optimized support for Chinese, Japanese, Hebrew, and languages using the Latin alphabet
+- **[Security management](https://meilisearch.com/docs/learn/security/master_api_keys):** control which users can access what data with API keys that allow fine-grained permissions handling
+- **[Multi-Tenancy](https://meilisearch.com/docs/learn/security/tenant_tokens):** personalize search results for any number of application tenants
 - **Highly Customizable:** customize Meilisearch to your specific needs or use our out-of-the-box and hassle-free presets
-- **[RESTful API](https://docs.meilisearch.com/reference/api/overview.html):** integrate Meilisearch in your technical stack with our plugins and SDKs
+- **[RESTful API](https://meilisearch.com/docs/reference/api/overview):** integrate Meilisearch in your technical stack with our plugins and SDKs
 - **Easy to install, deploy, and maintain**
 
 ## 📖 Documentation
 
-You can consult Meilisearch's documentation at [https://docs.meilisearch.com](https://docs.meilisearch.com/).
+You can consult Meilisearch's documentation at [https://meilisearch.com/docs](https://meilisearch.com/docs/).
 
 ## 🚀 Getting started
 
-For basic instructions on how to set up Meilisearch, add documents to an index, and search for documents, take a look at our [Quick Start](https://docs.meilisearch.com/learn/getting_started/quick_start.html) guide.
+For basic instructions on how to set up Meilisearch, add documents to an index, and search for documents, take a look at our [Quick Start](https://meilisearch.com/docs/learn/getting_started/quick_start) guide.
 
-You may also want to check out [Meilisearch 101](https://docs.meilisearch.com/learn/getting_started/filtering_and_sorting.html) for an introduction to some of Meilisearch's most popular features.
+You may also want to check out [Meilisearch 101](https://meilisearch.com/docs/learn/getting_started/filtering_and_sorting) for an introduction to some of Meilisearch's most popular features.
 
 ## ☁️ Meilisearch cloud

@@ -66,25 +66,25 @@ Let us manage your infrastructure so you can focus on integrating a great search
 
 Install one of our SDKs in your project for seamless integration between Meilisearch and your favorite language or framework!
 
-Take a look at the complete [Meilisearch integration list](https://docs.meilisearch.com/learn/what_is_meilisearch/sdks.html).
+Take a look at the complete [Meilisearch integration list](https://meilisearch.com/docs/learn/what_is_meilisearch/sdks).
 
-[](https://docs.meilisearch.com/learn/what_is_meilisearch/sdks.html)
+[](https://www.meilisearch.com/docs/learn/what_is_meilisearch/sdks)
 
 ## ⚙️ Advanced usage
 
-Experienced users will want to keep our [API Reference](https://docs.meilisearch.com/reference/api) close at hand.
+Experienced users will want to keep our [API Reference](https://www.meilisearch.com/docs/reference/api/overview) close at hand.
 
-We also offer a wide range of dedicated guides to all Meilisearch features, such as [filtering](https://docs.meilisearch.com/learn/advanced/filtering_and_faceted_search.html), [sorting](https://docs.meilisearch.com/learn/advanced/sorting.html), [geosearch](https://docs.meilisearch.com/learn/advanced/geosearch.html), [API keys](https://docs.meilisearch.com/learn/security/master_api_keys.html), and [tenant tokens](https://docs.meilisearch.com/learn/security/tenant_tokens.html).
+We also offer a wide range of dedicated guides to all Meilisearch features, such as [filtering](https://meilisearch.com/docs/learn/advanced/filtering), [sorting](https://meilisearch.com/docs/learn/advanced/sorting), [geosearch](https://meilisearch.com/docs/learn/advanced/geosearch), [API keys](https://meilisearch.com/docs/learn/security/master_api_keys), and [tenant tokens](https://meilisearch.com/docs/learn/security/tenant_tokens).
 
-Finally, for more in-depth information, refer to our articles explaining fundamental Meilisearch concepts such as [documents](https://docs.meilisearch.com/learn/core_concepts/documents.html) and [indexes](https://docs.meilisearch.com/learn/core_concepts/indexes.html).
+Finally, for more in-depth information, refer to our articles explaining fundamental Meilisearch concepts such as [documents](https://meilisearch.com/docs/learn/core_concepts/documents) and [indexes](https://meilisearch.com/docs/learn/core_concepts/indexes).
 
 ## 📊 Telemetry
 
-Meilisearch collects **anonymized** data from users to help us improve our product. You can [deactivate this](https://docs.meilisearch.com/learn/what_is_meilisearch/telemetry.html#how-to-disable-data-collection) whenever you want.
+Meilisearch collects **anonymized** data from users to help us improve our product. You can [deactivate this](https://meilisearch.com/docs/learn/what_is_meilisearch/telemetry#how-to-disable-data-collection) whenever you want.
 
 To request deletion of collected data, please write to us at [privacy@meilisearch.com](mailto:privacy@meilisearch.com). Don't forget to include your `Instance UID` in the message, as this helps us quickly find and delete your data.
 
-If you want to know more about the kind of data we collect and what we use it for, check the [telemetry section](https://docs.meilisearch.com/learn/what_is_meilisearch/telemetry.html) of our documentation.
+If you want to know more about the kind of data we collect and what we use it for, check the [telemetry section](https://meilisearch.com/docs/learn/what_is_meilisearch/telemetry) of our documentation.
 
 ## 📫 Get in touch!

@@ -97,7 +97,6 @@ Meilisearch is a search engine created by [Meili](https://www.welcometothejungle
 - For feature requests, please visit our [product repository](https://github.com/meilisearch/product/discussions)
 - Found a bug? Open an [issue](https://github.com/meilisearch/meilisearch/issues)!
 - Want to be part of our Discord community? [Join us!](https://discord.gg/meilisearch)
-- For everything else, please check [this page listing some of the other places where you can find us](https://docs.meilisearch.com/learn/what_is_meilisearch/contact.html)
 
 Thank you for your support!
@@ -11,11 +11,11 @@ edition.workspace = true
license.workspace = true

[dependencies]
anyhow = "1.0.65"
csv = "1.1.6"
anyhow = "1.0.70"
csv = "1.2.1"
milli = { path = "../milli", default-features = false }
mimalloc = { version = "0.1.29", default-features = false }
serde_json = { version = "1.0.85", features = ["preserve_order"] }
mimalloc = { version = "0.1.36", default-features = false }
serde_json = { version = "1.0.95", features = ["preserve_order"] }

[dev-dependencies]
criterion = { version = "0.4.0", features = ["html_reports"] }
@@ -24,11 +24,11 @@ rand_chacha = "0.3.1"
roaring = "0.10.1"

[build-dependencies]
anyhow = "1.0.65"
bytes = "1.2.1"
anyhow = "1.0.70"
bytes = "1.4.0"
convert_case = "0.6.0"
flate2 = "1.0.24"
reqwest = { version = "0.11.12", features = ["blocking", "rustls-tls"], default-features = false }
flate2 = "1.0.25"
reqwest = { version = "0.11.16", features = ["blocking", "rustls-tls"], default-features = false }

[features]
default = ["milli/default"]
@@ -48,7 +48,3 @@ harness = false
[[bench]]
name = "indexing"
harness = false

[[bench]]
name = "formatting"
harness = false

@@ -1,67 +0,0 @@
use std::rc::Rc;

use criterion::{criterion_group, criterion_main};
use milli::tokenizer::TokenizerBuilder;
use milli::{FormatOptions, MatcherBuilder, MatchingWord, MatchingWords};

#[global_allocator]
static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc;

struct Conf<'a> {
    name: &'a str,
    text: &'a str,
    matching_words: MatcherBuilder<'a, Vec<u8>>,
}

fn bench_formatting(c: &mut criterion::Criterion) {
    #[rustfmt::skip]
    let confs = &[
        Conf {
            name: "'the door d'",
            text: r#"He used to do the door sounds in "Star Trek" with his mouth, phssst, phssst. The MD-11 passenger and cargo doors also tend to behave like electromagnetic apertures, because the doors do not have continuous electrical contact with the door frames around the door perimeter. But Theodor said that the doors don't work."#,
            matching_words: MatcherBuilder::new(MatchingWords::new(vec![
                (vec![Rc::new(MatchingWord::new("t".to_string(), 0, false).unwrap()), Rc::new(MatchingWord::new("he".to_string(), 0, false).unwrap())], vec![0]),
                (vec![Rc::new(MatchingWord::new("the".to_string(), 0, false).unwrap())], vec![0]),
                (vec![Rc::new(MatchingWord::new("door".to_string(), 1, false).unwrap())], vec![1]),
                (vec![Rc::new(MatchingWord::new("do".to_string(), 0, false).unwrap()), Rc::new(MatchingWord::new("or".to_string(), 0, false).unwrap())], vec![0]),
                (vec![Rc::new(MatchingWord::new("thedoor".to_string(), 1, false).unwrap())], vec![0, 1]),
                (vec![Rc::new(MatchingWord::new("d".to_string(), 0, true).unwrap())], vec![2]),
                (vec![Rc::new(MatchingWord::new("thedoord".to_string(), 1, true).unwrap())], vec![0, 1, 2]),
                (vec![Rc::new(MatchingWord::new("doord".to_string(), 1, true).unwrap())], vec![1, 2]),
            ]
            ).unwrap(), TokenizerBuilder::default().build()),
        },
    ];

    let format_options = &[
        FormatOptions { highlight: false, crop: None },
        FormatOptions { highlight: true, crop: None },
        FormatOptions { highlight: false, crop: Some(10) },
        FormatOptions { highlight: true, crop: Some(10) },
        FormatOptions { highlight: false, crop: Some(20) },
        FormatOptions { highlight: true, crop: Some(20) },
    ];

    for option in format_options {
        let highlight = if option.highlight { "highlight" } else { "no-highlight" };

        let name = match option.crop {
            Some(size) => format!("{}-crop({})", highlight, size),
            None => format!("{}-no-crop", highlight),
        };

        let mut group = c.benchmark_group(&name);
        for conf in confs {
            group.bench_function(conf.name, |b| {
                b.iter(|| {
                    let mut matcher = conf.matching_words.build(conf.text);
                    matcher.format(*option);
                })
            });
        }
        group.finish();
    }
}

criterion_group!(benches, bench_formatting);
criterion_main!(benches);

@@ -103,7 +103,7 @@ not_available_failure_usage() {
    printf "$RED%s\n$DEFAULT" 'ERROR: Meilisearch binary is not available for your OS distribution or your architecture yet.'
    echo ''
    echo 'However, you can easily compile the binary from the source files.'
    echo 'Follow the steps at the page ("Source" tab): https://docs.meilisearch.com/learn/getting_started/installation.html'
    echo 'Follow the steps at the page ("Source" tab): https://www.meilisearch.com/docs/learn/getting_started/installation'
}

fetch_release_failure_usage() {

@@ -11,22 +11,22 @@ readme.workspace = true
license.workspace = true

[dependencies]
anyhow = "1.0.65"
flate2 = "1.0.22"
http = "0.2.8"
anyhow = "1.0.70"
flate2 = "1.0.25"
http = "0.2.9"
log = "0.4.17"
meilisearch-auth = { path = "../meilisearch-auth" }
meilisearch-types = { path = "../meilisearch-types" }
once_cell = "1.15.0"
regex = "1.6.0"
roaring = { version = "0.10.0", features = ["serde"] }
serde = { version = "1.0.136", features = ["derive"] }
serde_json = { version = "1.0.85", features = ["preserve_order"] }
once_cell = "1.17.1"
regex = "1.7.3"
roaring = { version = "0.10.1", features = ["serde"] }
serde = { version = "1.0.160", features = ["derive"] }
serde_json = { version = "1.0.95", features = ["preserve_order"] }
tar = "0.4.38"
tempfile = "3.3.0"
thiserror = "1.0.30"
time = { version = "0.3.7", features = ["serde-well-known", "formatting", "parsing", "macros"] }
uuid = { version = "1.1.2", features = ["serde", "v4"] }
tempfile = "3.5.0"
thiserror = "1.0.40"
time = { version = "0.3.20", features = ["serde-well-known", "formatting", "parsing", "macros"] }
uuid = { version = "1.3.1", features = ["serde", "v4"] }

[dev-dependencies]
big_s = "1.0.2"

@@ -25,7 +25,6 @@ impl CompatV2ToV3 {
            CompatV2ToV3::Compat(compat) => compat.index_uuid(),
        };
        v2_uuids
            .into_iter()
            .into_iter()
            .map(|index| v3::meta::IndexUuid { uid: index.uid, uuid: index.uuid })
            .collect()

@@ -11,9 +11,9 @@ edition.workspace = true
license.workspace = true

[dependencies]
tempfile = "3.3.0"
thiserror = "1.0.30"
uuid = { version = "1.1.2", features = ["serde", "v4"] }
tempfile = "3.5.0"
thiserror = "1.0.40"
uuid = { version = "1.3.1", features = ["serde", "v4"] }

[dev-dependencies]
faux = "0.1.8"
faux = "0.1.9"

@@ -12,8 +12,8 @@ edition.workspace = true
license.workspace = true

[dependencies]
nom = "7.1.1"
nom_locate = "4.0.0"
nom = "7.1.3"
nom_locate = "4.1.0"

[dev-dependencies]
insta = "1.21.0"
insta = "1.29.0"

@@ -20,6 +20,8 @@ pub enum Condition<'a> {
    GreaterThanOrEqual(Token<'a>),
    Equal(Token<'a>),
    NotEqual(Token<'a>),
    Null,
    Empty,
    Exists,
    LowerThan(Token<'a>),
    LowerThanOrEqual(Token<'a>),
@@ -44,6 +46,38 @@ pub fn parse_condition(input: Span) -> IResult<FilterCondition> {
    Ok((input, condition))
}

/// null = value "IS" WS+ "NULL"
pub fn parse_is_null(input: Span) -> IResult<FilterCondition> {
    let (input, key) = parse_value(input)?;

    let (input, _) = tuple((tag("IS"), multispace1, tag("NULL")))(input)?;
    Ok((input, FilterCondition::Condition { fid: key, op: Null }))
}

/// null = value "IS" WS+ "NOT" WS+ "NULL"
pub fn parse_is_not_null(input: Span) -> IResult<FilterCondition> {
    let (input, key) = parse_value(input)?;

    let (input, _) = tuple((tag("IS"), multispace1, tag("NOT"), multispace1, tag("NULL")))(input)?;
    Ok((input, FilterCondition::Not(Box::new(FilterCondition::Condition { fid: key, op: Null }))))
}

/// empty = value "IS" WS+ "EMPTY"
pub fn parse_is_empty(input: Span) -> IResult<FilterCondition> {
    let (input, key) = parse_value(input)?;

    let (input, _) = tuple((tag("IS"), multispace1, tag("EMPTY")))(input)?;
    Ok((input, FilterCondition::Condition { fid: key, op: Empty }))
}

/// empty = value "IS" WS+ "NOT" WS+ "EMPTY"
pub fn parse_is_not_empty(input: Span) -> IResult<FilterCondition> {
    let (input, key) = parse_value(input)?;

    let (input, _) = tuple((tag("IS"), multispace1, tag("NOT"), multispace1, tag("EMPTY")))(input)?;
    Ok((input, FilterCondition::Not(Box::new(FilterCondition::Condition { fid: key, op: Empty }))))
}

/// exist = value "EXISTS"
pub fn parse_exists(input: Span) -> IResult<FilterCondition> {
    let (input, key) = terminated(parse_value, tag("EXISTS"))(input)?;
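
All four new rules follow the same shape: parse the attribute with `parse_value`, then require the keyword sequence. A minimal, self-contained nom 7 sketch of the same grammar (a toy `parse_is_null` over plain `&str`, not the crate's `Span`/`FilterCondition` types):

```rust
// Toy re-implementation of the `IS NULL` rule over plain &str, using the same
// nom combinators as the diff (`tuple`, `tag`, `multispace1`, `terminated`).
use nom::bytes::complete::{tag, take_while1};
use nom::character::complete::multispace1;
use nom::sequence::{terminated, tuple};
use nom::IResult;

/// Recognizes `attribute IS NULL` and returns the attribute name.
fn parse_is_null(input: &str) -> IResult<&str, &str> {
    let (input, key) =
        terminated(take_while1(|c: char| c.is_alphanumeric() || c == '_'), multispace1)(input)?;
    let (input, _) = tuple((tag("IS"), multispace1, tag("NULL")))(input)?;
    Ok((input, key))
}

fn main() {
    assert_eq!(parse_is_null("release_date IS NULL"), Ok(("", "release_date")));
    assert!(parse_is_null("release_date ISNULL").is_err()); // `multispace1` requires the space
}
```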

@@ -143,11 +143,9 @@ impl<'a> Display for Error<'a> {
            ErrorKind::MissingClosingDelimiter(c) => {
                writeln!(f, "Expression `{}` is missing the following closing delimiter: `{}`.", escaped_input, c)?
            }
            ErrorKind::InvalidPrimary if input.trim().is_empty() => {
                writeln!(f, "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `_geoRadius`, or `_geoBoundingBox` but instead got nothing.")?
            }
            ErrorKind::InvalidPrimary => {
                writeln!(f, "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `_geoRadius`, or `_geoBoundingBox` at `{}`.", escaped_input)?
                let text = if input.trim().is_empty() { "but instead got nothing.".to_string() } else { format!("at `{}`.", escaped_input) };
                writeln!(f, "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` {}", text)?
            }
            ErrorKind::ExpectedEof => {
                writeln!(f, "Found unexpected characters at the end of the filter: `{}`. You probably forgot an `OR` or an `AND` rule.", escaped_input)?
@@ -159,7 +157,7 @@ impl<'a> Display for Error<'a> {
                writeln!(f, "The `_geoBoundingBox` filter expects two pairs of arguments: `_geoBoundingBox([latitude, longitude], [latitude, longitude])`.")?
            }
            ErrorKind::ReservedGeo(name) => {
                writeln!(f, "`{}` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance), or _geoBoundingBox([latitude, longitude], [latitude, longitude]) built-in rules to filter on `_geo` coordinates.", name.escape_debug())?
                writeln!(f, "`{}` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.", name.escape_debug())?
            }
            ErrorKind::MisusedGeoRadius => {
                writeln!(f, "The `_geoRadius` filter is an operation and can't be used as a value.")?

@@ -47,7 +47,10 @@ mod value;
use std::fmt::Debug;

pub use condition::{parse_condition, parse_to, Condition};
use condition::{parse_exists, parse_not_exists};
use condition::{
    parse_exists, parse_is_empty, parse_is_not_empty, parse_is_not_null, parse_is_null,
    parse_not_exists,
};
use error::{cut_with_err, ExpectedValueKind, NomErrorExt};
pub use error::{Error, ErrorKind};
use nom::branch::alt;
@@ -382,6 +385,34 @@ fn parse_geo_point(input: Span) -> IResult<FilterCondition> {
    Err(nom::Err::Failure(Error::new_from_kind(input, ErrorKind::ReservedGeo("_geoPoint"))))
}

/// geoDistance = WS* "_geoDistance(float WS* "," WS* float WS* "," WS* float)
fn parse_geo_distance(input: Span) -> IResult<FilterCondition> {
    // we want to forbid space BEFORE the _geoDistance but not after
    tuple((
        multispace0,
        tag("_geoDistance"),
        // if we were able to parse `_geoDistance` we are going to return a Failure whatever happens next.
        cut(delimited(char('('), separated_list1(tag(","), ws(recognize_float)), char(')'))),
    ))(input)
    .map_err(|e| e.map(|_| Error::new_from_kind(input, ErrorKind::ReservedGeo("_geoDistance"))))?;
    // if we succeeded we still return a `Failure` because `geoDistance` filters are not allowed
    Err(nom::Err::Failure(Error::new_from_kind(input, ErrorKind::ReservedGeo("_geoDistance"))))
}

/// geo = WS* "_geo(float WS* "," WS* float WS* "," WS* float)
fn parse_geo(input: Span) -> IResult<FilterCondition> {
    // we want to forbid space BEFORE the _geo but not after
    tuple((
        multispace0,
        word_exact("_geo"),
        // if we were able to parse `_geo` we are going to return a Failure whatever happens next.
        cut(delimited(char('('), separated_list1(tag(","), ws(recognize_float)), char(')'))),
    ))(input)
    .map_err(|e| e.map(|_| Error::new_from_kind(input, ErrorKind::ReservedGeo("_geo"))))?;
    // if we succeeded we still return a `Failure` because `_geo` filter is not allowed
    Err(nom::Err::Failure(Error::new_from_kind(input, ErrorKind::ReservedGeo("_geo"))))
}

fn parse_error_reserved_keyword(input: Span) -> IResult<FilterCondition> {
    match parse_condition(input) {
        Ok(result) => Ok(result),
@@ -414,10 +445,16 @@ fn parse_primary(input: Span, depth: usize) -> IResult<FilterCondition> {
        parse_in,
        parse_not_in,
        parse_condition,
        parse_is_null,
        parse_is_not_null,
        parse_is_empty,
        parse_is_not_empty,
        parse_exists,
        parse_not_exists,
        parse_to,
        // the next lines are only for error handling and are written at the end to have the least possible performance impact
        parse_geo,
        parse_geo_distance,
        parse_geo_point,
        parse_error_reserved_keyword,
    ))(input)
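
`parse_primary` relies on `alt` trying its branches in order, which is why the cheap, common parsers come first and the error-reporting `parse_geo*` fallbacks come last. A toy illustration of that ordering rule:

```rust
// Toy demonstration that nom's `alt` is order-sensitive: branches are tried
// top to bottom and the first success wins, so more specific (or more likely)
// parsers must be listed before catch-all/error-reporting ones.
use nom::branch::alt;
use nom::bytes::complete::tag;
use nom::IResult;

fn keyword(input: &str) -> IResult<&str, &str> {
    // `tag("EXISTS")` must come before `tag("EX")`, otherwise `EX` would
    // match first and leave `ISTS` unconsumed.
    alt((tag("EXISTS"), tag("EX")))(input)
}

fn main() {
    assert_eq!(keyword("EXISTS"), Ok(("", "EXISTS")));
    assert_eq!(keyword("EXIT"), Ok(("IT", "EX")));
}
```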

@@ -496,14 +533,30 @@ pub mod tests {
        insta::assert_display_snapshot!(p("subscribers <= 1000"), @"{subscribers} <= {1000}");
        insta::assert_display_snapshot!(p("subscribers 100 TO 1000"), @"{subscribers} {100} TO {1000}");

        // Test NOT + EXISTS
        insta::assert_display_snapshot!(p("subscribers EXISTS"), @"{subscribers} EXISTS");
        // Test NOT
        insta::assert_display_snapshot!(p("NOT subscribers < 1000"), @"NOT ({subscribers} < {1000})");
        insta::assert_display_snapshot!(p("NOT subscribers 100 TO 1000"), @"NOT ({subscribers} {100} TO {1000})");

        // Test NULL + NOT NULL
        insta::assert_display_snapshot!(p("subscribers IS NULL"), @"{subscribers} IS NULL");
        insta::assert_display_snapshot!(p("NOT subscribers IS NULL"), @"NOT ({subscribers} IS NULL)");
        insta::assert_display_snapshot!(p("subscribers IS NOT NULL"), @"NOT ({subscribers} IS NULL)");
        insta::assert_display_snapshot!(p("NOT subscribers IS NOT NULL"), @"{subscribers} IS NULL");
        insta::assert_display_snapshot!(p("subscribers IS NOT NULL"), @"NOT ({subscribers} IS NULL)");

        // Test EMPTY + NOT EMPTY
        insta::assert_display_snapshot!(p("subscribers IS EMPTY"), @"{subscribers} IS EMPTY");
        insta::assert_display_snapshot!(p("NOT subscribers IS EMPTY"), @"NOT ({subscribers} IS EMPTY)");
        insta::assert_display_snapshot!(p("subscribers IS NOT EMPTY"), @"NOT ({subscribers} IS EMPTY)");
        insta::assert_display_snapshot!(p("NOT subscribers IS NOT EMPTY"), @"{subscribers} IS EMPTY");
        insta::assert_display_snapshot!(p("subscribers IS NOT EMPTY"), @"NOT ({subscribers} IS EMPTY)");

        // Test EXISTS + NOT EXISTS
        insta::assert_display_snapshot!(p("subscribers EXISTS"), @"{subscribers} EXISTS");
        insta::assert_display_snapshot!(p("NOT subscribers EXISTS"), @"NOT ({subscribers} EXISTS)");
        insta::assert_display_snapshot!(p("subscribers NOT EXISTS"), @"NOT ({subscribers} EXISTS)");
        insta::assert_display_snapshot!(p("NOT subscribers NOT EXISTS"), @"{subscribers} EXISTS");
        insta::assert_display_snapshot!(p("subscribers NOT EXISTS"), @"NOT ({subscribers} EXISTS)");
        insta::assert_display_snapshot!(p("NOT subscribers 100 TO 1000"), @"NOT ({subscribers} {100} TO {1000})");

        // Test nested NOT
        insta::assert_display_snapshot!(p("NOT NOT NOT NOT x = 5"), @"{x} = {5}");
@@ -576,7 +629,7 @@ pub mod tests {
        "###);

        insta::assert_display_snapshot!(p("'OR'"), @r###"
        Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `_geoRadius`, or `_geoBoundingBox` at `\'OR\'`.
        Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `\'OR\'`.
        1:5 'OR'
        "###);

@@ -586,12 +639,12 @@ pub mod tests {
        "###);

        insta::assert_display_snapshot!(p("channel Ponce"), @r###"
        Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `_geoRadius`, or `_geoBoundingBox` at `channel Ponce`.
        Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `channel Ponce`.
        1:14 channel Ponce
        "###);

        insta::assert_display_snapshot!(p("channel = Ponce OR"), @r###"
        Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `_geoRadius`, or `_geoBoundingBox` but instead got nothing.
        Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` but instead got nothing.
        19:19 channel = Ponce OR
        "###);

@@ -621,15 +674,35 @@ pub mod tests {
        "###);

        insta::assert_display_snapshot!(p("_geoPoint(12, 13, 14)"), @r###"
        `_geoPoint` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance), or _geoBoundingBox([latitude, longitude], [latitude, longitude]) built-in rules to filter on `_geo` coordinates.
        `_geoPoint` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.
        1:22 _geoPoint(12, 13, 14)
        "###);

        insta::assert_display_snapshot!(p("position <= _geoPoint(12, 13, 14)"), @r###"
        `_geoPoint` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance), or _geoBoundingBox([latitude, longitude], [latitude, longitude]) built-in rules to filter on `_geo` coordinates.
        `_geoPoint` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.
        13:34 position <= _geoPoint(12, 13, 14)
        "###);

        insta::assert_display_snapshot!(p("_geoDistance(12, 13, 14)"), @r###"
        `_geoDistance` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.
        1:25 _geoDistance(12, 13, 14)
        "###);

        insta::assert_display_snapshot!(p("position <= _geoDistance(12, 13, 14)"), @r###"
        `_geoDistance` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.
        13:37 position <= _geoDistance(12, 13, 14)
        "###);

        insta::assert_display_snapshot!(p("_geo(12, 13, 14)"), @r###"
        `_geo` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.
        1:17 _geo(12, 13, 14)
        "###);

        insta::assert_display_snapshot!(p("position <= _geo(12, 13, 14)"), @r###"
        `_geo` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.
        13:29 position <= _geo(12, 13, 14)
        "###);

        insta::assert_display_snapshot!(p("position <= _geoRadius(12, 13, 14)"), @r###"
        The `_geoRadius` filter is an operation and can't be used as a value.
        13:35 position <= _geoRadius(12, 13, 14)
@@ -656,12 +729,12 @@ pub mod tests {
        "###);

        insta::assert_display_snapshot!(p("colour NOT EXIST"), @r###"
        Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `_geoRadius`, or `_geoBoundingBox` at `colour NOT EXIST`.
        Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `colour NOT EXIST`.
        1:17 colour NOT EXIST
        "###);

        insta::assert_display_snapshot!(p("subscribers 100 TO1000"), @r###"
        Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `_geoRadius`, or `_geoBoundingBox` at `subscribers 100 TO1000`.
        Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `subscribers 100 TO1000`.
        1:23 subscribers 100 TO1000
        "###);

@@ -722,6 +795,39 @@ pub mod tests {
        Was expecting a value but instead got `OR`, which is a reserved keyword. To use `OR` as a field name or a value, surround it by quotes.
        5:7 NOT OR EXISTS AND EXISTS NOT EXISTS
        "###);

        insta::assert_display_snapshot!(p(r#"value NULL"#), @r###"
        Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `value NULL`.
        1:11 value NULL
        "###);
        insta::assert_display_snapshot!(p(r#"value NOT NULL"#), @r###"
        Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `value NOT NULL`.
        1:15 value NOT NULL
        "###);
        insta::assert_display_snapshot!(p(r#"value EMPTY"#), @r###"
        Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `value EMPTY`.
        1:12 value EMPTY
        "###);
        insta::assert_display_snapshot!(p(r#"value NOT EMPTY"#), @r###"
        Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `value NOT EMPTY`.
        1:16 value NOT EMPTY
        "###);
        insta::assert_display_snapshot!(p(r#"value IS"#), @r###"
        Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `value IS`.
        1:9 value IS
        "###);
        insta::assert_display_snapshot!(p(r#"value IS NOT"#), @r###"
        Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `value IS NOT`.
        1:13 value IS NOT
        "###);
        insta::assert_display_snapshot!(p(r#"value IS EXISTS"#), @r###"
        Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `value IS EXISTS`.
        1:16 value IS EXISTS
        "###);
        insta::assert_display_snapshot!(p(r#"value IS NOT EXISTS"#), @r###"
        Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `value IS NOT EXISTS`.
        1:20 value IS NOT EXISTS
        "###);
    }

    #[test]
@@ -803,6 +909,8 @@ impl<'a> std::fmt::Display for Condition<'a> {
            Condition::GreaterThanOrEqual(token) => write!(f, ">= {token}"),
            Condition::Equal(token) => write!(f, "= {token}"),
            Condition::NotEqual(token) => write!(f, "!= {token}"),
            Condition::Null => write!(f, "IS NULL"),
            Condition::Empty => write!(f, "IS EMPTY"),
            Condition::Exists => write!(f, "EXISTS"),
            Condition::LowerThan(token) => write!(f, "< {token}"),
            Condition::LowerThanOrEqual(token) => write!(f, "<= {token}"),

@@ -7,8 +7,8 @@ use nom::{InputIter, InputLength, InputTake, Slice};

use crate::error::{ExpectedValueKind, NomErrorExt};
use crate::{
    parse_geo_bounding_box, parse_geo_point, parse_geo_radius, Error, ErrorKind, IResult, Span,
    Token,
    parse_geo, parse_geo_bounding_box, parse_geo_distance, parse_geo_point, parse_geo_radius,
    Error, ErrorKind, IResult, Span, Token,
};

/// This function goes through all characters in the [Span] if it finds any escaped character (`\`).
@@ -88,11 +88,16 @@ pub fn parse_value(input: Span) -> IResult<Token> {
    // then, we want to check if the user is misusing a geo expression
    // This expression can’t finish without error.
    // We want to return an error in case of failure.
    if let Err(err) = parse_geo_point(input) {
        if err.is_failure() {
            return Err(err);
    let geo_reserved_parse_functions = [parse_geo_point, parse_geo_distance, parse_geo];

    for parser in geo_reserved_parse_functions {
        if let Err(err) = parser(input) {
            if err.is_failure() {
                return Err(err);
            }
        }
    }

    match parse_geo_radius(input) {
        Ok(_) => {
            return Err(nom::Err::Failure(Error::new_from_kind(input, ErrorKind::MisusedGeoRadius)))
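
The loop above works because plain `fn` items with the same signature all coerce to one function-pointer type, so the three reserved-keyword parsers can live in a single array. A minimal standalone version of the pattern (toy names, not the crate's parsers):

```rust
// Minimal illustration: fn items with identical signatures coerce to
// `fn(&str) -> Result<(), String>`, so they can be stored and probed in a loop.
fn reject_geo_point(s: &str) -> Result<(), String> {
    if s.starts_with("_geoPoint") { Err("`_geoPoint` is reserved".into()) } else { Ok(()) }
}

fn reject_geo_distance(s: &str) -> Result<(), String> {
    if s.starts_with("_geoDistance") { Err("`_geoDistance` is reserved".into()) } else { Ok(()) }
}

fn check_reserved(s: &str) -> Result<(), String> {
    let checks: [fn(&str) -> Result<(), String>; 2] = [reject_geo_point, reject_geo_distance];
    for check in checks {
        check(s)?; // first failing check aborts, like the `is_failure` early return
    }
    Ok(())
}

fn main() {
    assert!(check_reserved("_geoDistance(1, 2, 3)").is_err());
    assert!(check_reserved("title").is_ok());
}
```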

@@ -178,7 +183,20 @@ fn is_syntax_component(c: char) -> bool {
}

fn is_keyword(s: &str) -> bool {
    matches!(s, "AND" | "OR" | "IN" | "NOT" | "TO" | "EXISTS" | "_geoRadius" | "_geoBoundingBox")
    matches!(
        s,
        "AND"
            | "OR"
            | "IN"
            | "NOT"
            | "TO"
            | "EXISTS"
            | "IS"
            | "NULL"
            | "EMPTY"
            | "_geoRadius"
            | "_geoBoundingBox"
    )
}

#[cfg(test)]

@@ -4,51 +4,56 @@ use serde_json::{Map, Value};

pub fn flatten(json: &Map<String, Value>) -> Map<String, Value> {
    let mut obj = Map::new();
    let mut all_keys = vec![];
    insert_object(&mut obj, None, json, &mut all_keys);
    for key in all_keys {
        obj.entry(key).or_insert(Value::Array(vec![]));
    let mut all_entries = vec![];
    insert_object(&mut obj, None, json, &mut all_entries);
    for (key, old_val) in all_entries {
        obj.entry(key).or_insert(old_val.clone());
    }
    obj
}

fn insert_object(
fn insert_object<'a>(
    base_json: &mut Map<String, Value>,
    base_key: Option<&str>,
    object: &Map<String, Value>,
    all_keys: &mut Vec<String>,
    object: &'a Map<String, Value>,
    all_entries: &mut Vec<(String, &'a Value)>,
) {
    for (key, value) in object {
        let new_key = base_key.map_or_else(|| key.clone(), |base_key| format!("{base_key}.{key}"));
        all_keys.push(new_key.clone());
        all_entries.push((new_key.clone(), value));
        if let Some(array) = value.as_array() {
            insert_array(base_json, &new_key, array, all_keys);
            insert_array(base_json, &new_key, array, all_entries);
        } else if let Some(object) = value.as_object() {
            insert_object(base_json, Some(&new_key), object, all_keys);
            insert_object(base_json, Some(&new_key), object, all_entries);
        } else {
            insert_value(base_json, &new_key, value.clone());
            insert_value(base_json, &new_key, value.clone(), false);
        }
    }
}

fn insert_array(
fn insert_array<'a>(
    base_json: &mut Map<String, Value>,
    base_key: &str,
    array: &Vec<Value>,
    all_keys: &mut Vec<String>,
    array: &'a Vec<Value>,
    all_entries: &mut Vec<(String, &'a Value)>,
) {
    for value in array {
        if let Some(object) = value.as_object() {
            insert_object(base_json, Some(base_key), object, all_keys);
            insert_object(base_json, Some(base_key), object, all_entries);
        } else if let Some(sub_array) = value.as_array() {
            insert_array(base_json, base_key, sub_array, all_keys);
            insert_array(base_json, base_key, sub_array, all_entries);
        } else {
            insert_value(base_json, base_key, value.clone());
            insert_value(base_json, base_key, value.clone(), true);
        }
    }
}

fn insert_value(base_json: &mut Map<String, Value>, key: &str, to_insert: Value) {
fn insert_value(
    base_json: &mut Map<String, Value>,
    key: &str,
    to_insert: Value,
    came_from_array: bool,
) {
    debug_assert!(!to_insert.is_object());
    debug_assert!(!to_insert.is_array());

@@ -63,6 +68,8 @@ fn insert_value(base_json: &mut Map<String, Value>, key: &str, to_insert: Value)
        base_json[key] = Value::Array(vec![value, to_insert]);
    }
    // if it does not exist we can push the value untouched
    } else if came_from_array {
        base_json.insert(key.to_string(), Value::Array(vec![to_insert]));
    } else {
        base_json.insert(key.to_string(), to_insert);
    }
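
A self-contained model of the `came_from_array` rule above (a toy re-implementation, not the crate's exact code): a scalar reached through an array is stored as a one-element array, so single-element and multi-element arrays flatten to the same shape, while a plain scalar stays bare.

```rust
use serde_json::{json, Map, Value};

// Toy version of insert_value: merge on collision, and wrap array-derived
// scalars in a one-element array on first insertion.
fn insert_value(base: &mut Map<String, Value>, key: &str, to_insert: Value, came_from_array: bool) {
    if let Some(value) = base.get_mut(key) {
        if let Value::Array(array) = value {
            array.push(to_insert); // already an array: just append
        } else {
            let old = value.take(); // collision: promote to an array
            *value = Value::Array(vec![old, to_insert]);
        }
    } else if came_from_array {
        base.insert(key.to_string(), Value::Array(vec![to_insert]));
    } else {
        base.insert(key.to_string(), to_insert);
    }
}

fn main() {
    let mut m = Map::new();
    insert_value(&mut m, "a", json!(42), true);
    assert_eq!(m["a"], json!([42])); // array-derived scalar stays wrapped
    insert_value(&mut m, "b", json!(42), false);
    assert_eq!(m["b"], json!(42)); // plain scalar stays bare
    insert_value(&mut m, "b", json!(43), false);
    assert_eq!(m["b"], json!([42, 43])); // collision promotes to an array
}
```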

@@ -113,7 +120,11 @@ mod tests {
        assert_eq!(
            &flat,
            json!({
                "a": [],
                "a": {
                    "b": "c",
                    "d": "e",
                    "f": "g"
                },
                "a.b": "c",
                "a.d": "e",
                "a.f": "g"
@@ -164,7 +175,7 @@ mod tests {
        assert_eq!(
            &flat,
            json!({
                "a": 42,
                "a": [42],
                "a.b": ["c", "d", "e"],
            })
            .as_object()
@@ -186,7 +197,7 @@ mod tests {
        assert_eq!(
            &flat,
            json!({
                "a": null,
                "a": [null],
                "a.b": ["c", "d", "e"],
            })
            .as_object()
@@ -208,7 +219,9 @@ mod tests {
        assert_eq!(
            &flat,
            json!({
                "a": [],
                "a": {
                    "b": "c"
                },
                "a.b": ["c", "d"],
            })
            .as_object()
@@ -234,7 +247,7 @@ mod tests {
            json!({
                "a.b": ["c", "d", "f"],
                "a.c": "e",
                "a": 35,
                "a": [35],
            })
            .as_object()
            .unwrap()
@@ -302,4 +315,53 @@ mod tests {
            .unwrap()
        );
    }

    #[test]
    fn flatten_nested_values_keep_original_values() {
        let mut base: Value = json!({
            "tags": {
                "t1": "v1"
            },
            "prices": {
                "p1": [null],
                "p1000": {"tamo": {"le": {}}}
            },
            "kiki": [[]]
        });
        let json = std::mem::take(base.as_object_mut().unwrap());
        let flat = flatten(&json);

        println!("{}", serde_json::to_string_pretty(&flat).unwrap());

        assert_eq!(
            &flat,
            json!({
                "prices": {
                    "p1": [null],
                    "p1000": {
                        "tamo": {
                            "le": {}
                        }
                    }
                },
                "prices.p1": [null],
                "prices.p1000": {
                    "tamo": {
                        "le": {}
                    }
                },
                "prices.p1000.tamo": {
                    "le": {}
                },
                "prices.p1000.tamo.le": {},
                "tags": {
                    "t1": "v1"
                },
                "tags.t1": "v1",
                "kiki": [[]]
            })
            .as_object()
            .unwrap()
        );
    }
}
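
Taken together, the new semantics can be summarized with a short usage sketch (assuming the crate's public `flatten` entry point shown at the top of this file, reachable as `flatten_serde_json::flatten`):

```rust
// Usage sketch of the new flattening behaviour: nested objects now survive
// next to their dotted keys, and scalars coming from arrays stay in an array.
use serde_json::json;

fn main() {
    let doc = json!({
        "a": { "b": "c" },
        "tags": ["x", "y"],
    });
    let flat = flatten_serde_json::flatten(doc.as_object().unwrap());

    assert_eq!(flat["a"], json!({ "b": "c" })); // original nested value kept
    assert_eq!(flat["a.b"], json!("c"));        // flattened key still produced
    assert_eq!(flat["tags"], json!(["x", "y"])); // array-derived values stay arrays
}
```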

@@ -11,29 +11,29 @@ edition.workspace = true
license.workspace = true

[dependencies]
anyhow = "1.0.64"
anyhow = "1.0.70"
bincode = "1.3.3"
csv = "1.1.6"
derive_builder = "0.11.2"
csv = "1.2.1"
derive_builder = "0.12.0"
dump = { path = "../dump" }
enum-iterator = "1.1.3"
enum-iterator = "1.4.0"
file-store = { path = "../file-store" }
log = "0.4.14"
log = "0.4.17"
meilisearch-auth = { path = "../meilisearch-auth" }
meilisearch-types = { path = "../meilisearch-types" }
page_size = "0.5.0"
roaring = { version = "0.10.0", features = ["serde"] }
serde = { version = "1.0.136", features = ["derive"] }
serde_json = { version = "1.0.85", features = ["preserve_order"] }
roaring = { version = "0.10.1", features = ["serde"] }
serde = { version = "1.0.160", features = ["derive"] }
serde_json = { version = "1.0.95", features = ["preserve_order"] }
synchronoise = "1.0.1"
tempfile = "3.3.0"
thiserror = "1.0.30"
time = { version = "0.3.7", features = ["serde-well-known", "formatting", "parsing", "macros"] }
uuid = { version = "1.1.2", features = ["serde", "v4"] }
tempfile = "3.5.0"
thiserror = "1.0.40"
time = { version = "0.3.20", features = ["serde-well-known", "formatting", "parsing", "macros"] }
uuid = { version = "1.3.1", features = ["serde", "v4"] }

[dev-dependencies]
big_s = "1.0.2"
crossbeam = "0.8.2"
insta = { version = "1.19.1", features = ["json", "redactions"] }
insta = { version = "1.29.0", features = ["json", "redactions"] }
meili-snap = { path = "../meili-snap" }
nelson = { git = "https://github.com/meilisearch/nelson.git", rev = "675f13885548fb415ead8fbb447e9e6d9314000a"}

@@ -28,6 +28,7 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String {
        started_at,
        finished_at,
        index_mapper,
        max_number_of_tasks: _,
        wake_up: _,
        dumps_path: _,
        snapshots_path: _,

@@ -31,6 +31,7 @@ mod uuid_codec;
pub type Result<T> = std::result::Result<T, Error>;
pub type TaskId = u32;

use std::collections::HashMap;
use std::ops::{Bound, RangeBounds};
use std::path::{Path, PathBuf};
use std::sync::atomic::AtomicBool;
@@ -43,13 +44,14 @@ pub use error::Error;
use file_store::FileStore;
use meilisearch_types::error::ResponseError;
use meilisearch_types::heed::types::{OwnedType, SerdeBincode, SerdeJson, Str};
use meilisearch_types::heed::{self, Database, Env, RoTxn};
use meilisearch_types::heed::{self, Database, Env, RoTxn, RwTxn};
use meilisearch_types::milli::documents::DocumentsBatchBuilder;
use meilisearch_types::milli::update::IndexerConfig;
use meilisearch_types::milli::{self, CboRoaringBitmapCodec, Index, RoaringBitmapCodec, BEU32};
use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task};
use roaring::RoaringBitmap;
use synchronoise::SignalEvent;
use time::format_description::well_known::Rfc3339;
use time::OffsetDateTime;
use utils::{filter_out_references_to_newer_tasks, keep_tasks_within_datetimes, map_bound};
use uuid::Uuid;
@@ -240,6 +242,9 @@ pub struct IndexSchedulerOptions {
    /// Set to `true` iff the index scheduler is allowed to automatically
    /// batch tasks together, to process multiple tasks at once.
    pub autobatching_enabled: bool,
    /// The maximum number of tasks stored in the task queue before starting
    /// to auto schedule task deletions.
    pub max_number_of_tasks: usize,
}

/// Structure which holds meilisearch's indexes and schedules the tasks
@@ -289,6 +294,10 @@ pub struct IndexScheduler {
    /// Whether auto-batching is enabled or not.
    pub(crate) autobatching_enabled: bool,

    /// The max number of tasks allowed before the scheduler starts to delete
    /// the finished tasks automatically.
    pub(crate) max_number_of_tasks: usize,

    /// The path used to create the dumps.
    pub(crate) dumps_path: PathBuf,

@@ -338,6 +347,7 @@ impl IndexScheduler {
            index_mapper: self.index_mapper.clone(),
            wake_up: self.wake_up.clone(),
            autobatching_enabled: self.autobatching_enabled,
            max_number_of_tasks: self.max_number_of_tasks,
            snapshots_path: self.snapshots_path.clone(),
            dumps_path: self.dumps_path.clone(),
            auth_path: self.auth_path.clone(),
@@ -411,6 +421,7 @@ impl IndexScheduler {
            // we want to start the loop right away in case meilisearch was ctrl+Ced while processing things
            wake_up: Arc::new(SignalEvent::auto(true)),
            autobatching_enabled: options.autobatching_enabled,
            max_number_of_tasks: options.max_number_of_tasks,
            dumps_path: options.dumps_path,
            snapshots_path: options.snapshots_path,
            auth_path: options.auth_path,
@@ -428,6 +439,13 @@ impl IndexScheduler {
        Ok(this)
    }

    /// Return `Ok(())` if the index scheduler is able to access one of its databases.
    pub fn health(&self) -> Result<()> {
        let rtxn = self.env.read_txn()?;
        self.all_tasks.first(&rtxn)?;
        Ok(())
    }

    fn index_budget(
        tasks_path: &Path,
        base_map_size: usize,
@@ -889,127 +907,8 @@ impl IndexScheduler {

    /// Register a new task coming from a dump in the scheduler.
    /// By taking a mutable ref we're pretty sure no one will ever import a dump while actix is running.
    pub fn register_dumped_task(
        &mut self,
        task: TaskDump,
        content_file: Option<Box<UpdateFile>>,
    ) -> Result<Task> {
        // Currently we don't need to access the tasks queue while loading a dump thus I can block everything.
        let mut wtxn = self.env.write_txn()?;

        let content_uuid = match content_file {
            Some(content_file) if task.status == Status::Enqueued => {
                let (uuid, mut file) = self.create_update_file()?;
                let mut builder = DocumentsBatchBuilder::new(file.as_file_mut());
                for doc in content_file {
                    builder.append_json_object(&doc?)?;
                }
                builder.into_inner()?;
                file.persist()?;

                Some(uuid)
            }
            // If the task isn't `Enqueued` then just generate a recognisable `Uuid`
            // in case we try to open it later.
            _ if task.status != Status::Enqueued => Some(Uuid::nil()),
            _ => None,
        };

        let task = Task {
            uid: task.uid,
            enqueued_at: task.enqueued_at,
            started_at: task.started_at,
            finished_at: task.finished_at,
            error: task.error,
            canceled_by: task.canceled_by,
            details: task.details,
            status: task.status,
            kind: match task.kind {
                KindDump::DocumentImport {
                    primary_key,
                    method,
                    documents_count,
                    allow_index_creation,
                } => KindWithContent::DocumentAdditionOrUpdate {
                    index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
                    primary_key,
                    method,
                    content_file: content_uuid.ok_or(Error::CorruptedDump)?,
                    documents_count,
                    allow_index_creation,
                },
                KindDump::DocumentDeletion { documents_ids } => KindWithContent::DocumentDeletion {
                    documents_ids,
                    index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
                },
                KindDump::DocumentClear => KindWithContent::DocumentClear {
                    index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
                },
                KindDump::Settings { settings, is_deletion, allow_index_creation } => {
                    KindWithContent::SettingsUpdate {
                        index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
                        new_settings: settings,
                        is_deletion,
                        allow_index_creation,
                    }
                }
                KindDump::IndexDeletion => KindWithContent::IndexDeletion {
                    index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
                },
                KindDump::IndexCreation { primary_key } => KindWithContent::IndexCreation {
                    index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
                    primary_key,
                },
                KindDump::IndexUpdate { primary_key } => KindWithContent::IndexUpdate {
                    index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
                    primary_key,
                },
                KindDump::IndexSwap { swaps } => KindWithContent::IndexSwap { swaps },
                KindDump::TaskCancelation { query, tasks } => {
                    KindWithContent::TaskCancelation { query, tasks }
                }
                KindDump::TasksDeletion { query, tasks } => {
                    KindWithContent::TaskDeletion { query, tasks }
                }
                KindDump::DumpCreation { keys, instance_uid } => {
                    KindWithContent::DumpCreation { keys, instance_uid }
                }
                KindDump::SnapshotCreation => KindWithContent::SnapshotCreation,
            },
        };

        self.all_tasks.put(&mut wtxn, &BEU32::new(task.uid), &task)?;

        for index in task.indexes() {
            self.update_index(&mut wtxn, index, |bitmap| {
                bitmap.insert(task.uid);
            })?;
        }

        self.update_status(&mut wtxn, task.status, |bitmap| {
            bitmap.insert(task.uid);
        })?;

        self.update_kind(&mut wtxn, task.kind.as_kind(), |bitmap| {
            (bitmap.insert(task.uid));
        })?;

        utils::insert_task_datetime(&mut wtxn, self.enqueued_at, task.enqueued_at, task.uid)?;

        // we can't override the started_at & finished_at, so we must only set it if the task is finished and won't change
        if matches!(task.status, Status::Succeeded | Status::Failed | Status::Canceled) {
            if let Some(started_at) = task.started_at {
                utils::insert_task_datetime(&mut wtxn, self.started_at, started_at, task.uid)?;
            }
            if let Some(finished_at) = task.finished_at {
                utils::insert_task_datetime(&mut wtxn, self.finished_at, finished_at, task.uid)?;
            }
        }

        wtxn.commit()?;
        self.wake_up.signal();

        Ok(task)
    pub fn register_dumped_task(&mut self) -> Result<Dump> {
        Dump::new(self)
    }

    /// Create a new index without any associated task.
@@ -1051,14 +950,15 @@ impl IndexScheduler {

    /// Perform one iteration of the run loop.
    ///
    /// 1. Find the next batch of tasks to be processed.
    /// 2. Update the information of these tasks following the start of their processing.
    /// 3. Update the in-memory list of processed tasks accordingly.
    /// 4. Process the batch:
    /// 1. See if we need to cleanup the task queue
    /// 2. Find the next batch of tasks to be processed.
    /// 3. Update the information of these tasks following the start of their processing.
    /// 4. Update the in-memory list of processed tasks accordingly.
    /// 5. Process the batch:
    ///    - perform the actions of each batched task
    ///    - update the information of each batched task following the end
    ///      of their processing.
    /// 5. Reset the in-memory list of processed tasks.
    /// 6. Reset the in-memory list of processed tasks.
    ///
    /// Returns the number of processed tasks.
    fn tick(&self) -> Result<TickOutcome> {
@@ -1068,6 +968,8 @@ impl IndexScheduler {
            self.breakpoint(Breakpoint::Start);
        }

        self.cleanup_task_queue()?;

        let rtxn = self.env.read_txn().map_err(Error::HeedTransaction)?;
        let batch =
            match self.create_next_batch(&rtxn).map_err(|e| Error::CreateBatch(Box::new(e)))? {
@@ -1204,6 +1106,55 @@ impl IndexScheduler {
        Ok(TickOutcome::TickAgain(processed_tasks))
    }

    /// Register a task to cleanup the task queue if needed
    fn cleanup_task_queue(&self) -> Result<()> {
        let rtxn = self.env.read_txn().map_err(Error::HeedTransaction)?;

        let nb_tasks = self.all_task_ids(&rtxn)?.len();
        // if we have less than 1M tasks everything is fine
        if nb_tasks < self.max_number_of_tasks as u64 {
            return Ok(());
        }

        let finished = self.status.get(&rtxn, &Status::Succeeded)?.unwrap_or_default()
            | self.status.get(&rtxn, &Status::Failed)?.unwrap_or_default()
            | self.status.get(&rtxn, &Status::Canceled)?.unwrap_or_default();

        let to_delete = RoaringBitmap::from_iter(finished.into_iter().rev().take(100_000));

        // /!\ the len must be at least 2 or else we might enter an infinite loop where we only delete
        // the deletion tasks we enqueued ourselves.
        if to_delete.len() < 2 {
            log::warn!("The task queue is almost full, but no task can be deleted yet.");
            // the only thing we can do is hope that the user tasks are going to finish
            return Ok(());
        }

        log::info!(
            "The task queue is almost full. Deleting the oldest {} finished tasks.",
            to_delete.len()
        );

        // it's safe to unwrap here because we checked the len above
        let newest_task_id = to_delete.iter().last().unwrap();
        let last_task_to_delete =
            self.get_task(&rtxn, newest_task_id)?.ok_or(Error::CorruptedTaskQueue)?;
        drop(rtxn);

        // increase time by one nanosecond so that the enqueuedAt of the last task to delete is also lower than that date.
        let delete_before = last_task_to_delete.enqueued_at + Duration::from_nanos(1);

        self.register(KindWithContent::TaskDeletion {
            query: format!(
                "?beforeEnqueuedAt={}&statuses=succeeded,failed,canceled",
                delete_before.format(&Rfc3339).map_err(|_| Error::CorruptedTaskQueue)?,
            ),
            tasks: to_delete,
        })?;

        Ok(())
    }

    pub fn index_stats(&self, index_uid: &str) -> Result<IndexStats> {
        let is_indexing = self.is_index_processing(index_uid)?;
        let rtxn = self.read_txn()?;
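
One subtle point in `cleanup_task_queue` above is the one-nanosecond bump: `beforeEnqueuedAt` is an exclusive bound, so the boundary must land strictly after the newest task selected for deletion. A tiny standalone check of that arithmetic (using the `time` crate, as the scheduler does):

```rust
// Standalone check of the off-by-one-nanosecond trick: bumping the boundary
// by 1ns makes an exclusive `beforeEnqueuedAt` bound include the newest task
// selected for deletion.
use std::time::Duration;
use time::OffsetDateTime;

fn main() {
    let enqueued_at = OffsetDateTime::now_utc();
    let delete_before = enqueued_at + Duration::from_nanos(1);
    // the task itself now satisfies `enqueued_at < delete_before`
    assert!(enqueued_at < delete_before);
}
```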

@@ -1244,6 +1195,184 @@ impl IndexScheduler {
        }
    }

pub struct Dump<'a> {
    index_scheduler: &'a IndexScheduler,
    wtxn: RwTxn<'a, 'a>,

    indexes: HashMap<String, RoaringBitmap>,
    statuses: HashMap<Status, RoaringBitmap>,
    kinds: HashMap<Kind, RoaringBitmap>,
}

impl<'a> Dump<'a> {
    pub(crate) fn new(index_scheduler: &'a mut IndexScheduler) -> Result<Self> {
        // While loading a dump no one should be able to access the scheduler thus I can block everything.
        let wtxn = index_scheduler.env.write_txn()?;

        Ok(Dump {
            index_scheduler,
            wtxn,
            indexes: HashMap::new(),
            statuses: HashMap::new(),
            kinds: HashMap::new(),
        })
    }

    /// Register a new task coming from a dump in the scheduler.
    /// By taking a mutable ref we're pretty sure no one will ever import a dump while actix is running.
    pub fn register_dumped_task(
        &mut self,
        task: TaskDump,
        content_file: Option<Box<UpdateFile>>,
    ) -> Result<Task> {
        let content_uuid = match content_file {
            Some(content_file) if task.status == Status::Enqueued => {
                let (uuid, mut file) = self.index_scheduler.create_update_file()?;
                let mut builder = DocumentsBatchBuilder::new(file.as_file_mut());
                for doc in content_file {
                    builder.append_json_object(&doc?)?;
                }
                builder.into_inner()?;
                file.persist()?;

                Some(uuid)
            }
            // If the task isn't `Enqueued` then just generate a recognisable `Uuid`
            // in case we try to open it later.
            _ if task.status != Status::Enqueued => Some(Uuid::nil()),
            _ => None,
        };

        let task = Task {
            uid: task.uid,
            enqueued_at: task.enqueued_at,
            started_at: task.started_at,
            finished_at: task.finished_at,
            error: task.error,
            canceled_by: task.canceled_by,
            details: task.details,
            status: task.status,
            kind: match task.kind {
                KindDump::DocumentImport {
                    primary_key,
                    method,
                    documents_count,
                    allow_index_creation,
                } => KindWithContent::DocumentAdditionOrUpdate {
                    index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
                    primary_key,
                    method,
                    content_file: content_uuid.ok_or(Error::CorruptedDump)?,
                    documents_count,
                    allow_index_creation,
                },
                KindDump::DocumentDeletion { documents_ids } => KindWithContent::DocumentDeletion {
                    documents_ids,
                    index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
                },
                KindDump::DocumentClear => KindWithContent::DocumentClear {
                    index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
                },
                KindDump::Settings { settings, is_deletion, allow_index_creation } => {
                    KindWithContent::SettingsUpdate {
                        index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
                        new_settings: settings,
                        is_deletion,
                        allow_index_creation,
                    }
                }
                KindDump::IndexDeletion => KindWithContent::IndexDeletion {
                    index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
                },
                KindDump::IndexCreation { primary_key } => KindWithContent::IndexCreation {
                    index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
                    primary_key,
                },
                KindDump::IndexUpdate { primary_key } => KindWithContent::IndexUpdate {
                    index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
                    primary_key,
                },
                KindDump::IndexSwap { swaps } => KindWithContent::IndexSwap { swaps },
                KindDump::TaskCancelation { query, tasks } => {
                    KindWithContent::TaskCancelation { query, tasks }
                }
                KindDump::TasksDeletion { query, tasks } => {
                    KindWithContent::TaskDeletion { query, tasks }
                }
                KindDump::DumpCreation { keys, instance_uid } => {
                    KindWithContent::DumpCreation { keys, instance_uid }
                }
                KindDump::SnapshotCreation => KindWithContent::SnapshotCreation,
            },
        };

        self.index_scheduler.all_tasks.put(&mut self.wtxn, &BEU32::new(task.uid), &task)?;

        for index in task.indexes() {
            match self.indexes.get_mut(index) {
                Some(bitmap) => {
                    bitmap.insert(task.uid);
                }
                None => {
                    let mut bitmap = RoaringBitmap::new();
                    bitmap.insert(task.uid);
                    self.indexes.insert(index.to_string(), bitmap);
                }
            };
        }

        utils::insert_task_datetime(
            &mut self.wtxn,
            self.index_scheduler.enqueued_at,
            task.enqueued_at,
            task.uid,
        )?;

        // we can't override the started_at & finished_at, so we must only set it if the task is finished and won't change
        if matches!(task.status, Status::Succeeded | Status::Failed | Status::Canceled) {
            if let Some(started_at) = task.started_at {
                utils::insert_task_datetime(
                    &mut self.wtxn,
                    self.index_scheduler.started_at,
                    started_at,
                    task.uid,
                )?;
            }
            if let Some(finished_at) = task.finished_at {
                utils::insert_task_datetime(
                    &mut self.wtxn,
                    self.index_scheduler.finished_at,
                    finished_at,
                    task.uid,
                )?;
            }
        }

        self.statuses.entry(task.status).or_insert(RoaringBitmap::new()).insert(task.uid);
        self.kinds.entry(task.kind.as_kind()).or_insert(RoaringBitmap::new()).insert(task.uid);

        Ok(task)
    }

    /// Commit all the changes and exit the importing dump state
    pub fn finish(mut self) -> Result<()> {
        for (index, bitmap) in self.indexes {
            self.index_scheduler.index_tasks.put(&mut self.wtxn, &index, &bitmap)?;
        }
        for (status, bitmap) in self.statuses {
            self.index_scheduler.put_status(&mut self.wtxn, status, &bitmap)?;
        }
        for (kind, bitmap) in self.kinds {
            self.index_scheduler.put_kind(&mut self.wtxn, kind, &bitmap)?;
        }

        self.wtxn.commit()?;
        self.index_scheduler.wake_up.signal();

        Ok(())
    }
}
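
A hedged sketch of how this new API is meant to be driven (same crate types; the function name, loop, and variable names are illustrative, not taken from a caller in this diff):

```rust
// Illustrative dump-import flow built from the API above: open a `Dump`,
// register every task from the dump file, then commit once via `finish`.
fn import_dump(
    scheduler: &mut IndexScheduler,
    tasks: Vec<(TaskDump, Option<Box<UpdateFile>>)>,
) -> Result<()> {
    let mut dump = scheduler.register_dumped_task()?;
    for (task, content_file) in tasks {
        dump.register_dumped_task(task, content_file)?;
    }
    // `finish` writes the accumulated index/status/kind bitmaps and commits
    // the single write transaction held for the whole import.
    dump.finish()
}
```

Compared with the removed method, this batches the bitmap updates in memory and performs one commit instead of touching the databases once per task.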
|
||||
|
||||
/// The outcome of calling the [`IndexScheduler::tick`] function.
|
||||
pub enum TickOutcome {
|
||||
/// The scheduler should immediately attempt another `tick`.
|
||||
@@ -1283,9 +1412,10 @@ mod tests {
    use big_s::S;
    use crossbeam::channel::RecvTimeoutError;
    use file_store::File;
    use meili_snap::snapshot;
    use meili_snap::{json_string, snapshot};
    use meilisearch_auth::AuthFilter;
    use meilisearch_types::document_formats::DocumentFormatError;
    use meilisearch_types::error::ErrorCode;
    use meilisearch_types::index_uid_pattern::IndexUidPattern;
    use meilisearch_types::milli::obkv_to_json;
    use meilisearch_types::milli::update::IndexDocumentsMethod::{
@@ -1316,13 +1446,22 @@ mod tests {
    pub fn test(
        autobatching_enabled: bool,
        planned_failures: Vec<(usize, FailureLocation)>,
    ) -> (Self, IndexSchedulerHandle) {
        Self::test_with_custom_config(planned_failures, |config| {
            config.autobatching_enabled = autobatching_enabled;
        })
    }

    pub fn test_with_custom_config(
        planned_failures: Vec<(usize, FailureLocation)>,
        configuration: impl Fn(&mut IndexSchedulerOptions),
    ) -> (Self, IndexSchedulerHandle) {
        let tempdir = TempDir::new().unwrap();
        let (sender, receiver) = crossbeam::channel::bounded(0);

        let indexer_config = IndexerConfig { skip_index_budget: true, ..Default::default() };

        let options = IndexSchedulerOptions {
        let mut options = IndexSchedulerOptions {
            version_file_path: tempdir.path().join(VERSION_FILE_NAME),
            auth_path: tempdir.path().join("auth"),
            tasks_path: tempdir.path().join("db_path"),
@@ -1335,8 +1474,10 @@ mod tests {
            index_growth_amount: 1000 * 1000, // 1 MB
            index_count: 5,
            indexer_config,
            autobatching_enabled,
            autobatching_enabled: true,
            max_number_of_tasks: 1_000_000,
        };
        configuration(&mut options);

        let index_scheduler = Self::new(options, sender, planned_failures).unwrap();

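The hunk above replaces a single threaded-through boolean with a closure that can mutate any field of the options struct before construction. A minimal, self-contained sketch of that closure-based override pattern (the struct and field names here are illustrative, not the scheduler's API):

/// Build defaults first, then let the caller mutate them before use.
#[derive(Debug)]
struct Options {
    autobatching_enabled: bool,
    max_number_of_tasks: usize,
}

fn with_custom_config(configure: impl Fn(&mut Options)) -> Options {
    let mut options = Options { autobatching_enabled: true, max_number_of_tasks: 1_000_000 };
    configure(&mut options);
    options
}

fn main() {
    // Override only what a given test cares about, as the new tests below do.
    let options = with_custom_config(|config| {
        config.max_number_of_tasks = 2;
    });
    println!("{options:?}");
}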
@@ -3651,4 +3792,127 @@ mod tests {
        // No matter what happens in process_batch, the index_scheduler should be internally consistent
        snapshot!(snapshot_index_scheduler(&index_scheduler), name: "index_creation_failed");
    }

    #[test]
    fn test_task_queue_is_full() {
        let (index_scheduler, mut handle) =
            IndexScheduler::test_with_custom_config(vec![], |config| {
                // that's the minimum map size possible
                config.task_db_size = 1048576;
            });

        index_scheduler
            .register(KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None })
            .unwrap();
        handle.advance_one_successful_batch();
        // on average this task takes ~600 bytes
        loop {
            let result = index_scheduler.register(KindWithContent::IndexCreation {
                index_uid: S("doggo"),
                primary_key: None,
            });
            if result.is_err() {
                break;
            }
            handle.advance_one_failed_batch();
        }
        index_scheduler.assert_internally_consistent();

        // at this point the task DB should have reached its limit and we should not be able to register new tasks
        let result = index_scheduler
            .register(KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None })
            .unwrap_err();
        snapshot!(result, @"Meilisearch cannot receive write operations because the limit of the task database has been reached. Please delete tasks to continue performing write operations.");
        // we can't test this error in an integration test, so as a best effort we still ensure the error returns the expected error code
        snapshot!(format!("{:?}", result.error_code()), @"NoSpaceLeftOnDevice");

        // Even a task deletion that doesn't delete anything shouldn't be accepted
        let result = index_scheduler
            .register(KindWithContent::TaskDeletion {
                query: S("test"),
                tasks: RoaringBitmap::new(),
            })
            .unwrap_err();
        snapshot!(result, @"Meilisearch cannot receive write operations because the limit of the task database has been reached. Please delete tasks to continue performing write operations.");
        // we can't test this error in an integration test, so as a best effort we still ensure the error returns the expected error code
        snapshot!(format!("{:?}", result.error_code()), @"NoSpaceLeftOnDevice");

        // But a task deletion that deletes something should work
        index_scheduler
            .register(KindWithContent::TaskDeletion { query: S("test"), tasks: (0..100).collect() })
            .unwrap();
        handle.advance_one_successful_batch();

        // Now we should be able to enqueue a few tasks again
        index_scheduler
            .register(KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None })
            .unwrap();
        handle.advance_one_failed_batch();
    }

    #[test]
    fn test_auto_deletion_of_tasks() {
        let (index_scheduler, mut handle) =
            IndexScheduler::test_with_custom_config(vec![], |config| {
                config.max_number_of_tasks = 2;
            });

        index_scheduler
            .register(KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None })
            .unwrap();
        handle.advance_one_successful_batch();

        index_scheduler
            .register(KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None })
            .unwrap();
        handle.advance_one_failed_batch();

        // at this point the max number of tasks is reached
        // we can still enqueue multiple tasks
        index_scheduler
            .register(KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None })
            .unwrap();
        index_scheduler
            .register(KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None })
            .unwrap();

        let rtxn = index_scheduler.env.read_txn().unwrap();
        let tasks = index_scheduler.get_task_ids(&rtxn, &Query { ..Default::default() }).unwrap();
        let tasks = index_scheduler.get_existing_tasks(&rtxn, tasks).unwrap();
        snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]" }), name: "task_queue_is_full");
        drop(rtxn);

        // now we're above the max number of tasks
        // and if we try to advance in the tick function a new task deletion should be enqueued
        handle.advance_till([Start, BatchCreated]);
        let rtxn = index_scheduler.env.read_txn().unwrap();
        let tasks = index_scheduler.get_task_ids(&rtxn, &Query { ..Default::default() }).unwrap();
        let tasks = index_scheduler.get_existing_tasks(&rtxn, tasks).unwrap();
        snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]", ".**.original_filter" => "[filter]", ".**.query" => "[query]" }), name: "task_deletion_have_been_enqueued");
        drop(rtxn);

        handle.advance_till([InsideProcessBatch, ProcessBatchSucceeded, AfterProcessing]);
        let rtxn = index_scheduler.env.read_txn().unwrap();
        let tasks = index_scheduler.get_task_ids(&rtxn, &Query { ..Default::default() }).unwrap();
        let tasks = index_scheduler.get_existing_tasks(&rtxn, tasks).unwrap();
        snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]", ".**.original_filter" => "[filter]", ".**.query" => "[query]" }), name: "task_deletion_have_been_processed");
        drop(rtxn);

        handle.advance_one_failed_batch();
        // a new task deletion has been enqueued
        handle.advance_one_successful_batch();
        let rtxn = index_scheduler.env.read_txn().unwrap();
        let tasks = index_scheduler.get_task_ids(&rtxn, &Query { ..Default::default() }).unwrap();
        let tasks = index_scheduler.get_existing_tasks(&rtxn, tasks).unwrap();
        snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]", ".**.original_filter" => "[filter]", ".**.query" => "[query]" }), name: "after_the_second_task_deletion");
        drop(rtxn);

        handle.advance_one_failed_batch();
        handle.advance_one_successful_batch();
        let rtxn = index_scheduler.env.read_txn().unwrap();
        let tasks = index_scheduler.get_task_ids(&rtxn, &Query { ..Default::default() }).unwrap();
        let tasks = index_scheduler.get_existing_tasks(&rtxn, tasks).unwrap();
        snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]", ".**.original_filter" => "[filter]", ".**.query" => "[query]" }), name: "everything_has_been_processed");
        drop(rtxn);
    }
}

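The saturation loop in `test_task_queue_is_full` rests on back-of-the-envelope arithmetic: with the minimum LMDB map size of 1048576 bytes (1 MiB) and roughly 600 bytes per task (per the comment in the test), the queue fills after at most ~1700 registrations. A quick sketch of that estimate; the 600-byte figure is an approximation, not a measured constant:

fn main() {
    let map_size_bytes: u64 = 1_048_576; // minimum LMDB map size used by the test
    let approx_task_size: u64 = 600;     // average IndexCreation task size, per the test comment
    // Upper bound on how many tasks fit before registration starts failing;
    // the real limit is lower because LMDB pages carry bookkeeping overhead.
    println!("~{} tasks", map_size_bytes / approx_task_size); // ~1747
}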
@@ -1,43 +0,0 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: succeeded, details: { received_documents: 3, indexed_documents: Some(3) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }}
1 {uid: 1, status: succeeded, details: { received_document_ids: 2, deleted_documents: Some(2) }, kind: DocumentDeletion { index_uid: "doggos", documents_ids: ["1", "2"] }}
----------------------------------------------------------------------
### Status:
enqueued []
succeeded [0,1,]
----------------------------------------------------------------------
### Kind:
"documentAdditionOrUpdate" [0,]
"documentDeletion" [1,]
----------------------------------------------------------------------
### Index Tasks:
doggos [0,1,]
----------------------------------------------------------------------
### Index Mapper:
doggos: { number_of_documents: 1, field_distribution: {"doggo": 1, "id": 1} }

----------------------------------------------------------------------
### Canceled By:

----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### Started At:
[timestamp] [0,1,]
----------------------------------------------------------------------
### Finished At:
[timestamp] [0,1,]
----------------------------------------------------------------------
### File Store:

----------------------------------------------------------------------

@@ -1,9 +0,0 @@
---
source: index-scheduler/src/lib.rs
---
[
  {
    "id": 3,
    "doggo": "bork"
  }
]
@@ -1,37 +0,0 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { received_documents: 3, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }}
----------------------------------------------------------------------
### Status:
enqueued [0,]
----------------------------------------------------------------------
### Kind:
"documentAdditionOrUpdate" [0,]
----------------------------------------------------------------------
### Index Tasks:
doggos [0,]
----------------------------------------------------------------------
### Index Mapper:

----------------------------------------------------------------------
### Canceled By:

----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
----------------------------------------------------------------------
### Started At:
----------------------------------------------------------------------
### Finished At:
----------------------------------------------------------------------
### File Store:
00000000-0000-0000-0000-000000000000

----------------------------------------------------------------------

@@ -1,40 +0,0 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { received_documents: 3, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }}
1 {uid: 1, status: enqueued, details: { received_document_ids: 2, deleted_documents: None }, kind: DocumentDeletion { index_uid: "doggos", documents_ids: ["1", "2"] }}
----------------------------------------------------------------------
### Status:
enqueued [0,1,]
----------------------------------------------------------------------
### Kind:
"documentAdditionOrUpdate" [0,]
"documentDeletion" [1,]
----------------------------------------------------------------------
### Index Tasks:
doggos [0,1,]
----------------------------------------------------------------------
### Index Mapper:

----------------------------------------------------------------------
### Canceled By:

----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### Started At:
----------------------------------------------------------------------
### Finished At:
----------------------------------------------------------------------
### File Store:
00000000-0000-0000-0000-000000000000

----------------------------------------------------------------------

@@ -1,43 +0,0 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: failed, error: ResponseError { code: 200, message: "Index `doggos` not found.", error_code: "index_not_found", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#index_not_found" }, details: { received_document_ids: 2, deleted_documents: Some(0) }, kind: DocumentDeletion { index_uid: "doggos", documents_ids: ["1", "2"] }}
1 {uid: 1, status: enqueued, details: { received_documents: 3, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }}
----------------------------------------------------------------------
### Status:
enqueued [1,]
failed [0,]
----------------------------------------------------------------------
### Kind:
"documentAdditionOrUpdate" [1,]
"documentDeletion" [0,]
----------------------------------------------------------------------
### Index Tasks:
doggos [0,1,]
----------------------------------------------------------------------
### Index Mapper:

----------------------------------------------------------------------
### Canceled By:

----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### Started At:
[timestamp] [0,]
----------------------------------------------------------------------
### Finished At:
[timestamp] [0,]
----------------------------------------------------------------------
### File Store:
00000000-0000-0000-0000-000000000000

----------------------------------------------------------------------

@@ -1,46 +0,0 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: failed, error: ResponseError { code: 200, message: "Index `doggos` not found.", error_code: "index_not_found", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#index_not_found" }, details: { received_document_ids: 2, deleted_documents: Some(0) }, kind: DocumentDeletion { index_uid: "doggos", documents_ids: ["1", "2"] }}
1 {uid: 1, status: succeeded, details: { received_documents: 3, indexed_documents: Some(3) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }}
----------------------------------------------------------------------
### Status:
enqueued []
succeeded [1,]
failed [0,]
----------------------------------------------------------------------
### Kind:
"documentAdditionOrUpdate" [1,]
"documentDeletion" [0,]
----------------------------------------------------------------------
### Index Tasks:
doggos [0,1,]
----------------------------------------------------------------------
### Index Mapper:
doggos: { number_of_documents: 3, field_distribution: {"catto": 1, "doggo": 2, "id": 3} }

----------------------------------------------------------------------
### Canceled By:

----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### Started At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### Finished At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### File Store:

----------------------------------------------------------------------

@@ -1,17 +0,0 @@
---
source: index-scheduler/src/lib.rs
---
[
  {
    "id": 1,
    "doggo": "jean bob"
  },
  {
    "id": 2,
    "catto": "jorts"
  },
  {
    "id": 3,
    "doggo": "bork"
  }
]
@@ -1,36 +0,0 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { received_document_ids: 2, deleted_documents: None }, kind: DocumentDeletion { index_uid: "doggos", documents_ids: ["1", "2"] }}
----------------------------------------------------------------------
### Status:
enqueued [0,]
----------------------------------------------------------------------
### Kind:
"documentDeletion" [0,]
----------------------------------------------------------------------
### Index Tasks:
doggos [0,]
----------------------------------------------------------------------
### Index Mapper:

----------------------------------------------------------------------
### Canceled By:

----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
----------------------------------------------------------------------
### Started At:
----------------------------------------------------------------------
### Finished At:
----------------------------------------------------------------------
### File Store:

----------------------------------------------------------------------

@@ -1,40 +0,0 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { received_document_ids: 2, deleted_documents: None }, kind: DocumentDeletion { index_uid: "doggos", documents_ids: ["1", "2"] }}
1 {uid: 1, status: enqueued, details: { received_documents: 3, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }}
----------------------------------------------------------------------
### Status:
enqueued [0,1,]
----------------------------------------------------------------------
### Kind:
"documentAdditionOrUpdate" [1,]
"documentDeletion" [0,]
----------------------------------------------------------------------
### Index Tasks:
doggos [0,1,]
----------------------------------------------------------------------
### Index Mapper:

----------------------------------------------------------------------
### Canceled By:

----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### Started At:
----------------------------------------------------------------------
### Finished At:
----------------------------------------------------------------------
### File Store:
00000000-0000-0000-0000-000000000000

----------------------------------------------------------------------

@@ -0,0 +1,68 @@
---
source: index-scheduler/src/lib.rs
---
[
  {
    "uid": 3,
    "enqueuedAt": "[date]",
    "startedAt": "[date]",
    "finishedAt": "[date]",
    "error": null,
    "canceledBy": null,
    "details": {
      "IndexInfo": {
        "primary_key": null
      }
    },
    "status": "enqueued",
    "kind": {
      "indexCreation": {
        "index_uid": "doggo",
        "primary_key": null
      }
    }
  },
  {
    "uid": 5,
    "enqueuedAt": "[date]",
    "startedAt": "[date]",
    "finishedAt": "[date]",
    "error": null,
    "canceledBy": null,
    "details": {
      "TaskDeletion": {
        "matched_tasks": 2,
        "deleted_tasks": 2,
        "original_filter": "[filter]"
      }
    },
    "status": "succeeded",
    "kind": {
      "taskDeletion": {
        "query": "[query]",
        "tasks": [58, 48, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 16, 0, 0, 0, 2, 0, 4, 0]
      }
    }
  }
]
@@ -0,0 +1,48 @@
---
source: index-scheduler/src/lib.rs
---
[
  {
    "uid": 6,
    "enqueuedAt": "[date]",
    "startedAt": "[date]",
    "finishedAt": "[date]",
    "error": null,
    "canceledBy": null,
    "details": {
      "TaskDeletion": {
        "matched_tasks": 2,
        "deleted_tasks": 2,
        "original_filter": "[filter]"
      }
    },
    "status": "succeeded",
    "kind": {
      "taskDeletion": {
        "query": "[query]",
        "tasks": [58, 48, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 16, 0, 0, 0, 3, 0, 5, 0]
      }
    }
  }
]
@@ -0,0 +1,133 @@
---
source: index-scheduler/src/lib.rs
---
[
  {
    "uid": 0,
    "enqueuedAt": "[date]",
    "startedAt": "[date]",
    "finishedAt": "[date]",
    "error": null,
    "canceledBy": null,
    "details": {
      "IndexInfo": {
        "primary_key": null
      }
    },
    "status": "succeeded",
    "kind": {
      "indexCreation": {
        "index_uid": "doggo",
        "primary_key": null
      }
    }
  },
  {
    "uid": 1,
    "enqueuedAt": "[date]",
    "startedAt": "[date]",
    "finishedAt": "[date]",
    "error": {
      "message": "Index `doggo` already exists.",
      "code": "index_already_exists",
      "type": "invalid_request",
      "link": "https://docs.meilisearch.com/errors#index_already_exists"
    },
    "canceledBy": null,
    "details": {
      "IndexInfo": {
        "primary_key": null
      }
    },
    "status": "failed",
    "kind": {
      "indexCreation": {
        "index_uid": "doggo",
        "primary_key": null
      }
    }
  },
  {
    "uid": 2,
    "enqueuedAt": "[date]",
    "startedAt": "[date]",
    "finishedAt": "[date]",
    "error": null,
    "canceledBy": null,
    "details": {
      "IndexInfo": {
        "primary_key": null
      }
    },
    "status": "enqueued",
    "kind": {
      "indexCreation": {
        "index_uid": "doggo",
        "primary_key": null
      }
    }
  },
  {
    "uid": 3,
    "enqueuedAt": "[date]",
    "startedAt": "[date]",
    "finishedAt": "[date]",
    "error": null,
    "canceledBy": null,
    "details": {
      "IndexInfo": {
        "primary_key": null
      }
    },
    "status": "enqueued",
    "kind": {
      "indexCreation": {
        "index_uid": "doggo",
        "primary_key": null
      }
    }
  },
  {
    "uid": 4,
    "enqueuedAt": "[date]",
    "startedAt": "[date]",
    "finishedAt": "[date]",
    "error": null,
    "canceledBy": null,
    "details": {
      "TaskDeletion": {
        "matched_tasks": 2,
        "deleted_tasks": null,
        "original_filter": "[filter]"
      }
    },
    "status": "enqueued",
    "kind": {
      "taskDeletion": {
        "query": "[query]",
        "tasks": [58, 48, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 16, 0, 0, 0, 0, 0, 1, 0]
      }
    }
  }
]
@@ -0,0 +1,88 @@
---
source: index-scheduler/src/lib.rs
---
[
  {
    "uid": 2,
    "enqueuedAt": "[date]",
    "startedAt": "[date]",
    "finishedAt": "[date]",
    "error": null,
    "canceledBy": null,
    "details": {
      "IndexInfo": {
        "primary_key": null
      }
    },
    "status": "enqueued",
    "kind": {
      "indexCreation": {
        "index_uid": "doggo",
        "primary_key": null
      }
    }
  },
  {
    "uid": 3,
    "enqueuedAt": "[date]",
    "startedAt": "[date]",
    "finishedAt": "[date]",
    "error": null,
    "canceledBy": null,
    "details": {
      "IndexInfo": {
        "primary_key": null
      }
    },
    "status": "enqueued",
    "kind": {
      "indexCreation": {
        "index_uid": "doggo",
        "primary_key": null
      }
    }
  },
  {
    "uid": 4,
    "enqueuedAt": "[date]",
    "startedAt": "[date]",
    "finishedAt": "[date]",
    "error": null,
    "canceledBy": null,
    "details": {
      "TaskDeletion": {
        "matched_tasks": 2,
        "deleted_tasks": 2,
        "original_filter": "[filter]"
      }
    },
    "status": "succeeded",
    "kind": {
      "taskDeletion": {
        "query": "[query]",
        "tasks": [58, 48, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 16, 0, 0, 0, 0, 0, 1, 0]
      }
    }
  }
]
@@ -0,0 +1,90 @@
---
source: index-scheduler/src/lib.rs
---
[
  {
    "uid": 0,
    "enqueuedAt": "[date]",
    "startedAt": "[date]",
    "finishedAt": "[date]",
    "error": null,
    "canceledBy": null,
    "details": {
      "IndexInfo": {
        "primary_key": null
      }
    },
    "status": "succeeded",
    "kind": {
      "indexCreation": {
        "index_uid": "doggo",
        "primary_key": null
      }
    }
  },
  {
    "uid": 1,
    "enqueuedAt": "[date]",
    "startedAt": "[date]",
    "finishedAt": "[date]",
    "error": {
      "message": "Index `doggo` already exists.",
      "code": "index_already_exists",
      "type": "invalid_request",
      "link": "https://docs.meilisearch.com/errors#index_already_exists"
    },
    "canceledBy": null,
    "details": {
      "IndexInfo": {
        "primary_key": null
      }
    },
    "status": "failed",
    "kind": {
      "indexCreation": {
        "index_uid": "doggo",
        "primary_key": null
      }
    }
  },
  {
    "uid": 2,
    "enqueuedAt": "[date]",
    "startedAt": "[date]",
    "finishedAt": "[date]",
    "error": null,
    "canceledBy": null,
    "details": {
      "IndexInfo": {
        "primary_key": null
      }
    },
    "status": "enqueued",
    "kind": {
      "indexCreation": {
        "index_uid": "doggo",
        "primary_key": null
      }
    }
  },
  {
    "uid": 3,
    "enqueuedAt": "[date]",
    "startedAt": "[date]",
    "finishedAt": "[date]",
    "error": null,
    "canceledBy": null,
    "details": {
      "IndexInfo": {
        "primary_key": null
      }
    },
    "status": "enqueued",
    "kind": {
      "indexCreation": {
        "index_uid": "doggo",
        "primary_key": null
      }
    }
  }
]
@@ -11,6 +11,6 @@ edition.workspace = true
license.workspace = true

[dependencies]
insta = { version = "^1.19.1", features = ["json", "redactions"] }
insta = { version = "^1.29.0", features = ["json", "redactions"] }
md5 = "0.7.0"
once_cell = "1.15"
once_cell = "1.17"

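meili-snap builds on insta's "redactions" feature, which the scheduler tests above rely on to mask volatile fields like dates before snapshotting. A minimal standalone sketch using insta directly rather than the meili_snap macros (the selector shape matches what the tests pass to `json_string!`):

use insta::assert_json_snapshot;
use serde_json::json;

#[test]
fn snapshot_with_redactions() {
    let tasks = json!([
        { "uid": 0, "enqueuedAt": "2023-04-12T10:00:00Z", "status": "succeeded" }
    ]);
    // `[].enqueuedAt` redacts the volatile date on every array element,
    // so the stored snapshot stays stable across runs.
    assert_json_snapshot!(tasks, { "[].enqueuedAt" => "[date]" });
}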
@@ -11,16 +11,16 @@ edition.workspace = true
license.workspace = true

[dependencies]
base64 = "0.13.1"
enum-iterator = "1.1.3"
base64 = "0.21.0"
enum-iterator = "1.4.0"
hmac = "0.12.1"
maplit = "1.0.2"
meilisearch-types = { path = "../meilisearch-types" }
rand = "0.8.5"
roaring = { version = "0.10.0", features = ["serde"] }
serde = { version = "1.0.145", features = ["derive"] }
serde_json = { version = "1.0.85", features = ["preserve_order"] }
roaring = { version = "0.10.1", features = ["serde"] }
serde = { version = "1.0.160", features = ["derive"] }
serde_json = { version = "1.0.95", features = ["preserve_order"] }
sha2 = "0.10.6"
thiserror = "1.0.37"
time = { version = "0.3.15", features = ["serde-well-known", "formatting", "parsing", "macros"] }
uuid = { version = "1.1.2", features = ["serde", "v4"] }
thiserror = "1.0.40"
time = { version = "0.3.20", features = ["serde-well-known", "formatting", "parsing", "macros"] }
uuid = { version = "1.3.1", features = ["serde", "v4"] }

@@ -34,6 +34,12 @@ impl AuthController {
        Ok(Self { store: Arc::new(store), master_key: master_key.clone() })
    }

    /// Return `Ok(())` if the auth controller is able to access one of its databases.
    pub fn health(&self) -> Result<()> {
        self.store.health()?;
        Ok(())
    }

    /// Return the size of the `AuthController` database in bytes.
    pub fn size(&self) -> Result<u64> {
        self.store.size()
@@ -304,6 +310,7 @@ pub const MASTER_KEY_MIN_SIZE: usize = 16;
const MASTER_KEY_GEN_SIZE: usize = 32;

pub fn generate_master_key() -> String {
    use base64::Engine;
    use rand::rngs::OsRng;
    use rand::RngCore;

@@ -314,5 +321,5 @@ pub fn generate_master_key() -> String {

    // let's encode the random bytes to base64 to make them human-readable and not too long.
    // We're using the URL_SAFE alphabet that will produce keys without =, / or other unusual characters.
    base64::encode_config(buf, base64::URL_SAFE_NO_PAD)
    base64::engine::general_purpose::URL_SAFE_NO_PAD.encode(buf)
}

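The last hunk above is the base64 0.13 to 0.21 migration: the free function `encode_config` was removed in favor of the `Engine` trait, where a pre-built engine value owns the alphabet and padding configuration. A minimal, self-contained sketch of the new call using the same URL-safe, unpadded alphabet:

use base64::Engine;
use base64::engine::general_purpose::URL_SAFE_NO_PAD;

fn main() {
    let buf = [0u8; 32]; // stand-in for the random bytes pulled from OsRng
    // base64 0.13: base64::encode_config(&buf, base64::URL_SAFE_NO_PAD)
    // base64 0.21: call .encode() on an Engine value instead.
    let key = URL_SAFE_NO_PAD.encode(buf);
    assert!(!key.contains('=') && !key.contains('/'));
    println!("{key}");
}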
@@ -61,6 +61,13 @@ impl HeedAuthStore {
        Ok(Self { env, keys, action_keyid_index_expiration, should_close_on_drop: true })
    }

    /// Return `Ok(())` if the auth store is able to access one of its databases.
    pub fn health(&self) -> Result<()> {
        let rtxn = self.env.read_txn()?;
        self.keys.first(&rtxn)?;
        Ok(())
    }

    /// Return the size of the database in bytes.
    pub fn size(&self) -> Result<u64> {
        Ok(self.env.real_disk_size()?)

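The probe above is deliberately cheap: open a read transaction and fetch the first key, regardless of its value. A sketch of the same pattern behind an illustrative trait (the names here are placeholders, not the heed API):

/// A component is "healthy" if it can open a read transaction and
/// fetch its first entry; an empty store still reports healthy.
trait KeyValueStore {
    type Error;
    /// Attempt the cheapest possible read: the first key, if any.
    fn first_entry(&self) -> Result<Option<(String, String)>, Self::Error>;
}

fn health<S: KeyValueStore>(store: &S) -> Result<(), S::Error> {
    // The returned value is irrelevant; only the ability to read matters.
    store.first_entry().map(|_| ())
}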
@@ -11,31 +11,31 @@ edition.workspace = true
license.workspace = true

[dependencies]
actix-web = { version = "4.2.1", default-features = false }
anyhow = "1.0.65"
actix-web = { version = "4.3.1", default-features = false }
anyhow = "1.0.70"
convert_case = "0.6.0"
csv = "1.1.6"
csv = "1.2.1"
deserr = "0.5.0"
either = { version = "1.6.1", features = ["serde"] }
enum-iterator = "1.1.3"
either = { version = "1.8.1", features = ["serde"] }
enum-iterator = "1.4.0"
file-store = { path = "../file-store" }
flate2 = "1.0.24"
flate2 = "1.0.25"
fst = "0.4.7"
memmap2 = "0.5.7"
memmap2 = "0.5.10"
milli = { path = "../milli", default-features = false }
roaring = { version = "0.10.0", features = ["serde"] }
serde = { version = "1.0.145", features = ["derive"] }
roaring = { version = "0.10.1", features = ["serde"] }
serde = { version = "1.0.160", features = ["derive"] }
serde-cs = "0.2.4"
serde_json = "1.0.85"
serde_json = "1.0.95"
tar = "0.4.38"
tempfile = "3.3.0"
thiserror = "1.0.30"
time = { version = "0.3.7", features = ["serde-well-known", "formatting", "parsing", "macros"] }
tokio = "1.24"
uuid = { version = "1.1.2", features = ["serde", "v4"] }
tempfile = "3.5.0"
thiserror = "1.0.40"
time = { version = "0.3.20", features = ["serde-well-known", "formatting", "parsing", "macros"] }
tokio = "1.27"
uuid = { version = "1.3.1", features = ["serde", "v4"] }

[dev-dependencies]
insta = "1.19.1"
insta = "1.29.0"
meili-snap = { path = "../meili-snap" }

[features]
@@ -50,3 +50,6 @@ hebrew = ["milli/hebrew"]
japanese = ["milli/japanese"]
# thai specialized tokenization
thai = ["milli/thai"]

# allow greek specialized tokenization
greek = ["milli/greek"]

@@ -237,8 +237,11 @@ InvalidSearchMatchingStrategy , InvalidRequest , BAD_REQUEST ;
InvalidSearchOffset , InvalidRequest , BAD_REQUEST ;
InvalidSearchPage , InvalidRequest , BAD_REQUEST ;
InvalidSearchQ , InvalidRequest , BAD_REQUEST ;
InvalidFacetSearchQuery , InvalidRequest , BAD_REQUEST ;
InvalidFacetSearchName , InvalidRequest , BAD_REQUEST ;
InvalidSearchShowMatchesPosition , InvalidRequest , BAD_REQUEST ;
InvalidSearchSort , InvalidRequest , BAD_REQUEST ;
InvalidSearchFacet , InvalidRequest , BAD_REQUEST ;
InvalidSettingsDisplayedAttributes , InvalidRequest , BAD_REQUEST ;
InvalidSettingsDistinctAttribute , InvalidRequest , BAD_REQUEST ;
InvalidSettingsFaceting , InvalidRequest , BAD_REQUEST ;
@@ -327,6 +330,7 @@ impl ErrorCode for milli::Error {
UserError::SortRankingRuleMissing => Code::InvalidSearchSort,
UserError::InvalidFacetsDistribution { .. } => Code::InvalidSearchFacets,
UserError::InvalidSortableAttribute { .. } => Code::InvalidSearchSort,
UserError::InvalidSearchFacet { .. } => Code::InvalidSearchFacet,
UserError::CriterionError(_) => Code::InvalidSettingsRankingRules,
UserError::InvalidGeoField { .. } => Code::InvalidDocumentGeoField,
UserError::SortError(_) => Code::InvalidSearchSort,

@@ -46,7 +46,7 @@ pub fn check_version_file(db_path: &Path) -> anyhow::Result<()> {
pub enum VersionFileError {
    #[error(
        "Meilisearch (v{}) failed to infer the version of the database.
To update Meilisearch please follow our guide on https://docs.meilisearch.com/learn/update_and_migration/updating.html.",
To update Meilisearch please follow our guide on https://www.meilisearch.com/docs/learn/update_and_migration/updating.",
        env!("CARGO_PKG_VERSION").to_string()
    )]
    MissingVersionFile,
@@ -54,7 +54,7 @@ pub enum VersionFileError {
    MalformedVersionFile,
    #[error(
        "Your database version ({major}.{minor}.{patch}) is incompatible with your current engine version ({}).\n\
To migrate data between Meilisearch versions, please follow our guide on https://docs.meilisearch.com/learn/update_and_migration/updating.html.",
To migrate data between Meilisearch versions, please follow our guide on https://www.meilisearch.com/docs/learn/update_and_migration/updating.",
        env!("CARGO_PKG_VERSION").to_string()
    )]
    VersionMismatch { major: String, minor: String, patch: String },

@@ -13,97 +13,97 @@ license.workspace = true
default-run = "meilisearch"

[dependencies]
actix-cors = "0.6.3"
actix-http = { version = "3.2.2", default-features = false, features = ["compress-brotli", "compress-gzip", "rustls"] }
actix-web = { version = "4.2.1", default-features = false, features = ["macros", "compress-brotli", "compress-gzip", "cookies", "rustls"] }
actix-cors = "0.6.4"
actix-http = { version = "3.3.1", default-features = false, features = ["compress-brotli", "compress-gzip", "rustls"] }
actix-web = { version = "4.3.1", default-features = false, features = ["macros", "compress-brotli", "compress-gzip", "cookies", "rustls"] }
actix-web-static-files = { git = "https://github.com/kilork/actix-web-static-files.git", rev = "2d3b6160", optional = true }
anyhow = { version = "1.0.65", features = ["backtrace"] }
async-stream = "0.3.3"
async-trait = "0.1.57"
bstr = "1.0.1"
byte-unit = { version = "4.0.14", default-features = false, features = ["std", "serde"] }
bytes = "1.2.1"
clap = { version = "4.0.9", features = ["derive", "env"] }
crossbeam-channel = "0.5.6"
anyhow = { version = "1.0.70", features = ["backtrace"] }
async-stream = "0.3.5"
async-trait = "0.1.68"
bstr = "1.4.0"
byte-unit = { version = "4.0.19", default-features = false, features = ["std", "serde"] }
bytes = "1.4.0"
clap = { version = "4.2.1", features = ["derive", "env"] }
crossbeam-channel = "0.5.8"
deserr = "0.5.0"
dump = { path = "../dump" }
either = "1.8.0"
env_logger = "0.9.1"
either = "1.8.1"
env_logger = "0.10.0"
file-store = { path = "../file-store" }
flate2 = "1.0.24"
flate2 = "1.0.25"
fst = "0.4.7"
futures = "0.3.24"
futures-util = "0.3.24"
http = "0.2.8"
futures = "0.3.28"
futures-util = "0.3.28"
http = "0.2.9"
index-scheduler = { path = "../index-scheduler" }
indexmap = { version = "1.9.1", features = ["serde-1"] }
indexmap = { version = "1.9.3", features = ["serde-1"] }
itertools = "0.10.5"
jsonwebtoken = "8.1.1"
jsonwebtoken = "8.3.0"
lazy_static = "1.4.0"
log = "0.4.17"
meilisearch-auth = { path = "../meilisearch-auth" }
meilisearch-types = { path = "../meilisearch-types" }
mimalloc = { version = "0.1.29", default-features = false }
mime = "0.3.16"
num_cpus = "1.13.1"
mimalloc = { version = "0.1.36", default-features = false }
mime = "0.3.17"
num_cpus = "1.15.0"
obkv = "0.2.0"
once_cell = "1.15.0"
once_cell = "1.17.1"
parking_lot = "0.12.1"
permissive-json-pointer = { path = "../permissive-json-pointer" }
pin-project-lite = "0.2.9"
platform-dirs = "0.3.0"
prometheus = { version = "0.13.2", features = ["process"] }
prometheus = { version = "0.13.3", features = ["process"] }
rand = "0.8.5"
rayon = "1.5.3"
regex = "1.6.0"
reqwest = { version = "0.11.12", features = ["rustls-tls", "json"], default-features = false }
rustls = "0.20.6"
rustls-pemfile = "1.0.1"
segment = { version = "0.2.1", optional = true }
serde = { version = "1.0.145", features = ["derive"] }
serde_json = { version = "1.0.85", features = ["preserve_order"] }
rayon = "1.7.0"
regex = "1.7.3"
reqwest = { version = "0.11.16", features = ["rustls-tls", "json"], default-features = false }
rustls = "0.20.8"
rustls-pemfile = "1.0.2"
segment = { version = "0.2.2", optional = true }
serde = { version = "1.0.160", features = ["derive"] }
serde_json = { version = "1.0.95", features = ["preserve_order"] }
sha2 = "0.10.6"
siphasher = "0.3.10"
slice-group-by = "0.3.0"
static-files = { version = "0.2.3", optional = true }
sysinfo = "0.26.4"
sysinfo = "0.28.4"
tar = "0.4.38"
tempfile = "3.3.0"
thiserror = "1.0.37"
time = { version = "0.3.15", features = ["serde-well-known", "formatting", "parsing", "macros"] }
tokio = { version = "1.24.2", features = ["full"] }
tokio-stream = "0.1.10"
toml = "0.5.9"
uuid = { version = "1.1.2", features = ["serde", "v4"] }
walkdir = "2.3.2"
yaup = "0.2.0"
tempfile = "3.5.0"
thiserror = "1.0.40"
time = { version = "0.3.20", features = ["serde-well-known", "formatting", "parsing", "macros"] }
tokio = { version = "1.27.0", features = ["full"] }
tokio-stream = "0.1.12"
toml = "0.7.3"
uuid = { version = "1.3.1", features = ["serde", "v4"] }
walkdir = "2.3.3"
yaup = "0.2.1"
serde_urlencoded = "0.7.1"
actix-utils = "3.0.1"
atty = "0.2.14"
termcolor = "1.1.3"
termcolor = "1.2.0"

[dev-dependencies]
actix-rt = "2.7.0"
actix-rt = "2.8.0"
assert-json-diff = "2.0.2"
brotli = "3.3.4"
insta = "1.19.1"
insta = "1.29.0"
manifest-dir-macros = "0.1.16"
maplit = "1.0.2"
meili-snap = {path = "../meili-snap"}
temp-env = "0.3.1"
temp-env = "0.3.3"
urlencoding = "2.1.2"
yaup = "0.2.1"

[build-dependencies]
anyhow = { version = "1.0.65", optional = true }
cargo_toml = { version = "0.14.0", optional = true }
anyhow = { version = "1.0.70", optional = true }
cargo_toml = { version = "0.15.2", optional = true }
hex = { version = "0.4.3", optional = true }
reqwest = { version = "0.11.12", features = ["blocking", "rustls-tls"], default-features = false, optional = true }
sha-1 = { version = "0.10.0", optional = true }
reqwest = { version = "0.11.16", features = ["blocking", "rustls-tls"], default-features = false, optional = true }
sha-1 = { version = "0.10.1", optional = true }
static-files = { version = "0.2.3", optional = true }
tempfile = { version = "3.3.0", optional = true }
vergen = { version = "7.4.2", default-features = false, features = ["git"] }
zip = { version = "0.6.2", optional = true }
tempfile = { version = "3.5.0", optional = true }
vergen = { version = "7.5.1", default-features = false, features = ["git"] }
zip = { version = "0.6.4", optional = true }

[features]
default = ["analytics", "meilisearch-types/default", "mini-dashboard"]
@@ -113,6 +113,7 @@ chinese = ["meilisearch-types/chinese"]
hebrew = ["meilisearch-types/hebrew"]
japanese = ["meilisearch-types/japanese"]
thai = ["meilisearch-types/thai"]
greek = ["meilisearch-types/greek"]

[package.metadata.mini-dashboard]
assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.7/build.zip"

@@ -38,6 +38,18 @@ impl MultiSearchAggregator {
    pub fn succeed(&mut self) {}
}

#[derive(Default)]
pub struct FacetSearchAggregator;

#[allow(dead_code)]
impl FacetSearchAggregator {
    pub fn from_query(_: &dyn Any, _: &dyn Any) -> Self {
        Self::default()
    }

    pub fn succeed(&mut self, _: &dyn Any) {}
}

impl MockAnalytics {
    #[allow(clippy::new_ret_no_self)]
    pub fn new(opt: &Opt) -> Arc<dyn Analytics> {
@@ -56,6 +68,7 @@ impl Analytics for MockAnalytics {
    fn get_search(&self, _aggregate: super::SearchAggregator) {}
    fn post_search(&self, _aggregate: super::SearchAggregator) {}
    fn post_multi_search(&self, _aggregate: super::MultiSearchAggregator) {}
    fn post_facet_search(&self, _aggregate: super::FacetSearchAggregator) {}
    fn add_documents(
        &self,
        _documents_query: &UpdateDocumentsQuery,

@@ -25,6 +25,8 @@ pub type SegmentAnalytics = mock_analytics::MockAnalytics;
pub type SearchAggregator = mock_analytics::SearchAggregator;
#[cfg(any(debug_assertions, not(feature = "analytics")))]
pub type MultiSearchAggregator = mock_analytics::MultiSearchAggregator;
#[cfg(any(debug_assertions, not(feature = "analytics")))]
pub type FacetSearchAggregator = mock_analytics::FacetSearchAggregator;

// if we are in release mode and the feature analytics was enabled
// we use the real analytics
@@ -34,6 +36,8 @@ pub type SegmentAnalytics = segment_analytics::SegmentAnalytics;
pub type SearchAggregator = segment_analytics::SearchAggregator;
#[cfg(all(not(debug_assertions), feature = "analytics"))]
pub type MultiSearchAggregator = segment_analytics::MultiSearchAggregator;
#[cfg(all(not(debug_assertions), feature = "analytics"))]
pub type FacetSearchAggregator = segment_analytics::FacetSearchAggregator;

/// The Meilisearch config dir:
/// `~/.config/Meilisearch` on *NIX or *BSD.
@@ -81,6 +85,9 @@ pub trait Analytics: Sync + Send {
    /// This method should be called to aggregate a post array of searches
    fn post_multi_search(&self, aggregate: MultiSearchAggregator);

    /// This method should be called to aggregate post facet values searches
    fn post_facet_search(&self, aggregate: FacetSearchAggregator);

    // this method should be called to aggregate an add documents request
    fn add_documents(
        &self,

@@ -1,5 +1,6 @@
use std::collections::{BinaryHeap, HashMap, HashSet};
use std::fs;
use std::mem::take;
use std::path::{Path, PathBuf};
use std::sync::Arc;
use std::time::{Duration, Instant};
@@ -27,11 +28,13 @@ use super::{config_user_id_path, DocumentDeletionKind, MEILISEARCH_CONFIG_PATH};
use crate::analytics::Analytics;
use crate::option::{default_http_addr, IndexerOpts, MaxMemory, MaxThreads, ScheduleSnapshot};
use crate::routes::indexes::documents::UpdateDocumentsQuery;
use crate::routes::indexes::facet_search::FacetSearchQuery;
use crate::routes::tasks::TasksFilterQuery;
use crate::routes::{create_all_stats, Stats};
use crate::search::{
    SearchQuery, SearchQueryWithIndex, SearchResult, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER,
    DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT,
    FacetSearchResult, MatchingStrategy, SearchQuery, SearchQueryWithIndex, SearchResult,
    DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG,
    DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET,
};
use crate::Opt;

@@ -69,6 +72,7 @@ pub enum AnalyticsMsg {
    AggregateGetSearch(SearchAggregator),
    AggregatePostSearch(SearchAggregator),
    AggregatePostMultiSearch(MultiSearchAggregator),
    AggregatePostFacetSearch(FacetSearchAggregator),
    AggregateAddDocuments(DocumentsAggregator),
    AggregateDeleteDocuments(DocumentsDeletionAggregator),
    AggregateUpdateDocuments(DocumentsAggregator),
@@ -86,7 +90,7 @@ impl SegmentAnalytics {
    pub async fn new(
        opt: &Opt,
        index_scheduler: Arc<IndexScheduler>,
        auth_controller: AuthController,
        auth_controller: Arc<AuthController>,
    ) -> Arc<dyn Analytics> {
        let instance_uid = super::find_user_id(&opt.db_path);
        let first_time_run = instance_uid.is_none();
@@ -135,6 +139,7 @@ impl SegmentAnalytics {
            batcher,
            post_search_aggregator: SearchAggregator::default(),
            post_multi_search_aggregator: MultiSearchAggregator::default(),
            post_facet_search_aggregator: FacetSearchAggregator::default(),
            get_search_aggregator: SearchAggregator::default(),
            add_documents_aggregator: DocumentsAggregator::default(),
            delete_documents_aggregator: DocumentsDeletionAggregator::default(),
@@ -176,6 +181,10 @@ impl super::Analytics for SegmentAnalytics {
        let _ = self.sender.try_send(AnalyticsMsg::AggregatePostSearch(aggregate));
    }

    fn post_facet_search(&self, aggregate: FacetSearchAggregator) {
        let _ = self.sender.try_send(AnalyticsMsg::AggregatePostFacetSearch(aggregate));
    }

    fn post_multi_search(&self, aggregate: MultiSearchAggregator) {
        let _ = self.sender.try_send(AnalyticsMsg::AggregatePostMultiSearch(aggregate));
    }
@@ -335,6 +344,7 @@ pub struct Segment {
    get_search_aggregator: SearchAggregator,
    post_search_aggregator: SearchAggregator,
    post_multi_search_aggregator: MultiSearchAggregator,
    post_facet_search_aggregator: FacetSearchAggregator,
    add_documents_aggregator: DocumentsAggregator,
    delete_documents_aggregator: DocumentsDeletionAggregator,
    update_documents_aggregator: DocumentsAggregator,
@@ -376,7 +386,11 @@ impl Segment {
        })
    }

    async fn run(mut self, index_scheduler: Arc<IndexScheduler>, auth_controller: AuthController) {
    async fn run(
        mut self,
        index_scheduler: Arc<IndexScheduler>,
        auth_controller: Arc<AuthController>,
    ) {
        const INTERVAL: Duration = Duration::from_secs(60 * 60); // one hour
        // The first batch must be sent after one hour.
        let mut interval =
@@ -393,6 +407,7 @@ impl Segment {
                        Some(AnalyticsMsg::AggregateGetSearch(agreg)) => self.get_search_aggregator.aggregate(agreg),
                        Some(AnalyticsMsg::AggregatePostSearch(agreg)) => self.post_search_aggregator.aggregate(agreg),
                        Some(AnalyticsMsg::AggregatePostMultiSearch(agreg)) => self.post_multi_search_aggregator.aggregate(agreg),
                        Some(AnalyticsMsg::AggregatePostFacetSearch(agreg)) => self.post_facet_search_aggregator.aggregate(agreg),
                        Some(AnalyticsMsg::AggregateAddDocuments(agreg)) => self.add_documents_aggregator.aggregate(agreg),
                        Some(AnalyticsMsg::AggregateDeleteDocuments(agreg)) => self.delete_documents_aggregator.aggregate(agreg),
                        Some(AnalyticsMsg::AggregateUpdateDocuments(agreg)) => self.update_documents_aggregator.aggregate(agreg),
@@ -408,10 +423,10 @@ impl Segment {
    async fn tick(
        &mut self,
        index_scheduler: Arc<IndexScheduler>,
        auth_controller: AuthController,
        auth_controller: Arc<AuthController>,
    ) {
        if let Ok(stats) =
            create_all_stats(index_scheduler.into(), auth_controller, &AuthFilter::default())
            create_all_stats(index_scheduler.into(), auth_controller.into(), &AuthFilter::default())
        {
            // Replace the version number with the prototype name if any.
            let version = if let Some(prototype) = crate::prototype_name() {
@@ -434,45 +449,62 @@ impl Segment {
            })
            .await;
        }
        let get_search = std::mem::take(&mut self.get_search_aggregator)
            .into_event(&self.user, "Documents Searched GET");
        let post_search = std::mem::take(&mut self.post_search_aggregator)
            .into_event(&self.user, "Documents Searched POST");
        let post_multi_search = std::mem::take(&mut self.post_multi_search_aggregator)
            .into_event(&self.user, "Documents Searched by Multi-Search POST");
        let add_documents = std::mem::take(&mut self.add_documents_aggregator)
            .into_event(&self.user, "Documents Added");
        let delete_documents = std::mem::take(&mut self.delete_documents_aggregator)
            .into_event(&self.user, "Documents Deleted");
        let update_documents = std::mem::take(&mut self.update_documents_aggregator)
            .into_event(&self.user, "Documents Updated");
        let get_tasks =
            std::mem::take(&mut self.get_tasks_aggregator).into_event(&self.user, "Tasks Seen");
        let health =
            std::mem::take(&mut self.health_aggregator).into_event(&self.user, "Health Seen");

        if let Some(get_search) = get_search {
        let Segment {
            inbox: _,
            opt: _,
            batcher: _,
            user,
            get_search_aggregator,
            post_search_aggregator,
            post_multi_search_aggregator,
            post_facet_search_aggregator,
            add_documents_aggregator,
            delete_documents_aggregator,
            update_documents_aggregator,
            get_tasks_aggregator,
            health_aggregator,
        } = self;

        if let Some(get_search) =
            take(get_search_aggregator).into_event(&user, "Documents Searched GET")
        {
            let _ = self.batcher.push(get_search).await;
        }
        if let Some(post_search) = post_search {
        if let Some(post_search) =
            take(post_search_aggregator).into_event(&user, "Documents Searched POST")
        {
            let _ = self.batcher.push(post_search).await;
        }
        if let Some(post_multi_search) = post_multi_search {
        if let Some(post_multi_search) = take(post_multi_search_aggregator)
            .into_event(&user, "Documents Searched by Multi-Search POST")
        {
            let _ = self.batcher.push(post_multi_search).await;
        }
        if let Some(add_documents) = add_documents {
        if let Some(post_facet_search) = take(post_facet_search_aggregator)
            .into_event(&user, "Documents Searched by Facet-Search POST")
        {
            let _ = self.batcher.push(post_facet_search).await;
        }
        if let Some(add_documents) =
            take(add_documents_aggregator).into_event(&user, "Documents Added")
        {
            let _ = self.batcher.push(add_documents).await;
        }
        if let Some(delete_documents) = delete_documents {
        if let Some(delete_documents) =
            take(delete_documents_aggregator).into_event(&user, "Documents Deleted")
        {
            let _ = self.batcher.push(delete_documents).await;
        }
        if let Some(update_documents) = update_documents {
        if let Some(update_documents) =
            take(update_documents_aggregator).into_event(&user, "Documents Updated")
        {
            let _ = self.batcher.push(update_documents).await;
        }
        if let Some(get_tasks) = get_tasks {
        if let Some(get_tasks) = take(get_tasks_aggregator).into_event(&user, "Tasks Seen") {
            let _ = self.batcher.push(get_tasks).await;
        }
        if let Some(health) = health {
        if let Some(health) = take(health_aggregator).into_event(&user, "Health Seen") {
            let _ = self.batcher.push(health).await;
        }
        let _ = self.batcher.flush().await;
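The refactor above destructures `Segment` and drains each aggregator with `std::mem::take`, which swaps the field for its `Default` value and hands back the accumulated state without cloning. A minimal sketch of the pattern:

use std::mem::take;

#[derive(Default, Debug)]
struct Aggregator {
    total_received: usize,
}

fn main() {
    let mut agg = Aggregator { total_received: 3 };
    // `take` moves the accumulated value out and leaves a fresh default
    // behind, so the next reporting interval starts from zero.
    let drained = take(&mut agg);
    assert_eq!(drained.total_received, 3);
    assert_eq!(agg.total_received, 0);
    println!("{drained:?} / {agg:?}");
}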
@@ -849,6 +881,144 @@ impl MultiSearchAggregator {
    }
}

#[derive(Default)]
pub struct FacetSearchAggregator {
    timestamp: Option<OffsetDateTime>,

    // context
    user_agents: HashSet<String>,

    // requests
    total_received: usize,
    total_succeeded: usize,
    time_spent: BinaryHeap<usize>,

    // The set of all facetNames that were used
    facet_names: HashSet<String>,

    // Has any parameter other than facetName or facetQuery been provided?
    additional_search_parameters_provided: bool,
}

impl FacetSearchAggregator {
    pub fn from_query(query: &FacetSearchQuery, request: &HttpRequest) -> Self {
        let FacetSearchQuery {
            facet_query: _,
            facet_name,
            q,
            offset,
            limit,
            page,
            hits_per_page,
            attributes_to_retrieve,
            attributes_to_crop,
            crop_length,
            attributes_to_highlight,
            show_matches_position,
            filter,
            sort,
            facets,
            highlight_pre_tag,
            highlight_post_tag,
            crop_marker,
            matching_strategy,
        } = query;

        let mut ret = Self::default();
        ret.timestamp = Some(OffsetDateTime::now_utc());

        ret.total_received = 1;
        ret.user_agents = extract_user_agents(request).into_iter().collect();
        ret.facet_names = Some(facet_name.clone()).into_iter().collect();

        ret.additional_search_parameters_provided = q.is_some()
            || *offset != DEFAULT_SEARCH_OFFSET()
            || *limit != DEFAULT_SEARCH_LIMIT()
            || page.is_some()
            || hits_per_page.is_some()
            || attributes_to_retrieve.is_some()
            || attributes_to_crop.is_some()
            || *crop_length != DEFAULT_CROP_LENGTH()
            || attributes_to_highlight.is_some()
            || *show_matches_position
            || filter.is_some()
            || sort.is_some()
            || facets.is_some()
            || *highlight_pre_tag != DEFAULT_HIGHLIGHT_PRE_TAG()
            || *highlight_post_tag != DEFAULT_HIGHLIGHT_POST_TAG()
            || *crop_marker != DEFAULT_CROP_MARKER()
            || *matching_strategy != MatchingStrategy::default();

        ret
    }

    pub fn succeed(&mut self, result: &FacetSearchResult) {
        self.total_succeeded = self.total_succeeded.saturating_add(1);
        self.time_spent.push(result.processing_time_ms as usize);
    }

    /// Aggregate one [FacetSearchAggregator] into another.
    pub fn aggregate(&mut self, mut other: Self) {
        if self.timestamp.is_none() {
            self.timestamp = other.timestamp;
        }

        // context
        for user_agent in other.user_agents.into_iter() {
            self.user_agents.insert(user_agent);
        }

        // request
        self.total_received = self.total_received.saturating_add(other.total_received);
        self.total_succeeded = self.total_succeeded.saturating_add(other.total_succeeded);
        self.time_spent.append(&mut other.time_spent);

        // facet_names
        for facet_name in other.facet_names.into_iter() {
            self.facet_names.insert(facet_name);
        }

        // additional_search_parameters_provided
        self.additional_search_parameters_provided = self.additional_search_parameters_provided
            | other.additional_search_parameters_provided;
    }

    pub fn into_event(self, user: &User, event_name: &str) -> Option<Track> {
        if self.total_received == 0 {
            None
        } else {
            // the index of the 99th percentile value
            let percentile_99th = 0.99 * (self.total_succeeded as f64 - 1.) + 1.;
            // we get all the values in sorted order
            let time_spent = self.time_spent.into_sorted_vec();
            // We are only interested in the slowest value among the 99% fastest results
            let time_spent = time_spent.get(percentile_99th as usize);

            let properties = json!({
                "user-agent": self.user_agents,
                "requests": {
                    "99th_response_time": time_spent.map(|t| format!("{:.2}", t)),
                    "total_succeeded": self.total_succeeded,
                    "total_failed": self.total_received.saturating_sub(self.total_succeeded), // just to be sure we never panic
                    "total_received": self.total_received,
                },
                "facets": {
                    "total_distinct_facet_count": self.facet_names.len(),
|
||||
},
|
||||
"additional_search_parameters_provided": self.additional_search_parameters_provided,
|
||||
});
|
||||
|
||||
Some(Track {
|
||||
timestamp: self.timestamp,
|
||||
user: user.clone(),
|
||||
event: event_name.to_string(),
|
||||
properties,
|
||||
..Default::default()
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
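The percentile lookup above is easy to misread in diff form; here is a minimal standalone sketch of the same computation (hypothetical helper, with `times` standing in for `time_spent` after `into_sorted_vec`):

    // Hypothetical standalone version of the lookup in `into_event` above.
    fn slowest_of_fastest_99(mut times: Vec<usize>) -> Option<usize> {
        times.sort_unstable();
        // Same formula as above: the index of the 99th percentile value.
        let percentile_99th = 0.99 * (times.len() as f64 - 1.) + 1.;
        // `get` yields None when the index falls past the end, which is why
        // the "99th_response_time" property is Option-typed.
        times.get(percentile_99th as usize).copied()
    }

With 200 samples the formula picks index 198, i.e. the second-slowest measurement.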

#[derive(Default)]
pub struct DocumentsAggregator {
    timestamp: Option<OffsetDateTime>,

@@ -4,6 +4,7 @@ use std::marker::PhantomData;
use std::ops::Deref;
use std::pin::Pin;

use actix_web::web::Data;
use actix_web::FromRequest;
pub use error::AuthenticationError;
use futures::future::err;
@@ -23,7 +24,7 @@ impl<P, D> GuardedData<P, D> {
    }

    async fn auth_bearer(
        auth: AuthController,
        auth: Data<AuthController>,
        token: String,
        index: Option<String>,
        data: Option<D>,
@@ -43,7 +44,7 @@ impl<P, D> GuardedData<P, D> {
        }
    }

    async fn auth_token(auth: AuthController, data: Option<D>) -> Result<Self, ResponseError>
    async fn auth_token(auth: Data<AuthController>, data: Option<D>) -> Result<Self, ResponseError>
    where
        P: Policy + 'static,
    {
@@ -60,7 +61,7 @@ impl<P, D> GuardedData<P, D> {
    }

    async fn authenticate(
        auth: AuthController,
        auth: Data<AuthController>,
        token: String,
        index: Option<String>,
    ) -> Result<Option<AuthFilter>, ResponseError>
@@ -90,7 +91,7 @@ impl<P: Policy + 'static, D: 'static + Clone> FromRequest for GuardedData<P, D>
        req: &actix_web::HttpRequest,
        _payload: &mut actix_web::dev::Payload,
    ) -> Self::Future {
        match req.app_data::<AuthController>().cloned() {
        match req.app_data::<Data<AuthController>>().cloned() {
            Some(auth) => match req
                .headers()
                .get("Authorization")
@@ -122,10 +123,15 @@ impl<P: Policy + 'static, D: 'static + Clone> FromRequest for GuardedData<P, D>
}

pub trait Policy {
    fn authenticate(auth: AuthController, token: &str, index: Option<&str>) -> Option<AuthFilter>;
    fn authenticate(
        auth: Data<AuthController>,
        token: &str,
        index: Option<&str>,
    ) -> Option<AuthFilter>;
}

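The `Policy` trait now receives the controller as `Data<AuthController>`. A minimal sketch of an implementor under the new signature (the `AllowAll` policy is hypothetical and assumes `AuthFilter` implements `Default`; a real policy would validate the token):

    struct AllowAll;

    impl Policy for AllowAll {
        fn authenticate(
            _auth: Data<AuthController>,
            _token: &str,
            _index: Option<&str>,
        ) -> Option<AuthFilter> {
            // Grant unrestricted access; returning None would reject instead.
            Some(AuthFilter::default())
        }
    }
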
pub mod policies {
    use actix_web::web::Data;
    use jsonwebtoken::{decode, Algorithm, DecodingKey, Validation};
    use meilisearch_auth::{AuthController, AuthFilter, SearchRules};
    // reexport actions in policies in order to be used in routes configuration.
@@ -178,7 +184,7 @@ pub mod policies {
    /// Otherwise, returns an object containing the generated permissions: the search filters to add to a search, and the list of allowed indexes
    /// (that may contain more indexes than requested).
    fn authenticate(
        auth: AuthController,
        auth: Data<AuthController>,
        token: &str,
        index: Option<&str>,
    ) -> Option<AuthFilter> {

@@ -88,7 +88,7 @@ fn is_empty_db(db_path: impl AsRef<Path>) -> bool {

pub fn create_app(
    index_scheduler: Data<IndexScheduler>,
    auth_controller: AuthController,
    auth_controller: Data<AuthController>,
    opt: Opt,
    analytics: Arc<dyn Analytics>,
    enable_dashboard: bool,
@@ -136,7 +136,7 @@ enum OnFailure {
    KeepDb,
}

pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<(Arc<IndexScheduler>, AuthController)> {
pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<(Arc<IndexScheduler>, Arc<AuthController>)> {
    let empty_db = is_empty_db(&opt.db_path);
    let (index_scheduler, auth_controller) = if let Some(ref snapshot_path) = opt.import_snapshot {
        let snapshot_path_exists = snapshot_path.exists();
@@ -195,6 +195,7 @@ pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<(Arc<IndexScheduler>, Auth

    // We create a loop in a thread that registers snapshotCreation tasks
    let index_scheduler = Arc::new(index_scheduler);
    let auth_controller = Arc::new(auth_controller);
    if let ScheduleSnapshot::Enabled(snapshot_delay) = opt.schedule_snapshot {
        let snapshot_delay = Duration::from_secs(snapshot_delay);
        let index_scheduler = index_scheduler.clone();
@@ -233,6 +234,7 @@ fn open_or_create_database_unchecked(
    index_base_map_size: opt.max_index_size.get_bytes() as usize,
    indexer_config: (&opt.indexer_options).try_into()?,
    autobatching_enabled: true,
    max_number_of_tasks: 1_000_000,
    index_growth_amount: byte_unit::Byte::from_str("10GiB").unwrap().get_bytes() as usize,
    index_count: DEFAULT_INDEX_COUNT,
})?)
@@ -367,18 +369,20 @@ fn import_dump(
    log::info!("All documents successfully imported.");
}

let mut index_scheduler_dump = index_scheduler.register_dumped_task()?;

// 4. Import the tasks.
for ret in dump_reader.tasks()? {
    let (task, file) = ret?;
    index_scheduler.register_dumped_task(task, file)?;
    index_scheduler_dump.register_dumped_task(task, file)?;
}
Ok(())
Ok(index_scheduler_dump.finish()?)
}

pub fn configure_data(
    config: &mut web::ServiceConfig,
    index_scheduler: Data<IndexScheduler>,
    auth: AuthController,
    auth: Data<AuthController>,
    opt: &Opt,
    analytics: Arc<dyn Analytics>,
) {

@@ -74,13 +74,14 @@ async fn main() -> anyhow::Result<()> {

async fn run_http(
    index_scheduler: Arc<IndexScheduler>,
    auth_controller: AuthController,
    auth_controller: Arc<AuthController>,
    opt: Opt,
    analytics: Arc<dyn Analytics>,
) -> anyhow::Result<()> {
    let enable_dashboard = &opt.env == "development";
    let opt_clone = opt.clone();
    let index_scheduler = Data::from(index_scheduler);
    let auth_controller = Data::from(auth_controller);
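`Data::from(Arc<T>)` wraps the existing allocation rather than creating a second controller, so the snapshot thread keeps its `Arc` while the HTTP workers see the same instance through `Data`. A self-contained sketch (`Controller` is a stand-in type):

    use std::sync::Arc;
    use actix_web::web::Data;

    struct Controller; // stand-in for AuthController

    fn main() {
        let shared = Arc::new(Controller);
        // Data::from(Arc<T>) reuses the allocation instead of cloning the value.
        let for_http: Data<Controller> = Data::from(shared.clone());
        assert!(Arc::ptr_eq(&shared, &Data::into_inner(for_http)));
    }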

    let http_server = HttpServer::new(move || {
        create_app(
@@ -148,7 +149,7 @@ pub fn print_launch_resume(
        "
Thank you for using Meilisearch!

\nWe collect anonymized analytics to improve our product and your experience. To learn more, including how to turn off analytics, visit our dedicated documentation page: https://docs.meilisearch.com/learn/what_is_meilisearch/telemetry.html
\nWe collect anonymized analytics to improve our product and your experience. To learn more, including how to turn off analytics, visit our dedicated documentation page: https://www.meilisearch.com/docs/learn/what_is_meilisearch/telemetry

Anonymous telemetry:\t\"Enabled\""
    );

@@ -323,10 +323,10 @@ impl Opt {
    .clone()
    .unwrap_or_else(|| PathBuf::from(DEFAULT_CONFIG_FILE_PATH));

match std::fs::read(&config_file_path) {
match std::fs::read_to_string(&config_file_path) {
    Ok(config) => {
        // If the file is successfully read, we deserialize it with `toml`.
        let opt_from_config = toml::from_slice::<Opt>(&config)?;
        let opt_from_config = toml::from_str::<Opt>(&config)?;
        // Return an error if the config file contains 'config_file_path'
        // Using that key in the config file doesn't make sense because it creates a logical loop (config file referencing itself)
        if opt_from_config.config_file_path.is_some() {

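The move from `std::fs::read` to `read_to_string` goes hand in hand with `toml::from_str`: recent `toml` releases dropped the byte-slice entry point, so the file must arrive as `&str`. A minimal sketch of the resulting load path (the cut-down `Opt` here is a stand-in, not the real struct; assumes the `serde`, `toml`, and `anyhow` crates):

    use serde::Deserialize;

    #[derive(Deserialize)]
    struct Opt {
        db_path: Option<std::path::PathBuf>, // stand-in field
    }

    fn load_config(path: &str) -> anyhow::Result<Opt> {
        let config = std::fs::read_to_string(path)?;
        // from_str takes &str, hence read_to_string above.
        Ok(toml::from_str(&config)?)
    }
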
@@ -1,5 +1,6 @@
use std::str;

use actix_web::web::Data;
use actix_web::{web, HttpRequest, HttpResponse};
use deserr::actix_web::{AwebJson, AwebQueryParameter};
use deserr::Deserr;
@@ -35,7 +36,7 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
}

pub async fn create_api_key(
    auth_controller: GuardedData<ActionPolicy<{ actions::KEYS_CREATE }>, AuthController>,
    auth_controller: GuardedData<ActionPolicy<{ actions::KEYS_CREATE }>, Data<AuthController>>,
    body: AwebJson<CreateApiKey, DeserrJsonError>,
    _req: HttpRequest,
) -> Result<HttpResponse, ResponseError> {
@@ -66,7 +67,7 @@ impl ListApiKeys {
}

pub async fn list_api_keys(
    auth_controller: GuardedData<ActionPolicy<{ actions::KEYS_GET }>, AuthController>,
    auth_controller: GuardedData<ActionPolicy<{ actions::KEYS_GET }>, Data<AuthController>>,
    list_api_keys: AwebQueryParameter<ListApiKeys, DeserrQueryParamError>,
) -> Result<HttpResponse, ResponseError> {
    let paginate = list_api_keys.into_inner().as_pagination();
@@ -84,7 +85,7 @@ pub async fn list_api_keys(
}

pub async fn get_api_key(
    auth_controller: GuardedData<ActionPolicy<{ actions::KEYS_GET }>, AuthController>,
    auth_controller: GuardedData<ActionPolicy<{ actions::KEYS_GET }>, Data<AuthController>>,
    path: web::Path<AuthParam>,
) -> Result<HttpResponse, ResponseError> {
    let key = path.into_inner().key;
@@ -103,7 +104,7 @@ pub async fn get_api_key(
}

pub async fn patch_api_key(
    auth_controller: GuardedData<ActionPolicy<{ actions::KEYS_UPDATE }>, AuthController>,
    auth_controller: GuardedData<ActionPolicy<{ actions::KEYS_UPDATE }>, Data<AuthController>>,
    body: AwebJson<PatchApiKey, DeserrJsonError>,
    path: web::Path<AuthParam>,
) -> Result<HttpResponse, ResponseError> {
@@ -123,7 +124,7 @@ pub async fn patch_api_key(
}

pub async fn delete_api_key(
    auth_controller: GuardedData<ActionPolicy<{ actions::KEYS_DELETE }>, AuthController>,
    auth_controller: GuardedData<ActionPolicy<{ actions::KEYS_DELETE }>, Data<AuthController>>,
    path: web::Path<AuthParam>,
) -> Result<HttpResponse, ResponseError> {
    let key = path.into_inner().key;

@@ -19,7 +19,7 @@ pub fn configure(cfg: &mut web::ServiceConfig) {

pub async fn create_dump(
    index_scheduler: GuardedData<ActionPolicy<{ actions::DUMPS_CREATE }>, Data<IndexScheduler>>,
    auth_controller: GuardedData<ActionPolicy<{ actions::DUMPS_CREATE }>, AuthController>,
    auth_controller: GuardedData<ActionPolicy<{ actions::DUMPS_CREATE }>, Data<AuthController>>,
    req: HttpRequest,
    analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {

meilisearch/src/routes/indexes/facet_search.rs (new file, 133 lines)
@@ -0,0 +1,133 @@
use std::collections::{BTreeSet, HashSet};

use actix_web::web::Data;
use actix_web::{web, HttpRequest, HttpResponse};
use deserr::actix_web::AwebJson;
use index_scheduler::IndexScheduler;
use log::debug;
use meilisearch_types::deserr::DeserrJsonError;
use meilisearch_types::error::deserr_codes::*;
use meilisearch_types::error::ResponseError;
use meilisearch_types::index_uid::IndexUid;
use serde_json::Value;

use crate::analytics::{Analytics, FacetSearchAggregator};
use crate::extractors::authentication::policies::*;
use crate::extractors::authentication::GuardedData;
use crate::search::{
    add_search_rules, perform_facet_search, MatchingStrategy, SearchQuery, DEFAULT_CROP_LENGTH,
    DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG,
    DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET,
};

pub fn configure(cfg: &mut web::ServiceConfig) {
    cfg.service(web::resource("").route(web::post().to(search)));
}

// TODO improve the error messages
#[derive(Debug, Clone, Default, PartialEq, Eq, deserr::Deserr)]
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
pub struct FacetSearchQuery {
    #[deserr(default, error = DeserrJsonError<InvalidFacetSearchQuery>)]
    pub facet_query: Option<String>,
    #[deserr(error = DeserrJsonError<InvalidFacetSearchName>)]
    pub facet_name: String,
    #[deserr(default, error = DeserrJsonError<InvalidSearchQ>)]
    pub q: Option<String>,
    #[deserr(default = DEFAULT_SEARCH_OFFSET(), error = DeserrJsonError<InvalidSearchOffset>)]
    pub offset: usize,
    #[deserr(default = DEFAULT_SEARCH_LIMIT(), error = DeserrJsonError<InvalidSearchLimit>)]
    pub limit: usize,
    #[deserr(default, error = DeserrJsonError<InvalidSearchPage>)]
    pub page: Option<usize>,
    #[deserr(default, error = DeserrJsonError<InvalidSearchHitsPerPage>)]
    pub hits_per_page: Option<usize>,
    #[deserr(default, error = DeserrJsonError<InvalidSearchAttributesToRetrieve>)]
    pub attributes_to_retrieve: Option<BTreeSet<String>>,
    #[deserr(default, error = DeserrJsonError<InvalidSearchAttributesToCrop>)]
    pub attributes_to_crop: Option<Vec<String>>,
    #[deserr(default, error = DeserrJsonError<InvalidSearchCropLength>, default = DEFAULT_CROP_LENGTH())]
    pub crop_length: usize,
    #[deserr(default, error = DeserrJsonError<InvalidSearchAttributesToHighlight>)]
    pub attributes_to_highlight: Option<HashSet<String>>,
    #[deserr(default, error = DeserrJsonError<InvalidSearchShowMatchesPosition>, default)]
    pub show_matches_position: bool,
    #[deserr(default, error = DeserrJsonError<InvalidSearchFilter>)]
    pub filter: Option<Value>,
    #[deserr(default, error = DeserrJsonError<InvalidSearchSort>)]
    pub sort: Option<Vec<String>>,
    #[deserr(default, error = DeserrJsonError<InvalidSearchFacets>)]
    pub facets: Option<Vec<String>>,
    #[deserr(default, error = DeserrJsonError<InvalidSearchHighlightPreTag>, default = DEFAULT_HIGHLIGHT_PRE_TAG())]
    pub highlight_pre_tag: String,
    #[deserr(default, error = DeserrJsonError<InvalidSearchHighlightPostTag>, default = DEFAULT_HIGHLIGHT_POST_TAG())]
    pub highlight_post_tag: String,
    #[deserr(default, error = DeserrJsonError<InvalidSearchCropMarker>, default = DEFAULT_CROP_MARKER())]
    pub crop_marker: String,
    #[deserr(default, error = DeserrJsonError<InvalidSearchMatchingStrategy>, default)]
    pub matching_strategy: MatchingStrategy,
}

pub async fn search(
    index_scheduler: GuardedData<ActionPolicy<{ actions::SEARCH }>, Data<IndexScheduler>>,
    index_uid: web::Path<String>,
    params: AwebJson<FacetSearchQuery, DeserrJsonError>,
    req: HttpRequest,
    analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
    let index_uid = IndexUid::try_from(index_uid.into_inner())?;

    let query = params.into_inner();
    debug!("facet search called with params: {:?}", query);

    let mut aggregate = FacetSearchAggregator::from_query(&query, &req);

    let facet_query = query.facet_query.clone();
    let facet_name = query.facet_name.clone();
    let mut search_query = SearchQuery::from(query);

    // Tenant token search_rules.
    if let Some(search_rules) = index_scheduler.filters().get_index_search_rules(&index_uid) {
        add_search_rules(&mut search_query, search_rules);
    }

    let index = index_scheduler.index(&index_uid)?;
    let search_result = tokio::task::spawn_blocking(move || {
        perform_facet_search(&index, search_query, facet_query, facet_name)
    })
    .await?;

    if let Ok(ref search_result) = search_result {
        aggregate.succeed(search_result);
    }
    analytics.post_facet_search(aggregate);

    let search_result = search_result?;

    debug!("returns: {:?}", search_result);
    Ok(HttpResponse::Ok().json(search_result))
}

impl From<FacetSearchQuery> for SearchQuery {
    fn from(value: FacetSearchQuery) -> Self {
        SearchQuery {
            q: value.q,
            offset: value.offset,
            limit: value.limit,
            page: value.page,
            hits_per_page: value.hits_per_page,
            attributes_to_retrieve: value.attributes_to_retrieve,
            attributes_to_crop: value.attributes_to_crop,
            crop_length: value.crop_length,
            attributes_to_highlight: value.attributes_to_highlight,
            show_matches_position: value.show_matches_position,
            filter: value.filter,
            sort: value.sort,
            facets: value.facets,
            highlight_pre_tag: value.highlight_pre_tag,
            highlight_post_tag: value.highlight_post_tag,
            crop_marker: value.crop_marker,
            matching_strategy: value.matching_strategy,
        }
    }
}
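Put together, the new route accepts a POST on `/indexes/{index_uid}/facet-search` with a `FacetSearchQuery` body. A hedged client-side sketch, assuming a local instance on the default port, a hypothetical `movies` index with a filterable `genres` attribute, and the `reqwest` (with its `json` feature), `tokio`, and `serde_json` crates:

    #[tokio::main]
    async fn main() -> Result<(), reqwest::Error> {
        let response = reqwest::Client::new()
            .post("http://localhost:7700/indexes/movies/facet-search")
            .json(&serde_json::json!({ "facetName": "genres", "facetQuery": "a" }))
            .send()
            .await?;
        // Per FacetSearchResult, the body should look like
        // { "hits": [...], "query": "a", "processingTimeMs": ... }.
        println!("{}", response.text().await?);
        Ok(())
    }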
@@ -24,6 +24,7 @@ use crate::extractors::authentication::{AuthenticationError, GuardedData};
use crate::extractors::sequential_extractor::SeqHandler;

pub mod documents;
pub mod facet_search;
pub mod search;
pub mod settings;

@@ -44,6 +45,7 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
        .service(web::resource("/stats").route(web::get().to(SeqHandler(get_index_stats))))
        .service(web::scope("/documents").configure(documents::configure))
        .service(web::scope("/search").configure(search::configure))
        .service(web::scope("/facet-search").configure(facet_search::configure))
        .service(web::scope("/settings").configure(settings::configure)),
    );
}

@@ -17,7 +17,7 @@ pub fn configure(config: &mut web::ServiceConfig) {

pub async fn get_metrics(
    index_scheduler: GuardedData<ActionPolicy<{ actions::METRICS_GET }>, Data<IndexScheduler>>,
    auth_controller: GuardedData<ActionPolicy<{ actions::METRICS_GET }>, AuthController>,
    auth_controller: GuardedData<ActionPolicy<{ actions::METRICS_GET }>, Data<AuthController>>,
) -> Result<HttpResponse, ResponseError> {
    let auth_filters = index_scheduler.filters();
    if !auth_filters.all_indexes_authorized() {

@@ -238,7 +238,7 @@ pub struct Stats {

async fn get_stats(
    index_scheduler: GuardedData<ActionPolicy<{ actions::STATS_GET }>, Data<IndexScheduler>>,
    auth_controller: GuardedData<ActionPolicy<{ actions::STATS_GET }>, AuthController>,
    auth_controller: GuardedData<ActionPolicy<{ actions::STATS_GET }>, Data<AuthController>>,
    req: HttpRequest,
    analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
@@ -253,7 +253,7 @@ async fn get_stats(

pub fn create_all_stats(
    index_scheduler: Data<IndexScheduler>,
    auth_controller: AuthController,
    auth_controller: Data<AuthController>,
    filters: &meilisearch_auth::AuthFilter,
) -> Result<Stats, ResponseError> {
    let mut last_task: Option<OffsetDateTime> = None;
@@ -318,9 +318,14 @@ struct KeysResponse {

pub async fn get_health(
    req: HttpRequest,
    index_scheduler: Data<IndexScheduler>,
    auth_controller: Data<AuthController>,
    analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
    analytics.health_seen(&req);

    index_scheduler.health().unwrap();
    auth_controller.health().unwrap();

    Ok(HttpResponse::Ok().json(serde_json::json!({ "status": "available" })))
}

@@ -8,7 +8,9 @@ use either::Either;
use meilisearch_auth::IndexSearchRules;
use meilisearch_types::deserr::DeserrJsonError;
use meilisearch_types::error::deserr_codes::*;
use meilisearch_types::heed::RoTxn;
use meilisearch_types::index_uid::IndexUid;
use meilisearch_types::milli::{FacetValueHit, SearchForFacetValues};
use meilisearch_types::settings::DEFAULT_PAGINATION_MAX_TOTAL_HITS;
use meilisearch_types::{milli, Document};
use milli::tokenizer::TokenizerBuilder;
@@ -170,7 +172,7 @@ impl SearchQueryWithIndex {
    }
}

#[derive(Debug, Clone, PartialEq, Eq, Deserr)]
#[derive(Debug, Copy, Clone, PartialEq, Eq, Deserr)]
#[deserr(rename_all = camelCase)]
pub enum MatchingStrategy {
    /// Remove query words from last to first
@@ -241,6 +243,14 @@ pub struct FacetStats {
    pub max: f64,
}

#[derive(Serialize, Debug, Clone, PartialEq)]
#[serde(rename_all = "camelCase")]
pub struct FacetSearchResult {
    pub hits: Vec<FacetValueHit>,
    pub query: Option<String>,
    pub processing_time_ms: u128,
}

/// Incorporate search rules in search query
pub fn add_search_rules(query: &mut SearchQuery, rules: IndexSearchRules) {
    query.filter = match (query.filter.take(), rules.filter) {
@@ -261,14 +271,12 @@ pub fn add_search_rules(query: &mut SearchQuery, rules: IndexSearchRules) {
    }
}

pub fn perform_search(
    index: &Index,
    query: SearchQuery,
) -> Result<SearchResult, MeilisearchHttpError> {
    let before_search = Instant::now();
    let rtxn = index.read_txn()?;

    let mut search = index.search(&rtxn);
fn prepare_search<'t>(
    index: &'t Index,
    rtxn: &'t RoTxn,
    query: &'t SearchQuery,
) -> Result<(milli::Search<'t>, bool, usize, usize), MeilisearchHttpError> {
    let mut search = index.search(rtxn);

    if let Some(ref query) = query.q {
        search.query(query);
@@ -278,7 +286,7 @@ pub fn perform_search(
    search.terms_matching_strategy(query.matching_strategy.into());

    let max_total_hits = index
        .pagination_max_total_hits(&rtxn)
        .pagination_max_total_hits(rtxn)
        .map_err(milli::Error::from)?
        .unwrap_or(DEFAULT_PAGINATION_MAX_TOTAL_HITS);

@@ -320,6 +328,19 @@ pub fn perform_search(
        search.sort_criteria(sort);
    }

    Ok((search, is_finite_pagination, max_total_hits, offset))
}

pub fn perform_search(
    index: &Index,
    query: SearchQuery,
) -> Result<SearchResult, MeilisearchHttpError> {
    let before_search = Instant::now();
    let rtxn = index.read_txn()?;

    let (search, is_finite_pagination, max_total_hits, offset) =
        prepare_search(index, &rtxn, &query)?;

    let milli::SearchResult { documents_ids, matching_words, candidates, .. } = search.execute()?;

    let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
@@ -473,6 +494,30 @@ pub fn perform_search(
    Ok(result)
}

pub fn perform_facet_search(
    index: &Index,
    search_query: SearchQuery,
    facet_query: Option<String>,
    facet_name: String,
) -> Result<FacetSearchResult, MeilisearchHttpError> {
    let before_search = Instant::now();
    let rtxn = index.read_txn()?;

    let (search, _, _, _) = prepare_search(index, &rtxn, &search_query)?;
    let mut facet_search = SearchForFacetValues::new(facet_name, search);
    if let Some(facet_query) = &facet_query {
        facet_search.query(facet_query);
    }

    let hits = facet_search.execute()?;

    Ok(FacetSearchResult {
        hits,
        query: facet_query,
        processing_time_ms: before_search.elapsed().as_millis(),
    })
}
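Stripped of timing and the result struct, `perform_facet_search` reduces to the milli calls below. A sketch under the same types as the diff (a fragment, not a full function: `index` and `rtxn` as in the surrounding code, facet name and query hypothetical, and `?` assuming the enclosing error type):

    let search = index.search(&rtxn);
    let mut facet_search = SearchForFacetValues::new("genres".to_string(), search);
    facet_search.query("act");
    let hits: Vec<FacetValueHit> = facet_search.execute()?;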

fn insert_geo_distance(sorts: &[String], document: &mut Document) {
    lazy_static::lazy_static! {
        static ref GEO_REGEX: Regex =

@@ -60,7 +60,7 @@ async fn create_api_key_bad_uid() {
    snapshot!(code, @"400 Bad Request");
    snapshot!(json_string!(response), @r###"
    {
      "message": "Invalid value at `.uid`: invalid character: expected an optional prefix of `urn:uuid:` followed by [0-9a-zA-Z], found `o` at 2",
      "message": "Invalid value at `.uid`: invalid character: expected an optional prefix of `urn:uuid:` followed by [0-9a-fA-F-], found `o` at 2",
      "code": "invalid_api_key_uid",
      "type": "invalid_request",
      "link": "https://docs.meilisearch.com/errors#invalid_api_key_uid"

@@ -82,7 +82,7 @@ impl Server {
    > {
        actix_web::test::init_service(create_app(
            self.service.index_scheduler.clone().into(),
            self.service.auth.clone(),
            self.service.auth.clone().into(),
            self.service.options.clone(),
            analytics::MockAnalytics::new(&self.service.options),
            true,

@@ -13,7 +13,7 @@ use crate::common::encoder::Encoder;

pub struct Service {
    pub index_scheduler: Arc<IndexScheduler>,
    pub auth: AuthController,
    pub auth: Arc<AuthController>,
    pub options: Opt,
    pub api_key: Option<String>,
}
@@ -107,7 +107,7 @@ impl Service {
    pub async fn request(&self, mut req: test::TestRequest) -> (Value, StatusCode) {
        let app = test::init_service(create_app(
            self.index_scheduler.clone().into(),
            self.auth.clone(),
            self.auth.clone().into(),
            self.options.clone(),
            analytics::MockAnalytics::new(&self.options),
            true,

@@ -279,6 +279,81 @@ async fn add_csv_document() {
    "###);
}

#[actix_rt::test]
async fn add_csv_document_with_types() {
    let server = Server::new().await;
    let index = server.index("pets");

    let document = "#id:number,name:string,race:string,age:number,cute:boolean
0,jean,bernese mountain,2.5,true
1,,,,
2,lilou,pug,-2,false";

    let (response, code) = index.raw_update_documents(document, Some("text/csv"), "").await;
    snapshot!(code, @"202 Accepted");
    snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
    {
      "taskUid": 0,
      "indexUid": "pets",
      "status": "enqueued",
      "type": "documentAdditionOrUpdate",
      "enqueuedAt": "[date]"
    }
    "###);
    let response = index.wait_task(response["taskUid"].as_u64().unwrap()).await;
    snapshot!(json_string!(response, { ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]", ".duration" => "[duration]" }), @r###"
    {
      "uid": 0,
      "indexUid": "pets",
      "status": "succeeded",
      "type": "documentAdditionOrUpdate",
      "canceledBy": null,
      "details": {
        "receivedDocuments": 3,
        "indexedDocuments": 3
      },
      "error": null,
      "duration": "[duration]",
      "enqueuedAt": "[date]",
      "startedAt": "[date]",
      "finishedAt": "[date]"
    }
    "###);

    let (documents, code) = index.get_all_documents(GetAllDocumentsOptions::default()).await;
    snapshot!(code, @"200 OK");
    snapshot!(json_string!(documents), @r###"
    {
      "results": [
        {
          "#id": 0,
          "name": "jean",
          "race": "bernese mountain",
          "age": 2.5,
          "cute": true
        },
        {
          "#id": 1,
          "name": null,
          "race": null,
          "age": null,
          "cute": null
        },
        {
          "#id": 2,
          "name": "lilou",
          "race": "pug",
          "age": -2,
          "cute": false
        }
      ],
      "offset": 0,
      "limit": 20,
      "total": 3
    }
    "###);
}
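The test exercises a `name:type` header convention for CSV payloads, with `number`, `string`, and `boolean` types. Purely as an illustration of that convention, and not the engine's actual parser, such a header can be split like this:

    // Split "#id:number,name:string" into (name, type) pairs; columns without
    // an annotation default to "string" (an assumption made for this sketch).
    fn parse_typed_header(header: &str) -> Vec<(&str, &str)> {
        header
            .split(',')
            .map(|column| column.rsplit_once(':').unwrap_or((column, "string")))
            .collect()
    }

    fn main() {
        let columns = parse_typed_header("#id:number,name:string,cute:boolean");
        assert_eq!(columns[0], ("#id", "number"));
        assert_eq!(columns[2], ("cute", "boolean"));
    }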

#[actix_rt::test]
async fn add_csv_document_with_custom_delimiter() {
    let server = Server::new().await;
@@ -343,6 +418,40 @@ async fn add_csv_document_with_custom_delimiter() {
    "###);
}

#[actix_rt::test]
async fn add_csv_document_with_types_error() {
    let server = Server::new().await;
    let index = server.index("pets");

    let document = "#id:number,a:boolean,b:number
0,doggo,1";

    let (response, code) = index.raw_update_documents(document, Some("text/csv"), "").await;
    snapshot!(code, @"400 Bad Request");
    snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
    {
      "message": "The `csv` payload provided is malformed: `Error parsing boolean \"doggo\" at line 1: provided string was not `true` or `false``.",
      "code": "malformed_payload",
      "type": "invalid_request",
      "link": "https://docs.meilisearch.com/errors#malformed_payload"
    }
    "###);

    let document = "#id:number,a:boolean,b:number
0,true,doggo";

    let (response, code) = index.raw_update_documents(document, Some("text/csv"), "").await;
    snapshot!(code, @"400 Bad Request");
    snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
    {
      "message": "The `csv` payload provided is malformed: `Error parsing number \"doggo\" at line 1: invalid float literal`.",
      "code": "malformed_payload",
      "type": "invalid_request",
      "link": "https://docs.meilisearch.com/errors#malformed_payload"
    }
    "###);
}

/// any other content-type must be refused
#[actix_rt::test]
async fn error_add_documents_test_bad_content_types() {
@@ -1664,7 +1773,7 @@ async fn error_add_documents_payload_size() {
    "content": "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Donec metus erat, consequat in blandit venenatis, ultricies eu ipsum. Etiam luctus elit et mollis ultrices. Nam turpis risus, dictum non eros in, eleifend feugiat elit. Morbi non dolor pulvinar, sagittis mi sed, ultricies lorem. Nulla ultricies sem metus. Donec at suscipit quam, sed elementum mi. Suspendisse potenti. Fusce pharetra turpis tortor, sed eleifend odio dapibus ut. Nulla facilisi. Suspendisse elementum, dui eget aliquet dignissim, ex tellus aliquam nisl, at eleifend nisl metus tempus diam. Mauris fermentum sollicitudin efficitur. Donec dignissim est vitae elit finibus faucibus"
    }
);
let documents: Vec<_> = (0..16000).into_iter().map(|_| document.clone()).collect();
let documents: Vec<_> = (0..16000).map(|_| document.clone()).collect();
let documents = json!(documents);
let (response, code) = index.add_documents(documents, None).await;

@@ -1825,7 +1934,6 @@ async fn batch_several_documents_addition() {
    let index = server.index("test");

    let mut documents: Vec<_> = (0..150usize)
        .into_iter()
        .map(|id| {
            json!(
                {

@@ -1121,6 +1121,12 @@ async fn import_dump_v5() {
    assert_eq!(indexes["results"][1]["uid"], json!("test2"));
    assert_eq!(indexes["results"][0]["primaryKey"], json!("id"));

    // before doing anything we're going to wait until all the tasks in the dump have finished processing
    let result = server.tasks_filter("statuses=enqueued,processing").await.0;
    for task in result["results"].as_array().unwrap() {
        server.wait_task(task["uid"].as_u64().unwrap()).await;
    }

    let expected_stats = json!({
        "numberOfDocuments": 10,
        "isIndexing": false,

@@ -547,7 +547,7 @@ async fn filter_invalid_syntax_object() {
    index.wait_task(1).await;

    let expected_response = json!({
        "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass",
        "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass",
        "code": "invalid_search_filter",
        "type": "invalid_request",
        "link": "https://docs.meilisearch.com/errors#invalid_search_filter"
@@ -572,7 +572,7 @@ async fn filter_invalid_syntax_array() {
    index.wait_task(1).await;

    let expected_response = json!({
        "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass",
        "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass",
        "code": "invalid_search_filter",
        "type": "invalid_request",
        "link": "https://docs.meilisearch.com/errors#invalid_search_filter"
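The updated expectations surface the new filter operations `IS NULL`, `IS NOT NULL`, `IS EMPTY`, and `IS NOT EMPTY`. For reference, a request exercising them might look like the following sketch (field names hypothetical):

    // Hypothetical search payload using the new null/empty operations.
    let query = serde_json::json!({
        "q": "glass",
        "filter": "release_date IS NOT NULL AND overview IS NOT EMPTY",
    });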
@@ -672,7 +672,7 @@ async fn filter_reserved_geo_attribute_array() {
    index.wait_task(1).await;

    let expected_response = json!({
        "message": "`_geo` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` field coordinates.\n1:5 _geo = Glass",
        "message": "`_geo` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.\n1:13 _geo = Glass",
        "code": "invalid_search_filter",
        "type": "invalid_request",
        "link": "https://docs.meilisearch.com/errors#invalid_search_filter"
@@ -697,7 +697,7 @@ async fn filter_reserved_geo_attribute_string() {
    index.wait_task(1).await;

    let expected_response = json!({
        "message": "`_geo` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` field coordinates.\n1:5 _geo = Glass",
        "message": "`_geo` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.\n1:13 _geo = Glass",
        "code": "invalid_search_filter",
        "type": "invalid_request",
        "link": "https://docs.meilisearch.com/errors#invalid_search_filter"
@@ -722,7 +722,7 @@ async fn filter_reserved_attribute_array() {
    index.wait_task(1).await;

    let expected_response = json!({
        "message": "`_geoDistance` is a reserved keyword and thus can't be used as a filter expression.\n1:13 _geoDistance = Glass",
        "message": "`_geoDistance` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.\n1:21 _geoDistance = Glass",
        "code": "invalid_search_filter",
        "type": "invalid_request",
        "link": "https://docs.meilisearch.com/errors#invalid_search_filter"
@@ -747,7 +747,7 @@ async fn filter_reserved_attribute_string() {
    index.wait_task(1).await;

    let expected_response = json!({
        "message": "`_geoDistance` is a reserved keyword and thus can't be used as a filter expression.\n1:13 _geoDistance = Glass",
        "message": "`_geoDistance` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.\n1:21 _geoDistance = Glass",
        "code": "invalid_search_filter",
        "type": "invalid_request",
        "link": "https://docs.meilisearch.com/errors#invalid_search_filter"
@@ -760,6 +760,56 @@ async fn filter_reserved_attribute_string() {
        .await;
}

#[actix_rt::test]
async fn filter_reserved_geo_point_array() {
    let server = Server::new().await;
    let index = server.index("test");

    index.update_settings(json!({"filterableAttributes": ["title"]})).await;

    let documents = DOCUMENTS.clone();
    index.add_documents(documents, None).await;
    index.wait_task(1).await;

    let expected_response = json!({
        "message": "`_geoPoint` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.\n1:18 _geoPoint = Glass",
        "code": "invalid_search_filter",
        "type": "invalid_request",
        "link": "https://docs.meilisearch.com/errors#invalid_search_filter"
    });
    index
        .search(json!({"filter": ["_geoPoint = Glass"]}), |response, code| {
            assert_eq!(response, expected_response);
            assert_eq!(code, 400);
        })
        .await;
}

#[actix_rt::test]
async fn filter_reserved_geo_point_string() {
    let server = Server::new().await;
    let index = server.index("test");

    index.update_settings(json!({"filterableAttributes": ["title"]})).await;

    let documents = DOCUMENTS.clone();
    index.add_documents(documents, None).await;
    index.wait_task(1).await;

    let expected_response = json!({
        "message": "`_geoPoint` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.\n1:18 _geoPoint = Glass",
        "code": "invalid_search_filter",
        "type": "invalid_request",
        "link": "https://docs.meilisearch.com/errors#invalid_search_filter"
    });
    index
        .search(json!({"filter": "_geoPoint = Glass"}), |response, code| {
            assert_eq!(response, expected_response);
            assert_eq!(code, 400);
        })
        .await;
}

#[actix_rt::test]
async fn sort_geo_reserved_attribute() {
    let server = Server::new().await;

@@ -1,14 +1,11 @@
mod errors;

use byte_unit::{Byte, ByteUnit};
use meili_snap::insta::assert_json_snapshot;
use meili_snap::{json_string, snapshot};
use serde_json::json;
use tempfile::TempDir;
use time::format_description::well_known::Rfc3339;
use time::OffsetDateTime;

use crate::common::{default_settings, Server};
use crate::common::Server;

#[actix_rt::test]
async fn error_get_unexisting_task_status() {
@@ -1003,117 +1000,3 @@ async fn test_summarized_dump_creation() {
    }
    "###);
}

#[actix_web::test]
async fn test_task_queue_is_full() {
    let dir = TempDir::new().unwrap();
    let mut options = default_settings(dir.path());
    options.max_task_db_size = Byte::from_unit(500.0, ByteUnit::B).unwrap();

    let server = Server::new_with_options(options).await.unwrap();

    // the first task should be enqueued without issue
    let (result, code) = server.create_index(json!({ "uid": "doggo" })).await;
    snapshot!(code, @"202 Accepted");
    snapshot!(json_string!(result, { ".enqueuedAt" => "[date]" }), @r###"
    {
      "taskUid": 0,
      "indexUid": "doggo",
      "status": "enqueued",
      "type": "indexCreation",
      "enqueuedAt": "[date]"
    }
    "###);

    loop {
        let (res, code) = server.create_index(json!({ "uid": "doggo" })).await;
        if code == 422 {
            break;
        }
        if res["taskUid"] == json!(null) {
            panic!(
                "Encountered the strange case:\n{}",
                serde_json::to_string_pretty(&res).unwrap()
            );
        }
    }

    let (result, code) = server.create_index(json!({ "uid": "doggo" })).await;
    snapshot!(code, @"422 Unprocessable Entity");
    snapshot!(json_string!(result), @r###"
    {
      "message": "Meilisearch cannot receive write operations because the limit of the task database has been reached. Please delete tasks to continue performing write operations.",
      "code": "no_space_left_on_device",
      "type": "system",
      "link": "https://docs.meilisearch.com/errors#no_space_left_on_device"
    }
    "###);

    // But we should still be able to register task deletions IF they delete something
    let (result, code) = server.delete_tasks("uids=*").await;
    snapshot!(code, @"200 OK");
    snapshot!(json_string!(result, { ".enqueuedAt" => "[date]", ".taskUid" => "uid" }), @r###"
    {
      "taskUid": "uid",
      "indexUid": null,
      "status": "enqueued",
      "type": "taskDeletion",
      "enqueuedAt": "[date]"
    }
    "###);

    let result = server.wait_task(result["taskUid"].as_u64().unwrap()).await;
    snapshot!(json_string!(result["status"]), @r###""succeeded""###);

    // Now we should be able to register tasks again
    let (result, code) = server.create_index(json!({ "uid": "doggo" })).await;
    snapshot!(code, @"202 Accepted");
    snapshot!(json_string!(result, { ".enqueuedAt" => "[date]", ".taskUid" => "uid" }), @r###"
    {
      "taskUid": "uid",
      "indexUid": "doggo",
      "status": "enqueued",
      "type": "indexCreation",
      "enqueuedAt": "[date]"
    }
    "###);

    // we're going to fill up the queue once again
    loop {
        let (res, code) = server.delete_tasks("uids=0").await;
        if code == 422 {
            break;
        }
        if res["taskUid"] == json!(null) {
            panic!(
                "Encountered the strange case:\n{}",
                serde_json::to_string_pretty(&res).unwrap()
            );
        }
    }

    // But we should NOT be able to register this task because it doesn't match any tasks
    let (result, code) = server.delete_tasks("uids=0").await;
    snapshot!(code, @"422 Unprocessable Entity");
    snapshot!(json_string!(result), @r###"
    {
      "message": "Meilisearch cannot receive write operations because the limit of the task database has been reached. Please delete tasks to continue performing write operations.",
      "code": "no_space_left_on_device",
      "type": "system",
      "link": "https://docs.meilisearch.com/errors#no_space_left_on_device"
    }
    "###);

    // The deletion still works
    let (result, code) = server.delete_tasks("uids=*").await;
    snapshot!(code, @"200 OK");
    snapshot!(json_string!(result, { ".enqueuedAt" => "[date]", ".taskUid" => "uid" }), @r###"
    {
      "taskUid": "uid",
      "indexUid": null,
      "status": "enqueued",
      "type": "taskDeletion",
      "enqueuedAt": "[date]"
    }
    "###);
}

@@ -12,40 +12,40 @@ readme.workspace = true
license.workspace = true

[dependencies]
bimap = { version = "0.6.2", features = ["serde"] }
bimap = { version = "0.6.3", features = ["serde"] }
bincode = "1.3.3"
bstr = "1.0.1"
bstr = "1.4.0"
byteorder = "1.4.3"
charabia = { version = "0.7.1", default-features = false }
charabia = { version = "0.7.2", default-features = false }
concat-arrays = "0.1.2"
crossbeam-channel = "0.5.6"
crossbeam-channel = "0.5.8"
deserr = "0.5.0"
either = "1.8.0"
either = "1.8.1"
flatten-serde-json = { path = "../flatten-serde-json" }
fst = "0.4.7"
fxhash = "0.2.1"
geoutils = "0.5.1"
grenad = { version = "0.4.3", default-features = false, features = ["tempfile"] }
grenad = { version = "0.4.4", default-features = false, features = ["tempfile"] }
heed = { git = "https://github.com/meilisearch/heed", tag = "v0.12.5", default-features = false, features = ["lmdb", "sync-read-txn"] }
json-depth-checker = { path = "../json-depth-checker" }
levenshtein_automata = { version = "0.2.1", features = ["fst_automaton"] }
memmap2 = "0.5.7"
memmap2 = "0.5.10"
obkv = "0.2.0"
once_cell = "1.15.0"
ordered-float = "3.2.0"
rayon = "1.5.3"
once_cell = "1.17.1"
ordered-float = "3.6.0"
rayon = "1.7.0"
roaring = "0.10.1"
rstar = { version = "0.9.3", features = ["serde"] }
serde = { version = "1.0.145", features = ["derive"] }
serde_json = { version = "1.0.85", features = ["preserve_order"] }
rstar = { version = "0.10.0", features = ["serde"] }
serde = { version = "1.0.160", features = ["derive"] }
serde_json = { version = "1.0.95", features = ["preserve_order"] }
slice-group-by = "0.3.0"
smallstr = { version = "0.3.0", features = ["serde"] }
smallvec = "1.10.0"
smartstring = "1.0.1"
tempfile = "3.3.0"
thiserror = "1.0.37"
time = { version = "0.3.15", features = ["serde-well-known", "formatting", "parsing", "macros"] }
uuid = { version = "1.1.2", features = ["v4"] }
tempfile = "3.5.0"
thiserror = "1.0.40"
time = { version = "0.3.20", features = ["serde-well-known", "formatting", "parsing", "macros"] }
uuid = { version = "1.3.1", features = ["v4"] }

filter-parser = { path = "../filter-parser" }

@@ -55,11 +55,12 @@ itertools = "0.10.5"
# logging
log = "0.4.17"
logging_timer = "1.1.0"
csv = "1.1.6"
csv = "1.2.1"

[dev-dependencies]
mimalloc = { version = "0.1.29", default-features = false }
big_s = "1.0.2"
insta = "1.21.0"
insta = "1.29.0"
maplit = "1.0.2"
md5 = "0.7.0"
rand = {version = "0.8.5", features = ["small_rng"] }
@@ -89,3 +90,6 @@ korean = ["charabia/korean"]

# allow thai specialized tokenization
thai = ["charabia/thai"]

# allow greek specialized tokenization
greek = ["charabia/greek"]

milli/examples/index.rs (new file, 114 lines)
@@ -0,0 +1,114 @@
use std::error::Error;
use std::fs::File;
use std::io::{BufRead, BufReader, Cursor, Seek};
use std::path::Path;

use heed::EnvOpenOptions;
use milli::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
use milli::update::{IndexDocuments, IndexDocumentsConfig, IndexerConfig, Settings};
use milli::{Index, Object};

fn usage(error: &str, program_name: &str) -> String {
    format!(
        "{}. Usage: {} <PATH-TO-INDEX> <PATH-TO-DATASET> [searchable_fields] [filterable_fields]",
        error, program_name
    )
}

fn main() -> Result<(), Box<dyn Error>> {
    let mut args = std::env::args();
    let program_name = args.next().expect("No program name");
    let index_path =
        args.next().unwrap_or_else(|| panic!("{}", usage("Missing path to index.", &program_name)));
    let dataset_path = args
        .next()
        .unwrap_or_else(|| panic!("{}", usage("Missing path to source dataset.", &program_name)));
    // let primary_key = args.next().unwrap_or_else(|| "id".into());
    // "title overview"
    let searchable_fields: Vec<String> = args
        .next()
        .map(|arg| arg.split_whitespace().map(ToString::to_string).collect())
        .unwrap_or_default();

    println!("{searchable_fields:?}");
    // "release_date genres"
    let filterable_fields: Vec<String> = args
        .next()
        .map(|arg| arg.split_whitespace().map(ToString::to_string).collect())
        .unwrap_or_default();

    let mut options = EnvOpenOptions::new();
    options.map_size(100 * 1024 * 1024 * 1024); // 100 GB

    std::fs::create_dir_all(&index_path).unwrap();
    let index = Index::new(options, index_path).unwrap();
    let mut wtxn = index.write_txn().unwrap();

    let config = IndexerConfig::default();
    let mut builder = Settings::new(&mut wtxn, &index, &config);
    // builder.set_primary_key(primary_key);
    let searchable_fields = searchable_fields.iter().map(|s| s.to_string()).collect();
    builder.set_searchable_fields(searchable_fields);
    let filterable_fields = filterable_fields.iter().map(|s| s.to_string()).collect();
    builder.set_filterable_fields(filterable_fields);

    builder.execute(|_| (), || false).unwrap();

    let config = IndexerConfig::default();
    let indexing_config = IndexDocumentsConfig::default();

    let builder =
        IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| (), || false).unwrap();

    let documents = documents_from(
        &dataset_path,
        Path::new(&dataset_path).extension().unwrap_or_default().to_str().unwrap_or_default(),
    );
    let (builder, user_error) = builder.add_documents(documents).unwrap();
    user_error.unwrap();
    builder.execute().unwrap();
    wtxn.commit().unwrap();

    index.prepare_for_closing().wait();
    Ok(())
}
fn documents_from(filename: &str, filetype: &str) -> DocumentsBatchReader<impl BufRead + Seek> {
    let reader = File::open(filename)
        .unwrap_or_else(|_| panic!("could not find the dataset in: {}", filename));
    let reader = BufReader::new(reader);
    let documents = match filetype {
        "csv" => documents_from_csv(reader).unwrap(),
        "json" => documents_from_json(reader).unwrap(),
        "jsonl" => documents_from_jsonl(reader).unwrap(),
        otherwise => panic!("invalid update format {:?}", otherwise),
    };
    DocumentsBatchReader::from_reader(Cursor::new(documents)).unwrap()
}

fn documents_from_jsonl(reader: impl BufRead) -> milli::Result<Vec<u8>> {
    let mut documents = DocumentsBatchBuilder::new(Vec::new());

    for result in serde_json::Deserializer::from_reader(reader).into_iter::<Object>() {
        let object = result.unwrap();
        documents.append_json_object(&object)?;
    }

    documents.into_inner().map_err(Into::into)
}

fn documents_from_json(reader: impl BufRead) -> milli::Result<Vec<u8>> {
    let mut documents = DocumentsBatchBuilder::new(Vec::new());

    documents.append_json_array(reader)?;

    documents.into_inner().map_err(Into::into)
}

fn documents_from_csv(reader: impl BufRead) -> milli::Result<Vec<u8>> {
    let csv = csv::Reader::from_reader(reader);

    let mut documents = DocumentsBatchBuilder::new(Vec::new());
    documents.append_csv(csv)?;

    documents.into_inner().map_err(Into::into)
}
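Assuming the usual cargo conventions for examples, this indexer would presumably be launched with something like `cargo run --example index -- ./movies.ms ./movies.json 'title overview' 'release_date genres'`; the dataset path and field lists here are hypothetical.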
milli/examples/search.rs (new file, 117 lines)
@@ -0,0 +1,117 @@
use std::error::Error;
use std::io::stdin;
use std::path::Path;
use std::time::Instant;

use heed::EnvOpenOptions;
use milli::{
    execute_search, DefaultSearchLogger, GeoSortStrategy, Index, SearchContext, SearchLogger,
    TermsMatchingStrategy,
};

#[global_allocator]
static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc;

fn main() -> Result<(), Box<dyn Error>> {
    let mut args = std::env::args();
    let program_name = args.next().expect("No program name");
    let dataset = args.next().unwrap_or_else(|| {
        panic!(
            "Missing path to index. Usage: {} <PATH-TO-INDEX> [<logger-dir>] [print-documents]",
            program_name
        )
    });
    let detailed_logger_dir = args.next();
    let print_documents: bool =
        if let Some(arg) = args.next() { arg == "print-documents" } else { false };

    let mut options = EnvOpenOptions::new();
    options.map_size(100 * 1024 * 1024 * 1024); // 100 GB

    let index = Index::new(options, dataset)?;
    let txn = index.read_txn()?;
    let mut query = String::new();
    while stdin().read_line(&mut query)? > 0 {
        for _ in 0..2 {
            let mut default_logger = DefaultSearchLogger;
            // FIXME: consider resetting the state of the logger between search executions as otherwise panics are possible.
            // Worked around here by recreating the logger on each iteration of the loop
            let mut detailed_logger = detailed_logger_dir
                .as_ref()
                .map(|logger_dir| (milli::VisualSearchLogger::default(), logger_dir));
            let logger: &mut dyn SearchLogger<_> =
                if let Some((detailed_logger, _)) = detailed_logger.as_mut() {
                    detailed_logger
                } else {
                    &mut default_logger
                };

            let start = Instant::now();

            let mut ctx = SearchContext::new(&index, &txn);
            let docs = execute_search(
                &mut ctx,
                &(!query.trim().is_empty()).then(|| query.trim().to_owned()),
                TermsMatchingStrategy::Last,
                false,
                &None,
                &None,
                GeoSortStrategy::default(),
                0,
                20,
                None,
                &mut DefaultSearchLogger,
                logger,
            )?;
            if let Some((logger, dir)) = detailed_logger {
                logger.finish(&mut ctx, Path::new(dir))?;
            }
            let elapsed = start.elapsed();
            println!("new: {}us, docids: {:?}", elapsed.as_micros(), docs.documents_ids);
            if print_documents {
                let documents = index
                    .documents(&txn, docs.documents_ids.iter().copied())
                    .unwrap()
                    .into_iter()
                    .map(|(id, obkv)| {
                        let mut object = serde_json::Map::default();
                        for (fid, fid_name) in index.fields_ids_map(&txn).unwrap().iter() {
                            let value = obkv.get(fid).unwrap();
                            let value: serde_json::Value = serde_json::from_slice(value).unwrap();
                            object.insert(fid_name.to_owned(), value);
                        }
                        (id, serde_json::to_string_pretty(&object).unwrap())
                    })
                    .collect::<Vec<_>>();

                for (id, document) in documents {
                    println!("{id}:");
                    println!("{document}");
                }

                let documents = index
                    .documents(&txn, docs.documents_ids.iter().copied())
                    .unwrap()
                    .into_iter()
                    .map(|(id, obkv)| {
                        let mut object = serde_json::Map::default();
                        for (fid, fid_name) in index.fields_ids_map(&txn).unwrap().iter() {
                            let value = obkv.get(fid).unwrap();
                            let value: serde_json::Value = serde_json::from_slice(value).unwrap();
                            object.insert(fid_name.to_owned(), value);
                        }
                        (id, serde_json::to_string_pretty(&object).unwrap())
                    })
                    .collect::<Vec<_>>();
                println!("{}us: {:?}", elapsed.as_micros(), docs.documents_ids);
                for (id, document) in documents {
                    println!("{id}:");
                    println!("{document}");
                }
            }
        }
        query.clear();
    }

    Ok(())
}
milli/examples/settings.rs (new file, 33 lines)
@@ -0,0 +1,33 @@
// use big_s::S;
use heed::EnvOpenOptions;
// use maplit::hashset;
use milli::{
    update::{IndexerConfig, Settings},
    Criterion, Index,
};

fn main() {
    let mut options = EnvOpenOptions::new();
    options.map_size(100 * 1024 * 1024 * 1024); // 100 GB

    let index = Index::new(options, "data_movies.ms").unwrap();
    let mut wtxn = index.write_txn().unwrap();

    let config = IndexerConfig::default();
    let mut builder = Settings::new(&mut wtxn, &index, &config);

    // builder.set_min_word_len_one_typo(5);
    // builder.set_min_word_len_two_typos(7);
    // builder.set_sortable_fields(hashset! { S("release_date") });
    builder.set_criteria(vec![
        Criterion::Words,
        Criterion::Typo,
        Criterion::Proximity,
        Criterion::Attribute,
        Criterion::Sort,
        Criterion::Exactness,
    ]);

    builder.execute(|_| (), || false).unwrap();
    wtxn.commit().unwrap();
}
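The settings example configures ranking criteria but adds no documents. A hedged sketch of the companion step, assuming milli's IndexDocuments update API as it stands at this revision (none of these calls appear in the diff itself):

// Hypothetical sketch: add a single document to an already-opened index.
fn add_one_document(index: &milli::Index) {
    use std::io::Cursor;
    use milli::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
    use milli::update::{IndexDocuments, IndexDocumentsConfig, IndexerConfig};

    let mut wtxn = index.write_txn().unwrap();
    let config = IndexerConfig::default();
    let indexing_config = IndexDocumentsConfig::default();
    let builder =
        IndexDocuments::new(&mut wtxn, index, &config, indexing_config, |_| (), || false).unwrap();

    let mut batch = DocumentsBatchBuilder::new(Vec::new());
    let doc = serde_json::json!({ "id": 1, "title": "Carol" });
    batch.append_json_object(doc.as_object().unwrap()).unwrap();
    let reader =
        DocumentsBatchReader::from_reader(Cursor::new(batch.into_inner().unwrap())).unwrap();

    let (builder, user_error) = builder.add_documents(reader).unwrap();
    user_error.unwrap();
    builder.execute().unwrap();
    wtxn.commit().unwrap();
}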
@@ -81,6 +81,8 @@ impl FromStr for Member {
        if is_reserved_keyword(text)
            || text.starts_with("_geoRadius(")
            || text.starts_with("_geoBoundingBox(")
            || text.starts_with("_geo(")
            || text.starts_with("_geoDistance(")
        {
            return Err(AscDescError::ReservedKeyword { name: text.to_string() })?;
        }
@@ -265,6 +267,13 @@ mod tests {
            ("_geoPoint(0, -180.000001):desc", GeoError(BadGeoError::Lng(-180.000001))),
            ("_geoPoint(159.256, 130):asc", GeoError(BadGeoError::Lat(159.256))),
            ("_geoPoint(12, -2021):desc", GeoError(BadGeoError::Lng(-2021.))),
            ("_geo(12, -2021):asc", ReservedKeyword { name: S("_geo(12, -2021)") }),
            ("_geo(12, -2021):desc", ReservedKeyword { name: S("_geo(12, -2021)") }),
            ("_geoDistance(12, -2021):asc", ReservedKeyword { name: S("_geoDistance(12, -2021)") }),
            (
                "_geoDistance(12, -2021):desc",
                ReservedKeyword { name: S("_geoDistance(12, -2021)") },
            ),
        ];

        for (req, expected_error) in invalid_req {
@@ -114,14 +114,15 @@ impl<W: Write> DocumentsBatchBuilder<W> {
|
||||
self.value_buffer.clear();
|
||||
|
||||
let value = &record[*i];
|
||||
let trimmed_value = value.trim();
|
||||
match type_ {
|
||||
AllowedType::Number => {
|
||||
if value.trim().is_empty() {
|
||||
if trimmed_value.is_empty() {
|
||||
to_writer(&mut self.value_buffer, &Value::Null)?;
|
||||
} else if let Ok(integer) = value.trim().parse::<i64>() {
|
||||
} else if let Ok(integer) = trimmed_value.parse::<i64>() {
|
||||
to_writer(&mut self.value_buffer, &integer)?;
|
||||
} else {
|
||||
match value.trim().parse::<f64>() {
|
||||
match trimmed_value.parse::<f64>() {
|
||||
Ok(float) => {
|
||||
to_writer(&mut self.value_buffer, &float)?;
|
||||
}
|
||||
@@ -135,6 +136,24 @@ impl<W: Write> DocumentsBatchBuilder<W> {
|
||||
}
|
||||
}
|
||||
}
|
||||
AllowedType::Boolean => {
|
||||
if trimmed_value.is_empty() {
|
||||
to_writer(&mut self.value_buffer, &Value::Null)?;
|
||||
} else {
|
||||
match trimmed_value.parse::<bool>() {
|
||||
Ok(bool) => {
|
||||
to_writer(&mut self.value_buffer, &bool)?;
|
||||
}
|
||||
Err(error) => {
|
||||
return Err(Error::ParseBool {
|
||||
error,
|
||||
line,
|
||||
value: value.to_string(),
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
AllowedType::String => {
|
||||
if value.is_empty() {
|
||||
to_writer(&mut self.value_buffer, &Value::Null)?;
|
||||
@@ -173,6 +192,7 @@ impl<W: Write> DocumentsBatchBuilder<W> {
|
||||
#[derive(Debug)]
|
||||
enum AllowedType {
|
||||
String,
|
||||
Boolean,
|
||||
Number,
|
||||
}
|
||||
|
||||
@@ -181,6 +201,7 @@ fn parse_csv_header(header: &str) -> (&str, AllowedType) {
|
||||
match header.rsplit_once(':') {
|
||||
Some((field_name, field_type)) => match field_type {
|
||||
"string" => (field_name, AllowedType::String),
|
||||
"boolean" => (field_name, AllowedType::Boolean),
|
||||
"number" => (field_name, AllowedType::Number),
|
||||
// if the pattern isn't reconized, we keep the whole field.
|
||||
_otherwise => (header, AllowedType::String),
|
||||
|
||||
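Not from the diff: how the typed-header convention above behaves in practice, assuming parse_csv_header and AllowedType are in scope.

#[test]
fn typed_csv_headers() {
    // A recognized `:type` suffix is split off the field name...
    assert!(matches!(parse_csv_header("age:number"), ("age", AllowedType::Number)));
    assert!(matches!(parse_csv_header("adult:boolean"), ("adult", AllowedType::Boolean)));
    // ...while an unrecognized suffix keeps the whole header as a string field.
    assert!(matches!(parse_csv_header("price:euros"), ("price:euros", AllowedType::String)));
}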
@@ -3,7 +3,7 @@ mod enriched;
mod reader;
mod serde_impl;

use std::fmt::{self, Debug};
use std::fmt::Debug;
use std::io;
use std::str::Utf8Error;

@@ -87,71 +87,30 @@ impl DocumentsBatchIndex {
    }
}

#[derive(Debug)]
#[derive(Debug, thiserror::Error)]
pub enum Error {
    #[error("Error parsing number {value:?} at line {line}: {error}")]
    ParseFloat { error: std::num::ParseFloatError, line: usize, value: String },
    #[error("Error parsing boolean {value:?} at line {line}: {error}")]
    ParseBool { error: std::str::ParseBoolError, line: usize, value: String },
    #[error("Invalid document addition format, missing the documents batch index.")]
    InvalidDocumentFormat,
    #[error("Invalid enriched data.")]
    InvalidEnrichedData,
    InvalidUtf8(Utf8Error),
    Csv(csv::Error),
    Json(serde_json::Error),
    #[error(transparent)]
    InvalidUtf8(#[from] Utf8Error),
    #[error(transparent)]
    Csv(#[from] csv::Error),
    #[error(transparent)]
    Json(#[from] serde_json::Error),
    #[error(transparent)]
    Serialize(serde_json::Error),
    Grenad(grenad::Error),
    Io(io::Error),
    #[error(transparent)]
    Grenad(#[from] grenad::Error),
    #[error(transparent)]
    Io(#[from] io::Error),
}

impl From<csv::Error> for Error {
    fn from(e: csv::Error) -> Self {
        Self::Csv(e)
    }
}

impl From<io::Error> for Error {
    fn from(other: io::Error) -> Self {
        Self::Io(other)
    }
}

impl From<serde_json::Error> for Error {
    fn from(other: serde_json::Error) -> Self {
        Self::Json(other)
    }
}

impl From<grenad::Error> for Error {
    fn from(other: grenad::Error) -> Self {
        Self::Grenad(other)
    }
}

impl From<Utf8Error> for Error {
    fn from(other: Utf8Error) -> Self {
        Self::InvalidUtf8(other)
    }
}

impl fmt::Display for Error {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Error::ParseFloat { error, line, value } => {
                write!(f, "Error parsing number {:?} at line {}: {}", value, line, error)
            }
            Error::InvalidDocumentFormat => {
                f.write_str("Invalid document addition format, missing the documents batch index.")
            }
            Error::InvalidEnrichedData => f.write_str("Invalid enriched data."),
            Error::InvalidUtf8(e) => write!(f, "{}", e),
            Error::Io(e) => write!(f, "{}", e),
            Error::Serialize(e) => write!(f, "{}", e),
            Error::Grenad(e) => write!(f, "{}", e),
            Error::Csv(e) => write!(f, "{}", e),
            Error::Json(e) => write!(f, "{}", e),
        }
    }
}

impl std::error::Error for Error {}
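The hunk above trades roughly forty lines of hand-written From and Display impls for thiserror derives. A minimal standalone sketch of the same pattern, not taken from the diff, assuming the thiserror crate:

use thiserror::Error;

#[derive(Debug, Error)]
enum SketchError {
    // `#[error(transparent)]` forwards Display and source() to the inner error,
    // and `#[from]` derives the matching From impl.
    #[error(transparent)]
    Io(#[from] std::io::Error),
    #[error("error parsing number {value:?} at line {line}: {error}")]
    ParseFloat { error: std::num::ParseFloatError, line: usize, value: String },
}

fn open(path: &str) -> Result<std::fs::File, SketchError> {
    Ok(std::fs::File::open(path)?) // the io::Error converts via the derived From
}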
#[cfg(test)]
pub fn objects_from_json_value(json: serde_json::Value) -> Vec<crate::Object> {
    let documents = match json {
@@ -274,6 +233,19 @@ mod test {
        ]);
    }

    #[test]
    fn csv_types_dont_panic() {
        let csv1_content =
            "id:number,b:boolean,c,d:number\n1,,,\n2,true,doggo,2\n3,false,the best doggo,-2\n4,,\"Hello, World!\",2.5";
        let csv1 = csv::Reader::from_reader(Cursor::new(csv1_content));

        let mut builder = DocumentsBatchBuilder::new(Vec::new());
        builder.append_csv(csv1).unwrap();
        let vector = builder.into_inner().unwrap();

        DocumentsBatchReader::from_reader(Cursor::new(vector)).unwrap();
    }

    #[test]
    fn out_of_order_csv_fields() {
        let csv1_content = "id:number,b\n1,0";
@@ -122,6 +122,16 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco
        }
    )]
    InvalidSortableAttribute { field: String, valid_fields: BTreeSet<String> },
    #[error("Attribute `{}` is not filterable. {}",
        .field,
        match .valid_fields.is_empty() {
            true => "This index does not have configured filterable attributes.".to_string(),
            false => format!("Available filterable attributes are: `{}`.",
                valid_fields.iter().map(AsRef::as_ref).collect::<Vec<&str>>().join(", ")
            ),
        }
    )]
    InvalidSearchFacet { field: String, valid_fields: BTreeSet<String> },
    #[error("{}", HeedError::BadOpenOptions)]
    InvalidLmdbOpenOptions,
    #[error("The sort ranking rule must be specified in the ranking rules settings to use the sort parameter at search time.")]
milli/src/heed_codec/fst_set_codec.rs (new file, 23 lines)
@@ -0,0 +1,23 @@
use std::borrow::Cow;

use fst::Set;
use heed::{BytesDecode, BytesEncode};

/// A codec for values of type `Set<&[u8]>`.
pub struct FstSetCodec;

impl<'a> BytesEncode<'a> for FstSetCodec {
    type EItem = Set<Vec<u8>>;

    fn bytes_encode(item: &'a Self::EItem) -> Option<Cow<'a, [u8]>> {
        Some(Cow::Borrowed(item.as_fst().as_bytes()))
    }
}

impl<'a> BytesDecode<'a> for FstSetCodec {
    type DItem = Set<&'a [u8]>;

    fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
        Set::new(bytes).ok()
    }
}
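Not from the diff: a round-trip sketch for the codec above, assuming the fst crate is available.

fn fst_set_codec_round_trip() {
    use fst::Set;
    use heed::{BytesDecode, BytesEncode};

    // `from_iter` requires its input to be sorted.
    let set: Set<Vec<u8>> = Set::from_iter(["bar", "baz", "foo"]).unwrap();
    let bytes = FstSetCodec::bytes_encode(&set).unwrap();
    let decoded = FstSetCodec::bytes_decode(&bytes).unwrap();
    assert_eq!(decoded.len(), 3);
}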
@@ -2,6 +2,7 @@ mod beu32_str_codec;
mod byte_slice_ref;
pub mod facet;
mod field_id_word_count_codec;
mod fst_set_codec;
mod obkv_codec;
mod roaring_bitmap;
mod roaring_bitmap_length;
@@ -15,11 +16,12 @@ pub use str_ref::StrRefCodec;

pub use self::beu32_str_codec::BEU32StrCodec;
pub use self::field_id_word_count_codec::FieldIdWordCountCodec;
pub use self::fst_set_codec::FstSetCodec;
pub use self::obkv_codec::ObkvCodec;
pub use self::roaring_bitmap::{BoRoaringBitmapCodec, CboRoaringBitmapCodec, RoaringBitmapCodec};
pub use self::roaring_bitmap_length::{
    BoRoaringBitmapLenCodec, CboRoaringBitmapLenCodec, RoaringBitmapLenCodec,
};
pub use self::script_language_codec::ScriptLanguageCodec;
pub use self::str_beu32_codec::StrBEU32Codec;
pub use self::str_beu32_codec::{StrBEU16Codec, StrBEU32Codec};
pub use self::str_str_u8_codec::{U8StrStrCodec, UncheckedU8StrStrCodec};
@@ -36,3 +36,39 @@ impl<'a> heed::BytesEncode<'a> for StrBEU32Codec {
        Some(Cow::Owned(bytes))
    }
}

pub struct StrBEU16Codec;

impl<'a> heed::BytesDecode<'a> for StrBEU16Codec {
    type DItem = (&'a str, u16);

    fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
        let footer_len = size_of::<u16>();

        if bytes.len() < footer_len + 1 {
            return None;
        }

        let (word_plus_nul_byte, bytes) = bytes.split_at(bytes.len() - footer_len);
        let (_, word) = word_plus_nul_byte.split_last()?;
        let word = str::from_utf8(word).ok()?;
        let pos = bytes.try_into().map(u16::from_be_bytes).ok()?;

        Some((word, pos))
    }
}

impl<'a> heed::BytesEncode<'a> for StrBEU16Codec {
    type EItem = (&'a str, u16);

    fn bytes_encode((word, pos): &Self::EItem) -> Option<Cow<[u8]>> {
        let pos = pos.to_be_bytes();

        let mut bytes = Vec::with_capacity(word.len() + 1 + pos.len());
        bytes.extend_from_slice(word.as_bytes());
        bytes.push(0);
        bytes.extend_from_slice(&pos[..]);

        Some(Cow::Owned(bytes))
    }
}
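Not from the diff: the on-disk layout is the word bytes, a NUL byte, then a big-endian u16, so a value should round-trip exactly; a sketch assuming heed's codec traits are in scope.

fn str_beu16_round_trip() {
    use heed::{BytesDecode, BytesEncode};

    let bytes = StrBEU16Codec::bytes_encode(&("doggo", 5u16)).unwrap();
    assert_eq!(&bytes[..], b"doggo\x00\x00\x05");
    let (word, pos) = StrBEU16Codec::bytes_decode(&bytes).unwrap();
    assert_eq!((word, pos), ("doggo", 5));
}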
@@ -19,12 +19,12 @@ use crate::heed_codec::facet::{
    FacetGroupKeyCodec, FacetGroupValueCodec, FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec,
    FieldIdCodec, OrderedF64Codec,
};
use crate::heed_codec::{ScriptLanguageCodec, StrRefCodec};
use crate::heed_codec::{FstSetCodec, ScriptLanguageCodec, StrBEU16Codec, StrRefCodec};
use crate::{
    default_criteria, BEU32StrCodec, BoRoaringBitmapCodec, CboRoaringBitmapCodec, Criterion,
    DocumentId, ExternalDocumentsIds, FacetDistribution, FieldDistribution, FieldId,
    FieldIdWordCountCodec, GeoPoint, ObkvCodec, Result, RoaringBitmapCodec, RoaringBitmapLenCodec,
    Search, StrBEU32Codec, U8StrStrCodec, BEU16, BEU32,
    Search, U8StrStrCodec, BEU16, BEU32,
};

pub const DEFAULT_MIN_WORD_LEN_ONE_TYPO: u8 = 5;
@@ -76,11 +76,16 @@ pub mod db_name {
    pub const WORD_PREFIX_PAIR_PROXIMITY_DOCIDS: &str = "word-prefix-pair-proximity-docids";
    pub const PREFIX_WORD_PAIR_PROXIMITY_DOCIDS: &str = "prefix-word-pair-proximity-docids";
    pub const WORD_POSITION_DOCIDS: &str = "word-position-docids";
    pub const WORD_FIELD_ID_DOCIDS: &str = "word-field-id-docids";
    pub const WORD_PREFIX_POSITION_DOCIDS: &str = "word-prefix-position-docids";
    pub const WORD_PREFIX_FIELD_ID_DOCIDS: &str = "word-prefix-field-id-docids";
    pub const FIELD_ID_WORD_COUNT_DOCIDS: &str = "field-id-word-count-docids";
    pub const FACET_ID_F64_DOCIDS: &str = "facet-id-f64-docids";
    pub const FACET_ID_EXISTS_DOCIDS: &str = "facet-id-exists-docids";
    pub const FACET_ID_IS_NULL_DOCIDS: &str = "facet-id-is-null-docids";
    pub const FACET_ID_IS_EMPTY_DOCIDS: &str = "facet-id-is-empty-docids";
    pub const FACET_ID_STRING_DOCIDS: &str = "facet-id-string-docids";
    pub const FACET_ID_STRING_FST: &str = "facet-id-string-fst";
    pub const FIELD_ID_DOCID_FACET_F64S: &str = "field-id-docid-facet-f64s";
    pub const FIELD_ID_DOCID_FACET_STRINGS: &str = "field-id-docid-facet-strings";
    pub const DOCUMENTS: &str = "documents";
@@ -118,22 +123,33 @@ pub struct Index {
    pub prefix_word_pair_proximity_docids: Database<U8StrStrCodec, CboRoaringBitmapCodec>,

    /// Maps the word and the position with the docids that corresponds to it.
    pub word_position_docids: Database<StrBEU32Codec, CboRoaringBitmapCodec>,
    pub word_position_docids: Database<StrBEU16Codec, CboRoaringBitmapCodec>,
    /// Maps the word and the field id with the docids that corresponds to it.
    pub word_fid_docids: Database<StrBEU16Codec, CboRoaringBitmapCodec>,

    /// Maps the field id and the word count with the docids that corresponds to it.
    pub field_id_word_count_docids: Database<FieldIdWordCountCodec, CboRoaringBitmapCodec>,
    /// Maps the position of a word prefix with all the docids where this prefix appears.
    pub word_prefix_position_docids: Database<StrBEU32Codec, CboRoaringBitmapCodec>,
    /// Maps the word prefix and a position with all the docids where the prefix appears at the position.
    pub word_prefix_position_docids: Database<StrBEU16Codec, CboRoaringBitmapCodec>,
    /// Maps the word prefix and a field id with all the docids where the prefix appears inside the field
    pub word_prefix_fid_docids: Database<StrBEU16Codec, CboRoaringBitmapCodec>,

    /// Maps the script and language with all the docids that corresponds to it.
    pub script_language_docids: Database<ScriptLanguageCodec, RoaringBitmapCodec>,

    /// Maps the facet field id and the docids for which this field exists
    pub facet_id_exists_docids: Database<FieldIdCodec, CboRoaringBitmapCodec>,
    /// Maps the facet field id and the docids for which this field is set as null
    pub facet_id_is_null_docids: Database<FieldIdCodec, CboRoaringBitmapCodec>,
    /// Maps the facet field id and the docids for which this field is considered empty
    pub facet_id_is_empty_docids: Database<FieldIdCodec, CboRoaringBitmapCodec>,

    /// Maps the facet field id and ranges of numbers with the docids that corresponds to them.
    pub facet_id_f64_docids: Database<FacetGroupKeyCodec<OrderedF64Codec>, FacetGroupValueCodec>,
    /// Maps the facet field id and ranges of strings with the docids that corresponds to them.
    pub facet_id_string_docids: Database<FacetGroupKeyCodec<StrRefCodec>, FacetGroupValueCodec>,
    /// Maps the facet field id of the string facets with an FST containing all the facets values.
    pub facet_id_string_fst: Database<OwnedType<BEU16>, FstSetCodec>,

    /// Maps the document id, the facet field id and the numbers.
    pub field_id_docid_facet_f64s: Database<FieldDocIdFacetF64Codec, Unit>,
@@ -153,7 +169,7 @@ impl Index {
    ) -> Result<Index> {
        use db_name::*;

        options.max_dbs(19);
        options.max_dbs(24);
        unsafe { options.flag(Flags::MdbAlwaysFreePages) };

        let env = options.open(path)?;
@@ -170,11 +186,16 @@ impl Index {
        let prefix_word_pair_proximity_docids =
            env.create_database(Some(PREFIX_WORD_PAIR_PROXIMITY_DOCIDS))?;
        let word_position_docids = env.create_database(Some(WORD_POSITION_DOCIDS))?;
        let word_fid_docids = env.create_database(Some(WORD_FIELD_ID_DOCIDS))?;
        let field_id_word_count_docids = env.create_database(Some(FIELD_ID_WORD_COUNT_DOCIDS))?;
        let word_prefix_position_docids = env.create_database(Some(WORD_PREFIX_POSITION_DOCIDS))?;
        let word_prefix_fid_docids = env.create_database(Some(WORD_PREFIX_FIELD_ID_DOCIDS))?;
        let facet_id_f64_docids = env.create_database(Some(FACET_ID_F64_DOCIDS))?;
        let facet_id_string_docids = env.create_database(Some(FACET_ID_STRING_DOCIDS))?;
        let facet_id_string_fst = env.create_database(Some(FACET_ID_STRING_FST))?;
        let facet_id_exists_docids = env.create_database(Some(FACET_ID_EXISTS_DOCIDS))?;
        let facet_id_is_null_docids = env.create_database(Some(FACET_ID_IS_NULL_DOCIDS))?;
        let facet_id_is_empty_docids = env.create_database(Some(FACET_ID_IS_EMPTY_DOCIDS))?;

        let field_id_docid_facet_f64s = env.create_database(Some(FIELD_ID_DOCID_FACET_F64S))?;
        let field_id_docid_facet_strings =
@@ -196,11 +217,16 @@ impl Index {
            word_prefix_pair_proximity_docids,
            prefix_word_pair_proximity_docids,
            word_position_docids,
            word_fid_docids,
            word_prefix_position_docids,
            word_prefix_fid_docids,
            field_id_word_count_docids,
            facet_id_f64_docids,
            facet_id_string_docids,
            facet_id_string_fst,
            facet_id_exists_docids,
            facet_id_is_null_docids,
            facet_id_is_empty_docids,
            field_id_docid_facet_f64s,
            field_id_docid_facet_strings,
            documents,
@@ -833,6 +859,30 @@ impl Index {
        }
    }

    /// Retrieve all the documents which contain this field id set as null
    pub fn null_faceted_documents_ids(
        &self,
        rtxn: &RoTxn,
        field_id: FieldId,
    ) -> heed::Result<RoaringBitmap> {
        match self.facet_id_is_null_docids.get(rtxn, &BEU16::new(field_id))? {
            Some(docids) => Ok(docids),
            None => Ok(RoaringBitmap::new()),
        }
    }

    /// Retrieve all the documents which contain this field id and that is considered empty
    pub fn empty_faceted_documents_ids(
        &self,
        rtxn: &RoTxn,
        field_id: FieldId,
    ) -> heed::Result<RoaringBitmap> {
        match self.facet_id_is_empty_docids.get(rtxn, &BEU16::new(field_id))? {
            Some(docids) => Ok(docids),
            None => Ok(RoaringBitmap::new()),
        }
    }

    /// Retrieve all the documents which contain this field id
    pub fn exists_faceted_documents_ids(
        &self,
@@ -1284,10 +1334,10 @@ pub(crate) mod tests {
        let index_documents_config = IndexDocumentsConfig::default();
        Self { inner, indexer_config, index_documents_config, _tempdir }
    }
    /// Creates a temporary index, with a default `4096 * 1000` size. This should be enough for
    /// Creates a temporary index, with a default `4096 * 2000` size. This should be enough for
    /// most tests.
    pub fn new() -> Self {
        Self::new_with_map_size(4096 * 1000)
        Self::new_with_map_size(4096 * 2000)
    }
    pub fn add_documents_using_wtxn<'t, R>(
        &'t self,
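Not part of the diff: a sketch of querying the new is-null and is-empty bitmaps added above, assuming an already-opened index and a field name that exists in it.

fn count_null_release_dates(index: &milli::Index) -> milli::Result<()> {
    let rtxn = index.read_txn()?;
    if let Some(fid) = index.fields_ids_map(&rtxn)?.id("release_date") {
        let null_docs = index.null_faceted_documents_ids(&rtxn, fid)?;
        let empty_docs = index.empty_faceted_documents_ids(&rtxn, fid)?;
        println!("null: {}, empty: {}", null_docs.len(), empty_docs.len());
    }
    Ok(())
}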
@@ -1,4 +1,56 @@
#![cfg_attr(all(test, fuzzing), feature(no_coverage))]
#![allow(clippy::type_complexity)]

#[cfg(test)]
#[global_allocator]
pub static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc;

// #[cfg(test)]
// pub mod allocator {
//     use std::alloc::{GlobalAlloc, System};
//     use std::sync::atomic::{self, AtomicI64};

//     #[global_allocator]
//     pub static ALLOC: CountingAlloc = CountingAlloc {
//         max_resident: AtomicI64::new(0),
//         resident: AtomicI64::new(0),
//         allocated: AtomicI64::new(0),
//     };

//     pub struct CountingAlloc {
//         pub max_resident: AtomicI64,
//         pub resident: AtomicI64,
//         pub allocated: AtomicI64,
//     }
//     unsafe impl GlobalAlloc for CountingAlloc {
//         unsafe fn alloc(&self, layout: std::alloc::Layout) -> *mut u8 {
//             self.allocated.fetch_add(layout.size() as i64, atomic::Ordering::SeqCst);
//             let old_resident =
//                 self.resident.fetch_add(layout.size() as i64, atomic::Ordering::SeqCst);

//             let resident = old_resident + layout.size() as i64;
//             self.max_resident.fetch_max(resident, atomic::Ordering::SeqCst);

//             // if layout.size() > 1_000_000 {
//             //     eprintln!(
//             //         "allocating {} with new resident size: {resident}",
//             //         layout.size() / 1_000_000
//             //     );
//             //     // let trace = std::backtrace::Backtrace::capture();
//             //     // let t = trace.to_string();
//             //     // eprintln!("{t}");
//             // }

//             System.alloc(layout)
//         }

//         unsafe fn dealloc(&self, ptr: *mut u8, layout: std::alloc::Layout) {
//             self.resident.fetch_sub(layout.size() as i64, atomic::Ordering::Relaxed);
//             System.dealloc(ptr, layout)
//         }
//     }
// }

#[macro_use]
pub mod documents;

@@ -26,6 +78,10 @@ use charabia::normalizer::{CharNormalizer, CompatibilityDecompositionNormalizer}
pub use filter_parser::{Condition, FilterCondition, Span, Token};
use fxhash::{FxHasher32, FxHasher64};
pub use grenad::CompressionType;
pub use search::new::{
    execute_search, DefaultSearchLogger, GeoSortStrategy, SearchContext, SearchLogger,
    VisualSearchLogger,
};
use serde_json::Value;
pub use {charabia as tokenizer, heed};

@@ -43,8 +99,8 @@ pub use self::heed_codec::{
};
pub use self::index::Index;
pub use self::search::{
    CriterionImplementationStrategy, FacetDistribution, Filter, FormatOptions, MatchBounds,
    MatcherBuilder, MatchingWord, MatchingWords, Search, SearchResult, TermsMatchingStrategy,
    FacetDistribution, FacetValueHit, Filter, FormatOptions, MatchBounds, MatcherBuilder,
    MatchingWords, Search, SearchForFacetValues, SearchResult, TermsMatchingStrategy,
    DEFAULT_VALUES_PER_FACET,
};

@@ -100,6 +156,23 @@ pub fn relative_from_absolute_position(absolute: Position) -> (FieldId, Relative
pub fn absolute_from_relative_position(field_id: FieldId, relative: RelativePosition) -> Position {
    (field_id as u32) << 16 | (relative as u32)
}
// TODO: this is wrong, but will do for now
/// Compute the "bucketed" absolute position from the field id and relative position in the field.
///
/// In a bucketed position, the accuracy of the relative position is reduced exponentially as it gets larger.
pub fn bucketed_position(relative: u16) -> u16 {
    // The first few relative positions are kept intact.
    if relative < 16 {
        relative
    } else if relative < 24 {
        // Relative positions between 16 and 24 all become equal to 24
        24
    } else {
        // Then, groups of positions that have the same base-2 logarithm are reduced to
        // the same relative position: the smallest power of 2 that is greater than them
        (relative as f64).log2().ceil().exp2() as u16
    }
}
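Not from the diff: a few sample values to make the bucketing concrete.

#[test]
fn bucketed_position_samples() {
    assert_eq!(bucketed_position(7), 7); // below 16: kept intact
    assert_eq!(bucketed_position(20), 24); // 16..24: collapsed to 24
    assert_eq!(bucketed_position(25), 32); // otherwise 2^ceil(log2(n)) = 2^5
    assert_eq!(bucketed_position(300), 512); // 2^ceil(log2(300)) = 2^9
}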
/// Transform a raw obkv store into a JSON Object.
pub fn obkv_to_json(
@@ -1,569 +0,0 @@
use std::mem::take;

use heed::BytesDecode;
use itertools::Itertools;
use log::debug;
use ordered_float::OrderedFloat;
use roaring::RoaringBitmap;

use super::{Criterion, CriterionParameters, CriterionResult};
use crate::facet::FacetType;
use crate::heed_codec::facet::{FacetGroupKeyCodec, OrderedF64Codec};
use crate::heed_codec::ByteSliceRefCodec;
use crate::search::criteria::{resolve_query_tree, CriteriaBuilder, InitialCandidates};
use crate::search::facet::{ascending_facet_sort, descending_facet_sort};
use crate::search::query_tree::Operation;
use crate::search::CriterionImplementationStrategy;
use crate::{FieldId, Index, Result};

/// Threshold on the number of candidates that makes
/// the system choose between one algorithm or another.
const CANDIDATES_THRESHOLD: u64 = 1000;

pub struct AscDesc<'t> {
    index: &'t Index,
    rtxn: &'t heed::RoTxn<'t>,
    field_name: String,
    field_id: Option<FieldId>,
    is_ascending: bool,
    query_tree: Option<Operation>,
    candidates: Box<dyn Iterator<Item = heed::Result<RoaringBitmap>> + 't>,
    allowed_candidates: RoaringBitmap,
    initial_candidates: InitialCandidates,
    faceted_candidates: RoaringBitmap,
    implementation_strategy: CriterionImplementationStrategy,
    parent: Box<dyn Criterion + 't>,
}

impl<'t> AscDesc<'t> {
    pub fn asc(
        index: &'t Index,
        rtxn: &'t heed::RoTxn,
        parent: Box<dyn Criterion + 't>,
        field_name: String,
        implementation_strategy: CriterionImplementationStrategy,
    ) -> Result<Self> {
        Self::new(index, rtxn, parent, field_name, true, implementation_strategy)
    }

    pub fn desc(
        index: &'t Index,
        rtxn: &'t heed::RoTxn,
        parent: Box<dyn Criterion + 't>,
        field_name: String,
        implementation_strategy: CriterionImplementationStrategy,
    ) -> Result<Self> {
        Self::new(index, rtxn, parent, field_name, false, implementation_strategy)
    }

    fn new(
        index: &'t Index,
        rtxn: &'t heed::RoTxn,
        parent: Box<dyn Criterion + 't>,
        field_name: String,
        is_ascending: bool,
        implementation_strategy: CriterionImplementationStrategy,
    ) -> Result<Self> {
        let fields_ids_map = index.fields_ids_map(rtxn)?;
        let field_id = fields_ids_map.id(&field_name);
        let faceted_candidates = match field_id {
            Some(field_id) => {
                let number_faceted =
                    index.faceted_documents_ids(rtxn, field_id, FacetType::Number)?;
                let string_faceted =
                    index.faceted_documents_ids(rtxn, field_id, FacetType::String)?;
                number_faceted | string_faceted
            }
            None => RoaringBitmap::default(),
        };

        Ok(AscDesc {
            index,
            rtxn,
            field_name,
            field_id,
            is_ascending,
            query_tree: None,
            candidates: Box::new(std::iter::empty()),
            allowed_candidates: RoaringBitmap::new(),
            faceted_candidates,
            initial_candidates: InitialCandidates::Estimated(RoaringBitmap::new()),
            implementation_strategy,
            parent,
        })
    }
}

impl<'t> Criterion for AscDesc<'t> {
    #[logging_timer::time("AscDesc::{}")]
    fn next(&mut self, params: &mut CriterionParameters) -> Result<Option<CriterionResult>> {
        // remove excluded candidates when next is called, instead of doing it in the loop.
        self.allowed_candidates -= params.excluded_candidates;

        loop {
            debug!(
                "Facet {}({}) iteration",
                if self.is_ascending { "Asc" } else { "Desc" },
                self.field_name
            );

            match self.candidates.next().transpose()? {
                None if !self.allowed_candidates.is_empty() => {
                    return Ok(Some(CriterionResult {
                        query_tree: self.query_tree.clone(),
                        candidates: Some(take(&mut self.allowed_candidates)),
                        filtered_candidates: None,
                        initial_candidates: Some(self.initial_candidates.take()),
                    }));
                }
                None => match self.parent.next(params)? {
                    Some(CriterionResult {
                        query_tree,
                        candidates,
                        filtered_candidates,
                        initial_candidates,
                    }) => {
                        self.query_tree = query_tree;
                        let mut candidates = match (&self.query_tree, candidates) {
                            (_, Some(candidates)) => candidates,
                            (Some(qt), None) => {
                                let context = CriteriaBuilder::new(self.rtxn, self.index)?;
                                resolve_query_tree(&context, qt, params.wdcache)?
                            }
                            (None, None) => self.index.documents_ids(self.rtxn)?,
                        };

                        if let Some(filtered_candidates) = filtered_candidates {
                            candidates &= filtered_candidates;
                        }

                        match initial_candidates {
                            Some(initial_candidates) => {
                                self.initial_candidates |= initial_candidates
                            }
                            None => self.initial_candidates.map_inplace(|c| c | &candidates),
                        }

                        if candidates.is_empty() {
                            continue;
                        }

                        self.allowed_candidates = &candidates - params.excluded_candidates;
                        self.candidates = match self.field_id {
                            Some(field_id) => facet_ordered(
                                self.index,
                                self.rtxn,
                                field_id,
                                self.is_ascending,
                                candidates & &self.faceted_candidates,
                                self.implementation_strategy,
                            )?,
                            None => Box::new(std::iter::empty()),
                        };
                    }
                    None => return Ok(None),
                },
                Some(mut candidates) => {
                    candidates -= params.excluded_candidates;
                    self.allowed_candidates -= &candidates;
                    return Ok(Some(CriterionResult {
                        query_tree: self.query_tree.clone(),
                        candidates: Some(candidates),
                        filtered_candidates: None,
                        initial_candidates: Some(self.initial_candidates.take()),
                    }));
                }
            }
        }
    }
}

fn facet_ordered_iterative<'t>(
    index: &'t Index,
    rtxn: &'t heed::RoTxn,
    field_id: FieldId,
    is_ascending: bool,
    candidates: RoaringBitmap,
) -> Result<Box<dyn Iterator<Item = heed::Result<RoaringBitmap>> + 't>> {
    let number_iter = iterative_facet_number_ordered_iter(
        index,
        rtxn,
        field_id,
        is_ascending,
        candidates.clone(),
    )?;
    let string_iter =
        iterative_facet_string_ordered_iter(index, rtxn, field_id, is_ascending, candidates)?;
    Ok(Box::new(number_iter.chain(string_iter).map(Ok)) as Box<dyn Iterator<Item = _>>)
}

fn facet_extreme_value<'t>(
    mut extreme_it: impl Iterator<Item = heed::Result<(RoaringBitmap, &'t [u8])>> + 't,
) -> Result<Option<f64>> {
    let extreme_value =
        if let Some(extreme_value) = extreme_it.next() { extreme_value } else { return Ok(None) };
    let (_, extreme_value) = extreme_value?;

    Ok(OrderedF64Codec::bytes_decode(extreme_value))
}

pub fn facet_min_value<'t>(
    index: &'t Index,
    rtxn: &'t heed::RoTxn,
    field_id: FieldId,
    candidates: RoaringBitmap,
) -> Result<Option<f64>> {
    let db = index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>();
    let it = ascending_facet_sort(rtxn, db, field_id, candidates)?;
    facet_extreme_value(it)
}

pub fn facet_max_value<'t>(
    index: &'t Index,
    rtxn: &'t heed::RoTxn,
    field_id: FieldId,
    candidates: RoaringBitmap,
) -> Result<Option<f64>> {
    let db = index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>();
    let it = descending_facet_sort(rtxn, db, field_id, candidates)?;
    facet_extreme_value(it)
}

fn facet_ordered_set_based<'t>(
    index: &'t Index,
    rtxn: &'t heed::RoTxn,
    field_id: FieldId,
    is_ascending: bool,
    candidates: RoaringBitmap,
) -> Result<Box<dyn Iterator<Item = heed::Result<RoaringBitmap>> + 't>> {
    let number_db =
        index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>();
    let string_db =
        index.facet_id_string_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>();

    let (number_iter, string_iter) = if is_ascending {
        let number_iter = ascending_facet_sort(rtxn, number_db, field_id, candidates.clone())?;
        let string_iter = ascending_facet_sort(rtxn, string_db, field_id, candidates)?;

        (itertools::Either::Left(number_iter), itertools::Either::Left(string_iter))
    } else {
        let number_iter = descending_facet_sort(rtxn, number_db, field_id, candidates.clone())?;
        let string_iter = descending_facet_sort(rtxn, string_db, field_id, candidates)?;

        (itertools::Either::Right(number_iter), itertools::Either::Right(string_iter))
    };

    Ok(Box::new(number_iter.chain(string_iter).map(|res| res.map(|(doc_ids, _)| doc_ids))))
}

/// Returns an iterator over groups of the given candidates in ascending or descending order.
///
/// It will either use an iterative or a recursive method on the whole facet database depending
/// on the number of candidates to rank.
fn facet_ordered<'t>(
    index: &'t Index,
    rtxn: &'t heed::RoTxn,
    field_id: FieldId,
    is_ascending: bool,
    candidates: RoaringBitmap,
    implementation_strategy: CriterionImplementationStrategy,
) -> Result<Box<dyn Iterator<Item = heed::Result<RoaringBitmap>> + 't>> {
    match implementation_strategy {
        CriterionImplementationStrategy::OnlyIterative => {
            facet_ordered_iterative(index, rtxn, field_id, is_ascending, candidates)
        }
        CriterionImplementationStrategy::OnlySetBased => {
            facet_ordered_set_based(index, rtxn, field_id, is_ascending, candidates)
        }
        CriterionImplementationStrategy::Dynamic => {
            if candidates.len() <= CANDIDATES_THRESHOLD {
                facet_ordered_iterative(index, rtxn, field_id, is_ascending, candidates)
            } else {
                facet_ordered_set_based(index, rtxn, field_id, is_ascending, candidates)
            }
        }
    }
}

/// Fetch the whole list of candidates facet number values one by one and order them by it.
///
/// This function is fast when the amount of candidates to rank is small.
fn iterative_facet_number_ordered_iter<'t>(
    index: &'t Index,
    rtxn: &'t heed::RoTxn,
    field_id: FieldId,
    is_ascending: bool,
    candidates: RoaringBitmap,
) -> Result<impl Iterator<Item = RoaringBitmap> + 't> {
    let mut docids_values = Vec::with_capacity(candidates.len() as usize);
    for docid in candidates.iter() {
        let left = (field_id, docid, f64::MIN);
        let right = (field_id, docid, f64::MAX);
        let mut iter = index.field_id_docid_facet_f64s.range(rtxn, &(left..=right))?;
        let entry = if is_ascending { iter.next() } else { iter.last() };
        if let Some(((_, _, value), ())) = entry.transpose()? {
            docids_values.push((docid, OrderedFloat(value)));
        }
    }
    docids_values.sort_unstable_by_key(|(_, v)| *v);
    let iter = docids_values.into_iter();
    let iter = if is_ascending {
        Box::new(iter) as Box<dyn Iterator<Item = _>>
    } else {
        Box::new(iter.rev())
    };

    // The itertools GroupBy iterator doesn't provide an owned version, we are therefore
    // required to collect the result into an owned collection (a Vec).
    // https://github.com/rust-itertools/itertools/issues/499
    #[allow(clippy::needless_collect)]
    let vec: Vec<_> = iter
        .group_by(|(_, v)| *v)
        .into_iter()
        .map(|(_, ids)| ids.map(|(id, _)| id).collect())
        .collect();

    Ok(vec.into_iter())
}

/// Fetch the whole list of candidates facet string values one by one and order them by it.
///
/// This function is fast when the amount of candidates to rank is small.
fn iterative_facet_string_ordered_iter<'t>(
    index: &'t Index,
    rtxn: &'t heed::RoTxn,
    field_id: FieldId,
    is_ascending: bool,
    candidates: RoaringBitmap,
) -> Result<impl Iterator<Item = RoaringBitmap> + 't> {
    let mut docids_values = Vec::with_capacity(candidates.len() as usize);
    for docid in candidates.iter() {
        let left = (field_id, docid, "");
        let right = (field_id, docid.saturating_add(1), "");
        // FIXME Doing this means that it will never be possible to retrieve
        // the document with id 2^32, not sure this is a real problem.
        let mut iter = index.field_id_docid_facet_strings.range(rtxn, &(left..right))?;
        let entry = if is_ascending { iter.next() } else { iter.last() };
        if let Some(((_, _, value), _)) = entry.transpose()? {
            docids_values.push((docid, value));
        }
    }
    docids_values.sort_unstable_by_key(|(_, v)| *v);
    let iter = docids_values.into_iter();
    let iter = if is_ascending {
        Box::new(iter) as Box<dyn Iterator<Item = _>>
    } else {
        Box::new(iter.rev())
    };

    // The itertools GroupBy iterator doesn't provide an owned version, we are therefore
    // required to collect the result into an owned collection (a Vec).
    // https://github.com/rust-itertools/itertools/issues/499
    #[allow(clippy::needless_collect)]
    let vec: Vec<_> = iter
        .group_by(|(_, v)| *v)
        .into_iter()
        .map(|(_, ids)| ids.map(|(id, _)| id).collect())
        .collect();

    Ok(vec.into_iter())
}

#[cfg(test)]
mod tests {
    use std::str::FromStr;

    use big_s::S;
    use maplit::hashset;

    use crate::index::tests::TempIndex;
    use crate::{AscDesc, Criterion, Filter, Search, SearchResult};

    // Note that in this test, only the iterative sort algorithms are used. Set the CANDIDATES_THRESHOLD
    // constant to 0 to ensure that the other sort algorithms are also correct.
    #[test]
    fn sort_criterion_placeholder() {
        let index = TempIndex::new();

        index
            .update_settings(|settings| {
                settings.set_primary_key("id".to_owned());
                settings
                    .set_sortable_fields(maplit::hashset! { S("id"), S("mod_10"), S("mod_20") });
                settings.set_criteria(vec![Criterion::Sort]);
            })
            .unwrap();

        let mut docs = vec![];
        for i in 0..100 {
            docs.push(
                serde_json::json!({ "id": i, "mod_10": format!("{}", i % 10), "mod_20": i % 20 }),
            );
        }

        index.add_documents(documents!(docs)).unwrap();

        let all_ids = (0..100).collect::<Vec<_>>();

        let rtxn = index.read_txn().unwrap();

        let mut search = Search::new(&rtxn, &index);
        search.sort_criteria(vec![AscDesc::from_str("mod_10:desc").unwrap()]);
        search.limit(100);

        let SearchResult { mut documents_ids, .. } = search.execute().unwrap();
        insta::assert_snapshot!(format!("{documents_ids:?}"), @"[9, 19, 29, 39, 49, 59, 69, 79, 89, 99, 8, 18, 28, 38, 48, 58, 68, 78, 88, 98, 7, 17, 27, 37, 47, 57, 67, 77, 87, 97, 6, 16, 26, 36, 46, 56, 66, 76, 86, 96, 5, 15, 25, 35, 45, 55, 65, 75, 85, 95, 4, 14, 24, 34, 44, 54, 64, 74, 84, 94, 3, 13, 23, 33, 43, 53, 63, 73, 83, 93, 2, 12, 22, 32, 42, 52, 62, 72, 82, 92, 1, 11, 21, 31, 41, 51, 61, 71, 81, 91, 0, 10, 20, 30, 40, 50, 60, 70, 80, 90]");
        documents_ids.sort();
        assert_eq!(all_ids, documents_ids);

        let mut search = Search::new(&rtxn, &index);
        search.sort_criteria(vec![
            AscDesc::from_str("mod_10:desc").unwrap(),
            AscDesc::from_str("id:desc").unwrap(),
        ]);
        search.limit(100);

        let SearchResult { mut documents_ids, .. } = search.execute().unwrap();
        insta::assert_snapshot!(format!("{documents_ids:?}"), @"[99, 89, 79, 69, 59, 49, 39, 29, 19, 9, 98, 88, 78, 68, 58, 48, 38, 28, 18, 8, 97, 87, 77, 67, 57, 47, 37, 27, 17, 7, 96, 86, 76, 66, 56, 46, 36, 26, 16, 6, 95, 85, 75, 65, 55, 45, 35, 25, 15, 5, 94, 84, 74, 64, 54, 44, 34, 24, 14, 4, 93, 83, 73, 63, 53, 43, 33, 23, 13, 3, 92, 82, 72, 62, 52, 42, 32, 22, 12, 2, 91, 81, 71, 61, 51, 41, 31, 21, 11, 1, 90, 80, 70, 60, 50, 40, 30, 20, 10, 0]");
        documents_ids.sort();
        assert_eq!(all_ids, documents_ids);

        let mut search = Search::new(&rtxn, &index);
        search.sort_criteria(vec![
            AscDesc::from_str("mod_10:desc").unwrap(),
            AscDesc::from_str("mod_20:asc").unwrap(),
        ]);
        search.limit(100);

        let SearchResult { mut documents_ids, .. } = search.execute().unwrap();
        insta::assert_snapshot!(format!("{documents_ids:?}"), @"[9, 29, 49, 69, 89, 19, 39, 59, 79, 99, 8, 28, 48, 68, 88, 18, 38, 58, 78, 98, 7, 27, 47, 67, 87, 17, 37, 57, 77, 97, 6, 26, 46, 66, 86, 16, 36, 56, 76, 96, 5, 25, 45, 65, 85, 15, 35, 55, 75, 95, 4, 24, 44, 64, 84, 14, 34, 54, 74, 94, 3, 23, 43, 63, 83, 13, 33, 53, 73, 93, 2, 22, 42, 62, 82, 12, 32, 52, 72, 92, 1, 21, 41, 61, 81, 11, 31, 51, 71, 91, 0, 20, 40, 60, 80, 10, 30, 50, 70, 90]");
        documents_ids.sort();
        assert_eq!(all_ids, documents_ids);

        let mut search = Search::new(&rtxn, &index);
        search.sort_criteria(vec![
            AscDesc::from_str("mod_10:desc").unwrap(),
            AscDesc::from_str("mod_20:desc").unwrap(),
        ]);
        search.limit(100);

        let SearchResult { mut documents_ids, .. } = search.execute().unwrap();
        insta::assert_snapshot!(format!("{documents_ids:?}"), @"[19, 39, 59, 79, 99, 9, 29, 49, 69, 89, 18, 38, 58, 78, 98, 8, 28, 48, 68, 88, 17, 37, 57, 77, 97, 7, 27, 47, 67, 87, 16, 36, 56, 76, 96, 6, 26, 46, 66, 86, 15, 35, 55, 75, 95, 5, 25, 45, 65, 85, 14, 34, 54, 74, 94, 4, 24, 44, 64, 84, 13, 33, 53, 73, 93, 3, 23, 43, 63, 83, 12, 32, 52, 72, 92, 2, 22, 42, 62, 82, 11, 31, 51, 71, 91, 1, 21, 41, 61, 81, 10, 30, 50, 70, 90, 0, 20, 40, 60, 80]");
        documents_ids.sort();
        assert_eq!(all_ids, documents_ids);

        let mut search = Search::new(&rtxn, &index);
        search.sort_criteria(vec![
            AscDesc::from_str("mod_10:desc").unwrap(),
            AscDesc::from_str("mod_20:desc").unwrap(),
            AscDesc::from_str("id:desc").unwrap(),
        ]);
        search.limit(100);

        let SearchResult { mut documents_ids, .. } = search.execute().unwrap();
        insta::assert_snapshot!(format!("{documents_ids:?}"), @"[99, 79, 59, 39, 19, 89, 69, 49, 29, 9, 98, 78, 58, 38, 18, 88, 68, 48, 28, 8, 97, 77, 57, 37, 17, 87, 67, 47, 27, 7, 96, 76, 56, 36, 16, 86, 66, 46, 26, 6, 95, 75, 55, 35, 15, 85, 65, 45, 25, 5, 94, 74, 54, 34, 14, 84, 64, 44, 24, 4, 93, 73, 53, 33, 13, 83, 63, 43, 23, 3, 92, 72, 52, 32, 12, 82, 62, 42, 22, 2, 91, 71, 51, 31, 11, 81, 61, 41, 21, 1, 90, 70, 50, 30, 10, 80, 60, 40, 20, 0]");
        documents_ids.sort();
        assert_eq!(all_ids, documents_ids);
    }

    // Note that in this test, only the iterative sort algorithms are used. Set the CANDIDATES_THRESHOLD
    // constant to 0 to ensure that the other sort algorithms are also correct.
    #[test]
    fn sort_criterion_non_placeholder() {
        let index = TempIndex::new();

        index
            .update_settings(|settings| {
                settings.set_primary_key("id".to_owned());
                settings.set_filterable_fields(hashset! { S("id"), S("mod_10"), S("mod_20") });
                settings.set_sortable_fields(hashset! { S("id"), S("mod_10"), S("mod_20") });
                settings.set_criteria(vec![Criterion::Sort]);
            })
            .unwrap();

        let mut docs = vec![];
        for i in 0..100 {
            docs.push(
                serde_json::json!({ "id": i, "mod_10": format!("{}", i % 10), "mod_20": i % 20 }),
            );
        }

        index.add_documents(documents!(docs)).unwrap();

        let rtxn = index.read_txn().unwrap();

        let mut search = Search::new(&rtxn, &index);
        search.filter(
            Filter::from_str("mod_10 IN [1, 0, 2] OR mod_20 IN [10, 13] OR id IN [5, 6]")
                .unwrap()
                .unwrap(),
        );
        search.sort_criteria(vec![
            AscDesc::from_str("mod_10:desc").unwrap(),
            AscDesc::from_str("mod_20:asc").unwrap(),
            AscDesc::from_str("id:desc").unwrap(),
        ]);
        search.limit(100);

        let SearchResult { mut documents_ids, .. } = search.execute().unwrap();
        // The order should be in decreasing value of the id modulo 10, followed by increasing value of the id modulo 20, followed by decreasing value of the id
        insta::assert_snapshot!(format!("{documents_ids:?}"), @"[6, 5, 93, 73, 53, 33, 13, 82, 62, 42, 22, 2, 92, 72, 52, 32, 12, 81, 61, 41, 21, 1, 91, 71, 51, 31, 11, 80, 60, 40, 20, 0, 90, 70, 50, 30, 10]");
        let expected_ids = (0..100)
            .filter(|id| {
                [1, 0, 2].contains(&(id % 10))
                    || [10, 13].contains(&(id % 20))
                    || [5, 6].contains(id)
            })
            .collect::<Vec<_>>();
        documents_ids.sort();
        assert_eq!(expected_ids, documents_ids);

        let mut search = Search::new(&rtxn, &index);
        search.filter(
            Filter::from_str("mod_10 IN [7, 8, 0] OR mod_20 IN [1, 15, 16] OR id IN [0, 4]")
                .unwrap()
                .unwrap(),
        );
        search.sort_criteria(vec![
            AscDesc::from_str("mod_10:asc").unwrap(),
            AscDesc::from_str("mod_20:asc").unwrap(),
            AscDesc::from_str("id:desc").unwrap(),
        ]);
        search.limit(100);

        let SearchResult { mut documents_ids, .. } = search.execute().unwrap();
        // The order should be in increasing value of the id modulo 10, followed by increasing value of the id modulo 20, followed by decreasing value of the id
        insta::assert_snapshot!(format!("{documents_ids:?}"), @"[80, 60, 40, 20, 0, 90, 70, 50, 30, 10, 81, 61, 41, 21, 1, 4, 95, 75, 55, 35, 15, 96, 76, 56, 36, 16, 87, 67, 47, 27, 7, 97, 77, 57, 37, 17, 88, 68, 48, 28, 8, 98, 78, 58, 38, 18]");
        let expected_ids = (0..100)
            .filter(|id| {
                [7, 8, 0].contains(&(id % 10))
                    || [1, 15, 16].contains(&(id % 20))
                    || [0, 4].contains(id)
            })
            .collect::<Vec<_>>();
        documents_ids.sort();
        assert_eq!(expected_ids, documents_ids);

        let mut search = Search::new(&rtxn, &index);
        search.filter(
            Filter::from_str("mod_10 IN [1, 0, 2] OR mod_20 IN [10, 13] OR id IN [5, 6]")
                .unwrap()
                .unwrap(),
        );
        search.sort_criteria(vec![AscDesc::from_str("id:desc").unwrap()]);
        search.limit(100);

        let SearchResult { documents_ids, .. } = search.execute().unwrap();
        // The order should be in decreasing value of the id
        let mut expected_ids = (0..100)
            .filter(|id| {
                [1, 0, 2].contains(&(id % 10))
                    || [10, 13].contains(&(id % 20))
                    || [5, 6].contains(id)
            })
            .collect::<Vec<_>>();
        expected_ids.sort();
        expected_ids.reverse();
        assert_eq!(expected_ids, documents_ids);
    }
}
@@ -1,709 +0,0 @@
use std::cmp::{self, Ordering};
use std::collections::binary_heap::PeekMut;
use std::collections::{btree_map, BTreeMap, BinaryHeap, HashMap};
use std::iter::Peekable;
use std::mem::take;

use roaring::RoaringBitmap;

use super::{resolve_query_tree, Context, Criterion, CriterionParameters, CriterionResult};
use crate::search::criteria::{InitialCandidates, Query};
use crate::search::query_tree::{Operation, QueryKind};
use crate::search::{
    build_dfa, word_derivations, CriterionImplementationStrategy, WordDerivationsCache,
};
use crate::Result;

/// To be able to divide integers by the number of words in the query,
/// we want to find a multiplier that allows us to divide by any number between 1 and 10.
/// We chose the LCM of all numbers between 1 and 10 as the multiplier (https://en.wikipedia.org/wiki/Least_common_multiple).
const LCM_10_FIRST_NUMBERS: u32 = 2520;
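Not part of the file above: a quick check that 2520 really is lcm(1..=10) = 2^3 * 3^2 * 5 * 7.

fn gcd(a: u32, b: u32) -> u32 {
    if b == 0 { a } else { gcd(b, a % b) }
}

#[test]
fn lcm_of_one_through_ten() {
    // lcm(a, b) = a / gcd(a, b) * b, folded over 1..=10.
    assert_eq!((1..=10u32).fold(1, |acc, n| acc / gcd(acc, n) * n), 2520);
}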
/// Threshold on the number of candidates that will make
|
||||
/// the system to choose between one algorithm or another.
|
||||
const CANDIDATES_THRESHOLD: u64 = 500;
|
||||
|
||||
type FlattenedQueryTree = Vec<Vec<Vec<Query>>>;
|
||||
|
||||
pub struct Attribute<'t> {
|
||||
ctx: &'t dyn Context<'t>,
|
||||
state: Option<(Operation, FlattenedQueryTree, RoaringBitmap)>,
|
||||
initial_candidates: InitialCandidates,
|
||||
parent: Box<dyn Criterion + 't>,
|
||||
linear_buckets: Option<btree_map::IntoIter<u64, RoaringBitmap>>,
|
||||
set_buckets: Option<BinaryHeap<Branch<'t>>>,
|
||||
implementation_strategy: CriterionImplementationStrategy,
|
||||
}
|
||||
|
||||
impl<'t> Attribute<'t> {
|
||||
pub fn new(
|
||||
ctx: &'t dyn Context<'t>,
|
||||
parent: Box<dyn Criterion + 't>,
|
||||
implementation_strategy: CriterionImplementationStrategy,
|
||||
) -> Self {
|
||||
Attribute {
|
||||
ctx,
|
||||
state: None,
|
||||
initial_candidates: InitialCandidates::Estimated(RoaringBitmap::new()),
|
||||
parent,
|
||||
linear_buckets: None,
|
||||
set_buckets: None,
|
||||
implementation_strategy,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'t> Criterion for Attribute<'t> {
|
||||
#[logging_timer::time("Attribute::{}")]
|
||||
fn next(&mut self, params: &mut CriterionParameters) -> Result<Option<CriterionResult>> {
|
||||
// remove excluded candidates when next is called, instead of doing it in the loop.
|
||||
if let Some((_, _, allowed_candidates)) = self.state.as_mut() {
|
||||
*allowed_candidates -= params.excluded_candidates;
|
||||
}
|
||||
|
||||
loop {
|
||||
match self.state.take() {
|
||||
Some((query_tree, _, allowed_candidates)) if allowed_candidates.is_empty() => {
|
||||
return Ok(Some(CriterionResult {
|
||||
query_tree: Some(query_tree),
|
||||
candidates: Some(RoaringBitmap::new()),
|
||||
filtered_candidates: None,
|
||||
initial_candidates: Some(self.initial_candidates.take()),
|
||||
}));
|
||||
}
|
||||
Some((query_tree, flattened_query_tree, mut allowed_candidates)) => {
|
||||
let found_candidates = if matches!(
|
||||
self.implementation_strategy,
|
||||
CriterionImplementationStrategy::OnlyIterative
|
||||
) || (matches!(
|
||||
self.implementation_strategy,
|
||||
CriterionImplementationStrategy::Dynamic
|
||||
) && allowed_candidates.len()
|
||||
< CANDIDATES_THRESHOLD)
|
||||
{
|
||||
let linear_buckets = match self.linear_buckets.as_mut() {
|
||||
Some(linear_buckets) => linear_buckets,
|
||||
None => {
|
||||
let new_buckets = initialize_linear_buckets(
|
||||
self.ctx,
|
||||
&flattened_query_tree,
|
||||
&allowed_candidates,
|
||||
)?;
|
||||
self.linear_buckets.get_or_insert(new_buckets.into_iter())
|
||||
}
|
||||
};
|
||||
|
||||
match linear_buckets.next() {
|
||||
Some((_score, candidates)) => candidates,
|
||||
None => {
|
||||
return Ok(Some(CriterionResult {
|
||||
query_tree: Some(query_tree),
|
||||
candidates: Some(RoaringBitmap::new()),
|
||||
filtered_candidates: None,
|
||||
initial_candidates: Some(self.initial_candidates.take()),
|
||||
}));
|
||||
}
|
||||
}
|
||||
} else {
|
||||
let set_buckets = match self.set_buckets.as_mut() {
|
||||
Some(set_buckets) => set_buckets,
|
||||
None => {
|
||||
let new_buckets = initialize_set_buckets(
|
||||
self.ctx,
|
||||
&flattened_query_tree,
|
||||
&allowed_candidates,
|
||||
params.wdcache,
|
||||
)?;
|
||||
self.set_buckets.get_or_insert(new_buckets)
|
||||
}
|
||||
};
|
||||
|
||||
match set_compute_candidates(set_buckets, &allowed_candidates)? {
|
||||
Some((_score, candidates)) => candidates,
|
||||
None => {
|
||||
return Ok(Some(CriterionResult {
|
||||
query_tree: Some(query_tree),
|
||||
candidates: Some(allowed_candidates),
|
||||
filtered_candidates: None,
|
||||
initial_candidates: Some(self.initial_candidates.take()),
|
||||
}));
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
allowed_candidates -= &found_candidates;
|
||||
|
||||
self.state =
|
||||
Some((query_tree.clone(), flattened_query_tree, allowed_candidates));
|
||||
|
||||
return Ok(Some(CriterionResult {
|
||||
query_tree: Some(query_tree),
|
||||
candidates: Some(found_candidates),
|
||||
filtered_candidates: None,
|
||||
initial_candidates: Some(self.initial_candidates.take()),
|
||||
}));
|
||||
}
|
||||
None => match self.parent.next(params)? {
|
||||
Some(CriterionResult {
|
||||
query_tree: Some(query_tree),
|
||||
candidates,
|
||||
filtered_candidates,
|
||||
initial_candidates,
|
||||
}) => {
|
||||
let mut candidates = match candidates {
|
||||
Some(candidates) => candidates,
|
||||
None => {
|
||||
resolve_query_tree(self.ctx, &query_tree, params.wdcache)?
|
||||
- params.excluded_candidates
|
||||
}
|
||||
};
|
||||
|
||||
if let Some(filtered_candidates) = filtered_candidates {
|
||||
candidates &= filtered_candidates;
|
||||
}
|
||||
|
||||
let flattened_query_tree = flatten_query_tree(&query_tree);
|
||||
|
||||
match initial_candidates {
|
||||
Some(initial_candidates) => {
|
||||
self.initial_candidates |= initial_candidates
|
||||
}
|
||||
None => self.initial_candidates.map_inplace(|c| c | &candidates),
|
||||
}
|
||||
|
||||
self.state = Some((query_tree, flattened_query_tree, candidates));
|
||||
self.linear_buckets = None;
|
||||
}
|
||||
Some(CriterionResult {
|
||||
query_tree: None,
|
||||
candidates,
|
||||
filtered_candidates,
|
||||
initial_candidates,
|
||||
}) => {
|
||||
return Ok(Some(CriterionResult {
|
||||
query_tree: None,
|
||||
candidates,
|
||||
filtered_candidates,
|
||||
initial_candidates,
|
||||
}));
|
||||
}
|
||||
None => return Ok(None),
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// QueryPositionIterator is an Iterator over the positions of a Query.
/// It contains one iterator over word positions per word derivation.
struct QueryPositionIterator<'t> {
    #[allow(clippy::type_complexity)]
    inner:
        Vec<Peekable<Box<dyn Iterator<Item = heed::Result<((&'t str, u32), RoaringBitmap)>> + 't>>>,
}

impl<'t> QueryPositionIterator<'t> {
    fn new(
        ctx: &'t dyn Context<'t>,
        queries: &[Query],
        wdcache: &mut WordDerivationsCache,
    ) -> Result<Self> {
        let mut inner = Vec::with_capacity(queries.len());
        for query in queries {
            let in_prefix_cache = query.prefix && ctx.in_prefix_cache(query.kind.word());
            match &query.kind {
                QueryKind::Exact { word, .. } => {
                    if !query.prefix || in_prefix_cache {
                        let word = query.kind.word();
                        let iter = ctx.word_position_iterator(word, in_prefix_cache)?;
                        inner.push(iter.peekable());
                    } else {
                        for (word, _) in word_derivations(word, true, 0, ctx.words_fst(), wdcache)?
                        {
                            let iter = ctx.word_position_iterator(word, in_prefix_cache)?;
                            inner.push(iter.peekable());
                        }
                    }
                }
                QueryKind::Tolerant { typo, word } => {
                    for (word, _) in
                        word_derivations(word, query.prefix, *typo, ctx.words_fst(), wdcache)?
                    {
                        let iter = ctx.word_position_iterator(word, in_prefix_cache)?;
                        inner.push(iter.peekable());
                    }
                }
            };
        }

        Ok(Self { inner })
    }
}

impl<'t> Iterator for QueryPositionIterator<'t> {
    type Item = heed::Result<(u32, RoaringBitmap)>;

    fn next(&mut self) -> Option<Self::Item> {
        // find the closest next position among the inner word iterators.
        let expected_pos = self
            .inner
            .iter_mut()
            .filter_map(|wli| match wli.peek() {
                Some(Ok(((_, pos), _))) => Some(*pos),
                _ => None,
            })
            .min()?;

        let mut candidates = None;
        for wli in self.inner.iter_mut() {
            if let Some(Ok(((_, pos), _))) = wli.peek() {
                if *pos > expected_pos {
                    continue;
                }
            }

            match wli.next() {
                Some(Ok((_, docids))) => {
                    candidates = match candidates.take() {
                        Some(candidates) => Some(candidates | docids),
                        None => Some(docids),
                    }
                }
                Some(Err(e)) => return Some(Err(e)),
                None => continue,
            }
        }

        candidates.map(|candidates| Ok((expected_pos, candidates)))
    }
}

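// A worked example of the iterator above, under assumed data: suppose the query
// level contains the derivations "fish" and "fishes", whose position iterators
// yield ("fish", 2) -> {1, 4} and ("fishes", 2) -> {3}, then ("fish", 7) -> {1}.
// The first call to `next()` picks the smallest peeked position (2), consumes
// every iterator sitting at that position, and yields (2, {1, 3, 4}); the second
// call yields (7, {1}). Positions therefore come out in increasing order, with
// the docids of all the word derivations at that position merged together.
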
/// A Branch represents a possible alternative of the original query and is built from the Query Tree.
/// It allows us to iterate over meta-intervals of positions.
struct Branch<'t> {
    query_level_iterator: Vec<(u32, RoaringBitmap, Peekable<QueryPositionIterator<'t>>)>,
    last_result: (u32, RoaringBitmap),
    branch_size: u32,
}

impl<'t> Branch<'t> {
    fn new(
        ctx: &'t dyn Context<'t>,
        flatten_branch: &[Vec<Query>],
        wdcache: &mut WordDerivationsCache,
        allowed_candidates: &RoaringBitmap,
    ) -> Result<Self> {
        let mut query_level_iterator = Vec::new();
        for queries in flatten_branch {
            let mut qli = QueryPositionIterator::new(ctx, queries, wdcache)?.peekable();
            let (pos, docids) = qli.next().transpose()?.unwrap_or((0, RoaringBitmap::new()));
            query_level_iterator.push((pos, docids & allowed_candidates, qli));
        }

        let mut branch = Self {
            query_level_iterator,
            last_result: (0, RoaringBitmap::new()),
            branch_size: flatten_branch.len() as u32,
        };

        branch.update_last_result();

        Ok(branch)
    }

    /// Advance to the next meta-interval of the branch,
    /// updating the inner intervals so that the branch can be ranked by the BinaryHeap.
    fn next(&mut self, allowed_candidates: &RoaringBitmap) -> heed::Result<bool> {
        // update the first query.
        let index = self.lowest_iterator_index();
        match self.query_level_iterator.get_mut(index) {
            Some((cur_pos, cur_docids, qli)) => match qli.next().transpose()? {
                Some((next_pos, next_docids)) => {
                    *cur_pos = next_pos;
                    *cur_docids |= next_docids & allowed_candidates;
                    self.update_last_result();
                    Ok(true)
                }
                None => Ok(false),
            },
            None => Ok(false),
        }
    }

    fn lowest_iterator_index(&mut self) -> usize {
        let (index, _) = self
            .query_level_iterator
            .iter_mut()
            .map(|(pos, docids, qli)| {
                if docids.is_empty() {
                    0
                } else {
                    match qli.peek() {
                        Some(result) => {
                            result.as_ref().map(|(next_pos, _)| *next_pos - *pos).unwrap_or(0)
                        }
                        None => u32::MAX,
                    }
                }
            })
            .enumerate()
            .min_by_key(|(_, diff)| *diff)
            .unwrap_or((0, 0));

        index
    }

    fn update_last_result(&mut self) {
        let mut result_pos = 0;
        let mut result_docids = None;

        for (pos, docids, _qli) in self.query_level_iterator.iter() {
            result_pos += pos;
            result_docids = result_docids
                .take()
                .map_or_else(|| Some(docids.clone()), |candidates| Some(candidates & docids));
        }

        // remove the last result docids from the inner iterators
        if let Some(docids) = result_docids.as_ref() {
            for (_, query_docids, _) in self.query_level_iterator.iter_mut() {
                *query_docids -= docids;
            }
        }

        self.last_result = (result_pos, result_docids.unwrap_or_default());
    }

    /// return the score of the current inner interval.
    fn compute_rank(&self) -> u32 {
        // we compute a rank from the position.
        let (pos, _) = self.last_result;
        pos.saturating_sub((0..self.branch_size).sum()) * LCM_10_FIRST_NUMBERS / self.branch_size
    }

    fn cmp(&self, other: &Self) -> Ordering {
        let self_rank = self.compute_rank();
        let other_rank = other.compute_rank();

        // a lower rank is better; because BinaryHeap gives back the highest-ranked branch first, we reverse the ordering.
        self_rank.cmp(&other_rank).reverse()
    }
}

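// A small worked example of `compute_rank` above, with assumed numbers: for a
// branch of `branch_size = 3` words, the smallest possible summed position is
// 0 + 1 + 2 = 3 (the words sitting side by side at the start of an attribute),
// which is exactly `(0..3).sum()`. A branch whose current meta-interval sums to
// `pos = 9` thus gets rank (9 - 3) * LCM_10_FIRST_NUMBERS / 3; multiplying by
// `LCM_10_FIRST_NUMBERS` (presumably 2520, the least common multiple of 1..=10)
// before dividing keeps the ranks of branches of different sizes comparable
// without resorting to floating-point arithmetic.
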
impl<'t> Ord for Branch<'t> {
    fn cmp(&self, other: &Self) -> Ordering {
        self.cmp(other)
    }
}

impl<'t> PartialOrd for Branch<'t> {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(self.cmp(other))
    }
}

impl<'t> PartialEq for Branch<'t> {
    fn eq(&self, other: &Self) -> bool {
        self.cmp(other) == Ordering::Equal
    }
}

impl<'t> Eq for Branch<'t> {}

fn initialize_set_buckets<'t>(
    ctx: &'t dyn Context<'t>,
    branches: &FlattenedQueryTree,
    allowed_candidates: &RoaringBitmap,
    wdcache: &mut WordDerivationsCache,
) -> Result<BinaryHeap<Branch<'t>>> {
    let mut heap = BinaryHeap::new();
    for flatten_branch in branches {
        let branch = Branch::new(ctx, flatten_branch, wdcache, allowed_candidates)?;
        heap.push(branch);
    }

    Ok(heap)
}

fn set_compute_candidates(
    branches_heap: &mut BinaryHeap<Branch>,
    allowed_candidates: &RoaringBitmap,
) -> Result<Option<(u32, RoaringBitmap)>> {
    let mut final_candidates: Option<(u32, RoaringBitmap)> = None;
    let mut allowed_candidates = allowed_candidates.clone();

    while let Some(mut branch) = branches_heap.peek_mut() {
        // if the current branch is worse than the best one, break to return
        // the candidates that correspond to the best rank
        let branch_rank = branch.compute_rank();
        if let Some((best_rank, _)) = final_candidates {
            if branch_rank > best_rank {
                break;
            }
        }

        let candidates = take(&mut branch.last_result.1);
        if candidates.is_empty() {
            // we don't have candidates, get the next interval.
            if !branch.next(&allowed_candidates)? {
                PeekMut::pop(branch);
            }
        } else {
            allowed_candidates -= &candidates;
            final_candidates = match final_candidates.take() {
                // we add the current candidates to the best candidates
                Some((best_rank, mut best_candidates)) => {
                    best_candidates |= candidates;
                    branch.next(&allowed_candidates)?;
                    Some((best_rank, best_candidates))
                }
                // we take the current candidates as the best candidates
                None => {
                    branch.next(&allowed_candidates)?;
                    Some((branch_rank, candidates))
                }
            };
        }
    }

    Ok(final_candidates)
}

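// A minimal sketch of how the two functions above drive the set-based strategy,
// assuming `ctx`, `flattened_query_tree`, `allowed_candidates` and `wdcache` are
// in scope (as they are in `Attribute::next` above):
//
//     let mut buckets =
//         initialize_set_buckets(ctx, &flattened_query_tree, &allowed_candidates, wdcache)?;
//     while let Some((_rank, found)) = set_compute_candidates(&mut buckets, &allowed_candidates)? {
//         allowed_candidates -= &found; // as done in `Attribute::next` above
//         // ... yield `found` as the next bucket of equally-ranked documents
//     }
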
fn initialize_linear_buckets(
    ctx: &dyn Context,
    branches: &FlattenedQueryTree,
    allowed_candidates: &RoaringBitmap,
) -> Result<BTreeMap<u64, RoaringBitmap>> {
    fn compute_candidate_rank(
        branches: &FlattenedQueryTree,
        words_positions: HashMap<String, RoaringBitmap>,
    ) -> u64 {
        let mut min_rank = u64::max_value();
        for branch in branches {
            let branch_len = branch.len();
            let mut branch_rank = Vec::with_capacity(branch_len);
            for derivates in branch {
                let mut position = None;
                for Query { prefix, kind } in derivates {
                    // find the best position of the current word in the document.
                    let current_position = match kind {
                        QueryKind::Exact { word, .. } => {
                            if *prefix {
                                word_derivations(word, true, 0, &words_positions)
                                    .flat_map(|positions| positions.iter().next())
                                    .min()
                            } else {
                                words_positions
                                    .get(word)
                                    .and_then(|positions| positions.iter().next())
                            }
                        }
                        QueryKind::Tolerant { typo, word } => {
                            word_derivations(word, *prefix, *typo, &words_positions)
                                .flat_map(|positions| positions.iter().next())
                                .min()
                        }
                    };

                    match (position, current_position) {
                        (Some(p), Some(cp)) => position = Some(cmp::min(p, cp)),
                        (None, Some(cp)) => position = Some(cp),
                        _ => (),
                    }
                }

                // if a position is found, we add it to the branch score,
                // otherwise the branch is considered unfindable in this document and we break.
                if let Some(position) = position {
                    branch_rank.push(position as u64);
                } else {
                    branch_rank.clear();
                    break;
                }
            }

            if !branch_rank.is_empty() {
                branch_rank.sort_unstable();
                // because several words in the same query can't all match at position 0,
                // we subtract the word index from the position.
                let branch_rank: u64 =
                    branch_rank.into_iter().enumerate().map(|(i, r)| r - i as u64).sum();
                // here we take the mean over the words of the branch
                min_rank =
                    min_rank.min(branch_rank * LCM_10_FIRST_NUMBERS as u64 / branch_len as u64);
            }
        }

        min_rank
    }

    fn word_derivations<'a>(
        word: &str,
        is_prefix: bool,
        max_typo: u8,
        words_positions: &'a HashMap<String, RoaringBitmap>,
    ) -> impl Iterator<Item = &'a RoaringBitmap> {
        let dfa = build_dfa(word, max_typo, is_prefix);
        words_positions.iter().filter_map(move |(document_word, positions)| {
            use levenshtein_automata::Distance;
            match dfa.eval(document_word) {
                Distance::Exact(_) => Some(positions),
                Distance::AtLeast(_) => None,
            }
        })
    }

    let mut candidates = BTreeMap::new();
    for docid in allowed_candidates {
        let words_positions = ctx.docid_words_positions(docid)?;
        let rank = compute_candidate_rank(branches, words_positions);
        candidates.entry(rank).or_insert_with(RoaringBitmap::new).insert(docid);
    }

    Ok(candidates)
}

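// Because `initialize_linear_buckets` returns a `BTreeMap<u64, RoaringBitmap>`
// keyed by rank, iterating over it yields buckets in increasing rank order,
// i.e. best-ranked documents first. This is why the caller can simply turn it
// into an iterator and drain one `(rank, docids)` entry per call to `next()`
// (see `self.linear_buckets.get_or_insert(new_buckets.into_iter())` above).
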
// TODO can we keep refs of Query
fn flatten_query_tree(query_tree: &Operation) -> FlattenedQueryTree {
    use crate::search::criteria::Operation::{And, Or, Phrase};

    fn and_recurse(head: &Operation, tail: &[Operation]) -> FlattenedQueryTree {
        match tail.split_first() {
            Some((thead, tail)) => {
                let tail = and_recurse(thead, tail);
                let mut out = Vec::new();
                for array in recurse(head) {
                    for tail_array in &tail {
                        let mut array = array.clone();
                        array.extend(tail_array.iter().cloned());
                        out.push(array);
                    }
                }
                out
            }
            None => recurse(head),
        }
    }

    fn recurse(op: &Operation) -> FlattenedQueryTree {
        match op {
            And(ops) => ops.split_first().map_or_else(Vec::new, |(h, t)| and_recurse(h, t)),
            Or(_, ops) => {
                if ops.iter().all(|op| op.query().is_some()) {
                    vec![vec![ops.iter().flat_map(|op| op.query()).cloned().collect()]]
                } else {
                    ops.iter().flat_map(recurse).collect()
                }
            }
            Phrase(words) => {
                let queries = words
                    .iter()
                    .filter_map(|w| w.as_ref())
                    .map(|word| vec![Query { prefix: false, kind: QueryKind::exact(word.clone()) }])
                    .collect();
                vec![queries]
            }
            Operation::Query(query) => vec![vec![vec![query.clone()]]],
        }
    }

    recurse(query_tree)
}

#[cfg(test)]
mod tests {
    use big_s::S;

    use super::*;
    use crate::search::criteria::QueryKind;

    #[test]
    fn simple_flatten_query_tree() {
        let query_tree = Operation::Or(
            false,
            vec![
                Operation::Query(Query { prefix: false, kind: QueryKind::exact(S("manythefish")) }),
                Operation::And(vec![
                    Operation::Query(Query { prefix: false, kind: QueryKind::exact(S("manythe")) }),
                    Operation::Query(Query { prefix: false, kind: QueryKind::exact(S("fish")) }),
                ]),
                Operation::And(vec![
                    Operation::Query(Query { prefix: false, kind: QueryKind::exact(S("many")) }),
                    Operation::Or(
                        false,
                        vec![
                            Operation::Query(Query {
                                prefix: false,
                                kind: QueryKind::exact(S("thefish")),
                            }),
                            Operation::And(vec![
                                Operation::Query(Query {
                                    prefix: false,
                                    kind: QueryKind::exact(S("the")),
                                }),
                                Operation::Query(Query {
                                    prefix: false,
                                    kind: QueryKind::exact(S("fish")),
                                }),
                            ]),
                        ],
                    ),
                ]),
            ],
        );
        let result = flatten_query_tree(&query_tree);

        insta::assert_debug_snapshot!(result, @r###"
        [
            [
                [
                    Exact {
                        word: "manythefish",
                    },
                ],
            ],
            [
                [
                    Exact {
                        word: "manythe",
                    },
                ],
                [
                    Exact {
                        word: "fish",
                    },
                ],
            ],
            [
                [
                    Exact {
                        word: "many",
                    },
                ],
                [
                    Exact {
                        word: "thefish",
                    },
                ],
            ],
            [
                [
                    Exact {
                        word: "many",
                    },
                ],
                [
                    Exact {
                        word: "the",
                    },
                ],
                [
                    Exact {
                        word: "fish",
                    },
                ],
            ],
        ]
        "###);
    }
}

@@ -1,766 +0,0 @@
use std::collections::btree_map::Entry;
use std::collections::BTreeMap;
use std::convert::TryFrom;
use std::mem::take;

use log::debug;
use roaring::{MultiOps, RoaringBitmap};

use crate::search::criteria::{
    resolve_phrase, resolve_query_tree, Context, Criterion, CriterionParameters, CriterionResult,
    InitialCandidates,
};
use crate::search::query_tree::{Operation, PrimitiveQueryPart};
use crate::{absolute_from_relative_position, FieldId, Result};

pub struct Exactness<'t> {
    ctx: &'t dyn Context<'t>,
    query_tree: Option<Operation>,
    state: Option<State>,
    initial_candidates: InitialCandidates,
    parent: Box<dyn Criterion + 't>,
    query: Vec<ExactQueryPart>,
    cache: Option<ExactWordsCombinationCache>,
}

impl<'t> Exactness<'t> {
    pub fn new(
        ctx: &'t dyn Context<'t>,
        parent: Box<dyn Criterion + 't>,
        primitive_query: &[PrimitiveQueryPart],
    ) -> heed::Result<Self> {
        let mut query: Vec<_> = Vec::with_capacity(primitive_query.len());
        for part in primitive_query {
            query.push(ExactQueryPart::from_primitive_query_part(ctx, part)?);
        }

        Ok(Exactness {
            ctx,
            query_tree: None,
            state: None,
            initial_candidates: InitialCandidates::Estimated(RoaringBitmap::new()),
            parent,
            query,
            cache: None,
        })
    }
}

impl<'t> Criterion for Exactness<'t> {
    #[logging_timer::time("Exactness::{}")]
    fn next(&mut self, params: &mut CriterionParameters) -> Result<Option<CriterionResult>> {
        // remove the excluded candidates when next is called, instead of doing it in the loop.
        if let Some(state) = self.state.as_mut() {
            state.difference_with(params.excluded_candidates);
        }
        loop {
            debug!("Exactness at state {:?}", self.state);

            match self.state.as_mut() {
                Some(state) if state.is_empty() => {
                    // reset state
                    self.state = None;
                    self.query_tree = None;
                    // we don't need to reset the combinations cache since it only depends on
                    // the primitive query, which does not change
                }
                Some(state) => {
                    let (candidates, state) =
                        resolve_state(self.ctx, take(state), &self.query, &mut self.cache)?;
                    self.state = state;

                    return Ok(Some(CriterionResult {
                        query_tree: self.query_tree.clone(),
                        candidates: Some(candidates),
                        filtered_candidates: None,
                        initial_candidates: Some(self.initial_candidates.take()),
                    }));
                }
                None => match self.parent.next(params)? {
                    Some(CriterionResult {
                        query_tree: Some(query_tree),
                        candidates,
                        filtered_candidates,
                        initial_candidates,
                    }) => {
                        let mut candidates = match candidates {
                            Some(candidates) => candidates,
                            None => {
                                resolve_query_tree(self.ctx, &query_tree, params.wdcache)?
                                    - params.excluded_candidates
                            }
                        };

                        if let Some(filtered_candidates) = filtered_candidates {
                            candidates &= filtered_candidates;
                        }

                        match initial_candidates {
                            Some(initial_candidates) => {
                                self.initial_candidates |= initial_candidates
                            }
                            None => self.initial_candidates.map_inplace(|c| c | &candidates),
                        }

                        self.state = Some(State::new(candidates));
                        self.query_tree = Some(query_tree);
                    }
                    Some(CriterionResult {
                        query_tree: None,
                        candidates,
                        filtered_candidates,
                        initial_candidates,
                    }) => {
                        return Ok(Some(CriterionResult {
                            query_tree: None,
                            candidates,
                            filtered_candidates,
                            initial_candidates,
                        }));
                    }
                    None => return Ok(None),
                },
            }
        }
    }
}

#[derive(Debug)]
enum State {
    /// Extract the documents that have an attribute that contains exactly the query.
    ExactAttribute(RoaringBitmap),
    /// Extract the documents that have an attribute that starts with exactly the query.
    AttributeStartsWith(RoaringBitmap),
    /// Rank the remaining documents by the number of exact words contained.
    ExactWords(RoaringBitmap),
    Remainings(Vec<RoaringBitmap>),
}

impl State {
    fn new(candidates: RoaringBitmap) -> Self {
        Self::ExactAttribute(candidates)
    }

    fn difference_with(&mut self, lhs: &RoaringBitmap) {
        match self {
            Self::ExactAttribute(candidates)
            | Self::AttributeStartsWith(candidates)
            | Self::ExactWords(candidates) => *candidates -= lhs,
            Self::Remainings(candidates_array) => {
                candidates_array.iter_mut().for_each(|candidates| *candidates -= lhs);
                candidates_array.retain(|candidates| !candidates.is_empty());
            }
        }
    }

    fn is_empty(&self) -> bool {
        match self {
            Self::ExactAttribute(candidates)
            | Self::AttributeStartsWith(candidates)
            | Self::ExactWords(candidates) => candidates.is_empty(),
            Self::Remainings(candidates_array) => {
                candidates_array.iter().all(RoaringBitmap::is_empty)
            }
        }
    }
}

impl Default for State {
    fn default() -> Self {
        Self::Remainings(vec![])
    }
}

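// The `resolve_state` function below advances this state machine one step per
// call. A sketch of the progression:
//
//     ExactAttribute(c)      -> documents whose attribute is exactly the query
//     AttributeStartsWith(c) -> documents whose attribute starts with the query
//     ExactWords(c)          -> documents ranked by their number of exact words
//     Remainings(buckets)    -> the leftover buckets, popped one per call until empty
//
// Each of the first steps removes the documents it returns from the candidates
// handed to the next state, so the buckets returned over time stay disjoint.
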
#[logging_timer::time("Exactness::{}")]
fn resolve_state(
    ctx: &dyn Context,
    state: State,
    query: &[ExactQueryPart],
    cache: &mut Option<ExactWordsCombinationCache>,
) -> Result<(RoaringBitmap, Option<State>)> {
    use State::*;
    match state {
        ExactAttribute(mut allowed_candidates) => {
            let mut candidates = RoaringBitmap::new();
            if let Ok(query_len) = u8::try_from(query.len()) {
                let attributes_ids = ctx.searchable_fields_ids()?;
                for id in attributes_ids {
                    if let Some(attribute_allowed_docids) =
                        ctx.field_id_word_count_docids(id, query_len)?
                    {
                        let mut attribute_candidates_array =
                            attribute_start_with_docids(ctx, id, query)?;
                        attribute_candidates_array.push(attribute_allowed_docids);

                        candidates |= MultiOps::intersection(attribute_candidates_array);
                    }
                }

                // only keep the allowed candidates
                candidates &= &allowed_candidates;
                // remove the current candidates from the allowed candidates
                allowed_candidates -= &candidates;
            }

            Ok((candidates, Some(AttributeStartsWith(allowed_candidates))))
        }
        AttributeStartsWith(mut allowed_candidates) => {
            let mut candidates = RoaringBitmap::new();
            let attributes_ids = ctx.searchable_fields_ids()?;
            for id in attributes_ids {
                let attribute_candidates_array = attribute_start_with_docids(ctx, id, query)?;
                candidates |= MultiOps::intersection(attribute_candidates_array);
            }

            // only keep the allowed candidates
            candidates &= &allowed_candidates;
            // remove the current candidates from the allowed candidates
            allowed_candidates -= &candidates;
            Ok((candidates, Some(ExactWords(allowed_candidates))))
        }
        ExactWords(allowed_candidates) => {
            // Retrieve the cache if it already exists, otherwise create it.
            let owned_cache = if let Some(cache) = cache.take() {
                cache
            } else {
                compute_combinations(ctx, query)?
            };
            // The cache contains the sets of documents which contain exactly 1, 2, 3, ... exact words
            // from the query. It cannot be empty. All the candidates in it are disjoint.

            let mut candidates_array = owned_cache.combinations.clone();
            for candidates in candidates_array.iter_mut() {
                *candidates &= &allowed_candidates;
            }
            *cache = Some(owned_cache);

            let best_candidates = candidates_array.pop().unwrap();

            candidates_array.insert(0, allowed_candidates);
            Ok((best_candidates, Some(Remainings(candidates_array))))
        }
        // pop the remaining candidate buckets one by one until they are exhausted
        Remainings(mut candidates_array) => {
            let candidates = candidates_array.pop().unwrap_or_default();
            if !candidates_array.is_empty() {
                Ok((candidates, Some(Remainings(candidates_array))))
            } else {
                Ok((candidates, None))
            }
        }
    }
}

fn attribute_start_with_docids(
    ctx: &dyn Context,
    attribute_id: FieldId,
    query: &[ExactQueryPart],
) -> heed::Result<Vec<RoaringBitmap>> {
    let mut attribute_candidates_array = Vec::new();
    // start from the attribute's first position
    let mut pos = absolute_from_relative_position(attribute_id, 0);
    for part in query {
        use ExactQueryPart::*;
        match part {
            Synonyms(synonyms) => {
                let mut synonyms_candidates = RoaringBitmap::new();
                for word in synonyms {
                    let wc = ctx.word_position_docids(word, pos)?;
                    if let Some(word_candidates) = wc {
                        synonyms_candidates |= word_candidates;
                    }
                }
                attribute_candidates_array.push(synonyms_candidates);
                pos += 1;
            }
            Phrase(phrase) => {
                for word in phrase {
                    if let Some(word) = word {
                        let wc = ctx.word_position_docids(word, pos)?;
                        if let Some(word_candidates) = wc {
                            attribute_candidates_array.push(word_candidates);
                        }
                    }
                    pos += 1;
                }
            }
        }
    }

    Ok(attribute_candidates_array)
}

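// A small example of the position bookkeeping above, with an assumed query: for
// the parts [Synonyms(["cat", "feline"]), Phrase(["black", "fur"])], the function
// looks up "cat"/"feline" at relative position 0 of the attribute, then "black"
// at position 1 and "fur" at position 2, pushing one bitmap per looked-up slot.
// Intersecting the returned bitmaps keeps only the documents where the attribute
// starts with the whole query, in order.
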
#[derive(Debug, Clone)]
pub enum ExactQueryPart {
    Phrase(Vec<Option<String>>),
    Synonyms(Vec<String>),
}

impl ExactQueryPart {
    fn from_primitive_query_part(
        ctx: &dyn Context,
        part: &PrimitiveQueryPart,
    ) -> heed::Result<Self> {
        let part = match part {
            PrimitiveQueryPart::Word(word, _) => {
                match ctx.synonyms(word)? {
                    Some(synonyms) => {
                        let mut synonyms: Vec<_> = synonyms
                            .into_iter()
                            .filter_map(|mut array| {
                                // keep one-word synonyms only.
                                match array.pop() {
                                    Some(word) if array.is_empty() => Some(word),
                                    _ => None,
                                }
                            })
                            .collect();
                        synonyms.push(word.clone());
                        ExactQueryPart::Synonyms(synonyms)
                    }
                    None => ExactQueryPart::Synonyms(vec![word.clone()]),
                }
            }
            PrimitiveQueryPart::Phrase(phrase) => ExactQueryPart::Phrase(phrase.clone()),
        };

        Ok(part)
    }
}

struct ExactWordsCombinationCache {
    // index 0 is only 1 word
    combinations: Vec<RoaringBitmap>,
}

fn compute_combinations(
    ctx: &dyn Context,
    query: &[ExactQueryPart],
) -> Result<ExactWordsCombinationCache> {
    let number_of_part = query.len();
    let mut parts_candidates_array = Vec::with_capacity(number_of_part);
    for part in query {
        let mut candidates = RoaringBitmap::new();
        use ExactQueryPart::*;
        match part {
            Synonyms(synonyms) => {
                for synonym in synonyms {
                    if let Some(synonym_candidates) = ctx.word_docids(synonym)? {
                        candidates |= synonym_candidates;
                    }
                }
            }
            // compute the intersection of pairs of words with a proximity of 0.
            Phrase(phrase) => {
                candidates |= resolve_phrase(ctx, phrase)?;
            }
        }
        parts_candidates_array.push(candidates);
    }
    let combinations = create_disjoint_combinations(parts_candidates_array);

    Ok(ExactWordsCombinationCache { combinations })
}

/// Given a list of bitmaps `b0,b1,...,bn`, compute the list of bitmaps `X0,X1,...,Xn`
/// such that `Xi` contains all the elements that are contained in **at least** `i+1` bitmaps among `b0,b1,...,bn`.
///
/// The returned vector has the same length as the input list. It is equal to `vec![X0, X1, ..., Xn]`.
///
/// ## Implementation
///
/// We do so by iteratively building a map containing the union of all the different ways to intersect `J` bitmaps among `b0,b1,...,bn`.
/// - The key of the map is the index `i` of the last bitmap in the intersections
/// - The value is the union of all the possible intersections of `J` bitmaps such that the last bitmap in the intersection is `bi`
///
/// For example, with the bitmaps `b0,b1,b2,b3`, this map should look like this:
/// ```text
/// Map 0: (first iteration, contains all the combinations of 1 bitmap)
/// // What follows are unions of intersections of bitmaps associated with the index of their last component
///     0: [b0]
///     1: [b1]
///     2: [b2]
///     3: [b3]
/// Map 1: (second iteration, combinations of 2 bitmaps)
///     1: [b0&b1]
///     2: [b0&b2 | b1&b2]
///     3: [b0&b3 | b1&b3 | b2&b3]
/// Map 2: (third iteration, combinations of 3 bitmaps)
///     2: [b0&b1&b2]
///     3: [b0&b1&b3 | b0&b2&b3 | b1&b2&b3]
/// Map 3: (fourth iteration, combinations of 4 bitmaps)
///     3: [b0&b1&b2&b3]
/// ```
///
/// These maps are built one by one from the content of the preceding map.
/// For example, to create Map 2, we look at each line of Map 1, for example:
/// ```text
///     2: [b0&b2 | b1&b2]
/// ```
/// And then for each `i > 2`, we compute `(b0&b2 | b1&b2) & bi = b0&b2&bi | b1&b2&bi`
/// and add it to the new map (Map 2) under the key `i` (if it is not empty):
/// ```text
///     3: [b0&b2&b3 | b1&b2&b3]
///     4: [b0&b2&b4 | b1&b2&b4]
///     5: [b0&b2&b5 | b1&b2&b5]
///     etc.
/// ```
/// We only keep two maps in memory at any one point. As soon as Map J is built, we flatten Map J-1 into
/// a single bitmap by taking the union of all of its values. This union gives us `X(J-1)`.
///
/// ## Memory Usage
/// This function is expected to be called on a maximum of 10 bitmaps. The worst case thus happens when
/// 10 identical large bitmaps are given.
///
/// In the context of Meilisearch, let's imagine that we are given 10 bitmaps containing all
/// the document ids. If the dataset contains 16 million documents, then each bitmap will take
/// around 2MB of memory.
///
/// When creating Map 3, we will have, in memory:
/// 1. The 10 original bitmaps (20MB)
/// 2. X0: 2MB
/// 3. Map 1, containing 9 bitmaps: 18MB
/// 4. Map 2, containing 8 bitmaps: 16MB
/// 5. X1: 2MB
///
/// for a total of around 60MB of memory. This roughly represents the maximum memory usage of this function.
///
/// ## Time complexity
/// Let N be the size of the given list of bitmaps and M the length of each individual bitmap.
///
/// We need to create N new bitmaps. The most expensive one to create is the second one, where we need to
/// iterate over the N keys of Map 1 and, for each of those keys `k_i`, perform `N - k_i` bitmap unions.
/// Unioning two bitmaps is O(M), and we need to do it O(N^2) times.
///
/// Therefore the time complexity is O(N^3 * M).
fn create_non_disjoint_combinations(bitmaps: Vec<RoaringBitmap>) -> Vec<RoaringBitmap> {
    let nbr_parts = bitmaps.len();
    if nbr_parts == 1 {
        return bitmaps;
    }
    let mut flattened_levels = vec![];
    let mut last_level: BTreeMap<usize, RoaringBitmap> =
        bitmaps.clone().into_iter().enumerate().collect();

    for _ in 2..=nbr_parts {
        let mut new_level = BTreeMap::new();
        for (last_part_index, base_combination) in last_level.iter() {
            #[allow(clippy::needless_range_loop)]
            for new_last_part_index in last_part_index + 1..nbr_parts {
                let new_combination = base_combination & &bitmaps[new_last_part_index];
                if !new_combination.is_empty() {
                    match new_level.entry(new_last_part_index) {
                        Entry::Occupied(mut b) => {
                            *b.get_mut() |= new_combination;
                        }
                        Entry::Vacant(entry) => {
                            entry.insert(new_combination);
                        }
                    }
                }
            }
        }
        // Now flatten the last level to save memory
        let flattened_last_level = MultiOps::union(last_level.into_values());
        flattened_levels.push(flattened_last_level);
        last_level = new_level;
    }
    // Flatten the last level
    let flattened_last_level = MultiOps::union(last_level.into_values());
    flattened_levels.push(flattened_last_level);
    flattened_levels
}

/// Given a list of bitmaps `b0,b1,...,bn`, compute the list of bitmaps `X0,X1,...,Xn`
/// such that `Xi` contains all the elements that are contained in **exactly** `i+1` bitmaps among `b0,b1,...,bn`.
///
/// The returned vector has the same length as the input list. It is equal to `vec![X0, X1, ..., Xn]`.
fn create_disjoint_combinations(parts_candidates_array: Vec<RoaringBitmap>) -> Vec<RoaringBitmap> {
    let non_disjoint_combinations = create_non_disjoint_combinations(parts_candidates_array);
    let mut disjoint_combinations = vec![];
    let mut combinations = non_disjoint_combinations.into_iter().peekable();
    while let Some(mut combination) = combinations.next() {
        if let Some(forbidden) = combinations.peek() {
            combination -= forbidden;
        }
        disjoint_combinations.push(combination)
    }

    disjoint_combinations
}

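// A worked example with assumed tiny bitmaps: for b0 = {2, 4, 6} and b1 = {3, 6},
// `create_non_disjoint_combinations(vec![b0, b1])` returns
// [ {2, 3, 4, 6}, {6} ] (elements in at least one bitmap, then in at least two),
// and `create_disjoint_combinations` subtracts each level from the previous one,
// giving [ {2, 3, 4}, {6} ]: elements in exactly one bitmap, then in exactly two.
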
#[cfg(test)]
mod tests {
    use big_s::S;
    use roaring::RoaringBitmap;

    use crate::index::tests::TempIndex;
    use crate::search::criteria::exactness::{
        create_disjoint_combinations, create_non_disjoint_combinations,
    };
    use crate::snapshot_tests::display_bitmap;
    use crate::{Criterion, SearchResult};

    #[test]
    fn test_exact_words_subcriterion() {
        let index = TempIndex::new();

        index
            .update_settings(|settings| {
                settings.set_primary_key(S("id"));
                settings.set_criteria(vec![Criterion::Exactness]);
            })
            .unwrap();

        index
            .add_documents(documents!([
                // not relevant
                { "id": "0", "text": "cat good dog bad" },
                // 1 exact word
                { "id": "1", "text": "they said: cats arebetter thandogs" },
                // 3 exact words
                { "id": "2", "text": "they said: cats arebetter than dogs" },
                // 5 exact words
                { "id": "3", "text": "they said: cats are better than dogs" },
                // attribute starts with the exact words
                { "id": "4", "text": "cats are better than dogs except on Saturday" },
                // attribute equal to the exact words
                { "id": "5", "text": "cats are better than dogs" },
            ]))
            .unwrap();

        let rtxn = index.read_txn().unwrap();

        let SearchResult { matching_words: _, candidates: _, documents_ids } =
            index.search(&rtxn).query("cats are better than dogs").execute().unwrap();

        insta::assert_snapshot!(format!("{documents_ids:?}"), @"[5, 4, 3, 2, 1]");
    }

    fn print_combinations(rbs: &[RoaringBitmap]) -> String {
        let mut s = String::new();
        for rb in rbs {
            s.push_str(&format!("{}\n", &display_bitmap(rb)));
        }
        s
    }

    // In these unit tests, the test bitmaps always contain all the multiples of a certain number.
    // This makes it easy to check the validity of the results of `create_disjoint_combinations` by
    // counting the number of dividers of the elements in the returned bitmaps.
    fn assert_correct_combinations(combinations: &[RoaringBitmap], dividers: &[u32]) {
        for (i, set) in combinations.iter().enumerate() {
            let expected_nbr_dividers = i + 1;
            for el in set {
                let nbr_dividers = dividers.iter().map(|d| usize::from(el % d == 0)).sum::<usize>();
                assert_eq!(
                    nbr_dividers, expected_nbr_dividers,
                    "{el} is divisible by {nbr_dividers} elements, not {expected_nbr_dividers}."
                );
            }
        }
    }

    #[test]
    fn compute_combinations_1() {
        let b0: RoaringBitmap = (0..).into_iter().map(|x| 2 * x).take_while(|x| *x < 150).collect();

        let parts_candidates = vec![b0];

        let combinations = create_disjoint_combinations(parts_candidates);
        insta::assert_snapshot!(print_combinations(&combinations), @r###"
        [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62, 64, 66, 68, 70, 72, 74, 76, 78, 80, 82, 84, 86, 88, 90, 92, 94, 96, 98, 100, 102, 104, 106, 108, 110, 112, 114, 116, 118, 120, 122, 124, 126, 128, 130, 132, 134, 136, 138, 140, 142, 144, 146, 148, ]
        "###);

        assert_correct_combinations(&combinations, &[2]);
    }

    #[test]
    fn compute_combinations_2() {
        let b0: RoaringBitmap = (0..).into_iter().map(|x| 2 * x).take_while(|x| *x < 150).collect();
        let b1: RoaringBitmap = (0..).into_iter().map(|x| 3 * x).take_while(|x| *x < 150).collect();

        let parts_candidates = vec![b0, b1];

        let combinations = create_disjoint_combinations(parts_candidates);
        insta::assert_snapshot!(print_combinations(&combinations), @r###"
        [2, 3, 4, 8, 9, 10, 14, 15, 16, 20, 21, 22, 26, 27, 28, 32, 33, 34, 38, 39, 40, 44, 45, 46, 50, 51, 52, 56, 57, 58, 62, 63, 64, 68, 69, 70, 74, 75, 76, 80, 81, 82, 86, 87, 88, 92, 93, 94, 98, 99, 100, 104, 105, 106, 110, 111, 112, 116, 117, 118, 122, 123, 124, 128, 129, 130, 134, 135, 136, 140, 141, 142, 146, 147, 148, ]
        [0, 6, 12, 18, 24, 30, 36, 42, 48, 54, 60, 66, 72, 78, 84, 90, 96, 102, 108, 114, 120, 126, 132, 138, 144, ]
        "###);
    }

    #[test]
    fn compute_combinations_4() {
        let b0: RoaringBitmap = (0..).into_iter().map(|x| 2 * x).take_while(|x| *x < 150).collect();
        let b1: RoaringBitmap = (0..).into_iter().map(|x| 3 * x).take_while(|x| *x < 150).collect();
        let b2: RoaringBitmap = (0..).into_iter().map(|x| 5 * x).take_while(|x| *x < 150).collect();
        let b3: RoaringBitmap = (0..).into_iter().map(|x| 7 * x).take_while(|x| *x < 150).collect();

        let parts_candidates = vec![b0, b1, b2, b3];

        let combinations = create_disjoint_combinations(parts_candidates);

        insta::assert_snapshot!(print_combinations(&combinations), @r###"
        [2, 3, 4, 5, 7, 8, 9, 16, 22, 25, 26, 27, 32, 33, 34, 38, 39, 44, 46, 49, 51, 52, 55, 57, 58, 62, 64, 65, 68, 69, 74, 76, 77, 81, 82, 85, 86, 87, 88, 91, 92, 93, 94, 95, 99, 104, 106, 111, 115, 116, 117, 118, 119, 122, 123, 124, 125, 128, 129, 133, 134, 136, 141, 142, 145, 146, 148, ]
        [6, 10, 12, 14, 15, 18, 20, 21, 24, 28, 35, 36, 40, 45, 48, 50, 54, 56, 63, 66, 72, 75, 78, 80, 96, 98, 100, 102, 108, 110, 112, 114, 130, 132, 135, 138, 144, 147, ]
        [30, 42, 60, 70, 84, 90, 105, 120, 126, 140, ]
        [0, ]
        "###);

        // But we also check it programmatically
        assert_correct_combinations(&combinations, &[2, 3, 5, 7]);
    }

    #[test]
    fn compute_combinations_4_with_empty_results_at_end() {
        let b0: RoaringBitmap = (1..).into_iter().map(|x| 2 * x).take_while(|x| *x < 150).collect();
        let b1: RoaringBitmap = (1..).into_iter().map(|x| 3 * x).take_while(|x| *x < 150).collect();
        let b2: RoaringBitmap = (1..).into_iter().map(|x| 5 * x).take_while(|x| *x < 150).collect();
        let b3: RoaringBitmap = (1..).into_iter().map(|x| 7 * x).take_while(|x| *x < 150).collect();

        let parts_candidates = vec![b0, b1, b2, b3];

        let combinations = create_disjoint_combinations(parts_candidates);

        insta::assert_snapshot!(print_combinations(&combinations), @r###"
        [2, 3, 4, 5, 7, 8, 9, 16, 22, 25, 26, 27, 32, 33, 34, 38, 39, 44, 46, 49, 51, 52, 55, 57, 58, 62, 64, 65, 68, 69, 74, 76, 77, 81, 82, 85, 86, 87, 88, 91, 92, 93, 94, 95, 99, 104, 106, 111, 115, 116, 117, 118, 119, 122, 123, 124, 125, 128, 129, 133, 134, 136, 141, 142, 145, 146, 148, ]
        [6, 10, 12, 14, 15, 18, 20, 21, 24, 28, 35, 36, 40, 45, 48, 50, 54, 56, 63, 66, 72, 75, 78, 80, 96, 98, 100, 102, 108, 110, 112, 114, 130, 132, 135, 138, 144, 147, ]
        [30, 42, 60, 70, 84, 90, 105, 120, 126, 140, ]
        []
        "###);

        // But we also check it programmatically
        assert_correct_combinations(&combinations, &[2, 3, 5, 7]);
    }

    #[test]
    fn compute_combinations_4_with_some_equal_bitmaps() {
        let b0: RoaringBitmap = (0..).into_iter().map(|x| 2 * x).take_while(|x| *x < 150).collect();
        let b1: RoaringBitmap = (0..).into_iter().map(|x| 3 * x).take_while(|x| *x < 150).collect();
        let b2: RoaringBitmap = (0..).into_iter().map(|x| 5 * x).take_while(|x| *x < 150).collect();
        // b3 == b1
        let b3: RoaringBitmap = (0..).into_iter().map(|x| 3 * x).take_while(|x| *x < 150).collect();

        let parts_candidates = vec![b0, b1, b2, b3];

        let combinations = create_disjoint_combinations(parts_candidates);

        insta::assert_snapshot!(print_combinations(&combinations), @r###"
        [2, 4, 5, 8, 14, 16, 22, 25, 26, 28, 32, 34, 35, 38, 44, 46, 52, 55, 56, 58, 62, 64, 65, 68, 74, 76, 82, 85, 86, 88, 92, 94, 95, 98, 104, 106, 112, 115, 116, 118, 122, 124, 125, 128, 134, 136, 142, 145, 146, 148, ]
        [3, 9, 10, 20, 21, 27, 33, 39, 40, 50, 51, 57, 63, 69, 70, 80, 81, 87, 93, 99, 100, 110, 111, 117, 123, 129, 130, 140, 141, 147, ]
        [6, 12, 15, 18, 24, 36, 42, 45, 48, 54, 66, 72, 75, 78, 84, 96, 102, 105, 108, 114, 126, 132, 135, 138, 144, ]
        [0, 30, 60, 90, 120, ]
        "###);

        // But we also check it programmatically
        assert_correct_combinations(&combinations, &[2, 3, 5, 3]);
    }

    #[test]
    fn compute_combinations_10() {
        let dividers = [2, 3, 5, 7, 11, 6, 15, 35, 18, 14];
        let parts_candidates: Vec<RoaringBitmap> = dividers
            .iter()
            .map(|&divider| {
                (0..).into_iter().map(|x| divider * x).take_while(|x| *x <= 210).collect()
            })
            .collect();

        let combinations = create_disjoint_combinations(parts_candidates);
        insta::assert_snapshot!(print_combinations(&combinations), @r###"
        [2, 3, 4, 5, 7, 8, 9, 11, 16, 25, 26, 27, 32, 34, 38, 39, 46, 49, 51, 52, 57, 58, 62, 64, 65, 68, 69, 74, 76, 81, 82, 85, 86, 87, 91, 92, 93, 94, 95, 104, 106, 111, 115, 116, 117, 118, 119, 121, 122, 123, 124, 125, 128, 129, 133, 134, 136, 141, 142, 143, 145, 146, 148, 152, 153, 155, 158, 159, 161, 164, 166, 171, 172, 177, 178, 183, 184, 185, 187, 188, 194, 201, 202, 203, 205, 206, 207, 208, 209, ]
        [10, 20, 21, 22, 33, 40, 44, 50, 55, 63, 77, 80, 88, 99, 100, 130, 147, 160, 170, 176, 189, 190, 200, ]
        [6, 12, 14, 15, 24, 28, 35, 45, 48, 56, 75, 78, 96, 98, 102, 110, 112, 114, 135, 138, 156, 174, 175, 182, 186, 192, 195, 196, 204, ]
        [18, 36, 54, 66, 72, 108, 132, 144, 154, 162, 165, ]
        [30, 42, 60, 70, 84, 105, 120, 140, 150, 168, 198, ]
        [90, 126, 180, ]
        []
        [210, ]
        []
        [0, ]
        "###);

        assert_correct_combinations(&combinations, &dividers);
    }

    #[test]
    fn compute_combinations_30() {
        let dividers: [u32; 30] = [
            1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4,
            5,
        ];
        let parts_candidates: Vec<RoaringBitmap> = dividers
            .iter()
            .map(|divider| {
                (0..).into_iter().map(|x| divider * x).take_while(|x| *x <= 100).collect()
            })
            .collect();

        let combinations = create_non_disjoint_combinations(parts_candidates.clone());
        insta::assert_snapshot!(print_combinations(&combinations), @r###"
        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, ]
        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, ]
        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, ]
        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, ]
        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, ]
        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, ]
        [0, 2, 3, 4, 5, 6, 8, 9, 10, 12, 14, 15, 16, 18, 20, 21, 22, 24, 25, 26, 27, 28, 30, 32, 33, 34, 35, 36, 38, 39, 40, 42, 44, 45, 46, 48, 50, 51, 52, 54, 55, 56, 57, 58, 60, 62, 63, 64, 65, 66, 68, 69, 70, 72, 74, 75, 76, 78, 80, 81, 82, 84, 85, 86, 87, 88, 90, 92, 93, 94, 95, 96, 98, 99, 100, ]
        [0, 2, 3, 4, 5, 6, 8, 9, 10, 12, 14, 15, 16, 18, 20, 21, 22, 24, 25, 26, 27, 28, 30, 32, 33, 34, 35, 36, 38, 39, 40, 42, 44, 45, 46, 48, 50, 51, 52, 54, 55, 56, 57, 58, 60, 62, 63, 64, 65, 66, 68, 69, 70, 72, 74, 75, 76, 78, 80, 81, 82, 84, 85, 86, 87, 88, 90, 92, 93, 94, 95, 96, 98, 99, 100, ]
        [0, 2, 3, 4, 5, 6, 8, 9, 10, 12, 14, 15, 16, 18, 20, 21, 22, 24, 25, 26, 27, 28, 30, 32, 33, 34, 35, 36, 38, 39, 40, 42, 44, 45, 46, 48, 50, 51, 52, 54, 55, 56, 57, 58, 60, 62, 63, 64, 65, 66, 68, 69, 70, 72, 74, 75, 76, 78, 80, 81, 82, 84, 85, 86, 87, 88, 90, 92, 93, 94, 95, 96, 98, 99, 100, ]
        [0, 2, 3, 4, 5, 6, 8, 9, 10, 12, 14, 15, 16, 18, 20, 21, 22, 24, 25, 26, 27, 28, 30, 32, 33, 34, 35, 36, 38, 39, 40, 42, 44, 45, 46, 48, 50, 51, 52, 54, 55, 56, 57, 58, 60, 62, 63, 64, 65, 66, 68, 69, 70, 72, 74, 75, 76, 78, 80, 81, 82, 84, 85, 86, 87, 88, 90, 92, 93, 94, 95, 96, 98, 99, 100, ]
        [0, 2, 3, 4, 5, 6, 8, 9, 10, 12, 14, 15, 16, 18, 20, 21, 22, 24, 25, 26, 27, 28, 30, 32, 33, 34, 35, 36, 38, 39, 40, 42, 44, 45, 46, 48, 50, 51, 52, 54, 55, 56, 57, 58, 60, 62, 63, 64, 65, 66, 68, 69, 70, 72, 74, 75, 76, 78, 80, 81, 82, 84, 85, 86, 87, 88, 90, 92, 93, 94, 95, 96, 98, 99, 100, ]
        [0, 2, 3, 4, 5, 6, 8, 9, 10, 12, 14, 15, 16, 18, 20, 21, 22, 24, 25, 26, 27, 28, 30, 32, 33, 34, 35, 36, 38, 39, 40, 42, 44, 45, 46, 48, 50, 51, 52, 54, 55, 56, 57, 58, 60, 62, 63, 64, 65, 66, 68, 69, 70, 72, 74, 75, 76, 78, 80, 81, 82, 84, 85, 86, 87, 88, 90, 92, 93, 94, 95, 96, 98, 99, 100, ]
        [0, 4, 6, 8, 10, 12, 15, 16, 18, 20, 24, 28, 30, 32, 36, 40, 42, 44, 45, 48, 50, 52, 54, 56, 60, 64, 66, 68, 70, 72, 75, 76, 78, 80, 84, 88, 90, 92, 96, 100, ]
        [0, 4, 6, 8, 10, 12, 15, 16, 18, 20, 24, 28, 30, 32, 36, 40, 42, 44, 45, 48, 50, 52, 54, 56, 60, 64, 66, 68, 70, 72, 75, 76, 78, 80, 84, 88, 90, 92, 96, 100, ]
        [0, 4, 6, 8, 10, 12, 15, 16, 18, 20, 24, 28, 30, 32, 36, 40, 42, 44, 45, 48, 50, 52, 54, 56, 60, 64, 66, 68, 70, 72, 75, 76, 78, 80, 84, 88, 90, 92, 96, 100, ]
        [0, 4, 6, 8, 10, 12, 15, 16, 18, 20, 24, 28, 30, 32, 36, 40, 42, 44, 45, 48, 50, 52, 54, 56, 60, 64, 66, 68, 70, 72, 75, 76, 78, 80, 84, 88, 90, 92, 96, 100, ]
        [0, 4, 6, 8, 10, 12, 15, 16, 18, 20, 24, 28, 30, 32, 36, 40, 42, 44, 45, 48, 50, 52, 54, 56, 60, 64, 66, 68, 70, 72, 75, 76, 78, 80, 84, 88, 90, 92, 96, 100, ]
        [0, 4, 6, 8, 10, 12, 15, 16, 18, 20, 24, 28, 30, 32, 36, 40, 42, 44, 45, 48, 50, 52, 54, 56, 60, 64, 66, 68, 70, 72, 75, 76, 78, 80, 84, 88, 90, 92, 96, 100, ]
        [0, 12, 20, 24, 30, 36, 40, 48, 60, 72, 80, 84, 90, 96, 100, ]
        [0, 12, 20, 24, 30, 36, 40, 48, 60, 72, 80, 84, 90, 96, 100, ]
        [0, 12, 20, 24, 30, 36, 40, 48, 60, 72, 80, 84, 90, 96, 100, ]
        [0, 12, 20, 24, 30, 36, 40, 48, 60, 72, 80, 84, 90, 96, 100, ]
        [0, 12, 20, 24, 30, 36, 40, 48, 60, 72, 80, 84, 90, 96, 100, ]
        [0, 12, 20, 24, 30, 36, 40, 48, 60, 72, 80, 84, 90, 96, 100, ]
        [0, 60, ]
        [0, 60, ]
        [0, 60, ]
        [0, 60, ]
        [0, 60, ]
        [0, 60, ]
        "###);

        let combinations = create_disjoint_combinations(parts_candidates);
        insta::assert_snapshot!(print_combinations(&combinations), @r###"
        []
        []
        []
        []
        []
        [1, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 49, 53, 59, 61, 67, 71, 73, 77, 79, 83, 89, 91, 97, ]
        []
        []
        []
        []
        []
        [2, 3, 5, 9, 14, 21, 22, 25, 26, 27, 33, 34, 35, 38, 39, 46, 51, 55, 57, 58, 62, 63, 65, 69, 74, 81, 82, 85, 86, 87, 93, 94, 95, 98, 99, ]
        []
        []
        []
        []
        []
        [4, 6, 8, 10, 15, 16, 18, 28, 32, 42, 44, 45, 50, 52, 54, 56, 64, 66, 68, 70, 75, 76, 78, 88, 92, ]
        []
        []
        []
        []
        []
        [12, 20, 24, 30, 36, 40, 48, 72, 80, 84, 90, 96, 100, ]
        []
        []
        []
        []
        []
        [0, 60, ]
        "###);

        assert_correct_combinations(&combinations, &dividers);
    }
}