Mirror of https://github.com/meilisearch/meilisearch.git, synced 2025-12-04 03:35:43 +00:00.

Compare commits: v1.1.1...search-ref (353 commits)
Commits (SHA1):

f8f190cd40 3a408e8287 d3e5b10e23 1aaf24ccbf 90bc230820 342c4ff85d c85392ce40 8875d24a48
c470b67fa2 c0e081cd98 b60840ebff fdc1763838 75819bc940 7b8cc25625 2be641f373 d89d2efb7e
f284a9c0dd 134e7fc433 0cba919228 aa63091752 58735d6d8f 1b514517f5 11f814821d 30fb1153cc
3b2c8b9f25 2a7f9adf78 608ceea440 79001b9c97 59b12fca87 48f5bb1693 93188b3c88 bc4efca611
feaf25a95d 414b3fae89 899baa0ea5 374095d42c dd007dceca 3ae587205c 1bf2694604 ed9cc1af55
b41a6cbd7a c8af572697 249053e514 ff2cf2a5ae b448aca49c 55bad07c16 380469665f 3421125a55
0b2200e6e7 0fd5ab9fcc 14293f6c8f d3a94e8b25 1944077a7f 8195d366fa cfd1b2cc97 19b044b4e6
e0730b55b3 729fa3770d 9cbc85b2f9 a3cf104736 a109802d45 2d8060df80 47b66e49b8 8f2e971879
654a3a9e19 d1fdbb63da fb9d9239b2 a7a0891210 84d9c731f8 11f4724957 85182497ab 3e4a356638
dfd9c384aa f0b4046c43 bd9aba4d77 8edad8291b 5acf953298 d9cebff61c 30f7bd03f6 df0d9bb878
5230ddb3ea d6a7c28e4d e55efc419e 644e136aee ec0ecb5515 38b7b31beb 7a01f20df7 c20c38a7fa
5ab46324c4 325f17488a e7ff987c46 244003e36f 1f813a6f3b 96183e804a 5cfb066b0a a5f44a5ceb
7ab48ed8c7 e7bb8c940f 8cb85294ef d0e9d65025 540a396e49 a81165f0d8 d6585eb10b f7d90ad19f
bc25f378e8 31630c85d0 ab09dc0167 618c54915d 130d2061bd 66ddee4390 90a6c01495 e58426109a
f513cf930a 8a13ed7e3f 1b8e4d0301 996619b22a 2c9822a337 7276deee0a 6a068fe36a f7e7f438f8
8d826e478f ba8dcc2d78 4d308d5237 7ca91ebb71 1ba8a40d61 47f6a3ad3d b4c01581cd ae17c62e24
a1148c09c2 9c5f64769a ebe23b04c9 13b7c826c1 67fd3b08ef 5440f43fd3 d9460a76f4 d1ddaa223d
f7ecea142e 337e75b0e4 b5691802a3 1690aec7f1 f267bed352 6e50f23896 597d57bf1d 4c8a0179ba
c69cbec64a 01ac8344ad 3508ba2f20 ce328c329d 959e4607bb 4b4ffb8ec9 3951fe22ab 4d5bc9df4c
ec2f8e8040 406b8bd248 62b9c6fbee b439d36807 faceb661e3 4129d657e2 1e6fe71a67 0fba08cd72
189d4c3b70 2fff6f7f23 fddfb37f1f 52b4090286 3cabfb448b 77cf5b3787 3acc5bbb15 114436926f
0f7904fb38 3f13608002 590b1d8fb7 4708d9b016 0d2e7bcc13 55fbfb6124 be9741eb8a 58fe260c72
24e5f6f7a9 0177d66149 9b87c36200 1861c69964 cb2b5eb38e 53aa0a1b54 12b26cd54e 061b1e6d7c
0d6e8b5c31 d48cdc67a0 35c16ad047 2997d1f186 2a5997fb20 ee8a9e0bad 3b0737a092 fdd02105ac
aa9592455c 01e24dd630 ae6bb1ce17 5fd28620cd 728710d63a fa81381865 b96a682f16 d0f048c068
223e82a10d 9507ff5e31 c2b025946a 3a818c5e87 7871d12025 d74134ce3a 5ac129bfa1 3fb67f94f7
cf5145b542 31bb61ba99 abb4522f76 ef084ef042 3524bd1257 d4f6216966 77acafe534 53afda3237
abb19d368d b4a52a622e 8d7d8cdc2f 626a93b348 af65fe201a 9b83b1deb0 e9eb271499 3281a88d08
5a644054ab 16fefd364e e7994cdeb3 00bad8c716 862714a18b d18ebe4f3a 7169d85115 f5f5f03ec0
9b2653427d 56b7209f26 9b1f439a91 01c7d2de8f a86aeba411 384fdc2df4 83e5b4ed0d 272cd7ebbd
c63c7377e6 9259cdb12e 5b50e49522 65474c8de5 fbb1ba3de0 a59ca28e2c 825f742000 dd491320e5
c6ff97a220 49240c367a 1e6e624078 8b4e07e1a3 2853009987 aa59c3bc2c 7b1d8f4c6d a49ddec9df
05fe856e6e c0cdaf9f53 e9cf58d584 31628c5cd4 3004e281d7 14e8d0aaa2 1c58cf8426 5155fd2bf1
9ec9c204d3 78b9304d52 0465ba4a05 2099991dd1 c232cdabf5 4e266211bf 57fa689131 10626dddfc
9051065c22 e8c76cf7bf 3f1729a17f cab2b6bcda c4979a2fda 23931f8a4f aa414565bb 1db152046e
c27ea2677f caa1e1b923 71f18e4379 600e3dd1c5 362eb0de86 998d46ac10 6c85c0d95e 0e1fbbf7c6
6806640ef0 173e37584c 6ba4d5e987 dd12d44134 a61495d660 c8e251bf24 a938fbde4a dcf3f1d18a
66d0c63694 132191360b 345c99d5bd 89d696c1e3 c645853529 a70ab8b072 48aae76b15 23bf572dea
864f6410ed c9bf6bb2fa 46249ea901 ce0d1e0e13 5065d8b0c1 a83007c013 79e0a6dd4e 2d88089129
1d937f831b 6c659dc12f a8531053a0 cf34d1c95f 1a9c58a7ab 64571c8288 72123c458b d5881519cb
ea016d97af 70c906d4b4 fa2ea4a379 030263caa3 c25779afba 0f33a65468 7c9a8b1e1b f45daf8031
df48ac8803 ff86073288 0ad53784e7 7935bef4cd e064c52544 e106b16148 eddefb0e0f c5f22be6e1
b1d61f5a02 febc8d1b52 7dc04747fd 7c0cd7172d 43ff236df8 19ab4d1a15 9287858997 df3986cd83
34ed6518ae 22219fd88f a9e17ab8c6 2dd948a4a1 76cf1bff87 c0d8eb295d bcd3f6054a 3a0314f9de
fa4d8b8348
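
Given a clone that contains both refs, roughly the same listing can be reproduced locally; this is a sketch (`search-ref` may exist only on the mirror, not on the upstream remote):

```sh
# Short SHA1s for the commits reachable from search-ref but not from v1.1.1.
git log --oneline v1.1.1..search-ref
```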
@@ -2,3 +2,4 @@ target
 Dockerfile
 .dockerignore
 .gitignore
+**/.git
.github/ISSUE_TEMPLATE/bug_report.md (3 changes)
@@ -23,7 +23,8 @@ A clear and concise description of what you expected to happen.
 **Screenshots**
 If applicable, add screenshots to help explain your problem.
 
-**Meilisearch version:** [e.g. v0.20.0]
+**Meilisearch version:**
+[e.g. v0.20.0]
 
 **Additional context**
 Additional information that may be relevant to the issue.
.github/ISSUE_TEMPLATE/sprint_issue.md (new file, 34 lines)
@@ -0,0 +1,34 @@
---
name: New sprint issue
about: ⚠️ Should only be used by the engine team ⚠️
title: ''
labels: ''
assignees: ''

---

Related product team resources: [roadmap card]() (_internal only_) and [PRD]() (_internal only_)
Related product discussion:
Related spec: WIP

## Motivation

<!---Copy/paste the information in the roadmap resources or briefly detail the product motivation. Ask product team if any hesitation.-->

## Usage

<!---Write a quick description of the usage if the usage has already been defined-->

Refer to the final spec to know the details and the final decisions about the usage.

## TODO

<!---Feel free to adapt this list with more technical/product steps-->

- [ ] Release a prototype
- [ ] If prototype validated, merge changes into `main`
- [ ] Update the spec

## Impacted teams

<!---Ping the related teams. Ask for the engine manager if any hesitation-->
.github/uffizzi/Dockerfile (deleted, 19 lines)
@@ -1,19 +0,0 @@
# Run
FROM uffizzi/ttyd:alpine

ENV MEILI_HTTP_ADDR 0.0.0.0:7700
ENV MEILI_SERVER_PROVIDER docker
ENV MEILI_NO_ANALYTICS true

RUN apk update --quiet \
    && apk add -q --no-cache libgcc tini curl

COPY target/x86_64-unknown-linux-musl/release/meilisearch /bin/meilisearch
RUN ln -s /bin/meilisearch /meilisearch

WORKDIR /meili_data

EXPOSE 7700/tcp

ENTRYPOINT ["tini", "--"]
CMD ["ttyd", "/bin/zsh"]
.github/uffizzi/docker-compose.uffizzi.yml (deleted, 26 lines)
@@ -1,26 +0,0 @@
version: "3"

x-uffizzi:
  ingress:
    service: nginx
    port: 8081

services:
  meilisearch:
    image: "${MEILISEARCH_IMAGE}"
    restart: unless-stopped
    ports:
      - "7681:7681"
      - "7700:7700"
    deploy:
      resources:
        limits:
          memory: 500M

  nginx:
    image: nginx:alpine
    restart: unless-stopped
    ports:
      - "8081:8081"
    volumes:
      - ./.github/uffizzi/nginx:/etc/nginx
.github/uffizzi/nginx/nginx.conf (deleted, 28 lines)
@@ -1,28 +0,0 @@

events {
  worker_connections 4096; ## Default: 1024
}

http {
  map $http_upgrade $connection_upgrade {
    default upgrade;
    '' close;
  }

  server {
    listen 8081;

    location / {
      proxy_pass http://localhost:7681;
      proxy_http_version 1.1;
      proxy_set_header Upgrade $http_upgrade;
      proxy_set_header Connection $connection_upgrade;
    }

    location /meilisearch/ {
      # rewrite /meilisearch/(.*) /$1 break;
      proxy_pass http://localhost:7700/;
    }
  }
}
@@ -1,4 +1,4 @@
-name: Benchmarks
+name: Benchmarks (manual)
 
 on:
   workflow_dispatch:
@@ -1,4 +1,4 @@
-name: Benchmarks indexing (push)
+name: Benchmarks of indexing (push)
 
 on:
   push:
@@ -1,4 +1,4 @@
-name: Benchmarks search geo (push)
+name: Benchmarks of search for geo (push)
 
 on:
   push:
@@ -1,4 +1,4 @@
-name: Benchmarks search songs (push)
+name: Benchmarks of search for songs (push)
 
 on:
   push:
@@ -1,4 +1,4 @@
-name: Benchmarks search wikipedia articles (push)
+name: Benchmarks of search for Wikipedia articles (push)
 
 on:
   push:
.github/workflows/create-issue-dependencies.yml (deleted, 28 lines)
@@ -1,28 +0,0 @@
name: Create issue to upgrade dependencies
on:
  schedule:
    # Run the first of the month, every 3 month
    - cron: '0 0 1 */3 *'
  workflow_dispatch:

jobs:
  create-issue:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
      - name: Create an issue
        uses: actions-ecosystem/action-create-issue@v1
        with:
          github_token: ${{ secrets.MEILI_BOT_GH_PAT }}
          title: Upgrade dependencies
          body: |
            This issue is about updating Meilisearch dependencies:
            - [ ] Cargo toml dependencies of Meilisearch; but also the main engine-team repositories that Meilisearch depends on (charabia, heed...)
            - [ ] If new Rust versions have been released, update the Rust version in the Clippy job of this [GitHub Action file](./.github/workflows/rust.yml)

            ⚠️ To avoid last minute bugs, this issue should only be done at the beginning of the sprint!

            The GitHub action dependencies are managed by [Dependabot](./.github/dependabot.yml)
          labels: |
            dependencies
            maintenance
.github/workflows/dependency-issue.yml (new file, 24 lines)
@@ -0,0 +1,24 @@
name: Create issue to upgrade dependencies

on:
  schedule:
    # Run the first of the month, every 3 month
    - cron: '0 0 1 */3 *'
  workflow_dispatch:

jobs:
  create-issue:
    runs-on: ubuntu-latest
    env:
      ISSUE_TEMPLATE: issue-template.md
      GH_TOKEN: ${{ secrets.MEILI_BOT_GH_PAT }}
    steps:
      - uses: actions/checkout@v3
      - name: Download the issue template
        run: curl -s https://raw.githubusercontent.com/meilisearch/engine-team/main/issue-templates/dependency-issue.md > $ISSUE_TEMPLATE
      - name: Create issue
        run: |
          gh issue create \
            --title 'Upgrade dependencies' \
            --label 'dependencies,maintenance' \
            --body-file $ISSUE_TEMPLATE
@@ -1,4 +1,4 @@
-name: Publish to APT repository & Homebrew
+name: Publish to APT & Homebrew
 
 on:
   release:
@@ -35,7 +35,7 @@ jobs:
       - name: Build deb package
         run: cargo deb -p meilisearch -o target/debian/meilisearch.deb
       - name: Upload debian pkg to release
-        uses: svenstaro/upload-release-action@2.4.0
+        uses: svenstaro/upload-release-action@2.5.0
         with:
           repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
           file: target/debian/meilisearch.deb
.github/workflows/publish-binaries.yml (12 changes)
@@ -1,3 +1,5 @@
+name: Publish binaries to GitHub release
+
 on:
   workflow_dispatch:
   schedule:
@@ -5,8 +7,6 @@ on:
   release:
     types: [published]
 
-name: Publish binaries to release
-
 jobs:
   check-version:
     name: Check the version validity
@@ -54,7 +54,7 @@ jobs:
       # No need to upload binaries for dry run (cron)
       - name: Upload binaries to release
         if: github.event_name == 'release'
-        uses: svenstaro/upload-release-action@2.4.0
+        uses: svenstaro/upload-release-action@2.5.0
         with:
           repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
           file: target/release/meilisearch
@@ -87,7 +87,7 @@ jobs:
       # No need to upload binaries for dry run (cron)
       - name: Upload binaries to release
         if: github.event_name == 'release'
-        uses: svenstaro/upload-release-action@2.4.0
+        uses: svenstaro/upload-release-action@2.5.0
         with:
           repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
           file: target/release/${{ matrix.artifact_name }}
@@ -121,7 +121,7 @@ jobs:
       - name: Upload the binary to release
         # No need to upload binaries for dry run (cron)
         if: github.event_name == 'release'
-        uses: svenstaro/upload-release-action@2.4.0
+        uses: svenstaro/upload-release-action@2.5.0
         with:
           repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
           file: target/${{ matrix.target }}/release/meilisearch
@@ -183,7 +183,7 @@ jobs:
       - name: Upload the binary to release
         # No need to upload binaries for dry run (cron)
         if: github.event_name == 'release'
-        uses: svenstaro/upload-release-action@2.4.0
+        uses: svenstaro/upload-release-action@2.5.0
         with:
           repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
           file: target/${{ matrix.target }}/release/meilisearch
.github/workflows/publish-docker-images.yml (12 changes)
@@ -1,4 +1,5 @@
----
+name: Publish images to Docker Hub
+
 on:
   push:
     # Will run for every tag pushed except `latest`
@@ -12,8 +13,6 @@ on:
     - cron: '0 23 * * *' # Every day at 11:00pm
   workflow_dispatch:
 
-name: Publish tagged images to Docker Hub
-
 jobs:
   docker:
     runs-on: docker
@@ -59,9 +58,13 @@ jobs:
 
       - name: Set up QEMU
         uses: docker/setup-qemu-action@v2
+        with:
+          platforms: linux/amd64,linux/arm64
 
       - name: Set up Docker Buildx
         uses: docker/setup-buildx-action@v2
+        with:
+          platforms: linux/amd64,linux/arm64
 
       - name: Login to Docker Hub
         uses: docker/login-action@v2
@@ -89,10 +92,13 @@ jobs:
           push: true
           platforms: linux/amd64,linux/arm64
           tags: ${{ steps.meta.outputs.tags }}
           builder: ${{ steps.buildx.outputs.name }}
           build-args: |
             COMMIT_SHA=${{ github.sha }}
             COMMIT_DATE=${{ steps.build-metadata.outputs.date }}
+            GIT_TAG=${{ github.ref_name }}
+          cache-from: type=gha
+          cache-to: type=gha,mode=max
 
       # /!\ Don't touch this without checking with Cloud team
       - name: Send CI information to Cloud team
.github/workflows/rust.yml (deleted, 145 lines)
@@ -1,145 +0,0 @@
name: Rust

on:
  workflow_dispatch:
  schedule:
    # Everyday at 5:00am
    - cron: '0 5 * * *'
  pull_request:
  push:
    # trying and staging branches are for Bors config
    branches:
      - trying
      - staging

env:
  CARGO_TERM_COLOR: always
  RUST_BACKTRACE: 1
  RUSTFLAGS: "-D warnings"

jobs:
  test-linux:
    name: Tests on ubuntu-18.04
    runs-on: ubuntu-latest
    container:
      # Use ubuntu-18.04 to compile with glibc 2.27, which are the production expectations
      image: ubuntu:18.04
    steps:
      - uses: actions/checkout@v3
      - name: Install needed dependencies
        run: |
          apt-get update && apt-get install -y curl
          apt-get install build-essential -y
      - name: Run test with Rust stable
        if: github.event_name != 'schedule'
        uses: actions-rs/toolchain@v1
        with:
          toolchain: stable
          override: true
      - name: Run test with Rust nightly
        if: github.event_name == 'schedule'
        uses: actions-rs/toolchain@v1
        with:
          toolchain: nightly
          override: true
      # Disable cache due to disk space issues with Windows workers in CI
      # - name: Cache dependencies
      #   uses: Swatinem/rust-cache@v2.2.0
      - name: Run cargo check without any default features
        uses: actions-rs/cargo@v1
        with:
          command: build
          args: --locked --release --no-default-features --all
      - name: Run cargo test
        uses: actions-rs/cargo@v1
        with:
          command: test
          args: --locked --release --all

  test-others:
    name: Tests on ${{ matrix.os }}
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [macos-12, windows-2022]
    steps:
      - uses: actions/checkout@v3
      # - name: Cache dependencies
      #   uses: Swatinem/rust-cache@v2.2.0
      - name: Run cargo check without any default features
        uses: actions-rs/cargo@v1
        with:
          command: build
          args: --locked --release --no-default-features --all
      - name: Run cargo test
        uses: actions-rs/cargo@v1
        with:
          command: test
          args: --locked --release --all

  # We run tests in debug also, to make sure that the debug_assertions are hit
  test-debug:
    name: Run tests in debug
    runs-on: ubuntu-latest
    container:
      # Use ubuntu-18.04 to compile with glibc 2.27, which are the production expectations
      image: ubuntu:18.04
    steps:
      - uses: actions/checkout@v3
      - name: Install needed dependencies
        run: |
          apt-get update && apt-get install -y curl
          apt-get install build-essential -y
      - uses: actions-rs/toolchain@v1
        with:
          toolchain: stable
          override: true
      # - name: Cache dependencies
      #   uses: Swatinem/rust-cache@v2.2.0
      - name: Run tests in debug
        uses: actions-rs/cargo@v1
        with:
          command: test
          args: --locked --all

  clippy:
    name: Run Clippy
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
      - uses: actions-rs/toolchain@v1
        with:
          profile: minimal
          toolchain: 1.67.0
          override: true
          components: clippy
      # - name: Cache dependencies
      #   uses: Swatinem/rust-cache@v2.2.0
      - name: Run cargo clippy
        uses: actions-rs/cargo@v1
        with:
          command: clippy
          # allow unlined_format_args https://github.com/rust-lang/rust-clippy/issues/10087
          args: --all-targets -- --deny warnings --allow clippy::uninlined_format_args

  fmt:
    name: Run Rustfmt
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
      - uses: actions-rs/toolchain@v1
        with:
          profile: minimal
          toolchain: nightly
          override: true
          components: rustfmt
      # - name: Cache dependencies
      #   uses: Swatinem/rust-cache@v2.2.0
      - name: Run cargo fmt
        # Since we never ran the `build.rs` script in the benchmark directory we are missing one auto-generated import file.
        # Since we want to trigger (and fail) this action as fast as possible, instead of building the benchmark crate
        # we are going to create an empty file where rustfmt expects it.
        run: |
          echo -ne "\n" > benchmarks/benches/datasets_paths.rs
          cargo fmt --all -- --check
.github/workflows/sdks-tests.yml (new file, 200 lines)
@@ -0,0 +1,200 @@
# If any test fails, the engine team should ensure the "breaking" changes are expected and contact the integration team
name: SDKs tests

on:
  workflow_dispatch:
  schedule:
    - cron: "0 6 * * MON" # Every Monday at 6:00AM

env:
  MEILI_MASTER_KEY: 'masterKey'
  MEILI_NO_ANALYTICS: 'true'

jobs:

  meilisearch-js-tests:
    name: JS SDK tests
    runs-on: ubuntu-latest
    services:
      meilisearch:
        image: getmeili/meilisearch:nightly
        env:
          MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }}
          MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }}
        ports:
          - '7700:7700'
    steps:
      - uses: actions/checkout@v3
        with:
          repository: meilisearch/meilisearch-js
      - name: Setup node
        uses: actions/setup-node@v3
        with:
          cache: 'yarn'
      - name: Install dependencies
        run: yarn --dev
      - name: Run tests
        run: yarn test
      - name: Build project
        run: yarn build
      - name: Run ESM env
        run: yarn test:env:esm
      - name: Run Node.js env
        run: yarn test:env:nodejs
      - name: Run node typescript env
        run: yarn test:env:node-ts
      - name: Run Browser env
        run: yarn test:env:browser

  instant-meilisearch-tests:
    name: instant-meilisearch tests
    runs-on: ubuntu-latest
    services:
      meilisearch:
        image: getmeili/meilisearch:nightly
        env:
          MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }}
          MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }}
        ports:
          - '7700:7700'
    steps:
      - uses: actions/checkout@v3
        with:
          repository: meilisearch/instant-meilisearch
      - name: Setup node
        uses: actions/setup-node@v3
        with:
          cache: yarn
      - name: Install dependencies
        run: yarn install
      - name: Run tests
        run: yarn test
      - name: Build all the playgrounds and the packages
        run: yarn build

  meilisearch-php-tests:
    name: PHP SDK tests
    runs-on: ubuntu-latest
    services:
      meilisearch:
        image: getmeili/meilisearch:nightly
        env:
          MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }}
          MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }}
        ports:
          - '7700:7700'
    steps:
      - uses: actions/checkout@v3
        with:
          repository: meilisearch/meilisearch-php
      - name: Install PHP
        uses: shivammathur/setup-php@v2
        with:
          coverage: none
      - name: Validate composer.json and composer.lock
        run: composer validate
      - name: Install dependencies
        run: |
          composer remove --dev friendsofphp/php-cs-fixer --no-update --no-interaction
          composer update --prefer-dist --no-progress
      - name: Run test suite - default HTTP client (Guzzle 7)
        run: |
          sh scripts/tests.sh
          composer remove --dev guzzlehttp/guzzle http-interop/http-factory-guzzle

  meilisearch-python-tests:
    name: Python SDK tests
    runs-on: ubuntu-latest
    services:
      meilisearch:
        image: getmeili/meilisearch:nightly
        env:
          MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }}
          MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }}
        ports:
          - '7700:7700'
    steps:
      - uses: actions/checkout@v3
        with:
          repository: meilisearch/meilisearch-python
      - name: Set up Python
        uses: actions/setup-python@v4
      - name: Install pipenv
        uses: dschep/install-pipenv-action@v1
      - name: Install dependencies
        run: pipenv install --dev --python=${{ matrix.python-version }}
      - name: Test with pytest
        run: pipenv run pytest

  meilisearch-go-tests:
    name: Go SDK tests
    runs-on: ubuntu-latest
    services:
      meilisearch:
        image: getmeili/meilisearch:nightly
        env:
          MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }}
          MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }}
        ports:
          - '7700:7700'
    steps:
      - name: Set up Go
        uses: actions/setup-go@v3
        with:
          go-version: stable
      - uses: actions/checkout@v3
        with:
          repository: meilisearch/meilisearch-go
      - name: Get dependencies
        run: |
          go get -v -t -d ./...
          if [ -f Gopkg.toml ]; then
              curl https://raw.githubusercontent.com/golang/dep/master/install.sh | sh
              dep ensure
          fi
      - name: Run integration tests
        run: go test -v ./...

  meilisearch-ruby-tests:
    name: Ruby SDK tests
    runs-on: ubuntu-latest
    services:
      meilisearch:
        image: getmeili/meilisearch:nightly
        env:
          MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }}
          MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }}
        ports:
          - '7700:7700'
    steps:
      - uses: actions/checkout@v3
        with:
          repository: meilisearch/meilisearch-ruby
      - name: Set up Ruby 3
        uses: ruby/setup-ruby@v1
        with:
          ruby-version: 3
      - name: Install ruby dependencies
        run: bundle install --with test
      - name: Run test suite
        run: bundle exec rspec

  meilisearch-rust-tests:
    name: Rust SDK tests
    runs-on: ubuntu-latest
    services:
      meilisearch:
        image: getmeili/meilisearch:nightly
        env:
          MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }}
          MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }}
        ports:
          - '7700:7700'
    steps:
      - uses: actions/checkout@v3
        with:
          repository: meilisearch/meilisearch-rust
      - name: Build
        run: cargo build --verbose
      - name: Run tests
        run: cargo test --verbose
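
Every job above runs its SDK suite against the same `getmeili/meilisearch:nightly` service container. A minimal local stand-in, assuming Docker and reusing the values from the workflow's `env` block:

```sh
# Start the nightly image the SDK jobs test against, then verify it is up.
docker run -d --name meili-nightly -p 7700:7700 \
  -e MEILI_MASTER_KEY='masterKey' \
  -e MEILI_NO_ANALYTICS='true' \
  getmeili/meilisearch:nightly
curl -s http://localhost:7700/health   # expect {"status":"available"}
```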
.github/workflows/test-suite.yml (new file, 171 lines)
@@ -0,0 +1,171 @@
name: Test suite

on:
  workflow_dispatch:
  schedule:
    # Everyday at 5:00am
    - cron: '0 5 * * *'
  pull_request:
  push:
    # trying and staging branches are for Bors config
    branches:
      - trying
      - staging

env:
  CARGO_TERM_COLOR: always
  RUST_BACKTRACE: 1
  RUSTFLAGS: "-D warnings"

jobs:
  test-linux:
    name: Tests on ubuntu-18.04
    runs-on: ubuntu-latest
    container:
      # Use ubuntu-18.04 to compile with glibc 2.27, which are the production expectations
      image: ubuntu:18.04
    steps:
      - uses: actions/checkout@v3
      - name: Install needed dependencies
        run: |
          apt-get update && apt-get install -y curl
          apt-get install build-essential -y
      - name: Run test with Rust stable
        if: github.event_name != 'schedule'
        uses: actions-rs/toolchain@v1
        with:
          toolchain: stable
          override: true
      - name: Run test with Rust nightly
        if: github.event_name == 'schedule'
        uses: actions-rs/toolchain@v1
        with:
          toolchain: nightly
          override: true
      - name: Cache dependencies
        uses: Swatinem/rust-cache@v2.2.1
      - name: Run cargo check without any default features
        uses: actions-rs/cargo@v1
        with:
          command: build
          args: --locked --release --no-default-features --all
      - name: Run cargo test
        uses: actions-rs/cargo@v1
        with:
          command: test
          args: --locked --release --all

  test-others:
    name: Tests on ${{ matrix.os }}
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [macos-12, windows-2022]
    steps:
      - uses: actions/checkout@v3
      - name: Cache dependencies
        uses: Swatinem/rust-cache@v2.2.1
      - name: Run cargo check without any default features
        uses: actions-rs/cargo@v1
        with:
          command: build
          args: --locked --release --no-default-features --all
      - name: Run cargo test
        uses: actions-rs/cargo@v1
        with:
          command: test
          args: --locked --release --all

  test-all-features:
    name: Tests all features on cron schedule only
    runs-on: ubuntu-latest
    container:
      # Use ubuntu-18.04 to compile with glibc 2.27, which are the production expectations
      image: ubuntu:18.04
    if: github.event_name == 'schedule'
    steps:
      - uses: actions/checkout@v3
      - name: Install needed dependencies
        run: |
          apt-get update
          apt-get install --assume-yes build-essential curl
      - uses: actions-rs/toolchain@v1
        with:
          toolchain: stable
          override: true
      - name: Run cargo build with all features
        uses: actions-rs/cargo@v1
        with:
          command: build
          args: --workspace --locked --release --all-features
      - name: Run cargo test with all features
        uses: actions-rs/cargo@v1
        with:
          command: test
          args: --workspace --locked --release --all-features

  # We run tests in debug also, to make sure that the debug_assertions are hit
  test-debug:
    name: Run tests in debug
    runs-on: ubuntu-latest
    container:
      # Use ubuntu-18.04 to compile with glibc 2.27, which are the production expectations
      image: ubuntu:18.04
    steps:
      - uses: actions/checkout@v3
      - name: Install needed dependencies
        run: |
          apt-get update && apt-get install -y curl
          apt-get install build-essential -y
      - uses: actions-rs/toolchain@v1
        with:
          toolchain: stable
          override: true
      - name: Cache dependencies
        uses: Swatinem/rust-cache@v2.2.1
      - name: Run tests in debug
        uses: actions-rs/cargo@v1
        with:
          command: test
          args: --locked --all

  clippy:
    name: Run Clippy
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
      - uses: actions-rs/toolchain@v1
        with:
          profile: minimal
          toolchain: 1.69.0
          override: true
          components: clippy
      - name: Cache dependencies
        uses: Swatinem/rust-cache@v2.2.1
      - name: Run cargo clippy
        uses: actions-rs/cargo@v1
        with:
          command: clippy
          args: --all-targets -- --deny warnings

  fmt:
    name: Run Rustfmt
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
      - uses: actions-rs/toolchain@v1
        with:
          profile: minimal
          toolchain: nightly
          override: true
          components: rustfmt
      - name: Cache dependencies
        uses: Swatinem/rust-cache@v2.2.1
      - name: Run cargo fmt
        # Since we never ran the `build.rs` script in the benchmark directory we are missing one auto-generated import file.
        # Since we want to trigger (and fail) this action as fast as possible, instead of building the benchmark crate
        # we are going to create an empty file where rustfmt expects it.
        run: |
          echo -ne "\n" > benchmarks/benches/datasets_paths.rs
          cargo fmt --all -- --check
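
For reference, the jobs in this workflow boil down to a handful of cargo invocations; a local sketch (the empty `datasets_paths.rs` trick is taken from the fmt job's own comment):

```sh
# Approximate local equivalents of the test-suite jobs.
cargo build --locked --release --no-default-features --all  # feature-less check
cargo test --locked --release --all                         # release tests
cargo test --locked --all                                   # debug tests, hit debug_assertions
cargo clippy --all-targets -- --deny warnings               # clippy job (Rust 1.69.0 in CI)
echo -ne "\n" > benchmarks/benches/datasets_paths.rs        # stub the generated file
cargo fmt --all -- --check                                  # fmt job
```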
.github/workflows/uffizzi-build.yml (deleted, 120 lines)
@@ -1,120 +0,0 @@
name: Uffizzi - Build PR Image
on:
  pull_request:
    types: [opened,synchronize,reopened,closed]

jobs:
  build-meilisearch:
    name: Build and push `meilisearch`
    runs-on: ubuntu-latest
    outputs:
      tags: ${{ steps.meta.outputs.tags }}
    if: ${{ github.event.action != 'closed' }}
    steps:
      - name: checkout
        uses: actions/checkout@v3

      - run: sudo apt-get install musl-tools

      - uses: actions-rs/toolchain@v1
        with:
          toolchain: stable
          override: true
          target: x86_64-unknown-linux-musl

      - name: Cache dependencies
        uses: Swatinem/rust-cache@v2.2.0

      - name: Run cargo check without any default features
        uses: actions-rs/cargo@v1
        with:
          command: build
          args: --target x86_64-unknown-linux-musl --release

      - name: Remove dockerignore so we can use the target folder in our docker build
        run: rm -f .dockerignore

      - name: Set up QEMU
        uses: docker/setup-qemu-action@v2

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v2

      - name: Generate UUID image name
        id: uuid
        run: echo "UUID_TAG=$(uuidgen)" >> $GITHUB_ENV

      - name: Docker metadata
        id: meta
        uses: docker/metadata-action@v3
        with:
          images: registry.uffizzi.com/${{ env.UUID_TAG }}
          tags: |
            type=raw,value=60d

      - name: Build Image
        uses: docker/build-push-action@v3
        with:
          context: ./
          file: .github/uffizzi/Dockerfile
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
          push: true
          cache-from: type=gha
          cache-to: type=gha,mode=max

  render-compose-file:
    name: Render Docker Compose File
    # Pass output of this workflow to another triggered by `workflow_run` event.
    runs-on: ubuntu-latest
    needs:
      - build-meilisearch
    outputs:
      compose-file-cache-key: ${{ env.COMPOSE_FILE_HASH }}
    steps:
      - name: Checkout git repo
        uses: actions/checkout@v3
      - name: Render Compose File
        run: |
          MEILISEARCH_IMAGE=$(echo ${{ needs.build-meilisearch.outputs.tags }})
          export MEILISEARCH_IMAGE
          # Render simple template from environment variables.
          envsubst < .github/uffizzi/docker-compose.uffizzi.yml > docker-compose.rendered.yml
          cat docker-compose.rendered.yml
      - name: Upload Rendered Compose File as Artifact
        uses: actions/upload-artifact@v3
        with:
          name: preview-spec
          path: docker-compose.rendered.yml
          retention-days: 2
      - name: Serialize PR Event to File
        run: |
          cat << EOF > event.json
          ${{ toJSON(github.event) }}

          EOF
      - name: Upload PR Event as Artifact
        uses: actions/upload-artifact@v3
        with:
          name: preview-spec
          path: event.json
          retention-days: 2

  delete-preview:
    name: Call for Preview Deletion
    runs-on: ubuntu-latest
    if: ${{ github.event.action == 'closed' }}
    steps:
      # If this PR is closing, we will not render a compose file nor pass it to the next workflow.
      - name: Serialize PR Event to File
        run: |
          cat << EOF > event.json
          ${{ toJSON(github.event) }}

          EOF
      - name: Upload PR Event as Artifact
        uses: actions/upload-artifact@v3
        with:
          name: preview-spec
          path: event.json
          retention-days: 2
.github/workflows/uffizzi-preview-deploy.yml (deleted, 103 lines)
@@ -1,103 +0,0 @@
name: Uffizzi - Deploy Preview

on:
  workflow_run:
    workflows:
      - "Uffizzi - Build PR Image"
    types:
      - completed

jobs:
  cache-compose-file:
    name: Cache Compose File
    runs-on: ubuntu-latest
    if: ${{ github.event.workflow_run.conclusion == 'success' }}
    outputs:
      compose-file-cache-key: ${{ env.COMPOSE_FILE_HASH }}
      pr-number: ${{ env.PR_NUMBER }}
      expected-url: ${{ env.EXPECTED_URL }}
    steps:
      - name: 'Download artifacts'
        # Fetch output (zip archive) from the workflow run that triggered this workflow.
        uses: actions/github-script@v6
        with:
          script: |
            let allArtifacts = await github.rest.actions.listWorkflowRunArtifacts({
              owner: context.repo.owner,
              repo: context.repo.repo,
              run_id: context.payload.workflow_run.id,
            });
            let matchArtifact = allArtifacts.data.artifacts.filter((artifact) => {
              return artifact.name == "preview-spec"
            })[0];
            let download = await github.rest.actions.downloadArtifact({
              owner: context.repo.owner,
              repo: context.repo.repo,
              artifact_id: matchArtifact.id,
              archive_format: 'zip',
            });
            let fs = require('fs');
            fs.writeFileSync(`${process.env.GITHUB_WORKSPACE}/preview-spec.zip`, Buffer.from(download.data));

      - name: 'Unzip artifact'
        run: unzip preview-spec.zip

      - name: Read Event into ENV
        run: |
          echo 'EVENT_JSON<<EOF' >> $GITHUB_ENV
          cat event.json >> $GITHUB_ENV
          echo 'EOF' >> $GITHUB_ENV

      - name: Hash Rendered Compose File
        id: hash
        # If the previous workflow was triggered by a PR close event, we will not have a compose file artifact.
        if: ${{ fromJSON(env.EVENT_JSON).action != 'closed' }}
        run: echo "COMPOSE_FILE_HASH=$(md5sum docker-compose.rendered.yml | awk '{ print $1 }')" >> $GITHUB_ENV

      - name: Cache Rendered Compose File
        if: ${{ fromJSON(env.EVENT_JSON).action != 'closed' }}
        uses: actions/cache@v3
        with:
          path: docker-compose.rendered.yml
          key: ${{ env.COMPOSE_FILE_HASH }}

      - name: Read PR Number From Event Object
        id: pr
        run: echo "PR_NUMBER=${{ fromJSON(env.EVENT_JSON).number }}" >> $GITHUB_ENV

      - name: DEBUG - Print Job Outputs
        if: ${{ runner.debug }}
        run: |
          echo "PR number: ${{ env.PR_NUMBER }}"
          echo "Compose file hash: ${{ env.COMPOSE_FILE_HASH }}"
          cat event.json

      - name: Add expected URL env var
        if: ${{ runner.debug }}
        run: |
          REPO=$(echo ${{ github.repository }} | sed 's/\./+/g')
          echo "EXPECTED_URL=${{ inputs.server }}/github.com/$REPO/pull/${{ env.PR_NUMBER }}" >> $GITHUB_ENV

  deploy-uffizzi-preview:
    name: Use Remote Workflow to Preview on Uffizzi
    needs:
      - cache-compose-file
    uses: UffizziCloud/preview-action/.github/workflows/reusable.yaml@v2
    with:
      # If this workflow was triggered by a PR close event, cache-key will be an empty string
      # and this reusable workflow will delete the preview deployment.
      compose-file-cache-key: ${{ needs.cache-compose-file.outputs.compose-file-cache-key }}
      compose-file-cache-path: docker-compose.rendered.yml
      server: https://app.uffizzi.com
      pr-number: ${{ needs.cache-compose-file.outputs.pr-number }}
      description: |
        The meilisearch preview environment contains a web terminal from where you can run the
        `meilisearch` command. You should be able to access this instance of meilisearch running in
        the preview from the Meilisearch Endpoint link given below.

        Web Terminal Endpoint : <uffizzi-url>
        Meilisearch Endpoint : <uffizzi-url>/meilisearch
    permissions:
      contents: read
      pull-requests: write
      id-token: write
@@ -18,7 +18,7 @@ If Meilisearch does not offer optimized support for your language, please consid
 
 ## Assumptions
 
-1. **You're familiar with [GitHub](https://github.com) and the [Pull Requests](https://help.github.com/en/github/collaborating-with-issues-and-pull-requests/about-pull-requests)(PR) workflow.**
+1. **You're familiar with [GitHub](https://github.com) and the [Pull Requests (PR)](https://help.github.com/en/github/collaborating-with-issues-and-pull-requests/about-pull-requests) workflow.**
 2. **You've read the Meilisearch [documentation](https://docs.meilisearch.com).**
 3. **You know about the [Meilisearch community](https://docs.meilisearch.com/learn/what_is_meilisearch/contact.html).
    Please use this for help.**
@@ -120,29 +120,9 @@ The full Meilisearch release process is described in [this guide](https://github
 
 Depending on the developed feature, you might need to provide a prototyped version of Meilisearch to make it easier to test by the users.
 
-The prototype name must follow this convention: `prototype-X-Y` where
-- `X` is the feature name formatted in `kebab-case`. It should not end with a single number.
-- `Y` is the version of the prototype, starting from `0`.
-
-✅ Example: `prototype-auto-resize-0`. </br>
-❌ Bad example: `auto-resize-0`: lacks the `prototype` prefix. </br>
-❌ Bad example: `prototype-auto-resize`: lacks the version suffix. </br>
-❌ Bad example: `prototype-auto-resize-0-0`: feature name ends with a single number.
-
-Steps to create a prototype:
-
-1. In your terminal, go to the last commit of your branch (the one you want to provide as a prototype).
-2. Create a tag following the convention: `git tag prototype-X-Y`
-3. Run Meilisearch and check that its launch summary features a line: `Prototype: prototype-X-Y` (you may need to switch branches and back after tagging for this to work).
-4. Push the tag: `git push origin prototype-X-Y`
-5. Check the [Docker CI](https://github.com/meilisearch/meilisearch/actions/workflows/publish-docker-images.yml) is now running.
-
-🐳 Once the CI has finished running (~1h30), a Docker image named `prototype-X-Y` will be available on [DockerHub](https://hub.docker.com/repository/docker/getmeili/meilisearch/general). People can use it with the following command: `docker run -p 7700:7700 -v $(pwd)/meili_data:/meili_data getmeili/meilisearch:prototype-X-Y`. <br>
-More information about [how to run Meilisearch with Docker](https://docs.meilisearch.com/learn/cookbooks/docker.html#download-meilisearch-with-docker).
-
-⚙️ However, no binaries will be created. If the users do not use Docker, they can go to the `prototype-X-Y` tag in the Meilisearch repository and compile from the source code.
-
-⚠️ When sharing a prototype with users, remind them to not use it in production. Prototypes are solely for test purposes.
+This happens in two steps:
+- [Release the prototype](https://github.com/meilisearch/engine-team/blob/main/resources/prototypes.md#how-to-publish-a-prototype)
+- [Communicate about it](https://github.com/meilisearch/engine-team/blob/main/resources/prototypes.md#communication)
 
 ### Release assets
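
The removed steps condense into the following shell sequence (a sketch; the branch name is illustrative, and the tag reuses the hypothetical `prototype-auto-resize-0` example from the convention above):

```sh
# Tag the tip of the feature branch following the prototype-X-Y convention.
git checkout my-feature-branch
git tag prototype-auto-resize-0
git push origin prototype-auto-resize-0
# Once the Docker CI finishes (~1h30), users can run the published image:
docker run -p 7700:7700 -v $(pwd)/meili_data:/meili_data getmeili/meilisearch:prototype-auto-resize-0
```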
Cargo.lock (generated, 1116 changes)
File diff suppressed because it is too large.
@@ -1,3 +1,4 @@
+# syntax=docker/dockerfile:1.4
 # Compile
 FROM rust:alpine3.16 AS compiler
 
@@ -11,7 +12,7 @@ ARG GIT_TAG
 ENV VERGEN_GIT_SHA=${COMMIT_SHA} VERGEN_GIT_COMMIT_TIMESTAMP=${COMMIT_DATE} VERGEN_GIT_SEMVER_LIGHTWEIGHT=${GIT_TAG}
 ENV RUSTFLAGS="-C target-feature=-crt-static"
 
-COPY . .
+COPY --link . .
 RUN set -eux; \
     apkArch="$(apk --print-arch)"; \
     if [ "$apkArch" = "aarch64" ]; then \
@@ -30,7 +31,7 @@ RUN apk update --quiet \
 
 # add meilisearch to the `/bin` so you can run it from anywhere and it's easy
 # to find.
-COPY --from=compiler /meilisearch/target/release/meilisearch /bin/meilisearch
+COPY --from=compiler --link /meilisearch/target/release/meilisearch /bin/meilisearch
 # To stay compatible with the older version of the container (pre v0.27.0) we're
 # going to symlink the meilisearch binary in the path to `/meilisearch`
 RUN ln -s /bin/meilisearch /meilisearch
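
Both `--link` flags and the `# syntax` directive rely on BuildKit. A sketch of a local build that passes in the same build args the Docker CI supplies, with illustrative values derived from git:

```sh
# BuildKit is required for the dockerfile:1.4 syntax and COPY --link.
DOCKER_BUILDKIT=1 docker build \
  --build-arg COMMIT_SHA="$(git rev-parse HEAD)" \
  --build-arg COMMIT_DATE="$(git show -s --format=%cI HEAD)" \
  --build-arg GIT_TAG="$(git describe --tags --always)" \
  -t meilisearch:local .
```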
README.md (43 changes)
@@ -7,8 +7,8 @@
   <a href="https://www.meilisearch.com">Website</a> |
   <a href="https://roadmap.meilisearch.com/tabs/1-under-consideration">Roadmap</a> |
   <a href="https://blog.meilisearch.com">Blog</a> |
-  <a href="https://docs.meilisearch.com">Documentation</a> |
-  <a href="https://docs.meilisearch.com/faq/">FAQ</a> |
+  <a href="https://meilisearch.com/docs">Documentation</a> |
+  <a href="https://meilisearch.com/docs/faq">FAQ</a> |
   <a href="https://discord.meilisearch.com">Discord</a>
 </h4>
@@ -36,27 +36,27 @@ Meilisearch helps you shape a delightful search experience in a snap, offering f
 ## ✨ Features
 
 - **Search-as-you-type:** find search results in less than 50 milliseconds
-- **[Typo tolerance](https://docs.meilisearch.com/learn/getting_started/customizing_relevancy.html#typo-tolerance):** get relevant matches even when queries contain typos and misspellings
-- **[Filtering and faceted search](https://docs.meilisearch.com/learn/advanced/filtering_and_faceted_search.html):** enhance your user's search experience with custom filters and build a faceted search interface in a few lines of code
-- **[Sorting](https://docs.meilisearch.com/learn/advanced/sorting.html):** sort results based on price, date, or pretty much anything else your users need
-- **[Synonym support](https://docs.meilisearch.com/learn/getting_started/customizing_relevancy.html#synonyms):** configure synonyms to include more relevant content in your search results
-- **[Geosearch](https://docs.meilisearch.com/learn/advanced/geosearch.html):** filter and sort documents based on geographic data
-- **[Extensive language support](https://docs.meilisearch.com/learn/what_is_meilisearch/language.html):** search datasets in any language, with optimized support for Chinese, Japanese, Hebrew, and languages using the Latin alphabet
-- **[Security management](https://docs.meilisearch.com/learn/security/master_api_keys.html):** control which users can access what data with API keys that allow fine-grained permissions handling
-- **[Multi-Tenancy](https://docs.meilisearch.com/learn/security/tenant_tokens.html):** personalize search results for any number of application tenants
+- **[Typo tolerance](https://meilisearch.com/docs/learn/getting_started/customizing_relevancy#typo-tolerance):** get relevant matches even when queries contain typos and misspellings
+- **[Filtering](https://meilisearch.com/docs/learn/advanced/filtering) and [faceted search](https://meilisearch.com/docs/learn/advanced/faceted_search):** enhance your user's search experience with custom filters and build a faceted search interface in a few lines of code
+- **[Sorting](https://meilisearch.com/docs/learn/advanced/sorting):** sort results based on price, date, or pretty much anything else your users need
+- **[Synonym support](https://meilisearch.com/docs/learn/getting_started/customizing_relevancy#synonyms):** configure synonyms to include more relevant content in your search results
+- **[Geosearch](https://meilisearch.com/docs/learn/advanced/geosearch):** filter and sort documents based on geographic data
+- **[Extensive language support](https://meilisearch.com/docs/learn/what_is_meilisearch/language):** search datasets in any language, with optimized support for Chinese, Japanese, Hebrew, and languages using the Latin alphabet
+- **[Security management](https://meilisearch.com/docs/learn/security/master_api_keys):** control which users can access what data with API keys that allow fine-grained permissions handling
+- **[Multi-Tenancy](https://meilisearch.com/docs/learn/security/tenant_tokens):** personalize search results for any number of application tenants
 - **Highly Customizable:** customize Meilisearch to your specific needs or use our out-of-the-box and hassle-free presets
-- **[RESTful API](https://docs.meilisearch.com/reference/api/overview.html):** integrate Meilisearch in your technical stack with our plugins and SDKs
+- **[RESTful API](https://meilisearch.com/docs/reference/api/overview):** integrate Meilisearch in your technical stack with our plugins and SDKs
 - **Easy to install, deploy, and maintain**
 
 ## 📖 Documentation
 
-You can consult Meilisearch's documentation at [https://docs.meilisearch.com](https://docs.meilisearch.com/).
+You can consult Meilisearch's documentation at [https://meilisearch.com/docs](https://meilisearch.com/docs/).
 
 ## 🚀 Getting started
 
-For basic instructions on how to set up Meilisearch, add documents to an index, and search for documents, take a look at our [Quick Start](https://docs.meilisearch.com/learn/getting_started/quick_start.html) guide.
+For basic instructions on how to set up Meilisearch, add documents to an index, and search for documents, take a look at our [Quick Start](https://meilisearch.com/docs/learn/getting_started/quick_start) guide.
 
-You may also want to check out [Meilisearch 101](https://docs.meilisearch.com/learn/getting_started/filtering_and_sorting.html) for an introduction to some of Meilisearch's most popular features.
+You may also want to check out [Meilisearch 101](https://meilisearch.com/docs/learn/getting_started/filtering_and_sorting) for an introduction to some of Meilisearch's most popular features.
 
 ## ☁️ Meilisearch cloud
@@ -66,25 +66,25 @@ Let us manage your infrastructure so you can focus on integrating a great search
 
 Install one of our SDKs in your project for seamless integration between Meilisearch and your favorite language or framework!
 
-Take a look at the complete [Meilisearch integration list](https://docs.meilisearch.com/learn/what_is_meilisearch/sdks.html).
+Take a look at the complete [Meilisearch integration list](https://meilisearch.com/docs/learn/what_is_meilisearch/sdks).
 
-[![Language integration illustration](assets/integrations.png)](https://docs.meilisearch.com/learn/what_is_meilisearch/sdks.html)
+[![Language integration illustration](assets/integrations.png)](https://www.meilisearch.com/docs/learn/what_is_meilisearch/sdks)
 
 ## ⚙️ Advanced usage
 
-Experienced users will want to keep our [API Reference](https://docs.meilisearch.com/reference/api) close at hand.
+Experienced users will want to keep our [API Reference](https://www.meilisearch.com/docs/reference/api/overview) close at hand.
 
-We also offer a wide range of dedicated guides to all Meilisearch features, such as [filtering](https://docs.meilisearch.com/learn/advanced/filtering_and_faceted_search.html), [sorting](https://docs.meilisearch.com/learn/advanced/sorting.html), [geosearch](https://docs.meilisearch.com/learn/advanced/geosearch.html), [API keys](https://docs.meilisearch.com/learn/security/master_api_keys.html), and [tenant tokens](https://docs.meilisearch.com/learn/security/tenant_tokens.html).
+We also offer a wide range of dedicated guides to all Meilisearch features, such as [filtering](https://meilisearch.com/docs/learn/advanced/filtering), [sorting](https://meilisearch.com/docs/learn/advanced/sorting), [geosearch](https://meilisearch.com/docs/learn/advanced/geosearch), [API keys](https://meilisearch.com/docs/learn/security/master_api_keys), and [tenant tokens](https://meilisearch.com/docs/learn/security/tenant_tokens).
 
-Finally, for more in-depth information, refer to our articles explaining fundamental Meilisearch concepts such as [documents](https://docs.meilisearch.com/learn/core_concepts/documents.html) and [indexes](https://docs.meilisearch.com/learn/core_concepts/indexes.html).
+Finally, for more in-depth information, refer to our articles explaining fundamental Meilisearch concepts such as [documents](https://meilisearch.com/docs/learn/core_concepts/documents) and [indexes](https://meilisearch.com/docs/learn/core_concepts/indexes).
 
 ## 📊 Telemetry
 
-Meilisearch collects **anonymized** data from users to help us improve our product. You can [deactivate this](https://docs.meilisearch.com/learn/what_is_meilisearch/telemetry.html#how-to-disable-data-collection) whenever you want.
+Meilisearch collects **anonymized** data from users to help us improve our product. You can [deactivate this](https://meilisearch.com/docs/learn/what_is_meilisearch/telemetry#how-to-disable-data-collection) whenever you want.
 
 To request deletion of collected data, please write to us at [privacy@meilisearch.com](mailto:privacy@meilisearch.com). Don't forget to include your `Instance UID` in the message, as this helps us quickly find and delete your data.
 
-If you want to know more about the kind of data we collect and what we use it for, check the [telemetry section](https://docs.meilisearch.com/learn/what_is_meilisearch/telemetry.html) of our documentation.
+If you want to know more about the kind of data we collect and what we use it for, check the [telemetry section](https://meilisearch.com/docs/learn/what_is_meilisearch/telemetry) of our documentation.
 
 ## 📫 Get in touch!
@@ -97,7 +97,6 @@ Meilisearch is a search engine created by [Meili](https://www.welcometothejungle
 - For feature requests, please visit our [product repository](https://github.com/meilisearch/product/discussions)
 - Found a bug? Open an [issue](https://github.com/meilisearch/meilisearch/issues)!
 - Want to be part of our Discord community? [Join us!](https://discord.gg/meilisearch)
-- For everything else, please check [this page listing some of the other places where you can find us](https://docs.meilisearch.com/learn/what_is_meilisearch/contact.html)
 
 Thank you for your support!
@@ -11,11 +11,11 @@ edition.workspace = true
license.workspace = true

[dependencies]
anyhow = "1.0.65"
csv = "1.1.6"
anyhow = "1.0.70"
csv = "1.2.1"
milli = { path = "../milli", default-features = false }
mimalloc = { version = "0.1.29", default-features = false }
serde_json = { version = "1.0.85", features = ["preserve_order"] }
mimalloc = { version = "0.1.36", default-features = false }
serde_json = { version = "1.0.95", features = ["preserve_order"] }

[dev-dependencies]
criterion = { version = "0.4.0", features = ["html_reports"] }
@@ -24,11 +24,11 @@ rand_chacha = "0.3.1"
roaring = "0.10.1"

[build-dependencies]
anyhow = "1.0.65"
bytes = "1.2.1"
anyhow = "1.0.70"
bytes = "1.4.0"
convert_case = "0.6.0"
flate2 = "1.0.24"
reqwest = { version = "0.11.12", features = ["blocking", "rustls-tls"], default-features = false }
flate2 = "1.0.25"
reqwest = { version = "0.11.16", features = ["blocking", "rustls-tls"], default-features = false }

[features]
default = ["milli/default"]

@@ -48,7 +48,3 @@ harness = false
[[bench]]
name = "indexing"
harness = false

[[bench]]
name = "formatting"
harness = false
@@ -1,67 +0,0 @@
use std::rc::Rc;

use criterion::{criterion_group, criterion_main};
use milli::tokenizer::TokenizerBuilder;
use milli::{FormatOptions, MatcherBuilder, MatchingWord, MatchingWords};

#[global_allocator]
static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc;

struct Conf<'a> {
    name: &'a str,
    text: &'a str,
    matching_words: MatcherBuilder<'a, Vec<u8>>,
}

fn bench_formatting(c: &mut criterion::Criterion) {
    #[rustfmt::skip]
    let confs = &[
        Conf {
            name: "'the door d'",
            text: r#"He used to do the door sounds in "Star Trek" with his mouth, phssst, phssst. The MD-11 passenger and cargo doors also tend to behave like electromagnetic apertures, because the doors do not have continuous electrical contact with the door frames around the door perimeter. But Theodor said that the doors don't work."#,
            matching_words: MatcherBuilder::new(MatchingWords::new(vec![
                (vec![Rc::new(MatchingWord::new("t".to_string(), 0, false).unwrap()), Rc::new(MatchingWord::new("he".to_string(), 0, false).unwrap())], vec![0]),
                (vec![Rc::new(MatchingWord::new("the".to_string(), 0, false).unwrap())], vec![0]),
                (vec![Rc::new(MatchingWord::new("door".to_string(), 1, false).unwrap())], vec![1]),
                (vec![Rc::new(MatchingWord::new("do".to_string(), 0, false).unwrap()), Rc::new(MatchingWord::new("or".to_string(), 0, false).unwrap())], vec![0]),
                (vec![Rc::new(MatchingWord::new("thedoor".to_string(), 1, false).unwrap())], vec![0, 1]),
                (vec![Rc::new(MatchingWord::new("d".to_string(), 0, true).unwrap())], vec![2]),
                (vec![Rc::new(MatchingWord::new("thedoord".to_string(), 1, true).unwrap())], vec![0, 1, 2]),
                (vec![Rc::new(MatchingWord::new("doord".to_string(), 1, true).unwrap())], vec![1, 2]),
            ]
            ).unwrap(), TokenizerBuilder::default().build()),
        },
    ];

    let format_options = &[
        FormatOptions { highlight: false, crop: None },
        FormatOptions { highlight: true, crop: None },
        FormatOptions { highlight: false, crop: Some(10) },
        FormatOptions { highlight: true, crop: Some(10) },
        FormatOptions { highlight: false, crop: Some(20) },
        FormatOptions { highlight: true, crop: Some(20) },
    ];

    for option in format_options {
        let highlight = if option.highlight { "highlight" } else { "no-highlight" };

        let name = match option.crop {
            Some(size) => format!("{}-crop({})", highlight, size),
            None => format!("{}-no-crop", highlight),
        };

        let mut group = c.benchmark_group(&name);
        for conf in confs {
            group.bench_function(conf.name, |b| {
                b.iter(|| {
                    let mut matcher = conf.matching_words.build(conf.text);
                    matcher.format(*option);
                })
            });
        }
        group.finish();
    }
}

criterion_group!(benches, bench_formatting);
criterion_main!(benches);
@@ -103,7 +103,7 @@ not_available_failure_usage() {
    printf "$RED%s\n$DEFAULT" 'ERROR: Meilisearch binary is not available for your OS distribution or your architecture yet.'
    echo ''
    echo 'However, you can easily compile the binary from the source files.'
    echo 'Follow the steps at the page ("Source" tab): https://docs.meilisearch.com/learn/getting_started/installation.html'
    echo 'Follow the steps at the page ("Source" tab): https://www.meilisearch.com/docs/learn/getting_started/installation'
}

fetch_release_failure_usage() {
@@ -11,22 +11,22 @@ readme.workspace = true
license.workspace = true

[dependencies]
anyhow = "1.0.65"
flate2 = "1.0.22"
http = "0.2.8"
anyhow = "1.0.70"
flate2 = "1.0.25"
http = "0.2.9"
log = "0.4.17"
meilisearch-auth = { path = "../meilisearch-auth" }
meilisearch-types = { path = "../meilisearch-types" }
once_cell = "1.15.0"
regex = "1.6.0"
roaring = { version = "0.10.0", features = ["serde"] }
serde = { version = "1.0.136", features = ["derive"] }
serde_json = { version = "1.0.85", features = ["preserve_order"] }
once_cell = "1.17.1"
regex = "1.7.3"
roaring = { version = "0.10.1", features = ["serde"] }
serde = { version = "1.0.160", features = ["derive"] }
serde_json = { version = "1.0.95", features = ["preserve_order"] }
tar = "0.4.38"
tempfile = "3.3.0"
thiserror = "1.0.30"
time = { version = "0.3.7", features = ["serde-well-known", "formatting", "parsing", "macros"] }
uuid = { version = "1.1.2", features = ["serde", "v4"] }
tempfile = "3.5.0"
thiserror = "1.0.40"
time = { version = "0.3.20", features = ["serde-well-known", "formatting", "parsing", "macros"] }
uuid = { version = "1.3.1", features = ["serde", "v4"] }

[dev-dependencies]
big_s = "1.0.2"
@@ -25,7 +25,6 @@ impl CompatV2ToV3 {
            CompatV2ToV3::Compat(compat) => compat.index_uuid(),
        };
        v2_uuids
            .into_iter()
            .into_iter()
            .map(|index| v3::meta::IndexUuid { uid: index.uid, uuid: index.uuid })
            .collect()
@@ -11,9 +11,9 @@ edition.workspace = true
license.workspace = true

[dependencies]
tempfile = "3.3.0"
thiserror = "1.0.30"
uuid = { version = "1.1.2", features = ["serde", "v4"] }
tempfile = "3.5.0"
thiserror = "1.0.40"
uuid = { version = "1.3.1", features = ["serde", "v4"] }

[dev-dependencies]
faux = "0.1.8"
faux = "0.1.9"
@@ -12,8 +12,8 @@ edition.workspace = true
license.workspace = true

[dependencies]
nom = "7.1.1"
nom_locate = "4.0.0"
nom = "7.1.3"
nom_locate = "4.1.0"

[dev-dependencies]
insta = "1.21.0"
insta = "1.29.0"
@@ -20,6 +20,8 @@ pub enum Condition<'a> {
    GreaterThanOrEqual(Token<'a>),
    Equal(Token<'a>),
    NotEqual(Token<'a>),
    Null,
    Empty,
    Exists,
    LowerThan(Token<'a>),
    LowerThanOrEqual(Token<'a>),
@@ -44,6 +46,38 @@ pub fn parse_condition(input: Span) -> IResult<FilterCondition> {
    Ok((input, condition))
}

/// null = value "IS" WS+ "NULL"
pub fn parse_is_null(input: Span) -> IResult<FilterCondition> {
    let (input, key) = parse_value(input)?;

    let (input, _) = tuple((tag("IS"), multispace1, tag("NULL")))(input)?;
    Ok((input, FilterCondition::Condition { fid: key, op: Null }))
}

/// not null = value "IS" WS+ "NOT" WS+ "NULL"
pub fn parse_is_not_null(input: Span) -> IResult<FilterCondition> {
    let (input, key) = parse_value(input)?;

    let (input, _) = tuple((tag("IS"), multispace1, tag("NOT"), multispace1, tag("NULL")))(input)?;
    Ok((input, FilterCondition::Not(Box::new(FilterCondition::Condition { fid: key, op: Null }))))
}

/// empty = value "IS" WS+ "EMPTY"
pub fn parse_is_empty(input: Span) -> IResult<FilterCondition> {
    let (input, key) = parse_value(input)?;

    let (input, _) = tuple((tag("IS"), multispace1, tag("EMPTY")))(input)?;
    Ok((input, FilterCondition::Condition { fid: key, op: Empty }))
}

/// not empty = value "IS" WS+ "NOT" WS+ "EMPTY"
pub fn parse_is_not_empty(input: Span) -> IResult<FilterCondition> {
    let (input, key) = parse_value(input)?;

    let (input, _) = tuple((tag("IS"), multispace1, tag("NOT"), multispace1, tag("EMPTY")))(input)?;
    Ok((input, FilterCondition::Not(Box::new(FilterCondition::Condition { fid: key, op: Empty }))))
}

/// exist = value "EXISTS"
pub fn parse_exists(input: Span) -> IResult<FilterCondition> {
    let (input, key) = terminated(parse_value, tag("EXISTS"))(input)?;
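For orientation, a minimal sketch of what the new grammar accepts, assuming the crate's public `FilterCondition::parse` entry point (which the `p()` test helper later in this diff presumably wraps; the exact signature is an assumption, not shown here):

```rust
// Sketch only: `FilterCondition::parse` is assumed to return a
// `Result<Option<FilterCondition>, Error>`, as elsewhere in this crate.
use filter_parser::FilterCondition;

fn main() {
    // `IS NULL` / `IS EMPTY` parse to the new `Null` / `Empty` conditions...
    let cond = FilterCondition::parse("release_date IS NULL").unwrap();
    println!("{cond:?}");

    // ...while the negated forms desugar to `Not(Condition)`, exactly like
    // the existing `NOT ... EXISTS` handling in this file.
    let cond = FilterCondition::parse("release_date IS NOT EMPTY").unwrap();
    println!("{cond:?}");
}
```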
@@ -143,11 +143,9 @@ impl<'a> Display for Error<'a> {
            ErrorKind::MissingClosingDelimiter(c) => {
                writeln!(f, "Expression `{}` is missing the following closing delimiter: `{}`.", escaped_input, c)?
            }
            ErrorKind::InvalidPrimary if input.trim().is_empty() => {
                writeln!(f, "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `_geoRadius`, or `_geoBoundingBox` but instead got nothing.")?
            }
            ErrorKind::InvalidPrimary => {
                writeln!(f, "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `_geoRadius`, or `_geoBoundingBox` at `{}`.", escaped_input)?
                let text = if input.trim().is_empty() { "but instead got nothing.".to_string() } else { format!("at `{}`.", escaped_input) };
                writeln!(f, "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` {}", text)?
            }
            ErrorKind::ExpectedEof => {
                writeln!(f, "Found unexpected characters at the end of the filter: `{}`. You probably forgot an `OR` or an `AND` rule.", escaped_input)?

@@ -159,7 +157,7 @@ impl<'a> Display for Error<'a> {
                writeln!(f, "The `_geoBoundingBox` filter expects two pairs of arguments: `_geoBoundingBox([latitude, longitude], [latitude, longitude])`.")?
            }
            ErrorKind::ReservedGeo(name) => {
                writeln!(f, "`{}` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance), or _geoBoundingBox([latitude, longitude], [latitude, longitude]) built-in rules to filter on `_geo` coordinates.", name.escape_debug())?
                writeln!(f, "`{}` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.", name.escape_debug())?
            }
            ErrorKind::MisusedGeoRadius => {
                writeln!(f, "The `_geoRadius` filter is an operation and can't be used as a value.")?
@@ -47,7 +47,10 @@ mod value;
use std::fmt::Debug;

pub use condition::{parse_condition, parse_to, Condition};
use condition::{parse_exists, parse_not_exists};
use condition::{
    parse_exists, parse_is_empty, parse_is_not_empty, parse_is_not_null, parse_is_null,
    parse_not_exists,
};
use error::{cut_with_err, ExpectedValueKind, NomErrorExt};
pub use error::{Error, ErrorKind};
use nom::branch::alt;
@@ -382,6 +385,34 @@ fn parse_geo_point(input: Span) -> IResult<FilterCondition> {
    Err(nom::Err::Failure(Error::new_from_kind(input, ErrorKind::ReservedGeo("_geoPoint"))))
}

/// geoDistance = WS* "_geoDistance(float WS* "," WS* float WS* "," WS* float)
fn parse_geo_distance(input: Span) -> IResult<FilterCondition> {
    // we want to forbid space BEFORE the _geoDistance but not after
    tuple((
        multispace0,
        tag("_geoDistance"),
        // if we were able to parse `_geoDistance` we are going to return a Failure whatever happens next.
        cut(delimited(char('('), separated_list1(tag(","), ws(recognize_float)), char(')'))),
    ))(input)
    .map_err(|e| e.map(|_| Error::new_from_kind(input, ErrorKind::ReservedGeo("_geoDistance"))))?;
    // if we succeeded we still return a `Failure` because `geoDistance` filters are not allowed
    Err(nom::Err::Failure(Error::new_from_kind(input, ErrorKind::ReservedGeo("_geoDistance"))))
}

/// geo = WS* "_geo(float WS* "," WS* float WS* "," WS* float)
fn parse_geo(input: Span) -> IResult<FilterCondition> {
    // we want to forbid space BEFORE the _geo but not after
    tuple((
        multispace0,
        word_exact("_geo"),
        // if we were able to parse `_geo` we are going to return a Failure whatever happens next.
        cut(delimited(char('('), separated_list1(tag(","), ws(recognize_float)), char(')'))),
    ))(input)
    .map_err(|e| e.map(|_| Error::new_from_kind(input, ErrorKind::ReservedGeo("_geo"))))?;
    // if we succeeded we still return a `Failure` because the `_geo` filter is not allowed
    Err(nom::Err::Failure(Error::new_from_kind(input, ErrorKind::ReservedGeo("_geo"))))
}

fn parse_error_reserved_keyword(input: Span) -> IResult<FilterCondition> {
    match parse_condition(input) {
        Ok(result) => Ok(result),

@@ -414,10 +445,16 @@ fn parse_primary(input: Span, depth: usize) -> IResult<FilterCondition> {
        parse_in,
        parse_not_in,
        parse_condition,
        parse_is_null,
        parse_is_not_null,
        parse_is_empty,
        parse_is_not_empty,
        parse_exists,
        parse_not_exists,
        parse_to,
        // the next lines are only for error handling and are written at the end to have the least possible performance impact
        parse_geo,
        parse_geo_distance,
        parse_geo_point,
        parse_error_reserved_keyword,
    ))(input)
@@ -496,14 +533,30 @@ pub mod tests {
        insta::assert_display_snapshot!(p("subscribers <= 1000"), @"{subscribers} <= {1000}");
        insta::assert_display_snapshot!(p("subscribers 100 TO 1000"), @"{subscribers} {100} TO {1000}");

        // Test NOT + EXISTS
        insta::assert_display_snapshot!(p("subscribers EXISTS"), @"{subscribers} EXISTS");
        // Test NOT
        insta::assert_display_snapshot!(p("NOT subscribers < 1000"), @"NOT ({subscribers} < {1000})");
        insta::assert_display_snapshot!(p("NOT subscribers 100 TO 1000"), @"NOT ({subscribers} {100} TO {1000})");

        // Test NULL + NOT NULL
        insta::assert_display_snapshot!(p("subscribers IS NULL"), @"{subscribers} IS NULL");
        insta::assert_display_snapshot!(p("NOT subscribers IS NULL"), @"NOT ({subscribers} IS NULL)");
        insta::assert_display_snapshot!(p("subscribers IS NOT NULL"), @"NOT ({subscribers} IS NULL)");
        insta::assert_display_snapshot!(p("NOT subscribers IS NOT NULL"), @"{subscribers} IS NULL");
        insta::assert_display_snapshot!(p("subscribers IS NOT NULL"), @"NOT ({subscribers} IS NULL)");

        // Test EMPTY + NOT EMPTY
        insta::assert_display_snapshot!(p("subscribers IS EMPTY"), @"{subscribers} IS EMPTY");
        insta::assert_display_snapshot!(p("NOT subscribers IS EMPTY"), @"NOT ({subscribers} IS EMPTY)");
        insta::assert_display_snapshot!(p("subscribers IS NOT EMPTY"), @"NOT ({subscribers} IS EMPTY)");
        insta::assert_display_snapshot!(p("NOT subscribers IS NOT EMPTY"), @"{subscribers} IS EMPTY");
        insta::assert_display_snapshot!(p("subscribers IS NOT EMPTY"), @"NOT ({subscribers} IS EMPTY)");

        // Test EXISTS + NOT EXISTS
        insta::assert_display_snapshot!(p("subscribers EXISTS"), @"{subscribers} EXISTS");
        insta::assert_display_snapshot!(p("NOT subscribers EXISTS"), @"NOT ({subscribers} EXISTS)");
        insta::assert_display_snapshot!(p("subscribers NOT EXISTS"), @"NOT ({subscribers} EXISTS)");
        insta::assert_display_snapshot!(p("NOT subscribers NOT EXISTS"), @"{subscribers} EXISTS");
        insta::assert_display_snapshot!(p("subscribers NOT EXISTS"), @"NOT ({subscribers} EXISTS)");
        insta::assert_display_snapshot!(p("NOT subscribers 100 TO 1000"), @"NOT ({subscribers} {100} TO {1000})");

        // Test nested NOT
        insta::assert_display_snapshot!(p("NOT NOT NOT NOT x = 5"), @"{x} = {5}");
@@ -576,7 +629,7 @@ pub mod tests {
        "###);

        insta::assert_display_snapshot!(p("'OR'"), @r###"
        Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `_geoRadius`, or `_geoBoundingBox` at `\'OR\'`.
        Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `\'OR\'`.
        1:5 'OR'
        "###);

@@ -586,12 +639,12 @@ pub mod tests {
        "###);

        insta::assert_display_snapshot!(p("channel Ponce"), @r###"
        Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `_geoRadius`, or `_geoBoundingBox` at `channel Ponce`.
        Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `channel Ponce`.
        1:14 channel Ponce
        "###);

        insta::assert_display_snapshot!(p("channel = Ponce OR"), @r###"
        Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `_geoRadius`, or `_geoBoundingBox` but instead got nothing.
        Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` but instead got nothing.
        19:19 channel = Ponce OR
        "###);
@@ -621,15 +674,35 @@ pub mod tests {
        "###);

        insta::assert_display_snapshot!(p("_geoPoint(12, 13, 14)"), @r###"
        `_geoPoint` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance), or _geoBoundingBox([latitude, longitude], [latitude, longitude]) built-in rules to filter on `_geo` coordinates.
        `_geoPoint` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.
        1:22 _geoPoint(12, 13, 14)
        "###);

        insta::assert_display_snapshot!(p("position <= _geoPoint(12, 13, 14)"), @r###"
        `_geoPoint` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance), or _geoBoundingBox([latitude, longitude], [latitude, longitude]) built-in rules to filter on `_geo` coordinates.
        `_geoPoint` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.
        13:34 position <= _geoPoint(12, 13, 14)
        "###);

        insta::assert_display_snapshot!(p("_geoDistance(12, 13, 14)"), @r###"
        `_geoDistance` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.
        1:25 _geoDistance(12, 13, 14)
        "###);

        insta::assert_display_snapshot!(p("position <= _geoDistance(12, 13, 14)"), @r###"
        `_geoDistance` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.
        13:37 position <= _geoDistance(12, 13, 14)
        "###);

        insta::assert_display_snapshot!(p("_geo(12, 13, 14)"), @r###"
        `_geo` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.
        1:17 _geo(12, 13, 14)
        "###);

        insta::assert_display_snapshot!(p("position <= _geo(12, 13, 14)"), @r###"
        `_geo` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.
        13:29 position <= _geo(12, 13, 14)
        "###);

        insta::assert_display_snapshot!(p("position <= _geoRadius(12, 13, 14)"), @r###"
        The `_geoRadius` filter is an operation and can't be used as a value.
        13:35 position <= _geoRadius(12, 13, 14)
@@ -656,12 +729,12 @@ pub mod tests {
        "###);

        insta::assert_display_snapshot!(p("colour NOT EXIST"), @r###"
        Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `_geoRadius`, or `_geoBoundingBox` at `colour NOT EXIST`.
        Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `colour NOT EXIST`.
        1:17 colour NOT EXIST
        "###);

        insta::assert_display_snapshot!(p("subscribers 100 TO1000"), @r###"
        Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `_geoRadius`, or `_geoBoundingBox` at `subscribers 100 TO1000`.
        Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `subscribers 100 TO1000`.
        1:23 subscribers 100 TO1000
        "###);
@@ -722,6 +795,39 @@ pub mod tests {
        Was expecting a value but instead got `OR`, which is a reserved keyword. To use `OR` as a field name or a value, surround it by quotes.
        5:7 NOT OR EXISTS AND EXISTS NOT EXISTS
        "###);

        insta::assert_display_snapshot!(p(r#"value NULL"#), @r###"
        Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `value NULL`.
        1:11 value NULL
        "###);
        insta::assert_display_snapshot!(p(r#"value NOT NULL"#), @r###"
        Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `value NOT NULL`.
        1:15 value NOT NULL
        "###);
        insta::assert_display_snapshot!(p(r#"value EMPTY"#), @r###"
        Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `value EMPTY`.
        1:12 value EMPTY
        "###);
        insta::assert_display_snapshot!(p(r#"value NOT EMPTY"#), @r###"
        Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `value NOT EMPTY`.
        1:16 value NOT EMPTY
        "###);
        insta::assert_display_snapshot!(p(r#"value IS"#), @r###"
        Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `value IS`.
        1:9 value IS
        "###);
        insta::assert_display_snapshot!(p(r#"value IS NOT"#), @r###"
        Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `value IS NOT`.
        1:13 value IS NOT
        "###);
        insta::assert_display_snapshot!(p(r#"value IS EXISTS"#), @r###"
        Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `value IS EXISTS`.
        1:16 value IS EXISTS
        "###);
        insta::assert_display_snapshot!(p(r#"value IS NOT EXISTS"#), @r###"
        Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `value IS NOT EXISTS`.
        1:20 value IS NOT EXISTS
        "###);
    }

    #[test]
@@ -803,6 +909,8 @@ impl<'a> std::fmt::Display for Condition<'a> {
            Condition::GreaterThanOrEqual(token) => write!(f, ">= {token}"),
            Condition::Equal(token) => write!(f, "= {token}"),
            Condition::NotEqual(token) => write!(f, "!= {token}"),
            Condition::Null => write!(f, "IS NULL"),
            Condition::Empty => write!(f, "IS EMPTY"),
            Condition::Exists => write!(f, "EXISTS"),
            Condition::LowerThan(token) => write!(f, "< {token}"),
            Condition::LowerThanOrEqual(token) => write!(f, "<= {token}"),
@@ -7,8 +7,8 @@ use nom::{InputIter, InputLength, InputTake, Slice};

use crate::error::{ExpectedValueKind, NomErrorExt};
use crate::{
    parse_geo_bounding_box, parse_geo_point, parse_geo_radius, Error, ErrorKind, IResult, Span,
    Token,
    parse_geo, parse_geo_bounding_box, parse_geo_distance, parse_geo_point, parse_geo_radius,
    Error, ErrorKind, IResult, Span, Token,
};
/// This function goes through all characters in the [Span] if it finds any escaped character (`\`).

@@ -88,11 +88,16 @@ pub fn parse_value(input: Span) -> IResult<Token> {
    // then, we want to check if the user is misusing a geo expression
    // This expression can’t finish without error.
    // We want to return an error in case of failure.
    if let Err(err) = parse_geo_point(input) {
        if err.is_failure() {
            return Err(err);
    let geo_reserved_parse_functions = [parse_geo_point, parse_geo_distance, parse_geo];

    for parser in geo_reserved_parse_functions {
        if let Err(err) = parser(input) {
            if err.is_failure() {
                return Err(err);
            }
        }
    }

    match parse_geo_radius(input) {
        Ok(_) => {
            return Err(nom::Err::Failure(Error::new_from_kind(input, ErrorKind::MisusedGeoRadius)))
@@ -178,7 +183,20 @@ fn is_syntax_component(c: char) -> bool {
}

fn is_keyword(s: &str) -> bool {
    matches!(s, "AND" | "OR" | "IN" | "NOT" | "TO" | "EXISTS" | "_geoRadius" | "_geoBoundingBox")
    matches!(
        s,
        "AND"
            | "OR"
            | "IN"
            | "NOT"
            | "TO"
            | "EXISTS"
            | "IS"
            | "NULL"
            | "EMPTY"
            | "_geoRadius"
            | "_geoBoundingBox"
    )
}

#[cfg(test)]
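One practical consequence of widening the keyword list, sketched below under the assumption that the crate's public `FilterCondition::parse` entry point behaves like the rest of the parser (the exact error is an inference, not shown in this diff): bare `IS`/`NULL`/`EMPTY` tokens should now be rejected as field names or values unless quoted.

```rust
// Hedged sketch: `FilterCondition::parse` is the assumed public entry point.
use filter_parser::FilterCondition;

fn main() {
    // `NULL` is now a reserved keyword, so a bare value collides with it...
    assert!(FilterCondition::parse("value = NULL").is_err());
    // ...while quoting opts back into treating it as a plain string.
    assert!(FilterCondition::parse("value = 'NULL'").is_ok());
}
```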
@@ -4,51 +4,56 @@ use serde_json::{Map, Value};

pub fn flatten(json: &Map<String, Value>) -> Map<String, Value> {
    let mut obj = Map::new();
    let mut all_keys = vec![];
    insert_object(&mut obj, None, json, &mut all_keys);
    for key in all_keys {
        obj.entry(key).or_insert(Value::Array(vec![]));
    let mut all_entries = vec![];
    insert_object(&mut obj, None, json, &mut all_entries);
    for (key, old_val) in all_entries {
        obj.entry(key).or_insert(old_val.clone());
    }
    obj
}

fn insert_object(
fn insert_object<'a>(
    base_json: &mut Map<String, Value>,
    base_key: Option<&str>,
    object: &Map<String, Value>,
    all_keys: &mut Vec<String>,
    object: &'a Map<String, Value>,
    all_entries: &mut Vec<(String, &'a Value)>,
) {
    for (key, value) in object {
        let new_key = base_key.map_or_else(|| key.clone(), |base_key| format!("{base_key}.{key}"));
        all_keys.push(new_key.clone());
        all_entries.push((new_key.clone(), value));
        if let Some(array) = value.as_array() {
            insert_array(base_json, &new_key, array, all_keys);
            insert_array(base_json, &new_key, array, all_entries);
        } else if let Some(object) = value.as_object() {
            insert_object(base_json, Some(&new_key), object, all_keys);
            insert_object(base_json, Some(&new_key), object, all_entries);
        } else {
            insert_value(base_json, &new_key, value.clone());
            insert_value(base_json, &new_key, value.clone(), false);
        }
    }
}

fn insert_array(
fn insert_array<'a>(
    base_json: &mut Map<String, Value>,
    base_key: &str,
    array: &Vec<Value>,
    all_keys: &mut Vec<String>,
    array: &'a Vec<Value>,
    all_entries: &mut Vec<(String, &'a Value)>,
) {
    for value in array {
        if let Some(object) = value.as_object() {
            insert_object(base_json, Some(base_key), object, all_keys);
            insert_object(base_json, Some(base_key), object, all_entries);
        } else if let Some(sub_array) = value.as_array() {
            insert_array(base_json, base_key, sub_array, all_keys);
            insert_array(base_json, base_key, sub_array, all_entries);
        } else {
            insert_value(base_json, base_key, value.clone());
            insert_value(base_json, base_key, value.clone(), true);
        }
    }
}

fn insert_value(base_json: &mut Map<String, Value>, key: &str, to_insert: Value) {
fn insert_value(
    base_json: &mut Map<String, Value>,
    key: &str,
    to_insert: Value,
    came_from_array: bool,
) {
    debug_assert!(!to_insert.is_object());
    debug_assert!(!to_insert.is_array());

@@ -63,6 +68,8 @@ fn insert_value(base_json: &mut Map<String, Value>, key: &str, to_insert: Value)
        base_json[key] = Value::Array(vec![value, to_insert]);
    }
    // if it does not exist we can push the value untouched
    } else if came_from_array {
        base_json.insert(key.to_string(), Value::Array(vec![to_insert]));
    } else {
        base_json.insert(key.to_string(), to_insert);
    }
@@ -113,7 +120,11 @@ mod tests {
        assert_eq!(
            &flat,
            json!({
                "a": [],
                "a": {
                    "b": "c",
                    "d": "e",
                    "f": "g"
                },
                "a.b": "c",
                "a.d": "e",
                "a.f": "g"

@@ -164,7 +175,7 @@ mod tests {
        assert_eq!(
            &flat,
            json!({
                "a": 42,
                "a": [42],
                "a.b": ["c", "d", "e"],
            })
            .as_object()

@@ -186,7 +197,7 @@ mod tests {
        assert_eq!(
            &flat,
            json!({
                "a": null,
                "a": [null],
                "a.b": ["c", "d", "e"],
            })
            .as_object()

@@ -208,7 +219,9 @@ mod tests {
        assert_eq!(
            &flat,
            json!({
                "a": [],
                "a": {
                    "b": "c"
                },
                "a.b": ["c", "d"],
            })
            .as_object()

@@ -234,7 +247,7 @@ mod tests {
            json!({
                "a.b": ["c", "d", "f"],
                "a.c": "e",
                "a": 35,
                "a": [35],
            })
            .as_object()
            .unwrap()

@@ -302,4 +315,53 @@ mod tests {
            .unwrap()
        );
    }

    #[test]
    fn flatten_nested_values_keep_original_values() {
        let mut base: Value = json!({
            "tags": {
                "t1": "v1"
            },
            "prices": {
                "p1": [null],
                "p1000": {"tamo": {"le": {}}}
            },
            "kiki": [[]]
        });
        let json = std::mem::take(base.as_object_mut().unwrap());
        let flat = flatten(&json);

        println!("{}", serde_json::to_string_pretty(&flat).unwrap());

        assert_eq!(
            &flat,
            json!({
                "prices": {
                    "p1": [null],
                    "p1000": {
                        "tamo": {
                            "le": {}
                        }
                    }
                },
                "prices.p1": [null],
                "prices.p1000": {
                    "tamo": {
                        "le": {}
                    }
                },
                "prices.p1000.tamo": {
                    "le": {}
                },
                "prices.p1000.tamo.le": {},
                "tags": {
                    "t1": "v1"
                },
                "tags.t1": "v1",
                "kiki": [[]]
            })
            .as_object()
            .unwrap()
        );
    }
}
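A compact sketch of the behavioural change above, assuming this crate is exposed as `flatten_serde_json` (the crate path is an assumption; the `flatten` signature is the one shown in the diff): flattened keys are now inserted alongside the original nested value instead of replacing it with an empty array.

```rust
// Hedged sketch of the new behaviour; the crate name is assumed.
use serde_json::json;

fn main() {
    let doc = json!({ "a": { "b": "c" } });
    let flat = flatten_serde_json::flatten(doc.as_object().unwrap());

    // The nested object survives under its original key...
    assert_eq!(flat["a"], json!({ "b": "c" }));
    // ...and the flattened entry is added next to it.
    assert_eq!(flat["a.b"], json!("c"));
}
```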
@@ -11,29 +11,29 @@ edition.workspace = true
license.workspace = true

[dependencies]
anyhow = "1.0.64"
anyhow = "1.0.70"
bincode = "1.3.3"
csv = "1.1.6"
derive_builder = "0.11.2"
csv = "1.2.1"
derive_builder = "0.12.0"
dump = { path = "../dump" }
enum-iterator = "1.1.3"
enum-iterator = "1.4.0"
file-store = { path = "../file-store" }
log = "0.4.14"
log = "0.4.17"
meilisearch-auth = { path = "../meilisearch-auth" }
meilisearch-types = { path = "../meilisearch-types" }
page_size = "0.5.0"
roaring = { version = "0.10.0", features = ["serde"] }
serde = { version = "1.0.136", features = ["derive"] }
serde_json = { version = "1.0.85", features = ["preserve_order"] }
roaring = { version = "0.10.1", features = ["serde"] }
serde = { version = "1.0.160", features = ["derive"] }
serde_json = { version = "1.0.95", features = ["preserve_order"] }
synchronoise = "1.0.1"
tempfile = "3.3.0"
thiserror = "1.0.30"
time = { version = "0.3.7", features = ["serde-well-known", "formatting", "parsing", "macros"] }
uuid = { version = "1.1.2", features = ["serde", "v4"] }
tempfile = "3.5.0"
thiserror = "1.0.40"
time = { version = "0.3.20", features = ["serde-well-known", "formatting", "parsing", "macros"] }
uuid = { version = "1.3.1", features = ["serde", "v4"] }

[dev-dependencies]
big_s = "1.0.2"
crossbeam = "0.8.2"
insta = { version = "1.19.1", features = ["json", "redactions"] }
insta = { version = "1.29.0", features = ["json", "redactions"] }
meili-snap = { path = "../meili-snap" }
nelson = { git = "https://github.com/meilisearch/nelson.git", rev = "675f13885548fb415ead8fbb447e9e6d9314000a"}
@@ -31,6 +31,7 @@ mod uuid_codec;
pub type Result<T> = std::result::Result<T, Error>;
pub type TaskId = u32;

use std::collections::HashMap;
use std::ops::{Bound, RangeBounds};
use std::path::{Path, PathBuf};
use std::sync::atomic::AtomicBool;

@@ -43,7 +44,7 @@ pub use error::Error;
use file_store::FileStore;
use meilisearch_types::error::ResponseError;
use meilisearch_types::heed::types::{OwnedType, SerdeBincode, SerdeJson, Str};
use meilisearch_types::heed::{self, Database, Env, RoTxn};
use meilisearch_types::heed::{self, Database, Env, RoTxn, RwTxn};
use meilisearch_types::milli::documents::DocumentsBatchBuilder;
use meilisearch_types::milli::update::IndexerConfig;
use meilisearch_types::milli::{self, CboRoaringBitmapCodec, Index, RoaringBitmapCodec, BEU32};

@@ -428,6 +429,13 @@ impl IndexScheduler {
        Ok(this)
    }

    /// Return `Ok(())` if the index scheduler is able to access one of its databases.
    pub fn health(&self) -> Result<()> {
        let rtxn = self.env.read_txn()?;
        self.all_tasks.first(&rtxn)?;
        Ok(())
    }

    fn index_budget(
        tasks_path: &Path,
        base_map_size: usize,
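A hedged sketch of how a caller might surface the new `health` check over HTTP; the actix-web wiring and route shape below are illustrative assumptions, not part of this diff:

```rust
use actix_web::{web, HttpResponse};
use index_scheduler::IndexScheduler;
use meilisearch_types::error::ResponseError;

// Assumed handler shape: any failure to read the task database turns into
// an error response instead of a misleading "available".
async fn get_health(
    index_scheduler: web::Data<IndexScheduler>,
) -> Result<HttpResponse, ResponseError> {
    index_scheduler.health()?;
    Ok(HttpResponse::Ok().json(serde_json::json!({ "status": "available" })))
}
```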
@@ -889,127 +897,8 @@ impl IndexScheduler {

    /// Register a new task coming from a dump in the scheduler.
    /// By taking a mutable ref we're pretty sure no one will ever import a dump while actix is running.
    pub fn register_dumped_task(
        &mut self,
        task: TaskDump,
        content_file: Option<Box<UpdateFile>>,
    ) -> Result<Task> {
        // Currently we don't need to access the tasks queue while loading a dump thus I can block everything.
        let mut wtxn = self.env.write_txn()?;

        let content_uuid = match content_file {
            Some(content_file) if task.status == Status::Enqueued => {
                let (uuid, mut file) = self.create_update_file()?;
                let mut builder = DocumentsBatchBuilder::new(file.as_file_mut());
                for doc in content_file {
                    builder.append_json_object(&doc?)?;
                }
                builder.into_inner()?;
                file.persist()?;

                Some(uuid)
            }
            // If the task isn't `Enqueued` then just generate a recognisable `Uuid`
            // in case we try to open it later.
            _ if task.status != Status::Enqueued => Some(Uuid::nil()),
            _ => None,
        };

        let task = Task {
            uid: task.uid,
            enqueued_at: task.enqueued_at,
            started_at: task.started_at,
            finished_at: task.finished_at,
            error: task.error,
            canceled_by: task.canceled_by,
            details: task.details,
            status: task.status,
            kind: match task.kind {
                KindDump::DocumentImport {
                    primary_key,
                    method,
                    documents_count,
                    allow_index_creation,
                } => KindWithContent::DocumentAdditionOrUpdate {
                    index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
                    primary_key,
                    method,
                    content_file: content_uuid.ok_or(Error::CorruptedDump)?,
                    documents_count,
                    allow_index_creation,
                },
                KindDump::DocumentDeletion { documents_ids } => KindWithContent::DocumentDeletion {
                    documents_ids,
                    index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
                },
                KindDump::DocumentClear => KindWithContent::DocumentClear {
                    index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
                },
                KindDump::Settings { settings, is_deletion, allow_index_creation } => {
                    KindWithContent::SettingsUpdate {
                        index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
                        new_settings: settings,
                        is_deletion,
                        allow_index_creation,
                    }
                }
                KindDump::IndexDeletion => KindWithContent::IndexDeletion {
                    index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
                },
                KindDump::IndexCreation { primary_key } => KindWithContent::IndexCreation {
                    index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
                    primary_key,
                },
                KindDump::IndexUpdate { primary_key } => KindWithContent::IndexUpdate {
                    index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
                    primary_key,
                },
                KindDump::IndexSwap { swaps } => KindWithContent::IndexSwap { swaps },
                KindDump::TaskCancelation { query, tasks } => {
                    KindWithContent::TaskCancelation { query, tasks }
                }
                KindDump::TasksDeletion { query, tasks } => {
                    KindWithContent::TaskDeletion { query, tasks }
                }
                KindDump::DumpCreation { keys, instance_uid } => {
                    KindWithContent::DumpCreation { keys, instance_uid }
                }
                KindDump::SnapshotCreation => KindWithContent::SnapshotCreation,
            },
        };

        self.all_tasks.put(&mut wtxn, &BEU32::new(task.uid), &task)?;

        for index in task.indexes() {
            self.update_index(&mut wtxn, index, |bitmap| {
                bitmap.insert(task.uid);
            })?;
        }

        self.update_status(&mut wtxn, task.status, |bitmap| {
            bitmap.insert(task.uid);
        })?;

        self.update_kind(&mut wtxn, task.kind.as_kind(), |bitmap| {
            (bitmap.insert(task.uid));
        })?;

        utils::insert_task_datetime(&mut wtxn, self.enqueued_at, task.enqueued_at, task.uid)?;

        // we can't override the started_at & finished_at, so we must only set it if the task is finished and won't change
        if matches!(task.status, Status::Succeeded | Status::Failed | Status::Canceled) {
            if let Some(started_at) = task.started_at {
                utils::insert_task_datetime(&mut wtxn, self.started_at, started_at, task.uid)?;
            }
            if let Some(finished_at) = task.finished_at {
                utils::insert_task_datetime(&mut wtxn, self.finished_at, finished_at, task.uid)?;
            }
        }

        wtxn.commit()?;
        self.wake_up.signal();

        Ok(task)
    pub fn register_dumped_task(&mut self) -> Result<Dump> {
        Dump::new(self)
    }

    /// Create a new index without any associated task.
@@ -1244,6 +1133,184 @@ impl IndexScheduler {
        }
    }

pub struct Dump<'a> {
    index_scheduler: &'a IndexScheduler,
    wtxn: RwTxn<'a, 'a>,

    indexes: HashMap<String, RoaringBitmap>,
    statuses: HashMap<Status, RoaringBitmap>,
    kinds: HashMap<Kind, RoaringBitmap>,
}

impl<'a> Dump<'a> {
    pub(crate) fn new(index_scheduler: &'a mut IndexScheduler) -> Result<Self> {
        // While loading a dump no one should be able to access the scheduler thus I can block everything.
        let wtxn = index_scheduler.env.write_txn()?;

        Ok(Dump {
            index_scheduler,
            wtxn,
            indexes: HashMap::new(),
            statuses: HashMap::new(),
            kinds: HashMap::new(),
        })
    }

    /// Register a new task coming from a dump in the scheduler.
    /// By taking a mutable ref we're pretty sure no one will ever import a dump while actix is running.
    pub fn register_dumped_task(
        &mut self,
        task: TaskDump,
        content_file: Option<Box<UpdateFile>>,
    ) -> Result<Task> {
        let content_uuid = match content_file {
            Some(content_file) if task.status == Status::Enqueued => {
                let (uuid, mut file) = self.index_scheduler.create_update_file()?;
                let mut builder = DocumentsBatchBuilder::new(file.as_file_mut());
                for doc in content_file {
                    builder.append_json_object(&doc?)?;
                }
                builder.into_inner()?;
                file.persist()?;

                Some(uuid)
            }
            // If the task isn't `Enqueued` then just generate a recognisable `Uuid`
            // in case we try to open it later.
            _ if task.status != Status::Enqueued => Some(Uuid::nil()),
            _ => None,
        };

        let task = Task {
            uid: task.uid,
            enqueued_at: task.enqueued_at,
            started_at: task.started_at,
            finished_at: task.finished_at,
            error: task.error,
            canceled_by: task.canceled_by,
            details: task.details,
            status: task.status,
            kind: match task.kind {
                KindDump::DocumentImport {
                    primary_key,
                    method,
                    documents_count,
                    allow_index_creation,
                } => KindWithContent::DocumentAdditionOrUpdate {
                    index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
                    primary_key,
                    method,
                    content_file: content_uuid.ok_or(Error::CorruptedDump)?,
                    documents_count,
                    allow_index_creation,
                },
                KindDump::DocumentDeletion { documents_ids } => KindWithContent::DocumentDeletion {
                    documents_ids,
                    index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
                },
                KindDump::DocumentClear => KindWithContent::DocumentClear {
                    index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
                },
                KindDump::Settings { settings, is_deletion, allow_index_creation } => {
                    KindWithContent::SettingsUpdate {
                        index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
                        new_settings: settings,
                        is_deletion,
                        allow_index_creation,
                    }
                }
                KindDump::IndexDeletion => KindWithContent::IndexDeletion {
                    index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
                },
                KindDump::IndexCreation { primary_key } => KindWithContent::IndexCreation {
                    index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
                    primary_key,
                },
                KindDump::IndexUpdate { primary_key } => KindWithContent::IndexUpdate {
                    index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
                    primary_key,
                },
                KindDump::IndexSwap { swaps } => KindWithContent::IndexSwap { swaps },
                KindDump::TaskCancelation { query, tasks } => {
                    KindWithContent::TaskCancelation { query, tasks }
                }
                KindDump::TasksDeletion { query, tasks } => {
                    KindWithContent::TaskDeletion { query, tasks }
                }
                KindDump::DumpCreation { keys, instance_uid } => {
                    KindWithContent::DumpCreation { keys, instance_uid }
                }
                KindDump::SnapshotCreation => KindWithContent::SnapshotCreation,
            },
        };

        self.index_scheduler.all_tasks.put(&mut self.wtxn, &BEU32::new(task.uid), &task)?;

        for index in task.indexes() {
            match self.indexes.get_mut(index) {
                Some(bitmap) => {
                    bitmap.insert(task.uid);
                }
                None => {
                    let mut bitmap = RoaringBitmap::new();
                    bitmap.insert(task.uid);
                    self.indexes.insert(index.to_string(), bitmap);
                }
            };
        }

        utils::insert_task_datetime(
            &mut self.wtxn,
            self.index_scheduler.enqueued_at,
            task.enqueued_at,
            task.uid,
        )?;

        // we can't override the started_at & finished_at, so we must only set it if the task is finished and won't change
        if matches!(task.status, Status::Succeeded | Status::Failed | Status::Canceled) {
            if let Some(started_at) = task.started_at {
                utils::insert_task_datetime(
                    &mut self.wtxn,
                    self.index_scheduler.started_at,
                    started_at,
                    task.uid,
                )?;
            }
            if let Some(finished_at) = task.finished_at {
                utils::insert_task_datetime(
                    &mut self.wtxn,
                    self.index_scheduler.finished_at,
                    finished_at,
                    task.uid,
                )?;
            }
        }

        self.statuses.entry(task.status).or_insert(RoaringBitmap::new()).insert(task.uid);
        self.kinds.entry(task.kind.as_kind()).or_insert(RoaringBitmap::new()).insert(task.uid);

        Ok(task)
    }

    /// Commit all the changes and exit the importing dump state
    pub fn finish(mut self) -> Result<()> {
        for (index, bitmap) in self.indexes {
            self.index_scheduler.index_tasks.put(&mut self.wtxn, &index, &bitmap)?;
        }
        for (status, bitmap) in self.statuses {
            self.index_scheduler.put_status(&mut self.wtxn, status, &bitmap)?;
        }
        for (kind, bitmap) in self.kinds {
            self.index_scheduler.put_kind(&mut self.wtxn, kind, &bitmap)?;
        }

        self.wtxn.commit()?;
        self.index_scheduler.wake_up.signal();

        Ok(())
    }
}

/// The outcome of calling the [`IndexScheduler::tick`] function.
pub enum TickOutcome {
    /// The scheduler should immediately attempt another `tick`.
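For readers wiring this up, a hedged sketch of the intended call sequence (only the three `Dump` methods come from this diff; the surrounding function and the task list are assumptions for illustration):

```rust
use dump::TaskDump;
use index_scheduler::{IndexScheduler, Result};

// Hypothetical import loop: obtain the dump handle, replay every task,
// then commit all buffered index/status/kind bitmaps in one transaction.
fn import_dump(scheduler: &mut IndexScheduler, tasks: Vec<TaskDump>) -> Result<()> {
    let mut dump = scheduler.register_dumped_task()?;
    for task in tasks {
        // `None`: this sketch assumes no update file is attached to the task.
        dump.register_dumped_task(task, None)?;
    }
    dump.finish()
}
```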
@@ -1,43 +0,0 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: succeeded, details: { received_documents: 3, indexed_documents: Some(3) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }}
1 {uid: 1, status: succeeded, details: { received_document_ids: 2, deleted_documents: Some(2) }, kind: DocumentDeletion { index_uid: "doggos", documents_ids: ["1", "2"] }}
----------------------------------------------------------------------
### Status:
enqueued []
succeeded [0,1,]
----------------------------------------------------------------------
### Kind:
"documentAdditionOrUpdate" [0,]
"documentDeletion" [1,]
----------------------------------------------------------------------
### Index Tasks:
doggos [0,1,]
----------------------------------------------------------------------
### Index Mapper:
doggos: { number_of_documents: 1, field_distribution: {"doggo": 1, "id": 1} }

----------------------------------------------------------------------
### Canceled By:

----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### Started At:
[timestamp] [0,1,]
----------------------------------------------------------------------
### Finished At:
[timestamp] [0,1,]
----------------------------------------------------------------------
### File Store:

----------------------------------------------------------------------
@@ -1,9 +0,0 @@
---
source: index-scheduler/src/lib.rs
---
[
  {
    "id": 3,
    "doggo": "bork"
  }
]
@@ -1,37 +0,0 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { received_documents: 3, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }}
----------------------------------------------------------------------
### Status:
enqueued [0,]
----------------------------------------------------------------------
### Kind:
"documentAdditionOrUpdate" [0,]
----------------------------------------------------------------------
### Index Tasks:
doggos [0,]
----------------------------------------------------------------------
### Index Mapper:

----------------------------------------------------------------------
### Canceled By:

----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
----------------------------------------------------------------------
### Started At:
----------------------------------------------------------------------
### Finished At:
----------------------------------------------------------------------
### File Store:
00000000-0000-0000-0000-000000000000

----------------------------------------------------------------------
@@ -1,40 +0,0 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { received_documents: 3, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }}
1 {uid: 1, status: enqueued, details: { received_document_ids: 2, deleted_documents: None }, kind: DocumentDeletion { index_uid: "doggos", documents_ids: ["1", "2"] }}
----------------------------------------------------------------------
### Status:
enqueued [0,1,]
----------------------------------------------------------------------
### Kind:
"documentAdditionOrUpdate" [0,]
"documentDeletion" [1,]
----------------------------------------------------------------------
### Index Tasks:
doggos [0,1,]
----------------------------------------------------------------------
### Index Mapper:

----------------------------------------------------------------------
### Canceled By:

----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### Started At:
----------------------------------------------------------------------
### Finished At:
----------------------------------------------------------------------
### File Store:
00000000-0000-0000-0000-000000000000

----------------------------------------------------------------------
@@ -1,43 +0,0 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: failed, error: ResponseError { code: 200, message: "Index `doggos` not found.", error_code: "index_not_found", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#index_not_found" }, details: { received_document_ids: 2, deleted_documents: Some(0) }, kind: DocumentDeletion { index_uid: "doggos", documents_ids: ["1", "2"] }}
1 {uid: 1, status: enqueued, details: { received_documents: 3, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }}
----------------------------------------------------------------------
### Status:
enqueued [1,]
failed [0,]
----------------------------------------------------------------------
### Kind:
"documentAdditionOrUpdate" [1,]
"documentDeletion" [0,]
----------------------------------------------------------------------
### Index Tasks:
doggos [0,1,]
----------------------------------------------------------------------
### Index Mapper:

----------------------------------------------------------------------
### Canceled By:

----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### Started At:
[timestamp] [0,]
----------------------------------------------------------------------
### Finished At:
[timestamp] [0,]
----------------------------------------------------------------------
### File Store:
00000000-0000-0000-0000-000000000000

----------------------------------------------------------------------
@@ -1,46 +0,0 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: failed, error: ResponseError { code: 200, message: "Index `doggos` not found.", error_code: "index_not_found", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#index_not_found" }, details: { received_document_ids: 2, deleted_documents: Some(0) }, kind: DocumentDeletion { index_uid: "doggos", documents_ids: ["1", "2"] }}
1 {uid: 1, status: succeeded, details: { received_documents: 3, indexed_documents: Some(3) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }}
----------------------------------------------------------------------
### Status:
enqueued []
succeeded [1,]
failed [0,]
----------------------------------------------------------------------
### Kind:
"documentAdditionOrUpdate" [1,]
"documentDeletion" [0,]
----------------------------------------------------------------------
### Index Tasks:
doggos [0,1,]
----------------------------------------------------------------------
### Index Mapper:
doggos: { number_of_documents: 3, field_distribution: {"catto": 1, "doggo": 2, "id": 3} }

----------------------------------------------------------------------
### Canceled By:

----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### Started At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### Finished At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### File Store:

----------------------------------------------------------------------
@@ -1,17 +0,0 @@
---
source: index-scheduler/src/lib.rs
---
[
  {
    "id": 1,
    "doggo": "jean bob"
  },
  {
    "id": 2,
    "catto": "jorts"
  },
  {
    "id": 3,
    "doggo": "bork"
  }
]

@@ -1,36 +0,0 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { received_document_ids: 2, deleted_documents: None }, kind: DocumentDeletion { index_uid: "doggos", documents_ids: ["1", "2"] }}
----------------------------------------------------------------------
### Status:
enqueued [0,]
----------------------------------------------------------------------
### Kind:
"documentDeletion" [0,]
----------------------------------------------------------------------
### Index Tasks:
doggos [0,]
----------------------------------------------------------------------
### Index Mapper:

----------------------------------------------------------------------
### Canceled By:

----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
----------------------------------------------------------------------
### Started At:
----------------------------------------------------------------------
### Finished At:
----------------------------------------------------------------------
### File Store:

----------------------------------------------------------------------

@@ -1,40 +0,0 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { received_document_ids: 2, deleted_documents: None }, kind: DocumentDeletion { index_uid: "doggos", documents_ids: ["1", "2"] }}
1 {uid: 1, status: enqueued, details: { received_documents: 3, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }}
----------------------------------------------------------------------
### Status:
enqueued [0,1,]
----------------------------------------------------------------------
### Kind:
"documentAdditionOrUpdate" [1,]
"documentDeletion" [0,]
----------------------------------------------------------------------
### Index Tasks:
doggos [0,1,]
----------------------------------------------------------------------
### Index Mapper:

----------------------------------------------------------------------
### Canceled By:

----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### Started At:
----------------------------------------------------------------------
### Finished At:
----------------------------------------------------------------------
### File Store:
00000000-0000-0000-0000-000000000000

----------------------------------------------------------------------

@@ -11,6 +11,6 @@ edition.workspace = true
 license.workspace = true

 [dependencies]
-insta = { version = "^1.19.1", features = ["json", "redactions"] }
+insta = { version = "^1.29.0", features = ["json", "redactions"] }
 md5 = "0.7.0"
-once_cell = "1.15"
+once_cell = "1.17"

@@ -11,16 +11,16 @@ edition.workspace = true
 license.workspace = true

 [dependencies]
-base64 = "0.13.1"
-enum-iterator = "1.1.3"
+base64 = "0.21.0"
+enum-iterator = "1.4.0"
 hmac = "0.12.1"
 maplit = "1.0.2"
 meilisearch-types = { path = "../meilisearch-types" }
 rand = "0.8.5"
-roaring = { version = "0.10.0", features = ["serde"] }
-serde = { version = "1.0.145", features = ["derive"] }
-serde_json = { version = "1.0.85", features = ["preserve_order"] }
+roaring = { version = "0.10.1", features = ["serde"] }
+serde = { version = "1.0.160", features = ["derive"] }
+serde_json = { version = "1.0.95", features = ["preserve_order"] }
 sha2 = "0.10.6"
-thiserror = "1.0.37"
-time = { version = "0.3.15", features = ["serde-well-known", "formatting", "parsing", "macros"] }
-uuid = { version = "1.1.2", features = ["serde", "v4"] }
+thiserror = "1.0.40"
+time = { version = "0.3.20", features = ["serde-well-known", "formatting", "parsing", "macros"] }
+uuid = { version = "1.3.1", features = ["serde", "v4"] }

@@ -34,6 +34,12 @@ impl AuthController {
         Ok(Self { store: Arc::new(store), master_key: master_key.clone() })
     }

+    /// Return `Ok(())` if the auth controller is able to access one of its databases.
+    pub fn health(&self) -> Result<()> {
+        self.store.health()?;
+        Ok(())
+    }
+
     /// Return the size of the `AuthController` database in bytes.
     pub fn size(&self) -> Result<u64> {
         self.store.size()
@@ -304,6 +310,7 @@ pub const MASTER_KEY_MIN_SIZE: usize = 16;
 const MASTER_KEY_GEN_SIZE: usize = 32;

 pub fn generate_master_key() -> String {
+    use base64::Engine;
     use rand::rngs::OsRng;
     use rand::RngCore;

@@ -314,5 +321,5 @@ pub fn generate_master_key() -> String {

     // let's encode the random bytes to base64 to make them human-readable and not too long.
     // We're using the URL_SAFE alphabet that will produce keys without =, / or other unusual characters.
-    base64::encode_config(buf, base64::URL_SAFE_NO_PAD)
+    base64::engine::general_purpose::URL_SAFE_NO_PAD.encode(buf)
 }

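Note: the hunk above follows base64 0.21's switch from free functions such as `encode_config` to the `Engine` trait. A minimal sketch of the new API, assuming only the `base64` crate (this example is not part of the diff):

    use base64::Engine;

    fn encode_key(buf: &[u8]) -> String {
        // URL_SAFE_NO_PAD produces keys without `=`, `/`, or `+` characters.
        base64::engine::general_purpose::URL_SAFE_NO_PAD.encode(buf)
    }
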
@@ -61,6 +61,13 @@ impl HeedAuthStore {
         Ok(Self { env, keys, action_keyid_index_expiration, should_close_on_drop: true })
     }

+    /// Return `Ok(())` if the auth store is able to access one of its databases.
+    pub fn health(&self) -> Result<()> {
+        let rtxn = self.env.read_txn()?;
+        self.keys.first(&rtxn)?;
+        Ok(())
+    }
+
     /// Return the size in bytes of the database.
     pub fn size(&self) -> Result<u64> {
         Ok(self.env.real_disk_size()?)

@@ -11,31 +11,31 @@ edition.workspace = true
 license.workspace = true

 [dependencies]
-actix-web = { version = "4.2.1", default-features = false }
-anyhow = "1.0.65"
+actix-web = { version = "4.3.1", default-features = false }
+anyhow = "1.0.70"
 convert_case = "0.6.0"
-csv = "1.1.6"
+csv = "1.2.1"
 deserr = "0.5.0"
-either = { version = "1.6.1", features = ["serde"] }
-enum-iterator = "1.1.3"
+either = { version = "1.8.1", features = ["serde"] }
+enum-iterator = "1.4.0"
 file-store = { path = "../file-store" }
-flate2 = "1.0.24"
+flate2 = "1.0.25"
 fst = "0.4.7"
-memmap2 = "0.5.7"
+memmap2 = "0.5.10"
 milli = { path = "../milli", default-features = false }
-roaring = { version = "0.10.0", features = ["serde"] }
-serde = { version = "1.0.145", features = ["derive"] }
+roaring = { version = "0.10.1", features = ["serde"] }
+serde = { version = "1.0.160", features = ["derive"] }
 serde-cs = "0.2.4"
-serde_json = "1.0.85"
+serde_json = "1.0.95"
 tar = "0.4.38"
-tempfile = "3.3.0"
-thiserror = "1.0.30"
-time = { version = "0.3.7", features = ["serde-well-known", "formatting", "parsing", "macros"] }
-tokio = "1.24"
-uuid = { version = "1.1.2", features = ["serde", "v4"] }
+tempfile = "3.5.0"
+thiserror = "1.0.40"
+time = { version = "0.3.20", features = ["serde-well-known", "formatting", "parsing", "macros"] }
+tokio = "1.27"
+uuid = { version = "1.3.1", features = ["serde", "v4"] }

 [dev-dependencies]
-insta = "1.19.1"
+insta = "1.29.0"
 meili-snap = { path = "../meili-snap" }

 [features]
@@ -50,3 +50,6 @@ hebrew = ["milli/hebrew"]
 japanese = ["milli/japanese"]
 # thai specialized tokenization
 thai = ["milli/thai"]
+
+# allow greek specialized tokenization
+greek = ["milli/greek"]

@@ -46,7 +46,7 @@ pub fn check_version_file(db_path: &Path) -> anyhow::Result<()> {
 pub enum VersionFileError {
     #[error(
         "Meilisearch (v{}) failed to infer the version of the database.
-To update Meilisearch please follow our guide on https://docs.meilisearch.com/learn/update_and_migration/updating.html.",
+To update Meilisearch please follow our guide on https://www.meilisearch.com/docs/learn/update_and_migration/updating.",
         env!("CARGO_PKG_VERSION").to_string()
     )]
     MissingVersionFile,
@@ -54,7 +54,7 @@ pub enum VersionFileError {
     MalformedVersionFile,
     #[error(
         "Your database version ({major}.{minor}.{patch}) is incompatible with your current engine version ({}).\n\
-        To migrate data between Meilisearch versions, please follow our guide on https://docs.meilisearch.com/learn/update_and_migration/updating.html.",
+        To migrate data between Meilisearch versions, please follow our guide on https://www.meilisearch.com/docs/learn/update_and_migration/updating.",
         env!("CARGO_PKG_VERSION").to_string()
     )]
     VersionMismatch { major: String, minor: String, patch: String },

@@ -13,97 +13,97 @@ license.workspace = true
 default-run = "meilisearch"

 [dependencies]
-actix-cors = "0.6.3"
-actix-http = { version = "3.2.2", default-features = false, features = ["compress-brotli", "compress-gzip", "rustls"] }
-actix-web = { version = "4.2.1", default-features = false, features = ["macros", "compress-brotli", "compress-gzip", "cookies", "rustls"] }
+actix-cors = "0.6.4"
+actix-http = { version = "3.3.1", default-features = false, features = ["compress-brotli", "compress-gzip", "rustls"] }
+actix-web = { version = "4.3.1", default-features = false, features = ["macros", "compress-brotli", "compress-gzip", "cookies", "rustls"] }
 actix-web-static-files = { git = "https://github.com/kilork/actix-web-static-files.git", rev = "2d3b6160", optional = true }
-anyhow = { version = "1.0.65", features = ["backtrace"] }
-async-stream = "0.3.3"
-async-trait = "0.1.57"
-bstr = "1.0.1"
-byte-unit = { version = "4.0.14", default-features = false, features = ["std", "serde"] }
-bytes = "1.2.1"
-clap = { version = "4.0.9", features = ["derive", "env"] }
-crossbeam-channel = "0.5.6"
+anyhow = { version = "1.0.70", features = ["backtrace"] }
+async-stream = "0.3.5"
+async-trait = "0.1.68"
+bstr = "1.4.0"
+byte-unit = { version = "4.0.19", default-features = false, features = ["std", "serde"] }
+bytes = "1.4.0"
+clap = { version = "4.2.1", features = ["derive", "env"] }
+crossbeam-channel = "0.5.8"
 deserr = "0.5.0"
 dump = { path = "../dump" }
-either = "1.8.0"
-env_logger = "0.9.1"
+either = "1.8.1"
+env_logger = "0.10.0"
 file-store = { path = "../file-store" }
-flate2 = "1.0.24"
+flate2 = "1.0.25"
 fst = "0.4.7"
-futures = "0.3.24"
-futures-util = "0.3.24"
-http = "0.2.8"
+futures = "0.3.28"
+futures-util = "0.3.28"
+http = "0.2.9"
 index-scheduler = { path = "../index-scheduler" }
-indexmap = { version = "1.9.1", features = ["serde-1"] }
+indexmap = { version = "1.9.3", features = ["serde-1"] }
 itertools = "0.10.5"
-jsonwebtoken = "8.1.1"
+jsonwebtoken = "8.3.0"
 lazy_static = "1.4.0"
 log = "0.4.17"
 meilisearch-auth = { path = "../meilisearch-auth" }
 meilisearch-types = { path = "../meilisearch-types" }
-mimalloc = { version = "0.1.29", default-features = false }
-mime = "0.3.16"
-num_cpus = "1.13.1"
+mimalloc = { version = "0.1.36", default-features = false }
+mime = "0.3.17"
+num_cpus = "1.15.0"
 obkv = "0.2.0"
-once_cell = "1.15.0"
+once_cell = "1.17.1"
 parking_lot = "0.12.1"
 permissive-json-pointer = { path = "../permissive-json-pointer" }
 pin-project-lite = "0.2.9"
 platform-dirs = "0.3.0"
-prometheus = { version = "0.13.2", features = ["process"] }
+prometheus = { version = "0.13.3", features = ["process"] }
 rand = "0.8.5"
-rayon = "1.5.3"
-regex = "1.6.0"
-reqwest = { version = "0.11.12", features = ["rustls-tls", "json"], default-features = false }
-rustls = "0.20.6"
-rustls-pemfile = "1.0.1"
-segment = { version = "0.2.1", optional = true }
-serde = { version = "1.0.145", features = ["derive"] }
-serde_json = { version = "1.0.85", features = ["preserve_order"] }
+rayon = "1.7.0"
+regex = "1.7.3"
+reqwest = { version = "0.11.16", features = ["rustls-tls", "json"], default-features = false }
+rustls = "0.20.8"
+rustls-pemfile = "1.0.2"
+segment = { version = "0.2.2", optional = true }
+serde = { version = "1.0.160", features = ["derive"] }
+serde_json = { version = "1.0.95", features = ["preserve_order"] }
 sha2 = "0.10.6"
 siphasher = "0.3.10"
 slice-group-by = "0.3.0"
 static-files = { version = "0.2.3", optional = true }
-sysinfo = "0.26.4"
+sysinfo = "0.28.4"
 tar = "0.4.38"
-tempfile = "3.3.0"
-thiserror = "1.0.37"
-time = { version = "0.3.15", features = ["serde-well-known", "formatting", "parsing", "macros"] }
-tokio = { version = "1.24.2", features = ["full"] }
-tokio-stream = "0.1.10"
-toml = "0.5.9"
-uuid = { version = "1.1.2", features = ["serde", "v4"] }
-walkdir = "2.3.2"
-yaup = "0.2.0"
+tempfile = "3.5.0"
+thiserror = "1.0.40"
+time = { version = "0.3.20", features = ["serde-well-known", "formatting", "parsing", "macros"] }
+tokio = { version = "1.27.0", features = ["full"] }
+tokio-stream = "0.1.12"
+toml = "0.7.3"
+uuid = { version = "1.3.1", features = ["serde", "v4"] }
+walkdir = "2.3.3"
+yaup = "0.2.1"
 serde_urlencoded = "0.7.1"
 actix-utils = "3.0.1"
 atty = "0.2.14"
-termcolor = "1.1.3"
+termcolor = "1.2.0"

 [dev-dependencies]
-actix-rt = "2.7.0"
+actix-rt = "2.8.0"
 assert-json-diff = "2.0.2"
 brotli = "3.3.4"
-insta = "1.19.1"
+insta = "1.29.0"
 manifest-dir-macros = "0.1.16"
 maplit = "1.0.2"
 meili-snap = {path = "../meili-snap"}
-temp-env = "0.3.1"
+temp-env = "0.3.3"
 urlencoding = "2.1.2"
 yaup = "0.2.1"

 [build-dependencies]
-anyhow = { version = "1.0.65", optional = true }
-cargo_toml = { version = "0.14.0", optional = true }
+anyhow = { version = "1.0.70", optional = true }
+cargo_toml = { version = "0.15.2", optional = true }
 hex = { version = "0.4.3", optional = true }
-reqwest = { version = "0.11.12", features = ["blocking", "rustls-tls"], default-features = false, optional = true }
-sha-1 = { version = "0.10.0", optional = true }
+reqwest = { version = "0.11.16", features = ["blocking", "rustls-tls"], default-features = false, optional = true }
+sha-1 = { version = "0.10.1", optional = true }
 static-files = { version = "0.2.3", optional = true }
-tempfile = { version = "3.3.0", optional = true }
-vergen = { version = "7.4.2", default-features = false, features = ["git"] }
-zip = { version = "0.6.2", optional = true }
+tempfile = { version = "3.5.0", optional = true }
+vergen = { version = "7.5.1", default-features = false, features = ["git"] }
+zip = { version = "0.6.4", optional = true }

 [features]
 default = ["analytics", "meilisearch-types/default", "mini-dashboard"]
@@ -113,6 +113,7 @@ chinese = ["meilisearch-types/chinese"]
 hebrew = ["meilisearch-types/hebrew"]
 japanese = ["meilisearch-types/japanese"]
 thai = ["meilisearch-types/thai"]
+greek = ["meilisearch-types/greek"]

 [package.metadata.mini-dashboard]
 assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.7/build.zip"

@@ -86,7 +86,7 @@ impl SegmentAnalytics {
     pub async fn new(
         opt: &Opt,
         index_scheduler: Arc<IndexScheduler>,
-        auth_controller: AuthController,
+        auth_controller: Arc<AuthController>,
     ) -> Arc<dyn Analytics> {
         let instance_uid = super::find_user_id(&opt.db_path);
         let first_time_run = instance_uid.is_none();
@@ -376,7 +376,11 @@ impl Segment {
         })
     }

-    async fn run(mut self, index_scheduler: Arc<IndexScheduler>, auth_controller: AuthController) {
+    async fn run(
+        mut self,
+        index_scheduler: Arc<IndexScheduler>,
+        auth_controller: Arc<AuthController>,
+    ) {
         const INTERVAL: Duration = Duration::from_secs(60 * 60); // one hour
         // The first batch must be sent after one hour.
         let mut interval =
@@ -408,10 +412,10 @@ impl Segment {
     async fn tick(
         &mut self,
         index_scheduler: Arc<IndexScheduler>,
-        auth_controller: AuthController,
+        auth_controller: Arc<AuthController>,
     ) {
         if let Ok(stats) =
-            create_all_stats(index_scheduler.into(), auth_controller, &AuthFilter::default())
+            create_all_stats(index_scheduler.into(), auth_controller.into(), &AuthFilter::default())
         {
             // Replace the version number with the prototype name if any.
             let version = if let Some(prototype) = crate::prototype_name() {

@@ -4,6 +4,7 @@ use std::marker::PhantomData;
 use std::ops::Deref;
 use std::pin::Pin;

+use actix_web::web::Data;
 use actix_web::FromRequest;
 pub use error::AuthenticationError;
 use futures::future::err;
@@ -23,7 +24,7 @@ impl<P, D> GuardedData<P, D> {
     }

     async fn auth_bearer(
-        auth: AuthController,
+        auth: Data<AuthController>,
         token: String,
         index: Option<String>,
         data: Option<D>,
@@ -43,7 +44,7 @@ impl<P, D> GuardedData<P, D> {
         }
     }

-    async fn auth_token(auth: AuthController, data: Option<D>) -> Result<Self, ResponseError>
+    async fn auth_token(auth: Data<AuthController>, data: Option<D>) -> Result<Self, ResponseError>
     where
         P: Policy + 'static,
     {
@@ -60,7 +61,7 @@ impl<P, D> GuardedData<P, D> {
     }

     async fn authenticate(
-        auth: AuthController,
+        auth: Data<AuthController>,
         token: String,
         index: Option<String>,
     ) -> Result<Option<AuthFilter>, ResponseError>
@@ -90,7 +91,7 @@ impl<P: Policy + 'static, D: 'static + Clone> FromRequest for GuardedData<P, D>
         req: &actix_web::HttpRequest,
         _payload: &mut actix_web::dev::Payload,
     ) -> Self::Future {
-        match req.app_data::<AuthController>().cloned() {
+        match req.app_data::<Data<AuthController>>().cloned() {
             Some(auth) => match req
                 .headers()
                 .get("Authorization")
@@ -122,10 +123,15 @@ impl<P: Policy + 'static, D: 'static + Clone> FromRequest for GuardedData<P, D>
 }

 pub trait Policy {
-    fn authenticate(auth: AuthController, token: &str, index: Option<&str>) -> Option<AuthFilter>;
+    fn authenticate(
+        auth: Data<AuthController>,
+        token: &str,
+        index: Option<&str>,
+    ) -> Option<AuthFilter>;
 }

 pub mod policies {
+    use actix_web::web::Data;
     use jsonwebtoken::{decode, Algorithm, DecodingKey, Validation};
     use meilisearch_auth::{AuthController, AuthFilter, SearchRules};
     // reexport actions in policies in order to be used in routes configuration.
@@ -178,7 +184,7 @@ pub mod policies {
     /// Otherwise, returns an object containing the generated permissions: the search filters to add to a search, and the list of allowed indexes
     /// (that may contain more indexes than requested).
     fn authenticate(
-        auth: AuthController,
+        auth: Data<AuthController>,
         token: &str,
         index: Option<&str>,
     ) -> Option<AuthFilter> {

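Note: after the `FromRequest` change above, the controller is only found if it was registered on the app as `Data<AuthController>`. A minimal sketch of the registration side, under the assumption of a plain actix-web 4 setup (the `AuthController` here is a placeholder, not the real type):

    use std::sync::Arc;
    use actix_web::web::{Data, ServiceConfig};

    struct AuthController;

    fn configure(cfg: &mut ServiceConfig, auth: Arc<AuthController>) {
        // Registering Data<AuthController> is what makes
        // req.app_data::<Data<AuthController>>() succeed in FromRequest.
        cfg.app_data(Data::from(auth));
    }
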
@@ -88,7 +88,7 @@ fn is_empty_db(db_path: impl AsRef<Path>) -> bool {

 pub fn create_app(
     index_scheduler: Data<IndexScheduler>,
-    auth_controller: AuthController,
+    auth_controller: Data<AuthController>,
     opt: Opt,
     analytics: Arc<dyn Analytics>,
     enable_dashboard: bool,
@@ -136,7 +136,7 @@ enum OnFailure {
     KeepDb,
 }

-pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<(Arc<IndexScheduler>, AuthController)> {
+pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<(Arc<IndexScheduler>, Arc<AuthController>)> {
     let empty_db = is_empty_db(&opt.db_path);
     let (index_scheduler, auth_controller) = if let Some(ref snapshot_path) = opt.import_snapshot {
         let snapshot_path_exists = snapshot_path.exists();
@@ -195,6 +195,7 @@ pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<(Arc<IndexScheduler>, Auth

     // We create a loop in a thread that registers snapshotCreation tasks
     let index_scheduler = Arc::new(index_scheduler);
+    let auth_controller = Arc::new(auth_controller);
     if let ScheduleSnapshot::Enabled(snapshot_delay) = opt.schedule_snapshot {
         let snapshot_delay = Duration::from_secs(snapshot_delay);
         let index_scheduler = index_scheduler.clone();
@@ -367,18 +368,20 @@ fn import_dump(
         log::info!("All documents successfully imported.");
     }

+    let mut index_scheduler_dump = index_scheduler.register_dumped_task()?;
+
     // 4. Import the tasks.
     for ret in dump_reader.tasks()? {
         let (task, file) = ret?;
-        index_scheduler.register_dumped_task(task, file)?;
+        index_scheduler_dump.register_dumped_task(task, file)?;
     }
-    Ok(())
+    Ok(index_scheduler_dump.finish()?)
 }

 pub fn configure_data(
     config: &mut web::ServiceConfig,
     index_scheduler: Data<IndexScheduler>,
-    auth: AuthController,
+    auth: Data<AuthController>,
     opt: &Opt,
     analytics: Arc<dyn Analytics>,
 ) {

@@ -74,13 +74,14 @@ async fn main() -> anyhow::Result<()> {

 async fn run_http(
     index_scheduler: Arc<IndexScheduler>,
-    auth_controller: AuthController,
+    auth_controller: Arc<AuthController>,
     opt: Opt,
     analytics: Arc<dyn Analytics>,
 ) -> anyhow::Result<()> {
     let enable_dashboard = &opt.env == "development";
     let opt_clone = opt.clone();
     let index_scheduler = Data::from(index_scheduler);
+    let auth_controller = Data::from(auth_controller);

     let http_server = HttpServer::new(move || {
         create_app(
@@ -148,7 +149,7 @@ pub fn print_launch_resume(
         "
 Thank you for using Meilisearch!

-\nWe collect anonymized analytics to improve our product and your experience. To learn more, including how to turn off analytics, visit our dedicated documentation page: https://docs.meilisearch.com/learn/what_is_meilisearch/telemetry.html
+\nWe collect anonymized analytics to improve our product and your experience. To learn more, including how to turn off analytics, visit our dedicated documentation page: https://www.meilisearch.com/docs/learn/what_is_meilisearch/telemetry

 Anonymous telemetry:\t\"Enabled\""
     );

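Note: `Data::from(auth_controller)` above relies on actix-web's `From<Arc<T>>` impl for `web::Data<T>`, which lets one `Arc<AuthController>` be shared with non-actix threads and then handed to the HTTP server without re-wrapping. A hedged sketch (the type is a stand-in, not the real Meilisearch one):

    use std::sync::Arc;
    use actix_web::web::Data;

    struct AuthController;

    fn share(auth: Arc<AuthController>) -> Data<AuthController> {
        // Wraps the existing Arc; no new allocation, one shared instance.
        Data::from(auth)
    }
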
@@ -323,10 +323,10 @@ impl Opt {
             .clone()
             .unwrap_or_else(|| PathBuf::from(DEFAULT_CONFIG_FILE_PATH));

-        match std::fs::read(&config_file_path) {
+        match std::fs::read_to_string(&config_file_path) {
             Ok(config) => {
                 // If the file is successfully read, we deserialize it with `toml`.
-                let opt_from_config = toml::from_slice::<Opt>(&config)?;
+                let opt_from_config = toml::from_str::<Opt>(&config)?;
                 // Return an error if the config file contains 'config_file_path'.
                 // Using that key in the config file doesn't make sense because it creates a logical loop (config file referencing itself)
                 if opt_from_config.config_file_path.is_some() {

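Note: the hunk above adapts to toml 0.7, which dropped `toml::from_slice`; the config file is now read into a `String` and parsed with `toml::from_str`. A self-contained sketch under that assumption (the `Config` struct is hypothetical, not Meilisearch's real `Opt`):

    use serde::Deserialize;

    #[derive(Deserialize)]
    struct Config {
        db_path: Option<String>,
    }

    fn load(path: &str) -> Result<Config, Box<dyn std::error::Error>> {
        // was: std::fs::read(path) + toml::from_slice(&bytes)
        let raw = std::fs::read_to_string(path)?;
        Ok(toml::from_str(&raw)?)
    }
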
@@ -1,5 +1,6 @@
 use std::str;

+use actix_web::web::Data;
 use actix_web::{web, HttpRequest, HttpResponse};
 use deserr::actix_web::{AwebJson, AwebQueryParameter};
 use deserr::Deserr;
@@ -35,7 +36,7 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
 }

 pub async fn create_api_key(
-    auth_controller: GuardedData<ActionPolicy<{ actions::KEYS_CREATE }>, AuthController>,
+    auth_controller: GuardedData<ActionPolicy<{ actions::KEYS_CREATE }>, Data<AuthController>>,
     body: AwebJson<CreateApiKey, DeserrJsonError>,
     _req: HttpRequest,
 ) -> Result<HttpResponse, ResponseError> {
@@ -66,7 +67,7 @@ impl ListApiKeys {
 }

 pub async fn list_api_keys(
-    auth_controller: GuardedData<ActionPolicy<{ actions::KEYS_GET }>, AuthController>,
+    auth_controller: GuardedData<ActionPolicy<{ actions::KEYS_GET }>, Data<AuthController>>,
     list_api_keys: AwebQueryParameter<ListApiKeys, DeserrQueryParamError>,
 ) -> Result<HttpResponse, ResponseError> {
     let paginate = list_api_keys.into_inner().as_pagination();
@@ -84,7 +85,7 @@ pub async fn list_api_keys(
 }

 pub async fn get_api_key(
-    auth_controller: GuardedData<ActionPolicy<{ actions::KEYS_GET }>, AuthController>,
+    auth_controller: GuardedData<ActionPolicy<{ actions::KEYS_GET }>, Data<AuthController>>,
     path: web::Path<AuthParam>,
 ) -> Result<HttpResponse, ResponseError> {
     let key = path.into_inner().key;
@@ -103,7 +104,7 @@ pub async fn get_api_key(
 }

 pub async fn patch_api_key(
-    auth_controller: GuardedData<ActionPolicy<{ actions::KEYS_UPDATE }>, AuthController>,
+    auth_controller: GuardedData<ActionPolicy<{ actions::KEYS_UPDATE }>, Data<AuthController>>,
     body: AwebJson<PatchApiKey, DeserrJsonError>,
     path: web::Path<AuthParam>,
 ) -> Result<HttpResponse, ResponseError> {
@@ -123,7 +124,7 @@ pub async fn patch_api_key(
 }

 pub async fn delete_api_key(
-    auth_controller: GuardedData<ActionPolicy<{ actions::KEYS_DELETE }>, AuthController>,
+    auth_controller: GuardedData<ActionPolicy<{ actions::KEYS_DELETE }>, Data<AuthController>>,
     path: web::Path<AuthParam>,
 ) -> Result<HttpResponse, ResponseError> {
     let key = path.into_inner().key;

@@ -19,7 +19,7 @@ pub fn configure(cfg: &mut web::ServiceConfig) {

 pub async fn create_dump(
     index_scheduler: GuardedData<ActionPolicy<{ actions::DUMPS_CREATE }>, Data<IndexScheduler>>,
-    auth_controller: GuardedData<ActionPolicy<{ actions::DUMPS_CREATE }>, AuthController>,
+    auth_controller: GuardedData<ActionPolicy<{ actions::DUMPS_CREATE }>, Data<AuthController>>,
     req: HttpRequest,
     analytics: web::Data<dyn Analytics>,
 ) -> Result<HttpResponse, ResponseError> {

@@ -17,7 +17,7 @@ pub fn configure(config: &mut web::ServiceConfig) {

 pub async fn get_metrics(
     index_scheduler: GuardedData<ActionPolicy<{ actions::METRICS_GET }>, Data<IndexScheduler>>,
-    auth_controller: GuardedData<ActionPolicy<{ actions::METRICS_GET }>, AuthController>,
+    auth_controller: GuardedData<ActionPolicy<{ actions::METRICS_GET }>, Data<AuthController>>,
 ) -> Result<HttpResponse, ResponseError> {
     let auth_filters = index_scheduler.filters();
     if !auth_filters.all_indexes_authorized() {

@@ -238,7 +238,7 @@ pub struct Stats {

 async fn get_stats(
     index_scheduler: GuardedData<ActionPolicy<{ actions::STATS_GET }>, Data<IndexScheduler>>,
-    auth_controller: GuardedData<ActionPolicy<{ actions::STATS_GET }>, AuthController>,
+    auth_controller: GuardedData<ActionPolicy<{ actions::STATS_GET }>, Data<AuthController>>,
     req: HttpRequest,
     analytics: web::Data<dyn Analytics>,
 ) -> Result<HttpResponse, ResponseError> {
@@ -253,7 +253,7 @@ async fn get_stats(

 pub fn create_all_stats(
     index_scheduler: Data<IndexScheduler>,
-    auth_controller: AuthController,
+    auth_controller: Data<AuthController>,
     filters: &meilisearch_auth::AuthFilter,
 ) -> Result<Stats, ResponseError> {
     let mut last_task: Option<OffsetDateTime> = None;
@@ -318,9 +318,14 @@ struct KeysResponse {

 pub async fn get_health(
     req: HttpRequest,
+    index_scheduler: Data<IndexScheduler>,
+    auth_controller: Data<AuthController>,
     analytics: web::Data<dyn Analytics>,
 ) -> Result<HttpResponse, ResponseError> {
     analytics.health_seen(&req);

+    index_scheduler.health().unwrap();
+    auth_controller.health().unwrap();
+
     Ok(HttpResponse::Ok().json(serde_json::json!({ "status": "available" })))
 }

@@ -60,7 +60,7 @@ async fn create_api_key_bad_uid() {
     snapshot!(code, @"400 Bad Request");
     snapshot!(json_string!(response), @r###"
     {
-      "message": "Invalid value at `.uid`: invalid character: expected an optional prefix of `urn:uuid:` followed by [0-9a-zA-Z], found `o` at 2",
+      "message": "Invalid value at `.uid`: invalid character: expected an optional prefix of `urn:uuid:` followed by [0-9a-fA-F-], found `o` at 2",
       "code": "invalid_api_key_uid",
       "type": "invalid_request",
       "link": "https://docs.meilisearch.com/errors#invalid_api_key_uid"

@@ -82,7 +82,7 @@ impl Server {
     > {
         actix_web::test::init_service(create_app(
             self.service.index_scheduler.clone().into(),
-            self.service.auth.clone(),
+            self.service.auth.clone().into(),
             self.service.options.clone(),
             analytics::MockAnalytics::new(&self.service.options),
             true,

@@ -13,7 +13,7 @@ use crate::common::encoder::Encoder;

 pub struct Service {
     pub index_scheduler: Arc<IndexScheduler>,
-    pub auth: AuthController,
+    pub auth: Arc<AuthController>,
     pub options: Opt,
     pub api_key: Option<String>,
 }
@@ -107,7 +107,7 @@ impl Service {
     pub async fn request(&self, mut req: test::TestRequest) -> (Value, StatusCode) {
         let app = test::init_service(create_app(
             self.index_scheduler.clone().into(),
-            self.auth.clone(),
+            self.auth.clone().into(),
             self.options.clone(),
             analytics::MockAnalytics::new(&self.options),
             true,

@@ -279,6 +279,81 @@ async fn add_csv_document() {
     "###);
 }

+#[actix_rt::test]
+async fn add_csv_document_with_types() {
+    let server = Server::new().await;
+    let index = server.index("pets");
+
+    let document = "#id:number,name:string,race:string,age:number,cute:boolean
+0,jean,bernese mountain,2.5,true
+1,,,,
+2,lilou,pug,-2,false";
+
+    let (response, code) = index.raw_update_documents(document, Some("text/csv"), "").await;
+    snapshot!(code, @"202 Accepted");
+    snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
+    {
+      "taskUid": 0,
+      "indexUid": "pets",
+      "status": "enqueued",
+      "type": "documentAdditionOrUpdate",
+      "enqueuedAt": "[date]"
+    }
+    "###);
+    let response = index.wait_task(response["taskUid"].as_u64().unwrap()).await;
+    snapshot!(json_string!(response, { ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]", ".duration" => "[duration]" }), @r###"
+    {
+      "uid": 0,
+      "indexUid": "pets",
+      "status": "succeeded",
+      "type": "documentAdditionOrUpdate",
+      "canceledBy": null,
+      "details": {
+        "receivedDocuments": 3,
+        "indexedDocuments": 3
+      },
+      "error": null,
+      "duration": "[duration]",
+      "enqueuedAt": "[date]",
+      "startedAt": "[date]",
+      "finishedAt": "[date]"
+    }
+    "###);
+
+    let (documents, code) = index.get_all_documents(GetAllDocumentsOptions::default()).await;
+    snapshot!(code, @"200 OK");
+    snapshot!(json_string!(documents), @r###"
+    {
+      "results": [
+        {
+          "#id": 0,
+          "name": "jean",
+          "race": "bernese mountain",
+          "age": 2.5,
+          "cute": true
+        },
+        {
+          "#id": 1,
+          "name": null,
+          "race": null,
+          "age": null,
+          "cute": null
+        },
+        {
+          "#id": 2,
+          "name": "lilou",
+          "race": "pug",
+          "age": -2,
+          "cute": false
+        }
+      ],
+      "offset": 0,
+      "limit": 20,
+      "total": 3
+    }
+    "###);
+}
+
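Note: the test added above exercises typed CSV headers, where each column is declared as `name:type` and untyped columns default to strings. A short sketch of the convention, reusing the types seen in the test (columns here are hypothetical):

    #id:number,name:string,cute:boolean
    0,jean,true
    1,lilou,false

Such a payload is sent with `Content-Type: text/csv`, as `raw_update_documents` does in the test.
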
 #[actix_rt::test]
 async fn add_csv_document_with_custom_delimiter() {
     let server = Server::new().await;
@@ -343,6 +418,40 @@ async fn add_csv_document_with_custom_delimiter() {
     "###);
 }

+#[actix_rt::test]
+async fn add_csv_document_with_types_error() {
+    let server = Server::new().await;
+    let index = server.index("pets");
+
+    let document = "#id:number,a:boolean,b:number
+0,doggo,1";
+
+    let (response, code) = index.raw_update_documents(document, Some("text/csv"), "").await;
+    snapshot!(code, @"400 Bad Request");
+    snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
+    {
+      "message": "The `csv` payload provided is malformed: `Error parsing boolean \"doggo\" at line 1: provided string was not `true` or `false``.",
+      "code": "malformed_payload",
+      "type": "invalid_request",
+      "link": "https://docs.meilisearch.com/errors#malformed_payload"
+    }
+    "###);
+
+    let document = "#id:number,a:boolean,b:number
+0,true,doggo";
+
+    let (response, code) = index.raw_update_documents(document, Some("text/csv"), "").await;
+    snapshot!(code, @"400 Bad Request");
+    snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
+    {
+      "message": "The `csv` payload provided is malformed: `Error parsing number \"doggo\" at line 1: invalid float literal`.",
+      "code": "malformed_payload",
+      "type": "invalid_request",
+      "link": "https://docs.meilisearch.com/errors#malformed_payload"
+    }
+    "###);
+}
+
 /// any other content-type must be refused
 #[actix_rt::test]
 async fn error_add_documents_test_bad_content_types() {
@@ -1664,7 +1773,7 @@ async fn error_add_documents_payload_size() {
             "content": "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Donec metus erat, consequat in blandit venenatis, ultricies eu ipsum. Etiam luctus elit et mollis ultrices. Nam turpis risus, dictum non eros in, eleifend feugiat elit. Morbi non dolor pulvinar, sagittis mi sed, ultricies lorem. Nulla ultricies sem metus. Donec at suscipit quam, sed elementum mi. Suspendisse potenti. Fusce pharetra turpis tortor, sed eleifend odio dapibus ut. Nulla facilisi. Suspendisse elementum, dui eget aliquet dignissim, ex tellus aliquam nisl, at eleifend nisl metus tempus diam. Mauris fermentum sollicitudin efficitur. Donec dignissim est vitae elit finibus faucibus"
         }
     );
-    let documents: Vec<_> = (0..16000).into_iter().map(|_| document.clone()).collect();
+    let documents: Vec<_> = (0..16000).map(|_| document.clone()).collect();
     let documents = json!(documents);
     let (response, code) = index.add_documents(documents, None).await;

@@ -1825,7 +1934,6 @@ async fn batch_several_documents_addition() {
     let index = server.index("test");

     let mut documents: Vec<_> = (0..150usize)
-        .into_iter()
         .map(|id| {
             json!(
                 {

@@ -1121,6 +1121,12 @@ async fn import_dump_v5() {
     assert_eq!(indexes["results"][1]["uid"], json!("test2"));
     assert_eq!(indexes["results"][0]["primaryKey"], json!("id"));

+    // before doing anything we're going to wait until all the tasks in the dump have finished processing
+    let result = server.tasks_filter("statuses=enqueued,processing").await.0;
+    for task in result["results"].as_array().unwrap() {
+        server.wait_task(task["uid"].as_u64().unwrap()).await;
+    }
+
     let expected_stats = json!({
         "numberOfDocuments": 10,
         "isIndexing": false,

@@ -547,7 +547,7 @@ async fn filter_invalid_syntax_object() {
     index.wait_task(1).await;

     let expected_response = json!({
-        "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass",
+        "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass",
         "code": "invalid_search_filter",
         "type": "invalid_request",
         "link": "https://docs.meilisearch.com/errors#invalid_search_filter"
@@ -572,7 +572,7 @@ async fn filter_invalid_syntax_array() {
     index.wait_task(1).await;

     let expected_response = json!({
-        "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass",
+        "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass",
         "code": "invalid_search_filter",
         "type": "invalid_request",
         "link": "https://docs.meilisearch.com/errors#invalid_search_filter"
@@ -672,7 +672,7 @@ async fn filter_reserved_geo_attribute_array() {
     index.wait_task(1).await;

     let expected_response = json!({
-        "message": "`_geo` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` field coordinates.\n1:5 _geo = Glass",
+        "message": "`_geo` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.\n1:13 _geo = Glass",
         "code": "invalid_search_filter",
         "type": "invalid_request",
         "link": "https://docs.meilisearch.com/errors#invalid_search_filter"
@@ -697,7 +697,7 @@ async fn filter_reserved_geo_attribute_string() {
     index.wait_task(1).await;

     let expected_response = json!({
-        "message": "`_geo` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` field coordinates.\n1:5 _geo = Glass",
+        "message": "`_geo` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.\n1:13 _geo = Glass",
         "code": "invalid_search_filter",
         "type": "invalid_request",
         "link": "https://docs.meilisearch.com/errors#invalid_search_filter"
@@ -722,7 +722,7 @@ async fn filter_reserved_attribute_array() {
     index.wait_task(1).await;

     let expected_response = json!({
-        "message": "`_geoDistance` is a reserved keyword and thus can't be used as a filter expression.\n1:13 _geoDistance = Glass",
+        "message": "`_geoDistance` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.\n1:21 _geoDistance = Glass",
         "code": "invalid_search_filter",
         "type": "invalid_request",
         "link": "https://docs.meilisearch.com/errors#invalid_search_filter"
@@ -747,7 +747,7 @@ async fn filter_reserved_attribute_string() {
     index.wait_task(1).await;

     let expected_response = json!({
-        "message": "`_geoDistance` is a reserved keyword and thus can't be used as a filter expression.\n1:13 _geoDistance = Glass",
+        "message": "`_geoDistance` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.\n1:21 _geoDistance = Glass",
         "code": "invalid_search_filter",
         "type": "invalid_request",
         "link": "https://docs.meilisearch.com/errors#invalid_search_filter"
@@ -760,6 +760,56 @@ async fn filter_reserved_attribute_string() {
         .await;
 }

+#[actix_rt::test]
+async fn filter_reserved_geo_point_array() {
+    let server = Server::new().await;
+    let index = server.index("test");
+
+    index.update_settings(json!({"filterableAttributes": ["title"]})).await;
+
+    let documents = DOCUMENTS.clone();
+    index.add_documents(documents, None).await;
+    index.wait_task(1).await;
+
+    let expected_response = json!({
+        "message": "`_geoPoint` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.\n1:18 _geoPoint = Glass",
+        "code": "invalid_search_filter",
+        "type": "invalid_request",
+        "link": "https://docs.meilisearch.com/errors#invalid_search_filter"
+    });
+    index
+        .search(json!({"filter": ["_geoPoint = Glass"]}), |response, code| {
+            assert_eq!(response, expected_response);
+            assert_eq!(code, 400);
+        })
+        .await;
+}
+
+#[actix_rt::test]
+async fn filter_reserved_geo_point_string() {
+    let server = Server::new().await;
+    let index = server.index("test");
+
+    index.update_settings(json!({"filterableAttributes": ["title"]})).await;
+
+    let documents = DOCUMENTS.clone();
+    index.add_documents(documents, None).await;
+    index.wait_task(1).await;
+
+    let expected_response = json!({
+        "message": "`_geoPoint` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.\n1:18 _geoPoint = Glass",
+        "code": "invalid_search_filter",
+        "type": "invalid_request",
+        "link": "https://docs.meilisearch.com/errors#invalid_search_filter"
+    });
+    index
+        .search(json!({"filter": "_geoPoint = Glass"}), |response, code| {
+            assert_eq!(response, expected_response);
+            assert_eq!(code, 400);
+        })
+        .await;
+}
+
 #[actix_rt::test]
 async fn sort_geo_reserved_attribute() {
     let server = Server::new().await;

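Note: the updated error messages above advertise the new `IS NULL` / `IS NOT NULL` / `IS EMPTY` / `IS NOT EMPTY` filter operators alongside the existing ones. A hedged usage sketch (the field names are hypothetical, not from the diff):

    let query = serde_json::json!({
        "q": "glass",
        // the new operators compose with the existing filter syntax
        "filter": "release_date IS NOT NULL AND genres IS NOT EMPTY",
    });
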
@@ -12,40 +12,40 @@ readme.workspace = true
 license.workspace = true

 [dependencies]
-bimap = { version = "0.6.2", features = ["serde"] }
+bimap = { version = "0.6.3", features = ["serde"] }
 bincode = "1.3.3"
-bstr = "1.0.1"
+bstr = "1.4.0"
 byteorder = "1.4.3"
-charabia = { version = "0.7.1", default-features = false }
+charabia = { version = "0.7.2", default-features = false }
 concat-arrays = "0.1.2"
-crossbeam-channel = "0.5.6"
+crossbeam-channel = "0.5.8"
 deserr = "0.5.0"
-either = "1.8.0"
+either = "1.8.1"
 flatten-serde-json = { path = "../flatten-serde-json" }
 fst = "0.4.7"
 fxhash = "0.2.1"
 geoutils = "0.5.1"
-grenad = { version = "0.4.3", default-features = false, features = ["tempfile"] }
+grenad = { version = "0.4.4", default-features = false, features = ["tempfile"] }
 heed = { git = "https://github.com/meilisearch/heed", tag = "v0.12.5", default-features = false, features = ["lmdb", "sync-read-txn"] }
 json-depth-checker = { path = "../json-depth-checker" }
 levenshtein_automata = { version = "0.2.1", features = ["fst_automaton"] }
-memmap2 = "0.5.7"
+memmap2 = "0.5.10"
 obkv = "0.2.0"
-once_cell = "1.15.0"
-ordered-float = "3.2.0"
-rayon = "1.5.3"
+once_cell = "1.17.1"
+ordered-float = "3.6.0"
+rayon = "1.7.0"
 roaring = "0.10.1"
-rstar = { version = "0.9.3", features = ["serde"] }
-serde = { version = "1.0.145", features = ["derive"] }
-serde_json = { version = "1.0.85", features = ["preserve_order"] }
+rstar = { version = "0.10.0", features = ["serde"] }
+serde = { version = "1.0.160", features = ["derive"] }
+serde_json = { version = "1.0.95", features = ["preserve_order"] }
 slice-group-by = "0.3.0"
 smallstr = { version = "0.3.0", features = ["serde"] }
 smallvec = "1.10.0"
 smartstring = "1.0.1"
-tempfile = "3.3.0"
-thiserror = "1.0.37"
-time = { version = "0.3.15", features = ["serde-well-known", "formatting", "parsing", "macros"] }
-uuid = { version = "1.1.2", features = ["v4"] }
+tempfile = "3.5.0"
+thiserror = "1.0.40"
+time = { version = "0.3.20", features = ["serde-well-known", "formatting", "parsing", "macros"] }
+uuid = { version = "1.3.1", features = ["v4"] }

 filter-parser = { path = "../filter-parser" }

@@ -55,11 +55,12 @@ itertools = "0.10.5"
 # logging
 log = "0.4.17"
 logging_timer = "1.1.0"
-csv = "1.1.6"
+csv = "1.2.1"

 [dev-dependencies]
 mimalloc = { version = "0.1.29", default-features = false }
 big_s = "1.0.2"
-insta = "1.21.0"
+insta = "1.29.0"
 maplit = "1.0.2"
 md5 = "0.7.0"
 rand = {version = "0.8.5", features = ["small_rng"] }
@@ -89,3 +90,6 @@ korean = ["charabia/korean"]

 # allow thai specialized tokenization
 thai = ["charabia/thai"]
+
+# allow greek specialized tokenization
+greek = ["charabia/greek"]

milli/examples/index.rs (new file, 114 lines)
@@ -0,0 +1,114 @@
use std::error::Error;
use std::fs::File;
use std::io::{BufRead, BufReader, Cursor, Seek};
use std::path::Path;

use heed::EnvOpenOptions;
use milli::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
use milli::update::{IndexDocuments, IndexDocumentsConfig, IndexerConfig, Settings};
use milli::{Index, Object};

fn usage(error: &str, program_name: &str) -> String {
    format!(
        "{}. Usage: {} <PATH-TO-INDEX> <PATH-TO-DATASET> [searchable_fields] [filterable_fields]",
        error, program_name
    )
}

fn main() -> Result<(), Box<dyn Error>> {
    let mut args = std::env::args();
    let program_name = args.next().expect("No program name");
    let index_path =
        args.next().unwrap_or_else(|| panic!("{}", usage("Missing path to index.", &program_name)));
    let dataset_path = args
        .next()
        .unwrap_or_else(|| panic!("{}", usage("Missing path to source dataset.", &program_name)));
    // let primary_key = args.next().unwrap_or_else(|| "id".into());
    // "title overview"
    let searchable_fields: Vec<String> = args
        .next()
        .map(|arg| arg.split_whitespace().map(ToString::to_string).collect())
        .unwrap_or_default();

    println!("{searchable_fields:?}");
    // "release_date genres"
    let filterable_fields: Vec<String> = args
        .next()
        .map(|arg| arg.split_whitespace().map(ToString::to_string).collect())
        .unwrap_or_default();

    let mut options = EnvOpenOptions::new();
    options.map_size(100 * 1024 * 1024 * 1024); // 100 GB

    std::fs::create_dir_all(&index_path).unwrap();
    let index = Index::new(options, index_path).unwrap();
    let mut wtxn = index.write_txn().unwrap();

    let config = IndexerConfig::default();
    let mut builder = Settings::new(&mut wtxn, &index, &config);
    // builder.set_primary_key(primary_key);
    let searchable_fields = searchable_fields.iter().map(|s| s.to_string()).collect();
    builder.set_searchable_fields(searchable_fields);
    let filterable_fields = filterable_fields.iter().map(|s| s.to_string()).collect();
    builder.set_filterable_fields(filterable_fields);

    builder.execute(|_| (), || false).unwrap();

    let config = IndexerConfig::default();
    let indexing_config = IndexDocumentsConfig::default();

    let builder =
        IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| (), || false).unwrap();

    let documents = documents_from(
        &dataset_path,
        Path::new(&dataset_path).extension().unwrap_or_default().to_str().unwrap_or_default(),
    );
    let (builder, user_error) = builder.add_documents(documents).unwrap();
    user_error.unwrap();
    builder.execute().unwrap();
    wtxn.commit().unwrap();

    index.prepare_for_closing().wait();
    Ok(())
}

fn documents_from(filename: &str, filetype: &str) -> DocumentsBatchReader<impl BufRead + Seek> {
    let reader = File::open(filename)
        .unwrap_or_else(|_| panic!("could not find the dataset in: {}", filename));
    let reader = BufReader::new(reader);
    let documents = match filetype {
        "csv" => documents_from_csv(reader).unwrap(),
        "json" => documents_from_json(reader).unwrap(),
        "jsonl" => documents_from_jsonl(reader).unwrap(),
        otherwise => panic!("invalid update format {:?}", otherwise),
    };
    DocumentsBatchReader::from_reader(Cursor::new(documents)).unwrap()
}

fn documents_from_jsonl(reader: impl BufRead) -> milli::Result<Vec<u8>> {
    let mut documents = DocumentsBatchBuilder::new(Vec::new());

    for result in serde_json::Deserializer::from_reader(reader).into_iter::<Object>() {
        let object = result.unwrap();
        documents.append_json_object(&object)?;
    }

    documents.into_inner().map_err(Into::into)
}

fn documents_from_json(reader: impl BufRead) -> milli::Result<Vec<u8>> {
    let mut documents = DocumentsBatchBuilder::new(Vec::new());

    documents.append_json_array(reader)?;

    documents.into_inner().map_err(Into::into)
}

fn documents_from_csv(reader: impl BufRead) -> milli::Result<Vec<u8>> {
    let csv = csv::Reader::from_reader(reader);

    let mut documents = DocumentsBatchBuilder::new(Vec::new());
    documents.append_csv(csv)?;

    documents.into_inner().map_err(Into::into)
}

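Note: files under milli/examples/ are standard Cargo examples, so (assuming a default workspace setup) the indexer above should be runnable with something like:

    cargo run -p milli --example index --release -- data_movies.ms movies.json "title overview" "release_date genres"

The dataset path's extension selects the parser (csv, json, or jsonl), per `documents_from` above.
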
117
milli/examples/search.rs
Normal file
117
milli/examples/search.rs
Normal file
@@ -0,0 +1,117 @@
|
||||
use std::error::Error;
use std::io::stdin;
use std::path::Path;
use std::time::Instant;

use heed::EnvOpenOptions;
use milli::{
    execute_search, DefaultSearchLogger, GeoSortStrategy, Index, SearchContext, SearchLogger,
    TermsMatchingStrategy,
};

#[global_allocator]
static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc;

fn main() -> Result<(), Box<dyn Error>> {
    let mut args = std::env::args();
    let program_name = args.next().expect("No program name");
    let dataset = args.next().unwrap_or_else(|| {
        panic!(
            "Missing path to index. Usage: {} <PATH-TO-INDEX> [<logger-dir>] [print-documents]",
            program_name
        )
    });
    let detailed_logger_dir = args.next();
    let print_documents: bool =
        if let Some(arg) = args.next() { arg == "print-documents" } else { false };

    let mut options = EnvOpenOptions::new();
    options.map_size(100 * 1024 * 1024 * 1024); // 100 GB

    let index = Index::new(options, dataset)?;
    let txn = index.read_txn()?;
    let mut query = String::new();
    while stdin().read_line(&mut query)? > 0 {
        for _ in 0..2 {
            let mut default_logger = DefaultSearchLogger;
            // FIXME: consider resetting the state of the logger between search executions as otherwise panics are possible.
            // Worked around here by recreating the logger on each iteration of the loop.
            let mut detailed_logger = detailed_logger_dir
                .as_ref()
                .map(|logger_dir| (milli::VisualSearchLogger::default(), logger_dir));
            let logger: &mut dyn SearchLogger<_> =
                if let Some((detailed_logger, _)) = detailed_logger.as_mut() {
                    detailed_logger
                } else {
                    &mut default_logger
                };

            let start = Instant::now();

            let mut ctx = SearchContext::new(&index, &txn);
            let docs = execute_search(
                &mut ctx,
                &(!query.trim().is_empty()).then(|| query.trim().to_owned()),
                TermsMatchingStrategy::Last,
                false,
                &None,
                &None,
                GeoSortStrategy::default(),
                0,
                20,
                None,
                &mut DefaultSearchLogger,
                logger,
            )?;
            if let Some((logger, dir)) = detailed_logger {
                logger.finish(&mut ctx, Path::new(dir))?;
            }
            let elapsed = start.elapsed();
            println!("new: {}us, docids: {:?}", elapsed.as_micros(), docs.documents_ids);
            if print_documents {
                let documents = index
                    .documents(&txn, docs.documents_ids.iter().copied())
                    .unwrap()
                    .into_iter()
                    .map(|(id, obkv)| {
                        let mut object = serde_json::Map::default();
                        for (fid, fid_name) in index.fields_ids_map(&txn).unwrap().iter() {
                            let value = obkv.get(fid).unwrap();
                            let value: serde_json::Value = serde_json::from_slice(value).unwrap();
                            object.insert(fid_name.to_owned(), value);
                        }
                        (id, serde_json::to_string_pretty(&object).unwrap())
                    })
                    .collect::<Vec<_>>();

                for (id, document) in documents {
                    println!("{id}:");
                    println!("{document}");
                }

                let documents = index
                    .documents(&txn, docs.documents_ids.iter().copied())
                    .unwrap()
                    .into_iter()
                    .map(|(id, obkv)| {
                        let mut object = serde_json::Map::default();
                        for (fid, fid_name) in index.fields_ids_map(&txn).unwrap().iter() {
                            let value = obkv.get(fid).unwrap();
                            let value: serde_json::Value = serde_json::from_slice(value).unwrap();
                            object.insert(fid_name.to_owned(), value);
                        }
                        (id, serde_json::to_string_pretty(&object).unwrap())
                    })
                    .collect::<Vec<_>>();
                println!("{}us: {:?}", elapsed.as_micros(), docs.documents_ids);
                for (id, document) in documents {
                    println!("{id}:");
                    println!("{document}");
                }
            }
        }
        query.clear();
    }

    Ok(())
}
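Assuming a standard Cargo workspace, this example would be launched as `cargo run --release --example search -- <PATH-TO-INDEX> [<logger-dir>] [print-documents]`, matching the usage string in the panic message. Note that every query read from stdin is executed twice by the `for _ in 0..2` loop, presumably to surface warm-cache timings on the second run.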
milli/examples/settings.rs (new file, 33 lines)
@@ -0,0 +1,33 @@
// use big_s::S;
use heed::EnvOpenOptions;
// use maplit::hashset;
use milli::{
    update::{IndexerConfig, Settings},
    Criterion, Index,
};

fn main() {
    let mut options = EnvOpenOptions::new();
    options.map_size(100 * 1024 * 1024 * 1024); // 100 GB

    let index = Index::new(options, "data_movies.ms").unwrap();
    let mut wtxn = index.write_txn().unwrap();

    let config = IndexerConfig::default();
    let mut builder = Settings::new(&mut wtxn, &index, &config);

    // builder.set_min_word_len_one_typo(5);
    // builder.set_min_word_len_two_typos(7);
    // builder.set_sortable_fields(hashset! { S("release_date") });
    builder.set_criteria(vec![
        Criterion::Words,
        Criterion::Typo,
        Criterion::Proximity,
        Criterion::Attribute,
        Criterion::Sort,
        Criterion::Exactness,
    ]);

    builder.execute(|_| (), || false).unwrap();
    wtxn.commit().unwrap();
}
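A minimal follow-up sketch (my own; it assumes the `Index::criteria` accessor that milli exposes elsewhere, appended at the end of `main` above) to verify what the example committed:

    // Hypothetical read-back of the ranking rules set above.
    let rtxn = index.read_txn().unwrap();
    let criteria = index.criteria(&rtxn).unwrap();
    println!("{criteria:?}"); // expected: [Words, Typo, Proximity, Attribute, Sort, Exactness]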
@@ -81,6 +81,8 @@ impl FromStr for Member {
         if is_reserved_keyword(text)
             || text.starts_with("_geoRadius(")
             || text.starts_with("_geoBoundingBox(")
+            || text.starts_with("_geo(")
+            || text.starts_with("_geoDistance(")
         {
             return Err(AscDescError::ReservedKeyword { name: text.to_string() })?;
         }
@@ -265,6 +267,13 @@ mod tests {
             ("_geoPoint(0, -180.000001):desc", GeoError(BadGeoError::Lng(-180.000001))),
             ("_geoPoint(159.256, 130):asc", GeoError(BadGeoError::Lat(159.256))),
             ("_geoPoint(12, -2021):desc", GeoError(BadGeoError::Lng(-2021.))),
+            ("_geo(12, -2021):asc", ReservedKeyword { name: S("_geo(12, -2021)") }),
+            ("_geo(12, -2021):desc", ReservedKeyword { name: S("_geo(12, -2021)") }),
+            ("_geoDistance(12, -2021):asc", ReservedKeyword { name: S("_geoDistance(12, -2021)") }),
+            (
+                "_geoDistance(12, -2021):desc",
+                ReservedKeyword { name: S("_geoDistance(12, -2021)") },
+            ),
         ];

         for (req, expected_error) in invalid_req {
@@ -114,14 +114,15 @@ impl<W: Write> DocumentsBatchBuilder<W> {
                     self.value_buffer.clear();

                     let value = &record[*i];
+                    let trimmed_value = value.trim();
                     match type_ {
                         AllowedType::Number => {
-                            if value.trim().is_empty() {
+                            if trimmed_value.is_empty() {
                                 to_writer(&mut self.value_buffer, &Value::Null)?;
-                            } else if let Ok(integer) = value.trim().parse::<i64>() {
+                            } else if let Ok(integer) = trimmed_value.parse::<i64>() {
                                 to_writer(&mut self.value_buffer, &integer)?;
                             } else {
-                                match value.trim().parse::<f64>() {
+                                match trimmed_value.parse::<f64>() {
                                     Ok(float) => {
                                         to_writer(&mut self.value_buffer, &float)?;
                                     }
@@ -135,6 +136,24 @@ impl<W: Write> DocumentsBatchBuilder<W> {
                                 }
                             }
                         }
+                        AllowedType::Boolean => {
+                            if trimmed_value.is_empty() {
+                                to_writer(&mut self.value_buffer, &Value::Null)?;
+                            } else {
+                                match trimmed_value.parse::<bool>() {
+                                    Ok(bool) => {
+                                        to_writer(&mut self.value_buffer, &bool)?;
+                                    }
+                                    Err(error) => {
+                                        return Err(Error::ParseBool {
+                                            error,
+                                            line,
+                                            value: value.to_string(),
+                                        });
+                                    }
+                                }
+                            }
+                        }
                         AllowedType::String => {
                             if value.is_empty() {
                                 to_writer(&mut self.value_buffer, &Value::Null)?;
@@ -173,6 +192,7 @@ impl<W: Write> DocumentsBatchBuilder<W> {
 #[derive(Debug)]
 enum AllowedType {
     String,
+    Boolean,
     Number,
 }

@@ -181,6 +201,7 @@ fn parse_csv_header(header: &str) -> (&str, AllowedType) {
     match header.rsplit_once(':') {
         Some((field_name, field_type)) => match field_type {
             "string" => (field_name, AllowedType::String),
+            "boolean" => (field_name, AllowedType::Boolean),
             "number" => (field_name, AllowedType::Number),
             // if the pattern isn't recognized, we keep the whole field.
             _otherwise => (header, AllowedType::String),
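To make the header convention concrete, here is how `parse_csv_header` above would classify a few headers (hypothetical assertions, mine, not from the diff):

assert!(matches!(parse_csv_header("price:number"), ("price", AllowedType::Number)));
assert!(matches!(parse_csv_header("in_stock:boolean"), ("in_stock", AllowedType::Boolean)));
assert!(matches!(parse_csv_header("title:string"), ("title", AllowedType::String)));
// An unknown suffix is not split: the whole header stays the field name.
assert!(matches!(parse_csv_header("ratio:float"), ("ratio:float", AllowedType::String)));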
@@ -3,7 +3,7 @@ mod enriched;
 mod reader;
 mod serde_impl;

-use std::fmt::{self, Debug};
+use std::fmt::Debug;
 use std::io;
 use std::str::Utf8Error;

@@ -87,71 +87,30 @@ impl DocumentsBatchIndex {
     }
 }

-#[derive(Debug)]
+#[derive(Debug, thiserror::Error)]
 pub enum Error {
+    #[error("Error parsing number {value:?} at line {line}: {error}")]
     ParseFloat { error: std::num::ParseFloatError, line: usize, value: String },
+    #[error("Error parsing boolean {value:?} at line {line}: {error}")]
+    ParseBool { error: std::str::ParseBoolError, line: usize, value: String },
+    #[error("Invalid document addition format, missing the documents batch index.")]
     InvalidDocumentFormat,
+    #[error("Invalid enriched data.")]
     InvalidEnrichedData,
-    InvalidUtf8(Utf8Error),
-    Csv(csv::Error),
-    Json(serde_json::Error),
+    #[error(transparent)]
+    InvalidUtf8(#[from] Utf8Error),
+    #[error(transparent)]
+    Csv(#[from] csv::Error),
+    #[error(transparent)]
+    Json(#[from] serde_json::Error),
+    #[error(transparent)]
     Serialize(serde_json::Error),
-    Grenad(grenad::Error),
-    Io(io::Error),
+    #[error(transparent)]
+    Grenad(#[from] grenad::Error),
+    #[error(transparent)]
+    Io(#[from] io::Error),
 }

-impl From<csv::Error> for Error {
-    fn from(e: csv::Error) -> Self {
-        Self::Csv(e)
-    }
-}
-
-impl From<io::Error> for Error {
-    fn from(other: io::Error) -> Self {
-        Self::Io(other)
-    }
-}
-
-impl From<serde_json::Error> for Error {
-    fn from(other: serde_json::Error) -> Self {
-        Self::Json(other)
-    }
-}
-
-impl From<grenad::Error> for Error {
-    fn from(other: grenad::Error) -> Self {
-        Self::Grenad(other)
-    }
-}
-
-impl From<Utf8Error> for Error {
-    fn from(other: Utf8Error) -> Self {
-        Self::InvalidUtf8(other)
-    }
-}
-
-impl fmt::Display for Error {
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        match self {
-            Error::ParseFloat { error, line, value } => {
-                write!(f, "Error parsing number {:?} at line {}: {}", value, line, error)
-            }
-            Error::InvalidDocumentFormat => {
-                f.write_str("Invalid document addition format, missing the documents batch index.")
-            }
-            Error::InvalidEnrichedData => f.write_str("Invalid enriched data."),
-            Error::InvalidUtf8(e) => write!(f, "{}", e),
-            Error::Io(e) => write!(f, "{}", e),
-            Error::Serialize(e) => write!(f, "{}", e),
-            Error::Grenad(e) => write!(f, "{}", e),
-            Error::Csv(e) => write!(f, "{}", e),
-            Error::Json(e) => write!(f, "{}", e),
-        }
-    }
-}
-
-impl std::error::Error for Error {}
-
 #[cfg(test)]
 pub fn objects_from_json_value(json: serde_json::Value) -> Vec<crate::Object> {
     let documents = match json {
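Because the commit itself is terse, here is a standalone sketch (my own, not from the diff) of the `thiserror` pattern it adopts: `#[from]` generates the `From` impls and `#[error(transparent)]` forwards `Display` to the wrapped error, which is exactly why the manual impls above could be deleted.

use thiserror::Error;

#[derive(Debug, Error)]
enum MyError {
    // `#[from]` derives `impl From<std::io::Error> for MyError`,
    // and `transparent` delegates `Display` to the inner error.
    #[error(transparent)]
    Io(#[from] std::io::Error),
}

fn read_config() -> Result<String, MyError> {
    // The `?` operator works thanks to the derived `From` impl.
    Ok(std::fs::read_to_string("config.toml")?)
}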
@@ -274,6 +233,19 @@ mod test {
         ]);
     }

+    #[test]
+    fn csv_types_dont_panic() {
+        let csv1_content =
+            "id:number,b:boolean,c,d:number\n1,,,\n2,true,doggo,2\n3,false,the best doggo,-2\n4,,\"Hello, World!\",2.5";
+        let csv1 = csv::Reader::from_reader(Cursor::new(csv1_content));
+
+        let mut builder = DocumentsBatchBuilder::new(Vec::new());
+        builder.append_csv(csv1).unwrap();
+        let vector = builder.into_inner().unwrap();
+
+        DocumentsBatchReader::from_reader(Cursor::new(vector)).unwrap();
+    }
+
     #[test]
     fn out_of_order_csv_fields() {
         let csv1_content = "id:number,b\n1,0";
@@ -21,5 +21,5 @@ pub use self::roaring_bitmap_length::{
     BoRoaringBitmapLenCodec, CboRoaringBitmapLenCodec, RoaringBitmapLenCodec,
 };
 pub use self::script_language_codec::ScriptLanguageCodec;
-pub use self::str_beu32_codec::StrBEU32Codec;
+pub use self::str_beu32_codec::{StrBEU16Codec, StrBEU32Codec};
 pub use self::str_str_u8_codec::{U8StrStrCodec, UncheckedU8StrStrCodec};
@@ -36,3 +36,39 @@ impl<'a> heed::BytesEncode<'a> for StrBEU32Codec {
         Some(Cow::Owned(bytes))
     }
 }
+
+pub struct StrBEU16Codec;
+
+impl<'a> heed::BytesDecode<'a> for StrBEU16Codec {
+    type DItem = (&'a str, u16);
+
+    fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
+        let footer_len = size_of::<u16>();
+
+        if bytes.len() < footer_len + 1 {
+            return None;
+        }
+
+        let (word_plus_nul_byte, bytes) = bytes.split_at(bytes.len() - footer_len);
+        let (_, word) = word_plus_nul_byte.split_last()?;
+        let word = str::from_utf8(word).ok()?;
+        let pos = bytes.try_into().map(u16::from_be_bytes).ok()?;
+
+        Some((word, pos))
+    }
+}
+
+impl<'a> heed::BytesEncode<'a> for StrBEU16Codec {
+    type EItem = (&'a str, u16);
+
+    fn bytes_encode((word, pos): &Self::EItem) -> Option<Cow<[u8]>> {
+        let pos = pos.to_be_bytes();
+
+        let mut bytes = Vec::with_capacity(word.len() + 1 + pos.len());
+        bytes.extend_from_slice(word.as_bytes());
+        bytes.push(0);
+        bytes.extend_from_slice(&pos[..]);
+
+        Some(Cow::Owned(bytes))
+    }
+}
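The encoded layout is `[word bytes][0x00][u16 position, big-endian]`. A small self-contained round-trip sketch (my own, independent of heed) to make the framing concrete:

// Hypothetical round trip mirroring StrBEU16Codec's byte layout.
fn encode(word: &str, pos: u16) -> Vec<u8> {
    let mut bytes = Vec::with_capacity(word.len() + 1 + 2);
    bytes.extend_from_slice(word.as_bytes());    // the word itself
    bytes.push(0);                               // NUL separator
    bytes.extend_from_slice(&pos.to_be_bytes()); // 2-byte big-endian footer
    bytes
}

fn decode(bytes: &[u8]) -> Option<(&str, u16)> {
    let (head, footer) = bytes.split_at(bytes.len().checked_sub(2)?);
    let (_nul, word) = head.split_last()?;
    Some((std::str::from_utf8(word).ok()?, u16::from_be_bytes(footer.try_into().ok()?)))
}

fn main() {
    assert_eq!(decode(&encode("doggo", 42)), Some(("doggo", 42)));
}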
@@ -19,12 +19,12 @@ use crate::heed_codec::facet::{
     FacetGroupKeyCodec, FacetGroupValueCodec, FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec,
     FieldIdCodec, OrderedF64Codec,
 };
-use crate::heed_codec::{ScriptLanguageCodec, StrRefCodec};
+use crate::heed_codec::{ScriptLanguageCodec, StrBEU16Codec, StrRefCodec};
 use crate::{
     default_criteria, BEU32StrCodec, BoRoaringBitmapCodec, CboRoaringBitmapCodec, Criterion,
     DocumentId, ExternalDocumentsIds, FacetDistribution, FieldDistribution, FieldId,
     FieldIdWordCountCodec, GeoPoint, ObkvCodec, Result, RoaringBitmapCodec, RoaringBitmapLenCodec,
-    Search, StrBEU32Codec, U8StrStrCodec, BEU16, BEU32,
+    Search, U8StrStrCodec, BEU16, BEU32,
 };

 pub const DEFAULT_MIN_WORD_LEN_ONE_TYPO: u8 = 5;
@@ -76,10 +76,14 @@ pub mod db_name {
     pub const WORD_PREFIX_PAIR_PROXIMITY_DOCIDS: &str = "word-prefix-pair-proximity-docids";
     pub const PREFIX_WORD_PAIR_PROXIMITY_DOCIDS: &str = "prefix-word-pair-proximity-docids";
     pub const WORD_POSITION_DOCIDS: &str = "word-position-docids";
+    pub const WORD_FIELD_ID_DOCIDS: &str = "word-field-id-docids";
     pub const WORD_PREFIX_POSITION_DOCIDS: &str = "word-prefix-position-docids";
+    pub const WORD_PREFIX_FIELD_ID_DOCIDS: &str = "word-prefix-field-id-docids";
     pub const FIELD_ID_WORD_COUNT_DOCIDS: &str = "field-id-word-count-docids";
     pub const FACET_ID_F64_DOCIDS: &str = "facet-id-f64-docids";
     pub const FACET_ID_EXISTS_DOCIDS: &str = "facet-id-exists-docids";
+    pub const FACET_ID_IS_NULL_DOCIDS: &str = "facet-id-is-null-docids";
+    pub const FACET_ID_IS_EMPTY_DOCIDS: &str = "facet-id-is-empty-docids";
     pub const FACET_ID_STRING_DOCIDS: &str = "facet-id-string-docids";
     pub const FIELD_ID_DOCID_FACET_F64S: &str = "field-id-docid-facet-f64s";
     pub const FIELD_ID_DOCID_FACET_STRINGS: &str = "field-id-docid-facet-strings";
@@ -118,17 +122,26 @@ pub struct Index {
     pub prefix_word_pair_proximity_docids: Database<U8StrStrCodec, CboRoaringBitmapCodec>,

     /// Maps the word and the position with the docids that corresponds to it.
-    pub word_position_docids: Database<StrBEU32Codec, CboRoaringBitmapCodec>,
+    pub word_position_docids: Database<StrBEU16Codec, CboRoaringBitmapCodec>,
+    /// Maps the word and the field id with the docids that corresponds to it.
+    pub word_fid_docids: Database<StrBEU16Codec, CboRoaringBitmapCodec>,

     /// Maps the field id and the word count with the docids that corresponds to it.
     pub field_id_word_count_docids: Database<FieldIdWordCountCodec, CboRoaringBitmapCodec>,
-    /// Maps the position of a word prefix with all the docids where this prefix appears.
-    pub word_prefix_position_docids: Database<StrBEU32Codec, CboRoaringBitmapCodec>,
+    /// Maps the word prefix and a position with all the docids where the prefix appears at the position.
+    pub word_prefix_position_docids: Database<StrBEU16Codec, CboRoaringBitmapCodec>,
+    /// Maps the word prefix and a field id with all the docids where the prefix appears inside the field
+    pub word_prefix_fid_docids: Database<StrBEU16Codec, CboRoaringBitmapCodec>,

     /// Maps the script and language with all the docids that corresponds to it.
     pub script_language_docids: Database<ScriptLanguageCodec, RoaringBitmapCodec>,

     /// Maps the facet field id and the docids for which this field exists
     pub facet_id_exists_docids: Database<FieldIdCodec, CboRoaringBitmapCodec>,
+    /// Maps the facet field id and the docids for which this field is set as null
+    pub facet_id_is_null_docids: Database<FieldIdCodec, CboRoaringBitmapCodec>,
+    /// Maps the facet field id and the docids for which this field is considered empty
+    pub facet_id_is_empty_docids: Database<FieldIdCodec, CboRoaringBitmapCodec>,

     /// Maps the facet field id and ranges of numbers with the docids that corresponds to them.
     pub facet_id_f64_docids: Database<FacetGroupKeyCodec<OrderedF64Codec>, FacetGroupValueCodec>,
@@ -153,7 +166,7 @@ impl Index {
     ) -> Result<Index> {
         use db_name::*;

-        options.max_dbs(19);
+        options.max_dbs(23);
         unsafe { options.flag(Flags::MdbAlwaysFreePages) };

         let env = options.open(path)?;
@@ -170,11 +183,15 @@ impl Index {
         let prefix_word_pair_proximity_docids =
             env.create_database(Some(PREFIX_WORD_PAIR_PROXIMITY_DOCIDS))?;
         let word_position_docids = env.create_database(Some(WORD_POSITION_DOCIDS))?;
+        let word_fid_docids = env.create_database(Some(WORD_FIELD_ID_DOCIDS))?;
         let field_id_word_count_docids = env.create_database(Some(FIELD_ID_WORD_COUNT_DOCIDS))?;
         let word_prefix_position_docids = env.create_database(Some(WORD_PREFIX_POSITION_DOCIDS))?;
+        let word_prefix_fid_docids = env.create_database(Some(WORD_PREFIX_FIELD_ID_DOCIDS))?;
         let facet_id_f64_docids = env.create_database(Some(FACET_ID_F64_DOCIDS))?;
         let facet_id_string_docids = env.create_database(Some(FACET_ID_STRING_DOCIDS))?;
         let facet_id_exists_docids = env.create_database(Some(FACET_ID_EXISTS_DOCIDS))?;
+        let facet_id_is_null_docids = env.create_database(Some(FACET_ID_IS_NULL_DOCIDS))?;
+        let facet_id_is_empty_docids = env.create_database(Some(FACET_ID_IS_EMPTY_DOCIDS))?;

         let field_id_docid_facet_f64s = env.create_database(Some(FIELD_ID_DOCID_FACET_F64S))?;
         let field_id_docid_facet_strings =
@@ -196,11 +213,15 @@ impl Index {
             word_prefix_pair_proximity_docids,
             prefix_word_pair_proximity_docids,
             word_position_docids,
+            word_fid_docids,
             word_prefix_position_docids,
+            word_prefix_fid_docids,
             field_id_word_count_docids,
             facet_id_f64_docids,
             facet_id_string_docids,
             facet_id_exists_docids,
+            facet_id_is_null_docids,
+            facet_id_is_empty_docids,
             field_id_docid_facet_f64s,
             field_id_docid_facet_strings,
             documents,
@@ -833,6 +854,30 @@ impl Index {
         }
     }

+    /// Retrieve all the documents which contain this field id set as null
+    pub fn null_faceted_documents_ids(
+        &self,
+        rtxn: &RoTxn,
+        field_id: FieldId,
+    ) -> heed::Result<RoaringBitmap> {
+        match self.facet_id_is_null_docids.get(rtxn, &BEU16::new(field_id))? {
+            Some(docids) => Ok(docids),
+            None => Ok(RoaringBitmap::new()),
+        }
+    }
+
+    /// Retrieve all the documents which contain this field id and that is considered empty
+    pub fn empty_faceted_documents_ids(
+        &self,
+        rtxn: &RoTxn,
+        field_id: FieldId,
+    ) -> heed::Result<RoaringBitmap> {
+        match self.facet_id_is_empty_docids.get(rtxn, &BEU16::new(field_id))? {
+            Some(docids) => Ok(docids),
+            None => Ok(RoaringBitmap::new()),
+        }
+    }
+
     /// Retrieve all the documents which contain this field id
     pub fn exists_faceted_documents_ids(
         &self,
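A hedged usage sketch (mine, not from the diff) of the new accessors, assuming a field id resolved through `fields_ids_map`:

// Hypothetical caller: count documents whose "tags" facet is null or empty.
let rtxn = index.read_txn()?;
let fid = index.fields_ids_map(&rtxn)?.id("tags").expect("unknown field");
let null_docs = index.null_faceted_documents_ids(&rtxn, fid)?;
let empty_docs = index.empty_faceted_documents_ids(&rtxn, fid)?;
println!("null: {}, empty: {}", null_docs.len(), empty_docs.len());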
@@ -1284,10 +1329,10 @@ pub(crate) mod tests {
         let index_documents_config = IndexDocumentsConfig::default();
         Self { inner, indexer_config, index_documents_config, _tempdir }
     }
-    /// Creates a temporary index, with a default `4096 * 1000` size. This should be enough for
+    /// Creates a temporary index, with a default `4096 * 2000` size. This should be enough for
     /// most tests.
     pub fn new() -> Self {
-        Self::new_with_map_size(4096 * 1000)
+        Self::new_with_map_size(4096 * 2000)
     }
     pub fn add_documents_using_wtxn<'t, R>(
         &'t self,
@@ -1416,11 +1461,11 @@ pub(crate) mod tests {
         db_snap!(index, field_distribution);

         db_snap!(index, field_distribution,
-            @"
-        age 1
-        id 2
-        name 2
-        "
+            @r###"
+        age 1
+        id 2
+        name 2
+        "###
         );

         // snapshot_index!(&index, "1", include: "^field_distribution$");
@@ -1437,10 +1482,10 @@ pub(crate) mod tests {

         db_snap!(index, field_distribution,
             @r###"
-        age 1
-        id 2
-        name 2
-        "###
+        age 1
+        id 2
+        name 2
+        "###
         );

         // then we update a document by removing one field and another by adding one field
@@ -1453,10 +1498,10 @@ pub(crate) mod tests {

         db_snap!(index, field_distribution,
             @r###"
-        has_dog 1
-        id 2
-        name 2
-        "###
+        has_dog 1
+        id 2
+        name 2
+        "###
         );
     }
@@ -1,4 +1,56 @@
 #![cfg_attr(all(test, fuzzing), feature(no_coverage))]
+#![allow(clippy::type_complexity)]
+
+#[cfg(test)]
+#[global_allocator]
+pub static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc;
+
+// #[cfg(test)]
+// pub mod allocator {
+//     use std::alloc::{GlobalAlloc, System};
+//     use std::sync::atomic::{self, AtomicI64};
+
+//     #[global_allocator]
+//     pub static ALLOC: CountingAlloc = CountingAlloc {
+//         max_resident: AtomicI64::new(0),
+//         resident: AtomicI64::new(0),
+//         allocated: AtomicI64::new(0),
+//     };
+
+//     pub struct CountingAlloc {
+//         pub max_resident: AtomicI64,
+//         pub resident: AtomicI64,
+//         pub allocated: AtomicI64,
+//     }
+//     unsafe impl GlobalAlloc for CountingAlloc {
+//         unsafe fn alloc(&self, layout: std::alloc::Layout) -> *mut u8 {
+//             self.allocated.fetch_add(layout.size() as i64, atomic::Ordering::SeqCst);
+//             let old_resident =
+//                 self.resident.fetch_add(layout.size() as i64, atomic::Ordering::SeqCst);
+
+//             let resident = old_resident + layout.size() as i64;
+//             self.max_resident.fetch_max(resident, atomic::Ordering::SeqCst);
+
+//             // if layout.size() > 1_000_000 {
+//             //     eprintln!(
+//             //         "allocating {} with new resident size: {resident}",
+//             //         layout.size() / 1_000_000
+//             //     );
+//             //     // let trace = std::backtrace::Backtrace::capture();
+//             //     // let t = trace.to_string();
+//             //     // eprintln!("{t}");
+//             // }
+
+//             System.alloc(layout)
+//         }
+
+//         unsafe fn dealloc(&self, ptr: *mut u8, layout: std::alloc::Layout) {
+//             self.resident.fetch_sub(layout.size() as i64, atomic::Ordering::Relaxed);
+//             System.dealloc(ptr, layout)
+//         }
+//     }
+// }
+
 #[macro_use]
 pub mod documents;
@@ -26,6 +78,10 @@ use charabia::normalizer::{CharNormalizer, CompatibilityDecompositionNormalizer}
 pub use filter_parser::{Condition, FilterCondition, Span, Token};
 use fxhash::{FxHasher32, FxHasher64};
 pub use grenad::CompressionType;
+pub use search::new::{
+    execute_search, DefaultSearchLogger, GeoSortStrategy, SearchContext, SearchLogger,
+    VisualSearchLogger,
+};
 use serde_json::Value;
 pub use {charabia as tokenizer, heed};
@@ -43,9 +99,8 @@ pub use self::heed_codec::{
 };
 pub use self::index::Index;
 pub use self::search::{
-    CriterionImplementationStrategy, FacetDistribution, Filter, FormatOptions, MatchBounds,
-    MatcherBuilder, MatchingWord, MatchingWords, Search, SearchResult, TermsMatchingStrategy,
-    DEFAULT_VALUES_PER_FACET,
+    FacetDistribution, Filter, FormatOptions, MatchBounds, MatcherBuilder, MatchingWords, Search,
+    SearchResult, TermsMatchingStrategy, DEFAULT_VALUES_PER_FACET,
 };

 pub type Result<T> = std::result::Result<T, error::Error>;
@@ -100,6 +155,23 @@ pub fn relative_from_absolute_position(absolute: Position) -> (FieldId, Relative
 pub fn absolute_from_relative_position(field_id: FieldId, relative: RelativePosition) -> Position {
     (field_id as u32) << 16 | (relative as u32)
 }
+// TODO: this is wrong, but will do for now
+/// Compute the "bucketed" absolute position from the field id and relative position in the field.
+///
+/// In a bucketed position, the accuracy of the relative position is reduced exponentially as it gets larger.
+pub fn bucketed_position(relative: u16) -> u16 {
+    // The first few relative positions are kept intact.
+    if relative < 16 {
+        relative
+    } else if relative < 24 {
+        // Relative positions between 16 and 24 all become equal to 24
+        24
+    } else {
+        // Then, groups of positions that have the same base-2 logarithm are reduced to
+        // the same relative position: the smallest power of 2 that is greater than them
+        (relative as f64).log2().ceil().exp2() as u16
+    }
+}
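To make the bucketing concrete, a few hand-checked values (a hypothetical test of the function above, mine):

#[test]
fn bucketed_position_samples() {
    assert_eq!(bucketed_position(3), 3);     // below 16: kept intact
    assert_eq!(bucketed_position(20), 24);   // 16..24: clamped to 24
    assert_eq!(bucketed_position(24), 32);   // ceil(log2(24)) = 5, so 2^5
    assert_eq!(bucketed_position(100), 128); // ceil(log2(100)) = 7, so 2^7
}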

 /// Transform a raw obkv store into a JSON Object.
 pub fn obkv_to_json(
@@ -1,569 +0,0 @@
use std::mem::take;

use heed::BytesDecode;
use itertools::Itertools;
use log::debug;
use ordered_float::OrderedFloat;
use roaring::RoaringBitmap;

use super::{Criterion, CriterionParameters, CriterionResult};
use crate::facet::FacetType;
use crate::heed_codec::facet::{FacetGroupKeyCodec, OrderedF64Codec};
use crate::heed_codec::ByteSliceRefCodec;
use crate::search::criteria::{resolve_query_tree, CriteriaBuilder, InitialCandidates};
use crate::search::facet::{ascending_facet_sort, descending_facet_sort};
use crate::search::query_tree::Operation;
use crate::search::CriterionImplementationStrategy;
use crate::{FieldId, Index, Result};

/// Threshold on the number of candidates that makes
/// the system choose between one algorithm and another.
const CANDIDATES_THRESHOLD: u64 = 1000;
pub struct AscDesc<'t> {
    index: &'t Index,
    rtxn: &'t heed::RoTxn<'t>,
    field_name: String,
    field_id: Option<FieldId>,
    is_ascending: bool,
    query_tree: Option<Operation>,
    candidates: Box<dyn Iterator<Item = heed::Result<RoaringBitmap>> + 't>,
    allowed_candidates: RoaringBitmap,
    initial_candidates: InitialCandidates,
    faceted_candidates: RoaringBitmap,
    implementation_strategy: CriterionImplementationStrategy,
    parent: Box<dyn Criterion + 't>,
}

impl<'t> AscDesc<'t> {
    pub fn asc(
        index: &'t Index,
        rtxn: &'t heed::RoTxn,
        parent: Box<dyn Criterion + 't>,
        field_name: String,
        implementation_strategy: CriterionImplementationStrategy,
    ) -> Result<Self> {
        Self::new(index, rtxn, parent, field_name, true, implementation_strategy)
    }

    pub fn desc(
        index: &'t Index,
        rtxn: &'t heed::RoTxn,
        parent: Box<dyn Criterion + 't>,
        field_name: String,
        implementation_strategy: CriterionImplementationStrategy,
    ) -> Result<Self> {
        Self::new(index, rtxn, parent, field_name, false, implementation_strategy)
    }

    fn new(
        index: &'t Index,
        rtxn: &'t heed::RoTxn,
        parent: Box<dyn Criterion + 't>,
        field_name: String,
        is_ascending: bool,
        implementation_strategy: CriterionImplementationStrategy,
    ) -> Result<Self> {
        let fields_ids_map = index.fields_ids_map(rtxn)?;
        let field_id = fields_ids_map.id(&field_name);
        let faceted_candidates = match field_id {
            Some(field_id) => {
                let number_faceted =
                    index.faceted_documents_ids(rtxn, field_id, FacetType::Number)?;
                let string_faceted =
                    index.faceted_documents_ids(rtxn, field_id, FacetType::String)?;
                number_faceted | string_faceted
            }
            None => RoaringBitmap::default(),
        };

        Ok(AscDesc {
            index,
            rtxn,
            field_name,
            field_id,
            is_ascending,
            query_tree: None,
            candidates: Box::new(std::iter::empty()),
            allowed_candidates: RoaringBitmap::new(),
            faceted_candidates,
            initial_candidates: InitialCandidates::Estimated(RoaringBitmap::new()),
            implementation_strategy,
            parent,
        })
    }
}
impl<'t> Criterion for AscDesc<'t> {
    #[logging_timer::time("AscDesc::{}")]
    fn next(&mut self, params: &mut CriterionParameters) -> Result<Option<CriterionResult>> {
        // remove excluded candidates when next is called, instead of doing it in the loop.
        self.allowed_candidates -= params.excluded_candidates;

        loop {
            debug!(
                "Facet {}({}) iteration",
                if self.is_ascending { "Asc" } else { "Desc" },
                self.field_name
            );

            match self.candidates.next().transpose()? {
                None if !self.allowed_candidates.is_empty() => {
                    return Ok(Some(CriterionResult {
                        query_tree: self.query_tree.clone(),
                        candidates: Some(take(&mut self.allowed_candidates)),
                        filtered_candidates: None,
                        initial_candidates: Some(self.initial_candidates.take()),
                    }));
                }
                None => match self.parent.next(params)? {
                    Some(CriterionResult {
                        query_tree,
                        candidates,
                        filtered_candidates,
                        initial_candidates,
                    }) => {
                        self.query_tree = query_tree;
                        let mut candidates = match (&self.query_tree, candidates) {
                            (_, Some(candidates)) => candidates,
                            (Some(qt), None) => {
                                let context = CriteriaBuilder::new(self.rtxn, self.index)?;
                                resolve_query_tree(&context, qt, params.wdcache)?
                            }
                            (None, None) => self.index.documents_ids(self.rtxn)?,
                        };

                        if let Some(filtered_candidates) = filtered_candidates {
                            candidates &= filtered_candidates;
                        }

                        match initial_candidates {
                            Some(initial_candidates) => {
                                self.initial_candidates |= initial_candidates
                            }
                            None => self.initial_candidates.map_inplace(|c| c | &candidates),
                        }

                        if candidates.is_empty() {
                            continue;
                        }

                        self.allowed_candidates = &candidates - params.excluded_candidates;
                        self.candidates = match self.field_id {
                            Some(field_id) => facet_ordered(
                                self.index,
                                self.rtxn,
                                field_id,
                                self.is_ascending,
                                candidates & &self.faceted_candidates,
                                self.implementation_strategy,
                            )?,
                            None => Box::new(std::iter::empty()),
                        };
                    }
                    None => return Ok(None),
                },
                Some(mut candidates) => {
                    candidates -= params.excluded_candidates;
                    self.allowed_candidates -= &candidates;
                    return Ok(Some(CriterionResult {
                        query_tree: self.query_tree.clone(),
                        candidates: Some(candidates),
                        filtered_candidates: None,
                        initial_candidates: Some(self.initial_candidates.take()),
                    }));
                }
            }
        }
    }
}
fn facet_ordered_iterative<'t>(
    index: &'t Index,
    rtxn: &'t heed::RoTxn,
    field_id: FieldId,
    is_ascending: bool,
    candidates: RoaringBitmap,
) -> Result<Box<dyn Iterator<Item = heed::Result<RoaringBitmap>> + 't>> {
    let number_iter = iterative_facet_number_ordered_iter(
        index,
        rtxn,
        field_id,
        is_ascending,
        candidates.clone(),
    )?;
    let string_iter =
        iterative_facet_string_ordered_iter(index, rtxn, field_id, is_ascending, candidates)?;
    Ok(Box::new(number_iter.chain(string_iter).map(Ok)) as Box<dyn Iterator<Item = _>>)
}

fn facet_extreme_value<'t>(
    mut extreme_it: impl Iterator<Item = heed::Result<(RoaringBitmap, &'t [u8])>> + 't,
) -> Result<Option<f64>> {
    let extreme_value =
        if let Some(extreme_value) = extreme_it.next() { extreme_value } else { return Ok(None) };
    let (_, extreme_value) = extreme_value?;

    Ok(OrderedF64Codec::bytes_decode(extreme_value))
}

pub fn facet_min_value<'t>(
    index: &'t Index,
    rtxn: &'t heed::RoTxn,
    field_id: FieldId,
    candidates: RoaringBitmap,
) -> Result<Option<f64>> {
    let db = index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>();
    let it = ascending_facet_sort(rtxn, db, field_id, candidates)?;
    facet_extreme_value(it)
}

pub fn facet_max_value<'t>(
    index: &'t Index,
    rtxn: &'t heed::RoTxn,
    field_id: FieldId,
    candidates: RoaringBitmap,
) -> Result<Option<f64>> {
    let db = index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>();
    let it = descending_facet_sort(rtxn, db, field_id, candidates)?;
    facet_extreme_value(it)
}

fn facet_ordered_set_based<'t>(
    index: &'t Index,
    rtxn: &'t heed::RoTxn,
    field_id: FieldId,
    is_ascending: bool,
    candidates: RoaringBitmap,
) -> Result<Box<dyn Iterator<Item = heed::Result<RoaringBitmap>> + 't>> {
    let number_db =
        index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>();
    let string_db =
        index.facet_id_string_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>();

    let (number_iter, string_iter) = if is_ascending {
        let number_iter = ascending_facet_sort(rtxn, number_db, field_id, candidates.clone())?;
        let string_iter = ascending_facet_sort(rtxn, string_db, field_id, candidates)?;

        (itertools::Either::Left(number_iter), itertools::Either::Left(string_iter))
    } else {
        let number_iter = descending_facet_sort(rtxn, number_db, field_id, candidates.clone())?;
        let string_iter = descending_facet_sort(rtxn, string_db, field_id, candidates)?;

        (itertools::Either::Right(number_iter), itertools::Either::Right(string_iter))
    };

    Ok(Box::new(number_iter.chain(string_iter).map(|res| res.map(|(doc_ids, _)| doc_ids))))
}
/// Returns an iterator over groups of the given candidates in ascending or descending order.
///
/// It will either use an iterative or a set-based method on the whole facet database depending
/// on the number of candidates to rank.
fn facet_ordered<'t>(
    index: &'t Index,
    rtxn: &'t heed::RoTxn,
    field_id: FieldId,
    is_ascending: bool,
    candidates: RoaringBitmap,
    implementation_strategy: CriterionImplementationStrategy,
) -> Result<Box<dyn Iterator<Item = heed::Result<RoaringBitmap>> + 't>> {
    match implementation_strategy {
        CriterionImplementationStrategy::OnlyIterative => {
            facet_ordered_iterative(index, rtxn, field_id, is_ascending, candidates)
        }
        CriterionImplementationStrategy::OnlySetBased => {
            facet_ordered_set_based(index, rtxn, field_id, is_ascending, candidates)
        }
        CriterionImplementationStrategy::Dynamic => {
            if candidates.len() <= CANDIDATES_THRESHOLD {
                facet_ordered_iterative(index, rtxn, field_id, is_ascending, candidates)
            } else {
                facet_ordered_set_based(index, rtxn, field_id, is_ascending, candidates)
            }
        }
    }
}
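For orientation (my note, not the diff's): under `CriterionImplementationStrategy::Dynamic` the cutoff is the `CANDIDATES_THRESHOLD` of 1000 defined at the top of this file, so for example:

// candidates.len() == 800    -> facet_ordered_iterative (one facet lookup per document)
// candidates.len() == 50_000 -> facet_ordered_set_based (walks the facet databases instead)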
/// Fetch the whole list of candidates facet number values one by one and order them by it.
///
/// This function is fast when the amount of candidates to rank is small.
fn iterative_facet_number_ordered_iter<'t>(
    index: &'t Index,
    rtxn: &'t heed::RoTxn,
    field_id: FieldId,
    is_ascending: bool,
    candidates: RoaringBitmap,
) -> Result<impl Iterator<Item = RoaringBitmap> + 't> {
    let mut docids_values = Vec::with_capacity(candidates.len() as usize);
    for docid in candidates.iter() {
        let left = (field_id, docid, f64::MIN);
        let right = (field_id, docid, f64::MAX);
        let mut iter = index.field_id_docid_facet_f64s.range(rtxn, &(left..=right))?;
        let entry = if is_ascending { iter.next() } else { iter.last() };
        if let Some(((_, _, value), ())) = entry.transpose()? {
            docids_values.push((docid, OrderedFloat(value)));
        }
    }
    docids_values.sort_unstable_by_key(|(_, v)| *v);
    let iter = docids_values.into_iter();
    let iter = if is_ascending {
        Box::new(iter) as Box<dyn Iterator<Item = _>>
    } else {
        Box::new(iter.rev())
    };

    // The itertools GroupBy iterator doesn't provide an owned version, we are therefore
    // required to collect the result into an owned collection (a Vec).
    // https://github.com/rust-itertools/itertools/issues/499
    #[allow(clippy::needless_collect)]
    let vec: Vec<_> = iter
        .group_by(|(_, v)| *v)
        .into_iter()
        .map(|(_, ids)| ids.map(|(id, _)| id).collect())
        .collect();

    Ok(vec.into_iter())
}
/// Fetch the whole list of candidates facet string values one by one and order them by it.
///
/// This function is fast when the amount of candidates to rank is small.
fn iterative_facet_string_ordered_iter<'t>(
    index: &'t Index,
    rtxn: &'t heed::RoTxn,
    field_id: FieldId,
    is_ascending: bool,
    candidates: RoaringBitmap,
) -> Result<impl Iterator<Item = RoaringBitmap> + 't> {
    let mut docids_values = Vec::with_capacity(candidates.len() as usize);
    for docid in candidates.iter() {
        let left = (field_id, docid, "");
        let right = (field_id, docid.saturating_add(1), "");
        // FIXME Doing this means that it will never be possible to retrieve
        // the document with id 2^32, not sure this is a real problem.
        let mut iter = index.field_id_docid_facet_strings.range(rtxn, &(left..right))?;
        let entry = if is_ascending { iter.next() } else { iter.last() };
        if let Some(((_, _, value), _)) = entry.transpose()? {
            docids_values.push((docid, value));
        }
    }
    docids_values.sort_unstable_by_key(|(_, v)| *v);
    let iter = docids_values.into_iter();
    let iter = if is_ascending {
        Box::new(iter) as Box<dyn Iterator<Item = _>>
    } else {
        Box::new(iter.rev())
    };

    // The itertools GroupBy iterator doesn't provide an owned version, we are therefore
    // required to collect the result into an owned collection (a Vec).
    // https://github.com/rust-itertools/itertools/issues/499
    #[allow(clippy::needless_collect)]
    let vec: Vec<_> = iter
        .group_by(|(_, v)| *v)
        .into_iter()
        .map(|(_, ids)| ids.map(|(id, _)| id).collect())
        .collect();

    Ok(vec.into_iter())
}
#[cfg(test)]
mod tests {
    use std::str::FromStr;

    use big_s::S;
    use maplit::hashset;

    use crate::index::tests::TempIndex;
    use crate::{AscDesc, Criterion, Filter, Search, SearchResult};

    // Note that in this test, only the iterative sort algorithms are used. Set the CANDIDATES_THRESHOLD
    // constant to 0 to ensure that the other sort algorithms are also correct.
    #[test]
    fn sort_criterion_placeholder() {
        let index = TempIndex::new();

        index
            .update_settings(|settings| {
                settings.set_primary_key("id".to_owned());
                settings
                    .set_sortable_fields(maplit::hashset! { S("id"), S("mod_10"), S("mod_20") });
                settings.set_criteria(vec![Criterion::Sort]);
            })
            .unwrap();

        let mut docs = vec![];
        for i in 0..100 {
            docs.push(
                serde_json::json!({ "id": i, "mod_10": format!("{}", i % 10), "mod_20": i % 20 }),
            );
        }

        index.add_documents(documents!(docs)).unwrap();

        let all_ids = (0..100).collect::<Vec<_>>();

        let rtxn = index.read_txn().unwrap();

        let mut search = Search::new(&rtxn, &index);
        search.sort_criteria(vec![AscDesc::from_str("mod_10:desc").unwrap()]);
        search.limit(100);

        let SearchResult { mut documents_ids, .. } = search.execute().unwrap();
        insta::assert_snapshot!(format!("{documents_ids:?}"), @"[9, 19, 29, 39, 49, 59, 69, 79, 89, 99, 8, 18, 28, 38, 48, 58, 68, 78, 88, 98, 7, 17, 27, 37, 47, 57, 67, 77, 87, 97, 6, 16, 26, 36, 46, 56, 66, 76, 86, 96, 5, 15, 25, 35, 45, 55, 65, 75, 85, 95, 4, 14, 24, 34, 44, 54, 64, 74, 84, 94, 3, 13, 23, 33, 43, 53, 63, 73, 83, 93, 2, 12, 22, 32, 42, 52, 62, 72, 82, 92, 1, 11, 21, 31, 41, 51, 61, 71, 81, 91, 0, 10, 20, 30, 40, 50, 60, 70, 80, 90]");
        documents_ids.sort();
        assert_eq!(all_ids, documents_ids);

        let mut search = Search::new(&rtxn, &index);
        search.sort_criteria(vec![
            AscDesc::from_str("mod_10:desc").unwrap(),
            AscDesc::from_str("id:desc").unwrap(),
        ]);
        search.limit(100);

        let SearchResult { mut documents_ids, .. } = search.execute().unwrap();
        insta::assert_snapshot!(format!("{documents_ids:?}"), @"[99, 89, 79, 69, 59, 49, 39, 29, 19, 9, 98, 88, 78, 68, 58, 48, 38, 28, 18, 8, 97, 87, 77, 67, 57, 47, 37, 27, 17, 7, 96, 86, 76, 66, 56, 46, 36, 26, 16, 6, 95, 85, 75, 65, 55, 45, 35, 25, 15, 5, 94, 84, 74, 64, 54, 44, 34, 24, 14, 4, 93, 83, 73, 63, 53, 43, 33, 23, 13, 3, 92, 82, 72, 62, 52, 42, 32, 22, 12, 2, 91, 81, 71, 61, 51, 41, 31, 21, 11, 1, 90, 80, 70, 60, 50, 40, 30, 20, 10, 0]");
        documents_ids.sort();
        assert_eq!(all_ids, documents_ids);

        let mut search = Search::new(&rtxn, &index);
        search.sort_criteria(vec![
            AscDesc::from_str("mod_10:desc").unwrap(),
            AscDesc::from_str("mod_20:asc").unwrap(),
        ]);
        search.limit(100);

        let SearchResult { mut documents_ids, .. } = search.execute().unwrap();
        insta::assert_snapshot!(format!("{documents_ids:?}"), @"[9, 29, 49, 69, 89, 19, 39, 59, 79, 99, 8, 28, 48, 68, 88, 18, 38, 58, 78, 98, 7, 27, 47, 67, 87, 17, 37, 57, 77, 97, 6, 26, 46, 66, 86, 16, 36, 56, 76, 96, 5, 25, 45, 65, 85, 15, 35, 55, 75, 95, 4, 24, 44, 64, 84, 14, 34, 54, 74, 94, 3, 23, 43, 63, 83, 13, 33, 53, 73, 93, 2, 22, 42, 62, 82, 12, 32, 52, 72, 92, 1, 21, 41, 61, 81, 11, 31, 51, 71, 91, 0, 20, 40, 60, 80, 10, 30, 50, 70, 90]");
        documents_ids.sort();
        assert_eq!(all_ids, documents_ids);

        let mut search = Search::new(&rtxn, &index);
        search.sort_criteria(vec![
            AscDesc::from_str("mod_10:desc").unwrap(),
            AscDesc::from_str("mod_20:desc").unwrap(),
        ]);
        search.limit(100);

        let SearchResult { mut documents_ids, .. } = search.execute().unwrap();
        insta::assert_snapshot!(format!("{documents_ids:?}"), @"[19, 39, 59, 79, 99, 9, 29, 49, 69, 89, 18, 38, 58, 78, 98, 8, 28, 48, 68, 88, 17, 37, 57, 77, 97, 7, 27, 47, 67, 87, 16, 36, 56, 76, 96, 6, 26, 46, 66, 86, 15, 35, 55, 75, 95, 5, 25, 45, 65, 85, 14, 34, 54, 74, 94, 4, 24, 44, 64, 84, 13, 33, 53, 73, 93, 3, 23, 43, 63, 83, 12, 32, 52, 72, 92, 2, 22, 42, 62, 82, 11, 31, 51, 71, 91, 1, 21, 41, 61, 81, 10, 30, 50, 70, 90, 0, 20, 40, 60, 80]");
        documents_ids.sort();
        assert_eq!(all_ids, documents_ids);

        let mut search = Search::new(&rtxn, &index);
        search.sort_criteria(vec![
            AscDesc::from_str("mod_10:desc").unwrap(),
            AscDesc::from_str("mod_20:desc").unwrap(),
            AscDesc::from_str("id:desc").unwrap(),
        ]);
        search.limit(100);

        let SearchResult { mut documents_ids, .. } = search.execute().unwrap();
        insta::assert_snapshot!(format!("{documents_ids:?}"), @"[99, 79, 59, 39, 19, 89, 69, 49, 29, 9, 98, 78, 58, 38, 18, 88, 68, 48, 28, 8, 97, 77, 57, 37, 17, 87, 67, 47, 27, 7, 96, 76, 56, 36, 16, 86, 66, 46, 26, 6, 95, 75, 55, 35, 15, 85, 65, 45, 25, 5, 94, 74, 54, 34, 14, 84, 64, 44, 24, 4, 93, 73, 53, 33, 13, 83, 63, 43, 23, 3, 92, 72, 52, 32, 12, 82, 62, 42, 22, 2, 91, 71, 51, 31, 11, 81, 61, 41, 21, 1, 90, 70, 50, 30, 10, 80, 60, 40, 20, 0]");
        documents_ids.sort();
        assert_eq!(all_ids, documents_ids);
    }
    // Note that in this test, only the iterative sort algorithms are used. Set the CANDIDATES_THRESHOLD
    // constant to 0 to ensure that the other sort algorithms are also correct.
    #[test]
    fn sort_criterion_non_placeholder() {
        let index = TempIndex::new();

        index
            .update_settings(|settings| {
                settings.set_primary_key("id".to_owned());
                settings.set_filterable_fields(hashset! { S("id"), S("mod_10"), S("mod_20") });
                settings.set_sortable_fields(hashset! { S("id"), S("mod_10"), S("mod_20") });
                settings.set_criteria(vec![Criterion::Sort]);
            })
            .unwrap();

        let mut docs = vec![];
        for i in 0..100 {
            docs.push(
                serde_json::json!({ "id": i, "mod_10": format!("{}", i % 10), "mod_20": i % 20 }),
            );
        }

        index.add_documents(documents!(docs)).unwrap();

        let rtxn = index.read_txn().unwrap();

        let mut search = Search::new(&rtxn, &index);
        search.filter(
            Filter::from_str("mod_10 IN [1, 0, 2] OR mod_20 IN [10, 13] OR id IN [5, 6]")
                .unwrap()
                .unwrap(),
        );
        search.sort_criteria(vec![
            AscDesc::from_str("mod_10:desc").unwrap(),
            AscDesc::from_str("mod_20:asc").unwrap(),
            AscDesc::from_str("id:desc").unwrap(),
        ]);
        search.limit(100);

        let SearchResult { mut documents_ids, .. } = search.execute().unwrap();
        // The order should be in decreasing value of the id modulo 10, followed by increasing value of the id modulo 20, followed by decreasing value of the id
        insta::assert_snapshot!(format!("{documents_ids:?}"), @"[6, 5, 93, 73, 53, 33, 13, 82, 62, 42, 22, 2, 92, 72, 52, 32, 12, 81, 61, 41, 21, 1, 91, 71, 51, 31, 11, 80, 60, 40, 20, 0, 90, 70, 50, 30, 10]");
        let expected_ids = (0..100)
            .filter(|id| {
                [1, 0, 2].contains(&(id % 10))
                    || [10, 13].contains(&(id % 20))
                    || [5, 6].contains(id)
            })
            .collect::<Vec<_>>();
        documents_ids.sort();
        assert_eq!(expected_ids, documents_ids);

        let mut search = Search::new(&rtxn, &index);
        search.filter(
            Filter::from_str("mod_10 IN [7, 8, 0] OR mod_20 IN [1, 15, 16] OR id IN [0, 4]")
                .unwrap()
                .unwrap(),
        );
        search.sort_criteria(vec![
            AscDesc::from_str("mod_10:asc").unwrap(),
            AscDesc::from_str("mod_20:asc").unwrap(),
            AscDesc::from_str("id:desc").unwrap(),
        ]);
        search.limit(100);

        let SearchResult { mut documents_ids, .. } = search.execute().unwrap();
        // The order should be in increasing value of the id modulo 10, followed by increasing value of the id modulo 20, followed by decreasing value of the id
        insta::assert_snapshot!(format!("{documents_ids:?}"), @"[80, 60, 40, 20, 0, 90, 70, 50, 30, 10, 81, 61, 41, 21, 1, 4, 95, 75, 55, 35, 15, 96, 76, 56, 36, 16, 87, 67, 47, 27, 7, 97, 77, 57, 37, 17, 88, 68, 48, 28, 8, 98, 78, 58, 38, 18]");
        let expected_ids = (0..100)
            .filter(|id| {
                [7, 8, 0].contains(&(id % 10))
                    || [1, 15, 16].contains(&(id % 20))
                    || [0, 4].contains(id)
            })
            .collect::<Vec<_>>();
        documents_ids.sort();
        assert_eq!(expected_ids, documents_ids);

        let mut search = Search::new(&rtxn, &index);
        search.filter(
            Filter::from_str("mod_10 IN [1, 0, 2] OR mod_20 IN [10, 13] OR id IN [5, 6]")
                .unwrap()
                .unwrap(),
        );
        search.sort_criteria(vec![AscDesc::from_str("id:desc").unwrap()]);
        search.limit(100);

        let SearchResult { documents_ids, .. } = search.execute().unwrap();
        // The order should be in decreasing value of the id
        let mut expected_ids = (0..100)
            .filter(|id| {
                [1, 0, 2].contains(&(id % 10))
                    || [10, 13].contains(&(id % 20))
                    || [5, 6].contains(id)
            })
            .collect::<Vec<_>>();
        expected_ids.sort();
        expected_ids.reverse();
        assert_eq!(expected_ids, documents_ids);
    }
}
@@ -1,709 +0,0 @@
use std::cmp::{self, Ordering};
use std::collections::binary_heap::PeekMut;
use std::collections::{btree_map, BTreeMap, BinaryHeap, HashMap};
use std::iter::Peekable;
use std::mem::take;

use roaring::RoaringBitmap;

use super::{resolve_query_tree, Context, Criterion, CriterionParameters, CriterionResult};
use crate::search::criteria::{InitialCandidates, Query};
use crate::search::query_tree::{Operation, QueryKind};
use crate::search::{
    build_dfa, word_derivations, CriterionImplementationStrategy, WordDerivationsCache,
};
use crate::Result;

/// To be able to divide integers by the number of words in the query
/// we want to find a multiplier that allows us to divide by any number between 1 and 10.
/// We chose the LCM of all numbers between 1 and 10 as the multiplier (https://en.wikipedia.org/wiki/Least_common_multiple).
const LCM_10_FIRST_NUMBERS: u32 = 2520;
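A quick sanity check of the constant (mine, not from the diff): 2520 = 2^3 * 3^2 * 5 * 7, so it is divisible by every integer from 1 to 10.

// Hypothetical assertion confirming the LCM property used above.
assert!((1u32..=10).all(|n| 2520 % n == 0));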
/// Threshold on the number of candidates that makes
/// the system choose between one algorithm and another.
const CANDIDATES_THRESHOLD: u64 = 500;

type FlattenedQueryTree = Vec<Vec<Vec<Query>>>;
pub struct Attribute<'t> {
    ctx: &'t dyn Context<'t>,
    state: Option<(Operation, FlattenedQueryTree, RoaringBitmap)>,
    initial_candidates: InitialCandidates,
    parent: Box<dyn Criterion + 't>,
    linear_buckets: Option<btree_map::IntoIter<u64, RoaringBitmap>>,
    set_buckets: Option<BinaryHeap<Branch<'t>>>,
    implementation_strategy: CriterionImplementationStrategy,
}

impl<'t> Attribute<'t> {
    pub fn new(
        ctx: &'t dyn Context<'t>,
        parent: Box<dyn Criterion + 't>,
        implementation_strategy: CriterionImplementationStrategy,
    ) -> Self {
        Attribute {
            ctx,
            state: None,
            initial_candidates: InitialCandidates::Estimated(RoaringBitmap::new()),
            parent,
            linear_buckets: None,
            set_buckets: None,
            implementation_strategy,
        }
    }
}
impl<'t> Criterion for Attribute<'t> {
    #[logging_timer::time("Attribute::{}")]
    fn next(&mut self, params: &mut CriterionParameters) -> Result<Option<CriterionResult>> {
        // remove excluded candidates when next is called, instead of doing it in the loop.
        if let Some((_, _, allowed_candidates)) = self.state.as_mut() {
            *allowed_candidates -= params.excluded_candidates;
        }

        loop {
            match self.state.take() {
                Some((query_tree, _, allowed_candidates)) if allowed_candidates.is_empty() => {
                    return Ok(Some(CriterionResult {
                        query_tree: Some(query_tree),
                        candidates: Some(RoaringBitmap::new()),
                        filtered_candidates: None,
                        initial_candidates: Some(self.initial_candidates.take()),
                    }));
                }
                Some((query_tree, flattened_query_tree, mut allowed_candidates)) => {
                    let found_candidates = if matches!(
                        self.implementation_strategy,
                        CriterionImplementationStrategy::OnlyIterative
                    ) || (matches!(
                        self.implementation_strategy,
                        CriterionImplementationStrategy::Dynamic
                    ) && allowed_candidates.len()
                        < CANDIDATES_THRESHOLD)
                    {
                        let linear_buckets = match self.linear_buckets.as_mut() {
                            Some(linear_buckets) => linear_buckets,
                            None => {
                                let new_buckets = initialize_linear_buckets(
                                    self.ctx,
                                    &flattened_query_tree,
                                    &allowed_candidates,
                                )?;
                                self.linear_buckets.get_or_insert(new_buckets.into_iter())
                            }
                        };

                        match linear_buckets.next() {
                            Some((_score, candidates)) => candidates,
                            None => {
                                return Ok(Some(CriterionResult {
                                    query_tree: Some(query_tree),
                                    candidates: Some(RoaringBitmap::new()),
                                    filtered_candidates: None,
                                    initial_candidates: Some(self.initial_candidates.take()),
                                }));
                            }
                        }
                    } else {
                        let set_buckets = match self.set_buckets.as_mut() {
                            Some(set_buckets) => set_buckets,
                            None => {
                                let new_buckets = initialize_set_buckets(
                                    self.ctx,
                                    &flattened_query_tree,
                                    &allowed_candidates,
                                    params.wdcache,
                                )?;
                                self.set_buckets.get_or_insert(new_buckets)
                            }
                        };

                        match set_compute_candidates(set_buckets, &allowed_candidates)? {
                            Some((_score, candidates)) => candidates,
                            None => {
                                return Ok(Some(CriterionResult {
                                    query_tree: Some(query_tree),
                                    candidates: Some(allowed_candidates),
                                    filtered_candidates: None,
                                    initial_candidates: Some(self.initial_candidates.take()),
                                }));
                            }
                        }
                    };

                    allowed_candidates -= &found_candidates;

                    self.state =
                        Some((query_tree.clone(), flattened_query_tree, allowed_candidates));

                    return Ok(Some(CriterionResult {
                        query_tree: Some(query_tree),
                        candidates: Some(found_candidates),
                        filtered_candidates: None,
                        initial_candidates: Some(self.initial_candidates.take()),
                    }));
                }
                None => match self.parent.next(params)? {
                    Some(CriterionResult {
                        query_tree: Some(query_tree),
                        candidates,
                        filtered_candidates,
                        initial_candidates,
                    }) => {
                        let mut candidates = match candidates {
                            Some(candidates) => candidates,
                            None => {
                                resolve_query_tree(self.ctx, &query_tree, params.wdcache)?
                                    - params.excluded_candidates
                            }
                        };

                        if let Some(filtered_candidates) = filtered_candidates {
                            candidates &= filtered_candidates;
                        }

                        let flattened_query_tree = flatten_query_tree(&query_tree);

                        match initial_candidates {
                            Some(initial_candidates) => {
                                self.initial_candidates |= initial_candidates
                            }
                            None => self.initial_candidates.map_inplace(|c| c | &candidates),
                        }

                        self.state = Some((query_tree, flattened_query_tree, candidates));
                        self.linear_buckets = None;
                    }
                    Some(CriterionResult {
                        query_tree: None,
                        candidates,
                        filtered_candidates,
                        initial_candidates,
                    }) => {
                        return Ok(Some(CriterionResult {
                            query_tree: None,
                            candidates,
                            filtered_candidates,
                            initial_candidates,
                        }));
                    }
                    None => return Ok(None),
                },
            }
        }
    }
}
/// QueryPositionIterator is an Iterator over positions of a Query,
|
||||
/// It contains iterators over words positions.
|
||||
struct QueryPositionIterator<'t> {
|
||||
#[allow(clippy::type_complexity)]
|
||||
inner:
|
||||
Vec<Peekable<Box<dyn Iterator<Item = heed::Result<((&'t str, u32), RoaringBitmap)>> + 't>>>,
|
||||
}
|
||||
|
||||
impl<'t> QueryPositionIterator<'t> {
|
||||
fn new(
|
||||
ctx: &'t dyn Context<'t>,
|
||||
queries: &[Query],
|
||||
wdcache: &mut WordDerivationsCache,
|
||||
) -> Result<Self> {
|
||||
let mut inner = Vec::with_capacity(queries.len());
|
||||
for query in queries {
|
||||
let in_prefix_cache = query.prefix && ctx.in_prefix_cache(query.kind.word());
|
||||
match &query.kind {
|
||||
QueryKind::Exact { word, .. } => {
|
||||
if !query.prefix || in_prefix_cache {
|
||||
let word = query.kind.word();
|
||||
let iter = ctx.word_position_iterator(word, in_prefix_cache)?;
|
||||
inner.push(iter.peekable());
|
||||
} else {
|
||||
for (word, _) in word_derivations(word, true, 0, ctx.words_fst(), wdcache)?
|
||||
{
|
||||
let iter = ctx.word_position_iterator(word, in_prefix_cache)?;
|
||||
inner.push(iter.peekable());
|
||||
}
|
||||
}
|
||||
}
|
||||
QueryKind::Tolerant { typo, word } => {
|
||||
for (word, _) in
|
||||
word_derivations(word, query.prefix, *typo, ctx.words_fst(), wdcache)?
|
||||
{
|
||||
let iter = ctx.word_position_iterator(word, in_prefix_cache)?;
|
||||
inner.push(iter.peekable());
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
Ok(Self { inner })
|
||||
}
|
||||
}
|
||||
|
||||
impl<'t> Iterator for QueryPositionIterator<'t> {
|
||||
type Item = heed::Result<(u32, RoaringBitmap)>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
// sort inner words from the closest next position to the farthest next position.
|
||||
let expected_pos = self
|
||||
.inner
|
||||
.iter_mut()
|
||||
.filter_map(|wli| match wli.peek() {
|
||||
Some(Ok(((_, pos), _))) => Some(*pos),
|
||||
_ => None,
|
||||
})
|
||||
.min()?;
|
||||
|
||||
let mut candidates = None;
|
||||
for wli in self.inner.iter_mut() {
|
||||
if let Some(Ok(((_, pos), _))) = wli.peek() {
|
||||
if *pos > expected_pos {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
match wli.next() {
|
||||
Some(Ok((_, docids))) => {
|
||||
candidates = match candidates.take() {
|
||||
Some(candidates) => Some(candidates | docids),
|
||||
None => Some(docids),
|
||||
}
|
||||
}
|
||||
Some(Err(e)) => return Some(Err(e)),
|
||||
None => continue,
|
||||
}
|
||||
}
|
||||
|
||||
candidates.map(|candidates| Ok((expected_pos, candidates)))
|
||||
}
|
||||
}
|
||||
|
||||
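// Illustrative sketch (not part of the original source): the merging behaviour
// of `QueryPositionIterator::next` replayed on plain vectors with the same
// `(position, docids)` item shape. Each step yields the smallest pending
// position together with the union of the docids of every inner iterator
// sitting on that position.
#[test]
fn query_position_iterator_merge_sketch() {
    fn merge_next(
        inners: &mut [std::iter::Peekable<std::vec::IntoIter<(u32, RoaringBitmap)>>],
    ) -> Option<(u32, RoaringBitmap)> {
        // the closest next position among all the inner iterators
        let expected_pos = inners.iter_mut().filter_map(|it| it.peek().map(|(p, _)| *p)).min()?;
        let mut docids = RoaringBitmap::new();
        for it in inners.iter_mut() {
            // only consume the iterators that are exactly on `expected_pos`
            if it.peek().map_or(false, |(p, _)| *p == expected_pos) {
                docids |= it.next().unwrap().1;
            }
        }
        Some((expected_pos, docids))
    }

    // word "a" appears at positions 0 and 2, word "b" at position 0 only
    let a = vec![(0, vec![1u32].into_iter().collect()), (2, vec![2u32].into_iter().collect())];
    let b = vec![(0, vec![3u32].into_iter().collect())];
    let mut inners = [a.into_iter().peekable(), b.into_iter().peekable()];

    // position 0 merges the docids of both words, position 2 only "a"'s
    assert_eq!(merge_next(&mut inners), Some((0, vec![1u32, 3].into_iter().collect::<RoaringBitmap>())));
    assert_eq!(merge_next(&mut inners), Some((2, vec![2u32].into_iter().collect::<RoaringBitmap>())));
    assert_eq!(merge_next(&mut inners), None);
}
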
/// A Branch represents a possible alternative of the original query and is built from the Query Tree.
/// It allows us to iterate over meta-intervals of positions.
struct Branch<'t> {
    query_level_iterator: Vec<(u32, RoaringBitmap, Peekable<QueryPositionIterator<'t>>)>,
    last_result: (u32, RoaringBitmap),
    branch_size: u32,
}

impl<'t> Branch<'t> {
    fn new(
        ctx: &'t dyn Context<'t>,
        flatten_branch: &[Vec<Query>],
        wdcache: &mut WordDerivationsCache,
        allowed_candidates: &RoaringBitmap,
    ) -> Result<Self> {
        let mut query_level_iterator = Vec::new();
        for queries in flatten_branch {
            let mut qli = QueryPositionIterator::new(ctx, queries, wdcache)?.peekable();
            let (pos, docids) = qli.next().transpose()?.unwrap_or((0, RoaringBitmap::new()));
            query_level_iterator.push((pos, docids & allowed_candidates, qli));
        }

        let mut branch = Self {
            query_level_iterator,
            last_result: (0, RoaringBitmap::new()),
            branch_size: flatten_branch.len() as u32,
        };

        branch.update_last_result();

        Ok(branch)
    }

    /// Returns the next meta-interval of the branch,
    /// and updates the inner interval so it can be ranked by the BinaryHeap.
    fn next(&mut self, allowed_candidates: &RoaringBitmap) -> heed::Result<bool> {
        // update the query level iterator with the lowest next position.
        let index = self.lowest_iterator_index();
        match self.query_level_iterator.get_mut(index) {
            Some((cur_pos, cur_docids, qli)) => match qli.next().transpose()? {
                Some((next_pos, next_docids)) => {
                    *cur_pos = next_pos;
                    *cur_docids |= next_docids & allowed_candidates;
                    self.update_last_result();
                    Ok(true)
                }
                None => Ok(false),
            },
            None => Ok(false),
        }
    }

    fn lowest_iterator_index(&mut self) -> usize {
        let (index, _) = self
            .query_level_iterator
            .iter_mut()
            .map(|(pos, docids, qli)| {
                if docids.is_empty() {
                    0
                } else {
                    match qli.peek() {
                        Some(result) => {
                            result.as_ref().map(|(next_pos, _)| *next_pos - *pos).unwrap_or(0)
                        }
                        None => u32::MAX,
                    }
                }
            })
            .enumerate()
            .min_by_key(|(_, diff)| *diff)
            .unwrap_or((0, 0));

        index
    }

    fn update_last_result(&mut self) {
        let mut result_pos = 0;
        let mut result_docids = None;

        for (pos, docids, _qli) in self.query_level_iterator.iter() {
            result_pos += pos;
            result_docids = result_docids
                .take()
                .map_or_else(|| Some(docids.clone()), |candidates| Some(candidates & docids));
        }

        // remove last result docids from inner iterators
        if let Some(docids) = result_docids.as_ref() {
            for (_, query_docids, _) in self.query_level_iterator.iter_mut() {
                *query_docids -= docids;
            }
        }

        self.last_result = (result_pos, result_docids.unwrap_or_default());
    }

    /// Returns the rank of the current inner interval.
    fn compute_rank(&self) -> u32 {
        // we compute a rank from the position.
        let (pos, _) = self.last_result;
        pos.saturating_sub((0..self.branch_size).sum()) * LCM_10_FIRST_NUMBERS / self.branch_size
    }

    fn cmp(&self, other: &Self) -> Ordering {
        let self_rank = self.compute_rank();
        let other_rank = other.compute_rank();

        // a lower rank is better, and because the BinaryHeap pops the highest-ranked branch first, we reverse the ordering.
        self_rank.cmp(&other_rank).reverse()
    }
}

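// Worked example (illustrative, not part of the original source): a branch of
// `branch_size = 3` words can never see all its words at position 0, so the
// minimal reachable position sum is 0 + 1 + 2 = 3, i.e. `(0..3).sum()`. A
// `last_result` position sum of 9 therefore ranks as
// `(9 - 3) * LCM_10_FIRST_NUMBERS / 3`; lower is better, and the division by
// the branch size keeps branches of different lengths comparable.
#[test]
fn branch_rank_sketch() {
    let branch_size = 3u32;
    let pos_sum = 9u32;
    let rank = pos_sum.saturating_sub((0..branch_size).sum()) * LCM_10_FIRST_NUMBERS / branch_size;
    assert_eq!(rank, 2 * LCM_10_FIRST_NUMBERS);
}
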
impl<'t> Ord for Branch<'t> {
    fn cmp(&self, other: &Self) -> Ordering {
        self.cmp(other)
    }
}

impl<'t> PartialOrd for Branch<'t> {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(self.cmp(other))
    }
}

impl<'t> PartialEq for Branch<'t> {
    fn eq(&self, other: &Self) -> bool {
        self.cmp(other) == Ordering::Equal
    }
}

impl<'t> Eq for Branch<'t> {}

fn initialize_set_buckets<'t>(
    ctx: &'t dyn Context<'t>,
    branches: &FlattenedQueryTree,
    allowed_candidates: &RoaringBitmap,
    wdcache: &mut WordDerivationsCache,
) -> Result<BinaryHeap<Branch<'t>>> {
    let mut heap = BinaryHeap::new();
    for flatten_branch in branches {
        let branch = Branch::new(ctx, flatten_branch, wdcache, allowed_candidates)?;
        heap.push(branch);
    }

    Ok(heap)
}

fn set_compute_candidates(
    branches_heap: &mut BinaryHeap<Branch>,
    allowed_candidates: &RoaringBitmap,
) -> Result<Option<(u32, RoaringBitmap)>> {
    let mut final_candidates: Option<(u32, RoaringBitmap)> = None;
    let mut allowed_candidates = allowed_candidates.clone();

    while let Some(mut branch) = branches_heap.peek_mut() {
        // if the current branch is worse than the best one, we break to return
        // the candidates that correspond to the best rank
        let branch_rank = branch.compute_rank();
        if let Some((best_rank, _)) = final_candidates {
            if branch_rank > best_rank {
                break;
            }
        }

        let candidates = take(&mut branch.last_result.1);
        if candidates.is_empty() {
            // we don't have candidates, get next interval.
            if !branch.next(&allowed_candidates)? {
                PeekMut::pop(branch);
            }
        } else {
            allowed_candidates -= &candidates;
            final_candidates = match final_candidates.take() {
                // we add current candidates to best candidates
                Some((best_rank, mut best_candidates)) => {
                    best_candidates |= candidates;
                    branch.next(&allowed_candidates)?;
                    Some((best_rank, best_candidates))
                }
                // we take current candidates as best candidates
                None => {
                    branch.next(&allowed_candidates)?;
                    Some((branch_rank, candidates))
                }
            };
        }
    }

    Ok(final_candidates)
}

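// Explanatory note (not part of the original source): `set_compute_candidates`
// relies on the `BinaryHeap` always exposing the branch with the best (lowest)
// rank first, thanks to the reversed `Ord` implementation above. Candidates
// are accumulated for as long as the peeked rank does not exceed the first
// accepted one, so a single call returns exactly the documents of the best
// remaining rank.
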
fn initialize_linear_buckets(
    ctx: &dyn Context,
    branches: &FlattenedQueryTree,
    allowed_candidates: &RoaringBitmap,
) -> Result<BTreeMap<u64, RoaringBitmap>> {
    fn compute_candidate_rank(
        branches: &FlattenedQueryTree,
        words_positions: HashMap<String, RoaringBitmap>,
    ) -> u64 {
        let mut min_rank = u64::max_value();
        for branch in branches {
            let branch_len = branch.len();
            let mut branch_rank = Vec::with_capacity(branch_len);
            for derivates in branch {
                let mut position = None;
                for Query { prefix, kind } in derivates {
                    // find the best position of the current word in the document.
                    let current_position = match kind {
                        QueryKind::Exact { word, .. } => {
                            if *prefix {
                                word_derivations(word, true, 0, &words_positions)
                                    .flat_map(|positions| positions.iter().next())
                                    .min()
                            } else {
                                words_positions
                                    .get(word)
                                    .and_then(|positions| positions.iter().next())
                            }
                        }
                        QueryKind::Tolerant { typo, word } => {
                            word_derivations(word, *prefix, *typo, &words_positions)
                                .flat_map(|positions| positions.iter().next())
                                .min()
                        }
                    };

                    match (position, current_position) {
                        (Some(p), Some(cp)) => position = Some(cmp::min(p, cp)),
                        (None, Some(cp)) => position = Some(cp),
                        _ => (),
                    }
                }

                // if a position is found, we add it to the branch score,
                // otherwise the branch is considered unfindable in this document and we break.
                if let Some(position) = position {
                    branch_rank.push(position as u64);
                } else {
                    branch_rank.clear();
                    break;
                }
            }

            if !branch_rank.is_empty() {
                branch_rank.sort_unstable();
                // because several words of the same query can't all match at position 0,
                // we subtract the word index from the position.
                let branch_rank: u64 =
                    branch_rank.into_iter().enumerate().map(|(i, r)| r - i as u64).sum();
                // here we compute the mean over the words of the branch
                min_rank =
                    min_rank.min(branch_rank * LCM_10_FIRST_NUMBERS as u64 / branch_len as u64);
            }
        }

        min_rank
    }

    fn word_derivations<'a>(
        word: &str,
        is_prefix: bool,
        max_typo: u8,
        words_positions: &'a HashMap<String, RoaringBitmap>,
    ) -> impl Iterator<Item = &'a RoaringBitmap> {
        let dfa = build_dfa(word, max_typo, is_prefix);
        words_positions.iter().filter_map(move |(document_word, positions)| {
            use levenshtein_automata::Distance;
            match dfa.eval(document_word) {
                Distance::Exact(_) => Some(positions),
                Distance::AtLeast(_) => None,
            }
        })
    }

    let mut candidates = BTreeMap::new();
    for docid in allowed_candidates {
        let words_positions = ctx.docid_words_positions(docid)?;
        let rank = compute_candidate_rank(branches, words_positions);
        candidates.entry(rank).or_insert_with(RoaringBitmap::new).insert(docid);
    }

    Ok(candidates)
}

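// Worked example (illustrative, not part of the original source): the rank
// arithmetic of `compute_candidate_rank` for one branch of two words found at
// positions [3, 4] in a document. After sorting, each word index is
// subtracted from its position before summing: (3 - 0) + (4 - 1) = 6. The
// final bucket key is then `6 * LCM_10_FIRST_NUMBERS / 2`.
#[test]
fn linear_rank_sketch() {
    let mut positions: Vec<u64> = vec![4, 3];
    positions.sort_unstable();
    let branch_rank: u64 = positions.into_iter().enumerate().map(|(i, r)| r - i as u64).sum();
    assert_eq!(branch_rank * LCM_10_FIRST_NUMBERS as u64 / 2, 3 * LCM_10_FIRST_NUMBERS as u64);
}
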
// TODO can we keep refs of Query
fn flatten_query_tree(query_tree: &Operation) -> FlattenedQueryTree {
    use crate::search::criteria::Operation::{And, Or, Phrase};

    fn and_recurse(head: &Operation, tail: &[Operation]) -> FlattenedQueryTree {
        match tail.split_first() {
            Some((thead, tail)) => {
                let tail = and_recurse(thead, tail);
                let mut out = Vec::new();
                for array in recurse(head) {
                    for tail_array in &tail {
                        let mut array = array.clone();
                        array.extend(tail_array.iter().cloned());
                        out.push(array);
                    }
                }
                out
            }
            None => recurse(head),
        }
    }

    fn recurse(op: &Operation) -> FlattenedQueryTree {
        match op {
            And(ops) => ops.split_first().map_or_else(Vec::new, |(h, t)| and_recurse(h, t)),
            Or(_, ops) => {
                if ops.iter().all(|op| op.query().is_some()) {
                    vec![vec![ops.iter().flat_map(|op| op.query()).cloned().collect()]]
                } else {
                    ops.iter().flat_map(recurse).collect()
                }
            }
            Phrase(words) => {
                let queries = words
                    .iter()
                    .filter_map(|w| w.as_ref())
                    .map(|word| vec![Query { prefix: false, kind: QueryKind::exact(word.clone()) }])
                    .collect();
                vec![queries]
            }
            Operation::Query(query) => vec![vec![vec![query.clone()]]],
        }
    }

    recurse(query_tree)
}

#[cfg(test)]
mod tests {
    use big_s::S;

    use super::*;
    use crate::search::criteria::QueryKind;

    #[test]
    fn simple_flatten_query_tree() {
        let query_tree = Operation::Or(
            false,
            vec![
                Operation::Query(Query { prefix: false, kind: QueryKind::exact(S("manythefish")) }),
                Operation::And(vec![
                    Operation::Query(Query { prefix: false, kind: QueryKind::exact(S("manythe")) }),
                    Operation::Query(Query { prefix: false, kind: QueryKind::exact(S("fish")) }),
                ]),
                Operation::And(vec![
                    Operation::Query(Query { prefix: false, kind: QueryKind::exact(S("many")) }),
                    Operation::Or(
                        false,
                        vec![
                            Operation::Query(Query {
                                prefix: false,
                                kind: QueryKind::exact(S("thefish")),
                            }),
                            Operation::And(vec![
                                Operation::Query(Query {
                                    prefix: false,
                                    kind: QueryKind::exact(S("the")),
                                }),
                                Operation::Query(Query {
                                    prefix: false,
                                    kind: QueryKind::exact(S("fish")),
                                }),
                            ]),
                        ],
                    ),
                ]),
            ],
        );
        let result = flatten_query_tree(&query_tree);

        insta::assert_debug_snapshot!(result, @r###"
        [
            [
                [
                    Exact {
                        word: "manythefish",
                    },
                ],
            ],
            [
                [
                    Exact {
                        word: "manythe",
                    },
                ],
                [
                    Exact {
                        word: "fish",
                    },
                ],
            ],
            [
                [
                    Exact {
                        word: "many",
                    },
                ],
                [
                    Exact {
                        word: "thefish",
                    },
                ],
            ],
            [
                [
                    Exact {
                        word: "many",
                    },
                ],
                [
                    Exact {
                        word: "the",
                    },
                ],
                [
                    Exact {
                        word: "fish",
                    },
                ],
            ],
        ]
        "###);
    }
}
@@ -1,766 +0,0 @@
use std::collections::btree_map::Entry;
use std::collections::BTreeMap;
use std::convert::TryFrom;
use std::mem::take;

use log::debug;
use roaring::{MultiOps, RoaringBitmap};

use crate::search::criteria::{
    resolve_phrase, resolve_query_tree, Context, Criterion, CriterionParameters, CriterionResult,
    InitialCandidates,
};
use crate::search::query_tree::{Operation, PrimitiveQueryPart};
use crate::{absolute_from_relative_position, FieldId, Result};

pub struct Exactness<'t> {
    ctx: &'t dyn Context<'t>,
    query_tree: Option<Operation>,
    state: Option<State>,
    initial_candidates: InitialCandidates,
    parent: Box<dyn Criterion + 't>,
    query: Vec<ExactQueryPart>,
    cache: Option<ExactWordsCombinationCache>,
}

impl<'t> Exactness<'t> {
    pub fn new(
        ctx: &'t dyn Context<'t>,
        parent: Box<dyn Criterion + 't>,
        primitive_query: &[PrimitiveQueryPart],
    ) -> heed::Result<Self> {
        let mut query: Vec<_> = Vec::with_capacity(primitive_query.len());
        for part in primitive_query {
            query.push(ExactQueryPart::from_primitive_query_part(ctx, part)?);
        }

        Ok(Exactness {
            ctx,
            query_tree: None,
            state: None,
            initial_candidates: InitialCandidates::Estimated(RoaringBitmap::new()),
            parent,
            query,
            cache: None,
        })
    }
}

impl<'t> Criterion for Exactness<'t> {
    #[logging_timer::time("Exactness::{}")]
    fn next(&mut self, params: &mut CriterionParameters) -> Result<Option<CriterionResult>> {
        // remove excluded candidates when next is called, instead of doing it in the loop.
        if let Some(state) = self.state.as_mut() {
            state.difference_with(params.excluded_candidates);
        }
        loop {
            debug!("Exactness at state {:?}", self.state);

            match self.state.as_mut() {
                Some(state) if state.is_empty() => {
                    // reset state
                    self.state = None;
                    self.query_tree = None;
                    // we don't need to reset the combinations cache since it only depends on
                    // the primitive query, which does not change
                }
                Some(state) => {
                    let (candidates, state) =
                        resolve_state(self.ctx, take(state), &self.query, &mut self.cache)?;
                    self.state = state;

                    return Ok(Some(CriterionResult {
                        query_tree: self.query_tree.clone(),
                        candidates: Some(candidates),
                        filtered_candidates: None,
                        initial_candidates: Some(self.initial_candidates.take()),
                    }));
                }
                None => match self.parent.next(params)? {
                    Some(CriterionResult {
                        query_tree: Some(query_tree),
                        candidates,
                        filtered_candidates,
                        initial_candidates,
                    }) => {
                        let mut candidates = match candidates {
                            Some(candidates) => candidates,
                            None => {
                                resolve_query_tree(self.ctx, &query_tree, params.wdcache)?
                                    - params.excluded_candidates
                            }
                        };

                        if let Some(filtered_candidates) = filtered_candidates {
                            candidates &= filtered_candidates;
                        }

                        match initial_candidates {
                            Some(initial_candidates) => {
                                self.initial_candidates |= initial_candidates
                            }
                            None => self.initial_candidates.map_inplace(|c| c | &candidates),
                        }

                        self.state = Some(State::new(candidates));
                        self.query_tree = Some(query_tree);
                    }
                    Some(CriterionResult {
                        query_tree: None,
                        candidates,
                        filtered_candidates,
                        initial_candidates,
                    }) => {
                        return Ok(Some(CriterionResult {
                            query_tree: None,
                            candidates,
                            filtered_candidates,
                            initial_candidates,
                        }));
                    }
                    None => return Ok(None),
                },
            }
        }
    }
}

#[derive(Debug)]
enum State {
    /// Extract the documents that have an attribute that contains exactly the query.
    ExactAttribute(RoaringBitmap),
    /// Extract the documents that have an attribute that starts with exactly the query.
    AttributeStartsWith(RoaringBitmap),
    /// Rank the remaining documents by the number of exact words contained.
    ExactWords(RoaringBitmap),
    Remainings(Vec<RoaringBitmap>),
}

impl State {
    fn new(candidates: RoaringBitmap) -> Self {
        Self::ExactAttribute(candidates)
    }

    fn difference_with(&mut self, lhs: &RoaringBitmap) {
        match self {
            Self::ExactAttribute(candidates)
            | Self::AttributeStartsWith(candidates)
            | Self::ExactWords(candidates) => *candidates -= lhs,
            Self::Remainings(candidates_array) => {
                candidates_array.iter_mut().for_each(|candidates| *candidates -= lhs);
                candidates_array.retain(|candidates| !candidates.is_empty());
            }
        }
    }

    fn is_empty(&self) -> bool {
        match self {
            Self::ExactAttribute(candidates)
            | Self::AttributeStartsWith(candidates)
            | Self::ExactWords(candidates) => candidates.is_empty(),
            Self::Remainings(candidates_array) => {
                candidates_array.iter().all(RoaringBitmap::is_empty)
            }
        }
    }
}

impl Default for State {
    fn default() -> Self {
        Self::Remainings(vec![])
    }
}
#[logging_timer::time("Exactness::{}")]
fn resolve_state(
    ctx: &dyn Context,
    state: State,
    query: &[ExactQueryPart],
    cache: &mut Option<ExactWordsCombinationCache>,
) -> Result<(RoaringBitmap, Option<State>)> {
    use State::*;
    match state {
        ExactAttribute(mut allowed_candidates) => {
            let mut candidates = RoaringBitmap::new();
            if let Ok(query_len) = u8::try_from(query.len()) {
                let attributes_ids = ctx.searchable_fields_ids()?;
                for id in attributes_ids {
                    if let Some(attribute_allowed_docids) =
                        ctx.field_id_word_count_docids(id, query_len)?
                    {
                        let mut attribute_candidates_array =
                            attribute_start_with_docids(ctx, id, query)?;
                        attribute_candidates_array.push(attribute_allowed_docids);

                        candidates |= MultiOps::intersection(attribute_candidates_array);
                    }
                }

                // only keep allowed candidates
                candidates &= &allowed_candidates;
                // remove current candidates from allowed candidates
                allowed_candidates -= &candidates;
            }

            Ok((candidates, Some(AttributeStartsWith(allowed_candidates))))
        }
        AttributeStartsWith(mut allowed_candidates) => {
            let mut candidates = RoaringBitmap::new();
            let attributes_ids = ctx.searchable_fields_ids()?;
            for id in attributes_ids {
                let attribute_candidates_array = attribute_start_with_docids(ctx, id, query)?;
                candidates |= MultiOps::intersection(attribute_candidates_array);
            }

            // only keep allowed candidates
            candidates &= &allowed_candidates;
            // remove current candidates from allowed candidates
            allowed_candidates -= &candidates;
            Ok((candidates, Some(ExactWords(allowed_candidates))))
        }
        ExactWords(allowed_candidates) => {
            // Retrieve the cache if it already exists, otherwise create it.
            let owned_cache = if let Some(cache) = cache.take() {
                cache
            } else {
                compute_combinations(ctx, query)?
            };
            // The cache contains the sets of documents which contain exactly 1,2,3,.. exact words
            // from the query. It cannot be empty. All the candidates in it are disjoint.

            let mut candidates_array = owned_cache.combinations.clone();
            for candidates in candidates_array.iter_mut() {
                *candidates &= &allowed_candidates;
            }
            *cache = Some(owned_cache);

            let best_candidates = candidates_array.pop().unwrap();

            candidates_array.insert(0, allowed_candidates);
            Ok((best_candidates, Some(Remainings(candidates_array))))
        }
        // pop the remaining candidates until empty
        Remainings(mut candidates_array) => {
            let candidates = candidates_array.pop().unwrap_or_default();
            if !candidates_array.is_empty() {
                Ok((candidates, Some(Remainings(candidates_array))))
            } else {
                Ok((candidates, None))
            }
        }
    }
}

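// Bucket order produced by `resolve_state` (explanatory note, not part of the
// original source); each bucket is disjoint from the previous ones:
//
//   ExactAttribute      -> documents with an attribute that is exactly the query
//   AttributeStartsWith -> documents with an attribute starting with the query
//   ExactWords          -> documents containing n exact words, then n - 1, ...
//   Remainings          -> the leftover candidate sets, popped until empty
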
fn attribute_start_with_docids(
    ctx: &dyn Context,
    attribute_id: FieldId,
    query: &[ExactQueryPart],
) -> heed::Result<Vec<RoaringBitmap>> {
    let mut attribute_candidates_array = Vec::new();
    // start from the attribute's first position
    let mut pos = absolute_from_relative_position(attribute_id, 0);
    for part in query {
        use ExactQueryPart::*;
        match part {
            Synonyms(synonyms) => {
                let mut synonyms_candidates = RoaringBitmap::new();
                for word in synonyms {
                    let wc = ctx.word_position_docids(word, pos)?;
                    if let Some(word_candidates) = wc {
                        synonyms_candidates |= word_candidates;
                    }
                }
                attribute_candidates_array.push(synonyms_candidates);
                pos += 1;
            }
            Phrase(phrase) => {
                for word in phrase {
                    if let Some(word) = word {
                        let wc = ctx.word_position_docids(word, pos)?;
                        if let Some(word_candidates) = wc {
                            attribute_candidates_array.push(word_candidates);
                        }
                    }
                    pos += 1;
                }
            }
        }
    }

    Ok(attribute_candidates_array)
}

#[derive(Debug, Clone)]
pub enum ExactQueryPart {
    Phrase(Vec<Option<String>>),
    Synonyms(Vec<String>),
}

impl ExactQueryPart {
    fn from_primitive_query_part(
        ctx: &dyn Context,
        part: &PrimitiveQueryPart,
    ) -> heed::Result<Self> {
        let part = match part {
            PrimitiveQueryPart::Word(word, _) => {
                match ctx.synonyms(word)? {
                    Some(synonyms) => {
                        let mut synonyms: Vec<_> = synonyms
                            .into_iter()
                            .filter_map(|mut array| {
                                // keep only single-word synonyms.
                                match array.pop() {
                                    Some(word) if array.is_empty() => Some(word),
                                    _ => None,
                                }
                            })
                            .collect();
                        synonyms.push(word.clone());
                        ExactQueryPart::Synonyms(synonyms)
                    }
                    None => ExactQueryPart::Synonyms(vec![word.clone()]),
                }
            }
            PrimitiveQueryPart::Phrase(phrase) => ExactQueryPart::Phrase(phrase.clone()),
        };

        Ok(part)
    }
}

struct ExactWordsCombinationCache {
    // index 0 is only 1 word
    combinations: Vec<RoaringBitmap>,
}

fn compute_combinations(
    ctx: &dyn Context,
    query: &[ExactQueryPart],
) -> Result<ExactWordsCombinationCache> {
    let number_of_part = query.len();
    let mut parts_candidates_array = Vec::with_capacity(number_of_part);
    for part in query {
        let mut candidates = RoaringBitmap::new();
        use ExactQueryPart::*;
        match part {
            Synonyms(synonyms) => {
                for synonym in synonyms {
                    if let Some(synonym_candidates) = ctx.word_docids(synonym)? {
                        candidates |= synonym_candidates;
                    }
                }
            }
            // compute the intersection of pairs of words with a proximity of 0.
            Phrase(phrase) => {
                candidates |= resolve_phrase(ctx, phrase)?;
            }
        }
        parts_candidates_array.push(candidates);
    }
    let combinations = create_disjoint_combinations(parts_candidates_array);

    Ok(ExactWordsCombinationCache { combinations })
}

/// Given a list of bitmaps `b0,b1,...,bn`, compute the list of bitmaps `X0,X1,...,Xn`
/// such that `Xi` contains all the elements that are contained in **at least** `i+1` bitmaps among `b0,b1,...,bn`.
///
/// The returned vector has the same length as the input list. It is equal to `vec![X0, X1, ..., Xn]`.
///
/// ## Implementation
///
/// We do so by iteratively building a map containing the union of all the different ways to intersect `J` bitmaps among `b0,b1,...,bn`.
/// - The key of the map is the index `i` of the last bitmap in the intersections
/// - The value is the union of all the possible intersections of J bitmaps such that the last bitmap in the intersection is `bi`
///
/// For example, with the bitmaps `b0,b1,b2,b3`, this map should look like this
/// ```text
/// Map 0: (first iteration, contains all the combinations of 1 bitmap)
/// // What follows are unions of intersections of bitmaps associated with the index of their last component
/// 0: [b0]
/// 1: [b1]
/// 2: [b2]
/// 3: [b3]
/// Map 1: (second iteration, combinations of 2 bitmaps)
/// 1: [b0&b1]
/// 2: [b0&b2 | b1&b2]
/// 3: [b0&b3 | b1&b3 | b2&b3]
/// Map 2: (third iteration, combinations of 3 bitmaps)
/// 2: [b0&b1&b2]
/// 3: [b0&b2&b3 | b1&b2&b3]
/// Map 3: (fourth iteration, combinations of 4 bitmaps)
/// 3: [b0&b1&b2&b3]
/// ```
///
/// These maps are built one by one from the content of the preceding map.
/// For example, to create Map 2, we look at each line of Map 1, for example:
/// ```text
/// 2: [b0&b2 | b1&b2]
/// ```
/// And then for each i > 2, we compute `(b0&b2 | b1&b2) & bi = b0&b2&bi | b1&b2&bi`
/// and then add it to the new map (Map 2) under the key `i` (if it is not empty):
/// ```text
/// 3: [b0&b2&b3 | b1&b2&b3]
/// 4: [b0&b2&b4 | b1&b2&b4]
/// 5: [b0&b2&b5 | b1&b2&b5]
/// etc.
/// ```
/// We only keep two maps in memory at any one point. As soon as Map J is built, we flatten Map J-1 into
/// a single bitmap by taking the union of all of its values. This union gives us Xj-1.
///
/// ## Memory Usage
/// This function is expected to be called on a maximum of 10 bitmaps. The worst case thus happens when
/// 10 identical large bitmaps are given.
///
/// In the context of Meilisearch, let's imagine that we are given 10 bitmaps containing all
/// the document ids. If the dataset contains 16 million documents, then each bitmap will take
/// around 2MB of memory.
///
/// When creating Map 3, we will have, in memory:
/// 1. The 10 original bitmaps (20MB)
/// 2. X0: 2MB
/// 3. Map 1, containing 9 bitmaps: 18MB
/// 4. Map 2, containing 8 bitmaps: 16MB
/// 5. X1: 2MB
/// for a total of around 60MB of memory. This roughly represents the maximum memory usage of this function.
///
/// ## Time complexity
/// Let N be the size of the given list of bitmaps and M the length of each individual bitmap.
///
/// We need to create N new bitmaps. The most expensive one to create is the second one, where we need to
/// iterate over the N keys of Map 1, and for each of those keys `k_i`, we perform `N-k_i` bitmap unions.
/// Unioning two bitmaps is O(M), and we need to do it O(N^2) times.
///
/// Therefore the time complexity is O(N^3 * M).
fn create_non_disjoint_combinations(bitmaps: Vec<RoaringBitmap>) -> Vec<RoaringBitmap> {
    let nbr_parts = bitmaps.len();
    if nbr_parts == 1 {
        return bitmaps;
    }
    let mut flattened_levels = vec![];
    let mut last_level: BTreeMap<usize, RoaringBitmap> =
        bitmaps.clone().into_iter().enumerate().collect();

    for _ in 2..=nbr_parts {
        let mut new_level = BTreeMap::new();
        for (last_part_index, base_combination) in last_level.iter() {
            #[allow(clippy::needless_range_loop)]
            for new_last_part_index in last_part_index + 1..nbr_parts {
                let new_combination = base_combination & &bitmaps[new_last_part_index];
                if !new_combination.is_empty() {
                    match new_level.entry(new_last_part_index) {
                        Entry::Occupied(mut b) => {
                            *b.get_mut() |= new_combination;
                        }
                        Entry::Vacant(entry) => {
                            entry.insert(new_combination);
                        }
                    }
                }
            }
        }
        // Now flatten the last level to save memory
        let flattened_last_level = MultiOps::union(last_level.into_values());
        flattened_levels.push(flattened_last_level);
        last_level = new_level;
    }
    // Flatten the last level
    let flattened_last_level = MultiOps::union(last_level.into_values());
    flattened_levels.push(flattened_last_level);
    flattened_levels
}

/// Given a list of bitmaps `b0,b1,...,bn`, compute the list of bitmaps `X0,X1,...,Xn`
/// such that `Xi` contains all the elements that are contained in **exactly** `i+1` bitmaps among `b0,b1,...,bn`.
///
/// The returned vector has the same length as the input list. It is equal to `vec![X0, X1, ..., Xn]`.
fn create_disjoint_combinations(parts_candidates_array: Vec<RoaringBitmap>) -> Vec<RoaringBitmap> {
    let non_disjoint_combinations = create_non_disjoint_combinations(parts_candidates_array);
    let mut disjoint_combinations = vec![];
    let mut combinations = non_disjoint_combinations.into_iter().peekable();
    while let Some(mut combination) = combinations.next() {
        if let Some(forbidden) = combinations.peek() {
            combination -= forbidden;
        }
        disjoint_combinations.push(combination)
    }

    disjoint_combinations
}

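// Minimal worked example (illustrative, not part of the original source): with
// b0 = {1, 2} and b1 = {2, 3}, the non-disjoint combinations are
// X0 = b0 | b1 = {1, 2, 3} and X1 = b0 & b1 = {2}; making them disjoint
// removes X1 from X0, leaving {1, 3} ("in exactly one bitmap") and {2}
// ("in exactly two bitmaps").
#[test]
fn disjoint_combinations_sketch() {
    let b0: RoaringBitmap = vec![1u32, 2].into_iter().collect();
    let b1: RoaringBitmap = vec![2u32, 3].into_iter().collect();
    let combinations = create_disjoint_combinations(vec![b0, b1]);
    assert_eq!(combinations[0], vec![1u32, 3].into_iter().collect::<RoaringBitmap>());
    assert_eq!(combinations[1], vec![2u32].into_iter().collect::<RoaringBitmap>());
}
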
#[cfg(test)]
mod tests {
    use big_s::S;
    use roaring::RoaringBitmap;

    use crate::index::tests::TempIndex;
    use crate::search::criteria::exactness::{
        create_disjoint_combinations, create_non_disjoint_combinations,
    };
    use crate::snapshot_tests::display_bitmap;
    use crate::{Criterion, SearchResult};

    #[test]
    fn test_exact_words_subcriterion() {
        let index = TempIndex::new();

        index
            .update_settings(|settings| {
                settings.set_primary_key(S("id"));
                settings.set_criteria(vec![Criterion::Exactness]);
            })
            .unwrap();

        index
            .add_documents(documents!([
                // not relevant
                { "id": "0", "text": "cat good dog bad" },
                // 1 exact word
                { "id": "1", "text": "they said: cats arebetter thandogs" },
                // 3 exact words
                { "id": "2", "text": "they said: cats arebetter than dogs" },
                // 5 exact words
                { "id": "3", "text": "they said: cats are better than dogs" },
                // attribute starts with the exact words
                { "id": "4", "text": "cats are better than dogs except on Saturday" },
                // attribute equal to the exact words
                { "id": "5", "text": "cats are better than dogs" },
            ]))
            .unwrap();

        let rtxn = index.read_txn().unwrap();

        let SearchResult { matching_words: _, candidates: _, documents_ids } =
            index.search(&rtxn).query("cats are better than dogs").execute().unwrap();

        insta::assert_snapshot!(format!("{documents_ids:?}"), @"[5, 4, 3, 2, 1]");
    }

    fn print_combinations(rbs: &[RoaringBitmap]) -> String {
        let mut s = String::new();
        for rb in rbs {
            s.push_str(&format!("{}\n", &display_bitmap(rb)));
        }
        s
    }

    // In these unit tests, the test bitmaps always contain all the multiples of a certain number.
    // This makes it easy to check the validity of the results of `create_disjoint_combinations` by
    // counting the number of dividers of the elements in the returned bitmaps.
    fn assert_correct_combinations(combinations: &[RoaringBitmap], dividers: &[u32]) {
        for (i, set) in combinations.iter().enumerate() {
            let expected_nbr_dividers = i + 1;
            for el in set {
                let nbr_dividers = dividers.iter().map(|d| usize::from(el % d == 0)).sum::<usize>();
                assert_eq!(
                    nbr_dividers, expected_nbr_dividers,
                    "{el} is divisible by {nbr_dividers} elements, not {expected_nbr_dividers}."
                );
            }
        }
    }

    #[test]
    fn compute_combinations_1() {
        let b0: RoaringBitmap = (0..).into_iter().map(|x| 2 * x).take_while(|x| *x < 150).collect();

        let parts_candidates = vec![b0];

        let combinations = create_disjoint_combinations(parts_candidates);
        insta::assert_snapshot!(print_combinations(&combinations), @r###"
        [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62, 64, 66, 68, 70, 72, 74, 76, 78, 80, 82, 84, 86, 88, 90, 92, 94, 96, 98, 100, 102, 104, 106, 108, 110, 112, 114, 116, 118, 120, 122, 124, 126, 128, 130, 132, 134, 136, 138, 140, 142, 144, 146, 148, ]
        "###);

        assert_correct_combinations(&combinations, &[2]);
    }

    #[test]
    fn compute_combinations_2() {
        let b0: RoaringBitmap = (0..).into_iter().map(|x| 2 * x).take_while(|x| *x < 150).collect();
        let b1: RoaringBitmap = (0..).into_iter().map(|x| 3 * x).take_while(|x| *x < 150).collect();

        let parts_candidates = vec![b0, b1];

        let combinations = create_disjoint_combinations(parts_candidates);
        insta::assert_snapshot!(print_combinations(&combinations), @r###"
        [2, 3, 4, 8, 9, 10, 14, 15, 16, 20, 21, 22, 26, 27, 28, 32, 33, 34, 38, 39, 40, 44, 45, 46, 50, 51, 52, 56, 57, 58, 62, 63, 64, 68, 69, 70, 74, 75, 76, 80, 81, 82, 86, 87, 88, 92, 93, 94, 98, 99, 100, 104, 105, 106, 110, 111, 112, 116, 117, 118, 122, 123, 124, 128, 129, 130, 134, 135, 136, 140, 141, 142, 146, 147, 148, ]
        [0, 6, 12, 18, 24, 30, 36, 42, 48, 54, 60, 66, 72, 78, 84, 90, 96, 102, 108, 114, 120, 126, 132, 138, 144, ]
        "###);
    }

    #[test]
    fn compute_combinations_4() {
        let b0: RoaringBitmap = (0..).into_iter().map(|x| 2 * x).take_while(|x| *x < 150).collect();
        let b1: RoaringBitmap = (0..).into_iter().map(|x| 3 * x).take_while(|x| *x < 150).collect();
        let b2: RoaringBitmap = (0..).into_iter().map(|x| 5 * x).take_while(|x| *x < 150).collect();
        let b3: RoaringBitmap = (0..).into_iter().map(|x| 7 * x).take_while(|x| *x < 150).collect();

        let parts_candidates = vec![b0, b1, b2, b3];

        let combinations = create_disjoint_combinations(parts_candidates);

        insta::assert_snapshot!(print_combinations(&combinations), @r###"
        [2, 3, 4, 5, 7, 8, 9, 16, 22, 25, 26, 27, 32, 33, 34, 38, 39, 44, 46, 49, 51, 52, 55, 57, 58, 62, 64, 65, 68, 69, 74, 76, 77, 81, 82, 85, 86, 87, 88, 91, 92, 93, 94, 95, 99, 104, 106, 111, 115, 116, 117, 118, 119, 122, 123, 124, 125, 128, 129, 133, 134, 136, 141, 142, 145, 146, 148, ]
        [6, 10, 12, 14, 15, 18, 20, 21, 24, 28, 35, 36, 40, 45, 48, 50, 54, 56, 63, 66, 72, 75, 78, 80, 96, 98, 100, 102, 108, 110, 112, 114, 130, 132, 135, 138, 144, 147, ]
        [30, 42, 60, 70, 84, 90, 105, 120, 126, 140, ]
        [0, ]
        "###);

        // But we also check it programmatically
        assert_correct_combinations(&combinations, &[2, 3, 5, 7]);
    }
    #[test]
    fn compute_combinations_4_with_empty_results_at_end() {
        let b0: RoaringBitmap = (1..).into_iter().map(|x| 2 * x).take_while(|x| *x < 150).collect();
        let b1: RoaringBitmap = (1..).into_iter().map(|x| 3 * x).take_while(|x| *x < 150).collect();
        let b2: RoaringBitmap = (1..).into_iter().map(|x| 5 * x).take_while(|x| *x < 150).collect();
        let b3: RoaringBitmap = (1..).into_iter().map(|x| 7 * x).take_while(|x| *x < 150).collect();

        let parts_candidates = vec![b0, b1, b2, b3];

        let combinations = create_disjoint_combinations(parts_candidates);

        insta::assert_snapshot!(print_combinations(&combinations), @r###"
        [2, 3, 4, 5, 7, 8, 9, 16, 22, 25, 26, 27, 32, 33, 34, 38, 39, 44, 46, 49, 51, 52, 55, 57, 58, 62, 64, 65, 68, 69, 74, 76, 77, 81, 82, 85, 86, 87, 88, 91, 92, 93, 94, 95, 99, 104, 106, 111, 115, 116, 117, 118, 119, 122, 123, 124, 125, 128, 129, 133, 134, 136, 141, 142, 145, 146, 148, ]
        [6, 10, 12, 14, 15, 18, 20, 21, 24, 28, 35, 36, 40, 45, 48, 50, 54, 56, 63, 66, 72, 75, 78, 80, 96, 98, 100, 102, 108, 110, 112, 114, 130, 132, 135, 138, 144, 147, ]
        [30, 42, 60, 70, 84, 90, 105, 120, 126, 140, ]
        []
        "###);

        // But we also check it programmatically
        assert_correct_combinations(&combinations, &[2, 3, 5, 7]);
    }

    #[test]
    fn compute_combinations_4_with_some_equal_bitmaps() {
        let b0: RoaringBitmap = (0..).into_iter().map(|x| 2 * x).take_while(|x| *x < 150).collect();
        let b1: RoaringBitmap = (0..).into_iter().map(|x| 3 * x).take_while(|x| *x < 150).collect();
        let b2: RoaringBitmap = (0..).into_iter().map(|x| 5 * x).take_while(|x| *x < 150).collect();
        // b3 == b1
        let b3: RoaringBitmap = (0..).into_iter().map(|x| 3 * x).take_while(|x| *x < 150).collect();

        let parts_candidates = vec![b0, b1, b2, b3];

        let combinations = create_disjoint_combinations(parts_candidates);

        insta::assert_snapshot!(print_combinations(&combinations), @r###"
        [2, 4, 5, 8, 14, 16, 22, 25, 26, 28, 32, 34, 35, 38, 44, 46, 52, 55, 56, 58, 62, 64, 65, 68, 74, 76, 82, 85, 86, 88, 92, 94, 95, 98, 104, 106, 112, 115, 116, 118, 122, 124, 125, 128, 134, 136, 142, 145, 146, 148, ]
        [3, 9, 10, 20, 21, 27, 33, 39, 40, 50, 51, 57, 63, 69, 70, 80, 81, 87, 93, 99, 100, 110, 111, 117, 123, 129, 130, 140, 141, 147, ]
        [6, 12, 15, 18, 24, 36, 42, 45, 48, 54, 66, 72, 75, 78, 84, 96, 102, 105, 108, 114, 126, 132, 135, 138, 144, ]
        [0, 30, 60, 90, 120, ]
        "###);

        // But we also check it programmatically
        assert_correct_combinations(&combinations, &[2, 3, 5, 3]);
    }

    #[test]
    fn compute_combinations_10() {
        let dividers = [2, 3, 5, 7, 11, 6, 15, 35, 18, 14];
        let parts_candidates: Vec<RoaringBitmap> = dividers
            .iter()
            .map(|&divider| {
                (0..).into_iter().map(|x| divider * x).take_while(|x| *x <= 210).collect()
            })
            .collect();

        let combinations = create_disjoint_combinations(parts_candidates);
        insta::assert_snapshot!(print_combinations(&combinations), @r###"
        [2, 3, 4, 5, 7, 8, 9, 11, 16, 25, 26, 27, 32, 34, 38, 39, 46, 49, 51, 52, 57, 58, 62, 64, 65, 68, 69, 74, 76, 81, 82, 85, 86, 87, 91, 92, 93, 94, 95, 104, 106, 111, 115, 116, 117, 118, 119, 121, 122, 123, 124, 125, 128, 129, 133, 134, 136, 141, 142, 143, 145, 146, 148, 152, 153, 155, 158, 159, 161, 164, 166, 171, 172, 177, 178, 183, 184, 185, 187, 188, 194, 201, 202, 203, 205, 206, 207, 208, 209, ]
        [10, 20, 21, 22, 33, 40, 44, 50, 55, 63, 77, 80, 88, 99, 100, 130, 147, 160, 170, 176, 189, 190, 200, ]
        [6, 12, 14, 15, 24, 28, 35, 45, 48, 56, 75, 78, 96, 98, 102, 110, 112, 114, 135, 138, 156, 174, 175, 182, 186, 192, 195, 196, 204, ]
        [18, 36, 54, 66, 72, 108, 132, 144, 154, 162, 165, ]
        [30, 42, 60, 70, 84, 105, 120, 140, 150, 168, 198, ]
        [90, 126, 180, ]
        []
        [210, ]
        []
        [0, ]
        "###);

        assert_correct_combinations(&combinations, &dividers);
    }

    #[test]
    fn compute_combinations_30() {
        let dividers: [u32; 30] = [
            1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4,
            5,
        ];
        let parts_candidates: Vec<RoaringBitmap> = dividers
            .iter()
            .map(|divider| {
                (0..).into_iter().map(|x| divider * x).take_while(|x| *x <= 100).collect()
            })
            .collect();

        let combinations = create_non_disjoint_combinations(parts_candidates.clone());
        insta::assert_snapshot!(print_combinations(&combinations), @r###"
        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, ]
        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, ]
        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, ]
        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, ]
        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, ]
        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, ]
        [0, 2, 3, 4, 5, 6, 8, 9, 10, 12, 14, 15, 16, 18, 20, 21, 22, 24, 25, 26, 27, 28, 30, 32, 33, 34, 35, 36, 38, 39, 40, 42, 44, 45, 46, 48, 50, 51, 52, 54, 55, 56, 57, 58, 60, 62, 63, 64, 65, 66, 68, 69, 70, 72, 74, 75, 76, 78, 80, 81, 82, 84, 85, 86, 87, 88, 90, 92, 93, 94, 95, 96, 98, 99, 100, ]
        [0, 2, 3, 4, 5, 6, 8, 9, 10, 12, 14, 15, 16, 18, 20, 21, 22, 24, 25, 26, 27, 28, 30, 32, 33, 34, 35, 36, 38, 39, 40, 42, 44, 45, 46, 48, 50, 51, 52, 54, 55, 56, 57, 58, 60, 62, 63, 64, 65, 66, 68, 69, 70, 72, 74, 75, 76, 78, 80, 81, 82, 84, 85, 86, 87, 88, 90, 92, 93, 94, 95, 96, 98, 99, 100, ]
        [0, 2, 3, 4, 5, 6, 8, 9, 10, 12, 14, 15, 16, 18, 20, 21, 22, 24, 25, 26, 27, 28, 30, 32, 33, 34, 35, 36, 38, 39, 40, 42, 44, 45, 46, 48, 50, 51, 52, 54, 55, 56, 57, 58, 60, 62, 63, 64, 65, 66, 68, 69, 70, 72, 74, 75, 76, 78, 80, 81, 82, 84, 85, 86, 87, 88, 90, 92, 93, 94, 95, 96, 98, 99, 100, ]
        [0, 2, 3, 4, 5, 6, 8, 9, 10, 12, 14, 15, 16, 18, 20, 21, 22, 24, 25, 26, 27, 28, 30, 32, 33, 34, 35, 36, 38, 39, 40, 42, 44, 45, 46, 48, 50, 51, 52, 54, 55, 56, 57, 58, 60, 62, 63, 64, 65, 66, 68, 69, 70, 72, 74, 75, 76, 78, 80, 81, 82, 84, 85, 86, 87, 88, 90, 92, 93, 94, 95, 96, 98, 99, 100, ]
        [0, 2, 3, 4, 5, 6, 8, 9, 10, 12, 14, 15, 16, 18, 20, 21, 22, 24, 25, 26, 27, 28, 30, 32, 33, 34, 35, 36, 38, 39, 40, 42, 44, 45, 46, 48, 50, 51, 52, 54, 55, 56, 57, 58, 60, 62, 63, 64, 65, 66, 68, 69, 70, 72, 74, 75, 76, 78, 80, 81, 82, 84, 85, 86, 87, 88, 90, 92, 93, 94, 95, 96, 98, 99, 100, ]
        [0, 2, 3, 4, 5, 6, 8, 9, 10, 12, 14, 15, 16, 18, 20, 21, 22, 24, 25, 26, 27, 28, 30, 32, 33, 34, 35, 36, 38, 39, 40, 42, 44, 45, 46, 48, 50, 51, 52, 54, 55, 56, 57, 58, 60, 62, 63, 64, 65, 66, 68, 69, 70, 72, 74, 75, 76, 78, 80, 81, 82, 84, 85, 86, 87, 88, 90, 92, 93, 94, 95, 96, 98, 99, 100, ]
        [0, 4, 6, 8, 10, 12, 15, 16, 18, 20, 24, 28, 30, 32, 36, 40, 42, 44, 45, 48, 50, 52, 54, 56, 60, 64, 66, 68, 70, 72, 75, 76, 78, 80, 84, 88, 90, 92, 96, 100, ]
        [0, 4, 6, 8, 10, 12, 15, 16, 18, 20, 24, 28, 30, 32, 36, 40, 42, 44, 45, 48, 50, 52, 54, 56, 60, 64, 66, 68, 70, 72, 75, 76, 78, 80, 84, 88, 90, 92, 96, 100, ]
        [0, 4, 6, 8, 10, 12, 15, 16, 18, 20, 24, 28, 30, 32, 36, 40, 42, 44, 45, 48, 50, 52, 54, 56, 60, 64, 66, 68, 70, 72, 75, 76, 78, 80, 84, 88, 90, 92, 96, 100, ]
        [0, 4, 6, 8, 10, 12, 15, 16, 18, 20, 24, 28, 30, 32, 36, 40, 42, 44, 45, 48, 50, 52, 54, 56, 60, 64, 66, 68, 70, 72, 75, 76, 78, 80, 84, 88, 90, 92, 96, 100, ]
        [0, 4, 6, 8, 10, 12, 15, 16, 18, 20, 24, 28, 30, 32, 36, 40, 42, 44, 45, 48, 50, 52, 54, 56, 60, 64, 66, 68, 70, 72, 75, 76, 78, 80, 84, 88, 90, 92, 96, 100, ]
        [0, 4, 6, 8, 10, 12, 15, 16, 18, 20, 24, 28, 30, 32, 36, 40, 42, 44, 45, 48, 50, 52, 54, 56, 60, 64, 66, 68, 70, 72, 75, 76, 78, 80, 84, 88, 90, 92, 96, 100, ]
        [0, 12, 20, 24, 30, 36, 40, 48, 60, 72, 80, 84, 90, 96, 100, ]
        [0, 12, 20, 24, 30, 36, 40, 48, 60, 72, 80, 84, 90, 96, 100, ]
        [0, 12, 20, 24, 30, 36, 40, 48, 60, 72, 80, 84, 90, 96, 100, ]
        [0, 12, 20, 24, 30, 36, 40, 48, 60, 72, 80, 84, 90, 96, 100, ]
        [0, 12, 20, 24, 30, 36, 40, 48, 60, 72, 80, 84, 90, 96, 100, ]
        [0, 12, 20, 24, 30, 36, 40, 48, 60, 72, 80, 84, 90, 96, 100, ]
        [0, 60, ]
        [0, 60, ]
        [0, 60, ]
        [0, 60, ]
        [0, 60, ]
        [0, 60, ]
        "###);

        let combinations = create_disjoint_combinations(parts_candidates);
        insta::assert_snapshot!(print_combinations(&combinations), @r###"
        []
        []
        []
        []
        []
        [1, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 49, 53, 59, 61, 67, 71, 73, 77, 79, 83, 89, 91, 97, ]
        []
        []
        []
        []
        []
        [2, 3, 5, 9, 14, 21, 22, 25, 26, 27, 33, 34, 35, 38, 39, 46, 51, 55, 57, 58, 62, 63, 65, 69, 74, 81, 82, 85, 86, 87, 93, 94, 95, 98, 99, ]
        []
        []
        []
        []
        []
        [4, 6, 8, 10, 15, 16, 18, 28, 32, 42, 44, 45, 50, 52, 54, 56, 64, 66, 68, 70, 75, 76, 78, 88, 92, ]
        []
        []
        []
        []
        []
        [12, 20, 24, 30, 36, 40, 48, 72, 80, 84, 90, 96, 100, ]
        []
        []
        []
        []
        []
        [0, 60, ]
        "###);

        assert_correct_combinations(&combinations, &dividers);
    }
}
@@ -1,77 +0,0 @@
use log::debug;
use roaring::RoaringBitmap;

use super::{resolve_query_tree, Context, Criterion, CriterionParameters, CriterionResult};
use crate::search::criteria::InitialCandidates;
use crate::search::query_tree::Operation;
use crate::search::WordDerivationsCache;
use crate::Result;

/// The result of a call to the fetcher.
#[derive(Debug, Clone, PartialEq)]
pub struct FinalResult {
    /// The query tree corresponding to the current bucket of the last criterion.
    pub query_tree: Option<Operation>,
    /// The candidates of the current bucket of the last criterion.
    pub candidates: RoaringBitmap,
    /// Candidates that come from the current bucket of the initial criterion.
    pub initial_candidates: InitialCandidates,
}

pub struct Final<'t> {
    ctx: &'t dyn Context<'t>,
    parent: Box<dyn Criterion + 't>,
    wdcache: WordDerivationsCache,
    returned_candidates: RoaringBitmap,
}

impl<'t> Final<'t> {
    pub fn new(ctx: &'t dyn Context<'t>, parent: Box<dyn Criterion + 't>) -> Final<'t> {
        Final {
            ctx,
            parent,
            wdcache: WordDerivationsCache::new(),
            returned_candidates: RoaringBitmap::new(),
        }
    }

    #[logging_timer::time("Final::{}")]
    pub fn next(&mut self, excluded_candidates: &RoaringBitmap) -> Result<Option<FinalResult>> {
        debug!("Final iteration");
        let excluded_candidates = &self.returned_candidates | excluded_candidates;
        let mut criterion_parameters = CriterionParameters {
            wdcache: &mut self.wdcache,
            // returned_candidates is merged with excluded_candidates to avoid duplicates
            excluded_candidates: &excluded_candidates,
        };

        match self.parent.next(&mut criterion_parameters)? {
            Some(CriterionResult {
                query_tree,
                candidates,
                filtered_candidates,
                initial_candidates,
            }) => {
                let mut candidates = match (candidates, query_tree.as_ref()) {
                    (Some(candidates), _) => candidates,
                    (None, Some(qt)) => {
                        resolve_query_tree(self.ctx, qt, &mut self.wdcache)? - excluded_candidates
                    }
                    (None, None) => self.ctx.documents_ids()? - excluded_candidates,
                };

                if let Some(filtered_candidates) = filtered_candidates {
                    candidates &= filtered_candidates;
                }

                let initial_candidates = initial_candidates
                    .unwrap_or_else(|| InitialCandidates::Estimated(candidates.clone()));

                self.returned_candidates |= &candidates;

                Ok(Some(FinalResult { query_tree, candidates, initial_candidates }))
            }
            None => Ok(None),
        }
    }
}
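
// Usage sketch (illustrative, not part of the original source): the search
// loop drains `Final` bucket by bucket, passing in the externally excluded
// documents (e.g. soft-deleted ones); `Final` additionally remembers what it
// already returned, so consecutive buckets never overlap.
//
//     while let Some(FinalResult { candidates, .. }) =
//         final_criterion.next(&excluded_candidates)?
//     {
//         // rank and collect `candidates` here...
//     }
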
@@ -1,154 +0,0 @@
use std::iter;

use roaring::RoaringBitmap;
use rstar::RTree;

use super::{Criterion, CriterionParameters, CriterionResult};
use crate::search::criteria::{resolve_query_tree, CriteriaBuilder, InitialCandidates};
use crate::{lat_lng_to_xyz, GeoPoint, Index, Result};

pub struct Geo<'t> {
    index: &'t Index,
    rtxn: &'t heed::RoTxn<'t>,
    ascending: bool,
    parent: Box<dyn Criterion + 't>,
    candidates: Box<dyn Iterator<Item = RoaringBitmap>>,
    allowed_candidates: RoaringBitmap,
    initial_candidates: InitialCandidates,
    rtree: Option<RTree<GeoPoint>>,
    point: [f64; 2],
}

impl<'t> Geo<'t> {
    pub fn asc(
        index: &'t Index,
        rtxn: &'t heed::RoTxn<'t>,
        parent: Box<dyn Criterion + 't>,
        point: [f64; 2],
    ) -> Result<Self> {
        Self::new(index, rtxn, parent, point, true)
    }

    pub fn desc(
        index: &'t Index,
        rtxn: &'t heed::RoTxn<'t>,
        parent: Box<dyn Criterion + 't>,
        point: [f64; 2],
    ) -> Result<Self> {
        Self::new(index, rtxn, parent, point, false)
    }

    fn new(
        index: &'t Index,
        rtxn: &'t heed::RoTxn<'t>,
        parent: Box<dyn Criterion + 't>,
        point: [f64; 2],
        ascending: bool,
    ) -> Result<Self> {
        let candidates = Box::new(iter::empty());
        let allowed_candidates = index.geo_faceted_documents_ids(rtxn)?;
        let initial_candidates = InitialCandidates::Estimated(RoaringBitmap::new());
        let rtree = index.geo_rtree(rtxn)?;

        Ok(Self {
            index,
            rtxn,
            ascending,
            parent,
            candidates,
            allowed_candidates,
            initial_candidates,
            rtree,
            point,
        })
    }
}

impl Criterion for Geo<'_> {
    fn next(&mut self, params: &mut CriterionParameters) -> Result<Option<CriterionResult>> {
        let rtree = self.rtree.as_ref();

        loop {
            match self.candidates.next() {
                Some(mut candidates) => {
                    candidates -= params.excluded_candidates;
                    self.allowed_candidates -= &candidates;
                    return Ok(Some(CriterionResult {
                        query_tree: None,
                        candidates: Some(candidates),
                        filtered_candidates: None,
                        initial_candidates: Some(self.initial_candidates.clone()),
                    }));
                }
                None => match self.parent.next(params)? {
                    Some(CriterionResult {
                        query_tree,
                        candidates,
                        filtered_candidates,
                        initial_candidates,
                    }) => {
                        let mut candidates = match (&query_tree, candidates) {
                            (_, Some(candidates)) => candidates,
                            (Some(qt), None) => {
                                let context = CriteriaBuilder::new(self.rtxn, self.index)?;
                                resolve_query_tree(&context, qt, params.wdcache)?
                            }
                            (None, None) => self.index.documents_ids(self.rtxn)?,
                        };

                        if let Some(filtered_candidates) = filtered_candidates {
                            candidates &= filtered_candidates;
                        }

                        match initial_candidates {
                            Some(initial_candidates) => {
                                self.initial_candidates |= initial_candidates
                            }
                            None => self.initial_candidates.map_inplace(|c| c | &candidates),
                        }

                        if candidates.is_empty() {
                            continue;
                        }
                        self.allowed_candidates = &candidates - params.excluded_candidates;
                        self.candidates = match rtree {
                            Some(rtree) => geo_point(
                                rtree,
                                self.allowed_candidates.clone(),
                                self.point,
                                self.ascending,
                            ),
                            None => Box::new(std::iter::empty()),
                        };
                    }
                    None => return Ok(None),
                },
            }
        }
    }
}

fn geo_point(
    rtree: &RTree<GeoPoint>,
    mut candidates: RoaringBitmap,
    point: [f64; 2],
    ascending: bool,
) -> Box<dyn Iterator<Item = RoaringBitmap>> {
    let point = lat_lng_to_xyz(&point);

    let mut results = Vec::new();
    for point in rtree.nearest_neighbor_iter(&point) {
        if candidates.remove(point.data.0) {
            results.push(std::iter::once(point.data.0).collect());
            if candidates.is_empty() {
                break;
            }
        }
    }

    if ascending {
        Box::new(results.into_iter())
    } else {
        Box::new(results.into_iter().rev())
    }
}
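
// Explanatory note (not part of the original source): `geo_point` yields one
// single-document bitmap per candidate, ordered from the closest to the
// farthest point; collecting first and reversing for the descending case is
// what lets `Geo::next` pop one bucket per document in either direction.
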
@@ -1,82 +0,0 @@
use roaring::RoaringBitmap;
|
||||
|
||||
use super::{Criterion, CriterionParameters, CriterionResult};
|
||||
use crate::search::criteria::{resolve_query_tree, Context, InitialCandidates};
|
||||
use crate::search::query_tree::Operation;
|
||||
use crate::search::Distinct;
|
||||
use crate::Result;
|
||||
/// Initial is a mandatory criterion, it is always the first
|
||||
/// and is meant to initalize the CriterionResult used by the other criteria.
|
||||
/// It behave like an [Once Iterator](https://doc.rust-lang.org/std/iter/struct.Once.html) and will return Some(CriterionResult) only one time.
|
||||
pub struct Initial<'t, D> {
    ctx: &'t dyn Context<'t>,
    answer: Option<CriterionResult>,
    exhaustive_number_hits: bool,
    distinct: Option<D>,
}

impl<'t, D> Initial<'t, D> {
    pub fn new(
        ctx: &'t dyn Context<'t>,
        query_tree: Option<Operation>,
        filtered_candidates: Option<RoaringBitmap>,
        exhaustive_number_hits: bool,
        distinct: Option<D>,
    ) -> Initial<D> {
        let answer = CriterionResult {
            query_tree,
            candidates: None,
            filtered_candidates,
            initial_candidates: None,
        };
        Initial { ctx, answer: Some(answer), exhaustive_number_hits, distinct }
    }
}

impl<D: Distinct> Criterion for Initial<'_, D> {
    #[logging_timer::time("Initial::{}")]
    fn next(&mut self, params: &mut CriterionParameters) -> Result<Option<CriterionResult>> {
        self.answer
            .take()
            .map(|mut answer| {
                if self.exhaustive_number_hits {
                    // resolve the whole query tree to retrieve an exhaustive list of documents matching the query.
                    let candidates = answer
                        .query_tree
                        .as_ref()
                        .map(|query_tree| resolve_query_tree(self.ctx, query_tree, params.wdcache))
                        .transpose()?;

                    // then intersect the candidates with the potential filtered candidates.
                    let mut candidates = match (candidates, answer.filtered_candidates.take()) {
                        (Some(candidates), Some(filtered)) => candidates & filtered,
                        (Some(candidates), None) => candidates,
                        (None, Some(filtered)) => filtered,
                        (None, None) => self.ctx.documents_ids()?,
                    };

                    // then remove the potential soft deleted documents.
                    candidates -= params.excluded_candidates;

                    // because the initial_candidates should be an exhaustive count of the matching documents,
                    // we precompute the distinct attributes.
                    let initial_candidates = match &mut self.distinct {
                        Some(distinct) => {
                            let mut initial_candidates = RoaringBitmap::new();
                            for c in distinct.distinct(candidates.clone(), RoaringBitmap::new()) {
                                initial_candidates.insert(c?);
                            }
                            initial_candidates
                        }
                        None => candidates.clone(),
                    };

                    answer.candidates = Some(candidates);
                    answer.initial_candidates =
                        Some(InitialCandidates::Exhaustive(initial_candidates));
                }
                Ok(answer)
            })
            .transpose()
    }
}
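
`Initial::next` hands out its precomputed `CriterionResult` exactly once: `Option::take` moves the answer out and leaves `None` behind, so every later call yields nothing. A stripped-down sketch of that once-iterator pattern; the `Once` type here is a simplified stand-in, not the real criterion:

```rust
struct Once<T> {
    answer: Option<T>,
}

impl<T> Iterator for Once<T> {
    type Item = T;

    fn next(&mut self) -> Option<T> {
        // `take` moves the value out and leaves `None` behind,
        // so the second call onwards yields nothing.
        self.answer.take()
    }
}

fn main() {
    let mut it = Once { answer: Some("criterion result") };
    assert_eq!(it.next(), Some("criterion result"));
    assert_eq!(it.next(), None);
}
```
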
File diff suppressed because it is too large

@@ -1,712 +0,0 @@
use std::collections::btree_map::{self, BTreeMap};
use std::collections::hash_map::HashMap;

use log::debug;
use roaring::RoaringBitmap;
use slice_group_by::GroupBy;

use super::{
    query_docids, query_pair_proximity_docids, resolve_phrase, resolve_query_tree, Context,
    Criterion, CriterionParameters, CriterionResult,
};
use crate::search::criteria::InitialCandidates;
use crate::search::query_tree::{maximum_proximity, Operation, Query, QueryKind};
use crate::search::{build_dfa, CriterionImplementationStrategy, WordDerivationsCache};
use crate::{Position, Result};

type Cache = HashMap<(Operation, u8), Vec<(Query, Query, RoaringBitmap)>>;

/// Threshold on the number of candidates that will make
/// the system choose between one algorithm or another.
const CANDIDATES_THRESHOLD: u64 = 1000;

/// Threshold on the proximity value that will make
/// the system choose between one algorithm or another.
const PROXIMITY_THRESHOLD: u8 = 0;

pub struct Proximity<'t> {
    ctx: &'t dyn Context<'t>,
    /// (max_proximity, query_tree, allowed_candidates)
    state: Option<(u8, Operation, RoaringBitmap)>,
    proximity: u8,
    initial_candidates: InitialCandidates,
    parent: Box<dyn Criterion + 't>,
    candidates_cache: Cache,
    plane_sweep_cache: Option<btree_map::IntoIter<u8, RoaringBitmap>>,
    implementation_strategy: CriterionImplementationStrategy,
}

impl<'t> Proximity<'t> {
    pub fn new(
        ctx: &'t dyn Context<'t>,
        parent: Box<dyn Criterion + 't>,
        implementation_strategy: CriterionImplementationStrategy,
    ) -> Self {
        Proximity {
            ctx,
            state: None,
            proximity: 0,
            initial_candidates: InitialCandidates::Estimated(RoaringBitmap::new()),
            parent,
            candidates_cache: Cache::new(),
            plane_sweep_cache: None,
            implementation_strategy,
        }
    }
}

impl<'t> Criterion for Proximity<'t> {
    #[logging_timer::time("Proximity::{}")]
    fn next(&mut self, params: &mut CriterionParameters) -> Result<Option<CriterionResult>> {
        // remove excluded candidates when next is called, instead of doing it in the loop.
        if let Some((_, _, allowed_candidates)) = self.state.as_mut() {
            *allowed_candidates -= params.excluded_candidates;
        }

        loop {
            debug!(
                "Proximity at iteration {} (max prox {:?}) ({:?})",
                self.proximity,
                self.state.as_ref().map(|(mp, _, _)| mp),
                self.state.as_ref().map(|(_, _, cd)| cd),
            );

            match &mut self.state {
                Some((max_prox, _, allowed_candidates))
                    if allowed_candidates.is_empty() || self.proximity > *max_prox =>
                {
                    self.state = None; // reset state
                }
                Some((_, query_tree, allowed_candidates)) => {
                    let mut new_candidates = if matches!(
                        self.implementation_strategy,
                        CriterionImplementationStrategy::OnlyIterative
                    ) || (matches!(
                        self.implementation_strategy,
                        CriterionImplementationStrategy::Dynamic
                    ) && allowed_candidates.len() <= CANDIDATES_THRESHOLD
                        && self.proximity > PROXIMITY_THRESHOLD)
                    {
                        if let Some(cache) = self.plane_sweep_cache.as_mut() {
                            match cache.next() {
                                Some((p, candidates)) => {
                                    self.proximity = p;
                                    candidates
                                }
                                None => {
                                    self.state = None; // reset state
                                    continue;
                                }
                            }
                        } else {
                            let cache = resolve_plane_sweep_candidates(
                                self.ctx,
                                query_tree,
                                allowed_candidates,
                            )?;
                            self.plane_sweep_cache = Some(cache.into_iter());

                            continue;
                        }
                    } else {
                        // use set theory based algorithm
                        resolve_candidates(
                            self.ctx,
                            query_tree,
                            self.proximity,
                            &mut self.candidates_cache,
                            params.wdcache,
                        )?
                    };

                    new_candidates &= &*allowed_candidates;
                    *allowed_candidates -= &new_candidates;
                    self.proximity += 1;

                    return Ok(Some(CriterionResult {
                        query_tree: Some(query_tree.clone()),
                        candidates: Some(new_candidates),
                        filtered_candidates: None,
                        initial_candidates: Some(self.initial_candidates.take()),
                    }));
                }
                None => match self.parent.next(params)? {
                    Some(CriterionResult {
                        query_tree: Some(query_tree),
                        candidates,
                        filtered_candidates,
                        initial_candidates,
                    }) => {
                        let mut candidates = match candidates {
                            Some(candidates) => candidates,
                            None => {
                                resolve_query_tree(self.ctx, &query_tree, params.wdcache)?
                                    - params.excluded_candidates
                            }
                        };

                        if let Some(filtered_candidates) = filtered_candidates {
                            candidates &= filtered_candidates;
                        }

                        match initial_candidates {
                            Some(initial_candidates) => {
                                self.initial_candidates |= initial_candidates
                            }
                            None => self.initial_candidates.map_inplace(|c| c | &candidates),
                        }

                        let maximum_proximity = maximum_proximity(&query_tree);
                        self.state = Some((maximum_proximity as u8, query_tree, candidates));
                        self.proximity = 0;
                        self.plane_sweep_cache = None;
                    }
                    Some(CriterionResult {
                        query_tree: None,
                        candidates,
                        filtered_candidates,
                        initial_candidates,
                    }) => {
                        return Ok(Some(CriterionResult {
                            query_tree: None,
                            candidates,
                            filtered_candidates,
                            initial_candidates,
                        }));
                    }
                    None => return Ok(None),
                },
            }
        }
    }
}

fn resolve_candidates(
    ctx: &dyn Context,
    query_tree: &Operation,
    proximity: u8,
    cache: &mut Cache,
    wdcache: &mut WordDerivationsCache,
) -> Result<RoaringBitmap> {
    fn resolve_operation(
        ctx: &dyn Context,
        query_tree: &Operation,
        proximity: u8,
        cache: &mut Cache,
        wdcache: &mut WordDerivationsCache,
    ) -> Result<Vec<(Query, Query, RoaringBitmap)>> {
        use Operation::{And, Or, Phrase};

        let result = match query_tree {
            And(ops) => mdfs(ctx, ops, proximity, cache, wdcache)?,
            Phrase(words) => {
                if proximity == 0 {
                    let most_left = words
                        .iter()
                        .filter_map(|o| o.as_ref())
                        .next()
                        .map(|w| Query { prefix: false, kind: QueryKind::exact(w.clone()) });
                    let most_right = words
                        .iter()
                        .rev()
                        .filter_map(|o| o.as_ref())
                        .next()
                        .map(|w| Query { prefix: false, kind: QueryKind::exact(w.clone()) });

                    match (most_left, most_right) {
                        (Some(l), Some(r)) => vec![(l, r, resolve_phrase(ctx, words)?)],
                        _otherwise => Default::default(),
                    }
                } else {
                    Default::default()
                }
            }
            Or(_, ops) => {
                let mut output = Vec::new();
                for op in ops {
                    let result = resolve_operation(ctx, op, proximity, cache, wdcache)?;
                    output.extend(result);
                }
                output
            }
            Operation::Query(q) => {
                if proximity == 0 {
                    let candidates = query_docids(ctx, q, wdcache)?;
                    vec![(q.clone(), q.clone(), candidates)]
                } else {
                    Default::default()
                }
            }
        };

        Ok(result)
    }

    fn mdfs_pair(
        ctx: &dyn Context,
        left: &Operation,
        right: &Operation,
        proximity: u8,
        cache: &mut Cache,
        wdcache: &mut WordDerivationsCache,
    ) -> Result<Vec<(Query, Query, RoaringBitmap)>> {
        fn pair_combinations(mana: u8, left_max: u8) -> impl Iterator<Item = (u8, u8)> {
            (0..=mana.min(left_max)).map(move |m| (m, mana - m))
        }

        let pair_max_proximity = 7;

        let mut output = Vec::new();

        for (pair_p, left_right_p) in pair_combinations(proximity, pair_max_proximity) {
            for (left_p, right_p) in pair_combinations(left_right_p, left_right_p) {
                let left_key = (left.clone(), left_p);
                if !cache.contains_key(&left_key) {
                    let candidates = resolve_operation(ctx, left, left_p, cache, wdcache)?;
                    cache.insert(left_key.clone(), candidates);
                }

                let right_key = (right.clone(), right_p);
                if !cache.contains_key(&right_key) {
                    let candidates = resolve_operation(ctx, right, right_p, cache, wdcache)?;
                    cache.insert(right_key.clone(), candidates);
                }

                let lefts = cache.get(&left_key).unwrap();
                let rights = cache.get(&right_key).unwrap();

                for (ll, lr, lcandidates) in lefts {
                    for (rl, rr, rcandidates) in rights {
                        let mut candidates =
                            query_pair_proximity_docids(ctx, lr, rl, pair_p + 1, wdcache)?;
                        if lcandidates.len() < rcandidates.len() {
                            candidates &= lcandidates;
                            candidates &= rcandidates;
                        } else {
                            candidates &= rcandidates;
                            candidates &= lcandidates;
                        }
                        if !candidates.is_empty() {
                            output.push((ll.clone(), rr.clone(), candidates));
                        }
                    }
                }
            }
        }

        Ok(output)
    }

    fn mdfs(
        ctx: &dyn Context,
        branches: &[Operation],
        proximity: u8,
        cache: &mut Cache,
        wdcache: &mut WordDerivationsCache,
    ) -> Result<Vec<(Query, Query, RoaringBitmap)>> {
        // Extract the first two elements, but give the tail
        // that comes just after the first element.
        let next =
            branches.split_first().map(|(h1, t)| (h1, t.split_first().map(|(h2, _)| (h2, t))));

        match next {
            Some((head1, Some((head2, [_])))) => {
                mdfs_pair(ctx, head1, head2, proximity, cache, wdcache)
            }
            Some((head1, Some((head2, tail)))) => {
                let mut output = Vec::new();
                for p in 0..=proximity {
                    for (lhead, _, head_candidates) in
                        mdfs_pair(ctx, head1, head2, p, cache, wdcache)?
                    {
                        if !head_candidates.is_empty() {
                            for (_, rtail, mut candidates) in
                                mdfs(ctx, tail, proximity - p, cache, wdcache)?
                            {
                                candidates &= &head_candidates;
                                if !candidates.is_empty() {
                                    output.push((lhead.clone(), rtail, candidates));
                                }
                            }
                        }
                    }
                }
                Ok(output)
            }
            Some((head1, None)) => resolve_operation(ctx, head1, proximity, cache, wdcache),
            None => Ok(Default::default()),
        }
    }

    let mut candidates = RoaringBitmap::new();
    for (_, _, cds) in resolve_operation(ctx, query_tree, proximity, cache, wdcache)? {
        candidates |= cds;
    }
    Ok(candidates)
}

fn resolve_plane_sweep_candidates(
    ctx: &dyn Context,
    query_tree: &Operation,
    allowed_candidates: &RoaringBitmap,
) -> Result<BTreeMap<u8, RoaringBitmap>> {
/// FIXME may be buggy with query like "new new york"
|
||||
fn plane_sweep(
|
||||
groups_positions: Vec<Vec<(Position, u8, Position)>>,
|
||||
consecutive: bool,
|
||||
) -> Result<Vec<(Position, u8, Position)>> {
|
||||
fn compute_groups_proximity(
|
||||
groups: &[(usize, (Position, u8, Position))],
|
||||
consecutive: bool,
|
||||
) -> Option<(Position, u8, Position)> {
|
||||
// take the inner proximity of the first group as initial
|
||||
let (_, (_, mut proximity, _)) = groups.first()?;
|
||||
let (_, (left_most_pos, _, _)) = groups.first()?;
|
||||
let (_, (_, _, right_most_pos)) =
|
||||
groups.iter().max_by_key(|(_, (_, _, right_most_pos))| right_most_pos)?;
|
||||
|
||||
for pair in groups.windows(2) {
|
||||
if let [(i1, (lpos1, _, rpos1)), (i2, (lpos2, prox2, rpos2))] = pair {
|
||||
// if two positions are equal, meaning that they share at least a word, we return None
|
||||
if rpos1 == rpos2 || lpos1 == lpos2 || rpos1 == lpos2 || lpos1 == rpos2 {
|
||||
return None;
|
||||
}
|
||||
|
||||
let pair_proximity = {
|
||||
// if intervals are disjoint [..].(..)
|
||||
if lpos2 > rpos1 {
|
||||
lpos2 - rpos1
|
||||
}
|
||||
// if the second interval is a subset of the first [.(..).]
|
||||
else if rpos2 < rpos1 {
|
||||
(lpos2 - lpos1).min(rpos1 - rpos2)
|
||||
}
|
||||
// if intervals overlaps [.(..].)
|
||||
else {
|
||||
(lpos2 - lpos1).min(rpos2 - rpos1)
|
||||
}
|
||||
};
|
||||
|
||||
// if groups are in the good order (query order) we remove 1 to the proximity
|
||||
// the proximity is clamped to 7
|
||||
let pair_proximity =
|
||||
if i1 < i2 { (pair_proximity - 1).min(7) } else { pair_proximity.min(7) };
|
||||
|
||||
proximity += pair_proximity as u8 + prox2;
|
||||
}
|
||||
}
|
||||
|
||||
// if groups should be consecutives, we will only accept groups with a proximity of 0
|
||||
if !consecutive || proximity == 0 {
|
||||
Some((*left_most_pos, proximity, *right_most_pos))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
let groups_len = groups_positions.len();
|
||||
|
||||
let mut groups_positions: Vec<_> =
|
||||
groups_positions.into_iter().map(|pos| pos.into_iter()).collect();
|
||||
|
||||
// Pop top elements of each list.
|
||||
let mut current = Vec::with_capacity(groups_len);
|
||||
for (i, positions) in groups_positions.iter_mut().enumerate() {
|
||||
match positions.next() {
|
||||
Some(p) => current.push((i, p)),
|
||||
// if a group return None, it means that the document does not contain all the words,
|
||||
// we return an empty result.
|
||||
None => return Ok(Vec::new()),
|
||||
}
|
||||
}
|
||||
|
||||
// Sort k elements by their positions.
|
||||
current.sort_unstable_by_key(|(_, p)| *p);
|
||||
|
||||
// Find leftmost and rightmost group and their positions.
|
||||
let mut leftmost = *current.first().unwrap();
|
||||
let mut rightmost = *current.last().unwrap();
|
||||
|
||||
let mut output = Vec::new();
|
||||
loop {
|
||||
// Find the position p of the next elements of a list of the leftmost group.
|
||||
// If the list is empty, break the loop.
|
||||
let p = groups_positions[leftmost.0].next().map(|p| (leftmost.0, p));
|
||||
|
||||
// let q be the position q of second group of the interval.
|
||||
let q = current[1];
|
||||
|
||||
// If p > r, then the interval [l, r] is minimal and
|
||||
// we insert it into the heap according to its size.
|
||||
if p.map_or(true, |p| p.1 > rightmost.1) {
|
||||
if let Some(group) = compute_groups_proximity(¤t, consecutive) {
|
||||
output.push(group);
|
||||
}
|
||||
}
|
||||
|
||||
let p = match p {
|
||||
Some(p) => p,
|
||||
None => break,
|
||||
};
|
||||
|
||||
// Replace the leftmost group P in the interval.
|
||||
current[0] = p;
|
||||
|
||||
if p.1 > rightmost.1 {
|
||||
// if [l, r] is minimal, let r = p and l = q.
|
||||
rightmost = p;
|
||||
leftmost = q;
|
||||
} else {
|
||||
// Ohterwise, let l = min{p,q}.
|
||||
leftmost = if p.1 < q.1 { p } else { q };
|
||||
}
|
||||
|
||||
// Then update the interval and order of groups_positions in the interval.
|
||||
current.sort_unstable_by_key(|(_, p)| *p);
|
||||
}
|
||||
|
||||
// Sort the list according to the size and the positions.
|
||||
output.sort_unstable();
|
||||
|
||||
Ok(output)
|
||||
}
|
||||
|
||||
    fn resolve_operation<'a>(
        query_tree: &'a Operation,
        rocache: &mut HashMap<&'a Operation, Vec<(Position, u8, Position)>>,
        words_positions: &HashMap<String, RoaringBitmap>,
    ) -> Result<Vec<(Position, u8, Position)>> {
        use Operation::{And, Or, Phrase};

        if let Some(result) = rocache.get(query_tree) {
            return Ok(result.clone());
        }

        let result = match query_tree {
            And(ops) => {
                let mut groups_positions = Vec::with_capacity(ops.len());
                for operation in ops {
                    let positions = resolve_operation(operation, rocache, words_positions)?;
                    groups_positions.push(positions);
                }
                plane_sweep(groups_positions, false)?
            }
            Phrase(words) => {
                let mut groups_positions = Vec::with_capacity(words.len());

                // group stop_words together.
                for words in words.linear_group_by_key(Option::is_none) {
                    // skip if it's a group of stop words.
                    if matches!(words.first(), None | Some(None)) {
                        continue;
                    }
                    // make a consecutive plane-sweep on the subgroup of words.
                    let mut subgroup = Vec::with_capacity(words.len());
                    for word in words.iter().map(|w| w.as_deref().unwrap()) {
                        match words_positions.get(word) {
                            Some(positions) => {
                                subgroup.push(positions.iter().map(|p| (p, 0, p)).collect())
                            }
                            None => return Ok(vec![]),
                        }
                    }
                    match subgroup.len() {
                        0 => {}
                        1 => groups_positions.push(subgroup.pop().unwrap()),
                        _ => groups_positions.push(plane_sweep(subgroup, true)?),
                    }
                }
                match groups_positions.len() {
                    0 => vec![],
                    1 => groups_positions.pop().unwrap(),
                    _ => plane_sweep(groups_positions, false)?,
                }
            }
            Or(_, ops) => {
                let mut result = Vec::new();
                for op in ops {
                    result.extend(resolve_operation(op, rocache, words_positions)?)
                }

                result.sort_unstable();
                result
            }
            Operation::Query(Query { prefix, kind }) => {
                let mut result = Vec::new();
                match kind {
                    QueryKind::Exact { word, .. } => {
                        if *prefix {
                            let iter = word_derivations(word, true, 0, words_positions)
                                .flat_map(|positions| positions.iter().map(|p| (p, 0, p)));
                            result.extend(iter);
                        } else if let Some(positions) = words_positions.get(word) {
                            result.extend(positions.iter().map(|p| (p, 0, p)));
                        }
                    }
                    QueryKind::Tolerant { typo, word } => {
                        let iter = word_derivations(word, *prefix, *typo, words_positions)
                            .flat_map(|positions| positions.iter().map(|p| (p, 0, p)));
                        result.extend(iter);
                    }
                }

                result.sort_unstable();
                result
            }
        };

        rocache.insert(query_tree, result.clone());
        Ok(result)
    }

    fn word_derivations<'a>(
        word: &str,
        is_prefix: bool,
        max_typo: u8,
        words_positions: &'a HashMap<String, RoaringBitmap>,
    ) -> impl Iterator<Item = &'a RoaringBitmap> {
        let dfa = build_dfa(word, max_typo, is_prefix);
        words_positions.iter().filter_map(move |(document_word, positions)| {
            use levenshtein_automata::Distance;
            match dfa.eval(document_word) {
                Distance::Exact(_) => Some(positions),
                Distance::AtLeast(_) => None,
            }
        })
    }

    let mut resolve_operation_cache = HashMap::new();
    let mut candidates = BTreeMap::new();
    for docid in allowed_candidates {
        let words_positions = ctx.docid_words_positions(docid)?;
        resolve_operation_cache.clear();
        let positions =
            resolve_operation(query_tree, &mut resolve_operation_cache, &words_positions)?;
        let best_proximity = positions.into_iter().min_by_key(|(_, proximity, _)| *proximity);
        let best_proximity = best_proximity.map(|(_, proximity, _)| proximity).unwrap_or(7);
        candidates.entry(best_proximity).or_insert_with(RoaringBitmap::new).insert(docid);
    }

    Ok(candidates)
}

#[cfg(test)]
mod tests {
    use std::io::Cursor;

    use big_s::S;

    use crate::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
    use crate::index::tests::TempIndex;
    use crate::{Criterion, CriterionImplementationStrategy, SearchResult};

    fn documents_with_enough_different_words_for_prefixes(prefixes: &[&str]) -> Vec<crate::Object> {
        let mut documents = Vec::new();
        for prefix in prefixes {
            for i in 0..500 {
                documents.push(
                    serde_json::json!({
                        "text": format!("{prefix}{i:x}"),
                    })
                    .as_object()
                    .unwrap()
                    .clone(),
                )
            }
        }
        documents
    }

    #[test]
    fn test_proximity_criterion_prefix_handling() {
        let mut index = TempIndex::new();
        index.index_documents_config.autogenerate_docids = true;

        index
            .update_settings(|settings| {
                settings.set_primary_key(S("id"));
                settings.set_criteria(vec![
                    Criterion::Words,
                    Criterion::Typo,
                    Criterion::Proximity,
                ]);
            })
            .unwrap();

        let mut documents = DocumentsBatchBuilder::new(Vec::new());

        for doc in [
            // 0
            serde_json::json!({ "text": "zero is exactly the amount of configuration I want" }),
            // 1
            serde_json::json!({ "text": "zero bad configuration" }),
            // 2
            serde_json::json!({ "text": "zero configuration" }),
            // 3
            serde_json::json!({ "text": "zero config" }),
            // 4
            serde_json::json!({ "text": "zero conf" }),
            // 5
            serde_json::json!({ "text": "zero bad conf" }),
        ] {
            documents.append_json_object(doc.as_object().unwrap()).unwrap();
        }
        for doc in documents_with_enough_different_words_for_prefixes(&["conf"]) {
            documents.append_json_object(&doc).unwrap();
        }
        let documents =
            DocumentsBatchReader::from_reader(Cursor::new(documents.into_inner().unwrap()))
                .unwrap();

        index.add_documents(documents).unwrap();

        let rtxn = index.read_txn().unwrap();

        let SearchResult { matching_words: _, candidates: _, documents_ids } = index
            .search(&rtxn)
            .query("zero c")
            .criterion_implementation_strategy(CriterionImplementationStrategy::OnlySetBased)
            .execute()
            .unwrap();
        insta::assert_snapshot!(format!("{documents_ids:?}"), @"[2, 3, 4, 1, 5, 0]");

        let SearchResult { matching_words: _, candidates: _, documents_ids } = index
            .search(&rtxn)
            .query("zero co")
            .criterion_implementation_strategy(CriterionImplementationStrategy::OnlySetBased)
            .execute()
            .unwrap();
        insta::assert_snapshot!(format!("{documents_ids:?}"), @"[2, 3, 4, 1, 5, 0]");

        let SearchResult { matching_words: _, candidates: _, documents_ids } = index
            .search(&rtxn)
            .query("zero con")
            .criterion_implementation_strategy(CriterionImplementationStrategy::OnlySetBased)
            .execute()
            .unwrap();
        // Here search results are degraded because `con` is in the prefix cache but it is too
        // long to be stored in the prefix proximity databases, and we don't want to iterate over
        // all of its word derivations
        insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0, 1, 2, 3, 4, 5]");

        let SearchResult { matching_words: _, candidates: _, documents_ids } = index
            .search(&rtxn)
            .criterion_implementation_strategy(CriterionImplementationStrategy::OnlySetBased)
            .query("zero conf")
            .execute()
            .unwrap();
        // Here search results are degraded as well, but we can still rank correctly documents
        // that contain `conf` exactly, and not as a prefix.
        insta::assert_snapshot!(format!("{documents_ids:?}"), @"[4, 5, 0, 1, 2, 3]");

        let SearchResult { matching_words: _, candidates: _, documents_ids } = index
            .search(&rtxn)
            .criterion_implementation_strategy(CriterionImplementationStrategy::OnlySetBased)
            .query("zero config")
            .execute()
            .unwrap();
        // `config` is not a common prefix, so the normal methods are used
        insta::assert_snapshot!(format!("{documents_ids:?}"), @"[2, 3, 1, 0, 4, 5]");
    }
}

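In `mdfs_pair` above, a proximity budget is split between the pair itself and its two operands by `pair_combinations`, which enumerates every way to write the budget as `m + (mana - m)` with the first component capped. A standalone copy of that helper with a worked example; the `main` function is illustrative only:

```rust
fn pair_combinations(mana: u8, left_max: u8) -> impl Iterator<Item = (u8, u8)> {
    (0..=mana.min(left_max)).map(move |m| (m, mana - m))
}

fn main() {
    // A budget of 3 split over a pair: (0,3), (1,2), (2,1), (3,0).
    let splits: Vec<_> = pair_combinations(3, 7).collect();
    assert_eq!(splits, vec![(0, 3), (1, 2), (2, 1), (3, 0)]);

    // `left_max` caps the first component, as when a pair proximity
    // cannot exceed `pair_max_proximity = 7`.
    let capped: Vec<_> = pair_combinations(9, 7).collect();
    assert_eq!(capped.len(), 8); // m ranges over 0..=7
}
```
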
@@ -1,493 +0,0 @@
use std::borrow::Cow;
use std::collections::HashMap;
use std::mem::take;

use log::debug;
use roaring::RoaringBitmap;

use super::{
    query_docids, resolve_query_tree, Candidates, Context, Criterion, CriterionParameters,
    CriterionResult,
};
use crate::search::criteria::{resolve_phrase, InitialCandidates};
use crate::search::query_tree::{maximum_typo, Operation, Query, QueryKind};
use crate::search::{word_derivations, WordDerivationsCache};
use crate::Result;

/// Maximum number of typos for a word of any length.
const MAX_TYPOS_PER_WORD: u8 = 2;

pub struct Typo<'t> {
    ctx: &'t dyn Context<'t>,
    /// (max_typos, query_tree, candidates)
    state: Option<(u8, Operation, Candidates)>,
    typos: u8,
    initial_candidates: Option<InitialCandidates>,
    parent: Box<dyn Criterion + 't>,
    candidates_cache: HashMap<(Operation, u8), RoaringBitmap>,
}

impl<'t> Typo<'t> {
    pub fn new(ctx: &'t dyn Context<'t>, parent: Box<dyn Criterion + 't>) -> Self {
        Typo {
            ctx,
            state: None,
            typos: 0,
            initial_candidates: None,
            parent,
            candidates_cache: HashMap::new(),
        }
    }
}

impl<'t> Criterion for Typo<'t> {
    #[logging_timer::time("Typo::{}")]
    fn next(&mut self, params: &mut CriterionParameters) -> Result<Option<CriterionResult>> {
        use Candidates::{Allowed, Forbidden};
        // remove excluded candidates when next is called, instead of doing it in the loop.
        match self.state.as_mut() {
            Some((_, _, Allowed(candidates))) => *candidates -= params.excluded_candidates,
            Some((_, _, Forbidden(candidates))) => *candidates |= params.excluded_candidates,
            None => (),
        }

        loop {
            debug!(
                "Typo at iteration {} (max typos {:?}) ({:?})",
                self.typos,
                self.state.as_ref().map(|(mt, _, _)| mt),
                self.state.as_ref().map(|(_, _, cd)| cd),
            );

            match self.state.as_mut() {
                Some((max_typos, _, _)) if self.typos > *max_typos => {
                    self.state = None; // reset state
                }
                Some((_, _, Allowed(allowed_candidates))) if allowed_candidates.is_empty() => {
                    self.state = None; // reset state
                }
                Some((_, query_tree, candidates_authorization)) => {
                    let fst = self.ctx.words_fst();
                    let new_query_tree = match self.typos {
                        typos if typos < MAX_TYPOS_PER_WORD => alterate_query_tree(
                            fst,
                            query_tree.clone(),
                            self.typos,
                            params.wdcache,
                        )?,
                        MAX_TYPOS_PER_WORD => {
                            // When typos >= MAX_TYPOS_PER_WORD, no more alteration of the query tree is possible,
                            // we keep the altered query tree
                            *query_tree = alterate_query_tree(
                                fst,
                                query_tree.clone(),
                                self.typos,
                                params.wdcache,
                            )?;
                            // we compute the allowed candidates
                            let query_tree_allowed_candidates =
                                resolve_query_tree(self.ctx, query_tree, params.wdcache)?;
                            // we assign the allowed candidates to the candidates authorization.
                            *candidates_authorization = match take(candidates_authorization) {
                                Allowed(allowed_candidates) => {
                                    Allowed(query_tree_allowed_candidates & allowed_candidates)
                                }
                                Forbidden(forbidden_candidates) => {
                                    Allowed(query_tree_allowed_candidates - forbidden_candidates)
                                }
                            };
                            query_tree.clone()
                        }
                        _otherwise => query_tree.clone(),
                    };

                    let mut candidates = resolve_candidates(
                        self.ctx,
                        &new_query_tree,
                        self.typos,
                        &mut self.candidates_cache,
                        params.wdcache,
                    )?;

                    match candidates_authorization {
                        Allowed(allowed_candidates) => {
                            candidates &= &*allowed_candidates;
                            *allowed_candidates -= &candidates;
                        }
                        Forbidden(forbidden_candidates) => {
                            candidates -= &*forbidden_candidates;
                            *forbidden_candidates |= &candidates;
                        }
                    }

                    let initial_candidates = match self.initial_candidates.as_mut() {
                        Some(initial_candidates) => initial_candidates.take(),
                        None => InitialCandidates::Estimated(candidates.clone()),
                    };

                    self.typos += 1;

                    return Ok(Some(CriterionResult {
                        query_tree: Some(new_query_tree),
                        candidates: Some(candidates),
                        filtered_candidates: None,
                        initial_candidates: Some(initial_candidates),
                    }));
                }
                None => match self.parent.next(params)? {
                    Some(CriterionResult {
                        query_tree: Some(query_tree),
                        candidates,
                        filtered_candidates,
                        initial_candidates,
                    }) => {
                        self.initial_candidates =
                            match (self.initial_candidates.take(), initial_candidates) {
                                (Some(self_ic), Some(parent_ic)) => Some(self_ic | parent_ic),
                                (self_ic, parent_ic) => self_ic.or(parent_ic),
                            };

                        let candidates = match candidates.or(filtered_candidates) {
                            Some(candidates) => {
                                Candidates::Allowed(candidates - params.excluded_candidates)
                            }
                            None => Candidates::Forbidden(params.excluded_candidates.clone()),
                        };

                        let maximum_typos = maximum_typo(&query_tree) as u8;
                        self.state = Some((maximum_typos, query_tree, candidates));
                        self.typos = 0;
                    }
                    Some(CriterionResult {
                        query_tree: None,
                        candidates,
                        filtered_candidates,
                        initial_candidates,
                    }) => {
                        return Ok(Some(CriterionResult {
                            query_tree: None,
                            candidates,
                            filtered_candidates,
                            initial_candidates,
                        }));
                    }
                    None => return Ok(None),
                },
            }
        }
    }
}

/// Modify the query tree by replacing every tolerant query by an Or operation
/// containing all of the corresponding exact words in the words FST. Each tolerant
/// query will only be replaced by exact queries with up to `number_typos` maximum typos.
fn alterate_query_tree(
    words_fst: &fst::Set<Cow<[u8]>>,
    mut query_tree: Operation,
    number_typos: u8,
    wdcache: &mut WordDerivationsCache,
) -> Result<Operation> {
    fn recurse(
        words_fst: &fst::Set<Cow<[u8]>>,
        operation: &mut Operation,
        number_typos: u8,
        wdcache: &mut WordDerivationsCache,
    ) -> Result<()> {
        use Operation::{And, Or, Phrase};

        match operation {
            And(ops) | Or(_, ops) => {
                ops.iter_mut().try_for_each(|op| recurse(words_fst, op, number_typos, wdcache))
            }
            // Because Phrases don't allow typos, no alteration can be done.
            Phrase(_words) => Ok(()),
            Operation::Query(q) => {
                if let QueryKind::Tolerant { typo, word } = &q.kind {
                    // if no typo is allowed we don't call word_derivations function,
                    // and directly create an Exact query
                    if number_typos == 0 {
                        *operation = Operation::Query(Query {
                            prefix: q.prefix,
                            kind: QueryKind::Exact { original_typo: 0, word: word.clone() },
                        });
                    } else {
                        let typo = *typo.min(&number_typos);
                        let words = word_derivations(word, q.prefix, typo, words_fst, wdcache)?;
                        let queries = words
                            .iter()
                            .map(|(word, typo)| {
                                Operation::Query(Query {
                                    prefix: false,
                                    kind: QueryKind::Exact {
                                        original_typo: *typo,
                                        word: word.to_string(),
                                    },
                                })
                            })
                            .collect();

                        *operation = Operation::or(false, queries);
                    }
                }

                Ok(())
            }
        }
    }

    recurse(words_fst, &mut query_tree, number_typos, wdcache)?;
    Ok(query_tree)
}

fn resolve_candidates(
    ctx: &dyn Context,
    query_tree: &Operation,
    number_typos: u8,
    cache: &mut HashMap<(Operation, u8), RoaringBitmap>,
    wdcache: &mut WordDerivationsCache,
) -> Result<RoaringBitmap> {
    fn resolve_operation(
        ctx: &dyn Context,
        query_tree: &Operation,
        number_typos: u8,
        cache: &mut HashMap<(Operation, u8), RoaringBitmap>,
        wdcache: &mut WordDerivationsCache,
    ) -> Result<RoaringBitmap> {
        use Operation::{And, Or, Phrase, Query};

        match query_tree {
            And(ops) => mdfs(ctx, ops, number_typos, cache, wdcache),
            Phrase(words) => resolve_phrase(ctx, words),
            Or(_, ops) => {
                let mut candidates = RoaringBitmap::new();
                for op in ops {
                    let docids = resolve_operation(ctx, op, number_typos, cache, wdcache)?;
                    candidates |= docids;
                }
                Ok(candidates)
            }
            Query(q) => {
                if q.kind.typo() == number_typos {
                    Ok(query_docids(ctx, q, wdcache)?)
                } else {
                    Ok(RoaringBitmap::new())
                }
            }
        }
    }

    fn mdfs(
        ctx: &dyn Context,
        branches: &[Operation],
        mana: u8,
        cache: &mut HashMap<(Operation, u8), RoaringBitmap>,
        wdcache: &mut WordDerivationsCache,
    ) -> Result<RoaringBitmap> {
        match branches.split_first() {
            Some((head, [])) => {
                let cache_key = (head.clone(), mana);
                if let Some(candidates) = cache.get(&cache_key) {
                    Ok(candidates.clone())
                } else {
                    let candidates = resolve_operation(ctx, head, mana, cache, wdcache)?;
                    cache.insert(cache_key, candidates.clone());
                    Ok(candidates)
                }
            }
            Some((head, tail)) => {
                let mut candidates = RoaringBitmap::new();

                for m in 0..=mana {
                    let mut head_candidates = {
                        let cache_key = (head.clone(), m);
                        if let Some(candidates) = cache.get(&cache_key) {
                            candidates.clone()
                        } else {
                            let candidates = resolve_operation(ctx, head, m, cache, wdcache)?;
                            cache.insert(cache_key, candidates.clone());
                            candidates
                        }
                    };
                    if !head_candidates.is_empty() {
                        let tail_candidates = mdfs(ctx, tail, mana - m, cache, wdcache)?;
                        head_candidates &= tail_candidates;
                        candidates |= head_candidates;
                    }
                }

                Ok(candidates)
            }
            None => Ok(RoaringBitmap::new()),
        }
    }

    resolve_operation(ctx, query_tree, number_typos, cache, wdcache)
}

#[cfg(test)]
mod test {
    use super::super::initial::Initial;
    use super::super::test::TestContext;
    use super::*;
    use crate::search::NoopDistinct;

    fn display_criteria(mut criteria: Typo, mut parameters: CriterionParameters) -> String {
        let mut result = String::new();
        while let Some(criterion) = criteria.next(&mut parameters).unwrap() {
            result.push_str(&format!("{criterion:?}\n\n"));
        }
        result
    }

    #[test]
    fn initial_placeholder_no_facets() {
        let context = TestContext::default();
        let query_tree = None;
        let facet_candidates = None;

        let criterion_parameters = CriterionParameters {
            wdcache: &mut WordDerivationsCache::new(),
            excluded_candidates: &RoaringBitmap::new(),
        };

        let parent =
            Initial::<NoopDistinct>::new(&context, query_tree, facet_candidates, false, None);
        let criteria = Typo::new(&context, Box::new(parent));

        let result = display_criteria(criteria, criterion_parameters);
        insta::assert_snapshot!(result, @r###"
        CriterionResult { query_tree: None, candidates: None, filtered_candidates: None, initial_candidates: None }

        "###);
    }

    #[test]
    fn initial_query_tree_no_facets() {
        let context = TestContext::default();
        let query_tree = Operation::Or(
            false,
            vec![Operation::And(vec![
                Operation::Query(Query {
                    prefix: false,
                    kind: QueryKind::exact("split".to_string()),
                }),
                Operation::Query(Query {
                    prefix: false,
                    kind: QueryKind::exact("this".to_string()),
                }),
                Operation::Query(Query {
                    prefix: false,
                    kind: QueryKind::tolerant(1, "world".to_string()),
                }),
            ])],
        );

        let facet_candidates = None;

        let criterion_parameters = CriterionParameters {
            wdcache: &mut WordDerivationsCache::new(),
            excluded_candidates: &RoaringBitmap::new(),
        };
        let parent =
            Initial::<NoopDistinct>::new(&context, Some(query_tree), facet_candidates, false, None);
        let criteria = Typo::new(&context, Box::new(parent));

        let result = display_criteria(criteria, criterion_parameters);
        insta::assert_snapshot!(result, @r###"
        CriterionResult { query_tree: Some(OR
          AND
            Exact { word: "split" }
            Exact { word: "this" }
            Exact { word: "world" }
        ), candidates: Some(RoaringBitmap<[]>), filtered_candidates: None, initial_candidates: Some(Estimated(RoaringBitmap<[]>)) }

        CriterionResult { query_tree: Some(OR
          AND
            Exact { word: "split" }
            Exact { word: "this" }
            OR
              Exact { word: "word" }
              Exact { word: "world" }
        ), candidates: Some(RoaringBitmap<[]>), filtered_candidates: None, initial_candidates: Some(Estimated(RoaringBitmap<[]>)) }

        "###);
    }

    #[test]
    fn initial_placeholder_with_facets() {
        let context = TestContext::default();
        let query_tree = None;
        let facet_candidates = context.word_docids("earth").unwrap().unwrap();

        let criterion_parameters = CriterionParameters {
            wdcache: &mut WordDerivationsCache::new(),
            excluded_candidates: &RoaringBitmap::new(),
        };
        let parent =
            Initial::<NoopDistinct>::new(&context, query_tree, Some(facet_candidates), false, None);
        let criteria = Typo::new(&context, Box::new(parent));

        let result = display_criteria(criteria, criterion_parameters);
        insta::assert_snapshot!(result, @r###"
        CriterionResult { query_tree: None, candidates: None, filtered_candidates: Some(RoaringBitmap<8000 values between 986424 and 4294786076>), initial_candidates: None }

        "###);
    }

    #[test]
    fn initial_query_tree_with_facets() {
        let context = TestContext::default();
        let query_tree = Operation::Or(
            false,
            vec![Operation::And(vec![
                Operation::Query(Query {
                    prefix: false,
                    kind: QueryKind::exact("split".to_string()),
                }),
                Operation::Query(Query {
                    prefix: false,
                    kind: QueryKind::exact("this".to_string()),
                }),
                Operation::Query(Query {
                    prefix: false,
                    kind: QueryKind::tolerant(1, "world".to_string()),
                }),
            ])],
        );

        let facet_candidates = context.word_docids("earth").unwrap().unwrap();

        let criterion_parameters = CriterionParameters {
            wdcache: &mut WordDerivationsCache::new(),
            excluded_candidates: &RoaringBitmap::new(),
        };
        let parent = Initial::<NoopDistinct>::new(
            &context,
            Some(query_tree),
            Some(facet_candidates),
            false,
            None,
        );
        let criteria = Typo::new(&context, Box::new(parent));

        let result = display_criteria(criteria, criterion_parameters);
        insta::assert_snapshot!(result, @r###"
        CriterionResult { query_tree: Some(OR
          AND
            Exact { word: "split" }
            Exact { word: "this" }
            Exact { word: "world" }
        ), candidates: Some(RoaringBitmap<[]>), filtered_candidates: None, initial_candidates: Some(Estimated(RoaringBitmap<[]>)) }

        CriterionResult { query_tree: Some(OR
          AND
            Exact { word: "split" }
            Exact { word: "this" }
            OR
              Exact { word: "word" }
              Exact { word: "world" }
        ), candidates: Some(RoaringBitmap<[]>), filtered_candidates: None, initial_candidates: Some(Estimated(RoaringBitmap<[]>)) }

        "###);
    }
}

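Like the proximity criterion, the `mdfs` here spends a typo budget (`mana`) across the AND branches: for every split `m + (mana - m)`, the head resolves with `m` typos and the tail recursively with the remainder, recursing only when the head produced candidates. A toy sketch of the same budget-splitting recursion over plain sets; the `(typo_count, docs)` representation is made up for the example:

```rust
use std::collections::HashSet;

/// Documents matching `branch` with exactly `typos` typos, toy version.
fn resolve(branch: &[(u8, HashSet<u32>)], typos: u8) -> HashSet<u32> {
    branch
        .iter()
        .filter(|(t, _)| *t == typos)
        .flat_map(|(_, docs)| docs.iter().copied())
        .collect()
}

/// Intersect the branches while distributing `mana` typos among them,
/// mirroring the shape of `mdfs` in the Typo criterion.
fn mdfs(branches: &[Vec<(u8, HashSet<u32>)>], mana: u8) -> HashSet<u32> {
    match branches.split_first() {
        None => HashSet::new(),
        Some((head, [])) => resolve(head, mana),
        Some((head, tail)) => {
            let mut out = HashSet::new();
            for m in 0..=mana {
                let head_docs = resolve(head, m);
                // Only recurse into the tail when the head matched something,
                // since the intersection would otherwise be empty anyway.
                if !head_docs.is_empty() {
                    let tail_docs = mdfs(tail, mana - m);
                    out.extend(head_docs.intersection(&tail_docs).copied());
                }
            }
            out
        }
    }
}
```
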
@@ -1,106 +0,0 @@
use log::debug;
use roaring::RoaringBitmap;

use super::{resolve_query_tree, Context, Criterion, CriterionParameters, CriterionResult};
use crate::search::criteria::InitialCandidates;
use crate::search::query_tree::Operation;
use crate::Result;

pub struct Words<'t> {
    ctx: &'t dyn Context<'t>,
    query_trees: Vec<Operation>,
    candidates: Option<RoaringBitmap>,
    initial_candidates: Option<InitialCandidates>,
    filtered_candidates: Option<RoaringBitmap>,
    parent: Box<dyn Criterion + 't>,
}

impl<'t> Words<'t> {
    pub fn new(ctx: &'t dyn Context<'t>, parent: Box<dyn Criterion + 't>) -> Self {
        Words {
            ctx,
            query_trees: Vec::default(),
            candidates: None,
            initial_candidates: None,
            parent,
            filtered_candidates: None,
        }
    }
}

impl<'t> Criterion for Words<'t> {
    #[logging_timer::time("Words::{}")]
    fn next(&mut self, params: &mut CriterionParameters) -> Result<Option<CriterionResult>> {
        // remove excluded candidates when next is called, instead of doing it in the loop.
        if let Some(candidates) = self.candidates.as_mut() {
            *candidates -= params.excluded_candidates;
        }

        loop {
            debug!("Words at iteration {} ({:?})", self.query_trees.len(), self.candidates);

            match self.query_trees.pop() {
                Some(query_tree) => {
                    let candidates = match self.candidates.as_mut() {
                        Some(allowed_candidates) => {
                            let mut candidates =
                                resolve_query_tree(self.ctx, &query_tree, params.wdcache)?;
                            candidates &= &*allowed_candidates;
                            *allowed_candidates -= &candidates;
                            Some(candidates)
                        }
                        None => None,
                    };

                    let initial_candidates = self.initial_candidates.clone();

                    return Ok(Some(CriterionResult {
                        query_tree: Some(query_tree),
                        candidates,
                        filtered_candidates: self.filtered_candidates.clone(),
                        initial_candidates,
                    }));
                }
                None => match self.parent.next(params)? {
                    Some(CriterionResult {
                        query_tree: Some(query_tree),
                        candidates,
                        filtered_candidates,
                        initial_candidates,
                    }) => {
                        self.query_trees = explode_query_tree(query_tree);
                        self.candidates = candidates;
                        self.filtered_candidates = filtered_candidates;

                        self.initial_candidates =
                            match (self.initial_candidates.take(), initial_candidates) {
                                (Some(self_ic), Some(parent_ic)) => Some(self_ic | parent_ic),
                                (self_ic, parent_ic) => self_ic.or(parent_ic),
                            };
                    }
                    Some(CriterionResult {
                        query_tree: None,
                        candidates,
                        filtered_candidates,
                        initial_candidates,
                    }) => {
                        return Ok(Some(CriterionResult {
                            query_tree: None,
                            candidates,
                            filtered_candidates,
                            initial_candidates,
                        }));
                    }
                    None => return Ok(None),
                },
            }
        }
    }
}

fn explode_query_tree(query_tree: Operation) -> Vec<Operation> {
    match query_tree {
        Operation::Or(true, ops) => ops,
        otherwise => vec![otherwise],
    }
}

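`explode_query_tree` turns the top-level word-based `Or(true, ops)` into one query tree per number of kept words, and `Words::next` pops them so the tree that keeps the most words is tried first. A minimal sketch with a simplified operation type; the `Op` enum is a stand-in, not milli's `Operation`:

```rust
#[derive(Debug, PartialEq)]
enum Op {
    Or(bool, Vec<Op>),
    Word(&'static str),
}

fn explode(op: Op) -> Vec<Op> {
    match op {
        // A word-based Or (the `true` flag) explodes into its branches.
        Op::Or(true, ops) => ops,
        otherwise => vec![otherwise],
    }
}

fn main() {
    let tree = Op::Or(true, vec![Op::Word("hello"), Op::Word("hello world")]);
    // Two query trees; `Words` pops from the end of the Vec, so the
    // last (most complete) sub-query is ranked first.
    assert_eq!(explode(tree).len(), 2);
}
```
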
@@ -1,218 +0,0 @@
use std::mem::size_of;

use concat_arrays::concat_arrays;
use heed::types::{ByteSlice, Str, Unit};
use roaring::RoaringBitmap;

use super::{Distinct, DocIter};
use crate::error::InternalError;
use crate::heed_codec::facet::{FacetGroupKey, *};
use crate::index::db_name;
use crate::{DocumentId, FieldId, Index, Result};

const FID_SIZE: usize = size_of::<FieldId>();
const DOCID_SIZE: usize = size_of::<DocumentId>();

/// A distinct implementer that is backed by facets.
///
/// On each iteration, the facet values for the
/// distinct attribute of the first document are retrieved. The document ids for these facet values
/// are then retrieved and taken out of the candidates and added to the excluded set. We take
/// care to keep the document we are currently on, and remove it from the excluded list. The next
/// iterations will never contain any occurrence of a document with the same distinct value as a
/// document from previous iterations.
#[derive(Clone)]
pub struct FacetDistinct<'a> {
    distinct: FieldId,
    index: &'a Index,
    txn: &'a heed::RoTxn<'a>,
}

impl<'a> FacetDistinct<'a> {
    pub fn new(distinct: FieldId, index: &'a Index, txn: &'a heed::RoTxn<'a>) -> Self {
        Self { distinct, index, txn }
    }
}

pub struct FacetDistinctIter<'a> {
    candidates: RoaringBitmap,
    distinct: FieldId,
    excluded: RoaringBitmap,
    index: &'a Index,
    iter_offset: usize,
    txn: &'a heed::RoTxn<'a>,
}

impl<'a> FacetDistinctIter<'a> {
    fn facet_string_docids(&self, key: &str) -> heed::Result<Option<RoaringBitmap>> {
        self.index
            .facet_id_string_docids
            .get(self.txn, &FacetGroupKey { field_id: self.distinct, level: 0, left_bound: key })
            .map(|opt| opt.map(|v| v.bitmap))
    }

    fn facet_number_docids(&self, key: f64) -> heed::Result<Option<RoaringBitmap>> {
        // get facet docids on level 0
        self.index
            .facet_id_f64_docids
            .get(self.txn, &FacetGroupKey { field_id: self.distinct, level: 0, left_bound: key })
            .map(|opt| opt.map(|v| v.bitmap))
    }

    fn distinct_string(&mut self, id: DocumentId) -> Result<()> {
        let iter = facet_string_values(id, self.distinct, self.index, self.txn)?;

        for item in iter {
            let ((_, _, value), _) = item?;
            let facet_docids =
                self.facet_string_docids(value)?.ok_or(InternalError::DatabaseMissingEntry {
                    db_name: db_name::FACET_ID_STRING_DOCIDS,
                    key: None,
                })?;
            self.excluded |= facet_docids;
        }

        self.excluded.remove(id);

        Ok(())
    }

    fn distinct_number(&mut self, id: DocumentId) -> Result<()> {
        let iter = facet_number_values(id, self.distinct, self.index, self.txn)?;

        for item in iter {
            let ((_, _, value), _) = item?;
            let facet_docids =
                self.facet_number_docids(value)?.ok_or(InternalError::DatabaseMissingEntry {
                    db_name: db_name::FACET_ID_F64_DOCIDS,
                    key: None,
                })?;
            self.excluded |= facet_docids;
        }

        self.excluded.remove(id);

        Ok(())
    }

    /// Performs the next iteration of the facet distinct. This is a convenience method that is
    /// called by the Iterator::next implementation that transposes the result. It makes error
    /// handling easier.
    fn next_inner(&mut self) -> Result<Option<DocumentId>> {
        // The first step is to remove all the excluded documents from our candidates
        self.candidates -= &self.excluded;

        let mut candidates_iter = self.candidates.iter().skip(self.iter_offset);
        match candidates_iter.next() {
            Some(id) => {
                // We distinct the document id on its facet strings and facet numbers.
                self.distinct_string(id)?;
                self.distinct_number(id)?;

                // The first document of each iteration is kept, since the next call to
                // `difference_with` will filter out all the documents for that facet value. By
                // increasing the offset we make sure to get the first valid value for the next
                // distinct document to keep.
                self.iter_offset += 1;

                Ok(Some(id))
            }
            // no more candidates at this offset, return.
            None => Ok(None),
        }
    }
}

#[allow(clippy::drop_non_drop)]
fn facet_values_prefix_key(distinct: FieldId, id: DocumentId) -> [u8; FID_SIZE + DOCID_SIZE] {
    concat_arrays!(distinct.to_be_bytes(), id.to_be_bytes())
}

fn facet_number_values<'a>(
    id: DocumentId,
    distinct: FieldId,
    index: &Index,
    txn: &'a heed::RoTxn,
) -> Result<heed::RoPrefix<'a, FieldDocIdFacetF64Codec, Unit>> {
    let key = facet_values_prefix_key(distinct, id);

    let iter = index
        .field_id_docid_facet_f64s
        .remap_key_type::<ByteSlice>()
        .prefix_iter(txn, &key)?
        .remap_key_type::<FieldDocIdFacetF64Codec>();

    Ok(iter)
}

fn facet_string_values<'a>(
    id: DocumentId,
    distinct: FieldId,
    index: &Index,
    txn: &'a heed::RoTxn,
) -> Result<heed::RoPrefix<'a, FieldDocIdFacetStringCodec, Str>> {
    let key = facet_values_prefix_key(distinct, id);

    let iter = index
        .field_id_docid_facet_strings
        .remap_key_type::<ByteSlice>()
        .prefix_iter(txn, &key)?
        .remap_types::<FieldDocIdFacetStringCodec, Str>();

    Ok(iter)
}

impl Iterator for FacetDistinctIter<'_> {
    type Item = Result<DocumentId>;

    fn next(&mut self) -> Option<Self::Item> {
        self.next_inner().transpose()
    }
}

impl DocIter for FacetDistinctIter<'_> {
    fn into_excluded(self) -> RoaringBitmap {
        self.excluded
    }
}

impl<'a> Distinct for FacetDistinct<'a> {
    type Iter = FacetDistinctIter<'a>;

    fn distinct(&mut self, candidates: RoaringBitmap, excluded: RoaringBitmap) -> Self::Iter {
        FacetDistinctIter {
            candidates,
            distinct: self.distinct,
            excluded,
            index: self.index,
            iter_offset: 0,
            txn: self.txn,
        }
    }
}

#[cfg(test)]
mod test {
    use super::super::test::{generate_index, validate_distinct_candidates};
    use super::*;

    macro_rules! test_facet_distinct {
        ($name:ident, $distinct:literal) => {
            #[test]
            fn $name() {
                let (index, fid, candidates) = generate_index($distinct);
                let txn = index.read_txn().unwrap();
                let mut map_distinct = FacetDistinct::new(fid, &index, &txn);
                let excluded = RoaringBitmap::new();
                let mut iter = map_distinct.distinct(candidates.clone(), excluded);
                let count = validate_distinct_candidates(iter.by_ref(), fid, &index);
                let excluded = iter.into_excluded();
                assert_eq!(count as u64 + excluded.len(), candidates.len());
            }
        };
    }

    test_facet_distinct!(test_string, "txt");
    test_facet_distinct!(test_strings, "txts");
    test_facet_distinct!(test_number, "cat-int");
}

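The facet-distinct iterator keeps the first document it meets for each facet value and excludes every other document sharing that value, taking care not to exclude the document currently being returned. The same exclusion discipline in miniature, over an in-memory map; the types and the data are illustrative:

```rust
use std::collections::{HashMap, HashSet};

/// Keep one document per facet value and exclude later duplicates,
/// in the spirit of `FacetDistinctIter`.
fn distinct(docs: &[(u32, &str)]) -> (Vec<u32>, HashSet<u32>) {
    let mut facet_to_docs: HashMap<&str, Vec<u32>> = HashMap::new();
    for &(id, facet) in docs {
        facet_to_docs.entry(facet).or_default().push(id);
    }

    let mut kept = Vec::new();
    let mut excluded = HashSet::new();
    for &(id, facet) in docs {
        if excluded.contains(&id) {
            continue;
        }
        kept.push(id);
        // Exclude every other document sharing this facet value,
        // but keep the document we are currently on.
        for &other in &facet_to_docs[facet] {
            if other != id {
                excluded.insert(other);
            }
        }
    }
    (kept, excluded)
}

fn main() {
    let (kept, _) = distinct(&[(0, "red"), (1, "red"), (2, "blue")]);
    assert_eq!(kept, vec![0, 2]);
}
```
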
@@ -1,155 +0,0 @@
mod facet_distinct;
mod noop_distinct;

pub use facet_distinct::FacetDistinct;
pub use noop_distinct::NoopDistinct;
use roaring::RoaringBitmap;

use crate::{DocumentId, Result};

/// A trait implemented by document iterators that are returned by calls to `Distinct::distinct`.
/// It provides a way to get back ownership of the excluded set.
pub trait DocIter: Iterator<Item = Result<DocumentId>> {
    /// Returns ownership of the internal excluded set.
    fn into_excluded(self) -> RoaringBitmap;
}

/// A trait that is implemented by structs that perform a distinct on `candidates`. Calling distinct
/// must return an iterator containing only distinct documents, and add the discarded documents to
/// the excluded set. The excluded set can later be retrieved by calling `DocIter::into_excluded` on the
/// returned iterator.
pub trait Distinct {
|
||||
type Iter: DocIter;
|
||||
|
||||
fn distinct(&mut self, candidates: RoaringBitmap, excluded: RoaringBitmap) -> Self::Iter;
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use std::collections::HashSet;
|
||||
use std::io::Cursor;
|
||||
|
||||
use once_cell::sync::Lazy;
|
||||
use rand::seq::SliceRandom;
|
||||
use rand::Rng;
|
||||
use roaring::RoaringBitmap;
|
||||
use serde_json::{json, Value};
|
||||
|
||||
use crate::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
|
||||
use crate::index::tests::TempIndex;
|
||||
use crate::index::Index;
|
||||
use crate::update::{
|
||||
IndexDocuments, IndexDocumentsConfig, IndexDocumentsMethod, IndexerConfig, Settings,
|
||||
};
|
||||
use crate::{DocumentId, FieldId, BEU32};
|
||||
|
||||
static JSON: Lazy<Vec<u8>> = Lazy::new(|| {
|
||||
let mut rng = rand::thread_rng();
|
||||
let num_docs = rng.gen_range(10..30);
|
||||
|
||||
let mut builder = DocumentsBatchBuilder::new(Vec::new());
|
||||
let txts = ["Toto", "Titi", "Tata"];
|
||||
let cats = (1..10).map(|i| i.to_string()).collect::<Vec<_>>();
|
||||
let cat_ints = (1..10).collect::<Vec<_>>();
|
||||
|
||||
for i in 0..num_docs {
|
||||
let txt = txts.choose(&mut rng).unwrap();
|
||||
let mut sample_txts = cats.clone();
|
||||
sample_txts.shuffle(&mut rng);
|
||||
|
||||
let mut sample_ints = cat_ints.clone();
|
||||
sample_ints.shuffle(&mut rng);
|
||||
|
||||
let json = json!({
|
||||
"id": i,
|
||||
"txt": txt,
|
||||
"cat-int": rng.gen_range(0..3),
|
||||
"txts": sample_txts[..(rng.gen_range(0..3))],
|
||||
"cat-ints": sample_ints[..(rng.gen_range(0..3))],
|
||||
});
|
||||
|
||||
let object = match json {
|
||||
Value::Object(object) => object,
|
||||
_ => panic!(),
|
||||
};
|
||||
|
||||
builder.append_json_object(&object).unwrap();
|
||||
}
|
||||
|
||||
builder.into_inner().unwrap()
|
||||
});
|
||||
|
||||
    /// Returns a temporary index populated with random test documents, the FieldId for the
    /// distinct attribute, and the RoaringBitmap with the document ids.
    pub(crate) fn generate_index(distinct: &str) -> (TempIndex, FieldId, RoaringBitmap) {
        let index = TempIndex::new();
        let mut txn = index.write_txn().unwrap();

        // set the distinct attribute for the index.
        let config = IndexerConfig::default();
        let mut update = Settings::new(&mut txn, &index, &config);
        update.set_distinct_field(distinct.to_string());
        update.execute(|_| (), || false).unwrap();

        // add documents to the index
        let config = IndexerConfig::default();
        let indexing_config = IndexDocumentsConfig {
            update_method: IndexDocumentsMethod::ReplaceDocuments,
            ..Default::default()
        };
        let addition =
            IndexDocuments::new(&mut txn, &index, &config, indexing_config, |_| (), || false)
                .unwrap();

        let reader =
            crate::documents::DocumentsBatchReader::from_reader(Cursor::new(JSON.as_slice()))
                .unwrap();

        let (addition, user_error) = addition.add_documents(reader).unwrap();
        user_error.unwrap();
        addition.execute().unwrap();

        let fields_map = index.fields_ids_map(&txn).unwrap();
        let fid = fields_map.id(distinct).unwrap();

        let documents = DocumentsBatchReader::from_reader(Cursor::new(JSON.as_slice())).unwrap();
        let map = (0..documents.documents_count()).collect();

        txn.commit().unwrap();

        (index, fid, map)
    }

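A hypothetical smoke test, not present in the diff, sketching how generate_index is meant to be consumed; Index::number_of_documents is assumed available, as elsewhere in the crate.

    #[test]
    fn generated_index_smoke() {
        // One document id is expected per generated document.
        let (index, _fid, candidates) = generate_index("txt");
        let txn = index.read_txn().unwrap();
        assert_eq!(candidates.len(), index.number_of_documents(&txn).unwrap());
    }
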
    /// Checks that all the candidates are distinct, and returns the number of candidates.
    pub(crate) fn validate_distinct_candidates(
        candidates: impl Iterator<Item = crate::Result<DocumentId>>,
        distinct: FieldId,
        index: &Index,
    ) -> usize {
        fn test(seen: &mut HashSet<String>, value: &Value) {
            match value {
                Value::Null | Value::Object(_) | Value::Bool(_) => (),
                Value::Number(_) | Value::String(_) => {
                    let s = value.to_string();
                    assert!(seen.insert(s));
                }
                Value::Array(values) => values.iter().for_each(|value| test(seen, value)),
            }
        }

        let mut seen = HashSet::<String>::new();

        let txn = index.read_txn().unwrap();
        let mut count = 0;
        for candidate in candidates {
            count += 1;
            let candidate = candidate.unwrap();
            let id = BEU32::new(candidate);
            let document = index.documents.get(&txn, &id).unwrap().unwrap();
            let value = document.get(distinct).unwrap();
            let value = serde_json::from_slice(value).unwrap();
            test(&mut seen, &value);
        }
        count
    }
}

@@ -1,55 +0,0 @@
use roaring::bitmap::IntoIter;
use roaring::RoaringBitmap;

use super::{Distinct, DocIter};
use crate::{DocumentId, Result};

/// A distinct implementer that does not deduplicate anything and simply
/// returns an iterator over the candidates.
pub struct NoopDistinct;

pub struct NoopDistinctIter {
    candidates: IntoIter,
    excluded: RoaringBitmap,
}

impl Iterator for NoopDistinctIter {
    type Item = Result<DocumentId>;

    fn next(&mut self) -> Option<Self::Item> {
        self.candidates.next().map(Ok)
    }
}

impl DocIter for NoopDistinctIter {
    fn into_excluded(self) -> RoaringBitmap {
        self.excluded
    }
}

impl Distinct for NoopDistinct {
    type Iter = NoopDistinctIter;

    fn distinct(&mut self, candidates: RoaringBitmap, excluded: RoaringBitmap) -> Self::Iter {
        NoopDistinctIter { candidates: candidates.into_iter(), excluded }
    }
}

#[cfg(test)]
mod test {
    use super::*;

    #[test]
    fn test_noop() {
        let candidates = (1..10).collect();
        let excluded = RoaringBitmap::new();
        let mut iter = NoopDistinct.distinct(candidates, excluded);
        assert_eq!(
            iter.by_ref().map(Result::unwrap).collect::<Vec<_>>(),
            (1..10).collect::<Vec<_>>()
        );

        let excluded = iter.into_excluded();
        assert!(excluded.is_empty());
    }
}

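NoopDistinct never filtered anything: its iterator forwards every candidate and hands the excluded set back untouched, as the test above pins down. It was presumably the implementer selected when no distinct attribute is configured on the index; the whole file (55 lines, per the @@ -1,55 +0,0 @@ header) is removed by this refactor.
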
@@ -73,7 +73,7 @@ impl<'a> FacetDistribution<'a> {

         let distribution_prelength = distribution.len();
         let db = self.index.field_id_docid_facet_f64s;
-        for docid in candidates.into_iter() {
+        for docid in candidates {
             key_buffer.truncate(mem::size_of::<FieldId>());
             key_buffer.extend_from_slice(&docid.to_be_bytes());
             let iter = db
@@ -97,7 +97,7 @@ impl<'a> FacetDistribution<'a> {
         let mut key_buffer: Vec<_> = field_id.to_be_bytes().to_vec();

         let db = self.index.field_id_docid_facet_strings;
-        'outer: for docid in candidates.into_iter() {
+        'outer: for docid in candidates {
             key_buffer.truncate(mem::size_of::<FieldId>());
             key_buffer.extend_from_slice(&docid.to_be_bytes());
             let iter = db
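The two hunks above are mechanical cleanups: a for loop already desugars through IntoIterator, so calling .into_iter() on the bitmap before looping is redundant (clippy's explicit_into_iter_loop lint). A standalone illustration, assuming only the roaring crate:

use roaring::RoaringBitmap;

fn main() {
    let candidates: RoaringBitmap = (0..10u32).collect();
    // Both loops desugar to `IntoIterator::into_iter(candidates)`,
    // so the explicit call adds nothing.
    for docid in candidates.clone().into_iter() {
        let _ = docid;
    }
    for docid in candidates {
        let _ = docid;
    }
}
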
@@ -309,7 +309,7 @@ impl<'a> FacetDistribution<'a> {
         let mut distribution = BTreeMap::new();
         for (fid, name) in fields_ids_map.iter() {
             if crate::is_faceted(name, &fields) {
-                let min_value = if let Some(min_value) = crate::search::criteria::facet_min_value(
+                let min_value = if let Some(min_value) = crate::search::facet::facet_min_value(
                     self.index,
                     self.rtxn,
                     fid,
@@ -319,7 +319,7 @@ impl<'a> FacetDistribution<'a> {
                 } else {
                     continue;
                 };
-                let max_value = if let Some(max_value) = crate::search::criteria::facet_max_value(
+                let max_value = if let Some(max_value) = crate::search::facet::facet_max_value(
                     self.index,
                     self.rtxn,
                     fid,
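These two hunks follow a module move rather than a behavior change: the facet_min_value and facet_max_value helpers appear to have moved from crate::search::criteria to crate::search::facet, and FacetDistribution updates its call sites accordingly.
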
@@ -505,7 +505,7 @@ mod tests {

         let map = FacetDistribution::new(&txn, &index)
             .facets(std::iter::once("colour"))
-            .candidates((0..10_000).into_iter().collect())
+            .candidates((0..10_000).collect())
             .execute()
             .unwrap();

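The remaining test hunks repeat one more flavor of the same cleanup: a Range is itself an Iterator, so .into_iter() is an identity call (clippy's useless_conversion lint) and collect() can be invoked on the range directly. For instance:

use roaring::RoaringBitmap;

fn main() {
    // `0..10_000` is already an iterator; `.into_iter()` is the identity here.
    let before: RoaringBitmap = (0..10_000u32).into_iter().collect();
    let after: RoaringBitmap = (0..10_000u32).collect();
    assert_eq!(before, after);
}
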
@@ -513,7 +513,7 @@ mod tests {

         let map = FacetDistribution::new(&txn, &index)
             .facets(std::iter::once("colour"))
-            .candidates((0..5_000).into_iter().collect())
+            .candidates((0..5_000).collect())
             .execute()
             .unwrap();

@@ -521,7 +521,7 @@ mod tests {

         let map = FacetDistribution::new(&txn, &index)
             .facets(std::iter::once("colour"))
-            .candidates((0..5_000).into_iter().collect())
+            .candidates((0..5_000).collect())
             .execute()
             .unwrap();

@@ -529,7 +529,7 @@ mod tests {

         let map = FacetDistribution::new(&txn, &index)
             .facets(std::iter::once("colour"))
-            .candidates((0..5_000).into_iter().collect())
+            .candidates((0..5_000).collect())
             .max_values_per_facet(1)
             .execute()
             .unwrap();
@@ -546,7 +546,7 @@ mod tests {
             .update_settings(|settings| settings.set_filterable_fields(hashset! { S("colour") }))
             .unwrap();

-        let facet_values = (0..1000).into_iter().map(|x| format!("{x:x}")).collect::<Vec<_>>();
+        let facet_values = (0..1000).map(|x| format!("{x:x}")).collect::<Vec<_>>();

         let mut documents = vec![];
         for i in 0..10_000 {
@@ -582,7 +582,7 @@ mod tests {

         let map = FacetDistribution::new(&txn, &index)
             .facets(std::iter::once("colour"))
-            .candidates((0..10_000).into_iter().collect())
+            .candidates((0..10_000).collect())
             .execute()
             .unwrap();

@@ -590,7 +590,7 @@ mod tests {

         let map = FacetDistribution::new(&txn, &index)
             .facets(std::iter::once("colour"))
-            .candidates((0..5_000).into_iter().collect())
+            .candidates((0..5_000).collect())
             .execute()
             .unwrap();

@@ -606,7 +606,7 @@ mod tests {
             .update_settings(|settings| settings.set_filterable_fields(hashset! { S("colour") }))
             .unwrap();

-        let facet_values = (0..1000).into_iter().collect::<Vec<_>>();
+        let facet_values = (0..1000).collect::<Vec<_>>();

         let mut documents = vec![];
         for i in 0..1000 {
@@ -634,7 +634,7 @@ mod tests {

         let map = FacetDistribution::new(&txn, &index)
             .facets(std::iter::once("colour"))
-            .candidates((0..1000).into_iter().collect())
+            .candidates((0..1000).collect())
             .compute_stats()
             .unwrap();

@@ -642,7 +642,7 @@ mod tests {

         let map = FacetDistribution::new(&txn, &index)
             .facets(std::iter::once("colour"))
-            .candidates((217..777).into_iter().collect())
+            .candidates((217..777).collect())
             .compute_stats()
             .unwrap();

@@ -658,7 +658,7 @@ mod tests {
             .update_settings(|settings| settings.set_filterable_fields(hashset! { S("colour") }))
             .unwrap();

-        let facet_values = (0..1000).into_iter().collect::<Vec<_>>();
+        let facet_values = (0..1000).collect::<Vec<_>>();

         let mut documents = vec![];
         for i in 0..1000 {
@@ -686,7 +686,7 @@ mod tests {

         let map = FacetDistribution::new(&txn, &index)
             .facets(std::iter::once("colour"))
-            .candidates((0..1000).into_iter().collect())
+            .candidates((0..1000).collect())
             .compute_stats()
             .unwrap();

@@ -694,7 +694,7 @@ mod tests {

         let map = FacetDistribution::new(&txn, &index)
             .facets(std::iter::once("colour"))
-            .candidates((217..777).into_iter().collect())
+            .candidates((217..777).collect())
             .compute_stats()
             .unwrap();

@@ -710,7 +710,7 @@ mod tests {
             .update_settings(|settings| settings.set_filterable_fields(hashset! { S("colour") }))
             .unwrap();

-        let facet_values = (0..1000).into_iter().collect::<Vec<_>>();
+        let facet_values = (0..1000).collect::<Vec<_>>();

         let mut documents = vec![];
         for i in 0..1000 {
@@ -738,7 +738,7 @@ mod tests {

         let map = FacetDistribution::new(&txn, &index)
             .facets(std::iter::once("colour"))
-            .candidates((0..1000).into_iter().collect())
+            .candidates((0..1000).collect())
             .compute_stats()
             .unwrap();

@@ -746,7 +746,7 @@ mod tests {

         let map = FacetDistribution::new(&txn, &index)
             .facets(std::iter::once("colour"))
-            .candidates((217..777).into_iter().collect())
+            .candidates((217..777).collect())
             .compute_stats()
             .unwrap();

@@ -762,7 +762,7 @@ mod tests {
             .update_settings(|settings| settings.set_filterable_fields(hashset! { S("colour") }))
             .unwrap();

-        let facet_values = (0..1000).into_iter().collect::<Vec<_>>();
+        let facet_values = (0..1000).collect::<Vec<_>>();

         let mut documents = vec![];
         for i in 0..1000 {
@@ -794,7 +794,7 @@ mod tests {

         let map = FacetDistribution::new(&txn, &index)
             .facets(std::iter::once("colour"))
-            .candidates((0..1000).into_iter().collect())
+            .candidates((0..1000).collect())
             .compute_stats()
             .unwrap();

@@ -802,7 +802,7 @@ mod tests {

         let map = FacetDistribution::new(&txn, &index)
             .facets(std::iter::once("colour"))
-            .candidates((217..777).into_iter().collect())
+            .candidates((217..777).collect())
             .compute_stats()
             .unwrap();

@@ -142,7 +142,7 @@ mod tests {
         let indexes = [get_simple_index(), get_random_looking_index()];
         for (i, index) in indexes.iter().enumerate() {
             let txn = index.env.read_txn().unwrap();
-            let candidates = (0..=255).into_iter().collect::<RoaringBitmap>();
+            let candidates = (0..=255).collect::<RoaringBitmap>();
             let mut results = String::new();
             iterate_over_facet_distribution(
                 &txn,
@@ -166,7 +166,7 @@ mod tests {
         let indexes = [get_simple_index(), get_random_looking_index()];
         for (i, index) in indexes.iter().enumerate() {
             let txn = index.env.read_txn().unwrap();
-            let candidates = (0..=255).into_iter().collect::<RoaringBitmap>();
+            let candidates = (0..=255).collect::<RoaringBitmap>();
             let mut results = String::new();
             let mut nbr_facets = 0;
             iterate_over_facet_distribution(

@@ -410,7 +410,7 @@ mod tests {

         let mut results = String::new();

-        for i in (0..=255).into_iter().rev() {
+        for i in (0..=255).rev() {
             let i = i as f64;
             let start = Bound::Included(i);
             let end = Bound::Included(255.);
@@ -431,7 +431,7 @@ mod tests {

         let mut results = String::new();

-        for i in (0..=255).into_iter().rev() {
+        for i in (0..=255).rev() {
             let i = i as f64;
             let start = Bound::Excluded(i);
             let end = Bound::Excluded(255.);
@@ -466,7 +466,7 @@ mod tests {

         let mut results = String::new();

-        for i in (0..=128).into_iter().rev() {
+        for i in (0..=128).rev() {
             let i = i as f64;
             let start = Bound::Included(i);
             let end = Bound::Included(255. - i);
@@ -491,7 +491,7 @@ mod tests {

         let mut results = String::new();

-        for i in (0..=128).into_iter().rev() {
+        for i in (0..=128).rev() {
             let i = i as f64;
             let start = Bound::Excluded(i);
             let end = Bound::Excluded(255. - i);

@@ -132,7 +132,7 @@ mod tests {
         let indexes = [get_simple_index(), get_random_looking_index()];
         for (i, index) in indexes.iter().enumerate() {
             let txn = index.env.read_txn().unwrap();
-            let candidates = (200..=300).into_iter().collect::<RoaringBitmap>();
+            let candidates = (200..=300).collect::<RoaringBitmap>();
             let mut results = String::new();
             let iter = ascending_facet_sort(&txn, index.content, 0, candidates).unwrap();
             for el in iter {
@@ -154,7 +154,7 @@ mod tests {
         ];
         for (i, index) in indexes.iter().enumerate() {
             let txn = index.env.read_txn().unwrap();
-            let candidates = (200..=300).into_iter().collect::<RoaringBitmap>();
+            let candidates = (200..=300).collect::<RoaringBitmap>();
             let mut results = String::new();
             let iter = ascending_facet_sort(&txn, index.content, 0, candidates.clone()).unwrap();
             for el in iter {

@@ -142,7 +142,7 @@ mod tests {
         ];
         for (i, index) in indexes.iter().enumerate() {
             let txn = index.env.read_txn().unwrap();
-            let candidates = (200..=300).into_iter().collect::<RoaringBitmap>();
+            let candidates = (200..=300).collect::<RoaringBitmap>();
             let mut results = String::new();
             let db = index.content.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>();
             let iter = descending_facet_sort(&txn, db, 0, candidates).unwrap();
@@ -165,7 +165,7 @@ mod tests {
         ];
         for (i, index) in indexes.iter().enumerate() {
             let txn = index.env.read_txn().unwrap();
-            let candidates = (200..=300).into_iter().collect::<RoaringBitmap>();
+            let candidates = (200..=300).collect::<RoaringBitmap>();
             let mut results = String::new();
             let db = index.content.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>();
             let iter = descending_facet_sort(&txn, db, 0, candidates.clone()).unwrap();

Some files were not shown because too many files have changed in this diff.