mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-07-22 14:21:03 +00:00
Compare commits
195 Commits
v0.16.0
...
release-v0
Author | SHA1 | Date | |
---|---|---|---|
7807a8dcff | |||
0bad5529d8 | |||
4fe885408b | |||
9a1ab4e69f | |||
e0b3c4f82f | |||
ac858d9800 | |||
9587ea7f06 | |||
7f68b83cb7 | |||
28095c6454 | |||
bb7d3be1b8 | |||
70fd4f109d | |||
186b0869df | |||
7652fc1a04 | |||
89d13706f1 | |||
d4b1331a0a | |||
a2c8dae914 | |||
56b4782ee1 | |||
9992c36ced | |||
81255814b1 | |||
018cadc598 | |||
0a0eee4993 | |||
0c27bea135 | |||
f6d0689967 | |||
a2ac2de011 | |||
5ca3382f5c | |||
dcc6f20f31 | |||
f475385788 | |||
ce652fc8df | |||
07e7acc35d | |||
51e0d6d5ee | |||
70dce6cc0b | |||
77083d9e80 | |||
4a66803d76 | |||
d1f34f926e | |||
a77d517ac1 | |||
d8a337fcac | |||
ee02d55e67 | |||
417d0ae92a | |||
22108f9f90 | |||
101e050746 | |||
739c860cfd | |||
f01bb9cee3 | |||
86f32e4ee4 | |||
1873c0399a | |||
47eeed0a4c | |||
4d08f04db2 | |||
4ea0e0fc05 | |||
b28be43cc6 | |||
4a71861066 | |||
5f25703d44 | |||
e0976d10ba | |||
ea681026f7 | |||
759f6b48ee | |||
811426b161 | |||
b1d9ad7134 | |||
e000e10e01 | |||
8dea9662dc | |||
cb50781d2d | |||
1df0fdf3e2 | |||
a95a18afe4 | |||
69c91d2b56 | |||
97ba5e97c6 | |||
8760beed1c | |||
15464e57af | |||
c984fa1071 | |||
97f35de41f | |||
81e9fd8933 | |||
f0ca193122 | |||
940f83698c | |||
f37a420a04 | |||
c1808513fe | |||
eeccdce33a | |||
a6667b14df | |||
62e908264e | |||
2fe52d0a4f | |||
d01c93aeee | |||
c75ffbf3d5 | |||
e3e475c5b1 | |||
1d910dbb42 | |||
bf3f36b46e | |||
ff38220b68 | |||
7a7cb9bcbf | |||
fe9c99a11b | |||
9b47bbc1ac | |||
430a5f902b | |||
bc0d53e819 | |||
0bb8b3a68d | |||
e5c220b82c | |||
60c636738b | |||
06b2a587af | |||
26b1e5a51b | |||
81f343a46a | |||
956adfc90a | |||
c7c8ca63b6 | |||
fa40c6e3d4 | |||
7ccbbb7a75 | |||
948c89c26f | |||
768791440a | |||
08a8dc0d0d | |||
0675ecdd73 | |||
08c160c178 | |||
677627586c | |||
0731971300 | |||
c290719984 | |||
2a145e288c | |||
aeb676e757 | |||
2852349e68 | |||
0447594e02 | |||
748a8240dd | |||
808be4678a | |||
398577f116 | |||
8e64a24d19 | |||
8b149c9aa3 | |||
a7c88c7951 | |||
db64e19b8d | |||
b574960755 | |||
c6434f609c | |||
206308c1aa | |||
6527d3e492 | |||
e616b1e356 | |||
8843062604 | |||
5e00842087 | |||
8a4d05b7bb | |||
061832af7f | |||
9dd818ed7b | |||
0e04c90abe | |||
83ea088bf7 | |||
48eb78b14d | |||
e3d1314bd8 | |||
a05aef5c14 | |||
3de5161dd8 | |||
8e0d8f4533 | |||
d12ef576fc | |||
a05eea3a11 | |||
446b2e7058 | |||
e06f3808c0 | |||
6d79107b14 | |||
5fe0e06342 | |||
6eb7843858 | |||
2904ca7f57 | |||
54686b0505 | |||
861c6fec06 | |||
eec954ede1 | |||
aa99c1ba55 | |||
dec0e2545d | |||
90cf4b9462 | |||
2bd5d2474e | |||
a6e08a83a7 | |||
ed11dd62da | |||
c977b70921 | |||
31c9ccd8be | |||
044dbb0333 | |||
d45c794a9e | |||
c9dd7e10b9 | |||
56ad400c49 | |||
e2b0402cf5 | |||
0c7fffeaf6 | |||
5f8dc21dd2 | |||
7a27f9b610 | |||
1944dd70c7 | |||
3ec76ac33d | |||
72bc22dfd1 | |||
b8e677efd2 | |||
65079f5e2e | |||
cfb21b94e8 | |||
cf74cfed15 | |||
f564a9ce51 | |||
cd1a3ad7c9 | |||
85d0a914ac | |||
d3e7e18b7d | |||
d6c76b02e3 | |||
fe3e20751c | |||
aab041e692 | |||
75e22fc7f5 | |||
6fff49b33b | |||
2eaab48532 | |||
43df4a56c4 | |||
680756500c | |||
0645a6568e | |||
3a0861694d | |||
0f4182bddf | |||
cc4284b89e | |||
a326466f32 | |||
5a67862e00 | |||
201bb3f80a | |||
49afe7d89f | |||
f968d039f7 | |||
705669ddf8 | |||
73dd345cda | |||
65c6e46775 | |||
7a1d003341 | |||
6a2a56d48f | |||
9ff5bdd297 | |||
93953103ad | |||
f25890c140 |
16
.github/workflows/check-updated-changelog.yml
vendored
16
.github/workflows/check-updated-changelog.yml
vendored
@ -1,16 +0,0 @@
|
||||
name: Check if the CHANGELOG.md has been updated
|
||||
|
||||
on: [pull_request]
|
||||
|
||||
jobs:
|
||||
check:
|
||||
name: Test on ${{ matrix.os }}
|
||||
if: ${{ !contains(github.event.pull_request.labels.*.name, 'ignore-changelog') }}
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- name: Checking the CHANGELOG.md has been updated in this PR
|
||||
run: |
|
||||
set -e
|
||||
git fetch origin ${{ github.base_ref }}
|
||||
git diff --name-only origin/${{ github.base_ref }} | grep -q CHANGELOG.md
|
2
.github/workflows/coverage.yml
vendored
2
.github/workflows/coverage.yml
vendored
@ -7,7 +7,7 @@ name: Execute code coverage
|
||||
|
||||
jobs:
|
||||
nightly-coverage:
|
||||
runs-on: ubuntu-latest
|
||||
runs-on: ubuntu-18.04
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- uses: actions-rs/toolchain@v1
|
||||
|
4
.github/workflows/publish-binaries.yml
vendored
4
.github/workflows/publish-binaries.yml
vendored
@ -10,9 +10,9 @@ jobs:
|
||||
runs-on: ${{ matrix.os }}
|
||||
strategy:
|
||||
matrix:
|
||||
os: [ubuntu-latest, macos-latest, windows-latest]
|
||||
os: [ubuntu-18.04, macos-latest, windows-latest]
|
||||
include:
|
||||
- os: ubuntu-latest
|
||||
- os: ubuntu-18.04
|
||||
artifact_name: meilisearch
|
||||
asset_name: meilisearch-linux-amd64
|
||||
- os: macos-latest
|
||||
|
4
.github/workflows/publish-deb-brew-pkg.yml
vendored
4
.github/workflows/publish-deb-brew-pkg.yml
vendored
@ -7,7 +7,7 @@ on:
|
||||
jobs:
|
||||
debian:
|
||||
name: Publish debian packagge
|
||||
runs-on: ubuntu-latest
|
||||
runs-on: ubuntu-18.04
|
||||
steps:
|
||||
- uses: hecrj/setup-rust-action@master
|
||||
with:
|
||||
@ -29,7 +29,7 @@ jobs:
|
||||
|
||||
homebrew:
|
||||
name: Bump Homebrew formula
|
||||
runs-on: ubuntu-latest
|
||||
runs-on: ubuntu-18.04
|
||||
steps:
|
||||
- name: Create PR to Homebrew
|
||||
uses: mislav/bump-homebrew-formula-action@v1
|
||||
|
2
.github/workflows/publish-docker-latest.yml
vendored
2
.github/workflows/publish-docker-latest.yml
vendored
@ -7,7 +7,7 @@ name: Publish latest image to Docker Hub
|
||||
|
||||
jobs:
|
||||
build:
|
||||
runs-on: ubuntu-latest
|
||||
runs-on: ubuntu-18.04
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- name: Check if current release is latest
|
||||
|
2
.github/workflows/publish-docker-tag.yml
vendored
2
.github/workflows/publish-docker-tag.yml
vendored
@ -8,7 +8,7 @@ name: Publish tagged image to Docker Hub
|
||||
|
||||
jobs:
|
||||
build:
|
||||
runs-on: ubuntu-latest
|
||||
runs-on: ubuntu-18.04
|
||||
steps:
|
||||
- uses: actions/checkout@v1
|
||||
- name: Publish to Registry
|
||||
|
16
.github/workflows/test.yml
vendored
16
.github/workflows/test.yml
vendored
@ -16,7 +16,7 @@ jobs:
|
||||
runs-on: ${{ matrix.os }}
|
||||
strategy:
|
||||
matrix:
|
||||
os: [ubuntu-latest, macos-latest]
|
||||
os: [ubuntu-18.04, macos-latest]
|
||||
steps:
|
||||
- uses: actions/checkout@v1
|
||||
- uses: actions-rs/toolchain@v1
|
||||
@ -30,19 +30,15 @@ jobs:
|
||||
with:
|
||||
command: test
|
||||
args: --locked --release
|
||||
- name: Run cargo test dump
|
||||
uses: actions-rs/cargo@v1
|
||||
with:
|
||||
command: test
|
||||
args: dump --locked --release -- --ignored --test-threads 1
|
||||
- name: Run cargo clippy
|
||||
uses: actions-rs/cargo@v1
|
||||
with:
|
||||
command: clippy
|
||||
args: --all-targets
|
||||
|
||||
build-image:
|
||||
name: Test the build of Docker image
|
||||
runs-on: ubuntu-latest
|
||||
runs-on: ubuntu-18.04
|
||||
steps:
|
||||
- uses: actions/checkout@v1
|
||||
- run: docker build . --file Dockerfile -t meilisearch
|
||||
@ -53,7 +49,7 @@ jobs:
|
||||
name: create prerelease
|
||||
needs: [check, build-image]
|
||||
if: ${{ contains(github.ref, 'release-') && github.event_name == 'push' }}
|
||||
runs-on: ubuntu-latest
|
||||
runs-on: ubuntu-18.04
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v2
|
||||
@ -80,13 +76,13 @@ jobs:
|
||||
name: create release
|
||||
needs: [check, build-image]
|
||||
if: ${{ contains(github.ref, 'tags/v') }}
|
||||
runs-on: ubuntu-latest
|
||||
runs-on: ubuntu-18.04
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v2
|
||||
- name: Get version number
|
||||
id: version-number
|
||||
run: echo "##[set-output name=number;]$(echo ${{ github.ref }} | sed 's/.*\(v.*\)/\1/')"
|
||||
run: echo "##[set-output name=number;]$(echo ${{ github.ref }} | sed 's/.*\(v.*\)/\1/')"
|
||||
- name: Create Release
|
||||
id: create_release
|
||||
uses: actions/create-release@v1
|
||||
|
43
CHANGELOG.md
43
CHANGELOG.md
@ -1,9 +1,46 @@
|
||||
## v0.16.0
|
||||
## v0.20.0 - 2021-03-22
|
||||
|
||||
- Fix build on mac M1 (#1280)
|
||||
- Server root returns 200 in production (#1292)
|
||||
- Healthcheck returns 200 (#1291)
|
||||
- Snapshot temporary files are not created in /tmp anymore (#1238)
|
||||
|
||||
## v0.19.0 - 2021-02-09
|
||||
|
||||
- The snapshots are now created and then renamed in atomically (#1172)
|
||||
- Fix a race condition when an update and a document addition are processed immediately one after the other (#1176)
|
||||
- Latin synonyms are normalized during indexation (#1174)
|
||||
|
||||
## v0.18.1 - 2021-01-14
|
||||
|
||||
- Fix unexpected CORS error (#1185)
|
||||
|
||||
## v0.18.0 - 2021-01-11
|
||||
|
||||
- Integration with the new tokenizer (#1091)
|
||||
- Fix setting consistency bug (#1128)
|
||||
- Fix attributes to retrieve bug (#1131)
|
||||
- Increase default payload size (#1147)
|
||||
- Improvements to code quality (#1167, #1165, #1126, #1151)
|
||||
|
||||
## v0.17.0 - 2020-11-30
|
||||
- Fix corrupted data during placeholder search (#1089)
|
||||
- Remove maintenance error from http (#1082)
|
||||
- Disable frontend in production (#1097)
|
||||
- Update nbHits count with filtered documents (#849)
|
||||
- Remove update changelog ci check (#1090)
|
||||
- Add deploy on Platform.sh option to README (#1087)
|
||||
- Change movie gifs in README (#1077)
|
||||
- Remove some clippy warnings (#1100)
|
||||
- Improve script `download-latest.sh` (#1054)
|
||||
- Bump dependencies version (#1056, #1057, #1059)
|
||||
|
||||
## v0.16.0 - 2020-11-02
|
||||
|
||||
- Automatically create index on document push if index doesn't exist (#914)
|
||||
- Sort displayedAttributes and facetDistribution (#946)
|
||||
|
||||
## v0.15.0
|
||||
## v0.15.0 - 2020-09-30
|
||||
|
||||
- Update actix-web dependency to 3.0.0 (#963)
|
||||
- Consider an empty query to be a placeholder search (#916)
|
||||
@ -40,7 +77,7 @@
|
||||
|
||||
## v0.11.1
|
||||
|
||||
- Fix facet cache on document update (#789)
|
||||
- Fix facet cache on document update (#789)
|
||||
- Improvements on settings consistency (#778)
|
||||
|
||||
## v0.11.0
|
||||
|
@ -32,7 +32,7 @@ expanding into more specifics.
|
||||
1. **You're familiar with [Github](https://github.com) and the [pull request](https://help.github.com/en/github/collaborating-with-issues-and-pull-requests/about-pull-requests)
|
||||
workflow.**
|
||||
2. **You've read the MeiliSearch [docs](https://docs.meilisearch.com).**
|
||||
3. **You know about the [MeiliSearch community](https://docs.meilisearch.com/resources/contact.html).
|
||||
3. **You know about the [MeiliSearch community](https://docs.meilisearch.com/learn/what_is_meilisearch/contact.html).
|
||||
Please use this for help.**
|
||||
|
||||
## Your First Contribution
|
||||
@ -91,7 +91,7 @@ aligns better with our process.
|
||||
|
||||
### Setup
|
||||
|
||||
See the [MeiliSearch Docs](https://docs.meilisearch.com/guides/advanced_guides/installation.html) for how to set up a development environment.
|
||||
See the [MeiliSearch Docs](https://docs.meilisearch.com/reference/features/installation.html) for how to set up a development environment.
|
||||
|
||||
### Benchmarking & Profiling
|
||||
|
||||
|
1118
Cargo.lock
generated
1118
Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
@ -3,7 +3,6 @@ members = [
|
||||
"meilisearch-core",
|
||||
"meilisearch-http",
|
||||
"meilisearch-schema",
|
||||
"meilisearch-tokenizer",
|
||||
"meilisearch-types",
|
||||
]
|
||||
|
||||
|
2
LICENSE
2
LICENSE
@ -1,6 +1,6 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2019-2020 Meili SAS
|
||||
Copyright (c) 2019-2021 Meili SAS
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
|
33
README.md
33
README.md
@ -6,6 +6,7 @@
|
||||
|
||||
<h4 align="center">
|
||||
<a href="https://www.meilisearch.com">Website</a> |
|
||||
<a href="https://roadmap.meilisearch.com/tabs/1-under-consideration">Roadmap</a> |
|
||||
<a href="https://blog.meilisearch.com">Blog</a> |
|
||||
<a href="https://fr.linkedin.com/company/meilisearch">LinkedIn</a> |
|
||||
<a href="https://twitter.com/meilisearch">Twitter</a> |
|
||||
@ -28,7 +29,7 @@
|
||||
For more information about features go to [our documentation](https://docs.meilisearch.com/).
|
||||
|
||||
<p align="center">
|
||||
<img src="assets/movies-web-demo.gif" alt="Web interface gif" />
|
||||
<img src="assets/trumen_quick_loop.gif" alt="Web interface gif" />
|
||||
</p>
|
||||
|
||||
## ✨ Features
|
||||
@ -47,7 +48,7 @@ For more information about features go to [our documentation](https://docs.meili
|
||||
|
||||
### Deploy the Server
|
||||
|
||||
#### Brew (Mac OS)
|
||||
#### Homebrew (Mac OS)
|
||||
|
||||
```bash
|
||||
brew update && brew install meilisearch
|
||||
@ -57,7 +58,7 @@ meilisearch
|
||||
#### Docker
|
||||
|
||||
```bash
|
||||
docker run -p 7700:7700 -v $(pwd)/data.ms:/data.ms getmeili/meilisearch
|
||||
docker run -p 7700:7700 -v "$(pwd)/data.ms:/data.ms" getmeili/meilisearch
|
||||
```
|
||||
|
||||
#### Try MeiliSearch in our Sandbox
|
||||
@ -68,6 +69,12 @@ Create a MeiliSearch instance in [MeiliSearch Sandbox](https://sandbox.meilisear
|
||||
|
||||
[](https://marketplace.digitalocean.com/apps/meilisearch?action=deploy&refcode=7c67bd97e101)
|
||||
|
||||
#### Deploy on Platform.sh
|
||||
|
||||
<a href="https://console.platform.sh/projects/create-project?template=https://raw.githubusercontent.com/platformsh/template-builder/master/templates/meilisearch/.platform.template.yaml&utm_content=meilisearch&utm_source=github&utm_medium=button&utm_campaign=deploy_on_platform">
|
||||
<img src="https://platform.sh/images/deploy/lg-blue.svg" alt="Deploy on Platform.sh" width="180px" />
|
||||
</a>
|
||||
|
||||
#### APT (Debian & Ubuntu)
|
||||
|
||||
```bash
|
||||
@ -90,13 +97,6 @@ If you have the latest stable Rust toolchain installed on your local system, clo
|
||||
```bash
|
||||
git clone https://github.com/meilisearch/MeiliSearch.git
|
||||
cd MeiliSearch
|
||||
```
|
||||
|
||||
In the cloned repository, compile MeiliSearch.
|
||||
|
||||
```bash
|
||||
rustup override set stable
|
||||
rustup update stable
|
||||
cargo run --release
|
||||
```
|
||||
|
||||
@ -179,16 +179,17 @@ Hey! We're glad you're thinking about contributing to MeiliSearch! If you think
|
||||
## Telemetry
|
||||
|
||||
MeiliSearch collects anonymous data regarding general usage.
|
||||
This helps us better understand developers usage of MeiliSearch features.<br/>
|
||||
To see what information we're retrieving, please see the complete list [on the dedicated issue](https://github.com/meilisearch/MeiliSearch/issues/720).<br/>
|
||||
We also use Sentry to make us crash and error reports. If you want to know more about what Sentry collects, please visit their [privacy policy website](https://sentry.io/privacy/).<br/>
|
||||
This helps us better understand developers' usage of MeiliSearch features.
|
||||
|
||||
To see what information we're retrieving, please see the complete list [on the dedicated issue](https://github.com/meilisearch/MeiliSearch/issues/720).
|
||||
|
||||
We also use Sentry to make us crash and error reports. If you want to know more about what Sentry collects, please visit their [privacy policy website](https://sentry.io/privacy/).
|
||||
|
||||
This program is optional, you can disable these analytics by using the `MEILI_NO_ANALYTICS` env variable.
|
||||
|
||||
## 💌 Contact
|
||||
|
||||
Feel free to contact us about any questions you may have:
|
||||
* At [bonjour@meilisearch.com](mailto:bonjour@meilisearch.com)
|
||||
* Via the chat box available on every page of [our documentation](https://docs.meilisearch.com/) and on [our landing page](https://www.meilisearch.com/).
|
||||
Feel free to contact us with any questions you may have:
|
||||
* 🆕 Join our [GitHub Discussions forum](https://github.com/meilisearch/MeiliSearch/discussions)
|
||||
* Join our [Slack community](https://slack.meilisearch.com/).
|
||||
* By opening an issue.
|
||||
|
Binary file not shown.
Before Width: | Height: | Size: 5.3 MiB |
BIN
assets/trumen_quick_loop.gif
Normal file
BIN
assets/trumen_quick_loop.gif
Normal file
Binary file not shown.
After Width: | Height: | Size: 1.2 MiB |
@ -1,3 +1,3 @@
|
||||
status = ["Test on macos-latest", "Test on ubuntu-latest"]
|
||||
status = ["Test on macos-latest", "Test on ubuntu-18.04"]
|
||||
# 4 hours timeout
|
||||
timeout-sec = 14400
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -71,7 +71,7 @@ semverLT() {
|
||||
# Returns the tag of the latest stable release (in terms of semver and not of release date)
|
||||
get_latest() {
|
||||
temp_file='temp_file' # temp_file needed because the grep would start before the download is over
|
||||
curl -s 'https://api.github.com/repos/meilisearch/MeiliSearch/releases' > "$temp_file"
|
||||
curl -s 'https://api.github.com/repos/meilisearch/MeiliSearch/releases' > "$temp_file" || return 1
|
||||
releases=$(cat "$temp_file" | \
|
||||
grep -E "tag_name|draft|prerelease" \
|
||||
| tr -d ',"' | cut -d ':' -f2 | tr -d ' ')
|
||||
@ -168,16 +168,17 @@ failure_usage() {
|
||||
|
||||
# MAIN
|
||||
latest="$(get_latest)"
|
||||
get_os
|
||||
if [ "$?" -eq 1 ]; then
|
||||
|
||||
if ! get_os; then
|
||||
failure_usage
|
||||
exit 1
|
||||
fi
|
||||
get_archi
|
||||
if [ "$?" -eq 1 ]; then
|
||||
|
||||
if ! get_archi; then
|
||||
failure_usage
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo "Downloading MeiliSearch binary $latest for $os, architecture $archi..."
|
||||
release_file="meilisearch-$os-$archi"
|
||||
link="https://github.com/meilisearch/MeiliSearch/releases/download/$latest/$release_file"
|
||||
|
@ -1,57 +1,53 @@
|
||||
[package]
|
||||
name = "meilisearch-core"
|
||||
version = "0.16.0"
|
||||
version = "0.20.0"
|
||||
license = "MIT"
|
||||
authors = ["Kerollmops <clement@meilisearch.com>"]
|
||||
edition = "2018"
|
||||
|
||||
[dependencies]
|
||||
arc-swap = "0.4.5"
|
||||
bincode = "1.2.1"
|
||||
arc-swap = "1.2.0"
|
||||
bincode = "1.3.1"
|
||||
byteorder = "1.3.4"
|
||||
chrono = { version = "0.4.19", features = ["serde"] }
|
||||
compact_arena = "0.4.0"
|
||||
compact_arena = "0.4.1"
|
||||
cow-utils = "0.1.2"
|
||||
crossbeam-channel = "0.5.0"
|
||||
deunicode = "1.1.0"
|
||||
either = "1.5.3"
|
||||
env_logger = "0.7.1"
|
||||
fst = "0.4.4"
|
||||
hashbrown = { version = "0.9.0", features = ["serde"] }
|
||||
heed = "0.8.0"
|
||||
indexmap = { version = "1.3.2", features = ["serde-1"] }
|
||||
intervaltree = "0.2.5"
|
||||
itertools = "0.9.0"
|
||||
deunicode = "1.1.1"
|
||||
either = "1.6.1"
|
||||
env_logger = "0.8.2"
|
||||
fst = "0.4.5"
|
||||
hashbrown = { version = "0.9.1", features = ["serde"] }
|
||||
heed = "0.10.6"
|
||||
indexmap = { version = "1.6.1", features = ["serde-1"] }
|
||||
intervaltree = "0.2.6"
|
||||
itertools = "0.10.0"
|
||||
levenshtein_automata = { version = "0.2.0", features = ["fst_automaton"] }
|
||||
log = "0.4.8"
|
||||
meilisearch-error = { path = "../meilisearch-error", version = "0.16.0" }
|
||||
meilisearch-schema = { path = "../meilisearch-schema", version = "0.16.0" }
|
||||
meilisearch-tokenizer = { path = "../meilisearch-tokenizer", version = "0.16.0" }
|
||||
meilisearch-types = { path = "../meilisearch-types", version = "0.16.0" }
|
||||
once_cell = "1.3.1"
|
||||
ordered-float = { version = "2.0.0", features = ["serde"] }
|
||||
log = "0.4.11"
|
||||
meilisearch-error = { path = "../meilisearch-error", version = "0.20.0" }
|
||||
meilisearch-schema = { path = "../meilisearch-schema", version = "0.20.0" }
|
||||
meilisearch-tokenizer = { git = "https://github.com/meilisearch/Tokenizer.git", tag = "v0.1.3" }
|
||||
meilisearch-types = { path = "../meilisearch-types", version = "0.20.0" }
|
||||
once_cell = "1.5.2"
|
||||
ordered-float = { version = "2.0.1", features = ["serde"] }
|
||||
pest = { git = "https://github.com/pest-parser/pest.git", rev = "51fd1d49f1041f7839975664ef71fe15c7dcaf67" }
|
||||
pest_derive = "2.0"
|
||||
regex = "1.4.1"
|
||||
pest_derive = "2.1.0"
|
||||
regex = "1.4.2"
|
||||
sdset = "0.4.0"
|
||||
serde = { version = "1.0.105", features = ["derive"] }
|
||||
serde_json = { version = "1.0.59", features = ["preserve_order"] }
|
||||
serde = { version = "1.0.118", features = ["derive"] }
|
||||
serde_json = { version = "1.0.61", features = ["preserve_order"] }
|
||||
slice-group-by = "0.2.6"
|
||||
unicase = "2.6.0"
|
||||
zerocopy = "0.3.0"
|
||||
|
||||
[dev-dependencies]
|
||||
assert_matches = "1.4.0"
|
||||
criterion = "0.3.1"
|
||||
csv = "1.1.3"
|
||||
rustyline = { version = "6.0.0", default-features = false }
|
||||
structopt = "0.3.20"
|
||||
criterion = "0.3.3"
|
||||
csv = "1.1.5"
|
||||
rustyline = { version = "7.1.0", default-features = false }
|
||||
structopt = "0.3.21"
|
||||
tempfile = "3.1.0"
|
||||
termcolor = "1.1.0"
|
||||
termcolor = "1.1.2"
|
||||
|
||||
[target.'cfg(unix)'.dev-dependencies]
|
||||
jemallocator = "0.3.2"
|
||||
|
||||
[[bench]]
|
||||
name = "search_benchmark"
|
||||
harness = false
|
||||
|
@ -1,108 +0,0 @@
|
||||
#[cfg(test)]
|
||||
#[macro_use]
|
||||
extern crate assert_matches;
|
||||
|
||||
use std::error::Error;
|
||||
use std::fs::File;
|
||||
use std::io::BufReader;
|
||||
use std::iter;
|
||||
use std::path::Path;
|
||||
use std::sync::mpsc;
|
||||
|
||||
use meilisearch_core::{Database, DatabaseOptions};
|
||||
use meilisearch_core::{ProcessedUpdateResult, UpdateStatus};
|
||||
use meilisearch_core::settings::{Settings, SettingsUpdate};
|
||||
use meilisearch_schema::Schema;
|
||||
use serde_json::Value;
|
||||
|
||||
use criterion::{criterion_group, criterion_main, Criterion, BenchmarkId};
|
||||
|
||||
fn prepare_database(path: &Path) -> Database {
|
||||
let database = Database::open_or_create(path, DatabaseOptions::default()).unwrap();
|
||||
let db = &database;
|
||||
|
||||
let (sender, receiver) = mpsc::sync_channel(100);
|
||||
let update_fn = move |_name: &str, update: ProcessedUpdateResult| {
|
||||
sender.send(update.update_id).unwrap()
|
||||
};
|
||||
let index = database.create_index("bench").unwrap();
|
||||
|
||||
database.set_update_callback(Box::new(update_fn));
|
||||
|
||||
db.main_write::<_, _, Box<dyn Error>>(|writer| {
|
||||
index.main.put_schema(writer, &Schema::with_primary_key("id")).unwrap();
|
||||
Ok(())
|
||||
}).unwrap();
|
||||
|
||||
let settings_update: SettingsUpdate = {
|
||||
let path = concat!(env!("CARGO_MANIFEST_DIR"), "/../datasets/movies/settings.json");
|
||||
let file = File::open(path).unwrap();
|
||||
let reader = BufReader::new(file);
|
||||
let settings: Settings = serde_json::from_reader(reader).unwrap();
|
||||
settings.to_update().unwrap()
|
||||
};
|
||||
|
||||
db.update_write::<_, _, Box<dyn Error>>(|writer| {
|
||||
let _update_id = index.settings_update(writer, settings_update).unwrap();
|
||||
Ok(())
|
||||
}).unwrap();
|
||||
|
||||
let mut additions = index.documents_addition();
|
||||
|
||||
let json: Value = {
|
||||
let path = concat!(env!("CARGO_MANIFEST_DIR"), "/../datasets/movies/movies.json");
|
||||
let movies_file = File::open(path).expect("find movies");
|
||||
serde_json::from_reader(movies_file).unwrap()
|
||||
};
|
||||
|
||||
let documents = json.as_array().unwrap();
|
||||
|
||||
for document in documents {
|
||||
additions.update_document(document);
|
||||
}
|
||||
|
||||
let update_id = db.update_write::<_, _, Box<dyn Error>>(|writer| {
|
||||
let update_id = additions.finalize(writer).unwrap();
|
||||
Ok(update_id)
|
||||
}).unwrap();
|
||||
|
||||
// block until the transaction is processed
|
||||
let _ = receiver.into_iter().find(|id| *id == update_id);
|
||||
|
||||
let update_reader = db.update_read_txn().unwrap();
|
||||
let result = index.update_status(&update_reader, update_id).unwrap();
|
||||
assert_matches!(result, Some(UpdateStatus::Processed { content }) if content.error.is_none());
|
||||
|
||||
database
|
||||
}
|
||||
|
||||
pub fn criterion_benchmark(c: &mut Criterion) {
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let database = prepare_database(dir.path());
|
||||
|
||||
let reader = database.main_read_txn().unwrap();
|
||||
let index = database.open_index("bench").unwrap();
|
||||
|
||||
let mut count = 0;
|
||||
let query = "I love paris ";
|
||||
|
||||
let iter = iter::from_fn(|| {
|
||||
count += 1;
|
||||
query.get(0..count)
|
||||
});
|
||||
|
||||
let mut group = c.benchmark_group("searching in movies (19654 docs)");
|
||||
group.sample_size(10);
|
||||
|
||||
for query in iter {
|
||||
let bench_name = BenchmarkId::from_parameter(format!("{:?}", query));
|
||||
group.bench_with_input(bench_name, &query, |b, query| b.iter(|| {
|
||||
let builder = index.query_builder();
|
||||
builder.query(&reader, Some(*query), 0..20).unwrap();
|
||||
}));
|
||||
}
|
||||
group.finish();
|
||||
}
|
||||
|
||||
criterion_group!(benches, criterion_benchmark);
|
||||
criterion_main!(benches);
|
@ -326,8 +326,11 @@ fn search_command(command: SearchCommand, database: Database) -> Result<(), Box<
|
||||
|
||||
let schema = schema.ok_or(meilisearch_core::Error::SchemaMissing)?;
|
||||
|
||||
let fields = command.displayed_fields.iter().map(String::as_str);
|
||||
let fields = HashSet::from_iter(fields);
|
||||
let fields = command
|
||||
.displayed_fields
|
||||
.iter()
|
||||
.map(String::as_str)
|
||||
.collect::<HashSet<_>>();
|
||||
|
||||
let config = Config::builder().auto_add_history(true).build();
|
||||
let mut readline = Editor::<()>::with_config(config);
|
||||
@ -349,8 +352,8 @@ fn search_command(command: SearchCommand, database: Database) -> Result<(), Box<
|
||||
|
||||
if let Some(ref filter) = command.filter {
|
||||
let filter = filter.as_str();
|
||||
let (positive, filter) = if filter.chars().next() == Some('!') {
|
||||
(false, &filter[1..])
|
||||
let (positive, filter) = if let Some(stripped) = filter.strip_prefix('!') {
|
||||
(false, stripped)
|
||||
} else {
|
||||
(true, filter)
|
||||
};
|
||||
|
@ -1,15 +1,4 @@
|
||||
mod dfa;
|
||||
|
||||
use meilisearch_tokenizer::is_cjk;
|
||||
|
||||
pub use self::dfa::{build_dfa, build_prefix_dfa, build_exact_dfa};
|
||||
|
||||
pub fn normalize_str(string: &str) -> String {
|
||||
let mut string = string.to_lowercase();
|
||||
|
||||
if !string.contains(is_cjk) {
|
||||
string = deunicode::deunicode_with_tofu(&string, "");
|
||||
}
|
||||
|
||||
string
|
||||
}
|
||||
|
@ -212,6 +212,7 @@ where
|
||||
FD: Fn(DocumentId) -> Option<u64>,
|
||||
{
|
||||
let mut result = SortResult::default();
|
||||
let mut filtered_count = 0;
|
||||
|
||||
let words_set = index.main.words_fst(reader)?;
|
||||
let stop_words = index.main.stop_words_fst(reader)?;
|
||||
@ -322,19 +323,36 @@ where
|
||||
let filter_accepted = match &filter {
|
||||
Some(filter) => {
|
||||
let entry = filter_map.entry(document.id);
|
||||
*entry.or_insert_with(|| (filter)(document.id))
|
||||
*entry.or_insert_with(|| {
|
||||
let accepted = (filter)(document.id);
|
||||
// we only want to count it out the first time we see it
|
||||
if !accepted {
|
||||
filtered_count += 1;
|
||||
}
|
||||
accepted
|
||||
})
|
||||
}
|
||||
None => true,
|
||||
};
|
||||
|
||||
if filter_accepted {
|
||||
let entry = key_cache.entry(document.id);
|
||||
let key = entry.or_insert_with(|| (distinct)(document.id).map(Rc::new));
|
||||
let mut seen = true;
|
||||
let key = entry.or_insert_with(|| {
|
||||
seen = false;
|
||||
(distinct)(document.id).map(Rc::new)
|
||||
});
|
||||
|
||||
match key.clone() {
|
||||
let distinct = match key.clone() {
|
||||
Some(key) => buf_distinct.register(key),
|
||||
None => buf_distinct.register_without_key(),
|
||||
};
|
||||
|
||||
// we only want to count the document if it is the first time we see it and
|
||||
// if it wasn't accepted by distinct
|
||||
if !seen && !distinct {
|
||||
filtered_count += 1;
|
||||
}
|
||||
}
|
||||
|
||||
// the requested range end is reached: stop computing distinct
|
||||
@ -396,7 +414,7 @@ where
|
||||
}
|
||||
}
|
||||
result.documents = documents;
|
||||
result.nb_hits = docids.len();
|
||||
result.nb_hits = docids.len() - filtered_count;
|
||||
|
||||
Ok(result)
|
||||
}
|
||||
|
@ -61,7 +61,7 @@ pub trait Criterion {
|
||||
}
|
||||
|
||||
pub struct ContextMut<'h, 'p, 'tag, 'txn, 'q> {
|
||||
pub reader: &'h heed::RoTxn<MainT>,
|
||||
pub reader: &'h heed::RoTxn<'h, MainT>,
|
||||
pub postings_lists: &'p mut SmallArena<'tag, PostingsListView<'txn>>,
|
||||
pub query_mapping: &'q HashMap<QueryId, Range<usize>>,
|
||||
pub documents_fields_counts_store: store::DocumentsFieldsCounts,
|
||||
|
@ -22,11 +22,11 @@ type ArcSwapFn = arc_swap::ArcSwapOption<BoxUpdateFn>;
|
||||
|
||||
type SerdeDatetime = SerdeBincode<DateTime<Utc>>;
|
||||
|
||||
pub type MainWriter<'a> = heed::RwTxn<'a, MainT>;
|
||||
pub type MainReader = heed::RoTxn<MainT>;
|
||||
pub type MainWriter<'a, 'b> = heed::RwTxn<'a, 'b, MainT>;
|
||||
pub type MainReader<'a, 'b> = heed::RoTxn<'a, MainT>;
|
||||
|
||||
pub type UpdateWriter<'a> = heed::RwTxn<'a, UpdateT>;
|
||||
pub type UpdateReader = heed::RoTxn<UpdateT>;
|
||||
pub type UpdateWriter<'a, 'b> = heed::RwTxn<'a, 'b, UpdateT>;
|
||||
pub type UpdateReader<'a> = heed::RoTxn<'a, UpdateT>;
|
||||
|
||||
const LAST_UPDATE_KEY: &str = "last-update";
|
||||
|
||||
@ -193,8 +193,8 @@ fn version_guard(path: &Path, create: bool) -> MResult<(u32, u32, u32)> {
|
||||
Err(Error::VersionMismatch(format!("{}.{}.XX", version_major, version_minor)))
|
||||
} else {
|
||||
Ok((
|
||||
version_major.parse().map_err(|e| Error::VersionMismatch(format!("error parsing database version: {}", e)))?,
|
||||
version_minor.parse().map_err(|e| Error::VersionMismatch(format!("error parsing database version: {}", e)))?,
|
||||
version_major.parse().map_err(|e| Error::VersionMismatch(format!("error parsing database version: {}", e)))?,
|
||||
version_minor.parse().map_err(|e| Error::VersionMismatch(format!("error parsing database version: {}", e)))?,
|
||||
version_patch.parse().map_err(|e| Error::VersionMismatch(format!("error parsing database version: {}", e)))?
|
||||
))
|
||||
}
|
||||
@ -212,8 +212,8 @@ fn version_guard(path: &Path, create: bool) -> MResult<(u32, u32, u32)> {
|
||||
current_version_patch).as_bytes())?;
|
||||
|
||||
Ok((
|
||||
current_version_major.parse().map_err(|e| Error::VersionMismatch(format!("error parsing database version: {}", e)))?,
|
||||
current_version_minor.parse().map_err(|e| Error::VersionMismatch(format!("error parsing database version: {}", e)))?,
|
||||
current_version_major.parse().map_err(|e| Error::VersionMismatch(format!("error parsing database version: {}", e)))?,
|
||||
current_version_minor.parse().map_err(|e| Error::VersionMismatch(format!("error parsing database version: {}", e)))?,
|
||||
current_version_patch.parse().map_err(|e| Error::VersionMismatch(format!("error parsing database version: {}", e)))?
|
||||
))
|
||||
} else {
|
||||
@ -350,7 +350,7 @@ impl Database {
|
||||
index.main.put_name(&mut writer, name)?;
|
||||
index.main.put_created_at(&mut writer)?;
|
||||
index.main.put_updated_at(&mut writer)?;
|
||||
index.main.put_schema(&mut writer, &Schema::new())?;
|
||||
index.main.put_schema(&mut writer, &Schema::default())?;
|
||||
|
||||
let env_clone = self.env.clone();
|
||||
let update_env_clone = self.update_env.clone();
|
||||
|
@ -245,8 +245,8 @@ mod test {
|
||||
|
||||
#[test]
|
||||
fn test_facet_key() {
|
||||
let mut schema = Schema::new();
|
||||
let id = schema.insert_and_index("hello").unwrap();
|
||||
let mut schema = Schema::default();
|
||||
let id = schema.insert_with_position("hello").unwrap().0;
|
||||
let facet_list = [schema.id("hello").unwrap()];
|
||||
assert_eq!(
|
||||
FacetKey::from_str("hello:12", &schema, &facet_list).unwrap(),
|
||||
@ -286,8 +286,8 @@ mod test {
|
||||
#[test]
|
||||
fn test_parse_facet_array() {
|
||||
use either::Either::{Left, Right};
|
||||
let mut schema = Schema::new();
|
||||
let _id = schema.insert_and_index("hello").unwrap();
|
||||
let mut schema = Schema::default();
|
||||
let _id = schema.insert_with_position("hello").unwrap();
|
||||
let facet_list = [schema.id("hello").unwrap()];
|
||||
assert_eq!(
|
||||
FacetFilter::from_str("[[\"hello:12\"]]", &schema, &facet_list).unwrap(),
|
||||
|
@ -39,6 +39,7 @@ pub use self::update::{EnqueuedUpdateResult, ProcessedUpdateResult, UpdateStatus
|
||||
pub use meilisearch_types::{DocIndex, DocumentId, Highlight};
|
||||
pub use meilisearch_schema::Schema;
|
||||
pub use query_words_mapper::QueryWordsMapper;
|
||||
pub use query_tree::MAX_QUERY_LEN;
|
||||
|
||||
use compact_arena::SmallArena;
|
||||
use log::{error, trace};
|
||||
|
@ -225,10 +225,17 @@ impl<'c, 'f, 'd, 'i> QueryBuilder<'c, 'f, 'd, 'i> {
|
||||
|
||||
fn sort_result_from_docids(&self, docids: &[DocumentId], range: Range<usize>) -> SortResult {
|
||||
let mut sort_result = SortResult::default();
|
||||
let mut filtered_count = 0;
|
||||
let mut result = match self.filter {
|
||||
Some(ref filter) => docids
|
||||
.iter()
|
||||
.filter(|item| (filter)(**item))
|
||||
.filter(|item| {
|
||||
let accepted = (filter)(**item);
|
||||
if !accepted {
|
||||
filtered_count += 1;
|
||||
}
|
||||
accepted
|
||||
})
|
||||
.skip(range.start)
|
||||
.take(range.end - range.start)
|
||||
.map(|&id| Document::from_highlights(id, &[]))
|
||||
@ -248,15 +255,19 @@ impl<'c, 'f, 'd, 'i> QueryBuilder<'c, 'f, 'd, 'i> {
|
||||
result.retain(|doc| {
|
||||
let id = doc.id;
|
||||
let key = (distinct)(id);
|
||||
match key {
|
||||
let distinct_accepted = match key {
|
||||
Some(key) => distinct_map.register(key),
|
||||
None => distinct_map.register_without_key(),
|
||||
};
|
||||
if !distinct_accepted {
|
||||
filtered_count += 1;
|
||||
}
|
||||
distinct_accepted
|
||||
});
|
||||
}
|
||||
|
||||
sort_result.documents = result;
|
||||
sort_result.nb_hits = docids.len();
|
||||
sort_result.nb_hits = docids.len() - filtered_count;
|
||||
sort_result
|
||||
}
|
||||
|
||||
@ -285,7 +296,6 @@ mod tests {
|
||||
use sdset::SetBuf;
|
||||
use tempfile::TempDir;
|
||||
|
||||
use crate::automaton::normalize_str;
|
||||
use crate::bucket_sort::SimpleMatch;
|
||||
use crate::database::{Database, DatabaseOptions};
|
||||
use crate::store::Index;
|
||||
@ -293,6 +303,35 @@ mod tests {
|
||||
use crate::Document;
|
||||
use meilisearch_schema::Schema;
|
||||
|
||||
fn is_cjk(c: char) -> bool {
|
||||
('\u{1100}'..'\u{11ff}').contains(&c) // Hangul Jamo
|
||||
|| ('\u{2e80}'..'\u{2eff}').contains(&c) // CJK Radicals Supplement
|
||||
|| ('\u{2f00}'..'\u{2fdf}').contains(&c) // Kangxi radical
|
||||
|| ('\u{3000}'..'\u{303f}').contains(&c) // Japanese-style punctuation
|
||||
|| ('\u{3040}'..'\u{309f}').contains(&c) // Japanese Hiragana
|
||||
|| ('\u{30a0}'..'\u{30ff}').contains(&c) // Japanese Katakana
|
||||
|| ('\u{3100}'..'\u{312f}').contains(&c)
|
||||
|| ('\u{3130}'..'\u{318F}').contains(&c) // Hangul Compatibility Jamo
|
||||
|| ('\u{3200}'..'\u{32ff}').contains(&c) // Enclosed CJK Letters and Months
|
||||
|| ('\u{3400}'..'\u{4dbf}').contains(&c) // CJK Unified Ideographs Extension A
|
||||
|| ('\u{4e00}'..'\u{9fff}').contains(&c) // CJK Unified Ideographs
|
||||
|| ('\u{a960}'..'\u{a97f}').contains(&c) // Hangul Jamo Extended-A
|
||||
|| ('\u{ac00}'..'\u{d7a3}').contains(&c) // Hangul Syllables
|
||||
|| ('\u{d7b0}'..'\u{d7ff}').contains(&c) // Hangul Jamo Extended-B
|
||||
|| ('\u{f900}'..'\u{faff}').contains(&c) // CJK Compatibility Ideographs
|
||||
|| ('\u{ff00}'..'\u{ffef}').contains(&c) // Full-width roman characters and half-width katakana
|
||||
}
|
||||
|
||||
fn normalize_str(string: &str) -> String {
|
||||
let mut string = string.to_lowercase();
|
||||
|
||||
if !string.contains(is_cjk) {
|
||||
string = deunicode::deunicode_with_tofu(&string, "");
|
||||
}
|
||||
|
||||
string
|
||||
}
|
||||
|
||||
fn set_from_stream<'f, I, S>(stream: I) -> fst::Set<Vec<u8>>
|
||||
where
|
||||
I: for<'a> fst::IntoStreamer<'a, Into = S, Item = &'a [u8]>,
|
||||
@ -405,7 +444,7 @@ mod tests {
|
||||
for index in indexes {
|
||||
let name = index.attribute.to_string();
|
||||
schema.insert(&name).unwrap();
|
||||
let indexed_pos = schema.set_indexed(&name).unwrap().1;
|
||||
let indexed_pos = schema.insert_with_position(&name).unwrap().1;
|
||||
let index = DocIndex {
|
||||
attribute: indexed_pos.0,
|
||||
..*index
|
||||
@ -1258,15 +1297,15 @@ mod tests {
|
||||
let builder = store.query_builder();
|
||||
let SortResult { documents, .. } = builder.query(&reader, Some("télephone"), 0..20).unwrap();
|
||||
let mut iter = documents.into_iter();
|
||||
|
||||
assert_matches!(iter.next(), Some(Document { id: DocumentId(1), matches, .. }) => {
|
||||
assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches, .. }) => {
|
||||
let mut iter = matches.into_iter();
|
||||
assert_matches!(iter.next(), Some(SimpleMatch { query_index: 0, .. }));
|
||||
assert_matches!(iter.next(), Some(SimpleMatch { query_index: 0, .. }));
|
||||
assert_matches!(iter.next(), None);
|
||||
});
|
||||
assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches, .. }) => {
|
||||
assert_matches!(iter.next(), Some(Document { id: DocumentId(1), matches, .. }) => {
|
||||
let mut iter = matches.into_iter();
|
||||
assert_matches!(iter.next(), Some(SimpleMatch { query_index: 0, distance: 1, word_index: 0, is_exact: false, .. })); // iphone | telephone
|
||||
assert_matches!(iter.next(), Some(SimpleMatch { query_index: 0, .. }));
|
||||
assert_matches!(iter.next(), None);
|
||||
});
|
||||
assert_matches!(iter.next(), None);
|
||||
|
@ -7,15 +7,17 @@ use std::{cmp, fmt, iter::once};
|
||||
|
||||
use fst::{IntoStreamer, Streamer};
|
||||
use itertools::{EitherOrBoth, merge_join_by};
|
||||
use meilisearch_tokenizer::split_query_string;
|
||||
use sdset::{Set, SetBuf, SetOperation};
|
||||
use log::debug;
|
||||
use meilisearch_tokenizer::analyzer::{Analyzer, AnalyzerConfig};
|
||||
use sdset::{Set, SetBuf, SetOperation};
|
||||
|
||||
use crate::database::MainT;
|
||||
use crate::{store, DocumentId, DocIndex, MResult, FstSetCow};
|
||||
use crate::automaton::{normalize_str, build_dfa, build_prefix_dfa, build_exact_dfa};
|
||||
use crate::automaton::{build_dfa, build_prefix_dfa, build_exact_dfa};
|
||||
use crate::QueryWordsMapper;
|
||||
|
||||
pub const MAX_QUERY_LEN: usize = 10;
|
||||
|
||||
#[derive(Clone, PartialEq, Eq, Hash)]
|
||||
pub enum Operation {
|
||||
And(Vec<Operation>),
|
||||
@ -146,7 +148,7 @@ fn split_best_frequency<'a>(reader: &heed::RoTxn<MainT>, ctx: &Context, word: &'
|
||||
}
|
||||
|
||||
fn fetch_synonyms(reader: &heed::RoTxn<MainT>, ctx: &Context, words: &[&str]) -> MResult<Vec<Vec<String>>> {
|
||||
let words = normalize_str(&words.join(" "));
|
||||
let words = &words.join(" ");
|
||||
let set = ctx.synonyms.synonyms_fst(reader, words.as_bytes())?;
|
||||
|
||||
let mut strings = Vec::new();
|
||||
@ -174,15 +176,26 @@ where I: IntoIterator<Item=Operation>,
|
||||
|
||||
const MAX_NGRAM: usize = 3;
|
||||
|
||||
fn split_query_string<A: AsRef<[u8]>>(s: &str, stop_words: &fst::Set<A>) -> Vec<(usize, String)> {
|
||||
// TODO: Use global instance instead
|
||||
Analyzer::new(AnalyzerConfig::default_with_stopwords(stop_words))
|
||||
.analyze(s)
|
||||
.tokens()
|
||||
.filter(|t| t.is_word())
|
||||
.map(|t| t.word.to_string())
|
||||
.take(MAX_QUERY_LEN)
|
||||
.enumerate()
|
||||
.collect()
|
||||
}
|
||||
|
||||
pub fn create_query_tree(
|
||||
reader: &heed::RoTxn<MainT>,
|
||||
ctx: &Context,
|
||||
query: &str,
|
||||
) -> MResult<(Operation, HashMap<QueryId, Range<usize>>)>
|
||||
{
|
||||
let words = split_query_string(query).map(str::to_lowercase);
|
||||
let words = words.filter(|w| !ctx.stop_words.contains(w));
|
||||
let words: Vec<_> = words.enumerate().collect();
|
||||
// TODO: use a shared analyzer instance
|
||||
let words = split_query_string(query, &ctx.stop_words);
|
||||
|
||||
let mut mapper = QueryWordsMapper::new(words.iter().map(|(_, w)| w));
|
||||
|
||||
|
@ -2,9 +2,9 @@ use std::borrow::Cow;
|
||||
use std::collections::{BTreeMap, HashMap};
|
||||
use std::convert::TryFrom;
|
||||
|
||||
use deunicode::deunicode_with_tofu;
|
||||
use meilisearch_schema::IndexedPos;
|
||||
use meilisearch_tokenizer::{is_cjk, SeqTokenizer, Token, Tokenizer};
|
||||
use meilisearch_tokenizer::analyzer::{Analyzer, AnalyzerConfig};
|
||||
use meilisearch_tokenizer::{Token, token::SeparatorKind, TokenKind};
|
||||
use sdset::SetBuf;
|
||||
|
||||
use crate::{DocIndex, DocumentId};
|
||||
@ -14,11 +14,11 @@ const WORD_LENGTH_LIMIT: usize = 80;
|
||||
|
||||
type Word = Vec<u8>; // TODO make it be a SmallVec
|
||||
|
||||
pub struct RawIndexer<A> {
|
||||
pub struct RawIndexer<'a, A> {
|
||||
word_limit: usize, // the maximum number of indexed words
|
||||
stop_words: fst::Set<A>,
|
||||
words_doc_indexes: BTreeMap<Word, Vec<DocIndex>>,
|
||||
docs_words: HashMap<DocumentId, Vec<Word>>,
|
||||
analyzer: Analyzer<'a, A>,
|
||||
}
|
||||
|
||||
pub struct Indexed<'a> {
|
||||
@ -26,32 +26,35 @@ pub struct Indexed<'a> {
|
||||
pub docs_words: HashMap<DocumentId, FstSetCow<'a>>,
|
||||
}
|
||||
|
||||
impl<A> RawIndexer<A> {
|
||||
pub fn new(stop_words: fst::Set<A>) -> RawIndexer<A> {
|
||||
impl<'a, A> RawIndexer<'a, A>
|
||||
where
|
||||
A: AsRef<[u8]>
|
||||
{
|
||||
pub fn new(stop_words: &'a fst::Set<A>) -> RawIndexer<'a, A> {
|
||||
RawIndexer::with_word_limit(stop_words, 1000)
|
||||
}
|
||||
|
||||
pub fn with_word_limit(stop_words: fst::Set<A>, limit: usize) -> RawIndexer<A> {
|
||||
pub fn with_word_limit(stop_words: &'a fst::Set<A>, limit: usize) -> RawIndexer<A> {
|
||||
RawIndexer {
|
||||
word_limit: limit,
|
||||
stop_words,
|
||||
words_doc_indexes: BTreeMap::new(),
|
||||
docs_words: HashMap::new(),
|
||||
analyzer: Analyzer::new(AnalyzerConfig::default_with_stopwords(stop_words)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<A: AsRef<[u8]>> RawIndexer<A> {
|
||||
pub fn index_text(&mut self, id: DocumentId, indexed_pos: IndexedPos, text: &str) -> usize {
|
||||
let mut number_of_words = 0;
|
||||
|
||||
for token in Tokenizer::new(text) {
|
||||
let analyzed_text = self.analyzer.analyze(text);
|
||||
for (token_pos, (word_pos, token)) in process_tokens(analyzed_text.tokens()).enumerate() {
|
||||
let must_continue = index_token(
|
||||
token,
|
||||
word_pos,
|
||||
token_pos,
|
||||
id,
|
||||
indexed_pos,
|
||||
self.word_limit,
|
||||
&self.stop_words,
|
||||
&mut self.words_doc_indexes,
|
||||
&mut self.docs_words,
|
||||
);
|
||||
@ -66,24 +69,37 @@ impl<A: AsRef<[u8]>> RawIndexer<A> {
|
||||
number_of_words
|
||||
}
|
||||
|
||||
pub fn index_text_seq<'s, I>(&mut self, id: DocumentId, indexed_pos: IndexedPos, iter: I)
|
||||
pub fn index_text_seq<'s, I>(&mut self, id: DocumentId, indexed_pos: IndexedPos, text_iter: I)
|
||||
where
|
||||
I: IntoIterator<Item = &'s str>,
|
||||
{
|
||||
let iter = iter.into_iter();
|
||||
for token in SeqTokenizer::new(iter) {
|
||||
let must_continue = index_token(
|
||||
token,
|
||||
id,
|
||||
indexed_pos,
|
||||
self.word_limit,
|
||||
&self.stop_words,
|
||||
&mut self.words_doc_indexes,
|
||||
&mut self.docs_words,
|
||||
);
|
||||
let mut word_offset = 0;
|
||||
|
||||
if !must_continue {
|
||||
break;
|
||||
for text in text_iter.into_iter() {
|
||||
let current_word_offset = word_offset;
|
||||
|
||||
let analyzed_text = self.analyzer.analyze(text);
|
||||
let tokens = process_tokens(analyzed_text.tokens())
|
||||
.map(|(i, t)| (i + current_word_offset, t))
|
||||
.enumerate();
|
||||
|
||||
for (token_pos, (word_pos, token)) in tokens {
|
||||
word_offset = word_pos + 1;
|
||||
|
||||
let must_continue = index_token(
|
||||
token,
|
||||
word_pos,
|
||||
token_pos,
|
||||
id,
|
||||
indexed_pos,
|
||||
self.word_limit,
|
||||
&mut self.words_doc_indexes,
|
||||
&mut self.docs_words,
|
||||
);
|
||||
|
||||
if !must_continue {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -113,31 +129,53 @@ impl<A: AsRef<[u8]>> RawIndexer<A> {
|
||||
}
|
||||
}
|
||||
|
||||
fn index_token<A>(
|
||||
fn process_tokens<'a>(tokens: impl Iterator<Item = Token<'a>>) -> impl Iterator<Item = (usize, Token<'a>)> {
|
||||
tokens
|
||||
.skip_while(|token| !token.is_word())
|
||||
.scan((0, None), |(offset, prev_kind), token| {
|
||||
match token.kind {
|
||||
TokenKind::Word | TokenKind::StopWord | TokenKind::Unknown => {
|
||||
*offset += match *prev_kind {
|
||||
Some(TokenKind::Separator(SeparatorKind::Hard)) => 8,
|
||||
Some(_) => 1,
|
||||
None => 0,
|
||||
};
|
||||
*prev_kind = Some(token.kind)
|
||||
}
|
||||
TokenKind::Separator(SeparatorKind::Hard) => {
|
||||
*prev_kind = Some(token.kind);
|
||||
}
|
||||
TokenKind::Separator(SeparatorKind::Soft)
|
||||
if *prev_kind != Some(TokenKind::Separator(SeparatorKind::Hard)) => {
|
||||
*prev_kind = Some(token.kind);
|
||||
}
|
||||
_ => (),
|
||||
}
|
||||
Some((*offset, token))
|
||||
})
|
||||
.filter(|(_, t)| t.is_word())
|
||||
}
|
||||
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
fn index_token(
|
||||
token: Token,
|
||||
word_pos: usize,
|
||||
token_pos: usize,
|
||||
id: DocumentId,
|
||||
indexed_pos: IndexedPos,
|
||||
word_limit: usize,
|
||||
stop_words: &fst::Set<A>,
|
||||
words_doc_indexes: &mut BTreeMap<Word, Vec<DocIndex>>,
|
||||
docs_words: &mut HashMap<DocumentId, Vec<Word>>,
|
||||
) -> bool
|
||||
where A: AsRef<[u8]>,
|
||||
{
|
||||
if token.index >= word_limit {
|
||||
if token_pos >= word_limit {
|
||||
return false;
|
||||
}
|
||||
|
||||
let lower = token.word.to_lowercase();
|
||||
let token = Token {
|
||||
word: &lower,
|
||||
..token
|
||||
};
|
||||
|
||||
if !stop_words.contains(&token.word) {
|
||||
match token_to_docindex(id, indexed_pos, token) {
|
||||
if !token.is_stopword() {
|
||||
match token_to_docindex(id, indexed_pos, &token, word_pos) {
|
||||
Some(docindex) => {
|
||||
let word = Vec::from(token.word);
|
||||
let word = Vec::from(token.word.as_ref());
|
||||
|
||||
if word.len() <= WORD_LENGTH_LIMIT {
|
||||
words_doc_indexes
|
||||
@ -145,20 +183,6 @@ where A: AsRef<[u8]>,
|
||||
.or_insert_with(Vec::new)
|
||||
.push(docindex);
|
||||
docs_words.entry(id).or_insert_with(Vec::new).push(word);
|
||||
|
||||
if !lower.contains(is_cjk) {
|
||||
let unidecoded = deunicode_with_tofu(&lower, "");
|
||||
if unidecoded != lower && !unidecoded.is_empty() {
|
||||
let word = Vec::from(unidecoded);
|
||||
if word.len() <= WORD_LENGTH_LIMIT {
|
||||
words_doc_indexes
|
||||
.entry(word.clone())
|
||||
.or_insert_with(Vec::new)
|
||||
.push(docindex);
|
||||
docs_words.entry(id).or_insert_with(Vec::new).push(word);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
None => return false,
|
||||
@ -168,10 +192,10 @@ where A: AsRef<[u8]>,
|
||||
true
|
||||
}
|
||||
|
||||
fn token_to_docindex(id: DocumentId, indexed_pos: IndexedPos, token: Token) -> Option<DocIndex> {
|
||||
let word_index = u16::try_from(token.word_index).ok()?;
|
||||
let char_index = u16::try_from(token.char_index).ok()?;
|
||||
let char_length = u16::try_from(token.word.chars().count()).ok()?;
|
||||
fn token_to_docindex(id: DocumentId, indexed_pos: IndexedPos, token: &Token, word_index: usize) -> Option<DocIndex> {
|
||||
let word_index = u16::try_from(word_index).ok()?;
|
||||
let char_index = u16::try_from(token.byte_start).ok()?;
|
||||
let char_length = u16::try_from(token.word.len()).ok()?;
|
||||
|
||||
let docindex = DocIndex {
|
||||
document_id: id,
|
||||
@ -188,10 +212,23 @@ fn token_to_docindex(id: DocumentId, indexed_pos: IndexedPos, token: Token) -> O
|
||||
mod tests {
|
||||
use super::*;
|
||||
use meilisearch_schema::IndexedPos;
|
||||
use meilisearch_tokenizer::{Analyzer, AnalyzerConfig};
|
||||
use fst::Set;
|
||||
|
||||
#[test]
|
||||
fn test_process_token() {
|
||||
let text = " 為一包含一千多萬目詞的帶標記平衡語料庫";
|
||||
let stopwords = Set::default();
|
||||
let analyzer = Analyzer::new(AnalyzerConfig::default_with_stopwords(&stopwords));
|
||||
let analyzer = analyzer.analyze(text);
|
||||
let tokens: Vec<_> = process_tokens(analyzer.tokens()).map(|(_, t)| t.text().to_string()).collect();
|
||||
assert_eq!(tokens, ["为", "一", "包含", "一千多万", "目词", "的", "带", "标记", "平衡", "语料库"]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn strange_apostrophe() {
|
||||
let mut indexer = RawIndexer::new(fst::Set::default());
|
||||
let stop_words = fst::Set::default();
|
||||
let mut indexer = RawIndexer::new(&stop_words);
|
||||
|
||||
let docid = DocumentId(0);
|
||||
let indexed_pos = IndexedPos(0);
|
||||
@ -206,14 +243,12 @@ mod tests {
|
||||
assert!(words_doc_indexes.get(&b"aspirateur"[..]).is_some());
|
||||
assert!(words_doc_indexes.get(&b"ai"[..]).is_some());
|
||||
assert!(words_doc_indexes.get(&b"eteindre"[..]).is_some());
|
||||
assert!(words_doc_indexes
|
||||
.get(&"éteindre".to_owned().into_bytes())
|
||||
.is_some());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn strange_apostrophe_in_sequence() {
|
||||
let mut indexer = RawIndexer::new(fst::Set::default());
|
||||
let stop_words = fst::Set::default();
|
||||
let mut indexer = RawIndexer::new(&stop_words);
|
||||
|
||||
let docid = DocumentId(0);
|
||||
let indexed_pos = IndexedPos(0);
|
||||
@ -228,9 +263,6 @@ mod tests {
|
||||
assert!(words_doc_indexes.get(&b"aspirateur"[..]).is_some());
|
||||
assert!(words_doc_indexes.get(&b"ai"[..]).is_some());
|
||||
assert!(words_doc_indexes.get(&b"eteindre"[..]).is_some());
|
||||
assert!(words_doc_indexes
|
||||
.get(&"éteindre".to_owned().into_bytes())
|
||||
.is_some());
|
||||
}
|
||||
|
||||
#[test]
|
||||
@ -238,7 +270,7 @@ mod tests {
|
||||
let stop_words = sdset::SetBuf::from_dirty(vec!["l", "j", "ai", "de"]);
|
||||
let stop_words = fst::Set::from_iter(stop_words).unwrap();
|
||||
|
||||
let mut indexer = RawIndexer::new(stop_words);
|
||||
let mut indexer = RawIndexer::new(&stop_words);
|
||||
|
||||
let docid = DocumentId(0);
|
||||
let indexed_pos = IndexedPos(0);
|
||||
@ -255,14 +287,12 @@ mod tests {
|
||||
assert!(words_doc_indexes.get(&b"ai"[..]).is_none());
|
||||
assert!(words_doc_indexes.get(&b"de"[..]).is_none());
|
||||
assert!(words_doc_indexes.get(&b"eteindre"[..]).is_some());
|
||||
assert!(words_doc_indexes
|
||||
.get(&"éteindre".to_owned().into_bytes())
|
||||
.is_some());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn no_empty_unidecode() {
|
||||
let mut indexer = RawIndexer::new(fst::Set::default());
|
||||
let stop_words = fst::Set::default();
|
||||
let mut indexer = RawIndexer::new(&stop_words);
|
||||
|
||||
let docid = DocumentId(0);
|
||||
let indexed_pos = IndexedPos(0);
|
||||
@ -281,7 +311,8 @@ mod tests {
|
||||
#[test]
|
||||
// test sample from 807
|
||||
fn very_long_text() {
|
||||
let mut indexer = RawIndexer::new(fst::Set::default());
|
||||
let stop_words = fst::Set::default();
|
||||
let mut indexer = RawIndexer::new(&stop_words);
|
||||
let indexed_pos = IndexedPos(0);
|
||||
let docid = DocumentId(0);
|
||||
let text = " The locations block is the most powerful, and potentially most involved, section of the .platform.app.yaml file. It allows you to control how the application container responds to incoming requests at a very fine-grained level. Common patterns also vary between language containers due to the way PHP-FPM handles incoming requests.\nEach entry of the locations block is an absolute URI path (with leading /) and its value includes the configuration directives for how the web server should handle matching requests. That is, if your domain is example.com then '/' means “requests for example.com/”, while '/admin' means “requests for example.com/admin”. If multiple blocks could match an incoming request then the most-specific will apply.\nweb:locations:'/':# Rules for all requests that don't otherwise match....'/sites/default/files':# Rules for any requests that begin with /sites/default/files....The simplest possible locations configuration is one that simply passes all requests on to your application unconditionally:\nweb:locations:'/':passthru:trueThat is, all requests to /* should be forwarded to the process started by web.commands.start above. Note that for PHP containers the passthru key must specify what PHP file the request should be forwarded to, and must also specify a docroot under which the file lives. For example:\nweb:locations:'/':root:'web'passthru:'/app.php'This block will serve requests to / from the web directory in the application, and if a file doesn’t exist on disk then the request will be forwarded to the /app.php script.\nA full list of the possible subkeys for locations is below.\n root: The folder from which to serve static assets for this location relative to the application root. The application root is the directory in which the .platform.app.yaml file is located. Typical values for this property include public or web. Setting it to '' is not recommended, and its behavior may vary depending on the type of application. 
Absolute paths are not supported.\n passthru: Whether to forward disallowed and missing resources from this location to the application and can be true, false or an absolute URI path (with leading /). The default value is false. For non-PHP applications it will generally be just true or false. In a PHP application this will typically be the front controller such as /index.php or /app.php. This entry works similar to mod_rewrite under Apache. Note: If the value of passthru does not begin with the same value as the location key it is under, the passthru may evaluate to another entry. That may be useful when you want different cache settings for different paths, for instance, but want missing files in all of them to map back to the same front controller. See the example block below.\n index: The files to consider when serving a request for a directory: an array of file names or null. (typically ['index.html']). Note that in order for this to work, access to the static files named must be allowed by the allow or rules keys for this location.\n expires: How long to allow static assets from this location to be cached (this enables the Cache-Control and Expires headers) and can be a time or -1 for no caching (default). Times can be suffixed with “ms” (milliseconds), “s” (seconds), “m” (minutes), “h” (hours), “d” (days), “w” (weeks), “M” (months, 30d) or “y” (years, 365d).\n scripts: Whether to allow loading scripts in that location (true or false). This directive is only meaningful on PHP.\n allow: Whether to allow serving files which don’t match a rule (true or false, default: true).\n headers: Any additional headers to apply to static assets. This section is a mapping of header names to header values. Responses from the application aren’t affected, to avoid overlap with the application’s own ability to include custom headers in the response.\n rules: Specific overrides for a specific location. 
The key is a PCRE (regular expression) that is matched against the full request path.\n request_buffering: Most application servers do not support chunked requests (e.g. fpm, uwsgi), so Platform.sh enables request_buffering by default to handle them. That default configuration would look like this if it was present in .platform.app.yaml:\nweb:locations:'/':passthru:truerequest_buffering:enabled:truemax_request_size:250mIf the application server can already efficiently handle chunked requests, the request_buffering subkey can be modified to disable it entirely (enabled: false). Additionally, applications that frequently deal with uploads greater than 250MB in size can update the max_request_size key to the application’s needs. Note that modifications to request_buffering will need to be specified at each location where it is desired.\n ";
|
||||
@ -289,12 +320,13 @@ mod tests {
|
||||
let Indexed {
|
||||
words_doc_indexes, ..
|
||||
} = indexer.build();
|
||||
assert!(words_doc_indexes.get(&"buffering".to_owned().into_bytes()).is_some());
|
||||
assert!(words_doc_indexes.get(&"request".to_owned().into_bytes()).is_some());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn words_over_index_1000_not_indexed() {
|
||||
let mut indexer = RawIndexer::new(fst::Set::default());
|
||||
let stop_words = fst::Set::default();
|
||||
let mut indexer = RawIndexer::new(&stop_words);
|
||||
let indexed_pos = IndexedPos(0);
|
||||
let docid = DocumentId(0);
|
||||
let mut text = String::with_capacity(5000);
|
||||
|
@ -51,7 +51,7 @@ impl From<heed::Error> for DeserializerError {
|
||||
|
||||
pub struct Deserializer<'a> {
|
||||
pub document_id: DocumentId,
|
||||
pub reader: &'a heed::RoTxn<MainT>,
|
||||
pub reader: &'a heed::RoTxn<'a, MainT>,
|
||||
pub documents_fields: DocumentsFields,
|
||||
pub schema: &'a Schema,
|
||||
pub fields: Option<&'a HashSet<FieldId>>,
|
||||
|
@ -13,7 +13,7 @@ static RANKING_RULE_REGEX: Lazy<regex::Regex> = Lazy::new(|| {
|
||||
regex::Regex::new(r"(asc|desc)\(([a-zA-Z0-9-_]*)\)").unwrap()
|
||||
});
|
||||
|
||||
#[derive(Default, Clone, Serialize, Deserialize)]
|
||||
#[derive(Default, Clone, Serialize, Deserialize, Debug)]
|
||||
#[serde(rename_all = "camelCase", deny_unknown_fields)]
|
||||
pub struct Settings {
|
||||
#[serde(default, deserialize_with = "deserialize_some")]
|
||||
|
@ -33,7 +33,7 @@ impl DocsWords {
|
||||
self.docs_words.clear(writer)
|
||||
}
|
||||
|
||||
pub fn doc_words(self, reader: &heed::RoTxn<MainT>, document_id: DocumentId) -> ZResult<FstSetCow> {
|
||||
pub fn doc_words<'a>(self, reader: &'a heed::RoTxn<'a, MainT>, document_id: DocumentId) -> ZResult<FstSetCow> {
|
||||
let document_id = BEU32::new(document_id.0);
|
||||
match self.docs_words.get(reader, &document_id)? {
|
||||
Some(bytes) => Ok(fst::Set::new(bytes).unwrap().map_data(Cow::Borrowed).unwrap()),
|
||||
|
@ -2,7 +2,7 @@ use std::borrow::Cow;
|
||||
use std::collections::HashMap;
|
||||
use std::mem;
|
||||
|
||||
use heed::{RwTxn, RoTxn, RoRange, types::Str, BytesEncode, BytesDecode};
|
||||
use heed::{RwTxn, RoTxn, RoPrefix, types::Str, BytesEncode, BytesDecode};
|
||||
use sdset::{SetBuf, Set, SetOperation};
|
||||
|
||||
use meilisearch_types::DocumentId;
|
||||
@ -48,10 +48,10 @@ impl<'a> BytesDecode<'a> for FacetData {
|
||||
let mut size_buf = [0; LEN];
|
||||
size_buf.copy_from_slice(bytes.get(0..LEN)?);
|
||||
// decode size of the first item from the bytes
|
||||
let first_size = usize::from_be_bytes(size_buf);
|
||||
let first_size = u64::from_be_bytes(size_buf);
|
||||
// decode first and second items
|
||||
let first_item = Str::bytes_decode(bytes.get(LEN..(LEN + first_size))?)?;
|
||||
let second_item = CowSet::bytes_decode(bytes.get((LEN + first_size)..)?)?;
|
||||
let first_item = Str::bytes_decode(bytes.get(LEN..(LEN + first_size as usize))?)?;
|
||||
let second_item = CowSet::bytes_decode(bytes.get((LEN + first_size as usize)..)?)?;
|
||||
Some((first_item, second_item))
|
||||
}
|
||||
}
|
||||
@ -62,7 +62,7 @@ impl Facets {
|
||||
Ok(self.facets.put(writer, &facet_key, &(facet_value, doc_ids))?)
|
||||
}
|
||||
|
||||
pub fn field_document_ids<'txn>(&self, reader: &'txn RoTxn<MainT>, field_id: FieldId) -> MResult<RoRange<'txn, FacetKey, FacetData>> {
|
||||
pub fn field_document_ids<'txn>(&self, reader: &'txn RoTxn<MainT>, field_id: FieldId) -> MResult<RoPrefix<'txn, FacetKey, FacetData>> {
|
||||
Ok(self.facets.prefix_iter(reader, &FacetKey::new(field_id, String::new()))?)
|
||||
}
|
||||
|
||||
|
@ -143,7 +143,7 @@ impl Main {
|
||||
self.put_external_docids(writer, &external_docids)
|
||||
}
|
||||
|
||||
pub fn external_docids(self, reader: &heed::RoTxn<MainT>) -> MResult<FstMapCow> {
|
||||
pub fn external_docids<'a>(self, reader: &'a heed::RoTxn<'a, MainT>) -> MResult<FstMapCow> {
|
||||
match self.main.get::<_, Str, ByteSlice>(reader, EXTERNAL_DOCIDS_KEY)? {
|
||||
Some(bytes) => Ok(fst::Map::new(bytes).unwrap().map_data(Cow::Borrowed).unwrap()),
|
||||
None => Ok(fst::Map::default().map_data(Cow::Owned).unwrap()),
|
||||
@ -155,7 +155,7 @@ impl Main {
|
||||
Ok(external_ids.get(external_docid).map(|id| DocumentId(id as u32)))
|
||||
}
|
||||
|
||||
pub fn words_fst(self, reader: &heed::RoTxn<MainT>) -> MResult<FstSetCow> {
|
||||
pub fn words_fst<'a>(self, reader: &'a heed::RoTxn<'a, MainT>) -> MResult<FstSetCow> {
|
||||
match self.main.get::<_, Str, ByteSlice>(reader, WORDS_KEY)? {
|
||||
Some(bytes) => Ok(fst::Set::new(bytes).unwrap().map_data(Cow::Borrowed).unwrap()),
|
||||
None => Ok(fst::Set::default().map_data(Cow::Owned).unwrap()),
|
||||
@ -170,7 +170,7 @@ impl Main {
|
||||
Ok(self.main.put::<_, Str, CowSlice<DocumentId>>(writer, SORTED_DOCUMENT_IDS_CACHE_KEY, documents_ids)?)
|
||||
}
|
||||
|
||||
pub fn sorted_document_ids_cache(self, reader: &heed::RoTxn<MainT>) -> MResult<Option<Cow<[DocumentId]>>> {
|
||||
pub fn sorted_document_ids_cache<'a>(self, reader: &'a heed::RoTxn<'a, MainT>) -> MResult<Option<Cow<[DocumentId]>>> {
|
||||
Ok(self.main.get::<_, Str, CowSlice<DocumentId>>(reader, SORTED_DOCUMENT_IDS_CACHE_KEY)?)
|
||||
}
|
||||
|
||||
@ -199,7 +199,7 @@ impl Main {
|
||||
Ok(self.main.put::<_, Str, ByteSlice>(writer, SYNONYMS_KEY, bytes)?)
|
||||
}
|
||||
|
||||
pub(crate) fn synonyms_fst(self, reader: &heed::RoTxn<MainT>) -> MResult<FstSetCow> {
|
||||
pub(crate) fn synonyms_fst<'a>(self, reader: &'a heed::RoTxn<'a, MainT>) -> MResult<FstSetCow> {
|
||||
match self.main.get::<_, Str, ByteSlice>(reader, SYNONYMS_KEY)? {
|
||||
Some(bytes) => Ok(fst::Set::new(bytes).unwrap().map_data(Cow::Borrowed).unwrap()),
|
||||
None => Ok(fst::Set::default().map_data(Cow::Owned).unwrap()),
|
||||
@ -219,7 +219,7 @@ impl Main {
|
||||
Ok(self.main.put::<_, Str, ByteSlice>(writer, STOP_WORDS_KEY, bytes)?)
|
||||
}
|
||||
|
||||
pub(crate) fn stop_words_fst(self, reader: &heed::RoTxn<MainT>) -> MResult<FstSetCow> {
|
||||
pub(crate) fn stop_words_fst<'a>(self, reader: &'a heed::RoTxn<'a, MainT>) -> MResult<FstSetCow> {
|
||||
match self.main.get::<_, Str, ByteSlice>(reader, STOP_WORDS_KEY)? {
|
||||
Some(bytes) => Ok(fst::Set::new(bytes).unwrap().map_data(Cow::Borrowed).unwrap()),
|
||||
None => Ok(fst::Set::default().map_data(Cow::Owned).unwrap()),
|
||||
|
@ -11,12 +11,14 @@ pub fn apply_clear_all(
|
||||
index.main.put_internal_docids(writer, &sdset::SetBuf::default())?;
|
||||
index.main.put_ranked_map(writer, &RankedMap::default())?;
|
||||
index.main.put_number_of_documents(writer, |_| 0)?;
|
||||
index.main.put_sorted_document_ids_cache(writer, &[])?;
|
||||
index.documents_fields.clear(writer)?;
|
||||
index.documents_fields_counts.clear(writer)?;
|
||||
index.postings_lists.clear(writer)?;
|
||||
index.docs_words.clear(writer)?;
|
||||
index.prefix_documents_cache.clear(writer)?;
|
||||
index.prefix_postings_lists_cache.clear(writer)?;
|
||||
index.facets.clear(writer)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
@ -23,6 +23,8 @@ pub struct DocumentsAddition<D> {
|
||||
updates_store: store::Updates,
|
||||
updates_results_store: store::UpdatesResults,
|
||||
updates_notifier: UpdateEventsEmitter,
|
||||
// Whether the user explicitly set the primary key in the update
|
||||
primary_key: Option<String>,
|
||||
documents: Vec<D>,
|
||||
is_partial: bool,
|
||||
}
|
||||
@ -39,6 +41,7 @@ impl<D> DocumentsAddition<D> {
|
||||
updates_notifier,
|
||||
documents: Vec::new(),
|
||||
is_partial: false,
|
||||
primary_key: None,
|
||||
}
|
||||
}
|
||||
|
||||
@ -53,9 +56,14 @@ impl<D> DocumentsAddition<D> {
|
||||
updates_notifier,
|
||||
documents: Vec::new(),
|
||||
is_partial: true,
|
||||
primary_key: None,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn set_primary_key(&mut self, primary_key: String) {
|
||||
self.primary_key = Some(primary_key);
|
||||
}
|
||||
|
||||
pub fn update_document(&mut self, document: D) {
|
||||
self.documents.push(document);
|
||||
}
|
||||
@ -71,6 +79,7 @@ impl<D> DocumentsAddition<D> {
|
||||
self.updates_results_store,
|
||||
self.documents,
|
||||
self.is_partial,
|
||||
self.primary_key,
|
||||
)?;
|
||||
Ok(update_id)
|
||||
}
|
||||
@ -88,6 +97,7 @@ pub fn push_documents_addition<D: serde::Serialize>(
|
||||
updates_results_store: store::UpdatesResults,
|
||||
addition: Vec<D>,
|
||||
is_partial: bool,
|
||||
primary_key: Option<String>,
|
||||
) -> MResult<u64> {
|
||||
let mut values = Vec::with_capacity(addition.len());
|
||||
for add in addition {
|
||||
@ -99,9 +109,9 @@ pub fn push_documents_addition<D: serde::Serialize>(
|
||||
let last_update_id = next_update_id(writer, updates_store, updates_results_store)?;
|
||||
|
||||
let update = if is_partial {
|
||||
Update::documents_partial(values)
|
||||
Update::documents_partial(primary_key, values)
|
||||
} else {
|
||||
Update::documents_addition(values)
|
||||
Update::documents_addition(primary_key, values)
|
||||
};
|
||||
|
||||
updates_store.put_update(writer, last_update_id, &update)?;
|
||||
@ -110,7 +120,7 @@ pub fn push_documents_addition<D: serde::Serialize>(
|
||||
}
|
||||
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
fn index_document<A>(
|
||||
fn index_document<A: AsRef<[u8]>>(
|
||||
writer: &mut heed::RwTxn<MainT>,
|
||||
documents_fields: DocumentsFields,
|
||||
documents_fields_counts: DocumentsFieldsCounts,
|
||||
@ -121,18 +131,17 @@ fn index_document<A>(
|
||||
document_id: DocumentId,
|
||||
value: &Value,
|
||||
) -> MResult<()>
|
||||
where A: AsRef<[u8]>,
|
||||
{
|
||||
let serialized = serde_json::to_vec(value)?;
|
||||
documents_fields.put_document_field(writer, document_id, field_id, &serialized)?;
|
||||
|
||||
if let Some(indexed_pos) = schema.is_indexed(field_id) {
|
||||
let number_of_words = index_value(indexer, document_id, *indexed_pos, value);
|
||||
if let Some(indexed_pos) = schema.is_searchable(field_id) {
|
||||
let number_of_words = index_value(indexer, document_id, indexed_pos, value);
|
||||
if let Some(number_of_words) = number_of_words {
|
||||
documents_fields_counts.put_document_field_count(
|
||||
writer,
|
||||
document_id,
|
||||
*indexed_pos,
|
||||
indexed_pos,
|
||||
number_of_words as u16,
|
||||
)?;
|
||||
}
|
||||
@ -146,11 +155,12 @@ where A: AsRef<[u8]>,
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn apply_addition<'a, 'b>(
|
||||
writer: &'a mut heed::RwTxn<'b, MainT>,
|
||||
pub fn apply_addition(
|
||||
writer: &mut heed::RwTxn<MainT>,
|
||||
index: &store::Index,
|
||||
new_documents: Vec<IndexMap<String, Value>>,
|
||||
partial: bool
|
||||
partial: bool,
|
||||
primary_key: Option<String>,
|
||||
) -> MResult<()>
|
||||
{
|
||||
let mut schema = match index.main.schema(writer)? {
|
||||
@ -163,7 +173,14 @@ pub fn apply_addition<'a, 'b>(
|
||||
let internal_docids = index.main.internal_docids(writer)?;
|
||||
let mut available_ids = DiscoverIds::new(&internal_docids);
|
||||
|
||||
let primary_key = schema.primary_key().ok_or(Error::MissingPrimaryKey)?;
|
||||
let primary_key = match schema.primary_key() {
|
||||
Some(primary_key) => primary_key.to_string(),
|
||||
None => {
|
||||
let name = primary_key.ok_or(Error::MissingPrimaryKey)?;
|
||||
schema.set_primary_key(&name)?;
|
||||
name
|
||||
}
|
||||
};
|
||||
|
||||
// 1. store documents ids for future deletion
|
||||
let mut documents_additions = HashMap::new();
|
||||
@ -222,13 +239,13 @@ pub fn apply_addition<'a, 'b>(
|
||||
let stop_words = index.main.stop_words_fst(writer)?.map_data(Cow::into_owned)?;
|
||||
|
||||
|
||||
let mut indexer = RawIndexer::new(stop_words);
|
||||
let mut indexer = RawIndexer::new(&stop_words);
|
||||
|
||||
// For each document in this update
|
||||
for (document_id, document) in &documents_additions {
|
||||
// For each key-value pair in the document.
|
||||
for (attribute, value) in document {
|
||||
let field_id = schema.insert_and_index(&attribute)?;
|
||||
let (field_id, _) = schema.insert_with_position(&attribute)?;
|
||||
index_document(
|
||||
writer,
|
||||
index.documents_fields,
|
||||
@ -272,20 +289,22 @@ pub fn apply_addition<'a, 'b>(
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn apply_documents_partial_addition<'a, 'b>(
|
||||
writer: &'a mut heed::RwTxn<'b, MainT>,
|
||||
pub fn apply_documents_partial_addition(
|
||||
writer: &mut heed::RwTxn<MainT>,
|
||||
index: &store::Index,
|
||||
new_documents: Vec<IndexMap<String, Value>>,
|
||||
primary_key: Option<String>,
|
||||
) -> MResult<()> {
|
||||
apply_addition(writer, index, new_documents, true)
|
||||
apply_addition(writer, index, new_documents, true, primary_key)
|
||||
}
|
||||
|
||||
pub fn apply_documents_addition<'a, 'b>(
|
||||
writer: &'a mut heed::RwTxn<'b, MainT>,
|
||||
pub fn apply_documents_addition(
|
||||
writer: &mut heed::RwTxn<MainT>,
|
||||
index: &store::Index,
|
||||
new_documents: Vec<IndexMap<String, Value>>,
|
||||
primary_key: Option<String>,
|
||||
) -> MResult<()> {
|
||||
apply_addition(writer, index, new_documents, false)
|
||||
apply_addition(writer, index, new_documents, false, primary_key)
|
||||
}
|
||||
|
||||
pub fn reindex_all_documents(writer: &mut heed::RwTxn<MainT>, index: &store::Index) -> MResult<()> {
|
||||
@ -317,7 +336,7 @@ pub fn reindex_all_documents(writer: &mut heed::RwTxn<MainT>, index: &store::Ind
|
||||
.unwrap();
|
||||
|
||||
let number_of_inserted_documents = documents_ids_to_reindex.len();
|
||||
let mut indexer = RawIndexer::new(stop_words);
|
||||
let mut indexer = RawIndexer::new(&stop_words);
|
||||
let mut ram_store = HashMap::new();
|
||||
|
||||
if let Some(ref attributes_for_facetting) = index.main.attributes_for_faceting(writer)? {
|
||||
@ -373,14 +392,13 @@ pub fn reindex_all_documents(writer: &mut heed::RwTxn<MainT>, index: &store::Ind
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn write_documents_addition_index<A>(
|
||||
pub fn write_documents_addition_index<A: AsRef<[u8]>>(
|
||||
writer: &mut heed::RwTxn<MainT>,
|
||||
index: &store::Index,
|
||||
ranked_map: &RankedMap,
|
||||
number_of_inserted_documents: usize,
|
||||
indexer: RawIndexer<A>,
|
||||
) -> MResult<()>
|
||||
where A: AsRef<[u8]>,
|
||||
{
|
||||
let indexed = indexer.build();
|
||||
let mut delta_words_builder = SetBuilder::memory();
|
||||
|
@ -12,13 +12,12 @@ use crate::serde::SerializerError;
|
||||
use crate::store::DiscoverIds;
|
||||
|
||||
/// Returns the number of words indexed or `None` if the type is unindexable.
|
||||
pub fn index_value<A>(
|
||||
pub fn index_value<A: AsRef<[u8]>>(
|
||||
indexer: &mut RawIndexer<A>,
|
||||
document_id: DocumentId,
|
||||
indexed_pos: IndexedPos,
|
||||
value: &Value,
|
||||
) -> Option<usize>
|
||||
where A: AsRef<[u8]>,
|
||||
{
|
||||
match value {
|
||||
Value::Null => None,
|
||||
|
@ -52,16 +52,16 @@ impl Update {
|
||||
}
|
||||
}
|
||||
|
||||
fn documents_addition(documents: Vec<IndexMap<String, Value>>) -> Update {
|
||||
fn documents_addition(primary_key: Option<String>, documents: Vec<IndexMap<String, Value>>) -> Update {
|
||||
Update {
|
||||
data: UpdateData::DocumentsAddition(documents),
|
||||
data: UpdateData::DocumentsAddition{ documents, primary_key },
|
||||
enqueued_at: Utc::now(),
|
||||
}
|
||||
}
|
||||
|
||||
fn documents_partial(documents: Vec<IndexMap<String, Value>>) -> Update {
|
||||
fn documents_partial(primary_key: Option<String>, documents: Vec<IndexMap<String, Value>>) -> Update {
|
||||
Update {
|
||||
data: UpdateData::DocumentsPartial(documents),
|
||||
data: UpdateData::DocumentsPartial{ documents, primary_key },
|
||||
enqueued_at: Utc::now(),
|
||||
}
|
||||
}
|
||||
@ -85,8 +85,15 @@ impl Update {
|
||||
pub enum UpdateData {
|
||||
ClearAll,
|
||||
Customs(Vec<u8>),
|
||||
DocumentsAddition(Vec<IndexMap<String, Value>>),
|
||||
DocumentsPartial(Vec<IndexMap<String, Value>>),
|
||||
// (primary key, documents)
|
||||
DocumentsAddition {
|
||||
primary_key: Option<String>,
|
||||
documents: Vec<IndexMap<String, Value>>
|
||||
},
|
||||
DocumentsPartial {
|
||||
primary_key: Option<String>,
|
||||
documents: Vec<IndexMap<String, Value>>,
|
||||
},
|
||||
DocumentsDeletion(Vec<String>),
|
||||
Settings(Box<SettingsUpdate>)
|
||||
}
|
||||
@ -96,11 +103,11 @@ impl UpdateData {
|
||||
match self {
|
||||
UpdateData::ClearAll => UpdateType::ClearAll,
|
||||
UpdateData::Customs(_) => UpdateType::Customs,
|
||||
UpdateData::DocumentsAddition(addition) => UpdateType::DocumentsAddition {
|
||||
number: addition.len(),
|
||||
UpdateData::DocumentsAddition{ documents, .. } => UpdateType::DocumentsAddition {
|
||||
number: documents.len(),
|
||||
},
|
||||
UpdateData::DocumentsPartial(addition) => UpdateType::DocumentsPartial {
|
||||
number: addition.len(),
|
||||
UpdateData::DocumentsPartial{ documents, .. } => UpdateType::DocumentsPartial {
|
||||
number: documents.len(),
|
||||
},
|
||||
UpdateData::DocumentsDeletion(deletion) => UpdateType::DocumentsDeletion {
|
||||
number: deletion.len(),
|
||||
@ -212,8 +219,8 @@ pub fn next_update_id(
|
||||
Ok(new_update_id)
|
||||
}
|
||||
|
||||
pub fn update_task<'a, 'b>(
|
||||
writer: &'a mut heed::RwTxn<'b, MainT>,
|
||||
pub fn update_task(
|
||||
writer: &mut heed::RwTxn<MainT>,
|
||||
index: &store::Index,
|
||||
update_id: u64,
|
||||
update: Update,
|
||||
@ -239,25 +246,25 @@ pub fn update_task<'a, 'b>(
|
||||
|
||||
(update_type, result, start.elapsed())
|
||||
}
|
||||
UpdateData::DocumentsAddition(documents) => {
|
||||
UpdateData::DocumentsAddition { documents, primary_key } => {
|
||||
let start = Instant::now();
|
||||
|
||||
let update_type = UpdateType::DocumentsAddition {
|
||||
number: documents.len(),
|
||||
};
|
||||
|
||||
let result = apply_documents_addition(writer, index, documents);
|
||||
let result = apply_documents_addition(writer, index, documents, primary_key);
|
||||
|
||||
(update_type, result, start.elapsed())
|
||||
}
|
||||
UpdateData::DocumentsPartial(documents) => {
|
||||
UpdateData::DocumentsPartial{ documents, primary_key } => {
|
||||
let start = Instant::now();
|
||||
|
||||
let update_type = UpdateType::DocumentsPartial {
|
||||
number: documents.len(),
|
||||
};
|
||||
|
||||
let result = apply_documents_partial_addition(writer, index, documents);
|
||||
let result = apply_documents_partial_addition(writer, index, documents, primary_key);
|
||||
|
||||
(update_type, result, start.elapsed())
|
||||
}
|
||||
|
@ -1,9 +1,10 @@
|
||||
use std::collections::{BTreeMap, BTreeSet};
|
||||
use std::{borrow::Cow, collections::{BTreeMap, BTreeSet}};
|
||||
|
||||
use heed::Result as ZResult;
|
||||
use fst::{set::OpBuilder, SetBuilder};
|
||||
use fst::{SetBuilder, set::OpBuilder};
|
||||
use sdset::SetBuf;
|
||||
use meilisearch_schema::Schema;
|
||||
use meilisearch_tokenizer::analyzer::{Analyzer, AnalyzerConfig};
|
||||
|
||||
use crate::database::{MainT, UpdateT};
|
||||
use crate::settings::{UpdateState, SettingsUpdate, RankingRule};
|
||||
@ -71,14 +72,14 @@ pub fn apply_settings_update(
|
||||
match settings.searchable_attributes.clone() {
|
||||
UpdateState::Update(v) => {
|
||||
if v.iter().any(|e| e == "*") || v.is_empty() {
|
||||
schema.set_all_fields_as_indexed();
|
||||
schema.set_all_searchable();
|
||||
} else {
|
||||
schema.update_indexed(v)?;
|
||||
schema.update_searchable(v)?;
|
||||
}
|
||||
must_reindex = true;
|
||||
},
|
||||
UpdateState::Clear => {
|
||||
schema.set_all_fields_as_indexed();
|
||||
schema.set_all_searchable();
|
||||
must_reindex = true;
|
||||
},
|
||||
UpdateState::Nothing => (),
|
||||
@ -86,13 +87,13 @@ pub fn apply_settings_update(
|
||||
match settings.displayed_attributes.clone() {
|
||||
UpdateState::Update(v) => {
|
||||
if v.contains("*") || v.is_empty() {
|
||||
schema.set_all_fields_as_displayed();
|
||||
schema.set_all_displayed();
|
||||
} else {
|
||||
schema.update_displayed(v)?
|
||||
}
|
||||
},
|
||||
UpdateState::Clear => {
|
||||
schema.set_all_fields_as_displayed();
|
||||
schema.set_all_displayed();
|
||||
},
|
||||
UpdateState::Nothing => (),
|
||||
}
|
||||
@ -289,10 +290,28 @@ pub fn apply_synonyms_update(
|
||||
|
||||
let main_store = index.main;
|
||||
let synonyms_store = index.synonyms;
|
||||
let stop_words = index.main.stop_words_fst(writer)?.map_data(Cow::into_owned)?;
|
||||
let analyzer = Analyzer::new(AnalyzerConfig::default_with_stopwords(&stop_words));
|
||||
|
||||
fn normalize<T: AsRef<[u8]>>(analyzer: &Analyzer<T>, text: &str) -> String {
|
||||
analyzer.analyze(&text)
|
||||
.tokens()
|
||||
.fold(String::new(), |s, t| s + t.text())
|
||||
}
|
||||
|
||||
// normalize synonyms and reorder them creating a BTreeMap
|
||||
let synonyms: BTreeMap<String, Vec<String>> = synonyms.into_iter().map( |(word, alternatives)| {
|
||||
let word = normalize(&analyzer, &word);
|
||||
let alternatives = alternatives.into_iter().map(|text| normalize(&analyzer, &text)).collect();
|
||||
|
||||
(word, alternatives)
|
||||
}).collect();
|
||||
|
||||
// index synonyms,
|
||||
// synyonyms have to be ordered by key before indexation
|
||||
let mut synonyms_builder = SetBuilder::memory();
|
||||
synonyms_store.clear(writer)?;
|
||||
for (word, alternatives) in synonyms.clone() {
|
||||
for (word, alternatives) in synonyms {
|
||||
synonyms_builder.insert(&word)?;
|
||||
|
||||
let alternatives = {
|
||||
|
@ -1,8 +1,8 @@
|
||||
[package]
|
||||
name = "meilisearch-error"
|
||||
version = "0.16.0"
|
||||
version = "0.20.0"
|
||||
authors = ["marin <postma.marin@protonmail.com>"]
|
||||
edition = "2018"
|
||||
|
||||
[dependencies]
|
||||
actix-http = "2"
|
||||
actix-http = "2.2.0"
|
||||
|
@ -69,7 +69,6 @@ pub enum Code {
|
||||
DocumentNotFound,
|
||||
Internal,
|
||||
InvalidToken,
|
||||
Maintenance,
|
||||
MissingAuthorizationHeader,
|
||||
NotFound,
|
||||
PayloadTooLarge,
|
||||
@ -118,7 +117,6 @@ impl Code {
|
||||
DocumentNotFound => ErrCode::invalid("document_not_found", StatusCode::NOT_FOUND),
|
||||
Internal => ErrCode::internal("internal", StatusCode::INTERNAL_SERVER_ERROR),
|
||||
InvalidToken => ErrCode::authentication("invalid_token", StatusCode::FORBIDDEN),
|
||||
Maintenance => ErrCode::internal("maintenance", StatusCode::SERVICE_UNAVAILABLE),
|
||||
MissingAuthorizationHeader => ErrCode::authentication("missing_authorization_header", StatusCode::UNAUTHORIZED),
|
||||
NotFound => ErrCode::invalid("not_found", StatusCode::NOT_FOUND),
|
||||
PayloadTooLarge => ErrCode::invalid("payload_too_large", StatusCode::PAYLOAD_TOO_LARGE),
|
||||
|
@ -1,7 +1,7 @@
|
||||
[package]
|
||||
name = "meilisearch-http"
|
||||
description = "MeiliSearch HTTP server"
|
||||
version = "0.16.0"
|
||||
version = "0.20.0"
|
||||
license = "MIT"
|
||||
authors = [
|
||||
"Quentin de Quelen <quentin@dequelen.me>",
|
||||
@ -17,43 +17,43 @@ path = "src/main.rs"
|
||||
default = ["sentry"]
|
||||
|
||||
[dependencies]
|
||||
actix-cors = "0.4.1"
|
||||
actix-http = "2"
|
||||
actix-rt = "1"
|
||||
actix-cors = "0.5.4"
|
||||
actix-http = "2.2.0"
|
||||
actix-rt = "1.1.1"
|
||||
actix-service = "1.0.6"
|
||||
actix-web = { version = "3.1.0", features = ["rustls"] }
|
||||
bytes = "0.5.4"
|
||||
actix-web = { version = "3.3.2", features = ["rustls"] }
|
||||
bytes = "1.0.0"
|
||||
chrono = { version = "0.4.19", features = ["serde"] }
|
||||
crossbeam-channel = "0.5.0"
|
||||
env_logger = "0.7.1"
|
||||
flate2 = "1.0.18"
|
||||
futures = "0.3.6"
|
||||
http = "0.2.1"
|
||||
indexmap = { version = "1.3.2", features = ["serde-1"] }
|
||||
log = "0.4.8"
|
||||
main_error = "0.1.0"
|
||||
meilisearch-core = { path = "../meilisearch-core", version = "0.16.0" }
|
||||
meilisearch-error = { path = "../meilisearch-error", version = "0.16.0" }
|
||||
meilisearch-schema = { path = "../meilisearch-schema", version = "0.16.0" }
|
||||
meilisearch-tokenizer = {path = "../meilisearch-tokenizer", version = "0.16.0"}
|
||||
env_logger = "0.8.2"
|
||||
flate2 = "1.0.19"
|
||||
futures = "0.3.8"
|
||||
http = "0.2.2"
|
||||
indexmap = { version = "1.6.1", features = ["serde-1"] }
|
||||
log = "0.4.11"
|
||||
main_error = "0.1.1"
|
||||
meilisearch-core = { path = "../meilisearch-core", version = "0.20.0" }
|
||||
meilisearch-error = { path = "../meilisearch-error", version = "0.20.0" }
|
||||
meilisearch-schema = { path = "../meilisearch-schema", version = "0.20.0" }
|
||||
mime = "0.3.16"
|
||||
once_cell = "1.4.1"
|
||||
rand = "0.7.3"
|
||||
regex = "1.4.1"
|
||||
rustls = "0.18"
|
||||
serde = { version = "1.0.105", features = ["derive"] }
|
||||
serde_json = { version = "1.0.59", features = ["preserve_order"] }
|
||||
serde_qs = "0.7.0"
|
||||
sha2 = "0.9.1"
|
||||
siphasher = "0.3.2"
|
||||
once_cell = "1.5.2"
|
||||
rand = "0.8.1"
|
||||
regex = "1.4.2"
|
||||
rustls = "0.18.0"
|
||||
serde = { version = "1.0.118", features = ["derive"] }
|
||||
serde_json = { version = "1.0.61", features = ["preserve_order"] }
|
||||
serde_qs = "0.8.2"
|
||||
sha2 = "0.9.2"
|
||||
siphasher = "0.3.3"
|
||||
slice-group-by = "0.2.6"
|
||||
structopt = "0.3.20"
|
||||
tar = "0.4.29"
|
||||
structopt = "0.3.21"
|
||||
tar = "0.4.30"
|
||||
tempfile = "3.1.0"
|
||||
tokio = { version = "0.2.18", features = ["macros"] }
|
||||
ureq = { version = "1.5.1", features = ["tls"], default-features = false }
|
||||
tokio = { version = "0.2", features = ["macros"] }
|
||||
ureq = { version = "2.0.0", features = ["tls"], default-features = false }
|
||||
uuid = "0.8"
|
||||
walkdir = "2.3.1"
|
||||
whoami = "0.9.0"
|
||||
whoami = "1.0.3"
|
||||
|
||||
[dependencies.sentry]
|
||||
version = "0.18.1"
|
||||
@ -73,7 +73,7 @@ optional = true
|
||||
[dev-dependencies]
|
||||
serde_url_params = "0.2.0"
|
||||
tempdir = "0.3.7"
|
||||
tokio = { version = "0.2.18", features = ["macros", "time"] }
|
||||
tokio = { version = "0.2", features = ["macros", "time"] }
|
||||
|
||||
[dev-dependencies.assert-json-diff]
|
||||
git = "https://github.com/qdequele/assert-json-diff"
|
||||
@ -82,5 +82,5 @@ branch = "master"
|
||||
[build-dependencies]
|
||||
vergen = "3.1.0"
|
||||
|
||||
[target.'cfg(unix)'.dependencies]
|
||||
[target.'cfg(target_os = "linux")'.dependencies]
|
||||
jemallocator = "0.3.2"
|
||||
|
@ -3,7 +3,7 @@
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||||
<link rel="stylesheet" href="/bulma.min.css">
|
||||
<link rel="stylesheet" href="bulma.min.css">
|
||||
<title>MeiliSearch</title>
|
||||
<style>
|
||||
em {
|
||||
@ -28,8 +28,6 @@
|
||||
}
|
||||
|
||||
.document {
|
||||
padding: 20px 20px;
|
||||
background-color: #f5f5f5;
|
||||
border-radius: 4px;
|
||||
margin-bottom: 20px;
|
||||
display: flex;
|
||||
@ -40,125 +38,175 @@
|
||||
max-width: 75%;
|
||||
padding: 0;
|
||||
margin: 0;
|
||||
list-style-type: none;
|
||||
}
|
||||
|
||||
.document ol li {
|
||||
list-style: none;
|
||||
}
|
||||
|
||||
|
||||
.document .image {
|
||||
max-width: 25%;
|
||||
flex: 0 0 25%;
|
||||
padding-left: 30px;
|
||||
max-width: 50%;
|
||||
margin: 0 auto;
|
||||
box-sizing: border-box;
|
||||
}
|
||||
|
||||
@media screen and (min-width: 770px) {
|
||||
.document .image {
|
||||
max-width: 25%;
|
||||
flex: 0 0 25%;
|
||||
margin: 0;
|
||||
padding-left: 30px;
|
||||
box-sizing: border-box;
|
||||
}
|
||||
}
|
||||
|
||||
.document .image img {
|
||||
width: 100%;
|
||||
}
|
||||
|
||||
.field {
|
||||
list-style-type: none;
|
||||
display: flex;
|
||||
flex-wrap: wrap;
|
||||
}
|
||||
|
||||
.field:not(:last-child) {
|
||||
margin-bottom: 7px;
|
||||
}
|
||||
|
||||
.attribute {
|
||||
flex: 0 0 25%;
|
||||
max-width: 25%;
|
||||
text-align: right;
|
||||
padding-right: 10px;
|
||||
text-align: center;
|
||||
box-sizing: border-box;
|
||||
text-transform: uppercase;
|
||||
font-weight: bold;
|
||||
color: rgba(0,0,0,.7);
|
||||
}
|
||||
|
||||
@media screen and (min-width: 770px) {
|
||||
.attribute {
|
||||
flex: 0 0 25%;
|
||||
max-width: 25%;
|
||||
text-align: right;
|
||||
padding-right: 10px;
|
||||
font-weight: normal;
|
||||
box-sizing: border-box;
|
||||
}
|
||||
}
|
||||
@media screen and (max-width: 770px) {
|
||||
.attribute {
|
||||
padding-bottom: 0;
|
||||
}
|
||||
}
|
||||
|
||||
.content {
|
||||
max-width: 75%;
|
||||
flex: 0 0 75%;
|
||||
box-sizing: border-box;
|
||||
padding-left: 10px;
|
||||
color: rgba(0,0,0,.9);
|
||||
overflow-wrap: break-word;
|
||||
overflow-wrap: anywhere;
|
||||
}
|
||||
|
||||
.hero-foot {
|
||||
padding-bottom: 3rem;
|
||||
}
|
||||
|
||||
@media screen and (max-width: 770px) {
|
||||
.align-on-mobile {
|
||||
text-align: center;
|
||||
}
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
|
||||
<section class="hero is-light">
|
||||
|
||||
<div class="hero-body">
|
||||
<div class="container">
|
||||
<h1 class="title">
|
||||
Welcome to MeiliSearch
|
||||
</h1>
|
||||
<h2 class="subtitle">
|
||||
This dashboard will help you check the search results with ease.
|
||||
</h2>
|
||||
|
||||
<div class="field">
|
||||
<!-- API Key -->
|
||||
<div class="field">
|
||||
<div class="control">
|
||||
<input id="apiKey" class="input is-small" type="password" placeholder="API key (optional)">
|
||||
<div class="help">At least a private API key is required for the dashboard to access the indexes list.</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="content is-medium align-on-mobile">
|
||||
<h1 class="title is-1 is-spaced">
|
||||
Welcome to MeiliSearch
|
||||
</h1>
|
||||
<p class="subtitle is-4">
|
||||
This dashboard will help you check the search results with ease.
|
||||
</p>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</section>
|
||||
|
||||
<section class="hero container">
|
||||
<div class="notification" style="border-radius: 0 0 4px 4px;">
|
||||
|
||||
<nav class="level">
|
||||
<!-- Left side -->
|
||||
<div class="level-left">
|
||||
<div class="level-item">
|
||||
<div class="field has-addons has-addons-right">
|
||||
<p class="control">
|
||||
<div id="apiKeyContainer" class="columns">
|
||||
<input type="hidden" id="apiKey">
|
||||
</div>
|
||||
<div class="columns">
|
||||
<div class="column is-8">
|
||||
<label class="label" for="search">Search something</label>
|
||||
<div class="field has-addons">
|
||||
<div class="control">
|
||||
<span class="select">
|
||||
<select id="index">
|
||||
<select role="listbox" id="index" aria-label="Select the index you want to search on">
|
||||
<!-- indexes names -->
|
||||
</select>
|
||||
</span>
|
||||
</p>
|
||||
<p class="control">
|
||||
<input id="search" class="input" type="text" autofocus placeholder="e.g. George Clooney">
|
||||
</p>
|
||||
</div>
|
||||
<div class="control is-expanded">
|
||||
<input id="search" class="input" type="search" autofocus placeholder="e.g. George Clooney" aria-label="Search through your documents">
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Right side -->
|
||||
<nav class="level-right">
|
||||
<div class="level-item has-text-centered">
|
||||
<div>
|
||||
<div class="column is-4">
|
||||
<div class="columns">
|
||||
<div class="column is-6 has-text-centered">
|
||||
<p class="heading">Documents</p>
|
||||
<p id="count" class="title">0</p>
|
||||
</div>
|
||||
</div>
|
||||
<div class="level-item has-text-centered">
|
||||
<div>
|
||||
<div class="column is-6 has-text-centered">
|
||||
<p class="heading">Time Spent</p>
|
||||
<p id="time" class="title">N/A</p>
|
||||
</div>
|
||||
</div>
|
||||
</nav>
|
||||
</nav>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</section>
|
||||
|
||||
<section>
|
||||
<ol id="results" class="content">
|
||||
<!-- documents matching resquests -->
|
||||
</ol>
|
||||
<div class="container">
|
||||
<ol id="results" class="content">
|
||||
<!-- documents matching resquests -->
|
||||
</ol>
|
||||
</div>
|
||||
</section>
|
||||
</body>
|
||||
|
||||
<script>
|
||||
function setApiKeyField () {
|
||||
var xmlHttp = new XMLHttpRequest();
|
||||
xmlHttp.open("GET", `${baseUrl}/version`, false);
|
||||
|
||||
xmlHttp.onload = function () {
|
||||
let apiKeyContainer = document.getElementById('apiKeyContainer');
|
||||
if (xmlHttp.status === 401) {
|
||||
document.getElementById('apiKey').remove();
|
||||
let inputNode = document.createElement('input');
|
||||
inputNode.setAttribute('id', 'apiKey');
|
||||
inputNode.setAttribute('type', 'password');
|
||||
inputNode.setAttribute('placeholder', 'Enter your API key');
|
||||
inputNode.classList.add('input', 'is-small');
|
||||
|
||||
let controlNode = document.createElement('div');
|
||||
controlNode.classList.add('control');
|
||||
controlNode.appendChild(inputNode);
|
||||
|
||||
let labelNode = document.createElement('label');
|
||||
labelNode.classList.add('label')
|
||||
labelNode.setAttribute('for', 'apiKey');
|
||||
let textNode = document.createTextNode('API Key');
|
||||
labelNode.appendChild(textNode);
|
||||
|
||||
let fieldNode = document.createElement('div');
|
||||
fieldNode.classList.add('field');
|
||||
fieldNode.appendChild(labelNode);
|
||||
fieldNode.append(controlNode);
|
||||
|
||||
let columnNode = document.createElement('div');
|
||||
columnNode.classList.add('column', 'is-4');
|
||||
columnNode.appendChild(fieldNode);
|
||||
apiKeyContainer.appendChild(columnNode);
|
||||
}
|
||||
}
|
||||
|
||||
xmlHttp.send(null);
|
||||
}
|
||||
|
||||
function sanitizeHTMLEntities(str) {
|
||||
if (str && typeof str === 'string') {
|
||||
str = str.replace(/</g,"<");
|
||||
@ -230,9 +278,12 @@
|
||||
delete element._formatted;
|
||||
|
||||
const elem = document.createElement('li');
|
||||
elem.classList.add("document");
|
||||
elem.classList.add("document","box");
|
||||
|
||||
const ol = document.createElement('ol');
|
||||
const div = document.createElement('div');
|
||||
div.classList.add("columns","is-desktop","is-tablet");
|
||||
const info = document.createElement('div');
|
||||
info.classList.add("column","align-on-mobile");
|
||||
let image = undefined;
|
||||
|
||||
for (const prop in element) {
|
||||
@ -243,15 +294,16 @@
|
||||
}
|
||||
}
|
||||
|
||||
const field = document.createElement('li');
|
||||
field.classList.add("field");
|
||||
const field = document.createElement('div');
|
||||
field.classList.add("columns");
|
||||
|
||||
const attribute = document.createElement('div');
|
||||
attribute.classList.add("attribute");
|
||||
attribute.classList.add("attribute", "column");
|
||||
attribute.innerHTML = prop;
|
||||
|
||||
const content = document.createElement('div');
|
||||
content.classList.add("content");
|
||||
content.classList.add("content", "column");
|
||||
|
||||
if (typeof (element[prop]) === "object") {
|
||||
content.innerHTML = JSON.stringify(element[prop]);
|
||||
} else {
|
||||
@ -261,19 +313,22 @@
|
||||
field.appendChild(attribute);
|
||||
field.appendChild(content);
|
||||
|
||||
ol.appendChild(field);
|
||||
info.appendChild(field);
|
||||
}
|
||||
|
||||
elem.appendChild(ol);
|
||||
div.appendChild(info);
|
||||
elem.appendChild(div);
|
||||
|
||||
if (image != undefined) {
|
||||
const div = document.createElement('div');
|
||||
div.classList.add("image");
|
||||
|
||||
const divImage = document.createElement('div');
|
||||
divImage.classList.add("image","column","align-on-mobile");
|
||||
|
||||
const img = document.createElement('img');
|
||||
img.src = image;
|
||||
img.setAttribute("alt","Item illustration");
|
||||
|
||||
div.appendChild(img);
|
||||
divImage.appendChild(img);
|
||||
div.appendChild(divImage);
|
||||
elem.appendChild(div);
|
||||
}
|
||||
|
||||
@ -296,6 +351,7 @@
|
||||
}, false);
|
||||
|
||||
let baseUrl = window.location.origin;
|
||||
setApiKeyField();
|
||||
refreshIndexList();
|
||||
|
||||
search.oninput = triggerSearch;
|
||||
|
@ -127,9 +127,14 @@ pub fn analytics_sender(data: Data, opt: Opt) {
|
||||
|
||||
let body = qs::to_string(&request).unwrap();
|
||||
let response = ureq::post("https://api.amplitude.com/httpapi").send_string(&body);
|
||||
if !response.ok() {
|
||||
let body = response.into_string().unwrap();
|
||||
error!("Unsuccessful call to Amplitude: {}", body);
|
||||
match response {
|
||||
Err(ureq::Error::Status(_ , response)) => {
|
||||
error!("Unsuccessful call to Amplitude: {}", response.into_string().unwrap_or_default());
|
||||
}
|
||||
Err(e) => {
|
||||
error!("Unsuccessful call to Amplitude: {}", e);
|
||||
}
|
||||
_ => (),
|
||||
}
|
||||
|
||||
thread::sleep(Duration::from_secs(3600)) // one hour
|
||||
|
@ -1,7 +1,7 @@
|
||||
use std::error::Error;
|
||||
use std::ops::Deref;
|
||||
use std::path::PathBuf;
|
||||
use std::sync::Arc;
|
||||
use std::sync::{Arc, Mutex};
|
||||
|
||||
use meilisearch_core::{Database, DatabaseOptions, Index};
|
||||
use sha2::Digest;
|
||||
@ -9,6 +9,7 @@ use sha2::Digest;
|
||||
use crate::error::{Error as MSError, ResponseError};
|
||||
use crate::index_update_callback;
|
||||
use crate::option::Opt;
|
||||
use crate::dump::DumpInfo;
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct Data {
|
||||
@ -32,6 +33,7 @@ pub struct DataInner {
|
||||
pub api_keys: ApiKeys,
|
||||
pub server_pid: u32,
|
||||
pub http_payload_size_limit: usize,
|
||||
pub current_dump: Arc<Mutex<Option<DumpInfo>>>,
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
@ -82,6 +84,8 @@ impl Data {
|
||||
|
||||
api_keys.generate_missing_api_keys();
|
||||
|
||||
let current_dump = Arc::new(Mutex::new(None));
|
||||
|
||||
let inner_data = DataInner {
|
||||
db: db.clone(),
|
||||
db_path,
|
||||
@ -90,6 +94,7 @@ impl Data {
|
||||
api_keys,
|
||||
server_pid,
|
||||
http_payload_size_limit,
|
||||
current_dump,
|
||||
};
|
||||
|
||||
let data = Data {
|
||||
@ -135,6 +140,14 @@ impl Data {
|
||||
Ok(created_index)
|
||||
}
|
||||
|
||||
pub fn get_current_dump_info(&self) -> Option<DumpInfo> {
|
||||
self.current_dump.lock().unwrap().clone()
|
||||
}
|
||||
|
||||
pub fn set_current_dump_info(&self, dump_info: DumpInfo) {
|
||||
self.current_dump.lock().unwrap().replace(dump_info);
|
||||
}
|
||||
|
||||
pub fn get_or_create_index<F, R>(&self, uid: &str, f: F) -> Result<R, ResponseError>
|
||||
where
|
||||
F: FnOnce(&Index) -> Result<R, ResponseError>,
|
||||
|
@ -1,7 +1,6 @@
|
||||
use std::fs::{create_dir_all, File};
|
||||
use std::io::prelude::*;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::sync::Mutex;
|
||||
use std::thread;
|
||||
|
||||
use actix_web::web;
|
||||
@ -11,7 +10,6 @@ use log::{error, info};
|
||||
use meilisearch_core::{MainWriter, MainReader, UpdateReader};
|
||||
use meilisearch_core::settings::Settings;
|
||||
use meilisearch_core::update::{apply_settings_update, apply_documents_addition};
|
||||
use once_cell::sync::Lazy;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde_json::json;
|
||||
use tempfile::TempDir;
|
||||
@ -22,9 +20,6 @@ use crate::helpers::compression;
|
||||
use crate::routes::index;
|
||||
use crate::routes::index::IndexResponse;
|
||||
|
||||
// Mutex to share dump progress.
|
||||
static DUMP_INFO: Lazy<Mutex<Option<DumpInfo>>> = Lazy::new(Mutex::default);
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Copy, Clone)]
|
||||
enum DumpVersion {
|
||||
V1,
|
||||
@ -133,15 +128,15 @@ fn import_index_v1(
|
||||
// push document in buffer
|
||||
values.push(document?);
|
||||
// if buffer is full, create and apply a batch, and clean buffer
|
||||
if values.len() == document_batch_size {
|
||||
if values.len() == document_batch_size {
|
||||
let batch = std::mem::replace(&mut values, Vec::with_capacity(document_batch_size));
|
||||
apply_documents_addition(write_txn, &index, batch)?;
|
||||
apply_documents_addition(write_txn, &index, batch, None)?;
|
||||
}
|
||||
}
|
||||
|
||||
// apply documents remaining in the buffer
|
||||
if !values.is_empty() {
|
||||
apply_documents_addition(write_txn, &index, values)?;
|
||||
// apply documents remaining in the buffer
|
||||
if !values.is_empty() {
|
||||
apply_documents_addition(write_txn, &index, values, None)?;
|
||||
}
|
||||
|
||||
// sync index information: stats, updated_at, last_update
|
||||
@ -211,6 +206,7 @@ pub struct DumpInfo {
|
||||
pub status: DumpStatus,
|
||||
#[serde(skip_serializing_if = "Option::is_none", flatten)]
|
||||
pub error: Option<serde_json::Value>,
|
||||
|
||||
}
|
||||
|
||||
impl DumpInfo {
|
||||
@ -228,14 +224,6 @@ impl DumpInfo {
|
||||
pub fn dump_already_in_progress(&self) -> bool {
|
||||
self.status == DumpStatus::InProgress
|
||||
}
|
||||
|
||||
pub fn get_current() -> Option<Self> {
|
||||
DUMP_INFO.lock().unwrap().clone()
|
||||
}
|
||||
|
||||
pub fn set_current(&self) {
|
||||
*DUMP_INFO.lock().unwrap() = Some(self.clone());
|
||||
}
|
||||
}
|
||||
|
||||
/// Generate uid from creation date
|
||||
@ -299,11 +287,10 @@ fn dump_index_documents(data: &web::Data<Data>, reader: &MainReader, dir_path: &
|
||||
}
|
||||
|
||||
/// Write error with a context.
|
||||
fn fail_dump_process<E: std::error::Error>(dump_info: DumpInfo, context: &str, error: E) {
|
||||
fn fail_dump_process<E: std::error::Error>(data: &web::Data<Data>, dump_info: DumpInfo, context: &str, error: E) {
|
||||
let error_message = format!("{}; {}", context, error);
|
||||
|
||||
error!("Something went wrong during dump process: {}", &error_message);
|
||||
dump_info.with_error(Error::dump_failed(error_message).into()).set_current();
|
||||
data.set_current_dump_info(dump_info.with_error(Error::dump_failed(error_message).into()))
|
||||
}
|
||||
|
||||
/// Main function of dump.
|
||||
@ -312,7 +299,7 @@ fn dump_process(data: web::Data<Data>, dumps_dir: PathBuf, dump_info: DumpInfo)
|
||||
let update_reader = match data.db.update_read_txn() {
|
||||
Ok(r) => r,
|
||||
Err(e) => {
|
||||
fail_dump_process(dump_info, "creating RO transaction on updates", e);
|
||||
fail_dump_process(&data, dump_info, "creating RO transaction on updates", e);
|
||||
return ;
|
||||
}
|
||||
};
|
||||
@ -321,7 +308,7 @@ fn dump_process(data: web::Data<Data>, dumps_dir: PathBuf, dump_info: DumpInfo)
|
||||
let main_reader = match data.db.main_read_txn() {
|
||||
Ok(r) => r,
|
||||
Err(e) => {
|
||||
fail_dump_process(dump_info, "creating RO transaction on main", e);
|
||||
fail_dump_process(&data, dump_info, "creating RO transaction on main", e);
|
||||
return ;
|
||||
}
|
||||
};
|
||||
@ -330,7 +317,7 @@ fn dump_process(data: web::Data<Data>, dumps_dir: PathBuf, dump_info: DumpInfo)
|
||||
let tmp_dir = match TempDir::new() {
|
||||
Ok(tmp_dir) => tmp_dir,
|
||||
Err(e) => {
|
||||
fail_dump_process(dump_info, "creating temporary directory", e);
|
||||
fail_dump_process(&data, dump_info, "creating temporary directory", e);
|
||||
return ;
|
||||
}
|
||||
};
|
||||
@ -340,14 +327,14 @@ fn dump_process(data: web::Data<Data>, dumps_dir: PathBuf, dump_info: DumpInfo)
|
||||
let indexes = match crate::routes::index::list_indexes_sync(&data, &main_reader) {
|
||||
Ok(indexes) => indexes,
|
||||
Err(e) => {
|
||||
fail_dump_process(dump_info, "listing indexes", e);
|
||||
fail_dump_process(&data, dump_info, "listing indexes", e);
|
||||
return ;
|
||||
}
|
||||
};
|
||||
|
||||
// create metadata
|
||||
if let Err(e) = dump_metadata(&data, &tmp_dir_path, indexes.clone()) {
|
||||
fail_dump_process(dump_info, "generating metadata", e);
|
||||
fail_dump_process(&data, dump_info, "generating metadata", e);
|
||||
return ;
|
||||
}
|
||||
|
||||
@ -357,32 +344,32 @@ fn dump_process(data: web::Data<Data>, dumps_dir: PathBuf, dump_info: DumpInfo)
|
||||
|
||||
// create index sub-directory
|
||||
if let Err(e) = create_dir_all(&index_path) {
|
||||
fail_dump_process(dump_info, &format!("creating directory for index {}", &index.uid), e);
|
||||
fail_dump_process(&data, dump_info, &format!("creating directory for index {}", &index.uid), e);
|
||||
return ;
|
||||
}
|
||||
|
||||
// export settings
|
||||
if let Err(e) = dump_index_settings(&data, &main_reader, &index_path, &index.uid) {
|
||||
fail_dump_process(dump_info, &format!("generating settings for index {}", &index.uid), e);
|
||||
fail_dump_process(&data, dump_info, &format!("generating settings for index {}", &index.uid), e);
|
||||
return ;
|
||||
}
|
||||
|
||||
// export documents
|
||||
if let Err(e) = dump_index_documents(&data, &main_reader, &index_path, &index.uid) {
|
||||
fail_dump_process(dump_info, &format!("generating documents for index {}", &index.uid), e);
|
||||
fail_dump_process(&data, dump_info, &format!("generating documents for index {}", &index.uid), e);
|
||||
return ;
|
||||
}
|
||||
|
||||
// export updates
|
||||
if let Err(e) = dump_index_updates(&data, &update_reader, &index_path, &index.uid) {
|
||||
fail_dump_process(dump_info, &format!("generating updates for index {}", &index.uid), e);
|
||||
fail_dump_process(&data, dump_info, &format!("generating updates for index {}", &index.uid), e);
|
||||
return ;
|
||||
}
|
||||
}
|
||||
|
||||
// compress dump in a file named `{dump_uid}.dump` in `dumps_dir`
|
||||
if let Err(e) = crate::helpers::compression::to_tar_gz(&tmp_dir_path, &compressed_dumps_dir(&dumps_dir, &dump_info.uid)) {
|
||||
fail_dump_process(dump_info, "compressing dump", e);
|
||||
fail_dump_process(&data, dump_info, "compressing dump", e);
|
||||
return ;
|
||||
}
|
||||
|
||||
@ -392,14 +379,14 @@ fn dump_process(data: web::Data<Data>, dumps_dir: PathBuf, dump_info: DumpInfo)
|
||||
DumpStatus::Done
|
||||
);
|
||||
|
||||
resume.set_current();
|
||||
data.set_current_dump_info(resume);
|
||||
}
|
||||
|
||||
pub fn init_dump_process(data: &web::Data<Data>, dumps_dir: &Path) -> Result<DumpInfo, Error> {
|
||||
create_dir_all(dumps_dir).map_err(|e| Error::dump_failed(format!("creating temporary directory {}", e)))?;
|
||||
|
||||
// check if a dump is already in progress
|
||||
if let Some(resume) = DumpInfo::get_current() {
|
||||
if let Some(resume) = data.get_current_dump_info() {
|
||||
if resume.dump_already_in_progress() {
|
||||
return Err(Error::dump_conflict())
|
||||
}
|
||||
@ -411,13 +398,13 @@ pub fn init_dump_process(data: &web::Data<Data>, dumps_dir: &Path) -> Result<Dum
|
||||
DumpStatus::InProgress
|
||||
);
|
||||
|
||||
info.set_current();
|
||||
data.set_current_dump_info(info.clone());
|
||||
|
||||
let data = data.clone();
|
||||
let dumps_dir = dumps_dir.to_path_buf();
|
||||
let info_cloned = info.clone();
|
||||
// run dump process in a new thread
|
||||
thread::spawn(move ||
|
||||
thread::spawn(move ||
|
||||
dump_process(data, dumps_dir, info_cloned)
|
||||
);
|
||||
|
||||
|
@ -90,7 +90,6 @@ pub enum Error {
|
||||
Internal(String),
|
||||
InvalidIndexUid,
|
||||
InvalidToken(String),
|
||||
Maintenance,
|
||||
MissingAuthorizationHeader,
|
||||
NotFound(String),
|
||||
OpenIndex(String),
|
||||
@ -117,7 +116,6 @@ impl ErrorCode for Error {
|
||||
Internal(_) => Code::Internal,
|
||||
InvalidIndexUid => Code::InvalidIndexUid,
|
||||
InvalidToken(_) => Code::InvalidToken,
|
||||
Maintenance => Code::Maintenance,
|
||||
MissingAuthorizationHeader => Code::MissingAuthorizationHeader,
|
||||
NotFound(_) => Code::NotFound,
|
||||
OpenIndex(_) => Code::OpenIndex,
|
||||
@ -218,10 +216,6 @@ impl Error {
|
||||
Error::InvalidIndexUid
|
||||
}
|
||||
|
||||
pub fn maintenance() -> Error {
|
||||
Error::Maintenance
|
||||
}
|
||||
|
||||
pub fn retrieve_document(doc_id: u32, err: impl fmt::Display) -> Error {
|
||||
Error::RetrieveDocument(doc_id, err.to_string())
|
||||
}
|
||||
@ -251,7 +245,6 @@ impl fmt::Display for Error {
|
||||
Self::Internal(err) => f.write_str(err),
|
||||
Self::InvalidIndexUid => f.write_str("Index must have a valid uid; Index uid can be of type integer or string only composed of alphanumeric characters, hyphens (-) and underscores (_)."),
|
||||
Self::InvalidToken(err) => write!(f, "Invalid API key: {}", err),
|
||||
Self::Maintenance => f.write_str("Server is in maintenance, please try again later"),
|
||||
Self::MissingAuthorizationHeader => f.write_str("You must have an authorization token"),
|
||||
Self::NotFound(err) => write!(f, "{} not found", err),
|
||||
Self::OpenIndex(err) => write!(f, "Impossible to open index; {}", err),
|
||||
|
@ -6,6 +6,8 @@ use std::task::{Context, Poll};
|
||||
use actix_service::{Service, Transform};
|
||||
use actix_web::{dev::ServiceRequest, dev::ServiceResponse, web};
|
||||
use futures::future::{err, ok, Future, Ready};
|
||||
use actix_web::error::ResponseError as _;
|
||||
use actix_web::dev::Body;
|
||||
|
||||
use crate::error::{Error, ResponseError};
|
||||
use crate::Data;
|
||||
@ -17,14 +19,13 @@ pub enum Authentication {
|
||||
Admin,
|
||||
}
|
||||
|
||||
impl<S: 'static, B> Transform<S> for Authentication
|
||||
impl<S: 'static> Transform<S> for Authentication
|
||||
where
|
||||
S: Service<Request = ServiceRequest, Response = ServiceResponse<B>, Error = actix_web::Error>,
|
||||
S: Service<Request = ServiceRequest, Response = ServiceResponse<Body>, Error = actix_web::Error>,
|
||||
S::Future: 'static,
|
||||
B: 'static,
|
||||
{
|
||||
type Request = ServiceRequest;
|
||||
type Response = ServiceResponse<B>;
|
||||
type Response = ServiceResponse<Body>;
|
||||
type Error = actix_web::Error;
|
||||
type InitError = ();
|
||||
type Transform = LoggingMiddleware<S>;
|
||||
@ -44,14 +45,13 @@ pub struct LoggingMiddleware<S> {
|
||||
}
|
||||
|
||||
#[allow(clippy::type_complexity)]
|
||||
impl<S, B> Service for LoggingMiddleware<S>
|
||||
impl<S> Service for LoggingMiddleware<S>
|
||||
where
|
||||
S: Service<Request = ServiceRequest, Response = ServiceResponse<B>, Error = actix_web::Error> + 'static,
|
||||
S: Service<Request = ServiceRequest, Response = ServiceResponse<Body>, Error = actix_web::Error> + 'static,
|
||||
S::Future: 'static,
|
||||
B: 'static,
|
||||
{
|
||||
type Request = ServiceRequest;
|
||||
type Response = ServiceResponse<B>;
|
||||
type Response = ServiceResponse<Body>;
|
||||
type Error = actix_web::Error;
|
||||
type Future = Pin<Box<dyn Future<Output = Result<Self::Response, Self::Error>>>>;
|
||||
|
||||
@ -72,7 +72,11 @@ where
|
||||
let auth_header = match req.headers().get("X-Meili-API-Key") {
|
||||
Some(auth) => match auth.to_str() {
|
||||
Ok(auth) => auth,
|
||||
Err(_) => return Box::pin(err(ResponseError::from(Error::MissingAuthorizationHeader).into())),
|
||||
Err(_) => {
|
||||
let error = ResponseError::from(Error::MissingAuthorizationHeader).error_response();
|
||||
let (request, _) = req.into_parts();
|
||||
return Box::pin(ok(ServiceResponse::new(request, error)))
|
||||
}
|
||||
},
|
||||
None => {
|
||||
return Box::pin(err(ResponseError::from(Error::MissingAuthorizationHeader).into()));
|
||||
@ -95,9 +99,9 @@ where
|
||||
if authenticated {
|
||||
Box::pin(svc.call(req))
|
||||
} else {
|
||||
Box::pin(err(
|
||||
ResponseError::from(Error::InvalidToken(auth_header.to_string())).into()
|
||||
))
|
||||
let error = ResponseError::from(Error::InvalidToken(auth_header.to_string())).error_response();
|
||||
let (request, _) = req.into_parts();
|
||||
Box::pin(ok(ServiceResponse::new(request, error)))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1,19 +1,27 @@
|
||||
use flate2::Compression;
|
||||
use flate2::read::GzDecoder;
|
||||
use flate2::write::GzEncoder;
|
||||
use std::fs::{create_dir_all, File};
|
||||
use std::fs::{create_dir_all, rename, File};
|
||||
use std::path::Path;
|
||||
use tar::{Builder, Archive};
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::error::Error;
|
||||
|
||||
pub fn to_tar_gz(src: &Path, dest: &Path) -> Result<(), Error> {
|
||||
let f = File::create(dest)?;
|
||||
let file_name = format!(".{}", Uuid::new_v4().to_urn());
|
||||
let p = dest.with_file_name(file_name);
|
||||
let tmp_dest = p.as_path();
|
||||
|
||||
let f = File::create(tmp_dest)?;
|
||||
let gz_encoder = GzEncoder::new(f, Compression::default());
|
||||
let mut tar_encoder = Builder::new(gz_encoder);
|
||||
tar_encoder.append_dir_all(".", src)?;
|
||||
let gz_encoder = tar_encoder.into_inner()?;
|
||||
gz_encoder.finish()?;
|
||||
|
||||
rename(tmp_dest, dest)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
@ -11,7 +11,6 @@ use meilisearch_core::criterion::*;
|
||||
use meilisearch_core::settings::RankingRule;
|
||||
use meilisearch_core::{Highlight, Index, RankedMap};
|
||||
use meilisearch_schema::{FieldId, Schema};
|
||||
use meilisearch_tokenizer::is_cjk;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde_json::Value;
|
||||
use siphasher::sip::SipHasher;
|
||||
@ -178,7 +177,7 @@ impl<'a> SearchBuilder<'a> {
|
||||
all_attributes.extend(&all_formatted);
|
||||
},
|
||||
None => {
|
||||
all_attributes.extend(schema.displayed_name());
|
||||
all_attributes.extend(schema.displayed_names());
|
||||
// If we specified at least one attribute to highlight or crop then
|
||||
// all available attributes will be returned in the _formatted field.
|
||||
if self.attributes_to_highlight.is_some() || self.attributes_to_crop.is_some() {
|
||||
@ -193,9 +192,7 @@ impl<'a> SearchBuilder<'a> {
|
||||
.index
|
||||
.document(reader, Some(&all_attributes), doc.id)
|
||||
.map_err(|e| Error::retrieve_document(doc.id.0, e))?
|
||||
.ok_or(Error::internal(
|
||||
"Impossible to retrieve the document; Corrupted data",
|
||||
))?;
|
||||
.unwrap_or_default();
|
||||
|
||||
let mut formatted = document.iter()
|
||||
.filter(|(key, _)| all_formatted.contains(key.as_str()))
|
||||
@ -346,7 +343,7 @@ pub struct SearchResult {
|
||||
|
||||
/// returns the start index and the length on the crop.
|
||||
fn aligned_crop(text: &str, match_index: usize, context: usize) -> (usize, usize) {
|
||||
let is_word_component = |c: &char| c.is_alphanumeric() && !is_cjk(*c);
|
||||
let is_word_component = |c: &char| c.is_alphanumeric() && !super::is_cjk(*c);
|
||||
|
||||
let word_end_index = |mut index| {
|
||||
if text.chars().nth(index - 1).map_or(false, |c| is_word_component(&c)) {
|
||||
@ -447,7 +444,7 @@ fn calculate_matches(
|
||||
continue;
|
||||
}
|
||||
}
|
||||
if !schema.displayed_name().contains(attribute) {
|
||||
if !schema.displayed_names().contains(&attribute) {
|
||||
continue;
|
||||
}
|
||||
if let Some(pos) = matches_result.get_mut(attribute) {
|
||||
@ -482,7 +479,7 @@ fn calculate_highlights(
|
||||
for (attribute, matches) in matches.iter() {
|
||||
if attributes_to_highlight.contains(attribute) {
|
||||
if let Some(Value::String(value)) = document.get(attribute) {
|
||||
let value: Vec<_> = value.chars().collect();
|
||||
let value = value;
|
||||
let mut highlighted_value = String::new();
|
||||
let mut index = 0;
|
||||
|
||||
@ -495,16 +492,16 @@ fn calculate_highlights(
|
||||
let before = value.get(index..m.start);
|
||||
let highlighted = value.get(m.start..(m.start + m.length));
|
||||
if let (Some(before), Some(highlighted)) = (before, highlighted) {
|
||||
highlighted_value.extend(before);
|
||||
highlighted_value.push_str(before);
|
||||
highlighted_value.push_str("<em>");
|
||||
highlighted_value.extend(highlighted);
|
||||
highlighted_value.push_str(highlighted);
|
||||
highlighted_value.push_str("</em>");
|
||||
index = m.start + m.length;
|
||||
} else {
|
||||
error!("value: {:?}; index: {:?}, match: {:?}", value, index, m);
|
||||
}
|
||||
}
|
||||
highlighted_value.extend(value[index..].iter());
|
||||
highlighted_value.push_str(&value[index..]);
|
||||
highlight_result.insert(attribute.to_string(), Value::String(highlighted_value));
|
||||
};
|
||||
}
|
||||
@ -600,7 +597,7 @@ mod tests {
|
||||
|
||||
let mut m = Vec::new();
|
||||
m.push(MatchPosition {
|
||||
start: 510,
|
||||
start: 529,
|
||||
length: 9,
|
||||
});
|
||||
matches.insert("description".to_string(), m);
|
||||
|
@ -5,3 +5,22 @@ pub mod compression;
|
||||
|
||||
pub use authentication::Authentication;
|
||||
pub use normalize_path::NormalizePath;
|
||||
|
||||
/// Returns `true` when `c` falls inside one of the CJK (Chinese, Japanese,
/// Korean) Unicode ranges listed below, `false` otherwise.
///
/// Every range is inclusive on both ends: the upper bounds written here are
/// the last code points of their ranges (e.g. U+D7A3 is the final Hangul
/// syllable), so a half-open `..` range would wrongly reject them.
pub fn is_cjk(c: char) -> bool {
    ('\u{1100}'..='\u{11ff}').contains(&c) // Hangul Jamo
        || ('\u{2e80}'..='\u{2eff}').contains(&c) // CJK Radicals Supplement
        || ('\u{2f00}'..='\u{2fdf}').contains(&c) // Kangxi radical
        || ('\u{3000}'..='\u{303f}').contains(&c) // Japanese-style punctuation
        || ('\u{3040}'..='\u{309f}').contains(&c) // Japanese Hiragana
        || ('\u{30a0}'..='\u{30ff}').contains(&c) // Japanese Katakana
        || ('\u{3100}'..='\u{312f}').contains(&c) // Bopomofo
        || ('\u{3130}'..='\u{318F}').contains(&c) // Hangul Compatibility Jamo
        || ('\u{3200}'..='\u{32ff}').contains(&c) // Enclosed CJK Letters and Months
        || ('\u{3400}'..='\u{4dbf}').contains(&c) // CJK Unified Ideographs Extension A
        || ('\u{4e00}'..='\u{9fff}').contains(&c) // CJK Unified Ideographs
        || ('\u{a960}'..='\u{a97f}').contains(&c) // Hangul Jamo Extended-A
        || ('\u{ac00}'..='\u{d7a3}').contains(&c) // Hangul Syllables
        || ('\u{d7b0}'..='\u{d7ff}').contains(&c) // Hangul Jamo Extended-B
        || ('\u{f900}'..='\u{faff}').contains(&c) // CJK Compatibility Ideographs
        || ('\u{ff00}'..='\u{ffef}').contains(&c) // Full-width roman characters and half-width katakana
}
|
||||
|
@ -24,6 +24,7 @@ use self::error::{payload_error_handler, ResponseError};
|
||||
|
||||
pub fn create_app(
|
||||
data: &Data,
|
||||
enable_frontend: bool,
|
||||
) -> App<
|
||||
impl ServiceFactory<
|
||||
Config = (),
|
||||
@ -34,7 +35,7 @@ pub fn create_app(
|
||||
>,
|
||||
actix_http::body::Body,
|
||||
> {
|
||||
App::new()
|
||||
let app = App::new()
|
||||
.data(data.clone())
|
||||
.app_data(
|
||||
web::JsonConfig::default()
|
||||
@ -46,8 +47,6 @@ pub fn create_app(
|
||||
web::QueryConfig::default()
|
||||
.error_handler(|err, _req| payload_error_handler(err).into())
|
||||
)
|
||||
.service(routes::load_html)
|
||||
.service(routes::load_css)
|
||||
.configure(routes::document::services)
|
||||
.configure(routes::index::services)
|
||||
.configure(routes::search::services)
|
||||
@ -57,7 +56,15 @@ pub fn create_app(
|
||||
.configure(routes::health::services)
|
||||
.configure(routes::stats::services)
|
||||
.configure(routes::key::services)
|
||||
.configure(routes::dump::services)
|
||||
.configure(routes::dump::services);
|
||||
if enable_frontend {
|
||||
app
|
||||
.service(routes::load_html)
|
||||
.service(routes::load_css)
|
||||
} else {
|
||||
app
|
||||
.service(routes::running)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn index_update_callback_txn(index: Index, index_uid: &str, data: &Data, mut writer: &mut MainWriter) -> Result<(), String> {
|
||||
|
@ -47,7 +47,7 @@ async fn main() -> Result<(), MainError> {
|
||||
}
|
||||
}
|
||||
"development" => {
|
||||
env_logger::from_env(env_logger::Env::default().default_filter_or("info")).init();
|
||||
env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info")).init();
|
||||
}
|
||||
_ => unreachable!(),
|
||||
}
|
||||
@ -80,15 +80,17 @@ async fn main() -> Result<(), MainError> {
|
||||
|
||||
print_launch_resume(&opt, &data);
|
||||
|
||||
let enable_frontend = opt.env != "production";
|
||||
let http_server = HttpServer::new(move || {
|
||||
create_app(&data)
|
||||
.wrap(
|
||||
Cors::new()
|
||||
let cors = Cors::default()
|
||||
.send_wildcard()
|
||||
.allowed_headers(vec!["content-type", "x-meili-api-key"])
|
||||
.max_age(86_400) // 24h
|
||||
.finish(),
|
||||
)
|
||||
.allow_any_origin()
|
||||
.allow_any_method()
|
||||
.max_age(86_400); // 24h
|
||||
|
||||
create_app(&data, enable_frontend)
|
||||
.wrap(cors)
|
||||
.wrap(middleware::Logger::default())
|
||||
.wrap(middleware::Compress::default())
|
||||
.wrap(NormalizePath)
|
||||
@ -121,7 +123,7 @@ pub fn print_launch_resume(opt: &Opt, data: &Data) {
|
||||
eprintln!("{}", ascii_name);
|
||||
|
||||
eprintln!("Database path:\t\t{:?}", opt.db_path);
|
||||
eprintln!("Server listening on:\t{:?}", opt.http_addr);
|
||||
eprintln!("Server listening on:\t\"http://{}\"", opt.http_addr);
|
||||
eprintln!("Environment:\t\t{:?}", opt.env);
|
||||
eprintln!("Commit SHA:\t\t{:?}", env!("VERGEN_SHA").to_string());
|
||||
eprintln!(
|
||||
@ -144,7 +146,7 @@ pub fn print_launch_resume(opt: &Opt, data: &Data) {
|
||||
);
|
||||
|
||||
eprintln!(
|
||||
"Amplitude Analytics:\t{:?}",
|
||||
"Anonymous telemetry:\t{:?}",
|
||||
if !opt.no_analytics {
|
||||
"Enabled"
|
||||
} else {
|
||||
@ -164,6 +166,6 @@ pub fn print_launch_resume(opt: &Opt, data: &Data) {
|
||||
eprintln!();
|
||||
eprintln!("Documentation:\t\thttps://docs.meilisearch.com");
|
||||
eprintln!("Source code:\t\thttps://github.com/meilisearch/meilisearch");
|
||||
eprintln!("Contact:\t\thttps://docs.meilisearch.com/resources/contact.html or bonjour@meilisearch.com");
|
||||
eprintln!("Contact:\t\thttps://docs.meilisearch.com/learn/what_is_meilisearch/contact.html or bonjour@meilisearch.com");
|
||||
eprintln!();
|
||||
}
|
||||
|
@ -57,7 +57,7 @@ pub struct Opt {
|
||||
pub max_udb_size: usize,
|
||||
|
||||
/// The maximum size, in bytes, of accepted JSON payloads
|
||||
#[structopt(long, env = "MEILI_HTTP_PAYLOAD_SIZE_LIMIT", default_value = "10485760")] // 10MB
|
||||
#[structopt(long, env = "MEILI_HTTP_PAYLOAD_SIZE_LIMIT", default_value = "104857600")] // 100MB
|
||||
pub http_payload_size_limit: usize,
|
||||
|
||||
/// Read server certificates from CERTFILE.
|
||||
|
@ -132,7 +132,7 @@ async fn get_all_documents(
|
||||
let limit = params.limit.unwrap_or(20);
|
||||
let index_uid = &path.index_uid;
|
||||
let reader = data.db.main_read_txn()?;
|
||||
|
||||
|
||||
let documents = get_all_documents_sync(
|
||||
&data,
|
||||
&reader,
|
||||
@ -145,15 +145,6 @@ async fn get_all_documents(
|
||||
Ok(HttpResponse::Ok().json(documents))
|
||||
}
|
||||
|
||||
fn find_primary_key(document: &IndexMap<String, Value>) -> Option<String> {
|
||||
for key in document.keys() {
|
||||
if key.to_lowercase().contains("id") {
|
||||
return Some(key.to_string());
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
#[serde(rename_all = "camelCase", deny_unknown_fields)]
|
||||
struct UpdateDocumentsQuery {
|
||||
@ -168,26 +159,6 @@ async fn update_multiple_documents(
|
||||
is_partial: bool,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let update_id = data.get_or_create_index(&path.index_uid, |index| {
|
||||
let reader = data.db.main_read_txn()?;
|
||||
|
||||
let mut schema = index
|
||||
.main
|
||||
.schema(&reader)?
|
||||
.ok_or(meilisearch_core::Error::SchemaMissing)?;
|
||||
|
||||
if schema.primary_key().is_none() {
|
||||
let id = match ¶ms.primary_key {
|
||||
Some(id) => id.to_string(),
|
||||
None => body
|
||||
.first()
|
||||
.and_then(find_primary_key)
|
||||
.ok_or(meilisearch_core::Error::MissingPrimaryKey)?,
|
||||
};
|
||||
|
||||
schema.set_primary_key(&id).map_err(Error::bad_request)?;
|
||||
|
||||
data.db.main_write(|w| index.main.put_schema(w, &schema))?;
|
||||
}
|
||||
|
||||
let mut document_addition = if is_partial {
|
||||
index.documents_partial_addition()
|
||||
@ -195,6 +166,26 @@ async fn update_multiple_documents(
|
||||
index.documents_addition()
|
||||
};
|
||||
|
||||
// Return an early error if primary key is already set, otherwise, try to set it up in the
|
||||
// update later.
|
||||
let reader = data.db.main_read_txn()?;
|
||||
let schema = index
|
||||
.main
|
||||
.schema(&reader)?
|
||||
.ok_or(meilisearch_core::Error::SchemaMissing)?;
|
||||
|
||||
match (params.into_inner().primary_key, schema.primary_key()) {
|
||||
(Some(key), None) => document_addition.set_primary_key(key),
|
||||
(None, None) => {
|
||||
let key = body
|
||||
.first()
|
||||
.and_then(find_primary_key)
|
||||
.ok_or(meilisearch_core::Error::MissingPrimaryKey)?;
|
||||
document_addition.set_primary_key(key);
|
||||
}
|
||||
_ => ()
|
||||
}
|
||||
|
||||
for document in body.into_inner() {
|
||||
document_addition.update_document(document);
|
||||
}
|
||||
@ -204,6 +195,15 @@ async fn update_multiple_documents(
|
||||
return Ok(HttpResponse::Accepted().json(IndexUpdateResponse::with_id(update_id)));
|
||||
}
|
||||
|
||||
/// Picks a primary-key candidate from a document: the first key, in the map's
/// iteration order, whose lowercased name contains `"id"`.
///
/// Returns `None` when no key matches.
fn find_primary_key(document: &IndexMap<String, Value>) -> Option<String> {
    document
        .keys()
        .find(|name| name.to_lowercase().contains("id"))
        .cloned()
}
|
||||
|
||||
#[post("/indexes/{index_uid}/documents", wrap = "Authentication::Private")]
|
||||
async fn add_documents(
|
||||
data: web::Data<Data>,
|
||||
|
@ -45,7 +45,7 @@ async fn get_dump_status(
|
||||
let dumps_dir = Path::new(&data.dumps_dir);
|
||||
let dump_uid = &path.dump_uid;
|
||||
|
||||
if let Some(resume) = DumpInfo::get_current() {
|
||||
if let Some(resume) = data.get_current_dump_info() {
|
||||
if &resume.uid == dump_uid {
|
||||
return Ok(HttpResponse::Ok().json(resume));
|
||||
}
|
||||
|
@ -9,5 +9,6 @@ pub fn services(cfg: &mut web::ServiceConfig) {
|
||||
|
||||
#[get("/health")]
|
||||
async fn get_health() -> Result<HttpResponse, ResponseError> {
|
||||
Ok(HttpResponse::NoContent().finish())
|
||||
let payload = serde_json::json!({ "status": "available" });
|
||||
Ok(HttpResponse::Ok().json(payload))
|
||||
}
|
||||
|
@ -29,6 +29,7 @@ impl IndexUpdateResponse {
|
||||
}
|
||||
}
|
||||
|
||||
/// Return the dashboard, should not be used in production. See [running]
|
||||
#[get("/")]
|
||||
pub async fn load_html() -> HttpResponse {
|
||||
HttpResponse::Ok()
|
||||
@ -36,6 +37,17 @@ pub async fn load_html() -> HttpResponse {
|
||||
.body(include_str!("../../public/interface.html").to_string())
|
||||
}
|
||||
|
||||
/// Always return a 200 with:
|
||||
/// ```json
|
||||
/// {
|
||||
/// "status": "MeiliSearch is running"
|
||||
/// }
|
||||
/// ```
|
||||
#[get("/")]
|
||||
pub async fn running() -> HttpResponse {
|
||||
HttpResponse::Ok().json(serde_json::json!({ "status": "MeiliSearch is running" }))
|
||||
}
|
||||
|
||||
#[get("/bulma.min.css")]
|
||||
pub async fn load_css() -> HttpResponse {
|
||||
HttpResponse::Ok()
|
||||
|
@ -1,4 +1,4 @@
|
||||
use std::collections::{HashMap, HashSet};
|
||||
use std::collections::{HashMap, HashSet, BTreeSet};
|
||||
|
||||
use actix_web::{get, post, web, HttpResponse};
|
||||
use log::warn;
|
||||
@ -120,8 +120,8 @@ impl SearchQuery {
|
||||
search_builder.limit(limit);
|
||||
}
|
||||
|
||||
let available_attributes = schema.displayed_name();
|
||||
let mut restricted_attributes: HashSet<&str>;
|
||||
let available_attributes = schema.displayed_names();
|
||||
let mut restricted_attributes: BTreeSet<&str>;
|
||||
match &self.attributes_to_retrieve {
|
||||
Some(attributes_to_retrieve) => {
|
||||
let attributes_to_retrieve: HashSet<&str> =
|
||||
@ -129,13 +129,14 @@ impl SearchQuery {
|
||||
if attributes_to_retrieve.contains("*") {
|
||||
restricted_attributes = available_attributes.clone();
|
||||
} else {
|
||||
restricted_attributes = HashSet::new();
|
||||
restricted_attributes = BTreeSet::new();
|
||||
search_builder.attributes_to_retrieve(HashSet::new());
|
||||
for attr in attributes_to_retrieve {
|
||||
if available_attributes.contains(attr) {
|
||||
restricted_attributes.insert(attr);
|
||||
search_builder.add_retrievable_field(attr.to_string());
|
||||
} else {
|
||||
warn!("The attributes {:?} present in attributesToCrop parameter doesn't exist", attr);
|
||||
warn!("The attributes {:?} present in attributesToRetrieve parameter doesn't exist", attr);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -523,11 +523,11 @@ async fn delete_attributes_for_faceting(
|
||||
}
|
||||
|
||||
fn get_indexed_attributes(schema: &Schema) -> Vec<String> {
|
||||
if schema.is_indexed_all() {
|
||||
["*"].iter().map(|s| s.to_string()).collect()
|
||||
if schema.is_searchable_all() {
|
||||
vec!["*".to_string()]
|
||||
} else {
|
||||
schema
|
||||
.indexed_name()
|
||||
.searchable_names()
|
||||
.iter()
|
||||
.map(|s| s.to_string())
|
||||
.collect()
|
||||
@ -539,7 +539,7 @@ fn get_displayed_attributes(schema: &Schema) -> BTreeSet<String> {
|
||||
["*"].iter().map(|s| s.to_string()).collect()
|
||||
} else {
|
||||
schema
|
||||
.displayed_name()
|
||||
.displayed_names()
|
||||
.iter()
|
||||
.map(|s| s.to_string())
|
||||
.collect()
|
||||
|
@ -6,8 +6,7 @@ use log::error;
|
||||
use std::fs::create_dir_all;
|
||||
use std::path::Path;
|
||||
use std::thread;
|
||||
use std::time::{Duration};
|
||||
use tempfile::TempDir;
|
||||
use std::time::Duration;
|
||||
|
||||
pub fn load_snapshot(
|
||||
db_path: &str,
|
||||
@ -28,24 +27,35 @@ pub fn load_snapshot(
|
||||
}
|
||||
}
|
||||
|
||||
pub fn create_snapshot(data: &Data, snapshot_path: &Path) -> Result<(), Error> {
|
||||
let tmp_dir = TempDir::new()?;
|
||||
pub fn create_snapshot(data: &Data, snapshot_dir: impl AsRef<Path>, snapshot_name: impl AsRef<str>) -> Result<(), Error> {
|
||||
create_dir_all(&snapshot_dir)?;
|
||||
let tmp_dir = tempfile::tempdir_in(&snapshot_dir)?;
|
||||
|
||||
data.db.copy_and_compact_to_path(tmp_dir.path())?;
|
||||
|
||||
compression::to_tar_gz(tmp_dir.path(), snapshot_path).map_err(|e| Error::Internal(format!("something went wrong during snapshot compression: {}", e)))
|
||||
let temp_snapshot_file = tempfile::NamedTempFile::new_in(&snapshot_dir)?;
|
||||
|
||||
compression::to_tar_gz(tmp_dir.path(), temp_snapshot_file.path())
|
||||
.map_err(|e| Error::Internal(format!("something went wrong during snapshot compression: {}", e)))?;
|
||||
|
||||
let snapshot_path = snapshot_dir.as_ref().join(snapshot_name.as_ref());
|
||||
|
||||
temp_snapshot_file.persist(snapshot_path).map_err(|e| Error::Internal(e.to_string()))?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn schedule_snapshot(data: Data, snapshot_dir: &Path, time_gap_s: u64) -> Result<(), Error> {
|
||||
if snapshot_dir.file_name().is_none() {
|
||||
if snapshot_dir.file_name().is_none() {
|
||||
return Err(Error::Internal("invalid snapshot file path".to_string()));
|
||||
}
|
||||
let db_name = Path::new(&data.db_path).file_name().ok_or_else(|| Error::Internal("invalid database name".to_string()))?;
|
||||
create_dir_all(snapshot_dir)?;
|
||||
let snapshot_path = snapshot_dir.join(format!("{}.snapshot", db_name.to_str().unwrap_or("data.ms")));
|
||||
|
||||
thread::spawn(move || loop {
|
||||
if let Err(e) = create_snapshot(&data, &snapshot_path) {
|
||||
let snapshot_name = format!("{}.snapshot", db_name.to_str().unwrap_or("data.ms"));
|
||||
let snapshot_dir = snapshot_dir.to_owned();
|
||||
|
||||
thread::spawn(move || loop {
|
||||
if let Err(e) = create_snapshot(&data, &snapshot_dir, &snapshot_name) {
|
||||
error!("Unsuccessful snapshot creation: {}", e);
|
||||
}
|
||||
thread::sleep(Duration::from_secs(time_gap_s));
|
||||
@ -62,7 +72,7 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_pack_unpack() {
|
||||
let tempdir = TempDir::new().unwrap();
|
||||
let tempdir = tempfile::tempdir().unwrap();
|
||||
|
||||
let test_dir = tempdir.path();
|
||||
let src_dir = test_dir.join("src");
|
||||
@ -72,12 +82,12 @@ mod tests {
|
||||
let file_1_relative = Path::new("file1.txt");
|
||||
let subdir_relative = Path::new("subdir/");
|
||||
let file_2_relative = Path::new("subdir/file2.txt");
|
||||
|
||||
|
||||
create_dir_all(src_dir.join(subdir_relative)).unwrap();
|
||||
fs::File::create(src_dir.join(file_1_relative)).unwrap().write_all(b"Hello_file_1").unwrap();
|
||||
fs::File::create(src_dir.join(file_2_relative)).unwrap().write_all(b"Hello_file_2").unwrap();
|
||||
|
||||
|
||||
|
||||
assert!(compression::to_tar_gz(&src_dir, &archive_path).is_ok());
|
||||
assert!(archive_path.exists());
|
||||
assert!(load_snapshot(&dest_dir.to_str().unwrap(), &archive_path, false, false).is_ok());
|
||||
@ -89,7 +99,7 @@ mod tests {
|
||||
|
||||
let contents = fs::read_to_string(dest_dir.join(file_1_relative)).unwrap();
|
||||
assert_eq!(contents, "Hello_file_1");
|
||||
|
||||
|
||||
let contents = fs::read_to_string(dest_dir.join(file_2_relative)).unwrap();
|
||||
assert_eq!(contents, "Hello_file_2");
|
||||
}
|
||||
|
@ -74,4 +74,4 @@
|
||||
{"id":73,"isActive":false,"balance":"$1,239.74","picture":"http://placehold.it/32x32","age":38,"color":"blue","name":"Eleanor Shepherd","gender":"female","email":"eleanorshepherd@chorizon.com","phone":"+1 (894) 567-2617","address":"670 Lafayette Walk, Darlington, Palau, 8803","about":"Adipisicing ad incididunt id veniam magna cupidatat et labore eu deserunt mollit. Lorem voluptate exercitation elit eu aliquip cupidatat occaecat anim excepteur reprehenderit est est. Ipsum excepteur ea mollit qui nisi laboris ex qui. Cillum velit culpa culpa commodo laboris nisi Lorem non elit deserunt incididunt. Officia quis velit nulla sint incididunt duis mollit tempor adipisicing qui officia eu nisi Lorem. Do proident pariatur ex enim nostrud eu aute esse deserunt eu velit quis culpa exercitation. Occaecat ad cupidatat ullamco consequat duis anim deserunt occaecat aliqua sunt consectetur ipsum magna.\r\n","registered":"2020-02-29T12:15:28 -01:00","latitude":35.749621,"longitude":-94.40842,"tags":["good first issue","new issue","new issue","bug"]}
|
||||
{"id":74,"isActive":true,"balance":"$1,180.90","picture":"http://placehold.it/32x32","age":36,"color":"Green","name":"Stark Wong","gender":"male","email":"starkwong@chorizon.com","phone":"+1 (805) 575-3055","address":"522 Bond Street, Bawcomville, Wisconsin, 324","about":"Aute qui sit incididunt eu adipisicing exercitation sunt nostrud. Id laborum incididunt proident ipsum est cillum esse. Officia ullamco eu ut Lorem do minim ea dolor consequat sit eu est voluptate. Id commodo cillum enim culpa aliquip ullamco nisi Lorem cillum ipsum cupidatat anim officia eu. Dolore sint elit labore pariatur. Officia duis nulla voluptate et nulla ut voluptate laboris eu commodo veniam qui veniam.\r\n","registered":"2020-01-25T10:47:48 -01:00","latitude":-80.452139,"longitude":160.72546,"tags":["wontfix"]}
|
||||
{"id":75,"isActive":false,"balance":"$1,913.42","picture":"http://placehold.it/32x32","age":24,"color":"Green","name":"Emma Jacobs","gender":"female","email":"emmajacobs@chorizon.com","phone":"+1 (899) 554-3847","address":"173 Tapscott Street, Esmont, Maine, 7450","about":"Laboris consequat consectetur tempor labore ullamco ullamco voluptate quis quis duis ut ad. In est irure quis amet sunt nulla ad ut sit labore ut eu quis duis. Nostrud cupidatat aliqua sunt occaecat minim id consequat officia deserunt laborum. Ea dolor reprehenderit laborum veniam exercitation est nostrud excepteur laborum minim id qui et.\r\n","registered":"2019-03-29T06:24:13 -01:00","latitude":-35.53722,"longitude":155.703874,"tags":[]}
|
||||
{"id":76,"isActive":false,"balance":"$1,274.29","picture":"http://placehold.it/32x32","age":25,"color":"Green","name":"Clarice Gardner","gender":"female","email":"claricegardner@chorizon.com","phone":"+1 (810) 407-3258","address":"894 Brooklyn Road, Utting, New Hampshire, 6404","about":"Elit occaecat aute ea adipisicing mollit cupidatat aliquip excepteur veniam minim. Sunt quis dolore in commodo aute esse quis. Lorem in cillum commodo eu anim commodo mollit. Adipisicing enim sunt adipisicing cupidatat adipisicing eiusmod eu do sit nisi.\r\n","registered":"2014-10-20T10:13:32 -02:00","latitude":17.11935,"longitude":65.38197,"tags":["new issue","wontfix"]}
|
||||
{"id":77,"isActive":false,"balance":"$1,274.29","picture":"http://placehold.it/32x32","age":25,"color":"Red","name":"孫武","gender":"male","email":"SunTzu@chorizon.com","phone":"+1 (810) 407-3258","address":"吴國","about":"孫武(前544年-前470年或前496年),字長卿,春秋時期齊國人,著名軍事家、政治家,兵家代表人物。兵書《孫子兵法》的作者,後人尊稱為孫子、兵聖、東方兵聖,山東、蘇州等地尚有祀奉孫武的廟宇兵聖廟。其族人为樂安孫氏始祖,次子孙明为富春孫氏始祖。\r\n","registered":"2014-10-20T10:13:32 -02:00","latitude":17.11935,"longitude":65.38197,"tags":["new issue","wontfix"]}
|
||||
|
@ -1,2 +1,3 @@
|
||||
{"status": "processed","updateId": 0,"type": {"name":"Settings","settings":{"ranking_rules":{"Update":["Typo","Words","Proximity","Attribute","WordsPosition","Exactness"]},"distinct_attribute":"Nothing","primary_key":"Nothing","searchable_attributes":{"Update":["balance","picture","age","color","name","gender","email","phone","address","about","registered","latitude","longitude","tags"]},"displayed_attributes":{"Update":["about","address","age","balance","color","email","gender","id","isActive","latitude","longitude","name","phone","picture","registered","tags"]},"stop_words":"Nothing","synonyms":"Nothing","attributes_for_faceting":"Nothing"}}}
|
||||
{"status": "processed", "updateId": 1, "type": { "name": "DocumentsAddition"}}
|
||||
{"status":"processed","updateId":0,"type":{"name":"Settings","settings":{"ranking_rules":{"Update":["Typo","Words","Proximity","Attribute","WordsPosition","Exactness"]},"distinct_attribute":"Nothing","primary_key":"Nothing","searchable_attributes":"Nothing","displayed_attributes":"Nothing","stop_words":"Nothing","synonyms":"Nothing","attributes_for_faceting":"Nothing"}}}
|
||||
{"status":"processed","updateId":1,"type":{"name":"DocumentsAddition","number":77}}
|
||||
|
||||
|
@ -1590,18 +1590,18 @@
|
||||
"tags": []
|
||||
},
|
||||
{
|
||||
"id": 76,
|
||||
"id": 77,
|
||||
"isActive": false,
|
||||
"balance": "$1,274.29",
|
||||
"picture": "http://placehold.it/32x32",
|
||||
"age": 25,
|
||||
"color": "Green",
|
||||
"name": "Clarice Gardner",
|
||||
"gender": "female",
|
||||
"email": "claricegardner@chorizon.com",
|
||||
"color": "Red",
|
||||
"name": "孫武",
|
||||
"gender": "male",
|
||||
"email": "SunTzu@chorizon.com",
|
||||
"phone": "+1 (810) 407-3258",
|
||||
"address": "894 Brooklyn Road, Utting, New Hampshire, 6404",
|
||||
"about": "Elit occaecat aute ea adipisicing mollit cupidatat aliquip excepteur veniam minim. Sunt quis dolore in commodo aute esse quis. Lorem in cillum commodo eu anim commodo mollit. Adipisicing enim sunt adipisicing cupidatat adipisicing eiusmod eu do sit nisi.\r\n",
|
||||
"address": "吴國",
|
||||
"about": "孫武(前544年-前470年或前496年),字長卿,春秋時期齊國人,著名軍事家、政治家,兵家代表人物。兵書《孫子兵法》的作者,後人尊稱為孫子、兵聖、東方兵聖,山東、蘇州等地尚有祀奉孫武的廟宇兵聖廟。其族人为樂安孫氏始祖,次子孙明为富春孫氏始祖。\r\n",
|
||||
"registered": "2014-10-20T10:13:32 -02:00",
|
||||
"latitude": 17.11935,
|
||||
"longitude": 65.38197,
|
||||
|
@ -57,11 +57,11 @@ impl Server {
|
||||
no_analytics: true,
|
||||
max_mdb_size: default_db_options.main_map_size,
|
||||
max_udb_size: default_db_options.update_map_size,
|
||||
http_payload_size_limit: 10000000,
|
||||
http_payload_size_limit: 100000000,
|
||||
..Opt::default()
|
||||
};
|
||||
|
||||
let data = Data::new(opt.clone()).unwrap();
|
||||
let data = Data::new(opt).unwrap();
|
||||
|
||||
Server {
|
||||
uid: uid.to_string(),
|
||||
@ -88,40 +88,6 @@ impl Server {
|
||||
"wordsPosition",
|
||||
"exactness",
|
||||
],
|
||||
"searchableAttributes": [
|
||||
"balance",
|
||||
"picture",
|
||||
"age",
|
||||
"color",
|
||||
"name",
|
||||
"gender",
|
||||
"email",
|
||||
"phone",
|
||||
"address",
|
||||
"about",
|
||||
"registered",
|
||||
"latitude",
|
||||
"longitude",
|
||||
"tags",
|
||||
],
|
||||
"displayedAttributes": [
|
||||
"id",
|
||||
"isActive",
|
||||
"balance",
|
||||
"picture",
|
||||
"age",
|
||||
"color",
|
||||
"name",
|
||||
"gender",
|
||||
"email",
|
||||
"phone",
|
||||
"address",
|
||||
"about",
|
||||
"registered",
|
||||
"latitude",
|
||||
"longitude",
|
||||
"tags",
|
||||
],
|
||||
});
|
||||
|
||||
server.update_all_settings(body).await;
|
||||
@ -160,11 +126,11 @@ impl Server {
|
||||
eprintln!("get_request: {}", url);
|
||||
|
||||
let mut app =
|
||||
test::init_service(meilisearch_http::create_app(&self.data).wrap(NormalizePath)).await;
|
||||
test::init_service(meilisearch_http::create_app(&self.data, true).wrap(NormalizePath)).await;
|
||||
|
||||
let req = test::TestRequest::get().uri(url).to_request();
|
||||
let res = test::call_service(&mut app, req).await;
|
||||
let status_code = res.status().clone();
|
||||
let status_code = res.status();
|
||||
|
||||
let body = test::read_body(res).await;
|
||||
let response = serde_json::from_slice(&body).unwrap_or_default();
|
||||
@ -175,14 +141,14 @@ impl Server {
|
||||
eprintln!("post_request: {}", url);
|
||||
|
||||
let mut app =
|
||||
test::init_service(meilisearch_http::create_app(&self.data).wrap(NormalizePath)).await;
|
||||
test::init_service(meilisearch_http::create_app(&self.data, true).wrap(NormalizePath)).await;
|
||||
|
||||
let req = test::TestRequest::post()
|
||||
.uri(url)
|
||||
.set_json(&body)
|
||||
.to_request();
|
||||
let res = test::call_service(&mut app, req).await;
|
||||
let status_code = res.status().clone();
|
||||
let status_code = res.status();
|
||||
|
||||
let body = test::read_body(res).await;
|
||||
let response = serde_json::from_slice(&body).unwrap_or_default();
|
||||
@ -204,14 +170,14 @@ impl Server {
|
||||
eprintln!("put_request: {}", url);
|
||||
|
||||
let mut app =
|
||||
test::init_service(meilisearch_http::create_app(&self.data).wrap(NormalizePath)).await;
|
||||
test::init_service(meilisearch_http::create_app(&self.data, true).wrap(NormalizePath)).await;
|
||||
|
||||
let req = test::TestRequest::put()
|
||||
.uri(url)
|
||||
.set_json(&body)
|
||||
.to_request();
|
||||
let res = test::call_service(&mut app, req).await;
|
||||
let status_code = res.status().clone();
|
||||
let status_code = res.status();
|
||||
|
||||
let body = test::read_body(res).await;
|
||||
let response = serde_json::from_slice(&body).unwrap_or_default();
|
||||
@ -233,11 +199,11 @@ impl Server {
|
||||
eprintln!("delete_request: {}", url);
|
||||
|
||||
let mut app =
|
||||
test::init_service(meilisearch_http::create_app(&self.data).wrap(NormalizePath)).await;
|
||||
test::init_service(meilisearch_http::create_app(&self.data, true).wrap(NormalizePath)).await;
|
||||
|
||||
let req = test::TestRequest::delete().uri(url).to_request();
|
||||
let res = test::call_service(&mut app, req).await;
|
||||
let status_code = res.status().clone();
|
||||
let status_code = res.status();
|
||||
|
||||
let body = test::read_body(res).await;
|
||||
let response = serde_json::from_slice(&body).unwrap_or_default();
|
||||
|
@ -1,5 +1,7 @@
|
||||
mod common;
|
||||
|
||||
use serde_json::json;
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn delete() {
|
||||
let mut server = common::Server::test_server().await;
|
||||
@ -32,3 +34,34 @@ async fn delete_batch() {
|
||||
assert_eq!(status_code, 404);
|
||||
}
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn text_clear_all_placeholder_search() {
|
||||
let mut server = common::Server::with_uid("test");
|
||||
let body = json!({
|
||||
"uid": "test",
|
||||
});
|
||||
|
||||
server.create_index(body).await;
|
||||
let settings = json!({
|
||||
"attributesForFaceting": ["genre"],
|
||||
});
|
||||
|
||||
server.update_all_settings(settings).await;
|
||||
|
||||
let documents = json!([
|
||||
{ "id": 2, "title": "Pride and Prejudice", "author": "Jane Austin", "genre": "romance" },
|
||||
{ "id": 456, "title": "Le Petit Prince", "author": "Antoine de Saint-Exupéry", "genre": "adventure" },
|
||||
{ "id": 1, "title": "Alice In Wonderland", "author": "Lewis Carroll", "genre": "fantasy" },
|
||||
{ "id": 1344, "title": "The Hobbit", "author": "J. R. R. Tolkien", "genre": "fantasy" },
|
||||
{ "id": 4, "title": "Harry Potter and the Half-Blood Prince", "author": "J. K. Rowling", "genre": "fantasy" },
|
||||
{ "id": 42, "title": "The Hitchhiker's Guide to the Galaxy", "author": "Douglas Adams" }
|
||||
]);
|
||||
|
||||
server.add_or_update_multiple_documents(documents).await;
|
||||
server.clear_all_documents().await;
|
||||
let (response, _) = server.search_post(json!({ "q": "", "facetsDistribution": ["genre"] })).await;
|
||||
assert_eq!(response["nbHits"], 0);
|
||||
let (response, _) = server.search_post(json!({ "q": "" })).await;
|
||||
assert_eq!(response["nbHits"], 0);
|
||||
}
|
||||
|
@ -16,7 +16,7 @@ async fn trigger_and_wait_dump(server: &mut common::Server) -> String {
|
||||
|
||||
let dump_uid = value["uid"].as_str().unwrap().to_string();
|
||||
|
||||
for _ in 0..20 as u8 {
|
||||
for _ in 0..20_u8 {
|
||||
let (value, status_code) = server.get_dump_status(&dump_uid).await;
|
||||
|
||||
assert_eq!(status_code, 200);
|
||||
@ -42,14 +42,12 @@ fn current_dump_version() -> String {
|
||||
}
|
||||
|
||||
fn read_all_jsonline<R: std::io::Read>(r: R) -> Value {
|
||||
let deserializer = serde_json::Deserializer::from_reader(r);
|
||||
let iterator = deserializer.into_iter::<serde_json::Value>();
|
||||
let deserializer = serde_json::Deserializer::from_reader(r); let iterator = deserializer.into_iter::<serde_json::Value>();
|
||||
|
||||
json!(iterator.map(|v| v.unwrap()).collect::<Vec<Value>>())
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
#[ignore]
|
||||
async fn trigger_dump_should_return_ok() {
|
||||
let server = common::Server::test_server().await;
|
||||
|
||||
@ -59,7 +57,6 @@ async fn trigger_dump_should_return_ok() {
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
#[ignore]
|
||||
async fn trigger_dump_twice_should_return_conflict() {
|
||||
let server = common::Server::test_server().await;
|
||||
|
||||
@ -77,12 +74,11 @@ async fn trigger_dump_twice_should_return_conflict() {
|
||||
let (value, status_code) = server.trigger_dump().await;
|
||||
|
||||
|
||||
assert_json_eq!(expected.clone(), value.clone(), ordered: false);
|
||||
assert_json_eq!(expected, value, ordered: false);
|
||||
assert_eq!(status_code, 409);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
#[ignore]
|
||||
async fn trigger_dump_concurently_should_return_conflict() {
|
||||
let server = common::Server::test_server().await;
|
||||
|
||||
@ -95,12 +91,11 @@ async fn trigger_dump_concurently_should_return_conflict() {
|
||||
|
||||
let ((_value_1, _status_code_1), (value_2, status_code_2)) = futures::join!(server.trigger_dump(), server.trigger_dump());
|
||||
|
||||
assert_json_eq!(expected.clone(), value_2.clone(), ordered: false);
|
||||
assert_json_eq!(expected, value_2, ordered: false);
|
||||
assert_eq!(status_code_2, 409);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
#[ignore]
|
||||
async fn get_dump_status_early_should_return_in_progress() {
|
||||
let mut server = common::Server::test_server().await;
|
||||
|
||||
@ -121,11 +116,10 @@ async fn get_dump_status_early_should_return_in_progress() {
|
||||
|
||||
assert_eq!(status_code, 200);
|
||||
|
||||
assert_json_eq!(expected.clone(), value.clone(), ordered: false);
|
||||
assert_json_eq!(expected, value, ordered: false);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
#[ignore]
|
||||
async fn get_dump_status_should_return_done() {
|
||||
let mut server = common::Server::test_server().await;
|
||||
|
||||
@ -147,11 +141,10 @@ async fn get_dump_status_should_return_done() {
|
||||
|
||||
assert_eq!(status_code, 200);
|
||||
|
||||
assert_json_eq!(expected.clone(), value.clone(), ordered: false);
|
||||
assert_json_eq!(expected, value, ordered: false);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
#[ignore]
|
||||
async fn get_dump_status_should_return_error_provoking_it() {
|
||||
let mut server = common::Server::test_server().await;
|
||||
|
||||
@ -180,11 +173,10 @@ async fn get_dump_status_should_return_error_provoking_it() {
|
||||
|
||||
assert_eq!(status_code, 200);
|
||||
|
||||
assert_json_eq!(expected.clone(), value.clone(), ordered: false);
|
||||
assert_json_eq!(expected, value, ordered: false);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
#[ignore]
|
||||
async fn dump_metadata_should_be_valid() {
|
||||
let mut server = common::Server::test_server().await;
|
||||
|
||||
@ -228,11 +220,10 @@ async fn dump_metadata_should_be_valid() {
|
||||
"dumpVersion": current_dump_version()
|
||||
});
|
||||
|
||||
assert_json_include!(expected: expected.clone(), actual: metadata.clone());
|
||||
assert_json_include!(expected: expected, actual: metadata);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
#[ignore]
|
||||
async fn dump_gzip_should_have_been_created() {
|
||||
let mut server = common::Server::test_server().await;
|
||||
|
||||
@ -245,7 +236,6 @@ async fn dump_gzip_should_have_been_created() {
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
#[ignore]
|
||||
async fn dump_index_settings_should_be_valid() {
|
||||
let mut server = common::Server::test_server().await;
|
||||
|
||||
@ -321,11 +311,10 @@ async fn dump_index_settings_should_be_valid() {
|
||||
let file = File::open(tmp_dir_path.join("test").join("settings.json")).unwrap();
|
||||
let settings: serde_json::Value = serde_json::from_reader(file).unwrap();
|
||||
|
||||
assert_json_eq!(expected.clone(), settings.clone(), ordered: false);
|
||||
assert_json_eq!(expected, settings, ordered: false);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
#[ignore]
|
||||
async fn dump_index_documents_should_be_valid() {
|
||||
let mut server = common::Server::test_server().await;
|
||||
|
||||
@ -345,11 +334,10 @@ async fn dump_index_documents_should_be_valid() {
|
||||
let file = File::open(tmp_dir_path.join("test").join("documents.jsonl")).unwrap();
|
||||
let documents = read_all_jsonline(file);
|
||||
|
||||
assert_json_eq!(expected.clone(), documents.clone(), ordered: false);
|
||||
assert_json_eq!(expected, documents, ordered: false);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
#[ignore]
|
||||
async fn dump_index_updates_should_be_valid() {
|
||||
let mut server = common::Server::test_server().await;
|
||||
|
||||
@ -367,25 +355,14 @@ async fn dump_index_updates_should_be_valid() {
|
||||
compression::from_tar_gz(&dumps_dir.join(&format!("{}.dump", uid)), tmp_dir_path).unwrap();
|
||||
|
||||
let file = File::open(tmp_dir_path.join("test").join("updates.jsonl")).unwrap();
|
||||
let mut updates = read_all_jsonline(file);
|
||||
let updates = read_all_jsonline(file);
|
||||
|
||||
|
||||
// hotfix until #943 is fixed (https://github.com/meilisearch/MeiliSearch/issues/943)
|
||||
updates.as_array_mut().unwrap()
|
||||
.get_mut(0).unwrap()
|
||||
.get_mut("type").unwrap()
|
||||
.get_mut("settings").unwrap()
|
||||
.get_mut("displayed_attributes").unwrap()
|
||||
.get_mut("Update").unwrap()
|
||||
.as_array_mut().unwrap().sort_by(|a, b| a.as_str().cmp(&b.as_str()));
|
||||
|
||||
eprintln!("{}\n", updates.to_string());
|
||||
eprintln!("{}", expected.to_string());
|
||||
assert_json_include!(expected: expected.clone(), actual: updates.clone());
|
||||
eprintln!("{}\n", updates);
|
||||
eprintln!("{}", expected);
|
||||
assert_json_include!(expected: expected, actual: updates);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
#[ignore]
|
||||
async fn get_unexisting_dump_status_should_return_not_found() {
|
||||
let mut server = common::Server::test_server().await;
|
||||
|
||||
|
@ -177,7 +177,7 @@ async fn document_not_found_error() {
|
||||
#[actix_rt::test]
|
||||
async fn payload_too_large_error() {
|
||||
let mut server = common::Server::with_uid("test");
|
||||
let bigvec = vec![0u64; 10_000_000]; // 80mb
|
||||
let bigvec = vec![0u64; 100_000_000]; // 800mb
|
||||
assert_error!(
|
||||
"payload_too_large",
|
||||
"invalid_request_error",
|
||||
|
@ -6,6 +6,7 @@ async fn test_healthyness() {
|
||||
|
||||
// Check that the server is healthy
|
||||
|
||||
let (_response, status_code) = server.get_health().await;
|
||||
assert_eq!(status_code, 204);
|
||||
let (response, status_code) = server.get_health().await;
|
||||
assert_eq!(status_code, 200);
|
||||
assert_eq!(response["status"], "available");
|
||||
}
|
||||
|
@ -779,30 +779,32 @@ async fn update_existing_primary_key_is_error() {
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn test_facets_distribution_attribute() {
|
||||
async fn test_field_distribution_attribute() {
|
||||
let mut server = common::Server::test_server().await;
|
||||
|
||||
let (response, _status_code) = server.get_index_stats().await;
|
||||
|
||||
let expected = json!({
|
||||
"isIndexing": false,
|
||||
"numberOfDocuments":77,
|
||||
"fieldsDistribution":{
|
||||
"age":77,
|
||||
"gender":77,
|
||||
"phone":77,
|
||||
"name":77,
|
||||
"registered":77,
|
||||
"latitude":77,
|
||||
"email":77,
|
||||
"tags":77,
|
||||
"longitude":77,
|
||||
"color":77,
|
||||
"address":77,
|
||||
"balance":77,
|
||||
"about":77,
|
||||
"picture":77,
|
||||
"fieldsDistribution": {
|
||||
"about": 77,
|
||||
"address": 77,
|
||||
"age": 77,
|
||||
"balance": 77,
|
||||
"color": 77,
|
||||
"email": 77,
|
||||
"gender": 77,
|
||||
"id": 77,
|
||||
"isActive": 77,
|
||||
"latitude": 77,
|
||||
"longitude": 77,
|
||||
"name": 77,
|
||||
"phone": 77,
|
||||
"picture": 77,
|
||||
"registered": 77,
|
||||
"tags": 77
|
||||
},
|
||||
"isIndexing": false,
|
||||
"numberOfDocuments": 77
|
||||
});
|
||||
|
||||
assert_json_eq!(expected, response, ordered: true);
|
||||
|
@ -94,13 +94,21 @@ async fn return_update_status_of_pushed_documents() {
|
||||
];
|
||||
|
||||
let mut update_ids = Vec::new();
|
||||
|
||||
let mut bodies = bodies.into_iter();
|
||||
|
||||
let url = "/indexes/test/documents?primaryKey=title";
|
||||
let (response, status_code) = server.post_request(&url, bodies.next().unwrap()).await;
|
||||
assert_eq!(status_code, 202);
|
||||
let update_id = response["updateId"].as_u64().unwrap();
|
||||
update_ids.push(update_id);
|
||||
server.wait_update_id(update_id).await;
|
||||
|
||||
let url = "/indexes/test/documents";
|
||||
for body in bodies {
|
||||
let (response, status_code) = server.post_request(&url, body).await;
|
||||
assert_eq!(status_code, 202);
|
||||
let update_id = response["updateId"].as_u64().unwrap();
|
||||
update_ids.push(update_id);
|
||||
let (response, status_code) = server.post_request(&url, body).await;
|
||||
assert_eq!(status_code, 202);
|
||||
let update_id = response["updateId"].as_u64().unwrap();
|
||||
update_ids.push(update_id);
|
||||
}
|
||||
|
||||
// 2. Fetch the status of index.
|
||||
@ -173,7 +181,7 @@ async fn should_return_existing_update() {
|
||||
let (response, status_code) = server.create_index(body).await;
|
||||
assert_eq!(status_code, 201);
|
||||
assert_eq!(response["primaryKey"], json!(null));
|
||||
|
||||
|
||||
let body = json!([{
|
||||
"title": "Test",
|
||||
"comment": "comment test"
|
||||
|
@ -37,14 +37,8 @@ async fn placeholder_search_with_offset() {
|
||||
assert_eq!(status_code, 200);
|
||||
// take results at offset 3 as reference
|
||||
let lock = expected.lock().unwrap();
|
||||
lock.replace(
|
||||
response["hits"].as_array().unwrap()[3..6]
|
||||
.iter()
|
||||
.cloned()
|
||||
.collect(),
|
||||
);
|
||||
lock.replace(response["hits"].as_array().unwrap()[3..6].to_vec());
|
||||
});
|
||||
|
||||
let expected = expected.into_inner().unwrap().into_inner();
|
||||
|
||||
let query = json!({
|
||||
@ -588,3 +582,48 @@ async fn placeholder_search_with_empty_query() {
|
||||
assert_eq!(response["hits"].as_array().unwrap().len(), 3);
|
||||
});
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn test_filter_nb_hits_search_placeholder() {
|
||||
let mut server = common::Server::with_uid("test");
|
||||
|
||||
let body = json!({
|
||||
"uid": "test",
|
||||
"primaryKey": "id",
|
||||
});
|
||||
|
||||
server.create_index(body).await;
|
||||
let documents = json!([
|
||||
{
|
||||
"id": 1,
|
||||
"content": "a",
|
||||
"color": "green",
|
||||
"size": 1,
|
||||
},
|
||||
{
|
||||
"id": 2,
|
||||
"content": "a",
|
||||
"color": "green",
|
||||
"size": 2,
|
||||
},
|
||||
{
|
||||
"id": 3,
|
||||
"content": "a",
|
||||
"color": "blue",
|
||||
"size": 3,
|
||||
},
|
||||
]);
|
||||
|
||||
server.add_or_update_multiple_documents(documents).await;
|
||||
let (response, _) = server.search_post(json!({})).await;
|
||||
assert_eq!(response["nbHits"], 3);
|
||||
|
||||
server.update_distinct_attribute(json!("color")).await;
|
||||
|
||||
let (response, _) = server.search_post(json!({})).await;
|
||||
assert_eq!(response["nbHits"], 2);
|
||||
|
||||
let (response, _) = server.search_post(json!({"filters": "size < 3"})).await;
|
||||
println!("result: {}", response);
|
||||
assert_eq!(response["nbHits"], 1);
|
||||
}
|
||||
|
@ -130,13 +130,13 @@ async fn search_unexpected_params() {
|
||||
|
||||
let expected = "unknown field `lol`, expected one of `q`, `offset`, `limit`, `attributesToRetrieve`, `attributesToCrop`, `cropLength`, `attributesToHighlight`, `filters`, `matches`, `facetFilters`, `facetsDistribution` at line 1 column 6";
|
||||
|
||||
let post_query = serde_json::from_str::<meilisearch_http::routes::search::SearchQueryPost>(&query.clone().to_string());
|
||||
let post_query = serde_json::from_str::<meilisearch_http::routes::search::SearchQueryPost>(&query.to_string());
|
||||
assert!(post_query.is_err());
|
||||
assert_eq!(expected.clone(), post_query.err().unwrap().to_string());
|
||||
assert_eq!(expected, post_query.err().unwrap().to_string());
|
||||
|
||||
let get_query: Result<meilisearch_http::routes::search::SearchQuery, _> = serde_json::from_str(&query.clone().to_string());
|
||||
let get_query: Result<meilisearch_http::routes::search::SearchQuery, _> = serde_json::from_str(&query.to_string());
|
||||
assert!(get_query.is_err());
|
||||
assert_eq!(expected.clone(), get_query.err().unwrap().to_string());
|
||||
assert_eq!(expected, get_query.err().unwrap().to_string());
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
@ -358,6 +358,66 @@ async fn search_with_attribute_to_highlight_wildcard() {
|
||||
});
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn search_with_attribute_to_highlight_wildcard_chinese() {
|
||||
let mut server = common::Server::test_server().await;
|
||||
|
||||
let query = json!({
|
||||
"q": "子孙",
|
||||
"limit": 1,
|
||||
"attributesToHighlight": ["*"]
|
||||
});
|
||||
|
||||
let expected = json!([
|
||||
{
|
||||
"id": 77,
|
||||
"isActive": false,
|
||||
"balance": "$1,274.29",
|
||||
"picture": "http://placehold.it/32x32",
|
||||
"age": 25,
|
||||
"color": "Red",
|
||||
"name": "孫武",
|
||||
"gender": "male",
|
||||
"email": "SunTzu@chorizon.com",
|
||||
"phone": "+1 (810) 407-3258",
|
||||
"address": "吴國",
|
||||
"about": "孫武(前544年-前470年或前496年),字長卿,春秋時期齊國人,著名軍事家、政治家,兵家代表人物。兵書《孫子兵法》的作者,後人尊稱為孫子、兵聖、東方兵聖,山東、蘇州等地尚有祀奉孫武的廟宇兵聖廟。其族人为樂安孫氏始祖,次子孙明为富春孫氏始祖。\r\n",
|
||||
"registered": "2014-10-20T10:13:32 -02:00",
|
||||
"latitude": 17.11935,
|
||||
"longitude": 65.38197,
|
||||
"tags": [
|
||||
"new issue",
|
||||
"wontfix"
|
||||
],
|
||||
"_formatted": {
|
||||
"id": 77,
|
||||
"isActive": false,
|
||||
"balance": "$1,274.29",
|
||||
"picture": "http://placehold.it/32x32",
|
||||
"age": 25,
|
||||
"color": "Red",
|
||||
"name": "<em>孫武</em>",
|
||||
"gender": "male",
|
||||
"email": "SunTzu@chorizon.com",
|
||||
"phone": "+1 (810) 407-3258",
|
||||
"address": "吴國",
|
||||
"about": "<em>孫武</em>(前544年-前470年或前496年),字長卿,春秋時期齊國人,著名軍事家、政治家,兵家代表人物。兵書《<em>孫子</em>兵法》的作者,後人尊稱為<em>孫子</em>、兵聖、東方兵聖,山東、蘇州等地尚有祀奉<em>孫武</em>的廟宇兵聖廟。其族人为樂安<em>孫氏</em>始祖,次<em>子孙</em>明为富春孫氏始祖。\r\n",
|
||||
"registered": "2014-10-20T10:13:32 -02:00",
|
||||
"latitude": 17.11935,
|
||||
"longitude": 65.38197,
|
||||
"tags": [
|
||||
"new issue",
|
||||
"wontfix"
|
||||
]
|
||||
}
|
||||
}
|
||||
]);
|
||||
|
||||
test_post_get_search!(server, query, |response, _status_code| {
|
||||
assert_json_eq!(expected.clone(), response["hits"].clone(), ordered: false);
|
||||
});
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn search_with_attribute_to_highlight_1() {
|
||||
let mut server = common::Server::test_server().await;
|
||||
@ -554,6 +614,16 @@ async fn search_with_attributes_to_retrieve() {
|
||||
test_post_get_search!(server, query, |response, _status_code| {
|
||||
assert_json_eq!(expected.clone(), response["hits"].clone(), ordered: false);
|
||||
});
|
||||
|
||||
let query = json!({
|
||||
"q": "cherry",
|
||||
"limit": 1,
|
||||
"attributesToRetrieve": [],
|
||||
});
|
||||
|
||||
test_post_get_search!(server, query, |response, _status_code| {
|
||||
assert_json_eq!(json!([{}]), response["hits"].clone(), ordered: false);
|
||||
});
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
@ -1779,8 +1849,6 @@ async fn update_documents_with_facet_distribution() {
|
||||
server.create_index(body).await;
|
||||
let settings = json!({
|
||||
"attributesForFaceting": ["genre"],
|
||||
"displayedAttributes": ["genre"],
|
||||
"searchableAttributes": ["genre"]
|
||||
});
|
||||
server.update_all_settings(settings).await;
|
||||
let update1 = json!([
|
||||
@ -1829,3 +1897,80 @@ async fn update_documents_with_facet_distribution() {
|
||||
let (response2, _) = server.search_post(search).await;
|
||||
assert_json_eq!(expected_facet_distribution, response2["facetsDistribution"].clone());
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn test_filter_nb_hits_search_normal() {
|
||||
let mut server = common::Server::with_uid("test");
|
||||
|
||||
let body = json!({
|
||||
"uid": "test",
|
||||
"primaryKey": "id",
|
||||
});
|
||||
|
||||
server.create_index(body).await;
|
||||
let documents = json!([
|
||||
{
|
||||
"id": 1,
|
||||
"content": "a",
|
||||
"color": "green",
|
||||
"size": 1,
|
||||
},
|
||||
{
|
||||
"id": 2,
|
||||
"content": "a",
|
||||
"color": "green",
|
||||
"size": 2,
|
||||
},
|
||||
{
|
||||
"id": 3,
|
||||
"content": "a",
|
||||
"color": "blue",
|
||||
"size": 3,
|
||||
},
|
||||
]);
|
||||
|
||||
server.add_or_update_multiple_documents(documents).await;
|
||||
let (response, _) = server.search_post(json!({"q": "a"})).await;
|
||||
assert_eq!(response["nbHits"], 3);
|
||||
|
||||
let (response, _) = server.search_post(json!({"q": "a", "filters": "size = 1"})).await;
|
||||
assert_eq!(response["nbHits"], 1);
|
||||
|
||||
server.update_distinct_attribute(json!("color")).await;
|
||||
|
||||
let (response, _) = server.search_post(json!({"q": "a"})).await;
|
||||
assert_eq!(response["nbHits"], 2);
|
||||
|
||||
let (response, _) = server.search_post(json!({"q": "a", "filters": "size < 3"})).await;
|
||||
println!("result: {}", response);
|
||||
assert_eq!(response["nbHits"], 1);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn test_max_word_query() {
|
||||
use meilisearch_core::MAX_QUERY_LEN;
|
||||
|
||||
let mut server = common::Server::with_uid("test");
|
||||
let body = json!({
|
||||
"uid": "test",
|
||||
"primaryKey": "id",
|
||||
});
|
||||
server.create_index(body).await;
|
||||
let documents = json!([
|
||||
{"id": 1, "value": "1 2 3 4 5 6 7 8 9 10 11"},
|
||||
{"id": 2, "value": "1 2 3 4 5 6 7 8 9 10"}]
|
||||
);
|
||||
server.add_or_update_multiple_documents(documents).await;
|
||||
|
||||
// We want to create a request where the 11 will be ignored. We have 2 documents, where a query
|
||||
// with only one should return both, but a query with 1 and 11 should return only the first.
|
||||
// This is how we know that outstanding query words have been ignored
|
||||
let query = (0..MAX_QUERY_LEN)
|
||||
.map(|_| "1")
|
||||
.chain(std::iter::once("11"))
|
||||
.fold(String::new(), |s, w| s + " " + w);
|
||||
let (response, _) = server.search_post(json!({"q": query})).await;
|
||||
assert_eq!(response["nbHits"], 2);
|
||||
let (response, _) = server.search_post(json!({"q": "1 11"})).await;
|
||||
assert_eq!(response["nbHits"], 1);
|
||||
}
|
||||
|
@ -167,6 +167,89 @@ async fn search_with_settings_stop_words() {
|
||||
async fn search_with_settings_synonyms() {
|
||||
let mut server = common::Server::test_server().await;
|
||||
|
||||
let config = json!({
|
||||
"rankingRules": [
|
||||
"typo",
|
||||
"words",
|
||||
"proximity",
|
||||
"attribute",
|
||||
"wordsPosition",
|
||||
"desc(age)",
|
||||
"exactness",
|
||||
"desc(balance)"
|
||||
],
|
||||
"distinctAttribute": null,
|
||||
"searchableAttributes": [
|
||||
"name",
|
||||
"age",
|
||||
"color",
|
||||
"gender",
|
||||
"email",
|
||||
"address",
|
||||
"about"
|
||||
],
|
||||
"displayedAttributes": [
|
||||
"name",
|
||||
"age",
|
||||
"gender",
|
||||
"color",
|
||||
"email",
|
||||
"phone",
|
||||
"address",
|
||||
"balance"
|
||||
],
|
||||
"stopWords": null,
|
||||
"synonyms": {
|
||||
"Application": [
|
||||
"Exercitation"
|
||||
]
|
||||
},
|
||||
});
|
||||
|
||||
server.update_all_settings(config).await;
|
||||
|
||||
let query = "q=application&limit=3";
|
||||
let expect = json!([
|
||||
{
|
||||
"balance": "$1,921.58",
|
||||
"age": 31,
|
||||
"color": "Green",
|
||||
"name": "Harper Carson",
|
||||
"gender": "male",
|
||||
"email": "harpercarson@chorizon.com",
|
||||
"phone": "+1 (912) 430-3243",
|
||||
"address": "883 Dennett Place, Knowlton, New Mexico, 9219"
|
||||
},
|
||||
{
|
||||
"balance": "$1,706.13",
|
||||
"age": 27,
|
||||
"color": "Green",
|
||||
"name": "Cherry Orr",
|
||||
"gender": "female",
|
||||
"email": "cherryorr@chorizon.com",
|
||||
"phone": "+1 (995) 479-3174",
|
||||
"address": "442 Beverly Road, Ventress, New Mexico, 3361"
|
||||
},
|
||||
{
|
||||
"balance": "$1,476.39",
|
||||
"age": 28,
|
||||
"color": "brown",
|
||||
"name": "Maureen Dale",
|
||||
"gender": "female",
|
||||
"email": "maureendale@chorizon.com",
|
||||
"phone": "+1 (984) 538-3684",
|
||||
"address": "817 Newton Street, Bannock, Wyoming, 1468"
|
||||
}
|
||||
]);
|
||||
|
||||
let (response, _status_code) = server.search_get(query).await;
|
||||
assert_json_eq!(expect, response["hits"].clone(), ordered: false);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn search_with_settings_normalized_synonyms() {
|
||||
let mut server = common::Server::test_server().await;
|
||||
|
||||
let config = json!({
|
||||
"rankingRules": [
|
||||
"typo",
|
||||
|
@ -171,6 +171,8 @@ async fn write_all_and_update() {
|
||||
"synonyms": {
|
||||
"road": ["street", "avenue"],
|
||||
"street": ["avenue"],
|
||||
"HP": ["Harry Potter"],
|
||||
"Harry Potter": ["HP"]
|
||||
},
|
||||
"attributesForFaceting": ["title"],
|
||||
});
|
||||
@ -208,6 +210,8 @@ async fn write_all_and_update() {
|
||||
"synonyms": {
|
||||
"road": ["street", "avenue"],
|
||||
"street": ["avenue"],
|
||||
"hp": ["harry potter"],
|
||||
"harry potter": ["hp"]
|
||||
},
|
||||
"attributesForFaceting": ["title"],
|
||||
});
|
||||
|
@ -1,13 +1,13 @@
|
||||
[package]
|
||||
name = "meilisearch-schema"
|
||||
version = "0.16.0"
|
||||
version = "0.20.0"
|
||||
license = "MIT"
|
||||
authors = ["Kerollmops <renault.cle@gmail.com>"]
|
||||
edition = "2018"
|
||||
|
||||
[dependencies]
|
||||
indexmap = { version = "1.3.2", features = ["serde-1"] }
|
||||
meilisearch-error = { path = "../meilisearch-error", version = "0.16.0" }
|
||||
serde = { version = "1.0.105", features = ["derive"] }
|
||||
serde_json = { version = "1.0.59", features = ["preserve_order"] }
|
||||
indexmap = { version = "1.6.1", features = ["serde-1"] }
|
||||
meilisearch-error = { path = "../meilisearch-error", version = "0.20.0" }
|
||||
serde = { version = "1.0.118", features = ["derive"] }
|
||||
serde_json = { version = "1.0.61", features = ["preserve_order"] }
|
||||
zerocopy = "0.3.0"
|
||||
|
@ -6,22 +6,14 @@ use serde::{Deserialize, Serialize};
|
||||
use crate::{SResult, FieldId};
|
||||
|
||||
#[derive(Debug, Default, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
||||
pub struct FieldsMap {
|
||||
pub(crate) struct FieldsMap {
|
||||
name_map: HashMap<String, FieldId>,
|
||||
id_map: HashMap<FieldId, String>,
|
||||
next_id: FieldId
|
||||
}
|
||||
|
||||
impl FieldsMap {
|
||||
pub fn len(&self) -> usize {
|
||||
self.name_map.len()
|
||||
}
|
||||
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.name_map.is_empty()
|
||||
}
|
||||
|
||||
pub fn insert(&mut self, name: &str) -> SResult<FieldId> {
|
||||
pub(crate) fn insert(&mut self, name: &str) -> SResult<FieldId> {
|
||||
if let Some(id) = self.name_map.get(name) {
|
||||
return Ok(*id)
|
||||
}
|
||||
@ -32,22 +24,15 @@ impl FieldsMap {
|
||||
Ok(id)
|
||||
}
|
||||
|
||||
pub fn remove(&mut self, name: &str) {
|
||||
if let Some(id) = self.name_map.get(name) {
|
||||
self.id_map.remove(&id);
|
||||
}
|
||||
self.name_map.remove(name);
|
||||
}
|
||||
|
||||
pub fn id(&self, name: &str) -> Option<FieldId> {
|
||||
pub(crate) fn id(&self, name: &str) -> Option<FieldId> {
|
||||
self.name_map.get(name).copied()
|
||||
}
|
||||
|
||||
pub fn name<I: Into<FieldId>>(&self, id: I) -> Option<&str> {
|
||||
pub(crate) fn name<I: Into<FieldId>>(&self, id: I) -> Option<&str> {
|
||||
self.id_map.get(&id.into()).map(|s| s.as_str())
|
||||
}
|
||||
|
||||
pub fn iter(&self) -> Iter<'_, String, FieldId> {
|
||||
pub(crate) fn iter(&self) -> Iter<'_, String, FieldId> {
|
||||
self.name_map.iter()
|
||||
}
|
||||
}
|
||||
@ -69,14 +54,10 @@ mod tests {
|
||||
assert_eq!(fields_map.id("title"), Some(1.into()));
|
||||
assert_eq!(fields_map.id("descritpion"), Some(2.into()));
|
||||
assert_eq!(fields_map.id("date"), None);
|
||||
assert_eq!(fields_map.len(), 3);
|
||||
assert_eq!(fields_map.name(0), Some("id"));
|
||||
assert_eq!(fields_map.name(1), Some("title"));
|
||||
assert_eq!(fields_map.name(2), Some("descritpion"));
|
||||
assert_eq!(fields_map.name(4), None);
|
||||
fields_map.remove("title");
|
||||
assert_eq!(fields_map.id("title"), None);
|
||||
assert_eq!(fields_map.insert("title").unwrap(), 3.into());
|
||||
assert_eq!(fields_map.len(), 3);
|
||||
assert_eq!(fields_map.insert("title").unwrap(), 1.into());
|
||||
}
|
||||
}
|
||||
|
@ -1,9 +1,10 @@
|
||||
mod error;
|
||||
mod fields_map;
|
||||
mod schema;
|
||||
mod position_map;
|
||||
|
||||
pub use error::{Error, SResult};
|
||||
pub use fields_map::FieldsMap;
|
||||
use fields_map::FieldsMap;
|
||||
pub use schema::Schema;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use zerocopy::{AsBytes, FromBytes};
|
||||
|
161
meilisearch-schema/src/position_map.rs
Normal file
161
meilisearch-schema/src/position_map.rs
Normal file
@ -0,0 +1,161 @@
|
||||
use std::collections::BTreeMap;
|
||||
|
||||
use crate::{FieldId, IndexedPos};
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
|
||||
pub struct PositionMap {
|
||||
pos_to_field: Vec<FieldId>,
|
||||
field_to_pos: BTreeMap<FieldId, IndexedPos>,
|
||||
}
|
||||
|
||||
impl PositionMap {
|
||||
/// insert `id` at the specified `position` updating the other position if a shift is caused by
|
||||
/// the operation. If `id` is already present in the position map, it is moved to the requested
|
||||
/// `position`, potentially causing shifts.
|
||||
pub fn insert(&mut self, id: FieldId, position: IndexedPos) -> IndexedPos {
|
||||
let mut upos = position.0 as usize;
|
||||
let mut must_rebuild_map = false;
|
||||
|
||||
if let Some(old_pos) = self.field_to_pos.get(&id) {
|
||||
let uold_pos = old_pos.0 as usize;
|
||||
self.pos_to_field.remove(uold_pos);
|
||||
must_rebuild_map = true;
|
||||
}
|
||||
|
||||
if upos < self.pos_to_field.len() {
|
||||
self.pos_to_field.insert(upos, id);
|
||||
must_rebuild_map = true;
|
||||
} else {
|
||||
upos = self.pos_to_field.len();
|
||||
self.pos_to_field.push(id);
|
||||
}
|
||||
|
||||
// we only need to update all the positions if there have been a shift a some point. In
|
||||
// most cases we only did a push, so we don't need to rebuild the `field_to_pos` map.
|
||||
if must_rebuild_map {
|
||||
self.field_to_pos.clear();
|
||||
self.field_to_pos.extend(
|
||||
self.pos_to_field
|
||||
.iter()
|
||||
.enumerate()
|
||||
.map(|(p, f)| (*f, IndexedPos(p as u16))),
|
||||
);
|
||||
} else {
|
||||
self.field_to_pos.insert(id, IndexedPos(upos as u16));
|
||||
}
|
||||
IndexedPos(upos as u16)
|
||||
}
|
||||
|
||||
/// Pushes `id` in last position
|
||||
pub fn push(&mut self, id: FieldId) -> IndexedPos {
|
||||
let pos = self.len();
|
||||
self.insert(id, IndexedPos(pos as u16))
|
||||
}
|
||||
|
||||
pub fn len(&self) -> usize {
|
||||
self.pos_to_field.len()
|
||||
}
|
||||
|
||||
pub fn field_to_pos(&self, id: FieldId) -> Option<IndexedPos> {
|
||||
self.field_to_pos.get(&id).cloned()
|
||||
}
|
||||
|
||||
pub fn pos_to_field(&self, pos: IndexedPos) -> Option<FieldId> {
|
||||
let pos = pos.0 as usize;
|
||||
self.pos_to_field.get(pos).cloned()
|
||||
}
|
||||
|
||||
pub fn field_pos(&self) -> impl Iterator<Item = (FieldId, IndexedPos)> + '_ {
|
||||
self.pos_to_field
|
||||
.iter()
|
||||
.enumerate()
|
||||
.map(|(i, f)| (*f, IndexedPos(i as u16)))
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_default() {
|
||||
assert_eq!(
|
||||
format!("{:?}", PositionMap::default()),
|
||||
r##"PositionMap { pos_to_field: [], field_to_pos: {} }"##
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_insert() {
|
||||
let mut map = PositionMap::default();
|
||||
// changing position removes from old position
|
||||
map.insert(0.into(), 0.into());
|
||||
map.insert(1.into(), 1.into());
|
||||
assert_eq!(
|
||||
format!("{:?}", map),
|
||||
r##"PositionMap { pos_to_field: [FieldId(0), FieldId(1)], field_to_pos: {FieldId(0): IndexedPos(0), FieldId(1): IndexedPos(1)} }"##
|
||||
);
|
||||
map.insert(0.into(), 1.into());
|
||||
assert_eq!(
|
||||
format!("{:?}", map),
|
||||
r##"PositionMap { pos_to_field: [FieldId(1), FieldId(0)], field_to_pos: {FieldId(0): IndexedPos(1), FieldId(1): IndexedPos(0)} }"##
|
||||
);
|
||||
map.insert(2.into(), 1.into());
|
||||
assert_eq!(
|
||||
format!("{:?}", map),
|
||||
r##"PositionMap { pos_to_field: [FieldId(1), FieldId(2), FieldId(0)], field_to_pos: {FieldId(0): IndexedPos(2), FieldId(1): IndexedPos(0), FieldId(2): IndexedPos(1)} }"##
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_push() {
|
||||
let mut map = PositionMap::default();
|
||||
map.push(0.into());
|
||||
map.push(2.into());
|
||||
assert_eq!(map.len(), 2);
|
||||
assert_eq!(
|
||||
format!("{:?}", map),
|
||||
r##"PositionMap { pos_to_field: [FieldId(0), FieldId(2)], field_to_pos: {FieldId(0): IndexedPos(0), FieldId(2): IndexedPos(1)} }"##
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_field_to_pos() {
|
||||
let mut map = PositionMap::default();
|
||||
map.push(0.into());
|
||||
map.push(2.into());
|
||||
assert_eq!(map.field_to_pos(2.into()), Some(1.into()));
|
||||
assert_eq!(map.field_to_pos(0.into()), Some(0.into()));
|
||||
assert_eq!(map.field_to_pos(4.into()), None);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_pos_to_field() {
|
||||
let mut map = PositionMap::default();
|
||||
map.push(0.into());
|
||||
map.push(2.into());
|
||||
map.push(3.into());
|
||||
map.push(4.into());
|
||||
assert_eq!(
|
||||
format!("{:?}", map),
|
||||
r##"PositionMap { pos_to_field: [FieldId(0), FieldId(2), FieldId(3), FieldId(4)], field_to_pos: {FieldId(0): IndexedPos(0), FieldId(2): IndexedPos(1), FieldId(3): IndexedPos(2), FieldId(4): IndexedPos(3)} }"##
|
||||
);
|
||||
assert_eq!(map.pos_to_field(0.into()), Some(0.into()));
|
||||
assert_eq!(map.pos_to_field(1.into()), Some(2.into()));
|
||||
assert_eq!(map.pos_to_field(2.into()), Some(3.into()));
|
||||
assert_eq!(map.pos_to_field(3.into()), Some(4.into()));
|
||||
assert_eq!(map.pos_to_field(4.into()), None);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_field_pos() {
|
||||
let mut map = PositionMap::default();
|
||||
map.push(0.into());
|
||||
map.push(2.into());
|
||||
let mut iter = map.field_pos();
|
||||
assert_eq!(iter.next(), Some((0.into(), 0.into())));
|
||||
assert_eq!(iter.next(), Some((2.into(), 1.into())));
|
||||
assert_eq!(iter.next(), None);
|
||||
}
|
||||
}
|
@ -1,39 +1,10 @@
|
||||
use crate::{FieldsMap, FieldId, SResult, Error, IndexedPos};
|
||||
use serde::{Serialize, Deserialize};
|
||||
use std::collections::{HashMap, HashSet};
|
||||
use std::borrow::Cow;
|
||||
use std::collections::{BTreeSet, HashSet};
|
||||
|
||||
#[derive(Clone, Debug, Serialize, Deserialize)]
|
||||
enum OptionAll<T> {
|
||||
All,
|
||||
Some(T),
|
||||
None,
|
||||
}
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
impl<T> OptionAll<T> {
|
||||
// replace the value with None and return the previous value
|
||||
fn take(&mut self) -> OptionAll<T> {
|
||||
std::mem::replace(self, OptionAll::None)
|
||||
}
|
||||
|
||||
fn map<U, F: FnOnce(T) -> U>(self, f: F) -> OptionAll<U> {
|
||||
match self {
|
||||
OptionAll::Some(x) => OptionAll::Some(f(x)),
|
||||
OptionAll::All => OptionAll::All,
|
||||
OptionAll::None => OptionAll::None,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn is_all(&self) -> bool {
|
||||
matches!(self, OptionAll::All)
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> Default for OptionAll<T> {
|
||||
fn default() -> OptionAll<T> {
|
||||
OptionAll::All
|
||||
}
|
||||
}
|
||||
use crate::position_map::PositionMap;
|
||||
use crate::{Error, FieldId, FieldsMap, IndexedPos, SResult};
|
||||
|
||||
#[derive(Clone, Debug, Serialize, Deserialize, Default)]
|
||||
pub struct Schema {
|
||||
@ -41,34 +12,26 @@ pub struct Schema {
|
||||
|
||||
primary_key: Option<FieldId>,
|
||||
ranked: HashSet<FieldId>,
|
||||
displayed: OptionAll<HashSet<FieldId>>,
|
||||
displayed: Option<BTreeSet<FieldId>>,
|
||||
|
||||
indexed: OptionAll<Vec<FieldId>>,
|
||||
indexed_map: HashMap<FieldId, IndexedPos>,
|
||||
searchable: Option<Vec<FieldId>>,
|
||||
pub indexed_position: PositionMap,
|
||||
}
|
||||
|
||||
impl Schema {
|
||||
pub fn new() -> Schema {
|
||||
Schema::default()
|
||||
}
|
||||
|
||||
pub fn with_primary_key(name: &str) -> Schema {
|
||||
let mut fields_map = FieldsMap::default();
|
||||
let field_id = fields_map.insert(name).unwrap();
|
||||
|
||||
let mut displayed = HashSet::new();
|
||||
let mut indexed_map = HashMap::new();
|
||||
|
||||
displayed.insert(field_id);
|
||||
indexed_map.insert(field_id, 0.into());
|
||||
let mut indexed_position = PositionMap::default();
|
||||
indexed_position.push(field_id);
|
||||
|
||||
Schema {
|
||||
fields_map,
|
||||
primary_key: Some(field_id),
|
||||
ranked: HashSet::new(),
|
||||
displayed: OptionAll::All,
|
||||
indexed: OptionAll::All,
|
||||
indexed_map,
|
||||
displayed: None,
|
||||
searchable: None,
|
||||
indexed_position,
|
||||
}
|
||||
}
|
||||
|
||||
@ -78,13 +41,11 @@ impl Schema {
|
||||
|
||||
pub fn set_primary_key(&mut self, name: &str) -> SResult<FieldId> {
|
||||
if self.primary_key.is_some() {
|
||||
return Err(Error::PrimaryKeyAlreadyPresent)
|
||||
return Err(Error::PrimaryKeyAlreadyPresent);
|
||||
}
|
||||
|
||||
let id = self.insert(name)?;
|
||||
self.primary_key = Some(id);
|
||||
self.set_indexed(name)?;
|
||||
self.set_displayed(name)?;
|
||||
|
||||
Ok(id)
|
||||
}
|
||||
@ -101,202 +62,98 @@ impl Schema {
|
||||
self.fields_map.iter().map(|(k, _)| k.as_ref())
|
||||
}
|
||||
|
||||
pub fn contains(&self, name: &str) -> bool {
|
||||
self.fields_map.id(name).is_some()
|
||||
}
|
||||
|
||||
/// add `name` to the list of known fields
|
||||
pub fn insert(&mut self, name: &str) -> SResult<FieldId> {
|
||||
self.fields_map.insert(name)
|
||||
}
|
||||
|
||||
pub fn insert_and_index(&mut self, name: &str) -> SResult<FieldId> {
|
||||
match self.fields_map.id(name) {
|
||||
Some(id) => {
|
||||
Ok(id)
|
||||
}
|
||||
None => {
|
||||
self.set_indexed(name)?;
|
||||
self.set_displayed(name)
|
||||
}
|
||||
}
|
||||
/// Adds `name` to the list of known fields, and in the last position of the indexed_position map. This
|
||||
/// field is taken into acccount when `searchableAttribute` or `displayedAttributes` is set to `"*"`
|
||||
pub fn insert_with_position(&mut self, name: &str) -> SResult<(FieldId, IndexedPos)> {
|
||||
let field_id = self.fields_map.insert(name)?;
|
||||
let position = self
|
||||
.is_searchable(field_id)
|
||||
.unwrap_or_else(|| self.indexed_position.push(field_id));
|
||||
Ok((field_id, position))
|
||||
}
|
||||
|
||||
pub fn ranked(&self) -> &HashSet<FieldId> {
|
||||
&self.ranked
|
||||
}
|
||||
|
||||
pub fn ranked_name(&self) -> HashSet<&str> {
|
||||
self.ranked.iter().filter_map(|a| self.name(*a)).collect()
|
||||
}
|
||||
|
||||
pub fn displayed(&self) -> Cow<HashSet<FieldId>> {
|
||||
match self.displayed {
|
||||
OptionAll::Some(ref v) => Cow::Borrowed(v),
|
||||
OptionAll::All => {
|
||||
let fields = self
|
||||
.fields_map
|
||||
.iter()
|
||||
.map(|(_, &v)| v)
|
||||
.collect::<HashSet<_>>();
|
||||
Cow::Owned(fields)
|
||||
}
|
||||
OptionAll::None => Cow::Owned(HashSet::new())
|
||||
fn displayed(&self) -> Cow<BTreeSet<FieldId>> {
|
||||
match &self.displayed {
|
||||
Some(displayed) => Cow::Borrowed(displayed),
|
||||
None => Cow::Owned(self.indexed_position.field_pos().map(|(f, _)| f).collect()),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn is_displayed_all(&self) -> bool {
|
||||
self.displayed.is_all()
|
||||
self.displayed.is_none()
|
||||
}
|
||||
|
||||
pub fn displayed_name(&self) -> HashSet<&str> {
|
||||
match self.displayed {
|
||||
OptionAll::All => self.fields_map.iter().filter_map(|(_, &v)| self.name(v)).collect(),
|
||||
OptionAll::Some(ref v) => v.iter().filter_map(|a| self.name(*a)).collect(),
|
||||
OptionAll::None => HashSet::new(),
|
||||
pub fn displayed_names(&self) -> BTreeSet<&str> {
|
||||
self.displayed()
|
||||
.iter()
|
||||
.filter_map(|&f| self.name(f))
|
||||
.collect()
|
||||
}
|
||||
|
||||
fn searchable(&self) -> Cow<[FieldId]> {
|
||||
match &self.searchable {
|
||||
Some(searchable) => Cow::Borrowed(&searchable),
|
||||
None => Cow::Owned(self.indexed_position.field_pos().map(|(f, _)| f).collect()),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn indexed(&self) -> Cow<[FieldId]> {
|
||||
match self.indexed {
|
||||
OptionAll::Some(ref v) => Cow::Borrowed(v),
|
||||
OptionAll::All => {
|
||||
let fields = self
|
||||
.fields_map
|
||||
.iter()
|
||||
.map(|(_, &f)| f)
|
||||
.collect();
|
||||
Cow::Owned(fields)
|
||||
},
|
||||
OptionAll::None => Cow::Owned(Vec::new())
|
||||
}
|
||||
pub fn searchable_names(&self) -> Vec<&str> {
|
||||
self.searchable()
|
||||
.iter()
|
||||
.filter_map(|a| self.name(*a))
|
||||
.collect()
|
||||
}
|
||||
|
||||
pub fn indexed_name(&self) -> Vec<&str> {
|
||||
self.indexed().iter().filter_map(|a| self.name(*a)).collect()
|
||||
}
|
||||
|
||||
pub fn set_ranked(&mut self, name: &str) -> SResult<FieldId> {
|
||||
pub(crate) fn set_ranked(&mut self, name: &str) -> SResult<FieldId> {
|
||||
let id = self.fields_map.insert(name)?;
|
||||
self.ranked.insert(id);
|
||||
Ok(id)
|
||||
}
|
||||
|
||||
pub fn set_displayed(&mut self, name: &str) -> SResult<FieldId> {
|
||||
let id = self.fields_map.insert(name)?;
|
||||
self.displayed = match self.displayed.take() {
|
||||
OptionAll::All => OptionAll::All,
|
||||
OptionAll::None => {
|
||||
let mut displayed = HashSet::new();
|
||||
displayed.insert(id);
|
||||
OptionAll::Some(displayed)
|
||||
},
|
||||
OptionAll::Some(mut v) => {
|
||||
v.insert(id);
|
||||
OptionAll::Some(v)
|
||||
}
|
||||
};
|
||||
Ok(id)
|
||||
}
|
||||
|
||||
pub fn set_indexed(&mut self, name: &str) -> SResult<(FieldId, IndexedPos)> {
|
||||
let id = self.fields_map.insert(name)?;
|
||||
|
||||
if let Some(indexed_pos) = self.indexed_map.get(&id) {
|
||||
return Ok((id, *indexed_pos))
|
||||
};
|
||||
let pos = self.indexed_map.len() as u16;
|
||||
self.indexed_map.insert(id, pos.into());
|
||||
self.indexed = self.indexed.take().map(|mut v| {
|
||||
v.push(id);
|
||||
v
|
||||
});
|
||||
Ok((id, pos.into()))
|
||||
}
|
||||
|
||||
pub fn clear_ranked(&mut self) {
|
||||
self.ranked.clear();
|
||||
}
|
||||
|
||||
pub fn remove_ranked(&mut self, name: &str) {
|
||||
if let Some(id) = self.fields_map.id(name) {
|
||||
self.ranked.remove(&id);
|
||||
}
|
||||
}
|
||||
|
||||
/// remove field from displayed attributes. If diplayed attributes is OptionAll::All,
|
||||
/// dipslayed attributes is turned into OptionAll::Some(v) where v is all displayed attributes
|
||||
/// except name.
|
||||
pub fn remove_displayed(&mut self, name: &str) {
|
||||
if let Some(id) = self.fields_map.id(name) {
|
||||
self.displayed = match self.displayed.take() {
|
||||
OptionAll::Some(mut v) => {
|
||||
v.remove(&id);
|
||||
OptionAll::Some(v)
|
||||
}
|
||||
OptionAll::All => {
|
||||
let displayed = self.fields_map
|
||||
.iter()
|
||||
.filter_map(|(key, &value)| {
|
||||
if key != name {
|
||||
Some(value)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
.collect::<HashSet<_>>();
|
||||
OptionAll::Some(displayed)
|
||||
}
|
||||
OptionAll::None => OptionAll::None,
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
pub fn remove_indexed(&mut self, name: &str) {
|
||||
if let Some(id) = self.fields_map.id(name) {
|
||||
self.indexed_map.remove(&id);
|
||||
self.indexed = match self.indexed.take() {
|
||||
// valid because indexed is All and indexed() return the content of
|
||||
// indexed_map that is already updated
|
||||
OptionAll::All => OptionAll::Some(self.indexed().into_owned()),
|
||||
OptionAll::Some(mut v) => {
|
||||
v.retain(|x| *x != id);
|
||||
OptionAll::Some(v)
|
||||
}
|
||||
OptionAll::None => OptionAll::None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn is_ranked(&self, id: FieldId) -> bool {
|
||||
self.ranked.get(&id).is_some()
|
||||
}
|
||||
|
||||
pub fn is_displayed(&self, id: FieldId) -> bool {
|
||||
match self.displayed {
|
||||
OptionAll::Some(ref v) => v.contains(&id),
|
||||
OptionAll::All => true,
|
||||
OptionAll::None => false,
|
||||
match &self.displayed {
|
||||
Some(displayed) => displayed.contains(&id),
|
||||
None => true,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn is_indexed(&self, id: FieldId) -> Option<&IndexedPos> {
|
||||
self.indexed_map.get(&id)
|
||||
pub fn is_searchable(&self, id: FieldId) -> Option<IndexedPos> {
|
||||
match &self.searchable {
|
||||
Some(searchable) if searchable.contains(&id) => self.indexed_position.field_to_pos(id),
|
||||
None => self.indexed_position.field_to_pos(id),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn is_indexed_all(&self) -> bool {
|
||||
self.indexed.is_all()
|
||||
pub fn is_searchable_all(&self) -> bool {
|
||||
self.searchable.is_none()
|
||||
}
|
||||
|
||||
pub fn indexed_pos_to_field_id<I: Into<IndexedPos>>(&self, pos: I) -> Option<FieldId> {
|
||||
let indexed_pos = pos.into().0;
|
||||
self
|
||||
.indexed_map
|
||||
.iter()
|
||||
.find(|(_, &v)| v.0 == indexed_pos)
|
||||
.map(|(&k, _)| k)
|
||||
self.indexed_position.pos_to_field(pos.into())
|
||||
}
|
||||
|
||||
pub fn update_ranked<S: AsRef<str>>(&mut self, data: impl IntoIterator<Item = S>) -> SResult<()> {
|
||||
pub fn update_ranked<S: AsRef<str>>(
|
||||
&mut self,
|
||||
data: impl IntoIterator<Item = S>,
|
||||
) -> SResult<()> {
|
||||
self.ranked.clear();
|
||||
for name in data {
|
||||
self.set_ranked(name.as_ref())?;
|
||||
@ -304,46 +161,208 @@ impl Schema {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn update_displayed<S: AsRef<str>>(&mut self, data: impl IntoIterator<Item = S>) -> SResult<()> {
|
||||
self.displayed = match self.displayed.take() {
|
||||
OptionAll::Some(mut v) => {
|
||||
v.clear();
|
||||
OptionAll::Some(v)
|
||||
}
|
||||
_ => OptionAll::Some(HashSet::new())
|
||||
};
|
||||
pub fn update_displayed<S: AsRef<str>>(
|
||||
&mut self,
|
||||
data: impl IntoIterator<Item = S>,
|
||||
) -> SResult<()> {
|
||||
let mut displayed = BTreeSet::new();
|
||||
for name in data {
|
||||
self.set_displayed(name.as_ref())?;
|
||||
let id = self.fields_map.insert(name.as_ref())?;
|
||||
displayed.insert(id);
|
||||
}
|
||||
self.displayed.replace(displayed);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn update_indexed<S: AsRef<str>>(&mut self, data: Vec<S>) -> SResult<()> {
|
||||
self.indexed = match self.indexed.take() {
|
||||
OptionAll::Some(mut v) => {
|
||||
v.clear();
|
||||
OptionAll::Some(v)
|
||||
},
|
||||
_ => OptionAll::Some(Vec::new()),
|
||||
};
|
||||
self.indexed_map.clear();
|
||||
for name in data {
|
||||
self.set_indexed(name.as_ref())?;
|
||||
pub fn update_searchable<S: AsRef<str>>(&mut self, data: Vec<S>) -> SResult<()> {
|
||||
let mut searchable = Vec::with_capacity(data.len());
|
||||
for (pos, name) in data.iter().enumerate() {
|
||||
let id = self.insert(name.as_ref())?;
|
||||
self.indexed_position.insert(id, IndexedPos(pos as u16));
|
||||
searchable.push(id);
|
||||
}
|
||||
self.searchable.replace(searchable);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn set_all_fields_as_indexed(&mut self) {
|
||||
self.indexed = OptionAll::All;
|
||||
self.indexed_map.clear();
|
||||
|
||||
for (_name, id) in self.fields_map.iter() {
|
||||
let pos = self.indexed_map.len() as u16;
|
||||
self.indexed_map.insert(*id, pos.into());
|
||||
}
|
||||
pub fn set_all_searchable(&mut self) {
|
||||
self.searchable.take();
|
||||
}
|
||||
|
||||
pub fn set_all_fields_as_displayed(&mut self) {
|
||||
self.displayed = OptionAll::All
|
||||
pub fn set_all_displayed(&mut self) {
|
||||
self.displayed.take();
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_with_primary_key() {
|
||||
let schema = Schema::with_primary_key("test");
|
||||
assert_eq!(
|
||||
format!("{:?}", schema),
|
||||
r##"Schema { fields_map: FieldsMap { name_map: {"test": FieldId(0)}, id_map: {FieldId(0): "test"}, next_id: FieldId(1) }, primary_key: Some(FieldId(0)), ranked: {}, displayed: None, searchable: None, indexed_position: PositionMap { pos_to_field: [FieldId(0)], field_to_pos: {FieldId(0): IndexedPos(0)} } }"##
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn primary_key() {
|
||||
let schema = Schema::with_primary_key("test");
|
||||
assert_eq!(schema.primary_key(), Some("test"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_insert_with_position_base() {
|
||||
let mut schema = Schema::default();
|
||||
let (id, position) = schema.insert_with_position("foo").unwrap();
|
||||
assert!(schema.searchable.is_none());
|
||||
assert!(schema.displayed.is_none());
|
||||
assert_eq!(id, 0.into());
|
||||
assert_eq!(position, 0.into());
|
||||
let (id, position) = schema.insert_with_position("bar").unwrap();
|
||||
assert_eq!(id, 1.into());
|
||||
assert_eq!(position, 1.into());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_insert_with_position_primary_key() {
|
||||
let mut schema = Schema::with_primary_key("test");
|
||||
let (id, position) = schema.insert_with_position("foo").unwrap();
|
||||
assert!(schema.searchable.is_none());
|
||||
assert!(schema.displayed.is_none());
|
||||
assert_eq!(id, 1.into());
|
||||
assert_eq!(position, 1.into());
|
||||
let (id, position) = schema.insert_with_position("test").unwrap();
|
||||
assert_eq!(id, 0.into());
|
||||
assert_eq!(position, 0.into());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_insert() {
|
||||
let mut schema = Schema::default();
|
||||
let field_id = schema.insert("foo").unwrap();
|
||||
assert!(schema.fields_map.name(field_id).is_some());
|
||||
assert!(schema.searchable.is_none());
|
||||
assert!(schema.displayed.is_none());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_update_searchable() {
|
||||
let mut schema = Schema::default();
|
||||
|
||||
schema.update_searchable(vec!["foo", "bar"]).unwrap();
|
||||
assert_eq!(
|
||||
format!("{:?}", schema.indexed_position),
|
||||
r##"PositionMap { pos_to_field: [FieldId(0), FieldId(1)], field_to_pos: {FieldId(0): IndexedPos(0), FieldId(1): IndexedPos(1)} }"##
|
||||
);
|
||||
assert_eq!(
|
||||
format!("{:?}", schema.searchable),
|
||||
r##"Some([FieldId(0), FieldId(1)])"##
|
||||
);
|
||||
schema.update_searchable(vec!["bar"]).unwrap();
|
||||
assert_eq!(
|
||||
format!("{:?}", schema.searchable),
|
||||
r##"Some([FieldId(1)])"##
|
||||
);
|
||||
assert_eq!(
|
||||
format!("{:?}", schema.indexed_position),
|
||||
r##"PositionMap { pos_to_field: [FieldId(1), FieldId(0)], field_to_pos: {FieldId(0): IndexedPos(1), FieldId(1): IndexedPos(0)} }"##
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_update_displayed() {
|
||||
let mut schema = Schema::default();
|
||||
schema.update_displayed(vec!["foobar"]).unwrap();
|
||||
assert_eq!(
|
||||
format!("{:?}", schema.displayed),
|
||||
r##"Some({FieldId(0)})"##
|
||||
);
|
||||
assert_eq!(
|
||||
format!("{:?}", schema.indexed_position),
|
||||
r##"PositionMap { pos_to_field: [], field_to_pos: {} }"##
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_is_searchable_all() {
|
||||
let mut schema = Schema::default();
|
||||
assert!(schema.is_searchable_all());
|
||||
schema.update_searchable(vec!["foo"]).unwrap();
|
||||
assert!(!schema.is_searchable_all());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_is_displayed_all() {
|
||||
let mut schema = Schema::default();
|
||||
assert!(schema.is_displayed_all());
|
||||
schema.update_displayed(vec!["foo"]).unwrap();
|
||||
assert!(!schema.is_displayed_all());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_searchable_names() {
|
||||
let mut schema = Schema::default();
|
||||
assert_eq!(format!("{:?}", schema.searchable_names()), r##"[]"##);
|
||||
schema.insert_with_position("foo").unwrap();
|
||||
schema.insert_with_position("bar").unwrap();
|
||||
assert_eq!(
|
||||
format!("{:?}", schema.searchable_names()),
|
||||
r##"["foo", "bar"]"##
|
||||
);
|
||||
schema.update_searchable(vec!["hello", "world"]).unwrap();
|
||||
assert_eq!(
|
||||
format!("{:?}", schema.searchable_names()),
|
||||
r##"["hello", "world"]"##
|
||||
);
|
||||
schema.set_all_searchable();
|
||||
assert_eq!(
|
||||
format!("{:?}", schema.searchable_names()),
|
||||
r##"["hello", "world", "foo", "bar"]"##
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_displayed_names() {
|
||||
let mut schema = Schema::default();
|
||||
assert_eq!(format!("{:?}", schema.displayed_names()), r##"{}"##);
|
||||
schema.insert_with_position("foo").unwrap();
|
||||
schema.insert_with_position("bar").unwrap();
|
||||
assert_eq!(
|
||||
format!("{:?}", schema.displayed_names()),
|
||||
r##"{"bar", "foo"}"##
|
||||
);
|
||||
schema.update_displayed(vec!["hello", "world"]).unwrap();
|
||||
assert_eq!(
|
||||
format!("{:?}", schema.displayed_names()),
|
||||
r##"{"hello", "world"}"##
|
||||
);
|
||||
schema.set_all_displayed();
|
||||
assert_eq!(
|
||||
format!("{:?}", schema.displayed_names()),
|
||||
r##"{"bar", "foo"}"##
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_set_all_searchable() {
|
||||
let mut schema = Schema::default();
|
||||
assert!(schema.is_searchable_all());
|
||||
schema.update_searchable(vec!["foobar"]).unwrap();
|
||||
assert!(!schema.is_searchable_all());
|
||||
schema.set_all_searchable();
|
||||
assert!(schema.is_searchable_all());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_set_all_displayed() {
|
||||
let mut schema = Schema::default();
|
||||
assert!(schema.is_displayed_all());
|
||||
schema.update_displayed(vec!["foobar"]).unwrap();
|
||||
assert!(!schema.is_displayed_all());
|
||||
schema.set_all_displayed();
|
||||
assert!(schema.is_displayed_all());
|
||||
}
|
||||
}
|
||||
|
@ -1,10 +1,10 @@
|
||||
[package]
|
||||
name = "meilisearch-tokenizer"
|
||||
version = "0.16.0"
|
||||
version = "0.20.0"
|
||||
license = "MIT"
|
||||
authors = ["Kerollmops <renault.cle@gmail.com>"]
|
||||
edition = "2018"
|
||||
|
||||
[dependencies]
|
||||
deunicode = "1.1.0"
|
||||
deunicode = "1.1.1"
|
||||
slice-group-by = "0.2.6"
|
||||
|
@ -4,22 +4,22 @@ use slice_group_by::StrGroupBy;
|
||||
use std::iter::Peekable;
|
||||
|
||||
pub fn is_cjk(c: char) -> bool {
|
||||
(c >= '\u{1100}' && c <= '\u{11ff}') // Hangul Jamo
|
||||
|| (c >= '\u{2e80}' && c <= '\u{2eff}') // CJK Radicals Supplement
|
||||
|| (c >= '\u{2f00}' && c <= '\u{2fdf}') // Kangxi radical
|
||||
|| (c >= '\u{3000}' && c <= '\u{303f}') // Japanese-style punctuation
|
||||
|| (c >= '\u{3040}' && c <= '\u{309f}') // Japanese Hiragana
|
||||
|| (c >= '\u{30a0}' && c <= '\u{30ff}') // Japanese Katakana
|
||||
|| (c >= '\u{3100}' && c <= '\u{312f}')
|
||||
|| (c >= '\u{3130}' && c <= '\u{318F}') // Hangul Compatibility Jamo
|
||||
|| (c >= '\u{3200}' && c <= '\u{32ff}') // Enclosed CJK Letters and Months
|
||||
|| (c >= '\u{3400}' && c <= '\u{4dbf}') // CJK Unified Ideographs Extension A
|
||||
|| (c >= '\u{4e00}' && c <= '\u{9fff}') // CJK Unified Ideographs
|
||||
|| (c >= '\u{a960}' && c <= '\u{a97f}') // Hangul Jamo Extended-A
|
||||
|| (c >= '\u{ac00}' && c <= '\u{d7a3}') // Hangul Syllables
|
||||
|| (c >= '\u{d7b0}' && c <= '\u{d7ff}') // Hangul Jamo Extended-B
|
||||
|| (c >= '\u{f900}' && c <= '\u{faff}') // CJK Compatibility Ideographs
|
||||
|| (c >= '\u{ff00}' && c <= '\u{ffef}') // Full-width roman characters and half-width katakana
|
||||
('\u{1100}'..='\u{11ff}').contains(&c)
|
||||
|| ('\u{2e80}'..='\u{2eff}').contains(&c) // CJK Radicals Supplement
|
||||
|| ('\u{2f00}'..='\u{2fdf}').contains(&c) // Kangxi radical
|
||||
|| ('\u{3000}'..='\u{303f}').contains(&c) // Japanese-style punctuation
|
||||
|| ('\u{3040}'..='\u{309f}').contains(&c) // Japanese Hiragana
|
||||
|| ('\u{30a0}'..='\u{30ff}').contains(&c) // Japanese Katakana
|
||||
|| ('\u{3100}'..='\u{312f}').contains(&c)
|
||||
|| ('\u{3130}'..='\u{318F}').contains(&c) // Hangul Compatibility Jamo
|
||||
|| ('\u{3200}'..='\u{32ff}').contains(&c) // Enclosed CJK Letters and Months
|
||||
|| ('\u{3400}'..='\u{4dbf}').contains(&c) // CJK Unified Ideographs Extension A
|
||||
|| ('\u{4e00}'..='\u{9fff}').contains(&c) // CJK Unified Ideographs
|
||||
|| ('\u{a960}'..='\u{a97f}').contains(&c) // Hangul Jamo Extended-A
|
||||
|| ('\u{ac00}'..='\u{d7a3}').contains(&c) // Hangul Syllables
|
||||
|| ('\u{d7b0}'..='\u{d7ff}').contains(&c) // Hangul Jamo Extended-B
|
||||
|| ('\u{f900}'..='\u{faff}').contains(&c) // CJK Compatibility Ideographs
|
||||
|| ('\u{ff00}'..='\u{ffef}').contains(&c) // Full-width roman characters and half-width katakana
|
||||
}
|
||||
|
||||
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
|
||||
|
@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "meilisearch-types"
|
||||
version = "0.16.0"
|
||||
version = "0.20.0"
|
||||
license = "MIT"
|
||||
authors = ["Clément Renault <renault.cle@gmail.com>"]
|
||||
edition = "2018"
|
||||
@ -10,7 +10,7 @@ version = "0.3.0"
|
||||
optional = true
|
||||
|
||||
[dependencies.serde]
|
||||
version = "1.0.105"
|
||||
version = "1.0.118"
|
||||
features = ["derive"]
|
||||
optional = true
|
||||
|
||||
|
Reference in New Issue
Block a user