mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-07-21 13:51:05 +00:00
Compare commits
96 Commits
v1.3.0
...
prototype-
Author | SHA1 | Date | |
---|---|---|---|
6243422ff4 | |||
3b69233906 | |||
e9b62aacb3 | |||
456960d2c7 | |||
3dda176723 | |||
af0f6f0bf0 | |||
ccf3ba3f32 | |||
65528a3e06 | |||
cdb4b3e024 | |||
8c0ebd1331 | |||
5130e06b41 | |||
08e27ef73f | |||
914b125c5f | |||
717b069907 | |||
7ea154673a | |||
b947f3bb9d | |||
4c35817c5f | |||
c53841e166 | |||
fd81945597 | |||
794e491152 | |||
cab27c2ab4 | |||
624fa9052f | |||
359ede4862 | |||
60c11dbdbd | |||
dacee40ebc | |||
6089083a8e | |||
cc2c19d4c3 | |||
a5c56fac8a | |||
e4e49e63d0 | |||
00bd7bd19a | |||
ef3d098b4d | |||
8084cf29f3 | |||
5a7c1bde84 | |||
6b2d671be7 | |||
43c13faeda | |||
29adfc2f68 | |||
064ee95b1c | |||
604d533b31 | |||
44c1900f36 | |||
04671d0751 | |||
4f4c669d50 | |||
35758db9ec | |||
4988199bb9 | |||
83991ee770 | |||
9d061cec26 | |||
4a21fecf67 | |||
ae8e69c030 | |||
fe819a9d80 | |||
e338ceb97f | |||
75c87d5391 | |||
dd57873f8e | |||
3dda93d50f | |||
117146ec4e | |||
884b4d47b1 | |||
023cb0c2de | |||
f391039a6f | |||
fcdd20b533 | |||
b45c36cd71 | |||
151c31c18f | |||
a8ad0902d3 | |||
e917dbdebb | |||
ba919b6123 | |||
9d5e3457e5 | |||
04694071fe | |||
b0c1a9504a | |||
d57026cd96 | |||
41c9e8856a | |||
d4ff59fcf5 | |||
9c485f8563 | |||
d8d12d5979 | |||
0597a97c84 | |||
2dfbb6813a | |||
8f589a5cce | |||
0b8bbd8750 | |||
eef95de30e | |||
13a13a4862 | |||
e691c92ed5 | |||
928ab2f9b1 | |||
7c18a9375f | |||
05a311f9be | |||
9b1b9b409e | |||
7f555f23e8 | |||
a0bfc9f63a | |||
3155264381 | |||
42400c381e | |||
08c7dab528 | |||
8590687515 | |||
8f5d127b1e | |||
2b4160ebb9 | |||
8ba1c8f88f | |||
8e7edf8ea7 | |||
9daccdf7f0 | |||
437ee55c57 | |||
b1717865ea | |||
176f716292 | |||
40ad19ba9e |
2
.github/workflows/publish-apt-brew-pkg.yml
vendored
2
.github/workflows/publish-apt-brew-pkg.yml
vendored
@ -35,7 +35,7 @@ jobs:
|
||||
- name: Build deb package
|
||||
run: cargo deb -p meilisearch -o target/debian/meilisearch.deb
|
||||
- name: Upload debian pkg to release
|
||||
uses: svenstaro/upload-release-action@2.6.1
|
||||
uses: svenstaro/upload-release-action@2.7.0
|
||||
with:
|
||||
repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
|
||||
file: target/debian/meilisearch.deb
|
||||
|
8
.github/workflows/publish-binaries.yml
vendored
8
.github/workflows/publish-binaries.yml
vendored
@ -54,7 +54,7 @@ jobs:
|
||||
# No need to upload binaries for dry run (cron)
|
||||
- name: Upload binaries to release
|
||||
if: github.event_name == 'release'
|
||||
uses: svenstaro/upload-release-action@2.6.1
|
||||
uses: svenstaro/upload-release-action@2.7.0
|
||||
with:
|
||||
repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
|
||||
file: target/release/meilisearch
|
||||
@ -87,7 +87,7 @@ jobs:
|
||||
# No need to upload binaries for dry run (cron)
|
||||
- name: Upload binaries to release
|
||||
if: github.event_name == 'release'
|
||||
uses: svenstaro/upload-release-action@2.6.1
|
||||
uses: svenstaro/upload-release-action@2.7.0
|
||||
with:
|
||||
repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
|
||||
file: target/release/${{ matrix.artifact_name }}
|
||||
@ -121,7 +121,7 @@ jobs:
|
||||
- name: Upload the binary to release
|
||||
# No need to upload binaries for dry run (cron)
|
||||
if: github.event_name == 'release'
|
||||
uses: svenstaro/upload-release-action@2.6.1
|
||||
uses: svenstaro/upload-release-action@2.7.0
|
||||
with:
|
||||
repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
|
||||
file: target/${{ matrix.target }}/release/meilisearch
|
||||
@ -183,7 +183,7 @@ jobs:
|
||||
- name: Upload the binary to release
|
||||
# No need to upload binaries for dry run (cron)
|
||||
if: github.event_name == 'release'
|
||||
uses: svenstaro/upload-release-action@2.6.1
|
||||
uses: svenstaro/upload-release-action@2.7.0
|
||||
with:
|
||||
repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
|
||||
file: target/${{ matrix.target }}/release/meilisearch
|
||||
|
34
.github/workflows/test-suite.yml
vendored
34
.github/workflows/test-suite.yml
vendored
@ -30,20 +30,20 @@ jobs:
|
||||
run: |
|
||||
apt-get update && apt-get install -y curl
|
||||
apt-get install build-essential -y
|
||||
- name: Run test with Rust stable
|
||||
- name: Setup test with Rust stable
|
||||
if: github.event_name != 'schedule'
|
||||
uses: actions-rs/toolchain@v1
|
||||
with:
|
||||
toolchain: stable
|
||||
override: true
|
||||
- name: Run test with Rust nightly
|
||||
if: github.event_name == 'schedule'
|
||||
- name: Setup test with Rust nightly
|
||||
if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
|
||||
uses: actions-rs/toolchain@v1
|
||||
with:
|
||||
toolchain: nightly
|
||||
override: true
|
||||
- name: Cache dependencies
|
||||
uses: Swatinem/rust-cache@v2.4.0
|
||||
uses: Swatinem/rust-cache@v2.6.2
|
||||
- name: Run cargo check without any default features
|
||||
uses: actions-rs/cargo@v1
|
||||
with:
|
||||
@ -65,7 +65,7 @@ jobs:
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- name: Cache dependencies
|
||||
uses: Swatinem/rust-cache@v2.4.0
|
||||
uses: Swatinem/rust-cache@v2.6.2
|
||||
- name: Run cargo check without any default features
|
||||
uses: actions-rs/cargo@v1
|
||||
with:
|
||||
@ -78,12 +78,12 @@ jobs:
|
||||
args: --locked --release --all
|
||||
|
||||
test-all-features:
|
||||
name: Tests all features on cron schedule only
|
||||
name: Tests all features
|
||||
runs-on: ubuntu-latest
|
||||
container:
|
||||
# Use ubuntu-18.04 to compile with glibc 2.27, which are the production expectations
|
||||
image: ubuntu:18.04
|
||||
if: github.event_name == 'schedule'
|
||||
if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- name: Install needed dependencies
|
||||
@ -110,24 +110,24 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
container:
|
||||
image: ubuntu:18.04
|
||||
if: github.event_name == 'schedule'
|
||||
if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- name: Install needed dependencies
|
||||
run: |
|
||||
apt-get update
|
||||
apt-get install --assume-yes build-essential curl
|
||||
- uses: actions-rs/toolchain@v1
|
||||
with:
|
||||
toolchain: stable
|
||||
override: true
|
||||
- uses: actions-rs/toolchain@v1
|
||||
with:
|
||||
toolchain: stable
|
||||
override: true
|
||||
- name: Run cargo tree without default features and check lindera is not present
|
||||
run: |
|
||||
cargo tree -f '{p} {f}' -e normal --no-default-features | grep lindera -vqz
|
||||
- name: Run cargo tree with default features and check lindera is pressent
|
||||
run: |
|
||||
cargo tree -f '{p} {f}' -e normal | grep lindera -qz
|
||||
|
||||
|
||||
# We run tests in debug also, to make sure that the debug_assertions are hit
|
||||
test-debug:
|
||||
name: Run tests in debug
|
||||
@ -146,7 +146,7 @@ jobs:
|
||||
toolchain: stable
|
||||
override: true
|
||||
- name: Cache dependencies
|
||||
uses: Swatinem/rust-cache@v2.4.0
|
||||
uses: Swatinem/rust-cache@v2.6.2
|
||||
- name: Run tests in debug
|
||||
uses: actions-rs/cargo@v1
|
||||
with:
|
||||
@ -161,11 +161,11 @@ jobs:
|
||||
- uses: actions-rs/toolchain@v1
|
||||
with:
|
||||
profile: minimal
|
||||
toolchain: 1.69.0
|
||||
toolchain: 1.71.1
|
||||
override: true
|
||||
components: clippy
|
||||
- name: Cache dependencies
|
||||
uses: Swatinem/rust-cache@v2.4.0
|
||||
uses: Swatinem/rust-cache@v2.6.2
|
||||
- name: Run cargo clippy
|
||||
uses: actions-rs/cargo@v1
|
||||
with:
|
||||
@ -184,7 +184,7 @@ jobs:
|
||||
override: true
|
||||
components: rustfmt
|
||||
- name: Cache dependencies
|
||||
uses: Swatinem/rust-cache@v2.4.0
|
||||
uses: Swatinem/rust-cache@v2.6.2
|
||||
- name: Run cargo fmt
|
||||
# Since we never ran the `build.rs` script in the benchmark directory we are missing one auto-generated import file.
|
||||
# Since we want to trigger (and fail) this action as fast as possible, instead of building the benchmark crate
|
||||
|
751
Cargo.lock
generated
751
Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
@ -18,7 +18,7 @@ members = [
|
||||
]
|
||||
|
||||
[workspace.package]
|
||||
version = "1.3.0"
|
||||
version = "1.4.0"
|
||||
authors = ["Quentin de Quelen <quentin@dequelen.me>", "Clément Renault <clement@meilisearch.com>"]
|
||||
description = "Meilisearch HTTP server"
|
||||
homepage = "https://meilisearch.com"
|
||||
|
19
PROFILING.md
Normal file
19
PROFILING.md
Normal file
@ -0,0 +1,19 @@
|
||||
# Profiling Meilisearch
|
||||
|
||||
Search engine technologies are complex pieces of software that require thorough profiling tools. We chose to use [Puffin](https://github.com/EmbarkStudios/puffin), which the Rust gaming industry uses extensively. You can export and import the profiling reports using the top bar's _File_ menu options.
|
||||
|
||||

|
||||
|
||||
## Profiling the Indexing Process
|
||||
|
||||
When you enable the `profile-with-puffin` feature of Meilisearch, a Puffin HTTP server will run on Meilisearch and listen on the default _0.0.0.0:8585_ address. This server will record a "frame" whenever it executes the `IndexScheduler::tick` method.
|
||||
|
||||
Once your Meilisearch is running and awaits new indexation operations, you must [install and run the `puffin_viewer` tool](https://github.com/EmbarkStudios/puffin/tree/main/puffin_viewer) to see the profiling results. I advise you to run the viewer with the `RUST_LOG=puffin_http::client=debug` environment variable to see the client trying to connect to your server.
|
||||
|
||||
Another piece of advice on the Puffin viewer UI interface is to consider the _Merge children with same ID_ option. It can hide the exact actual timings at which events were sent. Please turn it off when you see strange gaps on the Flamegraph. It can help.
|
||||
|
||||
## Profiling the Search Process
|
||||
|
||||
We still need to take the time to profile the search side of the engine with Puffin. It would require time to profile the filtering phase, query parsing, creation, and execution. We could even profile the Actix HTTP server.
|
||||
|
||||
The only issue we see is the framing system. Puffin requires a global frame-based profiling phase, which collides with Meilisearch's ability to accept and answer multiple requests on different threads simultaneously.
|
68
README.md
68
README.md
@ -1,16 +1,20 @@
|
||||
<p align="center">
|
||||
<img src="assets/meilisearch-logo-light.svg?sanitize=true#gh-light-mode-only">
|
||||
<img src="assets/meilisearch-logo-dark.svg?sanitize=true#gh-dark-mode-only">
|
||||
<a href="https://www.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=logo#gh-light-mode-only" target="_blank">
|
||||
<img src="assets/meilisearch-logo-light.svg?sanitize=true#gh-light-mode-only">
|
||||
</a>
|
||||
<a href="https://www.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=logo#gh-dark-mode-only" target="_blank">
|
||||
<img src="assets/meilisearch-logo-dark.svg?sanitize=true#gh-dark-mode-only">
|
||||
</a>
|
||||
</p>
|
||||
|
||||
<h4 align="center">
|
||||
<a href="https://www.meilisearch.com">Website</a> |
|
||||
<a href="https://www.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=nav">Website</a> |
|
||||
<a href="https://roadmap.meilisearch.com/tabs/1-under-consideration">Roadmap</a> |
|
||||
<a href="https://www.meilisearch.com/pricing?utm_campaign=oss&utm_source=engine&utm_medium=meilisearch">Meilisearch Cloud</a> |
|
||||
<a href="https://blog.meilisearch.com">Blog</a> |
|
||||
<a href="https://www.meilisearch.com/docs">Documentation</a> |
|
||||
<a href="https://www.meilisearch.com/docs/faq">FAQ</a> |
|
||||
<a href="https://discord.meilisearch.com">Discord</a>
|
||||
<a href="https://www.meilisearch.com/pricing?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=nav">Meilisearch Cloud</a> |
|
||||
<a href="https://blog.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=nav">Blog</a> |
|
||||
<a href="https://www.meilisearch.com/docs?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=nav">Documentation</a> |
|
||||
<a href="https://www.meilisearch.com/docs/faq?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=nav">FAQ</a> |
|
||||
<a href="https://discord.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=nav">Discord</a>
|
||||
</h4>
|
||||
|
||||
<p align="center">
|
||||
@ -24,40 +28,40 @@
|
||||
Meilisearch helps you shape a delightful search experience in a snap, offering features that work out-of-the-box to speed up your workflow.
|
||||
|
||||
<p align="center" name="demo">
|
||||
<a href="https://where2watch.meilisearch.com/#gh-light-mode-only" target="_blank">
|
||||
<a href="https://where2watch.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demo-gif#gh-light-mode-only" target="_blank">
|
||||
<img src="assets/demo-light.gif#gh-light-mode-only" alt="A bright colored application for finding movies screening near the user">
|
||||
</a>
|
||||
<a href="https://where2watch.meilisearch.com/#gh-dark-mode-only" target="_blank">
|
||||
<a href="https://where2watch.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demo-gif#gh-dark-mode-only" target="_blank">
|
||||
<img src="assets/demo-dark.gif#gh-dark-mode-only" alt="A dark colored application for finding movies screening near the user">
|
||||
</a>
|
||||
</p>
|
||||
|
||||
🔥 [**Try it!**](https://where2watch.meilisearch.com/) 🔥
|
||||
🔥 [**Try it!**](https://where2watch.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demo-link) 🔥
|
||||
|
||||
## ✨ Features
|
||||
|
||||
- **Search-as-you-type:** find search results in less than 50 milliseconds
|
||||
- **[Typo tolerance](https://www.meilisearch.com/docs/learn/getting_started/customizing_relevancy#typo-tolerance):** get relevant matches even when queries contain typos and misspellings
|
||||
- **[Filtering](https://www.meilisearch.com/docs/learn/fine_tuning_results/filtering) and [faceted search](https://www.meilisearch.com/docs/learn/fine_tuning_results/faceted_search):** enhance your user's search experience with custom filters and build a faceted search interface in a few lines of code
|
||||
- **[Sorting](https://www.meilisearch.com/docs/learn/fine_tuning_results/sorting):** sort results based on price, date, or pretty much anything else your users need
|
||||
- **[Synonym support](https://www.meilisearch.com/docs/learn/getting_started/customizing_relevancy#synonyms):** configure synonyms to include more relevant content in your search results
|
||||
- **[Geosearch](https://www.meilisearch.com/docs/learn/fine_tuning_results/geosearch):** filter and sort documents based on geographic data
|
||||
- **[Extensive language support](https://www.meilisearch.com/docs/learn/what_is_meilisearch/language):** search datasets in any language, with optimized support for Chinese, Japanese, Hebrew, and languages using the Latin alphabet
|
||||
- **[Security management](https://www.meilisearch.com/docs/learn/security/master_api_keys):** control which users can access what data with API keys that allow fine-grained permissions handling
|
||||
- **[Multi-Tenancy](https://www.meilisearch.com/docs/learn/security/tenant_tokens):** personalize search results for any number of application tenants
|
||||
- **[Typo tolerance](https://www.meilisearch.com/docs/learn/getting_started/customizing_relevancy?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features#typo-tolerance):** get relevant matches even when queries contain typos and misspellings
|
||||
- **[Filtering](https://www.meilisearch.com/docs/learn/fine_tuning_results/filtering?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features) and [faceted search](https://www.meilisearch.com/docs/learn/fine_tuning_results/faceted_search?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** enhance your user's search experience with custom filters and build a faceted search interface in a few lines of code
|
||||
- **[Sorting](https://www.meilisearch.com/docs/learn/fine_tuning_results/sorting?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** sort results based on price, date, or pretty much anything else your users need
|
||||
- **[Synonym support](https://www.meilisearch.com/docs/learn/getting_started/customizing_relevancy?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features#synonyms):** configure synonyms to include more relevant content in your search results
|
||||
- **[Geosearch](https://www.meilisearch.com/docs/learn/fine_tuning_results/geosearch?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** filter and sort documents based on geographic data
|
||||
- **[Extensive language support](https://www.meilisearch.com/docs/learn/what_is_meilisearch/language?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** search datasets in any language, with optimized support for Chinese, Japanese, Hebrew, and languages using the Latin alphabet
|
||||
- **[Security management](https://www.meilisearch.com/docs/learn/security/master_api_keys?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** control which users can access what data with API keys that allow fine-grained permissions handling
|
||||
- **[Multi-Tenancy](https://www.meilisearch.com/docs/learn/security/tenant_tokens?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** personalize search results for any number of application tenants
|
||||
- **Highly Customizable:** customize Meilisearch to your specific needs or use our out-of-the-box and hassle-free presets
|
||||
- **[RESTful API](https://www.meilisearch.com/docs/reference/api/overview):** integrate Meilisearch in your technical stack with our plugins and SDKs
|
||||
- **[RESTful API](https://www.meilisearch.com/docs/reference/api/overview?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** integrate Meilisearch in your technical stack with our plugins and SDKs
|
||||
- **Easy to install, deploy, and maintain**
|
||||
|
||||
## 📖 Documentation
|
||||
|
||||
You can consult Meilisearch's documentation at [https://www.meilisearch.com/docs](https://www.meilisearch.com/docs/).
|
||||
You can consult Meilisearch's documentation at [https://www.meilisearch.com/docs](https://www.meilisearch.com/docs/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=docs).
|
||||
|
||||
## 🚀 Getting started
|
||||
|
||||
For basic instructions on how to set up Meilisearch, add documents to an index, and search for documents, take a look at our [Quick Start](https://www.meilisearch.com/docs/learn/getting_started/quick_start) guide.
|
||||
For basic instructions on how to set up Meilisearch, add documents to an index, and search for documents, take a look at our [Quick Start](https://www.meilisearch.com/docs/learn/getting_started/quick_start?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=get-started) guide.
|
||||
|
||||
You may also want to check out [Meilisearch 101](https://www.meilisearch.com/docs/learn/getting_started/filtering_and_sorting) for an introduction to some of Meilisearch's most popular features.
|
||||
You may also want to check out [Meilisearch 101](https://www.meilisearch.com/docs/learn/getting_started/filtering_and_sorting?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=get-started) for an introduction to some of Meilisearch's most popular features.
|
||||
|
||||
## ⚡ Supercharge your Meilisearch experience
|
||||
|
||||
@ -67,29 +71,29 @@ Say goodbye to server deployment and manual updates with [Meilisearch Cloud](htt
|
||||
|
||||
Install one of our SDKs in your project for seamless integration between Meilisearch and your favorite language or framework!
|
||||
|
||||
Take a look at the complete [Meilisearch integration list](https://www.meilisearch.com/docs/learn/what_is_meilisearch/sdks).
|
||||
Take a look at the complete [Meilisearch integration list](https://www.meilisearch.com/docs/learn/what_is_meilisearch/sdks?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=sdks-link).
|
||||
|
||||
[](https://www.meilisearch.com/docs/learn/what_is_meilisearch/sdks)
|
||||
[](https://www.meilisearch.com/docs/learn/what_is_meilisearch/sdks?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=sdks-logos)
|
||||
|
||||
## ⚙️ Advanced usage
|
||||
|
||||
Experienced users will want to keep our [API Reference](https://www.meilisearch.com/docs/reference/api/overview) close at hand.
|
||||
Experienced users will want to keep our [API Reference](https://www.meilisearch.com/docs/reference/api/overview?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=advanced) close at hand.
|
||||
|
||||
We also offer a wide range of dedicated guides to all Meilisearch features, such as [filtering](https://www.meilisearch.com/docs/learn/fine_tuning_results/filtering), [sorting](https://www.meilisearch.com/docs/learn/fine_tuning_results/sorting), [geosearch](https://www.meilisearch.com/docs/learn/fine_tuning_results/geosearch), [API keys](https://www.meilisearch.com/docs/learn/security/master_api_keys), and [tenant tokens](https://www.meilisearch.com/docs/learn/security/tenant_tokens).
|
||||
We also offer a wide range of dedicated guides to all Meilisearch features, such as [filtering](https://www.meilisearch.com/docs/learn/fine_tuning_results/filtering?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=advanced), [sorting](https://www.meilisearch.com/docs/learn/fine_tuning_results/sorting?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=advanced), [geosearch](https://www.meilisearch.com/docs/learn/fine_tuning_results/geosearch?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=advanced), [API keys](https://www.meilisearch.com/docs/learn/security/master_api_keys?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=advanced), and [tenant tokens](https://www.meilisearch.com/docs/learn/security/tenant_tokens?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=advanced).
|
||||
|
||||
Finally, for more in-depth information, refer to our articles explaining fundamental Meilisearch concepts such as [documents](https://www.meilisearch.com/docs/learn/core_concepts/documents) and [indexes](https://www.meilisearch.com/docs/learn/core_concepts/indexes).
|
||||
Finally, for more in-depth information, refer to our articles explaining fundamental Meilisearch concepts such as [documents](https://www.meilisearch.com/docs/learn/core_concepts/documents?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=advanced) and [indexes](https://www.meilisearch.com/docs/learn/core_concepts/indexes?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=advanced).
|
||||
|
||||
## 📊 Telemetry
|
||||
|
||||
Meilisearch collects **anonymized** data from users to help us improve our product. You can [deactivate this](https://www.meilisearch.com/docs/learn/what_is_meilisearch/telemetry#how-to-disable-data-collection) whenever you want.
|
||||
Meilisearch collects **anonymized** data from users to help us improve our product. You can [deactivate this](https://www.meilisearch.com/docs/learn/what_is_meilisearch/telemetry?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=telemetry#how-to-disable-data-collection) whenever you want.
|
||||
|
||||
To request deletion of collected data, please write to us at [privacy@meilisearch.com](mailto:privacy@meilisearch.com). Don't forget to include your `Instance UID` in the message, as this helps us quickly find and delete your data.
|
||||
To request deletion of collected data, please write to us at [privacy@meilisearch.com](mailto:privacy@meilisearch.com). Don't forget to include your `Instance UID` in the message, as this helps us quickly find and delete your data.
|
||||
|
||||
If you want to know more about the kind of data we collect and what we use it for, check the [telemetry section](https://www.meilisearch.com/docs/learn/what_is_meilisearch/telemetry) of our documentation.
|
||||
If you want to know more about the kind of data we collect and what we use it for, check the [telemetry section](https://www.meilisearch.com/docs/learn/what_is_meilisearch/telemetry?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=telemetry#how-to-disable-data-collection) of our documentation.
|
||||
|
||||
## 📫 Get in touch!
|
||||
|
||||
Meilisearch is a search engine created by [Meili](https://www.welcometothejungle.com/en/companies/meilisearch), a software development company based in France and with team members all over the world. Want to know more about us? [Check out our blog!](https://blog.meilisearch.com/)
|
||||
Meilisearch is a search engine created by [Meili](https://www.welcometothejungle.com/en/companies/meilisearch), a software development company based in France and with team members all over the world. Want to know more about us? [Check out our blog!](https://blog.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=contact)
|
||||
|
||||
🗞 [Subscribe to our newsletter](https://meilisearch.us2.list-manage.com/subscribe?u=27870f7b71c908a8b359599fb&id=79582d828e) if you don't want to miss any updates! We promise we won't clutter your mailbox: we only send one edition every two months.
|
||||
|
||||
|
BIN
assets/profiling-example.png
Normal file
BIN
assets/profiling-example.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 1.2 MiB |
@ -14,7 +14,7 @@ license.workspace = true
|
||||
anyhow = "1.0.70"
|
||||
csv = "1.2.1"
|
||||
milli = { path = "../milli" }
|
||||
mimalloc = { version = "0.1.36", default-features = false }
|
||||
mimalloc = { version = "0.1.37", default-features = false }
|
||||
serde_json = { version = "1.0.95", features = ["preserve_order"] }
|
||||
|
||||
[dev-dependencies]
|
||||
|
@ -7,7 +7,7 @@ use meilisearch_types::milli::update::IndexDocumentsMethod;
|
||||
use meilisearch_types::settings::Unchecked;
|
||||
use meilisearch_types::tasks::{Details, IndexSwap, KindWithContent, Status, Task, TaskId};
|
||||
use meilisearch_types::InstanceUid;
|
||||
use roaring::RoaringBitmap;
|
||||
use roaring::RoaringTreemap;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use time::OffsetDateTime;
|
||||
|
||||
@ -121,11 +121,11 @@ pub enum KindDump {
|
||||
},
|
||||
TaskCancelation {
|
||||
query: String,
|
||||
tasks: RoaringBitmap,
|
||||
tasks: RoaringTreemap,
|
||||
},
|
||||
TasksDeletion {
|
||||
query: String,
|
||||
tasks: RoaringBitmap,
|
||||
tasks: RoaringTreemap,
|
||||
},
|
||||
DumpCreation {
|
||||
keys: Vec<Key>,
|
||||
@ -262,6 +262,9 @@ pub(crate) mod test {
|
||||
sortable_attributes: Setting::Set(btreeset! { S("age") }),
|
||||
ranking_rules: Setting::NotSet,
|
||||
stop_words: Setting::NotSet,
|
||||
non_separator_tokens: Setting::NotSet,
|
||||
separator_tokens: Setting::NotSet,
|
||||
dictionary: Setting::NotSet,
|
||||
synonyms: Setting::NotSet,
|
||||
distinct_attribute: Setting::NotSet,
|
||||
typo_tolerance: Setting::NotSet,
|
||||
|
@ -69,7 +69,7 @@ impl CompatV5ToV6 {
|
||||
}
|
||||
|
||||
let task = v6::Task {
|
||||
uid: task_view.uid,
|
||||
uid: task_view.uid as u64,
|
||||
index_uid: task_view.index_uid,
|
||||
status: match task_view.status {
|
||||
v5::Status::Enqueued => v6::Status::Enqueued,
|
||||
@ -340,6 +340,9 @@ impl<T> From<v5::Settings<T>> for v6::Settings<v6::Unchecked> {
|
||||
}
|
||||
},
|
||||
stop_words: settings.stop_words.into(),
|
||||
non_separator_tokens: v6::Setting::NotSet,
|
||||
separator_tokens: v6::Setting::NotSet,
|
||||
dictionary: v6::Setting::NotSet,
|
||||
synonyms: settings.synonyms.into(),
|
||||
distinct_attribute: settings.distinct_attribute.into(),
|
||||
typo_tolerance: match settings.typo_tolerance {
|
||||
|
@ -0,0 +1,24 @@
|
||||
---
|
||||
source: dump/src/reader/mod.rs
|
||||
expression: spells.settings().unwrap()
|
||||
---
|
||||
{
|
||||
"displayedAttributes": [
|
||||
"*"
|
||||
],
|
||||
"searchableAttributes": [
|
||||
"*"
|
||||
],
|
||||
"filterableAttributes": [],
|
||||
"sortableAttributes": [],
|
||||
"rankingRules": [
|
||||
"typo",
|
||||
"words",
|
||||
"proximity",
|
||||
"attribute",
|
||||
"exactness"
|
||||
],
|
||||
"stopWords": [],
|
||||
"synonyms": {},
|
||||
"distinctAttribute": null
|
||||
}
|
@ -0,0 +1,38 @@
|
||||
---
|
||||
source: dump/src/reader/mod.rs
|
||||
expression: products.settings().unwrap()
|
||||
---
|
||||
{
|
||||
"displayedAttributes": [
|
||||
"*"
|
||||
],
|
||||
"searchableAttributes": [
|
||||
"*"
|
||||
],
|
||||
"filterableAttributes": [],
|
||||
"sortableAttributes": [],
|
||||
"rankingRules": [
|
||||
"typo",
|
||||
"words",
|
||||
"proximity",
|
||||
"attribute",
|
||||
"exactness"
|
||||
],
|
||||
"stopWords": [],
|
||||
"synonyms": {
|
||||
"android": [
|
||||
"phone",
|
||||
"smartphone"
|
||||
],
|
||||
"iphone": [
|
||||
"phone",
|
||||
"smartphone"
|
||||
],
|
||||
"phone": [
|
||||
"android",
|
||||
"iphone",
|
||||
"smartphone"
|
||||
]
|
||||
},
|
||||
"distinctAttribute": null
|
||||
}
|
@ -0,0 +1,31 @@
|
||||
---
|
||||
source: dump/src/reader/mod.rs
|
||||
expression: movies.settings().unwrap()
|
||||
---
|
||||
{
|
||||
"displayedAttributes": [
|
||||
"*"
|
||||
],
|
||||
"searchableAttributes": [
|
||||
"*"
|
||||
],
|
||||
"filterableAttributes": [
|
||||
"genres",
|
||||
"id"
|
||||
],
|
||||
"sortableAttributes": [
|
||||
"genres",
|
||||
"id"
|
||||
],
|
||||
"rankingRules": [
|
||||
"typo",
|
||||
"words",
|
||||
"proximity",
|
||||
"attribute",
|
||||
"exactness",
|
||||
"release_date:asc"
|
||||
],
|
||||
"stopWords": [],
|
||||
"synonyms": {},
|
||||
"distinctAttribute": null
|
||||
}
|
@ -13,7 +13,7 @@ license.workspace = true
|
||||
[dependencies]
|
||||
arbitrary = { version = "1.3.0", features = ["derive"] }
|
||||
clap = { version = "4.3.0", features = ["derive"] }
|
||||
fastrand = "1.9.0"
|
||||
fastrand = "2.0.0"
|
||||
milli = { path = "../milli" }
|
||||
serde = { version = "1.0.160", features = ["derive"] }
|
||||
serde_json = { version = "1.0.95", features = ["preserve_order"] }
|
||||
|
@ -22,6 +22,7 @@ log = "0.4.17"
|
||||
meilisearch-auth = { path = "../meilisearch-auth" }
|
||||
meilisearch-types = { path = "../meilisearch-types" }
|
||||
page_size = "0.5.0"
|
||||
puffin = "0.16.0"
|
||||
roaring = { version = "0.10.1", features = ["serde"] }
|
||||
serde = { version = "1.0.160", features = ["derive"] }
|
||||
serde_json = { version = "1.0.95", features = ["preserve_order"] }
|
||||
|
@ -32,11 +32,11 @@ use meilisearch_types::milli::update::{
|
||||
DeleteDocuments, DocumentDeletionResult, IndexDocumentsConfig, IndexDocumentsMethod,
|
||||
Settings as MilliSettings,
|
||||
};
|
||||
use meilisearch_types::milli::{self, Filter, BEU32};
|
||||
use meilisearch_types::milli::{self, Filter, BEU64};
|
||||
use meilisearch_types::settings::{apply_settings_to_builder, Settings, Unchecked};
|
||||
use meilisearch_types::tasks::{Details, IndexSwap, Kind, KindWithContent, Status, Task};
|
||||
use meilisearch_types::{compression, Index, VERSION_FILE_NAME};
|
||||
use roaring::RoaringBitmap;
|
||||
use roaring::RoaringTreemap;
|
||||
use time::macros::format_description;
|
||||
use time::OffsetDateTime;
|
||||
use uuid::Uuid;
|
||||
@ -58,7 +58,7 @@ pub(crate) enum Batch {
|
||||
/// The date and time at which the previously processing tasks started.
|
||||
previous_started_at: OffsetDateTime,
|
||||
/// The list of tasks that were processing when this task cancelation appeared.
|
||||
previous_processing_tasks: RoaringBitmap,
|
||||
previous_processing_tasks: RoaringTreemap,
|
||||
},
|
||||
TaskDeletion(Task),
|
||||
SnapshotCreation(Vec<Task>),
|
||||
@ -471,6 +471,8 @@ impl IndexScheduler {
|
||||
#[cfg(test)]
|
||||
self.maybe_fail(crate::tests::FailureLocation::InsideCreateBatch)?;
|
||||
|
||||
puffin::profile_function!();
|
||||
|
||||
let enqueued = &self.get_status(rtxn, Status::Enqueued)?;
|
||||
let to_cancel = self.get_kind(rtxn, Kind::TaskCancelation)? & enqueued;
|
||||
|
||||
@ -575,6 +577,9 @@ impl IndexScheduler {
|
||||
self.maybe_fail(crate::tests::FailureLocation::PanicInsideProcessBatch)?;
|
||||
self.breakpoint(crate::Breakpoint::InsideProcessBatch);
|
||||
}
|
||||
|
||||
puffin::profile_function!(format!("{:?}", batch));
|
||||
|
||||
match batch {
|
||||
Batch::TaskCancelation { mut task, previous_started_at, previous_processing_tasks } => {
|
||||
// 1. Retrieve the tasks that matched the query at enqueue-time.
|
||||
@ -1060,7 +1065,13 @@ impl IndexScheduler {
|
||||
}
|
||||
|
||||
/// Swap the index `lhs` with the index `rhs`.
|
||||
fn apply_index_swap(&self, wtxn: &mut RwTxn, task_id: u32, lhs: &str, rhs: &str) -> Result<()> {
|
||||
fn apply_index_swap(
|
||||
&self,
|
||||
wtxn: &mut RwTxn,
|
||||
task_id: TaskId,
|
||||
lhs: &str,
|
||||
rhs: &str,
|
||||
) -> Result<()> {
|
||||
// 1. Verify that both lhs and rhs are existing indexes
|
||||
let index_lhs_exists = self.index_mapper.index_exists(wtxn, lhs)?;
|
||||
if !index_lhs_exists {
|
||||
@ -1081,7 +1092,7 @@ impl IndexScheduler {
|
||||
for task_id in &index_lhs_task_ids | &index_rhs_task_ids {
|
||||
let mut task = self.get_task(wtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?;
|
||||
swap_index_uid_in_task(&mut task, (lhs, rhs));
|
||||
self.all_tasks.put(wtxn, &BEU32::new(task_id), &task)?;
|
||||
self.all_tasks.put(wtxn, &BEU64::new(task_id), &task)?;
|
||||
}
|
||||
|
||||
// 4. remove the task from indexuid = before_name
|
||||
@ -1111,6 +1122,8 @@ impl IndexScheduler {
|
||||
index: &'i Index,
|
||||
operation: IndexOperation,
|
||||
) -> Result<Vec<Task>> {
|
||||
puffin::profile_function!();
|
||||
|
||||
match operation {
|
||||
IndexOperation::DocumentClear { mut tasks, .. } => {
|
||||
let count = milli::update::ClearDocuments::new(index_wtxn, index).execute()?;
|
||||
@ -1382,7 +1395,11 @@ impl IndexScheduler {
|
||||
/// Delete each given task from all the databases (if it is deleteable).
|
||||
///
|
||||
/// Return the number of tasks that were actually deleted.
|
||||
fn delete_matched_tasks(&self, wtxn: &mut RwTxn, matched_tasks: &RoaringBitmap) -> Result<u64> {
|
||||
fn delete_matched_tasks(
|
||||
&self,
|
||||
wtxn: &mut RwTxn,
|
||||
matched_tasks: &RoaringTreemap,
|
||||
) -> Result<u64> {
|
||||
// 1. Remove from this list the tasks that we are not allowed to delete
|
||||
let enqueued_tasks = self.get_status(wtxn, Status::Enqueued)?;
|
||||
let processing_tasks = &self.processing_tasks.read().unwrap().processing.clone();
|
||||
@ -1397,7 +1414,7 @@ impl IndexScheduler {
|
||||
let mut affected_indexes = HashSet::new();
|
||||
let mut affected_statuses = HashSet::new();
|
||||
let mut affected_kinds = HashSet::new();
|
||||
let mut affected_canceled_by = RoaringBitmap::new();
|
||||
let mut affected_canceled_by = RoaringTreemap::new();
|
||||
|
||||
for task_id in to_delete_tasks.iter() {
|
||||
let task = self.get_task(wtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?;
|
||||
@ -1434,10 +1451,10 @@ impl IndexScheduler {
|
||||
}
|
||||
|
||||
for task in to_delete_tasks.iter() {
|
||||
self.all_tasks.delete(wtxn, &BEU32::new(task))?;
|
||||
self.all_tasks.delete(wtxn, &BEU64::new(task))?;
|
||||
}
|
||||
for canceled_by in affected_canceled_by {
|
||||
let canceled_by = BEU32::new(canceled_by);
|
||||
let canceled_by = BEU64::new(canceled_by);
|
||||
if let Some(mut tasks) = self.canceled_by.get(wtxn, &canceled_by)? {
|
||||
tasks -= &to_delete_tasks;
|
||||
if tasks.is_empty() {
|
||||
@ -1458,9 +1475,9 @@ impl IndexScheduler {
|
||||
&self,
|
||||
wtxn: &mut RwTxn,
|
||||
cancel_task_id: TaskId,
|
||||
matched_tasks: &RoaringBitmap,
|
||||
matched_tasks: &RoaringTreemap,
|
||||
previous_started_at: OffsetDateTime,
|
||||
previous_processing_tasks: &RoaringBitmap,
|
||||
previous_processing_tasks: &RoaringTreemap,
|
||||
) -> Result<Vec<Uuid>> {
|
||||
let now = OffsetDateTime::now_utc();
|
||||
|
||||
@ -1485,7 +1502,7 @@ impl IndexScheduler {
|
||||
task.details = task.details.map(|d| d.to_failed());
|
||||
self.update_task(wtxn, &task)?;
|
||||
}
|
||||
self.canceled_by.put(wtxn, &BEU32::new(cancel_task_id), &tasks_to_cancel)?;
|
||||
self.canceled_by.put(wtxn, &BEU64::new(cancel_task_id), &tasks_to_cancel)?;
|
||||
|
||||
Ok(content_files_to_delete)
|
||||
}
|
||||
|
@ -48,6 +48,8 @@ impl From<DateField> for Code {
|
||||
pub enum Error {
|
||||
#[error("{1}")]
|
||||
WithCustomErrorCode(Code, Box<Self>),
|
||||
#[error("Received bad task id: {received} should be >= to {expected}.")]
|
||||
BadTaskId { received: TaskId, expected: TaskId },
|
||||
#[error("Index `{0}` not found.")]
|
||||
IndexNotFound(String),
|
||||
#[error("Index `{0}` already exists.")]
|
||||
@ -159,6 +161,7 @@ impl Error {
|
||||
match self {
|
||||
Error::IndexNotFound(_)
|
||||
| Error::WithCustomErrorCode(_, _)
|
||||
| Error::BadTaskId { .. }
|
||||
| Error::IndexAlreadyExists(_)
|
||||
| Error::SwapDuplicateIndexFound(_)
|
||||
| Error::SwapDuplicateIndexesFound(_)
|
||||
@ -202,6 +205,7 @@ impl ErrorCode for Error {
|
||||
fn error_code(&self) -> Code {
|
||||
match self {
|
||||
Error::WithCustomErrorCode(code, _) => *code,
|
||||
Error::BadTaskId { .. } => Code::BadRequest,
|
||||
Error::IndexNotFound(_) => Code::IndexNotFound,
|
||||
Error::IndexAlreadyExists(_) => Code::IndexAlreadyExists,
|
||||
Error::SwapDuplicateIndexesFound(_) => Code::InvalidSwapDuplicateIndexFound,
|
||||
|
@ -3,9 +3,10 @@ use std::fmt::Write;
|
||||
|
||||
use meilisearch_types::heed::types::{OwnedType, SerdeBincode, SerdeJson, Str};
|
||||
use meilisearch_types::heed::{Database, RoTxn};
|
||||
use meilisearch_types::milli::{CboRoaringBitmapCodec, RoaringBitmapCodec, BEU32};
|
||||
use meilisearch_types::milli::heed_codec::{CboRoaringTreemapCodec, RoaringTreemapCodec};
|
||||
use meilisearch_types::milli::BEU64;
|
||||
use meilisearch_types::tasks::{Details, Task};
|
||||
use roaring::RoaringBitmap;
|
||||
use roaring::RoaringTreemap;
|
||||
|
||||
use crate::index_mapper::IndexMapper;
|
||||
use crate::{IndexScheduler, Kind, Status, BEI128};
|
||||
@ -47,7 +48,7 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String {
|
||||
let processing_tasks = processing_tasks.read().unwrap().processing.clone();
|
||||
snap.push_str(&format!("### Autobatching Enabled = {autobatching_enabled}\n"));
|
||||
snap.push_str("### Processing Tasks:\n");
|
||||
snap.push_str(&snapshot_bitmap(&processing_tasks));
|
||||
snap.push_str(&snapshot_treemap(&processing_tasks));
|
||||
snap.push_str("\n----------------------------------------------------------------------\n");
|
||||
|
||||
snap.push_str("### All Tasks:\n");
|
||||
@ -103,7 +104,7 @@ pub fn snapshot_file_store(file_store: &file_store::FileStore) -> String {
|
||||
snap
|
||||
}
|
||||
|
||||
pub fn snapshot_bitmap(r: &RoaringBitmap) -> String {
|
||||
pub fn snapshot_treemap(r: &RoaringTreemap) -> String {
|
||||
let mut snap = String::new();
|
||||
snap.push('[');
|
||||
for x in r {
|
||||
@ -113,7 +114,7 @@ pub fn snapshot_bitmap(r: &RoaringBitmap) -> String {
|
||||
snap
|
||||
}
|
||||
|
||||
pub fn snapshot_all_tasks(rtxn: &RoTxn, db: Database<OwnedType<BEU32>, SerdeJson<Task>>) -> String {
|
||||
pub fn snapshot_all_tasks(rtxn: &RoTxn, db: Database<OwnedType<BEU64>, SerdeJson<Task>>) -> String {
|
||||
let mut snap = String::new();
|
||||
let iter = db.iter(rtxn).unwrap();
|
||||
for next in iter {
|
||||
@ -125,13 +126,13 @@ pub fn snapshot_all_tasks(rtxn: &RoTxn, db: Database<OwnedType<BEU32>, SerdeJson
|
||||
|
||||
pub fn snapshot_date_db(
|
||||
rtxn: &RoTxn,
|
||||
db: Database<OwnedType<BEI128>, CboRoaringBitmapCodec>,
|
||||
db: Database<OwnedType<BEI128>, CboRoaringTreemapCodec>,
|
||||
) -> String {
|
||||
let mut snap = String::new();
|
||||
let iter = db.iter(rtxn).unwrap();
|
||||
for next in iter {
|
||||
let (_timestamp, task_ids) = next.unwrap();
|
||||
snap.push_str(&format!("[timestamp] {}\n", snapshot_bitmap(&task_ids)));
|
||||
snap.push_str(&format!("[timestamp] {}\n", snapshot_treemap(&task_ids)));
|
||||
}
|
||||
snap
|
||||
}
|
||||
@ -216,45 +217,48 @@ fn snapshot_details(d: &Details) -> String {
|
||||
|
||||
pub fn snapshot_status(
|
||||
rtxn: &RoTxn,
|
||||
db: Database<SerdeBincode<Status>, RoaringBitmapCodec>,
|
||||
db: Database<SerdeBincode<Status>, RoaringTreemapCodec>,
|
||||
) -> String {
|
||||
let mut snap = String::new();
|
||||
let iter = db.iter(rtxn).unwrap();
|
||||
for next in iter {
|
||||
let (status, task_ids) = next.unwrap();
|
||||
writeln!(snap, "{status} {}", snapshot_bitmap(&task_ids)).unwrap();
|
||||
writeln!(snap, "{status} {}", snapshot_treemap(&task_ids)).unwrap();
|
||||
}
|
||||
snap
|
||||
}
|
||||
pub fn snapshot_kind(rtxn: &RoTxn, db: Database<SerdeBincode<Kind>, RoaringBitmapCodec>) -> String {
|
||||
let mut snap = String::new();
|
||||
let iter = db.iter(rtxn).unwrap();
|
||||
for next in iter {
|
||||
let (kind, task_ids) = next.unwrap();
|
||||
let kind = serde_json::to_string(&kind).unwrap();
|
||||
writeln!(snap, "{kind} {}", snapshot_bitmap(&task_ids)).unwrap();
|
||||
}
|
||||
snap
|
||||
}
|
||||
|
||||
pub fn snapshot_index_tasks(rtxn: &RoTxn, db: Database<Str, RoaringBitmapCodec>) -> String {
|
||||
let mut snap = String::new();
|
||||
let iter = db.iter(rtxn).unwrap();
|
||||
for next in iter {
|
||||
let (index, task_ids) = next.unwrap();
|
||||
writeln!(snap, "{index} {}", snapshot_bitmap(&task_ids)).unwrap();
|
||||
}
|
||||
snap
|
||||
}
|
||||
pub fn snapshot_canceled_by(
|
||||
pub fn snapshot_kind(
|
||||
rtxn: &RoTxn,
|
||||
db: Database<OwnedType<BEU32>, RoaringBitmapCodec>,
|
||||
db: Database<SerdeBincode<Kind>, RoaringTreemapCodec>,
|
||||
) -> String {
|
||||
let mut snap = String::new();
|
||||
let iter = db.iter(rtxn).unwrap();
|
||||
for next in iter {
|
||||
let (kind, task_ids) = next.unwrap();
|
||||
writeln!(snap, "{kind} {}", snapshot_bitmap(&task_ids)).unwrap();
|
||||
let kind = serde_json::to_string(&kind).unwrap();
|
||||
writeln!(snap, "{kind} {}", snapshot_treemap(&task_ids)).unwrap();
|
||||
}
|
||||
snap
|
||||
}
|
||||
|
||||
pub fn snapshot_index_tasks(rtxn: &RoTxn, db: Database<Str, RoaringTreemapCodec>) -> String {
|
||||
let mut snap = String::new();
|
||||
let iter = db.iter(rtxn).unwrap();
|
||||
for next in iter {
|
||||
let (index, task_ids) = next.unwrap();
|
||||
writeln!(snap, "{index} {}", snapshot_treemap(&task_ids)).unwrap();
|
||||
}
|
||||
snap
|
||||
}
|
||||
pub fn snapshot_canceled_by(
|
||||
rtxn: &RoTxn,
|
||||
db: Database<OwnedType<BEU64>, RoaringTreemapCodec>,
|
||||
) -> String {
|
||||
let mut snap = String::new();
|
||||
let iter = db.iter(rtxn).unwrap();
|
||||
for next in iter {
|
||||
let (kind, task_ids) = next.unwrap();
|
||||
writeln!(snap, "{kind} {}", snapshot_treemap(&task_ids)).unwrap();
|
||||
}
|
||||
snap
|
||||
}
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -7,7 +7,7 @@ source: index-scheduler/src/lib.rs
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, status: canceled, canceled_by: 1, details: { received_documents: 1, indexed_documents: Some(0) }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
|
||||
1 {uid: 1, status: succeeded, details: { matched_tasks: 1, canceled_tasks: Some(1), original_filter: "test_query" }, kind: TaskCancelation { query: "test_query", tasks: RoaringBitmap<[0]> }}
|
||||
1 {uid: 1, status: succeeded, details: { matched_tasks: 1, canceled_tasks: Some(1), original_filter: "test_query" }, kind: TaskCancelation { query: "test_query", tasks: RoaringTreemap<[0]> }}
|
||||
----------------------------------------------------------------------
|
||||
### Status:
|
||||
enqueued []
|
||||
|
@ -7,7 +7,7 @@ source: index-scheduler/src/lib.rs
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
|
||||
1 {uid: 1, status: enqueued, details: { matched_tasks: 1, canceled_tasks: None, original_filter: "test_query" }, kind: TaskCancelation { query: "test_query", tasks: RoaringBitmap<[0]> }}
|
||||
1 {uid: 1, status: enqueued, details: { matched_tasks: 1, canceled_tasks: None, original_filter: "test_query" }, kind: TaskCancelation { query: "test_query", tasks: RoaringTreemap<[0]> }}
|
||||
----------------------------------------------------------------------
|
||||
### Status:
|
||||
enqueued [0,1,]
|
||||
|
@ -9,7 +9,7 @@ source: index-scheduler/src/lib.rs
|
||||
0 {uid: 0, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
|
||||
1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "beavero", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }}
|
||||
2 {uid: 2, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "wolfo", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000002, documents_count: 1, allow_index_creation: true }}
|
||||
3 {uid: 3, status: enqueued, details: { matched_tasks: 3, canceled_tasks: None, original_filter: "test_query" }, kind: TaskCancelation { query: "test_query", tasks: RoaringBitmap<[0, 1, 2]> }}
|
||||
3 {uid: 3, status: enqueued, details: { matched_tasks: 3, canceled_tasks: None, original_filter: "test_query" }, kind: TaskCancelation { query: "test_query", tasks: RoaringTreemap<[0, 1, 2]> }}
|
||||
----------------------------------------------------------------------
|
||||
### Status:
|
||||
enqueued [1,2,3,]
|
||||
|
@ -9,7 +9,7 @@ source: index-scheduler/src/lib.rs
|
||||
0 {uid: 0, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
|
||||
1 {uid: 1, status: canceled, canceled_by: 3, details: { received_documents: 1, indexed_documents: Some(0) }, kind: DocumentAdditionOrUpdate { index_uid: "beavero", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }}
|
||||
2 {uid: 2, status: canceled, canceled_by: 3, details: { received_documents: 1, indexed_documents: Some(0) }, kind: DocumentAdditionOrUpdate { index_uid: "wolfo", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000002, documents_count: 1, allow_index_creation: true }}
|
||||
3 {uid: 3, status: succeeded, details: { matched_tasks: 3, canceled_tasks: Some(2), original_filter: "test_query" }, kind: TaskCancelation { query: "test_query", tasks: RoaringBitmap<[0, 1, 2]> }}
|
||||
3 {uid: 3, status: succeeded, details: { matched_tasks: 3, canceled_tasks: Some(2), original_filter: "test_query" }, kind: TaskCancelation { query: "test_query", tasks: RoaringTreemap<[0, 1, 2]> }}
|
||||
----------------------------------------------------------------------
|
||||
### Status:
|
||||
enqueued []
|
||||
|
@ -9,7 +9,7 @@ source: index-scheduler/src/lib.rs
|
||||
0 {uid: 0, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
|
||||
1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "beavero", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }}
|
||||
2 {uid: 2, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "wolfo", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000002, documents_count: 1, allow_index_creation: true }}
|
||||
3 {uid: 3, status: enqueued, details: { matched_tasks: 3, canceled_tasks: None, original_filter: "test_query" }, kind: TaskCancelation { query: "test_query", tasks: RoaringBitmap<[0, 1, 2]> }}
|
||||
3 {uid: 3, status: enqueued, details: { matched_tasks: 3, canceled_tasks: None, original_filter: "test_query" }, kind: TaskCancelation { query: "test_query", tasks: RoaringTreemap<[0, 1, 2]> }}
|
||||
----------------------------------------------------------------------
|
||||
### Status:
|
||||
enqueued [1,2,3,]
|
||||
|
@ -7,7 +7,7 @@ source: index-scheduler/src/lib.rs
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
|
||||
1 {uid: 1, status: enqueued, details: { matched_tasks: 1, canceled_tasks: None, original_filter: "test_query" }, kind: TaskCancelation { query: "test_query", tasks: RoaringBitmap<[0]> }}
|
||||
1 {uid: 1, status: enqueued, details: { matched_tasks: 1, canceled_tasks: None, original_filter: "test_query" }, kind: TaskCancelation { query: "test_query", tasks: RoaringTreemap<[0]> }}
|
||||
----------------------------------------------------------------------
|
||||
### Status:
|
||||
enqueued [0,1,]
|
||||
|
@ -7,7 +7,7 @@ source: index-scheduler/src/lib.rs
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, status: canceled, canceled_by: 1, details: { received_documents: 1, indexed_documents: Some(0) }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
|
||||
1 {uid: 1, status: succeeded, details: { matched_tasks: 1, canceled_tasks: Some(1), original_filter: "test_query" }, kind: TaskCancelation { query: "test_query", tasks: RoaringBitmap<[0]> }}
|
||||
1 {uid: 1, status: succeeded, details: { matched_tasks: 1, canceled_tasks: Some(1), original_filter: "test_query" }, kind: TaskCancelation { query: "test_query", tasks: RoaringTreemap<[0]> }}
|
||||
----------------------------------------------------------------------
|
||||
### Status:
|
||||
enqueued []
|
||||
|
@ -7,7 +7,7 @@ source: index-scheduler/src/lib.rs
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
|
||||
1 {uid: 1, status: enqueued, details: { matched_tasks: 1, canceled_tasks: None, original_filter: "test_query" }, kind: TaskCancelation { query: "test_query", tasks: RoaringBitmap<[0]> }}
|
||||
1 {uid: 1, status: enqueued, details: { matched_tasks: 1, canceled_tasks: None, original_filter: "test_query" }, kind: TaskCancelation { query: "test_query", tasks: RoaringTreemap<[0]> }}
|
||||
----------------------------------------------------------------------
|
||||
### Status:
|
||||
enqueued [0,1,]
|
||||
|
@ -7,7 +7,7 @@ source: index-scheduler/src/lib.rs
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
|
||||
1 {uid: 1, status: succeeded, details: { matched_tasks: 1, canceled_tasks: Some(0), original_filter: "test_query" }, kind: TaskCancelation { query: "test_query", tasks: RoaringBitmap<[0]> }}
|
||||
1 {uid: 1, status: succeeded, details: { matched_tasks: 1, canceled_tasks: Some(0), original_filter: "test_query" }, kind: TaskCancelation { query: "test_query", tasks: RoaringTreemap<[0]> }}
|
||||
----------------------------------------------------------------------
|
||||
### Status:
|
||||
enqueued []
|
||||
|
@ -9,7 +9,7 @@ source: index-scheduler/src/lib.rs
|
||||
0 {uid: 0, status: succeeded, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
|
||||
1 {uid: 1, status: canceled, canceled_by: 3, details: { primary_key: Some("sheep") }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("sheep") }}
|
||||
2 {uid: 2, status: canceled, canceled_by: 3, details: { swaps: [IndexSwap { indexes: ("catto", "doggo") }] }, kind: IndexSwap { swaps: [IndexSwap { indexes: ("catto", "doggo") }] }}
|
||||
3 {uid: 3, status: succeeded, details: { matched_tasks: 3, canceled_tasks: Some(0), original_filter: "test_query" }, kind: TaskCancelation { query: "test_query", tasks: RoaringBitmap<[0, 1, 2]> }}
|
||||
3 {uid: 3, status: succeeded, details: { matched_tasks: 3, canceled_tasks: Some(0), original_filter: "test_query" }, kind: TaskCancelation { query: "test_query", tasks: RoaringTreemap<[0, 1, 2]> }}
|
||||
----------------------------------------------------------------------
|
||||
### Status:
|
||||
enqueued []
|
||||
|
@ -7,8 +7,8 @@ source: index-scheduler/src/lib.rs
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggo", primary_key: Some("bone"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }}
|
||||
2 {uid: 2, status: succeeded, details: { matched_tasks: 1, deleted_tasks: Some(1), original_filter: "test_query" }, kind: TaskDeletion { query: "test_query", tasks: RoaringBitmap<[0]> }}
|
||||
3 {uid: 3, status: succeeded, details: { matched_tasks: 1, deleted_tasks: Some(0), original_filter: "test_query" }, kind: TaskDeletion { query: "test_query", tasks: RoaringBitmap<[0]> }}
|
||||
2 {uid: 2, status: succeeded, details: { matched_tasks: 1, deleted_tasks: Some(1), original_filter: "test_query" }, kind: TaskDeletion { query: "test_query", tasks: RoaringTreemap<[0]> }}
|
||||
3 {uid: 3, status: succeeded, details: { matched_tasks: 1, deleted_tasks: Some(0), original_filter: "test_query" }, kind: TaskDeletion { query: "test_query", tasks: RoaringTreemap<[0]> }}
|
||||
----------------------------------------------------------------------
|
||||
### Status:
|
||||
enqueued [1,]
|
||||
|
@ -8,7 +8,7 @@ source: index-scheduler/src/lib.rs
|
||||
### All Tasks:
|
||||
0 {uid: 0, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
|
||||
1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggo", primary_key: Some("bone"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }}
|
||||
2 {uid: 2, status: enqueued, details: { matched_tasks: 1, deleted_tasks: None, original_filter: "test_query" }, kind: TaskDeletion { query: "test_query", tasks: RoaringBitmap<[0]> }}
|
||||
2 {uid: 2, status: enqueued, details: { matched_tasks: 1, deleted_tasks: None, original_filter: "test_query" }, kind: TaskDeletion { query: "test_query", tasks: RoaringTreemap<[0]> }}
|
||||
----------------------------------------------------------------------
|
||||
### Status:
|
||||
enqueued [1,2,]
|
||||
|
@ -7,7 +7,7 @@ source: index-scheduler/src/lib.rs
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggo", primary_key: Some("bone"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }}
|
||||
2 {uid: 2, status: succeeded, details: { matched_tasks: 1, deleted_tasks: Some(1), original_filter: "test_query" }, kind: TaskDeletion { query: "test_query", tasks: RoaringBitmap<[0]> }}
|
||||
2 {uid: 2, status: succeeded, details: { matched_tasks: 1, deleted_tasks: Some(1), original_filter: "test_query" }, kind: TaskDeletion { query: "test_query", tasks: RoaringTreemap<[0]> }}
|
||||
----------------------------------------------------------------------
|
||||
### Status:
|
||||
enqueued [1,]
|
||||
|
@ -9,7 +9,7 @@ source: index-scheduler/src/lib.rs
|
||||
0 {uid: 0, status: enqueued, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
|
||||
1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
|
||||
2 {uid: 2, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggo", primary_key: Some("bone"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }}
|
||||
3 {uid: 3, status: succeeded, details: { matched_tasks: 2, deleted_tasks: Some(0), original_filter: "test_query" }, kind: TaskDeletion { query: "test_query", tasks: RoaringBitmap<[0, 1]> }}
|
||||
3 {uid: 3, status: succeeded, details: { matched_tasks: 2, deleted_tasks: Some(0), original_filter: "test_query" }, kind: TaskDeletion { query: "test_query", tasks: RoaringTreemap<[0, 1]> }}
|
||||
----------------------------------------------------------------------
|
||||
### Status:
|
||||
enqueued [0,1,2,]
|
||||
|
@ -9,7 +9,7 @@ source: index-scheduler/src/lib.rs
|
||||
0 {uid: 0, status: enqueued, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
|
||||
1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
|
||||
2 {uid: 2, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggo", primary_key: Some("bone"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }}
|
||||
3 {uid: 3, status: enqueued, details: { matched_tasks: 2, deleted_tasks: None, original_filter: "test_query" }, kind: TaskDeletion { query: "test_query", tasks: RoaringBitmap<[0, 1]> }}
|
||||
3 {uid: 3, status: enqueued, details: { matched_tasks: 2, deleted_tasks: None, original_filter: "test_query" }, kind: TaskDeletion { query: "test_query", tasks: RoaringTreemap<[0, 1]> }}
|
||||
----------------------------------------------------------------------
|
||||
### Status:
|
||||
enqueued [0,1,2,3,]
|
||||
|
@ -9,7 +9,7 @@ source: index-scheduler/src/lib.rs
|
||||
0 {uid: 0, status: enqueued, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
|
||||
1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
|
||||
2 {uid: 2, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggo", primary_key: Some("bone"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }}
|
||||
3 {uid: 3, status: enqueued, details: { matched_tasks: 2, deleted_tasks: None, original_filter: "test_query" }, kind: TaskDeletion { query: "test_query", tasks: RoaringBitmap<[0, 1]> }}
|
||||
3 {uid: 3, status: enqueued, details: { matched_tasks: 2, deleted_tasks: None, original_filter: "test_query" }, kind: TaskDeletion { query: "test_query", tasks: RoaringTreemap<[0, 1]> }}
|
||||
----------------------------------------------------------------------
|
||||
### Status:
|
||||
enqueued [0,1,2,3,]
|
||||
|
@ -41,6 +41,18 @@ source: index-scheduler/src/lib.rs
|
||||
"taskDeletion": {
|
||||
"query": "[query]",
|
||||
"tasks": [
|
||||
1,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
58,
|
||||
48,
|
||||
0,
|
||||
|
@ -21,6 +21,18 @@ source: index-scheduler/src/lib.rs
|
||||
"taskDeletion": {
|
||||
"query": "[query]",
|
||||
"tasks": [
|
||||
1,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
58,
|
||||
48,
|
||||
0,
|
||||
|
@ -106,6 +106,18 @@ source: index-scheduler/src/lib.rs
|
||||
"taskDeletion": {
|
||||
"query": "[query]",
|
||||
"tasks": [
|
||||
1,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
58,
|
||||
48,
|
||||
0,
|
||||
|
@ -61,6 +61,18 @@ source: index-scheduler/src/lib.rs
|
||||
"taskDeletion": {
|
||||
"query": "[query]",
|
||||
"tasks": [
|
||||
1,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
58,
|
||||
48,
|
||||
0,
|
||||
|
@ -5,15 +5,16 @@ use std::ops::Bound;
|
||||
|
||||
use meilisearch_types::heed::types::{DecodeIgnore, OwnedType};
|
||||
use meilisearch_types::heed::{Database, RoTxn, RwTxn};
|
||||
use meilisearch_types::milli::{CboRoaringBitmapCodec, BEU32};
|
||||
use meilisearch_types::milli::heed_codec::CboRoaringTreemapCodec;
|
||||
use meilisearch_types::milli::BEU64;
|
||||
use meilisearch_types::tasks::{Details, IndexSwap, Kind, KindWithContent, Status};
|
||||
use roaring::{MultiOps, RoaringBitmap};
|
||||
use roaring::{MultiOps, RoaringTreemap};
|
||||
use time::OffsetDateTime;
|
||||
|
||||
use crate::{Error, IndexScheduler, Result, Task, TaskId, BEI128};
|
||||
|
||||
impl IndexScheduler {
|
||||
pub(crate) fn all_task_ids(&self, rtxn: &RoTxn) -> Result<RoaringBitmap> {
|
||||
pub(crate) fn all_task_ids(&self, rtxn: &RoTxn) -> Result<RoaringTreemap> {
|
||||
enum_iterator::all().map(|s| self.get_status(rtxn, s)).union()
|
||||
}
|
||||
|
||||
@ -26,7 +27,7 @@ impl IndexScheduler {
|
||||
}
|
||||
|
||||
pub(crate) fn get_task(&self, rtxn: &RoTxn, task_id: TaskId) -> Result<Option<Task>> {
|
||||
Ok(self.all_tasks.get(rtxn, &BEU32::new(task_id))?)
|
||||
Ok(self.all_tasks.get(rtxn, &BEU64::new(task_id))?)
|
||||
}
|
||||
|
||||
/// Convert an iterator to a `Vec` of tasks. The tasks MUST exist or a
|
||||
@ -88,12 +89,12 @@ impl IndexScheduler {
|
||||
}
|
||||
}
|
||||
|
||||
self.all_tasks.put(wtxn, &BEU32::new(task.uid), task)?;
|
||||
self.all_tasks.put(wtxn, &BEU64::new(task.uid), task)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Returns the whole set of tasks that belongs to this index.
|
||||
pub(crate) fn index_tasks(&self, rtxn: &RoTxn, index: &str) -> Result<RoaringBitmap> {
|
||||
pub(crate) fn index_tasks(&self, rtxn: &RoTxn, index: &str) -> Result<RoaringTreemap> {
|
||||
Ok(self.index_tasks.get(rtxn, index)?.unwrap_or_default())
|
||||
}
|
||||
|
||||
@ -101,7 +102,7 @@ impl IndexScheduler {
|
||||
&self,
|
||||
wtxn: &mut RwTxn,
|
||||
index: &str,
|
||||
f: impl Fn(&mut RoaringBitmap),
|
||||
f: impl Fn(&mut RoaringTreemap),
|
||||
) -> Result<()> {
|
||||
let mut tasks = self.index_tasks(wtxn, index)?;
|
||||
f(&mut tasks);
|
||||
@ -114,7 +115,7 @@ impl IndexScheduler {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) fn get_status(&self, rtxn: &RoTxn, status: Status) -> Result<RoaringBitmap> {
|
||||
pub(crate) fn get_status(&self, rtxn: &RoTxn, status: Status) -> Result<RoaringTreemap> {
|
||||
Ok(self.status.get(rtxn, &status)?.unwrap_or_default())
|
||||
}
|
||||
|
||||
@ -122,7 +123,7 @@ impl IndexScheduler {
|
||||
&self,
|
||||
wtxn: &mut RwTxn,
|
||||
status: Status,
|
||||
bitmap: &RoaringBitmap,
|
||||
bitmap: &RoaringTreemap,
|
||||
) -> Result<()> {
|
||||
Ok(self.status.put(wtxn, &status, bitmap)?)
|
||||
}
|
||||
@ -131,7 +132,7 @@ impl IndexScheduler {
|
||||
&self,
|
||||
wtxn: &mut RwTxn,
|
||||
status: Status,
|
||||
f: impl Fn(&mut RoaringBitmap),
|
||||
f: impl Fn(&mut RoaringTreemap),
|
||||
) -> Result<()> {
|
||||
let mut tasks = self.get_status(wtxn, status)?;
|
||||
f(&mut tasks);
|
||||
@ -140,7 +141,7 @@ impl IndexScheduler {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) fn get_kind(&self, rtxn: &RoTxn, kind: Kind) -> Result<RoaringBitmap> {
|
||||
pub(crate) fn get_kind(&self, rtxn: &RoTxn, kind: Kind) -> Result<RoaringTreemap> {
|
||||
Ok(self.kind.get(rtxn, &kind)?.unwrap_or_default())
|
||||
}
|
||||
|
||||
@ -148,7 +149,7 @@ impl IndexScheduler {
|
||||
&self,
|
||||
wtxn: &mut RwTxn,
|
||||
kind: Kind,
|
||||
bitmap: &RoaringBitmap,
|
||||
bitmap: &RoaringTreemap,
|
||||
) -> Result<()> {
|
||||
Ok(self.kind.put(wtxn, &kind, bitmap)?)
|
||||
}
|
||||
@ -157,7 +158,7 @@ impl IndexScheduler {
|
||||
&self,
|
||||
wtxn: &mut RwTxn,
|
||||
kind: Kind,
|
||||
f: impl Fn(&mut RoaringBitmap),
|
||||
f: impl Fn(&mut RoaringTreemap),
|
||||
) -> Result<()> {
|
||||
let mut tasks = self.get_kind(wtxn, kind)?;
|
||||
f(&mut tasks);
|
||||
@ -169,20 +170,20 @@ impl IndexScheduler {
|
||||
|
||||
pub(crate) fn insert_task_datetime(
|
||||
wtxn: &mut RwTxn,
|
||||
database: Database<OwnedType<BEI128>, CboRoaringBitmapCodec>,
|
||||
database: Database<OwnedType<BEI128>, CboRoaringTreemapCodec>,
|
||||
time: OffsetDateTime,
|
||||
task_id: TaskId,
|
||||
) -> Result<()> {
|
||||
let timestamp = BEI128::new(time.unix_timestamp_nanos());
|
||||
let mut task_ids = database.get(wtxn, ×tamp)?.unwrap_or_default();
|
||||
task_ids.insert(task_id);
|
||||
database.put(wtxn, ×tamp, &RoaringBitmap::from_iter(task_ids))?;
|
||||
database.put(wtxn, ×tamp, &RoaringTreemap::from_iter(task_ids))?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) fn remove_task_datetime(
|
||||
wtxn: &mut RwTxn,
|
||||
database: Database<OwnedType<BEI128>, CboRoaringBitmapCodec>,
|
||||
database: Database<OwnedType<BEI128>, CboRoaringTreemapCodec>,
|
||||
time: OffsetDateTime,
|
||||
task_id: TaskId,
|
||||
) -> Result<()> {
|
||||
@ -192,7 +193,7 @@ pub(crate) fn remove_task_datetime(
|
||||
if existing.is_empty() {
|
||||
database.delete(wtxn, ×tamp)?;
|
||||
} else {
|
||||
database.put(wtxn, ×tamp, &RoaringBitmap::from_iter(existing))?;
|
||||
database.put(wtxn, ×tamp, &RoaringTreemap::from_iter(existing))?;
|
||||
}
|
||||
}
|
||||
|
||||
@ -201,8 +202,8 @@ pub(crate) fn remove_task_datetime(
|
||||
|
||||
pub(crate) fn keep_tasks_within_datetimes(
|
||||
rtxn: &RoTxn,
|
||||
tasks: &mut RoaringBitmap,
|
||||
database: Database<OwnedType<BEI128>, CboRoaringBitmapCodec>,
|
||||
tasks: &mut RoaringTreemap,
|
||||
database: Database<OwnedType<BEI128>, CboRoaringTreemapCodec>,
|
||||
after: Option<OffsetDateTime>,
|
||||
before: Option<OffsetDateTime>,
|
||||
) -> Result<()> {
|
||||
@ -212,7 +213,7 @@ pub(crate) fn keep_tasks_within_datetimes(
|
||||
(Some(after), None) => (Bound::Excluded(*after), Bound::Unbounded),
|
||||
(Some(after), Some(before)) => (Bound::Excluded(*after), Bound::Excluded(*before)),
|
||||
};
|
||||
let mut collected_task_ids = RoaringBitmap::new();
|
||||
let mut collected_task_ids = RoaringTreemap::new();
|
||||
let start = map_bound(start, |b| BEI128::new(b.unix_timestamp_nanos()));
|
||||
let end = map_bound(end, |b| BEI128::new(b.unix_timestamp_nanos()));
|
||||
let iter = database.range(rtxn, &(start, end))?;
|
||||
|
@ -167,7 +167,9 @@ macro_rules! snapshot {
|
||||
let (settings, snap_name, _) = $crate::default_snapshot_settings_for_test(test_name, Some(&snap_name));
|
||||
settings.bind(|| {
|
||||
let snap = format!("{}", $value);
|
||||
meili_snap::insta::assert_snapshot!(format!("{}", snap_name), snap);
|
||||
insta::allow_duplicates! {
|
||||
meili_snap::insta::assert_snapshot!(format!("{}", snap_name), snap);
|
||||
}
|
||||
});
|
||||
};
|
||||
($value:expr, @$inline:literal) => {
|
||||
@ -176,7 +178,9 @@ macro_rules! snapshot {
|
||||
let (settings, _, _) = $crate::default_snapshot_settings_for_test("", Some("_dummy_argument"));
|
||||
settings.bind(|| {
|
||||
let snap = format!("{}", $value);
|
||||
meili_snap::insta::assert_snapshot!(snap, @$inline);
|
||||
insta::allow_duplicates! {
|
||||
meili_snap::insta::assert_snapshot!(snap, @$inline);
|
||||
}
|
||||
});
|
||||
};
|
||||
($value:expr) => {
|
||||
@ -194,7 +198,9 @@ macro_rules! snapshot {
|
||||
let (settings, snap_name, _) = $crate::default_snapshot_settings_for_test(test_name, None);
|
||||
settings.bind(|| {
|
||||
let snap = format!("{}", $value);
|
||||
meili_snap::insta::assert_snapshot!(format!("{}", snap_name), snap);
|
||||
insta::allow_duplicates! {
|
||||
meili_snap::insta::assert_snapshot!(format!("{}", snap_name), snap);
|
||||
}
|
||||
});
|
||||
};
|
||||
}
|
||||
|
@ -15,13 +15,13 @@ actix-web = { version = "4.3.1", default-features = false }
|
||||
anyhow = "1.0.70"
|
||||
convert_case = "0.6.0"
|
||||
csv = "1.2.1"
|
||||
deserr = "0.5.0"
|
||||
deserr = { version = "0.6.0", features = ["actix-web"]}
|
||||
either = { version = "1.8.1", features = ["serde"] }
|
||||
enum-iterator = "1.4.0"
|
||||
file-store = { path = "../file-store" }
|
||||
flate2 = "1.0.25"
|
||||
fst = "0.4.7"
|
||||
memmap2 = "0.5.10"
|
||||
memmap2 = "0.7.1"
|
||||
milli = { path = "../milli" }
|
||||
roaring = { version = "0.10.1", features = ["serde"] }
|
||||
serde = { version = "1.0.160", features = ["derive"] }
|
||||
|
@ -104,6 +104,7 @@ macro_rules! impl_from_query_param_wrap_original_value_in_error {
|
||||
}
|
||||
impl_from_query_param_wrap_original_value_in_error!(usize, DeserrParseIntError);
|
||||
impl_from_query_param_wrap_original_value_in_error!(u32, DeserrParseIntError);
|
||||
impl_from_query_param_wrap_original_value_in_error!(u64, DeserrParseIntError);
|
||||
impl_from_query_param_wrap_original_value_in_error!(bool, DeserrParseBoolError);
|
||||
|
||||
impl FromQueryParameter for String {
|
||||
|
@ -1,4 +1,3 @@
|
||||
use std::borrow::Borrow;
|
||||
use std::fmt::{self, Debug, Display};
|
||||
use std::fs::File;
|
||||
use std::io::{self, Seek, Write};
|
||||
@ -42,7 +41,7 @@ impl Display for DocumentFormatError {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
match self {
|
||||
Self::Io(e) => write!(f, "{e}"),
|
||||
Self::MalformedPayload(me, b) => match me.borrow() {
|
||||
Self::MalformedPayload(me, b) => match me {
|
||||
Error::Json(se) => {
|
||||
let mut message = match se.classify() {
|
||||
Category::Data => {
|
||||
|
@ -259,6 +259,9 @@ InvalidSettingsRankingRules , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSettingsSearchableAttributes , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSettingsSortableAttributes , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSettingsStopWords , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSettingsNonSeparatorTokens , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSettingsSeparatorTokens , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSettingsDictionary , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSettingsSynonyms , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSettingsTypoTolerance , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidState , Internal , INTERNAL_SERVER_ERROR ;
|
||||
|
@ -171,6 +171,15 @@ pub struct Settings<T> {
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSettingsStopWords>)]
|
||||
pub stop_words: Setting<BTreeSet<String>>,
|
||||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSettingsNonSeparatorTokens>)]
|
||||
pub non_separator_tokens: Setting<BTreeSet<String>>,
|
||||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSettingsSeparatorTokens>)]
|
||||
pub separator_tokens: Setting<BTreeSet<String>>,
|
||||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSettingsDictionary>)]
|
||||
pub dictionary: Setting<BTreeSet<String>>,
|
||||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSettingsSynonyms>)]
|
||||
pub synonyms: Setting<BTreeMap<String, Vec<String>>>,
|
||||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
||||
@ -201,6 +210,9 @@ impl Settings<Checked> {
|
||||
ranking_rules: Setting::Reset,
|
||||
stop_words: Setting::Reset,
|
||||
synonyms: Setting::Reset,
|
||||
non_separator_tokens: Setting::Reset,
|
||||
separator_tokens: Setting::Reset,
|
||||
dictionary: Setting::Reset,
|
||||
distinct_attribute: Setting::Reset,
|
||||
typo_tolerance: Setting::Reset,
|
||||
faceting: Setting::Reset,
|
||||
@ -217,6 +229,9 @@ impl Settings<Checked> {
|
||||
sortable_attributes,
|
||||
ranking_rules,
|
||||
stop_words,
|
||||
non_separator_tokens,
|
||||
separator_tokens,
|
||||
dictionary,
|
||||
synonyms,
|
||||
distinct_attribute,
|
||||
typo_tolerance,
|
||||
@ -232,6 +247,9 @@ impl Settings<Checked> {
|
||||
sortable_attributes,
|
||||
ranking_rules,
|
||||
stop_words,
|
||||
non_separator_tokens,
|
||||
separator_tokens,
|
||||
dictionary,
|
||||
synonyms,
|
||||
distinct_attribute,
|
||||
typo_tolerance,
|
||||
@ -274,6 +292,9 @@ impl Settings<Unchecked> {
|
||||
ranking_rules: self.ranking_rules,
|
||||
stop_words: self.stop_words,
|
||||
synonyms: self.synonyms,
|
||||
non_separator_tokens: self.non_separator_tokens,
|
||||
separator_tokens: self.separator_tokens,
|
||||
dictionary: self.dictionary,
|
||||
distinct_attribute: self.distinct_attribute,
|
||||
typo_tolerance: self.typo_tolerance,
|
||||
faceting: self.faceting,
|
||||
@ -335,6 +356,28 @@ pub fn apply_settings_to_builder(
|
||||
Setting::NotSet => (),
|
||||
}
|
||||
|
||||
match settings.non_separator_tokens {
|
||||
Setting::Set(ref non_separator_tokens) => {
|
||||
builder.set_non_separator_tokens(non_separator_tokens.clone())
|
||||
}
|
||||
Setting::Reset => builder.reset_non_separator_tokens(),
|
||||
Setting::NotSet => (),
|
||||
}
|
||||
|
||||
match settings.separator_tokens {
|
||||
Setting::Set(ref separator_tokens) => {
|
||||
builder.set_separator_tokens(separator_tokens.clone())
|
||||
}
|
||||
Setting::Reset => builder.reset_separator_tokens(),
|
||||
Setting::NotSet => (),
|
||||
}
|
||||
|
||||
match settings.dictionary {
|
||||
Setting::Set(ref dictionary) => builder.set_dictionary(dictionary.clone()),
|
||||
Setting::Reset => builder.reset_dictionary(),
|
||||
Setting::NotSet => (),
|
||||
}
|
||||
|
||||
match settings.synonyms {
|
||||
Setting::Set(ref synonyms) => builder.set_synonyms(synonyms.clone().into_iter().collect()),
|
||||
Setting::Reset => builder.reset_synonyms(),
|
||||
@ -459,15 +502,14 @@ pub fn settings(
|
||||
})
|
||||
.transpose()?
|
||||
.unwrap_or_default();
|
||||
|
||||
let non_separator_tokens = index.non_separator_tokens(rtxn)?.unwrap_or_default();
|
||||
let separator_tokens = index.separator_tokens(rtxn)?.unwrap_or_default();
|
||||
let dictionary = index.dictionary(rtxn)?.unwrap_or_default();
|
||||
|
||||
let distinct_field = index.distinct_field(rtxn)?.map(String::from);
|
||||
|
||||
// in milli each word in the synonyms map were split on their separator. Since we lost
|
||||
// this information we are going to put space between words.
|
||||
let synonyms = index
|
||||
.synonyms(rtxn)?
|
||||
.iter()
|
||||
.map(|(key, values)| (key.join(" "), values.iter().map(|value| value.join(" ")).collect()))
|
||||
.collect();
|
||||
let synonyms = index.user_defined_synonyms(rtxn)?;
|
||||
|
||||
let min_typo_word_len = MinWordSizeTyposSetting {
|
||||
one_typo: Setting::Set(index.min_word_len_one_typo(rtxn)?),
|
||||
@ -520,6 +562,9 @@ pub fn settings(
|
||||
sortable_attributes: Setting::Set(sortable_attributes),
|
||||
ranking_rules: Setting::Set(criteria.iter().map(|c| c.clone().into()).collect()),
|
||||
stop_words: Setting::Set(stop_words),
|
||||
non_separator_tokens: Setting::Set(non_separator_tokens),
|
||||
separator_tokens: Setting::Set(separator_tokens),
|
||||
dictionary: Setting::Set(dictionary),
|
||||
distinct_attribute: match distinct_field {
|
||||
Some(field) => Setting::Set(field),
|
||||
None => Setting::Reset,
|
||||
@ -642,6 +687,9 @@ pub(crate) mod test {
|
||||
sortable_attributes: Setting::NotSet,
|
||||
ranking_rules: Setting::NotSet,
|
||||
stop_words: Setting::NotSet,
|
||||
non_separator_tokens: Setting::NotSet,
|
||||
separator_tokens: Setting::NotSet,
|
||||
dictionary: Setting::NotSet,
|
||||
synonyms: Setting::NotSet,
|
||||
distinct_attribute: Setting::NotSet,
|
||||
typo_tolerance: Setting::NotSet,
|
||||
@ -663,6 +711,9 @@ pub(crate) mod test {
|
||||
sortable_attributes: Setting::NotSet,
|
||||
ranking_rules: Setting::NotSet,
|
||||
stop_words: Setting::NotSet,
|
||||
non_separator_tokens: Setting::NotSet,
|
||||
separator_tokens: Setting::NotSet,
|
||||
dictionary: Setting::NotSet,
|
||||
synonyms: Setting::NotSet,
|
||||
distinct_attribute: Setting::NotSet,
|
||||
typo_tolerance: Setting::NotSet,
|
||||
|
@ -5,7 +5,7 @@ use std::str::FromStr;
|
||||
|
||||
use enum_iterator::Sequence;
|
||||
use milli::update::IndexDocumentsMethod;
|
||||
use roaring::RoaringBitmap;
|
||||
use roaring::RoaringTreemap;
|
||||
use serde::{Deserialize, Serialize, Serializer};
|
||||
use time::{Duration, OffsetDateTime};
|
||||
use uuid::Uuid;
|
||||
@ -15,7 +15,7 @@ use crate::keys::Key;
|
||||
use crate::settings::{Settings, Unchecked};
|
||||
use crate::InstanceUid;
|
||||
|
||||
pub type TaskId = u32;
|
||||
pub type TaskId = u64;
|
||||
|
||||
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
@ -127,11 +127,11 @@ pub enum KindWithContent {
|
||||
},
|
||||
TaskCancelation {
|
||||
query: String,
|
||||
tasks: RoaringBitmap,
|
||||
tasks: RoaringTreemap,
|
||||
},
|
||||
TaskDeletion {
|
||||
query: String,
|
||||
tasks: RoaringBitmap,
|
||||
tasks: RoaringTreemap,
|
||||
},
|
||||
DumpCreation {
|
||||
keys: Vec<Key>,
|
||||
|
@ -39,7 +39,7 @@ byte-unit = { version = "4.0.19", default-features = false, features = [
|
||||
bytes = "1.4.0"
|
||||
clap = { version = "4.2.1", features = ["derive", "env"] }
|
||||
crossbeam-channel = "0.5.8"
|
||||
deserr = "0.5.0"
|
||||
deserr = { version = "0.6.0", features = ["actix-web"]}
|
||||
dump = { path = "../dump" }
|
||||
either = "1.8.1"
|
||||
env_logger = "0.10.0"
|
||||
@ -50,15 +50,15 @@ futures = "0.3.28"
|
||||
futures-util = "0.3.28"
|
||||
http = "0.2.9"
|
||||
index-scheduler = { path = "../index-scheduler" }
|
||||
indexmap = { version = "1.9.3", features = ["serde-1"] }
|
||||
indexmap = { version = "2.0.0", features = ["serde"] }
|
||||
is-terminal = "0.4.8"
|
||||
itertools = "0.10.5"
|
||||
itertools = "0.11.0"
|
||||
jsonwebtoken = "8.3.0"
|
||||
lazy_static = "1.4.0"
|
||||
log = "0.4.17"
|
||||
meilisearch-auth = { path = "../meilisearch-auth" }
|
||||
meilisearch-types = { path = "../meilisearch-types" }
|
||||
mimalloc = { version = "0.1.36", default-features = false }
|
||||
mimalloc = { version = "0.1.37", default-features = false }
|
||||
mime = "0.3.17"
|
||||
num_cpus = "1.15.0"
|
||||
obkv = "0.2.0"
|
||||
@ -69,6 +69,8 @@ permissive-json-pointer = { path = "../permissive-json-pointer" }
|
||||
pin-project-lite = "0.2.9"
|
||||
platform-dirs = "0.3.0"
|
||||
prometheus = { version = "0.13.3", features = ["process"] }
|
||||
puffin = "0.16.0"
|
||||
puffin_http = { version = "0.13.0", optional = true }
|
||||
rand = "0.8.5"
|
||||
rayon = "1.7.0"
|
||||
regex = "1.7.3"
|
||||
@ -85,7 +87,7 @@ sha2 = "0.10.6"
|
||||
siphasher = "0.3.10"
|
||||
slice-group-by = "0.3.0"
|
||||
static-files = { version = "0.2.3", optional = true }
|
||||
sysinfo = "0.28.4"
|
||||
sysinfo = "0.29.7"
|
||||
tar = "0.4.38"
|
||||
tempfile = "3.5.0"
|
||||
thiserror = "1.0.40"
|
||||
@ -133,7 +135,18 @@ zip = { version = "0.6.4", optional = true }
|
||||
[features]
|
||||
default = ["analytics", "meilisearch-types/all-tokenizations", "mini-dashboard"]
|
||||
analytics = ["segment"]
|
||||
mini-dashboard = ["actix-web-static-files", "static-files", "anyhow", "cargo_toml", "hex", "reqwest", "sha-1", "tempfile", "zip"]
|
||||
profile-with-puffin = ["dep:puffin_http"]
|
||||
mini-dashboard = [
|
||||
"actix-web-static-files",
|
||||
"static-files",
|
||||
"anyhow",
|
||||
"cargo_toml",
|
||||
"hex",
|
||||
"reqwest",
|
||||
"sha-1",
|
||||
"tempfile",
|
||||
"zip",
|
||||
]
|
||||
chinese = ["meilisearch-types/chinese"]
|
||||
hebrew = ["meilisearch-types/hebrew"]
|
||||
japanese = ["meilisearch-types/japanese"]
|
||||
|
@ -20,7 +20,7 @@ pub struct SearchAggregator;
|
||||
#[allow(dead_code)]
|
||||
impl SearchAggregator {
|
||||
pub fn from_query(_: &dyn Any, _: &dyn Any) -> Self {
|
||||
Self::default()
|
||||
Self
|
||||
}
|
||||
|
||||
pub fn succeed(&mut self, _: &dyn Any) {}
|
||||
@ -32,7 +32,7 @@ pub struct MultiSearchAggregator;
|
||||
#[allow(dead_code)]
|
||||
impl MultiSearchAggregator {
|
||||
pub fn from_queries(_: &dyn Any, _: &dyn Any) -> Self {
|
||||
Self::default()
|
||||
Self
|
||||
}
|
||||
|
||||
pub fn succeed(&mut self) {}
|
||||
@ -44,7 +44,7 @@ pub struct FacetSearchAggregator;
|
||||
#[allow(dead_code)]
|
||||
impl FacetSearchAggregator {
|
||||
pub fn from_query(_: &dyn Any, _: &dyn Any) -> Self {
|
||||
Self::default()
|
||||
Self
|
||||
}
|
||||
|
||||
pub fn succeed(&mut self, _: &dyn Any) {}
|
||||
|
@ -203,7 +203,7 @@ pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<(Arc<IndexScheduler>, Arc<
|
||||
.name(String::from("register-snapshot-tasks"))
|
||||
.spawn(move || loop {
|
||||
thread::sleep(snapshot_delay);
|
||||
if let Err(e) = index_scheduler.register(KindWithContent::SnapshotCreation) {
|
||||
if let Err(e) = index_scheduler.register(KindWithContent::SnapshotCreation, None) {
|
||||
error!("Error while registering snapshot: {}", e);
|
||||
}
|
||||
})
|
||||
|
@ -30,6 +30,10 @@ fn setup(opt: &Opt) -> anyhow::Result<()> {
|
||||
async fn main() -> anyhow::Result<()> {
|
||||
let (opt, config_read_from) = Opt::try_build()?;
|
||||
|
||||
#[cfg(feature = "profile-with-puffin")]
|
||||
let _server = puffin_http::Server::new(&format!("0.0.0.0:{}", puffin_http::DEFAULT_PORT))?;
|
||||
puffin::set_scopes_on(cfg!(feature = "profile-with-puffin"));
|
||||
|
||||
anyhow::ensure!(
|
||||
!(cfg!(windows) && opt.experimental_reduce_indexing_memory_usage),
|
||||
"The `experimental-reduce-indexing-memory-usage` flag is not supported on Windows"
|
||||
|
@ -11,7 +11,7 @@ use crate::analytics::Analytics;
|
||||
use crate::extractors::authentication::policies::*;
|
||||
use crate::extractors::authentication::GuardedData;
|
||||
use crate::extractors::sequential_extractor::SeqHandler;
|
||||
use crate::routes::SummarizedTaskView;
|
||||
use crate::routes::{get_task_id, SummarizedTaskView};
|
||||
|
||||
pub fn configure(cfg: &mut web::ServiceConfig) {
|
||||
cfg.service(web::resource("").route(web::post().to(SeqHandler(create_dump))));
|
||||
@ -29,8 +29,9 @@ pub async fn create_dump(
|
||||
keys: auth_controller.list_keys()?,
|
||||
instance_uid: analytics.instance_uid().cloned(),
|
||||
};
|
||||
let uid = get_task_id(&req)?;
|
||||
let task: SummarizedTaskView =
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid)).await??.into();
|
||||
|
||||
debug!("returns: {:?}", task);
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
|
@ -7,7 +7,7 @@ use bstr::ByteSlice;
|
||||
use deserr::actix_web::{AwebJson, AwebQueryParameter};
|
||||
use deserr::Deserr;
|
||||
use futures::StreamExt;
|
||||
use index_scheduler::IndexScheduler;
|
||||
use index_scheduler::{IndexScheduler, TaskId};
|
||||
use log::debug;
|
||||
use meilisearch_types::deserr::query_params::Param;
|
||||
use meilisearch_types::deserr::{DeserrJsonError, DeserrQueryParamError};
|
||||
@ -36,7 +36,7 @@ use crate::extractors::authentication::policies::*;
|
||||
use crate::extractors::authentication::GuardedData;
|
||||
use crate::extractors::payload::Payload;
|
||||
use crate::extractors::sequential_extractor::SeqHandler;
|
||||
use crate::routes::{PaginationView, SummarizedTaskView, PAGINATION_DEFAULT_LIMIT};
|
||||
use crate::routes::{get_task_id, PaginationView, SummarizedTaskView, PAGINATION_DEFAULT_LIMIT};
|
||||
use crate::search::parse_filter;
|
||||
|
||||
static ACCEPTED_CONTENT_TYPE: Lazy<Vec<String>> = Lazy::new(|| {
|
||||
@ -129,8 +129,9 @@ pub async fn delete_document(
|
||||
index_uid: index_uid.to_string(),
|
||||
documents_ids: vec![document_id],
|
||||
};
|
||||
let uid = get_task_id(&req)?;
|
||||
let task: SummarizedTaskView =
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid)).await??.into();
|
||||
debug!("returns: {:?}", task);
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
}
|
||||
@ -277,6 +278,7 @@ pub async fn replace_documents(
|
||||
analytics.add_documents(¶ms, index_scheduler.index(&index_uid).is_err(), &req);
|
||||
|
||||
let allow_index_creation = index_scheduler.filters().allow_index_creation(&index_uid);
|
||||
let uid = get_task_id(&req)?;
|
||||
let task = document_addition(
|
||||
extract_mime_type(&req)?,
|
||||
index_scheduler,
|
||||
@ -285,6 +287,7 @@ pub async fn replace_documents(
|
||||
params.csv_delimiter,
|
||||
body,
|
||||
IndexDocumentsMethod::ReplaceDocuments,
|
||||
uid,
|
||||
allow_index_creation,
|
||||
)
|
||||
.await?;
|
||||
@ -308,6 +311,7 @@ pub async fn update_documents(
|
||||
analytics.update_documents(¶ms, index_scheduler.index(&index_uid).is_err(), &req);
|
||||
|
||||
let allow_index_creation = index_scheduler.filters().allow_index_creation(&index_uid);
|
||||
let uid = get_task_id(&req)?;
|
||||
let task = document_addition(
|
||||
extract_mime_type(&req)?,
|
||||
index_scheduler,
|
||||
@ -316,6 +320,7 @@ pub async fn update_documents(
|
||||
params.csv_delimiter,
|
||||
body,
|
||||
IndexDocumentsMethod::UpdateDocuments,
|
||||
uid,
|
||||
allow_index_creation,
|
||||
)
|
||||
.await?;
|
||||
@ -332,6 +337,7 @@ async fn document_addition(
|
||||
csv_delimiter: Option<u8>,
|
||||
mut body: Payload,
|
||||
method: IndexDocumentsMethod,
|
||||
task_id: Option<TaskId>,
|
||||
allow_index_creation: bool,
|
||||
) -> Result<SummarizedTaskView, MeilisearchHttpError> {
|
||||
let format = match (
|
||||
@ -445,7 +451,7 @@ async fn document_addition(
|
||||
};
|
||||
|
||||
let scheduler = index_scheduler.clone();
|
||||
let task = match tokio::task::spawn_blocking(move || scheduler.register(task)).await? {
|
||||
let task = match tokio::task::spawn_blocking(move || scheduler.register(task, task_id)).await? {
|
||||
Ok(task) => task,
|
||||
Err(e) => {
|
||||
index_scheduler.delete_update_file(uuid)?;
|
||||
@ -476,8 +482,9 @@ pub async fn delete_documents_batch(
|
||||
|
||||
let task =
|
||||
KindWithContent::DocumentDeletion { index_uid: index_uid.to_string(), documents_ids: ids };
|
||||
let uid = get_task_id(&req)?;
|
||||
let task: SummarizedTaskView =
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid)).await??.into();
|
||||
|
||||
debug!("returns: {:?}", task);
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
@ -512,8 +519,9 @@ pub async fn delete_documents_by_filter(
|
||||
.map_err(|err| ResponseError::from_msg(err.message, Code::InvalidDocumentFilter))?;
|
||||
let task = KindWithContent::DocumentDeletionByFilter { index_uid, filter_expr: filter };
|
||||
|
||||
let uid = get_task_id(&req)?;
|
||||
let task: SummarizedTaskView =
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid)).await??.into();
|
||||
|
||||
debug!("returns: {:?}", task);
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
@ -529,8 +537,9 @@ pub async fn clear_all_documents(
|
||||
analytics.delete_documents(DocumentDeletionKind::ClearAll, &req);
|
||||
|
||||
let task = KindWithContent::DocumentClear { index_uid: index_uid.to_string() };
|
||||
let uid = get_task_id(&req)?;
|
||||
let task: SummarizedTaskView =
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid)).await??.into();
|
||||
|
||||
debug!("returns: {:?}", task);
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
|
@ -17,7 +17,7 @@ use serde::Serialize;
|
||||
use serde_json::json;
|
||||
use time::OffsetDateTime;
|
||||
|
||||
use super::{Pagination, SummarizedTaskView, PAGINATION_DEFAULT_LIMIT};
|
||||
use super::{get_task_id, Pagination, SummarizedTaskView, PAGINATION_DEFAULT_LIMIT};
|
||||
use crate::analytics::Analytics;
|
||||
use crate::extractors::authentication::policies::*;
|
||||
use crate::extractors::authentication::{AuthenticationError, GuardedData};
|
||||
@ -135,8 +135,9 @@ pub async fn create_index(
|
||||
);
|
||||
|
||||
let task = KindWithContent::IndexCreation { index_uid: uid.to_string(), primary_key };
|
||||
let uid = get_task_id(&req)?;
|
||||
let task: SummarizedTaskView =
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid)).await??.into();
|
||||
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
} else {
|
||||
@ -203,8 +204,9 @@ pub async fn update_index(
|
||||
primary_key: body.primary_key,
|
||||
};
|
||||
|
||||
let uid = get_task_id(&req)?;
|
||||
let task: SummarizedTaskView =
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid)).await??.into();
|
||||
|
||||
debug!("returns: {:?}", task);
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
@ -213,11 +215,13 @@ pub async fn update_index(
|
||||
pub async fn delete_index(
|
||||
index_scheduler: GuardedData<ActionPolicy<{ actions::INDEXES_DELETE }>, Data<IndexScheduler>>,
|
||||
index_uid: web::Path<String>,
|
||||
req: HttpRequest,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
|
||||
let task = KindWithContent::IndexDeletion { index_uid: index_uid.into_inner() };
|
||||
let uid = get_task_id(&req)?;
|
||||
let task: SummarizedTaskView =
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid)).await??.into();
|
||||
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
}
|
||||
|
@ -14,7 +14,7 @@ use serde_json::json;
|
||||
use crate::analytics::Analytics;
|
||||
use crate::extractors::authentication::policies::*;
|
||||
use crate::extractors::authentication::GuardedData;
|
||||
use crate::routes::SummarizedTaskView;
|
||||
use crate::routes::{get_task_id, SummarizedTaskView};
|
||||
|
||||
#[macro_export]
|
||||
macro_rules! make_setting_route {
|
||||
@ -33,7 +33,7 @@ macro_rules! make_setting_route {
|
||||
use $crate::extractors::authentication::policies::*;
|
||||
use $crate::extractors::authentication::GuardedData;
|
||||
use $crate::extractors::sequential_extractor::SeqHandler;
|
||||
use $crate::routes::SummarizedTaskView;
|
||||
use $crate::routes::{get_task_id, SummarizedTaskView};
|
||||
|
||||
pub async fn delete(
|
||||
index_scheduler: GuardedData<
|
||||
@ -41,6 +41,7 @@ macro_rules! make_setting_route {
|
||||
Data<IndexScheduler>,
|
||||
>,
|
||||
index_uid: web::Path<String>,
|
||||
req: HttpRequest,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
|
||||
|
||||
@ -55,8 +56,9 @@ macro_rules! make_setting_route {
|
||||
is_deletion: true,
|
||||
allow_index_creation,
|
||||
};
|
||||
let uid = get_task_id(&req)?;
|
||||
let task: SummarizedTaskView =
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task))
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid))
|
||||
.await??
|
||||
.into();
|
||||
|
||||
@ -97,8 +99,9 @@ macro_rules! make_setting_route {
|
||||
is_deletion: false,
|
||||
allow_index_creation,
|
||||
};
|
||||
let uid = get_task_id(&req)?;
|
||||
let task: SummarizedTaskView =
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task))
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid))
|
||||
.await??
|
||||
.into();
|
||||
|
||||
@ -310,6 +313,81 @@ make_setting_route!(
|
||||
}
|
||||
);
|
||||
|
||||
make_setting_route!(
|
||||
"/non-separator-tokens",
|
||||
put,
|
||||
std::collections::BTreeSet<String>,
|
||||
meilisearch_types::deserr::DeserrJsonError<
|
||||
meilisearch_types::error::deserr_codes::InvalidSettingsNonSeparatorTokens,
|
||||
>,
|
||||
non_separator_tokens,
|
||||
"nonSeparatorTokens",
|
||||
analytics,
|
||||
|non_separator_tokens: &Option<std::collections::BTreeSet<String>>, req: &HttpRequest| {
|
||||
use serde_json::json;
|
||||
|
||||
analytics.publish(
|
||||
"nonSeparatorTokens Updated".to_string(),
|
||||
json!({
|
||||
"non_separator_tokens": {
|
||||
"total": non_separator_tokens.as_ref().map(|non_separator_tokens| non_separator_tokens.len()),
|
||||
},
|
||||
}),
|
||||
Some(req),
|
||||
);
|
||||
}
|
||||
);
|
||||
|
||||
make_setting_route!(
|
||||
"/separator-tokens",
|
||||
put,
|
||||
std::collections::BTreeSet<String>,
|
||||
meilisearch_types::deserr::DeserrJsonError<
|
||||
meilisearch_types::error::deserr_codes::InvalidSettingsSeparatorTokens,
|
||||
>,
|
||||
separator_tokens,
|
||||
"separatorTokens",
|
||||
analytics,
|
||||
|separator_tokens: &Option<std::collections::BTreeSet<String>>, req: &HttpRequest| {
|
||||
use serde_json::json;
|
||||
|
||||
analytics.publish(
|
||||
"separatorTokens Updated".to_string(),
|
||||
json!({
|
||||
"separator_tokens": {
|
||||
"total": separator_tokens.as_ref().map(|separator_tokens| separator_tokens.len()),
|
||||
},
|
||||
}),
|
||||
Some(req),
|
||||
);
|
||||
}
|
||||
);
|
||||
|
||||
make_setting_route!(
|
||||
"/dictionary",
|
||||
put,
|
||||
std::collections::BTreeSet<String>,
|
||||
meilisearch_types::deserr::DeserrJsonError<
|
||||
meilisearch_types::error::deserr_codes::InvalidSettingsDictionary,
|
||||
>,
|
||||
dictionary,
|
||||
"dictionary",
|
||||
analytics,
|
||||
|dictionary: &Option<std::collections::BTreeSet<String>>, req: &HttpRequest| {
|
||||
use serde_json::json;
|
||||
|
||||
analytics.publish(
|
||||
"dictionary Updated".to_string(),
|
||||
json!({
|
||||
"dictionary": {
|
||||
"total": dictionary.as_ref().map(|dictionary| dictionary.len()),
|
||||
},
|
||||
}),
|
||||
Some(req),
|
||||
);
|
||||
}
|
||||
);
|
||||
|
||||
make_setting_route!(
|
||||
"/synonyms",
|
||||
put,
|
||||
@ -466,6 +544,9 @@ generate_configure!(
|
||||
searchable_attributes,
|
||||
distinct_attribute,
|
||||
stop_words,
|
||||
separator_tokens,
|
||||
non_separator_tokens,
|
||||
dictionary,
|
||||
synonyms,
|
||||
ranking_rules,
|
||||
typo_tolerance,
|
||||
@ -586,8 +667,9 @@ pub async fn update_all(
|
||||
is_deletion: false,
|
||||
allow_index_creation,
|
||||
};
|
||||
let uid = get_task_id(&req)?;
|
||||
let task: SummarizedTaskView =
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid)).await??.into();
|
||||
|
||||
debug!("returns: {:?}", task);
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
@ -609,6 +691,7 @@ pub async fn get_all(
|
||||
pub async fn delete_all(
|
||||
index_scheduler: GuardedData<ActionPolicy<{ actions::SETTINGS_UPDATE }>, Data<IndexScheduler>>,
|
||||
index_uid: web::Path<String>,
|
||||
req: HttpRequest,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
|
||||
|
||||
@ -622,8 +705,9 @@ pub async fn delete_all(
|
||||
is_deletion: true,
|
||||
allow_index_creation,
|
||||
};
|
||||
let uid = get_task_id(&req)?;
|
||||
let task: SummarizedTaskView =
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid)).await??.into();
|
||||
|
||||
debug!("returns: {:?}", task);
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
|
@ -5,7 +5,7 @@ use actix_web::{web, HttpRequest, HttpResponse};
|
||||
use index_scheduler::IndexScheduler;
|
||||
use log::debug;
|
||||
use meilisearch_auth::AuthController;
|
||||
use meilisearch_types::error::ResponseError;
|
||||
use meilisearch_types::error::{Code, ResponseError};
|
||||
use meilisearch_types::settings::{Settings, Unchecked};
|
||||
use meilisearch_types::tasks::{Kind, Status, Task, TaskId};
|
||||
use serde::{Deserialize, Serialize};
|
||||
@ -41,6 +41,34 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
|
||||
.service(web::scope("/experimental-features").configure(features::configure));
|
||||
}
|
||||
|
||||
pub fn get_task_id(req: &HttpRequest) -> Result<Option<TaskId>, ResponseError> {
|
||||
let task_id = req
|
||||
.headers()
|
||||
.get("TaskId")
|
||||
.map(|header| {
|
||||
header.to_str().map_err(|e| {
|
||||
ResponseError::from_msg(
|
||||
format!("TaskId is not a valid utf-8 string: {e}"),
|
||||
Code::BadRequest,
|
||||
)
|
||||
})
|
||||
})
|
||||
.transpose()?
|
||||
.map(|s| {
|
||||
s.parse::<TaskId>().map_err(|e| {
|
||||
ResponseError::from_msg(
|
||||
format!(
|
||||
"Could not parse the TaskId as a {}: {e}",
|
||||
std::any::type_name::<TaskId>(),
|
||||
),
|
||||
Code::BadRequest,
|
||||
)
|
||||
})
|
||||
})
|
||||
.transpose()?;
|
||||
Ok(task_id)
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct SummarizedTaskView {
|
||||
|
@ -10,7 +10,7 @@ use meilisearch_types::index_uid::IndexUid;
|
||||
use meilisearch_types::tasks::{IndexSwap, KindWithContent};
|
||||
use serde_json::json;
|
||||
|
||||
use super::SummarizedTaskView;
|
||||
use super::{get_task_id, SummarizedTaskView};
|
||||
use crate::analytics::Analytics;
|
||||
use crate::error::MeilisearchHttpError;
|
||||
use crate::extractors::authentication::policies::*;
|
||||
@ -61,7 +61,8 @@ pub async fn swap_indexes(
|
||||
|
||||
let task = KindWithContent::IndexSwap { swaps };
|
||||
|
||||
let task = index_scheduler.register(task)?;
|
||||
let task: SummarizedTaskView = task.into();
|
||||
let uid = get_task_id(&req)?;
|
||||
let task: SummarizedTaskView =
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid)).await??.into();
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
}
|
||||
|
@ -20,13 +20,13 @@ use time::macros::format_description;
|
||||
use time::{Date, Duration, OffsetDateTime, Time};
|
||||
use tokio::task;
|
||||
|
||||
use super::SummarizedTaskView;
|
||||
use super::{get_task_id, SummarizedTaskView};
|
||||
use crate::analytics::Analytics;
|
||||
use crate::extractors::authentication::policies::*;
|
||||
use crate::extractors::authentication::GuardedData;
|
||||
use crate::extractors::sequential_extractor::SeqHandler;
|
||||
|
||||
const DEFAULT_LIMIT: u32 = 20;
|
||||
const DEFAULT_LIMIT: u64 = 20;
|
||||
|
||||
pub fn configure(cfg: &mut web::ServiceConfig) {
|
||||
cfg.service(
|
||||
@ -175,14 +175,14 @@ impl From<Details> for DetailsView {
|
||||
#[deserr(error = DeserrQueryParamError, rename_all = camelCase, deny_unknown_fields)]
|
||||
pub struct TasksFilterQuery {
|
||||
#[deserr(default = Param(DEFAULT_LIMIT), error = DeserrQueryParamError<InvalidTaskLimit>)]
|
||||
pub limit: Param<u32>,
|
||||
pub limit: Param<TaskId>,
|
||||
#[deserr(default, error = DeserrQueryParamError<InvalidTaskFrom>)]
|
||||
pub from: Option<Param<TaskId>>,
|
||||
|
||||
#[deserr(default, error = DeserrQueryParamError<InvalidTaskUids>)]
|
||||
pub uids: OptionStarOrList<u32>,
|
||||
pub uids: OptionStarOrList<TaskId>,
|
||||
#[deserr(default, error = DeserrQueryParamError<InvalidTaskCanceledBy>)]
|
||||
pub canceled_by: OptionStarOrList<u32>,
|
||||
pub canceled_by: OptionStarOrList<TaskId>,
|
||||
#[deserr(default, error = DeserrQueryParamError<InvalidTaskTypes>)]
|
||||
pub types: OptionStarOrList<Kind>,
|
||||
#[deserr(default, error = DeserrQueryParamError<InvalidTaskStatuses>)]
|
||||
@ -249,9 +249,9 @@ impl TaskDeletionOrCancelationQuery {
|
||||
#[deserr(error = DeserrQueryParamError, rename_all = camelCase, deny_unknown_fields)]
|
||||
pub struct TaskDeletionOrCancelationQuery {
|
||||
#[deserr(default, error = DeserrQueryParamError<InvalidTaskUids>)]
|
||||
pub uids: OptionStarOrList<u32>,
|
||||
pub uids: OptionStarOrList<TaskId>,
|
||||
#[deserr(default, error = DeserrQueryParamError<InvalidTaskCanceledBy>)]
|
||||
pub canceled_by: OptionStarOrList<u32>,
|
||||
pub canceled_by: OptionStarOrList<TaskId>,
|
||||
#[deserr(default, error = DeserrQueryParamError<InvalidTaskTypes>)]
|
||||
pub types: OptionStarOrList<Kind>,
|
||||
#[deserr(default, error = DeserrQueryParamError<InvalidTaskStatuses>)]
|
||||
@ -333,7 +333,9 @@ async fn cancel_tasks(
|
||||
let task_cancelation =
|
||||
KindWithContent::TaskCancelation { query: format!("?{}", req.query_string()), tasks };
|
||||
|
||||
let task = task::spawn_blocking(move || index_scheduler.register(task_cancelation)).await??;
|
||||
let uid = get_task_id(&req)?;
|
||||
let task =
|
||||
task::spawn_blocking(move || index_scheduler.register(task_cancelation, uid)).await??;
|
||||
let task: SummarizedTaskView = task.into();
|
||||
|
||||
Ok(HttpResponse::Ok().json(task))
|
||||
@ -378,7 +380,8 @@ async fn delete_tasks(
|
||||
let task_deletion =
|
||||
KindWithContent::TaskDeletion { query: format!("?{}", req.query_string()), tasks };
|
||||
|
||||
let task = task::spawn_blocking(move || index_scheduler.register(task_deletion)).await??;
|
||||
let uid = get_task_id(&req)?;
|
||||
let task = task::spawn_blocking(move || index_scheduler.register(task_deletion, uid)).await??;
|
||||
let task: SummarizedTaskView = task.into();
|
||||
|
||||
Ok(HttpResponse::Ok().json(task))
|
||||
@ -388,9 +391,9 @@ async fn delete_tasks(
|
||||
pub struct AllTasks {
|
||||
results: Vec<TaskView>,
|
||||
total: u64,
|
||||
limit: u32,
|
||||
from: Option<u32>,
|
||||
next: Option<u32>,
|
||||
limit: TaskId,
|
||||
from: Option<TaskId>,
|
||||
next: Option<TaskId>,
|
||||
}
|
||||
|
||||
async fn get_tasks(
|
||||
|
@ -491,6 +491,20 @@ pub fn perform_search(
|
||||
tokenizer_builder.allow_list(&script_lang_map);
|
||||
}
|
||||
|
||||
let separators = index.allowed_separators(&rtxn)?;
|
||||
let separators: Option<Vec<_>> =
|
||||
separators.as_ref().map(|x| x.iter().map(String::as_str).collect());
|
||||
if let Some(ref separators) = separators {
|
||||
tokenizer_builder.separators(separators);
|
||||
}
|
||||
|
||||
let dictionary = index.dictionary(&rtxn)?;
|
||||
let dictionary: Option<Vec<_>> =
|
||||
dictionary.as_ref().map(|x| x.iter().map(String::as_str).collect());
|
||||
if let Some(ref dictionary) = dictionary {
|
||||
tokenizer_builder.words_dict(dictionary);
|
||||
}
|
||||
|
||||
let mut formatter_builder = MatcherBuilder::new(matching_words, tokenizer_builder.build());
|
||||
formatter_builder.crop_marker(query.crop_marker);
|
||||
formatter_builder.highlight_prefix(query.highlight_pre_tag);
|
||||
@ -666,6 +680,7 @@ fn compute_semantic_score(query: &[f32], vectors: Value) -> milli::Result<Option
|
||||
.map_err(InternalError::SerdeJson)?;
|
||||
Ok(vectors
|
||||
.into_iter()
|
||||
.flatten()
|
||||
.map(|v| OrderedFloat(dot_product_similarity(query, &v)))
|
||||
.max()
|
||||
.map(OrderedFloat::into_inner))
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -199,3 +199,74 @@ async fn error_create_with_invalid_index_uid() {
|
||||
}
|
||||
"###);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn send_task_id() {
|
||||
let server = Server::new().await;
|
||||
let app = server.init_web_app().await;
|
||||
let index = server.index("catto");
|
||||
let (response, code) = index.create(None).await;
|
||||
snapshot!(code, @"202 Accepted");
|
||||
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
|
||||
{
|
||||
"taskUid": 0,
|
||||
"indexUid": "catto",
|
||||
"status": "enqueued",
|
||||
"type": "indexCreation",
|
||||
"enqueuedAt": "[date]"
|
||||
}
|
||||
"###);
|
||||
|
||||
let body = serde_json::to_string(&json!({
|
||||
"uid": "doggo",
|
||||
"primaryKey": None::<&str>,
|
||||
}))
|
||||
.unwrap();
|
||||
let req = test::TestRequest::post()
|
||||
.uri("/indexes")
|
||||
.insert_header(("TaskId", "25"))
|
||||
.insert_header(ContentType::json())
|
||||
.set_payload(body)
|
||||
.to_request();
|
||||
|
||||
let res = test::call_service(&app, req).await;
|
||||
snapshot!(res.status(), @"202 Accepted");
|
||||
|
||||
let bytes = test::read_body(res).await;
|
||||
let response = serde_json::from_slice::<Value>(&bytes).expect("Expecting valid json");
|
||||
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
|
||||
{
|
||||
"taskUid": 25,
|
||||
"indexUid": "doggo",
|
||||
"status": "enqueued",
|
||||
"type": "indexCreation",
|
||||
"enqueuedAt": "[date]"
|
||||
}
|
||||
"###);
|
||||
|
||||
let body = serde_json::to_string(&json!({
|
||||
"uid": "girafo",
|
||||
"primaryKey": None::<&str>,
|
||||
}))
|
||||
.unwrap();
|
||||
let req = test::TestRequest::post()
|
||||
.uri("/indexes")
|
||||
.insert_header(("TaskId", "12"))
|
||||
.insert_header(ContentType::json())
|
||||
.set_payload(body)
|
||||
.to_request();
|
||||
|
||||
let res = test::call_service(&app, req).await;
|
||||
snapshot!(res.status(), @"400 Bad Request");
|
||||
|
||||
let bytes = test::read_body(res).await;
|
||||
let response = serde_json::from_slice::<Value>(&bytes).expect("Expecting valid json");
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"message": "Received bad task id: 12 should be >= to 26.",
|
||||
"code": "bad_request",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#bad_request"
|
||||
}
|
||||
"###);
|
||||
}
|
||||
|
@ -1,3 +1,4 @@
|
||||
use meili_snap::{json_string, snapshot};
|
||||
use once_cell::sync::Lazy;
|
||||
use serde_json::{json, Value};
|
||||
|
||||
@ -60,3 +61,59 @@ async fn geo_sort_with_geo_strings() {
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn geo_bounding_box_with_string_and_number() {
|
||||
let server = Server::new().await;
|
||||
let index = server.index("test");
|
||||
|
||||
let documents = DOCUMENTS.clone();
|
||||
index.update_settings_filterable_attributes(json!(["_geo"])).await;
|
||||
index.update_settings_sortable_attributes(json!(["_geo"])).await;
|
||||
index.add_documents(documents, None).await;
|
||||
index.wait_task(2).await;
|
||||
|
||||
index
|
||||
.search(
|
||||
json!({
|
||||
"filter": "_geoBoundingBox([89, 179], [-89, -179])",
|
||||
}),
|
||||
|response, code| {
|
||||
assert_eq!(code, 200, "{}", response);
|
||||
snapshot!(json_string!(response, { ".processingTimeMs" => "[time]" }), @r###"
|
||||
{
|
||||
"hits": [
|
||||
{
|
||||
"id": 1,
|
||||
"name": "Taco Truck",
|
||||
"address": "444 Salsa Street, Burritoville",
|
||||
"type": "Mexican",
|
||||
"rating": 9,
|
||||
"_geo": {
|
||||
"lat": 34.0522,
|
||||
"lng": -118.2437
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": 2,
|
||||
"name": "La Bella Italia",
|
||||
"address": "456 Elm Street, Townsville",
|
||||
"type": "Italian",
|
||||
"rating": 9,
|
||||
"_geo": {
|
||||
"lat": "45.4777599",
|
||||
"lng": "9.1967508"
|
||||
}
|
||||
}
|
||||
],
|
||||
"query": "",
|
||||
"processingTimeMs": "[time]",
|
||||
"limit": 20,
|
||||
"offset": 0,
|
||||
"estimatedTotalHits": 2
|
||||
}
|
||||
"###);
|
||||
},
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
@ -16,6 +16,9 @@ static DEFAULT_SETTINGS_VALUES: Lazy<HashMap<&'static str, Value>> = Lazy::new(|
|
||||
json!(["words", "typo", "proximity", "attribute", "sort", "exactness"]),
|
||||
);
|
||||
map.insert("stop_words", json!([]));
|
||||
map.insert("non_separator_tokens", json!([]));
|
||||
map.insert("separator_tokens", json!([]));
|
||||
map.insert("dictionary", json!([]));
|
||||
map.insert("synonyms", json!({}));
|
||||
map.insert(
|
||||
"faceting",
|
||||
@ -51,7 +54,7 @@ async fn get_settings() {
|
||||
let (response, code) = index.settings().await;
|
||||
assert_eq!(code, 200);
|
||||
let settings = response.as_object().unwrap();
|
||||
assert_eq!(settings.keys().len(), 11);
|
||||
assert_eq!(settings.keys().len(), 14);
|
||||
assert_eq!(settings["displayedAttributes"], json!(["*"]));
|
||||
assert_eq!(settings["searchableAttributes"], json!(["*"]));
|
||||
assert_eq!(settings["filterableAttributes"], json!([]));
|
||||
@ -62,6 +65,9 @@ async fn get_settings() {
|
||||
json!(["words", "typo", "proximity", "attribute", "sort", "exactness"])
|
||||
);
|
||||
assert_eq!(settings["stopWords"], json!([]));
|
||||
assert_eq!(settings["nonSeparatorTokens"], json!([]));
|
||||
assert_eq!(settings["separatorTokens"], json!([]));
|
||||
assert_eq!(settings["dictionary"], json!([]));
|
||||
assert_eq!(
|
||||
settings["faceting"],
|
||||
json!({
|
||||
@ -272,6 +278,9 @@ test_setting_routes!(
|
||||
searchable_attributes put,
|
||||
distinct_attribute put,
|
||||
stop_words put,
|
||||
separator_tokens put,
|
||||
non_separator_tokens put,
|
||||
dictionary put,
|
||||
ranking_rules put,
|
||||
synonyms put,
|
||||
pagination patch,
|
||||
|
@ -1,3 +1,4 @@
|
||||
mod distinct;
|
||||
mod errors;
|
||||
mod get_settings;
|
||||
mod tokenizer_customization;
|
||||
|
467
meilisearch/tests/settings/tokenizer_customization.rs
Normal file
467
meilisearch/tests/settings/tokenizer_customization.rs
Normal file
@ -0,0 +1,467 @@
|
||||
use meili_snap::{json_string, snapshot};
|
||||
use serde_json::json;
|
||||
|
||||
use crate::common::Server;
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn set_and_reset() {
|
||||
let server = Server::new().await;
|
||||
let index = server.index("test");
|
||||
|
||||
let (_response, _code) = index
|
||||
.update_settings(json!({
|
||||
"nonSeparatorTokens": ["#", "&"],
|
||||
"separatorTokens": ["&sep", "<br/>"],
|
||||
"dictionary": ["J.R.R.", "J. R. R."],
|
||||
}))
|
||||
.await;
|
||||
index.wait_task(0).await;
|
||||
|
||||
let (response, _) = index.settings().await;
|
||||
snapshot!(json_string!(response["nonSeparatorTokens"]), @r###"
|
||||
[
|
||||
"#",
|
||||
"&"
|
||||
]
|
||||
"###);
|
||||
snapshot!(json_string!(response["separatorTokens"]), @r###"
|
||||
[
|
||||
"&sep",
|
||||
"<br/>"
|
||||
]
|
||||
"###);
|
||||
snapshot!(json_string!(response["dictionary"]), @r###"
|
||||
[
|
||||
"J. R. R.",
|
||||
"J.R.R."
|
||||
]
|
||||
"###);
|
||||
|
||||
index
|
||||
.update_settings(json!({
|
||||
"nonSeparatorTokens": null,
|
||||
"separatorTokens": null,
|
||||
"dictionary": null,
|
||||
}))
|
||||
.await;
|
||||
|
||||
index.wait_task(1).await;
|
||||
|
||||
let (response, _) = index.settings().await;
|
||||
snapshot!(json_string!(response["nonSeparatorTokens"]), @"[]");
|
||||
snapshot!(json_string!(response["separatorTokens"]), @"[]");
|
||||
snapshot!(json_string!(response["dictionary"]), @"[]");
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn set_and_search() {
|
||||
let documents = json!([
|
||||
{
|
||||
"id": 1,
|
||||
"content": "Mac & cheese",
|
||||
},
|
||||
{
|
||||
"id": 2,
|
||||
"content": "G#D#G#D#G#C#D#G#C#",
|
||||
},
|
||||
{
|
||||
"id": 3,
|
||||
"content": "Mac&sep&&sepcheese",
|
||||
},
|
||||
]);
|
||||
|
||||
let server = Server::new().await;
|
||||
let index = server.index("test");
|
||||
|
||||
index.add_documents(documents, None).await;
|
||||
index.wait_task(0).await;
|
||||
|
||||
let (_response, _code) = index
|
||||
.update_settings(json!({
|
||||
"nonSeparatorTokens": ["#", "&"],
|
||||
"separatorTokens": ["<br/>", "&sep"],
|
||||
"dictionary": ["#", "A#", "B#", "C#", "D#", "E#", "F#", "G#"],
|
||||
}))
|
||||
.await;
|
||||
index.wait_task(1).await;
|
||||
|
||||
index
|
||||
.search(json!({"q": "&", "attributesToHighlight": ["content"]}), |response, code| {
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response["hits"]), @r###"
|
||||
[
|
||||
{
|
||||
"id": 1,
|
||||
"content": "Mac & cheese",
|
||||
"_formatted": {
|
||||
"id": "1",
|
||||
"content": "Mac <em>&</em> cheese"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": 3,
|
||||
"content": "Mac&sep&&sepcheese",
|
||||
"_formatted": {
|
||||
"id": "3",
|
||||
"content": "Mac&sep<em>&</em>&sepcheese"
|
||||
}
|
||||
}
|
||||
]
|
||||
"###);
|
||||
})
|
||||
.await;
|
||||
|
||||
index
|
||||
.search(
|
||||
json!({"q": "Mac & cheese", "attributesToHighlight": ["content"]}),
|
||||
|response, code| {
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response["hits"]), @r###"
|
||||
[
|
||||
{
|
||||
"id": 1,
|
||||
"content": "Mac & cheese",
|
||||
"_formatted": {
|
||||
"id": "1",
|
||||
"content": "<em>Mac</em> <em>&</em> <em>cheese</em>"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": 3,
|
||||
"content": "Mac&sep&&sepcheese",
|
||||
"_formatted": {
|
||||
"id": "3",
|
||||
"content": "<em>Mac</em>&sep<em>&</em>&sep<em>cheese</em>"
|
||||
}
|
||||
}
|
||||
]
|
||||
"###);
|
||||
},
|
||||
)
|
||||
.await;
|
||||
|
||||
index
|
||||
.search(
|
||||
json!({"q": "Mac&sep&&sepcheese", "attributesToHighlight": ["content"]}),
|
||||
|response, code| {
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response["hits"]), @r###"
|
||||
[
|
||||
{
|
||||
"id": 1,
|
||||
"content": "Mac & cheese",
|
||||
"_formatted": {
|
||||
"id": "1",
|
||||
"content": "<em>Mac</em> <em>&</em> <em>cheese</em>"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": 3,
|
||||
"content": "Mac&sep&&sepcheese",
|
||||
"_formatted": {
|
||||
"id": "3",
|
||||
"content": "<em>Mac</em>&sep<em>&</em>&sep<em>cheese</em>"
|
||||
}
|
||||
}
|
||||
]
|
||||
"###);
|
||||
},
|
||||
)
|
||||
.await;
|
||||
|
||||
index
|
||||
.search(json!({"q": "C#D#G", "attributesToHighlight": ["content"]}), |response, code| {
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response["hits"]), @r###"
|
||||
[
|
||||
{
|
||||
"id": 2,
|
||||
"content": "G#D#G#D#G#C#D#G#C#",
|
||||
"_formatted": {
|
||||
"id": "2",
|
||||
"content": "<em>G</em>#<em>D#</em><em>G</em>#<em>D#</em><em>G</em>#<em>C#</em><em>D#</em><em>G</em>#<em>C#</em>"
|
||||
}
|
||||
}
|
||||
]
|
||||
"###);
|
||||
})
|
||||
.await;
|
||||
|
||||
index
|
||||
.search(json!({"q": "#", "attributesToHighlight": ["content"]}), |response, code| {
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response["hits"]), @"[]");
|
||||
})
|
||||
.await;
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn advanced_synergies() {
|
||||
let documents = json!([
|
||||
{
|
||||
"id": 1,
|
||||
"content": "J.R.R. Tolkien",
|
||||
},
|
||||
{
|
||||
"id": 2,
|
||||
"content": "J. R. R. Tolkien",
|
||||
},
|
||||
{
|
||||
"id": 3,
|
||||
"content": "jrr Tolkien",
|
||||
},
|
||||
{
|
||||
"id": 4,
|
||||
"content": "J.K. Rowlings",
|
||||
},
|
||||
{
|
||||
"id": 5,
|
||||
"content": "J. K. Rowlings",
|
||||
},
|
||||
{
|
||||
"id": 6,
|
||||
"content": "jk Rowlings",
|
||||
},
|
||||
]);
|
||||
|
||||
let server = Server::new().await;
|
||||
let index = server.index("test");
|
||||
|
||||
index.add_documents(documents, None).await;
|
||||
index.wait_task(0).await;
|
||||
|
||||
let (_response, _code) = index
|
||||
.update_settings(json!({
|
||||
"dictionary": ["J.R.R.", "J. R. R."],
|
||||
"synonyms": {
|
||||
"J.R.R.": ["jrr", "J. R. R."],
|
||||
"J. R. R.": ["jrr", "J.R.R."],
|
||||
"jrr": ["J.R.R.", "J. R. R."],
|
||||
"J.K.": ["jk", "J. K."],
|
||||
"J. K.": ["jk", "J.K."],
|
||||
"jk": ["J.K.", "J. K."],
|
||||
}
|
||||
}))
|
||||
.await;
|
||||
index.wait_task(1).await;
|
||||
|
||||
index
|
||||
.search(json!({"q": "J.R.R.", "attributesToHighlight": ["content"]}), |response, code| {
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response["hits"]), @r###"
|
||||
[
|
||||
{
|
||||
"id": 1,
|
||||
"content": "J.R.R. Tolkien",
|
||||
"_formatted": {
|
||||
"id": "1",
|
||||
"content": "<em>J.R.R.</em> Tolkien"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": 2,
|
||||
"content": "J. R. R. Tolkien",
|
||||
"_formatted": {
|
||||
"id": "2",
|
||||
"content": "<em>J. R. R.</em> Tolkien"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": 3,
|
||||
"content": "jrr Tolkien",
|
||||
"_formatted": {
|
||||
"id": "3",
|
||||
"content": "<em>jrr</em> Tolkien"
|
||||
}
|
||||
}
|
||||
]
|
||||
"###);
|
||||
})
|
||||
.await;
|
||||
|
||||
index
|
||||
.search(json!({"q": "jrr", "attributesToHighlight": ["content"]}), |response, code| {
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response["hits"]), @r###"
|
||||
[
|
||||
{
|
||||
"id": 3,
|
||||
"content": "jrr Tolkien",
|
||||
"_formatted": {
|
||||
"id": "3",
|
||||
"content": "<em>jrr</em> Tolkien"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": 1,
|
||||
"content": "J.R.R. Tolkien",
|
||||
"_formatted": {
|
||||
"id": "1",
|
||||
"content": "<em>J.R.R.</em> Tolkien"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": 2,
|
||||
"content": "J. R. R. Tolkien",
|
||||
"_formatted": {
|
||||
"id": "2",
|
||||
"content": "<em>J. R. R.</em> Tolkien"
|
||||
}
|
||||
}
|
||||
]
|
||||
"###);
|
||||
})
|
||||
.await;
|
||||
|
||||
index
|
||||
.search(json!({"q": "J. R. R.", "attributesToHighlight": ["content"]}), |response, code| {
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response["hits"]), @r###"
|
||||
[
|
||||
{
|
||||
"id": 2,
|
||||
"content": "J. R. R. Tolkien",
|
||||
"_formatted": {
|
||||
"id": "2",
|
||||
"content": "<em>J. R. R.</em> Tolkien"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": 1,
|
||||
"content": "J.R.R. Tolkien",
|
||||
"_formatted": {
|
||||
"id": "1",
|
||||
"content": "<em>J.R.R.</em> Tolkien"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": 3,
|
||||
"content": "jrr Tolkien",
|
||||
"_formatted": {
|
||||
"id": "3",
|
||||
"content": "<em>jrr</em> Tolkien"
|
||||
}
|
||||
}
|
||||
]
|
||||
"###);
|
||||
})
|
||||
.await;
|
||||
|
||||
// Only update dictionary, the synonyms should be recomputed.
|
||||
let (_response, _code) = index
|
||||
.update_settings(json!({
|
||||
"dictionary": ["J.R.R.", "J. R. R.", "J.K.", "J. K."],
|
||||
}))
|
||||
.await;
|
||||
index.wait_task(2).await;
|
||||
|
||||
index
|
||||
.search(json!({"q": "jk", "attributesToHighlight": ["content"]}), |response, code| {
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response["hits"]), @r###"
|
||||
[
|
||||
{
|
||||
"id": 6,
|
||||
"content": "jk Rowlings",
|
||||
"_formatted": {
|
||||
"id": "6",
|
||||
"content": "<em>jk</em> Rowlings"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": 4,
|
||||
"content": "J.K. Rowlings",
|
||||
"_formatted": {
|
||||
"id": "4",
|
||||
"content": "<em>J.K.</em> Rowlings"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": 5,
|
||||
"content": "J. K. Rowlings",
|
||||
"_formatted": {
|
||||
"id": "5",
|
||||
"content": "<em>J. K.</em> Rowlings"
|
||||
}
|
||||
}
|
||||
]
|
||||
"###);
|
||||
})
|
||||
.await;
|
||||
|
||||
index
|
||||
.search(json!({"q": "J.K.", "attributesToHighlight": ["content"]}), |response, code| {
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response["hits"]), @r###"
|
||||
[
|
||||
{
|
||||
"id": 4,
|
||||
"content": "J.K. Rowlings",
|
||||
"_formatted": {
|
||||
"id": "4",
|
||||
"content": "<em>J.K.</em> Rowlings"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": 5,
|
||||
"content": "J. K. Rowlings",
|
||||
"_formatted": {
|
||||
"id": "5",
|
||||
"content": "<em>J. K.</em> Rowlings"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": 6,
|
||||
"content": "jk Rowlings",
|
||||
"_formatted": {
|
||||
"id": "6",
|
||||
"content": "<em>jk</em> Rowlings"
|
||||
}
|
||||
}
|
||||
]
|
||||
"###);
|
||||
})
|
||||
.await;
|
||||
|
||||
index
|
||||
.search(json!({"q": "J. K.", "attributesToHighlight": ["content"]}), |response, code| {
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response["hits"]), @r###"
|
||||
[
|
||||
{
|
||||
"id": 5,
|
||||
"content": "J. K. Rowlings",
|
||||
"_formatted": {
|
||||
"id": "5",
|
||||
"content": "<em>J. K.</em> Rowlings"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": 4,
|
||||
"content": "J.K. Rowlings",
|
||||
"_formatted": {
|
||||
"id": "4",
|
||||
"content": "<em>J.K.</em> Rowlings"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": 6,
|
||||
"content": "jk Rowlings",
|
||||
"_formatted": {
|
||||
"id": "6",
|
||||
"content": "<em>jk</em> Rowlings"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": 2,
|
||||
"content": "J. R. R. Tolkien",
|
||||
"_formatted": {
|
||||
"id": "2",
|
||||
"content": "<em>J. R.</em> R. Tolkien"
|
||||
}
|
||||
}
|
||||
]
|
||||
"###);
|
||||
})
|
||||
.await;
|
||||
}
|
@ -17,10 +17,10 @@ bincode = "1.3.3"
|
||||
bstr = "1.4.0"
|
||||
bytemuck = { version = "1.13.1", features = ["extern_crate_alloc"] }
|
||||
byteorder = "1.4.3"
|
||||
charabia = { version = "0.8.2", default-features = false }
|
||||
charabia = { version = "0.8.3", default-features = false }
|
||||
concat-arrays = "0.1.2"
|
||||
crossbeam-channel = "0.5.8"
|
||||
deserr = "0.5.0"
|
||||
deserr = { version = "0.6.0", features = ["actix-web"]}
|
||||
either = { version = "1.8.1", features = ["serde"] }
|
||||
flatten-serde-json = { path = "../flatten-serde-json" }
|
||||
fst = "0.4.7"
|
||||
@ -32,18 +32,18 @@ grenad = { version = "0.4.4", default-features = false, features = [
|
||||
heed = { git = "https://github.com/meilisearch/heed", tag = "v0.12.7", default-features = false, features = [
|
||||
"lmdb", "read-txn-no-tls"
|
||||
] }
|
||||
indexmap = { version = "1.9.3", features = ["serde"] }
|
||||
indexmap = { version = "2.0.0", features = ["serde"] }
|
||||
instant-distance = { version = "0.6.1", features = ["with-serde"] }
|
||||
json-depth-checker = { path = "../json-depth-checker" }
|
||||
levenshtein_automata = { version = "0.2.1", features = ["fst_automaton"] }
|
||||
memmap2 = "0.5.10"
|
||||
memmap2 = "0.7.1"
|
||||
obkv = "0.2.0"
|
||||
once_cell = "1.17.1"
|
||||
ordered-float = "3.6.0"
|
||||
rand_pcg = { version = "0.3.1", features = ["serde1"] }
|
||||
rayon = "1.7.0"
|
||||
roaring = "0.10.1"
|
||||
rstar = { version = "0.10.0", features = ["serde"] }
|
||||
rstar = { version = "0.11.0", features = ["serde"] }
|
||||
serde = { version = "1.0.160", features = ["derive"] }
|
||||
serde_json = { version = "1.0.95", features = ["preserve_order"] }
|
||||
slice-group-by = "0.3.0"
|
||||
@ -63,7 +63,10 @@ uuid = { version = "1.3.1", features = ["v4"] }
|
||||
filter-parser = { path = "../filter-parser" }
|
||||
|
||||
# documents words self-join
|
||||
itertools = "0.10.5"
|
||||
itertools = "0.11.0"
|
||||
|
||||
# profiling
|
||||
puffin = "0.16.0"
|
||||
|
||||
# logging
|
||||
log = "0.4.17"
|
||||
@ -71,7 +74,7 @@ logging_timer = "1.1.0"
|
||||
csv = "1.2.1"
|
||||
|
||||
[dev-dependencies]
|
||||
mimalloc = { version = "0.1.29", default-features = false }
|
||||
mimalloc = { version = "0.1.37", default-features = false }
|
||||
big_s = "1.0.2"
|
||||
insta = "1.29.0"
|
||||
maplit = "1.0.2"
|
||||
|
@ -122,22 +122,28 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco
|
||||
.field,
|
||||
match .valid_fields.is_empty() {
|
||||
true => "This index does not have configured sortable attributes.".to_string(),
|
||||
false => format!("Available sortable attributes are: `{}`.",
|
||||
valid_fields.iter().map(AsRef::as_ref).collect::<Vec<&str>>().join(", ")
|
||||
false => format!("Available sortable attributes are: `{}{}`.",
|
||||
valid_fields.iter().map(AsRef::as_ref).collect::<Vec<&str>>().join(", "),
|
||||
.hidden_fields.then_some(", <..hidden-attributes>").unwrap_or(""),
|
||||
),
|
||||
}
|
||||
)]
|
||||
InvalidSortableAttribute { field: String, valid_fields: BTreeSet<String> },
|
||||
InvalidSortableAttribute { field: String, valid_fields: BTreeSet<String>, hidden_fields: bool },
|
||||
#[error("Attribute `{}` is not facet-searchable. {}",
|
||||
.field,
|
||||
match .valid_fields.is_empty() {
|
||||
true => "This index does not have configured facet-searchable attributes. To make it facet-searchable add it to the `filterableAttributes` index settings.".to_string(),
|
||||
false => format!("Available facet-searchable attributes are: `{}`. To make it facet-searchable add it to the `filterableAttributes` index settings.",
|
||||
valid_fields.iter().map(AsRef::as_ref).collect::<Vec<&str>>().join(", ")
|
||||
false => format!("Available facet-searchable attributes are: `{}{}`. To make it facet-searchable add it to the `filterableAttributes` index settings.",
|
||||
valid_fields.iter().map(AsRef::as_ref).collect::<Vec<&str>>().join(", "),
|
||||
.hidden_fields.then_some(", <..hidden-attributes>").unwrap_or(""),
|
||||
),
|
||||
}
|
||||
)]
|
||||
InvalidFacetSearchFacetName { field: String, valid_fields: BTreeSet<String> },
|
||||
InvalidFacetSearchFacetName {
|
||||
field: String,
|
||||
valid_fields: BTreeSet<String>,
|
||||
hidden_fields: bool,
|
||||
},
|
||||
#[error("Attribute `{}` is not searchable. Available searchable attributes are: `{}{}`.",
|
||||
.field,
|
||||
.valid_fields.iter().map(AsRef::as_ref).collect::<Vec<&str>>().join(", "),
|
||||
@ -340,8 +346,11 @@ fn conditionally_lookup_for_error_message() {
|
||||
];
|
||||
|
||||
for (list, suffix) in messages {
|
||||
let err =
|
||||
UserError::InvalidSortableAttribute { field: "name".to_string(), valid_fields: list };
|
||||
let err = UserError::InvalidSortableAttribute {
|
||||
field: "name".to_string(),
|
||||
valid_fields: list,
|
||||
hidden_fields: false,
|
||||
};
|
||||
|
||||
assert_eq!(err.to_string(), format!("{} {}", prefix, suffix));
|
||||
}
|
||||
|
@ -20,7 +20,10 @@ pub use self::beu32_str_codec::BEU32StrCodec;
|
||||
pub use self::field_id_word_count_codec::FieldIdWordCountCodec;
|
||||
pub use self::fst_set_codec::FstSetCodec;
|
||||
pub use self::obkv_codec::ObkvCodec;
|
||||
pub use self::roaring_bitmap::{BoRoaringBitmapCodec, CboRoaringBitmapCodec, RoaringBitmapCodec};
|
||||
pub use self::roaring_bitmap::{
|
||||
BoRoaringBitmapCodec, CboRoaringBitmapCodec, CboRoaringTreemapCodec, RoaringBitmapCodec,
|
||||
RoaringTreemapCodec,
|
||||
};
|
||||
pub use self::roaring_bitmap_length::{
|
||||
BoRoaringBitmapLenCodec, CboRoaringBitmapLenCodec, RoaringBitmapLenCodec,
|
||||
};
|
||||
|
196
milli/src/heed_codec/roaring_bitmap/cbo_roaring_treemap_codec.rs
Normal file
196
milli/src/heed_codec/roaring_bitmap/cbo_roaring_treemap_codec.rs
Normal file
@ -0,0 +1,196 @@
|
||||
use std::borrow::Cow;
|
||||
use std::io;
|
||||
use std::mem::size_of;
|
||||
|
||||
use byteorder::{NativeEndian, ReadBytesExt, WriteBytesExt};
|
||||
use roaring::RoaringTreemap;
|
||||
|
||||
use crate::heed_codec::BytesDecodeOwned;
|
||||
|
||||
/// This is the limit where using a byteorder became less size efficient
|
||||
/// than using a direct roaring encoding, it is also the point where we are able
|
||||
/// to determine the encoding used only by using the array of bytes length.
|
||||
pub const THRESHOLD: usize = 4;
|
||||
|
||||
/// A conditionnal codec that either use the RoaringBitmap
|
||||
/// or a lighter ByteOrder en/decoding method.
|
||||
pub struct CboRoaringTreemapCodec;
|
||||
|
||||
impl CboRoaringTreemapCodec {
|
||||
pub fn serialized_size(roaring: &RoaringTreemap) -> usize {
|
||||
if roaring.len() <= THRESHOLD as u64 {
|
||||
roaring.len() as usize * size_of::<u64>()
|
||||
} else {
|
||||
roaring.serialized_size()
|
||||
}
|
||||
}
|
||||
|
||||
pub fn serialize_into(roaring: &RoaringTreemap, vec: &mut Vec<u8>) {
|
||||
if roaring.len() <= THRESHOLD as u64 {
|
||||
// If the number of items (u32s) to encode is less than or equal to the threshold
|
||||
// it means that it would weigh the same or less than the RoaringBitmap
|
||||
// header, so we directly encode them using ByteOrder instead.
|
||||
for integer in roaring {
|
||||
vec.write_u64::<NativeEndian>(integer).unwrap();
|
||||
}
|
||||
} else {
|
||||
// Otherwise, we use the classic RoaringBitmapCodec that writes a header.
|
||||
roaring.serialize_into(vec).unwrap();
|
||||
}
|
||||
}
|
||||
|
||||
pub fn deserialize_from(mut bytes: &[u8]) -> io::Result<RoaringTreemap> {
|
||||
if bytes.len() <= THRESHOLD * size_of::<u64>() {
|
||||
// If there is threshold or less than threshold integers that can fit into this array
|
||||
// of bytes it means that we used the ByteOrder codec serializer.
|
||||
let mut bitmap = RoaringTreemap::new();
|
||||
while let Ok(integer) = bytes.read_u64::<NativeEndian>() {
|
||||
bitmap.insert(integer);
|
||||
}
|
||||
Ok(bitmap)
|
||||
} else {
|
||||
// Otherwise, it means we used the classic RoaringBitmapCodec and
|
||||
// that the header takes threshold integers.
|
||||
RoaringTreemap::deserialize_unchecked_from(bytes)
|
||||
}
|
||||
}
|
||||
|
||||
/// Merge serialized CboRoaringBitmaps in a buffer.
|
||||
///
|
||||
/// if the merged values length is under the threshold, values are directly
|
||||
/// serialized in the buffer else a RoaringBitmap is created from the
|
||||
/// values and is serialized in the buffer.
|
||||
pub fn merge_into(slices: &[Cow<[u8]>], buffer: &mut Vec<u8>) -> io::Result<()> {
|
||||
let mut roaring = RoaringTreemap::new();
|
||||
let mut vec = Vec::new();
|
||||
|
||||
for bytes in slices {
|
||||
if bytes.len() <= THRESHOLD * size_of::<u64>() {
|
||||
let mut reader = bytes.as_ref();
|
||||
while let Ok(integer) = reader.read_u64::<NativeEndian>() {
|
||||
vec.push(integer);
|
||||
}
|
||||
} else {
|
||||
roaring |= RoaringTreemap::deserialize_unchecked_from(bytes.as_ref())?;
|
||||
}
|
||||
}
|
||||
|
||||
if roaring.is_empty() {
|
||||
vec.sort_unstable();
|
||||
vec.dedup();
|
||||
|
||||
if vec.len() <= THRESHOLD {
|
||||
for integer in vec {
|
||||
buffer.extend_from_slice(&integer.to_ne_bytes());
|
||||
}
|
||||
} else {
|
||||
// We can unwrap safely because the vector is sorted upper.
|
||||
let roaring = RoaringTreemap::from_sorted_iter(vec.into_iter()).unwrap();
|
||||
roaring.serialize_into(buffer)?;
|
||||
}
|
||||
} else {
|
||||
roaring.extend(vec);
|
||||
roaring.serialize_into(buffer)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl heed::BytesDecode<'_> for CboRoaringTreemapCodec {
|
||||
type DItem = RoaringTreemap;
|
||||
|
||||
fn bytes_decode(bytes: &[u8]) -> Option<Self::DItem> {
|
||||
Self::deserialize_from(bytes).ok()
|
||||
}
|
||||
}
|
||||
|
||||
impl BytesDecodeOwned for CboRoaringTreemapCodec {
|
||||
type DItem = RoaringTreemap;
|
||||
|
||||
fn bytes_decode_owned(bytes: &[u8]) -> Option<Self::DItem> {
|
||||
Self::deserialize_from(bytes).ok()
|
||||
}
|
||||
}
|
||||
|
||||
impl heed::BytesEncode<'_> for CboRoaringTreemapCodec {
|
||||
type EItem = RoaringTreemap;
|
||||
|
||||
fn bytes_encode(item: &Self::EItem) -> Option<Cow<[u8]>> {
|
||||
let mut vec = Vec::with_capacity(Self::serialized_size(item));
|
||||
Self::serialize_into(item, &mut vec);
|
||||
Some(Cow::Owned(vec))
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::iter::FromIterator;
|
||||
|
||||
use heed::{BytesDecode, BytesEncode};
|
||||
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn verify_encoding_decoding() {
|
||||
let input = RoaringTreemap::from_iter(0..THRESHOLD as u64);
|
||||
let bytes = CboRoaringTreemapCodec::bytes_encode(&input).unwrap();
|
||||
let output = CboRoaringTreemapCodec::bytes_decode(&bytes).unwrap();
|
||||
assert_eq!(input, output);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn verify_threshold() {
|
||||
let input = RoaringTreemap::from_iter(0..THRESHOLD as u64);
|
||||
|
||||
// use roaring treemap
|
||||
let mut bytes = Vec::new();
|
||||
input.serialize_into(&mut bytes).unwrap();
|
||||
let roaring_size = bytes.len();
|
||||
|
||||
// use byteorder directly
|
||||
let mut bytes = Vec::new();
|
||||
for integer in input {
|
||||
bytes.write_u64::<NativeEndian>(integer).unwrap();
|
||||
}
|
||||
let bo_size = bytes.len();
|
||||
|
||||
assert!(roaring_size > bo_size, "roaring size: {}, bo size {}", roaring_size, bo_size);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn merge_cbo_roaring_bitmaps() {
|
||||
let mut buffer = Vec::new();
|
||||
|
||||
let small_data = vec![
|
||||
RoaringTreemap::from_sorted_iter(1..4).unwrap(),
|
||||
RoaringTreemap::from_sorted_iter(2..5).unwrap(),
|
||||
RoaringTreemap::from_sorted_iter(4..6).unwrap(),
|
||||
RoaringTreemap::from_sorted_iter(1..3).unwrap(),
|
||||
];
|
||||
|
||||
let small_data: Vec<_> =
|
||||
small_data.iter().map(|b| CboRoaringTreemapCodec::bytes_encode(b).unwrap()).collect();
|
||||
CboRoaringTreemapCodec::merge_into(small_data.as_slice(), &mut buffer).unwrap();
|
||||
let bitmap = CboRoaringTreemapCodec::deserialize_from(&buffer).unwrap();
|
||||
let expected = RoaringTreemap::from_sorted_iter(1..6).unwrap();
|
||||
assert_eq!(bitmap, expected);
|
||||
|
||||
let medium_data = vec![
|
||||
RoaringTreemap::from_sorted_iter(1..4).unwrap(),
|
||||
RoaringTreemap::from_sorted_iter(2..5).unwrap(),
|
||||
RoaringTreemap::from_sorted_iter(4..8).unwrap(),
|
||||
RoaringTreemap::from_sorted_iter(0..3).unwrap(),
|
||||
RoaringTreemap::from_sorted_iter(7..23).unwrap(),
|
||||
];
|
||||
|
||||
let medium_data: Vec<_> =
|
||||
medium_data.iter().map(|b| CboRoaringTreemapCodec::bytes_encode(b).unwrap()).collect();
|
||||
buffer.clear();
|
||||
CboRoaringTreemapCodec::merge_into(medium_data.as_slice(), &mut buffer).unwrap();
|
||||
|
||||
let bitmap = CboRoaringTreemapCodec::deserialize_from(&buffer).unwrap();
|
||||
let expected = RoaringTreemap::from_sorted_iter(0..23).unwrap();
|
||||
assert_eq!(bitmap, expected);
|
||||
}
|
||||
}
|
@ -1,7 +1,11 @@
|
||||
mod bo_roaring_bitmap_codec;
|
||||
pub mod cbo_roaring_bitmap_codec;
|
||||
pub mod cbo_roaring_treemap_codec;
|
||||
mod roaring_bitmap_codec;
|
||||
mod roaring_treemap_codec;
|
||||
|
||||
pub use self::bo_roaring_bitmap_codec::BoRoaringBitmapCodec;
|
||||
pub use self::cbo_roaring_bitmap_codec::CboRoaringBitmapCodec;
|
||||
pub use self::cbo_roaring_treemap_codec::CboRoaringTreemapCodec;
|
||||
pub use self::roaring_bitmap_codec::RoaringBitmapCodec;
|
||||
pub use self::roaring_treemap_codec::RoaringTreemapCodec;
|
||||
|
33
milli/src/heed_codec/roaring_bitmap/roaring_treemap_codec.rs
Normal file
33
milli/src/heed_codec/roaring_bitmap/roaring_treemap_codec.rs
Normal file
@ -0,0 +1,33 @@
|
||||
use std::borrow::Cow;
|
||||
|
||||
use roaring::RoaringTreemap;
|
||||
|
||||
use crate::heed_codec::BytesDecodeOwned;
|
||||
|
||||
pub struct RoaringTreemapCodec;
|
||||
|
||||
impl heed::BytesDecode<'_> for RoaringTreemapCodec {
|
||||
type DItem = RoaringTreemap;
|
||||
|
||||
fn bytes_decode(bytes: &[u8]) -> Option<Self::DItem> {
|
||||
RoaringTreemap::deserialize_unchecked_from(bytes).ok()
|
||||
}
|
||||
}
|
||||
|
||||
impl BytesDecodeOwned for RoaringTreemapCodec {
|
||||
type DItem = RoaringTreemap;
|
||||
|
||||
fn bytes_decode_owned(bytes: &[u8]) -> Option<Self::DItem> {
|
||||
RoaringTreemap::deserialize_from(bytes).ok()
|
||||
}
|
||||
}
|
||||
|
||||
impl heed::BytesEncode<'_> for RoaringTreemapCodec {
|
||||
type EItem = RoaringTreemap;
|
||||
|
||||
fn bytes_encode(item: &Self::EItem) -> Option<Cow<[u8]>> {
|
||||
let mut bytes = Vec::with_capacity(item.serialized_size());
|
||||
item.serialize_into(&mut bytes).ok()?;
|
||||
Some(Cow::Owned(bytes))
|
||||
}
|
||||
}
|
@ -1,5 +1,5 @@
|
||||
use std::borrow::Cow;
|
||||
use std::collections::{BTreeSet, HashMap, HashSet};
|
||||
use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet};
|
||||
use std::fs::File;
|
||||
use std::mem::size_of;
|
||||
use std::path::Path;
|
||||
@ -61,8 +61,12 @@ pub mod main_key {
|
||||
pub const USER_DEFINED_SEARCHABLE_FIELDS_KEY: &str = "user-defined-searchable-fields";
|
||||
pub const SOFT_EXTERNAL_DOCUMENTS_IDS_KEY: &str = "soft-external-documents-ids";
|
||||
pub const STOP_WORDS_KEY: &str = "stop-words";
|
||||
pub const NON_SEPARATOR_TOKENS_KEY: &str = "non-separator-tokens";
|
||||
pub const SEPARATOR_TOKENS_KEY: &str = "separator-tokens";
|
||||
pub const DICTIONARY_KEY: &str = "dictionary";
|
||||
pub const STRING_FACETED_DOCUMENTS_IDS_PREFIX: &str = "string-faceted-documents-ids";
|
||||
pub const SYNONYMS_KEY: &str = "synonyms";
|
||||
pub const USER_DEFINED_SYNONYMS_KEY: &str = "user-defined-synonyms";
|
||||
pub const WORDS_FST_KEY: &str = "words-fst";
|
||||
pub const WORDS_PREFIXES_FST_KEY: &str = "words-prefixes-fst";
|
||||
pub const CREATED_AT_KEY: &str = "created-at";
|
||||
@ -651,6 +655,26 @@ impl Index {
|
||||
}
|
||||
}
|
||||
|
||||
/* remove hidden fields */
|
||||
pub fn remove_hidden_fields(
|
||||
&self,
|
||||
rtxn: &RoTxn,
|
||||
fields: impl IntoIterator<Item = impl AsRef<str>>,
|
||||
) -> Result<(BTreeSet<String>, bool)> {
|
||||
let mut valid_fields =
|
||||
fields.into_iter().map(|f| f.as_ref().to_string()).collect::<BTreeSet<String>>();
|
||||
|
||||
let fields_len = valid_fields.len();
|
||||
|
||||
if let Some(dn) = self.displayed_fields(rtxn)? {
|
||||
let displayable_names = dn.iter().map(|s| s.to_string()).collect();
|
||||
valid_fields = &valid_fields & &displayable_names;
|
||||
}
|
||||
|
||||
let hidden_fields = fields_len > valid_fields.len();
|
||||
Ok((valid_fields, hidden_fields))
|
||||
}
|
||||
|
||||
/* searchable fields */
|
||||
|
||||
/// Write the user defined searchable fields and generate the real searchable fields from the specified fields ids map.
|
||||
@ -1055,18 +1079,116 @@ impl Index {
|
||||
}
|
||||
}
|
||||
|
||||
/* non separator tokens */
|
||||
|
||||
pub(crate) fn put_non_separator_tokens(
|
||||
&self,
|
||||
wtxn: &mut RwTxn,
|
||||
set: &BTreeSet<String>,
|
||||
) -> heed::Result<()> {
|
||||
self.main.put::<_, Str, SerdeBincode<_>>(wtxn, main_key::NON_SEPARATOR_TOKENS_KEY, set)
|
||||
}
|
||||
|
||||
pub(crate) fn delete_non_separator_tokens(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
|
||||
self.main.delete::<_, Str>(wtxn, main_key::NON_SEPARATOR_TOKENS_KEY)
|
||||
}
|
||||
|
||||
pub fn non_separator_tokens(&self, rtxn: &RoTxn) -> Result<Option<BTreeSet<String>>> {
|
||||
Ok(self.main.get::<_, Str, SerdeBincode<BTreeSet<String>>>(
|
||||
rtxn,
|
||||
main_key::NON_SEPARATOR_TOKENS_KEY,
|
||||
)?)
|
||||
}
|
||||
|
||||
/* separator tokens */
|
||||
|
||||
pub(crate) fn put_separator_tokens(
|
||||
&self,
|
||||
wtxn: &mut RwTxn,
|
||||
set: &BTreeSet<String>,
|
||||
) -> heed::Result<()> {
|
||||
self.main.put::<_, Str, SerdeBincode<_>>(wtxn, main_key::SEPARATOR_TOKENS_KEY, set)
|
||||
}
|
||||
|
||||
pub(crate) fn delete_separator_tokens(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
|
||||
self.main.delete::<_, Str>(wtxn, main_key::SEPARATOR_TOKENS_KEY)
|
||||
}
|
||||
|
||||
pub fn separator_tokens(&self, rtxn: &RoTxn) -> Result<Option<BTreeSet<String>>> {
|
||||
Ok(self
|
||||
.main
|
||||
.get::<_, Str, SerdeBincode<BTreeSet<String>>>(rtxn, main_key::SEPARATOR_TOKENS_KEY)?)
|
||||
}
|
||||
|
||||
/* separators easing method */
|
||||
|
||||
pub fn allowed_separators(&self, rtxn: &RoTxn) -> Result<Option<BTreeSet<String>>> {
|
||||
let default_separators =
|
||||
charabia::separators::DEFAULT_SEPARATORS.iter().map(|s| s.to_string());
|
||||
let mut separators: Option<BTreeSet<_>> = None;
|
||||
if let Some(mut separator_tokens) = self.separator_tokens(rtxn)? {
|
||||
separator_tokens.extend(default_separators.clone());
|
||||
separators = Some(separator_tokens);
|
||||
}
|
||||
|
||||
if let Some(non_separator_tokens) = self.non_separator_tokens(rtxn)? {
|
||||
separators = separators
|
||||
.or_else(|| Some(default_separators.collect()))
|
||||
.map(|separators| &separators - &non_separator_tokens);
|
||||
}
|
||||
|
||||
Ok(separators)
|
||||
}
|
||||
|
||||
/* dictionary */
|
||||
|
||||
pub(crate) fn put_dictionary(
|
||||
&self,
|
||||
wtxn: &mut RwTxn,
|
||||
set: &BTreeSet<String>,
|
||||
) -> heed::Result<()> {
|
||||
self.main.put::<_, Str, SerdeBincode<_>>(wtxn, main_key::DICTIONARY_KEY, set)
|
||||
}
|
||||
|
||||
pub(crate) fn delete_dictionary(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
|
||||
self.main.delete::<_, Str>(wtxn, main_key::DICTIONARY_KEY)
|
||||
}
|
||||
|
||||
pub fn dictionary(&self, rtxn: &RoTxn) -> Result<Option<BTreeSet<String>>> {
|
||||
Ok(self
|
||||
.main
|
||||
.get::<_, Str, SerdeBincode<BTreeSet<String>>>(rtxn, main_key::DICTIONARY_KEY)?)
|
||||
}
|
||||
|
||||
/* synonyms */
|
||||
|
||||
pub(crate) fn put_synonyms(
|
||||
&self,
|
||||
wtxn: &mut RwTxn,
|
||||
synonyms: &HashMap<Vec<String>, Vec<Vec<String>>>,
|
||||
user_defined_synonyms: &BTreeMap<String, Vec<String>>,
|
||||
) -> heed::Result<()> {
|
||||
self.main.put::<_, Str, SerdeBincode<_>>(wtxn, main_key::SYNONYMS_KEY, synonyms)
|
||||
self.main.put::<_, Str, SerdeBincode<_>>(wtxn, main_key::SYNONYMS_KEY, synonyms)?;
|
||||
self.main.put::<_, Str, SerdeBincode<_>>(
|
||||
wtxn,
|
||||
main_key::USER_DEFINED_SYNONYMS_KEY,
|
||||
user_defined_synonyms,
|
||||
)
|
||||
}
|
||||
|
||||
pub(crate) fn delete_synonyms(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
|
||||
self.main.delete::<_, Str>(wtxn, main_key::SYNONYMS_KEY)
|
||||
self.main.delete::<_, Str>(wtxn, main_key::SYNONYMS_KEY)?;
|
||||
self.main.delete::<_, Str>(wtxn, main_key::USER_DEFINED_SYNONYMS_KEY)
|
||||
}
|
||||
|
||||
pub fn user_defined_synonyms(
|
||||
&self,
|
||||
rtxn: &RoTxn,
|
||||
) -> heed::Result<BTreeMap<String, Vec<String>>> {
|
||||
Ok(self
|
||||
.main
|
||||
.get::<_, Str, SerdeBincode<_>>(rtxn, main_key::USER_DEFINED_SYNONYMS_KEY)?
|
||||
.unwrap_or_default())
|
||||
}
|
||||
|
||||
pub fn synonyms(&self, rtxn: &RoTxn) -> heed::Result<HashMap<Vec<String>, Vec<Vec<String>>>> {
|
||||
@ -1718,11 +1840,11 @@ pub(crate) mod tests {
|
||||
.unwrap();
|
||||
index
|
||||
.add_documents(documents!([
|
||||
{ "id": 0, "_geo": { "lat": 0, "lng": 0 } },
|
||||
{ "id": 1, "_geo": { "lat": 0, "lng": -175 } },
|
||||
{ "id": 2, "_geo": { "lat": 0, "lng": 175 } },
|
||||
{ "id": 0, "_geo": { "lat": "0", "lng": "0" } },
|
||||
{ "id": 1, "_geo": { "lat": 0, "lng": "-175" } },
|
||||
{ "id": 2, "_geo": { "lat": "0", "lng": 175 } },
|
||||
{ "id": 3, "_geo": { "lat": 85, "lng": 0 } },
|
||||
{ "id": 4, "_geo": { "lat": -85, "lng": 0 } },
|
||||
{ "id": 4, "_geo": { "lat": "-85", "lng": "0" } },
|
||||
]))
|
||||
.unwrap();
|
||||
|
||||
|
@ -97,7 +97,7 @@ const MAX_LMDB_KEY_LENGTH: usize = 500;
|
||||
///
|
||||
/// This number is determined by the keys of the different facet databases
|
||||
/// and adding a margin of safety.
|
||||
pub const MAX_FACET_VALUE_LENGTH: usize = MAX_LMDB_KEY_LENGTH - 20;
|
||||
pub const MAX_FACET_VALUE_LENGTH: usize = MAX_LMDB_KEY_LENGTH - 32;
|
||||
|
||||
/// The maximum length a word can be
|
||||
pub const MAX_WORD_LENGTH: usize = MAX_LMDB_KEY_LENGTH / 2;
|
||||
@ -293,15 +293,15 @@ pub fn normalize_facet(original: &str) -> String {
|
||||
#[derive(serde::Serialize, serde::Deserialize, Debug)]
|
||||
#[serde(transparent)]
|
||||
pub struct VectorOrArrayOfVectors {
|
||||
#[serde(with = "either::serde_untagged")]
|
||||
inner: either::Either<Vec<f32>, Vec<Vec<f32>>>,
|
||||
#[serde(with = "either::serde_untagged_optional")]
|
||||
inner: Option<either::Either<Vec<f32>, Vec<Vec<f32>>>>,
|
||||
}
|
||||
|
||||
impl VectorOrArrayOfVectors {
|
||||
pub fn into_array_of_vectors(self) -> Vec<Vec<f32>> {
|
||||
match self.inner {
|
||||
either::Either::Left(vector) => vec![vector],
|
||||
either::Either::Right(vectors) => vectors,
|
||||
pub fn into_array_of_vectors(self) -> Option<Vec<Vec<f32>>> {
|
||||
match self.inner? {
|
||||
either::Either::Left(vector) => Some(vec![vector]),
|
||||
either::Either::Right(vectors) => Some(vectors),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -280,9 +280,13 @@ impl<'a> SearchForFacetValues<'a> {
|
||||
|
||||
let filterable_fields = index.filterable_fields(rtxn)?;
|
||||
if !filterable_fields.contains(&self.facet) {
|
||||
let (valid_fields, hidden_fields) =
|
||||
index.remove_hidden_fields(rtxn, filterable_fields)?;
|
||||
|
||||
return Err(UserError::InvalidFacetSearchFacetName {
|
||||
field: self.facet.clone(),
|
||||
valid_fields: filterable_fields.into_iter().collect(),
|
||||
valid_fields,
|
||||
hidden_fields,
|
||||
}
|
||||
.into());
|
||||
}
|
||||
|
@ -91,11 +91,12 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
|
||||
/// Update the universes accordingly and inform the logger.
|
||||
macro_rules! back {
|
||||
() => {
|
||||
assert!(
|
||||
ranking_rule_universes[cur_ranking_rule_index].is_empty(),
|
||||
"The ranking rule {} did not sort its bucket exhaustively",
|
||||
ranking_rules[cur_ranking_rule_index].id()
|
||||
);
|
||||
// FIXME: temporarily disabled assert: see <https://github.com/meilisearch/meilisearch/pull/4013>
|
||||
// assert!(
|
||||
// ranking_rule_universes[cur_ranking_rule_index].is_empty(),
|
||||
// "The ranking rule {} did not sort its bucket exhaustively",
|
||||
// ranking_rules[cur_ranking_rule_index].id()
|
||||
// );
|
||||
logger.end_iteration_ranking_rule(
|
||||
cur_ranking_rule_index,
|
||||
ranking_rules[cur_ranking_rule_index].as_ref(),
|
||||
|
@ -20,7 +20,7 @@ mod sort;
|
||||
#[cfg(test)]
|
||||
mod tests;
|
||||
|
||||
use std::collections::{BTreeSet, HashSet};
|
||||
use std::collections::HashSet;
|
||||
|
||||
use bucket_sort::{bucket_sort, BucketSortOutput};
|
||||
use charabia::TokenizerBuilder;
|
||||
@ -108,24 +108,11 @@ impl<'ctx> SearchContext<'ctx> {
|
||||
(None, None) => continue,
|
||||
// The field is not searchable => User error
|
||||
(_fid, Some(false)) => {
|
||||
let mut valid_fields: BTreeSet<_> =
|
||||
fids_map.names().map(String::from).collect();
|
||||
let (valid_fields, hidden_fields) = match searchable_names {
|
||||
Some(sn) => self.index.remove_hidden_fields(self.txn, sn)?,
|
||||
None => self.index.remove_hidden_fields(self.txn, fids_map.names())?,
|
||||
};
|
||||
|
||||
// Filter by the searchable names
|
||||
if let Some(sn) = searchable_names {
|
||||
let searchable_names = sn.iter().map(|s| s.to_string()).collect();
|
||||
valid_fields = &valid_fields & &searchable_names;
|
||||
}
|
||||
|
||||
let searchable_count = valid_fields.len();
|
||||
|
||||
// Remove hidden fields
|
||||
if let Some(dn) = self.index.displayed_fields(self.txn)? {
|
||||
let displayable_names = dn.iter().map(|s| s.to_string()).collect();
|
||||
valid_fields = &valid_fields & &displayable_names;
|
||||
}
|
||||
|
||||
let hidden_fields = searchable_count > valid_fields.len();
|
||||
let field = field_name.to_string();
|
||||
return Err(UserError::InvalidSearchableAttribute {
|
||||
field,
|
||||
@ -488,6 +475,20 @@ pub fn execute_search(
|
||||
tokbuilder.stop_words(stop_words);
|
||||
}
|
||||
|
||||
let separators = ctx.index.allowed_separators(ctx.txn)?;
|
||||
let separators: Option<Vec<_>> =
|
||||
separators.as_ref().map(|x| x.iter().map(String::as_str).collect());
|
||||
if let Some(ref separators) = separators {
|
||||
tokbuilder.separators(separators);
|
||||
}
|
||||
|
||||
let dictionary = ctx.index.dictionary(ctx.txn)?;
|
||||
let dictionary: Option<Vec<_>> =
|
||||
dictionary.as_ref().map(|x| x.iter().map(String::as_str).collect());
|
||||
if let Some(ref dictionary) = dictionary {
|
||||
tokbuilder.words_dict(dictionary);
|
||||
}
|
||||
|
||||
let script_lang_map = ctx.index.script_language(ctx.txn)?;
|
||||
if !script_lang_map.is_empty() {
|
||||
tokbuilder.allow_list(&script_lang_map);
|
||||
@ -590,16 +591,24 @@ fn check_sort_criteria(ctx: &SearchContext, sort_criteria: Option<&Vec<AscDesc>>
|
||||
for asc_desc in sort_criteria {
|
||||
match asc_desc.member() {
|
||||
Member::Field(ref field) if !crate::is_faceted(field, &sortable_fields) => {
|
||||
let (valid_fields, hidden_fields) =
|
||||
ctx.index.remove_hidden_fields(ctx.txn, sortable_fields)?;
|
||||
|
||||
return Err(UserError::InvalidSortableAttribute {
|
||||
field: field.to_string(),
|
||||
valid_fields: sortable_fields.into_iter().collect(),
|
||||
})?
|
||||
valid_fields,
|
||||
hidden_fields,
|
||||
})?;
|
||||
}
|
||||
Member::Geo(_) if !sortable_fields.contains("_geo") => {
|
||||
let (valid_fields, hidden_fields) =
|
||||
ctx.index.remove_hidden_fields(ctx.txn, sortable_fields)?;
|
||||
|
||||
return Err(UserError::InvalidSortableAttribute {
|
||||
field: "_geo".to_string(),
|
||||
valid_fields: sortable_fields.into_iter().collect(),
|
||||
})?
|
||||
valid_fields,
|
||||
hidden_fields,
|
||||
})?;
|
||||
}
|
||||
_ => (),
|
||||
}
|
||||
|
@ -2,7 +2,7 @@ use std::io::Cursor;
|
||||
|
||||
use big_s::S;
|
||||
use heed::EnvOpenOptions;
|
||||
use maplit::{hashmap, hashset};
|
||||
use maplit::{btreemap, hashset};
|
||||
|
||||
use crate::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
|
||||
use crate::update::{IndexDocuments, IndexDocumentsConfig, IndexerConfig, Settings};
|
||||
@ -33,7 +33,7 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index {
|
||||
S("tag"),
|
||||
S("asc_desc_rank"),
|
||||
});
|
||||
builder.set_synonyms(hashmap! {
|
||||
builder.set_synonyms(btreemap! {
|
||||
S("hello") => vec![S("good morning")],
|
||||
S("world") => vec![S("earth")],
|
||||
S("america") => vec![S("the united states")],
|
||||
|
@ -15,7 +15,7 @@ they store fewer sprximities than the regular word sprximity DB.
|
||||
|
||||
*/
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::collections::BTreeMap;
|
||||
|
||||
use crate::index::tests::TempIndex;
|
||||
use crate::search::new::tests::collect_field_values;
|
||||
@ -336,7 +336,7 @@ fn test_proximity_split_word() {
|
||||
|
||||
index
|
||||
.update_settings(|s| {
|
||||
let mut syns = HashMap::new();
|
||||
let mut syns = BTreeMap::new();
|
||||
syns.insert("xyz".to_owned(), vec!["sun flower".to_owned()]);
|
||||
s.set_synonyms(syns);
|
||||
})
|
||||
|
@ -18,7 +18,7 @@ if `words` doesn't exist before it.
|
||||
14. Synonyms cost nothing according to the typo ranking rule
|
||||
*/
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::collections::BTreeMap;
|
||||
|
||||
use crate::index::tests::TempIndex;
|
||||
use crate::search::new::tests::collect_field_values;
|
||||
@ -591,7 +591,7 @@ fn test_typo_synonyms() {
|
||||
.update_settings(|s| {
|
||||
s.set_criteria(vec![Criterion::Typo]);
|
||||
|
||||
let mut synonyms = HashMap::new();
|
||||
let mut synonyms = BTreeMap::new();
|
||||
synonyms.insert("lackadaisical".to_owned(), vec!["lazy".to_owned()]);
|
||||
synonyms.insert("fast brownish".to_owned(), vec!["quick brown".to_owned()]);
|
||||
|
||||
|
@ -15,6 +15,8 @@ impl<'t, 'u, 'i> ClearDocuments<'t, 'u, 'i> {
|
||||
}
|
||||
|
||||
pub fn execute(self) -> Result<u64> {
|
||||
puffin::profile_function!();
|
||||
|
||||
self.index.set_updated_at(self.wtxn, &OffsetDateTime::now_utc())?;
|
||||
let Index {
|
||||
env: _env,
|
||||
|
@ -109,6 +109,8 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
|
||||
Some(docid)
|
||||
}
|
||||
pub fn execute(self) -> Result<DocumentDeletionResult> {
|
||||
puffin::profile_function!();
|
||||
|
||||
let DetailedDocumentDeletionResult { deleted_documents, remaining_documents } =
|
||||
self.execute_inner()?;
|
||||
|
||||
|
@ -94,7 +94,7 @@ use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec, FacetGroupValu
|
||||
use crate::heed_codec::ByteSliceRefCodec;
|
||||
use crate::update::index_documents::create_sorter;
|
||||
use crate::update::merge_btreeset_string;
|
||||
use crate::{BEU16StrCodec, Index, Result, BEU16};
|
||||
use crate::{BEU16StrCodec, Index, Result, BEU16, MAX_FACET_VALUE_LENGTH};
|
||||
|
||||
pub mod bulk;
|
||||
pub mod delete;
|
||||
@ -191,7 +191,16 @@ impl<'i> FacetsUpdate<'i> {
|
||||
for result in database.iter(wtxn)? {
|
||||
let (facet_group_key, ()) = result?;
|
||||
if let FacetGroupKey { field_id, level: 0, left_bound } = facet_group_key {
|
||||
let normalized_facet = left_bound.normalize(&options);
|
||||
let mut normalized_facet = left_bound.normalize(&options);
|
||||
let normalized_truncated_facet: String;
|
||||
if normalized_facet.len() > MAX_FACET_VALUE_LENGTH {
|
||||
normalized_truncated_facet = normalized_facet
|
||||
.char_indices()
|
||||
.take_while(|(idx, _)| *idx < MAX_FACET_VALUE_LENGTH)
|
||||
.map(|(_, c)| c)
|
||||
.collect();
|
||||
normalized_facet = normalized_truncated_facet.into();
|
||||
}
|
||||
let set = BTreeSet::from_iter(std::iter::once(left_bound));
|
||||
let key = (field_id, normalized_facet.as_ref());
|
||||
let key = BEU16StrCodec::bytes_encode(&key).ok_or(heed::Error::Encoding)?;
|
||||
|
@ -31,6 +31,8 @@ pub fn enrich_documents_batch<R: Read + Seek>(
|
||||
autogenerate_docids: bool,
|
||||
reader: DocumentsBatchReader<R>,
|
||||
) -> Result<StdResult<EnrichedDocumentsBatchReader<R>, UserError>> {
|
||||
puffin::profile_function!();
|
||||
|
||||
let (mut cursor, mut documents_batch_index) = reader.into_cursor_and_fields_index();
|
||||
|
||||
let mut external_ids = tempfile::tempfile().map(grenad::Writer::new)?;
|
||||
|
@ -28,8 +28,12 @@ pub fn extract_docid_word_positions<R: io::Read + io::Seek>(
|
||||
indexer: GrenadParameters,
|
||||
searchable_fields: &Option<HashSet<FieldId>>,
|
||||
stop_words: Option<&fst::Set<&[u8]>>,
|
||||
allowed_separators: Option<&[&str]>,
|
||||
dictionary: Option<&[&str]>,
|
||||
max_positions_per_attributes: Option<u32>,
|
||||
) -> Result<(RoaringBitmap, grenad::Reader<File>, ScriptLanguageDocidsMap)> {
|
||||
puffin::profile_function!();
|
||||
|
||||
let max_positions_per_attributes = max_positions_per_attributes
|
||||
.map_or(MAX_POSITION_PER_ATTRIBUTE, |max| max.min(MAX_POSITION_PER_ATTRIBUTE));
|
||||
let max_memory = indexer.max_memory_by_thread();
|
||||
@ -50,6 +54,12 @@ pub fn extract_docid_word_positions<R: io::Read + io::Seek>(
|
||||
if let Some(stop_words) = stop_words {
|
||||
tokenizer_builder.stop_words(stop_words);
|
||||
}
|
||||
if let Some(dictionary) = dictionary {
|
||||
tokenizer_builder.words_dict(dictionary);
|
||||
}
|
||||
if let Some(separators) = allowed_separators {
|
||||
tokenizer_builder.separators(separators);
|
||||
}
|
||||
let tokenizer = tokenizer_builder.build();
|
||||
|
||||
let mut cursor = obkv_documents.into_cursor()?;
|
||||
|
@ -20,6 +20,8 @@ pub fn extract_facet_number_docids<R: io::Read + io::Seek>(
|
||||
docid_fid_facet_number: grenad::Reader<R>,
|
||||
indexer: GrenadParameters,
|
||||
) -> Result<grenad::Reader<File>> {
|
||||
puffin::profile_function!();
|
||||
|
||||
let max_memory = indexer.max_memory_by_thread();
|
||||
|
||||
let mut facet_number_docids_sorter = create_sorter(
|
||||
|
@ -18,6 +18,8 @@ pub fn extract_facet_string_docids<R: io::Read + io::Seek>(
|
||||
docid_fid_facet_string: grenad::Reader<R>,
|
||||
indexer: GrenadParameters,
|
||||
) -> Result<grenad::Reader<File>> {
|
||||
puffin::profile_function!();
|
||||
|
||||
let max_memory = indexer.max_memory_by_thread();
|
||||
|
||||
let mut facet_string_docids_sorter = create_sorter(
|
||||
@ -44,7 +46,7 @@ pub fn extract_facet_string_docids<R: io::Read + io::Seek>(
|
||||
if normalised_value.len() > MAX_FACET_VALUE_LENGTH {
|
||||
normalised_truncated_value = normalised_value
|
||||
.char_indices()
|
||||
.take_while(|(idx, _)| idx + 4 < MAX_FACET_VALUE_LENGTH)
|
||||
.take_while(|(idx, _)| *idx < MAX_FACET_VALUE_LENGTH)
|
||||
.map(|(_, c)| c)
|
||||
.collect();
|
||||
normalised_value = normalised_truncated_value.as_str();
|
||||
|
@ -28,12 +28,16 @@ pub struct ExtractedFacetValues {
|
||||
///
|
||||
/// Returns the generated grenad reader containing the docid the fid and the orginal value as key
|
||||
/// and the normalized value as value extracted from the given chunk of documents.
|
||||
/// We need the fid of the geofields to correctly parse them as numbers if they were sent as strings initially.
|
||||
#[logging_timer::time]
|
||||
pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
|
||||
obkv_documents: grenad::Reader<R>,
|
||||
indexer: GrenadParameters,
|
||||
faceted_fields: &HashSet<FieldId>,
|
||||
geo_fields_ids: Option<(FieldId, FieldId)>,
|
||||
) -> Result<ExtractedFacetValues> {
|
||||
puffin::profile_function!();
|
||||
|
||||
let max_memory = indexer.max_memory_by_thread();
|
||||
|
||||
let mut fid_docid_facet_numbers_sorter = create_sorter(
|
||||
@ -82,7 +86,10 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
|
||||
|
||||
let value = from_slice(field_bytes).map_err(InternalError::SerdeJson)?;
|
||||
|
||||
match extract_facet_values(&value) {
|
||||
match extract_facet_values(
|
||||
&value,
|
||||
geo_fields_ids.map_or(false, |(lat, lng)| field_id == lat || field_id == lng),
|
||||
) {
|
||||
FilterableValues::Null => {
|
||||
facet_is_null_docids.entry(field_id).or_default().insert(document);
|
||||
}
|
||||
@ -175,12 +182,13 @@ enum FilterableValues {
|
||||
Values { numbers: Vec<f64>, strings: Vec<(String, String)> },
|
||||
}
|
||||
|
||||
fn extract_facet_values(value: &Value) -> FilterableValues {
|
||||
fn extract_facet_values(value: &Value, geo_field: bool) -> FilterableValues {
|
||||
fn inner_extract_facet_values(
|
||||
value: &Value,
|
||||
can_recurse: bool,
|
||||
output_numbers: &mut Vec<f64>,
|
||||
output_strings: &mut Vec<(String, String)>,
|
||||
geo_field: bool,
|
||||
) {
|
||||
match value {
|
||||
Value::Null => (),
|
||||
@ -191,13 +199,30 @@ fn extract_facet_values(value: &Value) -> FilterableValues {
|
||||
}
|
||||
}
|
||||
Value::String(original) => {
|
||||
// if we're working on a geofield it MUST be something we can parse or else there was an internal error
|
||||
// in the enrich pipeline. But since the enrich pipeline worked, we want to avoid crashing at all costs.
|
||||
if geo_field {
|
||||
if let Ok(float) = original.parse() {
|
||||
output_numbers.push(float);
|
||||
} else {
|
||||
log::warn!(
|
||||
"Internal error, could not parse a geofield that has been validated. Please open an issue."
|
||||
)
|
||||
}
|
||||
}
|
||||
let normalized = crate::normalize_facet(original);
|
||||
output_strings.push((normalized, original.clone()));
|
||||
}
|
||||
Value::Array(values) => {
|
||||
if can_recurse {
|
||||
for value in values {
|
||||
inner_extract_facet_values(value, false, output_numbers, output_strings);
|
||||
inner_extract_facet_values(
|
||||
value,
|
||||
false,
|
||||
output_numbers,
|
||||
output_strings,
|
||||
geo_field,
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -213,7 +238,7 @@ fn extract_facet_values(value: &Value) -> FilterableValues {
|
||||
otherwise => {
|
||||
let mut numbers = Vec::new();
|
||||
let mut strings = Vec::new();
|
||||
inner_extract_facet_values(otherwise, true, &mut numbers, &mut strings);
|
||||
inner_extract_facet_values(otherwise, true, &mut numbers, &mut strings, geo_field);
|
||||
FilterableValues::Values { numbers, strings }
|
||||
}
|
||||
}
|
||||
|
@ -22,6 +22,8 @@ pub fn extract_fid_word_count_docids<R: io::Read + io::Seek>(
|
||||
docid_word_positions: grenad::Reader<R>,
|
||||
indexer: GrenadParameters,
|
||||
) -> Result<grenad::Reader<File>> {
|
||||
puffin::profile_function!();
|
||||
|
||||
let max_memory = indexer.max_memory_by_thread();
|
||||
|
||||
let mut fid_word_count_docids_sorter = create_sorter(
|
||||
|
@ -19,6 +19,8 @@ pub fn extract_geo_points<R: io::Read + io::Seek>(
|
||||
primary_key_id: FieldId,
|
||||
(lat_fid, lng_fid): (FieldId, FieldId),
|
||||
) -> Result<grenad::Reader<File>> {
|
||||
puffin::profile_function!();
|
||||
|
||||
let mut writer = create_writer(
|
||||
indexer.chunk_compression_type,
|
||||
indexer.chunk_compression_level,
|
||||
|
@ -19,6 +19,8 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
|
||||
primary_key_id: FieldId,
|
||||
vectors_fid: FieldId,
|
||||
) -> Result<grenad::Reader<File>> {
|
||||
puffin::profile_function!();
|
||||
|
||||
let mut writer = create_writer(
|
||||
indexer.chunk_compression_type,
|
||||
indexer.chunk_compression_level,
|
||||
@ -33,7 +35,7 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
|
||||
// lazily get it when needed
|
||||
let document_id = || -> Value {
|
||||
let document_id = obkv.get(primary_key_id).unwrap();
|
||||
serde_json::from_slice(document_id).unwrap()
|
||||
from_slice(document_id).unwrap()
|
||||
};
|
||||
|
||||
// first we retrieve the _vectors field
|
||||
@ -50,12 +52,14 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
|
||||
}
|
||||
};
|
||||
|
||||
for (i, vector) in vectors.into_iter().enumerate().take(u16::MAX as usize) {
|
||||
let index = u16::try_from(i).unwrap();
|
||||
let mut key = docid_bytes.to_vec();
|
||||
key.extend_from_slice(&index.to_be_bytes());
|
||||
let bytes = cast_slice(&vector);
|
||||
writer.insert(key, bytes)?;
|
||||
if let Some(vectors) = vectors {
|
||||
for (i, vector) in vectors.into_iter().enumerate().take(u16::MAX as usize) {
|
||||
let index = u16::try_from(i).unwrap();
|
||||
let mut key = docid_bytes.to_vec();
|
||||
key.extend_from_slice(&index.to_be_bytes());
|
||||
let bytes = cast_slice(&vector);
|
||||
writer.insert(key, bytes)?;
|
||||
}
|
||||
}
|
||||
}
|
||||
// else => the `_vectors` object was `null`, there is nothing to do
|
||||
|
@ -27,6 +27,8 @@ pub fn extract_word_docids<R: io::Read + io::Seek>(
|
||||
indexer: GrenadParameters,
|
||||
exact_attributes: &HashSet<FieldId>,
|
||||
) -> Result<(grenad::Reader<File>, grenad::Reader<File>)> {
|
||||
puffin::profile_function!();
|
||||
|
||||
let max_memory = indexer.max_memory_by_thread();
|
||||
|
||||
let mut word_docids_sorter = create_sorter(
|
||||
|
@ -15,6 +15,8 @@ pub fn extract_word_fid_docids<R: io::Read + io::Seek>(
|
||||
docid_word_positions: grenad::Reader<R>,
|
||||
indexer: GrenadParameters,
|
||||
) -> Result<grenad::Reader<File>> {
|
||||
puffin::profile_function!();
|
||||
|
||||
let max_memory = indexer.max_memory_by_thread();
|
||||
|
||||
let mut word_fid_docids_sorter = create_sorter(
|
||||
|
@ -21,6 +21,8 @@ pub fn extract_word_pair_proximity_docids<R: io::Read + io::Seek>(
|
||||
docid_word_positions: grenad::Reader<R>,
|
||||
indexer: GrenadParameters,
|
||||
) -> Result<grenad::Reader<File>> {
|
||||
puffin::profile_function!();
|
||||
|
||||
let max_memory = indexer.max_memory_by_thread();
|
||||
|
||||
let mut word_pair_proximity_docids_sorter = create_sorter(
|
||||
|
@ -18,6 +18,8 @@ pub fn extract_word_position_docids<R: io::Read + io::Seek>(
|
||||
docid_word_positions: grenad::Reader<R>,
|
||||
indexer: GrenadParameters,
|
||||
) -> Result<grenad::Reader<File>> {
|
||||
puffin::profile_function!();
|
||||
|
||||
let max_memory = indexer.max_memory_by_thread();
|
||||
|
||||
let mut word_position_docids_sorter = create_sorter(
|
||||
|
@ -49,9 +49,13 @@ pub(crate) fn data_from_obkv_documents(
|
||||
geo_fields_ids: Option<(FieldId, FieldId)>,
|
||||
vectors_field_id: Option<FieldId>,
|
||||
stop_words: Option<fst::Set<&[u8]>>,
|
||||
allowed_separators: Option<&[&str]>,
|
||||
dictionary: Option<&[&str]>,
|
||||
max_positions_per_attributes: Option<u32>,
|
||||
exact_attributes: HashSet<FieldId>,
|
||||
) -> Result<()> {
|
||||
puffin::profile_function!();
|
||||
|
||||
original_obkv_chunks
|
||||
.par_bridge()
|
||||
.map(|original_documents_chunk| {
|
||||
@ -74,6 +78,8 @@ pub(crate) fn data_from_obkv_documents(
|
||||
geo_fields_ids,
|
||||
vectors_field_id,
|
||||
&stop_words,
|
||||
&allowed_separators,
|
||||
&dictionary,
|
||||
max_positions_per_attributes,
|
||||
)
|
||||
})
|
||||
@ -238,11 +244,13 @@ fn spawn_extraction_task<FE, FS, M>(
|
||||
M::Output: Send,
|
||||
{
|
||||
rayon::spawn(move || {
|
||||
puffin::profile_scope!("extract_multiple_chunks", name);
|
||||
let chunks: Result<M> =
|
||||
chunks.into_par_iter().map(|chunk| extract_fn(chunk, indexer)).collect();
|
||||
rayon::spawn(move || match chunks {
|
||||
Ok(chunks) => {
|
||||
debug!("merge {} database", name);
|
||||
puffin::profile_scope!("merge_multiple_chunks", name);
|
||||
let reader = chunks.merge(merge_fn, &indexer);
|
||||
let _ = lmdb_writer_sx.send(reader.map(serialize_fn));
|
||||
}
|
||||
@ -285,6 +293,8 @@ fn send_and_extract_flattened_documents_data(
|
||||
geo_fields_ids: Option<(FieldId, FieldId)>,
|
||||
vectors_field_id: Option<FieldId>,
|
||||
stop_words: &Option<fst::Set<&[u8]>>,
|
||||
allowed_separators: &Option<&[&str]>,
|
||||
dictionary: &Option<&[&str]>,
|
||||
max_positions_per_attributes: Option<u32>,
|
||||
) -> Result<(
|
||||
grenad::Reader<CursorClonableMmap>,
|
||||
@ -340,6 +350,8 @@ fn send_and_extract_flattened_documents_data(
|
||||
indexer,
|
||||
searchable_fields,
|
||||
stop_words.as_ref(),
|
||||
*allowed_separators,
|
||||
*dictionary,
|
||||
max_positions_per_attributes,
|
||||
)?;
|
||||
|
||||
@ -366,6 +378,7 @@ fn send_and_extract_flattened_documents_data(
|
||||
flattened_documents_chunk.clone(),
|
||||
indexer,
|
||||
faceted_fields,
|
||||
geo_fields_ids,
|
||||
)?;
|
||||
|
||||
// send docid_fid_facet_numbers_chunk to DB writer
|
||||
|
@ -214,6 +214,7 @@ pub fn sorter_into_lmdb_database(
|
||||
sorter: Sorter<MergeFn>,
|
||||
merge: MergeFn,
|
||||
) -> Result<()> {
|
||||
puffin::profile_function!();
|
||||
debug!("Writing MTBL sorter...");
|
||||
let before = Instant::now();
|
||||
|
||||
|
@ -137,6 +137,8 @@ where
|
||||
mut self,
|
||||
reader: DocumentsBatchReader<R>,
|
||||
) -> Result<(Self, StdResult<u64, UserError>)> {
|
||||
puffin::profile_function!();
|
||||
|
||||
// Early return when there is no document to add
|
||||
if reader.is_empty() {
|
||||
return Ok((self, Ok(0)));
|
||||
@ -175,6 +177,8 @@ where
|
||||
mut self,
|
||||
to_delete: Vec<String>,
|
||||
) -> Result<(Self, StdResult<u64, UserError>)> {
|
||||
puffin::profile_function!();
|
||||
|
||||
// Early return when there is no document to add
|
||||
if to_delete.is_empty() {
|
||||
return Ok((self, Ok(0)));
|
||||
@ -194,6 +198,8 @@ where
|
||||
|
||||
#[logging_timer::time("IndexDocuments::{}")]
|
||||
pub fn execute(mut self) -> Result<DocumentAdditionResult> {
|
||||
puffin::profile_function!();
|
||||
|
||||
if self.added_documents == 0 {
|
||||
let number_of_documents = self.index.number_of_documents(self.wtxn)?;
|
||||
return Ok(DocumentAdditionResult { indexed_documents: 0, number_of_documents });
|
||||
@ -232,6 +238,8 @@ where
|
||||
FP: Fn(UpdateIndexingStep) + Sync,
|
||||
FA: Fn() -> bool + Sync,
|
||||
{
|
||||
puffin::profile_function!();
|
||||
|
||||
let TransformOutput {
|
||||
primary_key,
|
||||
fields_ids_map,
|
||||
@ -308,6 +316,12 @@ where
|
||||
let vectors_field_id = self.index.fields_ids_map(self.wtxn)?.id("_vectors");
|
||||
|
||||
let stop_words = self.index.stop_words(self.wtxn)?;
|
||||
let separators = self.index.allowed_separators(self.wtxn)?;
|
||||
let separators: Option<Vec<_>> =
|
||||
separators.as_ref().map(|x| x.iter().map(String::as_str).collect());
|
||||
let dictionary = self.index.dictionary(self.wtxn)?;
|
||||
let dictionary: Option<Vec<_>> =
|
||||
dictionary.as_ref().map(|x| x.iter().map(String::as_str).collect());
|
||||
let exact_attributes = self.index.exact_attributes_ids(self.wtxn)?;
|
||||
|
||||
let pool_params = GrenadParameters {
|
||||
@ -322,6 +336,7 @@ where
|
||||
|
||||
// Run extraction pipeline in parallel.
|
||||
pool.install(|| {
|
||||
puffin::profile_scope!("extract_and_send_grenad_chunks");
|
||||
// split obkv file into several chunks
|
||||
let original_chunk_iter =
|
||||
grenad_obkv_into_chunks(original_documents, pool_params, documents_chunk_size);
|
||||
@ -344,6 +359,8 @@ where
|
||||
geo_fields_ids,
|
||||
vectors_field_id,
|
||||
stop_words,
|
||||
separators.as_deref(),
|
||||
dictionary.as_deref(),
|
||||
max_positions_per_attributes,
|
||||
exact_attributes,
|
||||
)
|
||||
@ -477,6 +494,8 @@ where
|
||||
FP: Fn(UpdateIndexingStep) + Sync,
|
||||
FA: Fn() -> bool + Sync,
|
||||
{
|
||||
puffin::profile_function!();
|
||||
|
||||
// Merged databases are already been indexed, we start from this count;
|
||||
let mut databases_seen = MERGED_DATABASE_COUNT;
|
||||
|
||||
@ -511,26 +530,36 @@ where
|
||||
return Err(Error::InternalError(InternalError::AbortedIndexation));
|
||||
}
|
||||
|
||||
let current_prefix_fst = self.index.words_prefixes_fst(self.wtxn)?;
|
||||
let current_prefix_fst;
|
||||
let common_prefix_fst_words_tmp;
|
||||
let common_prefix_fst_words: Vec<_>;
|
||||
let new_prefix_fst_words;
|
||||
let del_prefix_fst_words;
|
||||
|
||||
// We retrieve the common words between the previous and new prefix word fst.
|
||||
let common_prefix_fst_words = fst_stream_into_vec(
|
||||
previous_words_prefixes_fst.op().add(¤t_prefix_fst).intersection(),
|
||||
);
|
||||
let common_prefix_fst_words: Vec<_> = common_prefix_fst_words
|
||||
.as_slice()
|
||||
.linear_group_by_key(|x| x.chars().next().unwrap())
|
||||
.collect();
|
||||
{
|
||||
puffin::profile_scope!("compute_prefix_diffs");
|
||||
|
||||
// We retrieve the newly added words between the previous and new prefix word fst.
|
||||
let new_prefix_fst_words = fst_stream_into_vec(
|
||||
current_prefix_fst.op().add(&previous_words_prefixes_fst).difference(),
|
||||
);
|
||||
current_prefix_fst = self.index.words_prefixes_fst(self.wtxn)?;
|
||||
|
||||
// We compute the set of prefixes that are no more part of the prefix fst.
|
||||
let del_prefix_fst_words = fst_stream_into_hashset(
|
||||
previous_words_prefixes_fst.op().add(¤t_prefix_fst).difference(),
|
||||
);
|
||||
// We retrieve the common words between the previous and new prefix word fst.
|
||||
common_prefix_fst_words_tmp = fst_stream_into_vec(
|
||||
previous_words_prefixes_fst.op().add(¤t_prefix_fst).intersection(),
|
||||
);
|
||||
common_prefix_fst_words = common_prefix_fst_words_tmp
|
||||
.as_slice()
|
||||
.linear_group_by_key(|x| x.chars().next().unwrap())
|
||||
.collect();
|
||||
|
||||
// We retrieve the newly added words between the previous and new prefix word fst.
|
||||
new_prefix_fst_words = fst_stream_into_vec(
|
||||
current_prefix_fst.op().add(&previous_words_prefixes_fst).difference(),
|
||||
);
|
||||
|
||||
// We compute the set of prefixes that are no more part of the prefix fst.
|
||||
del_prefix_fst_words = fst_stream_into_hashset(
|
||||
previous_words_prefixes_fst.op().add(¤t_prefix_fst).difference(),
|
||||
);
|
||||
}
|
||||
|
||||
databases_seen += 1;
|
||||
(self.progress)(UpdateIndexingStep::MergeDataIntoFinalDatabase {
|
||||
@ -668,6 +697,8 @@ fn execute_word_prefix_docids(
|
||||
common_prefix_fst_words: &[&[String]],
|
||||
del_prefix_fst_words: &HashSet<Vec<u8>>,
|
||||
) -> Result<()> {
|
||||
puffin::profile_function!();
|
||||
|
||||
let cursor = reader.into_cursor()?;
|
||||
let mut builder = WordPrefixDocids::new(txn, word_docids_db, word_prefix_docids_db);
|
||||
builder.chunk_compression_type = indexer_config.chunk_compression_type;
|
||||
|
@ -558,6 +558,8 @@ impl<'a, 'i> Transform<'a, 'i> {
|
||||
where
|
||||
F: Fn(UpdateIndexingStep) + Sync,
|
||||
{
|
||||
puffin::profile_function!();
|
||||
|
||||
let primary_key = self
|
||||
.index
|
||||
.primary_key(wtxn)?
|
||||
|
@ -46,6 +46,66 @@ pub(crate) enum TypedChunk {
|
||||
ScriptLanguageDocids(HashMap<(Script, Language), RoaringBitmap>),
|
||||
}
|
||||
|
||||
impl TypedChunk {
|
||||
pub fn to_debug_string(&self) -> String {
|
||||
match self {
|
||||
TypedChunk::FieldIdDocidFacetStrings(grenad) => {
|
||||
format!("FieldIdDocidFacetStrings {{ number_of_entries: {} }}", grenad.len())
|
||||
}
|
||||
TypedChunk::FieldIdDocidFacetNumbers(grenad) => {
|
||||
format!("FieldIdDocidFacetNumbers {{ number_of_entries: {} }}", grenad.len())
|
||||
}
|
||||
TypedChunk::Documents(grenad) => {
|
||||
format!("Documents {{ number_of_entries: {} }}", grenad.len())
|
||||
}
|
||||
TypedChunk::FieldIdWordcountDocids(grenad) => {
|
||||
format!("FieldIdWordcountDocids {{ number_of_entries: {} }}", grenad.len())
|
||||
}
|
||||
TypedChunk::NewDocumentsIds(grenad) => {
|
||||
format!("NewDocumentsIds {{ number_of_entries: {} }}", grenad.len())
|
||||
}
|
||||
TypedChunk::WordDocids { word_docids_reader, exact_word_docids_reader } => format!(
|
||||
"WordDocids {{ word_docids_reader: {}, exact_word_docids_reader: {} }}",
|
||||
word_docids_reader.len(),
|
||||
exact_word_docids_reader.len()
|
||||
),
|
||||
TypedChunk::WordPositionDocids(grenad) => {
|
||||
format!("WordPositionDocids {{ number_of_entries: {} }}", grenad.len())
|
||||
}
|
||||
TypedChunk::WordFidDocids(grenad) => {
|
||||
format!("WordFidDocids {{ number_of_entries: {} }}", grenad.len())
|
||||
}
|
||||
TypedChunk::WordPairProximityDocids(grenad) => {
|
||||
format!("WordPairProximityDocids {{ number_of_entries: {} }}", grenad.len())
|
||||
}
|
||||
TypedChunk::FieldIdFacetStringDocids(grenad) => {
|
||||
format!("FieldIdFacetStringDocids {{ number_of_entries: {} }}", grenad.len())
|
||||
}
|
||||
TypedChunk::FieldIdFacetNumberDocids(grenad) => {
|
||||
format!("FieldIdFacetNumberDocids {{ number_of_entries: {} }}", grenad.len())
|
||||
}
|
||||
TypedChunk::FieldIdFacetExistsDocids(grenad) => {
|
||||
format!("FieldIdFacetExistsDocids {{ number_of_entries: {} }}", grenad.len())
|
||||
}
|
||||
TypedChunk::FieldIdFacetIsNullDocids(grenad) => {
|
||||
format!("FieldIdFacetIsNullDocids {{ number_of_entries: {} }}", grenad.len())
|
||||
}
|
||||
TypedChunk::FieldIdFacetIsEmptyDocids(grenad) => {
|
||||
format!("FieldIdFacetIsEmptyDocids {{ number_of_entries: {} }}", grenad.len())
|
||||
}
|
||||
TypedChunk::GeoPoints(grenad) => {
|
||||
format!("GeoPoints {{ number_of_entries: {} }}", grenad.len())
|
||||
}
|
||||
TypedChunk::VectorPoints(grenad) => {
|
||||
format!("VectorPoints {{ number_of_entries: {} }}", grenad.len())
|
||||
}
|
||||
TypedChunk::ScriptLanguageDocids(grenad) => {
|
||||
format!("ScriptLanguageDocids {{ number_of_entries: {} }}", grenad.len())
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Write typed chunk in the corresponding LMDB database of the provided index.
|
||||
/// Return new documents seen.
|
||||
pub(crate) fn write_typed_chunk_into_index(
|
||||
@ -54,6 +114,8 @@ pub(crate) fn write_typed_chunk_into_index(
|
||||
wtxn: &mut RwTxn,
|
||||
index_is_empty: bool,
|
||||
) -> Result<(RoaringBitmap, bool)> {
|
||||
puffin::profile_function!(typed_chunk.to_debug_string());
|
||||
|
||||
let mut is_merged_database = false;
|
||||
match typed_chunk {
|
||||
TypedChunk::Documents(obkv_documents_iter) => {
|
||||
@ -350,6 +412,8 @@ where
|
||||
FS: for<'a> Fn(&'a [u8], &'a mut Vec<u8>) -> Result<&'a [u8]>,
|
||||
FM: Fn(&[u8], &[u8], &mut Vec<u8>) -> Result<()>,
|
||||
{
|
||||
puffin::profile_function!(format!("number of entries: {}", data.len()));
|
||||
|
||||
let mut buffer = Vec::new();
|
||||
let database = database.remap_types::<ByteSlice, ByteSlice>();
|
||||
|
||||
@ -392,6 +456,8 @@ where
|
||||
FS: for<'a> Fn(&'a [u8], &'a mut Vec<u8>) -> Result<&'a [u8]>,
|
||||
FM: Fn(&[u8], &[u8], &mut Vec<u8>) -> Result<()>,
|
||||
{
|
||||
puffin::profile_function!(format!("number of entries: {}", data.len()));
|
||||
|
||||
if !index_is_empty {
|
||||
return write_entries_into_database(
|
||||
data,
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user