mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-11-30 09:45:38 +00:00
Compare commits
101 Commits
prototype-
...
prototype-
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
1585b3ed2f | ||
|
|
c48f72e6b8 | ||
|
|
41178e59fc | ||
|
|
d1bc7ec58a | ||
|
|
c5ec817f52 | ||
|
|
d0a3582a56 | ||
|
|
0c18962b13 | ||
|
|
366144146b | ||
|
|
03b510945b | ||
|
|
c3a8d4b7fb | ||
|
|
c573261ac4 | ||
|
|
8f04353b7d | ||
|
|
b9983a48c7 | ||
|
|
b22f1260bf | ||
|
|
dfb84f80da | ||
|
|
98b67f217a | ||
|
|
6325cda74f | ||
|
|
c71ba72f73 | ||
|
|
ecd36b15f0 | ||
|
|
8a2e8a887f | ||
|
|
309c33a418 | ||
|
|
9b01506cee | ||
|
|
f37fdceb15 | ||
|
|
f544cfa444 | ||
|
|
c158d03337 | ||
|
|
b7109c0fd2 | ||
|
|
a53a0fdb77 | ||
|
|
719fdd701b | ||
|
|
01c13c98ac | ||
|
|
5b89276fcc | ||
|
|
41697c4d65 | ||
|
|
7d85753573 | ||
|
|
76657af1f9 | ||
|
|
966cbdab69 | ||
|
|
0c68b9ed4c | ||
|
|
d7233ecdb8 | ||
|
|
95a011af13 | ||
|
|
e257710961 | ||
|
|
9dd4423054 | ||
|
|
8c3ad57ef9 | ||
|
|
2d1434da81 | ||
|
|
c488a4a351 | ||
|
|
0c7d7c68bc | ||
|
|
854745c670 | ||
|
|
777eebb759 | ||
|
|
61ccfaf9bc | ||
|
|
f0c4d36ff7 | ||
|
|
8c20d6e2fe | ||
|
|
8e437ed76c | ||
|
|
1191ec5939 | ||
|
|
0d20d08daf | ||
|
|
b66bf049b5 | ||
|
|
b2f36b9b97 | ||
|
|
b311089435 | ||
|
|
3d46e84d97 | ||
|
|
ad7f8edff8 | ||
|
|
5ce01bcb53 | ||
|
|
d5523cc6ac | ||
|
|
fe7a312ec6 | ||
|
|
57dc4b148c | ||
|
|
a325ddfe6a | ||
|
|
0cd81573b4 | ||
|
|
b0ff595f60 | ||
|
|
3eb6f4b56f | ||
|
|
49f976c8d8 | ||
|
|
84d56f3320 | ||
|
|
97e3dfd99d | ||
|
|
dc38da95c4 | ||
|
|
2ce8b42757 | ||
|
|
f391039a6f | ||
|
|
fcdd20b533 | ||
|
|
b45c36cd71 | ||
|
|
151c31c18f | ||
|
|
a8ad0902d3 | ||
|
|
e917dbdebb | ||
|
|
ba919b6123 | ||
|
|
2dfbb6813a | ||
|
|
8f589a5cce | ||
|
|
0b8bbd8750 | ||
|
|
eef95de30e | ||
|
|
13a13a4862 | ||
|
|
e691c92ed5 | ||
|
|
928ab2f9b1 | ||
|
|
7c18a9375f | ||
|
|
05a311f9be | ||
|
|
9b1b9b409e | ||
|
|
7f555f23e8 | ||
|
|
a0bfc9f63a | ||
|
|
3155264381 | ||
|
|
42400c381e | ||
|
|
08c7dab528 | ||
|
|
8590687515 | ||
|
|
8f5d127b1e | ||
|
|
2b4160ebb9 | ||
|
|
8ba1c8f88f | ||
|
|
8e7edf8ea7 | ||
|
|
9daccdf7f0 | ||
|
|
437ee55c57 | ||
|
|
b1717865ea | ||
|
|
176f716292 | ||
|
|
40ad19ba9e |
24
.github/workflows/test-suite.yml
vendored
24
.github/workflows/test-suite.yml
vendored
@@ -30,20 +30,20 @@ jobs:
|
||||
run: |
|
||||
apt-get update && apt-get install -y curl
|
||||
apt-get install build-essential -y
|
||||
- name: Run test with Rust stable
|
||||
- name: Setup test with Rust stable
|
||||
if: github.event_name != 'schedule'
|
||||
uses: actions-rs/toolchain@v1
|
||||
with:
|
||||
toolchain: stable
|
||||
override: true
|
||||
- name: Run test with Rust nightly
|
||||
- name: Setup test with Rust nightly
|
||||
if: github.event_name == 'schedule'
|
||||
uses: actions-rs/toolchain@v1
|
||||
with:
|
||||
toolchain: nightly
|
||||
override: true
|
||||
- name: Cache dependencies
|
||||
uses: Swatinem/rust-cache@v2.4.0
|
||||
uses: Swatinem/rust-cache@v2.5.0
|
||||
- name: Run cargo check without any default features
|
||||
uses: actions-rs/cargo@v1
|
||||
with:
|
||||
@@ -65,7 +65,7 @@ jobs:
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- name: Cache dependencies
|
||||
uses: Swatinem/rust-cache@v2.4.0
|
||||
uses: Swatinem/rust-cache@v2.5.0
|
||||
- name: Run cargo check without any default features
|
||||
uses: actions-rs/cargo@v1
|
||||
with:
|
||||
@@ -117,17 +117,17 @@ jobs:
|
||||
run: |
|
||||
apt-get update
|
||||
apt-get install --assume-yes build-essential curl
|
||||
- uses: actions-rs/toolchain@v1
|
||||
with:
|
||||
toolchain: stable
|
||||
override: true
|
||||
- uses: actions-rs/toolchain@v1
|
||||
with:
|
||||
toolchain: stable
|
||||
override: true
|
||||
- name: Run cargo tree without default features and check lindera is not present
|
||||
run: |
|
||||
cargo tree -f '{p} {f}' -e normal --no-default-features | grep lindera -vqz
|
||||
- name: Run cargo tree with default features and check lindera is pressent
|
||||
run: |
|
||||
cargo tree -f '{p} {f}' -e normal | grep lindera -qz
|
||||
|
||||
|
||||
# We run tests in debug also, to make sure that the debug_assertions are hit
|
||||
test-debug:
|
||||
name: Run tests in debug
|
||||
@@ -146,7 +146,7 @@ jobs:
|
||||
toolchain: stable
|
||||
override: true
|
||||
- name: Cache dependencies
|
||||
uses: Swatinem/rust-cache@v2.4.0
|
||||
uses: Swatinem/rust-cache@v2.5.0
|
||||
- name: Run tests in debug
|
||||
uses: actions-rs/cargo@v1
|
||||
with:
|
||||
@@ -165,7 +165,7 @@ jobs:
|
||||
override: true
|
||||
components: clippy
|
||||
- name: Cache dependencies
|
||||
uses: Swatinem/rust-cache@v2.4.0
|
||||
uses: Swatinem/rust-cache@v2.5.0
|
||||
- name: Run cargo clippy
|
||||
uses: actions-rs/cargo@v1
|
||||
with:
|
||||
@@ -184,7 +184,7 @@ jobs:
|
||||
override: true
|
||||
components: rustfmt
|
||||
- name: Cache dependencies
|
||||
uses: Swatinem/rust-cache@v2.4.0
|
||||
uses: Swatinem/rust-cache@v2.5.0
|
||||
- name: Run cargo fmt
|
||||
# Since we never ran the `build.rs` script in the benchmark directory we are missing one auto-generated import file.
|
||||
# Since we want to trigger (and fail) this action as fast as possible, instead of building the benchmark crate
|
||||
|
||||
509
Cargo.lock
generated
509
Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
23
Cargo.toml
23
Cargo.toml
@@ -37,23 +37,8 @@ opt-level = 3
|
||||
[profile.dev.package.roaring]
|
||||
opt-level = 3
|
||||
|
||||
[profile.dev.package.lindera-ipadic-builder]
|
||||
opt-level = 3
|
||||
[profile.dev.package.encoding]
|
||||
opt-level = 3
|
||||
[profile.dev.package.yada]
|
||||
opt-level = 3
|
||||
[patch.crates-io]
|
||||
strois = { git = "https://github.com/meilisearch/strois", rev = "eeb945c" }
|
||||
|
||||
[profile.release.package.lindera-ipadic-builder]
|
||||
opt-level = 3
|
||||
[profile.release.package.encoding]
|
||||
opt-level = 3
|
||||
[profile.release.package.yada]
|
||||
opt-level = 3
|
||||
|
||||
[profile.bench.package.lindera-ipadic-builder]
|
||||
opt-level = 3
|
||||
[profile.bench.package.encoding]
|
||||
opt-level = 3
|
||||
[profile.bench.package.yada]
|
||||
opt-level = 3
|
||||
[patch."https://github.com/irevoire/zookeeper-client-sync"]
|
||||
zookeeper-client-sync = { path = "../zookeeper-client-sync" }
|
||||
|
||||
19
PROFILING.md
Normal file
19
PROFILING.md
Normal file
@@ -0,0 +1,19 @@
|
||||
# Profiling Meilisearch
|
||||
|
||||
Search engine technologies are complex pieces of software that require thorough profiling tools. We chose to use [Puffin](https://github.com/EmbarkStudios/puffin), which the Rust gaming industry uses extensively. You can export and import the profiling reports using the top bar's _File_ menu options.
|
||||
|
||||

|
||||
|
||||
## Profiling the Indexing Process
|
||||
|
||||
When you enable the `profile-with-puffin` feature of Meilisearch, a Puffin HTTP server will run on Meilisearch and listen on the default _0.0.0.0:8585_ address. This server will record a "frame" whenever it executes the `IndexScheduler::tick` method.
|
||||
|
||||
Once your Meilisearch is running and awaits new indexation operations, you must [install and run the `puffin_viewer` tool](https://github.com/EmbarkStudios/puffin/tree/main/puffin_viewer) to see the profiling results. I advise you to run the viewer with the `RUST_LOG=puffin_http::client=debug` environment variable to see the client trying to connect to your server.
|
||||
|
||||
Another piece of advice on the Puffin viewer UI interface is to consider the _Merge children with same ID_ option. It can hide the exact actual timings at which events were sent. Please turn it off when you see strange gaps on the Flamegraph. It can help.
|
||||
|
||||
## Profiling the Search Process
|
||||
|
||||
We still need to take the time to profile the search side of the engine with Puffin. It would require time to profile the filtering phase, query parsing, creation, and execution. We could even profile the Actix HTTP server.
|
||||
|
||||
The only issue we see is the framing system. Puffin requires a global frame-based profiling phase, which collides with Meilisearch's ability to accept and answer multiple requests on different threads simultaneously.
|
||||
68
README.md
68
README.md
@@ -1,16 +1,20 @@
|
||||
<p align="center">
|
||||
<img src="assets/meilisearch-logo-light.svg?sanitize=true#gh-light-mode-only">
|
||||
<img src="assets/meilisearch-logo-dark.svg?sanitize=true#gh-dark-mode-only">
|
||||
<a href="https://www.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=logo#gh-light-mode-only" target="_blank">
|
||||
<img src="assets/meilisearch-logo-light.svg?sanitize=true#gh-light-mode-only">
|
||||
</a>
|
||||
<a href="https://www.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=logo#gh-dark-mode-only" target="_blank">
|
||||
<img src="assets/meilisearch-logo-dark.svg?sanitize=true#gh-dark-mode-only">
|
||||
</a>
|
||||
</p>
|
||||
|
||||
<h4 align="center">
|
||||
<a href="https://www.meilisearch.com">Website</a> |
|
||||
<a href="https://www.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=nav">Website</a> |
|
||||
<a href="https://roadmap.meilisearch.com/tabs/1-under-consideration">Roadmap</a> |
|
||||
<a href="https://www.meilisearch.com/pricing?utm_campaign=oss&utm_source=engine&utm_medium=meilisearch">Meilisearch Cloud</a> |
|
||||
<a href="https://blog.meilisearch.com">Blog</a> |
|
||||
<a href="https://www.meilisearch.com/docs">Documentation</a> |
|
||||
<a href="https://www.meilisearch.com/docs/faq">FAQ</a> |
|
||||
<a href="https://discord.meilisearch.com">Discord</a>
|
||||
<a href="https://www.meilisearch.com/pricing?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=nav">Meilisearch Cloud</a> |
|
||||
<a href="https://blog.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=nav">Blog</a> |
|
||||
<a href="https://www.meilisearch.com/docs?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=nav">Documentation</a> |
|
||||
<a href="https://www.meilisearch.com/docs/faq?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=nav">FAQ</a> |
|
||||
<a href="https://discord.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=nav">Discord</a>
|
||||
</h4>
|
||||
|
||||
<p align="center">
|
||||
@@ -24,40 +28,40 @@
|
||||
Meilisearch helps you shape a delightful search experience in a snap, offering features that work out-of-the-box to speed up your workflow.
|
||||
|
||||
<p align="center" name="demo">
|
||||
<a href="https://where2watch.meilisearch.com/#gh-light-mode-only" target="_blank">
|
||||
<a href="https://where2watch.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demo-gif#gh-light-mode-only" target="_blank">
|
||||
<img src="assets/demo-light.gif#gh-light-mode-only" alt="A bright colored application for finding movies screening near the user">
|
||||
</a>
|
||||
<a href="https://where2watch.meilisearch.com/#gh-dark-mode-only" target="_blank">
|
||||
<a href="https://where2watch.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demo-gif#gh-dark-mode-only" target="_blank">
|
||||
<img src="assets/demo-dark.gif#gh-dark-mode-only" alt="A dark colored application for finding movies screening near the user">
|
||||
</a>
|
||||
</p>
|
||||
|
||||
🔥 [**Try it!**](https://where2watch.meilisearch.com/) 🔥
|
||||
🔥 [**Try it!**](https://where2watch.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demo-link) 🔥
|
||||
|
||||
## ✨ Features
|
||||
|
||||
- **Search-as-you-type:** find search results in less than 50 milliseconds
|
||||
- **[Typo tolerance](https://www.meilisearch.com/docs/learn/getting_started/customizing_relevancy#typo-tolerance):** get relevant matches even when queries contain typos and misspellings
|
||||
- **[Filtering](https://www.meilisearch.com/docs/learn/fine_tuning_results/filtering) and [faceted search](https://www.meilisearch.com/docs/learn/fine_tuning_results/faceted_search):** enhance your user's search experience with custom filters and build a faceted search interface in a few lines of code
|
||||
- **[Sorting](https://www.meilisearch.com/docs/learn/fine_tuning_results/sorting):** sort results based on price, date, or pretty much anything else your users need
|
||||
- **[Synonym support](https://www.meilisearch.com/docs/learn/getting_started/customizing_relevancy#synonyms):** configure synonyms to include more relevant content in your search results
|
||||
- **[Geosearch](https://www.meilisearch.com/docs/learn/fine_tuning_results/geosearch):** filter and sort documents based on geographic data
|
||||
- **[Extensive language support](https://www.meilisearch.com/docs/learn/what_is_meilisearch/language):** search datasets in any language, with optimized support for Chinese, Japanese, Hebrew, and languages using the Latin alphabet
|
||||
- **[Security management](https://www.meilisearch.com/docs/learn/security/master_api_keys):** control which users can access what data with API keys that allow fine-grained permissions handling
|
||||
- **[Multi-Tenancy](https://www.meilisearch.com/docs/learn/security/tenant_tokens):** personalize search results for any number of application tenants
|
||||
- **[Typo tolerance](https://www.meilisearch.com/docs/learn/getting_started/customizing_relevancy?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features#typo-tolerance):** get relevant matches even when queries contain typos and misspellings
|
||||
- **[Filtering](https://www.meilisearch.com/docs/learn/fine_tuning_results/filtering?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features) and [faceted search](https://www.meilisearch.com/docs/learn/fine_tuning_results/faceted_search?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** enhance your user's search experience with custom filters and build a faceted search interface in a few lines of code
|
||||
- **[Sorting](https://www.meilisearch.com/docs/learn/fine_tuning_results/sorting?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** sort results based on price, date, or pretty much anything else your users need
|
||||
- **[Synonym support](https://www.meilisearch.com/docs/learn/getting_started/customizing_relevancy?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features#synonyms):** configure synonyms to include more relevant content in your search results
|
||||
- **[Geosearch](https://www.meilisearch.com/docs/learn/fine_tuning_results/geosearch?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** filter and sort documents based on geographic data
|
||||
- **[Extensive language support](https://www.meilisearch.com/docs/learn/what_is_meilisearch/language?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** search datasets in any language, with optimized support for Chinese, Japanese, Hebrew, and languages using the Latin alphabet
|
||||
- **[Security management](https://www.meilisearch.com/docs/learn/security/master_api_keys?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** control which users can access what data with API keys that allow fine-grained permissions handling
|
||||
- **[Multi-Tenancy](https://www.meilisearch.com/docs/learn/security/tenant_tokens?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** personalize search results for any number of application tenants
|
||||
- **Highly Customizable:** customize Meilisearch to your specific needs or use our out-of-the-box and hassle-free presets
|
||||
- **[RESTful API](https://www.meilisearch.com/docs/reference/api/overview):** integrate Meilisearch in your technical stack with our plugins and SDKs
|
||||
- **[RESTful API](https://www.meilisearch.com/docs/reference/api/overview?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** integrate Meilisearch in your technical stack with our plugins and SDKs
|
||||
- **Easy to install, deploy, and maintain**
|
||||
|
||||
## 📖 Documentation
|
||||
|
||||
You can consult Meilisearch's documentation at [https://www.meilisearch.com/docs](https://www.meilisearch.com/docs/).
|
||||
You can consult Meilisearch's documentation at [https://www.meilisearch.com/docs](https://www.meilisearch.com/docs/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=docs).
|
||||
|
||||
## 🚀 Getting started
|
||||
|
||||
For basic instructions on how to set up Meilisearch, add documents to an index, and search for documents, take a look at our [Quick Start](https://www.meilisearch.com/docs/learn/getting_started/quick_start) guide.
|
||||
For basic instructions on how to set up Meilisearch, add documents to an index, and search for documents, take a look at our [Quick Start](https://www.meilisearch.com/docs/learn/getting_started/quick_start?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=get-started) guide.
|
||||
|
||||
You may also want to check out [Meilisearch 101](https://www.meilisearch.com/docs/learn/getting_started/filtering_and_sorting) for an introduction to some of Meilisearch's most popular features.
|
||||
You may also want to check out [Meilisearch 101](https://www.meilisearch.com/docs/learn/getting_started/filtering_and_sorting?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=get-started) for an introduction to some of Meilisearch's most popular features.
|
||||
|
||||
## ⚡ Supercharge your Meilisearch experience
|
||||
|
||||
@@ -67,29 +71,29 @@ Say goodbye to server deployment and manual updates with [Meilisearch Cloud](htt
|
||||
|
||||
Install one of our SDKs in your project for seamless integration between Meilisearch and your favorite language or framework!
|
||||
|
||||
Take a look at the complete [Meilisearch integration list](https://www.meilisearch.com/docs/learn/what_is_meilisearch/sdks).
|
||||
Take a look at the complete [Meilisearch integration list](https://www.meilisearch.com/docs/learn/what_is_meilisearch/sdks?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=sdks-link).
|
||||
|
||||
[](https://www.meilisearch.com/docs/learn/what_is_meilisearch/sdks)
|
||||
[](https://www.meilisearch.com/docs/learn/what_is_meilisearch/sdks?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=sdks-logos)
|
||||
|
||||
## ⚙️ Advanced usage
|
||||
|
||||
Experienced users will want to keep our [API Reference](https://www.meilisearch.com/docs/reference/api/overview) close at hand.
|
||||
Experienced users will want to keep our [API Reference](https://www.meilisearch.com/docs/reference/api/overview?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=advanced) close at hand.
|
||||
|
||||
We also offer a wide range of dedicated guides to all Meilisearch features, such as [filtering](https://www.meilisearch.com/docs/learn/fine_tuning_results/filtering), [sorting](https://www.meilisearch.com/docs/learn/fine_tuning_results/sorting), [geosearch](https://www.meilisearch.com/docs/learn/fine_tuning_results/geosearch), [API keys](https://www.meilisearch.com/docs/learn/security/master_api_keys), and [tenant tokens](https://www.meilisearch.com/docs/learn/security/tenant_tokens).
|
||||
We also offer a wide range of dedicated guides to all Meilisearch features, such as [filtering](https://www.meilisearch.com/docs/learn/fine_tuning_results/filtering?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=advanced), [sorting](https://www.meilisearch.com/docs/learn/fine_tuning_results/sorting?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=advanced), [geosearch](https://www.meilisearch.com/docs/learn/fine_tuning_results/geosearch?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=advanced), [API keys](https://www.meilisearch.com/docs/learn/security/master_api_keys?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=advanced), and [tenant tokens](https://www.meilisearch.com/docs/learn/security/tenant_tokens?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=advanced).
|
||||
|
||||
Finally, for more in-depth information, refer to our articles explaining fundamental Meilisearch concepts such as [documents](https://www.meilisearch.com/docs/learn/core_concepts/documents) and [indexes](https://www.meilisearch.com/docs/learn/core_concepts/indexes).
|
||||
Finally, for more in-depth information, refer to our articles explaining fundamental Meilisearch concepts such as [documents](https://www.meilisearch.com/docs/learn/core_concepts/documents?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=advanced) and [indexes](https://www.meilisearch.com/docs/learn/core_concepts/indexes?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=advanced).
|
||||
|
||||
## 📊 Telemetry
|
||||
|
||||
Meilisearch collects **anonymized** data from users to help us improve our product. You can [deactivate this](https://www.meilisearch.com/docs/learn/what_is_meilisearch/telemetry#how-to-disable-data-collection) whenever you want.
|
||||
Meilisearch collects **anonymized** data from users to help us improve our product. You can [deactivate this](https://www.meilisearch.com/docs/learn/what_is_meilisearch/telemetry?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=telemetry#how-to-disable-data-collection) whenever you want.
|
||||
|
||||
To request deletion of collected data, please write to us at [privacy@meilisearch.com](mailto:privacy@meilisearch.com). Don't forget to include your `Instance UID` in the message, as this helps us quickly find and delete your data.
|
||||
To request deletion of collected data, please write to us at [privacy@meilisearch.com](mailto:privacy@meilisearch.com). Don't forget to include your `Instance UID` in the message, as this helps us quickly find and delete your data.
|
||||
|
||||
If you want to know more about the kind of data we collect and what we use it for, check the [telemetry section](https://www.meilisearch.com/docs/learn/what_is_meilisearch/telemetry) of our documentation.
|
||||
If you want to know more about the kind of data we collect and what we use it for, check the [telemetry section](https://www.meilisearch.com/docs/learn/what_is_meilisearch/telemetry?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=telemetry#how-to-disable-data-collection) of our documentation.
|
||||
|
||||
## 📫 Get in touch!
|
||||
|
||||
Meilisearch is a search engine created by [Meili](https://www.welcometothejungle.com/en/companies/meilisearch), a software development company based in France and with team members all over the world. Want to know more about us? [Check out our blog!](https://blog.meilisearch.com/)
|
||||
Meilisearch is a search engine created by [Meili](https://www.welcometothejungle.com/en/companies/meilisearch), a software development company based in France and with team members all over the world. Want to know more about us? [Check out our blog!](https://blog.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=contact)
|
||||
|
||||
🗞 [Subscribe to our newsletter](https://meilisearch.us2.list-manage.com/subscribe?u=27870f7b71c908a8b359599fb&id=79582d828e) if you don't want to miss any updates! We promise we won't clutter your mailbox: we only send one edition every two months.
|
||||
|
||||
|
||||
BIN
assets/profiling-example.png
Normal file
BIN
assets/profiling-example.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 1.2 MiB |
@@ -14,7 +14,7 @@ license.workspace = true
|
||||
anyhow = "1.0.70"
|
||||
csv = "1.2.1"
|
||||
milli = { path = "../milli" }
|
||||
mimalloc = { version = "0.1.36", default-features = false }
|
||||
mimalloc = { version = "0.1.37", default-features = false }
|
||||
serde_json = { version = "1.0.95", features = ["preserve_order"] }
|
||||
|
||||
[dev-dependencies]
|
||||
|
||||
59
docker-compose.yml
Normal file
59
docker-compose.yml
Normal file
@@ -0,0 +1,59 @@
|
||||
version: "3.9"
|
||||
services:
|
||||
zk1:
|
||||
container_name: zk1
|
||||
hostname: zk1
|
||||
image: bitnami/zookeeper:3.7.1
|
||||
ports:
|
||||
- 21811:2181
|
||||
environment:
|
||||
- ALLOW_ANONYMOUS_LOGIN=yes
|
||||
- ZOO_SERVER_ID=1
|
||||
- ZOO_SERVERS=0.0.0.0:2888:3888,zk2:2888:3888,zk3:2888:3888
|
||||
zk2:
|
||||
container_name: zk2
|
||||
hostname: zk2
|
||||
image: bitnami/zookeeper:3.7.1
|
||||
ports:
|
||||
- 21812:2181
|
||||
environment:
|
||||
- ALLOW_ANONYMOUS_LOGIN=yes
|
||||
- ZOO_SERVER_ID=2
|
||||
- ZOO_SERVERS=zk1:2888:3888,0.0.0.0:2888:3888,zk3:2888:3888
|
||||
zk3:
|
||||
container_name: zk3
|
||||
hostname: zk3
|
||||
image: bitnami/zookeeper:3.7.1
|
||||
ports:
|
||||
- 21813:2181
|
||||
environment:
|
||||
- ALLOW_ANONYMOUS_LOGIN=yes
|
||||
- ZOO_SERVER_ID=3
|
||||
- ZOO_SERVERS=zk1:2888:3888,zk2:2888:3888,0.0.0.0:2888:3888
|
||||
zoonavigator:
|
||||
container_name: zoonavigator
|
||||
image: elkozmon/zoonavigator
|
||||
ports:
|
||||
- 9000:9000
|
||||
|
||||
# Meilisearch instances
|
||||
# m1:
|
||||
# container_name: m1
|
||||
# hostname: m1
|
||||
# image: getmeili/meilisearch:prototype-zookeeper-ha-0
|
||||
# ports:
|
||||
# - 7700:7700
|
||||
# environment:
|
||||
# - MEILI_ZK_URL=zk1:2181
|
||||
# - MEILI_MASTER_KEY=masterkey
|
||||
# restart: always
|
||||
# m2:
|
||||
# container_name: m2
|
||||
# hostname: m2
|
||||
# image: getmeili/meilisearch:prototype-zookeeper-ha-0
|
||||
# ports:
|
||||
# - 7701:7700
|
||||
# environment:
|
||||
# - MEILI_ZK_URL=zk2:2181
|
||||
# - MEILI_MASTER_KEY=masterkey
|
||||
# restart: always
|
||||
@@ -22,20 +22,6 @@ pub enum Error {
|
||||
|
||||
pub type Result<T> = std::result::Result<T, Error>;
|
||||
|
||||
impl Deref for File {
|
||||
type Target = NamedTempFile;
|
||||
|
||||
fn deref(&self) -> &Self::Target {
|
||||
&self.file
|
||||
}
|
||||
}
|
||||
|
||||
impl DerefMut for File {
|
||||
fn deref_mut(&mut self) -> &mut Self::Target {
|
||||
&mut self.file
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct FileStore {
|
||||
path: PathBuf,
|
||||
@@ -146,6 +132,20 @@ impl File {
|
||||
}
|
||||
}
|
||||
|
||||
impl Deref for File {
|
||||
type Target = NamedTempFile;
|
||||
|
||||
fn deref(&self) -> &Self::Target {
|
||||
&self.file
|
||||
}
|
||||
}
|
||||
|
||||
impl DerefMut for File {
|
||||
fn deref_mut(&mut self) -> &mut Self::Target {
|
||||
&mut self.file
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use std::io::Write;
|
||||
|
||||
61
ha_test/run.sh
Normal file
61
ha_test/run.sh
Normal file
@@ -0,0 +1,61 @@
|
||||
#!/bin/bash
|
||||
|
||||
function is_everything_installed {
|
||||
everything_ok=yes
|
||||
|
||||
if hash zkli 2>/dev/null; then
|
||||
echo "✅ zkli installed"
|
||||
else
|
||||
everything_ok=no
|
||||
echo "🥺 zkli is missing, please run \`cargo install zkli\`"
|
||||
fi
|
||||
|
||||
if hash s3cmd 2>/dev/null; then
|
||||
echo "✅ s3cmd installed"
|
||||
else
|
||||
everything_ok=no
|
||||
echo "🥺 s3cmd is missing, see how to install it here https://s3tools.org/s3cmd"
|
||||
fi
|
||||
|
||||
if [ $everything_ok = "no" ]; then
|
||||
echo "Exiting..."
|
||||
exit 1
|
||||
fi
|
||||
}
|
||||
|
||||
# param: addr of zookeeper
|
||||
function connect_to_zookeeper {
|
||||
if ! zkli -a "$1" ls > /dev/null; then
|
||||
echo "zkli can't connect"
|
||||
return 1
|
||||
fi
|
||||
}
|
||||
|
||||
# param: addr of the s3 bucket
|
||||
function connect_to_s3 {
|
||||
# S3_SECRET_KEY
|
||||
# S3_ACCESS_KEY
|
||||
# S3_HOST
|
||||
# S3_BUCKET
|
||||
|
||||
s3cmd --host="$S3_HOST" --host-bucket="$S3_BUCKET" --access_key="$ACCESS_KEY" --secret_key="$S3_SECRET_KEY" ls
|
||||
|
||||
if $?; then
|
||||
echo "s3cmd can't connect"
|
||||
return 1
|
||||
fi
|
||||
}
|
||||
|
||||
is_everything_installed
|
||||
|
||||
ZOOKEEPER_ADDR="localhost:2181"
|
||||
if ! connect_to_zookeeper $ZOOKEEPER_ADDR; then
|
||||
ZOOKEEPER_ADDR="localhost:21811"
|
||||
if ! connect_to_zookeeper $ZOOKEEPER_ADDR; then
|
||||
echo "Can't connect to zkli"
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
|
||||
|
||||
connect_to_s3
|
||||
@@ -22,6 +22,7 @@ log = "0.4.17"
|
||||
meilisearch-auth = { path = "../meilisearch-auth" }
|
||||
meilisearch-types = { path = "../meilisearch-types" }
|
||||
page_size = "0.5.0"
|
||||
puffin = "0.16.0"
|
||||
roaring = { version = "0.10.1", features = ["serde"] }
|
||||
serde = { version = "1.0.160", features = ["derive"] }
|
||||
serde_json = { version = "1.0.95", features = ["preserve_order"] }
|
||||
@@ -30,6 +31,10 @@ tempfile = "3.5.0"
|
||||
thiserror = "1.0.40"
|
||||
time = { version = "0.3.20", features = ["serde-well-known", "formatting", "parsing", "macros"] }
|
||||
uuid = { version = "1.3.1", features = ["serde", "v4"] }
|
||||
tokio = { version = "1.27.0", features = ["full"] }
|
||||
zookeeper-client-sync = { git = "https://github.com/irevoire/zookeeper-client-sync.git" }
|
||||
parking_lot = "0.12.1"
|
||||
strois = "0.0.4"
|
||||
|
||||
[dev-dependencies]
|
||||
big_s = "1.0.2"
|
||||
|
||||
@@ -43,7 +43,7 @@ use uuid::Uuid;
|
||||
|
||||
use crate::autobatcher::{self, BatchKind};
|
||||
use crate::utils::{self, swap_index_uid_in_task};
|
||||
use crate::{Error, IndexScheduler, ProcessingTasks, Result, TaskId};
|
||||
use crate::{Error, IndexSchedulerInner, ProcessingTasks, Result, TaskId};
|
||||
|
||||
/// Represents a combination of tasks that can all be processed at the same time.
|
||||
///
|
||||
@@ -198,6 +198,35 @@ impl Batch {
|
||||
| IndexDocumentDeletionByFilter { index_uid, .. } => Some(index_uid),
|
||||
}
|
||||
}
|
||||
|
||||
/// Return the content fields uuids associated with this batch.
|
||||
pub fn content_uuids(&self) -> Vec<Uuid> {
|
||||
match self {
|
||||
Batch::TaskCancelation { .. }
|
||||
| Batch::TaskDeletion(_)
|
||||
| Batch::Dump(_)
|
||||
| Batch::IndexCreation { .. }
|
||||
| Batch::IndexDocumentDeletionByFilter { .. }
|
||||
| Batch::IndexUpdate { .. }
|
||||
| Batch::SnapshotCreation(_)
|
||||
| Batch::IndexDeletion { .. }
|
||||
| Batch::IndexSwap { .. } => vec![],
|
||||
Batch::IndexOperation { op, .. } => match op {
|
||||
IndexOperation::DocumentOperation { operations, .. } => operations
|
||||
.iter()
|
||||
.flat_map(|op| match op {
|
||||
DocumentOperation::Add(uuid) => Some(*uuid),
|
||||
DocumentOperation::Delete(_) => None,
|
||||
})
|
||||
.collect(),
|
||||
IndexOperation::DocumentDeletion { .. }
|
||||
| IndexOperation::Settings { .. }
|
||||
| IndexOperation::DocumentClear { .. }
|
||||
| IndexOperation::SettingsAndDocumentOperation { .. }
|
||||
| IndexOperation::DocumentClearAndSetting { .. } => vec![],
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl IndexOperation {
|
||||
@@ -213,7 +242,7 @@ impl IndexOperation {
|
||||
}
|
||||
}
|
||||
|
||||
impl IndexScheduler {
|
||||
impl IndexSchedulerInner {
|
||||
/// Convert an [`BatchKind`](crate::autobatcher::BatchKind) into a [`Batch`].
|
||||
///
|
||||
/// ## Arguments
|
||||
@@ -471,6 +500,8 @@ impl IndexScheduler {
|
||||
#[cfg(test)]
|
||||
self.maybe_fail(crate::tests::FailureLocation::InsideCreateBatch)?;
|
||||
|
||||
puffin::profile_function!();
|
||||
|
||||
let enqueued = &self.get_status(rtxn, Status::Enqueued)?;
|
||||
let to_cancel = self.get_kind(rtxn, Kind::TaskCancelation)? & enqueued;
|
||||
|
||||
@@ -478,8 +509,7 @@ impl IndexScheduler {
|
||||
if let Some(task_id) = to_cancel.max() {
|
||||
// We retrieve the tasks that were processing before this tasks cancelation started.
|
||||
// We must *not* reset the processing tasks before calling this method.
|
||||
let ProcessingTasks { started_at, processing } =
|
||||
&*self.processing_tasks.read().unwrap();
|
||||
let ProcessingTasks { started_at, processing, .. } = &*self.processing_tasks.read();
|
||||
return Ok(Some(Batch::TaskCancelation {
|
||||
task: self.get_task(rtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?,
|
||||
previous_started_at: *started_at,
|
||||
@@ -575,6 +605,9 @@ impl IndexScheduler {
|
||||
self.maybe_fail(crate::tests::FailureLocation::PanicInsideProcessBatch)?;
|
||||
self.breakpoint(crate::Breakpoint::InsideProcessBatch);
|
||||
}
|
||||
|
||||
puffin::profile_function!(format!("{:?}", batch));
|
||||
|
||||
match batch {
|
||||
Batch::TaskCancelation { mut task, previous_started_at, previous_processing_tasks } => {
|
||||
// 1. Retrieve the tasks that matched the query at enqueue-time.
|
||||
@@ -1111,6 +1144,8 @@ impl IndexScheduler {
|
||||
index: &'i Index,
|
||||
operation: IndexOperation,
|
||||
) -> Result<Vec<Task>> {
|
||||
puffin::profile_function!();
|
||||
|
||||
match operation {
|
||||
IndexOperation::DocumentClear { mut tasks, .. } => {
|
||||
let count = milli::update::ClearDocuments::new(index_wtxn, index).execute()?;
|
||||
@@ -1385,7 +1420,7 @@ impl IndexScheduler {
|
||||
fn delete_matched_tasks(&self, wtxn: &mut RwTxn, matched_tasks: &RoaringBitmap) -> Result<u64> {
|
||||
// 1. Remove from this list the tasks that we are not allowed to delete
|
||||
let enqueued_tasks = self.get_status(wtxn, Status::Enqueued)?;
|
||||
let processing_tasks = &self.processing_tasks.read().unwrap().processing.clone();
|
||||
let processing_tasks = &self.processing_tasks.read().processing.clone();
|
||||
|
||||
let all_task_ids = self.all_task_ids(wtxn)?;
|
||||
let mut to_delete_tasks = all_task_ids & matched_tasks;
|
||||
|
||||
@@ -295,6 +295,11 @@ impl IndexMap {
|
||||
"Attempt to finish deletion of an index that was being closed"
|
||||
);
|
||||
}
|
||||
|
||||
/// Returns the indexes that were opened by the `IndexMap`.
|
||||
pub fn clear(&mut self) -> Vec<Index> {
|
||||
self.available.clear().into_iter().map(|(_, (_, index))| index).collect()
|
||||
}
|
||||
}
|
||||
|
||||
/// Create or open an index in the specified path.
|
||||
@@ -335,7 +340,8 @@ mod tests {
|
||||
impl IndexMapper {
|
||||
fn test() -> (Self, Env, IndexSchedulerHandle) {
|
||||
let (index_scheduler, handle) = IndexScheduler::test(true, vec![]);
|
||||
(index_scheduler.index_mapper, index_scheduler.env, handle)
|
||||
let index_scheduler = index_scheduler.inner();
|
||||
(index_scheduler.index_mapper.clone(), index_scheduler.env.clone(), handle)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -61,7 +61,7 @@ pub struct IndexMapper {
|
||||
pub(crate) index_stats: Database<UuidCodec, SerdeJson<IndexStats>>,
|
||||
|
||||
/// Path to the folder where the LMDB environments of each index are.
|
||||
base_path: PathBuf,
|
||||
pub(crate) base_path: PathBuf,
|
||||
/// The map size an index is opened with on the first time.
|
||||
index_base_map_size: usize,
|
||||
/// The quantity by which the map size of an index is incremented upon reopening, in bytes.
|
||||
@@ -135,7 +135,7 @@ impl IndexMapper {
|
||||
index_growth_amount: usize,
|
||||
index_count: usize,
|
||||
enable_mdb_writemap: bool,
|
||||
indexer_config: IndexerConfig,
|
||||
indexer_config: Arc<IndexerConfig>,
|
||||
) -> Result<Self> {
|
||||
let mut wtxn = env.write_txn()?;
|
||||
let index_mapping = env.create_database(&mut wtxn, Some(INDEX_MAPPING))?;
|
||||
@@ -150,7 +150,7 @@ impl IndexMapper {
|
||||
index_base_map_size,
|
||||
index_growth_amount,
|
||||
enable_mdb_writemap,
|
||||
indexer_config: Arc::new(indexer_config),
|
||||
indexer_config,
|
||||
})
|
||||
}
|
||||
|
||||
@@ -428,6 +428,11 @@ impl IndexMapper {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Returns the indexes that were opened by the `IndexMapper`.
|
||||
pub fn clear(&mut self) -> Vec<Index> {
|
||||
self.index_map.write().unwrap().clear()
|
||||
}
|
||||
|
||||
/// The stats of an index.
|
||||
///
|
||||
/// If available in the cache, they are directly returned.
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
use std::collections::BTreeSet;
|
||||
use std::fmt::Write;
|
||||
use std::ops::Deref;
|
||||
|
||||
use meilisearch_types::heed::types::{OwnedType, SerdeBincode, SerdeJson, Str};
|
||||
use meilisearch_types::heed::{Database, RoTxn};
|
||||
@@ -8,12 +9,13 @@ use meilisearch_types::tasks::{Details, Task};
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use crate::index_mapper::IndexMapper;
|
||||
use crate::{IndexScheduler, Kind, Status, BEI128};
|
||||
use crate::{IndexScheduler, IndexSchedulerInner, Kind, Status, BEI128};
|
||||
|
||||
pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String {
|
||||
scheduler.assert_internally_consistent();
|
||||
|
||||
let IndexScheduler {
|
||||
let inner = scheduler.inner();
|
||||
let IndexSchedulerInner {
|
||||
autobatching_enabled,
|
||||
must_stop_processing: _,
|
||||
processing_tasks,
|
||||
@@ -38,13 +40,15 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String {
|
||||
test_breakpoint_sdr: _,
|
||||
planned_failures: _,
|
||||
run_loop_iteration: _,
|
||||
} = scheduler;
|
||||
zookeeper: _,
|
||||
options: _,
|
||||
} = inner.deref();
|
||||
|
||||
let rtxn = env.read_txn().unwrap();
|
||||
|
||||
let mut snap = String::new();
|
||||
|
||||
let processing_tasks = processing_tasks.read().unwrap().processing.clone();
|
||||
let processing_tasks = processing_tasks.read().processing.clone();
|
||||
snap.push_str(&format!("### Autobatching Enabled = {autobatching_enabled}\n"));
|
||||
snap.push_str("### Processing Tasks:\n");
|
||||
snap.push_str(&snapshot_bitmap(&processing_tasks));
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,5 +1,6 @@
|
||||
//! Thread-safe `Vec`-backend LRU cache using [`std::sync::atomic::AtomicU64`] for synchronization.
|
||||
|
||||
use std::mem;
|
||||
use std::sync::atomic::{AtomicU64, Ordering};
|
||||
|
||||
/// Thread-safe `Vec`-backend LRU cache
|
||||
@@ -190,6 +191,11 @@ where
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
/// Returns the generation associated to the key and values of the `LruMap`.
|
||||
pub fn clear(&mut self) -> Vec<(AtomicU64, (K, V))> {
|
||||
mem::take(&mut self.0.data)
|
||||
}
|
||||
}
|
||||
|
||||
/// The result of an insertion in a LRU map.
|
||||
|
||||
@@ -10,9 +10,9 @@ use meilisearch_types::tasks::{Details, IndexSwap, Kind, KindWithContent, Status
|
||||
use roaring::{MultiOps, RoaringBitmap};
|
||||
use time::OffsetDateTime;
|
||||
|
||||
use crate::{Error, IndexScheduler, Result, Task, TaskId, BEI128};
|
||||
use crate::{Error, IndexSchedulerInner, Result, Task, TaskId, BEI128};
|
||||
|
||||
impl IndexScheduler {
|
||||
impl IndexSchedulerInner {
|
||||
pub(crate) fn all_task_ids(&self, rtxn: &RoTxn) -> Result<RoaringBitmap> {
|
||||
enum_iterator::all().map(|s| self.get_status(rtxn, s)).union()
|
||||
}
|
||||
@@ -331,11 +331,12 @@ pub fn clamp_to_page_size(size: usize) -> usize {
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
impl IndexScheduler {
|
||||
impl crate::IndexScheduler {
|
||||
/// Asserts that the index scheduler's content is internally consistent.
|
||||
pub fn assert_internally_consistent(&self) {
|
||||
let rtxn = self.env.read_txn().unwrap();
|
||||
for task in self.all_tasks.iter(&rtxn).unwrap() {
|
||||
let this = self.inner();
|
||||
let rtxn = this.env.read_txn().unwrap();
|
||||
for task in this.all_tasks.iter(&rtxn).unwrap() {
|
||||
let (task_id, task) = task.unwrap();
|
||||
let task_id = task_id.get();
|
||||
|
||||
@@ -354,21 +355,21 @@ impl IndexScheduler {
|
||||
} = task;
|
||||
assert_eq!(uid, task.uid);
|
||||
if let Some(task_index_uid) = &task_index_uid {
|
||||
assert!(self
|
||||
assert!(this
|
||||
.index_tasks
|
||||
.get(&rtxn, task_index_uid.as_str())
|
||||
.unwrap()
|
||||
.unwrap()
|
||||
.contains(task.uid));
|
||||
}
|
||||
let db_enqueued_at = self
|
||||
let db_enqueued_at = this
|
||||
.enqueued_at
|
||||
.get(&rtxn, &BEI128::new(enqueued_at.unix_timestamp_nanos()))
|
||||
.unwrap()
|
||||
.unwrap();
|
||||
assert!(db_enqueued_at.contains(task_id));
|
||||
if let Some(started_at) = started_at {
|
||||
let db_started_at = self
|
||||
let db_started_at = this
|
||||
.started_at
|
||||
.get(&rtxn, &BEI128::new(started_at.unix_timestamp_nanos()))
|
||||
.unwrap()
|
||||
@@ -376,7 +377,7 @@ impl IndexScheduler {
|
||||
assert!(db_started_at.contains(task_id));
|
||||
}
|
||||
if let Some(finished_at) = finished_at {
|
||||
let db_finished_at = self
|
||||
let db_finished_at = this
|
||||
.finished_at
|
||||
.get(&rtxn, &BEI128::new(finished_at.unix_timestamp_nanos()))
|
||||
.unwrap()
|
||||
@@ -384,9 +385,9 @@ impl IndexScheduler {
|
||||
assert!(db_finished_at.contains(task_id));
|
||||
}
|
||||
if let Some(canceled_by) = canceled_by {
|
||||
let db_canceled_tasks = self.get_status(&rtxn, Status::Canceled).unwrap();
|
||||
let db_canceled_tasks = this.get_status(&rtxn, Status::Canceled).unwrap();
|
||||
assert!(db_canceled_tasks.contains(uid));
|
||||
let db_canceling_task = self.get_task(&rtxn, canceled_by).unwrap().unwrap();
|
||||
let db_canceling_task = this.get_task(&rtxn, canceled_by).unwrap().unwrap();
|
||||
assert_eq!(db_canceling_task.status, Status::Succeeded);
|
||||
match db_canceling_task.kind {
|
||||
KindWithContent::TaskCancelation { query: _, tasks } => {
|
||||
@@ -427,7 +428,7 @@ impl IndexScheduler {
|
||||
Details::IndexInfo { primary_key: pk1 } => match &kind {
|
||||
KindWithContent::IndexCreation { index_uid, primary_key: pk2 }
|
||||
| KindWithContent::IndexUpdate { index_uid, primary_key: pk2 } => {
|
||||
self.index_tasks
|
||||
this.index_tasks
|
||||
.get(&rtxn, index_uid.as_str())
|
||||
.unwrap()
|
||||
.unwrap()
|
||||
@@ -535,23 +536,23 @@ impl IndexScheduler {
|
||||
}
|
||||
}
|
||||
|
||||
assert!(self.get_status(&rtxn, status).unwrap().contains(uid));
|
||||
assert!(self.get_kind(&rtxn, kind.as_kind()).unwrap().contains(uid));
|
||||
assert!(this.get_status(&rtxn, status).unwrap().contains(uid));
|
||||
assert!(this.get_kind(&rtxn, kind.as_kind()).unwrap().contains(uid));
|
||||
|
||||
if let KindWithContent::DocumentAdditionOrUpdate { content_file, .. } = kind {
|
||||
match status {
|
||||
Status::Enqueued | Status::Processing => {
|
||||
assert!(self
|
||||
assert!(this
|
||||
.file_store
|
||||
.all_uuids()
|
||||
.unwrap()
|
||||
.any(|uuid| uuid.as_ref().unwrap() == &content_file),
|
||||
"Could not find uuid `{content_file}` in the file_store. Available uuids are {:?}.",
|
||||
self.file_store.all_uuids().unwrap().collect::<std::result::Result<Vec<_>, file_store::Error>>().unwrap(),
|
||||
this.file_store.all_uuids().unwrap().collect::<std::result::Result<Vec<_>, file_store::Error>>().unwrap(),
|
||||
);
|
||||
}
|
||||
Status::Succeeded | Status::Failed | Status::Canceled => {
|
||||
assert!(self
|
||||
assert!(this
|
||||
.file_store
|
||||
.all_uuids()
|
||||
.unwrap()
|
||||
|
||||
@@ -14,6 +14,7 @@ license.workspace = true
|
||||
base64 = "0.21.0"
|
||||
enum-iterator = "1.4.0"
|
||||
hmac = "0.12.1"
|
||||
log = "0.4.19"
|
||||
maplit = "1.0.2"
|
||||
meilisearch-types = { path = "../meilisearch-types" }
|
||||
rand = "0.8.5"
|
||||
@@ -24,3 +25,4 @@ sha2 = "0.10.6"
|
||||
thiserror = "1.0.40"
|
||||
time = { version = "0.3.20", features = ["serde-well-known", "formatting", "parsing", "macros"] }
|
||||
uuid = { version = "1.3.1", features = ["serde", "v4"] }
|
||||
zookeeper-client-sync = { git = "https://github.com/irevoire/zookeeper-client-sync.git" }
|
||||
|
||||
@@ -19,6 +19,7 @@ internal_error!(
|
||||
AuthControllerError: meilisearch_types::milli::heed::Error,
|
||||
std::io::Error,
|
||||
serde_json::Error,
|
||||
zookeeper_client_sync::Error,
|
||||
std::str::Utf8Error
|
||||
);
|
||||
|
||||
|
||||
@@ -16,22 +16,111 @@ pub use store::open_auth_store_env;
|
||||
use store::{generate_key_as_hexa, HeedAuthStore};
|
||||
use time::OffsetDateTime;
|
||||
use uuid::Uuid;
|
||||
use zookeeper_client_sync::{
|
||||
Acls, AddWatchMode, CreateMode, Error as ZkError, EventType, WatchedEvent, Zookeeper,
|
||||
};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct AuthController {
|
||||
store: Arc<HeedAuthStore>,
|
||||
master_key: Option<String>,
|
||||
zookeeper: Option<Arc<Zookeeper>>,
|
||||
}
|
||||
|
||||
impl AuthController {
|
||||
pub fn new(db_path: impl AsRef<Path>, master_key: &Option<String>) -> Result<Self> {
|
||||
pub fn new(
|
||||
db_path: impl AsRef<Path>,
|
||||
master_key: &Option<String>,
|
||||
zookeeper: Option<Arc<Zookeeper>>,
|
||||
) -> Result<Self> {
|
||||
let store = HeedAuthStore::new(db_path)?;
|
||||
let controller = Self { store: Arc::new(store), master_key: master_key.clone(), zookeeper };
|
||||
|
||||
if store.is_empty()? {
|
||||
generate_default_keys(&store)?;
|
||||
match controller.zookeeper {
|
||||
// setup the auth zk environment, the `auth` node
|
||||
Some(ref zookeeper) => {
|
||||
// Zookeeper Event listener loop
|
||||
let controller_clone = controller.clone();
|
||||
let zkk = zookeeper.clone();
|
||||
let watcher = zookeeper.watch("/auth", AddWatchMode::PersistentRecursive)?;
|
||||
watcher.run_on_change(move |event| {
|
||||
let WatchedEvent { event_type, path, .. } = dbg!(event);
|
||||
|
||||
match event_type {
|
||||
EventType::NodeDeleted => {
|
||||
// TODO: ugly unwraps
|
||||
let uuid = path.strip_prefix("/auth/").unwrap();
|
||||
let uuid = Uuid::parse_str(uuid).unwrap();
|
||||
log::info!("The key {} has been deleted", uuid);
|
||||
controller_clone.store.delete_api_key(uuid).unwrap();
|
||||
}
|
||||
EventType::NodeCreated | EventType::NodeDataChanged => {
|
||||
if path.strip_prefix("/auth/").map_or(false, |s| !s.is_empty()) {
|
||||
let (key, _stat) = zkk.get_data(&path).unwrap();
|
||||
let key: Key = serde_json::from_slice(&key).unwrap();
|
||||
log::info!("The key {} has been inserted", key.uid);
|
||||
controller_clone.store.put_api_key(key).unwrap();
|
||||
}
|
||||
}
|
||||
otherwise => panic!("Got the unexpected `{otherwise:?}` event!"),
|
||||
}
|
||||
});
|
||||
|
||||
// TODO: we should catch the potential unexpected errors here https://docs.rs/zookeeper-client/latest/zookeeper_client/struct.Client.html#method.create
|
||||
// for the moment we consider that `create` only returns Error::NodeExists.
|
||||
match zookeeper.create(
|
||||
"/auth",
|
||||
&[],
|
||||
&CreateMode::Persistent.with_acls(Acls::anyone_all()),
|
||||
) {
|
||||
// If the store is empty, we must generate and push the default api-keys.
|
||||
Ok(_) => generate_default_keys(&controller)?,
|
||||
// If the node exist we should clear our DB and download all the existing api-keys
|
||||
Err(ZkError::NodeExists) => {
|
||||
log::warn!("Auth directory already exists, we need to clear our keys + import the one in zookeeper");
|
||||
|
||||
let store = controller.store.clone();
|
||||
store.delete_all_keys()?;
|
||||
let (children, _) = zookeeper
|
||||
.get_children("/auth")
|
||||
.expect("Internal, the auth directory was deleted during execution.");
|
||||
|
||||
log::info!("Importing {} api-keys", children.len());
|
||||
for path in children {
|
||||
log::info!(" Importing {}", path);
|
||||
match zookeeper.get_data(&format!("/auth/{}", &path)) {
|
||||
Ok((key, _stat)) => {
|
||||
let key = serde_json::from_slice(&key).unwrap();
|
||||
let store = controller.store.clone();
|
||||
store.put_api_key(key)?;
|
||||
}
|
||||
Err(e) => panic!("{e}"),
|
||||
}
|
||||
// else the file was deleted while we were inserting the key. We ignore it.
|
||||
// TODO: What happens if someone updates the files before we have the time
|
||||
// to setup the watcher
|
||||
}
|
||||
}
|
||||
e @ Err(
|
||||
ZkError::NoNode | ZkError::NoChildrenForEphemerals | ZkError::InvalidAcl,
|
||||
) => unreachable!("{e:?}"),
|
||||
Err(e) => panic!("{e}"),
|
||||
}
|
||||
// TODO: Race condition above:
|
||||
// What happens if two node join exactly at the same moment:
|
||||
// One will create the dir
|
||||
// The second one will delete its DB, load nothing and install a watcher
|
||||
// The first one will push its keys and should wake up and update the second one.
|
||||
// / BUT, if the second one delete its DB and the first one push its files before the second one install the watcher we're fucked
|
||||
}
|
||||
None => {
|
||||
if controller.store.is_empty()? {
|
||||
generate_default_keys(&controller)?;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(Self { store: Arc::new(store), master_key: master_key.clone() })
|
||||
Ok(controller)
|
||||
}
|
||||
|
||||
/// Return `Ok(())` if the auth controller is able to access one of its database.
|
||||
@@ -53,7 +142,23 @@ impl AuthController {
|
||||
pub fn create_key(&self, create_key: CreateApiKey) -> Result<Key> {
|
||||
match self.store.get_api_key(create_key.uid)? {
|
||||
Some(_) => Err(AuthControllerError::ApiKeyAlreadyExists(create_key.uid.to_string())),
|
||||
None => self.store.put_api_key(create_key.to_key()),
|
||||
None => self.put_key(create_key.to_key()),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn put_key(&self, key: Key) -> Result<Key> {
|
||||
let store = self.store.clone();
|
||||
match &self.zookeeper {
|
||||
Some(zookeeper) => {
|
||||
zookeeper.create(
|
||||
&format!("/auth/{}", key.uid),
|
||||
&serde_json::to_vec_pretty(&key)?,
|
||||
&CreateMode::Persistent.with_acls(Acls::anyone_all()),
|
||||
)?;
|
||||
|
||||
Ok(key)
|
||||
}
|
||||
None => store.put_api_key(key),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -68,7 +173,20 @@ impl AuthController {
|
||||
name => key.name = name.set(),
|
||||
};
|
||||
key.updated_at = OffsetDateTime::now_utc();
|
||||
self.store.put_api_key(key)
|
||||
let store = self.store.clone();
|
||||
// TODO: we may commit only after zk persisted the keys
|
||||
match &self.zookeeper {
|
||||
Some(zookeeper) => {
|
||||
zookeeper.set_data(
|
||||
&format!("/auth/{}", key.uid),
|
||||
&serde_json::to_vec_pretty(&key)?,
|
||||
None,
|
||||
)?;
|
||||
|
||||
Ok(key)
|
||||
}
|
||||
None => store.put_api_key(key),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn get_key(&self, uid: Uuid) -> Result<Key> {
|
||||
@@ -110,7 +228,19 @@ impl AuthController {
|
||||
}
|
||||
|
||||
pub fn delete_key(&self, uid: Uuid) -> Result<()> {
|
||||
if self.store.delete_api_key(uid)? {
|
||||
let deleted = match &self.zookeeper {
|
||||
Some(zookeeper) => match zookeeper.delete(&format!("/auth/{}", uid), None) {
|
||||
Ok(()) => true,
|
||||
Err(ZkError::NoNode) => false,
|
||||
Err(e) => return Err(e.into()),
|
||||
},
|
||||
None => {
|
||||
let store = self.store.clone();
|
||||
store.delete_api_key(uid)?
|
||||
}
|
||||
};
|
||||
|
||||
if deleted {
|
||||
Ok(())
|
||||
} else {
|
||||
Err(AuthControllerError::ApiKeyNotFound(uid.to_string()))
|
||||
@@ -159,7 +289,7 @@ impl AuthController {
|
||||
self.store.delete_all_keys()
|
||||
}
|
||||
|
||||
/// Delete all the keys in the DB.
|
||||
/// Insert a key in the DB without any check on its validity
|
||||
pub fn raw_insert_key(&mut self, key: Key) -> Result<()> {
|
||||
self.store.put_api_key(key)?;
|
||||
Ok(())
|
||||
@@ -304,10 +434,9 @@ pub struct IndexSearchRules {
|
||||
pub filter: Option<serde_json::Value>,
|
||||
}
|
||||
|
||||
fn generate_default_keys(store: &HeedAuthStore) -> Result<()> {
|
||||
store.put_api_key(Key::default_admin())?;
|
||||
store.put_api_key(Key::default_search())?;
|
||||
|
||||
fn generate_default_keys(controller: &AuthController) -> Result<()> {
|
||||
controller.put_key(Key::default_admin())?;
|
||||
controller.put_key(Key::default_search())?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
use std::borrow::Borrow;
|
||||
use std::fmt::{self, Debug, Display};
|
||||
use std::fs::File;
|
||||
use std::io::{self, Seek, Write};
|
||||
@@ -42,7 +41,7 @@ impl Display for DocumentFormatError {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
match self {
|
||||
Self::Io(e) => write!(f, "{e}"),
|
||||
Self::MalformedPayload(me, b) => match me.borrow() {
|
||||
Self::MalformedPayload(me, b) => match me {
|
||||
Error::Json(se) => {
|
||||
let mut message = match se.classify() {
|
||||
Category::Data => {
|
||||
|
||||
@@ -175,6 +175,7 @@ macro_rules! make_error_codes {
|
||||
|
||||
// An exhaustive list of all the error codes used by meilisearch.
|
||||
make_error_codes! {
|
||||
S3Error , System , INTERNAL_SERVER_ERROR;
|
||||
ApiKeyAlreadyExists , InvalidRequest , CONFLICT ;
|
||||
ApiKeyNotFound , InvalidRequest , NOT_FOUND ;
|
||||
BadParameter , InvalidRequest , BAD_REQUEST;
|
||||
|
||||
@@ -58,7 +58,7 @@ lazy_static = "1.4.0"
|
||||
log = "0.4.17"
|
||||
meilisearch-auth = { path = "../meilisearch-auth" }
|
||||
meilisearch-types = { path = "../meilisearch-types" }
|
||||
mimalloc = { version = "0.1.36", default-features = false }
|
||||
mimalloc = { version = "0.1.37", default-features = false }
|
||||
mime = "0.3.17"
|
||||
num_cpus = "1.15.0"
|
||||
obkv = "0.2.0"
|
||||
@@ -69,6 +69,8 @@ permissive-json-pointer = { path = "../permissive-json-pointer" }
|
||||
pin-project-lite = "0.2.9"
|
||||
platform-dirs = "0.3.0"
|
||||
prometheus = { version = "0.13.3", features = ["process"] }
|
||||
puffin = "0.16.0"
|
||||
puffin_http = { version = "0.13.0", optional = true }
|
||||
rand = "0.8.5"
|
||||
rayon = "1.7.0"
|
||||
regex = "1.7.3"
|
||||
@@ -78,6 +80,7 @@ reqwest = { version = "0.11.16", features = [
|
||||
], default-features = false }
|
||||
rustls = "0.20.8"
|
||||
rustls-pemfile = "1.0.2"
|
||||
strois = "0.0.4"
|
||||
segment = { version = "0.2.2", optional = true }
|
||||
serde = { version = "1.0.160", features = ["derive"] }
|
||||
serde_json = { version = "1.0.95", features = ["preserve_order"] }
|
||||
@@ -103,6 +106,7 @@ walkdir = "2.3.3"
|
||||
yaup = "0.2.1"
|
||||
serde_urlencoded = "0.7.1"
|
||||
termcolor = "1.2.0"
|
||||
zookeeper-client-sync = { git = "https://github.com/irevoire/zookeeper-client-sync.git" }
|
||||
|
||||
[dev-dependencies]
|
||||
actix-rt = "2.8.0"
|
||||
@@ -133,7 +137,18 @@ zip = { version = "0.6.4", optional = true }
|
||||
[features]
|
||||
default = ["analytics", "meilisearch-types/all-tokenizations", "mini-dashboard"]
|
||||
analytics = ["segment"]
|
||||
mini-dashboard = ["actix-web-static-files", "static-files", "anyhow", "cargo_toml", "hex", "reqwest", "sha-1", "tempfile", "zip"]
|
||||
profile-with-puffin = ["dep:puffin_http"]
|
||||
mini-dashboard = [
|
||||
"actix-web-static-files",
|
||||
"static-files",
|
||||
"anyhow",
|
||||
"cargo_toml",
|
||||
"hex",
|
||||
"reqwest",
|
||||
"sha-1",
|
||||
"tempfile",
|
||||
"zip",
|
||||
]
|
||||
chinese = ["meilisearch-types/chinese"]
|
||||
hebrew = ["meilisearch-types/hebrew"]
|
||||
japanese = ["meilisearch-types/japanese"]
|
||||
|
||||
@@ -312,6 +312,13 @@ impl From<Opt> for Infos {
|
||||
config_file_path,
|
||||
#[cfg(all(not(debug_assertions), feature = "analytics"))]
|
||||
no_analytics: _,
|
||||
zk_url: _,
|
||||
s3_url: _,
|
||||
s3_region: _,
|
||||
s3_bucket: _,
|
||||
s3_access_key: _,
|
||||
s3_secret_key: _,
|
||||
s3_security_token: _,
|
||||
} = options;
|
||||
|
||||
let schedule_snapshot = match schedule_snapshot {
|
||||
|
||||
@@ -33,6 +33,8 @@ pub enum MeilisearchHttpError {
|
||||
.0.iter().map(|uid| format!("\"{uid}\"")).collect::<Vec<_>>().join(", "), .0.len()
|
||||
)]
|
||||
SwapIndexPayloadWrongLength(Vec<IndexUid>),
|
||||
#[error("S3 Error: {0}")]
|
||||
S3Error(#[from] strois::Error),
|
||||
#[error(transparent)]
|
||||
IndexUid(#[from] IndexUidFormatError),
|
||||
#[error(transparent)]
|
||||
@@ -65,6 +67,7 @@ impl ErrorCode for MeilisearchHttpError {
|
||||
MeilisearchHttpError::InvalidExpression(_, _) => Code::InvalidSearchFilter,
|
||||
MeilisearchHttpError::PayloadTooLarge(_) => Code::PayloadTooLarge,
|
||||
MeilisearchHttpError::SwapIndexPayloadWrongLength(_) => Code::InvalidSwapIndexes,
|
||||
MeilisearchHttpError::S3Error(_) => Code::S3Error,
|
||||
MeilisearchHttpError::IndexUid(e) => e.error_code(),
|
||||
MeilisearchHttpError::SerdeJson(_) => Code::Internal,
|
||||
MeilisearchHttpError::HeedError(_) => Code::Internal,
|
||||
|
||||
@@ -39,6 +39,8 @@ use meilisearch_types::versioning::{check_version_file, create_version_file};
|
||||
use meilisearch_types::{compression, milli, VERSION_FILE_NAME};
|
||||
pub use option::Opt;
|
||||
use option::ScheduleSnapshot;
|
||||
use strois::Bucket;
|
||||
use zookeeper_client_sync::Zookeeper;
|
||||
|
||||
use crate::error::MeilisearchHttpError;
|
||||
|
||||
@@ -136,14 +138,17 @@ enum OnFailure {
|
||||
KeepDb,
|
||||
}
|
||||
|
||||
pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<(Arc<IndexScheduler>, Arc<AuthController>)> {
|
||||
pub fn setup_meilisearch(
|
||||
opt: &Opt,
|
||||
zookeeper: Option<Arc<Zookeeper>>,
|
||||
) -> anyhow::Result<(Arc<IndexScheduler>, Arc<AuthController>)> {
|
||||
let empty_db = is_empty_db(&opt.db_path);
|
||||
let (index_scheduler, auth_controller) = if let Some(ref snapshot_path) = opt.import_snapshot {
|
||||
let snapshot_path_exists = snapshot_path.exists();
|
||||
// the db is empty and the snapshot exists, import it
|
||||
if empty_db && snapshot_path_exists {
|
||||
match compression::from_tar_gz(snapshot_path, &opt.db_path) {
|
||||
Ok(()) => open_or_create_database_unchecked(opt, OnFailure::RemoveDb)?,
|
||||
Ok(()) => open_or_create_database_unchecked(opt, OnFailure::RemoveDb, zookeeper)?,
|
||||
Err(e) => {
|
||||
std::fs::remove_dir_all(&opt.db_path)?;
|
||||
return Err(e);
|
||||
@@ -160,14 +165,14 @@ pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<(Arc<IndexScheduler>, Arc<
|
||||
bail!("snapshot doesn't exist at {}", snapshot_path.display())
|
||||
// the snapshot and the db exist, and we can ignore the snapshot because of the ignore_snapshot_if_db_exists flag
|
||||
} else {
|
||||
open_or_create_database(opt, empty_db)?
|
||||
open_or_create_database(opt, empty_db, zookeeper)?
|
||||
}
|
||||
} else if let Some(ref path) = opt.import_dump {
|
||||
let src_path_exists = path.exists();
|
||||
// the db is empty and the dump exists, import it
|
||||
if empty_db && src_path_exists {
|
||||
let (mut index_scheduler, mut auth_controller) =
|
||||
open_or_create_database_unchecked(opt, OnFailure::RemoveDb)?;
|
||||
open_or_create_database_unchecked(opt, OnFailure::RemoveDb, zookeeper)?;
|
||||
match import_dump(&opt.db_path, path, &mut index_scheduler, &mut auth_controller) {
|
||||
Ok(()) => (index_scheduler, auth_controller),
|
||||
Err(e) => {
|
||||
@@ -187,10 +192,10 @@ pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<(Arc<IndexScheduler>, Arc<
|
||||
// the dump and the db exist and we can ignore the dump because of the ignore_dump_if_db_exists flag
|
||||
// or, the dump is missing but we can ignore that because of the ignore_missing_dump flag
|
||||
} else {
|
||||
open_or_create_database(opt, empty_db)?
|
||||
open_or_create_database(opt, empty_db, zookeeper)?
|
||||
}
|
||||
} else {
|
||||
open_or_create_database(opt, empty_db)?
|
||||
open_or_create_database(opt, empty_db, zookeeper)?
|
||||
};
|
||||
|
||||
// We create a loop in a thread that registers snapshotCreation tasks
|
||||
@@ -199,15 +204,12 @@ pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<(Arc<IndexScheduler>, Arc<
|
||||
if let ScheduleSnapshot::Enabled(snapshot_delay) = opt.schedule_snapshot {
|
||||
let snapshot_delay = Duration::from_secs(snapshot_delay);
|
||||
let index_scheduler = index_scheduler.clone();
|
||||
thread::Builder::new()
|
||||
.name(String::from("register-snapshot-tasks"))
|
||||
.spawn(move || loop {
|
||||
thread::sleep(snapshot_delay);
|
||||
if let Err(e) = index_scheduler.register(KindWithContent::SnapshotCreation) {
|
||||
error!("Error while registering snapshot: {}", e);
|
||||
}
|
||||
})
|
||||
.unwrap();
|
||||
thread::spawn(move || loop {
|
||||
thread::sleep(snapshot_delay);
|
||||
if let Err(e) = index_scheduler.register(KindWithContent::SnapshotCreation) {
|
||||
error!("Error while registering snapshot: {}", e);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
Ok((index_scheduler, auth_controller))
|
||||
@@ -217,34 +219,48 @@ pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<(Arc<IndexScheduler>, Arc<
|
||||
fn open_or_create_database_unchecked(
|
||||
opt: &Opt,
|
||||
on_failure: OnFailure,
|
||||
zookeeper: Option<Arc<Zookeeper>>,
|
||||
) -> anyhow::Result<(IndexScheduler, AuthController)> {
|
||||
// we don't want to create anything in the data.ms yet, thus we
|
||||
// wrap our two builders in a closure that'll be executed later.
|
||||
let auth_controller = AuthController::new(&opt.db_path, &opt.master_key);
|
||||
let auth_controller = AuthController::new(&opt.db_path, &opt.master_key, zookeeper.clone());
|
||||
let instance_features = opt.to_instance_features();
|
||||
let index_scheduler_builder = || -> anyhow::Result<_> {
|
||||
Ok(IndexScheduler::new(IndexSchedulerOptions {
|
||||
version_file_path: opt.db_path.join(VERSION_FILE_NAME),
|
||||
auth_path: opt.db_path.join("auth"),
|
||||
tasks_path: opt.db_path.join("tasks"),
|
||||
update_file_path: opt.db_path.join("update_files"),
|
||||
indexes_path: opt.db_path.join("indexes"),
|
||||
snapshots_path: opt.snapshot_dir.clone(),
|
||||
dumps_path: opt.dump_dir.clone(),
|
||||
task_db_size: opt.max_task_db_size.get_bytes() as usize,
|
||||
index_base_map_size: opt.max_index_size.get_bytes() as usize,
|
||||
enable_mdb_writemap: opt.experimental_reduce_indexing_memory_usage,
|
||||
indexer_config: (&opt.indexer_options).try_into()?,
|
||||
autobatching_enabled: true,
|
||||
max_number_of_tasks: 1_000_000,
|
||||
index_growth_amount: byte_unit::Byte::from_str("10GiB").unwrap().get_bytes() as usize,
|
||||
index_count: DEFAULT_INDEX_COUNT,
|
||||
instance_features,
|
||||
})?)
|
||||
};
|
||||
let index_scheduler = IndexScheduler::new(Arc::new(IndexSchedulerOptions {
|
||||
version_file_path: opt.db_path.join(VERSION_FILE_NAME),
|
||||
auth_path: opt.db_path.join("auth"),
|
||||
tasks_path: opt.db_path.join("tasks"),
|
||||
update_file_path: opt.db_path.join("update_files"),
|
||||
indexes_path: opt.db_path.join("indexes"),
|
||||
snapshots_path: opt.snapshot_dir.clone(),
|
||||
dumps_path: opt.dump_dir.clone(),
|
||||
task_db_size: opt.max_task_db_size.get_bytes() as usize,
|
||||
index_base_map_size: opt.max_index_size.get_bytes() as usize,
|
||||
enable_mdb_writemap: opt.experimental_reduce_indexing_memory_usage,
|
||||
indexer_config: (&opt.indexer_options).try_into().map(Arc::new)?,
|
||||
autobatching_enabled: true,
|
||||
max_number_of_tasks: 1_000_000,
|
||||
index_growth_amount: byte_unit::Byte::from_str("10GiB").unwrap().get_bytes() as usize,
|
||||
index_count: DEFAULT_INDEX_COUNT,
|
||||
instance_features,
|
||||
zookeeper: zookeeper.clone(),
|
||||
s3: opt.s3_url.as_ref().map(|url| {
|
||||
Arc::new(
|
||||
Bucket::builder(url)
|
||||
.unwrap()
|
||||
.key(opt.s3_access_key.as_ref().expect("Need s3 key to work").clone())
|
||||
.secret(opt.s3_secret_key.as_ref().expect("Need s3 secret to work").clone())
|
||||
.maybe_token(opt.s3_security_token.clone())
|
||||
.region(&opt.s3_region)
|
||||
.with_url_path_style(true)
|
||||
.bucket(opt.s3_bucket.as_ref().expect("Need an s3 bucket to work"))
|
||||
.unwrap(),
|
||||
)
|
||||
}),
|
||||
}))
|
||||
.map_err(anyhow::Error::from);
|
||||
|
||||
match (
|
||||
index_scheduler_builder(),
|
||||
index_scheduler,
|
||||
auth_controller.map_err(anyhow::Error::from),
|
||||
create_version_file(&opt.db_path).map_err(anyhow::Error::from),
|
||||
) {
|
||||
@@ -262,12 +278,13 @@ fn open_or_create_database_unchecked(
|
||||
fn open_or_create_database(
|
||||
opt: &Opt,
|
||||
empty_db: bool,
|
||||
zookeeper: Option<Arc<Zookeeper>>,
|
||||
) -> anyhow::Result<(IndexScheduler, AuthController)> {
|
||||
if !empty_db {
|
||||
check_version_file(&opt.db_path)?;
|
||||
}
|
||||
|
||||
open_or_create_database_unchecked(opt, OnFailure::KeepDb)
|
||||
open_or_create_database_unchecked(opt, OnFailure::KeepDb, zookeeper)
|
||||
}
|
||||
|
||||
fn import_dump(
|
||||
@@ -277,6 +294,7 @@ fn import_dump(
|
||||
auth: &mut AuthController,
|
||||
) -> Result<(), anyhow::Error> {
|
||||
let reader = File::open(dump_path)?;
|
||||
let index_scheduler = index_scheduler.inner();
|
||||
let mut dump_reader = dump::DumpReader::open(reader)?;
|
||||
|
||||
if let Some(date) = dump_reader.date() {
|
||||
|
||||
@@ -12,6 +12,7 @@ use meilisearch::analytics::Analytics;
|
||||
use meilisearch::{analytics, create_app, prototype_name, setup_meilisearch, Opt};
|
||||
use meilisearch_auth::{generate_master_key, AuthController, MASTER_KEY_MIN_SIZE};
|
||||
use termcolor::{Color, ColorChoice, ColorSpec, StandardStream, WriteColor};
|
||||
use zookeeper_client_sync::Zookeeper;
|
||||
|
||||
#[global_allocator]
|
||||
static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc;
|
||||
@@ -30,6 +31,10 @@ fn setup(opt: &Opt) -> anyhow::Result<()> {
|
||||
async fn main() -> anyhow::Result<()> {
|
||||
let (opt, config_read_from) = Opt::try_build()?;
|
||||
|
||||
#[cfg(feature = "profile-with-puffin")]
|
||||
let _server = puffin_http::Server::new(&format!("0.0.0.0:{}", puffin_http::DEFAULT_PORT))?;
|
||||
puffin::set_scopes_on(cfg!(feature = "profile-with-puffin"));
|
||||
|
||||
anyhow::ensure!(
|
||||
!(cfg!(windows) && opt.experimental_reduce_indexing_memory_usage),
|
||||
"The `experimental-reduce-indexing-memory-usage` flag is not supported on Windows"
|
||||
@@ -59,7 +64,13 @@ async fn main() -> anyhow::Result<()> {
|
||||
_ => (),
|
||||
}
|
||||
|
||||
let (index_scheduler, auth_controller) = setup_meilisearch(&opt)?;
|
||||
let zookeeper = match opt.zk_url {
|
||||
Some(ref url) => Some(Arc::new(Zookeeper::connect(url).await.unwrap())),
|
||||
None => None,
|
||||
};
|
||||
let optc = opt.clone();
|
||||
let (index_scheduler, auth_controller) =
|
||||
tokio::task::spawn_blocking(move || setup_meilisearch(&optc, zookeeper)).await.unwrap()?;
|
||||
|
||||
#[cfg(all(not(debug_assertions), feature = "analytics"))]
|
||||
let analytics = if !opt.no_analytics {
|
||||
|
||||
@@ -28,6 +28,13 @@ const MEILI_DB_PATH: &str = "MEILI_DB_PATH";
|
||||
const MEILI_HTTP_ADDR: &str = "MEILI_HTTP_ADDR";
|
||||
const MEILI_MASTER_KEY: &str = "MEILI_MASTER_KEY";
|
||||
const MEILI_ENV: &str = "MEILI_ENV";
|
||||
const MEILI_ZK_URL: &str = "MEILI_ZK_URL";
|
||||
const MEILI_S3_URL: &str = "MEILI_S3_URL";
|
||||
const MEILI_S3_BUCKET: &str = "MEILI_S3_BUCKET";
|
||||
const MEILI_S3_ACCESS_KEY: &str = "MEILI_S3_ACCESS_KEY";
|
||||
const MEILI_S3_SECRET_KEY: &str = "MEILI_S3_SECRET_KEY";
|
||||
const MEILI_S3_SECURITY_TOKEN: &str = "MEILI_S3_SECURITY_TOKEN";
|
||||
const MEILI_S3_REGION: &str = "MEILI_S3_REGION";
|
||||
#[cfg(all(not(debug_assertions), feature = "analytics"))]
|
||||
const MEILI_NO_ANALYTICS: &str = "MEILI_NO_ANALYTICS";
|
||||
const MEILI_HTTP_PAYLOAD_SIZE_LIMIT: &str = "MEILI_HTTP_PAYLOAD_SIZE_LIMIT";
|
||||
@@ -56,6 +63,7 @@ const DEFAULT_CONFIG_FILE_PATH: &str = "./config.toml";
|
||||
const DEFAULT_DB_PATH: &str = "./data.ms";
|
||||
const DEFAULT_HTTP_ADDR: &str = "localhost:7700";
|
||||
const DEFAULT_ENV: &str = "development";
|
||||
const DEFAULT_S3_REGION: &str = "eu-central-1";
|
||||
const DEFAULT_HTTP_PAYLOAD_SIZE_LIMIT: &str = "100 MB";
|
||||
const DEFAULT_SNAPSHOT_DIR: &str = "snapshots/";
|
||||
const DEFAULT_SNAPSHOT_INTERVAL_SEC: u64 = 86400;
|
||||
@@ -154,6 +162,36 @@ pub struct Opt {
|
||||
#[serde(default = "default_env")]
|
||||
pub env: String,
|
||||
|
||||
/// Sets the HTTP address and port used to communicate with the zookeeper cluster.
|
||||
/// If ran locally, the default url is `http://localhost:2181/`.
|
||||
#[clap(long, env = MEILI_ZK_URL)]
|
||||
pub zk_url: Option<String>,
|
||||
|
||||
/// Sets the address and port used to communicate with the S3 bucket.
|
||||
#[clap(long, env = MEILI_S3_URL)]
|
||||
pub s3_url: Option<String>,
|
||||
|
||||
/// Sets the region used to communicate with the s3 bucket.
|
||||
#[clap(long, env = MEILI_S3_REGION, default_value_t = default_s3_region())]
|
||||
#[serde(default = "default_s3_region")]
|
||||
pub s3_region: String,
|
||||
|
||||
/// Sets the S3 bucket name to use.
|
||||
#[clap(long, env = MEILI_S3_BUCKET)]
|
||||
pub s3_bucket: Option<String>,
|
||||
|
||||
/// Set the S3 access key. If used you must also set the secret key.
|
||||
#[clap(long, env = MEILI_S3_ACCESS_KEY)]
|
||||
pub s3_access_key: Option<String>,
|
||||
|
||||
/// Set the S3 secret key. If used you must also set the access key.
|
||||
#[clap(long, env = MEILI_S3_SECRET_KEY)]
|
||||
pub s3_secret_key: Option<String>,
|
||||
|
||||
/// Security token, can't be used with access key and secret key.
|
||||
#[clap(long, env = MEILI_S3_SECURITY_TOKEN)]
|
||||
pub s3_security_token: Option<String>,
|
||||
|
||||
/// Deactivates Meilisearch's built-in telemetry when provided.
|
||||
///
|
||||
/// Meilisearch automatically collects data from all instances that do not opt out using this flag.
|
||||
@@ -368,6 +406,13 @@ impl Opt {
|
||||
http_addr,
|
||||
master_key,
|
||||
env,
|
||||
zk_url,
|
||||
s3_url,
|
||||
s3_region,
|
||||
s3_bucket,
|
||||
s3_access_key,
|
||||
s3_secret_key,
|
||||
s3_security_token,
|
||||
max_index_size: _,
|
||||
max_task_db_size: _,
|
||||
http_payload_size_limit,
|
||||
@@ -401,6 +446,25 @@ impl Opt {
|
||||
export_to_env_if_not_present(MEILI_MASTER_KEY, master_key);
|
||||
}
|
||||
export_to_env_if_not_present(MEILI_ENV, env);
|
||||
if let Some(zk_url) = zk_url {
|
||||
export_to_env_if_not_present(MEILI_ZK_URL, zk_url);
|
||||
}
|
||||
if let Some(s3_url) = s3_url {
|
||||
export_to_env_if_not_present(MEILI_S3_URL, s3_url);
|
||||
}
|
||||
export_to_env_if_not_present(MEILI_S3_REGION, s3_region);
|
||||
if let Some(s3_bucket) = s3_bucket {
|
||||
export_to_env_if_not_present(MEILI_S3_BUCKET, s3_bucket);
|
||||
}
|
||||
if let Some(s3_access_key) = s3_access_key {
|
||||
export_to_env_if_not_present(MEILI_S3_ACCESS_KEY, s3_access_key);
|
||||
}
|
||||
if let Some(s3_secret_key) = s3_secret_key {
|
||||
export_to_env_if_not_present(MEILI_S3_SECRET_KEY, s3_secret_key);
|
||||
}
|
||||
if let Some(s3_security_token) = s3_security_token {
|
||||
export_to_env_if_not_present(MEILI_S3_SECURITY_TOKEN, s3_security_token);
|
||||
}
|
||||
#[cfg(all(not(debug_assertions), feature = "analytics"))]
|
||||
{
|
||||
export_to_env_if_not_present(MEILI_NO_ANALYTICS, no_analytics.to_string());
|
||||
@@ -547,7 +611,7 @@ impl TryFrom<&IndexerOpts> for IndexerConfig {
|
||||
Ok(Self {
|
||||
log_every_n: Some(DEFAULT_LOG_EVERY_N),
|
||||
max_memory: other.max_indexing_memory.map(|b| b.get_bytes() as usize),
|
||||
thread_pool: Some(thread_pool),
|
||||
thread_pool: Some(Arc::new(thread_pool)),
|
||||
max_positions_per_attributes: None,
|
||||
skip_index_budget: other.skip_index_budget,
|
||||
..Default::default()
|
||||
@@ -715,6 +779,10 @@ fn default_env() -> String {
|
||||
DEFAULT_ENV.to_string()
|
||||
}
|
||||
|
||||
fn default_s3_region() -> String {
|
||||
DEFAULT_S3_REGION.to_string()
|
||||
}
|
||||
|
||||
fn default_max_index_size() -> Byte {
|
||||
Byte::from_bytes(INDEX_SIZE)
|
||||
}
|
||||
|
||||
@@ -41,14 +41,10 @@ pub async fn create_api_key(
|
||||
_req: HttpRequest,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let v = body.into_inner();
|
||||
let res = tokio::task::spawn_blocking(move || -> Result<_, AuthControllerError> {
|
||||
let key = auth_controller.create_key(v)?;
|
||||
Ok(KeyView::from_key(key, &auth_controller))
|
||||
})
|
||||
.await
|
||||
.map_err(|e| ResponseError::from_msg(e.to_string(), Code::Internal))??;
|
||||
let key = auth_controller.create_key(v)?;
|
||||
let key = KeyView::from_key(key, &auth_controller);
|
||||
|
||||
Ok(HttpResponse::Created().json(res))
|
||||
Ok(HttpResponse::Created().json(key))
|
||||
}
|
||||
|
||||
#[derive(Deserr, Debug, Clone, Copy)]
|
||||
@@ -110,17 +106,11 @@ pub async fn patch_api_key(
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let key = path.into_inner().key;
|
||||
let patch_api_key = body.into_inner();
|
||||
let res = tokio::task::spawn_blocking(move || -> Result<_, AuthControllerError> {
|
||||
let uid =
|
||||
Uuid::parse_str(&key).or_else(|_| auth_controller.get_uid_from_encoded_key(&key))?;
|
||||
let key = auth_controller.update_key(uid, patch_api_key)?;
|
||||
let uid = Uuid::parse_str(&key).or_else(|_| auth_controller.get_uid_from_encoded_key(&key))?;
|
||||
let key = auth_controller.update_key(uid, patch_api_key)?;
|
||||
let key = KeyView::from_key(key, &auth_controller);
|
||||
|
||||
Ok(KeyView::from_key(key, &auth_controller))
|
||||
})
|
||||
.await
|
||||
.map_err(|e| ResponseError::from_msg(e.to_string(), Code::Internal))??;
|
||||
|
||||
Ok(HttpResponse::Ok().json(res))
|
||||
Ok(HttpResponse::Ok().json(key))
|
||||
}
|
||||
|
||||
pub async fn delete_api_key(
|
||||
@@ -128,13 +118,8 @@ pub async fn delete_api_key(
|
||||
path: web::Path<AuthParam>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let key = path.into_inner().key;
|
||||
tokio::task::spawn_blocking(move || {
|
||||
let uid =
|
||||
Uuid::parse_str(&key).or_else(|_| auth_controller.get_uid_from_encoded_key(&key))?;
|
||||
auth_controller.delete_key(uid)
|
||||
})
|
||||
.await
|
||||
.map_err(|e| ResponseError::from_msg(e.to_string(), Code::Internal))??;
|
||||
let uid = Uuid::parse_str(&key).or_else(|_| auth_controller.get_uid_from_encoded_key(&key))?;
|
||||
auth_controller.delete_key(uid)?;
|
||||
|
||||
Ok(HttpResponse::NoContent().finish())
|
||||
}
|
||||
|
||||
@@ -29,8 +29,7 @@ pub async fn create_dump(
|
||||
keys: auth_controller.list_keys()?,
|
||||
instance_uid: analytics.instance_uid().cloned(),
|
||||
};
|
||||
let task: SummarizedTaskView =
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
|
||||
let task: SummarizedTaskView = index_scheduler.register(task)?.into();
|
||||
|
||||
debug!("returns: {:?}", task);
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
|
||||
@@ -78,6 +78,6 @@ async fn patch_features(
|
||||
}),
|
||||
Some(&req),
|
||||
);
|
||||
index_scheduler.put_runtime_features(new_features)?;
|
||||
index_scheduler.inner().put_runtime_features(new_features)?;
|
||||
Ok(HttpResponse::Ok().json(new_features))
|
||||
}
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
use std::io::ErrorKind;
|
||||
use std::io::{BufReader, ErrorKind, Seek, SeekFrom};
|
||||
|
||||
use actix_web::http::header::CONTENT_TYPE;
|
||||
use actix_web::web::Data;
|
||||
@@ -129,8 +129,7 @@ pub async fn delete_document(
|
||||
index_uid: index_uid.to_string(),
|
||||
documents_ids: vec![document_id],
|
||||
};
|
||||
let task: SummarizedTaskView =
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
|
||||
let task: SummarizedTaskView = index_scheduler.register(task)?.into();
|
||||
debug!("returns: {:?}", task);
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
}
|
||||
@@ -396,11 +395,12 @@ async fn document_addition(
|
||||
return Err(MeilisearchHttpError::MissingPayload(format));
|
||||
}
|
||||
|
||||
if let Err(e) = buffer.seek(std::io::SeekFrom::Start(0)).await {
|
||||
if let Err(e) = buffer.seek(SeekFrom::Start(0)).await {
|
||||
return Err(MeilisearchHttpError::Payload(ReceivePayload(Box::new(e))));
|
||||
}
|
||||
|
||||
let read_file = buffer.into_inner().into_std().await;
|
||||
let s3 = index_scheduler.s3.clone();
|
||||
let documents_count = tokio::task::spawn_blocking(move || {
|
||||
let documents_count = match format {
|
||||
PayloadType::Json => read_json(&read_file, update_file.as_file_mut())?,
|
||||
@@ -409,8 +409,16 @@ async fn document_addition(
|
||||
}
|
||||
PayloadType::Ndjson => read_ndjson(&read_file, update_file.as_file_mut())?,
|
||||
};
|
||||
|
||||
if let Some(s3) = s3 {
|
||||
update_file.seek(SeekFrom::Start(0)).unwrap();
|
||||
let mut reader = BufReader::new(&*update_file);
|
||||
s3.put_object_multipart(format!("update-files/{}", uuid), &mut reader)?;
|
||||
}
|
||||
|
||||
// we NEED to persist the file here because we moved the `udpate_file` in another task.
|
||||
update_file.persist()?;
|
||||
|
||||
Ok(documents_count)
|
||||
})
|
||||
.await;
|
||||
@@ -445,7 +453,7 @@ async fn document_addition(
|
||||
};
|
||||
|
||||
let scheduler = index_scheduler.clone();
|
||||
let task = match tokio::task::spawn_blocking(move || scheduler.register(task)).await? {
|
||||
let task = match scheduler.register(task) {
|
||||
Ok(task) => task,
|
||||
Err(e) => {
|
||||
index_scheduler.delete_update_file(uuid)?;
|
||||
@@ -476,8 +484,7 @@ pub async fn delete_documents_batch(
|
||||
|
||||
let task =
|
||||
KindWithContent::DocumentDeletion { index_uid: index_uid.to_string(), documents_ids: ids };
|
||||
let task: SummarizedTaskView =
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
|
||||
let task: SummarizedTaskView = index_scheduler.register(task)?.into();
|
||||
|
||||
debug!("returns: {:?}", task);
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
@@ -512,8 +519,7 @@ pub async fn delete_documents_by_filter(
|
||||
.map_err(|err| ResponseError::from_msg(err.message, Code::InvalidDocumentFilter))?;
|
||||
let task = KindWithContent::DocumentDeletionByFilter { index_uid, filter_expr: filter };
|
||||
|
||||
let task: SummarizedTaskView =
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
|
||||
let task: SummarizedTaskView = index_scheduler.register(task)?.into();
|
||||
|
||||
debug!("returns: {:?}", task);
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
@@ -529,8 +535,7 @@ pub async fn clear_all_documents(
|
||||
analytics.delete_documents(DocumentDeletionKind::ClearAll, &req);
|
||||
|
||||
let task = KindWithContent::DocumentClear { index_uid: index_uid.to_string() };
|
||||
let task: SummarizedTaskView =
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
|
||||
let task: SummarizedTaskView = index_scheduler.register(task)?.into();
|
||||
|
||||
debug!("returns: {:?}", task);
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
|
||||
@@ -135,8 +135,7 @@ pub async fn create_index(
|
||||
);
|
||||
|
||||
let task = KindWithContent::IndexCreation { index_uid: uid.to_string(), primary_key };
|
||||
let task: SummarizedTaskView =
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
|
||||
let task: SummarizedTaskView = index_scheduler.register(task)?.into();
|
||||
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
} else {
|
||||
@@ -203,8 +202,7 @@ pub async fn update_index(
|
||||
primary_key: body.primary_key,
|
||||
};
|
||||
|
||||
let task: SummarizedTaskView =
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
|
||||
let task: SummarizedTaskView = index_scheduler.register(task)?.into();
|
||||
|
||||
debug!("returns: {:?}", task);
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
@@ -216,8 +214,7 @@ pub async fn delete_index(
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
|
||||
let task = KindWithContent::IndexDeletion { index_uid: index_uid.into_inner() };
|
||||
let task: SummarizedTaskView =
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
|
||||
let task: SummarizedTaskView = index_scheduler.register(task)?.into();
|
||||
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
}
|
||||
|
||||
@@ -55,10 +55,7 @@ macro_rules! make_setting_route {
|
||||
is_deletion: true,
|
||||
allow_index_creation,
|
||||
};
|
||||
let task: SummarizedTaskView =
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task))
|
||||
.await??
|
||||
.into();
|
||||
let task: SummarizedTaskView = index_scheduler.register(task)?.into();
|
||||
|
||||
debug!("returns: {:?}", task);
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
@@ -97,10 +94,7 @@ macro_rules! make_setting_route {
|
||||
is_deletion: false,
|
||||
allow_index_creation,
|
||||
};
|
||||
let task: SummarizedTaskView =
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task))
|
||||
.await??
|
||||
.into();
|
||||
let task: SummarizedTaskView = index_scheduler.register(task)?.into();
|
||||
|
||||
debug!("returns: {:?}", task);
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
@@ -586,8 +580,7 @@ pub async fn update_all(
|
||||
is_deletion: false,
|
||||
allow_index_creation,
|
||||
};
|
||||
let task: SummarizedTaskView =
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
|
||||
let task: SummarizedTaskView = index_scheduler.register(task)?.into();
|
||||
|
||||
debug!("returns: {:?}", task);
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
@@ -622,8 +615,7 @@ pub async fn delete_all(
|
||||
is_deletion: true,
|
||||
allow_index_creation,
|
||||
};
|
||||
let task: SummarizedTaskView =
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
|
||||
let task: SummarizedTaskView = index_scheduler.register(task)?.into();
|
||||
|
||||
debug!("returns: {:?}", task);
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
|
||||
@@ -18,7 +18,6 @@ use serde_json::json;
|
||||
use time::format_description::well_known::Rfc3339;
|
||||
use time::macros::format_description;
|
||||
use time::{Date, Duration, OffsetDateTime, Time};
|
||||
use tokio::task;
|
||||
|
||||
use super::SummarizedTaskView;
|
||||
use crate::analytics::Analytics;
|
||||
@@ -325,15 +324,12 @@ async fn cancel_tasks(
|
||||
|
||||
let query = params.into_query();
|
||||
|
||||
let (tasks, _) = index_scheduler.get_task_ids_from_authorized_indexes(
|
||||
&index_scheduler.read_txn()?,
|
||||
&query,
|
||||
index_scheduler.filters(),
|
||||
)?;
|
||||
let (tasks, _) =
|
||||
index_scheduler.get_task_ids_from_authorized_indexes(&query, index_scheduler.filters())?;
|
||||
let task_cancelation =
|
||||
KindWithContent::TaskCancelation { query: format!("?{}", req.query_string()), tasks };
|
||||
|
||||
let task = task::spawn_blocking(move || index_scheduler.register(task_cancelation)).await??;
|
||||
let task = index_scheduler.register(task_cancelation)?;
|
||||
let task: SummarizedTaskView = task.into();
|
||||
|
||||
Ok(HttpResponse::Ok().json(task))
|
||||
@@ -370,15 +366,12 @@ async fn delete_tasks(
|
||||
);
|
||||
let query = params.into_query();
|
||||
|
||||
let (tasks, _) = index_scheduler.get_task_ids_from_authorized_indexes(
|
||||
&index_scheduler.read_txn()?,
|
||||
&query,
|
||||
index_scheduler.filters(),
|
||||
)?;
|
||||
let (tasks, _) =
|
||||
index_scheduler.get_task_ids_from_authorized_indexes(&query, index_scheduler.filters())?;
|
||||
let task_deletion =
|
||||
KindWithContent::TaskDeletion { query: format!("?{}", req.query_string()), tasks };
|
||||
|
||||
let task = task::spawn_blocking(move || index_scheduler.register(task_deletion)).await??;
|
||||
let task = index_scheduler.register(task_deletion)?;
|
||||
let task: SummarizedTaskView = task.into();
|
||||
|
||||
Ok(HttpResponse::Ok().json(task))
|
||||
|
||||
@@ -39,7 +39,7 @@ impl Server {
|
||||
|
||||
let options = default_settings(dir.path());
|
||||
|
||||
let (index_scheduler, auth) = setup_meilisearch(&options).unwrap();
|
||||
let (index_scheduler, auth) = setup_meilisearch(&options, None).await.unwrap();
|
||||
let service = Service { index_scheduler, auth, options, api_key: None };
|
||||
|
||||
Server { service, _dir: Some(dir) }
|
||||
@@ -54,7 +54,7 @@ impl Server {
|
||||
|
||||
options.master_key = Some("MASTER_KEY".to_string());
|
||||
|
||||
let (index_scheduler, auth) = setup_meilisearch(&options).unwrap();
|
||||
let (index_scheduler, auth) = setup_meilisearch(&options, None).await.unwrap();
|
||||
let service = Service { index_scheduler, auth, options, api_key: None };
|
||||
|
||||
Server { service, _dir: Some(dir) }
|
||||
@@ -67,7 +67,7 @@ impl Server {
|
||||
}
|
||||
|
||||
pub async fn new_with_options(options: Opt) -> Result<Self, anyhow::Error> {
|
||||
let (index_scheduler, auth) = setup_meilisearch(&options)?;
|
||||
let (index_scheduler, auth) = setup_meilisearch(&options, None).await?;
|
||||
let service = Service { index_scheduler, auth, options, api_key: None };
|
||||
|
||||
Ok(Server { service, _dir: None })
|
||||
|
||||
@@ -17,7 +17,7 @@ bincode = "1.3.3"
|
||||
bstr = "1.4.0"
|
||||
bytemuck = { version = "1.13.1", features = ["extern_crate_alloc"] }
|
||||
byteorder = "1.4.3"
|
||||
charabia = { version = "0.8.2", default-features = false }
|
||||
charabia = { version = "0.8.4", default-features = false }
|
||||
concat-arrays = "0.1.2"
|
||||
crossbeam-channel = "0.5.8"
|
||||
deserr = "0.5.0"
|
||||
@@ -65,13 +65,16 @@ filter-parser = { path = "../filter-parser" }
|
||||
# documents words self-join
|
||||
itertools = "0.10.5"
|
||||
|
||||
# profiling
|
||||
puffin = "0.16.0"
|
||||
|
||||
# logging
|
||||
log = "0.4.17"
|
||||
logging_timer = "1.1.0"
|
||||
csv = "1.2.1"
|
||||
|
||||
[dev-dependencies]
|
||||
mimalloc = { version = "0.1.29", default-features = false }
|
||||
mimalloc = { version = "0.1.37", default-features = false }
|
||||
big_s = "1.0.2"
|
||||
insta = "1.29.0"
|
||||
maplit = "1.0.2"
|
||||
|
||||
@@ -15,6 +15,8 @@ impl<'t, 'u, 'i> ClearDocuments<'t, 'u, 'i> {
|
||||
}
|
||||
|
||||
pub fn execute(self) -> Result<u64> {
|
||||
puffin::profile_function!();
|
||||
|
||||
self.index.set_updated_at(self.wtxn, &OffsetDateTime::now_utc())?;
|
||||
let Index {
|
||||
env: _env,
|
||||
|
||||
@@ -109,6 +109,8 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
|
||||
Some(docid)
|
||||
}
|
||||
pub fn execute(self) -> Result<DocumentDeletionResult> {
|
||||
puffin::profile_function!();
|
||||
|
||||
let DetailedDocumentDeletionResult { deleted_documents, remaining_documents } =
|
||||
self.execute_inner()?;
|
||||
|
||||
|
||||
@@ -31,6 +31,8 @@ pub fn enrich_documents_batch<R: Read + Seek>(
|
||||
autogenerate_docids: bool,
|
||||
reader: DocumentsBatchReader<R>,
|
||||
) -> Result<StdResult<EnrichedDocumentsBatchReader<R>, UserError>> {
|
||||
puffin::profile_function!();
|
||||
|
||||
let (mut cursor, mut documents_batch_index) = reader.into_cursor_and_fields_index();
|
||||
|
||||
let mut external_ids = tempfile::tempfile().map(grenad::Writer::new)?;
|
||||
|
||||
@@ -30,6 +30,8 @@ pub fn extract_docid_word_positions<R: io::Read + io::Seek>(
|
||||
stop_words: Option<&fst::Set<&[u8]>>,
|
||||
max_positions_per_attributes: Option<u32>,
|
||||
) -> Result<(RoaringBitmap, grenad::Reader<File>, ScriptLanguageDocidsMap)> {
|
||||
puffin::profile_function!();
|
||||
|
||||
let max_positions_per_attributes = max_positions_per_attributes
|
||||
.map_or(MAX_POSITION_PER_ATTRIBUTE, |max| max.min(MAX_POSITION_PER_ATTRIBUTE));
|
||||
let max_memory = indexer.max_memory_by_thread();
|
||||
|
||||
@@ -20,6 +20,8 @@ pub fn extract_facet_number_docids<R: io::Read + io::Seek>(
|
||||
docid_fid_facet_number: grenad::Reader<R>,
|
||||
indexer: GrenadParameters,
|
||||
) -> Result<grenad::Reader<File>> {
|
||||
puffin::profile_function!();
|
||||
|
||||
let max_memory = indexer.max_memory_by_thread();
|
||||
|
||||
let mut facet_number_docids_sorter = create_sorter(
|
||||
|
||||
@@ -18,6 +18,8 @@ pub fn extract_facet_string_docids<R: io::Read + io::Seek>(
|
||||
docid_fid_facet_string: grenad::Reader<R>,
|
||||
indexer: GrenadParameters,
|
||||
) -> Result<grenad::Reader<File>> {
|
||||
puffin::profile_function!();
|
||||
|
||||
let max_memory = indexer.max_memory_by_thread();
|
||||
|
||||
let mut facet_string_docids_sorter = create_sorter(
|
||||
|
||||
@@ -34,6 +34,8 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
|
||||
indexer: GrenadParameters,
|
||||
faceted_fields: &HashSet<FieldId>,
|
||||
) -> Result<ExtractedFacetValues> {
|
||||
puffin::profile_function!();
|
||||
|
||||
let max_memory = indexer.max_memory_by_thread();
|
||||
|
||||
let mut fid_docid_facet_numbers_sorter = create_sorter(
|
||||
|
||||
@@ -22,6 +22,8 @@ pub fn extract_fid_word_count_docids<R: io::Read + io::Seek>(
|
||||
docid_word_positions: grenad::Reader<R>,
|
||||
indexer: GrenadParameters,
|
||||
) -> Result<grenad::Reader<File>> {
|
||||
puffin::profile_function!();
|
||||
|
||||
let max_memory = indexer.max_memory_by_thread();
|
||||
|
||||
let mut fid_word_count_docids_sorter = create_sorter(
|
||||
|
||||
@@ -19,6 +19,8 @@ pub fn extract_geo_points<R: io::Read + io::Seek>(
|
||||
primary_key_id: FieldId,
|
||||
(lat_fid, lng_fid): (FieldId, FieldId),
|
||||
) -> Result<grenad::Reader<File>> {
|
||||
puffin::profile_function!();
|
||||
|
||||
let mut writer = create_writer(
|
||||
indexer.chunk_compression_type,
|
||||
indexer.chunk_compression_level,
|
||||
|
||||
@@ -19,6 +19,8 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
|
||||
primary_key_id: FieldId,
|
||||
vectors_fid: FieldId,
|
||||
) -> Result<grenad::Reader<File>> {
|
||||
puffin::profile_function!();
|
||||
|
||||
let mut writer = create_writer(
|
||||
indexer.chunk_compression_type,
|
||||
indexer.chunk_compression_level,
|
||||
|
||||
@@ -27,6 +27,8 @@ pub fn extract_word_docids<R: io::Read + io::Seek>(
|
||||
indexer: GrenadParameters,
|
||||
exact_attributes: &HashSet<FieldId>,
|
||||
) -> Result<(grenad::Reader<File>, grenad::Reader<File>)> {
|
||||
puffin::profile_function!();
|
||||
|
||||
let max_memory = indexer.max_memory_by_thread();
|
||||
|
||||
let mut word_docids_sorter = create_sorter(
|
||||
|
||||
@@ -15,6 +15,8 @@ pub fn extract_word_fid_docids<R: io::Read + io::Seek>(
|
||||
docid_word_positions: grenad::Reader<R>,
|
||||
indexer: GrenadParameters,
|
||||
) -> Result<grenad::Reader<File>> {
|
||||
puffin::profile_function!();
|
||||
|
||||
let max_memory = indexer.max_memory_by_thread();
|
||||
|
||||
let mut word_fid_docids_sorter = create_sorter(
|
||||
|
||||
@@ -21,6 +21,8 @@ pub fn extract_word_pair_proximity_docids<R: io::Read + io::Seek>(
|
||||
docid_word_positions: grenad::Reader<R>,
|
||||
indexer: GrenadParameters,
|
||||
) -> Result<grenad::Reader<File>> {
|
||||
puffin::profile_function!();
|
||||
|
||||
let max_memory = indexer.max_memory_by_thread();
|
||||
|
||||
let mut word_pair_proximity_docids_sorter = create_sorter(
|
||||
|
||||
@@ -18,6 +18,8 @@ pub fn extract_word_position_docids<R: io::Read + io::Seek>(
|
||||
docid_word_positions: grenad::Reader<R>,
|
||||
indexer: GrenadParameters,
|
||||
) -> Result<grenad::Reader<File>> {
|
||||
puffin::profile_function!();
|
||||
|
||||
let max_memory = indexer.max_memory_by_thread();
|
||||
|
||||
let mut word_position_docids_sorter = create_sorter(
|
||||
|
||||
@@ -52,6 +52,8 @@ pub(crate) fn data_from_obkv_documents(
|
||||
max_positions_per_attributes: Option<u32>,
|
||||
exact_attributes: HashSet<FieldId>,
|
||||
) -> Result<()> {
|
||||
puffin::profile_function!();
|
||||
|
||||
original_obkv_chunks
|
||||
.par_bridge()
|
||||
.map(|original_documents_chunk| {
|
||||
@@ -238,11 +240,13 @@ fn spawn_extraction_task<FE, FS, M>(
|
||||
M::Output: Send,
|
||||
{
|
||||
rayon::spawn(move || {
|
||||
puffin::profile_scope!("extract_multiple_chunks", name);
|
||||
let chunks: Result<M> =
|
||||
chunks.into_par_iter().map(|chunk| extract_fn(chunk, indexer)).collect();
|
||||
rayon::spawn(move || match chunks {
|
||||
Ok(chunks) => {
|
||||
debug!("merge {} database", name);
|
||||
puffin::profile_scope!("merge_multiple_chunks", name);
|
||||
let reader = chunks.merge(merge_fn, &indexer);
|
||||
let _ = lmdb_writer_sx.send(reader.map(serialize_fn));
|
||||
}
|
||||
|
||||
@@ -214,6 +214,7 @@ pub fn sorter_into_lmdb_database(
|
||||
sorter: Sorter<MergeFn>,
|
||||
merge: MergeFn,
|
||||
) -> Result<()> {
|
||||
puffin::profile_function!();
|
||||
debug!("Writing MTBL sorter...");
|
||||
let before = Instant::now();
|
||||
|
||||
|
||||
@@ -137,6 +137,8 @@ where
|
||||
mut self,
|
||||
reader: DocumentsBatchReader<R>,
|
||||
) -> Result<(Self, StdResult<u64, UserError>)> {
|
||||
puffin::profile_function!();
|
||||
|
||||
// Early return when there is no document to add
|
||||
if reader.is_empty() {
|
||||
return Ok((self, Ok(0)));
|
||||
@@ -175,6 +177,8 @@ where
|
||||
mut self,
|
||||
to_delete: Vec<String>,
|
||||
) -> Result<(Self, StdResult<u64, UserError>)> {
|
||||
puffin::profile_function!();
|
||||
|
||||
// Early return when there is no document to add
|
||||
if to_delete.is_empty() {
|
||||
return Ok((self, Ok(0)));
|
||||
@@ -194,6 +198,8 @@ where
|
||||
|
||||
#[logging_timer::time("IndexDocuments::{}")]
|
||||
pub fn execute(mut self) -> Result<DocumentAdditionResult> {
|
||||
puffin::profile_function!();
|
||||
|
||||
if self.added_documents == 0 {
|
||||
let number_of_documents = self.index.number_of_documents(self.wtxn)?;
|
||||
return Ok(DocumentAdditionResult { indexed_documents: 0, number_of_documents });
|
||||
@@ -232,6 +238,8 @@ where
|
||||
FP: Fn(UpdateIndexingStep) + Sync,
|
||||
FA: Fn() -> bool + Sync,
|
||||
{
|
||||
puffin::profile_function!();
|
||||
|
||||
let TransformOutput {
|
||||
primary_key,
|
||||
fields_ids_map,
|
||||
@@ -322,6 +330,7 @@ where
|
||||
|
||||
// Run extraction pipeline in parallel.
|
||||
pool.install(|| {
|
||||
puffin::profile_scope!("extract_and_send_grenad_chunks");
|
||||
// split obkv file into several chunks
|
||||
let original_chunk_iter =
|
||||
grenad_obkv_into_chunks(original_documents, pool_params, documents_chunk_size);
|
||||
@@ -477,6 +486,8 @@ where
|
||||
FP: Fn(UpdateIndexingStep) + Sync,
|
||||
FA: Fn() -> bool + Sync,
|
||||
{
|
||||
puffin::profile_function!();
|
||||
|
||||
// Merged databases are already been indexed, we start from this count;
|
||||
let mut databases_seen = MERGED_DATABASE_COUNT;
|
||||
|
||||
@@ -511,26 +522,36 @@ where
|
||||
return Err(Error::InternalError(InternalError::AbortedIndexation));
|
||||
}
|
||||
|
||||
let current_prefix_fst = self.index.words_prefixes_fst(self.wtxn)?;
|
||||
let current_prefix_fst;
|
||||
let common_prefix_fst_words_tmp;
|
||||
let common_prefix_fst_words: Vec<_>;
|
||||
let new_prefix_fst_words;
|
||||
let del_prefix_fst_words;
|
||||
|
||||
// We retrieve the common words between the previous and new prefix word fst.
|
||||
let common_prefix_fst_words = fst_stream_into_vec(
|
||||
previous_words_prefixes_fst.op().add(¤t_prefix_fst).intersection(),
|
||||
);
|
||||
let common_prefix_fst_words: Vec<_> = common_prefix_fst_words
|
||||
.as_slice()
|
||||
.linear_group_by_key(|x| x.chars().next().unwrap())
|
||||
.collect();
|
||||
{
|
||||
puffin::profile_scope!("compute_prefix_diffs");
|
||||
|
||||
// We retrieve the newly added words between the previous and new prefix word fst.
|
||||
let new_prefix_fst_words = fst_stream_into_vec(
|
||||
current_prefix_fst.op().add(&previous_words_prefixes_fst).difference(),
|
||||
);
|
||||
current_prefix_fst = self.index.words_prefixes_fst(self.wtxn)?;
|
||||
|
||||
// We compute the set of prefixes that are no more part of the prefix fst.
|
||||
let del_prefix_fst_words = fst_stream_into_hashset(
|
||||
previous_words_prefixes_fst.op().add(¤t_prefix_fst).difference(),
|
||||
);
|
||||
// We retrieve the common words between the previous and new prefix word fst.
|
||||
common_prefix_fst_words_tmp = fst_stream_into_vec(
|
||||
previous_words_prefixes_fst.op().add(¤t_prefix_fst).intersection(),
|
||||
);
|
||||
common_prefix_fst_words = common_prefix_fst_words_tmp
|
||||
.as_slice()
|
||||
.linear_group_by_key(|x| x.chars().next().unwrap())
|
||||
.collect();
|
||||
|
||||
// We retrieve the newly added words between the previous and new prefix word fst.
|
||||
new_prefix_fst_words = fst_stream_into_vec(
|
||||
current_prefix_fst.op().add(&previous_words_prefixes_fst).difference(),
|
||||
);
|
||||
|
||||
// We compute the set of prefixes that are no more part of the prefix fst.
|
||||
del_prefix_fst_words = fst_stream_into_hashset(
|
||||
previous_words_prefixes_fst.op().add(¤t_prefix_fst).difference(),
|
||||
);
|
||||
}
|
||||
|
||||
databases_seen += 1;
|
||||
(self.progress)(UpdateIndexingStep::MergeDataIntoFinalDatabase {
|
||||
@@ -668,6 +689,8 @@ fn execute_word_prefix_docids(
|
||||
common_prefix_fst_words: &[&[String]],
|
||||
del_prefix_fst_words: &HashSet<Vec<u8>>,
|
||||
) -> Result<()> {
|
||||
puffin::profile_function!();
|
||||
|
||||
let cursor = reader.into_cursor()?;
|
||||
let mut builder = WordPrefixDocids::new(txn, word_docids_db, word_prefix_docids_db);
|
||||
builder.chunk_compression_type = indexer_config.chunk_compression_type;
|
||||
|
||||
@@ -558,6 +558,8 @@ impl<'a, 'i> Transform<'a, 'i> {
|
||||
where
|
||||
F: Fn(UpdateIndexingStep) + Sync,
|
||||
{
|
||||
puffin::profile_function!();
|
||||
|
||||
let primary_key = self
|
||||
.index
|
||||
.primary_key(wtxn)?
|
||||
|
||||
@@ -46,6 +46,66 @@ pub(crate) enum TypedChunk {
|
||||
ScriptLanguageDocids(HashMap<(Script, Language), RoaringBitmap>),
|
||||
}
|
||||
|
||||
impl TypedChunk {
|
||||
pub fn to_debug_string(&self) -> String {
|
||||
match self {
|
||||
TypedChunk::FieldIdDocidFacetStrings(grenad) => {
|
||||
format!("FieldIdDocidFacetStrings {{ number_of_entries: {} }}", grenad.len())
|
||||
}
|
||||
TypedChunk::FieldIdDocidFacetNumbers(grenad) => {
|
||||
format!("FieldIdDocidFacetNumbers {{ number_of_entries: {} }}", grenad.len())
|
||||
}
|
||||
TypedChunk::Documents(grenad) => {
|
||||
format!("Documents {{ number_of_entries: {} }}", grenad.len())
|
||||
}
|
||||
TypedChunk::FieldIdWordcountDocids(grenad) => {
|
||||
format!("FieldIdWordcountDocids {{ number_of_entries: {} }}", grenad.len())
|
||||
}
|
||||
TypedChunk::NewDocumentsIds(grenad) => {
|
||||
format!("NewDocumentsIds {{ number_of_entries: {} }}", grenad.len())
|
||||
}
|
||||
TypedChunk::WordDocids { word_docids_reader, exact_word_docids_reader } => format!(
|
||||
"WordDocids {{ word_docids_reader: {}, exact_word_docids_reader: {} }}",
|
||||
word_docids_reader.len(),
|
||||
exact_word_docids_reader.len()
|
||||
),
|
||||
TypedChunk::WordPositionDocids(grenad) => {
|
||||
format!("WordPositionDocids {{ number_of_entries: {} }}", grenad.len())
|
||||
}
|
||||
TypedChunk::WordFidDocids(grenad) => {
|
||||
format!("WordFidDocids {{ number_of_entries: {} }}", grenad.len())
|
||||
}
|
||||
TypedChunk::WordPairProximityDocids(grenad) => {
|
||||
format!("WordPairProximityDocids {{ number_of_entries: {} }}", grenad.len())
|
||||
}
|
||||
TypedChunk::FieldIdFacetStringDocids(grenad) => {
|
||||
format!("FieldIdFacetStringDocids {{ number_of_entries: {} }}", grenad.len())
|
||||
}
|
||||
TypedChunk::FieldIdFacetNumberDocids(grenad) => {
|
||||
format!("FieldIdFacetNumberDocids {{ number_of_entries: {} }}", grenad.len())
|
||||
}
|
||||
TypedChunk::FieldIdFacetExistsDocids(grenad) => {
|
||||
format!("FieldIdFacetExistsDocids {{ number_of_entries: {} }}", grenad.len())
|
||||
}
|
||||
TypedChunk::FieldIdFacetIsNullDocids(grenad) => {
|
||||
format!("FieldIdFacetIsNullDocids {{ number_of_entries: {} }}", grenad.len())
|
||||
}
|
||||
TypedChunk::FieldIdFacetIsEmptyDocids(grenad) => {
|
||||
format!("FieldIdFacetIsEmptyDocids {{ number_of_entries: {} }}", grenad.len())
|
||||
}
|
||||
TypedChunk::GeoPoints(grenad) => {
|
||||
format!("GeoPoints {{ number_of_entries: {} }}", grenad.len())
|
||||
}
|
||||
TypedChunk::VectorPoints(grenad) => {
|
||||
format!("VectorPoints {{ number_of_entries: {} }}", grenad.len())
|
||||
}
|
||||
TypedChunk::ScriptLanguageDocids(grenad) => {
|
||||
format!("ScriptLanguageDocids {{ number_of_entries: {} }}", grenad.len())
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Write typed chunk in the corresponding LMDB database of the provided index.
|
||||
/// Return new documents seen.
|
||||
pub(crate) fn write_typed_chunk_into_index(
|
||||
@@ -54,6 +114,8 @@ pub(crate) fn write_typed_chunk_into_index(
|
||||
wtxn: &mut RwTxn,
|
||||
index_is_empty: bool,
|
||||
) -> Result<(RoaringBitmap, bool)> {
|
||||
puffin::profile_function!(typed_chunk.to_debug_string());
|
||||
|
||||
let mut is_merged_database = false;
|
||||
match typed_chunk {
|
||||
TypedChunk::Documents(obkv_documents_iter) => {
|
||||
@@ -350,6 +412,8 @@ where
|
||||
FS: for<'a> Fn(&'a [u8], &'a mut Vec<u8>) -> Result<&'a [u8]>,
|
||||
FM: Fn(&[u8], &[u8], &mut Vec<u8>) -> Result<()>,
|
||||
{
|
||||
puffin::profile_function!(format!("number of entries: {}", data.len()));
|
||||
|
||||
let mut buffer = Vec::new();
|
||||
let database = database.remap_types::<ByteSlice, ByteSlice>();
|
||||
|
||||
@@ -392,6 +456,8 @@ where
|
||||
FS: for<'a> Fn(&'a [u8], &'a mut Vec<u8>) -> Result<&'a [u8]>,
|
||||
FM: Fn(&[u8], &[u8], &mut Vec<u8>) -> Result<()>,
|
||||
{
|
||||
puffin::profile_function!(format!("number of entries: {}", data.len()));
|
||||
|
||||
if !index_is_empty {
|
||||
return write_entries_into_database(
|
||||
data,
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
use std::sync::Arc;
|
||||
|
||||
use grenad::CompressionType;
|
||||
use rayon::ThreadPool;
|
||||
|
||||
@@ -9,7 +11,7 @@ pub struct IndexerConfig {
|
||||
pub max_memory: Option<usize>,
|
||||
pub chunk_compression_type: CompressionType,
|
||||
pub chunk_compression_level: Option<u32>,
|
||||
pub thread_pool: Option<ThreadPool>,
|
||||
pub thread_pool: Option<Arc<ThreadPool>>,
|
||||
pub max_positions_per_attributes: Option<u32>,
|
||||
pub skip_index_budget: bool,
|
||||
}
|
||||
|
||||
@@ -50,6 +50,8 @@ impl<'t, 'u, 'i> PrefixWordPairsProximityDocids<'t, 'u, 'i> {
|
||||
common_prefix_fst_words: &[&'a [String]],
|
||||
del_prefix_fst_words: &HashSet<Vec<u8>>,
|
||||
) -> Result<()> {
|
||||
puffin::profile_function!();
|
||||
|
||||
index_word_prefix_database(
|
||||
self.wtxn,
|
||||
self.index.word_pair_proximity_docids,
|
||||
|
||||
@@ -27,6 +27,8 @@ pub fn index_prefix_word_database(
|
||||
chunk_compression_type: CompressionType,
|
||||
chunk_compression_level: Option<u32>,
|
||||
) -> Result<()> {
|
||||
puffin::profile_function!();
|
||||
|
||||
let max_proximity = max_proximity - 1;
|
||||
debug!("Computing and writing the word prefix pair proximity docids into LMDB on disk...");
|
||||
|
||||
|
||||
@@ -191,6 +191,7 @@ pub fn index_word_prefix_database(
|
||||
chunk_compression_type: CompressionType,
|
||||
chunk_compression_level: Option<u32>,
|
||||
) -> Result<()> {
|
||||
puffin::profile_function!();
|
||||
debug!("Computing and writing the word prefix pair proximity docids into LMDB on disk...");
|
||||
|
||||
// Make a prefix trie from the common prefixes that are shorter than self.max_prefix_length
|
||||
|
||||
@@ -303,6 +303,8 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
||||
FP: Fn(UpdateIndexingStep) + Sync,
|
||||
FA: Fn() -> bool + Sync,
|
||||
{
|
||||
puffin::profile_function!();
|
||||
|
||||
let fields_ids_map = self.index.fields_ids_map(self.wtxn)?;
|
||||
// if the settings are set before any document update, we don't need to do anything, and
|
||||
// will set the primary key during the first document addition.
|
||||
|
||||
@@ -45,6 +45,8 @@ impl<'t, 'u, 'i> WordPrefixDocids<'t, 'u, 'i> {
|
||||
common_prefix_fst_words: &[&[String]],
|
||||
del_prefix_fst_words: &HashSet<Vec<u8>>,
|
||||
) -> Result<()> {
|
||||
puffin::profile_function!();
|
||||
|
||||
// It is forbidden to keep a mutable reference into the database
|
||||
// and write into it at the same time, therefore we write into another file.
|
||||
let mut prefix_docids_sorter = create_sorter(
|
||||
|
||||
@@ -50,6 +50,7 @@ impl<'t, 'u, 'i> WordPrefixIntegerDocids<'t, 'u, 'i> {
|
||||
common_prefix_fst_words: &[&[String]],
|
||||
del_prefix_fst_words: &HashSet<Vec<u8>>,
|
||||
) -> Result<()> {
|
||||
puffin::profile_function!();
|
||||
debug!("Computing and writing the word levels integers docids into LMDB on disk...");
|
||||
|
||||
let mut prefix_integer_docids_sorter = create_sorter(
|
||||
|
||||
@@ -42,6 +42,8 @@ impl<'t, 'u, 'i> WordsPrefixesFst<'t, 'u, 'i> {
|
||||
|
||||
#[logging_timer::time("WordsPrefixesFst::{}")]
|
||||
pub fn execute(self) -> Result<()> {
|
||||
puffin::profile_function!();
|
||||
|
||||
let words_fst = self.index.words_fst(self.wtxn)?;
|
||||
|
||||
let mut current_prefix = vec![SmallString32::new(); self.max_prefix_length];
|
||||
|
||||
Reference in New Issue
Block a user