mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-11-23 05:06:11 +00:00
Compare commits
45 Commits
prototype-
...
prototype-
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
119bb805e5 | ||
|
|
9f10011525 | ||
|
|
54a2029b1f | ||
|
|
4fe09f406c | ||
|
|
9d95f8187d | ||
|
|
fb0904b893 | ||
|
|
eb829e93c5 | ||
|
|
be36afe9e6 | ||
|
|
27c087a4fb | ||
|
|
c618877bce | ||
|
|
982e554639 | ||
|
|
a8cfec19b5 | ||
|
|
5d2b1c2637 | ||
|
|
65fefc7122 | ||
|
|
180cfd71cc | ||
|
|
614cd8cc1c | ||
|
|
c6f6808981 | ||
|
|
9cb77fd310 | ||
|
|
fa8bd6430c | ||
|
|
4c016432e1 | ||
|
|
0aa1f63061 | ||
|
|
d4c88f28f3 | ||
|
|
d90c76d3cc | ||
|
|
f6bc6854f8 | ||
|
|
42001a25ff | ||
|
|
080d5f94dd | ||
|
|
ba0f50e5ef | ||
|
|
ce6230aa85 | ||
|
|
6dc241f9de | ||
|
|
01d1ef65c4 | ||
|
|
3246667590 | ||
|
|
109395c199 | ||
|
|
a0b71a8785 | ||
|
|
00a5c86f13 | ||
|
|
366c37a686 | ||
|
|
afc164a271 | ||
|
|
bdc2d1e64d | ||
|
|
b85657de1e | ||
|
|
626be0ef28 | ||
|
|
1b476b8a35 | ||
|
|
a1b42c10e2 | ||
|
|
d67db6e3c2 | ||
|
|
760ccffdbd | ||
|
|
338806283b | ||
|
|
fe15e11c9d |
2
.gitignore
vendored
2
.gitignore
vendored
@@ -5,7 +5,7 @@
|
||||
**/*.json_lines
|
||||
**/*.rs.bk
|
||||
/*.mdb
|
||||
/data.ms
|
||||
/*.ms
|
||||
/snapshots
|
||||
/dumps
|
||||
/bench
|
||||
|
||||
46
Cargo.lock
generated
46
Cargo.lock
generated
@@ -1194,21 +1194,6 @@ version = "0.7.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f46ad14479a25103f283c0f10005961cf086d8dc42205bb44c46ac563475dca6"
|
||||
|
||||
[[package]]
|
||||
name = "cohere-rust"
|
||||
version = "0.6.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d8b553b385b0f2562138baea705b5707335314f8e91a58e7d1a03c3a6c332423"
|
||||
dependencies = [
|
||||
"bytes",
|
||||
"reqwest",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"strum_macros 0.26.4",
|
||||
"thiserror 1.0.69",
|
||||
"tokio",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "color-spantrace"
|
||||
version = "0.3.0"
|
||||
@@ -3460,7 +3445,7 @@ dependencies = [
|
||||
"serde_json",
|
||||
"serde_yaml",
|
||||
"strum",
|
||||
"strum_macros 0.27.1",
|
||||
"strum_macros",
|
||||
"unicode-blocks",
|
||||
"unicode-normalization",
|
||||
"unicode-segmentation",
|
||||
@@ -3760,6 +3745,7 @@ dependencies = [
|
||||
"actix-web-lab",
|
||||
"anyhow",
|
||||
"async-openai",
|
||||
"backoff",
|
||||
"brotli",
|
||||
"bstr",
|
||||
"build-info",
|
||||
@@ -3768,7 +3754,6 @@ dependencies = [
|
||||
"bytes",
|
||||
"cargo_toml",
|
||||
"clap",
|
||||
"cohere-rust",
|
||||
"crossbeam-channel",
|
||||
"deserr",
|
||||
"dump",
|
||||
@@ -3791,6 +3776,7 @@ dependencies = [
|
||||
"meili-snap",
|
||||
"meilisearch-auth",
|
||||
"meilisearch-types",
|
||||
"memmap2",
|
||||
"mimalloc",
|
||||
"mime",
|
||||
"mopa-maintained",
|
||||
@@ -3924,9 +3910,9 @@ checksum = "32a282da65faaf38286cf3be983213fcf1d2e2a58700e808f83f4ea9a4804bc0"
|
||||
|
||||
[[package]]
|
||||
name = "memmap2"
|
||||
version = "0.9.5"
|
||||
version = "0.9.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fd3f7eed9d3848f8b98834af67102b720745c4ec028fcd0aa0239277e7de374f"
|
||||
checksum = "483758ad303d734cec05e5c12b41d7e93e6a6390c5e9dae6bdeb7c1259012d28"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"stable_deref_trait",
|
||||
@@ -4003,6 +3989,7 @@ dependencies = [
|
||||
"time",
|
||||
"tokenizers",
|
||||
"tracing",
|
||||
"twox-hash",
|
||||
"ureq",
|
||||
"url",
|
||||
"utoipa",
|
||||
@@ -5837,20 +5824,7 @@ version = "0.27.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f64def088c51c9510a8579e3c5d67c65349dcf755e5479ad3d010aa6454e2c32"
|
||||
dependencies = [
|
||||
"strum_macros 0.27.1",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "strum_macros"
|
||||
version = "0.26.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4c6bee85a5a24955dc440386795aa378cd9cf82acd5f764469152d2270e581be"
|
||||
dependencies = [
|
||||
"heck",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"rustversion",
|
||||
"syn 2.0.101",
|
||||
"strum_macros",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -6458,6 +6432,12 @@ version = "0.2.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b"
|
||||
|
||||
[[package]]
|
||||
name = "twox-hash"
|
||||
version = "2.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8b907da542cbced5261bd3256de1b3a1bf340a3d37f93425a07362a1d687de56"
|
||||
|
||||
[[package]]
|
||||
name = "typeid"
|
||||
version = "1.0.3"
|
||||
|
||||
8
LICENSE
8
LICENSE
@@ -19,3 +19,11 @@ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
|
||||
---
|
||||
|
||||
đź”’ Meilisearch Enterprise Edition (EE)
|
||||
|
||||
Certain parts of this codebase are not licensed under the MIT license and governed by the Business Source License 1.1.
|
||||
|
||||
See the LICENSE-EE file for details.
|
||||
|
||||
67
LICENSE-EE
Normal file
67
LICENSE-EE
Normal file
@@ -0,0 +1,67 @@
|
||||
Business Source License 1.1 – Adapted for Meili SAS
|
||||
This license is based on the Business Source License version 1.1, as published by MariaDB Corporation Ab.
|
||||
|
||||
Parameters
|
||||
|
||||
Licensor: Meili SAS
|
||||
|
||||
Licensed Work: Any file explicitly marked as “Enterprise Edition (EE)” or “governed by the Business Source License”.
|
||||
|
||||
Additional Use Grant:
|
||||
You may use, modify, and distribute the Licensed Work for non-production purposes only, such as testing, development, or evaluation.
|
||||
|
||||
Production use of the Licensed Work requires a commercial license agreement with Meilisearch. Contact bonjour@meilisearch.com for licensing.
|
||||
|
||||
Change License: MIT
|
||||
|
||||
Change Date: Four years from the date the Licensed Work is published.
|
||||
|
||||
This License does not apply to any code outside of the Licensed Work, which remains under the MIT license.
|
||||
|
||||
For information about alternative licensing arrangements for the Licensed Work,
|
||||
please contact bonjour@meilisearch.com or sales@meilisearch.com.
|
||||
|
||||
Notice
|
||||
|
||||
Business Source License 1.1
|
||||
|
||||
Terms
|
||||
|
||||
The Licensor hereby grants you the right to copy, modify, create derivative
|
||||
works, redistribute, and make non-production use of the Licensed Work. The
|
||||
Licensor may make an Additional Use Grant, above, permitting limited production use.
|
||||
|
||||
Effective on the Change Date, or the fourth anniversary of the first publicly
|
||||
available distribution of a specific version of the Licensed Work under this
|
||||
License, whichever comes first, the Licensor hereby grants you rights under
|
||||
the terms of the Change License, and the rights granted in the paragraph
|
||||
above terminate.
|
||||
|
||||
If your use of the Licensed Work does not comply with the requirements
|
||||
currently in effect as described in this License, you must purchase a
|
||||
commercial license from the Licensor, its affiliated entities, or authorized
|
||||
resellers, or you must refrain from using the Licensed Work.
|
||||
|
||||
All copies of the original and modified Licensed Work, and derivative works
|
||||
of the Licensed Work, are subject to this License. This License applies
|
||||
separately for each version of the Licensed Work and the Change Date may vary
|
||||
for each version of the Licensed Work released by Licensor.
|
||||
|
||||
You must conspicuously display this License on each original or modified copy
|
||||
of the Licensed Work. If you receive the Licensed Work in original or
|
||||
modified form from a third party, the terms and conditions set forth in this
|
||||
License apply to your use of that work.
|
||||
|
||||
Any use of the Licensed Work in violation of this License will automatically
|
||||
terminate your rights under this License for the current and all other
|
||||
versions of the Licensed Work.
|
||||
|
||||
This License does not grant you any right in any trademark or logo of
|
||||
Licensor or its affiliates (provided that you may use a trademark or logo of
|
||||
Licensor as expressly required by this License).
|
||||
|
||||
TO THE EXTENT PERMITTED BY APPLICABLE LAW, THE LICENSED WORK IS PROVIDED ON
|
||||
AN "AS IS" BASIS. LICENSOR HEREBY DISCLAIMS ALL WARRANTIES AND CONDITIONS,
|
||||
EXPRESS OR IMPLIED, INCLUDING (WITHOUT LIMITATION) WARRANTIES OF
|
||||
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NON-INFRINGEMENT, AND
|
||||
TITLE.
|
||||
20
README.md
20
README.md
@@ -89,6 +89,26 @@ We also offer a wide range of dedicated guides to all Meilisearch features, such
|
||||
|
||||
Finally, for more in-depth information, refer to our articles explaining fundamental Meilisearch concepts such as [documents](https://www.meilisearch.com/docs/learn/core_concepts/documents?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=advanced) and [indexes](https://www.meilisearch.com/docs/learn/core_concepts/indexes?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=advanced).
|
||||
|
||||
## đź§ľ Editions & Licensing
|
||||
|
||||
Meilisearch is available in two editions:
|
||||
|
||||
### đź§Ş Community Edition (CE)
|
||||
|
||||
- Fully open source under the [MIT license](./LICENSE)
|
||||
- Core search engine with fast and relevant full-text, semantic or hybrid search
|
||||
- Free to use for anyone, including commercial usage
|
||||
|
||||
### 🏢 Enterprise Edition (EE)
|
||||
|
||||
- Includes advanced features such as:
|
||||
- Sharding
|
||||
- Governed by a [commercial license](./LICENSE-EE) or the [Business Source License 1.1](https://mariadb.com/bsl11)
|
||||
- Not allowed in production without a commercial agreement with Meilisearch.
|
||||
- You may use, modify, and distribute the Licensed Work for non-production purposes only, such as testing, development, or evaluation.
|
||||
|
||||
Want access to Enterprise features? → Contact us at [sales@meilisearch.com](maito:sales@meilisearch.com).
|
||||
|
||||
## 📊 Telemetry
|
||||
|
||||
Meilisearch collects **anonymized** user data to help us improve our product. You can [deactivate this](https://www.meilisearch.com/docs/learn/what_is_meilisearch/telemetry?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=telemetry#how-to-disable-data-collection) whenever you want.
|
||||
|
||||
@@ -14,7 +14,7 @@ license.workspace = true
|
||||
anyhow = "1.0.98"
|
||||
bumpalo = "3.18.1"
|
||||
csv = "1.3.1"
|
||||
memmap2 = "0.9.5"
|
||||
memmap2 = "0.9.7"
|
||||
milli = { path = "../milli" }
|
||||
mimalloc = { version = "0.1.47", default-features = false }
|
||||
serde_json = { version = "1.0.140", features = ["preserve_order"] }
|
||||
@@ -55,4 +55,3 @@ harness = false
|
||||
[[bench]]
|
||||
name = "sort"
|
||||
harness = false
|
||||
|
||||
|
||||
@@ -154,6 +154,7 @@ fn indexing_songs_default(c: &mut Criterion) {
|
||||
&mut new_fields_ids_map,
|
||||
&|| false,
|
||||
Progress::default(),
|
||||
None,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
@@ -221,6 +222,7 @@ fn reindexing_songs_default(c: &mut Criterion) {
|
||||
&mut new_fields_ids_map,
|
||||
&|| false,
|
||||
Progress::default(),
|
||||
None,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
@@ -266,6 +268,7 @@ fn reindexing_songs_default(c: &mut Criterion) {
|
||||
&mut new_fields_ids_map,
|
||||
&|| false,
|
||||
Progress::default(),
|
||||
None,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
@@ -335,6 +338,7 @@ fn deleting_songs_in_batches_default(c: &mut Criterion) {
|
||||
&mut new_fields_ids_map,
|
||||
&|| false,
|
||||
Progress::default(),
|
||||
None,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
@@ -412,6 +416,7 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
|
||||
&mut new_fields_ids_map,
|
||||
&|| false,
|
||||
Progress::default(),
|
||||
None,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
@@ -457,6 +462,7 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
|
||||
&mut new_fields_ids_map,
|
||||
&|| false,
|
||||
Progress::default(),
|
||||
None,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
@@ -498,6 +504,7 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
|
||||
&mut new_fields_ids_map,
|
||||
&|| false,
|
||||
Progress::default(),
|
||||
None,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
@@ -566,6 +573,7 @@ fn indexing_songs_without_faceted_numbers(c: &mut Criterion) {
|
||||
&mut new_fields_ids_map,
|
||||
&|| false,
|
||||
Progress::default(),
|
||||
None,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
@@ -633,6 +641,7 @@ fn indexing_songs_without_faceted_fields(c: &mut Criterion) {
|
||||
&mut new_fields_ids_map,
|
||||
&|| false,
|
||||
Progress::default(),
|
||||
None,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
@@ -700,6 +709,7 @@ fn indexing_wiki(c: &mut Criterion) {
|
||||
&mut new_fields_ids_map,
|
||||
&|| false,
|
||||
Progress::default(),
|
||||
None,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
@@ -766,6 +776,7 @@ fn reindexing_wiki(c: &mut Criterion) {
|
||||
&mut new_fields_ids_map,
|
||||
&|| false,
|
||||
Progress::default(),
|
||||
None,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
@@ -811,6 +822,7 @@ fn reindexing_wiki(c: &mut Criterion) {
|
||||
&mut new_fields_ids_map,
|
||||
&|| false,
|
||||
Progress::default(),
|
||||
None,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
@@ -879,6 +891,7 @@ fn deleting_wiki_in_batches_default(c: &mut Criterion) {
|
||||
&mut new_fields_ids_map,
|
||||
&|| false,
|
||||
Progress::default(),
|
||||
None,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
@@ -956,6 +969,7 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
|
||||
&mut new_fields_ids_map,
|
||||
&|| false,
|
||||
Progress::default(),
|
||||
None,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
@@ -1002,6 +1016,7 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
|
||||
&mut new_fields_ids_map,
|
||||
&|| false,
|
||||
Progress::default(),
|
||||
None,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
@@ -1044,6 +1059,7 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
|
||||
&mut new_fields_ids_map,
|
||||
&|| false,
|
||||
Progress::default(),
|
||||
None,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
@@ -1111,6 +1127,7 @@ fn indexing_movies_default(c: &mut Criterion) {
|
||||
&mut new_fields_ids_map,
|
||||
&|| false,
|
||||
Progress::default(),
|
||||
None,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
@@ -1177,6 +1194,7 @@ fn reindexing_movies_default(c: &mut Criterion) {
|
||||
&mut new_fields_ids_map,
|
||||
&|| false,
|
||||
Progress::default(),
|
||||
None,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
@@ -1222,6 +1240,7 @@ fn reindexing_movies_default(c: &mut Criterion) {
|
||||
&mut new_fields_ids_map,
|
||||
&|| false,
|
||||
Progress::default(),
|
||||
None,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
@@ -1290,6 +1309,7 @@ fn deleting_movies_in_batches_default(c: &mut Criterion) {
|
||||
&mut new_fields_ids_map,
|
||||
&|| false,
|
||||
Progress::default(),
|
||||
None,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
@@ -1404,6 +1424,7 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
|
||||
&mut new_fields_ids_map,
|
||||
&|| false,
|
||||
Progress::default(),
|
||||
None,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
@@ -1449,6 +1470,7 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
|
||||
&mut new_fields_ids_map,
|
||||
&|| false,
|
||||
Progress::default(),
|
||||
None,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
@@ -1490,6 +1512,7 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
|
||||
&mut new_fields_ids_map,
|
||||
&|| false,
|
||||
Progress::default(),
|
||||
None,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
@@ -1580,6 +1603,7 @@ fn indexing_nested_movies_default(c: &mut Criterion) {
|
||||
&mut new_fields_ids_map,
|
||||
&|| false,
|
||||
Progress::default(),
|
||||
None,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
@@ -1671,6 +1695,7 @@ fn deleting_nested_movies_in_batches_default(c: &mut Criterion) {
|
||||
&mut new_fields_ids_map,
|
||||
&|| false,
|
||||
Progress::default(),
|
||||
None,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
@@ -1754,6 +1779,7 @@ fn indexing_nested_movies_without_faceted_fields(c: &mut Criterion) {
|
||||
&mut new_fields_ids_map,
|
||||
&|| false,
|
||||
Progress::default(),
|
||||
None,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
@@ -1821,6 +1847,7 @@ fn indexing_geo(c: &mut Criterion) {
|
||||
&mut new_fields_ids_map,
|
||||
&|| false,
|
||||
Progress::default(),
|
||||
None,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
@@ -1887,6 +1914,7 @@ fn reindexing_geo(c: &mut Criterion) {
|
||||
&mut new_fields_ids_map,
|
||||
&|| false,
|
||||
Progress::default(),
|
||||
None,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
@@ -1932,6 +1960,7 @@ fn reindexing_geo(c: &mut Criterion) {
|
||||
&mut new_fields_ids_map,
|
||||
&|| false,
|
||||
Progress::default(),
|
||||
None,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
@@ -2000,6 +2029,7 @@ fn deleting_geo_in_batches_default(c: &mut Criterion) {
|
||||
&mut new_fields_ids_map,
|
||||
&|| false,
|
||||
Progress::default(),
|
||||
None,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
|
||||
@@ -123,6 +123,7 @@ pub fn base_setup(conf: &Conf) -> Index {
|
||||
&mut new_fields_ids_map,
|
||||
&|| false,
|
||||
Progress::default(),
|
||||
None,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
|
||||
@@ -10,7 +10,7 @@ use meilisearch_types::keys::Key;
|
||||
use meilisearch_types::milli::update::IndexDocumentsMethod;
|
||||
use meilisearch_types::settings::Unchecked;
|
||||
use meilisearch_types::tasks::{
|
||||
Details, ExportIndexSettings, IndexSwap, KindWithContent, Status, Task, TaskId,
|
||||
Details, ExportIndexSettings, IndexSwap, KindWithContent, Status, Task, TaskId, TaskNetwork,
|
||||
};
|
||||
use meilisearch_types::InstanceUid;
|
||||
use roaring::RoaringBitmap;
|
||||
@@ -94,6 +94,8 @@ pub struct TaskDump {
|
||||
default
|
||||
)]
|
||||
pub finished_at: Option<OffsetDateTime>,
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub network: Option<TaskNetwork>,
|
||||
}
|
||||
|
||||
// A `Kind` specific version made for the dump. If modified you may break the dump.
|
||||
@@ -171,6 +173,7 @@ impl From<Task> for TaskDump {
|
||||
enqueued_at: task.enqueued_at,
|
||||
started_at: task.started_at,
|
||||
finished_at: task.finished_at,
|
||||
network: task.network,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -250,11 +253,12 @@ pub(crate) mod test {
|
||||
use maplit::{btreemap, btreeset};
|
||||
use meilisearch_types::batches::{Batch, BatchEnqueuedAt, BatchStats};
|
||||
use meilisearch_types::facet_values_sort::FacetValuesSort;
|
||||
use meilisearch_types::features::{Network, Remote, RuntimeTogglableFeatures};
|
||||
use meilisearch_types::features::RuntimeTogglableFeatures;
|
||||
use meilisearch_types::index_uid_pattern::IndexUidPattern;
|
||||
use meilisearch_types::keys::{Action, Key};
|
||||
use meilisearch_types::milli::update::Setting;
|
||||
use meilisearch_types::milli::{self, FilterableAttributesRule};
|
||||
use meilisearch_types::network::{Network, Remote};
|
||||
use meilisearch_types::settings::{Checked, FacetingSettings, Settings};
|
||||
use meilisearch_types::task_view::DetailsView;
|
||||
use meilisearch_types::tasks::{BatchStopReason, Details, Kind, Status};
|
||||
@@ -383,6 +387,7 @@ pub(crate) mod test {
|
||||
enqueued_at: datetime!(2022-11-11 0:00 UTC),
|
||||
started_at: Some(datetime!(2022-11-20 0:00 UTC)),
|
||||
finished_at: Some(datetime!(2022-11-21 0:00 UTC)),
|
||||
network: None,
|
||||
},
|
||||
None,
|
||||
),
|
||||
@@ -407,6 +412,7 @@ pub(crate) mod test {
|
||||
enqueued_at: datetime!(2022-11-11 0:00 UTC),
|
||||
started_at: None,
|
||||
finished_at: None,
|
||||
network: None,
|
||||
},
|
||||
Some(vec![
|
||||
json!({ "id": 4, "race": "leonberg" }).as_object().unwrap().clone(),
|
||||
@@ -426,6 +432,7 @@ pub(crate) mod test {
|
||||
enqueued_at: datetime!(2022-11-15 0:00 UTC),
|
||||
started_at: None,
|
||||
finished_at: None,
|
||||
network: None,
|
||||
},
|
||||
None,
|
||||
),
|
||||
@@ -538,7 +545,8 @@ pub(crate) mod test {
|
||||
fn create_test_network() -> Network {
|
||||
Network {
|
||||
local: Some("myself".to_string()),
|
||||
remotes: maplit::btreemap! {"other".to_string() => Remote { url: "http://test".to_string(), search_api_key: Some("apiKey".to_string()) }},
|
||||
remotes: maplit::btreemap! {"other".to_string() => Remote { url: "http://test".to_string(), search_api_key: Some("apiKey".to_string()), write_api_key: Some("docApiKey".to_string()) }},
|
||||
sharding: false,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
use std::fs::File;
|
||||
use std::str::FromStr;
|
||||
|
||||
use super::v2_to_v3::CompatV2ToV3;
|
||||
@@ -94,6 +95,10 @@ impl CompatIndexV1ToV2 {
|
||||
self.from.documents().map(|it| Box::new(it) as Box<dyn Iterator<Item = _>>)
|
||||
}
|
||||
|
||||
pub fn documents_file(&self) -> &File {
|
||||
self.from.documents_file()
|
||||
}
|
||||
|
||||
pub fn settings(&mut self) -> Result<v2::settings::Settings<v2::settings::Checked>> {
|
||||
Ok(v2::settings::Settings::<v2::settings::Unchecked>::from(self.from.settings()?).check())
|
||||
}
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
use std::fs::File;
|
||||
use std::str::FromStr;
|
||||
|
||||
use time::OffsetDateTime;
|
||||
@@ -122,6 +123,13 @@ impl CompatIndexV2ToV3 {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn documents_file(&self) -> &File {
|
||||
match self {
|
||||
CompatIndexV2ToV3::V2(v2) => v2.documents_file(),
|
||||
CompatIndexV2ToV3::Compat(compat) => compat.documents_file(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn settings(&mut self) -> Result<v3::Settings<v3::Checked>> {
|
||||
let settings = match self {
|
||||
CompatIndexV2ToV3::V2(from) => from.settings()?,
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
use std::fs::File;
|
||||
|
||||
use super::v2_to_v3::{CompatIndexV2ToV3, CompatV2ToV3};
|
||||
use super::v4_to_v5::CompatV4ToV5;
|
||||
use crate::reader::{v3, v4, UpdateFile};
|
||||
@@ -252,6 +254,13 @@ impl CompatIndexV3ToV4 {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn documents_file(&self) -> &File {
|
||||
match self {
|
||||
CompatIndexV3ToV4::V3(v3) => v3.documents_file(),
|
||||
CompatIndexV3ToV4::Compat(compat) => compat.documents_file(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn settings(&mut self) -> Result<v4::Settings<v4::Checked>> {
|
||||
Ok(match self {
|
||||
CompatIndexV3ToV4::V3(v3) => {
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
use std::fs::File;
|
||||
|
||||
use super::v3_to_v4::{CompatIndexV3ToV4, CompatV3ToV4};
|
||||
use super::v5_to_v6::CompatV5ToV6;
|
||||
use crate::reader::{v4, v5, Document};
|
||||
@@ -241,6 +243,13 @@ impl CompatIndexV4ToV5 {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn documents_file(&self) -> &File {
|
||||
match self {
|
||||
CompatIndexV4ToV5::V4(v4) => v4.documents_file(),
|
||||
CompatIndexV4ToV5::Compat(compat) => compat.documents_file(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn settings(&mut self) -> Result<v5::Settings<v5::Checked>> {
|
||||
match self {
|
||||
CompatIndexV4ToV5::V4(v4) => Ok(v5::Settings::from(v4.settings()?).check()),
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
use std::fs::File;
|
||||
use std::num::NonZeroUsize;
|
||||
use std::str::FromStr;
|
||||
|
||||
@@ -160,6 +161,7 @@ impl CompatV5ToV6 {
|
||||
enqueued_at: task_view.enqueued_at,
|
||||
started_at: task_view.started_at,
|
||||
finished_at: task_view.finished_at,
|
||||
network: None,
|
||||
};
|
||||
|
||||
(task, content_file)
|
||||
@@ -243,6 +245,13 @@ impl CompatIndexV5ToV6 {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn documents_file(&self) -> &File {
|
||||
match self {
|
||||
CompatIndexV5ToV6::V5(v5) => v5.documents_file(),
|
||||
CompatIndexV5ToV6::Compat(compat) => compat.documents_file(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn settings(&mut self) -> Result<v6::Settings<v6::Checked>> {
|
||||
match self {
|
||||
CompatIndexV5ToV6::V5(v5) => Ok(v6::Settings::from(v5.settings()?).check()),
|
||||
|
||||
@@ -192,6 +192,14 @@ impl DumpIndexReader {
|
||||
}
|
||||
}
|
||||
|
||||
/// A reference to a file in the NDJSON format containing all the documents of the index
|
||||
pub fn documents_file(&self) -> &File {
|
||||
match self {
|
||||
DumpIndexReader::Current(v6) => v6.documents_file(),
|
||||
DumpIndexReader::Compat(compat) => compat.documents_file(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn settings(&mut self) -> Result<v6::Settings<v6::Checked>> {
|
||||
match self {
|
||||
DumpIndexReader::Current(v6) => v6.settings(),
|
||||
|
||||
@@ -72,6 +72,10 @@ impl V1IndexReader {
|
||||
.map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) }))
|
||||
}
|
||||
|
||||
pub fn documents_file(&self) -> &File {
|
||||
self.documents.get_ref()
|
||||
}
|
||||
|
||||
pub fn settings(&mut self) -> Result<self::settings::Settings> {
|
||||
Ok(serde_json::from_reader(&mut self.settings)?)
|
||||
}
|
||||
|
||||
@@ -203,6 +203,10 @@ impl V2IndexReader {
|
||||
.map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) }))
|
||||
}
|
||||
|
||||
pub fn documents_file(&self) -> &File {
|
||||
self.documents.get_ref()
|
||||
}
|
||||
|
||||
pub fn settings(&mut self) -> Result<Settings<Checked>> {
|
||||
Ok(self.settings.clone())
|
||||
}
|
||||
|
||||
@@ -215,6 +215,10 @@ impl V3IndexReader {
|
||||
.map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) }))
|
||||
}
|
||||
|
||||
pub fn documents_file(&self) -> &File {
|
||||
self.documents.get_ref()
|
||||
}
|
||||
|
||||
pub fn settings(&mut self) -> Result<Settings<Checked>> {
|
||||
Ok(self.settings.clone())
|
||||
}
|
||||
|
||||
@@ -210,6 +210,10 @@ impl V4IndexReader {
|
||||
.map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) }))
|
||||
}
|
||||
|
||||
pub fn documents_file(&self) -> &File {
|
||||
self.documents.get_ref()
|
||||
}
|
||||
|
||||
pub fn settings(&mut self) -> Result<Settings<Checked>> {
|
||||
Ok(self.settings.clone())
|
||||
}
|
||||
|
||||
@@ -247,6 +247,10 @@ impl V5IndexReader {
|
||||
.map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) }))
|
||||
}
|
||||
|
||||
pub fn documents_file(&self) -> &File {
|
||||
self.documents.get_ref()
|
||||
}
|
||||
|
||||
pub fn settings(&mut self) -> Result<Settings<Checked>> {
|
||||
Ok(self.settings.clone())
|
||||
}
|
||||
|
||||
@@ -24,7 +24,7 @@ pub type Batch = meilisearch_types::batches::Batch;
|
||||
pub type Key = meilisearch_types::keys::Key;
|
||||
pub type ChatCompletionSettings = meilisearch_types::features::ChatCompletionSettings;
|
||||
pub type RuntimeTogglableFeatures = meilisearch_types::features::RuntimeTogglableFeatures;
|
||||
pub type Network = meilisearch_types::features::Network;
|
||||
pub type Network = meilisearch_types::network::Network;
|
||||
|
||||
// ===== Other types to clarify the code of the compat module
|
||||
// everything related to the tasks
|
||||
@@ -284,6 +284,10 @@ impl V6IndexReader {
|
||||
.map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) }))
|
||||
}
|
||||
|
||||
pub fn documents_file(&self) -> &File {
|
||||
self.documents.get_ref()
|
||||
}
|
||||
|
||||
pub fn settings(&mut self) -> Result<Settings<Checked>> {
|
||||
let mut settings: Settings<Unchecked> = serde_json::from_reader(&mut self.settings)?;
|
||||
patch_embedders(&mut settings);
|
||||
|
||||
@@ -5,8 +5,9 @@ use std::path::PathBuf;
|
||||
use flate2::write::GzEncoder;
|
||||
use flate2::Compression;
|
||||
use meilisearch_types::batches::Batch;
|
||||
use meilisearch_types::features::{ChatCompletionSettings, Network, RuntimeTogglableFeatures};
|
||||
use meilisearch_types::features::{ChatCompletionSettings, RuntimeTogglableFeatures};
|
||||
use meilisearch_types::keys::Key;
|
||||
use meilisearch_types::network::Network;
|
||||
use meilisearch_types::settings::{Checked, Settings};
|
||||
use serde_json::{Map, Value};
|
||||
use tempfile::TempDir;
|
||||
|
||||
@@ -148,11 +148,10 @@ impl File {
|
||||
Ok(Self { path: PathBuf::new(), file: None })
|
||||
}
|
||||
|
||||
pub fn persist(self) -> Result<()> {
|
||||
if let Some(file) = self.file {
|
||||
file.persist(&self.path)?;
|
||||
}
|
||||
Ok(())
|
||||
pub fn persist(self) -> Result<Option<StdFile>> {
|
||||
let Some(file) = self.file else { return Ok(None) };
|
||||
|
||||
Ok(Some(file.persist(&self.path)?))
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -129,6 +129,7 @@ fn main() {
|
||||
&mut new_fields_ids_map,
|
||||
&|| false,
|
||||
Progress::default(),
|
||||
None,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
|
||||
@@ -26,7 +26,7 @@ flate2 = "1.1.2"
|
||||
indexmap = "2.9.0"
|
||||
meilisearch-auth = { path = "../meilisearch-auth" }
|
||||
meilisearch-types = { path = "../meilisearch-types" }
|
||||
memmap2 = "0.9.5"
|
||||
memmap2 = "0.9.7"
|
||||
page_size = "0.6.0"
|
||||
rayon = "1.10.0"
|
||||
roaring = { version = "0.10.12", features = ["serde"] }
|
||||
|
||||
@@ -147,6 +147,7 @@ impl<'a> Dump<'a> {
|
||||
canceled_by: task.canceled_by,
|
||||
details: task.details,
|
||||
status: task.status,
|
||||
network: task.network,
|
||||
kind: match task.kind {
|
||||
KindDump::DocumentImport {
|
||||
primary_key,
|
||||
|
||||
@@ -1,8 +1,9 @@
|
||||
use std::sync::{Arc, RwLock};
|
||||
|
||||
use meilisearch_types::features::{InstanceTogglableFeatures, Network, RuntimeTogglableFeatures};
|
||||
use meilisearch_types::features::{InstanceTogglableFeatures, RuntimeTogglableFeatures};
|
||||
use meilisearch_types::heed::types::{SerdeJson, Str};
|
||||
use meilisearch_types::heed::{Database, Env, RwTxn, WithoutTls};
|
||||
use meilisearch_types::network::Network;
|
||||
|
||||
use crate::error::FeatureNotEnabledError;
|
||||
use crate::Result;
|
||||
@@ -24,7 +25,6 @@ pub(crate) struct FeatureData {
|
||||
persisted: Database<Str, SerdeJson<RuntimeTogglableFeatures>>,
|
||||
runtime: Arc<RwLock<RuntimeTogglableFeatures>>,
|
||||
network: Arc<RwLock<Network>>,
|
||||
experimental_personalization_api_key: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
@@ -175,12 +175,7 @@ impl FeatureData {
|
||||
|
||||
let persisted_features: RuntimeTogglableFeatures =
|
||||
runtime_features_db.get(wtxn, db_keys::EXPERIMENTAL_FEATURES)?.unwrap_or_default();
|
||||
let InstanceTogglableFeatures {
|
||||
metrics,
|
||||
logs_route,
|
||||
contains_filter,
|
||||
experimental_personalization_api_key,
|
||||
} = instance_features;
|
||||
let InstanceTogglableFeatures { metrics, logs_route, contains_filter } = instance_features;
|
||||
let runtime = Arc::new(RwLock::new(RuntimeTogglableFeatures {
|
||||
metrics: metrics || persisted_features.metrics,
|
||||
logs_route: logs_route || persisted_features.logs_route,
|
||||
@@ -195,7 +190,6 @@ impl FeatureData {
|
||||
persisted: runtime_features_db,
|
||||
runtime,
|
||||
network: Arc::new(RwLock::new(network)),
|
||||
experimental_personalization_api_key,
|
||||
})
|
||||
}
|
||||
|
||||
@@ -226,10 +220,6 @@ impl FeatureData {
|
||||
RoFeatures::new(self)
|
||||
}
|
||||
|
||||
pub fn experimental_personalization_api_key(&self) -> Option<&String> {
|
||||
self.experimental_personalization_api_key.as_ref()
|
||||
}
|
||||
|
||||
pub fn put_network(&self, mut wtxn: RwTxn, new_network: Network) -> Result<()> {
|
||||
self.persisted.remap_data_type::<SerdeJson<Network>>().put(
|
||||
&mut wtxn,
|
||||
|
||||
@@ -20,6 +20,7 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String {
|
||||
|
||||
let IndexScheduler {
|
||||
cleanup_enabled: _,
|
||||
experimental_no_edition_2024_for_dumps: _,
|
||||
processing_tasks,
|
||||
env,
|
||||
version,
|
||||
@@ -212,6 +213,7 @@ pub fn snapshot_task(task: &Task) -> String {
|
||||
details,
|
||||
status,
|
||||
kind,
|
||||
network,
|
||||
} = task;
|
||||
snap.push('{');
|
||||
snap.push_str(&format!("uid: {uid}, "));
|
||||
@@ -229,6 +231,9 @@ pub fn snapshot_task(task: &Task) -> String {
|
||||
snap.push_str(&format!("details: {}, ", &snapshot_details(details)));
|
||||
}
|
||||
snap.push_str(&format!("kind: {kind:?}"));
|
||||
if let Some(network) = network {
|
||||
snap.push_str(&format!("network: {network:?}, "))
|
||||
}
|
||||
|
||||
snap.push('}');
|
||||
snap
|
||||
|
||||
@@ -52,7 +52,7 @@ use flate2::bufread::GzEncoder;
|
||||
use flate2::Compression;
|
||||
use meilisearch_types::batches::Batch;
|
||||
use meilisearch_types::features::{
|
||||
ChatCompletionSettings, InstanceTogglableFeatures, Network, RuntimeTogglableFeatures,
|
||||
ChatCompletionSettings, InstanceTogglableFeatures, RuntimeTogglableFeatures,
|
||||
};
|
||||
use meilisearch_types::heed::byteorder::BE;
|
||||
use meilisearch_types::heed::types::{DecodeIgnore, SerdeJson, Str, I128};
|
||||
@@ -63,8 +63,9 @@ use meilisearch_types::milli::vector::{
|
||||
Embedder, EmbedderOptions, RuntimeEmbedder, RuntimeEmbedders, RuntimeFragment,
|
||||
};
|
||||
use meilisearch_types::milli::{self, Index};
|
||||
use meilisearch_types::network::Network;
|
||||
use meilisearch_types::task_view::TaskView;
|
||||
use meilisearch_types::tasks::{KindWithContent, Task};
|
||||
use meilisearch_types::tasks::{KindWithContent, Task, TaskNetwork};
|
||||
use milli::vector::db::IndexEmbeddingConfig;
|
||||
use processing::ProcessingTasks;
|
||||
pub use queue::Query;
|
||||
@@ -168,6 +169,9 @@ pub struct IndexScheduler {
|
||||
/// Whether we should automatically cleanup the task queue or not.
|
||||
pub(crate) cleanup_enabled: bool,
|
||||
|
||||
/// Whether we should use the old document indexer or the new one.
|
||||
pub(crate) experimental_no_edition_2024_for_dumps: bool,
|
||||
|
||||
/// The webhook url we should send tasks to after processing every batches.
|
||||
pub(crate) webhook_url: Option<String>,
|
||||
/// The Authorization header to send to the webhook URL.
|
||||
@@ -210,6 +214,7 @@ impl IndexScheduler {
|
||||
|
||||
index_mapper: self.index_mapper.clone(),
|
||||
cleanup_enabled: self.cleanup_enabled,
|
||||
experimental_no_edition_2024_for_dumps: self.experimental_no_edition_2024_for_dumps,
|
||||
webhook_url: self.webhook_url.clone(),
|
||||
webhook_authorization_header: self.webhook_authorization_header.clone(),
|
||||
embedders: self.embedders.clone(),
|
||||
@@ -280,8 +285,7 @@ impl IndexScheduler {
|
||||
let version = versioning::Versioning::new(&env, from_db_version)?;
|
||||
|
||||
let mut wtxn = env.write_txn()?;
|
||||
let features =
|
||||
features::FeatureData::new(&env, &mut wtxn, options.instance_features.clone())?;
|
||||
let features = features::FeatureData::new(&env, &mut wtxn, options.instance_features)?;
|
||||
let queue = Queue::new(&env, &mut wtxn, &options)?;
|
||||
let index_mapper = IndexMapper::new(&env, &mut wtxn, &options, budget)?;
|
||||
let chat_settings = env.create_database(&mut wtxn, Some(CHAT_SETTINGS_DB_NAME))?;
|
||||
@@ -297,6 +301,9 @@ impl IndexScheduler {
|
||||
index_mapper,
|
||||
env,
|
||||
cleanup_enabled: options.cleanup_enabled,
|
||||
experimental_no_edition_2024_for_dumps: options
|
||||
.indexer_config
|
||||
.experimental_no_edition_2024_for_dumps,
|
||||
webhook_url: options.webhook_url,
|
||||
webhook_authorization_header: options.webhook_authorization_header,
|
||||
embedders: Default::default(),
|
||||
@@ -595,6 +602,11 @@ impl IndexScheduler {
|
||||
Ok(nbr_index_processing_tasks > 0)
|
||||
}
|
||||
|
||||
/// Whether the index should use the old document indexer.
|
||||
pub fn no_edition_2024_for_dumps(&self) -> bool {
|
||||
self.experimental_no_edition_2024_for_dumps
|
||||
}
|
||||
|
||||
/// Return the tasks matching the query from the user's point of view along
|
||||
/// with the total number of tasks matching the query, ignoring from and limit.
|
||||
///
|
||||
@@ -633,6 +645,16 @@ impl IndexScheduler {
|
||||
self.queue.get_task_ids_from_authorized_indexes(&rtxn, query, filters, &processing)
|
||||
}
|
||||
|
||||
pub fn set_task_network(&self, task_id: TaskId, network: TaskNetwork) -> Result<()> {
|
||||
let mut wtxn = self.env.write_txn()?;
|
||||
let mut task =
|
||||
self.queue.tasks.get_task(&wtxn, task_id)?.ok_or(Error::TaskNotFound(task_id))?;
|
||||
task.network = Some(network);
|
||||
self.queue.tasks.all_tasks.put(&mut wtxn, &task_id, &task)?;
|
||||
wtxn.commit()?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Return the batches matching the query from the user's point of view along
|
||||
/// with the total number of batches matching the query, ignoring from and limit.
|
||||
///
|
||||
@@ -835,10 +857,6 @@ impl IndexScheduler {
|
||||
self.features.features()
|
||||
}
|
||||
|
||||
pub fn experimental_personalization_api_key(&self) -> Option<&String> {
|
||||
self.features.experimental_personalization_api_key()
|
||||
}
|
||||
|
||||
pub fn put_runtime_features(&self, features: RuntimeTogglableFeatures) -> Result<()> {
|
||||
let wtxn = self.env.write_txn().map_err(Error::HeedTransaction)?;
|
||||
self.features.put_runtime_features(wtxn, features)?;
|
||||
|
||||
@@ -279,6 +279,7 @@ impl Queue {
|
||||
details: kind.default_details(),
|
||||
status: Status::Enqueued,
|
||||
kind: kind.clone(),
|
||||
network: None,
|
||||
};
|
||||
// For deletion and cancelation tasks, we want to make extra sure that they
|
||||
// don't attempt to delete/cancel tasks that are newer than themselves.
|
||||
|
||||
@@ -97,7 +97,22 @@ impl TaskQueue {
|
||||
Ok(self.all_tasks.get(rtxn, &task_id)?)
|
||||
}
|
||||
|
||||
pub(crate) fn update_task(&self, wtxn: &mut RwTxn, task: &Task) -> Result<()> {
|
||||
/// Update the inverted task indexes and write the new value of the task.
|
||||
///
|
||||
/// The passed `task` object typically comes from a previous transaction, so two kinds of modification might have occurred:
|
||||
/// 1. Modification to the `task` object after loading it from the DB (the purpose of this method is to persist these changes)
|
||||
/// 2. Modification to the task committed by another transaction in the DB (an annoying consequence of having lost the original
|
||||
/// transaction from which the `task` instance was deserialized)
|
||||
///
|
||||
/// When calling this function, this `task` is modified to take into account any existing `network`
|
||||
/// that can have been added since the task was loaded into memory.
|
||||
///
|
||||
/// Any other modification to the task that was committed from the DB since the parameter was pulled from the DB will be overwritten.
|
||||
///
|
||||
/// # Errors
|
||||
///
|
||||
/// - CorruptedTaskQueue: The task doesn't exist in the database
|
||||
pub(crate) fn update_task(&self, wtxn: &mut RwTxn, task: &mut Task) -> Result<()> {
|
||||
let old_task = self.get_task(wtxn, task.uid)?.ok_or(Error::CorruptedTaskQueue)?;
|
||||
let reprocessing = old_task.status != Status::Enqueued;
|
||||
|
||||
@@ -157,6 +172,12 @@ impl TaskQueue {
|
||||
}
|
||||
}
|
||||
|
||||
task.network = match (old_task.network, task.network.take()) {
|
||||
(None, None) => None,
|
||||
(None, Some(network)) | (Some(network), None) => Some(network),
|
||||
(Some(_), Some(network)) => Some(network),
|
||||
};
|
||||
|
||||
self.all_tasks.put(wtxn, &task.uid, task)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -268,7 +268,7 @@ impl IndexScheduler {
|
||||
|
||||
self.queue
|
||||
.tasks
|
||||
.update_task(&mut wtxn, &task)
|
||||
.update_task(&mut wtxn, &mut task)
|
||||
.map_err(|e| Error::UnrecoverableError(Box::new(e)))?;
|
||||
}
|
||||
if let Some(canceled_by) = canceled_by {
|
||||
@@ -349,7 +349,7 @@ impl IndexScheduler {
|
||||
|
||||
self.queue
|
||||
.tasks
|
||||
.update_task(&mut wtxn, &task)
|
||||
.update_task(&mut wtxn, &mut task)
|
||||
.map_err(|e| Error::UnrecoverableError(Box::new(e)))?;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -5,6 +5,7 @@ use std::sync::atomic::Ordering;
|
||||
|
||||
use dump::IndexMetadata;
|
||||
use meilisearch_types::milli::constants::RESERVED_VECTORS_FIELD_NAME;
|
||||
use meilisearch_types::milli::index::EmbeddingsWithMetadata;
|
||||
use meilisearch_types::milli::progress::{Progress, VariableNameStep};
|
||||
use meilisearch_types::milli::vector::parsed_vectors::{ExplicitVectors, VectorOrArrayOfVectors};
|
||||
use meilisearch_types::milli::{self};
|
||||
@@ -227,12 +228,21 @@ impl IndexScheduler {
|
||||
return Err(Error::from_milli(user_err, Some(uid.to_string())));
|
||||
};
|
||||
|
||||
for (embedder_name, (embeddings, regenerate)) in embeddings {
|
||||
for (
|
||||
embedder_name,
|
||||
EmbeddingsWithMetadata { embeddings, regenerate, has_fragments },
|
||||
) in embeddings
|
||||
{
|
||||
let embeddings = ExplicitVectors {
|
||||
embeddings: Some(VectorOrArrayOfVectors::from_array_of_vectors(
|
||||
embeddings,
|
||||
)),
|
||||
regenerate,
|
||||
regenerate: regenerate &&
|
||||
// Meilisearch does not handle well dumps with fragments, because as the fragments
|
||||
// are marked as user-provided,
|
||||
// all embeddings would be regenerated on any settings change or document update.
|
||||
// To prevent this, we mark embeddings has non regenerate in this case.
|
||||
!has_fragments,
|
||||
};
|
||||
vectors.insert(embedder_name, serde_json::to_value(embeddings).unwrap());
|
||||
}
|
||||
|
||||
@@ -9,6 +9,7 @@ use flate2::write::GzEncoder;
|
||||
use flate2::Compression;
|
||||
use meilisearch_types::index_uid_pattern::IndexUidPattern;
|
||||
use meilisearch_types::milli::constants::RESERVED_VECTORS_FIELD_NAME;
|
||||
use meilisearch_types::milli::index::EmbeddingsWithMetadata;
|
||||
use meilisearch_types::milli::progress::{Progress, VariableNameStep};
|
||||
use meilisearch_types::milli::update::{request_threads, Setting};
|
||||
use meilisearch_types::milli::vector::parsed_vectors::{ExplicitVectors, VectorOrArrayOfVectors};
|
||||
@@ -229,12 +230,21 @@ impl IndexScheduler {
|
||||
));
|
||||
};
|
||||
|
||||
for (embedder_name, (embeddings, regenerate)) in embeddings {
|
||||
for (
|
||||
embedder_name,
|
||||
EmbeddingsWithMetadata { embeddings, regenerate, has_fragments },
|
||||
) in embeddings
|
||||
{
|
||||
let embeddings = ExplicitVectors {
|
||||
embeddings: Some(
|
||||
VectorOrArrayOfVectors::from_array_of_vectors(embeddings),
|
||||
),
|
||||
regenerate,
|
||||
regenerate: regenerate &&
|
||||
// Meilisearch does not handle well dumps with fragments, because as the fragments
|
||||
// are marked as user-provided,
|
||||
// all embeddings would be regenerated on any settings change or document update.
|
||||
// To prevent this, we mark embeddings has non regenerate in this case.
|
||||
!has_fragments,
|
||||
};
|
||||
vectors.insert(
|
||||
embedder_name,
|
||||
|
||||
@@ -66,6 +66,11 @@ impl IndexScheduler {
|
||||
}
|
||||
IndexOperation::DocumentOperation { index_uid, primary_key, operations, mut tasks } => {
|
||||
progress.update_progress(DocumentOperationProgress::RetrievingConfig);
|
||||
|
||||
let network = self.network();
|
||||
|
||||
let shards = network.shards();
|
||||
|
||||
// TODO: at some point, for better efficiency we might want to reuse the bumpalo for successive batches.
|
||||
// this is made difficult by the fact we're doing private clones of the index scheduler and sending it
|
||||
// to a fresh thread.
|
||||
@@ -130,6 +135,7 @@ impl IndexScheduler {
|
||||
&mut new_fields_ids_map,
|
||||
&|| must_stop_processing.get(),
|
||||
progress.clone(),
|
||||
shards.as_ref(),
|
||||
)
|
||||
.map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?;
|
||||
|
||||
|
||||
@@ -3,6 +3,7 @@ use std::collections::BTreeMap;
|
||||
use big_s::S;
|
||||
use insta::assert_json_snapshot;
|
||||
use meili_snap::{json_string, snapshot};
|
||||
use meilisearch_types::milli::index::EmbeddingsWithMetadata;
|
||||
use meilisearch_types::milli::update::Setting;
|
||||
use meilisearch_types::milli::vector::settings::EmbeddingSettings;
|
||||
use meilisearch_types::milli::vector::SearchQuery;
|
||||
@@ -220,8 +221,8 @@ fn import_vectors() {
|
||||
|
||||
let embeddings = index.embeddings(&rtxn, 0).unwrap();
|
||||
|
||||
assert_json_snapshot!(embeddings[&simple_hf_name].0[0] == lab_embed, @"true");
|
||||
assert_json_snapshot!(embeddings[&fakerest_name].0[0] == beagle_embed, @"true");
|
||||
assert_json_snapshot!(embeddings[&simple_hf_name].embeddings[0] == lab_embed, @"true");
|
||||
assert_json_snapshot!(embeddings[&fakerest_name].embeddings[0] == beagle_embed, @"true");
|
||||
|
||||
let doc = index.documents(&rtxn, std::iter::once(0)).unwrap()[0].1;
|
||||
let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||
@@ -311,9 +312,9 @@ fn import_vectors() {
|
||||
let embeddings = index.embeddings(&rtxn, 0).unwrap();
|
||||
|
||||
// automatically changed to patou because set to regenerate
|
||||
assert_json_snapshot!(embeddings[&simple_hf_name].0[0] == patou_embed, @"true");
|
||||
assert_json_snapshot!(embeddings[&simple_hf_name].embeddings[0] == patou_embed, @"true");
|
||||
// remained beagle
|
||||
assert_json_snapshot!(embeddings[&fakerest_name].0[0] == beagle_embed, @"true");
|
||||
assert_json_snapshot!(embeddings[&fakerest_name].embeddings[0] == beagle_embed, @"true");
|
||||
|
||||
let doc = index.documents(&rtxn, std::iter::once(0)).unwrap()[0].1;
|
||||
let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||
@@ -497,13 +498,13 @@ fn import_vectors_first_and_embedder_later() {
|
||||
|
||||
let docid = index.external_documents_ids.get(&rtxn, "0").unwrap().unwrap();
|
||||
let embeddings = index.embeddings(&rtxn, docid).unwrap();
|
||||
let (embedding, _) = &embeddings["my_doggo_embedder"];
|
||||
assert!(!embedding.is_empty(), "{embedding:?}");
|
||||
let EmbeddingsWithMetadata { embeddings, .. } = &embeddings["my_doggo_embedder"];
|
||||
assert!(!embeddings.is_empty(), "{embeddings:?}");
|
||||
|
||||
// the document with the id 3 should keep its original embedding
|
||||
let docid = index.external_documents_ids.get(&rtxn, "3").unwrap().unwrap();
|
||||
let embeddings = index.embeddings(&rtxn, docid).unwrap();
|
||||
let (embeddings, _) = &embeddings["my_doggo_embedder"];
|
||||
let EmbeddingsWithMetadata { embeddings, .. } = &embeddings["my_doggo_embedder"];
|
||||
|
||||
snapshot!(embeddings.len(), @"1");
|
||||
assert!(embeddings[0].iter().all(|i| *i == 3.0), "{:?}", embeddings[0]);
|
||||
@@ -558,7 +559,7 @@ fn import_vectors_first_and_embedder_later() {
|
||||
"###);
|
||||
|
||||
let embeddings = index.embeddings(&rtxn, docid).unwrap();
|
||||
let (embedding, _) = &embeddings["my_doggo_embedder"];
|
||||
let EmbeddingsWithMetadata { embeddings: embedding, .. } = &embeddings["my_doggo_embedder"];
|
||||
|
||||
assert!(!embedding.is_empty());
|
||||
assert!(!embedding[0].iter().all(|i| *i == 3.0), "{:?}", embedding[0]);
|
||||
@@ -566,7 +567,7 @@ fn import_vectors_first_and_embedder_later() {
|
||||
// the document with the id 4 should generate an embedding
|
||||
let docid = index.external_documents_ids.get(&rtxn, "4").unwrap().unwrap();
|
||||
let embeddings = index.embeddings(&rtxn, docid).unwrap();
|
||||
let (embedding, _) = &embeddings["my_doggo_embedder"];
|
||||
let EmbeddingsWithMetadata { embeddings: embedding, .. } = &embeddings["my_doggo_embedder"];
|
||||
|
||||
assert!(!embedding.is_empty());
|
||||
}
|
||||
@@ -696,7 +697,7 @@ fn delete_document_containing_vector() {
|
||||
"###);
|
||||
let docid = index.external_documents_ids.get(&rtxn, "0").unwrap().unwrap();
|
||||
let embeddings = index.embeddings(&rtxn, docid).unwrap();
|
||||
let (embedding, _) = &embeddings["manual"];
|
||||
let EmbeddingsWithMetadata { embeddings: embedding, .. } = &embeddings["manual"];
|
||||
assert!(!embedding.is_empty(), "{embedding:?}");
|
||||
|
||||
index_scheduler
|
||||
|
||||
@@ -88,6 +88,7 @@ pub fn upgrade_index_scheduler(
|
||||
details: Some(Details::UpgradeDatabase { from, to }),
|
||||
status: Status::Enqueued,
|
||||
kind: KindWithContent::UpgradeDatabase { from },
|
||||
network: None,
|
||||
},
|
||||
)?;
|
||||
wtxn.commit()?;
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
//! Utility functions on the DBs. Mainly getter and setters.
|
||||
|
||||
use crate::milli::progress::EmbedderStats;
|
||||
use std::collections::{BTreeSet, HashSet};
|
||||
use std::ops::Bound;
|
||||
use std::sync::Arc;
|
||||
@@ -15,6 +14,7 @@ use meilisearch_types::tasks::{
|
||||
use roaring::RoaringBitmap;
|
||||
use time::OffsetDateTime;
|
||||
|
||||
use crate::milli::progress::EmbedderStats;
|
||||
use crate::{Error, Result, Task, TaskId, BEI128};
|
||||
|
||||
/// This structure contains all the information required to write a batch in the database without reading the tasks.
|
||||
@@ -372,6 +372,7 @@ impl crate::IndexScheduler {
|
||||
details,
|
||||
status,
|
||||
kind,
|
||||
network: _,
|
||||
} = task;
|
||||
assert_eq!(uid, task.uid);
|
||||
if task.status != Status::Enqueued {
|
||||
|
||||
@@ -24,7 +24,7 @@ enum-iterator = "2.1.0"
|
||||
file-store = { path = "../file-store" }
|
||||
flate2 = "1.1.2"
|
||||
fst = "0.4.7"
|
||||
memmap2 = "0.9.5"
|
||||
memmap2 = "0.9.7"
|
||||
milli = { path = "../milli" }
|
||||
roaring = { version = "0.10.12", features = ["serde"] }
|
||||
rustc-hash = "2.1.1"
|
||||
|
||||
@@ -236,9 +236,11 @@ InvalidDocumentFields , InvalidRequest , BAD_REQU
|
||||
InvalidDocumentRetrieveVectors , InvalidRequest , BAD_REQUEST ;
|
||||
MissingDocumentFilter , InvalidRequest , BAD_REQUEST ;
|
||||
MissingDocumentEditionFunction , InvalidRequest , BAD_REQUEST ;
|
||||
InconsistentDocumentChangeHeaders , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidDocumentFilter , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidDocumentSort , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidDocumentGeoField , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidHeaderValue , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidVectorDimensions , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidVectorsType , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidDocumentId , InvalidRequest , BAD_REQUEST ;
|
||||
@@ -267,7 +269,9 @@ InvalidMultiSearchRemote , InvalidRequest , BAD_REQU
|
||||
InvalidMultiSearchWeight , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidNetworkRemotes , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidNetworkSelf , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidNetworkSharding , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidNetworkSearchApiKey , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidNetworkWriteApiKey , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidNetworkUrl , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSearchAttributesToSearchOn , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSearchAttributesToCrop , InvalidRequest , BAD_REQUEST ;
|
||||
@@ -310,8 +314,6 @@ InvalidSearchShowRankingScoreDetails , InvalidRequest , BAD_REQU
|
||||
InvalidSimilarShowRankingScoreDetails , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSearchSort , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSearchDistinct , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSearchPersonalize , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSearchPersonalizeUserContext , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSearchMediaAndVector , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSettingsDisplayedAttributes , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSettingsDistinctAttribute , InvalidRequest , BAD_REQUEST ;
|
||||
@@ -652,18 +654,6 @@ impl fmt::Display for deserr_codes::InvalidNetworkSearchApiKey {
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for deserr_codes::InvalidSearchPersonalize {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
write!(f, "the value of `personalize` is invalid, expected a JSON object with optional `userContext` string.")
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for deserr_codes::InvalidSearchPersonalizeUserContext {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
write!(f, "the value of `userContext` is invalid, expected a string.")
|
||||
}
|
||||
}
|
||||
|
||||
#[macro_export]
|
||||
macro_rules! internal_error {
|
||||
($target:ty : $($other:path), *) => {
|
||||
|
||||
@@ -1,5 +1,3 @@
|
||||
use std::collections::BTreeMap;
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::error::{Code, ResponseError};
|
||||
@@ -25,29 +23,11 @@ pub struct RuntimeTogglableFeatures {
|
||||
pub multimodal: bool,
|
||||
}
|
||||
|
||||
#[derive(Default, Debug, Clone)]
|
||||
#[derive(Default, Debug, Clone, Copy)]
|
||||
pub struct InstanceTogglableFeatures {
|
||||
pub metrics: bool,
|
||||
pub logs_route: bool,
|
||||
pub contains_filter: bool,
|
||||
pub experimental_personalization_api_key: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct Remote {
|
||||
pub url: String,
|
||||
#[serde(default)]
|
||||
pub search_api_key: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, Default)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct Network {
|
||||
#[serde(default, rename = "self")]
|
||||
pub local: Option<String>,
|
||||
#[serde(default)]
|
||||
pub remotes: BTreeMap<String, Remote>,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, Default)]
|
||||
|
||||
@@ -233,9 +233,6 @@ pub enum Action {
|
||||
#[serde(rename = "*")]
|
||||
#[deserr(rename = "*")]
|
||||
All = 0,
|
||||
#[serde(rename = "*.get")]
|
||||
#[deserr(rename = "*.get")]
|
||||
AllGet,
|
||||
#[serde(rename = "search")]
|
||||
#[deserr(rename = "search")]
|
||||
Search,
|
||||
@@ -365,6 +362,9 @@ pub enum Action {
|
||||
#[serde(rename = "chatsSettings.update")]
|
||||
#[deserr(rename = "chatsSettings.update")]
|
||||
ChatsSettingsUpdate,
|
||||
#[serde(rename = "*.get")]
|
||||
#[deserr(rename = "*.get")]
|
||||
AllGet,
|
||||
}
|
||||
|
||||
impl Action {
|
||||
@@ -403,6 +403,7 @@ impl Action {
|
||||
METRICS_GET => Some(Self::MetricsGet),
|
||||
DUMPS_ALL => Some(Self::DumpsAll),
|
||||
DUMPS_CREATE => Some(Self::DumpsCreate),
|
||||
SNAPSHOTS_ALL => Some(Self::SnapshotsAll),
|
||||
SNAPSHOTS_CREATE => Some(Self::SnapshotsCreate),
|
||||
VERSION => Some(Self::Version),
|
||||
KEYS_CREATE => Some(Self::KeysAdd),
|
||||
@@ -411,8 +412,10 @@ impl Action {
|
||||
KEYS_DELETE => Some(Self::KeysDelete),
|
||||
EXPERIMENTAL_FEATURES_GET => Some(Self::ExperimentalFeaturesGet),
|
||||
EXPERIMENTAL_FEATURES_UPDATE => Some(Self::ExperimentalFeaturesUpdate),
|
||||
EXPORT => Some(Self::Export),
|
||||
NETWORK_GET => Some(Self::NetworkGet),
|
||||
NETWORK_UPDATE => Some(Self::NetworkUpdate),
|
||||
ALL_GET => Some(Self::AllGet),
|
||||
_otherwise => None,
|
||||
}
|
||||
}
|
||||
@@ -497,6 +500,7 @@ pub mod actions {
|
||||
pub const METRICS_GET: u8 = MetricsGet.repr();
|
||||
pub const DUMPS_ALL: u8 = DumpsAll.repr();
|
||||
pub const DUMPS_CREATE: u8 = DumpsCreate.repr();
|
||||
pub const SNAPSHOTS_ALL: u8 = SnapshotsAll.repr();
|
||||
pub const SNAPSHOTS_CREATE: u8 = SnapshotsCreate.repr();
|
||||
pub const VERSION: u8 = Version.repr();
|
||||
pub const KEYS_CREATE: u8 = KeysAdd.repr();
|
||||
@@ -519,3 +523,68 @@ pub mod actions {
|
||||
pub const CHATS_SETTINGS_GET: u8 = ChatsSettingsGet.repr();
|
||||
pub const CHATS_SETTINGS_UPDATE: u8 = ChatsSettingsUpdate.repr();
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub(crate) mod test {
|
||||
use super::actions::*;
|
||||
use super::Action::*;
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_action_repr_and_constants() {
|
||||
assert!(All.repr() == 0 && ALL == 0);
|
||||
assert!(Search.repr() == 1 && SEARCH == 1);
|
||||
assert!(DocumentsAll.repr() == 2 && DOCUMENTS_ALL == 2);
|
||||
assert!(DocumentsAdd.repr() == 3 && DOCUMENTS_ADD == 3);
|
||||
assert!(DocumentsGet.repr() == 4 && DOCUMENTS_GET == 4);
|
||||
assert!(DocumentsDelete.repr() == 5 && DOCUMENTS_DELETE == 5);
|
||||
assert!(IndexesAll.repr() == 6 && INDEXES_ALL == 6);
|
||||
assert!(IndexesAdd.repr() == 7 && INDEXES_CREATE == 7);
|
||||
assert!(IndexesGet.repr() == 8 && INDEXES_GET == 8);
|
||||
assert!(IndexesUpdate.repr() == 9 && INDEXES_UPDATE == 9);
|
||||
assert!(IndexesDelete.repr() == 10 && INDEXES_DELETE == 10);
|
||||
assert!(IndexesSwap.repr() == 11 && INDEXES_SWAP == 11);
|
||||
assert!(TasksAll.repr() == 12 && TASKS_ALL == 12);
|
||||
assert!(TasksCancel.repr() == 13 && TASKS_CANCEL == 13);
|
||||
assert!(TasksDelete.repr() == 14 && TASKS_DELETE == 14);
|
||||
assert!(TasksGet.repr() == 15 && TASKS_GET == 15);
|
||||
assert!(SettingsAll.repr() == 16 && SETTINGS_ALL == 16);
|
||||
assert!(SettingsGet.repr() == 17 && SETTINGS_GET == 17);
|
||||
assert!(SettingsUpdate.repr() == 18 && SETTINGS_UPDATE == 18);
|
||||
assert!(StatsAll.repr() == 19 && STATS_ALL == 19);
|
||||
assert!(StatsGet.repr() == 20 && STATS_GET == 20);
|
||||
assert!(MetricsAll.repr() == 21 && METRICS_ALL == 21);
|
||||
assert!(MetricsGet.repr() == 22 && METRICS_GET == 22);
|
||||
assert!(DumpsAll.repr() == 23 && DUMPS_ALL == 23);
|
||||
assert!(DumpsCreate.repr() == 24 && DUMPS_CREATE == 24);
|
||||
assert!(SnapshotsAll.repr() == 25 && SNAPSHOTS_ALL == 25);
|
||||
assert!(SnapshotsCreate.repr() == 26 && SNAPSHOTS_CREATE == 26);
|
||||
assert!(Version.repr() == 27 && VERSION == 27);
|
||||
assert!(KeysAdd.repr() == 28 && KEYS_CREATE == 28);
|
||||
assert!(KeysGet.repr() == 29 && KEYS_GET == 29);
|
||||
assert!(KeysUpdate.repr() == 30 && KEYS_UPDATE == 30);
|
||||
assert!(KeysDelete.repr() == 31 && KEYS_DELETE == 31);
|
||||
assert!(ExperimentalFeaturesGet.repr() == 32 && EXPERIMENTAL_FEATURES_GET == 32);
|
||||
assert!(ExperimentalFeaturesUpdate.repr() == 33 && EXPERIMENTAL_FEATURES_UPDATE == 33);
|
||||
assert!(Export.repr() == 34 && EXPORT == 34);
|
||||
assert!(NetworkGet.repr() == 35 && NETWORK_GET == 35);
|
||||
assert!(NetworkUpdate.repr() == 36 && NETWORK_UPDATE == 36);
|
||||
assert!(ChatCompletions.repr() == 37 && CHAT_COMPLETIONS == 37);
|
||||
assert!(ChatsAll.repr() == 38 && CHATS_ALL == 38);
|
||||
assert!(ChatsGet.repr() == 39 && CHATS_GET == 39);
|
||||
assert!(ChatsDelete.repr() == 40 && CHATS_DELETE == 40);
|
||||
assert!(ChatsSettingsAll.repr() == 41 && CHATS_SETTINGS_ALL == 41);
|
||||
assert!(ChatsSettingsGet.repr() == 42 && CHATS_SETTINGS_GET == 42);
|
||||
assert!(ChatsSettingsUpdate.repr() == 43 && CHATS_SETTINGS_UPDATE == 43);
|
||||
assert!(AllGet.repr() == 44 && ALL_GET == 44);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_from_repr() {
|
||||
for action in enum_iterator::all::<Action>() {
|
||||
let repr = action.repr();
|
||||
let action_from_repr = Action::from_repr(repr);
|
||||
assert_eq!(Some(action), action_from_repr, "Failed for action: {:?}", action);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -10,6 +10,7 @@ pub mod index_uid;
|
||||
pub mod index_uid_pattern;
|
||||
pub mod keys;
|
||||
pub mod locales;
|
||||
pub mod network;
|
||||
pub mod settings;
|
||||
pub mod star_or;
|
||||
pub mod task_view;
|
||||
|
||||
47
crates/meilisearch-types/src/network.rs
Normal file
47
crates/meilisearch-types/src/network.rs
Normal file
@@ -0,0 +1,47 @@
|
||||
// Copyright © 2025 Meilisearch Some Rights Reserved
|
||||
// This file is part of Meilisearch Enterprise Edition (EE).
|
||||
// Use of this source code is governed by the Business Source License 1.1,
|
||||
// as found in the LICENSE-EE file or at <https://mariadb.com/bsl11>
|
||||
|
||||
use std::collections::BTreeMap;
|
||||
|
||||
use milli::update::new::indexer::sharding::Shards;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, Default)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct Network {
|
||||
#[serde(default, rename = "self")]
|
||||
pub local: Option<String>,
|
||||
#[serde(default)]
|
||||
pub remotes: BTreeMap<String, Remote>,
|
||||
#[serde(default)]
|
||||
pub sharding: bool,
|
||||
}
|
||||
|
||||
impl Network {
|
||||
pub fn shards(&self) -> Option<Shards> {
|
||||
if self.sharding {
|
||||
let this = self.local.as_deref().expect("Inconsistent `sharding` and `self`");
|
||||
let others = self
|
||||
.remotes
|
||||
.keys()
|
||||
.filter(|name| name.as_str() != this)
|
||||
.map(|name| name.to_owned())
|
||||
.collect();
|
||||
Some(Shards { own: vec![this.to_owned()], others })
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct Remote {
|
||||
pub url: String,
|
||||
#[serde(default)]
|
||||
pub search_api_key: Option<String>,
|
||||
#[serde(default)]
|
||||
pub write_api_key: Option<String>,
|
||||
}
|
||||
@@ -11,6 +11,7 @@ use crate::error::ResponseError;
|
||||
use crate::settings::{Settings, Unchecked};
|
||||
use crate::tasks::{
|
||||
serialize_duration, Details, DetailsExportIndexSettings, IndexSwap, Kind, Status, Task, TaskId,
|
||||
TaskNetwork,
|
||||
};
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Serialize, ToSchema)]
|
||||
@@ -51,6 +52,9 @@ pub struct TaskView {
|
||||
#[schema(value_type = String, example = json!("2024-08-08_14:12:09.393Z"))]
|
||||
#[serde(with = "time::serde::rfc3339::option", default)]
|
||||
pub finished_at: Option<OffsetDateTime>,
|
||||
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub network: Option<TaskNetwork>,
|
||||
}
|
||||
|
||||
impl TaskView {
|
||||
@@ -68,6 +72,7 @@ impl TaskView {
|
||||
enqueued_at: task.enqueued_at,
|
||||
started_at: task.started_at,
|
||||
finished_at: task.finished_at,
|
||||
network: task.network.clone(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -42,6 +42,9 @@ pub struct Task {
|
||||
|
||||
pub status: Status,
|
||||
pub kind: KindWithContent,
|
||||
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub network: Option<TaskNetwork>,
|
||||
}
|
||||
|
||||
impl Task {
|
||||
@@ -704,6 +707,36 @@ pub enum Details {
|
||||
},
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Clone, Serialize, Deserialize, ToSchema)]
|
||||
#[serde(untagged, rename_all = "camelCase")]
|
||||
pub enum TaskNetwork {
|
||||
Origin { origin: Origin },
|
||||
Remotes { remote_tasks: BTreeMap<String, RemoteTask> },
|
||||
}
|
||||
#[derive(Debug, PartialEq, Clone, Serialize, Deserialize, ToSchema)]
|
||||
#[serde(untagged, rename_all = "camelCase")]
|
||||
pub struct Origin {
|
||||
pub remote_name: String,
|
||||
pub task_uid: usize,
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Clone, Serialize, Deserialize, ToSchema)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct RemoteTask {
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
task_uid: Option<TaskId>,
|
||||
error: Option<ResponseError>,
|
||||
}
|
||||
|
||||
impl From<Result<TaskId, ResponseError>> for RemoteTask {
|
||||
fn from(res: Result<TaskId, ResponseError>) -> RemoteTask {
|
||||
match res {
|
||||
Ok(task_uid) => RemoteTask { task_uid: Some(task_uid), error: None },
|
||||
Err(err) => RemoteTask { task_uid: None, error: Some(err) },
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Clone, Serialize, Deserialize, ToSchema)]
|
||||
#[schema(rename_all = "camelCase")]
|
||||
pub struct DetailsExportIndexSettings {
|
||||
|
||||
@@ -50,6 +50,7 @@ jsonwebtoken = "9.3.1"
|
||||
lazy_static = "1.5.0"
|
||||
meilisearch-auth = { path = "../meilisearch-auth" }
|
||||
meilisearch-types = { path = "../meilisearch-types" }
|
||||
memmap2 = "0.9.7"
|
||||
mimalloc = { version = "0.1.47", default-features = false }
|
||||
mime = "0.3.17"
|
||||
num_cpus = "1.17.0"
|
||||
@@ -94,7 +95,6 @@ uuid = { version = "1.17.0", features = ["serde", "v4"] }
|
||||
serde_urlencoded = "0.7.1"
|
||||
termcolor = "1.4.1"
|
||||
url = { version = "2.5.4", features = ["serde"] }
|
||||
cohere-rust = "0.6.0"
|
||||
tracing = "0.1.41"
|
||||
tracing-subscriber = { version = "0.3.19", features = ["json"] }
|
||||
tracing-trace = { version = "0.1.0", path = "../tracing-trace" }
|
||||
@@ -115,6 +115,9 @@ utoipa-scalar = { version = "0.3.0", optional = true, features = ["actix-web"] }
|
||||
async-openai = { git = "https://github.com/meilisearch/async-openai", branch = "better-error-handling" }
|
||||
secrecy = "0.10.3"
|
||||
actix-web-lab = { version = "0.24.1", default-features = false }
|
||||
urlencoding = "2.1.3"
|
||||
backoff = { version = "0.4.0", features = ["tokio"] }
|
||||
|
||||
|
||||
[dev-dependencies]
|
||||
actix-rt = "2.10.0"
|
||||
@@ -125,7 +128,6 @@ manifest-dir-macros = "0.1.18"
|
||||
maplit = "1.0.2"
|
||||
meili-snap = { path = "../meili-snap" }
|
||||
temp-env = "0.3.6"
|
||||
urlencoding = "2.1.3"
|
||||
wiremock = "0.6.3"
|
||||
yaup = "0.3.1"
|
||||
|
||||
|
||||
Binary file not shown.
@@ -203,6 +203,7 @@ struct Infos {
|
||||
experimental_composite_embedders: bool,
|
||||
experimental_embedding_cache_entries: usize,
|
||||
experimental_no_snapshot_compaction: bool,
|
||||
experimental_no_edition_2024_for_dumps: bool,
|
||||
experimental_no_edition_2024_for_settings: bool,
|
||||
gpu_enabled: bool,
|
||||
db_path: bool,
|
||||
@@ -281,7 +282,6 @@ impl Infos {
|
||||
indexer_options,
|
||||
config_file_path,
|
||||
no_analytics: _,
|
||||
experimental_personalization_api_key: _,
|
||||
} = options;
|
||||
|
||||
let schedule_snapshot = match schedule_snapshot {
|
||||
@@ -294,6 +294,7 @@ impl Infos {
|
||||
max_indexing_threads,
|
||||
skip_index_budget: _,
|
||||
experimental_no_edition_2024_for_settings,
|
||||
experimental_no_edition_2024_for_dumps,
|
||||
} = indexer_options;
|
||||
|
||||
let RuntimeTogglableFeatures {
|
||||
@@ -330,6 +331,7 @@ impl Infos {
|
||||
experimental_composite_embedders: composite_embedders,
|
||||
experimental_embedding_cache_entries,
|
||||
experimental_no_snapshot_compaction,
|
||||
experimental_no_edition_2024_for_dumps,
|
||||
gpu_enabled: meilisearch_types::milli::vector::is_cuda_enabled(),
|
||||
db_path: db_path != PathBuf::from("./data.ms"),
|
||||
import_dump: import_dump.is_some(),
|
||||
|
||||
@@ -9,6 +9,8 @@ use meilisearch_types::milli::OrderBy;
|
||||
use serde_json::Value;
|
||||
use tokio::task::JoinError;
|
||||
|
||||
use crate::routes::indexes::{PROXY_ORIGIN_REMOTE_HEADER, PROXY_ORIGIN_TASK_UID_HEADER};
|
||||
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
pub enum MeilisearchHttpError {
|
||||
#[error("A Content-Type header is missing. Accepted values for the Content-Type header are: {}",
|
||||
@@ -80,6 +82,16 @@ pub enum MeilisearchHttpError {
|
||||
MissingSearchHybrid,
|
||||
#[error("Invalid request: both `media` and `vector` parameters are present.")]
|
||||
MediaAndVector,
|
||||
#[error("Inconsistent `Origin` headers: {} was provided but {} is missing.\n - Hint: Either both headers should be provided, or none of them", if *is_remote_missing {
|
||||
PROXY_ORIGIN_TASK_UID_HEADER
|
||||
} else { PROXY_ORIGIN_REMOTE_HEADER },
|
||||
if *is_remote_missing {
|
||||
PROXY_ORIGIN_REMOTE_HEADER
|
||||
} else { PROXY_ORIGIN_TASK_UID_HEADER }
|
||||
)]
|
||||
InconsistentOriginHeaders { is_remote_missing: bool },
|
||||
#[error("Invalid value for header {header_name}: {msg}")]
|
||||
InvalidHeaderValue { header_name: &'static str, msg: String },
|
||||
}
|
||||
|
||||
impl MeilisearchHttpError {
|
||||
@@ -124,6 +136,10 @@ impl ErrorCode for MeilisearchHttpError {
|
||||
MeilisearchHttpError::InconsistentFacetOrder { .. } => {
|
||||
Code::InvalidMultiSearchFacetOrder
|
||||
}
|
||||
MeilisearchHttpError::InconsistentOriginHeaders { .. } => {
|
||||
Code::InconsistentDocumentChangeHeaders
|
||||
}
|
||||
MeilisearchHttpError::InvalidHeaderValue { .. } => Code::InvalidHeaderValue,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -9,7 +9,6 @@ pub mod middleware;
|
||||
pub mod option;
|
||||
#[cfg(test)]
|
||||
mod option_test;
|
||||
pub mod personalization;
|
||||
pub mod routes;
|
||||
pub mod search;
|
||||
pub mod search_queue;
|
||||
@@ -31,6 +30,7 @@ use actix_web::web::Data;
|
||||
use actix_web::{web, HttpRequest};
|
||||
use analytics::Analytics;
|
||||
use anyhow::bail;
|
||||
use bumpalo::Bump;
|
||||
use error::PayloadError;
|
||||
use extractors::payload::PayloadConfig;
|
||||
use index_scheduler::versioning::Versioning;
|
||||
@@ -39,6 +39,7 @@ use meilisearch_auth::{open_auth_store_env, AuthController};
|
||||
use meilisearch_types::milli::constants::VERSION_MAJOR;
|
||||
use meilisearch_types::milli::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
|
||||
use meilisearch_types::milli::progress::{EmbedderStats, Progress};
|
||||
use meilisearch_types::milli::update::new::indexer;
|
||||
use meilisearch_types::milli::update::{
|
||||
default_thread_pool_and_threads, IndexDocumentsConfig, IndexDocumentsMethod, IndexerConfig,
|
||||
};
|
||||
@@ -534,7 +535,7 @@ fn import_dump(
|
||||
let mut index_reader = index_reader?;
|
||||
let metadata = index_reader.metadata();
|
||||
let uid = metadata.uid.clone();
|
||||
tracing::info!("Importing index `{}`.", metadata.uid);
|
||||
tracing::info!("Importing index `{uid}`.");
|
||||
|
||||
let date = Some((metadata.created_at, metadata.updated_at));
|
||||
let index = index_scheduler.create_raw_index(&metadata.uid, date)?;
|
||||
@@ -553,48 +554,101 @@ fn import_dump(
|
||||
apply_settings_to_builder(&settings, &mut builder);
|
||||
let embedder_stats: Arc<EmbedderStats> = Default::default();
|
||||
builder.execute(&|| false, &progress, embedder_stats.clone())?;
|
||||
wtxn.commit()?;
|
||||
|
||||
// 5.3 Import the documents.
|
||||
// 5.3.1 We need to recreate the grenad+obkv format accepted by the index.
|
||||
tracing::info!("Importing the documents.");
|
||||
let file = tempfile::tempfile()?;
|
||||
let mut builder = DocumentsBatchBuilder::new(BufWriter::new(file));
|
||||
for document in index_reader.documents()? {
|
||||
builder.append_json_object(&document?)?;
|
||||
let mut wtxn = index.write_txn()?;
|
||||
let rtxn = index.read_txn()?;
|
||||
|
||||
if index_scheduler.no_edition_2024_for_dumps() {
|
||||
// 5.3 Import the documents.
|
||||
// 5.3.1 We need to recreate the grenad+obkv format accepted by the index.
|
||||
tracing::info!("Importing the documents.");
|
||||
let file = tempfile::tempfile()?;
|
||||
let mut builder = DocumentsBatchBuilder::new(BufWriter::new(file));
|
||||
for document in index_reader.documents()? {
|
||||
builder.append_json_object(&document?)?;
|
||||
}
|
||||
|
||||
// This flush the content of the batch builder.
|
||||
let file = builder.into_inner()?.into_inner()?;
|
||||
|
||||
// 5.3.2 We feed it to the milli index.
|
||||
let reader = BufReader::new(file);
|
||||
let reader = DocumentsBatchReader::from_reader(reader)?;
|
||||
|
||||
let embedder_configs = index.embedding_configs().embedding_configs(&wtxn)?;
|
||||
let embedders = index_scheduler.embedders(uid.to_string(), embedder_configs)?;
|
||||
|
||||
let builder = milli::update::IndexDocuments::new(
|
||||
&mut wtxn,
|
||||
&index,
|
||||
indexer_config,
|
||||
IndexDocumentsConfig {
|
||||
update_method: IndexDocumentsMethod::ReplaceDocuments,
|
||||
..Default::default()
|
||||
},
|
||||
|indexing_step| tracing::trace!("update: {:?}", indexing_step),
|
||||
|| false,
|
||||
&embedder_stats,
|
||||
)?;
|
||||
|
||||
let builder = builder.with_embedders(embedders);
|
||||
|
||||
let (builder, user_result) = builder.add_documents(reader)?;
|
||||
let user_result = user_result?;
|
||||
tracing::info!(documents_found = user_result, "{} documents found.", user_result);
|
||||
builder.execute()?;
|
||||
} else {
|
||||
let db_fields_ids_map = index.fields_ids_map(&rtxn)?;
|
||||
let primary_key = index.primary_key(&rtxn)?;
|
||||
let mut new_fields_ids_map = db_fields_ids_map.clone();
|
||||
|
||||
let mut indexer = indexer::DocumentOperation::new();
|
||||
let embedders = index.embedding_configs().embedding_configs(&rtxn)?;
|
||||
let embedders = index_scheduler.embedders(uid.clone(), embedders)?;
|
||||
|
||||
let mmap = unsafe { memmap2::Mmap::map(index_reader.documents_file())? };
|
||||
|
||||
indexer.replace_documents(&mmap)?;
|
||||
|
||||
let indexer_config = index_scheduler.indexer_config();
|
||||
let pool = &indexer_config.thread_pool;
|
||||
|
||||
let indexer_alloc = Bump::new();
|
||||
let (document_changes, mut operation_stats, primary_key) = indexer.into_changes(
|
||||
&indexer_alloc,
|
||||
&index,
|
||||
&rtxn,
|
||||
primary_key,
|
||||
&mut new_fields_ids_map,
|
||||
&|| false, // never stop processing a dump
|
||||
progress.clone(),
|
||||
None,
|
||||
)?;
|
||||
|
||||
let operation_stats = operation_stats.pop().unwrap();
|
||||
if let Some(error) = operation_stats.error {
|
||||
return Err(error.into());
|
||||
}
|
||||
|
||||
let _congestion = indexer::index(
|
||||
&mut wtxn,
|
||||
&index,
|
||||
pool,
|
||||
indexer_config.grenad_parameters(),
|
||||
&db_fields_ids_map,
|
||||
new_fields_ids_map,
|
||||
primary_key,
|
||||
&document_changes,
|
||||
embedders,
|
||||
&|| false, // never stop processing a dump
|
||||
&progress,
|
||||
&embedder_stats,
|
||||
)?;
|
||||
}
|
||||
|
||||
// This flush the content of the batch builder.
|
||||
let file = builder.into_inner()?.into_inner()?;
|
||||
|
||||
// 5.3.2 We feed it to the milli index.
|
||||
let reader = BufReader::new(file);
|
||||
let reader = DocumentsBatchReader::from_reader(reader)?;
|
||||
|
||||
let embedder_configs = index.embedding_configs().embedding_configs(&wtxn)?;
|
||||
let embedders = index_scheduler.embedders(uid.to_string(), embedder_configs)?;
|
||||
|
||||
let builder = milli::update::IndexDocuments::new(
|
||||
&mut wtxn,
|
||||
&index,
|
||||
indexer_config,
|
||||
IndexDocumentsConfig {
|
||||
update_method: IndexDocumentsMethod::ReplaceDocuments,
|
||||
..Default::default()
|
||||
},
|
||||
|indexing_step| tracing::trace!("update: {:?}", indexing_step),
|
||||
|| false,
|
||||
&embedder_stats,
|
||||
)?;
|
||||
|
||||
let builder = builder.with_embedders(embedders);
|
||||
|
||||
let (builder, user_result) = builder.add_documents(reader)?;
|
||||
let user_result = user_result?;
|
||||
tracing::info!(documents_found = user_result, "{} documents found.", user_result);
|
||||
builder.execute()?;
|
||||
wtxn.commit()?;
|
||||
tracing::info!("All documents successfully imported.");
|
||||
|
||||
index_scheduler.refresh_index_stats(&uid)?;
|
||||
}
|
||||
|
||||
@@ -623,19 +677,12 @@ pub fn configure_data(
|
||||
(logs_route, logs_stderr): (LogRouteHandle, LogStderrHandle),
|
||||
analytics: Data<Analytics>,
|
||||
) {
|
||||
// Create personalization service with API key from options
|
||||
let personalization_service = index_scheduler
|
||||
.experimental_personalization_api_key()
|
||||
.cloned()
|
||||
.map(personalization::PersonalizationService::cohere)
|
||||
.unwrap_or_else(personalization::PersonalizationService::uninitialized);
|
||||
let http_payload_size_limit = opt.http_payload_size_limit.as_u64() as usize;
|
||||
config
|
||||
.app_data(index_scheduler)
|
||||
.app_data(auth)
|
||||
.app_data(search_queue)
|
||||
.app_data(analytics)
|
||||
.app_data(web::Data::new(personalization_service))
|
||||
.app_data(web::Data::new(logs_route))
|
||||
.app_data(web::Data::new(logs_stderr))
|
||||
.app_data(web::Data::new(opt.clone()))
|
||||
|
||||
@@ -111,9 +111,4 @@ lazy_static! {
|
||||
"Meilisearch Task Queue Size Until Stop Registering",
|
||||
))
|
||||
.expect("Can't create a metric");
|
||||
pub static ref MEILISEARCH_PERSONALIZED_SEARCH_REQUESTS: IntGauge = register_int_gauge!(opts!(
|
||||
"meilisearch_personalized_search_requests",
|
||||
"Meilisearch number of search requests with personalization"
|
||||
))
|
||||
.expect("Can't create a metric");
|
||||
}
|
||||
|
||||
@@ -68,8 +68,8 @@ const MEILI_EXPERIMENTAL_LIMIT_BATCHED_TASKS_TOTAL_SIZE: &str =
|
||||
const MEILI_EXPERIMENTAL_EMBEDDING_CACHE_ENTRIES: &str =
|
||||
"MEILI_EXPERIMENTAL_EMBEDDING_CACHE_ENTRIES";
|
||||
const MEILI_EXPERIMENTAL_NO_SNAPSHOT_COMPACTION: &str = "MEILI_EXPERIMENTAL_NO_SNAPSHOT_COMPACTION";
|
||||
const MEILI_EXPERIMENTAL_PERSONALIZATION_API_KEY: &str =
|
||||
"MEILI_EXPERIMENTAL_PERSONALIZATION_API_KEY";
|
||||
const MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_DUMPS: &str =
|
||||
"MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_DUMPS";
|
||||
const DEFAULT_CONFIG_FILE_PATH: &str = "./config.toml";
|
||||
const DEFAULT_DB_PATH: &str = "./data.ms";
|
||||
const DEFAULT_HTTP_ADDR: &str = "localhost:7700";
|
||||
@@ -469,12 +469,6 @@ pub struct Opt {
|
||||
#[serde(default)]
|
||||
pub experimental_no_snapshot_compaction: bool,
|
||||
|
||||
/// Experimental personalization API key feature.
|
||||
///
|
||||
/// Sets the API key for personalization features.
|
||||
#[clap(long, env = MEILI_EXPERIMENTAL_PERSONALIZATION_API_KEY)]
|
||||
pub experimental_personalization_api_key: Option<String>,
|
||||
|
||||
#[serde(flatten)]
|
||||
#[clap(flatten)]
|
||||
pub indexer_options: IndexerOpts,
|
||||
@@ -580,7 +574,6 @@ impl Opt {
|
||||
experimental_limit_batched_tasks_total_size,
|
||||
experimental_embedding_cache_entries,
|
||||
experimental_no_snapshot_compaction,
|
||||
experimental_personalization_api_key,
|
||||
} = self;
|
||||
export_to_env_if_not_present(MEILI_DB_PATH, db_path);
|
||||
export_to_env_if_not_present(MEILI_HTTP_ADDR, http_addr);
|
||||
@@ -681,12 +674,6 @@ impl Opt {
|
||||
MEILI_EXPERIMENTAL_NO_SNAPSHOT_COMPACTION,
|
||||
experimental_no_snapshot_compaction.to_string(),
|
||||
);
|
||||
if let Some(experimental_personalization_api_key) = experimental_personalization_api_key {
|
||||
export_to_env_if_not_present(
|
||||
MEILI_EXPERIMENTAL_PERSONALIZATION_API_KEY,
|
||||
experimental_personalization_api_key,
|
||||
);
|
||||
}
|
||||
indexer_options.export_to_env();
|
||||
}
|
||||
|
||||
@@ -739,7 +726,6 @@ impl Opt {
|
||||
metrics: self.experimental_enable_metrics,
|
||||
logs_route: self.experimental_enable_logs_route,
|
||||
contains_filter: self.experimental_contains_filter,
|
||||
experimental_personalization_api_key: self.experimental_personalization_api_key.clone(),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -775,6 +761,15 @@ pub struct IndexerOpts {
|
||||
#[clap(long, env = MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_SETTINGS)]
|
||||
#[serde(default)]
|
||||
pub experimental_no_edition_2024_for_settings: bool,
|
||||
|
||||
/// Experimental make dump imports use the old document indexer.
|
||||
///
|
||||
/// When enabled, Meilisearch will use the old document indexer when importing dumps.
|
||||
///
|
||||
/// For more information, see <https://github.com/orgs/meilisearch/discussions/851>.
|
||||
#[clap(long, env = MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_DUMPS)]
|
||||
#[serde(default)]
|
||||
pub experimental_no_edition_2024_for_dumps: bool,
|
||||
}
|
||||
|
||||
impl IndexerOpts {
|
||||
@@ -785,6 +780,7 @@ impl IndexerOpts {
|
||||
max_indexing_threads,
|
||||
skip_index_budget: _,
|
||||
experimental_no_edition_2024_for_settings,
|
||||
experimental_no_edition_2024_for_dumps,
|
||||
} = self;
|
||||
if let Some(max_indexing_memory) = max_indexing_memory.0 {
|
||||
export_to_env_if_not_present(
|
||||
@@ -804,6 +800,12 @@ impl IndexerOpts {
|
||||
experimental_no_edition_2024_for_settings.to_string(),
|
||||
);
|
||||
}
|
||||
if experimental_no_edition_2024_for_dumps {
|
||||
export_to_env_if_not_present(
|
||||
MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_DUMPS,
|
||||
experimental_no_edition_2024_for_dumps.to_string(),
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -824,6 +826,7 @@ impl TryFrom<&IndexerOpts> for IndexerConfig {
|
||||
skip_index_budget: other.skip_index_budget,
|
||||
experimental_no_edition_2024_for_settings: other
|
||||
.experimental_no_edition_2024_for_settings,
|
||||
experimental_no_edition_2024_for_dumps: other.experimental_no_edition_2024_for_dumps,
|
||||
chunk_compression_type: Default::default(),
|
||||
chunk_compression_level: Default::default(),
|
||||
documents_chunk_size: Default::default(),
|
||||
|
||||
@@ -1,244 +0,0 @@
|
||||
use crate::search::{Personalize, SearchResult};
|
||||
use cohere_rust::{
|
||||
api::rerank::{ReRankModel, ReRankRequest},
|
||||
Cohere,
|
||||
};
|
||||
use meilisearch_types::error::ResponseError;
|
||||
use tracing::{debug, error, info};
|
||||
|
||||
pub struct CohereService {
|
||||
cohere: Cohere,
|
||||
}
|
||||
|
||||
impl CohereService {
|
||||
pub fn new(api_key: String) -> Self {
|
||||
info!("Personalization service initialized with Cohere API");
|
||||
Self { cohere: Cohere::new("https://api.cohere.ai", api_key) }
|
||||
}
|
||||
|
||||
pub async fn rerank_search_results(
|
||||
&self,
|
||||
search_result: SearchResult,
|
||||
personalize: Option<&Personalize>,
|
||||
query: Option<&str>,
|
||||
) -> Result<SearchResult, ResponseError> {
|
||||
// Extract user context from personalization
|
||||
let Some(user_context) = personalize.and_then(|p| p.user_context.as_deref()) else {
|
||||
return Ok(search_result);
|
||||
};
|
||||
|
||||
// Build the prompt by merging query and user context
|
||||
let prompt = match query {
|
||||
Some(q) => format!("User Context: {user_context}\nQuery: {q}"),
|
||||
None => format!("User Context: {user_context}"),
|
||||
};
|
||||
|
||||
// Extract documents for reranking
|
||||
let documents: Vec<String> = search_result
|
||||
.hits
|
||||
.iter()
|
||||
.map(|hit| {
|
||||
// Convert the document to a string representation for reranking
|
||||
serde_json::to_string(&hit.document).unwrap_or_else(|_| "{}".to_string())
|
||||
})
|
||||
.collect();
|
||||
|
||||
if documents.is_empty() {
|
||||
return Ok(search_result);
|
||||
}
|
||||
|
||||
// Prepare the rerank request
|
||||
let rerank_request = ReRankRequest {
|
||||
query: &prompt,
|
||||
documents: &documents,
|
||||
model: ReRankModel::EnglishV3, // Use the default and more recent model
|
||||
top_n: None,
|
||||
max_chunks_per_doc: None,
|
||||
};
|
||||
|
||||
// Call Cohere's rerank API
|
||||
match self.cohere.rerank(&rerank_request).await {
|
||||
Ok(rerank_response) => {
|
||||
debug!("Cohere rerank successful, reordering {} results", search_result.hits.len());
|
||||
|
||||
// Create a mapping from original index to new rank
|
||||
let reranked_indices: Vec<usize> =
|
||||
rerank_response.iter().map(|result| result.index as usize).collect();
|
||||
|
||||
// Reorder the hits based on Cohere's reranking
|
||||
let mut reranked_hits = Vec::new();
|
||||
for index in reranked_indices.iter() {
|
||||
reranked_hits.push(search_result.hits[*index].clone());
|
||||
}
|
||||
|
||||
Ok(SearchResult { hits: reranked_hits, ..search_result })
|
||||
}
|
||||
Err(e) => {
|
||||
error!("Cohere rerank failed with model EnglishV3: {}", e);
|
||||
// Return original results on error
|
||||
Ok(search_result)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub enum PersonalizationService {
|
||||
Cohere(CohereService),
|
||||
Uninitialized,
|
||||
}
|
||||
|
||||
impl PersonalizationService {
|
||||
pub fn cohere(api_key: String) -> Self {
|
||||
Self::Cohere(CohereService::new(api_key))
|
||||
}
|
||||
|
||||
pub fn uninitialized() -> Self {
|
||||
debug!("Personalization service uninitialized");
|
||||
Self::Uninitialized
|
||||
}
|
||||
|
||||
pub async fn rerank_search_results(
|
||||
&self,
|
||||
search_result: SearchResult,
|
||||
personalize: Option<&Personalize>,
|
||||
query: Option<&str>,
|
||||
) -> Result<SearchResult, ResponseError> {
|
||||
match self {
|
||||
Self::Cohere(cohere_service) => {
|
||||
cohere_service.rerank_search_results(search_result, personalize, query).await
|
||||
}
|
||||
Self::Uninitialized => Ok(search_result),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::search::{HitsInfo, SearchHit};
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_personalization_service_without_api_key() {
|
||||
let service = PersonalizationService::uninitialized();
|
||||
let personalize = Personalize { user_context: Some("test user".to_string()) };
|
||||
|
||||
let search_result = SearchResult {
|
||||
hits: vec![SearchHit {
|
||||
document: serde_json::Map::new(),
|
||||
formatted: serde_json::Map::new(),
|
||||
matches_position: None,
|
||||
ranking_score: Some(1.0),
|
||||
ranking_score_details: None,
|
||||
}],
|
||||
query: "test".to_string(),
|
||||
processing_time_ms: 10,
|
||||
hits_info: HitsInfo::OffsetLimit { limit: 1, offset: 0, estimated_total_hits: 1 },
|
||||
facet_distribution: None,
|
||||
facet_stats: None,
|
||||
semantic_hit_count: None,
|
||||
degraded: false,
|
||||
used_negative_operator: false,
|
||||
};
|
||||
|
||||
let result = service
|
||||
.rerank_search_results(search_result.clone(), Some(&personalize), Some("test"))
|
||||
.await;
|
||||
assert!(result.is_ok());
|
||||
|
||||
// Should return original results when no API key is provided
|
||||
let reranked_result = result.unwrap();
|
||||
assert_eq!(reranked_result.hits.len(), search_result.hits.len());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_personalization_service_with_user_context_only() {
|
||||
let service = PersonalizationService::cohere("fake_key".to_string());
|
||||
let personalize = Personalize { user_context: Some("test user".to_string()) };
|
||||
|
||||
let search_result = SearchResult {
|
||||
hits: vec![SearchHit {
|
||||
document: serde_json::Map::new(),
|
||||
formatted: serde_json::Map::new(),
|
||||
matches_position: None,
|
||||
ranking_score: Some(1.0),
|
||||
ranking_score_details: None,
|
||||
}],
|
||||
query: "test".to_string(),
|
||||
processing_time_ms: 10,
|
||||
hits_info: HitsInfo::OffsetLimit { limit: 1, offset: 0, estimated_total_hits: 1 },
|
||||
facet_distribution: None,
|
||||
facet_stats: None,
|
||||
semantic_hit_count: None,
|
||||
degraded: false,
|
||||
used_negative_operator: false,
|
||||
};
|
||||
|
||||
let result =
|
||||
service.rerank_search_results(search_result.clone(), Some(&personalize), None).await;
|
||||
assert!(result.is_ok());
|
||||
|
||||
// Should attempt reranking with user context only
|
||||
let reranked_result = result.unwrap();
|
||||
assert_eq!(reranked_result.hits.len(), search_result.hits.len());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_personalization_service_with_query_only() {
|
||||
let service = PersonalizationService::cohere("fake_key".to_string());
|
||||
|
||||
let search_result = SearchResult {
|
||||
hits: vec![SearchHit {
|
||||
document: serde_json::Map::new(),
|
||||
formatted: serde_json::Map::new(),
|
||||
matches_position: None,
|
||||
ranking_score: Some(1.0),
|
||||
ranking_score_details: None,
|
||||
}],
|
||||
query: "test".to_string(),
|
||||
processing_time_ms: 10,
|
||||
hits_info: HitsInfo::OffsetLimit { limit: 1, offset: 0, estimated_total_hits: 1 },
|
||||
facet_distribution: None,
|
||||
facet_stats: None,
|
||||
semantic_hit_count: None,
|
||||
degraded: false,
|
||||
used_negative_operator: false,
|
||||
};
|
||||
|
||||
let result = service.rerank_search_results(search_result.clone(), None, Some("test")).await;
|
||||
assert!(result.is_ok());
|
||||
|
||||
// Should attempt reranking with query only
|
||||
let reranked_result = result.unwrap();
|
||||
assert_eq!(reranked_result.hits.len(), search_result.hits.len());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_personalization_service_both_none() {
|
||||
let service = PersonalizationService::cohere("fake_key".to_string());
|
||||
|
||||
let search_result = SearchResult {
|
||||
hits: vec![SearchHit {
|
||||
document: serde_json::Map::new(),
|
||||
formatted: serde_json::Map::new(),
|
||||
matches_position: None,
|
||||
ranking_score: Some(1.0),
|
||||
ranking_score_details: None,
|
||||
}],
|
||||
query: "test".to_string(),
|
||||
processing_time_ms: 10,
|
||||
hits_info: HitsInfo::OffsetLimit { limit: 1, offset: 0, estimated_total_hits: 1 },
|
||||
facet_distribution: None,
|
||||
facet_stats: None,
|
||||
semantic_hit_count: None,
|
||||
degraded: false,
|
||||
used_negative_operator: false,
|
||||
};
|
||||
|
||||
let result = service.rerank_search_results(search_result.clone(), None, None).await;
|
||||
assert!(result.is_ok());
|
||||
|
||||
// Should return original results when both query and user_context are None
|
||||
let reranked_result = result.unwrap();
|
||||
assert_eq!(reranked_result.hits.len(), search_result.hits.len());
|
||||
}
|
||||
}
|
||||
@@ -19,6 +19,7 @@ use meilisearch_types::error::{Code, ResponseError};
|
||||
use meilisearch_types::heed::RoTxn;
|
||||
use meilisearch_types::index_uid::IndexUid;
|
||||
use meilisearch_types::milli::documents::sort::recursive_sort;
|
||||
use meilisearch_types::milli::index::EmbeddingsWithMetadata;
|
||||
use meilisearch_types::milli::update::IndexDocumentsMethod;
|
||||
use meilisearch_types::milli::vector::parsed_vectors::ExplicitVectors;
|
||||
use meilisearch_types::milli::{AscDesc, DocumentId};
|
||||
@@ -44,6 +45,7 @@ use crate::extractors::authentication::policies::*;
|
||||
use crate::extractors::authentication::GuardedData;
|
||||
use crate::extractors::payload::Payload;
|
||||
use crate::extractors::sequential_extractor::SeqHandler;
|
||||
use crate::routes::indexes::proxy::{proxy, Body};
|
||||
use crate::routes::indexes::search::fix_sort_query_parameters;
|
||||
use crate::routes::{
|
||||
get_task_id, is_dry_run, PaginationView, SummarizedTaskView, PAGINATION_DEFAULT_LIMIT,
|
||||
@@ -333,6 +335,7 @@ pub async fn delete_document(
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let DocumentParam { index_uid, document_id } = path.into_inner();
|
||||
let index_uid = IndexUid::try_from(index_uid)?;
|
||||
let network = index_scheduler.network();
|
||||
|
||||
analytics.publish(
|
||||
DocumentsDeletionAggregator {
|
||||
@@ -350,10 +353,16 @@ pub async fn delete_document(
|
||||
};
|
||||
let uid = get_task_id(&req, &opt)?;
|
||||
let dry_run = is_dry_run(&req, &opt)?;
|
||||
let task: SummarizedTaskView =
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run))
|
||||
.await??
|
||||
.into();
|
||||
let task = {
|
||||
let index_scheduler = index_scheduler.clone();
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run)).await??
|
||||
};
|
||||
|
||||
if network.sharding && !dry_run {
|
||||
proxy(&index_scheduler, &index_uid, &req, network, Body::none(), &task).await?;
|
||||
}
|
||||
|
||||
let task: SummarizedTaskView = task.into();
|
||||
debug!("returns: {:?}", task);
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
}
|
||||
@@ -791,7 +800,6 @@ pub async fn replace_documents(
|
||||
let uid = get_task_id(&req, &opt)?;
|
||||
let dry_run = is_dry_run(&req, &opt)?;
|
||||
let task = document_addition(
|
||||
extract_mime_type(&req)?,
|
||||
index_scheduler,
|
||||
index_uid,
|
||||
params.primary_key,
|
||||
@@ -801,8 +809,10 @@ pub async fn replace_documents(
|
||||
uid,
|
||||
dry_run,
|
||||
allow_index_creation,
|
||||
&req,
|
||||
)
|
||||
.await?;
|
||||
|
||||
debug!(returns = ?task, "Replace documents");
|
||||
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
@@ -892,7 +902,6 @@ pub async fn update_documents(
|
||||
let uid = get_task_id(&req, &opt)?;
|
||||
let dry_run = is_dry_run(&req, &opt)?;
|
||||
let task = document_addition(
|
||||
extract_mime_type(&req)?,
|
||||
index_scheduler,
|
||||
index_uid,
|
||||
params.primary_key,
|
||||
@@ -902,6 +911,7 @@ pub async fn update_documents(
|
||||
uid,
|
||||
dry_run,
|
||||
allow_index_creation,
|
||||
&req,
|
||||
)
|
||||
.await?;
|
||||
debug!(returns = ?task, "Update documents");
|
||||
@@ -911,7 +921,6 @@ pub async fn update_documents(
|
||||
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
async fn document_addition(
|
||||
mime_type: Option<Mime>,
|
||||
index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_ADD }>, Data<IndexScheduler>>,
|
||||
index_uid: IndexUid,
|
||||
primary_key: Option<String>,
|
||||
@@ -921,7 +930,11 @@ async fn document_addition(
|
||||
task_id: Option<TaskId>,
|
||||
dry_run: bool,
|
||||
allow_index_creation: bool,
|
||||
req: &HttpRequest,
|
||||
) -> Result<SummarizedTaskView, MeilisearchHttpError> {
|
||||
let mime_type = extract_mime_type(req)?;
|
||||
let network = index_scheduler.network();
|
||||
|
||||
let format = match (
|
||||
mime_type.as_ref().map(|m| (m.type_().as_str(), m.subtype().as_str())),
|
||||
csv_delimiter,
|
||||
@@ -953,7 +966,7 @@ async fn document_addition(
|
||||
};
|
||||
|
||||
let (uuid, mut update_file) = index_scheduler.queue.create_update_file(dry_run)?;
|
||||
let documents_count = match format {
|
||||
let res = match format {
|
||||
PayloadType::Ndjson => {
|
||||
let (path, file) = update_file.into_parts();
|
||||
let file = match file {
|
||||
@@ -968,19 +981,19 @@ async fn document_addition(
|
||||
None => None,
|
||||
};
|
||||
|
||||
let documents_count = tokio::task::spawn_blocking(move || {
|
||||
let res = tokio::task::spawn_blocking(move || {
|
||||
let documents_count = file.as_ref().map_or(Ok(0), |ntf| {
|
||||
read_ndjson(ntf.as_file()).map_err(MeilisearchHttpError::DocumentFormat)
|
||||
})?;
|
||||
|
||||
let update_file = file_store::File::from_parts(path, file);
|
||||
update_file.persist()?;
|
||||
let update_file = update_file.persist()?;
|
||||
|
||||
Ok(documents_count)
|
||||
Ok((documents_count, update_file))
|
||||
})
|
||||
.await?;
|
||||
|
||||
Ok(documents_count)
|
||||
Ok(res)
|
||||
}
|
||||
PayloadType::Json | PayloadType::Csv { delimiter: _ } => {
|
||||
let temp_file = match tempfile() {
|
||||
@@ -999,16 +1012,16 @@ async fn document_addition(
|
||||
unreachable!("We already wrote the user content into the update file")
|
||||
}
|
||||
};
|
||||
// we NEED to persist the file here because we moved the `udpate_file` in another task.
|
||||
update_file.persist()?;
|
||||
Ok(documents_count)
|
||||
// we NEED to persist the file here because we moved the `update_file` in another task.
|
||||
let file = update_file.persist()?;
|
||||
Ok((documents_count, file))
|
||||
})
|
||||
.await
|
||||
}
|
||||
};
|
||||
|
||||
let documents_count = match documents_count {
|
||||
Ok(Ok(documents_count)) => documents_count,
|
||||
let (documents_count, file) = match res {
|
||||
Ok(Ok((documents_count, file))) => (documents_count, file),
|
||||
// in this case the file has not possibly be persisted.
|
||||
Ok(Err(e)) => return Err(e),
|
||||
Err(e) => {
|
||||
@@ -1050,6 +1063,20 @@ async fn document_addition(
|
||||
}
|
||||
};
|
||||
|
||||
if network.sharding {
|
||||
if let Some(file) = file {
|
||||
proxy(
|
||||
&index_scheduler,
|
||||
&index_uid,
|
||||
req,
|
||||
network,
|
||||
Body::with_ndjson_payload(file),
|
||||
&task,
|
||||
)
|
||||
.await?;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(task.into())
|
||||
}
|
||||
|
||||
@@ -1128,6 +1155,7 @@ pub async fn delete_documents_batch(
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
debug!(parameters = ?body, "Delete documents by batch");
|
||||
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
|
||||
let network = index_scheduler.network();
|
||||
|
||||
analytics.publish(
|
||||
DocumentsDeletionAggregator {
|
||||
@@ -1148,16 +1176,22 @@ pub async fn delete_documents_batch(
|
||||
KindWithContent::DocumentDeletion { index_uid: index_uid.to_string(), documents_ids: ids };
|
||||
let uid = get_task_id(&req, &opt)?;
|
||||
let dry_run = is_dry_run(&req, &opt)?;
|
||||
let task: SummarizedTaskView =
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run))
|
||||
.await??
|
||||
.into();
|
||||
let task = {
|
||||
let index_scheduler = index_scheduler.clone();
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run)).await??
|
||||
};
|
||||
|
||||
if network.sharding && !dry_run {
|
||||
proxy(&index_scheduler, &index_uid, &req, network, Body::Inline(body), &task).await?;
|
||||
}
|
||||
|
||||
let task: SummarizedTaskView = task.into();
|
||||
|
||||
debug!(returns = ?task, "Delete documents by batch");
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserr, ToSchema)]
|
||||
#[derive(Debug, Deserr, ToSchema, Serialize)]
|
||||
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
|
||||
#[schema(rename_all = "camelCase")]
|
||||
pub struct DocumentDeletionByFilter {
|
||||
@@ -1206,7 +1240,8 @@ pub async fn delete_documents_by_filter(
|
||||
debug!(parameters = ?body, "Delete documents by filter");
|
||||
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
|
||||
let index_uid = index_uid.into_inner();
|
||||
let filter = body.into_inner().filter;
|
||||
let filter = body.into_inner();
|
||||
let network = index_scheduler.network();
|
||||
|
||||
analytics.publish(
|
||||
DocumentsDeletionAggregator {
|
||||
@@ -1219,23 +1254,36 @@ pub async fn delete_documents_by_filter(
|
||||
);
|
||||
|
||||
// we ensure the filter is well formed before enqueuing it
|
||||
crate::search::parse_filter(&filter, Code::InvalidDocumentFilter, index_scheduler.features())?
|
||||
.ok_or(MeilisearchHttpError::EmptyFilter)?;
|
||||
crate::search::parse_filter(
|
||||
&filter.filter,
|
||||
Code::InvalidDocumentFilter,
|
||||
index_scheduler.features(),
|
||||
)?
|
||||
.ok_or(MeilisearchHttpError::EmptyFilter)?;
|
||||
|
||||
let task = KindWithContent::DocumentDeletionByFilter { index_uid, filter_expr: filter };
|
||||
let task = KindWithContent::DocumentDeletionByFilter {
|
||||
index_uid: index_uid.clone(),
|
||||
filter_expr: filter.filter.clone(),
|
||||
};
|
||||
|
||||
let uid = get_task_id(&req, &opt)?;
|
||||
let dry_run = is_dry_run(&req, &opt)?;
|
||||
let task: SummarizedTaskView =
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run))
|
||||
.await??
|
||||
.into();
|
||||
let task = {
|
||||
let index_scheduler = index_scheduler.clone();
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run)).await??
|
||||
};
|
||||
|
||||
if network.sharding && !dry_run {
|
||||
proxy(&index_scheduler, &index_uid, &req, network, Body::Inline(filter), &task).await?;
|
||||
}
|
||||
|
||||
let task: SummarizedTaskView = task.into();
|
||||
|
||||
debug!(returns = ?task, "Delete documents by filter");
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserr, ToSchema)]
|
||||
#[derive(Debug, Deserr, ToSchema, Serialize)]
|
||||
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
|
||||
pub struct DocumentEditionByFunction {
|
||||
/// A string containing a RHAI function.
|
||||
@@ -1323,6 +1371,8 @@ pub async fn edit_documents_by_function(
|
||||
.features()
|
||||
.check_edit_documents_by_function("Using the documents edit route")?;
|
||||
|
||||
let network = index_scheduler.network();
|
||||
|
||||
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
|
||||
let index_uid = index_uid.into_inner();
|
||||
let params = params.into_inner();
|
||||
@@ -1336,13 +1386,12 @@ pub async fn edit_documents_by_function(
|
||||
&req,
|
||||
);
|
||||
|
||||
let DocumentEditionByFunction { filter, context, function } = params;
|
||||
let engine = milli::rhai::Engine::new();
|
||||
if let Err(e) = engine.compile(&function) {
|
||||
if let Err(e) = engine.compile(¶ms.function) {
|
||||
return Err(ResponseError::from_msg(e.to_string(), Code::BadRequest));
|
||||
}
|
||||
|
||||
if let Some(ref filter) = filter {
|
||||
if let Some(ref filter) = params.filter {
|
||||
// we ensure the filter is well formed before enqueuing it
|
||||
crate::search::parse_filter(
|
||||
filter,
|
||||
@@ -1352,9 +1401,9 @@ pub async fn edit_documents_by_function(
|
||||
.ok_or(MeilisearchHttpError::EmptyFilter)?;
|
||||
}
|
||||
let task = KindWithContent::DocumentEdition {
|
||||
index_uid,
|
||||
filter_expr: filter,
|
||||
context: match context {
|
||||
index_uid: index_uid.clone(),
|
||||
filter_expr: params.filter.clone(),
|
||||
context: match params.context.clone() {
|
||||
Some(Value::Object(m)) => Some(m),
|
||||
None => None,
|
||||
_ => {
|
||||
@@ -1364,15 +1413,21 @@ pub async fn edit_documents_by_function(
|
||||
))
|
||||
}
|
||||
},
|
||||
function,
|
||||
function: params.function.clone(),
|
||||
};
|
||||
|
||||
let uid = get_task_id(&req, &opt)?;
|
||||
let dry_run = is_dry_run(&req, &opt)?;
|
||||
let task: SummarizedTaskView =
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run))
|
||||
.await??
|
||||
.into();
|
||||
let task = {
|
||||
let index_scheduler = index_scheduler.clone();
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run)).await??
|
||||
};
|
||||
|
||||
if network.sharding && !dry_run {
|
||||
proxy(&index_scheduler, &index_uid, &req, network, Body::Inline(params), &task).await?;
|
||||
}
|
||||
|
||||
let task: SummarizedTaskView = task.into();
|
||||
|
||||
debug!(returns = ?task, "Edit documents by function");
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
@@ -1415,6 +1470,8 @@ pub async fn clear_all_documents(
|
||||
analytics: web::Data<Analytics>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
|
||||
let network = index_scheduler.network();
|
||||
|
||||
analytics.publish(
|
||||
DocumentsDeletionAggregator {
|
||||
clear_all: true,
|
||||
@@ -1428,10 +1485,18 @@ pub async fn clear_all_documents(
|
||||
let task = KindWithContent::DocumentClear { index_uid: index_uid.to_string() };
|
||||
let uid = get_task_id(&req, &opt)?;
|
||||
let dry_run = is_dry_run(&req, &opt)?;
|
||||
let task: SummarizedTaskView =
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run))
|
||||
.await??
|
||||
.into();
|
||||
|
||||
let task = {
|
||||
let index_scheduler = index_scheduler.clone();
|
||||
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run)).await??
|
||||
};
|
||||
|
||||
if network.sharding && !dry_run {
|
||||
proxy(&index_scheduler, &index_uid, &req, network, Body::none(), &task).await?;
|
||||
}
|
||||
|
||||
let task: SummarizedTaskView = task.into();
|
||||
|
||||
debug!(returns = ?task, "Delete all documents");
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
@@ -1460,9 +1525,13 @@ fn some_documents<'a, 't: 'a>(
|
||||
Some(Value::Object(map)) => map,
|
||||
_ => Default::default(),
|
||||
};
|
||||
for (name, (vector, regenerate)) in index.embeddings(rtxn, key)? {
|
||||
for (
|
||||
name,
|
||||
EmbeddingsWithMetadata { embeddings, regenerate, has_fragments: _ },
|
||||
) in index.embeddings(rtxn, key)?
|
||||
{
|
||||
let embeddings =
|
||||
ExplicitVectors { embeddings: Some(vector.into()), regenerate };
|
||||
ExplicitVectors { embeddings: Some(embeddings.into()), regenerate };
|
||||
vectors.insert(
|
||||
name,
|
||||
serde_json::to_value(embeddings).map_err(MeilisearchHttpError::from)?,
|
||||
|
||||
@@ -343,7 +343,6 @@ impl From<FacetSearchQuery> for SearchQuery {
|
||||
hybrid,
|
||||
ranking_score_threshold,
|
||||
locales,
|
||||
personalize: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -30,6 +30,7 @@ use crate::Opt;
|
||||
|
||||
pub mod documents;
|
||||
pub mod facet_search;
|
||||
mod proxy;
|
||||
pub mod search;
|
||||
mod search_analytics;
|
||||
#[cfg(test)]
|
||||
@@ -39,6 +40,8 @@ mod settings_analytics;
|
||||
pub mod similar;
|
||||
mod similar_analytics;
|
||||
|
||||
pub use proxy::{PROXY_ORIGIN_REMOTE_HEADER, PROXY_ORIGIN_TASK_UID_HEADER};
|
||||
|
||||
#[derive(OpenApi)]
|
||||
#[openapi(
|
||||
nest(
|
||||
|
||||
424
crates/meilisearch/src/routes/indexes/proxy.rs
Normal file
424
crates/meilisearch/src/routes/indexes/proxy.rs
Normal file
@@ -0,0 +1,424 @@
|
||||
// Copyright © 2025 Meilisearch Some Rights Reserved
|
||||
// This file is part of Meilisearch Enterprise Edition (EE).
|
||||
// Use of this source code is governed by the Business Source License 1.1,
|
||||
// as found in the LICENSE-EE file or at <https://mariadb.com/bsl11>
|
||||
|
||||
use std::collections::BTreeMap;
|
||||
use std::fs::File;
|
||||
|
||||
use actix_web::http::header::CONTENT_TYPE;
|
||||
use actix_web::HttpRequest;
|
||||
use bytes::Bytes;
|
||||
use index_scheduler::IndexScheduler;
|
||||
use meilisearch_types::error::ResponseError;
|
||||
use meilisearch_types::tasks::{Origin, RemoteTask, TaskNetwork};
|
||||
use reqwest::StatusCode;
|
||||
use serde::de::DeserializeOwned;
|
||||
use serde_json::Value;
|
||||
|
||||
use crate::error::MeilisearchHttpError;
|
||||
use crate::routes::indexes::proxy::error::{ProxyDocumentChangeError, ReqwestErrorWithoutUrl};
|
||||
use crate::routes::SummarizedTaskView;
|
||||
|
||||
pub enum Body<T: serde::Serialize> {
|
||||
NdJsonPayload(File),
|
||||
Inline(T),
|
||||
None,
|
||||
}
|
||||
|
||||
impl Body<()> {
|
||||
pub fn with_ndjson_payload(file: File) -> Self {
|
||||
Self::NdJsonPayload(file)
|
||||
}
|
||||
|
||||
pub fn none() -> Self {
|
||||
Self::None
|
||||
}
|
||||
}
|
||||
|
||||
/// If necessary, proxies the passed request to the network and update the task description.
|
||||
///
|
||||
/// This function reads the custom headers from the request to determine if must proxy the request or if the request
|
||||
/// has already been proxied.
|
||||
///
|
||||
/// - when it must proxy the request, the endpoint, method and query params are retrieved from the passed `req`, then the `body` is
|
||||
/// sent to all remotes of the `network` (except `self`). The response from the remotes are collected to update the passed `task`
|
||||
/// with the task ids from the task queues of the remotes.
|
||||
/// - when the request has already been proxied, the custom headers contains information about the remote that created the initial task.
|
||||
/// This information is copied to the passed task.
|
||||
pub async fn proxy<T: serde::Serialize>(
|
||||
index_scheduler: &IndexScheduler,
|
||||
index_uid: &str,
|
||||
req: &HttpRequest,
|
||||
network: meilisearch_types::network::Network,
|
||||
body: Body<T>,
|
||||
task: &meilisearch_types::tasks::Task,
|
||||
) -> Result<(), MeilisearchHttpError> {
|
||||
match origin_from_req(req)? {
|
||||
Some(origin) => {
|
||||
index_scheduler.set_task_network(task.uid, TaskNetwork::Origin { origin })?
|
||||
}
|
||||
None => {
|
||||
let this = network
|
||||
.local
|
||||
.as_deref()
|
||||
.expect("inconsistent `network.sharding` and `network.self`")
|
||||
.to_owned();
|
||||
|
||||
let content_type = match &body {
|
||||
// for file bodies, force x-ndjson
|
||||
Body::NdJsonPayload(_) => Some(b"application/x-ndjson".as_slice()),
|
||||
// otherwise get content type from request
|
||||
_ => req.headers().get(CONTENT_TYPE).map(|h| h.as_bytes()),
|
||||
};
|
||||
|
||||
let body = match body {
|
||||
Body::NdJsonPayload(file) => Some(Bytes::from_owner(unsafe {
|
||||
memmap2::Mmap::map(&file).map_err(|err| {
|
||||
MeilisearchHttpError::from_milli(err.into(), Some(index_uid.to_owned()))
|
||||
})?
|
||||
})),
|
||||
|
||||
Body::Inline(payload) => {
|
||||
Some(Bytes::copy_from_slice(&serde_json::to_vec(&payload).unwrap()))
|
||||
}
|
||||
|
||||
Body::None => None,
|
||||
};
|
||||
|
||||
let mut in_flight_remote_queries = BTreeMap::new();
|
||||
let client = reqwest::ClientBuilder::new()
|
||||
.connect_timeout(std::time::Duration::from_secs(3))
|
||||
.build()
|
||||
.unwrap();
|
||||
|
||||
let method = from_old_http_method(req.method());
|
||||
|
||||
// send payload to all remotes
|
||||
for (node_name, node) in
|
||||
network.remotes.into_iter().filter(|(name, _)| name.as_str() != this)
|
||||
{
|
||||
let body = body.clone();
|
||||
let client = client.clone();
|
||||
let api_key = node.write_api_key;
|
||||
let this = this.clone();
|
||||
let method = method.clone();
|
||||
let path_and_query =
|
||||
req.uri().path_and_query().map(|paq| paq.as_str()).unwrap_or("/");
|
||||
|
||||
in_flight_remote_queries.insert(
|
||||
node_name,
|
||||
tokio::spawn({
|
||||
let url = format!("{}{}", node.url, path_and_query);
|
||||
|
||||
let url_encoded_this = urlencoding::encode(&this).into_owned();
|
||||
let url_encoded_task_uid = task.uid.to_string(); // it's url encoded i promize
|
||||
|
||||
let content_type = content_type.map(|b| b.to_owned());
|
||||
|
||||
let backoff = backoff::ExponentialBackoffBuilder::new()
|
||||
.with_max_elapsed_time(Some(std::time::Duration::from_secs(25)))
|
||||
.build();
|
||||
|
||||
backoff::future::retry(backoff, move || {
|
||||
let url = url.clone();
|
||||
let client = client.clone();
|
||||
let url_encoded_this = url_encoded_this.clone();
|
||||
let url_encoded_task_uid = url_encoded_task_uid.clone();
|
||||
let content_type = content_type.clone();
|
||||
|
||||
let body = body.clone();
|
||||
let api_key = api_key.clone();
|
||||
let method = method.clone();
|
||||
|
||||
async move {
|
||||
try_proxy(
|
||||
method,
|
||||
&url,
|
||||
content_type.as_deref(),
|
||||
api_key.as_deref(),
|
||||
&client,
|
||||
&url_encoded_this,
|
||||
&url_encoded_task_uid,
|
||||
body,
|
||||
)
|
||||
.await
|
||||
}
|
||||
})
|
||||
}),
|
||||
);
|
||||
}
|
||||
|
||||
// wait for all in-flight queries to finish and collect their results
|
||||
let mut remote_tasks: BTreeMap<String, RemoteTask> = BTreeMap::new();
|
||||
for (node_name, handle) in in_flight_remote_queries {
|
||||
match handle.await {
|
||||
Ok(Ok(res)) => {
|
||||
let task_uid = res.task_uid;
|
||||
|
||||
remote_tasks.insert(node_name, Ok(task_uid).into());
|
||||
}
|
||||
Ok(Err(error)) => {
|
||||
remote_tasks.insert(node_name, Err(error.as_response_error()).into());
|
||||
}
|
||||
Err(panic) => match panic.try_into_panic() {
|
||||
Ok(panic) => {
|
||||
let msg = match panic.downcast_ref::<&'static str>() {
|
||||
Some(s) => *s,
|
||||
None => match panic.downcast_ref::<String>() {
|
||||
Some(s) => &s[..],
|
||||
None => "Box<dyn Any>",
|
||||
},
|
||||
};
|
||||
remote_tasks.insert(
|
||||
node_name,
|
||||
Err(ResponseError::from_msg(
|
||||
msg.to_string(),
|
||||
meilisearch_types::error::Code::Internal,
|
||||
))
|
||||
.into(),
|
||||
);
|
||||
}
|
||||
Err(_) => {
|
||||
tracing::error!("proxy task was unexpectedly cancelled")
|
||||
}
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
// edit details to contain the return values from the remotes
|
||||
index_scheduler.set_task_network(task.uid, TaskNetwork::Remotes { remote_tasks })?;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn from_old_http_method(method: &actix_http::Method) -> reqwest::Method {
|
||||
match method {
|
||||
&actix_http::Method::CONNECT => reqwest::Method::CONNECT,
|
||||
&actix_http::Method::DELETE => reqwest::Method::DELETE,
|
||||
&actix_http::Method::GET => reqwest::Method::GET,
|
||||
&actix_http::Method::HEAD => reqwest::Method::HEAD,
|
||||
&actix_http::Method::OPTIONS => reqwest::Method::OPTIONS,
|
||||
&actix_http::Method::PATCH => reqwest::Method::PATCH,
|
||||
&actix_http::Method::POST => reqwest::Method::POST,
|
||||
&actix_http::Method::PUT => reqwest::Method::PUT,
|
||||
&actix_http::Method::TRACE => reqwest::Method::TRACE,
|
||||
method => reqwest::Method::from_bytes(method.as_str().as_bytes()).unwrap(),
|
||||
}
|
||||
}
|
||||
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
async fn try_proxy(
|
||||
method: reqwest::Method,
|
||||
url: &str,
|
||||
content_type: Option<&[u8]>,
|
||||
api_key: Option<&str>,
|
||||
client: &reqwest::Client,
|
||||
url_encoded_this: &str,
|
||||
url_encoded_task_uid: &str,
|
||||
body: Option<Bytes>,
|
||||
) -> Result<SummarizedTaskView, backoff::Error<ProxyDocumentChangeError>> {
|
||||
let request = client.request(method, url).timeout(std::time::Duration::from_secs(30));
|
||||
let request = if let Some(body) = body { request.body(body) } else { request };
|
||||
let request = if let Some(api_key) = api_key { request.bearer_auth(api_key) } else { request };
|
||||
let request = request.header(PROXY_ORIGIN_TASK_UID_HEADER, url_encoded_task_uid);
|
||||
let request = request.header(PROXY_ORIGIN_REMOTE_HEADER, url_encoded_this);
|
||||
let request = if let Some(content_type) = content_type {
|
||||
request.header(CONTENT_TYPE.as_str(), content_type)
|
||||
} else {
|
||||
request
|
||||
};
|
||||
|
||||
let response = request.send().await;
|
||||
let response = match response {
|
||||
Ok(response) => response,
|
||||
Err(error) if error.is_timeout() => {
|
||||
return Err(backoff::Error::transient(ProxyDocumentChangeError::Timeout))
|
||||
}
|
||||
Err(error) => {
|
||||
return Err(backoff::Error::transient(ProxyDocumentChangeError::CouldNotSendRequest(
|
||||
ReqwestErrorWithoutUrl::new(error),
|
||||
)))
|
||||
}
|
||||
};
|
||||
|
||||
match response.status() {
|
||||
status_code if status_code.is_success() => (),
|
||||
StatusCode::UNAUTHORIZED | StatusCode::FORBIDDEN => {
|
||||
return Err(backoff::Error::Permanent(ProxyDocumentChangeError::AuthenticationError))
|
||||
}
|
||||
status_code if status_code.is_client_error() => {
|
||||
let response = parse_error(response).await;
|
||||
return Err(backoff::Error::Permanent(ProxyDocumentChangeError::BadRequest {
|
||||
status_code,
|
||||
response,
|
||||
}));
|
||||
}
|
||||
status_code if status_code.is_server_error() => {
|
||||
let response = parse_error(response).await;
|
||||
return Err(backoff::Error::transient(ProxyDocumentChangeError::RemoteError {
|
||||
status_code,
|
||||
response,
|
||||
}));
|
||||
}
|
||||
status_code => {
|
||||
tracing::warn!(
|
||||
status_code = status_code.as_u16(),
|
||||
"remote replied with unexpected status code"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
let response = match parse_response(response).await {
|
||||
Ok(response) => response,
|
||||
Err(response) => {
|
||||
return Err(backoff::Error::transient(
|
||||
ProxyDocumentChangeError::CouldNotParseResponse { response },
|
||||
))
|
||||
}
|
||||
};
|
||||
|
||||
Ok(response)
|
||||
}
|
||||
|
||||
async fn parse_error(response: reqwest::Response) -> Result<String, ReqwestErrorWithoutUrl> {
|
||||
let bytes = match response.bytes().await {
|
||||
Ok(bytes) => bytes,
|
||||
Err(error) => return Err(ReqwestErrorWithoutUrl::new(error)),
|
||||
};
|
||||
|
||||
Ok(parse_bytes_as_error(&bytes))
|
||||
}
|
||||
|
||||
fn parse_bytes_as_error(bytes: &[u8]) -> String {
|
||||
match serde_json::from_slice::<Value>(bytes) {
|
||||
Ok(value) => value.to_string(),
|
||||
Err(_) => String::from_utf8_lossy(bytes).into_owned(),
|
||||
}
|
||||
}
|
||||
|
||||
async fn parse_response<T: DeserializeOwned>(
|
||||
response: reqwest::Response,
|
||||
) -> Result<T, Result<String, ReqwestErrorWithoutUrl>> {
|
||||
let bytes = match response.bytes().await {
|
||||
Ok(bytes) => bytes,
|
||||
Err(error) => return Err(Err(ReqwestErrorWithoutUrl::new(error))),
|
||||
};
|
||||
|
||||
match serde_json::from_slice::<T>(&bytes) {
|
||||
Ok(value) => Ok(value),
|
||||
Err(_) => Err(Ok(parse_bytes_as_error(&bytes))),
|
||||
}
|
||||
}
|
||||
|
||||
mod error {
|
||||
use meilisearch_types::error::ResponseError;
|
||||
use reqwest::StatusCode;
|
||||
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
pub enum ProxyDocumentChangeError {
|
||||
#[error("{0}")]
|
||||
CouldNotSendRequest(ReqwestErrorWithoutUrl),
|
||||
#[error("could not authenticate against the remote host\n - hint: check that the remote instance was registered with a valid API key having the `documents.add` action")]
|
||||
AuthenticationError,
|
||||
#[error(
|
||||
"could not parse response from the remote host as a document addition response{}\n - hint: check that the remote instance is a Meilisearch instance running the same version",
|
||||
response_from_remote(response)
|
||||
)]
|
||||
CouldNotParseResponse { response: Result<String, ReqwestErrorWithoutUrl> },
|
||||
#[error("remote host responded with code {}{}\n - hint: check that the remote instance has the correct index configuration for that request\n - hint: check that the `network` experimental feature is enabled on the remote instance", status_code.as_u16(), response_from_remote(response))]
|
||||
BadRequest { status_code: StatusCode, response: Result<String, ReqwestErrorWithoutUrl> },
|
||||
#[error("remote host did not answer before the deadline")]
|
||||
Timeout,
|
||||
#[error("remote host responded with code {}{}", status_code.as_u16(), response_from_remote(response))]
|
||||
RemoteError { status_code: StatusCode, response: Result<String, ReqwestErrorWithoutUrl> },
|
||||
}
|
||||
|
||||
impl ProxyDocumentChangeError {
|
||||
pub fn as_response_error(&self) -> ResponseError {
|
||||
use meilisearch_types::error::Code;
|
||||
let message = self.to_string();
|
||||
let code = match self {
|
||||
ProxyDocumentChangeError::CouldNotSendRequest(_) => Code::RemoteCouldNotSendRequest,
|
||||
ProxyDocumentChangeError::AuthenticationError => Code::RemoteInvalidApiKey,
|
||||
ProxyDocumentChangeError::BadRequest { .. } => Code::RemoteBadRequest,
|
||||
ProxyDocumentChangeError::Timeout => Code::RemoteTimeout,
|
||||
ProxyDocumentChangeError::RemoteError { .. } => Code::RemoteRemoteError,
|
||||
ProxyDocumentChangeError::CouldNotParseResponse { .. } => Code::RemoteBadResponse,
|
||||
};
|
||||
ResponseError::from_msg(message, code)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
#[error(transparent)]
|
||||
pub struct ReqwestErrorWithoutUrl(reqwest::Error);
|
||||
impl ReqwestErrorWithoutUrl {
|
||||
pub fn new(inner: reqwest::Error) -> Self {
|
||||
Self(inner.without_url())
|
||||
}
|
||||
}
|
||||
|
||||
fn response_from_remote(response: &Result<String, ReqwestErrorWithoutUrl>) -> String {
|
||||
match response {
|
||||
Ok(response) => {
|
||||
format!(":\n - response from remote: {}", response)
|
||||
}
|
||||
Err(error) => {
|
||||
format!(":\n - additionally, could not retrieve response from remote: {error}")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub const PROXY_ORIGIN_REMOTE_HEADER: &str = "Meili-Proxy-Origin-Remote";
|
||||
pub const PROXY_ORIGIN_TASK_UID_HEADER: &str = "Meili-Proxy-Origin-TaskUid";
|
||||
|
||||
pub fn origin_from_req(req: &HttpRequest) -> Result<Option<Origin>, MeilisearchHttpError> {
|
||||
let (remote_name, task_uid) = match (
|
||||
req.headers().get(PROXY_ORIGIN_REMOTE_HEADER),
|
||||
req.headers().get(PROXY_ORIGIN_TASK_UID_HEADER),
|
||||
) {
|
||||
(None, None) => return Ok(None),
|
||||
(None, Some(_)) => {
|
||||
return Err(MeilisearchHttpError::InconsistentOriginHeaders { is_remote_missing: true })
|
||||
}
|
||||
(Some(_), None) => {
|
||||
return Err(MeilisearchHttpError::InconsistentOriginHeaders {
|
||||
is_remote_missing: false,
|
||||
})
|
||||
}
|
||||
(Some(remote_name), Some(task_uid)) => (
|
||||
urlencoding::decode(remote_name.to_str().map_err(|err| {
|
||||
MeilisearchHttpError::InvalidHeaderValue {
|
||||
header_name: PROXY_ORIGIN_REMOTE_HEADER,
|
||||
msg: format!("while parsing remote name as UTF-8: {err}"),
|
||||
}
|
||||
})?)
|
||||
.map_err(|err| MeilisearchHttpError::InvalidHeaderValue {
|
||||
header_name: PROXY_ORIGIN_REMOTE_HEADER,
|
||||
msg: format!("while URL-decoding remote name: {err}"),
|
||||
})?,
|
||||
urlencoding::decode(task_uid.to_str().map_err(|err| {
|
||||
MeilisearchHttpError::InvalidHeaderValue {
|
||||
header_name: PROXY_ORIGIN_TASK_UID_HEADER,
|
||||
msg: format!("while parsing task UID as UTF-8: {err}"),
|
||||
}
|
||||
})?)
|
||||
.map_err(|err| MeilisearchHttpError::InvalidHeaderValue {
|
||||
header_name: PROXY_ORIGIN_TASK_UID_HEADER,
|
||||
msg: format!("while URL-decoding task UID: {err}"),
|
||||
})?,
|
||||
),
|
||||
};
|
||||
|
||||
let task_uid: usize =
|
||||
task_uid.parse().map_err(|err| MeilisearchHttpError::InvalidHeaderValue {
|
||||
header_name: PROXY_ORIGIN_TASK_UID_HEADER,
|
||||
msg: format!("while parsing the task UID as an integer: {err}"),
|
||||
})?;
|
||||
|
||||
Ok(Some(Origin { remote_name: remote_name.into_owned(), task_uid }))
|
||||
}
|
||||
@@ -22,10 +22,10 @@ use crate::extractors::sequential_extractor::SeqHandler;
|
||||
use crate::metrics::MEILISEARCH_DEGRADED_SEARCH_REQUESTS;
|
||||
use crate::routes::indexes::search_analytics::{SearchAggregator, SearchGET, SearchPOST};
|
||||
use crate::search::{
|
||||
add_search_rules, perform_search, HybridQuery, MatchingStrategy, Personalize,
|
||||
RankingScoreThreshold, RetrieveVectors, SearchKind, SearchQuery, SearchResult, SemanticRatio,
|
||||
DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG,
|
||||
DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET, DEFAULT_SEMANTIC_RATIO,
|
||||
add_search_rules, perform_search, HybridQuery, MatchingStrategy, RankingScoreThreshold,
|
||||
RetrieveVectors, SearchKind, SearchQuery, SearchResult, SemanticRatio, DEFAULT_CROP_LENGTH,
|
||||
DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG,
|
||||
DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET, DEFAULT_SEMANTIC_RATIO,
|
||||
};
|
||||
use crate::search_queue::SearchQueue;
|
||||
|
||||
@@ -132,8 +132,6 @@ pub struct SearchQueryGet {
|
||||
#[deserr(default, error = DeserrQueryParamError<InvalidSearchLocales>)]
|
||||
#[param(value_type = Vec<Locale>, explode = false)]
|
||||
pub locales: Option<CS<Locale>>,
|
||||
#[deserr(default, error = DeserrQueryParamError<InvalidSearchPersonalizeUserContext>)]
|
||||
pub personalize_user_context: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, deserr::Deserr)]
|
||||
@@ -205,10 +203,6 @@ impl TryFrom<SearchQueryGet> for SearchQuery {
|
||||
));
|
||||
}
|
||||
|
||||
let personalize = other
|
||||
.personalize_user_context
|
||||
.map(|user_context| Personalize { user_context: Some(user_context) });
|
||||
|
||||
Ok(Self {
|
||||
q: other.q,
|
||||
// `media` not supported for `GET`
|
||||
@@ -238,7 +232,6 @@ impl TryFrom<SearchQueryGet> for SearchQuery {
|
||||
hybrid,
|
||||
ranking_score_threshold: other.ranking_score_threshold.map(|o| o.0),
|
||||
locales: other.locales.map(|o| o.into_iter().collect()),
|
||||
personalize,
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -327,7 +320,6 @@ pub fn fix_sort_query_parameters(sort_query: &str) -> Vec<String> {
|
||||
pub async fn search_with_url_query(
|
||||
index_scheduler: GuardedData<ActionPolicy<{ actions::SEARCH }>, Data<IndexScheduler>>,
|
||||
search_queue: web::Data<SearchQueue>,
|
||||
personalization_service: web::Data<crate::personalization::PersonalizationService>,
|
||||
index_uid: web::Path<String>,
|
||||
params: AwebQueryParameter<SearchQueryGet, DeserrQueryParamError>,
|
||||
req: HttpRequest,
|
||||
@@ -350,11 +342,6 @@ pub async fn search_with_url_query(
|
||||
let search_kind =
|
||||
search_kind(&query, index_scheduler.get_ref(), index_uid.to_string(), &index)?;
|
||||
let retrieve_vector = RetrieveVectors::new(query.retrieve_vectors);
|
||||
|
||||
// Extract personalization and query string before moving query
|
||||
let personalize = query.personalize.clone();
|
||||
let query_str = query.q.clone();
|
||||
|
||||
let permit = search_queue.try_get_search_permit().await?;
|
||||
let search_result = tokio::task::spawn_blocking(move || {
|
||||
perform_search(
|
||||
@@ -374,12 +361,7 @@ pub async fn search_with_url_query(
|
||||
}
|
||||
analytics.publish(aggregate, &req);
|
||||
|
||||
let mut search_result = search_result?;
|
||||
|
||||
// Apply personalization if requested
|
||||
search_result = personalization_service
|
||||
.rerank_search_results(search_result, personalize.as_ref(), query_str.as_deref())
|
||||
.await?;
|
||||
let search_result = search_result?;
|
||||
|
||||
debug!(returns = ?search_result, "Search get");
|
||||
Ok(HttpResponse::Ok().json(search_result))
|
||||
@@ -444,7 +426,6 @@ pub async fn search_with_url_query(
|
||||
pub async fn search_with_post(
|
||||
index_scheduler: GuardedData<ActionPolicy<{ actions::SEARCH }>, Data<IndexScheduler>>,
|
||||
search_queue: web::Data<SearchQueue>,
|
||||
personalization_service: web::Data<crate::personalization::PersonalizationService>,
|
||||
index_uid: web::Path<String>,
|
||||
params: AwebJson<SearchQuery, DeserrJsonError>,
|
||||
req: HttpRequest,
|
||||
@@ -468,10 +449,6 @@ pub async fn search_with_post(
|
||||
search_kind(&query, index_scheduler.get_ref(), index_uid.to_string(), &index)?;
|
||||
let retrieve_vectors = RetrieveVectors::new(query.retrieve_vectors);
|
||||
|
||||
// Extract personalization and query string before moving query
|
||||
let personalize = query.personalize.clone();
|
||||
let query_str = query.q.clone();
|
||||
|
||||
let permit = search_queue.try_get_search_permit().await?;
|
||||
let search_result = tokio::task::spawn_blocking(move || {
|
||||
perform_search(
|
||||
@@ -494,12 +471,7 @@ pub async fn search_with_post(
|
||||
}
|
||||
analytics.publish(aggregate, &req);
|
||||
|
||||
let mut search_result = search_result?;
|
||||
|
||||
// Apply personalization if requested
|
||||
search_result = personalization_service
|
||||
.rerank_search_results(search_result, personalize.as_ref(), query_str.as_deref())
|
||||
.await?;
|
||||
let search_result = search_result?;
|
||||
|
||||
debug!(returns = ?search_result, "Search post");
|
||||
Ok(HttpResponse::Ok().json(search_result))
|
||||
|
||||
@@ -7,7 +7,6 @@ use serde_json::{json, Value};
|
||||
|
||||
use crate::aggregate_methods;
|
||||
use crate::analytics::{Aggregate, AggregateMethod};
|
||||
use crate::metrics::MEILISEARCH_PERSONALIZED_SEARCH_REQUESTS;
|
||||
use crate::search::{
|
||||
SearchQuery, SearchResult, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER,
|
||||
DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT,
|
||||
@@ -95,9 +94,6 @@ pub struct SearchAggregator<Method: AggregateMethod> {
|
||||
show_ranking_score_details: bool,
|
||||
ranking_score_threshold: bool,
|
||||
|
||||
// personalization
|
||||
total_personalized: usize,
|
||||
|
||||
marker: std::marker::PhantomData<Method>,
|
||||
}
|
||||
|
||||
@@ -132,7 +128,6 @@ impl<Method: AggregateMethod> SearchAggregator<Method> {
|
||||
hybrid,
|
||||
ranking_score_threshold,
|
||||
locales,
|
||||
personalize,
|
||||
} = query;
|
||||
|
||||
let mut ret = Self::default();
|
||||
@@ -207,12 +202,6 @@ impl<Method: AggregateMethod> SearchAggregator<Method> {
|
||||
ret.locales = locales.iter().copied().collect();
|
||||
}
|
||||
|
||||
// personalization
|
||||
if personalize.is_some() {
|
||||
ret.total_personalized = 1;
|
||||
MEILISEARCH_PERSONALIZED_SEARCH_REQUESTS.inc();
|
||||
}
|
||||
|
||||
ret.highlight_pre_tag = *highlight_pre_tag != DEFAULT_HIGHLIGHT_PRE_TAG();
|
||||
ret.highlight_post_tag = *highlight_post_tag != DEFAULT_HIGHLIGHT_POST_TAG();
|
||||
ret.crop_marker = *crop_marker != DEFAULT_CROP_MARKER();
|
||||
@@ -301,7 +290,6 @@ impl<Method: AggregateMethod> Aggregate for SearchAggregator<Method> {
|
||||
total_used_negative_operator,
|
||||
ranking_score_threshold,
|
||||
mut locales,
|
||||
total_personalized,
|
||||
marker: _,
|
||||
} = *new;
|
||||
|
||||
@@ -386,9 +374,6 @@ impl<Method: AggregateMethod> Aggregate for SearchAggregator<Method> {
|
||||
// locales
|
||||
self.locales.append(&mut locales);
|
||||
|
||||
// personalization
|
||||
self.total_personalized = self.total_personalized.saturating_add(total_personalized);
|
||||
|
||||
self
|
||||
}
|
||||
|
||||
@@ -433,7 +418,6 @@ impl<Method: AggregateMethod> Aggregate for SearchAggregator<Method> {
|
||||
total_used_negative_operator,
|
||||
ranking_score_threshold,
|
||||
locales,
|
||||
total_personalized,
|
||||
marker: _,
|
||||
} = *self;
|
||||
|
||||
@@ -506,9 +490,6 @@ impl<Method: AggregateMethod> Aggregate for SearchAggregator<Method> {
|
||||
"show_ranking_score_details": show_ranking_score_details,
|
||||
"ranking_score_threshold": ranking_score_threshold,
|
||||
},
|
||||
"personalization": {
|
||||
"total_personalized": total_personalized,
|
||||
},
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -180,7 +180,7 @@ pub fn is_dry_run(req: &HttpRequest, opt: &Opt) -> Result<bool, ResponseError> {
|
||||
.is_some_and(|s| s.to_lowercase() == "true"))
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, ToSchema)]
|
||||
#[derive(Debug, Serialize, Deserialize, ToSchema)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct SummarizedTaskView {
|
||||
/// The task unique identifier.
|
||||
@@ -194,7 +194,10 @@ pub struct SummarizedTaskView {
|
||||
#[serde(rename = "type")]
|
||||
kind: Kind,
|
||||
/// The date on which the task was enqueued.
|
||||
#[serde(serialize_with = "time::serde::rfc3339::serialize")]
|
||||
#[serde(
|
||||
serialize_with = "time::serde::rfc3339::serialize",
|
||||
deserialize_with = "time::serde::rfc3339::deserialize"
|
||||
)]
|
||||
enqueued_at: OffsetDateTime,
|
||||
}
|
||||
|
||||
|
||||
@@ -67,7 +67,6 @@ impl MultiSearchAggregator {
|
||||
hybrid: _,
|
||||
ranking_score_threshold: _,
|
||||
locales: _,
|
||||
personalize: _,
|
||||
} in &federated_search.queries
|
||||
{
|
||||
if let Some(federation_options) = federation_options {
|
||||
|
||||
@@ -8,12 +8,13 @@ use index_scheduler::IndexScheduler;
|
||||
use itertools::{EitherOrBoth, Itertools};
|
||||
use meilisearch_types::deserr::DeserrJsonError;
|
||||
use meilisearch_types::error::deserr_codes::{
|
||||
InvalidNetworkRemotes, InvalidNetworkSearchApiKey, InvalidNetworkSelf, InvalidNetworkUrl,
|
||||
InvalidNetworkRemotes, InvalidNetworkSearchApiKey, InvalidNetworkSelf, InvalidNetworkSharding,
|
||||
InvalidNetworkUrl, InvalidNetworkWriteApiKey,
|
||||
};
|
||||
use meilisearch_types::error::ResponseError;
|
||||
use meilisearch_types::features::{Network as DbNetwork, Remote as DbRemote};
|
||||
use meilisearch_types::keys::actions;
|
||||
use meilisearch_types::milli::update::Setting;
|
||||
use meilisearch_types::network::{Network as DbNetwork, Remote as DbRemote};
|
||||
use serde::Serialize;
|
||||
use tracing::debug;
|
||||
use utoipa::{OpenApi, ToSchema};
|
||||
@@ -57,9 +58,9 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
|
||||
{
|
||||
"self": "ms-0",
|
||||
"remotes": {
|
||||
"ms-0": Remote { url: Setting::Set("http://localhost:7700".into()), search_api_key: Setting::Reset },
|
||||
"ms-1": Remote { url: Setting::Set("http://localhost:7701".into()), search_api_key: Setting::Set("foo".into()) },
|
||||
"ms-2": Remote { url: Setting::Set("http://localhost:7702".into()), search_api_key: Setting::Set("bar".into()) },
|
||||
"ms-0": Remote { url: Setting::Set("http://localhost:7700".into()), search_api_key: Setting::Reset, write_api_key: Setting::Reset },
|
||||
"ms-1": Remote { url: Setting::Set("http://localhost:7701".into()), search_api_key: Setting::Set("foo".into()), write_api_key: Setting::Set("bar".into()) },
|
||||
"ms-2": Remote { url: Setting::Set("http://localhost:7702".into()), search_api_key: Setting::Set("bar".into()), write_api_key: Setting::Set("foo".into()) },
|
||||
}
|
||||
})),
|
||||
(status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!(
|
||||
@@ -88,9 +89,9 @@ async fn get_network(
|
||||
#[schema(rename_all = "camelCase")]
|
||||
pub struct Remote {
|
||||
#[schema(value_type = Option<String>, example = json!({
|
||||
"ms-0": Remote { url: Setting::Set("http://localhost:7700".into()), search_api_key: Setting::Reset },
|
||||
"ms-1": Remote { url: Setting::Set("http://localhost:7701".into()), search_api_key: Setting::Set("foo".into()) },
|
||||
"ms-2": Remote { url: Setting::Set("http://localhost:7702".into()), search_api_key: Setting::Set("bar".into()) },
|
||||
"ms-0": Remote { url: Setting::Set("http://localhost:7700".into()), search_api_key: Setting::Reset, write_api_key: Setting::Reset },
|
||||
"ms-1": Remote { url: Setting::Set("http://localhost:7701".into()), search_api_key: Setting::Set("foo".into()), write_api_key: Setting::Set("bar".into()) },
|
||||
"ms-2": Remote { url: Setting::Set("http://localhost:7702".into()), search_api_key: Setting::Set("bar".into()), write_api_key: Setting::Set("foo".into()) },
|
||||
}))]
|
||||
#[deserr(default, error = DeserrJsonError<InvalidNetworkUrl>)]
|
||||
#[serde(default)]
|
||||
@@ -99,6 +100,10 @@ pub struct Remote {
|
||||
#[deserr(default, error = DeserrJsonError<InvalidNetworkSearchApiKey>)]
|
||||
#[serde(default)]
|
||||
pub search_api_key: Setting<String>,
|
||||
#[schema(value_type = Option<String>, example = json!("XWnBI8QHUc-4IlqbKPLUDuhftNq19mQtjc6JvmivzJU"))]
|
||||
#[deserr(default, error = DeserrJsonError<InvalidNetworkWriteApiKey>)]
|
||||
#[serde(default)]
|
||||
pub write_api_key: Setting<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserr, ToSchema, Serialize)]
|
||||
@@ -114,6 +119,10 @@ pub struct Network {
|
||||
#[serde(default, rename = "self")]
|
||||
#[deserr(default, rename = "self", error = DeserrJsonError<InvalidNetworkSelf>)]
|
||||
pub local: Setting<String>,
|
||||
#[schema(value_type = Option<bool>, example = json!(true))]
|
||||
#[serde(default)]
|
||||
#[deserr(default, error = DeserrJsonError<InvalidNetworkSharding>)]
|
||||
pub sharding: Setting<bool>,
|
||||
}
|
||||
|
||||
impl Remote {
|
||||
@@ -136,6 +145,7 @@ impl Remote {
|
||||
Ok(url)
|
||||
})?,
|
||||
search_api_key: self.search_api_key.set(),
|
||||
write_api_key: self.write_api_key.set(),
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -174,9 +184,9 @@ impl Aggregate for PatchNetworkAnalytics {
|
||||
{
|
||||
"self": "ms-0",
|
||||
"remotes": {
|
||||
"ms-0": Remote { url: Setting::Set("http://localhost:7700".into()), search_api_key: Setting::Reset },
|
||||
"ms-1": Remote { url: Setting::Set("http://localhost:7701".into()), search_api_key: Setting::Set("foo".into()) },
|
||||
"ms-2": Remote { url: Setting::Set("http://localhost:7702".into()), search_api_key: Setting::Set("bar".into()) },
|
||||
"ms-0": Remote { url: Setting::Set("http://localhost:7700".into()), search_api_key: Setting::Reset, write_api_key: Setting::Reset },
|
||||
"ms-1": Remote { url: Setting::Set("http://localhost:7701".into()), search_api_key: Setting::Set("foo".into()), write_api_key: Setting::Set("bar".into()) },
|
||||
"ms-2": Remote { url: Setting::Set("http://localhost:7702".into()), search_api_key: Setting::Set("bar".into()), write_api_key: Setting::Set("foo".into()) },
|
||||
}
|
||||
})),
|
||||
(status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!(
|
||||
@@ -207,6 +217,19 @@ async fn patch_network(
|
||||
Setting::NotSet => old_network.local,
|
||||
};
|
||||
|
||||
let merged_sharding = match new_network.sharding {
|
||||
Setting::Set(new_sharding) => new_sharding,
|
||||
Setting::Reset => false,
|
||||
Setting::NotSet => old_network.sharding,
|
||||
};
|
||||
|
||||
if merged_sharding && merged_self.is_none() {
|
||||
return Err(ResponseError::from_msg(
|
||||
"`.sharding`: enabling the sharding requires `.self` to be set\n - Hint: Disable `sharding` or set `self` to a value.".into(),
|
||||
meilisearch_types::error::Code::InvalidNetworkSharding,
|
||||
));
|
||||
}
|
||||
|
||||
let merged_remotes = match new_network.remotes {
|
||||
Setting::Set(new_remotes) => {
|
||||
let mut merged_remotes = BTreeMap::new();
|
||||
@@ -217,9 +240,17 @@ async fn patch_network(
|
||||
{
|
||||
match either_or_both {
|
||||
EitherOrBoth::Both((key, old), (_, Some(new))) => {
|
||||
let DbRemote { url: old_url, search_api_key: old_search_api_key } = old;
|
||||
let DbRemote {
|
||||
url: old_url,
|
||||
search_api_key: old_search_api_key,
|
||||
write_api_key: old_write_api_key,
|
||||
} = old;
|
||||
|
||||
let Remote { url: new_url, search_api_key: new_search_api_key } = new;
|
||||
let Remote {
|
||||
url: new_url,
|
||||
search_api_key: new_search_api_key,
|
||||
write_api_key: new_write_api_key,
|
||||
} = new;
|
||||
|
||||
let merged = DbRemote {
|
||||
url: match new_url {
|
||||
@@ -247,6 +278,11 @@ async fn patch_network(
|
||||
Setting::Reset => None,
|
||||
Setting::NotSet => old_search_api_key,
|
||||
},
|
||||
write_api_key: match new_write_api_key {
|
||||
Setting::Set(new_write_api_key) => Some(new_write_api_key),
|
||||
Setting::Reset => None,
|
||||
Setting::NotSet => old_write_api_key,
|
||||
},
|
||||
};
|
||||
merged_remotes.insert(key, merged);
|
||||
}
|
||||
@@ -274,7 +310,8 @@ async fn patch_network(
|
||||
&req,
|
||||
);
|
||||
|
||||
let merged_network = DbNetwork { local: merged_self, remotes: merged_remotes };
|
||||
let merged_network =
|
||||
DbNetwork { local: merged_self, remotes: merged_remotes, sharding: merged_sharding };
|
||||
index_scheduler.put_network(merged_network.clone())?;
|
||||
debug!(returns = ?merged_network, "Patch network");
|
||||
Ok(HttpResponse::Ok().json(merged_network))
|
||||
|
||||
@@ -10,10 +10,10 @@ use actix_http::StatusCode;
|
||||
use index_scheduler::{IndexScheduler, RoFeatures};
|
||||
use itertools::Itertools;
|
||||
use meilisearch_types::error::ResponseError;
|
||||
use meilisearch_types::features::{Network, Remote};
|
||||
use meilisearch_types::milli::order_by_map::OrderByMap;
|
||||
use meilisearch_types::milli::score_details::{ScoreDetails, WeightedScoreValue};
|
||||
use meilisearch_types::milli::{self, DocumentId, OrderBy, TimeBudget, DEFAULT_VALUES_PER_FACET};
|
||||
use meilisearch_types::network::{Network, Remote};
|
||||
use roaring::RoaringBitmap;
|
||||
use tokio::task::JoinHandle;
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
pub use error::ProxySearchError;
|
||||
use error::ReqwestErrorWithoutUrl;
|
||||
use meilisearch_types::features::Remote;
|
||||
use meilisearch_types::network::Remote;
|
||||
use rand::Rng as _;
|
||||
use reqwest::{Client, Response, StatusCode};
|
||||
use serde::de::DeserializeOwned;
|
||||
|
||||
@@ -16,7 +16,7 @@ use meilisearch_types::error::{Code, ResponseError};
|
||||
use meilisearch_types::heed::RoTxn;
|
||||
use meilisearch_types::index_uid::IndexUid;
|
||||
use meilisearch_types::locales::Locale;
|
||||
use meilisearch_types::milli::index::{self, SearchParameters};
|
||||
use meilisearch_types::milli::index::{self, EmbeddingsWithMetadata, SearchParameters};
|
||||
use meilisearch_types::milli::score_details::{ScoreDetails, ScoringStrategy};
|
||||
use meilisearch_types::milli::vector::parsed_vectors::ExplicitVectors;
|
||||
use meilisearch_types::milli::vector::Embedder;
|
||||
@@ -57,13 +57,6 @@ pub const DEFAULT_HIGHLIGHT_PRE_TAG: fn() -> String = || "<em>".to_string();
|
||||
pub const DEFAULT_HIGHLIGHT_POST_TAG: fn() -> String = || "</em>".to_string();
|
||||
pub const DEFAULT_SEMANTIC_RATIO: fn() -> SemanticRatio = || SemanticRatio(0.5);
|
||||
|
||||
#[derive(Clone, Default, PartialEq, Deserr, ToSchema, Debug)]
|
||||
#[deserr(error = DeserrJsonError<InvalidSearchPersonalize>, rename_all = camelCase, deny_unknown_fields)]
|
||||
pub struct Personalize {
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSearchPersonalizeUserContext>)]
|
||||
pub user_context: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Default, PartialEq, Deserr, ToSchema)]
|
||||
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
|
||||
pub struct SearchQuery {
|
||||
@@ -127,8 +120,6 @@ pub struct SearchQuery {
|
||||
pub ranking_score_threshold: Option<RankingScoreThreshold>,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSearchLocales>)]
|
||||
pub locales: Option<Vec<Locale>>,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSearchPersonalize>, default)]
|
||||
pub personalize: Option<Personalize>,
|
||||
}
|
||||
|
||||
impl From<SearchParameters> for SearchQuery {
|
||||
@@ -176,7 +167,6 @@ impl From<SearchParameters> for SearchQuery {
|
||||
highlight_post_tag: DEFAULT_HIGHLIGHT_POST_TAG(),
|
||||
crop_marker: DEFAULT_CROP_MARKER(),
|
||||
locales: None,
|
||||
personalize: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -258,7 +248,6 @@ impl fmt::Debug for SearchQuery {
|
||||
attributes_to_search_on,
|
||||
ranking_score_threshold,
|
||||
locales,
|
||||
personalize,
|
||||
} = self;
|
||||
|
||||
let mut debug = f.debug_struct("SearchQuery");
|
||||
@@ -347,10 +336,6 @@ impl fmt::Debug for SearchQuery {
|
||||
debug.field("locales", &locales);
|
||||
}
|
||||
|
||||
if let Some(personalize) = personalize {
|
||||
debug.field("personalize", &personalize);
|
||||
}
|
||||
|
||||
debug.finish()
|
||||
}
|
||||
}
|
||||
@@ -556,9 +541,6 @@ pub struct SearchQueryWithIndex {
|
||||
pub ranking_score_threshold: Option<RankingScoreThreshold>,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSearchLocales>, default)]
|
||||
pub locales: Option<Vec<Locale>>,
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSearchPersonalize>, default)]
|
||||
#[serde(skip)]
|
||||
pub personalize: Option<Personalize>,
|
||||
|
||||
#[deserr(default)]
|
||||
pub federation_options: Option<FederationOptions>,
|
||||
@@ -616,7 +598,6 @@ impl SearchQueryWithIndex {
|
||||
attributes_to_search_on,
|
||||
ranking_score_threshold,
|
||||
locales,
|
||||
personalize,
|
||||
} = query;
|
||||
|
||||
SearchQueryWithIndex {
|
||||
@@ -648,7 +629,6 @@ impl SearchQueryWithIndex {
|
||||
attributes_to_search_on,
|
||||
ranking_score_threshold,
|
||||
locales,
|
||||
personalize,
|
||||
federation_options,
|
||||
}
|
||||
}
|
||||
@@ -684,7 +664,6 @@ impl SearchQueryWithIndex {
|
||||
hybrid,
|
||||
ranking_score_threshold,
|
||||
locales,
|
||||
personalize,
|
||||
} = self;
|
||||
(
|
||||
index_uid,
|
||||
@@ -716,7 +695,6 @@ impl SearchQueryWithIndex {
|
||||
hybrid,
|
||||
ranking_score_threshold,
|
||||
locales,
|
||||
personalize,
|
||||
// do not use ..Default::default() here,
|
||||
// rather add any missing field from `SearchQuery` to `SearchQueryWithIndex`
|
||||
},
|
||||
@@ -941,7 +919,7 @@ pub struct SearchResultWithIndex {
|
||||
pub result: SearchResult,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, ToSchema)]
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, ToSchema)]
|
||||
#[serde(untagged)]
|
||||
pub enum HitsInfo {
|
||||
#[serde(rename_all = "camelCase")]
|
||||
@@ -1188,7 +1166,6 @@ pub fn perform_search(
|
||||
attributes_to_search_on: _,
|
||||
filter: _,
|
||||
distinct: _,
|
||||
personalize: _,
|
||||
} = query;
|
||||
|
||||
let format = AttributesFormat {
|
||||
@@ -1551,8 +1528,11 @@ impl<'a> HitMaker<'a> {
|
||||
Some(Value::Object(map)) => map,
|
||||
_ => Default::default(),
|
||||
};
|
||||
for (name, (vector, regenerate)) in self.index.embeddings(self.rtxn, id)? {
|
||||
let embeddings = ExplicitVectors { embeddings: Some(vector.into()), regenerate };
|
||||
for (name, EmbeddingsWithMetadata { embeddings, regenerate, has_fragments: _ }) in
|
||||
self.index.embeddings(self.rtxn, id)?
|
||||
{
|
||||
let embeddings =
|
||||
ExplicitVectors { embeddings: Some(embeddings.into()), regenerate };
|
||||
vectors.insert(
|
||||
name,
|
||||
serde_json::to_value(embeddings).map_err(InternalError::SerdeJson)?,
|
||||
|
||||
@@ -421,7 +421,7 @@ async fn error_add_api_key_invalid_parameters_actions() {
|
||||
meili_snap::snapshot!(code, @"400 Bad Request");
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response, { ".createdAt" => "[ignored]", ".updatedAt" => "[ignored]" }), @r#"
|
||||
{
|
||||
"message": "Unknown value `doc.add` at `.actions[0]`: expected one of `*`, `*.get`, `search`, `documents.*`, `documents.add`, `documents.get`, `documents.delete`, `indexes.*`, `indexes.create`, `indexes.get`, `indexes.update`, `indexes.delete`, `indexes.swap`, `tasks.*`, `tasks.cancel`, `tasks.delete`, `tasks.get`, `settings.*`, `settings.get`, `settings.update`, `stats.*`, `stats.get`, `metrics.*`, `metrics.get`, `dumps.*`, `dumps.create`, `snapshots.*`, `snapshots.create`, `version`, `keys.create`, `keys.get`, `keys.update`, `keys.delete`, `experimental.get`, `experimental.update`, `export`, `network.get`, `network.update`, `chatCompletions`, `chats.*`, `chats.get`, `chats.delete`, `chatsSettings.*`, `chatsSettings.get`, `chatsSettings.update`",
|
||||
"message": "Unknown value `doc.add` at `.actions[0]`: expected one of `*`, `search`, `documents.*`, `documents.add`, `documents.get`, `documents.delete`, `indexes.*`, `indexes.create`, `indexes.get`, `indexes.update`, `indexes.delete`, `indexes.swap`, `tasks.*`, `tasks.cancel`, `tasks.delete`, `tasks.get`, `settings.*`, `settings.get`, `settings.update`, `stats.*`, `stats.get`, `metrics.*`, `metrics.get`, `dumps.*`, `dumps.create`, `snapshots.*`, `snapshots.create`, `version`, `keys.create`, `keys.get`, `keys.update`, `keys.delete`, `experimental.get`, `experimental.update`, `export`, `network.get`, `network.update`, `chatCompletions`, `chats.*`, `chats.get`, `chats.delete`, `chatsSettings.*`, `chatsSettings.get`, `chatsSettings.update`, `*.get`",
|
||||
"code": "invalid_api_key_actions",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_api_key_actions"
|
||||
|
||||
@@ -93,7 +93,7 @@ async fn create_api_key_bad_actions() {
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
snapshot!(json_string!(response), @r#"
|
||||
{
|
||||
"message": "Unknown value `doggo` at `.actions[0]`: expected one of `*`, `*.get`, `search`, `documents.*`, `documents.add`, `documents.get`, `documents.delete`, `indexes.*`, `indexes.create`, `indexes.get`, `indexes.update`, `indexes.delete`, `indexes.swap`, `tasks.*`, `tasks.cancel`, `tasks.delete`, `tasks.get`, `settings.*`, `settings.get`, `settings.update`, `stats.*`, `stats.get`, `metrics.*`, `metrics.get`, `dumps.*`, `dumps.create`, `snapshots.*`, `snapshots.create`, `version`, `keys.create`, `keys.get`, `keys.update`, `keys.delete`, `experimental.get`, `experimental.update`, `export`, `network.get`, `network.update`, `chatCompletions`, `chats.*`, `chats.get`, `chats.delete`, `chatsSettings.*`, `chatsSettings.get`, `chatsSettings.update`",
|
||||
"message": "Unknown value `doggo` at `.actions[0]`: expected one of `*`, `search`, `documents.*`, `documents.add`, `documents.get`, `documents.delete`, `indexes.*`, `indexes.create`, `indexes.get`, `indexes.update`, `indexes.delete`, `indexes.swap`, `tasks.*`, `tasks.cancel`, `tasks.delete`, `tasks.get`, `settings.*`, `settings.get`, `settings.update`, `stats.*`, `stats.get`, `metrics.*`, `metrics.get`, `dumps.*`, `dumps.create`, `snapshots.*`, `snapshots.create`, `version`, `keys.create`, `keys.get`, `keys.update`, `keys.delete`, `experimental.get`, `experimental.update`, `export`, `network.get`, `network.update`, `chatCompletions`, `chats.*`, `chats.get`, `chats.delete`, `chatsSettings.*`, `chatsSettings.get`, `chatsSettings.update`, `*.get`",
|
||||
"code": "invalid_api_key_actions",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_api_key_actions"
|
||||
|
||||
@@ -466,6 +466,7 @@ pub fn default_settings(dir: impl AsRef<Path>) -> Opt {
|
||||
// Having 2 threads makes the tests way faster
|
||||
max_indexing_threads: MaxThreads::from_str("2").unwrap(),
|
||||
experimental_no_edition_2024_for_settings: false,
|
||||
experimental_no_edition_2024_for_dumps: false,
|
||||
},
|
||||
experimental_enable_metrics: false,
|
||||
..Parser::parse_from(None as Option<&str>)
|
||||
|
||||
@@ -46,7 +46,7 @@ async fn errors_on_param() {
|
||||
meili_snap::snapshot!(code, @"400 Bad Request");
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
|
||||
{
|
||||
"message": "Unknown field `selfie`: expected one of `remotes`, `self`",
|
||||
"message": "Unknown field `selfie`: expected one of `remotes`, `self`, `sharding`",
|
||||
"code": "bad_request",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#bad_request"
|
||||
@@ -149,7 +149,7 @@ async fn errors_on_param() {
|
||||
meili_snap::snapshot!(code, @"400 Bad Request");
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
|
||||
{
|
||||
"message": "Unknown field `doggo` inside `.remotes.new`: expected one of `url`, `searchApiKey`",
|
||||
"message": "Unknown field `doggo` inside `.remotes.new`: expected one of `url`, `searchApiKey`, `writeApiKey`",
|
||||
"code": "invalid_network_remotes",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_network_remotes"
|
||||
@@ -192,9 +192,11 @@ async fn errors_on_param() {
|
||||
"remotes": {
|
||||
"kefir": {
|
||||
"url": "http://localhost:7700",
|
||||
"searchApiKey": null
|
||||
"searchApiKey": null,
|
||||
"writeApiKey": null
|
||||
}
|
||||
}
|
||||
},
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
let (response, code) = server
|
||||
@@ -266,7 +268,8 @@ async fn auth() {
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
|
||||
{
|
||||
"self": "master",
|
||||
"remotes": {}
|
||||
"remotes": {},
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
|
||||
@@ -274,11 +277,12 @@ async fn auth() {
|
||||
|
||||
meili_snap::snapshot!(code, @"200 OK");
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
|
||||
{
|
||||
"self": "master",
|
||||
"remotes": {}
|
||||
}
|
||||
"###);
|
||||
{
|
||||
"self": "master",
|
||||
"remotes": {},
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
|
||||
// try get with get permission
|
||||
server.use_api_key(get_network_key.as_str().unwrap());
|
||||
@@ -286,11 +290,12 @@ async fn auth() {
|
||||
|
||||
meili_snap::snapshot!(code, @"200 OK");
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
|
||||
{
|
||||
"self": "master",
|
||||
"remotes": {}
|
||||
}
|
||||
"###);
|
||||
{
|
||||
"self": "master",
|
||||
"remotes": {},
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
|
||||
// try update with update permission
|
||||
server.use_api_key(update_network_key.as_str().unwrap());
|
||||
@@ -303,11 +308,12 @@ async fn auth() {
|
||||
|
||||
meili_snap::snapshot!(code, @"200 OK");
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
|
||||
{
|
||||
"self": "api_key",
|
||||
"remotes": {}
|
||||
}
|
||||
"###);
|
||||
{
|
||||
"self": "api_key",
|
||||
"remotes": {},
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
|
||||
// try with the other's permission
|
||||
let (response, code) = server.get_network().await;
|
||||
@@ -383,7 +389,8 @@ async fn get_and_set_network() {
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
|
||||
{
|
||||
"self": null,
|
||||
"remotes": {}
|
||||
"remotes": {},
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
|
||||
@@ -393,7 +400,8 @@ async fn get_and_set_network() {
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
|
||||
{
|
||||
"self": "myself",
|
||||
"remotes": {}
|
||||
"remotes": {},
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
|
||||
@@ -417,13 +425,16 @@ async fn get_and_set_network() {
|
||||
"remotes": {
|
||||
"myself": {
|
||||
"url": "http://localhost:7700",
|
||||
"searchApiKey": null
|
||||
"searchApiKey": null,
|
||||
"writeApiKey": null
|
||||
},
|
||||
"thy": {
|
||||
"url": "http://localhost:7701",
|
||||
"searchApiKey": "foo"
|
||||
"searchApiKey": "foo",
|
||||
"writeApiKey": null
|
||||
}
|
||||
}
|
||||
},
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
|
||||
@@ -443,13 +454,16 @@ async fn get_and_set_network() {
|
||||
"remotes": {
|
||||
"myself": {
|
||||
"url": "http://localhost:7700",
|
||||
"searchApiKey": null
|
||||
"searchApiKey": null,
|
||||
"writeApiKey": null
|
||||
},
|
||||
"thy": {
|
||||
"url": "http://localhost:7701",
|
||||
"searchApiKey": "bar"
|
||||
"searchApiKey": "bar",
|
||||
"writeApiKey": null
|
||||
}
|
||||
}
|
||||
},
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
|
||||
@@ -470,17 +484,21 @@ async fn get_and_set_network() {
|
||||
"remotes": {
|
||||
"myself": {
|
||||
"url": "http://localhost:7700",
|
||||
"searchApiKey": null
|
||||
"searchApiKey": null,
|
||||
"writeApiKey": null
|
||||
},
|
||||
"them": {
|
||||
"url": "http://localhost:7702",
|
||||
"searchApiKey": "baz"
|
||||
"searchApiKey": "baz",
|
||||
"writeApiKey": null
|
||||
},
|
||||
"thy": {
|
||||
"url": "http://localhost:7701",
|
||||
"searchApiKey": "bar"
|
||||
"searchApiKey": "bar",
|
||||
"writeApiKey": null
|
||||
}
|
||||
}
|
||||
},
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
|
||||
@@ -498,13 +516,16 @@ async fn get_and_set_network() {
|
||||
"remotes": {
|
||||
"them": {
|
||||
"url": "http://localhost:7702",
|
||||
"searchApiKey": "baz"
|
||||
"searchApiKey": "baz",
|
||||
"writeApiKey": null
|
||||
},
|
||||
"thy": {
|
||||
"url": "http://localhost:7701",
|
||||
"searchApiKey": "bar"
|
||||
"searchApiKey": "bar",
|
||||
"writeApiKey": null
|
||||
}
|
||||
}
|
||||
},
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
|
||||
@@ -518,13 +539,16 @@ async fn get_and_set_network() {
|
||||
"remotes": {
|
||||
"them": {
|
||||
"url": "http://localhost:7702",
|
||||
"searchApiKey": "baz"
|
||||
"searchApiKey": "baz",
|
||||
"writeApiKey": null
|
||||
},
|
||||
"thy": {
|
||||
"url": "http://localhost:7701",
|
||||
"searchApiKey": "bar"
|
||||
"searchApiKey": "bar",
|
||||
"writeApiKey": null
|
||||
}
|
||||
}
|
||||
},
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
|
||||
@@ -538,13 +562,16 @@ async fn get_and_set_network() {
|
||||
"remotes": {
|
||||
"them": {
|
||||
"url": "http://localhost:7702",
|
||||
"searchApiKey": "baz"
|
||||
"searchApiKey": "baz",
|
||||
"writeApiKey": null
|
||||
},
|
||||
"thy": {
|
||||
"url": "http://localhost:7701",
|
||||
"searchApiKey": "bar"
|
||||
"searchApiKey": "bar",
|
||||
"writeApiKey": null
|
||||
}
|
||||
}
|
||||
},
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
|
||||
@@ -553,60 +580,69 @@ async fn get_and_set_network() {
|
||||
|
||||
meili_snap::snapshot!(code, @"200 OK");
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
|
||||
{
|
||||
"self": "thy",
|
||||
"remotes": {
|
||||
"them": {
|
||||
"url": "http://localhost:7702",
|
||||
"searchApiKey": "baz"
|
||||
},
|
||||
"thy": {
|
||||
"url": "http://localhost:7701",
|
||||
"searchApiKey": "bar"
|
||||
}
|
||||
}
|
||||
{
|
||||
"self": "thy",
|
||||
"remotes": {
|
||||
"them": {
|
||||
"url": "http://localhost:7702",
|
||||
"searchApiKey": "baz",
|
||||
"writeApiKey": null
|
||||
},
|
||||
"thy": {
|
||||
"url": "http://localhost:7701",
|
||||
"searchApiKey": "bar",
|
||||
"writeApiKey": null
|
||||
}
|
||||
"###);
|
||||
},
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
|
||||
// still doing nothing
|
||||
let (response, code) = server.set_network(json!({"remotes": {}})).await;
|
||||
|
||||
meili_snap::snapshot!(code, @"200 OK");
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
|
||||
{
|
||||
"self": "thy",
|
||||
"remotes": {
|
||||
"them": {
|
||||
"url": "http://localhost:7702",
|
||||
"searchApiKey": "baz"
|
||||
},
|
||||
"thy": {
|
||||
"url": "http://localhost:7701",
|
||||
"searchApiKey": "bar"
|
||||
}
|
||||
}
|
||||
{
|
||||
"self": "thy",
|
||||
"remotes": {
|
||||
"them": {
|
||||
"url": "http://localhost:7702",
|
||||
"searchApiKey": "baz",
|
||||
"writeApiKey": null
|
||||
},
|
||||
"thy": {
|
||||
"url": "http://localhost:7701",
|
||||
"searchApiKey": "bar",
|
||||
"writeApiKey": null
|
||||
}
|
||||
"###);
|
||||
},
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
|
||||
// good time to check GET
|
||||
let (response, code) = server.get_network().await;
|
||||
|
||||
meili_snap::snapshot!(code, @"200 OK");
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
|
||||
{
|
||||
"self": "thy",
|
||||
"remotes": {
|
||||
"them": {
|
||||
"url": "http://localhost:7702",
|
||||
"searchApiKey": "baz"
|
||||
},
|
||||
"thy": {
|
||||
"url": "http://localhost:7701",
|
||||
"searchApiKey": "bar"
|
||||
}
|
||||
}
|
||||
{
|
||||
"self": "thy",
|
||||
"remotes": {
|
||||
"them": {
|
||||
"url": "http://localhost:7702",
|
||||
"searchApiKey": "baz",
|
||||
"writeApiKey": null
|
||||
},
|
||||
"thy": {
|
||||
"url": "http://localhost:7701",
|
||||
"searchApiKey": "bar",
|
||||
"writeApiKey": null
|
||||
}
|
||||
"###);
|
||||
},
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
|
||||
// deleting everything
|
||||
let (response, code) = server
|
||||
@@ -619,7 +655,8 @@ async fn get_and_set_network() {
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
|
||||
{
|
||||
"self": "thy",
|
||||
"remotes": {}
|
||||
"remotes": {},
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
}
|
||||
|
||||
@@ -131,7 +131,8 @@ async fn remote_sharding() {
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"self": "ms0",
|
||||
"remotes": {}
|
||||
"remotes": {},
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
let (response, code) = ms1.set_network(json!({"self": "ms1"})).await;
|
||||
@@ -139,7 +140,8 @@ async fn remote_sharding() {
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"self": "ms1",
|
||||
"remotes": {}
|
||||
"remotes": {},
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
let (response, code) = ms2.set_network(json!({"self": "ms2"})).await;
|
||||
@@ -147,7 +149,8 @@ async fn remote_sharding() {
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"self": "ms2",
|
||||
"remotes": {}
|
||||
"remotes": {},
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
|
||||
@@ -436,7 +439,8 @@ async fn error_unregistered_remote() {
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"self": "ms0",
|
||||
"remotes": {}
|
||||
"remotes": {},
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
let (response, code) = ms1.set_network(json!({"self": "ms1"})).await;
|
||||
@@ -444,7 +448,8 @@ async fn error_unregistered_remote() {
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"self": "ms1",
|
||||
"remotes": {}
|
||||
"remotes": {},
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
|
||||
@@ -554,7 +559,8 @@ async fn error_no_weighted_score() {
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"self": "ms0",
|
||||
"remotes": {}
|
||||
"remotes": {},
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
let (response, code) = ms1.set_network(json!({"self": "ms1"})).await;
|
||||
@@ -562,7 +568,8 @@ async fn error_no_weighted_score() {
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"self": "ms1",
|
||||
"remotes": {}
|
||||
"remotes": {},
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
|
||||
@@ -687,7 +694,8 @@ async fn error_bad_response() {
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"self": "ms0",
|
||||
"remotes": {}
|
||||
"remotes": {},
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
let (response, code) = ms1.set_network(json!({"self": "ms1"})).await;
|
||||
@@ -695,7 +703,8 @@ async fn error_bad_response() {
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"self": "ms1",
|
||||
"remotes": {}
|
||||
"remotes": {},
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
|
||||
@@ -824,7 +833,8 @@ async fn error_bad_request() {
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"self": "ms0",
|
||||
"remotes": {}
|
||||
"remotes": {},
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
let (response, code) = ms1.set_network(json!({"self": "ms1"})).await;
|
||||
@@ -832,7 +842,8 @@ async fn error_bad_request() {
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"self": "ms1",
|
||||
"remotes": {}
|
||||
"remotes": {},
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
|
||||
@@ -954,7 +965,8 @@ async fn error_bad_request_facets_by_index() {
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"self": "ms0",
|
||||
"remotes": {}
|
||||
"remotes": {},
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
let (response, code) = ms1.set_network(json!({"self": "ms1"})).await;
|
||||
@@ -962,7 +974,8 @@ async fn error_bad_request_facets_by_index() {
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"self": "ms1",
|
||||
"remotes": {}
|
||||
"remotes": {},
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
|
||||
@@ -1095,7 +1108,8 @@ async fn error_bad_request_facets_by_index_facet() {
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"self": "ms0",
|
||||
"remotes": {}
|
||||
"remotes": {},
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
let (response, code) = ms1.set_network(json!({"self": "ms1"})).await;
|
||||
@@ -1103,7 +1117,8 @@ async fn error_bad_request_facets_by_index_facet() {
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"self": "ms1",
|
||||
"remotes": {}
|
||||
"remotes": {},
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
|
||||
@@ -1244,7 +1259,8 @@ async fn error_remote_does_not_answer() {
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"self": "ms0",
|
||||
"remotes": {}
|
||||
"remotes": {},
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
let (response, code) = ms1.set_network(json!({"self": "ms1"})).await;
|
||||
@@ -1252,7 +1268,8 @@ async fn error_remote_does_not_answer() {
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"self": "ms1",
|
||||
"remotes": {}
|
||||
"remotes": {},
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
|
||||
@@ -1445,7 +1462,8 @@ async fn error_remote_404() {
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"self": "ms0",
|
||||
"remotes": {}
|
||||
"remotes": {},
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
let (response, code) = ms1.set_network(json!({"self": "ms1"})).await;
|
||||
@@ -1453,7 +1471,8 @@ async fn error_remote_404() {
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"self": "ms1",
|
||||
"remotes": {}
|
||||
"remotes": {},
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
|
||||
@@ -1640,7 +1659,8 @@ async fn error_remote_sharding_auth() {
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"self": "ms0",
|
||||
"remotes": {}
|
||||
"remotes": {},
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
let (response, code) = ms1.set_network(json!({"self": "ms1"})).await;
|
||||
@@ -1648,7 +1668,8 @@ async fn error_remote_sharding_auth() {
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"self": "ms1",
|
||||
"remotes": {}
|
||||
"remotes": {},
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
|
||||
@@ -1800,7 +1821,8 @@ async fn remote_sharding_auth() {
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"self": "ms0",
|
||||
"remotes": {}
|
||||
"remotes": {},
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
let (response, code) = ms1.set_network(json!({"self": "ms1"})).await;
|
||||
@@ -1808,7 +1830,8 @@ async fn remote_sharding_auth() {
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"self": "ms1",
|
||||
"remotes": {}
|
||||
"remotes": {},
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
|
||||
@@ -1955,7 +1978,8 @@ async fn error_remote_500() {
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"self": "ms0",
|
||||
"remotes": {}
|
||||
"remotes": {},
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
let (response, code) = ms1.set_network(json!({"self": "ms1"})).await;
|
||||
@@ -1963,7 +1987,8 @@ async fn error_remote_500() {
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"self": "ms1",
|
||||
"remotes": {}
|
||||
"remotes": {},
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
|
||||
@@ -2134,7 +2159,8 @@ async fn error_remote_500_once() {
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"self": "ms0",
|
||||
"remotes": {}
|
||||
"remotes": {},
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
let (response, code) = ms1.set_network(json!({"self": "ms1"})).await;
|
||||
@@ -2142,7 +2168,8 @@ async fn error_remote_500_once() {
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"self": "ms1",
|
||||
"remotes": {}
|
||||
"remotes": {},
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
|
||||
|
||||
@@ -15,6 +15,7 @@ use meilisearch_types::heed::{
|
||||
};
|
||||
use meilisearch_types::milli::constants::RESERVED_VECTORS_FIELD_NAME;
|
||||
use meilisearch_types::milli::documents::{obkv_to_object, DocumentsBatchReader};
|
||||
use meilisearch_types::milli::index::EmbeddingsWithMetadata;
|
||||
use meilisearch_types::milli::vector::parsed_vectors::{ExplicitVectors, VectorOrArrayOfVectors};
|
||||
use meilisearch_types::milli::{obkv_to_json, BEU32};
|
||||
use meilisearch_types::tasks::{Status, Task};
|
||||
@@ -591,12 +592,21 @@ fn export_documents(
|
||||
.into());
|
||||
};
|
||||
|
||||
for (embedder_name, (embeddings, regenerate)) in embeddings {
|
||||
for (
|
||||
embedder_name,
|
||||
EmbeddingsWithMetadata { embeddings, regenerate, has_fragments },
|
||||
) in embeddings
|
||||
{
|
||||
let embeddings = ExplicitVectors {
|
||||
embeddings: Some(VectorOrArrayOfVectors::from_array_of_vectors(
|
||||
embeddings,
|
||||
)),
|
||||
regenerate,
|
||||
regenerate: regenerate &&
|
||||
// Meilisearch does not handle well dumps with fragments, because as the fragments
|
||||
// are marked as user-provided,
|
||||
// all embeddings would be regenerated on any settings change or document update.
|
||||
// To prevent this, we mark embeddings has non regenerate in this case.
|
||||
!has_fragments,
|
||||
};
|
||||
vectors
|
||||
.insert(embedder_name, serde_json::to_value(embeddings).unwrap());
|
||||
|
||||
@@ -40,7 +40,7 @@ indexmap = { version = "2.9.0", features = ["serde"] }
|
||||
json-depth-checker = { path = "../json-depth-checker" }
|
||||
levenshtein_automata = { version = "0.2.1", features = ["fst_automaton"] }
|
||||
memchr = "2.7.5"
|
||||
memmap2 = "0.9.5"
|
||||
memmap2 = "0.9.7"
|
||||
obkv = "0.3.0"
|
||||
once_cell = "1.21.3"
|
||||
ordered-float = "5.0.0"
|
||||
@@ -109,6 +109,7 @@ utoipa = { version = "5.4.0", features = [
|
||||
"openapi_extensions",
|
||||
] }
|
||||
lru = "0.14.0"
|
||||
twox-hash = { version = "2.1.1", default-features = false, features = ["std", "xxhash3_64", "xxhash64"] }
|
||||
|
||||
[dev-dependencies]
|
||||
mimalloc = { version = "0.1.47", default-features = false }
|
||||
|
||||
@@ -1766,20 +1766,22 @@ impl Index {
|
||||
&self,
|
||||
rtxn: &RoTxn<'_>,
|
||||
docid: DocumentId,
|
||||
) -> Result<BTreeMap<String, (Vec<Embedding>, bool)>> {
|
||||
) -> Result<BTreeMap<String, EmbeddingsWithMetadata>> {
|
||||
let mut res = BTreeMap::new();
|
||||
let embedders = self.embedding_configs();
|
||||
for config in embedders.embedding_configs(rtxn)? {
|
||||
let embedder_info = embedders.embedder_info(rtxn, &config.name)?.unwrap();
|
||||
let has_fragments = config.config.embedder_options.has_fragments();
|
||||
let reader = ArroyWrapper::new(
|
||||
self.vector_arroy,
|
||||
embedder_info.embedder_id,
|
||||
config.config.quantized(),
|
||||
);
|
||||
let embeddings = reader.item_vectors(rtxn, docid)?;
|
||||
let regenerate = embedder_info.embedding_status.must_regenerate(docid);
|
||||
res.insert(
|
||||
config.name.to_owned(),
|
||||
(embeddings, embedder_info.embedding_status.must_regenerate(docid)),
|
||||
EmbeddingsWithMetadata { embeddings, regenerate, has_fragments },
|
||||
);
|
||||
}
|
||||
Ok(res)
|
||||
@@ -1919,6 +1921,12 @@ impl Index {
|
||||
}
|
||||
}
|
||||
|
||||
pub struct EmbeddingsWithMetadata {
|
||||
pub embeddings: Vec<Embedding>,
|
||||
pub regenerate: bool,
|
||||
pub has_fragments: bool,
|
||||
}
|
||||
|
||||
#[derive(Debug, Default, Deserialize, Serialize)]
|
||||
pub struct ChatConfig {
|
||||
pub description: String,
|
||||
|
||||
@@ -76,6 +76,7 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index {
|
||||
&mut new_fields_ids_map,
|
||||
&|| false,
|
||||
Progress::default(),
|
||||
None,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
|
||||
@@ -84,6 +84,7 @@ impl TempIndex {
|
||||
&mut new_fields_ids_map,
|
||||
&|| false,
|
||||
Progress::default(),
|
||||
None,
|
||||
)?;
|
||||
|
||||
if let Some(error) = operation_stats.into_iter().find_map(|stat| stat.error) {
|
||||
@@ -167,6 +168,7 @@ impl TempIndex {
|
||||
&mut new_fields_ids_map,
|
||||
&|| false,
|
||||
Progress::default(),
|
||||
None,
|
||||
)?;
|
||||
|
||||
if let Some(error) = operation_stats.into_iter().find_map(|stat| stat.error) {
|
||||
@@ -242,6 +244,7 @@ fn aborting_indexation() {
|
||||
&mut new_fields_ids_map,
|
||||
&|| false,
|
||||
Progress::default(),
|
||||
None,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
|
||||
@@ -93,7 +93,7 @@ pub struct ChatSearchParams {
|
||||
pub hybrid: Setting<HybridQuery>,
|
||||
|
||||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
||||
#[deserr(default = Setting::Set(20))]
|
||||
#[deserr(default)]
|
||||
#[schema(value_type = Option<usize>)]
|
||||
pub limit: Setting<usize>,
|
||||
|
||||
|
||||
@@ -23,7 +23,7 @@ use crate::progress::EmbedderStats;
|
||||
use crate::prompt::Prompt;
|
||||
use crate::update::del_add::{DelAdd, KvReaderDelAdd, KvWriterDelAdd};
|
||||
use crate::update::settings::InnerIndexSettingsDiff;
|
||||
use crate::vector::db::{EmbedderInfo, EmbeddingStatus, EmbeddingStatusDelta};
|
||||
use crate::vector::db::{EmbedderInfo, EmbeddingStatusDelta};
|
||||
use crate::vector::error::{EmbedErrorKind, PossibleEmbeddingMistakes, UnusedVectorsDistribution};
|
||||
use crate::vector::extractor::{Extractor, ExtractorDiff, RequestFragmentExtractor};
|
||||
use crate::vector::parsed_vectors::{ParsedVectorsDiff, VectorState};
|
||||
@@ -441,6 +441,8 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
|
||||
{
|
||||
let embedder_is_manual = matches!(*runtime.embedder, Embedder::UserProvided(_));
|
||||
|
||||
let (old_is_user_provided, old_must_regenerate) =
|
||||
embedder_info.embedding_status.is_user_provided_must_regenerate(docid);
|
||||
let (old, new) = parsed_vectors.remove(embedder_name);
|
||||
let new_must_regenerate = new.must_regenerate();
|
||||
let delta = match action {
|
||||
@@ -499,16 +501,19 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
|
||||
|
||||
let is_adding_fragments = has_fragments && !old_has_fragments;
|
||||
|
||||
if is_adding_fragments {
|
||||
if !has_fragments {
|
||||
// removing fragments
|
||||
regenerate_prompt(obkv, &runtime.document_template, new_fields_ids_map)?
|
||||
} else if is_adding_fragments ||
|
||||
// regenerate all fragments when going from user provided to ! user provided
|
||||
old_is_user_provided
|
||||
{
|
||||
regenerate_all_fragments(
|
||||
runtime.fragments(),
|
||||
&doc_alloc,
|
||||
new_fields_ids_map,
|
||||
obkv,
|
||||
)
|
||||
} else if !has_fragments {
|
||||
// removing fragments
|
||||
regenerate_prompt(obkv, &runtime.document_template, new_fields_ids_map)?
|
||||
} else {
|
||||
let mut fragment_diff = Vec::new();
|
||||
let new_fields_ids_map = new_fields_ids_map.as_fields_ids_map();
|
||||
@@ -600,7 +605,8 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
|
||||
docid,
|
||||
&delta,
|
||||
new_must_regenerate,
|
||||
&embedder_info.embedding_status,
|
||||
old_is_user_provided,
|
||||
old_must_regenerate,
|
||||
);
|
||||
|
||||
// and we finally push the unique vectors into the writer
|
||||
@@ -657,10 +663,9 @@ fn push_embedding_status_delta(
|
||||
docid: DocumentId,
|
||||
delta: &VectorStateDelta,
|
||||
new_must_regenerate: bool,
|
||||
embedding_status: &EmbeddingStatus,
|
||||
old_is_user_provided: bool,
|
||||
old_must_regenerate: bool,
|
||||
) {
|
||||
let (old_is_user_provided, old_must_regenerate) =
|
||||
embedding_status.is_user_provided_must_regenerate(docid);
|
||||
let new_is_user_provided = match delta {
|
||||
VectorStateDelta::NoChange => old_is_user_provided,
|
||||
VectorStateDelta::NowRemoved => {
|
||||
|
||||
@@ -1977,6 +1977,7 @@ mod tests {
|
||||
&mut new_fields_ids_map,
|
||||
&|| false,
|
||||
Progress::default(),
|
||||
None,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
@@ -2029,6 +2030,7 @@ mod tests {
|
||||
&mut new_fields_ids_map,
|
||||
&|| false,
|
||||
Progress::default(),
|
||||
None,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
@@ -2117,6 +2119,7 @@ mod tests {
|
||||
&mut new_fields_ids_map,
|
||||
&|| false,
|
||||
Progress::default(),
|
||||
None,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
@@ -2306,6 +2309,7 @@ mod tests {
|
||||
&mut new_fields_ids_map,
|
||||
&|| false,
|
||||
Progress::default(),
|
||||
None,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
@@ -2369,6 +2373,7 @@ mod tests {
|
||||
&mut new_fields_ids_map,
|
||||
&|| false,
|
||||
Progress::default(),
|
||||
None,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
@@ -2423,6 +2428,7 @@ mod tests {
|
||||
&mut new_fields_ids_map,
|
||||
&|| false,
|
||||
Progress::default(),
|
||||
None,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
@@ -2476,6 +2482,7 @@ mod tests {
|
||||
&mut new_fields_ids_map,
|
||||
&|| false,
|
||||
Progress::default(),
|
||||
None,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
@@ -2531,6 +2538,7 @@ mod tests {
|
||||
&mut new_fields_ids_map,
|
||||
&|| false,
|
||||
Progress::default(),
|
||||
None,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
@@ -2591,6 +2599,7 @@ mod tests {
|
||||
&mut new_fields_ids_map,
|
||||
&|| false,
|
||||
Progress::default(),
|
||||
None,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
@@ -2644,6 +2653,7 @@ mod tests {
|
||||
&mut new_fields_ids_map,
|
||||
&|| false,
|
||||
Progress::default(),
|
||||
None,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
@@ -2697,6 +2707,7 @@ mod tests {
|
||||
&mut new_fields_ids_map,
|
||||
&|| false,
|
||||
Progress::default(),
|
||||
None,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
@@ -2908,6 +2919,7 @@ mod tests {
|
||||
&mut new_fields_ids_map,
|
||||
&|| false,
|
||||
Progress::default(),
|
||||
None,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
@@ -2968,6 +2980,7 @@ mod tests {
|
||||
&mut new_fields_ids_map,
|
||||
&|| false,
|
||||
Progress::default(),
|
||||
None,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
@@ -3025,6 +3038,7 @@ mod tests {
|
||||
&mut new_fields_ids_map,
|
||||
&|| false,
|
||||
Progress::default(),
|
||||
None,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
|
||||
@@ -16,6 +16,7 @@ pub struct IndexerConfig {
|
||||
pub max_positions_per_attributes: Option<u32>,
|
||||
pub skip_index_budget: bool,
|
||||
pub experimental_no_edition_2024_for_settings: bool,
|
||||
pub experimental_no_edition_2024_for_dumps: bool,
|
||||
}
|
||||
|
||||
impl IndexerConfig {
|
||||
@@ -65,6 +66,7 @@ impl Default for IndexerConfig {
|
||||
max_positions_per_attributes: None,
|
||||
skip_index_budget: false,
|
||||
experimental_no_edition_2024_for_settings: false,
|
||||
experimental_no_edition_2024_for_dumps: false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -620,12 +620,35 @@ impl<'a, 'b, 'extractor> Chunks<'a, 'b, 'extractor> {
|
||||
where
|
||||
'a: 'doc,
|
||||
{
|
||||
match &mut self.kind {
|
||||
ChunkType::Fragments { fragments: _, session } => {
|
||||
let doc_alloc = session.doc_alloc();
|
||||
self.set_status(docid, old_is_user_provided, true, false, true);
|
||||
|
||||
if old_is_user_provided | full_reindex {
|
||||
match &mut self.kind {
|
||||
ChunkType::Fragments { fragments, session } => {
|
||||
let doc_alloc = session.doc_alloc();
|
||||
let reindex_all_fragments =
|
||||
// when the vectors were user-provided, Meilisearch cannot know if they come from a particular fragment,
|
||||
// and so Meilisearch needs to clear all embeddings in that case.
|
||||
// Fortunately, as dump export fragment vector with `regenerate` set to `false`,
|
||||
// this case should be rare and opt-in.
|
||||
old_is_user_provided ||
|
||||
// full-reindex case
|
||||
full_reindex;
|
||||
|
||||
if reindex_all_fragments {
|
||||
session.on_embed_mut().clear_vectors(docid);
|
||||
let extractors = fragments.iter().map(|fragment| {
|
||||
RequestFragmentExtractor::new(fragment, doc_alloc).ignore_errors()
|
||||
});
|
||||
insert_autogenerated(
|
||||
docid,
|
||||
external_docid,
|
||||
extractors,
|
||||
document,
|
||||
&(),
|
||||
session,
|
||||
unused_vectors_distribution,
|
||||
)?;
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
settings_delta.try_for_each_fragment_diff(
|
||||
@@ -669,7 +692,6 @@ impl<'a, 'b, 'extractor> Chunks<'a, 'b, 'extractor> {
|
||||
Result::Ok(())
|
||||
},
|
||||
)?;
|
||||
self.set_status(docid, old_is_user_provided, true, false, true);
|
||||
}
|
||||
ChunkType::DocumentTemplate { document_template, session } => {
|
||||
let doc_alloc = session.doc_alloc();
|
||||
@@ -690,12 +712,18 @@ impl<'a, 'b, 'extractor> Chunks<'a, 'b, 'extractor> {
|
||||
|
||||
match extractor.diff_settings(document, &external_docid, old_extractor.as_ref())? {
|
||||
ExtractorDiff::Removed => {
|
||||
if old_is_user_provided || full_reindex {
|
||||
session.on_embed_mut().clear_vectors(docid);
|
||||
}
|
||||
OnEmbed::process_embedding_response(
|
||||
session.on_embed_mut(),
|
||||
crate::vector::session::EmbeddingResponse { metadata, embedding: None },
|
||||
);
|
||||
}
|
||||
ExtractorDiff::Added(input) | ExtractorDiff::Updated(input) => {
|
||||
if old_is_user_provided || full_reindex {
|
||||
session.on_embed_mut().clear_vectors(docid);
|
||||
}
|
||||
session.request_embedding(metadata, input, unused_vectors_distribution)?;
|
||||
}
|
||||
ExtractorDiff::Unchanged => { /* do nothing */ }
|
||||
@@ -722,6 +750,13 @@ impl<'a, 'b, 'extractor> Chunks<'a, 'b, 'extractor> {
|
||||
where
|
||||
'a: 'doc,
|
||||
{
|
||||
self.set_status(
|
||||
docid,
|
||||
old_is_user_provided,
|
||||
old_must_regenerate,
|
||||
false,
|
||||
new_must_regenerate,
|
||||
);
|
||||
match &mut self.kind {
|
||||
ChunkType::DocumentTemplate { document_template, session } => {
|
||||
let doc_alloc = session.doc_alloc();
|
||||
@@ -731,10 +766,6 @@ impl<'a, 'b, 'extractor> Chunks<'a, 'b, 'extractor> {
|
||||
new_fields_ids_map,
|
||||
);
|
||||
|
||||
if old_is_user_provided {
|
||||
session.on_embed_mut().clear_vectors(docid);
|
||||
}
|
||||
|
||||
update_autogenerated(
|
||||
docid,
|
||||
external_docid,
|
||||
@@ -743,6 +774,7 @@ impl<'a, 'b, 'extractor> Chunks<'a, 'b, 'extractor> {
|
||||
new_document,
|
||||
&external_docid,
|
||||
old_must_regenerate,
|
||||
old_is_user_provided,
|
||||
session,
|
||||
unused_vectors_distribution,
|
||||
)?
|
||||
@@ -754,7 +786,21 @@ impl<'a, 'b, 'extractor> Chunks<'a, 'b, 'extractor> {
|
||||
});
|
||||
|
||||
if old_is_user_provided {
|
||||
// when the document was `userProvided`, Meilisearch cannot know whose fragments a particular
|
||||
// vector was referring to.
|
||||
// So as a result Meilisearch will regenerate all fragments on this case.
|
||||
// Fortunately, since dumps for fragments set regenerate to false, this case should be rare.
|
||||
session.on_embed_mut().clear_vectors(docid);
|
||||
insert_autogenerated(
|
||||
docid,
|
||||
external_docid,
|
||||
extractors,
|
||||
new_document,
|
||||
&(),
|
||||
session,
|
||||
unused_vectors_distribution,
|
||||
)?;
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
update_autogenerated(
|
||||
@@ -765,25 +811,18 @@ impl<'a, 'b, 'extractor> Chunks<'a, 'b, 'extractor> {
|
||||
new_document,
|
||||
&(),
|
||||
old_must_regenerate,
|
||||
false,
|
||||
session,
|
||||
unused_vectors_distribution,
|
||||
)?
|
||||
}
|
||||
};
|
||||
|
||||
self.set_status(
|
||||
docid,
|
||||
old_is_user_provided,
|
||||
old_must_regenerate,
|
||||
false,
|
||||
new_must_regenerate,
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
pub fn insert_autogenerated<D: Document<'a> + Debug>(
|
||||
pub fn insert_autogenerated<'doc, D: Document<'doc> + Debug>(
|
||||
&mut self,
|
||||
docid: DocumentId,
|
||||
external_docid: &'a str,
|
||||
@@ -791,7 +830,10 @@ impl<'a, 'b, 'extractor> Chunks<'a, 'b, 'extractor> {
|
||||
new_fields_ids_map: &'a RefCell<crate::GlobalFieldsIdsMap>,
|
||||
unused_vectors_distribution: &UnusedVectorsDistributionBump<'a>,
|
||||
new_must_regenerate: bool,
|
||||
) -> Result<()> {
|
||||
) -> Result<()>
|
||||
where
|
||||
'a: 'doc,
|
||||
{
|
||||
let (default_is_user_provided, default_must_regenerate) = (false, true);
|
||||
self.set_status(
|
||||
docid,
|
||||
@@ -956,6 +998,7 @@ fn update_autogenerated<'doc, 'a: 'doc, 'b, E, OD, ND>(
|
||||
new_document: ND,
|
||||
meta: &E::DocumentMetadata,
|
||||
old_must_regenerate: bool,
|
||||
mut must_clear_on_generation: bool,
|
||||
session: &mut EmbedSession<'a, OnEmbeddingDocumentUpdates<'a, 'b>, E::Input>,
|
||||
unused_vectors_distribution: &UnusedVectorsDistributionBump<'a>,
|
||||
) -> Result<()>
|
||||
@@ -984,6 +1027,11 @@ where
|
||||
};
|
||||
|
||||
if must_regenerate {
|
||||
if must_clear_on_generation {
|
||||
must_clear_on_generation = false;
|
||||
session.on_embed_mut().clear_vectors(docid);
|
||||
}
|
||||
|
||||
let metadata =
|
||||
Metadata { docid, external_docid, extractor_id: extractor.extractor_id() };
|
||||
|
||||
@@ -1002,7 +1050,7 @@ where
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn insert_autogenerated<'a, 'b, E, D: Document<'a> + Debug>(
|
||||
fn insert_autogenerated<'doc, 'a: 'doc, 'b, E, D: Document<'doc> + Debug>(
|
||||
docid: DocumentId,
|
||||
external_docid: &'a str,
|
||||
extractors: impl IntoIterator<Item = E>,
|
||||
|
||||
@@ -17,6 +17,7 @@ use super::guess_primary_key::retrieve_or_guess_primary_key;
|
||||
use crate::documents::PrimaryKey;
|
||||
use crate::progress::{AtomicPayloadStep, Progress};
|
||||
use crate::update::new::document::{DocumentContext, Versions};
|
||||
use crate::update::new::indexer::sharding::Shards;
|
||||
use crate::update::new::steps::IndexingStep;
|
||||
use crate::update::new::thread_local::MostlySend;
|
||||
use crate::update::new::{DocumentIdentifiers, Insertion, Update};
|
||||
@@ -71,6 +72,7 @@ impl<'pl> DocumentOperation<'pl> {
|
||||
new_fields_ids_map: &mut FieldsIdsMap,
|
||||
must_stop_processing: &MSP,
|
||||
progress: Progress,
|
||||
shards: Option<&Shards>,
|
||||
) -> Result<(DocumentOperationChanges<'pl>, Vec<PayloadStats>, Option<PrimaryKey<'pl>>)>
|
||||
where
|
||||
MSP: Fn() -> bool,
|
||||
@@ -107,6 +109,7 @@ impl<'pl> DocumentOperation<'pl> {
|
||||
&mut bytes,
|
||||
&docids_version_offsets,
|
||||
IndexDocumentsMethod::ReplaceDocuments,
|
||||
shards,
|
||||
payload,
|
||||
),
|
||||
Payload::Update(payload) => extract_addition_payload_changes(
|
||||
@@ -120,6 +123,7 @@ impl<'pl> DocumentOperation<'pl> {
|
||||
&mut bytes,
|
||||
&docids_version_offsets,
|
||||
IndexDocumentsMethod::UpdateDocuments,
|
||||
shards,
|
||||
payload,
|
||||
),
|
||||
Payload::Deletion(to_delete) => extract_deletion_payload_changes(
|
||||
@@ -127,6 +131,7 @@ impl<'pl> DocumentOperation<'pl> {
|
||||
rtxn,
|
||||
&mut available_docids,
|
||||
&docids_version_offsets,
|
||||
shards,
|
||||
to_delete,
|
||||
),
|
||||
};
|
||||
@@ -173,6 +178,7 @@ fn extract_addition_payload_changes<'r, 'pl: 'r>(
|
||||
bytes: &mut u64,
|
||||
main_docids_version_offsets: &hashbrown::HashMap<&'pl str, PayloadOperations<'pl>>,
|
||||
method: IndexDocumentsMethod,
|
||||
shards: Option<&Shards>,
|
||||
payload: &'pl [u8],
|
||||
) -> Result<hashbrown::HashMap<&'pl str, PayloadOperations<'pl>>> {
|
||||
use IndexDocumentsMethod::{ReplaceDocuments, UpdateDocuments};
|
||||
@@ -210,12 +216,20 @@ fn extract_addition_payload_changes<'r, 'pl: 'r>(
|
||||
primary_key.as_ref().unwrap()
|
||||
};
|
||||
|
||||
let current_offset = iter.byte_offset();
|
||||
let content = &payload[previous_offset..current_offset];
|
||||
previous_offset = current_offset;
|
||||
|
||||
let external_id =
|
||||
retrieved_primary_key.extract_fields_and_docid(doc, new_fields_ids_map, indexer)?;
|
||||
|
||||
let external_id = external_id.to_de();
|
||||
let current_offset = iter.byte_offset();
|
||||
let document_offset = DocumentOffset { content: &payload[previous_offset..current_offset] };
|
||||
|
||||
if shards.is_some_and(|shards| !shards.must_process(external_id)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
let document_offset = DocumentOffset { content };
|
||||
|
||||
match main_docids_version_offsets.get(external_id) {
|
||||
None => {
|
||||
@@ -299,8 +313,6 @@ fn extract_addition_payload_changes<'r, 'pl: 'r>(
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
previous_offset = iter.byte_offset();
|
||||
}
|
||||
|
||||
if payload.is_empty() {
|
||||
@@ -329,11 +341,16 @@ fn extract_deletion_payload_changes<'s, 'pl: 's>(
|
||||
rtxn: &RoTxn,
|
||||
available_docids: &mut AvailableIds,
|
||||
main_docids_version_offsets: &hashbrown::HashMap<&'s str, PayloadOperations<'pl>>,
|
||||
shards: Option<&Shards>,
|
||||
to_delete: &'pl [&'pl str],
|
||||
) -> Result<hashbrown::HashMap<&'s str, PayloadOperations<'pl>>> {
|
||||
let mut new_docids_version_offsets = hashbrown::HashMap::<&str, PayloadOperations<'pl>>::new();
|
||||
|
||||
for external_id in to_delete {
|
||||
if shards.is_some_and(|shards| !shards.must_process(external_id)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
match main_docids_version_offsets.get(external_id) {
|
||||
None => {
|
||||
match index.external_documents_ids().get(rtxn, external_id) {
|
||||
|
||||
@@ -36,6 +36,7 @@ mod guess_primary_key;
|
||||
mod partial_dump;
|
||||
mod post_processing;
|
||||
pub mod settings_changes;
|
||||
pub mod sharding;
|
||||
mod update_by_function;
|
||||
mod write;
|
||||
|
||||
|
||||
22
crates/milli/src/update/new/indexer/sharding.rs
Normal file
22
crates/milli/src/update/new/indexer/sharding.rs
Normal file
@@ -0,0 +1,22 @@
|
||||
// Copyright © 2025 Meilisearch Some Rights Reserved
|
||||
// This file is part of Meilisearch Enterprise Edition (EE).
|
||||
// Use of this source code is governed by the Business Source License 1.1,
|
||||
// as found in the LICENSE-EE file or at <https://mariadb.com/bsl11>
|
||||
|
||||
use std::hash::{BuildHasher as _, BuildHasherDefault};
|
||||
|
||||
pub struct Shards {
|
||||
pub own: Vec<String>,
|
||||
pub others: Vec<String>,
|
||||
}
|
||||
|
||||
impl Shards {
|
||||
pub fn must_process(&self, docid: &str) -> bool {
|
||||
let hasher = BuildHasherDefault::<twox_hash::XxHash3_64>::new();
|
||||
let to_hash = |shard: &String| hasher.hash_one((shard, docid));
|
||||
|
||||
let max_hash = self.others.iter().map(to_hash).max().unwrap_or_default();
|
||||
|
||||
self.own.iter().map(to_hash).any(|hash| hash > max_hash)
|
||||
}
|
||||
}
|
||||
@@ -841,6 +841,25 @@ impl EmbedderOptions {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn has_fragments(&self) -> bool {
|
||||
match &self {
|
||||
EmbedderOptions::HuggingFace(_)
|
||||
| EmbedderOptions::OpenAi(_)
|
||||
| EmbedderOptions::Ollama(_)
|
||||
| EmbedderOptions::UserProvided(_) => false,
|
||||
EmbedderOptions::Rest(embedder_options) => {
|
||||
!embedder_options.indexing_fragments.is_empty()
|
||||
}
|
||||
EmbedderOptions::Composite(embedder_options) => {
|
||||
if let SubEmbedderOptions::Rest(embedder_options) = &embedder_options.index {
|
||||
!embedder_options.indexing_fragments.is_empty()
|
||||
} else {
|
||||
false
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for EmbedderOptions {
|
||||
|
||||
@@ -59,6 +59,7 @@ fn test_facet_distribution_with_no_facet_values() {
|
||||
&mut new_fields_ids_map,
|
||||
&|| false,
|
||||
Progress::default(),
|
||||
None,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
|
||||
@@ -95,6 +95,7 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index {
|
||||
&mut new_fields_ids_map,
|
||||
&|| false,
|
||||
Progress::default(),
|
||||
None,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
|
||||
@@ -329,6 +329,7 @@ fn criteria_ascdesc() {
|
||||
&mut new_fields_ids_map,
|
||||
&|| false,
|
||||
Progress::default(),
|
||||
None,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
|
||||
@@ -138,6 +138,7 @@ fn test_typo_disabled_on_word() {
|
||||
&mut new_fields_ids_map,
|
||||
&|| false,
|
||||
Progress::default(),
|
||||
None,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
|
||||
Reference in New Issue
Block a user