Compare commits

..

45 Commits

Author SHA1 Message Date
Louis Dureuil
119bb805e5 camel case the fields in "origin" 2025-07-31 17:32:45 +02:00
Louis Dureuil
9f10011525 Rename Body::with_file 2025-07-30 17:33:01 +02:00
Louis Dureuil
54a2029b1f Adjust timeouts 2025-07-30 17:32:54 +02:00
Louis Dureuil
4fe09f406c Don't always hardcode Content-Type in proxy 2025-07-29 17:52:48 +02:00
Louis Dureuil
9d95f8187d Update snap 2025-07-29 16:28:46 +02:00
Louis Dureuil
fb0904b893 Misc churn 2025-07-29 14:47:49 +02:00
Louis Dureuil
eb829e93c5 Move meilisearch_types::Network to its own module 2025-07-29 14:47:49 +02:00
Louis Dureuil
be36afe9e6 Make types Serialize and Deserialize for proxying 2025-07-29 14:47:49 +02:00
Louis Dureuil
27c087a4fb New errors 2025-07-29 14:47:49 +02:00
Louis Dureuil
c618877bce Dependency changes 2025-07-29 14:47:49 +02:00
Louis Dureuil
982e554639 IndexScheduler::update_task now merges the task.network and accepts &mut Task 2025-07-29 14:47:49 +02:00
Louis Dureuil
a8cfec19b5 IndexScheduler::set_task_network 2025-07-29 14:47:48 +02:00
Louis Dureuil
5d2b1c2637 file-store: persist returns the persisted File object 2025-07-29 14:47:48 +02:00
Louis Dureuil
65fefc7122 Dump support for network 2025-07-29 14:47:48 +02:00
Louis Dureuil
180cfd71cc Proxy all document tasks to the network when sharding is enabled 2025-07-29 14:47:48 +02:00
Louis Dureuil
614cd8cc1c Shard documents 2025-07-29 14:47:48 +02:00
Louis Dureuil
c6f6808981 network: add sharding to Network and writeApiKey to Remotes 2025-07-29 14:47:48 +02:00
Louis Dureuil
9cb77fd310 Add proxy module to proxy requests to members of a network 2025-07-29 14:47:48 +02:00
Louis Dureuil
fa8bd6430c Add new milli::update:🆕:indexer::sharding module 2025-07-29 14:47:48 +02:00
Louis Dureuil
4c016432e1 Add network to Task and TaskView 2025-07-29 14:47:48 +02:00
Louis Dureuil
0aa1f63061 Add EE license 2025-07-29 11:54:12 +02:00
Louis Dureuil
d4c88f28f3 Merge pull request #5780 from meilisearch/fix-api-keys
Fix api key action inconsistencies
2025-07-28 10:33:48 +00:00
Mubelotix
d90c76d3cc Update tests 2025-07-28 11:35:15 +02:00
Mubelotix
f6bc6854f8 Fix key action inconsistencies 2025-07-28 11:10:55 +02:00
Clément Renault
42001a25ff Merge pull request #5770 from meilisearch/fix/update-index-chat-settings
fix: index chat settings `searchParameters` incorrectly set with `limit: 20` when sending empty object
2025-07-22 13:26:01 +00:00
Louis Dureuil
080d5f94dd Merge pull request #5763 from meilisearch/embedding-fixes
Regenerate all fragments when coming from a user provided vector
2025-07-22 08:35:07 +00:00
nicolasvienot
ba0f50e5ef fix: update default deserialization for ChatSearchParams limit field 2025-07-22 10:24:18 +02:00
Louis Dureuil
ce6230aa85 Merge pull request #5762 from meilisearch/new-document-indexer-for-dumps
Use the edition 2024 documents indexer in the dumps
2025-07-21 14:53:43 +00:00
Louis Dureuil
6dc241f9de Fix tests 2025-07-21 15:11:24 +02:00
Louis Dureuil
01d1ef65c4 Update search and docs usages 2025-07-21 15:11:24 +02:00
Louis Dureuil
3246667590 when exporting vectors, for regenerate to false when the embedder has fragments 2025-07-21 15:11:24 +02:00
Louis Dureuil
109395c199 Index::embeddings specifies if the embedder has fragments 2025-07-21 15:11:24 +02:00
Louis Dureuil
a0b71a8785 EmbedderOptions::has_fragments() 2025-07-21 15:11:24 +02:00
Louis Dureuil
00a5c86f13 Remove accidentally added db snap 2025-07-21 15:11:24 +02:00
Louis Dureuil
366c37a686 Fix new indexer 2025-07-21 15:11:23 +02:00
Louis Dureuil
afc164a271 Fix in old indexer 2025-07-21 15:11:23 +02:00
Kerollmops
bdc2d1e64d Move the edition 2024 dump parameter to the right place 2025-07-21 14:50:05 +02:00
Clément Renault
b85657de1e Update memmap2 version everywhere 2025-07-17 17:30:44 +02:00
Clément Renault
626be0ef28 Small typo fix
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2025-07-17 17:27:00 +02:00
Clément Renault
1b476b8a35 Add documentation to the new documents_file dump reader method
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2025-07-17 17:26:41 +02:00
Clément Renault
a1b42c10e2 Make clippy happy 2025-07-17 17:21:03 +02:00
Clément Renault
d67db6e3c2 Use the edition 2024 documents indexer in the dumps 2025-07-17 17:12:51 +02:00
Clément Renault
760ccffdbd Expose the documents files from the dumps 2025-07-17 17:12:51 +02:00
Clément Renault
338806283b Do not track meilisearch databases 2025-07-17 17:12:51 +02:00
Clément Renault
fe15e11c9d Introduce a new CLI and env var to use the old document indexer when
importing dumps
2025-07-17 17:12:51 +02:00
89 changed files with 1620 additions and 736 deletions

2
.gitignore vendored
View File

@@ -5,7 +5,7 @@
**/*.json_lines
**/*.rs.bk
/*.mdb
/data.ms
/*.ms
/snapshots
/dumps
/bench

46
Cargo.lock generated
View File

@@ -1194,21 +1194,6 @@ version = "0.7.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f46ad14479a25103f283c0f10005961cf086d8dc42205bb44c46ac563475dca6"
[[package]]
name = "cohere-rust"
version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d8b553b385b0f2562138baea705b5707335314f8e91a58e7d1a03c3a6c332423"
dependencies = [
"bytes",
"reqwest",
"serde",
"serde_json",
"strum_macros 0.26.4",
"thiserror 1.0.69",
"tokio",
]
[[package]]
name = "color-spantrace"
version = "0.3.0"
@@ -3460,7 +3445,7 @@ dependencies = [
"serde_json",
"serde_yaml",
"strum",
"strum_macros 0.27.1",
"strum_macros",
"unicode-blocks",
"unicode-normalization",
"unicode-segmentation",
@@ -3760,6 +3745,7 @@ dependencies = [
"actix-web-lab",
"anyhow",
"async-openai",
"backoff",
"brotli",
"bstr",
"build-info",
@@ -3768,7 +3754,6 @@ dependencies = [
"bytes",
"cargo_toml",
"clap",
"cohere-rust",
"crossbeam-channel",
"deserr",
"dump",
@@ -3791,6 +3776,7 @@ dependencies = [
"meili-snap",
"meilisearch-auth",
"meilisearch-types",
"memmap2",
"mimalloc",
"mime",
"mopa-maintained",
@@ -3924,9 +3910,9 @@ checksum = "32a282da65faaf38286cf3be983213fcf1d2e2a58700e808f83f4ea9a4804bc0"
[[package]]
name = "memmap2"
version = "0.9.5"
version = "0.9.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fd3f7eed9d3848f8b98834af67102b720745c4ec028fcd0aa0239277e7de374f"
checksum = "483758ad303d734cec05e5c12b41d7e93e6a6390c5e9dae6bdeb7c1259012d28"
dependencies = [
"libc",
"stable_deref_trait",
@@ -4003,6 +3989,7 @@ dependencies = [
"time",
"tokenizers",
"tracing",
"twox-hash",
"ureq",
"url",
"utoipa",
@@ -5837,20 +5824,7 @@ version = "0.27.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f64def088c51c9510a8579e3c5d67c65349dcf755e5479ad3d010aa6454e2c32"
dependencies = [
"strum_macros 0.27.1",
]
[[package]]
name = "strum_macros"
version = "0.26.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4c6bee85a5a24955dc440386795aa378cd9cf82acd5f764469152d2270e581be"
dependencies = [
"heck",
"proc-macro2",
"quote",
"rustversion",
"syn 2.0.101",
"strum_macros",
]
[[package]]
@@ -6458,6 +6432,12 @@ version = "0.2.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b"
[[package]]
name = "twox-hash"
version = "2.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8b907da542cbced5261bd3256de1b3a1bf340a3d37f93425a07362a1d687de56"
[[package]]
name = "typeid"
version = "1.0.3"

View File

@@ -19,3 +19,11 @@ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
---
đź”’ Meilisearch Enterprise Edition (EE)
Certain parts of this codebase are not licensed under the MIT license and governed by the Business Source License 1.1.
See the LICENSE-EE file for details.

67
LICENSE-EE Normal file
View File

@@ -0,0 +1,67 @@
Business Source License 1.1 – Adapted for Meili SAS
This license is based on the Business Source License version 1.1, as published by MariaDB Corporation Ab.
Parameters
Licensor: Meili SAS
Licensed Work: Any file explicitly marked as “Enterprise Edition (EE)” or “governed by the Business Source License”.
Additional Use Grant:
You may use, modify, and distribute the Licensed Work for non-production purposes only, such as testing, development, or evaluation.
Production use of the Licensed Work requires a commercial license agreement with Meilisearch. Contact bonjour@meilisearch.com for licensing.
Change License: MIT
Change Date: Four years from the date the Licensed Work is published.
This License does not apply to any code outside of the Licensed Work, which remains under the MIT license.
For information about alternative licensing arrangements for the Licensed Work,
please contact bonjour@meilisearch.com or sales@meilisearch.com.
Notice
Business Source License 1.1
Terms
The Licensor hereby grants you the right to copy, modify, create derivative
works, redistribute, and make non-production use of the Licensed Work. The
Licensor may make an Additional Use Grant, above, permitting limited production use.
Effective on the Change Date, or the fourth anniversary of the first publicly
available distribution of a specific version of the Licensed Work under this
License, whichever comes first, the Licensor hereby grants you rights under
the terms of the Change License, and the rights granted in the paragraph
above terminate.
If your use of the Licensed Work does not comply with the requirements
currently in effect as described in this License, you must purchase a
commercial license from the Licensor, its affiliated entities, or authorized
resellers, or you must refrain from using the Licensed Work.
All copies of the original and modified Licensed Work, and derivative works
of the Licensed Work, are subject to this License. This License applies
separately for each version of the Licensed Work and the Change Date may vary
for each version of the Licensed Work released by Licensor.
You must conspicuously display this License on each original or modified copy
of the Licensed Work. If you receive the Licensed Work in original or
modified form from a third party, the terms and conditions set forth in this
License apply to your use of that work.
Any use of the Licensed Work in violation of this License will automatically
terminate your rights under this License for the current and all other
versions of the Licensed Work.
This License does not grant you any right in any trademark or logo of
Licensor or its affiliates (provided that you may use a trademark or logo of
Licensor as expressly required by this License).
TO THE EXTENT PERMITTED BY APPLICABLE LAW, THE LICENSED WORK IS PROVIDED ON
AN "AS IS" BASIS. LICENSOR HEREBY DISCLAIMS ALL WARRANTIES AND CONDITIONS,
EXPRESS OR IMPLIED, INCLUDING (WITHOUT LIMITATION) WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NON-INFRINGEMENT, AND
TITLE.

View File

@@ -89,6 +89,26 @@ We also offer a wide range of dedicated guides to all Meilisearch features, such
Finally, for more in-depth information, refer to our articles explaining fundamental Meilisearch concepts such as [documents](https://www.meilisearch.com/docs/learn/core_concepts/documents?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=advanced) and [indexes](https://www.meilisearch.com/docs/learn/core_concepts/indexes?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=advanced).
## đź§ľ Editions & Licensing
Meilisearch is available in two editions:
### đź§Ş Community Edition (CE)
- Fully open source under the [MIT license](./LICENSE)
- Core search engine with fast and relevant full-text, semantic or hybrid search
- Free to use for anyone, including commercial usage
### 🏢 Enterprise Edition (EE)
- Includes advanced features such as:
- Sharding
- Governed by a [commercial license](./LICENSE-EE) or the [Business Source License 1.1](https://mariadb.com/bsl11)
- Not allowed in production without a commercial agreement with Meilisearch.
- You may use, modify, and distribute the Licensed Work for non-production purposes only, such as testing, development, or evaluation.
Want access to Enterprise features? → Contact us at [sales@meilisearch.com](maito:sales@meilisearch.com).
## 📊 Telemetry
Meilisearch collects **anonymized** user data to help us improve our product. You can [deactivate this](https://www.meilisearch.com/docs/learn/what_is_meilisearch/telemetry?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=telemetry#how-to-disable-data-collection) whenever you want.

View File

@@ -14,7 +14,7 @@ license.workspace = true
anyhow = "1.0.98"
bumpalo = "3.18.1"
csv = "1.3.1"
memmap2 = "0.9.5"
memmap2 = "0.9.7"
milli = { path = "../milli" }
mimalloc = { version = "0.1.47", default-features = false }
serde_json = { version = "1.0.140", features = ["preserve_order"] }
@@ -55,4 +55,3 @@ harness = false
[[bench]]
name = "sort"
harness = false

View File

@@ -154,6 +154,7 @@ fn indexing_songs_default(c: &mut Criterion) {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
None,
)
.unwrap();
@@ -221,6 +222,7 @@ fn reindexing_songs_default(c: &mut Criterion) {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
None,
)
.unwrap();
@@ -266,6 +268,7 @@ fn reindexing_songs_default(c: &mut Criterion) {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
None,
)
.unwrap();
@@ -335,6 +338,7 @@ fn deleting_songs_in_batches_default(c: &mut Criterion) {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
None,
)
.unwrap();
@@ -412,6 +416,7 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
None,
)
.unwrap();
@@ -457,6 +462,7 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
None,
)
.unwrap();
@@ -498,6 +504,7 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
None,
)
.unwrap();
@@ -566,6 +573,7 @@ fn indexing_songs_without_faceted_numbers(c: &mut Criterion) {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
None,
)
.unwrap();
@@ -633,6 +641,7 @@ fn indexing_songs_without_faceted_fields(c: &mut Criterion) {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
None,
)
.unwrap();
@@ -700,6 +709,7 @@ fn indexing_wiki(c: &mut Criterion) {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
None,
)
.unwrap();
@@ -766,6 +776,7 @@ fn reindexing_wiki(c: &mut Criterion) {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
None,
)
.unwrap();
@@ -811,6 +822,7 @@ fn reindexing_wiki(c: &mut Criterion) {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
None,
)
.unwrap();
@@ -879,6 +891,7 @@ fn deleting_wiki_in_batches_default(c: &mut Criterion) {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
None,
)
.unwrap();
@@ -956,6 +969,7 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
None,
)
.unwrap();
@@ -1002,6 +1016,7 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
None,
)
.unwrap();
@@ -1044,6 +1059,7 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
None,
)
.unwrap();
@@ -1111,6 +1127,7 @@ fn indexing_movies_default(c: &mut Criterion) {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
None,
)
.unwrap();
@@ -1177,6 +1194,7 @@ fn reindexing_movies_default(c: &mut Criterion) {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
None,
)
.unwrap();
@@ -1222,6 +1240,7 @@ fn reindexing_movies_default(c: &mut Criterion) {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
None,
)
.unwrap();
@@ -1290,6 +1309,7 @@ fn deleting_movies_in_batches_default(c: &mut Criterion) {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
None,
)
.unwrap();
@@ -1404,6 +1424,7 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
None,
)
.unwrap();
@@ -1449,6 +1470,7 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
None,
)
.unwrap();
@@ -1490,6 +1512,7 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
None,
)
.unwrap();
@@ -1580,6 +1603,7 @@ fn indexing_nested_movies_default(c: &mut Criterion) {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
None,
)
.unwrap();
@@ -1671,6 +1695,7 @@ fn deleting_nested_movies_in_batches_default(c: &mut Criterion) {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
None,
)
.unwrap();
@@ -1754,6 +1779,7 @@ fn indexing_nested_movies_without_faceted_fields(c: &mut Criterion) {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
None,
)
.unwrap();
@@ -1821,6 +1847,7 @@ fn indexing_geo(c: &mut Criterion) {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
None,
)
.unwrap();
@@ -1887,6 +1914,7 @@ fn reindexing_geo(c: &mut Criterion) {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
None,
)
.unwrap();
@@ -1932,6 +1960,7 @@ fn reindexing_geo(c: &mut Criterion) {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
None,
)
.unwrap();
@@ -2000,6 +2029,7 @@ fn deleting_geo_in_batches_default(c: &mut Criterion) {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
None,
)
.unwrap();

View File

@@ -123,6 +123,7 @@ pub fn base_setup(conf: &Conf) -> Index {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
None,
)
.unwrap();

View File

@@ -10,7 +10,7 @@ use meilisearch_types::keys::Key;
use meilisearch_types::milli::update::IndexDocumentsMethod;
use meilisearch_types::settings::Unchecked;
use meilisearch_types::tasks::{
Details, ExportIndexSettings, IndexSwap, KindWithContent, Status, Task, TaskId,
Details, ExportIndexSettings, IndexSwap, KindWithContent, Status, Task, TaskId, TaskNetwork,
};
use meilisearch_types::InstanceUid;
use roaring::RoaringBitmap;
@@ -94,6 +94,8 @@ pub struct TaskDump {
default
)]
pub finished_at: Option<OffsetDateTime>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub network: Option<TaskNetwork>,
}
// A `Kind` specific version made for the dump. If modified you may break the dump.
@@ -171,6 +173,7 @@ impl From<Task> for TaskDump {
enqueued_at: task.enqueued_at,
started_at: task.started_at,
finished_at: task.finished_at,
network: task.network,
}
}
}
@@ -250,11 +253,12 @@ pub(crate) mod test {
use maplit::{btreemap, btreeset};
use meilisearch_types::batches::{Batch, BatchEnqueuedAt, BatchStats};
use meilisearch_types::facet_values_sort::FacetValuesSort;
use meilisearch_types::features::{Network, Remote, RuntimeTogglableFeatures};
use meilisearch_types::features::RuntimeTogglableFeatures;
use meilisearch_types::index_uid_pattern::IndexUidPattern;
use meilisearch_types::keys::{Action, Key};
use meilisearch_types::milli::update::Setting;
use meilisearch_types::milli::{self, FilterableAttributesRule};
use meilisearch_types::network::{Network, Remote};
use meilisearch_types::settings::{Checked, FacetingSettings, Settings};
use meilisearch_types::task_view::DetailsView;
use meilisearch_types::tasks::{BatchStopReason, Details, Kind, Status};
@@ -383,6 +387,7 @@ pub(crate) mod test {
enqueued_at: datetime!(2022-11-11 0:00 UTC),
started_at: Some(datetime!(2022-11-20 0:00 UTC)),
finished_at: Some(datetime!(2022-11-21 0:00 UTC)),
network: None,
},
None,
),
@@ -407,6 +412,7 @@ pub(crate) mod test {
enqueued_at: datetime!(2022-11-11 0:00 UTC),
started_at: None,
finished_at: None,
network: None,
},
Some(vec![
json!({ "id": 4, "race": "leonberg" }).as_object().unwrap().clone(),
@@ -426,6 +432,7 @@ pub(crate) mod test {
enqueued_at: datetime!(2022-11-15 0:00 UTC),
started_at: None,
finished_at: None,
network: None,
},
None,
),
@@ -538,7 +545,8 @@ pub(crate) mod test {
fn create_test_network() -> Network {
Network {
local: Some("myself".to_string()),
remotes: maplit::btreemap! {"other".to_string() => Remote { url: "http://test".to_string(), search_api_key: Some("apiKey".to_string()) }},
remotes: maplit::btreemap! {"other".to_string() => Remote { url: "http://test".to_string(), search_api_key: Some("apiKey".to_string()), write_api_key: Some("docApiKey".to_string()) }},
sharding: false,
}
}

View File

@@ -1,3 +1,4 @@
use std::fs::File;
use std::str::FromStr;
use super::v2_to_v3::CompatV2ToV3;
@@ -94,6 +95,10 @@ impl CompatIndexV1ToV2 {
self.from.documents().map(|it| Box::new(it) as Box<dyn Iterator<Item = _>>)
}
pub fn documents_file(&self) -> &File {
self.from.documents_file()
}
pub fn settings(&mut self) -> Result<v2::settings::Settings<v2::settings::Checked>> {
Ok(v2::settings::Settings::<v2::settings::Unchecked>::from(self.from.settings()?).check())
}

View File

@@ -1,3 +1,4 @@
use std::fs::File;
use std::str::FromStr;
use time::OffsetDateTime;
@@ -122,6 +123,13 @@ impl CompatIndexV2ToV3 {
}
}
pub fn documents_file(&self) -> &File {
match self {
CompatIndexV2ToV3::V2(v2) => v2.documents_file(),
CompatIndexV2ToV3::Compat(compat) => compat.documents_file(),
}
}
pub fn settings(&mut self) -> Result<v3::Settings<v3::Checked>> {
let settings = match self {
CompatIndexV2ToV3::V2(from) => from.settings()?,

View File

@@ -1,3 +1,5 @@
use std::fs::File;
use super::v2_to_v3::{CompatIndexV2ToV3, CompatV2ToV3};
use super::v4_to_v5::CompatV4ToV5;
use crate::reader::{v3, v4, UpdateFile};
@@ -252,6 +254,13 @@ impl CompatIndexV3ToV4 {
}
}
pub fn documents_file(&self) -> &File {
match self {
CompatIndexV3ToV4::V3(v3) => v3.documents_file(),
CompatIndexV3ToV4::Compat(compat) => compat.documents_file(),
}
}
pub fn settings(&mut self) -> Result<v4::Settings<v4::Checked>> {
Ok(match self {
CompatIndexV3ToV4::V3(v3) => {

View File

@@ -1,3 +1,5 @@
use std::fs::File;
use super::v3_to_v4::{CompatIndexV3ToV4, CompatV3ToV4};
use super::v5_to_v6::CompatV5ToV6;
use crate::reader::{v4, v5, Document};
@@ -241,6 +243,13 @@ impl CompatIndexV4ToV5 {
}
}
pub fn documents_file(&self) -> &File {
match self {
CompatIndexV4ToV5::V4(v4) => v4.documents_file(),
CompatIndexV4ToV5::Compat(compat) => compat.documents_file(),
}
}
pub fn settings(&mut self) -> Result<v5::Settings<v5::Checked>> {
match self {
CompatIndexV4ToV5::V4(v4) => Ok(v5::Settings::from(v4.settings()?).check()),

View File

@@ -1,3 +1,4 @@
use std::fs::File;
use std::num::NonZeroUsize;
use std::str::FromStr;
@@ -160,6 +161,7 @@ impl CompatV5ToV6 {
enqueued_at: task_view.enqueued_at,
started_at: task_view.started_at,
finished_at: task_view.finished_at,
network: None,
};
(task, content_file)
@@ -243,6 +245,13 @@ impl CompatIndexV5ToV6 {
}
}
pub fn documents_file(&self) -> &File {
match self {
CompatIndexV5ToV6::V5(v5) => v5.documents_file(),
CompatIndexV5ToV6::Compat(compat) => compat.documents_file(),
}
}
pub fn settings(&mut self) -> Result<v6::Settings<v6::Checked>> {
match self {
CompatIndexV5ToV6::V5(v5) => Ok(v6::Settings::from(v5.settings()?).check()),

View File

@@ -192,6 +192,14 @@ impl DumpIndexReader {
}
}
/// A reference to a file in the NDJSON format containing all the documents of the index
pub fn documents_file(&self) -> &File {
match self {
DumpIndexReader::Current(v6) => v6.documents_file(),
DumpIndexReader::Compat(compat) => compat.documents_file(),
}
}
pub fn settings(&mut self) -> Result<v6::Settings<v6::Checked>> {
match self {
DumpIndexReader::Current(v6) => v6.settings(),

View File

@@ -72,6 +72,10 @@ impl V1IndexReader {
.map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) }))
}
pub fn documents_file(&self) -> &File {
self.documents.get_ref()
}
pub fn settings(&mut self) -> Result<self::settings::Settings> {
Ok(serde_json::from_reader(&mut self.settings)?)
}

View File

@@ -203,6 +203,10 @@ impl V2IndexReader {
.map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) }))
}
pub fn documents_file(&self) -> &File {
self.documents.get_ref()
}
pub fn settings(&mut self) -> Result<Settings<Checked>> {
Ok(self.settings.clone())
}

View File

@@ -215,6 +215,10 @@ impl V3IndexReader {
.map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) }))
}
pub fn documents_file(&self) -> &File {
self.documents.get_ref()
}
pub fn settings(&mut self) -> Result<Settings<Checked>> {
Ok(self.settings.clone())
}

View File

@@ -210,6 +210,10 @@ impl V4IndexReader {
.map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) }))
}
pub fn documents_file(&self) -> &File {
self.documents.get_ref()
}
pub fn settings(&mut self) -> Result<Settings<Checked>> {
Ok(self.settings.clone())
}

View File

@@ -247,6 +247,10 @@ impl V5IndexReader {
.map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) }))
}
pub fn documents_file(&self) -> &File {
self.documents.get_ref()
}
pub fn settings(&mut self) -> Result<Settings<Checked>> {
Ok(self.settings.clone())
}

View File

@@ -24,7 +24,7 @@ pub type Batch = meilisearch_types::batches::Batch;
pub type Key = meilisearch_types::keys::Key;
pub type ChatCompletionSettings = meilisearch_types::features::ChatCompletionSettings;
pub type RuntimeTogglableFeatures = meilisearch_types::features::RuntimeTogglableFeatures;
pub type Network = meilisearch_types::features::Network;
pub type Network = meilisearch_types::network::Network;
// ===== Other types to clarify the code of the compat module
// everything related to the tasks
@@ -284,6 +284,10 @@ impl V6IndexReader {
.map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) }))
}
pub fn documents_file(&self) -> &File {
self.documents.get_ref()
}
pub fn settings(&mut self) -> Result<Settings<Checked>> {
let mut settings: Settings<Unchecked> = serde_json::from_reader(&mut self.settings)?;
patch_embedders(&mut settings);

View File

@@ -5,8 +5,9 @@ use std::path::PathBuf;
use flate2::write::GzEncoder;
use flate2::Compression;
use meilisearch_types::batches::Batch;
use meilisearch_types::features::{ChatCompletionSettings, Network, RuntimeTogglableFeatures};
use meilisearch_types::features::{ChatCompletionSettings, RuntimeTogglableFeatures};
use meilisearch_types::keys::Key;
use meilisearch_types::network::Network;
use meilisearch_types::settings::{Checked, Settings};
use serde_json::{Map, Value};
use tempfile::TempDir;

View File

@@ -148,11 +148,10 @@ impl File {
Ok(Self { path: PathBuf::new(), file: None })
}
pub fn persist(self) -> Result<()> {
if let Some(file) = self.file {
file.persist(&self.path)?;
}
Ok(())
pub fn persist(self) -> Result<Option<StdFile>> {
let Some(file) = self.file else { return Ok(None) };
Ok(Some(file.persist(&self.path)?))
}
}

View File

@@ -129,6 +129,7 @@ fn main() {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
None,
)
.unwrap();

View File

@@ -26,7 +26,7 @@ flate2 = "1.1.2"
indexmap = "2.9.0"
meilisearch-auth = { path = "../meilisearch-auth" }
meilisearch-types = { path = "../meilisearch-types" }
memmap2 = "0.9.5"
memmap2 = "0.9.7"
page_size = "0.6.0"
rayon = "1.10.0"
roaring = { version = "0.10.12", features = ["serde"] }

View File

@@ -147,6 +147,7 @@ impl<'a> Dump<'a> {
canceled_by: task.canceled_by,
details: task.details,
status: task.status,
network: task.network,
kind: match task.kind {
KindDump::DocumentImport {
primary_key,

View File

@@ -1,8 +1,9 @@
use std::sync::{Arc, RwLock};
use meilisearch_types::features::{InstanceTogglableFeatures, Network, RuntimeTogglableFeatures};
use meilisearch_types::features::{InstanceTogglableFeatures, RuntimeTogglableFeatures};
use meilisearch_types::heed::types::{SerdeJson, Str};
use meilisearch_types::heed::{Database, Env, RwTxn, WithoutTls};
use meilisearch_types::network::Network;
use crate::error::FeatureNotEnabledError;
use crate::Result;
@@ -24,7 +25,6 @@ pub(crate) struct FeatureData {
persisted: Database<Str, SerdeJson<RuntimeTogglableFeatures>>,
runtime: Arc<RwLock<RuntimeTogglableFeatures>>,
network: Arc<RwLock<Network>>,
experimental_personalization_api_key: Option<String>,
}
#[derive(Debug, Clone, Copy)]
@@ -175,12 +175,7 @@ impl FeatureData {
let persisted_features: RuntimeTogglableFeatures =
runtime_features_db.get(wtxn, db_keys::EXPERIMENTAL_FEATURES)?.unwrap_or_default();
let InstanceTogglableFeatures {
metrics,
logs_route,
contains_filter,
experimental_personalization_api_key,
} = instance_features;
let InstanceTogglableFeatures { metrics, logs_route, contains_filter } = instance_features;
let runtime = Arc::new(RwLock::new(RuntimeTogglableFeatures {
metrics: metrics || persisted_features.metrics,
logs_route: logs_route || persisted_features.logs_route,
@@ -195,7 +190,6 @@ impl FeatureData {
persisted: runtime_features_db,
runtime,
network: Arc::new(RwLock::new(network)),
experimental_personalization_api_key,
})
}
@@ -226,10 +220,6 @@ impl FeatureData {
RoFeatures::new(self)
}
pub fn experimental_personalization_api_key(&self) -> Option<&String> {
self.experimental_personalization_api_key.as_ref()
}
pub fn put_network(&self, mut wtxn: RwTxn, new_network: Network) -> Result<()> {
self.persisted.remap_data_type::<SerdeJson<Network>>().put(
&mut wtxn,

View File

@@ -20,6 +20,7 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String {
let IndexScheduler {
cleanup_enabled: _,
experimental_no_edition_2024_for_dumps: _,
processing_tasks,
env,
version,
@@ -212,6 +213,7 @@ pub fn snapshot_task(task: &Task) -> String {
details,
status,
kind,
network,
} = task;
snap.push('{');
snap.push_str(&format!("uid: {uid}, "));
@@ -229,6 +231,9 @@ pub fn snapshot_task(task: &Task) -> String {
snap.push_str(&format!("details: {}, ", &snapshot_details(details)));
}
snap.push_str(&format!("kind: {kind:?}"));
if let Some(network) = network {
snap.push_str(&format!("network: {network:?}, "))
}
snap.push('}');
snap

View File

@@ -52,7 +52,7 @@ use flate2::bufread::GzEncoder;
use flate2::Compression;
use meilisearch_types::batches::Batch;
use meilisearch_types::features::{
ChatCompletionSettings, InstanceTogglableFeatures, Network, RuntimeTogglableFeatures,
ChatCompletionSettings, InstanceTogglableFeatures, RuntimeTogglableFeatures,
};
use meilisearch_types::heed::byteorder::BE;
use meilisearch_types::heed::types::{DecodeIgnore, SerdeJson, Str, I128};
@@ -63,8 +63,9 @@ use meilisearch_types::milli::vector::{
Embedder, EmbedderOptions, RuntimeEmbedder, RuntimeEmbedders, RuntimeFragment,
};
use meilisearch_types::milli::{self, Index};
use meilisearch_types::network::Network;
use meilisearch_types::task_view::TaskView;
use meilisearch_types::tasks::{KindWithContent, Task};
use meilisearch_types::tasks::{KindWithContent, Task, TaskNetwork};
use milli::vector::db::IndexEmbeddingConfig;
use processing::ProcessingTasks;
pub use queue::Query;
@@ -168,6 +169,9 @@ pub struct IndexScheduler {
/// Whether we should automatically cleanup the task queue or not.
pub(crate) cleanup_enabled: bool,
/// Whether we should use the old document indexer or the new one.
pub(crate) experimental_no_edition_2024_for_dumps: bool,
/// The webhook url we should send tasks to after processing every batches.
pub(crate) webhook_url: Option<String>,
/// The Authorization header to send to the webhook URL.
@@ -210,6 +214,7 @@ impl IndexScheduler {
index_mapper: self.index_mapper.clone(),
cleanup_enabled: self.cleanup_enabled,
experimental_no_edition_2024_for_dumps: self.experimental_no_edition_2024_for_dumps,
webhook_url: self.webhook_url.clone(),
webhook_authorization_header: self.webhook_authorization_header.clone(),
embedders: self.embedders.clone(),
@@ -280,8 +285,7 @@ impl IndexScheduler {
let version = versioning::Versioning::new(&env, from_db_version)?;
let mut wtxn = env.write_txn()?;
let features =
features::FeatureData::new(&env, &mut wtxn, options.instance_features.clone())?;
let features = features::FeatureData::new(&env, &mut wtxn, options.instance_features)?;
let queue = Queue::new(&env, &mut wtxn, &options)?;
let index_mapper = IndexMapper::new(&env, &mut wtxn, &options, budget)?;
let chat_settings = env.create_database(&mut wtxn, Some(CHAT_SETTINGS_DB_NAME))?;
@@ -297,6 +301,9 @@ impl IndexScheduler {
index_mapper,
env,
cleanup_enabled: options.cleanup_enabled,
experimental_no_edition_2024_for_dumps: options
.indexer_config
.experimental_no_edition_2024_for_dumps,
webhook_url: options.webhook_url,
webhook_authorization_header: options.webhook_authorization_header,
embedders: Default::default(),
@@ -595,6 +602,11 @@ impl IndexScheduler {
Ok(nbr_index_processing_tasks > 0)
}
/// Whether the index should use the old document indexer.
pub fn no_edition_2024_for_dumps(&self) -> bool {
self.experimental_no_edition_2024_for_dumps
}
/// Return the tasks matching the query from the user's point of view along
/// with the total number of tasks matching the query, ignoring from and limit.
///
@@ -633,6 +645,16 @@ impl IndexScheduler {
self.queue.get_task_ids_from_authorized_indexes(&rtxn, query, filters, &processing)
}
pub fn set_task_network(&self, task_id: TaskId, network: TaskNetwork) -> Result<()> {
let mut wtxn = self.env.write_txn()?;
let mut task =
self.queue.tasks.get_task(&wtxn, task_id)?.ok_or(Error::TaskNotFound(task_id))?;
task.network = Some(network);
self.queue.tasks.all_tasks.put(&mut wtxn, &task_id, &task)?;
wtxn.commit()?;
Ok(())
}
/// Return the batches matching the query from the user's point of view along
/// with the total number of batches matching the query, ignoring from and limit.
///
@@ -835,10 +857,6 @@ impl IndexScheduler {
self.features.features()
}
pub fn experimental_personalization_api_key(&self) -> Option<&String> {
self.features.experimental_personalization_api_key()
}
pub fn put_runtime_features(&self, features: RuntimeTogglableFeatures) -> Result<()> {
let wtxn = self.env.write_txn().map_err(Error::HeedTransaction)?;
self.features.put_runtime_features(wtxn, features)?;

View File

@@ -279,6 +279,7 @@ impl Queue {
details: kind.default_details(),
status: Status::Enqueued,
kind: kind.clone(),
network: None,
};
// For deletion and cancelation tasks, we want to make extra sure that they
// don't attempt to delete/cancel tasks that are newer than themselves.

View File

@@ -97,7 +97,22 @@ impl TaskQueue {
Ok(self.all_tasks.get(rtxn, &task_id)?)
}
pub(crate) fn update_task(&self, wtxn: &mut RwTxn, task: &Task) -> Result<()> {
/// Update the inverted task indexes and write the new value of the task.
///
/// The passed `task` object typically comes from a previous transaction, so two kinds of modification might have occurred:
/// 1. Modification to the `task` object after loading it from the DB (the purpose of this method is to persist these changes)
/// 2. Modification to the task committed by another transaction in the DB (an annoying consequence of having lost the original
/// transaction from which the `task` instance was deserialized)
///
/// When calling this function, this `task` is modified to take into account any existing `network`
/// that can have been added since the task was loaded into memory.
///
/// Any other modification to the task that was committed from the DB since the parameter was pulled from the DB will be overwritten.
///
/// # Errors
///
/// - CorruptedTaskQueue: The task doesn't exist in the database
pub(crate) fn update_task(&self, wtxn: &mut RwTxn, task: &mut Task) -> Result<()> {
let old_task = self.get_task(wtxn, task.uid)?.ok_or(Error::CorruptedTaskQueue)?;
let reprocessing = old_task.status != Status::Enqueued;
@@ -157,6 +172,12 @@ impl TaskQueue {
}
}
task.network = match (old_task.network, task.network.take()) {
(None, None) => None,
(None, Some(network)) | (Some(network), None) => Some(network),
(Some(_), Some(network)) => Some(network),
};
self.all_tasks.put(wtxn, &task.uid, task)?;
Ok(())
}

View File

@@ -268,7 +268,7 @@ impl IndexScheduler {
self.queue
.tasks
.update_task(&mut wtxn, &task)
.update_task(&mut wtxn, &mut task)
.map_err(|e| Error::UnrecoverableError(Box::new(e)))?;
}
if let Some(canceled_by) = canceled_by {
@@ -349,7 +349,7 @@ impl IndexScheduler {
self.queue
.tasks
.update_task(&mut wtxn, &task)
.update_task(&mut wtxn, &mut task)
.map_err(|e| Error::UnrecoverableError(Box::new(e)))?;
}
}

View File

@@ -5,6 +5,7 @@ use std::sync::atomic::Ordering;
use dump::IndexMetadata;
use meilisearch_types::milli::constants::RESERVED_VECTORS_FIELD_NAME;
use meilisearch_types::milli::index::EmbeddingsWithMetadata;
use meilisearch_types::milli::progress::{Progress, VariableNameStep};
use meilisearch_types::milli::vector::parsed_vectors::{ExplicitVectors, VectorOrArrayOfVectors};
use meilisearch_types::milli::{self};
@@ -227,12 +228,21 @@ impl IndexScheduler {
return Err(Error::from_milli(user_err, Some(uid.to_string())));
};
for (embedder_name, (embeddings, regenerate)) in embeddings {
for (
embedder_name,
EmbeddingsWithMetadata { embeddings, regenerate, has_fragments },
) in embeddings
{
let embeddings = ExplicitVectors {
embeddings: Some(VectorOrArrayOfVectors::from_array_of_vectors(
embeddings,
)),
regenerate,
regenerate: regenerate &&
// Meilisearch does not handle well dumps with fragments, because as the fragments
// are marked as user-provided,
// all embeddings would be regenerated on any settings change or document update.
// To prevent this, we mark embeddings has non regenerate in this case.
!has_fragments,
};
vectors.insert(embedder_name, serde_json::to_value(embeddings).unwrap());
}

View File

@@ -9,6 +9,7 @@ use flate2::write::GzEncoder;
use flate2::Compression;
use meilisearch_types::index_uid_pattern::IndexUidPattern;
use meilisearch_types::milli::constants::RESERVED_VECTORS_FIELD_NAME;
use meilisearch_types::milli::index::EmbeddingsWithMetadata;
use meilisearch_types::milli::progress::{Progress, VariableNameStep};
use meilisearch_types::milli::update::{request_threads, Setting};
use meilisearch_types::milli::vector::parsed_vectors::{ExplicitVectors, VectorOrArrayOfVectors};
@@ -229,12 +230,21 @@ impl IndexScheduler {
));
};
for (embedder_name, (embeddings, regenerate)) in embeddings {
for (
embedder_name,
EmbeddingsWithMetadata { embeddings, regenerate, has_fragments },
) in embeddings
{
let embeddings = ExplicitVectors {
embeddings: Some(
VectorOrArrayOfVectors::from_array_of_vectors(embeddings),
),
regenerate,
regenerate: regenerate &&
// Meilisearch does not handle well dumps with fragments, because as the fragments
// are marked as user-provided,
// all embeddings would be regenerated on any settings change or document update.
// To prevent this, we mark embeddings has non regenerate in this case.
!has_fragments,
};
vectors.insert(
embedder_name,

View File

@@ -66,6 +66,11 @@ impl IndexScheduler {
}
IndexOperation::DocumentOperation { index_uid, primary_key, operations, mut tasks } => {
progress.update_progress(DocumentOperationProgress::RetrievingConfig);
let network = self.network();
let shards = network.shards();
// TODO: at some point, for better efficiency we might want to reuse the bumpalo for successive batches.
// this is made difficult by the fact we're doing private clones of the index scheduler and sending it
// to a fresh thread.
@@ -130,6 +135,7 @@ impl IndexScheduler {
&mut new_fields_ids_map,
&|| must_stop_processing.get(),
progress.clone(),
shards.as_ref(),
)
.map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?;

View File

@@ -3,6 +3,7 @@ use std::collections::BTreeMap;
use big_s::S;
use insta::assert_json_snapshot;
use meili_snap::{json_string, snapshot};
use meilisearch_types::milli::index::EmbeddingsWithMetadata;
use meilisearch_types::milli::update::Setting;
use meilisearch_types::milli::vector::settings::EmbeddingSettings;
use meilisearch_types::milli::vector::SearchQuery;
@@ -220,8 +221,8 @@ fn import_vectors() {
let embeddings = index.embeddings(&rtxn, 0).unwrap();
assert_json_snapshot!(embeddings[&simple_hf_name].0[0] == lab_embed, @"true");
assert_json_snapshot!(embeddings[&fakerest_name].0[0] == beagle_embed, @"true");
assert_json_snapshot!(embeddings[&simple_hf_name].embeddings[0] == lab_embed, @"true");
assert_json_snapshot!(embeddings[&fakerest_name].embeddings[0] == beagle_embed, @"true");
let doc = index.documents(&rtxn, std::iter::once(0)).unwrap()[0].1;
let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
@@ -311,9 +312,9 @@ fn import_vectors() {
let embeddings = index.embeddings(&rtxn, 0).unwrap();
// automatically changed to patou because set to regenerate
assert_json_snapshot!(embeddings[&simple_hf_name].0[0] == patou_embed, @"true");
assert_json_snapshot!(embeddings[&simple_hf_name].embeddings[0] == patou_embed, @"true");
// remained beagle
assert_json_snapshot!(embeddings[&fakerest_name].0[0] == beagle_embed, @"true");
assert_json_snapshot!(embeddings[&fakerest_name].embeddings[0] == beagle_embed, @"true");
let doc = index.documents(&rtxn, std::iter::once(0)).unwrap()[0].1;
let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
@@ -497,13 +498,13 @@ fn import_vectors_first_and_embedder_later() {
let docid = index.external_documents_ids.get(&rtxn, "0").unwrap().unwrap();
let embeddings = index.embeddings(&rtxn, docid).unwrap();
let (embedding, _) = &embeddings["my_doggo_embedder"];
assert!(!embedding.is_empty(), "{embedding:?}");
let EmbeddingsWithMetadata { embeddings, .. } = &embeddings["my_doggo_embedder"];
assert!(!embeddings.is_empty(), "{embeddings:?}");
// the document with the id 3 should keep its original embedding
let docid = index.external_documents_ids.get(&rtxn, "3").unwrap().unwrap();
let embeddings = index.embeddings(&rtxn, docid).unwrap();
let (embeddings, _) = &embeddings["my_doggo_embedder"];
let EmbeddingsWithMetadata { embeddings, .. } = &embeddings["my_doggo_embedder"];
snapshot!(embeddings.len(), @"1");
assert!(embeddings[0].iter().all(|i| *i == 3.0), "{:?}", embeddings[0]);
@@ -558,7 +559,7 @@ fn import_vectors_first_and_embedder_later() {
"###);
let embeddings = index.embeddings(&rtxn, docid).unwrap();
let (embedding, _) = &embeddings["my_doggo_embedder"];
let EmbeddingsWithMetadata { embeddings: embedding, .. } = &embeddings["my_doggo_embedder"];
assert!(!embedding.is_empty());
assert!(!embedding[0].iter().all(|i| *i == 3.0), "{:?}", embedding[0]);
@@ -566,7 +567,7 @@ fn import_vectors_first_and_embedder_later() {
// the document with the id 4 should generate an embedding
let docid = index.external_documents_ids.get(&rtxn, "4").unwrap().unwrap();
let embeddings = index.embeddings(&rtxn, docid).unwrap();
let (embedding, _) = &embeddings["my_doggo_embedder"];
let EmbeddingsWithMetadata { embeddings: embedding, .. } = &embeddings["my_doggo_embedder"];
assert!(!embedding.is_empty());
}
@@ -696,7 +697,7 @@ fn delete_document_containing_vector() {
"###);
let docid = index.external_documents_ids.get(&rtxn, "0").unwrap().unwrap();
let embeddings = index.embeddings(&rtxn, docid).unwrap();
let (embedding, _) = &embeddings["manual"];
let EmbeddingsWithMetadata { embeddings: embedding, .. } = &embeddings["manual"];
assert!(!embedding.is_empty(), "{embedding:?}");
index_scheduler

View File

@@ -88,6 +88,7 @@ pub fn upgrade_index_scheduler(
details: Some(Details::UpgradeDatabase { from, to }),
status: Status::Enqueued,
kind: KindWithContent::UpgradeDatabase { from },
network: None,
},
)?;
wtxn.commit()?;

View File

@@ -1,6 +1,5 @@
//! Utility functions on the DBs. Mainly getter and setters.
use crate::milli::progress::EmbedderStats;
use std::collections::{BTreeSet, HashSet};
use std::ops::Bound;
use std::sync::Arc;
@@ -15,6 +14,7 @@ use meilisearch_types::tasks::{
use roaring::RoaringBitmap;
use time::OffsetDateTime;
use crate::milli::progress::EmbedderStats;
use crate::{Error, Result, Task, TaskId, BEI128};
/// This structure contains all the information required to write a batch in the database without reading the tasks.
@@ -372,6 +372,7 @@ impl crate::IndexScheduler {
details,
status,
kind,
network: _,
} = task;
assert_eq!(uid, task.uid);
if task.status != Status::Enqueued {

View File

@@ -24,7 +24,7 @@ enum-iterator = "2.1.0"
file-store = { path = "../file-store" }
flate2 = "1.1.2"
fst = "0.4.7"
memmap2 = "0.9.5"
memmap2 = "0.9.7"
milli = { path = "../milli" }
roaring = { version = "0.10.12", features = ["serde"] }
rustc-hash = "2.1.1"

View File

@@ -236,9 +236,11 @@ InvalidDocumentFields , InvalidRequest , BAD_REQU
InvalidDocumentRetrieveVectors , InvalidRequest , BAD_REQUEST ;
MissingDocumentFilter , InvalidRequest , BAD_REQUEST ;
MissingDocumentEditionFunction , InvalidRequest , BAD_REQUEST ;
InconsistentDocumentChangeHeaders , InvalidRequest , BAD_REQUEST ;
InvalidDocumentFilter , InvalidRequest , BAD_REQUEST ;
InvalidDocumentSort , InvalidRequest , BAD_REQUEST ;
InvalidDocumentGeoField , InvalidRequest , BAD_REQUEST ;
InvalidHeaderValue , InvalidRequest , BAD_REQUEST ;
InvalidVectorDimensions , InvalidRequest , BAD_REQUEST ;
InvalidVectorsType , InvalidRequest , BAD_REQUEST ;
InvalidDocumentId , InvalidRequest , BAD_REQUEST ;
@@ -267,7 +269,9 @@ InvalidMultiSearchRemote , InvalidRequest , BAD_REQU
InvalidMultiSearchWeight , InvalidRequest , BAD_REQUEST ;
InvalidNetworkRemotes , InvalidRequest , BAD_REQUEST ;
InvalidNetworkSelf , InvalidRequest , BAD_REQUEST ;
InvalidNetworkSharding , InvalidRequest , BAD_REQUEST ;
InvalidNetworkSearchApiKey , InvalidRequest , BAD_REQUEST ;
InvalidNetworkWriteApiKey , InvalidRequest , BAD_REQUEST ;
InvalidNetworkUrl , InvalidRequest , BAD_REQUEST ;
InvalidSearchAttributesToSearchOn , InvalidRequest , BAD_REQUEST ;
InvalidSearchAttributesToCrop , InvalidRequest , BAD_REQUEST ;
@@ -310,8 +314,6 @@ InvalidSearchShowRankingScoreDetails , InvalidRequest , BAD_REQU
InvalidSimilarShowRankingScoreDetails , InvalidRequest , BAD_REQUEST ;
InvalidSearchSort , InvalidRequest , BAD_REQUEST ;
InvalidSearchDistinct , InvalidRequest , BAD_REQUEST ;
InvalidSearchPersonalize , InvalidRequest , BAD_REQUEST ;
InvalidSearchPersonalizeUserContext , InvalidRequest , BAD_REQUEST ;
InvalidSearchMediaAndVector , InvalidRequest , BAD_REQUEST ;
InvalidSettingsDisplayedAttributes , InvalidRequest , BAD_REQUEST ;
InvalidSettingsDistinctAttribute , InvalidRequest , BAD_REQUEST ;
@@ -652,18 +654,6 @@ impl fmt::Display for deserr_codes::InvalidNetworkSearchApiKey {
}
}
impl fmt::Display for deserr_codes::InvalidSearchPersonalize {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "the value of `personalize` is invalid, expected a JSON object with optional `userContext` string.")
}
}
impl fmt::Display for deserr_codes::InvalidSearchPersonalizeUserContext {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "the value of `userContext` is invalid, expected a string.")
}
}
#[macro_export]
macro_rules! internal_error {
($target:ty : $($other:path), *) => {

View File

@@ -1,5 +1,3 @@
use std::collections::BTreeMap;
use serde::{Deserialize, Serialize};
use crate::error::{Code, ResponseError};
@@ -25,29 +23,11 @@ pub struct RuntimeTogglableFeatures {
pub multimodal: bool,
}
#[derive(Default, Debug, Clone)]
#[derive(Default, Debug, Clone, Copy)]
pub struct InstanceTogglableFeatures {
pub metrics: bool,
pub logs_route: bool,
pub contains_filter: bool,
pub experimental_personalization_api_key: Option<String>,
}
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
#[serde(rename_all = "camelCase")]
pub struct Remote {
pub url: String,
#[serde(default)]
pub search_api_key: Option<String>,
}
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, Default)]
#[serde(rename_all = "camelCase")]
pub struct Network {
#[serde(default, rename = "self")]
pub local: Option<String>,
#[serde(default)]
pub remotes: BTreeMap<String, Remote>,
}
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, Default)]

View File

@@ -233,9 +233,6 @@ pub enum Action {
#[serde(rename = "*")]
#[deserr(rename = "*")]
All = 0,
#[serde(rename = "*.get")]
#[deserr(rename = "*.get")]
AllGet,
#[serde(rename = "search")]
#[deserr(rename = "search")]
Search,
@@ -365,6 +362,9 @@ pub enum Action {
#[serde(rename = "chatsSettings.update")]
#[deserr(rename = "chatsSettings.update")]
ChatsSettingsUpdate,
#[serde(rename = "*.get")]
#[deserr(rename = "*.get")]
AllGet,
}
impl Action {
@@ -403,6 +403,7 @@ impl Action {
METRICS_GET => Some(Self::MetricsGet),
DUMPS_ALL => Some(Self::DumpsAll),
DUMPS_CREATE => Some(Self::DumpsCreate),
SNAPSHOTS_ALL => Some(Self::SnapshotsAll),
SNAPSHOTS_CREATE => Some(Self::SnapshotsCreate),
VERSION => Some(Self::Version),
KEYS_CREATE => Some(Self::KeysAdd),
@@ -411,8 +412,10 @@ impl Action {
KEYS_DELETE => Some(Self::KeysDelete),
EXPERIMENTAL_FEATURES_GET => Some(Self::ExperimentalFeaturesGet),
EXPERIMENTAL_FEATURES_UPDATE => Some(Self::ExperimentalFeaturesUpdate),
EXPORT => Some(Self::Export),
NETWORK_GET => Some(Self::NetworkGet),
NETWORK_UPDATE => Some(Self::NetworkUpdate),
ALL_GET => Some(Self::AllGet),
_otherwise => None,
}
}
@@ -497,6 +500,7 @@ pub mod actions {
pub const METRICS_GET: u8 = MetricsGet.repr();
pub const DUMPS_ALL: u8 = DumpsAll.repr();
pub const DUMPS_CREATE: u8 = DumpsCreate.repr();
pub const SNAPSHOTS_ALL: u8 = SnapshotsAll.repr();
pub const SNAPSHOTS_CREATE: u8 = SnapshotsCreate.repr();
pub const VERSION: u8 = Version.repr();
pub const KEYS_CREATE: u8 = KeysAdd.repr();
@@ -519,3 +523,68 @@ pub mod actions {
pub const CHATS_SETTINGS_GET: u8 = ChatsSettingsGet.repr();
pub const CHATS_SETTINGS_UPDATE: u8 = ChatsSettingsUpdate.repr();
}
#[cfg(test)]
pub(crate) mod test {
use super::actions::*;
use super::Action::*;
use super::*;
#[test]
fn test_action_repr_and_constants() {
assert!(All.repr() == 0 && ALL == 0);
assert!(Search.repr() == 1 && SEARCH == 1);
assert!(DocumentsAll.repr() == 2 && DOCUMENTS_ALL == 2);
assert!(DocumentsAdd.repr() == 3 && DOCUMENTS_ADD == 3);
assert!(DocumentsGet.repr() == 4 && DOCUMENTS_GET == 4);
assert!(DocumentsDelete.repr() == 5 && DOCUMENTS_DELETE == 5);
assert!(IndexesAll.repr() == 6 && INDEXES_ALL == 6);
assert!(IndexesAdd.repr() == 7 && INDEXES_CREATE == 7);
assert!(IndexesGet.repr() == 8 && INDEXES_GET == 8);
assert!(IndexesUpdate.repr() == 9 && INDEXES_UPDATE == 9);
assert!(IndexesDelete.repr() == 10 && INDEXES_DELETE == 10);
assert!(IndexesSwap.repr() == 11 && INDEXES_SWAP == 11);
assert!(TasksAll.repr() == 12 && TASKS_ALL == 12);
assert!(TasksCancel.repr() == 13 && TASKS_CANCEL == 13);
assert!(TasksDelete.repr() == 14 && TASKS_DELETE == 14);
assert!(TasksGet.repr() == 15 && TASKS_GET == 15);
assert!(SettingsAll.repr() == 16 && SETTINGS_ALL == 16);
assert!(SettingsGet.repr() == 17 && SETTINGS_GET == 17);
assert!(SettingsUpdate.repr() == 18 && SETTINGS_UPDATE == 18);
assert!(StatsAll.repr() == 19 && STATS_ALL == 19);
assert!(StatsGet.repr() == 20 && STATS_GET == 20);
assert!(MetricsAll.repr() == 21 && METRICS_ALL == 21);
assert!(MetricsGet.repr() == 22 && METRICS_GET == 22);
assert!(DumpsAll.repr() == 23 && DUMPS_ALL == 23);
assert!(DumpsCreate.repr() == 24 && DUMPS_CREATE == 24);
assert!(SnapshotsAll.repr() == 25 && SNAPSHOTS_ALL == 25);
assert!(SnapshotsCreate.repr() == 26 && SNAPSHOTS_CREATE == 26);
assert!(Version.repr() == 27 && VERSION == 27);
assert!(KeysAdd.repr() == 28 && KEYS_CREATE == 28);
assert!(KeysGet.repr() == 29 && KEYS_GET == 29);
assert!(KeysUpdate.repr() == 30 && KEYS_UPDATE == 30);
assert!(KeysDelete.repr() == 31 && KEYS_DELETE == 31);
assert!(ExperimentalFeaturesGet.repr() == 32 && EXPERIMENTAL_FEATURES_GET == 32);
assert!(ExperimentalFeaturesUpdate.repr() == 33 && EXPERIMENTAL_FEATURES_UPDATE == 33);
assert!(Export.repr() == 34 && EXPORT == 34);
assert!(NetworkGet.repr() == 35 && NETWORK_GET == 35);
assert!(NetworkUpdate.repr() == 36 && NETWORK_UPDATE == 36);
assert!(ChatCompletions.repr() == 37 && CHAT_COMPLETIONS == 37);
assert!(ChatsAll.repr() == 38 && CHATS_ALL == 38);
assert!(ChatsGet.repr() == 39 && CHATS_GET == 39);
assert!(ChatsDelete.repr() == 40 && CHATS_DELETE == 40);
assert!(ChatsSettingsAll.repr() == 41 && CHATS_SETTINGS_ALL == 41);
assert!(ChatsSettingsGet.repr() == 42 && CHATS_SETTINGS_GET == 42);
assert!(ChatsSettingsUpdate.repr() == 43 && CHATS_SETTINGS_UPDATE == 43);
assert!(AllGet.repr() == 44 && ALL_GET == 44);
}
#[test]
fn test_from_repr() {
for action in enum_iterator::all::<Action>() {
let repr = action.repr();
let action_from_repr = Action::from_repr(repr);
assert_eq!(Some(action), action_from_repr, "Failed for action: {:?}", action);
}
}
}

View File

@@ -10,6 +10,7 @@ pub mod index_uid;
pub mod index_uid_pattern;
pub mod keys;
pub mod locales;
pub mod network;
pub mod settings;
pub mod star_or;
pub mod task_view;

View File

@@ -0,0 +1,47 @@
// Copyright © 2025 Meilisearch Some Rights Reserved
// This file is part of Meilisearch Enterprise Edition (EE).
// Use of this source code is governed by the Business Source License 1.1,
// as found in the LICENSE-EE file or at <https://mariadb.com/bsl11>
use std::collections::BTreeMap;
use milli::update::new::indexer::sharding::Shards;
use serde::{Deserialize, Serialize};
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, Default)]
#[serde(rename_all = "camelCase")]
pub struct Network {
#[serde(default, rename = "self")]
pub local: Option<String>,
#[serde(default)]
pub remotes: BTreeMap<String, Remote>,
#[serde(default)]
pub sharding: bool,
}
impl Network {
pub fn shards(&self) -> Option<Shards> {
if self.sharding {
let this = self.local.as_deref().expect("Inconsistent `sharding` and `self`");
let others = self
.remotes
.keys()
.filter(|name| name.as_str() != this)
.map(|name| name.to_owned())
.collect();
Some(Shards { own: vec![this.to_owned()], others })
} else {
None
}
}
}
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
#[serde(rename_all = "camelCase")]
pub struct Remote {
pub url: String,
#[serde(default)]
pub search_api_key: Option<String>,
#[serde(default)]
pub write_api_key: Option<String>,
}

View File

@@ -11,6 +11,7 @@ use crate::error::ResponseError;
use crate::settings::{Settings, Unchecked};
use crate::tasks::{
serialize_duration, Details, DetailsExportIndexSettings, IndexSwap, Kind, Status, Task, TaskId,
TaskNetwork,
};
#[derive(Debug, Clone, PartialEq, Serialize, ToSchema)]
@@ -51,6 +52,9 @@ pub struct TaskView {
#[schema(value_type = String, example = json!("2024-08-08_14:12:09.393Z"))]
#[serde(with = "time::serde::rfc3339::option", default)]
pub finished_at: Option<OffsetDateTime>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub network: Option<TaskNetwork>,
}
impl TaskView {
@@ -68,6 +72,7 @@ impl TaskView {
enqueued_at: task.enqueued_at,
started_at: task.started_at,
finished_at: task.finished_at,
network: task.network.clone(),
}
}
}

View File

@@ -42,6 +42,9 @@ pub struct Task {
pub status: Status,
pub kind: KindWithContent,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub network: Option<TaskNetwork>,
}
impl Task {
@@ -704,6 +707,36 @@ pub enum Details {
},
}
#[derive(Debug, PartialEq, Clone, Serialize, Deserialize, ToSchema)]
#[serde(untagged, rename_all = "camelCase")]
pub enum TaskNetwork {
Origin { origin: Origin },
Remotes { remote_tasks: BTreeMap<String, RemoteTask> },
}
#[derive(Debug, PartialEq, Clone, Serialize, Deserialize, ToSchema)]
#[serde(untagged, rename_all = "camelCase")]
pub struct Origin {
pub remote_name: String,
pub task_uid: usize,
}
#[derive(Debug, PartialEq, Clone, Serialize, Deserialize, ToSchema)]
#[serde(rename_all = "camelCase")]
pub struct RemoteTask {
#[serde(skip_serializing_if = "Option::is_none")]
task_uid: Option<TaskId>,
error: Option<ResponseError>,
}
impl From<Result<TaskId, ResponseError>> for RemoteTask {
fn from(res: Result<TaskId, ResponseError>) -> RemoteTask {
match res {
Ok(task_uid) => RemoteTask { task_uid: Some(task_uid), error: None },
Err(err) => RemoteTask { task_uid: None, error: Some(err) },
}
}
}
#[derive(Debug, PartialEq, Clone, Serialize, Deserialize, ToSchema)]
#[schema(rename_all = "camelCase")]
pub struct DetailsExportIndexSettings {

View File

@@ -50,6 +50,7 @@ jsonwebtoken = "9.3.1"
lazy_static = "1.5.0"
meilisearch-auth = { path = "../meilisearch-auth" }
meilisearch-types = { path = "../meilisearch-types" }
memmap2 = "0.9.7"
mimalloc = { version = "0.1.47", default-features = false }
mime = "0.3.17"
num_cpus = "1.17.0"
@@ -94,7 +95,6 @@ uuid = { version = "1.17.0", features = ["serde", "v4"] }
serde_urlencoded = "0.7.1"
termcolor = "1.4.1"
url = { version = "2.5.4", features = ["serde"] }
cohere-rust = "0.6.0"
tracing = "0.1.41"
tracing-subscriber = { version = "0.3.19", features = ["json"] }
tracing-trace = { version = "0.1.0", path = "../tracing-trace" }
@@ -115,6 +115,9 @@ utoipa-scalar = { version = "0.3.0", optional = true, features = ["actix-web"] }
async-openai = { git = "https://github.com/meilisearch/async-openai", branch = "better-error-handling" }
secrecy = "0.10.3"
actix-web-lab = { version = "0.24.1", default-features = false }
urlencoding = "2.1.3"
backoff = { version = "0.4.0", features = ["tokio"] }
[dev-dependencies]
actix-rt = "2.10.0"
@@ -125,7 +128,6 @@ manifest-dir-macros = "0.1.18"
maplit = "1.0.2"
meili-snap = { path = "../meili-snap" }
temp-env = "0.3.6"
urlencoding = "2.1.3"
wiremock = "0.6.3"
yaup = "0.3.1"

Binary file not shown.

View File

@@ -203,6 +203,7 @@ struct Infos {
experimental_composite_embedders: bool,
experimental_embedding_cache_entries: usize,
experimental_no_snapshot_compaction: bool,
experimental_no_edition_2024_for_dumps: bool,
experimental_no_edition_2024_for_settings: bool,
gpu_enabled: bool,
db_path: bool,
@@ -281,7 +282,6 @@ impl Infos {
indexer_options,
config_file_path,
no_analytics: _,
experimental_personalization_api_key: _,
} = options;
let schedule_snapshot = match schedule_snapshot {
@@ -294,6 +294,7 @@ impl Infos {
max_indexing_threads,
skip_index_budget: _,
experimental_no_edition_2024_for_settings,
experimental_no_edition_2024_for_dumps,
} = indexer_options;
let RuntimeTogglableFeatures {
@@ -330,6 +331,7 @@ impl Infos {
experimental_composite_embedders: composite_embedders,
experimental_embedding_cache_entries,
experimental_no_snapshot_compaction,
experimental_no_edition_2024_for_dumps,
gpu_enabled: meilisearch_types::milli::vector::is_cuda_enabled(),
db_path: db_path != PathBuf::from("./data.ms"),
import_dump: import_dump.is_some(),

View File

@@ -9,6 +9,8 @@ use meilisearch_types::milli::OrderBy;
use serde_json::Value;
use tokio::task::JoinError;
use crate::routes::indexes::{PROXY_ORIGIN_REMOTE_HEADER, PROXY_ORIGIN_TASK_UID_HEADER};
#[derive(Debug, thiserror::Error)]
pub enum MeilisearchHttpError {
#[error("A Content-Type header is missing. Accepted values for the Content-Type header are: {}",
@@ -80,6 +82,16 @@ pub enum MeilisearchHttpError {
MissingSearchHybrid,
#[error("Invalid request: both `media` and `vector` parameters are present.")]
MediaAndVector,
#[error("Inconsistent `Origin` headers: {} was provided but {} is missing.\n - Hint: Either both headers should be provided, or none of them", if *is_remote_missing {
PROXY_ORIGIN_TASK_UID_HEADER
} else { PROXY_ORIGIN_REMOTE_HEADER },
if *is_remote_missing {
PROXY_ORIGIN_REMOTE_HEADER
} else { PROXY_ORIGIN_TASK_UID_HEADER }
)]
InconsistentOriginHeaders { is_remote_missing: bool },
#[error("Invalid value for header {header_name}: {msg}")]
InvalidHeaderValue { header_name: &'static str, msg: String },
}
impl MeilisearchHttpError {
@@ -124,6 +136,10 @@ impl ErrorCode for MeilisearchHttpError {
MeilisearchHttpError::InconsistentFacetOrder { .. } => {
Code::InvalidMultiSearchFacetOrder
}
MeilisearchHttpError::InconsistentOriginHeaders { .. } => {
Code::InconsistentDocumentChangeHeaders
}
MeilisearchHttpError::InvalidHeaderValue { .. } => Code::InvalidHeaderValue,
}
}
}

View File

@@ -9,7 +9,6 @@ pub mod middleware;
pub mod option;
#[cfg(test)]
mod option_test;
pub mod personalization;
pub mod routes;
pub mod search;
pub mod search_queue;
@@ -31,6 +30,7 @@ use actix_web::web::Data;
use actix_web::{web, HttpRequest};
use analytics::Analytics;
use anyhow::bail;
use bumpalo::Bump;
use error::PayloadError;
use extractors::payload::PayloadConfig;
use index_scheduler::versioning::Versioning;
@@ -39,6 +39,7 @@ use meilisearch_auth::{open_auth_store_env, AuthController};
use meilisearch_types::milli::constants::VERSION_MAJOR;
use meilisearch_types::milli::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
use meilisearch_types::milli::progress::{EmbedderStats, Progress};
use meilisearch_types::milli::update::new::indexer;
use meilisearch_types::milli::update::{
default_thread_pool_and_threads, IndexDocumentsConfig, IndexDocumentsMethod, IndexerConfig,
};
@@ -534,7 +535,7 @@ fn import_dump(
let mut index_reader = index_reader?;
let metadata = index_reader.metadata();
let uid = metadata.uid.clone();
tracing::info!("Importing index `{}`.", metadata.uid);
tracing::info!("Importing index `{uid}`.");
let date = Some((metadata.created_at, metadata.updated_at));
let index = index_scheduler.create_raw_index(&metadata.uid, date)?;
@@ -553,48 +554,101 @@ fn import_dump(
apply_settings_to_builder(&settings, &mut builder);
let embedder_stats: Arc<EmbedderStats> = Default::default();
builder.execute(&|| false, &progress, embedder_stats.clone())?;
wtxn.commit()?;
// 5.3 Import the documents.
// 5.3.1 We need to recreate the grenad+obkv format accepted by the index.
tracing::info!("Importing the documents.");
let file = tempfile::tempfile()?;
let mut builder = DocumentsBatchBuilder::new(BufWriter::new(file));
for document in index_reader.documents()? {
builder.append_json_object(&document?)?;
let mut wtxn = index.write_txn()?;
let rtxn = index.read_txn()?;
if index_scheduler.no_edition_2024_for_dumps() {
// 5.3 Import the documents.
// 5.3.1 We need to recreate the grenad+obkv format accepted by the index.
tracing::info!("Importing the documents.");
let file = tempfile::tempfile()?;
let mut builder = DocumentsBatchBuilder::new(BufWriter::new(file));
for document in index_reader.documents()? {
builder.append_json_object(&document?)?;
}
// This flush the content of the batch builder.
let file = builder.into_inner()?.into_inner()?;
// 5.3.2 We feed it to the milli index.
let reader = BufReader::new(file);
let reader = DocumentsBatchReader::from_reader(reader)?;
let embedder_configs = index.embedding_configs().embedding_configs(&wtxn)?;
let embedders = index_scheduler.embedders(uid.to_string(), embedder_configs)?;
let builder = milli::update::IndexDocuments::new(
&mut wtxn,
&index,
indexer_config,
IndexDocumentsConfig {
update_method: IndexDocumentsMethod::ReplaceDocuments,
..Default::default()
},
|indexing_step| tracing::trace!("update: {:?}", indexing_step),
|| false,
&embedder_stats,
)?;
let builder = builder.with_embedders(embedders);
let (builder, user_result) = builder.add_documents(reader)?;
let user_result = user_result?;
tracing::info!(documents_found = user_result, "{} documents found.", user_result);
builder.execute()?;
} else {
let db_fields_ids_map = index.fields_ids_map(&rtxn)?;
let primary_key = index.primary_key(&rtxn)?;
let mut new_fields_ids_map = db_fields_ids_map.clone();
let mut indexer = indexer::DocumentOperation::new();
let embedders = index.embedding_configs().embedding_configs(&rtxn)?;
let embedders = index_scheduler.embedders(uid.clone(), embedders)?;
let mmap = unsafe { memmap2::Mmap::map(index_reader.documents_file())? };
indexer.replace_documents(&mmap)?;
let indexer_config = index_scheduler.indexer_config();
let pool = &indexer_config.thread_pool;
let indexer_alloc = Bump::new();
let (document_changes, mut operation_stats, primary_key) = indexer.into_changes(
&indexer_alloc,
&index,
&rtxn,
primary_key,
&mut new_fields_ids_map,
&|| false, // never stop processing a dump
progress.clone(),
None,
)?;
let operation_stats = operation_stats.pop().unwrap();
if let Some(error) = operation_stats.error {
return Err(error.into());
}
let _congestion = indexer::index(
&mut wtxn,
&index,
pool,
indexer_config.grenad_parameters(),
&db_fields_ids_map,
new_fields_ids_map,
primary_key,
&document_changes,
embedders,
&|| false, // never stop processing a dump
&progress,
&embedder_stats,
)?;
}
// This flush the content of the batch builder.
let file = builder.into_inner()?.into_inner()?;
// 5.3.2 We feed it to the milli index.
let reader = BufReader::new(file);
let reader = DocumentsBatchReader::from_reader(reader)?;
let embedder_configs = index.embedding_configs().embedding_configs(&wtxn)?;
let embedders = index_scheduler.embedders(uid.to_string(), embedder_configs)?;
let builder = milli::update::IndexDocuments::new(
&mut wtxn,
&index,
indexer_config,
IndexDocumentsConfig {
update_method: IndexDocumentsMethod::ReplaceDocuments,
..Default::default()
},
|indexing_step| tracing::trace!("update: {:?}", indexing_step),
|| false,
&embedder_stats,
)?;
let builder = builder.with_embedders(embedders);
let (builder, user_result) = builder.add_documents(reader)?;
let user_result = user_result?;
tracing::info!(documents_found = user_result, "{} documents found.", user_result);
builder.execute()?;
wtxn.commit()?;
tracing::info!("All documents successfully imported.");
index_scheduler.refresh_index_stats(&uid)?;
}
@@ -623,19 +677,12 @@ pub fn configure_data(
(logs_route, logs_stderr): (LogRouteHandle, LogStderrHandle),
analytics: Data<Analytics>,
) {
// Create personalization service with API key from options
let personalization_service = index_scheduler
.experimental_personalization_api_key()
.cloned()
.map(personalization::PersonalizationService::cohere)
.unwrap_or_else(personalization::PersonalizationService::uninitialized);
let http_payload_size_limit = opt.http_payload_size_limit.as_u64() as usize;
config
.app_data(index_scheduler)
.app_data(auth)
.app_data(search_queue)
.app_data(analytics)
.app_data(web::Data::new(personalization_service))
.app_data(web::Data::new(logs_route))
.app_data(web::Data::new(logs_stderr))
.app_data(web::Data::new(opt.clone()))

View File

@@ -111,9 +111,4 @@ lazy_static! {
"Meilisearch Task Queue Size Until Stop Registering",
))
.expect("Can't create a metric");
pub static ref MEILISEARCH_PERSONALIZED_SEARCH_REQUESTS: IntGauge = register_int_gauge!(opts!(
"meilisearch_personalized_search_requests",
"Meilisearch number of search requests with personalization"
))
.expect("Can't create a metric");
}

View File

@@ -68,8 +68,8 @@ const MEILI_EXPERIMENTAL_LIMIT_BATCHED_TASKS_TOTAL_SIZE: &str =
const MEILI_EXPERIMENTAL_EMBEDDING_CACHE_ENTRIES: &str =
"MEILI_EXPERIMENTAL_EMBEDDING_CACHE_ENTRIES";
const MEILI_EXPERIMENTAL_NO_SNAPSHOT_COMPACTION: &str = "MEILI_EXPERIMENTAL_NO_SNAPSHOT_COMPACTION";
const MEILI_EXPERIMENTAL_PERSONALIZATION_API_KEY: &str =
"MEILI_EXPERIMENTAL_PERSONALIZATION_API_KEY";
const MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_DUMPS: &str =
"MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_DUMPS";
const DEFAULT_CONFIG_FILE_PATH: &str = "./config.toml";
const DEFAULT_DB_PATH: &str = "./data.ms";
const DEFAULT_HTTP_ADDR: &str = "localhost:7700";
@@ -469,12 +469,6 @@ pub struct Opt {
#[serde(default)]
pub experimental_no_snapshot_compaction: bool,
/// Experimental personalization API key feature.
///
/// Sets the API key for personalization features.
#[clap(long, env = MEILI_EXPERIMENTAL_PERSONALIZATION_API_KEY)]
pub experimental_personalization_api_key: Option<String>,
#[serde(flatten)]
#[clap(flatten)]
pub indexer_options: IndexerOpts,
@@ -580,7 +574,6 @@ impl Opt {
experimental_limit_batched_tasks_total_size,
experimental_embedding_cache_entries,
experimental_no_snapshot_compaction,
experimental_personalization_api_key,
} = self;
export_to_env_if_not_present(MEILI_DB_PATH, db_path);
export_to_env_if_not_present(MEILI_HTTP_ADDR, http_addr);
@@ -681,12 +674,6 @@ impl Opt {
MEILI_EXPERIMENTAL_NO_SNAPSHOT_COMPACTION,
experimental_no_snapshot_compaction.to_string(),
);
if let Some(experimental_personalization_api_key) = experimental_personalization_api_key {
export_to_env_if_not_present(
MEILI_EXPERIMENTAL_PERSONALIZATION_API_KEY,
experimental_personalization_api_key,
);
}
indexer_options.export_to_env();
}
@@ -739,7 +726,6 @@ impl Opt {
metrics: self.experimental_enable_metrics,
logs_route: self.experimental_enable_logs_route,
contains_filter: self.experimental_contains_filter,
experimental_personalization_api_key: self.experimental_personalization_api_key.clone(),
}
}
}
@@ -775,6 +761,15 @@ pub struct IndexerOpts {
#[clap(long, env = MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_SETTINGS)]
#[serde(default)]
pub experimental_no_edition_2024_for_settings: bool,
/// Experimental make dump imports use the old document indexer.
///
/// When enabled, Meilisearch will use the old document indexer when importing dumps.
///
/// For more information, see <https://github.com/orgs/meilisearch/discussions/851>.
#[clap(long, env = MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_DUMPS)]
#[serde(default)]
pub experimental_no_edition_2024_for_dumps: bool,
}
impl IndexerOpts {
@@ -785,6 +780,7 @@ impl IndexerOpts {
max_indexing_threads,
skip_index_budget: _,
experimental_no_edition_2024_for_settings,
experimental_no_edition_2024_for_dumps,
} = self;
if let Some(max_indexing_memory) = max_indexing_memory.0 {
export_to_env_if_not_present(
@@ -804,6 +800,12 @@ impl IndexerOpts {
experimental_no_edition_2024_for_settings.to_string(),
);
}
if experimental_no_edition_2024_for_dumps {
export_to_env_if_not_present(
MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_DUMPS,
experimental_no_edition_2024_for_dumps.to_string(),
);
}
}
}
@@ -824,6 +826,7 @@ impl TryFrom<&IndexerOpts> for IndexerConfig {
skip_index_budget: other.skip_index_budget,
experimental_no_edition_2024_for_settings: other
.experimental_no_edition_2024_for_settings,
experimental_no_edition_2024_for_dumps: other.experimental_no_edition_2024_for_dumps,
chunk_compression_type: Default::default(),
chunk_compression_level: Default::default(),
documents_chunk_size: Default::default(),

View File

@@ -1,244 +0,0 @@
use crate::search::{Personalize, SearchResult};
use cohere_rust::{
api::rerank::{ReRankModel, ReRankRequest},
Cohere,
};
use meilisearch_types::error::ResponseError;
use tracing::{debug, error, info};
pub struct CohereService {
cohere: Cohere,
}
impl CohereService {
pub fn new(api_key: String) -> Self {
info!("Personalization service initialized with Cohere API");
Self { cohere: Cohere::new("https://api.cohere.ai", api_key) }
}
pub async fn rerank_search_results(
&self,
search_result: SearchResult,
personalize: Option<&Personalize>,
query: Option<&str>,
) -> Result<SearchResult, ResponseError> {
// Extract user context from personalization
let Some(user_context) = personalize.and_then(|p| p.user_context.as_deref()) else {
return Ok(search_result);
};
// Build the prompt by merging query and user context
let prompt = match query {
Some(q) => format!("User Context: {user_context}\nQuery: {q}"),
None => format!("User Context: {user_context}"),
};
// Extract documents for reranking
let documents: Vec<String> = search_result
.hits
.iter()
.map(|hit| {
// Convert the document to a string representation for reranking
serde_json::to_string(&hit.document).unwrap_or_else(|_| "{}".to_string())
})
.collect();
if documents.is_empty() {
return Ok(search_result);
}
// Prepare the rerank request
let rerank_request = ReRankRequest {
query: &prompt,
documents: &documents,
model: ReRankModel::EnglishV3, // Use the default and more recent model
top_n: None,
max_chunks_per_doc: None,
};
// Call Cohere's rerank API
match self.cohere.rerank(&rerank_request).await {
Ok(rerank_response) => {
debug!("Cohere rerank successful, reordering {} results", search_result.hits.len());
// Create a mapping from original index to new rank
let reranked_indices: Vec<usize> =
rerank_response.iter().map(|result| result.index as usize).collect();
// Reorder the hits based on Cohere's reranking
let mut reranked_hits = Vec::new();
for index in reranked_indices.iter() {
reranked_hits.push(search_result.hits[*index].clone());
}
Ok(SearchResult { hits: reranked_hits, ..search_result })
}
Err(e) => {
error!("Cohere rerank failed with model EnglishV3: {}", e);
// Return original results on error
Ok(search_result)
}
}
}
}
pub enum PersonalizationService {
Cohere(CohereService),
Uninitialized,
}
impl PersonalizationService {
pub fn cohere(api_key: String) -> Self {
Self::Cohere(CohereService::new(api_key))
}
pub fn uninitialized() -> Self {
debug!("Personalization service uninitialized");
Self::Uninitialized
}
pub async fn rerank_search_results(
&self,
search_result: SearchResult,
personalize: Option<&Personalize>,
query: Option<&str>,
) -> Result<SearchResult, ResponseError> {
match self {
Self::Cohere(cohere_service) => {
cohere_service.rerank_search_results(search_result, personalize, query).await
}
Self::Uninitialized => Ok(search_result),
}
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::search::{HitsInfo, SearchHit};
#[tokio::test]
async fn test_personalization_service_without_api_key() {
let service = PersonalizationService::uninitialized();
let personalize = Personalize { user_context: Some("test user".to_string()) };
let search_result = SearchResult {
hits: vec![SearchHit {
document: serde_json::Map::new(),
formatted: serde_json::Map::new(),
matches_position: None,
ranking_score: Some(1.0),
ranking_score_details: None,
}],
query: "test".to_string(),
processing_time_ms: 10,
hits_info: HitsInfo::OffsetLimit { limit: 1, offset: 0, estimated_total_hits: 1 },
facet_distribution: None,
facet_stats: None,
semantic_hit_count: None,
degraded: false,
used_negative_operator: false,
};
let result = service
.rerank_search_results(search_result.clone(), Some(&personalize), Some("test"))
.await;
assert!(result.is_ok());
// Should return original results when no API key is provided
let reranked_result = result.unwrap();
assert_eq!(reranked_result.hits.len(), search_result.hits.len());
}
#[tokio::test]
async fn test_personalization_service_with_user_context_only() {
let service = PersonalizationService::cohere("fake_key".to_string());
let personalize = Personalize { user_context: Some("test user".to_string()) };
let search_result = SearchResult {
hits: vec![SearchHit {
document: serde_json::Map::new(),
formatted: serde_json::Map::new(),
matches_position: None,
ranking_score: Some(1.0),
ranking_score_details: None,
}],
query: "test".to_string(),
processing_time_ms: 10,
hits_info: HitsInfo::OffsetLimit { limit: 1, offset: 0, estimated_total_hits: 1 },
facet_distribution: None,
facet_stats: None,
semantic_hit_count: None,
degraded: false,
used_negative_operator: false,
};
let result =
service.rerank_search_results(search_result.clone(), Some(&personalize), None).await;
assert!(result.is_ok());
// Should attempt reranking with user context only
let reranked_result = result.unwrap();
assert_eq!(reranked_result.hits.len(), search_result.hits.len());
}
#[tokio::test]
async fn test_personalization_service_with_query_only() {
let service = PersonalizationService::cohere("fake_key".to_string());
let search_result = SearchResult {
hits: vec![SearchHit {
document: serde_json::Map::new(),
formatted: serde_json::Map::new(),
matches_position: None,
ranking_score: Some(1.0),
ranking_score_details: None,
}],
query: "test".to_string(),
processing_time_ms: 10,
hits_info: HitsInfo::OffsetLimit { limit: 1, offset: 0, estimated_total_hits: 1 },
facet_distribution: None,
facet_stats: None,
semantic_hit_count: None,
degraded: false,
used_negative_operator: false,
};
let result = service.rerank_search_results(search_result.clone(), None, Some("test")).await;
assert!(result.is_ok());
// Should attempt reranking with query only
let reranked_result = result.unwrap();
assert_eq!(reranked_result.hits.len(), search_result.hits.len());
}
#[tokio::test]
async fn test_personalization_service_both_none() {
let service = PersonalizationService::cohere("fake_key".to_string());
let search_result = SearchResult {
hits: vec![SearchHit {
document: serde_json::Map::new(),
formatted: serde_json::Map::new(),
matches_position: None,
ranking_score: Some(1.0),
ranking_score_details: None,
}],
query: "test".to_string(),
processing_time_ms: 10,
hits_info: HitsInfo::OffsetLimit { limit: 1, offset: 0, estimated_total_hits: 1 },
facet_distribution: None,
facet_stats: None,
semantic_hit_count: None,
degraded: false,
used_negative_operator: false,
};
let result = service.rerank_search_results(search_result.clone(), None, None).await;
assert!(result.is_ok());
// Should return original results when both query and user_context are None
let reranked_result = result.unwrap();
assert_eq!(reranked_result.hits.len(), search_result.hits.len());
}
}

View File

@@ -19,6 +19,7 @@ use meilisearch_types::error::{Code, ResponseError};
use meilisearch_types::heed::RoTxn;
use meilisearch_types::index_uid::IndexUid;
use meilisearch_types::milli::documents::sort::recursive_sort;
use meilisearch_types::milli::index::EmbeddingsWithMetadata;
use meilisearch_types::milli::update::IndexDocumentsMethod;
use meilisearch_types::milli::vector::parsed_vectors::ExplicitVectors;
use meilisearch_types::milli::{AscDesc, DocumentId};
@@ -44,6 +45,7 @@ use crate::extractors::authentication::policies::*;
use crate::extractors::authentication::GuardedData;
use crate::extractors::payload::Payload;
use crate::extractors::sequential_extractor::SeqHandler;
use crate::routes::indexes::proxy::{proxy, Body};
use crate::routes::indexes::search::fix_sort_query_parameters;
use crate::routes::{
get_task_id, is_dry_run, PaginationView, SummarizedTaskView, PAGINATION_DEFAULT_LIMIT,
@@ -333,6 +335,7 @@ pub async fn delete_document(
) -> Result<HttpResponse, ResponseError> {
let DocumentParam { index_uid, document_id } = path.into_inner();
let index_uid = IndexUid::try_from(index_uid)?;
let network = index_scheduler.network();
analytics.publish(
DocumentsDeletionAggregator {
@@ -350,10 +353,16 @@ pub async fn delete_document(
};
let uid = get_task_id(&req, &opt)?;
let dry_run = is_dry_run(&req, &opt)?;
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run))
.await??
.into();
let task = {
let index_scheduler = index_scheduler.clone();
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run)).await??
};
if network.sharding && !dry_run {
proxy(&index_scheduler, &index_uid, &req, network, Body::none(), &task).await?;
}
let task: SummarizedTaskView = task.into();
debug!("returns: {:?}", task);
Ok(HttpResponse::Accepted().json(task))
}
@@ -791,7 +800,6 @@ pub async fn replace_documents(
let uid = get_task_id(&req, &opt)?;
let dry_run = is_dry_run(&req, &opt)?;
let task = document_addition(
extract_mime_type(&req)?,
index_scheduler,
index_uid,
params.primary_key,
@@ -801,8 +809,10 @@ pub async fn replace_documents(
uid,
dry_run,
allow_index_creation,
&req,
)
.await?;
debug!(returns = ?task, "Replace documents");
Ok(HttpResponse::Accepted().json(task))
@@ -892,7 +902,6 @@ pub async fn update_documents(
let uid = get_task_id(&req, &opt)?;
let dry_run = is_dry_run(&req, &opt)?;
let task = document_addition(
extract_mime_type(&req)?,
index_scheduler,
index_uid,
params.primary_key,
@@ -902,6 +911,7 @@ pub async fn update_documents(
uid,
dry_run,
allow_index_creation,
&req,
)
.await?;
debug!(returns = ?task, "Update documents");
@@ -911,7 +921,6 @@ pub async fn update_documents(
#[allow(clippy::too_many_arguments)]
async fn document_addition(
mime_type: Option<Mime>,
index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_ADD }>, Data<IndexScheduler>>,
index_uid: IndexUid,
primary_key: Option<String>,
@@ -921,7 +930,11 @@ async fn document_addition(
task_id: Option<TaskId>,
dry_run: bool,
allow_index_creation: bool,
req: &HttpRequest,
) -> Result<SummarizedTaskView, MeilisearchHttpError> {
let mime_type = extract_mime_type(req)?;
let network = index_scheduler.network();
let format = match (
mime_type.as_ref().map(|m| (m.type_().as_str(), m.subtype().as_str())),
csv_delimiter,
@@ -953,7 +966,7 @@ async fn document_addition(
};
let (uuid, mut update_file) = index_scheduler.queue.create_update_file(dry_run)?;
let documents_count = match format {
let res = match format {
PayloadType::Ndjson => {
let (path, file) = update_file.into_parts();
let file = match file {
@@ -968,19 +981,19 @@ async fn document_addition(
None => None,
};
let documents_count = tokio::task::spawn_blocking(move || {
let res = tokio::task::spawn_blocking(move || {
let documents_count = file.as_ref().map_or(Ok(0), |ntf| {
read_ndjson(ntf.as_file()).map_err(MeilisearchHttpError::DocumentFormat)
})?;
let update_file = file_store::File::from_parts(path, file);
update_file.persist()?;
let update_file = update_file.persist()?;
Ok(documents_count)
Ok((documents_count, update_file))
})
.await?;
Ok(documents_count)
Ok(res)
}
PayloadType::Json | PayloadType::Csv { delimiter: _ } => {
let temp_file = match tempfile() {
@@ -999,16 +1012,16 @@ async fn document_addition(
unreachable!("We already wrote the user content into the update file")
}
};
// we NEED to persist the file here because we moved the `udpate_file` in another task.
update_file.persist()?;
Ok(documents_count)
// we NEED to persist the file here because we moved the `update_file` in another task.
let file = update_file.persist()?;
Ok((documents_count, file))
})
.await
}
};
let documents_count = match documents_count {
Ok(Ok(documents_count)) => documents_count,
let (documents_count, file) = match res {
Ok(Ok((documents_count, file))) => (documents_count, file),
// in this case the file has not possibly be persisted.
Ok(Err(e)) => return Err(e),
Err(e) => {
@@ -1050,6 +1063,20 @@ async fn document_addition(
}
};
if network.sharding {
if let Some(file) = file {
proxy(
&index_scheduler,
&index_uid,
req,
network,
Body::with_ndjson_payload(file),
&task,
)
.await?;
}
}
Ok(task.into())
}
@@ -1128,6 +1155,7 @@ pub async fn delete_documents_batch(
) -> Result<HttpResponse, ResponseError> {
debug!(parameters = ?body, "Delete documents by batch");
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
let network = index_scheduler.network();
analytics.publish(
DocumentsDeletionAggregator {
@@ -1148,16 +1176,22 @@ pub async fn delete_documents_batch(
KindWithContent::DocumentDeletion { index_uid: index_uid.to_string(), documents_ids: ids };
let uid = get_task_id(&req, &opt)?;
let dry_run = is_dry_run(&req, &opt)?;
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run))
.await??
.into();
let task = {
let index_scheduler = index_scheduler.clone();
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run)).await??
};
if network.sharding && !dry_run {
proxy(&index_scheduler, &index_uid, &req, network, Body::Inline(body), &task).await?;
}
let task: SummarizedTaskView = task.into();
debug!(returns = ?task, "Delete documents by batch");
Ok(HttpResponse::Accepted().json(task))
}
#[derive(Debug, Deserr, ToSchema)]
#[derive(Debug, Deserr, ToSchema, Serialize)]
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
#[schema(rename_all = "camelCase")]
pub struct DocumentDeletionByFilter {
@@ -1206,7 +1240,8 @@ pub async fn delete_documents_by_filter(
debug!(parameters = ?body, "Delete documents by filter");
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
let index_uid = index_uid.into_inner();
let filter = body.into_inner().filter;
let filter = body.into_inner();
let network = index_scheduler.network();
analytics.publish(
DocumentsDeletionAggregator {
@@ -1219,23 +1254,36 @@ pub async fn delete_documents_by_filter(
);
// we ensure the filter is well formed before enqueuing it
crate::search::parse_filter(&filter, Code::InvalidDocumentFilter, index_scheduler.features())?
.ok_or(MeilisearchHttpError::EmptyFilter)?;
crate::search::parse_filter(
&filter.filter,
Code::InvalidDocumentFilter,
index_scheduler.features(),
)?
.ok_or(MeilisearchHttpError::EmptyFilter)?;
let task = KindWithContent::DocumentDeletionByFilter { index_uid, filter_expr: filter };
let task = KindWithContent::DocumentDeletionByFilter {
index_uid: index_uid.clone(),
filter_expr: filter.filter.clone(),
};
let uid = get_task_id(&req, &opt)?;
let dry_run = is_dry_run(&req, &opt)?;
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run))
.await??
.into();
let task = {
let index_scheduler = index_scheduler.clone();
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run)).await??
};
if network.sharding && !dry_run {
proxy(&index_scheduler, &index_uid, &req, network, Body::Inline(filter), &task).await?;
}
let task: SummarizedTaskView = task.into();
debug!(returns = ?task, "Delete documents by filter");
Ok(HttpResponse::Accepted().json(task))
}
#[derive(Debug, Deserr, ToSchema)]
#[derive(Debug, Deserr, ToSchema, Serialize)]
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
pub struct DocumentEditionByFunction {
/// A string containing a RHAI function.
@@ -1323,6 +1371,8 @@ pub async fn edit_documents_by_function(
.features()
.check_edit_documents_by_function("Using the documents edit route")?;
let network = index_scheduler.network();
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
let index_uid = index_uid.into_inner();
let params = params.into_inner();
@@ -1336,13 +1386,12 @@ pub async fn edit_documents_by_function(
&req,
);
let DocumentEditionByFunction { filter, context, function } = params;
let engine = milli::rhai::Engine::new();
if let Err(e) = engine.compile(&function) {
if let Err(e) = engine.compile(&params.function) {
return Err(ResponseError::from_msg(e.to_string(), Code::BadRequest));
}
if let Some(ref filter) = filter {
if let Some(ref filter) = params.filter {
// we ensure the filter is well formed before enqueuing it
crate::search::parse_filter(
filter,
@@ -1352,9 +1401,9 @@ pub async fn edit_documents_by_function(
.ok_or(MeilisearchHttpError::EmptyFilter)?;
}
let task = KindWithContent::DocumentEdition {
index_uid,
filter_expr: filter,
context: match context {
index_uid: index_uid.clone(),
filter_expr: params.filter.clone(),
context: match params.context.clone() {
Some(Value::Object(m)) => Some(m),
None => None,
_ => {
@@ -1364,15 +1413,21 @@ pub async fn edit_documents_by_function(
))
}
},
function,
function: params.function.clone(),
};
let uid = get_task_id(&req, &opt)?;
let dry_run = is_dry_run(&req, &opt)?;
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run))
.await??
.into();
let task = {
let index_scheduler = index_scheduler.clone();
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run)).await??
};
if network.sharding && !dry_run {
proxy(&index_scheduler, &index_uid, &req, network, Body::Inline(params), &task).await?;
}
let task: SummarizedTaskView = task.into();
debug!(returns = ?task, "Edit documents by function");
Ok(HttpResponse::Accepted().json(task))
@@ -1415,6 +1470,8 @@ pub async fn clear_all_documents(
analytics: web::Data<Analytics>,
) -> Result<HttpResponse, ResponseError> {
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
let network = index_scheduler.network();
analytics.publish(
DocumentsDeletionAggregator {
clear_all: true,
@@ -1428,10 +1485,18 @@ pub async fn clear_all_documents(
let task = KindWithContent::DocumentClear { index_uid: index_uid.to_string() };
let uid = get_task_id(&req, &opt)?;
let dry_run = is_dry_run(&req, &opt)?;
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run))
.await??
.into();
let task = {
let index_scheduler = index_scheduler.clone();
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run)).await??
};
if network.sharding && !dry_run {
proxy(&index_scheduler, &index_uid, &req, network, Body::none(), &task).await?;
}
let task: SummarizedTaskView = task.into();
debug!(returns = ?task, "Delete all documents");
Ok(HttpResponse::Accepted().json(task))
@@ -1460,9 +1525,13 @@ fn some_documents<'a, 't: 'a>(
Some(Value::Object(map)) => map,
_ => Default::default(),
};
for (name, (vector, regenerate)) in index.embeddings(rtxn, key)? {
for (
name,
EmbeddingsWithMetadata { embeddings, regenerate, has_fragments: _ },
) in index.embeddings(rtxn, key)?
{
let embeddings =
ExplicitVectors { embeddings: Some(vector.into()), regenerate };
ExplicitVectors { embeddings: Some(embeddings.into()), regenerate };
vectors.insert(
name,
serde_json::to_value(embeddings).map_err(MeilisearchHttpError::from)?,

View File

@@ -343,7 +343,6 @@ impl From<FacetSearchQuery> for SearchQuery {
hybrid,
ranking_score_threshold,
locales,
personalize: None,
}
}
}

View File

@@ -30,6 +30,7 @@ use crate::Opt;
pub mod documents;
pub mod facet_search;
mod proxy;
pub mod search;
mod search_analytics;
#[cfg(test)]
@@ -39,6 +40,8 @@ mod settings_analytics;
pub mod similar;
mod similar_analytics;
pub use proxy::{PROXY_ORIGIN_REMOTE_HEADER, PROXY_ORIGIN_TASK_UID_HEADER};
#[derive(OpenApi)]
#[openapi(
nest(

View File

@@ -0,0 +1,424 @@
// Copyright © 2025 Meilisearch Some Rights Reserved
// This file is part of Meilisearch Enterprise Edition (EE).
// Use of this source code is governed by the Business Source License 1.1,
// as found in the LICENSE-EE file or at <https://mariadb.com/bsl11>
use std::collections::BTreeMap;
use std::fs::File;
use actix_web::http::header::CONTENT_TYPE;
use actix_web::HttpRequest;
use bytes::Bytes;
use index_scheduler::IndexScheduler;
use meilisearch_types::error::ResponseError;
use meilisearch_types::tasks::{Origin, RemoteTask, TaskNetwork};
use reqwest::StatusCode;
use serde::de::DeserializeOwned;
use serde_json::Value;
use crate::error::MeilisearchHttpError;
use crate::routes::indexes::proxy::error::{ProxyDocumentChangeError, ReqwestErrorWithoutUrl};
use crate::routes::SummarizedTaskView;
pub enum Body<T: serde::Serialize> {
NdJsonPayload(File),
Inline(T),
None,
}
impl Body<()> {
pub fn with_ndjson_payload(file: File) -> Self {
Self::NdJsonPayload(file)
}
pub fn none() -> Self {
Self::None
}
}
/// If necessary, proxies the passed request to the network and update the task description.
///
/// This function reads the custom headers from the request to determine if must proxy the request or if the request
/// has already been proxied.
///
/// - when it must proxy the request, the endpoint, method and query params are retrieved from the passed `req`, then the `body` is
/// sent to all remotes of the `network` (except `self`). The response from the remotes are collected to update the passed `task`
/// with the task ids from the task queues of the remotes.
/// - when the request has already been proxied, the custom headers contains information about the remote that created the initial task.
/// This information is copied to the passed task.
pub async fn proxy<T: serde::Serialize>(
index_scheduler: &IndexScheduler,
index_uid: &str,
req: &HttpRequest,
network: meilisearch_types::network::Network,
body: Body<T>,
task: &meilisearch_types::tasks::Task,
) -> Result<(), MeilisearchHttpError> {
match origin_from_req(req)? {
Some(origin) => {
index_scheduler.set_task_network(task.uid, TaskNetwork::Origin { origin })?
}
None => {
let this = network
.local
.as_deref()
.expect("inconsistent `network.sharding` and `network.self`")
.to_owned();
let content_type = match &body {
// for file bodies, force x-ndjson
Body::NdJsonPayload(_) => Some(b"application/x-ndjson".as_slice()),
// otherwise get content type from request
_ => req.headers().get(CONTENT_TYPE).map(|h| h.as_bytes()),
};
let body = match body {
Body::NdJsonPayload(file) => Some(Bytes::from_owner(unsafe {
memmap2::Mmap::map(&file).map_err(|err| {
MeilisearchHttpError::from_milli(err.into(), Some(index_uid.to_owned()))
})?
})),
Body::Inline(payload) => {
Some(Bytes::copy_from_slice(&serde_json::to_vec(&payload).unwrap()))
}
Body::None => None,
};
let mut in_flight_remote_queries = BTreeMap::new();
let client = reqwest::ClientBuilder::new()
.connect_timeout(std::time::Duration::from_secs(3))
.build()
.unwrap();
let method = from_old_http_method(req.method());
// send payload to all remotes
for (node_name, node) in
network.remotes.into_iter().filter(|(name, _)| name.as_str() != this)
{
let body = body.clone();
let client = client.clone();
let api_key = node.write_api_key;
let this = this.clone();
let method = method.clone();
let path_and_query =
req.uri().path_and_query().map(|paq| paq.as_str()).unwrap_or("/");
in_flight_remote_queries.insert(
node_name,
tokio::spawn({
let url = format!("{}{}", node.url, path_and_query);
let url_encoded_this = urlencoding::encode(&this).into_owned();
let url_encoded_task_uid = task.uid.to_string(); // it's url encoded i promize
let content_type = content_type.map(|b| b.to_owned());
let backoff = backoff::ExponentialBackoffBuilder::new()
.with_max_elapsed_time(Some(std::time::Duration::from_secs(25)))
.build();
backoff::future::retry(backoff, move || {
let url = url.clone();
let client = client.clone();
let url_encoded_this = url_encoded_this.clone();
let url_encoded_task_uid = url_encoded_task_uid.clone();
let content_type = content_type.clone();
let body = body.clone();
let api_key = api_key.clone();
let method = method.clone();
async move {
try_proxy(
method,
&url,
content_type.as_deref(),
api_key.as_deref(),
&client,
&url_encoded_this,
&url_encoded_task_uid,
body,
)
.await
}
})
}),
);
}
// wait for all in-flight queries to finish and collect their results
let mut remote_tasks: BTreeMap<String, RemoteTask> = BTreeMap::new();
for (node_name, handle) in in_flight_remote_queries {
match handle.await {
Ok(Ok(res)) => {
let task_uid = res.task_uid;
remote_tasks.insert(node_name, Ok(task_uid).into());
}
Ok(Err(error)) => {
remote_tasks.insert(node_name, Err(error.as_response_error()).into());
}
Err(panic) => match panic.try_into_panic() {
Ok(panic) => {
let msg = match panic.downcast_ref::<&'static str>() {
Some(s) => *s,
None => match panic.downcast_ref::<String>() {
Some(s) => &s[..],
None => "Box<dyn Any>",
},
};
remote_tasks.insert(
node_name,
Err(ResponseError::from_msg(
msg.to_string(),
meilisearch_types::error::Code::Internal,
))
.into(),
);
}
Err(_) => {
tracing::error!("proxy task was unexpectedly cancelled")
}
},
}
}
// edit details to contain the return values from the remotes
index_scheduler.set_task_network(task.uid, TaskNetwork::Remotes { remote_tasks })?;
}
}
Ok(())
}
fn from_old_http_method(method: &actix_http::Method) -> reqwest::Method {
match method {
&actix_http::Method::CONNECT => reqwest::Method::CONNECT,
&actix_http::Method::DELETE => reqwest::Method::DELETE,
&actix_http::Method::GET => reqwest::Method::GET,
&actix_http::Method::HEAD => reqwest::Method::HEAD,
&actix_http::Method::OPTIONS => reqwest::Method::OPTIONS,
&actix_http::Method::PATCH => reqwest::Method::PATCH,
&actix_http::Method::POST => reqwest::Method::POST,
&actix_http::Method::PUT => reqwest::Method::PUT,
&actix_http::Method::TRACE => reqwest::Method::TRACE,
method => reqwest::Method::from_bytes(method.as_str().as_bytes()).unwrap(),
}
}
#[allow(clippy::too_many_arguments)]
async fn try_proxy(
method: reqwest::Method,
url: &str,
content_type: Option<&[u8]>,
api_key: Option<&str>,
client: &reqwest::Client,
url_encoded_this: &str,
url_encoded_task_uid: &str,
body: Option<Bytes>,
) -> Result<SummarizedTaskView, backoff::Error<ProxyDocumentChangeError>> {
let request = client.request(method, url).timeout(std::time::Duration::from_secs(30));
let request = if let Some(body) = body { request.body(body) } else { request };
let request = if let Some(api_key) = api_key { request.bearer_auth(api_key) } else { request };
let request = request.header(PROXY_ORIGIN_TASK_UID_HEADER, url_encoded_task_uid);
let request = request.header(PROXY_ORIGIN_REMOTE_HEADER, url_encoded_this);
let request = if let Some(content_type) = content_type {
request.header(CONTENT_TYPE.as_str(), content_type)
} else {
request
};
let response = request.send().await;
let response = match response {
Ok(response) => response,
Err(error) if error.is_timeout() => {
return Err(backoff::Error::transient(ProxyDocumentChangeError::Timeout))
}
Err(error) => {
return Err(backoff::Error::transient(ProxyDocumentChangeError::CouldNotSendRequest(
ReqwestErrorWithoutUrl::new(error),
)))
}
};
match response.status() {
status_code if status_code.is_success() => (),
StatusCode::UNAUTHORIZED | StatusCode::FORBIDDEN => {
return Err(backoff::Error::Permanent(ProxyDocumentChangeError::AuthenticationError))
}
status_code if status_code.is_client_error() => {
let response = parse_error(response).await;
return Err(backoff::Error::Permanent(ProxyDocumentChangeError::BadRequest {
status_code,
response,
}));
}
status_code if status_code.is_server_error() => {
let response = parse_error(response).await;
return Err(backoff::Error::transient(ProxyDocumentChangeError::RemoteError {
status_code,
response,
}));
}
status_code => {
tracing::warn!(
status_code = status_code.as_u16(),
"remote replied with unexpected status code"
);
}
}
let response = match parse_response(response).await {
Ok(response) => response,
Err(response) => {
return Err(backoff::Error::transient(
ProxyDocumentChangeError::CouldNotParseResponse { response },
))
}
};
Ok(response)
}
async fn parse_error(response: reqwest::Response) -> Result<String, ReqwestErrorWithoutUrl> {
let bytes = match response.bytes().await {
Ok(bytes) => bytes,
Err(error) => return Err(ReqwestErrorWithoutUrl::new(error)),
};
Ok(parse_bytes_as_error(&bytes))
}
fn parse_bytes_as_error(bytes: &[u8]) -> String {
match serde_json::from_slice::<Value>(bytes) {
Ok(value) => value.to_string(),
Err(_) => String::from_utf8_lossy(bytes).into_owned(),
}
}
async fn parse_response<T: DeserializeOwned>(
response: reqwest::Response,
) -> Result<T, Result<String, ReqwestErrorWithoutUrl>> {
let bytes = match response.bytes().await {
Ok(bytes) => bytes,
Err(error) => return Err(Err(ReqwestErrorWithoutUrl::new(error))),
};
match serde_json::from_slice::<T>(&bytes) {
Ok(value) => Ok(value),
Err(_) => Err(Ok(parse_bytes_as_error(&bytes))),
}
}
mod error {
use meilisearch_types::error::ResponseError;
use reqwest::StatusCode;
#[derive(Debug, thiserror::Error)]
pub enum ProxyDocumentChangeError {
#[error("{0}")]
CouldNotSendRequest(ReqwestErrorWithoutUrl),
#[error("could not authenticate against the remote host\n - hint: check that the remote instance was registered with a valid API key having the `documents.add` action")]
AuthenticationError,
#[error(
"could not parse response from the remote host as a document addition response{}\n - hint: check that the remote instance is a Meilisearch instance running the same version",
response_from_remote(response)
)]
CouldNotParseResponse { response: Result<String, ReqwestErrorWithoutUrl> },
#[error("remote host responded with code {}{}\n - hint: check that the remote instance has the correct index configuration for that request\n - hint: check that the `network` experimental feature is enabled on the remote instance", status_code.as_u16(), response_from_remote(response))]
BadRequest { status_code: StatusCode, response: Result<String, ReqwestErrorWithoutUrl> },
#[error("remote host did not answer before the deadline")]
Timeout,
#[error("remote host responded with code {}{}", status_code.as_u16(), response_from_remote(response))]
RemoteError { status_code: StatusCode, response: Result<String, ReqwestErrorWithoutUrl> },
}
impl ProxyDocumentChangeError {
pub fn as_response_error(&self) -> ResponseError {
use meilisearch_types::error::Code;
let message = self.to_string();
let code = match self {
ProxyDocumentChangeError::CouldNotSendRequest(_) => Code::RemoteCouldNotSendRequest,
ProxyDocumentChangeError::AuthenticationError => Code::RemoteInvalidApiKey,
ProxyDocumentChangeError::BadRequest { .. } => Code::RemoteBadRequest,
ProxyDocumentChangeError::Timeout => Code::RemoteTimeout,
ProxyDocumentChangeError::RemoteError { .. } => Code::RemoteRemoteError,
ProxyDocumentChangeError::CouldNotParseResponse { .. } => Code::RemoteBadResponse,
};
ResponseError::from_msg(message, code)
}
}
#[derive(Debug, thiserror::Error)]
#[error(transparent)]
pub struct ReqwestErrorWithoutUrl(reqwest::Error);
impl ReqwestErrorWithoutUrl {
pub fn new(inner: reqwest::Error) -> Self {
Self(inner.without_url())
}
}
fn response_from_remote(response: &Result<String, ReqwestErrorWithoutUrl>) -> String {
match response {
Ok(response) => {
format!(":\n - response from remote: {}", response)
}
Err(error) => {
format!(":\n - additionally, could not retrieve response from remote: {error}")
}
}
}
}
pub const PROXY_ORIGIN_REMOTE_HEADER: &str = "Meili-Proxy-Origin-Remote";
pub const PROXY_ORIGIN_TASK_UID_HEADER: &str = "Meili-Proxy-Origin-TaskUid";
pub fn origin_from_req(req: &HttpRequest) -> Result<Option<Origin>, MeilisearchHttpError> {
let (remote_name, task_uid) = match (
req.headers().get(PROXY_ORIGIN_REMOTE_HEADER),
req.headers().get(PROXY_ORIGIN_TASK_UID_HEADER),
) {
(None, None) => return Ok(None),
(None, Some(_)) => {
return Err(MeilisearchHttpError::InconsistentOriginHeaders { is_remote_missing: true })
}
(Some(_), None) => {
return Err(MeilisearchHttpError::InconsistentOriginHeaders {
is_remote_missing: false,
})
}
(Some(remote_name), Some(task_uid)) => (
urlencoding::decode(remote_name.to_str().map_err(|err| {
MeilisearchHttpError::InvalidHeaderValue {
header_name: PROXY_ORIGIN_REMOTE_HEADER,
msg: format!("while parsing remote name as UTF-8: {err}"),
}
})?)
.map_err(|err| MeilisearchHttpError::InvalidHeaderValue {
header_name: PROXY_ORIGIN_REMOTE_HEADER,
msg: format!("while URL-decoding remote name: {err}"),
})?,
urlencoding::decode(task_uid.to_str().map_err(|err| {
MeilisearchHttpError::InvalidHeaderValue {
header_name: PROXY_ORIGIN_TASK_UID_HEADER,
msg: format!("while parsing task UID as UTF-8: {err}"),
}
})?)
.map_err(|err| MeilisearchHttpError::InvalidHeaderValue {
header_name: PROXY_ORIGIN_TASK_UID_HEADER,
msg: format!("while URL-decoding task UID: {err}"),
})?,
),
};
let task_uid: usize =
task_uid.parse().map_err(|err| MeilisearchHttpError::InvalidHeaderValue {
header_name: PROXY_ORIGIN_TASK_UID_HEADER,
msg: format!("while parsing the task UID as an integer: {err}"),
})?;
Ok(Some(Origin { remote_name: remote_name.into_owned(), task_uid }))
}

View File

@@ -22,10 +22,10 @@ use crate::extractors::sequential_extractor::SeqHandler;
use crate::metrics::MEILISEARCH_DEGRADED_SEARCH_REQUESTS;
use crate::routes::indexes::search_analytics::{SearchAggregator, SearchGET, SearchPOST};
use crate::search::{
add_search_rules, perform_search, HybridQuery, MatchingStrategy, Personalize,
RankingScoreThreshold, RetrieveVectors, SearchKind, SearchQuery, SearchResult, SemanticRatio,
DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG,
DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET, DEFAULT_SEMANTIC_RATIO,
add_search_rules, perform_search, HybridQuery, MatchingStrategy, RankingScoreThreshold,
RetrieveVectors, SearchKind, SearchQuery, SearchResult, SemanticRatio, DEFAULT_CROP_LENGTH,
DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG,
DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET, DEFAULT_SEMANTIC_RATIO,
};
use crate::search_queue::SearchQueue;
@@ -132,8 +132,6 @@ pub struct SearchQueryGet {
#[deserr(default, error = DeserrQueryParamError<InvalidSearchLocales>)]
#[param(value_type = Vec<Locale>, explode = false)]
pub locales: Option<CS<Locale>>,
#[deserr(default, error = DeserrQueryParamError<InvalidSearchPersonalizeUserContext>)]
pub personalize_user_context: Option<String>,
}
#[derive(Debug, Clone, Copy, PartialEq, deserr::Deserr)]
@@ -205,10 +203,6 @@ impl TryFrom<SearchQueryGet> for SearchQuery {
));
}
let personalize = other
.personalize_user_context
.map(|user_context| Personalize { user_context: Some(user_context) });
Ok(Self {
q: other.q,
// `media` not supported for `GET`
@@ -238,7 +232,6 @@ impl TryFrom<SearchQueryGet> for SearchQuery {
hybrid,
ranking_score_threshold: other.ranking_score_threshold.map(|o| o.0),
locales: other.locales.map(|o| o.into_iter().collect()),
personalize,
})
}
}
@@ -327,7 +320,6 @@ pub fn fix_sort_query_parameters(sort_query: &str) -> Vec<String> {
pub async fn search_with_url_query(
index_scheduler: GuardedData<ActionPolicy<{ actions::SEARCH }>, Data<IndexScheduler>>,
search_queue: web::Data<SearchQueue>,
personalization_service: web::Data<crate::personalization::PersonalizationService>,
index_uid: web::Path<String>,
params: AwebQueryParameter<SearchQueryGet, DeserrQueryParamError>,
req: HttpRequest,
@@ -350,11 +342,6 @@ pub async fn search_with_url_query(
let search_kind =
search_kind(&query, index_scheduler.get_ref(), index_uid.to_string(), &index)?;
let retrieve_vector = RetrieveVectors::new(query.retrieve_vectors);
// Extract personalization and query string before moving query
let personalize = query.personalize.clone();
let query_str = query.q.clone();
let permit = search_queue.try_get_search_permit().await?;
let search_result = tokio::task::spawn_blocking(move || {
perform_search(
@@ -374,12 +361,7 @@ pub async fn search_with_url_query(
}
analytics.publish(aggregate, &req);
let mut search_result = search_result?;
// Apply personalization if requested
search_result = personalization_service
.rerank_search_results(search_result, personalize.as_ref(), query_str.as_deref())
.await?;
let search_result = search_result?;
debug!(returns = ?search_result, "Search get");
Ok(HttpResponse::Ok().json(search_result))
@@ -444,7 +426,6 @@ pub async fn search_with_url_query(
pub async fn search_with_post(
index_scheduler: GuardedData<ActionPolicy<{ actions::SEARCH }>, Data<IndexScheduler>>,
search_queue: web::Data<SearchQueue>,
personalization_service: web::Data<crate::personalization::PersonalizationService>,
index_uid: web::Path<String>,
params: AwebJson<SearchQuery, DeserrJsonError>,
req: HttpRequest,
@@ -468,10 +449,6 @@ pub async fn search_with_post(
search_kind(&query, index_scheduler.get_ref(), index_uid.to_string(), &index)?;
let retrieve_vectors = RetrieveVectors::new(query.retrieve_vectors);
// Extract personalization and query string before moving query
let personalize = query.personalize.clone();
let query_str = query.q.clone();
let permit = search_queue.try_get_search_permit().await?;
let search_result = tokio::task::spawn_blocking(move || {
perform_search(
@@ -494,12 +471,7 @@ pub async fn search_with_post(
}
analytics.publish(aggregate, &req);
let mut search_result = search_result?;
// Apply personalization if requested
search_result = personalization_service
.rerank_search_results(search_result, personalize.as_ref(), query_str.as_deref())
.await?;
let search_result = search_result?;
debug!(returns = ?search_result, "Search post");
Ok(HttpResponse::Ok().json(search_result))

View File

@@ -7,7 +7,6 @@ use serde_json::{json, Value};
use crate::aggregate_methods;
use crate::analytics::{Aggregate, AggregateMethod};
use crate::metrics::MEILISEARCH_PERSONALIZED_SEARCH_REQUESTS;
use crate::search::{
SearchQuery, SearchResult, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER,
DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT,
@@ -95,9 +94,6 @@ pub struct SearchAggregator<Method: AggregateMethod> {
show_ranking_score_details: bool,
ranking_score_threshold: bool,
// personalization
total_personalized: usize,
marker: std::marker::PhantomData<Method>,
}
@@ -132,7 +128,6 @@ impl<Method: AggregateMethod> SearchAggregator<Method> {
hybrid,
ranking_score_threshold,
locales,
personalize,
} = query;
let mut ret = Self::default();
@@ -207,12 +202,6 @@ impl<Method: AggregateMethod> SearchAggregator<Method> {
ret.locales = locales.iter().copied().collect();
}
// personalization
if personalize.is_some() {
ret.total_personalized = 1;
MEILISEARCH_PERSONALIZED_SEARCH_REQUESTS.inc();
}
ret.highlight_pre_tag = *highlight_pre_tag != DEFAULT_HIGHLIGHT_PRE_TAG();
ret.highlight_post_tag = *highlight_post_tag != DEFAULT_HIGHLIGHT_POST_TAG();
ret.crop_marker = *crop_marker != DEFAULT_CROP_MARKER();
@@ -301,7 +290,6 @@ impl<Method: AggregateMethod> Aggregate for SearchAggregator<Method> {
total_used_negative_operator,
ranking_score_threshold,
mut locales,
total_personalized,
marker: _,
} = *new;
@@ -386,9 +374,6 @@ impl<Method: AggregateMethod> Aggregate for SearchAggregator<Method> {
// locales
self.locales.append(&mut locales);
// personalization
self.total_personalized = self.total_personalized.saturating_add(total_personalized);
self
}
@@ -433,7 +418,6 @@ impl<Method: AggregateMethod> Aggregate for SearchAggregator<Method> {
total_used_negative_operator,
ranking_score_threshold,
locales,
total_personalized,
marker: _,
} = *self;
@@ -506,9 +490,6 @@ impl<Method: AggregateMethod> Aggregate for SearchAggregator<Method> {
"show_ranking_score_details": show_ranking_score_details,
"ranking_score_threshold": ranking_score_threshold,
},
"personalization": {
"total_personalized": total_personalized,
},
})
}
}

View File

@@ -180,7 +180,7 @@ pub fn is_dry_run(req: &HttpRequest, opt: &Opt) -> Result<bool, ResponseError> {
.is_some_and(|s| s.to_lowercase() == "true"))
}
#[derive(Debug, Serialize, ToSchema)]
#[derive(Debug, Serialize, Deserialize, ToSchema)]
#[serde(rename_all = "camelCase")]
pub struct SummarizedTaskView {
/// The task unique identifier.
@@ -194,7 +194,10 @@ pub struct SummarizedTaskView {
#[serde(rename = "type")]
kind: Kind,
/// The date on which the task was enqueued.
#[serde(serialize_with = "time::serde::rfc3339::serialize")]
#[serde(
serialize_with = "time::serde::rfc3339::serialize",
deserialize_with = "time::serde::rfc3339::deserialize"
)]
enqueued_at: OffsetDateTime,
}

View File

@@ -67,7 +67,6 @@ impl MultiSearchAggregator {
hybrid: _,
ranking_score_threshold: _,
locales: _,
personalize: _,
} in &federated_search.queries
{
if let Some(federation_options) = federation_options {

View File

@@ -8,12 +8,13 @@ use index_scheduler::IndexScheduler;
use itertools::{EitherOrBoth, Itertools};
use meilisearch_types::deserr::DeserrJsonError;
use meilisearch_types::error::deserr_codes::{
InvalidNetworkRemotes, InvalidNetworkSearchApiKey, InvalidNetworkSelf, InvalidNetworkUrl,
InvalidNetworkRemotes, InvalidNetworkSearchApiKey, InvalidNetworkSelf, InvalidNetworkSharding,
InvalidNetworkUrl, InvalidNetworkWriteApiKey,
};
use meilisearch_types::error::ResponseError;
use meilisearch_types::features::{Network as DbNetwork, Remote as DbRemote};
use meilisearch_types::keys::actions;
use meilisearch_types::milli::update::Setting;
use meilisearch_types::network::{Network as DbNetwork, Remote as DbRemote};
use serde::Serialize;
use tracing::debug;
use utoipa::{OpenApi, ToSchema};
@@ -57,9 +58,9 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
{
"self": "ms-0",
"remotes": {
"ms-0": Remote { url: Setting::Set("http://localhost:7700".into()), search_api_key: Setting::Reset },
"ms-1": Remote { url: Setting::Set("http://localhost:7701".into()), search_api_key: Setting::Set("foo".into()) },
"ms-2": Remote { url: Setting::Set("http://localhost:7702".into()), search_api_key: Setting::Set("bar".into()) },
"ms-0": Remote { url: Setting::Set("http://localhost:7700".into()), search_api_key: Setting::Reset, write_api_key: Setting::Reset },
"ms-1": Remote { url: Setting::Set("http://localhost:7701".into()), search_api_key: Setting::Set("foo".into()), write_api_key: Setting::Set("bar".into()) },
"ms-2": Remote { url: Setting::Set("http://localhost:7702".into()), search_api_key: Setting::Set("bar".into()), write_api_key: Setting::Set("foo".into()) },
}
})),
(status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!(
@@ -88,9 +89,9 @@ async fn get_network(
#[schema(rename_all = "camelCase")]
pub struct Remote {
#[schema(value_type = Option<String>, example = json!({
"ms-0": Remote { url: Setting::Set("http://localhost:7700".into()), search_api_key: Setting::Reset },
"ms-1": Remote { url: Setting::Set("http://localhost:7701".into()), search_api_key: Setting::Set("foo".into()) },
"ms-2": Remote { url: Setting::Set("http://localhost:7702".into()), search_api_key: Setting::Set("bar".into()) },
"ms-0": Remote { url: Setting::Set("http://localhost:7700".into()), search_api_key: Setting::Reset, write_api_key: Setting::Reset },
"ms-1": Remote { url: Setting::Set("http://localhost:7701".into()), search_api_key: Setting::Set("foo".into()), write_api_key: Setting::Set("bar".into()) },
"ms-2": Remote { url: Setting::Set("http://localhost:7702".into()), search_api_key: Setting::Set("bar".into()), write_api_key: Setting::Set("foo".into()) },
}))]
#[deserr(default, error = DeserrJsonError<InvalidNetworkUrl>)]
#[serde(default)]
@@ -99,6 +100,10 @@ pub struct Remote {
#[deserr(default, error = DeserrJsonError<InvalidNetworkSearchApiKey>)]
#[serde(default)]
pub search_api_key: Setting<String>,
#[schema(value_type = Option<String>, example = json!("XWnBI8QHUc-4IlqbKPLUDuhftNq19mQtjc6JvmivzJU"))]
#[deserr(default, error = DeserrJsonError<InvalidNetworkWriteApiKey>)]
#[serde(default)]
pub write_api_key: Setting<String>,
}
#[derive(Debug, Deserr, ToSchema, Serialize)]
@@ -114,6 +119,10 @@ pub struct Network {
#[serde(default, rename = "self")]
#[deserr(default, rename = "self", error = DeserrJsonError<InvalidNetworkSelf>)]
pub local: Setting<String>,
#[schema(value_type = Option<bool>, example = json!(true))]
#[serde(default)]
#[deserr(default, error = DeserrJsonError<InvalidNetworkSharding>)]
pub sharding: Setting<bool>,
}
impl Remote {
@@ -136,6 +145,7 @@ impl Remote {
Ok(url)
})?,
search_api_key: self.search_api_key.set(),
write_api_key: self.write_api_key.set(),
})
}
}
@@ -174,9 +184,9 @@ impl Aggregate for PatchNetworkAnalytics {
{
"self": "ms-0",
"remotes": {
"ms-0": Remote { url: Setting::Set("http://localhost:7700".into()), search_api_key: Setting::Reset },
"ms-1": Remote { url: Setting::Set("http://localhost:7701".into()), search_api_key: Setting::Set("foo".into()) },
"ms-2": Remote { url: Setting::Set("http://localhost:7702".into()), search_api_key: Setting::Set("bar".into()) },
"ms-0": Remote { url: Setting::Set("http://localhost:7700".into()), search_api_key: Setting::Reset, write_api_key: Setting::Reset },
"ms-1": Remote { url: Setting::Set("http://localhost:7701".into()), search_api_key: Setting::Set("foo".into()), write_api_key: Setting::Set("bar".into()) },
"ms-2": Remote { url: Setting::Set("http://localhost:7702".into()), search_api_key: Setting::Set("bar".into()), write_api_key: Setting::Set("foo".into()) },
}
})),
(status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!(
@@ -207,6 +217,19 @@ async fn patch_network(
Setting::NotSet => old_network.local,
};
let merged_sharding = match new_network.sharding {
Setting::Set(new_sharding) => new_sharding,
Setting::Reset => false,
Setting::NotSet => old_network.sharding,
};
if merged_sharding && merged_self.is_none() {
return Err(ResponseError::from_msg(
"`.sharding`: enabling the sharding requires `.self` to be set\n - Hint: Disable `sharding` or set `self` to a value.".into(),
meilisearch_types::error::Code::InvalidNetworkSharding,
));
}
let merged_remotes = match new_network.remotes {
Setting::Set(new_remotes) => {
let mut merged_remotes = BTreeMap::new();
@@ -217,9 +240,17 @@ async fn patch_network(
{
match either_or_both {
EitherOrBoth::Both((key, old), (_, Some(new))) => {
let DbRemote { url: old_url, search_api_key: old_search_api_key } = old;
let DbRemote {
url: old_url,
search_api_key: old_search_api_key,
write_api_key: old_write_api_key,
} = old;
let Remote { url: new_url, search_api_key: new_search_api_key } = new;
let Remote {
url: new_url,
search_api_key: new_search_api_key,
write_api_key: new_write_api_key,
} = new;
let merged = DbRemote {
url: match new_url {
@@ -247,6 +278,11 @@ async fn patch_network(
Setting::Reset => None,
Setting::NotSet => old_search_api_key,
},
write_api_key: match new_write_api_key {
Setting::Set(new_write_api_key) => Some(new_write_api_key),
Setting::Reset => None,
Setting::NotSet => old_write_api_key,
},
};
merged_remotes.insert(key, merged);
}
@@ -274,7 +310,8 @@ async fn patch_network(
&req,
);
let merged_network = DbNetwork { local: merged_self, remotes: merged_remotes };
let merged_network =
DbNetwork { local: merged_self, remotes: merged_remotes, sharding: merged_sharding };
index_scheduler.put_network(merged_network.clone())?;
debug!(returns = ?merged_network, "Patch network");
Ok(HttpResponse::Ok().json(merged_network))

View File

@@ -10,10 +10,10 @@ use actix_http::StatusCode;
use index_scheduler::{IndexScheduler, RoFeatures};
use itertools::Itertools;
use meilisearch_types::error::ResponseError;
use meilisearch_types::features::{Network, Remote};
use meilisearch_types::milli::order_by_map::OrderByMap;
use meilisearch_types::milli::score_details::{ScoreDetails, WeightedScoreValue};
use meilisearch_types::milli::{self, DocumentId, OrderBy, TimeBudget, DEFAULT_VALUES_PER_FACET};
use meilisearch_types::network::{Network, Remote};
use roaring::RoaringBitmap;
use tokio::task::JoinHandle;

View File

@@ -1,6 +1,6 @@
pub use error::ProxySearchError;
use error::ReqwestErrorWithoutUrl;
use meilisearch_types::features::Remote;
use meilisearch_types::network::Remote;
use rand::Rng as _;
use reqwest::{Client, Response, StatusCode};
use serde::de::DeserializeOwned;

View File

@@ -16,7 +16,7 @@ use meilisearch_types::error::{Code, ResponseError};
use meilisearch_types::heed::RoTxn;
use meilisearch_types::index_uid::IndexUid;
use meilisearch_types::locales::Locale;
use meilisearch_types::milli::index::{self, SearchParameters};
use meilisearch_types::milli::index::{self, EmbeddingsWithMetadata, SearchParameters};
use meilisearch_types::milli::score_details::{ScoreDetails, ScoringStrategy};
use meilisearch_types::milli::vector::parsed_vectors::ExplicitVectors;
use meilisearch_types::milli::vector::Embedder;
@@ -57,13 +57,6 @@ pub const DEFAULT_HIGHLIGHT_PRE_TAG: fn() -> String = || "<em>".to_string();
pub const DEFAULT_HIGHLIGHT_POST_TAG: fn() -> String = || "</em>".to_string();
pub const DEFAULT_SEMANTIC_RATIO: fn() -> SemanticRatio = || SemanticRatio(0.5);
#[derive(Clone, Default, PartialEq, Deserr, ToSchema, Debug)]
#[deserr(error = DeserrJsonError<InvalidSearchPersonalize>, rename_all = camelCase, deny_unknown_fields)]
pub struct Personalize {
#[deserr(default, error = DeserrJsonError<InvalidSearchPersonalizeUserContext>)]
pub user_context: Option<String>,
}
#[derive(Clone, Default, PartialEq, Deserr, ToSchema)]
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
pub struct SearchQuery {
@@ -127,8 +120,6 @@ pub struct SearchQuery {
pub ranking_score_threshold: Option<RankingScoreThreshold>,
#[deserr(default, error = DeserrJsonError<InvalidSearchLocales>)]
pub locales: Option<Vec<Locale>>,
#[deserr(default, error = DeserrJsonError<InvalidSearchPersonalize>, default)]
pub personalize: Option<Personalize>,
}
impl From<SearchParameters> for SearchQuery {
@@ -176,7 +167,6 @@ impl From<SearchParameters> for SearchQuery {
highlight_post_tag: DEFAULT_HIGHLIGHT_POST_TAG(),
crop_marker: DEFAULT_CROP_MARKER(),
locales: None,
personalize: None,
}
}
}
@@ -258,7 +248,6 @@ impl fmt::Debug for SearchQuery {
attributes_to_search_on,
ranking_score_threshold,
locales,
personalize,
} = self;
let mut debug = f.debug_struct("SearchQuery");
@@ -347,10 +336,6 @@ impl fmt::Debug for SearchQuery {
debug.field("locales", &locales);
}
if let Some(personalize) = personalize {
debug.field("personalize", &personalize);
}
debug.finish()
}
}
@@ -556,9 +541,6 @@ pub struct SearchQueryWithIndex {
pub ranking_score_threshold: Option<RankingScoreThreshold>,
#[deserr(default, error = DeserrJsonError<InvalidSearchLocales>, default)]
pub locales: Option<Vec<Locale>>,
#[deserr(default, error = DeserrJsonError<InvalidSearchPersonalize>, default)]
#[serde(skip)]
pub personalize: Option<Personalize>,
#[deserr(default)]
pub federation_options: Option<FederationOptions>,
@@ -616,7 +598,6 @@ impl SearchQueryWithIndex {
attributes_to_search_on,
ranking_score_threshold,
locales,
personalize,
} = query;
SearchQueryWithIndex {
@@ -648,7 +629,6 @@ impl SearchQueryWithIndex {
attributes_to_search_on,
ranking_score_threshold,
locales,
personalize,
federation_options,
}
}
@@ -684,7 +664,6 @@ impl SearchQueryWithIndex {
hybrid,
ranking_score_threshold,
locales,
personalize,
} = self;
(
index_uid,
@@ -716,7 +695,6 @@ impl SearchQueryWithIndex {
hybrid,
ranking_score_threshold,
locales,
personalize,
// do not use ..Default::default() here,
// rather add any missing field from `SearchQuery` to `SearchQueryWithIndex`
},
@@ -941,7 +919,7 @@ pub struct SearchResultWithIndex {
pub result: SearchResult,
}
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, ToSchema)]
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, ToSchema)]
#[serde(untagged)]
pub enum HitsInfo {
#[serde(rename_all = "camelCase")]
@@ -1188,7 +1166,6 @@ pub fn perform_search(
attributes_to_search_on: _,
filter: _,
distinct: _,
personalize: _,
} = query;
let format = AttributesFormat {
@@ -1551,8 +1528,11 @@ impl<'a> HitMaker<'a> {
Some(Value::Object(map)) => map,
_ => Default::default(),
};
for (name, (vector, regenerate)) in self.index.embeddings(self.rtxn, id)? {
let embeddings = ExplicitVectors { embeddings: Some(vector.into()), regenerate };
for (name, EmbeddingsWithMetadata { embeddings, regenerate, has_fragments: _ }) in
self.index.embeddings(self.rtxn, id)?
{
let embeddings =
ExplicitVectors { embeddings: Some(embeddings.into()), regenerate };
vectors.insert(
name,
serde_json::to_value(embeddings).map_err(InternalError::SerdeJson)?,

View File

@@ -421,7 +421,7 @@ async fn error_add_api_key_invalid_parameters_actions() {
meili_snap::snapshot!(code, @"400 Bad Request");
meili_snap::snapshot!(meili_snap::json_string!(response, { ".createdAt" => "[ignored]", ".updatedAt" => "[ignored]" }), @r#"
{
"message": "Unknown value `doc.add` at `.actions[0]`: expected one of `*`, `*.get`, `search`, `documents.*`, `documents.add`, `documents.get`, `documents.delete`, `indexes.*`, `indexes.create`, `indexes.get`, `indexes.update`, `indexes.delete`, `indexes.swap`, `tasks.*`, `tasks.cancel`, `tasks.delete`, `tasks.get`, `settings.*`, `settings.get`, `settings.update`, `stats.*`, `stats.get`, `metrics.*`, `metrics.get`, `dumps.*`, `dumps.create`, `snapshots.*`, `snapshots.create`, `version`, `keys.create`, `keys.get`, `keys.update`, `keys.delete`, `experimental.get`, `experimental.update`, `export`, `network.get`, `network.update`, `chatCompletions`, `chats.*`, `chats.get`, `chats.delete`, `chatsSettings.*`, `chatsSettings.get`, `chatsSettings.update`",
"message": "Unknown value `doc.add` at `.actions[0]`: expected one of `*`, `search`, `documents.*`, `documents.add`, `documents.get`, `documents.delete`, `indexes.*`, `indexes.create`, `indexes.get`, `indexes.update`, `indexes.delete`, `indexes.swap`, `tasks.*`, `tasks.cancel`, `tasks.delete`, `tasks.get`, `settings.*`, `settings.get`, `settings.update`, `stats.*`, `stats.get`, `metrics.*`, `metrics.get`, `dumps.*`, `dumps.create`, `snapshots.*`, `snapshots.create`, `version`, `keys.create`, `keys.get`, `keys.update`, `keys.delete`, `experimental.get`, `experimental.update`, `export`, `network.get`, `network.update`, `chatCompletions`, `chats.*`, `chats.get`, `chats.delete`, `chatsSettings.*`, `chatsSettings.get`, `chatsSettings.update`, `*.get`",
"code": "invalid_api_key_actions",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_api_key_actions"

View File

@@ -93,7 +93,7 @@ async fn create_api_key_bad_actions() {
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r#"
{
"message": "Unknown value `doggo` at `.actions[0]`: expected one of `*`, `*.get`, `search`, `documents.*`, `documents.add`, `documents.get`, `documents.delete`, `indexes.*`, `indexes.create`, `indexes.get`, `indexes.update`, `indexes.delete`, `indexes.swap`, `tasks.*`, `tasks.cancel`, `tasks.delete`, `tasks.get`, `settings.*`, `settings.get`, `settings.update`, `stats.*`, `stats.get`, `metrics.*`, `metrics.get`, `dumps.*`, `dumps.create`, `snapshots.*`, `snapshots.create`, `version`, `keys.create`, `keys.get`, `keys.update`, `keys.delete`, `experimental.get`, `experimental.update`, `export`, `network.get`, `network.update`, `chatCompletions`, `chats.*`, `chats.get`, `chats.delete`, `chatsSettings.*`, `chatsSettings.get`, `chatsSettings.update`",
"message": "Unknown value `doggo` at `.actions[0]`: expected one of `*`, `search`, `documents.*`, `documents.add`, `documents.get`, `documents.delete`, `indexes.*`, `indexes.create`, `indexes.get`, `indexes.update`, `indexes.delete`, `indexes.swap`, `tasks.*`, `tasks.cancel`, `tasks.delete`, `tasks.get`, `settings.*`, `settings.get`, `settings.update`, `stats.*`, `stats.get`, `metrics.*`, `metrics.get`, `dumps.*`, `dumps.create`, `snapshots.*`, `snapshots.create`, `version`, `keys.create`, `keys.get`, `keys.update`, `keys.delete`, `experimental.get`, `experimental.update`, `export`, `network.get`, `network.update`, `chatCompletions`, `chats.*`, `chats.get`, `chats.delete`, `chatsSettings.*`, `chatsSettings.get`, `chatsSettings.update`, `*.get`",
"code": "invalid_api_key_actions",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_api_key_actions"

View File

@@ -466,6 +466,7 @@ pub fn default_settings(dir: impl AsRef<Path>) -> Opt {
// Having 2 threads makes the tests way faster
max_indexing_threads: MaxThreads::from_str("2").unwrap(),
experimental_no_edition_2024_for_settings: false,
experimental_no_edition_2024_for_dumps: false,
},
experimental_enable_metrics: false,
..Parser::parse_from(None as Option<&str>)

View File

@@ -46,7 +46,7 @@ async fn errors_on_param() {
meili_snap::snapshot!(code, @"400 Bad Request");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"message": "Unknown field `selfie`: expected one of `remotes`, `self`",
"message": "Unknown field `selfie`: expected one of `remotes`, `self`, `sharding`",
"code": "bad_request",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#bad_request"
@@ -149,7 +149,7 @@ async fn errors_on_param() {
meili_snap::snapshot!(code, @"400 Bad Request");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"message": "Unknown field `doggo` inside `.remotes.new`: expected one of `url`, `searchApiKey`",
"message": "Unknown field `doggo` inside `.remotes.new`: expected one of `url`, `searchApiKey`, `writeApiKey`",
"code": "invalid_network_remotes",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_network_remotes"
@@ -192,9 +192,11 @@ async fn errors_on_param() {
"remotes": {
"kefir": {
"url": "http://localhost:7700",
"searchApiKey": null
"searchApiKey": null,
"writeApiKey": null
}
}
},
"sharding": false
}
"###);
let (response, code) = server
@@ -266,7 +268,8 @@ async fn auth() {
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"self": "master",
"remotes": {}
"remotes": {},
"sharding": false
}
"###);
@@ -274,11 +277,12 @@ async fn auth() {
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"self": "master",
"remotes": {}
}
"###);
{
"self": "master",
"remotes": {},
"sharding": false
}
"###);
// try get with get permission
server.use_api_key(get_network_key.as_str().unwrap());
@@ -286,11 +290,12 @@ async fn auth() {
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"self": "master",
"remotes": {}
}
"###);
{
"self": "master",
"remotes": {},
"sharding": false
}
"###);
// try update with update permission
server.use_api_key(update_network_key.as_str().unwrap());
@@ -303,11 +308,12 @@ async fn auth() {
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"self": "api_key",
"remotes": {}
}
"###);
{
"self": "api_key",
"remotes": {},
"sharding": false
}
"###);
// try with the other's permission
let (response, code) = server.get_network().await;
@@ -383,7 +389,8 @@ async fn get_and_set_network() {
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"self": null,
"remotes": {}
"remotes": {},
"sharding": false
}
"###);
@@ -393,7 +400,8 @@ async fn get_and_set_network() {
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"self": "myself",
"remotes": {}
"remotes": {},
"sharding": false
}
"###);
@@ -417,13 +425,16 @@ async fn get_and_set_network() {
"remotes": {
"myself": {
"url": "http://localhost:7700",
"searchApiKey": null
"searchApiKey": null,
"writeApiKey": null
},
"thy": {
"url": "http://localhost:7701",
"searchApiKey": "foo"
"searchApiKey": "foo",
"writeApiKey": null
}
}
},
"sharding": false
}
"###);
@@ -443,13 +454,16 @@ async fn get_and_set_network() {
"remotes": {
"myself": {
"url": "http://localhost:7700",
"searchApiKey": null
"searchApiKey": null,
"writeApiKey": null
},
"thy": {
"url": "http://localhost:7701",
"searchApiKey": "bar"
"searchApiKey": "bar",
"writeApiKey": null
}
}
},
"sharding": false
}
"###);
@@ -470,17 +484,21 @@ async fn get_and_set_network() {
"remotes": {
"myself": {
"url": "http://localhost:7700",
"searchApiKey": null
"searchApiKey": null,
"writeApiKey": null
},
"them": {
"url": "http://localhost:7702",
"searchApiKey": "baz"
"searchApiKey": "baz",
"writeApiKey": null
},
"thy": {
"url": "http://localhost:7701",
"searchApiKey": "bar"
"searchApiKey": "bar",
"writeApiKey": null
}
}
},
"sharding": false
}
"###);
@@ -498,13 +516,16 @@ async fn get_and_set_network() {
"remotes": {
"them": {
"url": "http://localhost:7702",
"searchApiKey": "baz"
"searchApiKey": "baz",
"writeApiKey": null
},
"thy": {
"url": "http://localhost:7701",
"searchApiKey": "bar"
"searchApiKey": "bar",
"writeApiKey": null
}
}
},
"sharding": false
}
"###);
@@ -518,13 +539,16 @@ async fn get_and_set_network() {
"remotes": {
"them": {
"url": "http://localhost:7702",
"searchApiKey": "baz"
"searchApiKey": "baz",
"writeApiKey": null
},
"thy": {
"url": "http://localhost:7701",
"searchApiKey": "bar"
"searchApiKey": "bar",
"writeApiKey": null
}
}
},
"sharding": false
}
"###);
@@ -538,13 +562,16 @@ async fn get_and_set_network() {
"remotes": {
"them": {
"url": "http://localhost:7702",
"searchApiKey": "baz"
"searchApiKey": "baz",
"writeApiKey": null
},
"thy": {
"url": "http://localhost:7701",
"searchApiKey": "bar"
"searchApiKey": "bar",
"writeApiKey": null
}
}
},
"sharding": false
}
"###);
@@ -553,60 +580,69 @@ async fn get_and_set_network() {
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"self": "thy",
"remotes": {
"them": {
"url": "http://localhost:7702",
"searchApiKey": "baz"
},
"thy": {
"url": "http://localhost:7701",
"searchApiKey": "bar"
}
}
{
"self": "thy",
"remotes": {
"them": {
"url": "http://localhost:7702",
"searchApiKey": "baz",
"writeApiKey": null
},
"thy": {
"url": "http://localhost:7701",
"searchApiKey": "bar",
"writeApiKey": null
}
"###);
},
"sharding": false
}
"###);
// still doing nothing
let (response, code) = server.set_network(json!({"remotes": {}})).await;
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"self": "thy",
"remotes": {
"them": {
"url": "http://localhost:7702",
"searchApiKey": "baz"
},
"thy": {
"url": "http://localhost:7701",
"searchApiKey": "bar"
}
}
{
"self": "thy",
"remotes": {
"them": {
"url": "http://localhost:7702",
"searchApiKey": "baz",
"writeApiKey": null
},
"thy": {
"url": "http://localhost:7701",
"searchApiKey": "bar",
"writeApiKey": null
}
"###);
},
"sharding": false
}
"###);
// good time to check GET
let (response, code) = server.get_network().await;
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"self": "thy",
"remotes": {
"them": {
"url": "http://localhost:7702",
"searchApiKey": "baz"
},
"thy": {
"url": "http://localhost:7701",
"searchApiKey": "bar"
}
}
{
"self": "thy",
"remotes": {
"them": {
"url": "http://localhost:7702",
"searchApiKey": "baz",
"writeApiKey": null
},
"thy": {
"url": "http://localhost:7701",
"searchApiKey": "bar",
"writeApiKey": null
}
"###);
},
"sharding": false
}
"###);
// deleting everything
let (response, code) = server
@@ -619,7 +655,8 @@ async fn get_and_set_network() {
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"self": "thy",
"remotes": {}
"remotes": {},
"sharding": false
}
"###);
}

View File

@@ -131,7 +131,8 @@ async fn remote_sharding() {
snapshot!(json_string!(response), @r###"
{
"self": "ms0",
"remotes": {}
"remotes": {},
"sharding": false
}
"###);
let (response, code) = ms1.set_network(json!({"self": "ms1"})).await;
@@ -139,7 +140,8 @@ async fn remote_sharding() {
snapshot!(json_string!(response), @r###"
{
"self": "ms1",
"remotes": {}
"remotes": {},
"sharding": false
}
"###);
let (response, code) = ms2.set_network(json!({"self": "ms2"})).await;
@@ -147,7 +149,8 @@ async fn remote_sharding() {
snapshot!(json_string!(response), @r###"
{
"self": "ms2",
"remotes": {}
"remotes": {},
"sharding": false
}
"###);
@@ -436,7 +439,8 @@ async fn error_unregistered_remote() {
snapshot!(json_string!(response), @r###"
{
"self": "ms0",
"remotes": {}
"remotes": {},
"sharding": false
}
"###);
let (response, code) = ms1.set_network(json!({"self": "ms1"})).await;
@@ -444,7 +448,8 @@ async fn error_unregistered_remote() {
snapshot!(json_string!(response), @r###"
{
"self": "ms1",
"remotes": {}
"remotes": {},
"sharding": false
}
"###);
@@ -554,7 +559,8 @@ async fn error_no_weighted_score() {
snapshot!(json_string!(response), @r###"
{
"self": "ms0",
"remotes": {}
"remotes": {},
"sharding": false
}
"###);
let (response, code) = ms1.set_network(json!({"self": "ms1"})).await;
@@ -562,7 +568,8 @@ async fn error_no_weighted_score() {
snapshot!(json_string!(response), @r###"
{
"self": "ms1",
"remotes": {}
"remotes": {},
"sharding": false
}
"###);
@@ -687,7 +694,8 @@ async fn error_bad_response() {
snapshot!(json_string!(response), @r###"
{
"self": "ms0",
"remotes": {}
"remotes": {},
"sharding": false
}
"###);
let (response, code) = ms1.set_network(json!({"self": "ms1"})).await;
@@ -695,7 +703,8 @@ async fn error_bad_response() {
snapshot!(json_string!(response), @r###"
{
"self": "ms1",
"remotes": {}
"remotes": {},
"sharding": false
}
"###);
@@ -824,7 +833,8 @@ async fn error_bad_request() {
snapshot!(json_string!(response), @r###"
{
"self": "ms0",
"remotes": {}
"remotes": {},
"sharding": false
}
"###);
let (response, code) = ms1.set_network(json!({"self": "ms1"})).await;
@@ -832,7 +842,8 @@ async fn error_bad_request() {
snapshot!(json_string!(response), @r###"
{
"self": "ms1",
"remotes": {}
"remotes": {},
"sharding": false
}
"###);
@@ -954,7 +965,8 @@ async fn error_bad_request_facets_by_index() {
snapshot!(json_string!(response), @r###"
{
"self": "ms0",
"remotes": {}
"remotes": {},
"sharding": false
}
"###);
let (response, code) = ms1.set_network(json!({"self": "ms1"})).await;
@@ -962,7 +974,8 @@ async fn error_bad_request_facets_by_index() {
snapshot!(json_string!(response), @r###"
{
"self": "ms1",
"remotes": {}
"remotes": {},
"sharding": false
}
"###);
@@ -1095,7 +1108,8 @@ async fn error_bad_request_facets_by_index_facet() {
snapshot!(json_string!(response), @r###"
{
"self": "ms0",
"remotes": {}
"remotes": {},
"sharding": false
}
"###);
let (response, code) = ms1.set_network(json!({"self": "ms1"})).await;
@@ -1103,7 +1117,8 @@ async fn error_bad_request_facets_by_index_facet() {
snapshot!(json_string!(response), @r###"
{
"self": "ms1",
"remotes": {}
"remotes": {},
"sharding": false
}
"###);
@@ -1244,7 +1259,8 @@ async fn error_remote_does_not_answer() {
snapshot!(json_string!(response), @r###"
{
"self": "ms0",
"remotes": {}
"remotes": {},
"sharding": false
}
"###);
let (response, code) = ms1.set_network(json!({"self": "ms1"})).await;
@@ -1252,7 +1268,8 @@ async fn error_remote_does_not_answer() {
snapshot!(json_string!(response), @r###"
{
"self": "ms1",
"remotes": {}
"remotes": {},
"sharding": false
}
"###);
@@ -1445,7 +1462,8 @@ async fn error_remote_404() {
snapshot!(json_string!(response), @r###"
{
"self": "ms0",
"remotes": {}
"remotes": {},
"sharding": false
}
"###);
let (response, code) = ms1.set_network(json!({"self": "ms1"})).await;
@@ -1453,7 +1471,8 @@ async fn error_remote_404() {
snapshot!(json_string!(response), @r###"
{
"self": "ms1",
"remotes": {}
"remotes": {},
"sharding": false
}
"###);
@@ -1640,7 +1659,8 @@ async fn error_remote_sharding_auth() {
snapshot!(json_string!(response), @r###"
{
"self": "ms0",
"remotes": {}
"remotes": {},
"sharding": false
}
"###);
let (response, code) = ms1.set_network(json!({"self": "ms1"})).await;
@@ -1648,7 +1668,8 @@ async fn error_remote_sharding_auth() {
snapshot!(json_string!(response), @r###"
{
"self": "ms1",
"remotes": {}
"remotes": {},
"sharding": false
}
"###);
@@ -1800,7 +1821,8 @@ async fn remote_sharding_auth() {
snapshot!(json_string!(response), @r###"
{
"self": "ms0",
"remotes": {}
"remotes": {},
"sharding": false
}
"###);
let (response, code) = ms1.set_network(json!({"self": "ms1"})).await;
@@ -1808,7 +1830,8 @@ async fn remote_sharding_auth() {
snapshot!(json_string!(response), @r###"
{
"self": "ms1",
"remotes": {}
"remotes": {},
"sharding": false
}
"###);
@@ -1955,7 +1978,8 @@ async fn error_remote_500() {
snapshot!(json_string!(response), @r###"
{
"self": "ms0",
"remotes": {}
"remotes": {},
"sharding": false
}
"###);
let (response, code) = ms1.set_network(json!({"self": "ms1"})).await;
@@ -1963,7 +1987,8 @@ async fn error_remote_500() {
snapshot!(json_string!(response), @r###"
{
"self": "ms1",
"remotes": {}
"remotes": {},
"sharding": false
}
"###);
@@ -2134,7 +2159,8 @@ async fn error_remote_500_once() {
snapshot!(json_string!(response), @r###"
{
"self": "ms0",
"remotes": {}
"remotes": {},
"sharding": false
}
"###);
let (response, code) = ms1.set_network(json!({"self": "ms1"})).await;
@@ -2142,7 +2168,8 @@ async fn error_remote_500_once() {
snapshot!(json_string!(response), @r###"
{
"self": "ms1",
"remotes": {}
"remotes": {},
"sharding": false
}
"###);

View File

@@ -15,6 +15,7 @@ use meilisearch_types::heed::{
};
use meilisearch_types::milli::constants::RESERVED_VECTORS_FIELD_NAME;
use meilisearch_types::milli::documents::{obkv_to_object, DocumentsBatchReader};
use meilisearch_types::milli::index::EmbeddingsWithMetadata;
use meilisearch_types::milli::vector::parsed_vectors::{ExplicitVectors, VectorOrArrayOfVectors};
use meilisearch_types::milli::{obkv_to_json, BEU32};
use meilisearch_types::tasks::{Status, Task};
@@ -591,12 +592,21 @@ fn export_documents(
.into());
};
for (embedder_name, (embeddings, regenerate)) in embeddings {
for (
embedder_name,
EmbeddingsWithMetadata { embeddings, regenerate, has_fragments },
) in embeddings
{
let embeddings = ExplicitVectors {
embeddings: Some(VectorOrArrayOfVectors::from_array_of_vectors(
embeddings,
)),
regenerate,
regenerate: regenerate &&
// Meilisearch does not handle well dumps with fragments, because as the fragments
// are marked as user-provided,
// all embeddings would be regenerated on any settings change or document update.
// To prevent this, we mark embeddings has non regenerate in this case.
!has_fragments,
};
vectors
.insert(embedder_name, serde_json::to_value(embeddings).unwrap());

View File

@@ -40,7 +40,7 @@ indexmap = { version = "2.9.0", features = ["serde"] }
json-depth-checker = { path = "../json-depth-checker" }
levenshtein_automata = { version = "0.2.1", features = ["fst_automaton"] }
memchr = "2.7.5"
memmap2 = "0.9.5"
memmap2 = "0.9.7"
obkv = "0.3.0"
once_cell = "1.21.3"
ordered-float = "5.0.0"
@@ -109,6 +109,7 @@ utoipa = { version = "5.4.0", features = [
"openapi_extensions",
] }
lru = "0.14.0"
twox-hash = { version = "2.1.1", default-features = false, features = ["std", "xxhash3_64", "xxhash64"] }
[dev-dependencies]
mimalloc = { version = "0.1.47", default-features = false }

View File

@@ -1766,20 +1766,22 @@ impl Index {
&self,
rtxn: &RoTxn<'_>,
docid: DocumentId,
) -> Result<BTreeMap<String, (Vec<Embedding>, bool)>> {
) -> Result<BTreeMap<String, EmbeddingsWithMetadata>> {
let mut res = BTreeMap::new();
let embedders = self.embedding_configs();
for config in embedders.embedding_configs(rtxn)? {
let embedder_info = embedders.embedder_info(rtxn, &config.name)?.unwrap();
let has_fragments = config.config.embedder_options.has_fragments();
let reader = ArroyWrapper::new(
self.vector_arroy,
embedder_info.embedder_id,
config.config.quantized(),
);
let embeddings = reader.item_vectors(rtxn, docid)?;
let regenerate = embedder_info.embedding_status.must_regenerate(docid);
res.insert(
config.name.to_owned(),
(embeddings, embedder_info.embedding_status.must_regenerate(docid)),
EmbeddingsWithMetadata { embeddings, regenerate, has_fragments },
);
}
Ok(res)
@@ -1919,6 +1921,12 @@ impl Index {
}
}
pub struct EmbeddingsWithMetadata {
pub embeddings: Vec<Embedding>,
pub regenerate: bool,
pub has_fragments: bool,
}
#[derive(Debug, Default, Deserialize, Serialize)]
pub struct ChatConfig {
pub description: String,

View File

@@ -76,6 +76,7 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
None,
)
.unwrap();

View File

@@ -84,6 +84,7 @@ impl TempIndex {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
None,
)?;
if let Some(error) = operation_stats.into_iter().find_map(|stat| stat.error) {
@@ -167,6 +168,7 @@ impl TempIndex {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
None,
)?;
if let Some(error) = operation_stats.into_iter().find_map(|stat| stat.error) {
@@ -242,6 +244,7 @@ fn aborting_indexation() {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
None,
)
.unwrap();

View File

@@ -93,7 +93,7 @@ pub struct ChatSearchParams {
pub hybrid: Setting<HybridQuery>,
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
#[deserr(default = Setting::Set(20))]
#[deserr(default)]
#[schema(value_type = Option<usize>)]
pub limit: Setting<usize>,

View File

@@ -23,7 +23,7 @@ use crate::progress::EmbedderStats;
use crate::prompt::Prompt;
use crate::update::del_add::{DelAdd, KvReaderDelAdd, KvWriterDelAdd};
use crate::update::settings::InnerIndexSettingsDiff;
use crate::vector::db::{EmbedderInfo, EmbeddingStatus, EmbeddingStatusDelta};
use crate::vector::db::{EmbedderInfo, EmbeddingStatusDelta};
use crate::vector::error::{EmbedErrorKind, PossibleEmbeddingMistakes, UnusedVectorsDistribution};
use crate::vector::extractor::{Extractor, ExtractorDiff, RequestFragmentExtractor};
use crate::vector::parsed_vectors::{ParsedVectorsDiff, VectorState};
@@ -441,6 +441,8 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
{
let embedder_is_manual = matches!(*runtime.embedder, Embedder::UserProvided(_));
let (old_is_user_provided, old_must_regenerate) =
embedder_info.embedding_status.is_user_provided_must_regenerate(docid);
let (old, new) = parsed_vectors.remove(embedder_name);
let new_must_regenerate = new.must_regenerate();
let delta = match action {
@@ -499,16 +501,19 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
let is_adding_fragments = has_fragments && !old_has_fragments;
if is_adding_fragments {
if !has_fragments {
// removing fragments
regenerate_prompt(obkv, &runtime.document_template, new_fields_ids_map)?
} else if is_adding_fragments ||
// regenerate all fragments when going from user provided to ! user provided
old_is_user_provided
{
regenerate_all_fragments(
runtime.fragments(),
&doc_alloc,
new_fields_ids_map,
obkv,
)
} else if !has_fragments {
// removing fragments
regenerate_prompt(obkv, &runtime.document_template, new_fields_ids_map)?
} else {
let mut fragment_diff = Vec::new();
let new_fields_ids_map = new_fields_ids_map.as_fields_ids_map();
@@ -600,7 +605,8 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
docid,
&delta,
new_must_regenerate,
&embedder_info.embedding_status,
old_is_user_provided,
old_must_regenerate,
);
// and we finally push the unique vectors into the writer
@@ -657,10 +663,9 @@ fn push_embedding_status_delta(
docid: DocumentId,
delta: &VectorStateDelta,
new_must_regenerate: bool,
embedding_status: &EmbeddingStatus,
old_is_user_provided: bool,
old_must_regenerate: bool,
) {
let (old_is_user_provided, old_must_regenerate) =
embedding_status.is_user_provided_must_regenerate(docid);
let new_is_user_provided = match delta {
VectorStateDelta::NoChange => old_is_user_provided,
VectorStateDelta::NowRemoved => {

View File

@@ -1977,6 +1977,7 @@ mod tests {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
None,
)
.unwrap();
@@ -2029,6 +2030,7 @@ mod tests {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
None,
)
.unwrap();
@@ -2117,6 +2119,7 @@ mod tests {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
None,
)
.unwrap();
@@ -2306,6 +2309,7 @@ mod tests {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
None,
)
.unwrap();
@@ -2369,6 +2373,7 @@ mod tests {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
None,
)
.unwrap();
@@ -2423,6 +2428,7 @@ mod tests {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
None,
)
.unwrap();
@@ -2476,6 +2482,7 @@ mod tests {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
None,
)
.unwrap();
@@ -2531,6 +2538,7 @@ mod tests {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
None,
)
.unwrap();
@@ -2591,6 +2599,7 @@ mod tests {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
None,
)
.unwrap();
@@ -2644,6 +2653,7 @@ mod tests {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
None,
)
.unwrap();
@@ -2697,6 +2707,7 @@ mod tests {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
None,
)
.unwrap();
@@ -2908,6 +2919,7 @@ mod tests {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
None,
)
.unwrap();
@@ -2968,6 +2980,7 @@ mod tests {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
None,
)
.unwrap();
@@ -3025,6 +3038,7 @@ mod tests {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
None,
)
.unwrap();

View File

@@ -16,6 +16,7 @@ pub struct IndexerConfig {
pub max_positions_per_attributes: Option<u32>,
pub skip_index_budget: bool,
pub experimental_no_edition_2024_for_settings: bool,
pub experimental_no_edition_2024_for_dumps: bool,
}
impl IndexerConfig {
@@ -65,6 +66,7 @@ impl Default for IndexerConfig {
max_positions_per_attributes: None,
skip_index_budget: false,
experimental_no_edition_2024_for_settings: false,
experimental_no_edition_2024_for_dumps: false,
}
}
}

View File

@@ -620,12 +620,35 @@ impl<'a, 'b, 'extractor> Chunks<'a, 'b, 'extractor> {
where
'a: 'doc,
{
match &mut self.kind {
ChunkType::Fragments { fragments: _, session } => {
let doc_alloc = session.doc_alloc();
self.set_status(docid, old_is_user_provided, true, false, true);
if old_is_user_provided | full_reindex {
match &mut self.kind {
ChunkType::Fragments { fragments, session } => {
let doc_alloc = session.doc_alloc();
let reindex_all_fragments =
// when the vectors were user-provided, Meilisearch cannot know if they come from a particular fragment,
// and so Meilisearch needs to clear all embeddings in that case.
// Fortunately, as dump export fragment vector with `regenerate` set to `false`,
// this case should be rare and opt-in.
old_is_user_provided ||
// full-reindex case
full_reindex;
if reindex_all_fragments {
session.on_embed_mut().clear_vectors(docid);
let extractors = fragments.iter().map(|fragment| {
RequestFragmentExtractor::new(fragment, doc_alloc).ignore_errors()
});
insert_autogenerated(
docid,
external_docid,
extractors,
document,
&(),
session,
unused_vectors_distribution,
)?;
return Ok(());
}
settings_delta.try_for_each_fragment_diff(
@@ -669,7 +692,6 @@ impl<'a, 'b, 'extractor> Chunks<'a, 'b, 'extractor> {
Result::Ok(())
},
)?;
self.set_status(docid, old_is_user_provided, true, false, true);
}
ChunkType::DocumentTemplate { document_template, session } => {
let doc_alloc = session.doc_alloc();
@@ -690,12 +712,18 @@ impl<'a, 'b, 'extractor> Chunks<'a, 'b, 'extractor> {
match extractor.diff_settings(document, &external_docid, old_extractor.as_ref())? {
ExtractorDiff::Removed => {
if old_is_user_provided || full_reindex {
session.on_embed_mut().clear_vectors(docid);
}
OnEmbed::process_embedding_response(
session.on_embed_mut(),
crate::vector::session::EmbeddingResponse { metadata, embedding: None },
);
}
ExtractorDiff::Added(input) | ExtractorDiff::Updated(input) => {
if old_is_user_provided || full_reindex {
session.on_embed_mut().clear_vectors(docid);
}
session.request_embedding(metadata, input, unused_vectors_distribution)?;
}
ExtractorDiff::Unchanged => { /* do nothing */ }
@@ -722,6 +750,13 @@ impl<'a, 'b, 'extractor> Chunks<'a, 'b, 'extractor> {
where
'a: 'doc,
{
self.set_status(
docid,
old_is_user_provided,
old_must_regenerate,
false,
new_must_regenerate,
);
match &mut self.kind {
ChunkType::DocumentTemplate { document_template, session } => {
let doc_alloc = session.doc_alloc();
@@ -731,10 +766,6 @@ impl<'a, 'b, 'extractor> Chunks<'a, 'b, 'extractor> {
new_fields_ids_map,
);
if old_is_user_provided {
session.on_embed_mut().clear_vectors(docid);
}
update_autogenerated(
docid,
external_docid,
@@ -743,6 +774,7 @@ impl<'a, 'b, 'extractor> Chunks<'a, 'b, 'extractor> {
new_document,
&external_docid,
old_must_regenerate,
old_is_user_provided,
session,
unused_vectors_distribution,
)?
@@ -754,7 +786,21 @@ impl<'a, 'b, 'extractor> Chunks<'a, 'b, 'extractor> {
});
if old_is_user_provided {
// when the document was `userProvided`, Meilisearch cannot know whose fragments a particular
// vector was referring to.
// So as a result Meilisearch will regenerate all fragments on this case.
// Fortunately, since dumps for fragments set regenerate to false, this case should be rare.
session.on_embed_mut().clear_vectors(docid);
insert_autogenerated(
docid,
external_docid,
extractors,
new_document,
&(),
session,
unused_vectors_distribution,
)?;
return Ok(());
}
update_autogenerated(
@@ -765,25 +811,18 @@ impl<'a, 'b, 'extractor> Chunks<'a, 'b, 'extractor> {
new_document,
&(),
old_must_regenerate,
false,
session,
unused_vectors_distribution,
)?
}
};
self.set_status(
docid,
old_is_user_provided,
old_must_regenerate,
false,
new_must_regenerate,
);
Ok(())
}
#[allow(clippy::too_many_arguments)]
pub fn insert_autogenerated<D: Document<'a> + Debug>(
pub fn insert_autogenerated<'doc, D: Document<'doc> + Debug>(
&mut self,
docid: DocumentId,
external_docid: &'a str,
@@ -791,7 +830,10 @@ impl<'a, 'b, 'extractor> Chunks<'a, 'b, 'extractor> {
new_fields_ids_map: &'a RefCell<crate::GlobalFieldsIdsMap>,
unused_vectors_distribution: &UnusedVectorsDistributionBump<'a>,
new_must_regenerate: bool,
) -> Result<()> {
) -> Result<()>
where
'a: 'doc,
{
let (default_is_user_provided, default_must_regenerate) = (false, true);
self.set_status(
docid,
@@ -956,6 +998,7 @@ fn update_autogenerated<'doc, 'a: 'doc, 'b, E, OD, ND>(
new_document: ND,
meta: &E::DocumentMetadata,
old_must_regenerate: bool,
mut must_clear_on_generation: bool,
session: &mut EmbedSession<'a, OnEmbeddingDocumentUpdates<'a, 'b>, E::Input>,
unused_vectors_distribution: &UnusedVectorsDistributionBump<'a>,
) -> Result<()>
@@ -984,6 +1027,11 @@ where
};
if must_regenerate {
if must_clear_on_generation {
must_clear_on_generation = false;
session.on_embed_mut().clear_vectors(docid);
}
let metadata =
Metadata { docid, external_docid, extractor_id: extractor.extractor_id() };
@@ -1002,7 +1050,7 @@ where
Ok(())
}
fn insert_autogenerated<'a, 'b, E, D: Document<'a> + Debug>(
fn insert_autogenerated<'doc, 'a: 'doc, 'b, E, D: Document<'doc> + Debug>(
docid: DocumentId,
external_docid: &'a str,
extractors: impl IntoIterator<Item = E>,

View File

@@ -17,6 +17,7 @@ use super::guess_primary_key::retrieve_or_guess_primary_key;
use crate::documents::PrimaryKey;
use crate::progress::{AtomicPayloadStep, Progress};
use crate::update::new::document::{DocumentContext, Versions};
use crate::update::new::indexer::sharding::Shards;
use crate::update::new::steps::IndexingStep;
use crate::update::new::thread_local::MostlySend;
use crate::update::new::{DocumentIdentifiers, Insertion, Update};
@@ -71,6 +72,7 @@ impl<'pl> DocumentOperation<'pl> {
new_fields_ids_map: &mut FieldsIdsMap,
must_stop_processing: &MSP,
progress: Progress,
shards: Option<&Shards>,
) -> Result<(DocumentOperationChanges<'pl>, Vec<PayloadStats>, Option<PrimaryKey<'pl>>)>
where
MSP: Fn() -> bool,
@@ -107,6 +109,7 @@ impl<'pl> DocumentOperation<'pl> {
&mut bytes,
&docids_version_offsets,
IndexDocumentsMethod::ReplaceDocuments,
shards,
payload,
),
Payload::Update(payload) => extract_addition_payload_changes(
@@ -120,6 +123,7 @@ impl<'pl> DocumentOperation<'pl> {
&mut bytes,
&docids_version_offsets,
IndexDocumentsMethod::UpdateDocuments,
shards,
payload,
),
Payload::Deletion(to_delete) => extract_deletion_payload_changes(
@@ -127,6 +131,7 @@ impl<'pl> DocumentOperation<'pl> {
rtxn,
&mut available_docids,
&docids_version_offsets,
shards,
to_delete,
),
};
@@ -173,6 +178,7 @@ fn extract_addition_payload_changes<'r, 'pl: 'r>(
bytes: &mut u64,
main_docids_version_offsets: &hashbrown::HashMap<&'pl str, PayloadOperations<'pl>>,
method: IndexDocumentsMethod,
shards: Option<&Shards>,
payload: &'pl [u8],
) -> Result<hashbrown::HashMap<&'pl str, PayloadOperations<'pl>>> {
use IndexDocumentsMethod::{ReplaceDocuments, UpdateDocuments};
@@ -210,12 +216,20 @@ fn extract_addition_payload_changes<'r, 'pl: 'r>(
primary_key.as_ref().unwrap()
};
let current_offset = iter.byte_offset();
let content = &payload[previous_offset..current_offset];
previous_offset = current_offset;
let external_id =
retrieved_primary_key.extract_fields_and_docid(doc, new_fields_ids_map, indexer)?;
let external_id = external_id.to_de();
let current_offset = iter.byte_offset();
let document_offset = DocumentOffset { content: &payload[previous_offset..current_offset] };
if shards.is_some_and(|shards| !shards.must_process(external_id)) {
continue;
}
let document_offset = DocumentOffset { content };
match main_docids_version_offsets.get(external_id) {
None => {
@@ -299,8 +313,6 @@ fn extract_addition_payload_changes<'r, 'pl: 'r>(
},
},
}
previous_offset = iter.byte_offset();
}
if payload.is_empty() {
@@ -329,11 +341,16 @@ fn extract_deletion_payload_changes<'s, 'pl: 's>(
rtxn: &RoTxn,
available_docids: &mut AvailableIds,
main_docids_version_offsets: &hashbrown::HashMap<&'s str, PayloadOperations<'pl>>,
shards: Option<&Shards>,
to_delete: &'pl [&'pl str],
) -> Result<hashbrown::HashMap<&'s str, PayloadOperations<'pl>>> {
let mut new_docids_version_offsets = hashbrown::HashMap::<&str, PayloadOperations<'pl>>::new();
for external_id in to_delete {
if shards.is_some_and(|shards| !shards.must_process(external_id)) {
continue;
}
match main_docids_version_offsets.get(external_id) {
None => {
match index.external_documents_ids().get(rtxn, external_id) {

View File

@@ -36,6 +36,7 @@ mod guess_primary_key;
mod partial_dump;
mod post_processing;
pub mod settings_changes;
pub mod sharding;
mod update_by_function;
mod write;

View File

@@ -0,0 +1,22 @@
// Copyright © 2025 Meilisearch Some Rights Reserved
// This file is part of Meilisearch Enterprise Edition (EE).
// Use of this source code is governed by the Business Source License 1.1,
// as found in the LICENSE-EE file or at <https://mariadb.com/bsl11>
use std::hash::{BuildHasher as _, BuildHasherDefault};
pub struct Shards {
pub own: Vec<String>,
pub others: Vec<String>,
}
impl Shards {
pub fn must_process(&self, docid: &str) -> bool {
let hasher = BuildHasherDefault::<twox_hash::XxHash3_64>::new();
let to_hash = |shard: &String| hasher.hash_one((shard, docid));
let max_hash = self.others.iter().map(to_hash).max().unwrap_or_default();
self.own.iter().map(to_hash).any(|hash| hash > max_hash)
}
}

View File

@@ -841,6 +841,25 @@ impl EmbedderOptions {
}
}
}
pub fn has_fragments(&self) -> bool {
match &self {
EmbedderOptions::HuggingFace(_)
| EmbedderOptions::OpenAi(_)
| EmbedderOptions::Ollama(_)
| EmbedderOptions::UserProvided(_) => false,
EmbedderOptions::Rest(embedder_options) => {
!embedder_options.indexing_fragments.is_empty()
}
EmbedderOptions::Composite(embedder_options) => {
if let SubEmbedderOptions::Rest(embedder_options) = &embedder_options.index {
!embedder_options.indexing_fragments.is_empty()
} else {
false
}
}
}
}
}
impl Default for EmbedderOptions {

View File

@@ -59,6 +59,7 @@ fn test_facet_distribution_with_no_facet_values() {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
None,
)
.unwrap();

View File

@@ -95,6 +95,7 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
None,
)
.unwrap();

View File

@@ -329,6 +329,7 @@ fn criteria_ascdesc() {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
None,
)
.unwrap();

View File

@@ -138,6 +138,7 @@ fn test_typo_disabled_on_word() {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
None,
)
.unwrap();