Mirror of https://github.com/meilisearch/meilisearch.git (synced 2025-12-02 18:55:36 +00:00)

Compare commits: xtask-gene...prototype- (54 commits)
| Author | SHA1 | Date |
|---|---|---|
| | 6417884969 | |
| | d68e6c355b | |
| | c33d2558c1 | |
| | 94e4ef65d6 | |
| | 246290ef05 | |
| | e2803da5bc | |
| | 3c59702e70 | |
| | d08b62db7d | |
| | 27d2cd7bd2 | |
| | 619900e4d3 | |
| | 24b017e367 | |
| | e64852208c | |
| | e4b28464fd | |
| | de90455809 | |
| | ca5dc1b032 | |
| | 5c464e9855 | |
| | 114d50dfba | |
| | 6f0249cffc | |
| | e000df8646 | |
| | 900e8a0d9c | |
| | f5f173e451 | |
| | ce9d56377c | |
| | c2d912645f | |
| | 112d3f54e9 | |
| | 58a88c7933 | |
| | 203418ae49 | |
| | 3bc192ae52 | |
| | db9f205184 | |
| | 4d3a9dc43e | |
| | 9a2a40a4fa | |
| | bf921e9135 | |
| | eda77aeb1a | |
| | 1fed0bed18 | |
| | 658023e01b | |
| | b9c86e721f | |
| | a29497f720 | |
| | 1ae1856ec2 | |
| | 184e9f72c1 | |
| | 4220c877e1 | |
| | dcd6951a0b | |
| | 1258bdb2b9 | |
| | 08f15cdf4b | |
| | 85e3267490 | |
| | 753db805a8 | |
| | bb52a8683a | |
| | 9a16c3a26d | |
| | 708bb766b0 | |
| | be065c4c51 | |
| | cda5995922 | |
| | 409ae70f0f | |
| | 47b8e53985 | |
| | eadd1bb5b5 | |
| | 3a84f27738 | |
| | 401b064917 | |
Cargo.lock (generated, 931 changed lines): diff suppressed because it is too large.
@@ -23,7 +23,7 @@ members = [
]

[workspace.package]
-version = "1.18.0"
+version = "1.22.0"
authors = [
    "Quentin de Quelen <quentin@dequelen.me>",
    "Clément Renault <clement@meilisearch.com>",
LICENSE (8 changed lines)
@@ -19,3 +19,11 @@ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
+
+---
+
+🔒 Meilisearch Enterprise Edition (EE)
+
+Certain parts of this codebase are not licensed under the MIT license and governed by the Business Source License 1.1.
+
+See the LICENSE-EE file for details.
LICENSE-EE (new file, 67 lines)
@@ -0,0 +1,67 @@
Business Source License 1.1 – Adapted for Meili SAS
This license is based on the Business Source License version 1.1, as published by MariaDB Corporation Ab.

Parameters

Licensor: Meili SAS

Licensed Work: Any file explicitly marked as “Enterprise Edition (EE)” or “governed by the Business Source License”.

Additional Use Grant:
You may use, modify, and distribute the Licensed Work for non-production purposes only, such as testing, development, or evaluation.

Production use of the Licensed Work requires a commercial license agreement with Meilisearch. Contact bonjour@meilisearch.com for licensing.

Change License: MIT

Change Date: Four years from the date the Licensed Work is published.

This License does not apply to any code outside of the Licensed Work, which remains under the MIT license.

For information about alternative licensing arrangements for the Licensed Work,
please contact bonjour@meilisearch.com or sales@meilisearch.com.

Notice

Business Source License 1.1

Terms

The Licensor hereby grants you the right to copy, modify, create derivative
works, redistribute, and make non-production use of the Licensed Work. The
Licensor may make an Additional Use Grant, above, permitting limited production use.

Effective on the Change Date, or the fourth anniversary of the first publicly
available distribution of a specific version of the Licensed Work under this
License, whichever comes first, the Licensor hereby grants you rights under
the terms of the Change License, and the rights granted in the paragraph
above terminate.

If your use of the Licensed Work does not comply with the requirements
currently in effect as described in this License, you must purchase a
commercial license from the Licensor, its affiliated entities, or authorized
resellers, or you must refrain from using the Licensed Work.

All copies of the original and modified Licensed Work, and derivative works
of the Licensed Work, are subject to this License. This License applies
separately for each version of the Licensed Work and the Change Date may vary
for each version of the Licensed Work released by Licensor.

You must conspicuously display this License on each original or modified copy
of the Licensed Work. If you receive the Licensed Work in original or
modified form from a third party, the terms and conditions set forth in this
License apply to your use of that work.

Any use of the Licensed Work in violation of this License will automatically
terminate your rights under this License for the current and all other
versions of the Licensed Work.

This License does not grant you any right in any trademark or logo of
Licensor or its affiliates (provided that you may use a trademark or logo of
Licensor as expressly required by this License).

TO THE EXTENT PERMITTED BY APPLICABLE LAW, THE LICENSED WORK IS PROVIDED ON
AN "AS IS" BASIS. LICENSOR HEREBY DISCLAIMS ALL WARRANTIES AND CONDITIONS,
EXPRESS OR IMPLIED, INCLUDING (WITHOUT LIMITATION) WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NON-INFRINGEMENT, AND
TITLE.
README.md (20 changed lines)
@@ -89,6 +89,26 @@ We also offer a wide range of dedicated guides to all Meilisearch features, such

Finally, for more in-depth information, refer to our articles explaining fundamental Meilisearch concepts such as [documents](https://www.meilisearch.com/docs/learn/core_concepts/documents?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=advanced) and [indexes](https://www.meilisearch.com/docs/learn/core_concepts/indexes?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=advanced).

## 🧾 Editions & Licensing

Meilisearch is available in two editions:

### 🧪 Community Edition (CE)

- Fully open source under the [MIT license](./LICENSE)
- Core search engine with fast and relevant full-text, semantic or hybrid search
- Free to use for anyone, including commercial usage

### 🏢 Enterprise Edition (EE)

- Includes advanced features such as:
  - Sharding
- Governed by a [commercial license](./LICENSE-EE) or the [Business Source License 1.1](https://mariadb.com/bsl11)
- Not allowed in production without a commercial agreement with Meilisearch.
- You may use, modify, and distribute the Licensed Work for non-production purposes only, such as testing, development, or evaluation.

Want access to Enterprise features? → Contact us at [sales@meilisearch.com](mailto:sales@meilisearch.com).

## 📊 Telemetry

Meilisearch collects **anonymized** user data to help us improve our product. You can [deactivate this](https://www.meilisearch.com/docs/learn/what_is_meilisearch/telemetry?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=telemetry#how-to-disable-data-collection) whenever you want.
Each of the following hunks applies the same change at a benchmark indexing call site: a new trailing `None,` argument is added after `Progress::default(),`. The first hunk is shown in full; the remaining hunks are identical apart from their location.

@@ -154,6 +154,7 @@ fn indexing_songs_default(c: &mut Criterion) {
    &mut new_fields_ids_map,
    &|| false,
    Progress::default(),
    None,
)
.unwrap();

The same one-line addition appears in:
@@ -221,6 +222,7 @@ fn reindexing_songs_default(c: &mut Criterion) {
@@ -266,6 +268,7 @@ fn reindexing_songs_default(c: &mut Criterion) {
@@ -335,6 +338,7 @@ fn deleting_songs_in_batches_default(c: &mut Criterion) {
@@ -412,6 +416,7 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
@@ -457,6 +462,7 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
@@ -498,6 +504,7 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
@@ -566,6 +573,7 @@ fn indexing_songs_without_faceted_numbers(c: &mut Criterion) {
@@ -633,6 +641,7 @@ fn indexing_songs_without_faceted_fields(c: &mut Criterion) {
@@ -700,6 +709,7 @@ fn indexing_wiki(c: &mut Criterion) {
@@ -766,6 +776,7 @@ fn reindexing_wiki(c: &mut Criterion) {
@@ -811,6 +822,7 @@ fn reindexing_wiki(c: &mut Criterion) {
@@ -879,6 +891,7 @@ fn deleting_wiki_in_batches_default(c: &mut Criterion) {
@@ -956,6 +969,7 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
@@ -1002,6 +1016,7 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
@@ -1044,6 +1059,7 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
@@ -1111,6 +1127,7 @@ fn indexing_movies_default(c: &mut Criterion) {
@@ -1177,6 +1194,7 @@ fn reindexing_movies_default(c: &mut Criterion) {
@@ -1222,6 +1240,7 @@ fn reindexing_movies_default(c: &mut Criterion) {
@@ -1290,6 +1309,7 @@ fn deleting_movies_in_batches_default(c: &mut Criterion) {
@@ -1404,6 +1424,7 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
@@ -1449,6 +1470,7 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
@@ -1490,6 +1512,7 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
@@ -1580,6 +1603,7 @@ fn indexing_nested_movies_default(c: &mut Criterion) {
@@ -1671,6 +1695,7 @@ fn deleting_nested_movies_in_batches_default(c: &mut Criterion) {
@@ -1754,6 +1779,7 @@ fn indexing_nested_movies_without_faceted_fields(c: &mut Criterion) {
@@ -1821,6 +1847,7 @@ fn indexing_geo(c: &mut Criterion) {
@@ -1887,6 +1914,7 @@ fn reindexing_geo(c: &mut Criterion) {
@@ -1932,6 +1960,7 @@ fn reindexing_geo(c: &mut Criterion) {
@@ -2000,6 +2029,7 @@ fn deleting_geo_in_batches_default(c: &mut Criterion) {
@@ -123,6 +123,7 @@ pub fn base_setup(conf: &Conf) -> Index {
@@ -10,7 +10,7 @@ use meilisearch_types::keys::Key;
use meilisearch_types::milli::update::IndexDocumentsMethod;
use meilisearch_types::settings::Unchecked;
use meilisearch_types::tasks::{
-    Details, ExportIndexSettings, IndexSwap, KindWithContent, Status, Task, TaskId,
+    Details, ExportIndexSettings, IndexSwap, KindWithContent, Status, Task, TaskId, TaskNetwork,
};
use meilisearch_types::InstanceUid;
use roaring::RoaringBitmap;
@@ -94,6 +94,8 @@ pub struct TaskDump {
        default
    )]
    pub finished_at: Option<OffsetDateTime>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub network: Option<TaskNetwork>,
}

// A `Kind` specific version made for the dump. If modified you may break the dump.
@@ -172,6 +174,7 @@ impl From<Task> for TaskDump {
            enqueued_at: task.enqueued_at,
            started_at: task.started_at,
            finished_at: task.finished_at,
            network: task.network,
        }
    }
}
@@ -251,11 +254,12 @@ pub(crate) mod test {
    use maplit::{btreemap, btreeset};
    use meilisearch_types::batches::{Batch, BatchEnqueuedAt, BatchStats};
    use meilisearch_types::facet_values_sort::FacetValuesSort;
-    use meilisearch_types::features::{Network, Remote, RuntimeTogglableFeatures};
+    use meilisearch_types::features::RuntimeTogglableFeatures;
    use meilisearch_types::index_uid_pattern::IndexUidPattern;
    use meilisearch_types::keys::{Action, Key};
    use meilisearch_types::milli::update::Setting;
    use meilisearch_types::milli::{self, FilterableAttributesRule};
+    use meilisearch_types::network::{Network, Remote};
    use meilisearch_types::settings::{Checked, FacetingSettings, Settings};
    use meilisearch_types::task_view::DetailsView;
    use meilisearch_types::tasks::{BatchStopReason, Details, Kind, Status};
@@ -384,6 +388,7 @@ pub(crate) mod test {
                enqueued_at: datetime!(2022-11-11 0:00 UTC),
                started_at: Some(datetime!(2022-11-20 0:00 UTC)),
                finished_at: Some(datetime!(2022-11-21 0:00 UTC)),
                network: None,
            },
            None,
        ),
@@ -408,6 +413,7 @@ pub(crate) mod test {
                enqueued_at: datetime!(2022-11-11 0:00 UTC),
                started_at: None,
                finished_at: None,
                network: None,
            },
            Some(vec![
                json!({ "id": 4, "race": "leonberg" }).as_object().unwrap().clone(),
@@ -427,6 +433,7 @@ pub(crate) mod test {
                enqueued_at: datetime!(2022-11-15 0:00 UTC),
                started_at: None,
                finished_at: None,
                network: None,
            },
            None,
        ),
@@ -539,7 +546,8 @@ pub(crate) mod test {
    fn create_test_network() -> Network {
        Network {
            local: Some("myself".to_string()),
-            remotes: maplit::btreemap! {"other".to_string() => Remote { url: "http://test".to_string(), search_api_key: Some("apiKey".to_string()) }},
+            remotes: maplit::btreemap! {"other".to_string() => Remote { url: "http://test".to_string(), search_api_key: Some("apiKey".to_string()), write_api_key: Some("docApiKey".to_string()) }},
            sharding: false,
        }
    }
@@ -163,6 +163,7 @@ impl CompatV5ToV6 {
            enqueued_at: task_view.enqueued_at,
            started_at: task_view.started_at,
            finished_at: task_view.finished_at,
            network: None,
        };

        (task, content_file)
@@ -24,7 +24,7 @@ pub type Batch = meilisearch_types::batches::Batch;
pub type Key = meilisearch_types::keys::Key;
pub type ChatCompletionSettings = meilisearch_types::features::ChatCompletionSettings;
pub type RuntimeTogglableFeatures = meilisearch_types::features::RuntimeTogglableFeatures;
-pub type Network = meilisearch_types::features::Network;
+pub type Network = meilisearch_types::network::Network;
pub type Webhooks = meilisearch_types::webhooks::WebhooksDumpView;

// ===== Other types to clarify the code of the compat module
@@ -5,8 +5,9 @@ use std::path::PathBuf;
use flate2::write::GzEncoder;
use flate2::Compression;
use meilisearch_types::batches::Batch;
-use meilisearch_types::features::{ChatCompletionSettings, Network, RuntimeTogglableFeatures};
+use meilisearch_types::features::{ChatCompletionSettings, RuntimeTogglableFeatures};
use meilisearch_types::keys::Key;
+use meilisearch_types::network::Network;
use meilisearch_types::settings::{Checked, Settings};
use meilisearch_types::webhooks::WebhooksDumpView;
use serde_json::{Map, Value};
@@ -148,11 +148,10 @@ impl File {
        Ok(Self { path: PathBuf::new(), file: None })
    }

-    pub fn persist(self) -> Result<()> {
-        if let Some(file) = self.file {
-            file.persist(&self.path)?;
-        }
-        Ok(())
+    pub fn persist(self) -> Result<Option<StdFile>> {
+        let Some(file) = self.file else { return Ok(None) };
+
+        Ok(Some(file.persist(&self.path)?))
    }
}
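Since `persist` now returns the persisted handle instead of `()`, callers can keep working with the file after it has been written out. A minimal, hedged sketch of that calling pattern follows; only `file_store::File::persist` and `StdFile` come from the diff, while the `forward_payload` helper and the error handling are illustrative assumptions.

```rust
use std::fs::File as StdFile;

// Hypothetical helper standing in for the proxy call seen later in this diff.
fn forward_payload(_file: &StdFile) {
    // e.g. stream the persisted NDJSON payload to other remotes
}

// Assumes `file_store::File::persist(self) -> Result<Option<StdFile>>`, as shown above.
fn handle_update(update_file: file_store::File) {
    match update_file.persist() {
        // A real backing file was persisted: keep the handle for further use.
        Ok(Some(persisted)) => forward_payload(&persisted),
        // Files created without a backing file yield `None`; nothing to forward.
        Ok(None) => {}
        Err(err) => eprintln!("could not persist update file: {err}"),
    }
}
```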
@@ -129,6 +129,7 @@ fn main() {
    &mut new_fields_ids_map,
    &|| false,
    Progress::default(),
    None,
)
.unwrap();
@@ -147,6 +147,7 @@ impl<'a> Dump<'a> {
            canceled_by: task.canceled_by,
            details: task.details,
            status: task.status,
            network: task.network,
            kind: match task.kind {
                KindDump::DocumentImport {
                    primary_key,
@@ -1,8 +1,9 @@
use std::sync::{Arc, RwLock};

-use meilisearch_types::features::{InstanceTogglableFeatures, Network, RuntimeTogglableFeatures};
+use meilisearch_types::features::{InstanceTogglableFeatures, RuntimeTogglableFeatures};
use meilisearch_types::heed::types::{SerdeJson, Str};
use meilisearch_types::heed::{Database, Env, RwTxn, WithoutTls};
+use meilisearch_types::network::Network;

use crate::error::FeatureNotEnabledError;
use crate::Result;
@@ -143,10 +143,10 @@ impl IndexStats {
    ///
    /// - rtxn: a RO transaction for the index, obtained from `Index::read_txn()`.
    pub fn new(index: &Index, rtxn: &RoTxn) -> milli::Result<Self> {
-        let arroy_stats = index.arroy_stats(rtxn)?;
+        let hannoy_stats = index.hannoy_stats(rtxn)?;
        Ok(IndexStats {
-            number_of_embeddings: Some(arroy_stats.number_of_embeddings),
-            number_of_embedded_documents: Some(arroy_stats.documents.len()),
+            number_of_embeddings: Some(hannoy_stats.number_of_embeddings),
+            number_of_embedded_documents: Some(hannoy_stats.documents.len()),
            documents_database_stats: index.documents_stats(rtxn)?.unwrap_or_default(),
            number_of_documents: None,
            database_size: index.on_disk_size()?,
@@ -230,6 +230,7 @@ pub fn snapshot_task(task: &Task) -> String {
        details,
        status,
        kind,
        network,
    } = task;
    snap.push('{');
    snap.push_str(&format!("uid: {uid}, "));
@@ -247,6 +248,9 @@ pub fn snapshot_task(task: &Task) -> String {
        snap.push_str(&format!("details: {}, ", &snapshot_details(details)));
    }
    snap.push_str(&format!("kind: {kind:?}"));
    if let Some(network) = network {
        snap.push_str(&format!("network: {network:?}, "))
    }

    snap.push('}');
    snap
@@ -52,7 +52,7 @@ use flate2::bufread::GzEncoder;
use flate2::Compression;
use meilisearch_types::batches::Batch;
use meilisearch_types::features::{
-    ChatCompletionSettings, InstanceTogglableFeatures, Network, RuntimeTogglableFeatures,
+    ChatCompletionSettings, InstanceTogglableFeatures, RuntimeTogglableFeatures,
};
use meilisearch_types::heed::byteorder::BE;
use meilisearch_types::heed::types::{DecodeIgnore, SerdeJson, Str, I128};
@@ -63,8 +63,9 @@ use meilisearch_types::milli::vector::{
    Embedder, EmbedderOptions, RuntimeEmbedder, RuntimeEmbedders, RuntimeFragment,
};
use meilisearch_types::milli::{self, Index};
+use meilisearch_types::network::Network;
use meilisearch_types::task_view::TaskView;
-use meilisearch_types::tasks::{KindWithContent, Task};
+use meilisearch_types::tasks::{KindWithContent, Task, TaskNetwork};
use meilisearch_types::webhooks::{Webhook, WebhooksDumpView, WebhooksView};
use milli::vector::db::IndexEmbeddingConfig;
use processing::ProcessingTasks;
@@ -666,6 +667,16 @@ impl IndexScheduler {
        self.queue.get_task_ids_from_authorized_indexes(&rtxn, query, filters, &processing)
    }

    pub fn set_task_network(&self, task_id: TaskId, network: TaskNetwork) -> Result<()> {
        let mut wtxn = self.env.write_txn()?;
        let mut task =
            self.queue.tasks.get_task(&wtxn, task_id)?.ok_or(Error::TaskNotFound(task_id))?;
        task.network = Some(network);
        self.queue.tasks.all_tasks.put(&mut wtxn, &task_id, &task)?;
        wtxn.commit()?;
        Ok(())
    }

    /// Return the batches matching the query from the user's point of view along
    /// with the total number of batches matching the query, ignoring from and limit.
    ///
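A hedged usage sketch for the new `set_task_network` method: the scheduler instance and the task uid would come from surrounding code (e.g. right after `register`), and the remote name here is purely illustrative.

```rust
use meilisearch_types::tasks::{Origin, TaskId, TaskNetwork};

// Attach origin metadata to a task that was registered on behalf of a remote node.
// `scheduler` is the IndexScheduler; error handling is simplified for the sketch.
fn tag_task_with_origin(scheduler: &index_scheduler::IndexScheduler, task_uid: TaskId) {
    let network = TaskNetwork::Origin {
        origin: Origin { remote_name: "ms-0".to_string(), task_uid: 42 },
    };
    scheduler.set_task_network(task_uid, network).expect("failed to persist task network");
}
```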
@@ -279,6 +279,7 @@ impl Queue {
            details: kind.default_details(),
            status: Status::Enqueued,
            kind: kind.clone(),
            network: None,
        };
        // For deletion and cancelation tasks, we want to make extra sure that they
        // don't attempt to delete/cancel tasks that are newer than themselves.
@@ -97,7 +97,22 @@ impl TaskQueue {
        Ok(self.all_tasks.get(rtxn, &task_id)?)
    }

-    pub(crate) fn update_task(&self, wtxn: &mut RwTxn, task: &Task) -> Result<()> {
    /// Update the inverted task indexes and write the new value of the task.
    ///
    /// The passed `task` object typically comes from a previous transaction, so two kinds of modification might have occurred:
    /// 1. Modification to the `task` object after loading it from the DB (the purpose of this method is to persist these changes)
    /// 2. Modification to the task committed by another transaction in the DB (an annoying consequence of having lost the original
    ///    transaction from which the `task` instance was deserialized)
    ///
    /// When calling this function, this `task` is modified to take into account any existing `network`
    /// that can have been added since the task was loaded into memory.
    ///
    /// Any other modification to the task that was committed from the DB since the parameter was pulled from the DB will be overwritten.
    ///
    /// # Errors
    ///
    /// - CorruptedTaskQueue: The task doesn't exist in the database
+    pub(crate) fn update_task(&self, wtxn: &mut RwTxn, task: &mut Task) -> Result<()> {
        let old_task = self.get_task(wtxn, task.uid)?.ok_or(Error::CorruptedTaskQueue)?;
        let reprocessing = old_task.status != Status::Enqueued;

@@ -157,6 +172,12 @@ impl TaskQueue {
            }
        }

        task.network = match (old_task.network, task.network.take()) {
            (None, None) => None,
            (None, Some(network)) | (Some(network), None) => Some(network),
            (Some(_), Some(network)) => Some(network),
        };

        self.all_tasks.put(wtxn, &task.uid, task)?;
        Ok(())
    }
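The merge rule above keeps whichever `network` value is freshest: a value carried by the in-memory task wins, otherwise any value already committed to the DB is preserved. A standalone sketch of the same rule, using a hypothetical placeholder type instead of `TaskNetwork`:

```rust
// `Net` is a hypothetical stand-in for `TaskNetwork`, used only to illustrate the merge rule.
#[derive(Debug, Clone, PartialEq)]
struct Net(&'static str);

fn merge(old: Option<Net>, new: Option<Net>) -> Option<Net> {
    match (old, new) {
        (None, None) => None,
        // Only one side is set: keep it.
        (None, Some(net)) | (Some(net), None) => Some(net),
        // Both sides are set: the value carried by the in-memory task wins.
        (Some(_), Some(net)) => Some(net),
    }
}

fn main() {
    assert_eq!(merge(None, Some(Net("origin"))), Some(Net("origin")));
    assert_eq!(merge(Some(Net("origin")), None), Some(Net("origin")));
    assert_eq!(merge(Some(Net("old")), Some(Net("new"))), Some(Net("new")));
}
```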
@@ -268,7 +268,7 @@ impl IndexScheduler {

            self.queue
                .tasks
-                .update_task(&mut wtxn, &task)
+                .update_task(&mut wtxn, &mut task)
                .map_err(|e| Error::UnrecoverableError(Box::new(e)))?;
        }
        if let Some(canceled_by) = canceled_by {
@@ -349,7 +349,7 @@ impl IndexScheduler {

            self.queue
                .tasks
-                .update_task(&mut wtxn, &task)
+                .update_task(&mut wtxn, &mut task)
                .map_err(|e| Error::UnrecoverableError(Box::new(e)))?;
        }
    }
@@ -147,7 +147,6 @@ impl IndexScheduler {
        };

        let mut index_wtxn = index.write_txn()?;

        let index_version = index.get_version(&index_wtxn)?.unwrap_or((1, 12, 0));
        let package_version = (VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH);
        if index_version != package_version {
@@ -66,6 +66,11 @@ impl IndexScheduler {
            }
            IndexOperation::DocumentOperation { index_uid, primary_key, operations, mut tasks } => {
                progress.update_progress(DocumentOperationProgress::RetrievingConfig);

                let network = self.network();

                let shards = network.shards();

                // TODO: at some point, for better efficiency we might want to reuse the bumpalo for successive batches.
                // this is made difficult by the fact we're doing private clones of the index scheduler and sending it
                // to a fresh thread.
@@ -130,6 +135,7 @@ impl IndexScheduler {
                    &mut new_fields_ids_map,
                    &|| must_stop_processing.get(),
                    progress.clone(),
                    shards.as_ref(),
                )
                .map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?;
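The extra trailing argument that the earlier benchmark, fuzzer, and dump-import hunks fill with `None` appears to be this same optional shard set, passed here as `shards.as_ref()`. A small hypothetical sketch of the pattern; the `index_documents` function below is illustrative, not the real milli entry point.

```rust
use milli::update::new::indexer::sharding::Shards;

// Hypothetical stand-in for an indexing entry point that now accepts an
// optional shard set as its final argument.
fn index_documents(doc_count: usize, shards: Option<&Shards>) {
    match shards {
        // Sharding enabled: only the documents owned by this node's shards are indexed locally.
        Some(shards) => println!("indexing {doc_count} documents, own shards: {:?}", shards.own),
        // No sharding (benchmarks, dump import, fuzzer): everything is indexed locally.
        None => println!("indexing all {doc_count} documents locally"),
    }
}
```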
@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
|
||||
[]
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: (1, 18, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
|
||||
0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: (1, 22, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
|
||||
1 {uid: 1, batch_uid: 1, status: succeeded, details: { primary_key: Some("mouse"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
|
||||
2 {uid: 2, batch_uid: 2, status: succeeded, details: { primary_key: Some("bone"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
|
||||
3 {uid: 3, batch_uid: 3, status: failed, error: ResponseError { code: 200, message: "Index `doggo` already exists.", error_code: "index_already_exists", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#index_already_exists" }, details: { primary_key: Some("bone"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
|
||||
@@ -57,7 +57,7 @@ girafo: { number_of_documents: 0, field_distribution: {} }
|
||||
[timestamp] [4,]
|
||||
----------------------------------------------------------------------
|
||||
### All Batches:
|
||||
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.18.0"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
|
||||
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.22.0"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
|
||||
1 {uid: 1, details: {"primaryKey":"mouse"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"catto":1}}, stop reason: "created batch containing only task with id 1 of type `indexCreation` that cannot be batched with any other task.", }
|
||||
2 {uid: 2, details: {"primaryKey":"bone"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"doggo":1}}, stop reason: "created batch containing only task with id 2 of type `indexCreation` that cannot be batched with any other task.", }
|
||||
3 {uid: 3, details: {"primaryKey":"bone"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"indexCreation":1},"indexUids":{"doggo":1}}, stop reason: "created batch containing only task with id 3 of type `indexCreation` that cannot be batched with any other task.", }
|
||||
|
||||
@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
|
||||
[]
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: (1, 18, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
|
||||
0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: (1, 22, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
|
||||
----------------------------------------------------------------------
|
||||
### Status:
|
||||
enqueued [0,]
|
||||
|
||||
@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
|
||||
[]
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: (1, 18, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
|
||||
0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: (1, 22, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
|
||||
1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
|
||||
----------------------------------------------------------------------
|
||||
### Status:
|
||||
|
||||
@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
|
||||
[]
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: (1, 18, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
|
||||
0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: (1, 22, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
|
||||
1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
|
||||
----------------------------------------------------------------------
|
||||
### Status:
|
||||
@@ -37,7 +37,7 @@ catto [1,]
|
||||
[timestamp] [0,]
|
||||
----------------------------------------------------------------------
|
||||
### All Batches:
|
||||
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.18.0"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
|
||||
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.22.0"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
|
||||
----------------------------------------------------------------------
|
||||
### Batch to tasks mapping:
|
||||
0 [0,]
|
||||
|
||||
@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
|
||||
[]
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: (1, 18, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
|
||||
0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: (1, 22, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
|
||||
1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
|
||||
2 {uid: 2, status: enqueued, details: { primary_key: Some("bone"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
|
||||
----------------------------------------------------------------------
|
||||
@@ -40,7 +40,7 @@ doggo [2,]
|
||||
[timestamp] [0,]
|
||||
----------------------------------------------------------------------
|
||||
### All Batches:
|
||||
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.18.0"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
|
||||
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.22.0"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
|
||||
----------------------------------------------------------------------
|
||||
### Batch to tasks mapping:
|
||||
0 [0,]
|
||||
|
||||
@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
|
||||
[]
|
||||
----------------------------------------------------------------------
|
||||
### All Tasks:
|
||||
0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: (1, 18, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
|
||||
0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: (1, 22, 0) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
|
||||
1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
|
||||
2 {uid: 2, status: enqueued, details: { primary_key: Some("bone"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
|
||||
3 {uid: 3, status: enqueued, details: { primary_key: Some("bone"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
|
||||
@@ -43,7 +43,7 @@ doggo [2,3,]
|
||||
[timestamp] [0,]
|
||||
----------------------------------------------------------------------
|
||||
### All Batches:
|
||||
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.18.0"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
|
||||
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.22.0"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
|
||||
----------------------------------------------------------------------
|
||||
### Batch to tasks mapping:
|
||||
0 [0,]
|
||||
|
||||
@@ -42,6 +42,7 @@ pub fn upgrade_index_scheduler(
        (1, 16, _) => 0,
        (1, 17, _) => 0,
        (1, 18, _) => 0,
        (1, 22, _) => 0,
        (major, minor, patch) => {
            if major > current_major
                || (major == current_major && minor > current_minor)
@@ -91,6 +92,7 @@ pub fn upgrade_index_scheduler(
            details: Some(Details::UpgradeDatabase { from, to }),
            status: Status::Enqueued,
            kind: KindWithContent::UpgradeDatabase { from },
            network: None,
        },
    )?;
    wtxn.commit()?;
@@ -1,6 +1,5 @@
//! Utility functions on the DBs. Mainly getter and setters.

-use crate::milli::progress::EmbedderStats;
use std::collections::{BTreeSet, HashSet};
use std::ops::Bound;
use std::sync::Arc;
@@ -15,6 +14,7 @@ use meilisearch_types::tasks::{
use roaring::RoaringBitmap;
use time::OffsetDateTime;

+use crate::milli::progress::EmbedderStats;
use crate::{Error, Result, Task, TaskId, BEI128};

/// This structure contains all the information required to write a batch in the database without reading the tasks.
@@ -377,6 +377,7 @@ impl crate::IndexScheduler {
        details,
        status,
        kind,
        network: _,
    } = task;
    assert_eq!(uid, task.uid);
    if task.status != Status::Enqueued {
@@ -235,9 +235,11 @@ InvalidDocumentFields , InvalidRequest , BAD_REQU
InvalidDocumentRetrieveVectors , InvalidRequest , BAD_REQUEST ;
MissingDocumentFilter , InvalidRequest , BAD_REQUEST ;
MissingDocumentEditionFunction , InvalidRequest , BAD_REQUEST ;
+InconsistentDocumentChangeHeaders , InvalidRequest , BAD_REQUEST ;
InvalidDocumentFilter , InvalidRequest , BAD_REQUEST ;
InvalidDocumentSort , InvalidRequest , BAD_REQUEST ;
InvalidDocumentGeoField , InvalidRequest , BAD_REQUEST ;
+InvalidHeaderValue , InvalidRequest , BAD_REQUEST ;
InvalidVectorDimensions , InvalidRequest , BAD_REQUEST ;
InvalidVectorsType , InvalidRequest , BAD_REQUEST ;
InvalidDocumentId , InvalidRequest , BAD_REQUEST ;
@@ -266,7 +268,9 @@ InvalidMultiSearchRemote , InvalidRequest , BAD_REQU
InvalidMultiSearchWeight , InvalidRequest , BAD_REQUEST ;
InvalidNetworkRemotes , InvalidRequest , BAD_REQUEST ;
InvalidNetworkSelf , InvalidRequest , BAD_REQUEST ;
+InvalidNetworkSharding , InvalidRequest , BAD_REQUEST ;
InvalidNetworkSearchApiKey , InvalidRequest , BAD_REQUEST ;
+InvalidNetworkWriteApiKey , InvalidRequest , BAD_REQUEST ;
InvalidNetworkUrl , InvalidRequest , BAD_REQUEST ;
InvalidSearchAttributesToSearchOn , InvalidRequest , BAD_REQUEST ;
InvalidSearchAttributesToCrop , InvalidRequest , BAD_REQUEST ;
@@ -1,5 +1,3 @@
-use std::collections::BTreeMap;
-
use serde::{Deserialize, Serialize};

use crate::error::{Code, ResponseError};
@@ -32,23 +30,6 @@ pub struct InstanceTogglableFeatures {
    pub contains_filter: bool,
}

-#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
-#[serde(rename_all = "camelCase")]
-pub struct Remote {
-    pub url: String,
-    #[serde(default)]
-    pub search_api_key: Option<String>,
-}
-
-#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, Default)]
-#[serde(rename_all = "camelCase")]
-pub struct Network {
-    #[serde(default, rename = "self")]
-    pub local: Option<String>,
-    #[serde(default)]
-    pub remotes: BTreeMap<String, Remote>,
-}
-
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, Default)]
#[serde(rename_all = "camelCase")]
pub struct ChatCompletionSettings {

@@ -10,6 +10,7 @@ pub mod index_uid;
pub mod index_uid_pattern;
pub mod keys;
pub mod locales;
pub mod network;
pub mod settings;
pub mod star_or;
pub mod task_view;
crates/meilisearch-types/src/network.rs (new file, 47 lines)
@@ -0,0 +1,47 @@
// Copyright © 2025 Meilisearch Some Rights Reserved
// This file is part of Meilisearch Enterprise Edition (EE).
// Use of this source code is governed by the Business Source License 1.1,
// as found in the LICENSE-EE file or at <https://mariadb.com/bsl11>

use std::collections::BTreeMap;

use milli::update::new::indexer::sharding::Shards;
use serde::{Deserialize, Serialize};

#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, Default)]
#[serde(rename_all = "camelCase")]
pub struct Network {
    #[serde(default, rename = "self")]
    pub local: Option<String>,
    #[serde(default)]
    pub remotes: BTreeMap<String, Remote>,
    #[serde(default)]
    pub sharding: bool,
}

impl Network {
    pub fn shards(&self) -> Option<Shards> {
        if self.sharding {
            let this = self.local.as_deref().expect("Inconsistent `sharding` and `self`");
            let others = self
                .remotes
                .keys()
                .filter(|name| name.as_str() != this)
                .map(|name| name.to_owned())
                .collect();
            Some(Shards { own: vec![this.to_owned()], others })
        } else {
            None
        }
    }
}

#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
#[serde(rename_all = "camelCase")]
pub struct Remote {
    pub url: String,
    #[serde(default)]
    pub search_api_key: Option<String>,
    #[serde(default)]
    pub write_api_key: Option<String>,
}
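A hedged sketch of how the new `Network::shards` helper behaves, assuming the struct paths shown above; the instance names are illustrative and the collection type of `Shards::others` is assumed to support `contains`.

```rust
use std::collections::BTreeMap;

use meilisearch_types::network::{Network, Remote};

fn main() {
    let remote = |url: &str| Remote {
        url: url.to_string(),
        search_api_key: None,
        write_api_key: None,
    };

    let network = Network {
        local: Some("ms-0".to_string()),
        remotes: BTreeMap::from([
            ("ms-0".to_string(), remote("http://ms-0:7700")),
            ("ms-1".to_string(), remote("http://ms-1:7700")),
        ]),
        sharding: true,
    };

    // With `sharding: true`, the local instance becomes the owned shard and every
    // other remote name ends up in `others`.
    let shards = network.shards().expect("sharding is enabled");
    assert_eq!(shards.own, vec!["ms-0".to_string()]);
    assert!(shards.others.contains(&"ms-1".to_string()));

    // With `sharding: false` (the default), no shard set is produced.
    assert!(Network { sharding: false, ..network }.shards().is_none());
}
```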
@@ -11,6 +11,7 @@ use crate::error::ResponseError;
use crate::settings::{Settings, Unchecked};
use crate::tasks::{
    serialize_duration, Details, DetailsExportIndexSettings, IndexSwap, Kind, Status, Task, TaskId,
    TaskNetwork,
};

#[derive(Debug, Clone, PartialEq, Serialize, ToSchema)]
@@ -51,6 +52,9 @@ pub struct TaskView {
    #[schema(value_type = String, example = json!("2024-08-08_14:12:09.393Z"))]
    #[serde(with = "time::serde::rfc3339::option", default)]
    pub finished_at: Option<OffsetDateTime>,

    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub network: Option<TaskNetwork>,
}

impl TaskView {
@@ -68,6 +72,7 @@ impl TaskView {
            enqueued_at: task.enqueued_at,
            started_at: task.started_at,
            finished_at: task.finished_at,
            network: task.network.clone(),
        }
    }
}
@@ -42,6 +42,9 @@ pub struct Task {

    pub status: Status,
    pub kind: KindWithContent,

    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub network: Option<TaskNetwork>,
}

impl Task {
@@ -737,6 +740,36 @@ pub enum Details {
    },
}

#[derive(Debug, PartialEq, Clone, Serialize, Deserialize, ToSchema)]
#[serde(untagged, rename_all = "camelCase")]
pub enum TaskNetwork {
    Origin { origin: Origin },
    Remotes { remote_tasks: BTreeMap<String, RemoteTask> },
}
#[derive(Debug, PartialEq, Clone, Serialize, Deserialize, ToSchema)]
#[serde(rename_all = "camelCase")]
pub struct Origin {
    pub remote_name: String,
    pub task_uid: usize,
}

#[derive(Debug, PartialEq, Clone, Serialize, Deserialize, ToSchema)]
#[serde(rename_all = "camelCase")]
pub struct RemoteTask {
    #[serde(skip_serializing_if = "Option::is_none")]
    task_uid: Option<TaskId>,
    error: Option<ResponseError>,
}

impl From<Result<TaskId, ResponseError>> for RemoteTask {
    fn from(res: Result<TaskId, ResponseError>) -> RemoteTask {
        match res {
            Ok(task_uid) => RemoteTask { task_uid: Some(task_uid), error: None },
            Err(err) => RemoteTask { task_uid: None, error: Some(err) },
        }
    }
}

#[derive(Debug, PartialEq, Clone, Serialize, Deserialize, ToSchema)]
#[schema(rename_all = "camelCase")]
pub struct DetailsExportIndexSettings {
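A hedged sketch of how the untagged `TaskNetwork` enum serializes, assuming the derives and paths shown above; `serde_json` is used only for the demonstration and the values are illustrative.

```rust
use meilisearch_types::tasks::{Origin, TaskNetwork};

fn main() {
    let network = TaskNetwork::Origin {
        origin: Origin { remote_name: "ms-1".to_string(), task_uid: 42 },
    };

    // With `#[serde(untagged)]` there is no variant tag: only the variant's field
    // appears, and the `rename_all = "camelCase"` on `Origin` turns `remote_name`
    // into `remoteName`. Expected output:
    // {"origin":{"remoteName":"ms-1","taskUid":42}}
    println!("{}", serde_json::to_string(&network).unwrap());
}
```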
@@ -115,6 +115,9 @@ utoipa-scalar = { version = "0.3.0", optional = true, features = ["actix-web"] }
async-openai = { git = "https://github.com/meilisearch/async-openai", branch = "better-error-handling" }
secrecy = "0.10.3"
actix-web-lab = { version = "0.24.1", default-features = false }
+urlencoding = "2.1.3"
+backoff = { version = "0.4.0", features = ["tokio"] }


[dev-dependencies]
actix-rt = "2.10.0"
@@ -125,7 +128,6 @@ manifest-dir-macros = "0.1.18"
maplit = "1.0.2"
meili-snap = { path = "../meili-snap" }
temp-env = "0.3.6"
-urlencoding = "2.1.3"
wiremock = "0.6.3"
yaup = "0.3.1"
@@ -9,6 +9,8 @@ use meilisearch_types::milli::OrderBy;
use serde_json::Value;
use tokio::task::JoinError;

use crate::routes::indexes::{PROXY_ORIGIN_REMOTE_HEADER, PROXY_ORIGIN_TASK_UID_HEADER};

#[derive(Debug, thiserror::Error)]
pub enum MeilisearchHttpError {
    #[error("A Content-Type header is missing. Accepted values for the Content-Type header are: {}",
@@ -80,6 +82,16 @@ pub enum MeilisearchHttpError {
    MissingSearchHybrid,
    #[error("Invalid request: both `media` and `vector` parameters are present.")]
    MediaAndVector,
    #[error("Inconsistent `Origin` headers: {} was provided but {} is missing.\n - Hint: Either both headers should be provided, or none of them", if *is_remote_missing {
        PROXY_ORIGIN_TASK_UID_HEADER
    } else { PROXY_ORIGIN_REMOTE_HEADER },
    if *is_remote_missing {
        PROXY_ORIGIN_REMOTE_HEADER
    } else { PROXY_ORIGIN_TASK_UID_HEADER }
    )]
    InconsistentOriginHeaders { is_remote_missing: bool },
    #[error("Invalid value for header {header_name}: {msg}")]
    InvalidHeaderValue { header_name: &'static str, msg: String },
}

impl MeilisearchHttpError {
@@ -124,6 +136,10 @@ impl ErrorCode for MeilisearchHttpError {
            MeilisearchHttpError::InconsistentFacetOrder { .. } => {
                Code::InvalidMultiSearchFacetOrder
            }
            MeilisearchHttpError::InconsistentOriginHeaders { .. } => {
                Code::InconsistentDocumentChangeHeaders
            }
            MeilisearchHttpError::InvalidHeaderValue { .. } => Code::InvalidHeaderValue,
        }
    }
}
@@ -628,6 +628,7 @@ fn import_dump(
    &mut new_fields_ids_map,
    &|| false, // never stop processing a dump
    progress.clone(),
    None,
)?;

let operation_stats = operation_stats.pop().unwrap();
@@ -45,6 +45,7 @@ use crate::extractors::authentication::policies::*;
use crate::extractors::authentication::GuardedData;
use crate::extractors::payload::Payload;
use crate::extractors::sequential_extractor::SeqHandler;
use crate::routes::indexes::proxy::{proxy, Body};
use crate::routes::indexes::search::fix_sort_query_parameters;
use crate::routes::{
    get_task_id, is_dry_run, PaginationView, SummarizedTaskView, PAGINATION_DEFAULT_LIMIT,
@@ -338,6 +339,7 @@ pub async fn delete_document(
) -> Result<HttpResponse, ResponseError> {
    let DocumentParam { index_uid, document_id } = path.into_inner();
    let index_uid = IndexUid::try_from(index_uid)?;
    let network = index_scheduler.network();

    analytics.publish(
        DocumentsDeletionAggregator {
@@ -355,10 +357,16 @@ pub async fn delete_document(
    };
    let uid = get_task_id(&req, &opt)?;
    let dry_run = is_dry_run(&req, &opt)?;
-    let task: SummarizedTaskView =
-        tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run))
-            .await??
-            .into();
+    let task = {
+        let index_scheduler = index_scheduler.clone();
+        tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run)).await??
+    };
+
+    if network.sharding && !dry_run {
+        proxy(&index_scheduler, &index_uid, &req, network, Body::none(), &task).await?;
+    }
+
+    let task: SummarizedTaskView = task.into();
    debug!("returns: {:?}", task);
    Ok(HttpResponse::Accepted().json(task))
}
@@ -804,7 +812,6 @@ pub async fn replace_documents(
|
||||
let uid = get_task_id(&req, &opt)?;
|
||||
let dry_run = is_dry_run(&req, &opt)?;
|
||||
let task = document_addition(
|
||||
extract_mime_type(&req)?,
|
||||
index_scheduler,
|
||||
index_uid,
|
||||
params.primary_key,
|
||||
@@ -814,8 +821,10 @@ pub async fn replace_documents(
|
||||
uid,
|
||||
dry_run,
|
||||
allow_index_creation,
|
||||
&req,
|
||||
)
|
||||
.await?;
|
||||
|
||||
debug!(returns = ?task, "Replace documents");
|
||||
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
@@ -905,7 +914,6 @@ pub async fn update_documents(
|
||||
let uid = get_task_id(&req, &opt)?;
|
||||
let dry_run = is_dry_run(&req, &opt)?;
|
||||
let task = document_addition(
|
||||
extract_mime_type(&req)?,
|
||||
index_scheduler,
|
||||
index_uid,
|
||||
params.primary_key,
|
||||
@@ -915,6 +923,7 @@ pub async fn update_documents(
|
||||
uid,
|
||||
dry_run,
|
||||
allow_index_creation,
|
||||
&req,
|
||||
)
|
||||
.await?;
|
||||
debug!(returns = ?task, "Update documents");
|
||||
@@ -924,7 +933,6 @@ pub async fn update_documents(
|
||||
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
async fn document_addition(
|
||||
mime_type: Option<Mime>,
|
||||
index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_ADD }>, Data<IndexScheduler>>,
|
||||
index_uid: IndexUid,
|
||||
primary_key: Option<String>,
|
||||
@@ -934,7 +942,11 @@ async fn document_addition(
|
||||
task_id: Option<TaskId>,
|
||||
dry_run: bool,
|
||||
allow_index_creation: bool,
|
||||
req: &HttpRequest,
|
||||
) -> Result<SummarizedTaskView, MeilisearchHttpError> {
|
||||
let mime_type = extract_mime_type(req)?;
|
||||
let network = index_scheduler.network();
|
||||
|
||||
let format = match (
|
||||
mime_type.as_ref().map(|m| (m.type_().as_str(), m.subtype().as_str())),
|
||||
csv_delimiter,
|
||||
@@ -966,7 +978,7 @@ async fn document_addition(
|
||||
};
|
||||
|
||||
let (uuid, mut update_file) = index_scheduler.queue.create_update_file(dry_run)?;
|
||||
let documents_count = match format {
|
||||
let res = match format {
|
||||
PayloadType::Ndjson => {
|
||||
let (path, file) = update_file.into_parts();
|
||||
let file = match file {
|
||||
@@ -981,19 +993,19 @@ async fn document_addition(
|
||||
None => None,
|
||||
};
|
||||
|
||||
let documents_count = tokio::task::spawn_blocking(move || {
|
||||
let res = tokio::task::spawn_blocking(move || {
|
||||
let documents_count = file.as_ref().map_or(Ok(0), |ntf| {
|
||||
read_ndjson(ntf.as_file()).map_err(MeilisearchHttpError::DocumentFormat)
|
||||
})?;
|
||||
|
||||
let update_file = file_store::File::from_parts(path, file);
|
||||
update_file.persist()?;
|
||||
let update_file = update_file.persist()?;
|
||||
|
||||
Ok(documents_count)
|
||||
Ok((documents_count, update_file))
|
||||
})
|
||||
.await?;
|
||||
|
||||
Ok(documents_count)
|
||||
Ok(res)
|
||||
}
|
||||
PayloadType::Json | PayloadType::Csv { delimiter: _ } => {
|
||||
let temp_file = match tempfile() {
|
||||
@@ -1012,16 +1024,16 @@ async fn document_addition(
|
||||
unreachable!("We already wrote the user content into the update file")
|
||||
}
|
||||
};
|
||||
// we NEED to persist the file here because we moved the `udpate_file` in another task.
|
||||
update_file.persist()?;
|
||||
Ok(documents_count)
|
||||
// we NEED to persist the file here because we moved the `update_file` in another task.
|
||||
let file = update_file.persist()?;
|
||||
Ok((documents_count, file))
|
||||
})
|
||||
.await
|
||||
}
|
||||
};
|
||||
|
||||
let documents_count = match documents_count {
|
||||
Ok(Ok(documents_count)) => documents_count,
|
||||
let (documents_count, file) = match res {
|
||||
Ok(Ok((documents_count, file))) => (documents_count, file),
|
||||
// in this case the file has not possibly be persisted.
|
||||
Ok(Err(e)) => return Err(e),
|
||||
Err(e) => {
|
||||
@@ -1063,6 +1075,20 @@ async fn document_addition(
|
||||
}
|
||||
};
|
||||
|
||||
if network.sharding {
|
||||
if let Some(file) = file {
|
||||
proxy(
|
||||
&index_scheduler,
|
||||
&index_uid,
|
||||
req,
|
||||
network,
|
||||
Body::with_ndjson_payload(file),
|
||||
&task,
|
||||
)
|
||||
.await?;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(task.into())
|
||||
}
|
||||
|
||||
@@ -1141,6 +1167,7 @@ pub async fn delete_documents_batch(
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
debug!(parameters = ?body, "Delete documents by batch");
|
||||
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
|
||||
let network = index_scheduler.network();
|
||||
|
||||
analytics.publish(
|
||||
DocumentsDeletionAggregator {
|
||||
@@ -1161,16 +1188,22 @@ pub async fn delete_documents_batch(
|
||||
KindWithContent::DocumentDeletion { index_uid: index_uid.to_string(), documents_ids: ids };
|
||||
let uid = get_task_id(&req, &opt)?;
|
||||
let dry_run = is_dry_run(&req, &opt)?;
|
||||
let task: SummarizedTaskView =
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run))
|
||||
.await??
|
||||
.into();
|
||||
let task = {
|
||||
let index_scheduler = index_scheduler.clone();
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run)).await??
|
||||
};
|
||||
|
||||
if network.sharding && !dry_run {
|
||||
proxy(&index_scheduler, &index_uid, &req, network, Body::Inline(body), &task).await?;
|
||||
}
|
||||
|
||||
let task: SummarizedTaskView = task.into();
|
||||
|
||||
debug!(returns = ?task, "Delete documents by batch");
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserr, ToSchema)]
|
||||
#[derive(Debug, Deserr, ToSchema, Serialize)]
|
||||
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
|
||||
#[schema(rename_all = "camelCase")]
|
||||
pub struct DocumentDeletionByFilter {
|
||||
@@ -1219,7 +1252,8 @@ pub async fn delete_documents_by_filter(
|
||||
debug!(parameters = ?body, "Delete documents by filter");
|
||||
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
|
||||
let index_uid = index_uid.into_inner();
|
||||
let filter = body.into_inner().filter;
|
||||
let filter = body.into_inner();
|
||||
let network = index_scheduler.network();
|
||||
|
||||
analytics.publish(
|
||||
DocumentsDeletionAggregator {
|
||||
@@ -1232,23 +1266,36 @@ pub async fn delete_documents_by_filter(
|
||||
);
|
||||
|
||||
// we ensure the filter is well formed before enqueuing it
|
||||
crate::search::parse_filter(&filter, Code::InvalidDocumentFilter, index_scheduler.features())?
|
||||
.ok_or(MeilisearchHttpError::EmptyFilter)?;
|
||||
crate::search::parse_filter(
|
||||
&filter.filter,
|
||||
Code::InvalidDocumentFilter,
|
||||
index_scheduler.features(),
|
||||
)?
|
||||
.ok_or(MeilisearchHttpError::EmptyFilter)?;
|
||||
|
||||
let task = KindWithContent::DocumentDeletionByFilter { index_uid, filter_expr: filter };
|
||||
let task = KindWithContent::DocumentDeletionByFilter {
|
||||
index_uid: index_uid.clone(),
|
||||
filter_expr: filter.filter.clone(),
|
||||
};
|
||||
|
||||
let uid = get_task_id(&req, &opt)?;
|
||||
let dry_run = is_dry_run(&req, &opt)?;
|
||||
let task: SummarizedTaskView =
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run))
|
||||
.await??
|
||||
.into();
|
||||
let task = {
|
||||
let index_scheduler = index_scheduler.clone();
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run)).await??
|
||||
};
|
||||
|
||||
if network.sharding && !dry_run {
|
||||
proxy(&index_scheduler, &index_uid, &req, network, Body::Inline(filter), &task).await?;
|
||||
}
|
||||
|
||||
let task: SummarizedTaskView = task.into();
|
||||
|
||||
debug!(returns = ?task, "Delete documents by filter");
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserr, ToSchema)]
|
||||
#[derive(Debug, Deserr, ToSchema, Serialize)]
|
||||
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
|
||||
pub struct DocumentEditionByFunction {
|
||||
/// A string containing a RHAI function.
|
||||
@@ -1336,6 +1383,8 @@ pub async fn edit_documents_by_function(
|
||||
.features()
|
||||
.check_edit_documents_by_function("Using the documents edit route")?;
|
||||
|
||||
let network = index_scheduler.network();
|
||||
|
||||
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
|
||||
let index_uid = index_uid.into_inner();
|
||||
let params = params.into_inner();
|
||||
@@ -1349,13 +1398,12 @@ pub async fn edit_documents_by_function(
|
||||
&req,
|
||||
);
|
||||
|
||||
let DocumentEditionByFunction { filter, context, function } = params;
|
||||
let engine = milli::rhai::Engine::new();
|
||||
if let Err(e) = engine.compile(&function) {
|
||||
if let Err(e) = engine.compile(¶ms.function) {
|
||||
return Err(ResponseError::from_msg(e.to_string(), Code::BadRequest));
|
||||
}
|
||||
|
||||
if let Some(ref filter) = filter {
|
||||
if let Some(ref filter) = params.filter {
|
||||
// we ensure the filter is well formed before enqueuing it
|
||||
crate::search::parse_filter(
|
||||
filter,
|
||||
@@ -1365,9 +1413,9 @@ pub async fn edit_documents_by_function(
|
||||
.ok_or(MeilisearchHttpError::EmptyFilter)?;
|
||||
}
|
||||
let task = KindWithContent::DocumentEdition {
|
||||
index_uid,
|
||||
filter_expr: filter,
|
||||
context: match context {
|
||||
index_uid: index_uid.clone(),
|
||||
filter_expr: params.filter.clone(),
|
||||
context: match params.context.clone() {
|
||||
Some(Value::Object(m)) => Some(m),
|
||||
None => None,
|
||||
_ => {
|
||||
@@ -1377,15 +1425,21 @@ pub async fn edit_documents_by_function(
|
||||
))
|
||||
}
|
||||
},
|
||||
function,
|
||||
function: params.function.clone(),
|
||||
};
|
||||
|
||||
let uid = get_task_id(&req, &opt)?;
|
||||
let dry_run = is_dry_run(&req, &opt)?;
|
||||
let task: SummarizedTaskView =
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run))
|
||||
.await??
|
||||
.into();
|
||||
let task = {
|
||||
let index_scheduler = index_scheduler.clone();
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run)).await??
|
||||
};
|
||||
|
||||
if network.sharding && !dry_run {
|
||||
proxy(&index_scheduler, &index_uid, &req, network, Body::Inline(params), &task).await?;
|
||||
}
|
||||
|
||||
let task: SummarizedTaskView = task.into();
|
||||
|
||||
debug!(returns = ?task, "Edit documents by function");
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
@@ -1428,6 +1482,8 @@ pub async fn clear_all_documents(
|
||||
analytics: web::Data<Analytics>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
|
||||
let network = index_scheduler.network();
|
||||
|
||||
analytics.publish(
|
||||
DocumentsDeletionAggregator {
|
||||
clear_all: true,
|
||||
@@ -1441,10 +1497,18 @@ pub async fn clear_all_documents(
|
||||
let task = KindWithContent::DocumentClear { index_uid: index_uid.to_string() };
|
||||
let uid = get_task_id(&req, &opt)?;
|
||||
let dry_run = is_dry_run(&req, &opt)?;
|
||||
let task: SummarizedTaskView =
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run))
|
||||
.await??
|
||||
.into();
|
||||
|
||||
let task = {
|
||||
let index_scheduler = index_scheduler.clone();
|
||||
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run)).await??
|
||||
};
|
||||
|
||||
if network.sharding && !dry_run {
|
||||
proxy(&index_scheduler, &index_uid, &req, network, Body::none(), &task).await?;
|
||||
}
|
||||
|
||||
let task: SummarizedTaskView = task.into();
|
||||
|
||||
debug!(returns = ?task, "Delete all documents");
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
|
||||
@@ -30,6 +30,7 @@ use crate::Opt;
|
||||
|
||||
pub mod documents;
|
||||
pub mod facet_search;
|
||||
mod proxy;
|
||||
pub mod search;
|
||||
mod search_analytics;
|
||||
#[cfg(test)]
|
||||
@@ -39,6 +40,8 @@ mod settings_analytics;
|
||||
pub mod similar;
|
||||
mod similar_analytics;
|
||||
|
||||
pub use proxy::{PROXY_ORIGIN_REMOTE_HEADER, PROXY_ORIGIN_TASK_UID_HEADER};
|
||||
|
||||
#[derive(OpenApi)]
|
||||
#[openapi(
|
||||
nest(
|
||||
|
||||
crates/meilisearch/src/routes/indexes/proxy.rs (new file, 424 lines)
@@ -0,0 +1,424 @@
|
||||
// Copyright © 2025 Meilisearch Some Rights Reserved
|
||||
// This file is part of Meilisearch Enterprise Edition (EE).
|
||||
// Use of this source code is governed by the Business Source License 1.1,
|
||||
// as found in the LICENSE-EE file or at <https://mariadb.com/bsl11>
|
||||
|
||||
use std::collections::BTreeMap;
|
||||
use std::fs::File;
|
||||
|
||||
use actix_web::http::header::CONTENT_TYPE;
|
||||
use actix_web::HttpRequest;
|
||||
use bytes::Bytes;
|
||||
use index_scheduler::IndexScheduler;
|
||||
use meilisearch_types::error::ResponseError;
|
||||
use meilisearch_types::tasks::{Origin, RemoteTask, TaskNetwork};
|
||||
use reqwest::StatusCode;
|
||||
use serde::de::DeserializeOwned;
|
||||
use serde_json::Value;
|
||||
|
||||
use crate::error::MeilisearchHttpError;
|
||||
use crate::routes::indexes::proxy::error::{ProxyDocumentChangeError, ReqwestErrorWithoutUrl};
|
||||
use crate::routes::SummarizedTaskView;
|
||||
|
||||
pub enum Body<T: serde::Serialize> {
|
||||
NdJsonPayload(File),
|
||||
Inline(T),
|
||||
None,
|
||||
}
|
||||
|
||||
impl Body<()> {
|
||||
pub fn with_ndjson_payload(file: File) -> Self {
|
||||
Self::NdJsonPayload(file)
|
||||
}
|
||||
|
||||
pub fn none() -> Self {
|
||||
Self::None
|
||||
}
|
||||
}
|
||||
|
||||
/// If necessary, proxies the passed request to the network and updates the task description.
///
/// This function reads the custom headers from the request to determine whether it must proxy the request or whether the request
/// has already been proxied.
///
/// - when it must proxy the request, the endpoint, method and query params are retrieved from the passed `req`, then the `body` is
///   sent to all remotes of the `network` (except `self`). The responses from the remotes are collected to update the passed `task`
///   with the task ids from the task queues of the remotes.
/// - when the request has already been proxied, the custom headers contain information about the remote that created the initial task.
///   This information is copied to the passed task.
|
||||
pub async fn proxy<T: serde::Serialize>(
|
||||
index_scheduler: &IndexScheduler,
|
||||
index_uid: &str,
|
||||
req: &HttpRequest,
|
||||
network: meilisearch_types::network::Network,
|
||||
body: Body<T>,
|
||||
task: &meilisearch_types::tasks::Task,
|
||||
) -> Result<(), MeilisearchHttpError> {
|
||||
match origin_from_req(req)? {
|
||||
Some(origin) => {
|
||||
index_scheduler.set_task_network(task.uid, TaskNetwork::Origin { origin })?
|
||||
}
|
||||
None => {
|
||||
let this = network
|
||||
.local
|
||||
.as_deref()
|
||||
.expect("inconsistent `network.sharding` and `network.self`")
|
||||
.to_owned();
|
||||
|
||||
let content_type = match &body {
|
||||
// for file bodies, force x-ndjson
|
||||
Body::NdJsonPayload(_) => Some(b"application/x-ndjson".as_slice()),
|
||||
// otherwise get content type from request
|
||||
_ => req.headers().get(CONTENT_TYPE).map(|h| h.as_bytes()),
|
||||
};
|
||||
|
||||
let body = match body {
|
||||
Body::NdJsonPayload(file) => Some(Bytes::from_owner(unsafe {
|
||||
memmap2::Mmap::map(&file).map_err(|err| {
|
||||
MeilisearchHttpError::from_milli(err.into(), Some(index_uid.to_owned()))
|
||||
})?
|
||||
})),
|
||||
|
||||
Body::Inline(payload) => {
|
||||
Some(Bytes::copy_from_slice(&serde_json::to_vec(&payload).unwrap()))
|
||||
}
|
||||
|
||||
Body::None => None,
|
||||
};
|
||||
|
||||
let mut in_flight_remote_queries = BTreeMap::new();
|
||||
let client = reqwest::ClientBuilder::new()
|
||||
.connect_timeout(std::time::Duration::from_secs(3))
|
||||
.build()
|
||||
.unwrap();
|
||||
|
||||
let method = from_old_http_method(req.method());
|
||||
|
||||
// send payload to all remotes
|
||||
for (node_name, node) in
|
||||
network.remotes.into_iter().filter(|(name, _)| name.as_str() != this)
|
||||
{
|
||||
let body = body.clone();
|
||||
let client = client.clone();
|
||||
let api_key = node.write_api_key;
|
||||
let this = this.clone();
|
||||
let method = method.clone();
|
||||
let path_and_query =
|
||||
req.uri().path_and_query().map(|paq| paq.as_str()).unwrap_or("/");
|
||||
|
||||
in_flight_remote_queries.insert(
|
||||
node_name,
|
||||
tokio::spawn({
|
||||
let url = format!("{}{}", node.url, path_and_query);
|
||||
|
||||
let url_encoded_this = urlencoding::encode(&this).into_owned();
|
||||
let url_encoded_task_uid = task.uid.to_string(); // it's URL-encoded, I promise
|
||||
|
||||
let content_type = content_type.map(|b| b.to_owned());
|
||||
|
||||
let backoff = backoff::ExponentialBackoffBuilder::new()
|
||||
.with_max_elapsed_time(Some(std::time::Duration::from_secs(25)))
|
||||
.build();
|
||||
|
||||
backoff::future::retry(backoff, move || {
|
||||
let url = url.clone();
|
||||
let client = client.clone();
|
||||
let url_encoded_this = url_encoded_this.clone();
|
||||
let url_encoded_task_uid = url_encoded_task_uid.clone();
|
||||
let content_type = content_type.clone();
|
||||
|
||||
let body = body.clone();
|
||||
let api_key = api_key.clone();
|
||||
let method = method.clone();
|
||||
|
||||
async move {
|
||||
try_proxy(
|
||||
method,
|
||||
&url,
|
||||
content_type.as_deref(),
|
||||
api_key.as_deref(),
|
||||
&client,
|
||||
&url_encoded_this,
|
||||
&url_encoded_task_uid,
|
||||
body,
|
||||
)
|
||||
.await
|
||||
}
|
||||
})
|
||||
}),
|
||||
);
|
||||
}
|
||||
|
||||
// wait for all in-flight queries to finish and collect their results
|
||||
let mut remote_tasks: BTreeMap<String, RemoteTask> = BTreeMap::new();
|
||||
for (node_name, handle) in in_flight_remote_queries {
|
||||
match handle.await {
|
||||
Ok(Ok(res)) => {
|
||||
let task_uid = res.task_uid;
|
||||
|
||||
remote_tasks.insert(node_name, Ok(task_uid).into());
|
||||
}
|
||||
Ok(Err(error)) => {
|
||||
remote_tasks.insert(node_name, Err(error.as_response_error()).into());
|
||||
}
|
||||
Err(panic) => match panic.try_into_panic() {
|
||||
Ok(panic) => {
|
||||
let msg = match panic.downcast_ref::<&'static str>() {
|
||||
Some(s) => *s,
|
||||
None => match panic.downcast_ref::<String>() {
|
||||
Some(s) => &s[..],
|
||||
None => "Box<dyn Any>",
|
||||
},
|
||||
};
|
||||
remote_tasks.insert(
|
||||
node_name,
|
||||
Err(ResponseError::from_msg(
|
||||
msg.to_string(),
|
||||
meilisearch_types::error::Code::Internal,
|
||||
))
|
||||
.into(),
|
||||
);
|
||||
}
|
||||
Err(_) => {
|
||||
tracing::error!("proxy task was unexpectedly cancelled")
|
||||
}
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
// edit details to contain the return values from the remotes
|
||||
index_scheduler.set_task_network(task.uid, TaskNetwork::Remotes { remote_tasks })?;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn from_old_http_method(method: &actix_http::Method) -> reqwest::Method {
|
||||
match method {
|
||||
&actix_http::Method::CONNECT => reqwest::Method::CONNECT,
|
||||
&actix_http::Method::DELETE => reqwest::Method::DELETE,
|
||||
&actix_http::Method::GET => reqwest::Method::GET,
|
||||
&actix_http::Method::HEAD => reqwest::Method::HEAD,
|
||||
&actix_http::Method::OPTIONS => reqwest::Method::OPTIONS,
|
||||
&actix_http::Method::PATCH => reqwest::Method::PATCH,
|
||||
&actix_http::Method::POST => reqwest::Method::POST,
|
||||
&actix_http::Method::PUT => reqwest::Method::PUT,
|
||||
&actix_http::Method::TRACE => reqwest::Method::TRACE,
|
||||
method => reqwest::Method::from_bytes(method.as_str().as_bytes()).unwrap(),
|
||||
}
|
||||
}
|
||||
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
async fn try_proxy(
|
||||
method: reqwest::Method,
|
||||
url: &str,
|
||||
content_type: Option<&[u8]>,
|
||||
api_key: Option<&str>,
|
||||
client: &reqwest::Client,
|
||||
url_encoded_this: &str,
|
||||
url_encoded_task_uid: &str,
|
||||
body: Option<Bytes>,
|
||||
) -> Result<SummarizedTaskView, backoff::Error<ProxyDocumentChangeError>> {
|
||||
let request = client.request(method, url).timeout(std::time::Duration::from_secs(30));
|
||||
let request = if let Some(body) = body { request.body(body) } else { request };
|
||||
let request = if let Some(api_key) = api_key { request.bearer_auth(api_key) } else { request };
|
||||
let request = request.header(PROXY_ORIGIN_TASK_UID_HEADER, url_encoded_task_uid);
|
||||
let request = request.header(PROXY_ORIGIN_REMOTE_HEADER, url_encoded_this);
|
||||
let request = if let Some(content_type) = content_type {
|
||||
request.header(CONTENT_TYPE.as_str(), content_type)
|
||||
} else {
|
||||
request
|
||||
};
|
||||
|
||||
let response = request.send().await;
|
||||
let response = match response {
|
||||
Ok(response) => response,
|
||||
Err(error) if error.is_timeout() => {
|
||||
return Err(backoff::Error::transient(ProxyDocumentChangeError::Timeout))
|
||||
}
|
||||
Err(error) => {
|
||||
return Err(backoff::Error::transient(ProxyDocumentChangeError::CouldNotSendRequest(
|
||||
ReqwestErrorWithoutUrl::new(error),
|
||||
)))
|
||||
}
|
||||
};
|
||||
|
||||
match response.status() {
|
||||
status_code if status_code.is_success() => (),
|
||||
StatusCode::UNAUTHORIZED | StatusCode::FORBIDDEN => {
|
||||
return Err(backoff::Error::Permanent(ProxyDocumentChangeError::AuthenticationError))
|
||||
}
|
||||
status_code if status_code.is_client_error() => {
|
||||
let response = parse_error(response).await;
|
||||
return Err(backoff::Error::Permanent(ProxyDocumentChangeError::BadRequest {
|
||||
status_code,
|
||||
response,
|
||||
}));
|
||||
}
|
||||
status_code if status_code.is_server_error() => {
|
||||
let response = parse_error(response).await;
|
||||
return Err(backoff::Error::transient(ProxyDocumentChangeError::RemoteError {
|
||||
status_code,
|
||||
response,
|
||||
}));
|
||||
}
|
||||
status_code => {
|
||||
tracing::warn!(
|
||||
status_code = status_code.as_u16(),
|
||||
"remote replied with unexpected status code"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
let response = match parse_response(response).await {
|
||||
Ok(response) => response,
|
||||
Err(response) => {
|
||||
return Err(backoff::Error::transient(
|
||||
ProxyDocumentChangeError::CouldNotParseResponse { response },
|
||||
))
|
||||
}
|
||||
};
|
||||
|
||||
Ok(response)
|
||||
}
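
The retry behaviour implemented above boils down to a small classification of failures. Below is a minimal, std-only sketch of that policy (timeouts and 5xx answers are transient and retried with exponential backoff; authentication failures and other 4xx answers are permanent). The `Outcome` type and `classify` helper are illustrative names, not part of this changeset.

// Minimal, std-only sketch of the retry policy applied by `try_proxy`:
// timeouts and 5xx answers are retried (transient), authentication failures
// and other 4xx answers are not (permanent). Names are illustrative.

#[derive(Debug)]
enum Outcome {
    Transient(&'static str), // retried with exponential backoff
    Permanent(&'static str), // reported immediately in the task's remote results
    Success,
}

fn classify(timed_out: bool, status: Option<u16>) -> Outcome {
    match (timed_out, status) {
        (true, _) => Outcome::Transient("remote host did not answer before the deadline"),
        (false, Some(401)) | (false, Some(403)) => {
            Outcome::Permanent("could not authenticate against the remote host")
        }
        (false, Some(s)) if (200..300).contains(&s) => Outcome::Success,
        (false, Some(s)) if (400..500).contains(&s) => Outcome::Permanent("remote host responded with a 4xx code"),
        (false, Some(s)) if (500..600).contains(&s) => Outcome::Transient("remote host responded with a 5xx code"),
        _ => Outcome::Transient("could not send the request"),
    }
}

fn main() {
    for (timed_out, status) in [(false, Some(202)), (false, Some(401)), (false, Some(503)), (true, None)] {
        println!("{timed_out} {status:?} -> {:?}", classify(timed_out, status));
    }
}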
|
||||
|
||||
async fn parse_error(response: reqwest::Response) -> Result<String, ReqwestErrorWithoutUrl> {
|
||||
let bytes = match response.bytes().await {
|
||||
Ok(bytes) => bytes,
|
||||
Err(error) => return Err(ReqwestErrorWithoutUrl::new(error)),
|
||||
};
|
||||
|
||||
Ok(parse_bytes_as_error(&bytes))
|
||||
}
|
||||
|
||||
fn parse_bytes_as_error(bytes: &[u8]) -> String {
|
||||
match serde_json::from_slice::<Value>(bytes) {
|
||||
Ok(value) => value.to_string(),
|
||||
Err(_) => String::from_utf8_lossy(bytes).into_owned(),
|
||||
}
|
||||
}
|
||||
|
||||
async fn parse_response<T: DeserializeOwned>(
|
||||
response: reqwest::Response,
|
||||
) -> Result<T, Result<String, ReqwestErrorWithoutUrl>> {
|
||||
let bytes = match response.bytes().await {
|
||||
Ok(bytes) => bytes,
|
||||
Err(error) => return Err(Err(ReqwestErrorWithoutUrl::new(error))),
|
||||
};
|
||||
|
||||
match serde_json::from_slice::<T>(&bytes) {
|
||||
Ok(value) => Ok(value),
|
||||
Err(_) => Err(Ok(parse_bytes_as_error(&bytes))),
|
||||
}
|
||||
}
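
The two helpers above apply the same lenient strategy: try to read the remote's body as JSON, and otherwise fall back to a lossy UTF-8 string so the error stays readable even when the remote misbehaves. A self-contained sketch of that fallback, depending only on `serde_json` (the `body_as_error_message` name is illustrative):

// Try JSON first, fall back to a lossy string representation of the body.
fn body_as_error_message(bytes: &[u8]) -> String {
    match serde_json::from_slice::<serde_json::Value>(bytes) {
        Ok(value) => value.to_string(),
        Err(_) => String::from_utf8_lossy(bytes).into_owned(),
    }
}

fn main() {
    assert_eq!(body_as_error_message(br#"{"message":"boom"}"#), r#"{"message":"boom"}"#);
    assert_eq!(body_as_error_message(b"<html>502 Bad Gateway</html>"), "<html>502 Bad Gateway</html>");
    println!("ok");
}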
|
||||
|
||||
mod error {
|
||||
use meilisearch_types::error::ResponseError;
|
||||
use reqwest::StatusCode;
|
||||
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
pub enum ProxyDocumentChangeError {
|
||||
#[error("{0}")]
|
||||
CouldNotSendRequest(ReqwestErrorWithoutUrl),
|
||||
#[error("could not authenticate against the remote host\n - hint: check that the remote instance was registered with a valid API key having the `documents.add` action")]
|
||||
AuthenticationError,
|
||||
#[error(
|
||||
"could not parse response from the remote host as a document addition response{}\n - hint: check that the remote instance is a Meilisearch instance running the same version",
|
||||
response_from_remote(response)
|
||||
)]
|
||||
CouldNotParseResponse { response: Result<String, ReqwestErrorWithoutUrl> },
|
||||
#[error("remote host responded with code {}{}\n - hint: check that the remote instance has the correct index configuration for that request\n - hint: check that the `network` experimental feature is enabled on the remote instance", status_code.as_u16(), response_from_remote(response))]
|
||||
BadRequest { status_code: StatusCode, response: Result<String, ReqwestErrorWithoutUrl> },
|
||||
#[error("remote host did not answer before the deadline")]
|
||||
Timeout,
|
||||
#[error("remote host responded with code {}{}", status_code.as_u16(), response_from_remote(response))]
|
||||
RemoteError { status_code: StatusCode, response: Result<String, ReqwestErrorWithoutUrl> },
|
||||
}
|
||||
|
||||
impl ProxyDocumentChangeError {
|
||||
pub fn as_response_error(&self) -> ResponseError {
|
||||
use meilisearch_types::error::Code;
|
||||
let message = self.to_string();
|
||||
let code = match self {
|
||||
ProxyDocumentChangeError::CouldNotSendRequest(_) => Code::RemoteCouldNotSendRequest,
|
||||
ProxyDocumentChangeError::AuthenticationError => Code::RemoteInvalidApiKey,
|
||||
ProxyDocumentChangeError::BadRequest { .. } => Code::RemoteBadRequest,
|
||||
ProxyDocumentChangeError::Timeout => Code::RemoteTimeout,
|
||||
ProxyDocumentChangeError::RemoteError { .. } => Code::RemoteRemoteError,
|
||||
ProxyDocumentChangeError::CouldNotParseResponse { .. } => Code::RemoteBadResponse,
|
||||
};
|
||||
ResponseError::from_msg(message, code)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
#[error(transparent)]
|
||||
pub struct ReqwestErrorWithoutUrl(reqwest::Error);
|
||||
impl ReqwestErrorWithoutUrl {
|
||||
pub fn new(inner: reqwest::Error) -> Self {
|
||||
Self(inner.without_url())
|
||||
}
|
||||
}
|
||||
|
||||
fn response_from_remote(response: &Result<String, ReqwestErrorWithoutUrl>) -> String {
|
||||
match response {
|
||||
Ok(response) => {
|
||||
format!(":\n - response from remote: {}", response)
|
||||
}
|
||||
Err(error) => {
|
||||
format!(":\n - additionally, could not retrieve response from remote: {error}")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub const PROXY_ORIGIN_REMOTE_HEADER: &str = "Meili-Proxy-Origin-Remote";
|
||||
pub const PROXY_ORIGIN_TASK_UID_HEADER: &str = "Meili-Proxy-Origin-TaskUid";
|
||||
|
||||
pub fn origin_from_req(req: &HttpRequest) -> Result<Option<Origin>, MeilisearchHttpError> {
|
||||
let (remote_name, task_uid) = match (
|
||||
req.headers().get(PROXY_ORIGIN_REMOTE_HEADER),
|
||||
req.headers().get(PROXY_ORIGIN_TASK_UID_HEADER),
|
||||
) {
|
||||
(None, None) => return Ok(None),
|
||||
(None, Some(_)) => {
|
||||
return Err(MeilisearchHttpError::InconsistentOriginHeaders { is_remote_missing: true })
|
||||
}
|
||||
(Some(_), None) => {
|
||||
return Err(MeilisearchHttpError::InconsistentOriginHeaders {
|
||||
is_remote_missing: false,
|
||||
})
|
||||
}
|
||||
(Some(remote_name), Some(task_uid)) => (
|
||||
urlencoding::decode(remote_name.to_str().map_err(|err| {
|
||||
MeilisearchHttpError::InvalidHeaderValue {
|
||||
header_name: PROXY_ORIGIN_REMOTE_HEADER,
|
||||
msg: format!("while parsing remote name as UTF-8: {err}"),
|
||||
}
|
||||
})?)
|
||||
.map_err(|err| MeilisearchHttpError::InvalidHeaderValue {
|
||||
header_name: PROXY_ORIGIN_REMOTE_HEADER,
|
||||
msg: format!("while URL-decoding remote name: {err}"),
|
||||
})?,
|
||||
urlencoding::decode(task_uid.to_str().map_err(|err| {
|
||||
MeilisearchHttpError::InvalidHeaderValue {
|
||||
header_name: PROXY_ORIGIN_TASK_UID_HEADER,
|
||||
msg: format!("while parsing task UID as UTF-8: {err}"),
|
||||
}
|
||||
})?)
|
||||
.map_err(|err| MeilisearchHttpError::InvalidHeaderValue {
|
||||
header_name: PROXY_ORIGIN_TASK_UID_HEADER,
|
||||
msg: format!("while URL-decoding task UID: {err}"),
|
||||
})?,
|
||||
),
|
||||
};
|
||||
|
||||
let task_uid: usize =
|
||||
task_uid.parse().map_err(|err| MeilisearchHttpError::InvalidHeaderValue {
|
||||
header_name: PROXY_ORIGIN_TASK_UID_HEADER,
|
||||
msg: format!("while parsing the task UID as an integer: {err}"),
|
||||
})?;
|
||||
|
||||
Ok(Some(Origin { remote_name: remote_name.into_owned(), task_uid }))
|
||||
}
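
For clarity, here is a self-contained sketch of the round-trip performed with the two proxy headers: the proxying instance URL-encodes its `self` name and the task uid, and the receiving side decodes them back into an origin. It depends only on the `urlencoding` crate; the `OriginStub` type and helper names are illustrative, not the actual types used above.

// Round-trip of the proxy-origin headers, under the assumptions stated above.
#[derive(Debug, PartialEq)]
struct OriginStub {
    remote_name: String,
    task_uid: usize,
}

fn encode_headers(remote_name: &str, task_uid: usize) -> (String, String) {
    // the task uid only contains digits, so its string form is already URL-safe
    (urlencoding::encode(remote_name).into_owned(), task_uid.to_string())
}

fn decode_headers(remote: &str, task_uid: &str) -> Result<OriginStub, String> {
    let remote_name = urlencoding::decode(remote).map_err(|e| e.to_string())?.into_owned();
    let task_uid: usize =
        task_uid.parse().map_err(|e: std::num::ParseIntError| e.to_string())?;
    Ok(OriginStub { remote_name, task_uid })
}

fn main() {
    let (remote, uid) = encode_headers("ms 0/é", 42);
    let origin = decode_headers(&remote, &uid).unwrap();
    assert_eq!(origin, OriginStub { remote_name: "ms 0/é".into(), task_uid: 42 });
    println!("{origin:?}");
}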
|
||||
@@ -184,7 +184,7 @@ pub fn is_dry_run(req: &HttpRequest, opt: &Opt) -> Result<bool, ResponseError> {
|
||||
.is_some_and(|s| s.to_lowercase() == "true"))
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, ToSchema)]
|
||||
#[derive(Debug, Serialize, Deserialize, ToSchema)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct SummarizedTaskView {
|
||||
/// The task unique identifier.
|
||||
@@ -198,7 +198,10 @@ pub struct SummarizedTaskView {
|
||||
#[serde(rename = "type")]
|
||||
kind: Kind,
|
||||
/// The date on which the task was enqueued.
|
||||
#[serde(serialize_with = "time::serde::rfc3339::serialize")]
|
||||
#[serde(
|
||||
serialize_with = "time::serde::rfc3339::serialize",
|
||||
deserialize_with = "time::serde::rfc3339::deserialize"
|
||||
)]
|
||||
enqueued_at: OffsetDateTime,
|
||||
}
|
||||
|
||||
|
||||
@@ -8,12 +8,13 @@ use index_scheduler::IndexScheduler;
|
||||
use itertools::{EitherOrBoth, Itertools};
|
||||
use meilisearch_types::deserr::DeserrJsonError;
|
||||
use meilisearch_types::error::deserr_codes::{
|
||||
InvalidNetworkRemotes, InvalidNetworkSearchApiKey, InvalidNetworkSelf, InvalidNetworkUrl,
|
||||
InvalidNetworkRemotes, InvalidNetworkSearchApiKey, InvalidNetworkSelf, InvalidNetworkSharding,
|
||||
InvalidNetworkUrl, InvalidNetworkWriteApiKey,
|
||||
};
|
||||
use meilisearch_types::error::ResponseError;
|
||||
use meilisearch_types::features::{Network as DbNetwork, Remote as DbRemote};
|
||||
use meilisearch_types::keys::actions;
|
||||
use meilisearch_types::milli::update::Setting;
|
||||
use meilisearch_types::network::{Network as DbNetwork, Remote as DbRemote};
|
||||
use serde::Serialize;
|
||||
use tracing::debug;
|
||||
use utoipa::{OpenApi, ToSchema};
|
||||
@@ -57,9 +58,9 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
|
||||
{
|
||||
"self": "ms-0",
|
||||
"remotes": {
|
||||
"ms-0": Remote { url: Setting::Set("http://localhost:7700".into()), search_api_key: Setting::Reset },
|
||||
"ms-1": Remote { url: Setting::Set("http://localhost:7701".into()), search_api_key: Setting::Set("foo".into()) },
|
||||
"ms-2": Remote { url: Setting::Set("http://localhost:7702".into()), search_api_key: Setting::Set("bar".into()) },
|
||||
"ms-0": Remote { url: Setting::Set("http://localhost:7700".into()), search_api_key: Setting::Reset, write_api_key: Setting::Reset },
|
||||
"ms-1": Remote { url: Setting::Set("http://localhost:7701".into()), search_api_key: Setting::Set("foo".into()), write_api_key: Setting::Set("bar".into()) },
|
||||
"ms-2": Remote { url: Setting::Set("http://localhost:7702".into()), search_api_key: Setting::Set("bar".into()), write_api_key: Setting::Set("foo".into()) },
|
||||
}
|
||||
})),
|
||||
(status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!(
|
||||
@@ -88,9 +89,9 @@ async fn get_network(
|
||||
#[schema(rename_all = "camelCase")]
|
||||
pub struct Remote {
|
||||
#[schema(value_type = Option<String>, example = json!({
|
||||
"ms-0": Remote { url: Setting::Set("http://localhost:7700".into()), search_api_key: Setting::Reset },
|
||||
"ms-1": Remote { url: Setting::Set("http://localhost:7701".into()), search_api_key: Setting::Set("foo".into()) },
|
||||
"ms-2": Remote { url: Setting::Set("http://localhost:7702".into()), search_api_key: Setting::Set("bar".into()) },
|
||||
"ms-0": Remote { url: Setting::Set("http://localhost:7700".into()), search_api_key: Setting::Reset, write_api_key: Setting::Reset },
|
||||
"ms-1": Remote { url: Setting::Set("http://localhost:7701".into()), search_api_key: Setting::Set("foo".into()), write_api_key: Setting::Set("bar".into()) },
|
||||
"ms-2": Remote { url: Setting::Set("http://localhost:7702".into()), search_api_key: Setting::Set("bar".into()), write_api_key: Setting::Set("foo".into()) },
|
||||
}))]
|
||||
#[deserr(default, error = DeserrJsonError<InvalidNetworkUrl>)]
|
||||
#[serde(default)]
|
||||
@@ -99,6 +100,10 @@ pub struct Remote {
|
||||
#[deserr(default, error = DeserrJsonError<InvalidNetworkSearchApiKey>)]
|
||||
#[serde(default)]
|
||||
pub search_api_key: Setting<String>,
|
||||
#[schema(value_type = Option<String>, example = json!("XWnBI8QHUc-4IlqbKPLUDuhftNq19mQtjc6JvmivzJU"))]
|
||||
#[deserr(default, error = DeserrJsonError<InvalidNetworkWriteApiKey>)]
|
||||
#[serde(default)]
|
||||
pub write_api_key: Setting<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserr, ToSchema, Serialize)]
|
||||
@@ -114,6 +119,10 @@ pub struct Network {
|
||||
#[serde(default, rename = "self")]
|
||||
#[deserr(default, rename = "self", error = DeserrJsonError<InvalidNetworkSelf>)]
|
||||
pub local: Setting<String>,
|
||||
#[schema(value_type = Option<bool>, example = json!(true))]
|
||||
#[serde(default)]
|
||||
#[deserr(default, error = DeserrJsonError<InvalidNetworkSharding>)]
|
||||
pub sharding: Setting<bool>,
|
||||
}
|
||||
|
||||
impl Remote {
|
||||
@@ -136,6 +145,7 @@ impl Remote {
|
||||
Ok(url)
|
||||
})?,
|
||||
search_api_key: self.search_api_key.set(),
|
||||
write_api_key: self.write_api_key.set(),
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -174,9 +184,9 @@ impl Aggregate for PatchNetworkAnalytics {
|
||||
{
|
||||
"self": "ms-0",
|
||||
"remotes": {
|
||||
"ms-0": Remote { url: Setting::Set("http://localhost:7700".into()), search_api_key: Setting::Reset },
|
||||
"ms-1": Remote { url: Setting::Set("http://localhost:7701".into()), search_api_key: Setting::Set("foo".into()) },
|
||||
"ms-2": Remote { url: Setting::Set("http://localhost:7702".into()), search_api_key: Setting::Set("bar".into()) },
|
||||
"ms-0": Remote { url: Setting::Set("http://localhost:7700".into()), search_api_key: Setting::Reset, write_api_key: Setting::Reset },
|
||||
"ms-1": Remote { url: Setting::Set("http://localhost:7701".into()), search_api_key: Setting::Set("foo".into()), write_api_key: Setting::Set("bar".into()) },
|
||||
"ms-2": Remote { url: Setting::Set("http://localhost:7702".into()), search_api_key: Setting::Set("bar".into()), write_api_key: Setting::Set("foo".into()) },
|
||||
}
|
||||
})),
|
||||
(status = 401, description = "The authorization header is missing", body = ResponseError, content_type = "application/json", example = json!(
|
||||
@@ -207,6 +217,19 @@ async fn patch_network(
|
||||
Setting::NotSet => old_network.local,
|
||||
};
|
||||
|
||||
let merged_sharding = match new_network.sharding {
|
||||
Setting::Set(new_sharding) => new_sharding,
|
||||
Setting::Reset => false,
|
||||
Setting::NotSet => old_network.sharding,
|
||||
};
|
||||
|
||||
if merged_sharding && merged_self.is_none() {
|
||||
return Err(ResponseError::from_msg(
|
||||
"`.sharding`: enabling the sharding requires `.self` to be set\n - Hint: Disable `sharding` or set `self` to a value.".into(),
|
||||
meilisearch_types::error::Code::InvalidNetworkSharding,
|
||||
));
|
||||
}
|
||||
|
||||
let merged_remotes = match new_network.remotes {
|
||||
Setting::Set(new_remotes) => {
|
||||
let mut merged_remotes = BTreeMap::new();
|
||||
@@ -217,9 +240,17 @@ async fn patch_network(
|
||||
{
|
||||
match either_or_both {
|
||||
EitherOrBoth::Both((key, old), (_, Some(new))) => {
|
||||
let DbRemote { url: old_url, search_api_key: old_search_api_key } = old;
|
||||
let DbRemote {
|
||||
url: old_url,
|
||||
search_api_key: old_search_api_key,
|
||||
write_api_key: old_write_api_key,
|
||||
} = old;
|
||||
|
||||
let Remote { url: new_url, search_api_key: new_search_api_key } = new;
|
||||
let Remote {
|
||||
url: new_url,
|
||||
search_api_key: new_search_api_key,
|
||||
write_api_key: new_write_api_key,
|
||||
} = new;
|
||||
|
||||
let merged = DbRemote {
|
||||
url: match new_url {
|
||||
@@ -247,6 +278,11 @@ async fn patch_network(
|
||||
Setting::Reset => None,
|
||||
Setting::NotSet => old_search_api_key,
|
||||
},
|
||||
write_api_key: match new_write_api_key {
|
||||
Setting::Set(new_write_api_key) => Some(new_write_api_key),
|
||||
Setting::Reset => None,
|
||||
Setting::NotSet => old_write_api_key,
|
||||
},
|
||||
};
|
||||
merged_remotes.insert(key, merged);
|
||||
}
|
||||
@@ -274,7 +310,8 @@ async fn patch_network(
|
||||
&req,
|
||||
);
|
||||
|
||||
let merged_network = DbNetwork { local: merged_self, remotes: merged_remotes };
|
||||
let merged_network =
|
||||
DbNetwork { local: merged_self, remotes: merged_remotes, sharding: merged_sharding };
|
||||
index_scheduler.put_network(merged_network.clone())?;
|
||||
debug!(returns = ?merged_network, "Patch network");
|
||||
Ok(HttpResponse::Ok().json(merged_network))
|
||||
|
||||
@@ -10,11 +10,11 @@ use actix_http::StatusCode;
|
||||
use index_scheduler::{IndexScheduler, RoFeatures};
|
||||
use itertools::Itertools;
|
||||
use meilisearch_types::error::ResponseError;
|
||||
use meilisearch_types::features::{Network, Remote};
|
||||
use meilisearch_types::milli::order_by_map::OrderByMap;
|
||||
use meilisearch_types::milli::score_details::{ScoreDetails, WeightedScoreValue};
|
||||
use meilisearch_types::milli::vector::Embedding;
|
||||
use meilisearch_types::milli::{self, DocumentId, OrderBy, TimeBudget, DEFAULT_VALUES_PER_FACET};
|
||||
use meilisearch_types::network::{Network, Remote};
|
||||
use roaring::RoaringBitmap;
|
||||
use tokio::task::JoinHandle;
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
pub use error::ProxySearchError;
|
||||
use error::ReqwestErrorWithoutUrl;
|
||||
use meilisearch_types::features::Remote;
|
||||
use meilisearch_types::network::Remote;
|
||||
use rand::Rng as _;
|
||||
use reqwest::{Client, Response, StatusCode};
|
||||
use serde::de::DeserializeOwned;
|
||||
|
||||
@@ -46,7 +46,7 @@ async fn errors_on_param() {
|
||||
meili_snap::snapshot!(code, @"400 Bad Request");
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
|
||||
{
|
||||
"message": "Unknown field `selfie`: expected one of `remotes`, `self`",
|
||||
"message": "Unknown field `selfie`: expected one of `remotes`, `self`, `sharding`",
|
||||
"code": "bad_request",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#bad_request"
|
||||
@@ -149,7 +149,7 @@ async fn errors_on_param() {
|
||||
meili_snap::snapshot!(code, @"400 Bad Request");
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
|
||||
{
|
||||
"message": "Unknown field `doggo` inside `.remotes.new`: expected one of `url`, `searchApiKey`",
|
||||
"message": "Unknown field `doggo` inside `.remotes.new`: expected one of `url`, `searchApiKey`, `writeApiKey`",
|
||||
"code": "invalid_network_remotes",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_network_remotes"
|
||||
@@ -192,9 +192,11 @@ async fn errors_on_param() {
|
||||
"remotes": {
|
||||
"kefir": {
|
||||
"url": "http://localhost:7700",
|
||||
"searchApiKey": null
|
||||
"searchApiKey": null,
|
||||
"writeApiKey": null
|
||||
}
|
||||
}
|
||||
},
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
let (response, code) = server
|
||||
@@ -266,7 +268,8 @@ async fn auth() {
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
|
||||
{
|
||||
"self": "master",
|
||||
"remotes": {}
|
||||
"remotes": {},
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
|
||||
@@ -274,11 +277,12 @@ async fn auth() {
|
||||
|
||||
meili_snap::snapshot!(code, @"200 OK");
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
|
||||
{
|
||||
"self": "master",
|
||||
"remotes": {}
|
||||
}
|
||||
"###);
|
||||
{
|
||||
"self": "master",
|
||||
"remotes": {},
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
|
||||
// try get with get permission
|
||||
server.use_api_key(get_network_key.as_str().unwrap());
|
||||
@@ -286,11 +290,12 @@ async fn auth() {
|
||||
|
||||
meili_snap::snapshot!(code, @"200 OK");
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
|
||||
{
|
||||
"self": "master",
|
||||
"remotes": {}
|
||||
}
|
||||
"###);
|
||||
{
|
||||
"self": "master",
|
||||
"remotes": {},
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
|
||||
// try update with update permission
|
||||
server.use_api_key(update_network_key.as_str().unwrap());
|
||||
@@ -303,11 +308,12 @@ async fn auth() {
|
||||
|
||||
meili_snap::snapshot!(code, @"200 OK");
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
|
||||
{
|
||||
"self": "api_key",
|
||||
"remotes": {}
|
||||
}
|
||||
"###);
|
||||
{
|
||||
"self": "api_key",
|
||||
"remotes": {},
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
|
||||
// try with the other's permission
|
||||
let (response, code) = server.get_network().await;
|
||||
@@ -383,7 +389,8 @@ async fn get_and_set_network() {
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
|
||||
{
|
||||
"self": null,
|
||||
"remotes": {}
|
||||
"remotes": {},
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
|
||||
@@ -393,7 +400,8 @@ async fn get_and_set_network() {
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
|
||||
{
|
||||
"self": "myself",
|
||||
"remotes": {}
|
||||
"remotes": {},
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
|
||||
@@ -417,13 +425,16 @@ async fn get_and_set_network() {
|
||||
"remotes": {
|
||||
"myself": {
|
||||
"url": "http://localhost:7700",
|
||||
"searchApiKey": null
|
||||
"searchApiKey": null,
|
||||
"writeApiKey": null
|
||||
},
|
||||
"thy": {
|
||||
"url": "http://localhost:7701",
|
||||
"searchApiKey": "foo"
|
||||
"searchApiKey": "foo",
|
||||
"writeApiKey": null
|
||||
}
|
||||
}
|
||||
},
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
|
||||
@@ -443,13 +454,16 @@ async fn get_and_set_network() {
|
||||
"remotes": {
|
||||
"myself": {
|
||||
"url": "http://localhost:7700",
|
||||
"searchApiKey": null
|
||||
"searchApiKey": null,
|
||||
"writeApiKey": null
|
||||
},
|
||||
"thy": {
|
||||
"url": "http://localhost:7701",
|
||||
"searchApiKey": "bar"
|
||||
"searchApiKey": "bar",
|
||||
"writeApiKey": null
|
||||
}
|
||||
}
|
||||
},
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
|
||||
@@ -470,17 +484,21 @@ async fn get_and_set_network() {
|
||||
"remotes": {
|
||||
"myself": {
|
||||
"url": "http://localhost:7700",
|
||||
"searchApiKey": null
|
||||
"searchApiKey": null,
|
||||
"writeApiKey": null
|
||||
},
|
||||
"them": {
|
||||
"url": "http://localhost:7702",
|
||||
"searchApiKey": "baz"
|
||||
"searchApiKey": "baz",
|
||||
"writeApiKey": null
|
||||
},
|
||||
"thy": {
|
||||
"url": "http://localhost:7701",
|
||||
"searchApiKey": "bar"
|
||||
"searchApiKey": "bar",
|
||||
"writeApiKey": null
|
||||
}
|
||||
}
|
||||
},
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
|
||||
@@ -498,13 +516,16 @@ async fn get_and_set_network() {
|
||||
"remotes": {
|
||||
"them": {
|
||||
"url": "http://localhost:7702",
|
||||
"searchApiKey": "baz"
|
||||
"searchApiKey": "baz",
|
||||
"writeApiKey": null
|
||||
},
|
||||
"thy": {
|
||||
"url": "http://localhost:7701",
|
||||
"searchApiKey": "bar"
|
||||
"searchApiKey": "bar",
|
||||
"writeApiKey": null
|
||||
}
|
||||
}
|
||||
},
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
|
||||
@@ -518,13 +539,16 @@ async fn get_and_set_network() {
|
||||
"remotes": {
|
||||
"them": {
|
||||
"url": "http://localhost:7702",
|
||||
"searchApiKey": "baz"
|
||||
"searchApiKey": "baz",
|
||||
"writeApiKey": null
|
||||
},
|
||||
"thy": {
|
||||
"url": "http://localhost:7701",
|
||||
"searchApiKey": "bar"
|
||||
"searchApiKey": "bar",
|
||||
"writeApiKey": null
|
||||
}
|
||||
}
|
||||
},
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
|
||||
@@ -538,13 +562,16 @@ async fn get_and_set_network() {
|
||||
"remotes": {
|
||||
"them": {
|
||||
"url": "http://localhost:7702",
|
||||
"searchApiKey": "baz"
|
||||
"searchApiKey": "baz",
|
||||
"writeApiKey": null
|
||||
},
|
||||
"thy": {
|
||||
"url": "http://localhost:7701",
|
||||
"searchApiKey": "bar"
|
||||
"searchApiKey": "bar",
|
||||
"writeApiKey": null
|
||||
}
|
||||
}
|
||||
},
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
|
||||
@@ -553,60 +580,69 @@ async fn get_and_set_network() {
|
||||
|
||||
meili_snap::snapshot!(code, @"200 OK");
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"self": "thy",
"remotes": {
"them": {
"url": "http://localhost:7702",
"searchApiKey": "baz"
},
"thy": {
"url": "http://localhost:7701",
"searchApiKey": "bar"
}
}
}
"###);
{
"self": "thy",
"remotes": {
"them": {
"url": "http://localhost:7702",
"searchApiKey": "baz",
"writeApiKey": null
},
"thy": {
"url": "http://localhost:7701",
"searchApiKey": "bar",
"writeApiKey": null
}
},
"sharding": false
}
"###);
|
||||
|
||||
// still doing nothing
|
||||
let (response, code) = server.set_network(json!({"remotes": {}})).await;
|
||||
|
||||
meili_snap::snapshot!(code, @"200 OK");
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"self": "thy",
"remotes": {
"them": {
"url": "http://localhost:7702",
"searchApiKey": "baz"
},
"thy": {
"url": "http://localhost:7701",
"searchApiKey": "bar"
}
}
}
"###);
{
"self": "thy",
"remotes": {
"them": {
"url": "http://localhost:7702",
"searchApiKey": "baz",
"writeApiKey": null
},
"thy": {
"url": "http://localhost:7701",
"searchApiKey": "bar",
"writeApiKey": null
}
},
"sharding": false
}
"###);
|
||||
|
||||
// good time to check GET
|
||||
let (response, code) = server.get_network().await;
|
||||
|
||||
meili_snap::snapshot!(code, @"200 OK");
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"self": "thy",
"remotes": {
"them": {
"url": "http://localhost:7702",
"searchApiKey": "baz"
},
"thy": {
"url": "http://localhost:7701",
"searchApiKey": "bar"
}
}
}
"###);
{
"self": "thy",
"remotes": {
"them": {
"url": "http://localhost:7702",
"searchApiKey": "baz",
"writeApiKey": null
},
"thy": {
"url": "http://localhost:7701",
"searchApiKey": "bar",
"writeApiKey": null
}
},
"sharding": false
}
"###);
|
||||
|
||||
// deleting everything
|
||||
let (response, code) = server
|
||||
@@ -619,7 +655,8 @@ async fn get_and_set_network() {
|
||||
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
|
||||
{
|
||||
"self": "thy",
|
||||
"remotes": {}
|
||||
"remotes": {},
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
}
|
||||
|
||||
@@ -132,7 +132,8 @@ async fn remote_sharding() {
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"self": "ms0",
|
||||
"remotes": {}
|
||||
"remotes": {},
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
let (response, code) = ms1.set_network(json!({"self": "ms1"})).await;
|
||||
@@ -140,7 +141,8 @@ async fn remote_sharding() {
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"self": "ms1",
|
||||
"remotes": {}
|
||||
"remotes": {},
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
let (response, code) = ms2.set_network(json!({"self": "ms2"})).await;
|
||||
@@ -148,7 +150,8 @@ async fn remote_sharding() {
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"self": "ms2",
|
||||
"remotes": {}
|
||||
"remotes": {},
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
|
||||
@@ -444,7 +447,8 @@ async fn remote_sharding_retrieve_vectors() {
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"self": "ms0",
|
||||
"remotes": {}
|
||||
"remotes": {},
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
let (response, code) = ms1.set_network(json!({"self": "ms1"})).await;
|
||||
@@ -452,7 +456,8 @@ async fn remote_sharding_retrieve_vectors() {
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"self": "ms1",
|
||||
"remotes": {}
|
||||
"remotes": {},
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
let (response, code) = ms2.set_network(json!({"self": "ms2"})).await;
|
||||
@@ -460,7 +465,8 @@ async fn remote_sharding_retrieve_vectors() {
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"self": "ms2",
|
||||
"remotes": {}
|
||||
"remotes": {},
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
|
||||
@@ -934,7 +940,8 @@ async fn error_unregistered_remote() {
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"self": "ms0",
|
||||
"remotes": {}
|
||||
"remotes": {},
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
let (response, code) = ms1.set_network(json!({"self": "ms1"})).await;
|
||||
@@ -942,7 +949,8 @@ async fn error_unregistered_remote() {
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"self": "ms1",
|
||||
"remotes": {}
|
||||
"remotes": {},
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
|
||||
@@ -1052,7 +1060,8 @@ async fn error_no_weighted_score() {
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"self": "ms0",
|
||||
"remotes": {}
|
||||
"remotes": {},
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
let (response, code) = ms1.set_network(json!({"self": "ms1"})).await;
|
||||
@@ -1060,7 +1069,8 @@ async fn error_no_weighted_score() {
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"self": "ms1",
|
||||
"remotes": {}
|
||||
"remotes": {},
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
|
||||
@@ -1185,7 +1195,8 @@ async fn error_bad_response() {
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"self": "ms0",
|
||||
"remotes": {}
|
||||
"remotes": {},
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
let (response, code) = ms1.set_network(json!({"self": "ms1"})).await;
|
||||
@@ -1193,7 +1204,8 @@ async fn error_bad_response() {
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"self": "ms1",
|
||||
"remotes": {}
|
||||
"remotes": {},
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
|
||||
@@ -1322,7 +1334,8 @@ async fn error_bad_request() {
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"self": "ms0",
|
||||
"remotes": {}
|
||||
"remotes": {},
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
let (response, code) = ms1.set_network(json!({"self": "ms1"})).await;
|
||||
@@ -1330,7 +1343,8 @@ async fn error_bad_request() {
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"self": "ms1",
|
||||
"remotes": {}
|
||||
"remotes": {},
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
|
||||
@@ -1452,7 +1466,8 @@ async fn error_bad_request_facets_by_index() {
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"self": "ms0",
|
||||
"remotes": {}
|
||||
"remotes": {},
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
let (response, code) = ms1.set_network(json!({"self": "ms1"})).await;
|
||||
@@ -1460,7 +1475,8 @@ async fn error_bad_request_facets_by_index() {
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"self": "ms1",
|
||||
"remotes": {}
|
||||
"remotes": {},
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
|
||||
@@ -1593,7 +1609,8 @@ async fn error_bad_request_facets_by_index_facet() {
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"self": "ms0",
|
||||
"remotes": {}
|
||||
"remotes": {},
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
let (response, code) = ms1.set_network(json!({"self": "ms1"})).await;
|
||||
@@ -1601,7 +1618,8 @@ async fn error_bad_request_facets_by_index_facet() {
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"self": "ms1",
|
||||
"remotes": {}
|
||||
"remotes": {},
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
|
||||
@@ -1743,7 +1761,8 @@ async fn error_remote_does_not_answer() {
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"self": "ms0",
|
||||
"remotes": {}
|
||||
"remotes": {},
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
let (response, code) = ms1.set_network(json!({"self": "ms1"})).await;
|
||||
@@ -1751,7 +1770,8 @@ async fn error_remote_does_not_answer() {
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"self": "ms1",
|
||||
"remotes": {}
|
||||
"remotes": {},
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
|
||||
@@ -1944,7 +1964,8 @@ async fn error_remote_404() {
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"self": "ms0",
|
||||
"remotes": {}
|
||||
"remotes": {},
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
let (response, code) = ms1.set_network(json!({"self": "ms1"})).await;
|
||||
@@ -1952,7 +1973,8 @@ async fn error_remote_404() {
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"self": "ms1",
|
||||
"remotes": {}
|
||||
"remotes": {},
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
|
||||
@@ -2139,7 +2161,8 @@ async fn error_remote_sharding_auth() {
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"self": "ms0",
|
||||
"remotes": {}
|
||||
"remotes": {},
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
let (response, code) = ms1.set_network(json!({"self": "ms1"})).await;
|
||||
@@ -2147,7 +2170,8 @@ async fn error_remote_sharding_auth() {
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"self": "ms1",
|
||||
"remotes": {}
|
||||
"remotes": {},
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
|
||||
@@ -2299,7 +2323,8 @@ async fn remote_sharding_auth() {
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"self": "ms0",
|
||||
"remotes": {}
|
||||
"remotes": {},
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
let (response, code) = ms1.set_network(json!({"self": "ms1"})).await;
|
||||
@@ -2307,7 +2332,8 @@ async fn remote_sharding_auth() {
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"self": "ms1",
|
||||
"remotes": {}
|
||||
"remotes": {},
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
|
||||
@@ -2454,7 +2480,8 @@ async fn error_remote_500() {
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"self": "ms0",
|
||||
"remotes": {}
|
||||
"remotes": {},
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
let (response, code) = ms1.set_network(json!({"self": "ms1"})).await;
|
||||
@@ -2462,7 +2489,8 @@ async fn error_remote_500() {
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"self": "ms1",
|
||||
"remotes": {}
|
||||
"remotes": {},
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
|
||||
@@ -2633,7 +2661,8 @@ async fn error_remote_500_once() {
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"self": "ms0",
|
||||
"remotes": {}
|
||||
"remotes": {},
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
let (response, code) = ms1.set_network(json!({"self": "ms1"})).await;
|
||||
@@ -2641,7 +2670,8 @@ async fn error_remote_500_once() {
|
||||
snapshot!(json_string!(response), @r###"
|
||||
{
|
||||
"self": "ms1",
|
||||
"remotes": {}
|
||||
"remotes": {},
|
||||
"sharding": false
|
||||
}
|
||||
"###);
|
||||
|
||||
|
||||
@@ -43,7 +43,7 @@ async fn version_too_old() {
|
||||
std::fs::write(db_path.join("VERSION"), "1.11.9999").unwrap();
|
||||
let options = Opt { experimental_dumpless_upgrade: true, ..default_settings };
|
||||
let err = Server::new_with_options(options).await.map(|_| ()).unwrap_err();
|
||||
snapshot!(err, @"Database version 1.11.9999 is too old for the experimental dumpless upgrade feature. Please generate a dump using the v1.11.9999 and import it in the v1.18.0");
|
||||
snapshot!(err, @"Database version 1.11.9999 is too old for the experimental dumpless upgrade feature. Please generate a dump using the v1.11.9999 and import it in the v1.22.0");
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
@@ -58,7 +58,7 @@ async fn version_requires_downgrade() {
|
||||
std::fs::write(db_path.join("VERSION"), format!("{major}.{minor}.{patch}")).unwrap();
|
||||
let options = Opt { experimental_dumpless_upgrade: true, ..default_settings };
|
||||
let err = Server::new_with_options(options).await.map(|_| ()).unwrap_err();
|
||||
snapshot!(err, @"Database version 1.18.1 is higher than the Meilisearch version 1.18.0. Downgrade is not supported");
|
||||
snapshot!(err, @"Database version 1.22.1 is higher than the Meilisearch version 1.22.0. Downgrade is not supported");
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
|
||||
@@ -8,7 +8,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
|
||||
"progress": null,
|
||||
"details": {
|
||||
"upgradeFrom": "v1.12.0",
|
||||
"upgradeTo": "v1.18.0"
|
||||
"upgradeTo": "v1.22.0"
|
||||
},
|
||||
"stats": {
|
||||
"totalNbTasks": 1,
|
||||
|
||||
@@ -8,7 +8,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
|
||||
"progress": null,
|
||||
"details": {
|
||||
"upgradeFrom": "v1.12.0",
|
||||
"upgradeTo": "v1.18.0"
|
||||
"upgradeTo": "v1.22.0"
|
||||
},
|
||||
"stats": {
|
||||
"totalNbTasks": 1,
|
||||
|
||||
@@ -8,7 +8,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
|
||||
"progress": null,
|
||||
"details": {
|
||||
"upgradeFrom": "v1.12.0",
|
||||
"upgradeTo": "v1.18.0"
|
||||
"upgradeTo": "v1.22.0"
|
||||
},
|
||||
"stats": {
|
||||
"totalNbTasks": 1,
|
||||
|
||||
@@ -12,7 +12,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
|
||||
"canceledBy": null,
|
||||
"details": {
|
||||
"upgradeFrom": "v1.12.0",
|
||||
"upgradeTo": "v1.18.0"
|
||||
"upgradeTo": "v1.22.0"
|
||||
},
|
||||
"error": null,
|
||||
"duration": "[duration]",
|
||||
|
||||
@@ -12,7 +12,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
|
||||
"canceledBy": null,
|
||||
"details": {
|
||||
"upgradeFrom": "v1.12.0",
|
||||
"upgradeTo": "v1.18.0"
|
||||
"upgradeTo": "v1.22.0"
|
||||
},
|
||||
"error": null,
|
||||
"duration": "[duration]",
|
||||
|
||||
@@ -12,7 +12,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
|
||||
"canceledBy": null,
|
||||
"details": {
|
||||
"upgradeFrom": "v1.12.0",
|
||||
"upgradeTo": "v1.18.0"
|
||||
"upgradeTo": "v1.22.0"
|
||||
},
|
||||
"error": null,
|
||||
"duration": "[duration]",
|
||||
|
||||
@@ -8,7 +8,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
|
||||
"progress": null,
|
||||
"details": {
|
||||
"upgradeFrom": "v1.12.0",
|
||||
"upgradeTo": "v1.18.0"
|
||||
"upgradeTo": "v1.22.0"
|
||||
},
|
||||
"stats": {
|
||||
"totalNbTasks": 1,
|
||||
|
||||
@@ -12,7 +12,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
|
||||
"canceledBy": null,
|
||||
"details": {
|
||||
"upgradeFrom": "v1.12.0",
|
||||
"upgradeTo": "v1.18.0"
|
||||
"upgradeTo": "v1.22.0"
|
||||
},
|
||||
"error": null,
|
||||
"duration": "[duration]",
|
||||
|
||||
@@ -104,8 +104,8 @@ async fn binary_quantize_before_sending_documents() {
|
||||
"manual": {
|
||||
"embeddings": [
|
||||
[
|
||||
-1.0,
|
||||
-1.0,
|
||||
0.0,
|
||||
0.0,
|
||||
1.0
|
||||
]
|
||||
],
|
||||
@@ -122,7 +122,7 @@ async fn binary_quantize_before_sending_documents() {
|
||||
[
|
||||
1.0,
|
||||
1.0,
|
||||
-1.0
|
||||
0.0
|
||||
]
|
||||
],
|
||||
"regenerate": false
|
||||
@@ -191,8 +191,8 @@ async fn binary_quantize_after_sending_documents() {
|
||||
"manual": {
|
||||
"embeddings": [
|
||||
[
|
||||
-1.0,
|
||||
-1.0,
|
||||
0.0,
|
||||
0.0,
|
||||
1.0
|
||||
]
|
||||
],
|
||||
@@ -209,7 +209,7 @@ async fn binary_quantize_after_sending_documents() {
|
||||
[
|
||||
1.0,
|
||||
1.0,
|
||||
-1.0
|
||||
0.0
|
||||
]
|
||||
],
|
||||
"regenerate": false
|
||||
@@ -320,7 +320,7 @@ async fn binary_quantize_clear_documents() {
|
||||
}
|
||||
"###);
|
||||
|
||||
// Make sure the arroy DB has been cleared
|
||||
// Make sure the hannoy DB has been cleared
|
||||
let (documents, _code) =
|
||||
index.search_post(json!({ "hybrid": { "embedder": "manual" }, "vector": [1, 1, 1] })).await;
|
||||
snapshot!(documents, @r#"
|
||||
|
||||
@@ -684,7 +684,7 @@ async fn clear_documents() {
|
||||
}
|
||||
"###);
|
||||
|
||||
// Make sure the arroy DB has been cleared
|
||||
// Make sure the hannoy DB has been cleared
|
||||
let (documents, _code) =
|
||||
index.search_post(json!({ "vector": [1, 1, 1], "hybrid": {"embedder": "manual"} })).await;
|
||||
snapshot!(documents, @r#"
|
||||
|
||||
@@ -236,7 +236,7 @@ async fn reset_embedder_documents() {
|
||||
}
|
||||
"###);
|
||||
|
||||
// Make sure the arroy DB has been cleared
|
||||
// Make sure the hannoy DB has been cleared
|
||||
let (documents, _code) =
|
||||
index.search_post(json!({ "vector": [1, 1, 1], "hybrid": {"embedder": "default"} })).await;
|
||||
snapshot!(json_string!(documents), @r###"
|
||||
|
||||
@@ -142,8 +142,8 @@ enum Command {
|
||||
|
||||
#[derive(Clone, ValueEnum)]
|
||||
enum IndexPart {
|
||||
/// Will make the arroy index hot.
|
||||
Arroy,
|
||||
/// Will make the hannoy index hot.
|
||||
Hannoy,
|
||||
}
|
||||
|
||||
fn main() -> anyhow::Result<()> {
|
||||
@@ -658,12 +658,12 @@ fn hair_dryer(
|
||||
let rtxn = index.read_txn()?;
|
||||
for part in index_parts {
|
||||
match part {
|
||||
IndexPart::Arroy => {
|
||||
IndexPart::Hannoy => {
|
||||
let mut count = 0;
|
||||
let total = index.vector_arroy.len(&rtxn)?;
|
||||
eprintln!("Hair drying arroy for {uid}...");
|
||||
let total = index.vector_store.len(&rtxn)?;
|
||||
eprintln!("Hair drying hannoy for {uid}...");
|
||||
for (i, result) in index
|
||||
.vector_arroy
|
||||
.vector_store
|
||||
.remap_types::<Bytes, Bytes>()
|
||||
.iter(&rtxn)?
|
||||
.enumerate()
|
||||
|
||||
@@ -68,7 +68,7 @@ pub fn v1_10_to_v1_11(
|
||||
)
|
||||
})?;
|
||||
let index_read_database =
|
||||
try_opening_poly_database(&index_env, &index_rtxn, db_name::VECTOR_ARROY)
|
||||
try_opening_poly_database(&index_env, &index_rtxn, db_name::VECTOR_STORE)
|
||||
.with_context(|| format!("while updating date format for index `{uid}`"))?;
|
||||
|
||||
let mut index_wtxn = index_env.write_txn().with_context(|| {
|
||||
@@ -79,7 +79,7 @@ pub fn v1_10_to_v1_11(
|
||||
})?;
|
||||
|
||||
let index_write_database =
|
||||
try_opening_poly_database(&index_env, &index_wtxn, db_name::VECTOR_ARROY)
|
||||
try_opening_poly_database(&index_env, &index_wtxn, db_name::VECTOR_STORE)
|
||||
.with_context(|| format!("while updating date format for index `{uid}`"))?;
|
||||
|
||||
meilisearch_types::milli::arroy::upgrade::cosine_from_0_4_to_0_5(
|
||||
|
||||
@@ -88,6 +88,7 @@ rhai = { version = "1.22.2", features = [
"sync",
] }
arroy = "0.6.1"
hannoy = "0.0.4"
rand = "0.8.5"
tracing = "0.1.41"
ureq = { version = "2.12.1", features = ["json"] }
@@ -95,6 +96,7 @@ url = "2.5.4"
hashbrown = "0.15.4"
bumpalo = "3.18.1"
bumparaw-collections = "0.1.4"
steppe = { version = "0.4.0", default-features = false }
thread_local = "1.1.9"
allocator-api2 = "0.3.0"
rustc-hash = "2.1.1"
@@ -109,6 +111,7 @@ utoipa = { version = "5.4.0", features = [
"openapi_extensions",
] }
lru = "0.14.0"
twox-hash = { version = "2.1.1", default-features = false, features = ["std", "xxhash3_64", "xxhash64"] }

[dev-dependencies]
mimalloc = { version = "0.1.47", default-features = false }
@@ -1,17 +1,13 @@
use crate::{
distance_between_two_points,
heed_codec::facet::{FieldDocIdFacetCodec, OrderedF64Codec},
lat_lng_to_xyz,
search::new::{facet_string_values, facet_values_prefix_key},
GeoPoint, Index,
};
use heed::{
types::{Bytes, Unit},
RoPrefix, RoTxn,
};
use std::collections::VecDeque;

use heed::types::{Bytes, Unit};
use heed::{RoPrefix, RoTxn};
use roaring::RoaringBitmap;
use rstar::RTree;
use std::collections::VecDeque;

use crate::heed_codec::facet::{FieldDocIdFacetCodec, OrderedF64Codec};
use crate::search::new::{facet_string_values, facet_values_prefix_key};
use crate::{distance_between_two_points, lat_lng_to_xyz, GeoPoint, Index};

#[derive(Debug, Clone, Copy)]
pub struct GeoSortParameter {
@@ -1,19 +1,16 @@
use std::collections::{BTreeSet, VecDeque};

use crate::{
constants::RESERVED_GEO_FIELD_NAME,
documents::{geo_sort::next_bucket, GeoSortParameter},
heed_codec::{
facet::{FacetGroupKeyCodec, FacetGroupValueCodec},
BytesRefCodec,
},
is_faceted,
search::facet::{ascending_facet_sort, descending_facet_sort},
AscDesc, DocumentId, Member, UserError,
};
use heed::Database;
use roaring::RoaringBitmap;

use crate::constants::RESERVED_GEO_FIELD_NAME;
use crate::documents::geo_sort::next_bucket;
use crate::documents::GeoSortParameter;
use crate::heed_codec::facet::{FacetGroupKeyCodec, FacetGroupValueCodec};
use crate::heed_codec::BytesRefCodec;
use crate::search::facet::{ascending_facet_sort, descending_facet_sort};
use crate::{is_faceted, AscDesc, DocumentId, Member, UserError};

#[derive(Debug, Clone, Copy)]
enum AscDescId {
Facet { field_id: u16, ascending: bool },
@@ -78,6 +78,8 @@ pub enum InternalError {
#[error(transparent)]
ArroyError(#[from] arroy::Error),
#[error(transparent)]
HannoyError(#[from] hannoy::Error),
#[error(transparent)]
VectorEmbeddingError(#[from] crate::vector::Error),
}
@@ -441,6 +443,29 @@ impl From<arroy::Error> for Error {
}
}

impl From<hannoy::Error> for Error {
fn from(value: hannoy::Error) -> Self {
match value {
hannoy::Error::Heed(heed) => heed.into(),
hannoy::Error::Io(io) => io.into(),
hannoy::Error::InvalidVecDimension { expected, received } => {
Error::UserError(UserError::InvalidVectorDimensions { expected, found: received })
}
hannoy::Error::BuildCancelled => Error::InternalError(InternalError::AbortedIndexation),
hannoy::Error::DatabaseFull
| hannoy::Error::InvalidItemAppend
| hannoy::Error::UnmatchingDistance { .. }
| hannoy::Error::NeedBuild(_)
| hannoy::Error::MissingKey { .. }
| hannoy::Error::MissingMetadata(_)
| hannoy::Error::UnknownVersion { .. }
| hannoy::Error::CannotDecodeKeyMode { .. } => {
Error::InternalError(InternalError::HannoyError(value))
}
}
}
}

#[derive(Error, Debug)]
pub enum GeoError {
#[error("The `_geo` field in the document with the id: `{document_id}` is not an object. Was expecting an object with the `_geo.lat` and `_geo.lng` fields but instead got `{value}`.")]
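The new `From<hannoy::Error>` conversion mirrors the existing arroy one: a dimension mismatch becomes a user error, a cancelled build becomes `AbortedIndexation`, and everything else is wrapped as an internal error. A minimal sketch of what this buys calling code, assuming only the conversion shown above and milli's `crate::Result` alias (the helper name is hypothetical):

```rust
// Hypothetical helper: a hannoy failure can be bubbled up with `?` from any
// function returning the crate's `Result`, and the match above decides whether
// it surfaces as a UserError or an InternalError.
fn lift(res: std::result::Result<(), hannoy::Error>) -> crate::Result<()> {
    // `?` goes through `impl From<hannoy::Error> for Error`.
    Ok(res?)
}
```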
@@ -31,7 +31,7 @@ use crate::prompt::PromptData;
use crate::proximity::ProximityPrecision;
use crate::update::new::StdResult;
use crate::vector::db::IndexEmbeddingConfigs;
use crate::vector::{ArroyStats, ArroyWrapper, Embedding};
use crate::vector::{Embedding, HannoyStats, VectorStore};
use crate::{
default_criteria, CboRoaringBitmapCodec, Criterion, DocumentId, ExternalDocumentsIds,
FacetDistribution, FieldDistribution, FieldId, FieldIdMapMissingEntry, FieldIdWordCountCodec,
@@ -113,7 +113,7 @@ pub mod db_name {
pub const FIELD_ID_DOCID_FACET_F64S: &str = "field-id-docid-facet-f64s";
pub const FIELD_ID_DOCID_FACET_STRINGS: &str = "field-id-docid-facet-strings";
pub const VECTOR_EMBEDDER_CATEGORY_ID: &str = "vector-embedder-category-id";
pub const VECTOR_ARROY: &str = "vector-arroy";
pub const VECTOR_STORE: &str = "vector-arroy";
pub const DOCUMENTS: &str = "documents";
}
const NUMBER_OF_DBS: u32 = 25;
@@ -177,10 +177,10 @@ pub struct Index {
/// Maps the document id, the facet field id and the strings.
pub field_id_docid_facet_strings: Database<FieldDocIdFacetStringCodec, Str>,

/// Maps an embedder name to its id in the arroy store.
/// Maps an embedder name to its id in the hannoy store.
pub(crate) embedder_category_id: Database<Unspecified, Unspecified>,
/// Vector store based on arroy™.
pub vector_arroy: arroy::Database<Unspecified>,
/// Vector store based on hannoy™.
pub vector_store: hannoy::Database<Unspecified>,

/// Maps the document id to the document as an obkv store.
pub(crate) documents: Database<BEU32, ObkvCodec>,
@@ -237,7 +237,7 @@ impl Index {
// vector stuff
let embedder_category_id =
env.create_database(&mut wtxn, Some(VECTOR_EMBEDDER_CATEGORY_ID))?;
let vector_arroy = env.create_database(&mut wtxn, Some(VECTOR_ARROY))?;
let vector_store = env.create_database(&mut wtxn, Some(VECTOR_STORE))?;

let documents = env.create_database(&mut wtxn, Some(DOCUMENTS))?;

@@ -264,7 +264,7 @@ impl Index {
facet_id_is_empty_docids,
field_id_docid_facet_f64s,
field_id_docid_facet_strings,
vector_arroy,
vector_store,
embedder_category_id,
documents,
};
@@ -1769,11 +1769,13 @@ impl Index {
) -> Result<BTreeMap<String, EmbeddingsWithMetadata>> {
let mut res = BTreeMap::new();
let embedders = self.embedding_configs();
let index_version = self.get_version(rtxn)?.unwrap();
for config in embedders.embedding_configs(rtxn)? {
let embedder_info = embedders.embedder_info(rtxn, &config.name)?.unwrap();
let has_fragments = config.config.embedder_options.has_fragments();
let reader = ArroyWrapper::new(
self.vector_arroy,
let reader = VectorStore::new(
index_version,
self.vector_store,
embedder_info.embedder_id,
config.config.quantized(),
);
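The recurring change in this file (and across the diff) is the constructor: `ArroyWrapper::new(db, embedder_id, quantized)` becomes `VectorStore::new(index_version, db, embedder_id, quantized)`, with the index version presumably letting the store distinguish older arroy-era layouts from hannoy ones. A minimal hedged sketch of the new pattern, using only the calls visible in this diff (the helper name is hypothetical):

```rust
// Hypothetical convenience wrapper around the new constructor signature.
fn open_vector_reader(
    index: &Index,
    rtxn: &heed::RoTxn<'_>,
    embedder_id: u8,
    quantized: bool,
) -> crate::Result<VectorStore> {
    // The version is stored in the index and is assumed to be present here.
    let index_version = index.get_version(rtxn)?.unwrap();
    Ok(VectorStore::new(index_version, index.vector_store, embedder_id, quantized))
}
```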
@@ -1792,13 +1794,18 @@ impl Index {
Ok(PrefixSettings { compute_prefixes, max_prefix_length: 4, prefix_count_threshold: 100 })
}

pub fn arroy_stats(&self, rtxn: &RoTxn<'_>) -> Result<ArroyStats> {
let mut stats = ArroyStats::default();
pub fn hannoy_stats(&self, rtxn: &RoTxn<'_>) -> Result<HannoyStats> {
let mut stats = HannoyStats::default();
let embedding_configs = self.embedding_configs();
let index_version = self.get_version(rtxn)?.unwrap();
for config in embedding_configs.embedding_configs(rtxn)? {
let embedder_id = embedding_configs.embedder_id(rtxn, &config.name)?.unwrap();
let reader =
ArroyWrapper::new(self.vector_arroy, embedder_id, config.config.quantized());
let reader = VectorStore::new(
index_version,
self.vector_store,
embedder_id,
config.config.quantized(),
);
reader.aggregate_stats(rtxn, &mut stats)?;
}
Ok(stats)
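`arroy_stats` is renamed to `hannoy_stats` and aggregates one `HannoyStats` across every embedder of the index. A small hedged usage sketch; it assumes, based on how the stats are used in the vector filter code later in this diff, that `HannoyStats::documents` is a `RoaringBitmap` of documents present in the store:

```rust
// Hypothetical helper: count how many documents currently have at least one
// stored vector, across all embedders of the index.
fn stored_vector_documents(index: &Index) -> crate::Result<u64> {
    let rtxn = index.read_txn()?;
    let stats: HannoyStats = index.hannoy_stats(&rtxn)?;
    // `documents` is assumed to be a RoaringBitmap, so `len` is a u64 count.
    Ok(stats.documents.len())
}
```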
@@ -1842,7 +1849,7 @@ impl Index {
facet_id_is_empty_docids,
field_id_docid_facet_f64s,
field_id_docid_facet_strings,
vector_arroy,
vector_store: vector_hannoy,
embedder_category_id,
documents,
} = self;
@@ -1913,7 +1920,7 @@ impl Index {
"field_id_docid_facet_strings",
field_id_docid_facet_strings.stat(rtxn).map(compute_size)?,
);
sizes.insert("vector_arroy", vector_arroy.stat(rtxn).map(compute_size)?);
sizes.insert("vector_hannoy", vector_hannoy.stat(rtxn).map(compute_size)?);
sizes.insert("embedder_category_id", embedder_category_id.stat(rtxn).map(compute_size)?);
sizes.insert("documents", documents.stat(rtxn).map(compute_size)?);
@@ -53,7 +53,7 @@ pub use search::new::{
};
use serde_json::Value;
pub use thread_pool_no_abort::{PanicCatched, ThreadPoolNoAbort, ThreadPoolNoAbortBuilder};
pub use {arroy, charabia as tokenizer, heed, rhai};
pub use {arroy, charabia as tokenizer, hannoy, heed, rhai};

pub use self::asc_desc::{AscDesc, AscDescError, Member, SortError};
pub use self::attribute_patterns::{AttributePatterns, PatternMatch};
@@ -5,7 +5,6 @@ use std::sync::atomic::{AtomicU32, AtomicUsize, Ordering};
use std::sync::{Arc, RwLock};
use std::time::{Duration, Instant};

use enum_iterator::Sequence;
use indexmap::IndexMap;
use itertools::Itertools;
use serde::Serialize;
@@ -96,14 +95,6 @@ impl Progress {

durations.drain(..).map(|(name, duration)| (name, format!("{duration:.2?}"))).collect()
}

// TODO: ideally we should expose the progress in a way that let arroy use it directly
pub(crate) fn update_progress_from_arroy(&self, progress: arroy::WriterProgress) {
self.update_progress(progress.main);
if let Some(sub) = progress.sub {
self.update_progress(sub);
}
}
}

/// Generate the names associated with the durations and push them.
@@ -277,43 +268,26 @@ impl<U: Send + Sync + 'static> Step for VariableNameStep<U> {
}
}

impl Step for arroy::MainStep {
fn name(&self) -> Cow<'static, str> {
match self {
arroy::MainStep::PreProcessingTheItems => "pre processing the items",
arroy::MainStep::WritingTheDescendantsAndMetadata => {
"writing the descendants and metadata"
}
arroy::MainStep::RetrieveTheUpdatedItems => "retrieve the updated items",
arroy::MainStep::RetrievingTheTreeAndItemNodes => "retrieving the tree and item nodes",
arroy::MainStep::UpdatingTheTrees => "updating the trees",
arroy::MainStep::CreateNewTrees => "create new trees",
arroy::MainStep::WritingNodesToDatabase => "writing nodes to database",
arroy::MainStep::DeleteExtraneousTrees => "delete extraneous trees",
arroy::MainStep::WriteTheMetadata => "write the metadata",
}
.into()
}
// Integration with steppe

fn current(&self) -> u32 {
*self as u32
}

fn total(&self) -> u32 {
Self::CARDINALITY as u32
impl steppe::Progress for Progress {
fn update(&self, sub_progress: impl steppe::Step) {
self.update_progress(Compat(sub_progress));
}
}

impl Step for arroy::SubStep {
struct Compat<T: steppe::Step>(T);

impl<T: steppe::Step> Step for Compat<T> {
fn name(&self) -> Cow<'static, str> {
self.unit.into()
self.0.name()
}

fn current(&self) -> u32 {
self.current.load(Ordering::Relaxed)
self.0.current().try_into().unwrap_or(u32::MAX)
}

fn total(&self) -> u32 {
self.max
self.0.total().try_into().unwrap_or(u32::MAX)
}
}
@@ -3,7 +3,7 @@ use roaring::{MultiOps, RoaringBitmap};
|
||||
|
||||
use crate::error::{DidYouMean, Error};
|
||||
use crate::vector::db::IndexEmbeddingConfig;
|
||||
use crate::vector::{ArroyStats, ArroyWrapper};
|
||||
use crate::vector::{HannoyStats, VectorStore};
|
||||
use crate::Index;
|
||||
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
@@ -82,6 +82,7 @@ fn evaluate_inner(
|
||||
embedding_configs: &[IndexEmbeddingConfig],
|
||||
filter: &VectorFilter<'_>,
|
||||
) -> crate::Result<RoaringBitmap> {
|
||||
let index_version = index.get_version(rtxn)?.unwrap();
|
||||
let embedder_name = embedder.value();
|
||||
let available_embedders =
|
||||
|| embedding_configs.iter().map(|c| c.name.clone()).collect::<Vec<_>>();
|
||||
@@ -96,8 +97,9 @@ fn evaluate_inner(
|
||||
.embedder_info(rtxn, embedder_name)?
|
||||
.ok_or_else(|| EmbedderDoesNotExist { embedder, available: available_embedders() })?;
|
||||
|
||||
let arroy_wrapper = ArroyWrapper::new(
|
||||
index.vector_arroy,
|
||||
let vector_store = VectorStore::new(
|
||||
index_version,
|
||||
index.vector_store,
|
||||
embedder_info.embedder_id,
|
||||
embedding_config.config.quantized(),
|
||||
);
|
||||
@@ -122,7 +124,7 @@ fn evaluate_inner(
|
||||
})?;
|
||||
|
||||
let user_provided_docids = embedder_info.embedding_status.user_provided_docids();
|
||||
arroy_wrapper.items_in_store(rtxn, fragment_config.id, |bitmap| {
|
||||
vector_store.items_in_store(rtxn, fragment_config.id, |bitmap| {
|
||||
bitmap.clone() - user_provided_docids
|
||||
})?
|
||||
}
|
||||
@@ -132,8 +134,8 @@ fn evaluate_inner(
|
||||
}
|
||||
|
||||
let user_provided_docids = embedder_info.embedding_status.user_provided_docids();
|
||||
let mut stats = ArroyStats::default();
|
||||
arroy_wrapper.aggregate_stats(rtxn, &mut stats)?;
|
||||
let mut stats = HannoyStats::default();
|
||||
vector_store.aggregate_stats(rtxn, &mut stats)?;
|
||||
stats.documents - user_provided_docids.clone()
|
||||
}
|
||||
VectorFilter::UserProvided => {
|
||||
@@ -141,14 +143,14 @@ fn evaluate_inner(
|
||||
user_provided_docids.clone()
|
||||
}
|
||||
VectorFilter::Regenerate => {
|
||||
let mut stats = ArroyStats::default();
|
||||
arroy_wrapper.aggregate_stats(rtxn, &mut stats)?;
|
||||
let mut stats = HannoyStats::default();
|
||||
vector_store.aggregate_stats(rtxn, &mut stats)?;
|
||||
let skip_regenerate = embedder_info.embedding_status.skip_regenerate_docids();
|
||||
stats.documents - skip_regenerate
|
||||
}
|
||||
VectorFilter::None => {
|
||||
let mut stats = ArroyStats::default();
|
||||
arroy_wrapper.aggregate_stats(rtxn, &mut stats)?;
|
||||
let mut stats = HannoyStats::default();
|
||||
vector_store.aggregate_stats(rtxn, &mut stats)?;
|
||||
stats.documents
|
||||
}
|
||||
};
|
||||
|
||||
@@ -76,6 +76,7 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index {
|
||||
&mut new_fields_ids_map,
|
||||
&|| false,
|
||||
Progress::default(),
|
||||
None,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
|
||||
@@ -6,7 +6,7 @@ use roaring::RoaringBitmap;
|
||||
use super::ranking_rules::{RankingRule, RankingRuleOutput, RankingRuleQueryTrait};
|
||||
use super::VectorStoreStats;
|
||||
use crate::score_details::{self, ScoreDetails};
|
||||
use crate::vector::{ArroyWrapper, DistributionShift, Embedder};
|
||||
use crate::vector::{DistributionShift, Embedder, VectorStore};
|
||||
use crate::{DocumentId, Result, SearchContext, SearchLogger};
|
||||
|
||||
pub struct VectorSort<Q: RankingRuleQueryTrait> {
|
||||
@@ -56,7 +56,12 @@ impl<Q: RankingRuleQueryTrait> VectorSort<Q> {
|
||||
let target = &self.target;
|
||||
|
||||
let before = Instant::now();
|
||||
let reader = ArroyWrapper::new(ctx.index.vector_arroy, self.embedder_index, self.quantized);
|
||||
let reader = VectorStore::new(
|
||||
ctx.index.get_version(ctx.txn)?.unwrap(),
|
||||
ctx.index.vector_store,
|
||||
self.embedder_index,
|
||||
self.quantized,
|
||||
);
|
||||
let results = reader.nns_by_vector(ctx.txn, target, self.limit, Some(vector_candidates))?;
|
||||
self.cached_sorted_docids = results.into_iter();
|
||||
*ctx.vector_store_stats.get_or_insert_default() += VectorStoreStats {
|
||||
|
||||
@@ -3,7 +3,7 @@ use std::sync::Arc;
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use crate::score_details::{self, ScoreDetails};
|
||||
use crate::vector::{ArroyWrapper, Embedder};
|
||||
use crate::vector::{Embedder, VectorStore};
|
||||
use crate::{filtered_universe, DocumentId, Filter, Index, Result, SearchResult};
|
||||
|
||||
pub struct Similar<'a> {
|
||||
@@ -72,7 +72,12 @@ impl<'a> Similar<'a> {
|
||||
crate::UserError::InvalidSimilarEmbedder(self.embedder_name.to_owned())
|
||||
})?;
|
||||
|
||||
let reader = ArroyWrapper::new(self.index.vector_arroy, embedder_index, self.quantized);
|
||||
let reader = VectorStore::new(
|
||||
self.index.get_version(self.rtxn)?.unwrap(),
|
||||
self.index.vector_store,
|
||||
embedder_index,
|
||||
self.quantized,
|
||||
);
|
||||
let results = reader.nns_by_item(
|
||||
self.rtxn,
|
||||
self.id,
|
||||
|
||||
@@ -84,6 +84,7 @@ impl TempIndex {
|
||||
&mut new_fields_ids_map,
|
||||
&|| false,
|
||||
Progress::default(),
|
||||
None,
|
||||
)?;
|
||||
|
||||
if let Some(error) = operation_stats.into_iter().find_map(|stat| stat.error) {
|
||||
@@ -167,6 +168,7 @@ impl TempIndex {
|
||||
&mut new_fields_ids_map,
|
||||
&|| false,
|
||||
Progress::default(),
|
||||
None,
|
||||
)?;
|
||||
|
||||
if let Some(error) = operation_stats.into_iter().find_map(|stat| stat.error) {
|
||||
@@ -242,6 +244,7 @@ fn aborting_indexation() {
|
||||
&mut new_fields_ids_map,
|
||||
&|| false,
|
||||
Progress::default(),
|
||||
None,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
|
||||
@@ -2,7 +2,8 @@ use heed::RwTxn;
|
||||
use roaring::RoaringBitmap;
|
||||
use time::OffsetDateTime;
|
||||
|
||||
use crate::{database_stats::DatabaseStats, FieldDistribution, Index, Result};
|
||||
use crate::database_stats::DatabaseStats;
|
||||
use crate::{FieldDistribution, Index, Result};
|
||||
|
||||
pub struct ClearDocuments<'t, 'i> {
|
||||
wtxn: &'t mut RwTxn<'i>,
|
||||
@@ -45,7 +46,7 @@ impl<'t, 'i> ClearDocuments<'t, 'i> {
|
||||
facet_id_is_empty_docids,
|
||||
field_id_docid_facet_f64s,
|
||||
field_id_docid_facet_strings,
|
||||
vector_arroy,
|
||||
vector_store,
|
||||
embedder_category_id: _,
|
||||
documents,
|
||||
} = self.index;
|
||||
@@ -88,7 +89,7 @@ impl<'t, 'i> ClearDocuments<'t, 'i> {
|
||||
field_id_docid_facet_f64s.clear(self.wtxn)?;
|
||||
field_id_docid_facet_strings.clear(self.wtxn)?;
|
||||
// vector
|
||||
vector_arroy.clear(self.wtxn)?;
|
||||
vector_store.clear(self.wtxn)?;
|
||||
|
||||
documents.clear(self.wtxn)?;
|
||||
|
||||
|
||||
@@ -2,9 +2,8 @@ use std::collections::BTreeSet;
|
||||
use std::fs::File;
|
||||
use std::io::{self, BufReader};
|
||||
|
||||
use heed::{BytesDecode, BytesEncode};
|
||||
use heed::BytesDecode;
|
||||
use obkv::KvReaderU16;
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use super::helpers::{
|
||||
create_sorter, create_writer, try_split_array_at, writer_into_reader, GrenadParameters,
|
||||
@@ -16,7 +15,7 @@ use crate::index::db_name::DOCID_WORD_POSITIONS;
|
||||
use crate::update::del_add::{is_noop_del_add_obkv, DelAdd, KvReaderDelAdd, KvWriterDelAdd};
|
||||
use crate::update::index_documents::helpers::sorter_into_reader;
|
||||
use crate::update::settings::InnerIndexSettingsDiff;
|
||||
use crate::{CboRoaringBitmapCodec, DocumentId, FieldId, Result};
|
||||
use crate::{DocumentId, FieldId, Result};
|
||||
|
||||
/// Extracts the word and the documents ids where this word appear.
|
||||
///
|
||||
@@ -201,45 +200,3 @@ fn words_into_sorter(
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")]
|
||||
fn docids_into_writers<W>(
|
||||
word: &str,
|
||||
deletions: &RoaringBitmap,
|
||||
additions: &RoaringBitmap,
|
||||
writer: &mut grenad::Writer<W>,
|
||||
) -> Result<()>
|
||||
where
|
||||
W: std::io::Write,
|
||||
{
|
||||
if deletions == additions {
|
||||
// if the same value is deleted and added, do nothing.
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
// Write each value in the same KvDelAdd before inserting it in the final writer.
|
||||
let mut obkv = KvWriterDelAdd::memory();
|
||||
// deletions:
|
||||
if !deletions.is_empty() && !deletions.is_subset(additions) {
|
||||
obkv.insert(
|
||||
DelAdd::Deletion,
|
||||
CboRoaringBitmapCodec::bytes_encode(deletions).map_err(|_| {
|
||||
SerializationError::Encoding { db_name: Some(DOCID_WORD_POSITIONS) }
|
||||
})?,
|
||||
)?;
|
||||
}
|
||||
// additions:
|
||||
if !additions.is_empty() {
|
||||
obkv.insert(
|
||||
DelAdd::Addition,
|
||||
CboRoaringBitmapCodec::bytes_encode(additions).map_err(|_| {
|
||||
SerializationError::Encoding { db_name: Some(DOCID_WORD_POSITIONS) }
|
||||
})?,
|
||||
)?;
|
||||
}
|
||||
|
||||
// insert everything in the same writer.
|
||||
writer.insert(word.as_bytes(), obkv.into_inner().unwrap())?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -39,7 +39,7 @@ use crate::update::{
|
||||
IndexerConfig, UpdateIndexingStep, WordPrefixDocids, WordPrefixIntegerDocids, WordsPrefixesFst,
|
||||
};
|
||||
use crate::vector::db::EmbedderInfo;
|
||||
use crate::vector::{ArroyWrapper, RuntimeEmbedders};
|
||||
use crate::vector::{RuntimeEmbedders, VectorStore};
|
||||
use crate::{CboRoaringBitmapCodec, Index, Result, UserError};
|
||||
|
||||
static MERGED_DATABASE_COUNT: usize = 7;
|
||||
@@ -485,6 +485,7 @@ where
|
||||
|
||||
// If an embedder wasn't used in the typedchunk but must be binary quantized
|
||||
// we should insert it in `dimension`
|
||||
let index_version = self.index.get_version(self.wtxn)?.unwrap();
|
||||
for (name, action) in settings_diff.embedding_config_updates.iter() {
|
||||
if action.is_being_quantized && !dimension.contains_key(name.as_str()) {
|
||||
let index = self.index.embedding_configs().embedder_id(self.wtxn, name)?.ok_or(
|
||||
@@ -493,8 +494,12 @@ where
|
||||
key: None,
|
||||
},
|
||||
)?;
|
||||
let reader =
|
||||
ArroyWrapper::new(self.index.vector_arroy, index, action.was_quantized);
|
||||
let reader = VectorStore::new(
|
||||
index_version,
|
||||
self.index.vector_store,
|
||||
index,
|
||||
action.was_quantized,
|
||||
);
|
||||
let Some(dim) = reader.dimensions(self.wtxn)? else {
|
||||
continue;
|
||||
};
|
||||
@@ -504,7 +509,7 @@ where
|
||||
|
||||
for (embedder_name, dimension) in dimension {
|
||||
let wtxn = &mut *self.wtxn;
|
||||
let vector_arroy = self.index.vector_arroy;
|
||||
let vector_hannoy = self.index.vector_store;
|
||||
let cancel = &self.should_abort;
|
||||
|
||||
let embedder_index =
|
||||
@@ -523,11 +528,12 @@ where
|
||||
let is_quantizing = embedder_config.is_some_and(|action| action.is_being_quantized);
|
||||
|
||||
pool.install(|| {
|
||||
let mut writer = ArroyWrapper::new(vector_arroy, embedder_index, was_quantized);
|
||||
let mut writer =
|
||||
VectorStore::new(index_version, vector_hannoy, embedder_index, was_quantized);
|
||||
writer.build_and_quantize(
|
||||
wtxn,
|
||||
// In the settings we don't have any progress to share
|
||||
&Progress::default(),
|
||||
Progress::default(),
|
||||
&mut rng,
|
||||
dimension,
|
||||
is_quantizing,
|
||||
@@ -1977,6 +1983,7 @@ mod tests {
|
||||
&mut new_fields_ids_map,
|
||||
&|| false,
|
||||
Progress::default(),
|
||||
None,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
@@ -2029,6 +2036,7 @@ mod tests {
|
||||
&mut new_fields_ids_map,
|
||||
&|| false,
|
||||
Progress::default(),
|
||||
None,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
@@ -2117,6 +2125,7 @@ mod tests {
|
||||
&mut new_fields_ids_map,
|
||||
&|| false,
|
||||
Progress::default(),
|
||||
None,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
@@ -2306,6 +2315,7 @@ mod tests {
|
||||
&mut new_fields_ids_map,
|
||||
&|| false,
|
||||
Progress::default(),
|
||||
None,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
@@ -2369,6 +2379,7 @@ mod tests {
|
||||
&mut new_fields_ids_map,
|
||||
&|| false,
|
||||
Progress::default(),
|
||||
None,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
@@ -2423,6 +2434,7 @@ mod tests {
|
||||
&mut new_fields_ids_map,
|
||||
&|| false,
|
||||
Progress::default(),
|
||||
None,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
@@ -2476,6 +2488,7 @@ mod tests {
|
||||
&mut new_fields_ids_map,
|
||||
&|| false,
|
||||
Progress::default(),
|
||||
None,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
@@ -2531,6 +2544,7 @@ mod tests {
|
||||
&mut new_fields_ids_map,
|
||||
&|| false,
|
||||
Progress::default(),
|
||||
None,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
@@ -2591,6 +2605,7 @@ mod tests {
|
||||
&mut new_fields_ids_map,
|
||||
&|| false,
|
||||
Progress::default(),
|
||||
None,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
@@ -2644,6 +2659,7 @@ mod tests {
|
||||
&mut new_fields_ids_map,
|
||||
&|| false,
|
||||
Progress::default(),
|
||||
None,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
@@ -2697,6 +2713,7 @@ mod tests {
|
||||
&mut new_fields_ids_map,
|
||||
&|| false,
|
||||
Progress::default(),
|
||||
None,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
@@ -2908,6 +2925,7 @@ mod tests {
|
||||
&mut new_fields_ids_map,
|
||||
&|| false,
|
||||
Progress::default(),
|
||||
None,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
@@ -2968,6 +2986,7 @@ mod tests {
|
||||
&mut new_fields_ids_map,
|
||||
&|| false,
|
||||
Progress::default(),
|
||||
None,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
@@ -3025,6 +3044,7 @@ mod tests {
|
||||
&mut new_fields_ids_map,
|
||||
&|| false,
|
||||
Progress::default(),
|
||||
None,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
|
||||
@@ -32,7 +32,7 @@ use crate::update::settings::{InnerIndexSettings, InnerIndexSettingsDiff};
|
||||
use crate::update::{AvailableIds, UpdateIndexingStep};
|
||||
use crate::vector::parsed_vectors::{ExplicitVectors, VectorOrArrayOfVectors};
|
||||
use crate::vector::settings::{RemoveFragments, WriteBackToDocuments};
|
||||
use crate::vector::ArroyWrapper;
|
||||
use crate::vector::VectorStore;
|
||||
use crate::{FieldDistribution, FieldId, FieldIdMapMissingEntry, Index, Result};
|
||||
|
||||
pub struct TransformOutput {
|
||||
@@ -834,15 +834,17 @@ impl<'a, 'i> Transform<'a, 'i> {
|
||||
None
|
||||
};
|
||||
|
||||
let readers: BTreeMap<&str, (ArroyWrapper, &RoaringBitmap)> = settings_diff
|
||||
let index_version = self.index.get_version(wtxn)?.unwrap();
|
||||
let readers: BTreeMap<&str, (VectorStore, &RoaringBitmap)> = settings_diff
|
||||
.embedding_config_updates
|
||||
.iter()
|
||||
.filter_map(|(name, action)| {
|
||||
if let Some(WriteBackToDocuments { embedder_id, user_provided }) =
|
||||
action.write_back()
|
||||
{
|
||||
let reader = ArroyWrapper::new(
|
||||
self.index.vector_arroy,
|
||||
let reader = VectorStore::new(
|
||||
index_version,
|
||||
self.index.vector_store,
|
||||
*embedder_id,
|
||||
action.was_quantized,
|
||||
);
|
||||
@@ -882,10 +884,7 @@ impl<'a, 'i> Transform<'a, 'i> {
|
||||
InternalError::DatabaseMissingEntry { db_name: db_name::DOCUMENTS, key: None },
|
||||
)?;
|
||||
|
||||
let injected_vectors: std::result::Result<
|
||||
serde_json::Map<String, serde_json::Value>,
|
||||
arroy::Error,
|
||||
> = readers
|
||||
let injected_vectors: crate::Result<_> = readers
|
||||
.iter()
|
||||
.filter_map(|(name, (reader, user_provided))| {
|
||||
if !user_provided.contains(docid) {
|
||||
@@ -949,9 +948,13 @@ impl<'a, 'i> Transform<'a, 'i> {
|
||||
else {
|
||||
continue;
|
||||
};
|
||||
let arroy =
|
||||
ArroyWrapper::new(self.index.vector_arroy, infos.embedder_id, was_quantized);
|
||||
let Some(dimensions) = arroy.dimensions(wtxn)? else {
|
||||
let hannoy = VectorStore::new(
|
||||
index_version,
|
||||
self.index.vector_store,
|
||||
infos.embedder_id,
|
||||
was_quantized,
|
||||
);
|
||||
let Some(dimensions) = hannoy.dimensions(wtxn)? else {
|
||||
continue;
|
||||
};
|
||||
for fragment_id in fragment_ids {
|
||||
@@ -959,17 +962,17 @@ impl<'a, 'i> Transform<'a, 'i> {
|
||||
|
||||
if infos.embedding_status.user_provided_docids().is_empty() {
|
||||
// no user provided: clear store
|
||||
arroy.clear_store(wtxn, *fragment_id, dimensions)?;
|
||||
hannoy.clear_store(wtxn, *fragment_id, dimensions)?;
|
||||
continue;
|
||||
}
|
||||
|
||||
// some user provided, remove only the ids that are not user provided
|
||||
let to_delete = arroy.items_in_store(wtxn, *fragment_id, |items| {
|
||||
let to_delete = hannoy.items_in_store(wtxn, *fragment_id, |items| {
|
||||
items - infos.embedding_status.user_provided_docids()
|
||||
})?;
|
||||
|
||||
for to_delete in to_delete {
|
||||
arroy.del_item_in_store(wtxn, to_delete, *fragment_id, dimensions)?;
|
||||
hannoy.del_item_in_store(wtxn, to_delete, *fragment_id, dimensions)?;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -27,7 +27,7 @@ use crate::update::index_documents::helpers::{
|
||||
};
|
||||
use crate::update::settings::InnerIndexSettingsDiff;
|
||||
use crate::vector::db::{EmbeddingStatusDelta, IndexEmbeddingConfig};
|
||||
use crate::vector::ArroyWrapper;
|
||||
use crate::vector::VectorStore;
|
||||
use crate::{
|
||||
lat_lng_to_xyz, CboRoaringBitmapCodec, DocumentId, FieldId, GeoPoint, Index, InternalError,
|
||||
Result, SerializationError, U8StrStrCodec,
|
||||
@@ -619,6 +619,7 @@ pub(crate) fn write_typed_chunk_into_index(
|
||||
let _entered = span.enter();
|
||||
|
||||
let embedders = index.embedding_configs();
|
||||
let index_version = index.get_version(wtxn)?.unwrap();
|
||||
|
||||
let mut remove_vectors_builder = MergerBuilder::new(KeepFirst);
|
||||
let mut manual_vectors_builder = MergerBuilder::new(KeepFirst);
|
||||
@@ -677,7 +678,12 @@ pub(crate) fn write_typed_chunk_into_index(
|
||||
.get(&embedder_name)
|
||||
.is_some_and(|conf| conf.is_quantized);
|
||||
// FIXME: allow customizing distance
|
||||
let writer = ArroyWrapper::new(index.vector_arroy, infos.embedder_id, binary_quantized);
|
||||
let writer = VectorStore::new(
|
||||
index_version,
|
||||
index.vector_store,
|
||||
infos.embedder_id,
|
||||
binary_quantized,
|
||||
);
|
||||
|
||||
// remove vectors for docids we want them removed
|
||||
let merger = remove_vectors_builder.build();
|
||||
|
||||
@@ -1,7 +1,8 @@
|
||||
use grenad::CompressionType;
|
||||
|
||||
use super::GrenadParameters;
|
||||
use crate::{thread_pool_no_abort::ThreadPoolNoAbort, ThreadPoolNoAbortBuilder};
|
||||
use crate::thread_pool_no_abort::ThreadPoolNoAbort;
|
||||
use crate::ThreadPoolNoAbortBuilder;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct IndexerConfig {
|
||||
|
||||
@@ -255,9 +255,9 @@ impl<'a> From<FrameGrantR<'a>> for FrameWithHeader<'a> {
|
||||
#[repr(u8)]
|
||||
pub enum EntryHeader {
|
||||
DbOperation(DbOperation),
|
||||
ArroyDeleteVector(ArroyDeleteVector),
|
||||
ArroySetVectors(ArroySetVectors),
|
||||
ArroySetVector(ArroySetVector),
|
||||
HannoyDeleteVector(HannoyDeleteVector),
|
||||
HannoySetVectors(HannoySetVectors),
|
||||
HannoySetVector(HannoySetVector),
|
||||
}
|
||||
|
||||
impl EntryHeader {
|
||||
@@ -268,9 +268,9 @@ impl EntryHeader {
|
||||
const fn variant_id(&self) -> u8 {
|
||||
match self {
|
||||
EntryHeader::DbOperation(_) => 0,
|
||||
EntryHeader::ArroyDeleteVector(_) => 1,
|
||||
EntryHeader::ArroySetVectors(_) => 2,
|
||||
EntryHeader::ArroySetVector(_) => 3,
|
||||
EntryHeader::HannoyDeleteVector(_) => 1,
|
||||
EntryHeader::HannoySetVectors(_) => 2,
|
||||
EntryHeader::HannoySetVector(_) => 3,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -286,26 +286,26 @@ impl EntryHeader {
|
||||
}
|
||||
|
||||
const fn total_delete_vector_size() -> usize {
|
||||
Self::variant_size() + mem::size_of::<ArroyDeleteVector>()
|
||||
Self::variant_size() + mem::size_of::<HannoyDeleteVector>()
|
||||
}
|
||||
|
||||
/// The `dimensions` corresponds to the number of `f32` in the embedding.
|
||||
fn total_set_vectors_size(count: usize, dimensions: usize) -> usize {
|
||||
let embedding_size = dimensions * mem::size_of::<f32>();
|
||||
Self::variant_size() + mem::size_of::<ArroySetVectors>() + embedding_size * count
|
||||
Self::variant_size() + mem::size_of::<HannoySetVectors>() + embedding_size * count
|
||||
}
|
||||
|
||||
fn total_set_vector_size(dimensions: usize) -> usize {
|
||||
let embedding_size = dimensions * mem::size_of::<f32>();
|
||||
Self::variant_size() + mem::size_of::<ArroySetVector>() + embedding_size
|
||||
Self::variant_size() + mem::size_of::<HannoySetVector>() + embedding_size
|
||||
}
|
||||
|
||||
fn header_size(&self) -> usize {
|
||||
let payload_size = match self {
|
||||
EntryHeader::DbOperation(op) => mem::size_of_val(op),
|
||||
EntryHeader::ArroyDeleteVector(adv) => mem::size_of_val(adv),
|
||||
EntryHeader::ArroySetVectors(asvs) => mem::size_of_val(asvs),
|
||||
EntryHeader::ArroySetVector(asv) => mem::size_of_val(asv),
|
||||
EntryHeader::HannoyDeleteVector(adv) => mem::size_of_val(adv),
|
||||
EntryHeader::HannoySetVectors(asvs) => mem::size_of_val(asvs),
|
||||
EntryHeader::HannoySetVector(asv) => mem::size_of_val(asv),
|
||||
};
|
||||
Self::variant_size() + payload_size
|
||||
}
|
||||
@@ -319,19 +319,19 @@ impl EntryHeader {
|
||||
EntryHeader::DbOperation(header)
|
||||
}
|
||||
1 => {
|
||||
let header_bytes = &remaining[..mem::size_of::<ArroyDeleteVector>()];
|
||||
let header_bytes = &remaining[..mem::size_of::<HannoyDeleteVector>()];
|
||||
let header = checked::pod_read_unaligned(header_bytes);
|
||||
EntryHeader::ArroyDeleteVector(header)
|
||||
EntryHeader::HannoyDeleteVector(header)
|
||||
}
|
||||
2 => {
|
||||
let header_bytes = &remaining[..mem::size_of::<ArroySetVectors>()];
|
||||
let header_bytes = &remaining[..mem::size_of::<HannoySetVectors>()];
|
||||
let header = checked::pod_read_unaligned(header_bytes);
|
||||
EntryHeader::ArroySetVectors(header)
|
||||
EntryHeader::HannoySetVectors(header)
|
||||
}
|
||||
3 => {
|
||||
let header_bytes = &remaining[..mem::size_of::<ArroySetVector>()];
|
||||
let header_bytes = &remaining[..mem::size_of::<HannoySetVector>()];
|
||||
let header = checked::pod_read_unaligned(header_bytes);
|
||||
EntryHeader::ArroySetVector(header)
|
||||
EntryHeader::HannoySetVector(header)
|
||||
}
|
||||
id => panic!("invalid variant id: {id}"),
|
||||
}
|
||||
@@ -341,9 +341,9 @@ impl EntryHeader {
|
||||
let (first, remaining) = header_bytes.split_first_mut().unwrap();
|
||||
let payload_bytes = match self {
|
||||
EntryHeader::DbOperation(op) => bytemuck::bytes_of(op),
|
||||
EntryHeader::ArroyDeleteVector(adv) => bytemuck::bytes_of(adv),
|
||||
EntryHeader::ArroySetVectors(asvs) => bytemuck::bytes_of(asvs),
|
||||
EntryHeader::ArroySetVector(asv) => bytemuck::bytes_of(asv),
|
||||
EntryHeader::HannoyDeleteVector(adv) => bytemuck::bytes_of(adv),
|
||||
EntryHeader::HannoySetVectors(asvs) => bytemuck::bytes_of(asvs),
|
||||
EntryHeader::HannoySetVector(asv) => bytemuck::bytes_of(asv),
|
||||
};
|
||||
*first = self.variant_id();
|
||||
remaining.copy_from_slice(payload_bytes);
|
||||
@@ -378,7 +378,7 @@ impl DbOperation {
|
||||
|
||||
#[derive(Debug, Clone, Copy, NoUninit, CheckedBitPattern)]
|
||||
#[repr(transparent)]
|
||||
pub struct ArroyDeleteVector {
|
||||
pub struct HannoyDeleteVector {
|
||||
pub docid: DocumentId,
|
||||
}
|
||||
|
||||
@@ -386,13 +386,13 @@ pub struct ArroyDeleteVector {
|
||||
#[repr(C)]
|
||||
/// The embeddings are in the remaining space and represents
|
||||
/// non-aligned [f32] each with dimensions f32s.
|
||||
pub struct ArroySetVectors {
|
||||
pub struct HannoySetVectors {
|
||||
pub docid: DocumentId,
|
||||
pub embedder_id: u8,
|
||||
_padding: [u8; 3],
|
||||
}
|
||||
|
||||
impl ArroySetVectors {
|
||||
impl HannoySetVectors {
|
||||
fn embeddings_bytes<'a>(frame: &'a FrameGrantR<'_>) -> &'a [u8] {
|
||||
let skip = EntryHeader::variant_size() + mem::size_of::<Self>();
|
||||
&frame[skip..]
|
||||
@@ -416,14 +416,14 @@ impl ArroySetVectors {
|
||||
#[repr(C)]
|
||||
/// The embeddings are in the remaining space and represents
|
||||
/// non-aligned [f32] each with dimensions f32s.
|
||||
pub struct ArroySetVector {
|
||||
pub struct HannoySetVector {
|
||||
pub docid: DocumentId,
|
||||
pub embedder_id: u8,
|
||||
pub extractor_id: u8,
|
||||
_padding: [u8; 2],
|
||||
}
|
||||
|
||||
impl ArroySetVector {
|
||||
impl HannoySetVector {
|
||||
fn embeddings_bytes<'a>(frame: &'a FrameGrantR<'_>) -> &'a [u8] {
|
||||
let skip = EntryHeader::variant_size() + mem::size_of::<Self>();
|
||||
&frame[skip..]
|
||||
@@ -553,7 +553,7 @@ impl<'b> ExtractorBbqueueSender<'b> {
|
||||
let refcell = self.producers.get().unwrap();
|
||||
let mut producer = refcell.0.borrow_mut_or_yield();
|
||||
|
||||
let payload_header = EntryHeader::ArroyDeleteVector(ArroyDeleteVector { docid });
|
||||
let payload_header = EntryHeader::HannoyDeleteVector(HannoyDeleteVector { docid });
|
||||
let total_length = EntryHeader::total_delete_vector_size();
|
||||
if total_length > max_grant {
|
||||
panic!("The entry is larger ({total_length} bytes) than the BBQueue max grant ({max_grant} bytes)");
|
||||
@@ -589,8 +589,8 @@ impl<'b> ExtractorBbqueueSender<'b> {
|
||||
// to zero to allocate no extra space at all
|
||||
let dimensions = embeddings.first().map_or(0, |emb| emb.len());
|
||||
|
||||
let arroy_set_vector = ArroySetVectors { docid, embedder_id, _padding: [0; 3] };
|
||||
let payload_header = EntryHeader::ArroySetVectors(arroy_set_vector);
|
||||
let hannoy_set_vector = HannoySetVectors { docid, embedder_id, _padding: [0; 3] };
|
||||
let payload_header = EntryHeader::HannoySetVectors(hannoy_set_vector);
|
||||
let total_length = EntryHeader::total_set_vectors_size(embeddings.len(), dimensions);
|
||||
if total_length > max_grant {
|
||||
let mut value_file = tempfile::tempfile().map(BufWriter::new)?;
|
||||
@@ -650,9 +650,9 @@ impl<'b> ExtractorBbqueueSender<'b> {
|
||||
// to zero to allocate no extra space at all
|
||||
let dimensions = embedding.as_ref().map_or(0, |emb| emb.len());
|
||||
|
||||
let arroy_set_vector =
|
||||
ArroySetVector { docid, embedder_id, extractor_id, _padding: [0; 2] };
|
||||
let payload_header = EntryHeader::ArroySetVector(arroy_set_vector);
|
||||
let hannoy_set_vector =
|
||||
HannoySetVector { docid, embedder_id, extractor_id, _padding: [0; 2] };
|
||||
let payload_header = EntryHeader::HannoySetVector(hannoy_set_vector);
|
||||
let total_length = EntryHeader::total_set_vector_size(dimensions);
|
||||
if total_length > max_grant {
|
||||
let mut value_file = tempfile::tempfile().map(BufWriter::new)?;
|
||||
|
||||
@@ -240,12 +240,12 @@ impl<'extractor> SettingsChangeExtractor<'extractor> for SettingsChangeDocumentE
|
||||
/// modifies them by adding or removing vector fields based on embedder actions,
|
||||
/// and then updates the database.
|
||||
#[tracing::instrument(level = "trace", skip_all, target = "indexing::documents::extract")]
|
||||
pub fn update_database_documents<'indexer, 'extractor, MSP, SD>(
|
||||
pub fn update_database_documents<'indexer, MSP, SD>(
|
||||
documents: &'indexer DocumentsIndentifiers<'indexer>,
|
||||
indexing_context: IndexingContext<MSP>,
|
||||
extractor_sender: &ExtractorBbqueueSender,
|
||||
settings_delta: &SD,
|
||||
extractor_allocs: &'extractor mut ThreadLocal<FullySend<Bump>>,
|
||||
extractor_allocs: &mut ThreadLocal<FullySend<Bump>>,
|
||||
) -> Result<()>
|
||||
where
|
||||
MSP: Fn() -> bool + Sync,
|
||||
|
||||
@@ -17,6 +17,7 @@ use super::guess_primary_key::retrieve_or_guess_primary_key;
|
||||
use crate::documents::PrimaryKey;
|
||||
use crate::progress::{AtomicPayloadStep, Progress};
|
||||
use crate::update::new::document::{DocumentContext, Versions};
|
||||
use crate::update::new::indexer::sharding::Shards;
|
||||
use crate::update::new::steps::IndexingStep;
|
||||
use crate::update::new::thread_local::MostlySend;
|
||||
use crate::update::new::{DocumentIdentifiers, Insertion, Update};
|
||||
@@ -71,6 +72,7 @@ impl<'pl> DocumentOperation<'pl> {
|
||||
new_fields_ids_map: &mut FieldsIdsMap,
|
||||
must_stop_processing: &MSP,
|
||||
progress: Progress,
|
||||
shards: Option<&Shards>,
|
||||
) -> Result<(DocumentOperationChanges<'pl>, Vec<PayloadStats>, Option<PrimaryKey<'pl>>)>
|
||||
where
|
||||
MSP: Fn() -> bool,
|
||||
@@ -107,6 +109,7 @@ impl<'pl> DocumentOperation<'pl> {
|
||||
&mut bytes,
|
||||
&docids_version_offsets,
|
||||
IndexDocumentsMethod::ReplaceDocuments,
|
||||
shards,
|
||||
payload,
|
||||
),
|
||||
Payload::Update(payload) => extract_addition_payload_changes(
|
||||
@@ -120,6 +123,7 @@ impl<'pl> DocumentOperation<'pl> {
|
||||
&mut bytes,
|
||||
&docids_version_offsets,
|
||||
IndexDocumentsMethod::UpdateDocuments,
|
||||
shards,
|
||||
payload,
|
||||
),
|
||||
Payload::Deletion(to_delete) => extract_deletion_payload_changes(
|
||||
@@ -127,6 +131,7 @@ impl<'pl> DocumentOperation<'pl> {
|
||||
rtxn,
|
||||
&mut available_docids,
|
||||
&docids_version_offsets,
|
||||
shards,
|
||||
to_delete,
|
||||
),
|
||||
};
|
||||
@@ -173,6 +178,7 @@ fn extract_addition_payload_changes<'r, 'pl: 'r>(
|
||||
bytes: &mut u64,
|
||||
main_docids_version_offsets: &hashbrown::HashMap<&'pl str, PayloadOperations<'pl>>,
|
||||
method: IndexDocumentsMethod,
|
||||
shards: Option<&Shards>,
|
||||
payload: &'pl [u8],
|
||||
) -> Result<hashbrown::HashMap<&'pl str, PayloadOperations<'pl>>> {
|
||||
use IndexDocumentsMethod::{ReplaceDocuments, UpdateDocuments};
|
||||
@@ -210,12 +216,20 @@ fn extract_addition_payload_changes<'r, 'pl: 'r>(
|
||||
primary_key.as_ref().unwrap()
|
||||
};
|
||||
|
||||
let current_offset = iter.byte_offset();
|
||||
let content = &payload[previous_offset..current_offset];
|
||||
previous_offset = current_offset;
|
||||
|
||||
let external_id =
|
||||
retrieved_primary_key.extract_fields_and_docid(doc, new_fields_ids_map, indexer)?;
|
||||
|
||||
let external_id = external_id.to_de();
|
||||
let current_offset = iter.byte_offset();
|
||||
let document_offset = DocumentOffset { content: &payload[previous_offset..current_offset] };
|
||||
|
||||
if shards.is_some_and(|shards| !shards.must_process(external_id)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
let document_offset = DocumentOffset { content };
|
||||
|
||||
match main_docids_version_offsets.get(external_id) {
|
||||
None => {
|
||||
@@ -299,8 +313,6 @@ fn extract_addition_payload_changes<'r, 'pl: 'r>(
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
previous_offset = iter.byte_offset();
|
||||
}
|
||||
|
||||
if payload.is_empty() {
|
||||
@@ -329,11 +341,16 @@ fn extract_deletion_payload_changes<'s, 'pl: 's>(
|
||||
rtxn: &RoTxn,
|
||||
available_docids: &mut AvailableIds,
|
||||
main_docids_version_offsets: &hashbrown::HashMap<&'s str, PayloadOperations<'pl>>,
|
||||
shards: Option<&Shards>,
|
||||
to_delete: &'pl [&'pl str],
|
||||
) -> Result<hashbrown::HashMap<&'s str, PayloadOperations<'pl>>> {
|
||||
let mut new_docids_version_offsets = hashbrown::HashMap::<&str, PayloadOperations<'pl>>::new();
|
||||
|
||||
for external_id in to_delete {
|
||||
if shards.is_some_and(|shards| !shards.must_process(external_id)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
match main_docids_version_offsets.get(external_id) {
|
||||
None => {
|
||||
match index.external_documents_ids().get(rtxn, external_id) {
|
||||
|
||||
@@ -8,7 +8,7 @@ use document_changes::{DocumentChanges, IndexingContext};
|
||||
pub use document_deletion::DocumentDeletion;
|
||||
pub use document_operation::{DocumentOperation, PayloadStats};
|
||||
use hashbrown::HashMap;
|
||||
use heed::RwTxn;
|
||||
use heed::{RoTxn, RwTxn};
|
||||
pub use partial_dump::PartialDump;
|
||||
pub use post_processing::recompute_word_fst_from_word_docids_database;
|
||||
pub use update_by_function::UpdateByFunction;
|
||||
@@ -24,7 +24,7 @@ use crate::progress::{EmbedderStats, Progress};
|
||||
use crate::update::settings::SettingsDelta;
|
||||
use crate::update::GrenadParameters;
|
||||
use crate::vector::settings::{EmbedderAction, RemoveFragments, WriteBackToDocuments};
|
||||
use crate::vector::{ArroyWrapper, Embedder, RuntimeEmbedders};
|
||||
use crate::vector::{Embedder, RuntimeEmbedders, VectorStore};
|
||||
use crate::{FieldsIdsMap, GlobalFieldsIdsMap, Index, InternalError, Result, ThreadPoolNoAbort};
|
||||
|
||||
pub(crate) mod de;
|
||||
@@ -36,6 +36,7 @@ mod guess_primary_key;
|
||||
mod partial_dump;
|
||||
mod post_processing;
|
||||
pub mod settings_changes;
|
||||
pub mod sharding;
|
||||
mod update_by_function;
|
||||
mod write;
|
||||
|
||||
@@ -66,7 +67,7 @@ where
|
||||
let mut bbbuffers = Vec::new();
|
||||
let finished_extraction = AtomicBool::new(false);
|
||||
|
||||
let arroy_memory = grenad_parameters.max_memory;
|
||||
let hannoy_memory = grenad_parameters.max_memory;
|
||||
|
||||
let (grenad_parameters, total_bbbuffer_capacity) =
|
||||
indexer_memory_settings(pool.current_num_threads(), grenad_parameters);
|
||||
@@ -129,8 +130,9 @@ where
|
||||
|
||||
let global_fields_ids_map = GlobalFieldsIdsMap::new(&new_fields_ids_map);
|
||||
|
||||
let vector_arroy = index.vector_arroy;
|
||||
let arroy_writers: Result<HashMap<_, _>> = embedders
|
||||
let vector_arroy = index.vector_store;
|
||||
let index_version = index.get_version(wtxn)?.unwrap();
|
||||
let hannoy_writers: Result<HashMap<_, _>> = embedders
|
||||
.inner_as_ref()
|
||||
.iter()
|
||||
.map(|(embedder_name, runtime)| {
|
||||
@@ -143,7 +145,12 @@ where
|
||||
})?;
|
||||
|
||||
let dimensions = runtime.embedder.dimensions();
|
||||
let writer = ArroyWrapper::new(vector_arroy, embedder_index, runtime.is_quantized);
|
||||
let writer = VectorStore::new(
|
||||
index_version,
|
||||
vector_arroy,
|
||||
embedder_index,
|
||||
runtime.is_quantized,
|
||||
);
|
||||
|
||||
Ok((
|
||||
embedder_index,
|
||||
@@ -152,10 +159,10 @@ where
|
||||
})
|
||||
.collect();
|
||||
|
||||
let mut arroy_writers = arroy_writers?;
|
||||
let mut hannoy_writers = hannoy_writers?;
|
||||
|
||||
let congestion =
|
||||
write_to_db(writer_receiver, finished_extraction, index, wtxn, &arroy_writers)?;
|
||||
write_to_db(writer_receiver, finished_extraction, index, wtxn, &hannoy_writers)?;
|
||||
|
||||
indexing_context.progress.update_progress(IndexingStep::WaitingForExtractors);
|
||||
|
||||
@@ -169,8 +176,8 @@ where
|
||||
wtxn,
|
||||
indexing_context.progress,
|
||||
index_embeddings,
|
||||
arroy_memory,
|
||||
&mut arroy_writers,
|
||||
hannoy_memory,
|
||||
&mut hannoy_writers,
|
||||
None,
|
||||
&indexing_context.must_stop_processing,
|
||||
)
|
||||
@@ -226,7 +233,7 @@ where
|
||||
let mut bbbuffers = Vec::new();
|
||||
let finished_extraction = AtomicBool::new(false);
|
||||
|
||||
let arroy_memory = grenad_parameters.max_memory;
|
||||
let hannoy_memory = grenad_parameters.max_memory;
|
||||
|
||||
let (grenad_parameters, total_bbbuffer_capacity) =
|
||||
indexer_memory_settings(pool.current_num_threads(), grenad_parameters);
|
||||
@@ -283,15 +290,16 @@ where
|
||||
let new_embedders = settings_delta.new_embedders();
|
||||
let embedder_actions = settings_delta.embedder_actions();
|
||||
let index_embedder_category_ids = settings_delta.new_embedder_category_id();
|
||||
let mut arroy_writers = arroy_writers_from_embedder_actions(
|
||||
let mut hannoy_writers = hannoy_writers_from_embedder_actions(
|
||||
index,
|
||||
wtxn,
|
||||
embedder_actions,
|
||||
new_embedders,
|
||||
index_embedder_category_ids,
|
||||
)?;
|
||||
|
||||
let congestion =
|
||||
write_to_db(writer_receiver, finished_extraction, index, wtxn, &arroy_writers)?;
|
||||
write_to_db(writer_receiver, finished_extraction, index, wtxn, &hannoy_writers)?;
|
||||
|
||||
indexing_context.progress.update_progress(IndexingStep::WaitingForExtractors);
|
||||
|
||||
@@ -305,8 +313,8 @@ where
|
||||
wtxn,
|
||||
indexing_context.progress,
|
||||
index_embeddings,
|
||||
arroy_memory,
|
||||
&mut arroy_writers,
|
||||
hannoy_memory,
|
||||
&mut hannoy_writers,
|
||||
Some(embedder_actions),
|
||||
&indexing_context.must_stop_processing,
|
||||
)
|
||||
@@ -336,13 +344,15 @@ where
|
||||
Ok(congestion)
|
||||
}
|
||||
|
||||
fn arroy_writers_from_embedder_actions<'indexer>(
|
||||
fn hannoy_writers_from_embedder_actions<'indexer>(
|
||||
index: &Index,
|
||||
rtxn: &RoTxn,
|
||||
embedder_actions: &'indexer BTreeMap<String, EmbedderAction>,
|
||||
embedders: &'indexer RuntimeEmbedders,
|
||||
index_embedder_category_ids: &'indexer std::collections::HashMap<String, u8>,
|
||||
) -> Result<HashMap<u8, (&'indexer str, &'indexer Embedder, ArroyWrapper, usize)>> {
|
||||
let vector_arroy = index.vector_arroy;
|
||||
) -> Result<HashMap<u8, (&'indexer str, &'indexer Embedder, VectorStore, usize)>> {
|
||||
let vector_arroy = index.vector_store;
|
||||
let index_version = index.get_version(rtxn)?.unwrap();
|
||||
|
||||
embedders
|
||||
.inner_as_ref()
|
||||
@@ -360,8 +370,12 @@ fn arroy_writers_from_embedder_actions<'indexer>(
|
||||
},
|
||||
)));
|
||||
};
|
||||
let writer =
|
||||
ArroyWrapper::new(vector_arroy, embedder_category_id, action.was_quantized);
|
||||
let writer = VectorStore::new(
|
||||
index_version,
|
||||
vector_arroy,
|
||||
embedder_category_id,
|
||||
action.was_quantized,
|
||||
);
|
||||
let dimensions = runtime.embedder.dimensions();
|
||||
Some(Ok((
|
||||
embedder_category_id,
|
||||
@@ -384,7 +398,12 @@ where
|
||||
let Some(WriteBackToDocuments { embedder_id, .. }) = action.write_back() else {
|
||||
continue;
|
||||
};
|
||||
let reader = ArroyWrapper::new(index.vector_arroy, *embedder_id, action.was_quantized);
|
||||
let reader = VectorStore::new(
|
||||
index.get_version(wtxn)?.unwrap(),
|
||||
index.vector_store,
|
||||
*embedder_id,
|
||||
action.was_quantized,
|
||||
);
|
||||
let Some(dimensions) = reader.dimensions(wtxn)? else {
|
||||
continue;
|
||||
};
|
||||
@@ -400,7 +419,12 @@ where
|
||||
let Some(infos) = index.embedding_configs().embedder_info(wtxn, embedder_name)? else {
|
||||
continue;
|
||||
};
|
||||
let arroy = ArroyWrapper::new(index.vector_arroy, infos.embedder_id, was_quantized);
|
||||
let arroy = VectorStore::new(
|
||||
index.get_version(wtxn)?.unwrap(),
|
||||
index.vector_store,
|
||||
infos.embedder_id,
|
||||
was_quantized,
|
||||
);
|
||||
let Some(dimensions) = arroy.dimensions(wtxn)? else {
|
||||
continue;
|
||||
};
|
||||
|
||||
crates/milli/src/update/new/indexer/sharding.rs (new file, 22 lines)
@@ -0,0 +1,22 @@
// Copyright © 2025 Meilisearch Some Rights Reserved
// This file is part of Meilisearch Enterprise Edition (EE).
// Use of this source code is governed by the Business Source License 1.1,
// as found in the LICENSE-EE file or at <https://mariadb.com/bsl11>

use std::hash::{BuildHasher as _, BuildHasherDefault};

pub struct Shards {
pub own: Vec<String>,
pub others: Vec<String>,
}

impl Shards {
pub fn must_process(&self, docid: &str) -> bool {
let hasher = BuildHasherDefault::<twox_hash::XxHash3_64>::new();
let to_hash = |shard: &String| hasher.hash_one((shard, docid));

let max_hash = self.others.iter().map(to_hash).max().unwrap_or_default();

self.own.iter().map(to_hash).any(|hash| hash > max_hash)
}
}
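`Shards::must_process` is a rendezvous-hashing ownership check: every shard name is hashed together with the document id, the document belongs to the shard with the highest hash, and a node only indexes the document when one of its own shards wins. A minimal hedged sketch of how it could be exercised; the shard names and document ids are made up, and it assumes the `Shards` type above (and the `twox_hash` dependency) is in scope:

```rust
// Hypothetical demo of the rendezvous-hashing ownership check: keep only the
// document ids that this node is responsible for indexing.
fn split_documents(docids: &[&str]) -> Vec<String> {
    let shards = Shards {
        own: vec!["shard-a".to_string()],
        others: vec!["shard-b".to_string(), "shard-c".to_string()],
    };

    docids
        .iter()
        .copied()
        .filter(|docid| shards.must_process(docid))
        .map(str::to_string)
        .collect()
}
```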
@@ -15,7 +15,7 @@ use crate::progress::Progress;
|
||||
use crate::update::settings::InnerIndexSettings;
|
||||
use crate::vector::db::IndexEmbeddingConfig;
|
||||
use crate::vector::settings::EmbedderAction;
|
||||
use crate::vector::{ArroyWrapper, Embedder, Embeddings, RuntimeEmbedders};
|
||||
use crate::vector::{Embedder, Embeddings, RuntimeEmbedders, VectorStore};
|
||||
use crate::{Error, Index, InternalError, Result, UserError};
|
||||
|
||||
pub fn write_to_db(
|
||||
@@ -23,9 +23,9 @@ pub fn write_to_db(
|
||||
finished_extraction: &AtomicBool,
|
||||
index: &Index,
|
||||
wtxn: &mut RwTxn<'_>,
|
||||
arroy_writers: &HashMap<u8, (&str, &Embedder, ArroyWrapper, usize)>,
|
||||
hannoy_writers: &HashMap<u8, (&str, &Embedder, VectorStore, usize)>,
|
||||
) -> Result<ChannelCongestion> {
|
||||
// Used by by the ArroySetVector to copy the embedding into an
|
||||
// Used by by the HannoySetVector to copy the embedding into an
|
||||
// aligned memory area, required by arroy to accept a new vector.
|
||||
let mut aligned_embedding = Vec::new();
|
||||
let span = tracing::trace_span!(target: "indexing::write_db", "all");
|
||||
@@ -56,7 +56,7 @@ pub fn write_to_db(
|
||||
ReceiverAction::LargeVectors(large_vectors) => {
|
||||
let LargeVectors { docid, embedder_id, .. } = large_vectors;
|
||||
let (_, _, writer, dimensions) =
|
||||
arroy_writers.get(&embedder_id).expect("requested a missing embedder");
|
||||
hannoy_writers.get(&embedder_id).expect("requested a missing embedder");
|
||||
let mut embeddings = Embeddings::new(*dimensions);
|
||||
for embedding in large_vectors.read_embeddings(*dimensions) {
|
||||
embeddings.push(embedding.to_vec()).unwrap();
|
||||
@@ -68,7 +68,7 @@ pub fn write_to_db(
|
||||
large_vector @ LargeVector { docid, embedder_id, extractor_id, .. },
|
||||
) => {
|
||||
let (_, _, writer, dimensions) =
|
||||
arroy_writers.get(&embedder_id).expect("requested a missing embedder");
|
||||
hannoy_writers.get(&embedder_id).expect("requested a missing embedder");
|
||||
let embedding = large_vector.read_embedding(*dimensions);
|
||||
writer.add_item_in_store(wtxn, docid, extractor_id, embedding)?;
|
||||
}
|
||||
@@ -80,12 +80,12 @@ pub fn write_to_db(
|
||||
&mut writer_receiver,
|
||||
index,
|
||||
wtxn,
|
||||
arroy_writers,
|
||||
hannoy_writers,
|
||||
&mut aligned_embedding,
|
||||
)?;
|
||||
}
|
||||
|
||||
write_from_bbqueue(&mut writer_receiver, index, wtxn, arroy_writers, &mut aligned_embedding)?;
|
||||
write_from_bbqueue(&mut writer_receiver, index, wtxn, hannoy_writers, &mut aligned_embedding)?;
|
||||
|
||||
Ok(ChannelCongestion {
|
||||
attempts: writer_receiver.sent_messages_attempts(),
|
||||
@@ -115,8 +115,8 @@ pub fn build_vectors<MSP>(
|
||||
wtxn: &mut RwTxn<'_>,
|
||||
progress: &Progress,
|
||||
index_embeddings: Vec<IndexEmbeddingConfig>,
|
||||
arroy_memory: Option<usize>,
|
||||
arroy_writers: &mut HashMap<u8, (&str, &Embedder, ArroyWrapper, usize)>,
|
||||
hannoy_memory: Option<usize>,
|
||||
hannoy_writers: &mut HashMap<u8, (&str, &Embedder, VectorStore, usize)>,
|
||||
embeder_actions: Option<&BTreeMap<String, EmbedderAction>>,
|
||||
must_stop_processing: &MSP,
|
||||
) -> Result<()>
|
||||
@@ -129,18 +129,18 @@ where
|
||||
|
||||
let seed = rand::random();
|
||||
let mut rng = rand::rngs::StdRng::seed_from_u64(seed);
|
||||
for (_index, (embedder_name, _embedder, writer, dimensions)) in arroy_writers {
|
||||
for (_index, (embedder_name, _embedder, writer, dimensions)) in hannoy_writers {
|
||||
let dimensions = *dimensions;
|
||||
let is_being_quantized = embeder_actions
|
||||
.and_then(|actions| actions.get(*embedder_name).map(|action| action.is_being_quantized))
|
||||
.unwrap_or(false);
|
||||
writer.build_and_quantize(
|
||||
wtxn,
|
||||
progress,
|
||||
progress.clone(),
|
||||
&mut rng,
|
||||
dimensions,
|
||||
is_being_quantized,
|
||||
arroy_memory,
|
||||
hannoy_memory,
|
||||
must_stop_processing,
|
||||
)?;
|
||||
}
|
||||
@@ -181,7 +181,7 @@ pub fn write_from_bbqueue(
|
||||
writer_receiver: &mut WriterBbqueueReceiver<'_>,
|
||||
index: &Index,
|
||||
wtxn: &mut RwTxn<'_>,
|
||||
arroy_writers: &HashMap<u8, (&str, &crate::vector::Embedder, ArroyWrapper, usize)>,
|
||||
hannoy_writers: &HashMap<u8, (&str, &crate::vector::Embedder, VectorStore, usize)>,
|
||||
aligned_embedding: &mut Vec<f32>,
|
||||
) -> crate::Result<()> {
|
||||
while let Some(frame_with_header) = writer_receiver.recv_frame() {
|
||||
@@ -221,17 +221,17 @@ pub fn write_from_bbqueue(
},
}
}
EntryHeader::ArroyDeleteVector(ArroyDeleteVector { docid }) => {
for (_index, (_name, _embedder, writer, dimensions)) in arroy_writers {
EntryHeader::HannoyDeleteVector(HannoyDeleteVector { docid }) => {
for (_index, (_name, _embedder, writer, dimensions)) in hannoy_writers {
let dimensions = *dimensions;
writer.del_items(wtxn, dimensions, docid)?;
}
}
EntryHeader::ArroySetVectors(asvs) => {
let ArroySetVectors { docid, embedder_id, .. } = asvs;
EntryHeader::HannoySetVectors(asvs) => {
let HannoySetVectors { docid, embedder_id, .. } = asvs;
let frame = frame_with_header.frame();
let (_, _, writer, dimensions) =
arroy_writers.get(&embedder_id).expect("requested a missing embedder");
hannoy_writers.get(&embedder_id).expect("requested a missing embedder");
let mut embeddings = Embeddings::new(*dimensions);
let all_embeddings = asvs.read_all_embeddings_into_vec(frame, aligned_embedding);
writer.del_items(wtxn, *dimensions, docid)?;
@@ -245,12 +245,12 @@ pub fn write_from_bbqueue(
writer.add_items(wtxn, docid, &embeddings)?;
}
}
EntryHeader::ArroySetVector(
asv @ ArroySetVector { docid, embedder_id, extractor_id, .. },
EntryHeader::HannoySetVector(
asv @ HannoySetVector { docid, embedder_id, extractor_id, .. },
) => {
let frame = frame_with_header.frame();
let (_, _, writer, dimensions) =
arroy_writers.get(&embedder_id).expect("requested a missing embedder");
hannoy_writers.get(&embedder_id).expect("requested a missing embedder");
let embedding = asv.read_all_embeddings_into_vec(frame, aligned_embedding);

if embedding.is_empty() {
@@ -63,8 +63,8 @@ where
}

#[tracing::instrument(level = "trace", skip_all, target = "indexing::merge")]
pub fn merge_and_send_docids<'extractor, MSP, D>(
mut caches: Vec<BalancedCaches<'extractor>>,
pub fn merge_and_send_docids<MSP, D>(
mut caches: Vec<BalancedCaches<'_>>,
database: Database<Bytes, Bytes>,
index: &Index,
docids_sender: WordDocidsSender<D>,
@@ -91,8 +91,8 @@ where
}

#[tracing::instrument(level = "trace", skip_all, target = "indexing::merge")]
pub fn merge_and_send_facet_docids<'extractor>(
mut caches: Vec<BalancedCaches<'extractor>>,
pub fn merge_and_send_facet_docids(
mut caches: Vec<BalancedCaches<'_>>,
database: FacetDatabases,
index: &Index,
rtxn: &RoTxn,
@@ -14,7 +14,7 @@ use crate::constants::RESERVED_VECTORS_FIELD_NAME;
use crate::documents::FieldIdMapper;
use crate::vector::db::{EmbeddingStatus, IndexEmbeddingConfig};
use crate::vector::parsed_vectors::{RawVectors, RawVectorsError, VectorOrArrayOfVectors};
use crate::vector::{ArroyWrapper, Embedding, RuntimeEmbedders};
use crate::vector::{Embedding, RuntimeEmbedders, VectorStore};
use crate::{DocumentId, Index, InternalError, Result, UserError};

#[derive(Serialize)]
@@ -120,8 +120,13 @@ impl<'t> VectorDocumentFromDb<'t> {
config: &IndexEmbeddingConfig,
status: &EmbeddingStatus,
) -> Result<VectorEntry<'t>> {
let reader =
ArroyWrapper::new(self.index.vector_arroy, embedder_id, config.config.quantized());
let index_version = self.index.get_version(self.rtxn)?.unwrap();
let reader = VectorStore::new(
index_version,
self.index.vector_store,
embedder_id,
config.config.quantized(),
);
let vectors = reader.item_vectors(self.rtxn, self.docid)?;

Ok(VectorEntry {
@@ -149,7 +154,7 @@ impl<'t> VectorDocument<'t> for VectorDocumentFromDb<'t> {
name,
entry_from_raw_value(value, false).map_err(|_| {
InternalError::Serialization(crate::SerializationError::Decoding {
db_name: Some(crate::index::db_name::VECTOR_ARROY),
db_name: Some(crate::index::db_name::VECTOR_STORE),
})
})?,
))
@@ -167,7 +172,7 @@ impl<'t> VectorDocument<'t> for VectorDocumentFromDb<'t> {
Some(embedding_from_doc) => {
Some(entry_from_raw_value(embedding_from_doc, false).map_err(|_| {
InternalError::Serialization(crate::SerializationError::Decoding {
db_name: Some(crate::index::db_name::VECTOR_ARROY),
db_name: Some(crate::index::db_name::VECTOR_STORE),
})
})?)
}
@@ -1,17 +1,20 @@
mod new_hannoy;
mod v1_12;
mod v1_13;
mod v1_14;
mod v1_15;
mod v1_16;

use heed::RwTxn;
use new_hannoy::Latest_V1_18_New_Hannoy;
use v1_12::{V1_12_3_To_V1_13_0, V1_12_To_V1_12_3};
use v1_13::{V1_13_0_To_V1_13_1, V1_13_1_To_Latest_V1_13};
use v1_14::Latest_V1_13_To_Latest_V1_14;
use v1_15::Latest_V1_14_To_Latest_V1_15;
use v1_16::Latest_V1_15_To_V1_16_0;

use crate::constants::{VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH};
use crate::progress::{Progress, VariableNameStep};
use crate::update::upgrade::v1_16::Latest_V1_15_To_V1_16_0;
use crate::{Index, InternalError, Result};

trait UpgradeIndex {
@@ -34,6 +37,8 @@ const UPGRADE_FUNCTIONS: &[&dyn UpgradeIndex] = &[
&Latest_V1_13_To_Latest_V1_14 {},
&Latest_V1_14_To_Latest_V1_15 {},
&Latest_V1_15_To_V1_16_0 {},
&ToTargetNoOp { target: (1, 18, 0) },
&Latest_V1_18_New_Hannoy {},
// This is the last upgrade function, it will be called when the index is up to date.
// any other upgrade function should be added before this one.
&ToCurrentNoOp {},
@@ -61,9 +66,9 @@ const fn start(from: (u32, u32, u32)) -> Option<usize> {
(1, 14, _) => function_index!(5),
// We must handle the current version in the match because in case of a failure some index may have been upgraded but not other.
(1, 15, _) => function_index!(6),
(1, 16, _) => function_index!(7),
(1, 17, _) => function_index!(7),
(1, 18, _) => function_index!(7),
(1, 16, _) | (1, 17, _) => function_index!(7),
(1, 18, _) => function_index!(8),
(1, 22, _) => function_index!(9),
// We deliberately don't add a placeholder with (VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH) here to force manually
// considering dumpless upgrade.
(_major, _minor, _patch) => return None,
@@ -146,3 +151,25 @@ impl UpgradeIndex for ToCurrentNoOp {
(VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH)
}
}

/// Perform no operation during the upgrade except changing to the specified target version.
#[allow(non_camel_case_types)]
struct ToTargetNoOp {
pub target: (u32, u32, u32),
}

impl UpgradeIndex for ToTargetNoOp {
fn upgrade(
&self,
_wtxn: &mut RwTxn,
_index: &Index,
_original: (u32, u32, u32),
_progress: Progress,
) -> Result<bool> {
Ok(false)
}

fn target_version(&self) -> (u32, u32, u32) {
self.target
}
}
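The dispatch above is what makes the dumpless upgrade incremental: start maps the on-disk version to an entry point in UPGRADE_FUNCTIONS, and every later step then runs in order. A minimal standalone sketch of that walk, using only the indices and target versions visible in this diff (the real driver in upgrade/mod.rs, not shown here, also persists each target version and reports progress):

// Hedged sketch: replay the tail of the upgrade chain for an older index.
fn start(from: (u32, u32, u32)) -> Option<usize> {
    Some(match from {
        (1, 16, _) | (1, 17, _) => 7, // ToTargetNoOp { target: (1, 18, 0) } runs first
        (1, 18, _) => 8,              // jump straight to the arroy -> hannoy conversion
        (1, 22, _) => 9,              // already converted, only the final no-op remains
        _ => return None,
    })
}

fn main() {
    // Target versions of UPGRADE_FUNCTIONS[7..], in order (index 9 is ToCurrentNoOp).
    let targets = [(1, 18, 0), (1, 22, 0), (1, 22, 0)];
    let from = (1, 17, 2);
    let first = start(from).expect("dumpless upgrade supported");
    for (index, target) in targets.iter().enumerate().skip(first - 7) {
        println!("running UPGRADE_FUNCTIONS[{}] -> {:?}", index + 7, target);
    }
}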
crates/milli/src/update/upgrade/new_hannoy.rs (new file, 36 lines)
@@ -0,0 +1,36 @@
use heed::RwTxn;

use super::UpgradeIndex;
use crate::progress::Progress;
use crate::vector::VectorStore;
use crate::{Index, Result};

#[allow(non_camel_case_types)]
pub(super) struct Latest_V1_18_New_Hannoy();

impl UpgradeIndex for Latest_V1_18_New_Hannoy {
fn upgrade(
&self,
wtxn: &mut RwTxn,
index: &Index,
_original: (u32, u32, u32),
progress: Progress,
) -> Result<bool> {
let embedding_configs = index.embedding_configs();
let index_version = index.get_version(wtxn)?.unwrap();
for config in embedding_configs.embedding_configs(wtxn)? {
// TODO use the embedder name to display progress
let quantized = config.config.quantized();
let embedder_id = embedding_configs.embedder_id(wtxn, &config.name)?.unwrap();
let vector_store =
VectorStore::new(index_version, index.vector_store, embedder_id, quantized);
vector_store.convert_from_arroy(wtxn, progress.clone())?;
}

Ok(false)
}

fn target_version(&self) -> (u32, u32, u32) {
(1, 22, 0)
}
}
@@ -27,9 +27,9 @@ impl UpgradeIndex for Latest_V1_13_To_Latest_V1_14 {
let rtxn = index.read_txn()?;
arroy::upgrade::from_0_5_to_0_6::<Cosine>(
&rtxn,
index.vector_arroy.remap_data_type(),
index.vector_store.remap_types(),
wtxn,
index.vector_arroy.remap_data_type(),
index.vector_store.remap_types(),
)?;

Ok(false)
@@ -1,6 +1,6 @@
use std::time::Instant;

use arroy::Distance;
use hannoy::Distance;

use super::error::CompositeEmbedderContainsHuggingFace;
use super::{
@@ -324,19 +324,18 @@ fn check_similarity(
}

for (left, right) in left.into_iter().zip(right) {
let left = arroy::internals::UnalignedVector::from_slice(&left);
let right = arroy::internals::UnalignedVector::from_slice(&right);
let left = arroy::internals::Leaf {
header: arroy::distances::Cosine::new_header(&left),
let left = hannoy::internals::UnalignedVector::from_slice(&left);
let right = hannoy::internals::UnalignedVector::from_slice(&right);
let left = hannoy::internals::Item {
header: hannoy::distances::Cosine::new_header(&left),
vector: left,
};
let right = arroy::internals::Leaf {
header: arroy::distances::Cosine::new_header(&right),
let right = hannoy::internals::Item {
header: hannoy::distances::Cosine::new_header(&right),
vector: right,
};

let distance = arroy::distances::Cosine::built_distance(&left, &right);

let distance = hannoy::distances::Cosine::distance(&left, &right);
if distance > super::MAX_COMPOSITE_DISTANCE {
return Err(NewEmbedderError::composite_embedding_value_mismatch(distance, hint));
}
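check_similarity embeds the same inputs with both halves of a composite embedder and rejects the configuration when their cosine distance exceeds MAX_COMPOSITE_DISTANCE (0.01, defined later in this diff). A small self-contained illustration of that threshold on plain f32 vectors; it avoids the hannoy internals entirely, and hannoy's own Cosine distance may be scaled differently from this textbook formula:

// Cosine distance as 1 - cosine similarity.
fn cosine_distance(a: &[f32], b: &[f32]) -> f32 {
    let dot: f32 = a.iter().zip(b).map(|(x, y)| x * y).sum();
    let norm = |v: &[f32]| v.iter().map(|x| x * x).sum::<f32>().sqrt();
    1.0 - dot / (norm(a) * norm(b))
}

const MAX_COMPOSITE_DISTANCE: f32 = 0.01;

fn main() {
    let search_half = [0.6_f32, 0.8, 0.0];
    let indexing_half = [0.6_f32, 0.79, 0.02]; // nearly the same direction
    let distance = cosine_distance(&search_half, &indexing_half);
    assert!(distance <= MAX_COMPOSITE_DISTANCE, "composite halves disagree: {distance}");
}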
@@ -3,11 +3,12 @@ use std::num::NonZeroUsize;
use std::sync::{Arc, Mutex};
use std::time::Instant;

use arroy::distances::{BinaryQuantizedCosine, Cosine};
use arroy::ItemId;
use deserr::{DeserializeError, Deserr};
use hannoy::distances::{Cosine, Hamming};
use hannoy::ItemId;
use heed::{RoTxn, RwTxn, Unspecified};
use ordered_float::OrderedFloat;
use rand::SeedableRng as _;
use roaring::RoaringBitmap;
use serde::{Deserialize, Serialize};
use utoipa::ToSchema;
@@ -41,31 +42,43 @@ pub type Embedding = Vec<f32>;
pub const REQUEST_PARALLELISM: usize = 40;
pub const MAX_COMPOSITE_DISTANCE: f32 = 0.01;

pub struct ArroyWrapper {
quantized: bool,
const HANNOY_EF_CONSTRUCTION: usize = 125;
const HANNOY_M: usize = 16;
const HANNOY_M0: usize = 32;

pub struct VectorStore {
version: (u32, u32, u32),
database: hannoy::Database<Unspecified>,
embedder_index: u8,
database: arroy::Database<Unspecified>,
quantized: bool,
}

impl ArroyWrapper {
impl VectorStore {
pub fn new(
database: arroy::Database<Unspecified>,
version: (u32, u32, u32),
database: hannoy::Database<Unspecified>,
embedder_index: u8,
quantized: bool,
) -> Self {
Self { database, embedder_index, quantized }
Self { version, database, embedder_index, quantized }
}

pub fn embedder_index(&self) -> u8 {
self.embedder_index
}

fn readers<'a, D: arroy::Distance>(
/// Whether we must use the arroy to read the vector store.
pub fn version_uses_arroy(&self) -> bool {
let (major, minor, _patch) = self.version;
major == 1 && minor < 18
}

fn arroy_readers<'a, D: arroy::Distance>(
&'a self,
rtxn: &'a RoTxn<'a>,
db: arroy::Database<D>,
) -> impl Iterator<Item = Result<arroy::Reader<'a, D>, arroy::Error>> + 'a {
arroy_store_range_for_embedder(self.embedder_index).filter_map(move |index| {
vector_store_range_for_embedder(self.embedder_index).filter_map(move |index| {
match arroy::Reader::open(rtxn, index, db) {
Ok(reader) => match reader.is_empty(rtxn) {
Ok(false) => Some(Ok(reader)),
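This version gate is the heart of the dual-read path: indexes written before 1.18 keep being served through the arroy readers until the dumpless upgrade rebuilds them with hannoy. A tiny standalone check of the same predicate (the tuple layout matches what get_version returns in this diff):

// Mirrors VectorStore::version_uses_arroy above.
fn version_uses_arroy(version: (u32, u32, u32)) -> bool {
    let (major, minor, _patch) = version;
    major == 1 && minor < 18
}

fn main() {
    assert!(version_uses_arroy((1, 17, 3)));   // pre-conversion index: read with arroy
    assert!(!version_uses_arroy((1, 18, 0)));  // written by the hannoy-aware code
    assert!(!version_uses_arroy((1, 22, 0)));  // after the dumpless upgrade
}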
@@ -78,6 +91,24 @@ impl ArroyWrapper {
})
}

fn readers<'a, D: hannoy::Distance>(
&'a self,
rtxn: &'a RoTxn<'a>,
db: hannoy::Database<D>,
) -> impl Iterator<Item = Result<hannoy::Reader<'a, D>, hannoy::Error>> + 'a {
vector_store_range_for_embedder(self.embedder_index).filter_map(move |index| {
match hannoy::Reader::open(rtxn, index, db) {
Ok(reader) => match reader.is_empty(rtxn) {
Ok(false) => Some(Ok(reader)),
Ok(true) => None,
Err(e) => Some(Err(e)),
},
Err(hannoy::Error::MissingMetadata(_)) => None,
Err(e) => Some(Err(e)),
}
})
}

/// The item ids that are present in the store specified by its id.
///
/// The ids are accessed via a lambda to avoid lifetime shenanigans.
@@ -86,18 +117,27 @@ impl ArroyWrapper {
rtxn: &RoTxn,
store_id: u8,
with_items: F,
) -> Result<O, arroy::Error>
) -> crate::Result<O>
where
F: FnOnce(&RoaringBitmap) -> O,
{
if self.quantized {
if self.version_uses_arroy() {
if self.quantized {
self._arroy_items_in_store(rtxn, self.arroy_quantized_db(), store_id, with_items)
.map_err(Into::into)
} else {
self._arroy_items_in_store(rtxn, self.arroy_angular_db(), store_id, with_items)
.map_err(Into::into)
}
} else if self.quantized {
self._items_in_store(rtxn, self.quantized_db(), store_id, with_items)
.map_err(Into::into)
} else {
self._items_in_store(rtxn, self.angular_db(), store_id, with_items)
self._items_in_store(rtxn, self.angular_db(), store_id, with_items).map_err(Into::into)
}
}

fn _items_in_store<D: arroy::Distance, F, O>(
fn _arroy_items_in_store<D: arroy::Distance, F, O>(
&self,
rtxn: &RoTxn,
db: arroy::Database<D>,
@@ -107,7 +147,7 @@ impl ArroyWrapper {
|
||||
where
|
||||
F: FnOnce(&RoaringBitmap) -> O,
|
||||
{
|
||||
let index = arroy_store_for_embedder(self.embedder_index, store_id);
|
||||
let index = vector_store_for_embedder(self.embedder_index, store_id);
|
||||
let reader = arroy::Reader::open(rtxn, index, db);
|
||||
match reader {
|
||||
Ok(reader) => Ok(with_items(reader.item_ids())),
|
||||
@@ -116,8 +156,41 @@ impl ArroyWrapper {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn dimensions(&self, rtxn: &RoTxn) -> Result<Option<usize>, arroy::Error> {
|
||||
if self.quantized {
|
||||
fn _items_in_store<D: hannoy::Distance, F, O>(
|
||||
&self,
|
||||
rtxn: &RoTxn,
|
||||
db: hannoy::Database<D>,
|
||||
store_id: u8,
|
||||
with_items: F,
|
||||
) -> Result<O, hannoy::Error>
|
||||
where
|
||||
F: FnOnce(&RoaringBitmap) -> O,
|
||||
{
|
||||
let index = vector_store_for_embedder(self.embedder_index, store_id);
|
||||
let reader = hannoy::Reader::open(rtxn, index, db);
|
||||
match reader {
|
||||
Ok(reader) => Ok(with_items(reader.item_ids())),
|
||||
Err(hannoy::Error::MissingMetadata(_)) => Ok(with_items(&RoaringBitmap::new())),
|
||||
Err(err) => Err(err),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn dimensions(&self, rtxn: &RoTxn) -> crate::Result<Option<usize>> {
|
||||
if self.version_uses_arroy() {
|
||||
if self.quantized {
|
||||
Ok(self
|
||||
.arroy_readers(rtxn, self.arroy_quantized_db())
|
||||
.next()
|
||||
.transpose()?
|
||||
.map(|reader| reader.dimensions()))
|
||||
} else {
|
||||
Ok(self
|
||||
.arroy_readers(rtxn, self.arroy_angular_db())
|
||||
.next()
|
||||
.transpose()?
|
||||
.map(|reader| reader.dimensions()))
|
||||
}
|
||||
} else if self.quantized {
|
||||
Ok(self
|
||||
.readers(rtxn, self.quantized_db())
|
||||
.next()
|
||||
@@ -132,47 +205,92 @@ impl ArroyWrapper {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn convert_from_arroy(&self, wtxn: &mut RwTxn, progress: Progress) -> crate::Result<()> {
|
||||
if self.quantized {
|
||||
let dimensions = self
|
||||
.arroy_readers(wtxn, self.arroy_quantized_db())
|
||||
.next()
|
||||
.transpose()?
|
||||
.map(|reader| reader.dimensions());
|
||||
|
||||
let Some(dimensions) = dimensions else { return Ok(()) };
|
||||
|
||||
for index in vector_store_range_for_embedder(self.embedder_index) {
|
||||
let mut rng = rand::rngs::StdRng::from_entropy();
|
||||
let writer = hannoy::Writer::new(self.quantized_db(), index, dimensions);
|
||||
let mut builder = writer.builder(&mut rng).progress(progress.clone());
|
||||
builder.prepare_arroy_conversion(wtxn)?;
|
||||
builder.build::<HANNOY_M, HANNOY_M0>(wtxn)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
} else {
|
||||
let dimensions = self
|
||||
.arroy_readers(wtxn, self.arroy_angular_db())
|
||||
.next()
|
||||
.transpose()?
|
||||
.map(|reader| reader.dimensions());
|
||||
|
||||
let Some(dimensions) = dimensions else { return Ok(()) };
|
||||
|
||||
for index in vector_store_range_for_embedder(self.embedder_index) {
|
||||
let mut rng = rand::rngs::StdRng::from_entropy();
|
||||
let writer = hannoy::Writer::new(self.angular_db(), index, dimensions);
|
||||
let mut builder = writer.builder(&mut rng).progress(progress.clone());
|
||||
builder.prepare_arroy_conversion(wtxn)?;
|
||||
builder.build::<HANNOY_M, HANNOY_M0>(wtxn)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
pub fn build_and_quantize<R: rand::Rng + rand::SeedableRng>(
|
||||
&mut self,
|
||||
wtxn: &mut RwTxn,
|
||||
progress: &Progress,
|
||||
progress: Progress,
|
||||
rng: &mut R,
|
||||
dimension: usize,
|
||||
quantizing: bool,
|
||||
arroy_memory: Option<usize>,
|
||||
hannoy_memory: Option<usize>,
|
||||
cancel: &(impl Fn() -> bool + Sync + Send),
|
||||
) -> Result<(), arroy::Error> {
|
||||
for index in arroy_store_range_for_embedder(self.embedder_index) {
|
||||
) -> Result<(), hannoy::Error> {
|
||||
for index in vector_store_range_for_embedder(self.embedder_index) {
|
||||
if self.quantized {
|
||||
let writer = arroy::Writer::new(self.quantized_db(), index, dimension);
|
||||
let writer = hannoy::Writer::new(self.quantized_db(), index, dimension);
|
||||
if writer.need_build(wtxn)? {
|
||||
writer.builder(rng).build(wtxn)?
|
||||
let mut builder = writer.builder(rng).progress(progress.clone());
|
||||
builder
|
||||
.available_memory(hannoy_memory.unwrap_or(usize::MAX))
|
||||
.cancel(cancel)
|
||||
.ef_construction(HANNOY_EF_CONSTRUCTION)
|
||||
.build::<HANNOY_M, HANNOY_M0>(wtxn)?;
|
||||
} else if writer.is_empty(wtxn)? {
|
||||
continue;
|
||||
}
|
||||
} else {
|
||||
let writer = arroy::Writer::new(self.angular_db(), index, dimension);
|
||||
let writer = hannoy::Writer::new(self.angular_db(), index, dimension);
|
||||
// If we are quantizing the databases, we can't know from meilisearch
|
||||
// if the db was empty but still contained the wrong metadata, thus we need
|
||||
// to quantize everything and can't stop early. Since this operation can
|
||||
// only happens once in the life of an embedder, it's not very performances
|
||||
// sensitive.
|
||||
if quantizing && !self.quantized {
|
||||
let writer = writer.prepare_changing_distance::<BinaryQuantizedCosine>(wtxn)?;
|
||||
writer
|
||||
.builder(rng)
|
||||
.available_memory(arroy_memory.unwrap_or(usize::MAX))
|
||||
.progress(|step| progress.update_progress_from_arroy(step))
|
||||
let writer = writer.prepare_changing_distance::<Hamming>(wtxn)?;
|
||||
let mut builder = writer.builder(rng).progress(progress.clone());
|
||||
builder
|
||||
.available_memory(hannoy_memory.unwrap_or(usize::MAX))
|
||||
.cancel(cancel)
|
||||
.build(wtxn)?;
|
||||
.ef_construction(HANNOY_EF_CONSTRUCTION)
|
||||
.build::<HANNOY_M, HANNOY_M0>(wtxn)?;
|
||||
} else if writer.need_build(wtxn)? {
|
||||
writer
|
||||
.builder(rng)
|
||||
.available_memory(arroy_memory.unwrap_or(usize::MAX))
|
||||
.progress(|step| progress.update_progress_from_arroy(step))
|
||||
let mut builder = writer.builder(rng).progress(progress.clone());
|
||||
builder
|
||||
.available_memory(hannoy_memory.unwrap_or(usize::MAX))
|
||||
.cancel(cancel)
|
||||
.build(wtxn)?;
|
||||
.ef_construction(HANNOY_EF_CONSTRUCTION)
|
||||
.build::<HANNOY_M, HANNOY_M0>(wtxn)?;
|
||||
} else if writer.is_empty(wtxn)? {
|
||||
continue;
|
||||
}
|
||||
@@ -188,18 +306,18 @@ impl ArroyWrapper {
|
||||
pub fn add_items(
|
||||
&self,
|
||||
wtxn: &mut RwTxn,
|
||||
item_id: arroy::ItemId,
|
||||
item_id: hannoy::ItemId,
|
||||
embeddings: &Embeddings<f32>,
|
||||
) -> Result<(), arroy::Error> {
|
||||
) -> Result<(), hannoy::Error> {
|
||||
let dimension = embeddings.dimension();
|
||||
for (index, vector) in
|
||||
arroy_store_range_for_embedder(self.embedder_index).zip(embeddings.iter())
|
||||
vector_store_range_for_embedder(self.embedder_index).zip(embeddings.iter())
|
||||
{
|
||||
if self.quantized {
|
||||
arroy::Writer::new(self.quantized_db(), index, dimension)
|
||||
hannoy::Writer::new(self.quantized_db(), index, dimension)
|
||||
.add_item(wtxn, item_id, vector)?
|
||||
} else {
|
||||
arroy::Writer::new(self.angular_db(), index, dimension)
|
||||
hannoy::Writer::new(self.angular_db(), index, dimension)
|
||||
.add_item(wtxn, item_id, vector)?
|
||||
}
|
||||
}
|
||||
@@ -210,9 +328,9 @@ impl ArroyWrapper {
|
||||
pub fn add_item(
|
||||
&self,
|
||||
wtxn: &mut RwTxn,
|
||||
item_id: arroy::ItemId,
|
||||
item_id: hannoy::ItemId,
|
||||
vector: &[f32],
|
||||
) -> Result<(), arroy::Error> {
|
||||
) -> Result<(), hannoy::Error> {
|
||||
if self.quantized {
|
||||
self._add_item(wtxn, self.quantized_db(), item_id, vector)
|
||||
} else {
|
||||
@@ -220,17 +338,17 @@ impl ArroyWrapper {
|
||||
}
|
||||
}
|
||||
|
||||
fn _add_item<D: arroy::Distance>(
|
||||
fn _add_item<D: hannoy::Distance>(
|
||||
&self,
|
||||
wtxn: &mut RwTxn,
|
||||
db: arroy::Database<D>,
|
||||
item_id: arroy::ItemId,
|
||||
db: hannoy::Database<D>,
|
||||
item_id: hannoy::ItemId,
|
||||
vector: &[f32],
|
||||
) -> Result<(), arroy::Error> {
|
||||
) -> Result<(), hannoy::Error> {
|
||||
let dimension = vector.len();
|
||||
|
||||
for index in arroy_store_range_for_embedder(self.embedder_index) {
|
||||
let writer = arroy::Writer::new(db, index, dimension);
|
||||
for index in vector_store_range_for_embedder(self.embedder_index) {
|
||||
let writer = hannoy::Writer::new(db, index, dimension);
|
||||
if !writer.contains_item(wtxn, item_id)? {
|
||||
writer.add_item(wtxn, item_id, vector)?;
|
||||
break;
|
||||
@@ -245,10 +363,10 @@ impl ArroyWrapper {
|
||||
pub fn add_item_in_store(
|
||||
&self,
|
||||
wtxn: &mut RwTxn,
|
||||
item_id: arroy::ItemId,
|
||||
item_id: hannoy::ItemId,
|
||||
store_id: u8,
|
||||
vector: &[f32],
|
||||
) -> Result<(), arroy::Error> {
|
||||
) -> Result<(), hannoy::Error> {
|
||||
if self.quantized {
|
||||
self._add_item_in_store(wtxn, self.quantized_db(), item_id, store_id, vector)
|
||||
} else {
|
||||
@@ -256,18 +374,18 @@ impl ArroyWrapper {
|
||||
}
|
||||
}
|
||||
|
||||
fn _add_item_in_store<D: arroy::Distance>(
|
||||
fn _add_item_in_store<D: hannoy::Distance>(
|
||||
&self,
|
||||
wtxn: &mut RwTxn,
|
||||
db: arroy::Database<D>,
|
||||
item_id: arroy::ItemId,
|
||||
db: hannoy::Database<D>,
|
||||
item_id: hannoy::ItemId,
|
||||
store_id: u8,
|
||||
vector: &[f32],
|
||||
) -> Result<(), arroy::Error> {
|
||||
) -> Result<(), hannoy::Error> {
|
||||
let dimension = vector.len();
|
||||
|
||||
let index = arroy_store_for_embedder(self.embedder_index, store_id);
|
||||
let writer = arroy::Writer::new(db, index, dimension);
|
||||
let index = vector_store_for_embedder(self.embedder_index, store_id);
|
||||
let writer = hannoy::Writer::new(db, index, dimension);
|
||||
writer.add_item(wtxn, item_id, vector)
|
||||
}
|
||||
|
||||
@@ -276,14 +394,14 @@ impl ArroyWrapper {
|
||||
&self,
|
||||
wtxn: &mut RwTxn,
|
||||
dimension: usize,
|
||||
item_id: arroy::ItemId,
|
||||
) -> Result<(), arroy::Error> {
|
||||
for index in arroy_store_range_for_embedder(self.embedder_index) {
|
||||
item_id: hannoy::ItemId,
|
||||
) -> Result<(), hannoy::Error> {
|
||||
for index in vector_store_range_for_embedder(self.embedder_index) {
|
||||
if self.quantized {
|
||||
let writer = arroy::Writer::new(self.quantized_db(), index, dimension);
|
||||
let writer = hannoy::Writer::new(self.quantized_db(), index, dimension);
|
||||
writer.del_item(wtxn, item_id)?;
|
||||
} else {
|
||||
let writer = arroy::Writer::new(self.angular_db(), index, dimension);
|
||||
let writer = hannoy::Writer::new(self.angular_db(), index, dimension);
|
||||
writer.del_item(wtxn, item_id)?;
|
||||
}
|
||||
}
|
||||
@@ -301,10 +419,10 @@ impl ArroyWrapper {
|
||||
pub fn del_item_in_store(
|
||||
&self,
|
||||
wtxn: &mut RwTxn,
|
||||
item_id: arroy::ItemId,
|
||||
item_id: hannoy::ItemId,
|
||||
store_id: u8,
|
||||
dimensions: usize,
|
||||
) -> Result<bool, arroy::Error> {
|
||||
) -> Result<bool, hannoy::Error> {
|
||||
if self.quantized {
|
||||
self._del_item_in_store(wtxn, self.quantized_db(), item_id, store_id, dimensions)
|
||||
} else {
|
||||
@@ -312,16 +430,16 @@ impl ArroyWrapper {
|
||||
}
|
||||
}
|
||||
|
||||
fn _del_item_in_store<D: arroy::Distance>(
|
||||
fn _del_item_in_store<D: hannoy::Distance>(
|
||||
&self,
|
||||
wtxn: &mut RwTxn,
|
||||
db: arroy::Database<D>,
|
||||
item_id: arroy::ItemId,
|
||||
db: hannoy::Database<D>,
|
||||
item_id: hannoy::ItemId,
|
||||
store_id: u8,
|
||||
dimensions: usize,
|
||||
) -> Result<bool, arroy::Error> {
|
||||
let index = arroy_store_for_embedder(self.embedder_index, store_id);
|
||||
let writer = arroy::Writer::new(db, index, dimensions);
|
||||
) -> Result<bool, hannoy::Error> {
|
||||
let index = vector_store_for_embedder(self.embedder_index, store_id);
|
||||
let writer = hannoy::Writer::new(db, index, dimensions);
|
||||
writer.del_item(wtxn, item_id)
|
||||
}
|
||||
|
||||
@@ -335,7 +453,7 @@ impl ArroyWrapper {
|
||||
wtxn: &mut RwTxn,
|
||||
store_id: u8,
|
||||
dimensions: usize,
|
||||
) -> Result<(), arroy::Error> {
|
||||
) -> Result<(), hannoy::Error> {
|
||||
if self.quantized {
|
||||
self._clear_store(wtxn, self.quantized_db(), store_id, dimensions)
|
||||
} else {
|
||||
@@ -343,15 +461,15 @@ impl ArroyWrapper {
|
||||
}
|
||||
}
|
||||
|
||||
fn _clear_store<D: arroy::Distance>(
|
||||
fn _clear_store<D: hannoy::Distance>(
|
||||
&self,
|
||||
wtxn: &mut RwTxn,
|
||||
db: arroy::Database<D>,
|
||||
db: hannoy::Database<D>,
|
||||
store_id: u8,
|
||||
dimensions: usize,
|
||||
) -> Result<(), arroy::Error> {
|
||||
let index = arroy_store_for_embedder(self.embedder_index, store_id);
|
||||
let writer = arroy::Writer::new(db, index, dimensions);
|
||||
) -> Result<(), hannoy::Error> {
|
||||
let index = vector_store_for_embedder(self.embedder_index, store_id);
|
||||
let writer = hannoy::Writer::new(db, index, dimensions);
|
||||
writer.clear(wtxn)
|
||||
}
|
||||
|
||||
@@ -359,9 +477,9 @@ impl ArroyWrapper {
|
||||
pub fn del_item(
|
||||
&self,
|
||||
wtxn: &mut RwTxn,
|
||||
item_id: arroy::ItemId,
|
||||
item_id: hannoy::ItemId,
|
||||
vector: &[f32],
|
||||
) -> Result<bool, arroy::Error> {
|
||||
) -> Result<bool, hannoy::Error> {
|
||||
if self.quantized {
|
||||
self._del_item(wtxn, self.quantized_db(), item_id, vector)
|
||||
} else {
|
||||
@@ -369,37 +487,34 @@ impl ArroyWrapper {
|
||||
}
|
||||
}
|
||||
|
||||
fn _del_item<D: arroy::Distance>(
|
||||
fn _del_item<D: hannoy::Distance>(
|
||||
&self,
|
||||
wtxn: &mut RwTxn,
|
||||
db: arroy::Database<D>,
|
||||
item_id: arroy::ItemId,
|
||||
db: hannoy::Database<D>,
|
||||
item_id: hannoy::ItemId,
|
||||
vector: &[f32],
|
||||
) -> Result<bool, arroy::Error> {
|
||||
) -> Result<bool, hannoy::Error> {
|
||||
let dimension = vector.len();
|
||||
|
||||
for index in arroy_store_range_for_embedder(self.embedder_index) {
|
||||
let writer = arroy::Writer::new(db, index, dimension);
|
||||
let Some(candidate) = writer.item_vector(wtxn, item_id)? else {
|
||||
continue;
|
||||
};
|
||||
if candidate == vector {
|
||||
for index in vector_store_range_for_embedder(self.embedder_index) {
|
||||
let writer = hannoy::Writer::new(db, index, dimension);
|
||||
if writer.contains_item(wtxn, item_id)? {
|
||||
return writer.del_item(wtxn, item_id);
|
||||
}
|
||||
}
|
||||
Ok(false)
|
||||
}
|
||||
|
||||
pub fn clear(&self, wtxn: &mut RwTxn, dimension: usize) -> Result<(), arroy::Error> {
|
||||
for index in arroy_store_range_for_embedder(self.embedder_index) {
|
||||
pub fn clear(&self, wtxn: &mut RwTxn, dimension: usize) -> Result<(), hannoy::Error> {
|
||||
for index in vector_store_range_for_embedder(self.embedder_index) {
|
||||
if self.quantized {
|
||||
let writer = arroy::Writer::new(self.quantized_db(), index, dimension);
|
||||
let writer = hannoy::Writer::new(self.quantized_db(), index, dimension);
|
||||
if writer.is_empty(wtxn)? {
|
||||
continue;
|
||||
}
|
||||
writer.clear(wtxn)?;
|
||||
} else {
|
||||
let writer = arroy::Writer::new(self.angular_db(), index, dimension);
|
||||
let writer = hannoy::Writer::new(self.angular_db(), index, dimension);
|
||||
if writer.is_empty(wtxn)? {
|
||||
continue;
|
||||
}
|
||||
@@ -413,17 +528,31 @@ impl ArroyWrapper {
|
||||
&self,
|
||||
rtxn: &RoTxn,
|
||||
dimension: usize,
|
||||
item: arroy::ItemId,
|
||||
) -> Result<bool, arroy::Error> {
|
||||
for index in arroy_store_range_for_embedder(self.embedder_index) {
|
||||
let contains = if self.quantized {
|
||||
let writer = arroy::Writer::new(self.quantized_db(), index, dimension);
|
||||
item: hannoy::ItemId,
|
||||
) -> crate::Result<bool> {
|
||||
for index in vector_store_range_for_embedder(self.embedder_index) {
|
||||
let contains = if self.version_uses_arroy() {
|
||||
if self.quantized {
|
||||
let writer = arroy::Writer::new(self.arroy_quantized_db(), index, dimension);
|
||||
if writer.is_empty(rtxn)? {
|
||||
continue;
|
||||
}
|
||||
writer.contains_item(rtxn, item)?
|
||||
} else {
|
||||
let writer = arroy::Writer::new(self.arroy_angular_db(), index, dimension);
|
||||
if writer.is_empty(rtxn)? {
|
||||
continue;
|
||||
}
|
||||
writer.contains_item(rtxn, item)?
|
||||
}
|
||||
} else if self.quantized {
|
||||
let writer = hannoy::Writer::new(self.quantized_db(), index, dimension);
|
||||
if writer.is_empty(rtxn)? {
|
||||
continue;
|
||||
}
|
||||
writer.contains_item(rtxn, item)?
|
||||
} else {
|
||||
let writer = arroy::Writer::new(self.angular_db(), index, dimension);
|
||||
let writer = hannoy::Writer::new(self.angular_db(), index, dimension);
|
||||
if writer.is_empty(rtxn)? {
|
||||
continue;
|
||||
}
|
||||
@@ -442,15 +571,23 @@ impl ArroyWrapper {
|
||||
item: ItemId,
|
||||
limit: usize,
|
||||
filter: Option<&RoaringBitmap>,
|
||||
) -> Result<Vec<(ItemId, f32)>, arroy::Error> {
|
||||
if self.quantized {
|
||||
self._nns_by_item(rtxn, self.quantized_db(), item, limit, filter)
|
||||
) -> crate::Result<Vec<(ItemId, f32)>> {
|
||||
if self.version_uses_arroy() {
|
||||
if self.quantized {
|
||||
self._arroy_nns_by_item(rtxn, self.arroy_quantized_db(), item, limit, filter)
|
||||
.map_err(Into::into)
|
||||
} else {
|
||||
self._arroy_nns_by_item(rtxn, self.arroy_angular_db(), item, limit, filter)
|
||||
.map_err(Into::into)
|
||||
}
|
||||
} else if self.quantized {
|
||||
self._nns_by_item(rtxn, self.quantized_db(), item, limit, filter).map_err(Into::into)
|
||||
} else {
|
||||
self._nns_by_item(rtxn, self.angular_db(), item, limit, filter)
|
||||
self._nns_by_item(rtxn, self.angular_db(), item, limit, filter).map_err(Into::into)
|
||||
}
|
||||
}
|
||||
|
||||
fn _nns_by_item<D: arroy::Distance>(
|
||||
fn _arroy_nns_by_item<D: arroy::Distance>(
|
||||
&self,
|
||||
rtxn: &RoTxn,
|
||||
db: arroy::Database<D>,
|
||||
@@ -460,7 +597,7 @@ impl ArroyWrapper {
|
||||
) -> Result<Vec<(ItemId, f32)>, arroy::Error> {
|
||||
let mut results = Vec::new();
|
||||
|
||||
for reader in self.readers(rtxn, db) {
|
||||
for reader in self.arroy_readers(rtxn, db) {
|
||||
let reader = reader?;
|
||||
let mut searcher = reader.nns(limit);
|
||||
if let Some(filter) = filter {
|
||||
@@ -478,21 +615,56 @@ impl ArroyWrapper {
|
||||
Ok(results)
|
||||
}
|
||||
|
||||
fn _nns_by_item<D: hannoy::Distance>(
|
||||
&self,
|
||||
rtxn: &RoTxn,
|
||||
db: hannoy::Database<D>,
|
||||
item: ItemId,
|
||||
limit: usize,
|
||||
filter: Option<&RoaringBitmap>,
|
||||
) -> Result<Vec<(ItemId, f32)>, hannoy::Error> {
|
||||
let mut results = Vec::new();
|
||||
|
||||
for reader in self.readers(rtxn, db) {
|
||||
let reader = reader?;
|
||||
let mut searcher = reader.nns(limit);
|
||||
searcher.ef_search((limit * 10).max(100)); // TODO find better ef
|
||||
if let Some(filter) = filter {
|
||||
searcher.candidates(filter);
|
||||
}
|
||||
|
||||
if let Some(mut ret) = searcher.by_item(rtxn, item)? {
|
||||
results.append(&mut ret);
|
||||
}
|
||||
}
|
||||
results.sort_unstable_by_key(|(_, distance)| OrderedFloat(*distance));
|
||||
Ok(results)
|
||||
}
|
||||
|
||||
pub fn nns_by_vector(
|
||||
&self,
|
||||
rtxn: &RoTxn,
|
||||
vector: &[f32],
|
||||
limit: usize,
|
||||
filter: Option<&RoaringBitmap>,
|
||||
) -> Result<Vec<(ItemId, f32)>, arroy::Error> {
|
||||
if self.quantized {
|
||||
) -> crate::Result<Vec<(ItemId, f32)>> {
|
||||
if self.version_uses_arroy() {
|
||||
if self.quantized {
|
||||
self._arroy_nns_by_vector(rtxn, self.arroy_quantized_db(), vector, limit, filter)
|
||||
.map_err(Into::into)
|
||||
} else {
|
||||
self._arroy_nns_by_vector(rtxn, self.arroy_angular_db(), vector, limit, filter)
|
||||
.map_err(Into::into)
|
||||
}
|
||||
} else if self.quantized {
|
||||
self._nns_by_vector(rtxn, self.quantized_db(), vector, limit, filter)
|
||||
.map_err(Into::into)
|
||||
} else {
|
||||
self._nns_by_vector(rtxn, self.angular_db(), vector, limit, filter)
|
||||
self._nns_by_vector(rtxn, self.angular_db(), vector, limit, filter).map_err(Into::into)
|
||||
}
|
||||
}
|
||||
|
||||
fn _nns_by_vector<D: arroy::Distance>(
|
||||
fn _arroy_nns_by_vector<D: arroy::Distance>(
|
||||
&self,
|
||||
rtxn: &RoTxn,
|
||||
db: arroy::Database<D>,
|
||||
@@ -502,7 +674,7 @@ impl ArroyWrapper {
|
||||
) -> Result<Vec<(ItemId, f32)>, arroy::Error> {
|
||||
let mut results = Vec::new();
|
||||
|
||||
for reader in self.readers(rtxn, db) {
|
||||
for reader in self.arroy_readers(rtxn, db) {
|
||||
let reader = reader?;
|
||||
let mut searcher = reader.nns(limit);
|
||||
if let Some(filter) = filter {
|
||||
@@ -520,10 +692,50 @@ impl ArroyWrapper {
|
||||
Ok(results)
|
||||
}
|
||||
|
||||
pub fn item_vectors(&self, rtxn: &RoTxn, item_id: u32) -> Result<Vec<Vec<f32>>, arroy::Error> {
|
||||
fn _nns_by_vector<D: hannoy::Distance>(
|
||||
&self,
|
||||
rtxn: &RoTxn,
|
||||
db: hannoy::Database<D>,
|
||||
vector: &[f32],
|
||||
limit: usize,
|
||||
filter: Option<&RoaringBitmap>,
|
||||
) -> Result<Vec<(ItemId, f32)>, hannoy::Error> {
|
||||
let mut results = Vec::new();
|
||||
|
||||
for reader in self.readers(rtxn, db) {
|
||||
let reader = reader?;
|
||||
let mut searcher = reader.nns(limit);
|
||||
searcher.ef_search((limit * 10).max(100)); // TODO find better ef
|
||||
if let Some(filter) = filter {
|
||||
searcher.candidates(filter);
|
||||
}
|
||||
|
||||
results.append(&mut searcher.by_vector(rtxn, vector)?);
|
||||
}
|
||||
|
||||
results.sort_unstable_by_key(|(_, distance)| OrderedFloat(*distance));
|
||||
|
||||
Ok(results)
|
||||
}
|
||||
|
||||
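Both hannoy search paths widen the candidate beam with ef_search = max(limit * 10, 100) before applying the filter, a provisional choice that the TODO above flags. A quick standalone check of what the heuristic yields for typical limits:

// Provisional beam width used by the hannoy searchers in this diff.
fn ef_search_for(limit: usize) -> usize {
    (limit * 10).max(100)
}

fn main() {
    assert_eq!(ef_search_for(5), 100);    // small limits are padded up to 100
    assert_eq!(ef_search_for(20), 200);   // larger limits scale linearly
    assert_eq!(ef_search_for(100), 1000);
}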
pub fn item_vectors(&self, rtxn: &RoTxn, item_id: u32) -> crate::Result<Vec<Vec<f32>>> {
|
||||
let mut vectors = Vec::new();
|
||||
|
||||
if self.quantized {
|
||||
if self.version_uses_arroy() {
|
||||
if self.quantized {
|
||||
for reader in self.arroy_readers(rtxn, self.arroy_quantized_db()) {
|
||||
if let Some(vec) = reader?.item_vector(rtxn, item_id)? {
|
||||
vectors.push(vec);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for reader in self.arroy_readers(rtxn, self.arroy_angular_db()) {
|
||||
if let Some(vec) = reader?.item_vector(rtxn, item_id)? {
|
||||
vectors.push(vec);
|
||||
}
|
||||
}
|
||||
}
|
||||
} else if self.quantized {
|
||||
for reader in self.readers(rtxn, self.quantized_db()) {
|
||||
if let Some(vec) = reader?.item_vector(rtxn, item_id)? {
|
||||
vectors.push(vec);
|
||||
@@ -536,22 +748,31 @@ impl ArroyWrapper {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(vectors)
|
||||
}
|
||||
|
||||
fn angular_db(&self) -> arroy::Database<Cosine> {
|
||||
fn arroy_angular_db(&self) -> arroy::Database<arroy::distances::Cosine> {
|
||||
self.database.remap_types()
|
||||
}
|
||||
|
||||
fn arroy_quantized_db(&self) -> arroy::Database<arroy::distances::BinaryQuantizedCosine> {
|
||||
self.database.remap_types()
|
||||
}
|
||||
|
||||
fn angular_db(&self) -> hannoy::Database<Cosine> {
|
||||
self.database.remap_data_type()
|
||||
}
|
||||
|
||||
fn quantized_db(&self) -> arroy::Database<BinaryQuantizedCosine> {
|
||||
fn quantized_db(&self) -> hannoy::Database<Hamming> {
|
||||
self.database.remap_data_type()
|
||||
}
|
||||
|
||||
pub fn aggregate_stats(
|
||||
&self,
|
||||
rtxn: &RoTxn,
|
||||
stats: &mut ArroyStats,
|
||||
) -> Result<(), arroy::Error> {
|
||||
stats: &mut HannoyStats,
|
||||
) -> Result<(), hannoy::Error> {
|
||||
if self.quantized {
|
||||
for reader in self.readers(rtxn, self.quantized_db()) {
|
||||
let reader = reader?;
|
||||
@@ -573,10 +794,11 @@ impl ArroyWrapper {
|
||||
}
|
||||
|
||||
#[derive(Debug, Default, Clone)]
|
||||
pub struct ArroyStats {
|
||||
pub struct HannoyStats {
|
||||
pub number_of_embeddings: u64,
|
||||
pub documents: RoaringBitmap,
|
||||
}
|
||||
|
||||
/// One or multiple embeddings stored consecutively in a flat vector.
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub struct Embeddings<F> {
|
||||
@@ -1221,11 +1443,11 @@ pub const fn is_cuda_enabled() -> bool {
cfg!(feature = "cuda")
}

fn arroy_store_range_for_embedder(embedder_id: u8) -> impl Iterator<Item = u16> {
(0..=u8::MAX).map(move |store_id| arroy_store_for_embedder(embedder_id, store_id))
fn vector_store_range_for_embedder(embedder_id: u8) -> impl Iterator<Item = u16> {
(0..=u8::MAX).map(move |store_id| vector_store_for_embedder(embedder_id, store_id))
}

fn arroy_store_for_embedder(embedder_id: u8, store_id: u8) -> u16 {
fn vector_store_for_embedder(embedder_id: u8, store_id: u8) -> u16 {
let embedder_id = (embedder_id as u16) << 8;
embedder_id | (store_id as u16)
}
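Each embedder owns a contiguous block of 256 store ids: the embedder id fills the high byte of the u16 index and the store id the low byte, exactly as in vector_store_for_embedder above. A standalone check of that packing:

fn vector_store_for_embedder(embedder_id: u8, store_id: u8) -> u16 {
    let embedder_id = (embedder_id as u16) << 8;
    embedder_id | (store_id as u16)
}

fn main() {
    // Embedder 3, store 5 -> 0x0305.
    assert_eq!(vector_store_for_embedder(3, 5), 0x0305);
    // The 256 stores of embedder 3 span 0x0300..=0x03FF, so embedders never collide.
    assert_eq!(vector_store_for_embedder(3, 0), 0x0300);
    assert_eq!(vector_store_for_embedder(3, u8::MAX), 0x03FF);
}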
@@ -321,7 +321,14 @@ impl Embedder {
pub fn prompt_count_in_chunk_hint(&self) -> usize {
match self.data.request.input_type() {
InputType::Text => 1,
InputType::TextArray => 10,
InputType::TextArray => {
let chunk_size = std::env::var("MEILI_EMBEDDINGS_CHUNK_SIZE")
.ok()
.and_then(|chunk_size| chunk_size.parse().ok())
.unwrap_or(10);
assert!(chunk_size <= 100, "Embedding chunk size cannot exceed 100");
chunk_size
}
}
}
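The chunk hint for TextArray requests is now overridable through the MEILI_EMBEDDINGS_CHUNK_SIZE environment variable, falling back to the previous hard-coded 10 and refusing values above 100. A standalone sketch of the same parse-with-fallback logic (the variable name comes from the diff; the free function here is only for illustration):

use std::env;

fn embeddings_chunk_size() -> usize {
    let chunk_size = env::var("MEILI_EMBEDDINGS_CHUNK_SIZE")
        .ok()
        .and_then(|chunk_size| chunk_size.parse().ok())
        .unwrap_or(10);
    assert!(chunk_size <= 100, "Embedding chunk size cannot exceed 100");
    chunk_size
}

fn main() {
    env::set_var("MEILI_EMBEDDINGS_CHUNK_SIZE", "25");
    assert_eq!(embeddings_chunk_size(), 25);
    env::remove_var("MEILI_EMBEDDINGS_CHUNK_SIZE");
    assert_eq!(embeddings_chunk_size(), 10); // unset or unparsable -> default
}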
@@ -59,6 +59,7 @@ fn test_facet_distribution_with_no_facet_values() {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
None,
)
.unwrap();

@@ -97,6 +97,7 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
None,
)
.unwrap();

@@ -329,6 +329,7 @@ fn criteria_ascdesc() {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
None,
)
.unwrap();

@@ -138,6 +138,7 @@ fn test_typo_disabled_on_word() {
&mut new_fields_ids_map,
&|| false,
Progress::default(),
None,
)
.unwrap();
@@ -59,7 +59,7 @@ fn fibo_recursive(n: u32) -> u32 {
if n == 1 {
return 2;
}
return fibo_recursive(n - 1) - fibo_recursive(n - 2);
fibo_recursive(n - 1) - fibo_recursive(n - 2)
}
use tracing_error::ExtractSpanTrace as _;