Mirror of https://github.com/meilisearch/meilisearch.git, synced 2025-09-04 11:46:30 +00:00.

Merge branch 'release-v1.16.0' into fragment-filters

.gitignore (vendored, 2 changes)

@@ -5,7 +5,7 @@
 **/*.json_lines
 **/*.rs.bk
 /*.mdb
-/data.ms
+/*.ms
 /snapshots
 /dumps
 /bench

Cargo.lock (generated, 5 changes)

@@ -3775,6 +3775,7 @@ dependencies = [
 "meili-snap",
 "meilisearch-auth",
 "meilisearch-types",
+"memmap2",
 "mimalloc",
 "mime",
 "mopa-maintained",
@@ -3908,9 +3909,9 @@ checksum = "32a282da65faaf38286cf3be983213fcf1d2e2a58700e808f83f4ea9a4804bc0"
 
 [[package]]
 name = "memmap2"
-version = "0.9.5"
+version = "0.9.7"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "fd3f7eed9d3848f8b98834af67102b720745c4ec028fcd0aa0239277e7de374f"
+checksum = "483758ad303d734cec05e5c12b41d7e93e6a6390c5e9dae6bdeb7c1259012d28"
 dependencies = [
 "libc",
 "stable_deref_trait",

@@ -14,7 +14,7 @@ license.workspace = true
 anyhow = "1.0.98"
 bumpalo = "3.18.1"
 csv = "1.3.1"
-memmap2 = "0.9.5"
+memmap2 = "0.9.7"
 milli = { path = "../milli" }
 mimalloc = { version = "0.1.47", default-features = false }
 serde_json = { version = "1.0.140", features = ["preserve_order"] }
@@ -55,4 +55,3 @@ harness = false
 [[bench]]
 name = "sort"
 harness = false
-

@@ -1,3 +1,4 @@
+use std::fs::File;
 use std::str::FromStr;
 
 use super::v2_to_v3::CompatV2ToV3;
@@ -94,6 +95,10 @@ impl CompatIndexV1ToV2 {
         self.from.documents().map(|it| Box::new(it) as Box<dyn Iterator<Item = _>>)
     }
 
+    pub fn documents_file(&self) -> &File {
+        self.from.documents_file()
+    }
+
     pub fn settings(&mut self) -> Result<v2::settings::Settings<v2::settings::Checked>> {
         Ok(v2::settings::Settings::<v2::settings::Unchecked>::from(self.from.settings()?).check())
     }

@@ -1,3 +1,4 @@
+use std::fs::File;
 use std::str::FromStr;
 
 use time::OffsetDateTime;
@@ -122,6 +123,13 @@ impl CompatIndexV2ToV3 {
         }
     }
 
+    pub fn documents_file(&self) -> &File {
+        match self {
+            CompatIndexV2ToV3::V2(v2) => v2.documents_file(),
+            CompatIndexV2ToV3::Compat(compat) => compat.documents_file(),
+        }
+    }
+
     pub fn settings(&mut self) -> Result<v3::Settings<v3::Checked>> {
         let settings = match self {
             CompatIndexV2ToV3::V2(from) => from.settings()?,

@@ -1,3 +1,5 @@
+use std::fs::File;
+
 use super::v2_to_v3::{CompatIndexV2ToV3, CompatV2ToV3};
 use super::v4_to_v5::CompatV4ToV5;
 use crate::reader::{v3, v4, UpdateFile};
@@ -252,6 +254,13 @@ impl CompatIndexV3ToV4 {
         }
     }
 
+    pub fn documents_file(&self) -> &File {
+        match self {
+            CompatIndexV3ToV4::V3(v3) => v3.documents_file(),
+            CompatIndexV3ToV4::Compat(compat) => compat.documents_file(),
+        }
+    }
+
     pub fn settings(&mut self) -> Result<v4::Settings<v4::Checked>> {
         Ok(match self {
             CompatIndexV3ToV4::V3(v3) => {

@@ -1,3 +1,5 @@
+use std::fs::File;
+
 use super::v3_to_v4::{CompatIndexV3ToV4, CompatV3ToV4};
 use super::v5_to_v6::CompatV5ToV6;
 use crate::reader::{v4, v5, Document};
@@ -241,6 +243,13 @@ impl CompatIndexV4ToV5 {
         }
     }
 
+    pub fn documents_file(&self) -> &File {
+        match self {
+            CompatIndexV4ToV5::V4(v4) => v4.documents_file(),
+            CompatIndexV4ToV5::Compat(compat) => compat.documents_file(),
+        }
+    }
+
     pub fn settings(&mut self) -> Result<v5::Settings<v5::Checked>> {
         match self {
             CompatIndexV4ToV5::V4(v4) => Ok(v5::Settings::from(v4.settings()?).check()),

@@ -1,3 +1,4 @@
+use std::fs::File;
 use std::num::NonZeroUsize;
 use std::str::FromStr;
 
@@ -243,6 +244,13 @@ impl CompatIndexV5ToV6 {
         }
     }
 
+    pub fn documents_file(&self) -> &File {
+        match self {
+            CompatIndexV5ToV6::V5(v5) => v5.documents_file(),
+            CompatIndexV5ToV6::Compat(compat) => compat.documents_file(),
+        }
+    }
+
     pub fn settings(&mut self) -> Result<v6::Settings<v6::Checked>> {
         match self {
             CompatIndexV5ToV6::V5(v5) => Ok(v6::Settings::from(v5.settings()?).check()),

@@ -192,6 +192,14 @@ impl DumpIndexReader {
         }
     }
 
+    /// A reference to a file in the NDJSON format containing all the documents of the index
+    pub fn documents_file(&self) -> &File {
+        match self {
+            DumpIndexReader::Current(v6) => v6.documents_file(),
+            DumpIndexReader::Compat(compat) => compat.documents_file(),
+        }
+    }
+
     pub fn settings(&mut self) -> Result<v6::Settings<v6::Checked>> {
         match self {
             DumpIndexReader::Current(v6) => v6.settings(),
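
The `documents_file()` accessors threaded through every dump reader above exist so a caller can reach the raw NDJSON file backing a dump index. A minimal sketch of the intended use, assuming only that the file is not modified while mapped (it mirrors the `memmap2::Mmap::map` call added to `import_dump` later in this diff):

use std::fs::File;

use memmap2::Mmap;

// Memory-map the dump's documents file so the edition-2024 indexer can feed
// raw NDJSON bytes to `replace_documents` without rebuilding a grenad+obkv
// batch document by document.
fn map_documents(file: &File) -> std::io::Result<Mmap> {
    // Safety: the dump file must not be truncated or written while mapped.
    unsafe { Mmap::map(file) }
}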

@@ -72,6 +72,10 @@ impl V1IndexReader {
             .map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) }))
     }
 
+    pub fn documents_file(&self) -> &File {
+        self.documents.get_ref()
+    }
+
     pub fn settings(&mut self) -> Result<self::settings::Settings> {
         Ok(serde_json::from_reader(&mut self.settings)?)
     }

@@ -203,6 +203,10 @@ impl V2IndexReader {
             .map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) }))
     }
 
+    pub fn documents_file(&self) -> &File {
+        self.documents.get_ref()
+    }
+
     pub fn settings(&mut self) -> Result<Settings<Checked>> {
         Ok(self.settings.clone())
     }

@@ -215,6 +215,10 @@ impl V3IndexReader {
             .map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) }))
     }
 
+    pub fn documents_file(&self) -> &File {
+        self.documents.get_ref()
+    }
+
     pub fn settings(&mut self) -> Result<Settings<Checked>> {
         Ok(self.settings.clone())
     }

@@ -210,6 +210,10 @@ impl V4IndexReader {
             .map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) }))
     }
 
+    pub fn documents_file(&self) -> &File {
+        self.documents.get_ref()
+    }
+
     pub fn settings(&mut self) -> Result<Settings<Checked>> {
         Ok(self.settings.clone())
     }

@@ -247,6 +247,10 @@ impl V5IndexReader {
             .map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) }))
     }
 
+    pub fn documents_file(&self) -> &File {
+        self.documents.get_ref()
+    }
+
     pub fn settings(&mut self) -> Result<Settings<Checked>> {
         Ok(self.settings.clone())
     }

@@ -284,6 +284,10 @@ impl V6IndexReader {
             .map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) }))
     }
 
+    pub fn documents_file(&self) -> &File {
+        self.documents.get_ref()
+    }
+
     pub fn settings(&mut self) -> Result<Settings<Checked>> {
         let mut settings: Settings<Unchecked> = serde_json::from_reader(&mut self.settings)?;
         patch_embedders(&mut settings);

@@ -26,7 +26,7 @@ flate2 = "1.1.2"
 indexmap = "2.9.0"
 meilisearch-auth = { path = "../meilisearch-auth" }
 meilisearch-types = { path = "../meilisearch-types" }
-memmap2 = "0.9.5"
+memmap2 = "0.9.7"
 page_size = "0.6.0"
 rayon = "1.10.0"
 roaring = { version = "0.10.12", features = ["serde"] }

@@ -20,6 +20,7 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String {
 
     let IndexScheduler {
         cleanup_enabled: _,
+        experimental_no_edition_2024_for_dumps: _,
        processing_tasks,
        env,
        version,

@@ -168,6 +168,9 @@ pub struct IndexScheduler {
     /// Whether we should automatically cleanup the task queue or not.
     pub(crate) cleanup_enabled: bool,
 
+    /// Whether we should use the old document indexer or the new one.
+    pub(crate) experimental_no_edition_2024_for_dumps: bool,
+
     /// The webhook url we should send tasks to after processing every batches.
     pub(crate) webhook_url: Option<String>,
     /// The Authorization header to send to the webhook URL.
@@ -210,6 +213,7 @@ impl IndexScheduler {
 
             index_mapper: self.index_mapper.clone(),
             cleanup_enabled: self.cleanup_enabled,
+            experimental_no_edition_2024_for_dumps: self.experimental_no_edition_2024_for_dumps,
             webhook_url: self.webhook_url.clone(),
             webhook_authorization_header: self.webhook_authorization_header.clone(),
             embedders: self.embedders.clone(),
@@ -296,6 +300,9 @@ impl IndexScheduler {
             index_mapper,
             env,
             cleanup_enabled: options.cleanup_enabled,
+            experimental_no_edition_2024_for_dumps: options
+                .indexer_config
+                .experimental_no_edition_2024_for_dumps,
             webhook_url: options.webhook_url,
             webhook_authorization_header: options.webhook_authorization_header,
             embedders: Default::default(),
@@ -594,6 +601,11 @@ impl IndexScheduler {
         Ok(nbr_index_processing_tasks > 0)
     }
 
+    /// Whether the index should use the old document indexer.
+    pub fn no_edition_2024_for_dumps(&self) -> bool {
+        self.experimental_no_edition_2024_for_dumps
+    }
+
     /// Return the tasks matching the query from the user's point of view along
     /// with the total number of tasks matching the query, ignoring from and limit.
     ///
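
`no_edition_2024_for_dumps()` is the read-only surface other crates use to pick an import path. A self-contained sketch of the dispatch it enables; `Scheduler` is a stand-in for `IndexScheduler`, which is not reproduced here:

struct Scheduler {
    experimental_no_edition_2024_for_dumps: bool,
}

impl Scheduler {
    fn no_edition_2024_for_dumps(&self) -> bool {
        self.experimental_no_edition_2024_for_dumps
    }
}

fn main() {
    // The flag defaults to false, so dumps use the new indexer unless the
    // experimental opt-out is set.
    let scheduler = Scheduler { experimental_no_edition_2024_for_dumps: false };
    if scheduler.no_edition_2024_for_dumps() {
        println!("dump import: legacy grenad+obkv indexer");
    } else {
        println!("dump import: edition-2024 indexer");
    }
}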

@@ -5,6 +5,7 @@ use std::sync::atomic::Ordering;
 
 use dump::IndexMetadata;
 use meilisearch_types::milli::constants::RESERVED_VECTORS_FIELD_NAME;
+use meilisearch_types::milli::index::EmbeddingsWithMetadata;
 use meilisearch_types::milli::progress::{Progress, VariableNameStep};
 use meilisearch_types::milli::vector::parsed_vectors::{ExplicitVectors, VectorOrArrayOfVectors};
 use meilisearch_types::milli::{self};
@@ -227,12 +228,21 @@ impl IndexScheduler {
                 return Err(Error::from_milli(user_err, Some(uid.to_string())));
             };
 
-            for (embedder_name, (embeddings, regenerate)) in embeddings {
+            for (
+                embedder_name,
+                EmbeddingsWithMetadata { embeddings, regenerate, has_fragments },
+            ) in embeddings
+            {
                 let embeddings = ExplicitVectors {
                     embeddings: Some(VectorOrArrayOfVectors::from_array_of_vectors(
                         embeddings,
                     )),
-                    regenerate,
+                    regenerate: regenerate &&
+                        // Meilisearch does not handle dumps with fragments well: because the
+                        // fragments are marked as user-provided, all embeddings would be
+                        // regenerated on any settings change or document update.
+                        // To prevent this, we mark embeddings as non-regenerate in this case.
+                        !has_fragments,
                 };
                 vectors.insert(embedder_name, serde_json::to_value(embeddings).unwrap());
             }
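
For context, a sketch of the `_vectors` entry this loop writes into each dumped document when the embedder uses fragments; the embedder name and values are illustrative, and the field shape follows the serialization of `ExplicitVectors`. Forcing `regenerate` to false means a re-import keeps the stored embeddings instead of re-embedding every document:

use serde_json::json;

fn main() {
    // What a dumped document's `_vectors` field looks like for a
    // fragment-based embedder after this change: embeddings are preserved,
    // regeneration is disabled.
    let entry = json!({
        "my_embedder": {
            "embeddings": [[0.1, 0.2, 0.3]],
            "regenerate": false
        }
    });
    println!("{entry}");
}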

@@ -9,6 +9,7 @@ use flate2::write::GzEncoder;
 use flate2::Compression;
 use meilisearch_types::index_uid_pattern::IndexUidPattern;
 use meilisearch_types::milli::constants::RESERVED_VECTORS_FIELD_NAME;
+use meilisearch_types::milli::index::EmbeddingsWithMetadata;
 use meilisearch_types::milli::progress::{Progress, VariableNameStep};
 use meilisearch_types::milli::update::{request_threads, Setting};
 use meilisearch_types::milli::vector::parsed_vectors::{ExplicitVectors, VectorOrArrayOfVectors};
@@ -229,12 +230,21 @@ impl IndexScheduler {
                 ));
             };
 
-            for (embedder_name, (embeddings, regenerate)) in embeddings {
+            for (
+                embedder_name,
+                EmbeddingsWithMetadata { embeddings, regenerate, has_fragments },
+            ) in embeddings
+            {
                 let embeddings = ExplicitVectors {
                     embeddings: Some(
                         VectorOrArrayOfVectors::from_array_of_vectors(embeddings),
                    ),
-                    regenerate,
+                    regenerate: regenerate &&
+                        // Meilisearch does not handle dumps with fragments well: because the
+                        // fragments are marked as user-provided, all embeddings would be
+                        // regenerated on any settings change or document update.
+                        // To prevent this, we mark embeddings as non-regenerate in this case.
+                        !has_fragments,
                 };
                 vectors.insert(
                     embedder_name,

@@ -3,6 +3,7 @@ use std::collections::BTreeMap;
 use big_s::S;
 use insta::assert_json_snapshot;
 use meili_snap::{json_string, snapshot};
+use meilisearch_types::milli::index::EmbeddingsWithMetadata;
 use meilisearch_types::milli::update::Setting;
 use meilisearch_types::milli::vector::settings::EmbeddingSettings;
 use meilisearch_types::milli::vector::SearchQuery;
@@ -220,8 +221,8 @@ fn import_vectors() {
 
     let embeddings = index.embeddings(&rtxn, 0).unwrap();
 
-    assert_json_snapshot!(embeddings[&simple_hf_name].0[0] == lab_embed, @"true");
-    assert_json_snapshot!(embeddings[&fakerest_name].0[0] == beagle_embed, @"true");
+    assert_json_snapshot!(embeddings[&simple_hf_name].embeddings[0] == lab_embed, @"true");
+    assert_json_snapshot!(embeddings[&fakerest_name].embeddings[0] == beagle_embed, @"true");
 
     let doc = index.documents(&rtxn, std::iter::once(0)).unwrap()[0].1;
     let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
@@ -311,9 +312,9 @@ fn import_vectors() {
     let embeddings = index.embeddings(&rtxn, 0).unwrap();
 
     // automatically changed to patou because set to regenerate
-    assert_json_snapshot!(embeddings[&simple_hf_name].0[0] == patou_embed, @"true");
+    assert_json_snapshot!(embeddings[&simple_hf_name].embeddings[0] == patou_embed, @"true");
     // remained beagle
-    assert_json_snapshot!(embeddings[&fakerest_name].0[0] == beagle_embed, @"true");
+    assert_json_snapshot!(embeddings[&fakerest_name].embeddings[0] == beagle_embed, @"true");
 
     let doc = index.documents(&rtxn, std::iter::once(0)).unwrap()[0].1;
     let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
@@ -497,13 +498,13 @@ fn import_vectors_first_and_embedder_later() {
 
     let docid = index.external_documents_ids.get(&rtxn, "0").unwrap().unwrap();
     let embeddings = index.embeddings(&rtxn, docid).unwrap();
-    let (embedding, _) = &embeddings["my_doggo_embedder"];
-    assert!(!embedding.is_empty(), "{embedding:?}");
+    let EmbeddingsWithMetadata { embeddings, .. } = &embeddings["my_doggo_embedder"];
+    assert!(!embeddings.is_empty(), "{embeddings:?}");
 
     // the document with the id 3 should keep its original embedding
     let docid = index.external_documents_ids.get(&rtxn, "3").unwrap().unwrap();
     let embeddings = index.embeddings(&rtxn, docid).unwrap();
-    let (embeddings, _) = &embeddings["my_doggo_embedder"];
+    let EmbeddingsWithMetadata { embeddings, .. } = &embeddings["my_doggo_embedder"];
 
     snapshot!(embeddings.len(), @"1");
     assert!(embeddings[0].iter().all(|i| *i == 3.0), "{:?}", embeddings[0]);
@@ -558,7 +559,7 @@ fn import_vectors_first_and_embedder_later() {
     "###);
 
     let embeddings = index.embeddings(&rtxn, docid).unwrap();
-    let (embedding, _) = &embeddings["my_doggo_embedder"];
+    let EmbeddingsWithMetadata { embeddings: embedding, .. } = &embeddings["my_doggo_embedder"];
 
     assert!(!embedding.is_empty());
     assert!(!embedding[0].iter().all(|i| *i == 3.0), "{:?}", embedding[0]);
@@ -566,7 +567,7 @@ fn import_vectors_first_and_embedder_later() {
     // the document with the id 4 should generate an embedding
     let docid = index.external_documents_ids.get(&rtxn, "4").unwrap().unwrap();
     let embeddings = index.embeddings(&rtxn, docid).unwrap();
-    let (embedding, _) = &embeddings["my_doggo_embedder"];
+    let EmbeddingsWithMetadata { embeddings: embedding, .. } = &embeddings["my_doggo_embedder"];
 
     assert!(!embedding.is_empty());
 }
@@ -696,7 +697,7 @@ fn delete_document_containing_vector() {
     "###);
     let docid = index.external_documents_ids.get(&rtxn, "0").unwrap().unwrap();
     let embeddings = index.embeddings(&rtxn, docid).unwrap();
-    let (embedding, _) = &embeddings["manual"];
+    let EmbeddingsWithMetadata { embeddings: embedding, .. } = &embeddings["manual"];
     assert!(!embedding.is_empty(), "{embedding:?}");
 
     index_scheduler
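
The test updates above all follow one mechanical change: `Index::embeddings` now yields a named struct instead of an `(embeddings, regenerate)` tuple. A simplified stand-in showing why the struct form is safer once a third field like `has_fragments` appears:

// Stand-in for meilisearch_types::milli::index::EmbeddingsWithMetadata.
struct EmbeddingsWithMetadata {
    embeddings: Vec<Vec<f32>>,
    regenerate: bool,
    has_fragments: bool,
}

fn main() {
    let meta = EmbeddingsWithMetadata {
        embeddings: vec![vec![3.0; 4]],
        regenerate: true,
        has_fragments: false,
    };
    // Positional access like `.0` keeps compiling even if a tuple's layout
    // changes; named destructuring cannot silently shift meaning.
    let EmbeddingsWithMetadata { embeddings, .. } = &meta;
    assert!(!embeddings.is_empty());
    assert!(meta.regenerate && !meta.has_fragments);
}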

@@ -24,7 +24,7 @@ enum-iterator = "2.1.0"
 file-store = { path = "../file-store" }
 flate2 = "1.1.2"
 fst = "0.4.7"
-memmap2 = "0.9.5"
+memmap2 = "0.9.7"
 milli = { path = "../milli" }
 roaring = { version = "0.10.12", features = ["serde"] }
 rustc-hash = "2.1.1"

@@ -233,9 +233,6 @@ pub enum Action {
     #[serde(rename = "*")]
     #[deserr(rename = "*")]
     All = 0,
-    #[serde(rename = "*.get")]
-    #[deserr(rename = "*.get")]
-    AllGet,
     #[serde(rename = "search")]
     #[deserr(rename = "search")]
     Search,
@@ -365,6 +362,9 @@ pub enum Action {
     #[serde(rename = "chatsSettings.update")]
     #[deserr(rename = "chatsSettings.update")]
     ChatsSettingsUpdate,
+    #[serde(rename = "*.get")]
+    #[deserr(rename = "*.get")]
+    AllGet,
 }
 
 impl Action {
@@ -403,6 +403,7 @@ impl Action {
             METRICS_GET => Some(Self::MetricsGet),
             DUMPS_ALL => Some(Self::DumpsAll),
             DUMPS_CREATE => Some(Self::DumpsCreate),
+            SNAPSHOTS_ALL => Some(Self::SnapshotsAll),
             SNAPSHOTS_CREATE => Some(Self::SnapshotsCreate),
             VERSION => Some(Self::Version),
             KEYS_CREATE => Some(Self::KeysAdd),
@@ -411,8 +412,10 @@ impl Action {
             KEYS_DELETE => Some(Self::KeysDelete),
             EXPERIMENTAL_FEATURES_GET => Some(Self::ExperimentalFeaturesGet),
             EXPERIMENTAL_FEATURES_UPDATE => Some(Self::ExperimentalFeaturesUpdate),
+            EXPORT => Some(Self::Export),
             NETWORK_GET => Some(Self::NetworkGet),
             NETWORK_UPDATE => Some(Self::NetworkUpdate),
+            ALL_GET => Some(Self::AllGet),
             _otherwise => None,
         }
     }
@@ -497,6 +500,7 @@ pub mod actions {
     pub const METRICS_GET: u8 = MetricsGet.repr();
     pub const DUMPS_ALL: u8 = DumpsAll.repr();
     pub const DUMPS_CREATE: u8 = DumpsCreate.repr();
+    pub const SNAPSHOTS_ALL: u8 = SnapshotsAll.repr();
     pub const SNAPSHOTS_CREATE: u8 = SnapshotsCreate.repr();
     pub const VERSION: u8 = Version.repr();
     pub const KEYS_CREATE: u8 = KeysAdd.repr();
@@ -519,3 +523,68 @@ pub mod actions {
     pub const CHATS_SETTINGS_GET: u8 = ChatsSettingsGet.repr();
     pub const CHATS_SETTINGS_UPDATE: u8 = ChatsSettingsUpdate.repr();
 }
+
+#[cfg(test)]
+pub(crate) mod test {
+    use super::actions::*;
+    use super::Action::*;
+    use super::*;
+
+    #[test]
+    fn test_action_repr_and_constants() {
+        assert!(All.repr() == 0 && ALL == 0);
+        assert!(Search.repr() == 1 && SEARCH == 1);
+        assert!(DocumentsAll.repr() == 2 && DOCUMENTS_ALL == 2);
+        assert!(DocumentsAdd.repr() == 3 && DOCUMENTS_ADD == 3);
+        assert!(DocumentsGet.repr() == 4 && DOCUMENTS_GET == 4);
+        assert!(DocumentsDelete.repr() == 5 && DOCUMENTS_DELETE == 5);
+        assert!(IndexesAll.repr() == 6 && INDEXES_ALL == 6);
+        assert!(IndexesAdd.repr() == 7 && INDEXES_CREATE == 7);
+        assert!(IndexesGet.repr() == 8 && INDEXES_GET == 8);
+        assert!(IndexesUpdate.repr() == 9 && INDEXES_UPDATE == 9);
+        assert!(IndexesDelete.repr() == 10 && INDEXES_DELETE == 10);
+        assert!(IndexesSwap.repr() == 11 && INDEXES_SWAP == 11);
+        assert!(TasksAll.repr() == 12 && TASKS_ALL == 12);
+        assert!(TasksCancel.repr() == 13 && TASKS_CANCEL == 13);
+        assert!(TasksDelete.repr() == 14 && TASKS_DELETE == 14);
+        assert!(TasksGet.repr() == 15 && TASKS_GET == 15);
+        assert!(SettingsAll.repr() == 16 && SETTINGS_ALL == 16);
+        assert!(SettingsGet.repr() == 17 && SETTINGS_GET == 17);
+        assert!(SettingsUpdate.repr() == 18 && SETTINGS_UPDATE == 18);
+        assert!(StatsAll.repr() == 19 && STATS_ALL == 19);
+        assert!(StatsGet.repr() == 20 && STATS_GET == 20);
+        assert!(MetricsAll.repr() == 21 && METRICS_ALL == 21);
+        assert!(MetricsGet.repr() == 22 && METRICS_GET == 22);
+        assert!(DumpsAll.repr() == 23 && DUMPS_ALL == 23);
+        assert!(DumpsCreate.repr() == 24 && DUMPS_CREATE == 24);
+        assert!(SnapshotsAll.repr() == 25 && SNAPSHOTS_ALL == 25);
+        assert!(SnapshotsCreate.repr() == 26 && SNAPSHOTS_CREATE == 26);
+        assert!(Version.repr() == 27 && VERSION == 27);
+        assert!(KeysAdd.repr() == 28 && KEYS_CREATE == 28);
+        assert!(KeysGet.repr() == 29 && KEYS_GET == 29);
+        assert!(KeysUpdate.repr() == 30 && KEYS_UPDATE == 30);
+        assert!(KeysDelete.repr() == 31 && KEYS_DELETE == 31);
+        assert!(ExperimentalFeaturesGet.repr() == 32 && EXPERIMENTAL_FEATURES_GET == 32);
+        assert!(ExperimentalFeaturesUpdate.repr() == 33 && EXPERIMENTAL_FEATURES_UPDATE == 33);
+        assert!(Export.repr() == 34 && EXPORT == 34);
+        assert!(NetworkGet.repr() == 35 && NETWORK_GET == 35);
+        assert!(NetworkUpdate.repr() == 36 && NETWORK_UPDATE == 36);
+        assert!(ChatCompletions.repr() == 37 && CHAT_COMPLETIONS == 37);
+        assert!(ChatsAll.repr() == 38 && CHATS_ALL == 38);
+        assert!(ChatsGet.repr() == 39 && CHATS_GET == 39);
+        assert!(ChatsDelete.repr() == 40 && CHATS_DELETE == 40);
+        assert!(ChatsSettingsAll.repr() == 41 && CHATS_SETTINGS_ALL == 41);
+        assert!(ChatsSettingsGet.repr() == 42 && CHATS_SETTINGS_GET == 42);
+        assert!(ChatsSettingsUpdate.repr() == 43 && CHATS_SETTINGS_UPDATE == 43);
+        assert!(AllGet.repr() == 44 && ALL_GET == 44);
+    }
+
+    #[test]
+    fn test_from_repr() {
+        for action in enum_iterator::all::<Action>() {
+            let repr = action.repr();
+            let action_from_repr = Action::from_repr(repr);
+            assert_eq!(Some(action), action_from_repr, "Failed for action: {:?}", action);
+        }
+    }
+}
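
Note why `AllGet` moves from the second position to the very end of the enum: with `All = 0` explicit, Rust numbers each following variant one past the previous, and those numbers are persisted through `repr()` for stored API keys. Appending keeps every existing discriminant stable, as the new `test_action_repr_and_constants` test pins down. A minimal demonstration:

enum Demo {
    All = 0,
    Search,   // 1
    Appended, // 2: new variants must go last to keep 0 and 1 stable
}

fn main() {
    assert_eq!(Demo::Search as u8, 1);
    assert_eq!(Demo::Appended as u8, 2);
}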

@@ -50,6 +50,7 @@ jsonwebtoken = "9.3.1"
 lazy_static = "1.5.0"
 meilisearch-auth = { path = "../meilisearch-auth" }
 meilisearch-types = { path = "../meilisearch-types" }
+memmap2 = "0.9.7"
 mimalloc = { version = "0.1.47", default-features = false }
 mime = "0.3.17"
 num_cpus = "1.17.0"
@@ -169,5 +170,5 @@ german = ["meilisearch-types/german"]
 turkish = ["meilisearch-types/turkish"]
 
 [package.metadata.mini-dashboard]
-assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.20/build.zip"
-sha1 = "82a7ddd7bf14bb5323c3d235d2b62892a98b6a59"
+assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.21/build.zip"
+sha1 = "94f56a8e24e2e3a1bc1bd7d9ceaa23464a5e241a"
Binary file not shown.

@@ -203,6 +203,7 @@ struct Infos {
     experimental_composite_embedders: bool,
     experimental_embedding_cache_entries: usize,
     experimental_no_snapshot_compaction: bool,
+    experimental_no_edition_2024_for_dumps: bool,
     experimental_no_edition_2024_for_settings: bool,
     gpu_enabled: bool,
     db_path: bool,
@@ -293,6 +294,7 @@ impl Infos {
             max_indexing_threads,
             skip_index_budget: _,
             experimental_no_edition_2024_for_settings,
+            experimental_no_edition_2024_for_dumps,
         } = indexer_options;
 
         let RuntimeTogglableFeatures {
@@ -329,6 +331,7 @@ impl Infos {
             experimental_composite_embedders: composite_embedders,
             experimental_embedding_cache_entries,
             experimental_no_snapshot_compaction,
+            experimental_no_edition_2024_for_dumps,
             gpu_enabled: meilisearch_types::milli::vector::is_cuda_enabled(),
             db_path: db_path != PathBuf::from("./data.ms"),
             import_dump: import_dump.is_some(),

@@ -30,6 +30,7 @@ use actix_web::web::Data;
 use actix_web::{web, HttpRequest};
 use analytics::Analytics;
 use anyhow::bail;
+use bumpalo::Bump;
 use error::PayloadError;
 use extractors::payload::PayloadConfig;
 use index_scheduler::versioning::Versioning;
@@ -38,6 +39,7 @@ use meilisearch_auth::{open_auth_store_env, AuthController};
 use meilisearch_types::milli::constants::VERSION_MAJOR;
 use meilisearch_types::milli::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
 use meilisearch_types::milli::progress::{EmbedderStats, Progress};
+use meilisearch_types::milli::update::new::indexer;
 use meilisearch_types::milli::update::{
     default_thread_pool_and_threads, IndexDocumentsConfig, IndexDocumentsMethod, IndexerConfig,
 };
@@ -533,7 +535,7 @@ fn import_dump(
     let mut index_reader = index_reader?;
     let metadata = index_reader.metadata();
     let uid = metadata.uid.clone();
-    tracing::info!("Importing index `{}`.", metadata.uid);
+    tracing::info!("Importing index `{uid}`.");
 
     let date = Some((metadata.created_at, metadata.updated_at));
     let index = index_scheduler.create_raw_index(&metadata.uid, date)?;
@@ -552,48 +554,100 @@ fn import_dump(
         apply_settings_to_builder(&settings, &mut builder);
         let embedder_stats: Arc<EmbedderStats> = Default::default();
         builder.execute(&|| false, &progress, embedder_stats.clone())?;
+        wtxn.commit()?;
 
-        // 5.3 Import the documents.
-        // 5.3.1 We need to recreate the grenad+obkv format accepted by the index.
-        tracing::info!("Importing the documents.");
-        let file = tempfile::tempfile()?;
-        let mut builder = DocumentsBatchBuilder::new(BufWriter::new(file));
-        for document in index_reader.documents()? {
-            builder.append_json_object(&document?)?;
-        }
+        let mut wtxn = index.write_txn()?;
+        let rtxn = index.read_txn()?;
 
-        // This flush the content of the batch builder.
-        let file = builder.into_inner()?.into_inner()?;
-
-        // 5.3.2 We feed it to the milli index.
-        let reader = BufReader::new(file);
-        let reader = DocumentsBatchReader::from_reader(reader)?;
-
-        let embedder_configs = index.embedding_configs().embedding_configs(&wtxn)?;
-        let embedders = index_scheduler.embedders(uid.to_string(), embedder_configs)?;
-
-        let builder = milli::update::IndexDocuments::new(
-            &mut wtxn,
-            &index,
-            indexer_config,
-            IndexDocumentsConfig {
-                update_method: IndexDocumentsMethod::ReplaceDocuments,
-                ..Default::default()
-            },
-            |indexing_step| tracing::trace!("update: {:?}", indexing_step),
-            || false,
-            &embedder_stats,
-        )?;
-
-        let builder = builder.with_embedders(embedders);
-
-        let (builder, user_result) = builder.add_documents(reader)?;
-        let user_result = user_result?;
-        tracing::info!(documents_found = user_result, "{} documents found.", user_result);
-        builder.execute()?;
+        if index_scheduler.no_edition_2024_for_dumps() {
+            // 5.3 Import the documents.
+            // 5.3.1 We need to recreate the grenad+obkv format accepted by the index.
+            tracing::info!("Importing the documents.");
+            let file = tempfile::tempfile()?;
+            let mut builder = DocumentsBatchBuilder::new(BufWriter::new(file));
+            for document in index_reader.documents()? {
+                builder.append_json_object(&document?)?;
+            }
+
+            // This flush the content of the batch builder.
+            let file = builder.into_inner()?.into_inner()?;
+
+            // 5.3.2 We feed it to the milli index.
+            let reader = BufReader::new(file);
+            let reader = DocumentsBatchReader::from_reader(reader)?;
+
+            let embedder_configs = index.embedding_configs().embedding_configs(&wtxn)?;
+            let embedders = index_scheduler.embedders(uid.to_string(), embedder_configs)?;
+
+            let builder = milli::update::IndexDocuments::new(
+                &mut wtxn,
+                &index,
+                indexer_config,
+                IndexDocumentsConfig {
+                    update_method: IndexDocumentsMethod::ReplaceDocuments,
+                    ..Default::default()
+                },
+                |indexing_step| tracing::trace!("update: {:?}", indexing_step),
+                || false,
+                &embedder_stats,
+            )?;
+
+            let builder = builder.with_embedders(embedders);
+
+            let (builder, user_result) = builder.add_documents(reader)?;
+            let user_result = user_result?;
+            tracing::info!(documents_found = user_result, "{} documents found.", user_result);
+            builder.execute()?;
+        } else {
+            let db_fields_ids_map = index.fields_ids_map(&rtxn)?;
+            let primary_key = index.primary_key(&rtxn)?;
+            let mut new_fields_ids_map = db_fields_ids_map.clone();
+
+            let mut indexer = indexer::DocumentOperation::new();
+            let embedders = index.embedding_configs().embedding_configs(&rtxn)?;
+            let embedders = index_scheduler.embedders(uid.clone(), embedders)?;
+
+            let mmap = unsafe { memmap2::Mmap::map(index_reader.documents_file())? };
+
+            indexer.replace_documents(&mmap)?;
+
+            let indexer_config = index_scheduler.indexer_config();
+            let pool = &indexer_config.thread_pool;
+
+            let indexer_alloc = Bump::new();
+            let (document_changes, mut operation_stats, primary_key) = indexer.into_changes(
+                &indexer_alloc,
+                &index,
+                &rtxn,
+                primary_key,
+                &mut new_fields_ids_map,
+                &|| false, // never stop processing a dump
+                progress.clone(),
+            )?;
+
+            let operation_stats = operation_stats.pop().unwrap();
+            if let Some(error) = operation_stats.error {
+                return Err(error.into());
+            }
+
+            let _congestion = indexer::index(
+                &mut wtxn,
+                &index,
+                pool,
+                indexer_config.grenad_parameters(),
+                &db_fields_ids_map,
+                new_fields_ids_map,
+                primary_key,
+                &document_changes,
+                embedders,
+                &|| false, // never stop processing a dump
+                &progress,
+                &embedder_stats,
+            )?;
+        }
+
         wtxn.commit()?;
         tracing::info!("All documents successfully imported.");
 
         index_scheduler.refresh_index_stats(&uid)?;
     }
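
The new import path allocates its per-batch scratch data from a bumpalo arena (`indexer_alloc` above). A minimal sketch of the pattern: every allocation shares the arena's lifetime and is released in one shot when the arena drops, which fits a dump import where nothing allocated for the batch outlives the batch:

use bumpalo::Bump;

fn main() {
    let arena = Bump::new();
    // `alloc` hands out a reference tied to the arena's lifetime; there is
    // no per-allocation free.
    let counter: &mut u32 = arena.alloc(0);
    *counter += 1;
    assert_eq!(*counter, 1);
} // arena dropped here: all allocations are freed together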

@@ -15,30 +15,33 @@ lazy_static! {
         "Meilisearch number of degraded search requests"
     ))
     .expect("Can't create a metric");
-    pub static ref MEILISEARCH_CHAT_SEARCH_REQUESTS: IntCounterVec = register_int_counter_vec!(
+    pub static ref MEILISEARCH_CHAT_SEARCHES_TOTAL: IntCounterVec = register_int_counter_vec!(
         opts!(
-            "meilisearch_chat_search_requests",
-            "Meilisearch number of search requests performed by the chat route itself"
+            "meilisearch_chat_searches_total",
+            "Total number of searches performed by the chat route"
         ),
         &["type"]
     )
     .expect("Can't create a metric");
-    pub static ref MEILISEARCH_CHAT_PROMPT_TOKENS_USAGE: IntCounterVec = register_int_counter_vec!(
-        opts!("meilisearch_chat_prompt_tokens_usage", "Meilisearch Chat Prompt Tokens Usage"),
+    pub static ref MEILISEARCH_CHAT_PROMPT_TOKENS_TOTAL: IntCounterVec = register_int_counter_vec!(
+        opts!("meilisearch_chat_prompt_tokens_total", "Total number of prompt tokens consumed"),
         &["workspace", "model"]
     )
     .expect("Can't create a metric");
-    pub static ref MEILISEARCH_CHAT_COMPLETION_TOKENS_USAGE: IntCounterVec =
+    pub static ref MEILISEARCH_CHAT_COMPLETION_TOKENS_TOTAL: IntCounterVec =
         register_int_counter_vec!(
             opts!(
-                "meilisearch_chat_completion_tokens_usage",
-                "Meilisearch Chat Completion Tokens Usage"
+                "meilisearch_chat_completion_tokens_total",
+                "Total number of completion tokens consumed"
            ),
            &["workspace", "model"]
        )
        .expect("Can't create a metric");
-    pub static ref MEILISEARCH_CHAT_TOTAL_TOKENS_USAGE: IntCounterVec = register_int_counter_vec!(
-        opts!("meilisearch_chat_total_tokens_usage", "Meilisearch Chat Total Tokens Usage"),
+    pub static ref MEILISEARCH_CHAT_TOKENS_TOTAL: IntCounterVec = register_int_counter_vec!(
+        opts!(
+            "meilisearch_chat_tokens_total",
+            "Total number of tokens consumed (prompt + completion)"
+        ),
         &["workspace", "model"]
     )
     .expect("Can't create a metric");
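
The renames bring the chat metrics in line with the Prometheus convention that counter names end in `_total`. Note it is the string passed to `opts!` (not the Rust identifier) that scrapers and dashboards see, so both were changed together. A standalone sketch with an illustrative metric name:

use prometheus::{opts, register_int_counter_vec, IntCounterVec};

fn demo_counter() -> IntCounterVec {
    // Counter name follows the `_total` suffix convention; the labels mirror
    // the ones used by the chat token metrics.
    register_int_counter_vec!(
        opts!("demo_chat_tokens_total", "Total number of tokens consumed"),
        &["workspace", "model"]
    )
    .expect("Can't create a metric")
}

fn main() {
    demo_counter().with_label_values(&["default", "my-model"]).inc_by(42);
}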

@@ -68,6 +68,8 @@ const MEILI_EXPERIMENTAL_LIMIT_BATCHED_TASKS_TOTAL_SIZE: &str =
 const MEILI_EXPERIMENTAL_EMBEDDING_CACHE_ENTRIES: &str =
     "MEILI_EXPERIMENTAL_EMBEDDING_CACHE_ENTRIES";
 const MEILI_EXPERIMENTAL_NO_SNAPSHOT_COMPACTION: &str = "MEILI_EXPERIMENTAL_NO_SNAPSHOT_COMPACTION";
+const MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_DUMPS: &str =
+    "MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_DUMPS";
 const DEFAULT_CONFIG_FILE_PATH: &str = "./config.toml";
 const DEFAULT_DB_PATH: &str = "./data.ms";
 const DEFAULT_HTTP_ADDR: &str = "localhost:7700";
@@ -759,6 +761,15 @@ pub struct IndexerOpts {
     #[clap(long, env = MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_SETTINGS)]
     #[serde(default)]
     pub experimental_no_edition_2024_for_settings: bool,
+
+    /// Experimental: make dump imports use the old document indexer.
+    ///
+    /// When enabled, Meilisearch will use the old document indexer when importing dumps.
+    ///
+    /// For more information, see <https://github.com/orgs/meilisearch/discussions/851>.
+    #[clap(long, env = MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_DUMPS)]
+    #[serde(default)]
+    pub experimental_no_edition_2024_for_dumps: bool,
 }
 
 impl IndexerOpts {
@@ -769,6 +780,7 @@ impl IndexerOpts {
             max_indexing_threads,
             skip_index_budget: _,
             experimental_no_edition_2024_for_settings,
+            experimental_no_edition_2024_for_dumps,
         } = self;
         if let Some(max_indexing_memory) = max_indexing_memory.0 {
             export_to_env_if_not_present(
@@ -788,6 +800,12 @@ impl IndexerOpts {
                 experimental_no_edition_2024_for_settings.to_string(),
             );
         }
+        if experimental_no_edition_2024_for_dumps {
+            export_to_env_if_not_present(
+                MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_DUMPS,
+                experimental_no_edition_2024_for_dumps.to_string(),
+            );
+        }
    }
 }
 
@@ -808,6 +826,7 @@ impl TryFrom<&IndexerOpts> for IndexerConfig {
             skip_index_budget: other.skip_index_budget,
             experimental_no_edition_2024_for_settings: other
                 .experimental_no_edition_2024_for_settings,
+            experimental_no_edition_2024_for_dumps: other.experimental_no_edition_2024_for_dumps,
             chunk_compression_type: Default::default(),
             chunk_compression_level: Default::default(),
             documents_chunk_size: Default::default(),
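
An assumed-semantics sketch of the `export_to_env_if_not_present` call added above: the CLI/config value is propagated to the process environment only when the variable is absent, so an env var set explicitly by the operator keeps precedence. The function body here is an illustration, not the crate's actual implementation:

use std::env;

fn export_to_env_if_not_present(key: &str, value: String) {
    // Only fill in the variable when the environment does not already
    // define it.
    if env::var_os(key).is_none() {
        env::set_var(key, value);
    }
}

fn main() {
    export_to_env_if_not_present(
        "MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_DUMPS",
        true.to_string(),
    );
    assert_eq!(
        env::var("MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_DUMPS").unwrap(),
        "true"
    );
}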

@@ -50,8 +50,8 @@ use crate::error::MeilisearchHttpError;
 use crate::extractors::authentication::policies::ActionPolicy;
 use crate::extractors::authentication::{extract_token_from_request, GuardedData, Policy as _};
 use crate::metrics::{
-    MEILISEARCH_CHAT_COMPLETION_TOKENS_USAGE, MEILISEARCH_CHAT_PROMPT_TOKENS_USAGE,
-    MEILISEARCH_CHAT_SEARCH_REQUESTS, MEILISEARCH_CHAT_TOTAL_TOKENS_USAGE,
+    MEILISEARCH_CHAT_COMPLETION_TOKENS_TOTAL, MEILISEARCH_CHAT_PROMPT_TOKENS_TOTAL,
+    MEILISEARCH_CHAT_SEARCHES_TOTAL, MEILISEARCH_CHAT_TOKENS_TOTAL,
     MEILISEARCH_DEGRADED_SEARCH_REQUESTS,
 };
 use crate::routes::chats::utils::SseEventSender;
@@ -319,7 +319,7 @@ async fn process_search_request(
     };
     let mut documents = Vec::new();
     if let Ok((ref rtxn, ref search_result)) = output {
-        MEILISEARCH_CHAT_SEARCH_REQUESTS.with_label_values(&["internal"]).inc();
+        MEILISEARCH_CHAT_SEARCHES_TOTAL.with_label_values(&["internal"]).inc();
         if search_result.degraded {
             MEILISEARCH_DEGRADED_SEARCH_REQUESTS.inc();
         }
@@ -596,13 +596,13 @@ async fn run_conversation<C: async_openai::config::Config>(
     match result {
         Ok(resp) => {
             if let Some(usage) = resp.usage.as_ref() {
-                MEILISEARCH_CHAT_PROMPT_TOKENS_USAGE
+                MEILISEARCH_CHAT_PROMPT_TOKENS_TOTAL
                     .with_label_values(&[workspace_uid, &chat_completion.model])
                     .inc_by(usage.prompt_tokens as u64);
-                MEILISEARCH_CHAT_COMPLETION_TOKENS_USAGE
+                MEILISEARCH_CHAT_COMPLETION_TOKENS_TOTAL
                     .with_label_values(&[workspace_uid, &chat_completion.model])
                     .inc_by(usage.completion_tokens as u64);
-                MEILISEARCH_CHAT_TOTAL_TOKENS_USAGE
+                MEILISEARCH_CHAT_TOKENS_TOTAL
                     .with_label_values(&[workspace_uid, &chat_completion.model])
                     .inc_by(usage.total_tokens as u64);
             }

@@ -19,6 +19,7 @@ use meilisearch_types::error::{Code, ResponseError};
 use meilisearch_types::heed::RoTxn;
 use meilisearch_types::index_uid::IndexUid;
 use meilisearch_types::milli::documents::sort::recursive_sort;
+use meilisearch_types::milli::index::EmbeddingsWithMetadata;
 use meilisearch_types::milli::update::IndexDocumentsMethod;
 use meilisearch_types::milli::vector::parsed_vectors::ExplicitVectors;
 use meilisearch_types::milli::{AscDesc, DocumentId};
@@ -1470,9 +1471,13 @@ fn some_documents<'a, 't: 'a>(
                 Some(Value::Object(map)) => map,
                 _ => Default::default(),
             };
-            for (name, (vector, regenerate)) in index.embeddings(rtxn, key)? {
+            for (
+                name,
+                EmbeddingsWithMetadata { embeddings, regenerate, has_fragments: _ },
+            ) in index.embeddings(rtxn, key)?
+            {
                 let embeddings =
-                    ExplicitVectors { embeddings: Some(vector.into()), regenerate };
+                    ExplicitVectors { embeddings: Some(embeddings.into()), regenerate };
                 vectors.insert(
                     name,
                     serde_json::to_value(embeddings).map_err(MeilisearchHttpError::from)?,

@@ -16,7 +16,7 @@ use meilisearch_types::error::{Code, ResponseError};
 use meilisearch_types::heed::RoTxn;
 use meilisearch_types::index_uid::IndexUid;
 use meilisearch_types::locales::Locale;
-use meilisearch_types::milli::index::{self, SearchParameters};
+use meilisearch_types::milli::index::{self, EmbeddingsWithMetadata, SearchParameters};
 use meilisearch_types::milli::score_details::{ScoreDetails, ScoringStrategy};
 use meilisearch_types::milli::vector::parsed_vectors::ExplicitVectors;
 use meilisearch_types::milli::vector::Embedder;
@@ -1528,8 +1528,11 @@ impl<'a> HitMaker<'a> {
             Some(Value::Object(map)) => map,
             _ => Default::default(),
         };
-        for (name, (vector, regenerate)) in self.index.embeddings(self.rtxn, id)? {
-            let embeddings = ExplicitVectors { embeddings: Some(vector.into()), regenerate };
+        for (name, EmbeddingsWithMetadata { embeddings, regenerate, has_fragments: _ }) in
+            self.index.embeddings(self.rtxn, id)?
+        {
+            let embeddings =
+                ExplicitVectors { embeddings: Some(embeddings.into()), regenerate };
             vectors.insert(
                 name,
                 serde_json::to_value(embeddings).map_err(InternalError::SerdeJson)?,
@@ -421,7 +421,7 @@ async fn error_add_api_key_invalid_parameters_actions() {
     meili_snap::snapshot!(code, @"400 Bad Request");
     meili_snap::snapshot!(meili_snap::json_string!(response, { ".createdAt" => "[ignored]", ".updatedAt" => "[ignored]" }), @r#"
     {
-      "message": "Unknown value `doc.add` at `.actions[0]`: expected one of `*`, `*.get`, `search`, `documents.*`, `documents.add`, `documents.get`, `documents.delete`, `indexes.*`, `indexes.create`, `indexes.get`, `indexes.update`, `indexes.delete`, `indexes.swap`, `tasks.*`, `tasks.cancel`, `tasks.delete`, `tasks.get`, `settings.*`, `settings.get`, `settings.update`, `stats.*`, `stats.get`, `metrics.*`, `metrics.get`, `dumps.*`, `dumps.create`, `snapshots.*`, `snapshots.create`, `version`, `keys.create`, `keys.get`, `keys.update`, `keys.delete`, `experimental.get`, `experimental.update`, `export`, `network.get`, `network.update`, `chatCompletions`, `chats.*`, `chats.get`, `chats.delete`, `chatsSettings.*`, `chatsSettings.get`, `chatsSettings.update`",
+      "message": "Unknown value `doc.add` at `.actions[0]`: expected one of `*`, `search`, `documents.*`, `documents.add`, `documents.get`, `documents.delete`, `indexes.*`, `indexes.create`, `indexes.get`, `indexes.update`, `indexes.delete`, `indexes.swap`, `tasks.*`, `tasks.cancel`, `tasks.delete`, `tasks.get`, `settings.*`, `settings.get`, `settings.update`, `stats.*`, `stats.get`, `metrics.*`, `metrics.get`, `dumps.*`, `dumps.create`, `snapshots.*`, `snapshots.create`, `version`, `keys.create`, `keys.get`, `keys.update`, `keys.delete`, `experimental.get`, `experimental.update`, `export`, `network.get`, `network.update`, `chatCompletions`, `chats.*`, `chats.get`, `chats.delete`, `chatsSettings.*`, `chatsSettings.get`, `chatsSettings.update`, `*.get`",
       "code": "invalid_api_key_actions",
       "type": "invalid_request",
       "link": "https://docs.meilisearch.com/errors#invalid_api_key_actions"
@@ -93,7 +93,7 @@ async fn create_api_key_bad_actions() {
     snapshot!(code, @"400 Bad Request");
     snapshot!(json_string!(response), @r#"
     {
-      "message": "Unknown value `doggo` at `.actions[0]`: expected one of `*`, `*.get`, `search`, `documents.*`, `documents.add`, `documents.get`, `documents.delete`, `indexes.*`, `indexes.create`, `indexes.get`, `indexes.update`, `indexes.delete`, `indexes.swap`, `tasks.*`, `tasks.cancel`, `tasks.delete`, `tasks.get`, `settings.*`, `settings.get`, `settings.update`, `stats.*`, `stats.get`, `metrics.*`, `metrics.get`, `dumps.*`, `dumps.create`, `snapshots.*`, `snapshots.create`, `version`, `keys.create`, `keys.get`, `keys.update`, `keys.delete`, `experimental.get`, `experimental.update`, `export`, `network.get`, `network.update`, `chatCompletions`, `chats.*`, `chats.get`, `chats.delete`, `chatsSettings.*`, `chatsSettings.get`, `chatsSettings.update`",
+      "message": "Unknown value `doggo` at `.actions[0]`: expected one of `*`, `search`, `documents.*`, `documents.add`, `documents.get`, `documents.delete`, `indexes.*`, `indexes.create`, `indexes.get`, `indexes.update`, `indexes.delete`, `indexes.swap`, `tasks.*`, `tasks.cancel`, `tasks.delete`, `tasks.get`, `settings.*`, `settings.get`, `settings.update`, `stats.*`, `stats.get`, `metrics.*`, `metrics.get`, `dumps.*`, `dumps.create`, `snapshots.*`, `snapshots.create`, `version`, `keys.create`, `keys.get`, `keys.update`, `keys.delete`, `experimental.get`, `experimental.update`, `export`, `network.get`, `network.update`, `chatCompletions`, `chats.*`, `chats.get`, `chats.delete`, `chatsSettings.*`, `chatsSettings.get`, `chatsSettings.update`, `*.get`",
       "code": "invalid_api_key_actions",
       "type": "invalid_request",
       "link": "https://docs.meilisearch.com/errors#invalid_api_key_actions"
@@ -466,6 +466,7 @@ pub fn default_settings(dir: impl AsRef<Path>) -> Opt {
         // Having 2 threads makes the tests way faster
         max_indexing_threads: MaxThreads::from_str("2").unwrap(),
         experimental_no_edition_2024_for_settings: false,
+        experimental_no_edition_2024_for_dumps: false,
     },
     experimental_enable_metrics: false,
     ..Parser::parse_from(None as Option<&str>)
@@ -15,6 +15,7 @@ use meilisearch_types::heed::{
 };
 use meilisearch_types::milli::constants::RESERVED_VECTORS_FIELD_NAME;
 use meilisearch_types::milli::documents::{obkv_to_object, DocumentsBatchReader};
+use meilisearch_types::milli::index::EmbeddingsWithMetadata;
 use meilisearch_types::milli::vector::parsed_vectors::{ExplicitVectors, VectorOrArrayOfVectors};
 use meilisearch_types::milli::{obkv_to_json, BEU32};
 use meilisearch_types::tasks::{Status, Task};
@@ -591,12 +592,21 @@ fn export_documents(
                     .into());
                 };

-                for (embedder_name, (embeddings, regenerate)) in embeddings {
+                for (
+                    embedder_name,
+                    EmbeddingsWithMetadata { embeddings, regenerate, has_fragments },
+                ) in embeddings
+                {
                     let embeddings = ExplicitVectors {
                         embeddings: Some(VectorOrArrayOfVectors::from_array_of_vectors(
                             embeddings,
                         )),
-                        regenerate,
+                        regenerate: regenerate &&
+                        // Meilisearch does not handle dumps with fragments well: because the
+                        // fragments are marked as user-provided, all embeddings would be
+                        // regenerated on any settings change or document update.
+                        // To prevent this, we mark the embeddings as non-regenerate in this case.
+                        !has_fragments,
                     };
                     vectors
                         .insert(embedder_name, serde_json::to_value(embeddings).unwrap());
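Aside: a minimal standalone sketch of the export rule above, assuming only what the hunk shows (the helper name and the `main` harness are illustrative, not part of the Meilisearch codebase): an embedding is exported as regenerable only if it was regenerable in the index and its embedder defines no fragments.

// Sketch of the `regenerate && !has_fragments` rule applied by the dump
// exporter above; `export_regenerate_flag` is a hypothetical helper.
fn export_regenerate_flag(regenerate: bool, has_fragments: bool) -> bool {
    // Fragment-produced embeddings are exported as non-regenerate, otherwise a
    // re-imported dump would regenerate every embedding on the next settings
    // change or document update.
    regenerate && !has_fragments
}

fn main() {
    assert!(export_regenerate_flag(true, false)); // plain embedder keeps its flag
    assert!(!export_regenerate_flag(true, true)); // fragments force `false`
    assert!(!export_regenerate_flag(false, false)); // user-provided stays `false`
}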
@@ -40,7 +40,7 @@ indexmap = { version = "2.9.0", features = ["serde"] }
 json-depth-checker = { path = "../json-depth-checker" }
 levenshtein_automata = { version = "0.2.1", features = ["fst_automaton"] }
 memchr = "2.7.5"
-memmap2 = "0.9.5"
+memmap2 = "0.9.7"
 obkv = "0.3.0"
 once_cell = "1.21.3"
 ordered-float = "5.0.0"
@@ -1766,20 +1766,22 @@ impl Index {
         &self,
         rtxn: &RoTxn<'_>,
         docid: DocumentId,
-    ) -> Result<BTreeMap<String, (Vec<Embedding>, bool)>> {
+    ) -> Result<BTreeMap<String, EmbeddingsWithMetadata>> {
         let mut res = BTreeMap::new();
         let embedders = self.embedding_configs();
         for config in embedders.embedding_configs(rtxn)? {
             let embedder_info = embedders.embedder_info(rtxn, &config.name)?.unwrap();
+            let has_fragments = config.config.embedder_options.has_fragments();
             let reader = ArroyWrapper::new(
                 self.vector_arroy,
                 embedder_info.embedder_id,
                 config.config.quantized(),
             );
             let embeddings = reader.item_vectors(rtxn, docid)?;
+            let regenerate = embedder_info.embedding_status.must_regenerate(docid);
             res.insert(
                 config.name.to_owned(),
-                (embeddings, embedder_info.embedding_status.must_regenerate(docid)),
+                EmbeddingsWithMetadata { embeddings, regenerate, has_fragments },
             );
         }
         Ok(res)
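Aside: a rough sketch of what the new return shape looks like to a caller, using a stand-in for the `EmbeddingsWithMetadata` struct introduced later in this diff (the types and the counting helper here are assumptions for illustration, not milli's API). Callers that used to destructure `(Vec<Embedding>, bool)` tuples now name the fields, and can opt out of new metadata with `field: _`, as the search call sites above do with `has_fragments: _`.

use std::collections::BTreeMap;

type Embedding = Vec<f32>;

// Hypothetical stand-in for milli's EmbeddingsWithMetadata.
pub struct EmbeddingsWithMetadata {
    pub embeddings: Vec<Embedding>,
    pub regenerate: bool,
    pub has_fragments: bool,
}

// Counts the embedders whose vectors would be regenerated for this document.
fn count_regenerable(per_embedder: BTreeMap<String, EmbeddingsWithMetadata>) -> usize {
    per_embedder
        .into_values()
        .filter(|EmbeddingsWithMetadata { regenerate, .. }| *regenerate)
        .count()
}

fn main() {
    let mut per_embedder = BTreeMap::new();
    per_embedder.insert(
        "default".to_string(),
        EmbeddingsWithMetadata {
            embeddings: vec![vec![0.1, 0.2]],
            regenerate: true,
            has_fragments: false,
        },
    );
    assert_eq!(count_regenerable(per_embedder), 1);
}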
@@ -1919,6 +1921,12 @@ impl Index {
     }
 }

+pub struct EmbeddingsWithMetadata {
+    pub embeddings: Vec<Embedding>,
+    pub regenerate: bool,
+    pub has_fragments: bool,
+}
+
 #[derive(Debug, Default, Deserialize, Serialize)]
 pub struct ChatConfig {
     pub description: String,
@@ -93,7 +93,7 @@ pub struct ChatSearchParams {
     pub hybrid: Setting<HybridQuery>,

     #[serde(default, skip_serializing_if = "Setting::is_not_set")]
-    #[deserr(default = Setting::Set(20))]
+    #[deserr(default)]
     #[schema(value_type = Option<usize>)]
     pub limit: Setting<usize>,

@@ -23,7 +23,7 @@ use crate::progress::EmbedderStats;
 use crate::prompt::Prompt;
 use crate::update::del_add::{DelAdd, KvReaderDelAdd, KvWriterDelAdd};
 use crate::update::settings::InnerIndexSettingsDiff;
-use crate::vector::db::{EmbedderInfo, EmbeddingStatus, EmbeddingStatusDelta};
+use crate::vector::db::{EmbedderInfo, EmbeddingStatusDelta};
 use crate::vector::error::{EmbedErrorKind, PossibleEmbeddingMistakes, UnusedVectorsDistribution};
 use crate::vector::extractor::{Extractor, ExtractorDiff, RequestFragmentExtractor};
 use crate::vector::parsed_vectors::{ParsedVectorsDiff, VectorState};
@@ -441,6 +441,8 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
         {
             let embedder_is_manual = matches!(*runtime.embedder, Embedder::UserProvided(_));

+            let (old_is_user_provided, old_must_regenerate) =
+                embedder_info.embedding_status.is_user_provided_must_regenerate(docid);
             let (old, new) = parsed_vectors.remove(embedder_name);
             let new_must_regenerate = new.must_regenerate();
             let delta = match action {
@@ -499,16 +501,19 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(

                     let is_adding_fragments = has_fragments && !old_has_fragments;

-                    if is_adding_fragments {
+                    if !has_fragments {
+                        // removing fragments
+                        regenerate_prompt(obkv, &runtime.document_template, new_fields_ids_map)?
+                    } else if is_adding_fragments ||
+                        // regenerate all fragments when going from user-provided to non-user-provided
+                        old_is_user_provided
+                    {
                         regenerate_all_fragments(
                             runtime.fragments(),
                             &doc_alloc,
                             new_fields_ids_map,
                             obkv,
                         )
-                    } else if !has_fragments {
-                        // removing fragments
-                        regenerate_prompt(obkv, &runtime.document_template, new_fields_ids_map)?
                     } else {
                         let mut fragment_diff = Vec::new();
                         let new_fields_ids_map = new_fields_ids_map.as_fields_ids_map();
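Aside: the rewritten branch effectively picks one of three actions, in a new priority order: "no fragments" is handled first, and a document whose old vectors were user-provided now regenerates all fragments. A hedged sketch of just that decision, with an illustrative enum and a made-up function name:

// Sketch of the decision order in the rewritten branch above; the enum and
// helper are illustrative, not part of milli.
#[derive(Debug, PartialEq)]
enum FragmentAction {
    RegeneratePrompt,       // fragments removed: fall back to the document template
    RegenerateAllFragments, // fragments added, or old vectors were user-provided
    DiffFragments,          // otherwise, re-embed only the fragments that changed
}

fn choose_action(
    has_fragments: bool,
    is_adding_fragments: bool,
    old_is_user_provided: bool,
) -> FragmentAction {
    if !has_fragments {
        FragmentAction::RegeneratePrompt
    } else if is_adding_fragments || old_is_user_provided {
        FragmentAction::RegenerateAllFragments
    } else {
        FragmentAction::DiffFragments
    }
}

fn main() {
    assert_eq!(choose_action(false, false, true), FragmentAction::RegeneratePrompt);
    assert_eq!(choose_action(true, false, true), FragmentAction::RegenerateAllFragments);
    assert_eq!(choose_action(true, false, false), FragmentAction::DiffFragments);
}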
@@ -600,7 +605,8 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
                 docid,
                 &delta,
                 new_must_regenerate,
-                &embedder_info.embedding_status,
+                old_is_user_provided,
+                old_must_regenerate,
             );

             // and we finally push the unique vectors into the writer
@@ -657,10 +663,9 @@ fn push_embedding_status_delta(
    docid: DocumentId,
    delta: &VectorStateDelta,
    new_must_regenerate: bool,
-    embedding_status: &EmbeddingStatus,
+    old_is_user_provided: bool,
+    old_must_regenerate: bool,
 ) {
-    let (old_is_user_provided, old_must_regenerate) =
-        embedding_status.is_user_provided_must_regenerate(docid);
     let new_is_user_provided = match delta {
         VectorStateDelta::NoChange => old_is_user_provided,
         VectorStateDelta::NowRemoved => {
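Aside: the refactor here threads the old status through as two plain booleans instead of handing the callee an `EmbeddingStatus` to query, so the callee no longer depends on the status type at all. A minimal sketch of that parameter-threading pattern, with placeholder types that only mimic the shape of the real ones:

// Illustrative stand-in; the real EmbeddingStatus lives in crate::vector::db.
struct EmbeddingStatus;

impl EmbeddingStatus {
    fn is_user_provided_must_regenerate(&self, _docid: u32) -> (bool, bool) {
        (false, true) // placeholder values for the sketch
    }
}

// The callee now receives plain data instead of a handle to query.
fn push_delta(docid: u32, old_is_user_provided: bool, old_must_regenerate: bool) {
    println!("doc {docid}: user_provided={old_is_user_provided}, regen={old_must_regenerate}");
}

fn main() {
    let status = EmbeddingStatus;
    let docid = 42;
    // Computed once at the call site, then threaded through as booleans.
    let (old_is_user_provided, old_must_regenerate) =
        status.is_user_provided_must_regenerate(docid);
    push_delta(docid, old_is_user_provided, old_must_regenerate);
}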
@@ -16,6 +16,7 @@ pub struct IndexerConfig {
     pub max_positions_per_attributes: Option<u32>,
     pub skip_index_budget: bool,
     pub experimental_no_edition_2024_for_settings: bool,
+    pub experimental_no_edition_2024_for_dumps: bool,
 }

 impl IndexerConfig {
@@ -65,6 +66,7 @@ impl Default for IndexerConfig {
             max_positions_per_attributes: None,
             skip_index_budget: false,
             experimental_no_edition_2024_for_settings: false,
+            experimental_no_edition_2024_for_dumps: false,
         }
     }
 }
@@ -620,12 +620,35 @@ impl<'a, 'b, 'extractor> Chunks<'a, 'b, 'extractor> {
     where
         'a: 'doc,
     {
-        match &mut self.kind {
-            ChunkType::Fragments { fragments: _, session } => {
-                let doc_alloc = session.doc_alloc();
+        self.set_status(docid, old_is_user_provided, true, false, true);

-                if old_is_user_provided | full_reindex {
+        match &mut self.kind {
+            ChunkType::Fragments { fragments, session } => {
+                let doc_alloc = session.doc_alloc();
+                let reindex_all_fragments =
+                    // when the vectors were user-provided, Meilisearch cannot know whether they
+                    // came from a particular fragment, and so it needs to clear all embeddings
+                    // in that case.
+                    // Fortunately, as dumps export fragment vectors with `regenerate` set to
+                    // `false`, this case should be rare and opt-in.
+                    old_is_user_provided ||
+                    // full-reindex case
+                    full_reindex;
+
+                if reindex_all_fragments {
                     session.on_embed_mut().clear_vectors(docid);
+                    let extractors = fragments.iter().map(|fragment| {
+                        RequestFragmentExtractor::new(fragment, doc_alloc).ignore_errors()
+                    });
+                    insert_autogenerated(
+                        docid,
+                        external_docid,
+                        extractors,
+                        document,
+                        &(),
+                        session,
+                        unused_vectors_distribution,
+                    )?;
+                    return Ok(());
                 }

                 settings_delta.try_for_each_fragment_diff(
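Aside: a hedged sketch of the clear-then-regenerate path guarded by `reindex_all_fragments` above; the `VectorStore` stand-in below is illustrative only (the real store is arroy-backed, and the embeddings here are fake):

// Simplified model of "clear everything, then regenerate every fragment",
// which is the only safe option when vector provenance is unknown.
struct VectorStore {
    vectors: Vec<(String, Vec<f32>)>, // (fragment name, embedding)
}

impl VectorStore {
    fn clear_vectors(&mut self) {
        self.vectors.clear();
    }
    fn insert_autogenerated(&mut self, fragment: &str, embedding: Vec<f32>) {
        self.vectors.push((fragment.to_string(), embedding));
    }
}

fn main() {
    let mut store = VectorStore { vectors: vec![("unknown".into(), vec![0.0; 4])] };
    let (old_is_user_provided, full_reindex) = (true, false);
    // User-provided vectors cannot be attributed to a specific fragment, so
    // they are all cleared and every fragment is regenerated from scratch.
    if old_is_user_provided || full_reindex {
        store.clear_vectors();
        for fragment in ["title", "overview"] {
            store.insert_autogenerated(fragment, vec![0.1; 4]); // fake embedding
        }
    }
    assert_eq!(store.vectors.len(), 2);
}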
@@ -669,7 +692,6 @@ impl<'a, 'b, 'extractor> Chunks<'a, 'b, 'extractor> {
                         Result::Ok(())
                     },
                 )?;
-                self.set_status(docid, old_is_user_provided, true, false, true);
             }
             ChunkType::DocumentTemplate { document_template, session } => {
                 let doc_alloc = session.doc_alloc();
@@ -690,12 +712,18 @@ impl<'a, 'b, 'extractor> Chunks<'a, 'b, 'extractor> {

                 match extractor.diff_settings(document, &external_docid, old_extractor.as_ref())? {
                     ExtractorDiff::Removed => {
+                        if old_is_user_provided || full_reindex {
+                            session.on_embed_mut().clear_vectors(docid);
+                        }
                         OnEmbed::process_embedding_response(
                             session.on_embed_mut(),
                             crate::vector::session::EmbeddingResponse { metadata, embedding: None },
                         );
                     }
                     ExtractorDiff::Added(input) | ExtractorDiff::Updated(input) => {
+                        if old_is_user_provided || full_reindex {
+                            session.on_embed_mut().clear_vectors(docid);
+                        }
                         session.request_embedding(metadata, input, unused_vectors_distribution)?;
                     }
                     ExtractorDiff::Unchanged => { /* do nothing */ }
@@ -722,6 +750,13 @@ impl<'a, 'b, 'extractor> Chunks<'a, 'b, 'extractor> {
     where
         'a: 'doc,
     {
+        self.set_status(
+            docid,
+            old_is_user_provided,
+            old_must_regenerate,
+            false,
+            new_must_regenerate,
+        );
         match &mut self.kind {
             ChunkType::DocumentTemplate { document_template, session } => {
                 let doc_alloc = session.doc_alloc();
@@ -731,10 +766,6 @@ impl<'a, 'b, 'extractor> Chunks<'a, 'b, 'extractor> {
                     new_fields_ids_map,
                 );

-                if old_is_user_provided {
-                    session.on_embed_mut().clear_vectors(docid);
-                }
-
                 update_autogenerated(
                     docid,
                     external_docid,
@@ -743,6 +774,7 @@ impl<'a, 'b, 'extractor> Chunks<'a, 'b, 'extractor> {
                     new_document,
                     &external_docid,
                     old_must_regenerate,
+                    old_is_user_provided,
                     session,
                     unused_vectors_distribution,
                 )?
@@ -754,7 +786,21 @@ impl<'a, 'b, 'extractor> Chunks<'a, 'b, 'extractor> {
                 });

                 if old_is_user_provided {
+                    // when the document was `userProvided`, Meilisearch cannot know which
+                    // fragment a particular vector was referring to.
+                    // As a result, Meilisearch regenerates all fragments in this case.
+                    // Fortunately, since dumps for fragments set regenerate to false, this
+                    // case should be rare.
                     session.on_embed_mut().clear_vectors(docid);
+                    insert_autogenerated(
+                        docid,
+                        external_docid,
+                        extractors,
+                        new_document,
+                        &(),
+                        session,
+                        unused_vectors_distribution,
+                    )?;
+                    return Ok(());
                 }

                 update_autogenerated(
@@ -765,25 +811,18 @@ impl<'a, 'b, 'extractor> Chunks<'a, 'b, 'extractor> {
                     new_document,
                     &(),
                     old_must_regenerate,
+                    false,
                     session,
                     unused_vectors_distribution,
                 )?
             }
         };

-        self.set_status(
-            docid,
-            old_is_user_provided,
-            old_must_regenerate,
-            false,
-            new_must_regenerate,
-        );
-
         Ok(())
     }

     #[allow(clippy::too_many_arguments)]
-    pub fn insert_autogenerated<D: Document<'a> + Debug>(
+    pub fn insert_autogenerated<'doc, D: Document<'doc> + Debug>(
         &mut self,
         docid: DocumentId,
         external_docid: &'a str,
@@ -791,7 +830,10 @@ impl<'a, 'b, 'extractor> Chunks<'a, 'b, 'extractor> {
        new_fields_ids_map: &'a RefCell<crate::GlobalFieldsIdsMap>,
        unused_vectors_distribution: &UnusedVectorsDistributionBump<'a>,
        new_must_regenerate: bool,
-    ) -> Result<()> {
+    ) -> Result<()>
+    where
+        'a: 'doc,
+    {
        let (default_is_user_provided, default_must_regenerate) = (false, true);
        self.set_status(
            docid,
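Aside: the signature now separates the document lifetime `'doc` from the longer session lifetime `'a` and requires `'a: 'doc`. A self-contained sketch of why such an outlives bound is useful (the trait and types below are illustrative, not milli's `Document`):

// A document borrow that only needs to live for 'doc.
trait Document<'doc> {
    fn field(&self, name: &str) -> Option<&'doc str>;
}

struct RawDoc<'doc> {
    title: &'doc str,
}

impl<'doc> Document<'doc> for RawDoc<'doc> {
    fn field(&self, name: &str) -> Option<&'doc str> {
        (name == "title").then_some(self.title)
    }
}

fn insert_autogenerated<'doc, 'a: 'doc, D: Document<'doc>>(
    external_docid: &'a str,
    document: D,
) {
    // `'a: 'doc` lets the longer-lived docid borrow stand in for a 'doc borrow.
    let title: &'doc str = document.field("title").unwrap_or(external_docid);
    println!("{external_docid}: {title}");
}

fn main() {
    let docid = String::from("movie-42"); // lives for all of main: plays 'a
    {
        let title = String::from("Dune"); // shorter scope: plays 'doc
        insert_autogenerated(&docid, RawDoc { title: &title });
    }
}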
@@ -956,6 +998,7 @@ fn update_autogenerated<'doc, 'a: 'doc, 'b, E, OD, ND>(
     new_document: ND,
     meta: &E::DocumentMetadata,
     old_must_regenerate: bool,
+    mut must_clear_on_generation: bool,
     session: &mut EmbedSession<'a, OnEmbeddingDocumentUpdates<'a, 'b>, E::Input>,
     unused_vectors_distribution: &UnusedVectorsDistributionBump<'a>,
 ) -> Result<()>
@@ -984,6 +1027,11 @@ where
     };

     if must_regenerate {
+        if must_clear_on_generation {
+            must_clear_on_generation = false;
+            session.on_embed_mut().clear_vectors(docid);
+        }
+
         let metadata =
             Metadata { docid, external_docid, extractor_id: extractor.extractor_id() };

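Aside: the `mut must_clear_on_generation` flag implements a clear-at-most-once rule: stored vectors are wiped only when the first regeneration actually happens, not unconditionally at the start of the update. A small sketch under those assumptions (all names are made up, and the store is a plain Vec):

// One-shot lazy clear: the stale vectors survive if nothing regenerates, and
// are wiped exactly once before the first regenerated embedding lands.
fn update_embeddings(
    extractors: &[&str],
    mut must_clear_on_generation: bool,
    store: &mut Vec<String>,
) {
    for extractor in extractors {
        let must_regenerate = true; // the real code diffs old vs new inputs here
        if must_regenerate {
            if must_clear_on_generation {
                must_clear_on_generation = false; // clear only once
                store.clear();
            }
            store.push(format!("embedding from {extractor}"));
        }
    }
}

fn main() {
    let mut store = vec!["stale user-provided vector".to_string()];
    update_embeddings(&["title", "overview"], true, &mut store);
    assert_eq!(store.len(), 2); // the stale vector was cleared exactly once
}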
@@ -1002,7 +1050,7 @@ where
     Ok(())
 }

-fn insert_autogenerated<'a, 'b, E, D: Document<'a> + Debug>(
+fn insert_autogenerated<'doc, 'a: 'doc, 'b, E, D: Document<'doc> + Debug>(
     docid: DocumentId,
     external_docid: &'a str,
     extractors: impl IntoIterator<Item = E>,
|
@ -835,6 +835,25 @@ impl EmbedderOptions {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn has_fragments(&self) -> bool {
|
||||||
|
match &self {
|
||||||
|
EmbedderOptions::HuggingFace(_)
|
||||||
|
| EmbedderOptions::OpenAi(_)
|
||||||
|
| EmbedderOptions::Ollama(_)
|
||||||
|
| EmbedderOptions::UserProvided(_) => false,
|
||||||
|
EmbedderOptions::Rest(embedder_options) => {
|
||||||
|
!embedder_options.indexing_fragments.is_empty()
|
||||||
|
}
|
||||||
|
EmbedderOptions::Composite(embedder_options) => {
|
||||||
|
if let SubEmbedderOptions::Rest(embedder_options) = &embedder_options.index {
|
||||||
|
!embedder_options.indexing_fragments.is_empty()
|
||||||
|
} else {
|
||||||
|
false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Default for EmbedderOptions {
|
impl Default for EmbedderOptions {
|
||||||
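Aside: a simplified sketch of the `has_fragments` rule added above: only REST embedders, directly or as the indexing half of a composite embedder, can define indexing fragments. The enum below mirrors the shape of `EmbedderOptions` but is not the real type, and the composite case is collapsed into a recursive check for brevity.

struct RestOptions {
    indexing_fragments: Vec<String>,
}

enum EmbedderOptions {
    HuggingFace,
    OpenAi,
    Ollama,
    UserProvided,
    Rest(RestOptions),
    Composite { index: Box<EmbedderOptions> },
}

impl EmbedderOptions {
    fn has_fragments(&self) -> bool {
        match self {
            // only REST embedders can define indexing fragments
            EmbedderOptions::Rest(opts) => !opts.indexing_fragments.is_empty(),
            // a composite embedder delegates to its indexing-side sub-embedder
            EmbedderOptions::Composite { index } => index.has_fragments(),
            _ => false,
        }
    }
}

fn main() {
    let rest = EmbedderOptions::Rest(RestOptions { indexing_fragments: vec!["basic".into()] });
    assert!(rest.has_fragments());
    assert!(!EmbedderOptions::OpenAi.has_fragments());
}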