mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-07-27 00:31:02 +00:00
When exporting vectors, force regenerate to false when the embedder has fragments
This commit is contained in:
@ -5,6 +5,7 @@ use std::sync::atomic::Ordering;
|
|||||||
|
|
||||||
use dump::IndexMetadata;
|
use dump::IndexMetadata;
|
||||||
use meilisearch_types::milli::constants::RESERVED_VECTORS_FIELD_NAME;
|
use meilisearch_types::milli::constants::RESERVED_VECTORS_FIELD_NAME;
|
||||||
|
use meilisearch_types::milli::index::EmbeddingsWithMetadata;
|
||||||
use meilisearch_types::milli::progress::{Progress, VariableNameStep};
|
use meilisearch_types::milli::progress::{Progress, VariableNameStep};
|
||||||
use meilisearch_types::milli::vector::parsed_vectors::{ExplicitVectors, VectorOrArrayOfVectors};
|
use meilisearch_types::milli::vector::parsed_vectors::{ExplicitVectors, VectorOrArrayOfVectors};
|
||||||
use meilisearch_types::milli::{self};
|
use meilisearch_types::milli::{self};
|
||||||
@ -227,12 +228,21 @@ impl IndexScheduler {
|
|||||||
return Err(Error::from_milli(user_err, Some(uid.to_string())));
|
return Err(Error::from_milli(user_err, Some(uid.to_string())));
|
||||||
};
|
};
|
||||||
|
|
||||||
for (embedder_name, (embeddings, regenerate)) in embeddings {
|
for (
|
||||||
|
embedder_name,
|
||||||
|
EmbeddingsWithMetadata { embeddings, regenerate, has_fragments },
|
||||||
|
) in embeddings
|
||||||
|
{
|
||||||
let embeddings = ExplicitVectors {
|
let embeddings = ExplicitVectors {
|
||||||
embeddings: Some(VectorOrArrayOfVectors::from_array_of_vectors(
|
embeddings: Some(VectorOrArrayOfVectors::from_array_of_vectors(
|
||||||
embeddings,
|
embeddings,
|
||||||
)),
|
)),
|
||||||
regenerate,
|
regenerate: regenerate &&
|
||||||
|
// Meilisearch does not handle dumps with fragments well: because the fragments
|
||||||
|
// are marked as user-provided,
|
||||||
|
// all embeddings would be regenerated on any settings change or document update.
|
||||||
|
// To prevent this, we mark embeddings as non-regenerate in this case.
|
||||||
|
!has_fragments,
|
||||||
};
|
};
|
||||||
vectors.insert(embedder_name, serde_json::to_value(embeddings).unwrap());
|
vectors.insert(embedder_name, serde_json::to_value(embeddings).unwrap());
|
||||||
}
|
}
|
||||||
|
@ -9,6 +9,7 @@ use flate2::write::GzEncoder;
|
|||||||
use flate2::Compression;
|
use flate2::Compression;
|
||||||
use meilisearch_types::index_uid_pattern::IndexUidPattern;
|
use meilisearch_types::index_uid_pattern::IndexUidPattern;
|
||||||
use meilisearch_types::milli::constants::RESERVED_VECTORS_FIELD_NAME;
|
use meilisearch_types::milli::constants::RESERVED_VECTORS_FIELD_NAME;
|
||||||
|
use meilisearch_types::milli::index::EmbeddingsWithMetadata;
|
||||||
use meilisearch_types::milli::progress::{Progress, VariableNameStep};
|
use meilisearch_types::milli::progress::{Progress, VariableNameStep};
|
||||||
use meilisearch_types::milli::update::{request_threads, Setting};
|
use meilisearch_types::milli::update::{request_threads, Setting};
|
||||||
use meilisearch_types::milli::vector::parsed_vectors::{ExplicitVectors, VectorOrArrayOfVectors};
|
use meilisearch_types::milli::vector::parsed_vectors::{ExplicitVectors, VectorOrArrayOfVectors};
|
||||||
@ -229,12 +230,21 @@ impl IndexScheduler {
|
|||||||
));
|
));
|
||||||
};
|
};
|
||||||
|
|
||||||
for (embedder_name, (embeddings, regenerate)) in embeddings {
|
for (
|
||||||
|
embedder_name,
|
||||||
|
EmbeddingsWithMetadata { embeddings, regenerate, has_fragments },
|
||||||
|
) in embeddings
|
||||||
|
{
|
||||||
let embeddings = ExplicitVectors {
|
let embeddings = ExplicitVectors {
|
||||||
embeddings: Some(
|
embeddings: Some(
|
||||||
VectorOrArrayOfVectors::from_array_of_vectors(embeddings),
|
VectorOrArrayOfVectors::from_array_of_vectors(embeddings),
|
||||||
),
|
),
|
||||||
regenerate,
|
regenerate: regenerate &&
|
||||||
|
// Meilisearch does not handle dumps with fragments well: because the fragments
|
||||||
|
// are marked as user-provided,
|
||||||
|
// all embeddings would be regenerated on any settings change or document update.
|
||||||
|
// To prevent this, we mark embeddings as non-regenerate in this case.
|
||||||
|
!has_fragments,
|
||||||
};
|
};
|
||||||
vectors.insert(
|
vectors.insert(
|
||||||
embedder_name,
|
embedder_name,
|
||||||
|
@ -15,6 +15,7 @@ use meilisearch_types::heed::{
|
|||||||
};
|
};
|
||||||
use meilisearch_types::milli::constants::RESERVED_VECTORS_FIELD_NAME;
|
use meilisearch_types::milli::constants::RESERVED_VECTORS_FIELD_NAME;
|
||||||
use meilisearch_types::milli::documents::{obkv_to_object, DocumentsBatchReader};
|
use meilisearch_types::milli::documents::{obkv_to_object, DocumentsBatchReader};
|
||||||
|
use meilisearch_types::milli::index::EmbeddingsWithMetadata;
|
||||||
use meilisearch_types::milli::vector::parsed_vectors::{ExplicitVectors, VectorOrArrayOfVectors};
|
use meilisearch_types::milli::vector::parsed_vectors::{ExplicitVectors, VectorOrArrayOfVectors};
|
||||||
use meilisearch_types::milli::{obkv_to_json, BEU32};
|
use meilisearch_types::milli::{obkv_to_json, BEU32};
|
||||||
use meilisearch_types::tasks::{Status, Task};
|
use meilisearch_types::tasks::{Status, Task};
|
||||||
@ -591,12 +592,21 @@ fn export_documents(
|
|||||||
.into());
|
.into());
|
||||||
};
|
};
|
||||||
|
|
||||||
for (embedder_name, (embeddings, regenerate)) in embeddings {
|
for (
|
||||||
|
embedder_name,
|
||||||
|
EmbeddingsWithMetadata { embeddings, regenerate, has_fragments },
|
||||||
|
) in embeddings
|
||||||
|
{
|
||||||
let embeddings = ExplicitVectors {
|
let embeddings = ExplicitVectors {
|
||||||
embeddings: Some(VectorOrArrayOfVectors::from_array_of_vectors(
|
embeddings: Some(VectorOrArrayOfVectors::from_array_of_vectors(
|
||||||
embeddings,
|
embeddings,
|
||||||
)),
|
)),
|
||||||
regenerate,
|
regenerate: regenerate &&
|
||||||
|
// Meilisearch does not handle dumps with fragments well: because the fragments
|
||||||
|
// are marked as user-provided,
|
||||||
|
// all embeddings would be regenerated on any settings change or document update.
|
||||||
|
// To prevent this, we mark embeddings as non-regenerate in this case.
|
||||||
|
!has_fragments,
|
||||||
};
|
};
|
||||||
vectors
|
vectors
|
||||||
.insert(embedder_name, serde_json::to_value(embeddings).unwrap());
|
.insert(embedder_name, serde_json::to_value(embeddings).unwrap());
|
||||||
|
Reference in New Issue
Block a user