Implement dumpless upgrade from v1.15 to v1.16

This commit is contained in:
Louis Dureuil
2025-07-07 11:57:08 +02:00
parent 73c9c1ebdc
commit a3254d7d7d
4 changed files with 72 additions and 0 deletions

View File

@@ -2,6 +2,7 @@ mod v1_12;
mod v1_13;
mod v1_14;
mod v1_15;
mod v1_16;
use heed::RwTxn;
use v1_12::{V1_12_3_To_V1_13_0, V1_12_To_V1_12_3};
use v1_13::{V1_13_0_To_V1_13_1, V1_13_1_To_Latest_V1_13};
@@ -10,6 +11,7 @@ use v1_15::Latest_V1_14_To_Latest_V1_15;
use crate::constants::{VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH};
use crate::progress::{Progress, VariableNameStep};
use crate::update::upgrade::v1_16::Latest_V1_15_To_V1_16_0;
use crate::{Index, InternalError, Result};
trait UpgradeIndex {
@@ -31,6 +33,7 @@ const UPGRADE_FUNCTIONS: &[&dyn UpgradeIndex] = &[
&V1_13_1_To_Latest_V1_13 {},
&Latest_V1_13_To_Latest_V1_14 {},
&Latest_V1_14_To_Latest_V1_15 {},
&Latest_V1_15_To_V1_16_0 {},
// This is the last upgrade function; it is called when the index is already up to date.
// Any other upgrade function must be added before this one.
&ToCurrentNoOp {},
@@ -58,6 +61,7 @@ const fn start(from: (u32, u32, u32)) -> Option<usize> {
(1, 14, _) => function_index!(5),
// We must handle the current version in the match because, in case of a failure, some indexes may have been upgraded but not others.
(1, 15, _) => function_index!(6),
(1, 16, _) => function_index!(7),
// We deliberately don't add a placeholder with (VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH) here to force manually
// considering dumpless upgrade.
(_major, _minor, _patch) => return None,

View File

@@ -1,4 +1,6 @@
use heed::RwTxn;
use roaring::RoaringBitmap;
use serde::{Deserialize, Serialize};
use super::UpgradeIndex;
use crate::progress::Progress;
@@ -26,3 +28,14 @@ impl UpgradeIndex for Latest_V1_14_To_Latest_V1_15 {
(1, 15, 0)
}
}
/// Parts of the v1.15 `IndexEmbeddingConfig` that are relevant for the upgrade to v1.16.
///
/// Only the fields declared here are deserialized; with serde's default derive behaviour, any
/// other field present in the stored JSON is ignored when reading.
///
/// # Warning
///
/// This object should not be rewritten to the DB, only read to get the name and `user_provided` roaring.
/// (Presumably because serializing it would drop every stored field that is not declared here.)
#[derive(Debug, Deserialize, Serialize)]
pub struct IndexEmbeddingConfig {
/// Name of the embedder. The v1.16 upgrade uses it to look up the embedder id and as the key
/// under which the new embedder info is written.
pub name: String,
/// Bitmap handed to `EmbeddingStatus::from_user_provided` by the v1.16 upgrade.
/// Presumably the docids whose vectors for this embedder were provided by the user.
pub user_provided: RoaringBitmap,
}

View File

@@ -0,0 +1,48 @@
use heed::types::{SerdeJson, Str};
use heed::RwTxn;
use super::UpgradeIndex;
use crate::progress::Progress;
use crate::vector::db::{EmbedderInfo, EmbeddingStatus};
use crate::{Index, InternalError, Result};
/// Upgrade step that brings an index from the latest v1.15 to v1.16.0.
///
/// Stateless marker type: all the work happens in its `UpgradeIndex` implementation.
// The name follows the `..._To_...` naming used by the sibling upgrade steps, which is not
// camel case, hence the lint exemption below.
#[allow(non_camel_case_types)]
pub(super) struct Latest_V1_15_To_V1_16_0();
impl UpgradeIndex for Latest_V1_15_To_V1_16_0 {
    /// Converts the v1.15 embedding configs of `index` into v1.16 embedder infos.
    ///
    /// The v1.15 configs are read as JSON from the main database under
    /// `main_key::EMBEDDING_CONFIGS`; a missing entry is treated as an empty list. For each
    /// config, the embedder id registered under its name is looked up and an `EmbedderInfo`
    /// carrying that id and the config's `user_provided` bitmap is stored under the same name.
    ///
    /// Always returns `Ok(false)` on success.
    ///
    /// # Errors
    ///
    /// - `InternalError::DatabaseMissingEntry` when no embedder id exists for a config name.
    /// - Any error raised while reading from or writing to the database.
    fn upgrade(
        &self,
        wtxn: &mut RwTxn,
        index: &Index,
        _original: (u32, u32, u32),
        _progress: Progress,
    ) -> Result<bool> {
        // View the main database through the reduced v1.15 representation of the configs.
        let legacy_db =
            index.main.remap_types::<Str, SerdeJson<Vec<super::v1_15::IndexEmbeddingConfig>>>();
        let legacy_configs =
            legacy_db.get(wtxn, crate::index::main_key::EMBEDDING_CONFIGS)?.unwrap_or_default();

        let embedders = index.embedding_configs();

        for super::v1_15::IndexEmbeddingConfig { name, user_provided } in legacy_configs {
            let embedder_id = match embedders.embedder_id(wtxn, &name)? {
                Some(id) => id,
                None => {
                    return Err(InternalError::DatabaseMissingEntry {
                        db_name: crate::index::db_name::VECTOR_EMBEDDER_CATEGORY_ID,
                        key: None,
                    }
                    .into());
                }
            };

            // v1.15 did not distinguish `user_provided` from `! regenerate`, so the status is
            // derived from the `user_provided` bitmap alone.
            let embedding_status = EmbeddingStatus::from_user_provided(user_provided);
            let info = EmbedderInfo { embedder_id, embedding_status };

            embedders.put_embedder_info(wtxn, &name, &info)?;
        }

        Ok(false)
    }

    /// Version of the index once this upgrade step has run.
    fn target_version(&self) -> (u32, u32, u32) {
        (1, 16, 0)
    }
}

View File

@@ -117,6 +117,13 @@ impl EmbeddingStatus {
Default::default()
}
/// Builds an `EmbeddingStatus` out of a bare `user_provided` bitmap.
///
/// The `skip_regenerate_different_from_user_provided` part is left at its default value, which
/// amounts to assuming that any `user_provided` docid is also skipping regenerate.
///
/// Used for migration from v1.15 and earlier DBs.
pub(crate) fn from_user_provided(user_provided: RoaringBitmap) -> Self {
    let skip_regenerate_different_from_user_provided = Default::default();
    Self { user_provided, skip_regenerate_different_from_user_provided }
}
/// Whether the document contains user-provided vectors for that embedder.
pub fn is_user_provided(&self, docid: DocumentId) -> bool {
self.user_provided.contains(docid)