Make the VectorStore aware of the index version

This commit is contained in:
Clément Renault
2025-08-12 15:09:26 +02:00
committed by Louis Dureuil
parent 6340412219
commit f5f2f7c6f2
11 changed files with 89 additions and 22 deletions

View File

@@ -485,6 +485,7 @@ where
// If an embedder wasn't used in the typed chunk but must be binary quantized,
// we should insert it in `dimension`.
let index_version = self.index.get_version(&self.wtxn)?.unwrap();
for (name, action) in settings_diff.embedding_config_updates.iter() {
if action.is_being_quantized && !dimension.contains_key(name.as_str()) {
let index = self.index.embedding_configs().embedder_id(self.wtxn, name)?.ok_or(
@@ -493,7 +494,12 @@ where
key: None,
},
)?;
let reader = VectorStore::new(self.index.vector_store, index, action.was_quantized);
let reader = VectorStore::new(
index_version,
self.index.vector_store,
index,
action.was_quantized,
);
let Some(dim) = reader.dimensions(self.wtxn)? else {
continue;
};
@@ -522,7 +528,8 @@ where
let is_quantizing = embedder_config.is_some_and(|action| action.is_being_quantized);
pool.install(|| {
let mut writer = VectorStore::new(vector_hannoy, embedder_index, was_quantized);
let mut writer =
VectorStore::new(index_version, vector_hannoy, embedder_index, was_quantized);
writer.build_and_quantize(
wtxn,
// In the settings we don't have any progress to share

View File

@@ -834,6 +834,7 @@ impl<'a, 'i> Transform<'a, 'i> {
None
};
let index_version = self.index.get_version(wtxn)?.unwrap();
let readers: BTreeMap<&str, (VectorStore, &RoaringBitmap)> = settings_diff
.embedding_config_updates
.iter()
@@ -842,6 +843,7 @@ impl<'a, 'i> Transform<'a, 'i> {
action.write_back()
{
let reader = VectorStore::new(
index_version,
self.index.vector_store,
*embedder_id,
action.was_quantized,
@@ -949,8 +951,12 @@ impl<'a, 'i> Transform<'a, 'i> {
else {
continue;
};
let hannoy =
VectorStore::new(self.index.vector_store, infos.embedder_id, was_quantized);
let hannoy = VectorStore::new(
index_version,
self.index.vector_store,
infos.embedder_id,
was_quantized,
);
let Some(dimensions) = hannoy.dimensions(wtxn)? else {
continue;
};

View File

@@ -619,6 +619,7 @@ pub(crate) fn write_typed_chunk_into_index(
let _entered = span.enter();
let embedders = index.embedding_configs();
let index_version = index.get_version(wtxn)?.unwrap();
let mut remove_vectors_builder = MergerBuilder::new(KeepFirst);
let mut manual_vectors_builder = MergerBuilder::new(KeepFirst);
@@ -677,7 +678,12 @@ pub(crate) fn write_typed_chunk_into_index(
.get(&embedder_name)
.is_some_and(|conf| conf.is_quantized);
// FIXME: allow customizing distance
let writer = VectorStore::new(index.vector_store, infos.embedder_id, binary_quantized);
let writer = VectorStore::new(
index_version,
index.vector_store,
infos.embedder_id,
binary_quantized,
);
// remove the vectors of the docids that must be removed
let merger = remove_vectors_builder.build();