Switch from version to backend selector

This commit is contained in:
Louis Dureuil
2025-08-26 17:49:56 +02:00
parent b5f0c19406
commit da6fffdf6d
12 changed files with 88 additions and 78 deletions

View File

@ -485,7 +485,7 @@ where
// If an embedder wasn't used in the typedchunk but must be binary quantized
// we should insert it in `dimension`
let index_version = self.index.get_version(self.wtxn)?.unwrap();
let backend = self.index.get_vector_store(self.wtxn)?;
for (name, action) in settings_diff.embedding_config_updates.iter() {
if action.is_being_quantized && !dimension.contains_key(name.as_str()) {
let index = self.index.embedding_configs().embedder_id(self.wtxn, name)?.ok_or(
@ -494,12 +494,8 @@ where
key: None,
},
)?;
let reader = VectorStore::new(
index_version,
self.index.vector_store,
index,
action.was_quantized,
);
let reader =
VectorStore::new(backend, self.index.vector_store, index, action.was_quantized);
let Some(dim) = reader.dimensions(self.wtxn)? else {
continue;
};
@ -529,7 +525,7 @@ where
pool.install(|| {
let mut writer =
VectorStore::new(index_version, vector_hannoy, embedder_index, was_quantized);
VectorStore::new(backend, vector_hannoy, embedder_index, was_quantized);
writer.build_and_quantize(
wtxn,
// In the settings we don't have any progress to share

View File

@ -834,7 +834,7 @@ impl<'a, 'i> Transform<'a, 'i> {
None
};
let index_version = self.index.get_version(wtxn)?.unwrap();
let backend = self.index.get_vector_store(wtxn)?;
let readers: BTreeMap<&str, (VectorStore, &RoaringBitmap)> = settings_diff
.embedding_config_updates
.iter()
@ -843,7 +843,7 @@ impl<'a, 'i> Transform<'a, 'i> {
action.write_back()
{
let reader = VectorStore::new(
index_version,
backend,
self.index.vector_store,
*embedder_id,
action.was_quantized,
@ -949,7 +949,7 @@ impl<'a, 'i> Transform<'a, 'i> {
continue;
};
let hannoy = VectorStore::new(
index_version,
backend,
self.index.vector_store,
infos.embedder_id,
was_quantized,

View File

@ -619,7 +619,7 @@ pub(crate) fn write_typed_chunk_into_index(
let _entered = span.enter();
let embedders = index.embedding_configs();
let index_version = index.get_version(wtxn)?.unwrap();
let backend = index.get_vector_store(wtxn)?;
let mut remove_vectors_builder = MergerBuilder::new(KeepFirst);
let mut manual_vectors_builder = MergerBuilder::new(KeepFirst);
@ -678,12 +678,8 @@ pub(crate) fn write_typed_chunk_into_index(
.get(&embedder_name)
.is_some_and(|conf| conf.is_quantized);
// FIXME: allow customizing distance
let writer = VectorStore::new(
index_version,
index.vector_store,
infos.embedder_id,
binary_quantized,
);
let writer =
VectorStore::new(backend, index.vector_store, infos.embedder_id, binary_quantized);
// remove vectors for docids we want them removed
let merger = remove_vectors_builder.build();

View File

@ -131,7 +131,7 @@ where
let global_fields_ids_map = GlobalFieldsIdsMap::new(&new_fields_ids_map);
let vector_arroy = index.vector_store;
let index_version = index.get_version(wtxn)?.unwrap();
let backend = index.get_vector_store(wtxn)?;
let hannoy_writers: Result<HashMap<_, _>> = embedders
.inner_as_ref()
.iter()
@ -145,12 +145,8 @@ where
})?;
let dimensions = runtime.embedder.dimensions();
let writer = VectorStore::new(
index_version,
vector_arroy,
embedder_index,
runtime.is_quantized,
);
let writer =
VectorStore::new(backend, vector_arroy, embedder_index, runtime.is_quantized);
Ok((
embedder_index,
@ -352,7 +348,7 @@ fn hannoy_writers_from_embedder_actions<'indexer>(
index_embedder_category_ids: &'indexer std::collections::HashMap<String, u8>,
) -> Result<HashMap<u8, (&'indexer str, &'indexer Embedder, VectorStore, usize)>> {
let vector_arroy = index.vector_store;
let index_version = index.get_version(rtxn)?.unwrap();
let backend = index.get_vector_store(rtxn)?;
embedders
.inner_as_ref()
@ -371,7 +367,7 @@ fn hannoy_writers_from_embedder_actions<'indexer>(
)));
};
let writer = VectorStore::new(
index_version,
backend,
vector_arroy,
embedder_category_id,
action.was_quantized,
@ -394,16 +390,13 @@ fn delete_old_embedders_and_fragments<SD>(
where
SD: SettingsDelta,
{
let backend = index.get_vector_store(wtxn)?;
for action in settings_delta.embedder_actions().values() {
let Some(WriteBackToDocuments { embedder_id, .. }) = action.write_back() else {
continue;
};
let reader = VectorStore::new(
index.get_version(wtxn)?.unwrap(),
index.vector_store,
*embedder_id,
action.was_quantized,
);
let reader =
VectorStore::new(backend, index.vector_store, *embedder_id, action.was_quantized);
let Some(dimensions) = reader.dimensions(wtxn)? else {
continue;
};
@ -419,12 +412,7 @@ where
let Some(infos) = index.embedding_configs().embedder_info(wtxn, embedder_name)? else {
continue;
};
let arroy = VectorStore::new(
index.get_version(wtxn)?.unwrap(),
index.vector_store,
infos.embedder_id,
was_quantized,
);
let arroy = VectorStore::new(backend, index.vector_store, infos.embedder_id, was_quantized);
let Some(dimensions) = arroy.dimensions(wtxn)? else {
continue;
};

View File

@ -120,9 +120,9 @@ impl<'t> VectorDocumentFromDb<'t> {
config: &IndexEmbeddingConfig,
status: &EmbeddingStatus,
) -> Result<VectorEntry<'t>> {
let index_version = self.index.get_version(self.rtxn)?.unwrap();
let backend = self.index.get_vector_store(self.rtxn)?;
let reader = VectorStore::new(
index_version,
backend,
self.index.vector_store,
embedder_id,
config.config.quantized(),

View File

@ -17,13 +17,14 @@ impl UpgradeIndex for Latest_V1_18_New_Hannoy {
progress: Progress,
) -> Result<bool> {
let embedding_configs = index.embedding_configs();
let index_version = index.get_version(wtxn)?.unwrap();
let backend = index.get_vector_store(wtxn)?;
for config in embedding_configs.embedding_configs(wtxn)? {
// TODO use the embedder name to display progress
/// REMOVE THIS FILE, IMPLEMENT CONVERSION AS A SETTING CHANGE
let quantized = config.config.quantized();
let embedder_id = embedding_configs.embedder_id(wtxn, &config.name)?.unwrap();
let vector_store =
VectorStore::new(index_version, index.vector_store, embedder_id, quantized);
VectorStore::new(backend, index.vector_store, embedder_id, quantized);
vector_store.convert_from_arroy(wtxn, progress.clone())?;
}