Reintroduce arroy and support for dumpless upgrade from previous versions

This commit is contained in:
Clément Renault
2025-07-29 18:00:29 +02:00
committed by Louis Dureuil
parent a7cd6853db
commit 27550dafad
16 changed files with 94 additions and 39 deletions

View File

@ -2,7 +2,8 @@ use heed::RwTxn;
use roaring::RoaringBitmap;
use time::OffsetDateTime;
use crate::{database_stats::DatabaseStats, FieldDistribution, Index, Result};
use crate::database_stats::DatabaseStats;
use crate::{FieldDistribution, Index, Result};
pub struct ClearDocuments<'t, 'i> {
wtxn: &'t mut RwTxn<'i>,
@ -45,7 +46,7 @@ impl<'t, 'i> ClearDocuments<'t, 'i> {
facet_id_is_empty_docids,
field_id_docid_facet_f64s,
field_id_docid_facet_strings,
vector_hannoy,
vector_store,
embedder_category_id: _,
documents,
} = self.index;
@ -88,7 +89,7 @@ impl<'t, 'i> ClearDocuments<'t, 'i> {
field_id_docid_facet_f64s.clear(self.wtxn)?;
field_id_docid_facet_strings.clear(self.wtxn)?;
// vector
vector_hannoy.clear(self.wtxn)?;
vector_store.clear(self.wtxn)?;
documents.clear(self.wtxn)?;

View File

@ -493,8 +493,7 @@ where
key: None,
},
)?;
let reader =
VectorStore::new(self.index.vector_hannoy, index, action.was_quantized);
let reader = VectorStore::new(self.index.vector_store, index, action.was_quantized);
let Some(dim) = reader.dimensions(self.wtxn)? else {
continue;
};
@ -504,7 +503,7 @@ where
for (embedder_name, dimension) in dimension {
let wtxn = &mut *self.wtxn;
let vector_hannoy = self.index.vector_hannoy;
let vector_hannoy = self.index.vector_store;
let cancel = &self.should_abort;
let embedder_index =

View File

@ -842,7 +842,7 @@ impl<'a, 'i> Transform<'a, 'i> {
action.write_back()
{
let reader = VectorStore::new(
self.index.vector_hannoy,
self.index.vector_store,
*embedder_id,
action.was_quantized,
);
@ -950,7 +950,7 @@ impl<'a, 'i> Transform<'a, 'i> {
continue;
};
let hannoy =
VectorStore::new(self.index.vector_hannoy, infos.embedder_id, was_quantized);
VectorStore::new(self.index.vector_store, infos.embedder_id, was_quantized);
let Some(dimensions) = hannoy.dimensions(wtxn)? else {
continue;
};

View File

@ -677,7 +677,7 @@ pub(crate) fn write_typed_chunk_into_index(
.get(&embedder_name)
.is_some_and(|conf| conf.is_quantized);
// FIXME: allow customizing distance
let writer = VectorStore::new(index.vector_hannoy, infos.embedder_id, binary_quantized);
let writer = VectorStore::new(index.vector_store, infos.embedder_id, binary_quantized);
// remove vectors for docids we want them removed
let merger = remove_vectors_builder.build();

View File

@ -130,7 +130,7 @@ where
let global_fields_ids_map = GlobalFieldsIdsMap::new(&new_fields_ids_map);
let vector_arroy = index.vector_hannoy;
let vector_arroy = index.vector_store;
let hannoy_writers: Result<HashMap<_, _>> = embedders
.inner_as_ref()
.iter()
@ -343,7 +343,7 @@ fn hannoy_writers_from_embedder_actions<'indexer>(
embedders: &'indexer RuntimeEmbedders,
index_embedder_category_ids: &'indexer std::collections::HashMap<String, u8>,
) -> Result<HashMap<u8, (&'indexer str, &'indexer Embedder, VectorStore, usize)>> {
let vector_arroy = index.vector_hannoy;
let vector_arroy = index.vector_store;
embedders
.inner_as_ref()
@ -385,7 +385,7 @@ where
let Some(WriteBackToDocuments { embedder_id, .. }) = action.write_back() else {
continue;
};
let reader = VectorStore::new(index.vector_hannoy, *embedder_id, action.was_quantized);
let reader = VectorStore::new(index.vector_store, *embedder_id, action.was_quantized);
let Some(dimensions) = reader.dimensions(wtxn)? else {
continue;
};
@ -401,7 +401,7 @@ where
let Some(infos) = index.embedding_configs().embedder_info(wtxn, embedder_name)? else {
continue;
};
let arroy = VectorStore::new(index.vector_hannoy, infos.embedder_id, was_quantized);
let arroy = VectorStore::new(index.vector_store, infos.embedder_id, was_quantized);
let Some(dimensions) = arroy.dimensions(wtxn)? else {
continue;
};

View File

@ -121,7 +121,7 @@ impl<'t> VectorDocumentFromDb<'t> {
status: &EmbeddingStatus,
) -> Result<VectorEntry<'t>> {
let reader =
VectorStore::new(self.index.vector_hannoy, embedder_id, config.config.quantized());
VectorStore::new(self.index.vector_store, embedder_id, config.config.quantized());
let vectors = reader.item_vectors(self.rtxn, self.docid)?;
Ok(VectorEntry {

View File

@ -1,4 +1,4 @@
use hannoy::distances::Cosine;
use arroy::distances::Cosine;
use heed::RwTxn;
use super::UpgradeIndex;
@ -25,13 +25,12 @@ impl UpgradeIndex for Latest_V1_13_To_Latest_V1_14 {
progress.update_progress(VectorStore::UpdateInternalVersions);
let rtxn = index.read_txn()?;
// hannoy::upgrade::from_0_5_to_0_6::<Cosine>(
// &rtxn,
// index.vector_hannoy.remap_data_type(),
// wtxn,
// index.vector_hannoy.remap_data_type(),
// )?;
unimplemented!("upgrade hannoy");
arroy::upgrade::from_0_5_to_0_6::<Cosine>(
&rtxn,
index.vector_store.remap_types(),
wtxn,
index.vector_store.remap_types(),
)?;
Ok(false)
}