Use Hannoy instead of arroy

This commit is contained in:
Kerollmops
2025-07-21 11:42:46 +02:00
committed by Louis Dureuil
parent 580bfb06b4
commit affcaef556
25 changed files with 380 additions and 356 deletions

View File

@@ -39,7 +39,7 @@ use crate::update::{
IndexerConfig, UpdateIndexingStep, WordPrefixDocids, WordPrefixIntegerDocids, WordsPrefixesFst,
};
use crate::vector::db::EmbedderInfo;
use crate::vector::{ArroyWrapper, RuntimeEmbedders};
use crate::vector::{HannoyWrapper, RuntimeEmbedders};
use crate::{CboRoaringBitmapCodec, Index, Result, UserError};
static MERGED_DATABASE_COUNT: usize = 7;
@@ -494,7 +494,7 @@ where
},
)?;
let reader =
ArroyWrapper::new(self.index.vector_arroy, index, action.was_quantized);
HannoyWrapper::new(self.index.vector_hannoy, index, action.was_quantized);
let Some(dim) = reader.dimensions(self.wtxn)? else {
continue;
};
@@ -504,7 +504,7 @@ where
for (embedder_name, dimension) in dimension {
let wtxn = &mut *self.wtxn;
let vector_arroy = self.index.vector_arroy;
let vector_hannoy = self.index.vector_hannoy;
let cancel = &self.should_abort;
let embedder_index =
@@ -523,7 +523,7 @@ where
let is_quantizing = embedder_config.is_some_and(|action| action.is_being_quantized);
pool.install(|| {
let mut writer = ArroyWrapper::new(vector_arroy, embedder_index, was_quantized);
let mut writer = HannoyWrapper::new(vector_hannoy, embedder_index, was_quantized);
writer.build_and_quantize(
wtxn,
// In the settings we don't have any progress to share

View File

@@ -32,7 +32,7 @@ use crate::update::settings::{InnerIndexSettings, InnerIndexSettingsDiff};
use crate::update::{AvailableIds, UpdateIndexingStep};
use crate::vector::parsed_vectors::{ExplicitVectors, VectorOrArrayOfVectors};
use crate::vector::settings::{RemoveFragments, WriteBackToDocuments};
use crate::vector::ArroyWrapper;
use crate::vector::HannoyWrapper;
use crate::{FieldDistribution, FieldId, FieldIdMapMissingEntry, Index, Result};
pub struct TransformOutput {
@@ -834,15 +834,15 @@ impl<'a, 'i> Transform<'a, 'i> {
None
};
let readers: BTreeMap<&str, (ArroyWrapper, &RoaringBitmap)> = settings_diff
let readers: BTreeMap<&str, (HannoyWrapper, &RoaringBitmap)> = settings_diff
.embedding_config_updates
.iter()
.filter_map(|(name, action)| {
if let Some(WriteBackToDocuments { embedder_id, user_provided }) =
action.write_back()
{
let reader = ArroyWrapper::new(
self.index.vector_arroy,
let reader = HannoyWrapper::new(
self.index.vector_hannoy,
*embedder_id,
action.was_quantized,
);
@@ -884,7 +884,7 @@ impl<'a, 'i> Transform<'a, 'i> {
let injected_vectors: std::result::Result<
serde_json::Map<String, serde_json::Value>,
arroy::Error,
hannoy::Error,
> = readers
.iter()
.filter_map(|(name, (reader, user_provided))| {
@@ -949,9 +949,9 @@ impl<'a, 'i> Transform<'a, 'i> {
else {
continue;
};
let arroy =
ArroyWrapper::new(self.index.vector_arroy, infos.embedder_id, was_quantized);
let Some(dimensions) = arroy.dimensions(wtxn)? else {
let hannoy =
HannoyWrapper::new(self.index.vector_hannoy, infos.embedder_id, was_quantized);
let Some(dimensions) = hannoy.dimensions(wtxn)? else {
continue;
};
for fragment_id in fragment_ids {
@@ -959,17 +959,17 @@ impl<'a, 'i> Transform<'a, 'i> {
if infos.embedding_status.user_provided_docids().is_empty() {
// no user-provided docids for this embedder: clear the whole store
arroy.clear_store(wtxn, *fragment_id, dimensions)?;
hannoy.clear_store(wtxn, *fragment_id, dimensions)?;
continue;
}
// some docids are user-provided: delete only the items that are not user-provided
let to_delete = arroy.items_in_store(wtxn, *fragment_id, |items| {
let to_delete = hannoy.items_in_store(wtxn, *fragment_id, |items| {
items - infos.embedding_status.user_provided_docids()
})?;
for to_delete in to_delete {
arroy.del_item_in_store(wtxn, to_delete, *fragment_id, dimensions)?;
hannoy.del_item_in_store(wtxn, to_delete, *fragment_id, dimensions)?;
}
}
}

View File

@@ -27,7 +27,7 @@ use crate::update::index_documents::helpers::{
};
use crate::update::settings::InnerIndexSettingsDiff;
use crate::vector::db::{EmbeddingStatusDelta, IndexEmbeddingConfig};
use crate::vector::ArroyWrapper;
use crate::vector::HannoyWrapper;
use crate::{
lat_lng_to_xyz, CboRoaringBitmapCodec, DocumentId, FieldId, GeoPoint, Index, InternalError,
Result, SerializationError, U8StrStrCodec,
@@ -677,7 +677,8 @@ pub(crate) fn write_typed_chunk_into_index(
.get(&embedder_name)
.is_some_and(|conf| conf.is_quantized);
// FIXME: allow customizing distance
let writer = ArroyWrapper::new(index.vector_arroy, infos.embedder_id, binary_quantized);
let writer =
HannoyWrapper::new(index.vector_hannoy, infos.embedder_id, binary_quantized);
// remove vectors for docids we want them removed
let merger = remove_vectors_builder.build();