Rename the ArroyWrapper/HannoyWrapper into VectorStore

This commit is contained in:
Clément Renault
2025-07-29 17:30:02 +02:00
parent bd38f8e359
commit fdbfd36f72
10 changed files with 31 additions and 33 deletions

View File

@ -31,7 +31,7 @@ use crate::prompt::PromptData;
use crate::proximity::ProximityPrecision; use crate::proximity::ProximityPrecision;
use crate::update::new::StdResult; use crate::update::new::StdResult;
use crate::vector::db::IndexEmbeddingConfigs; use crate::vector::db::IndexEmbeddingConfigs;
use crate::vector::{Embedding, HannoyStats, HannoyWrapper}; use crate::vector::{Embedding, HannoyStats, VectorStore};
use crate::{ use crate::{
default_criteria, CboRoaringBitmapCodec, Criterion, DocumentId, ExternalDocumentsIds, default_criteria, CboRoaringBitmapCodec, Criterion, DocumentId, ExternalDocumentsIds,
FacetDistribution, FieldDistribution, FieldId, FieldIdMapMissingEntry, FieldIdWordCountCodec, FacetDistribution, FieldDistribution, FieldId, FieldIdMapMissingEntry, FieldIdWordCountCodec,
@ -237,7 +237,7 @@ impl Index {
// vector stuff // vector stuff
let embedder_category_id = let embedder_category_id =
env.create_database(&mut wtxn, Some(VECTOR_EMBEDDER_CATEGORY_ID))?; env.create_database(&mut wtxn, Some(VECTOR_EMBEDDER_CATEGORY_ID))?;
let vector_hannoy = env.create_database(&mut wtxn, Some(VECTOR_HANNOY))?; let vector_hannoy = env.create_database(&mut wtxn, Some(VECTOR_STORE))?;
let documents = env.create_database(&mut wtxn, Some(DOCUMENTS))?; let documents = env.create_database(&mut wtxn, Some(DOCUMENTS))?;
@ -1772,7 +1772,7 @@ impl Index {
for config in embedders.embedding_configs(rtxn)? { for config in embedders.embedding_configs(rtxn)? {
let embedder_info = embedders.embedder_info(rtxn, &config.name)?.unwrap(); let embedder_info = embedders.embedder_info(rtxn, &config.name)?.unwrap();
let has_fragments = config.config.embedder_options.has_fragments(); let has_fragments = config.config.embedder_options.has_fragments();
let reader = HannoyWrapper::new( let reader = VectorStore::new(
self.vector_hannoy, self.vector_hannoy,
embedder_info.embedder_id, embedder_info.embedder_id,
config.config.quantized(), config.config.quantized(),
@ -1798,7 +1798,7 @@ impl Index {
for config in embedding_configs.embedding_configs(rtxn)? { for config in embedding_configs.embedding_configs(rtxn)? {
let embedder_id = embedding_configs.embedder_id(rtxn, &config.name)?.unwrap(); let embedder_id = embedding_configs.embedder_id(rtxn, &config.name)?.unwrap();
let reader = let reader =
HannoyWrapper::new(self.vector_hannoy, embedder_id, config.config.quantized()); VectorStore::new(self.vector_hannoy, embedder_id, config.config.quantized());
reader.aggregate_stats(rtxn, &mut stats)?; reader.aggregate_stats(rtxn, &mut stats)?;
} }
Ok(stats) Ok(stats)

View File

@ -6,7 +6,7 @@ use roaring::RoaringBitmap;
use super::ranking_rules::{RankingRule, RankingRuleOutput, RankingRuleQueryTrait}; use super::ranking_rules::{RankingRule, RankingRuleOutput, RankingRuleQueryTrait};
use super::VectorStoreStats; use super::VectorStoreStats;
use crate::score_details::{self, ScoreDetails}; use crate::score_details::{self, ScoreDetails};
use crate::vector::{DistributionShift, Embedder, HannoyWrapper}; use crate::vector::{DistributionShift, Embedder, VectorStore};
use crate::{DocumentId, Result, SearchContext, SearchLogger}; use crate::{DocumentId, Result, SearchContext, SearchLogger};
pub struct VectorSort<Q: RankingRuleQueryTrait> { pub struct VectorSort<Q: RankingRuleQueryTrait> {
@ -56,8 +56,7 @@ impl<Q: RankingRuleQueryTrait> VectorSort<Q> {
let target = &self.target; let target = &self.target;
let before = Instant::now(); let before = Instant::now();
let reader = let reader = VectorStore::new(ctx.index.vector_hannoy, self.embedder_index, self.quantized);
HannoyWrapper::new(ctx.index.vector_hannoy, self.embedder_index, self.quantized);
let results = reader.nns_by_vector(ctx.txn, target, self.limit, Some(vector_candidates))?; let results = reader.nns_by_vector(ctx.txn, target, self.limit, Some(vector_candidates))?;
self.cached_sorted_docids = results.into_iter(); self.cached_sorted_docids = results.into_iter();
*ctx.vector_store_stats.get_or_insert_default() += VectorStoreStats { *ctx.vector_store_stats.get_or_insert_default() += VectorStoreStats {

View File

@ -3,7 +3,7 @@ use std::sync::Arc;
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
use crate::score_details::{self, ScoreDetails}; use crate::score_details::{self, ScoreDetails};
use crate::vector::{Embedder, HannoyWrapper}; use crate::vector::{Embedder, VectorStore};
use crate::{filtered_universe, DocumentId, Filter, Index, Result, SearchResult}; use crate::{filtered_universe, DocumentId, Filter, Index, Result, SearchResult};
pub struct Similar<'a> { pub struct Similar<'a> {
@ -72,7 +72,7 @@ impl<'a> Similar<'a> {
crate::UserError::InvalidSimilarEmbedder(self.embedder_name.to_owned()) crate::UserError::InvalidSimilarEmbedder(self.embedder_name.to_owned())
})?; })?;
let reader = HannoyWrapper::new(self.index.vector_hannoy, embedder_index, self.quantized); let reader = VectorStore::new(self.index.vector_hannoy, embedder_index, self.quantized);
let results = reader.nns_by_item( let results = reader.nns_by_item(
self.rtxn, self.rtxn,
self.id, self.id,

View File

@ -39,7 +39,7 @@ use crate::update::{
IndexerConfig, UpdateIndexingStep, WordPrefixDocids, WordPrefixIntegerDocids, WordsPrefixesFst, IndexerConfig, UpdateIndexingStep, WordPrefixDocids, WordPrefixIntegerDocids, WordsPrefixesFst,
}; };
use crate::vector::db::EmbedderInfo; use crate::vector::db::EmbedderInfo;
use crate::vector::{HannoyWrapper, RuntimeEmbedders}; use crate::vector::{RuntimeEmbedders, VectorStore};
use crate::{CboRoaringBitmapCodec, Index, Result, UserError}; use crate::{CboRoaringBitmapCodec, Index, Result, UserError};
static MERGED_DATABASE_COUNT: usize = 7; static MERGED_DATABASE_COUNT: usize = 7;
@ -494,7 +494,7 @@ where
}, },
)?; )?;
let reader = let reader =
HannoyWrapper::new(self.index.vector_hannoy, index, action.was_quantized); VectorStore::new(self.index.vector_hannoy, index, action.was_quantized);
let Some(dim) = reader.dimensions(self.wtxn)? else { let Some(dim) = reader.dimensions(self.wtxn)? else {
continue; continue;
}; };
@ -523,7 +523,7 @@ where
let is_quantizing = embedder_config.is_some_and(|action| action.is_being_quantized); let is_quantizing = embedder_config.is_some_and(|action| action.is_being_quantized);
pool.install(|| { pool.install(|| {
let mut writer = HannoyWrapper::new(vector_hannoy, embedder_index, was_quantized); let mut writer = VectorStore::new(vector_hannoy, embedder_index, was_quantized);
writer.build_and_quantize( writer.build_and_quantize(
wtxn, wtxn,
// In the settings we don't have any progress to share // In the settings we don't have any progress to share

View File

@ -32,7 +32,7 @@ use crate::update::settings::{InnerIndexSettings, InnerIndexSettingsDiff};
use crate::update::{AvailableIds, UpdateIndexingStep}; use crate::update::{AvailableIds, UpdateIndexingStep};
use crate::vector::parsed_vectors::{ExplicitVectors, VectorOrArrayOfVectors}; use crate::vector::parsed_vectors::{ExplicitVectors, VectorOrArrayOfVectors};
use crate::vector::settings::{RemoveFragments, WriteBackToDocuments}; use crate::vector::settings::{RemoveFragments, WriteBackToDocuments};
use crate::vector::HannoyWrapper; use crate::vector::VectorStore;
use crate::{FieldDistribution, FieldId, FieldIdMapMissingEntry, Index, Result}; use crate::{FieldDistribution, FieldId, FieldIdMapMissingEntry, Index, Result};
pub struct TransformOutput { pub struct TransformOutput {
@ -834,14 +834,14 @@ impl<'a, 'i> Transform<'a, 'i> {
None None
}; };
let readers: BTreeMap<&str, (HannoyWrapper, &RoaringBitmap)> = settings_diff let readers: BTreeMap<&str, (VectorStore, &RoaringBitmap)> = settings_diff
.embedding_config_updates .embedding_config_updates
.iter() .iter()
.filter_map(|(name, action)| { .filter_map(|(name, action)| {
if let Some(WriteBackToDocuments { embedder_id, user_provided }) = if let Some(WriteBackToDocuments { embedder_id, user_provided }) =
action.write_back() action.write_back()
{ {
let reader = HannoyWrapper::new( let reader = VectorStore::new(
self.index.vector_hannoy, self.index.vector_hannoy,
*embedder_id, *embedder_id,
action.was_quantized, action.was_quantized,
@ -950,7 +950,7 @@ impl<'a, 'i> Transform<'a, 'i> {
continue; continue;
}; };
let hannoy = let hannoy =
HannoyWrapper::new(self.index.vector_hannoy, infos.embedder_id, was_quantized); VectorStore::new(self.index.vector_hannoy, infos.embedder_id, was_quantized);
let Some(dimensions) = hannoy.dimensions(wtxn)? else { let Some(dimensions) = hannoy.dimensions(wtxn)? else {
continue; continue;
}; };

View File

@ -27,7 +27,7 @@ use crate::update::index_documents::helpers::{
}; };
use crate::update::settings::InnerIndexSettingsDiff; use crate::update::settings::InnerIndexSettingsDiff;
use crate::vector::db::{EmbeddingStatusDelta, IndexEmbeddingConfig}; use crate::vector::db::{EmbeddingStatusDelta, IndexEmbeddingConfig};
use crate::vector::HannoyWrapper; use crate::vector::VectorStore;
use crate::{ use crate::{
lat_lng_to_xyz, CboRoaringBitmapCodec, DocumentId, FieldId, GeoPoint, Index, InternalError, lat_lng_to_xyz, CboRoaringBitmapCodec, DocumentId, FieldId, GeoPoint, Index, InternalError,
Result, SerializationError, U8StrStrCodec, Result, SerializationError, U8StrStrCodec,
@ -677,8 +677,7 @@ pub(crate) fn write_typed_chunk_into_index(
.get(&embedder_name) .get(&embedder_name)
.is_some_and(|conf| conf.is_quantized); .is_some_and(|conf| conf.is_quantized);
// FIXME: allow customizing distance // FIXME: allow customizing distance
let writer = let writer = VectorStore::new(index.vector_hannoy, infos.embedder_id, binary_quantized);
HannoyWrapper::new(index.vector_hannoy, infos.embedder_id, binary_quantized);
// remove vectors for docids we want them removed // remove vectors for docids we want them removed
let merger = remove_vectors_builder.build(); let merger = remove_vectors_builder.build();

View File

@ -24,7 +24,7 @@ use crate::progress::{EmbedderStats, Progress};
use crate::update::settings::SettingsDelta; use crate::update::settings::SettingsDelta;
use crate::update::GrenadParameters; use crate::update::GrenadParameters;
use crate::vector::settings::{EmbedderAction, RemoveFragments, WriteBackToDocuments}; use crate::vector::settings::{EmbedderAction, RemoveFragments, WriteBackToDocuments};
use crate::vector::{Embedder, HannoyWrapper, RuntimeEmbedders}; use crate::vector::{Embedder, RuntimeEmbedders, VectorStore};
use crate::{FieldsIdsMap, GlobalFieldsIdsMap, Index, InternalError, Result, ThreadPoolNoAbort}; use crate::{FieldsIdsMap, GlobalFieldsIdsMap, Index, InternalError, Result, ThreadPoolNoAbort};
pub(crate) mod de; pub(crate) mod de;
@ -143,7 +143,7 @@ where
})?; })?;
let dimensions = runtime.embedder.dimensions(); let dimensions = runtime.embedder.dimensions();
let writer = HannoyWrapper::new(vector_arroy, embedder_index, runtime.is_quantized); let writer = VectorStore::new(vector_arroy, embedder_index, runtime.is_quantized);
Ok(( Ok((
embedder_index, embedder_index,
@ -341,7 +341,7 @@ fn hannoy_writers_from_embedder_actions<'indexer>(
embedder_actions: &'indexer BTreeMap<String, EmbedderAction>, embedder_actions: &'indexer BTreeMap<String, EmbedderAction>,
embedders: &'indexer RuntimeEmbedders, embedders: &'indexer RuntimeEmbedders,
index_embedder_category_ids: &'indexer std::collections::HashMap<String, u8>, index_embedder_category_ids: &'indexer std::collections::HashMap<String, u8>,
) -> Result<HashMap<u8, (&'indexer str, &'indexer Embedder, HannoyWrapper, usize)>> { ) -> Result<HashMap<u8, (&'indexer str, &'indexer Embedder, VectorStore, usize)>> {
let vector_arroy = index.vector_hannoy; let vector_arroy = index.vector_hannoy;
embedders embedders
@ -361,7 +361,7 @@ fn hannoy_writers_from_embedder_actions<'indexer>(
))); )));
}; };
let writer = let writer =
HannoyWrapper::new(vector_arroy, embedder_category_id, action.was_quantized); VectorStore::new(vector_arroy, embedder_category_id, action.was_quantized);
let dimensions = runtime.embedder.dimensions(); let dimensions = runtime.embedder.dimensions();
Some(Ok(( Some(Ok((
embedder_category_id, embedder_category_id,
@ -384,7 +384,7 @@ where
let Some(WriteBackToDocuments { embedder_id, .. }) = action.write_back() else { let Some(WriteBackToDocuments { embedder_id, .. }) = action.write_back() else {
continue; continue;
}; };
let reader = HannoyWrapper::new(index.vector_hannoy, *embedder_id, action.was_quantized); let reader = VectorStore::new(index.vector_hannoy, *embedder_id, action.was_quantized);
let Some(dimensions) = reader.dimensions(wtxn)? else { let Some(dimensions) = reader.dimensions(wtxn)? else {
continue; continue;
}; };
@ -400,7 +400,7 @@ where
let Some(infos) = index.embedding_configs().embedder_info(wtxn, embedder_name)? else { let Some(infos) = index.embedding_configs().embedder_info(wtxn, embedder_name)? else {
continue; continue;
}; };
let arroy = HannoyWrapper::new(index.vector_hannoy, infos.embedder_id, was_quantized); let arroy = VectorStore::new(index.vector_hannoy, infos.embedder_id, was_quantized);
let Some(dimensions) = arroy.dimensions(wtxn)? else { let Some(dimensions) = arroy.dimensions(wtxn)? else {
continue; continue;
}; };

View File

@ -15,7 +15,7 @@ use crate::progress::Progress;
use crate::update::settings::InnerIndexSettings; use crate::update::settings::InnerIndexSettings;
use crate::vector::db::IndexEmbeddingConfig; use crate::vector::db::IndexEmbeddingConfig;
use crate::vector::settings::EmbedderAction; use crate::vector::settings::EmbedderAction;
use crate::vector::{Embedder, Embeddings, HannoyWrapper, RuntimeEmbedders}; use crate::vector::{Embedder, Embeddings, RuntimeEmbedders, VectorStore};
use crate::{Error, Index, InternalError, Result, UserError}; use crate::{Error, Index, InternalError, Result, UserError};
pub fn write_to_db( pub fn write_to_db(
@ -23,7 +23,7 @@ pub fn write_to_db(
finished_extraction: &AtomicBool, finished_extraction: &AtomicBool,
index: &Index, index: &Index,
wtxn: &mut RwTxn<'_>, wtxn: &mut RwTxn<'_>,
hannoy_writers: &HashMap<u8, (&str, &Embedder, HannoyWrapper, usize)>, hannoy_writers: &HashMap<u8, (&str, &Embedder, VectorStore, usize)>,
) -> Result<ChannelCongestion> { ) -> Result<ChannelCongestion> {
// Used by the HannoySetVector to copy the embedding into an // Used by the HannoySetVector to copy the embedding into an
// aligned memory area, required by arroy to accept a new vector. // aligned memory area, required by arroy to accept a new vector.
@ -116,7 +116,7 @@ pub fn build_vectors<MSP>(
progress: &Progress, progress: &Progress,
index_embeddings: Vec<IndexEmbeddingConfig>, index_embeddings: Vec<IndexEmbeddingConfig>,
hannoy_memory: Option<usize>, hannoy_memory: Option<usize>,
hannoy_writers: &mut HashMap<u8, (&str, &Embedder, HannoyWrapper, usize)>, hannoy_writers: &mut HashMap<u8, (&str, &Embedder, VectorStore, usize)>,
embeder_actions: Option<&BTreeMap<String, EmbedderAction>>, embeder_actions: Option<&BTreeMap<String, EmbedderAction>>,
must_stop_processing: &MSP, must_stop_processing: &MSP,
) -> Result<()> ) -> Result<()>
@ -181,7 +181,7 @@ pub fn write_from_bbqueue(
writer_receiver: &mut WriterBbqueueReceiver<'_>, writer_receiver: &mut WriterBbqueueReceiver<'_>,
index: &Index, index: &Index,
wtxn: &mut RwTxn<'_>, wtxn: &mut RwTxn<'_>,
hannoy_writers: &HashMap<u8, (&str, &crate::vector::Embedder, HannoyWrapper, usize)>, hannoy_writers: &HashMap<u8, (&str, &crate::vector::Embedder, VectorStore, usize)>,
aligned_embedding: &mut Vec<f32>, aligned_embedding: &mut Vec<f32>,
) -> crate::Result<()> { ) -> crate::Result<()> {
while let Some(frame_with_header) = writer_receiver.recv_frame() { while let Some(frame_with_header) = writer_receiver.recv_frame() {

View File

@ -14,7 +14,7 @@ use crate::constants::RESERVED_VECTORS_FIELD_NAME;
use crate::documents::FieldIdMapper; use crate::documents::FieldIdMapper;
use crate::vector::db::{EmbeddingStatus, IndexEmbeddingConfig}; use crate::vector::db::{EmbeddingStatus, IndexEmbeddingConfig};
use crate::vector::parsed_vectors::{RawVectors, RawVectorsError, VectorOrArrayOfVectors}; use crate::vector::parsed_vectors::{RawVectors, RawVectorsError, VectorOrArrayOfVectors};
use crate::vector::{Embedding, HannoyWrapper, RuntimeEmbedders}; use crate::vector::{Embedding, RuntimeEmbedders, VectorStore};
use crate::{DocumentId, Index, InternalError, Result, UserError}; use crate::{DocumentId, Index, InternalError, Result, UserError};
#[derive(Serialize)] #[derive(Serialize)]
@ -121,7 +121,7 @@ impl<'t> VectorDocumentFromDb<'t> {
status: &EmbeddingStatus, status: &EmbeddingStatus,
) -> Result<VectorEntry<'t>> { ) -> Result<VectorEntry<'t>> {
let reader = let reader =
HannoyWrapper::new(self.index.vector_hannoy, embedder_id, config.config.quantized()); VectorStore::new(self.index.vector_hannoy, embedder_id, config.config.quantized());
let vectors = reader.item_vectors(self.rtxn, self.docid)?; let vectors = reader.item_vectors(self.rtxn, self.docid)?;
Ok(VectorEntry { Ok(VectorEntry {

View File

@ -45,13 +45,13 @@ const HANNOY_EF_CONSTRUCTION: usize = 48;
const HANNOY_M: usize = 16; const HANNOY_M: usize = 16;
const HANNOY_M0: usize = 32; const HANNOY_M0: usize = 32;
pub struct HannoyWrapper { pub struct VectorStore {
quantized: bool, quantized: bool,
embedder_index: u8, embedder_index: u8,
database: hannoy::Database<Unspecified>, database: hannoy::Database<Unspecified>,
} }
impl HannoyWrapper { impl VectorStore {
pub fn new( pub fn new(
database: hannoy::Database<Unspecified>, database: hannoy::Database<Unspecified>,
embedder_index: u8, embedder_index: u8,