diff --git a/crates/milli/src/index.rs b/crates/milli/src/index.rs index 4c4712c63..c4651b5a9 100644 --- a/crates/milli/src/index.rs +++ b/crates/milli/src/index.rs @@ -31,7 +31,7 @@ use crate::prompt::PromptData; use crate::proximity::ProximityPrecision; use crate::update::new::StdResult; use crate::vector::db::IndexEmbeddingConfigs; -use crate::vector::{Embedding, HannoyStats, HannoyWrapper}; +use crate::vector::{Embedding, HannoyStats, VectorStore}; use crate::{ default_criteria, CboRoaringBitmapCodec, Criterion, DocumentId, ExternalDocumentsIds, FacetDistribution, FieldDistribution, FieldId, FieldIdMapMissingEntry, FieldIdWordCountCodec, @@ -237,7 +237,7 @@ impl Index { // vector stuff let embedder_category_id = env.create_database(&mut wtxn, Some(VECTOR_EMBEDDER_CATEGORY_ID))?; - let vector_hannoy = env.create_database(&mut wtxn, Some(VECTOR_HANNOY))?; + let vector_hannoy = env.create_database(&mut wtxn, Some(VECTOR_STORE))?; let documents = env.create_database(&mut wtxn, Some(DOCUMENTS))?; @@ -1772,7 +1772,7 @@ impl Index { for config in embedders.embedding_configs(rtxn)? { let embedder_info = embedders.embedder_info(rtxn, &config.name)?.unwrap(); let has_fragments = config.config.embedder_options.has_fragments(); - let reader = HannoyWrapper::new( + let reader = VectorStore::new( self.vector_hannoy, embedder_info.embedder_id, config.config.quantized(), @@ -1798,7 +1798,7 @@ impl Index { for config in embedding_configs.embedding_configs(rtxn)? { let embedder_id = embedding_configs.embedder_id(rtxn, &config.name)?.unwrap(); let reader = - HannoyWrapper::new(self.vector_hannoy, embedder_id, config.config.quantized()); + VectorStore::new(self.vector_hannoy, embedder_id, config.config.quantized()); reader.aggregate_stats(rtxn, &mut stats)?; } Ok(stats) diff --git a/crates/milli/src/search/new/vector_sort.rs b/crates/milli/src/search/new/vector_sort.rs index 71f7faa48..ce755c57d 100644 --- a/crates/milli/src/search/new/vector_sort.rs +++ b/crates/milli/src/search/new/vector_sort.rs @@ -6,7 +6,7 @@ use roaring::RoaringBitmap; use super::ranking_rules::{RankingRule, RankingRuleOutput, RankingRuleQueryTrait}; use super::VectorStoreStats; use crate::score_details::{self, ScoreDetails}; -use crate::vector::{DistributionShift, Embedder, HannoyWrapper}; +use crate::vector::{DistributionShift, Embedder, VectorStore}; use crate::{DocumentId, Result, SearchContext, SearchLogger}; pub struct VectorSort { @@ -56,8 +56,7 @@ impl VectorSort { let target = &self.target; let before = Instant::now(); - let reader = - HannoyWrapper::new(ctx.index.vector_hannoy, self.embedder_index, self.quantized); + let reader = VectorStore::new(ctx.index.vector_hannoy, self.embedder_index, self.quantized); let results = reader.nns_by_vector(ctx.txn, target, self.limit, Some(vector_candidates))?; self.cached_sorted_docids = results.into_iter(); *ctx.vector_store_stats.get_or_insert_default() += VectorStoreStats { diff --git a/crates/milli/src/search/similar.rs b/crates/milli/src/search/similar.rs index 2e70958c0..ec3a5a565 100644 --- a/crates/milli/src/search/similar.rs +++ b/crates/milli/src/search/similar.rs @@ -3,7 +3,7 @@ use std::sync::Arc; use roaring::RoaringBitmap; use crate::score_details::{self, ScoreDetails}; -use crate::vector::{Embedder, HannoyWrapper}; +use crate::vector::{Embedder, VectorStore}; use crate::{filtered_universe, DocumentId, Filter, Index, Result, SearchResult}; pub struct Similar<'a> { @@ -72,7 +72,7 @@ impl<'a> Similar<'a> { crate::UserError::InvalidSimilarEmbedder(self.embedder_name.to_owned()) })?; - let reader = HannoyWrapper::new(self.index.vector_hannoy, embedder_index, self.quantized); + let reader = VectorStore::new(self.index.vector_hannoy, embedder_index, self.quantized); let results = reader.nns_by_item( self.rtxn, self.id, diff --git a/crates/milli/src/update/index_documents/mod.rs b/crates/milli/src/update/index_documents/mod.rs index ec1aac32a..5bfc8c218 100644 --- a/crates/milli/src/update/index_documents/mod.rs +++ b/crates/milli/src/update/index_documents/mod.rs @@ -39,7 +39,7 @@ use crate::update::{ IndexerConfig, UpdateIndexingStep, WordPrefixDocids, WordPrefixIntegerDocids, WordsPrefixesFst, }; use crate::vector::db::EmbedderInfo; -use crate::vector::{HannoyWrapper, RuntimeEmbedders}; +use crate::vector::{RuntimeEmbedders, VectorStore}; use crate::{CboRoaringBitmapCodec, Index, Result, UserError}; static MERGED_DATABASE_COUNT: usize = 7; @@ -494,7 +494,7 @@ where }, )?; let reader = - HannoyWrapper::new(self.index.vector_hannoy, index, action.was_quantized); + VectorStore::new(self.index.vector_hannoy, index, action.was_quantized); let Some(dim) = reader.dimensions(self.wtxn)? else { continue; }; @@ -523,7 +523,7 @@ where let is_quantizing = embedder_config.is_some_and(|action| action.is_being_quantized); pool.install(|| { - let mut writer = HannoyWrapper::new(vector_hannoy, embedder_index, was_quantized); + let mut writer = VectorStore::new(vector_hannoy, embedder_index, was_quantized); writer.build_and_quantize( wtxn, // In the settings we don't have any progress to share diff --git a/crates/milli/src/update/index_documents/transform.rs b/crates/milli/src/update/index_documents/transform.rs index fcb5b00d1..b7c936a82 100644 --- a/crates/milli/src/update/index_documents/transform.rs +++ b/crates/milli/src/update/index_documents/transform.rs @@ -32,7 +32,7 @@ use crate::update::settings::{InnerIndexSettings, InnerIndexSettingsDiff}; use crate::update::{AvailableIds, UpdateIndexingStep}; use crate::vector::parsed_vectors::{ExplicitVectors, VectorOrArrayOfVectors}; use crate::vector::settings::{RemoveFragments, WriteBackToDocuments}; -use crate::vector::HannoyWrapper; +use crate::vector::VectorStore; use crate::{FieldDistribution, FieldId, FieldIdMapMissingEntry, Index, Result}; pub struct TransformOutput { @@ -834,14 +834,14 @@ impl<'a, 'i> Transform<'a, 'i> { None }; - let readers: BTreeMap<&str, (HannoyWrapper, &RoaringBitmap)> = settings_diff + let readers: BTreeMap<&str, (VectorStore, &RoaringBitmap)> = settings_diff .embedding_config_updates .iter() .filter_map(|(name, action)| { if let Some(WriteBackToDocuments { embedder_id, user_provided }) = action.write_back() { - let reader = HannoyWrapper::new( + let reader = VectorStore::new( self.index.vector_hannoy, *embedder_id, action.was_quantized, @@ -950,7 +950,7 @@ impl<'a, 'i> Transform<'a, 'i> { continue; }; let hannoy = - HannoyWrapper::new(self.index.vector_hannoy, infos.embedder_id, was_quantized); + VectorStore::new(self.index.vector_hannoy, infos.embedder_id, was_quantized); let Some(dimensions) = hannoy.dimensions(wtxn)? else { continue; }; diff --git a/crates/milli/src/update/index_documents/typed_chunk.rs b/crates/milli/src/update/index_documents/typed_chunk.rs index b0590eab4..31616906c 100644 --- a/crates/milli/src/update/index_documents/typed_chunk.rs +++ b/crates/milli/src/update/index_documents/typed_chunk.rs @@ -27,7 +27,7 @@ use crate::update::index_documents::helpers::{ }; use crate::update::settings::InnerIndexSettingsDiff; use crate::vector::db::{EmbeddingStatusDelta, IndexEmbeddingConfig}; -use crate::vector::HannoyWrapper; +use crate::vector::VectorStore; use crate::{ lat_lng_to_xyz, CboRoaringBitmapCodec, DocumentId, FieldId, GeoPoint, Index, InternalError, Result, SerializationError, U8StrStrCodec, @@ -677,8 +677,7 @@ pub(crate) fn write_typed_chunk_into_index( .get(&embedder_name) .is_some_and(|conf| conf.is_quantized); // FIXME: allow customizing distance - let writer = - HannoyWrapper::new(index.vector_hannoy, infos.embedder_id, binary_quantized); + let writer = VectorStore::new(index.vector_hannoy, infos.embedder_id, binary_quantized); // remove vectors for docids we want them removed let merger = remove_vectors_builder.build(); diff --git a/crates/milli/src/update/new/indexer/mod.rs b/crates/milli/src/update/new/indexer/mod.rs index 6ffd88763..e750d39a8 100644 --- a/crates/milli/src/update/new/indexer/mod.rs +++ b/crates/milli/src/update/new/indexer/mod.rs @@ -24,7 +24,7 @@ use crate::progress::{EmbedderStats, Progress}; use crate::update::settings::SettingsDelta; use crate::update::GrenadParameters; use crate::vector::settings::{EmbedderAction, RemoveFragments, WriteBackToDocuments}; -use crate::vector::{Embedder, HannoyWrapper, RuntimeEmbedders}; +use crate::vector::{Embedder, RuntimeEmbedders, VectorStore}; use crate::{FieldsIdsMap, GlobalFieldsIdsMap, Index, InternalError, Result, ThreadPoolNoAbort}; pub(crate) mod de; @@ -144,7 +144,7 @@ where })?; let dimensions = runtime.embedder.dimensions(); - let writer = HannoyWrapper::new(vector_arroy, embedder_index, runtime.is_quantized); + let writer = VectorStore::new(vector_arroy, embedder_index, runtime.is_quantized); Ok(( embedder_index, @@ -342,7 +342,7 @@ fn hannoy_writers_from_embedder_actions<'indexer>( embedder_actions: &'indexer BTreeMap, embedders: &'indexer RuntimeEmbedders, index_embedder_category_ids: &'indexer std::collections::HashMap, -) -> Result> { +) -> Result> { let vector_arroy = index.vector_hannoy; embedders @@ -362,7 +362,7 @@ fn hannoy_writers_from_embedder_actions<'indexer>( ))); }; let writer = - HannoyWrapper::new(vector_arroy, embedder_category_id, action.was_quantized); + VectorStore::new(vector_arroy, embedder_category_id, action.was_quantized); let dimensions = runtime.embedder.dimensions(); Some(Ok(( embedder_category_id, @@ -385,7 +385,7 @@ where let Some(WriteBackToDocuments { embedder_id, .. }) = action.write_back() else { continue; }; - let reader = HannoyWrapper::new(index.vector_hannoy, *embedder_id, action.was_quantized); + let reader = VectorStore::new(index.vector_hannoy, *embedder_id, action.was_quantized); let Some(dimensions) = reader.dimensions(wtxn)? else { continue; }; @@ -401,7 +401,7 @@ where let Some(infos) = index.embedding_configs().embedder_info(wtxn, embedder_name)? else { continue; }; - let arroy = HannoyWrapper::new(index.vector_hannoy, infos.embedder_id, was_quantized); + let arroy = VectorStore::new(index.vector_hannoy, infos.embedder_id, was_quantized); let Some(dimensions) = arroy.dimensions(wtxn)? else { continue; }; diff --git a/crates/milli/src/update/new/indexer/write.rs b/crates/milli/src/update/new/indexer/write.rs index 4be916c02..a023e1431 100644 --- a/crates/milli/src/update/new/indexer/write.rs +++ b/crates/milli/src/update/new/indexer/write.rs @@ -15,7 +15,7 @@ use crate::progress::Progress; use crate::update::settings::InnerIndexSettings; use crate::vector::db::IndexEmbeddingConfig; use crate::vector::settings::EmbedderAction; -use crate::vector::{Embedder, Embeddings, HannoyWrapper, RuntimeEmbedders}; +use crate::vector::{Embedder, Embeddings, RuntimeEmbedders, VectorStore}; use crate::{Error, Index, InternalError, Result, UserError}; pub fn write_to_db( @@ -23,7 +23,7 @@ pub fn write_to_db( finished_extraction: &AtomicBool, index: &Index, wtxn: &mut RwTxn<'_>, - hannoy_writers: &HashMap, + hannoy_writers: &HashMap, ) -> Result { // Used by by the HannoySetVector to copy the embedding into an // aligned memory area, required by arroy to accept a new vector. @@ -116,7 +116,7 @@ pub fn build_vectors( progress: &Progress, index_embeddings: Vec, hannoy_memory: Option, - hannoy_writers: &mut HashMap, + hannoy_writers: &mut HashMap, embeder_actions: Option<&BTreeMap>, must_stop_processing: &MSP, ) -> Result<()> @@ -181,7 +181,7 @@ pub fn write_from_bbqueue( writer_receiver: &mut WriterBbqueueReceiver<'_>, index: &Index, wtxn: &mut RwTxn<'_>, - hannoy_writers: &HashMap, + hannoy_writers: &HashMap, aligned_embedding: &mut Vec, ) -> crate::Result<()> { while let Some(frame_with_header) = writer_receiver.recv_frame() { diff --git a/crates/milli/src/update/new/vector_document.rs b/crates/milli/src/update/new/vector_document.rs index 64e1377ad..a091f5ab9 100644 --- a/crates/milli/src/update/new/vector_document.rs +++ b/crates/milli/src/update/new/vector_document.rs @@ -14,7 +14,7 @@ use crate::constants::RESERVED_VECTORS_FIELD_NAME; use crate::documents::FieldIdMapper; use crate::vector::db::{EmbeddingStatus, IndexEmbeddingConfig}; use crate::vector::parsed_vectors::{RawVectors, RawVectorsError, VectorOrArrayOfVectors}; -use crate::vector::{Embedding, HannoyWrapper, RuntimeEmbedders}; +use crate::vector::{Embedding, RuntimeEmbedders, VectorStore}; use crate::{DocumentId, Index, InternalError, Result, UserError}; #[derive(Serialize)] @@ -121,7 +121,7 @@ impl<'t> VectorDocumentFromDb<'t> { status: &EmbeddingStatus, ) -> Result> { let reader = - HannoyWrapper::new(self.index.vector_hannoy, embedder_id, config.config.quantized()); + VectorStore::new(self.index.vector_hannoy, embedder_id, config.config.quantized()); let vectors = reader.item_vectors(self.rtxn, self.docid)?; Ok(VectorEntry { diff --git a/crates/milli/src/vector/mod.rs b/crates/milli/src/vector/mod.rs index 512361029..649412ade 100644 --- a/crates/milli/src/vector/mod.rs +++ b/crates/milli/src/vector/mod.rs @@ -45,13 +45,13 @@ const HANNOY_EF_CONSTRUCTION: usize = 48; const HANNOY_M: usize = 16; const HANNOY_M0: usize = 32; -pub struct HannoyWrapper { +pub struct VectorStore { quantized: bool, embedder_index: u8, database: hannoy::Database, } -impl HannoyWrapper { +impl VectorStore { pub fn new( database: hannoy::Database, embedder_index: u8,