From 79c71636a57d2ae203d55cd889d271cbc442f546 Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Mon, 21 Jul 2025 11:42:46 +0200 Subject: [PATCH] Use Hannoy instead of arroy --- Cargo.lock | 84 ++++--- .../index-scheduler/src/index_mapper/mod.rs | 6 +- .../tests/vector/binary_quantized.rs | 2 +- crates/meilisearch/tests/vector/mod.rs | 2 +- crates/meilisearch/tests/vector/settings.rs | 2 +- crates/meilitool/src/main.rs | 12 +- crates/meilitool/src/upgrade/v1_11.rs | 17 +- crates/milli/Cargo.toml | 2 +- crates/milli/src/error.rs | 31 +-- crates/milli/src/index.rs | 28 +-- crates/milli/src/lib.rs | 2 +- crates/milli/src/progress.rs | 82 +++--- crates/milli/src/search/new/vector_sort.rs | 5 +- crates/milli/src/search/similar.rs | 4 +- crates/milli/src/update/clear_documents.rs | 4 +- .../milli/src/update/index_documents/mod.rs | 8 +- .../src/update/index_documents/transform.rs | 22 +- .../src/update/index_documents/typed_chunk.rs | 5 +- crates/milli/src/update/new/channel.rs | 64 ++--- crates/milli/src/update/new/indexer/mod.rs | 40 +-- crates/milli/src/update/new/indexer/write.rs | 40 +-- .../milli/src/update/new/vector_document.rs | 8 +- crates/milli/src/update/upgrade/v1_14.rs | 15 +- crates/milli/src/vector/composite.rs | 17 +- crates/milli/src/vector/mod.rs | 234 +++++++++--------- 25 files changed, 380 insertions(+), 356 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 8413b3d14..c6ca66bd3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -442,28 +442,6 @@ version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" -[[package]] -name = "arroy" -version = "0.6.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08e6111f351d004bd13e95ab540721272136fd3218b39d3ec95a2ea1c4e6a0a6" -dependencies = [ - "bytemuck", - "byteorder", - "enum-iterator", - "heed", - "memmap2", - "nohash", - "ordered-float 4.6.0", - "page_size", - "rand 0.8.5", - "rayon", - "roaring", - "tempfile", - "thiserror 2.0.12", - "tracing", -] - [[package]] name = "assert-json-diff" version = "2.0.2" @@ -2600,6 +2578,31 @@ dependencies = [ "rand_distr", ] +[[package]] +name = "hannoy" +version = "0.0.1" +source = "git+https://github.com/nnethercott/hannoy?rev=d51750c#d51750cd5612b6875375f5f4ad3928c87d55ee38" +dependencies = [ + "bytemuck", + "byteorder", + "enum-iterator", + "hashbrown 0.15.4", + "heed", + "memmap2", + "min-max-heap", + "nohash", + "ordered-float 5.0.0", + "page_size", + "papaya", + "rand 0.8.5", + "rayon", + "roaring", + "tempfile", + "thiserror 2.0.12", + "tinyvec", + "tracing", +] + [[package]] name = "hash32" version = "0.3.1" @@ -3922,7 +3925,6 @@ name = "milli" version = "1.16.0" dependencies = [ "allocator-api2 0.3.0", - "arroy", "bbqueue", "big_s", "bimap", @@ -3950,6 +3952,7 @@ dependencies = [ "fxhash", "geoutils", "grenad", + "hannoy", "hashbrown 0.15.4", "heed", "hf-hub", @@ -4019,6 +4022,12 @@ dependencies = [ "unicase", ] +[[package]] +name = "min-max-heap" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2687e6cf9c00f48e9284cf9fd15f2ef341d03cc7743abf9df4c5f07fdee50b18" + [[package]] name = "minimal-lexical" version = "0.2.1" @@ -4359,15 +4368,6 @@ dependencies = [ "num-traits", ] -[[package]] -name = "ordered-float" -version = "4.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7bb71e1b3fa6ca1c61f383464aaf2bb0e2f8e772a1f01d486832464de363b951" -dependencies = [ - "num-traits", -] - 
[[package]] name = "ordered-float" version = "5.0.0" @@ -4399,6 +4399,16 @@ dependencies = [ "winapi", ] +[[package]] +name = "papaya" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f92dd0b07c53a0a0c764db2ace8c541dc47320dad97c2200c2a637ab9dd2328f" +dependencies = [ + "equivalent", + "seize", +] + [[package]] name = "parking_lot" version = "0.12.4" @@ -5450,6 +5460,16 @@ dependencies = [ "time", ] +[[package]] +name = "seize" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e4b8d813387d566f627f3ea1b914c068aac94c40ae27ec43f5f33bde65abefe7" +dependencies = [ + "libc", + "windows-sys 0.52.0", +] + [[package]] name = "semver" version = "1.0.26" diff --git a/crates/index-scheduler/src/index_mapper/mod.rs b/crates/index-scheduler/src/index_mapper/mod.rs index 86fb17ca7..fd0383efd 100644 --- a/crates/index-scheduler/src/index_mapper/mod.rs +++ b/crates/index-scheduler/src/index_mapper/mod.rs @@ -143,10 +143,10 @@ impl IndexStats { /// /// - rtxn: a RO transaction for the index, obtained from `Index::read_txn()`. pub fn new(index: &Index, rtxn: &RoTxn) -> milli::Result { - let arroy_stats = index.arroy_stats(rtxn)?; + let hannoy_stats = index.hannoy_stats(rtxn)?; Ok(IndexStats { - number_of_embeddings: Some(arroy_stats.number_of_embeddings), - number_of_embedded_documents: Some(arroy_stats.documents.len()), + number_of_embeddings: Some(hannoy_stats.number_of_embeddings), + number_of_embedded_documents: Some(hannoy_stats.documents.len()), documents_database_stats: index.documents_stats(rtxn)?.unwrap_or_default(), number_of_documents: None, database_size: index.on_disk_size()?, diff --git a/crates/meilisearch/tests/vector/binary_quantized.rs b/crates/meilisearch/tests/vector/binary_quantized.rs index 89d32cc50..99f184dfc 100644 --- a/crates/meilisearch/tests/vector/binary_quantized.rs +++ b/crates/meilisearch/tests/vector/binary_quantized.rs @@ -320,7 +320,7 @@ async fn binary_quantize_clear_documents() { } "###); - // Make sure the arroy DB has been cleared + // Make sure the hannoy DB has been cleared let (documents, _code) = index.search_post(json!({ "hybrid": { "embedder": "manual" }, "vector": [1, 1, 1] })).await; snapshot!(documents, @r###" diff --git a/crates/meilisearch/tests/vector/mod.rs b/crates/meilisearch/tests/vector/mod.rs index 7f54489b6..97382f3f7 100644 --- a/crates/meilisearch/tests/vector/mod.rs +++ b/crates/meilisearch/tests/vector/mod.rs @@ -683,7 +683,7 @@ async fn clear_documents() { } "###); - // Make sure the arroy DB has been cleared + // Make sure the hannoy DB has been cleared let (documents, _code) = index.search_post(json!({ "vector": [1, 1, 1], "hybrid": {"embedder": "manual"} })).await; snapshot!(documents, @r###" diff --git a/crates/meilisearch/tests/vector/settings.rs b/crates/meilisearch/tests/vector/settings.rs index d26174faf..8ace8f092 100644 --- a/crates/meilisearch/tests/vector/settings.rs +++ b/crates/meilisearch/tests/vector/settings.rs @@ -236,7 +236,7 @@ async fn reset_embedder_documents() { } "###); - // Make sure the arroy DB has been cleared + // Make sure the hannoy DB has been cleared let (documents, _code) = index.search_post(json!({ "vector": [1, 1, 1], "hybrid": {"embedder": "default"} })).await; snapshot!(json_string!(documents), @r###" diff --git a/crates/meilitool/src/main.rs b/crates/meilitool/src/main.rs index 170bbdcc8..439bba015 100644 --- a/crates/meilitool/src/main.rs +++ b/crates/meilitool/src/main.rs @@ -142,8 +142,8 @@ enum Command { 
#[derive(Clone, ValueEnum)] enum IndexPart { - /// Will make the arroy index hot. - Arroy, + /// Will make the hannoy index hot. + Hannoy, } fn main() -> anyhow::Result<()> { @@ -658,12 +658,12 @@ fn hair_dryer( let rtxn = index.read_txn()?; for part in index_parts { match part { - IndexPart::Arroy => { + IndexPart::Hannoy => { let mut count = 0; - let total = index.vector_arroy.len(&rtxn)?; - eprintln!("Hair drying arroy for {uid}..."); + let total = index.vector_hannoy.len(&rtxn)?; + eprintln!("Hair drying hannoy for {uid}..."); for (i, result) in index - .vector_arroy + .vector_hannoy .remap_types::() .iter(&rtxn)? .enumerate() diff --git a/crates/meilitool/src/upgrade/v1_11.rs b/crates/meilitool/src/upgrade/v1_11.rs index 76d2fc24f..385487b89 100644 --- a/crates/meilitool/src/upgrade/v1_11.rs +++ b/crates/meilitool/src/upgrade/v1_11.rs @@ -68,7 +68,7 @@ pub fn v1_10_to_v1_11( ) })?; let index_read_database = - try_opening_poly_database(&index_env, &index_rtxn, db_name::VECTOR_ARROY) + try_opening_poly_database(&index_env, &index_rtxn, db_name::VECTOR_HANNOY) .with_context(|| format!("while updating date format for index `{uid}`"))?; let mut index_wtxn = index_env.write_txn().with_context(|| { @@ -79,15 +79,16 @@ pub fn v1_10_to_v1_11( })?; let index_write_database = - try_opening_poly_database(&index_env, &index_wtxn, db_name::VECTOR_ARROY) + try_opening_poly_database(&index_env, &index_wtxn, db_name::VECTOR_HANNOY) .with_context(|| format!("while updating date format for index `{uid}`"))?; - meilisearch_types::milli::arroy::upgrade::cosine_from_0_4_to_0_5( - &index_rtxn, - index_read_database.remap_types(), - &mut index_wtxn, - index_write_database.remap_types(), - )?; + // meilisearch_types::milli::hannoy::upgrade::cosine_from_0_4_to_0_5( + // &index_rtxn, + // index_read_database.remap_types(), + // &mut index_wtxn, + // index_write_database.remap_types(), + // )?; + unimplemented!("Hannoy doesn't support upgrading"); index_wtxn.commit()?; } diff --git a/crates/milli/Cargo.toml b/crates/milli/Cargo.toml index d94a4d4e1..ca10a5d7d 100644 --- a/crates/milli/Cargo.toml +++ b/crates/milli/Cargo.toml @@ -87,7 +87,7 @@ rhai = { version = "1.22.2", features = [ "no_time", "sync", ] } -arroy = "0.6.1" +hannoy = { git = "https://github.com/nnethercott/hannoy", rev = "d51750c" } rand = "0.8.5" tracing = "0.1.41" ureq = { version = "2.12.1", features = ["json"] } diff --git a/crates/milli/src/error.rs b/crates/milli/src/error.rs index 9ad9d0511..93625ff81 100644 --- a/crates/milli/src/error.rs +++ b/crates/milli/src/error.rs @@ -76,7 +76,7 @@ pub enum InternalError { #[error("Cannot upgrade to the following version: v{0}.{1}.{2}.")] CannotUpgradeToVersion(u32, u32, u32), #[error(transparent)] - ArroyError(#[from] arroy::Error), + HannoyError(#[from] hannoy::Error), #[error(transparent)] VectorEmbeddingError(#[from] crate::vector::Error), } @@ -419,23 +419,24 @@ impl From for Error { } } -impl From for Error { - fn from(value: arroy::Error) -> Self { +impl From for Error { + fn from(value: hannoy::Error) -> Self { match value { - arroy::Error::Heed(heed) => heed.into(), - arroy::Error::Io(io) => io.into(), - arroy::Error::InvalidVecDimension { expected, received } => { + hannoy::Error::Heed(heed) => heed.into(), + hannoy::Error::Io(io) => io.into(), + hannoy::Error::InvalidVecDimension { expected, received } => { Error::UserError(UserError::InvalidVectorDimensions { expected, found: received }) } - arroy::Error::BuildCancelled => Error::InternalError(InternalError::AbortedIndexation), - 
arroy::Error::DatabaseFull - | arroy::Error::InvalidItemAppend - | arroy::Error::UnmatchingDistance { .. } - | arroy::Error::NeedBuild(_) - | arroy::Error::MissingKey { .. } - | arroy::Error::MissingMetadata(_) - | arroy::Error::CannotDecodeKeyMode { .. } => { - Error::InternalError(InternalError::ArroyError(value)) + hannoy::Error::BuildCancelled => Error::InternalError(InternalError::AbortedIndexation), + hannoy::Error::DatabaseFull + | hannoy::Error::InvalidItemAppend + | hannoy::Error::UnmatchingDistance { .. } + | hannoy::Error::NeedBuild(_) + | hannoy::Error::MissingKey { .. } + | hannoy::Error::MissingMetadata(_) + | hannoy::Error::UnknownVersion { .. } + | hannoy::Error::CannotDecodeKeyMode { .. } => { + Error::InternalError(InternalError::HannoyError(value)) } } } diff --git a/crates/milli/src/index.rs b/crates/milli/src/index.rs index 9f32fdb04..61deadfa0 100644 --- a/crates/milli/src/index.rs +++ b/crates/milli/src/index.rs @@ -31,7 +31,7 @@ use crate::prompt::PromptData; use crate::proximity::ProximityPrecision; use crate::update::new::StdResult; use crate::vector::db::IndexEmbeddingConfigs; -use crate::vector::{ArroyStats, ArroyWrapper, Embedding}; +use crate::vector::{Embedding, HannoyStats, HannoyWrapper}; use crate::{ default_criteria, CboRoaringBitmapCodec, Criterion, DocumentId, ExternalDocumentsIds, FacetDistribution, FieldDistribution, FieldId, FieldIdMapMissingEntry, FieldIdWordCountCodec, @@ -113,7 +113,7 @@ pub mod db_name { pub const FIELD_ID_DOCID_FACET_F64S: &str = "field-id-docid-facet-f64s"; pub const FIELD_ID_DOCID_FACET_STRINGS: &str = "field-id-docid-facet-strings"; pub const VECTOR_EMBEDDER_CATEGORY_ID: &str = "vector-embedder-category-id"; - pub const VECTOR_ARROY: &str = "vector-arroy"; + pub const VECTOR_HANNOY: &str = "vector-hannoy"; pub const DOCUMENTS: &str = "documents"; } const NUMBER_OF_DBS: u32 = 25; @@ -177,10 +177,10 @@ pub struct Index { /// Maps the document id, the facet field id and the strings. pub field_id_docid_facet_strings: Database, - /// Maps an embedder name to its id in the arroy store. + /// Maps an embedder name to its id in the hannoy store. pub(crate) embedder_category_id: Database, - /// Vector store based on arroy™. - pub vector_arroy: arroy::Database, + /// Vector store based on hannoy™. + pub vector_hannoy: hannoy::Database, /// Maps the document id to the document as an obkv store. pub(crate) documents: Database, @@ -237,7 +237,7 @@ impl Index { // vector stuff let embedder_category_id = env.create_database(&mut wtxn, Some(VECTOR_EMBEDDER_CATEGORY_ID))?; - let vector_arroy = env.create_database(&mut wtxn, Some(VECTOR_ARROY))?; + let vector_hannoy = env.create_database(&mut wtxn, Some(VECTOR_HANNOY))?; let documents = env.create_database(&mut wtxn, Some(DOCUMENTS))?; @@ -264,7 +264,7 @@ impl Index { facet_id_is_empty_docids, field_id_docid_facet_f64s, field_id_docid_facet_strings, - vector_arroy, + vector_hannoy, embedder_category_id, documents, }; @@ -1772,8 +1772,8 @@ impl Index { for config in embedders.embedding_configs(rtxn)? 
{ let embedder_info = embedders.embedder_info(rtxn, &config.name)?.unwrap(); let has_fragments = config.config.embedder_options.has_fragments(); - let reader = ArroyWrapper::new( - self.vector_arroy, + let reader = HannoyWrapper::new( + self.vector_hannoy, embedder_info.embedder_id, config.config.quantized(), ); @@ -1792,13 +1792,13 @@ impl Index { Ok(PrefixSettings { compute_prefixes, max_prefix_length: 4, prefix_count_threshold: 100 }) } - pub fn arroy_stats(&self, rtxn: &RoTxn<'_>) -> Result { - let mut stats = ArroyStats::default(); + pub fn hannoy_stats(&self, rtxn: &RoTxn<'_>) -> Result { + let mut stats = HannoyStats::default(); let embedding_configs = self.embedding_configs(); for config in embedding_configs.embedding_configs(rtxn)? { let embedder_id = embedding_configs.embedder_id(rtxn, &config.name)?.unwrap(); let reader = - ArroyWrapper::new(self.vector_arroy, embedder_id, config.config.quantized()); + HannoyWrapper::new(self.vector_hannoy, embedder_id, config.config.quantized()); reader.aggregate_stats(rtxn, &mut stats)?; } Ok(stats) @@ -1842,7 +1842,7 @@ impl Index { facet_id_is_empty_docids, field_id_docid_facet_f64s, field_id_docid_facet_strings, - vector_arroy, + vector_hannoy, embedder_category_id, documents, } = self; @@ -1913,7 +1913,7 @@ impl Index { "field_id_docid_facet_strings", field_id_docid_facet_strings.stat(rtxn).map(compute_size)?, ); - sizes.insert("vector_arroy", vector_arroy.stat(rtxn).map(compute_size)?); + sizes.insert("vector_hannoy", vector_hannoy.stat(rtxn).map(compute_size)?); sizes.insert("embedder_category_id", embedder_category_id.stat(rtxn).map(compute_size)?); sizes.insert("documents", documents.stat(rtxn).map(compute_size)?); diff --git a/crates/milli/src/lib.rs b/crates/milli/src/lib.rs index 6fdae86b3..91ea87b68 100644 --- a/crates/milli/src/lib.rs +++ b/crates/milli/src/lib.rs @@ -53,7 +53,7 @@ pub use search::new::{ }; use serde_json::Value; pub use thread_pool_no_abort::{PanicCatched, ThreadPoolNoAbort, ThreadPoolNoAbortBuilder}; -pub use {arroy, charabia as tokenizer, heed, rhai}; +pub use {charabia as tokenizer, hannoy, heed, rhai}; pub use self::asc_desc::{AscDesc, AscDescError, Member, SortError}; pub use self::attribute_patterns::{AttributePatterns, PatternMatch}; diff --git a/crates/milli/src/progress.rs b/crates/milli/src/progress.rs index 61c61cd49..2ec4fd6de 100644 --- a/crates/milli/src/progress.rs +++ b/crates/milli/src/progress.rs @@ -98,12 +98,12 @@ impl Progress { } // TODO: ideally we should expose the progress in a way that let arroy use it directly - pub(crate) fn update_progress_from_arroy(&self, progress: arroy::WriterProgress) { - self.update_progress(progress.main); - if let Some(sub) = progress.sub { - self.update_progress(sub); - } - } + // pub(crate) fn update_progress_from_hannoy(&self, progress: hannoy::WriterProgress) { + // self.update_progress(progress.main); + // if let Some(sub) = progress.sub { + // self.update_progress(sub); + // } + // } } /// Generate the names associated with the durations and push them. 
@@ -277,43 +277,43 @@ impl Step for VariableNameStep { } } -impl Step for arroy::MainStep { - fn name(&self) -> Cow<'static, str> { - match self { - arroy::MainStep::PreProcessingTheItems => "pre processing the items", - arroy::MainStep::WritingTheDescendantsAndMetadata => { - "writing the descendants and metadata" - } - arroy::MainStep::RetrieveTheUpdatedItems => "retrieve the updated items", - arroy::MainStep::RetrievingTheTreeAndItemNodes => "retrieving the tree and item nodes", - arroy::MainStep::UpdatingTheTrees => "updating the trees", - arroy::MainStep::CreateNewTrees => "create new trees", - arroy::MainStep::WritingNodesToDatabase => "writing nodes to database", - arroy::MainStep::DeleteExtraneousTrees => "delete extraneous trees", - arroy::MainStep::WriteTheMetadata => "write the metadata", - } - .into() - } +// impl Step for hannoy::MainStep { +// fn name(&self) -> Cow<'static, str> { +// match self { +// hannoy::MainStep::PreProcessingTheItems => "pre processing the items", +// hannoy::MainStep::WritingTheDescendantsAndMetadata => { +// "writing the descendants and metadata" +// } +// hannoy::MainStep::RetrieveTheUpdatedItems => "retrieve the updated items", +// hannoy::MainStep::RetrievingTheTreeAndItemNodes => "retrieving the tree and item nodes", +// hannoy::MainStep::UpdatingTheTrees => "updating the trees", +// hannoy::MainStep::CreateNewTrees => "create new trees", +// hannoy::MainStep::WritingNodesToDatabase => "writing nodes to database", +// hannoy::MainStep::DeleteExtraneousTrees => "delete extraneous trees", +// hannoy::MainStep::WriteTheMetadata => "write the metadata", +// } +// .into() +// } - fn current(&self) -> u32 { - *self as u32 - } +// fn current(&self) -> u32 { +// *self as u32 +// } - fn total(&self) -> u32 { - Self::CARDINALITY as u32 - } -} +// fn total(&self) -> u32 { +// Self::CARDINALITY as u32 +// } +// } -impl Step for arroy::SubStep { - fn name(&self) -> Cow<'static, str> { - self.unit.into() - } +// impl Step for hannoy::SubStep { +// fn name(&self) -> Cow<'static, str> { +// self.unit.into() +// } - fn current(&self) -> u32 { - self.current.load(Ordering::Relaxed) - } +// fn current(&self) -> u32 { +// self.current.load(Ordering::Relaxed) +// } - fn total(&self) -> u32 { - self.max - } -} +// fn total(&self) -> u32 { +// self.max +// } +// } diff --git a/crates/milli/src/search/new/vector_sort.rs b/crates/milli/src/search/new/vector_sort.rs index 2c201e899..71f7faa48 100644 --- a/crates/milli/src/search/new/vector_sort.rs +++ b/crates/milli/src/search/new/vector_sort.rs @@ -6,7 +6,7 @@ use roaring::RoaringBitmap; use super::ranking_rules::{RankingRule, RankingRuleOutput, RankingRuleQueryTrait}; use super::VectorStoreStats; use crate::score_details::{self, ScoreDetails}; -use crate::vector::{ArroyWrapper, DistributionShift, Embedder}; +use crate::vector::{DistributionShift, Embedder, HannoyWrapper}; use crate::{DocumentId, Result, SearchContext, SearchLogger}; pub struct VectorSort { @@ -56,7 +56,8 @@ impl VectorSort { let target = &self.target; let before = Instant::now(); - let reader = ArroyWrapper::new(ctx.index.vector_arroy, self.embedder_index, self.quantized); + let reader = + HannoyWrapper::new(ctx.index.vector_hannoy, self.embedder_index, self.quantized); let results = reader.nns_by_vector(ctx.txn, target, self.limit, Some(vector_candidates))?; self.cached_sorted_docids = results.into_iter(); *ctx.vector_store_stats.get_or_insert_default() += VectorStoreStats { diff --git a/crates/milli/src/search/similar.rs 
b/crates/milli/src/search/similar.rs index 903b5fcf9..d8f4cde59 100644 --- a/crates/milli/src/search/similar.rs +++ b/crates/milli/src/search/similar.rs @@ -3,7 +3,7 @@ use std::sync::Arc; use roaring::RoaringBitmap; use crate::score_details::{self, ScoreDetails}; -use crate::vector::{ArroyWrapper, Embedder}; +use crate::vector::{Embedder, HannoyWrapper}; use crate::{filtered_universe, DocumentId, Filter, Index, Result, SearchResult}; pub struct Similar<'a> { @@ -72,7 +72,7 @@ impl<'a> Similar<'a> { crate::UserError::InvalidSimilarEmbedder(self.embedder_name.to_owned()) })?; - let reader = ArroyWrapper::new(self.index.vector_arroy, embedder_index, self.quantized); + let reader = HannoyWrapper::new(self.index.vector_hannoy, embedder_index, self.quantized); let results = reader.nns_by_item( self.rtxn, self.id, diff --git a/crates/milli/src/update/clear_documents.rs b/crates/milli/src/update/clear_documents.rs index 84eeca7f9..164aed9a0 100644 --- a/crates/milli/src/update/clear_documents.rs +++ b/crates/milli/src/update/clear_documents.rs @@ -45,7 +45,7 @@ impl<'t, 'i> ClearDocuments<'t, 'i> { facet_id_is_empty_docids, field_id_docid_facet_f64s, field_id_docid_facet_strings, - vector_arroy, + vector_hannoy, embedder_category_id: _, documents, } = self.index; @@ -88,7 +88,7 @@ impl<'t, 'i> ClearDocuments<'t, 'i> { field_id_docid_facet_f64s.clear(self.wtxn)?; field_id_docid_facet_strings.clear(self.wtxn)?; // vector - vector_arroy.clear(self.wtxn)?; + vector_hannoy.clear(self.wtxn)?; documents.clear(self.wtxn)?; diff --git a/crates/milli/src/update/index_documents/mod.rs b/crates/milli/src/update/index_documents/mod.rs index 658ff1923..a5182763e 100644 --- a/crates/milli/src/update/index_documents/mod.rs +++ b/crates/milli/src/update/index_documents/mod.rs @@ -39,7 +39,7 @@ use crate::update::{ IndexerConfig, UpdateIndexingStep, WordPrefixDocids, WordPrefixIntegerDocids, WordsPrefixesFst, }; use crate::vector::db::EmbedderInfo; -use crate::vector::{ArroyWrapper, RuntimeEmbedders}; +use crate::vector::{HannoyWrapper, RuntimeEmbedders}; use crate::{CboRoaringBitmapCodec, Index, Result, UserError}; static MERGED_DATABASE_COUNT: usize = 7; @@ -494,7 +494,7 @@ where }, )?; let reader = - ArroyWrapper::new(self.index.vector_arroy, index, action.was_quantized); + HannoyWrapper::new(self.index.vector_hannoy, index, action.was_quantized); let Some(dim) = reader.dimensions(self.wtxn)? 
else { continue; }; @@ -504,7 +504,7 @@ where for (embedder_name, dimension) in dimension { let wtxn = &mut *self.wtxn; - let vector_arroy = self.index.vector_arroy; + let vector_hannoy = self.index.vector_hannoy; let cancel = &self.should_abort; let embedder_index = @@ -523,7 +523,7 @@ where let is_quantizing = embedder_config.is_some_and(|action| action.is_being_quantized); pool.install(|| { - let mut writer = ArroyWrapper::new(vector_arroy, embedder_index, was_quantized); + let mut writer = HannoyWrapper::new(vector_hannoy, embedder_index, was_quantized); writer.build_and_quantize( wtxn, // In the settings we don't have any progress to share diff --git a/crates/milli/src/update/index_documents/transform.rs b/crates/milli/src/update/index_documents/transform.rs index e07483aff..fcb5b00d1 100644 --- a/crates/milli/src/update/index_documents/transform.rs +++ b/crates/milli/src/update/index_documents/transform.rs @@ -32,7 +32,7 @@ use crate::update::settings::{InnerIndexSettings, InnerIndexSettingsDiff}; use crate::update::{AvailableIds, UpdateIndexingStep}; use crate::vector::parsed_vectors::{ExplicitVectors, VectorOrArrayOfVectors}; use crate::vector::settings::{RemoveFragments, WriteBackToDocuments}; -use crate::vector::ArroyWrapper; +use crate::vector::HannoyWrapper; use crate::{FieldDistribution, FieldId, FieldIdMapMissingEntry, Index, Result}; pub struct TransformOutput { @@ -834,15 +834,15 @@ impl<'a, 'i> Transform<'a, 'i> { None }; - let readers: BTreeMap<&str, (ArroyWrapper, &RoaringBitmap)> = settings_diff + let readers: BTreeMap<&str, (HannoyWrapper, &RoaringBitmap)> = settings_diff .embedding_config_updates .iter() .filter_map(|(name, action)| { if let Some(WriteBackToDocuments { embedder_id, user_provided }) = action.write_back() { - let reader = ArroyWrapper::new( - self.index.vector_arroy, + let reader = HannoyWrapper::new( + self.index.vector_hannoy, *embedder_id, action.was_quantized, ); @@ -884,7 +884,7 @@ impl<'a, 'i> Transform<'a, 'i> { let injected_vectors: std::result::Result< serde_json::Map, - arroy::Error, + hannoy::Error, > = readers .iter() .filter_map(|(name, (reader, user_provided))| { @@ -949,9 +949,9 @@ impl<'a, 'i> Transform<'a, 'i> { else { continue; }; - let arroy = - ArroyWrapper::new(self.index.vector_arroy, infos.embedder_id, was_quantized); - let Some(dimensions) = arroy.dimensions(wtxn)? else { + let hannoy = + HannoyWrapper::new(self.index.vector_hannoy, infos.embedder_id, was_quantized); + let Some(dimensions) = hannoy.dimensions(wtxn)? 
else { continue; }; for fragment_id in fragment_ids { @@ -959,17 +959,17 @@ impl<'a, 'i> Transform<'a, 'i> { if infos.embedding_status.user_provided_docids().is_empty() { // no user provided: clear store - arroy.clear_store(wtxn, *fragment_id, dimensions)?; + hannoy.clear_store(wtxn, *fragment_id, dimensions)?; continue; } // some user provided, remove only the ids that are not user provided - let to_delete = arroy.items_in_store(wtxn, *fragment_id, |items| { + let to_delete = hannoy.items_in_store(wtxn, *fragment_id, |items| { items - infos.embedding_status.user_provided_docids() })?; for to_delete in to_delete { - arroy.del_item_in_store(wtxn, to_delete, *fragment_id, dimensions)?; + hannoy.del_item_in_store(wtxn, to_delete, *fragment_id, dimensions)?; } } } diff --git a/crates/milli/src/update/index_documents/typed_chunk.rs b/crates/milli/src/update/index_documents/typed_chunk.rs index c93e3e0f7..b0590eab4 100644 --- a/crates/milli/src/update/index_documents/typed_chunk.rs +++ b/crates/milli/src/update/index_documents/typed_chunk.rs @@ -27,7 +27,7 @@ use crate::update::index_documents::helpers::{ }; use crate::update::settings::InnerIndexSettingsDiff; use crate::vector::db::{EmbeddingStatusDelta, IndexEmbeddingConfig}; -use crate::vector::ArroyWrapper; +use crate::vector::HannoyWrapper; use crate::{ lat_lng_to_xyz, CboRoaringBitmapCodec, DocumentId, FieldId, GeoPoint, Index, InternalError, Result, SerializationError, U8StrStrCodec, @@ -677,7 +677,8 @@ pub(crate) fn write_typed_chunk_into_index( .get(&embedder_name) .is_some_and(|conf| conf.is_quantized); // FIXME: allow customizing distance - let writer = ArroyWrapper::new(index.vector_arroy, infos.embedder_id, binary_quantized); + let writer = + HannoyWrapper::new(index.vector_hannoy, infos.embedder_id, binary_quantized); // remove vectors for docids we want them removed let merger = remove_vectors_builder.build(); diff --git a/crates/milli/src/update/new/channel.rs b/crates/milli/src/update/new/channel.rs index aec192ace..86843795b 100644 --- a/crates/milli/src/update/new/channel.rs +++ b/crates/milli/src/update/new/channel.rs @@ -255,9 +255,9 @@ impl<'a> From> for FrameWithHeader<'a> { #[repr(u8)] pub enum EntryHeader { DbOperation(DbOperation), - ArroyDeleteVector(ArroyDeleteVector), - ArroySetVectors(ArroySetVectors), - ArroySetVector(ArroySetVector), + HannoyDeleteVector(HannoyDeleteVector), + HannoySetVectors(HannoySetVectors), + HannoySetVector(HannoySetVector), } impl EntryHeader { @@ -268,9 +268,9 @@ impl EntryHeader { const fn variant_id(&self) -> u8 { match self { EntryHeader::DbOperation(_) => 0, - EntryHeader::ArroyDeleteVector(_) => 1, - EntryHeader::ArroySetVectors(_) => 2, - EntryHeader::ArroySetVector(_) => 3, + EntryHeader::HannoyDeleteVector(_) => 1, + EntryHeader::HannoySetVectors(_) => 2, + EntryHeader::HannoySetVector(_) => 3, } } @@ -286,26 +286,26 @@ impl EntryHeader { } const fn total_delete_vector_size() -> usize { - Self::variant_size() + mem::size_of::() + Self::variant_size() + mem::size_of::() } /// The `dimensions` corresponds to the number of `f32` in the embedding. 
fn total_set_vectors_size(count: usize, dimensions: usize) -> usize { let embedding_size = dimensions * mem::size_of::(); - Self::variant_size() + mem::size_of::() + embedding_size * count + Self::variant_size() + mem::size_of::() + embedding_size * count } fn total_set_vector_size(dimensions: usize) -> usize { let embedding_size = dimensions * mem::size_of::(); - Self::variant_size() + mem::size_of::() + embedding_size + Self::variant_size() + mem::size_of::() + embedding_size } fn header_size(&self) -> usize { let payload_size = match self { EntryHeader::DbOperation(op) => mem::size_of_val(op), - EntryHeader::ArroyDeleteVector(adv) => mem::size_of_val(adv), - EntryHeader::ArroySetVectors(asvs) => mem::size_of_val(asvs), - EntryHeader::ArroySetVector(asv) => mem::size_of_val(asv), + EntryHeader::HannoyDeleteVector(adv) => mem::size_of_val(adv), + EntryHeader::HannoySetVectors(asvs) => mem::size_of_val(asvs), + EntryHeader::HannoySetVector(asv) => mem::size_of_val(asv), }; Self::variant_size() + payload_size } @@ -319,19 +319,19 @@ impl EntryHeader { EntryHeader::DbOperation(header) } 1 => { - let header_bytes = &remaining[..mem::size_of::()]; + let header_bytes = &remaining[..mem::size_of::()]; let header = checked::pod_read_unaligned(header_bytes); - EntryHeader::ArroyDeleteVector(header) + EntryHeader::HannoyDeleteVector(header) } 2 => { - let header_bytes = &remaining[..mem::size_of::()]; + let header_bytes = &remaining[..mem::size_of::()]; let header = checked::pod_read_unaligned(header_bytes); - EntryHeader::ArroySetVectors(header) + EntryHeader::HannoySetVectors(header) } 3 => { - let header_bytes = &remaining[..mem::size_of::()]; + let header_bytes = &remaining[..mem::size_of::()]; let header = checked::pod_read_unaligned(header_bytes); - EntryHeader::ArroySetVector(header) + EntryHeader::HannoySetVector(header) } id => panic!("invalid variant id: {id}"), } @@ -341,9 +341,9 @@ impl EntryHeader { let (first, remaining) = header_bytes.split_first_mut().unwrap(); let payload_bytes = match self { EntryHeader::DbOperation(op) => bytemuck::bytes_of(op), - EntryHeader::ArroyDeleteVector(adv) => bytemuck::bytes_of(adv), - EntryHeader::ArroySetVectors(asvs) => bytemuck::bytes_of(asvs), - EntryHeader::ArroySetVector(asv) => bytemuck::bytes_of(asv), + EntryHeader::HannoyDeleteVector(adv) => bytemuck::bytes_of(adv), + EntryHeader::HannoySetVectors(asvs) => bytemuck::bytes_of(asvs), + EntryHeader::HannoySetVector(asv) => bytemuck::bytes_of(asv), }; *first = self.variant_id(); remaining.copy_from_slice(payload_bytes); @@ -378,7 +378,7 @@ impl DbOperation { #[derive(Debug, Clone, Copy, NoUninit, CheckedBitPattern)] #[repr(transparent)] -pub struct ArroyDeleteVector { +pub struct HannoyDeleteVector { pub docid: DocumentId, } @@ -386,13 +386,13 @@ pub struct ArroyDeleteVector { #[repr(C)] /// The embeddings are in the remaining space and represents /// non-aligned [f32] each with dimensions f32s. -pub struct ArroySetVectors { +pub struct HannoySetVectors { pub docid: DocumentId, pub embedder_id: u8, _padding: [u8; 3], } -impl ArroySetVectors { +impl HannoySetVectors { fn embeddings_bytes<'a>(frame: &'a FrameGrantR<'_>) -> &'a [u8] { let skip = EntryHeader::variant_size() + mem::size_of::(); &frame[skip..] @@ -416,14 +416,14 @@ impl ArroySetVectors { #[repr(C)] /// The embeddings are in the remaining space and represents /// non-aligned [f32] each with dimensions f32s. 
-pub struct ArroySetVector { +pub struct HannoySetVector { pub docid: DocumentId, pub embedder_id: u8, pub extractor_id: u8, _padding: [u8; 2], } -impl ArroySetVector { +impl HannoySetVector { fn embeddings_bytes<'a>(frame: &'a FrameGrantR<'_>) -> &'a [u8] { let skip = EntryHeader::variant_size() + mem::size_of::(); &frame[skip..] @@ -553,7 +553,7 @@ impl<'b> ExtractorBbqueueSender<'b> { let refcell = self.producers.get().unwrap(); let mut producer = refcell.0.borrow_mut_or_yield(); - let payload_header = EntryHeader::ArroyDeleteVector(ArroyDeleteVector { docid }); + let payload_header = EntryHeader::HannoyDeleteVector(HannoyDeleteVector { docid }); let total_length = EntryHeader::total_delete_vector_size(); if total_length > max_grant { panic!("The entry is larger ({total_length} bytes) than the BBQueue max grant ({max_grant} bytes)"); @@ -589,8 +589,8 @@ impl<'b> ExtractorBbqueueSender<'b> { // to zero to allocate no extra space at all let dimensions = embeddings.first().map_or(0, |emb| emb.len()); - let arroy_set_vector = ArroySetVectors { docid, embedder_id, _padding: [0; 3] }; - let payload_header = EntryHeader::ArroySetVectors(arroy_set_vector); + let hannoy_set_vector = HannoySetVectors { docid, embedder_id, _padding: [0; 3] }; + let payload_header = EntryHeader::HannoySetVectors(hannoy_set_vector); let total_length = EntryHeader::total_set_vectors_size(embeddings.len(), dimensions); if total_length > max_grant { let mut value_file = tempfile::tempfile().map(BufWriter::new)?; @@ -650,9 +650,9 @@ impl<'b> ExtractorBbqueueSender<'b> { // to zero to allocate no extra space at all let dimensions = embedding.as_ref().map_or(0, |emb| emb.len()); - let arroy_set_vector = - ArroySetVector { docid, embedder_id, extractor_id, _padding: [0; 2] }; - let payload_header = EntryHeader::ArroySetVector(arroy_set_vector); + let hannoy_set_vector = + HannoySetVector { docid, embedder_id, extractor_id, _padding: [0; 2] }; + let payload_header = EntryHeader::HannoySetVector(hannoy_set_vector); let total_length = EntryHeader::total_set_vector_size(dimensions); if total_length > max_grant { let mut value_file = tempfile::tempfile().map(BufWriter::new)?; diff --git a/crates/milli/src/update/new/indexer/mod.rs b/crates/milli/src/update/new/indexer/mod.rs index a6ba3a919..09d5f0a48 100644 --- a/crates/milli/src/update/new/indexer/mod.rs +++ b/crates/milli/src/update/new/indexer/mod.rs @@ -24,7 +24,7 @@ use crate::progress::{EmbedderStats, Progress}; use crate::update::settings::SettingsDelta; use crate::update::GrenadParameters; use crate::vector::settings::{EmbedderAction, RemoveFragments, WriteBackToDocuments}; -use crate::vector::{ArroyWrapper, Embedder, RuntimeEmbedders}; +use crate::vector::{Embedder, HannoyWrapper, RuntimeEmbedders}; use crate::{FieldsIdsMap, GlobalFieldsIdsMap, Index, InternalError, Result, ThreadPoolNoAbort}; pub(crate) mod de; @@ -66,7 +66,7 @@ where let mut bbbuffers = Vec::new(); let finished_extraction = AtomicBool::new(false); - let arroy_memory = grenad_parameters.max_memory; + let hannoy_memory = grenad_parameters.max_memory; let (grenad_parameters, total_bbbuffer_capacity) = indexer_memory_settings(pool.current_num_threads(), grenad_parameters); @@ -129,8 +129,8 @@ where let global_fields_ids_map = GlobalFieldsIdsMap::new(&new_fields_ids_map); - let vector_arroy = index.vector_arroy; - let arroy_writers: Result> = embedders + let vector_arroy = index.vector_hannoy; + let hannoy_writers: Result> = embedders .inner_as_ref() .iter() .map(|(embedder_name, runtime)| { @@ -143,7 
+143,7 @@ where })?; let dimensions = runtime.embedder.dimensions(); - let writer = ArroyWrapper::new(vector_arroy, embedder_index, runtime.is_quantized); + let writer = HannoyWrapper::new(vector_arroy, embedder_index, runtime.is_quantized); Ok(( embedder_index, @@ -152,10 +152,10 @@ where }) .collect(); - let mut arroy_writers = arroy_writers?; + let mut hannoy_writers = hannoy_writers?; let congestion = - write_to_db(writer_receiver, finished_extraction, index, wtxn, &arroy_writers)?; + write_to_db(writer_receiver, finished_extraction, index, wtxn, &hannoy_writers)?; indexing_context.progress.update_progress(IndexingStep::WaitingForExtractors); @@ -169,8 +169,8 @@ where wtxn, indexing_context.progress, index_embeddings, - arroy_memory, - &mut arroy_writers, + hannoy_memory, + &mut hannoy_writers, None, &indexing_context.must_stop_processing, ) @@ -226,7 +226,7 @@ where let mut bbbuffers = Vec::new(); let finished_extraction = AtomicBool::new(false); - let arroy_memory = grenad_parameters.max_memory; + let hannoy_memory = grenad_parameters.max_memory; let (grenad_parameters, total_bbbuffer_capacity) = indexer_memory_settings(pool.current_num_threads(), grenad_parameters); @@ -283,7 +283,7 @@ where let new_embedders = settings_delta.new_embedders(); let embedder_actions = settings_delta.embedder_actions(); let index_embedder_category_ids = settings_delta.new_embedder_category_id(); - let mut arroy_writers = arroy_writers_from_embedder_actions( + let mut hannoy_writers = hannoy_writers_from_embedder_actions( index, embedder_actions, new_embedders, @@ -291,7 +291,7 @@ where )?; let congestion = - write_to_db(writer_receiver, finished_extraction, index, wtxn, &arroy_writers)?; + write_to_db(writer_receiver, finished_extraction, index, wtxn, &hannoy_writers)?; indexing_context.progress.update_progress(IndexingStep::WaitingForExtractors); @@ -305,8 +305,8 @@ where wtxn, indexing_context.progress, index_embeddings, - arroy_memory, - &mut arroy_writers, + hannoy_memory, + &mut hannoy_writers, Some(embedder_actions), &indexing_context.must_stop_processing, ) @@ -336,13 +336,13 @@ where Ok(congestion) } -fn arroy_writers_from_embedder_actions<'indexer>( +fn hannoy_writers_from_embedder_actions<'indexer>( index: &Index, embedder_actions: &'indexer BTreeMap, embedders: &'indexer RuntimeEmbedders, index_embedder_category_ids: &'indexer std::collections::HashMap, -) -> Result> { - let vector_arroy = index.vector_arroy; +) -> Result> { + let vector_arroy = index.vector_hannoy; embedders .inner_as_ref() @@ -361,7 +361,7 @@ fn arroy_writers_from_embedder_actions<'indexer>( ))); }; let writer = - ArroyWrapper::new(vector_arroy, embedder_category_id, action.was_quantized); + HannoyWrapper::new(vector_arroy, embedder_category_id, action.was_quantized); let dimensions = runtime.embedder.dimensions(); Some(Ok(( embedder_category_id, @@ -384,7 +384,7 @@ where let Some(WriteBackToDocuments { embedder_id, .. }) = action.write_back() else { continue; }; - let reader = ArroyWrapper::new(index.vector_arroy, *embedder_id, action.was_quantized); + let reader = HannoyWrapper::new(index.vector_hannoy, *embedder_id, action.was_quantized); let Some(dimensions) = reader.dimensions(wtxn)? else { continue; }; @@ -400,7 +400,7 @@ where let Some(infos) = index.embedding_configs().embedder_info(wtxn, embedder_name)? 
else { continue; }; - let arroy = ArroyWrapper::new(index.vector_arroy, infos.embedder_id, was_quantized); + let arroy = HannoyWrapper::new(index.vector_hannoy, infos.embedder_id, was_quantized); let Some(dimensions) = arroy.dimensions(wtxn)? else { continue; }; diff --git a/crates/milli/src/update/new/indexer/write.rs b/crates/milli/src/update/new/indexer/write.rs index b8e3685f8..4be916c02 100644 --- a/crates/milli/src/update/new/indexer/write.rs +++ b/crates/milli/src/update/new/indexer/write.rs @@ -15,7 +15,7 @@ use crate::progress::Progress; use crate::update::settings::InnerIndexSettings; use crate::vector::db::IndexEmbeddingConfig; use crate::vector::settings::EmbedderAction; -use crate::vector::{ArroyWrapper, Embedder, Embeddings, RuntimeEmbedders}; +use crate::vector::{Embedder, Embeddings, HannoyWrapper, RuntimeEmbedders}; use crate::{Error, Index, InternalError, Result, UserError}; pub fn write_to_db( @@ -23,9 +23,9 @@ pub fn write_to_db( finished_extraction: &AtomicBool, index: &Index, wtxn: &mut RwTxn<'_>, - arroy_writers: &HashMap, + hannoy_writers: &HashMap, ) -> Result { - // Used by by the ArroySetVector to copy the embedding into an + // Used by by the HannoySetVector to copy the embedding into an // aligned memory area, required by arroy to accept a new vector. let mut aligned_embedding = Vec::new(); let span = tracing::trace_span!(target: "indexing::write_db", "all"); @@ -56,7 +56,7 @@ pub fn write_to_db( ReceiverAction::LargeVectors(large_vectors) => { let LargeVectors { docid, embedder_id, .. } = large_vectors; let (_, _, writer, dimensions) = - arroy_writers.get(&embedder_id).expect("requested a missing embedder"); + hannoy_writers.get(&embedder_id).expect("requested a missing embedder"); let mut embeddings = Embeddings::new(*dimensions); for embedding in large_vectors.read_embeddings(*dimensions) { embeddings.push(embedding.to_vec()).unwrap(); @@ -68,7 +68,7 @@ pub fn write_to_db( large_vector @ LargeVector { docid, embedder_id, extractor_id, .. 
}, ) => { let (_, _, writer, dimensions) = - arroy_writers.get(&embedder_id).expect("requested a missing embedder"); + hannoy_writers.get(&embedder_id).expect("requested a missing embedder"); let embedding = large_vector.read_embedding(*dimensions); writer.add_item_in_store(wtxn, docid, extractor_id, embedding)?; } @@ -80,12 +80,12 @@ pub fn write_to_db( &mut writer_receiver, index, wtxn, - arroy_writers, + hannoy_writers, &mut aligned_embedding, )?; } - write_from_bbqueue(&mut writer_receiver, index, wtxn, arroy_writers, &mut aligned_embedding)?; + write_from_bbqueue(&mut writer_receiver, index, wtxn, hannoy_writers, &mut aligned_embedding)?; Ok(ChannelCongestion { attempts: writer_receiver.sent_messages_attempts(), @@ -115,8 +115,8 @@ pub fn build_vectors( wtxn: &mut RwTxn<'_>, progress: &Progress, index_embeddings: Vec, - arroy_memory: Option, - arroy_writers: &mut HashMap, + hannoy_memory: Option, + hannoy_writers: &mut HashMap, embeder_actions: Option<&BTreeMap>, must_stop_processing: &MSP, ) -> Result<()> @@ -129,7 +129,7 @@ where let seed = rand::random(); let mut rng = rand::rngs::StdRng::seed_from_u64(seed); - for (_index, (embedder_name, _embedder, writer, dimensions)) in arroy_writers { + for (_index, (embedder_name, _embedder, writer, dimensions)) in hannoy_writers { let dimensions = *dimensions; let is_being_quantized = embeder_actions .and_then(|actions| actions.get(*embedder_name).map(|action| action.is_being_quantized)) @@ -140,7 +140,7 @@ where &mut rng, dimensions, is_being_quantized, - arroy_memory, + hannoy_memory, must_stop_processing, )?; } @@ -181,7 +181,7 @@ pub fn write_from_bbqueue( writer_receiver: &mut WriterBbqueueReceiver<'_>, index: &Index, wtxn: &mut RwTxn<'_>, - arroy_writers: &HashMap, + hannoy_writers: &HashMap, aligned_embedding: &mut Vec, ) -> crate::Result<()> { while let Some(frame_with_header) = writer_receiver.recv_frame() { @@ -221,17 +221,17 @@ pub fn write_from_bbqueue( }, } } - EntryHeader::ArroyDeleteVector(ArroyDeleteVector { docid }) => { - for (_index, (_name, _embedder, writer, dimensions)) in arroy_writers { + EntryHeader::HannoyDeleteVector(HannoyDeleteVector { docid }) => { + for (_index, (_name, _embedder, writer, dimensions)) in hannoy_writers { let dimensions = *dimensions; writer.del_items(wtxn, dimensions, docid)?; } } - EntryHeader::ArroySetVectors(asvs) => { - let ArroySetVectors { docid, embedder_id, .. } = asvs; + EntryHeader::HannoySetVectors(asvs) => { + let HannoySetVectors { docid, embedder_id, .. } = asvs; let frame = frame_with_header.frame(); let (_, _, writer, dimensions) = - arroy_writers.get(&embedder_id).expect("requested a missing embedder"); + hannoy_writers.get(&embedder_id).expect("requested a missing embedder"); let mut embeddings = Embeddings::new(*dimensions); let all_embeddings = asvs.read_all_embeddings_into_vec(frame, aligned_embedding); writer.del_items(wtxn, *dimensions, docid)?; @@ -245,12 +245,12 @@ pub fn write_from_bbqueue( writer.add_items(wtxn, docid, &embeddings)?; } } - EntryHeader::ArroySetVector( - asv @ ArroySetVector { docid, embedder_id, extractor_id, .. }, + EntryHeader::HannoySetVector( + asv @ HannoySetVector { docid, embedder_id, extractor_id, .. 
}, ) => { let frame = frame_with_header.frame(); let (_, _, writer, dimensions) = - arroy_writers.get(&embedder_id).expect("requested a missing embedder"); + hannoy_writers.get(&embedder_id).expect("requested a missing embedder"); let embedding = asv.read_all_embeddings_into_vec(frame, aligned_embedding); if embedding.is_empty() { diff --git a/crates/milli/src/update/new/vector_document.rs b/crates/milli/src/update/new/vector_document.rs index b59984248..64e1377ad 100644 --- a/crates/milli/src/update/new/vector_document.rs +++ b/crates/milli/src/update/new/vector_document.rs @@ -14,7 +14,7 @@ use crate::constants::RESERVED_VECTORS_FIELD_NAME; use crate::documents::FieldIdMapper; use crate::vector::db::{EmbeddingStatus, IndexEmbeddingConfig}; use crate::vector::parsed_vectors::{RawVectors, RawVectorsError, VectorOrArrayOfVectors}; -use crate::vector::{ArroyWrapper, Embedding, RuntimeEmbedders}; +use crate::vector::{Embedding, HannoyWrapper, RuntimeEmbedders}; use crate::{DocumentId, Index, InternalError, Result, UserError}; #[derive(Serialize)] @@ -121,7 +121,7 @@ impl<'t> VectorDocumentFromDb<'t> { status: &EmbeddingStatus, ) -> Result> { let reader = - ArroyWrapper::new(self.index.vector_arroy, embedder_id, config.config.quantized()); + HannoyWrapper::new(self.index.vector_hannoy, embedder_id, config.config.quantized()); let vectors = reader.item_vectors(self.rtxn, self.docid)?; Ok(VectorEntry { @@ -149,7 +149,7 @@ impl<'t> VectorDocument<'t> for VectorDocumentFromDb<'t> { name, entry_from_raw_value(value, false).map_err(|_| { InternalError::Serialization(crate::SerializationError::Decoding { - db_name: Some(crate::index::db_name::VECTOR_ARROY), + db_name: Some(crate::index::db_name::VECTOR_HANNOY), }) })?, )) @@ -167,7 +167,7 @@ impl<'t> VectorDocument<'t> for VectorDocumentFromDb<'t> { Some(embedding_from_doc) => { Some(entry_from_raw_value(embedding_from_doc, false).map_err(|_| { InternalError::Serialization(crate::SerializationError::Decoding { - db_name: Some(crate::index::db_name::VECTOR_ARROY), + db_name: Some(crate::index::db_name::VECTOR_HANNOY), }) })?) 
} diff --git a/crates/milli/src/update/upgrade/v1_14.rs b/crates/milli/src/update/upgrade/v1_14.rs index 039734b75..68ff9f8cd 100644 --- a/crates/milli/src/update/upgrade/v1_14.rs +++ b/crates/milli/src/update/upgrade/v1_14.rs @@ -1,4 +1,4 @@ -use arroy::distances::Cosine; +use hannoy::distances::Cosine; use heed::RwTxn; use super::UpgradeIndex; @@ -25,12 +25,13 @@ impl UpgradeIndex for Latest_V1_13_To_Latest_V1_14 { progress.update_progress(VectorStore::UpdateInternalVersions); let rtxn = index.read_txn()?; - arroy::upgrade::from_0_5_to_0_6::( - &rtxn, - index.vector_arroy.remap_data_type(), - wtxn, - index.vector_arroy.remap_data_type(), - )?; + // hannoy::upgrade::from_0_5_to_0_6::( + // &rtxn, + // index.vector_hannoy.remap_data_type(), + // wtxn, + // index.vector_hannoy.remap_data_type(), + // )?; + unimplemented!("upgrade hannoy"); Ok(false) } diff --git a/crates/milli/src/vector/composite.rs b/crates/milli/src/vector/composite.rs index 2e31da094..539e92ba8 100644 --- a/crates/milli/src/vector/composite.rs +++ b/crates/milli/src/vector/composite.rs @@ -1,6 +1,6 @@ use std::time::Instant; -use arroy::Distance; +use hannoy::Distance; use super::error::CompositeEmbedderContainsHuggingFace; use super::{ @@ -324,19 +324,18 @@ fn check_similarity( } for (left, right) in left.into_iter().zip(right) { - let left = arroy::internals::UnalignedVector::from_slice(&left); - let right = arroy::internals::UnalignedVector::from_slice(&right); - let left = arroy::internals::Leaf { - header: arroy::distances::Cosine::new_header(&left), + let left = hannoy::internals::UnalignedVector::from_slice(&left); + let right = hannoy::internals::UnalignedVector::from_slice(&right); + let left = hannoy::internals::Item { + header: hannoy::distances::Cosine::new_header(&left), vector: left, }; - let right = arroy::internals::Leaf { - header: arroy::distances::Cosine::new_header(&right), + let right = hannoy::internals::Item { + header: hannoy::distances::Cosine::new_header(&right), vector: right, }; - let distance = arroy::distances::Cosine::built_distance(&left, &right); - + let distance = hannoy::distances::Cosine::distance(&left, &right); if distance > super::MAX_COMPOSITE_DISTANCE { return Err(NewEmbedderError::composite_embedding_value_mismatch(distance, hint)); } diff --git a/crates/milli/src/vector/mod.rs b/crates/milli/src/vector/mod.rs index 873693a34..06330226b 100644 --- a/crates/milli/src/vector/mod.rs +++ b/crates/milli/src/vector/mod.rs @@ -3,9 +3,9 @@ use std::num::NonZeroUsize; use std::sync::{Arc, Mutex}; use std::time::Instant; -use arroy::distances::{BinaryQuantizedCosine, Cosine}; -use arroy::ItemId; use deserr::{DeserializeError, Deserr}; +use hannoy::distances::{BinaryQuantizedCosine, Cosine}; +use hannoy::ItemId; use heed::{RoTxn, RwTxn, Unspecified}; use ordered_float::OrderedFloat; use roaring::RoaringBitmap; @@ -41,15 +41,15 @@ pub type Embedding = Vec; pub const REQUEST_PARALLELISM: usize = 40; pub const MAX_COMPOSITE_DISTANCE: f32 = 0.01; -pub struct ArroyWrapper { +pub struct HannoyWrapper { quantized: bool, embedder_index: u8, - database: arroy::Database, + database: hannoy::Database, } -impl ArroyWrapper { +impl HannoyWrapper { pub fn new( - database: arroy::Database, + database: hannoy::Database, embedder_index: u8, quantized: bool, ) -> Self { @@ -60,19 +60,19 @@ impl ArroyWrapper { self.embedder_index } - fn readers<'a, D: arroy::Distance>( + fn readers<'a, D: hannoy::Distance>( &'a self, rtxn: &'a RoTxn<'a>, - db: arroy::Database, - ) -> impl Iterator, arroy::Error>> + 'a { - 
arroy_store_range_for_embedder(self.embedder_index).filter_map(move |index| { - match arroy::Reader::open(rtxn, index, db) { + db: hannoy::Database, + ) -> impl Iterator, hannoy::Error>> + 'a { + hannoy_store_range_for_embedder(self.embedder_index).filter_map(move |index| { + match hannoy::Reader::open(rtxn, index, db) { Ok(reader) => match reader.is_empty(rtxn) { Ok(false) => Some(Ok(reader)), Ok(true) => None, Err(e) => Some(Err(e)), }, - Err(arroy::Error::MissingMetadata(_)) => None, + Err(hannoy::Error::MissingMetadata(_)) => None, Err(e) => Some(Err(e)), } }) @@ -86,7 +86,7 @@ impl ArroyWrapper { rtxn: &RoTxn, store_id: u8, with_items: F, - ) -> Result + ) -> Result where F: FnOnce(&RoaringBitmap) -> O, { @@ -97,26 +97,26 @@ impl ArroyWrapper { } } - fn _items_in_store( + fn _items_in_store( &self, rtxn: &RoTxn, - db: arroy::Database, + db: hannoy::Database, store_id: u8, with_items: F, - ) -> Result + ) -> Result where F: FnOnce(&RoaringBitmap) -> O, { - let index = arroy_store_for_embedder(self.embedder_index, store_id); - let reader = arroy::Reader::open(rtxn, index, db); + let index = hannoy_store_for_embedder(self.embedder_index, store_id); + let reader = hannoy::Reader::open(rtxn, index, db); match reader { Ok(reader) => Ok(with_items(reader.item_ids())), - Err(arroy::Error::MissingMetadata(_)) => Ok(with_items(&RoaringBitmap::new())), + Err(hannoy::Error::MissingMetadata(_)) => Ok(with_items(&RoaringBitmap::new())), Err(err) => Err(err), } } - pub fn dimensions(&self, rtxn: &RoTxn) -> Result, arroy::Error> { + pub fn dimensions(&self, rtxn: &RoTxn) -> Result, hannoy::Error> { if self.quantized { Ok(self .readers(rtxn, self.quantized_db()) @@ -140,39 +140,41 @@ impl ArroyWrapper { rng: &mut R, dimension: usize, quantizing: bool, - arroy_memory: Option, + hannoy_memory: Option, cancel: &(impl Fn() -> bool + Sync + Send), - ) -> Result<(), arroy::Error> { - for index in arroy_store_range_for_embedder(self.embedder_index) { + ) -> Result<(), hannoy::Error> { + for index in hannoy_store_range_for_embedder(self.embedder_index) { if self.quantized { - let writer = arroy::Writer::new(self.quantized_db(), index, dimension); + let writer = hannoy::Writer::new(self.quantized_db(), index, dimension); if writer.need_build(wtxn)? { - writer.builder(rng).build(wtxn)? + writer.builder(rng).ef_construction(48).build::<16, 32>(wtxn)? } else if writer.is_empty(wtxn)? { continue; } } else { - let writer = arroy::Writer::new(self.angular_db(), index, dimension); + let writer = hannoy::Writer::new(self.angular_db(), index, dimension); // If we are quantizing the databases, we can't know from meilisearch // if the db was empty but still contained the wrong metadata, thus we need // to quantize everything and can't stop early. Since this operation can // only happens once in the life of an embedder, it's not very performances // sensitive. if quantizing && !self.quantized { - let writer = writer.prepare_changing_distance::(wtxn)?; - writer - .builder(rng) - .available_memory(arroy_memory.unwrap_or(usize::MAX)) - .progress(|step| progress.update_progress_from_arroy(step)) - .cancel(cancel) - .build(wtxn)?; + // let writer = writer.prepare_changing_distance::(wtxn)?; + // writer + // .builder(rng) + // .available_memory(hannoy_memory.unwrap_or(usize::MAX)) + // .progress(|step| progress.update_progress_from_hannoy(step)) + // .cancel(cancel) + // .build(wtxn)?; + unimplemented!("switching from quantized to non-quantized"); } else if writer.need_build(wtxn)? 
{ writer .builder(rng) - .available_memory(arroy_memory.unwrap_or(usize::MAX)) - .progress(|step| progress.update_progress_from_arroy(step)) - .cancel(cancel) - .build(wtxn)?; + .available_memory(hannoy_memory.unwrap_or(usize::MAX)) + // .progress(|step| progress.update_progress_from_hannoy(step)) + // .cancel(cancel) + .ef_construction(48) + .build::<16, 32>(wtxn)?; } else if writer.is_empty(wtxn)? { continue; } @@ -188,18 +190,18 @@ impl ArroyWrapper { pub fn add_items( &self, wtxn: &mut RwTxn, - item_id: arroy::ItemId, + item_id: hannoy::ItemId, embeddings: &Embeddings, - ) -> Result<(), arroy::Error> { + ) -> Result<(), hannoy::Error> { let dimension = embeddings.dimension(); for (index, vector) in - arroy_store_range_for_embedder(self.embedder_index).zip(embeddings.iter()) + hannoy_store_range_for_embedder(self.embedder_index).zip(embeddings.iter()) { if self.quantized { - arroy::Writer::new(self.quantized_db(), index, dimension) + hannoy::Writer::new(self.quantized_db(), index, dimension) .add_item(wtxn, item_id, vector)? } else { - arroy::Writer::new(self.angular_db(), index, dimension) + hannoy::Writer::new(self.angular_db(), index, dimension) .add_item(wtxn, item_id, vector)? } } @@ -210,9 +212,9 @@ impl ArroyWrapper { pub fn add_item( &self, wtxn: &mut RwTxn, - item_id: arroy::ItemId, + item_id: hannoy::ItemId, vector: &[f32], - ) -> Result<(), arroy::Error> { + ) -> Result<(), hannoy::Error> { if self.quantized { self._add_item(wtxn, self.quantized_db(), item_id, vector) } else { @@ -220,17 +222,17 @@ impl ArroyWrapper { } } - fn _add_item( + fn _add_item( &self, wtxn: &mut RwTxn, - db: arroy::Database, - item_id: arroy::ItemId, + db: hannoy::Database, + item_id: hannoy::ItemId, vector: &[f32], - ) -> Result<(), arroy::Error> { + ) -> Result<(), hannoy::Error> { let dimension = vector.len(); - for index in arroy_store_range_for_embedder(self.embedder_index) { - let writer = arroy::Writer::new(db, index, dimension); + for index in hannoy_store_range_for_embedder(self.embedder_index) { + let writer = hannoy::Writer::new(db, index, dimension); if !writer.contains_item(wtxn, item_id)? 
{ writer.add_item(wtxn, item_id, vector)?; break; @@ -245,10 +247,10 @@ impl ArroyWrapper { pub fn add_item_in_store( &self, wtxn: &mut RwTxn, - item_id: arroy::ItemId, + item_id: hannoy::ItemId, store_id: u8, vector: &[f32], - ) -> Result<(), arroy::Error> { + ) -> Result<(), hannoy::Error> { if self.quantized { self._add_item_in_store(wtxn, self.quantized_db(), item_id, store_id, vector) } else { @@ -256,18 +258,18 @@ impl ArroyWrapper { } } - fn _add_item_in_store( + fn _add_item_in_store( &self, wtxn: &mut RwTxn, - db: arroy::Database, - item_id: arroy::ItemId, + db: hannoy::Database, + item_id: hannoy::ItemId, store_id: u8, vector: &[f32], - ) -> Result<(), arroy::Error> { + ) -> Result<(), hannoy::Error> { let dimension = vector.len(); - let index = arroy_store_for_embedder(self.embedder_index, store_id); - let writer = arroy::Writer::new(db, index, dimension); + let index = hannoy_store_for_embedder(self.embedder_index, store_id); + let writer = hannoy::Writer::new(db, index, dimension); writer.add_item(wtxn, item_id, vector) } @@ -276,14 +278,14 @@ impl ArroyWrapper { &self, wtxn: &mut RwTxn, dimension: usize, - item_id: arroy::ItemId, - ) -> Result<(), arroy::Error> { - for index in arroy_store_range_for_embedder(self.embedder_index) { + item_id: hannoy::ItemId, + ) -> Result<(), hannoy::Error> { + for index in hannoy_store_range_for_embedder(self.embedder_index) { if self.quantized { - let writer = arroy::Writer::new(self.quantized_db(), index, dimension); + let writer = hannoy::Writer::new(self.quantized_db(), index, dimension); writer.del_item(wtxn, item_id)?; } else { - let writer = arroy::Writer::new(self.angular_db(), index, dimension); + let writer = hannoy::Writer::new(self.angular_db(), index, dimension); writer.del_item(wtxn, item_id)?; } } @@ -301,10 +303,10 @@ impl ArroyWrapper { pub fn del_item_in_store( &self, wtxn: &mut RwTxn, - item_id: arroy::ItemId, + item_id: hannoy::ItemId, store_id: u8, dimensions: usize, - ) -> Result { + ) -> Result { if self.quantized { self._del_item_in_store(wtxn, self.quantized_db(), item_id, store_id, dimensions) } else { @@ -312,16 +314,16 @@ impl ArroyWrapper { } } - fn _del_item_in_store( + fn _del_item_in_store( &self, wtxn: &mut RwTxn, - db: arroy::Database, - item_id: arroy::ItemId, + db: hannoy::Database, + item_id: hannoy::ItemId, store_id: u8, dimensions: usize, - ) -> Result { - let index = arroy_store_for_embedder(self.embedder_index, store_id); - let writer = arroy::Writer::new(db, index, dimensions); + ) -> Result { + let index = hannoy_store_for_embedder(self.embedder_index, store_id); + let writer = hannoy::Writer::new(db, index, dimensions); writer.del_item(wtxn, item_id) } @@ -335,7 +337,7 @@ impl ArroyWrapper { wtxn: &mut RwTxn, store_id: u8, dimensions: usize, - ) -> Result<(), arroy::Error> { + ) -> Result<(), hannoy::Error> { if self.quantized { self._clear_store(wtxn, self.quantized_db(), store_id, dimensions) } else { @@ -343,15 +345,15 @@ impl ArroyWrapper { } } - fn _clear_store( + fn _clear_store( &self, wtxn: &mut RwTxn, - db: arroy::Database, + db: hannoy::Database, store_id: u8, dimensions: usize, - ) -> Result<(), arroy::Error> { - let index = arroy_store_for_embedder(self.embedder_index, store_id); - let writer = arroy::Writer::new(db, index, dimensions); + ) -> Result<(), hannoy::Error> { + let index = hannoy_store_for_embedder(self.embedder_index, store_id); + let writer = hannoy::Writer::new(db, index, dimensions); writer.clear(wtxn) } @@ -359,9 +361,9 @@ impl ArroyWrapper { pub fn del_item( &self, 
         wtxn: &mut RwTxn,
-        item_id: arroy::ItemId,
+        item_id: hannoy::ItemId,
         vector: &[f32],
-    ) -> Result<bool, arroy::Error> {
+    ) -> Result<bool, hannoy::Error> {
         if self.quantized {
             self._del_item(wtxn, self.quantized_db(), item_id, vector)
         } else {
@@ -369,37 +371,34 @@ impl ArroyWrapper {
         }
     }
 
-    fn _del_item<D: arroy::Distance>(
+    fn _del_item<D: hannoy::Distance>(
         &self,
         wtxn: &mut RwTxn,
-        db: arroy::Database<D>,
-        item_id: arroy::ItemId,
+        db: hannoy::Database<D>,
+        item_id: hannoy::ItemId,
         vector: &[f32],
-    ) -> Result<bool, arroy::Error> {
+    ) -> Result<bool, hannoy::Error> {
         let dimension = vector.len();
 
-        for index in arroy_store_range_for_embedder(self.embedder_index) {
-            let writer = arroy::Writer::new(db, index, dimension);
-            let Some(candidate) = writer.item_vector(wtxn, item_id)? else {
-                continue;
-            };
-            if candidate == vector {
+        for index in hannoy_store_range_for_embedder(self.embedder_index) {
+            let writer = hannoy::Writer::new(db, index, dimension);
+            if writer.contains_item(wtxn, item_id)? {
                 return writer.del_item(wtxn, item_id);
             }
         }
         Ok(false)
     }
 
-    pub fn clear(&self, wtxn: &mut RwTxn, dimension: usize) -> Result<(), arroy::Error> {
-        for index in arroy_store_range_for_embedder(self.embedder_index) {
+    pub fn clear(&self, wtxn: &mut RwTxn, dimension: usize) -> Result<(), hannoy::Error> {
+        for index in hannoy_store_range_for_embedder(self.embedder_index) {
             if self.quantized {
-                let writer = arroy::Writer::new(self.quantized_db(), index, dimension);
+                let writer = hannoy::Writer::new(self.quantized_db(), index, dimension);
                 if writer.is_empty(wtxn)? {
                     continue;
                 }
                 writer.clear(wtxn)?;
             } else {
-                let writer = arroy::Writer::new(self.angular_db(), index, dimension);
+                let writer = hannoy::Writer::new(self.angular_db(), index, dimension);
                 if writer.is_empty(wtxn)? {
                     continue;
                 }
@@ -413,17 +412,17 @@ impl ArroyWrapper {
         &self,
         rtxn: &RoTxn,
         dimension: usize,
-        item: arroy::ItemId,
-    ) -> Result<bool, arroy::Error> {
-        for index in arroy_store_range_for_embedder(self.embedder_index) {
+        item: hannoy::ItemId,
+    ) -> Result<bool, hannoy::Error> {
+        for index in hannoy_store_range_for_embedder(self.embedder_index) {
             let contains = if self.quantized {
-                let writer = arroy::Writer::new(self.quantized_db(), index, dimension);
+                let writer = hannoy::Writer::new(self.quantized_db(), index, dimension);
                 if writer.is_empty(rtxn)? {
                     continue;
                 }
                 writer.contains_item(rtxn, item)?
             } else {
-                let writer = arroy::Writer::new(self.angular_db(), index, dimension);
+                let writer = hannoy::Writer::new(self.angular_db(), index, dimension);
                 if writer.is_empty(rtxn)? {
                    continue;
                 }
@@ -442,7 +441,7 @@ impl ArroyWrapper {
         item: ItemId,
         limit: usize,
         filter: Option<&RoaringBitmap>,
-    ) -> Result<Vec<(ItemId, f32)>, arroy::Error> {
+    ) -> Result<Vec<(ItemId, f32)>, hannoy::Error> {
         if self.quantized {
             self._nns_by_item(rtxn, self.quantized_db(), item, limit, filter)
         } else {
@@ -450,19 +449,19 @@ impl ArroyWrapper {
         }
     }
 
-    fn _nns_by_item<D: arroy::Distance>(
+    fn _nns_by_item<D: hannoy::Distance>(
         &self,
         rtxn: &RoTxn,
-        db: arroy::Database<D>,
+        db: hannoy::Database<D>,
         item: ItemId,
         limit: usize,
         filter: Option<&RoaringBitmap>,
-    ) -> Result<Vec<(ItemId, f32)>, arroy::Error> {
+    ) -> Result<Vec<(ItemId, f32)>, hannoy::Error> {
         let mut results = Vec::new();
 
         for reader in self.readers(rtxn, db) {
             let reader = reader?;
-            let mut searcher = reader.nns(limit);
+            let mut searcher = reader.nns(limit, limit * 2); // TODO find better ef
             if let Some(filter) = filter {
                 if reader.item_ids().is_disjoint(filter) {
                     continue;
@@ -484,7 +483,7 @@ impl ArroyWrapper {
         vector: &[f32],
         limit: usize,
         filter: Option<&RoaringBitmap>,
-    ) -> Result<Vec<(ItemId, f32)>, arroy::Error> {
+    ) -> Result<Vec<(ItemId, f32)>, hannoy::Error> {
         if self.quantized {
             self._nns_by_vector(rtxn, self.quantized_db(), vector, limit, filter)
         } else {
@@ -492,19 +491,19 @@ impl ArroyWrapper {
         }
     }
 
-    fn _nns_by_vector<D: arroy::Distance>(
+    fn _nns_by_vector<D: hannoy::Distance>(
         &self,
         rtxn: &RoTxn,
-        db: arroy::Database<D>,
+        db: hannoy::Database<D>,
         vector: &[f32],
         limit: usize,
         filter: Option<&RoaringBitmap>,
-    ) -> Result<Vec<(ItemId, f32)>, arroy::Error> {
+    ) -> Result<Vec<(ItemId, f32)>, hannoy::Error> {
         let mut results = Vec::new();
 
         for reader in self.readers(rtxn, db) {
             let reader = reader?;
-            let mut searcher = reader.nns(limit);
+            let mut searcher = reader.nns(limit, limit * 2); // TODO find better ef
             if let Some(filter) = filter {
                 if reader.item_ids().is_disjoint(filter) {
                     continue;
@@ -520,7 +519,7 @@ impl ArroyWrapper {
         Ok(results)
     }
 
-    pub fn item_vectors(&self, rtxn: &RoTxn, item_id: u32) -> Result<Vec<Vec<f32>>, arroy::Error> {
+    pub fn item_vectors(&self, rtxn: &RoTxn, item_id: u32) -> Result<Vec<Vec<f32>>, hannoy::Error> {
         let mut vectors = Vec::new();
 
         if self.quantized {
@@ -539,19 +538,19 @@ impl ArroyWrapper {
         Ok(vectors)
     }
 
-    fn angular_db(&self) -> arroy::Database {
+    fn angular_db(&self) -> hannoy::Database {
         self.database.remap_data_type()
     }
 
-    fn quantized_db(&self) -> arroy::Database {
+    fn quantized_db(&self) -> hannoy::Database {
         self.database.remap_data_type()
     }
 
     pub fn aggregate_stats(
         &self,
         rtxn: &RoTxn,
-        stats: &mut ArroyStats,
-    ) -> Result<(), arroy::Error> {
+        stats: &mut HannoyStats,
+    ) -> Result<(), hannoy::Error> {
         if self.quantized {
             for reader in self.readers(rtxn, self.quantized_db()) {
                 let reader = reader?;
@@ -579,10 +578,11 @@ impl ArroyWrapper {
 }
 
 #[derive(Debug, Default, Clone)]
-pub struct ArroyStats {
+pub struct HannoyStats {
     pub number_of_embeddings: u64,
     pub documents: RoaringBitmap,
 }
+
 /// One or multiple embeddings stored consecutively in a flat vector.
 #[derive(Debug, PartialEq)]
 pub struct Embeddings<F> {
@@ -1227,11 +1227,11 @@ pub const fn is_cuda_enabled() -> bool {
     cfg!(feature = "cuda")
 }
 
-fn arroy_store_range_for_embedder(embedder_id: u8) -> impl Iterator<Item = u16> {
-    (0..=u8::MAX).map(move |store_id| arroy_store_for_embedder(embedder_id, store_id))
+fn hannoy_store_range_for_embedder(embedder_id: u8) -> impl Iterator<Item = u16> {
+    (0..=u8::MAX).map(move |store_id| hannoy_store_for_embedder(embedder_id, store_id))
 }
 
-fn arroy_store_for_embedder(embedder_id: u8, store_id: u8) -> u16 {
+fn hannoy_store_for_embedder(embedder_id: u8, store_id: u8) -> u16 {
     let embedder_id = (embedder_id as u16) << 8;
     embedder_id | (store_id as u16)
 }
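
For readers following the renamed helpers at the end of this hunk: hannoy_store_for_embedder packs the embedder id into the high byte of a u16 and the per-embedder store id into the low byte, so hannoy_store_range_for_embedder yields the 256 consecutive store indexes owned by one embedder. The sketch below is illustrative only, it is not part of the patch, and pack_store_index is a hypothetical standalone name re-implementing just the arithmetic shown above.

// Illustrative sketch: same bit packing as hannoy_store_for_embedder in the patch.
// `pack_store_index` is a hypothetical name, not a Meilisearch or hannoy API.
fn pack_store_index(embedder_id: u8, store_id: u8) -> u16 {
    ((embedder_id as u16) << 8) | (store_id as u16)
}

fn main() {
    // Embedder 1, store 2 maps to 0x0102 (258).
    assert_eq!(pack_store_index(1, 2), 0x0102);
    // Every embedder owns a contiguous block of 256 store indexes.
    let stores: Vec<u16> = (0..=u8::MAX).map(|s| pack_store_index(3, s)).collect();
    assert_eq!(stores.first().copied(), Some(0x0300));
    assert_eq!(stores.last().copied(), Some(0x03FF));
}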