diff --git a/crates/meilisearch/tests/vector/binary_quantized.rs b/crates/meilisearch/tests/vector/binary_quantized.rs
index effa98b4e..ff3fc470e 100644
--- a/crates/meilisearch/tests/vector/binary_quantized.rs
+++ b/crates/meilisearch/tests/vector/binary_quantized.rs
@@ -320,7 +320,7 @@ async fn binary_quantize_clear_documents() {
     }
     "###);
 
-    // Make sure the hannoy DB has been cleared
+    // Make sure the vector DB has been cleared
     let (documents, _code) =
         index.search_post(json!({ "hybrid": { "embedder": "manual" }, "vector": [1, 1, 1] })).await;
     snapshot!(documents, @r#"
diff --git a/crates/meilisearch/tests/vector/mod.rs b/crates/meilisearch/tests/vector/mod.rs
index 9eb7deff6..d89fbf0a9 100644
--- a/crates/meilisearch/tests/vector/mod.rs
+++ b/crates/meilisearch/tests/vector/mod.rs
@@ -10,11 +10,11 @@ use std::str::FromStr;
 
 use meili_snap::{json_string, snapshot};
 use meilisearch::option::MaxThreads;
+pub use rest::create_mock;
 
 use crate::common::index::Index;
 use crate::common::{default_settings, GetAllDocumentsOptions, Server};
 use crate::json;
-pub use rest::create_mock;
 
 pub async fn get_server_vector() -> Server {
     Server::new().await
@@ -684,7 +684,7 @@ async fn clear_documents() {
     }
     "###);
 
-    // Make sure the hannoy DB has been cleared
+    // Make sure the vector DB has been cleared
     let (documents, _code) =
         index.search_post(json!({ "vector": [1, 1, 1], "hybrid": {"embedder": "manual"} })).await;
     snapshot!(documents, @r#"
diff --git a/crates/meilisearch/tests/vector/settings.rs b/crates/meilisearch/tests/vector/settings.rs
index 8ace8f092..292451ec3 100644
--- a/crates/meilisearch/tests/vector/settings.rs
+++ b/crates/meilisearch/tests/vector/settings.rs
@@ -236,7 +236,7 @@ async fn reset_embedder_documents() {
     }
     "###);
 
-    // Make sure the hannoy DB has been cleared
+    // Make sure the vector DB has been cleared
     let (documents, _code) =
         index.search_post(json!({ "vector": [1, 1, 1], "hybrid": {"embedder": "default"} })).await;
     snapshot!(json_string!(documents), @r###"
diff --git a/crates/meilitool/src/main.rs b/crates/meilitool/src/main.rs
index 1a2110b5d..831bcf209 100644
--- a/crates/meilitool/src/main.rs
+++ b/crates/meilitool/src/main.rs
@@ -142,7 +142,7 @@ enum Command {
 
 #[derive(Clone, ValueEnum)]
 enum IndexPart {
-    /// Will make the hannoy index hot.
+    /// Will make the vector index hot.
     Hannoy,
 }
 
diff --git a/crates/milli/src/index.rs b/crates/milli/src/index.rs
index 0f4f7c455..54086d8a5 100644
--- a/crates/milli/src/index.rs
+++ b/crates/milli/src/index.rs
@@ -178,7 +178,7 @@ pub struct Index {
     /// Maps the document id, the facet field id and the strings.
    pub field_id_docid_facet_strings: Database<FieldDocIdFacetStringCodec, Str>,
 
-    /// Maps an embedder name to its id in the hannoy store.
+    /// Maps an embedder name to its id in the vector store.
     pub(crate) embedder_category_id: Database<Str, U8>,
     /// Vector store based on hannoy™.
     pub vector_store: hannoy::Database<Unspecified>,
@@ -1881,7 +1881,7 @@ impl Index {
             facet_id_is_empty_docids,
             field_id_docid_facet_f64s,
             field_id_docid_facet_strings,
-            vector_store: vector_hannoy,
+            vector_store,
             embedder_category_id,
             documents,
         } = self;
@@ -1952,7 +1952,7 @@ impl Index {
             "field_id_docid_facet_strings",
             field_id_docid_facet_strings.stat(rtxn).map(compute_size)?,
         );
-        sizes.insert("vector_hannoy", vector_hannoy.stat(rtxn).map(compute_size)?);
+        sizes.insert("vector_store", vector_store.stat(rtxn).map(compute_size)?);
         sizes.insert("embedder_category_id", embedder_category_id.stat(rtxn).map(compute_size)?);
         sizes.insert("documents", documents.stat(rtxn).map(compute_size)?);
 
diff --git a/crates/milli/src/update/index_documents/mod.rs b/crates/milli/src/update/index_documents/mod.rs
index 686da865c..ea468f08b 100644
--- a/crates/milli/src/update/index_documents/mod.rs
+++ b/crates/milli/src/update/index_documents/mod.rs
@@ -505,7 +505,7 @@ where
 
         for (embedder_name, dimension) in dimension {
             let wtxn = &mut *self.wtxn;
-            let vector_hannoy = self.index.vector_store;
+            let vector_store = self.index.vector_store;
             let cancel = &self.should_abort;
 
             let embedder_index =
@@ -525,7 +525,7 @@ where
             pool.install(|| {
                 let mut writer =
-                    VectorStore::new(backend, vector_hannoy, embedder_index, was_quantized);
+                    VectorStore::new(backend, vector_store, embedder_index, was_quantized);
                 writer.build_and_quantize(
                     wtxn,
                     // In the settings we don't have any progress to share
diff --git a/crates/milli/src/update/index_documents/transform.rs b/crates/milli/src/update/index_documents/transform.rs
index e4bd49434..8df8722c3 100644
--- a/crates/milli/src/update/index_documents/transform.rs
+++ b/crates/milli/src/update/index_documents/transform.rs
@@ -948,13 +948,13 @@ impl<'a, 'i> Transform<'a, 'i> {
             else {
                 continue;
             };
-            let hannoy = VectorStore::new(
+            let vector_store = VectorStore::new(
                 backend,
                 self.index.vector_store,
                 infos.embedder_id,
                 was_quantized,
             );
-            let Some(dimensions) = hannoy.dimensions(wtxn)? else {
-                continue;
-            };
+            let Some(dimensions) = vector_store.dimensions(wtxn)? else {
+                continue;
+            };
 
             for fragment_id in fragment_ids {
@@ -962,17 +962,17 @@
 
                 if infos.embedding_status.user_provided_docids().is_empty() {
                     // no user provided: clear store
-                    hannoy.clear_store(wtxn, *fragment_id, dimensions)?;
+                    vector_store.clear_store(wtxn, *fragment_id, dimensions)?;
                     continue;
                 }
 
                 // some user provided, remove only the ids that are not user provided
-                let to_delete = hannoy.items_in_store(wtxn, *fragment_id, |items| {
+                let to_delete = vector_store.items_in_store(wtxn, *fragment_id, |items| {
                     items - infos.embedding_status.user_provided_docids()
                 })?;
 
                 for to_delete in to_delete {
-                    hannoy.del_item_in_store(wtxn, to_delete, *fragment_id, dimensions)?;
+                    vector_store.del_item_in_store(wtxn, to_delete, *fragment_id, dimensions)?;
                 }
             }
         }
diff --git a/crates/milli/src/update/new/channel.rs b/crates/milli/src/update/new/channel.rs
index 86843795b..884f133d6 100644
--- a/crates/milli/src/update/new/channel.rs
+++ b/crates/milli/src/update/new/channel.rs
@@ -255,9 +255,9 @@ impl<'a> From<FrameGrantR<'a>> for FrameWithHeader<'a> {
 #[repr(u8)]
 pub enum EntryHeader {
     DbOperation(DbOperation),
-    HannoyDeleteVector(HannoyDeleteVector),
-    HannoySetVectors(HannoySetVectors),
-    HannoySetVector(HannoySetVector),
+    DeleteVector(DeleteVector),
+    SetVectors(SetVectors),
+    SetVector(SetVector),
 }
 
 impl EntryHeader {
@@ -268,9 +268,9 @@
     const fn variant_id(&self) -> u8 {
         match self {
             EntryHeader::DbOperation(_) => 0,
-            EntryHeader::HannoyDeleteVector(_) => 1,
-            EntryHeader::HannoySetVectors(_) => 2,
-            EntryHeader::HannoySetVector(_) => 3,
+            EntryHeader::DeleteVector(_) => 1,
+            EntryHeader::SetVectors(_) => 2,
+            EntryHeader::SetVector(_) => 3,
         }
     }
 
@@ -286,26 +286,26 @@
     }
 
     const fn total_delete_vector_size() -> usize {
-        Self::variant_size() + mem::size_of::<HannoyDeleteVector>()
+        Self::variant_size() + mem::size_of::<DeleteVector>()
     }
 
     /// The `dimensions` corresponds to the number of `f32` in the embedding.
     fn total_set_vectors_size(count: usize, dimensions: usize) -> usize {
         let embedding_size = dimensions * mem::size_of::<f32>();
-        Self::variant_size() + mem::size_of::<HannoySetVectors>() + embedding_size * count
+        Self::variant_size() + mem::size_of::<SetVectors>() + embedding_size * count
     }
 
     fn total_set_vector_size(dimensions: usize) -> usize {
         let embedding_size = dimensions * mem::size_of::<f32>();
-        Self::variant_size() + mem::size_of::<HannoySetVector>() + embedding_size
+        Self::variant_size() + mem::size_of::<SetVector>() + embedding_size
     }
 
     fn header_size(&self) -> usize {
         let payload_size = match self {
             EntryHeader::DbOperation(op) => mem::size_of_val(op),
-            EntryHeader::HannoyDeleteVector(adv) => mem::size_of_val(adv),
-            EntryHeader::HannoySetVectors(asvs) => mem::size_of_val(asvs),
-            EntryHeader::HannoySetVector(asv) => mem::size_of_val(asv),
+            EntryHeader::DeleteVector(adv) => mem::size_of_val(adv),
+            EntryHeader::SetVectors(asvs) => mem::size_of_val(asvs),
+            EntryHeader::SetVector(asv) => mem::size_of_val(asv),
         };
         Self::variant_size() + payload_size
     }
@@ -319,19 +319,19 @@
                 EntryHeader::DbOperation(header)
             }
             1 => {
-                let header_bytes = &remaining[..mem::size_of::<HannoyDeleteVector>()];
+                let header_bytes = &remaining[..mem::size_of::<DeleteVector>()];
                 let header = checked::pod_read_unaligned(header_bytes);
-                EntryHeader::HannoyDeleteVector(header)
+                EntryHeader::DeleteVector(header)
             }
             2 => {
-                let header_bytes = &remaining[..mem::size_of::<HannoySetVectors>()];
+                let header_bytes = &remaining[..mem::size_of::<SetVectors>()];
                 let header = checked::pod_read_unaligned(header_bytes);
-                EntryHeader::HannoySetVectors(header)
+                EntryHeader::SetVectors(header)
             }
             3 => {
-                let header_bytes = &remaining[..mem::size_of::<HannoySetVector>()];
+                let header_bytes = &remaining[..mem::size_of::<SetVector>()];
                 let header = checked::pod_read_unaligned(header_bytes);
-                EntryHeader::HannoySetVector(header)
+                EntryHeader::SetVector(header)
             }
             id => panic!("invalid variant id: {id}"),
         }
@@ -341,9 +341,9 @@
         let (first, remaining) = header_bytes.split_first_mut().unwrap();
         let payload_bytes = match self {
             EntryHeader::DbOperation(op) => bytemuck::bytes_of(op),
-            EntryHeader::HannoyDeleteVector(adv) => bytemuck::bytes_of(adv),
-            EntryHeader::HannoySetVectors(asvs) => bytemuck::bytes_of(asvs),
-            EntryHeader::HannoySetVector(asv) => bytemuck::bytes_of(asv),
+            EntryHeader::DeleteVector(adv) => bytemuck::bytes_of(adv),
+            EntryHeader::SetVectors(asvs) => bytemuck::bytes_of(asvs),
+            EntryHeader::SetVector(asv) => bytemuck::bytes_of(asv),
         };
         *first = self.variant_id();
         remaining.copy_from_slice(payload_bytes);
@@ -378,7 +378,7 @@ impl DbOperation {
 
 #[derive(Debug, Clone, Copy, NoUninit, CheckedBitPattern)]
 #[repr(transparent)]
-pub struct HannoyDeleteVector {
+pub struct DeleteVector {
     pub docid: DocumentId,
 }
 
@@ -386,13 +386,13 @@ pub struct HannoyDeleteVector {
 #[derive(Debug, Clone, Copy, NoUninit, CheckedBitPattern)]
 #[repr(C)]
 /// The embeddings are in the remaining space and represents
 /// non-aligned [f32] each with dimensions f32s.
-pub struct HannoySetVectors {
+pub struct SetVectors {
     pub docid: DocumentId,
     pub embedder_id: u8,
     _padding: [u8; 3],
 }
 
-impl HannoySetVectors {
+impl SetVectors {
     fn embeddings_bytes<'a>(frame: &'a FrameGrantR<'_>) -> &'a [u8] {
         let skip = EntryHeader::variant_size() + mem::size_of::<Self>();
         &frame[skip..]
@@ -416,14 +416,14 @@
 #[repr(C)]
 /// The embeddings are in the remaining space and represents
 /// non-aligned [f32] each with dimensions f32s.
-pub struct HannoySetVector {
+pub struct SetVector {
     pub docid: DocumentId,
     pub embedder_id: u8,
     pub extractor_id: u8,
     _padding: [u8; 2],
 }
 
-impl HannoySetVector {
+impl SetVector {
     fn embeddings_bytes<'a>(frame: &'a FrameGrantR<'_>) -> &'a [u8] {
         let skip = EntryHeader::variant_size() + mem::size_of::<Self>();
         &frame[skip..]
@@ -553,7 +553,7 @@ impl<'b> ExtractorBbqueueSender<'b> {
         let refcell = self.producers.get().unwrap();
         let mut producer = refcell.0.borrow_mut_or_yield();
 
-        let payload_header = EntryHeader::HannoyDeleteVector(HannoyDeleteVector { docid });
+        let payload_header = EntryHeader::DeleteVector(DeleteVector { docid });
         let total_length = EntryHeader::total_delete_vector_size();
         if total_length > max_grant {
             panic!("The entry is larger ({total_length} bytes) than the BBQueue max grant ({max_grant} bytes)");
@@ -589,8 +589,8 @@ impl<'b> ExtractorBbqueueSender<'b> {
         // to zero to allocate no extra space at all
         let dimensions = embeddings.first().map_or(0, |emb| emb.len());
 
-        let hannoy_set_vector = HannoySetVectors { docid, embedder_id, _padding: [0; 3] };
-        let payload_header = EntryHeader::HannoySetVectors(hannoy_set_vector);
+        let set_vectors = SetVectors { docid, embedder_id, _padding: [0; 3] };
+        let payload_header = EntryHeader::SetVectors(set_vectors);
         let total_length = EntryHeader::total_set_vectors_size(embeddings.len(), dimensions);
         if total_length > max_grant {
             let mut value_file = tempfile::tempfile().map(BufWriter::new)?;
@@ -650,9 +650,8 @@ impl<'b> ExtractorBbqueueSender<'b> {
         // to zero to allocate no extra space at all
         let dimensions = embedding.as_ref().map_or(0, |emb| emb.len());
 
-        let hannoy_set_vector =
-            HannoySetVector { docid, embedder_id, extractor_id, _padding: [0; 2] };
-        let payload_header = EntryHeader::HannoySetVector(hannoy_set_vector);
+        let set_vector = SetVector { docid, embedder_id, extractor_id, _padding: [0; 2] };
+        let payload_header = EntryHeader::SetVector(set_vector);
         let total_length = EntryHeader::total_set_vector_size(dimensions);
         if total_length > max_grant {
             let mut value_file = tempfile::tempfile().map(BufWriter::new)?;
diff --git a/crates/milli/src/update/new/indexer/mod.rs b/crates/milli/src/update/new/indexer/mod.rs
index 0547aa155..3655a59f6 100644
--- a/crates/milli/src/update/new/indexer/mod.rs
+++ b/crates/milli/src/update/new/indexer/mod.rs
@@ -67,7 +67,7 @@ where
     let mut bbbuffers = Vec::new();
     let finished_extraction = AtomicBool::new(false);
 
-    let hannoy_memory = grenad_parameters.max_memory;
+    let vector_memory = grenad_parameters.max_memory;
 
     let (grenad_parameters, total_bbbuffer_capacity) =
         indexer_memory_settings(pool.current_num_threads(), grenad_parameters);
@@ -132,7 +132,7 @@ where
     let vector_arroy = index.vector_store;
     let backend = index.get_vector_store(wtxn)?;
 
-    let hannoy_writers: Result<HashMap<_, _>> = embedders
+    let vector_stores: Result<HashMap<_, _>> = embedders
         .inner_as_ref()
         .iter()
         .map(|(embedder_name, runtime)| {
@@ -155,10 +155,10 @@ where
         })
         .collect();
 
-    let mut hannoy_writers = hannoy_writers?;
+    let mut vector_stores = vector_stores?;
 
     let congestion =
-        write_to_db(writer_receiver, finished_extraction, index, wtxn, &hannoy_writers)?;
+        write_to_db(writer_receiver, finished_extraction, index, wtxn, &vector_stores)?;
 
     indexing_context.progress.update_progress(IndexingStep::WaitingForExtractors);
@@ -172,8 +172,8 @@ where
             wtxn,
             indexing_context.progress,
             index_embeddings,
-            hannoy_memory,
-            &mut hannoy_writers,
+            vector_memory,
+            &mut vector_stores,
             None,
             &indexing_context.must_stop_processing,
         )
@@ -229,7 +229,7 @@ where
     let mut bbbuffers = Vec::new();
     let finished_extraction = AtomicBool::new(false);
 
-    let hannoy_memory = grenad_parameters.max_memory;
+    let vector_memory = grenad_parameters.max_memory;
 
     let (grenad_parameters, total_bbbuffer_capacity) =
         indexer_memory_settings(pool.current_num_threads(), grenad_parameters);
@@ -286,7 +286,7 @@ where
     let new_embedders = settings_delta.new_embedders();
    let embedder_actions = settings_delta.embedder_actions();
     let index_embedder_category_ids = settings_delta.new_embedder_category_id();
-    let mut hannoy_writers = hannoy_writers_from_embedder_actions(
+    let mut vector_stores = vector_stores_from_embedder_actions(
         index,
         wtxn,
         embedder_actions,
     )?;
@@ -295,7 +295,7 @@ where
     let congestion =
-        write_to_db(writer_receiver, finished_extraction, index, wtxn, &hannoy_writers)?;
+        write_to_db(writer_receiver, finished_extraction, index, wtxn, &vector_stores)?;
 
     indexing_context.progress.update_progress(IndexingStep::WaitingForExtractors);
@@ -309,8 +309,8 @@ where
             wtxn,
             indexing_context.progress,
             index_embeddings,
-            hannoy_memory,
-            &mut hannoy_writers,
+            vector_memory,
+            &mut vector_stores,
             Some(embedder_actions),
             &indexing_context.must_stop_processing,
         )
@@ -340,7 +340,7 @@ where
     Ok(congestion)
 }
 
-fn hannoy_writers_from_embedder_actions<'indexer>(
+fn vector_stores_from_embedder_actions<'indexer>(
     index: &Index,
     rtxn: &RoTxn,
     embedder_actions: &'indexer BTreeMap<String, EmbedderAction>,
diff --git a/crates/milli/src/update/new/indexer/write.rs b/crates/milli/src/update/new/indexer/write.rs
index 9cb014e25..55ccfdf35 100644
--- a/crates/milli/src/update/new/indexer/write.rs
+++ b/crates/milli/src/update/new/indexer/write.rs
@@ -23,7 +23,7 @@ pub fn write_to_db(
     finished_extraction: &AtomicBool,
     index: &Index,
     wtxn: &mut RwTxn<'_>,
-    hannoy_writers: &HashMap<u8, (&str, &Embedder, VectorStore, usize)>,
+    vector_stores: &HashMap<u8, (&str, &Embedder, VectorStore, usize)>,
 ) -> Result<ChannelCongestion> {
     // Used by by the HannoySetVector to copy the embedding into an
     // aligned memory area, required by arroy to accept a new vector.
@@ -56,7 +56,7 @@
                 ReceiverAction::LargeVectors(large_vectors) => {
                     let LargeVectors { docid, embedder_id, .. } = large_vectors;
                     let (_, _, writer, dimensions) =
-                        hannoy_writers.get(&embedder_id).expect("requested a missing embedder");
+                        vector_stores.get(&embedder_id).expect("requested a missing embedder");
                     let mut embeddings = Embeddings::new(*dimensions);
                     for embedding in large_vectors.read_embeddings(*dimensions) {
                         embeddings.push(embedding.to_vec()).unwrap();
@@ -68,7 +68,7 @@
                     large_vector @ LargeVector { docid, embedder_id, extractor_id, .. },
                 ) => {
                     let (_, _, writer, dimensions) =
-                        hannoy_writers.get(&embedder_id).expect("requested a missing embedder");
+                        vector_stores.get(&embedder_id).expect("requested a missing embedder");
                     let embedding = large_vector.read_embedding(*dimensions);
                     writer.add_item_in_store(wtxn, docid, extractor_id, embedding)?;
                 }
@@ -80,12 +80,12 @@
                 &mut writer_receiver,
                 index,
                 wtxn,
-                hannoy_writers,
+                vector_stores,
                 &mut aligned_embedding,
             )?;
         }
 
-    write_from_bbqueue(&mut writer_receiver, index, wtxn, hannoy_writers, &mut aligned_embedding)?;
+    write_from_bbqueue(&mut writer_receiver, index, wtxn, vector_stores, &mut aligned_embedding)?;
 
     Ok(ChannelCongestion {
         attempts: writer_receiver.sent_messages_attempts(),
@@ -115,8 +115,8 @@ pub fn build_vectors(
     wtxn: &mut RwTxn<'_>,
     progress: &Progress,
     index_embeddings: Vec<IndexEmbeddingConfig>,
-    hannoy_memory: Option<usize>,
-    hannoy_writers: &mut HashMap<u8, (&str, &Embedder, VectorStore, usize)>,
+    vector_memory: Option<usize>,
+    vector_stores: &mut HashMap<u8, (&str, &Embedder, VectorStore, usize)>,
     embeder_actions: Option<&BTreeMap<String, EmbedderAction>>,
     must_stop_processing: &MSP,
 ) -> Result<()>
@@ -129,7 +129,7 @@ where
     let seed = rand::random();
     let mut rng = rand::rngs::StdRng::seed_from_u64(seed);
 
-    for (_index, (embedder_name, _embedder, writer, dimensions)) in hannoy_writers {
+    for (_index, (embedder_name, _embedder, writer, dimensions)) in vector_stores {
         let dimensions = *dimensions;
         let is_being_quantized = embeder_actions
             .and_then(|actions| actions.get(*embedder_name).map(|action| action.is_being_quantized))
@@ -140,7 +140,7 @@ where
             &mut rng,
             dimensions,
             is_being_quantized,
-            hannoy_memory,
+            vector_memory,
             must_stop_processing,
         )?;
     }
@@ -181,7 +181,7 @@ pub fn write_from_bbqueue(
     writer_receiver: &mut WriterBbqueueReceiver<'_>,
     index: &Index,
     wtxn: &mut RwTxn<'_>,
-    hannoy_writers: &HashMap<u8, (&str, &Embedder, VectorStore, usize)>,
+    vector_stores: &HashMap<u8, (&str, &Embedder, VectorStore, usize)>,
     aligned_embedding: &mut Vec<f32>,
 ) -> crate::Result<()> {
     while let Some(frame_with_header) = writer_receiver.recv_frame() {
@@ -221,17 +221,17 @@
                     },
                 }
             }
-            EntryHeader::HannoyDeleteVector(HannoyDeleteVector { docid }) => {
-                for (_index, (_name, _embedder, writer, dimensions)) in hannoy_writers {
+            EntryHeader::DeleteVector(DeleteVector { docid }) => {
+                for (_index, (_name, _embedder, writer, dimensions)) in vector_stores {
                     let dimensions = *dimensions;
                     writer.del_items(wtxn, dimensions, docid)?;
                 }
             }
-            EntryHeader::HannoySetVectors(asvs) => {
-                let HannoySetVectors { docid, embedder_id, .. } = asvs;
+            EntryHeader::SetVectors(asvs) => {
+                let SetVectors { docid, embedder_id, .. } = asvs;
                 let frame = frame_with_header.frame();
                 let (_, _, writer, dimensions) =
-                    hannoy_writers.get(&embedder_id).expect("requested a missing embedder");
+                    vector_stores.get(&embedder_id).expect("requested a missing embedder");
                 let mut embeddings = Embeddings::new(*dimensions);
                 let all_embeddings = asvs.read_all_embeddings_into_vec(frame, aligned_embedding);
                 writer.del_items(wtxn, *dimensions, docid)?;
@@ -245,12 +245,10 @@
                     writer.add_items(wtxn, docid, &embeddings)?;
                 }
             }
-            EntryHeader::HannoySetVector(
-                asv @ HannoySetVector { docid, embedder_id, extractor_id, .. },
-            ) => {
+            EntryHeader::SetVector(asv @ SetVector { docid, embedder_id, extractor_id, .. }) => {
                 let frame = frame_with_header.frame();
                 let (_, _, writer, dimensions) =
-                    hannoy_writers.get(&embedder_id).expect("requested a missing embedder");
+                    vector_stores.get(&embedder_id).expect("requested a missing embedder");
                 let embedding = asv.read_all_embeddings_into_vec(frame, aligned_embedding);
 
                 if embedding.is_empty() {
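
For context on the framing types renamed in crates/milli/src/update/new/channel.rs: `EntryHeader` implements a small tag-byte protocol over BBQueue frames. `serialize_into` writes one variant-id byte followed by the raw bytes of a `NoUninit` POD header struct, and `from_bytes` decodes it with a checked, alignment-free read; any embedding payload travels after the header in the same frame. The following self-contained sketch reproduces just that header round-trip under stated assumptions: it uses the post-rename names but only two variants as a stand-in, it omits `DbOperation` and the trailing embedding bytes, and it assumes bytemuck 1.x with the "derive" feature enabled in Cargo.toml. It is an illustration, not the Meilisearch implementation.

use bytemuck::checked;
use bytemuck::{CheckedBitPattern, NoUninit};

type DocumentId = u32;

// Layouts mirror the structs in the diff: padding is explicit, so the
// structs contain no uninitialized bytes and `NoUninit` can be derived.
#[derive(Debug, Clone, Copy, PartialEq, NoUninit, CheckedBitPattern)]
#[repr(transparent)]
struct DeleteVector {
    docid: DocumentId,
}

#[derive(Debug, Clone, Copy, PartialEq, NoUninit, CheckedBitPattern)]
#[repr(C)]
struct SetVector {
    docid: DocumentId,
    embedder_id: u8,
    extractor_id: u8,
    _padding: [u8; 2],
}

#[derive(Debug, PartialEq)]
enum EntryHeader {
    DeleteVector(DeleteVector),
    SetVector(SetVector),
}

impl EntryHeader {
    const fn variant_id(&self) -> u8 {
        match self {
            EntryHeader::DeleteVector(_) => 0,
            EntryHeader::SetVector(_) => 1,
        }
    }

    // One tag byte, then the header struct copied byte-for-byte.
    fn serialize_into(&self, out: &mut Vec<u8>) {
        out.push(self.variant_id());
        match self {
            EntryHeader::DeleteVector(adv) => out.extend_from_slice(bytemuck::bytes_of(adv)),
            EntryHeader::SetVector(asv) => out.extend_from_slice(bytemuck::bytes_of(asv)),
        }
    }

    // Frame bytes are not guaranteed to be aligned, hence the unaligned,
    // bit-pattern-checked read instead of a plain reference cast.
    fn from_bytes(bytes: &[u8]) -> Self {
        let (variant_id, remaining) = bytes.split_first().unwrap();
        match *variant_id {
            0 => {
                let len = std::mem::size_of::<DeleteVector>();
                EntryHeader::DeleteVector(checked::pod_read_unaligned(&remaining[..len]))
            }
            1 => {
                let len = std::mem::size_of::<SetVector>();
                EntryHeader::SetVector(checked::pod_read_unaligned(&remaining[..len]))
            }
            id => panic!("invalid variant id: {id}"),
        }
    }
}

fn main() {
    let header = EntryHeader::SetVector(SetVector {
        docid: 42,
        embedder_id: 1,
        extractor_id: 0,
        _padding: [0; 2],
    });
    let mut buf = Vec::new();
    header.serialize_into(&mut buf);
    assert_eq!(EntryHeader::from_bytes(&buf), header);
}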