diff --git a/crates/milli/src/search/facet/filter_vector.rs b/crates/milli/src/search/facet/filter_vector.rs index 1ef4b8e3d..62303c622 100644 --- a/crates/milli/src/search/facet/filter_vector.rs +++ b/crates/milli/src/search/facet/filter_vector.rs @@ -3,7 +3,7 @@ use roaring::{MultiOps, RoaringBitmap}; use crate::error::{DidYouMean, Error}; use crate::vector::db::IndexEmbeddingConfig; -use crate::vector::{ArroyStats, ArroyWrapper}; +use crate::vector::{HannoyStats, VectorStore}; use crate::Index; #[derive(Debug, thiserror::Error)] @@ -82,6 +82,7 @@ fn evaluate_inner( embedding_configs: &[IndexEmbeddingConfig], filter: &VectorFilter<'_>, ) -> crate::Result { + let index_version = index.get_version(rtxn)?.unwrap(); let embedder_name = embedder.value(); let available_embedders = || embedding_configs.iter().map(|c| c.name.clone()).collect::>(); @@ -96,8 +97,9 @@ fn evaluate_inner( .embedder_info(rtxn, embedder_name)? .ok_or_else(|| EmbedderDoesNotExist { embedder, available: available_embedders() })?; - let arroy_wrapper = ArroyWrapper::new( - index.vector_arroy, + let vector_store = VectorStore::new( + index_version, + index.vector_store, embedder_info.embedder_id, embedding_config.config.quantized(), ); @@ -122,7 +124,7 @@ fn evaluate_inner( })?; let user_provided_docids = embedder_info.embedding_status.user_provided_docids(); - arroy_wrapper.items_in_store(rtxn, fragment_config.id, |bitmap| { + vector_store.items_in_store(rtxn, fragment_config.id, |bitmap| { bitmap.clone() - user_provided_docids })? } @@ -132,8 +134,8 @@ fn evaluate_inner( } let user_provided_docids = embedder_info.embedding_status.user_provided_docids(); - let mut stats = ArroyStats::default(); - arroy_wrapper.aggregate_stats(rtxn, &mut stats)?; + let mut stats = HannoyStats::default(); + vector_store.aggregate_stats(rtxn, &mut stats)?; stats.documents - user_provided_docids.clone() } VectorFilter::UserProvided => { @@ -141,14 +143,14 @@ fn evaluate_inner( user_provided_docids.clone() } VectorFilter::Regenerate => { - let mut stats = ArroyStats::default(); - arroy_wrapper.aggregate_stats(rtxn, &mut stats)?; + let mut stats = HannoyStats::default(); + vector_store.aggregate_stats(rtxn, &mut stats)?; let skip_regenerate = embedder_info.embedding_status.skip_regenerate_docids(); stats.documents - skip_regenerate } VectorFilter::None => { - let mut stats = ArroyStats::default(); - arroy_wrapper.aggregate_stats(rtxn, &mut stats)?; + let mut stats = HannoyStats::default(); + vector_store.aggregate_stats(rtxn, &mut stats)?; stats.documents } }; diff --git a/crates/milli/src/update/index_documents/extract/extract_word_docids.rs b/crates/milli/src/update/index_documents/extract/extract_word_docids.rs index a964c0bbe..6d28adb2b 100644 --- a/crates/milli/src/update/index_documents/extract/extract_word_docids.rs +++ b/crates/milli/src/update/index_documents/extract/extract_word_docids.rs @@ -2,9 +2,8 @@ use std::collections::BTreeSet; use std::fs::File; use std::io::{self, BufReader}; -use heed::{BytesDecode, BytesEncode}; +use heed::BytesDecode; use obkv::KvReaderU16; -use roaring::RoaringBitmap; use super::helpers::{ create_sorter, create_writer, try_split_array_at, writer_into_reader, GrenadParameters, @@ -16,7 +15,7 @@ use crate::index::db_name::DOCID_WORD_POSITIONS; use crate::update::del_add::{is_noop_del_add_obkv, DelAdd, KvReaderDelAdd, KvWriterDelAdd}; use crate::update::index_documents::helpers::sorter_into_reader; use crate::update::settings::InnerIndexSettingsDiff; -use crate::{CboRoaringBitmapCodec, DocumentId, FieldId, Result}; +use crate::{DocumentId, FieldId, Result}; /// Extracts the word and the documents ids where this word appear. /// @@ -201,45 +200,3 @@ fn words_into_sorter( Ok(()) } - -#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")] -fn docids_into_writers( - word: &str, - deletions: &RoaringBitmap, - additions: &RoaringBitmap, - writer: &mut grenad::Writer, -) -> Result<()> -where - W: std::io::Write, -{ - if deletions == additions { - // if the same value is deleted and added, do nothing. - return Ok(()); - } - - // Write each value in the same KvDelAdd before inserting it in the final writer. - let mut obkv = KvWriterDelAdd::memory(); - // deletions: - if !deletions.is_empty() && !deletions.is_subset(additions) { - obkv.insert( - DelAdd::Deletion, - CboRoaringBitmapCodec::bytes_encode(deletions).map_err(|_| { - SerializationError::Encoding { db_name: Some(DOCID_WORD_POSITIONS) } - })?, - )?; - } - // additions: - if !additions.is_empty() { - obkv.insert( - DelAdd::Addition, - CboRoaringBitmapCodec::bytes_encode(additions).map_err(|_| { - SerializationError::Encoding { db_name: Some(DOCID_WORD_POSITIONS) } - })?, - )?; - } - - // insert everything in the same writer. - writer.insert(word.as_bytes(), obkv.into_inner().unwrap())?; - - Ok(()) -} diff --git a/crates/milli/src/update/new/extract/documents.rs b/crates/milli/src/update/new/extract/documents.rs index 31d2ada0f..5f287851a 100644 --- a/crates/milli/src/update/new/extract/documents.rs +++ b/crates/milli/src/update/new/extract/documents.rs @@ -240,12 +240,12 @@ impl<'extractor> SettingsChangeExtractor<'extractor> for SettingsChangeDocumentE /// modifies them by adding or removing vector fields based on embedder actions, /// and then updates the database. #[tracing::instrument(level = "trace", skip_all, target = "indexing::documents::extract")] -pub fn update_database_documents<'indexer, 'extractor, MSP, SD>( +pub fn update_database_documents<'indexer, MSP, SD>( documents: &'indexer DocumentsIndentifiers<'indexer>, indexing_context: IndexingContext, extractor_sender: &ExtractorBbqueueSender, settings_delta: &SD, - extractor_allocs: &'extractor mut ThreadLocal>, + extractor_allocs: &mut ThreadLocal>, ) -> Result<()> where MSP: Fn() -> bool + Sync, diff --git a/crates/milli/src/update/new/merger.rs b/crates/milli/src/update/new/merger.rs index 15f06c67d..44ba8e301 100644 --- a/crates/milli/src/update/new/merger.rs +++ b/crates/milli/src/update/new/merger.rs @@ -63,8 +63,8 @@ where } #[tracing::instrument(level = "trace", skip_all, target = "indexing::merge")] -pub fn merge_and_send_docids<'extractor, MSP, D>( - mut caches: Vec>, +pub fn merge_and_send_docids( + mut caches: Vec>, database: Database, index: &Index, docids_sender: WordDocidsSender, @@ -91,8 +91,8 @@ where } #[tracing::instrument(level = "trace", skip_all, target = "indexing::merge")] -pub fn merge_and_send_facet_docids<'extractor>( - mut caches: Vec>, +pub fn merge_and_send_facet_docids( + mut caches: Vec>, database: FacetDatabases, index: &Index, rtxn: &RoTxn, diff --git a/crates/tracing-trace/src/main.rs b/crates/tracing-trace/src/main.rs index 4a3d26923..22c96ec78 100644 --- a/crates/tracing-trace/src/main.rs +++ b/crates/tracing-trace/src/main.rs @@ -59,7 +59,7 @@ fn fibo_recursive(n: u32) -> u32 { if n == 1 { return 2; } - return fibo_recursive(n - 1) - fibo_recursive(n - 2); + fibo_recursive(n - 1) - fibo_recursive(n - 2) } use tracing_error::ExtractSpanTrace as _;