mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-26 05:26:27 +00:00 
			
		
		
		
	Add support for the progress API of arroy
This commit is contained in:
		
							
								
								
									
										7
									
								
								Cargo.lock
									
									
									
										generated
									
									
									
								
							
							
						
						
									
										7
									
								
								Cargo.lock
									
									
									
										generated
									
									
									
								
							| @@ -393,12 +393,13 @@ checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711" | |||||||
|  |  | ||||||
| [[package]] | [[package]] | ||||||
| name = "arroy" | name = "arroy" | ||||||
| version = "0.6.0" | version = "0.6.1" | ||||||
| source = "registry+https://github.com/rust-lang/crates.io-index" | source = "registry+https://github.com/rust-lang/crates.io-index" | ||||||
| checksum = "4a885313dfac15b64fd61a39d1970a2befa076c69a763434117c5b6163f9fecb" | checksum = "08e6111f351d004bd13e95ab540721272136fd3218b39d3ec95a2ea1c4e6a0a6" | ||||||
| dependencies = [ | dependencies = [ | ||||||
|  "bytemuck", |  "bytemuck", | ||||||
|  "byteorder", |  "byteorder", | ||||||
|  |  "enum-iterator", | ||||||
|  "heed", |  "heed", | ||||||
|  "memmap2", |  "memmap2", | ||||||
|  "nohash", |  "nohash", | ||||||
| @@ -3017,7 +3018,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" | |||||||
| checksum = "fc2f4eb4bc735547cfed7c0a4922cbd04a4655978c09b54f1f7b228750664c34" | checksum = "fc2f4eb4bc735547cfed7c0a4922cbd04a4655978c09b54f1f7b228750664c34" | ||||||
| dependencies = [ | dependencies = [ | ||||||
|  "cfg-if", |  "cfg-if", | ||||||
|  "windows-targets 0.48.1", |  "windows-targets 0.52.6", | ||||||
| ] | ] | ||||||
|  |  | ||||||
| [[package]] | [[package]] | ||||||
|   | |||||||
| @@ -87,7 +87,7 @@ rhai = { git = "https://github.com/rhaiscript/rhai", rev = "ef3df63121d27aacd838 | |||||||
|     "no_time", |     "no_time", | ||||||
|     "sync", |     "sync", | ||||||
| ] } | ] } | ||||||
| arroy = "0.6.0" | arroy = "0.6.1" | ||||||
| rand = "0.8.5" | rand = "0.8.5" | ||||||
| tracing = "0.1.41" | tracing = "0.1.41" | ||||||
| ureq = { version = "2.12.1", features = ["json"] } | ureq = { version = "2.12.1", features = ["json"] } | ||||||
|   | |||||||
| @@ -1,3 +1,4 @@ | |||||||
|  | use enum_iterator::Sequence; | ||||||
| use std::any::TypeId; | use std::any::TypeId; | ||||||
| use std::borrow::Cow; | use std::borrow::Cow; | ||||||
| use std::marker::PhantomData; | use std::marker::PhantomData; | ||||||
| @@ -76,6 +77,14 @@ impl Progress { | |||||||
|  |  | ||||||
|         durations.drain(..).map(|(name, duration)| (name, format!("{duration:.2?}"))).collect() |         durations.drain(..).map(|(name, duration)| (name, format!("{duration:.2?}"))).collect() | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     // TODO: ideally we should expose the progress in a way that let arroy use it directly | ||||||
|  |     pub(crate) fn update_progress_from_arroy(&self, progress: arroy::WriterProgress) { | ||||||
|  |         self.update_progress(progress.main); | ||||||
|  |         if let Some(sub) = progress.sub { | ||||||
|  |             self.update_progress(sub); | ||||||
|  |         } | ||||||
|  |     } | ||||||
| } | } | ||||||
|  |  | ||||||
| /// Generate the names associated with the durations and push them. | /// Generate the names associated with the durations and push them. | ||||||
| @@ -238,3 +247,44 @@ impl<U: Send + Sync + 'static> Step for VariableNameStep<U> { | |||||||
|         self.total |         self.total | ||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
|  | impl Step for arroy::MainStep { | ||||||
|  |     fn name(&self) -> Cow<'static, str> { | ||||||
|  |         match self { | ||||||
|  |             arroy::MainStep::PreProcessingTheItems => "pre processing the items", | ||||||
|  |             arroy::MainStep::WritingTheDescendantsAndMetadata => { | ||||||
|  |                 "writing the descendants and metadata" | ||||||
|  |             } | ||||||
|  |             arroy::MainStep::RetrieveTheUpdatedItems => "retrieve the updated items", | ||||||
|  |             arroy::MainStep::RetrievingTheTreeAndItemNodes => "retrieving the tree and item nodes", | ||||||
|  |             arroy::MainStep::UpdatingTheTrees => "updating the trees", | ||||||
|  |             arroy::MainStep::CreateNewTrees => "create new trees", | ||||||
|  |             arroy::MainStep::WritingNodesToDatabase => "writing nodes to database", | ||||||
|  |             arroy::MainStep::DeleteExtraneousTrees => "delete extraneous trees", | ||||||
|  |             arroy::MainStep::WriteTheMetadata => "write the metadata", | ||||||
|  |         } | ||||||
|  |         .into() | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     fn current(&self) -> u32 { | ||||||
|  |         *self as u32 | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     fn total(&self) -> u32 { | ||||||
|  |         Self::CARDINALITY as u32 | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | impl Step for arroy::SubStep { | ||||||
|  |     fn name(&self) -> Cow<'static, str> { | ||||||
|  |         self.unit.into() | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     fn current(&self) -> u32 { | ||||||
|  |         self.current.load(Ordering::Relaxed) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     fn total(&self) -> u32 { | ||||||
|  |         self.max | ||||||
|  |     } | ||||||
|  | } | ||||||
|   | |||||||
| @@ -31,6 +31,7 @@ use super::new::StdResult; | |||||||
| use crate::documents::{obkv_to_object, DocumentsBatchReader}; | use crate::documents::{obkv_to_object, DocumentsBatchReader}; | ||||||
| use crate::error::{Error, InternalError}; | use crate::error::{Error, InternalError}; | ||||||
| use crate::index::{PrefixSearch, PrefixSettings}; | use crate::index::{PrefixSearch, PrefixSettings}; | ||||||
|  | use crate::progress::Progress; | ||||||
| use crate::thread_pool_no_abort::ThreadPoolNoAbortBuilder; | use crate::thread_pool_no_abort::ThreadPoolNoAbortBuilder; | ||||||
| pub use crate::update::index_documents::helpers::CursorClonableMmap; | pub use crate::update::index_documents::helpers::CursorClonableMmap; | ||||||
| use crate::update::{ | use crate::update::{ | ||||||
| @@ -522,6 +523,8 @@ where | |||||||
|                 let mut writer = ArroyWrapper::new(vector_arroy, embedder_index, was_quantized); |                 let mut writer = ArroyWrapper::new(vector_arroy, embedder_index, was_quantized); | ||||||
|                 writer.build_and_quantize( |                 writer.build_and_quantize( | ||||||
|                     wtxn, |                     wtxn, | ||||||
|  |                     // In the settings we don't have any progress to share | ||||||
|  |                     &Progress::default(), | ||||||
|                     &mut rng, |                     &mut rng, | ||||||
|                     dimension, |                     dimension, | ||||||
|                     is_quantizing, |                     is_quantizing, | ||||||
|   | |||||||
| @@ -201,6 +201,7 @@ where | |||||||
|             build_vectors( |             build_vectors( | ||||||
|                 index, |                 index, | ||||||
|                 wtxn, |                 wtxn, | ||||||
|  |                 indexing_context.progress, | ||||||
|                 index_embeddings, |                 index_embeddings, | ||||||
|                 arroy_memory, |                 arroy_memory, | ||||||
|                 &mut arroy_writers, |                 &mut arroy_writers, | ||||||
|   | |||||||
| @@ -10,6 +10,7 @@ use super::super::channel::*; | |||||||
| use crate::documents::PrimaryKey; | use crate::documents::PrimaryKey; | ||||||
| use crate::fields_ids_map::metadata::FieldIdMapWithMetadata; | use crate::fields_ids_map::metadata::FieldIdMapWithMetadata; | ||||||
| use crate::index::IndexEmbeddingConfig; | use crate::index::IndexEmbeddingConfig; | ||||||
|  | use crate::progress::Progress; | ||||||
| use crate::update::settings::InnerIndexSettings; | use crate::update::settings::InnerIndexSettings; | ||||||
| use crate::vector::{ArroyWrapper, Embedder, EmbeddingConfigs, Embeddings}; | use crate::vector::{ArroyWrapper, Embedder, EmbeddingConfigs, Embeddings}; | ||||||
| use crate::{Error, Index, InternalError, Result}; | use crate::{Error, Index, InternalError, Result}; | ||||||
| @@ -100,6 +101,7 @@ impl ChannelCongestion { | |||||||
| pub fn build_vectors<MSP>( | pub fn build_vectors<MSP>( | ||||||
|     index: &Index, |     index: &Index, | ||||||
|     wtxn: &mut RwTxn<'_>, |     wtxn: &mut RwTxn<'_>, | ||||||
|  |     progress: &Progress, | ||||||
|     index_embeddings: Vec<IndexEmbeddingConfig>, |     index_embeddings: Vec<IndexEmbeddingConfig>, | ||||||
|     arroy_memory: Option<usize>, |     arroy_memory: Option<usize>, | ||||||
|     arroy_writers: &mut HashMap<u8, (&str, &Embedder, ArroyWrapper, usize)>, |     arroy_writers: &mut HashMap<u8, (&str, &Embedder, ArroyWrapper, usize)>, | ||||||
| @@ -118,6 +120,7 @@ where | |||||||
|         let dimensions = *dimensions; |         let dimensions = *dimensions; | ||||||
|         writer.build_and_quantize( |         writer.build_and_quantize( | ||||||
|             wtxn, |             wtxn, | ||||||
|  |             progress, | ||||||
|             &mut rng, |             &mut rng, | ||||||
|             dimensions, |             dimensions, | ||||||
|             false, |             false, | ||||||
|   | |||||||
| @@ -13,6 +13,7 @@ use serde::{Deserialize, Serialize}; | |||||||
| use utoipa::ToSchema; | use utoipa::ToSchema; | ||||||
|  |  | ||||||
| use self::error::{EmbedError, NewEmbedderError}; | use self::error::{EmbedError, NewEmbedderError}; | ||||||
|  | use crate::progress::Progress; | ||||||
| use crate::prompt::{Prompt, PromptData}; | use crate::prompt::{Prompt, PromptData}; | ||||||
| use crate::ThreadPoolNoAbort; | use crate::ThreadPoolNoAbort; | ||||||
|  |  | ||||||
| @@ -81,9 +82,11 @@ impl ArroyWrapper { | |||||||
|         } |         } | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     #[allow(clippy::too_many_arguments)] | ||||||
|     pub fn build_and_quantize<R: rand::Rng + rand::SeedableRng>( |     pub fn build_and_quantize<R: rand::Rng + rand::SeedableRng>( | ||||||
|         &mut self, |         &mut self, | ||||||
|         wtxn: &mut RwTxn, |         wtxn: &mut RwTxn, | ||||||
|  |         progress: &Progress, | ||||||
|         rng: &mut R, |         rng: &mut R, | ||||||
|         dimension: usize, |         dimension: usize, | ||||||
|         quantizing: bool, |         quantizing: bool, | ||||||
| @@ -110,12 +113,14 @@ impl ArroyWrapper { | |||||||
|                     writer |                     writer | ||||||
|                         .builder(rng) |                         .builder(rng) | ||||||
|                         .available_memory(arroy_memory.unwrap_or(usize::MAX)) |                         .available_memory(arroy_memory.unwrap_or(usize::MAX)) | ||||||
|  |                         .progress(|step| progress.update_progress_from_arroy(step)) | ||||||
|                         .cancel(cancel) |                         .cancel(cancel) | ||||||
|                         .build(wtxn)?; |                         .build(wtxn)?; | ||||||
|                 } else if writer.need_build(wtxn)? { |                 } else if writer.need_build(wtxn)? { | ||||||
|                     writer |                     writer | ||||||
|                         .builder(rng) |                         .builder(rng) | ||||||
|                         .available_memory(arroy_memory.unwrap_or(usize::MAX)) |                         .available_memory(arroy_memory.unwrap_or(usize::MAX)) | ||||||
|  |                         .progress(|step| progress.update_progress_from_arroy(step)) | ||||||
|                         .cancel(cancel) |                         .cancel(cancel) | ||||||
|                         .build(wtxn)?; |                         .build(wtxn)?; | ||||||
|                 } else if writer.is_empty(wtxn)? { |                 } else if writer.is_empty(wtxn)? { | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user