mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-31 07:56:28 +00:00 
			
		
		
		
	initial implementation of the progress
This commit is contained in:
		| @@ -1734,6 +1734,7 @@ pub(crate) mod tests { | ||||
|  | ||||
|     use crate::error::{Error, InternalError}; | ||||
|     use crate::index::{DEFAULT_MIN_WORD_LEN_ONE_TYPO, DEFAULT_MIN_WORD_LEN_TWO_TYPOS}; | ||||
|     use crate::progress::Progress; | ||||
|     use crate::update::new::indexer; | ||||
|     use crate::update::settings::InnerIndexSettings; | ||||
|     use crate::update::{ | ||||
| @@ -1810,7 +1811,7 @@ pub(crate) mod tests { | ||||
|                 None, | ||||
|                 &mut new_fields_ids_map, | ||||
|                 &|| false, | ||||
|                 &|_progress| (), | ||||
|                 Progress::default(), | ||||
|             )?; | ||||
|  | ||||
|             if let Some(error) = operation_stats.into_iter().find_map(|stat| stat.error) { | ||||
| @@ -1829,7 +1830,7 @@ pub(crate) mod tests { | ||||
|                     &document_changes, | ||||
|                     embedders, | ||||
|                     &|| false, | ||||
|                     &|_| (), | ||||
|                     &Progress::default(), | ||||
|                 ) | ||||
|             }) | ||||
|             .unwrap()?; | ||||
| @@ -1901,7 +1902,7 @@ pub(crate) mod tests { | ||||
|                 None, | ||||
|                 &mut new_fields_ids_map, | ||||
|                 &|| false, | ||||
|                 &|_progress| (), | ||||
|                 Progress::default(), | ||||
|             )?; | ||||
|  | ||||
|             if let Some(error) = operation_stats.into_iter().find_map(|stat| stat.error) { | ||||
| @@ -1920,7 +1921,7 @@ pub(crate) mod tests { | ||||
|                     &document_changes, | ||||
|                     embedders, | ||||
|                     &|| false, | ||||
|                     &|_| (), | ||||
|                     &Progress::default(), | ||||
|                 ) | ||||
|             }) | ||||
|             .unwrap()?; | ||||
| @@ -1982,7 +1983,7 @@ pub(crate) mod tests { | ||||
|                 None, | ||||
|                 &mut new_fields_ids_map, | ||||
|                 &|| false, | ||||
|                 &|_progress| (), | ||||
|                 Progress::default(), | ||||
|             ) | ||||
|             .unwrap(); | ||||
|  | ||||
| @@ -2001,7 +2002,7 @@ pub(crate) mod tests { | ||||
|                     &document_changes, | ||||
|                     embedders, | ||||
|                     &|| should_abort.load(Relaxed), | ||||
|                     &|_| (), | ||||
|                     &Progress::default(), | ||||
|                 ) | ||||
|             }) | ||||
|             .unwrap() | ||||
|   | ||||
| @@ -31,6 +31,7 @@ pub mod vector; | ||||
| #[macro_use] | ||||
| pub mod snapshot_tests; | ||||
| mod fieldids_weights_map; | ||||
| pub mod progress; | ||||
|  | ||||
| use std::collections::{BTreeMap, HashMap}; | ||||
| use std::convert::{TryFrom, TryInto}; | ||||
|   | ||||
							
								
								
									
										116
									
								
								crates/milli/src/progress.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										116
									
								
								crates/milli/src/progress.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,116 @@ | ||||
| use std::{ | ||||
|     any::TypeId, | ||||
|     borrow::Cow, | ||||
|     sync::{ | ||||
|         atomic::{AtomicU32, Ordering}, | ||||
|         Arc, RwLock, | ||||
|     }, | ||||
| }; | ||||
|  | ||||
| use serde::Serialize; | ||||
|  | ||||
/// A unit of work whose advancement can be reported.
///
/// Implementors expose a display name and a position (`current()` out of
/// `total()`). The `'static + Send + Sync` bounds allow a step to be boxed as a
/// `dyn Step` and shared across threads.
pub trait Step: Send + Sync + 'static {
    /// Display name of the step.
    fn name(&self) -> Cow<'static, str>;

    /// Amount of work already done.
    fn current(&self) -> u32;

    /// Total amount of work the step represents.
    fn total(&self) -> u32;
}
|  | ||||
| #[derive(Clone, Default)] | ||||
| pub struct Progress { | ||||
|     steps: Arc<RwLock<Vec<(TypeId, Box<dyn Step>)>>>, | ||||
| } | ||||
|  | ||||
| impl Progress { | ||||
|     pub fn update_progress<P: Step>(&self, sub_progress: P) { | ||||
|         let mut steps = self.steps.write().unwrap(); | ||||
|         let step_type = TypeId::of::<P>(); | ||||
|         if let Some(idx) = steps.iter().position(|(id, _)| *id == step_type) { | ||||
|             steps.truncate(idx); | ||||
|         } | ||||
|         steps.push((step_type, Box::new(sub_progress))); | ||||
|     } | ||||
|  | ||||
|     // TODO: This code should be in meilisearch_types but cannot because milli can't depend on meilisearch_types | ||||
|     pub fn as_progress_view(&self) -> ProgressView { | ||||
|         let steps = self.steps.read().unwrap(); | ||||
|  | ||||
|         let mut percentage = 0.0; | ||||
|         let mut prev_factors = 1.0; | ||||
|  | ||||
|         let mut step_view = Vec::new(); | ||||
|         for (_, step) in steps.iter() { | ||||
|             prev_factors *= step.total() as f32; | ||||
|             percentage += step.current() as f32 / prev_factors; | ||||
|  | ||||
|             step_view.push(ProgressStepView { | ||||
|                 name: step.name(), | ||||
|                 finished: step.current(), | ||||
|                 total: step.total(), | ||||
|             }); | ||||
|         } | ||||
|  | ||||
|         ProgressView { steps: step_view, percentage: percentage * 100.0 } | ||||
|     } | ||||
| } | ||||
|  | ||||
/// Gives a fixed name to a step type so that it can be used with `AtomicSubStep`.
///
/// The name is expected to be a constant that never changes. The type system
/// cannot enforce this without making the trait non object-safe, so the trait
/// instead requires `Default` and a `&'static str` return type, which makes it
/// harder to misuse.
pub trait NamedStep: Default + Send + Sync + 'static {
    /// Name of the step.
    fn name(&self) -> &'static str;
}
|  | ||||
| /// Structure to quickly define steps that need very quick, lockless updating of their current step. | ||||
| /// You can use this struct if: | ||||
| /// - The name of the step doesn't change | ||||
| /// - The total number of steps doesn't change | ||||
| pub struct AtomicSubStep<Name: NamedStep> { | ||||
|     name: Name, | ||||
|     current: Arc<AtomicU32>, | ||||
|     total: u32, | ||||
| } | ||||
|  | ||||
| impl<Name: NamedStep> AtomicSubStep<Name> { | ||||
|     pub fn new(total: u32) -> (Arc<AtomicU32>, Self) { | ||||
|         let current = Arc::new(AtomicU32::new(0)); | ||||
|         (current.clone(), Self { current, total, name: Name::default() }) | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl<Name: NamedStep> Step for AtomicSubStep<Name> { | ||||
|     fn name(&self) -> Cow<'static, str> { | ||||
|         self.name.name().into() | ||||
|     } | ||||
|  | ||||
|     fn current(&self) -> u32 { | ||||
|         self.current.load(Ordering::Relaxed) | ||||
|     } | ||||
|  | ||||
|     fn total(&self) -> u32 { | ||||
|         self.total | ||||
|     } | ||||
| } | ||||
|  | ||||
| #[derive(Default)] | ||||
| pub struct Document {} | ||||
|  | ||||
| impl NamedStep for Document { | ||||
|     fn name(&self) -> &'static str { | ||||
|         "document" | ||||
|     } | ||||
| } | ||||
|  | ||||
| pub type AtomicDocumentStep = AtomicSubStep<Document>; | ||||
|  | ||||
| #[derive(Debug, Serialize, Clone)] | ||||
| pub struct ProgressView { | ||||
|     steps: Vec<ProgressStepView>, | ||||
|     percentage: f32, | ||||
| } | ||||
|  | ||||
| #[derive(Debug, Serialize, Clone)] | ||||
| pub struct ProgressStepView { | ||||
|     name: Cow<'static, str>, | ||||
|     finished: u32, | ||||
|     total: u32, | ||||
| } | ||||
| @@ -5,6 +5,7 @@ use bumpalo::Bump; | ||||
| use heed::EnvOpenOptions; | ||||
| use maplit::{btreemap, hashset}; | ||||
|  | ||||
| use crate::progress::Progress; | ||||
| use crate::update::new::indexer; | ||||
| use crate::update::{IndexDocumentsMethod, IndexerConfig, Settings}; | ||||
| use crate::vector::EmbeddingConfigs; | ||||
| @@ -72,7 +73,7 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index { | ||||
|             None, | ||||
|             &mut new_fields_ids_map, | ||||
|             &|| false, | ||||
|             &|_progress| (), | ||||
|             Progress::default(), | ||||
|         ) | ||||
|         .unwrap(); | ||||
|  | ||||
| @@ -91,7 +92,7 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index { | ||||
|         &document_changes, | ||||
|         embedders, | ||||
|         &|| false, | ||||
|         &|_| (), | ||||
|         &Progress::default(), | ||||
|     ) | ||||
|     .unwrap(); | ||||
|  | ||||
|   | ||||
| @@ -766,6 +766,7 @@ mod tests { | ||||
|     use crate::documents::mmap_from_objects; | ||||
|     use crate::index::tests::TempIndex; | ||||
|     use crate::index::IndexEmbeddingConfig; | ||||
|     use crate::progress::Progress; | ||||
|     use crate::search::TermsMatchingStrategy; | ||||
|     use crate::update::new::indexer; | ||||
|     use crate::update::Setting; | ||||
| @@ -1964,7 +1965,7 @@ mod tests { | ||||
|                 None, | ||||
|                 &mut new_fields_ids_map, | ||||
|                 &|| false, | ||||
|                 &|_progress| (), | ||||
|                 Progress::default(), | ||||
|             ) | ||||
|             .unwrap(); | ||||
|  | ||||
| @@ -2148,7 +2149,7 @@ mod tests { | ||||
|                 None, | ||||
|                 &mut new_fields_ids_map, | ||||
|                 &|| false, | ||||
|                 &|_progress| (), | ||||
|                 Progress::default(), | ||||
|             ) | ||||
|             .unwrap(); | ||||
|  | ||||
| @@ -2163,7 +2164,7 @@ mod tests { | ||||
|             &document_changes, | ||||
|             embedders, | ||||
|             &|| false, | ||||
|             &|_| (), | ||||
|             &Progress::default(), | ||||
|         ) | ||||
|         .unwrap(); | ||||
|         wtxn.commit().unwrap(); | ||||
| @@ -2210,7 +2211,7 @@ mod tests { | ||||
|                 None, | ||||
|                 &mut new_fields_ids_map, | ||||
|                 &|| false, | ||||
|                 &|_progress| (), | ||||
|                 Progress::default(), | ||||
|             ) | ||||
|             .unwrap(); | ||||
|  | ||||
| @@ -2225,7 +2226,7 @@ mod tests { | ||||
|             &document_changes, | ||||
|             embedders, | ||||
|             &|| false, | ||||
|             &|_| (), | ||||
|             &Progress::default(), | ||||
|         ) | ||||
|         .unwrap(); | ||||
|         wtxn.commit().unwrap(); | ||||
| @@ -2263,7 +2264,7 @@ mod tests { | ||||
|                 None, | ||||
|                 &mut new_fields_ids_map, | ||||
|                 &|| false, | ||||
|                 &|_progress| (), | ||||
|                 Progress::default(), | ||||
|             ) | ||||
|             .unwrap(); | ||||
|  | ||||
| @@ -2278,7 +2279,7 @@ mod tests { | ||||
|             &document_changes, | ||||
|             embedders, | ||||
|             &|| false, | ||||
|             &|_| (), | ||||
|             &Progress::default(), | ||||
|         ) | ||||
|         .unwrap(); | ||||
|         wtxn.commit().unwrap(); | ||||
| @@ -2315,7 +2316,7 @@ mod tests { | ||||
|                 None, | ||||
|                 &mut new_fields_ids_map, | ||||
|                 &|| false, | ||||
|                 &|_progress| (), | ||||
|                 Progress::default(), | ||||
|             ) | ||||
|             .unwrap(); | ||||
|  | ||||
| @@ -2330,7 +2331,7 @@ mod tests { | ||||
|             &document_changes, | ||||
|             embedders, | ||||
|             &|| false, | ||||
|             &|_| (), | ||||
|             &Progress::default(), | ||||
|         ) | ||||
|         .unwrap(); | ||||
|         wtxn.commit().unwrap(); | ||||
| @@ -2369,7 +2370,7 @@ mod tests { | ||||
|                 None, | ||||
|                 &mut new_fields_ids_map, | ||||
|                 &|| false, | ||||
|                 &|_progress| (), | ||||
|                 Progress::default(), | ||||
|             ) | ||||
|             .unwrap(); | ||||
|  | ||||
| @@ -2384,7 +2385,7 @@ mod tests { | ||||
|             &document_changes, | ||||
|             embedders, | ||||
|             &|| false, | ||||
|             &|_| (), | ||||
|             &Progress::default(), | ||||
|         ) | ||||
|         .unwrap(); | ||||
|         wtxn.commit().unwrap(); | ||||
| @@ -2428,7 +2429,7 @@ mod tests { | ||||
|                 None, | ||||
|                 &mut new_fields_ids_map, | ||||
|                 &|| false, | ||||
|                 &|_progress| (), | ||||
|                 Progress::default(), | ||||
|             ) | ||||
|             .unwrap(); | ||||
|  | ||||
| @@ -2443,7 +2444,7 @@ mod tests { | ||||
|             &document_changes, | ||||
|             embedders, | ||||
|             &|| false, | ||||
|             &|_| (), | ||||
|             &Progress::default(), | ||||
|         ) | ||||
|         .unwrap(); | ||||
|         wtxn.commit().unwrap(); | ||||
| @@ -2480,7 +2481,7 @@ mod tests { | ||||
|                 None, | ||||
|                 &mut new_fields_ids_map, | ||||
|                 &|| false, | ||||
|                 &|_progress| (), | ||||
|                 Progress::default(), | ||||
|             ) | ||||
|             .unwrap(); | ||||
|  | ||||
| @@ -2495,7 +2496,7 @@ mod tests { | ||||
|             &document_changes, | ||||
|             embedders, | ||||
|             &|| false, | ||||
|             &|_| (), | ||||
|             &Progress::default(), | ||||
|         ) | ||||
|         .unwrap(); | ||||
|         wtxn.commit().unwrap(); | ||||
| @@ -2532,7 +2533,7 @@ mod tests { | ||||
|                 None, | ||||
|                 &mut new_fields_ids_map, | ||||
|                 &|| false, | ||||
|                 &|_progress| (), | ||||
|                 Progress::default(), | ||||
|             ) | ||||
|             .unwrap(); | ||||
|  | ||||
| @@ -2547,7 +2548,7 @@ mod tests { | ||||
|             &document_changes, | ||||
|             embedders, | ||||
|             &|| false, | ||||
|             &|_| (), | ||||
|             &Progress::default(), | ||||
|         ) | ||||
|         .unwrap(); | ||||
|         wtxn.commit().unwrap(); | ||||
| @@ -2726,7 +2727,7 @@ mod tests { | ||||
|                 None, | ||||
|                 &mut new_fields_ids_map, | ||||
|                 &|| false, | ||||
|                 &|_progress| (), | ||||
|                 Progress::default(), | ||||
|             ) | ||||
|             .unwrap(); | ||||
|  | ||||
| @@ -2741,7 +2742,7 @@ mod tests { | ||||
|             &document_changes, | ||||
|             embedders, | ||||
|             &|| false, | ||||
|             &|_| (), | ||||
|             &Progress::default(), | ||||
|         ) | ||||
|         .unwrap(); | ||||
|         wtxn.commit().unwrap(); | ||||
| @@ -2785,7 +2786,7 @@ mod tests { | ||||
|                 None, | ||||
|                 &mut new_fields_ids_map, | ||||
|                 &|| false, | ||||
|                 &|_progress| (), | ||||
|                 Progress::default(), | ||||
|             ) | ||||
|             .unwrap(); | ||||
|  | ||||
| @@ -2800,7 +2801,7 @@ mod tests { | ||||
|             &document_changes, | ||||
|             embedders, | ||||
|             &|| false, | ||||
|             &|_| (), | ||||
|             &Progress::default(), | ||||
|         ) | ||||
|         .unwrap(); | ||||
|         wtxn.commit().unwrap(); | ||||
| @@ -2841,7 +2842,7 @@ mod tests { | ||||
|                 None, | ||||
|                 &mut new_fields_ids_map, | ||||
|                 &|| false, | ||||
|                 &|_progress| (), | ||||
|                 Progress::default(), | ||||
|             ) | ||||
|             .unwrap(); | ||||
|  | ||||
| @@ -2856,7 +2857,7 @@ mod tests { | ||||
|             &document_changes, | ||||
|             embedders, | ||||
|             &|| false, | ||||
|             &|_| (), | ||||
|             &Progress::default(), | ||||
|         ) | ||||
|         .unwrap(); | ||||
|         wtxn.commit().unwrap(); | ||||
|   | ||||
| @@ -16,10 +16,10 @@ use crate::update::del_add::DelAdd; | ||||
| use crate::update::new::channel::FieldIdDocidFacetSender; | ||||
| use crate::update::new::extract::perm_json_p; | ||||
| use crate::update::new::indexer::document_changes::{ | ||||
|     extract, DocumentChangeContext, DocumentChanges, Extractor, IndexingContext, Progress, | ||||
|     extract, DocumentChangeContext, DocumentChanges, Extractor, IndexingContext, | ||||
| }; | ||||
| use crate::update::new::ref_cell_ext::RefCellExt as _; | ||||
| use crate::update::new::steps::Step; | ||||
| use crate::update::new::steps::IndexingStep; | ||||
| use crate::update::new::thread_local::{FullySend, ThreadLocal}; | ||||
| use crate::update::new::DocumentChange; | ||||
| use crate::update::GrenadParameters; | ||||
| @@ -373,26 +373,16 @@ fn truncate_str(s: &str) -> &str { | ||||
|  | ||||
| impl FacetedDocidsExtractor { | ||||
|     #[tracing::instrument(level = "trace", skip_all, target = "indexing::extract::faceted")] | ||||
|     pub fn run_extraction< | ||||
|         'pl, | ||||
|         'fid, | ||||
|         'indexer, | ||||
|         'index, | ||||
|         'extractor, | ||||
|         DC: DocumentChanges<'pl>, | ||||
|         MSP, | ||||
|         SP, | ||||
|     >( | ||||
|     pub fn run_extraction<'pl, 'fid, 'indexer, 'index, 'extractor, DC: DocumentChanges<'pl>, MSP>( | ||||
|         grenad_parameters: GrenadParameters, | ||||
|         document_changes: &DC, | ||||
|         indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP, SP>, | ||||
|         indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP>, | ||||
|         extractor_allocs: &'extractor mut ThreadLocal<FullySend<Bump>>, | ||||
|         sender: &FieldIdDocidFacetSender, | ||||
|         step: Step, | ||||
|         step: IndexingStep, | ||||
|     ) -> Result<Vec<BalancedCaches<'extractor>>> | ||||
|     where | ||||
|         MSP: Fn() -> bool + Sync, | ||||
|         SP: Fn(Progress) + Sync, | ||||
|     { | ||||
|         let index = indexing_context.index; | ||||
|         let rtxn = index.read_txn()?; | ||||
|   | ||||
| @@ -15,23 +15,22 @@ pub use geo::*; | ||||
| pub use searchable::*; | ||||
| pub use vectors::EmbeddingExtractor; | ||||
|  | ||||
| use super::indexer::document_changes::{DocumentChanges, IndexingContext, Progress}; | ||||
| use super::steps::Step; | ||||
| use super::indexer::document_changes::{DocumentChanges, IndexingContext}; | ||||
| use super::steps::IndexingStep; | ||||
| use super::thread_local::{FullySend, ThreadLocal}; | ||||
| use crate::update::GrenadParameters; | ||||
| use crate::Result; | ||||
|  | ||||
| pub trait DocidsExtractor { | ||||
|     fn run_extraction<'pl, 'fid, 'indexer, 'index, 'extractor, DC: DocumentChanges<'pl>, MSP, SP>( | ||||
|     fn run_extraction<'pl, 'fid, 'indexer, 'index, 'extractor, DC: DocumentChanges<'pl>, MSP>( | ||||
|         grenad_parameters: GrenadParameters, | ||||
|         document_changes: &DC, | ||||
|         indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP, SP>, | ||||
|         indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP>, | ||||
|         extractor_allocs: &'extractor mut ThreadLocal<FullySend<Bump>>, | ||||
|         step: Step, | ||||
|         step: IndexingStep, | ||||
|     ) -> Result<Vec<BalancedCaches<'extractor>>> | ||||
|     where | ||||
|         MSP: Fn() -> bool + Sync, | ||||
|         SP: Fn(Progress) + Sync; | ||||
|         MSP: Fn() -> bool + Sync; | ||||
| } | ||||
|  | ||||
| /// TODO move in permissive json pointer | ||||
|   | ||||
| @@ -11,10 +11,10 @@ use super::tokenize_document::{tokenizer_builder, DocumentTokenizer}; | ||||
| use crate::update::new::extract::cache::BalancedCaches; | ||||
| use crate::update::new::extract::perm_json_p::contained_in; | ||||
| use crate::update::new::indexer::document_changes::{ | ||||
|     extract, DocumentChangeContext, DocumentChanges, Extractor, IndexingContext, Progress, | ||||
|     extract, DocumentChangeContext, DocumentChanges, Extractor, IndexingContext, | ||||
| }; | ||||
| use crate::update::new::ref_cell_ext::RefCellExt as _; | ||||
| use crate::update::new::steps::Step; | ||||
| use crate::update::new::steps::IndexingStep; | ||||
| use crate::update::new::thread_local::{FullySend, MostlySend, ThreadLocal}; | ||||
| use crate::update::new::DocumentChange; | ||||
| use crate::update::GrenadParameters; | ||||
| @@ -239,25 +239,15 @@ impl<'a, 'extractor> Extractor<'extractor> for WordDocidsExtractorData<'a> { | ||||
| pub struct WordDocidsExtractors; | ||||
|  | ||||
| impl WordDocidsExtractors { | ||||
|     pub fn run_extraction< | ||||
|         'pl, | ||||
|         'fid, | ||||
|         'indexer, | ||||
|         'index, | ||||
|         'extractor, | ||||
|         DC: DocumentChanges<'pl>, | ||||
|         MSP, | ||||
|         SP, | ||||
|     >( | ||||
|     pub fn run_extraction<'pl, 'fid, 'indexer, 'index, 'extractor, DC: DocumentChanges<'pl>, MSP>( | ||||
|         grenad_parameters: GrenadParameters, | ||||
|         document_changes: &DC, | ||||
|         indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP, SP>, | ||||
|         indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP>, | ||||
|         extractor_allocs: &'extractor mut ThreadLocal<FullySend<Bump>>, | ||||
|         step: Step, | ||||
|         step: IndexingStep, | ||||
|     ) -> Result<WordDocidsCaches<'extractor>> | ||||
|     where | ||||
|         MSP: Fn() -> bool + Sync, | ||||
|         SP: Fn(Progress) + Sync, | ||||
|     { | ||||
|         let index = indexing_context.index; | ||||
|         let rtxn = index.read_txn()?; | ||||
|   | ||||
| @@ -14,9 +14,9 @@ use tokenize_document::{tokenizer_builder, DocumentTokenizer}; | ||||
| use super::cache::BalancedCaches; | ||||
| use super::DocidsExtractor; | ||||
| use crate::update::new::indexer::document_changes::{ | ||||
|     extract, DocumentChangeContext, DocumentChanges, Extractor, IndexingContext, Progress, | ||||
|     extract, DocumentChangeContext, DocumentChanges, Extractor, IndexingContext, | ||||
| }; | ||||
| use crate::update::new::steps::Step; | ||||
| use crate::update::new::steps::IndexingStep; | ||||
| use crate::update::new::thread_local::{FullySend, ThreadLocal}; | ||||
| use crate::update::new::DocumentChange; | ||||
| use crate::update::GrenadParameters; | ||||
| @@ -56,16 +56,15 @@ impl<'a, 'extractor, EX: SearchableExtractor + Sync> Extractor<'extractor> | ||||
| } | ||||
|  | ||||
| pub trait SearchableExtractor: Sized + Sync { | ||||
|     fn run_extraction<'pl, 'fid, 'indexer, 'index, 'extractor, DC: DocumentChanges<'pl>, MSP, SP>( | ||||
|     fn run_extraction<'pl, 'fid, 'indexer, 'index, 'extractor, DC: DocumentChanges<'pl>, MSP>( | ||||
|         grenad_parameters: GrenadParameters, | ||||
|         document_changes: &DC, | ||||
|         indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP, SP>, | ||||
|         indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP>, | ||||
|         extractor_allocs: &'extractor mut ThreadLocal<FullySend<Bump>>, | ||||
|         step: Step, | ||||
|         step: IndexingStep, | ||||
|     ) -> Result<Vec<BalancedCaches<'extractor>>> | ||||
|     where | ||||
|         MSP: Fn() -> bool + Sync, | ||||
|         SP: Fn(Progress) + Sync, | ||||
|     { | ||||
|         let rtxn = indexing_context.index.read_txn()?; | ||||
|         let stop_words = indexing_context.index.stop_words(&rtxn)?; | ||||
| @@ -134,16 +133,15 @@ pub trait SearchableExtractor: Sized + Sync { | ||||
| } | ||||
|  | ||||
| impl<T: SearchableExtractor> DocidsExtractor for T { | ||||
|     fn run_extraction<'pl, 'fid, 'indexer, 'index, 'extractor, DC: DocumentChanges<'pl>, MSP, SP>( | ||||
|     fn run_extraction<'pl, 'fid, 'indexer, 'index, 'extractor, DC: DocumentChanges<'pl>, MSP>( | ||||
|         grenad_parameters: GrenadParameters, | ||||
|         document_changes: &DC, | ||||
|         indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP, SP>, | ||||
|         indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP>, | ||||
|         extractor_allocs: &'extractor mut ThreadLocal<FullySend<Bump>>, | ||||
|         step: Step, | ||||
|         step: IndexingStep, | ||||
|     ) -> Result<Vec<BalancedCaches<'extractor>>> | ||||
|     where | ||||
|         MSP: Fn() -> bool + Sync, | ||||
|         SP: Fn(Progress) + Sync, | ||||
|     { | ||||
|         Self::run_extraction( | ||||
|             grenad_parameters, | ||||
|   | ||||
| @@ -1,4 +1,5 @@ | ||||
| use std::cell::{Cell, RefCell}; | ||||
| use std::sync::atomic::Ordering; | ||||
| use std::sync::{Arc, RwLock}; | ||||
|  | ||||
| use bumpalo::Bump; | ||||
| @@ -7,8 +8,9 @@ use rayon::iter::IndexedParallelIterator; | ||||
|  | ||||
| use super::super::document_change::DocumentChange; | ||||
| use crate::fields_ids_map::metadata::FieldIdMapWithMetadata; | ||||
| use crate::progress::{AtomicDocumentStep, Progress}; | ||||
| use crate::update::new::parallel_iterator_ext::ParallelIteratorExt as _; | ||||
| use crate::update::new::steps::Step; | ||||
| use crate::update::new::steps::IndexingStep; | ||||
| use crate::update::new::thread_local::{FullySend, MostlySend, ThreadLocal}; | ||||
| use crate::{FieldsIdsMap, GlobalFieldsIdsMap, Index, InternalError, Result}; | ||||
|  | ||||
| @@ -133,10 +135,8 @@ pub struct IndexingContext< | ||||
|     'indexer, // covariant lifetime of objects that are borrowed  during the entire indexing operation | ||||
|     'index,   // covariant lifetime of the index | ||||
|     MSP, | ||||
|     SP, | ||||
| > where | ||||
|     MSP: Fn() -> bool + Sync, | ||||
|     SP: Fn(Progress) + Sync, | ||||
| { | ||||
|     pub index: &'index Index, | ||||
|     pub db_fields_ids_map: &'indexer FieldsIdsMap, | ||||
| @@ -144,7 +144,8 @@ pub struct IndexingContext< | ||||
|     pub doc_allocs: &'indexer ThreadLocal<FullySend<Cell<Bump>>>, | ||||
|     pub fields_ids_map_store: &'indexer ThreadLocal<FullySend<RefCell<GlobalFieldsIdsMap<'fid>>>>, | ||||
|     pub must_stop_processing: &'indexer MSP, | ||||
|     pub send_progress: &'indexer SP, | ||||
|     // TODO: TAMO: Rename field to progress | ||||
|     pub send_progress: &'indexer Progress, | ||||
| } | ||||
|  | ||||
| impl< | ||||
| @@ -152,18 +153,15 @@ impl< | ||||
|         'indexer, // covariant lifetime of objects that are borrowed  during the entire indexing operation | ||||
|         'index,   // covariant lifetime of the index | ||||
|         MSP, | ||||
|         SP, | ||||
|     > Copy | ||||
|     for IndexingContext< | ||||
|         'fid,     // invariant lifetime of fields ids map | ||||
|         'indexer, // covariant lifetime of objects that are borrowed  during the entire indexing operation | ||||
|         'index,   // covariant lifetime of the index | ||||
|         MSP, | ||||
|         SP, | ||||
|     > | ||||
| where | ||||
|     MSP: Fn() -> bool + Sync, | ||||
|     SP: Fn(Progress) + Sync, | ||||
| { | ||||
| } | ||||
|  | ||||
| @@ -172,18 +170,15 @@ impl< | ||||
|         'indexer, // covariant lifetime of objects that are borrowed  during the entire indexing operation | ||||
|         'index,   // covariant lifetime of the index | ||||
|         MSP, | ||||
|         SP, | ||||
|     > Clone | ||||
|     for IndexingContext< | ||||
|         'fid,     // invariant lifetime of fields ids map | ||||
|         'indexer, // covariant lifetime of objects that are borrowed  during the entire indexing operation | ||||
|         'index,   // covariant lifetime of the index | ||||
|         MSP, | ||||
|         SP, | ||||
|     > | ||||
| where | ||||
|     MSP: Fn() -> bool + Sync, | ||||
|     SP: Fn(Progress) + Sync, | ||||
| { | ||||
|     fn clone(&self) -> Self { | ||||
|         *self | ||||
| @@ -202,7 +197,6 @@ pub fn extract< | ||||
|     EX, | ||||
|     DC: DocumentChanges<'pl>, | ||||
|     MSP, | ||||
|     SP, | ||||
| >( | ||||
|     document_changes: &DC, | ||||
|     extractor: &EX, | ||||
| @@ -214,17 +208,17 @@ pub fn extract< | ||||
|         fields_ids_map_store, | ||||
|         must_stop_processing, | ||||
|         send_progress, | ||||
|     }: IndexingContext<'fid, 'indexer, 'index, MSP, SP>, | ||||
|     }: IndexingContext<'fid, 'indexer, 'index, MSP>, | ||||
|     extractor_allocs: &'extractor mut ThreadLocal<FullySend<Bump>>, | ||||
|     datastore: &'data ThreadLocal<EX::Data>, | ||||
|     step: Step, | ||||
|     step: IndexingStep, | ||||
| ) -> Result<()> | ||||
| where | ||||
|     EX: Extractor<'extractor>, | ||||
|     MSP: Fn() -> bool + Sync, | ||||
|     SP: Fn(Progress) + Sync, | ||||
| { | ||||
|     tracing::trace!("We are resetting the extractor allocators"); | ||||
|     send_progress.update_progress(step); | ||||
|     // Clean up and reuse the extractor allocs | ||||
|     for extractor_alloc in extractor_allocs.iter_mut() { | ||||
|         tracing::trace!("\tWith {} bytes reset", extractor_alloc.0.allocated_bytes()); | ||||
| @@ -232,6 +226,8 @@ where | ||||
|     } | ||||
|  | ||||
|     let total_documents = document_changes.len() as u32; | ||||
|     let (step, progress_step) = AtomicDocumentStep::new(total_documents); | ||||
|     send_progress.update_progress(progress_step); | ||||
|  | ||||
|     let pi = document_changes.iter(CHUNK_SIZE); | ||||
|     pi.enumerate().try_arc_for_each_try_init( | ||||
| @@ -253,7 +249,7 @@ where | ||||
|             } | ||||
|             let finished_documents = (finished_documents * CHUNK_SIZE) as u32; | ||||
|  | ||||
|             (send_progress)(Progress::from_step_substep(step, finished_documents, total_documents)); | ||||
|             step.store(finished_documents, Ordering::Relaxed); | ||||
|  | ||||
|             // Clean up and reuse the document-specific allocator | ||||
|             context.doc_alloc.reset(); | ||||
| @@ -271,32 +267,7 @@ where | ||||
|             res | ||||
|         }, | ||||
|     )?; | ||||
|  | ||||
|     (send_progress)(Progress::from_step_substep(step, total_documents, total_documents)); | ||||
|     step.store(total_documents, Ordering::Relaxed); | ||||
|  | ||||
|     Ok(()) | ||||
| } | ||||
|  | ||||
| pub struct Progress { | ||||
|     pub finished_steps: u16, | ||||
|     pub total_steps: u16, | ||||
|     pub step_name: &'static str, | ||||
|     pub finished_total_substep: Option<(u32, u32)>, | ||||
| } | ||||
|  | ||||
| impl Progress { | ||||
|     pub fn from_step(step: Step) -> Self { | ||||
|         Self { | ||||
|             finished_steps: step.finished_steps(), | ||||
|             total_steps: Step::total_steps(), | ||||
|             step_name: step.name(), | ||||
|             finished_total_substep: None, | ||||
|         } | ||||
|     } | ||||
|     pub fn from_step_substep(step: Step, finished_substep: u32, total_substep: u32) -> Self { | ||||
|         Self { | ||||
|             finished_total_substep: Some((finished_substep, total_substep)), | ||||
|             ..Progress::from_step(step) | ||||
|         } | ||||
|     } | ||||
| } | ||||
|   | ||||
| @@ -92,11 +92,12 @@ mod test { | ||||
|  | ||||
|     use crate::fields_ids_map::metadata::{FieldIdMapWithMetadata, MetadataBuilder}; | ||||
|     use crate::index::tests::TempIndex; | ||||
|     use crate::progress::Progress; | ||||
|     use crate::update::new::indexer::document_changes::{ | ||||
|         extract, DocumentChangeContext, Extractor, IndexingContext, | ||||
|     }; | ||||
|     use crate::update::new::indexer::DocumentDeletion; | ||||
|     use crate::update::new::steps::Step; | ||||
|     use crate::update::new::steps::IndexingStep; | ||||
|     use crate::update::new::thread_local::{MostlySend, ThreadLocal}; | ||||
|     use crate::update::new::DocumentChange; | ||||
|     use crate::DocumentId; | ||||
| @@ -164,7 +165,7 @@ mod test { | ||||
|             doc_allocs: &doc_allocs, | ||||
|             fields_ids_map_store: &fields_ids_map_store, | ||||
|             must_stop_processing: &(|| false), | ||||
|             send_progress: &(|_progress| {}), | ||||
|             send_progress: &Progress::default(), | ||||
|         }; | ||||
|  | ||||
|         for _ in 0..3 { | ||||
| @@ -176,7 +177,7 @@ mod test { | ||||
|                 context, | ||||
|                 &mut extractor_allocs, | ||||
|                 &datastore, | ||||
|                 Step::ExtractingDocuments, | ||||
|                 IndexingStep::ExtractingDocuments, | ||||
|             ) | ||||
|             .unwrap(); | ||||
|  | ||||
|   | ||||
| @@ -1,3 +1,5 @@ | ||||
| use std::sync::atomic::Ordering; | ||||
|  | ||||
| use bumpalo::collections::CollectIn; | ||||
| use bumpalo::Bump; | ||||
| use bumparaw_collections::RawMap; | ||||
| @@ -10,11 +12,12 @@ use serde_json::value::RawValue; | ||||
| use serde_json::Deserializer; | ||||
|  | ||||
| use super::super::document_change::DocumentChange; | ||||
| use super::document_changes::{DocumentChangeContext, DocumentChanges, Progress}; | ||||
| use super::document_changes::{DocumentChangeContext, DocumentChanges}; | ||||
| use super::retrieve_or_guess_primary_key; | ||||
| use crate::documents::PrimaryKey; | ||||
| use crate::progress::{AtomicSubStep, Progress}; | ||||
| use crate::update::new::document::Versions; | ||||
| use crate::update::new::steps::Step; | ||||
| use crate::update::new::steps::IndexingStep; | ||||
| use crate::update::new::thread_local::MostlySend; | ||||
| use crate::update::new::{Deletion, Insertion, Update}; | ||||
| use crate::update::{AvailableIds, IndexDocumentsMethod}; | ||||
| @@ -45,7 +48,7 @@ impl<'pl> DocumentOperation<'pl> { | ||||
|  | ||||
|     #[allow(clippy::too_many_arguments)] | ||||
|     #[tracing::instrument(level = "trace", skip_all, target = "indexing::document_operation")] | ||||
|     pub fn into_changes<MSP, SP>( | ||||
|     pub fn into_changes<MSP>( | ||||
|         self, | ||||
|         indexer: &'pl Bump, | ||||
|         index: &Index, | ||||
| @@ -53,12 +56,12 @@ impl<'pl> DocumentOperation<'pl> { | ||||
|         primary_key_from_op: Option<&'pl str>, | ||||
|         new_fields_ids_map: &mut FieldsIdsMap, | ||||
|         must_stop_processing: &MSP, | ||||
|         send_progress: &SP, | ||||
|         progress: Progress, | ||||
|     ) -> Result<(DocumentOperationChanges<'pl>, Vec<PayloadStats>, Option<PrimaryKey<'pl>>)> | ||||
|     where | ||||
|         MSP: Fn() -> bool, | ||||
|         SP: Fn(Progress), | ||||
|     { | ||||
|         progress.update_progress(IndexingStep::PreparingPayloads); | ||||
|         let Self { operations, method } = self; | ||||
|  | ||||
|         let documents_ids = index.documents_ids(rtxn)?; | ||||
| @@ -68,16 +71,15 @@ impl<'pl> DocumentOperation<'pl> { | ||||
|         let mut primary_key = None; | ||||
|  | ||||
|         let payload_count = operations.len(); | ||||
|         let (step, progress_step) = | ||||
|             AtomicSubStep::<crate::progress::Document>::new(payload_count as u32); | ||||
|         progress.update_progress(progress_step); | ||||
|  | ||||
|         for (payload_index, operation) in operations.into_iter().enumerate() { | ||||
|             if must_stop_processing() { | ||||
|                 return Err(InternalError::AbortedIndexation.into()); | ||||
|             } | ||||
|             send_progress(Progress::from_step_substep( | ||||
|                 Step::PreparingPayloads, | ||||
|                 payload_index as u32, | ||||
|                 payload_count as u32, | ||||
|             )); | ||||
|             step.store(payload_index as u32, Ordering::Relaxed); | ||||
|  | ||||
|             let mut bytes = 0; | ||||
|             let result = match operation { | ||||
| @@ -118,12 +120,7 @@ impl<'pl> DocumentOperation<'pl> { | ||||
|             }; | ||||
|             operations_stats.push(PayloadStats { document_count, bytes, error }); | ||||
|         } | ||||
|  | ||||
|         send_progress(Progress::from_step_substep( | ||||
|             Step::PreparingPayloads, | ||||
|             payload_count as u32, | ||||
|             payload_count as u32, | ||||
|         )); | ||||
|         step.store(payload_count as u32, Ordering::Relaxed); | ||||
|  | ||||
|         // TODO We must drain the HashMap into a Vec because rayon::hash_map::IntoIter: !Clone | ||||
|         let mut docids_version_offsets: bumpalo::collections::vec::Vec<_> = | ||||
|   | ||||
| @@ -5,7 +5,7 @@ use std::thread::{self, Builder}; | ||||
|  | ||||
| use big_s::S; | ||||
| use bumparaw_collections::RawMap; | ||||
| use document_changes::{extract, DocumentChanges, IndexingContext, Progress}; | ||||
| use document_changes::{extract, DocumentChanges, IndexingContext}; | ||||
| pub use document_deletion::DocumentDeletion; | ||||
| pub use document_operation::{DocumentOperation, PayloadStats}; | ||||
| use hashbrown::HashMap; | ||||
| @@ -22,7 +22,7 @@ use super::channel::*; | ||||
| use super::extract::*; | ||||
| use super::facet_search_builder::FacetSearchBuilder; | ||||
| use super::merger::FacetFieldIdsDelta; | ||||
| use super::steps::Step; | ||||
| use super::steps::IndexingStep; | ||||
| use super::thread_local::ThreadLocal; | ||||
| use super::word_fst_builder::{PrefixData, PrefixDelta, WordFstBuilder}; | ||||
| use super::words_prefix_docids::{ | ||||
| @@ -33,6 +33,7 @@ use crate::documents::{PrimaryKey, DEFAULT_PRIMARY_KEY}; | ||||
| use crate::facet::FacetType; | ||||
| use crate::fields_ids_map::metadata::{FieldIdMapWithMetadata, MetadataBuilder}; | ||||
| use crate::index::main_key::{WORDS_FST_KEY, WORDS_PREFIXES_FST_KEY}; | ||||
| use crate::progress::Progress; | ||||
| use crate::proximity::ProximityPrecision; | ||||
| use crate::update::del_add::DelAdd; | ||||
| use crate::update::new::extract::EmbeddingExtractor; | ||||
| @@ -60,7 +61,7 @@ mod update_by_function; | ||||
| /// | ||||
| /// TODO return stats | ||||
| #[allow(clippy::too_many_arguments)] // clippy: 😝 | ||||
| pub fn index<'pl, 'indexer, 'index, DC, MSP, SP>( | ||||
| pub fn index<'pl, 'indexer, 'index, DC, MSP>( | ||||
|     wtxn: &mut RwTxn, | ||||
|     index: &'index Index, | ||||
|     pool: &ThreadPoolNoAbort, | ||||
| @@ -71,12 +72,11 @@ pub fn index<'pl, 'indexer, 'index, DC, MSP, SP>( | ||||
|     document_changes: &DC, | ||||
|     embedders: EmbeddingConfigs, | ||||
|     must_stop_processing: &'indexer MSP, | ||||
|     send_progress: &'indexer SP, | ||||
|     send_progress: &'indexer Progress, | ||||
| ) -> Result<()> | ||||
| where | ||||
|     DC: DocumentChanges<'pl>, | ||||
|     MSP: Fn() -> bool + Sync, | ||||
|     SP: Fn(Progress) + Sync, | ||||
| { | ||||
|     let mut bbbuffers = Vec::new(); | ||||
|     let finished_extraction = AtomicBool::new(false); | ||||
| @@ -159,7 +159,7 @@ where | ||||
|                         indexing_context, | ||||
|                         &mut extractor_allocs, | ||||
|                         &datastore, | ||||
|                         Step::ExtractingDocuments, | ||||
|                         IndexingStep::ExtractingDocuments, | ||||
|                     )?; | ||||
|                 } | ||||
|                 { | ||||
| @@ -191,7 +191,7 @@ where | ||||
|                                 indexing_context, | ||||
|                                 &mut extractor_allocs, | ||||
|                                 &extractor_sender.field_id_docid_facet_sender(), | ||||
|                                 Step::ExtractingFacets | ||||
|                                 IndexingStep::ExtractingFacets | ||||
|                             )? | ||||
|                     }; | ||||
|  | ||||
| @@ -224,7 +224,7 @@ where | ||||
|                             document_changes, | ||||
|                             indexing_context, | ||||
|                             &mut extractor_allocs, | ||||
|                             Step::ExtractingWords | ||||
|                             IndexingStep::ExtractingWords | ||||
|                         )? | ||||
|                     }; | ||||
|  | ||||
| @@ -302,7 +302,7 @@ where | ||||
|                             document_changes, | ||||
|                             indexing_context, | ||||
|                             &mut extractor_allocs, | ||||
|                             Step::ExtractingWordProximity, | ||||
|                             IndexingStep::ExtractingWordProximity, | ||||
|                         )? | ||||
|                     }; | ||||
|  | ||||
| @@ -338,7 +338,7 @@ where | ||||
|                             indexing_context, | ||||
|                             &mut extractor_allocs, | ||||
|                             &datastore, | ||||
|                             Step::ExtractingEmbeddings, | ||||
|                             IndexingStep::ExtractingEmbeddings, | ||||
|                         )?; | ||||
|                     } | ||||
|                     { | ||||
| @@ -371,7 +371,7 @@ where | ||||
|                             indexing_context, | ||||
|                             &mut extractor_allocs, | ||||
|                             &datastore, | ||||
|                             Step::WritingGeoPoints | ||||
|                             IndexingStep::WritingGeoPoints | ||||
|                         )?; | ||||
|                     } | ||||
|  | ||||
| @@ -383,9 +383,7 @@ where | ||||
|                         &indexing_context.must_stop_processing, | ||||
|                     )?; | ||||
|                 } | ||||
|  | ||||
|                 (indexing_context.send_progress)(Progress::from_step(Step::WritingToDatabase)); | ||||
|  | ||||
|                 indexing_context.send_progress.update_progress(IndexingStep::WritingToDatabase); | ||||
|                 finished_extraction.store(true, std::sync::atomic::Ordering::Relaxed); | ||||
|  | ||||
|                 Result::Ok((facet_field_ids_delta, index_embeddings)) | ||||
| @@ -485,7 +483,7 @@ where | ||||
|             )?; | ||||
|         } | ||||
|  | ||||
|         (indexing_context.send_progress)(Progress::from_step(Step::WaitingForExtractors)); | ||||
|         indexing_context.send_progress.update_progress(IndexingStep::WaitingForExtractors); | ||||
|  | ||||
|         let (facet_field_ids_delta, index_embeddings) = extractor_handle.join().unwrap()?; | ||||
|  | ||||
| @@ -498,10 +496,9 @@ where | ||||
|                 break 'vectors; | ||||
|             } | ||||
|  | ||||
|             (indexing_context.send_progress)(Progress::from_step( | ||||
|                 Step::WritingEmbeddingsToDatabase, | ||||
|             )); | ||||
|  | ||||
|             indexing_context | ||||
|                 .send_progress | ||||
|                 .update_progress(IndexingStep::WritingEmbeddingsToDatabase); | ||||
|             let mut rng = rand::rngs::StdRng::seed_from_u64(42); | ||||
|             for (_index, (_embedder_name, _embedder, writer, dimensions)) in &mut arroy_writers { | ||||
|                 let dimensions = *dimensions; | ||||
| @@ -517,21 +514,19 @@ where | ||||
|             index.put_embedding_configs(wtxn, index_embeddings)?; | ||||
|         } | ||||
|  | ||||
|         (indexing_context.send_progress)(Progress::from_step(Step::PostProcessingFacets)); | ||||
|  | ||||
|         indexing_context.send_progress.update_progress(IndexingStep::PostProcessingFacets); | ||||
|         if index.facet_search(wtxn)? { | ||||
|             compute_facet_search_database(index, wtxn, global_fields_ids_map)?; | ||||
|         } | ||||
|  | ||||
|         compute_facet_level_database(index, wtxn, facet_field_ids_delta)?; | ||||
|  | ||||
|         (indexing_context.send_progress)(Progress::from_step(Step::PostProcessingWords)); | ||||
|  | ||||
|         indexing_context.send_progress.update_progress(IndexingStep::PostProcessingWords); | ||||
|         if let Some(prefix_delta) = compute_word_fst(index, wtxn)? { | ||||
|             compute_prefix_database(index, wtxn, prefix_delta, grenad_parameters)?; | ||||
|         } | ||||
|  | ||||
|         (indexing_context.send_progress)(Progress::from_step(Step::Finalizing)); | ||||
|         indexing_context.send_progress.update_progress(IndexingStep::Finalizing); | ||||
|  | ||||
|         Ok(()) as Result<_> | ||||
|     })?; | ||||
|   | ||||
| @@ -1,8 +1,12 @@ | ||||
| use std::borrow::Cow; | ||||
|  | ||||
| use enum_iterator::Sequence; | ||||
|  | ||||
| use crate::progress::Step; | ||||
|  | ||||
| #[derive(Debug, Clone, Copy, PartialEq, Eq, Sequence)] | ||||
| #[repr(u16)] | ||||
| pub enum Step { | ||||
| #[repr(u8)] | ||||
| pub enum IndexingStep { | ||||
|     PreparingPayloads, | ||||
|     ExtractingDocuments, | ||||
|     ExtractingFacets, | ||||
| @@ -18,30 +22,31 @@ pub enum Step { | ||||
|     Finalizing, | ||||
| } | ||||
|  | ||||
| impl Step { | ||||
|     pub fn name(&self) -> &'static str { | ||||
| impl Step for IndexingStep { | ||||
|     fn name(&self) -> Cow<'static, str> { | ||||
|         match self { | ||||
|             Step::PreparingPayloads => "preparing update file", | ||||
|             Step::ExtractingDocuments => "extracting documents", | ||||
|             Step::ExtractingFacets => "extracting facets", | ||||
|             Step::ExtractingWords => "extracting words", | ||||
|             Step::ExtractingWordProximity => "extracting word proximity", | ||||
|             Step::ExtractingEmbeddings => "extracting embeddings", | ||||
|             Step::WritingGeoPoints => "writing geo points", | ||||
|             Step::WritingToDatabase => "writing to database", | ||||
|             Step::WaitingForExtractors => "waiting for extractors", | ||||
|             Step::WritingEmbeddingsToDatabase => "writing embeddings to database", | ||||
|             Step::PostProcessingFacets => "post-processing facets", | ||||
|             Step::PostProcessingWords => "post-processing words", | ||||
|             Step::Finalizing => "finalizing", | ||||
|             IndexingStep::PreparingPayloads => "preparing update file", | ||||
|             IndexingStep::ExtractingDocuments => "extracting documents", | ||||
|             IndexingStep::ExtractingFacets => "extracting facets", | ||||
|             IndexingStep::ExtractingWords => "extracting words", | ||||
|             IndexingStep::ExtractingWordProximity => "extracting word proximity", | ||||
|             IndexingStep::ExtractingEmbeddings => "extracting embeddings", | ||||
|             IndexingStep::WritingGeoPoints => "writing geo points", | ||||
|             IndexingStep::WritingToDatabase => "writing to database", | ||||
|             IndexingStep::WaitingForExtractors => "waiting for extractors", | ||||
|             IndexingStep::WritingEmbeddingsToDatabase => "writing embeddings to database", | ||||
|             IndexingStep::PostProcessingFacets => "post-processing facets", | ||||
|             IndexingStep::PostProcessingWords => "post-processing words", | ||||
|             IndexingStep::Finalizing => "finalizing", | ||||
|         } | ||||
|         .into() | ||||
|     } | ||||
|  | ||||
|     pub fn finished_steps(self) -> u16 { | ||||
|         self as u16 | ||||
|     fn current(&self) -> u32 { | ||||
|         *self as u32 | ||||
|     } | ||||
|  | ||||
|     pub const fn total_steps() -> u16 { | ||||
|         Self::CARDINALITY as u16 | ||||
|     fn total(&self) -> u32 { | ||||
|         Self::CARDINALITY as u32 | ||||
|     } | ||||
| } | ||||
|   | ||||
| @@ -3,6 +3,7 @@ use bumpalo::Bump; | ||||
| use heed::EnvOpenOptions; | ||||
| use maplit::hashset; | ||||
| use milli::documents::mmap_from_objects; | ||||
| use milli::progress::Progress; | ||||
| use milli::update::new::indexer; | ||||
| use milli::update::{IndexDocumentsMethod, IndexerConfig, Settings}; | ||||
| use milli::vector::EmbeddingConfigs; | ||||
| @@ -57,7 +58,7 @@ fn test_facet_distribution_with_no_facet_values() { | ||||
|             None, | ||||
|             &mut new_fields_ids_map, | ||||
|             &|| false, | ||||
|             &|_progress| (), | ||||
|             Progress::default(), | ||||
|         ) | ||||
|         .unwrap(); | ||||
|  | ||||
| @@ -72,7 +73,7 @@ fn test_facet_distribution_with_no_facet_values() { | ||||
|         &document_changes, | ||||
|         embedders, | ||||
|         &|| false, | ||||
|         &|_| (), | ||||
|         &Progress::default(), | ||||
|     ) | ||||
|     .unwrap(); | ||||
|  | ||||
|   | ||||
| @@ -7,6 +7,7 @@ use bumpalo::Bump; | ||||
| use either::{Either, Left, Right}; | ||||
| use heed::EnvOpenOptions; | ||||
| use maplit::{btreemap, hashset}; | ||||
| use milli::progress::Progress; | ||||
| use milli::update::new::indexer; | ||||
| use milli::update::{IndexDocumentsMethod, IndexerConfig, Settings}; | ||||
| use milli::vector::EmbeddingConfigs; | ||||
| @@ -90,7 +91,7 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index { | ||||
|             None, | ||||
|             &mut new_fields_ids_map, | ||||
|             &|| false, | ||||
|             &|_progress| (), | ||||
|             Progress::default(), | ||||
|         ) | ||||
|         .unwrap(); | ||||
|  | ||||
| @@ -109,7 +110,7 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index { | ||||
|         &document_changes, | ||||
|         embedders, | ||||
|         &|| false, | ||||
|         &|_| (), | ||||
|         &Progress::default(), | ||||
|     ) | ||||
|     .unwrap(); | ||||
|  | ||||
|   | ||||
| @@ -5,6 +5,7 @@ use bumpalo::Bump; | ||||
| use heed::EnvOpenOptions; | ||||
| use itertools::Itertools; | ||||
| use maplit::hashset; | ||||
| use milli::progress::Progress; | ||||
| use milli::update::new::indexer; | ||||
| use milli::update::{IndexDocumentsMethod, IndexerConfig, Settings}; | ||||
| use milli::vector::EmbeddingConfigs; | ||||
| @@ -326,7 +327,7 @@ fn criteria_ascdesc() { | ||||
|             None, | ||||
|             &mut new_fields_ids_map, | ||||
|             &|| false, | ||||
|             &|_progress| (), | ||||
|             Progress::default(), | ||||
|         ) | ||||
|         .unwrap(); | ||||
|  | ||||
| @@ -341,7 +342,7 @@ fn criteria_ascdesc() { | ||||
|         &document_changes, | ||||
|         embedders, | ||||
|         &|| false, | ||||
|         &|_| (), | ||||
|         &Progress::default(), | ||||
|     ) | ||||
|     .unwrap(); | ||||
|  | ||||
|   | ||||
| @@ -3,6 +3,7 @@ use std::collections::BTreeSet; | ||||
| use bumpalo::Bump; | ||||
| use heed::EnvOpenOptions; | ||||
| use milli::documents::mmap_from_objects; | ||||
| use milli::progress::Progress; | ||||
| use milli::update::new::indexer; | ||||
| use milli::update::{IndexDocumentsMethod, IndexerConfig, Settings}; | ||||
| use milli::vector::EmbeddingConfigs; | ||||
| @@ -135,7 +136,7 @@ fn test_typo_disabled_on_word() { | ||||
|             None, | ||||
|             &mut new_fields_ids_map, | ||||
|             &|| false, | ||||
|             &|_progress| (), | ||||
|             Progress::default(), | ||||
|         ) | ||||
|         .unwrap(); | ||||
|  | ||||
| @@ -150,7 +151,7 @@ fn test_typo_disabled_on_word() { | ||||
|         &document_changes, | ||||
|         embedders, | ||||
|         &|| false, | ||||
|         &|_| (), | ||||
|         &Progress::default(), | ||||
|     ) | ||||
|     .unwrap(); | ||||
|  | ||||
|   | ||||
		Reference in New Issue
	
	Block a user