diff --git a/Cargo.lock b/Cargo.lock index 42a90d0ba..061bd95b3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2604,7 +2604,7 @@ dependencies = [ [[package]] name = "hannoy" version = "0.0.2" -source = "git+https://github.com/nnethercott/hannoy?branch=main#93a24c4cdf712152c90d27a2898715f22942c35c" +source = "git+https://github.com/nnethercott/hannoy?branch=main#8d1846b188ed2cc8776fdb86805eefbfbde9ddd1" dependencies = [ "bytemuck", "byteorder", @@ -2617,6 +2617,7 @@ dependencies = [ "rayon", "roaring", "rustc-hash 2.1.1", + "steppe", "thiserror 2.0.12", "tinyvec", "tracing", @@ -3056,9 +3057,9 @@ dependencies = [ [[package]] name = "indexmap" -version = "2.9.0" +version = "2.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cea70ddb795996207ad57735b50c5982d8844f38ba9ee5f1aedcfb708a2aa11e" +checksum = "fe4cd85333e22411419a0bcae1297d25e58c9443848b11dc6a86fefe8c78a661" dependencies = [ "equivalent", "hashbrown 0.15.4", @@ -4004,6 +4005,7 @@ dependencies = [ "smallstr", "smallvec", "smartstring", + "steppe", "tempfile", "thiserror 2.0.12", "thread_local", @@ -5864,6 +5866,16 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" +[[package]] +name = "steppe" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dead99cdf718f37bcd1d22dda9b498f35c5aa22894b755bfd94bf8c2daec9427" +dependencies = [ + "convert_case 0.8.0", + "indexmap", +] + [[package]] name = "strsim" version = "0.10.0" diff --git a/crates/milli/Cargo.toml b/crates/milli/Cargo.toml index 81687de9a..930a044c1 100644 --- a/crates/milli/Cargo.toml +++ b/crates/milli/Cargo.toml @@ -96,6 +96,7 @@ url = "2.5.4" hashbrown = "0.15.4" bumpalo = "3.18.1" bumparaw-collections = "0.1.4" +steppe = { version = "0.4.0", default-features = false } thread_local = "1.1.9" allocator-api2 = "0.3.0" rustc-hash = "2.1.1" diff --git a/crates/milli/src/progress.rs b/crates/milli/src/progress.rs index 2ec4fd6de..fd56c46d6 100644 --- a/crates/milli/src/progress.rs +++ b/crates/milli/src/progress.rs @@ -96,14 +96,6 @@ impl Progress { durations.drain(..).map(|(name, duration)| (name, format!("{duration:.2?}"))).collect() } - - // TODO: ideally we should expose the progress in a way that let arroy use it directly - // pub(crate) fn update_progress_from_hannoy(&self, progress: hannoy::WriterProgress) { - // self.update_progress(progress.main); - // if let Some(sub) = progress.sub { - // self.update_progress(sub); - // } - // } } /// Generate the names associated with the durations and push them. @@ -317,3 +309,27 @@ impl Step for VariableNameStep { // self.max // } // } + +// Integration with steppe + +impl steppe::Progress for Progress { + fn update(&self, sub_progress: impl steppe::Step) { + self.update_progress(Compat(sub_progress)); + } +} + +struct Compat(T); + +impl Step for Compat { + fn name(&self) -> Cow<'static, str> { + self.0.name().into() + } + + fn current(&self) -> u32 { + self.0.current().try_into().unwrap_or(u32::MAX) + } + + fn total(&self) -> u32 { + self.0.total().try_into().unwrap_or(u32::MAX) + } +} diff --git a/crates/milli/src/update/index_documents/mod.rs b/crates/milli/src/update/index_documents/mod.rs index b6f69514e..b2ae1811a 100644 --- a/crates/milli/src/update/index_documents/mod.rs +++ b/crates/milli/src/update/index_documents/mod.rs @@ -526,7 +526,7 @@ where writer.build_and_quantize( wtxn, // In the settings we don't have any progress to share - &Progress::default(), + Progress::default(), &mut rng, dimension, is_quantizing, diff --git a/crates/milli/src/update/new/indexer/write.rs b/crates/milli/src/update/new/indexer/write.rs index a023e1431..9cb014e25 100644 --- a/crates/milli/src/update/new/indexer/write.rs +++ b/crates/milli/src/update/new/indexer/write.rs @@ -136,7 +136,7 @@ where .unwrap_or(false); writer.build_and_quantize( wtxn, - progress, + progress.clone(), &mut rng, dimensions, is_being_quantized, diff --git a/crates/milli/src/vector/mod.rs b/crates/milli/src/vector/mod.rs index ddc5ebffa..bba617782 100644 --- a/crates/milli/src/vector/mod.rs +++ b/crates/milli/src/vector/mod.rs @@ -140,7 +140,7 @@ impl VectorStore { pub fn build_and_quantize( &mut self, wtxn: &mut RwTxn, - progress: &Progress, + progress: Progress, rng: &mut R, dimension: usize, quantizing: bool, @@ -151,12 +151,12 @@ impl VectorStore { if self.quantized { let writer = hannoy::Writer::new(self.quantized_db(), index, dimension); if writer.need_build(wtxn)? { - writer - .builder(rng) - // .progress(|step| progress.update_progress_from_hannoy(step)) + let mut builder = writer.builder(rng).progress(progress.clone()); + builder + .available_memory(hannoy_memory.unwrap_or(usize::MAX)) .cancel(cancel) .ef_construction(HANNOY_EF_CONSTRUCTION) - .build::(wtxn)? + .build::(wtxn)?; } else if writer.is_empty(wtxn)? { continue; } @@ -169,18 +169,16 @@ impl VectorStore { // sensitive. if quantizing && !self.quantized { let writer = writer.prepare_changing_distance::(wtxn)?; - writer - .builder(rng) + let mut builder = writer.builder(rng).progress(progress.clone()); + builder .available_memory(hannoy_memory.unwrap_or(usize::MAX)) - // .progress(|step| progress.update_progress_from_hannoy(step)) .cancel(cancel) .ef_construction(HANNOY_EF_CONSTRUCTION) .build::(wtxn)?; } else if writer.need_build(wtxn)? { - writer - .builder(rng) + let mut builder = writer.builder(rng).progress(progress.clone()); + builder .available_memory(hannoy_memory.unwrap_or(usize::MAX)) - // .progress(|step| progress.update_progress_from_hannoy(step)) .cancel(cancel) .ef_construction(HANNOY_EF_CONSTRUCTION) .build::(wtxn)?;