Integrate the hannoy progress

This commit is contained in:
Clément Renault
2025-08-11 18:05:17 +02:00
committed by Louis Dureuil
parent aef07f4bfa
commit 0d4b78a217
6 changed files with 51 additions and 24 deletions

View File

@ -96,6 +96,7 @@ url = "2.5.4"
hashbrown = "0.15.4"
bumpalo = "3.18.1"
bumparaw-collections = "0.1.4"
steppe = { version = "0.4.0", default-features = false }
thread_local = "1.1.9"
allocator-api2 = "0.3.0"
rustc-hash = "2.1.1"

View File

@ -96,14 +96,6 @@ impl Progress {
durations.drain(..).map(|(name, duration)| (name, format!("{duration:.2?}"))).collect()
}
// TODO: ideally we should expose the progress in a way that lets arroy use it directly
// pub(crate) fn update_progress_from_hannoy(&self, progress: hannoy::WriterProgress) {
// self.update_progress(progress.main);
// if let Some(sub) = progress.sub {
// self.update_progress(sub);
// }
// }
}
/// Generate the names associated with the durations and push them.
@ -317,3 +309,27 @@ impl<U: Send + Sync + 'static> Step for VariableNameStep<U> {
// self.max
// }
// }
// Integration with steppe
/// Allows a `Progress` to be used wherever a `steppe::Progress` is expected.
impl steppe::Progress for Progress {
    /// Wraps the incoming `steppe` step in `Compat` and forwards it to
    /// `update_progress`.
    fn update(&self, sub_progress: impl steppe::Step) {
        let adapted = Compat(sub_progress);
        self.update_progress(adapted);
    }
}
/// Adapter wrapping a `steppe::Step` so it can be used as this module's `Step`.
struct Compat<T: steppe::Step>(T);

impl<T: steppe::Step> Step for Compat<T> {
    /// Returns the name reported by the wrapped step.
    fn name(&self) -> Cow<'static, str> {
        let Self(inner) = self;
        inner.name().into()
    }

    /// Returns the wrapped step's current value converted to `u32`,
    /// or `u32::MAX` when the conversion fails.
    fn current(&self) -> u32 {
        let Self(inner) = self;
        let narrowed: Result<u32, _> = inner.current().try_into();
        narrowed.unwrap_or(u32::MAX)
    }

    /// Returns the wrapped step's total converted to `u32`,
    /// or `u32::MAX` when the conversion fails.
    fn total(&self) -> u32 {
        let Self(inner) = self;
        let narrowed: Result<u32, _> = inner.total().try_into();
        narrowed.unwrap_or(u32::MAX)
    }
}

View File

@ -526,7 +526,7 @@ where
writer.build_and_quantize(
wtxn,
// In the settings we don't have any progress to share
&Progress::default(),
Progress::default(),
&mut rng,
dimension,
is_quantizing,

View File

@ -136,7 +136,7 @@ where
.unwrap_or(false);
writer.build_and_quantize(
wtxn,
progress,
progress.clone(),
&mut rng,
dimensions,
is_being_quantized,

View File

@ -140,7 +140,7 @@ impl VectorStore {
pub fn build_and_quantize<R: rand::Rng + rand::SeedableRng>(
&mut self,
wtxn: &mut RwTxn,
progress: &Progress,
progress: Progress,
rng: &mut R,
dimension: usize,
quantizing: bool,
@ -151,12 +151,12 @@ impl VectorStore {
if self.quantized {
let writer = hannoy::Writer::new(self.quantized_db(), index, dimension);
if writer.need_build(wtxn)? {
writer
.builder(rng)
// .progress(|step| progress.update_progress_from_hannoy(step))
let mut builder = writer.builder(rng).progress(progress.clone());
builder
.available_memory(hannoy_memory.unwrap_or(usize::MAX))
.cancel(cancel)
.ef_construction(HANNOY_EF_CONSTRUCTION)
.build::<HANNOY_M, HANNOY_M0>(wtxn)?
.build::<HANNOY_M, HANNOY_M0>(wtxn)?;
} else if writer.is_empty(wtxn)? {
continue;
}
@ -169,18 +169,16 @@ impl VectorStore {
// sensitive.
if quantizing && !self.quantized {
let writer = writer.prepare_changing_distance::<Hamming>(wtxn)?;
writer
.builder(rng)
let mut builder = writer.builder(rng).progress(progress.clone());
builder
.available_memory(hannoy_memory.unwrap_or(usize::MAX))
// .progress(|step| progress.update_progress_from_hannoy(step))
.cancel(cancel)
.ef_construction(HANNOY_EF_CONSTRUCTION)
.build::<HANNOY_M, HANNOY_M0>(wtxn)?;
} else if writer.need_build(wtxn)? {
writer
.builder(rng)
let mut builder = writer.builder(rng).progress(progress.clone());
builder
.available_memory(hannoy_memory.unwrap_or(usize::MAX))
// .progress(|step| progress.update_progress_from_hannoy(step))
.cancel(cancel)
.ef_construction(HANNOY_EF_CONSTRUCTION)
.build::<HANNOY_M, HANNOY_M0>(wtxn)?;