mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-09-06 04:36:32 +00:00
Integrate the hannoy progress
This commit is contained in:
18
Cargo.lock
generated
18
Cargo.lock
generated
@ -2604,7 +2604,7 @@ dependencies = [
|
|||||||
[[package]]
|
[[package]]
|
||||||
name = "hannoy"
|
name = "hannoy"
|
||||||
version = "0.0.2"
|
version = "0.0.2"
|
||||||
source = "git+https://github.com/nnethercott/hannoy?branch=main#93a24c4cdf712152c90d27a2898715f22942c35c"
|
source = "git+https://github.com/nnethercott/hannoy?branch=main#8d1846b188ed2cc8776fdb86805eefbfbde9ddd1"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"bytemuck",
|
"bytemuck",
|
||||||
"byteorder",
|
"byteorder",
|
||||||
@ -2617,6 +2617,7 @@ dependencies = [
|
|||||||
"rayon",
|
"rayon",
|
||||||
"roaring",
|
"roaring",
|
||||||
"rustc-hash 2.1.1",
|
"rustc-hash 2.1.1",
|
||||||
|
"steppe",
|
||||||
"thiserror 2.0.12",
|
"thiserror 2.0.12",
|
||||||
"tinyvec",
|
"tinyvec",
|
||||||
"tracing",
|
"tracing",
|
||||||
@ -3056,9 +3057,9 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "indexmap"
|
name = "indexmap"
|
||||||
version = "2.9.0"
|
version = "2.10.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "cea70ddb795996207ad57735b50c5982d8844f38ba9ee5f1aedcfb708a2aa11e"
|
checksum = "fe4cd85333e22411419a0bcae1297d25e58c9443848b11dc6a86fefe8c78a661"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"equivalent",
|
"equivalent",
|
||||||
"hashbrown 0.15.4",
|
"hashbrown 0.15.4",
|
||||||
@ -4004,6 +4005,7 @@ dependencies = [
|
|||||||
"smallstr",
|
"smallstr",
|
||||||
"smallvec",
|
"smallvec",
|
||||||
"smartstring",
|
"smartstring",
|
||||||
|
"steppe",
|
||||||
"tempfile",
|
"tempfile",
|
||||||
"thiserror 2.0.12",
|
"thiserror 2.0.12",
|
||||||
"thread_local",
|
"thread_local",
|
||||||
@ -5864,6 +5866,16 @@ version = "1.1.0"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f"
|
checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "steppe"
|
||||||
|
version = "0.4.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "dead99cdf718f37bcd1d22dda9b498f35c5aa22894b755bfd94bf8c2daec9427"
|
||||||
|
dependencies = [
|
||||||
|
"convert_case 0.8.0",
|
||||||
|
"indexmap",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "strsim"
|
name = "strsim"
|
||||||
version = "0.10.0"
|
version = "0.10.0"
|
||||||
|
@ -96,6 +96,7 @@ url = "2.5.4"
|
|||||||
hashbrown = "0.15.4"
|
hashbrown = "0.15.4"
|
||||||
bumpalo = "3.18.1"
|
bumpalo = "3.18.1"
|
||||||
bumparaw-collections = "0.1.4"
|
bumparaw-collections = "0.1.4"
|
||||||
|
steppe = { version = "0.4.0", default-features = false }
|
||||||
thread_local = "1.1.9"
|
thread_local = "1.1.9"
|
||||||
allocator-api2 = "0.3.0"
|
allocator-api2 = "0.3.0"
|
||||||
rustc-hash = "2.1.1"
|
rustc-hash = "2.1.1"
|
||||||
|
@ -96,14 +96,6 @@ impl Progress {
|
|||||||
|
|
||||||
durations.drain(..).map(|(name, duration)| (name, format!("{duration:.2?}"))).collect()
|
durations.drain(..).map(|(name, duration)| (name, format!("{duration:.2?}"))).collect()
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: ideally we should expose the progress in a way that let arroy use it directly
|
|
||||||
// pub(crate) fn update_progress_from_hannoy(&self, progress: hannoy::WriterProgress) {
|
|
||||||
// self.update_progress(progress.main);
|
|
||||||
// if let Some(sub) = progress.sub {
|
|
||||||
// self.update_progress(sub);
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Generate the names associated with the durations and push them.
|
/// Generate the names associated with the durations and push them.
|
||||||
@ -317,3 +309,27 @@ impl<U: Send + Sync + 'static> Step for VariableNameStep<U> {
|
|||||||
// self.max
|
// self.max
|
||||||
// }
|
// }
|
||||||
// }
|
// }
|
||||||
|
|
||||||
|
// Integration with steppe
|
||||||
|
|
||||||
|
impl steppe::Progress for Progress {
|
||||||
|
fn update(&self, sub_progress: impl steppe::Step) {
|
||||||
|
self.update_progress(Compat(sub_progress));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
struct Compat<T: steppe::Step>(T);
|
||||||
|
|
||||||
|
impl<T: steppe::Step> Step for Compat<T> {
|
||||||
|
fn name(&self) -> Cow<'static, str> {
|
||||||
|
self.0.name().into()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn current(&self) -> u32 {
|
||||||
|
self.0.current().try_into().unwrap_or(u32::MAX)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn total(&self) -> u32 {
|
||||||
|
self.0.total().try_into().unwrap_or(u32::MAX)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
@ -526,7 +526,7 @@ where
|
|||||||
writer.build_and_quantize(
|
writer.build_and_quantize(
|
||||||
wtxn,
|
wtxn,
|
||||||
// In the settings we don't have any progress to share
|
// In the settings we don't have any progress to share
|
||||||
&Progress::default(),
|
Progress::default(),
|
||||||
&mut rng,
|
&mut rng,
|
||||||
dimension,
|
dimension,
|
||||||
is_quantizing,
|
is_quantizing,
|
||||||
|
@ -136,7 +136,7 @@ where
|
|||||||
.unwrap_or(false);
|
.unwrap_or(false);
|
||||||
writer.build_and_quantize(
|
writer.build_and_quantize(
|
||||||
wtxn,
|
wtxn,
|
||||||
progress,
|
progress.clone(),
|
||||||
&mut rng,
|
&mut rng,
|
||||||
dimensions,
|
dimensions,
|
||||||
is_being_quantized,
|
is_being_quantized,
|
||||||
|
@ -140,7 +140,7 @@ impl VectorStore {
|
|||||||
pub fn build_and_quantize<R: rand::Rng + rand::SeedableRng>(
|
pub fn build_and_quantize<R: rand::Rng + rand::SeedableRng>(
|
||||||
&mut self,
|
&mut self,
|
||||||
wtxn: &mut RwTxn,
|
wtxn: &mut RwTxn,
|
||||||
progress: &Progress,
|
progress: Progress,
|
||||||
rng: &mut R,
|
rng: &mut R,
|
||||||
dimension: usize,
|
dimension: usize,
|
||||||
quantizing: bool,
|
quantizing: bool,
|
||||||
@ -151,12 +151,12 @@ impl VectorStore {
|
|||||||
if self.quantized {
|
if self.quantized {
|
||||||
let writer = hannoy::Writer::new(self.quantized_db(), index, dimension);
|
let writer = hannoy::Writer::new(self.quantized_db(), index, dimension);
|
||||||
if writer.need_build(wtxn)? {
|
if writer.need_build(wtxn)? {
|
||||||
writer
|
let mut builder = writer.builder(rng).progress(progress.clone());
|
||||||
.builder(rng)
|
builder
|
||||||
// .progress(|step| progress.update_progress_from_hannoy(step))
|
.available_memory(hannoy_memory.unwrap_or(usize::MAX))
|
||||||
.cancel(cancel)
|
.cancel(cancel)
|
||||||
.ef_construction(HANNOY_EF_CONSTRUCTION)
|
.ef_construction(HANNOY_EF_CONSTRUCTION)
|
||||||
.build::<HANNOY_M, HANNOY_M0>(wtxn)?
|
.build::<HANNOY_M, HANNOY_M0>(wtxn)?;
|
||||||
} else if writer.is_empty(wtxn)? {
|
} else if writer.is_empty(wtxn)? {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
@ -169,18 +169,16 @@ impl VectorStore {
|
|||||||
// sensitive.
|
// sensitive.
|
||||||
if quantizing && !self.quantized {
|
if quantizing && !self.quantized {
|
||||||
let writer = writer.prepare_changing_distance::<Hamming>(wtxn)?;
|
let writer = writer.prepare_changing_distance::<Hamming>(wtxn)?;
|
||||||
writer
|
let mut builder = writer.builder(rng).progress(progress.clone());
|
||||||
.builder(rng)
|
builder
|
||||||
.available_memory(hannoy_memory.unwrap_or(usize::MAX))
|
.available_memory(hannoy_memory.unwrap_or(usize::MAX))
|
||||||
// .progress(|step| progress.update_progress_from_hannoy(step))
|
|
||||||
.cancel(cancel)
|
.cancel(cancel)
|
||||||
.ef_construction(HANNOY_EF_CONSTRUCTION)
|
.ef_construction(HANNOY_EF_CONSTRUCTION)
|
||||||
.build::<HANNOY_M, HANNOY_M0>(wtxn)?;
|
.build::<HANNOY_M, HANNOY_M0>(wtxn)?;
|
||||||
} else if writer.need_build(wtxn)? {
|
} else if writer.need_build(wtxn)? {
|
||||||
writer
|
let mut builder = writer.builder(rng).progress(progress.clone());
|
||||||
.builder(rng)
|
builder
|
||||||
.available_memory(hannoy_memory.unwrap_or(usize::MAX))
|
.available_memory(hannoy_memory.unwrap_or(usize::MAX))
|
||||||
// .progress(|step| progress.update_progress_from_hannoy(step))
|
|
||||||
.cancel(cancel)
|
.cancel(cancel)
|
||||||
.ef_construction(HANNOY_EF_CONSTRUCTION)
|
.ef_construction(HANNOY_EF_CONSTRUCTION)
|
||||||
.build::<HANNOY_M, HANNOY_M0>(wtxn)?;
|
.build::<HANNOY_M, HANNOY_M0>(wtxn)?;
|
||||||
|
Reference in New Issue
Block a user