Compare commits

...

5 Commits

Author SHA1 Message Date
Kerollmops
dd66414e28 Use an optimized version to rebuild all graph links 2025-12-19 12:14:05 +01:00
Kerollmops
4538ceedcc Fix display of the dumpless upgrade progress 2025-12-17 09:38:38 +01:00
Kerollmops
146a5b7a63 Rebuild the hannoy graph when dumpless upgrading to v0.30.1 2025-12-17 09:38:38 +01:00
Kerollmops
b2b9f2239b Bump version to 0.30.1 2025-12-17 09:38:38 +01:00
Kerollmops
042820693e fixme: Bump the version of hannoy to 0.1.2 2025-12-17 09:38:38 +01:00
6 changed files with 120 additions and 23 deletions

39
Cargo.lock generated
View File

@@ -580,7 +580,7 @@ source = "git+https://github.com/meilisearch/bbqueue#e8af4a4bccc8eb36b2b0442c4a9
[[package]] [[package]]
name = "benchmarks" name = "benchmarks"
version = "1.30.0" version = "1.30.1"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"bumpalo", "bumpalo",
@@ -790,7 +790,7 @@ dependencies = [
[[package]] [[package]]
name = "build-info" name = "build-info"
version = "1.30.0" version = "1.30.1"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"time", "time",
@@ -1786,7 +1786,7 @@ dependencies = [
[[package]] [[package]]
name = "dump" name = "dump"
version = "1.30.0" version = "1.30.1"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"big_s", "big_s",
@@ -2018,7 +2018,7 @@ checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be"
[[package]] [[package]]
name = "file-store" name = "file-store"
version = "1.30.0" version = "1.30.1"
dependencies = [ dependencies = [
"tempfile", "tempfile",
"thiserror 2.0.17", "thiserror 2.0.17",
@@ -2040,7 +2040,7 @@ dependencies = [
[[package]] [[package]]
name = "filter-parser" name = "filter-parser"
version = "1.30.0" version = "1.30.1"
dependencies = [ dependencies = [
"insta", "insta",
"levenshtein_automata", "levenshtein_automata",
@@ -2068,7 +2068,7 @@ dependencies = [
[[package]] [[package]]
name = "flatten-serde-json" name = "flatten-serde-json"
version = "1.30.0" version = "1.30.1"
dependencies = [ dependencies = [
"criterion", "criterion",
"serde_json", "serde_json",
@@ -2231,7 +2231,7 @@ dependencies = [
[[package]] [[package]]
name = "fuzzers" name = "fuzzers"
version = "1.30.0" version = "1.30.1"
dependencies = [ dependencies = [
"arbitrary", "arbitrary",
"bumpalo", "bumpalo",
@@ -2698,9 +2698,8 @@ dependencies = [
[[package]] [[package]]
name = "hannoy" name = "hannoy"
version = "0.1.0-nested-rtxns" version = "0.1.2-nested-rtxns"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "git+https://github.com/nnethercott/hannoy?branch=use-heed-nested-rtxns#41f375e3080f311a5bef30eb647a2162262b5abd"
checksum = "be82bf3f2108ddc8885e3d306fcd7f4692066bfe26065ca8b42ba417f3c26dd1"
dependencies = [ dependencies = [
"bytemuck", "bytemuck",
"byteorder", "byteorder",
@@ -3185,7 +3184,7 @@ dependencies = [
[[package]] [[package]]
name = "index-scheduler" name = "index-scheduler"
version = "1.30.0" version = "1.30.1"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"backoff", "backoff",
@@ -3449,7 +3448,7 @@ dependencies = [
[[package]] [[package]]
name = "json-depth-checker" name = "json-depth-checker"
version = "1.30.0" version = "1.30.1"
dependencies = [ dependencies = [
"criterion", "criterion",
"serde_json", "serde_json",
@@ -3939,7 +3938,7 @@ checksum = "ae960838283323069879657ca3de837e9f7bbb4c7bf6ea7f1b290d5e9476d2e0"
[[package]] [[package]]
name = "meili-snap" name = "meili-snap"
version = "1.30.0" version = "1.30.1"
dependencies = [ dependencies = [
"insta", "insta",
"md5 0.8.0", "md5 0.8.0",
@@ -3950,7 +3949,7 @@ dependencies = [
[[package]] [[package]]
name = "meilisearch" name = "meilisearch"
version = "1.30.0" version = "1.30.1"
dependencies = [ dependencies = [
"actix-cors", "actix-cors",
"actix-http", "actix-http",
@@ -4048,7 +4047,7 @@ dependencies = [
[[package]] [[package]]
name = "meilisearch-auth" name = "meilisearch-auth"
version = "1.30.0" version = "1.30.1"
dependencies = [ dependencies = [
"base64 0.22.1", "base64 0.22.1",
"enum-iterator", "enum-iterator",
@@ -4067,7 +4066,7 @@ dependencies = [
[[package]] [[package]]
name = "meilisearch-types" name = "meilisearch-types"
version = "1.30.0" version = "1.30.1"
dependencies = [ dependencies = [
"actix-web", "actix-web",
"anyhow", "anyhow",
@@ -4105,7 +4104,7 @@ dependencies = [
[[package]] [[package]]
name = "meilitool" name = "meilitool"
version = "1.30.0" version = "1.30.1"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"clap", "clap",
@@ -4139,7 +4138,7 @@ dependencies = [
[[package]] [[package]]
name = "milli" name = "milli"
version = "1.30.0" version = "1.30.1"
dependencies = [ dependencies = [
"arroy", "arroy",
"bbqueue", "bbqueue",
@@ -4718,7 +4717,7 @@ checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220"
[[package]] [[package]]
name = "permissive-json-pointer" name = "permissive-json-pointer"
version = "1.30.0" version = "1.30.1"
dependencies = [ dependencies = [
"big_s", "big_s",
"serde_json", "serde_json",
@@ -7758,7 +7757,7 @@ dependencies = [
[[package]] [[package]]
name = "xtask" name = "xtask"
version = "1.30.0" version = "1.30.1"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"build-info", "build-info",

View File

@@ -23,7 +23,7 @@ members = [
] ]
[workspace.package] [workspace.package]
version = "1.30.0" version = "1.30.1"
authors = [ authors = [
"Quentin de Quelen <quentin@dequelen.me>", "Quentin de Quelen <quentin@dequelen.me>",
"Clément Renault <clement@meilisearch.com>", "Clément Renault <clement@meilisearch.com>",

View File

@@ -91,7 +91,7 @@ rhai = { version = "1.23.6", features = [
"sync", "sync",
] } ] }
arroy = "0.6.4-nested-rtxns" arroy = "0.6.4-nested-rtxns"
hannoy = { version = "0.1.0-nested-rtxns", features = ["arroy"] } hannoy = { git = "https://github.com/nnethercott/hannoy", branch = "use-heed-nested-rtxns", features = ["arroy"] }
rand = "0.8.5" rand = "0.8.5"
tracing = "0.1.41" tracing = "0.1.41"
ureq = { version = "2.12.1", features = ["json"] } ureq = { version = "2.12.1", features = ["json"] }

View File

@@ -3,6 +3,7 @@ mod v1_13;
mod v1_14; mod v1_14;
mod v1_15; mod v1_15;
mod v1_16; mod v1_16;
mod v1_30_1;
use heed::RwTxn; use heed::RwTxn;
use v1_12::{FixFieldDistribution, RecomputeStats}; use v1_12::{FixFieldDistribution, RecomputeStats};
@@ -10,6 +11,7 @@ use v1_13::AddNewStats;
use v1_14::UpgradeArroyVersion; use v1_14::UpgradeArroyVersion;
use v1_15::RecomputeWordFst; use v1_15::RecomputeWordFst;
use v1_16::SwitchToMultimodal; use v1_16::SwitchToMultimodal;
use v1_30_1::RebuildHannoyGraph;
use crate::constants::{VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH}; use crate::constants::{VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH};
use crate::progress::{Progress, VariableNameStep}; use crate::progress::{Progress, VariableNameStep};
@@ -33,6 +35,7 @@ const UPGRADE_FUNCTIONS: &[&dyn UpgradeIndex] = &[
&UpgradeArroyVersion {}, &UpgradeArroyVersion {},
&RecomputeWordFst {}, &RecomputeWordFst {},
&SwitchToMultimodal {}, &SwitchToMultimodal {},
&RebuildHannoyGraph,
]; ];
/// Return true if the cached stats of the index must be regenerated /// Return true if the cached stats of the index must be regenerated
@@ -58,12 +61,12 @@ where
return Err(crate::Error::InternalError(InternalError::AbortedIndexation)); return Err(crate::Error::InternalError(InternalError::AbortedIndexation));
} }
if upgrade.must_upgrade(initial_version) { if upgrade.must_upgrade(initial_version) {
regenerate_stats |= upgrade.upgrade(wtxn, index, progress.clone())?;
progress.update_progress(VariableNameStep::<UpgradeVersion>::new( progress.update_progress(VariableNameStep::<UpgradeVersion>::new(
upgrade.description(), upgrade.description(),
i as u32, i as u32,
upgrade_functions.len() as u32, upgrade_functions.len() as u32,
)); ));
regenerate_stats |= upgrade.upgrade(wtxn, index, progress.clone())?;
} else { } else {
progress.update_progress(VariableNameStep::<UpgradeVersion>::new( progress.update_progress(VariableNameStep::<UpgradeVersion>::new(
"Skipping migration that must not be applied", "Skipping migration that must not be applied",

View File

@@ -0,0 +1,50 @@
use heed::RwTxn;
use rand::SeedableRng as _;
use super::UpgradeIndex;
use crate::progress::Progress;
use crate::vector::VectorStore;
use crate::{Index, Result};
/// Rebuilds the hannoy graph without touching the embeddings.
///
/// This follows a bug in hannoy v0.0.9 and v0.1.0 where the graph
/// was not built correctly.
pub(super) struct RebuildHannoyGraph;
impl UpgradeIndex for RebuildHannoyGraph {
    /// Rebuilds the graph links of every embedder configured on `index`.
    ///
    /// For each embedding config, a `VectorStore` is opened with the index's
    /// vector store backend (or the default backend when none is stored) and
    /// `VectorStore::rebuild_graph` is called with a freshly seeded RNG and a
    /// cancellation callback that never cancels.
    ///
    /// Always returns `Ok(false)` on success.
    ///
    /// # Errors
    ///
    /// Propagates any error raised while reading the configs, reading the
    /// dimensions or rebuilding the graph.
    ///
    /// # Panics
    ///
    /// Panics if an embedding config has no matching embedder info.
    fn upgrade(&self, wtxn: &mut RwTxn, index: &Index, progress: Progress) -> Result<bool> {
        let embedders = index.embedding_configs();
        let backend = index.get_vector_store(wtxn)?.unwrap_or_default();

        for config in embedders.embedding_configs(wtxn)? {
            // NOTE(review): assumes every stored embedding config has a registered
            // embedder info; a missing one is treated as a broken invariant.
            let embedder_info = embedders
                .embedder_info(wtxn, &config.name)?
                .expect("an embedder info must exist for every embedding config");
            let mut vector_store = VectorStore::new(
                backend,
                index.vector_store,
                embedder_info.embedder_id,
                config.config.quantized(),
            );

            // Without a known dimension there is no graph to rebuild for this
            // embedder (presumably nothing was ever stored for it — TODO confirm).
            // Skip it instead of panicking and aborting the whole upgrade.
            let dimensions = match vector_store.dimensions(wtxn)? {
                Some(dimensions) => dimensions,
                None => continue,
            };

            let seed = rand::random();
            let mut rng = rand::rngs::StdRng::seed_from_u64(seed);
            vector_store.rebuild_graph(wtxn, progress.clone(), &mut rng, dimensions, &|| false)?;
        }

        Ok(false)
    }

    /// Applies to every index created by a version strictly older than v1.30.1.
    fn must_upgrade(&self, initial_version: (u32, u32, u32)) -> bool {
        initial_version < (1, 30, 1)
    }

    /// Short label shown while this upgrade step is running.
    fn description(&self) -> &'static str {
        "Rebuilding graph links"
    }
}

View File

@@ -246,6 +246,32 @@ impl VectorStore {
Ok(()) Ok(())
} }
/// Force-rebuilds the hannoy graph of every non-empty store of this embedder.
///
/// Each index yielded by `vector_store_range_for_embedder(self.embedder_index)`
/// is opened as a `hannoy::Writer` of the given `dimension`, on the quantized
/// database when `self.quantized` is set and on the angular one otherwise.
/// Writers that report being empty are left untouched.
///
/// This is a no-op when the backend is not `VectorStoreBackend::Hannoy`.
///
/// # Errors
///
/// Returns an error if checking a writer for emptiness or rebuilding its
/// graph fails.
pub fn rebuild_graph<R: rand::Rng + rand::SeedableRng>(
    &mut self,
    wtxn: &mut RwTxn,
    progress: Progress,
    rng: &mut R,
    dimension: usize,
    cancel: &(impl Fn() -> bool + Sync + Send),
) -> Result<(), crate::Error> {
    // The backend cannot change while iterating, so check it once up front
    // rather than once per store index.
    if self.backend != VectorStoreBackend::Hannoy {
        return Ok(());
    }

    for index in vector_store_range_for_embedder(self.embedder_index) {
        // The two branches build writers over different database types,
        // hence the duplicated calls.
        if self.quantized {
            let writer = hannoy::Writer::new(self._hannoy_quantized_db(), index, dimension);
            if !writer.is_empty(wtxn)? {
                hannoy_rebuild_graph(wtxn, &progress, rng, cancel, &writer)?;
            }
        } else {
            let writer = hannoy::Writer::new(self._hannoy_angular_db(), index, dimension);
            if !writer.is_empty(wtxn)? {
                hannoy_rebuild_graph(wtxn, &progress, rng, cancel, &writer)?;
            }
        }
    }

    Ok(())
}
/// Overwrite all the embeddings associated with the index and item ID. /// Overwrite all the embeddings associated with the index and item ID.
/// /!\ It won't remove embeddings after the last passed embedding, which can leave stale embeddings. /// /!\ It won't remove embeddings after the last passed embedding, which can leave stale embeddings.
/// You should call `del_items` on the `item_id` before calling this method. /// You should call `del_items` on the `item_id` before calling this method.
@@ -1185,6 +1211,25 @@ where
Ok(()) Ok(())
} }
/// Forces a rebuild of the graph links managed by `writer`.
///
/// A builder is created from `writer` and `rng`, configured with a clone of
/// `progress`, the `cancel` callback and `HANNOY_EF_CONSTRUCTION`, and then
/// `force_rebuild` is run inside `wtxn` with the `HANNOY_M` / `HANNOY_M0`
/// parameters. Any error from the rebuild is converted into a `crate::Error`.
fn hannoy_rebuild_graph<R, D>(
    wtxn: &mut RwTxn<'_>,
    progress: &Progress,
    rng: &mut R,
    cancel: &(impl Fn() -> bool + Sync + Send),
    writer: &hannoy::Writer<D>,
) -> Result<(), crate::Error>
where
    R: rand::Rng + rand::SeedableRng,
    D: hannoy::Distance,
{
    // Configure and run the builder as one fluent chain; the temporary builder
    // lives until the end of the statement.
    writer
        .builder(rng)
        .progress(progress.clone())
        .cancel(cancel)
        .ef_construction(HANNOY_EF_CONSTRUCTION)
        .force_rebuild::<HANNOY_M, HANNOY_M0>(wtxn)?;

    Ok(())
}
#[derive(Debug, Default, Clone)] #[derive(Debug, Default, Clone)]
pub struct VectorStoreStats { pub struct VectorStoreStats {
pub number_of_embeddings: u64, pub number_of_embeddings: u64,