Compare commits

...

5 Commits

Author SHA1 Message Date
Kerollmops
dd66414e28 Use an optimized version to rebuild all graph links 2025-12-19 12:14:05 +01:00
Kerollmops
4538ceedcc Fix display of the dumpless upgrade progress 2025-12-17 09:38:38 +01:00
Kerollmops
146a5b7a63 Rebuild the hannoy graph when dumpless upgrading to v0.30.1 2025-12-17 09:38:38 +01:00
Kerollmops
b2b9f2239b Bump version to 0.30.1 2025-12-17 09:38:38 +01:00
Kerollmops
042820693e fixme: Bump the version of hannoy to 0.1.2 2025-12-17 09:38:38 +01:00
6 changed files with 120 additions and 23 deletions

39
Cargo.lock generated
View File

@@ -580,7 +580,7 @@ source = "git+https://github.com/meilisearch/bbqueue#e8af4a4bccc8eb36b2b0442c4a9
[[package]]
name = "benchmarks"
version = "1.30.0"
version = "1.30.1"
dependencies = [
"anyhow",
"bumpalo",
@@ -790,7 +790,7 @@ dependencies = [
[[package]]
name = "build-info"
version = "1.30.0"
version = "1.30.1"
dependencies = [
"anyhow",
"time",
@@ -1786,7 +1786,7 @@ dependencies = [
[[package]]
name = "dump"
version = "1.30.0"
version = "1.30.1"
dependencies = [
"anyhow",
"big_s",
@@ -2018,7 +2018,7 @@ checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be"
[[package]]
name = "file-store"
version = "1.30.0"
version = "1.30.1"
dependencies = [
"tempfile",
"thiserror 2.0.17",
@@ -2040,7 +2040,7 @@ dependencies = [
[[package]]
name = "filter-parser"
version = "1.30.0"
version = "1.30.1"
dependencies = [
"insta",
"levenshtein_automata",
@@ -2068,7 +2068,7 @@ dependencies = [
[[package]]
name = "flatten-serde-json"
version = "1.30.0"
version = "1.30.1"
dependencies = [
"criterion",
"serde_json",
@@ -2231,7 +2231,7 @@ dependencies = [
[[package]]
name = "fuzzers"
version = "1.30.0"
version = "1.30.1"
dependencies = [
"arbitrary",
"bumpalo",
@@ -2698,9 +2698,8 @@ dependencies = [
[[package]]
name = "hannoy"
version = "0.1.0-nested-rtxns"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "be82bf3f2108ddc8885e3d306fcd7f4692066bfe26065ca8b42ba417f3c26dd1"
version = "0.1.2-nested-rtxns"
source = "git+https://github.com/nnethercott/hannoy?branch=use-heed-nested-rtxns#41f375e3080f311a5bef30eb647a2162262b5abd"
dependencies = [
"bytemuck",
"byteorder",
@@ -3185,7 +3184,7 @@ dependencies = [
[[package]]
name = "index-scheduler"
version = "1.30.0"
version = "1.30.1"
dependencies = [
"anyhow",
"backoff",
@@ -3449,7 +3448,7 @@ dependencies = [
[[package]]
name = "json-depth-checker"
version = "1.30.0"
version = "1.30.1"
dependencies = [
"criterion",
"serde_json",
@@ -3939,7 +3938,7 @@ checksum = "ae960838283323069879657ca3de837e9f7bbb4c7bf6ea7f1b290d5e9476d2e0"
[[package]]
name = "meili-snap"
version = "1.30.0"
version = "1.30.1"
dependencies = [
"insta",
"md5 0.8.0",
@@ -3950,7 +3949,7 @@ dependencies = [
[[package]]
name = "meilisearch"
version = "1.30.0"
version = "1.30.1"
dependencies = [
"actix-cors",
"actix-http",
@@ -4048,7 +4047,7 @@ dependencies = [
[[package]]
name = "meilisearch-auth"
version = "1.30.0"
version = "1.30.1"
dependencies = [
"base64 0.22.1",
"enum-iterator",
@@ -4067,7 +4066,7 @@ dependencies = [
[[package]]
name = "meilisearch-types"
version = "1.30.0"
version = "1.30.1"
dependencies = [
"actix-web",
"anyhow",
@@ -4105,7 +4104,7 @@ dependencies = [
[[package]]
name = "meilitool"
version = "1.30.0"
version = "1.30.1"
dependencies = [
"anyhow",
"clap",
@@ -4139,7 +4138,7 @@ dependencies = [
[[package]]
name = "milli"
version = "1.30.0"
version = "1.30.1"
dependencies = [
"arroy",
"bbqueue",
@@ -4718,7 +4717,7 @@ checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220"
[[package]]
name = "permissive-json-pointer"
version = "1.30.0"
version = "1.30.1"
dependencies = [
"big_s",
"serde_json",
@@ -7758,7 +7757,7 @@ dependencies = [
[[package]]
name = "xtask"
version = "1.30.0"
version = "1.30.1"
dependencies = [
"anyhow",
"build-info",

View File

@@ -23,7 +23,7 @@ members = [
]
[workspace.package]
version = "1.30.0"
version = "1.30.1"
authors = [
"Quentin de Quelen <quentin@dequelen.me>",
"Clément Renault <clement@meilisearch.com>",

View File

@@ -91,7 +91,7 @@ rhai = { version = "1.23.6", features = [
"sync",
] }
arroy = "0.6.4-nested-rtxns"
hannoy = { version = "0.1.0-nested-rtxns", features = ["arroy"] }
hannoy = { git = "https://github.com/nnethercott/hannoy", branch = "use-heed-nested-rtxns", features = ["arroy"] }
rand = "0.8.5"
tracing = "0.1.41"
ureq = { version = "2.12.1", features = ["json"] }

View File

@@ -3,6 +3,7 @@ mod v1_13;
mod v1_14;
mod v1_15;
mod v1_16;
mod v1_30_1;
use heed::RwTxn;
use v1_12::{FixFieldDistribution, RecomputeStats};
@@ -10,6 +11,7 @@ use v1_13::AddNewStats;
use v1_14::UpgradeArroyVersion;
use v1_15::RecomputeWordFst;
use v1_16::SwitchToMultimodal;
use v1_30_1::RebuildHannoyGraph;
use crate::constants::{VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH};
use crate::progress::{Progress, VariableNameStep};
@@ -33,6 +35,7 @@ const UPGRADE_FUNCTIONS: &[&dyn UpgradeIndex] = &[
&UpgradeArroyVersion {},
&RecomputeWordFst {},
&SwitchToMultimodal {},
&RebuildHannoyGraph,
];
/// Return true if the cached stats of the index must be regenerated
@@ -58,12 +61,12 @@ where
return Err(crate::Error::InternalError(InternalError::AbortedIndexation));
}
if upgrade.must_upgrade(initial_version) {
regenerate_stats |= upgrade.upgrade(wtxn, index, progress.clone())?;
progress.update_progress(VariableNameStep::<UpgradeVersion>::new(
upgrade.description(),
i as u32,
upgrade_functions.len() as u32,
));
regenerate_stats |= upgrade.upgrade(wtxn, index, progress.clone())?;
} else {
progress.update_progress(VariableNameStep::<UpgradeVersion>::new(
"Skipping migration that must not be applied",

View File

@@ -0,0 +1,50 @@
use heed::RwTxn;
use rand::SeedableRng as _;
use super::UpgradeIndex;
use crate::progress::Progress;
use crate::vector::VectorStore;
use crate::{Index, Result};
/// Rebuilds the hannoy graph without touching the embeddings.
///
/// This works around a bug in hannoy v0.0.9 and v0.1.0 where the graph
/// was not built correctly.
pub(super) struct RebuildHannoyGraph;
impl UpgradeIndex for RebuildHannoyGraph {
    /// Rebuilds the graph links of the vector store of every configured embedder.
    ///
    /// Embedders whose vector store reports no dimensions are skipped instead of
    /// aborting the upgrade with a panic.
    ///
    /// Always returns `Ok(false)` on success; the caller ORs this value into its
    /// "regenerate the cached stats" flag, so this migration never requests it.
    ///
    /// # Errors
    ///
    /// Propagates any error raised while reading the embedder configuration or
    /// while rebuilding a graph.
    fn upgrade(&self, wtxn: &mut RwTxn, index: &Index, progress: Progress) -> Result<bool> {
        let embedders = index.embedding_configs();
        let backend = index.get_vector_store(wtxn)?.unwrap_or_default();

        for config in embedders.embedding_configs(wtxn)? {
            // NOTE(review): presumably every listed configuration has a matching
            // embedder info entry, hence the unwrap -- confirm this invariant.
            let embedder_info = embedders.embedder_info(wtxn, &config.name)?.unwrap();
            let mut vector_store = VectorStore::new(
                backend,
                index.vector_store,
                embedder_info.embedder_id,
                config.config.quantized(),
            );

            // Without known dimensions there is no graph to rebuild for this embedder.
            let dimensions = match vector_store.dimensions(wtxn)? {
                Some(dimensions) => dimensions,
                None => continue,
            };

            let seed = rand::random();
            let mut rng = rand::rngs::StdRng::seed_from_u64(seed);
            // The migration is never cancelled once started, hence the constant closure.
            vector_store.rebuild_graph(wtxn, progress.clone(), &mut rng, dimensions, &|| false)?;
        }

        Ok(false)
    }

    /// Applies to every index last written by a version older than v1.30.1.
    fn must_upgrade(&self, initial_version: (u32, u32, u32)) -> bool {
        initial_version < (1, 30, 1)
    }

    /// Short label shown while this migration is running.
    fn description(&self) -> &'static str {
        "Rebuilding graph links"
    }
}

View File

@@ -246,6 +246,32 @@ impl VectorStore {
Ok(())
}
/// Rebuilds the graph of every non-empty hannoy writer that belongs to this embedder.
///
/// Does nothing when the backend is not [`VectorStoreBackend::Hannoy`].
///
/// - `wtxn`: write transaction in which the graphs are rebuilt.
/// - `progress`: progress handle forwarded to the hannoy builder.
/// - `rng`: random number generator handed to the hannoy builder.
/// - `dimension`: dimension used to open each hannoy writer.
/// - `cancel`: forwarded to the hannoy builder as its cancellation callback.
///
/// # Errors
///
/// Returns the first error raised while checking a writer for emptiness
/// or while rebuilding its graph.
pub fn rebuild_graph<R: rand::Rng + rand::SeedableRng>(
    &mut self,
    wtxn: &mut RwTxn,
    progress: Progress,
    rng: &mut R,
    dimension: usize,
    cancel: &(impl Fn() -> bool + Sync + Send),
) -> Result<(), crate::Error> {
    // The backend cannot change while iterating: check it once, up front,
    // instead of once per store index.
    if self.backend != VectorStoreBackend::Hannoy {
        return Ok(());
    }

    for index in vector_store_range_for_embedder(self.embedder_index) {
        // The two databases have different writer types, hence the duplicated branches.
        if self.quantized {
            let writer = hannoy::Writer::new(self._hannoy_quantized_db(), index, dimension);
            if !writer.is_empty(wtxn)? {
                hannoy_rebuild_graph(wtxn, &progress, rng, cancel, &writer)?;
            }
        } else {
            let writer = hannoy::Writer::new(self._hannoy_angular_db(), index, dimension);
            if !writer.is_empty(wtxn)? {
                hannoy_rebuild_graph(wtxn, &progress, rng, cancel, &writer)?;
            }
        }
    }

    Ok(())
}
/// Overwrite all the embeddings associated with the index and item ID.
/// /!\ It won't remove embeddings after the last passed embedding, which can leave stale embeddings.
/// You should call `del_items` on the `item_id` before calling this method.
@@ -1185,6 +1211,25 @@ where
Ok(())
}
fn hannoy_rebuild_graph<R, D>(
wtxn: &mut RwTxn<'_>,
progress: &Progress,
rng: &mut R,
cancel: &(impl Fn() -> bool + Sync + Send),
writer: &hannoy::Writer<D>,
) -> Result<(), crate::Error>
where
R: rand::Rng + rand::SeedableRng,
D: hannoy::Distance,
{
let mut builder = writer.builder(rng).progress(progress.clone());
builder
.cancel(cancel)
.ef_construction(HANNOY_EF_CONSTRUCTION)
.force_rebuild::<HANNOY_M, HANNOY_M0>(wtxn)?;
Ok(())
}
#[derive(Debug, Default, Clone)]
pub struct VectorStoreStats {
pub number_of_embeddings: u64,