mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-12-21 03:46:57 +00:00
Compare commits
5 Commits
openapi-co
...
integrate-
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
dd66414e28 | ||
|
|
4538ceedcc | ||
|
|
146a5b7a63 | ||
|
|
b2b9f2239b | ||
|
|
042820693e |
39
Cargo.lock
generated
39
Cargo.lock
generated
@@ -580,7 +580,7 @@ source = "git+https://github.com/meilisearch/bbqueue#e8af4a4bccc8eb36b2b0442c4a9
|
||||
|
||||
[[package]]
|
||||
name = "benchmarks"
|
||||
version = "1.30.0"
|
||||
version = "1.30.1"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"bumpalo",
|
||||
@@ -790,7 +790,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "build-info"
|
||||
version = "1.30.0"
|
||||
version = "1.30.1"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"time",
|
||||
@@ -1786,7 +1786,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "dump"
|
||||
version = "1.30.0"
|
||||
version = "1.30.1"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"big_s",
|
||||
@@ -2018,7 +2018,7 @@ checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be"
|
||||
|
||||
[[package]]
|
||||
name = "file-store"
|
||||
version = "1.30.0"
|
||||
version = "1.30.1"
|
||||
dependencies = [
|
||||
"tempfile",
|
||||
"thiserror 2.0.17",
|
||||
@@ -2040,7 +2040,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "filter-parser"
|
||||
version = "1.30.0"
|
||||
version = "1.30.1"
|
||||
dependencies = [
|
||||
"insta",
|
||||
"levenshtein_automata",
|
||||
@@ -2068,7 +2068,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "flatten-serde-json"
|
||||
version = "1.30.0"
|
||||
version = "1.30.1"
|
||||
dependencies = [
|
||||
"criterion",
|
||||
"serde_json",
|
||||
@@ -2231,7 +2231,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "fuzzers"
|
||||
version = "1.30.0"
|
||||
version = "1.30.1"
|
||||
dependencies = [
|
||||
"arbitrary",
|
||||
"bumpalo",
|
||||
@@ -2698,9 +2698,8 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "hannoy"
|
||||
version = "0.1.0-nested-rtxns"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "be82bf3f2108ddc8885e3d306fcd7f4692066bfe26065ca8b42ba417f3c26dd1"
|
||||
version = "0.1.2-nested-rtxns"
|
||||
source = "git+https://github.com/nnethercott/hannoy?branch=use-heed-nested-rtxns#41f375e3080f311a5bef30eb647a2162262b5abd"
|
||||
dependencies = [
|
||||
"bytemuck",
|
||||
"byteorder",
|
||||
@@ -3185,7 +3184,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "index-scheduler"
|
||||
version = "1.30.0"
|
||||
version = "1.30.1"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"backoff",
|
||||
@@ -3449,7 +3448,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "json-depth-checker"
|
||||
version = "1.30.0"
|
||||
version = "1.30.1"
|
||||
dependencies = [
|
||||
"criterion",
|
||||
"serde_json",
|
||||
@@ -3939,7 +3938,7 @@ checksum = "ae960838283323069879657ca3de837e9f7bbb4c7bf6ea7f1b290d5e9476d2e0"
|
||||
|
||||
[[package]]
|
||||
name = "meili-snap"
|
||||
version = "1.30.0"
|
||||
version = "1.30.1"
|
||||
dependencies = [
|
||||
"insta",
|
||||
"md5 0.8.0",
|
||||
@@ -3950,7 +3949,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "meilisearch"
|
||||
version = "1.30.0"
|
||||
version = "1.30.1"
|
||||
dependencies = [
|
||||
"actix-cors",
|
||||
"actix-http",
|
||||
@@ -4048,7 +4047,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "meilisearch-auth"
|
||||
version = "1.30.0"
|
||||
version = "1.30.1"
|
||||
dependencies = [
|
||||
"base64 0.22.1",
|
||||
"enum-iterator",
|
||||
@@ -4067,7 +4066,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "meilisearch-types"
|
||||
version = "1.30.0"
|
||||
version = "1.30.1"
|
||||
dependencies = [
|
||||
"actix-web",
|
||||
"anyhow",
|
||||
@@ -4105,7 +4104,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "meilitool"
|
||||
version = "1.30.0"
|
||||
version = "1.30.1"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"clap",
|
||||
@@ -4139,7 +4138,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "milli"
|
||||
version = "1.30.0"
|
||||
version = "1.30.1"
|
||||
dependencies = [
|
||||
"arroy",
|
||||
"bbqueue",
|
||||
@@ -4718,7 +4717,7 @@ checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220"
|
||||
|
||||
[[package]]
|
||||
name = "permissive-json-pointer"
|
||||
version = "1.30.0"
|
||||
version = "1.30.1"
|
||||
dependencies = [
|
||||
"big_s",
|
||||
"serde_json",
|
||||
@@ -7758,7 +7757,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "xtask"
|
||||
version = "1.30.0"
|
||||
version = "1.30.1"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"build-info",
|
||||
|
||||
@@ -23,7 +23,7 @@ members = [
|
||||
]
|
||||
|
||||
[workspace.package]
|
||||
version = "1.30.0"
|
||||
version = "1.30.1"
|
||||
authors = [
|
||||
"Quentin de Quelen <quentin@dequelen.me>",
|
||||
"Clément Renault <clement@meilisearch.com>",
|
||||
|
||||
@@ -91,7 +91,7 @@ rhai = { version = "1.23.6", features = [
|
||||
"sync",
|
||||
] }
|
||||
arroy = "0.6.4-nested-rtxns"
|
||||
hannoy = { version = "0.1.0-nested-rtxns", features = ["arroy"] }
|
||||
hannoy = { git = "https://github.com/nnethercott/hannoy", branch = "use-heed-nested-rtxns", features = ["arroy"] }
|
||||
rand = "0.8.5"
|
||||
tracing = "0.1.41"
|
||||
ureq = { version = "2.12.1", features = ["json"] }
|
||||
|
||||
@@ -3,6 +3,7 @@ mod v1_13;
|
||||
mod v1_14;
|
||||
mod v1_15;
|
||||
mod v1_16;
|
||||
mod v1_30_1;
|
||||
|
||||
use heed::RwTxn;
|
||||
use v1_12::{FixFieldDistribution, RecomputeStats};
|
||||
@@ -10,6 +11,7 @@ use v1_13::AddNewStats;
|
||||
use v1_14::UpgradeArroyVersion;
|
||||
use v1_15::RecomputeWordFst;
|
||||
use v1_16::SwitchToMultimodal;
|
||||
use v1_30_1::RebuildHannoyGraph;
|
||||
|
||||
use crate::constants::{VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH};
|
||||
use crate::progress::{Progress, VariableNameStep};
|
||||
@@ -33,6 +35,7 @@ const UPGRADE_FUNCTIONS: &[&dyn UpgradeIndex] = &[
|
||||
&UpgradeArroyVersion {},
|
||||
&RecomputeWordFst {},
|
||||
&SwitchToMultimodal {},
|
||||
&RebuildHannoyGraph,
|
||||
];
|
||||
|
||||
/// Return true if the cached stats of the index must be regenerated
|
||||
@@ -58,12 +61,12 @@ where
|
||||
return Err(crate::Error::InternalError(InternalError::AbortedIndexation));
|
||||
}
|
||||
if upgrade.must_upgrade(initial_version) {
|
||||
regenerate_stats |= upgrade.upgrade(wtxn, index, progress.clone())?;
|
||||
progress.update_progress(VariableNameStep::<UpgradeVersion>::new(
|
||||
upgrade.description(),
|
||||
i as u32,
|
||||
upgrade_functions.len() as u32,
|
||||
));
|
||||
regenerate_stats |= upgrade.upgrade(wtxn, index, progress.clone())?;
|
||||
} else {
|
||||
progress.update_progress(VariableNameStep::<UpgradeVersion>::new(
|
||||
"Skipping migration that must not be applied",
|
||||
|
||||
50
crates/milli/src/update/upgrade/v1_30_1.rs
Normal file
50
crates/milli/src/update/upgrade/v1_30_1.rs
Normal file
@@ -0,0 +1,50 @@
|
||||
use heed::RwTxn;
|
||||
use rand::SeedableRng as _;
|
||||
|
||||
use super::UpgradeIndex;
|
||||
use crate::progress::Progress;
|
||||
use crate::vector::VectorStore;
|
||||
use crate::{Index, Result};
|
||||
|
||||
/// Rebuilds the hannoy graph without touching the embeddings.
|
||||
///
|
||||
/// This follows a bug in hannoy v0.0.9 and v0.1.0 where the graph
|
||||
/// was not built correctly.
|
||||
pub(super) struct RebuildHannoyGraph;
|
||||
|
||||
impl UpgradeIndex for RebuildHannoyGraph {
|
||||
fn upgrade(&self, wtxn: &mut RwTxn, index: &Index, progress: Progress) -> Result<bool> {
|
||||
let embedders = index.embedding_configs();
|
||||
let backend = index.get_vector_store(wtxn)?.unwrap_or_default();
|
||||
|
||||
for config in embedders.embedding_configs(wtxn)? {
|
||||
let embedder_info = embedders.embedder_info(wtxn, &config.name)?.unwrap();
|
||||
let mut vector_store = VectorStore::new(
|
||||
backend,
|
||||
index.vector_store,
|
||||
embedder_info.embedder_id,
|
||||
config.config.quantized(),
|
||||
);
|
||||
|
||||
let seed = rand::random();
|
||||
let mut rng = rand::rngs::StdRng::seed_from_u64(seed);
|
||||
vector_store.rebuild_graph(
|
||||
wtxn,
|
||||
progress.clone(),
|
||||
&mut rng,
|
||||
vector_store.dimensions(wtxn)?.unwrap(),
|
||||
&|| false,
|
||||
)?;
|
||||
}
|
||||
|
||||
Ok(false)
|
||||
}
|
||||
|
||||
fn must_upgrade(&self, initial_version: (u32, u32, u32)) -> bool {
|
||||
initial_version < (1, 30, 1)
|
||||
}
|
||||
|
||||
fn description(&self) -> &'static str {
|
||||
"Rebuilding graph links"
|
||||
}
|
||||
}
|
||||
@@ -246,6 +246,32 @@ impl VectorStore {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn rebuild_graph<R: rand::Rng + rand::SeedableRng>(
|
||||
&mut self,
|
||||
wtxn: &mut RwTxn,
|
||||
progress: Progress,
|
||||
rng: &mut R,
|
||||
dimension: usize,
|
||||
cancel: &(impl Fn() -> bool + Sync + Send),
|
||||
) -> Result<(), crate::Error> {
|
||||
for index in vector_store_range_for_embedder(self.embedder_index) {
|
||||
if self.backend == VectorStoreBackend::Hannoy {
|
||||
if self.quantized {
|
||||
let writer = hannoy::Writer::new(self._hannoy_quantized_db(), index, dimension);
|
||||
if !writer.is_empty(wtxn)? {
|
||||
hannoy_rebuild_graph(wtxn, &progress, rng, cancel, &writer)?;
|
||||
}
|
||||
} else {
|
||||
let writer = hannoy::Writer::new(self._hannoy_angular_db(), index, dimension);
|
||||
if !writer.is_empty(wtxn)? {
|
||||
hannoy_rebuild_graph(wtxn, &progress, rng, cancel, &writer)?;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Overwrite all the embeddings associated with the index and item ID.
|
||||
/// /!\ It won't remove embeddings after the last passed embedding, which can leave stale embeddings.
|
||||
/// You should call `del_items` on the `item_id` before calling this method.
|
||||
@@ -1185,6 +1211,25 @@ where
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn hannoy_rebuild_graph<R, D>(
|
||||
wtxn: &mut RwTxn<'_>,
|
||||
progress: &Progress,
|
||||
rng: &mut R,
|
||||
cancel: &(impl Fn() -> bool + Sync + Send),
|
||||
writer: &hannoy::Writer<D>,
|
||||
) -> Result<(), crate::Error>
|
||||
where
|
||||
R: rand::Rng + rand::SeedableRng,
|
||||
D: hannoy::Distance,
|
||||
{
|
||||
let mut builder = writer.builder(rng).progress(progress.clone());
|
||||
builder
|
||||
.cancel(cancel)
|
||||
.ef_construction(HANNOY_EF_CONSTRUCTION)
|
||||
.force_rebuild::<HANNOY_M, HANNOY_M0>(wtxn)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[derive(Debug, Default, Clone)]
|
||||
pub struct VectorStoreStats {
|
||||
pub number_of_embeddings: u64,
|
||||
|
||||
Reference in New Issue
Block a user