mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-12-21 20:06:58 +00:00
Compare commits
5 Commits
openapi-co
...
integrate-
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
dd66414e28 | ||
|
|
4538ceedcc | ||
|
|
146a5b7a63 | ||
|
|
b2b9f2239b | ||
|
|
042820693e |
39
Cargo.lock
generated
39
Cargo.lock
generated
@@ -580,7 +580,7 @@ source = "git+https://github.com/meilisearch/bbqueue#e8af4a4bccc8eb36b2b0442c4a9
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "benchmarks"
|
name = "benchmarks"
|
||||||
version = "1.30.0"
|
version = "1.30.1"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"anyhow",
|
"anyhow",
|
||||||
"bumpalo",
|
"bumpalo",
|
||||||
@@ -790,7 +790,7 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "build-info"
|
name = "build-info"
|
||||||
version = "1.30.0"
|
version = "1.30.1"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"anyhow",
|
"anyhow",
|
||||||
"time",
|
"time",
|
||||||
@@ -1786,7 +1786,7 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "dump"
|
name = "dump"
|
||||||
version = "1.30.0"
|
version = "1.30.1"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"anyhow",
|
"anyhow",
|
||||||
"big_s",
|
"big_s",
|
||||||
@@ -2018,7 +2018,7 @@ checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be"
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "file-store"
|
name = "file-store"
|
||||||
version = "1.30.0"
|
version = "1.30.1"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"tempfile",
|
"tempfile",
|
||||||
"thiserror 2.0.17",
|
"thiserror 2.0.17",
|
||||||
@@ -2040,7 +2040,7 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "filter-parser"
|
name = "filter-parser"
|
||||||
version = "1.30.0"
|
version = "1.30.1"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"insta",
|
"insta",
|
||||||
"levenshtein_automata",
|
"levenshtein_automata",
|
||||||
@@ -2068,7 +2068,7 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "flatten-serde-json"
|
name = "flatten-serde-json"
|
||||||
version = "1.30.0"
|
version = "1.30.1"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"criterion",
|
"criterion",
|
||||||
"serde_json",
|
"serde_json",
|
||||||
@@ -2231,7 +2231,7 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "fuzzers"
|
name = "fuzzers"
|
||||||
version = "1.30.0"
|
version = "1.30.1"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"arbitrary",
|
"arbitrary",
|
||||||
"bumpalo",
|
"bumpalo",
|
||||||
@@ -2698,9 +2698,8 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "hannoy"
|
name = "hannoy"
|
||||||
version = "0.1.0-nested-rtxns"
|
version = "0.1.2-nested-rtxns"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "git+https://github.com/nnethercott/hannoy?branch=use-heed-nested-rtxns#41f375e3080f311a5bef30eb647a2162262b5abd"
|
||||||
checksum = "be82bf3f2108ddc8885e3d306fcd7f4692066bfe26065ca8b42ba417f3c26dd1"
|
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"bytemuck",
|
"bytemuck",
|
||||||
"byteorder",
|
"byteorder",
|
||||||
@@ -3185,7 +3184,7 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "index-scheduler"
|
name = "index-scheduler"
|
||||||
version = "1.30.0"
|
version = "1.30.1"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"anyhow",
|
"anyhow",
|
||||||
"backoff",
|
"backoff",
|
||||||
@@ -3449,7 +3448,7 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "json-depth-checker"
|
name = "json-depth-checker"
|
||||||
version = "1.30.0"
|
version = "1.30.1"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"criterion",
|
"criterion",
|
||||||
"serde_json",
|
"serde_json",
|
||||||
@@ -3939,7 +3938,7 @@ checksum = "ae960838283323069879657ca3de837e9f7bbb4c7bf6ea7f1b290d5e9476d2e0"
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "meili-snap"
|
name = "meili-snap"
|
||||||
version = "1.30.0"
|
version = "1.30.1"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"insta",
|
"insta",
|
||||||
"md5 0.8.0",
|
"md5 0.8.0",
|
||||||
@@ -3950,7 +3949,7 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "meilisearch"
|
name = "meilisearch"
|
||||||
version = "1.30.0"
|
version = "1.30.1"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"actix-cors",
|
"actix-cors",
|
||||||
"actix-http",
|
"actix-http",
|
||||||
@@ -4048,7 +4047,7 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "meilisearch-auth"
|
name = "meilisearch-auth"
|
||||||
version = "1.30.0"
|
version = "1.30.1"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"base64 0.22.1",
|
"base64 0.22.1",
|
||||||
"enum-iterator",
|
"enum-iterator",
|
||||||
@@ -4067,7 +4066,7 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "meilisearch-types"
|
name = "meilisearch-types"
|
||||||
version = "1.30.0"
|
version = "1.30.1"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"actix-web",
|
"actix-web",
|
||||||
"anyhow",
|
"anyhow",
|
||||||
@@ -4105,7 +4104,7 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "meilitool"
|
name = "meilitool"
|
||||||
version = "1.30.0"
|
version = "1.30.1"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"anyhow",
|
"anyhow",
|
||||||
"clap",
|
"clap",
|
||||||
@@ -4139,7 +4138,7 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "milli"
|
name = "milli"
|
||||||
version = "1.30.0"
|
version = "1.30.1"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"arroy",
|
"arroy",
|
||||||
"bbqueue",
|
"bbqueue",
|
||||||
@@ -4718,7 +4717,7 @@ checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220"
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "permissive-json-pointer"
|
name = "permissive-json-pointer"
|
||||||
version = "1.30.0"
|
version = "1.30.1"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"big_s",
|
"big_s",
|
||||||
"serde_json",
|
"serde_json",
|
||||||
@@ -7758,7 +7757,7 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "xtask"
|
name = "xtask"
|
||||||
version = "1.30.0"
|
version = "1.30.1"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"anyhow",
|
"anyhow",
|
||||||
"build-info",
|
"build-info",
|
||||||
|
|||||||
@@ -23,7 +23,7 @@ members = [
|
|||||||
]
|
]
|
||||||
|
|
||||||
[workspace.package]
|
[workspace.package]
|
||||||
version = "1.30.0"
|
version = "1.30.1"
|
||||||
authors = [
|
authors = [
|
||||||
"Quentin de Quelen <quentin@dequelen.me>",
|
"Quentin de Quelen <quentin@dequelen.me>",
|
||||||
"Clément Renault <clement@meilisearch.com>",
|
"Clément Renault <clement@meilisearch.com>",
|
||||||
|
|||||||
@@ -91,7 +91,7 @@ rhai = { version = "1.23.6", features = [
|
|||||||
"sync",
|
"sync",
|
||||||
] }
|
] }
|
||||||
arroy = "0.6.4-nested-rtxns"
|
arroy = "0.6.4-nested-rtxns"
|
||||||
hannoy = { version = "0.1.0-nested-rtxns", features = ["arroy"] }
|
hannoy = { git = "https://github.com/nnethercott/hannoy", branch = "use-heed-nested-rtxns", features = ["arroy"] }
|
||||||
rand = "0.8.5"
|
rand = "0.8.5"
|
||||||
tracing = "0.1.41"
|
tracing = "0.1.41"
|
||||||
ureq = { version = "2.12.1", features = ["json"] }
|
ureq = { version = "2.12.1", features = ["json"] }
|
||||||
|
|||||||
@@ -3,6 +3,7 @@ mod v1_13;
|
|||||||
mod v1_14;
|
mod v1_14;
|
||||||
mod v1_15;
|
mod v1_15;
|
||||||
mod v1_16;
|
mod v1_16;
|
||||||
|
mod v1_30_1;
|
||||||
|
|
||||||
use heed::RwTxn;
|
use heed::RwTxn;
|
||||||
use v1_12::{FixFieldDistribution, RecomputeStats};
|
use v1_12::{FixFieldDistribution, RecomputeStats};
|
||||||
@@ -10,6 +11,7 @@ use v1_13::AddNewStats;
|
|||||||
use v1_14::UpgradeArroyVersion;
|
use v1_14::UpgradeArroyVersion;
|
||||||
use v1_15::RecomputeWordFst;
|
use v1_15::RecomputeWordFst;
|
||||||
use v1_16::SwitchToMultimodal;
|
use v1_16::SwitchToMultimodal;
|
||||||
|
use v1_30_1::RebuildHannoyGraph;
|
||||||
|
|
||||||
use crate::constants::{VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH};
|
use crate::constants::{VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH};
|
||||||
use crate::progress::{Progress, VariableNameStep};
|
use crate::progress::{Progress, VariableNameStep};
|
||||||
@@ -33,6 +35,7 @@ const UPGRADE_FUNCTIONS: &[&dyn UpgradeIndex] = &[
|
|||||||
&UpgradeArroyVersion {},
|
&UpgradeArroyVersion {},
|
||||||
&RecomputeWordFst {},
|
&RecomputeWordFst {},
|
||||||
&SwitchToMultimodal {},
|
&SwitchToMultimodal {},
|
||||||
|
&RebuildHannoyGraph,
|
||||||
];
|
];
|
||||||
|
|
||||||
/// Return true if the cached stats of the index must be regenerated
|
/// Return true if the cached stats of the index must be regenerated
|
||||||
@@ -58,12 +61,12 @@ where
|
|||||||
return Err(crate::Error::InternalError(InternalError::AbortedIndexation));
|
return Err(crate::Error::InternalError(InternalError::AbortedIndexation));
|
||||||
}
|
}
|
||||||
if upgrade.must_upgrade(initial_version) {
|
if upgrade.must_upgrade(initial_version) {
|
||||||
regenerate_stats |= upgrade.upgrade(wtxn, index, progress.clone())?;
|
|
||||||
progress.update_progress(VariableNameStep::<UpgradeVersion>::new(
|
progress.update_progress(VariableNameStep::<UpgradeVersion>::new(
|
||||||
upgrade.description(),
|
upgrade.description(),
|
||||||
i as u32,
|
i as u32,
|
||||||
upgrade_functions.len() as u32,
|
upgrade_functions.len() as u32,
|
||||||
));
|
));
|
||||||
|
regenerate_stats |= upgrade.upgrade(wtxn, index, progress.clone())?;
|
||||||
} else {
|
} else {
|
||||||
progress.update_progress(VariableNameStep::<UpgradeVersion>::new(
|
progress.update_progress(VariableNameStep::<UpgradeVersion>::new(
|
||||||
"Skipping migration that must not be applied",
|
"Skipping migration that must not be applied",
|
||||||
|
|||||||
50
crates/milli/src/update/upgrade/v1_30_1.rs
Normal file
50
crates/milli/src/update/upgrade/v1_30_1.rs
Normal file
@@ -0,0 +1,50 @@
|
|||||||
|
use heed::RwTxn;
|
||||||
|
use rand::SeedableRng as _;
|
||||||
|
|
||||||
|
use super::UpgradeIndex;
|
||||||
|
use crate::progress::Progress;
|
||||||
|
use crate::vector::VectorStore;
|
||||||
|
use crate::{Index, Result};
|
||||||
|
|
||||||
|
/// Rebuilds the hannoy graph and do not touch to the embeddings.
|
||||||
|
///
|
||||||
|
/// This follows a bug in hannoy v0.0.9 and v0.1.0 where the graph
|
||||||
|
/// was not built correctly.
|
||||||
|
pub(super) struct RebuildHannoyGraph;
|
||||||
|
|
||||||
|
impl UpgradeIndex for RebuildHannoyGraph {
|
||||||
|
fn upgrade(&self, wtxn: &mut RwTxn, index: &Index, progress: Progress) -> Result<bool> {
|
||||||
|
let embedders = index.embedding_configs();
|
||||||
|
let backend = index.get_vector_store(wtxn)?.unwrap_or_default();
|
||||||
|
|
||||||
|
for config in embedders.embedding_configs(wtxn)? {
|
||||||
|
let embedder_info = embedders.embedder_info(wtxn, &config.name)?.unwrap();
|
||||||
|
let mut vector_store = VectorStore::new(
|
||||||
|
backend,
|
||||||
|
index.vector_store,
|
||||||
|
embedder_info.embedder_id,
|
||||||
|
config.config.quantized(),
|
||||||
|
);
|
||||||
|
|
||||||
|
let seed = rand::random();
|
||||||
|
let mut rng = rand::rngs::StdRng::seed_from_u64(seed);
|
||||||
|
vector_store.rebuild_graph(
|
||||||
|
wtxn,
|
||||||
|
progress.clone(),
|
||||||
|
&mut rng,
|
||||||
|
vector_store.dimensions(wtxn)?.unwrap(),
|
||||||
|
&|| false,
|
||||||
|
)?;
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(false)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn must_upgrade(&self, initial_version: (u32, u32, u32)) -> bool {
|
||||||
|
initial_version < (1, 30, 1)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn description(&self) -> &'static str {
|
||||||
|
"Rebuilding graph links"
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -246,6 +246,32 @@ impl VectorStore {
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn rebuild_graph<R: rand::Rng + rand::SeedableRng>(
|
||||||
|
&mut self,
|
||||||
|
wtxn: &mut RwTxn,
|
||||||
|
progress: Progress,
|
||||||
|
rng: &mut R,
|
||||||
|
dimension: usize,
|
||||||
|
cancel: &(impl Fn() -> bool + Sync + Send),
|
||||||
|
) -> Result<(), crate::Error> {
|
||||||
|
for index in vector_store_range_for_embedder(self.embedder_index) {
|
||||||
|
if self.backend == VectorStoreBackend::Hannoy {
|
||||||
|
if self.quantized {
|
||||||
|
let writer = hannoy::Writer::new(self._hannoy_quantized_db(), index, dimension);
|
||||||
|
if !writer.is_empty(wtxn)? {
|
||||||
|
hannoy_rebuild_graph(wtxn, &progress, rng, cancel, &writer)?;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
let writer = hannoy::Writer::new(self._hannoy_angular_db(), index, dimension);
|
||||||
|
if !writer.is_empty(wtxn)? {
|
||||||
|
hannoy_rebuild_graph(wtxn, &progress, rng, cancel, &writer)?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
/// Overwrite all the embeddings associated with the index and item ID.
|
/// Overwrite all the embeddings associated with the index and item ID.
|
||||||
/// /!\ It won't remove embeddings after the last passed embedding, which can leave stale embeddings.
|
/// /!\ It won't remove embeddings after the last passed embedding, which can leave stale embeddings.
|
||||||
/// You should call `del_items` on the `item_id` before calling this method.
|
/// You should call `del_items` on the `item_id` before calling this method.
|
||||||
@@ -1185,6 +1211,25 @@ where
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn hannoy_rebuild_graph<R, D>(
|
||||||
|
wtxn: &mut RwTxn<'_>,
|
||||||
|
progress: &Progress,
|
||||||
|
rng: &mut R,
|
||||||
|
cancel: &(impl Fn() -> bool + Sync + Send),
|
||||||
|
writer: &hannoy::Writer<D>,
|
||||||
|
) -> Result<(), crate::Error>
|
||||||
|
where
|
||||||
|
R: rand::Rng + rand::SeedableRng,
|
||||||
|
D: hannoy::Distance,
|
||||||
|
{
|
||||||
|
let mut builder = writer.builder(rng).progress(progress.clone());
|
||||||
|
builder
|
||||||
|
.cancel(cancel)
|
||||||
|
.ef_construction(HANNOY_EF_CONSTRUCTION)
|
||||||
|
.force_rebuild::<HANNOY_M, HANNOY_M0>(wtxn)?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
#[derive(Debug, Default, Clone)]
|
#[derive(Debug, Default, Clone)]
|
||||||
pub struct VectorStoreStats {
|
pub struct VectorStoreStats {
|
||||||
pub number_of_embeddings: u64,
|
pub number_of_embeddings: u64,
|
||||||
|
|||||||
Reference in New Issue
Block a user