mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-09-05 20:26:31 +00:00
Make the VectorStore aware of the index version
This commit is contained in:
@ -146,7 +146,6 @@ impl IndexScheduler {
|
|||||||
};
|
};
|
||||||
|
|
||||||
let mut index_wtxn = index.write_txn()?;
|
let mut index_wtxn = index.write_txn()?;
|
||||||
|
|
||||||
let index_version = index.get_version(&index_wtxn)?.unwrap_or((1, 12, 0));
|
let index_version = index.get_version(&index_wtxn)?.unwrap_or((1, 12, 0));
|
||||||
let package_version = (VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH);
|
let package_version = (VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH);
|
||||||
if index_version != package_version {
|
if index_version != package_version {
|
||||||
|
@ -1769,10 +1769,12 @@ impl Index {
|
|||||||
) -> Result<BTreeMap<String, EmbeddingsWithMetadata>> {
|
) -> Result<BTreeMap<String, EmbeddingsWithMetadata>> {
|
||||||
let mut res = BTreeMap::new();
|
let mut res = BTreeMap::new();
|
||||||
let embedders = self.embedding_configs();
|
let embedders = self.embedding_configs();
|
||||||
|
let index_version = self.get_version(rtxn)?.unwrap();
|
||||||
for config in embedders.embedding_configs(rtxn)? {
|
for config in embedders.embedding_configs(rtxn)? {
|
||||||
let embedder_info = embedders.embedder_info(rtxn, &config.name)?.unwrap();
|
let embedder_info = embedders.embedder_info(rtxn, &config.name)?.unwrap();
|
||||||
let has_fragments = config.config.embedder_options.has_fragments();
|
let has_fragments = config.config.embedder_options.has_fragments();
|
||||||
let reader = VectorStore::new(
|
let reader = VectorStore::new(
|
||||||
|
index_version,
|
||||||
self.vector_store,
|
self.vector_store,
|
||||||
embedder_info.embedder_id,
|
embedder_info.embedder_id,
|
||||||
config.config.quantized(),
|
config.config.quantized(),
|
||||||
@ -1795,10 +1797,15 @@ impl Index {
|
|||||||
pub fn hannoy_stats(&self, rtxn: &RoTxn<'_>) -> Result<HannoyStats> {
|
pub fn hannoy_stats(&self, rtxn: &RoTxn<'_>) -> Result<HannoyStats> {
|
||||||
let mut stats = HannoyStats::default();
|
let mut stats = HannoyStats::default();
|
||||||
let embedding_configs = self.embedding_configs();
|
let embedding_configs = self.embedding_configs();
|
||||||
|
let index_version = self.get_version(rtxn)?.unwrap();
|
||||||
for config in embedding_configs.embedding_configs(rtxn)? {
|
for config in embedding_configs.embedding_configs(rtxn)? {
|
||||||
let embedder_id = embedding_configs.embedder_id(rtxn, &config.name)?.unwrap();
|
let embedder_id = embedding_configs.embedder_id(rtxn, &config.name)?.unwrap();
|
||||||
let reader =
|
let reader = VectorStore::new(
|
||||||
VectorStore::new(self.vector_store, embedder_id, config.config.quantized());
|
index_version,
|
||||||
|
self.vector_store,
|
||||||
|
embedder_id,
|
||||||
|
config.config.quantized(),
|
||||||
|
);
|
||||||
reader.aggregate_stats(rtxn, &mut stats)?;
|
reader.aggregate_stats(rtxn, &mut stats)?;
|
||||||
}
|
}
|
||||||
Ok(stats)
|
Ok(stats)
|
||||||
|
@ -56,7 +56,12 @@ impl<Q: RankingRuleQueryTrait> VectorSort<Q> {
|
|||||||
let target = &self.target;
|
let target = &self.target;
|
||||||
|
|
||||||
let before = Instant::now();
|
let before = Instant::now();
|
||||||
let reader = VectorStore::new(ctx.index.vector_store, self.embedder_index, self.quantized);
|
let reader = VectorStore::new(
|
||||||
|
ctx.index.get_version(ctx.txn)?.unwrap(),
|
||||||
|
ctx.index.vector_store,
|
||||||
|
self.embedder_index,
|
||||||
|
self.quantized,
|
||||||
|
);
|
||||||
let results = reader.nns_by_vector(ctx.txn, target, self.limit, Some(vector_candidates))?;
|
let results = reader.nns_by_vector(ctx.txn, target, self.limit, Some(vector_candidates))?;
|
||||||
self.cached_sorted_docids = results.into_iter();
|
self.cached_sorted_docids = results.into_iter();
|
||||||
*ctx.vector_store_stats.get_or_insert_default() += VectorStoreStats {
|
*ctx.vector_store_stats.get_or_insert_default() += VectorStoreStats {
|
||||||
|
@ -72,7 +72,12 @@ impl<'a> Similar<'a> {
|
|||||||
crate::UserError::InvalidSimilarEmbedder(self.embedder_name.to_owned())
|
crate::UserError::InvalidSimilarEmbedder(self.embedder_name.to_owned())
|
||||||
})?;
|
})?;
|
||||||
|
|
||||||
let reader = VectorStore::new(self.index.vector_store, embedder_index, self.quantized);
|
let reader = VectorStore::new(
|
||||||
|
self.index.get_version(self.rtxn)?.unwrap(),
|
||||||
|
self.index.vector_store,
|
||||||
|
embedder_index,
|
||||||
|
self.quantized,
|
||||||
|
);
|
||||||
let results = reader.nns_by_item(
|
let results = reader.nns_by_item(
|
||||||
self.rtxn,
|
self.rtxn,
|
||||||
self.id,
|
self.id,
|
||||||
|
@ -485,6 +485,7 @@ where
|
|||||||
|
|
||||||
// If an embedder wasn't used in the typedchunk but must be binary quantized
|
// If an embedder wasn't used in the typedchunk but must be binary quantized
|
||||||
// we should insert it in `dimension`
|
// we should insert it in `dimension`
|
||||||
|
let index_version = self.index.get_version(&self.wtxn)?.unwrap();
|
||||||
for (name, action) in settings_diff.embedding_config_updates.iter() {
|
for (name, action) in settings_diff.embedding_config_updates.iter() {
|
||||||
if action.is_being_quantized && !dimension.contains_key(name.as_str()) {
|
if action.is_being_quantized && !dimension.contains_key(name.as_str()) {
|
||||||
let index = self.index.embedding_configs().embedder_id(self.wtxn, name)?.ok_or(
|
let index = self.index.embedding_configs().embedder_id(self.wtxn, name)?.ok_or(
|
||||||
@ -493,7 +494,12 @@ where
|
|||||||
key: None,
|
key: None,
|
||||||
},
|
},
|
||||||
)?;
|
)?;
|
||||||
let reader = VectorStore::new(self.index.vector_store, index, action.was_quantized);
|
let reader = VectorStore::new(
|
||||||
|
index_version,
|
||||||
|
self.index.vector_store,
|
||||||
|
index,
|
||||||
|
action.was_quantized,
|
||||||
|
);
|
||||||
let Some(dim) = reader.dimensions(self.wtxn)? else {
|
let Some(dim) = reader.dimensions(self.wtxn)? else {
|
||||||
continue;
|
continue;
|
||||||
};
|
};
|
||||||
@ -522,7 +528,8 @@ where
|
|||||||
let is_quantizing = embedder_config.is_some_and(|action| action.is_being_quantized);
|
let is_quantizing = embedder_config.is_some_and(|action| action.is_being_quantized);
|
||||||
|
|
||||||
pool.install(|| {
|
pool.install(|| {
|
||||||
let mut writer = VectorStore::new(vector_hannoy, embedder_index, was_quantized);
|
let mut writer =
|
||||||
|
VectorStore::new(index_version, vector_hannoy, embedder_index, was_quantized);
|
||||||
writer.build_and_quantize(
|
writer.build_and_quantize(
|
||||||
wtxn,
|
wtxn,
|
||||||
// In the settings we don't have any progress to share
|
// In the settings we don't have any progress to share
|
||||||
|
@ -834,6 +834,7 @@ impl<'a, 'i> Transform<'a, 'i> {
|
|||||||
None
|
None
|
||||||
};
|
};
|
||||||
|
|
||||||
|
let index_version = self.index.get_version(wtxn)?.unwrap();
|
||||||
let readers: BTreeMap<&str, (VectorStore, &RoaringBitmap)> = settings_diff
|
let readers: BTreeMap<&str, (VectorStore, &RoaringBitmap)> = settings_diff
|
||||||
.embedding_config_updates
|
.embedding_config_updates
|
||||||
.iter()
|
.iter()
|
||||||
@ -842,6 +843,7 @@ impl<'a, 'i> Transform<'a, 'i> {
|
|||||||
action.write_back()
|
action.write_back()
|
||||||
{
|
{
|
||||||
let reader = VectorStore::new(
|
let reader = VectorStore::new(
|
||||||
|
index_version,
|
||||||
self.index.vector_store,
|
self.index.vector_store,
|
||||||
*embedder_id,
|
*embedder_id,
|
||||||
action.was_quantized,
|
action.was_quantized,
|
||||||
@ -949,8 +951,12 @@ impl<'a, 'i> Transform<'a, 'i> {
|
|||||||
else {
|
else {
|
||||||
continue;
|
continue;
|
||||||
};
|
};
|
||||||
let hannoy =
|
let hannoy = VectorStore::new(
|
||||||
VectorStore::new(self.index.vector_store, infos.embedder_id, was_quantized);
|
index_version,
|
||||||
|
self.index.vector_store,
|
||||||
|
infos.embedder_id,
|
||||||
|
was_quantized,
|
||||||
|
);
|
||||||
let Some(dimensions) = hannoy.dimensions(wtxn)? else {
|
let Some(dimensions) = hannoy.dimensions(wtxn)? else {
|
||||||
continue;
|
continue;
|
||||||
};
|
};
|
||||||
|
@ -619,6 +619,7 @@ pub(crate) fn write_typed_chunk_into_index(
|
|||||||
let _entered = span.enter();
|
let _entered = span.enter();
|
||||||
|
|
||||||
let embedders = index.embedding_configs();
|
let embedders = index.embedding_configs();
|
||||||
|
let index_version = index.get_version(wtxn)?.unwrap();
|
||||||
|
|
||||||
let mut remove_vectors_builder = MergerBuilder::new(KeepFirst);
|
let mut remove_vectors_builder = MergerBuilder::new(KeepFirst);
|
||||||
let mut manual_vectors_builder = MergerBuilder::new(KeepFirst);
|
let mut manual_vectors_builder = MergerBuilder::new(KeepFirst);
|
||||||
@ -677,7 +678,12 @@ pub(crate) fn write_typed_chunk_into_index(
|
|||||||
.get(&embedder_name)
|
.get(&embedder_name)
|
||||||
.is_some_and(|conf| conf.is_quantized);
|
.is_some_and(|conf| conf.is_quantized);
|
||||||
// FIXME: allow customizing distance
|
// FIXME: allow customizing distance
|
||||||
let writer = VectorStore::new(index.vector_store, infos.embedder_id, binary_quantized);
|
let writer = VectorStore::new(
|
||||||
|
index_version,
|
||||||
|
index.vector_store,
|
||||||
|
infos.embedder_id,
|
||||||
|
binary_quantized,
|
||||||
|
);
|
||||||
|
|
||||||
// remove vectors for docids we want them removed
|
// remove vectors for docids we want them removed
|
||||||
let merger = remove_vectors_builder.build();
|
let merger = remove_vectors_builder.build();
|
||||||
|
@ -8,7 +8,7 @@ use document_changes::{DocumentChanges, IndexingContext};
|
|||||||
pub use document_deletion::DocumentDeletion;
|
pub use document_deletion::DocumentDeletion;
|
||||||
pub use document_operation::{DocumentOperation, PayloadStats};
|
pub use document_operation::{DocumentOperation, PayloadStats};
|
||||||
use hashbrown::HashMap;
|
use hashbrown::HashMap;
|
||||||
use heed::RwTxn;
|
use heed::{RoTxn, RwTxn};
|
||||||
pub use partial_dump::PartialDump;
|
pub use partial_dump::PartialDump;
|
||||||
pub use post_processing::recompute_word_fst_from_word_docids_database;
|
pub use post_processing::recompute_word_fst_from_word_docids_database;
|
||||||
pub use update_by_function::UpdateByFunction;
|
pub use update_by_function::UpdateByFunction;
|
||||||
@ -130,6 +130,7 @@ where
|
|||||||
let global_fields_ids_map = GlobalFieldsIdsMap::new(&new_fields_ids_map);
|
let global_fields_ids_map = GlobalFieldsIdsMap::new(&new_fields_ids_map);
|
||||||
|
|
||||||
let vector_arroy = index.vector_store;
|
let vector_arroy = index.vector_store;
|
||||||
|
let index_version = index.get_version(wtxn)?.unwrap();
|
||||||
let hannoy_writers: Result<HashMap<_, _>> = embedders
|
let hannoy_writers: Result<HashMap<_, _>> = embedders
|
||||||
.inner_as_ref()
|
.inner_as_ref()
|
||||||
.iter()
|
.iter()
|
||||||
@ -143,7 +144,12 @@ where
|
|||||||
})?;
|
})?;
|
||||||
|
|
||||||
let dimensions = runtime.embedder.dimensions();
|
let dimensions = runtime.embedder.dimensions();
|
||||||
let writer = VectorStore::new(vector_arroy, embedder_index, runtime.is_quantized);
|
let writer = VectorStore::new(
|
||||||
|
index_version,
|
||||||
|
vector_arroy,
|
||||||
|
embedder_index,
|
||||||
|
runtime.is_quantized,
|
||||||
|
);
|
||||||
|
|
||||||
Ok((
|
Ok((
|
||||||
embedder_index,
|
embedder_index,
|
||||||
@ -285,6 +291,7 @@ where
|
|||||||
let index_embedder_category_ids = settings_delta.new_embedder_category_id();
|
let index_embedder_category_ids = settings_delta.new_embedder_category_id();
|
||||||
let mut hannoy_writers = hannoy_writers_from_embedder_actions(
|
let mut hannoy_writers = hannoy_writers_from_embedder_actions(
|
||||||
index,
|
index,
|
||||||
|
wtxn,
|
||||||
embedder_actions,
|
embedder_actions,
|
||||||
new_embedders,
|
new_embedders,
|
||||||
index_embedder_category_ids,
|
index_embedder_category_ids,
|
||||||
@ -338,11 +345,13 @@ where
|
|||||||
|
|
||||||
fn hannoy_writers_from_embedder_actions<'indexer>(
|
fn hannoy_writers_from_embedder_actions<'indexer>(
|
||||||
index: &Index,
|
index: &Index,
|
||||||
|
rtxn: &RoTxn,
|
||||||
embedder_actions: &'indexer BTreeMap<String, EmbedderAction>,
|
embedder_actions: &'indexer BTreeMap<String, EmbedderAction>,
|
||||||
embedders: &'indexer RuntimeEmbedders,
|
embedders: &'indexer RuntimeEmbedders,
|
||||||
index_embedder_category_ids: &'indexer std::collections::HashMap<String, u8>,
|
index_embedder_category_ids: &'indexer std::collections::HashMap<String, u8>,
|
||||||
) -> Result<HashMap<u8, (&'indexer str, &'indexer Embedder, VectorStore, usize)>> {
|
) -> Result<HashMap<u8, (&'indexer str, &'indexer Embedder, VectorStore, usize)>> {
|
||||||
let vector_arroy = index.vector_store;
|
let vector_arroy = index.vector_store;
|
||||||
|
let index_version = index.get_version(rtxn)?.unwrap();
|
||||||
|
|
||||||
embedders
|
embedders
|
||||||
.inner_as_ref()
|
.inner_as_ref()
|
||||||
@ -360,8 +369,12 @@ fn hannoy_writers_from_embedder_actions<'indexer>(
|
|||||||
},
|
},
|
||||||
)));
|
)));
|
||||||
};
|
};
|
||||||
let writer =
|
let writer = VectorStore::new(
|
||||||
VectorStore::new(vector_arroy, embedder_category_id, action.was_quantized);
|
index_version,
|
||||||
|
vector_arroy,
|
||||||
|
embedder_category_id,
|
||||||
|
action.was_quantized,
|
||||||
|
);
|
||||||
let dimensions = runtime.embedder.dimensions();
|
let dimensions = runtime.embedder.dimensions();
|
||||||
Some(Ok((
|
Some(Ok((
|
||||||
embedder_category_id,
|
embedder_category_id,
|
||||||
@ -384,7 +397,12 @@ where
|
|||||||
let Some(WriteBackToDocuments { embedder_id, .. }) = action.write_back() else {
|
let Some(WriteBackToDocuments { embedder_id, .. }) = action.write_back() else {
|
||||||
continue;
|
continue;
|
||||||
};
|
};
|
||||||
let reader = VectorStore::new(index.vector_store, *embedder_id, action.was_quantized);
|
let reader = VectorStore::new(
|
||||||
|
index.get_version(wtxn)?.unwrap(),
|
||||||
|
index.vector_store,
|
||||||
|
*embedder_id,
|
||||||
|
action.was_quantized,
|
||||||
|
);
|
||||||
let Some(dimensions) = reader.dimensions(wtxn)? else {
|
let Some(dimensions) = reader.dimensions(wtxn)? else {
|
||||||
continue;
|
continue;
|
||||||
};
|
};
|
||||||
@ -400,7 +418,12 @@ where
|
|||||||
let Some(infos) = index.embedding_configs().embedder_info(wtxn, embedder_name)? else {
|
let Some(infos) = index.embedding_configs().embedder_info(wtxn, embedder_name)? else {
|
||||||
continue;
|
continue;
|
||||||
};
|
};
|
||||||
let arroy = VectorStore::new(index.vector_store, infos.embedder_id, was_quantized);
|
let arroy = VectorStore::new(
|
||||||
|
index.get_version(wtxn)?.unwrap(),
|
||||||
|
index.vector_store,
|
||||||
|
infos.embedder_id,
|
||||||
|
was_quantized,
|
||||||
|
);
|
||||||
let Some(dimensions) = arroy.dimensions(wtxn)? else {
|
let Some(dimensions) = arroy.dimensions(wtxn)? else {
|
||||||
continue;
|
continue;
|
||||||
};
|
};
|
||||||
|
@ -120,8 +120,13 @@ impl<'t> VectorDocumentFromDb<'t> {
|
|||||||
config: &IndexEmbeddingConfig,
|
config: &IndexEmbeddingConfig,
|
||||||
status: &EmbeddingStatus,
|
status: &EmbeddingStatus,
|
||||||
) -> Result<VectorEntry<'t>> {
|
) -> Result<VectorEntry<'t>> {
|
||||||
let reader =
|
let index_version = self.index.get_version(self.rtxn)?.unwrap();
|
||||||
VectorStore::new(self.index.vector_store, embedder_id, config.config.quantized());
|
let reader = VectorStore::new(
|
||||||
|
index_version,
|
||||||
|
self.index.vector_store,
|
||||||
|
embedder_id,
|
||||||
|
config.config.quantized(),
|
||||||
|
);
|
||||||
let vectors = reader.item_vectors(self.rtxn, self.docid)?;
|
let vectors = reader.item_vectors(self.rtxn, self.docid)?;
|
||||||
|
|
||||||
Ok(VectorEntry {
|
Ok(VectorEntry {
|
||||||
|
@ -17,11 +17,13 @@ impl UpgradeIndex for Latest_V1_17_To_V1_18_0 {
|
|||||||
progress: Progress,
|
progress: Progress,
|
||||||
) -> Result<bool> {
|
) -> Result<bool> {
|
||||||
let embedding_configs = index.embedding_configs();
|
let embedding_configs = index.embedding_configs();
|
||||||
|
let index_version = index.get_version(wtxn)?.unwrap();
|
||||||
for config in embedding_configs.embedding_configs(wtxn)? {
|
for config in embedding_configs.embedding_configs(wtxn)? {
|
||||||
// TODO use the embedder name to display progress
|
// TODO use the embedder name to display progress
|
||||||
let quantized = config.config.quantized();
|
let quantized = config.config.quantized();
|
||||||
let embedder_id = embedding_configs.embedder_id(wtxn, &config.name)?.unwrap();
|
let embedder_id = embedding_configs.embedder_id(wtxn, &config.name)?.unwrap();
|
||||||
let vector_store = VectorStore::new(index.vector_store, embedder_id, quantized);
|
let vector_store =
|
||||||
|
VectorStore::new(index_version, index.vector_store, embedder_id, quantized);
|
||||||
vector_store.convert_from_arroy(wtxn, progress.clone())?;
|
vector_store.convert_from_arroy(wtxn, progress.clone())?;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -47,18 +47,20 @@ const HANNOY_M: usize = 16;
|
|||||||
const HANNOY_M0: usize = 32;
|
const HANNOY_M0: usize = 32;
|
||||||
|
|
||||||
pub struct VectorStore {
|
pub struct VectorStore {
|
||||||
quantized: bool,
|
version: (u32, u32, u32),
|
||||||
embedder_index: u8,
|
|
||||||
database: hannoy::Database<Unspecified>,
|
database: hannoy::Database<Unspecified>,
|
||||||
|
embedder_index: u8,
|
||||||
|
quantized: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl VectorStore {
|
impl VectorStore {
|
||||||
pub fn new(
|
pub fn new(
|
||||||
|
version: (u32, u32, u32),
|
||||||
database: hannoy::Database<Unspecified>,
|
database: hannoy::Database<Unspecified>,
|
||||||
embedder_index: u8,
|
embedder_index: u8,
|
||||||
quantized: bool,
|
quantized: bool,
|
||||||
) -> Self {
|
) -> Self {
|
||||||
Self { database, embedder_index, quantized }
|
Self { version, database, embedder_index, quantized }
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn embedder_index(&self) -> u8 {
|
pub fn embedder_index(&self) -> u8 {
|
||||||
|
Reference in New Issue
Block a user