mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-09-06 12:46:31 +00:00
Reintroduce arroy and support for dumpless upgrade from previous versions
This commit is contained in:
committed by
Louis Dureuil
parent
db9f205184
commit
3bc192ae52
@ -76,6 +76,8 @@ pub enum InternalError {
|
||||
#[error("Cannot upgrade to the following version: v{0}.{1}.{2}.")]
|
||||
CannotUpgradeToVersion(u32, u32, u32),
|
||||
#[error(transparent)]
|
||||
ArroyError(#[from] arroy::Error),
|
||||
#[error(transparent)]
|
||||
HannoyError(#[from] hannoy::Error),
|
||||
#[error(transparent)]
|
||||
VectorEmbeddingError(#[from] crate::vector::Error),
|
||||
@ -419,6 +421,28 @@ impl From<crate::vector::Error> for Error {
|
||||
}
|
||||
}
|
||||
|
||||
// Converts an `arroy::Error` into this crate's `Error`.
//
// Each variant is either delegated to an existing conversion, mapped to a
// dedicated user/internal error, or wrapped as `InternalError::ArroyError`.
impl From<arroy::Error> for Error {
|
||||
fn from(value: arroy::Error) -> Self {
|
||||
match value {
|
||||
// Heed and I/O errors are handed to the `Into<Error>` conversions of their payloads.
arroy::Error::Heed(heed) => heed.into(),
|
||||
arroy::Error::Io(io) => io.into(),
|
||||
// A dimension mismatch becomes a `UserError`; arroy's `received` is passed as `found`.
arroy::Error::InvalidVecDimension { expected, received } => {
|
||||
Error::UserError(UserError::InvalidVectorDimensions { expected, found: received })
|
||||
}
|
||||
// A cancelled build is mapped to `InternalError::AbortedIndexation`.
arroy::Error::BuildCancelled => Error::InternalError(InternalError::AbortedIndexation),
|
||||
// The remaining variants are listed explicitly (no `_` catch-all) and wrapped unchanged.
// None of these patterns binds a field, so `value` is still whole when wrapped below.
arroy::Error::DatabaseFull
|
||||
| arroy::Error::InvalidItemAppend
|
||||
| arroy::Error::UnmatchingDistance { .. }
|
||||
| arroy::Error::NeedBuild(_)
|
||||
| arroy::Error::MissingKey { .. }
|
||||
| arroy::Error::MissingMetadata(_)
|
||||
| arroy::Error::CannotDecodeKeyMode { .. } => {
|
||||
Error::InternalError(InternalError::ArroyError(value))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<hannoy::Error> for Error {
|
||||
fn from(value: hannoy::Error) -> Self {
|
||||
match value {
|
||||
|
@ -180,7 +180,7 @@ pub struct Index {
|
||||
/// Maps an embedder name to its id in the hannoy store.
|
||||
pub(crate) embedder_category_id: Database<Unspecified, Unspecified>,
|
||||
/// Vector store based on hannoy™.
|
||||
pub vector_hannoy: hannoy::Database<Unspecified>,
|
||||
pub vector_store: hannoy::Database<Unspecified>,
|
||||
|
||||
/// Maps the document id to the document as an obkv store.
|
||||
pub(crate) documents: Database<BEU32, ObkvCodec>,
|
||||
@ -264,7 +264,7 @@ impl Index {
|
||||
facet_id_is_empty_docids,
|
||||
field_id_docid_facet_f64s,
|
||||
field_id_docid_facet_strings,
|
||||
vector_hannoy,
|
||||
vector_store: vector_hannoy,
|
||||
embedder_category_id,
|
||||
documents,
|
||||
};
|
||||
@ -1773,7 +1773,7 @@ impl Index {
|
||||
let embedder_info = embedders.embedder_info(rtxn, &config.name)?.unwrap();
|
||||
let has_fragments = config.config.embedder_options.has_fragments();
|
||||
let reader = VectorStore::new(
|
||||
self.vector_hannoy,
|
||||
self.vector_store,
|
||||
embedder_info.embedder_id,
|
||||
config.config.quantized(),
|
||||
);
|
||||
@ -1798,7 +1798,7 @@ impl Index {
|
||||
for config in embedding_configs.embedding_configs(rtxn)? {
|
||||
let embedder_id = embedding_configs.embedder_id(rtxn, &config.name)?.unwrap();
|
||||
let reader =
|
||||
VectorStore::new(self.vector_hannoy, embedder_id, config.config.quantized());
|
||||
VectorStore::new(self.vector_store, embedder_id, config.config.quantized());
|
||||
reader.aggregate_stats(rtxn, &mut stats)?;
|
||||
}
|
||||
Ok(stats)
|
||||
@ -1842,7 +1842,7 @@ impl Index {
|
||||
facet_id_is_empty_docids,
|
||||
field_id_docid_facet_f64s,
|
||||
field_id_docid_facet_strings,
|
||||
vector_hannoy,
|
||||
vector_store: vector_hannoy,
|
||||
embedder_category_id,
|
||||
documents,
|
||||
} = self;
|
||||
|
@ -53,7 +53,7 @@ pub use search::new::{
|
||||
};
|
||||
use serde_json::Value;
|
||||
pub use thread_pool_no_abort::{PanicCatched, ThreadPoolNoAbort, ThreadPoolNoAbortBuilder};
|
||||
pub use {charabia as tokenizer, hannoy, heed, rhai};
|
||||
pub use {arroy, charabia as tokenizer, hannoy, heed, rhai};
|
||||
|
||||
pub use self::asc_desc::{AscDesc, AscDescError, Member, SortError};
|
||||
pub use self::attribute_patterns::{AttributePatterns, PatternMatch};
|
||||
|
@ -56,7 +56,7 @@ impl<Q: RankingRuleQueryTrait> VectorSort<Q> {
|
||||
let target = &self.target;
|
||||
|
||||
let before = Instant::now();
|
||||
let reader = VectorStore::new(ctx.index.vector_hannoy, self.embedder_index, self.quantized);
|
||||
let reader = VectorStore::new(ctx.index.vector_store, self.embedder_index, self.quantized);
|
||||
let results = reader.nns_by_vector(ctx.txn, target, self.limit, Some(vector_candidates))?;
|
||||
self.cached_sorted_docids = results.into_iter();
|
||||
*ctx.vector_store_stats.get_or_insert_default() += VectorStoreStats {
|
||||
|
@ -72,7 +72,7 @@ impl<'a> Similar<'a> {
|
||||
crate::UserError::InvalidSimilarEmbedder(self.embedder_name.to_owned())
|
||||
})?;
|
||||
|
||||
let reader = VectorStore::new(self.index.vector_hannoy, embedder_index, self.quantized);
|
||||
let reader = VectorStore::new(self.index.vector_store, embedder_index, self.quantized);
|
||||
let results = reader.nns_by_item(
|
||||
self.rtxn,
|
||||
self.id,
|
||||
|
@ -2,7 +2,8 @@ use heed::RwTxn;
|
||||
use roaring::RoaringBitmap;
|
||||
use time::OffsetDateTime;
|
||||
|
||||
use crate::{database_stats::DatabaseStats, FieldDistribution, Index, Result};
|
||||
use crate::database_stats::DatabaseStats;
|
||||
use crate::{FieldDistribution, Index, Result};
|
||||
|
||||
pub struct ClearDocuments<'t, 'i> {
|
||||
wtxn: &'t mut RwTxn<'i>,
|
||||
@ -45,7 +46,7 @@ impl<'t, 'i> ClearDocuments<'t, 'i> {
|
||||
facet_id_is_empty_docids,
|
||||
field_id_docid_facet_f64s,
|
||||
field_id_docid_facet_strings,
|
||||
vector_hannoy,
|
||||
vector_store,
|
||||
embedder_category_id: _,
|
||||
documents,
|
||||
} = self.index;
|
||||
@ -88,7 +89,7 @@ impl<'t, 'i> ClearDocuments<'t, 'i> {
|
||||
field_id_docid_facet_f64s.clear(self.wtxn)?;
|
||||
field_id_docid_facet_strings.clear(self.wtxn)?;
|
||||
// vector
|
||||
vector_hannoy.clear(self.wtxn)?;
|
||||
vector_store.clear(self.wtxn)?;
|
||||
|
||||
documents.clear(self.wtxn)?;
|
||||
|
||||
|
@ -493,8 +493,7 @@ where
|
||||
key: None,
|
||||
},
|
||||
)?;
|
||||
let reader =
|
||||
VectorStore::new(self.index.vector_hannoy, index, action.was_quantized);
|
||||
let reader = VectorStore::new(self.index.vector_store, index, action.was_quantized);
|
||||
let Some(dim) = reader.dimensions(self.wtxn)? else {
|
||||
continue;
|
||||
};
|
||||
@ -504,7 +503,7 @@ where
|
||||
|
||||
for (embedder_name, dimension) in dimension {
|
||||
let wtxn = &mut *self.wtxn;
|
||||
let vector_hannoy = self.index.vector_hannoy;
|
||||
let vector_hannoy = self.index.vector_store;
|
||||
let cancel = &self.should_abort;
|
||||
|
||||
let embedder_index =
|
||||
|
@ -842,7 +842,7 @@ impl<'a, 'i> Transform<'a, 'i> {
|
||||
action.write_back()
|
||||
{
|
||||
let reader = VectorStore::new(
|
||||
self.index.vector_hannoy,
|
||||
self.index.vector_store,
|
||||
*embedder_id,
|
||||
action.was_quantized,
|
||||
);
|
||||
@ -950,7 +950,7 @@ impl<'a, 'i> Transform<'a, 'i> {
|
||||
continue;
|
||||
};
|
||||
let hannoy =
|
||||
VectorStore::new(self.index.vector_hannoy, infos.embedder_id, was_quantized);
|
||||
VectorStore::new(self.index.vector_store, infos.embedder_id, was_quantized);
|
||||
let Some(dimensions) = hannoy.dimensions(wtxn)? else {
|
||||
continue;
|
||||
};
|
||||
|
@ -677,7 +677,7 @@ pub(crate) fn write_typed_chunk_into_index(
|
||||
.get(&embedder_name)
|
||||
.is_some_and(|conf| conf.is_quantized);
|
||||
// FIXME: allow customizing distance
|
||||
let writer = VectorStore::new(index.vector_hannoy, infos.embedder_id, binary_quantized);
|
||||
let writer = VectorStore::new(index.vector_store, infos.embedder_id, binary_quantized);
|
||||
|
||||
// remove vectors for docids we want them removed
|
||||
let merger = remove_vectors_builder.build();
|
||||
|
@ -130,7 +130,7 @@ where
|
||||
|
||||
let global_fields_ids_map = GlobalFieldsIdsMap::new(&new_fields_ids_map);
|
||||
|
||||
let vector_arroy = index.vector_hannoy;
|
||||
let vector_arroy = index.vector_store;
|
||||
let hannoy_writers: Result<HashMap<_, _>> = embedders
|
||||
.inner_as_ref()
|
||||
.iter()
|
||||
@ -343,7 +343,7 @@ fn hannoy_writers_from_embedder_actions<'indexer>(
|
||||
embedders: &'indexer RuntimeEmbedders,
|
||||
index_embedder_category_ids: &'indexer std::collections::HashMap<String, u8>,
|
||||
) -> Result<HashMap<u8, (&'indexer str, &'indexer Embedder, VectorStore, usize)>> {
|
||||
let vector_arroy = index.vector_hannoy;
|
||||
let vector_arroy = index.vector_store;
|
||||
|
||||
embedders
|
||||
.inner_as_ref()
|
||||
@ -385,7 +385,7 @@ where
|
||||
let Some(WriteBackToDocuments { embedder_id, .. }) = action.write_back() else {
|
||||
continue;
|
||||
};
|
||||
let reader = VectorStore::new(index.vector_hannoy, *embedder_id, action.was_quantized);
|
||||
let reader = VectorStore::new(index.vector_store, *embedder_id, action.was_quantized);
|
||||
let Some(dimensions) = reader.dimensions(wtxn)? else {
|
||||
continue;
|
||||
};
|
||||
@ -401,7 +401,7 @@ where
|
||||
let Some(infos) = index.embedding_configs().embedder_info(wtxn, embedder_name)? else {
|
||||
continue;
|
||||
};
|
||||
let arroy = VectorStore::new(index.vector_hannoy, infos.embedder_id, was_quantized);
|
||||
let arroy = VectorStore::new(index.vector_store, infos.embedder_id, was_quantized);
|
||||
let Some(dimensions) = arroy.dimensions(wtxn)? else {
|
||||
continue;
|
||||
};
|
||||
|
@ -121,7 +121,7 @@ impl<'t> VectorDocumentFromDb<'t> {
|
||||
status: &EmbeddingStatus,
|
||||
) -> Result<VectorEntry<'t>> {
|
||||
let reader =
|
||||
VectorStore::new(self.index.vector_hannoy, embedder_id, config.config.quantized());
|
||||
VectorStore::new(self.index.vector_store, embedder_id, config.config.quantized());
|
||||
let vectors = reader.item_vectors(self.rtxn, self.docid)?;
|
||||
|
||||
Ok(VectorEntry {
|
||||
|
@ -1,4 +1,4 @@
|
||||
use hannoy::distances::Cosine;
|
||||
use arroy::distances::Cosine;
|
||||
use heed::RwTxn;
|
||||
|
||||
use super::UpgradeIndex;
|
||||
@ -25,13 +25,12 @@ impl UpgradeIndex for Latest_V1_13_To_Latest_V1_14 {
|
||||
progress.update_progress(VectorStore::UpdateInternalVersions);
|
||||
|
||||
let rtxn = index.read_txn()?;
|
||||
// hannoy::upgrade::from_0_5_to_0_6::<Cosine>(
|
||||
// &rtxn,
|
||||
// index.vector_hannoy.remap_data_type(),
|
||||
// wtxn,
|
||||
// index.vector_hannoy.remap_data_type(),
|
||||
// )?;
|
||||
unimplemented!("upgrade hannoy");
|
||||
arroy::upgrade::from_0_5_to_0_6::<Cosine>(
|
||||
&rtxn,
|
||||
index.vector_store.remap_types(),
|
||||
wtxn,
|
||||
index.vector_store.remap_types(),
|
||||
)?;
|
||||
|
||||
Ok(false)
|
||||
}
|
||||
|
Reference in New Issue
Block a user