mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-10-02 09:46:29 +00:00
Use Hannoy instead of arroy
This commit is contained in:
committed by
Louis Dureuil
parent
580bfb06b4
commit
affcaef556
@@ -39,7 +39,7 @@ use crate::update::{
|
||||
IndexerConfig, UpdateIndexingStep, WordPrefixDocids, WordPrefixIntegerDocids, WordsPrefixesFst,
|
||||
};
|
||||
use crate::vector::db::EmbedderInfo;
|
||||
use crate::vector::{ArroyWrapper, RuntimeEmbedders};
|
||||
use crate::vector::{HannoyWrapper, RuntimeEmbedders};
|
||||
use crate::{CboRoaringBitmapCodec, Index, Result, UserError};
|
||||
|
||||
static MERGED_DATABASE_COUNT: usize = 7;
|
||||
@@ -494,7 +494,7 @@ where
|
||||
},
|
||||
)?;
|
||||
let reader =
|
||||
ArroyWrapper::new(self.index.vector_arroy, index, action.was_quantized);
|
||||
HannoyWrapper::new(self.index.vector_hannoy, index, action.was_quantized);
|
||||
let Some(dim) = reader.dimensions(self.wtxn)? else {
|
||||
continue;
|
||||
};
|
||||
@@ -504,7 +504,7 @@ where
|
||||
|
||||
for (embedder_name, dimension) in dimension {
|
||||
let wtxn = &mut *self.wtxn;
|
||||
let vector_arroy = self.index.vector_arroy;
|
||||
let vector_hannoy = self.index.vector_hannoy;
|
||||
let cancel = &self.should_abort;
|
||||
|
||||
let embedder_index =
|
||||
@@ -523,7 +523,7 @@ where
|
||||
let is_quantizing = embedder_config.is_some_and(|action| action.is_being_quantized);
|
||||
|
||||
pool.install(|| {
|
||||
let mut writer = ArroyWrapper::new(vector_arroy, embedder_index, was_quantized);
|
||||
let mut writer = HannoyWrapper::new(vector_hannoy, embedder_index, was_quantized);
|
||||
writer.build_and_quantize(
|
||||
wtxn,
|
||||
// In the settings we don't have any progress to share
|
||||
|
@@ -32,7 +32,7 @@ use crate::update::settings::{InnerIndexSettings, InnerIndexSettingsDiff};
|
||||
use crate::update::{AvailableIds, UpdateIndexingStep};
|
||||
use crate::vector::parsed_vectors::{ExplicitVectors, VectorOrArrayOfVectors};
|
||||
use crate::vector::settings::{RemoveFragments, WriteBackToDocuments};
|
||||
use crate::vector::ArroyWrapper;
|
||||
use crate::vector::HannoyWrapper;
|
||||
use crate::{FieldDistribution, FieldId, FieldIdMapMissingEntry, Index, Result};
|
||||
|
||||
pub struct TransformOutput {
|
||||
@@ -834,15 +834,15 @@ impl<'a, 'i> Transform<'a, 'i> {
|
||||
None
|
||||
};
|
||||
|
||||
let readers: BTreeMap<&str, (ArroyWrapper, &RoaringBitmap)> = settings_diff
|
||||
let readers: BTreeMap<&str, (HannoyWrapper, &RoaringBitmap)> = settings_diff
|
||||
.embedding_config_updates
|
||||
.iter()
|
||||
.filter_map(|(name, action)| {
|
||||
if let Some(WriteBackToDocuments { embedder_id, user_provided }) =
|
||||
action.write_back()
|
||||
{
|
||||
let reader = ArroyWrapper::new(
|
||||
self.index.vector_arroy,
|
||||
let reader = HannoyWrapper::new(
|
||||
self.index.vector_hannoy,
|
||||
*embedder_id,
|
||||
action.was_quantized,
|
||||
);
|
||||
@@ -884,7 +884,7 @@ impl<'a, 'i> Transform<'a, 'i> {
|
||||
|
||||
let injected_vectors: std::result::Result<
|
||||
serde_json::Map<String, serde_json::Value>,
|
||||
arroy::Error,
|
||||
hannoy::Error,
|
||||
> = readers
|
||||
.iter()
|
||||
.filter_map(|(name, (reader, user_provided))| {
|
||||
@@ -949,9 +949,9 @@ impl<'a, 'i> Transform<'a, 'i> {
|
||||
else {
|
||||
continue;
|
||||
};
|
||||
let arroy =
|
||||
ArroyWrapper::new(self.index.vector_arroy, infos.embedder_id, was_quantized);
|
||||
let Some(dimensions) = arroy.dimensions(wtxn)? else {
|
||||
let hannoy =
|
||||
HannoyWrapper::new(self.index.vector_hannoy, infos.embedder_id, was_quantized);
|
||||
let Some(dimensions) = hannoy.dimensions(wtxn)? else {
|
||||
continue;
|
||||
};
|
||||
for fragment_id in fragment_ids {
|
||||
@@ -959,17 +959,17 @@ impl<'a, 'i> Transform<'a, 'i> {
|
||||
|
||||
if infos.embedding_status.user_provided_docids().is_empty() {
|
||||
// no user provided: clear store
|
||||
arroy.clear_store(wtxn, *fragment_id, dimensions)?;
|
||||
hannoy.clear_store(wtxn, *fragment_id, dimensions)?;
|
||||
continue;
|
||||
}
|
||||
|
||||
// some user provided, remove only the ids that are not user provided
|
||||
let to_delete = arroy.items_in_store(wtxn, *fragment_id, |items| {
|
||||
let to_delete = hannoy.items_in_store(wtxn, *fragment_id, |items| {
|
||||
items - infos.embedding_status.user_provided_docids()
|
||||
})?;
|
||||
|
||||
for to_delete in to_delete {
|
||||
arroy.del_item_in_store(wtxn, to_delete, *fragment_id, dimensions)?;
|
||||
hannoy.del_item_in_store(wtxn, to_delete, *fragment_id, dimensions)?;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@@ -27,7 +27,7 @@ use crate::update::index_documents::helpers::{
|
||||
};
|
||||
use crate::update::settings::InnerIndexSettingsDiff;
|
||||
use crate::vector::db::{EmbeddingStatusDelta, IndexEmbeddingConfig};
|
||||
use crate::vector::ArroyWrapper;
|
||||
use crate::vector::HannoyWrapper;
|
||||
use crate::{
|
||||
lat_lng_to_xyz, CboRoaringBitmapCodec, DocumentId, FieldId, GeoPoint, Index, InternalError,
|
||||
Result, SerializationError, U8StrStrCodec,
|
||||
@@ -677,7 +677,8 @@ pub(crate) fn write_typed_chunk_into_index(
|
||||
.get(&embedder_name)
|
||||
.is_some_and(|conf| conf.is_quantized);
|
||||
// FIXME: allow customizing distance
|
||||
let writer = ArroyWrapper::new(index.vector_arroy, infos.embedder_id, binary_quantized);
|
||||
let writer =
|
||||
HannoyWrapper::new(index.vector_hannoy, infos.embedder_id, binary_quantized);
|
||||
|
||||
// remove vectors for docids we want them removed
|
||||
let merger = remove_vectors_builder.build();
|
||||
|
Reference in New Issue
Block a user