update arroy to the latest working version

This commit is contained in:
Tamo
2025-06-23 23:37:47 +02:00
parent 5a7948bfab
commit bea11a1353
17 changed files with 105 additions and 88 deletions

View File

@@ -88,7 +88,7 @@ rhai = { version = "1.22.2", features = [
"sync",
] }
# arroy = "0.6.1"
arroy = { git = "https://github.com/meilisearch/arroy.git", rev = "5b748bac2c69c65a97980901b02067a3a545e357" } # incremental update
arroy = { git = "https://github.com/meilisearch/arroy.git", rev = "a63f0979b216dde10d50fdfa4fadcb2b1dea73c7" } # incremental update
rand = "0.8.5"
tracing = "0.1.41"
ureq = { version = "2.12.1", features = ["json"] }

View File

@@ -411,18 +411,19 @@ impl From<arroy::Error> for Error {
arroy::Error::Heed(heed) => heed.into(),
arroy::Error::Io(io) => io.into(),
arroy::Error::InvalidVecDimension { expected, received } => {
Error::UserError(UserError::InvalidVectorDimensions { expected, found: received })
}
Error::UserError(UserError::InvalidVectorDimensions { expected, found: received })
}
arroy::Error::BuildCancelled => Error::InternalError(InternalError::AbortedIndexation),
arroy::Error::DatabaseFull
| arroy::Error::InvalidItemAppend
| arroy::Error::UnmatchingDistance { .. }
| arroy::Error::NeedBuild(_)
| arroy::Error::MissingKey { .. }
| arroy::Error::MissingMetadata(_)
| arroy::Error::CannotDecodeKeyMode { .. } => {
Error::InternalError(InternalError::ArroyError(value))
}
| arroy::Error::InvalidItemAppend
| arroy::Error::UnmatchingDistance { .. }
| arroy::Error::NeedBuild(_)
| arroy::Error::MissingKey { .. }
| arroy::Error::MissingMetadata(_)
| arroy::Error::CannotDecodeKeyMode { .. }
| arroy::Error::UnknownVersion { .. } => {
Error::InternalError(InternalError::ArroyError(value))
}
}
}
}

View File

@@ -1,7 +1,7 @@
use std::any::TypeId;
use std::borrow::Cow;
use std::marker::PhantomData;
use std::sync::atomic::{AtomicU32, AtomicUsize, Ordering};
use std::sync::atomic::{AtomicU64, AtomicUsize, Ordering};
use std::sync::{Arc, RwLock};
use std::time::{Duration, Instant};
@@ -13,8 +13,8 @@ use utoipa::ToSchema;
pub trait Step: 'static + Send + Sync {
fn name(&self) -> Cow<'static, str>;
fn current(&self) -> u32;
fn total(&self) -> u32;
fn current(&self) -> u64;
fn total(&self) -> u64;
}
#[derive(Clone, Default)]
@@ -132,13 +132,13 @@ pub trait NamedStep: 'static + Send + Sync + Default {
/// - The total number of steps doesn't change
pub struct AtomicSubStep<Name: NamedStep> {
unit_name: Name,
current: Arc<AtomicU32>,
total: u32,
current: Arc<AtomicU64>,
total: u64,
}
impl<Name: NamedStep> AtomicSubStep<Name> {
pub fn new(total: u32) -> (Arc<AtomicU32>, Self) {
let current = Arc::new(AtomicU32::new(0));
pub fn new(total: u64) -> (Arc<AtomicU64>, Self) {
let current = Arc::new(AtomicU64::new(0));
(current.clone(), Self { current, total, unit_name: Name::default() })
}
}
@@ -148,11 +148,11 @@ impl<Name: NamedStep> Step for AtomicSubStep<Name> {
self.unit_name.name().into()
}
fn current(&self) -> u32 {
fn current(&self) -> u64 {
self.current.load(Ordering::Relaxed)
}
fn total(&self) -> u32 {
fn total(&self) -> u64 {
self.total
}
}
@@ -183,13 +183,13 @@ macro_rules! make_enum_progress {
}
}
fn current(&self) -> u32 {
*self as u32
fn current(&self) -> u64 {
*self as u64
}
fn total(&self) -> u32 {
fn total(&self) -> u64 {
use $crate::progress::_private_enum_iterator::Sequence;
Self::CARDINALITY as u32
Self::CARDINALITY as u64
}
}
};
@@ -235,8 +235,8 @@ pub struct ProgressView {
#[schema(rename_all = "camelCase")]
pub struct ProgressStepView {
pub current_step: Cow<'static, str>,
pub finished: u32,
pub total: u32,
pub finished: u64,
pub total: u64,
}
/// Used when the name can change but it's still the same step.
@@ -252,13 +252,13 @@ pub struct ProgressStepView {
/// ```
pub struct VariableNameStep<U: Send + Sync + 'static> {
name: String,
current: u32,
total: u32,
current: u64,
total: u64,
phantom: PhantomData<U>,
}
impl<U: Send + Sync + 'static> VariableNameStep<U> {
pub fn new(name: impl Into<String>, current: u32, total: u32) -> Self {
pub fn new(name: impl Into<String>, current: u64, total: u64) -> Self {
Self { name: name.into(), current, total, phantom: PhantomData }
}
}
@@ -268,11 +268,11 @@ impl<U: Send + Sync + 'static> Step for VariableNameStep<U> {
self.name.clone().into()
}
fn current(&self) -> u32 {
fn current(&self) -> u64 {
self.current
}
fn total(&self) -> u32 {
fn total(&self) -> u64 {
self.total
}
}
@@ -282,8 +282,8 @@ impl Step for arroy::MainStep {
match self {
arroy::MainStep::PreProcessingTheItems => "pre processing the items",
arroy::MainStep::WritingTheDescendantsAndMetadata => {
"writing the descendants and metadata"
}
"writing the descendants and metadata"
}
arroy::MainStep::RetrieveTheUpdatedItems => "retrieve the updated items",
arroy::MainStep::WriteTheMetadata => "write the metadata",
arroy::MainStep::RetrievingTheItemsIds => "retrieving the items ids",
@@ -291,19 +291,20 @@ impl Step for arroy::MainStep {
arroy::MainStep::DeletingExtraTrees => "deleting extra trees",
arroy::MainStep::RemoveItemsFromExistingTrees => "remove items from existing trees",
arroy::MainStep::InsertItemsInCurrentTrees => "insert items in current trees",
arroy::MainStep::IncrementalIndexLargeDescendants => {
"incremental index large descendants"
}
arroy::MainStep::RetrievingTheItems => "retrieving the items",
arroy::MainStep::RetrievingTheTreeNodes => "retrieving the tree nodes",
arroy::MainStep::RetrieveTheLargeDescendants => "retrieve the large descendants",
arroy::MainStep::CreateTreesForItems => "create trees for items",
}
.into()
}
fn current(&self) -> u32 {
*self as u32
fn current(&self) -> u64 {
*self as u64
}
fn total(&self) -> u32 {
Self::CARDINALITY as u32
fn total(&self) -> u64 {
Self::CARDINALITY as u64
}
}
@@ -312,11 +313,11 @@ impl Step for arroy::SubStep {
self.unit.into()
}
fn current(&self) -> u32 {
fn current(&self) -> u64 {
self.current.load(Ordering::Relaxed)
}
fn total(&self) -> u32 {
fn total(&self) -> u64 {
self.max
}
}

View File

@@ -135,7 +135,7 @@ where
extractor_alloc.0.reset();
}
let total_documents = document_changes.len() as u32;
let total_documents = document_changes.len() as u64;
let (step, progress_step) = AtomicDocumentStep::new(total_documents);
progress.update_progress(progress_step);
@@ -167,7 +167,7 @@ where
});
let res = extractor.process(changes, context).map_err(Arc::new);
step.fetch_add(items.as_ref().len() as u32, Ordering::Relaxed);
step.fetch_add(items.as_ref().len() as u64, Ordering::Relaxed);
// send back the doc_alloc in the pool
context.doc_allocs.get_or_default().0.set(std::mem::take(&mut context.doc_alloc));

View File

@@ -85,14 +85,14 @@ impl<'pl> DocumentOperation<'pl> {
let mut primary_key = None;
let payload_count = operations.len();
let (step, progress_step) = AtomicPayloadStep::new(payload_count as u32);
let (step, progress_step) = AtomicPayloadStep::new(payload_count as u64);
progress.update_progress(progress_step);
for (payload_index, operation) in operations.into_iter().enumerate() {
if must_stop_processing() {
return Err(InternalError::AbortedIndexation.into());
}
step.store(payload_index as u32, Ordering::Relaxed);
step.store(payload_index as u64, Ordering::Relaxed);
let mut bytes = 0;
let result = match operation {
@@ -145,7 +145,7 @@ impl<'pl> DocumentOperation<'pl> {
};
operations_stats.push(PayloadStats { document_count, bytes, error });
}
step.store(payload_count as u32, Ordering::Relaxed);
step.store(payload_count as u64, Ordering::Relaxed);
// TODO We must drain the HashMap into a Vec because rayon::hash_map::IntoIter: !Clone
let mut docids_version_offsets: bumpalo::collections::vec::Vec<_> =

View File

@@ -101,7 +101,7 @@ pub fn settings_change_extract<
extractor_alloc.0.reset();
}
let total_documents = documents.len() as u32;
let total_documents = documents.len() as u64;
let (step, progress_step) = AtomicDocumentStep::new(total_documents);
progress.update_progress(progress_step);
@@ -132,7 +132,7 @@ pub fn settings_change_extract<
.filter_map(|item| documents.item_to_database_document(context, item).transpose());
let res = extractor.process(documents, context).map_err(Arc::new);
step.fetch_add(items.as_ref().len() as u32, Ordering::Relaxed);
step.fetch_add(items.as_ref().len() as u64, Ordering::Relaxed);
// send back the doc_alloc in the pool
context.doc_allocs.get_or_default().0.set(std::mem::take(&mut context.doc_alloc));

View File

@@ -11,7 +11,7 @@ pub fn field_distribution(index: &Index, wtxn: &mut RwTxn<'_>, progress: &Progre
let field_id_map = index.fields_ids_map(wtxn)?;
let (update_document_count, sub_step) =
AtomicSubStep::<progress::Document>::new(document_count as u32);
AtomicSubStep::<progress::Document>::new(document_count as u64);
progress.update_progress(sub_step);
let docids = index.documents_ids(wtxn)?;

View File

@@ -81,8 +81,8 @@ where
target.1,
target.2
),
i as u32,
upgrade_path.len() as u32,
i as u64,
upgrade_path.len() as u64,
));
regenerate_stats |= upgrade.upgrade(wtxn, index, from, progress.clone())?;
index.put_version(wtxn, target)?;

View File

@@ -133,7 +133,7 @@ impl ArroyWrapper {
}
#[allow(clippy::too_many_arguments)]
pub fn build_and_quantize<R: rand::Rng + rand::SeedableRng>(
pub fn build_and_quantize<R: rand::Rng + rand::SeedableRng + Send + Sync>(
&mut self,
wtxn: &mut RwTxn,
progress: &Progress,