Update arroy and always build the tree that need to be built

This commit is contained in:
Tamo
2024-06-20 15:59:32 +02:00
parent ddd564665b
commit 1693332cab
9 changed files with 106 additions and 22 deletions

View File

@@ -79,7 +79,7 @@ hf-hub = { git = "https://github.com/dureuill/hf-hub.git", branch = "rust_tls",
] }
tiktoken-rs = "0.5.8"
liquid = "0.26.4"
arroy = "0.3.1"
arroy = "0.4.0"
rand = "0.8.5"
tracing = "0.1.40"
ureq = { version = "2.9.7", features = ["json"] }

View File

@@ -281,8 +281,9 @@ impl From<arroy::Error> for Error {
arroy::Error::DatabaseFull
| arroy::Error::InvalidItemAppend
| arroy::Error::UnmatchingDistance { .. }
| arroy::Error::MissingNode
| arroy::Error::MissingMetadata => {
| arroy::Error::NeedBuild(_)
| arroy::Error::MissingKey { .. }
| arroy::Error::MissingMetadata(_) => {
Error::InternalError(InternalError::ArroyError(value))
}
}

View File

@@ -1610,7 +1610,7 @@ impl Index {
arroy::Reader::open(rtxn, k, self.vector_arroy)
.map(Some)
.or_else(|e| match e {
arroy::Error::MissingMetadata => Ok(None),
arroy::Error::MissingMetadata(_) => Ok(None),
e => Err(e.into()),
})
.transpose()
@@ -1643,7 +1643,7 @@ impl Index {
let reader = arroy::Reader::open(rtxn, embedder_id | (i as u16), self.vector_arroy)
.map(Some)
.or_else(|e| match e {
arroy::Error::MissingMetadata => Ok(None),
arroy::Error::MissingMetadata(_) => Ok(None),
e => Err(e),
})
.transpose();

View File

@@ -547,10 +547,11 @@ where
pool.install(|| {
for k in crate::vector::arroy_db_range_for_embedder(embedder_index) {
let writer = arroy::Writer::new(vector_arroy, k, dimension);
if writer.is_empty(wtxn)? {
if writer.need_build(wtxn)? {
writer.build(wtxn, &mut rng, None)?;
} else if writer.is_empty(wtxn)? {
break;
}
writer.build(wtxn, &mut rng, None)?;
}
Result::Ok(())
})