Compare commits

...

3 Commits

Author SHA1 Message Date
14a980e54e Add debugs 2024-12-12 09:38:49 +01:00
cbc453c6d1 Avoid cloning database bitmap when it's possible 2024-12-10 11:34:39 +01:00
2fb065b9fb Reduce merge allocations 2024-12-10 11:00:20 +01:00
2 changed files with 88 additions and 14 deletions

View File

@ -21,11 +21,15 @@ use super::ref_cell_ext::RefCellExt;
use super::thread_local::{FullySend, ThreadLocal};
use super::StdResult;
use crate::heed_codec::facet::{FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec};
use crate::heed_codec::StrBEU16Codec;
use crate::index::db_name;
use crate::index::main_key::{GEO_FACETED_DOCUMENTS_IDS_KEY, GEO_RTREE_KEY};
use crate::update::new::KvReaderFieldId;
use crate::vector::Embedding;
use crate::{CboRoaringBitmapCodec, DocumentId, Error, Index, InternalError};
use crate::{
CboRoaringBitmapCodec, DocumentId, Error, FieldIdWordCountCodec, Index, InternalError,
U8StrStrCodec,
};
/// Creates a tuple of senders/receiver to be used by
/// the extractors and the writer loop.
@ -403,6 +407,32 @@ impl Database {
Database::FieldIdDocidFacetF64s => db_name::FIELD_ID_DOCID_FACET_F64S,
}
}
/// Renders a raw database key as a human-readable string for diagnostics.
///
/// The key is decoded with the codec associated with this database variant
/// and formatted with its `Debug` representation.
///
/// This helper only feeds debug output, so it never panics: a key that
/// fails to decode, or a database variant with no codec listed below, is
/// rendered from its raw bytes instead of aborting the caller.
pub fn stringify_key(&self, key: &[u8]) -> String {
    use heed::types::*;

    /// Formats a successfully decoded key, or falls back to the raw bytes
    /// when decoding failed.
    fn render<T: std::fmt::Debug, E>(
        decoded: std::result::Result<T, E>,
        raw: &[u8],
    ) -> String {
        match decoded {
            Ok(value) => format!("{:?}", value),
            Err(_) => format!("<undecodable key: {:?}>", raw),
        }
    }

    match self {
        // Plain word keys.
        Database::WordDocids | Database::ExactWordDocids => {
            render(Str::bytes_decode(key), key)
        }
        // Both databases are decoded with the same string + big-endian u16 codec.
        Database::WordFidDocids | Database::WordPositionDocids => {
            render(StrBEU16Codec::bytes_decode(key), key)
        }
        Database::WordPairProximityDocids => render(U8StrStrCodec::bytes_decode(key), key),
        Database::FidWordCountDocids => render(FieldIdWordCountCodec::bytes_decode(key), key),
        Database::FieldIdDocidFacetStrings => {
            render(FieldDocIdFacetStringCodec::bytes_decode(key), key)
        }
        Database::FieldIdDocidFacetF64s => {
            render(FieldDocIdFacetF64Codec::bytes_decode(key), key)
        }
        // No codec is wired up for the remaining databases; show the raw bytes
        // rather than panicking in the middle of a diagnostic message.
        other => format!("<no key decoder for {:?}: {:?}>", other, key),
    }
}
}
impl From<FacetKind> for Database {

View File

@ -80,16 +80,35 @@ where
}
merge_caches_sorted(frozen, |key, DelAddRoaringBitmap { del, add }| {
let current = database.get(&rtxn, key)?;
match merge_cbo_bitmaps(current, del, add)? {
Operation::Write(bitmap) => {
if let (Some(del), Some(current)) = (&del, &current) {
let current = CboRoaringBitmapCodec::deserialize_from(current).unwrap();
let diff = del - &current;
let external_ids = index.external_id_of(&rtxn, &diff).unwrap().into_iter().map(|id| id.unwrap()).collect::<Vec<_>>();
if !del.is_subset(&current) {
eprintln!(
"======================== {:?}: {} -> c: {:?} d: {:?} a: {:?} extra: {:?} extra_external_ids: {:?}",
D::DATABASE,
D::DATABASE.stringify_key(key),
&current,
del,
add,
diff,
external_ids
);
}
}
match merge_cbo_bitmaps(current, del, add) {
Ok(Operation::Write(bitmap)) => {
docids_sender.write(key, &bitmap)?;
Ok(())
}
Operation::Delete => {
Ok(Operation::Delete) => {
docids_sender.delete(key)?;
Ok(())
}
Operation::Ignore => Ok(()),
Ok(Operation::Ignore) => Ok(()),
Err(e) => Err(e),
}
})
})
@ -234,21 +253,46 @@ fn merge_cbo_bitmaps(
(None, Some(_del), Some(add)) => Ok(Operation::Write(add)),
(Some(_current), None, None) => Ok(Operation::Ignore), // but it's strange
(Some(current), None, Some(add)) => Ok(Operation::Write(current | add)),
(Some(current), Some(del), add) => {
(Some(current), Some(mut del), add) => {
debug_assert!(
del.is_subset(&current),
"del is not a subset of current, which must be impossible."
);
let output = match add {
Some(add) => (&current - (&del - &add)) | (add - del),
None => &current - del,
Some(add) => {
del -= &add;
if del.is_empty() {
if add.is_subset(&current) {
// no changes, no allocation
None
} else {
// addition
Some(current | add)
}
} else {
if add.is_subset(&current) {
// deletion only, no union
Some(current - del)
} else {
// deletion and addition
Some((current - del) | add)
}
}
}
// deletion only, no union
None => Some(current - del),
};
if output.is_empty() {
Ok(Operation::Delete)
} else if current == output {
Ok(Operation::Ignore)
} else {
Ok(Operation::Write(output))
match output {
Some(output) => {
if output.is_empty() {
Ok(Operation::Delete)
} else {
Ok(Operation::Write(output))
}
}
None => Ok(Operation::Ignore),
}
}
}