From 14a980e54ea40324aba5352eea25ae9b71ab2589 Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Tue, 10 Dec 2024 14:19:44 +0100 Subject: [PATCH] Add debugs --- crates/milli/src/update/new/channel.rs | 32 +++++++++++++++++++++++++- crates/milli/src/update/new/merger.rs | 27 ++++++++++++++++++---- 2 files changed, 54 insertions(+), 5 deletions(-) diff --git a/crates/milli/src/update/new/channel.rs b/crates/milli/src/update/new/channel.rs index 7590c02ac..6e27acb9b 100644 --- a/crates/milli/src/update/new/channel.rs +++ b/crates/milli/src/update/new/channel.rs @@ -21,11 +21,15 @@ use super::ref_cell_ext::RefCellExt; use super::thread_local::{FullySend, ThreadLocal}; use super::StdResult; use crate::heed_codec::facet::{FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec}; +use crate::heed_codec::StrBEU16Codec; use crate::index::db_name; use crate::index::main_key::{GEO_FACETED_DOCUMENTS_IDS_KEY, GEO_RTREE_KEY}; use crate::update::new::KvReaderFieldId; use crate::vector::Embedding; -use crate::{CboRoaringBitmapCodec, DocumentId, Error, Index, InternalError}; +use crate::{ + CboRoaringBitmapCodec, DocumentId, Error, FieldIdWordCountCodec, Index, InternalError, + U8StrStrCodec, +}; /// Creates a tuple of senders/receiver to be used by /// the extractors and the writer loop. 
@@ -403,6 +407,32 @@ impl Database { Database::FieldIdDocidFacetF64s => db_name::FIELD_ID_DOCID_FACET_F64S, } } + + pub fn stringify_key(&self, key: &[u8]) -> String { + use heed::types::*; + + match self { + Database::WordDocids => format!("{:?}", Str::bytes_decode(key).unwrap()), + Database::WordFidDocids => format!("{:?}", StrBEU16Codec::bytes_decode(key).unwrap()), + Database::WordPositionDocids => { + format!("{:?}", StrBEU16Codec::bytes_decode(key).unwrap()) + } + Database::WordPairProximityDocids => { + format!("{:?}", U8StrStrCodec::bytes_decode(key).unwrap()) + } + Database::ExactWordDocids => format!("{:?}", Str::bytes_decode(key).unwrap()), + Database::FidWordCountDocids => { + format!("{:?}", FieldIdWordCountCodec::bytes_decode(key).unwrap()) + } + Database::FieldIdDocidFacetStrings => { + format!("{:?}", FieldDocIdFacetStringCodec::bytes_decode(key).unwrap()) + } + Database::FieldIdDocidFacetF64s => { + format!("{:?}", FieldDocIdFacetF64Codec::bytes_decode(key).unwrap()) + } + d => unimplemented!("stringify_key for {:?}", d), + } + } } impl From for Database { diff --git a/crates/milli/src/update/new/merger.rs b/crates/milli/src/update/new/merger.rs index 9774b55e8..b2b1080d4 100644 --- a/crates/milli/src/update/new/merger.rs +++ b/crates/milli/src/update/new/merger.rs @@ -80,16 +80,35 @@ where } merge_caches_sorted(frozen, |key, DelAddRoaringBitmap { del, add }| { let current = database.get(&rtxn, key)?; - match merge_cbo_bitmaps(current, del, add)? 
{ - Operation::Write(bitmap) => { + + if let (Some(del), Some(current)) = (&del, &current) { + let current = CboRoaringBitmapCodec::deserialize_from(current).unwrap(); + let diff = del - &current; + let external_ids = index.external_id_of(&rtxn, &diff).unwrap().into_iter().map(|id| id.unwrap()).collect::<Vec<_>>(); + if !del.is_subset(&current) { + eprintln!( + "======================== {:?}: {} -> c: {:?} d: {:?} a: {:?} extra: {:?} extra_external_ids: {:?}", + D::DATABASE, + D::DATABASE.stringify_key(key), + &current, + del, + add, + diff, + external_ids + ); + } + } + match merge_cbo_bitmaps(current, del, add) { + Ok(Operation::Write(bitmap)) => { docids_sender.write(key, &bitmap)?; Ok(()) } - Operation::Delete => { + Ok(Operation::Delete) => { docids_sender.delete(key)?; Ok(()) } - Operation::Ignore => Ok(()), + Ok(Operation::Ignore) => Ok(()), + Err(e) => Err(e), } }) })