diff --git a/crates/dump/src/reader/v6/mod.rs b/crates/dump/src/reader/v6/mod.rs index 645e7cb07..b6b359d74 100644 --- a/crates/dump/src/reader/v6/mod.rs +++ b/crates/dump/src/reader/v6/mod.rs @@ -57,6 +57,7 @@ pub struct V6Reader { instance_uid: Option, metadata: Metadata, tasks: BufReader, + tasks2: BufReader, batches: Option>, keys: BufReader, features: Option, @@ -123,6 +124,7 @@ impl V6Reader { metadata: serde_json::from_reader(&*meta_file)?, instance_uid, tasks: BufReader::new(File::open(dump.path().join("tasks").join("queue.jsonl"))?), + tasks2: BufReader::new(File::open(dump.path().join("tasks").join("queue.jsonl"))?), batches, keys: BufReader::new(File::open(dump.path().join("keys.jsonl"))?), features, @@ -188,11 +190,22 @@ impl V6Reader { })) } + fn tasks2(&mut self) -> Box> + '_> { + Box::new( + (&mut self.tasks2) + .lines() + .map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) }), + ) + } + pub fn batches(&mut self) -> Box> + '_> { + // Get batches but filters batches so that those whose tasks have been deleted are not returned + // This is due to bug #5827 that caused them not to be deleted before version 1.18 + let mut task_uids = RoaringBitmap::new(); let mut faulty = false; - for task in self.tasks() { - let Ok((task, _)) = task else { + for task in self.tasks2() { + let Ok(task) = task else { // If we can't read the tasks, just give up trying to filter the batches // The database may contain phantom batches, but that's not that big of a deal faulty = true;