rewrite update store

This commit is contained in:
Marin Postma
2021-04-22 10:14:29 +02:00
parent 51829ad85e
commit 4fe2a13c71
28 changed files with 896 additions and 826 deletions

View File

@ -1,14 +1,16 @@
use std::collections::HashSet;
use std::io::SeekFrom;
use std::path::{Path, PathBuf};
use std::sync::Arc;
use futures::StreamExt;
use log::info;
use oxidized_json_checker::JsonChecker;
use tokio::fs;
use tokio::io::{AsyncSeekExt, AsyncWriteExt};
use tokio::io::AsyncWriteExt;
use tokio::runtime::Handle;
use tokio::sync::mpsc;
use uuid::Uuid;
use futures::StreamExt;
use super::{PayloadData, Result, UpdateError, UpdateMsg, UpdateStore, UpdateStoreInfo};
use crate::index_controller::index_actor::{IndexActorHandle, CONCURRENT_INDEX_MSG};
@ -32,18 +34,14 @@ where
path: impl AsRef<Path>,
index_handle: I,
) -> anyhow::Result<Self> {
let path = path.as_ref().to_owned().join("updates");
let path = path.as_ref().join("updates");
std::fs::create_dir_all(&path)?;
let mut options = heed::EnvOpenOptions::new();
options.map_size(update_db_size);
let handle = index_handle.clone();
let store = UpdateStore::open(options, &path, move |uuid, meta, file| {
futures::executor::block_on(handle.update(uuid, meta, file))
})
.map_err(|e| UpdateError::Error(e.into()))?;
let store = UpdateStore::open(options, &path, index_handle.clone())?;
std::fs::create_dir_all(path.join("update_files"))?;
assert!(path.exists());
Ok(Self {
@ -95,40 +93,54 @@ where
meta: UpdateMeta,
mut payload: mpsc::Receiver<PayloadData<D>>,
) -> Result<UpdateStatus> {
let update_file_id = uuid::Uuid::new_v4();
let path = self
.path
.join(format!("update_files/update_{}", update_file_id));
let mut file = fs::OpenOptions::new()
.read(true)
.write(true)
.create(true)
.open(&path)
.await
.map_err(|e| UpdateError::Error(Box::new(e)))?;
while let Some(bytes) = payload.recv().await {
match bytes {
Ok(bytes) => {
file.write_all(bytes.as_ref())
let file_path = match meta {
UpdateMeta::DocumentsAddition { .. }
| UpdateMeta::DeleteDocuments => {
let update_file_id = uuid::Uuid::new_v4();
let path = self
.path
.join(format!("update_files/update_{}", update_file_id));
let mut file = fs::OpenOptions::new()
.read(true)
.write(true)
.create(true)
.open(&path)
.await
.map_err(|e| UpdateError::Error(Box::new(e)))?;
let mut file_len = 0;
while let Some(bytes) = payload.recv().await {
match bytes {
Ok(bytes) => {
file_len += bytes.as_ref().len();
file.write_all(bytes.as_ref())
.await
.map_err(|e| UpdateError::Error(Box::new(e)))?;
}
Err(e) => {
return Err(UpdateError::Error(e));
}
}
}
if file_len != 0 {
file.flush()
.await
.map_err(|e| UpdateError::Error(Box::new(e)))?;
}
Err(e) => {
return Err(UpdateError::Error(e));
let file = file.into_std().await;
Some((file, path))
} else {
// empty update, delete the empty file.
fs::remove_file(&path)
.await
.map_err(|e| UpdateError::Error(Box::new(e)))?;
None
}
}
}
file.flush()
.await
.map_err(|e| UpdateError::Error(Box::new(e)))?;
file.seek(SeekFrom::Start(0))
.await
.map_err(|e| UpdateError::Error(Box::new(e)))?;
let mut file = file.into_std().await;
_ => None
};
let update_store = self.store.clone();
@ -136,12 +148,9 @@ where
use std::io::{copy, sink, BufReader, Seek};
// If the payload is empty, ignore the check.
if file
.metadata()
.map_err(|e| UpdateError::Error(Box::new(e)))?
.len()
> 0
{
let path = if let Some((mut file, path)) = file_path {
// set the file back to the beginning
file.seek(SeekFrom::Start(0)).map_err(|e| UpdateError::Error(Box::new(e)))?;
// Check that the json payload is valid:
let reader = BufReader::new(&mut file);
let mut checker = JsonChecker::new(reader);
@ -153,7 +162,10 @@ where
let _: serde_json::Value = serde_json::from_reader(file)
.map_err(|e| UpdateError::Error(Box::new(e)))?;
}
}
Some(path)
} else {
None
};
// The payload is valid, we can register it to the update store.
update_store
@ -197,17 +209,11 @@ where
Ok(())
}
async fn handle_snapshot(&self, uuids: Vec<Uuid>, path: PathBuf) -> Result<()> {
async fn handle_snapshot(&self, uuids: HashSet<Uuid>, path: PathBuf) -> Result<()> {
let index_handle = self.index_handle.clone();
let update_store = self.store.clone();
tokio::task::spawn_blocking(move || -> anyhow::Result<()> {
// acquire write lock to prevent further writes during snapshot
// the update lock must be acquired BEFORE the write lock to prevent dead lock
let _lock = update_store.update_lock.lock();
let mut txn = update_store.env.write_txn()?;
// create db snapshot
update_store.snapshot(&mut txn, &path)?;
update_store.snapshot(&uuids, &path)?;
// Perform the snapshot of each index concurently. Only a third of the capabilities of
// the index actor at a time not to put too much pressure on the index actor
@ -218,7 +224,7 @@ where
.map(|&uuid| handle.snapshot(uuid, path.clone()))
.buffer_unordered(CONCURRENT_INDEX_MSG / 3);
futures::executor::block_on(async {
Handle::current().block_on(async {
while let Some(res) = stream.next().await {
res?;
}
@ -234,25 +240,14 @@ where
async fn handle_get_info(&self) -> Result<UpdateStoreInfo> {
let update_store = self.store.clone();
let processing = self.store.processing.clone();
let info = tokio::task::spawn_blocking(move || -> anyhow::Result<UpdateStoreInfo> {
let txn = update_store.env.read_txn()?;
let size = update_store.get_size(&txn)?;
let processing = processing
.read()
.as_ref()
.map(|(uuid, _)| uuid)
.cloned();
let info = UpdateStoreInfo {
size, processing
};
let info = update_store.get_info()?;
Ok(info)
})
.await
.map_err(|e| UpdateError::Error(e.into()))?
.map_err(|e| UpdateError::Error(e.into()))?;
Ok(info)
}
}

View File

@ -1,12 +1,13 @@
use std::collections::HashSet;
use std::path::{Path, PathBuf};
use tokio::sync::{mpsc, oneshot};
use uuid::Uuid;
use crate::index_controller::IndexActorHandle;
use crate::index_controller::{IndexActorHandle, UpdateStatus};
use super::{
PayloadData, Result, UpdateActor, UpdateActorHandle, UpdateMeta, UpdateMsg, UpdateStatus, UpdateStoreInfo
PayloadData, Result, UpdateActor, UpdateActorHandle, UpdateMeta, UpdateMsg, UpdateStoreInfo,
};
#[derive(Clone)]
@ -63,7 +64,7 @@ where
receiver.await.expect("update actor killed.")
}
async fn snapshot(&self, uuids: Vec<Uuid>, path: PathBuf) -> Result<()> {
async fn snapshot(&self, uuids: HashSet<Uuid>, path: PathBuf) -> Result<()> {
let (ret, receiver) = oneshot::channel();
let msg = UpdateMsg::Snapshot { uuids, path, ret };
let _ = self.sender.send(msg).await;

View File

@ -1,3 +1,4 @@
use std::collections::HashSet;
use std::path::PathBuf;
use tokio::sync::{mpsc, oneshot};
@ -26,7 +27,7 @@ pub enum UpdateMsg<D> {
ret: oneshot::Sender<Result<()>>,
},
Snapshot {
uuids: Vec<Uuid>,
uuids: HashSet<Uuid>,
path: PathBuf,
ret: oneshot::Sender<Result<()>>,
},

View File

@ -3,22 +3,22 @@ mod handle_impl;
mod message;
mod update_store;
use std::path::PathBuf;
use std::{collections::HashSet, path::PathBuf};
use thiserror::Error;
use tokio::sync::mpsc;
use uuid::Uuid;
use crate::index::UpdateResult;
use crate::index_controller::{UpdateMeta, UpdateStatus};
use actor::UpdateActor;
use message::UpdateMsg;
use update_store::UpdateStore;
pub use update_store::UpdateStoreInfo;
pub use handle_impl::UpdateActorHandleImpl;
pub type Result<T> = std::result::Result<T, UpdateError>;
type UpdateStore = update_store::UpdateStore<UpdateMeta, UpdateResult, String>;
type PayloadData<D> = std::result::Result<D, Box<dyn std::error::Error + Sync + Send + 'static>>;
#[cfg(test)]
@ -32,13 +32,6 @@ pub enum UpdateError {
UnexistingUpdate(u64),
}
pub struct UpdateStoreInfo {
/// Size of the update store in bytes.
pub size: u64,
/// Uuid of the currently processing update if it exists
pub processing: Option<Uuid>,
}
#[async_trait::async_trait]
#[cfg_attr(test, automock(type Data=Vec<u8>;))]
pub trait UpdateActorHandle {
@ -47,7 +40,7 @@ pub trait UpdateActorHandle {
async fn get_all_updates_status(&self, uuid: Uuid) -> Result<Vec<UpdateStatus>>;
async fn update_status(&self, uuid: Uuid, id: u64) -> Result<UpdateStatus>;
async fn delete(&self, uuid: Uuid) -> Result<()>;
async fn snapshot(&self, uuids: Vec<Uuid>, path: PathBuf) -> Result<()>;
async fn snapshot(&self, uuids: HashSet<Uuid>, path: PathBuf) -> Result<()>;
async fn get_info(&self) -> Result<UpdateStoreInfo>;
async fn update(
&self,