WIP rebase on main

This commit is contained in:
tamo
2021-05-05 14:11:56 +02:00
parent 0f94ef8abc
commit c3552cecdf
20 changed files with 158 additions and 197 deletions

View File

@ -122,8 +122,8 @@ impl<S: IndexStore + Sync + Send> IndexActor<S> {
Snapshot { uuid, path, ret } => {
let _ = ret.send(self.handle_snapshot(uuid, path).await);
}
Dump { uuid, path, ret } => {
let _ = ret.send(self.handle_dump(uuid, path).await);
Dump { uid, uuid, path, ret } => {
let _ = ret.send(self.handle_dump(&uid, uuid, path).await);
}
GetStats { uuid, ret } => {
let _ = ret.send(self.handle_get_stats(uuid).await);
@ -312,24 +312,52 @@ impl<S: IndexStore + Sync + Send> IndexActor<S> {
Ok(())
}
async fn handle_dump(&self, uuid: Uuid, mut path: PathBuf) -> IndexResult<()> {
/// Create a `documents.jsonl` and a `settings.json` in `path/uid/` with a dump of all the
/// documents and all the settings.
async fn handle_dump(&self, uid: &str, uuid: Uuid, path: PathBuf) -> IndexResult<()> {
use tokio::fs::create_dir_all;
use std::io::prelude::*;
path.push("indexes");
create_dir_all(&path)
.await
.map_err(|e| IndexError::Error(e.into()))?;
if let Some(index) = self.store.get(uuid).await? {
let mut index_path = path.join(format!("index-{}", uuid));
create_dir_all(&index_path)
.await
.map_err(|e| IndexError::Error(e.into()))?;
index_path.push("data.mdb");
let documents_path = path.join(uid).join("documents.jsonl");
let settings_path = path.join(uid).join("settings.json");
spawn_blocking(move || -> anyhow::Result<()> {
// first we dump all the documents
let file = File::create(documents_path)?;
let mut file = std::io::BufWriter::new(file);
// Get write txn to wait for ongoing write transaction before dump.
let _txn = index.write_txn()?;
index.env.copy_to_path(index_path, CompactionOption::Enabled)?;
let txn = index.write_txn()?;
let documents_ids = index.documents_ids(&txn)?;
// TODO: TAMO: calling this function here can consume **a lot** of RAM; we should
// use some kind of iterator instead -> waiting for a milli release
let documents = index.documents(&txn, documents_ids)?;
let fields_ids_map = index.fields_ids_map(&txn)?;
// we want to save **all** the fields in the dump.
let fields_to_dump: Vec<u8> = fields_ids_map.iter().map(|(id, _)| id).collect();
for (_doc_id, document) in documents {
let json = milli::obkv_to_json(&fields_to_dump, &fields_ids_map, document)?;
file.write_all(serde_json::to_string(&json)?.as_bytes())?;
file.write_all(b"\n")?;
}
// then we dump all the settings
let file = File::create(settings_path)?;
let mut file = std::io::BufWriter::new(file);
let settings = index.settings()?;
file.write_all(serde_json::to_string(&settings)?.as_bytes())?;
file.write_all(b"\n")?;
Ok(())
})
.await

View File

@ -136,9 +136,9 @@ impl IndexActorHandle for IndexActorHandleImpl {
Ok(receiver.await.expect("IndexActor has been killed")?)
}
async fn dump(&self, uuid: Uuid, path: PathBuf) -> IndexResult<()> {
async fn dump(&self, uid: String, uuid: Uuid, path: PathBuf) -> IndexResult<()> {
let (ret, receiver) = oneshot::channel();
let msg = IndexMsg::Dump { uuid, path, ret };
let msg = IndexMsg::Dump { uid, uuid, path, ret };
let _ = self.sender.send(msg).await;
Ok(receiver.await.expect("IndexActor has been killed")?)
}

View File

@ -61,6 +61,7 @@ pub enum IndexMsg {
ret: oneshot::Sender<IndexResult<()>>,
},
Dump {
uid: String,
uuid: Uuid,
path: PathBuf,
ret: oneshot::Sender<IndexResult<()>>,

View File

@ -97,7 +97,7 @@ pub trait IndexActorHandle {
index_settings: IndexSettings,
) -> IndexResult<IndexMeta>;
async fn snapshot(&self, uuid: Uuid, path: PathBuf) -> IndexResult<()>;
async fn dump(&self, uuid: Uuid, path: PathBuf) -> IndexResult<()>;
async fn dump(&self, uid: String, uuid: Uuid, path: PathBuf) -> IndexResult<()>;
async fn get_index_stats(&self, uuid: Uuid) -> IndexResult<IndexStats>;
}