add dump batch handler
@@ -9,7 +9,7 @@ use time::macros::format_description;
 use time::OffsetDateTime;
 use tokio::sync::{mpsc, oneshot, RwLock};
 
-use super::error::{DumpActorError, Result};
+use super::error::{DumpError, Result};
 use super::{DumpInfo, DumpJob, DumpMsg, DumpStatus};
 use crate::tasks::Scheduler;
 use crate::update_file_store::UpdateFileStore;
@@ -106,7 +106,7 @@ impl DumpActor {
         let _lock = match self.lock.try_lock() {
             Some(lock) => lock,
             None => {
-                ret.send(Err(DumpActorError::DumpAlreadyRunning))
+                ret.send(Err(DumpError::DumpAlreadyRunning))
                     .expect("Dump actor is dead");
                 return;
             }
@@ -123,7 +123,6 @@ impl DumpActor {
             dump_path: self.dump_path.clone(),
             db_path: self.analytics_path.clone(),
             update_file_store: self.update_file_store.clone(),
-            scheduler: self.scheduler.clone(),
             uid: uid.clone(),
             update_db_size: self.update_db_size,
             index_db_size: self.index_db_size,
@@ -155,7 +154,7 @@ impl DumpActor {
     async fn handle_dump_info(&self, uid: String) -> Result<DumpInfo> {
         match self.dump_infos.read().await.get(&uid) {
             Some(info) => Ok(info.clone()),
-            _ => Err(DumpActorError::DumpDoesNotExist(uid)),
+            _ => Err(DumpError::DumpDoesNotExist(uid)),
         }
     }
 }
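Note: the try_lock in the second hunk is the concurrency guard: a dump request that arrives while another dump holds the lock is answered immediately with DumpError::DumpAlreadyRunning instead of being queued. A minimal sketch of the same pattern, assuming a tokio::sync::Mutex (the actor's actual lock type is not visible in this diff; Dumper and run_dump are illustrative names):

use std::sync::Arc;
use tokio::sync::Mutex;

struct Dumper {
    // Held for the whole duration of a dump; only one task can own it.
    lock: Arc<Mutex<()>>,
}

impl Dumper {
    async fn run_dump(&self) -> Result<(), String> {
        // try_lock fails immediately if a dump already holds the guard,
        // instead of awaiting like lock() would.
        let _guard = match self.lock.try_lock() {
            Ok(guard) => guard,
            Err(_) => return Err("A dump is already processing.".to_string()),
        };
        // ... perform the dump while _guard is alive ...
        Ok(())
    }
}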
 
@@ -3,10 +3,10 @@ use meilisearch_error::{internal_error, Code, ErrorCode};
 
 use crate::{index_resolver::error::IndexResolverError, tasks::error::TaskError};
 
-pub type Result<T> = std::result::Result<T, DumpActorError>;
+pub type Result<T> = std::result::Result<T, DumpError>;
 
 #[derive(thiserror::Error, Debug)]
-pub enum DumpActorError {
+pub enum DumpError {
     #[error("A dump is already processing. You must wait until the current process is finished before requesting another dump.")]
     DumpAlreadyRunning,
     #[error("Dump `{0}` not found.")]
@@ -18,7 +18,7 @@ pub enum DumpActorError {
 }
 
 internal_error!(
-    DumpActorError: milli::heed::Error,
+    DumpError: milli::heed::Error,
     std::io::Error,
    tokio::task::JoinError,
    tokio::sync::oneshot::error::RecvError,
@@ -29,13 +29,13 @@ internal_error!(
     TaskError
 );
 
-impl ErrorCode for DumpActorError {
+impl ErrorCode for DumpError {
     fn error_code(&self) -> Code {
         match self {
-            DumpActorError::DumpAlreadyRunning => Code::DumpAlreadyInProgress,
-            DumpActorError::DumpDoesNotExist(_) => Code::DumpNotFound,
-            DumpActorError::Internal(_) => Code::Internal,
-            DumpActorError::IndexResolver(e) => e.error_code(),
+            DumpError::DumpAlreadyRunning => Code::DumpAlreadyInProgress,
+            DumpError::DumpDoesNotExist(_) => Code::DumpNotFound,
+            DumpError::Internal(_) => Code::Internal,
+            DumpError::IndexResolver(e) => e.error_code(),
         }
     }
 }
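Note: the internal_error! invocation above is what lets `?` convert the listed low-level errors (heed, io, JoinError, ...) into the renamed enum: for each type it generates a From impl that wraps the error in the Internal variant. A hand-written sketch of that expansion for one type; the enum shape here is assumed for illustration and elides the real variants:

// Assumed variant shape, for illustration only.
#[derive(Debug)]
pub enum DumpError {
    Internal(Box<dyn std::error::Error + Send + Sync + 'static>),
    // ... other variants elided ...
}

// What an internal_error!-style macro plausibly generates per listed type:
// a From impl funneling the error into Internal, so `?` works in any
// function returning Result<_, DumpError>.
impl From<std::io::Error> for DumpError {
    fn from(other: std::io::Error) -> Self {
        DumpError::Internal(Box::new(other))
    }
}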
 
@@ -1,7 +1,7 @@
 use tokio::sync::{mpsc, oneshot};
 
 use super::error::Result;
-use super::{DumpActorHandle, DumpInfo, DumpMsg};
+use super::{DumpActorHandle, DumpMsg};
 
 #[derive(Clone)]
 pub struct DumpActorHandleImpl {
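Note: DumpActorHandleImpl, and the `ret.send(...)` calls in the actor hunk further up, follow the usual tokio actor pattern: the handle pushes a message carrying a oneshot sender onto an mpsc channel, and the actor answers through that sender. A minimal self-contained sketch of the pattern; Msg, Handle, and actor are illustrative names, not the crate's definitions:

use tokio::sync::{mpsc, oneshot};

// Illustrative message type: each request carries the oneshot
// sender the actor will use to return its answer.
enum Msg {
    GetStatus { ret: oneshot::Sender<String> },
}

#[derive(Clone)]
struct Handle {
    sender: mpsc::Sender<Msg>,
}

impl Handle {
    async fn status(&self) -> String {
        let (ret, receiver) = oneshot::channel();
        // If the actor task is gone, these expects fire, much like
        // the `.expect("Dump actor is dead")` in the diff above.
        self.sender.send(Msg::GetStatus { ret }).await.expect("actor is dead");
        receiver.await.expect("actor is dead")
    }
}

async fn actor(mut inbox: mpsc::Receiver<Msg>) {
    while let Some(msg) = inbox.recv().await {
        match msg {
            Msg::GetStatus { ret } => {
                let _ = ret.send("done".to_string());
            }
        }
    }
}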
 
@@ -1,32 +1,30 @@
 use std::fs::File;
 use std::path::{Path, PathBuf};
 use std::sync::Arc;
 
 use anyhow::bail;
-use log::info;
+use log::{info, trace};
 use meilisearch_auth::AuthController;
 use serde::{Deserialize, Serialize};
 use time::OffsetDateTime;
 
-pub use actor::DumpActor;
-pub use handle_impl::*;
-pub use message::DumpMsg;
 use tempfile::TempDir;
-use tokio::sync::RwLock;
+use tokio::fs::create_dir_all;
 
-use crate::compression::from_tar_gz;
+use crate::analytics;
+use crate::compression::{from_tar_gz, to_tar_gz};
+use crate::dump::error::DumpError;
 use crate::options::IndexerOpts;
-use crate::tasks::Scheduler;
 use crate::update_file_store::UpdateFileStore;
 use error::Result;
 
 use self::loaders::{v2, v3, v4};
 
-mod actor;
+// mod actor;
 mod compat;
 pub mod error;
-mod handle_impl;
+// mod handle_impl;
 mod loaders;
-mod message;
+// mod message;
 
 const META_FILE_NAME: &str = "metadata.json";
@@ -51,18 +49,6 @@ impl Metadata {
     }
 }
 
-#[async_trait::async_trait]
-#[cfg_attr(test, mockall::automock)]
-pub trait DumpActorHandle {
-    /// Start the creation of a dump
-    /// Implementation: [handle_impl::DumpActorHandleImpl::create_dump]
-    async fn create_dump(&self) -> Result<DumpInfo>;
-
-    /// Return the status of an already created dump
-    /// Implementation: [handle_impl::DumpActorHandleImpl::dump_info]
-    async fn dump_info(&self, uid: String) -> Result<DumpInfo>;
-}
-
 #[derive(Serialize, Deserialize, Debug)]
 #[serde(rename_all = "camelCase")]
 pub struct MetadataV1 {
@@ -159,49 +145,6 @@ pub enum DumpStatus {
     Failed,
 }
 
-#[derive(Debug, Serialize, Clone)]
-#[serde(rename_all = "camelCase")]
-pub struct DumpInfo {
-    pub uid: String,
-    pub status: DumpStatus,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub error: Option<String>,
-    #[serde(with = "time::serde::rfc3339")]
-    started_at: OffsetDateTime,
-    #[serde(
-        skip_serializing_if = "Option::is_none",
-        with = "time::serde::rfc3339::option"
-    )]
-    finished_at: Option<OffsetDateTime>,
-}
-
-impl DumpInfo {
-    pub fn new(uid: String, status: DumpStatus) -> Self {
-        Self {
-            uid,
-            status,
-            error: None,
-            started_at: OffsetDateTime::now_utc(),
-            finished_at: None,
-        }
-    }
-
-    pub fn with_error(&mut self, error: String) {
-        self.status = DumpStatus::Failed;
-        self.finished_at = Some(OffsetDateTime::now_utc());
-        self.error = Some(error);
-    }
-
-    pub fn done(&mut self) {
-        self.finished_at = Some(OffsetDateTime::now_utc());
-        self.status = DumpStatus::Done;
-    }
-
-    pub fn dump_already_in_progress(&self) -> bool {
-        self.status == DumpStatus::InProgress
-    }
-}
-
 pub fn load_dump(
     dst_path: impl AsRef<Path>,
     src_path: impl AsRef<Path>,
@@ -313,76 +256,59 @@ fn persist_dump(dst_path: impl AsRef<Path>, tmp_dst: TempDir) -> anyhow::Result<
 }
 
 pub struct DumpJob {
-    dump_path: PathBuf,
-    db_path: PathBuf,
-    update_file_store: UpdateFileStore,
-    scheduler: Arc<RwLock<Scheduler>>,
-    uid: String,
-    update_db_size: usize,
-    index_db_size: usize,
+    pub dump_path: PathBuf,
+    pub db_path: PathBuf,
+    pub update_file_store: UpdateFileStore,
+    pub uid: String,
+    pub update_db_size: usize,
+    pub index_db_size: usize,
 }
 
 impl DumpJob {
-    async fn run(self) -> Result<()> {
-        // trace!("Performing dump.");
-        //
-        // create_dir_all(&self.dump_path).await?;
-        //
-        // let temp_dump_dir = tokio::task::spawn_blocking(tempfile::TempDir::new).await??;
-        // let temp_dump_path = temp_dump_dir.path().to_owned();
-        //
-        // let meta = MetadataVersion::new_v4(self.index_db_size, self.update_db_size);
-        // let meta_path = temp_dump_path.join(META_FILE_NAME);
-        // let mut meta_file = File::create(&meta_path)?;
-        // serde_json::to_writer(&mut meta_file, &meta)?;
-        // analytics::copy_user_id(&self.db_path, &temp_dump_path);
-        //
-        // create_dir_all(&temp_dump_path.join("indexes")).await?;
-        //
-        // let (sender, receiver) = oneshot::channel();
-        //
-        // self.scheduler
-        //     .write()
-        //     .await
-        //     .schedule_job(Job::Dump {
-        //         ret: sender,
-        //         path: temp_dump_path.clone(),
-        //     })
-        //     .await;
-        //
-        // // wait until the job has started performing before finishing the dump process
-        // let sender = receiver.await??;
-        //
-        // AuthController::dump(&self.db_path, &temp_dump_path)?;
-        //
-        // //TODO(marin): this is not right, the scheduler should dump itself, not do it here...
+    pub async fn run(self) -> Result<()> {
+        trace!("Performing dump.");
+
+        create_dir_all(&self.dump_path).await?;
+
+        let temp_dump_dir = tokio::task::spawn_blocking(tempfile::TempDir::new).await??;
+        let temp_dump_path = temp_dump_dir.path().to_owned();
+
+        let meta = MetadataVersion::new_v4(self.index_db_size, self.update_db_size);
+        let meta_path = temp_dump_path.join(META_FILE_NAME);
+        let mut meta_file = File::create(&meta_path)?;
+        serde_json::to_writer(&mut meta_file, &meta)?;
+        analytics::copy_user_id(&self.db_path, &temp_dump_path);
+
+        create_dir_all(&temp_dump_path.join("indexes")).await?;
+
+        AuthController::dump(&self.db_path, &temp_dump_path)?;
+        // TODO: Dump indexes and updates
+
+        //TODO(marin): this is not right, the scheduler should dump itself, not do it here...
         // self.scheduler
         //     .read()
         //     .await
         //     .dump(&temp_dump_path, self.update_file_store.clone())
         //     .await?;
-        //
-        // let dump_path = tokio::task::spawn_blocking(move || -> Result<PathBuf> {
-        //     // for now we simply copy the updates/updates_files
-        //     // FIXME: We may copy more files than necessary, if new files are added while we are
-        //     // performing the dump. We need a way to filter them out.
-        //
-        //     let temp_dump_file = tempfile::NamedTempFile::new_in(&self.dump_path)?;
-        //     to_tar_gz(temp_dump_path, temp_dump_file.path())
-        //         .map_err(|e| DumpActorError::Internal(e.into()))?;
-        //
-        //     let dump_path = self.dump_path.join(self.uid).with_extension("dump");
-        //     temp_dump_file.persist(&dump_path)?;
-        //
-        //     Ok(dump_path)
-        // })
-        // .await??;
-        //
-        // // notify the update loop that we are finished performing the dump.
-        // let _ = sender.send(());
-        //
-        // info!("Created dump in {:?}.", dump_path);
-        //
+
+        let dump_path = tokio::task::spawn_blocking(move || -> Result<PathBuf> {
+            // for now we simply copy the updates/updates_files
+            // FIXME: We may copy more files than necessary, if new files are added while we are
+            // performing the dump. We need a way to filter them out.
+
+            let temp_dump_file = tempfile::NamedTempFile::new_in(&self.dump_path)?;
+            to_tar_gz(temp_dump_path, temp_dump_file.path())
+                .map_err(|e| DumpError::Internal(e.into()))?;
+
+            let dump_path = self.dump_path.join(self.uid).with_extension("dump");
+            temp_dump_file.persist(&dump_path)?;
+
+            Ok(dump_path)
+        })
+        .await??;
+
+        info!("Created dump in {:?}.", dump_path);
+
         Ok(())
     }
 }
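Note: with DumpJob's fields and run() now public, a caller outside this module (presumably the batch handler named in the commit title) can construct and drive a dump job directly, without the actor and handle machinery this commit comments out. A hedged sketch of such a call site, assuming the module's DumpJob, UpdateFileStore, and Result are in scope; all field values are placeholders:

// Hypothetical call site; paths, uid, and sizes are illustrative only.
async fn trigger_dump(update_file_store: UpdateFileStore) -> Result<()> {
    let job = DumpJob {
        dump_path: "dumps/".into(),
        db_path: "data.ms/".into(),
        update_file_store,
        uid: "dump-uid".to_string(),
        update_db_size: 100 * 1024 * 1024,
        index_db_size: 100 * 1024 * 1024,
    };
    // run() consumes the job: it writes metadata.json, copies the user id
    // for analytics, dumps the auth keys, then packs the temp directory
    // into <dump_path>/<uid>.dump on a blocking thread.
    job.run().await
}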
 