mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-26 05:26:27 +00:00 
			
		
		
		
	first mostly working version
This commit is contained in:
		
							
								
								
									
										1
									
								
								Cargo.lock
									
									
									
										generated
									
									
									
								
							
							
						
						
									
										1
									
								
								Cargo.lock
									
									
									
										generated
									
									
									
								
							| @@ -2284,6 +2284,7 @@ dependencies = [ | |||||||
|  "cargo_toml", |  "cargo_toml", | ||||||
|  "clap 4.0.9", |  "clap 4.0.9", | ||||||
|  "crossbeam-channel", |  "crossbeam-channel", | ||||||
|  |  "dump", | ||||||
|  "either", |  "either", | ||||||
|  "env_logger", |  "env_logger", | ||||||
|  "file-store", |  "file-store", | ||||||
|   | |||||||
| @@ -1,8 +1,10 @@ | |||||||
| use meilisearch_types::{ | use meilisearch_types::{ | ||||||
|     error::ResponseError, |     error::ResponseError, | ||||||
|  |     keys::Key, | ||||||
|     milli::update::IndexDocumentsMethod, |     milli::update::IndexDocumentsMethod, | ||||||
|     settings::Unchecked, |     settings::Unchecked, | ||||||
|     tasks::{Details, KindWithContent, Status, Task, TaskId}, |     tasks::{Details, KindWithContent, Status, Task, TaskId}, | ||||||
|  |     InstanceUid, | ||||||
| }; | }; | ||||||
| use serde::{Deserialize, Serialize}; | use serde::{Deserialize, Serialize}; | ||||||
| use time::OffsetDateTime; | use time::OffsetDateTime; | ||||||
| @@ -12,7 +14,7 @@ mod reader; | |||||||
| mod writer; | mod writer; | ||||||
|  |  | ||||||
| pub use error::Error; | pub use error::Error; | ||||||
| pub use reader::DumpReader; | pub use reader::{DumpReader, UpdateFile}; | ||||||
| pub use writer::DumpWriter; | pub use writer::DumpWriter; | ||||||
|  |  | ||||||
| const CURRENT_DUMP_VERSION: Version = Version::V6; | const CURRENT_DUMP_VERSION: Version = Version::V6; | ||||||
| @@ -49,14 +51,13 @@ pub enum Version { | |||||||
|     V6, |     V6, | ||||||
| } | } | ||||||
|  |  | ||||||
| #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] | #[derive(Debug, PartialEq, Serialize, Deserialize)] | ||||||
| #[serde(rename_all = "camelCase")] | #[serde(rename_all = "camelCase")] | ||||||
| pub struct TaskDump { | pub struct TaskDump { | ||||||
|     pub uid: TaskId, |     pub uid: TaskId, | ||||||
|     #[serde(default)] |     #[serde(default)] | ||||||
|     pub index_uid: Option<String>, |     pub index_uid: Option<String>, | ||||||
|     pub status: Status, |     pub status: Status, | ||||||
|     // TODO use our own Kind for the user |  | ||||||
|     #[serde(rename = "type")] |     #[serde(rename = "type")] | ||||||
|     pub kind: KindDump, |     pub kind: KindDump, | ||||||
|  |  | ||||||
| @@ -82,7 +83,7 @@ pub struct TaskDump { | |||||||
| } | } | ||||||
|  |  | ||||||
| // A `Kind` specific version made for the dump. If modified you may break the dump. | // A `Kind` specific version made for the dump. If modified you may break the dump. | ||||||
| #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] | #[derive(Debug, PartialEq, Serialize, Deserialize)] | ||||||
| #[serde(rename_all = "camelCase")] | #[serde(rename_all = "camelCase")] | ||||||
| pub enum KindDump { | pub enum KindDump { | ||||||
|     DocumentImport { |     DocumentImport { | ||||||
| @@ -118,7 +119,9 @@ pub enum KindDump { | |||||||
|         query: String, |         query: String, | ||||||
|         tasks: Vec<TaskId>, |         tasks: Vec<TaskId>, | ||||||
|     }, |     }, | ||||||
|     DumpExport, |     DumpExport { | ||||||
|  |         dump_uid: String, | ||||||
|  |     }, | ||||||
|     Snapshot, |     Snapshot, | ||||||
| } | } | ||||||
|  |  | ||||||
| @@ -177,7 +180,7 @@ impl From<KindWithContent> for KindDump { | |||||||
|             KindWithContent::IndexSwap { lhs, rhs } => KindDump::IndexSwap { lhs, rhs }, |             KindWithContent::IndexSwap { lhs, rhs } => KindDump::IndexSwap { lhs, rhs }, | ||||||
|             KindWithContent::CancelTask { tasks } => KindDump::CancelTask { tasks }, |             KindWithContent::CancelTask { tasks } => KindDump::CancelTask { tasks }, | ||||||
|             KindWithContent::DeleteTasks { query, tasks } => KindDump::DeleteTasks { query, tasks }, |             KindWithContent::DeleteTasks { query, tasks } => KindDump::DeleteTasks { query, tasks }, | ||||||
|             KindWithContent::DumpExport { .. } => KindDump::DumpExport, |             KindWithContent::DumpExport { dump_uid, .. } => KindDump::DumpExport { dump_uid }, | ||||||
|             KindWithContent::Snapshot => KindDump::Snapshot, |             KindWithContent::Snapshot => KindDump::Snapshot, | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
| @@ -206,8 +209,7 @@ pub(crate) mod test { | |||||||
|     use uuid::Uuid; |     use uuid::Uuid; | ||||||
|  |  | ||||||
|     use crate::{ |     use crate::{ | ||||||
|         reader::{self, Document}, |         reader::Document, DumpReader, DumpWriter, IndexMetadata, KindDump, TaskDump, Version, | ||||||
|         DumpReader, DumpWriter, IndexMetadata, KindDump, TaskDump, Version, |  | ||||||
|     }; |     }; | ||||||
|  |  | ||||||
|     pub fn create_test_instance_uid() -> Uuid { |     pub fn create_test_instance_uid() -> Uuid { | ||||||
|   | |||||||
| @@ -116,7 +116,9 @@ impl CompatV5ToV6 { | |||||||
|                             allow_index_creation, |                             allow_index_creation, | ||||||
|                             settings: settings.into(), |                             settings: settings.into(), | ||||||
|                         }, |                         }, | ||||||
|                         v5::tasks::TaskContent::Dump { .. } => v6::Kind::DumpExport, |                         v5::tasks::TaskContent::Dump { uid } => { | ||||||
|  |                             v6::Kind::DumpExport { dump_uid: uid } | ||||||
|  |                         } | ||||||
|                     }, |                     }, | ||||||
|                     details: task_view.details.map(|details| match details { |                     details: task_view.details.map(|details| match details { | ||||||
|                         v5::Details::DocumentAddition { |                         v5::Details::DocumentAddition { | ||||||
| @@ -412,7 +414,7 @@ pub(crate) mod test { | |||||||
|         // tasks |         // tasks | ||||||
|         let tasks = dump.tasks().collect::<Result<Vec<_>>>().unwrap(); |         let tasks = dump.tasks().collect::<Result<Vec<_>>>().unwrap(); | ||||||
|         let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip(); |         let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip(); | ||||||
|         meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"0fff3c32487e3d3058d51ed951c1057f"); |         meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"84d5b8eb31735d643483fcee28080edf"); | ||||||
|         assert_eq!(update_files.len(), 22); |         assert_eq!(update_files.len(), 22); | ||||||
|         assert!(update_files[0].is_none()); // the dump creation |         assert!(update_files[0].is_none()); // the dump creation | ||||||
|         assert!(update_files[1].is_some()); // the enqueued document addition |         assert!(update_files[1].is_some()); // the enqueued document addition | ||||||
|   | |||||||
| @@ -203,7 +203,7 @@ pub(crate) mod test { | |||||||
|         // tasks |         // tasks | ||||||
|         let tasks = dump.tasks().collect::<Result<Vec<_>>>().unwrap(); |         let tasks = dump.tasks().collect::<Result<Vec<_>>>().unwrap(); | ||||||
|         let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip(); |         let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip(); | ||||||
|         meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"0fff3c32487e3d3058d51ed951c1057f"); |         meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"84d5b8eb31735d643483fcee28080edf"); | ||||||
|         assert_eq!(update_files.len(), 22); |         assert_eq!(update_files.len(), 22); | ||||||
|         assert!(update_files[0].is_none()); // the dump creation |         assert!(update_files[0].is_none()); // the dump creation | ||||||
|         assert!(update_files[1].is_some()); // the enqueued document addition |         assert!(update_files[1].is_some()); // the enqueued document addition | ||||||
|   | |||||||
| @@ -109,7 +109,7 @@ impl V6Reader { | |||||||
|         &mut self, |         &mut self, | ||||||
|     ) -> Box<dyn Iterator<Item = Result<(Task, Option<Box<super::UpdateFile>>)>> + '_> { |     ) -> Box<dyn Iterator<Item = Result<(Task, Option<Box<super::UpdateFile>>)>> + '_> { | ||||||
|         Box::new((&mut self.tasks).lines().map(|line| -> Result<_> { |         Box::new((&mut self.tasks).lines().map(|line| -> Result<_> { | ||||||
|             let task: Task = serde_json::from_str(&line?)?; |             let task: Task = serde_json::from_str(&dbg!(line?)).unwrap(); | ||||||
|  |  | ||||||
|             let update_file_path = self |             let update_file_path = self | ||||||
|                 .dump |                 .dump | ||||||
| @@ -121,7 +121,8 @@ impl V6Reader { | |||||||
|             if update_file_path.exists() { |             if update_file_path.exists() { | ||||||
|                 Ok(( |                 Ok(( | ||||||
|                     task, |                     task, | ||||||
|                     Some(Box::new(UpdateFile::new(&update_file_path)?) as Box<super::UpdateFile>), |                     Some(Box::new(UpdateFile::new(&update_file_path).unwrap()) | ||||||
|  |                         as Box<super::UpdateFile>), | ||||||
|                 )) |                 )) | ||||||
|             } else { |             } else { | ||||||
|                 Ok((task, None)) |                 Ok((task, None)) | ||||||
|   | |||||||
| @@ -71,24 +71,26 @@ impl DumpWriter { | |||||||
| } | } | ||||||
|  |  | ||||||
| pub struct KeyWriter { | pub struct KeyWriter { | ||||||
|     file: File, |     keys: BufWriter<File>, | ||||||
| } | } | ||||||
|  |  | ||||||
| impl KeyWriter { | impl KeyWriter { | ||||||
|     pub(crate) fn new(path: PathBuf) -> Result<Self> { |     pub(crate) fn new(path: PathBuf) -> Result<Self> { | ||||||
|         let file = File::create(path.join("keys.jsonl"))?; |         let keys = File::create(path.join("keys.jsonl"))?; | ||||||
|         Ok(KeyWriter { file }) |         Ok(KeyWriter { | ||||||
|  |             keys: BufWriter::new(keys), | ||||||
|  |         }) | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     pub fn push_key(&mut self, key: &Key) -> Result<()> { |     pub fn push_key(&mut self, key: &Key) -> Result<()> { | ||||||
|         self.file.write_all(&serde_json::to_vec(key)?)?; |         self.keys.write_all(&serde_json::to_vec(key)?)?; | ||||||
|         self.file.write_all(b"\n")?; |         self.keys.write_all(b"\n")?; | ||||||
|         Ok(()) |         Ok(()) | ||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
| pub struct TaskWriter { | pub struct TaskWriter { | ||||||
|     queue: File, |     queue: BufWriter<File>, | ||||||
|     update_files: PathBuf, |     update_files: PathBuf, | ||||||
| } | } | ||||||
|  |  | ||||||
| @@ -101,7 +103,7 @@ impl TaskWriter { | |||||||
|         std::fs::create_dir(&update_files)?; |         std::fs::create_dir(&update_files)?; | ||||||
|  |  | ||||||
|         Ok(TaskWriter { |         Ok(TaskWriter { | ||||||
|             queue, |             queue: BufWriter::new(queue), | ||||||
|             update_files, |             update_files, | ||||||
|         }) |         }) | ||||||
|     } |     } | ||||||
| @@ -111,6 +113,7 @@ impl TaskWriter { | |||||||
|     pub fn push_task(&mut self, task: &TaskDump) -> Result<UpdateFile> { |     pub fn push_task(&mut self, task: &TaskDump) -> Result<UpdateFile> { | ||||||
|         self.queue.write_all(&serde_json::to_vec(task)?)?; |         self.queue.write_all(&serde_json::to_vec(task)?)?; | ||||||
|         self.queue.write_all(b"\n")?; |         self.queue.write_all(b"\n")?; | ||||||
|  |         self.queue.flush()?; | ||||||
|  |  | ||||||
|         Ok(UpdateFile::new( |         Ok(UpdateFile::new( | ||||||
|             self.update_files.join(format!("{}.jsonl", task.uid)), |             self.update_files.join(format!("{}.jsonl", task.uid)), | ||||||
|   | |||||||
| @@ -736,7 +736,7 @@ impl IndexScheduler { | |||||||
|                     let user_result = match user_result { |                     let user_result = match user_result { | ||||||
|                         Ok(count) => Ok(DocumentAdditionResult { |                         Ok(count) => Ok(DocumentAdditionResult { | ||||||
|                             indexed_documents: count, |                             indexed_documents: count, | ||||||
|                             number_of_documents: count, |                             number_of_documents: count, // TODO: this is wrong, we should use the value stored in the Details. | ||||||
|                         }), |                         }), | ||||||
|                         Err(e) => Err(milli::Error::from(e)), |                         Err(e) => Err(milli::Error::from(e)), | ||||||
|                     }; |                     }; | ||||||
|   | |||||||
| @@ -13,6 +13,8 @@ pub enum Error { | |||||||
|     IndexAlreadyExists(String), |     IndexAlreadyExists(String), | ||||||
|     #[error("Corrupted task queue.")] |     #[error("Corrupted task queue.")] | ||||||
|     CorruptedTaskQueue, |     CorruptedTaskQueue, | ||||||
|  |     #[error("Corrupted dump.")] | ||||||
|  |     CorruptedDump, | ||||||
|     #[error("Task `{0}` not found")] |     #[error("Task `{0}` not found")] | ||||||
|     TaskNotFound(TaskId), |     TaskNotFound(TaskId), | ||||||
|     // TODO: Lo: proper error message for this |     // TODO: Lo: proper error message for this | ||||||
| @@ -49,14 +51,15 @@ impl ErrorCode for Error { | |||||||
|             Error::InvalidStatus(_) => Code::BadRequest, |             Error::InvalidStatus(_) => Code::BadRequest, | ||||||
|             Error::InvalidKind(_) => Code::BadRequest, |             Error::InvalidKind(_) => Code::BadRequest, | ||||||
|  |  | ||||||
|             // TODO: TAMO: are all these errors really internal? |  | ||||||
|             Error::Dump(e) => e.error_code(), |             Error::Dump(e) => e.error_code(), | ||||||
|             Error::Milli(e) => e.error_code(), |             Error::Milli(e) => e.error_code(), | ||||||
|  |             // TODO: TAMO: are all these errors really internal? | ||||||
|             Error::Heed(_) => Code::Internal, |             Error::Heed(_) => Code::Internal, | ||||||
|             Error::FileStore(_) => Code::Internal, |             Error::FileStore(_) => Code::Internal, | ||||||
|             Error::IoError(_) => Code::Internal, |             Error::IoError(_) => Code::Internal, | ||||||
|             Error::Anyhow(_) => Code::Internal, |             Error::Anyhow(_) => Code::Internal, | ||||||
|             Error::CorruptedTaskQueue => Code::Internal, |             Error::CorruptedTaskQueue => Code::Internal, | ||||||
|  |             Error::CorruptedDump => Code::Internal, | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
| } | } | ||||||
|   | |||||||
| @@ -28,7 +28,7 @@ pub struct IndexMapper { | |||||||
|  |  | ||||||
|     base_path: PathBuf, |     base_path: PathBuf, | ||||||
|     index_size: usize, |     index_size: usize, | ||||||
|     indexer_config: Arc<IndexerConfig>, |     pub indexer_config: Arc<IndexerConfig>, | ||||||
| } | } | ||||||
|  |  | ||||||
| /// Weither the index must not be inserted back | /// Weither the index must not be inserted back | ||||||
|   | |||||||
| @@ -9,13 +9,17 @@ mod utils; | |||||||
| pub type Result<T> = std::result::Result<T, Error>; | pub type Result<T> = std::result::Result<T, Error>; | ||||||
| pub type TaskId = u32; | pub type TaskId = u32; | ||||||
|  |  | ||||||
|  | use dump::{KindDump, TaskDump, UpdateFile}; | ||||||
| pub use error::Error; | pub use error::Error; | ||||||
|  | use meilisearch_types::keys::Key; | ||||||
|  | use meilisearch_types::milli::documents::DocumentsBatchBuilder; | ||||||
| use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task}; | use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task}; | ||||||
|  | use meilisearch_types::InstanceUid; | ||||||
|  |  | ||||||
| use std::path::PathBuf; | use std::path::PathBuf; | ||||||
| use std::sync::{Arc, RwLock}; | use std::sync::{Arc, RwLock}; | ||||||
|  |  | ||||||
| use file_store::{File, FileStore}; | use file_store::FileStore; | ||||||
| use meilisearch_types::error::ResponseError; | use meilisearch_types::error::ResponseError; | ||||||
| use roaring::RoaringBitmap; | use roaring::RoaringBitmap; | ||||||
| use serde::{Deserialize, Serialize}; | use serde::{Deserialize, Serialize}; | ||||||
| @@ -220,10 +224,6 @@ impl IndexScheduler { | |||||||
|         Ok(this) |         Ok(this) | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     pub fn import_dump(&self, dump_path: PathBuf) -> Result<()> { |  | ||||||
|         todo!() |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     /// This function will execute in a different thread and must be called only once. |     /// This function will execute in a different thread and must be called only once. | ||||||
|     fn run(&self) { |     fn run(&self) { | ||||||
|         let run = Self { |         let run = Self { | ||||||
| @@ -254,6 +254,10 @@ impl IndexScheduler { | |||||||
|         }); |         }); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     pub fn indexer_config(&self) -> &IndexerConfig { | ||||||
|  |         &self.index_mapper.indexer_config | ||||||
|  |     } | ||||||
|  |  | ||||||
|     /// Return the index corresponding to the name. If it wasn't opened before |     /// Return the index corresponding to the name. If it wasn't opened before | ||||||
|     /// it'll be opened. But if it doesn't exist on disk it'll throw an |     /// it'll be opened. But if it doesn't exist on disk it'll throw an | ||||||
|     /// `IndexNotFound` error. |     /// `IndexNotFound` error. | ||||||
| @@ -390,11 +394,138 @@ impl IndexScheduler { | |||||||
|         Ok(task) |         Ok(task) | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     pub fn create_update_file(&self) -> Result<(Uuid, File)> { |     /// Register a new task comming from a dump in the scheduler. | ||||||
|  |     /// By takinig a mutable ref we're pretty sure no one will ever import a dump while actix is running. | ||||||
|  |     pub fn register_dumpped_task( | ||||||
|  |         &mut self, | ||||||
|  |         task: TaskDump, | ||||||
|  |         content_file: Option<Box<UpdateFile>>, | ||||||
|  |         keys: &[Key], | ||||||
|  |         instance_uid: Option<InstanceUid>, | ||||||
|  |     ) -> Result<Task> { | ||||||
|  |         // Currently we don't need to access the tasks queue while loading a dump thus I can block everything. | ||||||
|  |         let mut wtxn = self.env.write_txn()?; | ||||||
|  |  | ||||||
|  |         let content_uuid = if let Some(content_file) = content_file { | ||||||
|  |             let (uuid, mut file) = self.create_update_file()?; | ||||||
|  |             let mut builder = DocumentsBatchBuilder::new(file.as_file_mut()); | ||||||
|  |             for doc in content_file { | ||||||
|  |                 builder.append_json_object(&doc?)?; | ||||||
|  |             } | ||||||
|  |             builder.into_inner()?; | ||||||
|  |  | ||||||
|  |             file.persist()?; | ||||||
|  |  | ||||||
|  |             Some(uuid) | ||||||
|  |         } else { | ||||||
|  |             None | ||||||
|  |         }; | ||||||
|  |  | ||||||
|  |         let task = Task { | ||||||
|  |             uid: task.uid, | ||||||
|  |             enqueued_at: task.enqueued_at, | ||||||
|  |             started_at: task.started_at, | ||||||
|  |             finished_at: task.finished_at, | ||||||
|  |             error: task.error, | ||||||
|  |             details: task.details, | ||||||
|  |             status: task.status, | ||||||
|  |             kind: match task.kind { | ||||||
|  |                 KindDump::DocumentImport { | ||||||
|  |                     primary_key, | ||||||
|  |                     method, | ||||||
|  |                     documents_count, | ||||||
|  |                     allow_index_creation, | ||||||
|  |                 } => KindWithContent::DocumentImport { | ||||||
|  |                     index_uid: task.index_uid.ok_or(Error::CorruptedDump)?, | ||||||
|  |                     primary_key, | ||||||
|  |                     method, | ||||||
|  |                     content_file: content_uuid.ok_or(Error::CorruptedDump)?, | ||||||
|  |                     documents_count, | ||||||
|  |                     allow_index_creation, | ||||||
|  |                 }, | ||||||
|  |                 KindDump::DocumentDeletion { documents_ids } => KindWithContent::DocumentDeletion { | ||||||
|  |                     documents_ids, | ||||||
|  |                     index_uid: task.index_uid.ok_or(Error::CorruptedDump)?, | ||||||
|  |                 }, | ||||||
|  |                 KindDump::DocumentClear => KindWithContent::DocumentClear { | ||||||
|  |                     index_uid: task.index_uid.ok_or(Error::CorruptedDump)?, | ||||||
|  |                 }, | ||||||
|  |                 KindDump::Settings { | ||||||
|  |                     settings, | ||||||
|  |                     is_deletion, | ||||||
|  |                     allow_index_creation, | ||||||
|  |                 } => KindWithContent::Settings { | ||||||
|  |                     index_uid: task.index_uid.ok_or(Error::CorruptedDump)?, | ||||||
|  |                     new_settings: settings, | ||||||
|  |                     is_deletion, | ||||||
|  |                     allow_index_creation, | ||||||
|  |                 }, | ||||||
|  |                 KindDump::IndexDeletion => KindWithContent::IndexDeletion { | ||||||
|  |                     index_uid: task.index_uid.ok_or(Error::CorruptedDump)?, | ||||||
|  |                 }, | ||||||
|  |                 KindDump::IndexCreation { primary_key } => KindWithContent::IndexCreation { | ||||||
|  |                     index_uid: task.index_uid.ok_or(Error::CorruptedDump)?, | ||||||
|  |                     primary_key, | ||||||
|  |                 }, | ||||||
|  |                 KindDump::IndexUpdate { primary_key } => KindWithContent::IndexUpdate { | ||||||
|  |                     index_uid: task.index_uid.ok_or(Error::CorruptedDump)?, | ||||||
|  |                     primary_key, | ||||||
|  |                 }, | ||||||
|  |                 KindDump::IndexSwap { lhs, rhs } => KindWithContent::IndexSwap { lhs, rhs }, | ||||||
|  |                 KindDump::CancelTask { tasks } => KindWithContent::CancelTask { tasks }, | ||||||
|  |                 KindDump::DeleteTasks { query, tasks } => { | ||||||
|  |                     KindWithContent::DeleteTasks { query, tasks } | ||||||
|  |                 } | ||||||
|  |                 KindDump::DumpExport { dump_uid } => KindWithContent::DumpExport { | ||||||
|  |                     dump_uid, | ||||||
|  |                     keys: keys.to_vec(), | ||||||
|  |                     instance_uid, | ||||||
|  |                 }, | ||||||
|  |                 KindDump::Snapshot => KindWithContent::Snapshot, | ||||||
|  |             }, | ||||||
|  |         }; | ||||||
|  |  | ||||||
|  |         self.all_tasks | ||||||
|  |             .append(&mut wtxn, &BEU32::new(task.uid), &task)?; | ||||||
|  |  | ||||||
|  |         if let Some(indexes) = task.indexes() { | ||||||
|  |             for index in indexes { | ||||||
|  |                 self.update_index(&mut wtxn, index, |bitmap| { | ||||||
|  |                     bitmap.insert(task.uid); | ||||||
|  |                 })?; | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         self.update_status(&mut wtxn, task.status, |bitmap| { | ||||||
|  |             bitmap.insert(task.uid); | ||||||
|  |         })?; | ||||||
|  |  | ||||||
|  |         self.update_kind(&mut wtxn, task.kind.as_kind(), |bitmap| { | ||||||
|  |             (bitmap.insert(task.uid)); | ||||||
|  |         })?; | ||||||
|  |  | ||||||
|  |         match wtxn.commit() { | ||||||
|  |             Ok(()) => (), | ||||||
|  |             _e @ Err(_) => { | ||||||
|  |                 todo!("remove the data associated with the task"); | ||||||
|  |                 // _e?; | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         Ok(task) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Create a new index without any associated task. | ||||||
|  |     pub fn create_raw_index(&self, name: &str) -> Result<Index> { | ||||||
|  |         let mut wtxn = self.env.write_txn()?; | ||||||
|  |         self.index_mapper.create_index(&mut wtxn, name) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     pub fn create_update_file(&self) -> Result<(Uuid, file_store::File)> { | ||||||
|         Ok(self.file_store.new_update()?) |         Ok(self.file_store.new_update()?) | ||||||
|     } |     } | ||||||
|     #[cfg(test)] |     #[cfg(test)] | ||||||
|     pub fn create_update_file_with_uuid(&self, uuid: u128) -> Result<(Uuid, File)> { |     pub fn create_update_file_with_uuid(&self, uuid: u128) -> Result<(Uuid, file_store::File)> { | ||||||
|         Ok(self.file_store.new_update_with_uuid(uuid)?) |         Ok(self.file_store.new_update_with_uuid(uuid)?) | ||||||
|     } |     } | ||||||
|  |  | ||||||
|   | |||||||
| @@ -165,6 +165,17 @@ impl AuthController { | |||||||
|             None => Ok(false), |             None => Ok(false), | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     /// Delete all the keys in the DB. | ||||||
|  |     pub fn raw_delete_all_keys(&mut self) -> Result<()> { | ||||||
|  |         self.store.delete_all_keys() | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Delete all the keys in the DB. | ||||||
|  |     pub fn raw_insert_key(&mut self, key: Key) -> Result<()> { | ||||||
|  |         self.store.put_api_key(key)?; | ||||||
|  |         Ok(()) | ||||||
|  |     } | ||||||
| } | } | ||||||
|  |  | ||||||
| pub struct AuthFilter { | pub struct AuthFilter { | ||||||
|   | |||||||
| @@ -197,6 +197,13 @@ impl HeedAuthStore { | |||||||
|         Ok(existing) |         Ok(existing) | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     pub fn delete_all_keys(&self) -> Result<()> { | ||||||
|  |         let mut wtxn = self.env.write_txn()?; | ||||||
|  |         self.keys.clear(&mut wtxn)?; | ||||||
|  |         wtxn.commit()?; | ||||||
|  |         Ok(()) | ||||||
|  |     } | ||||||
|  |  | ||||||
|     pub fn list_api_keys(&self) -> Result<Vec<Key>> { |     pub fn list_api_keys(&self) -> Result<Vec<Key>> { | ||||||
|         let mut list = Vec::new(); |         let mut list = Vec::new(); | ||||||
|         let rtxn = self.env.read_txn()?; |         let rtxn = self.env.read_txn()?; | ||||||
|   | |||||||
| @@ -34,6 +34,7 @@ byte-unit = { version = "4.0.14", default-features = false, features = ["std", " | |||||||
| bytes = "1.2.1" | bytes = "1.2.1" | ||||||
| clap = { version = "4.0.9", features = ["derive", "env"] } | clap = { version = "4.0.9", features = ["derive", "env"] } | ||||||
| crossbeam-channel = "0.5.6" | crossbeam-channel = "0.5.6" | ||||||
|  | dump = { path = "../dump" } | ||||||
| either = "1.8.0" | either = "1.8.0" | ||||||
| env_logger = "0.9.1" | env_logger = "0.9.1" | ||||||
| flate2 = "1.0.24" | flate2 = "1.0.24" | ||||||
|   | |||||||
| @@ -13,14 +13,28 @@ pub mod metrics; | |||||||
| #[cfg(feature = "metrics")] | #[cfg(feature = "metrics")] | ||||||
| pub mod route_metrics; | pub mod route_metrics; | ||||||
|  |  | ||||||
| use std::sync::{atomic::AtomicBool, Arc}; | use std::{ | ||||||
|  |     fs::File, | ||||||
|  |     io::{BufReader, BufWriter, Seek, SeekFrom}, | ||||||
|  |     path::Path, | ||||||
|  |     sync::{atomic::AtomicBool, Arc}, | ||||||
|  | }; | ||||||
|  |  | ||||||
| use crate::error::MeilisearchHttpError; | use crate::error::MeilisearchHttpError; | ||||||
| use actix_web::error::JsonPayloadError; | use actix_web::error::JsonPayloadError; | ||||||
| use actix_web::web::Data; | use actix_web::web::Data; | ||||||
| use analytics::Analytics; | use analytics::Analytics; | ||||||
|  | use anyhow::bail; | ||||||
| use error::PayloadError; | use error::PayloadError; | ||||||
| use http::header::CONTENT_TYPE; | use http::header::CONTENT_TYPE; | ||||||
|  | use meilisearch_types::{ | ||||||
|  |     milli::{ | ||||||
|  |         self, | ||||||
|  |         documents::{DocumentsBatchBuilder, DocumentsBatchReader}, | ||||||
|  |         update::{IndexDocumentsConfig, IndexDocumentsMethod}, | ||||||
|  |     }, | ||||||
|  |     settings::apply_settings_to_builder, | ||||||
|  | }; | ||||||
| pub use option::Opt; | pub use option::Opt; | ||||||
|  |  | ||||||
| use actix_web::{web, HttpRequest}; | use actix_web::{web, HttpRequest}; | ||||||
| @@ -31,19 +45,83 @@ use meilisearch_auth::AuthController; | |||||||
|  |  | ||||||
| pub static AUTOBATCHING_ENABLED: AtomicBool = AtomicBool::new(false); | pub static AUTOBATCHING_ENABLED: AtomicBool = AtomicBool::new(false); | ||||||
|  |  | ||||||
|  | /// Check if a db is empty. It does not provide any information on the | ||||||
|  | /// validity of the data in it. | ||||||
|  | /// We consider a database as non empty when it's a non empty directory. | ||||||
|  | fn is_empty_db(db_path: impl AsRef<Path>) -> bool { | ||||||
|  |     let db_path = db_path.as_ref(); | ||||||
|  |  | ||||||
|  |     if !db_path.exists() { | ||||||
|  |         true | ||||||
|  |     // if we encounter an error or if the db is a file we consider the db non empty | ||||||
|  |     } else if let Ok(dir) = db_path.read_dir() { | ||||||
|  |         dir.count() == 0 | ||||||
|  |     } else { | ||||||
|  |         true | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
| // TODO: TAMO: Finish setting up things | // TODO: TAMO: Finish setting up things | ||||||
| pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<IndexScheduler> { | pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<(IndexScheduler, AuthController)> { | ||||||
|     let meilisearch = IndexScheduler::new( |     // we don't want to create anything in the data.ms yet, thus we | ||||||
|         opt.db_path.join("tasks"), |     // wrap our two builders in a closure that'll be executed later. | ||||||
|         opt.db_path.join("update_files"), |     let auth_controller_builder = || AuthController::new(&opt.db_path, &opt.master_key); | ||||||
|         opt.db_path.join("indexes"), |  | ||||||
|         opt.dumps_dir.clone(), |     let index_scheduler_builder = || { | ||||||
|         opt.max_index_size.get_bytes() as usize, |         IndexScheduler::new( | ||||||
|         (&opt.indexer_options).try_into()?, |             opt.db_path.join("tasks"), | ||||||
|         true, |             opt.db_path.join("update_files"), | ||||||
|         #[cfg(test)] |             opt.db_path.join("indexes"), | ||||||
|         todo!("We'll see later"), |             opt.dumps_dir.clone(), | ||||||
|     )?; |             opt.max_index_size.get_bytes() as usize, | ||||||
|  |             (&opt.indexer_options).try_into()?, | ||||||
|  |             true, | ||||||
|  |             #[cfg(test)] | ||||||
|  |             todo!("We'll see later"), | ||||||
|  |         ) | ||||||
|  |     }; | ||||||
|  |  | ||||||
|  |     let (index_scheduler, auth_controller) = if let Some(ref _path) = opt.import_snapshot { | ||||||
|  |         // handle the snapshot with something akin to the dumps | ||||||
|  |         // + the snapshot interval / spawning a thread | ||||||
|  |         todo!(); | ||||||
|  |     } else if let Some(ref path) = opt.import_dump { | ||||||
|  |         let empty_db = is_empty_db(&opt.db_path); | ||||||
|  |         let src_path_exists = path.exists(); | ||||||
|  |  | ||||||
|  |         if empty_db && src_path_exists { | ||||||
|  |             let mut index_scheduler = index_scheduler_builder()?; | ||||||
|  |             let mut auth_controller = auth_controller_builder()?; | ||||||
|  |             import_dump( | ||||||
|  |                 &opt.db_path, | ||||||
|  |                 path, | ||||||
|  |                 &mut index_scheduler, | ||||||
|  |                 &mut auth_controller, | ||||||
|  |             )?; | ||||||
|  |             (index_scheduler, auth_controller) | ||||||
|  |         } else if !empty_db && !opt.ignore_dump_if_db_exists { | ||||||
|  |             bail!( | ||||||
|  |                 "database already exists at {:?}, try to delete it or rename it", | ||||||
|  |                 opt.db_path | ||||||
|  |                     .canonicalize() | ||||||
|  |                     .unwrap_or_else(|_| opt.db_path.to_owned()) | ||||||
|  |             ) | ||||||
|  |         } else if !src_path_exists && !opt.ignore_missing_dump { | ||||||
|  |             bail!("dump doesn't exist at {:?}", path) | ||||||
|  |         } else { | ||||||
|  |             let mut index_scheduler = index_scheduler_builder()?; | ||||||
|  |             let mut auth_controller = auth_controller_builder()?; | ||||||
|  |             import_dump( | ||||||
|  |                 &opt.db_path, | ||||||
|  |                 path, | ||||||
|  |                 &mut index_scheduler, | ||||||
|  |                 &mut auth_controller, | ||||||
|  |             )?; | ||||||
|  |             (index_scheduler, auth_controller) | ||||||
|  |         } | ||||||
|  |     } else { | ||||||
|  |         (index_scheduler_builder()?, auth_controller_builder()?) | ||||||
|  |     }; | ||||||
|  |  | ||||||
|     /* |     /* | ||||||
|     TODO: We should start a thread to handle the snapshots. |     TODO: We should start a thread to handle the snapshots. | ||||||
| @@ -53,25 +131,125 @@ pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<IndexScheduler> { | |||||||
|         .set_ignore_snapshot_if_db_exists(opt.ignore_snapshot_if_db_exists) |         .set_ignore_snapshot_if_db_exists(opt.ignore_snapshot_if_db_exists) | ||||||
|         .set_snapshot_interval(Duration::from_secs(opt.snapshot_interval_sec)) |         .set_snapshot_interval(Duration::from_secs(opt.snapshot_interval_sec)) | ||||||
|         .set_snapshot_dir(opt.snapshot_dir.clone()) |         .set_snapshot_dir(opt.snapshot_dir.clone()) | ||||||
|         // dump |  | ||||||
|         .set_ignore_missing_dump(opt.ignore_missing_dump) |  | ||||||
|         .set_ignore_dump_if_db_exists(opt.ignore_dump_if_db_exists) |  | ||||||
|         .set_dump_dst(opt.dumps_dir.clone()); |  | ||||||
|  |  | ||||||
|     if let Some(ref path) = opt.import_snapshot { |     if let Some(ref path) = opt.import_snapshot { | ||||||
|         meilisearch.set_import_snapshot(path.clone()); |         meilisearch.set_import_snapshot(path.clone()); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     if let Some(ref path) = opt.import_dump { |  | ||||||
|         meilisearch.set_dump_src(path.clone()); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     if opt.schedule_snapshot { |     if opt.schedule_snapshot { | ||||||
|         meilisearch.set_schedule_snapshot(); |         meilisearch.set_schedule_snapshot(); | ||||||
|     } |     } | ||||||
|     */ |     */ | ||||||
|  |  | ||||||
|     Ok(meilisearch) |     Ok((index_scheduler, auth_controller)) | ||||||
|  | } | ||||||
|  |  | ||||||
|  | fn import_dump( | ||||||
|  |     db_path: &Path, | ||||||
|  |     dump_path: &Path, | ||||||
|  |     index_scheduler: &mut IndexScheduler, | ||||||
|  |     auth: &mut AuthController, | ||||||
|  | ) -> Result<(), anyhow::Error> { | ||||||
|  |     let reader = File::open(dump_path)?; | ||||||
|  |     let mut dump_reader = dump::DumpReader::open(reader)?; | ||||||
|  |  | ||||||
|  |     if let Some(date) = dump_reader.date() { | ||||||
|  |         log::info!( | ||||||
|  |             "Importing a dump of meilisearch `{:?}` from the {}", | ||||||
|  |             dump_reader.version(), // TODO: get the meilisearch version instead of the dump version | ||||||
|  |             date | ||||||
|  |         ); | ||||||
|  |     } else { | ||||||
|  |         log::info!( | ||||||
|  |             "Importing a dump of meilisearch `{:?}`", | ||||||
|  |             dump_reader.version(), // TODO: get the meilisearch version instead of the dump version | ||||||
|  |         ); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     let instance_uid = dump_reader.instance_uid()?; | ||||||
|  |  | ||||||
|  |     // 1. Import the instance-uid. | ||||||
|  |     if let Some(ref instance_uid) = instance_uid { | ||||||
|  |         // we don't want to panic if there is an error with the instance-uid. | ||||||
|  |         let _ = std::fs::write( | ||||||
|  |             db_path.join("instance-uid"), | ||||||
|  |             instance_uid.to_string().as_bytes(), | ||||||
|  |         ); | ||||||
|  |     }; | ||||||
|  |  | ||||||
|  |     // 2. Import the `Key`s. | ||||||
|  |     let mut keys = Vec::new(); | ||||||
|  |     auth.raw_delete_all_keys()?; | ||||||
|  |     for key in dump_reader.keys() { | ||||||
|  |         let key = key?; | ||||||
|  |         auth.raw_insert_key(key.clone())?; | ||||||
|  |         keys.push(key); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     // 3. Import the tasks. | ||||||
|  |     for ret in dump_reader.tasks() { | ||||||
|  |         let (task, file) = ret?; | ||||||
|  |         index_scheduler.register_dumpped_task(task, file, &keys, instance_uid)?; | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     let indexer_config = index_scheduler.indexer_config(); | ||||||
|  |  | ||||||
|  |     // 4. Import the indexes. | ||||||
|  |     for index_reader in dump_reader.indexes()? { | ||||||
|  |         let mut index_reader = index_reader?; | ||||||
|  |         let metadata = index_reader.metadata(); | ||||||
|  |         log::info!("Importing index `{}`.", metadata.uid); | ||||||
|  |         let index = index_scheduler.create_raw_index(&metadata.uid)?; | ||||||
|  |  | ||||||
|  |         let mut wtxn = index.write_txn()?; | ||||||
|  |  | ||||||
|  |         let mut builder = milli::update::Settings::new(&mut wtxn, &index, indexer_config); | ||||||
|  |         // 4.1 Import the primary key if there is one. | ||||||
|  |         if let Some(ref primary_key) = metadata.primary_key { | ||||||
|  |             builder.set_primary_key(primary_key.to_string()); | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         // 4.2 Import the settings. | ||||||
|  |         log::info!("Importing the settings."); | ||||||
|  |         let settings = index_reader.settings()?; | ||||||
|  |         apply_settings_to_builder(&settings, &mut builder); | ||||||
|  |         builder.execute(|indexing_step| { | ||||||
|  |             log::debug!("update: {:?}", indexing_step); | ||||||
|  |         })?; | ||||||
|  |  | ||||||
|  |         // 4.3 Import the documents. | ||||||
|  |         // 4.3.1 We need to recreate the grenad+obkv format accepted by the index. | ||||||
|  |         log::info!("Importing the documents."); | ||||||
|  |         let mut file = tempfile::tempfile()?; | ||||||
|  |         let mut builder = DocumentsBatchBuilder::new(BufWriter::new(&mut file)); | ||||||
|  |         for document in index_reader.documents()? { | ||||||
|  |             builder.append_json_object(&document?)?; | ||||||
|  |         } | ||||||
|  |         builder.into_inner()?; // this actually flush the content of the batch builder. | ||||||
|  |  | ||||||
|  |         // 4.3.2 We feed it to the milli index. | ||||||
|  |         file.seek(SeekFrom::Start(0))?; | ||||||
|  |         let reader = BufReader::new(file); | ||||||
|  |         let reader = DocumentsBatchReader::from_reader(reader)?; | ||||||
|  |  | ||||||
|  |         let builder = milli::update::IndexDocuments::new( | ||||||
|  |             &mut wtxn, | ||||||
|  |             &index, | ||||||
|  |             indexer_config, | ||||||
|  |             IndexDocumentsConfig { | ||||||
|  |                 update_method: IndexDocumentsMethod::ReplaceDocuments, | ||||||
|  |                 ..Default::default() | ||||||
|  |             }, | ||||||
|  |             |indexing_step| log::debug!("update: {:?}", indexing_step), | ||||||
|  |         )?; | ||||||
|  |  | ||||||
|  |         let (builder, user_result) = builder.add_documents(reader)?; | ||||||
|  |         log::info!("{} documents found.", user_result?); | ||||||
|  |         builder.execute()?; | ||||||
|  |         wtxn.commit()?; | ||||||
|  |         log::info!("All documents successfully imported."); | ||||||
|  |     } | ||||||
|  |     Ok(()) | ||||||
| } | } | ||||||
|  |  | ||||||
| pub fn configure_data( | pub fn configure_data( | ||||||
|   | |||||||
| @@ -48,9 +48,13 @@ async fn main() -> anyhow::Result<()> { | |||||||
|         _ => unreachable!(), |         _ => unreachable!(), | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     let index_scheduler = setup_meilisearch(&opt)?; |     let (index_scheduler, auth_controller) = match setup_meilisearch(&opt) { | ||||||
|  |         Ok(ret) => ret, | ||||||
|     let auth_controller = AuthController::new(&opt.db_path, &opt.master_key)?; |         Err(e) => { | ||||||
|  |             std::fs::remove_dir_all(opt.db_path)?; | ||||||
|  |             return Err(e); | ||||||
|  |         } | ||||||
|  |     }; | ||||||
|  |  | ||||||
|     #[cfg(all(not(debug_assertions), feature = "analytics"))] |     #[cfg(all(not(debug_assertions), feature = "analytics"))] | ||||||
|     let analytics = if !opt.no_analytics { |     let analytics = if !opt.no_analytics { | ||||||
|   | |||||||
| @@ -242,7 +242,9 @@ async fn document_addition( | |||||||
|  |  | ||||||
|     let (uuid, mut update_file) = index_scheduler.create_update_file()?; |     let (uuid, mut update_file) = index_scheduler.create_update_file()?; | ||||||
|  |  | ||||||
|  |     // TODO: this can be slow, maybe we should spawn a thread? But the payload isn't Send+Sync :weary: | ||||||
|     // push the entire stream into a `Vec`. |     // push the entire stream into a `Vec`. | ||||||
|  |     // If someone sends us a never ending stream we're going to block the thread. | ||||||
|     // TODO: Maybe we should write it to a file to reduce the RAM consumption |     // TODO: Maybe we should write it to a file to reduce the RAM consumption | ||||||
|     // and then reread it to convert it to obkv? |     // and then reread it to convert it to obkv? | ||||||
|     let mut buffer = Vec::new(); |     let mut buffer = Vec::new(); | ||||||
|   | |||||||
| @@ -14,7 +14,7 @@ type Result<T> = std::result::Result<T, Error>; | |||||||
|  |  | ||||||
| pub type KeyId = Uuid; | pub type KeyId = Uuid; | ||||||
|  |  | ||||||
| #[derive(Debug, PartialEq, Eq, Deserialize, Serialize)] | #[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize)] | ||||||
| pub struct Key { | pub struct Key { | ||||||
|     #[serde(skip_serializing_if = "Option::is_none")] |     #[serde(skip_serializing_if = "Option::is_none")] | ||||||
|     pub description: Option<String>, |     pub description: Option<String>, | ||||||
|   | |||||||
| @@ -7,7 +7,7 @@ use std::str::FromStr; | |||||||
|  |  | ||||||
| /// A type that tries to match either a star (*) or | /// A type that tries to match either a star (*) or | ||||||
| /// any other thing that implements `FromStr`. | /// any other thing that implements `FromStr`. | ||||||
| #[derive(Debug)] | #[derive(Debug, Clone)] | ||||||
| pub enum StarOr<T> { | pub enum StarOr<T> { | ||||||
|     Star, |     Star, | ||||||
|     Other(T), |     Other(T), | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user