mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-25 21:16:28 +00:00 
			
		
		
		
	write the dump export
This commit is contained in:
		| @@ -16,7 +16,6 @@ time = { version = "0.3.7", features = ["serde-well-known", "formatting", "parsi | |||||||
| tar = "0.4.38" | tar = "0.4.38" | ||||||
| anyhow = "1.0.65" | anyhow = "1.0.65" | ||||||
| log = "0.4.17" | log = "0.4.17" | ||||||
| index-scheduler = { path = "../index-scheduler" } |  | ||||||
| meilisearch-auth = { path = "../meilisearch-auth" } | meilisearch-auth = { path = "../meilisearch-auth" } | ||||||
| meilisearch-types = { path = "../meilisearch-types" } | meilisearch-types = { path = "../meilisearch-types" } | ||||||
| http = "0.2.8" | http = "0.2.8" | ||||||
|   | |||||||
| @@ -1,3 +1,4 @@ | |||||||
|  | use meilisearch_types::error::{Code, ErrorCode}; | ||||||
| use thiserror::Error; | use thiserror::Error; | ||||||
|  |  | ||||||
| #[derive(Debug, Error)] | #[derive(Debug, Error)] | ||||||
| @@ -16,3 +17,19 @@ pub enum Error { | |||||||
|     #[error(transparent)] |     #[error(transparent)] | ||||||
|     Uuid(#[from] uuid::Error), |     Uuid(#[from] uuid::Error), | ||||||
| } | } | ||||||
|  |  | ||||||
|  | impl ErrorCode for Error { | ||||||
|  |     fn error_code(&self) -> Code { | ||||||
|  |         match self { | ||||||
|  |             // Are these three really Internal errors? | ||||||
|  |             Error::Io(_) => Code::Internal, | ||||||
|  |             Error::Serde(_) => Code::Internal, | ||||||
|  |             Error::Uuid(_) => Code::Internal, | ||||||
|  |  | ||||||
|  |             // all these errors should never be raised when creating a dump, thus no error code should be associated. | ||||||
|  |             Error::DumpV1Unsupported => Code::Internal, | ||||||
|  |             Error::BadIndexName => Code::Internal, | ||||||
|  |             Error::MalformedTask => Code::Internal, | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  | } | ||||||
|   | |||||||
| @@ -52,12 +52,11 @@ pub(crate) mod test { | |||||||
|     }; |     }; | ||||||
|  |  | ||||||
|     use big_s::S; |     use big_s::S; | ||||||
|     use index_scheduler::task::Details; |  | ||||||
|     use maplit::btreeset; |     use maplit::btreeset; | ||||||
|     use meilisearch_auth::{Action, Key}; |     use meilisearch_types::keys::{Action, Key}; | ||||||
|     use meilisearch_types::milli::{self, update::Setting}; |     use meilisearch_types::milli::{self, update::Setting}; | ||||||
|     use meilisearch_types::settings::{Checked, Settings}; |     use meilisearch_types::settings::{Checked, Settings}; | ||||||
|     use meilisearch_types::tasks::{DetailsView, Kind, Status, TaskView}; |     use meilisearch_types::tasks::{Kind, Status}; | ||||||
|     use meilisearch_types::{index_uid::IndexUid, star_or::StarOr}; |     use meilisearch_types::{index_uid::IndexUid, star_or::StarOr}; | ||||||
|     use serde_json::{json, Map, Value}; |     use serde_json::{json, Map, Value}; | ||||||
|     use time::{macros::datetime, Duration}; |     use time::{macros::datetime, Duration}; | ||||||
| @@ -116,7 +115,7 @@ pub(crate) mod test { | |||||||
|         settings.check() |         settings.check() | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     pub fn create_test_tasks() -> Vec<(TaskView, Option<Vec<Document>>)> { |     pub fn create_test_tasks() -> Vec<(Task, Option<Vec<Document>>)> { | ||||||
|         vec![ |         vec![ | ||||||
|             ( |             ( | ||||||
|                 TaskView { |                 TaskView { | ||||||
|   | |||||||
| @@ -124,7 +124,7 @@ impl CompatV5ToV6 { | |||||||
|                             indexed_documents, |                             indexed_documents, | ||||||
|                         } => v6::Details::DocumentAddition { |                         } => v6::Details::DocumentAddition { | ||||||
|                             received_documents: received_documents as u64, |                             received_documents: received_documents as u64, | ||||||
|                             indexed_documents: indexed_documents.map_or(0, |i| i as u64), |                             indexed_documents: indexed_documents.map(|i| i as u64), | ||||||
|                         }, |                         }, | ||||||
|                         v5::Details::Settings { settings } => v6::Details::Settings { |                         v5::Details::Settings { settings } => v6::Details::Settings { | ||||||
|                             settings: settings.into(), |                             settings: settings.into(), | ||||||
|   | |||||||
							
								
								
									
										202
									
								
								dump/src/reader/v6/mod.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										202
									
								
								dump/src/reader/v6/mod.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,202 @@ | |||||||
|  | use std::{ | ||||||
|  |     fs::{self, File}, | ||||||
|  |     io::{BufRead, BufReader}, | ||||||
|  |     path::Path, | ||||||
|  |     str::FromStr, | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | use tempfile::TempDir; | ||||||
|  | use time::OffsetDateTime; | ||||||
|  | use uuid::Uuid; | ||||||
|  |  | ||||||
|  | use crate::{Error, IndexMetadata, Result, Version}; | ||||||
|  |  | ||||||
|  | mod tasks; | ||||||
|  |  | ||||||
|  | pub use meilisearch_types::milli; | ||||||
|  |  | ||||||
|  | use super::Document; | ||||||
|  |  | ||||||
|  | pub type Metadata = crate::Metadata; | ||||||
|  |  | ||||||
|  | pub type Settings<T> = meilisearch_types::settings::Settings<T>; | ||||||
|  | pub type Checked = meilisearch_types::settings::Checked; | ||||||
|  | pub type Unchecked = meilisearch_types::settings::Unchecked; | ||||||
|  |  | ||||||
|  | pub type Task = tasks::TaskDump; | ||||||
|  | pub type Key = meilisearch_types::keys::Key; | ||||||
|  |  | ||||||
|  | // ===== Other types to clarify the code of the compat module | ||||||
|  | // everything related to the tasks | ||||||
|  | pub type Status = meilisearch_types::tasks::Status; | ||||||
|  | pub type Kind = tasks::KindDump; | ||||||
|  | pub type Details = meilisearch_types::tasks::Details; | ||||||
|  |  | ||||||
|  | // everything related to the settings | ||||||
|  | pub type Setting<T> = meilisearch_types::milli::update::Setting<T>; | ||||||
|  | pub type TypoTolerance = meilisearch_types::settings::TypoSettings; | ||||||
|  | pub type MinWordSizeForTypos = meilisearch_types::settings::MinWordSizeTyposSetting; | ||||||
|  | pub type FacetingSettings = meilisearch_types::settings::FacetingSettings; | ||||||
|  | pub type PaginationSettings = meilisearch_types::settings::PaginationSettings; | ||||||
|  |  | ||||||
|  | // everything related to the api keys | ||||||
|  | pub type Action = meilisearch_types::keys::Action; | ||||||
|  | pub type StarOr<T> = meilisearch_types::star_or::StarOr<T>; | ||||||
|  | pub type IndexUid = meilisearch_types::index_uid::IndexUid; | ||||||
|  |  | ||||||
|  | // everything related to the errors | ||||||
|  | pub type ResponseError = meilisearch_types::error::ResponseError; | ||||||
|  | pub type Code = meilisearch_types::error::Code; | ||||||
|  |  | ||||||
|  | pub struct V6Reader { | ||||||
|  |     dump: TempDir, | ||||||
|  |     instance_uid: Uuid, | ||||||
|  |     metadata: Metadata, | ||||||
|  |     tasks: BufReader<File>, | ||||||
|  |     keys: BufReader<File>, | ||||||
|  | } | ||||||
|  |  | ||||||
|  | impl V6Reader { | ||||||
|  |     pub fn open(dump: TempDir) -> Result<Self> { | ||||||
|  |         let meta_file = fs::read(dump.path().join("metadata.json"))?; | ||||||
|  |         let instance_uid = fs::read_to_string(dump.path().join("instance_uid.uuid"))?; | ||||||
|  |         let instance_uid = Uuid::from_str(&instance_uid)?; | ||||||
|  |  | ||||||
|  |         Ok(V6Reader { | ||||||
|  |             metadata: serde_json::from_reader(&*meta_file)?, | ||||||
|  |             instance_uid, | ||||||
|  |             tasks: BufReader::new(File::open(dump.path().join("tasks").join("queue.jsonl"))?), | ||||||
|  |             keys: BufReader::new(File::open(dump.path().join("keys.jsonl"))?), | ||||||
|  |             dump, | ||||||
|  |         }) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     pub fn version(&self) -> Version { | ||||||
|  |         Version::V6 | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     pub fn date(&self) -> Option<OffsetDateTime> { | ||||||
|  |         Some(self.metadata.dump_date) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     pub fn instance_uid(&self) -> Result<Option<Uuid>> { | ||||||
|  |         Ok(Some(self.instance_uid)) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     pub fn indexes(&self) -> Result<Box<dyn Iterator<Item = Result<V6IndexReader>> + '_>> { | ||||||
|  |         let entries = fs::read_dir(self.dump.path().join("indexes"))?; | ||||||
|  |         Ok(Box::new( | ||||||
|  |             entries | ||||||
|  |                 .map(|entry| -> Result<Option<_>> { | ||||||
|  |                     let entry = entry?; | ||||||
|  |                     if entry.file_type()?.is_dir() { | ||||||
|  |                         let index = V6IndexReader::new( | ||||||
|  |                             entry | ||||||
|  |                                 .file_name() | ||||||
|  |                                 .to_str() | ||||||
|  |                                 .ok_or(Error::BadIndexName)? | ||||||
|  |                                 .to_string(), | ||||||
|  |                             &entry.path(), | ||||||
|  |                         )?; | ||||||
|  |                         Ok(Some(index)) | ||||||
|  |                     } else { | ||||||
|  |                         Ok(None) | ||||||
|  |                     } | ||||||
|  |                 }) | ||||||
|  |                 .filter_map(|entry| entry.transpose()), | ||||||
|  |         )) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     pub fn tasks( | ||||||
|  |         &mut self, | ||||||
|  |     ) -> Box<dyn Iterator<Item = Result<(Task, Option<Box<super::UpdateFile>>)>> + '_> { | ||||||
|  |         Box::new((&mut self.tasks).lines().map(|line| -> Result<_> { | ||||||
|  |             let task: Task = serde_json::from_str(&line?)?; | ||||||
|  |  | ||||||
|  |             let update_file_path = self | ||||||
|  |                 .dump | ||||||
|  |                 .path() | ||||||
|  |                 .join("tasks") | ||||||
|  |                 .join("update_files") | ||||||
|  |                 .join(format!("{}.jsonl", task.uid.to_string())); | ||||||
|  |  | ||||||
|  |             if update_file_path.exists() { | ||||||
|  |                 Ok(( | ||||||
|  |                     task, | ||||||
|  |                     Some(Box::new(UpdateFile::new(&update_file_path)?) as Box<super::UpdateFile>), | ||||||
|  |                 )) | ||||||
|  |             } else { | ||||||
|  |                 Ok((task, None)) | ||||||
|  |             } | ||||||
|  |         })) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     pub fn keys(&mut self) -> Box<dyn Iterator<Item = Result<Key>> + '_> { | ||||||
|  |         Box::new( | ||||||
|  |             (&mut self.keys) | ||||||
|  |                 .lines() | ||||||
|  |                 .map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) }), | ||||||
|  |         ) | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | pub struct UpdateFile { | ||||||
|  |     reader: BufReader<File>, | ||||||
|  | } | ||||||
|  |  | ||||||
|  | impl UpdateFile { | ||||||
|  |     fn new(path: &Path) -> Result<Self> { | ||||||
|  |         Ok(UpdateFile { | ||||||
|  |             reader: BufReader::new(File::open(path)?), | ||||||
|  |         }) | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | impl Iterator for UpdateFile { | ||||||
|  |     type Item = Result<Document>; | ||||||
|  |  | ||||||
|  |     fn next(&mut self) -> Option<Self::Item> { | ||||||
|  |         (&mut self.reader) | ||||||
|  |             .lines() | ||||||
|  |             .map(|line| { | ||||||
|  |                 line.map_err(Error::from) | ||||||
|  |                     .and_then(|line| serde_json::from_str(&line).map_err(Error::from)) | ||||||
|  |             }) | ||||||
|  |             .next() | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | pub struct V6IndexReader { | ||||||
|  |     metadata: IndexMetadata, | ||||||
|  |     documents: BufReader<File>, | ||||||
|  |     settings: BufReader<File>, | ||||||
|  | } | ||||||
|  |  | ||||||
|  | impl V6IndexReader { | ||||||
|  |     pub fn new(_name: String, path: &Path) -> Result<Self> { | ||||||
|  |         let metadata = File::open(path.join("metadata.json"))?; | ||||||
|  |  | ||||||
|  |         let ret = V6IndexReader { | ||||||
|  |             metadata: serde_json::from_reader(metadata)?, | ||||||
|  |             documents: BufReader::new(File::open(path.join("documents.jsonl"))?), | ||||||
|  |             settings: BufReader::new(File::open(path.join("settings.json"))?), | ||||||
|  |         }; | ||||||
|  |  | ||||||
|  |         Ok(ret) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     pub fn metadata(&self) -> &IndexMetadata { | ||||||
|  |         &self.metadata | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     pub fn documents(&mut self) -> Result<impl Iterator<Item = Result<Document>> + '_> { | ||||||
|  |         Ok((&mut self.documents) | ||||||
|  |             .lines() | ||||||
|  |             .map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) })) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     pub fn settings(&mut self) -> Result<Settings<Checked>> { | ||||||
|  |         let settings: Settings<Unchecked> = serde_json::from_reader(&mut self.settings)?; | ||||||
|  |         Ok(settings.check()) | ||||||
|  |     } | ||||||
|  | } | ||||||
							
								
								
									
										81
									
								
								dump/src/reader/v6/tasks.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										81
									
								
								dump/src/reader/v6/tasks.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,81 @@ | |||||||
|  | use meilisearch_types::{ | ||||||
|  |     error::ResponseError, | ||||||
|  |     milli::update::IndexDocumentsMethod, | ||||||
|  |     settings::Unchecked, | ||||||
|  |     tasks::{Details, Status, TaskId}, | ||||||
|  | }; | ||||||
|  | use serde::{Deserialize, Serialize}; | ||||||
|  | use time::OffsetDateTime; | ||||||
|  |  | ||||||
|  | #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] | ||||||
|  | #[serde(rename_all = "camelCase")] | ||||||
|  | pub struct TaskDump { | ||||||
|  |     pub uid: TaskId, | ||||||
|  |     #[serde(default)] | ||||||
|  |     pub index_uid: Option<String>, | ||||||
|  |     pub status: Status, | ||||||
|  |     // TODO use our own Kind for the user | ||||||
|  |     #[serde(rename = "type")] | ||||||
|  |     pub kind: KindDump, | ||||||
|  |  | ||||||
|  |     #[serde(skip_serializing_if = "Option::is_none")] | ||||||
|  |     pub details: Option<Details>, | ||||||
|  |     #[serde(skip_serializing_if = "Option::is_none")] | ||||||
|  |     pub error: Option<ResponseError>, | ||||||
|  |  | ||||||
|  |     #[serde(with = "time::serde::rfc3339")] | ||||||
|  |     pub enqueued_at: OffsetDateTime, | ||||||
|  |     #[serde( | ||||||
|  |         with = "time::serde::rfc3339::option", | ||||||
|  |         skip_serializing_if = "Option::is_none", | ||||||
|  |         default | ||||||
|  |     )] | ||||||
|  |     pub started_at: Option<OffsetDateTime>, | ||||||
|  |     #[serde( | ||||||
|  |         with = "time::serde::rfc3339::option", | ||||||
|  |         skip_serializing_if = "Option::is_none", | ||||||
|  |         default | ||||||
|  |     )] | ||||||
|  |     pub finished_at: Option<OffsetDateTime>, | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // A `Kind` specific version made for the dump. If modified you may break the dump. | ||||||
|  | #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] | ||||||
|  | #[serde(rename_all = "camelCase")] | ||||||
|  | pub enum KindDump { | ||||||
|  |     DocumentImport { | ||||||
|  |         primary_key: Option<String>, | ||||||
|  |         method: IndexDocumentsMethod, | ||||||
|  |         documents_count: u64, | ||||||
|  |         allow_index_creation: bool, | ||||||
|  |     }, | ||||||
|  |     DocumentDeletion { | ||||||
|  |         documents_ids: Vec<String>, | ||||||
|  |     }, | ||||||
|  |     DocumentClear, | ||||||
|  |     Settings { | ||||||
|  |         settings: meilisearch_types::settings::Settings<Unchecked>, | ||||||
|  |         is_deletion: bool, | ||||||
|  |         allow_index_creation: bool, | ||||||
|  |     }, | ||||||
|  |     IndexDeletion, | ||||||
|  |     IndexCreation { | ||||||
|  |         primary_key: Option<String>, | ||||||
|  |     }, | ||||||
|  |     IndexUpdate { | ||||||
|  |         primary_key: Option<String>, | ||||||
|  |     }, | ||||||
|  |     IndexSwap { | ||||||
|  |         lhs: String, | ||||||
|  |         rhs: String, | ||||||
|  |     }, | ||||||
|  |     CancelTask { | ||||||
|  |         tasks: Vec<TaskId>, | ||||||
|  |     }, | ||||||
|  |     DeleteTasks { | ||||||
|  |         query: String, | ||||||
|  |         tasks: Vec<TaskId>, | ||||||
|  |     }, | ||||||
|  |     DumpExport, | ||||||
|  |     Snapshot, | ||||||
|  | } | ||||||
| @@ -22,13 +22,15 @@ pub struct DumpWriter { | |||||||
| } | } | ||||||
|  |  | ||||||
| impl DumpWriter { | impl DumpWriter { | ||||||
|     pub fn new(instance_uuid: Uuid) -> Result<DumpWriter> { |     pub fn new(instance_uuid: Option<Uuid>) -> Result<DumpWriter> { | ||||||
|         let dir = TempDir::new()?; |         let dir = TempDir::new()?; | ||||||
|  |  | ||||||
|  |         if let Some(instance_uuid) = instance_uuid { | ||||||
|             fs::write( |             fs::write( | ||||||
|                 dir.path().join("instance_uid.uuid"), |                 dir.path().join("instance_uid.uuid"), | ||||||
|                 &instance_uuid.as_hyphenated().to_string(), |                 &instance_uuid.as_hyphenated().to_string(), | ||||||
|             )?; |             )?; | ||||||
|  |         } | ||||||
|  |  | ||||||
|         let metadata = Metadata { |         let metadata = Metadata { | ||||||
|             dump_version: CURRENT_DUMP_VERSION, |             dump_version: CURRENT_DUMP_VERSION, | ||||||
| @@ -133,7 +135,6 @@ impl UpdateFile { | |||||||
|             writer.write_all(b"\n")?; |             writer.write_all(b"\n")?; | ||||||
|             writer.flush()?; |             writer.flush()?; | ||||||
|         } else { |         } else { | ||||||
|             dbg!(&self.path); |  | ||||||
|             let file = File::create(&self.path).unwrap(); |             let file = File::create(&self.path).unwrap(); | ||||||
|             self.writer = Some(BufWriter::new(file)); |             self.writer = Some(BufWriter::new(file)); | ||||||
|             self.push_document(document)?; |             self.push_document(document)?; | ||||||
|   | |||||||
| @@ -13,6 +13,7 @@ file-store = { path = "../file-store" } | |||||||
| log = "0.4.14" | log = "0.4.14" | ||||||
| meilisearch-types = { path = "../meilisearch-types" } | meilisearch-types = { path = "../meilisearch-types" } | ||||||
| roaring = { version = "0.10.0", features = ["serde"] } | roaring = { version = "0.10.0", features = ["serde"] } | ||||||
|  | dump = { path = "../dump" } | ||||||
| serde = { version = "1.0.136", features = ["derive"] } | serde = { version = "1.0.136", features = ["derive"] } | ||||||
| serde_json = { version = "1.0.85", features = ["preserve_order"] } | serde_json = { version = "1.0.85", features = ["preserve_order"] } | ||||||
| tempfile = "3.3.0" | tempfile = "3.3.0" | ||||||
|   | |||||||
| @@ -1,7 +1,11 @@ | |||||||
| use std::collections::HashSet; | use std::collections::HashSet; | ||||||
|  | use std::fs::File; | ||||||
|  | use std::io::BufWriter; | ||||||
|  |  | ||||||
| use crate::{autobatcher::BatchKind, Error, IndexScheduler, Result, TaskId}; | use crate::{autobatcher::BatchKind, Error, IndexScheduler, Result, TaskId}; | ||||||
|  |  | ||||||
|  | use dump::IndexMetadata; | ||||||
|  | use meilisearch_types::milli::documents::obkv_to_object; | ||||||
| use meilisearch_types::tasks::{Details, Kind, KindWithContent, Status, Task}; | use meilisearch_types::tasks::{Details, Kind, KindWithContent, Status, Task}; | ||||||
|  |  | ||||||
| use log::{debug, info}; | use log::{debug, info}; | ||||||
| @@ -25,7 +29,7 @@ pub(crate) enum Batch { | |||||||
|     Cancel(Task), |     Cancel(Task), | ||||||
|     TaskDeletion(Task), |     TaskDeletion(Task), | ||||||
|     Snapshot(Vec<Task>), |     Snapshot(Vec<Task>), | ||||||
|     Dump(Vec<Task>), |     Dump(Task), | ||||||
|     IndexOperation(IndexOperation), |     IndexOperation(IndexOperation), | ||||||
|     IndexCreation { |     IndexCreation { | ||||||
|         index_uid: String, |         index_uid: String, | ||||||
| @@ -100,9 +104,10 @@ impl Batch { | |||||||
|         match self { |         match self { | ||||||
|             Batch::Cancel(task) |             Batch::Cancel(task) | ||||||
|             | Batch::TaskDeletion(task) |             | Batch::TaskDeletion(task) | ||||||
|  |             | Batch::Dump(task) | ||||||
|             | Batch::IndexCreation { task, .. } |             | Batch::IndexCreation { task, .. } | ||||||
|             | Batch::IndexUpdate { task, .. } => vec![task.uid], |             | Batch::IndexUpdate { task, .. } => vec![task.uid], | ||||||
|             Batch::Snapshot(tasks) | Batch::Dump(tasks) | Batch::IndexDeletion { tasks, .. } => { |             Batch::Snapshot(tasks) | Batch::IndexDeletion { tasks, .. } => { | ||||||
|                 tasks.iter().map(|task| task.uid).collect() |                 tasks.iter().map(|task| task.uid).collect() | ||||||
|             } |             } | ||||||
|             Batch::IndexOperation(operation) => match operation { |             Batch::IndexOperation(operation) => match operation { | ||||||
| @@ -402,8 +407,11 @@ impl IndexScheduler { | |||||||
|  |  | ||||||
|         // 4. we batch the dumps. |         // 4. we batch the dumps. | ||||||
|         let to_dump = self.get_kind(rtxn, Kind::DumpExport)? & enqueued; |         let to_dump = self.get_kind(rtxn, Kind::DumpExport)? & enqueued; | ||||||
|         if !to_dump.is_empty() { |         if let Some(to_dump) = to_dump.min() { | ||||||
|             return Ok(Some(Batch::Dump(self.get_existing_tasks(rtxn, to_dump)?))); |             return Ok(Some(Batch::Dump( | ||||||
|  |                 self.get_task(rtxn, to_dump)? | ||||||
|  |                     .ok_or(Error::CorruptedTaskQueue)?, | ||||||
|  |             ))); | ||||||
|         } |         } | ||||||
|  |  | ||||||
|         // 5. We take the next task and try to batch all the tasks associated with this index. |         // 5. We take the next task and try to batch all the tasks associated with this index. | ||||||
| @@ -477,7 +485,80 @@ impl IndexScheduler { | |||||||
|                 Ok(vec![task]) |                 Ok(vec![task]) | ||||||
|             } |             } | ||||||
|             Batch::Snapshot(_) => todo!(), |             Batch::Snapshot(_) => todo!(), | ||||||
|             Batch::Dump(_) => todo!(), |             Batch::Dump(mut task) => { | ||||||
|  |                 let KindWithContent::DumpExport { keys, instance_uid, dump_uid } = &task.kind else { | ||||||
|  |                     unreachable!(); | ||||||
|  |                 }; | ||||||
|  |                 let dump = dump::DumpWriter::new(instance_uid.clone())?; | ||||||
|  |                 let mut d_keys = dump.create_keys()?; | ||||||
|  |  | ||||||
|  |                 // 1. dump the keys | ||||||
|  |                 for key in keys { | ||||||
|  |                     d_keys.push_key(key)?; | ||||||
|  |                 } | ||||||
|  |  | ||||||
|  |                 let rtxn = self.env.read_txn()?; | ||||||
|  |  | ||||||
|  |                 // 2. dump the tasks | ||||||
|  |                 let mut tasks = dump.create_tasks_queue()?; | ||||||
|  |                 for ret in self.all_tasks.iter(&rtxn)? { | ||||||
|  |                     let (_, task) = ret?; | ||||||
|  |                     let mut dump_content_file = tasks.push_task(&task)?; | ||||||
|  |  | ||||||
|  |                     // 2.1. Dump the `content_file` associated with the task if there is one. | ||||||
|  |                     if let Some(content_file) = task.content_uuid() { | ||||||
|  |                         let content_file = self.file_store.get_update(*content_file)?; | ||||||
|  |  | ||||||
|  |                         let reader = DocumentsBatchReader::from_reader(content_file) | ||||||
|  |                             .map_err(milli::Error::from)?; | ||||||
|  |  | ||||||
|  |                         let (mut cursor, documents_batch_index) = | ||||||
|  |                             reader.into_cursor_and_fields_index(); | ||||||
|  |  | ||||||
|  |                         while let Some(doc) = cursor.next_document().map_err(milli::Error::from)? { | ||||||
|  |                             dump_content_file | ||||||
|  |                                 .push_document(&obkv_to_object(&doc, &documents_batch_index)?)?; | ||||||
|  |                         } | ||||||
|  |                     } | ||||||
|  |                 } | ||||||
|  |  | ||||||
|  |                 // TODO: maybe `self.indexes` could use this rtxn instead of creating its own | ||||||
|  |                 drop(rtxn); | ||||||
|  |  | ||||||
|  |                 // 3. Dump the indexes | ||||||
|  |                 for (uid, index) in self.indexes()? { | ||||||
|  |                     let rtxn = index.read_txn()?; | ||||||
|  |                     let metadata = IndexMetadata { | ||||||
|  |                         uid: uid.clone(), | ||||||
|  |                         primary_key: index.primary_key(&rtxn)?.map(String::from), | ||||||
|  |                         created_at: index.created_at(&rtxn)?, | ||||||
|  |                         updated_at: index.updated_at(&rtxn)?, | ||||||
|  |                     }; | ||||||
|  |                     let mut index_dumper = dump.create_index(&uid, &metadata)?; | ||||||
|  |  | ||||||
|  |                     let fields_ids_map = index.fields_ids_map(&rtxn)?; | ||||||
|  |                     let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect(); | ||||||
|  |  | ||||||
|  |                     // 3.1. Dump the documents | ||||||
|  |                     for ret in index.all_documents(&rtxn)? { | ||||||
|  |                         let (_id, doc) = ret?; | ||||||
|  |                         let document = milli::obkv_to_json(&all_fields, &fields_ids_map, doc)?; | ||||||
|  |                         index_dumper.push_document(&document)?; | ||||||
|  |                     } | ||||||
|  |  | ||||||
|  |                     // 3.2. Dump the settings | ||||||
|  |                     let settings = meilisearch_types::settings::settings(&index, &rtxn)?; | ||||||
|  |                     index_dumper.settings(&settings)?; | ||||||
|  |                 } | ||||||
|  |  | ||||||
|  |                 let path = self.dumps_path.join(format!("{}.dump", dump_uid)); | ||||||
|  |                 let file = File::create(path).unwrap(); | ||||||
|  |                 dump.persist_to(BufWriter::new(file)).unwrap(); | ||||||
|  |  | ||||||
|  |                 task.status = Status::Succeeded; | ||||||
|  |  | ||||||
|  |                 Ok(vec![task]) | ||||||
|  |             } | ||||||
|             Batch::IndexOperation(operation) => { |             Batch::IndexOperation(operation) => { | ||||||
|                 #[rustfmt::skip] |                 #[rustfmt::skip] | ||||||
|                 let index = match operation { |                 let index = match operation { | ||||||
| @@ -679,14 +760,14 @@ impl IndexScheduler { | |||||||
|                             task.status = Status::Succeeded; |                             task.status = Status::Succeeded; | ||||||
|                             task.details = Some(Details::DocumentAddition { |                             task.details = Some(Details::DocumentAddition { | ||||||
|                                 received_documents: number_of_documents, |                                 received_documents: number_of_documents, | ||||||
|                                 indexed_documents, |                                 indexed_documents: Some(indexed_documents), | ||||||
|                             }); |                             }); | ||||||
|                         } |                         } | ||||||
|                         Err(error) => { |                         Err(error) => { | ||||||
|                             task.status = Status::Failed; |                             task.status = Status::Failed; | ||||||
|                             task.details = Some(Details::DocumentAddition { |                             task.details = Some(Details::DocumentAddition { | ||||||
|                                 received_documents: count, |                                 received_documents: count, | ||||||
|                                 indexed_documents: count, |                                 indexed_documents: Some(count), | ||||||
|                             }); |                             }); | ||||||
|                             task.error = Some(error.into()) |                             task.error = Some(error.into()) | ||||||
|                         } |                         } | ||||||
|   | |||||||
| @@ -24,6 +24,8 @@ pub enum Error { | |||||||
|     #[error("`{0}` is not a type. Available types are")] |     #[error("`{0}` is not a type. Available types are")] | ||||||
|     InvalidKind(String), |     InvalidKind(String), | ||||||
|  |  | ||||||
|  |     #[error(transparent)] | ||||||
|  |     Dump(#[from] dump::Error), | ||||||
|     #[error(transparent)] |     #[error(transparent)] | ||||||
|     Heed(#[from] heed::Error), |     Heed(#[from] heed::Error), | ||||||
|     #[error(transparent)] |     #[error(transparent)] | ||||||
| @@ -48,8 +50,9 @@ impl ErrorCode for Error { | |||||||
|             Error::InvalidKind(_) => Code::BadRequest, |             Error::InvalidKind(_) => Code::BadRequest, | ||||||
|  |  | ||||||
|             // TODO: TAMO: are all these errors really internal? |             // TODO: TAMO: are all these errors really internal? | ||||||
|  |             Error::Dump(e) => e.error_code(), | ||||||
|  |             Error::Milli(e) => e.error_code(), | ||||||
|             Error::Heed(_) => Code::Internal, |             Error::Heed(_) => Code::Internal, | ||||||
|             Error::Milli(_) => Code::Internal, |  | ||||||
|             Error::FileStore(_) => Code::Internal, |             Error::FileStore(_) => Code::Internal, | ||||||
|             Error::IoError(_) => Code::Internal, |             Error::IoError(_) => Code::Internal, | ||||||
|             Error::Anyhow(_) => Code::Internal, |             Error::Anyhow(_) => Code::Internal, | ||||||
|   | |||||||
| @@ -154,6 +154,9 @@ pub struct IndexScheduler { | |||||||
|     /// Weither autobatching is enabled or not. |     /// Weither autobatching is enabled or not. | ||||||
|     pub(crate) autobatching_enabled: bool, |     pub(crate) autobatching_enabled: bool, | ||||||
|  |  | ||||||
|  |     /// The path used to create the dumps. | ||||||
|  |     pub(crate) dumps_path: PathBuf, | ||||||
|  |  | ||||||
|     // ================= test |     // ================= test | ||||||
|     /// The next entry is dedicated to the tests. |     /// The next entry is dedicated to the tests. | ||||||
|     /// It provide a way to break in multiple part of the scheduler. |     /// It provide a way to break in multiple part of the scheduler. | ||||||
| @@ -175,6 +178,7 @@ impl IndexScheduler { | |||||||
|         tasks_path: PathBuf, |         tasks_path: PathBuf, | ||||||
|         update_file_path: PathBuf, |         update_file_path: PathBuf, | ||||||
|         indexes_path: PathBuf, |         indexes_path: PathBuf, | ||||||
|  |         dumps_path: PathBuf, | ||||||
|         index_size: usize, |         index_size: usize, | ||||||
|         indexer_config: IndexerConfig, |         indexer_config: IndexerConfig, | ||||||
|         autobatching_enabled: bool, |         autobatching_enabled: bool, | ||||||
| @@ -183,6 +187,7 @@ impl IndexScheduler { | |||||||
|         std::fs::create_dir_all(&tasks_path)?; |         std::fs::create_dir_all(&tasks_path)?; | ||||||
|         std::fs::create_dir_all(&update_file_path)?; |         std::fs::create_dir_all(&update_file_path)?; | ||||||
|         std::fs::create_dir_all(&indexes_path)?; |         std::fs::create_dir_all(&indexes_path)?; | ||||||
|  |         std::fs::create_dir_all(&dumps_path)?; | ||||||
|  |  | ||||||
|         let mut options = heed::EnvOpenOptions::new(); |         let mut options = heed::EnvOpenOptions::new(); | ||||||
|         options.max_dbs(6); |         options.max_dbs(6); | ||||||
| @@ -205,6 +210,7 @@ impl IndexScheduler { | |||||||
|             // we want to start the loop right away in case meilisearch was ctrl+Ced while processing things |             // we want to start the loop right away in case meilisearch was ctrl+Ced while processing things | ||||||
|             wake_up: Arc::new(SignalEvent::auto(true)), |             wake_up: Arc::new(SignalEvent::auto(true)), | ||||||
|             autobatching_enabled, |             autobatching_enabled, | ||||||
|  |             dumps_path, | ||||||
|  |  | ||||||
|             #[cfg(test)] |             #[cfg(test)] | ||||||
|             test_breakpoint_sdr, |             test_breakpoint_sdr, | ||||||
| @@ -227,6 +233,7 @@ impl IndexScheduler { | |||||||
|             index_mapper: self.index_mapper.clone(), |             index_mapper: self.index_mapper.clone(), | ||||||
|             wake_up: self.wake_up.clone(), |             wake_up: self.wake_up.clone(), | ||||||
|             autobatching_enabled: self.autobatching_enabled, |             autobatching_enabled: self.autobatching_enabled, | ||||||
|  |             dumps_path: self.dumps_path.clone(), | ||||||
|  |  | ||||||
|             #[cfg(test)] |             #[cfg(test)] | ||||||
|             test_breakpoint_sdr: self.test_breakpoint_sdr.clone(), |             test_breakpoint_sdr: self.test_breakpoint_sdr.clone(), | ||||||
| @@ -342,7 +349,7 @@ impl IndexScheduler { | |||||||
|             started_at: None, |             started_at: None, | ||||||
|             finished_at: None, |             finished_at: None, | ||||||
|             error: None, |             error: None, | ||||||
|             details: task.default_details(), |             details: (&task).into(), | ||||||
|             status: Status::Enqueued, |             status: Status::Enqueued, | ||||||
|             kind: task, |             kind: task, | ||||||
|         }; |         }; | ||||||
| @@ -367,9 +374,9 @@ impl IndexScheduler { | |||||||
|  |  | ||||||
|         match wtxn.commit() { |         match wtxn.commit() { | ||||||
|             Ok(()) => (), |             Ok(()) => (), | ||||||
|             e @ Err(_) => { |             _e @ Err(_) => { | ||||||
|                 todo!("remove the data associated with the task"); |                 todo!("remove the data associated with the task"); | ||||||
|                 e?; |                 // _e?; | ||||||
|             } |             } | ||||||
|         } |         } | ||||||
|  |  | ||||||
| @@ -436,6 +443,7 @@ impl IndexScheduler { | |||||||
|                     // TODO the info field should've been set by the process_batch function |                     // TODO the info field should've been set by the process_batch function | ||||||
|                     self.update_task(&mut wtxn, &task)?; |                     self.update_task(&mut wtxn, &task)?; | ||||||
|                 } |                 } | ||||||
|  |                 log::info!("A batch of tasks was successfully completed."); | ||||||
|             } |             } | ||||||
|             // In case of a failure we must get back and patch all the tasks with the error. |             // In case of a failure we must get back and patch all the tasks with the error. | ||||||
|             Err(err) => { |             Err(err) => { | ||||||
| @@ -453,7 +461,6 @@ impl IndexScheduler { | |||||||
|         } |         } | ||||||
|         *self.processing_tasks.write().unwrap() = (finished_at, RoaringBitmap::new()); |         *self.processing_tasks.write().unwrap() = (finished_at, RoaringBitmap::new()); | ||||||
|         wtxn.commit()?; |         wtxn.commit()?; | ||||||
|         log::info!("A batch of tasks was successfully completed."); |  | ||||||
|  |  | ||||||
|         #[cfg(test)] |         #[cfg(test)] | ||||||
|         self.test_breakpoint_sdr |         self.test_breakpoint_sdr | ||||||
| @@ -542,6 +549,7 @@ mod tests { | |||||||
|                 tempdir.path().join("db_path"), |                 tempdir.path().join("db_path"), | ||||||
|                 tempdir.path().join("file_store"), |                 tempdir.path().join("file_store"), | ||||||
|                 tempdir.path().join("indexes"), |                 tempdir.path().join("indexes"), | ||||||
|  |                 tempdir.path().join("dumps"), | ||||||
|                 1024 * 1024, |                 1024 * 1024, | ||||||
|                 IndexerConfig::default(), |                 IndexerConfig::default(), | ||||||
|                 autobatching, // enable autobatching |                 autobatching, // enable autobatching | ||||||
|   | |||||||
| @@ -1,16 +1,19 @@ | |||||||
| use std::{any::Any, sync::Arc}; | use std::{any::Any, sync::Arc}; | ||||||
|  |  | ||||||
| use actix_web::HttpRequest; | use actix_web::HttpRequest; | ||||||
|  | use meilisearch_types::InstanceUid; | ||||||
| use serde_json::Value; | use serde_json::Value; | ||||||
|  |  | ||||||
| use crate::{routes::indexes::documents::UpdateDocumentsQuery, Opt}; | use crate::{routes::indexes::documents::UpdateDocumentsQuery, Opt}; | ||||||
|  |  | ||||||
| use super::{find_user_id, Analytics}; | use super::{find_user_id, Analytics}; | ||||||
|  |  | ||||||
| pub struct MockAnalytics; | pub struct MockAnalytics { | ||||||
|  |     instance_uid: Option<InstanceUid>, | ||||||
|  | } | ||||||
|  |  | ||||||
| #[derive(Default)] | #[derive(Default)] | ||||||
| pub struct SearchAggregator {} | pub struct SearchAggregator; | ||||||
|  |  | ||||||
| #[allow(dead_code)] | #[allow(dead_code)] | ||||||
| impl SearchAggregator { | impl SearchAggregator { | ||||||
| @@ -23,13 +26,17 @@ impl SearchAggregator { | |||||||
|  |  | ||||||
| impl MockAnalytics { | impl MockAnalytics { | ||||||
|     #[allow(clippy::new_ret_no_self)] |     #[allow(clippy::new_ret_no_self)] | ||||||
|     pub fn new(opt: &Opt) -> (Arc<dyn Analytics>, String) { |     pub fn new(opt: &Opt) -> Arc<dyn Analytics> { | ||||||
|         let user = find_user_id(&opt.db_path).unwrap_or_default(); |         let instance_uid = find_user_id(&opt.db_path); | ||||||
|         (Arc::new(Self), user) |         Arc::new(Self { instance_uid }) | ||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
| impl Analytics for MockAnalytics { | impl Analytics for MockAnalytics { | ||||||
|  |     fn instance_uid(&self) -> Option<&meilisearch_types::InstanceUid> { | ||||||
|  |         self.instance_uid.as_ref() | ||||||
|  |     } | ||||||
|  |  | ||||||
|     // These methods are noop and should be optimized out |     // These methods are noop and should be optimized out | ||||||
|     fn publish(&self, _event_name: String, _send: Value, _request: Option<&HttpRequest>) {} |     fn publish(&self, _event_name: String, _send: Value, _request: Option<&HttpRequest>) {} | ||||||
|     fn get_search(&self, _aggregate: super::SearchAggregator) {} |     fn get_search(&self, _aggregate: super::SearchAggregator) {} | ||||||
|   | |||||||
| @@ -5,8 +5,10 @@ mod segment_analytics; | |||||||
|  |  | ||||||
| use std::fs; | use std::fs; | ||||||
| use std::path::{Path, PathBuf}; | use std::path::{Path, PathBuf}; | ||||||
|  | use std::str::FromStr; | ||||||
|  |  | ||||||
| use actix_web::HttpRequest; | use actix_web::HttpRequest; | ||||||
|  | use meilisearch_types::InstanceUid; | ||||||
| use once_cell::sync::Lazy; | use once_cell::sync::Lazy; | ||||||
| use platform_dirs::AppDirs; | use platform_dirs::AppDirs; | ||||||
| use serde_json::Value; | use serde_json::Value; | ||||||
| @@ -51,13 +53,16 @@ fn config_user_id_path(db_path: &Path) -> Option<PathBuf> { | |||||||
| } | } | ||||||
|  |  | ||||||
| /// Look for the instance-uid in the `data.ms` or in `~/.config/Meilisearch/path-to-db-instance-uid` | /// Look for the instance-uid in the `data.ms` or in `~/.config/Meilisearch/path-to-db-instance-uid` | ||||||
| fn find_user_id(db_path: &Path) -> Option<String> { | fn find_user_id(db_path: &Path) -> Option<InstanceUid> { | ||||||
|     fs::read_to_string(db_path.join("instance-uid")) |     fs::read_to_string(db_path.join("instance-uid")) | ||||||
|         .ok() |         .ok() | ||||||
|         .or_else(|| fs::read_to_string(&config_user_id_path(db_path)?).ok()) |         .or_else(|| fs::read_to_string(&config_user_id_path(db_path)?).ok()) | ||||||
|  |         .and_then(|uid| InstanceUid::from_str(&uid).ok()) | ||||||
| } | } | ||||||
|  |  | ||||||
| pub trait Analytics: Sync + Send { | pub trait Analytics: Sync + Send { | ||||||
|  |     fn instance_uid(&self) -> Option<&InstanceUid>; | ||||||
|  |  | ||||||
|     /// The method used to publish most analytics that do not need to be batched every hours |     /// The method used to publish most analytics that do not need to be batched every hours | ||||||
|     fn publish(&self, event_name: String, send: Value, request: Option<&HttpRequest>); |     fn publish(&self, event_name: String, send: Value, request: Option<&HttpRequest>); | ||||||
|  |  | ||||||
|   | |||||||
| @@ -37,6 +37,7 @@ pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<IndexScheduler> { | |||||||
|         opt.db_path.join("tasks"), |         opt.db_path.join("tasks"), | ||||||
|         opt.db_path.join("update_files"), |         opt.db_path.join("update_files"), | ||||||
|         opt.db_path.join("indexes"), |         opt.db_path.join("indexes"), | ||||||
|  |         opt.dumps_dir.clone(), | ||||||
|         opt.max_index_size.get_bytes() as usize, |         opt.max_index_size.get_bytes() as usize, | ||||||
|         (&opt.indexer_options).try_into()?, |         (&opt.indexer_options).try_into()?, | ||||||
|         true, |         true, | ||||||
|   | |||||||
| @@ -53,15 +53,15 @@ async fn main() -> anyhow::Result<()> { | |||||||
|     let auth_controller = AuthController::new(&opt.db_path, &opt.master_key)?; |     let auth_controller = AuthController::new(&opt.db_path, &opt.master_key)?; | ||||||
|  |  | ||||||
|     #[cfg(all(not(debug_assertions), feature = "analytics"))] |     #[cfg(all(not(debug_assertions), feature = "analytics"))] | ||||||
|     let (analytics, user) = if !opt.no_analytics { |     let analytics = if !opt.no_analytics { | ||||||
|         analytics::SegmentAnalytics::new(&opt, &meilisearch).await |         analytics::SegmentAnalytics::new(&opt, &meilisearch).await | ||||||
|     } else { |     } else { | ||||||
|         analytics::MockAnalytics::new(&opt) |         analytics::MockAnalytics::new(&opt) | ||||||
|     }; |     }; | ||||||
|     #[cfg(any(debug_assertions, not(feature = "analytics")))] |     #[cfg(any(debug_assertions, not(feature = "analytics")))] | ||||||
|     let (analytics, user) = analytics::MockAnalytics::new(&opt); |     let analytics = analytics::MockAnalytics::new(&opt); | ||||||
|  |  | ||||||
|     print_launch_resume(&opt, &user, config_read_from); |     print_launch_resume(&opt, analytics.clone(), config_read_from); | ||||||
|  |  | ||||||
|     run_http(index_scheduler, auth_controller, opt, analytics).await?; |     run_http(index_scheduler, auth_controller, opt, analytics).await?; | ||||||
|  |  | ||||||
| @@ -133,7 +133,7 @@ async fn run_http( | |||||||
|     Ok(()) |     Ok(()) | ||||||
| } | } | ||||||
|  |  | ||||||
| pub fn print_launch_resume(opt: &Opt, user: &str, config_read_from: Option<PathBuf>) { | pub fn print_launch_resume(opt: &Opt, analytics: Arc<dyn Analytics>, config_read_from: Option<PathBuf>) { | ||||||
|     let commit_sha = option_env!("VERGEN_GIT_SHA").unwrap_or("unknown"); |     let commit_sha = option_env!("VERGEN_GIT_SHA").unwrap_or("unknown"); | ||||||
|     let commit_date = option_env!("VERGEN_GIT_COMMIT_TIMESTAMP").unwrap_or("unknown"); |     let commit_date = option_env!("VERGEN_GIT_COMMIT_TIMESTAMP").unwrap_or("unknown"); | ||||||
|     let protocol = if opt.ssl_cert_path.is_some() && opt.ssl_key_path.is_some() { |     let protocol = if opt.ssl_cert_path.is_some() && opt.ssl_key_path.is_some() { | ||||||
| @@ -186,8 +186,8 @@ Anonymous telemetry:\t\"Enabled\"" | |||||||
|         } |         } | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     if !user.is_empty() { |     if let Some(instance_uid) = analytics.instance_uid() { | ||||||
|         eprintln!("Instance UID:\t\t\"{}\"", user); |         eprintln!("Instance UID:\t\t\"{}\"", instance_uid.to_string()); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     eprintln!(); |     eprintln!(); | ||||||
|   | |||||||
| @@ -2,13 +2,17 @@ use actix_web::web::Data; | |||||||
| use actix_web::{web, HttpRequest, HttpResponse}; | use actix_web::{web, HttpRequest, HttpResponse}; | ||||||
| use index_scheduler::IndexScheduler; | use index_scheduler::IndexScheduler; | ||||||
| use log::debug; | use log::debug; | ||||||
|  | use meilisearch_auth::AuthController; | ||||||
| use meilisearch_types::error::ResponseError; | use meilisearch_types::error::ResponseError; | ||||||
| use meilisearch_types::tasks::KindWithContent; | use meilisearch_types::tasks::KindWithContent; | ||||||
| use serde_json::json; | use serde_json::json; | ||||||
|  | use time::macros::format_description; | ||||||
|  | use time::OffsetDateTime; | ||||||
|  |  | ||||||
| use crate::analytics::Analytics; | use crate::analytics::Analytics; | ||||||
| use crate::extractors::authentication::{policies::*, GuardedData}; | use crate::extractors::authentication::{policies::*, GuardedData}; | ||||||
| use crate::extractors::sequential_extractor::SeqHandler; | use crate::extractors::sequential_extractor::SeqHandler; | ||||||
|  | use crate::routes::SummarizedTaskView; | ||||||
|  |  | ||||||
| pub fn configure(cfg: &mut web::ServiceConfig) { | pub fn configure(cfg: &mut web::ServiceConfig) { | ||||||
|     cfg.service(web::resource("").route(web::post().to(SeqHandler(create_dump)))); |     cfg.service(web::resource("").route(web::post().to(SeqHandler(create_dump)))); | ||||||
| @@ -16,16 +20,28 @@ pub fn configure(cfg: &mut web::ServiceConfig) { | |||||||
|  |  | ||||||
| pub async fn create_dump( | pub async fn create_dump( | ||||||
|     index_scheduler: GuardedData<ActionPolicy<{ actions::DUMPS_CREATE }>, Data<IndexScheduler>>, |     index_scheduler: GuardedData<ActionPolicy<{ actions::DUMPS_CREATE }>, Data<IndexScheduler>>, | ||||||
|  |     auth_controller: GuardedData<ActionPolicy<{ actions::DUMPS_CREATE }>, AuthController>, | ||||||
|     req: HttpRequest, |     req: HttpRequest, | ||||||
|     analytics: web::Data<dyn Analytics>, |     analytics: web::Data<dyn Analytics>, | ||||||
| ) -> Result<HttpResponse, ResponseError> { | ) -> Result<HttpResponse, ResponseError> { | ||||||
|     analytics.publish("Dump Created".to_string(), json!({}), Some(&req)); |     analytics.publish("Dump Created".to_string(), json!({}), Some(&req)); | ||||||
|  |  | ||||||
|     let task = KindWithContent::DumpExport { |     let dump_uid = OffsetDateTime::now_utc() | ||||||
|         output: "todo".to_string().into(), |         .format(format_description!( | ||||||
|     }; |             "[year repr:full][month repr:numerical][day padding:zero]-[hour padding:zero][minute padding:zero][second padding:zero][subsecond digits:3]" | ||||||
|     let res = tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??; |         )) | ||||||
|  |         .unwrap(); | ||||||
|  |  | ||||||
|     debug!("returns: {:?}", res); |     let task = KindWithContent::DumpExport { | ||||||
|     Ok(HttpResponse::Accepted().json(res)) |         keys: auth_controller.list_keys()?, | ||||||
|  |         instance_uid: analytics.instance_uid().cloned(), | ||||||
|  |         dump_uid, | ||||||
|  |     }; | ||||||
|  |     let task: SummarizedTaskView = | ||||||
|  |         tokio::task::spawn_blocking(move || index_scheduler.register(task)) | ||||||
|  |             .await?? | ||||||
|  |             .into(); | ||||||
|  |  | ||||||
|  |     debug!("returns: {:?}", task); | ||||||
|  |     Ok(HttpResponse::Accepted().json(task)) | ||||||
| } | } | ||||||
|   | |||||||
| @@ -109,7 +109,10 @@ pub async fn delete_document( | |||||||
|         index_uid, |         index_uid, | ||||||
|         documents_ids: vec![document_id], |         documents_ids: vec![document_id], | ||||||
|     }; |     }; | ||||||
|     let task = tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??; |     let task: SummarizedTaskView = | ||||||
|  |         tokio::task::spawn_blocking(move || index_scheduler.register(task)) | ||||||
|  |             .await?? | ||||||
|  |             .into(); | ||||||
|     debug!("returns: {:?}", task); |     debug!("returns: {:?}", task); | ||||||
|     Ok(HttpResponse::Accepted().json(task)) |     Ok(HttpResponse::Accepted().json(task)) | ||||||
| } | } | ||||||
| @@ -314,7 +317,10 @@ pub async fn delete_documents( | |||||||
|         index_uid: path.into_inner(), |         index_uid: path.into_inner(), | ||||||
|         documents_ids: ids, |         documents_ids: ids, | ||||||
|     }; |     }; | ||||||
|     let task = tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??; |     let task: SummarizedTaskView = | ||||||
|  |         tokio::task::spawn_blocking(move || index_scheduler.register(task)) | ||||||
|  |             .await?? | ||||||
|  |             .into(); | ||||||
|  |  | ||||||
|     debug!("returns: {:?}", task); |     debug!("returns: {:?}", task); | ||||||
|     Ok(HttpResponse::Accepted().json(task)) |     Ok(HttpResponse::Accepted().json(task)) | ||||||
| @@ -327,7 +333,10 @@ pub async fn clear_all_documents( | |||||||
|     let task = KindWithContent::DocumentClear { |     let task = KindWithContent::DocumentClear { | ||||||
|         index_uid: path.into_inner(), |         index_uid: path.into_inner(), | ||||||
|     }; |     }; | ||||||
|     let task = tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??; |     let task: SummarizedTaskView = | ||||||
|  |         tokio::task::spawn_blocking(move || index_scheduler.register(task)) | ||||||
|  |             .await?? | ||||||
|  |             .into(); | ||||||
|  |  | ||||||
|     debug!("returns: {:?}", task); |     debug!("returns: {:?}", task); | ||||||
|     Ok(HttpResponse::Accepted().json(task)) |     Ok(HttpResponse::Accepted().json(task)) | ||||||
|   | |||||||
| @@ -13,7 +13,7 @@ use crate::analytics::Analytics; | |||||||
| use crate::extractors::authentication::{policies::*, AuthenticationError, GuardedData}; | use crate::extractors::authentication::{policies::*, AuthenticationError, GuardedData}; | ||||||
| use crate::extractors::sequential_extractor::SeqHandler; | use crate::extractors::sequential_extractor::SeqHandler; | ||||||
|  |  | ||||||
| use super::Pagination; | use super::{Pagination, SummarizedTaskView}; | ||||||
|  |  | ||||||
| pub mod documents; | pub mod documents; | ||||||
| pub mod search; | pub mod search; | ||||||
| @@ -108,7 +108,10 @@ pub async fn create_index( | |||||||
|             index_uid: uid, |             index_uid: uid, | ||||||
|             primary_key, |             primary_key, | ||||||
|         }; |         }; | ||||||
|         let task = tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??; |         let task: SummarizedTaskView = | ||||||
|  |             tokio::task::spawn_blocking(move || index_scheduler.register(task)) | ||||||
|  |             .await?? | ||||||
|  |             .into(); | ||||||
|  |  | ||||||
|         Ok(HttpResponse::Accepted().json(task)) |         Ok(HttpResponse::Accepted().json(task)) | ||||||
|     } else { |     } else { | ||||||
| @@ -156,7 +159,10 @@ pub async fn update_index( | |||||||
|         primary_key: body.primary_key, |         primary_key: body.primary_key, | ||||||
|     }; |     }; | ||||||
|  |  | ||||||
|     let task = tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??; |     let task: SummarizedTaskView = | ||||||
|  |         tokio::task::spawn_blocking(move || index_scheduler.register(task)) | ||||||
|  |             .await?? | ||||||
|  |             .into(); | ||||||
|  |  | ||||||
|     debug!("returns: {:?}", task); |     debug!("returns: {:?}", task); | ||||||
|     Ok(HttpResponse::Accepted().json(task)) |     Ok(HttpResponse::Accepted().json(task)) | ||||||
| @@ -169,7 +175,10 @@ pub async fn delete_index( | |||||||
|     let task = KindWithContent::IndexDeletion { |     let task = KindWithContent::IndexDeletion { | ||||||
|         index_uid: index_uid.into_inner(), |         index_uid: index_uid.into_inner(), | ||||||
|     }; |     }; | ||||||
|     let task = tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??; |     let task: SummarizedTaskView = | ||||||
|  |         tokio::task::spawn_blocking(move || index_scheduler.register(task)) | ||||||
|  |             .await?? | ||||||
|  |             .into(); | ||||||
|  |  | ||||||
|     Ok(HttpResponse::Accepted().json(task)) |     Ok(HttpResponse::Accepted().json(task)) | ||||||
| } | } | ||||||
|   | |||||||
| @@ -1,27 +1,16 @@ | |||||||
| use std::collections::BTreeSet; |  | ||||||
| use std::marker::PhantomData; |  | ||||||
|  |  | ||||||
| use actix_web::web::Data; | use actix_web::web::Data; | ||||||
| use fst::IntoStreamer; |  | ||||||
| use log::debug; | use log::debug; | ||||||
|  |  | ||||||
| use actix_web::{web, HttpRequest, HttpResponse}; | use actix_web::{web, HttpRequest, HttpResponse}; | ||||||
| use index_scheduler::IndexScheduler; | use index_scheduler::IndexScheduler; | ||||||
| use meilisearch_types::error::ResponseError; | use meilisearch_types::error::ResponseError; | ||||||
| use meilisearch_types::heed::RoTxn; | use meilisearch_types::settings::{settings, Settings, Unchecked}; | ||||||
| use meilisearch_types::milli::update::Setting; |  | ||||||
| use meilisearch_types::milli::{self, DEFAULT_VALUES_PER_FACET}; |  | ||||||
| use meilisearch_types::settings::{ |  | ||||||
|     Checked, FacetingSettings, MinWordSizeTyposSetting, PaginationSettings, Settings, TypoSettings, |  | ||||||
|     Unchecked, |  | ||||||
| }; |  | ||||||
| use meilisearch_types::tasks::KindWithContent; | use meilisearch_types::tasks::KindWithContent; | ||||||
| use meilisearch_types::Index; |  | ||||||
| use serde_json::json; | use serde_json::json; | ||||||
|  |  | ||||||
| use crate::analytics::Analytics; | use crate::analytics::Analytics; | ||||||
| use crate::extractors::authentication::{policies::*, GuardedData}; | use crate::extractors::authentication::{policies::*, GuardedData}; | ||||||
| use crate::search::DEFAULT_PAGINATION_MAX_TOTAL_HITS; | use crate::routes::SummarizedTaskView; | ||||||
|  |  | ||||||
| #[macro_export] | #[macro_export] | ||||||
| macro_rules! make_setting_route { | macro_rules! make_setting_route { | ||||||
| @@ -33,14 +22,14 @@ macro_rules! make_setting_route { | |||||||
|  |  | ||||||
|             use index_scheduler::IndexScheduler; |             use index_scheduler::IndexScheduler; | ||||||
|             use meilisearch_types::milli::update::Setting; |             use meilisearch_types::milli::update::Setting; | ||||||
|             use meilisearch_types::settings::Settings; |             use meilisearch_types::settings::{settings, Settings}; | ||||||
|             use meilisearch_types::tasks::KindWithContent; |             use meilisearch_types::tasks::KindWithContent; | ||||||
|  |  | ||||||
|             use meilisearch_types::error::ResponseError; |             use meilisearch_types::error::ResponseError; | ||||||
|             use $crate::analytics::Analytics; |             use $crate::analytics::Analytics; | ||||||
|             use $crate::extractors::authentication::{policies::*, GuardedData}; |             use $crate::extractors::authentication::{policies::*, GuardedData}; | ||||||
|             use $crate::extractors::sequential_extractor::SeqHandler; |             use $crate::extractors::sequential_extractor::SeqHandler; | ||||||
|             use $crate::routes::indexes::settings::settings; |             use $crate::routes::SummarizedTaskView; | ||||||
|  |  | ||||||
|             pub async fn delete( |             pub async fn delete( | ||||||
|                 index_scheduler: GuardedData< |                 index_scheduler: GuardedData< | ||||||
| @@ -61,8 +50,10 @@ macro_rules! make_setting_route { | |||||||
|                     is_deletion: true, |                     is_deletion: true, | ||||||
|                     allow_index_creation, |                     allow_index_creation, | ||||||
|                 }; |                 }; | ||||||
|                 let task = |                 let task: SummarizedTaskView = | ||||||
|                     tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??; |                     tokio::task::spawn_blocking(move || index_scheduler.register(task)) | ||||||
|  |                         .await?? | ||||||
|  |                         .into(); | ||||||
|  |  | ||||||
|                 debug!("returns: {:?}", task); |                 debug!("returns: {:?}", task); | ||||||
|                 Ok(HttpResponse::Accepted().json(task)) |                 Ok(HttpResponse::Accepted().json(task)) | ||||||
| @@ -97,8 +88,10 @@ macro_rules! make_setting_route { | |||||||
|                     is_deletion: false, |                     is_deletion: false, | ||||||
|                     allow_index_creation, |                     allow_index_creation, | ||||||
|                 }; |                 }; | ||||||
|                 let task = |                 let task: SummarizedTaskView = | ||||||
|                     tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??; |                     tokio::task::spawn_blocking(move || index_scheduler.register(task)) | ||||||
|  |                         .await?? | ||||||
|  |                         .into(); | ||||||
|  |  | ||||||
|                 debug!("returns: {:?}", task); |                 debug!("returns: {:?}", task); | ||||||
|                 Ok(HttpResponse::Accepted().json(task)) |                 Ok(HttpResponse::Accepted().json(task)) | ||||||
| @@ -459,7 +452,10 @@ pub async fn update_all( | |||||||
|         is_deletion: false, |         is_deletion: false, | ||||||
|         allow_index_creation, |         allow_index_creation, | ||||||
|     }; |     }; | ||||||
|     let task = tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??; |     let task: SummarizedTaskView = | ||||||
|  |         tokio::task::spawn_blocking(move || index_scheduler.register(task)) | ||||||
|  |             .await?? | ||||||
|  |             .into(); | ||||||
|  |  | ||||||
|     debug!("returns: {:?}", task); |     debug!("returns: {:?}", task); | ||||||
|     Ok(HttpResponse::Accepted().json(task)) |     Ok(HttpResponse::Accepted().json(task)) | ||||||
| @@ -489,113 +485,11 @@ pub async fn delete_all( | |||||||
|         is_deletion: true, |         is_deletion: true, | ||||||
|         allow_index_creation, |         allow_index_creation, | ||||||
|     }; |     }; | ||||||
|     let task = tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??; |     let task: SummarizedTaskView = | ||||||
|  |         tokio::task::spawn_blocking(move || index_scheduler.register(task)) | ||||||
|  |             .await?? | ||||||
|  |             .into(); | ||||||
|  |  | ||||||
|     debug!("returns: {:?}", task); |     debug!("returns: {:?}", task); | ||||||
|     Ok(HttpResponse::Accepted().json(task)) |     Ok(HttpResponse::Accepted().json(task)) | ||||||
| } | } | ||||||
|  |  | ||||||
| pub fn settings(index: &Index, rtxn: &RoTxn) -> Result<Settings<Checked>, milli::Error> { |  | ||||||
|     let displayed_attributes = index |  | ||||||
|         .displayed_fields(rtxn)? |  | ||||||
|         .map(|fields| fields.into_iter().map(String::from).collect()); |  | ||||||
|  |  | ||||||
|     let searchable_attributes = index |  | ||||||
|         .user_defined_searchable_fields(rtxn)? |  | ||||||
|         .map(|fields| fields.into_iter().map(String::from).collect()); |  | ||||||
|  |  | ||||||
|     let filterable_attributes = index.filterable_fields(rtxn)?.into_iter().collect(); |  | ||||||
|  |  | ||||||
|     let sortable_attributes = index.sortable_fields(rtxn)?.into_iter().collect(); |  | ||||||
|  |  | ||||||
|     let criteria = index |  | ||||||
|         .criteria(rtxn)? |  | ||||||
|         .into_iter() |  | ||||||
|         .map(|c| c.to_string()) |  | ||||||
|         .collect(); |  | ||||||
|  |  | ||||||
|     let stop_words = index |  | ||||||
|         .stop_words(rtxn)? |  | ||||||
|         .map(|stop_words| -> Result<BTreeSet<_>, milli::Error> { |  | ||||||
|             Ok(stop_words.stream().into_strs()?.into_iter().collect()) |  | ||||||
|         }) |  | ||||||
|         .transpose()? |  | ||||||
|         .unwrap_or_default(); |  | ||||||
|     let distinct_field = index.distinct_field(rtxn)?.map(String::from); |  | ||||||
|  |  | ||||||
|     // in milli each word in the synonyms map were split on their separator. Since we lost |  | ||||||
|     // this information we are going to put space between words. |  | ||||||
|     let synonyms = index |  | ||||||
|         .synonyms(rtxn)? |  | ||||||
|         .iter() |  | ||||||
|         .map(|(key, values)| { |  | ||||||
|             ( |  | ||||||
|                 key.join(" "), |  | ||||||
|                 values.iter().map(|value| value.join(" ")).collect(), |  | ||||||
|             ) |  | ||||||
|         }) |  | ||||||
|         .collect(); |  | ||||||
|  |  | ||||||
|     let min_typo_word_len = MinWordSizeTyposSetting { |  | ||||||
|         one_typo: Setting::Set(index.min_word_len_one_typo(rtxn)?), |  | ||||||
|         two_typos: Setting::Set(index.min_word_len_two_typos(rtxn)?), |  | ||||||
|     }; |  | ||||||
|  |  | ||||||
|     let disabled_words = match index.exact_words(rtxn)? { |  | ||||||
|         Some(fst) => fst.into_stream().into_strs()?.into_iter().collect(), |  | ||||||
|         None => BTreeSet::new(), |  | ||||||
|     }; |  | ||||||
|  |  | ||||||
|     let disabled_attributes = index |  | ||||||
|         .exact_attributes(rtxn)? |  | ||||||
|         .into_iter() |  | ||||||
|         .map(String::from) |  | ||||||
|         .collect(); |  | ||||||
|  |  | ||||||
|     let typo_tolerance = TypoSettings { |  | ||||||
|         enabled: Setting::Set(index.authorize_typos(rtxn)?), |  | ||||||
|         min_word_size_for_typos: Setting::Set(min_typo_word_len), |  | ||||||
|         disable_on_words: Setting::Set(disabled_words), |  | ||||||
|         disable_on_attributes: Setting::Set(disabled_attributes), |  | ||||||
|     }; |  | ||||||
|  |  | ||||||
|     let faceting = FacetingSettings { |  | ||||||
|         max_values_per_facet: Setting::Set( |  | ||||||
|             index |  | ||||||
|                 .max_values_per_facet(rtxn)? |  | ||||||
|                 .unwrap_or(DEFAULT_VALUES_PER_FACET), |  | ||||||
|         ), |  | ||||||
|     }; |  | ||||||
|  |  | ||||||
|     let pagination = PaginationSettings { |  | ||||||
|         max_total_hits: Setting::Set( |  | ||||||
|             index |  | ||||||
|                 .pagination_max_total_hits(rtxn)? |  | ||||||
|                 .unwrap_or(DEFAULT_PAGINATION_MAX_TOTAL_HITS), |  | ||||||
|         ), |  | ||||||
|     }; |  | ||||||
|  |  | ||||||
|     Ok(Settings { |  | ||||||
|         displayed_attributes: match displayed_attributes { |  | ||||||
|             Some(attrs) => Setting::Set(attrs), |  | ||||||
|             None => Setting::Reset, |  | ||||||
|         }, |  | ||||||
|         searchable_attributes: match searchable_attributes { |  | ||||||
|             Some(attrs) => Setting::Set(attrs), |  | ||||||
|             None => Setting::Reset, |  | ||||||
|         }, |  | ||||||
|         filterable_attributes: Setting::Set(filterable_attributes), |  | ||||||
|         sortable_attributes: Setting::Set(sortable_attributes), |  | ||||||
|         ranking_rules: Setting::Set(criteria), |  | ||||||
|         stop_words: Setting::Set(stop_words), |  | ||||||
|         distinct_attribute: match distinct_field { |  | ||||||
|             Some(field) => Setting::Set(field), |  | ||||||
|             None => Setting::Reset, |  | ||||||
|         }, |  | ||||||
|         synonyms: Setting::Set(synonyms), |  | ||||||
|         typo_tolerance: Setting::Set(typo_tolerance), |  | ||||||
|         faceting: Setting::Set(faceting), |  | ||||||
|         pagination: Setting::Set(pagination), |  | ||||||
|         _kind: PhantomData, |  | ||||||
|     }) |  | ||||||
| } |  | ||||||
|   | |||||||
| @@ -121,7 +121,7 @@ impl From<Details> for DetailsView { | |||||||
|                 indexed_documents, |                 indexed_documents, | ||||||
|             } => DetailsView { |             } => DetailsView { | ||||||
|                 received_documents: Some(received_documents), |                 received_documents: Some(received_documents), | ||||||
|                 indexed_documents: Some(indexed_documents), |                 indexed_documents, | ||||||
|                 ..DetailsView::default() |                 ..DetailsView::default() | ||||||
|             }, |             }, | ||||||
|             Details::Settings { settings } => DetailsView { |             Details::Settings { settings } => DetailsView { | ||||||
|   | |||||||
| @@ -4,6 +4,7 @@ use std::str::FromStr; | |||||||
| use std::time::Instant; | use std::time::Instant; | ||||||
|  |  | ||||||
| use either::Either; | use either::Either; | ||||||
|  | use meilisearch_types::settings::DEFAULT_PAGINATION_MAX_TOTAL_HITS; | ||||||
| use meilisearch_types::{milli, Document}; | use meilisearch_types::{milli, Document}; | ||||||
| use milli::tokenizer::TokenizerBuilder; | use milli::tokenizer::TokenizerBuilder; | ||||||
| use milli::{ | use milli::{ | ||||||
| @@ -24,10 +25,6 @@ pub const DEFAULT_CROP_MARKER: fn() -> String = || "…".to_string(); | |||||||
| pub const DEFAULT_HIGHLIGHT_PRE_TAG: fn() -> String = || "<em>".to_string(); | pub const DEFAULT_HIGHLIGHT_PRE_TAG: fn() -> String = || "<em>".to_string(); | ||||||
| pub const DEFAULT_HIGHLIGHT_POST_TAG: fn() -> String = || "</em>".to_string(); | pub const DEFAULT_HIGHLIGHT_POST_TAG: fn() -> String = || "</em>".to_string(); | ||||||
|  |  | ||||||
| /// The maximimum number of results that the engine |  | ||||||
| /// will be able to return in one search call. |  | ||||||
| pub const DEFAULT_PAGINATION_MAX_TOTAL_HITS: usize = 1000; |  | ||||||
|  |  | ||||||
| #[derive(Deserialize, Debug, Clone, PartialEq, Eq)] | #[derive(Deserialize, Debug, Clone, PartialEq, Eq)] | ||||||
| #[serde(rename_all = "camelCase", deny_unknown_fields)] | #[serde(rename_all = "camelCase", deny_unknown_fields)] | ||||||
| pub struct SearchQuery { | pub struct SearchQuery { | ||||||
|   | |||||||
| @@ -10,6 +10,7 @@ csv = "1.1.6" | |||||||
| either = { version = "1.6.1", features = ["serde"] } | either = { version = "1.6.1", features = ["serde"] } | ||||||
| milli = { git = "https://github.com/meilisearch/milli.git", branch = "indexation-abortion", default-features = false } | milli = { git = "https://github.com/meilisearch/milli.git", branch = "indexation-abortion", default-features = false } | ||||||
| enum-iterator = "0.7.0" | enum-iterator = "0.7.0" | ||||||
|  | fst = "0.4.7" | ||||||
| proptest = { version = "1.0.0", optional = true } | proptest = { version = "1.0.0", optional = true } | ||||||
| proptest-derive = { version = "0.3.0", optional = true } | proptest-derive = { version = "0.3.0", optional = true } | ||||||
| roaring = { version = "0.10.0", features = ["serde"] } | roaring = { version = "0.10.0", features = ["serde"] } | ||||||
|   | |||||||
| @@ -9,5 +9,7 @@ pub mod tasks; | |||||||
| pub use milli; | pub use milli; | ||||||
| pub use milli::heed; | pub use milli::heed; | ||||||
| pub use milli::Index; | pub use milli::Index; | ||||||
|  | use uuid::Uuid; | ||||||
|  |  | ||||||
| pub type Document = serde_json::Map<String, serde_json::Value>; | pub type Document = serde_json::Map<String, serde_json::Value>; | ||||||
|  | pub type InstanceUid = Uuid; | ||||||
|   | |||||||
| @@ -2,9 +2,15 @@ use std::collections::{BTreeMap, BTreeSet}; | |||||||
| use std::marker::PhantomData; | use std::marker::PhantomData; | ||||||
| use std::num::NonZeroUsize; | use std::num::NonZeroUsize; | ||||||
|  |  | ||||||
|  | use fst::IntoStreamer; | ||||||
| use milli::update::Setting; | use milli::update::Setting; | ||||||
|  | use milli::{Index, DEFAULT_VALUES_PER_FACET}; | ||||||
| use serde::{Deserialize, Serialize, Serializer}; | use serde::{Deserialize, Serialize, Serializer}; | ||||||
|  |  | ||||||
|  | /// The maximimum number of results that the engine | ||||||
|  | /// will be able to return in one search call. | ||||||
|  | pub const DEFAULT_PAGINATION_MAX_TOTAL_HITS: usize = 1000; | ||||||
|  |  | ||||||
| fn serialize_with_wildcard<S>( | fn serialize_with_wildcard<S>( | ||||||
|     field: &Setting<Vec<String>>, |     field: &Setting<Vec<String>>, | ||||||
|     s: S, |     s: S, | ||||||
| @@ -366,6 +372,114 @@ pub fn apply_settings_to_builder( | |||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
|  | pub fn settings( | ||||||
|  |     index: &Index, | ||||||
|  |     rtxn: &crate::heed::RoTxn, | ||||||
|  | ) -> Result<Settings<Checked>, milli::Error> { | ||||||
|  |     let displayed_attributes = index | ||||||
|  |         .displayed_fields(rtxn)? | ||||||
|  |         .map(|fields| fields.into_iter().map(String::from).collect()); | ||||||
|  |  | ||||||
|  |     let searchable_attributes = index | ||||||
|  |         .user_defined_searchable_fields(rtxn)? | ||||||
|  |         .map(|fields| fields.into_iter().map(String::from).collect()); | ||||||
|  |  | ||||||
|  |     let filterable_attributes = index.filterable_fields(rtxn)?.into_iter().collect(); | ||||||
|  |  | ||||||
|  |     let sortable_attributes = index.sortable_fields(rtxn)?.into_iter().collect(); | ||||||
|  |  | ||||||
|  |     let criteria = index | ||||||
|  |         .criteria(rtxn)? | ||||||
|  |         .into_iter() | ||||||
|  |         .map(|c| c.to_string()) | ||||||
|  |         .collect(); | ||||||
|  |  | ||||||
|  |     let stop_words = index | ||||||
|  |         .stop_words(rtxn)? | ||||||
|  |         .map(|stop_words| -> Result<BTreeSet<_>, milli::Error> { | ||||||
|  |             Ok(stop_words.stream().into_strs()?.into_iter().collect()) | ||||||
|  |         }) | ||||||
|  |         .transpose()? | ||||||
|  |         .unwrap_or_default(); | ||||||
|  |     let distinct_field = index.distinct_field(rtxn)?.map(String::from); | ||||||
|  |  | ||||||
|  |     // in milli each word in the synonyms map were split on their separator. Since we lost | ||||||
|  |     // this information we are going to put space between words. | ||||||
|  |     let synonyms = index | ||||||
|  |         .synonyms(rtxn)? | ||||||
|  |         .iter() | ||||||
|  |         .map(|(key, values)| { | ||||||
|  |             ( | ||||||
|  |                 key.join(" "), | ||||||
|  |                 values.iter().map(|value| value.join(" ")).collect(), | ||||||
|  |             ) | ||||||
|  |         }) | ||||||
|  |         .collect(); | ||||||
|  |  | ||||||
|  |     let min_typo_word_len = MinWordSizeTyposSetting { | ||||||
|  |         one_typo: Setting::Set(index.min_word_len_one_typo(rtxn)?), | ||||||
|  |         two_typos: Setting::Set(index.min_word_len_two_typos(rtxn)?), | ||||||
|  |     }; | ||||||
|  |  | ||||||
|  |     let disabled_words = match index.exact_words(rtxn)? { | ||||||
|  |         Some(fst) => fst.into_stream().into_strs()?.into_iter().collect(), | ||||||
|  |         None => BTreeSet::new(), | ||||||
|  |     }; | ||||||
|  |  | ||||||
|  |     let disabled_attributes = index | ||||||
|  |         .exact_attributes(rtxn)? | ||||||
|  |         .into_iter() | ||||||
|  |         .map(String::from) | ||||||
|  |         .collect(); | ||||||
|  |  | ||||||
|  |     let typo_tolerance = TypoSettings { | ||||||
|  |         enabled: Setting::Set(index.authorize_typos(rtxn)?), | ||||||
|  |         min_word_size_for_typos: Setting::Set(min_typo_word_len), | ||||||
|  |         disable_on_words: Setting::Set(disabled_words), | ||||||
|  |         disable_on_attributes: Setting::Set(disabled_attributes), | ||||||
|  |     }; | ||||||
|  |  | ||||||
|  |     let faceting = FacetingSettings { | ||||||
|  |         max_values_per_facet: Setting::Set( | ||||||
|  |             index | ||||||
|  |                 .max_values_per_facet(rtxn)? | ||||||
|  |                 .unwrap_or(DEFAULT_VALUES_PER_FACET), | ||||||
|  |         ), | ||||||
|  |     }; | ||||||
|  |  | ||||||
|  |     let pagination = PaginationSettings { | ||||||
|  |         max_total_hits: Setting::Set( | ||||||
|  |             index | ||||||
|  |                 .pagination_max_total_hits(rtxn)? | ||||||
|  |                 .unwrap_or(DEFAULT_PAGINATION_MAX_TOTAL_HITS), | ||||||
|  |         ), | ||||||
|  |     }; | ||||||
|  |  | ||||||
|  |     Ok(Settings { | ||||||
|  |         displayed_attributes: match displayed_attributes { | ||||||
|  |             Some(attrs) => Setting::Set(attrs), | ||||||
|  |             None => Setting::Reset, | ||||||
|  |         }, | ||||||
|  |         searchable_attributes: match searchable_attributes { | ||||||
|  |             Some(attrs) => Setting::Set(attrs), | ||||||
|  |             None => Setting::Reset, | ||||||
|  |         }, | ||||||
|  |         filterable_attributes: Setting::Set(filterable_attributes), | ||||||
|  |         sortable_attributes: Setting::Set(sortable_attributes), | ||||||
|  |         ranking_rules: Setting::Set(criteria), | ||||||
|  |         stop_words: Setting::Set(stop_words), | ||||||
|  |         distinct_attribute: match distinct_field { | ||||||
|  |             Some(field) => Setting::Set(field), | ||||||
|  |             None => Setting::Reset, | ||||||
|  |         }, | ||||||
|  |         synonyms: Setting::Set(synonyms), | ||||||
|  |         typo_tolerance: Setting::Set(typo_tolerance), | ||||||
|  |         faceting: Setting::Set(faceting), | ||||||
|  |         pagination: Setting::Set(pagination), | ||||||
|  |         _kind: PhantomData, | ||||||
|  |     }) | ||||||
|  | } | ||||||
|  |  | ||||||
| #[cfg(test)] | #[cfg(test)] | ||||||
| pub(crate) mod test { | pub(crate) mod test { | ||||||
|     use proptest::prelude::*; |     use proptest::prelude::*; | ||||||
|   | |||||||
| @@ -3,7 +3,6 @@ use roaring::RoaringBitmap; | |||||||
| use serde::{Deserialize, Serialize, Serializer}; | use serde::{Deserialize, Serialize, Serializer}; | ||||||
| use std::{ | use std::{ | ||||||
|     fmt::{Display, Write}, |     fmt::{Display, Write}, | ||||||
|     path::PathBuf, |  | ||||||
|     str::FromStr, |     str::FromStr, | ||||||
| }; | }; | ||||||
| use time::{Duration, OffsetDateTime}; | use time::{Duration, OffsetDateTime}; | ||||||
| @@ -11,7 +10,9 @@ use uuid::Uuid; | |||||||
|  |  | ||||||
| use crate::{ | use crate::{ | ||||||
|     error::{Code, ResponseError}, |     error::{Code, ResponseError}, | ||||||
|  |     keys::Key, | ||||||
|     settings::{Settings, Unchecked}, |     settings::{Settings, Unchecked}, | ||||||
|  |     InstanceUid, | ||||||
| }; | }; | ||||||
|  |  | ||||||
| pub type TaskId = u32; | pub type TaskId = u32; | ||||||
| @@ -71,6 +72,26 @@ impl Task { | |||||||
|             IndexSwap { lhs, rhs } => Some(vec![lhs, rhs]), |             IndexSwap { lhs, rhs } => Some(vec![lhs, rhs]), | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     /// Return the content-uuid if there is one | ||||||
|  |     pub fn content_uuid(&self) -> Option<&Uuid> { | ||||||
|  |         match self.kind { | ||||||
|  |             KindWithContent::DocumentImport { | ||||||
|  |                 ref content_file, .. | ||||||
|  |             } => Some(content_file), | ||||||
|  |             KindWithContent::DocumentDeletion { .. } | ||||||
|  |             | KindWithContent::DocumentClear { .. } | ||||||
|  |             | KindWithContent::Settings { .. } | ||||||
|  |             | KindWithContent::IndexDeletion { .. } | ||||||
|  |             | KindWithContent::IndexCreation { .. } | ||||||
|  |             | KindWithContent::IndexUpdate { .. } | ||||||
|  |             | KindWithContent::IndexSwap { .. } | ||||||
|  |             | KindWithContent::CancelTask { .. } | ||||||
|  |             | KindWithContent::DeleteTasks { .. } | ||||||
|  |             | KindWithContent::DumpExport { .. } | ||||||
|  |             | KindWithContent::Snapshot => None, | ||||||
|  |         } | ||||||
|  |     } | ||||||
| } | } | ||||||
|  |  | ||||||
| #[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] | #[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] | ||||||
| @@ -120,7 +141,9 @@ pub enum KindWithContent { | |||||||
|         tasks: RoaringBitmap, |         tasks: RoaringBitmap, | ||||||
|     }, |     }, | ||||||
|     DumpExport { |     DumpExport { | ||||||
|         output: PathBuf, |         dump_uid: String, | ||||||
|  |         keys: Vec<Key>, | ||||||
|  |         instance_uid: Option<InstanceUid>, | ||||||
|     }, |     }, | ||||||
|     Snapshot, |     Snapshot, | ||||||
| } | } | ||||||
| @@ -167,7 +190,7 @@ impl KindWithContent { | |||||||
|                 documents_count, .. |                 documents_count, .. | ||||||
|             } => Some(Details::DocumentAddition { |             } => Some(Details::DocumentAddition { | ||||||
|                 received_documents: *documents_count, |                 received_documents: *documents_count, | ||||||
|                 indexed_documents: 0, |                 indexed_documents: Some(0), | ||||||
|             }), |             }), | ||||||
|             KindWithContent::DocumentDeletion { |             KindWithContent::DocumentDeletion { | ||||||
|                 index_uid: _, |                 index_uid: _, | ||||||
| @@ -204,6 +227,38 @@ impl KindWithContent { | |||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
|  | impl From<&KindWithContent> for Option<Details> { | ||||||
|  |     fn from(kind: &KindWithContent) -> Self { | ||||||
|  |         match kind { | ||||||
|  |             KindWithContent::DocumentImport { | ||||||
|  |                 documents_count, .. | ||||||
|  |             } => Some(Details::DocumentAddition { | ||||||
|  |                 received_documents: *documents_count, | ||||||
|  |                 indexed_documents: None, | ||||||
|  |             }), | ||||||
|  |             KindWithContent::DocumentDeletion { .. } => None, | ||||||
|  |             KindWithContent::DocumentClear { .. } => None, | ||||||
|  |             KindWithContent::Settings { new_settings, .. } => Some(Details::Settings { | ||||||
|  |                 settings: new_settings.clone(), | ||||||
|  |             }), | ||||||
|  |             KindWithContent::IndexDeletion { .. } => None, | ||||||
|  |             KindWithContent::IndexCreation { primary_key, .. } => Some(Details::IndexInfo { | ||||||
|  |                 primary_key: primary_key.clone(), | ||||||
|  |             }), | ||||||
|  |             KindWithContent::IndexUpdate { primary_key, .. } => Some(Details::IndexInfo { | ||||||
|  |                 primary_key: primary_key.clone(), | ||||||
|  |             }), | ||||||
|  |             KindWithContent::IndexSwap { .. } => None, | ||||||
|  |             KindWithContent::CancelTask { .. } => None, | ||||||
|  |             KindWithContent::DeleteTasks { .. } => todo!(), | ||||||
|  |             KindWithContent::DumpExport { dump_uid, .. } => Some(Details::Dump { | ||||||
|  |                 dump_uid: dump_uid.clone(), | ||||||
|  |             }), | ||||||
|  |             KindWithContent::Snapshot => None, | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
| #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)] | #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)] | ||||||
| #[serde(rename_all = "camelCase")] | #[serde(rename_all = "camelCase")] | ||||||
| pub enum Status { | pub enum Status { | ||||||
| @@ -289,7 +344,7 @@ impl FromStr for Kind { | |||||||
| pub enum Details { | pub enum Details { | ||||||
|     DocumentAddition { |     DocumentAddition { | ||||||
|         received_documents: u64, |         received_documents: u64, | ||||||
|         indexed_documents: u64, |         indexed_documents: Option<u64>, | ||||||
|     }, |     }, | ||||||
|     Settings { |     Settings { | ||||||
|         settings: Settings<Unchecked>, |         settings: Settings<Unchecked>, | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user