mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-25 21:16:28 +00:00 
			
		
		
		
	Remove once for all the meilisearch-lib crate
This commit is contained in:
		| @@ -1,170 +0,0 @@ | ||||
| use std::borrow::Borrow; | ||||
| use std::fmt::{self, Debug, Display}; | ||||
| use std::io::{self, BufReader, Read, Seek, Write}; | ||||
|  | ||||
| use either::Either; | ||||
| use meilisearch_types::error::{Code, ErrorCode}; | ||||
| use meilisearch_types::internal_error; | ||||
| use milli::documents::{DocumentsBatchBuilder, Error}; | ||||
| use milli::Object; | ||||
| use serde::Deserialize; | ||||
| use serde_json::error::Category; | ||||
|  | ||||
| type Result<T> = std::result::Result<T, DocumentFormatError>; | ||||
|  | ||||
| #[derive(Debug)] | ||||
| pub enum PayloadType { | ||||
|     Ndjson, | ||||
|     Json, | ||||
|     Csv, | ||||
| } | ||||
|  | ||||
| impl fmt::Display for PayloadType { | ||||
|     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { | ||||
|         match self { | ||||
|             PayloadType::Ndjson => f.write_str("ndjson"), | ||||
|             PayloadType::Json => f.write_str("json"), | ||||
|             PayloadType::Csv => f.write_str("csv"), | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| #[derive(Debug)] | ||||
| pub enum DocumentFormatError { | ||||
|     Internal(Box<dyn std::error::Error + Send + Sync + 'static>), | ||||
|     MalformedPayload(Error, PayloadType), | ||||
| } | ||||
|  | ||||
| impl Display for DocumentFormatError { | ||||
|     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { | ||||
|         match self { | ||||
|             Self::Internal(e) => write!(f, "An internal error has occurred: `{}`.", e), | ||||
|             Self::MalformedPayload(me, b) => match me.borrow() { | ||||
|                 Error::Json(se) => { | ||||
|                     let mut message = match se.classify() { | ||||
|                         Category::Data => { | ||||
|                             "data are neither an object nor a list of objects".to_string() | ||||
|                         } | ||||
|                         _ => se.to_string(), | ||||
|                     }; | ||||
|  | ||||
|                     // https://github.com/meilisearch/meilisearch/issues/2107 | ||||
|                     // The user input maybe insanely long. We need to truncate it. | ||||
|                     let ellipsis = "..."; | ||||
|                     let trim_input_prefix_len = 50; | ||||
|                     let trim_input_suffix_len = 85; | ||||
|  | ||||
|                     if message.len() | ||||
|                         > trim_input_prefix_len + trim_input_suffix_len + ellipsis.len() | ||||
|                     { | ||||
|                         message.replace_range( | ||||
|                             trim_input_prefix_len..message.len() - trim_input_suffix_len, | ||||
|                             ellipsis, | ||||
|                         ); | ||||
|                     } | ||||
|  | ||||
|                     write!( | ||||
|                         f, | ||||
|                         "The `{}` payload provided is malformed. `Couldn't serialize document value: {}`.", | ||||
|                         b, message | ||||
|                 ) | ||||
|                 } | ||||
|                 _ => write!(f, "The `{}` payload provided is malformed: `{}`.", b, me), | ||||
|             }, | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl std::error::Error for DocumentFormatError {} | ||||
|  | ||||
| impl From<(PayloadType, Error)> for DocumentFormatError { | ||||
|     fn from((ty, error): (PayloadType, Error)) -> Self { | ||||
|         match error { | ||||
|             Error::Io(e) => Self::Internal(Box::new(e)), | ||||
|             e => Self::MalformedPayload(e, ty), | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl ErrorCode for DocumentFormatError { | ||||
|     fn error_code(&self) -> Code { | ||||
|         match self { | ||||
|             DocumentFormatError::Internal(_) => Code::Internal, | ||||
|             DocumentFormatError::MalformedPayload(_, _) => Code::MalformedPayload, | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| internal_error!(DocumentFormatError: io::Error); | ||||
|  | ||||
| /// Reads CSV from input and write an obkv batch to writer. | ||||
| pub fn read_csv(input: impl Read, writer: impl Write + Seek) -> Result<usize> { | ||||
|     let mut builder = DocumentsBatchBuilder::new(writer); | ||||
|  | ||||
|     let csv = csv::Reader::from_reader(input); | ||||
|     builder.append_csv(csv).map_err(|e| (PayloadType::Csv, e))?; | ||||
|  | ||||
|     let count = builder.documents_count(); | ||||
|     let _ = builder | ||||
|         .into_inner() | ||||
|         .map_err(Into::into) | ||||
|         .map_err(DocumentFormatError::Internal)?; | ||||
|  | ||||
|     Ok(count as usize) | ||||
| } | ||||
|  | ||||
| /// Reads JSON Lines from input and write an obkv batch to writer. | ||||
| pub fn read_ndjson(input: impl Read, writer: impl Write + Seek) -> Result<usize> { | ||||
|     let mut builder = DocumentsBatchBuilder::new(writer); | ||||
|     let reader = BufReader::new(input); | ||||
|  | ||||
|     for result in serde_json::Deserializer::from_reader(reader).into_iter() { | ||||
|         let object = result | ||||
|             .map_err(Error::Json) | ||||
|             .map_err(|e| (PayloadType::Ndjson, e))?; | ||||
|         builder | ||||
|             .append_json_object(&object) | ||||
|             .map_err(Into::into) | ||||
|             .map_err(DocumentFormatError::Internal)?; | ||||
|     } | ||||
|  | ||||
|     let count = builder.documents_count(); | ||||
|     let _ = builder | ||||
|         .into_inner() | ||||
|         .map_err(Into::into) | ||||
|         .map_err(DocumentFormatError::Internal)?; | ||||
|  | ||||
|     Ok(count as usize) | ||||
| } | ||||
|  | ||||
| /// Reads JSON from input and write an obkv batch to writer. | ||||
| pub fn read_json(input: impl Read, writer: impl Write + Seek) -> Result<usize> { | ||||
|     let mut builder = DocumentsBatchBuilder::new(writer); | ||||
|     let reader = BufReader::new(input); | ||||
|  | ||||
|     #[derive(Deserialize, Debug)] | ||||
|     #[serde(transparent)] | ||||
|     struct ArrayOrSingleObject { | ||||
|         #[serde(with = "either::serde_untagged")] | ||||
|         inner: Either<Vec<Object>, Object>, | ||||
|     } | ||||
|  | ||||
|     let content: ArrayOrSingleObject = serde_json::from_reader(reader) | ||||
|         .map_err(Error::Json) | ||||
|         .map_err(|e| (PayloadType::Json, e))?; | ||||
|  | ||||
|     for object in content.inner.map_right(|o| vec![o]).into_inner() { | ||||
|         builder | ||||
|             .append_json_object(&object) | ||||
|             .map_err(Into::into) | ||||
|             .map_err(DocumentFormatError::Internal)?; | ||||
|     } | ||||
|  | ||||
|     let count = builder.documents_count(); | ||||
|     let _ = builder | ||||
|         .into_inner() | ||||
|         .map_err(Into::into) | ||||
|         .map_err(DocumentFormatError::Internal)?; | ||||
|  | ||||
|     Ok(count as usize) | ||||
| } | ||||
| @@ -1,152 +0,0 @@ | ||||
| use anyhow::bail; | ||||
| use meilisearch_types::error::Code; | ||||
| use milli::update::IndexDocumentsMethod; | ||||
| use serde::{Deserialize, Serialize}; | ||||
| use time::OffsetDateTime; | ||||
| use uuid::Uuid; | ||||
|  | ||||
| use crate::index::{Settings, Unchecked}; | ||||
|  | ||||
| #[derive(Serialize, Deserialize)] | ||||
| pub struct UpdateEntry { | ||||
|     pub uuid: Uuid, | ||||
|     pub update: UpdateStatus, | ||||
| } | ||||
|  | ||||
| #[derive(Debug, Clone, Serialize, Deserialize)] | ||||
| pub enum UpdateFormat { | ||||
|     Json, | ||||
| } | ||||
|  | ||||
| #[derive(Debug, Serialize, Deserialize, Clone)] | ||||
| pub struct DocumentAdditionResult { | ||||
|     pub nb_documents: usize, | ||||
| } | ||||
|  | ||||
| #[derive(Debug, Clone, Serialize, Deserialize)] | ||||
| pub enum UpdateResult { | ||||
|     DocumentsAddition(DocumentAdditionResult), | ||||
|     DocumentDeletion { deleted: u64 }, | ||||
|     Other, | ||||
| } | ||||
|  | ||||
| #[allow(clippy::large_enum_variant)] | ||||
| #[derive(Debug, Clone, Serialize, Deserialize)] | ||||
| #[serde(tag = "type")] | ||||
| pub enum UpdateMeta { | ||||
|     DocumentsAddition { | ||||
|         method: IndexDocumentsMethod, | ||||
|         format: UpdateFormat, | ||||
|         primary_key: Option<String>, | ||||
|     }, | ||||
|     ClearDocuments, | ||||
|     DeleteDocuments { | ||||
|         ids: Vec<String>, | ||||
|     }, | ||||
|     Settings(Settings<Unchecked>), | ||||
| } | ||||
|  | ||||
| #[derive(Debug, Serialize, Deserialize, Clone)] | ||||
| #[serde(rename_all = "camelCase")] | ||||
| pub struct Enqueued { | ||||
|     pub update_id: u64, | ||||
|     pub meta: UpdateMeta, | ||||
|     #[serde(with = "time::serde::rfc3339")] | ||||
|     pub enqueued_at: OffsetDateTime, | ||||
|     pub content: Option<Uuid>, | ||||
| } | ||||
|  | ||||
| #[derive(Debug, Serialize, Deserialize, Clone)] | ||||
| #[serde(rename_all = "camelCase")] | ||||
| pub struct Processed { | ||||
|     pub success: UpdateResult, | ||||
|     #[serde(with = "time::serde::rfc3339")] | ||||
|     pub processed_at: OffsetDateTime, | ||||
|     #[serde(flatten)] | ||||
|     pub from: Processing, | ||||
| } | ||||
|  | ||||
| #[derive(Debug, Serialize, Deserialize, Clone)] | ||||
| #[serde(rename_all = "camelCase")] | ||||
| pub struct Processing { | ||||
|     #[serde(flatten)] | ||||
|     pub from: Enqueued, | ||||
|     #[serde(with = "time::serde::rfc3339")] | ||||
|     pub started_processing_at: OffsetDateTime, | ||||
| } | ||||
|  | ||||
| #[derive(Debug, Serialize, Deserialize, Clone)] | ||||
| #[serde(rename_all = "camelCase")] | ||||
| pub struct Aborted { | ||||
|     #[serde(flatten)] | ||||
|     pub from: Enqueued, | ||||
|     #[serde(with = "time::serde::rfc3339")] | ||||
|     pub aborted_at: OffsetDateTime, | ||||
| } | ||||
|  | ||||
| #[derive(Debug, Serialize, Deserialize)] | ||||
| #[serde(rename_all = "camelCase")] | ||||
| pub struct Failed { | ||||
|     #[serde(flatten)] | ||||
|     pub from: Processing, | ||||
|     pub error: ResponseError, | ||||
|     #[serde(with = "time::serde::rfc3339")] | ||||
|     pub failed_at: OffsetDateTime, | ||||
| } | ||||
|  | ||||
| #[derive(Debug, Serialize, Deserialize)] | ||||
| #[serde(tag = "status", rename_all = "camelCase")] | ||||
| pub enum UpdateStatus { | ||||
|     Processing(Processing), | ||||
|     Enqueued(Enqueued), | ||||
|     Processed(Processed), | ||||
|     Aborted(Aborted), | ||||
|     Failed(Failed), | ||||
| } | ||||
|  | ||||
| type StatusCode = (); | ||||
|  | ||||
| #[derive(Debug, Serialize, Deserialize, Clone)] | ||||
| #[serde(rename_all = "camelCase")] | ||||
| pub struct ResponseError { | ||||
|     #[serde(skip)] | ||||
|     pub code: StatusCode, | ||||
|     pub message: String, | ||||
|     pub error_code: String, | ||||
|     pub error_type: String, | ||||
|     pub error_link: String, | ||||
| } | ||||
|  | ||||
| pub fn error_code_from_str(s: &str) -> anyhow::Result<Code> { | ||||
|     let code = match s { | ||||
|         "index_creation_failed" => Code::CreateIndex, | ||||
|         "index_already_exists" => Code::IndexAlreadyExists, | ||||
|         "index_not_found" => Code::IndexNotFound, | ||||
|         "invalid_index_uid" => Code::InvalidIndexUid, | ||||
|         "invalid_state" => Code::InvalidState, | ||||
|         "missing_primary_key" => Code::MissingPrimaryKey, | ||||
|         "primary_key_already_present" => Code::PrimaryKeyAlreadyPresent, | ||||
|         "invalid_request" => Code::InvalidRankingRule, | ||||
|         "max_fields_limit_exceeded" => Code::MaxFieldsLimitExceeded, | ||||
|         "missing_document_id" => Code::MissingDocumentId, | ||||
|         "invalid_facet" => Code::Filter, | ||||
|         "invalid_filter" => Code::Filter, | ||||
|         "invalid_sort" => Code::Sort, | ||||
|         "bad_parameter" => Code::BadParameter, | ||||
|         "bad_request" => Code::BadRequest, | ||||
|         "document_not_found" => Code::DocumentNotFound, | ||||
|         "internal" => Code::Internal, | ||||
|         "invalid_geo_field" => Code::InvalidGeoField, | ||||
|         "invalid_token" => Code::InvalidToken, | ||||
|         "missing_authorization_header" => Code::MissingAuthorizationHeader, | ||||
|         "payload_too_large" => Code::PayloadTooLarge, | ||||
|         "unretrievable_document" => Code::RetrieveDocument, | ||||
|         "search_error" => Code::SearchDocuments, | ||||
|         "unsupported_media_type" => Code::UnsupportedMediaType, | ||||
|         "dump_already_in_progress" => Code::DumpAlreadyInProgress, | ||||
|         "dump_process_failed" => Code::DumpProcessFailed, | ||||
|         _ => bail!("unknown error code."), | ||||
|     }; | ||||
|  | ||||
|     Ok(code) | ||||
| } | ||||
| @@ -1,103 +0,0 @@ | ||||
| use std::fs::{self, create_dir_all, File}; | ||||
| use std::io::{BufReader, Write}; | ||||
| use std::path::Path; | ||||
|  | ||||
| use fs_extra::dir::{self, CopyOptions}; | ||||
| use log::info; | ||||
| use serde_json::{Deserializer, Map, Value}; | ||||
| use tempfile::tempdir; | ||||
| use uuid::Uuid; | ||||
|  | ||||
| use crate::dump::{compat, Metadata}; | ||||
| use crate::options::IndexerOpts; | ||||
| use crate::tasks::task::Task; | ||||
|  | ||||
| pub fn load_dump( | ||||
|     meta: Metadata, | ||||
|     src: impl AsRef<Path>, | ||||
|     dst: impl AsRef<Path>, | ||||
|     index_db_size: usize, | ||||
|     meta_env_size: usize, | ||||
|     indexing_options: &IndexerOpts, | ||||
| ) -> anyhow::Result<()> { | ||||
|     info!("Patching dump V4 to dump V5..."); | ||||
|  | ||||
|     let patched_dir = tempdir()?; | ||||
|     let options = CopyOptions::default(); | ||||
|  | ||||
|     // Indexes | ||||
|     dir::copy(src.as_ref().join("indexes"), &patched_dir, &options)?; | ||||
|  | ||||
|     // Index uuids | ||||
|     dir::copy(src.as_ref().join("index_uuids"), &patched_dir, &options)?; | ||||
|  | ||||
|     // Metadata | ||||
|     fs::copy( | ||||
|         src.as_ref().join("metadata.json"), | ||||
|         patched_dir.path().join("metadata.json"), | ||||
|     )?; | ||||
|  | ||||
|     // Updates | ||||
|     patch_updates(&src, &patched_dir)?; | ||||
|  | ||||
|     // Keys | ||||
|     patch_keys(&src, &patched_dir)?; | ||||
|  | ||||
|     super::v5::load_dump( | ||||
|         meta, | ||||
|         &patched_dir, | ||||
|         dst, | ||||
|         index_db_size, | ||||
|         meta_env_size, | ||||
|         indexing_options, | ||||
|     ) | ||||
| } | ||||
|  | ||||
| fn patch_updates(src: impl AsRef<Path>, dst: impl AsRef<Path>) -> anyhow::Result<()> { | ||||
|     let updates_path = src.as_ref().join("updates/data.jsonl"); | ||||
|     let output_updates_path = dst.as_ref().join("updates/data.jsonl"); | ||||
|     create_dir_all(output_updates_path.parent().unwrap())?; | ||||
|     let updates_file = File::open(updates_path)?; | ||||
|     let mut output_update_file = File::create(output_updates_path)?; | ||||
|  | ||||
|     serde_json::Deserializer::from_reader(updates_file) | ||||
|         .into_iter::<compat::v4::Task>() | ||||
|         .try_for_each(|task| -> anyhow::Result<()> { | ||||
|             let task: Task = task?.into(); | ||||
|  | ||||
|             serde_json::to_writer(&mut output_update_file, &task)?; | ||||
|             output_update_file.write_all(b"\n")?; | ||||
|  | ||||
|             Ok(()) | ||||
|         })?; | ||||
|  | ||||
|     output_update_file.flush()?; | ||||
|  | ||||
|     Ok(()) | ||||
| } | ||||
|  | ||||
| fn patch_keys(src: impl AsRef<Path>, dst: impl AsRef<Path>) -> anyhow::Result<()> { | ||||
|     let keys_file_src = src.as_ref().join("keys"); | ||||
|  | ||||
|     if !keys_file_src.exists() { | ||||
|         return Ok(()); | ||||
|     } | ||||
|  | ||||
|     fs::create_dir_all(&dst)?; | ||||
|     let keys_file_dst = dst.as_ref().join("keys"); | ||||
|     let mut writer = File::create(&keys_file_dst)?; | ||||
|  | ||||
|     let reader = BufReader::new(File::open(&keys_file_src)?); | ||||
|     for key in Deserializer::from_reader(reader).into_iter() { | ||||
|         let mut key: Map<String, Value> = key?; | ||||
|  | ||||
|         // generate a new uuid v4 and insert it in the key. | ||||
|         let uid = serde_json::to_value(Uuid::new_v4()).unwrap(); | ||||
|         key.insert("uid".to_string(), uid); | ||||
|  | ||||
|         serde_json::to_writer(&mut writer, &key)?; | ||||
|         writer.write_all(b"\n")?; | ||||
|     } | ||||
|  | ||||
|     Ok(()) | ||||
| } | ||||
| @@ -1,161 +0,0 @@ | ||||
| use std::fs::{create_dir_all, File}; | ||||
| use std::io::{BufReader, Seek, SeekFrom, Write}; | ||||
| use std::path::Path; | ||||
|  | ||||
| use anyhow::Context; | ||||
| use indexmap::IndexMap; | ||||
| use milli::documents::DocumentsBatchReader; | ||||
| use milli::heed::{EnvOpenOptions, RoTxn}; | ||||
| use milli::update::{IndexDocumentsConfig, IndexerConfig}; | ||||
| use serde::{Deserialize, Serialize}; | ||||
|  | ||||
| use crate::document_formats::read_ndjson; | ||||
| use crate::index::updates::apply_settings_to_builder; | ||||
|  | ||||
| use super::error::Result; | ||||
| use super::{index::Index, Settings, Unchecked}; | ||||
|  | ||||
| #[derive(Serialize, Deserialize)] | ||||
| struct DumpMeta { | ||||
|     settings: Settings<Unchecked>, | ||||
|     primary_key: Option<String>, | ||||
| } | ||||
|  | ||||
| const META_FILE_NAME: &str = "meta.json"; | ||||
| const DATA_FILE_NAME: &str = "documents.jsonl"; | ||||
|  | ||||
| impl Index { | ||||
|     pub fn dump(&self, path: impl AsRef<Path>) -> Result<()> { | ||||
|         // acquire write txn make sure any ongoing write is finished before we start. | ||||
|         let txn = self.write_txn()?; | ||||
|         let path = path.as_ref().join(format!("indexes/{}", self.uuid)); | ||||
|  | ||||
|         create_dir_all(&path)?; | ||||
|  | ||||
|         self.dump_documents(&txn, &path)?; | ||||
|         self.dump_meta(&txn, &path)?; | ||||
|  | ||||
|         Ok(()) | ||||
|     } | ||||
|  | ||||
|     fn dump_documents(&self, txn: &RoTxn, path: impl AsRef<Path>) -> Result<()> { | ||||
|         let document_file_path = path.as_ref().join(DATA_FILE_NAME); | ||||
|         let mut document_file = File::create(&document_file_path)?; | ||||
|  | ||||
|         let documents = self.all_documents(txn)?; | ||||
|         let fields_ids_map = self.fields_ids_map(txn)?; | ||||
|  | ||||
|         // dump documents | ||||
|         let mut json_map = IndexMap::new(); | ||||
|         for document in documents { | ||||
|             let (_, reader) = document?; | ||||
|  | ||||
|             for (fid, bytes) in reader.iter() { | ||||
|                 if let Some(name) = fields_ids_map.name(fid) { | ||||
|                     json_map.insert(name, serde_json::from_slice::<serde_json::Value>(bytes)?); | ||||
|                 } | ||||
|             } | ||||
|  | ||||
|             serde_json::to_writer(&mut document_file, &json_map)?; | ||||
|             document_file.write_all(b"\n")?; | ||||
|  | ||||
|             json_map.clear(); | ||||
|         } | ||||
|  | ||||
|         Ok(()) | ||||
|     } | ||||
|  | ||||
|     fn dump_meta(&self, txn: &RoTxn, path: impl AsRef<Path>) -> Result<()> { | ||||
|         let meta_file_path = path.as_ref().join(META_FILE_NAME); | ||||
|         let mut meta_file = File::create(&meta_file_path)?; | ||||
|  | ||||
|         let settings = self.settings_txn(txn)?.into_unchecked(); | ||||
|         let primary_key = self.primary_key(txn)?.map(String::from); | ||||
|         let meta = DumpMeta { | ||||
|             settings, | ||||
|             primary_key, | ||||
|         }; | ||||
|  | ||||
|         serde_json::to_writer(&mut meta_file, &meta)?; | ||||
|  | ||||
|         Ok(()) | ||||
|     } | ||||
|  | ||||
|     pub fn load_dump( | ||||
|         src: impl AsRef<Path>, | ||||
|         dst: impl AsRef<Path>, | ||||
|         size: usize, | ||||
|         indexer_config: &IndexerConfig, | ||||
|     ) -> anyhow::Result<()> { | ||||
|         let dir_name = src | ||||
|             .as_ref() | ||||
|             .file_name() | ||||
|             .with_context(|| format!("invalid dump index: {}", src.as_ref().display()))?; | ||||
|  | ||||
|         let dst_dir_path = dst.as_ref().join("indexes").join(dir_name); | ||||
|         create_dir_all(&dst_dir_path)?; | ||||
|  | ||||
|         let meta_path = src.as_ref().join(META_FILE_NAME); | ||||
|         let meta_file = File::open(meta_path)?; | ||||
|         let DumpMeta { | ||||
|             settings, | ||||
|             primary_key, | ||||
|         } = serde_json::from_reader(meta_file)?; | ||||
|         let settings = settings.check(); | ||||
|  | ||||
|         let mut options = EnvOpenOptions::new(); | ||||
|         options.map_size(size); | ||||
|         options.max_readers(1024); | ||||
|         let index = milli::Index::new(options, &dst_dir_path)?; | ||||
|  | ||||
|         let mut txn = index.write_txn()?; | ||||
|  | ||||
|         // Apply settings first | ||||
|         let mut builder = milli::update::Settings::new(&mut txn, &index, indexer_config); | ||||
|  | ||||
|         if let Some(primary_key) = primary_key { | ||||
|             builder.set_primary_key(primary_key); | ||||
|         } | ||||
|  | ||||
|         apply_settings_to_builder(&settings, &mut builder); | ||||
|  | ||||
|         builder.execute(|_| ())?; | ||||
|  | ||||
|         let document_file_path = src.as_ref().join(DATA_FILE_NAME); | ||||
|         let reader = BufReader::new(File::open(&document_file_path)?); | ||||
|  | ||||
|         let mut tmp_doc_file = tempfile::tempfile()?; | ||||
|  | ||||
|         let empty = match read_ndjson(reader, &mut tmp_doc_file) { | ||||
|             // if there was no document in the file it's because the index was empty | ||||
|             Ok(0) => true, | ||||
|             Ok(_) => false, | ||||
|             Err(e) => return Err(e.into()), | ||||
|         }; | ||||
|  | ||||
|         if !empty { | ||||
|             tmp_doc_file.seek(SeekFrom::Start(0))?; | ||||
|  | ||||
|             let documents_reader = DocumentsBatchReader::from_reader(tmp_doc_file)?; | ||||
|  | ||||
|             //If the document file is empty, we don't perform the document addition, to prevent | ||||
|             //a primary key error to be thrown. | ||||
|             let config = IndexDocumentsConfig::default(); | ||||
|             let builder = milli::update::IndexDocuments::new( | ||||
|                 &mut txn, | ||||
|                 &index, | ||||
|                 indexer_config, | ||||
|                 config, | ||||
|                 |_| (), | ||||
|             )?; | ||||
|             let (builder, user_error) = builder.add_documents(documents_reader)?; | ||||
|             user_error?; | ||||
|             builder.execute()?; | ||||
|         } | ||||
|  | ||||
|         txn.commit()?; | ||||
|         index.prepare_for_closing().wait(); | ||||
|  | ||||
|         Ok(()) | ||||
|     } | ||||
| } | ||||
| @@ -1,333 +0,0 @@ | ||||
| use std::collections::BTreeSet; | ||||
| use std::fs::create_dir_all; | ||||
| use std::marker::PhantomData; | ||||
| use std::ops::Deref; | ||||
| use std::path::Path; | ||||
| use std::sync::Arc; | ||||
|  | ||||
| use fst::IntoStreamer; | ||||
| use milli::heed::{CompactionOption, EnvOpenOptions, RoTxn}; | ||||
| use milli::update::{IndexerConfig, Setting}; | ||||
| use milli::{obkv_to_json, FieldDistribution, DEFAULT_VALUES_PER_FACET}; | ||||
| use serde::{Deserialize, Serialize}; | ||||
| use serde_json::{Map, Value}; | ||||
| use time::OffsetDateTime; | ||||
| use uuid::Uuid; | ||||
| use walkdir::WalkDir; | ||||
|  | ||||
| use crate::index::search::DEFAULT_PAGINATION_MAX_TOTAL_HITS; | ||||
|  | ||||
| use super::error::IndexError; | ||||
| use super::error::Result; | ||||
| use super::updates::{FacetingSettings, MinWordSizeTyposSetting, PaginationSettings, TypoSettings}; | ||||
| use super::{Checked, Settings}; | ||||
|  | ||||
| pub type Document = Map<String, Value>; | ||||
|  | ||||
| #[derive(Debug, Serialize, Deserialize, Clone)] | ||||
| #[serde(rename_all = "camelCase")] | ||||
| pub struct IndexMeta { | ||||
|     #[serde(with = "time::serde::rfc3339")] | ||||
|     pub created_at: OffsetDateTime, | ||||
|     #[serde(with = "time::serde::rfc3339")] | ||||
|     pub updated_at: OffsetDateTime, | ||||
|     pub primary_key: Option<String>, | ||||
| } | ||||
|  | ||||
| impl IndexMeta { | ||||
|     pub fn new(index: &Index) -> Result<Self> { | ||||
|         let txn = index.read_txn()?; | ||||
|         Self::new_txn(index, &txn) | ||||
|     } | ||||
|  | ||||
|     pub fn new_txn(index: &Index, txn: &milli::heed::RoTxn) -> Result<Self> { | ||||
|         let created_at = index.created_at(txn)?; | ||||
|         let updated_at = index.updated_at(txn)?; | ||||
|         let primary_key = index.primary_key(txn)?.map(String::from); | ||||
|         Ok(Self { | ||||
|             created_at, | ||||
|             updated_at, | ||||
|             primary_key, | ||||
|         }) | ||||
|     } | ||||
| } | ||||
|  | ||||
| #[derive(Serialize, Debug)] | ||||
| #[serde(rename_all = "camelCase")] | ||||
| pub struct IndexStats { | ||||
|     #[serde(skip)] | ||||
|     pub size: u64, | ||||
|     pub number_of_documents: u64, | ||||
|     /// Whether the current index is performing an update. It is initially `None` when the | ||||
|     /// index returns it, since it is the `UpdateStore` that knows what index is currently indexing. It is | ||||
|     /// later set to either true or false, we we retrieve the information from the `UpdateStore` | ||||
|     pub is_indexing: Option<bool>, | ||||
|     pub field_distribution: FieldDistribution, | ||||
| } | ||||
|  | ||||
| #[derive(Clone, derivative::Derivative)] | ||||
| #[derivative(Debug)] | ||||
| pub struct Index { | ||||
|     pub uuid: Uuid, | ||||
|     #[derivative(Debug = "ignore")] | ||||
|     pub inner: Arc<milli::Index>, | ||||
|     #[derivative(Debug = "ignore")] | ||||
|     pub indexer_config: Arc<IndexerConfig>, | ||||
| } | ||||
|  | ||||
| impl Deref for Index { | ||||
|     type Target = milli::Index; | ||||
|  | ||||
|     fn deref(&self) -> &Self::Target { | ||||
|         self.inner.as_ref() | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl Index { | ||||
|     pub fn open( | ||||
|         path: impl AsRef<Path>, | ||||
|         size: usize, | ||||
|         uuid: Uuid, | ||||
|         update_handler: Arc<IndexerConfig>, | ||||
|     ) -> Result<Self> { | ||||
|         log::debug!("opening index in {}", path.as_ref().display()); | ||||
|         create_dir_all(&path)?; | ||||
|         let mut options = EnvOpenOptions::new(); | ||||
|         options.map_size(size); | ||||
|         options.max_readers(1024); | ||||
|         let inner = Arc::new(milli::Index::new(options, &path)?); | ||||
|         Ok(Index { | ||||
|             inner, | ||||
|             uuid, | ||||
|             indexer_config: update_handler, | ||||
|         }) | ||||
|     } | ||||
|  | ||||
|     /// Asynchronously close the underlying index | ||||
|     pub fn close(self) { | ||||
|         self.inner.as_ref().clone().prepare_for_closing(); | ||||
|     } | ||||
|  | ||||
|     pub fn stats(&self) -> Result<IndexStats> { | ||||
|         let rtxn = self.read_txn()?; | ||||
|  | ||||
|         Ok(IndexStats { | ||||
|             size: self.size(), | ||||
|             number_of_documents: self.number_of_documents(&rtxn)?, | ||||
|             is_indexing: None, | ||||
|             field_distribution: self.field_distribution(&rtxn)?, | ||||
|         }) | ||||
|     } | ||||
|  | ||||
|     pub fn meta(&self) -> Result<IndexMeta> { | ||||
|         IndexMeta::new(self) | ||||
|     } | ||||
|     pub fn settings(&self) -> Result<Settings<Checked>> { | ||||
|         let txn = self.read_txn()?; | ||||
|         self.settings_txn(&txn) | ||||
|     } | ||||
|  | ||||
|     pub fn uuid(&self) -> Uuid { | ||||
|         self.uuid | ||||
|     } | ||||
|  | ||||
|     pub fn settings_txn(&self, txn: &RoTxn) -> Result<Settings<Checked>> { | ||||
|         let displayed_attributes = self | ||||
|             .displayed_fields(txn)? | ||||
|             .map(|fields| fields.into_iter().map(String::from).collect()); | ||||
|  | ||||
|         let searchable_attributes = self | ||||
|             .user_defined_searchable_fields(txn)? | ||||
|             .map(|fields| fields.into_iter().map(String::from).collect()); | ||||
|  | ||||
|         let filterable_attributes = self.filterable_fields(txn)?.into_iter().collect(); | ||||
|  | ||||
|         let sortable_attributes = self.sortable_fields(txn)?.into_iter().collect(); | ||||
|  | ||||
|         let criteria = self | ||||
|             .criteria(txn)? | ||||
|             .into_iter() | ||||
|             .map(|c| c.to_string()) | ||||
|             .collect(); | ||||
|  | ||||
|         let stop_words = self | ||||
|             .stop_words(txn)? | ||||
|             .map(|stop_words| -> Result<BTreeSet<_>> { | ||||
|                 Ok(stop_words.stream().into_strs()?.into_iter().collect()) | ||||
|             }) | ||||
|             .transpose()? | ||||
|             .unwrap_or_default(); | ||||
|         let distinct_field = self.distinct_field(txn)?.map(String::from); | ||||
|  | ||||
|         // in milli each word in the synonyms map were split on their separator. Since we lost | ||||
|         // this information we are going to put space between words. | ||||
|         let synonyms = self | ||||
|             .synonyms(txn)? | ||||
|             .iter() | ||||
|             .map(|(key, values)| { | ||||
|                 ( | ||||
|                     key.join(" "), | ||||
|                     values.iter().map(|value| value.join(" ")).collect(), | ||||
|                 ) | ||||
|             }) | ||||
|             .collect(); | ||||
|  | ||||
|         let min_typo_word_len = MinWordSizeTyposSetting { | ||||
|             one_typo: Setting::Set(self.min_word_len_one_typo(txn)?), | ||||
|             two_typos: Setting::Set(self.min_word_len_two_typos(txn)?), | ||||
|         }; | ||||
|  | ||||
|         let disabled_words = match self.exact_words(txn)? { | ||||
|             Some(fst) => fst.into_stream().into_strs()?.into_iter().collect(), | ||||
|             None => BTreeSet::new(), | ||||
|         }; | ||||
|  | ||||
|         let disabled_attributes = self | ||||
|             .exact_attributes(txn)? | ||||
|             .into_iter() | ||||
|             .map(String::from) | ||||
|             .collect(); | ||||
|  | ||||
|         let typo_tolerance = TypoSettings { | ||||
|             enabled: Setting::Set(self.authorize_typos(txn)?), | ||||
|             min_word_size_for_typos: Setting::Set(min_typo_word_len), | ||||
|             disable_on_words: Setting::Set(disabled_words), | ||||
|             disable_on_attributes: Setting::Set(disabled_attributes), | ||||
|         }; | ||||
|  | ||||
|         let faceting = FacetingSettings { | ||||
|             max_values_per_facet: Setting::Set( | ||||
|                 self.max_values_per_facet(txn)? | ||||
|                     .unwrap_or(DEFAULT_VALUES_PER_FACET), | ||||
|             ), | ||||
|         }; | ||||
|  | ||||
|         let pagination = PaginationSettings { | ||||
|             max_total_hits: Setting::Set( | ||||
|                 self.pagination_max_total_hits(txn)? | ||||
|                     .unwrap_or(DEFAULT_PAGINATION_MAX_TOTAL_HITS), | ||||
|             ), | ||||
|         }; | ||||
|  | ||||
|         Ok(Settings { | ||||
|             displayed_attributes: match displayed_attributes { | ||||
|                 Some(attrs) => Setting::Set(attrs), | ||||
|                 None => Setting::Reset, | ||||
|             }, | ||||
|             searchable_attributes: match searchable_attributes { | ||||
|                 Some(attrs) => Setting::Set(attrs), | ||||
|                 None => Setting::Reset, | ||||
|             }, | ||||
|             filterable_attributes: Setting::Set(filterable_attributes), | ||||
|             sortable_attributes: Setting::Set(sortable_attributes), | ||||
|             ranking_rules: Setting::Set(criteria), | ||||
|             stop_words: Setting::Set(stop_words), | ||||
|             distinct_attribute: match distinct_field { | ||||
|                 Some(field) => Setting::Set(field), | ||||
|                 None => Setting::Reset, | ||||
|             }, | ||||
|             synonyms: Setting::Set(synonyms), | ||||
|             typo_tolerance: Setting::Set(typo_tolerance), | ||||
|             faceting: Setting::Set(faceting), | ||||
|             pagination: Setting::Set(pagination), | ||||
|             _kind: PhantomData, | ||||
|         }) | ||||
|     } | ||||
|  | ||||
|     /// Return the total number of documents contained in the index + the selected documents. | ||||
|     pub fn retrieve_documents<S: AsRef<str>>( | ||||
|         &self, | ||||
|         offset: usize, | ||||
|         limit: usize, | ||||
|         attributes_to_retrieve: Option<Vec<S>>, | ||||
|     ) -> Result<(u64, Vec<Document>)> { | ||||
|         let txn = self.read_txn()?; | ||||
|  | ||||
|         let fields_ids_map = self.fields_ids_map(&txn)?; | ||||
|         let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect(); | ||||
|  | ||||
|         let mut documents = Vec::new(); | ||||
|         for entry in self.all_documents(&txn)?.skip(offset).take(limit) { | ||||
|             let (_id, obkv) = entry?; | ||||
|             let document = obkv_to_json(&all_fields, &fields_ids_map, obkv)?; | ||||
|             let document = match &attributes_to_retrieve { | ||||
|                 Some(attributes_to_retrieve) => permissive_json_pointer::select_values( | ||||
|                     &document, | ||||
|                     attributes_to_retrieve.iter().map(|s| s.as_ref()), | ||||
|                 ), | ||||
|                 None => document, | ||||
|             }; | ||||
|             documents.push(document); | ||||
|         } | ||||
|  | ||||
|         let number_of_documents = self.number_of_documents(&txn)?; | ||||
|  | ||||
|         Ok((number_of_documents, documents)) | ||||
|     } | ||||
|  | ||||
|     pub fn retrieve_document<S: AsRef<str>>( | ||||
|         &self, | ||||
|         doc_id: String, | ||||
|         attributes_to_retrieve: Option<Vec<S>>, | ||||
|     ) -> Result<Document> { | ||||
|         let txn = self.read_txn()?; | ||||
|  | ||||
|         let fields_ids_map = self.fields_ids_map(&txn)?; | ||||
|         let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect(); | ||||
|  | ||||
|         let internal_id = self | ||||
|             .external_documents_ids(&txn)? | ||||
|             .get(doc_id.as_bytes()) | ||||
|             .ok_or_else(|| IndexError::DocumentNotFound(doc_id.clone()))?; | ||||
|  | ||||
|         let document = self | ||||
|             .documents(&txn, std::iter::once(internal_id))? | ||||
|             .into_iter() | ||||
|             .next() | ||||
|             .map(|(_, d)| d) | ||||
|             .ok_or(IndexError::DocumentNotFound(doc_id))?; | ||||
|  | ||||
|         let document = obkv_to_json(&all_fields, &fields_ids_map, document)?; | ||||
|         let document = match &attributes_to_retrieve { | ||||
|             Some(attributes_to_retrieve) => permissive_json_pointer::select_values( | ||||
|                 &document, | ||||
|                 attributes_to_retrieve.iter().map(|s| s.as_ref()), | ||||
|             ), | ||||
|             None => document, | ||||
|         }; | ||||
|  | ||||
|         Ok(document) | ||||
|     } | ||||
|  | ||||
|     pub fn size(&self) -> u64 { | ||||
|         WalkDir::new(self.path()) | ||||
|             .into_iter() | ||||
|             .filter_map(|entry| entry.ok()) | ||||
|             .filter_map(|entry| entry.metadata().ok()) | ||||
|             .filter(|metadata| metadata.is_file()) | ||||
|             .fold(0, |acc, m| acc + m.len()) | ||||
|     } | ||||
|  | ||||
|     pub fn snapshot(&self, path: impl AsRef<Path>) -> Result<()> { | ||||
|         let mut dst = path.as_ref().join(format!("indexes/{}/", self.uuid)); | ||||
|         create_dir_all(&dst)?; | ||||
|         dst.push("data.mdb"); | ||||
|         let _txn = self.write_txn()?; | ||||
|         self.inner.copy_to_path(dst, CompactionOption::Enabled)?; | ||||
|         Ok(()) | ||||
|     } | ||||
| } | ||||
|  | ||||
| /// When running tests, when a server instance is dropped, the environment is not actually closed, | ||||
| /// leaving a lot of open file descriptors. | ||||
| impl Drop for Index { | ||||
|     fn drop(&mut self) { | ||||
|         // When dropping the last instance of an index, we want to close the index | ||||
|         // Note that the close is actually performed only if all the instances a effectively | ||||
|         // dropped | ||||
|  | ||||
|         if Arc::strong_count(&self.inner) == 1 { | ||||
|             self.inner.as_ref().clone().prepare_for_closing(); | ||||
|         } | ||||
|     } | ||||
| } | ||||
| @@ -1,108 +0,0 @@ | ||||
| use std::collections::HashMap; | ||||
| use std::convert::TryFrom; | ||||
| use std::path::{Path, PathBuf}; | ||||
| use std::sync::Arc; | ||||
|  | ||||
| use milli::update::IndexerConfig; | ||||
| use tokio::fs; | ||||
| use tokio::sync::RwLock; | ||||
| use tokio::task::spawn_blocking; | ||||
| use uuid::Uuid; | ||||
|  | ||||
| use super::error::{IndexResolverError, Result}; | ||||
| use crate::index::Index; | ||||
| use crate::options::IndexerOpts; | ||||
|  | ||||
| type AsyncMap<K, V> = Arc<RwLock<HashMap<K, V>>>; | ||||
|  | ||||
| #[async_trait::async_trait] | ||||
| #[cfg_attr(test, mockall::automock)] | ||||
| pub trait IndexStore { | ||||
|     async fn create(&self, uuid: Uuid) -> Result<Index>; | ||||
|     async fn get(&self, uuid: Uuid) -> Result<Option<Index>>; | ||||
|     async fn delete(&self, uuid: Uuid) -> Result<Option<Index>>; | ||||
| } | ||||
|  | ||||
| pub struct MapIndexStore { | ||||
|     index_store: AsyncMap<Uuid, Index>, | ||||
|     path: PathBuf, | ||||
|     index_size: usize, | ||||
|     indexer_config: Arc<IndexerConfig>, | ||||
| } | ||||
|  | ||||
| impl MapIndexStore { | ||||
|     pub fn new( | ||||
|         path: impl AsRef<Path>, | ||||
|         index_size: usize, | ||||
|         indexer_opts: &IndexerOpts, | ||||
|     ) -> anyhow::Result<Self> { | ||||
|         let indexer_config = Arc::new(IndexerConfig::try_from(indexer_opts)?); | ||||
|         let path = path.as_ref().join("indexes/"); | ||||
|         let index_store = Arc::new(RwLock::new(HashMap::new())); | ||||
|         Ok(Self { | ||||
|             index_store, | ||||
|             path, | ||||
|             index_size, | ||||
|             indexer_config, | ||||
|         }) | ||||
|     } | ||||
| } | ||||
|  | ||||
| #[async_trait::async_trait] | ||||
| impl IndexStore for MapIndexStore { | ||||
|     async fn create(&self, uuid: Uuid) -> Result<Index> { | ||||
|         // We need to keep the lock until we are sure the db file has been opened correctly, to | ||||
|         // ensure that another db is not created at the same time. | ||||
|         let mut lock = self.index_store.write().await; | ||||
|  | ||||
|         if let Some(index) = lock.get(&uuid) { | ||||
|             return Ok(index.clone()); | ||||
|         } | ||||
|         let path = self.path.join(format!("{}", uuid)); | ||||
|         if path.exists() { | ||||
|             return Err(IndexResolverError::UuidAlreadyExists(uuid)); | ||||
|         } | ||||
|  | ||||
|         let index_size = self.index_size; | ||||
|         let update_handler = self.indexer_config.clone(); | ||||
|         let index = spawn_blocking(move || -> Result<Index> { | ||||
|             let index = Index::open(path, index_size, uuid, update_handler)?; | ||||
|             Ok(index) | ||||
|         }) | ||||
|         .await??; | ||||
|  | ||||
|         lock.insert(uuid, index.clone()); | ||||
|  | ||||
|         Ok(index) | ||||
|     } | ||||
|  | ||||
|     async fn get(&self, uuid: Uuid) -> Result<Option<Index>> { | ||||
|         let guard = self.index_store.read().await; | ||||
|         match guard.get(&uuid) { | ||||
|             Some(index) => Ok(Some(index.clone())), | ||||
|             None => { | ||||
|                 // drop the guard here so we can perform the write after without deadlocking; | ||||
|                 drop(guard); | ||||
|                 let path = self.path.join(format!("{}", uuid)); | ||||
|                 if !path.exists() { | ||||
|                     return Ok(None); | ||||
|                 } | ||||
|  | ||||
|                 let index_size = self.index_size; | ||||
|                 let update_handler = self.indexer_config.clone(); | ||||
|                 let index = | ||||
|                     spawn_blocking(move || Index::open(path, index_size, uuid, update_handler)) | ||||
|                         .await??; | ||||
|                 self.index_store.write().await.insert(uuid, index.clone()); | ||||
|                 Ok(Some(index)) | ||||
|             } | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     async fn delete(&self, uuid: Uuid) -> Result<Option<Index>> { | ||||
|         let db_path = self.path.join(format!("{}", uuid)); | ||||
|         fs::remove_dir_all(db_path).await?; | ||||
|         let index = self.index_store.write().await.remove(&uuid); | ||||
|         Ok(index) | ||||
|     } | ||||
| } | ||||
| @@ -1,50 +0,0 @@ | ||||
| #[macro_use] | ||||
| pub mod error; | ||||
| pub mod options; | ||||
|  | ||||
| mod analytics; | ||||
| mod document_formats; | ||||
| // TODO: TAMO: reenable the dumps | ||||
| #[cfg(todo)] | ||||
| mod dump; | ||||
| mod index_controller; | ||||
| mod snapshot; | ||||
|  | ||||
| use std::env::VarError; | ||||
| use std::ffi::OsStr; | ||||
| use std::path::Path; | ||||
|  | ||||
| // TODO: TAMO: rename the MeiliSearch in Meilisearch | ||||
| pub use index_controller::error::IndexControllerError; | ||||
| pub use index_controller::Meilisearch as MeiliSearch; | ||||
| pub use milli; | ||||
| pub use milli::heed; | ||||
|  | ||||
| mod compression; | ||||
|  | ||||
| /// Check if a db is empty. It does not provide any information on the | ||||
| /// validity of the data in it. | ||||
| /// We consider a database as non empty when it's a non empty directory. | ||||
| pub fn is_empty_db(db_path: impl AsRef<Path>) -> bool { | ||||
|     let db_path = db_path.as_ref(); | ||||
|  | ||||
|     if !db_path.exists() { | ||||
|         true | ||||
|     // if we encounter an error or if the db is a file we consider the db non empty | ||||
|     } else if let Ok(dir) = db_path.read_dir() { | ||||
|         dir.count() == 0 | ||||
|     } else { | ||||
|         true | ||||
|     } | ||||
| } | ||||
|  | ||||
| /// Checks if the key is defined in the environment variables. | ||||
| /// If not, inserts it with the given value. | ||||
| pub fn export_to_env_if_not_present<T>(key: &str, value: T) | ||||
| where | ||||
|     T: AsRef<OsStr>, | ||||
| { | ||||
|     if let Err(VarError::NotPresent) = std::env::var(key) { | ||||
|         std::env::set_var(key, value); | ||||
|     } | ||||
| } | ||||
| @@ -1,205 +0,0 @@ | ||||
| use crate::export_to_env_if_not_present; | ||||
|  | ||||
| use core::fmt; | ||||
| use std::{convert::TryFrom, num::ParseIntError, ops::Deref, str::FromStr}; | ||||
|  | ||||
| use byte_unit::{Byte, ByteError}; | ||||
| use clap::Parser; | ||||
| use milli::update::IndexerConfig; | ||||
| use serde::{Deserialize, Serialize}; | ||||
| use sysinfo::{RefreshKind, System, SystemExt}; | ||||
|  | ||||
| const MEILI_MAX_INDEXING_MEMORY: &str = "MEILI_MAX_INDEXING_MEMORY"; | ||||
| const MEILI_MAX_INDEXING_THREADS: &str = "MEILI_MAX_INDEXING_THREADS"; | ||||
| const DISABLE_AUTO_BATCHING: &str = "DISABLE_AUTO_BATCHING"; | ||||
| const DEFAULT_LOG_EVERY_N: usize = 100000; | ||||
|  | ||||
| #[derive(Debug, Clone, Parser, Serialize, Deserialize)] | ||||
| #[serde(rename_all = "snake_case", deny_unknown_fields)] | ||||
| pub struct IndexerOpts { | ||||
|     /// Sets the amount of documents to skip before printing | ||||
|     /// a log regarding the indexing advancement. | ||||
|     #[serde(skip_serializing, default = "default_log_every_n")] | ||||
|     #[clap(long, default_value_t = default_log_every_n(), hide = true)] // 100k | ||||
|     pub log_every_n: usize, | ||||
|  | ||||
|     /// Grenad max number of chunks in bytes. | ||||
|     #[serde(skip_serializing)] | ||||
|     #[clap(long, hide = true)] | ||||
|     pub max_nb_chunks: Option<usize>, | ||||
|  | ||||
|     /// Sets the maximum amount of RAM Meilisearch can use when indexing. By default, Meilisearch | ||||
|     /// uses no more than two thirds of available memory. | ||||
|     #[clap(long, env = MEILI_MAX_INDEXING_MEMORY, default_value_t)] | ||||
|     #[serde(default)] | ||||
|     pub max_indexing_memory: MaxMemory, | ||||
|  | ||||
|     /// Sets the maximum number of threads Meilisearch can use during indexation. By default, the | ||||
|     /// indexer avoids using more than half of a machine's total processing units. This ensures | ||||
|     /// Meilisearch is always ready to perform searches, even while you are updating an index. | ||||
|     #[clap(long, env = MEILI_MAX_INDEXING_THREADS, default_value_t)] | ||||
|     #[serde(default)] | ||||
|     pub max_indexing_threads: MaxThreads, | ||||
| } | ||||
|  | ||||
| #[derive(Debug, Clone, Parser, Default, Serialize, Deserialize)] | ||||
| #[serde(rename_all = "snake_case", deny_unknown_fields)] | ||||
| pub struct SchedulerConfig { | ||||
|     /// Deactivates auto-batching when provided. | ||||
|     #[clap(long, env = DISABLE_AUTO_BATCHING)] | ||||
|     #[serde(default)] | ||||
|     pub disable_auto_batching: bool, | ||||
| } | ||||
|  | ||||
| impl IndexerOpts { | ||||
|     /// Exports the values to their corresponding env vars if they are not set. | ||||
|     pub fn export_to_env(self) { | ||||
|         let IndexerOpts { | ||||
|             max_indexing_memory, | ||||
|             max_indexing_threads, | ||||
|             log_every_n: _, | ||||
|             max_nb_chunks: _, | ||||
|         } = self; | ||||
|         if let Some(max_indexing_memory) = max_indexing_memory.0 { | ||||
|             export_to_env_if_not_present( | ||||
|                 MEILI_MAX_INDEXING_MEMORY, | ||||
|                 max_indexing_memory.to_string(), | ||||
|             ); | ||||
|         } | ||||
|         export_to_env_if_not_present( | ||||
|             MEILI_MAX_INDEXING_THREADS, | ||||
|             max_indexing_threads.0.to_string(), | ||||
|         ); | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl TryFrom<&IndexerOpts> for IndexerConfig { | ||||
|     type Error = anyhow::Error; | ||||
|  | ||||
|     fn try_from(other: &IndexerOpts) -> Result<Self, Self::Error> { | ||||
|         let thread_pool = rayon::ThreadPoolBuilder::new() | ||||
|             .num_threads(*other.max_indexing_threads) | ||||
|             .build()?; | ||||
|  | ||||
|         Ok(Self { | ||||
|             log_every_n: Some(other.log_every_n), | ||||
|             max_nb_chunks: other.max_nb_chunks, | ||||
|             max_memory: other.max_indexing_memory.map(|b| b.get_bytes() as usize), | ||||
|             thread_pool: Some(thread_pool), | ||||
|             max_positions_per_attributes: None, | ||||
|             ..Default::default() | ||||
|         }) | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl Default for IndexerOpts { | ||||
|     fn default() -> Self { | ||||
|         Self { | ||||
|             log_every_n: 100_000, | ||||
|             max_nb_chunks: None, | ||||
|             max_indexing_memory: MaxMemory::default(), | ||||
|             max_indexing_threads: MaxThreads::default(), | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl SchedulerConfig { | ||||
|     pub fn export_to_env(self) { | ||||
|         let SchedulerConfig { | ||||
|             disable_auto_batching, | ||||
|         } = self; | ||||
|         export_to_env_if_not_present(DISABLE_AUTO_BATCHING, disable_auto_batching.to_string()); | ||||
|     } | ||||
| } | ||||
|  | ||||
| /// A type used to detect the max memory available and use 2/3 of it. | ||||
| #[derive(Debug, Clone, Copy, Serialize, Deserialize)] | ||||
| pub struct MaxMemory(Option<Byte>); | ||||
|  | ||||
| impl FromStr for MaxMemory { | ||||
|     type Err = ByteError; | ||||
|  | ||||
|     fn from_str(s: &str) -> Result<MaxMemory, ByteError> { | ||||
|         Byte::from_str(s).map(Some).map(MaxMemory) | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl Default for MaxMemory { | ||||
|     fn default() -> MaxMemory { | ||||
|         MaxMemory( | ||||
|             total_memory_bytes() | ||||
|                 .map(|bytes| bytes * 2 / 3) | ||||
|                 .map(Byte::from_bytes), | ||||
|         ) | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl fmt::Display for MaxMemory { | ||||
|     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { | ||||
|         match self.0 { | ||||
|             Some(memory) => write!(f, "{}", memory.get_appropriate_unit(true)), | ||||
|             None => f.write_str("unknown"), | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl Deref for MaxMemory { | ||||
|     type Target = Option<Byte>; | ||||
|  | ||||
|     fn deref(&self) -> &Self::Target { | ||||
|         &self.0 | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl MaxMemory { | ||||
|     pub fn unlimited() -> Self { | ||||
|         Self(None) | ||||
|     } | ||||
| } | ||||
|  | ||||
| /// Returns the total amount of bytes available or `None` if this system isn't supported. | ||||
| fn total_memory_bytes() -> Option<u64> { | ||||
|     if System::IS_SUPPORTED { | ||||
|         let memory_kind = RefreshKind::new().with_memory(); | ||||
|         let mut system = System::new_with_specifics(memory_kind); | ||||
|         system.refresh_memory(); | ||||
|         Some(system.total_memory() * 1024) // KiB into bytes | ||||
|     } else { | ||||
|         None | ||||
|     } | ||||
| } | ||||
|  | ||||
| #[derive(Debug, Clone, Copy, Serialize, Deserialize)] | ||||
| pub struct MaxThreads(usize); | ||||
|  | ||||
| impl FromStr for MaxThreads { | ||||
|     type Err = ParseIntError; | ||||
|  | ||||
|     fn from_str(s: &str) -> Result<Self, Self::Err> { | ||||
|         usize::from_str(s).map(Self) | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl Default for MaxThreads { | ||||
|     fn default() -> Self { | ||||
|         MaxThreads(num_cpus::get() / 2) | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl fmt::Display for MaxThreads { | ||||
|     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { | ||||
|         write!(f, "{}", self.0) | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl Deref for MaxThreads { | ||||
|     type Target = usize; | ||||
|  | ||||
|     fn deref(&self) -> &Self::Target { | ||||
|         &self.0 | ||||
|     } | ||||
| } | ||||
|  | ||||
| fn default_log_every_n() -> usize { | ||||
|     DEFAULT_LOG_EVERY_N | ||||
| } | ||||
| @@ -1,204 +0,0 @@ | ||||
| use std::fs; | ||||
| use std::path::{Path, PathBuf}; | ||||
| use std::sync::Arc; | ||||
| use std::time::Duration; | ||||
|  | ||||
| use anyhow::bail; | ||||
| use fs_extra::dir::{self, CopyOptions}; | ||||
| use log::{info, trace}; | ||||
| use meilisearch_auth::open_auth_store_env; | ||||
| use milli::heed::CompactionOption; | ||||
| use tokio::sync::RwLock; | ||||
| use tokio::time::sleep; | ||||
| use walkdir::WalkDir; | ||||
|  | ||||
| use crate::compression::from_tar_gz; | ||||
| use crate::index_controller::open_meta_env; | ||||
| use crate::index_controller::versioning::VERSION_FILE_NAME; | ||||
| use index_scheduler::IndexScheduler; | ||||
|  | ||||
| pub struct SnapshotService { | ||||
|     pub(crate) db_path: PathBuf, | ||||
|     pub(crate) snapshot_period: Duration, | ||||
|     pub(crate) snapshot_path: PathBuf, | ||||
|     pub(crate) index_size: usize, | ||||
|     pub(crate) meta_env_size: usize, | ||||
|     pub(crate) scheduler: IndexScheduler, | ||||
| } | ||||
|  | ||||
| impl SnapshotService { | ||||
|     pub async fn run(self) { | ||||
|         info!( | ||||
|             "Snapshot scheduled every {}s.", | ||||
|             self.snapshot_period.as_secs() | ||||
|         ); | ||||
|         loop { | ||||
|             let snapshot_job = SnapshotJob { | ||||
|                 dest_path: self.snapshot_path.clone(), | ||||
|                 src_path: self.db_path.clone(), | ||||
|                 meta_env_size: self.meta_env_size, | ||||
|                 index_size: self.index_size, | ||||
|             }; | ||||
|             // TODO: TAMO: reenable the snapshots | ||||
|             // self.scheduler.write().await.schedule_snapshot(snapshot_job); | ||||
|             sleep(self.snapshot_period).await; | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| pub fn load_snapshot( | ||||
|     db_path: impl AsRef<Path>, | ||||
|     snapshot_path: impl AsRef<Path>, | ||||
|     ignore_snapshot_if_db_exists: bool, | ||||
|     ignore_missing_snapshot: bool, | ||||
| ) -> anyhow::Result<()> { | ||||
|     let empty_db = crate::is_empty_db(&db_path); | ||||
|     let snapshot_path_exists = snapshot_path.as_ref().exists(); | ||||
|  | ||||
|     if empty_db && snapshot_path_exists { | ||||
|         match from_tar_gz(snapshot_path, &db_path) { | ||||
|             Ok(()) => Ok(()), | ||||
|             Err(e) => { | ||||
|                 //clean created db folder | ||||
|                 std::fs::remove_dir_all(&db_path)?; | ||||
|                 Err(e) | ||||
|             } | ||||
|         } | ||||
|     } else if !empty_db && !ignore_snapshot_if_db_exists { | ||||
|         bail!( | ||||
|             "database already exists at {:?}, try to delete it or rename it", | ||||
|             db_path | ||||
|                 .as_ref() | ||||
|                 .canonicalize() | ||||
|                 .unwrap_or_else(|_| db_path.as_ref().to_owned()) | ||||
|         ) | ||||
|     } else if !snapshot_path_exists && !ignore_missing_snapshot { | ||||
|         bail!("snapshot doesn't exist at {:?}", snapshot_path.as_ref()) | ||||
|     } else { | ||||
|         Ok(()) | ||||
|     } | ||||
| } | ||||
|  | ||||
| #[derive(Debug)] | ||||
| pub struct SnapshotJob { | ||||
|     dest_path: PathBuf, | ||||
|     src_path: PathBuf, | ||||
|  | ||||
|     meta_env_size: usize, | ||||
|     index_size: usize, | ||||
| } | ||||
|  | ||||
| impl SnapshotJob { | ||||
|     pub async fn run(self) -> anyhow::Result<()> { | ||||
|         tokio::task::spawn_blocking(|| self.run_sync()).await??; | ||||
|  | ||||
|         Ok(()) | ||||
|     } | ||||
|  | ||||
|     fn run_sync(self) -> anyhow::Result<()> { | ||||
|         trace!("Performing snapshot."); | ||||
|  | ||||
|         let snapshot_dir = self.dest_path.clone(); | ||||
|         std::fs::create_dir_all(&snapshot_dir)?; | ||||
|         let temp_snapshot_dir = tempfile::tempdir()?; | ||||
|         let temp_snapshot_path = temp_snapshot_dir.path(); | ||||
|  | ||||
|         self.snapshot_version_file(temp_snapshot_path)?; | ||||
|         self.snapshot_meta_env(temp_snapshot_path)?; | ||||
|         self.snapshot_file_store(temp_snapshot_path)?; | ||||
|         self.snapshot_indexes(temp_snapshot_path)?; | ||||
|         self.snapshot_auth(temp_snapshot_path)?; | ||||
|  | ||||
|         let db_name = self | ||||
|             .src_path | ||||
|             .file_name() | ||||
|             .and_then(|n| n.to_str()) | ||||
|             .unwrap_or("data.ms") | ||||
|             .to_string(); | ||||
|  | ||||
|         let snapshot_path = self.dest_path.join(format!("{}.snapshot", db_name)); | ||||
|         let temp_snapshot_file = tempfile::NamedTempFile::new_in(&snapshot_dir)?; | ||||
|         let temp_snapshot_file_path = temp_snapshot_file.path().to_owned(); | ||||
|         crate::compression::to_tar_gz(temp_snapshot_path, temp_snapshot_file_path)?; | ||||
|         let _file = temp_snapshot_file.persist(&snapshot_path)?; | ||||
|  | ||||
|         #[cfg(unix)] | ||||
|         { | ||||
|             use std::fs::Permissions; | ||||
|             use std::os::unix::fs::PermissionsExt; | ||||
|  | ||||
|             let perm = Permissions::from_mode(0o644); | ||||
|             _file.set_permissions(perm)?; | ||||
|         } | ||||
|  | ||||
|         trace!("Created snapshot in {:?}.", snapshot_path); | ||||
|  | ||||
|         Ok(()) | ||||
|     } | ||||
|  | ||||
|     fn snapshot_version_file(&self, path: &Path) -> anyhow::Result<()> { | ||||
|         let dst = path.join(VERSION_FILE_NAME); | ||||
|         let src = self.src_path.join(VERSION_FILE_NAME); | ||||
|  | ||||
|         fs::copy(src, dst)?; | ||||
|  | ||||
|         Ok(()) | ||||
|     } | ||||
|  | ||||
|     fn snapshot_meta_env(&self, path: &Path) -> anyhow::Result<()> { | ||||
|         let env = open_meta_env(&self.src_path, self.meta_env_size)?; | ||||
|  | ||||
|         let dst = path.join("data.mdb"); | ||||
|         env.copy_to_path(dst, milli::heed::CompactionOption::Enabled)?; | ||||
|  | ||||
|         Ok(()) | ||||
|     } | ||||
|  | ||||
|     fn snapshot_file_store(&self, path: &Path) -> anyhow::Result<()> { | ||||
|         // for now we simply copy the updates/updates_files | ||||
|         // FIXME(marin): We may copy more files than necessary, if new files are added while we are | ||||
|         // performing the snapshop. We need a way to filter them out. | ||||
|  | ||||
|         let dst = path.join("updates"); | ||||
|         fs::create_dir_all(&dst)?; | ||||
|         let options = CopyOptions::default(); | ||||
|         dir::copy(self.src_path.join("updates/updates_files"), dst, &options)?; | ||||
|  | ||||
|         Ok(()) | ||||
|     } | ||||
|  | ||||
|     fn snapshot_indexes(&self, path: &Path) -> anyhow::Result<()> { | ||||
|         let indexes_path = self.src_path.join("indexes/"); | ||||
|         let dst = path.join("indexes/"); | ||||
|  | ||||
|         for entry in WalkDir::new(indexes_path).max_depth(1).into_iter().skip(1) { | ||||
|             let entry = entry?; | ||||
|             let name = entry.file_name(); | ||||
|             let dst = dst.join(name); | ||||
|  | ||||
|             std::fs::create_dir_all(&dst)?; | ||||
|  | ||||
|             let dst = dst.join("data.mdb"); | ||||
|  | ||||
|             let mut options = milli::heed::EnvOpenOptions::new(); | ||||
|             options.map_size(self.index_size); | ||||
|             options.max_readers(1024); | ||||
|             let index = milli::Index::new(options, entry.path())?; | ||||
|             index.copy_to_path(dst, CompactionOption::Enabled)?; | ||||
|         } | ||||
|  | ||||
|         Ok(()) | ||||
|     } | ||||
|  | ||||
|     fn snapshot_auth(&self, path: &Path) -> anyhow::Result<()> { | ||||
|         let auth_path = self.src_path.join("auth"); | ||||
|         let dst = path.join("auth"); | ||||
|         std::fs::create_dir_all(&dst)?; | ||||
|         let dst = dst.join("data.mdb"); | ||||
|  | ||||
|         let env = open_auth_store_env(&auth_path)?; | ||||
|         env.copy_to_path(dst, milli::heed::CompactionOption::Enabled)?; | ||||
|  | ||||
|         Ok(()) | ||||
|     } | ||||
| } | ||||
| @@ -1,420 +0,0 @@ | ||||
| mod store; | ||||
|  | ||||
| use std::collections::HashSet; | ||||
| use std::io::{BufWriter, Write}; | ||||
| use std::path::Path; | ||||
| use std::sync::Arc; | ||||
|  | ||||
| use log::debug; | ||||
| use milli::heed::{Env, RwTxn}; | ||||
| use time::OffsetDateTime; | ||||
|  | ||||
| use super::batch::BatchContent; | ||||
| use super::error::TaskError; | ||||
| use super::scheduler::Processing; | ||||
| use super::task::{Task, TaskContent, TaskId}; | ||||
| use super::Result; | ||||
| use crate::tasks::task::TaskEvent; | ||||
| use crate::update_file_store::UpdateFileStore; | ||||
|  | ||||
| #[cfg(test)] | ||||
| pub use store::test::MockStore as Store; | ||||
| #[cfg(not(test))] | ||||
| pub use store::Store; | ||||
|  | ||||
| type FilterFn = Box<dyn Fn(&Task) -> bool + Sync + Send + 'static>; | ||||
|  | ||||
| /// Defines constraints to be applied when querying for Tasks from the store. | ||||
| #[derive(Default)] | ||||
| pub struct TaskFilter { | ||||
|     indexes: Option<HashSet<String>>, | ||||
|     filter_fn: Option<FilterFn>, | ||||
| } | ||||
|  | ||||
| impl TaskFilter { | ||||
|     fn pass(&self, task: &Task) -> bool { | ||||
|         match task.index_uid() { | ||||
|             Some(index_uid) => self | ||||
|                 .indexes | ||||
|                 .as_ref() | ||||
|                 .map_or(true, |indexes| indexes.contains(index_uid)), | ||||
|             None => false, | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     fn filtered_indexes(&self) -> Option<&HashSet<String>> { | ||||
|         self.indexes.as_ref() | ||||
|     } | ||||
|  | ||||
|     /// Adds an index to the filter, so the filter must match this index. | ||||
|     pub fn filter_index(&mut self, index: String) { | ||||
|         self.indexes | ||||
|             .get_or_insert_with(Default::default) | ||||
|             .insert(index); | ||||
|     } | ||||
|  | ||||
|     pub fn filter_fn(&mut self, f: FilterFn) { | ||||
|         self.filter_fn.replace(f); | ||||
|     } | ||||
| } | ||||
|  | ||||
| pub struct TaskStore { | ||||
|     store: Arc<Store>, | ||||
| } | ||||
|  | ||||
| impl Clone for TaskStore { | ||||
|     fn clone(&self) -> Self { | ||||
|         Self { | ||||
|             store: self.store.clone(), | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl TaskStore { | ||||
|     pub fn new(env: Arc<milli::heed::Env>) -> Result<Self> { | ||||
|         let store = Arc::new(Store::new(env)?); | ||||
|         Ok(Self { store }) | ||||
|     } | ||||
|  | ||||
|     pub async fn register(&self, content: TaskContent) -> Result<Task> { | ||||
|         debug!("registering update: {:?}", content); | ||||
|         let store = self.store.clone(); | ||||
|         let task = tokio::task::spawn_blocking(move || -> Result<Task> { | ||||
|             let mut txn = store.wtxn()?; | ||||
|             let next_task_id = store.next_task_id(&mut txn)?; | ||||
|             let created_at = TaskEvent::Created(OffsetDateTime::now_utc()); | ||||
|             let task = Task { | ||||
|                 id: next_task_id, | ||||
|                 content, | ||||
|                 events: vec![created_at], | ||||
|             }; | ||||
|  | ||||
|             store.put(&mut txn, &task)?; | ||||
|             txn.commit()?; | ||||
|  | ||||
|             Ok(task) | ||||
|         }) | ||||
|         .await??; | ||||
|  | ||||
|         Ok(task) | ||||
|     } | ||||
|  | ||||
|     pub fn register_raw_update(&self, wtxn: &mut RwTxn, task: &Task) -> Result<()> { | ||||
|         self.store.put(wtxn, task)?; | ||||
|         Ok(()) | ||||
|     } | ||||
|  | ||||
|     pub async fn get_task(&self, id: TaskId, filter: Option<TaskFilter>) -> Result<Task> { | ||||
|         let store = self.store.clone(); | ||||
|         let task = tokio::task::spawn_blocking(move || -> Result<_> { | ||||
|             let txn = store.rtxn()?; | ||||
|             let task = store.get(&txn, id)?; | ||||
|             Ok(task) | ||||
|         }) | ||||
|         .await?? | ||||
|         .ok_or(TaskError::UnexistingTask(id))?; | ||||
|  | ||||
|         match filter { | ||||
|             Some(filter) => filter | ||||
|                 .pass(&task) | ||||
|                 .then_some(task) | ||||
|                 .ok_or(TaskError::UnexistingTask(id)), | ||||
|             None => Ok(task), | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     /// This methods takes a `Processing` which contains the next task ids to process, and returns | ||||
|     /// the corresponding tasks along with the ownership to the passed processing. | ||||
|     /// | ||||
|     /// We need get_processing_tasks to take ownership over `Processing` because we need it to be | ||||
|     /// valid for 'static. | ||||
|     pub async fn get_processing_tasks( | ||||
|         &self, | ||||
|         processing: Processing, | ||||
|     ) -> Result<(Processing, BatchContent)> { | ||||
|         let store = self.store.clone(); | ||||
|         let tasks = tokio::task::spawn_blocking(move || -> Result<_> { | ||||
|             let txn = store.rtxn()?; | ||||
|  | ||||
|             let content = match processing { | ||||
|                 Processing::DocumentAdditions(ref ids) => { | ||||
|                     let mut tasks = Vec::new(); | ||||
|  | ||||
|                     for id in ids.iter() { | ||||
|                         let task = store | ||||
|                             .get(&txn, *id)? | ||||
|                             .ok_or(TaskError::UnexistingTask(*id))?; | ||||
|                         tasks.push(task); | ||||
|                     } | ||||
|                     BatchContent::DocumentsAdditionBatch(tasks) | ||||
|                 } | ||||
|                 Processing::IndexUpdate(id) => { | ||||
|                     let task = store.get(&txn, id)?.ok_or(TaskError::UnexistingTask(id))?; | ||||
|                     BatchContent::IndexUpdate(task) | ||||
|                 } | ||||
|                 Processing::Dump(id) => { | ||||
|                     let task = store.get(&txn, id)?.ok_or(TaskError::UnexistingTask(id))?; | ||||
|                     debug_assert!(matches!(task.content, TaskContent::Dump { .. })); | ||||
|                     BatchContent::Dump(task) | ||||
|                 } | ||||
|                 Processing::Nothing => BatchContent::Empty, | ||||
|             }; | ||||
|  | ||||
|             Ok((processing, content)) | ||||
|         }) | ||||
|         .await??; | ||||
|  | ||||
|         Ok(tasks) | ||||
|     } | ||||
|  | ||||
|     pub async fn update_tasks(&self, tasks: Vec<Task>) -> Result<Vec<Task>> { | ||||
|         let store = self.store.clone(); | ||||
|  | ||||
|         let tasks = tokio::task::spawn_blocking(move || -> Result<_> { | ||||
|             let mut txn = store.wtxn()?; | ||||
|  | ||||
|             for task in &tasks { | ||||
|                 store.put(&mut txn, task)?; | ||||
|             } | ||||
|  | ||||
|             txn.commit()?; | ||||
|  | ||||
|             Ok(tasks) | ||||
|         }) | ||||
|         .await??; | ||||
|  | ||||
|         Ok(tasks) | ||||
|     } | ||||
|  | ||||
|     pub async fn fetch_unfinished_tasks(&self, offset: Option<TaskId>) -> Result<Vec<Task>> { | ||||
|         let store = self.store.clone(); | ||||
|  | ||||
|         tokio::task::spawn_blocking(move || { | ||||
|             let txn = store.rtxn()?; | ||||
|             let tasks = store.fetch_unfinished_tasks(&txn, offset)?; | ||||
|             Ok(tasks) | ||||
|         }) | ||||
|         .await? | ||||
|     } | ||||
|  | ||||
|     pub async fn list_tasks( | ||||
|         &self, | ||||
|         offset: Option<TaskId>, | ||||
|         filter: Option<TaskFilter>, | ||||
|         limit: Option<usize>, | ||||
|     ) -> Result<Vec<Task>> { | ||||
|         let store = self.store.clone(); | ||||
|  | ||||
|         tokio::task::spawn_blocking(move || { | ||||
|             let txn = store.rtxn()?; | ||||
|             let tasks = store.list_tasks(&txn, offset, filter, limit)?; | ||||
|             Ok(tasks) | ||||
|         }) | ||||
|         .await? | ||||
|     } | ||||
|  | ||||
|     pub async fn dump( | ||||
|         env: Arc<Env>, | ||||
|         dir_path: impl AsRef<Path>, | ||||
|         update_file_store: UpdateFileStore, | ||||
|     ) -> Result<()> { | ||||
|         let store = Self::new(env)?; | ||||
|         let update_dir = dir_path.as_ref().join("updates"); | ||||
|         let updates_file = update_dir.join("data.jsonl"); | ||||
|         let tasks = store.list_tasks(None, None, None).await?; | ||||
|  | ||||
|         let dir_path = dir_path.as_ref().to_path_buf(); | ||||
|         tokio::task::spawn_blocking(move || -> Result<()> { | ||||
|             std::fs::create_dir(&update_dir)?; | ||||
|             let updates_file = std::fs::File::create(updates_file)?; | ||||
|             let mut updates_file = BufWriter::new(updates_file); | ||||
|  | ||||
|             for task in tasks { | ||||
|                 serde_json::to_writer(&mut updates_file, &task)?; | ||||
|                 updates_file.write_all(b"\n")?; | ||||
|  | ||||
|                 if !task.is_finished() { | ||||
|                     if let Some(content_uuid) = task.get_content_uuid() { | ||||
|                         update_file_store.dump(content_uuid, &dir_path)?; | ||||
|                     } | ||||
|                 } | ||||
|             } | ||||
|             updates_file.flush()?; | ||||
|             Ok(()) | ||||
|         }) | ||||
|         .await??; | ||||
|  | ||||
|         Ok(()) | ||||
|     } | ||||
|  | ||||
|     pub fn load_dump(src: impl AsRef<Path>, env: Arc<Env>) -> anyhow::Result<()> { | ||||
|         // create a dummy update field store, since it is not needed right now. | ||||
|         let store = Self::new(env.clone())?; | ||||
|  | ||||
|         let src_update_path = src.as_ref().join("updates"); | ||||
|         let update_data = std::fs::File::open(&src_update_path.join("data.jsonl"))?; | ||||
|         let update_data = std::io::BufReader::new(update_data); | ||||
|  | ||||
|         let stream = serde_json::Deserializer::from_reader(update_data).into_iter::<Task>(); | ||||
|  | ||||
|         let mut wtxn = env.write_txn()?; | ||||
|         for entry in stream { | ||||
|             store.register_raw_update(&mut wtxn, &entry?)?; | ||||
|         } | ||||
|         wtxn.commit()?; | ||||
|  | ||||
|         Ok(()) | ||||
|     } | ||||
| } | ||||
|  | ||||
| #[cfg(test)] | ||||
| pub mod test { | ||||
|     use crate::tasks::{scheduler::Processing, task_store::store::test::tmp_env}; | ||||
|  | ||||
|     use super::*; | ||||
|  | ||||
|     use meilisearch_types::index_uid::IndexUid; | ||||
|     use nelson::Mocker; | ||||
|     use proptest::{ | ||||
|         strategy::Strategy, | ||||
|         test_runner::{Config, TestRunner}, | ||||
|     }; | ||||
|  | ||||
|     pub enum MockTaskStore { | ||||
|         Real(TaskStore), | ||||
|         Mock(Arc<Mocker>), | ||||
|     } | ||||
|  | ||||
|     impl Clone for MockTaskStore { | ||||
|         fn clone(&self) -> Self { | ||||
|             match self { | ||||
|                 Self::Real(x) => Self::Real(x.clone()), | ||||
|                 Self::Mock(x) => Self::Mock(x.clone()), | ||||
|             } | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     impl MockTaskStore { | ||||
|         pub fn new(env: Arc<milli::heed::Env>) -> Result<Self> { | ||||
|             Ok(Self::Real(TaskStore::new(env)?)) | ||||
|         } | ||||
|  | ||||
|         pub async fn dump( | ||||
|             env: Arc<milli::heed::Env>, | ||||
|             path: impl AsRef<Path>, | ||||
|             update_file_store: UpdateFileStore, | ||||
|         ) -> Result<()> { | ||||
|             TaskStore::dump(env, path, update_file_store).await | ||||
|         } | ||||
|  | ||||
|         pub fn mock(mocker: Mocker) -> Self { | ||||
|             Self::Mock(Arc::new(mocker)) | ||||
|         } | ||||
|  | ||||
|         pub async fn update_tasks(&self, tasks: Vec<Task>) -> Result<Vec<Task>> { | ||||
|             match self { | ||||
|                 Self::Real(s) => s.update_tasks(tasks).await, | ||||
|                 Self::Mock(m) => unsafe { | ||||
|                     m.get::<_, Result<Vec<Task>>>("update_tasks").call(tasks) | ||||
|                 }, | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         pub async fn get_task(&self, id: TaskId, filter: Option<TaskFilter>) -> Result<Task> { | ||||
|             match self { | ||||
|                 Self::Real(s) => s.get_task(id, filter).await, | ||||
|                 Self::Mock(m) => unsafe { m.get::<_, Result<Task>>("get_task").call((id, filter)) }, | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         pub async fn get_processing_tasks( | ||||
|             &self, | ||||
|             tasks: Processing, | ||||
|         ) -> Result<(Processing, BatchContent)> { | ||||
|             match self { | ||||
|                 Self::Real(s) => s.get_processing_tasks(tasks).await, | ||||
|                 Self::Mock(m) => unsafe { m.get("get_pending_task").call(tasks) }, | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         pub async fn fetch_unfinished_tasks(&self, from: Option<TaskId>) -> Result<Vec<Task>> { | ||||
|             match self { | ||||
|                 Self::Real(s) => s.fetch_unfinished_tasks(from).await, | ||||
|                 Self::Mock(m) => unsafe { m.get("fetch_unfinished_tasks").call(from) }, | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         pub async fn list_tasks( | ||||
|             &self, | ||||
|             from: Option<TaskId>, | ||||
|             filter: Option<TaskFilter>, | ||||
|             limit: Option<usize>, | ||||
|         ) -> Result<Vec<Task>> { | ||||
|             match self { | ||||
|                 Self::Real(s) => s.list_tasks(from, filter, limit).await, | ||||
|                 Self::Mock(m) => unsafe { m.get("list_tasks").call((from, filter, limit)) }, | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         pub async fn register(&self, content: TaskContent) -> Result<Task> { | ||||
|             match self { | ||||
|                 Self::Real(s) => s.register(content).await, | ||||
|                 Self::Mock(_m) => todo!(), | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         pub fn register_raw_update(&self, wtxn: &mut RwTxn, task: &Task) -> Result<()> { | ||||
|             match self { | ||||
|                 Self::Real(s) => s.register_raw_update(wtxn, task), | ||||
|                 Self::Mock(_m) => todo!(), | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         pub fn load_dump(path: impl AsRef<Path>, env: Arc<Env>) -> anyhow::Result<()> { | ||||
|             TaskStore::load_dump(path, env) | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     #[test] | ||||
|     fn test_increment_task_id() { | ||||
|         let tmp = tmp_env(); | ||||
|         let store = Store::new(tmp.env()).unwrap(); | ||||
|  | ||||
|         let mut txn = store.wtxn().unwrap(); | ||||
|         assert_eq!(store.next_task_id(&mut txn).unwrap(), 0); | ||||
|         txn.abort().unwrap(); | ||||
|  | ||||
|         let gen_task = |id: TaskId| Task { | ||||
|             id, | ||||
|             content: TaskContent::IndexCreation { | ||||
|                 primary_key: None, | ||||
|                 index_uid: IndexUid::new_unchecked("test"), | ||||
|             }, | ||||
|             events: Vec::new(), | ||||
|         }; | ||||
|  | ||||
|         let mut runner = TestRunner::new(Config::default()); | ||||
|         runner | ||||
|             .run(&(0..100u32).prop_map(gen_task), |task| { | ||||
|                 let mut txn = store.wtxn().unwrap(); | ||||
|                 let previous_id = store.next_task_id(&mut txn).unwrap(); | ||||
|  | ||||
|                 store.put(&mut txn, &task).unwrap(); | ||||
|  | ||||
|                 let next_id = store.next_task_id(&mut txn).unwrap(); | ||||
|  | ||||
|                 // if we put a task whose task_id is less than the next_id, then the next_id remains | ||||
|                 // unchanged, otherwise it becomes task.id + 1 | ||||
|                 if task.id < previous_id { | ||||
|                     assert_eq!(next_id, previous_id) | ||||
|                 } else { | ||||
|                     assert_eq!(next_id, task.id + 1); | ||||
|                 } | ||||
|  | ||||
|                 txn.commit().unwrap(); | ||||
|  | ||||
|                 Ok(()) | ||||
|             }) | ||||
|             .unwrap(); | ||||
|     } | ||||
| } | ||||
| @@ -1,377 +0,0 @@ | ||||
| #[allow(clippy::upper_case_acronyms)] | ||||
|  | ||||
| type BEU32 = milli::heed::zerocopy::U32<milli::heed::byteorder::BE>; | ||||
|  | ||||
| const INDEX_UIDS_TASK_IDS: &str = "index-uids-task-ids"; | ||||
| const TASKS: &str = "tasks"; | ||||
|  | ||||
| use std::collections::HashSet; | ||||
| use std::ops::Bound::{Excluded, Unbounded}; | ||||
| use std::result::Result as StdResult; | ||||
| use std::sync::Arc; | ||||
|  | ||||
| use milli::heed::types::{OwnedType, SerdeJson, Str}; | ||||
| use milli::heed::{Database, Env, RoTxn, RwTxn}; | ||||
| use milli::heed_codec::RoaringBitmapCodec; | ||||
| use roaring::RoaringBitmap; | ||||
|  | ||||
| use crate::tasks::task::{Task, TaskId}; | ||||
|  | ||||
| use super::super::Result; | ||||
| use super::TaskFilter; | ||||
|  | ||||
| pub struct Store { | ||||
|     env: Arc<Env>, | ||||
|     /// Maps an index uid to the set of tasks ids associated to it. | ||||
|     index_uid_task_ids: Database<Str, RoaringBitmapCodec>, | ||||
|     tasks: Database<OwnedType<BEU32>, SerdeJson<Task>>, | ||||
| } | ||||
|  | ||||
| impl Drop for Store { | ||||
|     fn drop(&mut self) { | ||||
|         if Arc::strong_count(&self.env) == 1 { | ||||
|             self.env.as_ref().clone().prepare_for_closing(); | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl Store { | ||||
|     /// Create a new store from the specified `Path`. | ||||
|     /// Be really cautious when calling this function, the returned `Store` may | ||||
|     /// be in an invalid state, with dangling processing tasks. | ||||
|     /// You want to patch  all un-finished tasks and put them in your pending | ||||
|     /// queue with the `reset_and_return_unfinished_update` method. | ||||
|     pub fn new(env: Arc<milli::heed::Env>) -> Result<Self> { | ||||
|         let index_uid_task_ids = env.create_database(Some(INDEX_UIDS_TASK_IDS))?; | ||||
|         let tasks = env.create_database(Some(TASKS))?; | ||||
|  | ||||
|         Ok(Self { | ||||
|             env, | ||||
|             index_uid_task_ids, | ||||
|             tasks, | ||||
|         }) | ||||
|     } | ||||
|  | ||||
|     pub fn wtxn(&self) -> Result<RwTxn> { | ||||
|         Ok(self.env.write_txn()?) | ||||
|     } | ||||
|  | ||||
|     pub fn rtxn(&self) -> Result<RoTxn> { | ||||
|         Ok(self.env.read_txn()?) | ||||
|     } | ||||
|  | ||||
|     /// Returns the id for the next task. | ||||
|     /// | ||||
|     /// The required `mut txn` acts as a reservation system. It guarantees that as long as you commit | ||||
|     /// the task to the store in the same transaction, no one else will have this task id. | ||||
|     pub fn next_task_id(&self, txn: &mut RwTxn) -> Result<TaskId> { | ||||
|         let id = self | ||||
|             .tasks | ||||
|             .lazily_decode_data() | ||||
|             .last(txn)? | ||||
|             .map(|(id, _)| id.get() + 1) | ||||
|             .unwrap_or(0); | ||||
|         Ok(id) | ||||
|     } | ||||
|  | ||||
|     pub fn put(&self, txn: &mut RwTxn, task: &Task) -> Result<()> { | ||||
|         self.tasks.put(txn, &BEU32::new(task.id), task)?; | ||||
|         // only add the task to the indexes index if it has an index_uid | ||||
|         if let Some(index_uid) = task.index_uid() { | ||||
|             let mut tasks_set = self | ||||
|                 .index_uid_task_ids | ||||
|                 .get(txn, index_uid)? | ||||
|                 .unwrap_or_default(); | ||||
|  | ||||
|             tasks_set.insert(task.id); | ||||
|  | ||||
|             self.index_uid_task_ids.put(txn, index_uid, &tasks_set)?; | ||||
|         } | ||||
|  | ||||
|         Ok(()) | ||||
|     } | ||||
|  | ||||
|     pub fn get(&self, txn: &RoTxn, id: TaskId) -> Result<Option<Task>> { | ||||
|         let task = self.tasks.get(txn, &BEU32::new(id))?; | ||||
|         Ok(task) | ||||
|     } | ||||
|  | ||||
|     /// Returns the unfinished tasks starting from the given taskId in ascending order. | ||||
|     pub fn fetch_unfinished_tasks(&self, txn: &RoTxn, from: Option<TaskId>) -> Result<Vec<Task>> { | ||||
|         // We must NEVER re-enqueue an already processed task! It's content uuid would point to an unexisting file. | ||||
|         // | ||||
|         // TODO(marin): This may create some latency when the first batch lazy loads the pending updates. | ||||
|         let from = from.unwrap_or_default(); | ||||
|  | ||||
|         let result: StdResult<Vec<_>, milli::heed::Error> = self | ||||
|             .tasks | ||||
|             .range(txn, &(BEU32::new(from)..))? | ||||
|             .map(|r| r.map(|(_, t)| t)) | ||||
|             .filter(|result| result.as_ref().map_or(true, |t| !t.is_finished())) | ||||
|             .collect(); | ||||
|  | ||||
|         result.map_err(Into::into) | ||||
|     } | ||||
|  | ||||
|     /// Returns all the tasks starting from the given taskId and going in descending order. | ||||
|     pub fn list_tasks( | ||||
|         &self, | ||||
|         txn: &RoTxn, | ||||
|         from: Option<TaskId>, | ||||
|         filter: Option<TaskFilter>, | ||||
|         limit: Option<usize>, | ||||
|     ) -> Result<Vec<Task>> { | ||||
|         let from = match from { | ||||
|             Some(from) => from, | ||||
|             None => self.tasks.last(txn)?.map_or(0, |(id, _)| id.get()), | ||||
|         }; | ||||
|  | ||||
|         let filter_fn = |task: &Task| { | ||||
|             filter | ||||
|                 .as_ref() | ||||
|                 .and_then(|f| f.filter_fn.as_ref()) | ||||
|                 .map_or(true, |f| f(task)) | ||||
|         }; | ||||
|  | ||||
|         let result: Result<Vec<_>> = match filter.as_ref().and_then(|f| f.filtered_indexes()) { | ||||
|             Some(indexes) => self | ||||
|                 .compute_candidates(txn, indexes, from)? | ||||
|                 .filter(|result| result.as_ref().map_or(true, filter_fn)) | ||||
|                 .take(limit.unwrap_or(usize::MAX)) | ||||
|                 .collect(), | ||||
|             None => self | ||||
|                 .tasks | ||||
|                 .rev_range(txn, &(..=BEU32::new(from)))? | ||||
|                 .map(|r| r.map(|(_, t)| t).map_err(Into::into)) | ||||
|                 .filter(|result| result.as_ref().map_or(true, filter_fn)) | ||||
|                 .take(limit.unwrap_or(usize::MAX)) | ||||
|                 .collect(), | ||||
|         }; | ||||
|  | ||||
|         result.map_err(Into::into) | ||||
|     } | ||||
|  | ||||
|     fn compute_candidates<'a>( | ||||
|         &'a self, | ||||
|         txn: &'a RoTxn, | ||||
|         indexes: &HashSet<String>, | ||||
|         from: TaskId, | ||||
|     ) -> Result<impl Iterator<Item = Result<Task>> + 'a> { | ||||
|         let mut candidates = RoaringBitmap::new(); | ||||
|  | ||||
|         for index_uid in indexes { | ||||
|             if let Some(tasks_set) = self.index_uid_task_ids.get(txn, index_uid)? { | ||||
|                 candidates |= tasks_set; | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         candidates.remove_range((Excluded(from), Unbounded)); | ||||
|  | ||||
|         let iter = candidates | ||||
|             .into_iter() | ||||
|             .rev() | ||||
|             .filter_map(|id| self.get(txn, id).transpose()); | ||||
|  | ||||
|         Ok(iter) | ||||
|     } | ||||
| } | ||||
|  | ||||
| #[cfg(test)] | ||||
| pub mod test { | ||||
|     use itertools::Itertools; | ||||
|     use meilisearch_types::index_uid::IndexUid; | ||||
|     use milli::heed::EnvOpenOptions; | ||||
|     use nelson::Mocker; | ||||
|     use tempfile::TempDir; | ||||
|  | ||||
|     use crate::tasks::task::TaskContent; | ||||
|  | ||||
|     use super::*; | ||||
|  | ||||
|     /// TODO: use this mock to test the task store properly. | ||||
|     #[allow(dead_code)] | ||||
|     pub enum MockStore { | ||||
|         Real(Store), | ||||
|         Fake(Mocker), | ||||
|     } | ||||
|  | ||||
|     pub struct TmpEnv(TempDir, Arc<milli::heed::Env>); | ||||
|  | ||||
|     impl TmpEnv { | ||||
|         pub fn env(&self) -> Arc<milli::heed::Env> { | ||||
|             self.1.clone() | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     pub fn tmp_env() -> TmpEnv { | ||||
|         let tmp = tempfile::tempdir().unwrap(); | ||||
|  | ||||
|         let mut options = EnvOpenOptions::new(); | ||||
|         options.map_size(4096 * 100000); | ||||
|         options.max_dbs(1000); | ||||
|         let env = Arc::new(options.open(tmp.path()).unwrap()); | ||||
|  | ||||
|         TmpEnv(tmp, env) | ||||
|     } | ||||
|  | ||||
|     impl MockStore { | ||||
|         pub fn new(env: Arc<milli::heed::Env>) -> Result<Self> { | ||||
|             Ok(Self::Real(Store::new(env)?)) | ||||
|         } | ||||
|  | ||||
|         pub fn wtxn(&self) -> Result<RwTxn> { | ||||
|             match self { | ||||
|                 MockStore::Real(index) => index.wtxn(), | ||||
|                 MockStore::Fake(_) => todo!(), | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         pub fn rtxn(&self) -> Result<RoTxn> { | ||||
|             match self { | ||||
|                 MockStore::Real(index) => index.rtxn(), | ||||
|                 MockStore::Fake(_) => todo!(), | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         pub fn next_task_id(&self, txn: &mut RwTxn) -> Result<TaskId> { | ||||
|             match self { | ||||
|                 MockStore::Real(index) => index.next_task_id(txn), | ||||
|                 MockStore::Fake(_) => todo!(), | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         pub fn put(&self, txn: &mut RwTxn, task: &Task) -> Result<()> { | ||||
|             match self { | ||||
|                 MockStore::Real(index) => index.put(txn, task), | ||||
|                 MockStore::Fake(_) => todo!(), | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         pub fn get(&self, txn: &RoTxn, id: TaskId) -> Result<Option<Task>> { | ||||
|             match self { | ||||
|                 MockStore::Real(index) => index.get(txn, id), | ||||
|                 MockStore::Fake(_) => todo!(), | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         pub fn fetch_unfinished_tasks( | ||||
|             &self, | ||||
|             txn: &RoTxn, | ||||
|             from: Option<TaskId>, | ||||
|         ) -> Result<Vec<Task>> { | ||||
|             match self { | ||||
|                 MockStore::Real(index) => index.fetch_unfinished_tasks(txn, from), | ||||
|                 MockStore::Fake(_) => todo!(), | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         pub fn list_tasks( | ||||
|             &self, | ||||
|             txn: &RoTxn, | ||||
|             from: Option<TaskId>, | ||||
|             filter: Option<TaskFilter>, | ||||
|             limit: Option<usize>, | ||||
|         ) -> Result<Vec<Task>> { | ||||
|             match self { | ||||
|                 MockStore::Real(index) => index.list_tasks(txn, from, filter, limit), | ||||
|                 MockStore::Fake(_) => todo!(), | ||||
|             } | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     #[test] | ||||
|     fn test_ordered_filtered_updates() { | ||||
|         let tmp = tmp_env(); | ||||
|         let store = Store::new(tmp.env()).unwrap(); | ||||
|  | ||||
|         let tasks = (0..100) | ||||
|             .map(|_| Task { | ||||
|                 id: rand::random(), | ||||
|                 content: TaskContent::IndexDeletion { | ||||
|                     index_uid: IndexUid::new_unchecked("test"), | ||||
|                 }, | ||||
|                 events: vec![], | ||||
|             }) | ||||
|             .collect::<Vec<_>>(); | ||||
|  | ||||
|         let mut txn = store.env.write_txn().unwrap(); | ||||
|         tasks | ||||
|             .iter() | ||||
|             .try_for_each(|t| store.put(&mut txn, t)) | ||||
|             .unwrap(); | ||||
|  | ||||
|         let mut filter = TaskFilter::default(); | ||||
|         filter.filter_index("test".into()); | ||||
|  | ||||
|         let tasks = store.list_tasks(&txn, None, Some(filter), None).unwrap(); | ||||
|  | ||||
|         assert!(tasks | ||||
|             .iter() | ||||
|             .map(|t| t.id) | ||||
|             .tuple_windows() | ||||
|             .all(|(a, b)| a > b)); | ||||
|     } | ||||
|  | ||||
|     #[test] | ||||
|     fn test_filter_same_index_prefix() { | ||||
|         let tmp = tmp_env(); | ||||
|         let store = Store::new(tmp.env()).unwrap(); | ||||
|  | ||||
|         let task_1 = Task { | ||||
|             id: 1, | ||||
|             content: TaskContent::IndexDeletion { | ||||
|                 index_uid: IndexUid::new_unchecked("test"), | ||||
|             }, | ||||
|             events: vec![], | ||||
|         }; | ||||
|  | ||||
|         let task_2 = Task { | ||||
|             id: 0, | ||||
|             content: TaskContent::IndexDeletion { | ||||
|                 index_uid: IndexUid::new_unchecked("test1"), | ||||
|             }, | ||||
|             events: vec![], | ||||
|         }; | ||||
|  | ||||
|         let mut txn = store.wtxn().unwrap(); | ||||
|         store.put(&mut txn, &task_1).unwrap(); | ||||
|         store.put(&mut txn, &task_2).unwrap(); | ||||
|  | ||||
|         let mut filter = TaskFilter::default(); | ||||
|         filter.filter_index("test".into()); | ||||
|  | ||||
|         let tasks = store.list_tasks(&txn, None, Some(filter), None).unwrap(); | ||||
|  | ||||
|         txn.abort().unwrap(); | ||||
|         assert_eq!(tasks.len(), 1); | ||||
|         assert_eq!(tasks.first().as_ref().unwrap().index_uid().unwrap(), "test"); | ||||
|  | ||||
|         // same thing but invert the ids | ||||
|         let task_1 = Task { | ||||
|             id: 0, | ||||
|             content: TaskContent::IndexDeletion { | ||||
|                 index_uid: IndexUid::new_unchecked("test"), | ||||
|             }, | ||||
|             events: vec![], | ||||
|         }; | ||||
|         let task_2 = Task { | ||||
|             id: 1, | ||||
|             content: TaskContent::IndexDeletion { | ||||
|                 index_uid: IndexUid::new_unchecked("test1"), | ||||
|             }, | ||||
|             events: vec![], | ||||
|         }; | ||||
|  | ||||
|         let mut txn = store.wtxn().unwrap(); | ||||
|         store.put(&mut txn, &task_1).unwrap(); | ||||
|         store.put(&mut txn, &task_2).unwrap(); | ||||
|  | ||||
|         let mut filter = TaskFilter::default(); | ||||
|         filter.filter_index("test".into()); | ||||
|  | ||||
|         let tasks = store.list_tasks(&txn, None, Some(filter), None).unwrap(); | ||||
|  | ||||
|         assert_eq!(tasks.len(), 1); | ||||
|         assert_eq!(tasks.first().as_ref().unwrap().index_uid().unwrap(), "test"); | ||||
|     } | ||||
| } | ||||
		Reference in New Issue
	
	Block a user