mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-25 21:16:28 +00:00 
			
		
		
		
	jsonl support
This commit is contained in:
		| @@ -11,7 +11,7 @@ use milli::update::Setting; | ||||
| use serde::{Deserialize, Deserializer, Serialize}; | ||||
| use uuid::Uuid; | ||||
|  | ||||
| use crate::document_formats::read_jsonl; | ||||
| use crate::document_formats::read_ndjson; | ||||
| use crate::index::apply_settings_to_builder; | ||||
| use crate::index::update_handler::UpdateHandler; | ||||
| use crate::index_controller::index_resolver::uuid_store::HeedUuidStore; | ||||
| @@ -124,7 +124,7 @@ fn load_index( | ||||
|  | ||||
|     let mut tmp_doc_file = tempfile::tempfile()?; | ||||
|  | ||||
|     read_jsonl(reader, &mut tmp_doc_file)?; | ||||
|     read_ndjson(reader, &mut tmp_doc_file)?; | ||||
|  | ||||
|     tmp_doc_file.seek(SeekFrom::Start(0))?; | ||||
|  | ||||
| @@ -213,7 +213,7 @@ impl From<Settings> for index_controller::Settings<Unchecked> { | ||||
|     } | ||||
| } | ||||
|  | ||||
| // /// Extract Settings from `settings.json` file present at provided `dir_path` | ||||
| /// Extract Settings from `settings.json` file present at provided `dir_path` | ||||
| fn import_settings(dir_path: impl AsRef<Path>) -> anyhow::Result<Settings> { | ||||
|     let path = dir_path.as_ref().join("settings.json"); | ||||
|     let file = File::open(path)?; | ||||
|   | ||||
| @@ -72,6 +72,7 @@ pub struct IndexController { | ||||
| pub enum DocumentAdditionFormat { | ||||
|     Json, | ||||
|     Csv, | ||||
|     Ndjson, | ||||
| } | ||||
|  | ||||
| #[derive(Serialize, Debug)] | ||||
|   | ||||
| @@ -10,7 +10,7 @@ use uuid::Uuid; | ||||
|  | ||||
| const UPDATE_FILES_PATH: &str = "updates/updates_files"; | ||||
|  | ||||
| use crate::document_formats::read_jsonl; | ||||
| use crate::document_formats::read_ndjson; | ||||
|  | ||||
| pub struct UpdateFile { | ||||
|     path: PathBuf, | ||||
| @@ -86,7 +86,7 @@ impl UpdateFileStore { | ||||
|                 .ok_or_else(|| anyhow::anyhow!("invalid update file name"))?; | ||||
|             let dst_path = dst_update_files_path.join(file_uuid); | ||||
|             let dst_file = BufWriter::new(File::create(dst_path)?); | ||||
|             read_jsonl(update_file, dst_file)?; | ||||
|             read_ndjson(update_file, dst_file)?; | ||||
|         } | ||||
|  | ||||
|         Ok(()) | ||||
| @@ -98,9 +98,9 @@ impl UpdateFileStore { | ||||
|         Ok(Self { path }) | ||||
|     } | ||||
|  | ||||
|     /// Created a new temporary update file. | ||||
|     /// Creates a new temporary update file. | ||||
|     /// | ||||
|     /// A call to persist is needed to persist in the database. | ||||
|     /// A call to `persist` is needed to persist the file in the database. | ||||
|     pub fn new_update(&self) -> Result<(Uuid, UpdateFile)> { | ||||
|         let file = NamedTempFile::new()?; | ||||
|         let uuid = Uuid::new_v4(); | ||||
| @@ -110,14 +110,14 @@ impl UpdateFileStore { | ||||
|         Ok((uuid, update_file)) | ||||
|     } | ||||
|  | ||||
|     /// Returns a the file corresponding to the requested uuid. | ||||
|     /// Returns the file corresponding to the requested uuid. | ||||
|     pub fn get_update(&self, uuid: Uuid) -> Result<File> { | ||||
|         let path = self.path.join(uuid.to_string()); | ||||
|         let file = File::open(path)?; | ||||
|         Ok(file) | ||||
|     } | ||||
|  | ||||
|     /// Copies the content of the update file poited to by uuid to dst directory. | ||||
|     /// Copies the content of the update file pointed to by `uuid` to the `dst` directory. | ||||
|     pub fn snapshot(&self, uuid: Uuid, dst: impl AsRef<Path>) -> Result<()> { | ||||
|         let src = self.path.join(uuid.to_string()); | ||||
|         let mut dst = dst.as_ref().join(UPDATE_FILES_PATH); | ||||
| @@ -127,7 +127,7 @@ impl UpdateFileStore { | ||||
|         Ok(()) | ||||
|     } | ||||
|  | ||||
|     /// Peform a dump of the given update file uuid into the provided snapshot path. | ||||
|     /// Performs a dump of the given update file uuid into the provided dump path. | ||||
|     pub fn dump(&self, uuid: Uuid, dump_path: impl AsRef<Path>) -> Result<()> { | ||||
|         let uuid_string = uuid.to_string(); | ||||
|         let update_file_path = self.path.join(&uuid_string); | ||||
| @@ -140,7 +140,8 @@ impl UpdateFileStore { | ||||
|         let mut document_reader = DocumentBatchReader::from_reader(update_file)?; | ||||
|  | ||||
|         let mut document_buffer = Map::new(); | ||||
|         // TODO: we need to find a way to do this more efficiently. (create a custom serializer to | ||||
|         // TODO: we need to find a way to do this more efficiently (e.g. create a custom | ||||
|         // serializer for jsonl). | ||||
|         while let Some((index, document)) = document_reader.next_document_with_index()? { | ||||
|             for (field_id, content) in document.iter() { | ||||
|   | ||||
| @@ -17,8 +17,6 @@ pub enum UpdateLoopError { | ||||
|     UnexistingUpdate(u64), | ||||
|     #[error("Internal error: {0}")] | ||||
|     Internal(Box<dyn Error + Send + Sync + 'static>), | ||||
|     //#[error("{0}")] | ||||
|     //IndexActor(#[from] IndexActorError), | ||||
|     #[error( | ||||
|         "update store was shut down due to a fatal error, please check your logs for more info." | ||||
|     )] | ||||
|   | ||||
| @@ -21,7 +21,7 @@ use uuid::Uuid; | ||||
| use self::error::{Result, UpdateLoopError}; | ||||
| pub use self::message::UpdateMsg; | ||||
| use self::store::{UpdateStore, UpdateStoreInfo}; | ||||
| use crate::document_formats::{read_csv, read_json}; | ||||
| use crate::document_formats::{read_csv, read_json, read_ndjson}; | ||||
| use crate::index::{Index, Settings, Unchecked}; | ||||
| use crate::index_controller::update_file_store::UpdateFileStore; | ||||
| use status::UpdateStatus; | ||||
| @@ -40,7 +40,7 @@ pub fn create_update_handler( | ||||
|     let (sender, receiver) = mpsc::channel(100); | ||||
|     let actor = UpdateLoop::new(update_store_size, receiver, path, index_resolver)?; | ||||
|  | ||||
|     tokio::task::spawn_local(actor.run()); | ||||
|     tokio::task::spawn(actor.run()); | ||||
|  | ||||
|     Ok(sender) | ||||
| } | ||||
| @@ -197,6 +197,7 @@ impl UpdateLoop { | ||||
|                     match format { | ||||
|                         DocumentAdditionFormat::Json => read_json(reader, &mut *update_file)?, | ||||
|                         DocumentAdditionFormat::Csv => read_csv(reader, &mut *update_file)?, | ||||
|                         DocumentAdditionFormat::Ndjson => read_ndjson(reader, &mut *update_file)?, | ||||
|                     } | ||||
|  | ||||
|                     update_file.persist()?; | ||||
|   | ||||
		Reference in New Issue
	
	Block a user