mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-25 04:56:28 +00:00 
			
		
		
		
	Merge #2523
2523: Improve the tasks error reporting when processed in batches r=irevoire a=Kerollmops This fixes #2478 by changing the behavior of the task handler when there is an error in a batch of document additions or updates. When a task in a batch contains a user error, we now report that task as failed with the right error message and continue processing the other tasks in the batch. Examples of user errors are an invalid geo field, or an invalid or missing document id. fixes #2582, #2478 Co-authored-by: Kerollmops <clement@meilisearch.com> Co-authored-by: ManyTheFish <many@meilisearch.com>
This commit is contained in:
		| @@ -1,17 +0,0 @@ | ||||
| use meilisearch_lib::heed::Env; | ||||
| use walkdir::WalkDir; | ||||
|  | ||||
| pub trait EnvSizer { | ||||
|     fn size(&self) -> u64; | ||||
| } | ||||
|  | ||||
| impl EnvSizer for Env { | ||||
|     fn size(&self) -> u64 { | ||||
|         WalkDir::new(self.path()) | ||||
|             .into_iter() | ||||
|             .filter_map(|entry| entry.ok()) | ||||
|             .filter_map(|entry| entry.metadata().ok()) | ||||
|             .filter(|metadata| metadata.is_file()) | ||||
|             .fold(0, |acc, m| acc + m.len()) | ||||
|     } | ||||
| } | ||||
| @@ -1,3 +0,0 @@ | ||||
| mod env; | ||||
|  | ||||
| pub use env::EnvSizer; | ||||
| @@ -5,7 +5,6 @@ pub mod analytics; | ||||
| pub mod task; | ||||
| #[macro_use] | ||||
| pub mod extractors; | ||||
| pub mod helpers; | ||||
| pub mod option; | ||||
| pub mod routes; | ||||
|  | ||||
| @@ -30,9 +29,9 @@ pub static AUTOBATCHING_ENABLED: AtomicBool = AtomicBool::new(false); | ||||
| pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<MeiliSearch> { | ||||
|     let mut meilisearch = MeiliSearch::builder(); | ||||
|  | ||||
|     // enable autobatching? | ||||
|     // disable autobatching? | ||||
|     AUTOBATCHING_ENABLED.store( | ||||
|         opt.scheduler_options.enable_auto_batching, | ||||
|         !opt.scheduler_options.disable_auto_batching, | ||||
|         std::sync::atomic::Ordering::Relaxed, | ||||
|     ); | ||||
|  | ||||
|   | ||||
| @@ -231,7 +231,7 @@ pub struct TaskView { | ||||
|     #[serde(serialize_with = "time::serde::rfc3339::option::serialize")] | ||||
|     finished_at: Option<OffsetDateTime>, | ||||
|     #[serde(skip_serializing_if = "Option::is_none")] | ||||
|     batch_uid: Option<Option<BatchId>>, | ||||
|     batch_uid: Option<BatchId>, | ||||
| } | ||||
|  | ||||
| impl From<Task> for TaskView { | ||||
| @@ -380,15 +380,15 @@ impl From<Task> for TaskView { | ||||
|  | ||||
|         let duration = finished_at.zip(started_at).map(|(tf, ts)| (tf - ts)); | ||||
|  | ||||
|         let batch_uid = if AUTOBATCHING_ENABLED.load(std::sync::atomic::Ordering::Relaxed) { | ||||
|             let id = events.iter().find_map(|e| match e { | ||||
|                 TaskEvent::Batched { batch_id, .. } => Some(*batch_id), | ||||
|                 _ => None, | ||||
|             }); | ||||
|             Some(id) | ||||
|         } else { | ||||
|             None | ||||
|         }; | ||||
|         let batch_uid = AUTOBATCHING_ENABLED | ||||
|             .load(std::sync::atomic::Ordering::Relaxed) | ||||
|             .then(|| { | ||||
|                 events.iter().find_map(|e| match e { | ||||
|                     TaskEvent::Batched { batch_id, .. } => Some(*batch_id), | ||||
|                     _ => None, | ||||
|                 }) | ||||
|             }) | ||||
|             .flatten(); | ||||
|  | ||||
|         Self { | ||||
|             uid: id, | ||||
|   | ||||
| @@ -1,5 +1,6 @@ | ||||
| use crate::common::{GetAllDocumentsOptions, Server}; | ||||
| use actix_web::test; | ||||
|  | ||||
| use meilisearch_http::{analytics, create_app}; | ||||
| use serde_json::{json, Value}; | ||||
| use time::{format_description::well_known::Rfc3339, OffsetDateTime}; | ||||
| @@ -326,7 +327,7 @@ async fn error_add_malformed_json_documents() { | ||||
|     assert_eq!( | ||||
|         response["message"], | ||||
|         json!( | ||||
|             r#"The `json` payload provided is malformed. `Couldn't serialize document value: invalid type: string "0123456789012345678901234567...890123456789", expected a documents, or a sequence of documents. at line 1 column 102`."# | ||||
|             r#"The `json` payload provided is malformed. `Couldn't serialize document value: invalid type: string "0123456789012345678901234567...890123456789012345678901234567890123456789", expected a sequence at line 1 column 102`."# | ||||
|         ) | ||||
|     ); | ||||
|     assert_eq!(response["code"], json!("malformed_payload")); | ||||
| @@ -349,9 +350,7 @@ async fn error_add_malformed_json_documents() { | ||||
|     assert_eq!(status_code, 400); | ||||
|     assert_eq!( | ||||
|         response["message"], | ||||
|         json!( | ||||
|             r#"The `json` payload provided is malformed. `Couldn't serialize document value: invalid type: string "0123456789012345678901234567...90123456789m", expected a documents, or a sequence of documents. at line 1 column 103`."# | ||||
|         ) | ||||
|         json!("The `json` payload provided is malformed. `Couldn't serialize document value: invalid type: string \"0123456789012345678901234567...90123456789012345678901234567890123456789m\", expected a sequence at line 1 column 103`.") | ||||
|     ); | ||||
|     assert_eq!(response["code"], json!("malformed_payload")); | ||||
|     assert_eq!(response["type"], json!("invalid_request")); | ||||
| @@ -388,7 +387,7 @@ async fn error_add_malformed_ndjson_documents() { | ||||
|     assert_eq!( | ||||
|         response["message"], | ||||
|         json!( | ||||
|             r#"The `ndjson` payload provided is malformed. `Couldn't serialize document value: key must be a string at line 1 column 2`."# | ||||
|             r#"The `ndjson` payload provided is malformed. `Couldn't serialize document value: key must be a string at line 2 column 2`."# | ||||
|         ) | ||||
|     ); | ||||
|     assert_eq!(response["code"], json!("malformed_payload")); | ||||
| @@ -411,9 +410,7 @@ async fn error_add_malformed_ndjson_documents() { | ||||
|     assert_eq!(status_code, 400); | ||||
|     assert_eq!( | ||||
|         response["message"], | ||||
|         json!( | ||||
|             r#"The `ndjson` payload provided is malformed. `Couldn't serialize document value: key must be a string at line 1 column 2`."# | ||||
|         ) | ||||
|         json!("The `ndjson` payload provided is malformed. `Couldn't serialize document value: key must be a string at line 2 column 2`.") | ||||
|     ); | ||||
|     assert_eq!(response["code"], json!("malformed_payload")); | ||||
|     assert_eq!(response["type"], json!("invalid_request")); | ||||
| @@ -1020,7 +1017,7 @@ async fn add_documents_invalid_geo_field() { | ||||
|     index.wait_task(2).await; | ||||
|     let (response, code) = index.get_task(2).await; | ||||
|     assert_eq!(code, 200); | ||||
|     assert_eq!(response["status"], "succeeded"); | ||||
|     assert_eq!(response["status"], "failed"); | ||||
| } | ||||
|  | ||||
| #[actix_rt::test] | ||||
| @@ -1099,3 +1096,62 @@ async fn add_documents_with_primary_key_twice() { | ||||
|     let (response, _code) = index.get_task(1).await; | ||||
|     assert_eq!(response["status"], "succeeded"); | ||||
| } | ||||
|  | ||||
| #[actix_rt::test] | ||||
| async fn batch_several_documents_addition() { | ||||
|     let server = Server::new().await; | ||||
|     let index = server.index("test"); | ||||
|  | ||||
|     let mut documents: Vec<_> = (0..150usize) | ||||
|         .into_iter() | ||||
|         .map(|id| { | ||||
|             json!( | ||||
|                 { | ||||
|                     "id": id, | ||||
|                     "title": "foo", | ||||
|                     "desc": "bar" | ||||
|                 } | ||||
|             ) | ||||
|         }) | ||||
|         .collect(); | ||||
|  | ||||
|     documents[100] = json!({"title": "error", "desc": "error"}); | ||||
|  | ||||
|     // enqueue batch of documents | ||||
|     let mut waiter = Vec::new(); | ||||
|     for chunk in documents.chunks(30) { | ||||
|         waiter.push(index.add_documents(json!(chunk), Some("id"))); | ||||
|     } | ||||
|  | ||||
|     // wait first batch of documents to finish | ||||
|     futures::future::join_all(waiter).await; | ||||
|     index.wait_task(4).await; | ||||
|  | ||||
|     // run a second round in which every chunk except the first contains an invalid document | ||||
|     documents[40] = json!({"title": "error", "desc": "error"}); | ||||
|     documents[70] = json!({"title": "error", "desc": "error"}); | ||||
|     documents[130] = json!({"title": "error", "desc": "error"}); | ||||
|     let mut waiter = Vec::new(); | ||||
|     for chunk in documents.chunks(30) { | ||||
|         waiter.push(index.add_documents(json!(chunk), Some("id"))); | ||||
|     } | ||||
|     // wait second batch of documents to finish | ||||
|     futures::future::join_all(waiter).await; | ||||
|     index.wait_task(9).await; | ||||
|  | ||||
|     let (response, _code) = index.filtered_tasks(&[], &["failed"]).await; | ||||
|  | ||||
|     // Check that exactly 5 tasks failed: 1 from the first round and 4 from the second | ||||
|     println!("{}", &response); | ||||
|     assert_eq!(response["results"].as_array().unwrap().len(), 5); | ||||
|  | ||||
|     // Check if there are exactly 120 documents (150 - 30) in the index; | ||||
|     let (response, code) = index | ||||
|         .get_all_documents(GetAllDocumentsOptions { | ||||
|             limit: Some(200), | ||||
|             ..Default::default() | ||||
|         }) | ||||
|         .await; | ||||
|     assert_eq!(code, 200, "failed with `{}`", response); | ||||
|     assert_eq!(response["results"].as_array().unwrap().len(), 120); | ||||
| } | ||||
|   | ||||
| @@ -708,9 +708,7 @@ async fn faceting_max_values_per_facet() { | ||||
|             }), | ||||
|             |response, code| { | ||||
|                 assert_eq!(code, 200, "{}", response); | ||||
|                 let numbers = dbg!(&response)["facetDistribution"]["number"] | ||||
|                     .as_object() | ||||
|                     .unwrap(); | ||||
|                 let numbers = &response["facetDistribution"]["number"].as_object().unwrap(); | ||||
|                 assert_eq!(numbers.len(), 10_000); | ||||
|             }, | ||||
|         ) | ||||
|   | ||||
		Reference in New Issue
	
	Block a user