mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-25 13:06:27 +00:00 
			
		
		
		
	add boolean support for csv documents
This commit is contained in:
		| @@ -279,6 +279,81 @@ async fn add_csv_document() { | |||||||
|     "###); |     "###); | ||||||
| } | } | ||||||
|  |  | ||||||
|  | #[actix_rt::test] | ||||||
|  | async fn add_csv_document_with_types() { | ||||||
|  |     let server = Server::new().await; | ||||||
|  |     let index = server.index("pets"); | ||||||
|  |  | ||||||
|  |     let document = "#id:number,name:string,race:string,age:number,cute:boolean | ||||||
|  | 0,jean,bernese mountain,2.5,true | ||||||
|  | 1,,,, | ||||||
|  | 2,lilou,pug,-2,false"; | ||||||
|  |  | ||||||
|  |     let (response, code) = index.raw_update_documents(document, Some("text/csv"), "").await; | ||||||
|  |     snapshot!(code, @"202 Accepted"); | ||||||
|  |     snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###" | ||||||
|  |     { | ||||||
|  |       "taskUid": 0, | ||||||
|  |       "indexUid": "pets", | ||||||
|  |       "status": "enqueued", | ||||||
|  |       "type": "documentAdditionOrUpdate", | ||||||
|  |       "enqueuedAt": "[date]" | ||||||
|  |     } | ||||||
|  |     "###); | ||||||
|  |     let response = index.wait_task(response["taskUid"].as_u64().unwrap()).await; | ||||||
|  |     snapshot!(json_string!(response, { ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]", ".duration" => "[duration]" }), @r###" | ||||||
|  |     { | ||||||
|  |       "uid": 0, | ||||||
|  |       "indexUid": "pets", | ||||||
|  |       "status": "succeeded", | ||||||
|  |       "type": "documentAdditionOrUpdate", | ||||||
|  |       "canceledBy": null, | ||||||
|  |       "details": { | ||||||
|  |         "receivedDocuments": 3, | ||||||
|  |         "indexedDocuments": 3 | ||||||
|  |       }, | ||||||
|  |       "error": null, | ||||||
|  |       "duration": "[duration]", | ||||||
|  |       "enqueuedAt": "[date]", | ||||||
|  |       "startedAt": "[date]", | ||||||
|  |       "finishedAt": "[date]" | ||||||
|  |     } | ||||||
|  |     "###); | ||||||
|  |  | ||||||
|  |     let (documents, code) = index.get_all_documents(GetAllDocumentsOptions::default()).await; | ||||||
|  |     snapshot!(code, @"200 OK"); | ||||||
|  |     snapshot!(json_string!(documents), @r###" | ||||||
|  |     { | ||||||
|  |       "results": [ | ||||||
|  |         { | ||||||
|  |           "#id": 0, | ||||||
|  |           "name": "jean", | ||||||
|  |           "race": "bernese mountain", | ||||||
|  |           "age": 2.5, | ||||||
|  |           "cute": true | ||||||
|  |         }, | ||||||
|  |         { | ||||||
|  |           "#id": 1, | ||||||
|  |           "name": null, | ||||||
|  |           "race": null, | ||||||
|  |           "age": null, | ||||||
|  |           "cute": null | ||||||
|  |         }, | ||||||
|  |         { | ||||||
|  |           "#id": 2, | ||||||
|  |           "name": "lilou", | ||||||
|  |           "race": "pug", | ||||||
|  |           "age": -2, | ||||||
|  |           "cute": false | ||||||
|  |         } | ||||||
|  |       ], | ||||||
|  |       "offset": 0, | ||||||
|  |       "limit": 20, | ||||||
|  |       "total": 3 | ||||||
|  |     } | ||||||
|  |     "###); | ||||||
|  | } | ||||||
|  |  | ||||||
| #[actix_rt::test] | #[actix_rt::test] | ||||||
| async fn add_csv_document_with_custom_delimiter() { | async fn add_csv_document_with_custom_delimiter() { | ||||||
|     let server = Server::new().await; |     let server = Server::new().await; | ||||||
| @@ -343,6 +418,40 @@ async fn add_csv_document_with_custom_delimiter() { | |||||||
|     "###); |     "###); | ||||||
| } | } | ||||||
|  |  | ||||||
|  | #[actix_rt::test] | ||||||
|  | async fn add_csv_document_with_types_error() { | ||||||
|  |     let server = Server::new().await; | ||||||
|  |     let index = server.index("pets"); | ||||||
|  |  | ||||||
|  |     let document = "#id:number,a:boolean,b:number | ||||||
|  | 0,doggo,1"; | ||||||
|  |  | ||||||
|  |     let (response, code) = index.raw_update_documents(document, Some("text/csv"), "").await; | ||||||
|  |     snapshot!(code, @"400 Bad Request"); | ||||||
|  |     snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###" | ||||||
|  |     { | ||||||
|  |       "message": "The `csv` payload provided is malformed: `Error parsing boolean \"doggo\" at line 1: provided string was not `true` or `false``.", | ||||||
|  |       "code": "malformed_payload", | ||||||
|  |       "type": "invalid_request", | ||||||
|  |       "link": "https://docs.meilisearch.com/errors#malformed_payload" | ||||||
|  |     } | ||||||
|  |     "###); | ||||||
|  |  | ||||||
|  |     let document = "#id:number,a:boolean,b:number | ||||||
|  | 0,true,doggo"; | ||||||
|  |  | ||||||
|  |     let (response, code) = index.raw_update_documents(document, Some("text/csv"), "").await; | ||||||
|  |     snapshot!(code, @"400 Bad Request"); | ||||||
|  |     snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###" | ||||||
|  |     { | ||||||
|  |       "message": "The `csv` payload provided is malformed: `Error parsing number \"doggo\" at line 1: invalid float literal`.", | ||||||
|  |       "code": "malformed_payload", | ||||||
|  |       "type": "invalid_request", | ||||||
|  |       "link": "https://docs.meilisearch.com/errors#malformed_payload" | ||||||
|  |     } | ||||||
|  |     "###); | ||||||
|  | } | ||||||
|  |  | ||||||
| /// any other content-type must be refused | /// any other content-type must be refused | ||||||
| #[actix_rt::test] | #[actix_rt::test] | ||||||
| async fn error_add_documents_test_bad_content_types() { | async fn error_add_documents_test_bad_content_types() { | ||||||
|   | |||||||
| @@ -116,12 +116,13 @@ impl<W: Write> DocumentsBatchBuilder<W> { | |||||||
|                 let value = &record[*i]; |                 let value = &record[*i]; | ||||||
|                 match type_ { |                 match type_ { | ||||||
|                     AllowedType::Number => { |                     AllowedType::Number => { | ||||||
|                         if value.trim().is_empty() { |                         let trimmed_value = value.trim(); | ||||||
|  |                         if trimmed_value.is_empty() { | ||||||
|                             to_writer(&mut self.value_buffer, &Value::Null)?; |                             to_writer(&mut self.value_buffer, &Value::Null)?; | ||||||
|                         } else if let Ok(integer) = value.trim().parse::<i64>() { |                         } else if let Ok(integer) = trimmed_value.parse::<i64>() { | ||||||
|                             to_writer(&mut self.value_buffer, &integer)?; |                             to_writer(&mut self.value_buffer, &integer)?; | ||||||
|                         } else { |                         } else { | ||||||
|                             match value.trim().parse::<f64>() { |                             match trimmed_value.parse::<f64>() { | ||||||
|                                 Ok(float) => { |                                 Ok(float) => { | ||||||
|                                     to_writer(&mut self.value_buffer, &float)?; |                                     to_writer(&mut self.value_buffer, &float)?; | ||||||
|                                 } |                                 } | ||||||
| @@ -135,6 +136,25 @@ impl<W: Write> DocumentsBatchBuilder<W> { | |||||||
|                             } |                             } | ||||||
|                         } |                         } | ||||||
|                     } |                     } | ||||||
|  |                     AllowedType::Boolean => { | ||||||
|  |                         let trimmed_value = value.trim(); | ||||||
|  |                         if trimmed_value.is_empty() { | ||||||
|  |                             to_writer(&mut self.value_buffer, &Value::Null)?; | ||||||
|  |                         } else { | ||||||
|  |                             match trimmed_value.parse::<bool>() { | ||||||
|  |                                 Ok(bool) => { | ||||||
|  |                                     to_writer(&mut self.value_buffer, &bool)?; | ||||||
|  |                                 } | ||||||
|  |                                 Err(error) => { | ||||||
|  |                                     return Err(Error::ParseBool { | ||||||
|  |                                         error, | ||||||
|  |                                         line, | ||||||
|  |                                         value: value.to_string(), | ||||||
|  |                                     }); | ||||||
|  |                                 } | ||||||
|  |                             } | ||||||
|  |                         } | ||||||
|  |                     } | ||||||
|                     AllowedType::String => { |                     AllowedType::String => { | ||||||
|                         if value.is_empty() { |                         if value.is_empty() { | ||||||
|                             to_writer(&mut self.value_buffer, &Value::Null)?; |                             to_writer(&mut self.value_buffer, &Value::Null)?; | ||||||
| @@ -173,6 +193,7 @@ impl<W: Write> DocumentsBatchBuilder<W> { | |||||||
/// Type annotation that a CSV header may carry as a `name:type` suffix.
///
/// The variant chosen for a column decides how each of its cells is converted
/// when the CSV is appended to a documents batch. The enum is a plain tag with
/// no payload, so it is cheap to copy and compare.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum AllowedType {
    /// `string` suffix, also used when the header has no recognized suffix.
    /// An empty cell is written as `null`.
    String,
    /// `boolean` suffix: the trimmed cell is parsed as `bool` (`true` / `false`);
    /// a blank cell is written as `null`.
    Boolean,
    /// `number` suffix: the trimmed cell is parsed as `i64` first, then as `f64`;
    /// a blank cell is written as `null`.
    Number,
}
|  |  | ||||||
| @@ -181,6 +202,7 @@ fn parse_csv_header(header: &str) -> (&str, AllowedType) { | |||||||
|     match header.rsplit_once(':') { |     match header.rsplit_once(':') { | ||||||
|         Some((field_name, field_type)) => match field_type { |         Some((field_name, field_type)) => match field_type { | ||||||
|             "string" => (field_name, AllowedType::String), |             "string" => (field_name, AllowedType::String), | ||||||
|  |             "boolean" => (field_name, AllowedType::Boolean), | ||||||
|             "number" => (field_name, AllowedType::Number), |             "number" => (field_name, AllowedType::Number), | ||||||
|             // if the pattern isn't recognized, we keep the whole field. |             // if the pattern isn't recognized, we keep the whole field. | ||||||
|             _otherwise => (header, AllowedType::String), |             _otherwise => (header, AllowedType::String), | ||||||
|   | |||||||
| @@ -90,6 +90,7 @@ impl DocumentsBatchIndex { | |||||||
| #[derive(Debug)] | #[derive(Debug)] | ||||||
| pub enum Error { | pub enum Error { | ||||||
|     ParseFloat { error: std::num::ParseFloatError, line: usize, value: String }, |     ParseFloat { error: std::num::ParseFloatError, line: usize, value: String }, | ||||||
|  |     ParseBool { error: std::str::ParseBoolError, line: usize, value: String }, | ||||||
|     InvalidDocumentFormat, |     InvalidDocumentFormat, | ||||||
|     InvalidEnrichedData, |     InvalidEnrichedData, | ||||||
|     InvalidUtf8(Utf8Error), |     InvalidUtf8(Utf8Error), | ||||||
| @@ -136,6 +137,9 @@ impl fmt::Display for Error { | |||||||
|             Error::ParseFloat { error, line, value } => { |             Error::ParseFloat { error, line, value } => { | ||||||
|                 write!(f, "Error parsing number {:?} at line {}: {}", value, line, error) |                 write!(f, "Error parsing number {:?} at line {}: {}", value, line, error) | ||||||
|             } |             } | ||||||
|  |             Error::ParseBool { error, line, value } => { | ||||||
|  |                 write!(f, "Error parsing boolean {:?} at line {}: {}", value, line, error) | ||||||
|  |             } | ||||||
|             Error::InvalidDocumentFormat => { |             Error::InvalidDocumentFormat => { | ||||||
|                 f.write_str("Invalid document addition format, missing the documents batch index.") |                 f.write_str("Invalid document addition format, missing the documents batch index.") | ||||||
|             } |             } | ||||||
| @@ -274,6 +278,19 @@ mod test { | |||||||
|         ]); |         ]); | ||||||
|     } |     } | ||||||
|  |  | ||||||
/// Appends a CSV mixing typed (`number`, `boolean`) and untyped columns, with
/// empty cells and a quoted value containing a comma, and checks that both
/// building the batch and opening it with `DocumentsBatchReader` succeed.
#[test]
fn csv_types_dont_panic() {
    let content =
        "id:number,b:boolean,c,d:number\n1,,,\n2,true,doggo,2\n3,false,the best doggo,-2\n4,,\"Hello, World!\",2.5";
    let reader = csv::Reader::from_reader(Cursor::new(content));

    let mut batch = DocumentsBatchBuilder::new(Vec::new());
    batch.append_csv(reader).unwrap();
    let bytes = batch.into_inner().unwrap();

    // Only the absence of an error matters here; the reader itself is discarded.
    DocumentsBatchReader::from_reader(Cursor::new(bytes)).unwrap();
}
|  |  | ||||||
|     #[test] |     #[test] | ||||||
|     fn out_of_order_csv_fields() { |     fn out_of_order_csv_fields() { | ||||||
|         let csv1_content = "id:number,b\n1,0"; |         let csv1_content = "id:number,b\n1,0"; | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user