mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-26 05:26:27 +00:00 
			
		
		
		
	First batch of PR comment
This commit is contained in:
		| @@ -2,7 +2,7 @@ use std::io::{self, Write}; | ||||
|  | ||||
| use grenad::{CompressionType, WriterBuilder}; | ||||
| use serde::de::Deserializer; | ||||
| use serde_json::{to_writer, Value}; | ||||
| use serde_json::to_writer; | ||||
|  | ||||
| use super::{DocumentsBatchIndex, Error, DOCUMENTS_BATCH_INDEX_KEY}; | ||||
| use crate::documents::serde_impl::DocumentVisitor; | ||||
| @@ -87,95 +87,6 @@ impl<W: Write> DocumentsBatchBuilder<W> { | ||||
|         de.deserialize_any(&mut visitor)? | ||||
|     } | ||||
|  | ||||
|     /// Appends a new CSV file into the batch and updates the `DocumentsBatchIndex` accordingly. | ||||
|     pub fn append_csv<R: io::Read>(&mut self, mut reader: csv::Reader<R>) -> Result<(), Error> { | ||||
|         // Make sure that we insert the fields ids in order as the obkv writer has this requirement. | ||||
|         let mut typed_fields_ids: Vec<_> = reader | ||||
|             .headers()? | ||||
|             .into_iter() | ||||
|             .map(parse_csv_header) | ||||
|             .map(|(k, t)| (self.fields_index.insert(k), t)) | ||||
|             .enumerate() | ||||
|             .collect(); | ||||
|         // Make sure that we insert the fields ids in order as the obkv writer has this requirement. | ||||
|         typed_fields_ids.sort_unstable_by_key(|(_, (fid, _))| *fid); | ||||
|  | ||||
|         let mut record = csv::StringRecord::new(); | ||||
|         let mut line = 0; | ||||
|         while reader.read_record(&mut record)? { | ||||
|             // We increment here and not at the end of the while loop to take | ||||
|             // the header offset into account. | ||||
|             line += 1; | ||||
|  | ||||
|             self.obkv_buffer.clear(); | ||||
|             let mut writer = obkv::KvWriter::new(&mut self.obkv_buffer); | ||||
|  | ||||
|             for (i, (field_id, type_)) in typed_fields_ids.iter() { | ||||
|                 self.value_buffer.clear(); | ||||
|  | ||||
|                 let value = &record[*i]; | ||||
|                 let trimmed_value = value.trim(); | ||||
|                 match type_ { | ||||
|                     AllowedType::Number => { | ||||
|                         if trimmed_value.is_empty() { | ||||
|                             to_writer(&mut self.value_buffer, &Value::Null)?; | ||||
|                         } else if let Ok(integer) = trimmed_value.parse::<i64>() { | ||||
|                             to_writer(&mut self.value_buffer, &integer)?; | ||||
|                         } else { | ||||
|                             match trimmed_value.parse::<f64>() { | ||||
|                                 Ok(float) => { | ||||
|                                     to_writer(&mut self.value_buffer, &float)?; | ||||
|                                 } | ||||
|                                 Err(error) => { | ||||
|                                     return Err(Error::ParseFloat { | ||||
|                                         error, | ||||
|                                         line, | ||||
|                                         value: value.to_string(), | ||||
|                                     }); | ||||
|                                 } | ||||
|                             } | ||||
|                         } | ||||
|                     } | ||||
|                     AllowedType::Boolean => { | ||||
|                         if trimmed_value.is_empty() { | ||||
|                             to_writer(&mut self.value_buffer, &Value::Null)?; | ||||
|                         } else { | ||||
|                             match trimmed_value.parse::<bool>() { | ||||
|                                 Ok(bool) => { | ||||
|                                     to_writer(&mut self.value_buffer, &bool)?; | ||||
|                                 } | ||||
|                                 Err(error) => { | ||||
|                                     return Err(Error::ParseBool { | ||||
|                                         error, | ||||
|                                         line, | ||||
|                                         value: value.to_string(), | ||||
|                                     }); | ||||
|                                 } | ||||
|                             } | ||||
|                         } | ||||
|                     } | ||||
|                     AllowedType::String => { | ||||
|                         if value.is_empty() { | ||||
|                             to_writer(&mut self.value_buffer, &Value::Null)?; | ||||
|                         } else { | ||||
|                             to_writer(&mut self.value_buffer, value)?; | ||||
|                         } | ||||
|                     } | ||||
|                 } | ||||
|  | ||||
|                 // We insert into the obkv writer the value buffer that has been filled just above. | ||||
|                 writer.insert(*field_id, &self.value_buffer)?; | ||||
|             } | ||||
|  | ||||
|             let internal_id = self.documents_count.to_be_bytes(); | ||||
|             let document_bytes = writer.into_inner()?; | ||||
|             self.writer.insert(internal_id, &document_bytes)?; | ||||
|             self.documents_count += 1; | ||||
|         } | ||||
|  | ||||
|         Ok(()) | ||||
|     } | ||||
|  | ||||
|     /// Flushes the content on disk and stores the final version of the `DocumentsBatchIndex`. | ||||
|     pub fn into_inner(mut self) -> io::Result<W> { | ||||
|         let DocumentsBatchBuilder { mut writer, fields_index, .. } = self; | ||||
| @@ -189,35 +100,12 @@ impl<W: Write> DocumentsBatchBuilder<W> { | ||||
|     } | ||||
| } | ||||
|  | ||||
| #[derive(Debug)] | ||||
| enum AllowedType { | ||||
|     String, | ||||
|     Boolean, | ||||
|     Number, | ||||
| } | ||||
|  | ||||
| fn parse_csv_header(header: &str) -> (&str, AllowedType) { | ||||
|     // if there are several separators we only split on the last one. | ||||
|     match header.rsplit_once(':') { | ||||
|         Some((field_name, field_type)) => match field_type { | ||||
|             "string" => (field_name, AllowedType::String), | ||||
|             "boolean" => (field_name, AllowedType::Boolean), | ||||
|             "number" => (field_name, AllowedType::Number), | ||||
|             // if the pattern isn't recognized, we keep the whole field. | ||||
|             _otherwise => (header, AllowedType::String), | ||||
|         }, | ||||
|         None => (header, AllowedType::String), | ||||
|     } | ||||
| } | ||||
|  | ||||
| #[cfg(test)] | ||||
| mod test { | ||||
|     use std::io::Cursor; | ||||
|  | ||||
|     use serde_json::json; | ||||
|  | ||||
|     use super::*; | ||||
|     use crate::documents::{obkv_to_object, DocumentsBatchReader}; | ||||
|     use crate::documents::DocumentsBatchReader; | ||||
|  | ||||
|     #[test] | ||||
|     fn add_single_documents_json() { | ||||
| @@ -253,348 +141,4 @@ mod test { | ||||
|  | ||||
|         assert!(cursor.next_document().unwrap().is_none()); | ||||
|     } | ||||
|  | ||||
|     #[test] | ||||
|     fn add_documents_csv() { | ||||
|         let csv_content = "id:number,field:string\n1,hello!\n2,blabla"; | ||||
|         let csv = csv::Reader::from_reader(Cursor::new(csv_content)); | ||||
|  | ||||
|         let mut builder = DocumentsBatchBuilder::new(Vec::new()); | ||||
|         builder.append_csv(csv).unwrap(); | ||||
|         assert_eq!(builder.documents_count(), 2); | ||||
|         let vector = builder.into_inner().unwrap(); | ||||
|  | ||||
|         let (mut cursor, index) = DocumentsBatchReader::from_reader(Cursor::new(vector)) | ||||
|             .unwrap() | ||||
|             .into_cursor_and_fields_index(); | ||||
|         assert_eq!(index.len(), 2); | ||||
|  | ||||
|         let document = cursor.next_document().unwrap().unwrap(); | ||||
|         assert_eq!(document.iter().count(), 2); | ||||
|  | ||||
|         let document = cursor.next_document().unwrap().unwrap(); | ||||
|         assert_eq!(document.iter().count(), 2); | ||||
|  | ||||
|         assert!(cursor.next_document().unwrap().is_none()); | ||||
|     } | ||||
|  | ||||
|     #[test] | ||||
|     fn simple_csv_document() { | ||||
|         let csv_content = r#"city,country,pop | ||||
| "Boston","United States","4628910""#; | ||||
|         let csv = csv::Reader::from_reader(Cursor::new(csv_content)); | ||||
|  | ||||
|         let mut builder = DocumentsBatchBuilder::new(Vec::new()); | ||||
|         builder.append_csv(csv).unwrap(); | ||||
|         let vector = builder.into_inner().unwrap(); | ||||
|  | ||||
|         let (mut cursor, index) = DocumentsBatchReader::from_reader(Cursor::new(vector)) | ||||
|             .unwrap() | ||||
|             .into_cursor_and_fields_index(); | ||||
|         let doc = cursor.next_document().unwrap().unwrap(); | ||||
|         let val = obkv_to_object(doc, &index).map(Value::from).unwrap(); | ||||
|  | ||||
|         assert_eq!( | ||||
|             val, | ||||
|             json!({ | ||||
|                 "city": "Boston", | ||||
|                 "country": "United States", | ||||
|                 "pop": "4628910", | ||||
|             }) | ||||
|         ); | ||||
|  | ||||
|         assert!(cursor.next_document().unwrap().is_none()); | ||||
|     } | ||||
|  | ||||
|     #[test] | ||||
|     fn coma_in_field() { | ||||
|         let csv_content = r#"city,country,pop | ||||
| "Boston","United, States","4628910""#; | ||||
|         let csv = csv::Reader::from_reader(Cursor::new(csv_content)); | ||||
|  | ||||
|         let mut builder = DocumentsBatchBuilder::new(Vec::new()); | ||||
|         builder.append_csv(csv).unwrap(); | ||||
|         let vector = builder.into_inner().unwrap(); | ||||
|  | ||||
|         let (mut cursor, index) = DocumentsBatchReader::from_reader(Cursor::new(vector)) | ||||
|             .unwrap() | ||||
|             .into_cursor_and_fields_index(); | ||||
|  | ||||
|         let doc = cursor.next_document().unwrap().unwrap(); | ||||
|         let val = obkv_to_object(doc, &index).map(Value::from).unwrap(); | ||||
|  | ||||
|         assert_eq!( | ||||
|             val, | ||||
|             json!({ | ||||
|                 "city": "Boston", | ||||
|                 "country": "United, States", | ||||
|                 "pop": "4628910", | ||||
|             }) | ||||
|         ); | ||||
|     } | ||||
|  | ||||
|     #[test] | ||||
|     fn quote_in_field() { | ||||
|         let csv_content = r#"city,country,pop | ||||
| "Boston","United"" States","4628910""#; | ||||
|         let csv = csv::Reader::from_reader(Cursor::new(csv_content)); | ||||
|  | ||||
|         let mut builder = DocumentsBatchBuilder::new(Vec::new()); | ||||
|         builder.append_csv(csv).unwrap(); | ||||
|         let vector = builder.into_inner().unwrap(); | ||||
|  | ||||
|         let (mut cursor, index) = DocumentsBatchReader::from_reader(Cursor::new(vector)) | ||||
|             .unwrap() | ||||
|             .into_cursor_and_fields_index(); | ||||
|  | ||||
|         let doc = cursor.next_document().unwrap().unwrap(); | ||||
|         let val = obkv_to_object(doc, &index).map(Value::from).unwrap(); | ||||
|  | ||||
|         assert_eq!( | ||||
|             val, | ||||
|             json!({ | ||||
|                 "city": "Boston", | ||||
|                 "country": "United\" States", | ||||
|                 "pop": "4628910", | ||||
|             }) | ||||
|         ); | ||||
|     } | ||||
|  | ||||
|     #[test] | ||||
|     fn integer_in_field() { | ||||
|         let csv_content = r#"city,country,pop:number | ||||
| "Boston","United States","4628910""#; | ||||
|         let csv = csv::Reader::from_reader(Cursor::new(csv_content)); | ||||
|  | ||||
|         let mut builder = DocumentsBatchBuilder::new(Vec::new()); | ||||
|         builder.append_csv(csv).unwrap(); | ||||
|         let vector = builder.into_inner().unwrap(); | ||||
|  | ||||
|         let (mut cursor, index) = DocumentsBatchReader::from_reader(Cursor::new(vector)) | ||||
|             .unwrap() | ||||
|             .into_cursor_and_fields_index(); | ||||
|  | ||||
|         let doc = cursor.next_document().unwrap().unwrap(); | ||||
|         let val = obkv_to_object(doc, &index).map(Value::from).unwrap(); | ||||
|  | ||||
|         assert_eq!( | ||||
|             val, | ||||
|             json!({ | ||||
|                 "city": "Boston", | ||||
|                 "country": "United States", | ||||
|                 "pop": 4628910, | ||||
|             }) | ||||
|         ); | ||||
|     } | ||||
|  | ||||
|     #[test] | ||||
|     fn integer_as_id() { | ||||
|         let csv_content = r#""id:number","title:string","comment:string" | ||||
| "1239","Pride and Prejudice","A great book""#; | ||||
|         let csv = csv::Reader::from_reader(Cursor::new(csv_content)); | ||||
|  | ||||
|         let mut builder = DocumentsBatchBuilder::new(Vec::new()); | ||||
|         builder.append_csv(csv).unwrap(); | ||||
|         let vector = builder.into_inner().unwrap(); | ||||
|  | ||||
|         let (mut cursor, index) = DocumentsBatchReader::from_reader(Cursor::new(vector)) | ||||
|             .unwrap() | ||||
|             .into_cursor_and_fields_index(); | ||||
|  | ||||
|         let doc = cursor.next_document().unwrap().unwrap(); | ||||
|         let val = obkv_to_object(doc, &index).map(Value::from).unwrap(); | ||||
|  | ||||
|         assert_eq!( | ||||
|             val, | ||||
|             json!({ | ||||
|                 "id": 1239, | ||||
|                 "title": "Pride and Prejudice", | ||||
|                 "comment": "A great book", | ||||
|             }) | ||||
|         ); | ||||
|     } | ||||
|  | ||||
|     #[test] | ||||
|     fn float_in_field() { | ||||
|         let csv_content = r#"city,country,pop:number | ||||
| "Boston","United States","4628910.01""#; | ||||
|         let csv = csv::Reader::from_reader(Cursor::new(csv_content)); | ||||
|  | ||||
|         let mut builder = DocumentsBatchBuilder::new(Vec::new()); | ||||
|         builder.append_csv(csv).unwrap(); | ||||
|         let vector = builder.into_inner().unwrap(); | ||||
|  | ||||
|         let (mut cursor, index) = DocumentsBatchReader::from_reader(Cursor::new(vector)) | ||||
|             .unwrap() | ||||
|             .into_cursor_and_fields_index(); | ||||
|  | ||||
|         let doc = cursor.next_document().unwrap().unwrap(); | ||||
|         let val = obkv_to_object(doc, &index).map(Value::from).unwrap(); | ||||
|  | ||||
|         assert_eq!( | ||||
|             val, | ||||
|             json!({ | ||||
|                 "city": "Boston", | ||||
|                 "country": "United States", | ||||
|                 "pop": 4628910.01, | ||||
|             }) | ||||
|         ); | ||||
|     } | ||||
|  | ||||
|     #[test] | ||||
|     fn several_colon_in_header() { | ||||
|         let csv_content = r#"city:love:string,country:state,pop | ||||
| "Boston","United States","4628910""#; | ||||
|         let csv = csv::Reader::from_reader(Cursor::new(csv_content)); | ||||
|  | ||||
|         let mut builder = DocumentsBatchBuilder::new(Vec::new()); | ||||
|         builder.append_csv(csv).unwrap(); | ||||
|         let vector = builder.into_inner().unwrap(); | ||||
|  | ||||
|         let (mut cursor, index) = DocumentsBatchReader::from_reader(Cursor::new(vector)) | ||||
|             .unwrap() | ||||
|             .into_cursor_and_fields_index(); | ||||
|  | ||||
|         let doc = cursor.next_document().unwrap().unwrap(); | ||||
|         let val = obkv_to_object(doc, &index).map(Value::from).unwrap(); | ||||
|  | ||||
|         assert_eq!( | ||||
|             val, | ||||
|             json!({ | ||||
|                 "city:love": "Boston", | ||||
|                 "country:state": "United States", | ||||
|                 "pop": "4628910", | ||||
|             }) | ||||
|         ); | ||||
|     } | ||||
|  | ||||
|     #[test] | ||||
|     fn ending_by_colon_in_header() { | ||||
|         let csv_content = r#"city:,country,pop | ||||
| "Boston","United States","4628910""#; | ||||
|         let csv = csv::Reader::from_reader(Cursor::new(csv_content)); | ||||
|  | ||||
|         let mut builder = DocumentsBatchBuilder::new(Vec::new()); | ||||
|         builder.append_csv(csv).unwrap(); | ||||
|         let vector = builder.into_inner().unwrap(); | ||||
|  | ||||
|         let (mut cursor, index) = DocumentsBatchReader::from_reader(Cursor::new(vector)) | ||||
|             .unwrap() | ||||
|             .into_cursor_and_fields_index(); | ||||
|  | ||||
|         let doc = cursor.next_document().unwrap().unwrap(); | ||||
|         let val = obkv_to_object(doc, &index).map(Value::from).unwrap(); | ||||
|  | ||||
|         assert_eq!( | ||||
|             val, | ||||
|             json!({ | ||||
|                 "city:": "Boston", | ||||
|                 "country": "United States", | ||||
|                 "pop": "4628910", | ||||
|             }) | ||||
|         ); | ||||
|     } | ||||
|  | ||||
|     #[test] | ||||
|     fn starting_by_colon_in_header() { | ||||
|         let csv_content = r#":city,country,pop | ||||
| "Boston","United States","4628910""#; | ||||
|         let csv = csv::Reader::from_reader(Cursor::new(csv_content)); | ||||
|  | ||||
|         let mut builder = DocumentsBatchBuilder::new(Vec::new()); | ||||
|         builder.append_csv(csv).unwrap(); | ||||
|         let vector = builder.into_inner().unwrap(); | ||||
|  | ||||
|         let (mut cursor, index) = DocumentsBatchReader::from_reader(Cursor::new(vector)) | ||||
|             .unwrap() | ||||
|             .into_cursor_and_fields_index(); | ||||
|  | ||||
|         let doc = cursor.next_document().unwrap().unwrap(); | ||||
|         let val = obkv_to_object(doc, &index).map(Value::from).unwrap(); | ||||
|  | ||||
|         assert_eq!( | ||||
|             val, | ||||
|             json!({ | ||||
|                 ":city": "Boston", | ||||
|                 "country": "United States", | ||||
|                 "pop": "4628910", | ||||
|             }) | ||||
|         ); | ||||
|     } | ||||
|  | ||||
|     #[ignore] | ||||
|     #[test] | ||||
|     fn starting_by_colon_in_header2() { | ||||
|         let csv_content = r#":string,country,pop | ||||
| "Boston","United States","4628910""#; | ||||
|         let csv = csv::Reader::from_reader(Cursor::new(csv_content)); | ||||
|  | ||||
|         let mut builder = DocumentsBatchBuilder::new(Vec::new()); | ||||
|         builder.append_csv(csv).unwrap(); | ||||
|         let vector = builder.into_inner().unwrap(); | ||||
|  | ||||
|         let (mut cursor, _) = DocumentsBatchReader::from_reader(Cursor::new(vector)) | ||||
|             .unwrap() | ||||
|             .into_cursor_and_fields_index(); | ||||
|  | ||||
|         assert!(cursor.next_document().is_err()); | ||||
|     } | ||||
|  | ||||
|     #[test] | ||||
|     fn double_colon_in_header() { | ||||
|         let csv_content = r#"city::string,country,pop | ||||
| "Boston","United States","4628910""#; | ||||
|         let csv = csv::Reader::from_reader(Cursor::new(csv_content)); | ||||
|  | ||||
|         let mut builder = DocumentsBatchBuilder::new(Vec::new()); | ||||
|         builder.append_csv(csv).unwrap(); | ||||
|         let vector = builder.into_inner().unwrap(); | ||||
|  | ||||
|         let (mut cursor, index) = DocumentsBatchReader::from_reader(Cursor::new(vector)) | ||||
|             .unwrap() | ||||
|             .into_cursor_and_fields_index(); | ||||
|  | ||||
|         let doc = cursor.next_document().unwrap().unwrap(); | ||||
|         let val = obkv_to_object(doc, &index).map(Value::from).unwrap(); | ||||
|  | ||||
|         assert_eq!( | ||||
|             val, | ||||
|             json!({ | ||||
|                 "city:": "Boston", | ||||
|                 "country": "United States", | ||||
|                 "pop": "4628910", | ||||
|             }) | ||||
|         ); | ||||
|     } | ||||
|  | ||||
|     #[test] | ||||
|     fn bad_type_in_header() { | ||||
|         let csv_content = r#"city,country:number,pop | ||||
| "Boston","United States","4628910""#; | ||||
|         let csv = csv::Reader::from_reader(Cursor::new(csv_content)); | ||||
|  | ||||
|         let mut builder = DocumentsBatchBuilder::new(Vec::new()); | ||||
|         assert!(builder.append_csv(csv).is_err()); | ||||
|     } | ||||
|  | ||||
|     #[test] | ||||
|     fn bad_column_count1() { | ||||
|         let csv_content = r#"city,country,pop | ||||
| "Boston","United States","4628910", "too much | ||||
|         let csv = csv::Reader::from_reader(Cursor::new(csv_content"#; | ||||
|         let csv = csv::Reader::from_reader(Cursor::new(csv_content)); | ||||
|  | ||||
|         let mut builder = DocumentsBatchBuilder::new(Vec::new()); | ||||
|         assert!(builder.append_csv(csv).is_err()); | ||||
|     } | ||||
|  | ||||
|     #[test] | ||||
|     fn bad_column_count2() { | ||||
|         let csv_content = r#"city,country,pop | ||||
| "Boston","United States""#; | ||||
|         let csv = csv::Reader::from_reader(Cursor::new(csv_content)); | ||||
|  | ||||
|         let mut builder = DocumentsBatchBuilder::new(Vec::new()); | ||||
|         assert!(builder.append_csv(csv).is_err()); | ||||
|     } | ||||
| } | ||||
|   | ||||
| @@ -253,33 +253,4 @@ mod test { | ||||
|             {"id": 2,"a": 0,"b": 0}, | ||||
|         ]); | ||||
|     } | ||||
|  | ||||
|     #[test] | ||||
|     fn csv_types_dont_panic() { | ||||
|         let csv1_content = | ||||
|             "id:number,b:boolean,c,d:number\n1,,,\n2,true,doggo,2\n3,false,the best doggo,-2\n4,,\"Hello, World!\",2.5"; | ||||
|         let csv1 = csv::Reader::from_reader(Cursor::new(csv1_content)); | ||||
|  | ||||
|         let mut builder = DocumentsBatchBuilder::new(Vec::new()); | ||||
|         builder.append_csv(csv1).unwrap(); | ||||
|         let vector = builder.into_inner().unwrap(); | ||||
|  | ||||
|         DocumentsBatchReader::from_reader(Cursor::new(vector)).unwrap(); | ||||
|     } | ||||
|  | ||||
|     #[test] | ||||
|     fn out_of_order_csv_fields() { | ||||
|         let csv1_content = "id:number,b\n1,0"; | ||||
|         let csv1 = csv::Reader::from_reader(Cursor::new(csv1_content)); | ||||
|  | ||||
|         let csv2_content = "id:number,a,b\n2,0,0"; | ||||
|         let csv2 = csv::Reader::from_reader(Cursor::new(csv2_content)); | ||||
|  | ||||
|         let mut builder = DocumentsBatchBuilder::new(Vec::new()); | ||||
|         builder.append_csv(csv1).unwrap(); | ||||
|         builder.append_csv(csv2).unwrap(); | ||||
|         let vector = builder.into_inner().unwrap(); | ||||
|  | ||||
|         DocumentsBatchReader::from_reader(Cursor::new(vector)).unwrap(); | ||||
|     } | ||||
| } | ||||
|   | ||||
| @@ -102,6 +102,7 @@ impl Metadata { | ||||
|         rules: &'rules [LocalizedAttributesRule], | ||||
|     ) -> Option<&'rules [Language]> { | ||||
|         let localized_attributes_rule_id = self.localized_attributes_rule_id?.get(); | ||||
|         // - 1: `localized_attributes_rule_id` is NonZero | ||||
|         let rule = rules.get((localized_attributes_rule_id - 1) as usize).unwrap(); | ||||
|         Some(rule.locales()) | ||||
|     } | ||||
| @@ -160,6 +161,7 @@ impl MetadataBuilder { | ||||
|             .iter() | ||||
|             .flat_map(|v| v.iter()) | ||||
|             .position(|rule| rule.match_str(field)) | ||||
|             // saturating_add(1): make `id` `NonZero` | ||||
|             .map(|id| NonZeroU16::new(id.saturating_add(1).try_into().unwrap()).unwrap()); | ||||
|  | ||||
|         Metadata { searchable, filterable, sortable, localized_attributes_rule_id } | ||||
|   | ||||
| @@ -46,6 +46,7 @@ fn encode_f64_into_ordered_bytes( | ||||
|     f: f64, | ||||
|     buffer: &mut [u8; 16], | ||||
| ) -> Result<(), InvalidGloballyOrderedFloatError> { | ||||
|     // write the globally ordered float | ||||
|     let bytes = f64_into_bytes(f).ok_or(InvalidGloballyOrderedFloatError { float: f })?; | ||||
|     buffer[..8].copy_from_slice(&bytes[..]); | ||||
|     // Then the f64 value just to be able to read it back | ||||
|   | ||||
| @@ -87,23 +87,10 @@ pub enum WriterOperation { | ||||
| } | ||||
|  | ||||
| pub enum ArroyOperation { | ||||
|     /// TODO: call when deleting regular documents | ||||
|     DeleteVectors { | ||||
|         docid: DocumentId, | ||||
|     }, | ||||
|     SetVectors { | ||||
|         docid: DocumentId, | ||||
|         embedder_id: u8, | ||||
|         embeddings: Vec<Embedding>, | ||||
|     }, | ||||
|     SetVector { | ||||
|         docid: DocumentId, | ||||
|         embedder_id: u8, | ||||
|         embedding: Embedding, | ||||
|     }, | ||||
|     Finish { | ||||
|         configs: Vec<IndexEmbeddingConfig>, | ||||
|     }, | ||||
|     DeleteVectors { docid: DocumentId }, | ||||
|     SetVectors { docid: DocumentId, embedder_id: u8, embeddings: Vec<Embedding> }, | ||||
|     SetVector { docid: DocumentId, embedder_id: u8, embedding: Embedding }, | ||||
|     Finish { configs: Vec<IndexEmbeddingConfig> }, | ||||
| } | ||||
|  | ||||
| pub struct DbOperation { | ||||
| @@ -334,7 +321,6 @@ impl DocidsSender for FacetDocidsSender<'_> { | ||||
|     fn write(&self, key: &[u8], value: &[u8]) -> StdResult<(), SendError<()>> { | ||||
|         let (facet_kind, key) = FacetKind::extract_from_key(key); | ||||
|         let database = Database::from(facet_kind); | ||||
|         // let entry = EntryOperation::Write(KeyValueEntry::from_small_key_value(key, value)); | ||||
|         let entry = match facet_kind { | ||||
|             // skip level group size | ||||
|             FacetKind::String | FacetKind::Number => { | ||||
|   | ||||
| @@ -140,7 +140,6 @@ impl<'a, 'extractor> Extractor<'extractor> for DocumentsExtractor<'a> { | ||||
|                     )?; | ||||
|                     document_extractor_data.docids_delta.insert_add_u32(docid); | ||||
|                     self.document_sender.uncompressed(docid, external_docid, content).unwrap(); | ||||
|                     // extracted_dictionary_sender.send(self, dictionary: &[u8]); | ||||
|                 } | ||||
|             } | ||||
|         } | ||||
|   | ||||
| @@ -137,7 +137,6 @@ impl<'extractor> Extractor<'extractor> for GeoExtractor { | ||||
|     fn init_data<'doc>(&'doc self, extractor_alloc: &'extractor Bump) -> Result<Self::Data> { | ||||
|         Ok(RefCell::new(GeoExtractorData { | ||||
|             removed: bumpalo::collections::Vec::new_in(extractor_alloc), | ||||
|             // inserted: Uell::new_in(extractor_alloc), | ||||
|             inserted: bumpalo::collections::Vec::new_in(extractor_alloc), | ||||
|             spilled_inserted: None, | ||||
|             spilled_removed: None, | ||||
| @@ -242,7 +241,7 @@ impl<'extractor> Extractor<'extractor> for GeoExtractor { | ||||
|     } | ||||
| } | ||||
|  | ||||
| /// Extracts and validate the latitude and latitude from a document geo field. | ||||
| /// Extracts and validates the latitude and latitude from a document geo field. | ||||
| /// | ||||
| /// It can be of the form `{ "lat": 0.0, "lng": "1.0" }`. | ||||
| pub fn extract_geo_coordinates( | ||||
|   | ||||
| @@ -35,7 +35,6 @@ pub struct WordDocidsBalancedCaches<'extractor> { | ||||
| unsafe impl<'extractor> MostlySend for WordDocidsBalancedCaches<'extractor> {} | ||||
|  | ||||
| impl<'extractor> WordDocidsBalancedCaches<'extractor> { | ||||
|     /// TODO Make sure to give the same max_memory to all of them, without splitting it | ||||
|     pub fn new_in(buckets: usize, max_memory: Option<usize>, alloc: &'extractor Bump) -> Self { | ||||
|         Self { | ||||
|             word_fid_docids: BalancedCaches::new_in(buckets, max_memory, alloc), | ||||
|   | ||||
		Reference in New Issue
	
	Block a user