mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-25 13:06:27 +00:00 
			
		
		
		
	Introduce a parameter to disable the engine to autogenerate docids
This commit is contained in:
		| @@ -202,6 +202,7 @@ pub struct IndexDocuments<'t, 'u, 'i> { | ||||
|     indexing_jobs: Option<usize>, | ||||
|     update_method: IndexDocumentsMethod, | ||||
|     update_format: UpdateFormat, | ||||
|     autogenerate_docids: bool, | ||||
| } | ||||
|  | ||||
| impl<'t, 'u, 'i> IndexDocuments<'t, 'u, 'i> { | ||||
| @@ -219,6 +220,7 @@ impl<'t, 'u, 'i> IndexDocuments<'t, 'u, 'i> { | ||||
|             indexing_jobs: None, | ||||
|             update_method: IndexDocumentsMethod::ReplaceDocuments, | ||||
|             update_format: UpdateFormat::Json, | ||||
|             autogenerate_docids: true, | ||||
|         } | ||||
|     } | ||||
|  | ||||
| @@ -272,6 +274,16 @@ impl<'t, 'u, 'i> IndexDocuments<'t, 'u, 'i> { | ||||
|         self | ||||
|     } | ||||
|  | ||||
|     /// Sets the `autogenerate_docids` flag to `true`. | ||||
|     /// | ||||
|     /// With the flag set, the transform step shown in this diff falls back to | ||||
|     /// generating an `id` field / UUID when a document has no primary key. | ||||
|     /// Returns `&mut Self` so that builder calls can be chained. | ||||
|     pub fn enable_autogenerate_docids(&mut self) -> &mut Self { | ||||
|         self.autogenerate_docids = true; | ||||
|         self | ||||
|     } | ||||
|  | ||||
|     /// Sets the `autogenerate_docids` flag to `false`. | ||||
|     /// | ||||
|     /// With the flag cleared, the transform step shown in this diff returns a | ||||
|     /// `missing primary key` error instead of generating a document id. | ||||
|     /// Returns `&mut Self` so that builder calls can be chained. | ||||
|     pub fn disable_autogenerate_docids(&mut self) -> &mut Self { | ||||
|         self.autogenerate_docids = false; | ||||
|         self | ||||
|     } | ||||
|  | ||||
|     pub fn execute<R, F>(self, reader: R, progress_callback: F) -> anyhow::Result<()> | ||||
|     where | ||||
|         R: io::Read, | ||||
| @@ -288,6 +300,7 @@ impl<'t, 'u, 'i> IndexDocuments<'t, 'u, 'i> { | ||||
|             max_nb_chunks: self.max_nb_chunks, | ||||
|             max_memory: self.max_memory, | ||||
|             index_documents_method: self.update_method, | ||||
|             autogenerate_docids: self.autogenerate_docids, | ||||
|         }; | ||||
|  | ||||
|         let output = match self.update_format { | ||||
| @@ -636,6 +649,56 @@ mod tests { | ||||
|         drop(rtxn); | ||||
|     } | ||||
|  | ||||
|     // Importing CSV documents without any id column must fail when docid | ||||
|     // autogeneration is disabled, and must leave the index empty. | ||||
|     #[test] | ||||
|     fn not_auto_generated_csv_documents_ids() { | ||||
|         let path = tempfile::tempdir().unwrap(); | ||||
|         let mut options = EnvOpenOptions::new(); | ||||
|         options.map_size(10 * 1024 * 1024); // 10 MB | ||||
|         let index = Index::new(options, &path).unwrap(); | ||||
|  | ||||
|         // We send 3 documents that only have a `name` column, i.e. no ids at all. | ||||
|         let mut wtxn = index.write_txn().unwrap(); | ||||
|         let content = &b"name\nkevin\nkevina\nbenoit\n"[..]; | ||||
|         let mut builder = IndexDocuments::new(&mut wtxn, &index); | ||||
|         builder.disable_autogenerate_docids(); | ||||
|         builder.update_format(UpdateFormat::Csv); | ||||
|         // With autogeneration disabled, the import is expected to return an error. | ||||
|         assert!(builder.execute(content, |_, _| ()).is_err()); | ||||
|         // The transaction is still committed; the count below checks nothing was written. | ||||
|         wtxn.commit().unwrap(); | ||||
|  | ||||
|         // Check that there is no document. | ||||
|         let rtxn = index.read_txn().unwrap(); | ||||
|         let count = index.number_of_documents(&rtxn).unwrap(); | ||||
|         assert_eq!(count, 0); | ||||
|         drop(rtxn); | ||||
|     } | ||||
|  | ||||
|     // Importing a JSON batch in which some documents lack an id must fail when | ||||
|     // docid autogeneration is disabled, and must leave the index empty. | ||||
|     #[test] | ||||
|     fn not_auto_generated_json_documents_ids() { | ||||
|         let path = tempfile::tempdir().unwrap(); | ||||
|         let mut options = EnvOpenOptions::new(); | ||||
|         options.map_size(10 * 1024 * 1024); // 10 MB | ||||
|         let index = Index::new(options, &path).unwrap(); | ||||
|  | ||||
|         // We send 3 documents, 2 of which have no id. | ||||
|         let mut wtxn = index.write_txn().unwrap(); | ||||
|         let content = &br#"[ | ||||
|             { "name": "kevina", "id": 21 }, | ||||
|             { "name": "kevin" }, | ||||
|             { "name": "benoit" } | ||||
|         ]"#[..]; | ||||
|         let mut builder = IndexDocuments::new(&mut wtxn, &index); | ||||
|         builder.disable_autogenerate_docids(); | ||||
|         builder.update_format(UpdateFormat::Json); | ||||
|         // With autogeneration disabled, the import is expected to return an error. | ||||
|         assert!(builder.execute(content, |_, _| ()).is_err()); | ||||
|         // The transaction is still committed; the count below checks nothing was written. | ||||
|         wtxn.commit().unwrap(); | ||||
|  | ||||
|         // Check that there is no document. | ||||
|         let rtxn = index.read_txn().unwrap(); | ||||
|         let count = index.number_of_documents(&rtxn).unwrap(); | ||||
|         assert_eq!(count, 0); | ||||
|         drop(rtxn); | ||||
|     } | ||||
|  | ||||
|     #[test] | ||||
|     fn simple_auto_generated_documents_ids() { | ||||
|         let path = tempfile::tempdir().unwrap(); | ||||
|   | ||||
| @@ -33,6 +33,7 @@ pub struct Transform<'t, 'i> { | ||||
|     pub max_nb_chunks: Option<usize>, | ||||
|     pub max_memory: Option<usize>, | ||||
|     pub index_documents_method: IndexDocumentsMethod, | ||||
|     pub autogenerate_docids: bool, | ||||
| } | ||||
|  | ||||
| impl Transform<'_, '_> { | ||||
| @@ -57,7 +58,14 @@ impl Transform<'_, '_> { | ||||
|             None => { | ||||
|                 match documents.get(0).and_then(|doc| doc.keys().find(|k| k.contains("id"))) { | ||||
|                     Some(key) => fields_ids_map.insert(&key).context("field id limit reached")?, | ||||
|                     None => fields_ids_map.insert("id").context("field id limit reached")?, | ||||
|                     None => { | ||||
|                         if !self.autogenerate_docids { | ||||
|                             // If there is no primary key in the current document batch, we must | ||||
|                             // return an error and not automatically generate any document id. | ||||
|                             return Err(anyhow!("missing primary key")) | ||||
|                         } | ||||
|                         fields_ids_map.insert("id").context("field id limit reached")? | ||||
|                     }, | ||||
|                 } | ||||
|             }, | ||||
|         }; | ||||
| @@ -130,6 +138,9 @@ impl Transform<'_, '_> { | ||||
|                     _ => return Err(anyhow!("documents ids must be either strings or numbers")), | ||||
|                 }, | ||||
|                 None => { | ||||
|                     if !self.autogenerate_docids { | ||||
|                         return Err(anyhow!("missing primary key")); | ||||
|                     } | ||||
|                     let uuid = uuid::Uuid::new_v4().to_hyphenated().encode_lower(&mut uuid_buffer); | ||||
|                     Cow::Borrowed(uuid) | ||||
|                 }, | ||||
| @@ -180,11 +191,16 @@ impl Transform<'_, '_> { | ||||
|         let primary_key_field_id = match user_id_pos { | ||||
|             Some(pos) => fields_ids_map.id(&headers[pos]).expect("found the primary key"), | ||||
|             None => { | ||||
|                 let id = fields_ids_map.insert("id").context("field id limit reached")?; | ||||
|                 if !self.autogenerate_docids { | ||||
|                     // If there is no primary key in the current document batch, we must | ||||
|                     // return an error and not automatically generate any document id. | ||||
|                     return Err(anyhow!("missing primary key")) | ||||
|                 } | ||||
|                 let field_id = fields_ids_map.insert("id").context("field id limit reached")?; | ||||
|                 // We make sure to add the primary key field id to the fields ids, | ||||
|                 // this way it is added to the obks. | ||||
|                 fields_ids.push((id, usize::max_value())); | ||||
|                 id | ||||
|                 fields_ids.push((field_id, usize::max_value())); | ||||
|                 field_id | ||||
|             }, | ||||
|         }; | ||||
|  | ||||
|   | ||||
		Reference in New Issue
	
	Block a user