mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-31 07:56:28 +00:00 
			
		
		
		
	Merge #561
561: Enriched documents batch reader r=curquiza a=Kerollmops

~This PR is based on #555 and must be rebased on main after it has been merged to ease the review.~
This PR contains the work in #555 and can be merged on main as soon as reviewed and approved.

- [x] Create an `EnrichedDocumentsBatchReader` that contains the external documents id.
- [x] Extract the primary key name and make it accessible in the `EnrichedDocumentsBatchReader`.
- [x] Use the external id from the `EnrichedDocumentsBatchReader` in the `Transform::read_documents`.
- [x] Remove the `update_primary_key` from the _transform.rs_ file.
- [x] Really generate the auto-generated documents ids.
- [x] Insert the (auto-generated) document ids in the document while processing it in `Transform::read_documents`.

Co-authored-by: Kerollmops <clement@meilisearch.com>
This commit is contained in:
		| @@ -132,12 +132,13 @@ fn indexing_songs_default(c: &mut Criterion) { | ||||
|                 let config = IndexerConfig::default(); | ||||
|                 let indexing_config = IndexDocumentsConfig::default(); | ||||
|                 let mut wtxn = index.write_txn().unwrap(); | ||||
|                 let mut builder = | ||||
|                 let builder = | ||||
|                     IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ()) | ||||
|                         .unwrap(); | ||||
|  | ||||
|                 let documents = utils::documents_from(datasets_paths::SMOL_SONGS, "csv"); | ||||
|                 builder.add_documents(documents).unwrap(); | ||||
|                 let (builder, user_error) = builder.add_documents(documents).unwrap(); | ||||
|                 user_error.unwrap(); | ||||
|                 builder.execute().unwrap(); | ||||
|                 wtxn.commit().unwrap(); | ||||
|  | ||||
| @@ -169,12 +170,13 @@ fn reindexing_songs_default(c: &mut Criterion) { | ||||
|                 let config = IndexerConfig::default(); | ||||
|                 let indexing_config = IndexDocumentsConfig::default(); | ||||
|                 let mut wtxn = index.write_txn().unwrap(); | ||||
|                 let mut builder = | ||||
|                 let builder = | ||||
|                     IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ()) | ||||
|                         .unwrap(); | ||||
|  | ||||
|                 let documents = utils::documents_from(datasets_paths::SMOL_SONGS, "csv"); | ||||
|                 builder.add_documents(documents).unwrap(); | ||||
|                 let (builder, user_error) = builder.add_documents(documents).unwrap(); | ||||
|                 user_error.unwrap(); | ||||
|                 builder.execute().unwrap(); | ||||
|                 wtxn.commit().unwrap(); | ||||
|  | ||||
| @@ -184,12 +186,13 @@ fn reindexing_songs_default(c: &mut Criterion) { | ||||
|                 let config = IndexerConfig::default(); | ||||
|                 let indexing_config = IndexDocumentsConfig::default(); | ||||
|                 let mut wtxn = index.write_txn().unwrap(); | ||||
|                 let mut builder = | ||||
|                 let builder = | ||||
|                     IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ()) | ||||
|                         .unwrap(); | ||||
|  | ||||
|                 let documents = utils::documents_from(datasets_paths::SMOL_SONGS, "csv"); | ||||
|                 builder.add_documents(documents).unwrap(); | ||||
|                 let (builder, user_error) = builder.add_documents(documents).unwrap(); | ||||
|                 user_error.unwrap(); | ||||
|                 builder.execute().unwrap(); | ||||
|                 wtxn.commit().unwrap(); | ||||
|  | ||||
| @@ -223,11 +226,12 @@ fn deleting_songs_in_batches_default(c: &mut Criterion) { | ||||
|                 let config = IndexerConfig::default(); | ||||
|                 let mut wtxn = index.write_txn().unwrap(); | ||||
|                 let indexing_config = IndexDocumentsConfig::default(); | ||||
|                 let mut builder = | ||||
|                 let builder = | ||||
|                     IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ()) | ||||
|                         .unwrap(); | ||||
|                 let documents = utils::documents_from(datasets_paths::SMOL_SONGS, "csv"); | ||||
|                 builder.add_documents(documents).unwrap(); | ||||
|                 let (builder, user_error) = builder.add_documents(documents).unwrap(); | ||||
|                 user_error.unwrap(); | ||||
|                 builder.execute().unwrap(); | ||||
|                 wtxn.commit().unwrap(); | ||||
|  | ||||
| @@ -279,11 +283,12 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) { | ||||
|                 let config = IndexerConfig::default(); | ||||
|                 let mut wtxn = index.write_txn().unwrap(); | ||||
|                 let indexing_config = IndexDocumentsConfig::default(); | ||||
|                 let mut builder = | ||||
|                 let builder = | ||||
|                     IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ()) | ||||
|                         .unwrap(); | ||||
|                 let documents = utils::documents_from(datasets_paths::SMOL_SONGS_1_2, "csv"); | ||||
|                 builder.add_documents(documents).unwrap(); | ||||
|                 let (builder, user_error) = builder.add_documents(documents).unwrap(); | ||||
|                 user_error.unwrap(); | ||||
|                 builder.execute().unwrap(); | ||||
|  | ||||
|                 wtxn.commit().unwrap(); | ||||
| @@ -294,19 +299,21 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) { | ||||
|                 let config = IndexerConfig::default(); | ||||
|                 let indexing_config = IndexDocumentsConfig::default(); | ||||
|                 let mut wtxn = index.write_txn().unwrap(); | ||||
|                 let mut builder = | ||||
|                 let builder = | ||||
|                     IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ()) | ||||
|                         .unwrap(); | ||||
|                 let documents = utils::documents_from(datasets_paths::SMOL_SONGS_3_4, "csv"); | ||||
|                 builder.add_documents(documents).unwrap(); | ||||
|                 let (builder, user_error) = builder.add_documents(documents).unwrap(); | ||||
|                 user_error.unwrap(); | ||||
|                 builder.execute().unwrap(); | ||||
|  | ||||
|                 let indexing_config = IndexDocumentsConfig::default(); | ||||
|                 let mut builder = | ||||
|                 let builder = | ||||
|                     IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ()) | ||||
|                         .unwrap(); | ||||
|                 let documents = utils::documents_from(datasets_paths::SMOL_SONGS_4_4, "csv"); | ||||
|                 builder.add_documents(documents).unwrap(); | ||||
|                 let (builder, user_error) = builder.add_documents(documents).unwrap(); | ||||
|                 user_error.unwrap(); | ||||
|                 builder.execute().unwrap(); | ||||
|  | ||||
|                 wtxn.commit().unwrap(); | ||||
| @@ -339,13 +346,14 @@ fn indexing_songs_without_faceted_numbers(c: &mut Criterion) { | ||||
|                 let config = IndexerConfig::default(); | ||||
|                 let indexing_config = IndexDocumentsConfig::default(); | ||||
|                 let mut wtxn = index.write_txn().unwrap(); | ||||
|                 let mut builder = | ||||
|                 let builder = | ||||
|                     IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ()) | ||||
|                         .unwrap(); | ||||
|  | ||||
|                 let documents = utils::documents_from(datasets_paths::SMOL_SONGS, "csv"); | ||||
|  | ||||
|                 builder.add_documents(documents).unwrap(); | ||||
|                 let (builder, user_error) = builder.add_documents(documents).unwrap(); | ||||
|                 user_error.unwrap(); | ||||
|                 builder.execute().unwrap(); | ||||
|                 wtxn.commit().unwrap(); | ||||
|  | ||||
| @@ -377,12 +385,13 @@ fn indexing_songs_without_faceted_fields(c: &mut Criterion) { | ||||
|                 let config = IndexerConfig::default(); | ||||
|                 let indexing_config = IndexDocumentsConfig::default(); | ||||
|                 let mut wtxn = index.write_txn().unwrap(); | ||||
|                 let mut builder = | ||||
|                 let builder = | ||||
|                     IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ()) | ||||
|                         .unwrap(); | ||||
|  | ||||
|                 let documents = utils::documents_from(datasets_paths::SMOL_SONGS, "csv"); | ||||
|                 builder.add_documents(documents).unwrap(); | ||||
|                 let (builder, user_error) = builder.add_documents(documents).unwrap(); | ||||
|                 user_error.unwrap(); | ||||
|                 builder.execute().unwrap(); | ||||
|                 wtxn.commit().unwrap(); | ||||
|  | ||||
| @@ -415,12 +424,13 @@ fn indexing_wiki(c: &mut Criterion) { | ||||
|                 let indexing_config = | ||||
|                     IndexDocumentsConfig { autogenerate_docids: true, ..Default::default() }; | ||||
|                 let mut wtxn = index.write_txn().unwrap(); | ||||
|                 let mut builder = | ||||
|                 let builder = | ||||
|                     IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ()) | ||||
|                         .unwrap(); | ||||
|  | ||||
|                 let documents = utils::documents_from(datasets_paths::SMOL_WIKI_ARTICLES, "csv"); | ||||
|                 builder.add_documents(documents).unwrap(); | ||||
|                 let (builder, user_error) = builder.add_documents(documents).unwrap(); | ||||
|                 user_error.unwrap(); | ||||
|                 builder.execute().unwrap(); | ||||
|                 wtxn.commit().unwrap(); | ||||
|  | ||||
| @@ -452,12 +462,13 @@ fn reindexing_wiki(c: &mut Criterion) { | ||||
|                 let indexing_config = | ||||
|                     IndexDocumentsConfig { autogenerate_docids: true, ..Default::default() }; | ||||
|                 let mut wtxn = index.write_txn().unwrap(); | ||||
|                 let mut builder = | ||||
|                 let builder = | ||||
|                     IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ()) | ||||
|                         .unwrap(); | ||||
|  | ||||
|                 let documents = utils::documents_from(datasets_paths::SMOL_WIKI_ARTICLES, "csv"); | ||||
|                 builder.add_documents(documents).unwrap(); | ||||
|                 let (builder, user_error) = builder.add_documents(documents).unwrap(); | ||||
|                 user_error.unwrap(); | ||||
|                 builder.execute().unwrap(); | ||||
|                 wtxn.commit().unwrap(); | ||||
|  | ||||
| @@ -468,12 +479,13 @@ fn reindexing_wiki(c: &mut Criterion) { | ||||
|                 let indexing_config = | ||||
|                     IndexDocumentsConfig { autogenerate_docids: true, ..Default::default() }; | ||||
|                 let mut wtxn = index.write_txn().unwrap(); | ||||
|                 let mut builder = | ||||
|                 let builder = | ||||
|                     IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ()) | ||||
|                         .unwrap(); | ||||
|  | ||||
|                 let documents = utils::documents_from(datasets_paths::SMOL_WIKI_ARTICLES, "csv"); | ||||
|                 builder.add_documents(documents).unwrap(); | ||||
|                 let (builder, user_error) = builder.add_documents(documents).unwrap(); | ||||
|                 user_error.unwrap(); | ||||
|                 builder.execute().unwrap(); | ||||
|                 wtxn.commit().unwrap(); | ||||
|  | ||||
| @@ -507,11 +519,12 @@ fn deleting_wiki_in_batches_default(c: &mut Criterion) { | ||||
|                 let mut wtxn = index.write_txn().unwrap(); | ||||
|                 let indexing_config = | ||||
|                     IndexDocumentsConfig { autogenerate_docids: true, ..Default::default() }; | ||||
|                 let mut builder = | ||||
|                 let builder = | ||||
|                     IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ()) | ||||
|                         .unwrap(); | ||||
|                 let documents = utils::documents_from(datasets_paths::SMOL_WIKI_ARTICLES, "csv"); | ||||
|                 builder.add_documents(documents).unwrap(); | ||||
|                 let (builder, user_error) = builder.add_documents(documents).unwrap(); | ||||
|                 user_error.unwrap(); | ||||
|                 builder.execute().unwrap(); | ||||
|                 wtxn.commit().unwrap(); | ||||
|  | ||||
| @@ -564,12 +577,13 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) { | ||||
|                 let config = IndexerConfig::default(); | ||||
|                 let indexing_config = | ||||
|                     IndexDocumentsConfig { autogenerate_docids: true, ..Default::default() }; | ||||
|                 let mut builder = | ||||
|                 let builder = | ||||
|                     IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ()) | ||||
|                         .unwrap(); | ||||
|                 let documents = | ||||
|                     utils::documents_from(datasets_paths::SMOL_WIKI_ARTICLES_1_2, "csv"); | ||||
|                 builder.add_documents(documents).unwrap(); | ||||
|                 let (builder, user_error) = builder.add_documents(documents).unwrap(); | ||||
|                 user_error.unwrap(); | ||||
|                 builder.execute().unwrap(); | ||||
|  | ||||
|                 wtxn.commit().unwrap(); | ||||
| @@ -581,24 +595,26 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) { | ||||
|                 let indexing_config = | ||||
|                     IndexDocumentsConfig { autogenerate_docids: true, ..Default::default() }; | ||||
|                 let mut wtxn = index.write_txn().unwrap(); | ||||
|                 let mut builder = | ||||
|                 let builder = | ||||
|                     IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ()) | ||||
|                         .unwrap(); | ||||
|  | ||||
|                 let documents = | ||||
|                     utils::documents_from(datasets_paths::SMOL_WIKI_ARTICLES_3_4, "csv"); | ||||
|                 builder.add_documents(documents).unwrap(); | ||||
|                 let (builder, user_error) = builder.add_documents(documents).unwrap(); | ||||
|                 user_error.unwrap(); | ||||
|                 builder.execute().unwrap(); | ||||
|  | ||||
|                 let indexing_config = | ||||
|                     IndexDocumentsConfig { autogenerate_docids: true, ..Default::default() }; | ||||
|                 let mut builder = | ||||
|                 let builder = | ||||
|                     IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ()) | ||||
|                         .unwrap(); | ||||
|  | ||||
|                 let documents = | ||||
|                     utils::documents_from(datasets_paths::SMOL_WIKI_ARTICLES_4_4, "csv"); | ||||
|                 builder.add_documents(documents).unwrap(); | ||||
|                 let (builder, user_error) = builder.add_documents(documents).unwrap(); | ||||
|                 user_error.unwrap(); | ||||
|                 builder.execute().unwrap(); | ||||
|  | ||||
|                 wtxn.commit().unwrap(); | ||||
| @@ -631,12 +647,13 @@ fn indexing_movies_default(c: &mut Criterion) { | ||||
|                 let config = IndexerConfig::default(); | ||||
|                 let indexing_config = IndexDocumentsConfig::default(); | ||||
|                 let mut wtxn = index.write_txn().unwrap(); | ||||
|                 let mut builder = | ||||
|                 let builder = | ||||
|                     IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ()) | ||||
|                         .unwrap(); | ||||
|  | ||||
|                 let documents = utils::documents_from(datasets_paths::MOVIES, "json"); | ||||
|                 builder.add_documents(documents).unwrap(); | ||||
|                 let (builder, user_error) = builder.add_documents(documents).unwrap(); | ||||
|                 user_error.unwrap(); | ||||
|                 builder.execute().unwrap(); | ||||
|                 wtxn.commit().unwrap(); | ||||
|  | ||||
| @@ -667,12 +684,13 @@ fn reindexing_movies_default(c: &mut Criterion) { | ||||
|                 let config = IndexerConfig::default(); | ||||
|                 let indexing_config = IndexDocumentsConfig::default(); | ||||
|                 let mut wtxn = index.write_txn().unwrap(); | ||||
|                 let mut builder = | ||||
|                 let builder = | ||||
|                     IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ()) | ||||
|                         .unwrap(); | ||||
|  | ||||
|                 let documents = utils::documents_from(datasets_paths::MOVIES, "json"); | ||||
|                 builder.add_documents(documents).unwrap(); | ||||
|                 let (builder, user_error) = builder.add_documents(documents).unwrap(); | ||||
|                 user_error.unwrap(); | ||||
|                 builder.execute().unwrap(); | ||||
|                 wtxn.commit().unwrap(); | ||||
|  | ||||
| @@ -682,12 +700,13 @@ fn reindexing_movies_default(c: &mut Criterion) { | ||||
|                 let config = IndexerConfig::default(); | ||||
|                 let indexing_config = IndexDocumentsConfig::default(); | ||||
|                 let mut wtxn = index.write_txn().unwrap(); | ||||
|                 let mut builder = | ||||
|                 let builder = | ||||
|                     IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ()) | ||||
|                         .unwrap(); | ||||
|  | ||||
|                 let documents = utils::documents_from(datasets_paths::MOVIES, "json"); | ||||
|                 builder.add_documents(documents).unwrap(); | ||||
|                 let (builder, user_error) = builder.add_documents(documents).unwrap(); | ||||
|                 user_error.unwrap(); | ||||
|                 builder.execute().unwrap(); | ||||
|                 wtxn.commit().unwrap(); | ||||
|  | ||||
| @@ -720,11 +739,12 @@ fn deleting_movies_in_batches_default(c: &mut Criterion) { | ||||
|                 let config = IndexerConfig::default(); | ||||
|                 let mut wtxn = index.write_txn().unwrap(); | ||||
|                 let indexing_config = IndexDocumentsConfig::default(); | ||||
|                 let mut builder = | ||||
|                 let builder = | ||||
|                     IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ()) | ||||
|                         .unwrap(); | ||||
|                 let documents = utils::documents_from(datasets_paths::MOVIES, "json"); | ||||
|                 builder.add_documents(documents).unwrap(); | ||||
|                 let (builder, user_error) = builder.add_documents(documents).unwrap(); | ||||
|                 user_error.unwrap(); | ||||
|                 builder.execute().unwrap(); | ||||
|                 wtxn.commit().unwrap(); | ||||
|  | ||||
| @@ -775,12 +795,13 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) { | ||||
|                 // as we don't care about the time it takes. | ||||
|                 let config = IndexerConfig::default(); | ||||
|                 let indexing_config = IndexDocumentsConfig::default(); | ||||
|                 let mut builder = | ||||
|                 let builder = | ||||
|                     IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ()) | ||||
|                         .unwrap(); | ||||
|  | ||||
|                 let documents = utils::documents_from(datasets_paths::MOVIES_1_2, "json"); | ||||
|                 builder.add_documents(documents).unwrap(); | ||||
|                 let (builder, user_error) = builder.add_documents(documents).unwrap(); | ||||
|                 user_error.unwrap(); | ||||
|                 builder.execute().unwrap(); | ||||
|  | ||||
|                 wtxn.commit().unwrap(); | ||||
| @@ -791,21 +812,23 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) { | ||||
|                 let config = IndexerConfig::default(); | ||||
|                 let indexing_config = IndexDocumentsConfig::default(); | ||||
|                 let mut wtxn = index.write_txn().unwrap(); | ||||
|                 let mut builder = | ||||
|                 let builder = | ||||
|                     IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ()) | ||||
|                         .unwrap(); | ||||
|  | ||||
|                 let documents = utils::documents_from(datasets_paths::MOVIES_3_4, "json"); | ||||
|                 builder.add_documents(documents).unwrap(); | ||||
|                 let (builder, user_error) = builder.add_documents(documents).unwrap(); | ||||
|                 user_error.unwrap(); | ||||
|                 builder.execute().unwrap(); | ||||
|  | ||||
|                 let indexing_config = IndexDocumentsConfig::default(); | ||||
|                 let mut builder = | ||||
|                 let builder = | ||||
|                     IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ()) | ||||
|                         .unwrap(); | ||||
|  | ||||
|                 let documents = utils::documents_from(datasets_paths::MOVIES_4_4, "json"); | ||||
|                 builder.add_documents(documents).unwrap(); | ||||
|                 let (builder, user_error) = builder.add_documents(documents).unwrap(); | ||||
|                 user_error.unwrap(); | ||||
|                 builder.execute().unwrap(); | ||||
|  | ||||
|                 wtxn.commit().unwrap(); | ||||
| @@ -861,12 +884,13 @@ fn indexing_nested_movies_default(c: &mut Criterion) { | ||||
|                 let config = IndexerConfig::default(); | ||||
|                 let indexing_config = IndexDocumentsConfig::default(); | ||||
|                 let mut wtxn = index.write_txn().unwrap(); | ||||
|                 let mut builder = | ||||
|                 let builder = | ||||
|                     IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ()) | ||||
|                         .unwrap(); | ||||
|  | ||||
|                 let documents = utils::documents_from(datasets_paths::NESTED_MOVIES, "json"); | ||||
|                 builder.add_documents(documents).unwrap(); | ||||
|                 let (builder, user_error) = builder.add_documents(documents).unwrap(); | ||||
|                 user_error.unwrap(); | ||||
|                 builder.execute().unwrap(); | ||||
|                 wtxn.commit().unwrap(); | ||||
|  | ||||
| @@ -922,11 +946,12 @@ fn deleting_nested_movies_in_batches_default(c: &mut Criterion) { | ||||
|                 let config = IndexerConfig::default(); | ||||
|                 let mut wtxn = index.write_txn().unwrap(); | ||||
|                 let indexing_config = IndexDocumentsConfig::default(); | ||||
|                 let mut builder = | ||||
|                 let builder = | ||||
|                     IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ()) | ||||
|                         .unwrap(); | ||||
|                 let documents = utils::documents_from(datasets_paths::NESTED_MOVIES, "json"); | ||||
|                 builder.add_documents(documents).unwrap(); | ||||
|                 let (builder, user_error) = builder.add_documents(documents).unwrap(); | ||||
|                 user_error.unwrap(); | ||||
|                 builder.execute().unwrap(); | ||||
|                 wtxn.commit().unwrap(); | ||||
|  | ||||
| @@ -984,12 +1009,13 @@ fn indexing_nested_movies_without_faceted_fields(c: &mut Criterion) { | ||||
|                 let config = IndexerConfig::default(); | ||||
|                 let indexing_config = IndexDocumentsConfig::default(); | ||||
|                 let mut wtxn = index.write_txn().unwrap(); | ||||
|                 let mut builder = | ||||
|                 let builder = | ||||
|                     IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ()) | ||||
|                         .unwrap(); | ||||
|  | ||||
|                 let documents = utils::documents_from(datasets_paths::NESTED_MOVIES, "json"); | ||||
|                 builder.add_documents(documents).unwrap(); | ||||
|                 let (builder, user_error) = builder.add_documents(documents).unwrap(); | ||||
|                 user_error.unwrap(); | ||||
|                 builder.execute().unwrap(); | ||||
|                 wtxn.commit().unwrap(); | ||||
|  | ||||
| @@ -1021,12 +1047,13 @@ fn indexing_geo(c: &mut Criterion) { | ||||
|                 let config = IndexerConfig::default(); | ||||
|                 let indexing_config = IndexDocumentsConfig::default(); | ||||
|                 let mut wtxn = index.write_txn().unwrap(); | ||||
|                 let mut builder = | ||||
|                 let builder = | ||||
|                     IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ()) | ||||
|                         .unwrap(); | ||||
|  | ||||
|                 let documents = utils::documents_from(datasets_paths::SMOL_ALL_COUNTRIES, "jsonl"); | ||||
|                 builder.add_documents(documents).unwrap(); | ||||
|                 let (builder, user_error) = builder.add_documents(documents).unwrap(); | ||||
|                 user_error.unwrap(); | ||||
|                 builder.execute().unwrap(); | ||||
|  | ||||
|                 wtxn.commit().unwrap(); | ||||
| @@ -1058,12 +1085,13 @@ fn reindexing_geo(c: &mut Criterion) { | ||||
|                 let config = IndexerConfig::default(); | ||||
|                 let indexing_config = IndexDocumentsConfig::default(); | ||||
|                 let mut wtxn = index.write_txn().unwrap(); | ||||
|                 let mut builder = | ||||
|                 let builder = | ||||
|                     IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ()) | ||||
|                         .unwrap(); | ||||
|  | ||||
|                 let documents = utils::documents_from(datasets_paths::SMOL_ALL_COUNTRIES, "jsonl"); | ||||
|                 builder.add_documents(documents).unwrap(); | ||||
|                 let (builder, user_error) = builder.add_documents(documents).unwrap(); | ||||
|                 user_error.unwrap(); | ||||
|                 builder.execute().unwrap(); | ||||
|  | ||||
|                 wtxn.commit().unwrap(); | ||||
| @@ -1074,12 +1102,13 @@ fn reindexing_geo(c: &mut Criterion) { | ||||
|                 let config = IndexerConfig::default(); | ||||
|                 let indexing_config = IndexDocumentsConfig::default(); | ||||
|                 let mut wtxn = index.write_txn().unwrap(); | ||||
|                 let mut builder = | ||||
|                 let builder = | ||||
|                     IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ()) | ||||
|                         .unwrap(); | ||||
|  | ||||
|                 let documents = utils::documents_from(datasets_paths::SMOL_ALL_COUNTRIES, "jsonl"); | ||||
|                 builder.add_documents(documents).unwrap(); | ||||
|                 let (builder, user_error) = builder.add_documents(documents).unwrap(); | ||||
|                 user_error.unwrap(); | ||||
|                 builder.execute().unwrap(); | ||||
|  | ||||
|                 wtxn.commit().unwrap(); | ||||
| @@ -1113,11 +1142,12 @@ fn deleting_geo_in_batches_default(c: &mut Criterion) { | ||||
|                 let config = IndexerConfig::default(); | ||||
|                 let mut wtxn = index.write_txn().unwrap(); | ||||
|                 let indexing_config = IndexDocumentsConfig::default(); | ||||
|                 let mut builder = | ||||
|                 let builder = | ||||
|                     IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ()) | ||||
|                         .unwrap(); | ||||
|                 let documents = utils::documents_from(datasets_paths::SMOL_ALL_COUNTRIES, "json"); | ||||
|                 builder.add_documents(documents).unwrap(); | ||||
|                 let documents = utils::documents_from(datasets_paths::SMOL_ALL_COUNTRIES, "jsonl"); | ||||
|                 let (builder, user_error) = builder.add_documents(documents).unwrap(); | ||||
|                 user_error.unwrap(); | ||||
|                 builder.execute().unwrap(); | ||||
|                 wtxn.commit().unwrap(); | ||||
|  | ||||
|   | ||||
| @@ -7,12 +7,12 @@ use std::path::Path; | ||||
|  | ||||
| use criterion::BenchmarkId; | ||||
| use heed::EnvOpenOptions; | ||||
| use milli::documents::DocumentBatchReader; | ||||
| use milli::documents::{DocumentsBatchBuilder, DocumentsBatchReader}; | ||||
| use milli::update::{ | ||||
|     IndexDocuments, IndexDocumentsConfig, IndexDocumentsMethod, IndexerConfig, Settings, | ||||
| }; | ||||
| use milli::{Filter, Index}; | ||||
| use serde_json::{Map, Value}; | ||||
| use milli::{Filter, Index, Object}; | ||||
| use serde_json::Value; | ||||
|  | ||||
| pub struct Conf<'a> { | ||||
|     /// where we are going to create our database.mmdb directory | ||||
| @@ -96,12 +96,10 @@ pub fn base_setup(conf: &Conf) -> Index { | ||||
|         update_method: IndexDocumentsMethod::ReplaceDocuments, | ||||
|         ..Default::default() | ||||
|     }; | ||||
|     let mut builder = | ||||
|         IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ()).unwrap(); | ||||
|     let builder = IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ()).unwrap(); | ||||
|     let documents = documents_from(conf.dataset, conf.dataset_format); | ||||
|  | ||||
|     builder.add_documents(documents).unwrap(); | ||||
|  | ||||
|     let (builder, user_error) = builder.add_documents(documents).unwrap(); | ||||
|     user_error.unwrap(); | ||||
|     builder.execute().unwrap(); | ||||
|     wtxn.commit().unwrap(); | ||||
|  | ||||
| @@ -140,7 +138,7 @@ pub fn run_benches(c: &mut criterion::Criterion, confs: &[Conf]) { | ||||
|     } | ||||
| } | ||||
|  | ||||
| pub fn documents_from(filename: &str, filetype: &str) -> DocumentBatchReader<impl BufRead + Seek> { | ||||
| pub fn documents_from(filename: &str, filetype: &str) -> DocumentsBatchReader<impl BufRead + Seek> { | ||||
|     let reader = | ||||
|         File::open(filename).expect(&format!("could not find the dataset in: {}", filename)); | ||||
|     let reader = BufReader::new(reader); | ||||
| @@ -150,39 +148,35 @@ pub fn documents_from(filename: &str, filetype: &str) -> DocumentBatchReader<imp | ||||
|         "jsonl" => documents_from_jsonl(reader).unwrap(), | ||||
|         otherwise => panic!("invalid update format {:?}", otherwise), | ||||
|     }; | ||||
|     DocumentBatchReader::from_reader(Cursor::new(documents)).unwrap() | ||||
|     DocumentsBatchReader::from_reader(Cursor::new(documents)).unwrap() | ||||
| } | ||||
|  | ||||
| fn documents_from_jsonl(mut reader: impl BufRead) -> anyhow::Result<Vec<u8>> { | ||||
|     let mut writer = Cursor::new(Vec::new()); | ||||
|     let mut documents = milli::documents::DocumentBatchBuilder::new(&mut writer)?; | ||||
| fn documents_from_jsonl(reader: impl BufRead) -> anyhow::Result<Vec<u8>> { | ||||
|     let mut documents = DocumentsBatchBuilder::new(Vec::new()); | ||||
|  | ||||
|     let mut buf = String::new(); | ||||
|  | ||||
|     while reader.read_line(&mut buf)? > 0 { | ||||
|         documents.extend_from_json(&mut buf.as_bytes())?; | ||||
|         buf.clear(); | ||||
|     for result in serde_json::Deserializer::from_reader(reader).into_iter::<Object>() { | ||||
|         let object = result?; | ||||
|         documents.append_json_object(&object)?; | ||||
|     } | ||||
|     documents.finish()?; | ||||
|  | ||||
|     Ok(writer.into_inner()) | ||||
|     documents.into_inner().map_err(Into::into) | ||||
| } | ||||
|  | ||||
| fn documents_from_json(reader: impl BufRead) -> anyhow::Result<Vec<u8>> { | ||||
|     let mut writer = Cursor::new(Vec::new()); | ||||
|     let mut documents = milli::documents::DocumentBatchBuilder::new(&mut writer)?; | ||||
|     let mut documents = DocumentsBatchBuilder::new(Vec::new()); | ||||
|  | ||||
|     documents.extend_from_json(reader)?; | ||||
|     documents.finish()?; | ||||
|     documents.append_json_array(reader)?; | ||||
|  | ||||
|     Ok(writer.into_inner()) | ||||
|     documents.into_inner().map_err(Into::into) | ||||
| } | ||||
|  | ||||
| fn documents_from_csv(reader: impl BufRead) -> anyhow::Result<Vec<u8>> { | ||||
|     let mut writer = Cursor::new(Vec::new()); | ||||
|     milli::documents::DocumentBatchBuilder::from_csv(reader, &mut writer)?.finish()?; | ||||
|     let csv = csv::Reader::from_reader(reader); | ||||
|  | ||||
|     Ok(writer.into_inner()) | ||||
|     let mut documents = DocumentsBatchBuilder::new(Vec::new()); | ||||
|     documents.append_csv(csv)?; | ||||
|  | ||||
|     documents.into_inner().map_err(Into::into) | ||||
| } | ||||
|  | ||||
| enum AllowedType { | ||||
| @@ -222,14 +216,14 @@ impl<R: Read> CSVDocumentDeserializer<R> { | ||||
| } | ||||
|  | ||||
| impl<R: Read> Iterator for CSVDocumentDeserializer<R> { | ||||
|     type Item = anyhow::Result<Map<String, Value>>; | ||||
|     type Item = anyhow::Result<Object>; | ||||
|  | ||||
|     fn next(&mut self) -> Option<Self::Item> { | ||||
|         let csv_document = self.documents.next()?; | ||||
|  | ||||
|         match csv_document { | ||||
|             Ok(csv_document) => { | ||||
|                 let mut document = Map::new(); | ||||
|                 let mut document = Object::new(); | ||||
|  | ||||
|                 for ((field_name, field_type), value) in | ||||
|                     self.headers.iter().zip(csv_document.into_iter()) | ||||
|   | ||||
		Reference in New Issue
	
	Block a user