mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-07-25 07:41:00 +00:00
Merge #561
561: Enriched documents batch reader r=curquiza a=Kerollmops

~This PR is based on #555 and must be rebased on main after it has been merged to ease the review.~
This PR contains the work from #555 and can be merged into main as soon as it has been reviewed and approved.

- [x] Create an `EnrichedDocumentsBatchReader` that contains the external document ids.
- [x] Extract the primary key name and make it accessible in the `EnrichedDocumentsBatchReader`.
- [x] Use the external id from the `EnrichedDocumentsBatchReader` in `Transform::read_documents`.
- [x] Remove the `update_primary_key` from the _transform.rs_ file.
- [x] Really generate the auto-generated document ids.
- [x] Insert the (auto-generated) document ids in the document while processing it in `Transform::read_documents`.

Co-authored-by: Kerollmops <clement@meilisearch.com>
This commit is contained in:
@ -132,12 +132,13 @@ fn indexing_songs_default(c: &mut Criterion) {
|
||||
let config = IndexerConfig::default();
|
||||
let indexing_config = IndexDocumentsConfig::default();
|
||||
let mut wtxn = index.write_txn().unwrap();
|
||||
let mut builder =
|
||||
let builder =
|
||||
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ())
|
||||
.unwrap();
|
||||
|
||||
let documents = utils::documents_from(datasets_paths::SMOL_SONGS, "csv");
|
||||
builder.add_documents(documents).unwrap();
|
||||
let (builder, user_error) = builder.add_documents(documents).unwrap();
|
||||
user_error.unwrap();
|
||||
builder.execute().unwrap();
|
||||
wtxn.commit().unwrap();
|
||||
|
||||
@ -169,12 +170,13 @@ fn reindexing_songs_default(c: &mut Criterion) {
|
||||
let config = IndexerConfig::default();
|
||||
let indexing_config = IndexDocumentsConfig::default();
|
||||
let mut wtxn = index.write_txn().unwrap();
|
||||
let mut builder =
|
||||
let builder =
|
||||
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ())
|
||||
.unwrap();
|
||||
|
||||
let documents = utils::documents_from(datasets_paths::SMOL_SONGS, "csv");
|
||||
builder.add_documents(documents).unwrap();
|
||||
let (builder, user_error) = builder.add_documents(documents).unwrap();
|
||||
user_error.unwrap();
|
||||
builder.execute().unwrap();
|
||||
wtxn.commit().unwrap();
|
||||
|
||||
@ -184,12 +186,13 @@ fn reindexing_songs_default(c: &mut Criterion) {
|
||||
let config = IndexerConfig::default();
|
||||
let indexing_config = IndexDocumentsConfig::default();
|
||||
let mut wtxn = index.write_txn().unwrap();
|
||||
let mut builder =
|
||||
let builder =
|
||||
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ())
|
||||
.unwrap();
|
||||
|
||||
let documents = utils::documents_from(datasets_paths::SMOL_SONGS, "csv");
|
||||
builder.add_documents(documents).unwrap();
|
||||
let (builder, user_error) = builder.add_documents(documents).unwrap();
|
||||
user_error.unwrap();
|
||||
builder.execute().unwrap();
|
||||
wtxn.commit().unwrap();
|
||||
|
||||
@ -223,11 +226,12 @@ fn deleting_songs_in_batches_default(c: &mut Criterion) {
|
||||
let config = IndexerConfig::default();
|
||||
let mut wtxn = index.write_txn().unwrap();
|
||||
let indexing_config = IndexDocumentsConfig::default();
|
||||
let mut builder =
|
||||
let builder =
|
||||
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ())
|
||||
.unwrap();
|
||||
let documents = utils::documents_from(datasets_paths::SMOL_SONGS, "csv");
|
||||
builder.add_documents(documents).unwrap();
|
||||
let (builder, user_error) = builder.add_documents(documents).unwrap();
|
||||
user_error.unwrap();
|
||||
builder.execute().unwrap();
|
||||
wtxn.commit().unwrap();
|
||||
|
||||
@ -279,11 +283,12 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
|
||||
let config = IndexerConfig::default();
|
||||
let mut wtxn = index.write_txn().unwrap();
|
||||
let indexing_config = IndexDocumentsConfig::default();
|
||||
let mut builder =
|
||||
let builder =
|
||||
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ())
|
||||
.unwrap();
|
||||
let documents = utils::documents_from(datasets_paths::SMOL_SONGS_1_2, "csv");
|
||||
builder.add_documents(documents).unwrap();
|
||||
let (builder, user_error) = builder.add_documents(documents).unwrap();
|
||||
user_error.unwrap();
|
||||
builder.execute().unwrap();
|
||||
|
||||
wtxn.commit().unwrap();
|
||||
@ -294,19 +299,21 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
|
||||
let config = IndexerConfig::default();
|
||||
let indexing_config = IndexDocumentsConfig::default();
|
||||
let mut wtxn = index.write_txn().unwrap();
|
||||
let mut builder =
|
||||
let builder =
|
||||
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ())
|
||||
.unwrap();
|
||||
let documents = utils::documents_from(datasets_paths::SMOL_SONGS_3_4, "csv");
|
||||
builder.add_documents(documents).unwrap();
|
||||
let (builder, user_error) = builder.add_documents(documents).unwrap();
|
||||
user_error.unwrap();
|
||||
builder.execute().unwrap();
|
||||
|
||||
let indexing_config = IndexDocumentsConfig::default();
|
||||
let mut builder =
|
||||
let builder =
|
||||
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ())
|
||||
.unwrap();
|
||||
let documents = utils::documents_from(datasets_paths::SMOL_SONGS_4_4, "csv");
|
||||
builder.add_documents(documents).unwrap();
|
||||
let (builder, user_error) = builder.add_documents(documents).unwrap();
|
||||
user_error.unwrap();
|
||||
builder.execute().unwrap();
|
||||
|
||||
wtxn.commit().unwrap();
|
||||
@ -339,13 +346,14 @@ fn indexing_songs_without_faceted_numbers(c: &mut Criterion) {
|
||||
let config = IndexerConfig::default();
|
||||
let indexing_config = IndexDocumentsConfig::default();
|
||||
let mut wtxn = index.write_txn().unwrap();
|
||||
let mut builder =
|
||||
let builder =
|
||||
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ())
|
||||
.unwrap();
|
||||
|
||||
let documents = utils::documents_from(datasets_paths::SMOL_SONGS, "csv");
|
||||
|
||||
builder.add_documents(documents).unwrap();
|
||||
let (builder, user_error) = builder.add_documents(documents).unwrap();
|
||||
user_error.unwrap();
|
||||
builder.execute().unwrap();
|
||||
wtxn.commit().unwrap();
|
||||
|
||||
@ -377,12 +385,13 @@ fn indexing_songs_without_faceted_fields(c: &mut Criterion) {
|
||||
let config = IndexerConfig::default();
|
||||
let indexing_config = IndexDocumentsConfig::default();
|
||||
let mut wtxn = index.write_txn().unwrap();
|
||||
let mut builder =
|
||||
let builder =
|
||||
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ())
|
||||
.unwrap();
|
||||
|
||||
let documents = utils::documents_from(datasets_paths::SMOL_SONGS, "csv");
|
||||
builder.add_documents(documents).unwrap();
|
||||
let (builder, user_error) = builder.add_documents(documents).unwrap();
|
||||
user_error.unwrap();
|
||||
builder.execute().unwrap();
|
||||
wtxn.commit().unwrap();
|
||||
|
||||
@ -415,12 +424,13 @@ fn indexing_wiki(c: &mut Criterion) {
|
||||
let indexing_config =
|
||||
IndexDocumentsConfig { autogenerate_docids: true, ..Default::default() };
|
||||
let mut wtxn = index.write_txn().unwrap();
|
||||
let mut builder =
|
||||
let builder =
|
||||
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ())
|
||||
.unwrap();
|
||||
|
||||
let documents = utils::documents_from(datasets_paths::SMOL_WIKI_ARTICLES, "csv");
|
||||
builder.add_documents(documents).unwrap();
|
||||
let (builder, user_error) = builder.add_documents(documents).unwrap();
|
||||
user_error.unwrap();
|
||||
builder.execute().unwrap();
|
||||
wtxn.commit().unwrap();
|
||||
|
||||
@ -452,12 +462,13 @@ fn reindexing_wiki(c: &mut Criterion) {
|
||||
let indexing_config =
|
||||
IndexDocumentsConfig { autogenerate_docids: true, ..Default::default() };
|
||||
let mut wtxn = index.write_txn().unwrap();
|
||||
let mut builder =
|
||||
let builder =
|
||||
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ())
|
||||
.unwrap();
|
||||
|
||||
let documents = utils::documents_from(datasets_paths::SMOL_WIKI_ARTICLES, "csv");
|
||||
builder.add_documents(documents).unwrap();
|
||||
let (builder, user_error) = builder.add_documents(documents).unwrap();
|
||||
user_error.unwrap();
|
||||
builder.execute().unwrap();
|
||||
wtxn.commit().unwrap();
|
||||
|
||||
@ -468,12 +479,13 @@ fn reindexing_wiki(c: &mut Criterion) {
|
||||
let indexing_config =
|
||||
IndexDocumentsConfig { autogenerate_docids: true, ..Default::default() };
|
||||
let mut wtxn = index.write_txn().unwrap();
|
||||
let mut builder =
|
||||
let builder =
|
||||
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ())
|
||||
.unwrap();
|
||||
|
||||
let documents = utils::documents_from(datasets_paths::SMOL_WIKI_ARTICLES, "csv");
|
||||
builder.add_documents(documents).unwrap();
|
||||
let (builder, user_error) = builder.add_documents(documents).unwrap();
|
||||
user_error.unwrap();
|
||||
builder.execute().unwrap();
|
||||
wtxn.commit().unwrap();
|
||||
|
||||
@ -507,11 +519,12 @@ fn deleting_wiki_in_batches_default(c: &mut Criterion) {
|
||||
let mut wtxn = index.write_txn().unwrap();
|
||||
let indexing_config =
|
||||
IndexDocumentsConfig { autogenerate_docids: true, ..Default::default() };
|
||||
let mut builder =
|
||||
let builder =
|
||||
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ())
|
||||
.unwrap();
|
||||
let documents = utils::documents_from(datasets_paths::SMOL_WIKI_ARTICLES, "csv");
|
||||
builder.add_documents(documents).unwrap();
|
||||
let (builder, user_error) = builder.add_documents(documents).unwrap();
|
||||
user_error.unwrap();
|
||||
builder.execute().unwrap();
|
||||
wtxn.commit().unwrap();
|
||||
|
||||
@ -564,12 +577,13 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
|
||||
let config = IndexerConfig::default();
|
||||
let indexing_config =
|
||||
IndexDocumentsConfig { autogenerate_docids: true, ..Default::default() };
|
||||
let mut builder =
|
||||
let builder =
|
||||
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ())
|
||||
.unwrap();
|
||||
let documents =
|
||||
utils::documents_from(datasets_paths::SMOL_WIKI_ARTICLES_1_2, "csv");
|
||||
builder.add_documents(documents).unwrap();
|
||||
let (builder, user_error) = builder.add_documents(documents).unwrap();
|
||||
user_error.unwrap();
|
||||
builder.execute().unwrap();
|
||||
|
||||
wtxn.commit().unwrap();
|
||||
@ -581,24 +595,26 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
|
||||
let indexing_config =
|
||||
IndexDocumentsConfig { autogenerate_docids: true, ..Default::default() };
|
||||
let mut wtxn = index.write_txn().unwrap();
|
||||
let mut builder =
|
||||
let builder =
|
||||
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ())
|
||||
.unwrap();
|
||||
|
||||
let documents =
|
||||
utils::documents_from(datasets_paths::SMOL_WIKI_ARTICLES_3_4, "csv");
|
||||
builder.add_documents(documents).unwrap();
|
||||
let (builder, user_error) = builder.add_documents(documents).unwrap();
|
||||
user_error.unwrap();
|
||||
builder.execute().unwrap();
|
||||
|
||||
let indexing_config =
|
||||
IndexDocumentsConfig { autogenerate_docids: true, ..Default::default() };
|
||||
let mut builder =
|
||||
let builder =
|
||||
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ())
|
||||
.unwrap();
|
||||
|
||||
let documents =
|
||||
utils::documents_from(datasets_paths::SMOL_WIKI_ARTICLES_4_4, "csv");
|
||||
builder.add_documents(documents).unwrap();
|
||||
let (builder, user_error) = builder.add_documents(documents).unwrap();
|
||||
user_error.unwrap();
|
||||
builder.execute().unwrap();
|
||||
|
||||
wtxn.commit().unwrap();
|
||||
@ -631,12 +647,13 @@ fn indexing_movies_default(c: &mut Criterion) {
|
||||
let config = IndexerConfig::default();
|
||||
let indexing_config = IndexDocumentsConfig::default();
|
||||
let mut wtxn = index.write_txn().unwrap();
|
||||
let mut builder =
|
||||
let builder =
|
||||
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ())
|
||||
.unwrap();
|
||||
|
||||
let documents = utils::documents_from(datasets_paths::MOVIES, "json");
|
||||
builder.add_documents(documents).unwrap();
|
||||
let (builder, user_error) = builder.add_documents(documents).unwrap();
|
||||
user_error.unwrap();
|
||||
builder.execute().unwrap();
|
||||
wtxn.commit().unwrap();
|
||||
|
||||
@ -667,12 +684,13 @@ fn reindexing_movies_default(c: &mut Criterion) {
|
||||
let config = IndexerConfig::default();
|
||||
let indexing_config = IndexDocumentsConfig::default();
|
||||
let mut wtxn = index.write_txn().unwrap();
|
||||
let mut builder =
|
||||
let builder =
|
||||
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ())
|
||||
.unwrap();
|
||||
|
||||
let documents = utils::documents_from(datasets_paths::MOVIES, "json");
|
||||
builder.add_documents(documents).unwrap();
|
||||
let (builder, user_error) = builder.add_documents(documents).unwrap();
|
||||
user_error.unwrap();
|
||||
builder.execute().unwrap();
|
||||
wtxn.commit().unwrap();
|
||||
|
||||
@ -682,12 +700,13 @@ fn reindexing_movies_default(c: &mut Criterion) {
|
||||
let config = IndexerConfig::default();
|
||||
let indexing_config = IndexDocumentsConfig::default();
|
||||
let mut wtxn = index.write_txn().unwrap();
|
||||
let mut builder =
|
||||
let builder =
|
||||
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ())
|
||||
.unwrap();
|
||||
|
||||
let documents = utils::documents_from(datasets_paths::MOVIES, "json");
|
||||
builder.add_documents(documents).unwrap();
|
||||
let (builder, user_error) = builder.add_documents(documents).unwrap();
|
||||
user_error.unwrap();
|
||||
builder.execute().unwrap();
|
||||
wtxn.commit().unwrap();
|
||||
|
||||
@ -720,11 +739,12 @@ fn deleting_movies_in_batches_default(c: &mut Criterion) {
|
||||
let config = IndexerConfig::default();
|
||||
let mut wtxn = index.write_txn().unwrap();
|
||||
let indexing_config = IndexDocumentsConfig::default();
|
||||
let mut builder =
|
||||
let builder =
|
||||
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ())
|
||||
.unwrap();
|
||||
let documents = utils::documents_from(datasets_paths::MOVIES, "json");
|
||||
builder.add_documents(documents).unwrap();
|
||||
let (builder, user_error) = builder.add_documents(documents).unwrap();
|
||||
user_error.unwrap();
|
||||
builder.execute().unwrap();
|
||||
wtxn.commit().unwrap();
|
||||
|
||||
@ -775,12 +795,13 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
|
||||
// as we don't care about the time it takes.
|
||||
let config = IndexerConfig::default();
|
||||
let indexing_config = IndexDocumentsConfig::default();
|
||||
let mut builder =
|
||||
let builder =
|
||||
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ())
|
||||
.unwrap();
|
||||
|
||||
let documents = utils::documents_from(datasets_paths::MOVIES_1_2, "json");
|
||||
builder.add_documents(documents).unwrap();
|
||||
let (builder, user_error) = builder.add_documents(documents).unwrap();
|
||||
user_error.unwrap();
|
||||
builder.execute().unwrap();
|
||||
|
||||
wtxn.commit().unwrap();
|
||||
@ -791,21 +812,23 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
|
||||
let config = IndexerConfig::default();
|
||||
let indexing_config = IndexDocumentsConfig::default();
|
||||
let mut wtxn = index.write_txn().unwrap();
|
||||
let mut builder =
|
||||
let builder =
|
||||
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ())
|
||||
.unwrap();
|
||||
|
||||
let documents = utils::documents_from(datasets_paths::MOVIES_3_4, "json");
|
||||
builder.add_documents(documents).unwrap();
|
||||
let (builder, user_error) = builder.add_documents(documents).unwrap();
|
||||
user_error.unwrap();
|
||||
builder.execute().unwrap();
|
||||
|
||||
let indexing_config = IndexDocumentsConfig::default();
|
||||
let mut builder =
|
||||
let builder =
|
||||
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ())
|
||||
.unwrap();
|
||||
|
||||
let documents = utils::documents_from(datasets_paths::MOVIES_4_4, "json");
|
||||
builder.add_documents(documents).unwrap();
|
||||
let (builder, user_error) = builder.add_documents(documents).unwrap();
|
||||
user_error.unwrap();
|
||||
builder.execute().unwrap();
|
||||
|
||||
wtxn.commit().unwrap();
|
||||
@ -861,12 +884,13 @@ fn indexing_nested_movies_default(c: &mut Criterion) {
|
||||
let config = IndexerConfig::default();
|
||||
let indexing_config = IndexDocumentsConfig::default();
|
||||
let mut wtxn = index.write_txn().unwrap();
|
||||
let mut builder =
|
||||
let builder =
|
||||
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ())
|
||||
.unwrap();
|
||||
|
||||
let documents = utils::documents_from(datasets_paths::NESTED_MOVIES, "json");
|
||||
builder.add_documents(documents).unwrap();
|
||||
let (builder, user_error) = builder.add_documents(documents).unwrap();
|
||||
user_error.unwrap();
|
||||
builder.execute().unwrap();
|
||||
wtxn.commit().unwrap();
|
||||
|
||||
@ -922,11 +946,12 @@ fn deleting_nested_movies_in_batches_default(c: &mut Criterion) {
|
||||
let config = IndexerConfig::default();
|
||||
let mut wtxn = index.write_txn().unwrap();
|
||||
let indexing_config = IndexDocumentsConfig::default();
|
||||
let mut builder =
|
||||
let builder =
|
||||
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ())
|
||||
.unwrap();
|
||||
let documents = utils::documents_from(datasets_paths::NESTED_MOVIES, "json");
|
||||
builder.add_documents(documents).unwrap();
|
||||
let (builder, user_error) = builder.add_documents(documents).unwrap();
|
||||
user_error.unwrap();
|
||||
builder.execute().unwrap();
|
||||
wtxn.commit().unwrap();
|
||||
|
||||
@ -984,12 +1009,13 @@ fn indexing_nested_movies_without_faceted_fields(c: &mut Criterion) {
|
||||
let config = IndexerConfig::default();
|
||||
let indexing_config = IndexDocumentsConfig::default();
|
||||
let mut wtxn = index.write_txn().unwrap();
|
||||
let mut builder =
|
||||
let builder =
|
||||
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ())
|
||||
.unwrap();
|
||||
|
||||
let documents = utils::documents_from(datasets_paths::NESTED_MOVIES, "json");
|
||||
builder.add_documents(documents).unwrap();
|
||||
let (builder, user_error) = builder.add_documents(documents).unwrap();
|
||||
user_error.unwrap();
|
||||
builder.execute().unwrap();
|
||||
wtxn.commit().unwrap();
|
||||
|
||||
@ -1021,12 +1047,13 @@ fn indexing_geo(c: &mut Criterion) {
|
||||
let config = IndexerConfig::default();
|
||||
let indexing_config = IndexDocumentsConfig::default();
|
||||
let mut wtxn = index.write_txn().unwrap();
|
||||
let mut builder =
|
||||
let builder =
|
||||
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ())
|
||||
.unwrap();
|
||||
|
||||
let documents = utils::documents_from(datasets_paths::SMOL_ALL_COUNTRIES, "jsonl");
|
||||
builder.add_documents(documents).unwrap();
|
||||
let (builder, user_error) = builder.add_documents(documents).unwrap();
|
||||
user_error.unwrap();
|
||||
builder.execute().unwrap();
|
||||
|
||||
wtxn.commit().unwrap();
|
||||
@ -1058,12 +1085,13 @@ fn reindexing_geo(c: &mut Criterion) {
|
||||
let config = IndexerConfig::default();
|
||||
let indexing_config = IndexDocumentsConfig::default();
|
||||
let mut wtxn = index.write_txn().unwrap();
|
||||
let mut builder =
|
||||
let builder =
|
||||
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ())
|
||||
.unwrap();
|
||||
|
||||
let documents = utils::documents_from(datasets_paths::SMOL_ALL_COUNTRIES, "jsonl");
|
||||
builder.add_documents(documents).unwrap();
|
||||
let (builder, user_error) = builder.add_documents(documents).unwrap();
|
||||
user_error.unwrap();
|
||||
builder.execute().unwrap();
|
||||
|
||||
wtxn.commit().unwrap();
|
||||
@ -1074,12 +1102,13 @@ fn reindexing_geo(c: &mut Criterion) {
|
||||
let config = IndexerConfig::default();
|
||||
let indexing_config = IndexDocumentsConfig::default();
|
||||
let mut wtxn = index.write_txn().unwrap();
|
||||
let mut builder =
|
||||
let builder =
|
||||
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ())
|
||||
.unwrap();
|
||||
|
||||
let documents = utils::documents_from(datasets_paths::SMOL_ALL_COUNTRIES, "jsonl");
|
||||
builder.add_documents(documents).unwrap();
|
||||
let (builder, user_error) = builder.add_documents(documents).unwrap();
|
||||
user_error.unwrap();
|
||||
builder.execute().unwrap();
|
||||
|
||||
wtxn.commit().unwrap();
|
||||
@ -1113,11 +1142,12 @@ fn deleting_geo_in_batches_default(c: &mut Criterion) {
|
||||
let config = IndexerConfig::default();
|
||||
let mut wtxn = index.write_txn().unwrap();
|
||||
let indexing_config = IndexDocumentsConfig::default();
|
||||
let mut builder =
|
||||
let builder =
|
||||
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ())
|
||||
.unwrap();
|
||||
let documents = utils::documents_from(datasets_paths::SMOL_ALL_COUNTRIES, "json");
|
||||
builder.add_documents(documents).unwrap();
|
||||
let documents = utils::documents_from(datasets_paths::SMOL_ALL_COUNTRIES, "jsonl");
|
||||
let (builder, user_error) = builder.add_documents(documents).unwrap();
|
||||
user_error.unwrap();
|
||||
builder.execute().unwrap();
|
||||
wtxn.commit().unwrap();
|
||||
|
||||
|
@ -7,12 +7,12 @@ use std::path::Path;
|
||||
|
||||
use criterion::BenchmarkId;
|
||||
use heed::EnvOpenOptions;
|
||||
use milli::documents::DocumentBatchReader;
|
||||
use milli::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
|
||||
use milli::update::{
|
||||
IndexDocuments, IndexDocumentsConfig, IndexDocumentsMethod, IndexerConfig, Settings,
|
||||
};
|
||||
use milli::{Filter, Index};
|
||||
use serde_json::{Map, Value};
|
||||
use milli::{Filter, Index, Object};
|
||||
use serde_json::Value;
|
||||
|
||||
pub struct Conf<'a> {
|
||||
/// where we are going to create our database.mmdb directory
|
||||
@ -96,12 +96,10 @@ pub fn base_setup(conf: &Conf) -> Index {
|
||||
update_method: IndexDocumentsMethod::ReplaceDocuments,
|
||||
..Default::default()
|
||||
};
|
||||
let mut builder =
|
||||
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ()).unwrap();
|
||||
let builder = IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ()).unwrap();
|
||||
let documents = documents_from(conf.dataset, conf.dataset_format);
|
||||
|
||||
builder.add_documents(documents).unwrap();
|
||||
|
||||
let (builder, user_error) = builder.add_documents(documents).unwrap();
|
||||
user_error.unwrap();
|
||||
builder.execute().unwrap();
|
||||
wtxn.commit().unwrap();
|
||||
|
||||
@ -140,7 +138,7 @@ pub fn run_benches(c: &mut criterion::Criterion, confs: &[Conf]) {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn documents_from(filename: &str, filetype: &str) -> DocumentBatchReader<impl BufRead + Seek> {
|
||||
pub fn documents_from(filename: &str, filetype: &str) -> DocumentsBatchReader<impl BufRead + Seek> {
|
||||
let reader =
|
||||
File::open(filename).expect(&format!("could not find the dataset in: {}", filename));
|
||||
let reader = BufReader::new(reader);
|
||||
@ -150,39 +148,35 @@ pub fn documents_from(filename: &str, filetype: &str) -> DocumentBatchReader<imp
|
||||
"jsonl" => documents_from_jsonl(reader).unwrap(),
|
||||
otherwise => panic!("invalid update format {:?}", otherwise),
|
||||
};
|
||||
DocumentBatchReader::from_reader(Cursor::new(documents)).unwrap()
|
||||
DocumentsBatchReader::from_reader(Cursor::new(documents)).unwrap()
|
||||
}
|
||||
|
||||
fn documents_from_jsonl(mut reader: impl BufRead) -> anyhow::Result<Vec<u8>> {
|
||||
let mut writer = Cursor::new(Vec::new());
|
||||
let mut documents = milli::documents::DocumentBatchBuilder::new(&mut writer)?;
|
||||
fn documents_from_jsonl(reader: impl BufRead) -> anyhow::Result<Vec<u8>> {
|
||||
let mut documents = DocumentsBatchBuilder::new(Vec::new());
|
||||
|
||||
let mut buf = String::new();
|
||||
|
||||
while reader.read_line(&mut buf)? > 0 {
|
||||
documents.extend_from_json(&mut buf.as_bytes())?;
|
||||
buf.clear();
|
||||
for result in serde_json::Deserializer::from_reader(reader).into_iter::<Object>() {
|
||||
let object = result?;
|
||||
documents.append_json_object(&object)?;
|
||||
}
|
||||
documents.finish()?;
|
||||
|
||||
Ok(writer.into_inner())
|
||||
documents.into_inner().map_err(Into::into)
|
||||
}
|
||||
|
||||
fn documents_from_json(reader: impl BufRead) -> anyhow::Result<Vec<u8>> {
|
||||
let mut writer = Cursor::new(Vec::new());
|
||||
let mut documents = milli::documents::DocumentBatchBuilder::new(&mut writer)?;
|
||||
let mut documents = DocumentsBatchBuilder::new(Vec::new());
|
||||
|
||||
documents.extend_from_json(reader)?;
|
||||
documents.finish()?;
|
||||
documents.append_json_array(reader)?;
|
||||
|
||||
Ok(writer.into_inner())
|
||||
documents.into_inner().map_err(Into::into)
|
||||
}
|
||||
|
||||
fn documents_from_csv(reader: impl BufRead) -> anyhow::Result<Vec<u8>> {
|
||||
let mut writer = Cursor::new(Vec::new());
|
||||
milli::documents::DocumentBatchBuilder::from_csv(reader, &mut writer)?.finish()?;
|
||||
let csv = csv::Reader::from_reader(reader);
|
||||
|
||||
Ok(writer.into_inner())
|
||||
let mut documents = DocumentsBatchBuilder::new(Vec::new());
|
||||
documents.append_csv(csv)?;
|
||||
|
||||
documents.into_inner().map_err(Into::into)
|
||||
}
|
||||
|
||||
enum AllowedType {
|
||||
@ -222,14 +216,14 @@ impl<R: Read> CSVDocumentDeserializer<R> {
|
||||
}
|
||||
|
||||
impl<R: Read> Iterator for CSVDocumentDeserializer<R> {
|
||||
type Item = anyhow::Result<Map<String, Value>>;
|
||||
type Item = anyhow::Result<Object>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
let csv_document = self.documents.next()?;
|
||||
|
||||
match csv_document {
|
||||
Ok(csv_document) => {
|
||||
let mut document = Map::new();
|
||||
let mut document = Object::new();
|
||||
|
||||
for ((field_name, field_type), value) in
|
||||
self.headers.iter().zip(csv_document.into_iter())
|
||||
|
Reference in New Issue
Block a user