mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-31 07:56:28 +00:00 
			
		
		
		
	Add deletion benchmarks
This commit is contained in:
		| @@ -16,6 +16,9 @@ jemallocator = "0.3.2" | ||||
| [dev-dependencies] | ||||
| heed = { git = "https://github.com/meilisearch/heed", tag = "v0.12.1" } | ||||
| criterion = { version = "0.3.5", features = ["html_reports"] } | ||||
| rand = "0.8.5" | ||||
| rand_chacha = "0.3.1" | ||||
| roaring = "0.9.0" | ||||
|  | ||||
| [build-dependencies] | ||||
| anyhow = "1.0.56" | ||||
|   | ||||
| @@ -5,14 +5,21 @@ use std::fs::{create_dir_all, remove_dir_all}; | ||||
| use std::path::Path; | ||||
|  | ||||
| use criterion::{criterion_group, criterion_main, Criterion}; | ||||
| use heed::EnvOpenOptions; | ||||
| use milli::update::{IndexDocuments, IndexDocumentsConfig, IndexerConfig, Settings}; | ||||
| use heed::{EnvOpenOptions, RwTxn}; | ||||
| use milli::update::{ | ||||
|     DeleteDocuments, IndexDocuments, IndexDocumentsConfig, IndexerConfig, Settings, | ||||
| }; | ||||
| use milli::Index; | ||||
| use rand::seq::SliceRandom; | ||||
| use rand_chacha::rand_core::SeedableRng; | ||||
| use roaring::RoaringBitmap; | ||||
|  | ||||
| #[cfg(target_os = "linux")] | ||||
| #[global_allocator] | ||||
| static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc; | ||||
|  | ||||
| const BENCHMARK_ITERATION: usize = 10; | ||||
|  | ||||
| fn setup_dir(path: impl AsRef<Path>) { | ||||
|     match remove_dir_all(path.as_ref()) { | ||||
|         Ok(_) => (), | ||||
| @@ -31,39 +38,95 @@ fn setup_index() -> Index { | ||||
|     Index::new(options, path).unwrap() | ||||
| } | ||||
|  | ||||
| fn setup_settings<'t>( | ||||
|     wtxn: &mut RwTxn<'t, '_>, | ||||
|     index: &'t Index, | ||||
|     primary_key: &str, | ||||
|     searchable_fields: &[&str], | ||||
|     filterable_fields: &[&str], | ||||
|     sortable_fields: &[&str], | ||||
| ) { | ||||
|     let config = IndexerConfig::default(); | ||||
|     let mut builder = Settings::new(wtxn, index, &config); | ||||
|  | ||||
|     builder.set_primary_key(primary_key.to_owned()); | ||||
|  | ||||
|     let searchable_fields = searchable_fields.iter().map(|s| s.to_string()).collect(); | ||||
|     builder.set_searchable_fields(searchable_fields); | ||||
|  | ||||
|     let filterable_fields = filterable_fields.iter().map(|s| s.to_string()).collect(); | ||||
|     builder.set_filterable_fields(filterable_fields); | ||||
|  | ||||
|     let sortable_fields = sortable_fields.iter().map(|s| s.to_string()).collect(); | ||||
|     builder.set_sortable_fields(sortable_fields); | ||||
|  | ||||
|     builder.execute(|_| ()).unwrap(); | ||||
| } | ||||
|  | ||||
| fn setup_index_with_settings<'t>( | ||||
|     primary_key: &str, | ||||
|     searchable_fields: &[&str], | ||||
|     filterable_fields: &[&str], | ||||
|     sortable_fields: &[&str], | ||||
| ) -> milli::Index { | ||||
|     let index = setup_index(); | ||||
|     let mut wtxn = index.write_txn().unwrap(); | ||||
|     setup_settings( | ||||
|         &mut wtxn, | ||||
|         &index, | ||||
|         primary_key, | ||||
|         searchable_fields, | ||||
|         filterable_fields, | ||||
|         sortable_fields, | ||||
|     ); | ||||
|     wtxn.commit().unwrap(); | ||||
|  | ||||
|     index | ||||
| } | ||||
|  | ||||
| fn choose_document_ids_from_index_batched( | ||||
|     index: &Index, | ||||
|     count: usize, | ||||
|     batch_size: usize, | ||||
| ) -> Vec<RoaringBitmap> { | ||||
|     let rtxn = index.read_txn().unwrap(); | ||||
|     // create batch of document ids to delete | ||||
|     let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(7700); | ||||
|     let document_ids: Vec<_> = index.documents_ids(&rtxn).unwrap().into_iter().collect(); | ||||
|     let document_ids_to_delete: Vec<_> = | ||||
|         document_ids.choose_multiple(&mut rng, count).map(Clone::clone).collect(); | ||||
|  | ||||
|     document_ids_to_delete | ||||
|         .chunks(batch_size) | ||||
|         .map(|c| { | ||||
|             let mut batch = RoaringBitmap::new(); | ||||
|             for id in c { | ||||
|                 batch.insert(*id); | ||||
|             } | ||||
|  | ||||
|             batch | ||||
|         }) | ||||
|         .collect() | ||||
| } | ||||
|  | ||||
| fn indexing_songs_default(c: &mut Criterion) { | ||||
|     let mut group = c.benchmark_group("indexing"); | ||||
|     group.sample_size(10); | ||||
|     group.sample_size(BENCHMARK_ITERATION); | ||||
|     group.bench_function("Indexing songs with default settings", |b| { | ||||
|         b.iter_with_setup( | ||||
|             move || { | ||||
|                 let index = setup_index(); | ||||
|                 let primary_key = "id"; | ||||
|                 let searchable_fields = ["title", "album", "artist"]; | ||||
|                 let filterable_fields = | ||||
|                     ["released-timestamp", "duration-float", "genre", "country", "artist"]; | ||||
|                 let sortable_fields = []; | ||||
|  | ||||
|                 let config = IndexerConfig::default(); | ||||
|                 let mut wtxn = index.write_txn().unwrap(); | ||||
|                 let mut builder = Settings::new(&mut wtxn, &index, &config); | ||||
|  | ||||
|                 builder.set_primary_key("id".to_owned()); | ||||
|                 let displayed_fields = | ||||
|                     ["title", "album", "artist", "genre", "country", "released", "duration"] | ||||
|                         .iter() | ||||
|                         .map(|s| s.to_string()) | ||||
|                         .collect(); | ||||
|                 builder.set_displayed_fields(displayed_fields); | ||||
|  | ||||
|                 let searchable_fields = | ||||
|                     ["title", "album", "artist"].iter().map(|s| s.to_string()).collect(); | ||||
|                 builder.set_searchable_fields(searchable_fields); | ||||
|  | ||||
|                 let faceted_fields = | ||||
|                     ["released-timestamp", "duration-float", "genre", "country", "artist"] | ||||
|                         .iter() | ||||
|                         .map(|s| s.to_string()) | ||||
|                         .collect(); | ||||
|                 builder.set_filterable_fields(faceted_fields); | ||||
|                 builder.execute(|_| ()).unwrap(); | ||||
|                 wtxn.commit().unwrap(); | ||||
|                 index | ||||
|                 setup_index_with_settings( | ||||
|                     &primary_key, | ||||
|                     &searchable_fields, | ||||
|                     &filterable_fields, | ||||
|                     &sortable_fields, | ||||
|                 ) | ||||
|             }, | ||||
|             move |index| { | ||||
|                 let config = IndexerConfig::default(); | ||||
| @@ -84,41 +147,85 @@ fn indexing_songs_default(c: &mut Criterion) { | ||||
|     }); | ||||
| } | ||||
|  | ||||
| fn indexing_songs_in_three_batches_default(c: &mut Criterion) { | ||||
| fn deleting_songs_in_batches_default(c: &mut Criterion) { | ||||
|     let mut group = c.benchmark_group("indexing"); | ||||
|     group.sample_size(10); | ||||
|     group.bench_function("Indexing songs in three batches with default settings", |b| { | ||||
|     group.sample_size(BENCHMARK_ITERATION); | ||||
|     group.bench_function("Deleting songs in batches with default settings", |b| { | ||||
|         b.iter_with_setup( | ||||
|             move || { | ||||
|                 let index = setup_index(); | ||||
|                 let primary_key = "id"; | ||||
|                 let searchable_fields = ["title", "album", "artist"]; | ||||
|                 let filterable_fields = | ||||
|                     ["released-timestamp", "duration-float", "genre", "country", "artist"]; | ||||
|                 let sortable_fields = []; | ||||
|  | ||||
|                 let config = IndexerConfig::default(); | ||||
|                 let mut wtxn = index.write_txn().unwrap(); | ||||
|                 let mut builder = Settings::new(&mut wtxn, &index, &config); | ||||
|  | ||||
|                 builder.set_primary_key("id".to_owned()); | ||||
|                 let displayed_fields = | ||||
|                     ["title", "album", "artist", "genre", "country", "released", "duration"] | ||||
|                         .iter() | ||||
|                         .map(|s| s.to_string()) | ||||
|                         .collect(); | ||||
|                 builder.set_displayed_fields(displayed_fields); | ||||
|  | ||||
|                 let searchable_fields = | ||||
|                     ["title", "album", "artist"].iter().map(|s| s.to_string()).collect(); | ||||
|                 builder.set_searchable_fields(searchable_fields); | ||||
|  | ||||
|                 let faceted_fields = | ||||
|                     ["released-timestamp", "duration-float", "genre", "country", "artist"] | ||||
|                         .iter() | ||||
|                         .map(|s| s.to_string()) | ||||
|                         .collect(); | ||||
|                 builder.set_filterable_fields(faceted_fields); | ||||
|                 builder.execute(|_| ()).unwrap(); | ||||
|                 let index = setup_index_with_settings( | ||||
|                     &primary_key, | ||||
|                     &searchable_fields, | ||||
|                     &filterable_fields, | ||||
|                     &sortable_fields, | ||||
|                 ); | ||||
|  | ||||
|                 // Populate the index in the setup part, as we don't care about | ||||
|                 // the time it takes: only the deletions below are measured. | ||||
|                 let config = IndexerConfig::default(); | ||||
|                 let mut wtxn = index.write_txn().unwrap(); | ||||
|                 let indexing_config = IndexDocumentsConfig::default(); | ||||
|                 let mut builder = | ||||
|                     IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ()) | ||||
|                         .unwrap(); | ||||
|                 let documents = utils::documents_from(datasets_paths::SMOL_SONGS, "csv"); | ||||
|                 builder.add_documents(documents).unwrap(); | ||||
|                 builder.execute().unwrap(); | ||||
|                 wtxn.commit().unwrap(); | ||||
|  | ||||
|                 let count = 1250; | ||||
|                 let batch_size = 250; | ||||
|                 let document_ids_to_delete = | ||||
|                     choose_document_ids_from_index_batched(&index, count, batch_size); | ||||
|  | ||||
|                 (index, document_ids_to_delete) | ||||
|             }, | ||||
|             move |(index, document_ids_to_delete)| { | ||||
|                 let mut wtxn = index.write_txn().unwrap(); | ||||
|  | ||||
|                 for ids in document_ids_to_delete { | ||||
|                     let mut builder = DeleteDocuments::new(&mut wtxn, &index).unwrap(); | ||||
|                     builder.delete_documents(&ids); | ||||
|                     builder.execute().unwrap(); | ||||
|                 } | ||||
|  | ||||
|                 wtxn.commit().unwrap(); | ||||
|  | ||||
|                 index.prepare_for_closing().wait(); | ||||
|             }, | ||||
|         ) | ||||
|     }); | ||||
| } | ||||
|  | ||||
| fn indexing_songs_in_three_batches_default(c: &mut Criterion) { | ||||
|     let mut group = c.benchmark_group("indexing"); | ||||
|     group.sample_size(BENCHMARK_ITERATION); | ||||
|     group.bench_function("Indexing songs in three batches with default settings", |b| { | ||||
|         b.iter_with_setup( | ||||
|             move || { | ||||
|                 let primary_key = "id"; | ||||
|                 let searchable_fields = ["title", "album", "artist"]; | ||||
|                 let filterable_fields = | ||||
|                     ["released-timestamp", "duration-float", "genre", "country", "artist"]; | ||||
|                 let sortable_fields = []; | ||||
|  | ||||
|                 let index = setup_index_with_settings( | ||||
|                     &primary_key, | ||||
|                     &searchable_fields, | ||||
|                     &filterable_fields, | ||||
|                     &sortable_fields, | ||||
|                 ); | ||||
|  | ||||
|                 // We index only one half of the dataset in the setup part | ||||
|                 // as we don't care about the time it takes. | ||||
|                 let config = IndexerConfig::default(); | ||||
|                 let mut wtxn = index.write_txn().unwrap(); | ||||
|                 let indexing_config = IndexDocumentsConfig::default(); | ||||
|                 let mut builder = | ||||
|                     IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ()) | ||||
| @@ -160,34 +267,21 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) { | ||||
|  | ||||
| fn indexing_songs_without_faceted_numbers(c: &mut Criterion) { | ||||
|     let mut group = c.benchmark_group("indexing"); | ||||
|     group.sample_size(10); | ||||
|     group.sample_size(BENCHMARK_ITERATION); | ||||
|     group.bench_function("Indexing songs without faceted numbers", |b| { | ||||
|         b.iter_with_setup( | ||||
|             move || { | ||||
|                 let index = setup_index(); | ||||
|                 let primary_key = "id"; | ||||
|                 let searchable_fields = ["title", "album", "artist"]; | ||||
|                 let filterable_fields = ["genre", "country", "artist"]; | ||||
|                 let sortable_fields = []; | ||||
|  | ||||
|                 let config = IndexerConfig::default(); | ||||
|                 let mut wtxn = index.write_txn().unwrap(); | ||||
|                 let mut builder = Settings::new(&mut wtxn, &index, &config); | ||||
|  | ||||
|                 builder.set_primary_key("id".to_owned()); | ||||
|                 let displayed_fields = | ||||
|                     ["title", "album", "artist", "genre", "country", "released", "duration"] | ||||
|                         .iter() | ||||
|                         .map(|s| s.to_string()) | ||||
|                         .collect(); | ||||
|                 builder.set_displayed_fields(displayed_fields); | ||||
|  | ||||
|                 let searchable_fields = | ||||
|                     ["title", "album", "artist"].iter().map(|s| s.to_string()).collect(); | ||||
|                 builder.set_searchable_fields(searchable_fields); | ||||
|  | ||||
|                 let faceted_fields = | ||||
|                     ["genre", "country", "artist"].iter().map(|s| s.to_string()).collect(); | ||||
|                 builder.set_filterable_fields(faceted_fields); | ||||
|                 builder.execute(|_| ()).unwrap(); | ||||
|                 wtxn.commit().unwrap(); | ||||
|                 index | ||||
|                 setup_index_with_settings( | ||||
|                     &primary_key, | ||||
|                     &searchable_fields, | ||||
|                     &filterable_fields, | ||||
|                     &sortable_fields, | ||||
|                 ) | ||||
|             }, | ||||
|             move |index| { | ||||
|                 let config = IndexerConfig::default(); | ||||
| @@ -211,30 +305,21 @@ fn indexing_songs_without_faceted_numbers(c: &mut Criterion) { | ||||
|  | ||||
| fn indexing_songs_without_faceted_fields(c: &mut Criterion) { | ||||
|     let mut group = c.benchmark_group("indexing"); | ||||
|     group.sample_size(10); | ||||
|     group.sample_size(BENCHMARK_ITERATION); | ||||
|     group.bench_function("Indexing songs without any facets", |b| { | ||||
|         b.iter_with_setup( | ||||
|             move || { | ||||
|                 let index = setup_index(); | ||||
|                 let primary_key = "id"; | ||||
|                 let searchable_fields = ["title", "album", "artist"]; | ||||
|                 let filterable_fields = []; | ||||
|                 let sortable_fields = []; | ||||
|  | ||||
|                 let config = IndexerConfig::default(); | ||||
|                 let mut wtxn = index.write_txn().unwrap(); | ||||
|                 let mut builder = Settings::new(&mut wtxn, &index, &config); | ||||
|  | ||||
|                 builder.set_primary_key("id".to_owned()); | ||||
|                 let displayed_fields = | ||||
|                     ["title", "album", "artist", "genre", "country", "released", "duration"] | ||||
|                         .iter() | ||||
|                         .map(|s| s.to_string()) | ||||
|                         .collect(); | ||||
|                 builder.set_displayed_fields(displayed_fields); | ||||
|  | ||||
|                 let searchable_fields = | ||||
|                     ["title", "album", "artist"].iter().map(|s| s.to_string()).collect(); | ||||
|                 builder.set_searchable_fields(searchable_fields); | ||||
|                 builder.execute(|_| ()).unwrap(); | ||||
|                 wtxn.commit().unwrap(); | ||||
|                 index | ||||
|                 setup_index_with_settings( | ||||
|                     &primary_key, | ||||
|                     &searchable_fields, | ||||
|                     &filterable_fields, | ||||
|                     &sortable_fields, | ||||
|                 ) | ||||
|             }, | ||||
|             move |index| { | ||||
|                 let config = IndexerConfig::default(); | ||||
| @@ -257,29 +342,21 @@ fn indexing_songs_without_faceted_fields(c: &mut Criterion) { | ||||
|  | ||||
| fn indexing_wiki(c: &mut Criterion) { | ||||
|     let mut group = c.benchmark_group("indexing"); | ||||
|     group.sample_size(10); | ||||
|     group.sample_size(BENCHMARK_ITERATION); | ||||
|     group.bench_function("Indexing wiki", |b| { | ||||
|         b.iter_with_setup( | ||||
|             move || { | ||||
|                 let index = setup_index(); | ||||
|                 let primary_key = "id"; | ||||
|                 let searchable_fields = ["title", "body"]; | ||||
|                 let filterable_fields = []; | ||||
|                 let sortable_fields = []; | ||||
|  | ||||
|                 let config = IndexerConfig::default(); | ||||
|                 let mut wtxn = index.write_txn().unwrap(); | ||||
|                 let mut builder = Settings::new(&mut wtxn, &index, &config); | ||||
|  | ||||
|                 builder.set_primary_key("id".to_owned()); | ||||
|                 let displayed_fields = | ||||
|                     ["title", "body", "url"].iter().map(|s| s.to_string()).collect(); | ||||
|                 builder.set_displayed_fields(displayed_fields); | ||||
|  | ||||
|                 let searchable_fields = ["title", "body"].iter().map(|s| s.to_string()).collect(); | ||||
|                 builder.set_searchable_fields(searchable_fields); | ||||
|  | ||||
|                 // there is NO faceted fields at all | ||||
|  | ||||
|                 builder.execute(|_| ()).unwrap(); | ||||
|                 wtxn.commit().unwrap(); | ||||
|                 index | ||||
|                 setup_index_with_settings( | ||||
|                     &primary_key, | ||||
|                     &searchable_fields, | ||||
|                     &filterable_fields, | ||||
|                     &sortable_fields, | ||||
|                 ) | ||||
|             }, | ||||
|             move |index| { | ||||
|                 let config = IndexerConfig::default(); | ||||
| @@ -301,28 +378,81 @@ fn indexing_wiki(c: &mut Criterion) { | ||||
|     }); | ||||
| } | ||||
|  | ||||
| fn deleting_wiki_in_batches_default(c: &mut Criterion) { | ||||
|     let mut group = c.benchmark_group("indexing"); | ||||
|     group.sample_size(BENCHMARK_ITERATION); | ||||
|     group.bench_function("Deleting wiki in batches with default settings", |b| { | ||||
|         b.iter_with_setup( | ||||
|             move || { | ||||
|                 let primary_key = "id"; | ||||
|                 let searchable_fields = ["title", "body"]; | ||||
|                 let filterable_fields = []; | ||||
|                 let sortable_fields = []; | ||||
|  | ||||
|                 let index = setup_index_with_settings( | ||||
|                     &primary_key, | ||||
|                     &searchable_fields, | ||||
|                     &filterable_fields, | ||||
|                     &sortable_fields, | ||||
|                 ); | ||||
|  | ||||
|                 // We index only one half of the dataset in the setup part | ||||
|                 // as we don't care about the time it takes. | ||||
|                 let config = IndexerConfig::default(); | ||||
|                 let mut wtxn = index.write_txn().unwrap(); | ||||
|                 let indexing_config = | ||||
|                     IndexDocumentsConfig { autogenerate_docids: true, ..Default::default() }; | ||||
|                 let mut builder = | ||||
|                     IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ()) | ||||
|                         .unwrap(); | ||||
|                 let documents = utils::documents_from(datasets_paths::SMOL_WIKI_ARTICLES, "csv"); | ||||
|                 builder.add_documents(documents).unwrap(); | ||||
|                 builder.execute().unwrap(); | ||||
|                 wtxn.commit().unwrap(); | ||||
|  | ||||
|                 let count = 1250; | ||||
|                 let batch_size = 250; | ||||
|                 let document_ids_to_delete = | ||||
|                     choose_document_ids_from_index_batched(&index, count, batch_size); | ||||
|  | ||||
|                 (index, document_ids_to_delete) | ||||
|             }, | ||||
|             move |(index, document_ids_to_delete)| { | ||||
|                 let mut wtxn = index.write_txn().unwrap(); | ||||
|  | ||||
|                 for ids in document_ids_to_delete { | ||||
|                     let mut builder = DeleteDocuments::new(&mut wtxn, &index).unwrap(); | ||||
|                     builder.delete_documents(&ids); | ||||
|                     builder.execute().unwrap(); | ||||
|                 } | ||||
|  | ||||
|                 wtxn.commit().unwrap(); | ||||
|  | ||||
|                 index.prepare_for_closing().wait(); | ||||
|             }, | ||||
|         ) | ||||
|     }); | ||||
| } | ||||
|  | ||||
| fn indexing_wiki_in_three_batches(c: &mut Criterion) { | ||||
|     let mut group = c.benchmark_group("indexing"); | ||||
|     group.sample_size(10); | ||||
|     group.sample_size(BENCHMARK_ITERATION); | ||||
|     group.bench_function("Indexing wiki in three batches", |b| { | ||||
|         b.iter_with_setup( | ||||
|             move || { | ||||
|                 let index = setup_index(); | ||||
|                 let primary_key = "id"; | ||||
|                 let searchable_fields = ["title", "body"]; | ||||
|                 let filterable_fields = []; | ||||
|                 let sortable_fields = []; | ||||
|  | ||||
|                 let index = setup_index_with_settings( | ||||
|                     &primary_key, | ||||
|                     &searchable_fields, | ||||
|                     &filterable_fields, | ||||
|                     &sortable_fields, | ||||
|                 ); | ||||
|  | ||||
|                 let config = IndexerConfig::default(); | ||||
|                 let mut wtxn = index.write_txn().unwrap(); | ||||
|                 let mut builder = Settings::new(&mut wtxn, &index, &config); | ||||
|  | ||||
|                 builder.set_primary_key("id".to_owned()); | ||||
|                 let displayed_fields = | ||||
|                     ["title", "body", "url"].iter().map(|s| s.to_string()).collect(); | ||||
|                 builder.set_displayed_fields(displayed_fields); | ||||
|  | ||||
|                 let searchable_fields = ["title", "body"].iter().map(|s| s.to_string()).collect(); | ||||
|                 builder.set_searchable_fields(searchable_fields); | ||||
|  | ||||
|                 // there is NO faceted fields at all | ||||
|                 builder.execute(|_| ()).unwrap(); | ||||
|  | ||||
|                 // We index only one half of the dataset in the setup part | ||||
|                 // as we don't care about the time it takes. | ||||
| @@ -376,34 +506,21 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) { | ||||
|  | ||||
| fn indexing_movies_default(c: &mut Criterion) { | ||||
|     let mut group = c.benchmark_group("indexing"); | ||||
|     group.sample_size(10); | ||||
|     group.sample_size(BENCHMARK_ITERATION); | ||||
|     group.bench_function("Indexing movies with default settings", |b| { | ||||
|         b.iter_with_setup( | ||||
|             move || { | ||||
|                 let index = setup_index(); | ||||
|                 let primary_key = "id"; | ||||
|                 let searchable_fields = ["title", "overview"]; | ||||
|                 let filterable_fields = ["released_date", "genres"]; | ||||
|                 let sortable_fields = []; | ||||
|  | ||||
|                 let config = IndexerConfig::default(); | ||||
|                 let mut wtxn = index.write_txn().unwrap(); | ||||
|                 let mut builder = Settings::new(&mut wtxn, &index, &config); | ||||
|  | ||||
|                 builder.set_primary_key("id".to_owned()); | ||||
|                 let displayed_fields = ["title", "poster", "overview", "release_date", "genres"] | ||||
|                     .iter() | ||||
|                     .map(|s| s.to_string()) | ||||
|                     .collect(); | ||||
|                 builder.set_displayed_fields(displayed_fields); | ||||
|  | ||||
|                 let searchable_fields = | ||||
|                     ["title", "overview"].iter().map(|s| s.to_string()).collect(); | ||||
|                 builder.set_searchable_fields(searchable_fields); | ||||
|  | ||||
|                 let faceted_fields = | ||||
|                     ["released_date", "genres"].iter().map(|s| s.to_string()).collect(); | ||||
|                 builder.set_filterable_fields(faceted_fields); | ||||
|  | ||||
|                 builder.execute(|_| ()).unwrap(); | ||||
|                 wtxn.commit().unwrap(); | ||||
|                 index | ||||
|                 setup_index_with_settings( | ||||
|                     &primary_key, | ||||
|                     &searchable_fields, | ||||
|                     &filterable_fields, | ||||
|                     &sortable_fields, | ||||
|                 ) | ||||
|             }, | ||||
|             move |index| { | ||||
|                 let config = IndexerConfig::default(); | ||||
| @@ -424,35 +541,80 @@ fn indexing_movies_default(c: &mut Criterion) { | ||||
|     }); | ||||
| } | ||||
|  | ||||
| fn deleting_movies_in_batches_default(c: &mut Criterion) { | ||||
|     let mut group = c.benchmark_group("indexing"); | ||||
|     group.sample_size(BENCHMARK_ITERATION); | ||||
|     group.bench_function("Deleting movies in batches with default settings", |b| { | ||||
|         b.iter_with_setup( | ||||
|             move || { | ||||
|                 let primary_key = "id"; | ||||
|                 let searchable_fields = ["title", "overview"]; | ||||
|                 let filterable_fields = ["released_date", "genres"]; | ||||
|                 let sortable_fields = []; | ||||
|  | ||||
|                 let index = setup_index_with_settings( | ||||
|                     &primary_key, | ||||
|                     &searchable_fields, | ||||
|                     &filterable_fields, | ||||
|                     &sortable_fields, | ||||
|                 ); | ||||
|  | ||||
|                 // We index only one half of the dataset in the setup part | ||||
|                 // as we don't care about the time it takes. | ||||
|                 let config = IndexerConfig::default(); | ||||
|                 let mut wtxn = index.write_txn().unwrap(); | ||||
|                 let indexing_config = IndexDocumentsConfig::default(); | ||||
|                 let mut builder = | ||||
|                     IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ()) | ||||
|                         .unwrap(); | ||||
|                 let documents = utils::documents_from(datasets_paths::MOVIES, "json"); | ||||
|                 builder.add_documents(documents).unwrap(); | ||||
|                 builder.execute().unwrap(); | ||||
|                 wtxn.commit().unwrap(); | ||||
|  | ||||
|                 let count = 1250; | ||||
|                 let batch_size = 250; | ||||
|                 let document_ids_to_delete = | ||||
|                     choose_document_ids_from_index_batched(&index, count, batch_size); | ||||
|  | ||||
|                 (index, document_ids_to_delete) | ||||
|             }, | ||||
|             move |(index, document_ids_to_delete)| { | ||||
|                 let mut wtxn = index.write_txn().unwrap(); | ||||
|  | ||||
|                 for ids in document_ids_to_delete { | ||||
|                     let mut builder = DeleteDocuments::new(&mut wtxn, &index).unwrap(); | ||||
|                     builder.delete_documents(&ids); | ||||
|                     builder.execute().unwrap(); | ||||
|                 } | ||||
|  | ||||
|                 wtxn.commit().unwrap(); | ||||
|  | ||||
|                 index.prepare_for_closing().wait(); | ||||
|             }, | ||||
|         ) | ||||
|     }); | ||||
| } | ||||
|  | ||||
| fn indexing_movies_in_three_batches(c: &mut Criterion) { | ||||
|     let mut group = c.benchmark_group("indexing"); | ||||
|     group.sample_size(10); | ||||
|     group.sample_size(BENCHMARK_ITERATION); | ||||
|     group.bench_function("Indexing movies in three batches", |b| { | ||||
|         b.iter_with_setup( | ||||
|             move || { | ||||
|                 let index = setup_index(); | ||||
|                 let primary_key = "id"; | ||||
|                 let searchable_fields = ["title", "overview"]; | ||||
|                 let filterable_fields = ["released_date", "genres"]; | ||||
|                 let sortable_fields = []; | ||||
|  | ||||
|                 let index = setup_index_with_settings( | ||||
|                     &primary_key, | ||||
|                     &searchable_fields, | ||||
|                     &filterable_fields, | ||||
|                     &sortable_fields, | ||||
|                 ); | ||||
|  | ||||
|                 let config = IndexerConfig::default(); | ||||
|                 let mut wtxn = index.write_txn().unwrap(); | ||||
|                 let mut builder = Settings::new(&mut wtxn, &index, &config); | ||||
|  | ||||
|                 builder.set_primary_key("id".to_owned()); | ||||
|                 let displayed_fields = ["title", "poster", "overview", "release_date", "genres"] | ||||
|                     .iter() | ||||
|                     .map(|s| s.to_string()) | ||||
|                     .collect(); | ||||
|                 builder.set_displayed_fields(displayed_fields); | ||||
|  | ||||
|                 let searchable_fields = | ||||
|                     ["title", "overview"].iter().map(|s| s.to_string()).collect(); | ||||
|                 builder.set_searchable_fields(searchable_fields); | ||||
|  | ||||
|                 let faceted_fields = | ||||
|                     ["released_date", "genres"].iter().map(|s| s.to_string()).collect(); | ||||
|                 builder.set_filterable_fields(faceted_fields); | ||||
|  | ||||
|                 builder.execute(|_| ()).unwrap(); | ||||
|  | ||||
|                 // We index only one half of the dataset in the setup part | ||||
|                 // as we don't care about the time it takes. | ||||
|                 let config = IndexerConfig::default(); | ||||
| @@ -500,17 +662,11 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) { | ||||
|  | ||||
| fn indexing_nested_movies_default(c: &mut Criterion) { | ||||
|     let mut group = c.benchmark_group("indexing"); | ||||
|     group.sample_size(10); | ||||
|     group.sample_size(BENCHMARK_ITERATION); | ||||
|     group.bench_function("Indexing nested movies with default settings", |b| { | ||||
|         b.iter_with_setup( | ||||
|             move || { | ||||
|                 let index = setup_index(); | ||||
|  | ||||
|                 let config = IndexerConfig::default(); | ||||
|                 let mut wtxn = index.write_txn().unwrap(); | ||||
|                 let mut builder = Settings::new(&mut wtxn, &index, &config); | ||||
|  | ||||
|                 builder.set_primary_key("id".to_owned()); | ||||
|                 let primary_key = "id"; | ||||
|                 let searchable_fields = [ | ||||
|                     "title", | ||||
|                     "overview", | ||||
| @@ -519,12 +675,7 @@ fn indexing_nested_movies_default(c: &mut Criterion) { | ||||
|                     "crew.name", | ||||
|                     "cast.character", | ||||
|                     "cast.name", | ||||
|                 ] | ||||
|                 .iter() | ||||
|                 .map(|s| s.to_string()) | ||||
|                 .collect(); | ||||
|                 builder.set_searchable_fields(searchable_fields); | ||||
|  | ||||
|                 ]; | ||||
|                 let filterable_fields = [ | ||||
|                     "popularity", | ||||
|                     "release_date", | ||||
| @@ -540,21 +691,15 @@ fn indexing_nested_movies_default(c: &mut Criterion) { | ||||
|                     "crew.name", | ||||
|                     "cast.character", | ||||
|                     "cast.name", | ||||
|                 ] | ||||
|                 .iter() | ||||
|                 .map(|s| s.to_string()) | ||||
|                 .collect(); | ||||
|                 builder.set_filterable_fields(filterable_fields); | ||||
|                 ]; | ||||
|                 let sortable_fields = ["popularity", "runtime", "vote_average", "release_date"]; | ||||
|  | ||||
|                 let sortable_fields = ["popularity", "runtime", "vote_average", "release_date"] | ||||
|                     .iter() | ||||
|                     .map(|s| s.to_string()) | ||||
|                     .collect(); | ||||
|                 builder.set_sortable_fields(sortable_fields); | ||||
|  | ||||
|                 builder.execute(|_| ()).unwrap(); | ||||
|                 wtxn.commit().unwrap(); | ||||
|                 index | ||||
|                 setup_index_with_settings( | ||||
|                     &primary_key, | ||||
|                     &searchable_fields, | ||||
|                     &filterable_fields, | ||||
|                     &sortable_fields, | ||||
|                 ) | ||||
|             }, | ||||
|             move |index| { | ||||
|                 let config = IndexerConfig::default(); | ||||
| @@ -575,19 +720,13 @@ fn indexing_nested_movies_default(c: &mut Criterion) { | ||||
|     }); | ||||
| } | ||||
|  | ||||
| fn indexing_nested_movies_without_faceted_fields(c: &mut Criterion) { | ||||
| fn deleting_nested_movies_in_batches_default(c: &mut Criterion) { | ||||
|     let mut group = c.benchmark_group("indexing"); | ||||
|     group.sample_size(10); | ||||
|     group.bench_function("Indexing nested movies without any facets", |b| { | ||||
|     group.sample_size(BENCHMARK_ITERATION); | ||||
|     group.bench_function("Deleting nested movies in batches with default settings", |b| { | ||||
|         b.iter_with_setup( | ||||
|             move || { | ||||
|                 let index = setup_index(); | ||||
|  | ||||
|                 let config = IndexerConfig::default(); | ||||
|                 let mut wtxn = index.write_txn().unwrap(); | ||||
|                 let mut builder = Settings::new(&mut wtxn, &index, &config); | ||||
|  | ||||
|                 builder.set_primary_key("id".to_owned()); | ||||
|                 let primary_key = "id"; | ||||
|                 let searchable_fields = [ | ||||
|                     "title", | ||||
|                     "overview", | ||||
| @@ -596,14 +735,94 @@ fn indexing_nested_movies_without_faceted_fields(c: &mut Criterion) { | ||||
|                     "crew.name", | ||||
|                     "cast.character", | ||||
|                     "cast.name", | ||||
|                 ] | ||||
|                 .iter() | ||||
|                 .map(|s| s.to_string()) | ||||
|                 .collect(); | ||||
|                 builder.set_searchable_fields(searchable_fields); | ||||
|                 builder.execute(|_| ()).unwrap(); | ||||
|                 ]; | ||||
|                 let filterable_fields = [ | ||||
|                     "popularity", | ||||
|                     "release_date", | ||||
|                     "runtime", | ||||
|                     "vote_average", | ||||
|                     "external_ids", | ||||
|                     "keywords", | ||||
|                     "providers.buy.name", | ||||
|                     "providers.rent.name", | ||||
|                     "providers.flatrate.name", | ||||
|                     "provider_names", | ||||
|                     "genres", | ||||
|                     "crew.name", | ||||
|                     "cast.character", | ||||
|                     "cast.name", | ||||
|                 ]; | ||||
|                 let sortable_fields = ["popularity", "runtime", "vote_average", "release_date"]; | ||||
|  | ||||
|                 let index = setup_index_with_settings( | ||||
|                     &primary_key, | ||||
|                     &searchable_fields, | ||||
|                     &filterable_fields, | ||||
|                     &sortable_fields, | ||||
|                 ); | ||||
|  | ||||
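|                 // We index the dataset in the setup part as we don't care about the time it takes. | ||||
|                 // NOTE(review): this comment used to say "only one half of the dataset", but a | ||||
|                 // single `NESTED_MOVIES` file is added here, presumably the whole dataset; confirm. | ||||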
|                 let config = IndexerConfig::default(); | ||||
|                 let mut wtxn = index.write_txn().unwrap(); | ||||
|                 let indexing_config = IndexDocumentsConfig::default(); | ||||
|                 let mut builder = | ||||
|                     IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ()) | ||||
|                         .unwrap(); | ||||
|                 let documents = utils::documents_from(datasets_paths::NESTED_MOVIES, "json"); | ||||
|                 builder.add_documents(documents).unwrap(); | ||||
|                 builder.execute().unwrap(); | ||||
|                 wtxn.commit().unwrap(); | ||||
|                 index | ||||
|  | ||||
|                 let count = 1250; | ||||
|                 let batch_size = 250; | ||||
|                 let document_ids_to_delete = | ||||
|                     choose_document_ids_from_index_batched(&index, count, batch_size); | ||||
|  | ||||
|                 (index, document_ids_to_delete) | ||||
|             }, | ||||
|             move |(index, document_ids_to_delete)| { | ||||
|                 let mut wtxn = index.write_txn().unwrap(); | ||||
|  | ||||
|                 for ids in document_ids_to_delete { | ||||
|                     let mut builder = DeleteDocuments::new(&mut wtxn, &index).unwrap(); | ||||
|                     builder.delete_documents(&ids); | ||||
|                     builder.execute().unwrap(); | ||||
|                 } | ||||
|  | ||||
|                 wtxn.commit().unwrap(); | ||||
|  | ||||
|                 index.prepare_for_closing().wait(); | ||||
|             }, | ||||
|         ) | ||||
|     }); | ||||
| } | ||||
|  | ||||
| fn indexing_nested_movies_without_faceted_fields(c: &mut Criterion) { | ||||
|     let mut group = c.benchmark_group("indexing"); | ||||
|     group.sample_size(BENCHMARK_ITERATION); | ||||
|     group.bench_function("Indexing nested movies without any facets", |b| { | ||||
|         b.iter_with_setup( | ||||
|             move || { | ||||
|                 let primary_key = "id"; | ||||
|                 let searchable_fields = [ | ||||
|                     "title", | ||||
|                     "overview", | ||||
|                     "provider_names", | ||||
|                     "genres", | ||||
|                     "crew.name", | ||||
|                     "cast.character", | ||||
|                     "cast.name", | ||||
|                 ]; | ||||
|                 let filterable_fields = []; | ||||
|                 let sortable_fields = []; | ||||
|  | ||||
|                 setup_index_with_settings( | ||||
|                     &primary_key, | ||||
|                     &searchable_fields, | ||||
|                     &filterable_fields, | ||||
|                     &sortable_fields, | ||||
|                 ) | ||||
|             }, | ||||
|             move |index| { | ||||
|                 let config = IndexerConfig::default(); | ||||
| @@ -626,39 +845,21 @@ fn indexing_nested_movies_without_faceted_fields(c: &mut Criterion) { | ||||
|  | ||||
| fn indexing_geo(c: &mut Criterion) { | ||||
|     let mut group = c.benchmark_group("indexing"); | ||||
|     group.sample_size(10); | ||||
|     group.sample_size(BENCHMARK_ITERATION); | ||||
|     group.bench_function("Indexing geo_point", |b| { | ||||
|         b.iter_with_setup( | ||||
|             move || { | ||||
|                 let index = setup_index(); | ||||
|                 let primary_key = "geonameid"; | ||||
|                 let searchable_fields = ["name", "alternatenames", "elevation"]; | ||||
|                 let filterable_fields = ["_geo", "population", "elevation"]; | ||||
|                 let sortable_fields = ["_geo", "population", "elevation"]; | ||||
|  | ||||
|                 let config = IndexerConfig::default(); | ||||
|                 let mut wtxn = index.write_txn().unwrap(); | ||||
|                 let mut builder = Settings::new(&mut wtxn, &index, &config); | ||||
|  | ||||
|                 builder.set_primary_key("geonameid".to_owned()); | ||||
|                 let displayed_fields = | ||||
|                     ["geonameid", "name", "asciiname", "alternatenames", "_geo", "population"] | ||||
|                         .iter() | ||||
|                         .map(|s| s.to_string()) | ||||
|                         .collect(); | ||||
|                 builder.set_displayed_fields(displayed_fields); | ||||
|  | ||||
|                 let searchable_fields = | ||||
|                     ["name", "alternatenames", "elevation"].iter().map(|s| s.to_string()).collect(); | ||||
|                 builder.set_searchable_fields(searchable_fields); | ||||
|  | ||||
|                 let filterable_fields = | ||||
|                     ["_geo", "population", "elevation"].iter().map(|s| s.to_string()).collect(); | ||||
|                 builder.set_filterable_fields(filterable_fields); | ||||
|  | ||||
|                 let sortable_fields = | ||||
|                     ["_geo", "population", "elevation"].iter().map(|s| s.to_string()).collect(); | ||||
|                 builder.set_sortable_fields(sortable_fields); | ||||
|  | ||||
|                 builder.execute(|_| ()).unwrap(); | ||||
|                 wtxn.commit().unwrap(); | ||||
|                 index | ||||
|                 setup_index_with_settings( | ||||
|                     &primary_key, | ||||
|                     &searchable_fields, | ||||
|                     &filterable_fields, | ||||
|                     &sortable_fields, | ||||
|                 ) | ||||
|             }, | ||||
|             move |index| { | ||||
|                 let config = IndexerConfig::default(); | ||||
| @@ -680,18 +881,78 @@ fn indexing_geo(c: &mut Criterion) { | ||||
|     }); | ||||
| } | ||||
|  | ||||
| fn deleting_geo_in_batches_default(c: &mut Criterion) { | ||||
|     let mut group = c.benchmark_group("indexing"); | ||||
|     group.sample_size(BENCHMARK_ITERATION); | ||||
|     group.bench_function("Deleting geo_point in batches with default settings", |b| { | ||||
|         b.iter_with_setup( | ||||
|             move || { | ||||
|                 let primary_key = "geonameid"; | ||||
|                 let searchable_fields = ["name", "alternatenames", "elevation"]; | ||||
|                 let filterable_fields = ["_geo", "population", "elevation"]; | ||||
|                 let sortable_fields = ["_geo", "population", "elevation"]; | ||||
|  | ||||
|                 let index = setup_index_with_settings( | ||||
|                     &primary_key, | ||||
|                     &searchable_fields, | ||||
|                     &filterable_fields, | ||||
|                     &sortable_fields, | ||||
|                 ); | ||||
|  | ||||
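|                 // We index the dataset in the setup part as we don't care about the time it takes. | ||||
|                 // NOTE(review): this comment used to say "only one half of the dataset", but a | ||||
|                 // single `SMOL_ALL_COUNTRIES` file is added here, presumably the whole dataset; confirm. | ||||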
|                 let config = IndexerConfig::default(); | ||||
|                 let mut wtxn = index.write_txn().unwrap(); | ||||
|                 let indexing_config = IndexDocumentsConfig::default(); | ||||
|                 let mut builder = | ||||
|                     IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ()) | ||||
|                         .unwrap(); | ||||
|                 let documents = utils::documents_from(datasets_paths::SMOL_ALL_COUNTRIES, "json"); | ||||
|                 builder.add_documents(documents).unwrap(); | ||||
|                 builder.execute().unwrap(); | ||||
|                 wtxn.commit().unwrap(); | ||||
|  | ||||
|                 let count = 1250; | ||||
|                 let batch_size = 250; | ||||
|                 let document_ids_to_delete = | ||||
|                     choose_document_ids_from_index_batched(&index, count, batch_size); | ||||
|  | ||||
|                 (index, document_ids_to_delete) | ||||
|             }, | ||||
|             move |(index, document_ids_to_delete)| { | ||||
|                 let mut wtxn = index.write_txn().unwrap(); | ||||
|  | ||||
|                 for ids in document_ids_to_delete { | ||||
|                     let mut builder = DeleteDocuments::new(&mut wtxn, &index).unwrap(); | ||||
|                     builder.delete_documents(&ids); | ||||
|                     builder.execute().unwrap(); | ||||
|                 } | ||||
|  | ||||
|                 wtxn.commit().unwrap(); | ||||
|  | ||||
|                 index.prepare_for_closing().wait(); | ||||
|             }, | ||||
|         ) | ||||
|     }); | ||||
| } | ||||
|  | ||||
| criterion_group!( | ||||
|     benches, | ||||
|     indexing_songs_default, | ||||
|     deleting_songs_in_batches_default, | ||||
|     indexing_songs_without_faceted_numbers, | ||||
|     indexing_songs_without_faceted_fields, | ||||
|     indexing_songs_in_three_batches_default, | ||||
|     indexing_wiki, | ||||
|     deleting_wiki_in_batches_default, | ||||
|     indexing_wiki_in_three_batches, | ||||
|     indexing_movies_default, | ||||
|     deleting_movies_in_batches_default, | ||||
|     indexing_movies_in_three_batches, | ||||
|     indexing_nested_movies_default, | ||||
|     deleting_nested_movies_in_batches_default, | ||||
|     indexing_nested_movies_without_faceted_fields, | ||||
|     indexing_geo | ||||
|     indexing_geo, | ||||
|     deleting_geo_in_batches_default | ||||
| ); | ||||
| criterion_main!(benches); | ||||
|   | ||||
		Reference in New Issue
	
	Block a user