mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-31 07:56:28 +00:00 
			
		
		
		
	Add a new songs benchmark to test multi batch indexing
This commit is contained in:
		| @@ -83,6 +83,77 @@ fn indexing_songs_default(c: &mut Criterion) { | |||||||
|     }); |     }); | ||||||
| } | } | ||||||
|  |  | ||||||
|  | fn indexing_songs_in_three_batches_default(c: &mut Criterion) { | ||||||
|  |     let mut group = c.benchmark_group("indexing"); | ||||||
|  |     group.sample_size(10); | ||||||
|  |     group.bench_function("Indexing songs in three batches with default settings", |b| { | ||||||
|  |         b.iter_with_setup( | ||||||
|  |             move || { | ||||||
|  |                 let index = setup_index(); | ||||||
|  |  | ||||||
|  |                 let config = IndexerConfig::default(); | ||||||
|  |                 let mut wtxn = index.write_txn().unwrap(); | ||||||
|  |                 let mut builder = Settings::new(&mut wtxn, &index, &config); | ||||||
|  |  | ||||||
|  |                 builder.set_primary_key("id".to_owned()); | ||||||
|  |                 let displayed_fields = | ||||||
|  |                     ["title", "album", "artist", "genre", "country", "released", "duration"] | ||||||
|  |                         .iter() | ||||||
|  |                         .map(|s| s.to_string()) | ||||||
|  |                         .collect(); | ||||||
|  |                 builder.set_displayed_fields(displayed_fields); | ||||||
|  |  | ||||||
|  |                 let searchable_fields = | ||||||
|  |                     ["title", "album", "artist"].iter().map(|s| s.to_string()).collect(); | ||||||
|  |                 builder.set_searchable_fields(searchable_fields); | ||||||
|  |  | ||||||
|  |                 let faceted_fields = | ||||||
|  |                     ["released-timestamp", "duration-float", "genre", "country", "artist"] | ||||||
|  |                         .iter() | ||||||
|  |                         .map(|s| s.to_string()) | ||||||
|  |                         .collect(); | ||||||
|  |                 builder.set_filterable_fields(faceted_fields); | ||||||
|  |                 builder.execute(|_| ()).unwrap(); | ||||||
|  |  | ||||||
|  |                 // We index only one half of the dataset in the setup part | ||||||
|  |                 // as we don't care about the time it takes. | ||||||
|  |                 let config = IndexerConfig::default(); | ||||||
|  |                 let indexing_config = IndexDocumentsConfig::default(); | ||||||
|  |                 let mut builder = | ||||||
|  |                     IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ()); | ||||||
|  |                 let documents = utils::documents_from(datasets_paths::SMOL_SONGS_1_2, "csv"); | ||||||
|  |                 builder.add_documents(documents).unwrap(); | ||||||
|  |                 builder.execute().unwrap(); | ||||||
|  |  | ||||||
|  |                 wtxn.commit().unwrap(); | ||||||
|  |  | ||||||
|  |                 index | ||||||
|  |             }, | ||||||
|  |             move |index| { | ||||||
|  |                 let config = IndexerConfig::default(); | ||||||
|  |                 let indexing_config = IndexDocumentsConfig::default(); | ||||||
|  |                 let mut wtxn = index.write_txn().unwrap(); | ||||||
|  |                 let mut builder = | ||||||
|  |                     IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ()); | ||||||
|  |                 let documents = utils::documents_from(datasets_paths::SMOL_SONGS_3_4, "csv"); | ||||||
|  |                 builder.add_documents(documents).unwrap(); | ||||||
|  |                 builder.execute().unwrap(); | ||||||
|  |  | ||||||
|  |                 let indexing_config = IndexDocumentsConfig::default(); | ||||||
|  |                 let mut builder = | ||||||
|  |                     IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ()); | ||||||
|  |                 let documents = utils::documents_from(datasets_paths::SMOL_SONGS_4_4, "csv"); | ||||||
|  |                 builder.add_documents(documents).unwrap(); | ||||||
|  |                 builder.execute().unwrap(); | ||||||
|  |  | ||||||
|  |                 wtxn.commit().unwrap(); | ||||||
|  |  | ||||||
|  |                 index.prepare_for_closing().wait(); | ||||||
|  |             }, | ||||||
|  |         ) | ||||||
|  |     }); | ||||||
|  | } | ||||||
|  |  | ||||||
| fn indexing_songs_without_faceted_numbers(c: &mut Criterion) { | fn indexing_songs_without_faceted_numbers(c: &mut Criterion) { | ||||||
|     let mut group = c.benchmark_group("indexing"); |     let mut group = c.benchmark_group("indexing"); | ||||||
|     group.sample_size(10); |     group.sample_size(10); | ||||||
| @@ -332,6 +403,7 @@ criterion_group!( | |||||||
|     indexing_songs_default, |     indexing_songs_default, | ||||||
|     indexing_songs_without_faceted_numbers, |     indexing_songs_without_faceted_numbers, | ||||||
|     indexing_songs_without_faceted_fields, |     indexing_songs_without_faceted_fields, | ||||||
|  |     indexing_songs_in_three_batches_default, | ||||||
|     indexing_wiki, |     indexing_wiki, | ||||||
|     indexing_movies_default, |     indexing_movies_default, | ||||||
|     indexing_geo |     indexing_geo | ||||||
|   | |||||||
| @@ -11,10 +11,23 @@ use reqwest::IntoUrl; | |||||||
| const BASE_URL: &str = "https://milli-benchmarks.fra1.digitaloceanspaces.com/datasets"; | const BASE_URL: &str = "https://milli-benchmarks.fra1.digitaloceanspaces.com/datasets"; | ||||||
|  |  | ||||||
| const DATASET_SONGS: (&str, &str) = ("smol-songs", "csv"); | const DATASET_SONGS: (&str, &str) = ("smol-songs", "csv"); | ||||||
|  | const DATASET_SONGS_1_2: (&str, &str) = ("smol-songs-1_2", "csv"); | ||||||
|  | const DATASET_SONGS_3_4: (&str, &str) = ("smol-songs-3_4", "csv"); | ||||||
|  | const DATASET_SONGS_4_4: (&str, &str) = ("smol-songs-4_4", "csv"); | ||||||
| const DATASET_WIKI: (&str, &str) = ("smol-wiki-articles", "csv"); | const DATASET_WIKI: (&str, &str) = ("smol-wiki-articles", "csv"); | ||||||
| const DATASET_MOVIES: (&str, &str) = ("movies", "json"); | const DATASET_MOVIES: (&str, &str) = ("movies", "json"); | ||||||
| const DATASET_GEO: (&str, &str) = ("smol-all-countries", "jsonl"); | const DATASET_GEO: (&str, &str) = ("smol-all-countries", "jsonl"); | ||||||
|  |  | ||||||
|  | const ALL_DATASETS: &[(&str, &str)] = &[ | ||||||
|  |     DATASET_SONGS, | ||||||
|  |     DATASET_SONGS_1_2, | ||||||
|  |     DATASET_SONGS_3_4, | ||||||
|  |     DATASET_SONGS_4_4, | ||||||
|  |     DATASET_WIKI, | ||||||
|  |     DATASET_MOVIES, | ||||||
|  |     DATASET_GEO, | ||||||
|  | ]; | ||||||
|  |  | ||||||
| /// The name of the environment variable used to select the path | /// The name of the environment variable used to select the path | ||||||
| /// of the directory containing the datasets | /// of the directory containing the datasets | ||||||
| const BASE_DATASETS_PATH_KEY: &str = "MILLI_BENCH_DATASETS_PATH"; | const BASE_DATASETS_PATH_KEY: &str = "MILLI_BENCH_DATASETS_PATH"; | ||||||
| @@ -33,7 +46,7 @@ fn main() -> anyhow::Result<()> { | |||||||
|     )?; |     )?; | ||||||
|     writeln!(manifest_paths_file)?; |     writeln!(manifest_paths_file)?; | ||||||
|  |  | ||||||
|     for (dataset, extension) in [DATASET_SONGS, DATASET_WIKI, DATASET_MOVIES, DATASET_GEO] { |     for (dataset, extension) in ALL_DATASETS { | ||||||
|         let out_path = out_dir.join(dataset); |         let out_path = out_dir.join(dataset); | ||||||
|         let out_file = out_path.with_extension(extension); |         let out_file = out_path.with_extension(extension); | ||||||
|  |  | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user