mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-11-04 09:56:28 +00:00 
			
		
		
		
	push a first version of the benchmark for the typo
This commit is contained in:
		@@ -61,5 +61,5 @@ rand = "0.8.3"
 | 
			
		||||
default = []
 | 
			
		||||
 | 
			
		||||
[[bench]]
 | 
			
		||||
name = "search"
 | 
			
		||||
name = "typo"
 | 
			
		||||
harness = false
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										8
									
								
								milli/benches/README.md
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										8
									
								
								milli/benches/README.md
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,8 @@
 | 
			
		||||
Benchmarks
 | 
			
		||||
==========
 | 
			
		||||
 | 
			
		||||
For our benchmark we are using a small subset of the dataset songs.csv. It was generated with this command:
 | 
			
		||||
```
 | 
			
		||||
xsv sample --seed 42 songs.csv -o smol_songs.csv
 | 
			
		||||
```
 | 
			
		||||
The original songs.csv dataset is available [here](https://meili-datasets.s3.fr-par.scw.cloud/songs.csv.gz).
 | 
			
		||||
@@ -1,22 +1,27 @@
 | 
			
		||||
use std::time::Duration;
 | 
			
		||||
mod utils;
 | 
			
		||||
 | 
			
		||||
use heed::EnvOpenOptions;
 | 
			
		||||
use milli::Index;
 | 
			
		||||
use std::time::Duration;
 | 
			
		||||
use criterion::{criterion_group, criterion_main, BenchmarkId};
 | 
			
		||||
 | 
			
		||||
fn bench_search(c: &mut criterion::Criterion) {
 | 
			
		||||
    let database = "books-4cpu.mmdb";
 | 
			
		||||
fn bench_typo(c: &mut criterion::Criterion) {
 | 
			
		||||
    let index = utils::base_setup(Some(vec!["typo".to_string()]));
 | 
			
		||||
 | 
			
		||||
    let queries = [
 | 
			
		||||
        "minogue kylie",
 | 
			
		||||
        "minogue kylie live",
 | 
			
		||||
        "mongus ",
 | 
			
		||||
        "thelonius monk ",
 | 
			
		||||
        "Disnaylande ",
 | 
			
		||||
        "the white striper ",
 | 
			
		||||
        "indochie ",
 | 
			
		||||
        "indochien ",
 | 
			
		||||
        "klub des loopers ",
 | 
			
		||||
        "fear of the duck ",
 | 
			
		||||
        "michel depech ",
 | 
			
		||||
        "stromal ",
 | 
			
		||||
        "dire straights ",
 | 
			
		||||
        "Arethla Franklin ",
 | 
			
		||||
    ];
 | 
			
		||||
 | 
			
		||||
    let mut options = EnvOpenOptions::new();
 | 
			
		||||
    options.map_size(100 * 1024 * 1024 * 1024); // 100 GB
 | 
			
		||||
    options.max_readers(10);
 | 
			
		||||
    let index = Index::new(options, database).unwrap();
 | 
			
		||||
 | 
			
		||||
    let mut group = c.benchmark_group("search");
 | 
			
		||||
    let mut group = c.benchmark_group("typo");
 | 
			
		||||
    group.sample_size(10);
 | 
			
		||||
    group.measurement_time(Duration::from_secs(12));
 | 
			
		||||
 | 
			
		||||
@@ -32,5 +37,5 @@ fn bench_search(c: &mut criterion::Criterion) {
 | 
			
		||||
    group.finish();
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
criterion_group!(benches, bench_search);
 | 
			
		||||
criterion_group!(benches, bench_typo);
 | 
			
		||||
criterion_main!(benches);
 | 
			
		||||
							
								
								
									
										41
									
								
								milli/benches/utils.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										41
									
								
								milli/benches/utils.rs
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,41 @@
 | 
			
		||||
use std::{fs::{File, create_dir_all}};
 | 
			
		||||
 | 
			
		||||
use heed::EnvOpenOptions;
 | 
			
		||||
use milli::{Index, update::{IndexDocumentsMethod, UpdateBuilder, UpdateFormat}};
 | 
			
		||||
 | 
			
		||||
pub fn base_setup(criteria: Option<Vec<String>>) -> Index {
 | 
			
		||||
    let database = "songs.mmdb";
 | 
			
		||||
    create_dir_all(&database).unwrap();
 | 
			
		||||
 | 
			
		||||
    let mut options = EnvOpenOptions::new();
 | 
			
		||||
    options.map_size(100 * 1024 * 1024 * 1024); // 100 GB
 | 
			
		||||
    options.max_readers(10);
 | 
			
		||||
    let index = Index::new(options, database).unwrap();
 | 
			
		||||
 | 
			
		||||
    let update_builder = UpdateBuilder::new(0);
 | 
			
		||||
    let mut wtxn = index.write_txn().unwrap();
 | 
			
		||||
    let mut builder = update_builder.settings(&mut wtxn, &index);
 | 
			
		||||
 | 
			
		||||
    if let Some(criteria) = criteria {
 | 
			
		||||
        builder.reset_faceted_fields();
 | 
			
		||||
        builder.reset_criteria();
 | 
			
		||||
        builder.reset_stop_words();
 | 
			
		||||
 | 
			
		||||
        builder.set_criteria(criteria);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    builder.execute(|_, _| ()).unwrap();
 | 
			
		||||
    wtxn.commit().unwrap();
 | 
			
		||||
 | 
			
		||||
    let update_builder = UpdateBuilder::new(0);
 | 
			
		||||
    let mut wtxn = index.write_txn().unwrap();
 | 
			
		||||
    let mut builder = update_builder.index_documents(&mut wtxn, &index);
 | 
			
		||||
    builder.update_format(UpdateFormat::Csv);
 | 
			
		||||
    builder.index_documents_method(IndexDocumentsMethod::ReplaceDocuments);
 | 
			
		||||
    // we called from cargo the current directory is supposed to be milli/milli
 | 
			
		||||
    let reader = File::open("benches/smol_songs.csv").unwrap();
 | 
			
		||||
    builder.execute(reader, |_, _| ()).unwrap();
 | 
			
		||||
    wtxn.commit().unwrap();
 | 
			
		||||
 | 
			
		||||
    index
 | 
			
		||||
}
 | 
			
		||||
		Reference in New Issue
	
	Block a user