mirror of https://github.com/meilisearch/meilisearch.git
fix all benchmarks and add compile-time checking of the benchmarks in the CI
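The diff below covers the benchmark sources themselves: every bench now builds its documents through a shared utils::documents_from helper that returns a milli DocumentBatchReader, replacing the per-benchmark UpdateFormat / File::open plumbing. The CI half of the title is not visible in this file diff; as a rough sketch (the actual workflow change is not shown here, so the exact step is an assumption), compile-time checking of the benches usually amounts to building them without running them:

    # hypothetical CI step: build the benches without executing them
    cargo bench --no-run
    # or, type-check only:
    cargo check --benches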
@@ -1,11 +1,12 @@
 mod datasets_paths;
 mod utils;
 
-use std::fs::{create_dir_all, remove_dir_all, File};
+use std::fs::{create_dir_all, remove_dir_all};
 use std::path::Path;
 
 use criterion::{criterion_group, criterion_main, Criterion};
 use heed::EnvOpenOptions;
-use milli::update::{IndexDocumentsMethod, UpdateBuilder, UpdateFormat};
+use milli::update::UpdateBuilder;
 use milli::Index;
 
 #[cfg(target_os = "linux")]
@@ -67,15 +68,10 @@ fn indexing_songs_default(c: &mut Criterion) {
             move |index| {
                 let update_builder = UpdateBuilder::new(0);
                 let mut wtxn = index.write_txn().unwrap();
-                let mut builder = update_builder.index_documents(&mut wtxn, &index);
+                let builder = update_builder.index_documents(&mut wtxn, &index);
 
-                builder.update_format(UpdateFormat::Csv);
-                builder.index_documents_method(IndexDocumentsMethod::ReplaceDocuments);
-                let reader = File::open(datasets_paths::SMOL_SONGS).expect(&format!(
-                    "could not find the dataset in: {}",
-                    datasets_paths::SMOL_SONGS
-                ));
-                builder.execute(reader, |_, _| ()).unwrap();
+                let documents = utils::documents_from(datasets_paths::SMOL_SONGS, "csv");
+                builder.execute(documents, |_, _| ()).unwrap();
                 wtxn.commit().unwrap();
 
                 index.prepare_for_closing().wait();
@@ -118,15 +114,10 @@ fn indexing_songs_without_faceted_numbers(c: &mut Criterion) {
             move |index| {
                 let update_builder = UpdateBuilder::new(0);
                 let mut wtxn = index.write_txn().unwrap();
-                let mut builder = update_builder.index_documents(&mut wtxn, &index);
+                let builder = update_builder.index_documents(&mut wtxn, &index);
 
-                builder.update_format(UpdateFormat::Csv);
-                builder.index_documents_method(IndexDocumentsMethod::ReplaceDocuments);
-                let reader = File::open(datasets_paths::SMOL_SONGS).expect(&format!(
-                    "could not find the dataset in: {}",
-                    datasets_paths::SMOL_SONGS
-                ));
-                builder.execute(reader, |_, _| ()).unwrap();
+                let documents = utils::documents_from(datasets_paths::SMOL_SONGS, "csv");
+                builder.execute(documents, |_, _| ()).unwrap();
                 wtxn.commit().unwrap();
 
                 index.prepare_for_closing().wait();
@@ -165,15 +156,10 @@ fn indexing_songs_without_faceted_fields(c: &mut Criterion) {
             move |index| {
                 let update_builder = UpdateBuilder::new(0);
                 let mut wtxn = index.write_txn().unwrap();
-                let mut builder = update_builder.index_documents(&mut wtxn, &index);
+                let builder = update_builder.index_documents(&mut wtxn, &index);
 
-                builder.update_format(UpdateFormat::Csv);
-                builder.index_documents_method(IndexDocumentsMethod::ReplaceDocuments);
-                let reader = File::open(datasets_paths::SMOL_SONGS).expect(&format!(
-                    "could not find the dataset in: {}",
-                    datasets_paths::SMOL_SONGS
-                ));
-                builder.execute(reader, |_, _| ()).unwrap();
+                let documents = utils::documents_from(datasets_paths::SMOL_SONGS, "csv");
+                builder.execute(documents, |_, _| ()).unwrap();
                 wtxn.commit().unwrap();
 
                 index.prepare_for_closing().wait();
@@ -211,15 +197,10 @@ fn indexing_wiki(c: &mut Criterion) {
             move |index| {
                 let update_builder = UpdateBuilder::new(0);
                 let mut wtxn = index.write_txn().unwrap();
-                let mut builder = update_builder.index_documents(&mut wtxn, &index);
+                let builder = update_builder.index_documents(&mut wtxn, &index);
 
-                builder.update_format(UpdateFormat::Csv);
-                builder.index_documents_method(IndexDocumentsMethod::ReplaceDocuments);
-                let reader = File::open(datasets_paths::SMOL_WIKI_ARTICLES).expect(&format!(
-                    "could not find the dataset in: {}",
-                    datasets_paths::SMOL_SONGS
-                ));
-                builder.execute(reader, |_, _| ()).unwrap();
+                let documents = utils::documents_from(datasets_paths::SMOL_WIKI_ARTICLES, "csv");
+                builder.execute(documents, |_, _| ()).unwrap();
                 wtxn.commit().unwrap();
 
                 index.prepare_for_closing().wait();
@@ -262,13 +243,10 @@ fn indexing_movies_default(c: &mut Criterion) {
             move |index| {
                 let update_builder = UpdateBuilder::new(0);
                 let mut wtxn = index.write_txn().unwrap();
-                let mut builder = update_builder.index_documents(&mut wtxn, &index);
+                let builder = update_builder.index_documents(&mut wtxn, &index);
 
-                builder.update_format(UpdateFormat::Json);
-                builder.index_documents_method(IndexDocumentsMethod::ReplaceDocuments);
-                let reader = File::open(datasets_paths::MOVIES)
-                    .expect(&format!("could not find the dataset in: {}", datasets_paths::MOVIES));
-                builder.execute(reader, |_, _| ()).unwrap();
+                let documents = utils::documents_from(datasets_paths::MOVIES, "json");
+                builder.execute(documents, |_, _| ()).unwrap();
                 wtxn.commit().unwrap();
 
                 index.prepare_for_closing().wait();
@@ -316,15 +294,11 @@ fn indexing_geo(c: &mut Criterion) {
             move |index| {
                 let update_builder = UpdateBuilder::new(0);
                 let mut wtxn = index.write_txn().unwrap();
-                let mut builder = update_builder.index_documents(&mut wtxn, &index);
+                let builder = update_builder.index_documents(&mut wtxn, &index);
 
+                let documents = utils::documents_from(datasets_paths::SMOL_ALL_COUNTRIES, "jsonl");
+                builder.execute(documents, |_, _| ()).unwrap();
+
-                builder.update_format(UpdateFormat::JsonStream);
-                builder.index_documents_method(IndexDocumentsMethod::ReplaceDocuments);
-                let reader = File::open(datasets_paths::SMOL_ALL_COUNTRIES).expect(&format!(
-                    "could not find the dataset in: {}",
-                    datasets_paths::SMOL_ALL_COUNTRIES
-                ));
-                builder.execute(reader, |_, _| ()).unwrap();
                 wtxn.commit().unwrap();
 
                 index.prepare_for_closing().wait();
@@ -2,7 +2,7 @@ mod datasets_paths;
 mod utils;
 
 use criterion::{criterion_group, criterion_main};
-use milli::update::{Settings, UpdateFormat};
+use milli::update::Settings;
 use utils::Conf;
 
 #[cfg(target_os = "linux")]
@@ -33,7 +33,7 @@ fn base_conf(builder: &mut Settings) {
 #[rustfmt::skip]
 const BASE_CONF: Conf = Conf {
     dataset: datasets_paths::SMOL_ALL_COUNTRIES,
-    dataset_format: UpdateFormat::JsonStream,
+    dataset_format: "jsonl",
     queries: &[
         "",
     ],
@@ -1,10 +1,15 @@
 #![allow(dead_code)]
 
 use std::fs::{create_dir_all, remove_dir_all, File};
+use std::io::{self, Cursor, Read, Seek};
 use std::path::Path;
 
 use criterion::BenchmarkId;
 use heed::EnvOpenOptions;
-use milli::update::{IndexDocumentsMethod, Settings, UpdateBuilder, UpdateFormat};
+use milli::documents::DocumentBatchReader;
+use milli::update::{IndexDocumentsMethod, Settings, UpdateBuilder};
 use milli::{FilterCondition, Index};
+use serde_json::{Map, Value};
 
 pub struct Conf<'a> {
     /// where we are going to create our database.mmdb directory
@@ -13,7 +18,7 @@ pub struct Conf<'a> {
     /// the dataset to be used, it must be an uncompressed csv
     pub dataset: &'a str,
     /// The format of the dataset
-    pub dataset_format: UpdateFormat,
+    pub dataset_format: &'a str,
     pub group_name: &'a str,
     pub queries: &'a [&'a str],
     /// here you can change which criterion are used and in which order.
@@ -33,7 +38,7 @@ pub struct Conf<'a> {
 impl Conf<'_> {
     pub const BASE: Self = Conf {
         database_name: "benches.mmdb",
-        dataset_format: UpdateFormat::Csv,
+        dataset_format: "csv",
         dataset: "",
         group_name: "",
         queries: &[],
@@ -87,11 +92,10 @@ pub fn base_setup(conf: &Conf) -> Index {
     if let None = conf.primary_key {
         builder.enable_autogenerate_docids();
     }
-    builder.update_format(conf.dataset_format);
+    let documents = documents_from(conf.dataset, conf.dataset_format);
+
     builder.index_documents_method(IndexDocumentsMethod::ReplaceDocuments);
-    let reader = File::open(conf.dataset)
-        .expect(&format!("could not find the dataset in: {}", conf.dataset));
-    builder.execute(reader, |_, _| ()).unwrap();
+    builder.execute(documents, |_, _| ()).unwrap();
     wtxn.commit().unwrap();
 
     index
@@ -128,3 +132,58 @@ pub fn run_benches(c: &mut criterion::Criterion, confs: &[Conf]) {
         index.prepare_for_closing().wait();
     }
 }
+
+pub fn documents_from(filename: &str, filetype: &str) -> DocumentBatchReader<impl Read + Seek> {
+    let reader =
+        File::open(filename).expect(&format!("could not find the dataset in: {}", filename));
+    let documents = match filetype {
+        "csv" => documents_from_csv(reader).unwrap(),
+        "json" => documents_from_json(reader).unwrap(),
+        "jsonl" => documents_from_jsonl(reader).unwrap(),
+        otherwise => panic!("invalid update format {:?}", otherwise),
+    };
+    DocumentBatchReader::from_reader(Cursor::new(documents)).unwrap()
+}
+
+fn documents_from_jsonl(reader: impl io::Read) -> anyhow::Result<Vec<u8>> {
+    let mut writer = Cursor::new(Vec::new());
+    let mut documents = milli::documents::DocumentBatchBuilder::new(&mut writer)?;
+
+    let values = serde_json::Deserializer::from_reader(reader)
+        .into_iter::<serde_json::Map<String, serde_json::Value>>();
+    for document in values {
+        let document = document?;
+        documents.add_documents(document)?;
+    }
+    documents.finish()?;
+
+    Ok(writer.into_inner())
+}
+
+fn documents_from_json(reader: impl io::Read) -> anyhow::Result<Vec<u8>> {
+    let mut writer = Cursor::new(Vec::new());
+    let mut documents = milli::documents::DocumentBatchBuilder::new(&mut writer)?;
+
+    let json: serde_json::Value = serde_json::from_reader(reader)?;
+    documents.add_documents(json)?;
+    documents.finish()?;
+
+    Ok(writer.into_inner())
+}
+
+fn documents_from_csv(reader: impl io::Read) -> anyhow::Result<Vec<u8>> {
+    let mut writer = Cursor::new(Vec::new());
+    let mut documents = milli::documents::DocumentBatchBuilder::new(&mut writer)?;
+
+    let mut records = csv::Reader::from_reader(reader);
+    let iter = records.deserialize::<Map<String, Value>>();
+
+    for doc in iter {
+        let doc = doc?;
+        documents.add_documents(doc)?;
+    }
+
+    documents.finish()?;
+
+    Ok(writer.into_inner())
+}