mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-11-04 09:56:28 +00:00 
			
		
		
		
	Merge #364
364: Fix all the benchmarks r=Kerollmops a=irevoire #324 broke all benchmarks. I fixed everything and noticed that `cargo check --all` was insufficient to check the bench in multiple workspaces, so I also updated the CI to use `cargo check --workspace --all-targets`. Co-authored-by: Tamo <tamo@meilisearch.com>
This commit is contained in:
		
							
								
								
									
										2
									
								
								.github/workflows/rust.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										2
									
								
								.github/workflows/rust.yml
									
									
									
									
										vendored
									
									
								
							@@ -33,7 +33,7 @@ jobs:
 | 
			
		||||
      uses: actions-rs/cargo@v1
 | 
			
		||||
      with:
 | 
			
		||||
        command: check
 | 
			
		||||
        args: --all
 | 
			
		||||
        args: --workspace --all-targets
 | 
			
		||||
    - name: Run cargo test
 | 
			
		||||
      uses: actions-rs/cargo@v1
 | 
			
		||||
      with:
 | 
			
		||||
 
 | 
			
		||||
@@ -6,6 +6,9 @@ publish = false
 | 
			
		||||
 | 
			
		||||
[dependencies]
 | 
			
		||||
milli = { path = "../milli" }
 | 
			
		||||
anyhow = "1.0"
 | 
			
		||||
serde_json = { version = "1.0.62", features = ["preserve_order"] }
 | 
			
		||||
csv = "1.1.6"
 | 
			
		||||
 | 
			
		||||
[target.'cfg(target_os = "linux")'.dependencies]
 | 
			
		||||
jemallocator = "0.3.2"
 | 
			
		||||
 
 | 
			
		||||
@@ -1,11 +1,12 @@
 | 
			
		||||
mod datasets_paths;
 | 
			
		||||
mod utils;
 | 
			
		||||
 | 
			
		||||
use std::fs::{create_dir_all, remove_dir_all, File};
 | 
			
		||||
use std::fs::{create_dir_all, remove_dir_all};
 | 
			
		||||
use std::path::Path;
 | 
			
		||||
 | 
			
		||||
use criterion::{criterion_group, criterion_main, Criterion};
 | 
			
		||||
use heed::EnvOpenOptions;
 | 
			
		||||
use milli::update::{IndexDocumentsMethod, UpdateBuilder, UpdateFormat};
 | 
			
		||||
use milli::update::UpdateBuilder;
 | 
			
		||||
use milli::Index;
 | 
			
		||||
 | 
			
		||||
#[cfg(target_os = "linux")]
 | 
			
		||||
@@ -67,15 +68,10 @@ fn indexing_songs_default(c: &mut Criterion) {
 | 
			
		||||
            move |index| {
 | 
			
		||||
                let update_builder = UpdateBuilder::new(0);
 | 
			
		||||
                let mut wtxn = index.write_txn().unwrap();
 | 
			
		||||
                let mut builder = update_builder.index_documents(&mut wtxn, &index);
 | 
			
		||||
                let builder = update_builder.index_documents(&mut wtxn, &index);
 | 
			
		||||
 | 
			
		||||
                builder.update_format(UpdateFormat::Csv);
 | 
			
		||||
                builder.index_documents_method(IndexDocumentsMethod::ReplaceDocuments);
 | 
			
		||||
                let reader = File::open(datasets_paths::SMOL_SONGS).expect(&format!(
 | 
			
		||||
                    "could not find the dataset in: {}",
 | 
			
		||||
                    datasets_paths::SMOL_SONGS
 | 
			
		||||
                ));
 | 
			
		||||
                builder.execute(reader, |_, _| ()).unwrap();
 | 
			
		||||
                let documents = utils::documents_from(datasets_paths::SMOL_SONGS, "csv");
 | 
			
		||||
                builder.execute(documents, |_, _| ()).unwrap();
 | 
			
		||||
                wtxn.commit().unwrap();
 | 
			
		||||
 | 
			
		||||
                index.prepare_for_closing().wait();
 | 
			
		||||
@@ -118,15 +114,10 @@ fn indexing_songs_without_faceted_numbers(c: &mut Criterion) {
 | 
			
		||||
            move |index| {
 | 
			
		||||
                let update_builder = UpdateBuilder::new(0);
 | 
			
		||||
                let mut wtxn = index.write_txn().unwrap();
 | 
			
		||||
                let mut builder = update_builder.index_documents(&mut wtxn, &index);
 | 
			
		||||
                let builder = update_builder.index_documents(&mut wtxn, &index);
 | 
			
		||||
 | 
			
		||||
                builder.update_format(UpdateFormat::Csv);
 | 
			
		||||
                builder.index_documents_method(IndexDocumentsMethod::ReplaceDocuments);
 | 
			
		||||
                let reader = File::open(datasets_paths::SMOL_SONGS).expect(&format!(
 | 
			
		||||
                    "could not find the dataset in: {}",
 | 
			
		||||
                    datasets_paths::SMOL_SONGS
 | 
			
		||||
                ));
 | 
			
		||||
                builder.execute(reader, |_, _| ()).unwrap();
 | 
			
		||||
                let documents = utils::documents_from(datasets_paths::SMOL_SONGS, "csv");
 | 
			
		||||
                builder.execute(documents, |_, _| ()).unwrap();
 | 
			
		||||
                wtxn.commit().unwrap();
 | 
			
		||||
 | 
			
		||||
                index.prepare_for_closing().wait();
 | 
			
		||||
@@ -165,15 +156,10 @@ fn indexing_songs_without_faceted_fields(c: &mut Criterion) {
 | 
			
		||||
            move |index| {
 | 
			
		||||
                let update_builder = UpdateBuilder::new(0);
 | 
			
		||||
                let mut wtxn = index.write_txn().unwrap();
 | 
			
		||||
                let mut builder = update_builder.index_documents(&mut wtxn, &index);
 | 
			
		||||
                let builder = update_builder.index_documents(&mut wtxn, &index);
 | 
			
		||||
 | 
			
		||||
                builder.update_format(UpdateFormat::Csv);
 | 
			
		||||
                builder.index_documents_method(IndexDocumentsMethod::ReplaceDocuments);
 | 
			
		||||
                let reader = File::open(datasets_paths::SMOL_SONGS).expect(&format!(
 | 
			
		||||
                    "could not find the dataset in: {}",
 | 
			
		||||
                    datasets_paths::SMOL_SONGS
 | 
			
		||||
                ));
 | 
			
		||||
                builder.execute(reader, |_, _| ()).unwrap();
 | 
			
		||||
                let documents = utils::documents_from(datasets_paths::SMOL_SONGS, "csv");
 | 
			
		||||
                builder.execute(documents, |_, _| ()).unwrap();
 | 
			
		||||
                wtxn.commit().unwrap();
 | 
			
		||||
 | 
			
		||||
                index.prepare_for_closing().wait();
 | 
			
		||||
@@ -211,15 +197,10 @@ fn indexing_wiki(c: &mut Criterion) {
 | 
			
		||||
            move |index| {
 | 
			
		||||
                let update_builder = UpdateBuilder::new(0);
 | 
			
		||||
                let mut wtxn = index.write_txn().unwrap();
 | 
			
		||||
                let mut builder = update_builder.index_documents(&mut wtxn, &index);
 | 
			
		||||
                let builder = update_builder.index_documents(&mut wtxn, &index);
 | 
			
		||||
 | 
			
		||||
                builder.update_format(UpdateFormat::Csv);
 | 
			
		||||
                builder.index_documents_method(IndexDocumentsMethod::ReplaceDocuments);
 | 
			
		||||
                let reader = File::open(datasets_paths::SMOL_WIKI_ARTICLES).expect(&format!(
 | 
			
		||||
                    "could not find the dataset in: {}",
 | 
			
		||||
                    datasets_paths::SMOL_SONGS
 | 
			
		||||
                ));
 | 
			
		||||
                builder.execute(reader, |_, _| ()).unwrap();
 | 
			
		||||
                let documents = utils::documents_from(datasets_paths::SMOL_WIKI_ARTICLES, "csv");
 | 
			
		||||
                builder.execute(documents, |_, _| ()).unwrap();
 | 
			
		||||
                wtxn.commit().unwrap();
 | 
			
		||||
 | 
			
		||||
                index.prepare_for_closing().wait();
 | 
			
		||||
@@ -262,13 +243,10 @@ fn indexing_movies_default(c: &mut Criterion) {
 | 
			
		||||
            move |index| {
 | 
			
		||||
                let update_builder = UpdateBuilder::new(0);
 | 
			
		||||
                let mut wtxn = index.write_txn().unwrap();
 | 
			
		||||
                let mut builder = update_builder.index_documents(&mut wtxn, &index);
 | 
			
		||||
                let builder = update_builder.index_documents(&mut wtxn, &index);
 | 
			
		||||
 | 
			
		||||
                builder.update_format(UpdateFormat::Json);
 | 
			
		||||
                builder.index_documents_method(IndexDocumentsMethod::ReplaceDocuments);
 | 
			
		||||
                let reader = File::open(datasets_paths::MOVIES)
 | 
			
		||||
                    .expect(&format!("could not find the dataset in: {}", datasets_paths::MOVIES));
 | 
			
		||||
                builder.execute(reader, |_, _| ()).unwrap();
 | 
			
		||||
                let documents = utils::documents_from(datasets_paths::MOVIES, "json");
 | 
			
		||||
                builder.execute(documents, |_, _| ()).unwrap();
 | 
			
		||||
                wtxn.commit().unwrap();
 | 
			
		||||
 | 
			
		||||
                index.prepare_for_closing().wait();
 | 
			
		||||
@@ -316,15 +294,11 @@ fn indexing_geo(c: &mut Criterion) {
 | 
			
		||||
            move |index| {
 | 
			
		||||
                let update_builder = UpdateBuilder::new(0);
 | 
			
		||||
                let mut wtxn = index.write_txn().unwrap();
 | 
			
		||||
                let mut builder = update_builder.index_documents(&mut wtxn, &index);
 | 
			
		||||
                let builder = update_builder.index_documents(&mut wtxn, &index);
 | 
			
		||||
 | 
			
		||||
                let documents = utils::documents_from(datasets_paths::SMOL_ALL_COUNTRIES, "jsonl");
 | 
			
		||||
                builder.execute(documents, |_, _| ()).unwrap();
 | 
			
		||||
 | 
			
		||||
                builder.update_format(UpdateFormat::JsonStream);
 | 
			
		||||
                builder.index_documents_method(IndexDocumentsMethod::ReplaceDocuments);
 | 
			
		||||
                let reader = File::open(datasets_paths::SMOL_ALL_COUNTRIES).expect(&format!(
 | 
			
		||||
                    "could not find the dataset in: {}",
 | 
			
		||||
                    datasets_paths::SMOL_ALL_COUNTRIES
 | 
			
		||||
                ));
 | 
			
		||||
                builder.execute(reader, |_, _| ()).unwrap();
 | 
			
		||||
                wtxn.commit().unwrap();
 | 
			
		||||
 | 
			
		||||
                index.prepare_for_closing().wait();
 | 
			
		||||
 
 | 
			
		||||
@@ -2,7 +2,7 @@ mod datasets_paths;
 | 
			
		||||
mod utils;
 | 
			
		||||
 | 
			
		||||
use criterion::{criterion_group, criterion_main};
 | 
			
		||||
use milli::update::{Settings, UpdateFormat};
 | 
			
		||||
use milli::update::Settings;
 | 
			
		||||
use utils::Conf;
 | 
			
		||||
 | 
			
		||||
#[cfg(target_os = "linux")]
 | 
			
		||||
@@ -33,7 +33,7 @@ fn base_conf(builder: &mut Settings) {
 | 
			
		||||
#[rustfmt::skip]
 | 
			
		||||
const BASE_CONF: Conf = Conf {
 | 
			
		||||
    dataset: datasets_paths::SMOL_ALL_COUNTRIES,
 | 
			
		||||
    dataset_format: UpdateFormat::JsonStream,
 | 
			
		||||
    dataset_format: "jsonl",
 | 
			
		||||
    queries: &[
 | 
			
		||||
        "",
 | 
			
		||||
    ],
 | 
			
		||||
 
 | 
			
		||||
@@ -1,10 +1,15 @@
 | 
			
		||||
#![allow(dead_code)]
 | 
			
		||||
 | 
			
		||||
use std::fs::{create_dir_all, remove_dir_all, File};
 | 
			
		||||
use std::io::{self, Cursor, Read, Seek};
 | 
			
		||||
use std::path::Path;
 | 
			
		||||
 | 
			
		||||
use criterion::BenchmarkId;
 | 
			
		||||
use heed::EnvOpenOptions;
 | 
			
		||||
use milli::update::{IndexDocumentsMethod, Settings, UpdateBuilder, UpdateFormat};
 | 
			
		||||
use milli::documents::DocumentBatchReader;
 | 
			
		||||
use milli::update::{IndexDocumentsMethod, Settings, UpdateBuilder};
 | 
			
		||||
use milli::{FilterCondition, Index};
 | 
			
		||||
use serde_json::{Map, Value};
 | 
			
		||||
 | 
			
		||||
pub struct Conf<'a> {
 | 
			
		||||
    /// where we are going to create our database.mmdb directory
 | 
			
		||||
@@ -13,7 +18,7 @@ pub struct Conf<'a> {
 | 
			
		||||
    /// the dataset to be used, it must be an uncompressed csv
 | 
			
		||||
    pub dataset: &'a str,
 | 
			
		||||
    /// The format of the dataset
 | 
			
		||||
    pub dataset_format: UpdateFormat,
 | 
			
		||||
    pub dataset_format: &'a str,
 | 
			
		||||
    pub group_name: &'a str,
 | 
			
		||||
    pub queries: &'a [&'a str],
 | 
			
		||||
    /// here you can change which criterion are used and in which order.
 | 
			
		||||
@@ -33,7 +38,7 @@ pub struct Conf<'a> {
 | 
			
		||||
impl Conf<'_> {
 | 
			
		||||
    pub const BASE: Self = Conf {
 | 
			
		||||
        database_name: "benches.mmdb",
 | 
			
		||||
        dataset_format: UpdateFormat::Csv,
 | 
			
		||||
        dataset_format: "csv",
 | 
			
		||||
        dataset: "",
 | 
			
		||||
        group_name: "",
 | 
			
		||||
        queries: &[],
 | 
			
		||||
@@ -87,11 +92,10 @@ pub fn base_setup(conf: &Conf) -> Index {
 | 
			
		||||
    if let None = conf.primary_key {
 | 
			
		||||
        builder.enable_autogenerate_docids();
 | 
			
		||||
    }
 | 
			
		||||
    builder.update_format(conf.dataset_format);
 | 
			
		||||
    let documents = documents_from(conf.dataset, conf.dataset_format);
 | 
			
		||||
 | 
			
		||||
    builder.index_documents_method(IndexDocumentsMethod::ReplaceDocuments);
 | 
			
		||||
    let reader = File::open(conf.dataset)
 | 
			
		||||
        .expect(&format!("could not find the dataset in: {}", conf.dataset));
 | 
			
		||||
    builder.execute(reader, |_, _| ()).unwrap();
 | 
			
		||||
    builder.execute(documents, |_, _| ()).unwrap();
 | 
			
		||||
    wtxn.commit().unwrap();
 | 
			
		||||
 | 
			
		||||
    index
 | 
			
		||||
@@ -128,3 +132,58 @@ pub fn run_benches(c: &mut criterion::Criterion, confs: &[Conf]) {
 | 
			
		||||
        index.prepare_for_closing().wait();
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
pub fn documents_from(filename: &str, filetype: &str) -> DocumentBatchReader<impl Read + Seek> {
 | 
			
		||||
    let reader =
 | 
			
		||||
        File::open(filename).expect(&format!("could not find the dataset in: {}", filename));
 | 
			
		||||
    let documents = match filetype {
 | 
			
		||||
        "csv" => documents_from_csv(reader).unwrap(),
 | 
			
		||||
        "json" => documents_from_json(reader).unwrap(),
 | 
			
		||||
        "jsonl" => documents_from_jsonl(reader).unwrap(),
 | 
			
		||||
        otherwise => panic!("invalid update format {:?}", otherwise),
 | 
			
		||||
    };
 | 
			
		||||
    DocumentBatchReader::from_reader(Cursor::new(documents)).unwrap()
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
fn documents_from_jsonl(reader: impl io::Read) -> anyhow::Result<Vec<u8>> {
 | 
			
		||||
    let mut writer = Cursor::new(Vec::new());
 | 
			
		||||
    let mut documents = milli::documents::DocumentBatchBuilder::new(&mut writer)?;
 | 
			
		||||
 | 
			
		||||
    let values = serde_json::Deserializer::from_reader(reader)
 | 
			
		||||
        .into_iter::<serde_json::Map<String, serde_json::Value>>();
 | 
			
		||||
    for document in values {
 | 
			
		||||
        let document = document?;
 | 
			
		||||
        documents.add_documents(document)?;
 | 
			
		||||
    }
 | 
			
		||||
    documents.finish()?;
 | 
			
		||||
 | 
			
		||||
    Ok(writer.into_inner())
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
fn documents_from_json(reader: impl io::Read) -> anyhow::Result<Vec<u8>> {
 | 
			
		||||
    let mut writer = Cursor::new(Vec::new());
 | 
			
		||||
    let mut documents = milli::documents::DocumentBatchBuilder::new(&mut writer)?;
 | 
			
		||||
 | 
			
		||||
    let json: serde_json::Value = serde_json::from_reader(reader)?;
 | 
			
		||||
    documents.add_documents(json)?;
 | 
			
		||||
    documents.finish()?;
 | 
			
		||||
 | 
			
		||||
    Ok(writer.into_inner())
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
fn documents_from_csv(reader: impl io::Read) -> anyhow::Result<Vec<u8>> {
 | 
			
		||||
    let mut writer = Cursor::new(Vec::new());
 | 
			
		||||
    let mut documents = milli::documents::DocumentBatchBuilder::new(&mut writer)?;
 | 
			
		||||
 | 
			
		||||
    let mut records = csv::Reader::from_reader(reader);
 | 
			
		||||
    let iter = records.deserialize::<Map<String, Value>>();
 | 
			
		||||
 | 
			
		||||
    for doc in iter {
 | 
			
		||||
        let doc = doc?;
 | 
			
		||||
        documents.add_documents(doc)?;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    documents.finish()?;
 | 
			
		||||
 | 
			
		||||
    Ok(writer.into_inner())
 | 
			
		||||
}
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user