mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-07-27 08:41:00 +00:00
Add benchmarks
This commit is contained in:
@ -51,3 +51,8 @@ harness = false
|
|||||||
[[bench]]
|
[[bench]]
|
||||||
name = "indexing"
|
name = "indexing"
|
||||||
harness = false
|
harness = false
|
||||||
|
|
||||||
|
[[bench]]
|
||||||
|
name = "sort"
|
||||||
|
harness = false
|
||||||
|
|
||||||
|
108
crates/benchmarks/benches/sort.rs
Normal file
108
crates/benchmarks/benches/sort.rs
Normal file
@ -0,0 +1,108 @@
|
|||||||
|
//! This benchmark module compares the performance of sorting documents through the `/search`
//! route with sorting them through the `/documents` route.
|
||||||
|
//!
|
||||||
|
//! The tests/benchmarks were designed in the context of a query returning only 20 documents.
|
||||||
|
|
||||||
|
mod datasets_paths;
|
||||||
|
mod utils;
|
||||||
|
|
||||||
|
use criterion::{criterion_group, criterion_main};
|
||||||
|
use milli::update::Settings;
|
||||||
|
use utils::Conf;
|
||||||
|
|
||||||
|
// Install mimalloc as the process-wide allocator on every target except Windows.
#[cfg(not(windows))]
|
||||||
|
#[global_allocator]
|
||||||
|
static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc;
|
||||||
|
|
||||||
|
fn base_conf(builder: &mut Settings) {
|
||||||
|
let displayed_fields =
|
||||||
|
["geonameid", "name", "asciiname", "alternatenames", "_geo", "population"]
|
||||||
|
.iter()
|
||||||
|
.map(|s| s.to_string())
|
||||||
|
.collect();
|
||||||
|
builder.set_displayed_fields(displayed_fields);
|
||||||
|
|
||||||
|
let sortable_fields =
|
||||||
|
["_geo", "name", "population", "elevation", "timezone", "modification-date"]
|
||||||
|
.iter()
|
||||||
|
.map(|s| s.to_string())
|
||||||
|
.collect();
|
||||||
|
builder.set_sortable_fields(sortable_fields);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[rustfmt::skip]
|
||||||
|
const BASE_CONF: Conf = Conf {
|
||||||
|
dataset: datasets_paths::SMOL_ALL_COUNTRIES,
|
||||||
|
dataset_format: "jsonl",
|
||||||
|
configure: base_conf,
|
||||||
|
primary_key: Some("geonameid"),
|
||||||
|
queries: &[""],
|
||||||
|
offsets: &[
|
||||||
|
Some((0, 20)), // The most common query in the real world
|
||||||
|
Some((0, 500)), // A query that ranges over many documents
|
||||||
|
Some((980, 20)), // The worst query that could happen in the real world
|
||||||
|
Some((800_000, 20)) // The worst query
|
||||||
|
],
|
||||||
|
get_documents: true,
|
||||||
|
..Conf::BASE
|
||||||
|
};
|
||||||
|
|
||||||
|
fn bench_sort(c: &mut criterion::Criterion) {
|
||||||
|
#[rustfmt::skip]
|
||||||
|
let confs = &[
|
||||||
|
// utils::Conf {
|
||||||
|
// group_name: "without sort",
|
||||||
|
// sort: None,
|
||||||
|
// ..BASE_CONF
|
||||||
|
// },
|
||||||
|
|
||||||
|
// utils::Conf {
|
||||||
|
// group_name: "sort on many different values",
|
||||||
|
// sort: Some(vec!["name:asc"]),
|
||||||
|
// ..BASE_CONF
|
||||||
|
// },
|
||||||
|
|
||||||
|
// utils::Conf {
|
||||||
|
// group_name: "sort on many similar values",
|
||||||
|
// sort: Some(vec!["timezone:desc"]),
|
||||||
|
// ..BASE_CONF
|
||||||
|
// },
|
||||||
|
|
||||||
|
// utils::Conf {
|
||||||
|
// group_name: "sort on many similar then different values",
|
||||||
|
// sort: Some(vec!["timezone:desc", "name:asc"]),
|
||||||
|
// ..BASE_CONF
|
||||||
|
// },
|
||||||
|
|
||||||
|
// utils::Conf {
|
||||||
|
// group_name: "sort on many different then similar values",
|
||||||
|
// sort: Some(vec!["timezone:desc", "name:asc"]),
|
||||||
|
// ..BASE_CONF
|
||||||
|
// },
|
||||||
|
|
||||||
|
utils::Conf {
|
||||||
|
group_name: "geo sort",
|
||||||
|
sample_size: Some(10),
|
||||||
|
sort: Some(vec!["_geoPoint(45.4777599, 9.1967508):asc"]),
|
||||||
|
..BASE_CONF
|
||||||
|
},
|
||||||
|
|
||||||
|
utils::Conf {
|
||||||
|
group_name: "sort on many similar values then geo sort",
|
||||||
|
sample_size: Some(10),
|
||||||
|
sort: Some(vec!["timezone:desc", "_geoPoint(45.4777599, 9.1967508):asc"]),
|
||||||
|
..BASE_CONF
|
||||||
|
},
|
||||||
|
|
||||||
|
utils::Conf {
|
||||||
|
group_name: "sort on many different values then geo sort",
|
||||||
|
sample_size: Some(10),
|
||||||
|
sort: Some(vec!["name:desc", "_geoPoint(45.4777599, 9.1967508):asc"]),
|
||||||
|
..BASE_CONF
|
||||||
|
},
|
||||||
|
];
|
||||||
|
|
||||||
|
utils::run_benches(c, confs);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Register `bench_sort` under the `benches` group and hand that group to `criterion_main!`
// (the `sort` bench target is declared with `harness = false`).
criterion_group!(benches, bench_sort);
|
||||||
|
criterion_main!(benches);
|
@ -9,6 +9,7 @@ use anyhow::Context;
|
|||||||
use bumpalo::Bump;
|
use bumpalo::Bump;
|
||||||
use criterion::BenchmarkId;
|
use criterion::BenchmarkId;
|
||||||
use memmap2::Mmap;
|
use memmap2::Mmap;
|
||||||
|
use milli::documents::sort::recursive_sort;
|
||||||
use milli::heed::EnvOpenOptions;
|
use milli::heed::EnvOpenOptions;
|
||||||
use milli::progress::Progress;
|
use milli::progress::Progress;
|
||||||
use milli::update::new::indexer;
|
use milli::update::new::indexer;
|
||||||
@ -35,6 +36,12 @@ pub struct Conf<'a> {
|
|||||||
pub configure: fn(&mut Settings),
|
pub configure: fn(&mut Settings),
|
||||||
pub filter: Option<&'a str>,
|
pub filter: Option<&'a str>,
|
||||||
pub sort: Option<Vec<&'a str>>,
|
pub sort: Option<Vec<&'a str>>,
|
||||||
|
/// pagination windows to benchmark, as `(offset, limit)` pairs; `None` applies no offset or limit
|
||||||
|
pub offsets: &'a [Option<(usize, usize)>],
|
||||||
|
/// enable if you want to bench getting documents without querying
|
||||||
|
pub get_documents: bool,
|
||||||
|
/// configure the benchmark sample size
|
||||||
|
pub sample_size: Option<usize>,
|
||||||
/// enable or disable the optional words on the query
|
/// enable or disable the optional words on the query
|
||||||
pub optional_words: bool,
|
pub optional_words: bool,
|
||||||
/// primary key, if there is None we'll auto-generate docids for every documents
|
/// primary key, if there is None we'll auto-generate docids for every documents
|
||||||
@ -52,6 +59,9 @@ impl Conf<'_> {
|
|||||||
configure: |_| (),
|
configure: |_| (),
|
||||||
filter: None,
|
filter: None,
|
||||||
sort: None,
|
sort: None,
|
||||||
|
offsets: &[None],
|
||||||
|
get_documents: false,
|
||||||
|
sample_size: None,
|
||||||
optional_words: true,
|
optional_words: true,
|
||||||
primary_key: None,
|
primary_key: None,
|
||||||
};
|
};
|
||||||
@ -144,13 +154,28 @@ pub fn run_benches(c: &mut criterion::Criterion, confs: &[Conf]) {
|
|||||||
let file_name = Path::new(conf.dataset).file_name().and_then(|f| f.to_str()).unwrap();
|
let file_name = Path::new(conf.dataset).file_name().and_then(|f| f.to_str()).unwrap();
|
||||||
let name = format!("{}: {}", file_name, conf.group_name);
|
let name = format!("{}: {}", file_name, conf.group_name);
|
||||||
let mut group = c.benchmark_group(&name);
|
let mut group = c.benchmark_group(&name);
|
||||||
|
if let Some(sample_size) = conf.sample_size {
|
||||||
|
group.sample_size(sample_size);
|
||||||
|
}
|
||||||
|
|
||||||
for &query in conf.queries {
|
for &query in conf.queries {
|
||||||
group.bench_with_input(BenchmarkId::from_parameter(query), &query, |b, &query| {
|
for offset in conf.offsets {
|
||||||
|
let parameter = match (query.is_empty(), offset) {
|
||||||
|
(true, None) => String::from("placeholder"),
|
||||||
|
(true, Some((offset, limit))) => format!("placeholder[{offset}:{limit}]"),
|
||||||
|
(false, None) => query.to_string(),
|
||||||
|
(false, Some((offset, limit))) => format!("{query}[{offset}:{limit}]"),
|
||||||
|
};
|
||||||
|
group.bench_with_input(
|
||||||
|
BenchmarkId::from_parameter(parameter),
|
||||||
|
&query,
|
||||||
|
|b, &query| {
|
||||||
b.iter(|| {
|
b.iter(|| {
|
||||||
let rtxn = index.read_txn().unwrap();
|
let rtxn = index.read_txn().unwrap();
|
||||||
let mut search = index.search(&rtxn);
|
let mut search = index.search(&rtxn);
|
||||||
search.query(query).terms_matching_strategy(TermsMatchingStrategy::default());
|
search
|
||||||
|
.query(query)
|
||||||
|
.terms_matching_strategy(TermsMatchingStrategy::default());
|
||||||
if let Some(filter) = conf.filter {
|
if let Some(filter) = conf.filter {
|
||||||
let filter = Filter::from_str(filter).unwrap().unwrap();
|
let filter = Filter::from_str(filter).unwrap().unwrap();
|
||||||
search.filter(filter);
|
search.filter(filter);
|
||||||
@ -159,10 +184,51 @@ pub fn run_benches(c: &mut criterion::Criterion, confs: &[Conf]) {
|
|||||||
let sort = sort.iter().map(|sort| sort.parse().unwrap()).collect();
|
let sort = sort.iter().map(|sort| sort.parse().unwrap()).collect();
|
||||||
search.sort_criteria(sort);
|
search.sort_criteria(sort);
|
||||||
}
|
}
|
||||||
|
if let Some((offset, limit)) = offset {
|
||||||
|
search.offset(*offset).limit(*limit);
|
||||||
|
}
|
||||||
|
|
||||||
let _ids = search.execute().unwrap();
|
let _ids = search.execute().unwrap();
|
||||||
});
|
});
|
||||||
|
},
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if conf.get_documents {
|
||||||
|
for offset in conf.offsets {
|
||||||
|
let parameter = match offset {
|
||||||
|
None => String::from("get_documents"),
|
||||||
|
Some((offset, limit)) => format!("get_documents[{offset}:{limit}]"),
|
||||||
|
};
|
||||||
|
group.bench_with_input(BenchmarkId::from_parameter(parameter), &(), |b, &()| {
|
||||||
|
b.iter(|| {
|
||||||
|
let rtxn = index.read_txn().unwrap();
|
||||||
|
if let Some(sort) = &conf.sort {
|
||||||
|
let sort = sort.iter().map(|sort| sort.parse().unwrap()).collect();
|
||||||
|
let all_docs = index.documents_ids(&rtxn).unwrap();
|
||||||
|
let facet_sort =
|
||||||
|
recursive_sort(&index, &rtxn, sort, &all_docs).unwrap();
|
||||||
|
let iter = facet_sort.iter().unwrap();
|
||||||
|
if let Some((offset, limit)) = offset {
|
||||||
|
let _results = iter.skip(*offset).take(*limit).collect::<Vec<_>>();
|
||||||
|
} else {
|
||||||
|
let _results = iter.collect::<Vec<_>>();
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
let all_docs = index.documents_ids(&rtxn).unwrap();
|
||||||
|
if let Some((offset, limit)) = offset {
|
||||||
|
let _results =
|
||||||
|
all_docs.iter().skip(*offset).take(*limit).collect::<Vec<_>>();
|
||||||
|
} else {
|
||||||
|
let _results = all_docs.iter().collect::<Vec<_>>();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
});
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
group.finish();
|
group.finish();
|
||||||
|
|
||||||
index.prepare_for_closing().wait();
|
index.prepare_for_closing().wait();
|
||||||
|
Reference in New Issue
Block a user