add benchmarks for the geosearch

This commit is contained in:
Tamo
2021-09-13 18:08:28 +02:00
parent c695a1ffd2
commit 5e683ba472
7 changed files with 222 additions and 12 deletions

View File

@ -277,12 +277,69 @@ fn indexing_movies_default(c: &mut Criterion) {
});
}
/// Benchmarks the indexing of the `SMOL_ALL_COUNTRIES` geo dataset.
///
/// The first closure given to `iter_with_setup` creates the index and applies the
/// settings (primary key, displayed / searchable / filterable / sortable fields).
/// The second closure receives that index and indexes the documents, commits the
/// transaction and waits for the index to be closed.
fn indexing_geo(c: &mut Criterion) {
    let mut group = c.benchmark_group("indexing");
    group.sample_size(10);
    group.bench_function("Indexing geo_point", |b| {
        b.iter_with_setup(
            move || {
                let index = setup_index();

                let update_builder = UpdateBuilder::new(0);
                let mut wtxn = index.write_txn().unwrap();
                let mut builder = update_builder.settings(&mut wtxn, &index);

                builder.set_primary_key("geonameid".to_owned());
                let displayed_fields =
                    ["geonameid", "name", "asciiname", "alternatenames", "_geo", "population"]
                        .iter()
                        .map(|s| s.to_string())
                        .collect();
                builder.set_displayed_fields(displayed_fields);

                let searchable_fields =
                    ["name", "alternatenames", "elevation"].iter().map(|s| s.to_string()).collect();
                builder.set_searchable_fields(searchable_fields);

                // `_geo` must be filterable and sortable for the geo features to be usable.
                let filterable_fields =
                    ["_geo", "population", "elevation"].iter().map(|s| s.to_string()).collect();
                builder.set_filterable_fields(filterable_fields);

                let sortable_fields =
                    ["_geo", "population", "elevation"].iter().map(|s| s.to_string()).collect();
                builder.set_sortable_fields(sortable_fields);

                builder.execute(|_, _| ()).unwrap();
                wtxn.commit().unwrap();
                index
            },
            move |index| {
                let update_builder = UpdateBuilder::new(0);
                let mut wtxn = index.write_txn().unwrap();
                let mut builder = update_builder.index_documents(&mut wtxn, &index);

                builder.update_format(UpdateFormat::JsonStream);
                builder.index_documents_method(IndexDocumentsMethod::ReplaceDocuments);
                // Build the panic message lazily: `expect(&format!(..))` would allocate the
                // message on every iteration, even when the file opens fine.
                let reader =
                    File::open(datasets_paths::SMOL_ALL_COUNTRIES).unwrap_or_else(|err| {
                        panic!(
                            "could not find the dataset in: {}: {:?}",
                            datasets_paths::SMOL_ALL_COUNTRIES,
                            err
                        )
                    });
                builder.execute(reader, |_, _| ()).unwrap();
                wtxn.commit().unwrap();

                index.prepare_for_closing().wait();
            },
        )
    });
}
// Every indexing benchmark of this file, registered in the `benches` group.
// Each entry but the last needs a trailing comma and must appear only once.
criterion_group!(
    benches,
    indexing_songs_default,
    indexing_songs_without_faceted_numbers,
    indexing_songs_without_faceted_fields,
    indexing_wiki,
    indexing_movies_default,
    indexing_geo
);
criterion_main!(benches);

View File

@ -0,0 +1,123 @@
mod datasets_paths;
mod utils;
use criterion::{criterion_group, criterion_main};
use milli::update::{Settings, UpdateFormat};
use utils::Conf;
// Use jemalloc as the global allocator; only compiled in when targeting Linux.
#[cfg(target_os = "linux")]
#[global_allocator]
static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc;
/// Configures the settings shared by every geo benchmark: which fields are
/// displayed, searchable, filterable and sortable.
fn base_conf(builder: &mut Settings) {
    const DISPLAYED: [&str; 6] =
        ["geonameid", "name", "asciiname", "alternatenames", "_geo", "population"];
    const SEARCHABLE: [&str; 3] = ["name", "alternatenames", "elevation"];
    // The same fields are used for filtering and for sorting.
    const FACETED: [&str; 3] = ["_geo", "population", "elevation"];

    builder.set_displayed_fields(DISPLAYED.iter().copied().map(String::from).collect());
    builder.set_searchable_fields(SEARCHABLE.iter().copied().map(String::from).collect());
    builder.set_filterable_fields(FACETED.iter().copied().map(String::from).collect());
    builder.set_sortable_fields(FACETED.iter().copied().map(String::from).collect());
}
/// Configuration shared by all the geo benchmarks: the `SMOL_ALL_COUNTRIES`
/// dataset read as a JSON stream, `geonameid` as primary key, the settings of
/// `base_conf` and a single empty query. Every other field comes from `Conf::BASE`.
#[rustfmt::skip]
const BASE_CONF: Conf = Conf {
dataset: datasets_paths::SMOL_ALL_COUNTRIES,
dataset_format: UpdateFormat::JsonStream,
// Only the empty query is run; the benchmarks differ by their sort / filter.
queries: &[
"",
],
configure: base_conf,
primary_key: Some("geonameid"),
..Conf::BASE
};
fn bench_geo(c: &mut criterion::Criterion) {
#[rustfmt::skip]
let confs = &[
// A basic placeholder with no geo
utils::Conf {
group_name: "placeholder with no geo",
..BASE_CONF
},
// Medium aglomeration: probably the most common usecase
utils::Conf {
group_name: "asc sort from Lille",
sort: Some(vec!["_geoPoint(50.62999333378238, 3.086269263384099):asc"]),
..BASE_CONF
},
utils::Conf {
group_name: "desc sort from Lille",
sort: Some(vec!["_geoPoint(50.62999333378238, 3.086269263384099):desc"]),
..BASE_CONF
},
// Big agglomeration: a lot of documents close to our point
utils::Conf {
group_name: "asc sort from Tokyo",
sort: Some(vec!["_geoPoint(35.749512532692144, 139.61664952543356):asc"]),
..BASE_CONF
},
utils::Conf {
group_name: "desc sort from Tokyo",
sort: Some(vec!["_geoPoint(35.749512532692144, 139.61664952543356):desc"]),
..BASE_CONF
},
// The furthest point from any civilization
utils::Conf {
group_name: "asc sort from Point Nemo",
sort: Some(vec!["_geoPoint(-48.87561645055408, -123.39275749319793):asc"]),
..BASE_CONF
},
utils::Conf {
group_name: "desc sort from Point Nemo",
sort: Some(vec!["_geoPoint(-48.87561645055408, -123.39275749319793):desc"]),
..BASE_CONF
},
// Filters
utils::Conf {
group_name: "filter of 100km from Lille",
filter: Some("_geoRadius(50.62999333378238, 3.086269263384099, 100000)"),
..BASE_CONF
},
utils::Conf {
group_name: "filter of 1km from Lille",
filter: Some("_geoRadius(50.62999333378238, 3.086269263384099, 1000)"),
..BASE_CONF
},
utils::Conf {
group_name: "filter of 100km from Tokyo",
filter: Some("_geoRadius(35.749512532692144, 139.61664952543356, 100000)"),
..BASE_CONF
},
utils::Conf {
group_name: "filter of 1km from Tokyo",
filter: Some("_geoRadius(35.749512532692144, 139.61664952543356, 1000)"),
..BASE_CONF
},
utils::Conf {
group_name: "filter of 100km from Point Nemo",
filter: Some("_geoRadius(-48.87561645055408, -123.39275749319793, 100000)"),
..BASE_CONF
},
utils::Conf {
group_name: "filter of 1km from Point Nemo",
filter: Some("_geoRadius(-48.87561645055408, -123.39275749319793, 1000)"),
..BASE_CONF
},
];
utils::run_benches(c, confs);
}
// Register `bench_geo` in the `benches` group and hand that group to criterion's main macro.
criterion_group!(benches, bench_geo);
criterion_main!(benches);

View File

@ -12,6 +12,8 @@ pub struct Conf<'a> {
pub database_name: &'a str,
/// the dataset to be used, it must be an uncompressed file in the format given by `dataset_format`
pub dataset: &'a str,
/// The format of the dataset
pub dataset_format: UpdateFormat,
pub group_name: &'a str,
pub queries: &'a [&'a str],
/// here you can change which criterion are used and in which order.
@ -21,6 +23,7 @@ pub struct Conf<'a> {
/// the last chance to configure your database as you want
pub configure: fn(&mut Settings),
pub filter: Option<&'a str>,
pub sort: Option<Vec<&'a str>>,
/// enable or disable the optional words on the query
pub optional_words: bool,
/// primary key; if it is `None`, docids are auto-generated for every document
@ -30,12 +33,14 @@ pub struct Conf<'a> {
impl Conf<'_> {
/// Default configuration, meant to be completed with the struct update syntax
/// (`..Conf::BASE`): CSV dataset format, empty dataset path and group name, no
/// query, no criterion, no filter, no sort, no primary key, a no-op `configure`
/// and optional words enabled.
pub const BASE: Self = Conf {
database_name: "benches.mmdb",
dataset_format: UpdateFormat::Csv,
dataset: "",
group_name: "",
queries: &[],
criterion: None,
// Does nothing: the settings are left untouched unless a benchmark overrides this.
configure: |_| (),
filter: None,
sort: None,
optional_words: true,
primary_key: None,
};
@ -82,7 +87,7 @@ pub fn base_setup(conf: &Conf) -> Index {
if let None = conf.primary_key {
builder.enable_autogenerate_docids();
}
builder.update_format(UpdateFormat::Csv);
builder.update_format(conf.dataset_format);
builder.index_documents_method(IndexDocumentsMethod::ReplaceDocuments);
let reader = File::open(conf.dataset)
.expect(&format!("could not find the dataset in: {}", conf.dataset));
@ -110,6 +115,10 @@ pub fn run_benches(c: &mut criterion::Criterion, confs: &[Conf]) {
let filter = FilterCondition::from_str(&rtxn, &index, filter).unwrap();
search.filter(filter);
}
if let Some(sort) = &conf.sort {
let sort = sort.iter().map(|sort| sort.parse().unwrap()).collect();
search.sort_criteria(sort);
}
let _ids = search.execute().unwrap();
});
});