mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-12-08 21:55:42 +00:00
Compare commits
8 Commits
option-dis
...
v1.9.0-wit
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
c3e5ac1b74 | ||
|
|
be9b1f9c4b | ||
|
|
3f3c3cb54f | ||
|
|
97ee9287d1 | ||
|
|
fd87b56e3d | ||
|
|
801219cd5b | ||
|
|
a19d5c086e | ||
|
|
14d10923d8 |
2
.github/workflows/flaky-tests.yml
vendored
2
.github/workflows/flaky-tests.yml
vendored
@@ -1,6 +1,4 @@
|
||||
name: Look for flaky tests
|
||||
env:
|
||||
ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
|
||||
on:
|
||||
workflow_dispatch:
|
||||
schedule:
|
||||
|
||||
3
.github/workflows/fuzzer-indexing.yml
vendored
3
.github/workflows/fuzzer-indexing.yml
vendored
@@ -1,6 +1,5 @@
|
||||
name: Run the indexing fuzzer
|
||||
env:
|
||||
ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
|
||||
2
.github/workflows/publish-apt-brew-pkg.yml
vendored
2
.github/workflows/publish-apt-brew-pkg.yml
vendored
@@ -15,8 +15,6 @@ jobs:
|
||||
|
||||
debian:
|
||||
name: Publish debian packagge
|
||||
env:
|
||||
ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
|
||||
runs-on: ubuntu-latest
|
||||
needs: check-version
|
||||
container:
|
||||
|
||||
4
.github/workflows/publish-binaries.yml
vendored
4
.github/workflows/publish-binaries.yml
vendored
@@ -35,8 +35,6 @@ jobs:
|
||||
publish-linux:
|
||||
name: Publish binary for Linux
|
||||
runs-on: ubuntu-latest
|
||||
env:
|
||||
ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
|
||||
needs: check-version
|
||||
container:
|
||||
# Use ubuntu-18.04 to compile with glibc 2.27
|
||||
@@ -134,8 +132,6 @@ jobs:
|
||||
name: Publish binary for aarch64
|
||||
runs-on: ubuntu-latest
|
||||
needs: check-version
|
||||
env:
|
||||
ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
|
||||
container:
|
||||
# Use ubuntu-18.04 to compile with glibc 2.27
|
||||
image: ubuntu:18.04
|
||||
|
||||
8
.github/workflows/test-suite.yml
vendored
8
.github/workflows/test-suite.yml
vendored
@@ -21,8 +21,6 @@ jobs:
|
||||
test-linux:
|
||||
name: Tests on ubuntu-18.04
|
||||
runs-on: ubuntu-latest
|
||||
env:
|
||||
ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
|
||||
container:
|
||||
# Use ubuntu-18.04 to compile with glibc 2.27, which are the production expectations
|
||||
image: ubuntu:18.04
|
||||
@@ -79,8 +77,6 @@ jobs:
|
||||
test-all-features:
|
||||
name: Tests almost all features
|
||||
runs-on: ubuntu-latest
|
||||
env:
|
||||
ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
|
||||
container:
|
||||
# Use ubuntu-18.04 to compile with glibc 2.27, which are the production expectations
|
||||
image: ubuntu:18.04
|
||||
@@ -104,8 +100,6 @@ jobs:
|
||||
|
||||
test-disabled-tokenization:
|
||||
name: Test disabled tokenization
|
||||
env:
|
||||
ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
|
||||
runs-on: ubuntu-latest
|
||||
container:
|
||||
image: ubuntu:18.04
|
||||
@@ -133,8 +127,6 @@ jobs:
|
||||
# We run tests in debug also, to make sure that the debug_assertions are hit
|
||||
test-debug:
|
||||
name: Run tests in debug
|
||||
env:
|
||||
ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
|
||||
runs-on: ubuntu-latest
|
||||
container:
|
||||
# Use ubuntu-18.04 to compile with glibc 2.27, which are the production expectations
|
||||
|
||||
34
Cargo.lock
generated
34
Cargo.lock
generated
@@ -503,7 +503,7 @@ checksum = "8c3c1a368f70d6cf7302d78f8f7093da241fb8e8807c05cc9e51a125895a6d5b"
|
||||
|
||||
[[package]]
|
||||
name = "benchmarks"
|
||||
version = "1.9.1"
|
||||
version = "1.9.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"bytes",
|
||||
@@ -648,7 +648,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "build-info"
|
||||
version = "1.9.1"
|
||||
version = "1.9.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"time",
|
||||
@@ -1579,7 +1579,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "dump"
|
||||
version = "1.9.1"
|
||||
version = "1.9.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"big_s",
|
||||
@@ -1804,7 +1804,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "file-store"
|
||||
version = "1.9.1"
|
||||
version = "1.9.0"
|
||||
dependencies = [
|
||||
"faux",
|
||||
"tempfile",
|
||||
@@ -1827,7 +1827,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "filter-parser"
|
||||
version = "1.9.1"
|
||||
version = "1.9.0"
|
||||
dependencies = [
|
||||
"insta",
|
||||
"nom",
|
||||
@@ -1847,7 +1847,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "flatten-serde-json"
|
||||
version = "1.9.1"
|
||||
version = "1.9.0"
|
||||
dependencies = [
|
||||
"criterion",
|
||||
"serde_json",
|
||||
@@ -1965,7 +1965,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "fuzzers"
|
||||
version = "1.9.1"
|
||||
version = "1.9.0"
|
||||
dependencies = [
|
||||
"arbitrary",
|
||||
"clap",
|
||||
@@ -2452,7 +2452,7 @@ checksum = "206ca75c9c03ba3d4ace2460e57b189f39f43de612c2f85836e65c929701bb2d"
|
||||
|
||||
[[package]]
|
||||
name = "index-scheduler"
|
||||
version = "1.9.1"
|
||||
version = "1.9.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"arroy",
|
||||
@@ -2649,7 +2649,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "json-depth-checker"
|
||||
version = "1.9.1"
|
||||
version = "1.9.0"
|
||||
dependencies = [
|
||||
"criterion",
|
||||
"serde_json",
|
||||
@@ -3257,7 +3257,7 @@ checksum = "490cc448043f947bae3cbee9c203358d62dbee0db12107a74be5c30ccfd09771"
|
||||
|
||||
[[package]]
|
||||
name = "meili-snap"
|
||||
version = "1.9.1"
|
||||
version = "1.9.0"
|
||||
dependencies = [
|
||||
"insta",
|
||||
"md5",
|
||||
@@ -3266,7 +3266,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "meilisearch"
|
||||
version = "1.9.1"
|
||||
version = "1.9.0"
|
||||
dependencies = [
|
||||
"actix-cors",
|
||||
"actix-http",
|
||||
@@ -3358,7 +3358,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "meilisearch-auth"
|
||||
version = "1.9.1"
|
||||
version = "1.9.0"
|
||||
dependencies = [
|
||||
"base64 0.21.7",
|
||||
"enum-iterator",
|
||||
@@ -3377,7 +3377,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "meilisearch-types"
|
||||
version = "1.9.1"
|
||||
version = "1.9.0"
|
||||
dependencies = [
|
||||
"actix-web",
|
||||
"anyhow",
|
||||
@@ -3407,7 +3407,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "meilitool"
|
||||
version = "1.9.1"
|
||||
version = "1.9.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"clap",
|
||||
@@ -3446,7 +3446,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "milli"
|
||||
version = "1.9.1"
|
||||
version = "1.9.0"
|
||||
dependencies = [
|
||||
"arroy",
|
||||
"big_s",
|
||||
@@ -3886,7 +3886,7 @@ checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e"
|
||||
|
||||
[[package]]
|
||||
name = "permissive-json-pointer"
|
||||
version = "1.9.1"
|
||||
version = "1.9.0"
|
||||
dependencies = [
|
||||
"big_s",
|
||||
"serde_json",
|
||||
@@ -6042,7 +6042,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "xtask"
|
||||
version = "1.9.1"
|
||||
version = "1.9.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"build-info",
|
||||
|
||||
@@ -22,7 +22,7 @@ members = [
|
||||
]
|
||||
|
||||
[workspace.package]
|
||||
version = "1.9.1"
|
||||
version = "1.9.0"
|
||||
authors = [
|
||||
"Quentin de Quelen <quentin@dequelen.me>",
|
||||
"Clément Renault <clement@meilisearch.com>",
|
||||
|
||||
@@ -897,95 +897,91 @@ impl IndexScheduler {
|
||||
dump_tasks.flush()?;
|
||||
|
||||
// 3. Dump the indexes
|
||||
let () =
|
||||
self.index_mapper.try_for_each_index(&rtxn, |uid, index| -> Result<()> {
|
||||
let rtxn = index.read_txn()?;
|
||||
let metadata = IndexMetadata {
|
||||
uid: uid.to_owned(),
|
||||
primary_key: index.primary_key(&rtxn)?.map(String::from),
|
||||
created_at: index.created_at(&rtxn)?,
|
||||
updated_at: index.updated_at(&rtxn)?,
|
||||
};
|
||||
let mut index_dumper = dump.create_index(uid, &metadata)?;
|
||||
self.index_mapper.try_for_each_index(&rtxn, |uid, index| -> Result<()> {
|
||||
let rtxn = index.read_txn()?;
|
||||
let metadata = IndexMetadata {
|
||||
uid: uid.to_owned(),
|
||||
primary_key: index.primary_key(&rtxn)?.map(String::from),
|
||||
created_at: index.created_at(&rtxn)?,
|
||||
updated_at: index.updated_at(&rtxn)?,
|
||||
};
|
||||
let mut index_dumper = dump.create_index(uid, &metadata)?;
|
||||
|
||||
let fields_ids_map = index.fields_ids_map(&rtxn)?;
|
||||
let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();
|
||||
let embedding_configs = index.embedding_configs(&rtxn)?;
|
||||
let fields_ids_map = index.fields_ids_map(&rtxn)?;
|
||||
let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();
|
||||
let embedding_configs = index.embedding_configs(&rtxn)?;
|
||||
|
||||
// 3.1. Dump the documents
|
||||
for ret in index.all_documents(&rtxn)? {
|
||||
if self.must_stop_processing.get() {
|
||||
return Err(Error::AbortedTask);
|
||||
}
|
||||
|
||||
let (id, doc) = ret?;
|
||||
|
||||
let mut document =
|
||||
milli::obkv_to_json(&all_fields, &fields_ids_map, doc)?;
|
||||
|
||||
'inject_vectors: {
|
||||
let embeddings = index.embeddings(&rtxn, id)?;
|
||||
|
||||
if embeddings.is_empty() {
|
||||
break 'inject_vectors;
|
||||
}
|
||||
|
||||
let vectors = document
|
||||
.entry(RESERVED_VECTORS_FIELD_NAME.to_owned())
|
||||
.or_insert(serde_json::Value::Object(Default::default()));
|
||||
|
||||
let serde_json::Value::Object(vectors) = vectors else {
|
||||
return Err(milli::Error::UserError(
|
||||
milli::UserError::InvalidVectorsMapType {
|
||||
document_id: {
|
||||
if let Ok(Some(Ok(index))) = index
|
||||
.external_id_of(&rtxn, std::iter::once(id))
|
||||
.map(|it| it.into_iter().next())
|
||||
{
|
||||
index
|
||||
} else {
|
||||
format!("internal docid={id}")
|
||||
}
|
||||
},
|
||||
value: vectors.clone(),
|
||||
},
|
||||
)
|
||||
.into());
|
||||
};
|
||||
|
||||
for (embedder_name, embeddings) in embeddings {
|
||||
let user_provided = embedding_configs
|
||||
.iter()
|
||||
.find(|conf| conf.name == embedder_name)
|
||||
.is_some_and(|conf| conf.user_provided.contains(id));
|
||||
|
||||
let embeddings = ExplicitVectors {
|
||||
embeddings: Some(
|
||||
VectorOrArrayOfVectors::from_array_of_vectors(
|
||||
embeddings,
|
||||
),
|
||||
),
|
||||
regenerate: !user_provided,
|
||||
};
|
||||
vectors.insert(
|
||||
embedder_name,
|
||||
serde_json::to_value(embeddings).unwrap(),
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
index_dumper.push_document(&document)?;
|
||||
// 3.1. Dump the documents
|
||||
for ret in index.all_documents(&rtxn)? {
|
||||
if self.must_stop_processing.get() {
|
||||
return Err(Error::AbortedTask);
|
||||
}
|
||||
|
||||
// 3.2. Dump the settings
|
||||
let settings = meilisearch_types::settings::settings(
|
||||
index,
|
||||
&rtxn,
|
||||
meilisearch_types::settings::SecretPolicy::RevealSecrets,
|
||||
)?;
|
||||
index_dumper.settings(&settings)?;
|
||||
Ok(())
|
||||
})?;
|
||||
let (id, doc) = ret?;
|
||||
|
||||
let mut document = milli::obkv_to_json(&all_fields, &fields_ids_map, doc)?;
|
||||
|
||||
'inject_vectors: {
|
||||
let embeddings = index.embeddings(&rtxn, id)?;
|
||||
|
||||
if embeddings.is_empty() {
|
||||
break 'inject_vectors;
|
||||
}
|
||||
|
||||
let vectors = document
|
||||
.entry(RESERVED_VECTORS_FIELD_NAME.to_owned())
|
||||
.or_insert(serde_json::Value::Object(Default::default()));
|
||||
|
||||
let serde_json::Value::Object(vectors) = vectors else {
|
||||
return Err(milli::Error::UserError(
|
||||
milli::UserError::InvalidVectorsMapType {
|
||||
document_id: {
|
||||
if let Ok(Some(Ok(index))) = index
|
||||
.external_id_of(&rtxn, std::iter::once(id))
|
||||
.map(|it| it.into_iter().next())
|
||||
{
|
||||
index
|
||||
} else {
|
||||
format!("internal docid={id}")
|
||||
}
|
||||
},
|
||||
value: vectors.clone(),
|
||||
},
|
||||
)
|
||||
.into());
|
||||
};
|
||||
|
||||
for (embedder_name, embeddings) in embeddings {
|
||||
let user_provided = embedding_configs
|
||||
.iter()
|
||||
.find(|conf| conf.name == embedder_name)
|
||||
.is_some_and(|conf| conf.user_provided.contains(id));
|
||||
|
||||
let embeddings = ExplicitVectors {
|
||||
embeddings: Some(
|
||||
VectorOrArrayOfVectors::from_array_of_vectors(embeddings),
|
||||
),
|
||||
regenerate: !user_provided,
|
||||
};
|
||||
vectors.insert(
|
||||
embedder_name,
|
||||
serde_json::to_value(embeddings).unwrap(),
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
index_dumper.push_document(&document)?;
|
||||
}
|
||||
|
||||
// 3.2. Dump the settings
|
||||
let settings = meilisearch_types::settings::settings(
|
||||
index,
|
||||
&rtxn,
|
||||
meilisearch_types::settings::SecretPolicy::RevealSecrets,
|
||||
)?;
|
||||
index_dumper.settings(&settings)?;
|
||||
Ok(())
|
||||
})?;
|
||||
|
||||
// 4. Dump experimental feature settings
|
||||
let features = self.features().runtime_features();
|
||||
@@ -1292,11 +1288,7 @@ impl IndexScheduler {
|
||||
}
|
||||
}
|
||||
|
||||
let config = IndexDocumentsConfig {
|
||||
update_method: method,
|
||||
compute_prefix_databases: self.compute_prefix_databases,
|
||||
..Default::default()
|
||||
};
|
||||
let config = IndexDocumentsConfig { update_method: method, ..Default::default() };
|
||||
|
||||
let embedder_configs = index.embedding_configs(index_wtxn)?;
|
||||
// TODO: consider Arc'ing the map too (we only need read access + we'll be cloning it multiple times, so really makes sense)
|
||||
@@ -1406,7 +1398,6 @@ impl IndexScheduler {
|
||||
let deleted_documents = delete_document_by_filter(
|
||||
index_wtxn,
|
||||
filter,
|
||||
self.compute_prefix_databases,
|
||||
self.index_mapper.indexer_config(),
|
||||
self.must_stop_processing.clone(),
|
||||
index,
|
||||
@@ -1647,7 +1638,6 @@ impl IndexScheduler {
|
||||
fn delete_document_by_filter<'a>(
|
||||
wtxn: &mut RwTxn<'a>,
|
||||
filter: &serde_json::Value,
|
||||
compute_prefix_databases: bool,
|
||||
indexer_config: &IndexerConfig,
|
||||
must_stop_processing: MustStopProcessing,
|
||||
index: &'a Index,
|
||||
@@ -1663,7 +1653,6 @@ fn delete_document_by_filter<'a>(
|
||||
|
||||
let config = IndexDocumentsConfig {
|
||||
update_method: IndexDocumentsMethod::ReplaceDocuments,
|
||||
compute_prefix_databases,
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
|
||||
@@ -32,7 +32,6 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String {
|
||||
features: _,
|
||||
max_number_of_tasks: _,
|
||||
max_number_of_batched_tasks: _,
|
||||
compute_prefix_databases: _,
|
||||
wake_up: _,
|
||||
dumps_path: _,
|
||||
snapshots_path: _,
|
||||
|
||||
@@ -276,8 +276,6 @@ pub struct IndexSchedulerOptions {
|
||||
pub max_number_of_batched_tasks: usize,
|
||||
/// The experimental features enabled for this instance.
|
||||
pub instance_features: InstanceTogglableFeatures,
|
||||
/// An experimental option to control the generation of prefix databases.
|
||||
pub compute_prefix_databases: bool,
|
||||
}
|
||||
|
||||
/// Structure which holds meilisearch's indexes and schedules the tasks
|
||||
@@ -285,13 +283,19 @@ pub struct IndexSchedulerOptions {
|
||||
pub struct IndexScheduler {
|
||||
/// The LMDB environment which the DBs are associated with.
|
||||
pub(crate) env: Env,
|
||||
|
||||
/// A boolean that can be set to true to stop the currently processing tasks.
|
||||
pub(crate) must_stop_processing: MustStopProcessing,
|
||||
|
||||
/// The list of tasks currently processing
|
||||
pub(crate) processing_tasks: Arc<RwLock<ProcessingTasks>>,
|
||||
|
||||
/// The list of files referenced by the tasks
|
||||
pub(crate) file_store: FileStore, // The main database, it contains all the tasks accessible by their Id.
|
||||
pub(crate) file_store: FileStore,
|
||||
|
||||
// The main database, it contains all the tasks accessible by their Id.
|
||||
pub(crate) all_tasks: Database<BEU32, SerdeJson<Task>>,
|
||||
|
||||
/// All the tasks ids grouped by their status.
|
||||
// TODO we should not be able to serialize a `Status::Processing` in this database.
|
||||
pub(crate) status: Database<SerdeBincode<Status>, RoaringBitmapCodec>,
|
||||
@@ -299,43 +303,58 @@ pub struct IndexScheduler {
|
||||
pub(crate) kind: Database<SerdeBincode<Kind>, RoaringBitmapCodec>,
|
||||
/// Store the tasks associated to an index.
|
||||
pub(crate) index_tasks: Database<Str, RoaringBitmapCodec>,
|
||||
|
||||
/// Store the tasks that were canceled by a task uid
|
||||
pub(crate) canceled_by: Database<BEU32, RoaringBitmapCodec>,
|
||||
|
||||
/// Store the task ids of tasks which were enqueued at a specific date
|
||||
pub(crate) enqueued_at: Database<BEI128, CboRoaringBitmapCodec>,
|
||||
|
||||
/// Store the task ids of finished tasks which started being processed at a specific date
|
||||
pub(crate) started_at: Database<BEI128, CboRoaringBitmapCodec>,
|
||||
|
||||
/// Store the task ids of tasks which finished at a specific date
|
||||
pub(crate) finished_at: Database<BEI128, CboRoaringBitmapCodec>,
|
||||
|
||||
/// In charge of creating, opening, storing and returning indexes.
|
||||
pub(crate) index_mapper: IndexMapper,
|
||||
|
||||
/// In charge of fetching and setting the status of experimental features.
|
||||
features: features::FeatureData,
|
||||
|
||||
/// Get a signal when a batch needs to be processed.
|
||||
pub(crate) wake_up: Arc<SignalEvent>,
|
||||
|
||||
/// Whether auto-batching is enabled or not.
|
||||
pub(crate) autobatching_enabled: bool,
|
||||
|
||||
/// Whether we should automatically cleanup the task queue or not.
|
||||
pub(crate) cleanup_enabled: bool,
|
||||
|
||||
/// The max number of tasks allowed before the scheduler starts to delete
|
||||
/// the finished tasks automatically.
|
||||
pub(crate) max_number_of_tasks: usize,
|
||||
|
||||
/// The maximum number of tasks that will be batched together.
|
||||
pub(crate) max_number_of_batched_tasks: usize,
|
||||
/// Control whether we must generate the prefix databases or not.
|
||||
pub(crate) compute_prefix_databases: bool,
|
||||
|
||||
/// The webhook URL we should send tasks to after processing every batch.
|
||||
pub(crate) webhook_url: Option<String>,
|
||||
/// The Authorization header to send to the webhook URL.
|
||||
pub(crate) webhook_authorization_header: Option<String>,
|
||||
|
||||
/// The path used to create the dumps.
|
||||
pub(crate) dumps_path: PathBuf,
|
||||
|
||||
/// The path used to create the snapshots.
|
||||
pub(crate) snapshots_path: PathBuf,
|
||||
|
||||
/// The path to the folder containing the auth LMDB env.
|
||||
pub(crate) auth_path: PathBuf,
|
||||
|
||||
/// The path to the version file of Meilisearch.
|
||||
pub(crate) version_file_path: PathBuf,
|
||||
|
||||
embedders: Arc<RwLock<HashMap<EmbedderOptions, Arc<Embedder>>>>,
|
||||
|
||||
// ================= test
|
||||
@@ -345,11 +364,13 @@ pub struct IndexScheduler {
|
||||
/// See [self.breakpoint()](`IndexScheduler::breakpoint`) for an explanation.
|
||||
#[cfg(test)]
|
||||
test_breakpoint_sdr: crossbeam::channel::Sender<(Breakpoint, bool)>,
|
||||
|
||||
/// A list of planned failures within the [`tick`](IndexScheduler::tick) method of the index scheduler.
|
||||
///
|
||||
/// The first field is the iteration index and the second field identifies a location in the code.
|
||||
#[cfg(test)]
|
||||
planned_failures: Vec<(usize, tests::FailureLocation)>,
|
||||
|
||||
/// A counter that is incremented before every call to [`tick`](IndexScheduler::tick)
|
||||
#[cfg(test)]
|
||||
run_loop_iteration: Arc<RwLock<usize>>,
|
||||
@@ -376,7 +397,6 @@ impl IndexScheduler {
|
||||
cleanup_enabled: self.cleanup_enabled,
|
||||
max_number_of_tasks: self.max_number_of_tasks,
|
||||
max_number_of_batched_tasks: self.max_number_of_batched_tasks,
|
||||
compute_prefix_databases: self.compute_prefix_databases,
|
||||
snapshots_path: self.snapshots_path.clone(),
|
||||
dumps_path: self.dumps_path.clone(),
|
||||
auth_path: self.auth_path.clone(),
|
||||
@@ -479,7 +499,6 @@ impl IndexScheduler {
|
||||
cleanup_enabled: options.cleanup_enabled,
|
||||
max_number_of_tasks: options.max_number_of_tasks,
|
||||
max_number_of_batched_tasks: options.max_number_of_batched_tasks,
|
||||
compute_prefix_databases: options.compute_prefix_databases,
|
||||
dumps_path: options.dumps_path,
|
||||
snapshots_path: options.snapshots_path,
|
||||
auth_path: options.auth_path,
|
||||
@@ -1800,7 +1819,6 @@ mod tests {
|
||||
max_number_of_tasks: 1_000_000,
|
||||
max_number_of_batched_tasks: usize::MAX,
|
||||
instance_features: Default::default(),
|
||||
compute_prefix_databases: true,
|
||||
};
|
||||
configuration(&mut options);
|
||||
|
||||
|
||||
@@ -256,7 +256,6 @@ struct Infos {
|
||||
experimental_enable_logs_route: bool,
|
||||
experimental_reduce_indexing_memory_usage: bool,
|
||||
experimental_max_number_of_batched_tasks: usize,
|
||||
experimental_disable_prefix_db: bool,
|
||||
gpu_enabled: bool,
|
||||
db_path: bool,
|
||||
import_dump: bool,
|
||||
@@ -299,7 +298,6 @@ impl From<Opt> for Infos {
|
||||
experimental_enable_logs_route,
|
||||
experimental_reduce_indexing_memory_usage,
|
||||
experimental_max_number_of_batched_tasks,
|
||||
experimental_disable_prefix_db,
|
||||
http_addr,
|
||||
master_key: _,
|
||||
env,
|
||||
@@ -349,7 +347,6 @@ impl From<Opt> for Infos {
|
||||
experimental_replication_parameters,
|
||||
experimental_enable_logs_route,
|
||||
experimental_reduce_indexing_memory_usage,
|
||||
experimental_disable_prefix_db,
|
||||
gpu_enabled: meilisearch_types::milli::vector::is_cuda_enabled(),
|
||||
db_path: db_path != PathBuf::from("./data.ms"),
|
||||
import_dump: import_dump.is_some(),
|
||||
|
||||
@@ -311,7 +311,6 @@ fn open_or_create_database_unchecked(
|
||||
index_growth_amount: byte_unit::Byte::from_str("10GiB").unwrap().get_bytes() as usize,
|
||||
index_count: DEFAULT_INDEX_COUNT,
|
||||
instance_features,
|
||||
compute_prefix_databases: !opt.experimental_disable_prefix_db,
|
||||
})?)
|
||||
};
|
||||
|
||||
|
||||
@@ -60,7 +60,6 @@ const MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE: &str =
|
||||
"MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE";
|
||||
const MEILI_EXPERIMENTAL_MAX_NUMBER_OF_BATCHED_TASKS: &str =
|
||||
"MEILI_EXPERIMENTAL_MAX_NUMBER_OF_BATCHED_TASKS";
|
||||
const MEILI_EXPERIMENTAL_DISABLE_PREFIX_DB: &str = "MEILI_EXPERIMENTAL_DISABLE_PREFIXDB";
|
||||
|
||||
const DEFAULT_CONFIG_FILE_PATH: &str = "./config.toml";
|
||||
const DEFAULT_DB_PATH: &str = "./data.ms";
|
||||
@@ -390,11 +389,6 @@ pub struct Opt {
|
||||
#[serde(default = "default_limit_batched_tasks")]
|
||||
pub experimental_max_number_of_batched_tasks: usize,
|
||||
|
||||
/// Experimentally disable the prefix database, see: <https://github.com/orgs/meilisearch/discussions>
|
||||
#[clap(long, env = MEILI_EXPERIMENTAL_DISABLE_PREFIX_DB)]
|
||||
#[serde(default)]
|
||||
pub experimental_disable_prefix_db: bool,
|
||||
|
||||
#[serde(flatten)]
|
||||
#[clap(flatten)]
|
||||
pub indexer_options: IndexerOpts,
|
||||
@@ -495,7 +489,6 @@ impl Opt {
|
||||
experimental_enable_logs_route,
|
||||
experimental_replication_parameters,
|
||||
experimental_reduce_indexing_memory_usage,
|
||||
experimental_disable_prefix_db,
|
||||
} = self;
|
||||
export_to_env_if_not_present(MEILI_DB_PATH, db_path);
|
||||
export_to_env_if_not_present(MEILI_HTTP_ADDR, http_addr);
|
||||
@@ -525,10 +518,6 @@ impl Opt {
|
||||
MEILI_EXPERIMENTAL_MAX_NUMBER_OF_BATCHED_TASKS,
|
||||
experimental_max_number_of_batched_tasks.to_string(),
|
||||
);
|
||||
export_to_env_if_not_present(
|
||||
MEILI_EXPERIMENTAL_DISABLE_PREFIX_DB,
|
||||
experimental_disable_prefix_db.to_string(),
|
||||
);
|
||||
if let Some(ssl_cert_path) = ssl_cert_path {
|
||||
export_to_env_if_not_present(MEILI_SSL_CERT_PATH, ssl_cert_path);
|
||||
}
|
||||
|
||||
@@ -644,12 +644,7 @@ async fn get_document_with_vectors() {
|
||||
{
|
||||
"id": 1,
|
||||
"name": "echo",
|
||||
"_vectors": {
|
||||
"manual": {
|
||||
"embeddings": [],
|
||||
"regenerate": false
|
||||
}
|
||||
}
|
||||
"_vectors": {}
|
||||
}
|
||||
],
|
||||
"offset": 0,
|
||||
@@ -705,12 +700,7 @@ async fn get_document_with_vectors() {
|
||||
},
|
||||
{
|
||||
"name": "echo",
|
||||
"_vectors": {
|
||||
"manual": {
|
||||
"embeddings": [],
|
||||
"regenerate": false
|
||||
}
|
||||
}
|
||||
"_vectors": {}
|
||||
}
|
||||
],
|
||||
"offset": 0,
|
||||
|
||||
@@ -119,12 +119,7 @@ async fn add_remove_user_provided() {
|
||||
{
|
||||
"id": 1,
|
||||
"name": "echo",
|
||||
"_vectors": {
|
||||
"manual": {
|
||||
"embeddings": [],
|
||||
"regenerate": false
|
||||
}
|
||||
}
|
||||
"_vectors": {}
|
||||
}
|
||||
],
|
||||
"offset": 0,
|
||||
@@ -146,12 +141,7 @@ async fn add_remove_user_provided() {
|
||||
{
|
||||
"id": 1,
|
||||
"name": "echo",
|
||||
"_vectors": {
|
||||
"manual": {
|
||||
"embeddings": [],
|
||||
"regenerate": false
|
||||
}
|
||||
}
|
||||
"_vectors": {}
|
||||
}
|
||||
],
|
||||
"offset": 0,
|
||||
@@ -587,12 +577,7 @@ async fn add_remove_one_vector_4588() {
|
||||
{
|
||||
"id": 0,
|
||||
"name": "kefir",
|
||||
"_vectors": {
|
||||
"manual": {
|
||||
"embeddings": [],
|
||||
"regenerate": false
|
||||
}
|
||||
}
|
||||
"_vectors": {}
|
||||
}
|
||||
],
|
||||
"offset": 0,
|
||||
|
||||
@@ -141,6 +141,3 @@ swedish-recomposition = ["charabia/swedish-recomposition"]
|
||||
|
||||
# allow CUDA support, see <https://github.com/meilisearch/meilisearch/issues/4306>
|
||||
cuda = ["candle-core/cuda"]
|
||||
|
||||
[lints.rust]
|
||||
unexpected_cfgs = { level = "warn", check-cfg = ['cfg(fuzzing)'] }
|
||||
|
||||
@@ -1230,11 +1230,6 @@ impl Index {
|
||||
)
|
||||
}
|
||||
|
||||
/// Deletes the FST which is the words prefixes dictionary of the engine.
|
||||
pub fn delete_words_prefixes_fst(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
|
||||
self.main.remap_key_type::<Str>().delete(wtxn, main_key::WORDS_PREFIXES_FST_KEY)
|
||||
}
|
||||
|
||||
/// Returns the FST which is the words prefixes dictionary of the engine.
|
||||
pub fn words_prefixes_fst<'t>(&self, rtxn: &'t RoTxn) -> Result<fst::Set<Cow<'t, [u8]>>> {
|
||||
match self.main.remap_types::<Str, Bytes>().get(rtxn, main_key::WORDS_PREFIXES_FST_KEY)? {
|
||||
@@ -1665,7 +1660,9 @@ impl Index {
|
||||
}
|
||||
}
|
||||
|
||||
res.insert(embedder_name.to_owned(), embeddings);
|
||||
if !embeddings.is_empty() {
|
||||
res.insert(embedder_name.to_owned(), embeddings);
|
||||
}
|
||||
}
|
||||
Ok(res)
|
||||
}
|
||||
|
||||
@@ -85,7 +85,7 @@ pub struct IndexDocuments<'t, 'i, 'a, FP, FA> {
|
||||
embedders: EmbeddingConfigs,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
#[derive(Default, Debug, Clone)]
|
||||
pub struct IndexDocumentsConfig {
|
||||
pub words_prefix_threshold: Option<u32>,
|
||||
pub max_prefix_length: Option<usize>,
|
||||
@@ -93,21 +93,6 @@ pub struct IndexDocumentsConfig {
|
||||
pub words_positions_min_level_size: Option<NonZeroU32>,
|
||||
pub update_method: IndexDocumentsMethod,
|
||||
pub autogenerate_docids: bool,
|
||||
pub compute_prefix_databases: bool,
|
||||
}
|
||||
|
||||
impl Default for IndexDocumentsConfig {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
words_prefix_threshold: Default::default(),
|
||||
max_prefix_length: Default::default(),
|
||||
words_positions_level_group_size: Default::default(),
|
||||
words_positions_min_level_size: Default::default(),
|
||||
update_method: Default::default(),
|
||||
autogenerate_docids: Default::default(),
|
||||
compute_prefix_databases: true,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'t, 'i, 'a, FP, FA> IndexDocuments<'t, 'i, 'a, FP, FA>
|
||||
@@ -573,20 +558,12 @@ where
|
||||
.map_err(InternalError::from)??;
|
||||
}
|
||||
|
||||
if self.config.compute_prefix_databases {
|
||||
self.execute_prefix_databases(
|
||||
word_docids.map(MergerBuilder::build),
|
||||
exact_word_docids.map(MergerBuilder::build),
|
||||
word_position_docids.map(MergerBuilder::build),
|
||||
word_fid_docids.map(MergerBuilder::build),
|
||||
)?;
|
||||
} else {
|
||||
self.index.words_prefixes_fst(self.wtxn)?;
|
||||
self.index.word_prefix_docids.clear(self.wtxn)?;
|
||||
self.index.exact_word_prefix_docids.clear(self.wtxn)?;
|
||||
self.index.word_prefix_position_docids.clear(self.wtxn)?;
|
||||
self.index.word_prefix_fid_docids.clear(self.wtxn)?;
|
||||
}
|
||||
self.execute_prefix_databases(
|
||||
word_docids.map(MergerBuilder::build),
|
||||
exact_word_docids.map(MergerBuilder::build),
|
||||
word_position_docids.map(MergerBuilder::build),
|
||||
word_fid_docids.map(MergerBuilder::build),
|
||||
)?;
|
||||
|
||||
Ok(number_of_documents)
|
||||
}
|
||||
@@ -2203,6 +2180,33 @@ mod tests {
|
||||
index.add_documents(doc1).unwrap();
|
||||
}
|
||||
|
||||
#[cfg(feature = "default")]
|
||||
#[test]
|
||||
fn store_detected_script_and_language_per_document_during_indexing() {
|
||||
use charabia::{Language, Script};
|
||||
let index = TempIndex::new();
|
||||
index
|
||||
.add_documents(documents!([
|
||||
{ "id": 1, "title": "The quick (\"brown\") fox can't jump 32.3 feet, right? Brr, it's 29.3°F!" },
|
||||
{ "id": 2, "title": "人人生而自由﹐在尊嚴和權利上一律平等。他們賦有理性和良心﹐並應以兄弟關係的精神互相對待。" },
|
||||
{ "id": 3, "title": "הַשּׁוּעָל הַמָּהִיר (״הַחוּם״) לֹא יָכוֹל לִקְפֹּץ 9.94 מֶטְרִים, נָכוֹן? ברר, 1.5°C- בַּחוּץ!" },
|
||||
{ "id": 4, "title": "関西国際空港限定トートバッグ すもももももももものうち" },
|
||||
{ "id": 5, "title": "ภาษาไทยง่ายนิดเดียว" },
|
||||
{ "id": 6, "title": "The quick 在尊嚴和權利上一律平等。" },
|
||||
]))
|
||||
.unwrap();
|
||||
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
let key_jpn = (Script::Cj, Language::Jpn);
|
||||
let key_cmn = (Script::Cj, Language::Cmn);
|
||||
let cj_jpn_docs = index.script_language_documents_ids(&rtxn, &key_jpn).unwrap().unwrap();
|
||||
let cj_cmn_docs = index.script_language_documents_ids(&rtxn, &key_cmn).unwrap().unwrap();
|
||||
let expected_cj_jpn_docids = [3].iter().collect();
|
||||
assert_eq!(cj_jpn_docs, expected_cj_jpn_docids);
|
||||
let expected_cj_cmn_docids = [1, 5].iter().collect();
|
||||
assert_eq!(cj_cmn_docs, expected_cj_cmn_docids);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn add_and_delete_documents_in_single_transform() {
|
||||
let mut index = TempIndex::new();
|
||||
|
||||
@@ -113,11 +113,17 @@ pub fn to_call_stats<R: std::io::Read>(
|
||||
|
||||
let span = *span;
|
||||
if let Some(parent_id) = span.parent_id {
|
||||
let (_, _, parent_self_time) = spans.get_mut(&parent_id).unwrap();
|
||||
let Some((_, _, parent_self_time)) = spans.get_mut(&parent_id) else {
|
||||
tracing::warn!("could not find referenced parent span");
|
||||
continue;
|
||||
};
|
||||
parent_self_time.add_child_range(self_range.clone())
|
||||
}
|
||||
total_self_time.add_child_range(self_range);
|
||||
let (_, call_list) = calls.get_mut(&span.call_id).unwrap();
|
||||
let Some((_, call_list)) = calls.get_mut(&span.call_id) else {
|
||||
tracing::warn!("could not find referenced call");
|
||||
continue;
|
||||
};
|
||||
call_list.push((end - begin, self_duration));
|
||||
}
|
||||
Entry::SpanClose(SpanClose { id, time: _ }) => {
|
||||
|
||||
166
workloads/hackernews-ignore-first-100k.json
Normal file
166
workloads/hackernews-ignore-first-100k.json
Normal file
@@ -0,0 +1,166 @@
|
||||
{
|
||||
"name": "hackernews.ndjson_1M_ignore_first_100k",
|
||||
"run_count": 3,
|
||||
"extra_cli_args": [],
|
||||
"assets": {
|
||||
"hackernews-100_000.ndjson": {
|
||||
"local_location": null,
|
||||
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/hackernews-100_000.ndjson",
|
||||
"sha256": "60ecd23485d560edbd90d9ca31f0e6dba1455422f2a44e402600fbb5f7f1b213"
|
||||
},
|
||||
"hackernews-200_000.ndjson": {
|
||||
"local_location": null,
|
||||
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/hackernews-200_000.ndjson",
|
||||
"sha256": "785b0271fdb47cba574fab617d5d332276b835c05dd86e4a95251cf7892a1685"
|
||||
},
|
||||
"hackernews-300_000.ndjson": {
|
||||
"local_location": null,
|
||||
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/hackernews-300_000.ndjson",
|
||||
"sha256": "de73c7154652eddfaf69cdc3b2f824d5c452f095f40a20a1c97bb1b5c4d80ab2"
|
||||
},
|
||||
"hackernews-400_000.ndjson": {
|
||||
"local_location": null,
|
||||
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/hackernews-400_000.ndjson",
|
||||
"sha256": "c1b00a24689110f366447e434c201c086d6f456d54ed1c4995894102794d8fe7"
|
||||
},
|
||||
"hackernews-500_000.ndjson": {
|
||||
"local_location": null,
|
||||
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/hackernews-500_000.ndjson",
|
||||
"sha256": "ae98f9dbef8193d750e3e2dbb6a91648941a1edca5f6e82c143e7996f4840083"
|
||||
},
|
||||
"hackernews-600_000.ndjson": {
|
||||
"local_location": null,
|
||||
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/hackernews-600_000.ndjson",
|
||||
"sha256": "b495fdc72c4a944801f786400f22076ab99186bee9699f67cbab2f21f5b74dbe"
|
||||
},
|
||||
"hackernews-700_000.ndjson": {
|
||||
"local_location": null,
|
||||
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/hackernews-700_000.ndjson",
|
||||
"sha256": "4b2c63974f3dabaa4954e3d4598b48324d03c522321ac05b0d583f36cb78a28b"
|
||||
},
|
||||
"hackernews-800_000.ndjson": {
|
||||
"local_location": null,
|
||||
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/hackernews-800_000.ndjson",
|
||||
"sha256": "cb7b6afe0e6caa1be111be256821bc63b0771b2a0e1fad95af7aaeeffd7ba546"
|
||||
},
|
||||
"hackernews-900_000.ndjson": {
|
||||
"local_location": null,
|
||||
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/hackernews-900_000.ndjson",
|
||||
"sha256": "e1154ddcd398f1c867758a93db5bcb21a07b9e55530c188a2917fdef332d3ba9"
|
||||
},
|
||||
"hackernews-1_000_000.ndjson": {
|
||||
"local_location": null,
|
||||
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/hackernews-1_000_000.ndjson",
|
||||
"sha256": "27e25efd0b68b159b8b21350d9af76938710cb29ce0393fa71b41c4f3c630ffe"
|
||||
}
|
||||
},
|
||||
"precommands": [
|
||||
{
|
||||
"route": "indexes/movies/settings",
|
||||
"method": "PATCH",
|
||||
"body": {
|
||||
"inline": {
|
||||
"displayedAttributes": [
|
||||
"title",
|
||||
"by",
|
||||
"score",
|
||||
"time"
|
||||
],
|
||||
"searchableAttributes": [
|
||||
"title"
|
||||
],
|
||||
"filterableAttributes": [
|
||||
"by"
|
||||
],
|
||||
"sortableAttributes": [
|
||||
"score",
|
||||
"time"
|
||||
]
|
||||
}
|
||||
},
|
||||
"synchronous": "WaitForTask"
|
||||
},
|
||||
{
|
||||
"route": "indexes/movies/documents",
|
||||
"method": "POST",
|
||||
"body": {
|
||||
"asset": "hackernews-100_000.ndjson"
|
||||
},
|
||||
"synchronous": "WaitForTask"
|
||||
}
|
||||
],
|
||||
"commands": [
|
||||
{
|
||||
"route": "indexes/movies/documents",
|
||||
"method": "POST",
|
||||
"body": {
|
||||
"asset": "hackernews-200_000.ndjson"
|
||||
},
|
||||
"synchronous": "WaitForTask"
|
||||
},
|
||||
{
|
||||
"route": "indexes/movies/documents",
|
||||
"method": "POST",
|
||||
"body": {
|
||||
"asset": "hackernews-300_000.ndjson"
|
||||
},
|
||||
"synchronous": "WaitForResponse"
|
||||
},
|
||||
{
|
||||
"route": "indexes/movies/documents",
|
||||
"method": "POST",
|
||||
"body": {
|
||||
"asset": "hackernews-400_000.ndjson"
|
||||
},
|
||||
"synchronous": "WaitForResponse"
|
||||
},
|
||||
{
|
||||
"route": "indexes/movies/documents",
|
||||
"method": "POST",
|
||||
"body": {
|
||||
"asset": "hackernews-500_000.ndjson"
|
||||
},
|
||||
"synchronous": "WaitForResponse"
|
||||
},
|
||||
{
|
||||
"route": "indexes/movies/documents",
|
||||
"method": "POST",
|
||||
"body": {
|
||||
"asset": "hackernews-600_000.ndjson"
|
||||
},
|
||||
"synchronous": "WaitForResponse"
|
||||
},
|
||||
{
|
||||
"route": "indexes/movies/documents",
|
||||
"method": "POST",
|
||||
"body": {
|
||||
"asset": "hackernews-700_000.ndjson"
|
||||
},
|
||||
"synchronous": "WaitForResponse"
|
||||
},
|
||||
{
|
||||
"route": "indexes/movies/documents",
|
||||
"method": "POST",
|
||||
"body": {
|
||||
"asset": "hackernews-800_000.ndjson"
|
||||
},
|
||||
"synchronous": "WaitForResponse"
|
||||
},
|
||||
{
|
||||
"route": "indexes/movies/documents",
|
||||
"method": "POST",
|
||||
"body": {
|
||||
"asset": "hackernews-900_000.ndjson"
|
||||
},
|
||||
"synchronous": "WaitForResponse"
|
||||
},
|
||||
{
|
||||
"route": "indexes/movies/documents",
|
||||
"method": "POST",
|
||||
"body": {
|
||||
"asset": "hackernews-1_000_000.ndjson"
|
||||
},
|
||||
"synchronous": "WaitForTask"
|
||||
}
|
||||
]
|
||||
}
|
||||
171
workloads/search/embeddings-movies-subset-hf.json
Normal file
171
workloads/search/embeddings-movies-subset-hf.json
Normal file
@@ -0,0 +1,171 @@
|
||||
{
|
||||
"name": "search-movies-subset-hf-embeddings",
|
||||
"run_count": 2,
|
||||
"target": "search::=trace",
|
||||
"extra_cli_args": [
|
||||
"--max-indexing-threads=4"
|
||||
],
|
||||
"assets": {
|
||||
"movies-100.json": {
|
||||
"local_location": null,
|
||||
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/movies-100.json",
|
||||
"sha256": "d215e395e4240f12f03b8f1f68901eac82d9e7ded5b462cbf4a6b8efde76c6c6"
|
||||
}
|
||||
},
|
||||
"precommands": [
|
||||
{
|
||||
"route": "experimental-features",
|
||||
"method": "PATCH",
|
||||
"body": {
|
||||
"inline": {
|
||||
"vectorStore": true
|
||||
}
|
||||
},
|
||||
"synchronous": "DontWait"
|
||||
},
|
||||
{
|
||||
"route": "indexes/movies/settings",
|
||||
"method": "PATCH",
|
||||
"body": {
|
||||
"inline": {
|
||||
"searchableAttributes": [
|
||||
"title",
|
||||
"overview"
|
||||
],
|
||||
"filterableAttributes": [
|
||||
"genres",
|
||||
"release_date"
|
||||
],
|
||||
"sortableAttributes": [
|
||||
"release_date"
|
||||
],
|
||||
"searchCutoffMs": 15000
|
||||
}
|
||||
},
|
||||
"synchronous": "WaitForTask"
|
||||
},
|
||||
{
|
||||
"route": "indexes/movies/settings",
|
||||
"method": "PATCH",
|
||||
"body": {
|
||||
"inline": {
|
||||
"embedders": {
|
||||
"default": {
|
||||
"source": "huggingFace",
|
||||
"documentTemplate": "A movie titled '{{doc.title}}' whose description starts with {{doc.overview|truncatewords: 20}}"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"synchronous": "WaitForTask"
|
||||
},
|
||||
{
|
||||
"route": "indexes/movies/documents",
|
||||
"method": "POST",
|
||||
"body": {
|
||||
"asset": "movies-100.json"
|
||||
},
|
||||
"synchronous": "WaitForTask"
|
||||
}
|
||||
],
|
||||
"commands": [
|
||||
{
|
||||
"route": "indexes/movies/search",
|
||||
"method": "POST",
|
||||
"body": {
|
||||
"inline": {
|
||||
"q": "puppy cute comforting movie",
|
||||
"limit": 100,
|
||||
"hybrid": {
|
||||
"semanticRatio": 0.1
|
||||
}
|
||||
}
|
||||
},
|
||||
"synchronous": "WaitForResponse"
|
||||
},
|
||||
{
|
||||
"route": "indexes/movies/search",
|
||||
"method": "POST",
|
||||
"body": {
|
||||
"inline": {
|
||||
"q": "puppy cute comforting movie",
|
||||
"limit": 100,
|
||||
"hybrid": {
|
||||
"semanticRatio": 0.5
|
||||
}
|
||||
}
|
||||
},
|
||||
"synchronous": "WaitForResponse"
|
||||
},
|
||||
{
|
||||
"route": "indexes/movies/search",
|
||||
"method": "POST",
|
||||
"body": {
|
||||
"inline": {
|
||||
"q": "puppy cute comforting movie",
|
||||
"limit": 100,
|
||||
"hybrid": {
|
||||
"semanticRatio": 0.9
|
||||
}
|
||||
}
|
||||
},
|
||||
"synchronous": "WaitForResponse"
|
||||
},
|
||||
{
|
||||
"route": "indexes/movies/search",
|
||||
"method": "POST",
|
||||
"body": {
|
||||
"inline": {
|
||||
"q": "puppy cute comforting movie",
|
||||
"limit": 100,
|
||||
"hybrid": {
|
||||
"semanticRatio": 1.0
|
||||
}
|
||||
}
|
||||
},
|
||||
"synchronous": "WaitForResponse"
|
||||
},
|
||||
{
|
||||
"route": "indexes/movies/search",
|
||||
"method": "POST",
|
||||
"body": {
|
||||
"inline": {
|
||||
"q": "shrek",
|
||||
"limit": 100,
|
||||
"hybrid": {
|
||||
"semanticRatio": 1.0
|
||||
}
|
||||
}
|
||||
},
|
||||
"synchronous": "WaitForResponse"
|
||||
},
|
||||
{
|
||||
"route": "indexes/movies/search",
|
||||
"method": "POST",
|
||||
"body": {
|
||||
"inline": {
|
||||
"q": "shrek",
|
||||
"limit": 100,
|
||||
"hybrid": {
|
||||
"semanticRatio": 0.5
|
||||
}
|
||||
}
|
||||
},
|
||||
"synchronous": "WaitForResponse"
|
||||
},
|
||||
{
|
||||
"route": "indexes/movies/search",
|
||||
"method": "POST",
|
||||
"body": {
|
||||
"inline": {
|
||||
"q": "shrek",
|
||||
"limit": 100,
|
||||
"hybrid": {
|
||||
"semanticRatio": 0.1
|
||||
}
|
||||
}
|
||||
},
|
||||
"synchronous": "WaitForResponse"
|
||||
}
|
||||
]
|
||||
}
|
||||
94
workloads/search/filterable-movies.json
Normal file
94
workloads/search/filterable-movies.json
Normal file
@@ -0,0 +1,94 @@
|
||||
{
|
||||
"name": "search-filterable-movies.json",
|
||||
"run_count": 10,
|
||||
"target": "search::=trace",
|
||||
"extra_cli_args": [],
|
||||
"assets": {
|
||||
"movies.json": {
|
||||
"local_location": null,
|
||||
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/movies.json",
|
||||
"sha256": "5b6e4cb660bc20327776e8a33ea197b43d9ec84856710ead1cc87ab24df77de1"
|
||||
}
|
||||
},
|
||||
"precommands": [
|
||||
{
|
||||
"route": "indexes/movies/settings",
|
||||
"method": "PATCH",
|
||||
"body": {
|
||||
"inline": {
|
||||
"searchableAttributes": [
|
||||
"title",
|
||||
"overview"
|
||||
],
|
||||
"filterableAttributes": [
|
||||
"genres",
|
||||
"release_date"
|
||||
],
|
||||
"sortableAttributes": [
|
||||
"release_date"
|
||||
],
|
||||
"searchCutoffMs": 15000
|
||||
}
|
||||
},
|
||||
"synchronous": "DontWait"
|
||||
},
|
||||
{
|
||||
"route": "indexes/movies/documents",
|
||||
"method": "POST",
|
||||
"body": {
|
||||
"asset": "movies.json"
|
||||
},
|
||||
"synchronous": "WaitForTask"
|
||||
}
|
||||
],
|
||||
"commands": [
|
||||
{
|
||||
"route": "indexes/movies/search",
|
||||
"method": "POST",
|
||||
"body": {
|
||||
"inline": {
|
||||
"q": "",
|
||||
"limit": 100,
|
||||
"filter": "genres IN [action, comedy, adventure] AND release_date = 233366400"
|
||||
}
|
||||
},
|
||||
"synchronous": "WaitForResponse"
|
||||
},
|
||||
{
|
||||
"route": "indexes/movies/search",
|
||||
"method": "POST",
|
||||
"body": {
|
||||
"inline": {
|
||||
"q": "Batman returns",
|
||||
"limit": 100,
|
||||
"filter": "genres IN [action, comedy, adventure] AND release_date > 233366400"
|
||||
}
|
||||
},
|
||||
"synchronous": "WaitForResponse"
|
||||
},
|
||||
{
|
||||
"route": "indexes/movies/search",
|
||||
"method": "POST",
|
||||
"body": {
|
||||
"inline": {
|
||||
"q": "the",
|
||||
"limit": 100,
|
||||
"filter": "genres IN [animation, comedy, adventure] AND release_date < 233366400"
|
||||
}
|
||||
},
|
||||
"synchronous": "WaitForResponse"
|
||||
},
|
||||
{
|
||||
"route": "indexes/movies/search",
|
||||
"method": "POST",
|
||||
"body": {
|
||||
"inline": {
|
||||
"q": "t",
|
||||
"limit": 100,
|
||||
"filter": "genres = Family AND release_date <= 233366400 OR release_date >= 1054252800"
|
||||
}
|
||||
},
|
||||
"synchronous": "WaitForResponse"
|
||||
}
|
||||
]
|
||||
}
|
||||
340
workloads/search/geosort.json
Normal file
340
workloads/search/geosort.json
Normal file
@@ -0,0 +1,340 @@
|
||||
{
|
||||
"name": "search-geosort.jsonl_1M",
|
||||
"run_count": 3,
|
||||
"target": "search::=trace",
|
||||
"extra_cli_args": [],
|
||||
"assets": {
|
||||
"smol-all-countries-100k.jsonl": {
|
||||
"local_location": null,
|
||||
"format": "NdJson",
|
||||
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/smol-all-countries/smol-all-countries-100k.jsonl",
|
||||
"sha256": "d00924689abc02d09ec4667cc5a18364ff7bc236bad51367f34b9184b945ece3"
|
||||
},
|
||||
"smol-all-countries-200k.jsonl": {
|
||||
"local_location": null,
|
||||
"format": "NdJson",
|
||||
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/smol-all-countries/smol-all-countries-200k.jsonl",
|
||||
"sha256": "2a215b43b35d596d9da4f1071deab9002a93602e6dbf1308fba53eb89d9c5a9e"
|
||||
},
|
||||
"smol-all-countries-300k.jsonl": {
|
||||
"local_location": null,
|
||||
"format": "NdJson",
|
||||
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/smol-all-countries/smol-all-countries-300k.jsonl",
|
||||
"sha256": "91d94d78eeb10d631557a5ccf775e74a41d14ccaff4d7121dd90c7aa35534f2b"
|
||||
},
|
||||
"smol-all-countries-400k.jsonl": {
|
||||
"local_location": null,
|
||||
"format": "NdJson",
|
||||
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/smol-all-countries/smol-all-countries-400k.jsonl",
|
||||
"sha256": "ee883a353b571f35f4abb79b95cfa628f3f1c582919dd658a388b220f97fe035"
|
||||
},
|
||||
"smol-all-countries-500k.jsonl": {
|
||||
"local_location": null,
|
||||
"format": "NdJson",
|
||||
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/smol-all-countries/smol-all-countries-500k.jsonl",
|
||||
"sha256": "5be254ce4c50db12b7f1795859b8bbdcbc2ec22bccb3a1898899bd4c4765a1bf"
|
||||
},
|
||||
"smol-all-countries-600k.jsonl": {
|
||||
"local_location": null,
|
||||
"format": "NdJson",
|
||||
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/smol-all-countries/smol-all-countries-600k.jsonl",
|
||||
"sha256": "3aa91afe3361f5185c142125dfcdc8ddcb7d39fdeeeb4f5e67439511905e9826"
|
||||
},
|
||||
"smol-all-countries-700k.jsonl": {
|
||||
"local_location": null,
|
||||
"format": "NdJson",
|
||||
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/smol-all-countries/smol-all-countries-700k.jsonl",
|
||||
"sha256": "5a864a1e9d89736147a8da594e2cbce5264979326d38655d0945d8447f3867b3"
|
||||
},
|
||||
"smol-all-countries-800k.jsonl": {
|
||||
"local_location": null,
|
||||
"format": "NdJson",
|
||||
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/smol-all-countries/smol-all-countries-800k.jsonl",
|
||||
"sha256": "d85eb9c85a612fd7b77623e162ecd0f8265ba3be97054e26b9cff7c48735809b"
|
||||
},
|
||||
"smol-all-countries-900k.jsonl": {
|
||||
"local_location": null,
|
||||
"format": "NdJson",
|
||||
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/smol-all-countries/smol-all-countries-900k.jsonl",
|
||||
"sha256": "4fd6662e8b9bfcd9fad7d5dcd691a47ec985d810d1e340465c056ee84e9c40f3"
|
||||
},
|
||||
"smol-all-countries-1M.jsonl": {
|
||||
"local_location": null,
|
||||
"format": "NdJson",
|
||||
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/smol-all-countries/smol-all-countries-1M.jsonl",
|
||||
"sha256": "585a713b489b154b94e7c07707bd369f888c7fe24eb90bf604578d7adf51a9e6"
|
||||
}
|
||||
},
|
||||
"precommands": [
|
||||
{
|
||||
"route": "indexes/movies/settings",
|
||||
"method": "PATCH",
|
||||
"body": {
|
||||
"inline": {
|
||||
"displayedAttributes": [
|
||||
"geonameid",
|
||||
"name",
|
||||
"asciiname",
|
||||
"alternatenames",
|
||||
"_geo",
|
||||
"population"
|
||||
],
|
||||
"searchableAttributes": [
|
||||
"name",
|
||||
"alternatenames",
|
||||
"elevation"
|
||||
],
|
||||
"filterableAttributes": [
|
||||
"_geo",
|
||||
"population",
|
||||
"elevation"
|
||||
],
|
||||
"sortableAttributes": [
|
||||
"_geo",
|
||||
"population",
|
||||
"elevation"
|
||||
],
|
||||
"searchCutoffMs": 15000
|
||||
}
|
||||
},
|
||||
"synchronous": "DontWait"
|
||||
},
|
||||
{
|
||||
"route": "indexes/movies/documents",
|
||||
"method": "POST",
|
||||
"body": {
|
||||
"asset": "smol-all-countries-100k.jsonl"
|
||||
},
|
||||
"synchronous": "WaitForTask"
|
||||
},
|
||||
{
|
||||
"route": "indexes/movies/documents",
|
||||
"method": "POST",
|
||||
"body": {
|
||||
"asset": "smol-all-countries-200k.jsonl"
|
||||
},
|
||||
"synchronous": "WaitForResponse"
|
||||
},
|
||||
{
|
||||
"route": "indexes/movies/documents",
|
||||
"method": "POST",
|
||||
"body": {
|
||||
"asset": "smol-all-countries-300k.jsonl"
|
||||
},
|
||||
"synchronous": "WaitForResponse"
|
||||
},
|
||||
{
|
||||
"route": "indexes/movies/documents",
|
||||
"method": "POST",
|
||||
"body": {
|
||||
"asset": "smol-all-countries-400k.jsonl"
|
||||
},
|
||||
"synchronous": "WaitForResponse"
|
||||
},
|
||||
{
|
||||
"route": "indexes/movies/documents",
|
||||
"method": "POST",
|
||||
"body": {
|
||||
"asset": "smol-all-countries-500k.jsonl"
|
||||
},
|
||||
"synchronous": "WaitForResponse"
|
||||
},
|
||||
{
|
||||
"route": "indexes/movies/documents",
|
||||
"method": "POST",
|
||||
"body": {
|
||||
"asset": "smol-all-countries-600k.jsonl"
|
||||
},
|
||||
"synchronous": "WaitForResponse"
|
||||
},
|
||||
{
|
||||
"route": "indexes/movies/documents",
|
||||
"method": "POST",
|
||||
"body": {
|
||||
"asset": "smol-all-countries-700k.jsonl"
|
||||
},
|
||||
"synchronous": "WaitForResponse"
|
||||
},
|
||||
{
|
||||
"route": "indexes/movies/documents",
|
||||
"method": "POST",
|
||||
"body": {
|
||||
"asset": "smol-all-countries-800k.jsonl"
|
||||
},
|
||||
"synchronous": "WaitForResponse"
|
||||
},
|
||||
{
|
||||
"route": "indexes/movies/documents",
|
||||
"method": "POST",
|
||||
"body": {
|
||||
"asset": "smol-all-countries-900k.jsonl"
|
||||
},
|
||||
"synchronous": "WaitForResponse"
|
||||
},
|
||||
{
|
||||
"route": "indexes/movies/documents",
|
||||
"method": "POST",
|
||||
"body": {
|
||||
"asset": "smol-all-countries-1M.jsonl"
|
||||
},
|
||||
"synchronous": "WaitForTask"
|
||||
}
|
||||
],
|
||||
"commands": [
|
||||
{
|
||||
"route": "indexes/movies/search",
|
||||
"method": "POST",
|
||||
"body": {
|
||||
"inline": {
|
||||
"q": "",
|
||||
"limit": 100
|
||||
}
|
||||
},
|
||||
"synchronous": "WaitForResponse"
|
||||
},
|
||||
{
|
||||
"route": "indexes/movies/search",
|
||||
"method": "POST",
|
||||
"body": {
|
||||
"inline": {
|
||||
"limit": 100,
|
||||
"sort": [
|
||||
"_geoPoint(50.62999333378238, 3.086269263384099):asc"
|
||||
]
|
||||
}
|
||||
},
|
||||
"synchronous": "WaitForResponse"
|
||||
},
|
||||
{
|
||||
"route": "indexes/movies/search",
|
||||
"method": "POST",
|
||||
"body": {
|
||||
"inline": {
|
||||
"limit": 100,
|
||||
"sort": [
|
||||
"_geoPoint(50.62999333378238, 3.086269263384099):desc"
|
||||
]
|
||||
}
|
||||
},
|
||||
"synchronous": "WaitForResponse"
|
||||
},
|
||||
{
|
||||
"route": "indexes/movies/search",
|
||||
"method": "POST",
|
||||
"body": {
|
||||
"inline": {
|
||||
"limit": 100,
|
||||
"sort": [
|
||||
"_geoPoint(35.749512532692144, 139.61664952543356):asc"
|
||||
]
|
||||
}
|
||||
},
|
||||
"synchronous": "WaitForResponse"
|
||||
},
|
||||
{
|
||||
"route": "indexes/movies/search",
|
||||
"method": "POST",
|
||||
"body": {
|
||||
"inline": {
|
||||
"limit": 100,
|
||||
"sort": [
|
||||
"_geoPoint(35.749512532692144, 139.61664952543356):desc"
|
||||
]
|
||||
}
|
||||
},
|
||||
"synchronous": "WaitForResponse"
|
||||
},
|
||||
{
|
||||
"route": "indexes/movies/search",
|
||||
"method": "POST",
|
||||
"body": {
|
||||
"inline": {
|
||||
"limit": 100,
|
||||
"sort": [
|
||||
"_geoPoint(-48.87561645055408, -123.39275749319793):asc"
|
||||
]
|
||||
}
|
||||
},
|
||||
"synchronous": "WaitForResponse"
|
||||
},
|
||||
{
|
||||
"route": "indexes/movies/search",
|
||||
"method": "POST",
|
||||
"body": {
|
||||
"inline": {
|
||||
"limit": 100,
|
||||
"sort": [
|
||||
"_geoPoint(-48.87561645055408, -123.39275749319793):desc"
|
||||
]
|
||||
}
|
||||
},
|
||||
"synchronous": "WaitForResponse"
|
||||
},
|
||||
{
|
||||
"route": "indexes/movies/search",
|
||||
"method": "POST",
|
||||
"body": {
|
||||
"inline": {
|
||||
"limit": 100,
|
||||
"filter": "_geoRadius(50.62999333378238, 3.086269263384099, 100000)"
|
||||
}
|
||||
},
|
||||
"synchronous": "WaitForResponse"
|
||||
},
|
||||
{
|
||||
"route": "indexes/movies/search",
|
||||
"method": "POST",
|
||||
"body": {
|
||||
"inline": {
|
||||
"limit": 100,
|
||||
"filter": "_geoRadius(50.62999333378238, 3.086269263384099, 1000)"
|
||||
}
|
||||
},
|
||||
"synchronous": "WaitForResponse"
|
||||
},
|
||||
{
|
||||
"route": "indexes/movies/search",
|
||||
"method": "POST",
|
||||
"body": {
|
||||
"inline": {
|
||||
"limit": 100,
|
||||
"filter": "_geoRadius(35.749512532692144, 139.61664952543356, 100000)"
|
||||
}
|
||||
},
|
||||
"synchronous": "WaitForResponse"
|
||||
},
|
||||
{
|
||||
"route": "indexes/movies/search",
|
||||
"method": "POST",
|
||||
"body": {
|
||||
"inline": {
|
||||
"limit": 100,
|
||||
"filter": "_geoRadius(35.749512532692144, 139.61664952543356, 1000)"
|
||||
}
|
||||
},
|
||||
"synchronous": "WaitForResponse"
|
||||
},
|
||||
{
|
||||
"route": "indexes/movies/search",
|
||||
"method": "POST",
|
||||
"body": {
|
||||
"inline": {
|
||||
"limit": 100,
|
||||
"filter": "_geoRadius(-48.87561645055408, -123.39275749319793, 100000)"
|
||||
}
|
||||
},
|
||||
"synchronous": "WaitForResponse"
|
||||
},
|
||||
{
|
||||
"route": "indexes/movies/search",
|
||||
"method": "POST",
|
||||
"body": {
|
||||
"inline": {
|
||||
"limit": 100,
|
||||
"filter": "_geoRadius(-48.87561645055408, -123.39275749319793, 1000)"
|
||||
}
|
||||
},
|
||||
"synchronous": "WaitForResponse"
|
||||
}
|
||||
]
|
||||
}
|
||||
255
workloads/search/hackernews.json
Normal file
255
workloads/search/hackernews.json
Normal file
@@ -0,0 +1,255 @@
|
||||
{
|
||||
"name": "search-hackernews.ndjson_1M",
|
||||
"run_count": 3,
|
||||
"target": "search::=trace",
|
||||
"extra_cli_args": [],
|
||||
"assets": {
|
||||
"hackernews-100_000.ndjson": {
|
||||
"local_location": null,
|
||||
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/hackernews-100_000.ndjson",
|
||||
"sha256": "60ecd23485d560edbd90d9ca31f0e6dba1455422f2a44e402600fbb5f7f1b213"
|
||||
},
|
||||
"hackernews-200_000.ndjson": {
|
||||
"local_location": null,
|
||||
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/hackernews-200_000.ndjson",
|
||||
"sha256": "785b0271fdb47cba574fab617d5d332276b835c05dd86e4a95251cf7892a1685"
|
||||
},
|
||||
"hackernews-300_000.ndjson": {
|
||||
"local_location": null,
|
||||
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/hackernews-300_000.ndjson",
|
||||
"sha256": "de73c7154652eddfaf69cdc3b2f824d5c452f095f40a20a1c97bb1b5c4d80ab2"
|
||||
},
|
||||
"hackernews-400_000.ndjson": {
|
||||
"local_location": null,
|
||||
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/hackernews-400_000.ndjson",
|
||||
"sha256": "c1b00a24689110f366447e434c201c086d6f456d54ed1c4995894102794d8fe7"
|
||||
},
|
||||
"hackernews-500_000.ndjson": {
|
||||
"local_location": null,
|
||||
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/hackernews-500_000.ndjson",
|
||||
"sha256": "ae98f9dbef8193d750e3e2dbb6a91648941a1edca5f6e82c143e7996f4840083"
|
||||
},
|
||||
"hackernews-600_000.ndjson": {
|
||||
"local_location": null,
|
||||
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/hackernews-600_000.ndjson",
|
||||
"sha256": "b495fdc72c4a944801f786400f22076ab99186bee9699f67cbab2f21f5b74dbe"
|
||||
},
|
||||
"hackernews-700_000.ndjson": {
|
||||
"local_location": null,
|
||||
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/hackernews-700_000.ndjson",
|
||||
"sha256": "4b2c63974f3dabaa4954e3d4598b48324d03c522321ac05b0d583f36cb78a28b"
|
||||
},
|
||||
"hackernews-800_000.ndjson": {
|
||||
"local_location": null,
|
||||
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/hackernews-800_000.ndjson",
|
||||
"sha256": "cb7b6afe0e6caa1be111be256821bc63b0771b2a0e1fad95af7aaeeffd7ba546"
|
||||
},
|
||||
"hackernews-900_000.ndjson": {
|
||||
"local_location": null,
|
||||
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/hackernews-900_000.ndjson",
|
||||
"sha256": "e1154ddcd398f1c867758a93db5bcb21a07b9e55530c188a2917fdef332d3ba9"
|
||||
},
|
||||
"hackernews-1_000_000.ndjson": {
|
||||
"local_location": null,
|
||||
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/hackernews-1_000_000.ndjson",
|
||||
"sha256": "27e25efd0b68b159b8b21350d9af76938710cb29ce0393fa71b41c4f3c630ffe"
|
||||
}
|
||||
},
|
||||
"precommands": [
|
||||
{
|
||||
"route": "indexes/movies/settings",
|
||||
"method": "PATCH",
|
||||
"body": {
|
||||
"inline": {
|
||||
"displayedAttributes": [
|
||||
"title",
|
||||
"by",
|
||||
"score",
|
||||
"time"
|
||||
],
|
||||
"searchableAttributes": [
|
||||
"title"
|
||||
],
|
||||
"filterableAttributes": [
|
||||
"by"
|
||||
],
|
||||
"sortableAttributes": [
|
||||
"score",
|
||||
"time"
|
||||
],
|
||||
"rankingRules": [
|
||||
"sort",
|
||||
"words",
|
||||
"typo",
|
||||
"proximity",
|
||||
"attribute",
|
||||
"exactness"
|
||||
],
|
||||
"searchCutoffMs": 15000
|
||||
}
|
||||
},
|
||||
"synchronous": "WaitForTask"
|
||||
},
|
||||
{
|
||||
"route": "indexes/movies/documents",
|
||||
"method": "POST",
|
||||
"body": {
|
||||
"asset": "hackernews-100_000.ndjson"
|
||||
},
|
||||
"synchronous": "WaitForTask"
|
||||
},
|
||||
{
|
||||
"route": "indexes/movies/documents",
|
||||
"method": "POST",
|
||||
"body": {
|
||||
"asset": "hackernews-200_000.ndjson"
|
||||
},
|
||||
"synchronous": "WaitForResponse"
|
||||
},
|
||||
{
|
||||
"route": "indexes/movies/documents",
|
||||
"method": "POST",
|
||||
"body": {
|
||||
"asset": "hackernews-300_000.ndjson"
|
||||
},
|
||||
"synchronous": "WaitForResponse"
|
||||
},
|
||||
{
|
||||
"route": "indexes/movies/documents",
|
||||
"method": "POST",
|
||||
"body": {
|
||||
"asset": "hackernews-400_000.ndjson"
|
||||
},
|
||||
"synchronous": "WaitForResponse"
|
||||
},
|
||||
{
|
||||
"route": "indexes/movies/documents",
|
||||
"method": "POST",
|
||||
"body": {
|
||||
"asset": "hackernews-500_000.ndjson"
|
||||
},
|
||||
"synchronous": "WaitForResponse"
|
||||
},
|
||||
{
|
||||
"route": "indexes/movies/documents",
|
||||
"method": "POST",
|
||||
"body": {
|
||||
"asset": "hackernews-600_000.ndjson"
|
||||
},
|
||||
"synchronous": "WaitForResponse"
|
||||
},
|
||||
{
|
||||
"route": "indexes/movies/documents",
|
||||
"method": "POST",
|
||||
"body": {
|
||||
"asset": "hackernews-700_000.ndjson"
|
||||
},
|
||||
"synchronous": "WaitForResponse"
|
||||
},
|
||||
{
|
||||
"route": "indexes/movies/documents",
|
||||
"method": "POST",
|
||||
"body": {
|
||||
"asset": "hackernews-800_000.ndjson"
|
||||
},
|
||||
"synchronous": "WaitForResponse"
|
||||
},
|
||||
{
|
||||
"route": "indexes/movies/documents",
|
||||
"method": "POST",
|
||||
"body": {
|
||||
"asset": "hackernews-900_000.ndjson"
|
||||
},
|
||||
"synchronous": "WaitForResponse"
|
||||
},
|
||||
{
|
||||
"route": "indexes/movies/documents",
|
||||
"method": "POST",
|
||||
"body": {
|
||||
"asset": "hackernews-1_000_000.ndjson"
|
||||
},
|
||||
"synchronous": "WaitForTask"
|
||||
}
|
||||
],
|
||||
"commands": [
|
||||
{
|
||||
"route": "indexes/movies/search",
|
||||
"method": "POST",
|
||||
"body": {
|
||||
"inline": {
|
||||
"q": "rust meilisearch",
|
||||
"limit": 100,
|
||||
"filter": "by = tpayet",
|
||||
"sort": [
|
||||
"score:desc",
|
||||
"time:asc"
|
||||
]
|
||||
}
|
||||
},
|
||||
"synchronous": "WaitForResponse"
|
||||
},
|
||||
{
|
||||
"route": "indexes/movies/search",
|
||||
"method": "POST",
|
||||
"body": {
|
||||
"inline": {
|
||||
"q": "rust meilisearch",
|
||||
"limit": 100,
|
||||
"filter": "NOT by = tpayet",
|
||||
"sort": [
|
||||
"score:desc",
|
||||
"time:asc"
|
||||
]
|
||||
}
|
||||
},
|
||||
"synchronous": "WaitForResponse"
|
||||
},
|
||||
{
|
||||
"route": "indexes/movies/search",
|
||||
"method": "POST",
|
||||
"body": {
|
||||
"inline": {
|
||||
"q": "meilisearch",
|
||||
"limit": 100,
|
||||
"sort": [
|
||||
"score:desc",
|
||||
"time:desc"
|
||||
]
|
||||
}
|
||||
},
|
||||
"synchronous": "WaitForResponse"
|
||||
},
|
||||
{
|
||||
"route": "indexes/movies/search",
|
||||
"method": "POST",
|
||||
"body": {
|
||||
"inline": {
|
||||
"q": "rust",
|
||||
"limit": 100,
|
||||
"filter": "by = dang",
|
||||
"sort": [
|
||||
"score:desc",
|
||||
"time:asc"
|
||||
]
|
||||
}
|
||||
},
|
||||
"synchronous": "WaitForResponse"
|
||||
},
|
||||
{
|
||||
"route": "indexes/movies/search",
|
||||
"method": "POST",
|
||||
"body": {
|
||||
"inline": {
|
||||
"q": "combinator YC",
|
||||
"limit": 100,
|
||||
"filter": "by = dang",
|
||||
"sort": [
|
||||
"score:desc",
|
||||
"time:asc"
|
||||
]
|
||||
}
|
||||
},
|
||||
"synchronous": "WaitForResponse"
|
||||
}
|
||||
]
|
||||
}
|
||||
90
workloads/search/movies.json
Normal file
90
workloads/search/movies.json
Normal file
@@ -0,0 +1,90 @@
|
||||
{
|
||||
"name": "search-movies.json",
|
||||
"run_count": 10,
|
||||
"target": "search::=trace",
|
||||
"extra_cli_args": [],
|
||||
"assets": {
|
||||
"movies.json": {
|
||||
"local_location": null,
|
||||
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/movies.json",
|
||||
"sha256": "5b6e4cb660bc20327776e8a33ea197b43d9ec84856710ead1cc87ab24df77de1"
|
||||
}
|
||||
},
|
||||
"precommands": [
|
||||
{
|
||||
"route": "indexes/movies/settings",
|
||||
"method": "PATCH",
|
||||
"body": {
|
||||
"inline": {
|
||||
"searchableAttributes": [
|
||||
"title",
|
||||
"overview"
|
||||
],
|
||||
"filterableAttributes": [
|
||||
"genres",
|
||||
"release_date"
|
||||
],
|
||||
"sortableAttributes": [
|
||||
"release_date"
|
||||
],
|
||||
"searchCutoffMs": 15000
|
||||
}
|
||||
},
|
||||
"synchronous": "DontWait"
|
||||
},
|
||||
{
|
||||
"route": "indexes/movies/documents",
|
||||
"method": "POST",
|
||||
"body": {
|
||||
"asset": "movies.json"
|
||||
},
|
||||
"synchronous": "WaitForTask"
|
||||
}
|
||||
],
|
||||
"commands": [
|
||||
{
|
||||
"route": "indexes/movies/search",
|
||||
"method": "POST",
|
||||
"body": {
|
||||
"inline": {
|
||||
"q": "",
|
||||
"limit": 100
|
||||
}
|
||||
},
|
||||
"synchronous": "WaitForResponse"
|
||||
},
|
||||
{
|
||||
"route": "indexes/movies/search",
|
||||
"method": "POST",
|
||||
"body": {
|
||||
"inline": {
|
||||
"q": "Batman returns",
|
||||
"limit": 100
|
||||
}
|
||||
},
|
||||
"synchronous": "WaitForResponse"
|
||||
},
|
||||
{
|
||||
"route": "indexes/movies/search",
|
||||
"method": "POST",
|
||||
"body": {
|
||||
"inline": {
|
||||
"limit": 100,
|
||||
"q": "the"
|
||||
}
|
||||
},
|
||||
"synchronous": "WaitForResponse"
|
||||
},
|
||||
{
|
||||
"route": "indexes/movies/search",
|
||||
"method": "POST",
|
||||
"body": {
|
||||
"inline": {
|
||||
"limit": 100,
|
||||
"q": "t"
|
||||
}
|
||||
},
|
||||
"synchronous": "WaitForResponse"
|
||||
}
|
||||
]
|
||||
}
|
||||
110
workloads/search/sortable-movies.json
Normal file
110
workloads/search/sortable-movies.json
Normal file
@@ -0,0 +1,110 @@
|
||||
{
|
||||
"name": "search-sortable-movies.json",
|
||||
"run_count": 10,
|
||||
"target": "search::=trace",
|
||||
"extra_cli_args": [],
|
||||
"assets": {
|
||||
"movies.json": {
|
||||
"local_location": null,
|
||||
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/movies.json",
|
||||
"sha256": "5b6e4cb660bc20327776e8a33ea197b43d9ec84856710ead1cc87ab24df77de1"
|
||||
}
|
||||
},
|
||||
"precommands": [
|
||||
{
|
||||
"route": "indexes/movies/settings",
|
||||
"method": "PATCH",
|
||||
"body": {
|
||||
"inline": {
|
||||
"searchableAttributes": [
|
||||
"title",
|
||||
"overview"
|
||||
],
|
||||
"filterableAttributes": [
|
||||
"genres",
|
||||
"release_date"
|
||||
],
|
||||
"sortableAttributes": [
|
||||
"release_date"
|
||||
],
|
||||
"rankingRules": [
|
||||
"sort",
|
||||
"words",
|
||||
"typo",
|
||||
"proximity",
|
||||
"attribute",
|
||||
"exactness"
|
||||
],
|
||||
"searchCutoffMs": 15000
|
||||
}
|
||||
},
|
||||
"synchronous": "DontWait"
|
||||
},
|
||||
{
|
||||
"route": "indexes/movies/documents",
|
||||
"method": "POST",
|
||||
"body": {
|
||||
"asset": "movies.json"
|
||||
},
|
||||
"synchronous": "WaitForTask"
|
||||
}
|
||||
],
|
||||
"commands": [
|
||||
{
|
||||
"route": "indexes/movies/search",
|
||||
"method": "POST",
|
||||
"body": {
|
||||
"inline": {
|
||||
"q": "",
|
||||
"limit": 100,
|
||||
"sort": [
|
||||
"release_date:asc"
|
||||
]
|
||||
}
|
||||
},
|
||||
"synchronous": "WaitForResponse"
|
||||
},
|
||||
{
|
||||
"route": "indexes/movies/search",
|
||||
"method": "POST",
|
||||
"body": {
|
||||
"inline": {
|
||||
"q": "Batman returns",
|
||||
"limit": 100,
|
||||
"sort": [
|
||||
"release_date:desc"
|
||||
]
|
||||
}
|
||||
},
|
||||
"synchronous": "WaitForResponse"
|
||||
},
|
||||
{
|
||||
"route": "indexes/movies/search",
|
||||
"method": "POST",
|
||||
"body": {
|
||||
"inline": {
|
||||
"q": "the",
|
||||
"limit": 100,
|
||||
"sort": [
|
||||
"release_date:asc"
|
||||
]
|
||||
}
|
||||
},
|
||||
"synchronous": "WaitForResponse"
|
||||
},
|
||||
{
|
||||
"route": "indexes/movies/search",
|
||||
"method": "POST",
|
||||
"body": {
|
||||
"inline": {
|
||||
"q": "t",
|
||||
"limit": 100,
|
||||
"sort": [
|
||||
"release_date:asc"
|
||||
]
|
||||
}
|
||||
},
|
||||
"synchronous": "WaitForResponse"
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -23,6 +23,8 @@ pub struct Workload {
|
||||
pub extra_cli_args: Vec<String>,
|
||||
pub assets: BTreeMap<String, Asset>,
|
||||
#[serde(default)]
|
||||
pub target: String,
|
||||
#[serde(default)]
|
||||
pub precommands: Vec<super::command::Command>,
|
||||
pub commands: Vec<super::command::Command>,
|
||||
}
|
||||
@@ -54,7 +56,7 @@ async fn run_commands(
|
||||
let trace_filename = format!("{report_folder}/{workload_name}-{run_number}-trace.json");
|
||||
let report_filename = format!("{report_folder}/{workload_name}-{run_number}-report.json");
|
||||
|
||||
let report_handle = start_report(logs_client, trace_filename).await?;
|
||||
let report_handle = start_report(logs_client, trace_filename, &workload.target).await?;
|
||||
|
||||
for batch in workload
|
||||
.commands
|
||||
@@ -160,7 +162,11 @@ async fn execute_run(
|
||||
async fn start_report(
|
||||
logs_client: &Client,
|
||||
filename: String,
|
||||
target: &str,
|
||||
) -> anyhow::Result<tokio::task::JoinHandle<anyhow::Result<std::fs::File>>> {
|
||||
const DEFAULT_TARGET: &str = "indexing::=trace";
|
||||
let target = if target.is_empty() { DEFAULT_TARGET } else { target };
|
||||
|
||||
let report_file = std::fs::File::options()
|
||||
.create(true)
|
||||
.truncate(true)
|
||||
@@ -174,7 +180,7 @@ async fn start_report(
|
||||
.post("")
|
||||
.json(&json!({
|
||||
"mode": "profile",
|
||||
"target": "indexing::=trace"
|
||||
"target": target,
|
||||
}))
|
||||
.send()
|
||||
.await
|
||||
|
||||
Reference in New Issue
Block a user