Compare commits

..

2 Commits

Author SHA1 Message Date
curquiza
72b4b36516 Remove debug lines 2025-12-04 14:13:46 +01:00
curquiza
d795ade246 Add code samples to generated openAPI file 2025-12-04 14:13:46 +01:00
56 changed files with 1665 additions and 1670 deletions

View File

@@ -170,8 +170,11 @@ jobs:
- uses: dtolnay/rust-toolchain@1.89
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.8.0
- name: Build
run: cargo build --release --locked --target x86_64-unknown-linux-gnu
- name: Run cargo build in release
uses: actions-rs/cargo@v1
with:
command: build
args: --all-targets --release
clippy:
name: Run Clippy

1351
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@@ -21,10 +21,6 @@ use roaring::RoaringBitmap;
#[global_allocator]
static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc;
fn no_cancel() -> bool {
false
}
const BENCHMARK_ITERATION: usize = 10;
fn setup_dir(path: impl AsRef<Path>) {
@@ -69,7 +65,7 @@ fn setup_settings<'t>(
let sortable_fields = sortable_fields.iter().map(|s| s.to_string()).collect();
builder.set_sortable_fields(sortable_fields);
builder.execute(&no_cancel, &Progress::default(), Default::default()).unwrap();
builder.execute(&|| false, &Progress::default(), Default::default()).unwrap();
}
fn setup_index_with_settings(
@@ -156,7 +152,7 @@ fn indexing_songs_default(c: &mut Criterion) {
&rtxn,
None,
&mut new_fields_ids_map,
&no_cancel,
&|| false,
Progress::default(),
None,
)
@@ -172,7 +168,7 @@ fn indexing_songs_default(c: &mut Criterion) {
primary_key,
&document_changes,
RuntimeEmbedders::default(),
&no_cancel,
&|| false,
&Progress::default(),
&Default::default(),
)
@@ -224,7 +220,7 @@ fn reindexing_songs_default(c: &mut Criterion) {
&rtxn,
None,
&mut new_fields_ids_map,
&no_cancel,
&|| false,
Progress::default(),
None,
)
@@ -240,7 +236,7 @@ fn reindexing_songs_default(c: &mut Criterion) {
primary_key,
&document_changes,
RuntimeEmbedders::default(),
&no_cancel,
&|| false,
&Progress::default(),
&Default::default(),
)
@@ -270,7 +266,7 @@ fn reindexing_songs_default(c: &mut Criterion) {
&rtxn,
None,
&mut new_fields_ids_map,
&no_cancel,
&|| false,
Progress::default(),
None,
)
@@ -286,7 +282,7 @@ fn reindexing_songs_default(c: &mut Criterion) {
primary_key,
&document_changes,
RuntimeEmbedders::default(),
&no_cancel,
&|| false,
&Progress::default(),
&Default::default(),
)
@@ -340,7 +336,7 @@ fn deleting_songs_in_batches_default(c: &mut Criterion) {
&rtxn,
None,
&mut new_fields_ids_map,
&no_cancel,
&|| false,
Progress::default(),
None,
)
@@ -356,7 +352,7 @@ fn deleting_songs_in_batches_default(c: &mut Criterion) {
primary_key,
&document_changes,
RuntimeEmbedders::default(),
&no_cancel,
&|| false,
&Progress::default(),
&Default::default(),
)
@@ -418,7 +414,7 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
&rtxn,
None,
&mut new_fields_ids_map,
&no_cancel,
&|| false,
Progress::default(),
None,
)
@@ -434,7 +430,7 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
primary_key,
&document_changes,
RuntimeEmbedders::default(),
&no_cancel,
&|| false,
&Progress::default(),
&Default::default(),
)
@@ -464,7 +460,7 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
&rtxn,
None,
&mut new_fields_ids_map,
&no_cancel,
&|| false,
Progress::default(),
None,
)
@@ -480,7 +476,7 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
primary_key,
&document_changes,
RuntimeEmbedders::default(),
&no_cancel,
&|| false,
&Progress::default(),
&Default::default(),
)
@@ -506,7 +502,7 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
&rtxn,
None,
&mut new_fields_ids_map,
&no_cancel,
&|| false,
Progress::default(),
None,
)
@@ -522,7 +518,7 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
primary_key,
&document_changes,
RuntimeEmbedders::default(),
&no_cancel,
&|| false,
&Progress::default(),
&Default::default(),
)
@@ -575,7 +571,7 @@ fn indexing_songs_without_faceted_numbers(c: &mut Criterion) {
&rtxn,
None,
&mut new_fields_ids_map,
&no_cancel,
&|| false,
Progress::default(),
None,
)
@@ -591,7 +587,7 @@ fn indexing_songs_without_faceted_numbers(c: &mut Criterion) {
primary_key,
&document_changes,
RuntimeEmbedders::default(),
&no_cancel,
&|| false,
&Progress::default(),
&Default::default(),
)
@@ -643,7 +639,7 @@ fn indexing_songs_without_faceted_fields(c: &mut Criterion) {
&rtxn,
None,
&mut new_fields_ids_map,
&no_cancel,
&|| false,
Progress::default(),
None,
)
@@ -659,7 +655,7 @@ fn indexing_songs_without_faceted_fields(c: &mut Criterion) {
primary_key,
&document_changes,
RuntimeEmbedders::default(),
&no_cancel,
&|| false,
&Progress::default(),
&Default::default(),
)
@@ -711,7 +707,7 @@ fn indexing_wiki(c: &mut Criterion) {
&rtxn,
None,
&mut new_fields_ids_map,
&no_cancel,
&|| false,
Progress::default(),
None,
)
@@ -727,7 +723,7 @@ fn indexing_wiki(c: &mut Criterion) {
primary_key,
&document_changes,
RuntimeEmbedders::default(),
&no_cancel,
&|| false,
&Progress::default(),
&Default::default(),
)
@@ -778,7 +774,7 @@ fn reindexing_wiki(c: &mut Criterion) {
&rtxn,
None,
&mut new_fields_ids_map,
&no_cancel,
&|| false,
Progress::default(),
None,
)
@@ -794,7 +790,7 @@ fn reindexing_wiki(c: &mut Criterion) {
primary_key,
&document_changes,
RuntimeEmbedders::default(),
&no_cancel,
&|| false,
&Progress::default(),
&Default::default(),
)
@@ -824,7 +820,7 @@ fn reindexing_wiki(c: &mut Criterion) {
&rtxn,
None,
&mut new_fields_ids_map,
&no_cancel,
&|| false,
Progress::default(),
None,
)
@@ -840,7 +836,7 @@ fn reindexing_wiki(c: &mut Criterion) {
primary_key,
&document_changes,
RuntimeEmbedders::default(),
&no_cancel,
&|| false,
&Progress::default(),
&Default::default(),
)
@@ -893,7 +889,7 @@ fn deleting_wiki_in_batches_default(c: &mut Criterion) {
&rtxn,
None,
&mut new_fields_ids_map,
&no_cancel,
&|| false,
Progress::default(),
None,
)
@@ -909,7 +905,7 @@ fn deleting_wiki_in_batches_default(c: &mut Criterion) {
primary_key,
&document_changes,
RuntimeEmbedders::default(),
&no_cancel,
&|| false,
&Progress::default(),
&Default::default(),
)
@@ -971,7 +967,7 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
&rtxn,
None,
&mut new_fields_ids_map,
&no_cancel,
&|| false,
Progress::default(),
None,
)
@@ -987,7 +983,7 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
primary_key,
&document_changes,
RuntimeEmbedders::default(),
&no_cancel,
&|| false,
&Progress::default(),
&Default::default(),
)
@@ -1018,7 +1014,7 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
&rtxn,
None,
&mut new_fields_ids_map,
&no_cancel,
&|| false,
Progress::default(),
None,
)
@@ -1034,7 +1030,7 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
primary_key,
&document_changes,
RuntimeEmbedders::default(),
&no_cancel,
&|| false,
&Progress::default(),
&Default::default(),
)
@@ -1061,7 +1057,7 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
&rtxn,
None,
&mut new_fields_ids_map,
&no_cancel,
&|| false,
Progress::default(),
None,
)
@@ -1077,7 +1073,7 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
primary_key,
&document_changes,
RuntimeEmbedders::default(),
&no_cancel,
&|| false,
&Progress::default(),
&Default::default(),
)
@@ -1129,7 +1125,7 @@ fn indexing_movies_default(c: &mut Criterion) {
&rtxn,
None,
&mut new_fields_ids_map,
&no_cancel,
&|| false,
Progress::default(),
None,
)
@@ -1145,7 +1141,7 @@ fn indexing_movies_default(c: &mut Criterion) {
primary_key,
&document_changes,
RuntimeEmbedders::default(),
&no_cancel,
&|| false,
&Progress::default(),
&Default::default(),
)
@@ -1196,7 +1192,7 @@ fn reindexing_movies_default(c: &mut Criterion) {
&rtxn,
None,
&mut new_fields_ids_map,
&no_cancel,
&|| false,
Progress::default(),
None,
)
@@ -1212,7 +1208,7 @@ fn reindexing_movies_default(c: &mut Criterion) {
primary_key,
&document_changes,
RuntimeEmbedders::default(),
&no_cancel,
&|| false,
&Progress::default(),
&Default::default(),
)
@@ -1242,7 +1238,7 @@ fn reindexing_movies_default(c: &mut Criterion) {
&rtxn,
None,
&mut new_fields_ids_map,
&no_cancel,
&|| false,
Progress::default(),
None,
)
@@ -1258,7 +1254,7 @@ fn reindexing_movies_default(c: &mut Criterion) {
primary_key,
&document_changes,
RuntimeEmbedders::default(),
&no_cancel,
&|| false,
&Progress::default(),
&Default::default(),
)
@@ -1311,7 +1307,7 @@ fn deleting_movies_in_batches_default(c: &mut Criterion) {
&rtxn,
None,
&mut new_fields_ids_map,
&no_cancel,
&|| false,
Progress::default(),
None,
)
@@ -1327,7 +1323,7 @@ fn deleting_movies_in_batches_default(c: &mut Criterion) {
primary_key,
&document_changes,
RuntimeEmbedders::default(),
&no_cancel,
&|| false,
&Progress::default(),
&Default::default(),
)
@@ -1376,7 +1372,7 @@ fn delete_documents_from_ids(index: Index, document_ids_to_delete: Vec<RoaringBi
Some(primary_key),
&document_changes,
RuntimeEmbedders::default(),
&no_cancel,
&|| false,
&Progress::default(),
&Default::default(),
)
@@ -1426,7 +1422,7 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
&rtxn,
None,
&mut new_fields_ids_map,
&no_cancel,
&|| false,
Progress::default(),
None,
)
@@ -1442,7 +1438,7 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
primary_key,
&document_changes,
RuntimeEmbedders::default(),
&no_cancel,
&|| false,
&Progress::default(),
&Default::default(),
)
@@ -1472,7 +1468,7 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
&rtxn,
None,
&mut new_fields_ids_map,
&no_cancel,
&|| false,
Progress::default(),
None,
)
@@ -1488,7 +1484,7 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
primary_key,
&document_changes,
RuntimeEmbedders::default(),
&no_cancel,
&|| false,
&Progress::default(),
&Default::default(),
)
@@ -1514,7 +1510,7 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
&rtxn,
None,
&mut new_fields_ids_map,
&no_cancel,
&|| false,
Progress::default(),
None,
)
@@ -1530,7 +1526,7 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
primary_key,
&document_changes,
RuntimeEmbedders::default(),
&no_cancel,
&|| false,
&Progress::default(),
&Default::default(),
)
@@ -1605,7 +1601,7 @@ fn indexing_nested_movies_default(c: &mut Criterion) {
&rtxn,
None,
&mut new_fields_ids_map,
&no_cancel,
&|| false,
Progress::default(),
None,
)
@@ -1621,7 +1617,7 @@ fn indexing_nested_movies_default(c: &mut Criterion) {
primary_key,
&document_changes,
RuntimeEmbedders::default(),
&no_cancel,
&|| false,
&Progress::default(),
&Default::default(),
)
@@ -1697,7 +1693,7 @@ fn deleting_nested_movies_in_batches_default(c: &mut Criterion) {
&rtxn,
None,
&mut new_fields_ids_map,
&no_cancel,
&|| false,
Progress::default(),
None,
)
@@ -1713,7 +1709,7 @@ fn deleting_nested_movies_in_batches_default(c: &mut Criterion) {
primary_key,
&document_changes,
RuntimeEmbedders::default(),
&no_cancel,
&|| false,
&Progress::default(),
&Default::default(),
)
@@ -1781,7 +1777,7 @@ fn indexing_nested_movies_without_faceted_fields(c: &mut Criterion) {
&rtxn,
None,
&mut new_fields_ids_map,
&no_cancel,
&|| false,
Progress::default(),
None,
)
@@ -1797,7 +1793,7 @@ fn indexing_nested_movies_without_faceted_fields(c: &mut Criterion) {
primary_key,
&document_changes,
RuntimeEmbedders::default(),
&no_cancel,
&|| false,
&Progress::default(),
&Default::default(),
)
@@ -1849,7 +1845,7 @@ fn indexing_geo(c: &mut Criterion) {
&rtxn,
None,
&mut new_fields_ids_map,
&no_cancel,
&|| false,
Progress::default(),
None,
)
@@ -1865,7 +1861,7 @@ fn indexing_geo(c: &mut Criterion) {
primary_key,
&document_changes,
RuntimeEmbedders::default(),
&no_cancel,
&|| false,
&Progress::default(),
&Default::default(),
)
@@ -1916,7 +1912,7 @@ fn reindexing_geo(c: &mut Criterion) {
&rtxn,
None,
&mut new_fields_ids_map,
&no_cancel,
&|| false,
Progress::default(),
None,
)
@@ -1932,7 +1928,7 @@ fn reindexing_geo(c: &mut Criterion) {
primary_key,
&document_changes,
RuntimeEmbedders::default(),
&no_cancel,
&|| false,
&Progress::default(),
&Default::default(),
)
@@ -1962,7 +1958,7 @@ fn reindexing_geo(c: &mut Criterion) {
&rtxn,
None,
&mut new_fields_ids_map,
&no_cancel,
&|| false,
Progress::default(),
None,
)
@@ -1978,7 +1974,7 @@ fn reindexing_geo(c: &mut Criterion) {
primary_key,
&document_changes,
RuntimeEmbedders::default(),
&no_cancel,
&|| false,
&Progress::default(),
&Default::default(),
)
@@ -2031,7 +2027,7 @@ fn deleting_geo_in_batches_default(c: &mut Criterion) {
&rtxn,
None,
&mut new_fields_ids_map,
&no_cancel,
&|| false,
Progress::default(),
None,
)
@@ -2047,7 +2043,7 @@ fn deleting_geo_in_batches_default(c: &mut Criterion) {
primary_key,
&document_changes,
RuntimeEmbedders::default(),
&no_cancel,
&|| false,
&Progress::default(),
&Default::default(),
)

View File

@@ -15,4 +15,4 @@ time = { version = "0.3.44", features = ["parsing"] }
[build-dependencies]
anyhow = "1.0.100"
vergen-gitcl = "1.0.8"
vergen-git2 = "1.0.7"

View File

@@ -15,7 +15,7 @@ fn emit_git_variables() -> anyhow::Result<()> {
// Note: any code that needs VERGEN_ environment variables should take care to define them manually in the Dockerfile and pass them
// in the corresponding GitHub workflow (publish_docker.yml).
// This is due to the Dockerfile building the binary outside of the git directory.
let mut builder = vergen_gitcl::GitclBuilder::default();
let mut builder = vergen_git2::Git2Builder::default();
builder.branch(true);
builder.commit_timestamp(true);
@@ -25,5 +25,5 @@ fn emit_git_variables() -> anyhow::Result<()> {
let git2 = builder.build()?;
vergen_gitcl::Emitter::default().fail_on_error().add_instructions(&git2)?.emit()
vergen_git2::Emitter::default().fail_on_error().add_instructions(&git2)?.emit()
}

View File

@@ -1,6 +0,0 @@
use build_info::BuildInfo;
fn main() {
let info = BuildInfo::from_build();
dbg!(info);
}

View File

@@ -6,7 +6,7 @@ use meilisearch_types::heed::types::{SerdeBincode, SerdeJson, Str};
use meilisearch_types::heed::{Database, RoTxn};
use meilisearch_types::milli::{CboRoaringBitmapCodec, RoaringBitmapCodec, BEU32};
use meilisearch_types::tasks::{Details, Kind, Status, Task};
use meilisearch_types::versioning::{self, VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH};
use meilisearch_types::versioning;
use roaring::RoaringBitmap;
use crate::index_mapper::IndexMapper;
@@ -320,11 +320,7 @@ fn snapshot_details(d: &Details) -> String {
format!("{{ url: {url:?}, api_key: {api_key:?}, payload_size: {payload_size:?}, indexes: {indexes:?} }}")
}
Details::UpgradeDatabase { from, to } => {
if to == &(VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH) {
format!("{{ from: {from:?}, to: [current version] }}")
} else {
format!("{{ from: {from:?}, to: {to:?} }}")
}
format!("{{ from: {from:?}, to: {to:?} }}")
}
Details::IndexCompaction { index_uid, pre_compaction_size, post_compaction_size } => {
format!("{{ index_uid: {index_uid:?}, pre_compaction_size: {pre_compaction_size:?}, post_compaction_size: {post_compaction_size:?} }}")
@@ -404,21 +400,7 @@ pub fn snapshot_batch(batch: &Batch) -> String {
snap.push('{');
snap.push_str(&format!("uid: {uid}, "));
let details = if let Some(upgrade_to) = &details.upgrade_to {
if upgrade_to.as_str()
== format!("v{VERSION_MAJOR}.{VERSION_MINOR}.{VERSION_PATCH}").as_str()
{
let mut details = details.clone();
details.upgrade_to = Some("[current version]".into());
serde_json::to_string(&details).unwrap()
} else {
serde_json::to_string(details).unwrap()
}
} else {
serde_json::to_string(details).unwrap()
};
snap.push_str(&format!("details: {details}, "));
snap.push_str(&format!("details: {}, ", serde_json::to_string(details).unwrap()));
snap.push_str(&format!("stats: {}, ", serde_json::to_string(&stats).unwrap()));
if !embedder_stats.skip_serializing() {
snap.push_str(&format!(

View File

@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: [current version] }, kind: UpgradeDatabase { from: (1, 12, 0) }}
0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: (1, 28, 2) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
1 {uid: 1, batch_uid: 1, status: succeeded, details: { primary_key: Some("mouse"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
2 {uid: 2, batch_uid: 2, status: succeeded, details: { primary_key: Some("bone"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
3 {uid: 3, batch_uid: 3, status: failed, error: ResponseError { code: 200, message: "Index `doggo` already exists.", error_code: "index_already_exists", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#index_already_exists" }, details: { primary_key: Some("bone"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
@@ -57,7 +57,7 @@ girafo: { number_of_documents: 0, field_distribution: {} }
[timestamp] [4,]
----------------------------------------------------------------------
### All Batches:
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"[current version]"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.28.2"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
1 {uid: 1, details: {"primaryKey":"mouse"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"catto":1}}, stop reason: "created batch containing only task with id 1 of type `indexCreation` that cannot be batched with any other task.", }
2 {uid: 2, details: {"primaryKey":"bone"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"doggo":1}}, stop reason: "created batch containing only task with id 2 of type `indexCreation` that cannot be batched with any other task.", }
3 {uid: 3, details: {"primaryKey":"bone"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"indexCreation":1},"indexUids":{"doggo":1}}, stop reason: "created batch containing only task with id 3 of type `indexCreation` that cannot be batched with any other task.", }

View File

@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: [current version] }, kind: UpgradeDatabase { from: (1, 12, 0) }}
0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: (1, 28, 2) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
----------------------------------------------------------------------
### Status:
enqueued [0,]

View File

@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: [current version] }, kind: UpgradeDatabase { from: (1, 12, 0) }}
0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: (1, 28, 2) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
----------------------------------------------------------------------
### Status:

View File

@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: [current version] }, kind: UpgradeDatabase { from: (1, 12, 0) }}
0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: (1, 28, 2) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
----------------------------------------------------------------------
### Status:
@@ -37,7 +37,7 @@ catto [1,]
[timestamp] [0,]
----------------------------------------------------------------------
### All Batches:
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"[current version]"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.28.2"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
----------------------------------------------------------------------
### Batch to tasks mapping:
0 [0,]

View File

@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: [current version] }, kind: UpgradeDatabase { from: (1, 12, 0) }}
0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: (1, 28, 2) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
2 {uid: 2, status: enqueued, details: { primary_key: Some("bone"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
----------------------------------------------------------------------
@@ -40,7 +40,7 @@ doggo [2,]
[timestamp] [0,]
----------------------------------------------------------------------
### All Batches:
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"[current version]"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.28.2"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
----------------------------------------------------------------------
### Batch to tasks mapping:
0 [0,]

View File

@@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: [current version] }, kind: UpgradeDatabase { from: (1, 12, 0) }}
0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: (1, 28, 2) }, kind: UpgradeDatabase { from: (1, 12, 0) }}
1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
2 {uid: 2, status: enqueued, details: { primary_key: Some("bone"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
3 {uid: 3, status: enqueued, details: { primary_key: Some("bone"), old_new_uid: None, new_index_uid: None }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }}
@@ -43,7 +43,7 @@ doggo [2,3,]
[timestamp] [0,]
----------------------------------------------------------------------
### All Batches:
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"[current version]"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.28.2"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, stop reason: "stopped after the last task of type `upgradeDatabase` because they cannot be batched with tasks of any other type.", }
----------------------------------------------------------------------
### Batch to tasks mapping:
0 [0,]

View File

@@ -1,7 +1,7 @@
use anyhow::bail;
use meilisearch_types::heed::{Env, RwTxn, WithoutTls};
use meilisearch_types::tasks::{Details, KindWithContent, Status, Task};
use meilisearch_types::versioning;
use meilisearch_types::versioning::{VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH};
use time::OffsetDateTime;
use tracing::info;
@@ -9,82 +9,83 @@ use crate::queue::TaskQueue;
use crate::versioning::Versioning;
trait UpgradeIndexScheduler {
fn upgrade(&self, env: &Env<WithoutTls>, wtxn: &mut RwTxn) -> anyhow::Result<()>;
/// Whether the migration should be applied, depending on the initial version of the index scheduler before
/// any migration was applied
fn must_upgrade(&self, initial_version: (u32, u32, u32)) -> bool;
/// A progress-centric description of the migration
fn description(&self) -> &'static str;
fn upgrade(
&self,
env: &Env<WithoutTls>,
wtxn: &mut RwTxn,
original: (u32, u32, u32),
) -> anyhow::Result<()>;
fn target_version(&self) -> (u32, u32, u32);
}
/// Upgrade the index scheduler to the binary version.
///
/// # Warning
///
/// The current implementation uses a single wtxn to the index scheduler for the whole duration of the upgrade.
/// If migrations start taking take a long time, it might prevent tasks from being registered.
/// If this issue manifests, then it can be mitigated by adding a `fn target_version` to `UpgradeIndexScheduler`,
/// to be able to write intermediate versions and drop the wtxn between applying migrations.
pub fn upgrade_index_scheduler(
env: &Env<WithoutTls>,
versioning: &Versioning,
initial_version: (u32, u32, u32),
from: (u32, u32, u32),
to: (u32, u32, u32),
) -> anyhow::Result<()> {
let target_major: u32 = versioning::VERSION_MAJOR;
let target_minor: u32 = versioning::VERSION_MINOR;
let target_patch: u32 = versioning::VERSION_PATCH;
let target_version = (target_major, target_minor, target_patch);
if initial_version == target_version {
return Ok(());
}
let current_major = to.0;
let current_minor = to.1;
let current_patch = to.2;
let upgrade_functions: &[&dyn UpgradeIndexScheduler] = &[
// List all upgrade functions to apply in order here.
// This is the last upgrade function, it will be called when the index is up to date.
// any other upgrade function should be added before this one.
&ToCurrentNoOp {},
];
let (initial_major, initial_minor, initial_patch) = initial_version;
if initial_version > target_version {
bail!(
"Database version {initial_major}.{initial_minor}.{initial_patch} is higher than the Meilisearch version {target_major}.{target_minor}.{target_patch}. Downgrade is not supported",
let start = match from {
(1, 12, _) => 0,
(1, 13, _) => 0,
(1, 14, _) => 0,
(1, 15, _) => 0,
(1, 16, _) => 0,
(1, 17, _) => 0,
(1, 18, _) => 0,
(1, 19, _) => 0,
(1, 20, _) => 0,
(1, 21, _) => 0,
(1, 22, _) => 0,
(1, 23, _) => 0,
(1, 24, _) => 0,
(1, 25, _) => 0,
(1, 26, _) => 0,
(1, 27, _) => 0,
(1, 28, _) => 0,
(major, minor, patch) => {
if major > current_major
|| (major == current_major && minor > current_minor)
|| (major == current_major && minor == current_minor && patch > current_patch)
{
bail!(
"Database version {major}.{minor}.{patch} is higher than the Meilisearch version {current_major}.{current_minor}.{current_patch}. Downgrade is not supported",
);
} else if major < 1 || (major == current_major && minor < 12) {
bail!(
"Database version {major}.{minor}.{patch} is too old for the experimental dumpless upgrade feature. Please generate a dump using the v{major}.{minor}.{patch} and import it in the v{current_major}.{current_minor}.{current_patch}",
);
}
if initial_version < (1, 12, 0) {
bail!(
"Database version {initial_major}.{initial_minor}.{initial_patch} is too old for the experimental dumpless upgrade feature. Please generate a dump using the v{initial_major}.{initial_minor}.{initial_patch} and import it in the v{target_major}.{target_minor}.{target_patch}",
);
}
} else {
bail!("Unknown database version: v{major}.{minor}.{patch}");
}
}
};
info!("Upgrading the task queue");
let mut wtxn = env.write_txn()?;
let migration_count = upgrade_functions.len();
for (migration_index, upgrade) in upgrade_functions.iter().enumerate() {
if upgrade.must_upgrade(initial_version) {
info!(
"[{migration_index}/{migration_count}]Applying migration: {}",
upgrade.description()
);
upgrade.upgrade(env, &mut wtxn)?;
info!(
"[{}/{migration_count}]Migration applied: {}",
migration_index + 1,
upgrade.description()
)
} else {
info!(
"[{migration_index}/{migration_count}]Skipping unnecessary migration: {}",
upgrade.description()
)
}
let mut local_from = from;
for upgrade in upgrade_functions[start..].iter() {
let target = upgrade.target_version();
info!(
"Upgrading from v{}.{}.{} to v{}.{}.{}",
local_from.0, local_from.1, local_from.2, target.0, target.1, target.2
);
let mut wtxn = env.write_txn()?;
upgrade.upgrade(env, &mut wtxn, local_from)?;
versioning.set_version(&mut wtxn, target)?;
wtxn.commit()?;
local_from = target;
}
versioning.set_version(&mut wtxn, target_version)?;
info!("Task queue upgraded, spawning the upgrade database task");
let mut wtxn = env.write_txn()?;
let queue = TaskQueue::new(env, &mut wtxn)?;
let uid = queue.next_task_id(&wtxn)?;
queue.register(
@@ -97,9 +98,9 @@ pub fn upgrade_index_scheduler(
finished_at: None,
error: None,
canceled_by: None,
details: Some(Details::UpgradeDatabase { from: initial_version, to: target_version }),
details: Some(Details::UpgradeDatabase { from, to }),
status: Status::Enqueued,
kind: KindWithContent::UpgradeDatabase { from: initial_version },
kind: KindWithContent::UpgradeDatabase { from },
network: None,
custom_metadata: None,
},
@@ -108,3 +109,21 @@ pub fn upgrade_index_scheduler(
Ok(())
}
#[allow(non_camel_case_types)]
struct ToCurrentNoOp {}
impl UpgradeIndexScheduler for ToCurrentNoOp {
fn upgrade(
&self,
_env: &Env<WithoutTls>,
_wtxn: &mut RwTxn,
_original: (u32, u32, u32),
) -> anyhow::Result<()> {
Ok(())
}
fn target_version(&self) -> (u32, u32, u32) {
(VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH)
}
}

View File

@@ -64,7 +64,14 @@ impl Versioning {
};
wtxn.commit()?;
upgrade_index_scheduler(env, &this, from)?;
let bin_major: u32 = versioning::VERSION_MAJOR;
let bin_minor: u32 = versioning::VERSION_MINOR;
let bin_patch: u32 = versioning::VERSION_PATCH;
let to = (bin_major, bin_minor, bin_patch);
if from != to {
upgrade_index_scheduler(env, &this, from, to)?;
}
// Once we reach this point it means the upgrade process, if there was one is entirely finished
// we can safely say we reached the latest version of the index scheduler

View File

@@ -1,6 +1,5 @@
use std::collections::BTreeMap;
use serde::{Deserialize, Serialize};
use std::collections::BTreeMap;
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, Default)]
#[serde(rename_all = "camelCase")]

View File

@@ -300,7 +300,6 @@ impl Infos {
max_indexing_memory,
max_indexing_threads,
skip_index_budget: _,
experimental_disable_delta_encoding: _,
experimental_no_edition_2024_for_settings,
experimental_no_edition_2024_for_dumps,
experimental_no_edition_2024_for_prefix_post_processing,

View File

@@ -21,7 +21,6 @@ use meilisearch::{
LogStderrType, Opt, ServicesData, SubscriberForSecondLayer,
};
use meilisearch_auth::{generate_master_key, AuthController, MASTER_KEY_MIN_SIZE};
use meilisearch_types::milli::heed_codec::DELTA_ENCODING_STATUS;
use termcolor::{Color, ColorChoice, ColorSpec, StandardStream, WriteColor};
use tracing::level_filters::LevelFilter;
use tracing_subscriber::layer::SubscriberExt as _;
@@ -96,14 +95,6 @@ async fn main() -> anyhow::Result<()> {
async fn try_main(runtime: tokio::runtime::Handle) -> anyhow::Result<()> {
let (opt, config_read_from) = Opt::try_build()?;
// Disables the delta encoding of bitmaps as soon as possible
if opt.indexer_options.experimental_disable_delta_encoding {
DELTA_ENCODING_STATUS.set_to_disabled()
} else {
DELTA_ENCODING_STATUS.set_to_enabled()
}
.expect("the delta-encoding status to be set only once");
std::panic::set_hook(Box::new(on_panic));
anyhow::ensure!(

View File

@@ -60,7 +60,6 @@ const MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_FACET_POST_PROCESSING: &str =
"MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_FACET_POST_PROCESSING";
const MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_PREFIX_POST_PROCESSING: &str =
"MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_PREFIX_POST_PROCESSING";
const MEILI_EXPERIMENTAL_DISABLE_DELTA_ENCODING: &str = "MEILI_EXPERIMENTAL_DISABLE_DELTA_ENCODING";
const MEILI_EXPERIMENTAL_ENABLE_METRICS: &str = "MEILI_EXPERIMENTAL_ENABLE_METRICS";
const MEILI_EXPERIMENTAL_SEARCH_QUEUE_SIZE: &str = "MEILI_EXPERIMENTAL_SEARCH_QUEUE_SIZE";
const MEILI_EXPERIMENTAL_DROP_SEARCH_AFTER: &str = "MEILI_EXPERIMENTAL_DROP_SEARCH_AFTER";
@@ -846,14 +845,6 @@ pub struct IndexerOpts {
#[clap(long, env = MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_FACET_POST_PROCESSING)]
#[serde(default)]
pub experimental_no_edition_2024_for_facet_post_processing: bool,
/// Experimental disable delta-encoding for bitmaps. For more information,
/// see: <https://github.com/orgs/meilisearch/discussions/875>
///
/// Enables the experimental disable delta-encoding for bitmaps feature.
#[clap(long, env = MEILI_EXPERIMENTAL_DISABLE_DELTA_ENCODING)]
#[serde(default)]
pub experimental_disable_delta_encoding: bool,
}
impl IndexerOpts {
@@ -867,7 +858,6 @@ impl IndexerOpts {
experimental_no_edition_2024_for_dumps,
experimental_no_edition_2024_for_prefix_post_processing,
experimental_no_edition_2024_for_facet_post_processing,
experimental_disable_delta_encoding,
} = self;
if let Some(max_indexing_memory) = max_indexing_memory.0 {
export_to_env_if_not_present(
@@ -905,12 +895,6 @@ impl IndexerOpts {
experimental_no_edition_2024_for_facet_post_processing.to_string(),
);
}
if experimental_disable_delta_encoding {
export_to_env_if_not_present(
MEILI_EXPERIMENTAL_DISABLE_DELTA_ENCODING,
experimental_disable_delta_encoding.to_string(),
);
}
}
}
@@ -926,7 +910,6 @@ impl TryFrom<&IndexerOpts> for IndexerConfig {
experimental_no_edition_2024_for_dumps,
experimental_no_edition_2024_for_prefix_post_processing,
experimental_no_edition_2024_for_facet_post_processing,
experimental_disable_delta_encoding: _, // managed in try_main
} = other;
let thread_pool = ThreadPoolNoAbortBuilder::new_for_indexing()
@@ -1262,7 +1245,7 @@ where
T: AsRef<OsStr>,
{
if let Err(VarError::NotPresent) = std::env::var(key) {
unsafe { std::env::set_var(key, value) }
std::env::set_var(key, value);
}
}

View File

@@ -43,9 +43,9 @@ impl Server<Owned> {
let dir = TempDir::new().unwrap();
if cfg!(windows) {
unsafe { std::env::set_var("TMP", TEST_TEMP_DIR.path()) }
std::env::set_var("TMP", TEST_TEMP_DIR.path());
} else {
unsafe { std::env::set_var("TMPDIR", TEST_TEMP_DIR.path()) }
std::env::set_var("TMPDIR", TEST_TEMP_DIR.path());
}
let options = default_settings(dir.path());
@@ -58,9 +58,9 @@ impl Server<Owned> {
pub async fn new_auth_with_options(mut options: Opt, dir: TempDir) -> Self {
if cfg!(windows) {
unsafe { std::env::set_var("TMP", TEST_TEMP_DIR.path()) }
std::env::set_var("TMP", TEST_TEMP_DIR.path());
} else {
unsafe { std::env::set_var("TMPDIR", TEST_TEMP_DIR.path()) }
std::env::set_var("TMPDIR", TEST_TEMP_DIR.path());
}
options.master_key = Some("MASTER_KEY".to_string());
@@ -215,9 +215,9 @@ impl Server<Shared> {
let dir = TempDir::new().unwrap();
if cfg!(windows) {
unsafe { std::env::set_var("TMP", TEST_TEMP_DIR.path()) }
std::env::set_var("TMP", TEST_TEMP_DIR.path());
} else {
unsafe { std::env::set_var("TMPDIR", TEST_TEMP_DIR.path()) }
std::env::set_var("TMPDIR", TEST_TEMP_DIR.path());
}
let options = default_settings(dir.path());
@@ -508,8 +508,6 @@ pub fn default_settings(dir: impl AsRef<Path>) -> Opt {
experimental_no_edition_2024_for_dumps: false,
experimental_no_edition_2024_for_prefix_post_processing: false,
experimental_no_edition_2024_for_facet_post_processing: false,
// It has no effect to set the delta encoding here as the toggle is done in try_main
experimental_disable_delta_encoding: false,
},
experimental_enable_metrics: false,
..Parser::parse_from(None as Option<&str>)

View File

@@ -42,16 +42,8 @@ async fn version_too_old() {
std::fs::create_dir_all(&db_path).unwrap();
std::fs::write(db_path.join("VERSION"), "1.11.9999").unwrap();
let options = Opt { experimental_dumpless_upgrade: true, ..default_settings };
let err = Server::new_with_options(options).await.map(|_| ()).unwrap_err().to_string();
let major = meilisearch_types::versioning::VERSION_MAJOR;
let minor = meilisearch_types::versioning::VERSION_MINOR;
let patch = meilisearch_types::versioning::VERSION_PATCH;
let current_version = format!("{major}.{minor}.{patch}");
let err = err.replace(&current_version, "[current version]");
snapshot!(err, @"Database version 1.11.9999 is too old for the experimental dumpless upgrade feature. Please generate a dump using the v1.11.9999 and import it in the v[current version]");
let err = Server::new_with_options(options).await.map(|_| ()).unwrap_err();
snapshot!(err, @"Database version 1.11.9999 is too old for the experimental dumpless upgrade feature. Please generate a dump using the v1.11.9999 and import it in the v1.28.2");
}
#[actix_rt::test]
@@ -62,21 +54,11 @@ async fn version_requires_downgrade() {
std::fs::create_dir_all(&db_path).unwrap();
let major = meilisearch_types::versioning::VERSION_MAJOR;
let minor = meilisearch_types::versioning::VERSION_MINOR;
let mut patch = meilisearch_types::versioning::VERSION_PATCH;
let current_version = format!("{major}.{minor}.{patch}");
patch += 1;
let future_version = format!("{major}.{minor}.{patch}");
std::fs::write(db_path.join("VERSION"), &future_version).unwrap();
let patch = meilisearch_types::versioning::VERSION_PATCH + 1;
std::fs::write(db_path.join("VERSION"), format!("{major}.{minor}.{patch}")).unwrap();
let options = Opt { experimental_dumpless_upgrade: true, ..default_settings };
let err = Server::new_with_options(options).await.map(|_| ()).unwrap_err();
let err = err.to_string();
let err = err.replace(&current_version, "[current version]");
let err = err.replace(&future_version, "[future version]");
snapshot!(err, @"Database version [future version] is higher than the Meilisearch version [current version]. Downgrade is not supported");
snapshot!(err, @"Database version 1.28.3 is higher than the Meilisearch version 1.28.2. Downgrade is not supported");
}
#[actix_rt::test]

View File

@@ -8,7 +8,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
"progress": null,
"details": {
"upgradeFrom": "v1.12.0",
"upgradeTo": "[current version]"
"upgradeTo": "v1.28.2"
},
"stats": {
"totalNbTasks": 1,

View File

@@ -8,7 +8,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
"progress": null,
"details": {
"upgradeFrom": "v1.12.0",
"upgradeTo": "[current version]"
"upgradeTo": "v1.28.2"
},
"stats": {
"totalNbTasks": 1,

View File

@@ -8,7 +8,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
"progress": null,
"details": {
"upgradeFrom": "v1.12.0",
"upgradeTo": "[current version]"
"upgradeTo": "v1.28.2"
},
"stats": {
"totalNbTasks": 1,

View File

@@ -12,7 +12,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
"canceledBy": null,
"details": {
"upgradeFrom": "v1.12.0",
"upgradeTo": "[current version]"
"upgradeTo": "v1.28.2"
},
"error": null,
"duration": "[duration]",

View File

@@ -12,7 +12,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
"canceledBy": null,
"details": {
"upgradeFrom": "v1.12.0",
"upgradeTo": "[current version]"
"upgradeTo": "v1.28.2"
},
"error": null,
"duration": "[duration]",

View File

@@ -12,7 +12,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
"canceledBy": null,
"details": {
"upgradeFrom": "v1.12.0",
"upgradeTo": "[current version]"
"upgradeTo": "v1.28.2"
},
"error": null,
"duration": "[duration]",

View File

@@ -8,7 +8,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
"progress": null,
"details": {
"upgradeFrom": "v1.12.0",
"upgradeTo": "[current version]"
"upgradeTo": "v1.28.2"
},
"stats": {
"totalNbTasks": 1,

View File

@@ -12,7 +12,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs
"canceledBy": null,
"details": {
"upgradeFrom": "v1.12.0",
"upgradeTo": "[current version]"
"upgradeTo": "v1.28.2"
},
"error": null,
"duration": "[duration]",

View File

@@ -166,55 +166,55 @@ async fn check_the_index_scheduler(server: &Server) {
// We rewrite the first task for all calls because it may be the upgrade database with unknown dates and duration.
// The other tasks should NOT change
let (tasks, _) = server.tasks_filter("limit=1000").await;
snapshot!(json_string!(tasks, { ".results[0].details.upgradeTo" => "[current version]", ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]" }), name: "the_whole_task_queue_once_everything_has_been_processed");
snapshot!(json_string!(tasks, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]" }), name: "the_whole_task_queue_once_everything_has_been_processed");
let (batches, _) = server.batches_filter("limit=1000").await;
snapshot!(json_string!(batches, { ".results[0].details.upgradeTo" => "[current version]", ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "the_whole_batch_queue_once_everything_has_been_processed");
snapshot!(json_string!(batches, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "the_whole_batch_queue_once_everything_has_been_processed");
// Tests all the tasks query parameters
let (tasks, _) = server.tasks_filter("uids=10").await;
snapshot!(json_string!(tasks, { ".results[0].details.upgradeTo" => "[current version]", ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]" }), name: "tasks_filter_uids_equal_10");
snapshot!(json_string!(tasks, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]" }), name: "tasks_filter_uids_equal_10");
let (tasks, _) = server.tasks_filter("batchUids=10").await;
snapshot!(json_string!(tasks, { ".results[0].details.upgradeTo" => "[current version]", ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]" }), name: "tasks_filter_batchUids_equal_10");
snapshot!(json_string!(tasks, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]" }), name: "tasks_filter_batchUids_equal_10");
let (tasks, _) = server.tasks_filter("statuses=canceled").await;
snapshot!(json_string!(tasks, { ".results[0].details.upgradeTo" => "[current version]", ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]" }), name: "tasks_filter_statuses_equal_canceled");
snapshot!(json_string!(tasks, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]" }), name: "tasks_filter_statuses_equal_canceled");
// types has already been tested above to retrieve the upgrade database
let (tasks, _) = server.tasks_filter("canceledBy=19").await;
snapshot!(json_string!(tasks, { ".results[0].details.upgradeTo" => "[current version]", ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]" }), name: "tasks_filter_canceledBy_equal_19");
snapshot!(json_string!(tasks, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]" }), name: "tasks_filter_canceledBy_equal_19");
let (tasks, _) = server.tasks_filter("beforeEnqueuedAt=2025-01-16T16:47:41Z").await;
snapshot!(json_string!(tasks, { ".results[0].details.upgradeTo" => "[current version]", ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]" }), name: "tasks_filter_beforeEnqueuedAt_equal_2025-01-16T16_47_41");
snapshot!(json_string!(tasks, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]" }), name: "tasks_filter_beforeEnqueuedAt_equal_2025-01-16T16_47_41");
let (tasks, _) = server.tasks_filter("afterEnqueuedAt=2025-01-16T16:47:41Z").await;
snapshot!(json_string!(tasks, { ".results[0].details.upgradeTo" => "[current version]", ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]" }), name: "tasks_filter_afterEnqueuedAt_equal_2025-01-16T16_47_41");
snapshot!(json_string!(tasks, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]" }), name: "tasks_filter_afterEnqueuedAt_equal_2025-01-16T16_47_41");
let (tasks, _) = server.tasks_filter("beforeStartedAt=2025-01-16T16:47:41Z").await;
snapshot!(json_string!(tasks, { ".results[0].details.upgradeTo" => "[current version]", ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]" }), name: "tasks_filter_beforeStartedAt_equal_2025-01-16T16_47_41");
snapshot!(json_string!(tasks, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]" }), name: "tasks_filter_beforeStartedAt_equal_2025-01-16T16_47_41");
let (tasks, _) = server.tasks_filter("afterStartedAt=2025-01-16T16:47:41Z").await;
snapshot!(json_string!(tasks, { ".results[0].details.upgradeTo" => "[current version]", ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]" }), name: "tasks_filter_afterStartedAt_equal_2025-01-16T16_47_41");
snapshot!(json_string!(tasks, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]" }), name: "tasks_filter_afterStartedAt_equal_2025-01-16T16_47_41");
let (tasks, _) = server.tasks_filter("beforeFinishedAt=2025-01-16T16:47:41Z").await;
snapshot!(json_string!(tasks, { ".results[0].details.upgradeTo" => "[current version]", ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]" }), name: "tasks_filter_beforeFinishedAt_equal_2025-01-16T16_47_41");
snapshot!(json_string!(tasks, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]" }), name: "tasks_filter_beforeFinishedAt_equal_2025-01-16T16_47_41");
let (tasks, _) = server.tasks_filter("afterFinishedAt=2025-01-16T16:47:41Z").await;
snapshot!(json_string!(tasks, { ".results[0].details.upgradeTo" => "[current version]", ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]" }), name: "tasks_filter_afterFinishedAt_equal_2025-01-16T16_47_41");
snapshot!(json_string!(tasks, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]" }), name: "tasks_filter_afterFinishedAt_equal_2025-01-16T16_47_41");
// Tests all the batches query parameters
let (batches, _) = server.batches_filter("uids=10").await;
snapshot!(json_string!(batches, { ".results[0].details.upgradeTo" => "[current version]", ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.internalDatabaseSizes" => "[internalDatabaseSizes]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_uids_equal_10");
snapshot!(json_string!(batches, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.internalDatabaseSizes" => "[internalDatabaseSizes]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_uids_equal_10");
let (batches, _) = server.batches_filter("batchUids=10").await;
snapshot!(json_string!(batches, { ".results[0].details.upgradeTo" => "[current version]", ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.internalDatabaseSizes" => "[internalDatabaseSizes]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_batchUids_equal_10");
snapshot!(json_string!(batches, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.internalDatabaseSizes" => "[internalDatabaseSizes]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_batchUids_equal_10");
let (batches, _) = server.batches_filter("statuses=canceled").await;
snapshot!(json_string!(batches, { ".results[0].details.upgradeTo" => "[current version]", ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.internalDatabaseSizes" => "[internalDatabaseSizes]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_statuses_equal_canceled");
snapshot!(json_string!(batches, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.internalDatabaseSizes" => "[internalDatabaseSizes]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_statuses_equal_canceled");
// types has already been tested above to retrieve the upgrade database
let (batches, _) = server.batches_filter("canceledBy=19").await;
snapshot!(json_string!(batches, { ".results[0].details.upgradeTo" => "[current version]", ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.internalDatabaseSizes" => "[internalDatabaseSizes]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_canceledBy_equal_19");
snapshot!(json_string!(batches, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.internalDatabaseSizes" => "[internalDatabaseSizes]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_canceledBy_equal_19");
let (batches, _) = server.batches_filter("beforeEnqueuedAt=2025-01-16T16:47:41Z").await;
snapshot!(json_string!(batches, { ".results[0].details.upgradeTo" => "[current version]", ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.internalDatabaseSizes" => "[internalDatabaseSizes]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_beforeEnqueuedAt_equal_2025-01-16T16_47_41");
snapshot!(json_string!(batches, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.internalDatabaseSizes" => "[internalDatabaseSizes]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_beforeEnqueuedAt_equal_2025-01-16T16_47_41");
let (batches, _) = server.batches_filter("afterEnqueuedAt=2025-01-16T16:47:41Z").await;
snapshot!(json_string!(batches, { ".results[0].details.upgradeTo" => "[current version]", ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.internalDatabaseSizes" => "[internalDatabaseSizes]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_afterEnqueuedAt_equal_2025-01-16T16_47_41");
snapshot!(json_string!(batches, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.internalDatabaseSizes" => "[internalDatabaseSizes]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_afterEnqueuedAt_equal_2025-01-16T16_47_41");
let (batches, _) = server.batches_filter("beforeStartedAt=2025-01-16T16:47:41Z").await;
snapshot!(json_string!(batches, { ".results[0].details.upgradeTo" => "[current version]", ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.internalDatabaseSizes" => "[internalDatabaseSizes]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_beforeStartedAt_equal_2025-01-16T16_47_41");
snapshot!(json_string!(batches, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.internalDatabaseSizes" => "[internalDatabaseSizes]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_beforeStartedAt_equal_2025-01-16T16_47_41");
let (batches, _) = server.batches_filter("afterStartedAt=2025-01-16T16:47:41Z").await;
snapshot!(json_string!(batches, { ".results[0].details.upgradeTo" => "[current version]", ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.internalDatabaseSizes" => "[internalDatabaseSizes]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_afterStartedAt_equal_2025-01-16T16_47_41");
snapshot!(json_string!(batches, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.internalDatabaseSizes" => "[internalDatabaseSizes]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_afterStartedAt_equal_2025-01-16T16_47_41");
let (batches, _) = server.batches_filter("beforeFinishedAt=2025-01-16T16:47:41Z").await;
snapshot!(json_string!(batches, { ".results[0].details.upgradeTo" => "[current version]", ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.internalDatabaseSizes" => "[internalDatabaseSizes]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_beforeFinishedAt_equal_2025-01-16T16_47_41");
snapshot!(json_string!(batches, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.internalDatabaseSizes" => "[internalDatabaseSizes]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_beforeFinishedAt_equal_2025-01-16T16_47_41");
let (batches, _) = server.batches_filter("afterFinishedAt=2025-01-16T16:47:41Z").await;
snapshot!(json_string!(batches, { ".results[0].details.upgradeTo" => "[current version]", ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.internalDatabaseSizes" => "[internalDatabaseSizes]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_afterFinishedAt_equal_2025-01-16T16_47_41");
snapshot!(json_string!(batches, { ".results[0].duration" => "[duration]", ".results[0].enqueuedAt" => "[date]", ".results[0].startedAt" => "[date]", ".results[0].finishedAt" => "[date]", ".results[0].stats.progressTrace" => "[progressTrace]", ".results[0].stats.internalDatabaseSizes" => "[internalDatabaseSizes]", ".results[0].stats.writeChannelCongestion" => "[writeChannelCongestion]" }), name: "batches_filter_afterFinishedAt_equal_2025-01-16T16_47_41");
let (stats, _) = server.stats().await;
assert_json_snapshot!(stats, {

View File

@@ -1,43 +0,0 @@
use meili_snap::snapshot;
use crate::common::{GetAllDocumentsOptions, Server};
use crate::json;
#[actix_rt::test]
async fn hf_bge_m3_force_cls_settings() {
let server = Server::new_shared();
let index = server.unique_index();
let (response, code) = index
.update_settings(json!({
"embedders": {
"default": {
"source": "huggingFace",
"model": "baai/bge-m3",
"revision": "5617a9f61b028005a4858fdac845db406aefb181",
"pooling": "forceCls",
// minimal template to allow potential document embedding if used later
"documentTemplate": "{{doc.title}}"
}
}
}))
.await;
snapshot!(code, @"202 Accepted");
server.wait_task(response.uid()).await.succeeded();
// Try to embed one simple document
let (task, code) =
index.add_documents(json!([{ "id": 1, "title": "Hello world" }]), None).await;
snapshot!(code, @"202 Accepted");
server.wait_task(task.uid()).await.succeeded();
// Retrieve the document with vectors and assert embeddings were produced
let (documents, _code) = index
.get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() })
.await;
let has_vectors = documents["results"][0]["_vectors"]["default"]["embeddings"]
.as_array()
.map(|a| !a.is_empty())
.unwrap_or(false);
snapshot!(has_vectors, @"true");
}

View File

@@ -1,6 +1,5 @@
mod binary_quantized;
mod fragments;
mod huggingface;
#[cfg(feature = "test-ollama")]
mod ollama;
mod openai;

View File

@@ -91,7 +91,7 @@ rhai = { version = "1.23.6", features = [
"sync",
] }
arroy = "0.6.4-nested-rtxns"
hannoy = { version = "0.1.0-nested-rtxns", features = ["arroy"] }
hannoy = { version = "0.0.9-nested-rtxns-2", features = ["arroy"] }
rand = "0.8.5"
tracing = "0.1.41"
ureq = { version = "2.12.1", features = ["json"] }
@@ -120,16 +120,14 @@ twox-hash = { version = "2.1.2", default-features = false, features = [
] }
geo-types = "0.7.17"
zerometry = "0.3.0"
bitpacking = "0.9.2"
[dev-dependencies]
mimalloc = { version = "0.1.48", default-features = false }
# fixed version due to format breakages in v1.40
insta = "=1.39.0"
mimalloc = { version = "0.1.48", default-features = false }
maplit = "1.0.2"
md5 = "0.8.0"
meili-snap = { path = "../meili-snap" }
quickcheck = "1.0.3"
rand = { version = "0.8.5", features = ["small_rng"] }
[features]

View File

@@ -22,10 +22,7 @@ pub use self::beu32_str_codec::BEU32StrCodec;
pub use self::field_id_word_count_codec::FieldIdWordCountCodec;
pub use self::fst_set_codec::FstSetCodec;
pub use self::obkv_codec::ObkvCodec;
pub use self::roaring_bitmap::{
BoRoaringBitmapCodec, CboRoaringBitmapCodec, DeCboRoaringBitmapCodec, RoaringBitmapCodec,
DELTA_ENCODING_STATUS,
};
pub use self::roaring_bitmap::{BoRoaringBitmapCodec, CboRoaringBitmapCodec, RoaringBitmapCodec};
pub use self::roaring_bitmap_length::{
BoRoaringBitmapLenCodec, CboRoaringBitmapLenCodec, RoaringBitmapLenCodec,
};

View File

@@ -19,19 +19,8 @@ pub const THRESHOLD: usize = 7;
pub struct CboRoaringBitmapCodec;
impl CboRoaringBitmapCodec {
/// If the number of items (u32s) to encode is less than or equal to the threshold
/// it means that it would weigh the same or less than the RoaringBitmap
/// header, so we directly encode them using ByteOrder instead.
pub fn bitmap_serialize_as_raw_u32s(roaring: &RoaringBitmap) -> bool {
roaring.len() <= THRESHOLD as u64
}
pub fn bytes_deserialize_as_raw_u32s(bytes: &[u8]) -> bool {
bytes.len() <= THRESHOLD * size_of::<u32>()
}
pub fn serialized_size(roaring: &RoaringBitmap) -> usize {
if Self::bitmap_serialize_as_raw_u32s(roaring) {
if roaring.len() <= THRESHOLD as u64 {
roaring.len() as usize * size_of::<u32>()
} else {
roaring.serialized_size()
@@ -46,7 +35,10 @@ impl CboRoaringBitmapCodec {
roaring: &RoaringBitmap,
mut writer: W,
) -> io::Result<()> {
if Self::bitmap_serialize_as_raw_u32s(roaring) {
if roaring.len() <= THRESHOLD as u64 {
// If the number of items (u32s) to encode is less than or equal to the threshold
// it means that it would weigh the same or less than the RoaringBitmap
// header, so we directly encode them using ByteOrder instead.
for integer in roaring {
writer.write_u32::<NativeEndian>(integer)?;
}
@@ -59,7 +51,7 @@ impl CboRoaringBitmapCodec {
}
pub fn deserialize_from(mut bytes: &[u8]) -> io::Result<RoaringBitmap> {
if Self::bytes_deserialize_as_raw_u32s(bytes) {
if bytes.len() <= THRESHOLD * size_of::<u32>() {
// If there is threshold or less than threshold integers that can fit into this array
// of bytes it means that we used the ByteOrder codec serializer.
let mut bitmap = RoaringBitmap::new();
@@ -79,7 +71,7 @@ impl CboRoaringBitmapCodec {
other: &RoaringBitmap,
) -> io::Result<RoaringBitmap> {
// See above `deserialize_from` method for implementation details.
if Self::bytes_deserialize_as_raw_u32s(bytes) {
if bytes.len() <= THRESHOLD * size_of::<u32>() {
let mut bitmap = RoaringBitmap::new();
while let Ok(integer) = bytes.read_u32::<NativeEndian>() {
if other.contains(integer) {
@@ -106,7 +98,7 @@ impl CboRoaringBitmapCodec {
let mut vec = Vec::new();
for bytes in slices {
if Self::bytes_deserialize_as_raw_u32s(bytes.as_ref()) {
if bytes.as_ref().len() <= THRESHOLD * size_of::<u32>() {
let mut reader = bytes.as_ref();
while let Ok(integer) = reader.read_u32::<NativeEndian>() {
vec.push(integer);
@@ -120,8 +112,6 @@ impl CboRoaringBitmapCodec {
vec.sort_unstable();
vec.dedup();
// Be careful when modifying this condition,
// the rule must be the same everywhere
if vec.len() <= THRESHOLD {
for integer in vec {
buffer.extend_from_slice(&integer.to_ne_bytes());

View File

@@ -1,153 +0,0 @@
use std::borrow::Cow;
use std::io::{self, ErrorKind};
use std::sync::OnceLock;
use heed::BoxedError;
use roaring::RoaringBitmap;
use super::cbo_roaring_bitmap_codec::CboRoaringBitmapCodec;
use super::de_roaring_bitmap_codec::DeRoaringBitmapCodec;
use crate::heed_codec::BytesDecodeOwned;
/// Defines the status of the delta encoding on whether we have enabled it or not.
pub static DELTA_ENCODING_STATUS: DeltaEncodingStatusLock = DeltaEncodingStatusLock::new();
pub struct DeCboRoaringBitmapCodec;
impl DeCboRoaringBitmapCodec {
pub fn serialized_size_with_tmp_buffer(
bitmap: &RoaringBitmap,
tmp_buffer: &mut Vec<u32>,
) -> usize {
// We are stuck with this format because the CboRoaringBitmapCodec decides to write
// raw and unencoded u32s, without a header when there is at most THRESHOLD elements.
if CboRoaringBitmapCodec::bitmap_serialize_as_raw_u32s(bitmap)
&& DELTA_ENCODING_STATUS.is_disabled()
{
CboRoaringBitmapCodec::serialized_size(bitmap)
} else {
DeRoaringBitmapCodec::serialized_size_with_tmp_buffer(bitmap, tmp_buffer)
}
}
/// Writes the delta-encoded compressed version of
/// the given roaring bitmap into the provided writer.
pub fn serialize_into<W: io::Write>(bitmap: &RoaringBitmap, writer: W) -> io::Result<()> {
let mut tmp_buffer = Vec::new();
Self::serialize_into_with_tmp_buffer(bitmap, writer, &mut tmp_buffer)
}
/// Same as [Self::serialize_into] but accepts a buffer to avoid allocating one.
///
/// Note that we always serialize the bitmap with the delta-encoded compressed version.
pub fn serialize_into_with_tmp_buffer<W: io::Write>(
bitmap: &RoaringBitmap,
writer: W,
tmp_buffer: &mut Vec<u32>,
) -> io::Result<()> {
// We are stuck with this format because the CboRoaringBitmapCodec decides to write
// raw and unencoded u32s, without a header when there is at most THRESHOLD elements.
if CboRoaringBitmapCodec::bitmap_serialize_as_raw_u32s(bitmap)
&& DELTA_ENCODING_STATUS.is_disabled()
{
CboRoaringBitmapCodec::serialize_into_writer(bitmap, writer)
} else {
DeRoaringBitmapCodec::serialize_into_with_tmp_buffer(bitmap, writer, tmp_buffer)
}
}
/// Returns the delta-decoded roaring bitmap from the compressed bytes.
pub fn deserialize_from(compressed: &[u8]) -> io::Result<RoaringBitmap> {
let mut tmp_buffer = Vec::new();
Self::deserialize_from_with_tmp_buffer(compressed, &mut tmp_buffer)
}
/// Same as [Self::deserialize_from] but accepts a buffer to avoid allocating one.
///
/// It tries to decode the input by using the delta-decoded version and
/// if it fails, falls back to the CboRoaringBitmap version.
pub fn deserialize_from_with_tmp_buffer(
input: &[u8],
tmp_buffer: &mut Vec<u32>,
) -> io::Result<RoaringBitmap> {
// The input is too short to be a valid delta-decoded bitmap.
// We fall back to the CboRoaringBitmap version with raw u32s.
if CboRoaringBitmapCodec::bytes_deserialize_as_raw_u32s(input) {
return CboRoaringBitmapCodec::deserialize_from(input);
}
match DeRoaringBitmapCodec::deserialize_from_with_tmp_buffer(input, tmp_buffer) {
Ok(bitmap) => Ok(bitmap),
// If the error kind is Other it means that the delta-decoder found
// an invalid magic header. We fall back to the CboRoaringBitmap version.
Err(e) if e.kind() == ErrorKind::Other => {
CboRoaringBitmapCodec::deserialize_from(input)
}
Err(e) => Err(e),
}
}
}
impl heed::BytesDecode<'_> for DeCboRoaringBitmapCodec {
type DItem = RoaringBitmap;
fn bytes_decode(bytes: &[u8]) -> Result<Self::DItem, BoxedError> {
Self::deserialize_from(bytes).map_err(Into::into)
}
}
impl BytesDecodeOwned for DeCboRoaringBitmapCodec {
type DItem = RoaringBitmap;
fn bytes_decode_owned(bytes: &[u8]) -> Result<Self::DItem, BoxedError> {
Self::deserialize_from(bytes).map_err(Into::into)
}
}
impl heed::BytesEncode<'_> for DeCboRoaringBitmapCodec {
type EItem = RoaringBitmap;
fn bytes_encode(item: &Self::EItem) -> Result<Cow<'_, [u8]>, BoxedError> {
let mut tmp_buffer = Vec::new();
let capacity = Self::serialized_size_with_tmp_buffer(&item, &mut tmp_buffer);
let mut output = Vec::with_capacity(capacity);
Self::serialize_into_with_tmp_buffer(item, &mut output, &mut tmp_buffer)?;
Ok(Cow::Owned(output))
}
}
/// Manages the global status of the delta encoding.
///
/// Whether we must use delta encoding or not when encoding roaring bitmaps.
pub struct DeltaEncodingStatusLock(OnceLock<DeltaEncodingStatus>);
impl DeltaEncodingStatusLock {
pub const fn new() -> Self {
Self(OnceLock::new())
}
}
#[derive(Default)]
enum DeltaEncodingStatus {
Enabled,
#[default]
Disabled,
}
impl DeltaEncodingStatusLock {
pub fn set_to_enabled(&self) -> Result<(), ()> {
self.0.set(DeltaEncodingStatus::Enabled).map_err(drop)
}
pub fn set_to_disabled(&self) -> Result<(), ()> {
self.0.set(DeltaEncodingStatus::Disabled).map_err(drop)
}
pub fn is_enabled(&self) -> bool {
matches!(self.0.get(), Some(DeltaEncodingStatus::Enabled))
}
pub fn is_disabled(&self) -> bool {
!self.is_enabled()
}
}

View File

@@ -1,377 +0,0 @@
use std::io::{self, ErrorKind};
use std::mem::{self, size_of, size_of_val};
use bitpacking::{BitPacker, BitPacker1x, BitPacker4x, BitPacker8x};
use roaring::RoaringBitmap;
/// The magic header for our custom encoding format
const MAGIC_HEADER: u16 = 36869;
pub struct DeRoaringBitmapCodec;
// TODO reintroduce:
// - serialized_size?
// - serialize_into_vec
// - intersection_with_serialized
// - merge_into
// - merge_deladd_into
impl DeRoaringBitmapCodec {
/// Returns the serialized size of the given roaring bitmap with the delta encoding format.
pub fn serialized_size_with_tmp_buffer(
bitmap: &RoaringBitmap,
tmp_buffer: &mut Vec<u32>,
) -> usize {
let mut size = 2; // u16 magic header
let bitpacker8x = BitPacker8x::new();
let bitpacker4x = BitPacker4x::new();
let bitpacker1x = BitPacker1x::new();
// This temporary buffer is used to store each chunk of decompressed u32s.
tmp_buffer.resize(BitPacker8x::BLOCK_LEN, 0u32);
let decompressed = &mut tmp_buffer[..];
let mut buffer_index = 0;
let mut initial = None;
// We initially collect all the integers into a flat buffer of the size
// of the largest bitpacker. We encode them with it until we don't have
// enough of them...
for n in bitmap {
decompressed[buffer_index] = n;
buffer_index += 1;
if buffer_index == BitPacker8x::BLOCK_LEN {
let num_bits = bitpacker8x.num_bits_strictly_sorted(initial, decompressed);
let compressed_len = BitPacker8x::compressed_block_size(num_bits);
size += 1; // u8 chunk header
size += compressed_len; // compressed data length
initial = Some(n);
buffer_index = 0;
}
}
// ...We then switch to a smaller bitpacker to encode the remaining chunks...
let decompressed = &decompressed[..buffer_index];
let mut chunks = decompressed.chunks_exact(BitPacker4x::BLOCK_LEN);
for decompressed in chunks.by_ref() {
let num_bits = bitpacker4x.num_bits_strictly_sorted(initial, decompressed);
let compressed_len = BitPacker4x::compressed_block_size(num_bits);
size += 1; // u8 chunk header
size += compressed_len; // compressed data length
initial = decompressed.iter().last().copied();
}
// ...And so on...
let decompressed = chunks.remainder();
let mut chunks = decompressed.chunks_exact(BitPacker1x::BLOCK_LEN);
for decompressed in chunks.by_ref() {
let num_bits = bitpacker1x.num_bits_strictly_sorted(initial, decompressed);
let compressed_len = BitPacker1x::compressed_block_size(num_bits);
size += 1; // u8 chunk header
size += compressed_len; // compressed data length
initial = decompressed.iter().last().copied();
}
// ...Until we don't have any small enough bitpacker. We put them raw
// at the end of out buffer with a header indicating the matter.
let decompressed = chunks.remainder();
if !decompressed.is_empty() {
size += 1; // u8 chunk header
size += mem::size_of_val(decompressed); // remaining uncompressed u32s
}
size
}
/// Writes the delta-encoded compressed version of
/// the given roaring bitmap into the provided writer.
pub fn serialize_into<W: io::Write>(bitmap: &RoaringBitmap, writer: W) -> io::Result<()> {
let mut tmp_buffer = Vec::new();
Self::serialize_into_with_tmp_buffer(bitmap, writer, &mut tmp_buffer)
}
/// Same as [Self::serialize_into] but accepts a buffer to avoid allocating one.
pub fn serialize_into_with_tmp_buffer<W: io::Write>(
bitmap: &RoaringBitmap,
mut writer: W,
tmp_buffer: &mut Vec<u32>,
) -> io::Result<()> {
// Insert the magic header
writer.write_all(&MAGIC_HEADER.to_ne_bytes())?;
let bitpacker8x = BitPacker8x::new();
let bitpacker4x = BitPacker4x::new();
let bitpacker1x = BitPacker1x::new();
// This temporary buffer is used to store each chunk of decompressed and
// compressed and delta-encoded u32s. We need room for the decompressed
// u32s coming from the roaring bitmap, the compressed output that can
// be as large as the decompressed u32s, and the chunk header.
tmp_buffer.resize((BitPacker8x::BLOCK_LEN * 2) + 1, 0u32);
let (decompressed, compressed) = tmp_buffer.split_at_mut(BitPacker8x::BLOCK_LEN);
let compressed = bytemuck::cast_slice_mut(compressed);
let mut buffer_index = 0;
let mut initial = None;
// We initially collect all the integers into a flat buffer of the size
// of the largest bitpacker. We encode them with it until we don't have
// enough of them...
for n in bitmap {
decompressed[buffer_index] = n;
buffer_index += 1;
if buffer_index == BitPacker8x::BLOCK_LEN {
let output = encode_with_packer(&bitpacker8x, decompressed, initial, compressed);
writer.write_all(output)?;
initial = Some(n);
buffer_index = 0;
}
}
// ...We then switch to a smaller bitpacker to encode the remaining chunks...
let decompressed = &decompressed[..buffer_index];
let mut chunks = decompressed.chunks_exact(BitPacker4x::BLOCK_LEN);
for decompressed in chunks.by_ref() {
let output = encode_with_packer(&bitpacker4x, decompressed, initial, compressed);
writer.write_all(output)?;
initial = decompressed.iter().last().copied();
}
// ...And so on...
let decompressed = chunks.remainder();
let mut chunks = decompressed.chunks_exact(BitPacker1x::BLOCK_LEN);
for decompressed in chunks.by_ref() {
let output = encode_with_packer(&bitpacker1x, decompressed, initial, compressed);
writer.write_all(output)?;
initial = decompressed.iter().last().copied();
}
// ...Until we don't have any small enough bitpacker. We put them raw
// at the end of out buffer with a header indicating the matter.
let decompressed = chunks.remainder();
if !decompressed.is_empty() {
let header = encode_chunk_header(BitPackerLevel::None, u32::BITS as u8);
// Note: Not convinced about the performance of writing a single
// byte followed by a larger write. However, we will use this
// codec with a BufWriter or directly with a Vec of bytes.
writer.write_all(&[header])?;
writer.write_all(bytemuck::cast_slice(decompressed))?;
}
Ok(())
}
/// Returns the delta-decoded roaring bitmap from the compressed bytes.
pub fn deserialize_from(compressed: &[u8]) -> io::Result<RoaringBitmap> {
let mut tmp_buffer = Vec::new();
Self::deserialize_from_with_tmp_buffer(compressed, &mut tmp_buffer)
}
/// Same as [Self::deserialize_from] but accepts a buffer to avoid allocating one.
pub fn deserialize_from_with_tmp_buffer(
input: &[u8],
tmp_buffer: &mut Vec<u32>,
) -> io::Result<RoaringBitmap> {
let Some((header, mut compressed)) = input.split_at_checked(size_of_val(&MAGIC_HEADER))
else {
return Err(io::Error::new(ErrorKind::UnexpectedEof, "expecting a two-bytes header"));
};
// Safety: This unwrap cannot happen as the header buffer is the right size
let header = u16::from_ne_bytes(header.try_into().unwrap());
if header != MAGIC_HEADER {
return Err(io::Error::other("invalid header value"));
}
let bitpacker8x = BitPacker8x::new();
let bitpacker4x = BitPacker4x::new();
let bitpacker1x = BitPacker1x::new();
let mut bitmap = RoaringBitmap::new();
tmp_buffer.resize(BitPacker8x::BLOCK_LEN, 0u32);
let decompressed = &mut tmp_buffer[..];
let mut initial = None;
while let Some((&chunk_header, encoded)) = compressed.split_first() {
let (level, num_bits) = decode_chunk_header(chunk_header);
let (bytes_read, decompressed) = match level {
BitPackerLevel::None => {
if num_bits != u32::BITS as u8 {
return Err(io::Error::new(
ErrorKind::InvalidData,
"invalid number of bits to encode non-compressed u32s",
));
}
let chunks = encoded.chunks_exact(size_of::<u32>());
if !chunks.remainder().is_empty() {
return Err(io::Error::new(
io::ErrorKind::InvalidData,
"expecting last chunk to be a multiple of the size of an u32",
));
}
let integers = chunks
// safety: This unwrap cannot happen as
// the size of u32 is set correctly.
.map(|b| b.try_into().unwrap())
.map(u32::from_ne_bytes);
bitmap
.append(integers)
.map_err(|e| io::Error::new(ErrorKind::InvalidData, e))?;
// This is basically always the last chunk that exists in
// this delta-encoded format as the raw u32s are appended
// when there is not enough of them to fit in a bitpacker.
break;
}
BitPackerLevel::BitPacker1x => {
decode_with_packer(&bitpacker1x, decompressed, initial, encoded, num_bits)
}
BitPackerLevel::BitPacker4x => {
decode_with_packer(&bitpacker4x, decompressed, initial, encoded, num_bits)
}
BitPackerLevel::BitPacker8x => {
decode_with_packer(&bitpacker8x, decompressed, initial, encoded, num_bits)
}
};
initial = decompressed.iter().last().copied();
// TODO investigate perf
// Safety: Bitpackers cannot output unsorter integers when
// used with the compress_strictly_sorted function.
bitmap.append(decompressed.iter().copied()).unwrap();
// What the delta-decoding read plus the chunk header size
compressed = &compressed[bytes_read + 1..];
}
Ok(bitmap)
}
}
/// Takes a strickly sorted list of u32s and outputs delta-encoded
/// bytes with a chunk header. We expect the output buffer to be
/// at least BLOCK_LEN + 1.
fn encode_with_packer<'c, B: BitPackerExt>(
bitpacker: &B,
decompressed: &[u32],
initial: Option<u32>,
output: &'c mut [u8],
) -> &'c [u8] {
let num_bits = bitpacker.num_bits_strictly_sorted(initial, decompressed);
let compressed_len = B::compressed_block_size(num_bits);
let chunk_header = encode_chunk_header(B::level(), num_bits);
let buffer = &mut output[..compressed_len + 1];
// Safety: The buffer is at least one byte
let (header_in_buffer, encoded) = buffer.split_first_mut().unwrap();
*header_in_buffer = chunk_header;
bitpacker.compress_strictly_sorted(initial, decompressed, encoded, num_bits);
buffer
}
/// Returns the number of bytes read and the decoded unsigned integers.
fn decode_with_packer<'d, B: BitPacker>(
bitpacker: &B,
decompressed: &'d mut [u32],
initial: Option<u32>,
compressed: &[u8],
num_bits: u8,
) -> (usize, &'d [u32]) {
let decompressed = &mut decompressed[..B::BLOCK_LEN];
let read = bitpacker.decompress_strictly_sorted(initial, compressed, decompressed, num_bits);
(read, decompressed)
}
/// An identifier for the bitpacker to be able
/// to correctly decode the compressed integers.
#[derive(Debug, PartialEq, Eq)]
#[repr(u8)]
enum BitPackerLevel {
/// The remaining bytes are raw little endian encoded u32s.
None,
/// The remaining bits are encoded using a `BitPacker1x`.
BitPacker1x,
/// The remaining bits are encoded using a `BitPacker4x`.
BitPacker4x,
/// The remaining bits are encoded using a `BitPacker8x`.
BitPacker8x,
}
/// Returns the chunk header based on the bitpacker level
/// and the number of bits to encode the list of integers.
fn encode_chunk_header(level: BitPackerLevel, num_bits: u8) -> u8 {
debug_assert!(num_bits as u32 <= 2_u32.pow(6));
let level = level as u8;
debug_assert!(level <= 3);
num_bits | (level << 6)
}
/// Decodes the chunk header and output the bitpacker level
/// and the number of bits to decode the following bytes.
fn decode_chunk_header(data: u8) -> (BitPackerLevel, u8) {
let num_bits = data & 0b00111111;
let level = match data >> 6 {
0 => BitPackerLevel::None,
1 => BitPackerLevel::BitPacker1x,
2 => BitPackerLevel::BitPacker4x,
3 => BitPackerLevel::BitPacker8x,
invalid => panic!("Invalid bitpacker level: {invalid}"),
};
debug_assert!(num_bits as u32 <= 2_u32.pow(6));
(level, num_bits)
}
/// A simple helper trait to get the BitPackerLevel
/// and correctly generate the chunk header.
trait BitPackerExt: BitPacker {
/// Returns the level of the bitpacker: an identifier to be
/// able to decode the numbers with the right bitpacker.
fn level() -> BitPackerLevel;
}
impl BitPackerExt for BitPacker8x {
fn level() -> BitPackerLevel {
BitPackerLevel::BitPacker8x
}
}
impl BitPackerExt for BitPacker4x {
fn level() -> BitPackerLevel {
BitPackerLevel::BitPacker4x
}
}
impl BitPackerExt for BitPacker1x {
fn level() -> BitPackerLevel {
BitPackerLevel::BitPacker1x
}
}
#[cfg(test)]
mod tests {
use quickcheck::quickcheck;
use roaring::RoaringBitmap;
use super::DeRoaringBitmapCodec;
quickcheck! {
fn qc_random(xs: Vec<u32>) -> bool {
let bitmap = RoaringBitmap::from_iter(xs);
let mut compressed = Vec::new();
DeRoaringBitmapCodec::serialize_into(&bitmap, &mut compressed).unwrap();
let decompressed = DeRoaringBitmapCodec::deserialize_from(&compressed[..]).unwrap();
decompressed == bitmap
}
}
quickcheck! {
fn qc_random_check_serialized_size(xs: Vec<u32>) -> bool {
let bitmap = RoaringBitmap::from_iter(xs);
let mut compressed = Vec::new();
let mut tmp_buffer = Vec::new();
DeRoaringBitmapCodec::serialize_into(&bitmap, &mut compressed).unwrap();
let expected_len = DeRoaringBitmapCodec::serialized_size_with_tmp_buffer(&bitmap, &mut tmp_buffer);
compressed.len() == expected_len
}
}
}

View File

@@ -1,10 +1,7 @@
mod bo_roaring_bitmap_codec;
pub mod cbo_roaring_bitmap_codec;
pub mod de_cbo_roaring_bitmap_codec;
mod de_roaring_bitmap_codec;
mod roaring_bitmap_codec;
pub use self::bo_roaring_bitmap_codec::BoRoaringBitmapCodec;
// pub use self::cbo_roaring_bitmap_codec::CboRoaringBitmapCodec;
pub use self::de_cbo_roaring_bitmap_codec::{DeCboRoaringBitmapCodec, DELTA_ENCODING_STATUS};
pub use self::cbo_roaring_bitmap_codec::CboRoaringBitmapCodec;
pub use self::roaring_bitmap_codec::RoaringBitmapCodec;

View File

@@ -806,10 +806,6 @@ mod tests {
use crate::vector::db::IndexEmbeddingConfig;
use crate::{all_obkv_to_json, db_snap, Filter, FilterableAttributesRule, Search, UserError};
fn no_cancel() -> bool {
false
}
#[test]
fn simple_document_replacement() {
let index = TempIndex::new();
@@ -1989,7 +1985,7 @@ mod tests {
&rtxn,
None,
&mut new_fields_ids_map,
&no_cancel,
&|| false,
Progress::default(),
None,
)
@@ -2042,7 +2038,7 @@ mod tests {
&rtxn,
None,
&mut new_fields_ids_map,
&no_cancel,
&|| false,
Progress::default(),
None,
)
@@ -2061,7 +2057,7 @@ mod tests {
primary_key,
&document_changes,
RuntimeEmbedders::default(),
&no_cancel,
&|| false,
&Progress::default(),
&Default::default(),
)
@@ -2131,7 +2127,7 @@ mod tests {
&rtxn,
None,
&mut new_fields_ids_map,
&no_cancel,
&|| false,
Progress::default(),
None,
)
@@ -2150,7 +2146,7 @@ mod tests {
primary_key,
&document_changes,
RuntimeEmbedders::default(),
&no_cancel,
&|| false,
&Progress::default(),
&Default::default(),
)
@@ -2321,7 +2317,7 @@ mod tests {
&rtxn,
None,
&mut new_fields_ids_map,
&no_cancel,
&|| false,
Progress::default(),
None,
)
@@ -2337,7 +2333,7 @@ mod tests {
primary_key,
&document_changes,
embedders,
&no_cancel,
&|| false,
&Progress::default(),
&Default::default(),
)
@@ -2385,7 +2381,7 @@ mod tests {
&rtxn,
None,
&mut new_fields_ids_map,
&no_cancel,
&|| false,
Progress::default(),
None,
)
@@ -2401,7 +2397,7 @@ mod tests {
primary_key,
&document_changes,
embedders,
&no_cancel,
&|| false,
&Progress::default(),
&Default::default(),
)
@@ -2440,7 +2436,7 @@ mod tests {
&rtxn,
None,
&mut new_fields_ids_map,
&no_cancel,
&|| false,
Progress::default(),
None,
)
@@ -2456,7 +2452,7 @@ mod tests {
primary_key,
&document_changes,
embedders,
&no_cancel,
&|| false,
&Progress::default(),
&Default::default(),
)
@@ -2494,7 +2490,7 @@ mod tests {
&rtxn,
None,
&mut new_fields_ids_map,
&no_cancel,
&|| false,
Progress::default(),
None,
)
@@ -2510,7 +2506,7 @@ mod tests {
primary_key,
&document_changes,
embedders,
&no_cancel,
&|| false,
&Progress::default(),
&Default::default(),
)
@@ -2550,7 +2546,7 @@ mod tests {
&rtxn,
None,
&mut new_fields_ids_map,
&no_cancel,
&|| false,
Progress::default(),
None,
)
@@ -2566,7 +2562,7 @@ mod tests {
primary_key,
&document_changes,
embedders,
&no_cancel,
&|| false,
&Progress::default(),
&Default::default(),
)
@@ -2611,7 +2607,7 @@ mod tests {
&rtxn,
None,
&mut new_fields_ids_map,
&no_cancel,
&|| false,
Progress::default(),
None,
)
@@ -2627,7 +2623,7 @@ mod tests {
primary_key,
&document_changes,
embedders,
&no_cancel,
&|| false,
&Progress::default(),
&Default::default(),
)
@@ -2665,7 +2661,7 @@ mod tests {
&rtxn,
None,
&mut new_fields_ids_map,
&no_cancel,
&|| false,
Progress::default(),
None,
)
@@ -2681,7 +2677,7 @@ mod tests {
primary_key,
&document_changes,
embedders,
&no_cancel,
&|| false,
&Progress::default(),
&Default::default(),
)
@@ -2719,7 +2715,7 @@ mod tests {
&rtxn,
None,
&mut new_fields_ids_map,
&no_cancel,
&|| false,
Progress::default(),
None,
)
@@ -2735,7 +2731,7 @@ mod tests {
primary_key,
&document_changes,
embedders,
&no_cancel,
&|| false,
&Progress::default(),
&Default::default(),
)
@@ -2931,7 +2927,7 @@ mod tests {
&rtxn,
None,
&mut new_fields_ids_map,
&no_cancel,
&|| false,
Progress::default(),
None,
)
@@ -2947,7 +2943,7 @@ mod tests {
primary_key,
&document_changes,
embedders,
&no_cancel,
&|| false,
&Progress::default(),
&Default::default(),
)
@@ -2992,7 +2988,7 @@ mod tests {
&rtxn,
None,
&mut new_fields_ids_map,
&no_cancel,
&|| false,
Progress::default(),
None,
)
@@ -3008,7 +3004,7 @@ mod tests {
primary_key,
&document_changes,
embedders,
&no_cancel,
&|| false,
&Progress::default(),
&Default::default(),
)
@@ -3050,7 +3046,7 @@ mod tests {
&rtxn,
None,
&mut new_fields_ids_map,
&no_cancel,
&|| false,
Progress::default(),
None,
)
@@ -3066,7 +3062,7 @@ mod tests {
primary_key,
&document_changes,
embedders,
&no_cancel,
&|| false,
&Progress::default(),
&Default::default(),
)

View File

@@ -5,36 +5,103 @@ mod v1_15;
mod v1_16;
use heed::RwTxn;
use v1_12::{FixFieldDistribution, RecomputeStats};
use v1_13::AddNewStats;
use v1_14::UpgradeArroyVersion;
use v1_15::RecomputeWordFst;
use v1_16::SwitchToMultimodal;
use v1_12::{V1_12_3_To_V1_13_0, V1_12_To_V1_12_3};
use v1_13::{V1_13_0_To_V1_13_1, V1_13_1_To_Latest_V1_13};
use v1_14::Latest_V1_13_To_Latest_V1_14;
use v1_15::Latest_V1_14_To_Latest_V1_15;
use v1_16::Latest_V1_15_To_V1_16_0;
use crate::constants::{VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH};
use crate::progress::{Progress, VariableNameStep};
use crate::{Index, InternalError, Result};
trait UpgradeIndex {
/// Returns `true` if `upgrade` should be called when the index started with version `initial_version`.
fn must_upgrade(&self, initial_version: (u32, u32, u32)) -> bool;
/// Returns `true` if the index scheduler must regenerate its cached stats.
fn upgrade(&self, wtxn: &mut RwTxn, index: &Index, progress: Progress) -> Result<bool>;
/// Description of the upgrade for progress display purposes.
fn description(&self) -> &'static str;
fn upgrade(
&self,
wtxn: &mut RwTxn,
index: &Index,
original: (u32, u32, u32),
progress: Progress,
) -> Result<bool>;
fn target_version(&self) -> (u32, u32, u32);
}
const UPGRADE_FUNCTIONS: &[&dyn UpgradeIndex] = &[
&FixFieldDistribution {},
&RecomputeStats {},
&AddNewStats {},
&UpgradeArroyVersion {},
&RecomputeWordFst {},
&SwitchToMultimodal {},
&V1_12_To_V1_12_3 {},
&V1_12_3_To_V1_13_0 {},
&V1_13_0_To_V1_13_1 {},
&V1_13_1_To_Latest_V1_13 {},
&Latest_V1_13_To_Latest_V1_14 {},
&Latest_V1_14_To_Latest_V1_15 {},
&Latest_V1_15_To_V1_16_0 {},
&ToTargetNoOp { target: (1, 18, 0) },
&ToTargetNoOp { target: (1, 19, 0) },
&ToTargetNoOp { target: (1, 20, 0) },
&ToTargetNoOp { target: (1, 21, 0) },
&ToTargetNoOp { target: (1, 22, 0) },
&ToTargetNoOp { target: (1, 23, 0) },
&ToTargetNoOp { target: (1, 24, 0) },
&ToTargetNoOp { target: (1, 25, 0) },
&ToTargetNoOp { target: (1, 26, 0) },
&ToTargetNoOp { target: (1, 27, 0) },
&ToTargetNoOp { target: (1, 28, 0) },
// This is the last upgrade function, it will be called when the index is up to date.
// any other upgrade function should be added before this one.
&ToCurrentNoOp {},
];
/// Causes a compile-time error if the argument is not in range of `0..UPGRADE_FUNCTIONS.len()`
macro_rules! function_index {
($start:expr) => {{
const _CHECK_INDEX: () = {
if $start >= $crate::update::upgrade::UPGRADE_FUNCTIONS.len() {
panic!("upgrade functions out of range")
}
};
$start
}};
}
const fn start(from: (u32, u32, u32)) -> Option<usize> {
let start = match from {
(1, 12, 0..=2) => function_index!(0),
(1, 12, 3..) => function_index!(1),
(1, 13, 0) => function_index!(2),
(1, 13, _) => function_index!(4),
(1, 14, _) => function_index!(5),
// We must handle the current version in the match because in case of a failure some index may have been upgraded but not other.
(1, 15, _) => function_index!(6),
(1, 16, _) | (1, 17, _) => function_index!(7),
(1, 18, _) => function_index!(8),
(1, 19, _) => function_index!(9),
(1, 20, _) => function_index!(10),
(1, 21, _) => function_index!(11),
(1, 22, _) => function_index!(12),
(1, 23, _) => function_index!(13),
(1, 24, _) => function_index!(14),
(1, 25, _) => function_index!(15),
(1, 26, _) => function_index!(16),
(1, 27, _) => function_index!(17),
(1, 28, _) => function_index!(18),
// We deliberately don't add a placeholder with (VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH) here to force manually
// considering dumpless upgrade.
(_major, _minor, _patch) => return None,
};
Some(start)
}
/// Causes a compile-time error if the latest package cannot be upgraded.
///
/// This serves as a reminder to consider the proper dumpless upgrade implementation when changing the package version.
const _CHECK_PACKAGE_CAN_UPGRADE: () = {
if start((VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH)).is_none() {
panic!("cannot upgrade from latest package version")
}
};
/// Return true if the cached stats of the index must be regenerated
pub fn upgrade<MSP>(
wtxn: &mut RwTxn,
@@ -46,34 +113,79 @@ pub fn upgrade<MSP>(
where
MSP: Fn() -> bool + Sync,
{
let upgrade_functions = UPGRADE_FUNCTIONS;
let from = index.get_version(wtxn)?.unwrap_or(db_version);
let initial_version = index.get_version(wtxn)?.unwrap_or(db_version);
let start =
start(from).ok_or_else(|| InternalError::CannotUpgradeToVersion(from.0, from.1, from.2))?;
enum UpgradeVersion {}
let upgrade_path = &UPGRADE_FUNCTIONS[start..];
let mut current_version = from;
let mut regenerate_stats = false;
for (i, upgrade) in upgrade_functions.iter().enumerate() {
for (i, upgrade) in upgrade_path.iter().enumerate() {
if (must_stop_processing)() {
return Err(crate::Error::InternalError(InternalError::AbortedIndexation));
}
if upgrade.must_upgrade(initial_version) {
regenerate_stats |= upgrade.upgrade(wtxn, index, progress.clone())?;
progress.update_progress(VariableNameStep::<UpgradeVersion>::new(
upgrade.description(),
i as u32,
upgrade_functions.len() as u32,
));
} else {
progress.update_progress(VariableNameStep::<UpgradeVersion>::new(
"Skipping migration that must not be applied",
i as u32,
upgrade_functions.len() as u32,
));
}
let target = upgrade.target_version();
progress.update_progress(VariableNameStep::<UpgradeVersion>::new(
format!(
"Upgrading from v{}.{}.{} to v{}.{}.{}",
current_version.0,
current_version.1,
current_version.2,
target.0,
target.1,
target.2
),
i as u32,
upgrade_path.len() as u32,
));
regenerate_stats |= upgrade.upgrade(wtxn, index, from, progress.clone())?;
index.put_version(wtxn, target)?;
current_version = target;
}
index.put_version(wtxn, (VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH))?;
Ok(regenerate_stats)
}
#[allow(non_camel_case_types)]
struct ToCurrentNoOp {}
impl UpgradeIndex for ToCurrentNoOp {
fn upgrade(
&self,
_wtxn: &mut RwTxn,
_index: &Index,
_original: (u32, u32, u32),
_progress: Progress,
) -> Result<bool> {
Ok(false)
}
fn target_version(&self) -> (u32, u32, u32) {
(VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH)
}
}
/// Perform no operation during the upgrade except changing to the specified target version.
#[allow(non_camel_case_types)]
struct ToTargetNoOp {
pub target: (u32, u32, u32),
}
impl UpgradeIndex for ToTargetNoOp {
fn upgrade(
&self,
_wtxn: &mut RwTxn,
_index: &Index,
_original: (u32, u32, u32),
_progress: Progress,
) -> Result<bool> {
Ok(false)
}
fn target_version(&self) -> (u32, u32, u32) {
self.target
}
}

View File

@@ -4,10 +4,17 @@ use super::UpgradeIndex;
use crate::progress::Progress;
use crate::{make_enum_progress, Index, Result};
pub(super) struct FixFieldDistribution {}
#[allow(non_camel_case_types)]
pub(super) struct V1_12_To_V1_12_3 {}
impl UpgradeIndex for FixFieldDistribution {
fn upgrade(&self, wtxn: &mut RwTxn, index: &Index, progress: Progress) -> Result<bool> {
impl UpgradeIndex for V1_12_To_V1_12_3 {
fn upgrade(
&self,
wtxn: &mut RwTxn,
index: &Index,
_original: (u32, u32, u32),
progress: Progress,
) -> Result<bool> {
make_enum_progress! {
enum FieldDistribution {
RebuildingFieldDistribution,
@@ -18,28 +25,27 @@ impl UpgradeIndex for FixFieldDistribution {
Ok(true)
}
fn must_upgrade(&self, initial_version: (u32, u32, u32)) -> bool {
initial_version < (1, 12, 3)
}
fn description(&self) -> &'static str {
"Recomputing field distribution which was wrong before v1.12.3"
fn target_version(&self) -> (u32, u32, u32) {
(1, 12, 3)
}
}
pub(super) struct RecomputeStats {}
#[allow(non_camel_case_types)]
pub(super) struct V1_12_3_To_V1_13_0 {}
impl UpgradeIndex for RecomputeStats {
fn upgrade(&self, _wtxn: &mut RwTxn, _index: &Index, _progress: Progress) -> Result<bool> {
impl UpgradeIndex for V1_12_3_To_V1_13_0 {
fn upgrade(
&self,
_wtxn: &mut RwTxn,
_index: &Index,
_original: (u32, u32, u32),
_progress: Progress,
) -> Result<bool> {
// recompute the indexes stats
Ok(true)
}
fn must_upgrade(&self, initial_version: (u32, u32, u32)) -> bool {
initial_version < (1, 13, 0)
}
fn description(&self) -> &'static str {
"Recomputing stats"
fn target_version(&self) -> (u32, u32, u32) {
(1, 13, 0)
}
}

View File

@@ -5,10 +5,17 @@ use crate::database_stats::DatabaseStats;
use crate::progress::Progress;
use crate::{make_enum_progress, Index, Result};
pub(super) struct AddNewStats();
#[allow(non_camel_case_types)]
pub(super) struct V1_13_0_To_V1_13_1();
impl UpgradeIndex for AddNewStats {
fn upgrade(&self, wtxn: &mut RwTxn, index: &Index, progress: Progress) -> Result<bool> {
impl UpgradeIndex for V1_13_0_To_V1_13_1 {
fn upgrade(
&self,
wtxn: &mut RwTxn,
index: &Index,
_original: (u32, u32, u32),
progress: Progress,
) -> Result<bool> {
make_enum_progress! {
enum DocumentsStats {
CreatingDocumentsStats,
@@ -23,11 +30,26 @@ impl UpgradeIndex for AddNewStats {
Ok(true)
}
fn must_upgrade(&self, initial_version: (u32, u32, u32)) -> bool {
initial_version < (1, 13, 1)
}
fn description(&self) -> &'static str {
"Computing newly introduced document stats"
fn target_version(&self) -> (u32, u32, u32) {
(1, 13, 1)
}
}
#[allow(non_camel_case_types)]
pub(super) struct V1_13_1_To_Latest_V1_13();
impl UpgradeIndex for V1_13_1_To_Latest_V1_13 {
fn upgrade(
&self,
_wtxn: &mut RwTxn,
_index: &Index,
_original: (u32, u32, u32),
_progress: Progress,
) -> Result<bool> {
Ok(false)
}
fn target_version(&self) -> (u32, u32, u32) {
(1, 13, 3)
}
}

View File

@@ -5,10 +5,17 @@ use super::UpgradeIndex;
use crate::progress::Progress;
use crate::{make_enum_progress, Index, Result};
pub(super) struct UpgradeArroyVersion();
#[allow(non_camel_case_types)]
pub(super) struct Latest_V1_13_To_Latest_V1_14();
impl UpgradeIndex for UpgradeArroyVersion {
fn upgrade(&self, wtxn: &mut RwTxn, index: &Index, progress: Progress) -> Result<bool> {
impl UpgradeIndex for Latest_V1_13_To_Latest_V1_14 {
fn upgrade(
&self,
wtxn: &mut RwTxn,
index: &Index,
_original: (u32, u32, u32),
progress: Progress,
) -> Result<bool> {
make_enum_progress! {
enum VectorStore {
UpdateInternalVersions,
@@ -28,11 +35,7 @@ impl UpgradeIndex for UpgradeArroyVersion {
Ok(false)
}
fn must_upgrade(&self, initial_version: (u32, u32, u32)) -> bool {
initial_version < (1, 14, 0)
}
fn description(&self) -> &'static str {
"Updating vector store with an internal version"
fn target_version(&self) -> (u32, u32, u32) {
(1, 14, 0)
}
}

View File

@@ -7,21 +7,25 @@ use crate::progress::Progress;
use crate::update::new::indexer::recompute_word_fst_from_word_docids_database;
use crate::{Index, Result};
pub(super) struct RecomputeWordFst();
#[allow(non_camel_case_types)]
pub(super) struct Latest_V1_14_To_Latest_V1_15();
impl UpgradeIndex for RecomputeWordFst {
fn upgrade(&self, wtxn: &mut RwTxn, index: &Index, progress: Progress) -> Result<bool> {
impl UpgradeIndex for Latest_V1_14_To_Latest_V1_15 {
fn upgrade(
&self,
wtxn: &mut RwTxn,
index: &Index,
_original: (u32, u32, u32),
progress: Progress,
) -> Result<bool> {
// Recompute the word FST from the word docids database.
recompute_word_fst_from_word_docids_database(index, wtxn, &progress)?;
Ok(false)
}
fn must_upgrade(&self, initial_version: (u32, u32, u32)) -> bool {
initial_version < (1, 15, 0)
}
fn description(&self) -> &'static str {
"Recomputing word FST from word docids database as it was wrong before v1.15.0"
fn target_version(&self) -> (u32, u32, u32) {
(1, 15, 0)
}
}

View File

@@ -6,10 +6,17 @@ use crate::progress::Progress;
use crate::vector::db::{EmbedderInfo, EmbeddingStatus};
use crate::{Index, InternalError, Result};
pub(super) struct SwitchToMultimodal();
#[allow(non_camel_case_types)]
pub(super) struct Latest_V1_15_To_V1_16_0();
impl UpgradeIndex for SwitchToMultimodal {
fn upgrade(&self, wtxn: &mut RwTxn, index: &Index, _progress: Progress) -> Result<bool> {
impl UpgradeIndex for Latest_V1_15_To_V1_16_0 {
fn upgrade(
&self,
wtxn: &mut RwTxn,
index: &Index,
_original: (u32, u32, u32),
_progress: Progress,
) -> Result<bool> {
let v1_15_indexing_configs = index
.main
.remap_types::<Str, SerdeJson<Vec<super::v1_15::IndexEmbeddingConfig>>>()
@@ -34,11 +41,8 @@ impl UpgradeIndex for SwitchToMultimodal {
Ok(false)
}
fn must_upgrade(&self, initial_version: (u32, u32, u32)) -> bool {
initial_version < (1, 16, 0)
}
fn description(&self) -> &'static str {
"Migrating the database for multimodal support"
fn target_version(&self) -> (u32, u32, u32) {
(1, 16, 0)
}
}

View File

@@ -2,7 +2,6 @@ use candle_core::Tensor;
use candle_nn::VarBuilder;
use candle_transformers::models::bert::{BertModel, Config as BertConfig, DTYPE};
use candle_transformers::models::modernbert::{Config as ModernConfig, ModernBert};
use candle_transformers::models::xlm_roberta::{Config as XlmRobertaConfig, XLMRobertaModel};
// FIXME: currently we'll be using the hub to retrieve model, in the future we might want to embed it into Meilisearch itself
use hf_hub::api::sync::Api;
use hf_hub::{Repo, RepoType};
@@ -90,7 +89,6 @@ impl Default for EmbedderOptions {
enum ModelKind {
Bert(BertModel),
Modern(ModernBert),
XlmRoberta(XLMRobertaModel),
}
/// Perform embedding of documents and queries
@@ -306,8 +304,7 @@ impl Embedder {
};
let is_modern = has_arch("modernbert");
let is_xlm_roberta = has_arch("xlm-roberta") || has_arch("xlm_roberta");
tracing::debug!(is_modern, is_xlm_roberta, model_type, "detected HF architecture");
tracing::debug!(is_modern, model_type, "detected HF architecture");
let mut tokenizer = Tokenizer::from_file(&tokenizer_filename)
.map_err(|inner| NewEmbedderError::open_tokenizer(tokenizer_filename, inner))?;
@@ -343,18 +340,6 @@ impl Embedder {
)
})?;
ModelKind::Modern(ModernBert::load(vb, &config).map_err(NewEmbedderError::load_model)?)
} else if is_xlm_roberta {
let config: XlmRobertaConfig = serde_json::from_str(&config_str).map_err(|inner| {
NewEmbedderError::deserialize_config(
options.model.clone(),
config_str.clone(),
config_filename.clone(),
inner,
)
})?;
ModelKind::XlmRoberta(
XLMRobertaModel::new(&config, vb).map_err(NewEmbedderError::load_model)?,
)
} else {
let config: BertConfig = serde_json::from_str(&config_str).map_err(|inner| {
NewEmbedderError::deserialize_config(
@@ -466,19 +451,6 @@ impl Embedder {
let mask = Tensor::stack(&[mask], 0).map_err(EmbedError::tensor_shape)?;
model.forward(&token_ids, &mask).map_err(EmbedError::model_forward)?
}
ModelKind::XlmRoberta(model) => {
let mut mask_vec = tokens.get_attention_mask().to_vec();
if mask_vec.len() > self.max_len {
mask_vec.truncate(self.max_len);
}
let mask = Tensor::new(mask_vec.as_slice(), &self.device)
.map_err(EmbedError::tensor_shape)?;
let mask = Tensor::stack(&[mask], 0).map_err(EmbedError::tensor_shape)?;
let token_type_ids = token_ids.zeros_like().map_err(EmbedError::tensor_shape)?;
model
.forward(&token_ids, &mask, &token_type_ids, None, None, None)
.map_err(EmbedError::model_forward)?
}
};
let embedding = Self::pooling(embeddings, self.pooling)?;

View File

@@ -1,5 +1,5 @@
use hannoy::distances::{Cosine, Hamming};
use hannoy::{ItemId, Searched};
use hannoy::ItemId;
use heed::{RoTxn, RwTxn, Unspecified};
use ordered_float::OrderedFloat;
use rand::SeedableRng as _;
@@ -974,7 +974,7 @@ impl VectorStore {
}
if let Some(mut ret) = searcher.by_item(rtxn, item)? {
results.append(&mut ret.nns);
results.append(&mut ret);
}
}
results.sort_unstable_by_key(|(_, distance)| OrderedFloat(*distance));
@@ -1028,9 +1028,10 @@ impl VectorStore {
searcher.candidates(filter);
}
let Searched { mut nns, did_cancel: _ } =
searcher.by_vector_with_cancellation(rtxn, vector, || time_budget.exceeded())?;
results.append(&mut nns);
let (res, _degraded) =
&mut searcher
.by_vector_with_cancellation(rtxn, vector, || time_budget.exceeded())?;
results.append(res);
}
results.sort_unstable_by_key(|(_, distance)| OrderedFloat(*distance));

View File

@@ -10,3 +10,5 @@ serde_json = "1.0"
clap = { version = "4.5.52", features = ["derive"] }
anyhow = "1.0.100"
utoipa = "5.4.0"
reqwest = { version = "0.12", features = ["blocking"] }
regex = "1.10"

View File

@@ -1,10 +1,15 @@
use std::collections::HashMap;
use std::path::PathBuf;
use anyhow::Result;
use anyhow::{Context, Result};
use clap::Parser;
use meilisearch::routes::MeilisearchApi;
use regex::Regex;
use serde_json::{json, Value};
use utoipa::OpenApi;
const CODE_SAMPLES_URL: &str = "https://raw.githubusercontent.com/meilisearch/documentation/6a694aad651772cc3805fb39ab4f0d92273795de/.code-samples.meilisearch.yaml";
#[derive(Parser)]
#[command(name = "openapi-generator")]
#[command(about = "Generate OpenAPI specification for Meilisearch")]
@@ -16,6 +21,10 @@ struct Cli {
/// Pretty print the JSON output
#[arg(short, long)]
pretty: bool,
/// Skip fetching code samples (offline mode)
#[arg(long)]
no_code_samples: bool,
}
fn main() -> Result<()> {
@@ -24,14 +33,23 @@ fn main() -> Result<()> {
// Generate the OpenAPI specification
let openapi = MeilisearchApi::openapi();
// Convert to serde_json::Value for modification
let mut openapi_value: Value = serde_json::to_value(&openapi)?;
// Fetch and add code samples if not disabled
if !cli.no_code_samples {
let code_samples = fetch_code_samples()?;
add_code_samples_to_openapi(&mut openapi_value, &code_samples)?;
}
// Determine output path
let output_path = cli.output.unwrap_or_else(|| PathBuf::from("meilisearch.json"));
// Serialize to JSON
let json = if cli.pretty {
serde_json::to_string_pretty(&openapi)?
serde_json::to_string_pretty(&openapi_value)?
} else {
serde_json::to_string(&openapi)?
serde_json::to_string(&openapi_value)?
};
// Write to file
@@ -41,3 +59,244 @@ fn main() -> Result<()> {
Ok(())
}
/// Fetch and parse code samples from the documentation repository
fn fetch_code_samples() -> Result<HashMap<String, String>> {
let response = reqwest::blocking::get(CODE_SAMPLES_URL)
.context("Failed to fetch code samples")?
.text()
.context("Failed to read code samples response")?;
parse_code_samples_yaml(&response)
}
/// Parse the code samples YAML file
/// The format is:
/// ```yaml
/// # key_name
/// some_id: |-
/// curl \
/// -X GET 'URL'
/// ```
/// We extract the comment as the key and everything after `|-` as the value
fn parse_code_samples_yaml(content: &str) -> Result<HashMap<String, String>> {
let mut samples: HashMap<String, String> = HashMap::new();
let mut current_key: Option<String> = None;
let mut current_value: Vec<String> = Vec::new();
let mut in_code_block = false;
let mut base_indent: Option<usize> = None;
// Regex to match the start of a code sample (key: |-)
let code_start_re = Regex::new(r"^[a-zA-Z0-9_]+:\s*\|-\s*$")?;
// Regex to match comment lines that define keys
let comment_re = Regex::new(r"^#\s*([a-zA-Z0-9_]+)\s*$")?;
for line in content.lines() {
// Check if this is a comment line defining a new key
if let Some(caps) = comment_re.captures(line) {
// Save previous sample if exists
if let Some(key) = current_key.take() {
if !current_value.is_empty() {
samples.insert(key, current_value.join("\n").trim_end().to_string());
}
}
current_key = Some(caps[1].to_string());
current_value.clear();
in_code_block = false;
base_indent = None;
continue;
}
// Check if this starts a new code block
if code_start_re.is_match(line) {
in_code_block = true;
base_indent = None;
continue;
}
// If we're in a code block and have a current key, collect the content
if in_code_block && current_key.is_some() {
// Empty line or line with only whitespace
if line.trim().is_empty() {
// Only add empty lines if we've already started collecting
if !current_value.is_empty() {
current_value.push(String::new());
}
continue;
}
// Calculate indentation
let indent = line.len() - line.trim_start().len();
// Set base indent from first non-empty line
if base_indent.is_none() {
base_indent = Some(indent);
}
// If line has less indentation than base, we've exited the block
if indent < base_indent.unwrap_or(0) && !line.trim().is_empty() {
in_code_block = false;
continue;
}
// Remove base indentation and add to value
let base = base_indent.unwrap_or(0);
let dedented = if line.len() > base { &line[base..] } else { line.trim_start() };
current_value.push(dedented.to_string());
}
}
// Don't forget the last sample
if let Some(key) = current_key {
if !current_value.is_empty() {
samples.insert(key, current_value.join("\n").trim_end().to_string());
}
}
Ok(samples)
}
/// Convert an OpenAPI path to a code sample key
/// Path: /indexes/{index_uid}/documents/{document_id}
/// Method: GET
/// Key: get_indexes_indexUid_documents_documentId
fn path_to_key(path: &str, method: &str) -> String {
let method_lower = method.to_lowercase();
// Remove leading slash and convert path
let path_part = path
.trim_start_matches('/')
.split('/')
.map(|segment| {
if segment.starts_with('{') && segment.ends_with('}') {
// Convert {param_name} to camelCase
let param = &segment[1..segment.len() - 1];
to_camel_case(param)
} else {
// Keep path segments as-is, but replace hyphens with underscores
segment.replace('-', "_")
}
})
.collect::<Vec<_>>()
.join("_");
if path_part.is_empty() {
method_lower
} else {
format!("{}_{}", method_lower, path_part)
}
}
/// Convert snake_case to camelCase
fn to_camel_case(s: &str) -> String {
let mut result = String::new();
let mut capitalize_next = false;
for (i, c) in s.chars().enumerate() {
if c == '_' {
capitalize_next = true;
} else if capitalize_next {
result.push(c.to_ascii_uppercase());
capitalize_next = false;
} else if i == 0 {
result.push(c.to_ascii_lowercase());
} else {
result.push(c);
}
}
result
}
/// Add code samples to the OpenAPI specification
fn add_code_samples_to_openapi(
openapi: &mut Value,
code_samples: &HashMap<String, String>,
) -> Result<()> {
let paths = openapi
.get_mut("paths")
.and_then(|p| p.as_object_mut())
.context("OpenAPI spec missing 'paths' object")?;
for (path, path_item) in paths.iter_mut() {
let path_item = match path_item.as_object_mut() {
Some(p) => p,
None => continue,
};
let methods = ["get", "post", "put", "patch", "delete"];
for method in methods {
if let Some(operation) = path_item.get_mut(method) {
let key = path_to_key(path, method);
if let Some(sample) = code_samples.get(&key) {
// Create x-codeSamples array according to Redocly spec
let code_sample = json!([{
"lang": "cURL",
"source": sample
}]);
operation
.as_object_mut()
.map(|op| op.insert("x-codeSamples".to_string(), code_sample));
}
}
}
}
Ok(())
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_path_to_key() {
assert_eq!(path_to_key("/indexes", "GET"), "get_indexes");
assert_eq!(path_to_key("/indexes/{index_uid}", "GET"), "get_indexes_indexUid");
assert_eq!(
path_to_key("/indexes/{index_uid}/documents", "POST"),
"post_indexes_indexUid_documents"
);
assert_eq!(
path_to_key("/indexes/{index_uid}/documents/{document_id}", "GET"),
"get_indexes_indexUid_documents_documentId"
);
assert_eq!(path_to_key("/indexes/{index_uid}/settings/stop-words", "GET"), "get_indexes_indexUid_settings_stop_words");
}
#[test]
fn test_to_camel_case() {
assert_eq!(to_camel_case("index_uid"), "indexUid");
assert_eq!(to_camel_case("document_id"), "documentId");
assert_eq!(to_camel_case("task_uid"), "taskUid");
}
#[test]
fn test_parse_code_samples() {
let yaml = r#"
# get_indexes
list_all_indexes_1: |-
curl \
-X GET 'MEILISEARCH_URL/indexes'
# post_indexes
create_an_index_1: |-
curl \
-X POST 'MEILISEARCH_URL/indexes' \
-H 'Content-Type: application/json' \
--data-binary '{
"uid": "movies"
}'
"#;
let samples = parse_code_samples_yaml(yaml).unwrap();
assert_eq!(samples.len(), 2);
assert!(samples.contains_key("get_indexes"));
assert!(samples.contains_key("post_indexes"));
assert!(samples["get_indexes"].contains("curl"));
assert!(samples["post_indexes"].contains("POST"));
}
}

View File

@@ -22,7 +22,6 @@ reqwest = { version = "0.12.24", features = [
"json",
"rustls-tls",
], default-features = false }
semver = "1.0.27"
serde = { version = "1.0.228", features = ["derive"] }
serde_json = "1.0.145"
sha2 = "0.10.9"

View File

@@ -23,7 +23,7 @@ pub fn default_dashboard_url() -> String {
/// Run benchmarks from a workload
#[derive(Parser, Debug)]
pub struct BenchArgs {
pub struct BenchDeriveArgs {
/// Common arguments shared with other commands
#[command(flatten)]
common: CommonArgs,
@@ -59,7 +59,7 @@ pub struct BenchArgs {
binary_path: Option<PathBuf>,
}
pub fn run(args: BenchArgs) -> anyhow::Result<()> {
pub fn run(args: BenchDeriveArgs) -> anyhow::Result<()> {
setup_logs(&args.common.log_filter)?;
// fetch environment and build info

View File

@@ -12,7 +12,7 @@ use tokio::task::JoinHandle;
use uuid::Uuid;
use super::dashboard::DashboardClient;
use super::BenchArgs;
use super::BenchDeriveArgs;
use crate::common::assets::{self, Asset};
use crate::common::client::Client;
use crate::common::command::{run_commands, Command};
@@ -40,7 +40,7 @@ async fn run_workload_commands(
meili_client: &Arc<Client>,
workload_uuid: Uuid,
workload: &BenchWorkload,
args: &BenchArgs,
args: &BenchDeriveArgs,
run_number: u16,
) -> anyhow::Result<JoinHandle<anyhow::Result<File>>> {
let report_folder = &args.report_folder;
@@ -95,7 +95,7 @@ pub async fn execute(
invocation_uuid: Uuid,
master_key: Option<&str>,
workload: BenchWorkload,
args: &BenchArgs,
args: &BenchDeriveArgs,
binary_path: Option<&Path>,
) -> anyhow::Result<()> {
assets::fetch_assets(assets_client, &workload.assets, &args.common.asset_folder).await?;
@@ -143,7 +143,7 @@ async fn execute_run(
workload_uuid: Uuid,
master_key: Option<&str>,
workload: &BenchWorkload,
args: &BenchArgs,
args: &BenchDeriveArgs,
binary_path: Option<&Path>,
run_number: u16,
) -> anyhow::Result<tokio::task::JoinHandle<anyhow::Result<std::fs::File>>> {

View File

@@ -1,34 +1,16 @@
use std::{collections::HashSet, process::Stdio};
use std::collections::HashSet;
use anyhow::Context;
use clap::Parser;
use semver::{Prerelease, Version};
use xtask::{bench::BenchArgs, test::TestArgs};
/// This is the version of the crate but also the current Meilisearch version
pub const VERSION: &str = env!("CARGO_PKG_VERSION");
use xtask::{bench::BenchDeriveArgs, test::TestDeriveArgs};
/// List features available in the workspace
#[derive(Parser, Debug)]
struct ListFeaturesArgs {
struct ListFeaturesDeriveArgs {
/// Feature to exclude from the list. Use a comma to separate multiple features.
#[arg(short, long, value_delimiter = ',')]
exclude_feature: Vec<String>,
}
/// Create a git tag for the current version
///
/// The tag will of the form prototype-v<version>-<name>.<increment>
#[derive(Parser, Debug)]
struct PrototypeArgs {
/// Name of the prototype to generate
name: String,
/// If true refuses to increment the tag if it already exists
/// else refuses to generate new tag and expect the tag to exist.
#[arg(long)]
generate_new: bool,
}
/// Utilitary commands
#[derive(Parser, Debug)]
#[command(author, version, about, long_about)]
@@ -36,10 +18,9 @@ struct PrototypeArgs {
#[command(bin_name = "cargo xtask")]
#[allow(clippy::large_enum_variant)] // please, that's enough...
enum Command {
ListFeatures(ListFeaturesArgs),
Bench(BenchArgs),
GeneratePrototype(PrototypeArgs),
Test(TestArgs),
ListFeatures(ListFeaturesDeriveArgs),
Bench(BenchDeriveArgs),
Test(TestDeriveArgs),
}
fn main() -> anyhow::Result<()> {
@@ -47,13 +28,12 @@ fn main() -> anyhow::Result<()> {
match args {
Command::ListFeatures(args) => list_features(args),
Command::Bench(args) => xtask::bench::run(args)?,
Command::GeneratePrototype(args) => generate_prototype(args)?,
Command::Test(args) => xtask::test::run(args)?,
}
Ok(())
}
fn list_features(args: ListFeaturesArgs) {
fn list_features(args: ListFeaturesDeriveArgs) {
let exclude_features: HashSet<_> = args.exclude_feature.into_iter().collect();
let metadata = cargo_metadata::MetadataCommand::new().no_deps().exec().unwrap();
let features: Vec<String> = metadata
@@ -66,106 +46,3 @@ fn list_features(args: ListFeaturesArgs) {
let features = features.join(" ");
println!("{features}")
}
fn generate_prototype(args: PrototypeArgs) -> anyhow::Result<()> {
let PrototypeArgs { name, generate_new: create_new } = args;
if name.rsplit_once(['.', '-']).filter(|(_, t)| t.chars().all(char::is_numeric)).is_some() {
anyhow::bail!(
"The increment must not be part of the name and will be rather incremented by this command."
);
}
// 1. Fetch the crate version
let version = Version::parse(VERSION).context("while semver-parsing the crate version")?;
// 2. Pull tags from remote and retrieve last prototype tag
std::process::Command::new("git")
.arg("fetch")
.arg("--tags")
.stderr(Stdio::null())
.stdout(Stdio::null())
.status()?;
let output = std::process::Command::new("git")
.arg("tag")
.args(["--list", "prototype-v*"])
.stderr(Stdio::inherit())
.output()?;
let output =
String::try_from(output.stdout).context("while converting the tag list into a string")?;
let mut highest_increment = None;
for tag in output.lines() {
let Some(version) = tag.strip_prefix("prototype-v") else {
continue;
};
let Ok(version) = Version::parse(version) else {
continue;
};
let Ok(proto) = PrototypePrerelease::from_str(version.pre.as_str()) else {
continue;
};
if proto.name() == name {
highest_increment = match highest_increment {
Some(last) if last < proto.increment() => Some(proto.increment()),
Some(last) => Some(last),
None => Some(proto.increment()),
};
}
}
// 3. Generate the new tag name (without git, just a string)
let increment = match (create_new, highest_increment) {
(true, None) => 0,
(true, Some(increment)) => anyhow::bail!(
"A prototype with the name `{name}` already exists with increment `{increment}`"
),
(false, None) => anyhow::bail!(
"Prototype `{name}` is missing and must exist to be incremented.\n\
Use the --generate-new flag to create a new prototype with an increment at 0."
),
(false, Some(increment)) => {
increment.checked_add(1).context("While incrementing by one the increment")?
}
};
// Note that we cannot have leading zeros in the increment
let pre = format!("{name}.{increment}").parse().context("while parsing pre-release name")?;
let tag_name = Version { pre, ..version };
println!("prototype-v{tag_name}");
Ok(())
}
#[derive(Debug, Clone, PartialEq, Eq)]
struct PrototypePrerelease {
pre: Prerelease,
}
impl PrototypePrerelease {
fn from_str(s: &str) -> anyhow::Result<Self> {
Prerelease::new(s)
.map_err(Into::into)
.and_then(|pre| {
if pre.rsplit_once('.').is_some() {
Ok(pre)
} else {
Err(anyhow::anyhow!("Invalid prototype name, missing name or increment"))
}
})
.map(|pre| PrototypePrerelease { pre })
}
fn name(&self) -> &str {
self.pre.rsplit_once('.').expect("Missing prototype name").0
}
fn increment(&self) -> u32 {
self.pre
.as_str()
.rsplit_once('.')
.map(|(_, tail)| tail.parse().expect("Invalid increment"))
.expect("Missing increment")
}
}

View File

@@ -17,7 +17,7 @@ pub use workload::TestWorkload;
/// Run tests from a workload
#[derive(Parser, Debug)]
pub struct TestArgs {
pub struct TestDeriveArgs {
/// Common arguments shared with other commands
#[command(flatten)]
common: CommonArgs,
@@ -31,7 +31,7 @@ pub struct TestArgs {
pub add_missing_responses: bool,
}
pub fn run(args: TestArgs) -> anyhow::Result<()> {
pub fn run(args: TestDeriveArgs) -> anyhow::Result<()> {
let rt = tokio::runtime::Builder::new_current_thread().enable_io().enable_time().build()?;
let _scope = rt.enter();
@@ -40,7 +40,7 @@ pub fn run(args: TestArgs) -> anyhow::Result<()> {
Ok(())
}
async fn run_inner(args: TestArgs) -> anyhow::Result<()> {
async fn run_inner(args: TestDeriveArgs) -> anyhow::Result<()> {
setup_logs(&args.common.log_filter)?;
// setup clients

View File

@@ -12,7 +12,7 @@ use crate::common::command::{run_commands, Command};
use crate::common::instance::Binary;
use crate::common::process::{self, delete_db, kill_meili};
use crate::common::workload::Workload;
use crate::test::TestArgs;
use crate::test::TestDeriveArgs;
#[derive(Serialize, Deserialize, Debug)]
#[serde(untagged)]
@@ -81,7 +81,7 @@ pub struct TestWorkload {
impl TestWorkload {
pub async fn run(
mut self,
args: &TestArgs,
args: &TestDeriveArgs,
assets_client: &Client,
meili_client: &Arc<Client>,
asset_folder: &'static str,

View File

@@ -20,33 +20,29 @@ These make us iterate fast before stabilizing it for the current release.
### Release steps
The prototype name must [follow this convention](https://semver.org/#spec-item-11): `prototype-v<version>-<name>.<iteration>` where
The prototype name must follow this convention: `prototype-v<version>.<name>-<number>` where
- `version` is the version of Meilisearch on which the prototype is based.
- `name` is the feature name formatted in `kebab-case`.
- `iteration` is the iteration of the prototype, starting from `0`.
- `name` is the feature name formatted in `kebab-case`. It should not end with a single number.
- `Y` is the version of the prototype, starting from `0`.
✅ Example: `prototype-v1.23.0-search-personalization.1`. </br>
❌ Bad example: `prototype-v1.23.0-search-personalization-0`: a dash separates the name and version. </br>
❌ Bad example: `prototype-v1.23.0.search-personalization.0`: a dot separates the version and name. </br>
✅ Example: `prototype-v1.23.0.search-personalization-0`. </br>
❌ Bad example: `prototype-search-personalization-0`: version is missing.</br>
❌ Bad example: `v1.23.0-auto-resize-0`: lacks the `prototype-` prefix. </br>
❌ Bad example: `prototype-v1.23.0-auto-resize`: lacks the version suffix. </br>
❌ Bad example: `prototype-v1.23.0-auto-resize.0-0`: feature name ends with something else than a number.
❌ Bad example: `v1.23.0.auto-resize-0`: lacks the `prototype` prefix. </br>
❌ Bad example: `prototype-v1.23.0.auto-resize`: lacks the version suffix. </br>
❌ Bad example: `prototype-v1.23.0.auto-resize-0-0`: feature name ends with a single number.
Steps to create a prototype:
1. In your terminal, go to the last commit of your branch (the one you want to provide as a prototype).
2. Use the `cargo xtask generate-prototype` command to generate the prototype name.
3. Create the tag using the `git tag` command.
4. Checkout the tag, run Meilisearch and check that it launches summary features a line: `Prototype: prototype-v<version>-<name>.<iteration>`.
5. Checkout back to your branch: `git checkout -`.
6. Push the tag: `git push origin prototype-v<version>-<name>.<iteration>`
7. Check that the [Docker CI](https://github.com/meilisearch/meilisearch/actions/workflows/publish-docker-images.yml) is now running.
2. Create a tag following the convention: `git tag prototype-X-Y`
3. Run Meilisearch and check that its launch summary features a line: `Prototype: prototype-X-Y` (you may need to switch branches and back after tagging for this to work).
3. Push the tag: `git push origin prototype-X-Y`
4. Check the [Docker CI](https://github.com/meilisearch/meilisearch/actions/workflows/publish-docker-images.yml) is now running.
🐳 Once the CI has finished to run, a Docker image named `prototype-v<version>-<name>.<iteration>` will be available on [DockerHub](https://hub.docker.com/repository/docker/getmeili/meilisearch/general). People can use it with the following command: `docker run -p 7700:7700 -v $(pwd)/meili_data:/meili_data getmeili/meilisearch:prototype-v<version>-<name>.<iteration>`. <br>
🐳 Once the CI has finished to run (~1h30), a Docker image named `prototype-X-Y` will be available on [DockerHub](https://hub.docker.com/repository/docker/getmeili/meilisearch/general). People can use it with the following command: `docker run -p 7700:7700 -v $(pwd)/meili_data:/meili_data getmeili/meilisearch:prototype-X-Y`. <br>
More information about [how to run Meilisearch with Docker](https://docs.meilisearch.com/learn/cookbooks/docker.html#download-meilisearch-with-docker).
⚠️ However, no binaries will be created. If the users do not use Docker, they can go to the `prototype-v<version>-<name>.<iteration>` tag in the Meilisearch repository and compile it from the source code.
⚠️ However, no binaries will be created. If the users do not use Docker, they can go to the `prototype-X-Y` tag in the Meilisearch repository and compile it from the source code.
### Communication
@@ -67,7 +63,7 @@ Here is an example of messages to share on GitHub:
> How to run the prototype?
> You need to start from a fresh new database (remove the previous used `data.ms`) and use the following Docker image:
> ```bash
> docker run -it --rm -p 7700:7700 -v $(pwd)/meili_data:/meili_data getmeili/meilisearch:prototype-v<version>-<name>.<iteration>
> docker run -it --rm -p 7700:7700 -v $(pwd)/meili_data:/meili_data getmeili/meilisearch:prototype-X-Y
> ```
>
> You can use the feature this way: