Mirror of https://github.com/meilisearch/meilisearch.git

Compare commits (9)
Comparing delta-enco...openapi-co
| Commit (SHA1) |
|---|
| bb69b9029c |
| 3e4eae1227 |
| 98439617a4 |
| 64da9e1ea2 |
| e774e7080d |
| b1543fb477 |
| 6d0c58c7df |
| 5bf56279ed |
| 876cb17835 |
.github/workflows/publish-release-assets.yml (vendored, 4 changed lines)

@@ -104,13 +104,13 @@ jobs:
       - name: Generate OpenAPI file
         run: |
           cd crates/openapi-generator
-          cargo run --release -- --pretty --output ../../meilisearch.json
+          cargo run --release -- --pretty --output ../../meilisearch-openapi.json
       - name: Upload OpenAPI to Release
         # No need to upload for dry run (cron or workflow_dispatch)
         if: github.event_name == 'release'
         uses: svenstaro/upload-release-action@2.11.2
         with:
           repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
-          file: ./meilisearch.json
+          file: ./meilisearch-openapi.json
           asset_name: meilisearch-openapi.json
           tag: ${{ github.ref }}
.gitignore (vendored, 3 changed lines)

@@ -29,3 +29,6 @@ crates/meilisearch/db.snapshot

 # Fuzzcheck data for the facet indexing fuzz test
 crates/milli/fuzz/update::facet::incremental::fuzz::fuzz/
+
+# OpenAPI generator
+**/meilisearch-openapi.json
@@ -117,7 +117,7 @@ With swagger:
 With the internal crate:
 ```bash
 cd crates/openapi-generator
-cargo run --release -- --pretty --output meilisearch.json
+cargo run --release -- --pretty
 ```

 ### Logging
Cargo.lock (generated, 1298 changed lines)

File diff suppressed because it is too large.
@@ -300,7 +300,6 @@ impl Infos {
             max_indexing_memory,
             max_indexing_threads,
             skip_index_budget: _,
-            experimental_disable_delta_encoding: _,
             experimental_no_edition_2024_for_settings,
             experimental_no_edition_2024_for_dumps,
             experimental_no_edition_2024_for_prefix_post_processing,
@@ -21,7 +21,6 @@ use meilisearch::{
     LogStderrType, Opt, ServicesData, SubscriberForSecondLayer,
 };
 use meilisearch_auth::{generate_master_key, AuthController, MASTER_KEY_MIN_SIZE};
-use meilisearch_types::milli::heed_codec::DELTA_ENCODING_STATUS;
 use termcolor::{Color, ColorChoice, ColorSpec, StandardStream, WriteColor};
 use tracing::level_filters::LevelFilter;
 use tracing_subscriber::layer::SubscriberExt as _;
@@ -96,14 +95,6 @@ async fn main() -> anyhow::Result<()> {
 async fn try_main(runtime: tokio::runtime::Handle) -> anyhow::Result<()> {
     let (opt, config_read_from) = Opt::try_build()?;

-    // Disables the delta encoding of bitmaps as soon as possible
-    if opt.indexer_options.experimental_disable_delta_encoding {
-        DELTA_ENCODING_STATUS.set_to_disabled()
-    } else {
-        DELTA_ENCODING_STATUS.set_to_enabled()
-    }
-    .expect("the delta-encoding status to be set only once");
-
     std::panic::set_hook(Box::new(on_panic));

     anyhow::ensure!(
@@ -60,7 +60,6 @@ const MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_FACET_POST_PROCESSING: &str =
     "MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_FACET_POST_PROCESSING";
 const MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_PREFIX_POST_PROCESSING: &str =
     "MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_PREFIX_POST_PROCESSING";
-const MEILI_EXPERIMENTAL_DISABLE_DELTA_ENCODING: &str = "MEILI_EXPERIMENTAL_DISABLE_DELTA_ENCODING";
 const MEILI_EXPERIMENTAL_ENABLE_METRICS: &str = "MEILI_EXPERIMENTAL_ENABLE_METRICS";
 const MEILI_EXPERIMENTAL_SEARCH_QUEUE_SIZE: &str = "MEILI_EXPERIMENTAL_SEARCH_QUEUE_SIZE";
 const MEILI_EXPERIMENTAL_DROP_SEARCH_AFTER: &str = "MEILI_EXPERIMENTAL_DROP_SEARCH_AFTER";
@@ -846,14 +845,6 @@ pub struct IndexerOpts {
     #[clap(long, env = MEILI_EXPERIMENTAL_NO_EDITION_2024_FOR_FACET_POST_PROCESSING)]
     #[serde(default)]
     pub experimental_no_edition_2024_for_facet_post_processing: bool,
-
-    /// Experimental disable delta-encoding for bitmaps. For more information,
-    /// see: <https://github.com/orgs/meilisearch/discussions/875>
-    ///
-    /// Enables the experimental disable delta-encoding for bitmaps feature.
-    #[clap(long, env = MEILI_EXPERIMENTAL_DISABLE_DELTA_ENCODING)]
-    #[serde(default)]
-    pub experimental_disable_delta_encoding: bool,
 }

 impl IndexerOpts {
@@ -867,7 +858,6 @@ impl IndexerOpts {
             experimental_no_edition_2024_for_dumps,
             experimental_no_edition_2024_for_prefix_post_processing,
             experimental_no_edition_2024_for_facet_post_processing,
-            experimental_disable_delta_encoding,
         } = self;
         if let Some(max_indexing_memory) = max_indexing_memory.0 {
             export_to_env_if_not_present(
@@ -905,12 +895,6 @@ impl IndexerOpts {
                 experimental_no_edition_2024_for_facet_post_processing.to_string(),
             );
         }
-        if experimental_disable_delta_encoding {
-            export_to_env_if_not_present(
-                MEILI_EXPERIMENTAL_DISABLE_DELTA_ENCODING,
-                experimental_disable_delta_encoding.to_string(),
-            );
-        }
     }
 }

@@ -926,7 +910,6 @@ impl TryFrom<&IndexerOpts> for IndexerConfig {
             experimental_no_edition_2024_for_dumps,
             experimental_no_edition_2024_for_prefix_post_processing,
             experimental_no_edition_2024_for_facet_post_processing,
-            experimental_disable_delta_encoding: _, // managed in try_main
         } = other;

         let thread_pool = ThreadPoolNoAbortBuilder::new_for_indexing()
@@ -1262,7 +1245,7 @@ where
     T: AsRef<OsStr>,
 {
     if let Err(VarError::NotPresent) = std::env::var(key) {
-        unsafe { std::env::set_var(key, value) }
+        std::env::set_var(key, value);
     }
 }

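The unsafe-to-safe flip above (and the matching flips in the test-server hunks below) tracks the Rust edition: under edition 2024, `std::env::set_var` is an `unsafe fn` because mutating the process environment is not thread-safe, while earlier editions expose it as a safe function. A minimal sketch of the 2024-edition call shape (on older editions the `unsafe` block is merely redundant):

```rust
use std::ffi::OsStr;

// Edition 2024: std::env::set_var is an unsafe fn, so the call must be
// wrapped; the caller promises no other thread touches the environment.
fn set_env(key: impl AsRef<OsStr>, value: impl AsRef<OsStr>) {
    // Safety: called before any threads that read the environment are spawned.
    unsafe { std::env::set_var(key, value) }
}

fn main() {
    set_env("TMPDIR", "/tmp/meili-tests");
    assert_eq!(std::env::var("TMPDIR").as_deref(), Ok("/tmp/meili-tests"));
}
```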
@@ -34,7 +34,7 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
 /// Compact an index
 #[utoipa::path(
     post,
-    path = "{indexUid}/compact",
+    path = "/{indexUid}/compact",
     tag = "Compact an index",
     security(("Bearer" = ["search", "*"])),
     params(("indexUid" = String, Path, example = "movies", description = "Index Unique Identifier", nullable = false)),
@@ -43,9 +43,9 @@ impl Server<Owned> {
         let dir = TempDir::new().unwrap();

         if cfg!(windows) {
-            unsafe { std::env::set_var("TMP", TEST_TEMP_DIR.path()) }
+            std::env::set_var("TMP", TEST_TEMP_DIR.path());
         } else {
-            unsafe { std::env::set_var("TMPDIR", TEST_TEMP_DIR.path()) }
+            std::env::set_var("TMPDIR", TEST_TEMP_DIR.path());
         }

         let options = default_settings(dir.path());
@@ -58,9 +58,9 @@ impl Server<Owned> {

     pub async fn new_auth_with_options(mut options: Opt, dir: TempDir) -> Self {
         if cfg!(windows) {
-            unsafe { std::env::set_var("TMP", TEST_TEMP_DIR.path()) }
+            std::env::set_var("TMP", TEST_TEMP_DIR.path());
         } else {
-            unsafe { std::env::set_var("TMPDIR", TEST_TEMP_DIR.path()) }
+            std::env::set_var("TMPDIR", TEST_TEMP_DIR.path());
         }

         options.master_key = Some("MASTER_KEY".to_string());
@@ -215,9 +215,9 @@ impl Server<Shared> {
         let dir = TempDir::new().unwrap();

         if cfg!(windows) {
-            unsafe { std::env::set_var("TMP", TEST_TEMP_DIR.path()) }
+            std::env::set_var("TMP", TEST_TEMP_DIR.path());
         } else {
-            unsafe { std::env::set_var("TMPDIR", TEST_TEMP_DIR.path()) }
+            std::env::set_var("TMPDIR", TEST_TEMP_DIR.path());
         }

         let options = default_settings(dir.path());
@@ -508,8 +508,6 @@ pub fn default_settings(dir: impl AsRef<Path>) -> Opt {
             experimental_no_edition_2024_for_dumps: false,
             experimental_no_edition_2024_for_prefix_post_processing: false,
             experimental_no_edition_2024_for_facet_post_processing: false,
-            // It has no effect to set the delta encoding here as the toggle is done in try_main
-            experimental_disable_delta_encoding: false,
         },
         experimental_enable_metrics: false,
         ..Parser::parse_from(None as Option<&str>)
@@ -120,16 +120,14 @@ twox-hash = { version = "2.1.2", default-features = false, features = [
 ] }
 geo-types = "0.7.17"
 zerometry = "0.3.0"
-bitpacking = "0.9.2"

 [dev-dependencies]
-mimalloc = { version = "0.1.48", default-features = false }
 # fixed version due to format breakages in v1.40
 insta = "=1.39.0"
+mimalloc = { version = "0.1.48", default-features = false }
 maplit = "1.0.2"
 md5 = "0.8.0"
 meili-snap = { path = "../meili-snap" }
-quickcheck = "1.0.3"
 rand = { version = "0.8.5", features = ["small_rng"] }

 [features]
@@ -22,10 +22,7 @@ pub use self::beu32_str_codec::BEU32StrCodec;
 pub use self::field_id_word_count_codec::FieldIdWordCountCodec;
 pub use self::fst_set_codec::FstSetCodec;
 pub use self::obkv_codec::ObkvCodec;
-pub use self::roaring_bitmap::{
-    BoRoaringBitmapCodec, CboRoaringBitmapCodec, DeCboRoaringBitmapCodec, RoaringBitmapCodec,
-    DELTA_ENCODING_STATUS,
-};
+pub use self::roaring_bitmap::{BoRoaringBitmapCodec, CboRoaringBitmapCodec, RoaringBitmapCodec};
 pub use self::roaring_bitmap_length::{
     BoRoaringBitmapLenCodec, CboRoaringBitmapLenCodec, RoaringBitmapLenCodec,
 };
@@ -19,19 +19,8 @@ pub const THRESHOLD: usize = 7;
 pub struct CboRoaringBitmapCodec;

 impl CboRoaringBitmapCodec {
-    /// If the number of items (u32s) to encode is less than or equal to the threshold
-    /// it means that it would weigh the same or less than the RoaringBitmap
-    /// header, so we directly encode them using ByteOrder instead.
-    pub fn bitmap_serialize_as_raw_u32s(roaring: &RoaringBitmap) -> bool {
-        roaring.len() <= THRESHOLD as u64
-    }
-
-    pub fn bytes_deserialize_as_raw_u32s(bytes: &[u8]) -> bool {
-        bytes.len() <= THRESHOLD * size_of::<u32>()
-    }
-
     pub fn serialized_size(roaring: &RoaringBitmap) -> usize {
-        if Self::bitmap_serialize_as_raw_u32s(roaring) {
+        if roaring.len() <= THRESHOLD as u64 {
             roaring.len() as usize * size_of::<u32>()
         } else {
             roaring.serialized_size()
@@ -46,7 +35,10 @@ impl CboRoaringBitmapCodec {
         roaring: &RoaringBitmap,
         mut writer: W,
     ) -> io::Result<()> {
-        if Self::bitmap_serialize_as_raw_u32s(roaring) {
+        if roaring.len() <= THRESHOLD as u64 {
+            // If the number of items (u32s) to encode is less than or equal to the threshold
+            // it means that it would weigh the same or less than the RoaringBitmap
+            // header, so we directly encode them using ByteOrder instead.
             for integer in roaring {
                 writer.write_u32::<NativeEndian>(integer)?;
             }
@@ -59,7 +51,7 @@ impl CboRoaringBitmapCodec {
     }

     pub fn deserialize_from(mut bytes: &[u8]) -> io::Result<RoaringBitmap> {
-        if Self::bytes_deserialize_as_raw_u32s(bytes) {
+        if bytes.len() <= THRESHOLD * size_of::<u32>() {
             // If there is threshold or less than threshold integers that can fit into this array
             // of bytes it means that we used the ByteOrder codec serializer.
             let mut bitmap = RoaringBitmap::new();
@@ -79,7 +71,7 @@ impl CboRoaringBitmapCodec {
         other: &RoaringBitmap,
     ) -> io::Result<RoaringBitmap> {
         // See above `deserialize_from` method for implementation details.
-        if Self::bytes_deserialize_as_raw_u32s(bytes) {
+        if bytes.len() <= THRESHOLD * size_of::<u32>() {
             let mut bitmap = RoaringBitmap::new();
             while let Ok(integer) = bytes.read_u32::<NativeEndian>() {
                 if other.contains(integer) {
@@ -106,7 +98,7 @@ impl CboRoaringBitmapCodec {
         let mut vec = Vec::new();

         for bytes in slices {
-            if Self::bytes_deserialize_as_raw_u32s(bytes.as_ref()) {
+            if bytes.as_ref().len() <= THRESHOLD * size_of::<u32>() {
                 let mut reader = bytes.as_ref();
                 while let Ok(integer) = reader.read_u32::<NativeEndian>() {
                     vec.push(integer);
@@ -120,8 +112,6 @@ impl CboRoaringBitmapCodec {
             vec.sort_unstable();
             vec.dedup();

-            // Be careful when modifying this condition,
-            // the rule must be the same everywhere
             if vec.len() <= THRESHOLD {
                 for integer in vec {
                     buffer.extend_from_slice(&integer.to_ne_bytes());
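The inlined checks above all encode the same rule: a bitmap with at most THRESHOLD (7) integers is written as raw native-endian u32s with no header, because 7 x 4 = 28 bytes never weighs more than a full RoaringBitmap serialization. A standalone sketch of that decision using the public `roaring` crate API (the helper below is illustrative, not the crate's code):

```rust
use roaring::RoaringBitmap;

const THRESHOLD: usize = 7;

// Mirrors the codec's choice: raw u32s at or below the threshold, the
// standard RoaringBitmap format (with its header) above it.
fn serialized_size(bitmap: &RoaringBitmap) -> usize {
    if bitmap.len() <= THRESHOLD as u64 {
        bitmap.len() as usize * std::mem::size_of::<u32>()
    } else {
        bitmap.serialized_size()
    }
}

fn main() {
    let small: RoaringBitmap = (0u32..7).collect();
    let large: RoaringBitmap = (0u32..10_000).collect();
    assert_eq!(serialized_size(&small), 28); // 7 raw u32s, no header
    assert!(serialized_size(&large) > 28); // real bitmap serialization
}
```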
Deleted file (153 lines):

@@ -1,153 +0,0 @@
-use std::borrow::Cow;
-use std::io::{self, ErrorKind};
-use std::sync::OnceLock;
-
-use heed::BoxedError;
-use roaring::RoaringBitmap;
-
-use super::cbo_roaring_bitmap_codec::CboRoaringBitmapCodec;
-use super::de_roaring_bitmap_codec::DeRoaringBitmapCodec;
-use crate::heed_codec::BytesDecodeOwned;
-
-/// Defines the status of the delta encoding on whether we have enabled it or not.
-pub static DELTA_ENCODING_STATUS: DeltaEncodingStatusLock = DeltaEncodingStatusLock::new();
-
-pub struct DeCboRoaringBitmapCodec;
-
-impl DeCboRoaringBitmapCodec {
-    pub fn serialized_size_with_tmp_buffer(
-        bitmap: &RoaringBitmap,
-        tmp_buffer: &mut Vec<u32>,
-    ) -> usize {
-        // We are stuck with this format because the CboRoaringBitmapCodec decides to write
-        // raw and unencoded u32s, without a header, when there are at most THRESHOLD elements.
-        if CboRoaringBitmapCodec::bitmap_serialize_as_raw_u32s(bitmap)
-            && DELTA_ENCODING_STATUS.is_disabled()
-        {
-            CboRoaringBitmapCodec::serialized_size(bitmap)
-        } else {
-            DeRoaringBitmapCodec::serialized_size_with_tmp_buffer(bitmap, tmp_buffer)
-        }
-    }
-
-    /// Writes the delta-encoded compressed version of
-    /// the given roaring bitmap into the provided writer.
-    pub fn serialize_into<W: io::Write>(bitmap: &RoaringBitmap, writer: W) -> io::Result<()> {
-        let mut tmp_buffer = Vec::new();
-        Self::serialize_into_with_tmp_buffer(bitmap, writer, &mut tmp_buffer)
-    }
-
-    /// Same as [Self::serialize_into] but accepts a buffer to avoid allocating one.
-    ///
-    /// Note that we always serialize the bitmap with the delta-encoded compressed version.
-    pub fn serialize_into_with_tmp_buffer<W: io::Write>(
-        bitmap: &RoaringBitmap,
-        writer: W,
-        tmp_buffer: &mut Vec<u32>,
-    ) -> io::Result<()> {
-        // We are stuck with this format because the CboRoaringBitmapCodec decides to write
-        // raw and unencoded u32s, without a header, when there are at most THRESHOLD elements.
-        if CboRoaringBitmapCodec::bitmap_serialize_as_raw_u32s(bitmap)
-            && DELTA_ENCODING_STATUS.is_disabled()
-        {
-            CboRoaringBitmapCodec::serialize_into_writer(bitmap, writer)
-        } else {
-            DeRoaringBitmapCodec::serialize_into_with_tmp_buffer(bitmap, writer, tmp_buffer)
-        }
-    }
-
-    /// Returns the delta-decoded roaring bitmap from the compressed bytes.
-    pub fn deserialize_from(compressed: &[u8]) -> io::Result<RoaringBitmap> {
-        let mut tmp_buffer = Vec::new();
-        Self::deserialize_from_with_tmp_buffer(compressed, &mut tmp_buffer)
-    }
-
-    /// Same as [Self::deserialize_from] but accepts a buffer to avoid allocating one.
-    ///
-    /// It tries to decode the input by using the delta-decoded version and
-    /// if it fails, falls back to the CboRoaringBitmap version.
-    pub fn deserialize_from_with_tmp_buffer(
-        input: &[u8],
-        tmp_buffer: &mut Vec<u32>,
-    ) -> io::Result<RoaringBitmap> {
-        // The input is too short to be a valid delta-decoded bitmap.
-        // We fall back to the CboRoaringBitmap version with raw u32s.
-        if CboRoaringBitmapCodec::bytes_deserialize_as_raw_u32s(input) {
-            return CboRoaringBitmapCodec::deserialize_from(input);
-        }
-
-        match DeRoaringBitmapCodec::deserialize_from_with_tmp_buffer(input, tmp_buffer) {
-            Ok(bitmap) => Ok(bitmap),
-            // If the error kind is Other, it means that the delta-decoder found
-            // an invalid magic header. We fall back to the CboRoaringBitmap version.
-            Err(e) if e.kind() == ErrorKind::Other => {
-                CboRoaringBitmapCodec::deserialize_from(input)
-            }
-            Err(e) => Err(e),
-        }
-    }
-}
-
-impl heed::BytesDecode<'_> for DeCboRoaringBitmapCodec {
-    type DItem = RoaringBitmap;
-
-    fn bytes_decode(bytes: &[u8]) -> Result<Self::DItem, BoxedError> {
-        Self::deserialize_from(bytes).map_err(Into::into)
-    }
-}
-
-impl BytesDecodeOwned for DeCboRoaringBitmapCodec {
-    type DItem = RoaringBitmap;
-
-    fn bytes_decode_owned(bytes: &[u8]) -> Result<Self::DItem, BoxedError> {
-        Self::deserialize_from(bytes).map_err(Into::into)
-    }
-}
-
-impl heed::BytesEncode<'_> for DeCboRoaringBitmapCodec {
-    type EItem = RoaringBitmap;
-
-    fn bytes_encode(item: &Self::EItem) -> Result<Cow<'_, [u8]>, BoxedError> {
-        let mut tmp_buffer = Vec::new();
-        let capacity = Self::serialized_size_with_tmp_buffer(&item, &mut tmp_buffer);
-        let mut output = Vec::with_capacity(capacity);
-        Self::serialize_into_with_tmp_buffer(item, &mut output, &mut tmp_buffer)?;
-        Ok(Cow::Owned(output))
-    }
-}
-
-/// Manages the global status of the delta encoding.
-///
-/// Whether we must use delta encoding or not when encoding roaring bitmaps.
-pub struct DeltaEncodingStatusLock(OnceLock<DeltaEncodingStatus>);
-
-impl DeltaEncodingStatusLock {
-    pub const fn new() -> Self {
-        Self(OnceLock::new())
-    }
-}
-
-#[derive(Default)]
-enum DeltaEncodingStatus {
-    Enabled,
-    #[default]
-    Disabled,
-}
-
-impl DeltaEncodingStatusLock {
-    pub fn set_to_enabled(&self) -> Result<(), ()> {
-        self.0.set(DeltaEncodingStatus::Enabled).map_err(drop)
-    }
-
-    pub fn set_to_disabled(&self) -> Result<(), ()> {
-        self.0.set(DeltaEncodingStatus::Disabled).map_err(drop)
-    }
-
-    pub fn is_enabled(&self) -> bool {
-        matches!(self.0.get(), Some(DeltaEncodingStatus::Enabled))
-    }
-
-    pub fn is_disabled(&self) -> bool {
-        !self.is_enabled()
-    }
-}
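The deleted `DeltaEncodingStatusLock` is a thin wrapper around `std::sync::OnceLock`, which is what made the `.expect("the delta-encoding status to be set only once")` call removed from `try_main` work: the first `set` wins, any later `set` errors, and an unset lock reads as disabled. A reduced, self-contained sketch of the same pattern:

```rust
use std::sync::OnceLock;

static DELTA_ENCODING_ENABLED: OnceLock<bool> = OnceLock::new();

fn set_status(enabled: bool) -> Result<(), ()> {
    // OnceLock::set fails if a value was already stored.
    DELTA_ENCODING_ENABLED.set(enabled).map_err(drop)
}

fn is_enabled() -> bool {
    // Like the deleted is_enabled/is_disabled pair: unset means disabled.
    matches!(DELTA_ENCODING_ENABLED.get(), Some(true))
}

fn main() {
    assert!(!is_enabled()); // nothing set yet: reads as disabled
    set_status(true).expect("the status to be set only once");
    assert!(set_status(false).is_err()); // a second set is rejected
    assert!(is_enabled());
}
```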
Deleted file (377 lines):

@@ -1,377 +0,0 @@
-use std::io::{self, ErrorKind};
-use std::mem::{self, size_of, size_of_val};
-
-use bitpacking::{BitPacker, BitPacker1x, BitPacker4x, BitPacker8x};
-use roaring::RoaringBitmap;
-
-/// The magic header for our custom encoding format
-const MAGIC_HEADER: u16 = 36869;
-
-pub struct DeRoaringBitmapCodec;
-
-// TODO reintroduce:
-// - serialized_size?
-// - serialize_into_vec
-// - intersection_with_serialized
-// - merge_into
-// - merge_deladd_into
-impl DeRoaringBitmapCodec {
-    /// Returns the serialized size of the given roaring bitmap with the delta encoding format.
-    pub fn serialized_size_with_tmp_buffer(
-        bitmap: &RoaringBitmap,
-        tmp_buffer: &mut Vec<u32>,
-    ) -> usize {
-        let mut size = 2; // u16 magic header
-
-        let bitpacker8x = BitPacker8x::new();
-        let bitpacker4x = BitPacker4x::new();
-        let bitpacker1x = BitPacker1x::new();
-
-        // This temporary buffer is used to store each chunk of decompressed u32s.
-        tmp_buffer.resize(BitPacker8x::BLOCK_LEN, 0u32);
-        let decompressed = &mut tmp_buffer[..];
-
-        let mut buffer_index = 0;
-        let mut initial = None;
-        // We initially collect all the integers into a flat buffer of the size
-        // of the largest bitpacker. We encode them with it until we don't have
-        // enough of them...
-        for n in bitmap {
-            decompressed[buffer_index] = n;
-            buffer_index += 1;
-            if buffer_index == BitPacker8x::BLOCK_LEN {
-                let num_bits = bitpacker8x.num_bits_strictly_sorted(initial, decompressed);
-                let compressed_len = BitPacker8x::compressed_block_size(num_bits);
-                size += 1; // u8 chunk header
-                size += compressed_len; // compressed data length
-                initial = Some(n);
-                buffer_index = 0;
-            }
-        }
-
-        // ...We then switch to a smaller bitpacker to encode the remaining chunks...
-        let decompressed = &decompressed[..buffer_index];
-        let mut chunks = decompressed.chunks_exact(BitPacker4x::BLOCK_LEN);
-        for decompressed in chunks.by_ref() {
-            let num_bits = bitpacker4x.num_bits_strictly_sorted(initial, decompressed);
-            let compressed_len = BitPacker4x::compressed_block_size(num_bits);
-            size += 1; // u8 chunk header
-            size += compressed_len; // compressed data length
-            initial = decompressed.iter().last().copied();
-        }
-
-        // ...And so on...
-        let decompressed = chunks.remainder();
-        let mut chunks = decompressed.chunks_exact(BitPacker1x::BLOCK_LEN);
-        for decompressed in chunks.by_ref() {
-            let num_bits = bitpacker1x.num_bits_strictly_sorted(initial, decompressed);
-            let compressed_len = BitPacker1x::compressed_block_size(num_bits);
-            size += 1; // u8 chunk header
-            size += compressed_len; // compressed data length
-            initial = decompressed.iter().last().copied();
-        }
-
-        // ...Until we don't have any small enough bitpacker. We put them raw
-        // at the end of our buffer with a header indicating the matter.
-        let decompressed = chunks.remainder();
-        if !decompressed.is_empty() {
-            size += 1; // u8 chunk header
-            size += mem::size_of_val(decompressed); // remaining uncompressed u32s
-        }
-
-        size
-    }
-
-    /// Writes the delta-encoded compressed version of
-    /// the given roaring bitmap into the provided writer.
-    pub fn serialize_into<W: io::Write>(bitmap: &RoaringBitmap, writer: W) -> io::Result<()> {
-        let mut tmp_buffer = Vec::new();
-        Self::serialize_into_with_tmp_buffer(bitmap, writer, &mut tmp_buffer)
-    }
-
-    /// Same as [Self::serialize_into] but accepts a buffer to avoid allocating one.
-    pub fn serialize_into_with_tmp_buffer<W: io::Write>(
-        bitmap: &RoaringBitmap,
-        mut writer: W,
-        tmp_buffer: &mut Vec<u32>,
-    ) -> io::Result<()> {
-        // Insert the magic header
-        writer.write_all(&MAGIC_HEADER.to_ne_bytes())?;
-
-        let bitpacker8x = BitPacker8x::new();
-        let bitpacker4x = BitPacker4x::new();
-        let bitpacker1x = BitPacker1x::new();
-
-        // This temporary buffer is used to store each chunk of decompressed and
-        // compressed and delta-encoded u32s. We need room for the decompressed
-        // u32s coming from the roaring bitmap, the compressed output that can
-        // be as large as the decompressed u32s, and the chunk header.
-        tmp_buffer.resize((BitPacker8x::BLOCK_LEN * 2) + 1, 0u32);
-        let (decompressed, compressed) = tmp_buffer.split_at_mut(BitPacker8x::BLOCK_LEN);
-        let compressed = bytemuck::cast_slice_mut(compressed);
-
-        let mut buffer_index = 0;
-        let mut initial = None;
-        // We initially collect all the integers into a flat buffer of the size
-        // of the largest bitpacker. We encode them with it until we don't have
-        // enough of them...
-        for n in bitmap {
-            decompressed[buffer_index] = n;
-            buffer_index += 1;
-            if buffer_index == BitPacker8x::BLOCK_LEN {
-                let output = encode_with_packer(&bitpacker8x, decompressed, initial, compressed);
-                writer.write_all(output)?;
-                initial = Some(n);
-                buffer_index = 0;
-            }
-        }
-
-        // ...We then switch to a smaller bitpacker to encode the remaining chunks...
-        let decompressed = &decompressed[..buffer_index];
-        let mut chunks = decompressed.chunks_exact(BitPacker4x::BLOCK_LEN);
-        for decompressed in chunks.by_ref() {
-            let output = encode_with_packer(&bitpacker4x, decompressed, initial, compressed);
-            writer.write_all(output)?;
-            initial = decompressed.iter().last().copied();
-        }
-
-        // ...And so on...
-        let decompressed = chunks.remainder();
-        let mut chunks = decompressed.chunks_exact(BitPacker1x::BLOCK_LEN);
-        for decompressed in chunks.by_ref() {
-            let output = encode_with_packer(&bitpacker1x, decompressed, initial, compressed);
-            writer.write_all(output)?;
-            initial = decompressed.iter().last().copied();
-        }
-
-        // ...Until we don't have any small enough bitpacker. We put them raw
-        // at the end of our buffer with a header indicating the matter.
-        let decompressed = chunks.remainder();
-        if !decompressed.is_empty() {
-            let header = encode_chunk_header(BitPackerLevel::None, u32::BITS as u8);
-            // Note: Not convinced about the performance of writing a single
-            // byte followed by a larger write. However, we will use this
-            // codec with a BufWriter or directly with a Vec of bytes.
-            writer.write_all(&[header])?;
-            writer.write_all(bytemuck::cast_slice(decompressed))?;
-        }
-
-        Ok(())
-    }
-
-    /// Returns the delta-decoded roaring bitmap from the compressed bytes.
-    pub fn deserialize_from(compressed: &[u8]) -> io::Result<RoaringBitmap> {
-        let mut tmp_buffer = Vec::new();
-        Self::deserialize_from_with_tmp_buffer(compressed, &mut tmp_buffer)
-    }
-
-    /// Same as [Self::deserialize_from] but accepts a buffer to avoid allocating one.
-    pub fn deserialize_from_with_tmp_buffer(
-        input: &[u8],
-        tmp_buffer: &mut Vec<u32>,
-    ) -> io::Result<RoaringBitmap> {
-        let Some((header, mut compressed)) = input.split_at_checked(size_of_val(&MAGIC_HEADER))
-        else {
-            return Err(io::Error::new(ErrorKind::UnexpectedEof, "expecting a two-bytes header"));
-        };
-
-        // Safety: This unwrap cannot happen as the header buffer is the right size
-        let header = u16::from_ne_bytes(header.try_into().unwrap());
-
-        if header != MAGIC_HEADER {
-            return Err(io::Error::other("invalid header value"));
-        }
-
-        let bitpacker8x = BitPacker8x::new();
-        let bitpacker4x = BitPacker4x::new();
-        let bitpacker1x = BitPacker1x::new();
-
-        let mut bitmap = RoaringBitmap::new();
-        tmp_buffer.resize(BitPacker8x::BLOCK_LEN, 0u32);
-        let decompressed = &mut tmp_buffer[..];
-        let mut initial = None;
-
-        while let Some((&chunk_header, encoded)) = compressed.split_first() {
-            let (level, num_bits) = decode_chunk_header(chunk_header);
-            let (bytes_read, decompressed) = match level {
-                BitPackerLevel::None => {
-                    if num_bits != u32::BITS as u8 {
-                        return Err(io::Error::new(
-                            ErrorKind::InvalidData,
-                            "invalid number of bits to encode non-compressed u32s",
-                        ));
-                    }
-
-                    let chunks = encoded.chunks_exact(size_of::<u32>());
-                    if !chunks.remainder().is_empty() {
-                        return Err(io::Error::new(
-                            io::ErrorKind::InvalidData,
-                            "expecting last chunk to be a multiple of the size of an u32",
-                        ));
-                    }
-
-                    let integers = chunks
-                        // Safety: This unwrap cannot happen as
-                        // the size of u32 is set correctly.
-                        .map(|b| b.try_into().unwrap())
-                        .map(u32::from_ne_bytes);
-
-                    bitmap
-                        .append(integers)
-                        .map_err(|e| io::Error::new(ErrorKind::InvalidData, e))?;
-
-                    // This is basically always the last chunk that exists in
-                    // this delta-encoded format as the raw u32s are appended
-                    // when there is not enough of them to fit in a bitpacker.
-                    break;
-                }
-                BitPackerLevel::BitPacker1x => {
-                    decode_with_packer(&bitpacker1x, decompressed, initial, encoded, num_bits)
-                }
-                BitPackerLevel::BitPacker4x => {
-                    decode_with_packer(&bitpacker4x, decompressed, initial, encoded, num_bits)
-                }
-                BitPackerLevel::BitPacker8x => {
-                    decode_with_packer(&bitpacker8x, decompressed, initial, encoded, num_bits)
-                }
-            };
-
-            initial = decompressed.iter().last().copied();
-            // TODO investigate perf
-            // Safety: Bitpackers cannot output unsorted integers when
-            // used with the compress_strictly_sorted function.
-            bitmap.append(decompressed.iter().copied()).unwrap();
-            // What the delta-decoding read plus the chunk header size
-            compressed = &compressed[bytes_read + 1..];
-        }
-
-        Ok(bitmap)
-    }
-}
-
-/// Takes a strictly sorted list of u32s and outputs delta-encoded
-/// bytes with a chunk header. We expect the output buffer to be
-/// at least BLOCK_LEN + 1.
-fn encode_with_packer<'c, B: BitPackerExt>(
-    bitpacker: &B,
-    decompressed: &[u32],
-    initial: Option<u32>,
-    output: &'c mut [u8],
-) -> &'c [u8] {
-    let num_bits = bitpacker.num_bits_strictly_sorted(initial, decompressed);
-    let compressed_len = B::compressed_block_size(num_bits);
-    let chunk_header = encode_chunk_header(B::level(), num_bits);
-    let buffer = &mut output[..compressed_len + 1];
-    // Safety: The buffer is at least one byte
-    let (header_in_buffer, encoded) = buffer.split_first_mut().unwrap();
-    *header_in_buffer = chunk_header;
-    bitpacker.compress_strictly_sorted(initial, decompressed, encoded, num_bits);
-    buffer
-}
-
-/// Returns the number of bytes read and the decoded unsigned integers.
-fn decode_with_packer<'d, B: BitPacker>(
-    bitpacker: &B,
-    decompressed: &'d mut [u32],
-    initial: Option<u32>,
-    compressed: &[u8],
-    num_bits: u8,
-) -> (usize, &'d [u32]) {
-    let decompressed = &mut decompressed[..B::BLOCK_LEN];
-    let read = bitpacker.decompress_strictly_sorted(initial, compressed, decompressed, num_bits);
-    (read, decompressed)
-}
-
-/// An identifier for the bitpacker to be able
-/// to correctly decode the compressed integers.
-#[derive(Debug, PartialEq, Eq)]
-#[repr(u8)]
-enum BitPackerLevel {
-    /// The remaining bytes are raw little endian encoded u32s.
-    None,
-    /// The remaining bits are encoded using a `BitPacker1x`.
-    BitPacker1x,
-    /// The remaining bits are encoded using a `BitPacker4x`.
-    BitPacker4x,
-    /// The remaining bits are encoded using a `BitPacker8x`.
-    BitPacker8x,
-}
-
-/// Returns the chunk header based on the bitpacker level
-/// and the number of bits to encode the list of integers.
-fn encode_chunk_header(level: BitPackerLevel, num_bits: u8) -> u8 {
-    debug_assert!(num_bits as u32 <= 2_u32.pow(6));
-    let level = level as u8;
-    debug_assert!(level <= 3);
-    num_bits | (level << 6)
-}
-
-/// Decodes the chunk header and outputs the bitpacker level
-/// and the number of bits to decode the following bytes.
-fn decode_chunk_header(data: u8) -> (BitPackerLevel, u8) {
-    let num_bits = data & 0b00111111;
-    let level = match data >> 6 {
-        0 => BitPackerLevel::None,
-        1 => BitPackerLevel::BitPacker1x,
-        2 => BitPackerLevel::BitPacker4x,
-        3 => BitPackerLevel::BitPacker8x,
-        invalid => panic!("Invalid bitpacker level: {invalid}"),
-    };
-    debug_assert!(num_bits as u32 <= 2_u32.pow(6));
-    (level, num_bits)
-}
-
-/// A simple helper trait to get the BitPackerLevel
-/// and correctly generate the chunk header.
-trait BitPackerExt: BitPacker {
-    /// Returns the level of the bitpacker: an identifier to be
-    /// able to decode the numbers with the right bitpacker.
-    fn level() -> BitPackerLevel;
-}
-
-impl BitPackerExt for BitPacker8x {
-    fn level() -> BitPackerLevel {
-        BitPackerLevel::BitPacker8x
-    }
-}
-
-impl BitPackerExt for BitPacker4x {
-    fn level() -> BitPackerLevel {
-        BitPackerLevel::BitPacker4x
-    }
-}
-
-impl BitPackerExt for BitPacker1x {
-    fn level() -> BitPackerLevel {
-        BitPackerLevel::BitPacker1x
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use quickcheck::quickcheck;
-    use roaring::RoaringBitmap;
-
-    use super::DeRoaringBitmapCodec;
-
-    quickcheck! {
-        fn qc_random(xs: Vec<u32>) -> bool {
-            let bitmap = RoaringBitmap::from_iter(xs);
-            let mut compressed = Vec::new();
-            DeRoaringBitmapCodec::serialize_into(&bitmap, &mut compressed).unwrap();
-            let decompressed = DeRoaringBitmapCodec::deserialize_from(&compressed[..]).unwrap();
-            decompressed == bitmap
-        }
-    }
-
-    quickcheck! {
-        fn qc_random_check_serialized_size(xs: Vec<u32>) -> bool {
-            let bitmap = RoaringBitmap::from_iter(xs);
-            let mut compressed = Vec::new();
-            let mut tmp_buffer = Vec::new();
-            DeRoaringBitmapCodec::serialize_into(&bitmap, &mut compressed).unwrap();
-            let expected_len = DeRoaringBitmapCodec::serialized_size_with_tmp_buffer(&bitmap, &mut tmp_buffer);
-            compressed.len() == expected_len
-        }
-    }
-}
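Two details of the deleted format deserve a worked example. First, the encoder cascades block sizes; with the `bitpacking` crate's block lengths (256 for `BitPacker8x`, 128 for `BitPacker4x`, 32 for `BitPacker1x`), 300 sorted integers become one 256-integer 8x chunk, one 32-integer 1x chunk, and 12 trailing raw u32s. Second, each chunk starts with a one-byte header: the bitpacker level in the top two bits and `num_bits` in the low six. A standalone roundtrip check of that header layout (mirroring, not reusing, the deleted helpers):

```rust
// Level 0 = raw u32s, 1 = BitPacker1x, 2 = BitPacker4x, 3 = BitPacker8x.
fn encode_chunk_header(level: u8, num_bits: u8) -> u8 {
    debug_assert!(level <= 3 && num_bits <= 32);
    num_bits | (level << 6)
}

fn decode_chunk_header(header: u8) -> (u8, u8) {
    (header >> 6, header & 0b0011_1111)
}

fn main() {
    // Every (level, num_bits) pair survives the roundtrip.
    for level in 0..=3u8 {
        for num_bits in 0..=32u8 {
            let header = encode_chunk_header(level, num_bits);
            assert_eq!(decode_chunk_header(header), (level, num_bits));
        }
    }
    // The raw-u32 tail uses level 0 with num_bits = 32, i.e. 0b00_100000.
    assert_eq!(encode_chunk_header(0, 32), 0b0010_0000);
}
```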
@@ -1,10 +1,7 @@
 mod bo_roaring_bitmap_codec;
 pub mod cbo_roaring_bitmap_codec;
-pub mod de_cbo_roaring_bitmap_codec;
-mod de_roaring_bitmap_codec;
 mod roaring_bitmap_codec;

 pub use self::bo_roaring_bitmap_codec::BoRoaringBitmapCodec;
-// pub use self::cbo_roaring_bitmap_codec::CboRoaringBitmapCodec;
-pub use self::de_cbo_roaring_bitmap_codec::{DeCboRoaringBitmapCodec, DELTA_ENCODING_STATUS};
+pub use self::cbo_roaring_bitmap_codec::CboRoaringBitmapCodec;
 pub use self::roaring_bitmap_codec::RoaringBitmapCodec;
@@ -10,3 +10,5 @@ serde_json = "1.0"
 clap = { version = "4.5.52", features = ["derive"] }
 anyhow = "1.0.100"
 utoipa = "5.4.0"
+reqwest = { version = "0.12", features = ["blocking"] }
+regex = "1.10"
@@ -1,21 +1,57 @@
+use std::borrow::Cow;
+use std::collections::HashMap;
 use std::path::PathBuf;
+use std::sync::LazyLock;

-use anyhow::Result;
+use anyhow::{Context, Result};
 use clap::Parser;
 use meilisearch::routes::MeilisearchApi;
+use regex::Regex;
+use serde_json::{json, Value};
 use utoipa::OpenApi;

+const HTTP_METHODS: &[&str] = &["get", "post", "put", "patch", "delete"];
+
+/// Language used in the documentation repository (contains the key mapping)
+const DOCS_LANG: &str = "cURL";
+
+/// Mapping of repository URLs to language names.
+/// The "cURL" entry is special: it contains the key mapping used to resolve sample IDs for all SDKs.
+const CODE_SAMPLES: &[(&str, &str)] = &[
+    ("https://raw.githubusercontent.com/meilisearch/documentation/refs/heads/main/.code-samples.meilisearch.yaml", "cURL"),
+    ("https://raw.githubusercontent.com/meilisearch/meilisearch-dotnet/refs/heads/main/.code-samples.meilisearch.yaml", "C#"),
+    ("https://raw.githubusercontent.com/meilisearch/meilisearch-dart/refs/heads/main/.code-samples.meilisearch.yaml", "Dart"),
+    ("https://raw.githubusercontent.com/meilisearch/meilisearch-go/refs/heads/main/.code-samples.meilisearch.yaml", "Go"),
+    ("https://raw.githubusercontent.com/meilisearch/meilisearch-java/refs/heads/main/.code-samples.meilisearch.yaml", "Java"),
+    ("https://raw.githubusercontent.com/meilisearch/meilisearch-js/refs/heads/main/.code-samples.meilisearch.yaml", "JS"),
+    ("https://raw.githubusercontent.com/meilisearch/meilisearch-php/refs/heads/main/.code-samples.meilisearch.yaml", "PHP"),
+    ("https://raw.githubusercontent.com/meilisearch/meilisearch-python/refs/heads/main/.code-samples.meilisearch.yaml", "Python"),
+    ("https://raw.githubusercontent.com/meilisearch/meilisearch-ruby/refs/heads/main/.code-samples.meilisearch.yaml", "Ruby"),
+    ("https://raw.githubusercontent.com/meilisearch/meilisearch-rust/refs/heads/main/.code-samples.meilisearch.yaml", "Rust"),
+    ("https://raw.githubusercontent.com/meilisearch/meilisearch-swift/refs/heads/main/.code-samples.meilisearch.yaml", "Swift"),
+];
+
+// Pre-compiled regex patterns
+static COMMENT_RE: LazyLock<Regex> =
+    LazyLock::new(|| Regex::new(r"^#\s*([a-zA-Z0-9_]+)\s*$").unwrap());
+static CODE_START_RE: LazyLock<Regex> =
+    LazyLock::new(|| Regex::new(r"^([a-zA-Z0-9_]+):\s*\|-\s*$").unwrap());
+
 #[derive(Parser)]
 #[command(name = "openapi-generator")]
 #[command(about = "Generate OpenAPI specification for Meilisearch")]
 struct Cli {
-    /// Output file path (default: meilisearch.json)
+    /// Output file path (default: meilisearch-openapi.json)
     #[arg(short, long, value_name = "FILE")]
     output: Option<PathBuf>,

     /// Pretty print the JSON output
     #[arg(short, long)]
     pretty: bool,
+
+    /// Skip fetching code samples (offline mode)
+    #[arg(long)]
+    no_code_samples: bool,
 }

 fn main() -> Result<()> {
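As a quick sanity check of the two patterns above: `COMMENT_RE` captures the OpenAPI key from a `# key` comment line, and `CODE_START_RE` captures the sample ID that opens a YAML `|-` literal block. The sample lines below are taken from the tests at the end of this diff:

```rust
use regex::Regex;

fn main() {
    let comment_re = Regex::new(r"^#\s*([a-zA-Z0-9_]+)\s*$").unwrap();
    let code_start_re = Regex::new(r"^([a-zA-Z0-9_]+):\s*\|-\s*$").unwrap();

    // A comment line names the key the following samples belong to.
    let caps = comment_re.captures("# get_indexes").unwrap();
    assert_eq!(&caps[1], "get_indexes");

    // A `sample_id: |-` line opens one code sample block.
    let caps = code_start_re.captures("list_all_indexes_1: |-").unwrap();
    assert_eq!(&caps[1], "list_all_indexes_1");

    // Ordinary code lines match neither pattern.
    assert!(comment_re.captures("curl -X GET 'MEILISEARCH_URL/indexes'").is_none());
    assert!(code_start_re.captures("  apiKey: 'masterKey'").is_none());
}
```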
@@ -24,14 +60,23 @@ fn main() -> Result<()> {
     // Generate the OpenAPI specification
     let openapi = MeilisearchApi::openapi();

+    // Convert to serde_json::Value for modification
+    let mut openapi_value: Value = serde_json::to_value(&openapi)?;
+
+    // Fetch and add code samples if not disabled
+    if !cli.no_code_samples {
+        let code_samples = fetch_all_code_samples()?;
+        add_code_samples_to_openapi(&mut openapi_value, &code_samples)?;
+    }
+
     // Determine output path
-    let output_path = cli.output.unwrap_or_else(|| PathBuf::from("meilisearch.json"));
+    let output_path = cli.output.unwrap_or_else(|| PathBuf::from("meilisearch-openapi.json"));

     // Serialize to JSON
     let json = if cli.pretty {
-        serde_json::to_string_pretty(&openapi)?
+        serde_json::to_string_pretty(&openapi_value)?
     } else {
-        serde_json::to_string(&openapi)?
+        serde_json::to_string(&openapi_value)?
     };

     // Write to file
@@ -41,3 +86,364 @@ fn main() -> Result<()> {

     Ok(())
 }
+
+/// Code sample for a specific language
+#[derive(Debug, Clone)]
+struct CodeSample {
+    lang: String,
+    source: String,
+}
+
+/// Fetch and parse code samples from all repositories
+/// Returns a map from key (e.g., "get_indexes") to a list of code samples for different languages
+fn fetch_all_code_samples() -> Result<HashMap<String, Vec<CodeSample>>> {
+    // First, fetch the documentation file (cURL) to get the key mapping
+    let (docs_url, _) = CODE_SAMPLES
+        .iter()
+        .find(|(_, lang)| *lang == DOCS_LANG)
+        .context("Documentation source not found in CODE_SAMPLES")?;
+
+    let docs_content = reqwest::blocking::get(*docs_url)
+        .context("Failed to fetch documentation code samples")?
+        .text()
+        .context("Failed to read documentation code samples response")?;
+
+    let key_to_sample_ids = parse_documentation_mapping(&docs_content);
+
+    // Fetch code samples from all sources
+    let mut all_samples: HashMap<String, Vec<CodeSample>> = HashMap::new();
+
+    for (url, lang) in CODE_SAMPLES {
+        // For cURL, reuse already fetched content; for SDKs, fetch from URL
+        let content: Cow<'_, str> = if *lang == DOCS_LANG {
+            Cow::Borrowed(&docs_content)
+        } else {
+            match reqwest::blocking::get(*url).and_then(|r| r.text()) {
+                Ok(text) => Cow::Owned(text),
+                Err(e) => {
+                    eprintln!("Warning: Failed to fetch code samples for {}: {}", lang, e);
+                    continue;
+                }
+            }
+        };
+
+        let sample_id_to_code = parse_code_samples(&content);
+        for (key, sample_ids) in &key_to_sample_ids {
+            for sample_id in sample_ids {
+                if let Some(source) = sample_id_to_code.get(sample_id) {
+                    all_samples.entry(key.clone()).or_default().push(CodeSample {
+                        lang: lang.to_string(),
+                        source: source.clone(),
+                    });
+                }
+            }
+        }
+    }
+
+    Ok(all_samples)
+}
+
+/// Parse the documentation file to create a mapping from keys (comment IDs) to sample IDs
+/// Returns: HashMap<key, Vec<sample_id>>
+fn parse_documentation_mapping(content: &str) -> HashMap<String, Vec<String>> {
+    let mut mapping: HashMap<String, Vec<String>> = HashMap::new();
+    let mut current_key: Option<String> = None;
+
+    for line in content.lines() {
+        // Check if this is a comment line defining a new key
+        if let Some(caps) = COMMENT_RE.captures(line) {
+            current_key = Some(caps[1].to_string());
+            continue;
+        }
+
+        // Check if this starts a new code block and extract the sample_id
+        if let Some(caps) = CODE_START_RE.captures(line) {
+            if let Some(ref key) = current_key {
+                let sample_id = caps[1].to_string();
+                mapping.entry(key.clone()).or_default().push(sample_id);
+            }
+        }
+    }
+
+    mapping
+}
+
+/// State machine for parsing YAML code blocks
+struct YamlCodeBlockParser {
+    current_value: Vec<String>,
+    in_code_block: bool,
+    base_indent: Option<usize>,
+}
+
+impl YamlCodeBlockParser {
+    fn new() -> Self {
+        Self { current_value: Vec::new(), in_code_block: false, base_indent: None }
+    }
+
+    fn start_new_block(&mut self) {
+        self.current_value.clear();
+        self.in_code_block = true;
+        self.base_indent = None;
+    }
+
+    fn take_value(&mut self) -> Option<String> {
+        if self.current_value.is_empty() {
+            return None;
+        }
+        let value = self.current_value.join("\n").trim_end().to_string();
+        self.current_value.clear();
+        self.in_code_block = false;
+        self.base_indent = None;
+        Some(value)
+    }
+
+    fn process_line(&mut self, line: &str) {
+        if !self.in_code_block {
+            return;
+        }
+
+        // Empty line or line with only whitespace
+        if line.trim().is_empty() {
+            // Only add empty lines if we've already started collecting
+            if !self.current_value.is_empty() {
+                self.current_value.push(String::new());
+            }
+            return;
+        }
+
+        // Calculate indentation
+        let indent = line.len() - line.trim_start().len();
+
+        // Set base indent from first non-empty line
+        let base = *self.base_indent.get_or_insert(indent);
+
+        // If line has less indentation than base, we've exited the block
+        if indent < base {
+            self.in_code_block = false;
+            return;
+        }
+
+        // Remove base indentation and add to value
+        let dedented = line.get(base..).unwrap_or_else(|| line.trim_start());
+        self.current_value.push(dedented.to_string());
+    }
+}
+
+/// Parse a code samples YAML file
+/// Returns: HashMap<sample_id, code>
+fn parse_code_samples(content: &str) -> HashMap<String, String> {
+    let mut samples: HashMap<String, String> = HashMap::new();
+    let mut current_sample_id: Option<String> = None;
+    let mut parser = YamlCodeBlockParser::new();
+
+    for line in content.lines() {
+        // Ignore comment lines
+        if line.starts_with('#') {
+            continue;
+        }
+
+        // Check if this starts a new code block
+        if let Some(caps) = CODE_START_RE.captures(line) {
+            // Save previous sample if exists
+            if let Some(sample_id) = current_sample_id.take() {
+                if let Some(value) = parser.take_value() {
+                    samples.insert(sample_id, value);
+                }
+            }
+            current_sample_id = Some(caps[1].to_string());
+            parser.start_new_block();
+            continue;
+        }
+
+        if current_sample_id.is_some() {
+            parser.process_line(line);
+        }
+    }
+
+    // Don't forget the last sample
+    if let Some(sample_id) = current_sample_id {
+        if let Some(value) = parser.take_value() {
+            samples.insert(sample_id, value);
+        }
+    }
+
+    samples
+}
+
+/// Convert an OpenAPI path to a code sample key
+/// Path: /indexes/{index_uid}/documents/{document_id}
+/// Method: GET
+/// Key: get_indexes_indexUid_documents_documentId
+fn path_to_key(path: &str, method: &str) -> String {
+    let method_lower = method.to_lowercase();
+
+    // Remove leading slash and convert path
+    let path_part = path
+        .trim_start_matches('/')
+        .split('/')
+        .map(|segment| {
+            if segment.starts_with('{') && segment.ends_with('}') {
+                // Convert {param_name} to camelCase
+                let param = &segment[1..segment.len() - 1];
+                to_camel_case(param)
+            } else {
+                // Keep path segments as-is, but replace hyphens with underscores
+                segment.replace('-', "_")
+            }
+        })
+        .collect::<Vec<_>>()
+        .join("_");
+
+    if path_part.is_empty() {
+        method_lower
+    } else {
+        format!("{}_{}", method_lower, path_part)
+    }
+}
+
+/// Convert snake_case to camelCase
+fn to_camel_case(s: &str) -> String {
+    let mut result = String::with_capacity(s.len());
+    let mut capitalize_next = false;
+
+    for (i, c) in s.chars().enumerate() {
+        match c {
+            '_' => capitalize_next = true,
+            _ if capitalize_next => {
+                result.push(c.to_ascii_uppercase());
+                capitalize_next = false;
+            }
+            _ if i == 0 => result.push(c.to_ascii_lowercase()),
+            _ => result.push(c),
+        }
+    }
+
+    result
+}
+
+/// Add code samples to the OpenAPI specification
+fn add_code_samples_to_openapi(
+    openapi: &mut Value,
+    code_samples: &HashMap<String, Vec<CodeSample>>,
+) -> Result<()> {
+    let paths = openapi
+        .get_mut("paths")
+        .and_then(|p| p.as_object_mut())
+        .context("OpenAPI spec missing 'paths' object")?;
+
+    for (path, path_item) in paths.iter_mut() {
+        let Some(path_item) = path_item.as_object_mut() else {
+            continue;
+        };
+
+        for method in HTTP_METHODS {
+            let Some(operation) = path_item.get_mut(*method) else {
+                continue;
+            };
+
+            let key = path_to_key(path, method);
+
+            if let Some(samples) = code_samples.get(&key) {
+                // Create x-codeSamples array according to Redocly spec
+                // Sort by language name for consistent output
+                let mut sorted_samples = samples.clone();
+                sorted_samples.sort_by(|a, b| a.lang.cmp(&b.lang));
+
+                let code_sample_array: Vec<Value> = sorted_samples
+                    .iter()
+                    .map(|sample| {
+                        json!({
+                            "lang": sample.lang,
+                            "source": sample.source
+                        })
+                    })
+                    .collect();
+
+                if let Some(op) = operation.as_object_mut() {
+                    op.insert("x-codeSamples".to_string(), json!(code_sample_array));
+                }
+            }
+        }
+    }
+
+    Ok(())
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_path_to_key() {
+        assert_eq!(path_to_key("/indexes", "GET"), "get_indexes");
+        assert_eq!(path_to_key("/indexes/{index_uid}", "GET"), "get_indexes_indexUid");
+        assert_eq!(
+            path_to_key("/indexes/{index_uid}/documents", "POST"),
+            "post_indexes_indexUid_documents"
+        );
+        assert_eq!(
+            path_to_key("/indexes/{index_uid}/documents/{document_id}", "GET"),
+            "get_indexes_indexUid_documents_documentId"
+        );
+        assert_eq!(
+            path_to_key("/indexes/{index_uid}/settings/stop-words", "GET"),
+            "get_indexes_indexUid_settings_stop_words"
+        );
+    }
+
+    #[test]
+    fn test_to_camel_case() {
+        assert_eq!(to_camel_case("index_uid"), "indexUid");
+        assert_eq!(to_camel_case("document_id"), "documentId");
+        assert_eq!(to_camel_case("task_uid"), "taskUid");
+    }
+
+    #[test]
+    fn test_parse_documentation_mapping() {
+        let yaml = r#"
+# get_indexes
+list_all_indexes_1: |-
+  curl \
+  -X GET 'MEILISEARCH_URL/indexes'
+# post_indexes
+create_an_index_1: |-
+  curl \
+  -X POST 'MEILISEARCH_URL/indexes'
+another_sample_id: |-
+  curl \
+  -X POST 'MEILISEARCH_URL/indexes'
+"#;
+        let mapping = parse_documentation_mapping(yaml);
+
+        assert_eq!(mapping.len(), 2);
+        assert!(mapping.contains_key("get_indexes"));
+        assert!(mapping.contains_key("post_indexes"));
+        assert_eq!(mapping["get_indexes"], vec!["list_all_indexes_1"]);
+        assert_eq!(mapping["post_indexes"], vec!["create_an_index_1", "another_sample_id"]);
+    }
+
+    #[test]
+    fn test_parse_code_samples() {
+        let yaml = r#"
+# This is a comment that should be ignored
+list_all_indexes_1: |-
+  const client = new MeiliSearch({
+    host: 'http://localhost:7700',
+    apiKey: 'masterKey'
+  });
+
+  const response = await client.getIndexes();
+
+# Another comment
+create_an_index_1: |-
+  const task = await client.createIndex('movies');
+"#;
+        let samples = parse_code_samples(yaml);
+
+        assert_eq!(samples.len(), 2);
+        assert!(samples.contains_key("list_all_indexes_1"));
+        assert!(samples.contains_key("create_an_index_1"));
+        assert!(samples["list_all_indexes_1"].contains("getIndexes"));
+        assert!(samples["create_an_index_1"].contains("createIndex"));
+    }
+}
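For reference, a sketch of what `add_code_samples_to_openapi` leaves on one operation, with illustrative values; the `x-codeSamples` array follows the Redocly vendor extension, and because samples are sorted with a byte-wise string comparison, "JS" sorts before "cURL" (uppercase before lowercase):

```rust
use serde_json::json;

fn main() {
    // Hypothetical enriched operation: the lang/source pairs would come from
    // the fetched .code-samples files keyed by path_to_key("/indexes", "GET").
    let operation = json!({
        "x-codeSamples": [
            { "lang": "JS", "source": "const response = await client.getIndexes();" },
            { "lang": "cURL", "source": "curl -X GET 'MEILISEARCH_URL/indexes'" }
        ]
    });
    println!("{}", serde_json::to_string_pretty(&operation).unwrap());
}
```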