mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-07-26 16:21:07 +00:00
Merge #636
636: Remove unused `infos`, `http-ui`, and `milli/fuzz`, crates r=ManyTheFish a=loiclec We haven't used the `infos/`, `http-ui/` and `milli/fuzz/` crates in a long time. They are not properly maintained and probably do not work correctly anymore. This PR removes these crates entirely from the workspace to reduce the amount of code we need to maintain. Co-authored-by: Loïc Lecrenier <loic@meilisearch.com>
This commit is contained in:
@ -1,26 +0,0 @@
|
||||
# Milli
|
||||
|
||||
## Fuzzing milli
|
||||
|
||||
Currently you can only fuzz the indexing process.
|
||||
To execute the fuzzer run:
|
||||
```
|
||||
cargo +nightly fuzz run indexing
|
||||
```
|
||||
|
||||
To execute the fuzzer on multiple threads you can also run:
|
||||
```
|
||||
cargo +nightly fuzz run -j4 indexing
|
||||
```
|
||||
|
||||
Since the fuzzer is going to create a lot of temporary files to let milli index its documents,
|
||||
I would also recommend executing it on a ramdisk.
|
||||
Here is how to set up a ramdisk on Linux:
|
||||
```
|
||||
sudo mount -t tmpfs none path/to/your/ramdisk
|
||||
```
|
||||
And then set the [TMPDIR](https://doc.rust-lang.org/std/env/fn.temp_dir.html) environment variable
|
||||
to make the fuzzer create its files in it:
|
||||
```
|
||||
export TMPDIR=path/to/your/ramdisk
|
||||
```
|
5
milli/fuzz/.gitignore
vendored
5
milli/fuzz/.gitignore
vendored
@ -1,5 +0,0 @@
|
||||
Cargo.lock
|
||||
target/
|
||||
|
||||
/corpus/
|
||||
/artifacts/
|
@ -1,34 +0,0 @@
|
||||
[package]
|
||||
name = "milli-fuzz"
|
||||
version = "0.0.0"
|
||||
authors = ["Automatically generated"]
|
||||
publish = false
|
||||
edition = "2018"
|
||||
|
||||
[package.metadata]
|
||||
cargo-fuzz = true
|
||||
|
||||
[dependencies]
|
||||
arbitrary = "1.0"
|
||||
libfuzzer-sys = "0.4"
|
||||
serde_json = { version = "1.0.62", features = ["preserve_order"] }
|
||||
anyhow = "1.0"
|
||||
tempfile = "3.3"
|
||||
arbitrary-json = "0.1.0"
|
||||
mimalloc = { version = "0.1.29", default-features = false }
|
||||
|
||||
[dependencies.milli]
|
||||
path = ".."
|
||||
|
||||
# Prevent this from interfering with workspaces
|
||||
[workspace]
|
||||
members = ["."]
|
||||
|
||||
[profile.release]
|
||||
debug = true
|
||||
|
||||
[[bin]]
|
||||
name = "indexing"
|
||||
path = "fuzz_targets/indexing.rs"
|
||||
test = false
|
||||
doc = false
|
@ -1,114 +0,0 @@
|
||||
#![no_main]
|
||||
|
||||
use std::collections::HashSet;
|
||||
use std::io::{BufWriter, Cursor, Read, Seek, Write};
|
||||
|
||||
use anyhow::{bail, Result};
|
||||
use arbitrary_json::ArbitraryValue;
|
||||
use libfuzzer_sys::fuzz_target;
|
||||
use milli::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
|
||||
use milli::heed::EnvOpenOptions;
|
||||
use milli::update::{IndexDocuments, IndexDocumentsConfig, IndexerConfig, Settings};
|
||||
use milli::{Index, Object};
|
||||
use serde_json::{Map, Value};
|
||||
|
||||
// Registers mimalloc's `MiMalloc` as the global allocator for this fuzz target binary.
#[global_allocator]
|
||||
static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc;
|
||||
|
||||
/// reads json from input and write an obkv batch to writer.
|
||||
pub fn read_json(input: impl Read, writer: impl Write + Seek) -> Result<usize> {
|
||||
let writer = BufWriter::new(writer);
|
||||
let mut builder = DocumentsBatchBuilder::new(writer);
|
||||
|
||||
let values: Vec<Object> = serde_json::from_reader(input)?;
|
||||
if builder.documents_count() == 0 {
|
||||
bail!("Empty payload");
|
||||
}
|
||||
|
||||
for object in values {
|
||||
builder.append_json_object(&object)?;
|
||||
}
|
||||
|
||||
let count = builder.documents_count();
|
||||
let vector = builder.into_inner()?;
|
||||
|
||||
Ok(count as usize)
|
||||
}
|
||||
|
||||
fn index_documents(
|
||||
index: &mut milli::Index,
|
||||
documents: DocumentsBatchReader<Cursor<Vec<u8>>>,
|
||||
) -> Result<()> {
|
||||
let config = IndexerConfig::default();
|
||||
let mut wtxn = index.write_txn()?;
|
||||
|
||||
let indexing_config = IndexDocumentsConfig::default();
|
||||
let mut builder = IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ())?;
|
||||
builder.add_documents(documents)?;
|
||||
builder.execute().unwrap();
|
||||
|
||||
wtxn.commit()?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn create_index() -> Result<milli::Index> {
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let mut options = EnvOpenOptions::new();
|
||||
options.map_size(10 * 1024 * 1024 * 1024); // 10 GB
|
||||
options.max_readers(1);
|
||||
let index = Index::new(options, dir.path())?;
|
||||
|
||||
let config = IndexerConfig::default();
|
||||
let mut wtxn = index.write_txn().unwrap();
|
||||
|
||||
let mut builder = Settings::new(&mut wtxn, &index, &config);
|
||||
|
||||
let displayed_fields =
|
||||
["id", "title", "album", "artist", "genre", "country", "released", "duration"]
|
||||
.iter()
|
||||
.map(|s| s.to_string())
|
||||
.collect();
|
||||
builder.set_displayed_fields(displayed_fields);
|
||||
|
||||
let searchable_fields = ["title", "album", "artist"].iter().map(|s| s.to_string()).collect();
|
||||
builder.set_searchable_fields(searchable_fields);
|
||||
|
||||
let faceted_fields: HashSet<String> =
|
||||
["released-timestamp", "duration-float", "genre", "country", "artist"]
|
||||
.iter()
|
||||
.map(|s| s.to_string())
|
||||
.collect();
|
||||
builder.set_filterable_fields(faceted_fields.clone());
|
||||
builder.set_sortable_fields(faceted_fields);
|
||||
|
||||
builder.set_distinct_field("same".to_string());
|
||||
|
||||
builder.execute(|_| ()).unwrap();
|
||||
wtxn.commit().unwrap();
|
||||
|
||||
Ok(index)
|
||||
}
|
||||
|
||||
// Fuzz entry point: each input is a list of document batches that are indexed
// one after another into a freshly created index.
fuzz_target!(|batches: Vec<Vec<ArbitraryValue>>| {
    // The input is skipped entirely when the index cannot be created.
    if let Ok(mut index) = create_index() {
        for batch in batches {
            // Serialize the arbitrary values of this batch as one JSON array.
            let values: Vec<Value> = batch.into_iter().map(Value::from).collect();
            let payload = serde_json::to_string(&Value::Array(values)).unwrap();

            let mut buffer = Cursor::new(Vec::new());

            // Batches that cannot be converted into a documents batch are skipped.
            if read_json(payload.as_bytes(), &mut buffer).is_ok() {
                buffer.rewind().unwrap();
                let reader = DocumentsBatchReader::from_reader(buffer).unwrap();
                // milli can return many errors and we cannot tell the normal ones
                // from the abnormal ones, so the result is discarded: only
                // unexpected panics are of interest here.
                let _ = index_documents(&mut index, reader);
            }
        }

        index.prepare_for_closing().wait();
    }
});
|
Reference in New Issue
Block a user