mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-11-03 17:36:29 +00:00 
			
		
		
		
	Remove fuzzing feature
This commit is contained in:
		@@ -1,4 +1,3 @@
 | 
			
		||||
#![cfg_attr(all(test, fuzzing), feature(no_coverage))]
 | 
			
		||||
#![allow(clippy::type_complexity)]
 | 
			
		||||
 | 
			
		||||
#[cfg(not(windows))]
 | 
			
		||||
 
 | 
			
		||||
@@ -1059,208 +1059,3 @@ mod tests {
 | 
			
		||||
        milli_snap!(format!("{index}"), "after_delete");
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// fuzz tests
 | 
			
		||||
#[cfg(all(test, fuzzing))]
 | 
			
		||||
/**
 | 
			
		||||
Fuzz test for the incremental indexer.
 | 
			
		||||
 | 
			
		||||
The fuzz test uses fuzzcheck, a coverage-guided fuzzer.
 | 
			
		||||
See https://github.com/loiclec/fuzzcheck-rs and https://fuzzcheck.neocities.org
 | 
			
		||||
for more information.
 | 
			
		||||
 | 
			
		||||
It is only run when using the `cargo fuzzcheck` command line tool, which can be installed with:
 | 
			
		||||
```sh
 | 
			
		||||
cargo install cargo-fuzzcheck
 | 
			
		||||
```
 | 
			
		||||
To start the fuzz test, run (from the base folder or from milli/):
 | 
			
		||||
```sh
 | 
			
		||||
cargo fuzzcheck update::facet::incremental::fuzz::fuzz
 | 
			
		||||
```
 | 
			
		||||
and wait a couple minutes to make sure the code was thoroughly tested, then
 | 
			
		||||
hit `Ctrl-C` to stop the fuzzer. The corpus generated by the fuzzer is located in milli/fuzz.
 | 
			
		||||
 | 
			
		||||
To work on this module with rust-analyzer working properly, add the following to your .cargo/config.toml file:
 | 
			
		||||
```toml
 | 
			
		||||
[build]
 | 
			
		||||
rustflags = ["--cfg",  "fuzzing"]
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
The fuzz test generates sequences of additions and deletions to the facet database and
 | 
			
		||||
ensures that:
 | 
			
		||||
1. its structure is still internally valid
 | 
			
		||||
2. its content is the same as a trivially correct implementation of the same database
 | 
			
		||||
*/
 | 
			
		||||
mod fuzz {
 | 
			
		||||
    use std::collections::{BTreeMap, HashMap};
 | 
			
		||||
    use std::iter::FromIterator;
 | 
			
		||||
    use std::rc::Rc;
 | 
			
		||||
 | 
			
		||||
    use fuzzcheck::mutators::integer::U8Mutator;
 | 
			
		||||
    use fuzzcheck::mutators::integer_within_range::{U16WithinRangeMutator, U8WithinRangeMutator};
 | 
			
		||||
    use fuzzcheck::mutators::vector::VecMutator;
 | 
			
		||||
    use fuzzcheck::DefaultMutator;
 | 
			
		||||
    use roaring::RoaringBitmap;
 | 
			
		||||
    use tempfile::TempDir;
 | 
			
		||||
 | 
			
		||||
    use super::*;
 | 
			
		||||
    use crate::update::facet::test_helpers::FacetIndex;
 | 
			
		||||
    #[derive(Default)]
 | 
			
		||||
    pub struct TrivialDatabase<T> {
 | 
			
		||||
        pub elements: BTreeMap<u16, BTreeMap<T, RoaringBitmap>>,
 | 
			
		||||
    }
 | 
			
		||||
    impl<T> TrivialDatabase<T>
 | 
			
		||||
    where
 | 
			
		||||
        T: Ord + Clone + Eq + std::fmt::Debug,
 | 
			
		||||
    {
 | 
			
		||||
        #[no_coverage]
 | 
			
		||||
        pub fn insert(&mut self, field_id: u16, new_key: &T, new_values: &RoaringBitmap) {
 | 
			
		||||
            if new_values.is_empty() {
 | 
			
		||||
                return;
 | 
			
		||||
            }
 | 
			
		||||
            let values_field_id = self.elements.entry(field_id).or_default();
 | 
			
		||||
            let values = values_field_id.entry(new_key.clone()).or_default();
 | 
			
		||||
            *values |= new_values;
 | 
			
		||||
        }
 | 
			
		||||
        #[no_coverage]
 | 
			
		||||
        pub fn delete(&mut self, field_id: u16, key: &T, values_to_remove: &RoaringBitmap) {
 | 
			
		||||
            if let Some(values_field_id) = self.elements.get_mut(&field_id) {
 | 
			
		||||
                if let Some(values) = values_field_id.get_mut(&key) {
 | 
			
		||||
                    *values -= values_to_remove;
 | 
			
		||||
                    if values.is_empty() {
 | 
			
		||||
                        values_field_id.remove(&key);
 | 
			
		||||
                    }
 | 
			
		||||
                }
 | 
			
		||||
                if values_field_id.is_empty() {
 | 
			
		||||
                    self.elements.remove(&field_id);
 | 
			
		||||
                }
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
    // One step of a fuzz-generated scenario: the index parameters to apply
    // before the step, the target field/key, and the kind of modification.
    // Plain `//` comments are used so that no extra attribute reaches the derives.
    #[derive(Clone, DefaultMutator, serde::Serialize, serde::Deserialize)]
    struct Operation {
        // Facet key, generated with a length in `0 ..= 5` bytes.
        #[field_mutator(VecMutator<u8, U8Mutator> = { VecMutator::new(u8::default_mutator(), 0 ..= 5) })]
        key: Vec<u8>,
        // Raw group size, mutated within `..32`.
        #[field_mutator(U8WithinRangeMutator = { U8WithinRangeMutator::new(..32) })]
        group_size: u8,
        // Raw maximum group size, mutated within `..32`.
        #[field_mutator(U8WithinRangeMutator = { U8WithinRangeMutator::new(..32) })]
        max_group_size: u8,
        // Raw minimum level size, mutated within `..32`.
        #[field_mutator(U8WithinRangeMutator = { U8WithinRangeMutator::new(..32) })]
        min_level_size: u8,
        // Field id the operation targets, mutated within `..=3`.
        #[field_mutator(U16WithinRangeMutator = { U16WithinRangeMutator::new(..=3) })]
        field_id: u16,
        // Whether values are inserted or deleted, and which ones.
        kind: OperationKind,
    }
 | 
			
		||||
    // The modification carried by an `Operation`. Both variants hold a list of
    // values generated with a length in `0 ..= 10`.
    #[derive(Clone, DefaultMutator, serde::Serialize, serde::Deserialize)]
    enum OperationKind {
        // Values to insert.
        Insert(
            #[field_mutator(VecMutator<u8, U8Mutator> = { VecMutator::new(U8Mutator::default(), 0 ..= 10) })]
             Vec<u8>,
        ),
        // Values to delete.
        Delete(
            #[field_mutator(VecMutator<u8, U8Mutator> = { VecMutator::new(U8Mutator::default(), 0 ..= 10) })]
             Vec<u8>,
        ),
    }
 | 
			
		||||
 | 
			
		||||
    #[no_coverage]
 | 
			
		||||
    fn compare_with_trivial_database(tempdir: Rc<TempDir>, operations: &[Operation]) {
 | 
			
		||||
        let index = FacetIndex::<BytesRefCodec>::open_from_tempdir(tempdir, 4, 8, 5); // dummy params, they'll be overwritten
 | 
			
		||||
        let mut txn = index.env.write_txn().unwrap();
 | 
			
		||||
 | 
			
		||||
        let mut trivial_db = TrivialDatabase::<Vec<u8>>::default();
 | 
			
		||||
        let mut value_to_keys = HashMap::<u8, Vec<Vec<u8>>>::new();
 | 
			
		||||
        for Operation { key, group_size, max_group_size, min_level_size, field_id, kind } in
 | 
			
		||||
            operations
 | 
			
		||||
        {
 | 
			
		||||
            index.set_group_size(*group_size);
 | 
			
		||||
            index.set_max_group_size(*max_group_size);
 | 
			
		||||
            index.set_min_level_size(*min_level_size);
 | 
			
		||||
            match kind {
 | 
			
		||||
                OperationKind::Insert(values) => {
 | 
			
		||||
                    let mut bitmap = RoaringBitmap::new();
 | 
			
		||||
                    for value in values {
 | 
			
		||||
                        bitmap.insert(*value as u32);
 | 
			
		||||
                        value_to_keys.entry(*value).or_default().push(key.clone());
 | 
			
		||||
                    }
 | 
			
		||||
                    index.insert(&mut txn, *field_id, &key.as_slice(), &bitmap);
 | 
			
		||||
                    trivial_db.insert(*field_id, &key, &bitmap);
 | 
			
		||||
                }
 | 
			
		||||
                OperationKind::Delete(values) => {
 | 
			
		||||
                    let values = RoaringBitmap::from_iter(values.iter().copied().map(|x| x as u32));
 | 
			
		||||
                    let mut values_per_key = HashMap::new();
 | 
			
		||||
 | 
			
		||||
                    for value in values {
 | 
			
		||||
                        if let Some(keys) = value_to_keys.get(&(value as u8)) {
 | 
			
		||||
                            for key in keys {
 | 
			
		||||
                                let values: &mut RoaringBitmap =
 | 
			
		||||
                                    values_per_key.entry(key).or_default();
 | 
			
		||||
                                values.insert(value);
 | 
			
		||||
                            }
 | 
			
		||||
                        }
 | 
			
		||||
                    }
 | 
			
		||||
                    for (key, values) in values_per_key {
 | 
			
		||||
                        index.delete(&mut txn, *field_id, &key.as_slice(), &values);
 | 
			
		||||
                        trivial_db.delete(*field_id, &key, &values);
 | 
			
		||||
                    }
 | 
			
		||||
                }
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        for (field_id, values_field_id) in trivial_db.elements.iter() {
 | 
			
		||||
            let level0iter = index
 | 
			
		||||
                .content
 | 
			
		||||
                .as_polymorph()
 | 
			
		||||
                .prefix_iter::<_, Bytes, FacetGroupValueCodec>(&mut txn, &field_id.to_be_bytes())
 | 
			
		||||
                .unwrap();
 | 
			
		||||
 | 
			
		||||
            for ((key, values), group) in values_field_id.iter().zip(level0iter) {
 | 
			
		||||
                let (group_key, group_values) = group.unwrap();
 | 
			
		||||
                let group_key =
 | 
			
		||||
                    FacetGroupKeyCodec::<BytesRefCodec>::bytes_decode(group_key).unwrap();
 | 
			
		||||
                assert_eq!(key, &group_key.left_bound);
 | 
			
		||||
                assert_eq!(values, &group_values.bitmap);
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        for (field_id, values_field_id) in trivial_db.elements.iter() {
 | 
			
		||||
            let level0iter = index
 | 
			
		||||
                .content
 | 
			
		||||
                .as_polymorph()
 | 
			
		||||
                .prefix_iter::<_, Bytes, FacetGroupValueCodec>(&txn, &field_id.to_be_bytes())
 | 
			
		||||
                .unwrap();
 | 
			
		||||
 | 
			
		||||
            for ((key, values), group) in values_field_id.iter().zip(level0iter) {
 | 
			
		||||
                let (group_key, group_values) = group.unwrap();
 | 
			
		||||
                let group_key =
 | 
			
		||||
                    FacetGroupKeyCodec::<BytesRefCodec>::bytes_decode(group_key).unwrap();
 | 
			
		||||
                assert_eq!(key, &group_key.left_bound);
 | 
			
		||||
                assert_eq!(values, &group_values.bitmap);
 | 
			
		||||
            }
 | 
			
		||||
            index.verify_structure_validity(&txn, *field_id);
 | 
			
		||||
        }
 | 
			
		||||
        txn.abort().unwrap();
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Fuzz entry point: feeds generated `Operation` sequences to
    /// `compare_with_trivial_database` and fails if fuzzcheck reports a test failure.
    ///
    /// Every run reuses the same temporary directory. Only functions from
    /// `milli/src/update/facet/incremental.rs` whose name contains none of the
    /// excluded fragments pass the sensor filter.
    #[test]
    #[no_coverage]
    fn fuzz() {
        // Function-name fragments rejected by the sensor filter.
        const EXCLUDED_FRAGMENTS: [&str; 4] = ["serde", "tests::", "fuzz::", "display_bitmap"];

        let tempdir = Rc::new(TempDir::new().unwrap());
        let shared_dir = Rc::clone(&tempdir);
        let outcome = fuzzcheck::fuzz_test(move |operations: &[Operation]| {
            compare_with_trivial_database(Rc::clone(&shared_dir), operations)
        })
        .default_mutator()
        .serde_serializer()
        .default_sensor_and_pool_with_custom_filter(|file, function| {
            file == std::path::Path::new("milli/src/update/facet/incremental.rs")
                && EXCLUDED_FRAGMENTS.iter().all(|fragment| !function.contains(*fragment))
        })
        .arguments_from_cargo_fuzzcheck()
        .launch();
        assert!(!outcome.found_test_failure);
    }
 | 
			
		||||
}
 | 
			
		||||
 
 | 
			
		||||
@@ -346,35 +346,6 @@ pub(crate) mod test_helpers {
 | 
			
		||||
        for<'a> BoundCodec:
 | 
			
		||||
            BytesEncode<'a> + BytesDecode<'a, DItem = <BoundCodec as BytesEncode<'a>>::EItem>,
 | 
			
		||||
    {
 | 
			
		||||
        /// Builds a `FacetIndex` whose heed environment lives in `tempdir`.
        ///
        /// The three size parameters are sanitized before being stored:
        /// `group_size` into `2..=16`, `min_level_size` into `1..=17`, and
        /// `max_group_size` to at least `2 * group_size` but never above 16.
        ///
        /// Panics if the environment or its unnamed database cannot be opened.
        #[cfg(all(test, fuzzing))]
        pub fn open_from_tempdir(
            tempdir: Rc<tempfile::TempDir>,
            group_size: u8,
            max_group_size: u8,
            min_level_size: u8,
        ) -> FacetIndex<BoundCodec> {
            let group_size = group_size.clamp(2, 16);
            // NOTE(review): the upper cap wins, so for `group_size > 8` the result
            // (16) is smaller than `2 * group_size` — confirm this is acceptable.
            let max_group_size = max_group_size.max(group_size * 2).min(16);
            let min_level_size = min_level_size.clamp(1, 17);

            let mut options = heed::EnvOpenOptions::new();
            let options = options.map_size(4096 * 4 * 10 * 1000);
            // NOTE(review): `flag` is an unsafe API; the invariant that makes this
            // call sound is not visible from here — confirm against heed's docs.
            unsafe {
                options.flag(heed::flags::Flags::MdbAlwaysFreePages);
            }
            let env = options.open(tempdir.path()).unwrap();
            let content = env.open_database(None).unwrap().unwrap();

            FacetIndex {
                content,
                group_size: Cell::new(group_size),
                max_group_size: Cell::new(max_group_size),
                min_level_size: Cell::new(min_level_size),
                _tempdir: tempdir,
                env,
                _phantom: PhantomData,
            }
        }
 | 
			
		||||
        pub fn new(
 | 
			
		||||
            group_size: u8,
 | 
			
		||||
            max_group_size: u8,
 | 
			
		||||
@@ -402,26 +373,6 @@ pub(crate) mod test_helpers {
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        /// Stores `group_size`, forced into the range `2..=64`.
        #[cfg(all(test, fuzzing))]
        pub fn set_group_size(&self, group_size: u8) {
            let bounded = group_size.clamp(2, 64);
            self.group_size.set(bounded);
        }
 | 
			
		||||
        /// Stores `max_group_size`, forced into the range `4..=128`, and raises the
        /// current group size to half of that value when it is smaller.
        ///
        /// NOTE(review): this keeps `group_size >= max_group_size / 2`. It does not
        /// lower a group size that exceeds half of the maximum, so
        /// `2 * group_size <= max_group_size` is not guaranteed — confirm which
        /// invariant is intended.
        #[cfg(all(test, fuzzing))]
        pub fn set_max_group_size(&self, max_group_size: u8) {
            let bounded = max_group_size.clamp(4, 128);
            self.max_group_size.set(bounded);

            let half = bounded / 2;
            if self.group_size.get() < half {
                self.group_size.set(half);
            }
        }
 | 
			
		||||
        /// Stores `min_level_size`, raised to 1 when it is 0; there is no upper bound.
        #[cfg(all(test, fuzzing))]
        pub fn set_min_level_size(&self, min_level_size: u8) {
            let at_least_one = min_level_size.max(1);
            self.min_level_size.set(at_least_one);
        }
 | 
			
		||||
 | 
			
		||||
        pub fn insert<'a>(
 | 
			
		||||
            &self,
 | 
			
		||||
            wtxn: &'a mut RwTxn<'_>,
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user