mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-26 05:26:27 +00:00 
			
		
		
		
	Remove fuzzing feature
This commit is contained in:
		| @@ -1,4 +1,3 @@ | ||||
| #![cfg_attr(all(test, fuzzing), feature(no_coverage))] | ||||
| #![allow(clippy::type_complexity)] | ||||
|  | ||||
| #[cfg(not(windows))] | ||||
|   | ||||
| @@ -1059,208 +1059,3 @@ mod tests { | ||||
|         milli_snap!(format!("{index}"), "after_delete"); | ||||
|     } | ||||
| } | ||||
|  | ||||
| // fuzz tests | ||||
| #[cfg(all(test, fuzzing))] | ||||
| /** | ||||
| Fuzz test for the incremental indexer. | ||||
|  | ||||
| The fuzz test uses fuzzcheck, a coverage-guided fuzzer. | ||||
| See https://github.com/loiclec/fuzzcheck-rs and https://fuzzcheck.neocities.org | ||||
| for more information. | ||||
|  | ||||
| It is only run when using the `cargo fuzzcheck` command line tool, which can be installed with: | ||||
| ```sh | ||||
| cargo install cargo-fuzzcheck | ||||
| ``` | ||||
| To start the fuzz test, run (from the base folder or from milli/): | ||||
| ```sh | ||||
| cargo fuzzcheck update::facet::incremental::fuzz::fuzz | ||||
| ``` | ||||
| and wait a couple minutes to make sure the code was thoroughly tested, then | ||||
| hit `Ctrl-C` to stop the fuzzer. The corpus generated by the fuzzer is located in milli/fuzz. | ||||
|  | ||||
| To work on this module with rust-analyzer working properly, add the following to your .cargo/config.toml file: | ||||
| ```toml | ||||
| [build] | ||||
| rustflags = ["--cfg",  "fuzzing"] | ||||
| ``` | ||||
|  | ||||
| The fuzz test generates sequences of additions and deletions to the facet database and | ||||
| ensures that: | ||||
| 1. its structure is still internally valid | ||||
| 2. its content is the same as a trivially correct implementation of the same database | ||||
| */ | ||||
| mod fuzz { | ||||
|     use std::collections::{BTreeMap, HashMap}; | ||||
|     use std::iter::FromIterator; | ||||
|     use std::rc::Rc; | ||||
|  | ||||
|     use fuzzcheck::mutators::integer::U8Mutator; | ||||
|     use fuzzcheck::mutators::integer_within_range::{U16WithinRangeMutator, U8WithinRangeMutator}; | ||||
|     use fuzzcheck::mutators::vector::VecMutator; | ||||
|     use fuzzcheck::DefaultMutator; | ||||
|     use roaring::RoaringBitmap; | ||||
|     use tempfile::TempDir; | ||||
|  | ||||
|     use super::*; | ||||
|     use crate::update::facet::test_helpers::FacetIndex; | ||||
|     #[derive(Default)] | ||||
|     pub struct TrivialDatabase<T> { | ||||
|         pub elements: BTreeMap<u16, BTreeMap<T, RoaringBitmap>>, | ||||
|     } | ||||
|     impl<T> TrivialDatabase<T> | ||||
|     where | ||||
|         T: Ord + Clone + Eq + std::fmt::Debug, | ||||
|     { | ||||
|         #[no_coverage] | ||||
|         pub fn insert(&mut self, field_id: u16, new_key: &T, new_values: &RoaringBitmap) { | ||||
|             if new_values.is_empty() { | ||||
|                 return; | ||||
|             } | ||||
|             let values_field_id = self.elements.entry(field_id).or_default(); | ||||
|             let values = values_field_id.entry(new_key.clone()).or_default(); | ||||
|             *values |= new_values; | ||||
|         } | ||||
|         #[no_coverage] | ||||
|         pub fn delete(&mut self, field_id: u16, key: &T, values_to_remove: &RoaringBitmap) { | ||||
|             if let Some(values_field_id) = self.elements.get_mut(&field_id) { | ||||
|                 if let Some(values) = values_field_id.get_mut(&key) { | ||||
|                     *values -= values_to_remove; | ||||
|                     if values.is_empty() { | ||||
|                         values_field_id.remove(&key); | ||||
|                     } | ||||
|                 } | ||||
|                 if values_field_id.is_empty() { | ||||
|                     self.elements.remove(&field_id); | ||||
|                 } | ||||
|             } | ||||
|         } | ||||
|     } | ||||
|     #[derive(Clone, DefaultMutator, serde::Serialize, serde::Deserialize)] | ||||
|     struct Operation { | ||||
|         #[field_mutator(VecMutator<u8, U8Mutator> = { VecMutator::new(u8::default_mutator(), 0 ..= 5) })] | ||||
|         key: Vec<u8>, | ||||
|         #[field_mutator(U8WithinRangeMutator = { U8WithinRangeMutator::new(..32) })] | ||||
|         group_size: u8, | ||||
|         #[field_mutator(U8WithinRangeMutator = { U8WithinRangeMutator::new(..32) })] | ||||
|         max_group_size: u8, | ||||
|         #[field_mutator(U8WithinRangeMutator = { U8WithinRangeMutator::new(..32) })] | ||||
|         min_level_size: u8, | ||||
|         #[field_mutator(U16WithinRangeMutator = { U16WithinRangeMutator::new(..=3) })] | ||||
|         field_id: u16, | ||||
|         kind: OperationKind, | ||||
|     } | ||||
|     #[derive(Clone, DefaultMutator, serde::Serialize, serde::Deserialize)] | ||||
|     enum OperationKind { | ||||
|         Insert( | ||||
|             #[field_mutator(VecMutator<u8, U8Mutator> = { VecMutator::new(U8Mutator::default(), 0 ..= 10) })] | ||||
|              Vec<u8>, | ||||
|         ), | ||||
|         Delete( | ||||
|             #[field_mutator(VecMutator<u8, U8Mutator> = { VecMutator::new(U8Mutator::default(), 0 ..= 10) })] | ||||
|              Vec<u8>, | ||||
|         ), | ||||
|     } | ||||
|  | ||||
|     #[no_coverage] | ||||
|     fn compare_with_trivial_database(tempdir: Rc<TempDir>, operations: &[Operation]) { | ||||
|         let index = FacetIndex::<BytesRefCodec>::open_from_tempdir(tempdir, 4, 8, 5); // dummy params, they'll be overwritten | ||||
|         let mut txn = index.env.write_txn().unwrap(); | ||||
|  | ||||
|         let mut trivial_db = TrivialDatabase::<Vec<u8>>::default(); | ||||
|         let mut value_to_keys = HashMap::<u8, Vec<Vec<u8>>>::new(); | ||||
|         for Operation { key, group_size, max_group_size, min_level_size, field_id, kind } in | ||||
|             operations | ||||
|         { | ||||
|             index.set_group_size(*group_size); | ||||
|             index.set_max_group_size(*max_group_size); | ||||
|             index.set_min_level_size(*min_level_size); | ||||
|             match kind { | ||||
|                 OperationKind::Insert(values) => { | ||||
|                     let mut bitmap = RoaringBitmap::new(); | ||||
|                     for value in values { | ||||
|                         bitmap.insert(*value as u32); | ||||
|                         value_to_keys.entry(*value).or_default().push(key.clone()); | ||||
|                     } | ||||
|                     index.insert(&mut txn, *field_id, &key.as_slice(), &bitmap); | ||||
|                     trivial_db.insert(*field_id, &key, &bitmap); | ||||
|                 } | ||||
|                 OperationKind::Delete(values) => { | ||||
|                     let values = RoaringBitmap::from_iter(values.iter().copied().map(|x| x as u32)); | ||||
|                     let mut values_per_key = HashMap::new(); | ||||
|  | ||||
|                     for value in values { | ||||
|                         if let Some(keys) = value_to_keys.get(&(value as u8)) { | ||||
|                             for key in keys { | ||||
|                                 let values: &mut RoaringBitmap = | ||||
|                                     values_per_key.entry(key).or_default(); | ||||
|                                 values.insert(value); | ||||
|                             } | ||||
|                         } | ||||
|                     } | ||||
|                     for (key, values) in values_per_key { | ||||
|                         index.delete(&mut txn, *field_id, &key.as_slice(), &values); | ||||
|                         trivial_db.delete(*field_id, &key, &values); | ||||
|                     } | ||||
|                 } | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         for (field_id, values_field_id) in trivial_db.elements.iter() { | ||||
|             let level0iter = index | ||||
|                 .content | ||||
|                 .as_polymorph() | ||||
|                 .prefix_iter::<_, Bytes, FacetGroupValueCodec>(&mut txn, &field_id.to_be_bytes()) | ||||
|                 .unwrap(); | ||||
|  | ||||
|             for ((key, values), group) in values_field_id.iter().zip(level0iter) { | ||||
|                 let (group_key, group_values) = group.unwrap(); | ||||
|                 let group_key = | ||||
|                     FacetGroupKeyCodec::<BytesRefCodec>::bytes_decode(group_key).unwrap(); | ||||
|                 assert_eq!(key, &group_key.left_bound); | ||||
|                 assert_eq!(values, &group_values.bitmap); | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         for (field_id, values_field_id) in trivial_db.elements.iter() { | ||||
|             let level0iter = index | ||||
|                 .content | ||||
|                 .as_polymorph() | ||||
|                 .prefix_iter::<_, Bytes, FacetGroupValueCodec>(&txn, &field_id.to_be_bytes()) | ||||
|                 .unwrap(); | ||||
|  | ||||
|             for ((key, values), group) in values_field_id.iter().zip(level0iter) { | ||||
|                 let (group_key, group_values) = group.unwrap(); | ||||
|                 let group_key = | ||||
|                     FacetGroupKeyCodec::<BytesRefCodec>::bytes_decode(group_key).unwrap(); | ||||
|                 assert_eq!(key, &group_key.left_bound); | ||||
|                 assert_eq!(values, &group_values.bitmap); | ||||
|             } | ||||
|             index.verify_structure_validity(&txn, *field_id); | ||||
|         } | ||||
|         txn.abort().unwrap(); | ||||
|     } | ||||
|  | ||||
|     #[test] | ||||
|     #[no_coverage] | ||||
|     fn fuzz() { | ||||
|         let tempdir = Rc::new(TempDir::new().unwrap()); | ||||
|         let tempdir_cloned = tempdir.clone(); | ||||
|         let result = fuzzcheck::fuzz_test(move |operations: &[Operation]| { | ||||
|             compare_with_trivial_database(tempdir_cloned.clone(), operations) | ||||
|         }) | ||||
|         .default_mutator() | ||||
|         .serde_serializer() | ||||
|         .default_sensor_and_pool_with_custom_filter(|file, function| { | ||||
|             file == std::path::Path::new("milli/src/update/facet/incremental.rs") | ||||
|                 && !function.contains("serde") | ||||
|                 && !function.contains("tests::") | ||||
|                 && !function.contains("fuzz::") | ||||
|                 && !function.contains("display_bitmap") | ||||
|         }) | ||||
|         .arguments_from_cargo_fuzzcheck() | ||||
|         .launch(); | ||||
|         assert!(!result.found_test_failure); | ||||
|     } | ||||
| } | ||||
|   | ||||
| @@ -346,35 +346,6 @@ pub(crate) mod test_helpers { | ||||
|         for<'a> BoundCodec: | ||||
|             BytesEncode<'a> + BytesDecode<'a, DItem = <BoundCodec as BytesEncode<'a>>::EItem>, | ||||
|     { | ||||
|         #[cfg(all(test, fuzzing))] | ||||
|         pub fn open_from_tempdir( | ||||
|             tempdir: Rc<tempfile::TempDir>, | ||||
|             group_size: u8, | ||||
|             max_group_size: u8, | ||||
|             min_level_size: u8, | ||||
|         ) -> FacetIndex<BoundCodec> { | ||||
|             let group_size = std::cmp::min(16, std::cmp::max(group_size, 2)); // 2 <= x <= 16 | ||||
|             let max_group_size = std::cmp::min(16, std::cmp::max(group_size * 2, max_group_size)); // 2*group_size <= x <= 16 | ||||
|             let min_level_size = std::cmp::min(17, std::cmp::max(1, min_level_size)); // 1 <= x <= 17 | ||||
|  | ||||
|             let mut options = heed::EnvOpenOptions::new(); | ||||
|             let options = options.map_size(4096 * 4 * 10 * 1000); | ||||
|             unsafe { | ||||
|                 options.flag(heed::flags::Flags::MdbAlwaysFreePages); | ||||
|             } | ||||
|             let env = options.open(tempdir.path()).unwrap(); | ||||
|             let content = env.open_database(None).unwrap().unwrap(); | ||||
|  | ||||
|             FacetIndex { | ||||
|                 content, | ||||
|                 group_size: Cell::new(group_size), | ||||
|                 max_group_size: Cell::new(max_group_size), | ||||
|                 min_level_size: Cell::new(min_level_size), | ||||
|                 _tempdir: tempdir, | ||||
|                 env, | ||||
|                 _phantom: PhantomData, | ||||
|             } | ||||
|         } | ||||
|         pub fn new( | ||||
|             group_size: u8, | ||||
|             max_group_size: u8, | ||||
| @@ -402,26 +373,6 @@ pub(crate) mod test_helpers { | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         #[cfg(all(test, fuzzing))] | ||||
|         pub fn set_group_size(&self, group_size: u8) { | ||||
|             // 2 <= x <= 64 | ||||
|             self.group_size.set(std::cmp::min(64, std::cmp::max(group_size, 2))); | ||||
|         } | ||||
|         #[cfg(all(test, fuzzing))] | ||||
|         pub fn set_max_group_size(&self, max_group_size: u8) { | ||||
|             // 4 <= x <= 128; group_size is then raised to at least x / 2 | ||||
|             let max_group_size = std::cmp::max(4, std::cmp::min(128, max_group_size)); | ||||
|             self.max_group_size.set(max_group_size); | ||||
|             if self.group_size.get() < max_group_size / 2 { | ||||
|                 self.group_size.set(max_group_size / 2); | ||||
|             } | ||||
|         } | ||||
|         #[cfg(all(test, fuzzing))] | ||||
|         pub fn set_min_level_size(&self, min_level_size: u8) { | ||||
|             // 1 <= x <= inf | ||||
|             self.min_level_size.set(std::cmp::max(1, min_level_size)); | ||||
|         } | ||||
|  | ||||
|         pub fn insert<'a>( | ||||
|             &self, | ||||
|             wtxn: &'a mut RwTxn<'_>, | ||||
|   | ||||
		Reference in New Issue
	
	Block a user