mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-26 05:26:27 +00:00 
			
		
		
		
	Remove fuzzing feature
This commit is contained in:
		| @@ -1,4 +1,3 @@ | |||||||
| #![cfg_attr(all(test, fuzzing), feature(no_coverage))] |  | ||||||
| #![allow(clippy::type_complexity)] | #![allow(clippy::type_complexity)] | ||||||
|  |  | ||||||
| #[cfg(not(windows))] | #[cfg(not(windows))] | ||||||
|   | |||||||
| @@ -1059,208 +1059,3 @@ mod tests { | |||||||
|         milli_snap!(format!("{index}"), "after_delete"); |         milli_snap!(format!("{index}"), "after_delete"); | ||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
| // fuzz tests |  | ||||||
| #[cfg(all(test, fuzzing))] |  | ||||||
| /** |  | ||||||
| Fuzz test for the incremental indexer. |  | ||||||
|  |  | ||||||
| The fuzz test uses fuzzcheck, a coverage-guided fuzzer. |  | ||||||
| See https://github.com/loiclec/fuzzcheck-rs and https://fuzzcheck.neocities.org |  | ||||||
| for more information. |  | ||||||
|  |  | ||||||
| It is only run when using the `cargo fuzzcheck` command line tool, which can be installed with: |  | ||||||
| ```sh |  | ||||||
| cargo install cargo-fuzzcheck |  | ||||||
| ``` |  | ||||||
| To start the fuzz test, run (from the base folder or from milli/): |  | ||||||
| ```sh |  | ||||||
| cargo fuzzcheck update::facet::incremental::fuzz::fuzz |  | ||||||
| ``` |  | ||||||
| and wait a couple minutes to make sure the code was thoroughly tested, then |  | ||||||
| hit `Ctrl-C` to stop the fuzzer. The corpus generated by the fuzzer is located in milli/fuzz. |  | ||||||
|  |  | ||||||
| To work on this module with rust-analyzer working properly, add the following to your .cargo/config.toml file: |  | ||||||
| ```toml |  | ||||||
| [build] |  | ||||||
| rustflags = ["--cfg",  "fuzzing"] |  | ||||||
| ``` |  | ||||||
|  |  | ||||||
| The fuzz test generates sequences of additions and deletions to the facet database and |  | ||||||
| ensures that: |  | ||||||
| 1. its structure is still internally valid |  | ||||||
| 2. its content is the same as a trivially correct implementation of the same database |  | ||||||
| */ |  | ||||||
| mod fuzz { |  | ||||||
|     use std::collections::{BTreeMap, HashMap}; |  | ||||||
|     use std::iter::FromIterator; |  | ||||||
|     use std::rc::Rc; |  | ||||||
|  |  | ||||||
|     use fuzzcheck::mutators::integer::U8Mutator; |  | ||||||
|     use fuzzcheck::mutators::integer_within_range::{U16WithinRangeMutator, U8WithinRangeMutator}; |  | ||||||
|     use fuzzcheck::mutators::vector::VecMutator; |  | ||||||
|     use fuzzcheck::DefaultMutator; |  | ||||||
|     use roaring::RoaringBitmap; |  | ||||||
|     use tempfile::TempDir; |  | ||||||
|  |  | ||||||
|     use super::*; |  | ||||||
|     use crate::update::facet::test_helpers::FacetIndex; |  | ||||||
|     #[derive(Default)] |  | ||||||
|     pub struct TrivialDatabase<T> { |  | ||||||
|         pub elements: BTreeMap<u16, BTreeMap<T, RoaringBitmap>>, |  | ||||||
|     } |  | ||||||
|     impl<T> TrivialDatabase<T> |  | ||||||
|     where |  | ||||||
|         T: Ord + Clone + Eq + std::fmt::Debug, |  | ||||||
|     { |  | ||||||
|         #[no_coverage] |  | ||||||
|         pub fn insert(&mut self, field_id: u16, new_key: &T, new_values: &RoaringBitmap) { |  | ||||||
|             if new_values.is_empty() { |  | ||||||
|                 return; |  | ||||||
|             } |  | ||||||
|             let values_field_id = self.elements.entry(field_id).or_default(); |  | ||||||
|             let values = values_field_id.entry(new_key.clone()).or_default(); |  | ||||||
|             *values |= new_values; |  | ||||||
|         } |  | ||||||
|         #[no_coverage] |  | ||||||
|         pub fn delete(&mut self, field_id: u16, key: &T, values_to_remove: &RoaringBitmap) { |  | ||||||
|             if let Some(values_field_id) = self.elements.get_mut(&field_id) { |  | ||||||
|                 if let Some(values) = values_field_id.get_mut(&key) { |  | ||||||
|                     *values -= values_to_remove; |  | ||||||
|                     if values.is_empty() { |  | ||||||
|                         values_field_id.remove(&key); |  | ||||||
|                     } |  | ||||||
|                 } |  | ||||||
|                 if values_field_id.is_empty() { |  | ||||||
|                     self.elements.remove(&field_id); |  | ||||||
|                 } |  | ||||||
|             } |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|     #[derive(Clone, DefaultMutator, serde::Serialize, serde::Deserialize)] |  | ||||||
|     struct Operation { |  | ||||||
|         #[field_mutator(VecMutator<u8, U8Mutator> = { VecMutator::new(u8::default_mutator(), 0 ..= 5) })] |  | ||||||
|         key: Vec<u8>, |  | ||||||
|         #[field_mutator(U8WithinRangeMutator = { U8WithinRangeMutator::new(..32) })] |  | ||||||
|         group_size: u8, |  | ||||||
|         #[field_mutator(U8WithinRangeMutator = { U8WithinRangeMutator::new(..32) })] |  | ||||||
|         max_group_size: u8, |  | ||||||
|         #[field_mutator(U8WithinRangeMutator = { U8WithinRangeMutator::new(..32) })] |  | ||||||
|         min_level_size: u8, |  | ||||||
|         #[field_mutator(U16WithinRangeMutator = { U16WithinRangeMutator::new(..=3) })] |  | ||||||
|         field_id: u16, |  | ||||||
|         kind: OperationKind, |  | ||||||
|     } |  | ||||||
|     #[derive(Clone, DefaultMutator, serde::Serialize, serde::Deserialize)] |  | ||||||
|     enum OperationKind { |  | ||||||
|         Insert( |  | ||||||
|             #[field_mutator(VecMutator<u8, U8Mutator> = { VecMutator::new(U8Mutator::default(), 0 ..= 10) })] |  | ||||||
|              Vec<u8>, |  | ||||||
|         ), |  | ||||||
|         Delete( |  | ||||||
|             #[field_mutator(VecMutator<u8, U8Mutator> = { VecMutator::new(U8Mutator::default(), 0 ..= 10) })] |  | ||||||
|              Vec<u8>, |  | ||||||
|         ), |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     #[no_coverage] |  | ||||||
|     fn compare_with_trivial_database(tempdir: Rc<TempDir>, operations: &[Operation]) { |  | ||||||
|         let index = FacetIndex::<BytesRefCodec>::open_from_tempdir(tempdir, 4, 8, 5); // dummy params, they'll be overwritten |  | ||||||
|         let mut txn = index.env.write_txn().unwrap(); |  | ||||||
|  |  | ||||||
|         let mut trivial_db = TrivialDatabase::<Vec<u8>>::default(); |  | ||||||
|         let mut value_to_keys = HashMap::<u8, Vec<Vec<u8>>>::new(); |  | ||||||
|         for Operation { key, group_size, max_group_size, min_level_size, field_id, kind } in |  | ||||||
|             operations |  | ||||||
|         { |  | ||||||
|             index.set_group_size(*group_size); |  | ||||||
|             index.set_max_group_size(*max_group_size); |  | ||||||
|             index.set_min_level_size(*min_level_size); |  | ||||||
|             match kind { |  | ||||||
|                 OperationKind::Insert(values) => { |  | ||||||
|                     let mut bitmap = RoaringBitmap::new(); |  | ||||||
|                     for value in values { |  | ||||||
|                         bitmap.insert(*value as u32); |  | ||||||
|                         value_to_keys.entry(*value).or_default().push(key.clone()); |  | ||||||
|                     } |  | ||||||
|                     index.insert(&mut txn, *field_id, &key.as_slice(), &bitmap); |  | ||||||
|                     trivial_db.insert(*field_id, &key, &bitmap); |  | ||||||
|                 } |  | ||||||
|                 OperationKind::Delete(values) => { |  | ||||||
|                     let values = RoaringBitmap::from_iter(values.iter().copied().map(|x| x as u32)); |  | ||||||
|                     let mut values_per_key = HashMap::new(); |  | ||||||
|  |  | ||||||
|                     for value in values { |  | ||||||
|                         if let Some(keys) = value_to_keys.get(&(value as u8)) { |  | ||||||
|                             for key in keys { |  | ||||||
|                                 let values: &mut RoaringBitmap = |  | ||||||
|                                     values_per_key.entry(key).or_default(); |  | ||||||
|                                 values.insert(value); |  | ||||||
|                             } |  | ||||||
|                         } |  | ||||||
|                     } |  | ||||||
|                     for (key, values) in values_per_key { |  | ||||||
|                         index.delete(&mut txn, *field_id, &key.as_slice(), &values); |  | ||||||
|                         trivial_db.delete(*field_id, &key, &values); |  | ||||||
|                     } |  | ||||||
|                 } |  | ||||||
|             } |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         for (field_id, values_field_id) in trivial_db.elements.iter() { |  | ||||||
|             let level0iter = index |  | ||||||
|                 .content |  | ||||||
|                 .as_polymorph() |  | ||||||
|                 .prefix_iter::<_, Bytes, FacetGroupValueCodec>(&mut txn, &field_id.to_be_bytes()) |  | ||||||
|                 .unwrap(); |  | ||||||
|  |  | ||||||
|             for ((key, values), group) in values_field_id.iter().zip(level0iter) { |  | ||||||
|                 let (group_key, group_values) = group.unwrap(); |  | ||||||
|                 let group_key = |  | ||||||
|                     FacetGroupKeyCodec::<BytesRefCodec>::bytes_decode(group_key).unwrap(); |  | ||||||
|                 assert_eq!(key, &group_key.left_bound); |  | ||||||
|                 assert_eq!(values, &group_values.bitmap); |  | ||||||
|             } |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         for (field_id, values_field_id) in trivial_db.elements.iter() { |  | ||||||
|             let level0iter = index |  | ||||||
|                 .content |  | ||||||
|                 .as_polymorph() |  | ||||||
|                 .prefix_iter::<_, Bytes, FacetGroupValueCodec>(&txn, &field_id.to_be_bytes()) |  | ||||||
|                 .unwrap(); |  | ||||||
|  |  | ||||||
|             for ((key, values), group) in values_field_id.iter().zip(level0iter) { |  | ||||||
|                 let (group_key, group_values) = group.unwrap(); |  | ||||||
|                 let group_key = |  | ||||||
|                     FacetGroupKeyCodec::<BytesRefCodec>::bytes_decode(group_key).unwrap(); |  | ||||||
|                 assert_eq!(key, &group_key.left_bound); |  | ||||||
|                 assert_eq!(values, &group_values.bitmap); |  | ||||||
|             } |  | ||||||
|             index.verify_structure_validity(&txn, *field_id); |  | ||||||
|         } |  | ||||||
|         txn.abort().unwrap(); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     #[test] |  | ||||||
|     #[no_coverage] |  | ||||||
|     fn fuzz() { |  | ||||||
|         let tempdir = Rc::new(TempDir::new().unwrap()); |  | ||||||
|         let tempdir_cloned = tempdir.clone(); |  | ||||||
|         let result = fuzzcheck::fuzz_test(move |operations: &[Operation]| { |  | ||||||
|             compare_with_trivial_database(tempdir_cloned.clone(), operations) |  | ||||||
|         }) |  | ||||||
|         .default_mutator() |  | ||||||
|         .serde_serializer() |  | ||||||
|         .default_sensor_and_pool_with_custom_filter(|file, function| { |  | ||||||
|             file == std::path::Path::new("milli/src/update/facet/incremental.rs") |  | ||||||
|                 && !function.contains("serde") |  | ||||||
|                 && !function.contains("tests::") |  | ||||||
|                 && !function.contains("fuzz::") |  | ||||||
|                 && !function.contains("display_bitmap") |  | ||||||
|         }) |  | ||||||
|         .arguments_from_cargo_fuzzcheck() |  | ||||||
|         .launch(); |  | ||||||
|         assert!(!result.found_test_failure); |  | ||||||
|     } |  | ||||||
| } |  | ||||||
|   | |||||||
| @@ -346,35 +346,6 @@ pub(crate) mod test_helpers { | |||||||
|         for<'a> BoundCodec: |         for<'a> BoundCodec: | ||||||
|             BytesEncode<'a> + BytesDecode<'a, DItem = <BoundCodec as BytesEncode<'a>>::EItem>, |             BytesEncode<'a> + BytesDecode<'a, DItem = <BoundCodec as BytesEncode<'a>>::EItem>, | ||||||
|     { |     { | ||||||
|         #[cfg(all(test, fuzzing))] |  | ||||||
|         pub fn open_from_tempdir( |  | ||||||
|             tempdir: Rc<tempfile::TempDir>, |  | ||||||
|             group_size: u8, |  | ||||||
|             max_group_size: u8, |  | ||||||
|             min_level_size: u8, |  | ||||||
|         ) -> FacetIndex<BoundCodec> { |  | ||||||
|             let group_size = std::cmp::min(16, std::cmp::max(group_size, 2)); // 2 <= x <= 16 |  | ||||||
|             let max_group_size = std::cmp::min(16, std::cmp::max(group_size * 2, max_group_size)); // 2*group_size <= x <= 16 |  | ||||||
|             let min_level_size = std::cmp::min(17, std::cmp::max(1, min_level_size)); // 1 <= x <= 17 |  | ||||||
|  |  | ||||||
|             let mut options = heed::EnvOpenOptions::new(); |  | ||||||
|             let options = options.map_size(4096 * 4 * 10 * 1000); |  | ||||||
|             unsafe { |  | ||||||
|                 options.flag(heed::flags::Flags::MdbAlwaysFreePages); |  | ||||||
|             } |  | ||||||
|             let env = options.open(tempdir.path()).unwrap(); |  | ||||||
|             let content = env.open_database(None).unwrap().unwrap(); |  | ||||||
|  |  | ||||||
|             FacetIndex { |  | ||||||
|                 content, |  | ||||||
|                 group_size: Cell::new(group_size), |  | ||||||
|                 max_group_size: Cell::new(max_group_size), |  | ||||||
|                 min_level_size: Cell::new(min_level_size), |  | ||||||
|                 _tempdir: tempdir, |  | ||||||
|                 env, |  | ||||||
|                 _phantom: PhantomData, |  | ||||||
|             } |  | ||||||
|         } |  | ||||||
|         pub fn new( |         pub fn new( | ||||||
|             group_size: u8, |             group_size: u8, | ||||||
|             max_group_size: u8, |             max_group_size: u8, | ||||||
| @@ -402,26 +373,6 @@ pub(crate) mod test_helpers { | |||||||
|             } |             } | ||||||
|         } |         } | ||||||
|  |  | ||||||
|         #[cfg(all(test, fuzzing))] |  | ||||||
|         pub fn set_group_size(&self, group_size: u8) { |  | ||||||
|             // 2 <= x <= 64 |  | ||||||
|             self.group_size.set(std::cmp::min(64, std::cmp::max(group_size, 2))); |  | ||||||
|         } |  | ||||||
|         #[cfg(all(test, fuzzing))] |  | ||||||
|         pub fn set_max_group_size(&self, max_group_size: u8) { |  | ||||||
|             // 4 <= x <= 128 (group_size is then raised to at least x / 2) |  | ||||||
|             let max_group_size = std::cmp::max(4, std::cmp::min(128, max_group_size)); |  | ||||||
|             self.max_group_size.set(max_group_size); |  | ||||||
|             if self.group_size.get() < max_group_size / 2 { |  | ||||||
|                 self.group_size.set(max_group_size / 2); |  | ||||||
|             } |  | ||||||
|         } |  | ||||||
|         #[cfg(all(test, fuzzing))] |  | ||||||
|         pub fn set_min_level_size(&self, min_level_size: u8) { |  | ||||||
|             // 1 <= x <= inf |  | ||||||
|             self.min_level_size.set(std::cmp::max(1, min_level_size)); |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         pub fn insert<'a>( |         pub fn insert<'a>( | ||||||
|             &self, |             &self, | ||||||
|             wtxn: &'a mut RwTxn<'_>, |             wtxn: &'a mut RwTxn<'_>, | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user