mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-11-04 09:56:28 +00:00 
			
		
		
		
	Optimize cbo roaring bitmaps merge
This commit is contained in:
		@@ -52,6 +52,46 @@ impl CboRoaringBitmapCodec {
 | 
			
		||||
            RoaringBitmap::deserialize_from(bytes)
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Merge serialized CboRoaringBitmaps in a buffer.
 | 
			
		||||
    ///
 | 
			
		||||
    /// if the merged values len is under the threshold,
 | 
			
		||||
    /// values are directly serialized in the buffer;
 | 
			
		||||
    /// else a RoaringBitmap is created from the values and is serialized in the buffer.
 | 
			
		||||
    pub fn merge_into(slices: &[Cow<[u8]>], buffer: &mut Vec<u8>) -> io::Result<()> {
 | 
			
		||||
        let mut roaring = RoaringBitmap::new();
 | 
			
		||||
        let mut vec = Vec::new();
 | 
			
		||||
 | 
			
		||||
        for bytes in slices {
 | 
			
		||||
            if bytes.len() <= THRESHOLD * size_of::<u32>() {
 | 
			
		||||
                let mut reader = bytes.as_ref();
 | 
			
		||||
                while let Ok(integer) = reader.read_u32::<NativeEndian>() {
 | 
			
		||||
                    vec.push(integer);
 | 
			
		||||
                }
 | 
			
		||||
            } else {
 | 
			
		||||
                roaring |= RoaringBitmap::deserialize_from(bytes.as_ref())?;
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        if roaring.is_empty() {
 | 
			
		||||
            vec.sort_unstable();
 | 
			
		||||
            vec.dedup();
 | 
			
		||||
 | 
			
		||||
            if vec.len() <= THRESHOLD {
 | 
			
		||||
                for integer in vec {
 | 
			
		||||
                    buffer.extend_from_slice(&integer.to_ne_bytes());
 | 
			
		||||
                }
 | 
			
		||||
            } else {
 | 
			
		||||
                let roaring = RoaringBitmap::from_sorted_iter(vec.into_iter());
 | 
			
		||||
                roaring.serialize_into(buffer)?;
 | 
			
		||||
            }
 | 
			
		||||
        } else {
 | 
			
		||||
            roaring.extend(vec);
 | 
			
		||||
            roaring.serialize_into(buffer)?;
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        Ok(())
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
impl heed::BytesDecode<'_> for CboRoaringBitmapCodec {
 | 
			
		||||
@@ -106,4 +146,40 @@ mod tests {
 | 
			
		||||
 | 
			
		||||
        assert!(roaring_size > bo_size);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Checks that `merge_into` yields the union of its encoded inputs, once for a handful
    /// of tiny overlapping ranges and once for a larger set of overlapping ranges.
    #[test]
    fn merge_cbo_roaring_bitmaps() {
        let mut buffer = Vec::new();

        // First scenario: overlapping ranges whose union is exactly 1..6.
        let small_bitmaps = vec![
            RoaringBitmap::from_sorted_iter(1..4),
            RoaringBitmap::from_sorted_iter(2..5),
            RoaringBitmap::from_sorted_iter(4..6),
            RoaringBitmap::from_sorted_iter(1..3),
        ];
        let small_encoded: Vec<_> = small_bitmaps
            .iter()
            .map(|bitmap| CboRoaringBitmapCodec::bytes_encode(bitmap).unwrap())
            .collect();

        CboRoaringBitmapCodec::merge_into(small_encoded.as_slice(), &mut buffer).unwrap();
        let merged = CboRoaringBitmapCodec::deserialize_from(&buffer).unwrap();
        assert_eq!(merged, RoaringBitmap::from_sorted_iter(1..6));

        // Second scenario: overlapping ranges whose union is exactly 0..23.
        let medium_bitmaps = vec![
            RoaringBitmap::from_sorted_iter(1..4),
            RoaringBitmap::from_sorted_iter(2..5),
            RoaringBitmap::from_sorted_iter(4..8),
            RoaringBitmap::from_sorted_iter(0..3),
            RoaringBitmap::from_sorted_iter(7..23),
        ];
        let medium_encoded: Vec<_> = medium_bitmaps
            .iter()
            .map(|bitmap| CboRoaringBitmapCodec::bytes_encode(bitmap).unwrap())
            .collect();

        // `merge_into` appends to the buffer, so drop the previous result first.
        buffer.clear();
        CboRoaringBitmapCodec::merge_into(medium_encoded.as_slice(), &mut buffer).unwrap();
        let merged = CboRoaringBitmapCodec::deserialize_from(&buffer).unwrap();
        assert_eq!(merged, RoaringBitmap::from_sorted_iter(0..23));
    }
 | 
			
		||||
}
 | 
			
		||||
 
 | 
			
		||||
@@ -120,52 +120,11 @@ pub fn merge_cbo_roaring_bitmaps<'a>(
 | 
			
		||||
    _key: &[u8],
 | 
			
		||||
    values: &[Cow<'a, [u8]>],
 | 
			
		||||
) -> Result<Cow<'a, [u8]>> {
 | 
			
		||||
    match values.split_first().unwrap() {
 | 
			
		||||
        (head, []) => Ok(head.clone()),
 | 
			
		||||
        (head, tail) => {
 | 
			
		||||
            let mut head = CboRoaringBitmapCodec::deserialize_from(&head[..])?;
 | 
			
		||||
 | 
			
		||||
            for value in tail {
 | 
			
		||||
                head |= CboRoaringBitmapCodec::deserialize_from(&value[..])?;
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
            let mut vec = Vec::new();
 | 
			
		||||
            CboRoaringBitmapCodec::serialize_into(&head, &mut vec);
 | 
			
		||||
            Ok(Cow::from(vec))
 | 
			
		||||
        }
 | 
			
		||||
    if values.len() == 1 {
 | 
			
		||||
        Ok(values[0].clone())
 | 
			
		||||
    } else {
 | 
			
		||||
        let mut vec = Vec::new();
 | 
			
		||||
        CboRoaringBitmapCodec::merge_into(values, &mut vec)?;
 | 
			
		||||
        Ok(Cow::from(vec))
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// /// Uses the FacetStringLevelZeroValueCodec to merge the values.
 | 
			
		||||
// pub fn tuple_string_cbo_roaring_bitmap_merge<'a>(
 | 
			
		||||
//     _key: &[u8],
 | 
			
		||||
//     values: &[Cow<[u8]>],
 | 
			
		||||
// ) -> Result<Cow<'a, [u8]>> {
 | 
			
		||||
//     let (head, tail) = values.split_first().unwrap();
 | 
			
		||||
//     let (head_string, mut head_rb) = FacetStringLevelZeroValueCodec::bytes_decode(&head[..])
 | 
			
		||||
//         .ok_or(SerializationError::Decoding { db_name: None })?;
 | 
			
		||||
 | 
			
		||||
//     for value in tail {
 | 
			
		||||
//         let (_string, rb) = FacetStringLevelZeroValueCodec::bytes_decode(&value[..])
 | 
			
		||||
//             .ok_or(SerializationError::Decoding { db_name: None })?;
 | 
			
		||||
//         head_rb |= rb;
 | 
			
		||||
//     }
 | 
			
		||||
 | 
			
		||||
//     FacetStringLevelZeroValueCodec::bytes_encode(&(head_string, head_rb))
 | 
			
		||||
//         .map(|cow| cow.into_owned())
 | 
			
		||||
//         .ok_or(SerializationError::Encoding { db_name: None })
 | 
			
		||||
//         .map_err(Into::into)
 | 
			
		||||
// }
 | 
			
		||||
 | 
			
		||||
// pub fn cbo_roaring_bitmap_merge<'a>(_key: &[u8], values: &[Cow<[u8]>]) -> Result<Cow<'a, [u8]>> {
 | 
			
		||||
//     let (head, tail) = values.split_first().unwrap();
 | 
			
		||||
//     let mut head = CboRoaringBitmapCodec::deserialize_from(&head[..])?;
 | 
			
		||||
 | 
			
		||||
//     for value in tail {
 | 
			
		||||
//         head |= CboRoaringBitmapCodec::deserialize_from(&value[..])?;
 | 
			
		||||
//     }
 | 
			
		||||
 | 
			
		||||
//     let mut vec = Vec::new();
 | 
			
		||||
//     CboRoaringBitmapCodec::serialize_into(&head, &mut vec);
 | 
			
		||||
//     Ok(vec)
 | 
			
		||||
// }
 | 
			
		||||
 
 | 
			
		||||
@@ -188,15 +188,22 @@ fn merge_roaring_bitmaps(new_value: &[u8], db_value: &[u8], buffer: &mut Vec<u8>
 | 
			
		||||
    Ok(serialize_roaring_bitmap(&value, buffer)?)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
use std::borrow::Cow;
 | 
			
		||||
 | 
			
		||||
fn merge_cbo_roaring_bitmaps(
 | 
			
		||||
    new_value: &[u8],
 | 
			
		||||
    db_value: &[u8],
 | 
			
		||||
    buffer: &mut Vec<u8>,
 | 
			
		||||
) -> Result<()> {
 | 
			
		||||
    let new_value = CboRoaringBitmapCodec::deserialize_from(new_value)?;
 | 
			
		||||
    let db_value = CboRoaringBitmapCodec::deserialize_from(db_value)?;
 | 
			
		||||
    let value = new_value | db_value;
 | 
			
		||||
    Ok(CboRoaringBitmapCodec::serialize_into(&value, buffer))
 | 
			
		||||
    Ok(CboRoaringBitmapCodec::merge_into(
 | 
			
		||||
        &[Cow::Borrowed(db_value), Cow::Borrowed(new_value)],
 | 
			
		||||
        buffer,
 | 
			
		||||
    )?)
 | 
			
		||||
 | 
			
		||||
    // let new_value = CboRoaringBitmapCodec::deserialize_from(new_value)?;
 | 
			
		||||
    // let db_value = CboRoaringBitmapCodec::deserialize_from(db_value)?;
 | 
			
		||||
    // let value = new_value | db_value;
 | 
			
		||||
    // Ok(CboRoaringBitmapCodec::serialize_into(&value, buffer))
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/// Write provided entries in database using serialize_value function.
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user