Optimise the merge functions to avoid allocations

This commit is contained in:
Clément Renault
2020-10-20 16:40:50 +02:00
parent cde8478388
commit f948a03be2
4 changed files with 17 additions and 16 deletions

View File

@ -1,3 +1,5 @@
use std::borrow::Cow;
use anyhow::bail;
use bstr::ByteSlice as _;
use fst::IntoStreamer;
@ -9,7 +11,7 @@ const WORDS_FST_KEY: &[u8] = crate::WORDS_FST_KEY.as_bytes();
const HEADERS_KEY: &[u8] = crate::HEADERS_KEY.as_bytes();
const DOCUMENTS_IDS_KEY: &[u8] = crate::DOCUMENTS_IDS_KEY.as_bytes();
pub fn main_merge(key: &[u8], values: &[Vec<u8>]) -> anyhow::Result<Vec<u8>> {
pub fn main_merge(key: &[u8], values: &[Cow<[u8]>]) -> anyhow::Result<Vec<u8>> {
match key {
WORDS_FST_KEY => {
let fsts: Vec<_> = values.iter().map(|v| fst::Set::new(v).unwrap()).collect();
@ -32,12 +34,12 @@ pub fn main_merge(key: &[u8], values: &[Vec<u8>]) -> anyhow::Result<Vec<u8>> {
}
}
pub fn word_docids_merge(_key: &[u8], values: &[Vec<u8>]) -> anyhow::Result<Vec<u8>> {
pub fn word_docids_merge(_key: &[u8], values: &[Cow<[u8]>]) -> anyhow::Result<Vec<u8>> {
let (head, tail) = values.split_first().unwrap();
let mut head = RoaringBitmap::deserialize_from(head.as_slice())?;
let mut head = RoaringBitmap::deserialize_from(&head[..])?;
for value in tail {
let bitmap = RoaringBitmap::deserialize_from(value.as_slice())?;
let bitmap = RoaringBitmap::deserialize_from(&value[..])?;
head.union_with(&bitmap);
}
@ -46,16 +48,16 @@ pub fn word_docids_merge(_key: &[u8], values: &[Vec<u8>]) -> anyhow::Result<Vec<
Ok(vec)
}
pub fn docid_word_positions_merge(key: &[u8], _values: &[Vec<u8>]) -> anyhow::Result<Vec<u8>> {
pub fn docid_word_positions_merge(key: &[u8], _values: &[Cow<[u8]>]) -> anyhow::Result<Vec<u8>> {
bail!("merging docid word positions is an error ({:?})", key.as_bstr())
}
pub fn words_pairs_proximities_docids_merge(_key: &[u8], values: &[Vec<u8>]) -> anyhow::Result<Vec<u8>> {
pub fn words_pairs_proximities_docids_merge(_key: &[u8], values: &[Cow<[u8]>]) -> anyhow::Result<Vec<u8>> {
let (head, tail) = values.split_first().unwrap();
let mut head = CboRoaringBitmapCodec::deserialize_from(head.as_slice())?;
let mut head = CboRoaringBitmapCodec::deserialize_from(&head[..])?;
for value in tail {
let bitmap = CboRoaringBitmapCodec::deserialize_from(value.as_slice())?;
let bitmap = CboRoaringBitmapCodec::deserialize_from(&value[..])?;
head.union_with(&bitmap);
}
@ -64,6 +66,6 @@ pub fn words_pairs_proximities_docids_merge(_key: &[u8], values: &[Vec<u8>]) ->
Ok(vec)
}
pub fn documents_merge(key: &[u8], _values: &[Vec<u8>]) -> anyhow::Result<Vec<u8>> {
pub fn documents_merge(key: &[u8], _values: &[Cow<[u8]>]) -> anyhow::Result<Vec<u8>> {
bail!("merging documents is an error ({:?})", key.as_bstr())
}