Use an u16 field id instead of one byte

This commit is contained in:
Kerollmops
2021-07-06 11:31:24 +02:00
parent cc54c41e30
commit 838ed1cd32
17 changed files with 115 additions and 64 deletions

View File

@ -40,7 +40,7 @@ pub fn keep_first(_key: &[u8], values: &[Cow<[u8]>]) -> Result<Vec<u8>> {
Ok(values.first().unwrap().to_vec())
}
pub fn merge_two_obkvs(base: obkv::KvReader, update: obkv::KvReader, buffer: &mut Vec<u8>) {
pub fn merge_two_obkvs(base: obkv::KvReaderU16, update: obkv::KvReaderU16, buffer: &mut Vec<u8>) {
use itertools::merge_join_by;
use itertools::EitherOrBoth::{Both, Left, Right};

View File

@ -7,6 +7,7 @@ use std::time::Instant;
use std::{cmp, iter};
use bstr::ByteSlice as _;
use concat_arrays::concat_arrays;
use fst::Set;
use grenad::{CompressionType, FileFuse, Reader, Sorter, Writer};
use heed::BytesEncode;
@ -776,7 +777,8 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> {
for ((fid, count), docids) in self.field_id_word_count_docids {
docids_buffer.clear();
CboRoaringBitmapCodec::serialize_into(&docids, &mut docids_buffer);
self.field_id_word_count_docids_sorter.insert([fid, count], &docids_buffer)?;
let key: [u8; 3] = concat_arrays!(fid.to_be_bytes(), [count]);
self.field_id_word_count_docids_sorter.insert(key, &docids_buffer)?;
}
let fst = builder.into_set();

View File

@ -626,7 +626,7 @@ mod test {
Some("tata".to_string()),
false,
);
assert_eq!(result.unwrap(), (0u8, "toto".to_string()));
assert_eq!(result.unwrap(), (0, "toto".to_string()));
assert_eq!(fields_map.len(), 1);
}
@ -635,7 +635,7 @@ mod test {
let mut fields_map = FieldsIdsMap::new();
let result =
compute_primary_key_pair(None, &mut fields_map, Some("tata".to_string()), false);
assert_eq!(result.unwrap(), (0u8, "tata".to_string()));
assert_eq!(result.unwrap(), (0, "tata".to_string()));
assert_eq!(fields_map.len(), 1);
}
@ -643,7 +643,7 @@ mod test {
fn should_return_default_if_both_are_none() {
let mut fields_map = FieldsIdsMap::new();
let result = compute_primary_key_pair(None, &mut fields_map, None, true);
assert_eq!(result.unwrap(), (0u8, "id".to_string()));
assert_eq!(result.unwrap(), (0, "id".to_string()));
assert_eq!(fields_map.len(), 1);
}