Make the changes to use heed v0.20-alpha.6

Clément Renault
2023-11-22 18:21:19 +01:00
parent 56a0d91ecd
commit 0d4482625a
54 changed files with 611 additions and 477 deletions

View File

@@ -3,7 +3,7 @@ use std::io::BufReader;
use grenad::CompressionType;
use heed::types::ByteSlice;
-use heed::{BytesDecode, BytesEncode, Error, RoTxn, RwTxn};
+use heed::{BytesDecode, BytesEncode, Error, PutFlags, RoTxn, RwTxn};
use roaring::RoaringBitmap;
use super::{FACET_GROUP_SIZE, FACET_MIN_LEVEL_SIZE};
@@ -146,7 +146,13 @@ impl<R: std::io::Read + std::io::Seek> FacetsUpdateBulkInner<R> {
buffer.push(1);
// then we extend the buffer with the docids bitmap
buffer.extend_from_slice(value);
-unsafe { database.append(key, &buffer)? };
+unsafe {
+database.put_current_with_options::<ByteSlice>(
+PutFlags::APPEND,
+key,
+&buffer,
+)?
+};
}
} else {
let mut buffer = Vec::new();
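
For context on the hunk above: the dedicated append call used before is replaced by an explicit put flag. PutFlags::APPEND corresponds to LMDB's MDB_APPEND mode, which is only valid when keys are written in ascending order (the bulk writer guarantees this here) and lets LMDB skip the tree lookup on each insertion. A minimal sketch of the new call shape, with database, key, and buffer standing in for the bindings from the hunk:

// Sketch only; mirrors the hunk above, not additional code from the commit.
// Safety: MDB_APPEND requires keys to arrive in ascending order.
unsafe {
    database.put_current_with_options::<ByteSlice>(PutFlags::APPEND, key, &buffer)?;
}
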
@@ -219,8 +225,8 @@ impl<R: std::io::Read + std::io::Seek> FacetsUpdateBulkInner<R> {
let level_0_iter = self
.db
-.as_polymorph()
-.prefix_iter::<_, ByteSlice, ByteSlice>(rtxn, level_0_prefix.as_slice())?
+.remap_types::<ByteSlice, ByteSlice>()
+.prefix_iter(rtxn, level_0_prefix.as_slice())?
.remap_types::<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>();
let mut left_bound: &[u8] = &[];
@@ -308,10 +314,10 @@ impl<R: std::io::Read + std::io::Seek> FacetsUpdateBulkInner<R> {
{
let key = FacetGroupKey { field_id, level, left_bound };
let key = FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_encode(&key)
-.ok_or(Error::Encoding)?;
+.map_err(Error::Encoding)?;
let value = FacetGroupValue { size: group_size, bitmap };
let value =
-FacetGroupValueCodec::bytes_encode(&value).ok_or(Error::Encoding)?;
+FacetGroupValueCodec::bytes_encode(&value).map_err(Error::Encoding)?;
cur_writer.insert(key, value)?;
cur_writer_len += 1;
}
@@ -337,9 +343,9 @@ impl<R: std::io::Read + std::io::Seek> FacetsUpdateBulkInner<R> {
{
let key = FacetGroupKey { field_id, level, left_bound };
let key = FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_encode(&key)
-.ok_or(Error::Encoding)?;
+.map_err(Error::Encoding)?;
let value = FacetGroupValue { size: group_size, bitmap };
-let value = FacetGroupValueCodec::bytes_encode(&value).ok_or(Error::Encoding)?;
+let value = FacetGroupValueCodec::bytes_encode(&value).map_err(Error::Encoding)?;
cur_writer.insert(key, value)?;
cur_writer_len += 1;
}
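
The repeated ok_or(Error::Encoding) to map_err(Error::Encoding) changes in this file follow from the codec traits becoming fallible in the heed 0.20 line: bytes_encode and bytes_decode now return a Result carrying a boxed error instead of an Option, and call sites convert that error into heed::Error::Encoding. A minimal sketch of a codec written against the new trait shape (the BoxedError alias and exact signatures are assumptions based on this migration, not something shown in the diff):

use std::borrow::Cow;

use heed::{BoxedError, BytesDecode, BytesEncode};

// Hypothetical codec for big-endian u32 values, written against the fallible traits.
struct U32BeCodec;

impl<'a> BytesEncode<'a> for U32BeCodec {
    type EItem = u32;

    fn bytes_encode(item: &'a Self::EItem) -> Result<Cow<'a, [u8]>, BoxedError> {
        Ok(Cow::Owned(item.to_be_bytes().to_vec()))
    }
}

impl<'a> BytesDecode<'a> for U32BeCodec {
    type DItem = u32;

    fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
        let array: [u8; 4] = bytes.try_into().map_err(|_| "expected exactly 4 bytes")?;
        Ok(u32::from_be_bytes(array))
    }
}
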

View File

@@ -68,18 +68,18 @@ impl FacetsUpdateIncremental {
continue;
}
let key = FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_decode(key)
-.ok_or(heed::Error::Encoding)?;
+.map_err(heed::Error::Encoding)?;
let value = KvReader::new(value);
let docids_to_delete = value
.get(DelAdd::Deletion)
.map(CboRoaringBitmapCodec::bytes_decode)
-.map(|o| o.ok_or(heed::Error::Encoding));
+.map(|o| o.map_err(heed::Error::Encoding));
let docids_to_add = value
.get(DelAdd::Addition)
.map(CboRoaringBitmapCodec::bytes_decode)
-.map(|o| o.ok_or(heed::Error::Encoding));
+.map(|o| o.map_err(heed::Error::Encoding));
if let Some(docids_to_delete) = docids_to_delete {
let docids_to_delete = docids_to_delete?;
@@ -134,15 +134,14 @@ impl FacetsUpdateIncrementalInner {
prefix.extend_from_slice(&field_id.to_be_bytes());
prefix.push(level);
-let mut iter =
-self.db.as_polymorph().prefix_iter::<_, ByteSlice, FacetGroupValueCodec>(
-txn,
-prefix.as_slice(),
-)?;
+let mut iter = self
+.db
+.remap_types::<ByteSlice, FacetGroupValueCodec>()
+.prefix_iter(txn, prefix.as_slice())?;
let (key_bytes, value) = iter.next().unwrap()?;
Ok((
FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_decode(key_bytes)
-.ok_or(Error::Encoding)?
+.map_err(Error::Encoding)?
.into_owned(),
value,
))
@@ -177,10 +176,8 @@ impl FacetsUpdateIncrementalInner {
level0_prefix.extend_from_slice(&field_id.to_be_bytes());
level0_prefix.push(0);
-let mut iter = self
-.db
-.as_polymorph()
-.prefix_iter::<_, ByteSlice, DecodeIgnore>(txn, &level0_prefix)?;
+let mut iter =
+self.db.remap_types::<ByteSlice, DecodeIgnore>().prefix_iter(txn, &level0_prefix)?;
if iter.next().is_none() {
drop(iter);
@@ -384,8 +381,8 @@ impl FacetsUpdateIncrementalInner {
let size_highest_level = self
.db
-.as_polymorph()
-.prefix_iter::<_, ByteSlice, ByteSlice>(txn, &highest_level_prefix)?
+.remap_types::<ByteSlice, ByteSlice>()
+.prefix_iter(txn, &highest_level_prefix)?
.count();
if size_highest_level < self.group_size as usize * self.min_level_size as usize {
@@ -394,8 +391,8 @@ impl FacetsUpdateIncrementalInner {
let mut groups_iter = self
.db
-.as_polymorph()
-.prefix_iter::<_, ByteSlice, FacetGroupValueCodec>(txn, &highest_level_prefix)?;
+.remap_types::<ByteSlice, FacetGroupValueCodec>()
+.prefix_iter(txn, &highest_level_prefix)?;
let nbr_new_groups = size_highest_level / self.group_size as usize;
let nbr_leftover_elements = size_highest_level % self.group_size as usize;
@@ -407,7 +404,7 @@ impl FacetsUpdateIncrementalInner {
for _ in 0..group_size {
let (key_bytes, value_i) = groups_iter.next().unwrap()?;
let key_i = FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_decode(key_bytes)
-.ok_or(Error::Encoding)?;
+.map_err(Error::Encoding)?;
if first_key.is_none() {
first_key = Some(key_i);
@@ -430,7 +427,7 @@ impl FacetsUpdateIncrementalInner {
for _ in 0..nbr_leftover_elements {
let (key_bytes, value_i) = groups_iter.next().unwrap()?;
let key_i = FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_decode(key_bytes)
-.ok_or(Error::Encoding)?;
+.map_err(Error::Encoding)?;
if first_key.is_none() {
first_key = Some(key_i);
@@ -597,8 +594,8 @@ impl FacetsUpdateIncrementalInner {
if highest_level == 0
|| self
.db
-.as_polymorph()
-.prefix_iter::<_, ByteSlice, ByteSlice>(txn, &highest_level_prefix)?
+.remap_types::<ByteSlice, ByteSlice>()
+.prefix_iter(txn, &highest_level_prefix)?
.count()
>= self.min_level_size as usize
{
@@ -607,13 +604,13 @@ impl FacetsUpdateIncrementalInner {
let mut to_delete = vec![];
let mut iter = self
.db
-.as_polymorph()
-.prefix_iter::<_, ByteSlice, ByteSlice>(txn, &highest_level_prefix)?;
+.remap_types::<ByteSlice, ByteSlice>()
+.prefix_iter(txn, &highest_level_prefix)?;
for el in iter.by_ref() {
let (k, _) = el?;
to_delete.push(
FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_decode(k)
-.ok_or(Error::Encoding)?
+.map_err(Error::Encoding)?
.into_owned(),
);
}
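
The other recurring change in this file is the removal of as_polymorph(): instead of turbofishing codecs into prefix_iter on an untyped view of the database, the typed Database is locally reinterpreted with remap_types::<KC, DC>(), and the resulting iterator can be remapped again the same way. A small sketch of the pattern as a hypothetical helper (not code from the commit), with the database handle, transaction, and prefix taken as parameters:

use heed::types::{ByteSlice, DecodeIgnore};
use heed::{Database, RoTxn};

// Hypothetical helper: report whether any key starts with the given prefix, decoding nothing.
// KC and DC are whatever codecs the database was opened with; they are swapped out locally
// with remap_types, exactly like the call sites above.
fn prefix_is_empty<KC, DC>(
    db: Database<KC, DC>,
    rtxn: &RoTxn,
    prefix: &[u8],
) -> heed::Result<bool> {
    let mut iter = db.remap_types::<ByteSlice, DecodeIgnore>().prefix_iter(rtxn, prefix)?;
    Ok(iter.next().is_none())
}
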

View File

@@ -95,7 +95,7 @@ use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec, FacetGroupValu
use crate::heed_codec::ByteSliceRefCodec;
use crate::update::index_documents::create_sorter;
use crate::update::merge_btreeset_string;
-use crate::{BEU16StrCodec, Index, Result, BEU16, MAX_FACET_VALUE_LENGTH};
+use crate::{BEU16StrCodec, Index, Result, MAX_FACET_VALUE_LENGTH};
pub mod bulk;
pub mod incremental;
@@ -207,8 +207,8 @@ impl<'i> FacetsUpdate<'i> {
}
let set = BTreeSet::from_iter(std::iter::once(left_bound));
let key = (field_id, normalized_facet.as_ref());
-let key = BEU16StrCodec::bytes_encode(&key).ok_or(heed::Error::Encoding)?;
-let val = SerdeJson::bytes_encode(&set).ok_or(heed::Error::Encoding)?;
+let key = BEU16StrCodec::bytes_encode(&key).map_err(heed::Error::Encoding)?;
+let val = SerdeJson::bytes_encode(&set).map_err(heed::Error::Encoding)?;
sorter.insert(key, val)?;
}
}
@@ -252,7 +252,7 @@ impl<'i> FacetsUpdate<'i> {
// We write those FSTs in LMDB now
for (field_id, fst) in text_fsts {
-self.index.facet_id_string_fst.put(wtxn, &BEU16::new(field_id), &fst)?;
+self.index.facet_id_string_fst.put(wtxn, &field_id, &fst)?;
}
Ok(())
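
The last hunk drops the BEU16::new(field_id) wrapper: the key codec of facet_id_string_fst is presumably one of heed's own integer codecs after this migration, so the call site passes the plain u16 and the codec handles the byte order. A sketch under that assumption (U16<BE> as the key codec and a Str value stand in for the real types, which this diff does not show):

use heed::byteorder::BE;
use heed::types::{Str, U16};
use heed::{Database, RwTxn};

// Hedged sketch: with a big-endian integer codec as the key type, put takes a plain
// integer reference, which is why the wrapper disappears from the call site above.
fn put_field_entry(
    db: Database<U16<BE>, Str>,
    wtxn: &mut RwTxn,
    field_id: u16,
    value: &str,
) -> heed::Result<()> {
    db.put(wtxn, &field_id, value)
}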