Refactor facet-related codecs

This commit is contained in:
Loïc Lecrenier
2022-09-05 13:01:36 +02:00
committed by Loïc Lecrenier
parent 9b55e582cd
commit 485a72306d
22 changed files with 280 additions and 301 deletions

View File

@ -11,7 +11,7 @@ use time::OffsetDateTime;
use super::{ClearDocuments, FacetsUpdateBulk};
use crate::error::{InternalError, UserError};
use crate::facet::FacetType;
use crate::heed_codec::facet::new::{FacetGroupValueCodec, FacetKeyCodec, MyByteSlice};
use crate::heed_codec::facet::{FacetGroupValueCodec, FacetGroupKeyCodec, ByteSliceRef};
use crate::heed_codec::CboRoaringBitmapCodec;
use crate::index::{db_name, main_key};
use crate::{
@ -626,10 +626,10 @@ fn remove_docids_from_facet_id_docids<'a>(
) -> Result<()> {
let db = match facet_type {
FacetType::String => {
index.facet_id_string_docids.remap_key_type::<FacetKeyCodec<MyByteSlice>>()
index.facet_id_string_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>()
}
FacetType::Number => {
index.facet_id_f64_docids.remap_key_type::<FacetKeyCodec<MyByteSlice>>()
index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>()
}
};
let mut modified = false;

View File

@ -12,8 +12,8 @@ use time::OffsetDateTime;
use crate::error::InternalError;
use crate::facet::FacetType;
use crate::heed_codec::facet::new::{
FacetGroupValue, FacetGroupValueCodec, FacetKey, FacetKeyCodec, MyByteSlice,
use crate::heed_codec::facet::{
FacetGroupValue, FacetGroupValueCodec, FacetGroupKey, FacetGroupKeyCodec, ByteSliceRef,
};
use crate::update::index_documents::{
create_writer, valid_lmdb_key, write_into_lmdb_database, writer_into_reader,
@ -22,7 +22,7 @@ use crate::{CboRoaringBitmapCodec, FieldId, Index, Result};
pub struct FacetsUpdateBulk<'i> {
index: &'i Index,
database: heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>,
database: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
level_group_size: usize,
min_level_size: usize,
facet_type: FacetType,
@ -40,10 +40,10 @@ impl<'i> FacetsUpdateBulk<'i> {
index,
database: match facet_type {
FacetType::String => {
index.facet_id_string_docids.remap_key_type::<FacetKeyCodec<MyByteSlice>>()
index.facet_id_string_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>()
}
FacetType::Number => {
index.facet_id_f64_docids.remap_key_type::<FacetKeyCodec<MyByteSlice>>()
index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>()
}
},
level_group_size: 4,
@ -61,10 +61,10 @@ impl<'i> FacetsUpdateBulk<'i> {
index,
database: match facet_type {
FacetType::String => {
index.facet_id_string_docids.remap_key_type::<FacetKeyCodec<MyByteSlice>>()
index.facet_id_string_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>()
}
FacetType::Number => {
index.facet_id_f64_docids.remap_key_type::<FacetKeyCodec<MyByteSlice>>()
index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>()
}
},
level_group_size: 4,
@ -89,8 +89,8 @@ impl<'i> FacetsUpdateBulk<'i> {
}
fn clear_levels(&self, wtxn: &mut heed::RwTxn, field_id: FieldId) -> Result<()> {
let left = FacetKey::<&[u8]> { field_id, level: 1, left_bound: &[] };
let right = FacetKey::<&[u8]> { field_id, level: u8::MAX, left_bound: &[] };
let left = FacetGroupKey::<&[u8]> { field_id, level: 1, left_bound: &[] };
let right = FacetGroupKey::<&[u8]> { field_id, level: u8::MAX, left_bound: &[] };
let range = left..=right;
self.database.delete_range(wtxn, &range).map(drop)?;
Ok(())
@ -119,7 +119,7 @@ impl<'i> FacetsUpdateBulk<'i> {
for level_reader in level_readers {
let mut cursor = level_reader.into_cursor()?;
while let Some((k, v)) = cursor.move_on_next()? {
let key = FacetKeyCodec::<DecodeIgnore>::bytes_decode(k).unwrap();
let key = FacetGroupKeyCodec::<DecodeIgnore>::bytes_decode(k).unwrap();
let value = FacetGroupValueCodec::bytes_decode(v).unwrap();
println!("inserting {key:?} {value:?}");
@ -210,7 +210,7 @@ impl<'i> FacetsUpdateBulk<'i> {
struct ComputeHigherLevels<'t> {
rtxn: &'t heed::RoTxn<'t>,
db: &'t heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>,
db: &'t heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
field_id: u16,
level_group_size: usize,
min_level_size: usize,
@ -233,7 +233,7 @@ impl<'t> ComputeHigherLevels<'t> {
.db
.as_polymorph()
.prefix_iter::<_, ByteSlice, ByteSlice>(self.rtxn, level_0_prefix.as_slice())?
.remap_types::<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>();
.remap_types::<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>();
let mut left_bound: &[u8] = &[];
let mut first_iteration_for_new_group = true;
@ -311,9 +311,9 @@ impl<'t> ComputeHigherLevels<'t> {
for ((bitmap, left_bound), group_size) in
bitmaps.drain(..).zip(left_bounds.drain(..)).zip(group_sizes.drain(..))
{
let key = FacetKey { field_id: self.field_id, level, left_bound };
let key = FacetGroupKey { field_id: self.field_id, level, left_bound };
let key =
FacetKeyCodec::<MyByteSlice>::bytes_encode(&key).ok_or(Error::Encoding)?;
FacetGroupKeyCodec::<ByteSliceRef>::bytes_encode(&key).ok_or(Error::Encoding)?;
let value = FacetGroupValue { size: group_size, bitmap };
let value =
FacetGroupValueCodec::bytes_encode(&value).ok_or(Error::Encoding)?;
@ -329,9 +329,9 @@ impl<'t> ComputeHigherLevels<'t> {
for ((bitmap, left_bound), group_size) in
bitmaps.drain(..).zip(left_bounds.drain(..)).zip(group_sizes.drain(..))
{
let key = FacetKey { field_id: self.field_id, level, left_bound };
let key = FacetGroupKey { field_id: self.field_id, level, left_bound };
let key =
FacetKeyCodec::<MyByteSlice>::bytes_encode(&key).ok_or(Error::Encoding)?;
FacetGroupKeyCodec::<ByteSliceRef>::bytes_encode(&key).ok_or(Error::Encoding)?;
let value = FacetGroupValue { size: group_size, bitmap };
let value = FacetGroupValueCodec::bytes_encode(&value).ok_or(Error::Encoding)?;
cur_writer.insert(key, value)?;

View File

@ -2,8 +2,8 @@ use heed::types::ByteSlice;
use heed::{BytesDecode, Error, RoTxn, RwTxn};
use roaring::RoaringBitmap;
use crate::heed_codec::facet::new::{
FacetGroupValue, FacetGroupValueCodec, FacetKey, FacetKeyCodec, MyByteSlice,
use crate::heed_codec::facet::{
FacetGroupValue, FacetGroupValueCodec, FacetGroupKey, FacetGroupKeyCodec, ByteSliceRef,
};
use crate::search::facet::get_highest_level;
use crate::Result;
@ -19,13 +19,13 @@ enum DeletionResult {
}
pub struct FacetsUpdateIncremental {
db: heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>,
db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
group_size: usize,
min_level_size: usize,
max_group_size: usize,
}
impl FacetsUpdateIncremental {
pub fn new(db: heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>) -> Self {
pub fn new(db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>) -> Self {
Self { db, group_size: 4, min_level_size: 5, max_group_size: 8 }
}
}
@ -36,7 +36,7 @@ impl FacetsUpdateIncremental {
level: u8,
search_key: &[u8],
txn: &RoTxn,
) -> Result<(FacetKey<Vec<u8>>, FacetGroupValue)> {
) -> Result<(FacetGroupKey<Vec<u8>>, FacetGroupValue)> {
let mut prefix = vec![];
prefix.extend_from_slice(&field_id.to_be_bytes());
prefix.push(level);
@ -45,17 +45,17 @@ impl FacetsUpdateIncremental {
let mut prefix_iter = self
.db
.as_polymorph()
.prefix_iter::<_, MyByteSlice, FacetGroupValueCodec>(txn, &prefix.as_slice())?;
.prefix_iter::<_, ByteSliceRef, FacetGroupValueCodec>(txn, &prefix.as_slice())?;
if let Some(e) = prefix_iter.next() {
let (key_bytes, value) = e?;
Ok((
FacetKeyCodec::<MyByteSlice>::bytes_decode(&key_bytes)
FacetGroupKeyCodec::<ByteSliceRef>::bytes_decode(&key_bytes)
.ok_or(Error::Encoding)?
.into_owned(),
value,
))
} else {
let key = FacetKey { field_id, level, left_bound: search_key };
let key = FacetGroupKey { field_id, level, left_bound: search_key };
match self.db.get_lower_than(txn, &key)? {
Some((key, value)) => {
if key.level != level || key.field_id != field_id {
@ -66,13 +66,13 @@ impl FacetsUpdateIncremental {
let mut iter = self
.db
.as_polymorph()
.prefix_iter::<_, MyByteSlice, FacetGroupValueCodec>(
.prefix_iter::<_, ByteSliceRef, FacetGroupValueCodec>(
txn,
&prefix.as_slice(),
)?;
let (key_bytes, value) = iter.next().unwrap()?;
Ok((
FacetKeyCodec::<MyByteSlice>::bytes_decode(&key_bytes)
FacetGroupKeyCodec::<ByteSliceRef>::bytes_decode(&key_bytes)
.ok_or(Error::Encoding)?
.into_owned(),
value,
@ -93,7 +93,7 @@ impl FacetsUpdateIncremental {
new_key: &[u8],
new_values: &RoaringBitmap,
) -> Result<InsertionResult> {
let key = FacetKey { field_id, level: 0, left_bound: new_key };
let key = FacetGroupKey { field_id, level: 0, left_bound: new_key };
let value = FacetGroupValue { bitmap: new_values.clone(), size: 1 };
let mut level0_prefix = vec![];
@ -193,7 +193,7 @@ impl FacetsUpdateIncremental {
.db
.get_greater_than_or_equal_to(
&txn,
&FacetKey {
&FacetGroupKey {
field_id,
level: level_below,
left_bound: insertion_key.left_bound.as_slice(),
@ -217,7 +217,7 @@ impl FacetsUpdateIncremental {
}
let key =
FacetKey { field_id, level, left_bound: insertion_key.left_bound.clone() };
FacetGroupKey { field_id, level, left_bound: insertion_key.left_bound.clone() };
let value = FacetGroupValue { size: size_left as u8, bitmap: values_left };
(key, value)
};
@ -235,7 +235,7 @@ impl FacetsUpdateIncremental {
}
let key =
FacetKey { field_id, level, left_bound: right_start_key.unwrap().to_vec() };
FacetGroupKey { field_id, level, left_bound: right_start_key.unwrap().to_vec() };
let value = FacetGroupValue { size: size_right as u8, bitmap: values_right };
(key, value)
};
@ -303,7 +303,7 @@ impl FacetsUpdateIncremental {
let mut values = RoaringBitmap::new();
for _ in 0..group_size {
let (key_bytes, value_i) = groups_iter.next().unwrap()?;
let key_i = FacetKeyCodec::<MyByteSlice>::bytes_decode(&key_bytes)
let key_i = FacetGroupKeyCodec::<ByteSliceRef>::bytes_decode(&key_bytes)
.ok_or(Error::Encoding)?;
if first_key.is_none() {
@ -311,7 +311,7 @@ impl FacetsUpdateIncremental {
}
values |= value_i.bitmap;
}
let key = FacetKey {
let key = FacetGroupKey {
field_id,
level: highest_level + 1,
left_bound: first_key.unwrap().left_bound,
@ -384,7 +384,7 @@ impl FacetsUpdateIncremental {
key: &[u8],
value: u32,
) -> Result<DeletionResult> {
let key = FacetKey { field_id, level: 0, left_bound: key };
let key = FacetGroupKey { field_id, level: 0, left_bound: key };
let mut bitmap = self.db.get(&txn, &key)?.unwrap().bitmap;
bitmap.remove(value);
@ -415,7 +415,7 @@ impl FacetsUpdateIncremental {
key: &[u8],
value: u32,
) -> Result<()> {
if self.db.get(txn, &FacetKey { field_id, level: 0, left_bound: key })?.is_none() {
if self.db.get(txn, &FacetGroupKey { field_id, level: 0, left_bound: key })?.is_none() {
return Ok(());
}
let highest_level = get_highest_level(&txn, self.db, field_id)?;
@ -450,7 +450,7 @@ impl FacetsUpdateIncremental {
while let Some(el) = iter.next() {
let (k, _) = el?;
to_delete.push(
FacetKeyCodec::<MyByteSlice>::bytes_decode(k).ok_or(Error::Encoding)?.into_owned(),
FacetGroupKeyCodec::<ByteSliceRef>::bytes_decode(k).ok_or(Error::Encoding)?.into_owned(),
);
}
drop(iter);
@ -469,9 +469,9 @@ mod tests {
use rand::{Rng, SeedableRng};
use roaring::RoaringBitmap;
use crate::heed_codec::facet::new::ordered_f64_codec::OrderedF64Codec;
use crate::heed_codec::facet::new::str_ref::StrRefCodec;
use crate::heed_codec::facet::new::{FacetGroupValueCodec, FacetKeyCodec, MyByteSlice};
use crate::heed_codec::facet::ordered_f64_codec::OrderedF64Codec;
use crate::heed_codec::facet::str_ref::StrRefCodec;
use crate::heed_codec::facet::{FacetGroupValueCodec, FacetGroupKeyCodec, ByteSliceRef};
use crate::milli_snap;
use crate::search::facet::get_highest_level;
use crate::search::facet::test::FacetIndex;
@ -502,7 +502,7 @@ mod tests {
.unwrap();
while let Some(el) = iter.next() {
let (key, value) = el.unwrap();
let key = FacetKeyCodec::<MyByteSlice>::bytes_decode(&key).unwrap();
let key = FacetGroupKeyCodec::<ByteSliceRef>::bytes_decode(&key).unwrap();
let mut prefix_start_below = vec![];
prefix_start_below.extend_from_slice(&field_id.to_be_bytes());
@ -519,7 +519,7 @@ mod tests {
)
.unwrap();
let (key_bytes, _) = start_below_iter.next().unwrap().unwrap();
FacetKeyCodec::<MyByteSlice>::bytes_decode(&key_bytes).unwrap()
FacetGroupKeyCodec::<ByteSliceRef>::bytes_decode(&key_bytes).unwrap()
};
assert!(value.size > 0 && (value.size as usize) < db.max_group_size);
@ -996,7 +996,7 @@ mod tests {
// for ((key, values), group) in values_field_id.iter().zip(level0iter) {
// let (group_key, group_values) = group.unwrap();
// let group_key = FacetKeyCodec::<U16Codec>::bytes_decode(group_key).unwrap();
// let group_key = FacetGroupKeyCodec::<U16Codec>::bytes_decode(group_key).unwrap();
// assert_eq!(key, &group_key.left_bound);
// assert_eq!(values, &group_values.bitmap);
// }
@ -1014,7 +1014,7 @@ mod tests {
// for ((key, values), group) in values_field_id.iter().zip(level0iter) {
// let (group_key, group_values) = group.unwrap();
// let group_key = FacetKeyCodec::<U16Codec>::bytes_decode(group_key).unwrap();
// let group_key = FacetGroupKeyCodec::<U16Codec>::bytes_decode(group_key).unwrap();
// assert_eq!(key, &group_key.left_bound);
// assert_eq!(values, &group_values.bitmap);
// }

View File

@ -1,23 +1,20 @@
use std::{collections::HashMap, fs::File};
use super::{FacetsUpdateBulk, FacetsUpdateIncremental};
use crate::{
facet::FacetType,
heed_codec::facet::{ByteSliceRef, FacetGroupKeyCodec, FacetGroupValueCodec},
CboRoaringBitmapCodec, FieldId, Index, Result,
};
use grenad::CompressionType;
use heed::BytesDecode;
use roaring::RoaringBitmap;
use crate::{
facet::FacetType,
heed_codec::facet::new::{FacetGroupValueCodec, FacetKeyCodec, MyByteSlice},
CboRoaringBitmapCodec, FieldId, Index, Result,
};
use super::{FacetsUpdateBulk, FacetsUpdateIncremental};
use std::{collections::HashMap, fs::File};
pub mod bulk;
pub mod incremental;
pub struct FacetsUpdate<'i> {
index: &'i Index,
database: heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>,
database: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
level_group_size: u8,
max_level_group_size: u8,
min_level_size: u8,
@ -28,10 +25,10 @@ impl<'i> FacetsUpdate<'i> {
pub fn new(index: &'i Index, facet_type: FacetType, new_data: grenad::Reader<File>) -> Self {
let database = match facet_type {
FacetType::String => {
index.facet_id_string_docids.remap_key_type::<FacetKeyCodec<MyByteSlice>>()
index.facet_id_string_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>()
}
FacetType::Number => {
index.facet_id_f64_docids.remap_key_type::<FacetKeyCodec<MyByteSlice>>()
index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>()
}
};
Self {
@ -70,8 +67,8 @@ impl<'i> FacetsUpdate<'i> {
let mut cursor = self.new_data.into_cursor()?;
while let Some((key, value)) = cursor.move_on_next()? {
let key =
FacetKeyCodec::<MyByteSlice>::bytes_decode(key).ok_or(heed::Error::Encoding)?;
let key = FacetGroupKeyCodec::<ByteSliceRef>::bytes_decode(key)
.ok_or(heed::Error::Encoding)?;
let docids =
CboRoaringBitmapCodec::bytes_decode(value).ok_or(heed::Error::Encoding)?;
indexer.insert(wtxn, key.field_id, key.left_bound, &docids)?;

View File

@ -6,9 +6,9 @@ use heed::{BytesDecode, BytesEncode};
use super::helpers::{
create_sorter, merge_cbo_roaring_bitmaps, sorter_into_reader, GrenadParameters,
};
use crate::heed_codec::facet::new::ordered_f64_codec::OrderedF64Codec;
use crate::heed_codec::facet::new::{FacetKey, FacetKeyCodec};
use crate::heed_codec::facet::FieldDocIdFacetF64Codec;
use crate::heed_codec::facet::OrderedF64Codec;
use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec};
use crate::Result;
/// Extracts the facet number and the documents ids where this facet number appear.
@ -36,8 +36,8 @@ pub fn extract_facet_number_docids<R: io::Read + io::Seek>(
let (field_id, document_id, number) =
FieldDocIdFacetF64Codec::bytes_decode(key_bytes).unwrap();
let key = FacetKey { field_id, level: 0, left_bound: number };
let key_bytes = FacetKeyCodec::<OrderedF64Codec>::bytes_encode(&key).unwrap();
let key = FacetGroupKey { field_id, level: 0, left_bound: number };
let key_bytes = FacetGroupKeyCodec::<OrderedF64Codec>::bytes_encode(&key).unwrap();
facet_number_docids_sorter.insert(key_bytes, document_id.to_ne_bytes())?;
}

View File

@ -4,8 +4,8 @@ use std::io;
use heed::BytesEncode;
use super::helpers::{create_sorter, sorter_into_reader, try_split_array_at, GrenadParameters};
use crate::heed_codec::facet::new::str_ref::StrRefCodec;
use crate::heed_codec::facet::new::{FacetKey, FacetKeyCodec};
use crate::heed_codec::facet::StrRefCodec;
use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec};
use crate::update::index_documents::merge_cbo_roaring_bitmaps;
use crate::{FieldId, Result};
@ -43,8 +43,8 @@ pub fn extract_facet_string_docids<R: io::Read + io::Seek>(
let document_id = u32::from_be_bytes(document_id_bytes);
let normalised_value = std::str::from_utf8(normalized_value_bytes)?;
let key = FacetKey { field_id, level: 0, left_bound: normalised_value };
let key_bytes = FacetKeyCodec::<StrRefCodec>::bytes_encode(&key).unwrap();
let key = FacetGroupKey { field_id, level: 0, left_bound: normalised_value };
let key_bytes = FacetGroupKeyCodec::<StrRefCodec>::bytes_encode(&key).unwrap();
facet_string_docids_sorter.insert(&key_bytes, &document_id.to_ne_bytes())?;
}

View File

@ -0,0 +1,4 @@
---
source: milli/src/update/word_prefix_pair_proximity_docids.rs
---
6873ff1f78d08f2b1a13bb9e37349c01