mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-09-14 08:46:26 +00:00
Make the transform struct return diff-based documents obkvs
This commit is contained in:
committed by
Louis Dureuil
parent
f5ef69293b
commit
1dd97578a8
@ -6,6 +6,7 @@ use std::result::Result as StdResult;
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use crate::heed_codec::CboRoaringBitmapCodec;
|
||||
use crate::update::del_add::{DelAdd, KvReaderDelAdd, KvWriterDelAdd};
|
||||
use crate::update::index_documents::transform::Operation;
|
||||
use crate::Result;
|
||||
|
||||
@ -76,55 +77,118 @@ pub fn keep_latest_obkv<'a>(_key: &[u8], obkvs: &[Cow<'a, [u8]>]) -> Result<Cow<
|
||||
Ok(obkvs.last().unwrap().clone())
|
||||
}
|
||||
|
||||
pub fn merge_two_obkvs(base: obkv::KvReaderU16, update: obkv::KvReaderU16, buffer: &mut Vec<u8>) {
|
||||
pub fn merge_two_del_add_obkvs(
|
||||
base: obkv::KvReaderU16,
|
||||
update: obkv::KvReaderU16,
|
||||
merge_additions: bool,
|
||||
buffer: &mut Vec<u8>,
|
||||
) {
|
||||
use itertools::merge_join_by;
|
||||
use itertools::EitherOrBoth::{Both, Left, Right};
|
||||
|
||||
buffer.clear();
|
||||
|
||||
let mut writer = obkv::KvWriter::new(buffer);
|
||||
let mut value_buffer = Vec::new();
|
||||
for eob in merge_join_by(base.iter(), update.iter(), |(b, _), (u, _)| b.cmp(u)) {
|
||||
match eob {
|
||||
Both(_, (k, v)) | Left((k, v)) | Right((k, v)) => writer.insert(k, v).unwrap(),
|
||||
Left((k, v)) => {
|
||||
if merge_additions {
|
||||
writer.insert(k, v).unwrap()
|
||||
} else {
|
||||
// If merge_additions is false, recreate an obkv keeping the deletions only.
|
||||
value_buffer.clear();
|
||||
let mut value_writer = KvWriterDelAdd::new(&mut value_buffer);
|
||||
let base_reader = KvReaderDelAdd::new(v);
|
||||
|
||||
if let Some(deletion) = base_reader.get(DelAdd::Deletion) {
|
||||
value_writer.insert(DelAdd::Deletion, deletion).unwrap();
|
||||
value_writer.finish().unwrap();
|
||||
writer.insert(k, &value_buffer).unwrap()
|
||||
}
|
||||
}
|
||||
}
|
||||
Right((k, v)) => writer.insert(k, v).unwrap(),
|
||||
Both((k, base), (_, update)) => {
|
||||
// merge deletions and additions.
|
||||
value_buffer.clear();
|
||||
let mut value_writer = KvWriterDelAdd::new(&mut value_buffer);
|
||||
let base_reader = KvReaderDelAdd::new(base);
|
||||
let update_reader = KvReaderDelAdd::new(update);
|
||||
|
||||
// keep newest deletion.
|
||||
if let Some(deletion) =
|
||||
update_reader.get(DelAdd::Deletion).or(base_reader.get(DelAdd::Deletion))
|
||||
{
|
||||
value_writer.insert(DelAdd::Deletion, deletion).unwrap();
|
||||
}
|
||||
|
||||
// keep base addition only if merge_additions is true.
|
||||
let base_addition =
|
||||
merge_additions.then(|| base_reader.get(DelAdd::Addition)).flatten();
|
||||
// keep newest addition.
|
||||
if let Some(addition) = update_reader.get(DelAdd::Addition).or(base_addition) {
|
||||
value_writer.insert(DelAdd::Addition, addition).unwrap();
|
||||
}
|
||||
|
||||
value_writer.finish().unwrap();
|
||||
writer.insert(k, &value_buffer).unwrap()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
writer.finish().unwrap();
|
||||
}
|
||||
|
||||
/// Merge all the obks in the order we see them.
|
||||
pub fn merge_obkvs_and_operations<'a>(
|
||||
/// Merge all the obkvs from the newest to the oldest.
|
||||
fn inner_merge_del_add_obkvs<'a>(
|
||||
obkvs: &[Cow<'a, [u8]>],
|
||||
merge_additions: bool,
|
||||
) -> Result<Cow<'a, [u8]>> {
|
||||
// pop the newest operation from the list.
|
||||
let (newest, obkvs) = obkvs.split_last().unwrap();
|
||||
// keep the operation type for the returned value.
|
||||
let newest_operation_type = newest[0];
|
||||
|
||||
// treat the newest obkv as the starting point of the merge.
|
||||
let mut acc_operation_type = newest_operation_type;
|
||||
let mut acc = newest[1..].to_vec();
|
||||
let mut buffer = Vec::new();
|
||||
// reverse iter from the most recent to the oldest.
|
||||
for current in obkvs.into_iter().rev() {
|
||||
// if in the previous iteration there was a complete deletion,
|
||||
// stop the merge process.
|
||||
if acc_operation_type == Operation::Deletion as u8 {
|
||||
break;
|
||||
}
|
||||
|
||||
let newest = obkv::KvReader::new(&acc);
|
||||
let oldest = obkv::KvReader::new(¤t[1..]);
|
||||
merge_two_del_add_obkvs(oldest, newest, merge_additions, &mut buffer);
|
||||
|
||||
// we want the result of the merge into our accumulator.
|
||||
std::mem::swap(&mut acc, &mut buffer);
|
||||
acc_operation_type = current[0];
|
||||
}
|
||||
|
||||
acc.insert(0, newest_operation_type);
|
||||
Ok(Cow::from(acc))
|
||||
}
|
||||
|
||||
/// Merge all the obkvs from the newest to the oldest.
|
||||
pub fn obkvs_merge_additions_and_deletions<'a>(
|
||||
_key: &[u8],
|
||||
obkvs: &[Cow<'a, [u8]>],
|
||||
) -> Result<Cow<'a, [u8]>> {
|
||||
// [add, add, delete, add, add]
|
||||
// we can ignore everything that happened before the last delete.
|
||||
let starting_position =
|
||||
obkvs.iter().rposition(|obkv| obkv[0] == Operation::Deletion as u8).unwrap_or(0);
|
||||
inner_merge_del_add_obkvs(obkvs, true)
|
||||
}
|
||||
|
||||
// [add, add, delete]
|
||||
// if the last operation was a deletion then we simply return the deletion
|
||||
if starting_position == obkvs.len() - 1 && obkvs.last().unwrap()[0] == Operation::Deletion as u8
|
||||
{
|
||||
return Ok(obkvs[obkvs.len() - 1].clone());
|
||||
}
|
||||
let mut buffer = Vec::new();
|
||||
|
||||
// (add, add, delete) [add, add]
|
||||
// in the other case, no deletion will be encountered during the merge
|
||||
let mut ret =
|
||||
obkvs[starting_position..].iter().cloned().fold(Vec::new(), |mut acc, current| {
|
||||
let first = obkv::KvReader::new(&acc);
|
||||
let second = obkv::KvReader::new(¤t[1..]);
|
||||
merge_two_obkvs(first, second, &mut buffer);
|
||||
|
||||
// we want the result of the merge into our accumulator
|
||||
std::mem::swap(&mut acc, &mut buffer);
|
||||
acc
|
||||
});
|
||||
|
||||
ret.insert(0, Operation::Addition as u8);
|
||||
Ok(Cow::from(ret))
|
||||
/// Merge all the obkvs deletions from the newest to the oldest and keep only the newest additions.
|
||||
pub fn obkvs_keep_last_addition_merge_deletions<'a>(
|
||||
_key: &[u8],
|
||||
obkvs: &[Cow<'a, [u8]>],
|
||||
) -> Result<Cow<'a, [u8]>> {
|
||||
inner_merge_del_add_obkvs(obkvs, false)
|
||||
}
|
||||
|
||||
pub fn merge_cbo_roaring_bitmaps<'a>(
|
||||
|
@ -14,8 +14,8 @@ pub use grenad_helpers::{
|
||||
};
|
||||
pub use merge_functions::{
|
||||
concat_u32s_array, keep_first, keep_latest_obkv, merge_btreeset_string,
|
||||
merge_cbo_roaring_bitmaps, merge_obkvs_and_operations, merge_roaring_bitmaps, merge_two_obkvs,
|
||||
serialize_roaring_bitmap, MergeFn,
|
||||
merge_cbo_roaring_bitmaps, merge_roaring_bitmaps, obkvs_keep_last_addition_merge_deletions,
|
||||
obkvs_merge_additions_and_deletions, serialize_roaring_bitmap, MergeFn,
|
||||
};
|
||||
|
||||
use crate::MAX_WORD_LENGTH;
|
||||
|
Reference in New Issue
Block a user