Give the same interface to the bulk and incremental facet indexing types

+ cargo fmt, oops, sorry for the bad history :(
This commit is contained in:
Loïc Lecrenier
2022-09-05 17:31:26 +02:00
committed by Loïc Lecrenier
parent 330c9eb1b2
commit 9026867d17
27 changed files with 333 additions and 174 deletions

View File

@@ -3,7 +3,7 @@ use std::fs::File;
use std::io::{self, Seek, SeekFrom};
use std::time::Instant;
use grenad::{CompressionType, Reader, Sorter};
use grenad::{CompressionType, Sorter};
use heed::types::ByteSlice;
use log::debug;
@@ -208,36 +208,6 @@ pub fn grenad_obkv_into_chunks<R: io::Read + io::Seek>(
Ok(std::iter::from_fn(move || transposer().transpose()))
}
/// Writes every entry yielded by `reader` into `database`, merging on key collisions.
///
/// For each `(key, value)` pair produced by the reader's cursor, the first database
/// entry whose key starts with `key` is fetched:
///
/// - if that entry's key is exactly `key`, the stored value and the incoming value are
///   passed (in that order) to `merge`, and the result overwrites the stored entry;
/// - otherwise the incoming pair is inserted with a plain `put`.
///
/// A debug message is logged before the loop and another, with the elapsed time, after it.
///
/// # Errors
///
/// Propagates any error returned by the reader cursor, by the database operations,
/// or by `merge`.
pub fn write_into_lmdb_database(
    wtxn: &mut heed::RwTxn,
    database: heed::PolyDatabase,
    reader: Reader<File>,
    merge: MergeFn,
) -> Result<()> {
    debug!("Writing MTBL stores...");
    let started_at = Instant::now();

    let mut entries = reader.into_cursor()?;
    while let Some((key, new_value)) = entries.move_on_next()? {
        let mut existing = database.prefix_iter_mut::<_, ByteSlice, ByteSlice>(wtxn, key)?;
        match existing.next().transpose()? {
            Some((stored_key, stored_value)) if stored_key == key => {
                // Stored value first, incoming value second: `merge` receives them in this order.
                let candidates = [Cow::Borrowed(stored_value), Cow::Borrowed(new_value)];
                let merged = merge(key, &candidates[..])?;
                // SAFETY: we don't keep references from inside the LMDB database.
                // NOTE(review): `merged` is a `Cow` and could still borrow `stored_value`
                // if `merge` returns one of its inputs unchanged — confirm against the
                // merge functions actually passed by callers.
                unsafe { existing.put_current(key, &merged)? };
            }
            _ => {
                // The iterator holds the write transaction; release it before `put`.
                drop(existing);
                database.put::<_, ByteSlice, ByteSlice>(wtxn, key, new_value)?;
            }
        }
    }

    debug!("MTBL stores merged in {:.02?}!", started_at.elapsed());
    Ok(())
}
pub fn sorter_into_lmdb_database(
wtxn: &mut heed::RwTxn,
database: heed::PolyDatabase,

View File

@@ -9,8 +9,8 @@ pub use clonable_mmap::{ClonableMmap, CursorClonableMmap};
use fst::{IntoStreamer, Streamer};
pub use grenad_helpers::{
as_cloneable_grenad, create_sorter, create_writer, grenad_obkv_into_chunks,
merge_ignore_values, sorter_into_lmdb_database, sorter_into_reader, write_into_lmdb_database,
writer_into_reader, GrenadParameters, MergeableReader,
merge_ignore_values, sorter_into_lmdb_database, sorter_into_reader, writer_into_reader,
GrenadParameters, MergeableReader,
};
pub use merge_functions::{
concat_u32s_array, keep_first, keep_latest_obkv, merge_cbo_roaring_bitmaps, merge_obkvs,