Give same interface to bulk and incremental facet indexing types

+ cargo fmt, oops, sorry for the bad history :(
This commit is contained in:
Loïc Lecrenier
2022-09-05 17:31:26 +02:00
committed by Loïc Lecrenier
parent 330c9eb1b2
commit 9026867d17
27 changed files with 333 additions and 174 deletions

View File

@ -6,9 +6,9 @@ use heed::{BytesDecode, BytesEncode};
use super::helpers::{
create_sorter, merge_cbo_roaring_bitmaps, sorter_into_reader, GrenadParameters,
};
use crate::heed_codec::facet::FieldDocIdFacetF64Codec;
use crate::heed_codec::facet::OrderedF64Codec;
use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec};
use crate::heed_codec::facet::{
FacetGroupKey, FacetGroupKeyCodec, FieldDocIdFacetF64Codec, OrderedF64Codec,
};
use crate::Result;
/// Extracts the facet number and the documents ids where this facet number appears.

View File

@ -4,8 +4,7 @@ use std::io;
use heed::BytesEncode;
use super::helpers::{create_sorter, sorter_into_reader, try_split_array_at, GrenadParameters};
use crate::heed_codec::facet::StrRefCodec;
use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec};
use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec, StrRefCodec};
use crate::update::index_documents::merge_cbo_roaring_bitmaps;
use crate::{FieldId, Result};

View File

@ -3,7 +3,7 @@ use std::fs::File;
use std::io::{self, Seek, SeekFrom};
use std::time::Instant;
use grenad::{CompressionType, Reader, Sorter};
use grenad::{CompressionType, Sorter};
use heed::types::ByteSlice;
use log::debug;
@ -208,36 +208,6 @@ pub fn grenad_obkv_into_chunks<R: io::Read + io::Seek>(
Ok(std::iter::from_fn(move || transposer().transpose()))
}
/// Writes every `(key, value)` entry yielded by `reader` into `database`.
///
/// For each entry, the database is probed with a prefix iterator positioned on
/// the entry's key. When the first item returned has exactly that key, the
/// stored value and the incoming value are combined with `merge` and the result
/// overwrites the stored entry in place. Otherwise the incoming entry is
/// written as-is with a plain `put`.
///
/// A debug log line is emitted before the loop and another one, including the
/// elapsed time, once all entries have been processed.
///
/// # Errors
///
/// Any error returned by the reader cursor, by the database operations or by
/// `merge` is propagated to the caller through `?`.
pub fn write_into_lmdb_database(
wtxn: &mut heed::RwTxn,
database: heed::PolyDatabase,
reader: Reader<File>,
merge: MergeFn,
) -> Result<()> {
debug!("Writing MTBL stores...");
// Started here so that the final log line can report the total duration.
let before = Instant::now();
let mut cursor = reader.into_cursor()?;
while let Some((k, v)) = cursor.move_on_next()? {
// Probe the database for entries whose key starts with `k`.
let mut iter = database.prefix_iter_mut::<_, ByteSlice, ByteSlice>(wtxn, k)?;
match iter.next().transpose()? {
// The `key == k` guard rejects a longer key that merely shares the prefix.
Some((key, old_val)) if key == k => {
// The stored value comes first, the incoming one second.
let vals = &[Cow::Borrowed(old_val), Cow::Borrowed(v)][..];
let val = merge(k, vals)?;
// SAFETY: we don't keep references from inside the LMDB database.
// NOTE(review): `val` is produced by `merge` from `old_val`, which was read
// from the database — confirm `merge` never hands back a borrow of it.
unsafe { iter.put_current(k, &val)? };
}
_ => {
// The iterator was created from `wtxn`; it is dropped before `wtxn` is
// used again for the `put` below.
drop(iter);
database.put::<_, ByteSlice, ByteSlice>(wtxn, k, v)?;
}
}
}
debug!("MTBL stores merged in {:.02?}!", before.elapsed());
Ok(())
}
pub fn sorter_into_lmdb_database(
wtxn: &mut heed::RwTxn,
database: heed::PolyDatabase,

View File

@ -9,8 +9,8 @@ pub use clonable_mmap::{ClonableMmap, CursorClonableMmap};
use fst::{IntoStreamer, Streamer};
pub use grenad_helpers::{
as_cloneable_grenad, create_sorter, create_writer, grenad_obkv_into_chunks,
merge_ignore_values, sorter_into_lmdb_database, sorter_into_reader, write_into_lmdb_database,
writer_into_reader, GrenadParameters, MergeableReader,
merge_ignore_values, sorter_into_lmdb_database, sorter_into_reader, writer_into_reader,
GrenadParameters, MergeableReader,
};
pub use merge_functions::{
concat_u32s_array, keep_first, keep_latest_obkv, merge_cbo_roaring_bitmaps, merge_obkvs,

View File

@ -27,8 +27,7 @@ pub use self::enrich::{
pub use self::helpers::{
as_cloneable_grenad, create_sorter, create_writer, fst_stream_into_hashset,
fst_stream_into_vec, merge_cbo_roaring_bitmaps, merge_roaring_bitmaps,
sorter_into_lmdb_database, valid_lmdb_key, write_into_lmdb_database, writer_into_reader,
ClonableMmap, MergeFn,
sorter_into_lmdb_database, valid_lmdb_key, writer_into_reader, ClonableMmap, MergeFn,
};
use self::helpers::{grenad_obkv_into_chunks, GrenadParameters};
pub use self::transform::{Transform, TransformOutput};