Make the changes to use heed v0.20-alpha.6

This commit is contained in:
Clément Renault
2023-11-22 18:21:19 +01:00
parent 56a0d91ecd
commit 0d4482625a
54 changed files with 611 additions and 477 deletions

View File

@ -1,15 +1,16 @@
use heed::RwTxn;
use roaring::RoaringBitmap;
use time::OffsetDateTime;
use crate::{FieldDistribution, Index, Result};
pub struct ClearDocuments<'t, 'u, 'i> {
wtxn: &'t mut heed::RwTxn<'i, 'u>,
pub struct ClearDocuments<'t, 'i> {
wtxn: &'t mut RwTxn<'i>,
index: &'i Index,
}
impl<'t, 'u, 'i> ClearDocuments<'t, 'u, 'i> {
pub fn new(wtxn: &'t mut heed::RwTxn<'i, 'u>, index: &'i Index) -> ClearDocuments<'t, 'u, 'i> {
impl<'t, 'i> ClearDocuments<'t, 'i> {
pub fn new(wtxn: &'t mut RwTxn<'i>, index: &'i Index) -> ClearDocuments<'t, 'i> {
ClearDocuments { wtxn, index }
}

View File

@ -3,7 +3,7 @@ use std::io::BufReader;
use grenad::CompressionType;
use heed::types::ByteSlice;
use heed::{BytesDecode, BytesEncode, Error, RoTxn, RwTxn};
use heed::{BytesDecode, BytesEncode, Error, PutFlags, RoTxn, RwTxn};
use roaring::RoaringBitmap;
use super::{FACET_GROUP_SIZE, FACET_MIN_LEVEL_SIZE};
@ -146,7 +146,13 @@ impl<R: std::io::Read + std::io::Seek> FacetsUpdateBulkInner<R> {
buffer.push(1);
// then we extend the buffer with the docids bitmap
buffer.extend_from_slice(value);
unsafe { database.append(key, &buffer)? };
unsafe {
database.put_current_with_options::<ByteSlice>(
PutFlags::APPEND,
key,
&buffer,
)?
};
}
} else {
let mut buffer = Vec::new();
@ -219,8 +225,8 @@ impl<R: std::io::Read + std::io::Seek> FacetsUpdateBulkInner<R> {
let level_0_iter = self
.db
.as_polymorph()
.prefix_iter::<_, ByteSlice, ByteSlice>(rtxn, level_0_prefix.as_slice())?
.remap_types::<ByteSlice, ByteSlice>()
.prefix_iter(rtxn, level_0_prefix.as_slice())?
.remap_types::<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>();
let mut left_bound: &[u8] = &[];
@ -308,10 +314,10 @@ impl<R: std::io::Read + std::io::Seek> FacetsUpdateBulkInner<R> {
{
let key = FacetGroupKey { field_id, level, left_bound };
let key = FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_encode(&key)
.ok_or(Error::Encoding)?;
.map_err(Error::Encoding)?;
let value = FacetGroupValue { size: group_size, bitmap };
let value =
FacetGroupValueCodec::bytes_encode(&value).ok_or(Error::Encoding)?;
FacetGroupValueCodec::bytes_encode(&value).map_err(Error::Encoding)?;
cur_writer.insert(key, value)?;
cur_writer_len += 1;
}
@ -337,9 +343,9 @@ impl<R: std::io::Read + std::io::Seek> FacetsUpdateBulkInner<R> {
{
let key = FacetGroupKey { field_id, level, left_bound };
let key = FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_encode(&key)
.ok_or(Error::Encoding)?;
.map_err(Error::Encoding)?;
let value = FacetGroupValue { size: group_size, bitmap };
let value = FacetGroupValueCodec::bytes_encode(&value).ok_or(Error::Encoding)?;
let value = FacetGroupValueCodec::bytes_encode(&value).map_err(Error::Encoding)?;
cur_writer.insert(key, value)?;
cur_writer_len += 1;
}

View File

@ -68,18 +68,18 @@ impl FacetsUpdateIncremental {
continue;
}
let key = FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_decode(key)
.ok_or(heed::Error::Encoding)?;
.map_err(heed::Error::Encoding)?;
let value = KvReader::new(value);
let docids_to_delete = value
.get(DelAdd::Deletion)
.map(CboRoaringBitmapCodec::bytes_decode)
.map(|o| o.ok_or(heed::Error::Encoding));
.map(|o| o.map_err(heed::Error::Encoding));
let docids_to_add = value
.get(DelAdd::Addition)
.map(CboRoaringBitmapCodec::bytes_decode)
.map(|o| o.ok_or(heed::Error::Encoding));
.map(|o| o.map_err(heed::Error::Encoding));
if let Some(docids_to_delete) = docids_to_delete {
let docids_to_delete = docids_to_delete?;
@ -134,15 +134,14 @@ impl FacetsUpdateIncrementalInner {
prefix.extend_from_slice(&field_id.to_be_bytes());
prefix.push(level);
let mut iter =
self.db.as_polymorph().prefix_iter::<_, ByteSlice, FacetGroupValueCodec>(
txn,
prefix.as_slice(),
)?;
let mut iter = self
.db
.remap_types::<ByteSlice, FacetGroupValueCodec>()
.prefix_iter(txn, prefix.as_slice())?;
let (key_bytes, value) = iter.next().unwrap()?;
Ok((
FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_decode(key_bytes)
.ok_or(Error::Encoding)?
.map_err(Error::Encoding)?
.into_owned(),
value,
))
@ -177,10 +176,8 @@ impl FacetsUpdateIncrementalInner {
level0_prefix.extend_from_slice(&field_id.to_be_bytes());
level0_prefix.push(0);
let mut iter = self
.db
.as_polymorph()
.prefix_iter::<_, ByteSlice, DecodeIgnore>(txn, &level0_prefix)?;
let mut iter =
self.db.remap_types::<ByteSlice, DecodeIgnore>().prefix_iter(txn, &level0_prefix)?;
if iter.next().is_none() {
drop(iter);
@ -384,8 +381,8 @@ impl FacetsUpdateIncrementalInner {
let size_highest_level = self
.db
.as_polymorph()
.prefix_iter::<_, ByteSlice, ByteSlice>(txn, &highest_level_prefix)?
.remap_types::<ByteSlice, ByteSlice>()
.prefix_iter(txn, &highest_level_prefix)?
.count();
if size_highest_level < self.group_size as usize * self.min_level_size as usize {
@ -394,8 +391,8 @@ impl FacetsUpdateIncrementalInner {
let mut groups_iter = self
.db
.as_polymorph()
.prefix_iter::<_, ByteSlice, FacetGroupValueCodec>(txn, &highest_level_prefix)?;
.remap_types::<ByteSlice, FacetGroupValueCodec>()
.prefix_iter(txn, &highest_level_prefix)?;
let nbr_new_groups = size_highest_level / self.group_size as usize;
let nbr_leftover_elements = size_highest_level % self.group_size as usize;
@ -407,7 +404,7 @@ impl FacetsUpdateIncrementalInner {
for _ in 0..group_size {
let (key_bytes, value_i) = groups_iter.next().unwrap()?;
let key_i = FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_decode(key_bytes)
.ok_or(Error::Encoding)?;
.map_err(Error::Encoding)?;
if first_key.is_none() {
first_key = Some(key_i);
@ -430,7 +427,7 @@ impl FacetsUpdateIncrementalInner {
for _ in 0..nbr_leftover_elements {
let (key_bytes, value_i) = groups_iter.next().unwrap()?;
let key_i = FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_decode(key_bytes)
.ok_or(Error::Encoding)?;
.map_err(Error::Encoding)?;
if first_key.is_none() {
first_key = Some(key_i);
@ -597,8 +594,8 @@ impl FacetsUpdateIncrementalInner {
if highest_level == 0
|| self
.db
.as_polymorph()
.prefix_iter::<_, ByteSlice, ByteSlice>(txn, &highest_level_prefix)?
.remap_types::<ByteSlice, ByteSlice>()
.prefix_iter(txn, &highest_level_prefix)?
.count()
>= self.min_level_size as usize
{
@ -607,13 +604,13 @@ impl FacetsUpdateIncrementalInner {
let mut to_delete = vec![];
let mut iter = self
.db
.as_polymorph()
.prefix_iter::<_, ByteSlice, ByteSlice>(txn, &highest_level_prefix)?;
.remap_types::<ByteSlice, ByteSlice>()
.prefix_iter(txn, &highest_level_prefix)?;
for el in iter.by_ref() {
let (k, _) = el?;
to_delete.push(
FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_decode(k)
.ok_or(Error::Encoding)?
.map_err(Error::Encoding)?
.into_owned(),
);
}

View File

@ -95,7 +95,7 @@ use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec, FacetGroupValu
use crate::heed_codec::ByteSliceRefCodec;
use crate::update::index_documents::create_sorter;
use crate::update::merge_btreeset_string;
use crate::{BEU16StrCodec, Index, Result, BEU16, MAX_FACET_VALUE_LENGTH};
use crate::{BEU16StrCodec, Index, Result, MAX_FACET_VALUE_LENGTH};
pub mod bulk;
pub mod incremental;
@ -207,8 +207,8 @@ impl<'i> FacetsUpdate<'i> {
}
let set = BTreeSet::from_iter(std::iter::once(left_bound));
let key = (field_id, normalized_facet.as_ref());
let key = BEU16StrCodec::bytes_encode(&key).ok_or(heed::Error::Encoding)?;
let val = SerdeJson::bytes_encode(&set).ok_or(heed::Error::Encoding)?;
let key = BEU16StrCodec::bytes_encode(&key).map_err(heed::Error::Encoding)?;
let val = SerdeJson::bytes_encode(&set).map_err(heed::Error::Encoding)?;
sorter.insert(key, val)?;
}
}
@ -252,7 +252,7 @@ impl<'i> FacetsUpdate<'i> {
// We write those FSTs in LMDB now
for (field_id, fst) in text_fsts {
self.index.facet_id_string_fst.put(wtxn, &BEU16::new(field_id), &fst)?;
self.index.facet_id_string_fst.put(wtxn, &field_id, &fst)?;
}
Ok(())

View File

@ -6,8 +6,8 @@ use std::io::{self, BufReader};
use std::mem::size_of;
use std::result::Result as StdResult;
use bytemuck::bytes_of;
use grenad::Sorter;
use heed::zerocopy::AsBytes;
use heed::BytesEncode;
use itertools::EitherOrBoth;
use ordered_float::OrderedFloat;
@ -20,9 +20,7 @@ use crate::error::InternalError;
use crate::facet::value_encoding::f64_into_bytes;
use crate::update::del_add::{DelAdd, KvWriterDelAdd};
use crate::update::index_documents::{create_writer, writer_into_reader};
use crate::{
CboRoaringBitmapCodec, DocumentId, Error, FieldId, Result, BEU32, MAX_FACET_VALUE_LENGTH,
};
use crate::{CboRoaringBitmapCodec, DocumentId, Error, FieldId, Result, MAX_FACET_VALUE_LENGTH};
/// The length of the elements that are always in the buffer when inserting new values.
const TRUNCATE_SIZE: usize = size_of::<FieldId>() + size_of::<DocumentId>();
@ -94,7 +92,7 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
strings_key_buffer.extend_from_slice(&field_id.to_be_bytes());
let document: [u8; 4] = docid_bytes[..4].try_into().ok().unwrap();
let document = BEU32::from(document).get();
let document = DocumentId::from_be_bytes(document);
// For the other extraction tasks, prefix the key with the field_id and the document_id
numbers_key_buffer.extend_from_slice(docid_bytes);
@ -323,7 +321,7 @@ where
// We insert only the Del part of the Obkv to inform
// that we only want to remove all those numbers.
let mut obkv = KvWriterDelAdd::memory();
obkv.insert(DelAdd::Deletion, ().as_bytes())?;
obkv.insert(DelAdd::Deletion, bytes_of(&()))?;
let bytes = obkv.into_inner()?;
fid_docid_facet_numbers_sorter.insert(&key_buffer, bytes)?;
}
@ -336,7 +334,7 @@ where
// We insert only the Add part of the Obkv to inform
// that we only want to add all those numbers.
let mut obkv = KvWriterDelAdd::memory();
obkv.insert(DelAdd::Addition, ().as_bytes())?;
obkv.insert(DelAdd::Addition, bytes_of(&()))?;
let bytes = obkv.into_inner()?;
fid_docid_facet_numbers_sorter.insert(&key_buffer, bytes)?;
}

View File

@ -118,7 +118,7 @@ pub fn extract_word_docids<R: io::Read + io::Seek>(
}
let (word, fid) = StrBEU16Codec::bytes_decode(key)
.ok_or(SerializationError::Decoding { db_name: Some(DOCID_WORD_POSITIONS) })?;
.map_err(|_| SerializationError::Decoding { db_name: Some(DOCID_WORD_POSITIONS) })?;
// Every word contained in an attribute set to exact must be pushed to the exact_words list.
if exact_attributes.contains(&fid) {

View File

@ -68,8 +68,8 @@ impl Default for IndexDocumentsMethod {
}
}
pub struct IndexDocuments<'t, 'u, 'i, 'a, FP, FA> {
wtxn: &'t mut heed::RwTxn<'i, 'u>,
pub struct IndexDocuments<'t, 'i, 'a, FP, FA> {
wtxn: &'t mut heed::RwTxn<'i>,
index: &'i Index,
config: IndexDocumentsConfig,
indexer_config: &'a IndexerConfig,
@ -90,19 +90,19 @@ pub struct IndexDocumentsConfig {
pub autogenerate_docids: bool,
}
impl<'t, 'u, 'i, 'a, FP, FA> IndexDocuments<'t, 'u, 'i, 'a, FP, FA>
impl<'t, 'i, 'a, FP, FA> IndexDocuments<'t, 'i, 'a, FP, FA>
where
FP: Fn(UpdateIndexingStep) + Sync,
FA: Fn() -> bool + Sync,
{
pub fn new(
wtxn: &'t mut heed::RwTxn<'i, 'u>,
wtxn: &'t mut heed::RwTxn<'i>,
index: &'i Index,
indexer_config: &'a IndexerConfig,
config: IndexDocumentsConfig,
progress: FP,
should_abort: FA,
) -> Result<IndexDocuments<'t, 'u, 'i, 'a, FP, FA>> {
) -> Result<IndexDocuments<'t, 'i, 'a, FP, FA>> {
let transform = Some(Transform::new(
wtxn,
index,

View File

@ -24,9 +24,7 @@ use crate::index::{db_name, main_key};
use crate::update::del_add::{into_del_add_obkv, DelAdd, DelAddOperation, KvReaderDelAdd};
use crate::update::index_documents::GrenadParameters;
use crate::update::{AvailableDocumentsIds, ClearDocuments, UpdateIndexingStep};
use crate::{
FieldDistribution, FieldId, FieldIdMapMissingEntry, FieldsIdsMap, Index, Result, BEU32,
};
use crate::{FieldDistribution, FieldId, FieldIdMapMissingEntry, FieldsIdsMap, Index, Result};
pub struct TransformOutput {
pub primary_key: String,
@ -245,7 +243,7 @@ impl<'a, 'i> Transform<'a, 'i> {
let mut skip_insertion = false;
if let Some(original_docid) = original_docid {
let original_key = BEU32::new(original_docid);
let original_key = original_docid;
let base_obkv = self
.index
.documents
@ -499,7 +497,7 @@ impl<'a, 'i> Transform<'a, 'i> {
self.replaced_documents_ids.insert(internal_docid);
// fetch the obkv document
let original_key = BEU32::new(internal_docid);
let original_key = internal_docid;
let base_obkv = self
.index
.documents
@ -811,7 +809,7 @@ impl<'a, 'i> Transform<'a, 'i> {
// TODO this can be done in parallel by using the rayon `ThreadPool`.
pub fn prepare_for_documents_reindexing(
self,
wtxn: &mut heed::RwTxn<'i, '_>,
wtxn: &mut heed::RwTxn<'i>,
old_fields_ids_map: FieldsIdsMap,
mut new_fields_ids_map: FieldsIdsMap,
) -> Result<TransformOutput> {
@ -857,7 +855,6 @@ impl<'a, 'i> Transform<'a, 'i> {
let obkv = self.index.documents.get(wtxn, &docid)?.ok_or(
InternalError::DatabaseMissingEntry { db_name: db_name::DOCUMENTS, key: None },
)?;
let docid = docid.get();
obkv_buffer.clear();
let mut obkv_writer = KvWriter::<_, FieldId>::new(&mut obkv_buffer);

View File

@ -7,7 +7,7 @@ use bytemuck::allocation::pod_collect_to_vec;
use charabia::{Language, Script};
use grenad::MergerBuilder;
use heed::types::ByteSlice;
use heed::RwTxn;
use heed::{PutFlags, RwTxn};
use log::error;
use obkv::{KvReader, KvWriter};
use ordered_float::OrderedFloat;
@ -27,9 +27,7 @@ use crate::index::Hnsw;
use crate::update::del_add::{deladd_serialize_add_side, DelAdd, KvReaderDelAdd};
use crate::update::facet::FacetsUpdate;
use crate::update::index_documents::helpers::{as_cloneable_grenad, try_split_array_at};
use crate::{
lat_lng_to_xyz, DocumentId, FieldId, GeoPoint, Index, Result, SerializationError, BEU32,
};
use crate::{lat_lng_to_xyz, DocumentId, FieldId, GeoPoint, Index, Result, SerializationError};
pub(crate) enum TypedChunk {
FieldIdDocidFacetStrings(grenad::Reader<CursorClonableMmap>),
@ -149,7 +147,7 @@ pub(crate) fn write_typed_chunk_into_index(
let db = index.documents.remap_data_type::<ByteSlice>();
if !writer.is_empty() {
db.put(wtxn, &BEU32::new(docid), &writer.into_inner().unwrap())?;
db.put(wtxn, &docid, &writer.into_inner().unwrap())?;
operations.push(DocumentOperation {
external_id: external_id.to_string(),
internal_id: docid,
@ -157,7 +155,7 @@ pub(crate) fn write_typed_chunk_into_index(
});
docids.insert(docid);
} else {
db.delete(wtxn, &BEU32::new(docid))?;
db.delete(wtxn, &docid)?;
operations.push(DocumentOperation {
external_id: external_id.to_string(),
internal_id: docid,
@ -362,8 +360,8 @@ pub(crate) fn write_typed_chunk_into_index(
// We extract and store the previous vectors
if let Some(hnsw) = index.vector_hnsw(wtxn)? {
for (pid, point) in hnsw.iter() {
let pid_key = BEU32::new(pid.into_inner());
let docid = index.vector_id_docid.get(wtxn, &pid_key)?.unwrap().get();
let pid_key = pid.into_inner();
let docid = index.vector_id_docid.get(wtxn, &pid_key)?.unwrap();
let vector: Vec<_> = point.iter().copied().map(OrderedFloat).collect();
vectors_set.insert((docid, vector));
}
@ -424,11 +422,7 @@ pub(crate) fn write_typed_chunk_into_index(
// Store the vectors in the point-docid relation database
index.vector_id_docid.clear(wtxn)?;
for (docid, pid) in docids.into_iter().zip(pids) {
index.vector_id_docid.put(
wtxn,
&BEU32::new(pid.into_inner()),
&BEU32::new(docid),
)?;
index.vector_id_docid.put(wtxn, &pid.into_inner(), &docid)?;
}
log::debug!("There are {} entries in the HNSW so far", hnsw_length);
@ -568,14 +562,17 @@ where
while let Some((key, value)) = cursor.move_on_next()? {
if valid_lmdb_key(key) {
debug_assert!(
K::bytes_decode(key).is_some(),
K::bytes_decode(key).is_ok(),
"Couldn't decode key with the database decoder, key length: {} - key bytes: {:x?}",
key.len(),
&key
);
buffer.clear();
let value = serialize_value(value, &mut buffer)?;
unsafe { database.append(key, value)? };
unsafe {
// safety: We do not keep a reference to anything that lives inside the database
database.put_current_with_options::<ByteSlice>(PutFlags::APPEND, key, value)?
};
}
}

View File

@ -100,8 +100,8 @@ impl<'de, T: Deserialize<'de>> Deserialize<'de> for Setting<T> {
}
}
pub struct Settings<'a, 't, 'u, 'i> {
wtxn: &'t mut heed::RwTxn<'i, 'u>,
pub struct Settings<'a, 't, 'i> {
wtxn: &'t mut heed::RwTxn<'i>,
index: &'i Index,
indexer_config: &'a IndexerConfig,
@ -129,12 +129,12 @@ pub struct Settings<'a, 't, 'u, 'i> {
pagination_max_total_hits: Setting<usize>,
}
impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
impl<'a, 't, 'i> Settings<'a, 't, 'i> {
pub fn new(
wtxn: &'t mut heed::RwTxn<'i, 'u>,
wtxn: &'t mut heed::RwTxn<'i>,
index: &'i Index,
indexer_config: &'a IndexerConfig,
) -> Settings<'a, 't, 'u, 'i> {
) -> Settings<'a, 't, 'i> {
Settings {
wtxn,
index,

View File

@ -12,8 +12,8 @@ use crate::update::index_documents::{
};
use crate::{CboRoaringBitmapCodec, Result};
pub struct WordPrefixDocids<'t, 'u, 'i> {
wtxn: &'t mut heed::RwTxn<'i, 'u>,
pub struct WordPrefixDocids<'t, 'i> {
wtxn: &'t mut heed::RwTxn<'i>,
word_docids: Database<Str, CboRoaringBitmapCodec>,
word_prefix_docids: Database<Str, CboRoaringBitmapCodec>,
pub(crate) chunk_compression_type: CompressionType,
@ -22,12 +22,12 @@ pub struct WordPrefixDocids<'t, 'u, 'i> {
pub(crate) max_memory: Option<usize>,
}
impl<'t, 'u, 'i> WordPrefixDocids<'t, 'u, 'i> {
impl<'t, 'i> WordPrefixDocids<'t, 'i> {
pub fn new(
wtxn: &'t mut heed::RwTxn<'i, 'u>,
wtxn: &'t mut heed::RwTxn<'i>,
word_docids: Database<Str, CboRoaringBitmapCodec>,
word_prefix_docids: Database<Str, CboRoaringBitmapCodec>,
) -> WordPrefixDocids<'t, 'u, 'i> {
) -> WordPrefixDocids<'t, 'i> {
WordPrefixDocids {
wtxn,
word_docids,

View File

@ -17,8 +17,8 @@ use crate::update::index_documents::{
};
use crate::{CboRoaringBitmapCodec, Result};
pub struct WordPrefixIntegerDocids<'t, 'u, 'i> {
wtxn: &'t mut heed::RwTxn<'i, 'u>,
pub struct WordPrefixIntegerDocids<'t, 'i> {
wtxn: &'t mut heed::RwTxn<'i>,
prefix_database: Database<StrBEU16Codec, CboRoaringBitmapCodec>,
word_database: Database<StrBEU16Codec, CboRoaringBitmapCodec>,
pub(crate) chunk_compression_type: CompressionType,
@ -27,12 +27,12 @@ pub struct WordPrefixIntegerDocids<'t, 'u, 'i> {
pub(crate) max_memory: Option<usize>,
}
impl<'t, 'u, 'i> WordPrefixIntegerDocids<'t, 'u, 'i> {
impl<'t, 'i> WordPrefixIntegerDocids<'t, 'i> {
pub fn new(
wtxn: &'t mut heed::RwTxn<'i, 'u>,
wtxn: &'t mut heed::RwTxn<'i>,
prefix_database: Database<StrBEU16Codec, CboRoaringBitmapCodec>,
word_database: Database<StrBEU16Codec, CboRoaringBitmapCodec>,
) -> WordPrefixIntegerDocids<'t, 'u, 'i> {
) -> WordPrefixIntegerDocids<'t, 'i> {
WordPrefixIntegerDocids {
wtxn,
prefix_database,
@ -72,7 +72,8 @@ impl<'t, 'u, 'i> WordPrefixIntegerDocids<'t, 'u, 'i> {
let mut current_prefixes: Option<&&[String]> = None;
let mut prefixes_cache = HashMap::new();
while let Some((key, data)) = new_word_integer_docids_iter.move_on_next()? {
let (word, pos) = StrBEU16Codec::bytes_decode(key).ok_or(heed::Error::Decoding)?;
let (word, pos) =
StrBEU16Codec::bytes_decode(key).map_err(heed::Error::Decoding)?;
current_prefixes = match current_prefixes.take() {
Some(prefixes) if word.starts_with(&prefixes[0]) => Some(prefixes),

View File

@ -2,21 +2,19 @@ use std::iter::{repeat_with, FromIterator};
use std::str;
use fst::{SetBuilder, Streamer};
use heed::RwTxn;
use crate::{Index, Result, SmallString32};
pub struct WordsPrefixesFst<'t, 'u, 'i> {
wtxn: &'t mut heed::RwTxn<'i, 'u>,
pub struct WordsPrefixesFst<'t, 'i> {
wtxn: &'t mut RwTxn<'i>,
index: &'i Index,
threshold: u32,
max_prefix_length: usize,
}
impl<'t, 'u, 'i> WordsPrefixesFst<'t, 'u, 'i> {
pub fn new(
wtxn: &'t mut heed::RwTxn<'i, 'u>,
index: &'i Index,
) -> WordsPrefixesFst<'t, 'u, 'i> {
impl<'t, 'i> WordsPrefixesFst<'t, 'i> {
pub fn new(wtxn: &'t mut RwTxn<'i>, index: &'i Index) -> WordsPrefixesFst<'t, 'i> {
WordsPrefixesFst { wtxn, index, threshold: 100, max_prefix_length: 4 }
}