mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-07-28 01:01:00 +00:00
Make the changes to use heed v0.20-alpha.6
This commit is contained in:
@ -1,15 +1,16 @@
|
||||
use heed::RwTxn;
|
||||
use roaring::RoaringBitmap;
|
||||
use time::OffsetDateTime;
|
||||
|
||||
use crate::{FieldDistribution, Index, Result};
|
||||
|
||||
pub struct ClearDocuments<'t, 'u, 'i> {
|
||||
wtxn: &'t mut heed::RwTxn<'i, 'u>,
|
||||
pub struct ClearDocuments<'t, 'i> {
|
||||
wtxn: &'t mut RwTxn<'i>,
|
||||
index: &'i Index,
|
||||
}
|
||||
|
||||
impl<'t, 'u, 'i> ClearDocuments<'t, 'u, 'i> {
|
||||
pub fn new(wtxn: &'t mut heed::RwTxn<'i, 'u>, index: &'i Index) -> ClearDocuments<'t, 'u, 'i> {
|
||||
impl<'t, 'i> ClearDocuments<'t, 'i> {
|
||||
pub fn new(wtxn: &'t mut RwTxn<'i>, index: &'i Index) -> ClearDocuments<'t, 'i> {
|
||||
ClearDocuments { wtxn, index }
|
||||
}
|
||||
|
||||
|
@ -3,7 +3,7 @@ use std::io::BufReader;
|
||||
|
||||
use grenad::CompressionType;
|
||||
use heed::types::ByteSlice;
|
||||
use heed::{BytesDecode, BytesEncode, Error, RoTxn, RwTxn};
|
||||
use heed::{BytesDecode, BytesEncode, Error, PutFlags, RoTxn, RwTxn};
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use super::{FACET_GROUP_SIZE, FACET_MIN_LEVEL_SIZE};
|
||||
@ -146,7 +146,13 @@ impl<R: std::io::Read + std::io::Seek> FacetsUpdateBulkInner<R> {
|
||||
buffer.push(1);
|
||||
// then we extend the buffer with the docids bitmap
|
||||
buffer.extend_from_slice(value);
|
||||
unsafe { database.append(key, &buffer)? };
|
||||
unsafe {
|
||||
database.put_current_with_options::<ByteSlice>(
|
||||
PutFlags::APPEND,
|
||||
key,
|
||||
&buffer,
|
||||
)?
|
||||
};
|
||||
}
|
||||
} else {
|
||||
let mut buffer = Vec::new();
|
||||
@ -219,8 +225,8 @@ impl<R: std::io::Read + std::io::Seek> FacetsUpdateBulkInner<R> {
|
||||
|
||||
let level_0_iter = self
|
||||
.db
|
||||
.as_polymorph()
|
||||
.prefix_iter::<_, ByteSlice, ByteSlice>(rtxn, level_0_prefix.as_slice())?
|
||||
.remap_types::<ByteSlice, ByteSlice>()
|
||||
.prefix_iter(rtxn, level_0_prefix.as_slice())?
|
||||
.remap_types::<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>();
|
||||
|
||||
let mut left_bound: &[u8] = &[];
|
||||
@ -308,10 +314,10 @@ impl<R: std::io::Read + std::io::Seek> FacetsUpdateBulkInner<R> {
|
||||
{
|
||||
let key = FacetGroupKey { field_id, level, left_bound };
|
||||
let key = FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_encode(&key)
|
||||
.ok_or(Error::Encoding)?;
|
||||
.map_err(Error::Encoding)?;
|
||||
let value = FacetGroupValue { size: group_size, bitmap };
|
||||
let value =
|
||||
FacetGroupValueCodec::bytes_encode(&value).ok_or(Error::Encoding)?;
|
||||
FacetGroupValueCodec::bytes_encode(&value).map_err(Error::Encoding)?;
|
||||
cur_writer.insert(key, value)?;
|
||||
cur_writer_len += 1;
|
||||
}
|
||||
@ -337,9 +343,9 @@ impl<R: std::io::Read + std::io::Seek> FacetsUpdateBulkInner<R> {
|
||||
{
|
||||
let key = FacetGroupKey { field_id, level, left_bound };
|
||||
let key = FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_encode(&key)
|
||||
.ok_or(Error::Encoding)?;
|
||||
.map_err(Error::Encoding)?;
|
||||
let value = FacetGroupValue { size: group_size, bitmap };
|
||||
let value = FacetGroupValueCodec::bytes_encode(&value).ok_or(Error::Encoding)?;
|
||||
let value = FacetGroupValueCodec::bytes_encode(&value).map_err(Error::Encoding)?;
|
||||
cur_writer.insert(key, value)?;
|
||||
cur_writer_len += 1;
|
||||
}
|
||||
|
@ -68,18 +68,18 @@ impl FacetsUpdateIncremental {
|
||||
continue;
|
||||
}
|
||||
let key = FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_decode(key)
|
||||
.ok_or(heed::Error::Encoding)?;
|
||||
.map_err(heed::Error::Encoding)?;
|
||||
let value = KvReader::new(value);
|
||||
|
||||
let docids_to_delete = value
|
||||
.get(DelAdd::Deletion)
|
||||
.map(CboRoaringBitmapCodec::bytes_decode)
|
||||
.map(|o| o.ok_or(heed::Error::Encoding));
|
||||
.map(|o| o.map_err(heed::Error::Encoding));
|
||||
|
||||
let docids_to_add = value
|
||||
.get(DelAdd::Addition)
|
||||
.map(CboRoaringBitmapCodec::bytes_decode)
|
||||
.map(|o| o.ok_or(heed::Error::Encoding));
|
||||
.map(|o| o.map_err(heed::Error::Encoding));
|
||||
|
||||
if let Some(docids_to_delete) = docids_to_delete {
|
||||
let docids_to_delete = docids_to_delete?;
|
||||
@ -134,15 +134,14 @@ impl FacetsUpdateIncrementalInner {
|
||||
prefix.extend_from_slice(&field_id.to_be_bytes());
|
||||
prefix.push(level);
|
||||
|
||||
let mut iter =
|
||||
self.db.as_polymorph().prefix_iter::<_, ByteSlice, FacetGroupValueCodec>(
|
||||
txn,
|
||||
prefix.as_slice(),
|
||||
)?;
|
||||
let mut iter = self
|
||||
.db
|
||||
.remap_types::<ByteSlice, FacetGroupValueCodec>()
|
||||
.prefix_iter(txn, prefix.as_slice())?;
|
||||
let (key_bytes, value) = iter.next().unwrap()?;
|
||||
Ok((
|
||||
FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_decode(key_bytes)
|
||||
.ok_or(Error::Encoding)?
|
||||
.map_err(Error::Encoding)?
|
||||
.into_owned(),
|
||||
value,
|
||||
))
|
||||
@ -177,10 +176,8 @@ impl FacetsUpdateIncrementalInner {
|
||||
level0_prefix.extend_from_slice(&field_id.to_be_bytes());
|
||||
level0_prefix.push(0);
|
||||
|
||||
let mut iter = self
|
||||
.db
|
||||
.as_polymorph()
|
||||
.prefix_iter::<_, ByteSlice, DecodeIgnore>(txn, &level0_prefix)?;
|
||||
let mut iter =
|
||||
self.db.remap_types::<ByteSlice, DecodeIgnore>().prefix_iter(txn, &level0_prefix)?;
|
||||
|
||||
if iter.next().is_none() {
|
||||
drop(iter);
|
||||
@ -384,8 +381,8 @@ impl FacetsUpdateIncrementalInner {
|
||||
|
||||
let size_highest_level = self
|
||||
.db
|
||||
.as_polymorph()
|
||||
.prefix_iter::<_, ByteSlice, ByteSlice>(txn, &highest_level_prefix)?
|
||||
.remap_types::<ByteSlice, ByteSlice>()
|
||||
.prefix_iter(txn, &highest_level_prefix)?
|
||||
.count();
|
||||
|
||||
if size_highest_level < self.group_size as usize * self.min_level_size as usize {
|
||||
@ -394,8 +391,8 @@ impl FacetsUpdateIncrementalInner {
|
||||
|
||||
let mut groups_iter = self
|
||||
.db
|
||||
.as_polymorph()
|
||||
.prefix_iter::<_, ByteSlice, FacetGroupValueCodec>(txn, &highest_level_prefix)?;
|
||||
.remap_types::<ByteSlice, FacetGroupValueCodec>()
|
||||
.prefix_iter(txn, &highest_level_prefix)?;
|
||||
|
||||
let nbr_new_groups = size_highest_level / self.group_size as usize;
|
||||
let nbr_leftover_elements = size_highest_level % self.group_size as usize;
|
||||
@ -407,7 +404,7 @@ impl FacetsUpdateIncrementalInner {
|
||||
for _ in 0..group_size {
|
||||
let (key_bytes, value_i) = groups_iter.next().unwrap()?;
|
||||
let key_i = FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_decode(key_bytes)
|
||||
.ok_or(Error::Encoding)?;
|
||||
.map_err(Error::Encoding)?;
|
||||
|
||||
if first_key.is_none() {
|
||||
first_key = Some(key_i);
|
||||
@ -430,7 +427,7 @@ impl FacetsUpdateIncrementalInner {
|
||||
for _ in 0..nbr_leftover_elements {
|
||||
let (key_bytes, value_i) = groups_iter.next().unwrap()?;
|
||||
let key_i = FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_decode(key_bytes)
|
||||
.ok_or(Error::Encoding)?;
|
||||
.map_err(Error::Encoding)?;
|
||||
|
||||
if first_key.is_none() {
|
||||
first_key = Some(key_i);
|
||||
@ -597,8 +594,8 @@ impl FacetsUpdateIncrementalInner {
|
||||
if highest_level == 0
|
||||
|| self
|
||||
.db
|
||||
.as_polymorph()
|
||||
.prefix_iter::<_, ByteSlice, ByteSlice>(txn, &highest_level_prefix)?
|
||||
.remap_types::<ByteSlice, ByteSlice>()
|
||||
.prefix_iter(txn, &highest_level_prefix)?
|
||||
.count()
|
||||
>= self.min_level_size as usize
|
||||
{
|
||||
@ -607,13 +604,13 @@ impl FacetsUpdateIncrementalInner {
|
||||
let mut to_delete = vec![];
|
||||
let mut iter = self
|
||||
.db
|
||||
.as_polymorph()
|
||||
.prefix_iter::<_, ByteSlice, ByteSlice>(txn, &highest_level_prefix)?;
|
||||
.remap_types::<ByteSlice, ByteSlice>()
|
||||
.prefix_iter(txn, &highest_level_prefix)?;
|
||||
for el in iter.by_ref() {
|
||||
let (k, _) = el?;
|
||||
to_delete.push(
|
||||
FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_decode(k)
|
||||
.ok_or(Error::Encoding)?
|
||||
.map_err(Error::Encoding)?
|
||||
.into_owned(),
|
||||
);
|
||||
}
|
||||
|
@ -95,7 +95,7 @@ use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec, FacetGroupValu
|
||||
use crate::heed_codec::ByteSliceRefCodec;
|
||||
use crate::update::index_documents::create_sorter;
|
||||
use crate::update::merge_btreeset_string;
|
||||
use crate::{BEU16StrCodec, Index, Result, BEU16, MAX_FACET_VALUE_LENGTH};
|
||||
use crate::{BEU16StrCodec, Index, Result, MAX_FACET_VALUE_LENGTH};
|
||||
|
||||
pub mod bulk;
|
||||
pub mod incremental;
|
||||
@ -207,8 +207,8 @@ impl<'i> FacetsUpdate<'i> {
|
||||
}
|
||||
let set = BTreeSet::from_iter(std::iter::once(left_bound));
|
||||
let key = (field_id, normalized_facet.as_ref());
|
||||
let key = BEU16StrCodec::bytes_encode(&key).ok_or(heed::Error::Encoding)?;
|
||||
let val = SerdeJson::bytes_encode(&set).ok_or(heed::Error::Encoding)?;
|
||||
let key = BEU16StrCodec::bytes_encode(&key).map_err(heed::Error::Encoding)?;
|
||||
let val = SerdeJson::bytes_encode(&set).map_err(heed::Error::Encoding)?;
|
||||
sorter.insert(key, val)?;
|
||||
}
|
||||
}
|
||||
@ -252,7 +252,7 @@ impl<'i> FacetsUpdate<'i> {
|
||||
|
||||
// We write those FSTs in LMDB now
|
||||
for (field_id, fst) in text_fsts {
|
||||
self.index.facet_id_string_fst.put(wtxn, &BEU16::new(field_id), &fst)?;
|
||||
self.index.facet_id_string_fst.put(wtxn, &field_id, &fst)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
|
@ -6,8 +6,8 @@ use std::io::{self, BufReader};
|
||||
use std::mem::size_of;
|
||||
use std::result::Result as StdResult;
|
||||
|
||||
use bytemuck::bytes_of;
|
||||
use grenad::Sorter;
|
||||
use heed::zerocopy::AsBytes;
|
||||
use heed::BytesEncode;
|
||||
use itertools::EitherOrBoth;
|
||||
use ordered_float::OrderedFloat;
|
||||
@ -20,9 +20,7 @@ use crate::error::InternalError;
|
||||
use crate::facet::value_encoding::f64_into_bytes;
|
||||
use crate::update::del_add::{DelAdd, KvWriterDelAdd};
|
||||
use crate::update::index_documents::{create_writer, writer_into_reader};
|
||||
use crate::{
|
||||
CboRoaringBitmapCodec, DocumentId, Error, FieldId, Result, BEU32, MAX_FACET_VALUE_LENGTH,
|
||||
};
|
||||
use crate::{CboRoaringBitmapCodec, DocumentId, Error, FieldId, Result, MAX_FACET_VALUE_LENGTH};
|
||||
|
||||
/// The length of the elements that are always in the buffer when inserting new values.
|
||||
const TRUNCATE_SIZE: usize = size_of::<FieldId>() + size_of::<DocumentId>();
|
||||
@ -94,7 +92,7 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
|
||||
strings_key_buffer.extend_from_slice(&field_id.to_be_bytes());
|
||||
|
||||
let document: [u8; 4] = docid_bytes[..4].try_into().ok().unwrap();
|
||||
let document = BEU32::from(document).get();
|
||||
let document = DocumentId::from_be_bytes(document);
|
||||
|
||||
// For the other extraction tasks, prefix the key with the field_id and the document_id
|
||||
numbers_key_buffer.extend_from_slice(docid_bytes);
|
||||
@ -323,7 +321,7 @@ where
|
||||
// We insert only the Del part of the Obkv to inform
|
||||
// that we only want to remove all those numbers.
|
||||
let mut obkv = KvWriterDelAdd::memory();
|
||||
obkv.insert(DelAdd::Deletion, ().as_bytes())?;
|
||||
obkv.insert(DelAdd::Deletion, bytes_of(&()))?;
|
||||
let bytes = obkv.into_inner()?;
|
||||
fid_docid_facet_numbers_sorter.insert(&key_buffer, bytes)?;
|
||||
}
|
||||
@ -336,7 +334,7 @@ where
|
||||
// We insert only the Add part of the Obkv to inform
|
||||
// that we only want to add all those numbers.
|
||||
let mut obkv = KvWriterDelAdd::memory();
|
||||
obkv.insert(DelAdd::Addition, ().as_bytes())?;
|
||||
obkv.insert(DelAdd::Addition, bytes_of(&()))?;
|
||||
let bytes = obkv.into_inner()?;
|
||||
fid_docid_facet_numbers_sorter.insert(&key_buffer, bytes)?;
|
||||
}
|
||||
|
@ -118,7 +118,7 @@ pub fn extract_word_docids<R: io::Read + io::Seek>(
|
||||
}
|
||||
|
||||
let (word, fid) = StrBEU16Codec::bytes_decode(key)
|
||||
.ok_or(SerializationError::Decoding { db_name: Some(DOCID_WORD_POSITIONS) })?;
|
||||
.map_err(|_| SerializationError::Decoding { db_name: Some(DOCID_WORD_POSITIONS) })?;
|
||||
|
||||
// every word contained in an attribute set to exact must be pushed in the exact_words list.
|
||||
if exact_attributes.contains(&fid) {
|
||||
|
@ -68,8 +68,8 @@ impl Default for IndexDocumentsMethod {
|
||||
}
|
||||
}
|
||||
|
||||
pub struct IndexDocuments<'t, 'u, 'i, 'a, FP, FA> {
|
||||
wtxn: &'t mut heed::RwTxn<'i, 'u>,
|
||||
pub struct IndexDocuments<'t, 'i, 'a, FP, FA> {
|
||||
wtxn: &'t mut heed::RwTxn<'i>,
|
||||
index: &'i Index,
|
||||
config: IndexDocumentsConfig,
|
||||
indexer_config: &'a IndexerConfig,
|
||||
@ -90,19 +90,19 @@ pub struct IndexDocumentsConfig {
|
||||
pub autogenerate_docids: bool,
|
||||
}
|
||||
|
||||
impl<'t, 'u, 'i, 'a, FP, FA> IndexDocuments<'t, 'u, 'i, 'a, FP, FA>
|
||||
impl<'t, 'i, 'a, FP, FA> IndexDocuments<'t, 'i, 'a, FP, FA>
|
||||
where
|
||||
FP: Fn(UpdateIndexingStep) + Sync,
|
||||
FA: Fn() -> bool + Sync,
|
||||
{
|
||||
pub fn new(
|
||||
wtxn: &'t mut heed::RwTxn<'i, 'u>,
|
||||
wtxn: &'t mut heed::RwTxn<'i>,
|
||||
index: &'i Index,
|
||||
indexer_config: &'a IndexerConfig,
|
||||
config: IndexDocumentsConfig,
|
||||
progress: FP,
|
||||
should_abort: FA,
|
||||
) -> Result<IndexDocuments<'t, 'u, 'i, 'a, FP, FA>> {
|
||||
) -> Result<IndexDocuments<'t, 'i, 'a, FP, FA>> {
|
||||
let transform = Some(Transform::new(
|
||||
wtxn,
|
||||
index,
|
||||
|
@ -24,9 +24,7 @@ use crate::index::{db_name, main_key};
|
||||
use crate::update::del_add::{into_del_add_obkv, DelAdd, DelAddOperation, KvReaderDelAdd};
|
||||
use crate::update::index_documents::GrenadParameters;
|
||||
use crate::update::{AvailableDocumentsIds, ClearDocuments, UpdateIndexingStep};
|
||||
use crate::{
|
||||
FieldDistribution, FieldId, FieldIdMapMissingEntry, FieldsIdsMap, Index, Result, BEU32,
|
||||
};
|
||||
use crate::{FieldDistribution, FieldId, FieldIdMapMissingEntry, FieldsIdsMap, Index, Result};
|
||||
|
||||
pub struct TransformOutput {
|
||||
pub primary_key: String,
|
||||
@ -245,7 +243,7 @@ impl<'a, 'i> Transform<'a, 'i> {
|
||||
|
||||
let mut skip_insertion = false;
|
||||
if let Some(original_docid) = original_docid {
|
||||
let original_key = BEU32::new(original_docid);
|
||||
let original_key = original_docid;
|
||||
let base_obkv = self
|
||||
.index
|
||||
.documents
|
||||
@ -499,7 +497,7 @@ impl<'a, 'i> Transform<'a, 'i> {
|
||||
self.replaced_documents_ids.insert(internal_docid);
|
||||
|
||||
// fetch the obkv document
|
||||
let original_key = BEU32::new(internal_docid);
|
||||
let original_key = internal_docid;
|
||||
let base_obkv = self
|
||||
.index
|
||||
.documents
|
||||
@ -811,7 +809,7 @@ impl<'a, 'i> Transform<'a, 'i> {
|
||||
// TODO this can be done in parallel by using the rayon `ThreadPool`.
|
||||
pub fn prepare_for_documents_reindexing(
|
||||
self,
|
||||
wtxn: &mut heed::RwTxn<'i, '_>,
|
||||
wtxn: &mut heed::RwTxn<'i>,
|
||||
old_fields_ids_map: FieldsIdsMap,
|
||||
mut new_fields_ids_map: FieldsIdsMap,
|
||||
) -> Result<TransformOutput> {
|
||||
@ -857,7 +855,6 @@ impl<'a, 'i> Transform<'a, 'i> {
|
||||
let obkv = self.index.documents.get(wtxn, &docid)?.ok_or(
|
||||
InternalError::DatabaseMissingEntry { db_name: db_name::DOCUMENTS, key: None },
|
||||
)?;
|
||||
let docid = docid.get();
|
||||
|
||||
obkv_buffer.clear();
|
||||
let mut obkv_writer = KvWriter::<_, FieldId>::new(&mut obkv_buffer);
|
||||
|
@ -7,7 +7,7 @@ use bytemuck::allocation::pod_collect_to_vec;
|
||||
use charabia::{Language, Script};
|
||||
use grenad::MergerBuilder;
|
||||
use heed::types::ByteSlice;
|
||||
use heed::RwTxn;
|
||||
use heed::{PutFlags, RwTxn};
|
||||
use log::error;
|
||||
use obkv::{KvReader, KvWriter};
|
||||
use ordered_float::OrderedFloat;
|
||||
@ -27,9 +27,7 @@ use crate::index::Hnsw;
|
||||
use crate::update::del_add::{deladd_serialize_add_side, DelAdd, KvReaderDelAdd};
|
||||
use crate::update::facet::FacetsUpdate;
|
||||
use crate::update::index_documents::helpers::{as_cloneable_grenad, try_split_array_at};
|
||||
use crate::{
|
||||
lat_lng_to_xyz, DocumentId, FieldId, GeoPoint, Index, Result, SerializationError, BEU32,
|
||||
};
|
||||
use crate::{lat_lng_to_xyz, DocumentId, FieldId, GeoPoint, Index, Result, SerializationError};
|
||||
|
||||
pub(crate) enum TypedChunk {
|
||||
FieldIdDocidFacetStrings(grenad::Reader<CursorClonableMmap>),
|
||||
@ -149,7 +147,7 @@ pub(crate) fn write_typed_chunk_into_index(
|
||||
let db = index.documents.remap_data_type::<ByteSlice>();
|
||||
|
||||
if !writer.is_empty() {
|
||||
db.put(wtxn, &BEU32::new(docid), &writer.into_inner().unwrap())?;
|
||||
db.put(wtxn, &docid, &writer.into_inner().unwrap())?;
|
||||
operations.push(DocumentOperation {
|
||||
external_id: external_id.to_string(),
|
||||
internal_id: docid,
|
||||
@ -157,7 +155,7 @@ pub(crate) fn write_typed_chunk_into_index(
|
||||
});
|
||||
docids.insert(docid);
|
||||
} else {
|
||||
db.delete(wtxn, &BEU32::new(docid))?;
|
||||
db.delete(wtxn, &docid)?;
|
||||
operations.push(DocumentOperation {
|
||||
external_id: external_id.to_string(),
|
||||
internal_id: docid,
|
||||
@ -362,8 +360,8 @@ pub(crate) fn write_typed_chunk_into_index(
|
||||
// We extract and store the previous vectors
|
||||
if let Some(hnsw) = index.vector_hnsw(wtxn)? {
|
||||
for (pid, point) in hnsw.iter() {
|
||||
let pid_key = BEU32::new(pid.into_inner());
|
||||
let docid = index.vector_id_docid.get(wtxn, &pid_key)?.unwrap().get();
|
||||
let pid_key = pid.into_inner();
|
||||
let docid = index.vector_id_docid.get(wtxn, &pid_key)?.unwrap();
|
||||
let vector: Vec<_> = point.iter().copied().map(OrderedFloat).collect();
|
||||
vectors_set.insert((docid, vector));
|
||||
}
|
||||
@ -424,11 +422,7 @@ pub(crate) fn write_typed_chunk_into_index(
|
||||
// Store the vectors in the point-docid relation database
|
||||
index.vector_id_docid.clear(wtxn)?;
|
||||
for (docid, pid) in docids.into_iter().zip(pids) {
|
||||
index.vector_id_docid.put(
|
||||
wtxn,
|
||||
&BEU32::new(pid.into_inner()),
|
||||
&BEU32::new(docid),
|
||||
)?;
|
||||
index.vector_id_docid.put(wtxn, &pid.into_inner(), &docid)?;
|
||||
}
|
||||
|
||||
log::debug!("There are {} entries in the HNSW so far", hnsw_length);
|
||||
@ -568,14 +562,17 @@ where
|
||||
while let Some((key, value)) = cursor.move_on_next()? {
|
||||
if valid_lmdb_key(key) {
|
||||
debug_assert!(
|
||||
K::bytes_decode(key).is_some(),
|
||||
K::bytes_decode(key).is_ok(),
|
||||
"Couldn't decode key with the database decoder, key length: {} - key bytes: {:x?}",
|
||||
key.len(),
|
||||
&key
|
||||
);
|
||||
buffer.clear();
|
||||
let value = serialize_value(value, &mut buffer)?;
|
||||
unsafe { database.append(key, value)? };
|
||||
unsafe {
|
||||
// safety: We do not keep a reference to anything that lives inside the database
|
||||
database.put_current_with_options::<ByteSlice>(PutFlags::APPEND, key, value)?
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -100,8 +100,8 @@ impl<'de, T: Deserialize<'de>> Deserialize<'de> for Setting<T> {
|
||||
}
|
||||
}
|
||||
|
||||
pub struct Settings<'a, 't, 'u, 'i> {
|
||||
wtxn: &'t mut heed::RwTxn<'i, 'u>,
|
||||
pub struct Settings<'a, 't, 'i> {
|
||||
wtxn: &'t mut heed::RwTxn<'i>,
|
||||
index: &'i Index,
|
||||
|
||||
indexer_config: &'a IndexerConfig,
|
||||
@ -129,12 +129,12 @@ pub struct Settings<'a, 't, 'u, 'i> {
|
||||
pagination_max_total_hits: Setting<usize>,
|
||||
}
|
||||
|
||||
impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
||||
impl<'a, 't, 'i> Settings<'a, 't, 'i> {
|
||||
pub fn new(
|
||||
wtxn: &'t mut heed::RwTxn<'i, 'u>,
|
||||
wtxn: &'t mut heed::RwTxn<'i>,
|
||||
index: &'i Index,
|
||||
indexer_config: &'a IndexerConfig,
|
||||
) -> Settings<'a, 't, 'u, 'i> {
|
||||
) -> Settings<'a, 't, 'i> {
|
||||
Settings {
|
||||
wtxn,
|
||||
index,
|
||||
|
@ -12,8 +12,8 @@ use crate::update::index_documents::{
|
||||
};
|
||||
use crate::{CboRoaringBitmapCodec, Result};
|
||||
|
||||
pub struct WordPrefixDocids<'t, 'u, 'i> {
|
||||
wtxn: &'t mut heed::RwTxn<'i, 'u>,
|
||||
pub struct WordPrefixDocids<'t, 'i> {
|
||||
wtxn: &'t mut heed::RwTxn<'i>,
|
||||
word_docids: Database<Str, CboRoaringBitmapCodec>,
|
||||
word_prefix_docids: Database<Str, CboRoaringBitmapCodec>,
|
||||
pub(crate) chunk_compression_type: CompressionType,
|
||||
@ -22,12 +22,12 @@ pub struct WordPrefixDocids<'t, 'u, 'i> {
|
||||
pub(crate) max_memory: Option<usize>,
|
||||
}
|
||||
|
||||
impl<'t, 'u, 'i> WordPrefixDocids<'t, 'u, 'i> {
|
||||
impl<'t, 'i> WordPrefixDocids<'t, 'i> {
|
||||
pub fn new(
|
||||
wtxn: &'t mut heed::RwTxn<'i, 'u>,
|
||||
wtxn: &'t mut heed::RwTxn<'i>,
|
||||
word_docids: Database<Str, CboRoaringBitmapCodec>,
|
||||
word_prefix_docids: Database<Str, CboRoaringBitmapCodec>,
|
||||
) -> WordPrefixDocids<'t, 'u, 'i> {
|
||||
) -> WordPrefixDocids<'t, 'i> {
|
||||
WordPrefixDocids {
|
||||
wtxn,
|
||||
word_docids,
|
||||
|
@ -17,8 +17,8 @@ use crate::update::index_documents::{
|
||||
};
|
||||
use crate::{CboRoaringBitmapCodec, Result};
|
||||
|
||||
pub struct WordPrefixIntegerDocids<'t, 'u, 'i> {
|
||||
wtxn: &'t mut heed::RwTxn<'i, 'u>,
|
||||
pub struct WordPrefixIntegerDocids<'t, 'i> {
|
||||
wtxn: &'t mut heed::RwTxn<'i>,
|
||||
prefix_database: Database<StrBEU16Codec, CboRoaringBitmapCodec>,
|
||||
word_database: Database<StrBEU16Codec, CboRoaringBitmapCodec>,
|
||||
pub(crate) chunk_compression_type: CompressionType,
|
||||
@ -27,12 +27,12 @@ pub struct WordPrefixIntegerDocids<'t, 'u, 'i> {
|
||||
pub(crate) max_memory: Option<usize>,
|
||||
}
|
||||
|
||||
impl<'t, 'u, 'i> WordPrefixIntegerDocids<'t, 'u, 'i> {
|
||||
impl<'t, 'i> WordPrefixIntegerDocids<'t, 'i> {
|
||||
pub fn new(
|
||||
wtxn: &'t mut heed::RwTxn<'i, 'u>,
|
||||
wtxn: &'t mut heed::RwTxn<'i>,
|
||||
prefix_database: Database<StrBEU16Codec, CboRoaringBitmapCodec>,
|
||||
word_database: Database<StrBEU16Codec, CboRoaringBitmapCodec>,
|
||||
) -> WordPrefixIntegerDocids<'t, 'u, 'i> {
|
||||
) -> WordPrefixIntegerDocids<'t, 'i> {
|
||||
WordPrefixIntegerDocids {
|
||||
wtxn,
|
||||
prefix_database,
|
||||
@ -72,7 +72,8 @@ impl<'t, 'u, 'i> WordPrefixIntegerDocids<'t, 'u, 'i> {
|
||||
let mut current_prefixes: Option<&&[String]> = None;
|
||||
let mut prefixes_cache = HashMap::new();
|
||||
while let Some((key, data)) = new_word_integer_docids_iter.move_on_next()? {
|
||||
let (word, pos) = StrBEU16Codec::bytes_decode(key).ok_or(heed::Error::Decoding)?;
|
||||
let (word, pos) =
|
||||
StrBEU16Codec::bytes_decode(key).map_err(heed::Error::Decoding)?;
|
||||
|
||||
current_prefixes = match current_prefixes.take() {
|
||||
Some(prefixes) if word.starts_with(&prefixes[0]) => Some(prefixes),
|
||||
|
@ -2,21 +2,19 @@ use std::iter::{repeat_with, FromIterator};
|
||||
use std::str;
|
||||
|
||||
use fst::{SetBuilder, Streamer};
|
||||
use heed::RwTxn;
|
||||
|
||||
use crate::{Index, Result, SmallString32};
|
||||
|
||||
pub struct WordsPrefixesFst<'t, 'u, 'i> {
|
||||
wtxn: &'t mut heed::RwTxn<'i, 'u>,
|
||||
pub struct WordsPrefixesFst<'t, 'i> {
|
||||
wtxn: &'t mut RwTxn<'i>,
|
||||
index: &'i Index,
|
||||
threshold: u32,
|
||||
max_prefix_length: usize,
|
||||
}
|
||||
|
||||
impl<'t, 'u, 'i> WordsPrefixesFst<'t, 'u, 'i> {
|
||||
pub fn new(
|
||||
wtxn: &'t mut heed::RwTxn<'i, 'u>,
|
||||
index: &'i Index,
|
||||
) -> WordsPrefixesFst<'t, 'u, 'i> {
|
||||
impl<'t, 'i> WordsPrefixesFst<'t, 'i> {
|
||||
pub fn new(wtxn: &'t mut RwTxn<'i>, index: &'i Index) -> WordsPrefixesFst<'t, 'i> {
|
||||
WordsPrefixesFst { wtxn, index, threshold: 100, max_prefix_length: 4 }
|
||||
}
|
||||
|
||||
|
Reference in New Issue
Block a user