Make the changes to use heed v0.20-alpha.6

This commit is contained in:
Clément Renault
2023-11-22 18:21:19 +01:00
parent 56a0d91ecd
commit 0d4482625a
54 changed files with 611 additions and 477 deletions

View File

@ -6,8 +6,8 @@ use std::io::{self, BufReader};
use std::mem::size_of;
use std::result::Result as StdResult;
use bytemuck::bytes_of;
use grenad::Sorter;
use heed::zerocopy::AsBytes;
use heed::BytesEncode;
use itertools::EitherOrBoth;
use ordered_float::OrderedFloat;
@ -20,9 +20,7 @@ use crate::error::InternalError;
use crate::facet::value_encoding::f64_into_bytes;
use crate::update::del_add::{DelAdd, KvWriterDelAdd};
use crate::update::index_documents::{create_writer, writer_into_reader};
use crate::{
CboRoaringBitmapCodec, DocumentId, Error, FieldId, Result, BEU32, MAX_FACET_VALUE_LENGTH,
};
use crate::{CboRoaringBitmapCodec, DocumentId, Error, FieldId, Result, MAX_FACET_VALUE_LENGTH};
/// The length of the elements that are always in the buffer when inserting new values.
const TRUNCATE_SIZE: usize = size_of::<FieldId>() + size_of::<DocumentId>();
@ -94,7 +92,7 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
strings_key_buffer.extend_from_slice(&field_id.to_be_bytes());
let document: [u8; 4] = docid_bytes[..4].try_into().ok().unwrap();
let document = BEU32::from(document).get();
let document = DocumentId::from_be_bytes(document);
// For the other extraction tasks, prefix the key with the field_id and the document_id
numbers_key_buffer.extend_from_slice(docid_bytes);
@ -323,7 +321,7 @@ where
// We insert only the Del part of the Obkv to inform
// that we only want to remove all those numbers.
let mut obkv = KvWriterDelAdd::memory();
obkv.insert(DelAdd::Deletion, ().as_bytes())?;
obkv.insert(DelAdd::Deletion, bytes_of(&()))?;
let bytes = obkv.into_inner()?;
fid_docid_facet_numbers_sorter.insert(&key_buffer, bytes)?;
}
@ -336,7 +334,7 @@ where
// We insert only the Add part of the Obkv to inform
// that we only want to add all those numbers.
let mut obkv = KvWriterDelAdd::memory();
obkv.insert(DelAdd::Addition, ().as_bytes())?;
obkv.insert(DelAdd::Addition, bytes_of(&()))?;
let bytes = obkv.into_inner()?;
fid_docid_facet_numbers_sorter.insert(&key_buffer, bytes)?;
}

View File

@ -118,7 +118,7 @@ pub fn extract_word_docids<R: io::Read + io::Seek>(
}
let (word, fid) = StrBEU16Codec::bytes_decode(key)
.ok_or(SerializationError::Decoding { db_name: Some(DOCID_WORD_POSITIONS) })?;
.map_err(|_| SerializationError::Decoding { db_name: Some(DOCID_WORD_POSITIONS) })?;
// every word contained in an attribute set to exact must be pushed in the exact_words list.
if exact_attributes.contains(&fid) {

View File

@ -68,8 +68,8 @@ impl Default for IndexDocumentsMethod {
}
}
pub struct IndexDocuments<'t, 'u, 'i, 'a, FP, FA> {
wtxn: &'t mut heed::RwTxn<'i, 'u>,
pub struct IndexDocuments<'t, 'i, 'a, FP, FA> {
wtxn: &'t mut heed::RwTxn<'i>,
index: &'i Index,
config: IndexDocumentsConfig,
indexer_config: &'a IndexerConfig,
@ -90,19 +90,19 @@ pub struct IndexDocumentsConfig {
pub autogenerate_docids: bool,
}
impl<'t, 'u, 'i, 'a, FP, FA> IndexDocuments<'t, 'u, 'i, 'a, FP, FA>
impl<'t, 'i, 'a, FP, FA> IndexDocuments<'t, 'i, 'a, FP, FA>
where
FP: Fn(UpdateIndexingStep) + Sync,
FA: Fn() -> bool + Sync,
{
pub fn new(
wtxn: &'t mut heed::RwTxn<'i, 'u>,
wtxn: &'t mut heed::RwTxn<'i>,
index: &'i Index,
indexer_config: &'a IndexerConfig,
config: IndexDocumentsConfig,
progress: FP,
should_abort: FA,
) -> Result<IndexDocuments<'t, 'u, 'i, 'a, FP, FA>> {
) -> Result<IndexDocuments<'t, 'i, 'a, FP, FA>> {
let transform = Some(Transform::new(
wtxn,
index,

View File

@ -24,9 +24,7 @@ use crate::index::{db_name, main_key};
use crate::update::del_add::{into_del_add_obkv, DelAdd, DelAddOperation, KvReaderDelAdd};
use crate::update::index_documents::GrenadParameters;
use crate::update::{AvailableDocumentsIds, ClearDocuments, UpdateIndexingStep};
use crate::{
FieldDistribution, FieldId, FieldIdMapMissingEntry, FieldsIdsMap, Index, Result, BEU32,
};
use crate::{FieldDistribution, FieldId, FieldIdMapMissingEntry, FieldsIdsMap, Index, Result};
pub struct TransformOutput {
pub primary_key: String,
@ -245,7 +243,7 @@ impl<'a, 'i> Transform<'a, 'i> {
let mut skip_insertion = false;
if let Some(original_docid) = original_docid {
let original_key = BEU32::new(original_docid);
let original_key = original_docid;
let base_obkv = self
.index
.documents
@ -499,7 +497,7 @@ impl<'a, 'i> Transform<'a, 'i> {
self.replaced_documents_ids.insert(internal_docid);
// fetch the obkv document
let original_key = BEU32::new(internal_docid);
let original_key = internal_docid;
let base_obkv = self
.index
.documents
@ -811,7 +809,7 @@ impl<'a, 'i> Transform<'a, 'i> {
// TODO this can be done in parallel by using the rayon `ThreadPool`.
pub fn prepare_for_documents_reindexing(
self,
wtxn: &mut heed::RwTxn<'i, '_>,
wtxn: &mut heed::RwTxn<'i>,
old_fields_ids_map: FieldsIdsMap,
mut new_fields_ids_map: FieldsIdsMap,
) -> Result<TransformOutput> {
@ -857,7 +855,6 @@ impl<'a, 'i> Transform<'a, 'i> {
let obkv = self.index.documents.get(wtxn, &docid)?.ok_or(
InternalError::DatabaseMissingEntry { db_name: db_name::DOCUMENTS, key: None },
)?;
let docid = docid.get();
obkv_buffer.clear();
let mut obkv_writer = KvWriter::<_, FieldId>::new(&mut obkv_buffer);

View File

@ -7,7 +7,7 @@ use bytemuck::allocation::pod_collect_to_vec;
use charabia::{Language, Script};
use grenad::MergerBuilder;
use heed::types::ByteSlice;
use heed::RwTxn;
use heed::{PutFlags, RwTxn};
use log::error;
use obkv::{KvReader, KvWriter};
use ordered_float::OrderedFloat;
@ -27,9 +27,7 @@ use crate::index::Hnsw;
use crate::update::del_add::{deladd_serialize_add_side, DelAdd, KvReaderDelAdd};
use crate::update::facet::FacetsUpdate;
use crate::update::index_documents::helpers::{as_cloneable_grenad, try_split_array_at};
use crate::{
lat_lng_to_xyz, DocumentId, FieldId, GeoPoint, Index, Result, SerializationError, BEU32,
};
use crate::{lat_lng_to_xyz, DocumentId, FieldId, GeoPoint, Index, Result, SerializationError};
pub(crate) enum TypedChunk {
FieldIdDocidFacetStrings(grenad::Reader<CursorClonableMmap>),
@ -149,7 +147,7 @@ pub(crate) fn write_typed_chunk_into_index(
let db = index.documents.remap_data_type::<ByteSlice>();
if !writer.is_empty() {
db.put(wtxn, &BEU32::new(docid), &writer.into_inner().unwrap())?;
db.put(wtxn, &docid, &writer.into_inner().unwrap())?;
operations.push(DocumentOperation {
external_id: external_id.to_string(),
internal_id: docid,
@ -157,7 +155,7 @@ pub(crate) fn write_typed_chunk_into_index(
});
docids.insert(docid);
} else {
db.delete(wtxn, &BEU32::new(docid))?;
db.delete(wtxn, &docid)?;
operations.push(DocumentOperation {
external_id: external_id.to_string(),
internal_id: docid,
@ -362,8 +360,8 @@ pub(crate) fn write_typed_chunk_into_index(
// We extract and store the previous vectors
if let Some(hnsw) = index.vector_hnsw(wtxn)? {
for (pid, point) in hnsw.iter() {
let pid_key = BEU32::new(pid.into_inner());
let docid = index.vector_id_docid.get(wtxn, &pid_key)?.unwrap().get();
let pid_key = pid.into_inner();
let docid = index.vector_id_docid.get(wtxn, &pid_key)?.unwrap();
let vector: Vec<_> = point.iter().copied().map(OrderedFloat).collect();
vectors_set.insert((docid, vector));
}
@ -424,11 +422,7 @@ pub(crate) fn write_typed_chunk_into_index(
// Store the vectors in the point-docid relation database
index.vector_id_docid.clear(wtxn)?;
for (docid, pid) in docids.into_iter().zip(pids) {
index.vector_id_docid.put(
wtxn,
&BEU32::new(pid.into_inner()),
&BEU32::new(docid),
)?;
index.vector_id_docid.put(wtxn, &pid.into_inner(), &docid)?;
}
log::debug!("There are {} entries in the HNSW so far", hnsw_length);
@ -568,14 +562,17 @@ where
while let Some((key, value)) = cursor.move_on_next()? {
if valid_lmdb_key(key) {
debug_assert!(
K::bytes_decode(key).is_some(),
K::bytes_decode(key).is_ok(),
"Couldn't decode key with the database decoder, key length: {} - key bytes: {:x?}",
key.len(),
&key
);
buffer.clear();
let value = serialize_value(value, &mut buffer)?;
unsafe { database.append(key, value)? };
unsafe {
// safety: We do not keep a reference to anything that lives inside the database
database.put_current_with_options::<ByteSlice>(PutFlags::APPEND, key, value)?
};
}
}