mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-09-13 16:26:26 +00:00
Make the changes to use heed v0.20-alpha.6
This commit is contained in:
@ -6,8 +6,8 @@ use std::io::{self, BufReader};
|
||||
use std::mem::size_of;
|
||||
use std::result::Result as StdResult;
|
||||
|
||||
use bytemuck::bytes_of;
|
||||
use grenad::Sorter;
|
||||
use heed::zerocopy::AsBytes;
|
||||
use heed::BytesEncode;
|
||||
use itertools::EitherOrBoth;
|
||||
use ordered_float::OrderedFloat;
|
||||
@ -20,9 +20,7 @@ use crate::error::InternalError;
|
||||
use crate::facet::value_encoding::f64_into_bytes;
|
||||
use crate::update::del_add::{DelAdd, KvWriterDelAdd};
|
||||
use crate::update::index_documents::{create_writer, writer_into_reader};
|
||||
use crate::{
|
||||
CboRoaringBitmapCodec, DocumentId, Error, FieldId, Result, BEU32, MAX_FACET_VALUE_LENGTH,
|
||||
};
|
||||
use crate::{CboRoaringBitmapCodec, DocumentId, Error, FieldId, Result, MAX_FACET_VALUE_LENGTH};
|
||||
|
||||
/// The length, in bytes, of the elements that are always in the buffer when inserting new values: one `FieldId` plus one `DocumentId`.
|
||||
const TRUNCATE_SIZE: usize = size_of::<FieldId>() + size_of::<DocumentId>();
|
||||
@ -94,7 +92,7 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
|
||||
strings_key_buffer.extend_from_slice(&field_id.to_be_bytes());
|
||||
|
||||
let document: [u8; 4] = docid_bytes[..4].try_into().ok().unwrap();
|
||||
let document = BEU32::from(document).get();
|
||||
let document = DocumentId::from_be_bytes(document);
|
||||
|
||||
// For the other extraction tasks, prefix the key with the field_id and the document_id
|
||||
numbers_key_buffer.extend_from_slice(docid_bytes);
|
||||
@ -323,7 +321,7 @@ where
|
||||
// We insert only the Del part of the Obkv to inform
|
||||
// that we only want to remove all those numbers.
|
||||
let mut obkv = KvWriterDelAdd::memory();
|
||||
obkv.insert(DelAdd::Deletion, ().as_bytes())?;
|
||||
obkv.insert(DelAdd::Deletion, bytes_of(&()))?;
|
||||
let bytes = obkv.into_inner()?;
|
||||
fid_docid_facet_numbers_sorter.insert(&key_buffer, bytes)?;
|
||||
}
|
||||
@ -336,7 +334,7 @@ where
|
||||
// We insert only the Add part of the Obkv to inform
|
||||
// that we only want to add all those numbers.
|
||||
let mut obkv = KvWriterDelAdd::memory();
|
||||
obkv.insert(DelAdd::Addition, ().as_bytes())?;
|
||||
obkv.insert(DelAdd::Addition, bytes_of(&()))?;
|
||||
let bytes = obkv.into_inner()?;
|
||||
fid_docid_facet_numbers_sorter.insert(&key_buffer, bytes)?;
|
||||
}
|
||||
|
@ -118,7 +118,7 @@ pub fn extract_word_docids<R: io::Read + io::Seek>(
|
||||
}
|
||||
|
||||
let (word, fid) = StrBEU16Codec::bytes_decode(key)
|
||||
.ok_or(SerializationError::Decoding { db_name: Some(DOCID_WORD_POSITIONS) })?;
|
||||
.map_err(|_| SerializationError::Decoding { db_name: Some(DOCID_WORD_POSITIONS) })?;
|
||||
|
||||
// every word contained in an attribute set to exact must be pushed to the exact_words list.
|
||||
if exact_attributes.contains(&fid) {
|
||||
|
@ -68,8 +68,8 @@ impl Default for IndexDocumentsMethod {
|
||||
}
|
||||
}
|
||||
|
||||
pub struct IndexDocuments<'t, 'u, 'i, 'a, FP, FA> {
|
||||
wtxn: &'t mut heed::RwTxn<'i, 'u>,
|
||||
pub struct IndexDocuments<'t, 'i, 'a, FP, FA> {
|
||||
wtxn: &'t mut heed::RwTxn<'i>,
|
||||
index: &'i Index,
|
||||
config: IndexDocumentsConfig,
|
||||
indexer_config: &'a IndexerConfig,
|
||||
@ -90,19 +90,19 @@ pub struct IndexDocumentsConfig {
|
||||
pub autogenerate_docids: bool,
|
||||
}
|
||||
|
||||
impl<'t, 'u, 'i, 'a, FP, FA> IndexDocuments<'t, 'u, 'i, 'a, FP, FA>
|
||||
impl<'t, 'i, 'a, FP, FA> IndexDocuments<'t, 'i, 'a, FP, FA>
|
||||
where
|
||||
FP: Fn(UpdateIndexingStep) + Sync,
|
||||
FA: Fn() -> bool + Sync,
|
||||
{
|
||||
pub fn new(
|
||||
wtxn: &'t mut heed::RwTxn<'i, 'u>,
|
||||
wtxn: &'t mut heed::RwTxn<'i>,
|
||||
index: &'i Index,
|
||||
indexer_config: &'a IndexerConfig,
|
||||
config: IndexDocumentsConfig,
|
||||
progress: FP,
|
||||
should_abort: FA,
|
||||
) -> Result<IndexDocuments<'t, 'u, 'i, 'a, FP, FA>> {
|
||||
) -> Result<IndexDocuments<'t, 'i, 'a, FP, FA>> {
|
||||
let transform = Some(Transform::new(
|
||||
wtxn,
|
||||
index,
|
||||
|
@ -24,9 +24,7 @@ use crate::index::{db_name, main_key};
|
||||
use crate::update::del_add::{into_del_add_obkv, DelAdd, DelAddOperation, KvReaderDelAdd};
|
||||
use crate::update::index_documents::GrenadParameters;
|
||||
use crate::update::{AvailableDocumentsIds, ClearDocuments, UpdateIndexingStep};
|
||||
use crate::{
|
||||
FieldDistribution, FieldId, FieldIdMapMissingEntry, FieldsIdsMap, Index, Result, BEU32,
|
||||
};
|
||||
use crate::{FieldDistribution, FieldId, FieldIdMapMissingEntry, FieldsIdsMap, Index, Result};
|
||||
|
||||
pub struct TransformOutput {
|
||||
pub primary_key: String,
|
||||
@ -245,7 +243,7 @@ impl<'a, 'i> Transform<'a, 'i> {
|
||||
|
||||
let mut skip_insertion = false;
|
||||
if let Some(original_docid) = original_docid {
|
||||
let original_key = BEU32::new(original_docid);
|
||||
let original_key = original_docid;
|
||||
let base_obkv = self
|
||||
.index
|
||||
.documents
|
||||
@ -499,7 +497,7 @@ impl<'a, 'i> Transform<'a, 'i> {
|
||||
self.replaced_documents_ids.insert(internal_docid);
|
||||
|
||||
// fetch the obkv document
|
||||
let original_key = BEU32::new(internal_docid);
|
||||
let original_key = internal_docid;
|
||||
let base_obkv = self
|
||||
.index
|
||||
.documents
|
||||
@ -811,7 +809,7 @@ impl<'a, 'i> Transform<'a, 'i> {
|
||||
// TODO this can be done in parallel by using the rayon `ThreadPool`.
|
||||
pub fn prepare_for_documents_reindexing(
|
||||
self,
|
||||
wtxn: &mut heed::RwTxn<'i, '_>,
|
||||
wtxn: &mut heed::RwTxn<'i>,
|
||||
old_fields_ids_map: FieldsIdsMap,
|
||||
mut new_fields_ids_map: FieldsIdsMap,
|
||||
) -> Result<TransformOutput> {
|
||||
@ -857,7 +855,6 @@ impl<'a, 'i> Transform<'a, 'i> {
|
||||
let obkv = self.index.documents.get(wtxn, &docid)?.ok_or(
|
||||
InternalError::DatabaseMissingEntry { db_name: db_name::DOCUMENTS, key: None },
|
||||
)?;
|
||||
let docid = docid.get();
|
||||
|
||||
obkv_buffer.clear();
|
||||
let mut obkv_writer = KvWriter::<_, FieldId>::new(&mut obkv_buffer);
|
||||
|
@ -7,7 +7,7 @@ use bytemuck::allocation::pod_collect_to_vec;
|
||||
use charabia::{Language, Script};
|
||||
use grenad::MergerBuilder;
|
||||
use heed::types::ByteSlice;
|
||||
use heed::RwTxn;
|
||||
use heed::{PutFlags, RwTxn};
|
||||
use log::error;
|
||||
use obkv::{KvReader, KvWriter};
|
||||
use ordered_float::OrderedFloat;
|
||||
@ -27,9 +27,7 @@ use crate::index::Hnsw;
|
||||
use crate::update::del_add::{deladd_serialize_add_side, DelAdd, KvReaderDelAdd};
|
||||
use crate::update::facet::FacetsUpdate;
|
||||
use crate::update::index_documents::helpers::{as_cloneable_grenad, try_split_array_at};
|
||||
use crate::{
|
||||
lat_lng_to_xyz, DocumentId, FieldId, GeoPoint, Index, Result, SerializationError, BEU32,
|
||||
};
|
||||
use crate::{lat_lng_to_xyz, DocumentId, FieldId, GeoPoint, Index, Result, SerializationError};
|
||||
|
||||
pub(crate) enum TypedChunk {
|
||||
FieldIdDocidFacetStrings(grenad::Reader<CursorClonableMmap>),
|
||||
@ -149,7 +147,7 @@ pub(crate) fn write_typed_chunk_into_index(
|
||||
let db = index.documents.remap_data_type::<ByteSlice>();
|
||||
|
||||
if !writer.is_empty() {
|
||||
db.put(wtxn, &BEU32::new(docid), &writer.into_inner().unwrap())?;
|
||||
db.put(wtxn, &docid, &writer.into_inner().unwrap())?;
|
||||
operations.push(DocumentOperation {
|
||||
external_id: external_id.to_string(),
|
||||
internal_id: docid,
|
||||
@ -157,7 +155,7 @@ pub(crate) fn write_typed_chunk_into_index(
|
||||
});
|
||||
docids.insert(docid);
|
||||
} else {
|
||||
db.delete(wtxn, &BEU32::new(docid))?;
|
||||
db.delete(wtxn, &docid)?;
|
||||
operations.push(DocumentOperation {
|
||||
external_id: external_id.to_string(),
|
||||
internal_id: docid,
|
||||
@ -362,8 +360,8 @@ pub(crate) fn write_typed_chunk_into_index(
|
||||
// We extract and store the previous vectors
|
||||
if let Some(hnsw) = index.vector_hnsw(wtxn)? {
|
||||
for (pid, point) in hnsw.iter() {
|
||||
let pid_key = BEU32::new(pid.into_inner());
|
||||
let docid = index.vector_id_docid.get(wtxn, &pid_key)?.unwrap().get();
|
||||
let pid_key = pid.into_inner();
|
||||
let docid = index.vector_id_docid.get(wtxn, &pid_key)?.unwrap();
|
||||
let vector: Vec<_> = point.iter().copied().map(OrderedFloat).collect();
|
||||
vectors_set.insert((docid, vector));
|
||||
}
|
||||
@ -424,11 +422,7 @@ pub(crate) fn write_typed_chunk_into_index(
|
||||
// Store the vectors in the point-docid relation database
|
||||
index.vector_id_docid.clear(wtxn)?;
|
||||
for (docid, pid) in docids.into_iter().zip(pids) {
|
||||
index.vector_id_docid.put(
|
||||
wtxn,
|
||||
&BEU32::new(pid.into_inner()),
|
||||
&BEU32::new(docid),
|
||||
)?;
|
||||
index.vector_id_docid.put(wtxn, &pid.into_inner(), &docid)?;
|
||||
}
|
||||
|
||||
log::debug!("There are {} entries in the HNSW so far", hnsw_length);
|
||||
@ -568,14 +562,17 @@ where
|
||||
while let Some((key, value)) = cursor.move_on_next()? {
|
||||
if valid_lmdb_key(key) {
|
||||
debug_assert!(
|
||||
K::bytes_decode(key).is_some(),
|
||||
K::bytes_decode(key).is_ok(),
|
||||
"Couldn't decode key with the database decoder, key length: {} - key bytes: {:x?}",
|
||||
key.len(),
|
||||
&key
|
||||
);
|
||||
buffer.clear();
|
||||
let value = serialize_value(value, &mut buffer)?;
|
||||
unsafe { database.append(key, value)? };
|
||||
unsafe {
|
||||
// safety: We do not keep a reference to anything that lives inside the database
|
||||
database.put_current_with_options::<ByteSlice>(PutFlags::APPEND, key, value)?
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
|
Reference in New Issue
Block a user