	Make the changes to use heed v0.20-alpha.6
		| @@ -29,8 +29,8 @@ geoutils = "0.5.1" | ||||
| grenad = { version = "0.4.5", default-features = false, features = [ | ||||
|     "rayon", "tempfile" | ||||
| ] } | ||||
| heed = { git = "https://github.com/meilisearch/heed", branch = "put-current-with-data-codec", default-features = false, features = [ | ||||
|     "read-txn-no-tls" | ||||
| heed = { git = "https://github.com/meilisearch/heed", branch = "main", default-features = false, features = [ | ||||
|     "serde-json", "serde-bincode", "read-txn-no-tls" | ||||
| ] } | ||||
| indexmap = { version = "2.0.0", features = ["serde"] } | ||||
| instant-distance = { version = "0.6.1", features = ["with-serde"] } | ||||
|   | ||||
| @@ -152,7 +152,7 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco | ||||
|         valid_fields: BTreeSet<String>, | ||||
|         hidden_fields: bool, | ||||
|     }, | ||||
|     #[error("{}", HeedError::BadOpenOptions)] | ||||
|     #[error("an environment is already opened with different options")] | ||||
|     InvalidLmdbOpenOptions, | ||||
|     #[error("You must specify where `sort` is listed in the rankingRules setting to use the sort parameter at search time.")] | ||||
|     SortRankingRuleMissing, | ||||
| @@ -326,11 +326,12 @@ impl From<HeedError> for Error { | ||||
|             HeedError::Mdb(MdbError::MapFull) => UserError(MaxDatabaseSizeReached), | ||||
|             HeedError::Mdb(MdbError::Invalid) => UserError(InvalidStoreFile), | ||||
|             HeedError::Mdb(error) => InternalError(Store(error)), | ||||
|             HeedError::Encoding => InternalError(Serialization(Encoding { db_name: None })), | ||||
|             HeedError::Decoding => InternalError(Serialization(Decoding { db_name: None })), | ||||
|             // TODO use the encoding | ||||
|             HeedError::Encoding(_) => InternalError(Serialization(Encoding { db_name: None })), | ||||
|             HeedError::Decoding(_) => InternalError(Serialization(Decoding { db_name: None })), | ||||
|             HeedError::InvalidDatabaseTyping => InternalError(InvalidDatabaseTyping), | ||||
|             HeedError::DatabaseClosing => InternalError(DatabaseClosing), | ||||
|             HeedError::BadOpenOptions => UserError(InvalidLmdbOpenOptions), | ||||
|             HeedError::BadOpenOptions { .. } => UserError(InvalidLmdbOpenOptions), | ||||
|         } | ||||
|     } | ||||
| } | ||||
|   | ||||
| @@ -1,6 +1,6 @@ | ||||
| use std::collections::HashMap; | ||||
|  | ||||
| use heed::types::{OwnedType, Str}; | ||||
| use heed::types::Str; | ||||
| use heed::{Database, RoIter, RoTxn, RwTxn}; | ||||
|  | ||||
| use crate::{DocumentId, BEU32}; | ||||
| @@ -16,10 +16,10 @@ pub struct DocumentOperation { | ||||
|     pub kind: DocumentOperationKind, | ||||
| } | ||||
|  | ||||
| pub struct ExternalDocumentsIds(Database<Str, OwnedType<BEU32>>); | ||||
| pub struct ExternalDocumentsIds(Database<Str, BEU32>); | ||||
|  | ||||
| impl ExternalDocumentsIds { | ||||
|     pub fn new(db: Database<Str, OwnedType<BEU32>>) -> ExternalDocumentsIds { | ||||
|     pub fn new(db: Database<Str, BEU32>) -> ExternalDocumentsIds { | ||||
|         ExternalDocumentsIds(db) | ||||
|     } | ||||
|  | ||||
| @@ -29,7 +29,7 @@ impl ExternalDocumentsIds { | ||||
|     } | ||||
|  | ||||
|     pub fn get<A: AsRef<str>>(&self, rtxn: &RoTxn, external_id: A) -> heed::Result<Option<u32>> { | ||||
|         Ok(self.0.get(rtxn, external_id.as_ref())?.map(|x| x.get())) | ||||
|         Ok(self.0.get(rtxn, external_id.as_ref())?) | ||||
|     } | ||||
|  | ||||
|     /// An helper function to debug this type, returns an `HashMap` of both, | ||||
| @@ -38,7 +38,7 @@ impl ExternalDocumentsIds { | ||||
|         let mut map = HashMap::default(); | ||||
|         for result in self.0.iter(rtxn)? { | ||||
|             let (external, internal) = result?; | ||||
|             map.insert(external.to_owned(), internal.get()); | ||||
|             map.insert(external.to_owned(), internal); | ||||
|         } | ||||
|         Ok(map) | ||||
|     } | ||||
| @@ -55,7 +55,7 @@ impl ExternalDocumentsIds { | ||||
|         for DocumentOperation { external_id, internal_id, kind } in operations { | ||||
|             match kind { | ||||
|                 DocumentOperationKind::Create => { | ||||
|                     self.0.put(wtxn, &external_id, &BEU32::new(internal_id))?; | ||||
|                     self.0.put(wtxn, &external_id, &internal_id)?; | ||||
|                 } | ||||
|                 DocumentOperationKind::Delete => { | ||||
|                     if !self.0.delete(wtxn, &external_id)? { | ||||
| @@ -69,7 +69,7 @@ impl ExternalDocumentsIds { | ||||
|     } | ||||
|  | ||||
|     /// Returns an iterator over all the external ids. | ||||
|     pub fn iter<'t>(&self, rtxn: &'t RoTxn) -> heed::Result<RoIter<'t, Str, OwnedType<BEU32>>> { | ||||
|     pub fn iter<'t>(&self, rtxn: &'t RoTxn) -> heed::Result<RoIter<'t, Str, BEU32>> { | ||||
|         self.0.iter(rtxn) | ||||
|     } | ||||
| } | ||||
|   | ||||
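With heed v0.20 the `OwnedType<T>` wrapper is gone: integer codecs such as `BEU32` are used directly as key/value types, and `put`/`get` take and return plain `u32` values instead of zerocopy wrappers. A minimal sketch of the resulting usage, assuming `BEU32` aliases heed's big-endian `U32` codec (the database handle and helper functions below are illustrative, not part of the commit):

    use heed::byteorder::BigEndian;
    use heed::types::{Str, U32};
    use heed::{Database, RoTxn, RwTxn};

    // Assumed to match milli's `crate::BEU32` alias.
    type BEU32 = U32<BigEndian>;

    // Write a mapping: no more `BEU32::new(internal_id)` wrapper around the value.
    fn write_mapping(
        db: &Database<Str, BEU32>,
        wtxn: &mut RwTxn,
        external_id: &str,
        internal_id: u32,
    ) -> heed::Result<()> {
        db.put(wtxn, external_id, &internal_id)
    }

    // Read it back: no `.map(|x| x.get())` on the way out either.
    fn read_mapping(
        db: &Database<Str, BEU32>,
        rtxn: &RoTxn,
        external_id: &str,
    ) -> heed::Result<Option<u32>> {
        db.get(rtxn, external_id)
    }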
| @@ -2,26 +2,28 @@ use std::borrow::Cow; | ||||
| use std::convert::TryInto; | ||||
| use std::str; | ||||
|  | ||||
| use heed::BoxedError; | ||||
|  | ||||
| pub struct BEU16StrCodec; | ||||
|  | ||||
| impl<'a> heed::BytesDecode<'a> for BEU16StrCodec { | ||||
|     type DItem = (u16, &'a str); | ||||
|  | ||||
|     fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> { | ||||
|     fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> { | ||||
|         let (n_bytes, str_bytes) = bytes.split_at(2); | ||||
|         let n = n_bytes.try_into().map(u16::from_be_bytes).ok()?; | ||||
|         let s = str::from_utf8(str_bytes).ok()?; | ||||
|         Some((n, s)) | ||||
|         let n = n_bytes.try_into().map(u16::from_be_bytes)?; | ||||
|         let s = str::from_utf8(str_bytes)?; | ||||
|         Ok((n, s)) | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl<'a> heed::BytesEncode<'a> for BEU16StrCodec { | ||||
|     type EItem = (u16, &'a str); | ||||
|  | ||||
|     fn bytes_encode((n, s): &Self::EItem) -> Option<Cow<[u8]>> { | ||||
|     fn bytes_encode((n, s): &Self::EItem) -> Result<Cow<[u8]>, BoxedError> { | ||||
|         let mut bytes = Vec::with_capacity(s.len() + 2); | ||||
|         bytes.extend_from_slice(&n.to_be_bytes()); | ||||
|         bytes.extend_from_slice(s.as_bytes()); | ||||
|         Some(Cow::Owned(bytes)) | ||||
|         Ok(Cow::Owned(bytes)) | ||||
|     } | ||||
| } | ||||
|   | ||||
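The change repeated across all of these codec files is the same: in heed v0.20, `BytesEncode::bytes_encode` and `BytesDecode::bytes_decode` return `Result<_, BoxedError>` instead of `Option`, so parsing failures are propagated with `?` rather than collapsed into `None`. A minimal, hypothetical codec showing just the new trait shapes (the codec itself is not part of the commit):

    use std::borrow::Cow;
    use std::convert::TryInto;

    use heed::{BoxedError, BytesDecode, BytesEncode};

    // Hypothetical codec storing a `u64` as big-endian bytes.
    pub struct BEU64ExampleCodec;

    impl BytesEncode<'_> for BEU64ExampleCodec {
        type EItem = u64;

        fn bytes_encode(item: &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
            Ok(Cow::Owned(item.to_be_bytes().to_vec()))
        }
    }

    impl<'a> BytesDecode<'a> for BEU64ExampleCodec {
        type DItem = u64;

        fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
            // Any error convertible into `BoxedError` bubbles up with `?`,
            // here the `TryFromSliceError` from a wrong-sized slice.
            let array: [u8; 8] = bytes.try_into()?;
            Ok(u64::from_be_bytes(array))
        }
    }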
| @@ -2,26 +2,28 @@ use std::borrow::Cow; | ||||
| use std::convert::TryInto; | ||||
| use std::str; | ||||
|  | ||||
| use heed::BoxedError; | ||||
|  | ||||
| pub struct BEU32StrCodec; | ||||
|  | ||||
| impl<'a> heed::BytesDecode<'a> for BEU32StrCodec { | ||||
|     type DItem = (u32, &'a str); | ||||
|  | ||||
|     fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> { | ||||
|     fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> { | ||||
|         let (n_bytes, str_bytes) = bytes.split_at(4); | ||||
|         let n = n_bytes.try_into().map(u32::from_be_bytes).ok()?; | ||||
|         let s = str::from_utf8(str_bytes).ok()?; | ||||
|         Some((n, s)) | ||||
|         let n = n_bytes.try_into().map(u32::from_be_bytes)?; | ||||
|         let s = str::from_utf8(str_bytes)?; | ||||
|         Ok((n, s)) | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl<'a> heed::BytesEncode<'a> for BEU32StrCodec { | ||||
|     type EItem = (u32, &'a str); | ||||
|  | ||||
|     fn bytes_encode((n, s): &Self::EItem) -> Option<Cow<[u8]>> { | ||||
|     fn bytes_encode((n, s): &Self::EItem) -> Result<Cow<[u8]>, BoxedError> { | ||||
|         let mut bytes = Vec::with_capacity(s.len() + 4); | ||||
|         bytes.extend_from_slice(&n.to_be_bytes()); | ||||
|         bytes.extend_from_slice(s.as_bytes()); | ||||
|         Some(Cow::Owned(bytes)) | ||||
|         Ok(Cow::Owned(bytes)) | ||||
|     } | ||||
| } | ||||
|   | ||||
| @@ -1,6 +1,6 @@ | ||||
| use std::borrow::Cow; | ||||
|  | ||||
| use heed::{BytesDecode, BytesEncode}; | ||||
| use heed::{BoxedError, BytesDecode, BytesEncode}; | ||||
|  | ||||
| /// A codec for values of type `&[u8]`. Unlike `ByteSlice`, its `EItem` and `DItem` associated | ||||
| /// types are equivalent (= `&'a [u8]`) and these values can reside within another structure. | ||||
| @@ -9,15 +9,15 @@ pub struct ByteSliceRefCodec; | ||||
| impl<'a> BytesEncode<'a> for ByteSliceRefCodec { | ||||
|     type EItem = &'a [u8]; | ||||
|  | ||||
|     fn bytes_encode(item: &'a Self::EItem) -> Option<Cow<'a, [u8]>> { | ||||
|         Some(Cow::Borrowed(item)) | ||||
|     fn bytes_encode(item: &'a Self::EItem) -> Result<Cow<'a, [u8]>, BoxedError> { | ||||
|         Ok(Cow::Borrowed(item)) | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl<'a> BytesDecode<'a> for ByteSliceRefCodec { | ||||
|     type DItem = &'a [u8]; | ||||
|  | ||||
|     fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> { | ||||
|         Some(bytes) | ||||
|     fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> { | ||||
|         Ok(bytes) | ||||
|     } | ||||
| } | ||||
|   | ||||
| @@ -1,7 +1,7 @@ | ||||
| use std::borrow::Cow; | ||||
| use std::marker::PhantomData; | ||||
|  | ||||
| use heed::{BytesDecode, BytesEncode}; | ||||
| use heed::{BoxedError, BytesDecode, BytesEncode}; | ||||
|  | ||||
| use crate::{try_split_array_at, DocumentId, FieldId}; | ||||
|  | ||||
| @@ -13,16 +13,16 @@ where | ||||
| { | ||||
|     type DItem = (FieldId, DocumentId, C::DItem); | ||||
|  | ||||
|     fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> { | ||||
|         let (field_id_bytes, bytes) = try_split_array_at(bytes)?; | ||||
|     fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> { | ||||
|         let (field_id_bytes, bytes) = try_split_array_at(bytes).unwrap(); | ||||
|         let field_id = u16::from_be_bytes(field_id_bytes); | ||||
|  | ||||
|         let (document_id_bytes, bytes) = try_split_array_at(bytes)?; | ||||
|         let (document_id_bytes, bytes) = try_split_array_at(bytes).unwrap(); | ||||
|         let document_id = u32::from_be_bytes(document_id_bytes); | ||||
|  | ||||
|         let value = C::bytes_decode(bytes)?; | ||||
|  | ||||
|         Some((field_id, document_id, value)) | ||||
|         Ok((field_id, document_id, value)) | ||||
|     } | ||||
| } | ||||
|  | ||||
| @@ -32,13 +32,15 @@ where | ||||
| { | ||||
|     type EItem = (FieldId, DocumentId, C::EItem); | ||||
|  | ||||
|     fn bytes_encode((field_id, document_id, value): &'a Self::EItem) -> Option<Cow<[u8]>> { | ||||
|     fn bytes_encode( | ||||
|         (field_id, document_id, value): &'a Self::EItem, | ||||
|     ) -> Result<Cow<[u8]>, BoxedError> { | ||||
|         let mut bytes = Vec::with_capacity(32); | ||||
|         bytes.extend_from_slice(&field_id.to_be_bytes()); // 2 bytes | ||||
|         bytes.extend_from_slice(&document_id.to_be_bytes()); // 4 bytes | ||||
|         let value_bytes = C::bytes_encode(value)?; | ||||
|         // variable length, if f64 -> 16 bytes, if string -> large, potentially | ||||
|         bytes.extend_from_slice(&value_bytes); | ||||
|         Some(Cow::Owned(bytes)) | ||||
|         Ok(Cow::Owned(bytes)) | ||||
|     } | ||||
| } | ||||
|   | ||||
| @@ -5,8 +5,8 @@ use std::borrow::Cow; | ||||
| use std::convert::TryFrom; | ||||
| use std::marker::PhantomData; | ||||
|  | ||||
| use heed::types::{DecodeIgnore, OwnedType}; | ||||
| use heed::{BytesDecode, BytesEncode}; | ||||
| use heed::types::DecodeIgnore; | ||||
| use heed::{BoxedError, BytesDecode, BytesEncode}; | ||||
| use roaring::RoaringBitmap; | ||||
|  | ||||
| pub use self::field_doc_id_facet_codec::FieldDocIdFacetCodec; | ||||
| @@ -18,7 +18,7 @@ pub type FieldDocIdFacetF64Codec = FieldDocIdFacetCodec<OrderedF64Codec>; | ||||
| pub type FieldDocIdFacetStringCodec = FieldDocIdFacetCodec<StrRefCodec>; | ||||
| pub type FieldDocIdFacetIgnoreCodec = FieldDocIdFacetCodec<DecodeIgnore>; | ||||
|  | ||||
| pub type FieldIdCodec = OwnedType<BEU16>; | ||||
| pub type FieldIdCodec = BEU16; | ||||
|  | ||||
| /// Tries to split a slice in half at the given middle point, | ||||
| /// `None` if the slice is too short. | ||||
| @@ -58,15 +58,15 @@ where | ||||
| { | ||||
|     type EItem = FacetGroupKey<T::EItem>; | ||||
|  | ||||
|     fn bytes_encode(value: &'a Self::EItem) -> Option<Cow<'a, [u8]>> { | ||||
|     fn bytes_encode(value: &'a Self::EItem) -> Result<Cow<'a, [u8]>, BoxedError> { | ||||
|         let mut v = vec![]; | ||||
|         v.extend_from_slice(&value.field_id.to_be_bytes()); | ||||
|         v.extend_from_slice(&[value.level]); | ||||
|  | ||||
|         let bound = T::bytes_encode(&value.left_bound)?; | ||||
|         let bound = T::bytes_encode(&value.left_bound).unwrap(); | ||||
|         v.extend_from_slice(&bound); | ||||
|  | ||||
|         Some(Cow::Owned(v)) | ||||
|         Ok(Cow::Owned(v)) | ||||
|     } | ||||
| } | ||||
| impl<'a, T> heed::BytesDecode<'a> for FacetGroupKeyCodec<T> | ||||
| @@ -75,11 +75,11 @@ where | ||||
| { | ||||
|     type DItem = FacetGroupKey<T::DItem>; | ||||
|  | ||||
|     fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> { | ||||
|         let fid = u16::from_be_bytes(<[u8; 2]>::try_from(&bytes[0..=1]).ok()?); | ||||
|     fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> { | ||||
|         let fid = u16::from_be_bytes(<[u8; 2]>::try_from(&bytes[0..=1])?); | ||||
|         let level = bytes[2]; | ||||
|         let bound = T::bytes_decode(&bytes[3..])?; | ||||
|         Some(FacetGroupKey { field_id: fid, level, left_bound: bound }) | ||||
|         Ok(FacetGroupKey { field_id: fid, level, left_bound: bound }) | ||||
|     } | ||||
| } | ||||
|  | ||||
| @@ -87,17 +87,17 @@ pub struct FacetGroupValueCodec; | ||||
| impl<'a> heed::BytesEncode<'a> for FacetGroupValueCodec { | ||||
|     type EItem = FacetGroupValue; | ||||
|  | ||||
|     fn bytes_encode(value: &'a Self::EItem) -> Option<Cow<'a, [u8]>> { | ||||
|     fn bytes_encode(value: &'a Self::EItem) -> Result<Cow<'a, [u8]>, BoxedError> { | ||||
|         let mut v = vec![value.size]; | ||||
|         CboRoaringBitmapCodec::serialize_into(&value.bitmap, &mut v); | ||||
|         Some(Cow::Owned(v)) | ||||
|         Ok(Cow::Owned(v)) | ||||
|     } | ||||
| } | ||||
| impl<'a> heed::BytesDecode<'a> for FacetGroupValueCodec { | ||||
|     type DItem = FacetGroupValue; | ||||
|     fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> { | ||||
|     fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> { | ||||
|         let size = bytes[0]; | ||||
|         let bitmap = CboRoaringBitmapCodec::deserialize_from(&bytes[1..]).ok()?; | ||||
|         Some(FacetGroupValue { size, bitmap }) | ||||
|         let bitmap = CboRoaringBitmapCodec::deserialize_from(&bytes[1..])?; | ||||
|         Ok(FacetGroupValue { size, bitmap }) | ||||
|     } | ||||
| } | ||||
|   | ||||
| @@ -1,7 +1,7 @@ | ||||
| use std::borrow::Cow; | ||||
| use std::convert::TryInto; | ||||
|  | ||||
| use heed::BytesDecode; | ||||
| use heed::{BoxedError, BytesDecode}; | ||||
|  | ||||
| use crate::facet::value_encoding::f64_into_bytes; | ||||
|  | ||||
| @@ -10,28 +10,28 @@ pub struct OrderedF64Codec; | ||||
| impl<'a> BytesDecode<'a> for OrderedF64Codec { | ||||
|     type DItem = f64; | ||||
|  | ||||
|     fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> { | ||||
|     fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> { | ||||
|         if bytes.len() < 16 { | ||||
|             return None; | ||||
|             panic!() // TODO don't panic | ||||
|         } | ||||
|         let f = bytes[8..].try_into().ok().map(f64::from_be_bytes)?; | ||||
|         Some(f) | ||||
|         let f = bytes[8..].try_into().ok().map(f64::from_be_bytes).unwrap(); | ||||
|         Ok(f) | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl heed::BytesEncode<'_> for OrderedF64Codec { | ||||
|     type EItem = f64; | ||||
|  | ||||
|     fn bytes_encode(f: &Self::EItem) -> Option<Cow<[u8]>> { | ||||
|     fn bytes_encode(f: &Self::EItem) -> Result<Cow<[u8]>, BoxedError> { | ||||
|         let mut buffer = [0u8; 16]; | ||||
|  | ||||
|         // write the globally ordered float | ||||
|         let bytes = f64_into_bytes(*f)?; | ||||
|         let bytes = f64_into_bytes(*f).unwrap(); | ||||
|         buffer[..8].copy_from_slice(&bytes[..]); | ||||
|         // Then the f64 value just to be able to read it back | ||||
|         let bytes = f.to_be_bytes(); | ||||
|         buffer[8..16].copy_from_slice(&bytes[..]); | ||||
|  | ||||
|         Some(Cow::Owned(buffer.to_vec())) | ||||
|         Ok(Cow::Owned(buffer.to_vec())) | ||||
|     } | ||||
| } | ||||
|   | ||||
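The length check above now panics behind a TODO instead of returning `None`. One possible way to resolve that TODO (a sketch only, not part of this commit, and assuming `thiserror`, which milli already uses for its error types) is to return a dedicated error as a `BoxedError`:

    use std::convert::TryInto;

    use heed::{BoxedError, BytesDecode};

    // Hypothetical error type for the sketch.
    #[derive(Debug, thiserror::Error)]
    #[error("invalid slice length for an f64: expected 16 bytes, found {0}")]
    struct InvalidF64Bytes(usize);

    pub struct OrderedF64CodecSketch;

    impl<'a> BytesDecode<'a> for OrderedF64CodecSketch {
        type DItem = f64;

        fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
            if bytes.len() < 16 {
                return Err(InvalidF64Bytes(bytes.len()).into());
            }
            // The second half of the buffer holds the raw IEEE-754 value.
            let raw: [u8; 8] = bytes[8..16].try_into()?;
            Ok(f64::from_be_bytes(raw))
        }
    }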
| @@ -1,5 +1,7 @@ | ||||
| use std::borrow::Cow; | ||||
|  | ||||
| use heed::BoxedError; | ||||
|  | ||||
| use crate::{try_split_array_at, FieldId}; | ||||
|  | ||||
| pub struct FieldIdWordCountCodec; | ||||
| @@ -7,21 +9,21 @@ pub struct FieldIdWordCountCodec; | ||||
| impl<'a> heed::BytesDecode<'a> for FieldIdWordCountCodec { | ||||
|     type DItem = (FieldId, u8); | ||||
|  | ||||
|     fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> { | ||||
|         let (field_id_bytes, bytes) = try_split_array_at(bytes)?; | ||||
|     fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> { | ||||
|         let (field_id_bytes, bytes) = try_split_array_at(bytes).unwrap(); | ||||
|         let field_id = u16::from_be_bytes(field_id_bytes); | ||||
|         let ([word_count], _nothing) = try_split_array_at(bytes)?; | ||||
|         Some((field_id, word_count)) | ||||
|         let ([word_count], _nothing) = try_split_array_at(bytes).unwrap(); | ||||
|         Ok((field_id, word_count)) | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl<'a> heed::BytesEncode<'a> for FieldIdWordCountCodec { | ||||
|     type EItem = (FieldId, u8); | ||||
|  | ||||
|     fn bytes_encode((field_id, word_count): &Self::EItem) -> Option<Cow<[u8]>> { | ||||
|     fn bytes_encode((field_id, word_count): &Self::EItem) -> Result<Cow<[u8]>, BoxedError> { | ||||
|         let mut bytes = Vec::with_capacity(2 + 1); | ||||
|         bytes.extend_from_slice(&field_id.to_be_bytes()); | ||||
|         bytes.push(*word_count); | ||||
|         Some(Cow::Owned(bytes)) | ||||
|         Ok(Cow::Owned(bytes)) | ||||
|     } | ||||
| } | ||||
|   | ||||
| @@ -1,7 +1,7 @@ | ||||
| use std::borrow::Cow; | ||||
|  | ||||
| use fst::Set; | ||||
| use heed::{BytesDecode, BytesEncode}; | ||||
| use heed::{BoxedError, BytesDecode, BytesEncode}; | ||||
|  | ||||
| /// A codec for values of type `Set<&[u8]>`. | ||||
| pub struct FstSetCodec; | ||||
| @@ -9,15 +9,15 @@ pub struct FstSetCodec; | ||||
| impl<'a> BytesEncode<'a> for FstSetCodec { | ||||
|     type EItem = Set<Vec<u8>>; | ||||
|  | ||||
|     fn bytes_encode(item: &'a Self::EItem) -> Option<Cow<'a, [u8]>> { | ||||
|         Some(Cow::Borrowed(item.as_fst().as_bytes())) | ||||
|     fn bytes_encode(item: &'a Self::EItem) -> Result<Cow<'a, [u8]>, BoxedError> { | ||||
|         Ok(Cow::Borrowed(item.as_fst().as_bytes())) | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl<'a> BytesDecode<'a> for FstSetCodec { | ||||
|     type DItem = Set<&'a [u8]>; | ||||
|  | ||||
|     fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> { | ||||
|         Set::new(bytes).ok() | ||||
|     fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> { | ||||
|         Set::new(bytes).map_err(Into::into) | ||||
|     } | ||||
| } | ||||
|   | ||||
| @@ -13,6 +13,7 @@ mod str_ref; | ||||
| mod str_str_u8_codec; | ||||
|  | ||||
| pub use byte_slice_ref::ByteSliceRefCodec; | ||||
| use heed::BoxedError; | ||||
| pub use str_ref::StrRefCodec; | ||||
|  | ||||
| pub use self::beu16_str_codec::BEU16StrCodec; | ||||
| @@ -31,5 +32,5 @@ pub use self::str_str_u8_codec::{U8StrStrCodec, UncheckedU8StrStrCodec}; | ||||
| pub trait BytesDecodeOwned { | ||||
|     type DItem; | ||||
|  | ||||
|     fn bytes_decode_owned(bytes: &[u8]) -> Option<Self::DItem>; | ||||
|     fn bytes_decode_owned(bytes: &[u8]) -> Result<Self::DItem, BoxedError>; | ||||
| } | ||||
|   | ||||
| @@ -1,5 +1,6 @@ | ||||
| use std::borrow::Cow; | ||||
|  | ||||
| use heed::BoxedError; | ||||
| use obkv::{KvReaderU16, KvWriterU16}; | ||||
|  | ||||
| pub struct ObkvCodec; | ||||
| @@ -7,15 +8,15 @@ pub struct ObkvCodec; | ||||
| impl<'a> heed::BytesDecode<'a> for ObkvCodec { | ||||
|     type DItem = KvReaderU16<'a>; | ||||
|  | ||||
|     fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> { | ||||
|         Some(KvReaderU16::new(bytes)) | ||||
|     fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> { | ||||
|         Ok(KvReaderU16::new(bytes)) | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl heed::BytesEncode<'_> for ObkvCodec { | ||||
|     type EItem = KvWriterU16<Vec<u8>>; | ||||
|  | ||||
|     fn bytes_encode(item: &Self::EItem) -> Option<Cow<[u8]>> { | ||||
|         item.clone().into_inner().map(Cow::Owned).ok() | ||||
|     fn bytes_encode(item: &Self::EItem) -> Result<Cow<[u8]>, BoxedError> { | ||||
|         item.clone().into_inner().map(Cow::Owned).map_err(Into::into) | ||||
|     } | ||||
| } | ||||
|   | ||||
| @@ -2,7 +2,7 @@ use std::borrow::Cow; | ||||
| use std::convert::TryInto; | ||||
| use std::mem::size_of; | ||||
|  | ||||
| use heed::BytesDecode; | ||||
| use heed::{BoxedError, BytesDecode}; | ||||
| use roaring::RoaringBitmap; | ||||
|  | ||||
| use crate::heed_codec::BytesDecodeOwned; | ||||
| @@ -19,22 +19,22 @@ impl BoRoaringBitmapCodec { | ||||
| impl BytesDecode<'_> for BoRoaringBitmapCodec { | ||||
|     type DItem = RoaringBitmap; | ||||
|  | ||||
|     fn bytes_decode(bytes: &[u8]) -> Option<Self::DItem> { | ||||
|     fn bytes_decode(bytes: &[u8]) -> Result<Self::DItem, BoxedError> { | ||||
|         let mut bitmap = RoaringBitmap::new(); | ||||
|  | ||||
|         for chunk in bytes.chunks(size_of::<u32>()) { | ||||
|             let bytes = chunk.try_into().ok()?; | ||||
|             let bytes = chunk.try_into()?; | ||||
|             bitmap.push(u32::from_ne_bytes(bytes)); | ||||
|         } | ||||
|  | ||||
|         Some(bitmap) | ||||
|         Ok(bitmap) | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl BytesDecodeOwned for BoRoaringBitmapCodec { | ||||
|     type DItem = RoaringBitmap; | ||||
|  | ||||
|     fn bytes_decode_owned(bytes: &[u8]) -> Option<Self::DItem> { | ||||
|     fn bytes_decode_owned(bytes: &[u8]) -> Result<Self::DItem, BoxedError> { | ||||
|         Self::bytes_decode(bytes) | ||||
|     } | ||||
| } | ||||
| @@ -42,9 +42,9 @@ impl BytesDecodeOwned for BoRoaringBitmapCodec { | ||||
| impl heed::BytesEncode<'_> for BoRoaringBitmapCodec { | ||||
|     type EItem = RoaringBitmap; | ||||
|  | ||||
|     fn bytes_encode(item: &Self::EItem) -> Option<Cow<[u8]>> { | ||||
|     fn bytes_encode(item: &Self::EItem) -> Result<Cow<[u8]>, BoxedError> { | ||||
|         let mut out = Vec::new(); | ||||
|         BoRoaringBitmapCodec::serialize_into(item, &mut out); | ||||
|         Some(Cow::Owned(out)) | ||||
|         Ok(Cow::Owned(out)) | ||||
|     } | ||||
| } | ||||
|   | ||||
| @@ -3,6 +3,7 @@ use std::io; | ||||
| use std::mem::size_of; | ||||
|  | ||||
| use byteorder::{NativeEndian, ReadBytesExt, WriteBytesExt}; | ||||
| use heed::BoxedError; | ||||
| use roaring::RoaringBitmap; | ||||
|  | ||||
| use crate::heed_codec::BytesDecodeOwned; | ||||
| @@ -132,26 +133,26 @@ impl CboRoaringBitmapCodec { | ||||
| impl heed::BytesDecode<'_> for CboRoaringBitmapCodec { | ||||
|     type DItem = RoaringBitmap; | ||||
|  | ||||
|     fn bytes_decode(bytes: &[u8]) -> Option<Self::DItem> { | ||||
|         Self::deserialize_from(bytes).ok() | ||||
|     fn bytes_decode(bytes: &[u8]) -> Result<Self::DItem, BoxedError> { | ||||
|         Self::deserialize_from(bytes).map_err(Into::into) | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl BytesDecodeOwned for CboRoaringBitmapCodec { | ||||
|     type DItem = RoaringBitmap; | ||||
|  | ||||
|     fn bytes_decode_owned(bytes: &[u8]) -> Option<Self::DItem> { | ||||
|         Self::deserialize_from(bytes).ok() | ||||
|     fn bytes_decode_owned(bytes: &[u8]) -> Result<Self::DItem, BoxedError> { | ||||
|         Self::deserialize_from(bytes).map_err(Into::into) | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl heed::BytesEncode<'_> for CboRoaringBitmapCodec { | ||||
|     type EItem = RoaringBitmap; | ||||
|  | ||||
|     fn bytes_encode(item: &Self::EItem) -> Option<Cow<[u8]>> { | ||||
|     fn bytes_encode(item: &Self::EItem) -> Result<Cow<[u8]>, BoxedError> { | ||||
|         let mut vec = Vec::with_capacity(Self::serialized_size(item)); | ||||
|         Self::serialize_into(item, &mut vec); | ||||
|         Some(Cow::Owned(vec)) | ||||
|         Ok(Cow::Owned(vec)) | ||||
|     } | ||||
| } | ||||
|  | ||||
|   | ||||
| @@ -1,5 +1,6 @@ | ||||
| use std::borrow::Cow; | ||||
|  | ||||
| use heed::BoxedError; | ||||
| use roaring::RoaringBitmap; | ||||
|  | ||||
| use crate::heed_codec::BytesDecodeOwned; | ||||
| @@ -9,25 +10,25 @@ pub struct RoaringBitmapCodec; | ||||
| impl heed::BytesDecode<'_> for RoaringBitmapCodec { | ||||
|     type DItem = RoaringBitmap; | ||||
|  | ||||
|     fn bytes_decode(bytes: &[u8]) -> Option<Self::DItem> { | ||||
|         RoaringBitmap::deserialize_unchecked_from(bytes).ok() | ||||
|     fn bytes_decode(bytes: &[u8]) -> Result<Self::DItem, BoxedError> { | ||||
|         RoaringBitmap::deserialize_unchecked_from(bytes).map_err(Into::into) | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl BytesDecodeOwned for RoaringBitmapCodec { | ||||
|     type DItem = RoaringBitmap; | ||||
|  | ||||
|     fn bytes_decode_owned(bytes: &[u8]) -> Option<Self::DItem> { | ||||
|         RoaringBitmap::deserialize_from(bytes).ok() | ||||
|     fn bytes_decode_owned(bytes: &[u8]) -> Result<Self::DItem, BoxedError> { | ||||
|         RoaringBitmap::deserialize_from(bytes).map_err(Into::into) | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl heed::BytesEncode<'_> for RoaringBitmapCodec { | ||||
|     type EItem = RoaringBitmap; | ||||
|  | ||||
|     fn bytes_encode(item: &Self::EItem) -> Option<Cow<[u8]>> { | ||||
|     fn bytes_encode(item: &Self::EItem) -> Result<Cow<[u8]>, BoxedError> { | ||||
|         let mut bytes = Vec::with_capacity(item.serialized_size()); | ||||
|         item.serialize_into(&mut bytes).ok()?; | ||||
|         Some(Cow::Owned(bytes)) | ||||
|         item.serialize_into(&mut bytes)?; | ||||
|         Ok(Cow::Owned(bytes)) | ||||
|     } | ||||
| } | ||||
|   | ||||
| @@ -1,6 +1,6 @@ | ||||
| use std::mem; | ||||
|  | ||||
| use heed::BytesDecode; | ||||
| use heed::{BoxedError, BytesDecode}; | ||||
|  | ||||
| use crate::heed_codec::BytesDecodeOwned; | ||||
|  | ||||
| @@ -9,15 +9,15 @@ pub struct BoRoaringBitmapLenCodec; | ||||
| impl BytesDecode<'_> for BoRoaringBitmapLenCodec { | ||||
|     type DItem = u64; | ||||
|  | ||||
|     fn bytes_decode(bytes: &[u8]) -> Option<Self::DItem> { | ||||
|         Some((bytes.len() / mem::size_of::<u32>()) as u64) | ||||
|     fn bytes_decode(bytes: &[u8]) -> Result<Self::DItem, BoxedError> { | ||||
|         Ok((bytes.len() / mem::size_of::<u32>()) as u64) | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl BytesDecodeOwned for BoRoaringBitmapLenCodec { | ||||
|     type DItem = u64; | ||||
|  | ||||
|     fn bytes_decode_owned(bytes: &[u8]) -> Option<Self::DItem> { | ||||
|     fn bytes_decode_owned(bytes: &[u8]) -> Result<Self::DItem, BoxedError> { | ||||
|         Self::bytes_decode(bytes) | ||||
|     } | ||||
| } | ||||
|   | ||||
| @@ -1,6 +1,6 @@ | ||||
| use std::mem; | ||||
|  | ||||
| use heed::BytesDecode; | ||||
| use heed::{BoxedError, BytesDecode}; | ||||
|  | ||||
| use super::{BoRoaringBitmapLenCodec, RoaringBitmapLenCodec}; | ||||
| use crate::heed_codec::roaring_bitmap::cbo_roaring_bitmap_codec::THRESHOLD; | ||||
| @@ -11,7 +11,7 @@ pub struct CboRoaringBitmapLenCodec; | ||||
| impl BytesDecode<'_> for CboRoaringBitmapLenCodec { | ||||
|     type DItem = u64; | ||||
|  | ||||
|     fn bytes_decode(bytes: &[u8]) -> Option<Self::DItem> { | ||||
|     fn bytes_decode(bytes: &[u8]) -> Result<Self::DItem, BoxedError> { | ||||
|         if bytes.len() <= THRESHOLD * mem::size_of::<u32>() { | ||||
|             // If there is threshold or less than threshold integers that can fit into this array | ||||
|             // of bytes it means that we used the ByteOrder codec serializer. | ||||
| @@ -27,7 +27,7 @@ impl BytesDecode<'_> for CboRoaringBitmapLenCodec { | ||||
| impl BytesDecodeOwned for CboRoaringBitmapLenCodec { | ||||
|     type DItem = u64; | ||||
|  | ||||
|     fn bytes_decode_owned(bytes: &[u8]) -> Option<Self::DItem> { | ||||
|     fn bytes_decode_owned(bytes: &[u8]) -> Result<Self::DItem, BoxedError> { | ||||
|         Self::bytes_decode(bytes) | ||||
|     } | ||||
| } | ||||
|   | ||||
| @@ -2,6 +2,7 @@ use std::io::{self, BufRead, Read}; | ||||
| use std::mem; | ||||
|  | ||||
| use byteorder::{LittleEndian, ReadBytesExt}; | ||||
| use heed::BoxedError; | ||||
|  | ||||
| use crate::heed_codec::BytesDecodeOwned; | ||||
|  | ||||
| @@ -56,16 +57,16 @@ impl RoaringBitmapLenCodec { | ||||
| impl heed::BytesDecode<'_> for RoaringBitmapLenCodec { | ||||
|     type DItem = u64; | ||||
|  | ||||
|     fn bytes_decode(bytes: &[u8]) -> Option<Self::DItem> { | ||||
|         RoaringBitmapLenCodec::deserialize_from_slice(bytes).ok() | ||||
|     fn bytes_decode(bytes: &[u8]) -> Result<Self::DItem, BoxedError> { | ||||
|         RoaringBitmapLenCodec::deserialize_from_slice(bytes).map_err(Into::into) | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl BytesDecodeOwned for RoaringBitmapLenCodec { | ||||
|     type DItem = u64; | ||||
|  | ||||
|     fn bytes_decode_owned(bytes: &[u8]) -> Option<Self::DItem> { | ||||
|         RoaringBitmapLenCodec::deserialize_from_slice(bytes).ok() | ||||
|     fn bytes_decode_owned(bytes: &[u8]) -> Result<Self::DItem, BoxedError> { | ||||
|         RoaringBitmapLenCodec::deserialize_from_slice(bytes).map_err(Into::into) | ||||
|     } | ||||
| } | ||||
|  | ||||
|   | ||||
| @@ -2,29 +2,30 @@ use std::borrow::Cow; | ||||
| use std::str; | ||||
|  | ||||
| use charabia::{Language, Script}; | ||||
| use heed::BoxedError; | ||||
|  | ||||
| pub struct ScriptLanguageCodec; | ||||
|  | ||||
| impl<'a> heed::BytesDecode<'a> for ScriptLanguageCodec { | ||||
|     type DItem = (Script, Language); | ||||
|  | ||||
|     fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> { | ||||
|         let sep = bytes.iter().position(|b| *b == 0)?; | ||||
|     fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> { | ||||
|         let sep = bytes.iter().position(|b| *b == 0).unwrap(); | ||||
|         let (s_bytes, l_bytes) = bytes.split_at(sep); | ||||
|         let script = str::from_utf8(s_bytes).ok()?; | ||||
|         let script = str::from_utf8(s_bytes)?; | ||||
|         let script_name = Script::from_name(script); | ||||
|         let lan = str::from_utf8(l_bytes).ok()?; | ||||
|         let lan = str::from_utf8(l_bytes)?; | ||||
|         // skip '\0' byte between the two strings. | ||||
|         let lan_name = Language::from_name(&lan[1..]); | ||||
|  | ||||
|         Some((script_name, lan_name)) | ||||
|         Ok((script_name, lan_name)) | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl<'a> heed::BytesEncode<'a> for ScriptLanguageCodec { | ||||
|     type EItem = (Script, Language); | ||||
|  | ||||
|     fn bytes_encode((script, lan): &Self::EItem) -> Option<Cow<[u8]>> { | ||||
|     fn bytes_encode((script, lan): &Self::EItem) -> Result<Cow<[u8]>, BoxedError> { | ||||
|         let script_name = script.name().as_bytes(); | ||||
|         let lan_name = lan.name().as_bytes(); | ||||
|  | ||||
| @@ -33,6 +34,6 @@ impl<'a> heed::BytesEncode<'a> for ScriptLanguageCodec { | ||||
|         bytes.push(0); | ||||
|         bytes.extend_from_slice(lan_name); | ||||
|  | ||||
|         Some(Cow::Owned(bytes)) | ||||
|         Ok(Cow::Owned(bytes)) | ||||
|     } | ||||
| } | ||||
|   | ||||
| @@ -3,37 +3,39 @@ use std::convert::TryInto; | ||||
| use std::mem::size_of; | ||||
| use std::str; | ||||
|  | ||||
| use heed::BoxedError; | ||||
|  | ||||
| pub struct StrBEU32Codec; | ||||
|  | ||||
| impl<'a> heed::BytesDecode<'a> for StrBEU32Codec { | ||||
|     type DItem = (&'a str, u32); | ||||
|  | ||||
|     fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> { | ||||
|     fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> { | ||||
|         let footer_len = size_of::<u32>(); | ||||
|  | ||||
|         if bytes.len() < footer_len { | ||||
|             return None; | ||||
|             panic!() // TODO Do not panic | ||||
|         } | ||||
|  | ||||
|         let (word, bytes) = bytes.split_at(bytes.len() - footer_len); | ||||
|         let word = str::from_utf8(word).ok()?; | ||||
|         let pos = bytes.try_into().map(u32::from_be_bytes).ok()?; | ||||
|         let word = str::from_utf8(word)?; | ||||
|         let pos = bytes.try_into().map(u32::from_be_bytes)?; | ||||
|  | ||||
|         Some((word, pos)) | ||||
|         Ok((word, pos)) | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl<'a> heed::BytesEncode<'a> for StrBEU32Codec { | ||||
|     type EItem = (&'a str, u32); | ||||
|  | ||||
|     fn bytes_encode((word, pos): &Self::EItem) -> Option<Cow<[u8]>> { | ||||
|     fn bytes_encode((word, pos): &Self::EItem) -> Result<Cow<[u8]>, BoxedError> { | ||||
|         let pos = pos.to_be_bytes(); | ||||
|  | ||||
|         let mut bytes = Vec::with_capacity(word.len() + pos.len()); | ||||
|         bytes.extend_from_slice(word.as_bytes()); | ||||
|         bytes.extend_from_slice(&pos[..]); | ||||
|  | ||||
|         Some(Cow::Owned(bytes)) | ||||
|         Ok(Cow::Owned(bytes)) | ||||
|     } | ||||
| } | ||||
|  | ||||
| @@ -42,26 +44,26 @@ pub struct StrBEU16Codec; | ||||
| impl<'a> heed::BytesDecode<'a> for StrBEU16Codec { | ||||
|     type DItem = (&'a str, u16); | ||||
|  | ||||
|     fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> { | ||||
|     fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> { | ||||
|         let footer_len = size_of::<u16>(); | ||||
|  | ||||
|         if bytes.len() < footer_len + 1 { | ||||
|             return None; | ||||
|             panic!() // TODO do not panic | ||||
|         } | ||||
|  | ||||
|         let (word_plus_nul_byte, bytes) = bytes.split_at(bytes.len() - footer_len); | ||||
|         let (_, word) = word_plus_nul_byte.split_last()?; | ||||
|         let word = str::from_utf8(word).ok()?; | ||||
|         let pos = bytes.try_into().map(u16::from_be_bytes).ok()?; | ||||
|         let (_, word) = word_plus_nul_byte.split_last().unwrap(); | ||||
|         let word = str::from_utf8(word).ok().unwrap(); | ||||
|         let pos = bytes.try_into().map(u16::from_be_bytes).ok().unwrap(); | ||||
|  | ||||
|         Some((word, pos)) | ||||
|         Ok((word, pos)) | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl<'a> heed::BytesEncode<'a> for StrBEU16Codec { | ||||
|     type EItem = (&'a str, u16); | ||||
|  | ||||
|     fn bytes_encode((word, pos): &Self::EItem) -> Option<Cow<[u8]>> { | ||||
|     fn bytes_encode((word, pos): &Self::EItem) -> Result<Cow<[u8]>, BoxedError> { | ||||
|         let pos = pos.to_be_bytes(); | ||||
|  | ||||
|         let mut bytes = Vec::with_capacity(word.len() + 1 + pos.len()); | ||||
| @@ -69,6 +71,6 @@ impl<'a> heed::BytesEncode<'a> for StrBEU16Codec { | ||||
|         bytes.push(0); | ||||
|         bytes.extend_from_slice(&pos[..]); | ||||
|  | ||||
|         Some(Cow::Owned(bytes)) | ||||
|         Ok(Cow::Owned(bytes)) | ||||
|     } | ||||
| } | ||||
|   | ||||
| @@ -1,6 +1,6 @@ | ||||
| use std::borrow::Cow; | ||||
|  | ||||
| use heed::{BytesDecode, BytesEncode}; | ||||
| use heed::{BoxedError, BytesDecode, BytesEncode}; | ||||
|  | ||||
| /// A codec for values of type `&str`. Unlike `Str`, its `EItem` and `DItem` associated | ||||
| /// types are equivalent (= `&'a str`) and these values can reside within another structure. | ||||
| @@ -8,15 +8,14 @@ pub struct StrRefCodec; | ||||
| impl<'a> BytesEncode<'a> for StrRefCodec { | ||||
|     type EItem = &'a str; | ||||
|  | ||||
|     fn bytes_encode(item: &'a &'a str) -> Option<Cow<'a, [u8]>> { | ||||
|         Some(Cow::Borrowed(item.as_bytes())) | ||||
|     fn bytes_encode(item: &'a &'a str) -> Result<Cow<'a, [u8]>, BoxedError> { | ||||
|         Ok(Cow::Borrowed(item.as_bytes())) | ||||
|     } | ||||
| } | ||||
| impl<'a> BytesDecode<'a> for StrRefCodec { | ||||
|     type DItem = &'a str; | ||||
|  | ||||
|     fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> { | ||||
|         let s = std::str::from_utf8(bytes).ok()?; | ||||
|         Some(s) | ||||
|     fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> { | ||||
|         std::str::from_utf8(bytes).map_err(Into::into) | ||||
|     } | ||||
| } | ||||
|   | ||||
| @@ -1,32 +1,34 @@ | ||||
| use std::borrow::Cow; | ||||
| use std::str; | ||||
|  | ||||
| use heed::BoxedError; | ||||
|  | ||||
| pub struct U8StrStrCodec; | ||||
|  | ||||
| impl<'a> heed::BytesDecode<'a> for U8StrStrCodec { | ||||
|     type DItem = (u8, &'a str, &'a str); | ||||
|  | ||||
|     fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> { | ||||
|         let (n, bytes) = bytes.split_first()?; | ||||
|         let s1_end = bytes.iter().position(|b| *b == 0)?; | ||||
|     fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> { | ||||
|         let (n, bytes) = bytes.split_first().unwrap(); | ||||
|         let s1_end = bytes.iter().position(|b| *b == 0).unwrap(); | ||||
|         let (s1_bytes, rest) = bytes.split_at(s1_end); | ||||
|         let s2_bytes = &rest[1..]; | ||||
|         let s1 = str::from_utf8(s1_bytes).ok()?; | ||||
|         let s2 = str::from_utf8(s2_bytes).ok()?; | ||||
|         Some((*n, s1, s2)) | ||||
|         let s1 = str::from_utf8(s1_bytes).ok().unwrap(); | ||||
|         let s2 = str::from_utf8(s2_bytes).ok().unwrap(); | ||||
|         Ok((*n, s1, s2)) | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl<'a> heed::BytesEncode<'a> for U8StrStrCodec { | ||||
|     type EItem = (u8, &'a str, &'a str); | ||||
|  | ||||
|     fn bytes_encode((n, s1, s2): &Self::EItem) -> Option<Cow<[u8]>> { | ||||
|     fn bytes_encode((n, s1, s2): &Self::EItem) -> Result<Cow<[u8]>, BoxedError> { | ||||
|         let mut bytes = Vec::with_capacity(s1.len() + s2.len() + 1); | ||||
|         bytes.push(*n); | ||||
|         bytes.extend_from_slice(s1.as_bytes()); | ||||
|         bytes.push(0); | ||||
|         bytes.extend_from_slice(s2.as_bytes()); | ||||
|         Some(Cow::Owned(bytes)) | ||||
|         Ok(Cow::Owned(bytes)) | ||||
|     } | ||||
| } | ||||
| pub struct UncheckedU8StrStrCodec; | ||||
| @@ -34,24 +36,24 @@ pub struct UncheckedU8StrStrCodec; | ||||
| impl<'a> heed::BytesDecode<'a> for UncheckedU8StrStrCodec { | ||||
|     type DItem = (u8, &'a [u8], &'a [u8]); | ||||
|  | ||||
|     fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> { | ||||
|         let (n, bytes) = bytes.split_first()?; | ||||
|         let s1_end = bytes.iter().position(|b| *b == 0)?; | ||||
|     fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> { | ||||
|         let (n, bytes) = bytes.split_first().unwrap(); | ||||
|         let s1_end = bytes.iter().position(|b| *b == 0).unwrap(); | ||||
|         let (s1_bytes, rest) = bytes.split_at(s1_end); | ||||
|         let s2_bytes = &rest[1..]; | ||||
|         Some((*n, s1_bytes, s2_bytes)) | ||||
|         Ok((*n, s1_bytes, s2_bytes)) | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl<'a> heed::BytesEncode<'a> for UncheckedU8StrStrCodec { | ||||
|     type EItem = (u8, &'a [u8], &'a [u8]); | ||||
|  | ||||
|     fn bytes_encode((n, s1, s2): &Self::EItem) -> Option<Cow<[u8]>> { | ||||
|     fn bytes_encode((n, s1, s2): &Self::EItem) -> Result<Cow<[u8]>, BoxedError> { | ||||
|         let mut bytes = Vec::with_capacity(s1.len() + s2.len() + 1); | ||||
|         bytes.push(*n); | ||||
|         bytes.extend_from_slice(s1); | ||||
|         bytes.push(0); | ||||
|         bytes.extend_from_slice(s2); | ||||
|         Some(Cow::Owned(bytes)) | ||||
|         Ok(Cow::Owned(bytes)) | ||||
|     } | ||||
| } | ||||
|   | ||||
| @@ -4,9 +4,8 @@ use std::fs::File; | ||||
| use std::path::Path; | ||||
|  | ||||
| use charabia::{Language, Script}; | ||||
| use heed::flags::Flags; | ||||
| use heed::types::*; | ||||
| use heed::{CompactionOption, Database, PolyDatabase, RoTxn, RwTxn}; | ||||
| use heed::{CompactionOption, Database, RoTxn, RwTxn, Unspecified}; | ||||
| use roaring::RoaringBitmap; | ||||
| use rstar::RTree; | ||||
| use time::OffsetDateTime; | ||||
| @@ -109,10 +108,10 @@ pub struct Index { | ||||
|     pub(crate) env: heed::Env, | ||||
|  | ||||
|     /// Contains many different types (e.g. the fields ids map). | ||||
|     pub(crate) main: PolyDatabase, | ||||
|     pub(crate) main: Database<Unspecified, Unspecified>, | ||||
|  | ||||
|     /// Maps the external documents ids with the internal document id. | ||||
|     pub external_documents_ids: Database<Str, OwnedType<BEU32>>, | ||||
|     pub external_documents_ids: Database<Str, BEU32>, | ||||
|  | ||||
|     /// A word and all the documents ids containing the word. | ||||
|     pub word_docids: Database<Str, CboRoaringBitmapCodec>, | ||||
| @@ -158,7 +157,7 @@ pub struct Index { | ||||
|     /// Maps the facet field id of the normalized-for-search string facets with their original versions. | ||||
|     pub facet_id_normalized_string_strings: Database<BEU16StrCodec, SerdeJson<BTreeSet<String>>>, | ||||
|     /// Maps the facet field id of the string facets with an FST containing all the facets values. | ||||
|     pub facet_id_string_fst: Database<OwnedType<BEU16>, FstSetCodec>, | ||||
|     pub facet_id_string_fst: Database<BEU16, FstSetCodec>, | ||||
|  | ||||
|     /// Maps the document id, the facet field id and the numbers. | ||||
|     pub field_id_docid_facet_f64s: Database<FieldDocIdFacetF64Codec, Unit>, | ||||
| @@ -166,10 +165,10 @@ pub struct Index { | ||||
|     pub field_id_docid_facet_strings: Database<FieldDocIdFacetStringCodec, Str>, | ||||
|  | ||||
|     /// Maps a vector id to the document id that have it. | ||||
|     pub vector_id_docid: Database<OwnedType<BEU32>, OwnedType<BEU32>>, | ||||
|     pub vector_id_docid: Database<BEU32, BEU32>, | ||||
|  | ||||
|     /// Maps the document id to the document as an obkv store. | ||||
|     pub(crate) documents: Database<OwnedType<BEU32>, ObkvCodec>, | ||||
|     pub(crate) documents: Database<BEU32, ObkvCodec>, | ||||
| } | ||||
|  | ||||
| impl Index { | ||||
| @@ -182,11 +181,10 @@ impl Index { | ||||
|         use db_name::*; | ||||
|  | ||||
|         options.max_dbs(24); | ||||
|         unsafe { options.flag(Flags::MdbAlwaysFreePages) }; | ||||
|  | ||||
|         let env = options.open(path)?; | ||||
|         let mut wtxn = env.write_txn()?; | ||||
|         let main = env.create_poly_database(&mut wtxn, Some(MAIN))?; | ||||
|         let main = env.database_options().name(MAIN).create(&mut wtxn)?; | ||||
|         let word_docids = env.create_database(&mut wtxn, Some(WORD_DOCIDS))?; | ||||
|         let external_documents_ids = | ||||
|             env.create_database(&mut wtxn, Some(EXTERNAL_DOCUMENTS_IDS))?; | ||||
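With `PolyDatabase` and `create_poly_database` removed in heed v0.20, an untyped database is a `Database<Unspecified, Unspecified>` created through the env's database builder, and every call site remaps it to concrete codecs with `remap_types`. A condensed sketch of that pattern (the path, database names and keys are illustrative):

    use heed::types::Str;
    use heed::{Database, EnvOpenOptions, Unspecified};

    fn open_main(path: &std::path::Path) -> heed::Result<()> {
        let mut options = EnvOpenOptions::new();
        options.max_dbs(1);
        let env = options.open(path)?;

        let mut wtxn = env.write_txn()?;
        // Replaces `env.create_poly_database(&mut wtxn, Some("main"))`.
        let main: Database<Unspecified, Unspecified> =
            env.database_options().name("main").create(&mut wtxn)?;

        // Each access now picks its codecs explicitly through `remap_types`
        // (the real code above uses e.g. `SerdeJson<OffsetDateTime>`).
        main.remap_types::<Str, Str>().put(&mut wtxn, "created-at", "2023-11-01")?;
        wtxn.commit()?;
        Ok(())
    }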
| @@ -264,20 +262,23 @@ impl Index { | ||||
|  | ||||
|     fn set_creation_dates( | ||||
|         env: &heed::Env, | ||||
|         main: PolyDatabase, | ||||
|         main: Database<Unspecified, Unspecified>, | ||||
|         created_at: OffsetDateTime, | ||||
|         updated_at: OffsetDateTime, | ||||
|     ) -> heed::Result<()> { | ||||
|         let mut txn = env.write_txn()?; | ||||
|         // The db was just created, we update its metadata with the relevant information. | ||||
|         if main.get::<_, Str, SerdeJson<OffsetDateTime>>(&txn, main_key::CREATED_AT_KEY)?.is_none() | ||||
|         if main | ||||
|             .remap_types::<Str, SerdeJson<OffsetDateTime>>() | ||||
|             .get(&txn, main_key::CREATED_AT_KEY)? | ||||
|             .is_none() | ||||
|         { | ||||
|             main.put::<_, Str, SerdeJson<OffsetDateTime>>( | ||||
|             main.remap_types::<Str, SerdeJson<OffsetDateTime>>().put( | ||||
|                 &mut txn, | ||||
|                 main_key::UPDATED_AT_KEY, | ||||
|                 &updated_at, | ||||
|             )?; | ||||
|             main.put::<_, Str, SerdeJson<OffsetDateTime>>( | ||||
|             main.remap_types::<Str, SerdeJson<OffsetDateTime>>().put( | ||||
|                 &mut txn, | ||||
|                 main_key::CREATED_AT_KEY, | ||||
|                 &created_at, | ||||
| @@ -319,11 +320,11 @@ impl Index { | ||||
|     /// This value is the maximum between the map size passed during the opening of the index | ||||
|     /// and the on-disk size of the index at the time of opening. | ||||
|     pub fn map_size(&self) -> Result<usize> { | ||||
|         Ok(self.env.map_size()?) | ||||
|         Ok(self.env.info().map_size) // TODO remove Result | ||||
|     } | ||||
|  | ||||
|     pub fn copy_to_path<P: AsRef<Path>>(&self, path: P, option: CompactionOption) -> Result<File> { | ||||
|         self.env.copy_to_path(path, option).map_err(Into::into) | ||||
|         self.env.copy_to_file(path, option).map_err(Into::into) | ||||
|     } | ||||
|  | ||||
|     /// Returns an `EnvClosingEvent` that can be used to wait for the closing event, | ||||
| @@ -343,21 +344,28 @@ impl Index { | ||||
|         wtxn: &mut RwTxn, | ||||
|         docids: &RoaringBitmap, | ||||
|     ) -> heed::Result<()> { | ||||
|         self.main.put::<_, Str, RoaringBitmapCodec>(wtxn, main_key::DOCUMENTS_IDS_KEY, docids) | ||||
|         self.main.remap_types::<Str, RoaringBitmapCodec>().put( | ||||
|             wtxn, | ||||
|             main_key::DOCUMENTS_IDS_KEY, | ||||
|             docids, | ||||
|         ) | ||||
|     } | ||||
|  | ||||
|     /// Returns the internal documents ids. | ||||
|     pub fn documents_ids(&self, rtxn: &RoTxn) -> heed::Result<RoaringBitmap> { | ||||
|         Ok(self | ||||
|             .main | ||||
|             .get::<_, Str, RoaringBitmapCodec>(rtxn, main_key::DOCUMENTS_IDS_KEY)? | ||||
|             .remap_types::<Str, RoaringBitmapCodec>() | ||||
|             .get(rtxn, main_key::DOCUMENTS_IDS_KEY)? | ||||
|             .unwrap_or_default()) | ||||
|     } | ||||
|  | ||||
|     /// Returns the number of documents indexed in the database. | ||||
|     pub fn number_of_documents(&self, rtxn: &RoTxn) -> Result<u64> { | ||||
|         let count = | ||||
|             self.main.get::<_, Str, RoaringBitmapLenCodec>(rtxn, main_key::DOCUMENTS_IDS_KEY)?; | ||||
|         let count = self | ||||
|             .main | ||||
|             .remap_types::<Str, RoaringBitmapLenCodec>() | ||||
|             .get(rtxn, main_key::DOCUMENTS_IDS_KEY)?; | ||||
|         Ok(count.unwrap_or_default()) | ||||
|     } | ||||
|  | ||||
| @@ -366,17 +374,17 @@ impl Index { | ||||
|     /// Writes the documents primary key, this is the field name that is used to store the id. | ||||
|     pub(crate) fn put_primary_key(&self, wtxn: &mut RwTxn, primary_key: &str) -> heed::Result<()> { | ||||
|         self.set_updated_at(wtxn, &OffsetDateTime::now_utc())?; | ||||
|         self.main.put::<_, Str, Str>(wtxn, main_key::PRIMARY_KEY_KEY, primary_key) | ||||
|         self.main.remap_types::<Str, Str>().put(wtxn, main_key::PRIMARY_KEY_KEY, primary_key) | ||||
|     } | ||||
|  | ||||
|     /// Deletes the primary key of the documents, this can be done to reset indexes settings. | ||||
|     pub(crate) fn delete_primary_key(&self, wtxn: &mut RwTxn) -> heed::Result<bool> { | ||||
|         self.main.delete::<_, Str>(wtxn, main_key::PRIMARY_KEY_KEY) | ||||
|         self.main.remap_key_type::<Str>().delete(wtxn, main_key::PRIMARY_KEY_KEY) | ||||
|     } | ||||
|  | ||||
|     /// Returns the documents primary key, `None` if it hasn't been defined. | ||||
|     pub fn primary_key<'t>(&self, rtxn: &'t RoTxn) -> heed::Result<Option<&'t str>> { | ||||
|         self.main.get::<_, Str, Str>(rtxn, main_key::PRIMARY_KEY_KEY) | ||||
|         self.main.remap_types::<Str, Str>().get(rtxn, main_key::PRIMARY_KEY_KEY) | ||||
|     } | ||||
|  | ||||
|     /* external documents ids */ | ||||
| @@ -396,7 +404,11 @@ impl Index { | ||||
|         wtxn: &mut RwTxn, | ||||
|         map: &FieldsIdsMap, | ||||
|     ) -> heed::Result<()> { | ||||
|         self.main.put::<_, Str, SerdeJson<FieldsIdsMap>>(wtxn, main_key::FIELDS_IDS_MAP_KEY, map) | ||||
|         self.main.remap_types::<Str, SerdeJson<FieldsIdsMap>>().put( | ||||
|             wtxn, | ||||
|             main_key::FIELDS_IDS_MAP_KEY, | ||||
|             map, | ||||
|         ) | ||||
|     } | ||||
|  | ||||
|     /// Returns the fields ids map which associate the documents keys with an internal field id | ||||
| @@ -404,7 +416,8 @@ impl Index { | ||||
|     pub fn fields_ids_map(&self, rtxn: &RoTxn) -> heed::Result<FieldsIdsMap> { | ||||
|         Ok(self | ||||
|             .main | ||||
|             .get::<_, Str, SerdeJson<FieldsIdsMap>>(rtxn, main_key::FIELDS_IDS_MAP_KEY)? | ||||
|             .remap_types::<Str, SerdeJson<FieldsIdsMap>>() | ||||
|             .get(rtxn, main_key::FIELDS_IDS_MAP_KEY)? | ||||
|             .unwrap_or_default()) | ||||
|     } | ||||
|  | ||||
| @@ -416,19 +429,24 @@ impl Index { | ||||
|         wtxn: &mut RwTxn, | ||||
|         rtree: &RTree<GeoPoint>, | ||||
|     ) -> heed::Result<()> { | ||||
|         self.main.put::<_, Str, SerdeBincode<RTree<GeoPoint>>>(wtxn, main_key::GEO_RTREE_KEY, rtree) | ||||
|         self.main.remap_types::<Str, SerdeBincode<RTree<GeoPoint>>>().put( | ||||
|             wtxn, | ||||
|             main_key::GEO_RTREE_KEY, | ||||
|             rtree, | ||||
|         ) | ||||
|     } | ||||
|  | ||||
|     /// Delete the `rtree` which associates coordinates to documents ids. | ||||
|     pub(crate) fn delete_geo_rtree(&self, wtxn: &mut RwTxn) -> heed::Result<bool> { | ||||
|         self.main.delete::<_, Str>(wtxn, main_key::GEO_RTREE_KEY) | ||||
|         self.main.remap_key_type::<Str>().delete(wtxn, main_key::GEO_RTREE_KEY) | ||||
|     } | ||||
|  | ||||
|     /// Returns the `rtree` which associates coordinates to documents ids. | ||||
|     pub fn geo_rtree(&self, rtxn: &RoTxn) -> Result<Option<RTree<GeoPoint>>> { | ||||
|         match self | ||||
|             .main | ||||
|             .get::<_, Str, SerdeBincode<RTree<GeoPoint>>>(rtxn, main_key::GEO_RTREE_KEY)? | ||||
|             .remap_types::<Str, SerdeBincode<RTree<GeoPoint>>>() | ||||
|             .get(rtxn, main_key::GEO_RTREE_KEY)? | ||||
|         { | ||||
|             Some(rtree) => Ok(Some(rtree)), | ||||
|             None => Ok(None), | ||||
| @@ -443,7 +461,7 @@ impl Index { | ||||
|         wtxn: &mut RwTxn, | ||||
|         docids: &RoaringBitmap, | ||||
|     ) -> heed::Result<()> { | ||||
|         self.main.put::<_, Str, RoaringBitmapCodec>( | ||||
|         self.main.remap_types::<Str, RoaringBitmapCodec>().put( | ||||
|             wtxn, | ||||
|             main_key::GEO_FACETED_DOCUMENTS_IDS_KEY, | ||||
|             docids, | ||||
| @@ -452,14 +470,15 @@ impl Index { | ||||
|  | ||||
|     /// Delete the documents ids that are faceted with a _geo field. | ||||
|     pub(crate) fn delete_geo_faceted_documents_ids(&self, wtxn: &mut RwTxn) -> heed::Result<bool> { | ||||
|         self.main.delete::<_, Str>(wtxn, main_key::GEO_FACETED_DOCUMENTS_IDS_KEY) | ||||
|         self.main.remap_key_type::<Str>().delete(wtxn, main_key::GEO_FACETED_DOCUMENTS_IDS_KEY) | ||||
|     } | ||||
|  | ||||
|     /// Retrieve all the documents ids that are faceted with a _geo field. | ||||
|     pub fn geo_faceted_documents_ids(&self, rtxn: &RoTxn) -> heed::Result<RoaringBitmap> { | ||||
|         match self | ||||
|             .main | ||||
|             .get::<_, Str, RoaringBitmapCodec>(rtxn, main_key::GEO_FACETED_DOCUMENTS_IDS_KEY)? | ||||
|             .remap_types::<Str, RoaringBitmapCodec>() | ||||
|             .get(rtxn, main_key::GEO_FACETED_DOCUMENTS_IDS_KEY)? | ||||
|         { | ||||
|             Some(docids) => Ok(docids), | ||||
|             None => Ok(RoaringBitmap::new()), | ||||
| @@ -474,22 +493,22 @@ impl Index { | ||||
|         self.delete_vector_hnsw(wtxn)?; | ||||
|  | ||||
|         let chunk_size = 1024 * 1024 * (1024 + 512); // 1.5 GiB | ||||
|         let bytes = bincode::serialize(hnsw).map_err(|_| heed::Error::Encoding)?; | ||||
|         let bytes = bincode::serialize(hnsw).map_err(Into::into).map_err(heed::Error::Encoding)?; | ||||
|         for (i, chunk) in bytes.chunks(chunk_size).enumerate() { | ||||
|             let i = i as u32; | ||||
|             let mut key = main_key::VECTOR_HNSW_KEY_PREFIX.as_bytes().to_vec(); | ||||
|             key.extend_from_slice(&i.to_be_bytes()); | ||||
|             self.main.put::<_, ByteSlice, ByteSlice>(wtxn, &key, chunk)?; | ||||
|             self.main.remap_types::<ByteSlice, ByteSlice>().put(wtxn, &key, chunk)?; | ||||
|         } | ||||
|         Ok(()) | ||||
|     } | ||||
|  | ||||
|     /// Delete the `hnsw`. | ||||
|     pub(crate) fn delete_vector_hnsw(&self, wtxn: &mut RwTxn) -> heed::Result<bool> { | ||||
|         let mut iter = self.main.prefix_iter_mut::<_, ByteSlice, DecodeIgnore>( | ||||
|             wtxn, | ||||
|             main_key::VECTOR_HNSW_KEY_PREFIX.as_bytes(), | ||||
|         )?; | ||||
|         let mut iter = self | ||||
|             .main | ||||
|             .remap_types::<ByteSlice, DecodeIgnore>() | ||||
|             .prefix_iter_mut(wtxn, main_key::VECTOR_HNSW_KEY_PREFIX.as_bytes())?; | ||||
|         let mut deleted = false; | ||||
|         while iter.next().transpose()?.is_some() { | ||||
|             // We do not keep a reference to the key or the value. | ||||
| @@ -501,8 +520,10 @@ impl Index { | ||||
|     /// Returns the `hnsw`. | ||||
|     pub fn vector_hnsw(&self, rtxn: &RoTxn) -> Result<Option<Hnsw>> { | ||||
|         let mut slices = Vec::new(); | ||||
|         for result in | ||||
|             self.main.prefix_iter::<_, Str, ByteSlice>(rtxn, main_key::VECTOR_HNSW_KEY_PREFIX)? | ||||
|         for result in self | ||||
|             .main | ||||
|             .remap_types::<Str, ByteSlice>() | ||||
|             .prefix_iter(rtxn, main_key::VECTOR_HNSW_KEY_PREFIX)? | ||||
|         { | ||||
|             let (_, slice) = result?; | ||||
|             slices.push(slice); | ||||
| @@ -512,7 +533,11 @@ impl Index { | ||||
|             Ok(None) | ||||
|         } else { | ||||
|             let readable_slices: ReadableSlices<_> = slices.into_iter().collect(); | ||||
|             Ok(Some(bincode::deserialize_from(readable_slices).map_err(|_| heed::Error::Decoding)?)) | ||||
|             Ok(Some( | ||||
|                 bincode::deserialize_from(readable_slices) | ||||
|                     .map_err(Into::into) | ||||
|                     .map_err(heed::Error::Decoding)?, | ||||
|             )) | ||||
|         } | ||||
|     } | ||||
|  | ||||
| @@ -525,7 +550,7 @@ impl Index { | ||||
|         wtxn: &mut RwTxn, | ||||
|         distribution: &FieldDistribution, | ||||
|     ) -> heed::Result<()> { | ||||
|         self.main.put::<_, Str, SerdeJson<FieldDistribution>>( | ||||
|         self.main.remap_types::<Str, SerdeJson<FieldDistribution>>().put( | ||||
|             wtxn, | ||||
|             main_key::FIELD_DISTRIBUTION_KEY, | ||||
|             distribution, | ||||
| @@ -537,7 +562,8 @@ impl Index { | ||||
|     pub fn field_distribution(&self, rtxn: &RoTxn) -> heed::Result<FieldDistribution> { | ||||
|         Ok(self | ||||
|             .main | ||||
|             .get::<_, Str, SerdeJson<FieldDistribution>>(rtxn, main_key::FIELD_DISTRIBUTION_KEY)? | ||||
|             .remap_types::<Str, SerdeJson<FieldDistribution>>() | ||||
|             .get(rtxn, main_key::FIELD_DISTRIBUTION_KEY)? | ||||
|             .unwrap_or_default()) | ||||
|     } | ||||
|  | ||||
| @@ -550,7 +576,7 @@ impl Index { | ||||
|         wtxn: &mut RwTxn, | ||||
|         fields: &[&str], | ||||
|     ) -> heed::Result<()> { | ||||
|         self.main.put::<_, Str, SerdeBincode<&[&str]>>( | ||||
|         self.main.remap_types::<Str, SerdeBincode<&[&str]>>().put( | ||||
|             wtxn, | ||||
|             main_key::DISPLAYED_FIELDS_KEY, | ||||
|             &fields, | ||||
| @@ -560,13 +586,15 @@ impl Index { | ||||
|     /// Deletes the displayed fields ids, this will make the engine display | ||||
|     /// all the documents attributes in the order of the `FieldsIdsMap`. | ||||
|     pub(crate) fn delete_displayed_fields(&self, wtxn: &mut RwTxn) -> heed::Result<bool> { | ||||
|         self.main.delete::<_, Str>(wtxn, main_key::DISPLAYED_FIELDS_KEY) | ||||
|         self.main.remap_key_type::<Str>().delete(wtxn, main_key::DISPLAYED_FIELDS_KEY) | ||||
|     } | ||||
|  | ||||
|     /// Returns the displayed fields in the order they were set by the user. If it returns | ||||
|     /// `None` it means that all the attributes are set as displayed in the order of the `FieldsIdsMap`. | ||||
|     pub fn displayed_fields<'t>(&self, rtxn: &'t RoTxn) -> heed::Result<Option<Vec<&'t str>>> { | ||||
|         self.main.get::<_, Str, SerdeBincode<Vec<&'t str>>>(rtxn, main_key::DISPLAYED_FIELDS_KEY) | ||||
|         self.main | ||||
|             .remap_types::<Str, SerdeBincode<Vec<&'t str>>>() | ||||
|             .get(rtxn, main_key::DISPLAYED_FIELDS_KEY) | ||||
|     } | ||||
|  | ||||
|     /// Identical to `displayed_fields`, but returns the ids instead. | ||||
| @@ -646,7 +674,7 @@ impl Index { | ||||
|  | ||||
|     /// Writes the searchable fields; when this list is specified, only these are indexed. | ||||
|     fn put_searchable_fields(&self, wtxn: &mut RwTxn, fields: &[&str]) -> heed::Result<()> { | ||||
|         self.main.put::<_, Str, SerdeBincode<&[&str]>>( | ||||
|         self.main.remap_types::<Str, SerdeBincode<&[&str]>>().put( | ||||
|             wtxn, | ||||
|             main_key::SEARCHABLE_FIELDS_KEY, | ||||
|             &fields, | ||||
| @@ -655,13 +683,15 @@ impl Index { | ||||
|  | ||||
|     /// Deletes the searchable fields; when no fields are specified, all fields are indexed. | ||||
|     fn delete_searchable_fields(&self, wtxn: &mut RwTxn) -> heed::Result<bool> { | ||||
|         self.main.delete::<_, Str>(wtxn, main_key::SEARCHABLE_FIELDS_KEY) | ||||
|         self.main.remap_key_type::<Str>().delete(wtxn, main_key::SEARCHABLE_FIELDS_KEY) | ||||
|     } | ||||
|  | ||||
|     /// Returns the searchable fields, i.e. the fields that are indexed; | ||||
|     /// if the searchable fields aren't set, it means that **all** the fields are indexed. | ||||
|     pub fn searchable_fields<'t>(&self, rtxn: &'t RoTxn) -> heed::Result<Option<Vec<&'t str>>> { | ||||
|         self.main.get::<_, Str, SerdeBincode<Vec<&'t str>>>(rtxn, main_key::SEARCHABLE_FIELDS_KEY) | ||||
|         self.main | ||||
|             .remap_types::<Str, SerdeBincode<Vec<&'t str>>>() | ||||
|             .get(rtxn, main_key::SEARCHABLE_FIELDS_KEY) | ||||
|     } | ||||
|  | ||||
|     /// Identical to `searchable_fields`, but returns the ids instead. | ||||
| @@ -687,7 +717,7 @@ impl Index { | ||||
|         wtxn: &mut RwTxn, | ||||
|         fields: &[&str], | ||||
|     ) -> heed::Result<()> { | ||||
|         self.main.put::<_, Str, SerdeBincode<_>>( | ||||
|         self.main.remap_types::<Str, SerdeBincode<_>>().put( | ||||
|             wtxn, | ||||
|             main_key::USER_DEFINED_SEARCHABLE_FIELDS_KEY, | ||||
|             &fields, | ||||
| @@ -699,7 +729,7 @@ impl Index { | ||||
|         &self, | ||||
|         wtxn: &mut RwTxn, | ||||
|     ) -> heed::Result<bool> { | ||||
|         self.main.delete::<_, Str>(wtxn, main_key::USER_DEFINED_SEARCHABLE_FIELDS_KEY) | ||||
|         self.main.remap_key_type::<Str>().delete(wtxn, main_key::USER_DEFINED_SEARCHABLE_FIELDS_KEY) | ||||
|     } | ||||
|  | ||||
|     /// Returns the user defined searchable fields. | ||||
| @@ -708,7 +738,8 @@ impl Index { | ||||
|         rtxn: &'t RoTxn, | ||||
|     ) -> heed::Result<Option<Vec<&'t str>>> { | ||||
|         self.main | ||||
|             .get::<_, Str, SerdeBincode<Vec<_>>>(rtxn, main_key::USER_DEFINED_SEARCHABLE_FIELDS_KEY) | ||||
|             .remap_types::<Str, SerdeBincode<Vec<_>>>() | ||||
|             .get(rtxn, main_key::USER_DEFINED_SEARCHABLE_FIELDS_KEY) | ||||
|     } | ||||
|  | ||||
|     /* filterable fields */ | ||||
| @@ -719,19 +750,24 @@ impl Index { | ||||
|         wtxn: &mut RwTxn, | ||||
|         fields: &HashSet<String>, | ||||
|     ) -> heed::Result<()> { | ||||
|         self.main.put::<_, Str, SerdeJson<_>>(wtxn, main_key::FILTERABLE_FIELDS_KEY, fields) | ||||
|         self.main.remap_types::<Str, SerdeJson<_>>().put( | ||||
|             wtxn, | ||||
|             main_key::FILTERABLE_FIELDS_KEY, | ||||
|             fields, | ||||
|         ) | ||||
|     } | ||||
|  | ||||
|     /// Deletes the filterable fields ids in the database. | ||||
|     pub(crate) fn delete_filterable_fields(&self, wtxn: &mut RwTxn) -> heed::Result<bool> { | ||||
|         self.main.delete::<_, Str>(wtxn, main_key::FILTERABLE_FIELDS_KEY) | ||||
|         self.main.remap_key_type::<Str>().delete(wtxn, main_key::FILTERABLE_FIELDS_KEY) | ||||
|     } | ||||
|  | ||||
|     /// Returns the filterable fields names. | ||||
|     pub fn filterable_fields(&self, rtxn: &RoTxn) -> heed::Result<HashSet<String>> { | ||||
|         Ok(self | ||||
|             .main | ||||
|             .get::<_, Str, SerdeJson<_>>(rtxn, main_key::FILTERABLE_FIELDS_KEY)? | ||||
|             .remap_types::<Str, SerdeJson<_>>() | ||||
|             .get(rtxn, main_key::FILTERABLE_FIELDS_KEY)? | ||||
|             .unwrap_or_default()) | ||||
|     } | ||||
|  | ||||
| @@ -758,19 +794,24 @@ impl Index { | ||||
|         wtxn: &mut RwTxn, | ||||
|         fields: &HashSet<String>, | ||||
|     ) -> heed::Result<()> { | ||||
|         self.main.put::<_, Str, SerdeJson<_>>(wtxn, main_key::SORTABLE_FIELDS_KEY, fields) | ||||
|         self.main.remap_types::<Str, SerdeJson<_>>().put( | ||||
|             wtxn, | ||||
|             main_key::SORTABLE_FIELDS_KEY, | ||||
|             fields, | ||||
|         ) | ||||
|     } | ||||
|  | ||||
|     /// Deletes the sortable fields ids in the database. | ||||
|     pub(crate) fn delete_sortable_fields(&self, wtxn: &mut RwTxn) -> heed::Result<bool> { | ||||
|         self.main.delete::<_, Str>(wtxn, main_key::SORTABLE_FIELDS_KEY) | ||||
|         self.main.remap_key_type::<Str>().delete(wtxn, main_key::SORTABLE_FIELDS_KEY) | ||||
|     } | ||||
|  | ||||
|     /// Returns the sortable fields names. | ||||
|     pub fn sortable_fields(&self, rtxn: &RoTxn) -> heed::Result<HashSet<String>> { | ||||
|         Ok(self | ||||
|             .main | ||||
|             .get::<_, Str, SerdeJson<_>>(rtxn, main_key::SORTABLE_FIELDS_KEY)? | ||||
|             .remap_types::<Str, SerdeJson<_>>() | ||||
|             .get(rtxn, main_key::SORTABLE_FIELDS_KEY)? | ||||
|             .unwrap_or_default()) | ||||
|     } | ||||
|  | ||||
| @@ -789,14 +830,19 @@ impl Index { | ||||
|         wtxn: &mut RwTxn, | ||||
|         fields: &HashSet<String>, | ||||
|     ) -> heed::Result<()> { | ||||
|         self.main.put::<_, Str, SerdeJson<_>>(wtxn, main_key::HIDDEN_FACETED_FIELDS_KEY, fields) | ||||
|         self.main.remap_types::<Str, SerdeJson<_>>().put( | ||||
|             wtxn, | ||||
|             main_key::HIDDEN_FACETED_FIELDS_KEY, | ||||
|             fields, | ||||
|         ) | ||||
|     } | ||||
|  | ||||
|     /// Returns the faceted fields names. | ||||
|     pub fn faceted_fields(&self, rtxn: &RoTxn) -> heed::Result<HashSet<String>> { | ||||
|         Ok(self | ||||
|             .main | ||||
|             .get::<_, Str, SerdeJson<_>>(rtxn, main_key::HIDDEN_FACETED_FIELDS_KEY)? | ||||
|             .remap_types::<Str, SerdeJson<_>>() | ||||
|             .get(rtxn, main_key::HIDDEN_FACETED_FIELDS_KEY)? | ||||
|             .unwrap_or_default()) | ||||
|     } | ||||
|  | ||||
| @@ -863,7 +909,7 @@ impl Index { | ||||
|         rtxn: &RoTxn, | ||||
|         field_id: FieldId, | ||||
|     ) -> heed::Result<RoaringBitmap> { | ||||
|         match self.facet_id_is_null_docids.get(rtxn, &BEU16::new(field_id))? { | ||||
|         match self.facet_id_is_null_docids.get(rtxn, &field_id)? { | ||||
|             Some(docids) => Ok(docids), | ||||
|             None => Ok(RoaringBitmap::new()), | ||||
|         } | ||||
| @@ -875,7 +921,7 @@ impl Index { | ||||
|         rtxn: &RoTxn, | ||||
|         field_id: FieldId, | ||||
|     ) -> heed::Result<RoaringBitmap> { | ||||
|         match self.facet_id_is_empty_docids.get(rtxn, &BEU16::new(field_id))? { | ||||
|         match self.facet_id_is_empty_docids.get(rtxn, &field_id)? { | ||||
|             Some(docids) => Ok(docids), | ||||
|             None => Ok(RoaringBitmap::new()), | ||||
|         } | ||||
| @@ -887,7 +933,7 @@ impl Index { | ||||
|         rtxn: &RoTxn, | ||||
|         field_id: FieldId, | ||||
|     ) -> heed::Result<RoaringBitmap> { | ||||
|         match self.facet_id_exists_docids.get(rtxn, &BEU16::new(field_id))? { | ||||
|         match self.facet_id_exists_docids.get(rtxn, &field_id)? { | ||||
|             Some(docids) => Ok(docids), | ||||
|             None => Ok(RoaringBitmap::new()), | ||||
|         } | ||||
| @@ -900,15 +946,15 @@ impl Index { | ||||
|         wtxn: &mut RwTxn, | ||||
|         distinct_field: &str, | ||||
|     ) -> heed::Result<()> { | ||||
|         self.main.put::<_, Str, Str>(wtxn, main_key::DISTINCT_FIELD_KEY, distinct_field) | ||||
|         self.main.remap_types::<Str, Str>().put(wtxn, main_key::DISTINCT_FIELD_KEY, distinct_field) | ||||
|     } | ||||
|  | ||||
|     pub fn distinct_field<'a>(&self, rtxn: &'a RoTxn) -> heed::Result<Option<&'a str>> { | ||||
|         self.main.get::<_, Str, Str>(rtxn, main_key::DISTINCT_FIELD_KEY) | ||||
|         self.main.remap_types::<Str, Str>().get(rtxn, main_key::DISTINCT_FIELD_KEY) | ||||
|     } | ||||
|  | ||||
|     pub(crate) fn delete_distinct_field(&self, wtxn: &mut RwTxn) -> heed::Result<bool> { | ||||
|         self.main.delete::<_, Str>(wtxn, main_key::DISTINCT_FIELD_KEY) | ||||
|         self.main.remap_key_type::<Str>().delete(wtxn, main_key::DISTINCT_FIELD_KEY) | ||||
|     } | ||||
|  | ||||
|     /* criteria */ | ||||
| @@ -918,15 +964,23 @@ impl Index { | ||||
|         wtxn: &mut RwTxn, | ||||
|         criteria: &[Criterion], | ||||
|     ) -> heed::Result<()> { | ||||
|         self.main.put::<_, Str, SerdeJson<&[Criterion]>>(wtxn, main_key::CRITERIA_KEY, &criteria) | ||||
|         self.main.remap_types::<Str, SerdeJson<&[Criterion]>>().put( | ||||
|             wtxn, | ||||
|             main_key::CRITERIA_KEY, | ||||
|             &criteria, | ||||
|         ) | ||||
|     } | ||||
|  | ||||
|     pub(crate) fn delete_criteria(&self, wtxn: &mut RwTxn) -> heed::Result<bool> { | ||||
|         self.main.delete::<_, Str>(wtxn, main_key::CRITERIA_KEY) | ||||
|         self.main.remap_key_type::<Str>().delete(wtxn, main_key::CRITERIA_KEY) | ||||
|     } | ||||
|  | ||||
|     pub fn criteria(&self, rtxn: &RoTxn) -> heed::Result<Vec<Criterion>> { | ||||
|         match self.main.get::<_, Str, SerdeJson<Vec<Criterion>>>(rtxn, main_key::CRITERIA_KEY)? { | ||||
|         match self | ||||
|             .main | ||||
|             .remap_types::<Str, SerdeJson<Vec<Criterion>>>() | ||||
|             .get(rtxn, main_key::CRITERIA_KEY)? | ||||
|         { | ||||
|             Some(criteria) => Ok(criteria), | ||||
|             None => Ok(default_criteria()), | ||||
|         } | ||||
| @@ -940,12 +994,16 @@ impl Index { | ||||
|         wtxn: &mut RwTxn, | ||||
|         fst: &fst::Set<A>, | ||||
|     ) -> heed::Result<()> { | ||||
|         self.main.put::<_, Str, ByteSlice>(wtxn, main_key::WORDS_FST_KEY, fst.as_fst().as_bytes()) | ||||
|         self.main.remap_types::<Str, ByteSlice>().put( | ||||
|             wtxn, | ||||
|             main_key::WORDS_FST_KEY, | ||||
|             fst.as_fst().as_bytes(), | ||||
|         ) | ||||
|     } | ||||
|  | ||||
|     /// Returns the FST which is the words dictionary of the engine. | ||||
|     pub fn words_fst<'t>(&self, rtxn: &'t RoTxn) -> Result<fst::Set<Cow<'t, [u8]>>> { | ||||
|         match self.main.get::<_, Str, ByteSlice>(rtxn, main_key::WORDS_FST_KEY)? { | ||||
|         match self.main.remap_types::<Str, ByteSlice>().get(rtxn, main_key::WORDS_FST_KEY)? { | ||||
|             Some(bytes) => Ok(fst::Set::new(bytes)?.map_data(Cow::Borrowed)?), | ||||
|             None => Ok(fst::Set::default().map_data(Cow::Owned)?), | ||||
|         } | ||||
| @@ -958,15 +1016,19 @@ impl Index { | ||||
|         wtxn: &mut RwTxn, | ||||
|         fst: &fst::Set<A>, | ||||
|     ) -> heed::Result<()> { | ||||
|         self.main.put::<_, Str, ByteSlice>(wtxn, main_key::STOP_WORDS_KEY, fst.as_fst().as_bytes()) | ||||
|         self.main.remap_types::<Str, ByteSlice>().put( | ||||
|             wtxn, | ||||
|             main_key::STOP_WORDS_KEY, | ||||
|             fst.as_fst().as_bytes(), | ||||
|         ) | ||||
|     } | ||||
|  | ||||
|     pub(crate) fn delete_stop_words(&self, wtxn: &mut RwTxn) -> heed::Result<bool> { | ||||
|         self.main.delete::<_, Str>(wtxn, main_key::STOP_WORDS_KEY) | ||||
|         self.main.remap_key_type::<Str>().delete(wtxn, main_key::STOP_WORDS_KEY) | ||||
|     } | ||||
|  | ||||
|     pub fn stop_words<'t>(&self, rtxn: &'t RoTxn) -> Result<Option<fst::Set<&'t [u8]>>> { | ||||
|         match self.main.get::<_, Str, ByteSlice>(rtxn, main_key::STOP_WORDS_KEY)? { | ||||
|         match self.main.remap_types::<Str, ByteSlice>().get(rtxn, main_key::STOP_WORDS_KEY)? { | ||||
|             Some(bytes) => Ok(Some(fst::Set::new(bytes)?)), | ||||
|             None => Ok(None), | ||||
|         } | ||||
| @@ -979,18 +1041,22 @@ impl Index { | ||||
|         wtxn: &mut RwTxn, | ||||
|         set: &BTreeSet<String>, | ||||
|     ) -> heed::Result<()> { | ||||
|         self.main.put::<_, Str, SerdeBincode<_>>(wtxn, main_key::NON_SEPARATOR_TOKENS_KEY, set) | ||||
|         self.main.remap_types::<Str, SerdeBincode<_>>().put( | ||||
|             wtxn, | ||||
|             main_key::NON_SEPARATOR_TOKENS_KEY, | ||||
|             set, | ||||
|         ) | ||||
|     } | ||||
|  | ||||
|     pub(crate) fn delete_non_separator_tokens(&self, wtxn: &mut RwTxn) -> heed::Result<bool> { | ||||
|         self.main.delete::<_, Str>(wtxn, main_key::NON_SEPARATOR_TOKENS_KEY) | ||||
|         self.main.remap_key_type::<Str>().delete(wtxn, main_key::NON_SEPARATOR_TOKENS_KEY) | ||||
|     } | ||||
|  | ||||
|     pub fn non_separator_tokens(&self, rtxn: &RoTxn) -> Result<Option<BTreeSet<String>>> { | ||||
|         Ok(self.main.get::<_, Str, SerdeBincode<BTreeSet<String>>>( | ||||
|             rtxn, | ||||
|             main_key::NON_SEPARATOR_TOKENS_KEY, | ||||
|         )?) | ||||
|         Ok(self | ||||
|             .main | ||||
|             .remap_types::<Str, SerdeBincode<BTreeSet<String>>>() | ||||
|             .get(rtxn, main_key::NON_SEPARATOR_TOKENS_KEY)?) | ||||
|     } | ||||
|  | ||||
|     /* separator tokens */ | ||||
| @@ -1000,17 +1066,22 @@ impl Index { | ||||
|         wtxn: &mut RwTxn, | ||||
|         set: &BTreeSet<String>, | ||||
|     ) -> heed::Result<()> { | ||||
|         self.main.put::<_, Str, SerdeBincode<_>>(wtxn, main_key::SEPARATOR_TOKENS_KEY, set) | ||||
|         self.main.remap_types::<Str, SerdeBincode<_>>().put( | ||||
|             wtxn, | ||||
|             main_key::SEPARATOR_TOKENS_KEY, | ||||
|             set, | ||||
|         ) | ||||
|     } | ||||
|  | ||||
|     pub(crate) fn delete_separator_tokens(&self, wtxn: &mut RwTxn) -> heed::Result<bool> { | ||||
|         self.main.delete::<_, Str>(wtxn, main_key::SEPARATOR_TOKENS_KEY) | ||||
|         self.main.remap_key_type::<Str>().delete(wtxn, main_key::SEPARATOR_TOKENS_KEY) | ||||
|     } | ||||
|  | ||||
|     pub fn separator_tokens(&self, rtxn: &RoTxn) -> Result<Option<BTreeSet<String>>> { | ||||
|         Ok(self | ||||
|             .main | ||||
|             .get::<_, Str, SerdeBincode<BTreeSet<String>>>(rtxn, main_key::SEPARATOR_TOKENS_KEY)?) | ||||
|             .remap_types::<Str, SerdeBincode<BTreeSet<String>>>() | ||||
|             .get(rtxn, main_key::SEPARATOR_TOKENS_KEY)?) | ||||
|     } | ||||
|  | ||||
|     /* separators easing method */ | ||||
| @@ -1040,17 +1111,18 @@ impl Index { | ||||
|         wtxn: &mut RwTxn, | ||||
|         set: &BTreeSet<String>, | ||||
|     ) -> heed::Result<()> { | ||||
|         self.main.put::<_, Str, SerdeBincode<_>>(wtxn, main_key::DICTIONARY_KEY, set) | ||||
|         self.main.remap_types::<Str, SerdeBincode<_>>().put(wtxn, main_key::DICTIONARY_KEY, set) | ||||
|     } | ||||
|  | ||||
|     pub(crate) fn delete_dictionary(&self, wtxn: &mut RwTxn) -> heed::Result<bool> { | ||||
|         self.main.delete::<_, Str>(wtxn, main_key::DICTIONARY_KEY) | ||||
|         self.main.remap_key_type::<Str>().delete(wtxn, main_key::DICTIONARY_KEY) | ||||
|     } | ||||
|  | ||||
|     pub fn dictionary(&self, rtxn: &RoTxn) -> Result<Option<BTreeSet<String>>> { | ||||
|         Ok(self | ||||
|             .main | ||||
|             .get::<_, Str, SerdeBincode<BTreeSet<String>>>(rtxn, main_key::DICTIONARY_KEY)?) | ||||
|             .remap_types::<Str, SerdeBincode<BTreeSet<String>>>() | ||||
|             .get(rtxn, main_key::DICTIONARY_KEY)?) | ||||
|     } | ||||
|  | ||||
|     /* synonyms */ | ||||
| @@ -1061,8 +1133,12 @@ impl Index { | ||||
|         synonyms: &HashMap<Vec<String>, Vec<Vec<String>>>, | ||||
|         user_defined_synonyms: &BTreeMap<String, Vec<String>>, | ||||
|     ) -> heed::Result<()> { | ||||
|         self.main.put::<_, Str, SerdeBincode<_>>(wtxn, main_key::SYNONYMS_KEY, synonyms)?; | ||||
|         self.main.put::<_, Str, SerdeBincode<_>>( | ||||
|         self.main.remap_types::<Str, SerdeBincode<_>>().put( | ||||
|             wtxn, | ||||
|             main_key::SYNONYMS_KEY, | ||||
|             synonyms, | ||||
|         )?; | ||||
|         self.main.remap_types::<Str, SerdeBincode<_>>().put( | ||||
|             wtxn, | ||||
|             main_key::USER_DEFINED_SYNONYMS_KEY, | ||||
|             user_defined_synonyms, | ||||
| @@ -1070,8 +1146,8 @@ impl Index { | ||||
|     } | ||||
|  | ||||
|     pub(crate) fn delete_synonyms(&self, wtxn: &mut RwTxn) -> heed::Result<bool> { | ||||
|         self.main.delete::<_, Str>(wtxn, main_key::SYNONYMS_KEY)?; | ||||
|         self.main.delete::<_, Str>(wtxn, main_key::USER_DEFINED_SYNONYMS_KEY) | ||||
|         self.main.remap_key_type::<Str>().delete(wtxn, main_key::SYNONYMS_KEY)?; | ||||
|         self.main.remap_key_type::<Str>().delete(wtxn, main_key::USER_DEFINED_SYNONYMS_KEY) | ||||
|     } | ||||
|  | ||||
|     pub fn user_defined_synonyms( | ||||
| @@ -1080,14 +1156,16 @@ impl Index { | ||||
|     ) -> heed::Result<BTreeMap<String, Vec<String>>> { | ||||
|         Ok(self | ||||
|             .main | ||||
|             .get::<_, Str, SerdeBincode<_>>(rtxn, main_key::USER_DEFINED_SYNONYMS_KEY)? | ||||
|             .remap_types::<Str, SerdeBincode<_>>() | ||||
|             .get(rtxn, main_key::USER_DEFINED_SYNONYMS_KEY)? | ||||
|             .unwrap_or_default()) | ||||
|     } | ||||
|  | ||||
|     pub fn synonyms(&self, rtxn: &RoTxn) -> heed::Result<HashMap<Vec<String>, Vec<Vec<String>>>> { | ||||
|         Ok(self | ||||
|             .main | ||||
|             .get::<_, Str, SerdeBincode<_>>(rtxn, main_key::SYNONYMS_KEY)? | ||||
|             .remap_types::<Str, SerdeBincode<_>>() | ||||
|             .get(rtxn, main_key::SYNONYMS_KEY)? | ||||
|             .unwrap_or_default()) | ||||
|     } | ||||
|  | ||||
| @@ -1108,7 +1186,7 @@ impl Index { | ||||
|         wtxn: &mut RwTxn, | ||||
|         fst: &fst::Set<A>, | ||||
|     ) -> heed::Result<()> { | ||||
|         self.main.put::<_, Str, ByteSlice>( | ||||
|         self.main.remap_types::<Str, ByteSlice>().put( | ||||
|             wtxn, | ||||
|             main_key::WORDS_PREFIXES_FST_KEY, | ||||
|             fst.as_fst().as_bytes(), | ||||
| @@ -1117,7 +1195,11 @@ impl Index { | ||||
|  | ||||
|     /// Returns the FST which is the words prefixes dictionary of the engine. | ||||
|     pub fn words_prefixes_fst<'t>(&self, rtxn: &'t RoTxn) -> Result<fst::Set<Cow<'t, [u8]>>> { | ||||
|         match self.main.get::<_, Str, ByteSlice>(rtxn, main_key::WORDS_PREFIXES_FST_KEY)? { | ||||
|         match self | ||||
|             .main | ||||
|             .remap_types::<Str, ByteSlice>() | ||||
|             .get(rtxn, main_key::WORDS_PREFIXES_FST_KEY)? | ||||
|         { | ||||
|             Some(bytes) => Ok(fst::Set::new(bytes)?.map_data(Cow::Borrowed)?), | ||||
|             None => Ok(fst::Set::default().map_data(Cow::Owned)?), | ||||
|         } | ||||
| @@ -1142,7 +1224,7 @@ impl Index { | ||||
|         Ok(ids.into_iter().map(move |id| { | ||||
|             let kv = self | ||||
|                 .documents | ||||
|                 .get(rtxn, &BEU32::new(id))? | ||||
|                 .get(rtxn, &id)? | ||||
|                 .ok_or(UserError::UnknownInternalDocumentId { document_id: id })?; | ||||
|             Ok((id, kv)) | ||||
|         })) | ||||
| @@ -1207,7 +1289,8 @@ impl Index { | ||||
|     pub fn created_at(&self, rtxn: &RoTxn) -> Result<OffsetDateTime> { | ||||
|         Ok(self | ||||
|             .main | ||||
|             .get::<_, Str, SerdeJson<OffsetDateTime>>(rtxn, main_key::CREATED_AT_KEY)? | ||||
|             .remap_types::<Str, SerdeJson<OffsetDateTime>>() | ||||
|             .get(rtxn, main_key::CREATED_AT_KEY)? | ||||
|             .ok_or(InternalError::DatabaseMissingEntry { | ||||
|                 db_name: db_name::MAIN, | ||||
|                 key: Some(main_key::CREATED_AT_KEY), | ||||
| @@ -1218,7 +1301,8 @@ impl Index { | ||||
|     pub fn updated_at(&self, rtxn: &RoTxn) -> Result<OffsetDateTime> { | ||||
|         Ok(self | ||||
|             .main | ||||
|             .get::<_, Str, SerdeJson<OffsetDateTime>>(rtxn, main_key::UPDATED_AT_KEY)? | ||||
|             .remap_types::<Str, SerdeJson<OffsetDateTime>>() | ||||
|             .get(rtxn, main_key::UPDATED_AT_KEY)? | ||||
|             .ok_or(InternalError::DatabaseMissingEntry { | ||||
|                 db_name: db_name::MAIN, | ||||
|                 key: Some(main_key::UPDATED_AT_KEY), | ||||
| @@ -1230,14 +1314,18 @@ impl Index { | ||||
|         wtxn: &mut RwTxn, | ||||
|         time: &OffsetDateTime, | ||||
|     ) -> heed::Result<()> { | ||||
|         self.main.put::<_, Str, SerdeJson<OffsetDateTime>>(wtxn, main_key::UPDATED_AT_KEY, time) | ||||
|         self.main.remap_types::<Str, SerdeJson<OffsetDateTime>>().put( | ||||
|             wtxn, | ||||
|             main_key::UPDATED_AT_KEY, | ||||
|             time, | ||||
|         ) | ||||
|     } | ||||
|  | ||||
|     pub fn authorize_typos(&self, txn: &RoTxn) -> heed::Result<bool> { | ||||
|         // It is not possible to put a bool in heed with OwnedType, so we put a u8 instead. We | ||||
|         // identify 0 as being false, and anything else as true. The absence of a value is true, | ||||
|         // because by default, we authorize typos. | ||||
|         match self.main.get::<_, Str, OwnedType<u8>>(txn, main_key::AUTHORIZE_TYPOS)? { | ||||
|         match self.main.remap_types::<Str, OwnedType<u8>>().get(txn, main_key::AUTHORIZE_TYPOS)? { | ||||
|             Some(0) => Ok(false), | ||||
|             _ => Ok(true), | ||||
|         } | ||||
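As the comment above notes, heed has no boolean codec, so the flag is stored as a u8: 0 means false, anything else means true, and a missing entry falls back to the default of `true`. A standalone illustration of that convention (the database handle and key name are assumptions, not taken from this codebase):

    use heed::types::{OwnedType, Str};
    use heed::{Database, RoTxn};

    const FLAG_KEY: &str = "authorize-typos"; // hypothetical key name

    // Read a boolean stored as a u8, defaulting to `true` when the key is absent.
    fn read_flag(main: Database<Str, OwnedType<u8>>, rtxn: &RoTxn) -> heed::Result<bool> {
        match main.get(rtxn, FLAG_KEY)? {
            Some(0) => Ok(false),
            _ => Ok(true),
        }
    }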
| @@ -1247,7 +1335,11 @@ impl Index { | ||||
|         // It is not possible to put a bool in heed with OwnedType, so we put a u8 instead. We | ||||
|         // identify 0 as being false, and anything else as true. The absence of a value is true, | ||||
|         // because by default, we authorize typos. | ||||
|         self.main.put::<_, Str, OwnedType<u8>>(txn, main_key::AUTHORIZE_TYPOS, &(flag as u8))?; | ||||
|         self.main.remap_types::<Str, OwnedType<u8>>().put( | ||||
|             txn, | ||||
|             main_key::AUTHORIZE_TYPOS, | ||||
|             &(flag as u8), | ||||
|         )?; | ||||
|  | ||||
|         Ok(()) | ||||
|     } | ||||
| @@ -1258,7 +1350,8 @@ impl Index { | ||||
|         // because by default, we authorize typos. | ||||
|         Ok(self | ||||
|             .main | ||||
|             .get::<_, Str, OwnedType<u8>>(txn, main_key::ONE_TYPO_WORD_LEN)? | ||||
|             .remap_types::<Str, OwnedType<u8>>() | ||||
|             .get(txn, main_key::ONE_TYPO_WORD_LEN)? | ||||
|             .unwrap_or(DEFAULT_MIN_WORD_LEN_ONE_TYPO)) | ||||
|     } | ||||
|  | ||||
| @@ -1266,7 +1359,11 @@ impl Index { | ||||
|         // It is not possible to put a bool in heed with OwnedType, so we put a u8 instead. We | ||||
|         // identify 0 as being false, and anything else as true. The absence of a value is true, | ||||
|         // because by default, we authorize typos. | ||||
|         self.main.put::<_, Str, OwnedType<u8>>(txn, main_key::ONE_TYPO_WORD_LEN, &val)?; | ||||
|         self.main.remap_types::<Str, OwnedType<u8>>().put( | ||||
|             txn, | ||||
|             main_key::ONE_TYPO_WORD_LEN, | ||||
|             &val, | ||||
|         )?; | ||||
|         Ok(()) | ||||
|     } | ||||
|  | ||||
| @@ -1276,7 +1373,8 @@ impl Index { | ||||
|         // because by default, we authorize typos. | ||||
|         Ok(self | ||||
|             .main | ||||
|             .get::<_, Str, OwnedType<u8>>(txn, main_key::TWO_TYPOS_WORD_LEN)? | ||||
|             .remap_types::<Str, OwnedType<u8>>() | ||||
|             .get(txn, main_key::TWO_TYPOS_WORD_LEN)? | ||||
|             .unwrap_or(DEFAULT_MIN_WORD_LEN_TWO_TYPOS)) | ||||
|     } | ||||
|  | ||||
| @@ -1284,13 +1382,17 @@ impl Index { | ||||
|         // It is not possible to put a bool in heed with OwnedType, so we put a u8 instead. We | ||||
|         // identify 0 as being false, and anything else as true. The absence of a value is true, | ||||
|         // because by default, we authorize typos. | ||||
|         self.main.put::<_, Str, OwnedType<u8>>(txn, main_key::TWO_TYPOS_WORD_LEN, &val)?; | ||||
|         self.main.remap_types::<Str, OwnedType<u8>>().put( | ||||
|             txn, | ||||
|             main_key::TWO_TYPOS_WORD_LEN, | ||||
|             &val, | ||||
|         )?; | ||||
|         Ok(()) | ||||
|     } | ||||
|  | ||||
|     /// List the words on which typo are not allowed | ||||
|     pub fn exact_words<'t>(&self, txn: &'t RoTxn) -> Result<Option<fst::Set<Cow<'t, [u8]>>>> { | ||||
|         match self.main.get::<_, Str, ByteSlice>(txn, main_key::EXACT_WORDS)? { | ||||
|         match self.main.remap_types::<Str, ByteSlice>().get(txn, main_key::EXACT_WORDS)? { | ||||
|             Some(bytes) => Ok(Some(fst::Set::new(bytes)?.map_data(Cow::Borrowed)?)), | ||||
|             None => Ok(None), | ||||
|         } | ||||
| @@ -1301,7 +1403,7 @@ impl Index { | ||||
|         txn: &mut RwTxn, | ||||
|         words: &fst::Set<A>, | ||||
|     ) -> Result<()> { | ||||
|         self.main.put::<_, Str, ByteSlice>( | ||||
|         self.main.remap_types::<Str, ByteSlice>().put( | ||||
|             txn, | ||||
|             main_key::EXACT_WORDS, | ||||
|             words.as_fst().as_bytes(), | ||||
| @@ -1313,7 +1415,8 @@ impl Index { | ||||
|     pub fn exact_attributes<'t>(&self, txn: &'t RoTxn) -> Result<Vec<&'t str>> { | ||||
|         Ok(self | ||||
|             .main | ||||
|             .get::<_, Str, SerdeBincode<Vec<&str>>>(txn, main_key::EXACT_ATTRIBUTES)? | ||||
|             .remap_types::<Str, SerdeBincode<Vec<&str>>>() | ||||
|             .get(txn, main_key::EXACT_ATTRIBUTES)? | ||||
|             .unwrap_or_default()) | ||||
|     } | ||||
|  | ||||
| @@ -1326,34 +1429,40 @@ impl Index { | ||||
|  | ||||
|     /// Writes the exact attributes to the database. | ||||
|     pub(crate) fn put_exact_attributes(&self, txn: &mut RwTxn, attrs: &[&str]) -> Result<()> { | ||||
|         self.main.put::<_, Str, SerdeBincode<&[&str]>>(txn, main_key::EXACT_ATTRIBUTES, &attrs)?; | ||||
|         self.main.remap_types::<Str, SerdeBincode<&[&str]>>().put( | ||||
|             txn, | ||||
|             main_key::EXACT_ATTRIBUTES, | ||||
|             &attrs, | ||||
|         )?; | ||||
|         Ok(()) | ||||
|     } | ||||
|  | ||||
|     /// Clears the exact attributes from the store. | ||||
|     pub(crate) fn delete_exact_attributes(&self, txn: &mut RwTxn) -> heed::Result<bool> { | ||||
|         self.main.delete::<_, Str>(txn, main_key::EXACT_ATTRIBUTES) | ||||
|         self.main.remap_key_type::<Str>().delete(txn, main_key::EXACT_ATTRIBUTES) | ||||
|     } | ||||
|  | ||||
|     pub fn max_values_per_facet(&self, txn: &RoTxn) -> heed::Result<Option<usize>> { | ||||
|         self.main.get::<_, Str, OwnedType<usize>>(txn, main_key::MAX_VALUES_PER_FACET) | ||||
|         self.main.remap_types::<Str, OwnedType<usize>>().get(txn, main_key::MAX_VALUES_PER_FACET) | ||||
|     } | ||||
|  | ||||
|     pub(crate) fn put_max_values_per_facet(&self, txn: &mut RwTxn, val: usize) -> heed::Result<()> { | ||||
|         self.main.put::<_, Str, OwnedType<usize>>(txn, main_key::MAX_VALUES_PER_FACET, &val) | ||||
|         self.main.remap_types::<Str, OwnedType<usize>>().put( | ||||
|             txn, | ||||
|             main_key::MAX_VALUES_PER_FACET, | ||||
|             &val, | ||||
|         ) | ||||
|     } | ||||
|  | ||||
|     pub(crate) fn delete_max_values_per_facet(&self, txn: &mut RwTxn) -> heed::Result<bool> { | ||||
|         self.main.delete::<_, Str>(txn, main_key::MAX_VALUES_PER_FACET) | ||||
|         self.main.remap_key_type::<Str>().delete(txn, main_key::MAX_VALUES_PER_FACET) | ||||
|     } | ||||
|  | ||||
|     pub fn sort_facet_values_by(&self, txn: &RoTxn) -> heed::Result<HashMap<String, OrderBy>> { | ||||
|         let mut orders = self | ||||
|             .main | ||||
|             .get::<_, Str, SerdeJson<HashMap<String, OrderBy>>>( | ||||
|                 txn, | ||||
|                 main_key::SORT_FACET_VALUES_BY, | ||||
|             )? | ||||
|             .remap_types::<Str, SerdeJson<HashMap<String, OrderBy>>>() | ||||
|             .get(txn, main_key::SORT_FACET_VALUES_BY)? | ||||
|             .unwrap_or_default(); | ||||
|         // Insert the default ordering if it is not already overwritten by the user. | ||||
|         orders.entry("*".to_string()).or_insert(OrderBy::Lexicographic); | ||||
| @@ -1365,15 +1474,17 @@ impl Index { | ||||
|         txn: &mut RwTxn, | ||||
|         val: &HashMap<String, OrderBy>, | ||||
|     ) -> heed::Result<()> { | ||||
|         self.main.put::<_, Str, SerdeJson<_>>(txn, main_key::SORT_FACET_VALUES_BY, &val) | ||||
|         self.main.remap_types::<Str, SerdeJson<_>>().put(txn, main_key::SORT_FACET_VALUES_BY, &val) | ||||
|     } | ||||
|  | ||||
|     pub(crate) fn delete_sort_facet_values_by(&self, txn: &mut RwTxn) -> heed::Result<bool> { | ||||
|         self.main.delete::<_, Str>(txn, main_key::SORT_FACET_VALUES_BY) | ||||
|         self.main.remap_key_type::<Str>().delete(txn, main_key::SORT_FACET_VALUES_BY) | ||||
|     } | ||||
|  | ||||
|     pub fn pagination_max_total_hits(&self, txn: &RoTxn) -> heed::Result<Option<usize>> { | ||||
|         self.main.get::<_, Str, OwnedType<usize>>(txn, main_key::PAGINATION_MAX_TOTAL_HITS) | ||||
|         self.main | ||||
|             .remap_types::<Str, OwnedType<usize>>() | ||||
|             .get(txn, main_key::PAGINATION_MAX_TOTAL_HITS) | ||||
|     } | ||||
|  | ||||
|     pub(crate) fn put_pagination_max_total_hits( | ||||
| @@ -1381,11 +1492,15 @@ impl Index { | ||||
|         txn: &mut RwTxn, | ||||
|         val: usize, | ||||
|     ) -> heed::Result<()> { | ||||
|         self.main.put::<_, Str, OwnedType<usize>>(txn, main_key::PAGINATION_MAX_TOTAL_HITS, &val) | ||||
|         self.main.remap_types::<Str, OwnedType<usize>>().put( | ||||
|             txn, | ||||
|             main_key::PAGINATION_MAX_TOTAL_HITS, | ||||
|             &val, | ||||
|         ) | ||||
|     } | ||||
|  | ||||
|     pub(crate) fn delete_pagination_max_total_hits(&self, txn: &mut RwTxn) -> heed::Result<bool> { | ||||
|         self.main.delete::<_, Str>(txn, main_key::PAGINATION_MAX_TOTAL_HITS) | ||||
|         self.main.remap_key_type::<Str>().delete(txn, main_key::PAGINATION_MAX_TOTAL_HITS) | ||||
|     } | ||||
|  | ||||
|     /* script  language docids */ | ||||
|   | ||||
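With heed 0.20 the key and value codecs are part of the `Database` type, so a caller that needs different codecs for a given key remaps them with `remap_types`/`remap_key_type` instead of passing codecs to each `put`, `get`, or `delete` call, as the methods above now do. A minimal sketch of that calling convention, assuming a simplified byte-oriented `main` database (the key name is illustrative):

    use std::collections::BTreeSet;

    use heed::types::{ByteSlice, SerdeJson, Str};
    use heed::{Database, RoTxn, RwTxn};

    const EXAMPLE_KEY: &str = "example-key"; // hypothetical key name

    // Store a set under a string key by remapping the codecs at the call site.
    fn put_set(
        main: Database<ByteSlice, ByteSlice>,
        wtxn: &mut RwTxn,
        set: &BTreeSet<String>,
    ) -> heed::Result<()> {
        main.remap_types::<Str, SerdeJson<BTreeSet<String>>>().put(wtxn, EXAMPLE_KEY, set)
    }

    // Read it back with the same codecs.
    fn get_set(
        main: Database<ByteSlice, ByteSlice>,
        rtxn: &RoTxn,
    ) -> heed::Result<Option<BTreeSet<String>>> {
        main.remap_types::<Str, SerdeJson<BTreeSet<String>>>().get(rtxn, EXAMPLE_KEY)
    }

    // `delete` only needs the key codec, hence `remap_key_type`.
    fn delete_set(main: Database<ByteSlice, ByteSlice>, wtxn: &mut RwTxn) -> heed::Result<bool> {
        main.remap_key_type::<Str>().delete(wtxn, EXAMPLE_KEY)
    }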
| @@ -66,9 +66,9 @@ pub use self::search::{ | ||||
| pub type Result<T> = std::result::Result<T, error::Error>; | ||||
|  | ||||
| pub type Attribute = u32; | ||||
| pub type BEU16 = heed::zerocopy::U16<heed::byteorder::BE>; | ||||
| pub type BEU32 = heed::zerocopy::U32<heed::byteorder::BE>; | ||||
| pub type BEU64 = heed::zerocopy::U64<heed::byteorder::BE>; | ||||
| pub type BEU16 = heed::types::U16<heed::byteorder::BE>; | ||||
| pub type BEU32 = heed::types::U32<heed::byteorder::BE>; | ||||
| pub type BEU64 = heed::types::U64<heed::byteorder::BE>; | ||||
| pub type DocumentId = u32; | ||||
| pub type FastMap4<K, V> = HashMap<K, V, BuildHasherDefault<FxHasher32>>; | ||||
| pub type FastMap8<K, V> = HashMap<K, V, BuildHasherDefault<FxHasher64>>; | ||||
|   | ||||
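The `BEU16`/`BEU32`/`BEU64` aliases now point at heed's byteorder integer codecs instead of `zerocopy` wrapper types, which is why `BEU32::new(..)` constructions and `.get()` unwrapping disappear elsewhere in this diff: the codecs encode and decode plain `u16`/`u32`/`u64`. A small sketch under that assumption (the database itself is hypothetical):

    use heed::byteorder::BE;
    use heed::types::{ByteSlice, U32};
    use heed::{Database, RoTxn};

    type BEU32 = U32<BE>;
    type DocumentId = u32;

    // The key is a plain `u32`; no wrapper type is constructed or unwrapped.
    fn fetch_document<'t>(
        documents: Database<BEU32, ByteSlice>, // e.g. an internal-id -> obkv database
        rtxn: &'t RoTxn,
        id: DocumentId,
    ) -> heed::Result<Option<&'t [u8]>> {
        documents.get(rtxn, &id)
    }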
| @@ -25,11 +25,11 @@ where | ||||
|     let inner; | ||||
|     let left = match left { | ||||
|         Bound::Included(left) => { | ||||
|             inner = BoundCodec::bytes_encode(left).ok_or(heed::Error::Encoding)?; | ||||
|             inner = BoundCodec::bytes_encode(left).map_err(heed::Error::Encoding)?; | ||||
|             Bound::Included(inner.as_ref()) | ||||
|         } | ||||
|         Bound::Excluded(left) => { | ||||
|             inner = BoundCodec::bytes_encode(left).ok_or(heed::Error::Encoding)?; | ||||
|             inner = BoundCodec::bytes_encode(left).map_err(heed::Error::Encoding)?; | ||||
|             Bound::Excluded(inner.as_ref()) | ||||
|         } | ||||
|         Bound::Unbounded => Bound::Unbounded, | ||||
| @@ -37,11 +37,11 @@ where | ||||
|     let inner; | ||||
|     let right = match right { | ||||
|         Bound::Included(right) => { | ||||
|             inner = BoundCodec::bytes_encode(right).ok_or(heed::Error::Encoding)?; | ||||
|             inner = BoundCodec::bytes_encode(right).map_err(heed::Error::Encoding)?; | ||||
|             Bound::Included(inner.as_ref()) | ||||
|         } | ||||
|         Bound::Excluded(right) => { | ||||
|             inner = BoundCodec::bytes_encode(right).ok_or(heed::Error::Encoding)?; | ||||
|             inner = BoundCodec::bytes_encode(right).map_err(heed::Error::Encoding)?; | ||||
|             Bound::Excluded(inner.as_ref()) | ||||
|         } | ||||
|         Bound::Unbounded => Bound::Unbounded, | ||||
|   | ||||
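The repeated `ok_or(heed::Error::Encoding)` → `map_err(heed::Error::Encoding)` changes follow from `BytesEncode`/`BytesDecode` now returning a `Result` carrying a boxed error rather than an `Option`, with `heed::Error::Encoding` and `heed::Error::Decoding` wrapping that error. A hedged sketch of the new convention for an arbitrary codec `C`:

    use std::borrow::Cow;

    use heed::{BytesDecode, BytesEncode};

    // Encode a value, converting the codec's boxed error into a `heed::Error`.
    fn encode_with<'a, C: BytesEncode<'a>>(item: &'a C::EItem) -> heed::Result<Cow<'a, [u8]>> {
        C::bytes_encode(item).map_err(heed::Error::Encoding)
    }

    // Decode raw bytes the same way, surfacing failures as `heed::Error::Decoding`.
    fn decode_with<'a, C: BytesDecode<'a>>(bytes: &'a [u8]) -> heed::Result<C::DItem> {
        C::bytes_decode(bytes).map_err(heed::Error::Decoding)
    }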
| @@ -22,8 +22,10 @@ fn facet_extreme_value<'t>( | ||||
|     let extreme_value = | ||||
|         if let Some(extreme_value) = extreme_it.next() { extreme_value } else { return Ok(None) }; | ||||
|     let (_, extreme_value) = extreme_value?; | ||||
|  | ||||
|     Ok(OrderedF64Codec::bytes_decode(extreme_value)) | ||||
|     OrderedF64Codec::bytes_decode(extreme_value) | ||||
|         .map(Some) | ||||
|         .map_err(heed::Error::Decoding) | ||||
|         .map_err(Into::into) | ||||
| } | ||||
|  | ||||
| pub fn facet_min_value<'t>( | ||||
| @@ -60,13 +62,12 @@ where | ||||
|     let mut level0prefix = vec![]; | ||||
|     level0prefix.extend_from_slice(&field_id.to_be_bytes()); | ||||
|     level0prefix.push(0); | ||||
|     let mut level0_iter_forward = db | ||||
|         .as_polymorph() | ||||
|         .prefix_iter::<_, ByteSlice, DecodeIgnore>(txn, level0prefix.as_slice())?; | ||||
|     let mut level0_iter_forward = | ||||
|         db.remap_types::<ByteSlice, DecodeIgnore>().prefix_iter(txn, level0prefix.as_slice())?; | ||||
|     if let Some(first) = level0_iter_forward.next() { | ||||
|         let (first_key, _) = first?; | ||||
|         let first_key = FacetGroupKeyCodec::<BoundCodec>::bytes_decode(first_key) | ||||
|             .ok_or(heed::Error::Encoding)?; | ||||
|             .map_err(heed::Error::Decoding)?; | ||||
|         Ok(Some(first_key.left_bound)) | ||||
|     } else { | ||||
|         Ok(None) | ||||
| @@ -86,12 +87,12 @@ where | ||||
|     level0prefix.extend_from_slice(&field_id.to_be_bytes()); | ||||
|     level0prefix.push(0); | ||||
|     let mut level0_iter_backward = db | ||||
|         .as_polymorph() | ||||
|         .rev_prefix_iter::<_, ByteSlice, DecodeIgnore>(txn, level0prefix.as_slice())?; | ||||
|         .remap_types::<ByteSlice, DecodeIgnore>() | ||||
|         .rev_prefix_iter(txn, level0prefix.as_slice())?; | ||||
|     if let Some(last) = level0_iter_backward.next() { | ||||
|         let (last_key, _) = last?; | ||||
|         let last_key = FacetGroupKeyCodec::<BoundCodec>::bytes_decode(last_key) | ||||
|             .ok_or(heed::Error::Encoding)?; | ||||
|             .map_err(heed::Error::Decoding)?; | ||||
|         Ok(Some(last_key.left_bound)) | ||||
|     } else { | ||||
|         Ok(None) | ||||
| @@ -106,8 +107,8 @@ pub(crate) fn get_highest_level<'t>( | ||||
| ) -> heed::Result<u8> { | ||||
|     let field_id_prefix = &field_id.to_be_bytes(); | ||||
|     Ok(db | ||||
|         .as_polymorph() | ||||
|         .rev_prefix_iter::<_, ByteSlice, DecodeIgnore>(txn, field_id_prefix)? | ||||
|         .remap_types::<ByteSlice, DecodeIgnore>() | ||||
|         .rev_prefix_iter(txn, field_id_prefix)? | ||||
|         .next() | ||||
|         .map(|el| { | ||||
|             let (key, _) = el.unwrap(); | ||||
|   | ||||
| @@ -17,8 +17,7 @@ use crate::error::UserError; | ||||
| use crate::heed_codec::facet::{FacetGroupKey, FacetGroupValue}; | ||||
| use crate::score_details::{ScoreDetails, ScoringStrategy}; | ||||
| use crate::{ | ||||
|     execute_search, AscDesc, DefaultSearchLogger, DocumentId, FieldId, Index, Result, | ||||
|     SearchContext, BEU16, | ||||
|     execute_search, AscDesc, DefaultSearchLogger, DocumentId, FieldId, Index, Result, SearchContext, | ||||
| }; | ||||
|  | ||||
| // Building these factories is not free. | ||||
| @@ -299,7 +298,7 @@ impl<'a> SearchForFacetValues<'a> { | ||||
|             None => return Ok(Vec::new()), | ||||
|         }; | ||||
|  | ||||
|         let fst = match self.search_query.index.facet_id_string_fst.get(rtxn, &BEU16::new(fid))? { | ||||
|         let fst = match self.search_query.index.facet_id_string_fst.get(rtxn, &fid)? { | ||||
|             Some(fst) => fst, | ||||
|             None => return Ok(vec![]), | ||||
|         }; | ||||
|   | ||||
| @@ -63,12 +63,14 @@ impl<'ctx> DatabaseCache<'ctx> { | ||||
|         } | ||||
|  | ||||
|         match cache.get(&cache_key).unwrap() { | ||||
|             Some(Cow::Borrowed(bytes)) => { | ||||
|                 DC::bytes_decode_owned(bytes).ok_or(heed::Error::Decoding.into()).map(Some) | ||||
|             } | ||||
|             Some(Cow::Owned(bytes)) => { | ||||
|                 DC::bytes_decode_owned(bytes).ok_or(heed::Error::Decoding.into()).map(Some) | ||||
|             } | ||||
|             Some(Cow::Borrowed(bytes)) => DC::bytes_decode_owned(bytes) | ||||
|                 .map_err(heed::Error::Decoding) | ||||
|                 .map_err(Into::into) | ||||
|                 .map(Some), | ||||
|             Some(Cow::Owned(bytes)) => DC::bytes_decode_owned(bytes) | ||||
|                 .map_err(heed::Error::Decoding) | ||||
|                 .map_err(Into::into) | ||||
|                 .map(Some), | ||||
|             None => Ok(None), | ||||
|         } | ||||
|     } | ||||
| @@ -110,12 +112,14 @@ impl<'ctx> DatabaseCache<'ctx> { | ||||
|         } | ||||
|  | ||||
|         match cache.get(&cache_key).unwrap() { | ||||
|             Some(Cow::Borrowed(bytes)) => { | ||||
|                 DC::bytes_decode_owned(bytes).ok_or(heed::Error::Decoding.into()).map(Some) | ||||
|             } | ||||
|             Some(Cow::Owned(bytes)) => { | ||||
|                 DC::bytes_decode_owned(bytes).ok_or(heed::Error::Decoding.into()).map(Some) | ||||
|             } | ||||
|             Some(Cow::Borrowed(bytes)) => DC::bytes_decode_owned(bytes) | ||||
|                 .map_err(heed::Error::Decoding) | ||||
|                 .map_err(Into::into) | ||||
|                 .map(Some), | ||||
|             Some(Cow::Owned(bytes)) => DC::bytes_decode_owned(bytes) | ||||
|                 .map_err(heed::Error::Decoding) | ||||
|                 .map_err(Into::into) | ||||
|                 .map(Some), | ||||
|             None => Ok(None), | ||||
|         } | ||||
|     } | ||||
|   | ||||
| @@ -50,9 +50,7 @@ use crate::distance::NDotProductPoint; | ||||
| use crate::error::FieldIdMapMissingEntry; | ||||
| use crate::score_details::{ScoreDetails, ScoringStrategy}; | ||||
| use crate::search::new::distinct::apply_distinct_rule; | ||||
| use crate::{ | ||||
|     AscDesc, DocumentId, Filter, Index, Member, Result, TermsMatchingStrategy, UserError, BEU32, | ||||
| }; | ||||
| use crate::{AscDesc, DocumentId, Filter, Index, Member, Result, TermsMatchingStrategy, UserError}; | ||||
|  | ||||
| /// A structure used throughout the execution of a search query. | ||||
| pub struct SearchContext<'ctx> { | ||||
| @@ -451,8 +449,8 @@ pub fn execute_search( | ||||
|                 let mut docids = Vec::new(); | ||||
|                 let mut uniq_docids = RoaringBitmap::new(); | ||||
|                 for instant_distance::Item { distance: _, pid, point: _ } in neighbors { | ||||
|                     let index = BEU32::new(pid.into_inner()); | ||||
|                     let docid = ctx.index.vector_id_docid.get(ctx.txn, &index)?.unwrap().get(); | ||||
|                     let index = pid.into_inner(); | ||||
|                     let docid = ctx.index.vector_id_docid.get(ctx.txn, &index)?.unwrap(); | ||||
|                     if universe.contains(docid) && uniq_docids.insert(docid) { | ||||
|                         docids.push(docid); | ||||
|                         if docids.len() == (from + length) { | ||||
|   | ||||
| @@ -1,15 +1,16 @@ | ||||
| use heed::RwTxn; | ||||
| use roaring::RoaringBitmap; | ||||
| use time::OffsetDateTime; | ||||
|  | ||||
| use crate::{FieldDistribution, Index, Result}; | ||||
|  | ||||
| pub struct ClearDocuments<'t, 'u, 'i> { | ||||
|     wtxn: &'t mut heed::RwTxn<'i, 'u>, | ||||
| pub struct ClearDocuments<'t, 'i> { | ||||
|     wtxn: &'t mut RwTxn<'i>, | ||||
|     index: &'i Index, | ||||
| } | ||||
|  | ||||
| impl<'t, 'u, 'i> ClearDocuments<'t, 'u, 'i> { | ||||
|     pub fn new(wtxn: &'t mut heed::RwTxn<'i, 'u>, index: &'i Index) -> ClearDocuments<'t, 'u, 'i> { | ||||
| impl<'t, 'i> ClearDocuments<'t, 'i> { | ||||
|     pub fn new(wtxn: &'t mut RwTxn<'i>, index: &'i Index) -> ClearDocuments<'t, 'i> { | ||||
|         ClearDocuments { wtxn, index } | ||||
|     } | ||||
|  | ||||
|   | ||||
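The lifetime changes in `ClearDocuments` above (and in `IndexDocuments` further down) come from `RwTxn` losing its second lifetime parameter in this heed version, so structs that borrow a write transaction drop one generic lifetime. A minimal sketch of the new shape, with placeholder names:

    use heed::RwTxn;

    // Before: Wrapper<'t, 'u, 'i> holding `&'t mut heed::RwTxn<'i, 'u>`.
    // After: only the borrow and environment lifetimes remain.
    #[allow(dead_code)]
    struct Wrapper<'t, 'i> {
        wtxn: &'t mut RwTxn<'i>,
    }

    impl<'t, 'i> Wrapper<'t, 'i> {
        fn new(wtxn: &'t mut RwTxn<'i>) -> Wrapper<'t, 'i> {
            Wrapper { wtxn }
        }
    }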
| @@ -3,7 +3,7 @@ use std::io::BufReader; | ||||
|  | ||||
| use grenad::CompressionType; | ||||
| use heed::types::ByteSlice; | ||||
| use heed::{BytesDecode, BytesEncode, Error, RoTxn, RwTxn}; | ||||
| use heed::{BytesDecode, BytesEncode, Error, PutFlags, RoTxn, RwTxn}; | ||||
| use roaring::RoaringBitmap; | ||||
|  | ||||
| use super::{FACET_GROUP_SIZE, FACET_MIN_LEVEL_SIZE}; | ||||
| @@ -146,7 +146,13 @@ impl<R: std::io::Read + std::io::Seek> FacetsUpdateBulkInner<R> { | ||||
|                 buffer.push(1); | ||||
|                 // then we extend the buffer with the docids bitmap | ||||
|                 buffer.extend_from_slice(value); | ||||
|                 unsafe { database.append(key, &buffer)? }; | ||||
|                 unsafe { | ||||
|                     database.put_current_with_options::<ByteSlice>( | ||||
|                         PutFlags::APPEND, | ||||
|                         key, | ||||
|                         &buffer, | ||||
|                     )? | ||||
|                 }; | ||||
|             } | ||||
|         } else { | ||||
|             let mut buffer = Vec::new(); | ||||
| @@ -219,8 +225,8 @@ impl<R: std::io::Read + std::io::Seek> FacetsUpdateBulkInner<R> { | ||||
|  | ||||
|         let level_0_iter = self | ||||
|             .db | ||||
|             .as_polymorph() | ||||
|             .prefix_iter::<_, ByteSlice, ByteSlice>(rtxn, level_0_prefix.as_slice())? | ||||
|             .remap_types::<ByteSlice, ByteSlice>() | ||||
|             .prefix_iter(rtxn, level_0_prefix.as_slice())? | ||||
|             .remap_types::<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>(); | ||||
|  | ||||
|         let mut left_bound: &[u8] = &[]; | ||||
| @@ -308,10 +314,10 @@ impl<R: std::io::Read + std::io::Seek> FacetsUpdateBulkInner<R> { | ||||
|                 { | ||||
|                     let key = FacetGroupKey { field_id, level, left_bound }; | ||||
|                     let key = FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_encode(&key) | ||||
|                         .ok_or(Error::Encoding)?; | ||||
|                         .map_err(Error::Encoding)?; | ||||
|                     let value = FacetGroupValue { size: group_size, bitmap }; | ||||
|                     let value = | ||||
|                         FacetGroupValueCodec::bytes_encode(&value).ok_or(Error::Encoding)?; | ||||
|                         FacetGroupValueCodec::bytes_encode(&value).map_err(Error::Encoding)?; | ||||
|                     cur_writer.insert(key, value)?; | ||||
|                     cur_writer_len += 1; | ||||
|                 } | ||||
| @@ -337,9 +343,9 @@ impl<R: std::io::Read + std::io::Seek> FacetsUpdateBulkInner<R> { | ||||
|             { | ||||
|                 let key = FacetGroupKey { field_id, level, left_bound }; | ||||
|                 let key = FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_encode(&key) | ||||
|                     .ok_or(Error::Encoding)?; | ||||
|                     .map_err(Error::Encoding)?; | ||||
|                 let value = FacetGroupValue { size: group_size, bitmap }; | ||||
|                 let value = FacetGroupValueCodec::bytes_encode(&value).ok_or(Error::Encoding)?; | ||||
|                 let value = FacetGroupValueCodec::bytes_encode(&value).map_err(Error::Encoding)?; | ||||
|                 cur_writer.insert(key, value)?; | ||||
|                 cur_writer_len += 1; | ||||
|             } | ||||
|   | ||||
| @@ -68,18 +68,18 @@ impl FacetsUpdateIncremental { | ||||
|                 continue; | ||||
|             } | ||||
|             let key = FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_decode(key) | ||||
|                 .ok_or(heed::Error::Encoding)?; | ||||
|                 .map_err(heed::Error::Encoding)?; | ||||
|             let value = KvReader::new(value); | ||||
|  | ||||
|             let docids_to_delete = value | ||||
|                 .get(DelAdd::Deletion) | ||||
|                 .map(CboRoaringBitmapCodec::bytes_decode) | ||||
|                 .map(|o| o.ok_or(heed::Error::Encoding)); | ||||
|                 .map(|o| o.map_err(heed::Error::Encoding)); | ||||
|  | ||||
|             let docids_to_add = value | ||||
|                 .get(DelAdd::Addition) | ||||
|                 .map(CboRoaringBitmapCodec::bytes_decode) | ||||
|                 .map(|o| o.ok_or(heed::Error::Encoding)); | ||||
|                 .map(|o| o.map_err(heed::Error::Encoding)); | ||||
|  | ||||
|             if let Some(docids_to_delete) = docids_to_delete { | ||||
|                 let docids_to_delete = docids_to_delete?; | ||||
| @@ -134,15 +134,14 @@ impl FacetsUpdateIncrementalInner { | ||||
|                     prefix.extend_from_slice(&field_id.to_be_bytes()); | ||||
|                     prefix.push(level); | ||||
|  | ||||
|                     let mut iter = | ||||
|                         self.db.as_polymorph().prefix_iter::<_, ByteSlice, FacetGroupValueCodec>( | ||||
|                             txn, | ||||
|                             prefix.as_slice(), | ||||
|                         )?; | ||||
|                     let mut iter = self | ||||
|                         .db | ||||
|                         .remap_types::<ByteSlice, FacetGroupValueCodec>() | ||||
|                         .prefix_iter(txn, prefix.as_slice())?; | ||||
|                     let (key_bytes, value) = iter.next().unwrap()?; | ||||
|                     Ok(( | ||||
|                         FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_decode(key_bytes) | ||||
|                             .ok_or(Error::Encoding)? | ||||
|                             .map_err(Error::Encoding)? | ||||
|                             .into_owned(), | ||||
|                         value, | ||||
|                     )) | ||||
| @@ -177,10 +176,8 @@ impl FacetsUpdateIncrementalInner { | ||||
|         level0_prefix.extend_from_slice(&field_id.to_be_bytes()); | ||||
|         level0_prefix.push(0); | ||||
|  | ||||
|         let mut iter = self | ||||
|             .db | ||||
|             .as_polymorph() | ||||
|             .prefix_iter::<_, ByteSlice, DecodeIgnore>(txn, &level0_prefix)?; | ||||
|         let mut iter = | ||||
|             self.db.remap_types::<ByteSlice, DecodeIgnore>().prefix_iter(txn, &level0_prefix)?; | ||||
|  | ||||
|         if iter.next().is_none() { | ||||
|             drop(iter); | ||||
| @@ -384,8 +381,8 @@ impl FacetsUpdateIncrementalInner { | ||||
|  | ||||
|         let size_highest_level = self | ||||
|             .db | ||||
|             .as_polymorph() | ||||
|             .prefix_iter::<_, ByteSlice, ByteSlice>(txn, &highest_level_prefix)? | ||||
|             .remap_types::<ByteSlice, ByteSlice>() | ||||
|             .prefix_iter(txn, &highest_level_prefix)? | ||||
|             .count(); | ||||
|  | ||||
|         if size_highest_level < self.group_size as usize * self.min_level_size as usize { | ||||
| @@ -394,8 +391,8 @@ impl FacetsUpdateIncrementalInner { | ||||
|  | ||||
|         let mut groups_iter = self | ||||
|             .db | ||||
|             .as_polymorph() | ||||
|             .prefix_iter::<_, ByteSlice, FacetGroupValueCodec>(txn, &highest_level_prefix)?; | ||||
|             .remap_types::<ByteSlice, FacetGroupValueCodec>() | ||||
|             .prefix_iter(txn, &highest_level_prefix)?; | ||||
|  | ||||
|         let nbr_new_groups = size_highest_level / self.group_size as usize; | ||||
|         let nbr_leftover_elements = size_highest_level % self.group_size as usize; | ||||
| @@ -407,7 +404,7 @@ impl FacetsUpdateIncrementalInner { | ||||
|             for _ in 0..group_size { | ||||
|                 let (key_bytes, value_i) = groups_iter.next().unwrap()?; | ||||
|                 let key_i = FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_decode(key_bytes) | ||||
|                     .ok_or(Error::Encoding)?; | ||||
|                     .map_err(Error::Encoding)?; | ||||
|  | ||||
|                 if first_key.is_none() { | ||||
|                     first_key = Some(key_i); | ||||
| @@ -430,7 +427,7 @@ impl FacetsUpdateIncrementalInner { | ||||
|             for _ in 0..nbr_leftover_elements { | ||||
|                 let (key_bytes, value_i) = groups_iter.next().unwrap()?; | ||||
|                 let key_i = FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_decode(key_bytes) | ||||
|                     .ok_or(Error::Encoding)?; | ||||
|                     .map_err(Error::Encoding)?; | ||||
|  | ||||
|                 if first_key.is_none() { | ||||
|                     first_key = Some(key_i); | ||||
| @@ -597,8 +594,8 @@ impl FacetsUpdateIncrementalInner { | ||||
|         if highest_level == 0 | ||||
|             || self | ||||
|                 .db | ||||
|                 .as_polymorph() | ||||
|                 .prefix_iter::<_, ByteSlice, ByteSlice>(txn, &highest_level_prefix)? | ||||
|                 .remap_types::<ByteSlice, ByteSlice>() | ||||
|                 .prefix_iter(txn, &highest_level_prefix)? | ||||
|                 .count() | ||||
|                 >= self.min_level_size as usize | ||||
|         { | ||||
| @@ -607,13 +604,13 @@ impl FacetsUpdateIncrementalInner { | ||||
|         let mut to_delete = vec![]; | ||||
|         let mut iter = self | ||||
|             .db | ||||
|             .as_polymorph() | ||||
|             .prefix_iter::<_, ByteSlice, ByteSlice>(txn, &highest_level_prefix)?; | ||||
|             .remap_types::<ByteSlice, ByteSlice>() | ||||
|             .prefix_iter(txn, &highest_level_prefix)?; | ||||
|         for el in iter.by_ref() { | ||||
|             let (k, _) = el?; | ||||
|             to_delete.push( | ||||
|                 FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_decode(k) | ||||
|                     .ok_or(Error::Encoding)? | ||||
|                     .map_err(Error::Encoding)? | ||||
|                     .into_owned(), | ||||
|             ); | ||||
|         } | ||||
|   | ||||
| @@ -95,7 +95,7 @@ use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec, FacetGroupValu | ||||
| use crate::heed_codec::ByteSliceRefCodec; | ||||
| use crate::update::index_documents::create_sorter; | ||||
| use crate::update::merge_btreeset_string; | ||||
| use crate::{BEU16StrCodec, Index, Result, BEU16, MAX_FACET_VALUE_LENGTH}; | ||||
| use crate::{BEU16StrCodec, Index, Result, MAX_FACET_VALUE_LENGTH}; | ||||
|  | ||||
| pub mod bulk; | ||||
| pub mod incremental; | ||||
| @@ -207,8 +207,8 @@ impl<'i> FacetsUpdate<'i> { | ||||
|                 } | ||||
|                 let set = BTreeSet::from_iter(std::iter::once(left_bound)); | ||||
|                 let key = (field_id, normalized_facet.as_ref()); | ||||
|                 let key = BEU16StrCodec::bytes_encode(&key).ok_or(heed::Error::Encoding)?; | ||||
|                 let val = SerdeJson::bytes_encode(&set).ok_or(heed::Error::Encoding)?; | ||||
|                 let key = BEU16StrCodec::bytes_encode(&key).map_err(heed::Error::Encoding)?; | ||||
|                 let val = SerdeJson::bytes_encode(&set).map_err(heed::Error::Encoding)?; | ||||
|                 sorter.insert(key, val)?; | ||||
|             } | ||||
|         } | ||||
| @@ -252,7 +252,7 @@ impl<'i> FacetsUpdate<'i> { | ||||
|  | ||||
|         // We write those FSTs in LMDB now | ||||
|         for (field_id, fst) in text_fsts { | ||||
|             self.index.facet_id_string_fst.put(wtxn, &BEU16::new(field_id), &fst)?; | ||||
|             self.index.facet_id_string_fst.put(wtxn, &field_id, &fst)?; | ||||
|         } | ||||
|  | ||||
|         Ok(()) | ||||
|   | ||||
| @@ -6,8 +6,8 @@ use std::io::{self, BufReader}; | ||||
| use std::mem::size_of; | ||||
| use std::result::Result as StdResult; | ||||
|  | ||||
| use bytemuck::bytes_of; | ||||
| use grenad::Sorter; | ||||
| use heed::zerocopy::AsBytes; | ||||
| use heed::BytesEncode; | ||||
| use itertools::EitherOrBoth; | ||||
| use ordered_float::OrderedFloat; | ||||
| @@ -20,9 +20,7 @@ use crate::error::InternalError; | ||||
| use crate::facet::value_encoding::f64_into_bytes; | ||||
| use crate::update::del_add::{DelAdd, KvWriterDelAdd}; | ||||
| use crate::update::index_documents::{create_writer, writer_into_reader}; | ||||
| use crate::{ | ||||
|     CboRoaringBitmapCodec, DocumentId, Error, FieldId, Result, BEU32, MAX_FACET_VALUE_LENGTH, | ||||
| }; | ||||
| use crate::{CboRoaringBitmapCodec, DocumentId, Error, FieldId, Result, MAX_FACET_VALUE_LENGTH}; | ||||
|  | ||||
| /// The length of the elements that are always in the buffer when inserting new values. | ||||
| const TRUNCATE_SIZE: usize = size_of::<FieldId>() + size_of::<DocumentId>(); | ||||
| @@ -94,7 +92,7 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>( | ||||
|                 strings_key_buffer.extend_from_slice(&field_id.to_be_bytes()); | ||||
|  | ||||
|                 let document: [u8; 4] = docid_bytes[..4].try_into().ok().unwrap(); | ||||
|                 let document = BEU32::from(document).get(); | ||||
|                 let document = DocumentId::from_be_bytes(document); | ||||
|  | ||||
|                 // For the other extraction tasks, prefix the key with the field_id and the document_id | ||||
|                 numbers_key_buffer.extend_from_slice(docid_bytes); | ||||
| @@ -323,7 +321,7 @@ where | ||||
|                     // We insert only the Del part of the Obkv to inform | ||||
|                     // that we only want to remove all those numbers. | ||||
|                     let mut obkv = KvWriterDelAdd::memory(); | ||||
|                     obkv.insert(DelAdd::Deletion, ().as_bytes())?; | ||||
|                     obkv.insert(DelAdd::Deletion, bytes_of(&()))?; | ||||
|                     let bytes = obkv.into_inner()?; | ||||
|                     fid_docid_facet_numbers_sorter.insert(&key_buffer, bytes)?; | ||||
|                 } | ||||
| @@ -336,7 +334,7 @@ where | ||||
|                     // We insert only the Add part of the Obkv to inform | ||||
|                     // that we only want to add all those numbers. | ||||
|                     let mut obkv = KvWriterDelAdd::memory(); | ||||
|                     obkv.insert(DelAdd::Addition, ().as_bytes())?; | ||||
|                     obkv.insert(DelAdd::Addition, bytes_of(&()))?; | ||||
|                     let bytes = obkv.into_inner()?; | ||||
|                     fid_docid_facet_numbers_sorter.insert(&key_buffer, bytes)?; | ||||
|                 } | ||||
|   | ||||
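The `bytes_of` change above replaces heed's old `zerocopy::AsBytes` re-export, which is gone in 0.20; `bytemuck` provides the equivalent raw-byte view of a plain value. A minimal, stand-alone illustration:

    use bytemuck::bytes_of;

    fn main() {
        // `()` is Pod, so its byte view is the empty slice, matching the
        // placeholder value the del/add obkv entries store.
        let unit = ();
        assert!(bytes_of(&unit).is_empty());

        // For a real value the bytes come out in native layout, e.g. a u32 docid.
        let docid: u32 = 42;
        assert_eq!(bytes_of(&docid).len(), 4);
    }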
| @@ -118,7 +118,7 @@ pub fn extract_word_docids<R: io::Read + io::Seek>( | ||||
|         } | ||||
|  | ||||
|         let (word, fid) = StrBEU16Codec::bytes_decode(key) | ||||
|             .ok_or(SerializationError::Decoding { db_name: Some(DOCID_WORD_POSITIONS) })?; | ||||
|             .map_err(|_| SerializationError::Decoding { db_name: Some(DOCID_WORD_POSITIONS) })?; | ||||
|  | ||||
|         // every word contained in an attribute set to exact must be pushed into the exact_words list. | ||
|         if exact_attributes.contains(&fid) { | ||||
|   | ||||
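Same story on the decoding side: `bytes_decode` now also yields a `Result`, and the extractor above swaps the boxed error for milli's own `SerializationError::Decoding`. A small sketch of that mapping with a built-in heed codec; the plain `String` error type here is just an example:

    use heed::types::Str;
    use heed::BytesDecode;

    // Decode a UTF-8 key, replacing the boxed codec error with a domain error,
    // much like the extractor does with SerializationError::Decoding.
    fn decode_word(bytes: &[u8]) -> Result<&str, String> {
        Str::bytes_decode(bytes).map_err(|err| format!("invalid key: {err}"))
    }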
| @@ -68,8 +68,8 @@ impl Default for IndexDocumentsMethod { | ||||
|     } | ||||
| } | ||||
|  | ||||
| pub struct IndexDocuments<'t, 'u, 'i, 'a, FP, FA> { | ||||
|     wtxn: &'t mut heed::RwTxn<'i, 'u>, | ||||
| pub struct IndexDocuments<'t, 'i, 'a, FP, FA> { | ||||
|     wtxn: &'t mut heed::RwTxn<'i>, | ||||
|     index: &'i Index, | ||||
|     config: IndexDocumentsConfig, | ||||
|     indexer_config: &'a IndexerConfig, | ||||
| @@ -90,19 +90,19 @@ pub struct IndexDocumentsConfig { | ||||
|     pub autogenerate_docids: bool, | ||||
| } | ||||
|  | ||||
| impl<'t, 'u, 'i, 'a, FP, FA> IndexDocuments<'t, 'u, 'i, 'a, FP, FA> | ||||
| impl<'t, 'i, 'a, FP, FA> IndexDocuments<'t, 'i, 'a, FP, FA> | ||||
| where | ||||
|     FP: Fn(UpdateIndexingStep) + Sync, | ||||
|     FA: Fn() -> bool + Sync, | ||||
| { | ||||
|     pub fn new( | ||||
|         wtxn: &'t mut heed::RwTxn<'i, 'u>, | ||||
|         wtxn: &'t mut heed::RwTxn<'i>, | ||||
|         index: &'i Index, | ||||
|         indexer_config: &'a IndexerConfig, | ||||
|         config: IndexDocumentsConfig, | ||||
|         progress: FP, | ||||
|         should_abort: FA, | ||||
|     ) -> Result<IndexDocuments<'t, 'u, 'i, 'a, FP, FA>> { | ||||
|     ) -> Result<IndexDocuments<'t, 'i, 'a, FP, FA>> { | ||||
|         let transform = Some(Transform::new( | ||||
|             wtxn, | ||||
|             index, | ||||
|   | ||||
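Most of the remaining hunks are mechanical: `heed::RwTxn` dropped its second lifetime parameter in 0.20, so every updater struct that borrows a write transaction loses the `'u` lifetime. A sketch of the new shape, with an illustrative `Updater` type standing in for IndexDocuments, Settings, WordPrefixDocids and friends:

    use heed::RwTxn;

    // Only the environment lifetime 'i remains on RwTxn.
    struct Updater<'t, 'i> {
        wtxn: &'t mut RwTxn<'i>,
    }

    impl<'t, 'i> Updater<'t, 'i> {
        fn new(wtxn: &'t mut RwTxn<'i>) -> Updater<'t, 'i> {
            Updater { wtxn }
        }
    }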
| @@ -24,9 +24,7 @@ use crate::index::{db_name, main_key}; | ||||
| use crate::update::del_add::{into_del_add_obkv, DelAdd, DelAddOperation, KvReaderDelAdd}; | ||||
| use crate::update::index_documents::GrenadParameters; | ||||
| use crate::update::{AvailableDocumentsIds, ClearDocuments, UpdateIndexingStep}; | ||||
| use crate::{ | ||||
|     FieldDistribution, FieldId, FieldIdMapMissingEntry, FieldsIdsMap, Index, Result, BEU32, | ||||
| }; | ||||
| use crate::{FieldDistribution, FieldId, FieldIdMapMissingEntry, FieldsIdsMap, Index, Result}; | ||||
|  | ||||
| pub struct TransformOutput { | ||||
|     pub primary_key: String, | ||||
| @@ -245,7 +243,7 @@ impl<'a, 'i> Transform<'a, 'i> { | ||||
|  | ||||
|             let mut skip_insertion = false; | ||||
|             if let Some(original_docid) = original_docid { | ||||
|                 let original_key = BEU32::new(original_docid); | ||||
|                 let original_key = original_docid; | ||||
|                 let base_obkv = self | ||||
|                     .index | ||||
|                     .documents | ||||
| @@ -499,7 +497,7 @@ impl<'a, 'i> Transform<'a, 'i> { | ||||
|         self.replaced_documents_ids.insert(internal_docid); | ||||
|  | ||||
|         // fetch the obkv document | ||||
|         let original_key = BEU32::new(internal_docid); | ||||
|         let original_key = internal_docid; | ||||
|         let base_obkv = self | ||||
|             .index | ||||
|             .documents | ||||
| @@ -811,7 +809,7 @@ impl<'a, 'i> Transform<'a, 'i> { | ||||
|     // TODO this can be done in parallel by using the rayon `ThreadPool`. | ||||
|     pub fn prepare_for_documents_reindexing( | ||||
|         self, | ||||
|         wtxn: &mut heed::RwTxn<'i, '_>, | ||||
|         wtxn: &mut heed::RwTxn<'i>, | ||||
|         old_fields_ids_map: FieldsIdsMap, | ||||
|         mut new_fields_ids_map: FieldsIdsMap, | ||||
|     ) -> Result<TransformOutput> { | ||||
| @@ -857,7 +855,6 @@ impl<'a, 'i> Transform<'a, 'i> { | ||||
|             let obkv = self.index.documents.get(wtxn, &docid)?.ok_or( | ||||
|                 InternalError::DatabaseMissingEntry { db_name: db_name::DOCUMENTS, key: None }, | ||||
|             )?; | ||||
|             let docid = docid.get(); | ||||
|  | ||||
|             obkv_buffer.clear(); | ||||
|             let mut obkv_writer = KvWriter::<_, FieldId>::new(&mut obkv_buffer); | ||||
|   | ||||
| @@ -7,7 +7,7 @@ use bytemuck::allocation::pod_collect_to_vec; | ||||
| use charabia::{Language, Script}; | ||||
| use grenad::MergerBuilder; | ||||
| use heed::types::ByteSlice; | ||||
| use heed::RwTxn; | ||||
| use heed::{PutFlags, RwTxn}; | ||||
| use log::error; | ||||
| use obkv::{KvReader, KvWriter}; | ||||
| use ordered_float::OrderedFloat; | ||||
| @@ -27,9 +27,7 @@ use crate::index::Hnsw; | ||||
| use crate::update::del_add::{deladd_serialize_add_side, DelAdd, KvReaderDelAdd}; | ||||
| use crate::update::facet::FacetsUpdate; | ||||
| use crate::update::index_documents::helpers::{as_cloneable_grenad, try_split_array_at}; | ||||
| use crate::{ | ||||
|     lat_lng_to_xyz, DocumentId, FieldId, GeoPoint, Index, Result, SerializationError, BEU32, | ||||
| }; | ||||
| use crate::{lat_lng_to_xyz, DocumentId, FieldId, GeoPoint, Index, Result, SerializationError}; | ||||
|  | ||||
| pub(crate) enum TypedChunk { | ||||
|     FieldIdDocidFacetStrings(grenad::Reader<CursorClonableMmap>), | ||||
| @@ -149,7 +147,7 @@ pub(crate) fn write_typed_chunk_into_index( | ||||
|                 let db = index.documents.remap_data_type::<ByteSlice>(); | ||||
|  | ||||
|                 if !writer.is_empty() { | ||||
|                     db.put(wtxn, &BEU32::new(docid), &writer.into_inner().unwrap())?; | ||||
|                     db.put(wtxn, &docid, &writer.into_inner().unwrap())?; | ||||
|                     operations.push(DocumentOperation { | ||||
|                         external_id: external_id.to_string(), | ||||
|                         internal_id: docid, | ||||
| @@ -157,7 +155,7 @@ pub(crate) fn write_typed_chunk_into_index( | ||||
|                     }); | ||||
|                     docids.insert(docid); | ||||
|                 } else { | ||||
|                     db.delete(wtxn, &BEU32::new(docid))?; | ||||
|                     db.delete(wtxn, &docid)?; | ||||
|                     operations.push(DocumentOperation { | ||||
|                         external_id: external_id.to_string(), | ||||
|                         internal_id: docid, | ||||
| @@ -362,8 +360,8 @@ pub(crate) fn write_typed_chunk_into_index( | ||||
|             // We extract and store the previous vectors | ||||
|             if let Some(hnsw) = index.vector_hnsw(wtxn)? { | ||||
|                 for (pid, point) in hnsw.iter() { | ||||
|                     let pid_key = BEU32::new(pid.into_inner()); | ||||
|                     let docid = index.vector_id_docid.get(wtxn, &pid_key)?.unwrap().get(); | ||||
|                     let pid_key = pid.into_inner(); | ||||
|                     let docid = index.vector_id_docid.get(wtxn, &pid_key)?.unwrap(); | ||||
|                     let vector: Vec<_> = point.iter().copied().map(OrderedFloat).collect(); | ||||
|                     vectors_set.insert((docid, vector)); | ||||
|                 } | ||||
| @@ -424,11 +422,7 @@ pub(crate) fn write_typed_chunk_into_index( | ||||
|             // Store the vectors in the point-docid relation database | ||||
|             index.vector_id_docid.clear(wtxn)?; | ||||
|             for (docid, pid) in docids.into_iter().zip(pids) { | ||||
|                 index.vector_id_docid.put( | ||||
|                     wtxn, | ||||
|                     &BEU32::new(pid.into_inner()), | ||||
|                     &BEU32::new(docid), | ||||
|                 )?; | ||||
|                 index.vector_id_docid.put(wtxn, &pid.into_inner(), &docid)?; | ||||
|             } | ||||
|  | ||||
|             log::debug!("There are {} entries in the HNSW so far", hnsw_length); | ||||
| @@ -568,14 +562,17 @@ where | ||||
|     while let Some((key, value)) = cursor.move_on_next()? { | ||||
|         if valid_lmdb_key(key) { | ||||
|             debug_assert!( | ||||
|                 K::bytes_decode(key).is_some(), | ||||
|                 K::bytes_decode(key).is_ok(), | ||||
|                 "Couldn't decode key with the database decoder, key length: {} - key bytes: {:x?}", | ||||
|                 key.len(), | ||||
|                 &key | ||||
|             ); | ||||
|             buffer.clear(); | ||||
|             let value = serialize_value(value, &mut buffer)?; | ||||
|             unsafe { database.append(key, value)? }; | ||||
|             unsafe { | ||||
|                 // safety: We do not keep a reference to anything that lives inside the database | ||||
|                 database.put_current_with_options::<ByteSlice>(PutFlags::APPEND, key, value)? | ||||
|             }; | ||||
|         } | ||||
|     } | ||||
|  | ||||
|   | ||||
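One behavioural note on the hunk above: the old `append` helper is replaced by an explicit `MDB_APPEND` put through `PutFlags`, which only works because grenad hands keys back in sorted order. The fragment below merely restates the call from the diff; `database`, `cursor`, `buffer`, `serialize_value` and `valid_lmdb_key` are names from that hunk and are not defined here, and `put_current_with_options` comes from this heed branch rather than a released API, so treat its signature as an assumption:

    use heed::types::ByteSlice;
    use heed::PutFlags;

    // Keys must already be sorted: MDB_APPEND refuses out-of-order inserts.
    while let Some((key, value)) = cursor.move_on_next()? {
        if valid_lmdb_key(key) {
            buffer.clear();
            let value = serialize_value(value, &mut buffer)?;
            // safety: nothing borrowed from the database is kept across this call
            unsafe { database.put_current_with_options::<ByteSlice>(PutFlags::APPEND, key, value)? };
        }
    }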
| @@ -100,8 +100,8 @@ impl<'de, T: Deserialize<'de>> Deserialize<'de> for Setting<T> { | ||||
|     } | ||||
| } | ||||
|  | ||||
| pub struct Settings<'a, 't, 'u, 'i> { | ||||
|     wtxn: &'t mut heed::RwTxn<'i, 'u>, | ||||
| pub struct Settings<'a, 't, 'i> { | ||||
|     wtxn: &'t mut heed::RwTxn<'i>, | ||||
|     index: &'i Index, | ||||
|  | ||||
|     indexer_config: &'a IndexerConfig, | ||||
| @@ -129,12 +129,12 @@ pub struct Settings<'a, 't, 'u, 'i> { | ||||
|     pagination_max_total_hits: Setting<usize>, | ||||
| } | ||||
|  | ||||
| impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { | ||||
| impl<'a, 't, 'i> Settings<'a, 't, 'i> { | ||||
|     pub fn new( | ||||
|         wtxn: &'t mut heed::RwTxn<'i, 'u>, | ||||
|         wtxn: &'t mut heed::RwTxn<'i>, | ||||
|         index: &'i Index, | ||||
|         indexer_config: &'a IndexerConfig, | ||||
|     ) -> Settings<'a, 't, 'u, 'i> { | ||||
|     ) -> Settings<'a, 't, 'i> { | ||||
|         Settings { | ||||
|             wtxn, | ||||
|             index, | ||||
|   | ||||
| @@ -12,8 +12,8 @@ use crate::update::index_documents::{ | ||||
| }; | ||||
| use crate::{CboRoaringBitmapCodec, Result}; | ||||
|  | ||||
| pub struct WordPrefixDocids<'t, 'u, 'i> { | ||||
|     wtxn: &'t mut heed::RwTxn<'i, 'u>, | ||||
| pub struct WordPrefixDocids<'t, 'i> { | ||||
|     wtxn: &'t mut heed::RwTxn<'i>, | ||||
|     word_docids: Database<Str, CboRoaringBitmapCodec>, | ||||
|     word_prefix_docids: Database<Str, CboRoaringBitmapCodec>, | ||||
|     pub(crate) chunk_compression_type: CompressionType, | ||||
| @@ -22,12 +22,12 @@ pub struct WordPrefixDocids<'t, 'u, 'i> { | ||||
|     pub(crate) max_memory: Option<usize>, | ||||
| } | ||||
|  | ||||
| impl<'t, 'u, 'i> WordPrefixDocids<'t, 'u, 'i> { | ||||
| impl<'t, 'i> WordPrefixDocids<'t, 'i> { | ||||
|     pub fn new( | ||||
|         wtxn: &'t mut heed::RwTxn<'i, 'u>, | ||||
|         wtxn: &'t mut heed::RwTxn<'i>, | ||||
|         word_docids: Database<Str, CboRoaringBitmapCodec>, | ||||
|         word_prefix_docids: Database<Str, CboRoaringBitmapCodec>, | ||||
|     ) -> WordPrefixDocids<'t, 'u, 'i> { | ||||
|     ) -> WordPrefixDocids<'t, 'i> { | ||||
|         WordPrefixDocids { | ||||
|             wtxn, | ||||
|             word_docids, | ||||
|   | ||||
| @@ -17,8 +17,8 @@ use crate::update::index_documents::{ | ||||
| }; | ||||
| use crate::{CboRoaringBitmapCodec, Result}; | ||||
|  | ||||
| pub struct WordPrefixIntegerDocids<'t, 'u, 'i> { | ||||
|     wtxn: &'t mut heed::RwTxn<'i, 'u>, | ||||
| pub struct WordPrefixIntegerDocids<'t, 'i> { | ||||
|     wtxn: &'t mut heed::RwTxn<'i>, | ||||
|     prefix_database: Database<StrBEU16Codec, CboRoaringBitmapCodec>, | ||||
|     word_database: Database<StrBEU16Codec, CboRoaringBitmapCodec>, | ||||
|     pub(crate) chunk_compression_type: CompressionType, | ||||
| @@ -27,12 +27,12 @@ pub struct WordPrefixIntegerDocids<'t, 'u, 'i> { | ||||
|     pub(crate) max_memory: Option<usize>, | ||||
| } | ||||
|  | ||||
| impl<'t, 'u, 'i> WordPrefixIntegerDocids<'t, 'u, 'i> { | ||||
| impl<'t, 'i> WordPrefixIntegerDocids<'t, 'i> { | ||||
|     pub fn new( | ||||
|         wtxn: &'t mut heed::RwTxn<'i, 'u>, | ||||
|         wtxn: &'t mut heed::RwTxn<'i>, | ||||
|         prefix_database: Database<StrBEU16Codec, CboRoaringBitmapCodec>, | ||||
|         word_database: Database<StrBEU16Codec, CboRoaringBitmapCodec>, | ||||
|     ) -> WordPrefixIntegerDocids<'t, 'u, 'i> { | ||||
|     ) -> WordPrefixIntegerDocids<'t, 'i> { | ||||
|         WordPrefixIntegerDocids { | ||||
|             wtxn, | ||||
|             prefix_database, | ||||
| @@ -72,7 +72,8 @@ impl<'t, 'u, 'i> WordPrefixIntegerDocids<'t, 'u, 'i> { | ||||
|             let mut current_prefixes: Option<&&[String]> = None; | ||||
|             let mut prefixes_cache = HashMap::new(); | ||||
|             while let Some((key, data)) = new_word_integer_docids_iter.move_on_next()? { | ||||
|                 let (word, pos) = StrBEU16Codec::bytes_decode(key).ok_or(heed::Error::Decoding)?; | ||||
|                 let (word, pos) = | ||||
|                     StrBEU16Codec::bytes_decode(key).map_err(heed::Error::Decoding)?; | ||||
|  | ||||
|                 current_prefixes = match current_prefixes.take() { | ||||
|                     Some(prefixes) if word.starts_with(&prefixes[0]) => Some(prefixes), | ||||
|   | ||||
| @@ -2,21 +2,19 @@ use std::iter::{repeat_with, FromIterator}; | ||||
| use std::str; | ||||
|  | ||||
| use fst::{SetBuilder, Streamer}; | ||||
| use heed::RwTxn; | ||||
|  | ||||
| use crate::{Index, Result, SmallString32}; | ||||
|  | ||||
| pub struct WordsPrefixesFst<'t, 'u, 'i> { | ||||
|     wtxn: &'t mut heed::RwTxn<'i, 'u>, | ||||
| pub struct WordsPrefixesFst<'t, 'i> { | ||||
|     wtxn: &'t mut RwTxn<'i>, | ||||
|     index: &'i Index, | ||||
|     threshold: u32, | ||||
|     max_prefix_length: usize, | ||||
| } | ||||
|  | ||||
| impl<'t, 'u, 'i> WordsPrefixesFst<'t, 'u, 'i> { | ||||
|     pub fn new( | ||||
|         wtxn: &'t mut heed::RwTxn<'i, 'u>, | ||||
|         index: &'i Index, | ||||
|     ) -> WordsPrefixesFst<'t, 'u, 'i> { | ||||
| impl<'t, 'i> WordsPrefixesFst<'t, 'i> { | ||||
|     pub fn new(wtxn: &'t mut RwTxn<'i>, index: &'i Index) -> WordsPrefixesFst<'t, 'i> { | ||||
|         WordsPrefixesFst { wtxn, index, threshold: 100, max_prefix_length: 4 } | ||||
|     } | ||||
|  | ||||
|   | ||||