Rename StrStrU8Codec to U8StrStrCodec and reorder its fields

This commit is contained in:
Loïc Lecrenier
2022-09-14 14:01:53 +02:00
committed by Loïc Lecrenier
parent bdeb47305e
commit 1dbbd8694f
7 changed files with 61 additions and 61 deletions

View File

@@ -15,4 +15,4 @@ pub use self::roaring_bitmap_length::{
BoRoaringBitmapLenCodec, CboRoaringBitmapLenCodec, RoaringBitmapLenCodec,
};
pub use self::str_beu32_codec::StrBEU32Codec;
pub use self::str_str_u8_codec::{StrStrU8Codec, UncheckedStrStrU8Codec};
pub use self::str_str_u8_codec::{U8StrStrCodec, UncheckedU8StrStrCodec};

View File

@@ -1,10 +1,10 @@
use std::borrow::Cow;
use std::str;
pub struct StrStrU8Codec;
pub struct U8StrStrCodec;
impl<'a> heed::BytesDecode<'a> for StrStrU8Codec {
type DItem = (&'a str, &'a str, u8);
impl<'a> heed::BytesDecode<'a> for U8StrStrCodec {
type DItem = (u8, &'a str, &'a str);
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
let (n, bytes) = bytes.split_first()?;
@@ -13,14 +13,14 @@ impl<'a> heed::BytesDecode<'a> for StrStrU8Codec {
let s2_bytes = &rest[1..];
let s1 = str::from_utf8(s1_bytes).ok()?;
let s2 = str::from_utf8(s2_bytes).ok()?;
Some((s1, s2, *n))
Some((*n, s1, s2))
}
}
impl<'a> heed::BytesEncode<'a> for StrStrU8Codec {
type EItem = (&'a str, &'a str, u8);
impl<'a> heed::BytesEncode<'a> for U8StrStrCodec {
type EItem = (u8, &'a str, &'a str);
fn bytes_encode((s1, s2, n): &Self::EItem) -> Option<Cow<[u8]>> {
fn bytes_encode((n, s1, s2): &Self::EItem) -> Option<Cow<[u8]>> {
let mut bytes = Vec::with_capacity(s1.len() + s2.len() + 1);
bytes.push(*n);
bytes.extend_from_slice(s1.as_bytes());
@@ -29,24 +29,24 @@ impl<'a> heed::BytesEncode<'a> for StrStrU8Codec {
Some(Cow::Owned(bytes))
}
}
pub struct UncheckedStrStrU8Codec;
pub struct UncheckedU8StrStrCodec;
impl<'a> heed::BytesDecode<'a> for UncheckedStrStrU8Codec {
type DItem = (&'a [u8], &'a [u8], u8);
impl<'a> heed::BytesDecode<'a> for UncheckedU8StrStrCodec {
type DItem = (u8, &'a [u8], &'a [u8]);
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
let (n, bytes) = bytes.split_first()?;
let s1_end = bytes.iter().position(|b| *b == 0)?;
let (s1_bytes, rest) = bytes.split_at(s1_end);
let s2_bytes = &rest[1..];
Some((s1_bytes, s2_bytes, *n))
Some((*n, s1_bytes, s2_bytes))
}
}
impl<'a> heed::BytesEncode<'a> for UncheckedStrStrU8Codec {
type EItem = (&'a [u8], &'a [u8], u8);
impl<'a> heed::BytesEncode<'a> for UncheckedU8StrStrCodec {
type EItem = (u8, &'a [u8], &'a [u8]);
fn bytes_encode((s1, s2, n): &Self::EItem) -> Option<Cow<[u8]>> {
fn bytes_encode((n, s1, s2): &Self::EItem) -> Option<Cow<[u8]>> {
let mut bytes = Vec::with_capacity(s1.len() + s2.len() + 1);
bytes.push(*n);
bytes.extend_from_slice(s1);

View File

@@ -21,7 +21,7 @@ use crate::{
default_criteria, BEU32StrCodec, BoRoaringBitmapCodec, CboRoaringBitmapCodec, Criterion,
DocumentId, ExternalDocumentsIds, FacetDistribution, FieldDistribution, FieldId,
FieldIdWordCountCodec, GeoPoint, ObkvCodec, Result, RoaringBitmapCodec, RoaringBitmapLenCodec,
Search, StrBEU32Codec, StrStrU8Codec, BEU16, BEU32,
Search, StrBEU32Codec, U8StrStrCodec, BEU16, BEU32,
};
pub const DEFAULT_MIN_WORD_LEN_ONE_TYPO: u8 = 5;
@@ -106,9 +106,9 @@ pub struct Index {
pub docid_word_positions: Database<BEU32StrCodec, BoRoaringBitmapCodec>,
/// Maps the proximity between a pair of words with all the docids where this relation appears.
pub word_pair_proximity_docids: Database<StrStrU8Codec, CboRoaringBitmapCodec>,
pub word_pair_proximity_docids: Database<U8StrStrCodec, CboRoaringBitmapCodec>,
/// Maps the proximity between a pair of word and prefix with all the docids where this relation appears.
pub word_prefix_pair_proximity_docids: Database<StrStrU8Codec, CboRoaringBitmapCodec>,
pub word_prefix_pair_proximity_docids: Database<U8StrStrCodec, CboRoaringBitmapCodec>,
/// Maps the word and the position with the docids that corresponds to it.
pub word_position_docids: Database<StrBEU32Codec, CboRoaringBitmapCodec>,

View File

@@ -37,7 +37,7 @@ pub use self::fields_ids_map::FieldsIdsMap;
pub use self::heed_codec::{
BEU32StrCodec, BoRoaringBitmapCodec, BoRoaringBitmapLenCodec, CboRoaringBitmapCodec,
CboRoaringBitmapLenCodec, FieldIdWordCountCodec, ObkvCodec, RoaringBitmapCodec,
RoaringBitmapLenCodec, StrBEU32Codec, StrStrU8Codec, UncheckedStrStrU8Codec,
RoaringBitmapLenCodec, StrBEU32Codec, U8StrStrCodec, UncheckedU8StrStrCodec,
};
pub use self::index::Index;
pub use self::search::{

View File

@@ -138,7 +138,7 @@ impl<'c> Context<'c> for CriteriaBuilder<'c> {
right: &str,
proximity: u8,
) -> heed::Result<Option<RoaringBitmap>> {
let key = (left, right, proximity);
let key = (proximity, left, right);
self.index.word_pair_proximity_docids.get(self.rtxn, &key)
}
@@ -148,7 +148,7 @@ impl<'c> Context<'c> for CriteriaBuilder<'c> {
right: &str,
proximity: u8,
) -> heed::Result<Option<RoaringBitmap>> {
let key = (left, right, proximity);
let key = (proximity, left, right);
self.index.word_prefix_pair_proximity_docids.get(self.rtxn, &key)
}

View File

@@ -182,16 +182,16 @@ pub fn snap_docid_word_positions(index: &Index) -> String {
}
pub fn snap_word_pair_proximity_docids(index: &Index) -> String {
let snap = make_db_snap_from_iter!(index, word_pair_proximity_docids, |(
(word1, word2, proximity),
(proximity, word1, word2),
b,
)| {
&format!("{word1:<16} {word2:<16} {proximity:<2} {}", display_bitmap(&b))
&format!("{proximity:<2} {word1:<16} {word2:<16} {}", display_bitmap(&b))
});
snap
}
pub fn snap_word_prefix_pair_proximity_docids(index: &Index) -> String {
let snap = make_db_snap_from_iter!(index, word_prefix_pair_proximity_docids, |(
(word1, prefix, proximity),
(proximity, word1, prefix),
b,
)| {
&format!("{proximity:<2} {word1:<16} {prefix:<4} {}", display_bitmap(&b))

View File

@@ -177,7 +177,7 @@ use log::debug;
use crate::update::index_documents::{
create_writer, merge_cbo_roaring_bitmaps, CursorClonableMmap,
};
use crate::{CboRoaringBitmapCodec, Index, Result, UncheckedStrStrU8Codec};
use crate::{CboRoaringBitmapCodec, Index, Result, UncheckedU8StrStrCodec};
pub struct WordPrefixPairProximityDocids<'t, 'u, 'i> {
wtxn: &'t mut heed::RwTxn<'i, 'u>,
@@ -259,9 +259,9 @@ impl<'t, 'u, 'i> WordPrefixPairProximityDocids<'t, 'u, 'i> {
&mut cursor,
|cursor| {
if let Some((key, value)) = cursor.move_on_next()? {
let (word1, word2, proximity) = UncheckedStrStrU8Codec::bytes_decode(key)
let (proximity, word1, word2) = UncheckedU8StrStrCodec::bytes_decode(key)
.ok_or(heed::Error::Decoding)?;
Ok(Some(((word1, word2, proximity), value)))
Ok(Some(((proximity, word1, word2), value)))
} else {
Ok(None)
}
@@ -293,7 +293,7 @@ impl<'t, 'u, 'i> WordPrefixPairProximityDocids<'t, 'u, 'i> {
let mut db_iter = self
.index
.word_pair_proximity_docids
.remap_key_type::<UncheckedStrStrU8Codec>()
.remap_key_type::<UncheckedU8StrStrCodec>()
.remap_data_type::<ByteSlice>()
.iter(self.wtxn)?;
@@ -358,7 +358,7 @@ fn execute_on_word_pairs_and_prefixes<I>(
mut next_word_pair_proximity: impl for<'a> FnMut(
&'a mut I,
) -> Result<
Option<((&'a [u8], &'a [u8], u8), &'a [u8])>,
Option<((u8, &'a [u8], &'a [u8]), &'a [u8])>,
>,
prefixes: &PrefixTrieNode,
max_proximity: u8,
@@ -376,14 +376,14 @@ fn execute_on_word_pairs_and_prefixes<I>(
let mut prefix_buffer = Vec::with_capacity(8);
let mut merge_buffer = Vec::with_capacity(65_536);
while let Some(((word1, word2, proximity), data)) = next_word_pair_proximity(iter)? {
while let Some(((proximity, word1, word2), data)) = next_word_pair_proximity(iter)? {
// skip this iteration if the proximity is over the threshold
if proximity > max_proximity {
break;
};
let word2_start_different_than_prev = word2[0] != prev_word2_start;
// if there were no potential prefixes for the previous word2 based on its first letter,
// and if the current word2 starts with the same letter, then there is also no potential
// and if the current word2 starts with the s`ame letter, then there is also no potential
// prefixes for the current word2, and we can skip to the next iteration
if empty_prefixes && !word2_start_different_than_prev {
continue;
@@ -683,7 +683,7 @@ mod tests {
use super::*;
use crate::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
use crate::index::tests::TempIndex;
use crate::{db_snap, CboRoaringBitmapCodec, StrStrU8Codec};
use crate::{db_snap, CboRoaringBitmapCodec, U8StrStrCodec};
fn documents_with_enough_different_words_for_prefixes(prefixes: &[&str]) -> Vec<crate::Object> {
let mut documents = Vec::new();
@@ -858,40 +858,40 @@ mod tests {
CboRoaringBitmapCodec::serialize_into(&bitmap_ranges, &mut serialised_bitmap_ranges);
let word_pairs = [
(("healthy", "arbres", 1), &serialised_bitmap123),
(("healthy", "boat", 1), &serialised_bitmap123),
(("healthy", "ca", 1), &serialised_bitmap123),
(("healthy", "cats", 1), &serialised_bitmap456),
(("healthy", "cattos", 1), &serialised_bitmap123),
(("jittery", "cat", 1), &serialised_bitmap123),
(("jittery", "cata", 1), &serialised_bitmap456),
(("jittery", "catb", 1), &serialised_bitmap789),
(("jittery", "catc", 1), &serialised_bitmap_ranges),
(("healthy", "arbre", 2), &serialised_bitmap123),
(("healthy", "arbres", 2), &serialised_bitmap456),
(("healthy", "cats", 2), &serialised_bitmap789),
(("healthy", "cattos", 2), &serialised_bitmap_ranges),
(("healthy", "arbre", 3), &serialised_bitmap456),
(("healthy", "arbres", 3), &serialised_bitmap789),
((1, "healthy", "arbres"), &serialised_bitmap123),
((1, "healthy", "boat"), &serialised_bitmap123),
((1, "healthy", "ca"), &serialised_bitmap123),
((1, "healthy", "cats"), &serialised_bitmap456),
((1, "healthy", "cattos"), &serialised_bitmap123),
((1, "jittery", "cat"), &serialised_bitmap123),
((1, "jittery", "cata"), &serialised_bitmap456),
((1, "jittery", "catb"), &serialised_bitmap789),
((1, "jittery", "catc"), &serialised_bitmap_ranges),
((2, "healthy", "arbre"), &serialised_bitmap123),
((2, "healthy", "arbres"), &serialised_bitmap456),
((2, "healthy", "cats"), &serialised_bitmap789),
((2, "healthy", "cattos"), &serialised_bitmap_ranges),
((3, "healthy", "arbre"), &serialised_bitmap456),
((3, "healthy", "arbres"), &serialised_bitmap789),
];
let expected_result = [
(("healthy", "arb", 1), bitmap123.clone()),
(("healthy", "arbre", 1), bitmap123.clone()),
(("healthy", "cat", 1), &bitmap456 | &bitmap123),
(("healthy", "catto", 1), bitmap123.clone()),
(("jittery", "cat", 1), (&bitmap123 | &bitmap456 | &bitmap789 | &bitmap_ranges)),
(("healthy", "arb", 2), &bitmap123 | &bitmap456),
(("healthy", "arbre", 2), &bitmap123 | &bitmap456),
(("healthy", "cat", 2), &bitmap789 | &bitmap_ranges),
(("healthy", "catto", 2), bitmap_ranges.clone()),
((1, "healthy", "arb"), bitmap123.clone()),
((1, "healthy", "arbre"), bitmap123.clone()),
((1, "healthy", "cat"), &bitmap456 | &bitmap123),
((1, "healthy", "catto"), bitmap123.clone()),
((1, "jittery", "cat"), (&bitmap123 | &bitmap456 | &bitmap789 | &bitmap_ranges)),
((2, "healthy", "arb"), &bitmap123 | &bitmap456),
((2, "healthy", "arbre"), &bitmap123 | &bitmap456),
((2, "healthy", "cat"), &bitmap789 | &bitmap_ranges),
((2, "healthy", "catto"), bitmap_ranges.clone()),
];
let mut result = vec![];
let mut iter =
IntoIterator::into_iter(word_pairs).map(|((word1, word2, proximity), data)| {
((word1.as_bytes(), word2.as_bytes(), proximity), data.as_slice())
IntoIterator::into_iter(word_pairs).map(|((proximity, word1, word2), data)| {
((proximity, word1.as_bytes(), word2.as_bytes()), data.as_slice())
});
execute_on_word_pairs_and_prefixes(
&mut iter,
@@ -899,7 +899,7 @@ mod tests {
&prefixes,
2,
|k, v| {
let (word1, prefix, proximity) = StrStrU8Codec::bytes_decode(k).unwrap();
let (word1, prefix, proximity) = U8StrStrCodec::bytes_decode(k).unwrap();
let bitmap = CboRoaringBitmapCodec::bytes_decode(v).unwrap();
result.push(((word1.to_owned(), prefix.to_owned(), proximity.to_owned()), bitmap));
Ok(())
@@ -908,8 +908,8 @@ mod tests {
.unwrap();
for (x, y) in result.into_iter().zip(IntoIterator::into_iter(expected_result)) {
let ((actual_word1, actual_prefix, actual_proximity), actual_bitmap) = x;
let ((expected_word1, expected_prefix, expected_proximity), expected_bitmap) = y;
let ((actual_proximity, actual_word1, actual_prefix), actual_bitmap) = x;
let ((expected_proximity, expected_word1, expected_prefix), expected_bitmap) = y;
assert_eq!(actual_word1, expected_word1);
assert_eq!(actual_prefix, expected_prefix);