mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-07-28 01:01:00 +00:00
Return the original string values for the inverted facet index database
This commit is contained in:
committed by
Kerollmops
parent
03a01166ba
commit
0227254a65
@ -9,6 +9,7 @@ use serde_json::Value;
|
||||
|
||||
use super::ClearDocuments;
|
||||
use crate::error::{FieldIdMapMissingEntry, InternalError, UserError};
|
||||
use crate::heed_codec::facet::FacetStringLevelZeroValueCodec;
|
||||
use crate::heed_codec::CboRoaringBitmapCodec;
|
||||
use crate::index::{db_name, main_key};
|
||||
use crate::{DocumentId, ExternalDocumentsIds, FieldId, Index, Result, SmallString32, BEU32};
|
||||
@ -374,13 +375,13 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
|
||||
drop(iter);
|
||||
|
||||
// We delete the documents ids that are under the facet field id values.
|
||||
remove_docids_from_facet_field_id_value_docids(
|
||||
remove_docids_from_facet_field_id_number_docids(
|
||||
self.wtxn,
|
||||
facet_id_f64_docids,
|
||||
&self.documents_ids,
|
||||
)?;
|
||||
|
||||
remove_docids_from_facet_field_id_value_docids(
|
||||
remove_docids_from_facet_field_id_string_docids(
|
||||
self.wtxn,
|
||||
facet_id_string_docids,
|
||||
&self.documents_ids,
|
||||
@ -447,7 +448,33 @@ where
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn remove_docids_from_facet_field_id_value_docids<'a, C>(
|
||||
fn remove_docids_from_facet_field_id_string_docids<'a, C>(
|
||||
wtxn: &'a mut heed::RwTxn,
|
||||
db: &heed::Database<C, FacetStringLevelZeroValueCodec<CboRoaringBitmapCodec>>,
|
||||
to_remove: &RoaringBitmap,
|
||||
) -> heed::Result<()>
|
||||
where
|
||||
C: heed::BytesDecode<'a> + heed::BytesEncode<'a>,
|
||||
{
|
||||
let mut iter = db.remap_key_type::<ByteSlice>().iter_mut(wtxn)?;
|
||||
while let Some(result) = iter.next() {
|
||||
let (bytes, (original_value, mut docids)) = result?;
|
||||
let previous_len = docids.len();
|
||||
docids -= to_remove;
|
||||
if docids.is_empty() {
|
||||
// safety: we don't keep references from inside the LMDB database.
|
||||
unsafe { iter.del_current()? };
|
||||
} else if docids.len() != previous_len {
|
||||
let bytes = bytes.to_owned();
|
||||
// safety: we don't keep references from inside the LMDB database.
|
||||
unsafe { iter.put_current(&bytes, &(original_value, docids))? };
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn remove_docids_from_facet_field_id_number_docids<'a, C>(
|
||||
wtxn: &'a mut heed::RwTxn,
|
||||
db: &heed::Database<C, CboRoaringBitmapCodec>,
|
||||
to_remove: &RoaringBitmap,
|
||||
|
@ -12,7 +12,7 @@ use roaring::RoaringBitmap;
|
||||
use crate::error::InternalError;
|
||||
use crate::heed_codec::facet::{
|
||||
FacetLevelValueF64Codec, FacetLevelValueU32Codec, FacetStringLevelZeroCodec,
|
||||
FacetStringZeroBoundsValueCodec,
|
||||
FacetStringLevelZeroValueCodec, FacetStringZeroBoundsValueCodec,
|
||||
};
|
||||
use crate::heed_codec::CboRoaringBitmapCodec;
|
||||
use crate::update::index_documents::{
|
||||
@ -75,7 +75,7 @@ impl<'t, 'u, 'i> Facets<'t, 'u, 'i> {
|
||||
)?;
|
||||
|
||||
// Compute and store the faceted strings documents ids.
|
||||
let string_documents_ids = compute_faceted_documents_ids(
|
||||
let string_documents_ids = compute_faceted_strings_documents_ids(
|
||||
self.wtxn,
|
||||
self.index.facet_id_string_docids.remap_key_type::<ByteSlice>(),
|
||||
field_id,
|
||||
@ -96,7 +96,7 @@ impl<'t, 'u, 'i> Facets<'t, 'u, 'i> {
|
||||
clear_field_number_levels(self.wtxn, self.index.facet_id_f64_docids, field_id)?;
|
||||
|
||||
// Compute and store the faceted numbers documents ids.
|
||||
let number_documents_ids = compute_faceted_documents_ids(
|
||||
let number_documents_ids = compute_faceted_numbers_documents_ids(
|
||||
self.wtxn,
|
||||
self.index.facet_id_f64_docids.remap_key_type::<ByteSlice>(),
|
||||
field_id,
|
||||
@ -237,13 +237,26 @@ fn write_number_entry(
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn compute_faceted_documents_ids(
|
||||
fn compute_faceted_strings_documents_ids(
|
||||
rtxn: &heed::RoTxn,
|
||||
db: heed::Database<ByteSlice, FacetStringLevelZeroValueCodec<CboRoaringBitmapCodec>>,
|
||||
field_id: FieldId,
|
||||
) -> Result<RoaringBitmap> {
|
||||
let mut documents_ids = RoaringBitmap::new();
|
||||
for result in db.prefix_iter(rtxn, &field_id.to_be_bytes())? {
|
||||
let (_key, (_original_value, docids)) = result?;
|
||||
documents_ids |= docids;
|
||||
}
|
||||
|
||||
Ok(documents_ids)
|
||||
}
|
||||
|
||||
fn compute_faceted_numbers_documents_ids(
|
||||
rtxn: &heed::RoTxn,
|
||||
db: heed::Database<ByteSlice, CboRoaringBitmapCodec>,
|
||||
field_id: FieldId,
|
||||
) -> Result<RoaringBitmap> {
|
||||
let mut documents_ids = RoaringBitmap::new();
|
||||
|
||||
for result in db.prefix_iter(rtxn, &field_id.to_be_bytes())? {
|
||||
let (_key, docids) = result?;
|
||||
documents_ids |= docids;
|
||||
@ -265,7 +278,10 @@ fn clear_field_string_levels<'t>(
|
||||
|
||||
fn compute_facet_string_levels<'t>(
|
||||
rtxn: &'t heed::RoTxn,
|
||||
db: heed::Database<FacetStringLevelZeroCodec, CboRoaringBitmapCodec>,
|
||||
db: heed::Database<
|
||||
FacetStringLevelZeroCodec,
|
||||
FacetStringLevelZeroValueCodec<CboRoaringBitmapCodec>,
|
||||
>,
|
||||
compression_type: CompressionType,
|
||||
compression_level: Option<u32>,
|
||||
shrink_size: Option<u64>,
|
||||
@ -299,7 +315,7 @@ fn compute_facet_string_levels<'t>(
|
||||
// Because we know the size of the level 0 we can use a range iterator that starts
|
||||
// at the first value of the level and goes to the last by simply counting.
|
||||
for (i, result) in db.range(rtxn, &((field_id, "")..))?.take(first_level_size).enumerate() {
|
||||
let ((_field_id, value), docids) = result?;
|
||||
let ((_field_id, value), (_original_value, docids)) = result?;
|
||||
|
||||
if i == 0 {
|
||||
left = (i as u32, value);
|
||||
|
@ -2,8 +2,11 @@ use std::borrow::Cow;
|
||||
use std::result::Result as StdResult;
|
||||
|
||||
use fst::IntoStreamer;
|
||||
use heed::{BytesDecode, BytesEncode};
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use crate::error::SerializationError;
|
||||
use crate::heed_codec::facet::FacetStringLevelZeroValueCodec;
|
||||
use crate::heed_codec::CboRoaringBitmapCodec;
|
||||
use crate::Result;
|
||||
|
||||
@ -69,6 +72,26 @@ pub fn roaring_bitmap_merge(_key: &[u8], values: &[Cow<[u8]>]) -> Result<Vec<u8>
|
||||
Ok(vec)
|
||||
}
|
||||
|
||||
/// Uses the FacetStringLevelZeroValueCodec to merge the values.
|
||||
pub fn tuple_string_cbo_roaring_bitmap_merge(_key: &[u8], values: &[Cow<[u8]>]) -> Result<Vec<u8>> {
|
||||
let (head, tail) = values.split_first().unwrap();
|
||||
let (head_string, mut head_rb) =
|
||||
FacetStringLevelZeroValueCodec::<CboRoaringBitmapCodec>::bytes_decode(&head[..])
|
||||
.ok_or(SerializationError::Decoding { db_name: None })?;
|
||||
|
||||
for value in tail {
|
||||
let (_string, rb) =
|
||||
FacetStringLevelZeroValueCodec::<CboRoaringBitmapCodec>::bytes_decode(&value[..])
|
||||
.ok_or(SerializationError::Decoding { db_name: None })?;
|
||||
head_rb |= rb;
|
||||
}
|
||||
|
||||
FacetStringLevelZeroValueCodec::<CboRoaringBitmapCodec>::bytes_encode(&(head_string, head_rb))
|
||||
.map(|cow| cow.into_owned())
|
||||
.ok_or(SerializationError::Encoding { db_name: None })
|
||||
.map_err(Into::into)
|
||||
}
|
||||
|
||||
pub fn cbo_roaring_bitmap_merge(_key: &[u8], values: &[Cow<[u8]>]) -> Result<Vec<u8>> {
|
||||
let (head, tail) = values.split_first().unwrap();
|
||||
let mut head = CboRoaringBitmapCodec::deserialize_from(&head[..])?;
|
||||
|
@ -20,6 +20,7 @@ use serde::{Deserialize, Serialize};
|
||||
|
||||
pub use self::merge_function::{
|
||||
cbo_roaring_bitmap_merge, fst_merge, keep_first, roaring_bitmap_merge,
|
||||
tuple_string_cbo_roaring_bitmap_merge,
|
||||
};
|
||||
use self::store::{Readers, Store};
|
||||
pub use self::transform::{Transform, TransformOutput};
|
||||
@ -655,7 +656,7 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
|
||||
self.wtxn,
|
||||
*self.index.facet_id_string_docids.as_polymorph(),
|
||||
facet_field_strings_docids_readers,
|
||||
cbo_roaring_bitmap_merge,
|
||||
tuple_string_cbo_roaring_bitmap_merge,
|
||||
write_method,
|
||||
)?;
|
||||
|
||||
|
@ -22,12 +22,13 @@ use tempfile::tempfile;
|
||||
|
||||
use super::merge_function::{
|
||||
cbo_roaring_bitmap_merge, fst_merge, keep_first, roaring_bitmap_merge,
|
||||
tuple_string_cbo_roaring_bitmap_merge,
|
||||
};
|
||||
use super::{create_sorter, create_writer, writer_into_reader, MergeFn};
|
||||
use crate::error::{Error, InternalError, SerializationError};
|
||||
use crate::heed_codec::facet::{
|
||||
FacetLevelValueF64Codec, FacetStringLevelZeroCodec, FieldDocIdFacetF64Codec,
|
||||
FieldDocIdFacetStringCodec,
|
||||
FacetLevelValueF64Codec, FacetStringLevelZeroCodec, FacetStringLevelZeroValueCodec,
|
||||
FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec,
|
||||
};
|
||||
use crate::heed_codec::{BoRoaringBitmapCodec, CboRoaringBitmapCodec};
|
||||
use crate::update::UpdateIndexingStep;
|
||||
@ -153,7 +154,7 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> {
|
||||
max_memory,
|
||||
);
|
||||
let facet_field_strings_docids_sorter = create_sorter(
|
||||
cbo_roaring_bitmap_merge,
|
||||
tuple_string_cbo_roaring_bitmap_merge,
|
||||
chunk_compression_type,
|
||||
chunk_compression_level,
|
||||
chunk_fusing_shrink_size,
|
||||
@ -528,17 +529,18 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> {
|
||||
Error: From<E>,
|
||||
{
|
||||
let mut key_buffer = Vec::new();
|
||||
let mut data_buffer = Vec::new();
|
||||
|
||||
for ((field_id, normalized_value), (original_value, docids)) in iter {
|
||||
key_buffer.clear();
|
||||
data_buffer.clear();
|
||||
|
||||
FacetStringLevelZeroCodec::serialize_into(field_id, &normalized_value, &mut key_buffer);
|
||||
CboRoaringBitmapCodec::serialize_into(&docids, &mut data_buffer);
|
||||
|
||||
let data = (original_value.as_str(), docids);
|
||||
let data = FacetStringLevelZeroValueCodec::<CboRoaringBitmapCodec>::bytes_encode(&data)
|
||||
.ok_or(SerializationError::Encoding { db_name: Some("facet-id-string-docids") })?;
|
||||
|
||||
if lmdb_key_valid_size(&key_buffer) {
|
||||
sorter.insert(&key_buffer, &data_buffer)?;
|
||||
sorter.insert(&key_buffer, &data)?;
|
||||
} else {
|
||||
warn!("facet value {:?} is too large to be saved", original_value);
|
||||
}
|
||||
|
Reference in New Issue
Block a user