mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-07-28 09:11:00 +00:00
Display the original facet string value from the linear facet database
This commit is contained in:
@ -3,7 +3,7 @@ use std::collections::HashMap;
|
||||
|
||||
use chrono::Utc;
|
||||
use fst::IntoStreamer;
|
||||
use heed::types::{ByteSlice, Unit};
|
||||
use heed::types::ByteSlice;
|
||||
use roaring::RoaringBitmap;
|
||||
use serde_json::Value;
|
||||
|
||||
@ -419,15 +419,16 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
|
||||
}
|
||||
}
|
||||
|
||||
fn remove_docids_from_field_id_docid_facet_value<'a, C, K, F>(
|
||||
fn remove_docids_from_field_id_docid_facet_value<'a, C, K, F, DC, V>(
|
||||
wtxn: &'a mut heed::RwTxn,
|
||||
db: &heed::Database<C, Unit>,
|
||||
db: &heed::Database<C, DC>,
|
||||
field_id: FieldId,
|
||||
to_remove: &RoaringBitmap,
|
||||
convert: F,
|
||||
) -> heed::Result<()>
|
||||
where
|
||||
C: heed::BytesDecode<'a, DItem = K> + heed::BytesEncode<'a, EItem = K>,
|
||||
C: heed::BytesDecode<'a, DItem = K>,
|
||||
DC: heed::BytesDecode<'a, DItem = V>,
|
||||
F: Fn(K) -> DocumentId,
|
||||
{
|
||||
let mut iter = db
|
||||
@ -436,7 +437,7 @@ where
|
||||
.remap_key_type::<C>();
|
||||
|
||||
while let Some(result) = iter.next() {
|
||||
let (key, ()) = result?;
|
||||
let (key, _) = result?;
|
||||
if to_remove.contains(convert(key)) {
|
||||
// safety: we don't keep references from inside the LMDB database.
|
||||
unsafe { iter.del_current()? };
|
||||
|
@ -65,7 +65,7 @@ pub struct Store<'s, A> {
|
||||
LinkedHashMap<(SmallVec32<u8>, SmallVec32<u8>, u8), RoaringBitmap>,
|
||||
words_pairs_proximities_docids_limit: usize,
|
||||
facet_field_number_docids: LinkedHashMap<(FieldId, OrderedFloat<f64>), RoaringBitmap>,
|
||||
facet_field_string_docids: LinkedHashMap<(FieldId, String), RoaringBitmap>,
|
||||
facet_field_string_docids: LinkedHashMap<(FieldId, String), (String, RoaringBitmap)>,
|
||||
facet_field_value_docids_limit: usize,
|
||||
// MTBL parameters
|
||||
chunk_compression_type: CompressionType,
|
||||
@ -283,25 +283,33 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> {
|
||||
fn insert_facet_string_values_docid(
|
||||
&mut self,
|
||||
field_id: FieldId,
|
||||
value: String,
|
||||
normalized_value: String,
|
||||
original_value: String,
|
||||
id: DocumentId,
|
||||
) -> Result<()> {
|
||||
if value.is_empty() {
|
||||
if normalized_value.is_empty() {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let sorter = &mut self.field_id_docid_facet_strings_sorter;
|
||||
Self::write_field_id_docid_facet_string_value(sorter, field_id, id, &value)?;
|
||||
Self::write_field_id_docid_facet_string_value(
|
||||
sorter,
|
||||
field_id,
|
||||
id,
|
||||
&normalized_value,
|
||||
&original_value,
|
||||
)?;
|
||||
|
||||
let key = (field_id, value);
|
||||
let key = (field_id, normalized_value);
|
||||
// if get_refresh finds the element it is assured to be at the end of the linked hash map.
|
||||
match self.facet_field_string_docids.get_refresh(&key) {
|
||||
Some(old) => {
|
||||
Some((_original_value, old)) => {
|
||||
old.insert(id);
|
||||
}
|
||||
None => {
|
||||
// A newly inserted element is append at the end of the linked hash map.
|
||||
self.facet_field_string_docids.insert(key, RoaringBitmap::from_iter(Some(id)));
|
||||
self.facet_field_string_docids
|
||||
.insert(key, (original_value, RoaringBitmap::from_iter(Some(id))));
|
||||
// If the word docids just reached it's capacity we must make sure to remove
|
||||
// one element, this way next time we insert we doesn't grow the capacity.
|
||||
if self.facet_field_string_docids.len() == self.facet_field_value_docids_limit {
|
||||
@ -363,7 +371,7 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> {
|
||||
document_id: DocumentId,
|
||||
words_positions: &mut HashMap<String, SmallVec32<Position>>,
|
||||
facet_numbers_values: &mut HashMap<FieldId, Vec<f64>>,
|
||||
facet_strings_values: &mut HashMap<FieldId, Vec<String>>,
|
||||
facet_strings_values: &mut HashMap<FieldId, Vec<(String, String)>>,
|
||||
record: &[u8],
|
||||
) -> Result<()> {
|
||||
// We compute the list of words pairs proximities (self-join) and write it directly to disk.
|
||||
@ -399,8 +407,8 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> {
|
||||
|
||||
// We store document_id associated with all the facet strings fields ids and values.
|
||||
for (field, values) in facet_strings_values.drain() {
|
||||
for value in values {
|
||||
self.insert_facet_string_values_docid(field, value, document_id)?;
|
||||
for (normalized, original) in values {
|
||||
self.insert_facet_string_values_docid(field, normalized, original, document_id)?;
|
||||
}
|
||||
}
|
||||
|
||||
@ -516,23 +524,23 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> {
|
||||
|
||||
fn write_facet_field_string_docids<I, E>(sorter: &mut Sorter<MergeFn<E>>, iter: I) -> Result<()>
|
||||
where
|
||||
I: IntoIterator<Item = ((FieldId, String), RoaringBitmap)>,
|
||||
I: IntoIterator<Item = ((FieldId, String), (String, RoaringBitmap))>,
|
||||
Error: From<E>,
|
||||
{
|
||||
let mut key_buffer = Vec::new();
|
||||
let mut data_buffer = Vec::new();
|
||||
|
||||
for ((field_id, value), docids) in iter {
|
||||
for ((field_id, normalized_value), (original_value, docids)) in iter {
|
||||
key_buffer.clear();
|
||||
data_buffer.clear();
|
||||
|
||||
FacetStringLevelZeroCodec::serialize_into(field_id, &value, &mut key_buffer);
|
||||
FacetStringLevelZeroCodec::serialize_into(field_id, &normalized_value, &mut key_buffer);
|
||||
CboRoaringBitmapCodec::serialize_into(&docids, &mut data_buffer);
|
||||
|
||||
if lmdb_key_valid_size(&key_buffer) {
|
||||
sorter.insert(&key_buffer, &data_buffer)?;
|
||||
} else {
|
||||
warn!("facet value {:?} is too large to be saved", value);
|
||||
warn!("facet value {:?} is too large to be saved", original_value);
|
||||
}
|
||||
}
|
||||
|
||||
@ -587,19 +595,24 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> {
|
||||
sorter: &mut Sorter<MergeFn<E>>,
|
||||
field_id: FieldId,
|
||||
document_id: DocumentId,
|
||||
value: &str,
|
||||
normalized_value: &str,
|
||||
original_value: &str,
|
||||
) -> Result<()>
|
||||
where
|
||||
Error: From<E>,
|
||||
{
|
||||
let mut buffer = Vec::new();
|
||||
|
||||
FieldDocIdFacetStringCodec::serialize_into(field_id, document_id, value, &mut buffer);
|
||||
FieldDocIdFacetStringCodec::serialize_into(
|
||||
field_id,
|
||||
document_id,
|
||||
normalized_value,
|
||||
&mut buffer,
|
||||
);
|
||||
|
||||
if lmdb_key_valid_size(&buffer) {
|
||||
sorter.insert(&buffer, &[])?;
|
||||
sorter.insert(&buffer, original_value.as_bytes())?;
|
||||
} else {
|
||||
warn!("facet value {:?} is too large to be saved", value);
|
||||
warn!("facet value {:?} is too large to be saved", original_value);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
@ -929,24 +942,24 @@ fn process_tokens<'a>(
|
||||
.filter(|(_, t)| t.is_word())
|
||||
}
|
||||
|
||||
fn extract_facet_values(value: &Value) -> (Vec<f64>, Vec<String>) {
|
||||
fn extract_facet_values(value: &Value) -> (Vec<f64>, Vec<(String, String)>) {
|
||||
fn inner_extract_facet_values(
|
||||
value: &Value,
|
||||
can_recurse: bool,
|
||||
output_numbers: &mut Vec<f64>,
|
||||
output_strings: &mut Vec<String>,
|
||||
output_strings: &mut Vec<(String, String)>,
|
||||
) {
|
||||
match value {
|
||||
Value::Null => (),
|
||||
Value::Bool(b) => output_strings.push(b.to_string()),
|
||||
Value::Bool(b) => output_strings.push((b.to_string(), b.to_string())),
|
||||
Value::Number(number) => {
|
||||
if let Some(float) = number.as_f64() {
|
||||
output_numbers.push(float);
|
||||
}
|
||||
}
|
||||
Value::String(string) => {
|
||||
let string = string.trim().to_lowercase();
|
||||
output_strings.push(string);
|
||||
Value::String(original) => {
|
||||
let normalized = original.trim().to_lowercase();
|
||||
output_strings.push((normalized, original.clone()));
|
||||
}
|
||||
Value::Array(values) => {
|
||||
if can_recurse {
|
||||
|
@ -276,8 +276,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
||||
match self.searchable_fields {
|
||||
Setting::Set(ref fields) => {
|
||||
// every time the searchable attributes are updated, we need to update the
|
||||
// ids for any settings that uses the facets. (displayed_fields,
|
||||
// filterable_fields)
|
||||
// ids for any settings that uses the facets. (distinct_fields, filterable_fields).
|
||||
let old_fields_ids_map = self.index.fields_ids_map(self.wtxn)?;
|
||||
|
||||
let mut new_fields_ids_map = FieldsIdsMap::new();
|
||||
|
Reference in New Issue
Block a user