mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-07-28 01:01:00 +00:00
Return the original string values for the inverted facet index database
This commit is contained in:
committed by
Kerollmops
parent
03a01166ba
commit
0227254a65
@ -1,5 +1,6 @@
|
||||
use std::mem::size_of;
|
||||
|
||||
use concat_arrays::concat_arrays;
|
||||
use heed::types::{ByteSlice, Str, Unit};
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
@ -43,7 +44,10 @@ pub struct FacetDistinctIter<'a> {
|
||||
|
||||
impl<'a> FacetDistinctIter<'a> {
|
||||
fn facet_string_docids(&self, key: &str) -> heed::Result<Option<RoaringBitmap>> {
|
||||
self.index.facet_id_string_docids.get(self.txn, &(self.distinct, key))
|
||||
self.index
|
||||
.facet_id_string_docids
|
||||
.get(self.txn, &(self.distinct, key))
|
||||
.map(|result| result.map(|(_original, docids)| docids))
|
||||
}
|
||||
|
||||
fn facet_number_docids(&self, key: f64) -> heed::Result<Option<RoaringBitmap>> {
|
||||
@ -116,10 +120,7 @@ impl<'a> FacetDistinctIter<'a> {
|
||||
}
|
||||
|
||||
fn facet_values_prefix_key(distinct: FieldId, id: DocumentId) -> [u8; FID_SIZE + DOCID_SIZE] {
|
||||
let mut key = [0; FID_SIZE + DOCID_SIZE];
|
||||
key[0..FID_SIZE].copy_from_slice(&distinct.to_be_bytes());
|
||||
key[FID_SIZE..].copy_from_slice(&id.to_be_bytes());
|
||||
key
|
||||
concat_arrays!(distinct.to_be_bytes(), id.to_be_bytes())
|
||||
}
|
||||
|
||||
fn facet_number_values<'a>(
|
||||
|
@ -47,7 +47,7 @@ mod test {
|
||||
|
||||
let mut documents = Vec::new();
|
||||
|
||||
let txts = ["toto", "titi", "tata"];
|
||||
let txts = ["Toto", "Titi", "Tata"];
|
||||
let cats = (1..10).map(|i| i.to_string()).collect::<Vec<_>>();
|
||||
let cat_ints = (1..10).collect::<Vec<_>>();
|
||||
|
||||
@ -90,7 +90,6 @@ mod test {
|
||||
|
||||
addition.index_documents_method(IndexDocumentsMethod::ReplaceDocuments);
|
||||
addition.update_format(UpdateFormat::Json);
|
||||
|
||||
addition.execute(JSON.to_string().as_bytes(), |_, _| ()).unwrap();
|
||||
|
||||
let fields_map = index.fields_ids_map(&txn).unwrap();
|
||||
|
@ -23,7 +23,7 @@ const MAX_VALUES_BY_FACET: usize = 1000;
|
||||
|
||||
/// Threshold on the number of candidates that makes
|
||||
/// the system choose between one algorithm and another.
|
||||
const CANDIDATES_THRESHOLD: u64 = 35_000;
|
||||
const CANDIDATES_THRESHOLD: u64 = 3000;
|
||||
|
||||
pub struct FacetDistribution<'a> {
|
||||
facets: Option<HashSet<String>>,
|
||||
@ -72,6 +72,7 @@ impl<'a> FacetDistribution<'a> {
|
||||
FacetType::Number => {
|
||||
let mut key_buffer: Vec<_> = field_id.to_be_bytes().iter().copied().collect();
|
||||
|
||||
let distribution_prelength = distribution.len();
|
||||
let db = self.index.field_id_docid_facet_f64s;
|
||||
for docid in candidates.into_iter() {
|
||||
key_buffer.truncate(mem::size_of::<FieldId>());
|
||||
@ -84,6 +85,9 @@ impl<'a> FacetDistribution<'a> {
|
||||
for result in iter {
|
||||
let ((_, _, value), ()) = result?;
|
||||
*distribution.entry(value.to_string()).or_insert(0) += 1;
|
||||
if distribution.len() - distribution_prelength == self.max_values_by_facet {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -106,6 +110,10 @@ impl<'a> FacetDistribution<'a> {
|
||||
.entry(normalized_value)
|
||||
.or_insert_with(|| (original_value, 0));
|
||||
*count += 1;
|
||||
|
||||
if normalized_distribution.len() == self.max_values_by_facet {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -154,10 +162,10 @@ impl<'a> FacetDistribution<'a> {
|
||||
FacetStringIter::new_non_reducing(self.rtxn, self.index, field_id, candidates.clone())?;
|
||||
|
||||
for result in iter {
|
||||
let (value, mut docids) = result?;
|
||||
let (_normalized, original, mut docids) = result?;
|
||||
docids &= candidates;
|
||||
if !docids.is_empty() {
|
||||
distribution.insert(value.to_string(), docids.len());
|
||||
distribution.insert(original.to_string(), docids.len());
|
||||
}
|
||||
if distribution.len() == self.max_values_by_facet {
|
||||
break;
|
||||
@ -193,14 +201,20 @@ impl<'a> FacetDistribution<'a> {
|
||||
.prefix_iter(self.rtxn, &field_id.to_be_bytes())?
|
||||
.remap_key_type::<FacetStringLevelZeroCodec>();
|
||||
|
||||
let mut normalized_distribution = BTreeMap::new();
|
||||
for result in iter {
|
||||
let ((_, value), docids) = result?;
|
||||
distribution.insert(value.to_string(), docids.len());
|
||||
let ((_, normalized_value), (original_value, docids)) = result?;
|
||||
normalized_distribution.insert(normalized_value, (original_value, docids.len()));
|
||||
if distribution.len() == self.max_values_by_facet {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
let iter = normalized_distribution
|
||||
.into_iter()
|
||||
.map(|(_normalized, (original, count))| (original.to_string(), count));
|
||||
distribution.extend(iter);
|
||||
|
||||
Ok(distribution)
|
||||
}
|
||||
|
||||
|
@ -135,7 +135,8 @@ use heed::{Database, LazyDecode, RoRange};
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use crate::heed_codec::facet::{
|
||||
FacetLevelValueU32Codec, FacetStringLevelZeroCodec, FacetStringZeroBoundsValueCodec,
|
||||
FacetLevelValueU32Codec, FacetStringLevelZeroCodec, FacetStringLevelZeroValueCodec,
|
||||
FacetStringZeroBoundsValueCodec,
|
||||
};
|
||||
use crate::heed_codec::CboRoaringBitmapCodec;
|
||||
use crate::{FieldId, Index};
|
||||
@ -209,7 +210,11 @@ impl<'t> Iterator for FacetStringGroupRange<'t> {
|
||||
///
|
||||
/// It yields the normalized facet string, the original facet string and the roaring bitmap associated with them.
|
||||
pub struct FacetStringLevelZeroRange<'t> {
|
||||
iter: RoRange<'t, FacetStringLevelZeroCodec, CboRoaringBitmapCodec>,
|
||||
iter: RoRange<
|
||||
't,
|
||||
FacetStringLevelZeroCodec,
|
||||
FacetStringLevelZeroValueCodec<CboRoaringBitmapCodec>,
|
||||
>,
|
||||
}
|
||||
|
||||
impl<'t> FacetStringLevelZeroRange<'t> {
|
||||
@ -252,18 +257,23 @@ impl<'t> FacetStringLevelZeroRange<'t> {
|
||||
let iter = db
|
||||
.remap_key_type::<ByteSlice>()
|
||||
.range(rtxn, &(left_bound, right_bound))?
|
||||
.remap_types::<FacetStringLevelZeroCodec, CboRoaringBitmapCodec>();
|
||||
.remap_types::<
|
||||
FacetStringLevelZeroCodec,
|
||||
FacetStringLevelZeroValueCodec<CboRoaringBitmapCodec>
|
||||
>();
|
||||
|
||||
Ok(FacetStringLevelZeroRange { iter })
|
||||
}
|
||||
}
|
||||
|
||||
impl<'t> Iterator for FacetStringLevelZeroRange<'t> {
|
||||
type Item = heed::Result<(&'t str, RoaringBitmap)>;
|
||||
type Item = heed::Result<(&'t str, &'t str, RoaringBitmap)>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
match self.iter.next() {
|
||||
Some(Ok(((_fid, value), docids))) => Some(Ok((value, docids))),
|
||||
Some(Ok(((_fid, normalized), (original, docids)))) => {
|
||||
Some(Ok((normalized, original, docids)))
|
||||
}
|
||||
Some(Err(e)) => Some(Err(e)),
|
||||
None => None,
|
||||
}
|
||||
@ -326,7 +336,7 @@ impl<'t> FacetStringIter<'t> {
|
||||
}
|
||||
|
||||
impl<'t> Iterator for FacetStringIter<'t> {
|
||||
type Item = heed::Result<(&'t str, RoaringBitmap)>;
|
||||
type Item = heed::Result<(&'t str, &'t str, RoaringBitmap)>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
'outer: loop {
|
||||
@ -377,11 +387,11 @@ impl<'t> Iterator for FacetStringIter<'t> {
|
||||
// level zero only
|
||||
for result in last {
|
||||
match result {
|
||||
Ok((value, mut docids)) => {
|
||||
Ok((normalized, original, mut docids)) => {
|
||||
docids &= &*documents_ids;
|
||||
if !docids.is_empty() {
|
||||
*documents_ids -= &docids;
|
||||
return Some(Ok((value, docids)));
|
||||
return Some(Ok((normalized, original, docids)));
|
||||
}
|
||||
}
|
||||
Err(e) => return Some(Err(e)),
|
||||
|
@ -17,7 +17,9 @@ use self::Operator::*;
|
||||
use super::parser::{FilterParser, Rule, PREC_CLIMBER};
|
||||
use super::FacetNumberRange;
|
||||
use crate::error::UserError;
|
||||
use crate::heed_codec::facet::{FacetLevelValueF64Codec, FacetStringLevelZeroCodec};
|
||||
use crate::heed_codec::facet::{
|
||||
FacetLevelValueF64Codec, FacetStringLevelZeroCodec, FacetStringLevelZeroValueCodec,
|
||||
};
|
||||
use crate::{CboRoaringBitmapCodec, FieldId, FieldsIdsMap, Index, Result};
|
||||
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
@ -363,7 +365,10 @@ impl FilterCondition {
|
||||
rtxn: &heed::RoTxn,
|
||||
index: &Index,
|
||||
numbers_db: heed::Database<FacetLevelValueF64Codec, CboRoaringBitmapCodec>,
|
||||
strings_db: heed::Database<FacetStringLevelZeroCodec, CboRoaringBitmapCodec>,
|
||||
strings_db: heed::Database<
|
||||
FacetStringLevelZeroCodec,
|
||||
FacetStringLevelZeroValueCodec<CboRoaringBitmapCodec>,
|
||||
>,
|
||||
field_id: FieldId,
|
||||
operator: &Operator,
|
||||
) -> Result<RoaringBitmap> {
|
||||
@ -374,7 +379,8 @@ impl FilterCondition {
|
||||
GreaterThan(val) => (Excluded(*val), Included(f64::MAX)),
|
||||
GreaterThanOrEqual(val) => (Included(*val), Included(f64::MAX)),
|
||||
Equal(number, string) => {
|
||||
let string_docids = strings_db.get(rtxn, &(field_id, &string))?.unwrap_or_default();
|
||||
let (_original_value, string_docids) =
|
||||
strings_db.get(rtxn, &(field_id, &string))?.unwrap_or_default();
|
||||
let number_docids = match number {
|
||||
Some(n) => {
|
||||
let n = Included(*n);
|
||||
|
Reference in New Issue
Block a user