mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-10-24 04:26:27 +00:00
fix facet distribution case
This commit is contained in:
@@ -39,7 +39,7 @@ pub fn bucket_sort<'c, FI>(
|
||||
query: &str,
|
||||
range: Range<usize>,
|
||||
facets_docids: Option<SetBuf<DocumentId>>,
|
||||
facet_count_docids: Option<HashMap<String, HashMap<String, Cow<Set<DocumentId>>>>>,
|
||||
facet_count_docids: Option<HashMap<String, HashMap<String, (&str, Cow<Set<DocumentId>>)>>>,
|
||||
filter: Option<FI>,
|
||||
criteria: Criteria<'c>,
|
||||
searchable_attrs: Option<ReorderedAttrs>,
|
||||
@@ -199,7 +199,7 @@ pub fn bucket_sort_with_distinct<'c, FI, FD>(
|
||||
query: &str,
|
||||
range: Range<usize>,
|
||||
facets_docids: Option<SetBuf<DocumentId>>,
|
||||
facet_count_docids: Option<HashMap<String, HashMap<String, Cow<Set<DocumentId>>>>>,
|
||||
facet_count_docids: Option<HashMap<String, HashMap<String, (&str, Cow<Set<DocumentId>>)>>>,
|
||||
filter: Option<FI>,
|
||||
distinct: FD,
|
||||
distinct_size: usize,
|
||||
@@ -636,18 +636,18 @@ pub fn placeholder_document_sort(
|
||||
}
|
||||
|
||||
/// For each entry in facet_docids, calculates the number of documents in the intersection with candidate_docids.
|
||||
pub fn facet_count(
|
||||
facet_docids: HashMap<String, HashMap<String, Cow<Set<DocumentId>>>>,
|
||||
fn facet_count(
|
||||
facet_docids: HashMap<String, HashMap<String, (&str, Cow<Set<DocumentId>>)>>,
|
||||
candidate_docids: &Set<DocumentId>,
|
||||
) -> HashMap<String, HashMap<String, usize>> {
|
||||
let mut facets_counts = HashMap::with_capacity(facet_docids.len());
|
||||
for (key, doc_map) in facet_docids {
|
||||
let mut count_map = HashMap::with_capacity(doc_map.len());
|
||||
for (value, docids) in doc_map {
|
||||
for (_, (value, docids)) in doc_map {
|
||||
let mut counter = Counter::new();
|
||||
let op = OpBuilder::new(docids.as_ref(), candidate_docids).intersection();
|
||||
SetOperation::<DocumentId>::extend_collection(op, &mut counter);
|
||||
count_map.insert(value, counter.0);
|
||||
count_map.insert(value.to_string(), counter.0);
|
||||
}
|
||||
facets_counts.insert(key, count_map);
|
||||
}
|
||||
|
@@ -164,7 +164,7 @@ impl<'a> heed::BytesDecode<'a> for FacetKey {
|
||||
}
|
||||
|
||||
pub fn add_to_facet_map(
|
||||
facet_map: &mut HashMap<FacetKey, Vec<DocumentId>>,
|
||||
facet_map: &mut HashMap<FacetKey, (String, Vec<DocumentId>)>,
|
||||
field_id: FieldId,
|
||||
value: Value,
|
||||
document_id: DocumentId,
|
||||
@@ -175,8 +175,8 @@ pub fn add_to_facet_map(
|
||||
Value::Null => return Ok(()),
|
||||
value => return Err(FacetError::InvalidDocumentAttribute(value.to_string())),
|
||||
};
|
||||
let key = FacetKey::new(field_id, value);
|
||||
facet_map.entry(key).or_insert_with(Vec::new).push(document_id);
|
||||
let key = FacetKey::new(field_id, value.clone());
|
||||
facet_map.entry(key).or_insert_with(|| (value, Vec::new())).1.push(document_id);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -185,8 +185,10 @@ pub fn facet_map_from_docids(
|
||||
index: &crate::Index,
|
||||
document_ids: &[DocumentId],
|
||||
attributes_for_facetting: &[FieldId],
|
||||
) -> MResult<HashMap<FacetKey, Vec<DocumentId>>> {
|
||||
let mut facet_map = HashMap::new();
|
||||
) -> MResult<HashMap<FacetKey, (String, Vec<DocumentId>)>> {
|
||||
// A hashmap that ascociate a facet key to a pair containing the original facet attribute
|
||||
// string with it's case preserved, and a list of document ids for that facet attribute.
|
||||
let mut facet_map: HashMap<FacetKey, (String, Vec<DocumentId>)> = HashMap::new();
|
||||
for document_id in document_ids {
|
||||
for result in index
|
||||
.documents_fields
|
||||
@@ -212,7 +214,7 @@ pub fn facet_map_from_docs(
|
||||
schema: &Schema,
|
||||
documents: &HashMap<DocumentId, IndexMap<String, Value>>,
|
||||
attributes_for_facetting: &[FieldId],
|
||||
) -> MResult<HashMap<FacetKey, Vec<DocumentId>>> {
|
||||
) -> MResult<HashMap<FacetKey, (String, Vec<DocumentId>)>> {
|
||||
let mut facet_map = HashMap::new();
|
||||
let attributes_for_facetting = attributes_for_facetting
|
||||
.iter()
|
||||
|
@@ -97,16 +97,14 @@ impl<'c, 'f, 'd, 'i> QueryBuilder<'c, 'f, 'd, 'i> {
|
||||
.unwrap_or_default();
|
||||
ors.push(docids);
|
||||
}
|
||||
let sets: Vec<_> = ors.iter().map(Cow::deref).collect();
|
||||
let or_result = sdset::multi::OpBuilder::from_vec(sets)
|
||||
.union()
|
||||
.into_set_buf();
|
||||
let sets: Vec<_> = ors.iter().map(|i| &i.1).map(Cow::deref).collect();
|
||||
let or_result = sdset::multi::OpBuilder::from_vec(sets).union().into_set_buf();
|
||||
ands.push(Cow::Owned(or_result));
|
||||
ors.clear();
|
||||
}
|
||||
Either::Right(key) => {
|
||||
match self.index.facets.facet_document_ids(reader, &key)? {
|
||||
Some(docids) => ands.push(docids),
|
||||
Some(docids) => ands.push(docids.1),
|
||||
// no candidates for search, early return.
|
||||
None => return Ok(Some(SetBuf::default())),
|
||||
}
|
||||
|
@@ -1,7 +1,8 @@
|
||||
use std::borrow::Cow;
|
||||
use std::collections::HashMap;
|
||||
use std::mem;
|
||||
|
||||
use heed::{RwTxn, RoTxn, Result as ZResult, RoRange};
|
||||
use heed::{RwTxn, RoTxn, Result as ZResult, RoRange, types::Str, BytesEncode, BytesDecode};
|
||||
use sdset::{SetBuf, Set, SetOperation};
|
||||
|
||||
use meilisearch_types::DocumentId;
|
||||
@@ -14,40 +15,76 @@ use super::cow_set::CowSet;
|
||||
/// contains facet info
|
||||
#[derive(Clone, Copy)]
|
||||
pub struct Facets {
|
||||
pub(crate) facets: heed::Database<FacetKey, CowSet<DocumentId>>,
|
||||
pub(crate) facets: heed::Database<FacetKey, FacetData>,
|
||||
}
|
||||
|
||||
pub struct FacetData;
|
||||
|
||||
impl<'a> BytesEncode<'a> for FacetData {
|
||||
type EItem = (&'a str, &'a Set<DocumentId>);
|
||||
|
||||
fn bytes_encode(item: &'a Self::EItem) -> Option<Cow<'a, [u8]>> {
|
||||
// get size of the first item
|
||||
let first_size = item.0.as_bytes().len();
|
||||
let size = mem::size_of::<usize>()
|
||||
+ first_size
|
||||
+ item.1.len() * mem::size_of::<DocumentId>();
|
||||
let mut buffer = Vec::with_capacity(size);
|
||||
// encode the length of the first item
|
||||
buffer.extend_from_slice(&first_size.to_be_bytes());
|
||||
buffer.extend_from_slice(Str::bytes_encode(&item.0)?.as_ref());
|
||||
let second_slice = CowSet::bytes_encode(&item.1)?;
|
||||
buffer.extend_from_slice(second_slice.as_ref());
|
||||
Some(Cow::Owned(buffer))
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> BytesDecode<'a> for FacetData {
|
||||
type DItem = (&'a str, Cow<'a, Set<DocumentId>>);
|
||||
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
||||
let mut size_buf = [0; 8];
|
||||
size_buf.copy_from_slice(bytes.get(0..8)?);
|
||||
// decode size of the first item from the bytes
|
||||
let first_size = usize::from_be_bytes(size_buf);
|
||||
// decode first and second items
|
||||
let first_item = Str::bytes_decode(bytes.get(8..(8 + first_size))?)?;
|
||||
let second_item = CowSet::bytes_decode(bytes.get((8 + first_size)..)?)?;
|
||||
Some((first_item, second_item))
|
||||
}
|
||||
}
|
||||
|
||||
impl Facets {
|
||||
// we use sdset::SetBuf to ensure the docids are sorted.
|
||||
pub fn put_facet_document_ids(&self, writer: &mut RwTxn<MainT>, facet_key: FacetKey, doc_ids: &Set<DocumentId>) -> ZResult<()> {
|
||||
self.facets.put(writer, &facet_key, doc_ids)
|
||||
pub fn put_facet_document_ids(&self, writer: &mut RwTxn<MainT>, facet_key: FacetKey, doc_ids: &Set<DocumentId>, facet_value: &str) -> ZResult<()> {
|
||||
self.facets.put(writer, &facet_key, &(facet_value, doc_ids))
|
||||
}
|
||||
|
||||
pub fn field_document_ids<'txn>(&self, reader: &'txn RoTxn<MainT>, field_id: FieldId) -> ZResult<RoRange<'txn, FacetKey, CowSet<DocumentId>>> {
|
||||
pub fn field_document_ids<'txn>(&self, reader: &'txn RoTxn<MainT>, field_id: FieldId) -> ZResult<RoRange<'txn, FacetKey, FacetData>> {
|
||||
self.facets.prefix_iter(reader, &FacetKey::new(field_id, String::new()))
|
||||
}
|
||||
|
||||
pub fn facet_document_ids<'txn>(&self, reader: &'txn RoTxn<MainT>, facet_key: &FacetKey) -> ZResult<Option<Cow<'txn, Set<DocumentId>>>> {
|
||||
pub fn facet_document_ids<'txn>(&self, reader: &'txn RoTxn<MainT>, facet_key: &FacetKey) -> ZResult<Option<(&'txn str,Cow<'txn, Set<DocumentId>>)>> {
|
||||
self.facets.get(reader, &facet_key)
|
||||
}
|
||||
|
||||
/// updates the facets store, revmoving the documents from the facets provided in the
|
||||
/// `facet_map` argument
|
||||
pub fn remove(&self, writer: &mut RwTxn<MainT>, facet_map: HashMap<FacetKey, Vec<DocumentId>>) -> ZResult<()> {
|
||||
for (key, document_ids) in facet_map {
|
||||
if let Some(old) = self.facets.get(writer, &key)? {
|
||||
pub fn remove(&self, writer: &mut RwTxn<MainT>, facet_map: HashMap<FacetKey, (String, Vec<DocumentId>)>) -> ZResult<()> {
|
||||
for (key, (name, document_ids)) in facet_map {
|
||||
if let Some((_, old)) = self.facets.get(writer, &key)? {
|
||||
let to_remove = SetBuf::from_dirty(document_ids);
|
||||
let new = sdset::duo::OpBuilder::new(old.as_ref(), to_remove.as_set()).difference().into_set_buf();
|
||||
self.facets.put(writer, &key, new.as_set())?;
|
||||
self.facets.put(writer, &key, &(&name, new.as_set()))?;
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn add(&self, writer: &mut RwTxn<MainT>, facet_map: HashMap<FacetKey, Vec<DocumentId>>) -> ZResult<()> {
|
||||
for (key, document_ids) in facet_map {
|
||||
pub fn add(&self, writer: &mut RwTxn<MainT>, facet_map: HashMap<FacetKey, (String, Vec<DocumentId>)>) -> ZResult<()> {
|
||||
for (key, (facet_name, document_ids)) in facet_map {
|
||||
let set = SetBuf::from_dirty(document_ids);
|
||||
self.put_facet_document_ids(writer, key, set.as_set())?;
|
||||
self.put_facet_document_ids(writer, key, set.as_set(), &facet_name)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
Reference in New Issue
Block a user