mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-06-10 14:07:58 +00:00
Attempt to improve facet extraction
This commit is contained in:
parent
4e280534a2
commit
9ac4a69fe4
@ -9,7 +9,7 @@ use heed::RoTxn;
|
||||
use serde_json::Value;
|
||||
|
||||
use super::super::cache::BalancedCaches;
|
||||
use super::facet_document::extract_document_facets;
|
||||
use super::facet_document::{extract_document_facets, extract_merged_document_facets};
|
||||
use super::FacetKind;
|
||||
use crate::heed_codec::facet::OrderedF64Codec;
|
||||
use crate::update::del_add::DelAdd;
|
||||
@ -106,17 +106,19 @@ impl FacetedDocidsExtractor {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
extract_document_facets(
|
||||
extract_merged_document_facets(
|
||||
attributes_to_extract,
|
||||
inner.current(rtxn, index, context.db_fields_ids_map)?,
|
||||
inner.merged(rtxn, index, context.db_fields_ids_map)?,
|
||||
inner.external_document_id(),
|
||||
&mut del_add_facet_value,
|
||||
cached_sorter.deref_mut(),
|
||||
new_fields_ids_map.deref_mut(),
|
||||
&mut |fid, depth, value| {
|
||||
&mut |fid, depth, value, del_add_facet_value, cached_sorter| {
|
||||
Self::facet_fn_with_options(
|
||||
&context.doc_alloc,
|
||||
cached_sorter.deref_mut(),
|
||||
cached_sorter,
|
||||
BalancedCaches::insert_del_u32,
|
||||
&mut del_add_facet_value,
|
||||
del_add_facet_value,
|
||||
DelAddFacetValue::insert_del,
|
||||
docid,
|
||||
fid,
|
||||
@ -124,19 +126,12 @@ impl FacetedDocidsExtractor {
|
||||
value,
|
||||
)
|
||||
},
|
||||
)?;
|
||||
|
||||
extract_document_facets(
|
||||
attributes_to_extract,
|
||||
inner.merged(rtxn, index, context.db_fields_ids_map)?,
|
||||
inner.external_document_id(),
|
||||
new_fields_ids_map.deref_mut(),
|
||||
&mut |fid, depth, value| {
|
||||
&mut |fid, depth, value, del_add_facet_value, cached_sorter| {
|
||||
Self::facet_fn_with_options(
|
||||
&context.doc_alloc,
|
||||
cached_sorter.deref_mut(),
|
||||
cached_sorter,
|
||||
BalancedCaches::insert_add_u32,
|
||||
&mut del_add_facet_value,
|
||||
del_add_facet_value,
|
||||
DelAddFacetValue::insert_add,
|
||||
docid,
|
||||
fid,
|
||||
@ -282,7 +277,7 @@ impl FacetedDocidsExtractor {
|
||||
}
|
||||
}
|
||||
|
||||
struct DelAddFacetValue<'doc> {
|
||||
pub(crate) struct DelAddFacetValue<'doc> {
|
||||
strings: HashMap<(FieldId, BVec<'doc, u8>), DelAdd, hashbrown::DefaultHashBuilder, &'doc Bump>,
|
||||
f64s: HashMap<(FieldId, BVec<'doc, u8>), DelAdd, hashbrown::DefaultHashBuilder, &'doc Bump>,
|
||||
}
|
||||
|
@ -1,9 +1,11 @@
|
||||
use serde_json::value::RawValue;
|
||||
use serde_json::Value;
|
||||
|
||||
use crate::update::new::document::Document;
|
||||
use super::extract_facets::DelAddFacetValue;
|
||||
use crate::update::new::document::{Document, MergedDocument, MergedValue};
|
||||
use crate::update::new::extract::geo::extract_geo_coordinates;
|
||||
use crate::update::new::extract::perm_json_p;
|
||||
use crate::{FieldId, GlobalFieldsIdsMap, InternalError, Result, UserError};
|
||||
use crate::update::new::extract::{perm_json_p, BalancedCaches};
|
||||
use crate::{FieldId, FieldsIdsMap, GlobalFieldsIdsMap, InternalError, Result, UserError};
|
||||
|
||||
pub fn extract_document_facets<'doc>(
|
||||
attributes_to_extract: &[&str],
|
||||
@ -15,58 +17,7 @@ pub fn extract_document_facets<'doc>(
|
||||
for res in document.iter_top_level_fields() {
|
||||
let (field_name, value) = res?;
|
||||
|
||||
let mut tokenize_field =
|
||||
|name: &str, depth: perm_json_p::Depth, value: &Value| match field_id_map
|
||||
.id_or_insert(name)
|
||||
{
|
||||
Some(field_id) => facet_fn(field_id, depth, value),
|
||||
None => Err(UserError::AttributeLimitReached.into()),
|
||||
};
|
||||
|
||||
// if the current field is searchable or contains a searchable attribute
|
||||
let selection = perm_json_p::select_field(field_name, Some(attributes_to_extract), &[]);
|
||||
if selection != perm_json_p::Selection::Skip {
|
||||
// parse json.
|
||||
match serde_json::value::to_value(value).map_err(InternalError::SerdeJson)? {
|
||||
Value::Object(object) => {
|
||||
perm_json_p::seek_leaf_values_in_object(
|
||||
&object,
|
||||
Some(attributes_to_extract),
|
||||
&[], // skip no attributes
|
||||
field_name,
|
||||
perm_json_p::Depth::OnBaseKey,
|
||||
&mut tokenize_field,
|
||||
)?;
|
||||
|
||||
if selection == perm_json_p::Selection::Select {
|
||||
tokenize_field(
|
||||
field_name,
|
||||
perm_json_p::Depth::OnBaseKey,
|
||||
&Value::Object(object),
|
||||
)?;
|
||||
}
|
||||
}
|
||||
Value::Array(array) => {
|
||||
perm_json_p::seek_leaf_values_in_array(
|
||||
&array,
|
||||
Some(attributes_to_extract),
|
||||
&[], // skip no attributes
|
||||
field_name,
|
||||
perm_json_p::Depth::OnBaseKey,
|
||||
&mut tokenize_field,
|
||||
)?;
|
||||
|
||||
if selection == perm_json_p::Selection::Select {
|
||||
tokenize_field(
|
||||
field_name,
|
||||
perm_json_p::Depth::OnBaseKey,
|
||||
&Value::Array(array),
|
||||
)?;
|
||||
}
|
||||
}
|
||||
value => tokenize_field(field_name, perm_json_p::Depth::OnBaseKey, &value)?,
|
||||
}
|
||||
}
|
||||
extract_document_facet(attributes_to_extract, field_id_map, facet_fn, field_name, value)?;
|
||||
}
|
||||
|
||||
if attributes_to_extract.contains(&"_geo") {
|
||||
@ -85,3 +36,203 @@ pub fn extract_document_facets<'doc>(
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn extract_document_facet(
|
||||
attributes_to_extract: &[&str],
|
||||
field_id_map: &mut GlobalFieldsIdsMap<'_>,
|
||||
facet_fn: &mut impl FnMut(u16, perm_json_p::Depth, &Value) -> std::result::Result<(), crate::Error>,
|
||||
field_name: &str,
|
||||
value: &serde_json::value::RawValue,
|
||||
) -> Result<()> {
|
||||
let mut tokenize_field = |name: &str, depth: perm_json_p::Depth, value: &Value| {
|
||||
match field_id_map.id_or_insert(name) {
|
||||
Some(field_id) => facet_fn(field_id, depth, value),
|
||||
None => Err(UserError::AttributeLimitReached.into()),
|
||||
}
|
||||
};
|
||||
let selection = perm_json_p::select_field(field_name, Some(attributes_to_extract), &[]);
|
||||
if selection != perm_json_p::Selection::Skip {
|
||||
// parse json.
|
||||
match serde_json::value::to_value(value).map_err(InternalError::SerdeJson)? {
|
||||
Value::Object(object) => {
|
||||
perm_json_p::seek_leaf_values_in_object(
|
||||
&object,
|
||||
Some(attributes_to_extract),
|
||||
&[], // skip no attributes
|
||||
field_name,
|
||||
perm_json_p::Depth::OnBaseKey,
|
||||
&mut tokenize_field,
|
||||
)?;
|
||||
|
||||
if selection == perm_json_p::Selection::Select {
|
||||
tokenize_field(
|
||||
field_name,
|
||||
perm_json_p::Depth::OnBaseKey,
|
||||
&Value::Object(object),
|
||||
)?;
|
||||
}
|
||||
}
|
||||
Value::Array(array) => {
|
||||
perm_json_p::seek_leaf_values_in_array(
|
||||
&array,
|
||||
Some(attributes_to_extract),
|
||||
&[], // skip no attributes
|
||||
field_name,
|
||||
perm_json_p::Depth::OnBaseKey,
|
||||
&mut tokenize_field,
|
||||
)?;
|
||||
|
||||
if selection == perm_json_p::Selection::Select {
|
||||
tokenize_field(
|
||||
field_name,
|
||||
perm_json_p::Depth::OnBaseKey,
|
||||
&Value::Array(array),
|
||||
)?;
|
||||
}
|
||||
}
|
||||
value => tokenize_field(field_name, perm_json_p::Depth::OnBaseKey, &value)?,
|
||||
}
|
||||
};
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
pub fn extract_merged_document_facets<'doc, 'del_add_facet_value, 'cache>(
|
||||
attributes_to_extract: &[&str],
|
||||
document: MergedDocument<'doc, 'doc, 'doc, FieldsIdsMap>,
|
||||
external_document_id: &str,
|
||||
del_add_facet_value: &mut DelAddFacetValue<'del_add_facet_value>,
|
||||
cached_sorter: &mut BalancedCaches<'cache>,
|
||||
field_id_map: &mut GlobalFieldsIdsMap,
|
||||
facet_fn_current: &mut impl FnMut(
|
||||
FieldId,
|
||||
perm_json_p::Depth,
|
||||
&Value,
|
||||
&mut DelAddFacetValue<'del_add_facet_value>,
|
||||
&mut BalancedCaches<'cache>,
|
||||
) -> Result<()>,
|
||||
facet_fn_updated: &mut impl FnMut(
|
||||
FieldId,
|
||||
perm_json_p::Depth,
|
||||
&Value,
|
||||
&mut DelAddFacetValue<'del_add_facet_value>,
|
||||
&mut BalancedCaches<'cache>,
|
||||
) -> Result<()>,
|
||||
) -> Result<()> {
|
||||
for res in document.iter_merged_top_level_fields() {
|
||||
let (field_name, value) = res?;
|
||||
match value {
|
||||
MergedValue::Current(value) => {
|
||||
extract_document_facet(
|
||||
attributes_to_extract,
|
||||
field_id_map,
|
||||
&mut |fid, depth, value| {
|
||||
facet_fn_current(fid, depth, value, del_add_facet_value, cached_sorter)
|
||||
},
|
||||
field_name,
|
||||
value,
|
||||
)?;
|
||||
}
|
||||
MergedValue::Updated(value) => {
|
||||
extract_document_facet(
|
||||
attributes_to_extract,
|
||||
field_id_map,
|
||||
&mut |fid, depth, value| {
|
||||
facet_fn_updated(fid, depth, value, del_add_facet_value, cached_sorter)
|
||||
},
|
||||
field_name,
|
||||
value,
|
||||
)?;
|
||||
}
|
||||
MergedValue::CurrentAndUpdated(current, updated) => {
|
||||
if current.get() == updated.get() {
|
||||
continue;
|
||||
}
|
||||
extract_document_facet(
|
||||
attributes_to_extract,
|
||||
field_id_map,
|
||||
&mut |fid, depth, value| {
|
||||
facet_fn_current(fid, depth, value, del_add_facet_value, cached_sorter)
|
||||
},
|
||||
field_name,
|
||||
current,
|
||||
)?;
|
||||
extract_document_facet(
|
||||
attributes_to_extract,
|
||||
field_id_map,
|
||||
&mut |fid, depth, value| {
|
||||
facet_fn_updated(fid, depth, value, del_add_facet_value, cached_sorter)
|
||||
},
|
||||
field_name,
|
||||
updated,
|
||||
)?;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if attributes_to_extract.contains(&"_geo") {
|
||||
match document.merged_geo_field()? {
|
||||
Some(MergedValue::Current(current)) => {
|
||||
extract_geo_facet(
|
||||
external_document_id,
|
||||
current,
|
||||
field_id_map,
|
||||
&mut |fid, depth, value| {
|
||||
facet_fn_current(fid, depth, value, del_add_facet_value, cached_sorter)
|
||||
},
|
||||
)?;
|
||||
}
|
||||
Some(MergedValue::Updated(updated)) => {
|
||||
extract_geo_facet(
|
||||
external_document_id,
|
||||
updated,
|
||||
field_id_map,
|
||||
&mut |fid, depth, value| {
|
||||
facet_fn_updated(fid, depth, value, del_add_facet_value, cached_sorter)
|
||||
},
|
||||
)?;
|
||||
}
|
||||
Some(MergedValue::CurrentAndUpdated(current, updated))
|
||||
if current.get() != updated.get() =>
|
||||
{
|
||||
extract_geo_facet(
|
||||
external_document_id,
|
||||
current,
|
||||
field_id_map,
|
||||
&mut |fid, depth, value| {
|
||||
facet_fn_current(fid, depth, value, del_add_facet_value, cached_sorter)
|
||||
},
|
||||
)?;
|
||||
extract_geo_facet(
|
||||
external_document_id,
|
||||
updated,
|
||||
field_id_map,
|
||||
&mut |fid, depth, value| {
|
||||
facet_fn_updated(fid, depth, value, del_add_facet_value, cached_sorter)
|
||||
},
|
||||
)?;
|
||||
}
|
||||
None | Some(MergedValue::CurrentAndUpdated(_, _)) => {}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn extract_geo_facet(
|
||||
external_document_id: &str,
|
||||
geo_value: &RawValue,
|
||||
field_id_map: &mut GlobalFieldsIdsMap<'_>,
|
||||
facet_fn: &mut impl FnMut(FieldId, perm_json_p::Depth, &Value) -> Result<()>,
|
||||
) -> Result<()> {
|
||||
if let Some([lat, lng]) = extract_geo_coordinates(external_document_id, geo_value)? {
|
||||
let (lat_fid, lng_fid) = field_id_map
|
||||
.id_or_insert("_geo.lat")
|
||||
.zip(field_id_map.id_or_insert("_geo.lng"))
|
||||
.ok_or(UserError::AttributeLimitReached)?;
|
||||
|
||||
facet_fn(lat_fid, perm_json_p::Depth::OnBaseKey, &lat.into())?;
|
||||
facet_fn(lng_fid, perm_json_p::Depth::OnBaseKey, &lng.into())?;
|
||||
};
|
||||
Ok(())
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user