mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-06-11 22:47:46 +00:00
Attempt to improve facet extraction
This commit is contained in:
parent
4e280534a2
commit
9ac4a69fe4
@ -9,7 +9,7 @@ use heed::RoTxn;
|
|||||||
use serde_json::Value;
|
use serde_json::Value;
|
||||||
|
|
||||||
use super::super::cache::BalancedCaches;
|
use super::super::cache::BalancedCaches;
|
||||||
use super::facet_document::extract_document_facets;
|
use super::facet_document::{extract_document_facets, extract_merged_document_facets};
|
||||||
use super::FacetKind;
|
use super::FacetKind;
|
||||||
use crate::heed_codec::facet::OrderedF64Codec;
|
use crate::heed_codec::facet::OrderedF64Codec;
|
||||||
use crate::update::del_add::DelAdd;
|
use crate::update::del_add::DelAdd;
|
||||||
@ -106,17 +106,19 @@ impl FacetedDocidsExtractor {
|
|||||||
return Ok(());
|
return Ok(());
|
||||||
}
|
}
|
||||||
|
|
||||||
extract_document_facets(
|
extract_merged_document_facets(
|
||||||
attributes_to_extract,
|
attributes_to_extract,
|
||||||
inner.current(rtxn, index, context.db_fields_ids_map)?,
|
inner.merged(rtxn, index, context.db_fields_ids_map)?,
|
||||||
inner.external_document_id(),
|
inner.external_document_id(),
|
||||||
|
&mut del_add_facet_value,
|
||||||
|
cached_sorter.deref_mut(),
|
||||||
new_fields_ids_map.deref_mut(),
|
new_fields_ids_map.deref_mut(),
|
||||||
&mut |fid, depth, value| {
|
&mut |fid, depth, value, del_add_facet_value, cached_sorter| {
|
||||||
Self::facet_fn_with_options(
|
Self::facet_fn_with_options(
|
||||||
&context.doc_alloc,
|
&context.doc_alloc,
|
||||||
cached_sorter.deref_mut(),
|
cached_sorter,
|
||||||
BalancedCaches::insert_del_u32,
|
BalancedCaches::insert_del_u32,
|
||||||
&mut del_add_facet_value,
|
del_add_facet_value,
|
||||||
DelAddFacetValue::insert_del,
|
DelAddFacetValue::insert_del,
|
||||||
docid,
|
docid,
|
||||||
fid,
|
fid,
|
||||||
@ -124,19 +126,12 @@ impl FacetedDocidsExtractor {
|
|||||||
value,
|
value,
|
||||||
)
|
)
|
||||||
},
|
},
|
||||||
)?;
|
&mut |fid, depth, value, del_add_facet_value, cached_sorter| {
|
||||||
|
|
||||||
extract_document_facets(
|
|
||||||
attributes_to_extract,
|
|
||||||
inner.merged(rtxn, index, context.db_fields_ids_map)?,
|
|
||||||
inner.external_document_id(),
|
|
||||||
new_fields_ids_map.deref_mut(),
|
|
||||||
&mut |fid, depth, value| {
|
|
||||||
Self::facet_fn_with_options(
|
Self::facet_fn_with_options(
|
||||||
&context.doc_alloc,
|
&context.doc_alloc,
|
||||||
cached_sorter.deref_mut(),
|
cached_sorter,
|
||||||
BalancedCaches::insert_add_u32,
|
BalancedCaches::insert_add_u32,
|
||||||
&mut del_add_facet_value,
|
del_add_facet_value,
|
||||||
DelAddFacetValue::insert_add,
|
DelAddFacetValue::insert_add,
|
||||||
docid,
|
docid,
|
||||||
fid,
|
fid,
|
||||||
@ -282,7 +277,7 @@ impl FacetedDocidsExtractor {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
struct DelAddFacetValue<'doc> {
|
pub(crate) struct DelAddFacetValue<'doc> {
|
||||||
strings: HashMap<(FieldId, BVec<'doc, u8>), DelAdd, hashbrown::DefaultHashBuilder, &'doc Bump>,
|
strings: HashMap<(FieldId, BVec<'doc, u8>), DelAdd, hashbrown::DefaultHashBuilder, &'doc Bump>,
|
||||||
f64s: HashMap<(FieldId, BVec<'doc, u8>), DelAdd, hashbrown::DefaultHashBuilder, &'doc Bump>,
|
f64s: HashMap<(FieldId, BVec<'doc, u8>), DelAdd, hashbrown::DefaultHashBuilder, &'doc Bump>,
|
||||||
}
|
}
|
||||||
|
@ -1,9 +1,11 @@
|
|||||||
|
use serde_json::value::RawValue;
|
||||||
use serde_json::Value;
|
use serde_json::Value;
|
||||||
|
|
||||||
use crate::update::new::document::Document;
|
use super::extract_facets::DelAddFacetValue;
|
||||||
|
use crate::update::new::document::{Document, MergedDocument, MergedValue};
|
||||||
use crate::update::new::extract::geo::extract_geo_coordinates;
|
use crate::update::new::extract::geo::extract_geo_coordinates;
|
||||||
use crate::update::new::extract::perm_json_p;
|
use crate::update::new::extract::{perm_json_p, BalancedCaches};
|
||||||
use crate::{FieldId, GlobalFieldsIdsMap, InternalError, Result, UserError};
|
use crate::{FieldId, FieldsIdsMap, GlobalFieldsIdsMap, InternalError, Result, UserError};
|
||||||
|
|
||||||
pub fn extract_document_facets<'doc>(
|
pub fn extract_document_facets<'doc>(
|
||||||
attributes_to_extract: &[&str],
|
attributes_to_extract: &[&str],
|
||||||
@ -15,58 +17,7 @@ pub fn extract_document_facets<'doc>(
|
|||||||
for res in document.iter_top_level_fields() {
|
for res in document.iter_top_level_fields() {
|
||||||
let (field_name, value) = res?;
|
let (field_name, value) = res?;
|
||||||
|
|
||||||
let mut tokenize_field =
|
extract_document_facet(attributes_to_extract, field_id_map, facet_fn, field_name, value)?;
|
||||||
|name: &str, depth: perm_json_p::Depth, value: &Value| match field_id_map
|
|
||||||
.id_or_insert(name)
|
|
||||||
{
|
|
||||||
Some(field_id) => facet_fn(field_id, depth, value),
|
|
||||||
None => Err(UserError::AttributeLimitReached.into()),
|
|
||||||
};
|
|
||||||
|
|
||||||
// if the current field is searchable or contains a searchable attribute
|
|
||||||
let selection = perm_json_p::select_field(field_name, Some(attributes_to_extract), &[]);
|
|
||||||
if selection != perm_json_p::Selection::Skip {
|
|
||||||
// parse json.
|
|
||||||
match serde_json::value::to_value(value).map_err(InternalError::SerdeJson)? {
|
|
||||||
Value::Object(object) => {
|
|
||||||
perm_json_p::seek_leaf_values_in_object(
|
|
||||||
&object,
|
|
||||||
Some(attributes_to_extract),
|
|
||||||
&[], // skip no attributes
|
|
||||||
field_name,
|
|
||||||
perm_json_p::Depth::OnBaseKey,
|
|
||||||
&mut tokenize_field,
|
|
||||||
)?;
|
|
||||||
|
|
||||||
if selection == perm_json_p::Selection::Select {
|
|
||||||
tokenize_field(
|
|
||||||
field_name,
|
|
||||||
perm_json_p::Depth::OnBaseKey,
|
|
||||||
&Value::Object(object),
|
|
||||||
)?;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Value::Array(array) => {
|
|
||||||
perm_json_p::seek_leaf_values_in_array(
|
|
||||||
&array,
|
|
||||||
Some(attributes_to_extract),
|
|
||||||
&[], // skip no attributes
|
|
||||||
field_name,
|
|
||||||
perm_json_p::Depth::OnBaseKey,
|
|
||||||
&mut tokenize_field,
|
|
||||||
)?;
|
|
||||||
|
|
||||||
if selection == perm_json_p::Selection::Select {
|
|
||||||
tokenize_field(
|
|
||||||
field_name,
|
|
||||||
perm_json_p::Depth::OnBaseKey,
|
|
||||||
&Value::Array(array),
|
|
||||||
)?;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
value => tokenize_field(field_name, perm_json_p::Depth::OnBaseKey, &value)?,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if attributes_to_extract.contains(&"_geo") {
|
if attributes_to_extract.contains(&"_geo") {
|
||||||
@ -85,3 +36,203 @@ pub fn extract_document_facets<'doc>(
|
|||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn extract_document_facet(
|
||||||
|
attributes_to_extract: &[&str],
|
||||||
|
field_id_map: &mut GlobalFieldsIdsMap<'_>,
|
||||||
|
facet_fn: &mut impl FnMut(u16, perm_json_p::Depth, &Value) -> std::result::Result<(), crate::Error>,
|
||||||
|
field_name: &str,
|
||||||
|
value: &serde_json::value::RawValue,
|
||||||
|
) -> Result<()> {
|
||||||
|
let mut tokenize_field = |name: &str, depth: perm_json_p::Depth, value: &Value| {
|
||||||
|
match field_id_map.id_or_insert(name) {
|
||||||
|
Some(field_id) => facet_fn(field_id, depth, value),
|
||||||
|
None => Err(UserError::AttributeLimitReached.into()),
|
||||||
|
}
|
||||||
|
};
|
||||||
|
let selection = perm_json_p::select_field(field_name, Some(attributes_to_extract), &[]);
|
||||||
|
if selection != perm_json_p::Selection::Skip {
|
||||||
|
// parse json.
|
||||||
|
match serde_json::value::to_value(value).map_err(InternalError::SerdeJson)? {
|
||||||
|
Value::Object(object) => {
|
||||||
|
perm_json_p::seek_leaf_values_in_object(
|
||||||
|
&object,
|
||||||
|
Some(attributes_to_extract),
|
||||||
|
&[], // skip no attributes
|
||||||
|
field_name,
|
||||||
|
perm_json_p::Depth::OnBaseKey,
|
||||||
|
&mut tokenize_field,
|
||||||
|
)?;
|
||||||
|
|
||||||
|
if selection == perm_json_p::Selection::Select {
|
||||||
|
tokenize_field(
|
||||||
|
field_name,
|
||||||
|
perm_json_p::Depth::OnBaseKey,
|
||||||
|
&Value::Object(object),
|
||||||
|
)?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Value::Array(array) => {
|
||||||
|
perm_json_p::seek_leaf_values_in_array(
|
||||||
|
&array,
|
||||||
|
Some(attributes_to_extract),
|
||||||
|
&[], // skip no attributes
|
||||||
|
field_name,
|
||||||
|
perm_json_p::Depth::OnBaseKey,
|
||||||
|
&mut tokenize_field,
|
||||||
|
)?;
|
||||||
|
|
||||||
|
if selection == perm_json_p::Selection::Select {
|
||||||
|
tokenize_field(
|
||||||
|
field_name,
|
||||||
|
perm_json_p::Depth::OnBaseKey,
|
||||||
|
&Value::Array(array),
|
||||||
|
)?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
value => tokenize_field(field_name, perm_json_p::Depth::OnBaseKey, &value)?,
|
||||||
|
}
|
||||||
|
};
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
#[allow(clippy::too_many_arguments)]
|
||||||
|
pub fn extract_merged_document_facets<'doc, 'del_add_facet_value, 'cache>(
|
||||||
|
attributes_to_extract: &[&str],
|
||||||
|
document: MergedDocument<'doc, 'doc, 'doc, FieldsIdsMap>,
|
||||||
|
external_document_id: &str,
|
||||||
|
del_add_facet_value: &mut DelAddFacetValue<'del_add_facet_value>,
|
||||||
|
cached_sorter: &mut BalancedCaches<'cache>,
|
||||||
|
field_id_map: &mut GlobalFieldsIdsMap,
|
||||||
|
facet_fn_current: &mut impl FnMut(
|
||||||
|
FieldId,
|
||||||
|
perm_json_p::Depth,
|
||||||
|
&Value,
|
||||||
|
&mut DelAddFacetValue<'del_add_facet_value>,
|
||||||
|
&mut BalancedCaches<'cache>,
|
||||||
|
) -> Result<()>,
|
||||||
|
facet_fn_updated: &mut impl FnMut(
|
||||||
|
FieldId,
|
||||||
|
perm_json_p::Depth,
|
||||||
|
&Value,
|
||||||
|
&mut DelAddFacetValue<'del_add_facet_value>,
|
||||||
|
&mut BalancedCaches<'cache>,
|
||||||
|
) -> Result<()>,
|
||||||
|
) -> Result<()> {
|
||||||
|
for res in document.iter_merged_top_level_fields() {
|
||||||
|
let (field_name, value) = res?;
|
||||||
|
match value {
|
||||||
|
MergedValue::Current(value) => {
|
||||||
|
extract_document_facet(
|
||||||
|
attributes_to_extract,
|
||||||
|
field_id_map,
|
||||||
|
&mut |fid, depth, value| {
|
||||||
|
facet_fn_current(fid, depth, value, del_add_facet_value, cached_sorter)
|
||||||
|
},
|
||||||
|
field_name,
|
||||||
|
value,
|
||||||
|
)?;
|
||||||
|
}
|
||||||
|
MergedValue::Updated(value) => {
|
||||||
|
extract_document_facet(
|
||||||
|
attributes_to_extract,
|
||||||
|
field_id_map,
|
||||||
|
&mut |fid, depth, value| {
|
||||||
|
facet_fn_updated(fid, depth, value, del_add_facet_value, cached_sorter)
|
||||||
|
},
|
||||||
|
field_name,
|
||||||
|
value,
|
||||||
|
)?;
|
||||||
|
}
|
||||||
|
MergedValue::CurrentAndUpdated(current, updated) => {
|
||||||
|
if current.get() == updated.get() {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
extract_document_facet(
|
||||||
|
attributes_to_extract,
|
||||||
|
field_id_map,
|
||||||
|
&mut |fid, depth, value| {
|
||||||
|
facet_fn_current(fid, depth, value, del_add_facet_value, cached_sorter)
|
||||||
|
},
|
||||||
|
field_name,
|
||||||
|
current,
|
||||||
|
)?;
|
||||||
|
extract_document_facet(
|
||||||
|
attributes_to_extract,
|
||||||
|
field_id_map,
|
||||||
|
&mut |fid, depth, value| {
|
||||||
|
facet_fn_updated(fid, depth, value, del_add_facet_value, cached_sorter)
|
||||||
|
},
|
||||||
|
field_name,
|
||||||
|
updated,
|
||||||
|
)?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if attributes_to_extract.contains(&"_geo") {
|
||||||
|
match document.merged_geo_field()? {
|
||||||
|
Some(MergedValue::Current(current)) => {
|
||||||
|
extract_geo_facet(
|
||||||
|
external_document_id,
|
||||||
|
current,
|
||||||
|
field_id_map,
|
||||||
|
&mut |fid, depth, value| {
|
||||||
|
facet_fn_current(fid, depth, value, del_add_facet_value, cached_sorter)
|
||||||
|
},
|
||||||
|
)?;
|
||||||
|
}
|
||||||
|
Some(MergedValue::Updated(updated)) => {
|
||||||
|
extract_geo_facet(
|
||||||
|
external_document_id,
|
||||||
|
updated,
|
||||||
|
field_id_map,
|
||||||
|
&mut |fid, depth, value| {
|
||||||
|
facet_fn_updated(fid, depth, value, del_add_facet_value, cached_sorter)
|
||||||
|
},
|
||||||
|
)?;
|
||||||
|
}
|
||||||
|
Some(MergedValue::CurrentAndUpdated(current, updated))
|
||||||
|
if current.get() != updated.get() =>
|
||||||
|
{
|
||||||
|
extract_geo_facet(
|
||||||
|
external_document_id,
|
||||||
|
current,
|
||||||
|
field_id_map,
|
||||||
|
&mut |fid, depth, value| {
|
||||||
|
facet_fn_current(fid, depth, value, del_add_facet_value, cached_sorter)
|
||||||
|
},
|
||||||
|
)?;
|
||||||
|
extract_geo_facet(
|
||||||
|
external_document_id,
|
||||||
|
updated,
|
||||||
|
field_id_map,
|
||||||
|
&mut |fid, depth, value| {
|
||||||
|
facet_fn_updated(fid, depth, value, del_add_facet_value, cached_sorter)
|
||||||
|
},
|
||||||
|
)?;
|
||||||
|
}
|
||||||
|
None | Some(MergedValue::CurrentAndUpdated(_, _)) => {}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn extract_geo_facet(
|
||||||
|
external_document_id: &str,
|
||||||
|
geo_value: &RawValue,
|
||||||
|
field_id_map: &mut GlobalFieldsIdsMap<'_>,
|
||||||
|
facet_fn: &mut impl FnMut(FieldId, perm_json_p::Depth, &Value) -> Result<()>,
|
||||||
|
) -> Result<()> {
|
||||||
|
if let Some([lat, lng]) = extract_geo_coordinates(external_document_id, geo_value)? {
|
||||||
|
let (lat_fid, lng_fid) = field_id_map
|
||||||
|
.id_or_insert("_geo.lat")
|
||||||
|
.zip(field_id_map.id_or_insert("_geo.lng"))
|
||||||
|
.ok_or(UserError::AttributeLimitReached)?;
|
||||||
|
|
||||||
|
facet_fn(lat_fid, perm_json_p::Depth::OnBaseKey, &lat.into())?;
|
||||||
|
facet_fn(lng_fid, perm_json_p::Depth::OnBaseKey, &lng.into())?;
|
||||||
|
};
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user