Add depth to facet extraction so that null inside an array doesn't mark the entire field as null

This commit is contained in:
Louis Dureuil
2024-11-19 17:52:24 +01:00
committed by Clément Renault
parent 50d1bd01df
commit 8049df125b
4 changed files with 65 additions and 30 deletions

View File

@ -14,6 +14,7 @@ use super::FacetKind;
use crate::heed_codec::facet::OrderedF64Codec;
use crate::update::del_add::DelAdd;
use crate::update::new::channel::FieldIdDocidFacetSender;
use crate::update::new::extract::perm_json_p;
use crate::update::new::indexer::document_changes::{
extract, DocumentChangeContext, DocumentChanges, Extractor, IndexingContext, Progress,
};
@ -81,7 +82,7 @@ impl FacetedDocidsExtractor {
inner.current(rtxn, index, context.db_fields_ids_map)?,
inner.external_document_id(),
new_fields_ids_map.deref_mut(),
&mut |fid, value| {
&mut |fid, depth, value| {
Self::facet_fn_with_options(
&context.doc_alloc,
cached_sorter.deref_mut(),
@ -90,6 +91,7 @@ impl FacetedDocidsExtractor {
DelAddFacetValue::insert_del,
docid,
fid,
depth,
value,
)
},
@ -100,7 +102,7 @@ impl FacetedDocidsExtractor {
inner.current(rtxn, index, context.db_fields_ids_map)?,
inner.external_document_id(),
new_fields_ids_map.deref_mut(),
&mut |fid, value| {
&mut |fid, depth, value| {
Self::facet_fn_with_options(
&context.doc_alloc,
cached_sorter.deref_mut(),
@ -109,6 +111,7 @@ impl FacetedDocidsExtractor {
DelAddFacetValue::insert_del,
docid,
fid,
depth,
value,
)
},
@ -119,7 +122,7 @@ impl FacetedDocidsExtractor {
inner.merged(rtxn, index, context.db_fields_ids_map)?,
inner.external_document_id(),
new_fields_ids_map.deref_mut(),
&mut |fid, value| {
&mut |fid, depth, value| {
Self::facet_fn_with_options(
&context.doc_alloc,
cached_sorter.deref_mut(),
@ -128,6 +131,7 @@ impl FacetedDocidsExtractor {
DelAddFacetValue::insert_add,
docid,
fid,
depth,
value,
)
},
@ -138,7 +142,7 @@ impl FacetedDocidsExtractor {
inner.inserted(),
inner.external_document_id(),
new_fields_ids_map.deref_mut(),
&mut |fid, value| {
&mut |fid, depth, value| {
Self::facet_fn_with_options(
&context.doc_alloc,
cached_sorter.deref_mut(),
@ -147,6 +151,7 @@ impl FacetedDocidsExtractor {
DelAddFacetValue::insert_add,
docid,
fid,
depth,
value,
)
},
@ -166,6 +171,7 @@ impl FacetedDocidsExtractor {
facet_fn: impl Fn(&mut DelAddFacetValue<'doc>, FieldId, BVec<'doc, u8>, FacetKind),
docid: DocumentId,
fid: FieldId,
depth: perm_json_p::Depth,
value: &Value,
) -> Result<()> {
let mut buffer = BVec::new_in(doc_alloc);
@ -217,7 +223,7 @@ impl FacetedDocidsExtractor {
}
// Null
// key: fid
Value::Null => {
Value::Null if depth == perm_json_p::Depth::OnBaseKey => {
buffer.clear();
buffer.push(FacetKind::Null as u8);
buffer.extend_from_slice(&fid.to_be_bytes());
@ -225,13 +231,13 @@ impl FacetedDocidsExtractor {
}
// Empty
// key: fid
Value::Array(a) if a.is_empty() => {
Value::Array(a) if a.is_empty() && depth == perm_json_p::Depth::OnBaseKey => {
buffer.clear();
buffer.push(FacetKind::Empty as u8);
buffer.extend_from_slice(&fid.to_be_bytes());
cache_fn(cached_sorter, &buffer, docid)
}
Value::Object(o) if o.is_empty() => {
Value::Object(o) if o.is_empty() && depth == perm_json_p::Depth::OnBaseKey => {
buffer.clear();
buffer.push(FacetKind::Empty as u8);
buffer.extend_from_slice(&fid.to_be_bytes());

View File

@ -10,15 +10,18 @@ pub fn extract_document_facets<'doc>(
document: impl Document<'doc>,
external_document_id: &str,
field_id_map: &mut GlobalFieldsIdsMap,
facet_fn: &mut impl FnMut(FieldId, &Value) -> Result<()>,
facet_fn: &mut impl FnMut(FieldId, perm_json_p::Depth, &Value) -> Result<()>,
) -> Result<()> {
for res in document.iter_top_level_fields() {
let (field_name, value) = res?;
let mut tokenize_field = |name: &str, value: &Value| match field_id_map.id_or_insert(name) {
Some(field_id) => facet_fn(field_id, value),
None => Err(UserError::AttributeLimitReached.into()),
};
let mut tokenize_field =
|name: &str, depth: perm_json_p::Depth, value: &Value| match field_id_map
.id_or_insert(name)
{
Some(field_id) => facet_fn(field_id, depth, value),
None => Err(UserError::AttributeLimitReached.into()),
};
// if the current field is one of the attributes to extract, or contains one
if perm_json_p::select_field(field_name, Some(attributes_to_extract), &[]) {
@ -29,6 +32,7 @@ pub fn extract_document_facets<'doc>(
Some(attributes_to_extract),
&[], // skip no attributes
field_name,
perm_json_p::Depth::OnBaseKey,
&mut tokenize_field,
)?,
Value::Array(array) => perm_json_p::seek_leaf_values_in_array(
@ -36,9 +40,10 @@ pub fn extract_document_facets<'doc>(
Some(attributes_to_extract),
&[], // skip no attributes
field_name,
perm_json_p::Depth::OnBaseKey,
&mut tokenize_field,
)?,
value => tokenize_field(field_name, &value)?,
value => tokenize_field(field_name, perm_json_p::Depth::OnBaseKey, &value)?,
}
}
}
@ -51,8 +56,8 @@ pub fn extract_document_facets<'doc>(
.zip(field_id_map.id_or_insert("_geo.lng"))
.ok_or(UserError::AttributeLimitReached)?;
facet_fn(lat_fid, &lat.into())?;
facet_fn(lng_fid, &lng.into())?;
facet_fn(lat_fid, perm_json_p::Depth::OnBaseKey, &lat.into())?;
facet_fn(lng_fid, perm_json_p::Depth::OnBaseKey, &lng.into())?;
}
}
}