Compute the field distribution and convert _geo into an f64s

This commit is contained in:
Clément Renault
2024-11-13 14:15:42 +01:00
parent e627e182ce
commit 8e5b1a3ec1
10 changed files with 86 additions and 42 deletions

View File

@ -1,18 +1,16 @@
use std::cell::RefCell;
use std::collections::HashSet;
use std::mem::size_of;
use std::ops::DerefMut as _;
use bumpalo::collections::Vec as BVec;
use bumpalo::Bump;
use hashbrown::HashMap;
use heed::{BytesDecode, RoTxn};
use heed::RoTxn;
use serde_json::Value;
use super::super::cache::BalancedCaches;
use super::facet_document::extract_document_facets;
use super::FacetKind;
use crate::facet::value_encoding::f64_into_bytes;
use crate::heed_codec::facet::OrderedF64Codec;
use crate::update::del_add::DelAdd;
use crate::update::new::channel::FieldIdDocidFacetSender;
@ -80,6 +78,7 @@ impl FacetedDocidsExtractor {
DocumentChange::Deletion(inner) => extract_document_facets(
attributes_to_extract,
inner.current(rtxn, index, context.db_fields_ids_map)?,
inner.external_document_id(),
new_fields_ids_map.deref_mut(),
&mut |fid, value| {
Self::facet_fn_with_options(
@ -98,6 +97,7 @@ impl FacetedDocidsExtractor {
extract_document_facets(
attributes_to_extract,
inner.current(rtxn, index, context.db_fields_ids_map)?,
inner.external_document_id(),
new_fields_ids_map.deref_mut(),
&mut |fid, value| {
Self::facet_fn_with_options(
@ -116,6 +116,7 @@ impl FacetedDocidsExtractor {
extract_document_facets(
attributes_to_extract,
inner.merged(rtxn, index, context.db_fields_ids_map)?,
inner.external_document_id(),
new_fields_ids_map.deref_mut(),
&mut |fid, value| {
Self::facet_fn_with_options(
@ -134,6 +135,7 @@ impl FacetedDocidsExtractor {
DocumentChange::Insertion(inner) => extract_document_facets(
attributes_to_extract,
inner.inserted(),
inner.external_document_id(),
new_fields_ids_map.deref_mut(),
&mut |fid, value| {
Self::facet_fn_with_options(

View File

@ -1,17 +1,18 @@
use serde_json::Value;
use crate::update::new::document::Document;
use crate::update::new::extract::geo::extract_geo_coordinates;
use crate::update::new::extract::perm_json_p;
use crate::{FieldId, GlobalFieldsIdsMap, InternalError, Result, UserError};
pub fn extract_document_facets<'doc>(
attributes_to_extract: &[&str],
document: impl Document<'doc>,
external_document_id: &str,
field_id_map: &mut GlobalFieldsIdsMap,
facet_fn: &mut impl FnMut(FieldId, &Value) -> Result<()>,
) -> Result<()> {
let geo = document.geo_field().transpose().map(|res| res.map(|rval| ("_geo", rval)));
for res in document.iter_top_level_fields().chain(geo) {
for res in document.iter_top_level_fields() {
let (field_name, value) = res?;
let mut tokenize_field = |name: &str, value: &Value| match field_id_map.id_or_insert(name) {
@ -42,5 +43,19 @@ pub fn extract_document_facets<'doc>(
}
}
if attributes_to_extract.contains(&"_geo") {
if let Some(geo_value) = document.geo_field()? {
if let Some([lat, lng]) = extract_geo_coordinates(external_document_id, geo_value)? {
let (lat_fid, lng_fid) = field_id_map
.id_or_insert("_geo.lat")
.zip(field_id_map.id_or_insert("_geo.lng"))
.ok_or(UserError::AttributeLimitReached)?;
facet_fn(lat_fid, &lat.into())?;
facet_fn(lng_fid, &lng.into())?;
}
}
}
Ok(())
}