mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-11-04 01:46:28 +00:00 
			
		
		
		
	Compute the field distribution and convert _geo into an f64s
This commit is contained in:
		@@ -354,6 +354,8 @@ where
 | 
			
		||||
 | 
			
		||||
    if let Some(geo_value) = document.geo_field()? {
 | 
			
		||||
        let fid = fields_ids_map.id_or_insert("_geo").ok_or(UserError::AttributeLimitReached)?;
 | 
			
		||||
        fields_ids_map.id_or_insert("_geo.lat").ok_or(UserError::AttributeLimitReached)?;
 | 
			
		||||
        fields_ids_map.id_or_insert("_geo.lng").ok_or(UserError::AttributeLimitReached)?;
 | 
			
		||||
        unordered_field_buffer.push((fid, geo_value));
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -58,7 +58,8 @@ impl<'a, 'extractor> Extractor<'extractor> for DocumentsExtractor<'a> {
 | 
			
		||||
                        context.index,
 | 
			
		||||
                        &context.db_fields_ids_map,
 | 
			
		||||
                    )?;
 | 
			
		||||
                    for res in content.iter_top_level_fields() {
 | 
			
		||||
                    let geo_iter = content.geo_field().transpose().map(|res| res.map(|rv| ("_geo", rv)));
 | 
			
		||||
                    for res in content.iter_top_level_fields().chain(geo_iter) {
 | 
			
		||||
                        let (f, _) = res?;
 | 
			
		||||
                        let entry = document_extractor_data
 | 
			
		||||
                            .field_distribution_delta
 | 
			
		||||
@@ -73,7 +74,8 @@ impl<'a, 'extractor> Extractor<'extractor> for DocumentsExtractor<'a> {
 | 
			
		||||
                    let docid = update.docid();
 | 
			
		||||
                    let content =
 | 
			
		||||
                        update.current(&context.rtxn, context.index, &context.db_fields_ids_map)?;
 | 
			
		||||
                    for res in content.iter_top_level_fields() {
 | 
			
		||||
                        let geo_iter = content.geo_field().transpose().map(|res| res.map(|rv| ("_geo", rv)));
 | 
			
		||||
                    for res in content.iter_top_level_fields().chain(geo_iter) {
 | 
			
		||||
                        let (f, _) = res?;
 | 
			
		||||
                        let entry = document_extractor_data
 | 
			
		||||
                            .field_distribution_delta
 | 
			
		||||
@@ -82,7 +84,8 @@ impl<'a, 'extractor> Extractor<'extractor> for DocumentsExtractor<'a> {
 | 
			
		||||
                        *entry -= 1;
 | 
			
		||||
                    }
 | 
			
		||||
                    let content = update.updated();
 | 
			
		||||
                    for res in content.iter_top_level_fields() {
 | 
			
		||||
                    let geo_iter = content.geo_field().transpose().map(|res| res.map(|rv| ("_geo", rv)));
 | 
			
		||||
                    for res in content.iter_top_level_fields().chain(geo_iter) {
 | 
			
		||||
                        let (f, _) = res?;
 | 
			
		||||
                        let entry = document_extractor_data
 | 
			
		||||
                            .field_distribution_delta
 | 
			
		||||
@@ -111,7 +114,8 @@ impl<'a, 'extractor> Extractor<'extractor> for DocumentsExtractor<'a> {
 | 
			
		||||
                DocumentChange::Insertion(insertion) => {
 | 
			
		||||
                    let docid = insertion.docid();
 | 
			
		||||
                    let content = insertion.inserted();
 | 
			
		||||
                    for res in content.iter_top_level_fields() {
 | 
			
		||||
                    let geo_iter = content.geo_field().transpose().map(|res| res.map(|rv| ("_geo", rv)));
 | 
			
		||||
                    for res in content.iter_top_level_fields().chain(geo_iter) {
 | 
			
		||||
                        let (f, _) = res?;
 | 
			
		||||
                        let entry = document_extractor_data
 | 
			
		||||
                            .field_distribution_delta
 | 
			
		||||
 
 | 
			
		||||
@@ -1,18 +1,16 @@
 | 
			
		||||
use std::cell::RefCell;
 | 
			
		||||
use std::collections::HashSet;
 | 
			
		||||
use std::mem::size_of;
 | 
			
		||||
use std::ops::DerefMut as _;
 | 
			
		||||
 | 
			
		||||
use bumpalo::collections::Vec as BVec;
 | 
			
		||||
use bumpalo::Bump;
 | 
			
		||||
use hashbrown::HashMap;
 | 
			
		||||
use heed::{BytesDecode, RoTxn};
 | 
			
		||||
use heed::RoTxn;
 | 
			
		||||
use serde_json::Value;
 | 
			
		||||
 | 
			
		||||
use super::super::cache::BalancedCaches;
 | 
			
		||||
use super::facet_document::extract_document_facets;
 | 
			
		||||
use super::FacetKind;
 | 
			
		||||
use crate::facet::value_encoding::f64_into_bytes;
 | 
			
		||||
use crate::heed_codec::facet::OrderedF64Codec;
 | 
			
		||||
use crate::update::del_add::DelAdd;
 | 
			
		||||
use crate::update::new::channel::FieldIdDocidFacetSender;
 | 
			
		||||
@@ -80,6 +78,7 @@ impl FacetedDocidsExtractor {
 | 
			
		||||
            DocumentChange::Deletion(inner) => extract_document_facets(
 | 
			
		||||
                attributes_to_extract,
 | 
			
		||||
                inner.current(rtxn, index, context.db_fields_ids_map)?,
 | 
			
		||||
                inner.external_document_id(),
 | 
			
		||||
                new_fields_ids_map.deref_mut(),
 | 
			
		||||
                &mut |fid, value| {
 | 
			
		||||
                    Self::facet_fn_with_options(
 | 
			
		||||
@@ -98,6 +97,7 @@ impl FacetedDocidsExtractor {
 | 
			
		||||
                extract_document_facets(
 | 
			
		||||
                    attributes_to_extract,
 | 
			
		||||
                    inner.current(rtxn, index, context.db_fields_ids_map)?,
 | 
			
		||||
                    inner.external_document_id(),
 | 
			
		||||
                    new_fields_ids_map.deref_mut(),
 | 
			
		||||
                    &mut |fid, value| {
 | 
			
		||||
                        Self::facet_fn_with_options(
 | 
			
		||||
@@ -116,6 +116,7 @@ impl FacetedDocidsExtractor {
 | 
			
		||||
                extract_document_facets(
 | 
			
		||||
                    attributes_to_extract,
 | 
			
		||||
                    inner.merged(rtxn, index, context.db_fields_ids_map)?,
 | 
			
		||||
                    inner.external_document_id(),
 | 
			
		||||
                    new_fields_ids_map.deref_mut(),
 | 
			
		||||
                    &mut |fid, value| {
 | 
			
		||||
                        Self::facet_fn_with_options(
 | 
			
		||||
@@ -134,6 +135,7 @@ impl FacetedDocidsExtractor {
 | 
			
		||||
            DocumentChange::Insertion(inner) => extract_document_facets(
 | 
			
		||||
                attributes_to_extract,
 | 
			
		||||
                inner.inserted(),
 | 
			
		||||
                inner.external_document_id(),
 | 
			
		||||
                new_fields_ids_map.deref_mut(),
 | 
			
		||||
                &mut |fid, value| {
 | 
			
		||||
                    Self::facet_fn_with_options(
 | 
			
		||||
 
 | 
			
		||||
@@ -1,17 +1,18 @@
 | 
			
		||||
use serde_json::Value;
 | 
			
		||||
 | 
			
		||||
use crate::update::new::document::Document;
 | 
			
		||||
use crate::update::new::extract::geo::extract_geo_coordinates;
 | 
			
		||||
use crate::update::new::extract::perm_json_p;
 | 
			
		||||
use crate::{FieldId, GlobalFieldsIdsMap, InternalError, Result, UserError};
 | 
			
		||||
 | 
			
		||||
pub fn extract_document_facets<'doc>(
 | 
			
		||||
    attributes_to_extract: &[&str],
 | 
			
		||||
    document: impl Document<'doc>,
 | 
			
		||||
    external_document_id: &str,
 | 
			
		||||
    field_id_map: &mut GlobalFieldsIdsMap,
 | 
			
		||||
    facet_fn: &mut impl FnMut(FieldId, &Value) -> Result<()>,
 | 
			
		||||
) -> Result<()> {
 | 
			
		||||
    let geo = document.geo_field().transpose().map(|res|  res.map(|rval| ("_geo", rval)));
 | 
			
		||||
    for res in document.iter_top_level_fields().chain(geo) {
 | 
			
		||||
    for res in document.iter_top_level_fields() {
 | 
			
		||||
        let (field_name, value) = res?;
 | 
			
		||||
 | 
			
		||||
        let mut tokenize_field = |name: &str, value: &Value| match field_id_map.id_or_insert(name) {
 | 
			
		||||
@@ -42,5 +43,19 @@ pub fn extract_document_facets<'doc>(
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    if attributes_to_extract.contains(&"_geo") {
 | 
			
		||||
        if let Some(geo_value) = document.geo_field()? {
 | 
			
		||||
            if let Some([lat, lng]) = extract_geo_coordinates(external_document_id, geo_value)? {
 | 
			
		||||
                let (lat_fid, lng_fid) = field_id_map
 | 
			
		||||
                    .id_or_insert("_geo.lat")
 | 
			
		||||
                    .zip(field_id_map.id_or_insert("_geo.lng"))
 | 
			
		||||
                    .ok_or(UserError::AttributeLimitReached)?;
 | 
			
		||||
 | 
			
		||||
                facet_fn(lat_fid, &lat.into())?;
 | 
			
		||||
                facet_fn(lng_fid, &lng.into())?;
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    Ok(())
 | 
			
		||||
}
 | 
			
		||||
 
 | 
			
		||||
@@ -4,7 +4,7 @@ use std::io::{self, BufReader, BufWriter, ErrorKind, Read, Write as _};
 | 
			
		||||
use std::{iter, mem, result};
 | 
			
		||||
 | 
			
		||||
use bumpalo::Bump;
 | 
			
		||||
use bytemuck::{bytes_of, from_bytes, pod_read_unaligned, Pod, Zeroable};
 | 
			
		||||
use bytemuck::{bytes_of, pod_read_unaligned, Pod, Zeroable};
 | 
			
		||||
use heed::RoTxn;
 | 
			
		||||
use serde_json::value::RawValue;
 | 
			
		||||
use serde_json::Value;
 | 
			
		||||
@@ -15,7 +15,7 @@ use crate::update::new::indexer::document_changes::{DocumentChangeContext, Extra
 | 
			
		||||
use crate::update::new::ref_cell_ext::RefCellExt as _;
 | 
			
		||||
use crate::update::new::DocumentChange;
 | 
			
		||||
use crate::update::GrenadParameters;
 | 
			
		||||
use crate::{lat_lng_to_xyz, DocumentId, GeoPoint, Index, InternalError, Object, Result};
 | 
			
		||||
use crate::{lat_lng_to_xyz, DocumentId, GeoPoint, Index, InternalError, Result};
 | 
			
		||||
 | 
			
		||||
pub struct GeoExtractor {
 | 
			
		||||
    grenad_parameters: GrenadParameters,
 | 
			
		||||
@@ -244,7 +244,10 @@ impl<'extractor> Extractor<'extractor> for GeoExtractor {
 | 
			
		||||
/// Extracts and validates the latitude and longitude from a document geo field.
 | 
			
		||||
///
 | 
			
		||||
/// It can be of the form `{ "lat": 0.0, "lng": "1.0" }`.
 | 
			
		||||
fn extract_geo_coordinates(external_id: &str, raw_value: &RawValue) -> Result<Option<[f64; 2]>> {
 | 
			
		||||
pub fn extract_geo_coordinates(
 | 
			
		||||
    external_id: &str,
 | 
			
		||||
    raw_value: &RawValue,
 | 
			
		||||
) -> Result<Option<[f64; 2]>> {
 | 
			
		||||
    let mut geo = match serde_json::from_str(raw_value.get()).map_err(InternalError::SerdeJson)? {
 | 
			
		||||
        Value::Null => return Ok(None),
 | 
			
		||||
        Value::Object(map) => map,
 | 
			
		||||
@@ -256,12 +259,22 @@ fn extract_geo_coordinates(external_id: &str, raw_value: &RawValue) -> Result<Op
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
    let [lat, lng] = match (geo.remove("lat"), geo.remove("lng")) {
 | 
			
		||||
        (Some(lat), Some(lng)) => [lat, lng],
 | 
			
		||||
        (Some(lat), Some(lng)) => {
 | 
			
		||||
            if geo.is_empty() {
 | 
			
		||||
                [lat, lng]
 | 
			
		||||
            } else {
 | 
			
		||||
                return Err(GeoError::UnexpectedExtraFields {
 | 
			
		||||
                    document_id: Value::from(external_id),
 | 
			
		||||
                    value: Value::from(geo),
 | 
			
		||||
                }
 | 
			
		||||
                .into());
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
        (Some(_), None) => {
 | 
			
		||||
            return Err(GeoError::MissingLatitude { document_id: Value::from(external_id) }.into())
 | 
			
		||||
            return Err(GeoError::MissingLongitude { document_id: Value::from(external_id) }.into())
 | 
			
		||||
        }
 | 
			
		||||
        (None, Some(_)) => {
 | 
			
		||||
            return Err(GeoError::MissingLongitude { document_id: Value::from(external_id) }.into())
 | 
			
		||||
            return Err(GeoError::MissingLatitude { document_id: Value::from(external_id) }.into())
 | 
			
		||||
        }
 | 
			
		||||
        (None, None) => {
 | 
			
		||||
            return Err(GeoError::MissingLatitudeAndLongitude {
 | 
			
		||||
@@ -271,13 +284,21 @@ fn extract_geo_coordinates(external_id: &str, raw_value: &RawValue) -> Result<Op
 | 
			
		||||
        }
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
    let lat = extract_finite_float_from_value(lat)
 | 
			
		||||
        .map_err(|value| GeoError::BadLatitude { document_id: Value::from(external_id), value })?;
 | 
			
		||||
 | 
			
		||||
    let lng = extract_finite_float_from_value(lng)
 | 
			
		||||
        .map_err(|value| GeoError::BadLongitude { document_id: Value::from(external_id), value })?;
 | 
			
		||||
 | 
			
		||||
    Ok(Some([lat, lng]))
 | 
			
		||||
    match (extract_finite_float_from_value(lat), extract_finite_float_from_value(lng)) {
 | 
			
		||||
        (Ok(lat), Ok(lng)) => Ok(Some([lat, lng])),
 | 
			
		||||
        (Ok(_), Err(value)) => {
 | 
			
		||||
            Err(GeoError::BadLongitude { document_id: Value::from(external_id), value }.into())
 | 
			
		||||
        }
 | 
			
		||||
        (Err(value), Ok(_)) => {
 | 
			
		||||
            Err(GeoError::BadLatitude { document_id: Value::from(external_id), value }.into())
 | 
			
		||||
        }
 | 
			
		||||
        (Err(lat), Err(lng)) => Err(GeoError::BadLatitudeAndLongitude {
 | 
			
		||||
            document_id: Value::from(external_id),
 | 
			
		||||
            lat,
 | 
			
		||||
            lng,
 | 
			
		||||
        }
 | 
			
		||||
        .into()),
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/// Extracts and validates that a serde JSON Value is actually a finite f64.
 | 
			
		||||
 
 | 
			
		||||
@@ -419,6 +419,6 @@ impl WordDocidsExtractors {
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    fn attributes_to_skip<'a>(_rtxn: &'a RoTxn, _index: &'a Index) -> Result<Vec<&'a str>> {
 | 
			
		||||
        Ok(vec![])
 | 
			
		||||
        Ok(vec!["_geo"])
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 
 | 
			
		||||
@@ -25,7 +25,7 @@ impl SearchableExtractor for WordPairProximityDocidsExtractor {
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    fn attributes_to_skip<'a>(_rtxn: &'a RoTxn, _index: &'a Index) -> Result<Vec<&'a str>> {
 | 
			
		||||
        Ok(vec![])
 | 
			
		||||
        Ok(vec!["_geo"])
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    // This method is reimplemented to count the number of words in the document in each field
 | 
			
		||||
 
 | 
			
		||||
@@ -50,7 +50,7 @@ where
 | 
			
		||||
 | 
			
		||||
    let mut file = tempfile::tempfile()?;
 | 
			
		||||
    /// TODO: handle the serialization error instead of unwrapping
 | 
			
		||||
    bincode::serialize_into(&mut file, dbg!(&rtree)).unwrap();
 | 
			
		||||
    bincode::serialize_into(&mut file, &rtree).unwrap();
 | 
			
		||||
    file.sync_all()?;
 | 
			
		||||
 | 
			
		||||
    let rtree_mmap = unsafe { Mmap::map(&file)? };
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user