mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-11-04 09:56:28 +00:00 
			
		
		
		
	Merge #3986
3986: Fix geo bounding box with strings r=ManyTheFish a=irevoire
# Pull Request
When sending a document with one geofield of type string (e.g. `{ "_geo": { "lat": 12, "lng": "13" }}`), the geo bounding box filter would exclude this document.
This PR fixes this issue by automatically parsing the string value in case we're working on a geofield.
## Related issue
Fixes https://github.com/meilisearch/meilisearch/issues/3973
## What does this PR do?
- Automatically parse the facet value if and only if we're working on a geofield.
- Make insta work with snapshots in loops or closures executed multiple times (you may need to update your CLI if it panics after this PR: `cargo install cargo-insta`).
- Add one integration test in milli and in meilisearch to ensure it works forever.
- Add three snapshots for the dump that had mysteriously disappeared (I don't know how).
Co-authored-by: Tamo <tamo@meilisearch.com>
			
			
This commit is contained in:
		@@ -1718,11 +1718,11 @@ pub(crate) mod tests {
 | 
			
		||||
            .unwrap();
 | 
			
		||||
        index
 | 
			
		||||
            .add_documents(documents!([
 | 
			
		||||
                { "id": 0, "_geo": { "lat": 0, "lng": 0 } },
 | 
			
		||||
                { "id": 1, "_geo": { "lat": 0, "lng": -175 } },
 | 
			
		||||
                { "id": 2, "_geo": { "lat": 0, "lng": 175 } },
 | 
			
		||||
                { "id": 0, "_geo": { "lat": "0", "lng": "0" } },
 | 
			
		||||
                { "id": 1, "_geo": { "lat": 0, "lng": "-175" } },
 | 
			
		||||
                { "id": 2, "_geo": { "lat": "0", "lng": 175 } },
 | 
			
		||||
                { "id": 3, "_geo": { "lat": 85, "lng": 0 } },
 | 
			
		||||
                { "id": 4, "_geo": { "lat": -85, "lng": 0 } },
 | 
			
		||||
                { "id": 4, "_geo": { "lat": "-85", "lng": "0" } },
 | 
			
		||||
            ]))
 | 
			
		||||
            .unwrap();
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -28,11 +28,13 @@ pub struct ExtractedFacetValues {
 | 
			
		||||
///
 | 
			
		||||
/// Returns the generated grenad reader containing the docid, the fid and the original value as key
 | 
			
		||||
/// and the normalized value as value extracted from the given chunk of documents.
 | 
			
		||||
/// We need the fid of the geofields to correctly parse them as numbers if they were sent as strings initially.
 | 
			
		||||
#[logging_timer::time]
 | 
			
		||||
pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
 | 
			
		||||
    obkv_documents: grenad::Reader<R>,
 | 
			
		||||
    indexer: GrenadParameters,
 | 
			
		||||
    faceted_fields: &HashSet<FieldId>,
 | 
			
		||||
    geo_fields_ids: Option<(FieldId, FieldId)>,
 | 
			
		||||
) -> Result<ExtractedFacetValues> {
 | 
			
		||||
    let max_memory = indexer.max_memory_by_thread();
 | 
			
		||||
 | 
			
		||||
@@ -82,7 +84,10 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
 | 
			
		||||
 | 
			
		||||
                let value = from_slice(field_bytes).map_err(InternalError::SerdeJson)?;
 | 
			
		||||
 | 
			
		||||
                match extract_facet_values(&value) {
 | 
			
		||||
                match extract_facet_values(
 | 
			
		||||
                    &value,
 | 
			
		||||
                    geo_fields_ids.map_or(false, |(lat, lng)| field_id == lat || field_id == lng),
 | 
			
		||||
                ) {
 | 
			
		||||
                    FilterableValues::Null => {
 | 
			
		||||
                        facet_is_null_docids.entry(field_id).or_default().insert(document);
 | 
			
		||||
                    }
 | 
			
		||||
@@ -175,12 +180,13 @@ enum FilterableValues {
 | 
			
		||||
    Values { numbers: Vec<f64>, strings: Vec<(String, String)> },
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
fn extract_facet_values(value: &Value) -> FilterableValues {
 | 
			
		||||
fn extract_facet_values(value: &Value, geo_field: bool) -> FilterableValues {
 | 
			
		||||
    fn inner_extract_facet_values(
 | 
			
		||||
        value: &Value,
 | 
			
		||||
        can_recurse: bool,
 | 
			
		||||
        output_numbers: &mut Vec<f64>,
 | 
			
		||||
        output_strings: &mut Vec<(String, String)>,
 | 
			
		||||
        geo_field: bool,
 | 
			
		||||
    ) {
 | 
			
		||||
        match value {
 | 
			
		||||
            Value::Null => (),
 | 
			
		||||
@@ -191,13 +197,30 @@ fn extract_facet_values(value: &Value) -> FilterableValues {
 | 
			
		||||
                }
 | 
			
		||||
            }
 | 
			
		||||
            Value::String(original) => {
 | 
			
		||||
                // if we're working on a geofield it MUST be something we can parse or else there was an internal error
 | 
			
		||||
                // in the enrich pipeline. But since the enrich pipeline worked, we want to avoid crashing at all costs.
 | 
			
		||||
                if geo_field {
 | 
			
		||||
                    if let Ok(float) = original.parse() {
 | 
			
		||||
                        output_numbers.push(float);
 | 
			
		||||
                    } else {
 | 
			
		||||
                        log::warn!(
 | 
			
		||||
                            "Internal error, could not parse a geofield that has been validated. Please open an issue."
 | 
			
		||||
                        )
 | 
			
		||||
                    }
 | 
			
		||||
                }
 | 
			
		||||
                let normalized = crate::normalize_facet(original);
 | 
			
		||||
                output_strings.push((normalized, original.clone()));
 | 
			
		||||
            }
 | 
			
		||||
            Value::Array(values) => {
 | 
			
		||||
                if can_recurse {
 | 
			
		||||
                    for value in values {
 | 
			
		||||
                        inner_extract_facet_values(value, false, output_numbers, output_strings);
 | 
			
		||||
                        inner_extract_facet_values(
 | 
			
		||||
                            value,
 | 
			
		||||
                            false,
 | 
			
		||||
                            output_numbers,
 | 
			
		||||
                            output_strings,
 | 
			
		||||
                            geo_field,
 | 
			
		||||
                        );
 | 
			
		||||
                    }
 | 
			
		||||
                }
 | 
			
		||||
            }
 | 
			
		||||
@@ -213,7 +236,7 @@ fn extract_facet_values(value: &Value) -> FilterableValues {
 | 
			
		||||
        otherwise => {
 | 
			
		||||
            let mut numbers = Vec::new();
 | 
			
		||||
            let mut strings = Vec::new();
 | 
			
		||||
            inner_extract_facet_values(otherwise, true, &mut numbers, &mut strings);
 | 
			
		||||
            inner_extract_facet_values(otherwise, true, &mut numbers, &mut strings, geo_field);
 | 
			
		||||
            FilterableValues::Values { numbers, strings }
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 
 | 
			
		||||
@@ -366,6 +366,7 @@ fn send_and_extract_flattened_documents_data(
 | 
			
		||||
                    flattened_documents_chunk.clone(),
 | 
			
		||||
                    indexer,
 | 
			
		||||
                    faceted_fields,
 | 
			
		||||
                    geo_fields_ids,
 | 
			
		||||
                )?;
 | 
			
		||||
 | 
			
		||||
                // send docid_fid_facet_numbers_chunk to DB writer
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user