mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-25 21:16:28 +00:00 
			
		
		
		
	remove multiple bugs
This commit is contained in:
		| @@ -21,19 +21,22 @@ pub fn extract_geo_points<R: io::Read>( | ||||
|     })?; | ||||
|  | ||||
|     // We never encountered any document with a `_geo` field, so we can skip this step entirely. | ||||
|     if geo_field_id.is_none() { | ||||
|         return Ok(writer_into_reader(writer)?); | ||||
|     } | ||||
|     let geo_field_id = geo_field_id.unwrap(); | ||||
|     let geo_field_id = match geo_field_id { | ||||
|         Some(geo) => geo, | ||||
|         None => return Ok(writer_into_reader(writer)?), | ||||
|     }; | ||||
|  | ||||
|     while let Some((docid_bytes, value)) = obkv_documents.next()? { | ||||
|         let obkv = obkv::KvReader::new(value); | ||||
|         let point = obkv.get(geo_field_id).unwrap(); // TODO: TAMO where should we handle this error? | ||||
|         let point = match obkv.get(geo_field_id) { | ||||
|             Some(point) => point, | ||||
|             None => continue, | ||||
|         }; | ||||
|         let point: Value = serde_json::from_slice(point).map_err(InternalError::SerdeJson)?; | ||||
|  | ||||
|         if let Some((lat, lng)) = point["lat"].as_f64().zip(point["lng"].as_f64()) { | ||||
|             // this will create an array of 16 bytes (two 8-byte floats) | ||||
|             let bytes: [u8; 16] = concat_arrays![lat.to_le_bytes(), lng.to_le_bytes()]; | ||||
|             let bytes: [u8; 16] = concat_arrays![lat.to_ne_bytes(), lng.to_ne_bytes()]; | ||||
|             writer.insert(docid_bytes, bytes)?; | ||||
|         } else { | ||||
|             // TAMO: improve the warn | ||||
|   | ||||
| @@ -51,13 +51,14 @@ pub(crate) fn data_from_obkv_documents( | ||||
|                 lmdb_writer_sx.clone(), | ||||
|                 &searchable_fields, | ||||
|                 &faceted_fields, | ||||
|                 geo_field_id, | ||||
|                 &stop_words, | ||||
|             ) | ||||
|         }) | ||||
|         .collect(); | ||||
|  | ||||
|     let ( | ||||
|         (docid_word_positions_chunks), | ||||
|         docid_word_positions_chunks, | ||||
|         (docid_fid_facet_numbers_chunks, docid_fid_facet_strings_chunks), | ||||
|     ) = result?; | ||||
|  | ||||
| @@ -121,16 +122,6 @@ pub(crate) fn data_from_obkv_documents( | ||||
|         "field-id-facet-number-docids", | ||||
|     ); | ||||
|  | ||||
|     spawn_extraction_task( | ||||
|         documents_chunk, | ||||
|         indexer.clone(), | ||||
|         lmdb_writer_sx.clone(), | ||||
|         move |documents, indexer| extract_geo_points(documents, indexer, geo_field_id), | ||||
|         merge_cbo_roaring_bitmaps, | ||||
|         TypedChunk::GeoPoints, | ||||
|         "geo-points", | ||||
|     ); | ||||
|  | ||||
|     Ok(()) | ||||
| } | ||||
|  | ||||
| @@ -181,6 +172,7 @@ fn extract_documents_data( | ||||
|     lmdb_writer_sx: Sender<Result<TypedChunk>>, | ||||
|     searchable_fields: &Option<HashSet<FieldId>>, | ||||
|     faceted_fields: &HashSet<FieldId>, | ||||
|     geo_field_id: Option<FieldId>, | ||||
|     stop_words: &Option<fst::Set<&[u8]>>, | ||||
| ) -> Result<( | ||||
|     grenad::Reader<CursorClonableMmap>, | ||||
| @@ -190,6 +182,12 @@ fn extract_documents_data( | ||||
|  | ||||
|     let _ = lmdb_writer_sx.send(Ok(TypedChunk::Documents(documents_chunk.clone()))); | ||||
|  | ||||
|     let (documents_chunk_cloned, lmdb_writer_sx_cloned) = (documents_chunk.clone(), lmdb_writer_sx.clone()); | ||||
|     rayon::spawn(move || { | ||||
|         let geo_points = extract_geo_points(documents_chunk_cloned, indexer, geo_field_id).unwrap(); | ||||
|         lmdb_writer_sx_cloned.send(Ok(TypedChunk::GeoPoints(geo_points))).unwrap(); | ||||
|     }); | ||||
|  | ||||
|     let (docid_word_positions_chunk, docid_fid_facet_values_chunks): (Result<_>, Result<_>) = | ||||
|         rayon::join( | ||||
|             || { | ||||
|   | ||||
| @@ -181,16 +181,16 @@ pub(crate) fn write_typed_chunk_into_index( | ||||
|         } | ||||
|         TypedChunk::GeoPoints(mut geo_points) => { | ||||
|             // TODO: TAMO: we should create the rtree with the `RTree::bulk_load` function | ||||
|             let mut rtree = index.geo_rtree(&index.read_txn()?)?.unwrap_or_default(); | ||||
|             let mut rtree = index.geo_rtree(wtxn)?.unwrap_or_default(); | ||||
|             while let Some((key, value)) = geo_points.next()? { | ||||
|                 // convert the key back to a u32 (4 bytes) | ||||
|                 let (key, _) = helpers::try_split_array_at::<u8, 4>(key).unwrap(); | ||||
|                 let key = u32::from_le_bytes(key); | ||||
|                 let key = u32::from_be_bytes(key); | ||||
|  | ||||
|                 // convert the latitude and longitude back to a f64 (8 bytes) | ||||
|                 let (lat, tail) = helpers::try_split_array_at::<u8, 8>(value).unwrap(); | ||||
|                 let (lng, _) = helpers::try_split_array_at::<u8, 8>(tail).unwrap(); | ||||
|                 let point = [f64::from_le_bytes(lat), f64::from_le_bytes(lng)]; | ||||
|                 let point = [f64::from_ne_bytes(lat), f64::from_ne_bytes(lng)]; | ||||
|                 rtree.insert(GeoPoint::new(point, key)); | ||||
|             } | ||||
|             index.put_geo_rtree(wtxn, &rtree)?; | ||||
|   | ||||
		Reference in New Issue
	
	Block a user