mirror of https://github.com/meilisearch/meilisearch.git (synced 2025-09-06 04:36:32 +00:00)
fix the cellulite spilling bug
Cargo.lock (generated): 3 lines changed
Cargo.lock

@@ -1065,8 +1065,6 @@ dependencies = [
 [[package]]
 name = "cellulite"
 version = "0.1.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "377e81073db1dd0b0f0f297da35717fd799a77117668f2a6c48a3cabe0ef717e"
 dependencies = [
  "geo",
  "geo-types",
@@ -4214,6 +4212,7 @@ dependencies = [
  "url",
  "utoipa",
  "uuid",
+ "zerometry",
 ]

 [[package]]
@@ -7,7 +7,8 @@ const LILLE: &str = include_str!("assets/lille.geojson");
 async fn basic_add_settings_and_geojson_documents() {
     let server = Server::new_shared();
     let index = server.unique_index();
-    let (task, _status_code) = index.update_settings(json!({"filterableAttributes": ["_geojson"]})).await;
+    let (task, _status_code) =
+        index.update_settings(json!({"filterableAttributes": ["_geojson"]})).await;
     server.wait_task(task.uid()).await.succeeded();

     let (response, _) = index.search_get("?filter=_geoPolygon([0,0],[2,0],[2,2],[0,2])").await;
@@ -113,7 +114,6 @@ async fn basic_add_settings_and_geojson_documents() {
     "#);
 }
-

 #[actix_rt::test]
 async fn basic_add_geojson_documents_and_settings() {
     let server = Server::new_shared();
@@ -168,7 +168,8 @@ async fn basic_add_geojson_documents_and_settings() {
     }
     "#);

-    let (task, _status_code) = index.update_settings(json!({"filterableAttributes": ["_geojson"]})).await;
+    let (task, _status_code) =
+        index.update_settings(json!({"filterableAttributes": ["_geojson"]})).await;
     server.wait_task(task.uid()).await.succeeded();
     let (response, _code) = index.search_get("?filter=_geoPolygon([0,0],[2,0],[2,2],[0,2])").await;
     snapshot!(response,
@@ -212,14 +213,16 @@ async fn add_and_remove_geojson() {
     ]);
     let (task, _status_code) = index.add_documents(documents, None).await;
     server.wait_task(task.uid()).await.succeeded();
-    let (response, _code) = index.search_get("?filter=_geoPolygon([0,0],[0.9,0],[0.9,0.9],[0,0.9])").await;
+    let (response, _code) =
+        index.search_get("?filter=_geoPolygon([0,0],[0.9,0],[0.9,0.9],[0,0.9])").await;
     assert_eq!(response.get("hits").unwrap().as_array().unwrap().len(), 0);
     let (response, _code) = index.search_get("?filter=_geoPolygon([0,0],[2,0],[2,2],[0,2])").await;
     assert_eq!(response.get("hits").unwrap().as_array().unwrap().len(), 1);

     let (task, _) = index.delete_document(0).await;
     server.wait_task(task.uid()).await.succeeded();
-    let (response, _code) = index.search_get("?filter=_geoPolygon([0,0],[0.9,0],[0.9,0.9],[0,0.9])").await;
+    let (response, _code) =
+        index.search_get("?filter=_geoPolygon([0,0],[0.9,0],[0.9,0.9],[0,0.9])").await;
     assert_eq!(response.get("hits").unwrap().as_array().unwrap().len(), 0);
     let (response, _code) = index.search_get("?filter=_geoPolygon([0,0],[2,0],[2,2],[0,2])").await;
     assert_eq!(response.get("hits").unwrap().as_array().unwrap().len(), 0);
@@ -233,13 +236,13 @@ async fn add_and_remove_geojson() {
     ]);
     let (task, _status_code) = index.add_documents(documents, None).await;
     server.wait_task(task.uid()).await.succeeded();
-    let (response, _code) = index.search_get("?filter=_geoPolygon([0,0],[0.9,0],[0.9,0.9],[0,0.9])").await;
+    let (response, _code) =
+        index.search_get("?filter=_geoPolygon([0,0],[0.9,0],[0.9,0.9],[0,0.9])").await;
     assert_eq!(response.get("hits").unwrap().as_array().unwrap().len(), 0);
     let (response, _code) = index.search_get("?filter=_geoPolygon([0,0],[2,0],[2,2],[0,2])").await;
     assert_eq!(response.get("hits").unwrap().as_array().unwrap().len(), 1);
 }
-

 #[actix_rt::test]
 async fn partial_update_geojson() {
     let server = Server::new_shared();
@@ -255,12 +258,12 @@ async fn partial_update_geojson() {
     ]);
     let (task, _status_code) = index.add_documents(documents, None).await;
     server.wait_task(task.uid()).await.succeeded();
-    let (response, _code) = index.search_get("?filter=_geoPolygon([0,0],[0.9,0],[0.9,0.9],[0,0.9])").await;
+    let (response, _code) =
+        index.search_get("?filter=_geoPolygon([0,0],[0.9,0],[0.9,0.9],[0,0.9])").await;
     assert_eq!(response.get("hits").unwrap().as_array().unwrap().len(), 0);
     let (response, _code) = index.search_get("?filter=_geoPolygon([0,0],[2,0],[2,2],[0,2])").await;
     assert_eq!(response.get("hits").unwrap().as_array().unwrap().len(), 1);
-

     let documents = json!([
         {
             "id": 0,
@@ -269,10 +272,12 @@ async fn partial_update_geojson() {
     ]);
     let (task, _status_code) = index.update_documents(documents, None).await;
     server.wait_task(task.uid()).await.succeeded();
-    let (response, _code) = index.search_get("?filter=_geoPolygon([0,0],[0.9,0],[0.9,0.9],[0,0.9])").await;
+    let (response, _code) =
+        index.search_get("?filter=_geoPolygon([0,0],[0.9,0],[0.9,0.9],[0,0.9])").await;
     assert_eq!(response.get("hits").unwrap().as_array().unwrap().len(), 1);
     let (response, _code) = index.search_get("?filter=_geoPolygon([0,0],[2,0],[2,2],[0,2])").await;
     assert_eq!(response.get("hits").unwrap().as_array().unwrap().len(), 1);
-    let (response, _code) = index.search_get("?filter=_geoPolygon([0.9,0.9],[2,0.9],[2,2],[0.9,2])").await;
+    let (response, _code) =
+        index.search_get("?filter=_geoPolygon([0.9,0.9],[2,0.9],[2,2],[0.9,2])").await;
     assert_eq!(response.get("hits").unwrap().as_array().unwrap().len(), 0);
 }
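Note: a hypothetical shape for the documents these tests index; the `_geojson` field name matches the reserved attribute configured above, but the geometry and coordinates here are illustrative, not copied from the test assets:

    let documents = json!([
        {
            "id": 0,
            "_geojson": { "type": "Point", "coordinates": [1.0, 1.0] },
        }
    ]);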
@@ -1,6 +1,6 @@
 mod add_documents;
 mod delete_documents;
 mod errors;
+mod geojson;
 mod get_documents;
 mod update_documents;
-mod geojson;
@@ -18,8 +18,8 @@ bincode = "1.3.3"
 bstr = "1.12.0"
 bytemuck = { version = "1.23.1", features = ["extern_crate_alloc"] }
 byteorder = "1.5.0"
-# cellulite = { path = "../../../cellulite" }
-cellulite = "0.1.0"
+cellulite = { path = "../../../cellulite" }
+# cellulite = "0.1.0"
 steppe = "0.4.0"
 charabia = { version = "0.9.6", default-features = false }
 concat-arrays = "0.1.2"
@@ -114,6 +114,7 @@ utoipa = { version = "5.4.0", features = [
 ] }
 lru = "0.14.0"
 geo-types = "0.7.16"
+zerometry = "0.1.0"

 [dev-dependencies]
 mimalloc = { version = "0.1.47", default-features = false }
@@ -6,7 +6,9 @@ use heed::RoTxn;

 use super::FieldsIdsMap;
 use crate::attribute_patterns::{match_field_legacy, PatternMatch};
-use crate::constants::{RESERVED_GEOJSON_FIELD_NAME, RESERVED_GEO_FIELD_NAME, RESERVED_VECTORS_FIELD_NAME};
+use crate::constants::{
+    RESERVED_GEOJSON_FIELD_NAME, RESERVED_GEO_FIELD_NAME, RESERVED_VECTORS_FIELD_NAME,
+};
 use crate::{
     is_faceted_by, FieldId, FilterableAttributesFeatures, FilterableAttributesRule, Index,
     LocalizedAttributesRule, Result, Weight,
@@ -794,7 +794,8 @@ impl<'a> Filter<'a> {
             ),
             Vec::new(),
         );
-        let result = index.cellulite
+        let result = index
+            .cellulite
             .in_shape(rtxn, &polygon.into(), &mut |_| ())
            .map_err(InternalError::CelluliteError)?;
         // TODO: Remove once we update roaring
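The `_geoPolygon` filter exercised by the tests ends up in this `in_shape` call. As a rough sketch of the glue (assumed helper, not part of the patch), the polygon handed to cellulite can be built from the filter's coordinate pairs with geo-types, with an empty interior-ring list like the `Vec::new()` in the hunk:

    use geo_types::{Coord, LineString, Polygon};

    // Hypothetical helper: build the exterior ring from (x, y) pairs.
    // Polygon::new closes the ring if the last point differs from the first.
    fn polygon_from_points(points: &[(f64, f64)]) -> Polygon<f64> {
        let ring: Vec<Coord<f64>> = points.iter().map(|&(x, y)| Coord { x, y }).collect();
        Polygon::new(LineString::from(ring), Vec::new())
    }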
@@ -179,8 +179,6 @@ fn extract_geojson_field(
             .map(|v| serde_json::from_slice(v).map_err(InternalError::SerdeJson))
             .transpose()?)
         }
-        _ => {
-            Ok(None)
-        }
+        _ => Ok(None),
     }
 }
@@ -820,19 +820,20 @@ impl<'a, 'i> Transform<'a, 'i> {
         let documents_count = documents_ids.len() as usize;

         // We initialize the sorter with the user indexing settings.
-        let mut original_sorter = if settings_diff.reindex_vectors() || settings_diff.reindex_geojson() {
-            Some(create_sorter(
-                grenad::SortAlgorithm::Stable,
-                KeepFirst,
-                self.indexer_settings.chunk_compression_type,
-                self.indexer_settings.chunk_compression_level,
-                self.indexer_settings.max_nb_chunks,
-                self.indexer_settings.max_memory.map(|mem| mem / 2),
-                true,
-            ))
-        } else {
-            None
-        };
+        let mut original_sorter =
+            if settings_diff.reindex_vectors() || settings_diff.reindex_geojson() {
+                Some(create_sorter(
+                    grenad::SortAlgorithm::Stable,
+                    KeepFirst,
+                    self.indexer_settings.chunk_compression_type,
+                    self.indexer_settings.chunk_compression_level,
+                    self.indexer_settings.max_nb_chunks,
+                    self.indexer_settings.max_memory.map(|mem| mem / 2),
+                    true,
+                ))
+            } else {
+                None
+            };

         let readers: BTreeMap<&str, (ArroyWrapper, &RoaringBitmap)> = settings_diff
             .embedding_config_updates
@@ -644,7 +644,8 @@ pub(crate) fn write_typed_chunk_into_index(

                 let geojson =
                     geojson::GeoJson::from_reader(value).map_err(UserError::SerdeJson)?;
-                index.cellulite
+                index
+                    .cellulite
                     .add(wtxn, docid, &geojson)
                     .map_err(InternalError::CelluliteError)?;
             }
@@ -13,7 +13,6 @@ use bbqueue::framed::{FrameGrantR, FrameProducer};
 use bbqueue::BBBuffer;
 use bytemuck::{checked, CheckedBitPattern, NoUninit};
 use flume::{RecvTimeoutError, SendError};
-use geojson::GeoJson;
 use heed::types::Bytes;
 use heed::{BytesDecode, MdbError};
 use memmap2::{Mmap, MmapMut};
@@ -144,7 +143,7 @@ pub enum ReceiverAction {
     // The geojson for france made of 63k points takes 594KiB which means with a capacity of 1000,
     // the absolute maximum amounts of memory we could consume is about 580MiB which is acceptable for this POC.
     // If the geojson is None, it means that the document is being deleted.
-    GeoJson(DocumentId, Option<GeoJson>),
+    GeoJson(DocumentId, Option<Vec<u8>>),
 }

 /// An entry that cannot fit in the BBQueue buffers has been
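The memory bound in the comment above checks out; a quick sanity sketch of the arithmetic, with the values taken from the comment itself:

    // 594 KiB per worst-case entry, channel capacity of 1000 entries.
    const ENTRY_KIB: u64 = 594;
    const CAPACITY: u64 = 1000;

    fn main() {
        // 594_000 KiB / 1024 = 580 MiB, matching the "about 580MiB" estimate.
        println!("{} MiB", ENTRY_KIB * CAPACITY / 1024);
    }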
@@ -1155,7 +1154,7 @@ impl GeoSender<'_, '_> {
 pub struct GeoJsonSender<'a, 'b>(&'a ExtractorBbqueueSender<'b>);

 impl GeoJsonSender<'_, '_> {
-    pub fn send_geojson(&self, docid: DocumentId, value: GeoJson) -> StdResult<(), SendError<()>> {
+    pub fn send_geojson(&self, docid: DocumentId, value: Vec<u8>) -> StdResult<(), SendError<()>> {
         self.0.sender.send(ReceiverAction::GeoJson(docid, Some(value))).map_err(|_| SendError(()))
     }
     pub fn delete_geojson(&self, docid: DocumentId) -> StdResult<(), SendError<()>> {
@@ -1,21 +1,25 @@
 use std::cell::RefCell;
 use std::fs::File;
-use std::io::{self, BufReader, BufWriter, ErrorKind, Seek as _, Write as _};
+use std::io::{self, BufReader, BufWriter, ErrorKind, Read, Seek as _, Write as _};
 use std::str::FromStr;
 use std::{iter, mem};

 use bumpalo::Bump;
 use byteorder::{BigEndian, ReadBytesExt, WriteBytesExt};
+use cellulite::zerometry::ZerometryCodec;
+use geo_types::Geometry;
 use geojson::GeoJson;
-use heed::RoTxn;
+use heed::{BytesEncode, RoTxn};
+use zerometry::Zerometry;

+use crate::update::new::channel::GeoJsonSender;
 use crate::update::new::document::{Document, DocumentContext};
 use crate::update::new::indexer::document_changes::Extractor;
 use crate::update::new::ref_cell_ext::RefCellExt as _;
 use crate::update::new::thread_local::MostlySend;
 use crate::update::new::DocumentChange;
 use crate::update::GrenadParameters;
-use crate::{DocumentId, Index, Result, UserError};
+use crate::{DocumentId, Index, InternalError, Result, UserError};

 pub struct GeoJsonExtractor {
     grenad_parameters: GrenadParameters,
@@ -38,8 +42,8 @@ impl GeoJsonExtractor {
 pub struct GeoJsonExtractorData<'extractor> {
     /// The set of documents ids that were removed. If a document sees its geo
     /// point being updated, we first put it in the deleted and then in the inserted.
-    removed: bumpalo::collections::Vec<'extractor, (DocumentId, GeoJson)>,
-    inserted: bumpalo::collections::Vec<'extractor, (DocumentId, GeoJson)>,
+    removed: bumpalo::collections::Vec<'extractor, (DocumentId, &'extractor [u8])>,
+    inserted: bumpalo::collections::Vec<'extractor, (DocumentId, &'extractor [u8])>,
     /// Contains a packed list of `ExtractedGeoPoint` of the inserted geo points
     /// data structures if we have spilled to disk.
     spilled_removed: Option<BufWriter<File>>,
@@ -68,39 +72,43 @@ impl<'extractor> GeoJsonExtractorData<'extractor> {
 unsafe impl MostlySend for GeoJsonExtractorData<'_> {}

 pub struct FrozenGeoJsonExtractorData<'extractor> {
-    pub removed: &'extractor [(DocumentId, GeoJson)],
-    pub inserted: &'extractor [(DocumentId, GeoJson)],
+    pub removed: &'extractor [(DocumentId, &'extractor [u8])],
+    pub inserted: &'extractor [(DocumentId, &'extractor [u8])],
     pub spilled_removed: Option<BufReader<File>>,
     pub spilled_inserted: Option<BufReader<File>>,
 }

 impl FrozenGeoJsonExtractorData<'_> {
-    pub fn iter_and_clear_removed(
-        &mut self,
-    ) -> io::Result<impl IntoIterator<Item = Result<(DocumentId, GeoJson), serde_json::Error>> + '_>
-    {
-        Ok(mem::take(&mut self.removed)
-            .iter()
-            .cloned()
-            .map(Ok)
-            .chain(iterator_over_spilled_geojsons(&mut self.spilled_removed)?))
+    pub fn iter_and_clear_removed(&mut self, channel: GeoJsonSender<'_, '_>) -> Result<()> {
+        for (docid, _buf) in mem::take(&mut self.removed) {
+            channel.delete_geojson(*docid).unwrap();
+        }
+
+        for ret in iterator_over_spilled_geojsons(&mut self.spilled_removed)? {
+            let (docid, _buf) = ret.map_err(InternalError::SerdeJson)?;
+            channel.delete_geojson(docid).unwrap();
+        }
+
+        Ok(())
     }

-    pub fn iter_and_clear_inserted(
-        &mut self,
-    ) -> io::Result<impl IntoIterator<Item = Result<(DocumentId, GeoJson), serde_json::Error>> + '_>
-    {
-        Ok(mem::take(&mut self.inserted)
-            .iter()
-            .cloned()
-            .map(Ok)
-            .chain(iterator_over_spilled_geojsons(&mut self.spilled_inserted)?))
+    pub fn iter_and_clear_inserted(&mut self, channel: GeoJsonSender<'_, '_>) -> Result<()> {
+        for (docid, _buf) in mem::take(&mut self.inserted) {
+            channel.send_geojson(*docid, _buf.to_vec()).unwrap();
+        }
+
+        for ret in iterator_over_spilled_geojsons(&mut self.spilled_inserted)? {
+            let (docid, buf) = ret.map_err(InternalError::SerdeJson)?;
+            channel.send_geojson(docid, buf.to_vec()).unwrap();
+        }
+
+        Ok(())
     }
 }

 fn iterator_over_spilled_geojsons(
     spilled: &mut Option<BufReader<File>>,
-) -> io::Result<impl IntoIterator<Item = Result<(DocumentId, GeoJson), serde_json::Error>> + '_> {
+) -> io::Result<impl IntoIterator<Item = Result<(DocumentId, Vec<u8>), serde_json::Error>> + '_> {
     let mut spilled = spilled.take();
     if let Some(spilled) = &mut spilled {
         spilled.rewind()?;
|
|||||||
Err(e) if e.kind() == ErrorKind::UnexpectedEof => return None,
|
Err(e) if e.kind() == ErrorKind::UnexpectedEof => return None,
|
||||||
Err(e) => return Some(Err(serde_json::Error::io(e))),
|
Err(e) => return Some(Err(serde_json::Error::io(e))),
|
||||||
};
|
};
|
||||||
match GeoJson::from_reader(file) {
|
let size = match file.read_u32::<BigEndian>() {
|
||||||
Ok(geojson) => Some(Ok((docid, geojson))),
|
Ok(size) => size,
|
||||||
Err(e) if e.is_eof() => None,
|
Err(e) => return Some(Err(serde_json::Error::io(e))),
|
||||||
Err(e) => Some(Err(e)),
|
};
|
||||||
|
let mut buf = vec![0; size as usize];
|
||||||
|
match file.read_exact(&mut buf) {
|
||||||
|
Ok(()) => Some(Ok((docid, buf))),
|
||||||
|
Err(e) => return Some(Err(serde_json::Error::io(e))),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
None => None,
|
None => None,
|
||||||
@ -138,7 +150,7 @@ impl<'extractor> Extractor<'extractor> for GeoJsonExtractor {
|
|||||||
fn process<'doc>(
|
fn process<'doc>(
|
||||||
&'doc self,
|
&'doc self,
|
||||||
changes: impl Iterator<Item = Result<DocumentChange<'doc>>>,
|
changes: impl Iterator<Item = Result<DocumentChange<'doc>>>,
|
||||||
context: &'doc DocumentContext<Self::Data>,
|
context: &'doc DocumentContext<'doc, 'extractor, '_, '_, Self::Data>,
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
let rtxn = &context.rtxn;
|
let rtxn = &context.rtxn;
|
||||||
let index = context.index;
|
let index = context.index;
|
||||||
@ -161,15 +173,22 @@ impl<'extractor> Extractor<'extractor> for GeoJsonExtractor {
|
|||||||
let current = deletion.current(rtxn, index, db_fields_ids_map)?;
|
let current = deletion.current(rtxn, index, db_fields_ids_map)?;
|
||||||
|
|
||||||
if let Some(geojson) = current.geojson_field()? {
|
if let Some(geojson) = current.geojson_field()? {
|
||||||
|
let geojson = GeoJson::from_str(geojson.get()).map_err(UserError::from)?;
|
||||||
|
let geometry = Geometry::try_from(geojson).map_err(UserError::from)?;
|
||||||
|
let buf = ZerometryCodec::bytes_encode(&geometry).unwrap();
|
||||||
|
|
||||||
match &mut data_ref.spilled_removed {
|
match &mut data_ref.spilled_removed {
|
||||||
Some(file) => {
|
Some(file) => {
|
||||||
file.write_u32::<BigEndian>(docid)?;
|
file.write_u32::<BigEndian>(docid)?;
|
||||||
file.write_all(geojson.get().as_bytes())?;
|
file.write_u32::<BigEndian>(buf.len() as u32)?;
|
||||||
|
file.write_all(&buf)?;
|
||||||
|
}
|
||||||
|
None => {
|
||||||
|
let mut bvec =
|
||||||
|
bumpalo::collections::Vec::new_in(context.extractor_alloc);
|
||||||
|
bvec.extend_from_slice(&buf);
|
||||||
|
data_ref.removed.push((docid, bvec.into_bump_slice()));
|
||||||
}
|
}
|
||||||
None => data_ref.removed.push(
|
|
||||||
// TODO: The error type is wrong here. It should be an internal error.
|
|
||||||
(docid, GeoJson::from_str(geojson.get()).map_err(UserError::from)?),
|
|
||||||
),
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -187,50 +206,70 @@ impl<'extractor> Extractor<'extractor> for GeoJsonExtractor {
                     // we need to replace the current by the new point and therefore
                     // delete the current point from cellulite.
                     if let Some(geojson) = current_geo {
+                        let geojson =
+                            GeoJson::from_str(geojson.get()).map_err(UserError::from)?;
+                        let geometry = Geometry::try_from(geojson).map_err(UserError::from)?;
+                        let buf = ZerometryCodec::bytes_encode(&geometry).unwrap();
+
                         match &mut data_ref.spilled_removed {
                             Some(file) => {
                                 file.write_u32::<BigEndian>(docid)?;
-                                file.write_all(geojson.get().as_bytes())?;
+                                file.write_u32::<BigEndian>(buf.len() as u32)?;
+                                file.write_all(&buf)?;
+                            }
+                            None => {
+                                let mut bvec =
+                                    bumpalo::collections::Vec::new_in(context.extractor_alloc);
+                                bvec.extend_from_slice(&buf);
+                                data_ref.removed.push((docid, bvec.into_bump_slice()));
                             }
-                            // TODO: Should be an internal error
-                            None => data_ref.removed.push((
-                                docid,
-                                GeoJson::from_str(geojson.get()).map_err(UserError::from)?,
-                            )),
                         }
                     }

                     if let Some(geojson) = updated_geo {
+                        let geojson =
+                            GeoJson::from_str(geojson.get()).map_err(UserError::from)?;
+                        let geometry = Geometry::try_from(geojson).map_err(UserError::from)?;
+                        let buf = ZerometryCodec::bytes_encode(&geometry).unwrap();
+
                         match &mut data_ref.spilled_inserted {
                             Some(file) => {
                                 file.write_u32::<BigEndian>(docid)?;
-                                file.write_all(geojson.get().as_bytes())?;
+                                file.write_u32::<BigEndian>(buf.len() as u32)?;
+                                file.write_all(&buf)?;
+                            }
+                            None => {
+                                let mut bvec =
+                                    bumpalo::collections::Vec::new_in(context.extractor_alloc);
+                                bvec.extend_from_slice(&buf);
+                                data_ref.inserted.push((docid, bvec.into_bump_slice()));
                             }
-                            // TODO: Is the error type correct here? Shouldn't it be an internal error?
-                            None => data_ref.inserted.push((
-                                docid,
-                                GeoJson::from_str(geojson.get()).map_err(UserError::from)?,
-                            )),
                         }
                     }
                 }
             }
             DocumentChange::Insertion(insertion) => {
                 let docid = insertion.docid();

                 let inserted_geo = insertion.inserted().geojson_field()?;

                 if let Some(geojson) = inserted_geo {
+                    let geojson = GeoJson::from_str(geojson.get()).map_err(UserError::from)?;
+                    let geometry = Geometry::try_from(geojson).map_err(UserError::from)?;
+                    let mut bytes = Vec::new();
+                    Zerometry::write_from_geometry(&mut bytes, &geometry)?;
+
                     match &mut data_ref.spilled_inserted {
                         Some(file) => {
                             file.write_u32::<BigEndian>(docid)?;
-                            file.write_all(geojson.get().as_bytes())?;
+                            file.write_u32::<BigEndian>(bytes.len() as u32)?;
+                            file.write_all(&bytes)?;
+                        }
+                        None => {
+                            let mut bvec =
+                                bumpalo::collections::Vec::new_in(context.extractor_alloc);
+                            bvec.extend_from_slice(&bytes);
+                            data_ref.inserted.push((docid, bvec.into_bump_slice()));
                         }
-                        // TODO: Is the error type correct here? Shouldn't it be an internal error?
-                        None => data_ref.inserted.push((
-                            docid,
-                            GeoJson::from_str(geojson.get()).map_err(UserError::from)?,
-                        )),
                     }
                 }
             }
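All three branches now encode eagerly at extraction time: parse the raw `_geojson` string, convert it to a geo-types `Geometry`, serialize it to zerometry bytes, and only then spill or buffer. A condensed sketch of that pipeline, assembled from the calls in the hunks above (the helper itself is hypothetical):

    use std::str::FromStr;

    use geo_types::Geometry;
    use geojson::GeoJson;

    fn encode_geojson(raw: &str) -> Result<Vec<u8>, Box<dyn std::error::Error>> {
        let geojson = GeoJson::from_str(raw)?;
        let geometry = Geometry::<f64>::try_from(geojson)?;
        let mut bytes = Vec::new();
        zerometry::Zerometry::write_from_geometry(&mut bytes, &geometry)?;
        Ok(bytes)
    }

Storing the pre-encoded bytes is what makes the fixed spill format above work: the payload has a known length, so it can be framed on disk and replayed through the channel without re-parsing JSON.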
@@ -24,7 +24,7 @@ pub trait Extractor<'extractor>: Sync {
     fn process<'doc>(
         &'doc self,
         changes: impl Iterator<Item = Result<DocumentChange<'doc>>>,
-        context: &'doc DocumentContext<Self::Data>,
+        context: &'doc DocumentContext<'doc, 'extractor, '_, '_, Self::Data>,
     ) -> Result<()>;
 }

@@ -72,18 +72,17 @@ pub fn write_to_db(
                 let embedding = large_vector.read_embedding(*dimensions);
                 writer.add_item_in_store(wtxn, docid, extractor_id, embedding)?;
             }
-            ReceiverAction::GeoJson(docid, geojson) => {
-                match geojson {
-                    Some(geojson) => {
-                        println!("Adding geojson {docid}");
-                        index.cellulite.add(wtxn, docid, &geojson).map_err(InternalError::CelluliteError)?;
-                    }
-                    None => {
-                        println!("Deleting geojson {docid}");
-                        index.cellulite.delete(wtxn, docid).map_err(InternalError::CelluliteError)?;
-                    }
-                }
-            }
+            ReceiverAction::GeoJson(docid, geojson) => match geojson {
+                Some(geojson) => {
+                    index
+                        .cellulite
+                        .add_raw_zerometry(wtxn, docid, &geojson)
+                        .map_err(InternalError::CelluliteError)?;
+                }
+                None => {
+                    index.cellulite.delete(wtxn, docid).map_err(InternalError::CelluliteError)?;
+                }
+            },
         }

         // Every time the is a message in the channel we search
@@ -80,15 +80,8 @@ where
         }

         let mut frozen = data.into_inner().freeze()?;
-        for result in frozen.iter_and_clear_removed()? {
-            let (docid, _) = result.map_err(InternalError::SerdeJson)?;
-            geojson_sender.delete_geojson(docid).unwrap();
-        }
-
-        for result in frozen.iter_and_clear_inserted()? {
-            let (docid, geojson) = result.map_err(InternalError::SerdeJson)?;
-            geojson_sender.send_geojson(docid, geojson).unwrap();
-        }
+        frozen.iter_and_clear_removed(geojson_sender)?;
+        frozen.iter_and_clear_inserted(geojson_sender)?;
     }

     Ok(())
@@ -1767,7 +1767,10 @@ impl InnerIndexSettingsDiff {
     }

     pub fn any_reindexing_needed(&self) -> bool {
-        self.reindex_searchable() || self.reindex_facets() || self.reindex_vectors() || self.reindex_geojson()
+        self.reindex_searchable()
+            || self.reindex_facets()
+            || self.reindex_vectors()
+            || self.reindex_geojson()
     }

     pub fn reindex_searchable(&self) -> bool {