Compare commits

..

3 Commits

Author          SHA1         Message                     Date
Louis Dureuil   e644aa07a9   change generation           2024-06-17 16:41:31 +02:00
Louis Dureuil   497d15685c   Use inspector allocator     2024-06-17 16:37:50 +02:00
Louis Dureuil   9776136f92   Add inspecting allocator    2024-06-17 16:37:24 +02:00
14 changed files with 312 additions and 186 deletions

44  Cargo.lock (generated)  View File

@@ -494,7 +494,7 @@ checksum = "8c3c1a368f70d6cf7302d78f8f7093da241fb8e8807c05cc9e51a125895a6d5b"
[[package]]
name = "benchmarks"
version = "1.8.1"
version = "1.8.0"
dependencies = [
"anyhow",
"bytes",
@@ -639,7 +639,7 @@ dependencies = [
[[package]]
name = "build-info"
version = "1.8.1"
version = "1.8.0"
dependencies = [
"anyhow",
"time",
@@ -1539,7 +1539,7 @@ dependencies = [
[[package]]
name = "dump"
version = "1.8.1"
version = "1.8.0"
dependencies = [
"anyhow",
"big_s",
@@ -1787,7 +1787,7 @@ dependencies = [
[[package]]
name = "file-store"
version = "1.8.1"
version = "1.8.0"
dependencies = [
"faux",
"tempfile",
@@ -1810,7 +1810,7 @@ dependencies = [
[[package]]
name = "filter-parser"
version = "1.8.1"
version = "1.8.0"
dependencies = [
"insta",
"nom",
@@ -1830,7 +1830,7 @@ dependencies = [
[[package]]
name = "flatten-serde-json"
version = "1.8.1"
version = "1.8.0"
dependencies = [
"criterion",
"serde_json",
@@ -1948,7 +1948,7 @@ dependencies = [
[[package]]
name = "fuzzers"
version = "1.8.1"
version = "1.8.0"
dependencies = [
"arbitrary",
"clap",
@@ -2442,7 +2442,7 @@ checksum = "206ca75c9c03ba3d4ace2460e57b189f39f43de612c2f85836e65c929701bb2d"
[[package]]
name = "index-scheduler"
version = "1.8.1"
version = "1.8.0"
dependencies = [
"anyhow",
"big_s",
@@ -2506,6 +2506,14 @@ dependencies = [
"generic-array",
]
[[package]]
name = "inspecting-allocator"
version = "1.8.0"
dependencies = [
"tracing",
"tracing-error",
]
[[package]]
name = "insta"
version = "1.34.0"
@@ -2638,7 +2646,7 @@ dependencies = [
[[package]]
name = "json-depth-checker"
version = "1.8.1"
version = "1.8.0"
dependencies = [
"criterion",
"serde_json",
@@ -3275,7 +3283,7 @@ checksum = "490cc448043f947bae3cbee9c203358d62dbee0db12107a74be5c30ccfd09771"
[[package]]
name = "meili-snap"
version = "1.8.1"
version = "1.8.0"
dependencies = [
"insta",
"md5",
@@ -3284,7 +3292,7 @@ dependencies = [
[[package]]
name = "meilisearch"
version = "1.8.1"
version = "1.8.0"
dependencies = [
"actix-cors",
"actix-http",
@@ -3316,6 +3324,7 @@ dependencies = [
"http 0.2.11",
"index-scheduler",
"indexmap",
"inspecting-allocator",
"insta",
"is-terminal",
"itertools 0.11.0",
@@ -3365,6 +3374,7 @@ dependencies = [
"toml",
"tracing",
"tracing-actix-web",
"tracing-error",
"tracing-subscriber",
"tracing-trace",
"url",
@@ -3377,7 +3387,7 @@ dependencies = [
[[package]]
name = "meilisearch-auth"
version = "1.8.1"
version = "1.8.0"
dependencies = [
"base64 0.21.7",
"enum-iterator",
@@ -3396,7 +3406,7 @@ dependencies = [
[[package]]
name = "meilisearch-types"
version = "1.8.1"
version = "1.8.0"
dependencies = [
"actix-web",
"anyhow",
@@ -3426,7 +3436,7 @@ dependencies = [
[[package]]
name = "meilitool"
version = "1.8.1"
version = "1.8.0"
dependencies = [
"anyhow",
"clap",
@@ -3465,7 +3475,7 @@ dependencies = [
[[package]]
name = "milli"
version = "1.8.1"
version = "1.8.0"
dependencies = [
"arroy",
"big_s",
@@ -3906,7 +3916,7 @@ checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e"
[[package]]
name = "permissive-json-pointer"
version = "1.8.1"
version = "1.8.0"
dependencies = [
"big_s",
"serde_json",
@@ -6074,7 +6084,7 @@ dependencies = [
[[package]]
name = "xtask"
version = "1.8.1"
version = "1.8.0"
dependencies = [
"anyhow",
"build-info",

View File

@@ -18,11 +18,11 @@ members = [
"fuzzers",
"tracing-trace",
"xtask",
"build-info",
"build-info", "inspecting-allocator",
]
[workspace.package]
version = "1.8.1"
version = "1.8.0"
authors = [
"Quentin de Quelen <quentin@dequelen.me>",
"Clément Renault <clement@meilisearch.com>",

View File

@@ -567,16 +567,16 @@ impl IndexScheduler {
tracing::debug!("index budget: {budget}B");
let mut index_count = budget / base_map_size;
if index_count < 3 {
if index_count < 2 {
// take a bit less than half than the budget to make sure we can always afford to open an index
let map_size = (budget * 2) / 5;
// single index of max budget
tracing::debug!("1 index of {map_size}B can be opened simultaneously.");
return IndexBudget { map_size, index_count: 1, task_db_size };
}
// give us some space for additional indexes when the cache is already full
// decrement is OK because index_count >= 3.
index_count -= 2;
// give us some space for an additional index when the cache is already full
// decrement is OK because index_count >= 2.
index_count -= 1;
if index_count > max_index_count {
index_count = max_index_count;
}
@@ -1834,7 +1834,7 @@ mod tests {
task_db_size: 1000 * 1000, // 1 MB, we don't use MiB on purpose.
index_base_map_size: 1000 * 1000, // 1 MB, we don't use MiB on purpose.
enable_mdb_writemap: false,
index_growth_amount: 1000 * 1000 * 1000 * 1000, // 1 TB
index_growth_amount: 1000 * 1000, // 1 MB
index_count: 5,
indexer_config,
autobatching_enabled: true,
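
For readers checking the budget hunk above, a small self-contained sketch that is not part of this changeset: it restates the index-budget arithmetic as a free function so the effect of the "< 3" to "< 2" and "-= 2" to "-= 1" change can be verified with concrete numbers. The name index_budget and the two-field IndexBudget stand-in are illustrative only, and since the tail of the real function is not visible in the hunk, returning base_map_size unchanged for the map size is an assumption of this sketch.

// Illustrative sketch only; the real IndexBudget also carries task_db_size.
struct IndexBudget {
    map_size: usize,
    index_count: usize,
}

fn index_budget(budget: usize, base_map_size: usize, max_index_count: usize) -> IndexBudget {
    let mut index_count = budget / base_map_size;
    if index_count < 2 {
        // take a bit less than half of the budget so one index can always be opened
        return IndexBudget { map_size: (budget * 2) / 5, index_count: 1 };
    }
    // keep room for one additional index when the cache is already full
    index_count -= 1;
    IndexBudget { map_size: base_map_size, index_count: index_count.min(max_index_count) }
}

fn main() {
    // 6 GiB budget with 2 GiB maps: the old "< 3" / "-= 2" thresholds yielded a
    // single index, while the new code leaves room for two indexes open at once.
    let budget = index_budget(6 << 30, 2 << 30, 200);
    assert_eq!(budget.index_count, 2);
    assert_eq!(budget.map_size, 2 << 30);
}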

View File

@@ -0,0 +1,15 @@
[package]
name = "inspecting-allocator"
version.workspace = true
authors.workspace = true
description.workspace = true
homepage.workspace = true
readme.workspace = true
edition.workspace = true
license.workspace = true
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
tracing = "0.1.40"
tracing-error = { version = "0.2.0", default-features = false }

View File

@@ -0,0 +1,160 @@
use std::alloc::GlobalAlloc;
use std::cell::{Cell, RefCell};
use std::collections::HashMap;
use std::sync::atomic::AtomicU64;
use tracing_error::SpanTrace;
#[derive(Debug, Clone)]
pub struct AllocEntry {
generation: u64,
span: SpanTrace,
}
impl AllocEntry {
pub fn generation(&self) -> u64 {
self.generation
}
}
impl std::fmt::Display for AllocEntry {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let mut res = Ok(());
let mut depth = 0;
self.span.with_spans(|metadata, fields| {
let name_with_module_name: Vec<&str> = metadata
.module_path()
.into_iter()
.chain(std::iter::once(metadata.name()))
.collect();
let name_with_module_name = name_with_module_name.join("::");
let location = format!(
"{}:{}",
metadata.file().unwrap_or_default(),
metadata.line().unwrap_or_default()
);
if let Err(error) =
writeln!(f, "[{depth}]{name_with_module_name}({fields}) at {location}")
{
res = Err(error);
return false;
}
depth += 1;
true
});
res
}
}
struct AllocatorState {
is_allocating: Cell<bool>,
state: RefCell<HashMap<*mut u8, AllocEntry>>,
}
thread_local! {
static ALLOCATOR_STATE: AllocatorState = AllocatorState { is_allocating: Cell::new(false), state: RefCell::new(Default::default()) };
}
pub struct InspectingAllocator<InnerAllocator> {
inner: InnerAllocator,
current_generation: AtomicU64,
}
impl AllocatorState {
fn handle_alloc(&self, allocated: *mut u8, current_generation: u64) -> *mut u8 {
if self.is_allocating.get() {
return allocated;
}
self.is_allocating.set(true);
{
self.state.borrow_mut().insert(
allocated,
AllocEntry { generation: current_generation, span: SpanTrace::capture() },
);
}
self.is_allocating.set(false);
allocated
}
fn handle_dealloc(&self, allocated: *mut u8) {
if self.is_allocating.get() {
return;
}
self.is_allocating.set(true);
{
self.state.borrow_mut().remove(&allocated);
}
self.is_allocating.set(false);
}
fn find_older_generations(&self, older_generation: u64) -> Vec<(*mut u8, AllocEntry)> {
if self.is_allocating.get() {
return Vec::new();
}
self.is_allocating.set(true);
let mut entries = Vec::new();
self.state.borrow_mut().retain(|k, v| {
if v.generation > older_generation {
return true;
}
entries.push((*k, v.clone()));
false
});
self.is_allocating.set(false);
entries
}
}
impl<A> InspectingAllocator<A> {
pub const fn wrap(inner: A) -> Self {
Self { inner, current_generation: AtomicU64::new(0) }
}
pub fn next_generation(&self) {
self.current_generation.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
}
pub fn find_older_generations(&self, older_than: u64) -> Vec<(*mut u8, AllocEntry)> {
let current_generation = self.current_generation.load(std::sync::atomic::Ordering::Relaxed);
if current_generation < older_than {
return Vec::new();
}
ALLOCATOR_STATE.with(|allocator_state| {
allocator_state.find_older_generations(current_generation - older_than)
})
}
}
unsafe impl<InnerAllocator: GlobalAlloc> GlobalAlloc for InspectingAllocator<InnerAllocator> {
unsafe fn alloc(&self, layout: std::alloc::Layout) -> *mut u8 {
let allocated = self.inner.alloc(layout);
let current_generation = self.current_generation.load(std::sync::atomic::Ordering::Relaxed);
ALLOCATOR_STATE
.with(|allocator_state| allocator_state.handle_alloc(allocated, current_generation))
}
unsafe fn dealloc(&self, ptr: *mut u8, layout: std::alloc::Layout) {
self.inner.dealloc(ptr, layout);
ALLOCATOR_STATE.with(|allocator_state| allocator_state.handle_dealloc(ptr))
}
unsafe fn alloc_zeroed(&self, layout: std::alloc::Layout) -> *mut u8 {
let allocated = self.inner.alloc_zeroed(layout);
let current_generation = self.current_generation.load(std::sync::atomic::Ordering::Relaxed);
ALLOCATOR_STATE
.with(|allocator_state| allocator_state.handle_alloc(allocated, current_generation))
}
unsafe fn realloc(&self, ptr: *mut u8, layout: std::alloc::Layout, new_size: usize) -> *mut u8 {
let reallocated = self.inner.realloc(ptr, layout, new_size);
if reallocated == ptr {
return reallocated;
}
let current_generation = self.current_generation.load(std::sync::atomic::Ordering::Relaxed);
ALLOCATOR_STATE.with(|allocator_state| allocator_state.handle_dealloc(ptr));
ALLOCATOR_STATE
.with(|allocator_state| allocator_state.handle_alloc(reallocated, current_generation))
}
}
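
To make the new crate easier to evaluate in isolation, here is a minimal usage sketch that is not part of this changeset. It assumes exactly the API defined in the file above, and swaps MiMalloc (which the meilisearch crate wraps later in this diff) for the standard System allocator; the names leaked and _short_lived are illustrative only.

use std::alloc::System;

use inspecting_allocator::InspectingAllocator;

#[global_allocator]
static ALLOC: InspectingAllocator<System> = InspectingAllocator::wrap(System);

fn main() {
    // Allocations made now belong to generation 0 and are never freed below.
    let leaked: Vec<Box<u64>> = (0..3u64).map(Box::new).collect();

    // Each unit of work bumps the generation; short-lived buffers are freed
    // every iteration and therefore drop out of the tracking map.
    for _ in 0..6 {
        ALLOC.next_generation();
        let _short_lived = vec![0u8; 1024];
    }

    // Report anything still alive that was allocated at least 5 generations
    // ago, together with the span trace captured at allocation time.
    for (address, entry) in ALLOC.find_older_generations(5) {
        println!("{address:p} was allocated in generation {}", entry.generation());
        println!("{entry}");
    }

    drop(leaked);
}

Because SpanTrace::capture() only records spans when a tracing-error ErrorLayer is registered with the subscriber, the printed traces are empty in this sketch; the main.rs change further down in this diff adds that layer for the real binary.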

View File

@@ -108,6 +108,8 @@ tracing-subscriber = { version = "0.3.18", features = ["json"] }
tracing-trace = { version = "0.1.0", path = "../tracing-trace" }
tracing-actix-web = "0.7.9"
build-info = { version = "1.7.0", path = "../build-info" }
inspecting-allocator = { version = "1.8.0", path = "../inspecting-allocator" }
tracing-error = { version = "0.2.0", default-features = false }
[dev-dependencies]
actix-rt = "2.9.0"

View File

@@ -16,15 +16,11 @@ use meilisearch::{
LogStderrType, Opt, SubscriberForSecondLayer,
};
use meilisearch_auth::{generate_master_key, AuthController, MASTER_KEY_MIN_SIZE};
use mimalloc::MiMalloc;
use termcolor::{Color, ColorChoice, ColorSpec, StandardStream, WriteColor};
use tracing::level_filters::LevelFilter;
use tracing_subscriber::layer::SubscriberExt as _;
use tracing_subscriber::Layer;
#[global_allocator]
static ALLOC: MiMalloc = MiMalloc;
fn default_log_route_layer() -> LogRouteType {
None.with_filter(tracing_subscriber::filter::Targets::new().with_target("", LevelFilter::OFF))
}
@@ -56,8 +52,10 @@ fn setup(opt: &Opt) -> anyhow::Result<(LogRouteHandle, LogStderrHandle)> {
let (stderr_layer, stderr_layer_handle) =
tracing_subscriber::reload::Layer::new(default_log_stderr_layer(opt));
let route_layer: tracing_subscriber::reload::Layer<_, _> = route_layer;
let error_layer = tracing_error::ErrorLayer::default();
let subscriber = tracing_subscriber::registry().with(route_layer).with(stderr_layer);
let subscriber =
tracing_subscriber::registry().with(route_layer).with(stderr_layer).with(error_layer);
// set the subscriber as the default for the application
tracing::subscriber::set_global_default(subscriber).unwrap();

View File

@@ -8,6 +8,7 @@ use deserr::actix_web::{AwebJson, AwebQueryParameter};
use deserr::Deserr;
use futures::StreamExt;
use index_scheduler::{IndexScheduler, TaskId};
use inspecting_allocator::InspectingAllocator;
use meilisearch_types::deserr::query_params::Param;
use meilisearch_types::deserr::{DeserrJsonError, DeserrQueryParamError};
use meilisearch_types::document_formats::{read_csv, read_json, read_ndjson, PayloadType};
@@ -20,6 +21,7 @@ use meilisearch_types::milli::DocumentId;
use meilisearch_types::star_or::OptionStarOrList;
use meilisearch_types::tasks::KindWithContent;
use meilisearch_types::{milli, Document, Index};
use mimalloc::MiMalloc;
use mime::Mime;
use once_cell::sync::Lazy;
use serde::Deserialize;
@@ -46,6 +48,9 @@ static ACCEPTED_CONTENT_TYPE: Lazy<Vec<String>> = Lazy::new(|| {
vec!["application/json".to_string(), "application/x-ndjson".to_string(), "text/csv".to_string()]
});
#[global_allocator]
static ALLOC: InspectingAllocator<MiMalloc> = InspectingAllocator::wrap(MiMalloc);
/// Extracts the mime type from the content type and return
/// a meilisearch error if anything bad happen.
fn extract_mime_type(req: &HttpRequest) -> Result<Option<Mime>, MeilisearchHttpError> {
@@ -468,6 +473,14 @@ async fn document_addition(
};
let scheduler = index_scheduler.clone();
ALLOC.next_generation();
for (address, entry) in ALLOC.find_older_generations(5) {
println!(
"Found allocation older than 5 generations: {address:p} in generation {}. Span trace",
entry.generation()
);
println!("{entry}")
}
let task = match tokio::task::spawn_blocking(move || scheduler.register(task, task_id, dry_run))
.await?
{

View File

@@ -117,69 +117,3 @@ async fn geo_bounding_box_with_string_and_number() {
)
.await;
}
#[actix_rt::test]
async fn bug_4640() {
// https://github.com/meilisearch/meilisearch/issues/4640
let server = Server::new().await;
let index = server.index("test");
let documents = DOCUMENTS.clone();
index.add_documents(documents, None).await;
index.update_settings_filterable_attributes(json!(["_geo"])).await;
let (ret, _code) = index.update_settings_sortable_attributes(json!(["_geo"])).await;
index.wait_task(ret.uid()).await;
// Sort the document with the second one first
index
.search(
json!({
"sort": ["_geoPoint(45.4777599, 9.1967508):asc"],
}),
|response, code| {
assert_eq!(code, 200, "{}", response);
snapshot!(json_string!(response, { ".processingTimeMs" => "[time]" }), @r###"
{
"hits": [
{
"id": 2,
"name": "La Bella Italia",
"address": "456 Elm Street, Townsville",
"type": "Italian",
"rating": 9,
"_geo": {
"lat": "45.4777599",
"lng": "9.1967508"
}
},
{
"id": 1,
"name": "Taco Truck",
"address": "444 Salsa Street, Burritoville",
"type": "Mexican",
"rating": 9,
"_geo": {
"lat": 34.0522,
"lng": -118.2437
},
"_geoDistance": 9714063
},
{
"id": 3,
"name": "Crêpe Truck",
"address": "2 Billig Avenue, Rouenville",
"type": "French",
"rating": 10
}
],
"query": "",
"processingTimeMs": "[time]",
"limit": 20,
"offset": 0,
"estimatedTotalHits": 3
}
"###);
},
)
.await;
}

View File

@@ -45,6 +45,7 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
obkv_documents: grenad::Reader<R>,
indexer: GrenadParameters,
settings_diff: &InnerIndexSettingsDiff,
geo_fields_ids: Option<(FieldId, FieldId)>,
) -> Result<ExtractedFacetValues> {
puffin::profile_function!();
@@ -126,18 +127,12 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
add_exists.insert(document);
}
let del_geo_support = settings_diff
.old
.geo_fields_ids
.map_or(false, |(lat, lng)| field_id == lat || field_id == lng);
let add_geo_support = settings_diff
.new
.geo_fields_ids
.map_or(false, |(lat, lng)| field_id == lat || field_id == lng);
let geo_support =
geo_fields_ids.map_or(false, |(lat, lng)| field_id == lat || field_id == lng);
let del_filterable_values =
del_value.map(|value| extract_facet_values(&value, del_geo_support));
del_value.map(|value| extract_facet_values(&value, geo_support));
let add_filterable_values =
add_value.map(|value| extract_facet_values(&value, add_geo_support));
add_value.map(|value| extract_facet_values(&value, geo_support));
// Those closures are just here to simplify things a bit.
let mut insert_numbers_diff = |del_numbers, add_numbers| {

View File

@@ -8,7 +8,6 @@ use super::helpers::{create_writer, writer_into_reader, GrenadParameters};
use crate::error::GeoError;
use crate::update::del_add::{DelAdd, KvReaderDelAdd, KvWriterDelAdd};
use crate::update::index_documents::extract_finite_float_from_value;
use crate::update::settings::{InnerIndexSettings, InnerIndexSettingsDiff};
use crate::{FieldId, InternalError, Result};
/// Extracts the geographical coordinates contained in each document under the `_geo` field.
@@ -19,7 +18,7 @@ pub fn extract_geo_points<R: io::Read + io::Seek>(
obkv_documents: grenad::Reader<R>,
indexer: GrenadParameters,
primary_key_id: FieldId,
settings_diff: &InnerIndexSettingsDiff,
(lat_fid, lng_fid): (FieldId, FieldId),
) -> Result<grenad::Reader<BufReader<File>>> {
puffin::profile_function!();
@@ -41,27 +40,47 @@ pub fn extract_geo_points<R: io::Read + io::Seek>(
serde_json::from_slice(document_id).unwrap()
};
// extract old version
let del_lat_lng =
extract_lat_lng(&obkv, &settings_diff.old, DelAdd::Deletion, document_id)?;
// extract new version
let add_lat_lng =
extract_lat_lng(&obkv, &settings_diff.new, DelAdd::Addition, document_id)?;
// first we get the two fields
match (obkv.get(lat_fid), obkv.get(lng_fid)) {
(Some(lat), Some(lng)) => {
let deladd_lat_obkv = KvReaderDelAdd::new(lat);
let deladd_lng_obkv = KvReaderDelAdd::new(lng);
if del_lat_lng != add_lat_lng {
let mut obkv = KvWriterDelAdd::memory();
if let Some([lat, lng]) = del_lat_lng {
#[allow(clippy::drop_non_drop)]
let bytes: [u8; 16] = concat_arrays![lat.to_ne_bytes(), lng.to_ne_bytes()];
obkv.insert(DelAdd::Deletion, bytes)?;
// then we extract the values
let del_lat_lng = deladd_lat_obkv
.get(DelAdd::Deletion)
.zip(deladd_lng_obkv.get(DelAdd::Deletion))
.map(|(lat, lng)| extract_lat_lng(lat, lng, document_id))
.transpose()?;
let add_lat_lng = deladd_lat_obkv
.get(DelAdd::Addition)
.zip(deladd_lng_obkv.get(DelAdd::Addition))
.map(|(lat, lng)| extract_lat_lng(lat, lng, document_id))
.transpose()?;
if del_lat_lng != add_lat_lng {
let mut obkv = KvWriterDelAdd::memory();
if let Some([lat, lng]) = del_lat_lng {
#[allow(clippy::drop_non_drop)]
let bytes: [u8; 16] = concat_arrays![lat.to_ne_bytes(), lng.to_ne_bytes()];
obkv.insert(DelAdd::Deletion, bytes)?;
}
if let Some([lat, lng]) = add_lat_lng {
#[allow(clippy::drop_non_drop)]
let bytes: [u8; 16] = concat_arrays![lat.to_ne_bytes(), lng.to_ne_bytes()];
obkv.insert(DelAdd::Addition, bytes)?;
}
let bytes = obkv.into_inner()?;
writer.insert(docid_bytes, bytes)?;
}
}
if let Some([lat, lng]) = add_lat_lng {
#[allow(clippy::drop_non_drop)]
let bytes: [u8; 16] = concat_arrays![lat.to_ne_bytes(), lng.to_ne_bytes()];
obkv.insert(DelAdd::Addition, bytes)?;
(None, Some(_)) => {
return Err(GeoError::MissingLatitude { document_id: document_id() }.into())
}
let bytes = obkv.into_inner()?;
writer.insert(docid_bytes, bytes)?;
(Some(_), None) => {
return Err(GeoError::MissingLongitude { document_id: document_id() }.into())
}
(None, None) => (),
}
}
@@ -69,37 +88,16 @@ pub fn extract_geo_points<R: io::Read + io::Seek>(
}
/// Extract the finite floats lat and lng from two bytes slices.
fn extract_lat_lng(
document: &obkv::KvReader<FieldId>,
settings: &InnerIndexSettings,
deladd: DelAdd,
document_id: impl Fn() -> Value,
) -> Result<Option<[f64; 2]>> {
match settings.geo_fields_ids {
Some((lat_fid, lng_fid)) => {
let lat = document.get(lat_fid).map(KvReaderDelAdd::new).and_then(|r| r.get(deladd));
let lng = document.get(lng_fid).map(KvReaderDelAdd::new).and_then(|r| r.get(deladd));
let (lat, lng) = match (lat, lng) {
(Some(lat), Some(lng)) => (lat, lng),
(Some(_), None) => {
return Err(GeoError::MissingLatitude { document_id: document_id() }.into())
}
(None, Some(_)) => {
return Err(GeoError::MissingLongitude { document_id: document_id() }.into())
}
(None, None) => return Ok(None),
};
let lat = extract_finite_float_from_value(
serde_json::from_slice(lat).map_err(InternalError::SerdeJson)?,
)
.map_err(|lat| GeoError::BadLatitude { document_id: document_id(), value: lat })?;
fn extract_lat_lng(lat: &[u8], lng: &[u8], document_id: impl Fn() -> Value) -> Result<[f64; 2]> {
let lat = extract_finite_float_from_value(
serde_json::from_slice(lat).map_err(InternalError::SerdeJson)?,
)
.map_err(|lat| GeoError::BadLatitude { document_id: document_id(), value: lat })?;
let lng = extract_finite_float_from_value(
serde_json::from_slice(lng).map_err(InternalError::SerdeJson)?,
)
.map_err(|lng| GeoError::BadLongitude { document_id: document_id(), value: lng })?;
Ok(Some([lat, lng]))
}
None => Ok(None),
}
let lng = extract_finite_float_from_value(
serde_json::from_slice(lng).map_err(InternalError::SerdeJson)?,
)
.map_err(|lng| GeoError::BadLongitude { document_id: document_id(), value: lng })?;
Ok([lat, lng])
}

View File

@@ -43,6 +43,7 @@ pub(crate) fn data_from_obkv_documents(
indexer: GrenadParameters,
lmdb_writer_sx: Sender<Result<TypedChunk>>,
primary_key_id: FieldId,
geo_fields_ids: Option<(FieldId, FieldId)>,
settings_diff: Arc<InnerIndexSettingsDiff>,
max_positions_per_attributes: Option<u32>,
) -> Result<()> {
@@ -71,6 +72,7 @@ pub(crate) fn data_from_obkv_documents(
indexer,
lmdb_writer_sx.clone(),
primary_key_id,
geo_fields_ids,
settings_diff.clone(),
max_positions_per_attributes,
)
@@ -298,6 +300,7 @@ fn send_and_extract_flattened_documents_data(
indexer: GrenadParameters,
lmdb_writer_sx: Sender<Result<TypedChunk>>,
primary_key_id: FieldId,
geo_fields_ids: Option<(FieldId, FieldId)>,
settings_diff: Arc<InnerIndexSettingsDiff>,
max_positions_per_attributes: Option<u32>,
) -> Result<(
@@ -307,13 +310,12 @@ fn send_and_extract_flattened_documents_data(
let flattened_documents_chunk =
flattened_documents_chunk.and_then(|c| unsafe { as_cloneable_grenad(&c) })?;
if settings_diff.run_geo_indexing() {
if let Some(geo_fields_ids) = geo_fields_ids {
let documents_chunk_cloned = flattened_documents_chunk.clone();
let lmdb_writer_sx_cloned = lmdb_writer_sx.clone();
let settings_diff = settings_diff.clone();
rayon::spawn(move || {
let result =
extract_geo_points(documents_chunk_cloned, indexer, primary_key_id, &settings_diff);
extract_geo_points(documents_chunk_cloned, indexer, primary_key_id, geo_fields_ids);
let _ = match result {
Ok(geo_points) => lmdb_writer_sx_cloned.send(Ok(TypedChunk::GeoPoints(geo_points))),
Err(error) => lmdb_writer_sx_cloned.send(Err(error)),
@@ -352,6 +354,7 @@ fn send_and_extract_flattened_documents_data(
flattened_documents_chunk.clone(),
indexer,
&settings_diff,
geo_fields_ids,
)?;
// send fid_docid_facet_numbers_chunk to DB writer

View File

@@ -324,6 +324,28 @@ where
// get the primary key field id
let primary_key_id = settings_diff.new.fields_ids_map.id(&primary_key).unwrap();
// get the fid of the `_geo.lat` and `_geo.lng` fields.
let mut field_id_map = self.index.fields_ids_map(self.wtxn)?;
// self.index.fields_ids_map($a)? ==>> field_id_map
let geo_fields_ids = match field_id_map.id("_geo") {
Some(gfid) => {
let is_sortable = self.index.sortable_fields_ids(self.wtxn)?.contains(&gfid);
let is_filterable = self.index.filterable_fields_ids(self.wtxn)?.contains(&gfid);
// if `_geo` is faceted then we get the `lat` and `lng`
if is_sortable || is_filterable {
let field_ids = field_id_map
.insert("_geo.lat")
.zip(field_id_map.insert("_geo.lng"))
.ok_or(UserError::AttributeLimitReached)?;
Some(field_ids)
} else {
None
}
}
None => None,
};
let pool_params = GrenadParameters {
chunk_compression_type: self.indexer_config.chunk_compression_type,
chunk_compression_level: self.indexer_config.chunk_compression_level,
@@ -390,6 +412,7 @@ where
pool_params,
lmdb_writer_sx.clone(),
primary_key_id,
geo_fields_ids,
settings_diff.clone(),
max_positions_per_attributes,
)

View File

@@ -1161,11 +1161,6 @@ impl InnerIndexSettingsDiff {
pub fn settings_update_only(&self) -> bool {
self.settings_update_only
}
pub fn run_geo_indexing(&self) -> bool {
self.old.geo_fields_ids != self.new.geo_fields_ids
|| (!self.settings_update_only && self.new.geo_fields_ids.is_some())
}
}
#[derive(Clone)]
@@ -1182,7 +1177,6 @@ pub(crate) struct InnerIndexSettings {
pub proximity_precision: ProximityPrecision,
pub embedding_configs: EmbeddingConfigs,
pub existing_fields: HashSet<String>,
pub geo_fields_ids: Option<(FieldId, FieldId)>,
}
impl InnerIndexSettings {
@@ -1191,7 +1185,7 @@ impl InnerIndexSettings {
let stop_words = stop_words.map(|sw| sw.map_data(Vec::from).unwrap());
let allowed_separators = index.allowed_separators(rtxn)?;
let dictionary = index.dictionary(rtxn)?;
let mut fields_ids_map = index.fields_ids_map(rtxn)?;
let fields_ids_map = index.fields_ids_map(rtxn)?;
let user_defined_searchable_fields = index.user_defined_searchable_fields(rtxn)?;
let user_defined_searchable_fields =
user_defined_searchable_fields.map(|sf| sf.into_iter().map(String::from).collect());
@@ -1206,24 +1200,6 @@ impl InnerIndexSettings {
.into_iter()
.filter_map(|(field, count)| (count != 0).then_some(field))
.collect();
// index.fields_ids_map($a)? ==>> fields_ids_map
let geo_fields_ids = match fields_ids_map.id("_geo") {
Some(gfid) => {
let is_sortable = index.sortable_fields_ids(rtxn)?.contains(&gfid);
let is_filterable = index.filterable_fields_ids(rtxn)?.contains(&gfid);
// if `_geo` is faceted then we get the `lat` and `lng`
if is_sortable || is_filterable {
let field_ids = fields_ids_map
.insert("_geo.lat")
.zip(fields_ids_map.insert("_geo.lng"))
.ok_or(UserError::AttributeLimitReached)?;
Some(field_ids)
} else {
None
}
}
None => None,
};
Ok(Self {
stop_words,
@@ -1238,7 +1214,6 @@ impl InnerIndexSettings {
proximity_precision,
embedding_configs,
existing_fields,
geo_fields_ids,
})
}