mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-12-02 18:55:36 +00:00
Compare commits
3 Commits
v1.3.1
...
prototype-
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
c9cd150ca6 | ||
|
|
8b218d35b7 | ||
|
|
3b521f6c69 |
28
Cargo.lock
generated
28
Cargo.lock
generated
@@ -469,7 +469,7 @@ checksum = "8c3c1a368f70d6cf7302d78f8f7093da241fb8e8807c05cc9e51a125895a6d5b"
|
||||
|
||||
[[package]]
|
||||
name = "benchmarks"
|
||||
version = "1.3.1"
|
||||
version = "1.3.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"bytes",
|
||||
@@ -1199,7 +1199,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "dump"
|
||||
version = "1.3.1"
|
||||
version = "1.3.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"big_s",
|
||||
@@ -1413,7 +1413,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "file-store"
|
||||
version = "1.3.1"
|
||||
version = "1.3.0"
|
||||
dependencies = [
|
||||
"faux",
|
||||
"tempfile",
|
||||
@@ -1435,7 +1435,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "filter-parser"
|
||||
version = "1.3.1"
|
||||
version = "1.3.0"
|
||||
dependencies = [
|
||||
"insta",
|
||||
"nom",
|
||||
@@ -1454,7 +1454,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "flatten-serde-json"
|
||||
version = "1.3.1"
|
||||
version = "1.3.0"
|
||||
dependencies = [
|
||||
"criterion",
|
||||
"serde_json",
|
||||
@@ -1572,7 +1572,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "fuzzers"
|
||||
version = "1.3.1"
|
||||
version = "1.3.0"
|
||||
dependencies = [
|
||||
"arbitrary",
|
||||
"clap",
|
||||
@@ -1894,7 +1894,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "index-scheduler"
|
||||
version = "1.3.1"
|
||||
version = "1.3.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"big_s",
|
||||
@@ -2081,7 +2081,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "json-depth-checker"
|
||||
version = "1.3.1"
|
||||
version = "1.3.0"
|
||||
dependencies = [
|
||||
"criterion",
|
||||
"serde_json",
|
||||
@@ -2493,7 +2493,7 @@ checksum = "490cc448043f947bae3cbee9c203358d62dbee0db12107a74be5c30ccfd09771"
|
||||
|
||||
[[package]]
|
||||
name = "meili-snap"
|
||||
version = "1.3.1"
|
||||
version = "1.3.0"
|
||||
dependencies = [
|
||||
"insta",
|
||||
"md5",
|
||||
@@ -2502,7 +2502,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "meilisearch"
|
||||
version = "1.3.1"
|
||||
version = "1.3.0"
|
||||
dependencies = [
|
||||
"actix-cors",
|
||||
"actix-http",
|
||||
@@ -2591,7 +2591,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "meilisearch-auth"
|
||||
version = "1.3.1"
|
||||
version = "1.3.0"
|
||||
dependencies = [
|
||||
"base64 0.21.2",
|
||||
"enum-iterator",
|
||||
@@ -2610,7 +2610,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "meilisearch-types"
|
||||
version = "1.3.1"
|
||||
version = "1.3.0"
|
||||
dependencies = [
|
||||
"actix-web",
|
||||
"anyhow",
|
||||
@@ -2664,7 +2664,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "milli"
|
||||
version = "1.3.1"
|
||||
version = "1.3.0"
|
||||
dependencies = [
|
||||
"big_s",
|
||||
"bimap",
|
||||
@@ -2994,7 +2994,7 @@ checksum = "478c572c3d73181ff3c2539045f6eb99e5491218eae919370993b890cdbdd98e"
|
||||
|
||||
[[package]]
|
||||
name = "permissive-json-pointer"
|
||||
version = "1.3.1"
|
||||
version = "1.3.0"
|
||||
dependencies = [
|
||||
"big_s",
|
||||
"serde_json",
|
||||
|
||||
@@ -18,7 +18,7 @@ members = [
|
||||
]
|
||||
|
||||
[workspace.package]
|
||||
version = "1.3.1"
|
||||
version = "1.3.0"
|
||||
authors = ["Quentin de Quelen <quentin@dequelen.me>", "Clément Renault <clement@meilisearch.com>"]
|
||||
description = "Meilisearch HTTP server"
|
||||
homepage = "https://meilisearch.com"
|
||||
|
||||
@@ -1,24 +0,0 @@
|
||||
---
|
||||
source: dump/src/reader/mod.rs
|
||||
expression: spells.settings().unwrap()
|
||||
---
|
||||
{
|
||||
"displayedAttributes": [
|
||||
"*"
|
||||
],
|
||||
"searchableAttributes": [
|
||||
"*"
|
||||
],
|
||||
"filterableAttributes": [],
|
||||
"sortableAttributes": [],
|
||||
"rankingRules": [
|
||||
"typo",
|
||||
"words",
|
||||
"proximity",
|
||||
"attribute",
|
||||
"exactness"
|
||||
],
|
||||
"stopWords": [],
|
||||
"synonyms": {},
|
||||
"distinctAttribute": null
|
||||
}
|
||||
@@ -1,38 +0,0 @@
|
||||
---
|
||||
source: dump/src/reader/mod.rs
|
||||
expression: products.settings().unwrap()
|
||||
---
|
||||
{
|
||||
"displayedAttributes": [
|
||||
"*"
|
||||
],
|
||||
"searchableAttributes": [
|
||||
"*"
|
||||
],
|
||||
"filterableAttributes": [],
|
||||
"sortableAttributes": [],
|
||||
"rankingRules": [
|
||||
"typo",
|
||||
"words",
|
||||
"proximity",
|
||||
"attribute",
|
||||
"exactness"
|
||||
],
|
||||
"stopWords": [],
|
||||
"synonyms": {
|
||||
"android": [
|
||||
"phone",
|
||||
"smartphone"
|
||||
],
|
||||
"iphone": [
|
||||
"phone",
|
||||
"smartphone"
|
||||
],
|
||||
"phone": [
|
||||
"android",
|
||||
"iphone",
|
||||
"smartphone"
|
||||
]
|
||||
},
|
||||
"distinctAttribute": null
|
||||
}
|
||||
@@ -1,31 +0,0 @@
|
||||
---
|
||||
source: dump/src/reader/mod.rs
|
||||
expression: movies.settings().unwrap()
|
||||
---
|
||||
{
|
||||
"displayedAttributes": [
|
||||
"*"
|
||||
],
|
||||
"searchableAttributes": [
|
||||
"*"
|
||||
],
|
||||
"filterableAttributes": [
|
||||
"genres",
|
||||
"id"
|
||||
],
|
||||
"sortableAttributes": [
|
||||
"genres",
|
||||
"id"
|
||||
],
|
||||
"rankingRules": [
|
||||
"typo",
|
||||
"words",
|
||||
"proximity",
|
||||
"attribute",
|
||||
"exactness",
|
||||
"release_date:asc"
|
||||
],
|
||||
"stopWords": [],
|
||||
"synonyms": {},
|
||||
"distinctAttribute": null
|
||||
}
|
||||
@@ -534,7 +534,9 @@ impl IndexScheduler {
|
||||
let index_tasks = self.index_tasks(rtxn, index_name)? & enqueued;
|
||||
|
||||
// If autobatching is disabled we only take one task at a time.
|
||||
let tasks_limit = if self.autobatching_enabled { usize::MAX } else { 1 };
|
||||
// Otherwise, we take only a maximum of tasks to create batches.
|
||||
let tasks_limit =
|
||||
if self.autobatching_enabled { self.maximum_number_of_batched_tasks } else { 1 };
|
||||
|
||||
let enqueued = index_tasks
|
||||
.into_iter()
|
||||
|
||||
@@ -15,6 +15,7 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String {
|
||||
|
||||
let IndexScheduler {
|
||||
autobatching_enabled,
|
||||
maximum_number_of_batched_tasks: _,
|
||||
must_stop_processing: _,
|
||||
processing_tasks,
|
||||
file_store,
|
||||
|
||||
@@ -253,6 +253,9 @@ pub struct IndexSchedulerOptions {
|
||||
/// Set to `true` iff the index scheduler is allowed to automatically
|
||||
/// batch tasks together, to process multiple tasks at once.
|
||||
pub autobatching_enabled: bool,
|
||||
/// If the autobatcher is allowed to automatically batch tasks
|
||||
/// it will only batch this defined number of tasks at once.
|
||||
pub maximum_number_of_batched_tasks: usize,
|
||||
/// The maximum number of tasks stored in the task queue before starting
|
||||
/// to auto schedule task deletions.
|
||||
pub max_number_of_tasks: usize,
|
||||
@@ -310,6 +313,9 @@ pub struct IndexScheduler {
|
||||
/// Whether auto-batching is enabled or not.
|
||||
pub(crate) autobatching_enabled: bool,
|
||||
|
||||
/// The maximum number of tasks that will be batched together.
|
||||
pub(crate) maximum_number_of_batched_tasks: usize,
|
||||
|
||||
/// The max number of tasks allowed before the scheduler starts to delete
|
||||
/// the finished tasks automatically.
|
||||
pub(crate) max_number_of_tasks: usize,
|
||||
@@ -363,6 +369,7 @@ impl IndexScheduler {
|
||||
index_mapper: self.index_mapper.clone(),
|
||||
wake_up: self.wake_up.clone(),
|
||||
autobatching_enabled: self.autobatching_enabled,
|
||||
maximum_number_of_batched_tasks: self.maximum_number_of_batched_tasks,
|
||||
max_number_of_tasks: self.max_number_of_tasks,
|
||||
snapshots_path: self.snapshots_path.clone(),
|
||||
dumps_path: self.dumps_path.clone(),
|
||||
@@ -458,6 +465,7 @@ impl IndexScheduler {
|
||||
// we want to start the loop right away in case meilisearch was ctrl+Ced while processing things
|
||||
wake_up: Arc::new(SignalEvent::auto(true)),
|
||||
autobatching_enabled: options.autobatching_enabled,
|
||||
maximum_number_of_batched_tasks: options.maximum_number_of_batched_tasks,
|
||||
max_number_of_tasks: options.max_number_of_tasks,
|
||||
dumps_path: options.dumps_path,
|
||||
snapshots_path: options.snapshots_path,
|
||||
@@ -790,19 +798,10 @@ impl IndexScheduler {
|
||||
|
||||
let mut res = BTreeMap::new();
|
||||
|
||||
let processing_tasks = { self.processing_tasks.read().unwrap().processing.len() };
|
||||
|
||||
res.insert(
|
||||
"statuses".to_string(),
|
||||
enum_iterator::all::<Status>()
|
||||
.map(|s| {
|
||||
let tasks = self.get_status(&rtxn, s)?.len();
|
||||
match s {
|
||||
Status::Enqueued => Ok((s.to_string(), tasks - processing_tasks)),
|
||||
Status::Processing => Ok((s.to_string(), processing_tasks)),
|
||||
s => Ok((s.to_string(), tasks)),
|
||||
}
|
||||
})
|
||||
.map(|s| Ok((s.to_string(), self.get_status(&rtxn, s)?.len())))
|
||||
.collect::<Result<BTreeMap<String, u64>>>()?,
|
||||
);
|
||||
res.insert(
|
||||
@@ -1587,6 +1586,7 @@ mod tests {
|
||||
index_count: 5,
|
||||
indexer_config,
|
||||
autobatching_enabled: true,
|
||||
maximum_number_of_batched_tasks: usize::MAX,
|
||||
max_number_of_tasks: 1_000_000,
|
||||
instance_features: Default::default(),
|
||||
};
|
||||
@@ -4138,154 +4138,4 @@ mod tests {
|
||||
snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]", ".**.original_filter" => "[filter]", ".**.query" => "[query]" }), name: "everything_has_been_processed");
|
||||
drop(rtxn);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn basic_get_stats() {
|
||||
let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]);
|
||||
|
||||
let kind = index_creation_task("catto", "mouse");
|
||||
let _task = index_scheduler.register(kind).unwrap();
|
||||
let kind = index_creation_task("doggo", "sheep");
|
||||
let _task = index_scheduler.register(kind).unwrap();
|
||||
let kind = index_creation_task("whalo", "fish");
|
||||
let _task = index_scheduler.register(kind).unwrap();
|
||||
|
||||
snapshot!(json_string!(index_scheduler.get_stats().unwrap()), @r###"
|
||||
{
|
||||
"indexes": {
|
||||
"catto": 1,
|
||||
"doggo": 1,
|
||||
"whalo": 1
|
||||
},
|
||||
"statuses": {
|
||||
"canceled": 0,
|
||||
"enqueued": 3,
|
||||
"failed": 0,
|
||||
"processing": 0,
|
||||
"succeeded": 0
|
||||
},
|
||||
"types": {
|
||||
"documentAdditionOrUpdate": 0,
|
||||
"documentDeletion": 0,
|
||||
"dumpCreation": 0,
|
||||
"indexCreation": 3,
|
||||
"indexDeletion": 0,
|
||||
"indexSwap": 0,
|
||||
"indexUpdate": 0,
|
||||
"settingsUpdate": 0,
|
||||
"snapshotCreation": 0,
|
||||
"taskCancelation": 0,
|
||||
"taskDeletion": 0
|
||||
}
|
||||
}
|
||||
"###);
|
||||
|
||||
handle.advance_till([Start, BatchCreated]);
|
||||
snapshot!(json_string!(index_scheduler.get_stats().unwrap()), @r###"
|
||||
{
|
||||
"indexes": {
|
||||
"catto": 1,
|
||||
"doggo": 1,
|
||||
"whalo": 1
|
||||
},
|
||||
"statuses": {
|
||||
"canceled": 0,
|
||||
"enqueued": 2,
|
||||
"failed": 0,
|
||||
"processing": 1,
|
||||
"succeeded": 0
|
||||
},
|
||||
"types": {
|
||||
"documentAdditionOrUpdate": 0,
|
||||
"documentDeletion": 0,
|
||||
"dumpCreation": 0,
|
||||
"indexCreation": 3,
|
||||
"indexDeletion": 0,
|
||||
"indexSwap": 0,
|
||||
"indexUpdate": 0,
|
||||
"settingsUpdate": 0,
|
||||
"snapshotCreation": 0,
|
||||
"taskCancelation": 0,
|
||||
"taskDeletion": 0
|
||||
}
|
||||
}
|
||||
"###);
|
||||
|
||||
handle.advance_till([
|
||||
InsideProcessBatch,
|
||||
InsideProcessBatch,
|
||||
ProcessBatchSucceeded,
|
||||
AfterProcessing,
|
||||
Start,
|
||||
BatchCreated,
|
||||
]);
|
||||
snapshot!(json_string!(index_scheduler.get_stats().unwrap()), @r###"
|
||||
{
|
||||
"indexes": {
|
||||
"catto": 1,
|
||||
"doggo": 1,
|
||||
"whalo": 1
|
||||
},
|
||||
"statuses": {
|
||||
"canceled": 0,
|
||||
"enqueued": 1,
|
||||
"failed": 0,
|
||||
"processing": 1,
|
||||
"succeeded": 1
|
||||
},
|
||||
"types": {
|
||||
"documentAdditionOrUpdate": 0,
|
||||
"documentDeletion": 0,
|
||||
"dumpCreation": 0,
|
||||
"indexCreation": 3,
|
||||
"indexDeletion": 0,
|
||||
"indexSwap": 0,
|
||||
"indexUpdate": 0,
|
||||
"settingsUpdate": 0,
|
||||
"snapshotCreation": 0,
|
||||
"taskCancelation": 0,
|
||||
"taskDeletion": 0
|
||||
}
|
||||
}
|
||||
"###);
|
||||
|
||||
// now we make one more batch, the started_at field of the new tasks will be past `second_start_time`
|
||||
handle.advance_till([
|
||||
InsideProcessBatch,
|
||||
InsideProcessBatch,
|
||||
ProcessBatchSucceeded,
|
||||
AfterProcessing,
|
||||
Start,
|
||||
BatchCreated,
|
||||
]);
|
||||
snapshot!(json_string!(index_scheduler.get_stats().unwrap()), @r###"
|
||||
{
|
||||
"indexes": {
|
||||
"catto": 1,
|
||||
"doggo": 1,
|
||||
"whalo": 1
|
||||
},
|
||||
"statuses": {
|
||||
"canceled": 0,
|
||||
"enqueued": 0,
|
||||
"failed": 0,
|
||||
"processing": 1,
|
||||
"succeeded": 2
|
||||
},
|
||||
"types": {
|
||||
"documentAdditionOrUpdate": 0,
|
||||
"documentDeletion": 0,
|
||||
"dumpCreation": 0,
|
||||
"indexCreation": 3,
|
||||
"indexDeletion": 0,
|
||||
"indexSwap": 0,
|
||||
"indexUpdate": 0,
|
||||
"settingsUpdate": 0,
|
||||
"snapshotCreation": 0,
|
||||
"taskCancelation": 0,
|
||||
"taskDeletion": 0
|
||||
}
|
||||
}
|
||||
"###);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -167,9 +167,7 @@ macro_rules! snapshot {
|
||||
let (settings, snap_name, _) = $crate::default_snapshot_settings_for_test(test_name, Some(&snap_name));
|
||||
settings.bind(|| {
|
||||
let snap = format!("{}", $value);
|
||||
insta::allow_duplicates! {
|
||||
meili_snap::insta::assert_snapshot!(format!("{}", snap_name), snap);
|
||||
}
|
||||
meili_snap::insta::assert_snapshot!(format!("{}", snap_name), snap);
|
||||
});
|
||||
};
|
||||
($value:expr, @$inline:literal) => {
|
||||
@@ -178,9 +176,7 @@ macro_rules! snapshot {
|
||||
let (settings, _, _) = $crate::default_snapshot_settings_for_test("", Some("_dummy_argument"));
|
||||
settings.bind(|| {
|
||||
let snap = format!("{}", $value);
|
||||
insta::allow_duplicates! {
|
||||
meili_snap::insta::assert_snapshot!(snap, @$inline);
|
||||
}
|
||||
meili_snap::insta::assert_snapshot!(snap, @$inline);
|
||||
});
|
||||
};
|
||||
($value:expr) => {
|
||||
@@ -198,9 +194,7 @@ macro_rules! snapshot {
|
||||
let (settings, snap_name, _) = $crate::default_snapshot_settings_for_test(test_name, None);
|
||||
settings.bind(|| {
|
||||
let snap = format!("{}", $value);
|
||||
insta::allow_duplicates! {
|
||||
meili_snap::insta::assert_snapshot!(format!("{}", snap_name), snap);
|
||||
}
|
||||
meili_snap::insta::assert_snapshot!(format!("{}", snap_name), snap);
|
||||
});
|
||||
};
|
||||
}
|
||||
|
||||
@@ -285,6 +285,7 @@ impl From<Opt> for Infos {
|
||||
db_path,
|
||||
experimental_enable_metrics,
|
||||
experimental_reduce_indexing_memory_usage,
|
||||
experimental_limit_batched_tasks: _,
|
||||
http_addr,
|
||||
master_key: _,
|
||||
env,
|
||||
|
||||
@@ -236,6 +236,7 @@ fn open_or_create_database_unchecked(
|
||||
enable_mdb_writemap: opt.experimental_reduce_indexing_memory_usage,
|
||||
indexer_config: (&opt.indexer_options).try_into()?,
|
||||
autobatching_enabled: true,
|
||||
maximum_number_of_batched_tasks: opt.experimental_limit_batched_tasks,
|
||||
max_number_of_tasks: 1_000_000,
|
||||
index_growth_amount: byte_unit::Byte::from_str("10GiB").unwrap().get_bytes() as usize,
|
||||
index_count: DEFAULT_INDEX_COUNT,
|
||||
|
||||
@@ -51,6 +51,7 @@ const MEILI_LOG_LEVEL: &str = "MEILI_LOG_LEVEL";
|
||||
const MEILI_EXPERIMENTAL_ENABLE_METRICS: &str = "MEILI_EXPERIMENTAL_ENABLE_METRICS";
|
||||
const MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE: &str =
|
||||
"MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE";
|
||||
const MEILI_EXPERIMENTAL_LIMIT_BATCHED_TASKS: &str = "MEILI_EXPERIMENTAL_LIMIT_BATCHED_TASKS";
|
||||
|
||||
const DEFAULT_CONFIG_FILE_PATH: &str = "./config.toml";
|
||||
const DEFAULT_DB_PATH: &str = "./data.ms";
|
||||
@@ -301,6 +302,11 @@ pub struct Opt {
|
||||
#[serde(default)]
|
||||
pub experimental_reduce_indexing_memory_usage: bool,
|
||||
|
||||
/// Experimental limit to the number of tasks per batch
|
||||
#[clap(long, env = MEILI_EXPERIMENTAL_LIMIT_BATCHED_TASKS, default_value_t = default_limit_batched_tasks())]
|
||||
#[serde(default = "default_limit_batched_tasks")]
|
||||
pub experimental_limit_batched_tasks: usize,
|
||||
|
||||
#[serde(flatten)]
|
||||
#[clap(flatten)]
|
||||
pub indexer_options: IndexerOpts,
|
||||
@@ -393,7 +399,8 @@ impl Opt {
|
||||
#[cfg(all(not(debug_assertions), feature = "analytics"))]
|
||||
no_analytics,
|
||||
experimental_enable_metrics: enable_metrics_route,
|
||||
experimental_reduce_indexing_memory_usage: reduce_indexing_memory_usage,
|
||||
experimental_reduce_indexing_memory_usage,
|
||||
experimental_limit_batched_tasks,
|
||||
} = self;
|
||||
export_to_env_if_not_present(MEILI_DB_PATH, db_path);
|
||||
export_to_env_if_not_present(MEILI_HTTP_ADDR, http_addr);
|
||||
@@ -437,7 +444,11 @@ impl Opt {
|
||||
);
|
||||
export_to_env_if_not_present(
|
||||
MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE,
|
||||
reduce_indexing_memory_usage.to_string(),
|
||||
experimental_reduce_indexing_memory_usage.to_string(),
|
||||
);
|
||||
export_to_env_if_not_present(
|
||||
MEILI_EXPERIMENTAL_LIMIT_BATCHED_TASKS,
|
||||
experimental_limit_batched_tasks.to_string(),
|
||||
);
|
||||
indexer_options.export_to_env();
|
||||
}
|
||||
@@ -739,6 +750,10 @@ fn default_dump_dir() -> PathBuf {
|
||||
PathBuf::from(DEFAULT_DUMP_DIR)
|
||||
}
|
||||
|
||||
fn default_limit_batched_tasks() -> usize {
|
||||
usize::MAX
|
||||
}
|
||||
|
||||
/// Indicates if a snapshot was scheduled, and if yes with which interval.
|
||||
#[derive(Debug, Default, Copy, Clone, Deserialize, Serialize)]
|
||||
pub enum ScheduleSnapshot {
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
use meili_snap::{json_string, snapshot};
|
||||
use once_cell::sync::Lazy;
|
||||
use serde_json::{json, Value};
|
||||
|
||||
@@ -61,59 +60,3 @@ async fn geo_sort_with_geo_strings() {
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn geo_bounding_box_with_string_and_number() {
|
||||
let server = Server::new().await;
|
||||
let index = server.index("test");
|
||||
|
||||
let documents = DOCUMENTS.clone();
|
||||
index.update_settings_filterable_attributes(json!(["_geo"])).await;
|
||||
index.update_settings_sortable_attributes(json!(["_geo"])).await;
|
||||
index.add_documents(documents, None).await;
|
||||
index.wait_task(2).await;
|
||||
|
||||
index
|
||||
.search(
|
||||
json!({
|
||||
"filter": "_geoBoundingBox([89, 179], [-89, -179])",
|
||||
}),
|
||||
|response, code| {
|
||||
assert_eq!(code, 200, "{}", response);
|
||||
snapshot!(json_string!(response, { ".processingTimeMs" => "[time]" }), @r###"
|
||||
{
|
||||
"hits": [
|
||||
{
|
||||
"id": 1,
|
||||
"name": "Taco Truck",
|
||||
"address": "444 Salsa Street, Burritoville",
|
||||
"type": "Mexican",
|
||||
"rating": 9,
|
||||
"_geo": {
|
||||
"lat": 34.0522,
|
||||
"lng": -118.2437
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": 2,
|
||||
"name": "La Bella Italia",
|
||||
"address": "456 Elm Street, Townsville",
|
||||
"type": "Italian",
|
||||
"rating": 9,
|
||||
"_geo": {
|
||||
"lat": "45.4777599",
|
||||
"lng": "9.1967508"
|
||||
}
|
||||
}
|
||||
],
|
||||
"query": "",
|
||||
"processingTimeMs": "[time]",
|
||||
"limit": 20,
|
||||
"offset": 0,
|
||||
"estimatedTotalHits": 2
|
||||
}
|
||||
"###);
|
||||
},
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
||||
@@ -1718,11 +1718,11 @@ pub(crate) mod tests {
|
||||
.unwrap();
|
||||
index
|
||||
.add_documents(documents!([
|
||||
{ "id": 0, "_geo": { "lat": "0", "lng": "0" } },
|
||||
{ "id": 1, "_geo": { "lat": 0, "lng": "-175" } },
|
||||
{ "id": 2, "_geo": { "lat": "0", "lng": 175 } },
|
||||
{ "id": 0, "_geo": { "lat": 0, "lng": 0 } },
|
||||
{ "id": 1, "_geo": { "lat": 0, "lng": -175 } },
|
||||
{ "id": 2, "_geo": { "lat": 0, "lng": 175 } },
|
||||
{ "id": 3, "_geo": { "lat": 85, "lng": 0 } },
|
||||
{ "id": 4, "_geo": { "lat": "-85", "lng": "0" } },
|
||||
{ "id": 4, "_geo": { "lat": -85, "lng": 0 } },
|
||||
]))
|
||||
.unwrap();
|
||||
|
||||
|
||||
@@ -97,7 +97,7 @@ const MAX_LMDB_KEY_LENGTH: usize = 500;
|
||||
///
|
||||
/// This number is determined by the keys of the different facet databases
|
||||
/// and adding a margin of safety.
|
||||
pub const MAX_FACET_VALUE_LENGTH: usize = MAX_LMDB_KEY_LENGTH - 32;
|
||||
pub const MAX_FACET_VALUE_LENGTH: usize = MAX_LMDB_KEY_LENGTH - 20;
|
||||
|
||||
/// The maximum length a word can be
|
||||
pub const MAX_WORD_LENGTH: usize = MAX_LMDB_KEY_LENGTH / 2;
|
||||
|
||||
@@ -94,7 +94,7 @@ use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec, FacetGroupValu
|
||||
use crate::heed_codec::ByteSliceRefCodec;
|
||||
use crate::update::index_documents::create_sorter;
|
||||
use crate::update::merge_btreeset_string;
|
||||
use crate::{BEU16StrCodec, Index, Result, BEU16, MAX_FACET_VALUE_LENGTH};
|
||||
use crate::{BEU16StrCodec, Index, Result, BEU16};
|
||||
|
||||
pub mod bulk;
|
||||
pub mod delete;
|
||||
@@ -191,16 +191,7 @@ impl<'i> FacetsUpdate<'i> {
|
||||
for result in database.iter(wtxn)? {
|
||||
let (facet_group_key, ()) = result?;
|
||||
if let FacetGroupKey { field_id, level: 0, left_bound } = facet_group_key {
|
||||
let mut normalized_facet = left_bound.normalize(&options);
|
||||
let normalized_truncated_facet: String;
|
||||
if normalized_facet.len() > MAX_FACET_VALUE_LENGTH {
|
||||
normalized_truncated_facet = normalized_facet
|
||||
.char_indices()
|
||||
.take_while(|(idx, _)| *idx < MAX_FACET_VALUE_LENGTH)
|
||||
.map(|(_, c)| c)
|
||||
.collect();
|
||||
normalized_facet = normalized_truncated_facet.into();
|
||||
}
|
||||
let normalized_facet = left_bound.normalize(&options);
|
||||
let set = BTreeSet::from_iter(std::iter::once(left_bound));
|
||||
let key = (field_id, normalized_facet.as_ref());
|
||||
let key = BEU16StrCodec::bytes_encode(&key).ok_or(heed::Error::Encoding)?;
|
||||
|
||||
@@ -44,7 +44,7 @@ pub fn extract_facet_string_docids<R: io::Read + io::Seek>(
|
||||
if normalised_value.len() > MAX_FACET_VALUE_LENGTH {
|
||||
normalised_truncated_value = normalised_value
|
||||
.char_indices()
|
||||
.take_while(|(idx, _)| *idx < MAX_FACET_VALUE_LENGTH)
|
||||
.take_while(|(idx, _)| idx + 4 < MAX_FACET_VALUE_LENGTH)
|
||||
.map(|(_, c)| c)
|
||||
.collect();
|
||||
normalised_value = normalised_truncated_value.as_str();
|
||||
|
||||
@@ -28,13 +28,11 @@ pub struct ExtractedFacetValues {
|
||||
///
|
||||
/// Returns the generated grenad reader containing the docid the fid and the orginal value as key
|
||||
/// and the normalized value as value extracted from the given chunk of documents.
|
||||
/// We need the fid of the geofields to correctly parse them as numbers if they were sent as strings initially.
|
||||
#[logging_timer::time]
|
||||
pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
|
||||
obkv_documents: grenad::Reader<R>,
|
||||
indexer: GrenadParameters,
|
||||
faceted_fields: &HashSet<FieldId>,
|
||||
geo_fields_ids: Option<(FieldId, FieldId)>,
|
||||
) -> Result<ExtractedFacetValues> {
|
||||
let max_memory = indexer.max_memory_by_thread();
|
||||
|
||||
@@ -84,10 +82,7 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
|
||||
|
||||
let value = from_slice(field_bytes).map_err(InternalError::SerdeJson)?;
|
||||
|
||||
match extract_facet_values(
|
||||
&value,
|
||||
geo_fields_ids.map_or(false, |(lat, lng)| field_id == lat || field_id == lng),
|
||||
) {
|
||||
match extract_facet_values(&value) {
|
||||
FilterableValues::Null => {
|
||||
facet_is_null_docids.entry(field_id).or_default().insert(document);
|
||||
}
|
||||
@@ -180,13 +175,12 @@ enum FilterableValues {
|
||||
Values { numbers: Vec<f64>, strings: Vec<(String, String)> },
|
||||
}
|
||||
|
||||
fn extract_facet_values(value: &Value, geo_field: bool) -> FilterableValues {
|
||||
fn extract_facet_values(value: &Value) -> FilterableValues {
|
||||
fn inner_extract_facet_values(
|
||||
value: &Value,
|
||||
can_recurse: bool,
|
||||
output_numbers: &mut Vec<f64>,
|
||||
output_strings: &mut Vec<(String, String)>,
|
||||
geo_field: bool,
|
||||
) {
|
||||
match value {
|
||||
Value::Null => (),
|
||||
@@ -197,30 +191,13 @@ fn extract_facet_values(value: &Value, geo_field: bool) -> FilterableValues {
|
||||
}
|
||||
}
|
||||
Value::String(original) => {
|
||||
// if we're working on a geofield it MUST be something we can parse or else there was an internal error
|
||||
// in the enrich pipeline. But since the enrich pipeline worked, we want to avoid crashing at all costs.
|
||||
if geo_field {
|
||||
if let Ok(float) = original.parse() {
|
||||
output_numbers.push(float);
|
||||
} else {
|
||||
log::warn!(
|
||||
"Internal error, could not parse a geofield that has been validated. Please open an issue."
|
||||
)
|
||||
}
|
||||
}
|
||||
let normalized = crate::normalize_facet(original);
|
||||
output_strings.push((normalized, original.clone()));
|
||||
}
|
||||
Value::Array(values) => {
|
||||
if can_recurse {
|
||||
for value in values {
|
||||
inner_extract_facet_values(
|
||||
value,
|
||||
false,
|
||||
output_numbers,
|
||||
output_strings,
|
||||
geo_field,
|
||||
);
|
||||
inner_extract_facet_values(value, false, output_numbers, output_strings);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -236,7 +213,7 @@ fn extract_facet_values(value: &Value, geo_field: bool) -> FilterableValues {
|
||||
otherwise => {
|
||||
let mut numbers = Vec::new();
|
||||
let mut strings = Vec::new();
|
||||
inner_extract_facet_values(otherwise, true, &mut numbers, &mut strings, geo_field);
|
||||
inner_extract_facet_values(otherwise, true, &mut numbers, &mut strings);
|
||||
FilterableValues::Values { numbers, strings }
|
||||
}
|
||||
}
|
||||
|
||||
@@ -366,7 +366,6 @@ fn send_and_extract_flattened_documents_data(
|
||||
flattened_documents_chunk.clone(),
|
||||
indexer,
|
||||
faceted_fields,
|
||||
geo_fields_ids,
|
||||
)?;
|
||||
|
||||
// send docid_fid_facet_numbers_chunk to DB writer
|
||||
|
||||
Reference in New Issue
Block a user