mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-11-29 09:15:38 +00:00
Compare commits
5 Commits
v1.4.1
...
prototype-
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
b126bf3aec | ||
|
|
e82ff56416 | ||
|
|
1b26dde438 | ||
|
|
3bb644b54d | ||
|
|
34b9145db2 |
79
Cargo.lock
generated
79
Cargo.lock
generated
@@ -468,7 +468,7 @@ checksum = "8c3c1a368f70d6cf7302d78f8f7093da241fb8e8807c05cc9e51a125895a6d5b"
|
||||
|
||||
[[package]]
|
||||
name = "benchmarks"
|
||||
version = "1.4.1"
|
||||
version = "1.4.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"bytes",
|
||||
@@ -700,8 +700,7 @@ dependencies = [
|
||||
[[package]]
|
||||
name = "charabia"
|
||||
version = "0.8.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "098219a776307414866165a03a9cc68c1578764fe3616fe979e1c280790ddd73"
|
||||
source = "git+https://github.com/meilisearch/charabia?branch=main#5c3d09a7127dcf5e0e5d94d991c4d3d5ef4768cc"
|
||||
dependencies = [
|
||||
"aho-corasick",
|
||||
"cow-utils",
|
||||
@@ -1206,7 +1205,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "dump"
|
||||
version = "1.4.1"
|
||||
version = "1.4.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"big_s",
|
||||
@@ -1417,7 +1416,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "file-store"
|
||||
version = "1.4.1"
|
||||
version = "1.4.0"
|
||||
dependencies = [
|
||||
"faux",
|
||||
"tempfile",
|
||||
@@ -1439,7 +1438,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "filter-parser"
|
||||
version = "1.4.1"
|
||||
version = "1.4.0"
|
||||
dependencies = [
|
||||
"insta",
|
||||
"nom",
|
||||
@@ -1459,7 +1458,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "flatten-serde-json"
|
||||
version = "1.4.1"
|
||||
version = "1.4.0"
|
||||
dependencies = [
|
||||
"criterion",
|
||||
"serde_json",
|
||||
@@ -1577,7 +1576,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "fuzzers"
|
||||
version = "1.4.1"
|
||||
version = "1.4.0"
|
||||
dependencies = [
|
||||
"arbitrary",
|
||||
"clap",
|
||||
@@ -1891,7 +1890,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "index-scheduler"
|
||||
version = "1.4.1"
|
||||
version = "1.4.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"big_s",
|
||||
@@ -2088,7 +2087,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "json-depth-checker"
|
||||
version = "1.4.1"
|
||||
version = "1.4.0"
|
||||
dependencies = [
|
||||
"criterion",
|
||||
"serde_json",
|
||||
@@ -2177,9 +2176,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lindera-cc-cedict-builder"
|
||||
version = "0.27.0"
|
||||
version = "0.27.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2d2e8f2ca97ddf952fe340642511b9c14b373cb2eef711d526bb8ef2ca0969b8"
|
||||
checksum = "6f567a47e47b5420908424de2c6c5e424e3cafe588d0146bd128c0f3755758a3"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"bincode",
|
||||
@@ -2196,9 +2195,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lindera-compress"
|
||||
version = "0.27.0"
|
||||
version = "0.27.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f72b460559bcbe8a9cee85ea4a5056133ed3abf373031191589236e656d65b59"
|
||||
checksum = "49f3e553d55ebe9881fa5e5de588b0a153456e93564d17dfbef498912caf63a2"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"flate2",
|
||||
@@ -2207,9 +2206,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lindera-core"
|
||||
version = "0.27.0"
|
||||
version = "0.27.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f586eb8a9393c32d5525e0e9336a3727bd1329674740097126f3b0bff8a1a1ea"
|
||||
checksum = "a9a2440cc156a4a911a174ec68203543d1efb10df3a700a59b6bf581e453c726"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"bincode",
|
||||
@@ -2224,9 +2223,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lindera-decompress"
|
||||
version = "0.27.0"
|
||||
version = "0.27.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1fb1facd8da698072fcc7338bd757730db53d59f313f44dd583fa03681dcc0e1"
|
||||
checksum = "e077a410e61c962cb526f71b7effd62ffc607488a8f61869c937582d2ccb529b"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"flate2",
|
||||
@@ -2235,9 +2234,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lindera-dictionary"
|
||||
version = "0.27.0"
|
||||
version = "0.27.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ec7be7410b1da7017a8948986b87af67082f605e9a716f0989790d795d677f0c"
|
||||
checksum = "d9f57491adf7b311a3ee87f5e4a36454df16a2ec73de4ef28b2106fac80bd782"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"bincode",
|
||||
@@ -2255,9 +2254,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lindera-ipadic-builder"
|
||||
version = "0.27.0"
|
||||
version = "0.27.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "705d07f8a45d04fd95149f7ad41a26d1f9e56c9c00402be6f9dd05e3d88b99c6"
|
||||
checksum = "a3476ec7748aebd2eb23d496ddfce5e7e0a5c031cffcd214451043e02d029f11"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"bincode",
|
||||
@@ -2276,9 +2275,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lindera-ipadic-neologd-builder"
|
||||
version = "0.27.0"
|
||||
version = "0.27.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "633a93983ba13fba42328311a501091bd4a7aff0c94ae9eaa9d4733dd2b0468a"
|
||||
checksum = "7b1c7576a02d5e4af2bf62de51790a01bc4b8bc0d0b6a6b86a46b157f5cb306d"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"bincode",
|
||||
@@ -2297,9 +2296,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lindera-ko-dic"
|
||||
version = "0.27.0"
|
||||
version = "0.27.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a428e0d316b6c86f51bd919479692bc41ad840dba266ebc044663970f431ea18"
|
||||
checksum = "b713ecd5b827d7d448c3c5eb3c6d5899ecaf22cd17087599996349a02c76828d"
|
||||
dependencies = [
|
||||
"bincode",
|
||||
"byteorder",
|
||||
@@ -2314,9 +2313,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lindera-ko-dic-builder"
|
||||
version = "0.27.0"
|
||||
version = "0.27.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2a5288704c6b8a069c0a1705c38758e836497698b50453373ab3d56c6f9a7ef8"
|
||||
checksum = "3e545752f6487be87b572529ad594cb3b48d2ef20821516f598b2d152d23277b"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"bincode",
|
||||
@@ -2334,9 +2333,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lindera-tokenizer"
|
||||
version = "0.27.0"
|
||||
version = "0.27.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "106ba439b2e87529d9bbedbb88d69f635baba1195c26502b308f55a85885fc81"
|
||||
checksum = "24a2d4606a5a4da62ac4a3680ee884a75da7f0c892dc967fc9cb983ceba39a8f"
|
||||
dependencies = [
|
||||
"bincode",
|
||||
"byteorder",
|
||||
@@ -2349,9 +2348,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lindera-unidic"
|
||||
version = "0.27.0"
|
||||
version = "0.27.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3399b6dcfe1701333451d184ff3c677f433b320153427b146360c9e4bd8cb816"
|
||||
checksum = "388b1bdf81794b5d5b8057ce0321c58ff4b90d676b637948ccc7863ae2f43d28"
|
||||
dependencies = [
|
||||
"bincode",
|
||||
"byteorder",
|
||||
@@ -2366,9 +2365,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "lindera-unidic-builder"
|
||||
version = "0.27.0"
|
||||
version = "0.27.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b698227fdaeac32289173ab389b990d4eb00a40cbc9912020f69a0c491dabf55"
|
||||
checksum = "cdfa3e29a22c047da57fadd960ff674b720de15a1e2fb35b5ed67f3408afb469"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"bincode",
|
||||
@@ -2500,7 +2499,7 @@ checksum = "490cc448043f947bae3cbee9c203358d62dbee0db12107a74be5c30ccfd09771"
|
||||
|
||||
[[package]]
|
||||
name = "meili-snap"
|
||||
version = "1.4.1"
|
||||
version = "1.4.0"
|
||||
dependencies = [
|
||||
"insta",
|
||||
"md5",
|
||||
@@ -2509,7 +2508,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "meilisearch"
|
||||
version = "1.4.1"
|
||||
version = "1.4.0"
|
||||
dependencies = [
|
||||
"actix-cors",
|
||||
"actix-http",
|
||||
@@ -2600,7 +2599,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "meilisearch-auth"
|
||||
version = "1.4.1"
|
||||
version = "1.4.0"
|
||||
dependencies = [
|
||||
"base64 0.21.2",
|
||||
"enum-iterator",
|
||||
@@ -2619,7 +2618,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "meilisearch-types"
|
||||
version = "1.4.1"
|
||||
version = "1.4.0"
|
||||
dependencies = [
|
||||
"actix-web",
|
||||
"anyhow",
|
||||
@@ -2673,7 +2672,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "milli"
|
||||
version = "1.4.1"
|
||||
version = "1.4.0"
|
||||
dependencies = [
|
||||
"big_s",
|
||||
"bimap",
|
||||
@@ -2995,7 +2994,7 @@ checksum = "9b2a4787296e9989611394c33f193f676704af1686e70b8f8033ab5ba9a35a94"
|
||||
|
||||
[[package]]
|
||||
name = "permissive-json-pointer"
|
||||
version = "1.4.1"
|
||||
version = "1.4.0"
|
||||
dependencies = [
|
||||
"big_s",
|
||||
"serde_json",
|
||||
|
||||
@@ -18,7 +18,7 @@ members = [
|
||||
]
|
||||
|
||||
[workspace.package]
|
||||
version = "1.4.1"
|
||||
version = "1.4.0"
|
||||
authors = ["Quentin de Quelen <quentin@dequelen.me>", "Clément Renault <clement@meilisearch.com>"]
|
||||
description = "Meilisearch HTTP server"
|
||||
homepage = "https://meilisearch.com"
|
||||
|
||||
@@ -539,7 +539,9 @@ impl IndexScheduler {
|
||||
let index_tasks = self.index_tasks(rtxn, index_name)? & enqueued;
|
||||
|
||||
// If autobatching is disabled we only take one task at a time.
|
||||
let tasks_limit = if self.autobatching_enabled { usize::MAX } else { 1 };
|
||||
// Otherwise, we take only a maximum of tasks to create batches.
|
||||
let tasks_limit =
|
||||
if self.autobatching_enabled { self.maximum_number_of_batched_tasks } else { 1 };
|
||||
|
||||
let enqueued = index_tasks
|
||||
.into_iter()
|
||||
|
||||
@@ -15,6 +15,7 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String {
|
||||
|
||||
let IndexScheduler {
|
||||
autobatching_enabled,
|
||||
maximum_number_of_batched_tasks: _,
|
||||
must_stop_processing: _,
|
||||
processing_tasks,
|
||||
file_store,
|
||||
|
||||
@@ -253,6 +253,9 @@ pub struct IndexSchedulerOptions {
|
||||
/// Set to `true` iff the index scheduler is allowed to automatically
|
||||
/// batch tasks together, to process multiple tasks at once.
|
||||
pub autobatching_enabled: bool,
|
||||
/// If the autobatcher is allowed to automatically batch tasks
|
||||
/// it will only batch this defined number of tasks at once.
|
||||
pub maximum_number_of_batched_tasks: usize,
|
||||
/// The maximum number of tasks stored in the task queue before starting
|
||||
/// to auto schedule task deletions.
|
||||
pub max_number_of_tasks: usize,
|
||||
@@ -310,6 +313,9 @@ pub struct IndexScheduler {
|
||||
/// Whether auto-batching is enabled or not.
|
||||
pub(crate) autobatching_enabled: bool,
|
||||
|
||||
/// The maximum number of tasks that will be batched together.
|
||||
pub(crate) maximum_number_of_batched_tasks: usize,
|
||||
|
||||
/// The max number of tasks allowed before the scheduler starts to delete
|
||||
/// the finished tasks automatically.
|
||||
pub(crate) max_number_of_tasks: usize,
|
||||
@@ -363,6 +369,7 @@ impl IndexScheduler {
|
||||
index_mapper: self.index_mapper.clone(),
|
||||
wake_up: self.wake_up.clone(),
|
||||
autobatching_enabled: self.autobatching_enabled,
|
||||
maximum_number_of_batched_tasks: self.maximum_number_of_batched_tasks,
|
||||
max_number_of_tasks: self.max_number_of_tasks,
|
||||
snapshots_path: self.snapshots_path.clone(),
|
||||
dumps_path: self.dumps_path.clone(),
|
||||
@@ -458,6 +465,7 @@ impl IndexScheduler {
|
||||
// we want to start the loop right away in case meilisearch was ctrl+Ced while processing things
|
||||
wake_up: Arc::new(SignalEvent::auto(true)),
|
||||
autobatching_enabled: options.autobatching_enabled,
|
||||
maximum_number_of_batched_tasks: options.maximum_number_of_batched_tasks,
|
||||
max_number_of_tasks: options.max_number_of_tasks,
|
||||
dumps_path: options.dumps_path,
|
||||
snapshots_path: options.snapshots_path,
|
||||
@@ -1589,6 +1597,7 @@ mod tests {
|
||||
index_count: 5,
|
||||
indexer_config,
|
||||
autobatching_enabled: true,
|
||||
maximum_number_of_batched_tasks: usize::MAX,
|
||||
max_number_of_tasks: 1_000_000,
|
||||
instance_features: Default::default(),
|
||||
};
|
||||
|
||||
@@ -133,7 +133,7 @@ vergen = { version = "7.5.1", default-features = false, features = ["git"] }
|
||||
zip = { version = "0.6.4", optional = true }
|
||||
|
||||
[features]
|
||||
default = ["analytics", "meilisearch-types/all-tokenizations", "mini-dashboard"]
|
||||
default = ["analytics", "meilisearch-types/all-tokenizations", "mini-dashboard", "profile-with-puffin"]
|
||||
analytics = ["segment"]
|
||||
profile-with-puffin = ["dep:puffin_http"]
|
||||
mini-dashboard = [
|
||||
|
||||
@@ -285,6 +285,7 @@ impl From<Opt> for Infos {
|
||||
db_path,
|
||||
experimental_enable_metrics,
|
||||
experimental_reduce_indexing_memory_usage,
|
||||
experimental_limit_batched_tasks: _,
|
||||
http_addr,
|
||||
master_key: _,
|
||||
env,
|
||||
|
||||
@@ -236,6 +236,7 @@ fn open_or_create_database_unchecked(
|
||||
enable_mdb_writemap: opt.experimental_reduce_indexing_memory_usage,
|
||||
indexer_config: (&opt.indexer_options).try_into()?,
|
||||
autobatching_enabled: true,
|
||||
maximum_number_of_batched_tasks: opt.experimental_limit_batched_tasks,
|
||||
max_number_of_tasks: 1_000_000,
|
||||
index_growth_amount: byte_unit::Byte::from_str("10GiB").unwrap().get_bytes() as usize,
|
||||
index_count: DEFAULT_INDEX_COUNT,
|
||||
|
||||
@@ -51,6 +51,7 @@ const MEILI_LOG_LEVEL: &str = "MEILI_LOG_LEVEL";
|
||||
const MEILI_EXPERIMENTAL_ENABLE_METRICS: &str = "MEILI_EXPERIMENTAL_ENABLE_METRICS";
|
||||
const MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE: &str =
|
||||
"MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE";
|
||||
const MEILI_EXPERIMENTAL_LIMIT_BATCHED_TASKS: &str = "MEILI_EXPERIMENTAL_LIMIT_BATCHED_TASKS";
|
||||
|
||||
const DEFAULT_CONFIG_FILE_PATH: &str = "./config.toml";
|
||||
const DEFAULT_DB_PATH: &str = "./data.ms";
|
||||
@@ -301,6 +302,11 @@ pub struct Opt {
|
||||
#[serde(default)]
|
||||
pub experimental_reduce_indexing_memory_usage: bool,
|
||||
|
||||
/// Experimental limit to the number of tasks per batch
|
||||
#[clap(long, env = MEILI_EXPERIMENTAL_LIMIT_BATCHED_TASKS, default_value_t = default_limit_batched_tasks())]
|
||||
#[serde(default = "default_limit_batched_tasks")]
|
||||
pub experimental_limit_batched_tasks: usize,
|
||||
|
||||
#[serde(flatten)]
|
||||
#[clap(flatten)]
|
||||
pub indexer_options: IndexerOpts,
|
||||
@@ -393,7 +399,8 @@ impl Opt {
|
||||
#[cfg(all(not(debug_assertions), feature = "analytics"))]
|
||||
no_analytics,
|
||||
experimental_enable_metrics: enable_metrics_route,
|
||||
experimental_reduce_indexing_memory_usage: reduce_indexing_memory_usage,
|
||||
experimental_reduce_indexing_memory_usage,
|
||||
experimental_limit_batched_tasks,
|
||||
} = self;
|
||||
export_to_env_if_not_present(MEILI_DB_PATH, db_path);
|
||||
export_to_env_if_not_present(MEILI_HTTP_ADDR, http_addr);
|
||||
@@ -437,7 +444,11 @@ impl Opt {
|
||||
);
|
||||
export_to_env_if_not_present(
|
||||
MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE,
|
||||
reduce_indexing_memory_usage.to_string(),
|
||||
experimental_reduce_indexing_memory_usage.to_string(),
|
||||
);
|
||||
export_to_env_if_not_present(
|
||||
MEILI_EXPERIMENTAL_LIMIT_BATCHED_TASKS,
|
||||
experimental_limit_batched_tasks.to_string(),
|
||||
);
|
||||
indexer_options.export_to_env();
|
||||
}
|
||||
@@ -739,6 +750,10 @@ fn default_dump_dir() -> PathBuf {
|
||||
PathBuf::from(DEFAULT_DUMP_DIR)
|
||||
}
|
||||
|
||||
fn default_limit_batched_tasks() -> usize {
|
||||
usize::MAX
|
||||
}
|
||||
|
||||
/// Indicates if a snapshot was scheduled, and if yes with which interval.
|
||||
#[derive(Debug, Default, Copy, Clone, Deserialize, Serialize)]
|
||||
pub enum ScheduleSnapshot {
|
||||
|
||||
@@ -1,63 +0,0 @@
|
||||
use meili_snap::snapshot;
|
||||
use once_cell::sync::Lazy;
|
||||
use serde_json::{json, Value};
|
||||
|
||||
use crate::common::Server;
|
||||
|
||||
pub(self) static DOCUMENTS: Lazy<Value> = Lazy::new(|| {
|
||||
json!([
|
||||
{"productId": 1, "shopId": 1},
|
||||
{"productId": 2, "shopId": 1},
|
||||
{"productId": 3, "shopId": 2},
|
||||
{"productId": 4, "shopId": 2},
|
||||
{"productId": 5, "shopId": 3},
|
||||
{"productId": 6, "shopId": 3},
|
||||
{"productId": 7, "shopId": 4},
|
||||
{"productId": 8, "shopId": 4},
|
||||
{"productId": 9, "shopId": 5},
|
||||
{"productId": 10, "shopId": 5}
|
||||
])
|
||||
});
|
||||
|
||||
pub(self) static DOCUMENT_PRIMARY_KEY: &str = "productId";
|
||||
pub(self) static DOCUMENT_DISTINCT_KEY: &str = "shopId";
|
||||
|
||||
/// testing: https://github.com/meilisearch/meilisearch/issues/4078
|
||||
#[actix_rt::test]
|
||||
async fn distinct_search_with_offset_no_ranking() {
|
||||
let server = Server::new().await;
|
||||
let index = server.index("test");
|
||||
|
||||
let documents = DOCUMENTS.clone();
|
||||
index.add_documents(documents, Some(DOCUMENT_PRIMARY_KEY)).await;
|
||||
index.update_distinct_attribute(json!(DOCUMENT_DISTINCT_KEY)).await;
|
||||
index.wait_task(1).await;
|
||||
|
||||
fn get_hits(response: Value) -> Vec<i64> {
|
||||
let hits_array = response["hits"].as_array().unwrap();
|
||||
hits_array.iter().map(|h| h[DOCUMENT_DISTINCT_KEY].as_i64().unwrap()).collect::<Vec<_>>()
|
||||
}
|
||||
|
||||
let (response, code) = index.search_post(json!({"limit": 2, "offset": 0})).await;
|
||||
let hits = get_hits(response);
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(hits.len(), @"2");
|
||||
snapshot!(format!("{:?}", hits), @"[1, 2]");
|
||||
|
||||
let (response, code) = index.search_post(json!({"limit": 2, "offset": 2})).await;
|
||||
let hits = get_hits(response);
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(hits.len(), @"2");
|
||||
snapshot!(format!("{:?}", hits), @"[3, 4]");
|
||||
|
||||
let (response, code) = index.search_post(json!({"limit": 10, "offset": 4})).await;
|
||||
let hits = get_hits(response);
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(hits.len(), @"1");
|
||||
snapshot!(format!("{:?}", hits), @"[5]");
|
||||
|
||||
let (response, code) = index.search_post(json!({"limit": 10, "offset": 5})).await;
|
||||
let hits = get_hits(response);
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(hits.len(), @"0");
|
||||
}
|
||||
@@ -1,7 +1,6 @@
|
||||
// This modules contains all the test concerning search. Each particular feature of the search
|
||||
// should be tested in its own module to isolate tests and keep the tests readable.
|
||||
|
||||
mod distinct;
|
||||
mod errors;
|
||||
mod facet_search;
|
||||
mod formatted;
|
||||
|
||||
@@ -17,7 +17,8 @@ bincode = "1.3.3"
|
||||
bstr = "1.4.0"
|
||||
bytemuck = { version = "1.13.1", features = ["extern_crate_alloc"] }
|
||||
byteorder = "1.4.3"
|
||||
charabia = { version = "0.8.3", default-features = false }
|
||||
# charabia = { version = "0.8.3", default-features = false }
|
||||
charabia = { git = "https://github.com/meilisearch/charabia", branch = "main", default-features = false }
|
||||
concat-arrays = "0.1.2"
|
||||
crossbeam-channel = "0.5.8"
|
||||
deserr = { version = "0.6.0", features = ["actix-web"]}
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
use std::fs::File;
|
||||
use std::io::BufReader;
|
||||
use std::{io, str};
|
||||
|
||||
use obkv::KvReader;
|
||||
@@ -20,14 +19,14 @@ use crate::FieldId;
|
||||
pub struct EnrichedDocumentsBatchReader<R> {
|
||||
documents: DocumentsBatchReader<R>,
|
||||
primary_key: String,
|
||||
external_ids: grenad::ReaderCursor<BufReader<File>>,
|
||||
external_ids: grenad::ReaderCursor<File>,
|
||||
}
|
||||
|
||||
impl<R: io::Read + io::Seek> EnrichedDocumentsBatchReader<R> {
|
||||
pub fn new(
|
||||
documents: DocumentsBatchReader<R>,
|
||||
primary_key: String,
|
||||
external_ids: grenad::Reader<BufReader<File>>,
|
||||
external_ids: grenad::Reader<File>,
|
||||
) -> Result<Self, Error> {
|
||||
if documents.documents_count() as u64 == external_ids.len() {
|
||||
Ok(EnrichedDocumentsBatchReader {
|
||||
@@ -76,7 +75,7 @@ pub struct EnrichedDocument<'a> {
|
||||
pub struct EnrichedDocumentsBatchCursor<R> {
|
||||
documents: DocumentsBatchCursor<R>,
|
||||
primary_key: String,
|
||||
external_ids: grenad::ReaderCursor<BufReader<File>>,
|
||||
external_ids: grenad::ReaderCursor<File>,
|
||||
}
|
||||
|
||||
impl<R> EnrichedDocumentsBatchCursor<R> {
|
||||
|
||||
@@ -46,27 +46,18 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
|
||||
if let Some(distinct_fid) = distinct_fid {
|
||||
let mut excluded = RoaringBitmap::new();
|
||||
let mut results = vec![];
|
||||
let mut skip = 0;
|
||||
for docid in universe.iter() {
|
||||
if results.len() >= length {
|
||||
if results.len() >= from + length {
|
||||
break;
|
||||
}
|
||||
if excluded.contains(docid) {
|
||||
continue;
|
||||
}
|
||||
|
||||
distinct_single_docid(ctx.index, ctx.txn, distinct_fid, docid, &mut excluded)?;
|
||||
skip += 1;
|
||||
if skip <= from {
|
||||
continue;
|
||||
}
|
||||
|
||||
results.push(docid);
|
||||
}
|
||||
|
||||
let mut all_candidates = universe - excluded;
|
||||
all_candidates.extend(results.iter().copied());
|
||||
|
||||
return Ok(BucketSortOutput {
|
||||
scores: vec![Default::default(); results.len()],
|
||||
docids: results,
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
use std::borrow::Cow;
|
||||
use std::fs::File;
|
||||
use std::io::BufReader;
|
||||
|
||||
use grenad::CompressionType;
|
||||
use heed::types::ByteSlice;
|
||||
@@ -31,7 +30,7 @@ pub struct FacetsUpdateBulk<'i> {
|
||||
facet_type: FacetType,
|
||||
field_ids: Vec<FieldId>,
|
||||
// None if level 0 does not need to be updated
|
||||
new_data: Option<grenad::Reader<BufReader<File>>>,
|
||||
new_data: Option<grenad::Reader<File>>,
|
||||
}
|
||||
|
||||
impl<'i> FacetsUpdateBulk<'i> {
|
||||
@@ -39,7 +38,7 @@ impl<'i> FacetsUpdateBulk<'i> {
|
||||
index: &'i Index,
|
||||
field_ids: Vec<FieldId>,
|
||||
facet_type: FacetType,
|
||||
new_data: grenad::Reader<BufReader<File>>,
|
||||
new_data: grenad::Reader<File>,
|
||||
group_size: u8,
|
||||
min_level_size: u8,
|
||||
) -> FacetsUpdateBulk<'i> {
|
||||
@@ -188,7 +187,7 @@ impl<R: std::io::Read + std::io::Seek> FacetsUpdateBulkInner<R> {
|
||||
&self,
|
||||
field_id: FieldId,
|
||||
txn: &RoTxn,
|
||||
) -> Result<(Vec<grenad::Reader<BufReader<File>>>, RoaringBitmap)> {
|
||||
) -> Result<(Vec<grenad::Reader<File>>, RoaringBitmap)> {
|
||||
let mut all_docids = RoaringBitmap::new();
|
||||
let subwriters = self.compute_higher_levels(txn, field_id, 32, &mut |bitmaps, _| {
|
||||
for bitmap in bitmaps {
|
||||
@@ -260,7 +259,7 @@ impl<R: std::io::Read + std::io::Seek> FacetsUpdateBulkInner<R> {
|
||||
field_id: u16,
|
||||
level: u8,
|
||||
handle_group: &mut dyn FnMut(&[RoaringBitmap], &'t [u8]) -> Result<()>,
|
||||
) -> Result<Vec<grenad::Reader<BufReader<File>>>> {
|
||||
) -> Result<Vec<grenad::Reader<File>>> {
|
||||
if level == 0 {
|
||||
self.read_level_0(rtxn, field_id, handle_group)?;
|
||||
// Level 0 is already in the database
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
use std::collections::HashMap;
|
||||
use std::fs::File;
|
||||
use std::io::BufReader;
|
||||
|
||||
use heed::types::{ByteSlice, DecodeIgnore};
|
||||
use heed::{BytesDecode, Error, RoTxn, RwTxn};
|
||||
@@ -35,14 +34,14 @@ pub struct FacetsUpdateIncremental<'i> {
|
||||
index: &'i Index,
|
||||
inner: FacetsUpdateIncrementalInner,
|
||||
facet_type: FacetType,
|
||||
new_data: grenad::Reader<BufReader<File>>,
|
||||
new_data: grenad::Reader<File>,
|
||||
}
|
||||
|
||||
impl<'i> FacetsUpdateIncremental<'i> {
|
||||
pub fn new(
|
||||
index: &'i Index,
|
||||
facet_type: FacetType,
|
||||
new_data: grenad::Reader<BufReader<File>>,
|
||||
new_data: grenad::Reader<File>,
|
||||
group_size: u8,
|
||||
min_level_size: u8,
|
||||
max_group_size: u8,
|
||||
|
||||
@@ -78,7 +78,6 @@ pub const FACET_MIN_LEVEL_SIZE: u8 = 5;
|
||||
|
||||
use std::collections::BTreeSet;
|
||||
use std::fs::File;
|
||||
use std::io::BufReader;
|
||||
use std::iter::FromIterator;
|
||||
|
||||
use charabia::normalizer::{Normalize, NormalizerOption};
|
||||
@@ -109,17 +108,13 @@ pub struct FacetsUpdate<'i> {
|
||||
index: &'i Index,
|
||||
database: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
|
||||
facet_type: FacetType,
|
||||
new_data: grenad::Reader<BufReader<File>>,
|
||||
new_data: grenad::Reader<File>,
|
||||
group_size: u8,
|
||||
max_group_size: u8,
|
||||
min_level_size: u8,
|
||||
}
|
||||
impl<'i> FacetsUpdate<'i> {
|
||||
pub fn new(
|
||||
index: &'i Index,
|
||||
facet_type: FacetType,
|
||||
new_data: grenad::Reader<BufReader<File>>,
|
||||
) -> Self {
|
||||
pub fn new(index: &'i Index, facet_type: FacetType, new_data: grenad::Reader<File>) -> Self {
|
||||
let database = match facet_type {
|
||||
FacetType::String => index
|
||||
.facet_id_string_docids
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
use std::io::{BufWriter, Read, Seek};
|
||||
use std::io::{Read, Seek};
|
||||
use std::result::Result as StdResult;
|
||||
use std::{fmt, iter};
|
||||
|
||||
@@ -35,7 +35,7 @@ pub fn enrich_documents_batch<R: Read + Seek>(
|
||||
|
||||
let (mut cursor, mut documents_batch_index) = reader.into_cursor_and_fields_index();
|
||||
|
||||
let mut external_ids = tempfile::tempfile().map(BufWriter::new).map(grenad::Writer::new)?;
|
||||
let mut external_ids = tempfile::tempfile().map(grenad::Writer::new)?;
|
||||
let mut uuid_buffer = [0; uuid::fmt::Hyphenated::LENGTH];
|
||||
|
||||
// The primary key *field id* that has already been set for this index or the one
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
use std::collections::{HashMap, HashSet};
|
||||
use std::convert::TryInto;
|
||||
use std::fs::File;
|
||||
use std::io::BufReader;
|
||||
use std::{io, mem, str};
|
||||
|
||||
use charabia::{Language, Script, SeparatorKind, Token, TokenKind, Tokenizer, TokenizerBuilder};
|
||||
@@ -32,7 +31,7 @@ pub fn extract_docid_word_positions<R: io::Read + io::Seek>(
|
||||
allowed_separators: Option<&[&str]>,
|
||||
dictionary: Option<&[&str]>,
|
||||
max_positions_per_attributes: Option<u32>,
|
||||
) -> Result<(RoaringBitmap, grenad::Reader<BufReader<File>>, ScriptLanguageDocidsMap)> {
|
||||
) -> Result<(RoaringBitmap, grenad::Reader<File>, ScriptLanguageDocidsMap)> {
|
||||
puffin::profile_function!();
|
||||
|
||||
let max_positions_per_attributes = max_positions_per_attributes
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
use std::fs::File;
|
||||
use std::io::{self, BufReader};
|
||||
use std::io;
|
||||
|
||||
use heed::{BytesDecode, BytesEncode};
|
||||
|
||||
@@ -19,7 +19,7 @@ use crate::Result;
|
||||
pub fn extract_facet_number_docids<R: io::Read + io::Seek>(
|
||||
docid_fid_facet_number: grenad::Reader<R>,
|
||||
indexer: GrenadParameters,
|
||||
) -> Result<grenad::Reader<BufReader<File>>> {
|
||||
) -> Result<grenad::Reader<File>> {
|
||||
puffin::profile_function!();
|
||||
|
||||
let max_memory = indexer.max_memory_by_thread();
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
use std::fs::File;
|
||||
use std::io::{self, BufReader};
|
||||
use std::io;
|
||||
|
||||
use heed::BytesEncode;
|
||||
|
||||
@@ -17,7 +17,7 @@ use crate::{FieldId, Result, MAX_FACET_VALUE_LENGTH};
|
||||
pub fn extract_facet_string_docids<R: io::Read + io::Seek>(
|
||||
docid_fid_facet_string: grenad::Reader<R>,
|
||||
indexer: GrenadParameters,
|
||||
) -> Result<grenad::Reader<BufReader<File>>> {
|
||||
) -> Result<grenad::Reader<File>> {
|
||||
puffin::profile_function!();
|
||||
|
||||
let max_memory = indexer.max_memory_by_thread();
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
use std::collections::{BTreeMap, HashSet};
|
||||
use std::convert::TryInto;
|
||||
use std::fs::File;
|
||||
use std::io::{self, BufReader};
|
||||
use std::io;
|
||||
use std::mem::size_of;
|
||||
|
||||
use heed::zerocopy::AsBytes;
|
||||
@@ -17,11 +17,11 @@ use crate::{CboRoaringBitmapCodec, DocumentId, FieldId, Result, BEU32, MAX_FACET
|
||||
|
||||
/// The extracted facet values stored in grenad files by type.
|
||||
pub struct ExtractedFacetValues {
|
||||
pub docid_fid_facet_numbers_chunk: grenad::Reader<BufReader<File>>,
|
||||
pub docid_fid_facet_strings_chunk: grenad::Reader<BufReader<File>>,
|
||||
pub fid_facet_is_null_docids_chunk: grenad::Reader<BufReader<File>>,
|
||||
pub fid_facet_is_empty_docids_chunk: grenad::Reader<BufReader<File>>,
|
||||
pub fid_facet_exists_docids_chunk: grenad::Reader<BufReader<File>>,
|
||||
pub docid_fid_facet_numbers_chunk: grenad::Reader<File>,
|
||||
pub docid_fid_facet_strings_chunk: grenad::Reader<File>,
|
||||
pub fid_facet_is_null_docids_chunk: grenad::Reader<File>,
|
||||
pub fid_facet_is_empty_docids_chunk: grenad::Reader<File>,
|
||||
pub fid_facet_exists_docids_chunk: grenad::Reader<File>,
|
||||
}
|
||||
|
||||
/// Extracts the facet values of each faceted field of each document.
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
use std::collections::HashMap;
|
||||
use std::fs::File;
|
||||
use std::io::{self, BufReader};
|
||||
use std::io;
|
||||
|
||||
use grenad::Sorter;
|
||||
|
||||
@@ -21,7 +21,7 @@ use crate::{relative_from_absolute_position, DocumentId, FieldId, Result};
|
||||
pub fn extract_fid_word_count_docids<R: io::Read + io::Seek>(
|
||||
docid_word_positions: grenad::Reader<R>,
|
||||
indexer: GrenadParameters,
|
||||
) -> Result<grenad::Reader<BufReader<File>>> {
|
||||
) -> Result<grenad::Reader<File>> {
|
||||
puffin::profile_function!();
|
||||
|
||||
let max_memory = indexer.max_memory_by_thread();
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
use std::fs::File;
|
||||
use std::io::{self, BufReader};
|
||||
use std::io;
|
||||
|
||||
use concat_arrays::concat_arrays;
|
||||
use serde_json::Value;
|
||||
@@ -18,7 +18,7 @@ pub fn extract_geo_points<R: io::Read + io::Seek>(
|
||||
indexer: GrenadParameters,
|
||||
primary_key_id: FieldId,
|
||||
(lat_fid, lng_fid): (FieldId, FieldId),
|
||||
) -> Result<grenad::Reader<BufReader<File>>> {
|
||||
) -> Result<grenad::Reader<File>> {
|
||||
puffin::profile_function!();
|
||||
|
||||
let mut writer = create_writer(
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
use std::convert::TryFrom;
|
||||
use std::fs::File;
|
||||
use std::io::{self, BufReader};
|
||||
use std::io;
|
||||
|
||||
use bytemuck::cast_slice;
|
||||
use serde_json::{from_slice, Value};
|
||||
@@ -18,7 +18,7 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
|
||||
indexer: GrenadParameters,
|
||||
primary_key_id: FieldId,
|
||||
vectors_fid: FieldId,
|
||||
) -> Result<grenad::Reader<BufReader<File>>> {
|
||||
) -> Result<grenad::Reader<File>> {
|
||||
puffin::profile_function!();
|
||||
|
||||
let mut writer = create_writer(
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
use std::collections::HashSet;
|
||||
use std::fs::File;
|
||||
use std::io::{self, BufReader};
|
||||
use std::io;
|
||||
use std::iter::FromIterator;
|
||||
|
||||
use roaring::RoaringBitmap;
|
||||
@@ -26,7 +26,7 @@ pub fn extract_word_docids<R: io::Read + io::Seek>(
|
||||
docid_word_positions: grenad::Reader<R>,
|
||||
indexer: GrenadParameters,
|
||||
exact_attributes: &HashSet<FieldId>,
|
||||
) -> Result<(grenad::Reader<BufReader<File>>, grenad::Reader<BufReader<File>>)> {
|
||||
) -> Result<(grenad::Reader<File>, grenad::Reader<File>)> {
|
||||
puffin::profile_function!();
|
||||
|
||||
let max_memory = indexer.max_memory_by_thread();
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
use std::fs::File;
|
||||
use std::io::{self, BufReader};
|
||||
use std::io;
|
||||
|
||||
use super::helpers::{
|
||||
create_sorter, merge_cbo_roaring_bitmaps, read_u32_ne_bytes, sorter_into_reader,
|
||||
@@ -14,7 +14,7 @@ use crate::{relative_from_absolute_position, DocumentId, Result};
|
||||
pub fn extract_word_fid_docids<R: io::Read + io::Seek>(
|
||||
docid_word_positions: grenad::Reader<R>,
|
||||
indexer: GrenadParameters,
|
||||
) -> Result<grenad::Reader<BufReader<File>>> {
|
||||
) -> Result<grenad::Reader<File>> {
|
||||
puffin::profile_function!();
|
||||
|
||||
let max_memory = indexer.max_memory_by_thread();
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
use std::cmp::Ordering;
|
||||
use std::collections::{BinaryHeap, HashMap};
|
||||
use std::fs::File;
|
||||
use std::io::BufReader;
|
||||
use std::{cmp, io, mem, str, vec};
|
||||
|
||||
use super::helpers::{
|
||||
@@ -21,7 +20,7 @@ use crate::{DocumentId, Result};
|
||||
pub fn extract_word_pair_proximity_docids<R: io::Read + io::Seek>(
|
||||
docid_word_positions: grenad::Reader<R>,
|
||||
indexer: GrenadParameters,
|
||||
) -> Result<grenad::Reader<BufReader<File>>> {
|
||||
) -> Result<grenad::Reader<File>> {
|
||||
puffin::profile_function!();
|
||||
|
||||
let max_memory = indexer.max_memory_by_thread();
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
use std::fs::File;
|
||||
use std::io::{self, BufReader};
|
||||
use std::io;
|
||||
|
||||
use super::helpers::{
|
||||
create_sorter, merge_cbo_roaring_bitmaps, read_u32_ne_bytes, sorter_into_reader,
|
||||
@@ -17,7 +17,7 @@ use crate::{bucketed_position, relative_from_absolute_position, DocumentId, Resu
|
||||
pub fn extract_word_position_docids<R: io::Read + io::Seek>(
|
||||
docid_word_positions: grenad::Reader<R>,
|
||||
indexer: GrenadParameters,
|
||||
) -> Result<grenad::Reader<BufReader<File>>> {
|
||||
) -> Result<grenad::Reader<File>> {
|
||||
puffin::profile_function!();
|
||||
|
||||
let max_memory = indexer.max_memory_by_thread();
|
||||
|
||||
@@ -12,7 +12,6 @@ mod extract_word_position_docids;
|
||||
|
||||
use std::collections::HashSet;
|
||||
use std::fs::File;
|
||||
use std::io::BufReader;
|
||||
|
||||
use crossbeam_channel::Sender;
|
||||
use log::debug;
|
||||
@@ -40,8 +39,8 @@ use crate::{FieldId, Result};
|
||||
/// Send data in grenad file over provided Sender.
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
pub(crate) fn data_from_obkv_documents(
|
||||
original_obkv_chunks: impl Iterator<Item = Result<grenad::Reader<BufReader<File>>>> + Send,
|
||||
flattened_obkv_chunks: impl Iterator<Item = Result<grenad::Reader<BufReader<File>>>> + Send,
|
||||
original_obkv_chunks: impl Iterator<Item = Result<grenad::Reader<File>>> + Send,
|
||||
flattened_obkv_chunks: impl Iterator<Item = Result<grenad::Reader<File>>> + Send,
|
||||
indexer: GrenadParameters,
|
||||
lmdb_writer_sx: Sender<Result<TypedChunk>>,
|
||||
searchable_fields: Option<HashSet<FieldId>>,
|
||||
@@ -153,7 +152,7 @@ pub(crate) fn data_from_obkv_documents(
|
||||
});
|
||||
}
|
||||
|
||||
spawn_extraction_task::<_, _, Vec<grenad::Reader<BufReader<File>>>>(
|
||||
spawn_extraction_task::<_, _, Vec<grenad::Reader<File>>>(
|
||||
docid_word_positions_chunks.clone(),
|
||||
indexer,
|
||||
lmdb_writer_sx.clone(),
|
||||
@@ -163,7 +162,7 @@ pub(crate) fn data_from_obkv_documents(
|
||||
"word-pair-proximity-docids",
|
||||
);
|
||||
|
||||
spawn_extraction_task::<_, _, Vec<grenad::Reader<BufReader<File>>>>(
|
||||
spawn_extraction_task::<_, _, Vec<grenad::Reader<File>>>(
|
||||
docid_word_positions_chunks.clone(),
|
||||
indexer,
|
||||
lmdb_writer_sx.clone(),
|
||||
@@ -173,11 +172,7 @@ pub(crate) fn data_from_obkv_documents(
|
||||
"field-id-wordcount-docids",
|
||||
);
|
||||
|
||||
spawn_extraction_task::<
|
||||
_,
|
||||
_,
|
||||
Vec<(grenad::Reader<BufReader<File>>, grenad::Reader<BufReader<File>>)>,
|
||||
>(
|
||||
spawn_extraction_task::<_, _, Vec<(grenad::Reader<File>, grenad::Reader<File>)>>(
|
||||
docid_word_positions_chunks.clone(),
|
||||
indexer,
|
||||
lmdb_writer_sx.clone(),
|
||||
@@ -190,7 +185,7 @@ pub(crate) fn data_from_obkv_documents(
|
||||
"word-docids",
|
||||
);
|
||||
|
||||
spawn_extraction_task::<_, _, Vec<grenad::Reader<BufReader<File>>>>(
|
||||
spawn_extraction_task::<_, _, Vec<grenad::Reader<File>>>(
|
||||
docid_word_positions_chunks.clone(),
|
||||
indexer,
|
||||
lmdb_writer_sx.clone(),
|
||||
@@ -199,7 +194,7 @@ pub(crate) fn data_from_obkv_documents(
|
||||
TypedChunk::WordPositionDocids,
|
||||
"word-position-docids",
|
||||
);
|
||||
spawn_extraction_task::<_, _, Vec<grenad::Reader<BufReader<File>>>>(
|
||||
spawn_extraction_task::<_, _, Vec<grenad::Reader<File>>>(
|
||||
docid_word_positions_chunks,
|
||||
indexer,
|
||||
lmdb_writer_sx.clone(),
|
||||
@@ -209,7 +204,7 @@ pub(crate) fn data_from_obkv_documents(
|
||||
"word-fid-docids",
|
||||
);
|
||||
|
||||
spawn_extraction_task::<_, _, Vec<grenad::Reader<BufReader<File>>>>(
|
||||
spawn_extraction_task::<_, _, Vec<grenad::Reader<File>>>(
|
||||
docid_fid_facet_strings_chunks,
|
||||
indexer,
|
||||
lmdb_writer_sx.clone(),
|
||||
@@ -219,7 +214,7 @@ pub(crate) fn data_from_obkv_documents(
|
||||
"field-id-facet-string-docids",
|
||||
);
|
||||
|
||||
spawn_extraction_task::<_, _, Vec<grenad::Reader<BufReader<File>>>>(
|
||||
spawn_extraction_task::<_, _, Vec<grenad::Reader<File>>>(
|
||||
docid_fid_facet_numbers_chunks,
|
||||
indexer,
|
||||
lmdb_writer_sx,
|
||||
@@ -274,7 +269,7 @@ fn spawn_extraction_task<FE, FS, M>(
|
||||
/// Extract chunked data and send it into lmdb_writer_sx sender:
|
||||
/// - documents
|
||||
fn send_original_documents_data(
|
||||
original_documents_chunk: Result<grenad::Reader<BufReader<File>>>,
|
||||
original_documents_chunk: Result<grenad::Reader<File>>,
|
||||
indexer: GrenadParameters,
|
||||
lmdb_writer_sx: Sender<Result<TypedChunk>>,
|
||||
vectors_field_id: Option<FieldId>,
|
||||
@@ -316,7 +311,7 @@ fn send_original_documents_data(
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
#[allow(clippy::type_complexity)]
|
||||
fn send_and_extract_flattened_documents_data(
|
||||
flattened_documents_chunk: Result<grenad::Reader<BufReader<File>>>,
|
||||
flattened_documents_chunk: Result<grenad::Reader<File>>,
|
||||
indexer: GrenadParameters,
|
||||
lmdb_writer_sx: Sender<Result<TypedChunk>>,
|
||||
searchable_fields: &Option<HashSet<FieldId>>,
|
||||
@@ -333,10 +328,7 @@ fn send_and_extract_flattened_documents_data(
|
||||
grenad::Reader<CursorClonableMmap>,
|
||||
(
|
||||
grenad::Reader<CursorClonableMmap>,
|
||||
(
|
||||
grenad::Reader<BufReader<File>>,
|
||||
(grenad::Reader<BufReader<File>>, grenad::Reader<BufReader<File>>),
|
||||
),
|
||||
(grenad::Reader<File>, (grenad::Reader<File>, grenad::Reader<File>)),
|
||||
),
|
||||
),
|
||||
)> {
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
use std::borrow::Cow;
|
||||
use std::fs::File;
|
||||
use std::io::{self, BufReader, BufWriter, Seek};
|
||||
use std::io::{self, Seek};
|
||||
use std::time::Instant;
|
||||
|
||||
use grenad::{CompressionType, Sorter};
|
||||
@@ -17,13 +17,13 @@ pub fn create_writer<R: io::Write>(
|
||||
typ: grenad::CompressionType,
|
||||
level: Option<u32>,
|
||||
file: R,
|
||||
) -> grenad::Writer<BufWriter<R>> {
|
||||
) -> grenad::Writer<R> {
|
||||
let mut builder = grenad::Writer::builder();
|
||||
builder.compression_type(typ);
|
||||
if let Some(level) = level {
|
||||
builder.compression_level(level);
|
||||
}
|
||||
builder.build(BufWriter::new(file))
|
||||
builder.build(file)
|
||||
}
|
||||
|
||||
pub fn create_sorter(
|
||||
@@ -53,7 +53,7 @@ pub fn create_sorter(
|
||||
pub fn sorter_into_reader(
|
||||
sorter: grenad::Sorter<MergeFn>,
|
||||
indexer: GrenadParameters,
|
||||
) -> Result<grenad::Reader<BufReader<File>>> {
|
||||
) -> Result<grenad::Reader<File>> {
|
||||
let mut writer = create_writer(
|
||||
indexer.chunk_compression_type,
|
||||
indexer.chunk_compression_level,
|
||||
@@ -64,18 +64,16 @@ pub fn sorter_into_reader(
|
||||
writer_into_reader(writer)
|
||||
}
|
||||
|
||||
pub fn writer_into_reader(
|
||||
writer: grenad::Writer<BufWriter<File>>,
|
||||
) -> Result<grenad::Reader<BufReader<File>>> {
|
||||
let mut file = writer.into_inner()?.into_inner().map_err(|err| err.into_error())?;
|
||||
pub fn writer_into_reader(writer: grenad::Writer<File>) -> Result<grenad::Reader<File>> {
|
||||
let mut file = writer.into_inner()?;
|
||||
file.rewind()?;
|
||||
grenad::Reader::new(BufReader::new(file)).map_err(Into::into)
|
||||
grenad::Reader::new(file).map_err(Into::into)
|
||||
}
|
||||
|
||||
pub unsafe fn as_cloneable_grenad(
|
||||
reader: &grenad::Reader<BufReader<File>>,
|
||||
reader: &grenad::Reader<File>,
|
||||
) -> Result<grenad::Reader<CursorClonableMmap>> {
|
||||
let file = reader.get_ref().get_ref();
|
||||
let file = reader.get_ref();
|
||||
let mmap = memmap2::Mmap::map(file)?;
|
||||
let cursor = io::Cursor::new(ClonableMmap::from(mmap));
|
||||
let reader = grenad::Reader::new(cursor)?;
|
||||
@@ -91,8 +89,8 @@ where
|
||||
fn merge(self, merge_fn: MergeFn, indexer: &GrenadParameters) -> Result<Self::Output>;
|
||||
}
|
||||
|
||||
impl MergeableReader for Vec<grenad::Reader<BufReader<File>>> {
|
||||
type Output = grenad::Reader<BufReader<File>>;
|
||||
impl MergeableReader for Vec<grenad::Reader<File>> {
|
||||
type Output = grenad::Reader<File>;
|
||||
|
||||
fn merge(self, merge_fn: MergeFn, params: &GrenadParameters) -> Result<Self::Output> {
|
||||
let mut merger = MergerBuilder::new(merge_fn);
|
||||
@@ -101,8 +99,8 @@ impl MergeableReader for Vec<grenad::Reader<BufReader<File>>> {
|
||||
}
|
||||
}
|
||||
|
||||
impl MergeableReader for Vec<(grenad::Reader<BufReader<File>>, grenad::Reader<BufReader<File>>)> {
|
||||
type Output = (grenad::Reader<BufReader<File>>, grenad::Reader<BufReader<File>>);
|
||||
impl MergeableReader for Vec<(grenad::Reader<File>, grenad::Reader<File>)> {
|
||||
type Output = (grenad::Reader<File>, grenad::Reader<File>);
|
||||
|
||||
fn merge(self, merge_fn: MergeFn, params: &GrenadParameters) -> Result<Self::Output> {
|
||||
let mut m1 = MergerBuilder::new(merge_fn);
|
||||
@@ -127,7 +125,7 @@ impl<R: io::Read + io::Seek> MergerBuilder<R> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn finish(self, params: &GrenadParameters) -> Result<grenad::Reader<BufReader<File>>> {
|
||||
fn finish(self, params: &GrenadParameters) -> Result<grenad::Reader<File>> {
|
||||
let merger = self.0.build();
|
||||
let mut writer = create_writer(
|
||||
params.chunk_compression_type,
|
||||
@@ -178,7 +176,7 @@ pub fn grenad_obkv_into_chunks<R: io::Read + io::Seek>(
|
||||
reader: grenad::Reader<R>,
|
||||
indexer: GrenadParameters,
|
||||
documents_chunk_size: usize,
|
||||
) -> Result<impl Iterator<Item = Result<grenad::Reader<BufReader<File>>>>> {
|
||||
) -> Result<impl Iterator<Item = Result<grenad::Reader<File>>>> {
|
||||
let mut continue_reading = true;
|
||||
let mut cursor = reader.into_cursor()?;
|
||||
|
||||
|
||||
@@ -659,10 +659,8 @@ impl<'a, 'i> Transform<'a, 'i> {
|
||||
new_documents_ids: self.new_documents_ids,
|
||||
replaced_documents_ids: self.replaced_documents_ids,
|
||||
documents_count: self.documents_count,
|
||||
original_documents: original_documents.into_inner().map_err(|err| err.into_error())?,
|
||||
flattened_documents: flattened_documents
|
||||
.into_inner()
|
||||
.map_err(|err| err.into_error())?,
|
||||
original_documents,
|
||||
flattened_documents,
|
||||
})
|
||||
}
|
||||
|
||||
@@ -781,10 +779,8 @@ impl<'a, 'i> Transform<'a, 'i> {
|
||||
new_documents_ids: documents_ids,
|
||||
replaced_documents_ids: RoaringBitmap::default(),
|
||||
documents_count,
|
||||
original_documents: original_documents.into_inner().map_err(|err| err.into_error())?,
|
||||
flattened_documents: flattened_documents
|
||||
.into_inner()
|
||||
.map_err(|err| err.into_error())?,
|
||||
original_documents,
|
||||
flattened_documents,
|
||||
};
|
||||
|
||||
let new_facets = output.compute_real_facets(wtxn, self.index)?;
|
||||
|
||||
@@ -2,7 +2,7 @@ use std::borrow::Cow;
|
||||
use std::collections::HashMap;
|
||||
use std::convert::TryInto;
|
||||
use std::fs::File;
|
||||
use std::io::{self, BufReader};
|
||||
use std::io;
|
||||
|
||||
use bytemuck::allocation::pod_collect_to_vec;
|
||||
use charabia::{Language, Script};
|
||||
@@ -27,22 +27,22 @@ pub(crate) enum TypedChunk {
|
||||
FieldIdDocidFacetStrings(grenad::Reader<CursorClonableMmap>),
|
||||
FieldIdDocidFacetNumbers(grenad::Reader<CursorClonableMmap>),
|
||||
Documents(grenad::Reader<CursorClonableMmap>),
|
||||
FieldIdWordcountDocids(grenad::Reader<BufReader<File>>),
|
||||
FieldIdWordcountDocids(grenad::Reader<File>),
|
||||
NewDocumentsIds(RoaringBitmap),
|
||||
WordDocids {
|
||||
word_docids_reader: grenad::Reader<BufReader<File>>,
|
||||
exact_word_docids_reader: grenad::Reader<BufReader<File>>,
|
||||
word_docids_reader: grenad::Reader<File>,
|
||||
exact_word_docids_reader: grenad::Reader<File>,
|
||||
},
|
||||
WordPositionDocids(grenad::Reader<BufReader<File>>),
|
||||
WordFidDocids(grenad::Reader<BufReader<File>>),
|
||||
WordPairProximityDocids(grenad::Reader<BufReader<File>>),
|
||||
FieldIdFacetStringDocids(grenad::Reader<BufReader<File>>),
|
||||
FieldIdFacetNumberDocids(grenad::Reader<BufReader<File>>),
|
||||
FieldIdFacetExistsDocids(grenad::Reader<BufReader<File>>),
|
||||
FieldIdFacetIsNullDocids(grenad::Reader<BufReader<File>>),
|
||||
FieldIdFacetIsEmptyDocids(grenad::Reader<BufReader<File>>),
|
||||
GeoPoints(grenad::Reader<BufReader<File>>),
|
||||
VectorPoints(grenad::Reader<BufReader<File>>),
|
||||
WordPositionDocids(grenad::Reader<File>),
|
||||
WordFidDocids(grenad::Reader<File>),
|
||||
WordPairProximityDocids(grenad::Reader<File>),
|
||||
FieldIdFacetStringDocids(grenad::Reader<File>),
|
||||
FieldIdFacetNumberDocids(grenad::Reader<File>),
|
||||
FieldIdFacetExistsDocids(grenad::Reader<File>),
|
||||
FieldIdFacetIsNullDocids(grenad::Reader<File>),
|
||||
FieldIdFacetIsEmptyDocids(grenad::Reader<File>),
|
||||
GeoPoints(grenad::Reader<File>),
|
||||
VectorPoints(grenad::Reader<File>),
|
||||
ScriptLanguageDocids(HashMap<(Script, Language), RoaringBitmap>),
|
||||
}
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
use std::borrow::Cow;
|
||||
use std::collections::HashSet;
|
||||
use std::io::{BufReader, BufWriter};
|
||||
use std::io::BufReader;
|
||||
|
||||
use grenad::CompressionType;
|
||||
use heed::types::ByteSlice;
|
||||
@@ -119,9 +119,9 @@ pub fn insert_into_database(
|
||||
pub fn write_into_lmdb_database_without_merging(
|
||||
wtxn: &mut heed::RwTxn,
|
||||
database: heed::PolyDatabase,
|
||||
writer: grenad::Writer<BufWriter<std::fs::File>>,
|
||||
writer: grenad::Writer<std::fs::File>,
|
||||
) -> Result<()> {
|
||||
let file = writer.into_inner()?.into_inner().map_err(|err| err.into_error())?;
|
||||
let file = writer.into_inner()?;
|
||||
let reader = grenad::Reader::new(BufReader::new(file))?;
|
||||
if database.is_empty(wtxn)? {
|
||||
let mut out_iter = database.iter_mut::<_, ByteSlice, ByteSlice>(wtxn)?;
|
||||
|
||||
@@ -8,7 +8,7 @@ use Criterion::*;
|
||||
use crate::search::{self, EXTERNAL_DOCUMENTS_IDS};
|
||||
|
||||
macro_rules! test_distinct {
|
||||
($func:ident, $distinct:ident, $exhaustive:ident, $limit:expr, $offset:expr, $criteria:expr, $n_res:expr) => {
|
||||
($func:ident, $distinct:ident, $exhaustive:ident, $limit:expr, $criteria:expr, $n_res:expr) => {
|
||||
#[test]
|
||||
fn $func() {
|
||||
let criteria = $criteria;
|
||||
@@ -27,7 +27,6 @@ macro_rules! test_distinct {
|
||||
let mut search = Search::new(&rtxn, &index);
|
||||
search.query(search::TEST_QUERY);
|
||||
search.limit($limit);
|
||||
search.offset($offset);
|
||||
search.exhaustive_number_hits($exhaustive);
|
||||
|
||||
search.terms_matching_strategy(TermsMatchingStrategy::default());
|
||||
@@ -48,7 +47,6 @@ macro_rules! test_distinct {
|
||||
Some(d.id)
|
||||
}
|
||||
})
|
||||
.skip($offset)
|
||||
.take($limit)
|
||||
.collect();
|
||||
|
||||
@@ -63,7 +61,6 @@ test_distinct!(
|
||||
tag,
|
||||
true,
|
||||
1,
|
||||
0,
|
||||
vec![Words, Typo, Proximity, Attribute, Exactness],
|
||||
3
|
||||
);
|
||||
@@ -72,7 +69,6 @@ test_distinct!(
|
||||
asc_desc_rank,
|
||||
true,
|
||||
1,
|
||||
0,
|
||||
vec![Words, Typo, Proximity, Attribute, Exactness],
|
||||
7
|
||||
);
|
||||
@@ -81,7 +77,6 @@ test_distinct!(
|
||||
asc_desc_rank,
|
||||
true,
|
||||
0,
|
||||
0,
|
||||
vec![Desc(S("attribute_rank")), Desc(S("exactness_rank")), Exactness, Typo],
|
||||
7
|
||||
);
|
||||
@@ -91,7 +86,6 @@ test_distinct!(
|
||||
tag,
|
||||
false,
|
||||
EXTERNAL_DOCUMENTS_IDS.len(),
|
||||
0,
|
||||
vec![Words, Typo, Proximity, Attribute, Exactness],
|
||||
3
|
||||
);
|
||||
@@ -100,7 +94,6 @@ test_distinct!(
|
||||
asc_desc_rank,
|
||||
false,
|
||||
EXTERNAL_DOCUMENTS_IDS.len(),
|
||||
0,
|
||||
vec![Words, Typo, Proximity, Attribute, Exactness],
|
||||
7
|
||||
);
|
||||
@@ -109,7 +102,6 @@ test_distinct!(
|
||||
tag,
|
||||
false,
|
||||
EXTERNAL_DOCUMENTS_IDS.len(),
|
||||
0,
|
||||
vec![Words],
|
||||
3
|
||||
);
|
||||
@@ -118,7 +110,6 @@ test_distinct!(
|
||||
asc_desc_rank,
|
||||
false,
|
||||
EXTERNAL_DOCUMENTS_IDS.len(),
|
||||
0,
|
||||
vec![Words],
|
||||
7
|
||||
);
|
||||
@@ -127,7 +118,6 @@ test_distinct!(
|
||||
tag,
|
||||
false,
|
||||
EXTERNAL_DOCUMENTS_IDS.len(),
|
||||
0,
|
||||
vec![Words, Typo],
|
||||
3
|
||||
);
|
||||
@@ -136,7 +126,6 @@ test_distinct!(
|
||||
asc_desc_rank,
|
||||
false,
|
||||
EXTERNAL_DOCUMENTS_IDS.len(),
|
||||
0,
|
||||
vec![Words, Typo],
|
||||
7
|
||||
);
|
||||
@@ -145,7 +134,6 @@ test_distinct!(
|
||||
tag,
|
||||
false,
|
||||
EXTERNAL_DOCUMENTS_IDS.len(),
|
||||
0,
|
||||
vec![Words, Proximity],
|
||||
3
|
||||
);
|
||||
@@ -154,7 +142,6 @@ test_distinct!(
|
||||
asc_desc_rank,
|
||||
false,
|
||||
EXTERNAL_DOCUMENTS_IDS.len(),
|
||||
0,
|
||||
vec![Words, Proximity],
|
||||
7
|
||||
);
|
||||
@@ -163,7 +150,6 @@ test_distinct!(
|
||||
tag,
|
||||
false,
|
||||
EXTERNAL_DOCUMENTS_IDS.len(),
|
||||
0,
|
||||
vec![Words, Attribute],
|
||||
3
|
||||
);
|
||||
@@ -172,7 +158,6 @@ test_distinct!(
|
||||
asc_desc_rank,
|
||||
false,
|
||||
EXTERNAL_DOCUMENTS_IDS.len(),
|
||||
0,
|
||||
vec![Words, Attribute],
|
||||
7
|
||||
);
|
||||
@@ -181,7 +166,6 @@ test_distinct!(
|
||||
tag,
|
||||
false,
|
||||
EXTERNAL_DOCUMENTS_IDS.len(),
|
||||
0,
|
||||
vec![Words, Exactness],
|
||||
3
|
||||
);
|
||||
@@ -190,47 +174,6 @@ test_distinct!(
|
||||
asc_desc_rank,
|
||||
false,
|
||||
EXTERNAL_DOCUMENTS_IDS.len(),
|
||||
0,
|
||||
vec![Words, Exactness],
|
||||
7
|
||||
);
|
||||
test_distinct!(
|
||||
// testing: https://github.com/meilisearch/meilisearch/issues/4078
|
||||
distinct_string_limit_and_offset,
|
||||
tag,
|
||||
false,
|
||||
EXTERNAL_DOCUMENTS_IDS.len(),
|
||||
1,
|
||||
vec![],
|
||||
2
|
||||
);
|
||||
test_distinct!(
|
||||
// testing: https://github.com/meilisearch/meilisearch/issues/4078
|
||||
exhaustive_distinct_string_limit_and_offset,
|
||||
tag,
|
||||
true,
|
||||
1,
|
||||
2,
|
||||
vec![],
|
||||
1
|
||||
);
|
||||
test_distinct!(
|
||||
// testing: https://github.com/meilisearch/meilisearch/issues/4078
|
||||
distinct_number_limit_and_offset,
|
||||
asc_desc_rank,
|
||||
false,
|
||||
EXTERNAL_DOCUMENTS_IDS.len(),
|
||||
2,
|
||||
vec![],
|
||||
5
|
||||
);
|
||||
test_distinct!(
|
||||
// testing: https://github.com/meilisearch/meilisearch/issues/4078
|
||||
exhaustive_distinct_number_limit_and_offset,
|
||||
asc_desc_rank,
|
||||
true,
|
||||
2,
|
||||
4,
|
||||
vec![],
|
||||
3
|
||||
);
|
||||
|
||||
Reference in New Issue
Block a user