Compare commits

...

17 Commits

Author SHA1 Message Date
Clément Renault
f46c2de607 Fix the tests 2023-09-15 12:01:29 +02:00
Tamo
388d78f70e update the description of the cli argument 2023-09-15 12:01:29 +02:00
Clément Renault
b252900470 Expose a new flag to limit the number of batched tasks 2023-09-15 12:01:28 +02:00
meili-bors[bot]
8822ca234e Merge #4057
4057: Fix stats delete by filter for v1.3.4 r=irevoire a=curquiza

Fixes https://github.com/meilisearch/meilisearch/issues/4018 for v1.3.4

Co-authored-by: Tamo <tamo@meilisearch.com>
2023-09-12 13:34:39 +00:00
Tamo
d23abc8771 fix clippy 2023-09-12 11:26:48 +02:00
Tamo
036b846e4d Fix the stats of the documents deletion by filter
The issue was that the `DocumentDeletionByFilter` operation was not
declared as an index operation. That means the index stats were not
recomputed after the operation was applied.
2023-09-12 11:26:41 +02:00
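A minimal sketch of the idea behind the fix, with hypothetical simplified types (only the two variant names match the real diff further down): index stats are only refreshed for work routed through an `IndexOperation`, so the deletion-by-filter task has to live there instead of being a standalone `Batch` variant.

enum IndexOperation {
    IndexDocumentDeletionByFilter { index_uid: String },
}

enum Batch {
    IndexOperation { op: IndexOperation, must_create_index: bool },
}

// Stand-in for the stats recomputation that follows every index operation.
fn refresh_index_stats(index_uid: &str) {
    println!("recomputing stats for `{index_uid}`");
}

fn process(batch: Batch) {
    match batch {
        // Before the fix, deletion by filter never reached this arm because it
        // was its own `Batch` variant, so the stats stayed stale.
        Batch::IndexOperation {
            op: IndexOperation::IndexDocumentDeletionByFilter { index_uid },
            must_create_index: _,
        } => refresh_index_stats(&index_uid),
    }
}

fn main() {
    process(Batch::IndexOperation {
        op: IndexOperation::IndexDocumentDeletionByFilter { index_uid: "movies".into() },
        must_create_index: false,
    });
}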
meili-bors[bot]
9889390d13 Merge #4055
4055: Update version for the next release (v1.3.4) in Cargo.toml r=curquiza a=meili-bot

⚠️ This PR is automatically generated. Check the new version is the expected one and Cargo.lock has been updated before merging.

Co-authored-by: curquiza <curquiza@users.noreply.github.com>
2023-09-11 17:04:31 +00:00
curquiza
8e2bb29cf1 Update version for the next release (v1.3.4) in Cargo.toml 2023-09-11 16:20:03 +00:00
meili-bors[bot]
256cf33bca Merge #4039
4039: Fix multiple vectors dimensions r=ManyTheFish a=Kerollmops

This PR fixes #4035 by making it possible to provide multiple vectors in documents. The fix extracts the vectors from the non-flattened version of the documents.

Co-authored-by: Kerollmops <clement@meilisearch.com>
2023-09-07 09:25:58 +00:00
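An illustrative sketch of what the fix has to preserve (plain serde_json with a hypothetical helper, not the Meilisearch extraction API): a document's `_vectors` field may hold a single vector or several vectors, and only the original, non-flattened JSON keeps each vector separate.

use serde_json::{json, Value};

// Hypothetical helper: pull every vector out of a document's `_vectors` field.
fn extract_vectors(doc: &Value) -> Vec<Vec<f64>> {
    match &doc["_vectors"] {
        // A single vector, e.g. `[6, 7, 8]`.
        Value::Array(values) if values.iter().all(Value::is_number) => {
            vec![values.iter().filter_map(Value::as_f64).collect()]
        }
        // Several vectors, e.g. `[[0, 1, 2], [3, 4, 5]]`.
        Value::Array(vectors) => vectors
            .iter()
            .filter_map(Value::as_array)
            .map(|v| v.iter().filter_map(Value::as_f64).collect())
            .collect(),
        _ => Vec::new(),
    }
}

fn main() {
    let doc = json!({ "id": 0, "_vectors": [[0, 1, 2], [3, 4, 5]] });
    assert_eq!(extract_vectors(&doc), vec![vec![0.0, 1.0, 2.0], vec![3.0, 4.0, 5.0]]);
}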
meili-bors[bot]
9945cbf9db Merge #4038
4038: Fix filter escaping issues r=ManyTheFish a=Kerollmops

This PR fixes #4034 by always unescaping the escape sequences. Users must always put quotes (single or double) around filter values to escape them.

Co-authored-by: Kerollmops <clement@meilisearch.com>
2023-09-06 12:29:29 +00:00
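A small usage sketch of the unescaper crate this PR adds to the filter parser (assuming unescaper = "0.1.2" in Cargo.toml, as in the diff below); the expected values mirror the new test cases, where escape sequences inside quoted filter values are resolved before the filter is evaluated.

fn main() {
    // `title = 'foo\\'` now parses to the value `foo\`: `\\` collapses to one backslash.
    assert_eq!(unescaper::unescape(r"foo\\").unwrap(), r"foo\");

    // Hex and control sequences are resolved too: `\x20` is a space.
    assert_eq!(unescaper::unescape(r"foo\x20\n\t").unwrap(), "foo \n\t");
}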
Kerollmops
03d0f628bd Use the unescaper crate to unescape any char sequence 2023-09-06 13:59:45 +02:00
Kerollmops
ea78060916 Fix tests that were supposed to escape characters 2023-09-06 13:59:45 +02:00
Kerollmops
b42d48187a Add a test case scenario 2023-09-06 13:59:44 +02:00
Kerollmops
679c0b0f97 Extract the vectors from the non-flattened version of the documents 2023-09-06 12:26:00 +02:00
Kerollmops
e02d0064bd Add a test case scenario 2023-09-06 12:26:00 +02:00
meili-bors[bot]
7ef3572f11 Merge #4037
4037: Update version for the next release (v1.3.3) in Cargo.toml r=curquiza a=meili-bot

⚠️ This PR is automatically generated. Check the new version is the expected one and Cargo.lock has been updated before merging.

Co-authored-by: curquiza <curquiza@users.noreply.github.com>
2023-09-06 09:50:58 +00:00
curquiza
93285041a9 Update version for the next release (v1.3.3) in Cargo.toml 2023-09-06 09:23:20 +00:00
15 changed files with 256 additions and 117 deletions

38
Cargo.lock generated
View File

@@ -469,7 +469,7 @@ checksum = "8c3c1a368f70d6cf7302d78f8f7093da241fb8e8807c05cc9e51a125895a6d5b"
[[package]]
name = "benchmarks"
version = "1.3.2"
version = "1.3.4"
dependencies = [
"anyhow",
"bytes",
@@ -1199,7 +1199,7 @@ dependencies = [
[[package]]
name = "dump"
version = "1.3.2"
version = "1.3.4"
dependencies = [
"anyhow",
"big_s",
@@ -1413,7 +1413,7 @@ dependencies = [
[[package]]
name = "file-store"
version = "1.3.2"
version = "1.3.4"
dependencies = [
"faux",
"tempfile",
@@ -1435,11 +1435,12 @@ dependencies = [
[[package]]
name = "filter-parser"
version = "1.3.2"
version = "1.3.4"
dependencies = [
"insta",
"nom",
"nom_locate",
"unescaper",
]
[[package]]
@@ -1454,7 +1455,7 @@ dependencies = [
[[package]]
name = "flatten-serde-json"
version = "1.3.2"
version = "1.3.4"
dependencies = [
"criterion",
"serde_json",
@@ -1572,7 +1573,7 @@ dependencies = [
[[package]]
name = "fuzzers"
version = "1.3.2"
version = "1.3.4"
dependencies = [
"arbitrary",
"clap",
@@ -1894,7 +1895,7 @@ dependencies = [
[[package]]
name = "index-scheduler"
version = "1.3.2"
version = "1.3.4"
dependencies = [
"anyhow",
"big_s",
@@ -2081,7 +2082,7 @@ dependencies = [
[[package]]
name = "json-depth-checker"
version = "1.3.2"
version = "1.3.4"
dependencies = [
"criterion",
"serde_json",
@@ -2493,7 +2494,7 @@ checksum = "490cc448043f947bae3cbee9c203358d62dbee0db12107a74be5c30ccfd09771"
[[package]]
name = "meili-snap"
version = "1.3.2"
version = "1.3.4"
dependencies = [
"insta",
"md5",
@@ -2502,7 +2503,7 @@ dependencies = [
[[package]]
name = "meilisearch"
version = "1.3.2"
version = "1.3.4"
dependencies = [
"actix-cors",
"actix-http",
@@ -2591,7 +2592,7 @@ dependencies = [
[[package]]
name = "meilisearch-auth"
version = "1.3.2"
version = "1.3.4"
dependencies = [
"base64 0.21.2",
"enum-iterator",
@@ -2610,7 +2611,7 @@ dependencies = [
[[package]]
name = "meilisearch-types"
version = "1.3.2"
version = "1.3.4"
dependencies = [
"actix-web",
"anyhow",
@@ -2664,7 +2665,7 @@ dependencies = [
[[package]]
name = "milli"
version = "1.3.2"
version = "1.3.4"
dependencies = [
"big_s",
"bimap",
@@ -2994,7 +2995,7 @@ checksum = "478c572c3d73181ff3c2539045f6eb99e5491218eae919370993b890cdbdd98e"
[[package]]
name = "permissive-json-pointer"
version = "1.3.2"
version = "1.3.4"
dependencies = [
"big_s",
"serde_json",
@@ -4147,6 +4148,15 @@ version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9e79c4d996edb816c91e4308506774452e55e95c3c9de07b6729e17e15a5ef81"
[[package]]
name = "unescaper"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a96a44ae11e25afb520af4534fd7b0bd8cd613e35a78def813b8cf41631fa3c8"
dependencies = [
"thiserror",
]
[[package]]
name = "unicase"
version = "2.6.0"

View File

@@ -18,7 +18,7 @@ members = [
]
[workspace.package]
version = "1.3.2"
version = "1.3.4"
authors = ["Quentin de Quelen <quentin@dequelen.me>", "Clément Renault <clement@meilisearch.com>"]
description = "Meilisearch HTTP server"
homepage = "https://meilisearch.com"

View File

@@ -14,6 +14,7 @@ license.workspace = true
[dependencies]
nom = "7.1.3"
nom_locate = "4.1.0"
unescaper = "0.1.2"
[dev-dependencies]
insta = "1.29.0"

View File

@@ -62,6 +62,7 @@ pub enum ErrorKind<'a> {
MisusedGeoRadius,
MisusedGeoBoundingBox,
InvalidPrimary,
InvalidEscapedNumber,
ExpectedEof,
ExpectedValue(ExpectedValueKind),
MalformedValue,
@@ -147,6 +148,9 @@ impl<'a> Display for Error<'a> {
let text = if input.trim().is_empty() { "but instead got nothing.".to_string() } else { format!("at `{}`.", escaped_input) };
writeln!(f, "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` {}", text)?
}
ErrorKind::InvalidEscapedNumber => {
writeln!(f, "Found an invalid escaped sequence number: `{}`.", escaped_input)?
}
ErrorKind::ExpectedEof => {
writeln!(f, "Found unexpected characters at the end of the filter: `{}`. You probably forgot an `OR` or an `AND` rule.", escaped_input)?
}

View File

@@ -545,6 +545,8 @@ impl<'a> std::fmt::Display for Token<'a> {
#[cfg(test)]
pub mod tests {
use FilterCondition as Fc;
use super::*;
/// Create a raw [Token]. You must specify the string that appears BEFORE your element, followed by your element.
@@ -556,14 +558,22 @@ pub mod tests {
unsafe { Span::new_from_raw_offset(offset, lines as u32, value, "") }.into()
}
fn p(s: &str) -> impl std::fmt::Display + '_ {
Fc::parse(s).unwrap().unwrap()
}
#[test]
fn parse_escaped() {
insta::assert_display_snapshot!(p(r#"title = 'foo\\'"#), @r#"{title} = {foo\}"#);
insta::assert_display_snapshot!(p(r#"title = 'foo\\\\'"#), @r#"{title} = {foo\\}"#);
insta::assert_display_snapshot!(p(r#"title = 'foo\\\\\\'"#), @r#"{title} = {foo\\\}"#);
insta::assert_display_snapshot!(p(r#"title = 'foo\\\\\\\\'"#), @r#"{title} = {foo\\\\}"#);
// but it also works with other sequences
insta::assert_display_snapshot!(p(r#"title = 'foo\x20\n\t\"\'"'"#), @"{title} = {foo \n\t\"\'\"}");
}
#[test]
fn parse() {
use FilterCondition as Fc;
fn p(s: &str) -> impl std::fmt::Display + '_ {
Fc::parse(s).unwrap().unwrap()
}
// Test equal
insta::assert_display_snapshot!(p("channel = Ponce"), @"{channel} = {Ponce}");
insta::assert_display_snapshot!(p("subscribers = 12"), @"{subscribers} = {12}");

View File

@@ -171,7 +171,24 @@ pub fn parse_value(input: Span) -> IResult<Token> {
})
})?;
Ok((input, value))
match unescaper::unescape(value.value()) {
Ok(content) => {
if content.len() != value.value().len() {
Ok((input, Token::new(value.original_span(), Some(content))))
} else {
Ok((input, value))
}
}
Err(unescaper::Error::IncompleteStr(_)) => Err(nom::Err::Incomplete(nom::Needed::Unknown)),
Err(unescaper::Error::ParseIntError { .. }) => Err(nom::Err::Error(Error::new_from_kind(
value.original_span(),
ErrorKind::InvalidEscapedNumber,
))),
Err(unescaper::Error::InvalidChar { .. }) => Err(nom::Err::Error(Error::new_from_kind(
value.original_span(),
ErrorKind::MalformedValue,
))),
}
}
fn is_value_component(c: char) -> bool {
@@ -318,17 +335,17 @@ pub mod test {
("\"cha'nnel\"", "cha'nnel", false),
("I'm tamo", "I", false),
// escaped sequences other than quotes
(r#""\\""#, r#"\\"#, false),
(r#""\\\\\\""#, r#"\\\\\\"#, false),
(r#""aa\\aa""#, r#"aa\\aa"#, false),
(r#""\\""#, r#"\"#, true),
(r#""\\\\\\""#, r#"\\\"#, true),
(r#""aa\\aa""#, r#"aa\aa"#, true),
// with double quote
(r#""Hello \"world\"""#, r#"Hello "world""#, true),
(r#""Hello \\\"world\\\"""#, r#"Hello \\"world\\""#, true),
(r#""Hello \\\"world\\\"""#, r#"Hello \"world\""#, true),
(r#""I'm \"super\" tamo""#, r#"I'm "super" tamo"#, true),
(r#""\"\"""#, r#""""#, true),
// with simple quote
(r#"'Hello \'world\''"#, r#"Hello 'world'"#, true),
(r#"'Hello \\\'world\\\''"#, r#"Hello \\'world\\'"#, true),
(r#"'Hello \\\'world\\\''"#, r#"Hello \'world\'"#, true),
(r#"'I\'m "super" tamo'"#, r#"I'm "super" tamo"#, true),
(r#"'\'\''"#, r#"''"#, true),
];
@@ -350,7 +367,14 @@ pub mod test {
"Filter `{}` was not supposed to be escaped",
input
);
assert_eq!(token.value(), expected, "Filter `{}` failed.", input);
assert_eq!(
token.value(),
expected,
"Filter `{}` failed by giving `{}` instead of `{}`.",
input,
token.value(),
expected
);
}
}

View File

@@ -67,10 +67,6 @@ pub(crate) enum Batch {
op: IndexOperation,
must_create_index: bool,
},
IndexDocumentDeletionByFilter {
index_uid: String,
task: Task,
},
IndexCreation {
index_uid: String,
primary_key: Option<String>,
@@ -114,6 +110,10 @@ pub(crate) enum IndexOperation {
documents: Vec<Vec<String>>,
tasks: Vec<Task>,
},
IndexDocumentDeletionByFilter {
index_uid: String,
task: Task,
},
DocumentClear {
index_uid: String,
tasks: Vec<Task>,
@@ -155,7 +155,6 @@ impl Batch {
| Batch::TaskDeletion(task)
| Batch::Dump(task)
| Batch::IndexCreation { task, .. }
| Batch::IndexDocumentDeletionByFilter { task, .. }
| Batch::IndexUpdate { task, .. } => vec![task.uid],
Batch::SnapshotCreation(tasks) | Batch::IndexDeletion { tasks, .. } => {
tasks.iter().map(|task| task.uid).collect()
@@ -167,6 +166,7 @@ impl Batch {
| IndexOperation::DocumentClear { tasks, .. } => {
tasks.iter().map(|task| task.uid).collect()
}
IndexOperation::IndexDocumentDeletionByFilter { task, .. } => vec![task.uid],
IndexOperation::SettingsAndDocumentOperation {
document_import_tasks: tasks,
settings_tasks: other,
@@ -194,8 +194,7 @@ impl Batch {
IndexOperation { op, .. } => Some(op.index_uid()),
IndexCreation { index_uid, .. }
| IndexUpdate { index_uid, .. }
| IndexDeletion { index_uid, .. }
| IndexDocumentDeletionByFilter { index_uid, .. } => Some(index_uid),
| IndexDeletion { index_uid, .. } => Some(index_uid),
}
}
}
@@ -205,6 +204,7 @@ impl IndexOperation {
match self {
IndexOperation::DocumentOperation { index_uid, .. }
| IndexOperation::DocumentDeletion { index_uid, .. }
| IndexOperation::IndexDocumentDeletionByFilter { index_uid, .. }
| IndexOperation::DocumentClear { index_uid, .. }
| IndexOperation::Settings { index_uid, .. }
| IndexOperation::DocumentClearAndSetting { index_uid, .. }
@@ -239,9 +239,12 @@ impl IndexScheduler {
let task = self.get_task(rtxn, id)?.ok_or(Error::CorruptedTaskQueue)?;
match &task.kind {
KindWithContent::DocumentDeletionByFilter { index_uid, .. } => {
Ok(Some(Batch::IndexDocumentDeletionByFilter {
index_uid: index_uid.clone(),
task,
Ok(Some(Batch::IndexOperation {
op: IndexOperation::IndexDocumentDeletionByFilter {
index_uid: index_uid.clone(),
task,
},
must_create_index: false,
}))
}
_ => unreachable!(),
@@ -534,7 +537,9 @@ impl IndexScheduler {
let index_tasks = self.index_tasks(rtxn, index_name)? & enqueued;
// If autobatching is disabled we only take one task at a time.
let tasks_limit = if self.autobatching_enabled { usize::MAX } else { 1 };
// Otherwise, we only batch up to `maximum_number_of_batched_tasks` tasks at once.
let tasks_limit =
if self.autobatching_enabled { self.maximum_number_of_batched_tasks } else { 1 };
let enqueued = index_tasks
.into_iter()
@@ -891,51 +896,6 @@ impl IndexScheduler {
Ok(tasks)
}
Batch::IndexDocumentDeletionByFilter { mut task, index_uid: _ } => {
let (index_uid, filter) =
if let KindWithContent::DocumentDeletionByFilter { index_uid, filter_expr } =
&task.kind
{
(index_uid, filter_expr)
} else {
unreachable!()
};
let index = {
let rtxn = self.env.read_txn()?;
self.index_mapper.index(&rtxn, index_uid)?
};
let deleted_documents = delete_document_by_filter(filter, index);
let original_filter = if let Some(Details::DocumentDeletionByFilter {
original_filter,
deleted_documents: _,
}) = task.details
{
original_filter
} else {
// In the case of a `documentDeleteByFilter` the details MUST be set
unreachable!();
};
match deleted_documents {
Ok(deleted_documents) => {
task.status = Status::Succeeded;
task.details = Some(Details::DocumentDeletionByFilter {
original_filter,
deleted_documents: Some(deleted_documents),
});
}
Err(e) => {
task.status = Status::Failed;
task.details = Some(Details::DocumentDeletionByFilter {
original_filter,
deleted_documents: Some(0),
});
task.error = Some(e.into());
}
}
Ok(vec![task])
}
Batch::IndexCreation { index_uid, primary_key, task } => {
let wtxn = self.env.write_txn()?;
if self.index_mapper.exists(&wtxn, &index_uid)? {
@@ -1292,6 +1252,47 @@ impl IndexScheduler {
Ok(tasks)
}
IndexOperation::IndexDocumentDeletionByFilter { mut task, index_uid: _ } => {
let filter =
if let KindWithContent::DocumentDeletionByFilter { filter_expr, .. } =
&task.kind
{
filter_expr
} else {
unreachable!()
};
let deleted_documents = delete_document_by_filter(index_wtxn, filter, index);
let original_filter = if let Some(Details::DocumentDeletionByFilter {
original_filter,
deleted_documents: _,
}) = task.details
{
original_filter
} else {
// In the case of a `documentDeleteByFilter` the details MUST be set
unreachable!();
};
match deleted_documents {
Ok(deleted_documents) => {
task.status = Status::Succeeded;
task.details = Some(Details::DocumentDeletionByFilter {
original_filter,
deleted_documents: Some(deleted_documents),
});
}
Err(e) => {
task.status = Status::Failed;
task.details = Some(Details::DocumentDeletionByFilter {
original_filter,
deleted_documents: Some(0),
});
task.error = Some(e.into());
}
}
Ok(vec![task])
}
IndexOperation::Settings { index_uid: _, settings, mut tasks } => {
let indexer_config = self.index_mapper.indexer_config();
let mut builder = milli::update::Settings::new(index_wtxn, index, indexer_config);
@@ -1491,23 +1492,22 @@ impl IndexScheduler {
}
}
fn delete_document_by_filter(filter: &serde_json::Value, index: Index) -> Result<u64> {
fn delete_document_by_filter<'a>(
wtxn: &mut RwTxn<'a, '_>,
filter: &serde_json::Value,
index: &'a Index,
) -> Result<u64> {
let filter = Filter::from_json(filter)?;
Ok(if let Some(filter) = filter {
let mut wtxn = index.write_txn()?;
let candidates = filter.evaluate(&wtxn, &index).map_err(|err| match err {
let candidates = filter.evaluate(wtxn, index).map_err(|err| match err {
milli::Error::UserError(milli::UserError::InvalidFilter(_)) => {
Error::from(err).with_custom_error_code(Code::InvalidDocumentFilter)
}
e => e.into(),
})?;
let mut delete_operation = DeleteDocuments::new(&mut wtxn, &index)?;
let mut delete_operation = DeleteDocuments::new(wtxn, index)?;
delete_operation.delete_documents(&candidates);
let deleted_documents =
delete_operation.execute().map(|result| result.deleted_documents)?;
wtxn.commit()?;
deleted_documents
delete_operation.execute().map(|result| result.deleted_documents)?
} else {
0
})

View File

@@ -15,6 +15,7 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String {
let IndexScheduler {
autobatching_enabled,
maximum_number_of_batched_tasks: _,
must_stop_processing: _,
processing_tasks,
file_store,

View File

@@ -253,6 +253,9 @@ pub struct IndexSchedulerOptions {
/// Set to `true` iff the index scheduler is allowed to automatically
/// batch tasks together, to process multiple tasks at once.
pub autobatching_enabled: bool,
/// If the autobatcher is allowed to automatically batch tasks,
/// it will only batch up to this number of tasks at once.
pub maximum_number_of_batched_tasks: usize,
/// The maximum number of tasks stored in the task queue before starting
/// to auto schedule task deletions.
pub max_number_of_tasks: usize,
@@ -310,6 +313,9 @@ pub struct IndexScheduler {
/// Whether auto-batching is enabled or not.
pub(crate) autobatching_enabled: bool,
/// The maximum number of tasks that will be batched together.
pub(crate) maximum_number_of_batched_tasks: usize,
/// The max number of tasks allowed before the scheduler starts to delete
/// the finished tasks automatically.
pub(crate) max_number_of_tasks: usize,
@@ -363,6 +369,7 @@ impl IndexScheduler {
index_mapper: self.index_mapper.clone(),
wake_up: self.wake_up.clone(),
autobatching_enabled: self.autobatching_enabled,
maximum_number_of_batched_tasks: self.maximum_number_of_batched_tasks,
max_number_of_tasks: self.max_number_of_tasks,
snapshots_path: self.snapshots_path.clone(),
dumps_path: self.dumps_path.clone(),
@@ -458,6 +465,7 @@ impl IndexScheduler {
// we want to start the loop right away in case meilisearch was ctrl+Ced while processing things
wake_up: Arc::new(SignalEvent::auto(true)),
autobatching_enabled: options.autobatching_enabled,
maximum_number_of_batched_tasks: options.maximum_number_of_batched_tasks,
max_number_of_tasks: options.max_number_of_tasks,
dumps_path: options.dumps_path,
snapshots_path: options.snapshots_path,
@@ -1587,6 +1595,7 @@ mod tests {
index_count: 5,
indexer_config,
autobatching_enabled: true,
maximum_number_of_batched_tasks: usize::MAX,
max_number_of_tasks: 1_000_000,
instance_features: Default::default(),
};

View File

@@ -285,6 +285,7 @@ impl From<Opt> for Infos {
db_path,
experimental_enable_metrics,
experimental_reduce_indexing_memory_usage,
experimental_limit_batched_tasks: _,
http_addr,
master_key: _,
env,

View File

@@ -236,6 +236,7 @@ fn open_or_create_database_unchecked(
enable_mdb_writemap: opt.experimental_reduce_indexing_memory_usage,
indexer_config: (&opt.indexer_options).try_into()?,
autobatching_enabled: true,
maximum_number_of_batched_tasks: opt.experimental_limit_batched_tasks,
max_number_of_tasks: 1_000_000,
index_growth_amount: byte_unit::Byte::from_str("10GiB").unwrap().get_bytes() as usize,
index_count: DEFAULT_INDEX_COUNT,

View File

@@ -51,6 +51,7 @@ const MEILI_LOG_LEVEL: &str = "MEILI_LOG_LEVEL";
const MEILI_EXPERIMENTAL_ENABLE_METRICS: &str = "MEILI_EXPERIMENTAL_ENABLE_METRICS";
const MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE: &str =
"MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE";
const MEILI_EXPERIMENTAL_LIMIT_BATCHED_TASKS: &str = "MEILI_EXPERIMENTAL_LIMIT_BATCHED_TASKS";
const DEFAULT_CONFIG_FILE_PATH: &str = "./config.toml";
const DEFAULT_DB_PATH: &str = "./data.ms";
@@ -301,6 +302,11 @@ pub struct Opt {
#[serde(default)]
pub experimental_reduce_indexing_memory_usage: bool,
/// Experimental limit to the number of tasks per batch
#[clap(long, env = MEILI_EXPERIMENTAL_LIMIT_BATCHED_TASKS, default_value_t = default_limit_batched_tasks())]
#[serde(default = "default_limit_batched_tasks")]
pub experimental_limit_batched_tasks: usize,
#[serde(flatten)]
#[clap(flatten)]
pub indexer_options: IndexerOpts,
@@ -393,7 +399,8 @@ impl Opt {
#[cfg(all(not(debug_assertions), feature = "analytics"))]
no_analytics,
experimental_enable_metrics: enable_metrics_route,
experimental_reduce_indexing_memory_usage: reduce_indexing_memory_usage,
experimental_reduce_indexing_memory_usage,
experimental_limit_batched_tasks,
} = self;
export_to_env_if_not_present(MEILI_DB_PATH, db_path);
export_to_env_if_not_present(MEILI_HTTP_ADDR, http_addr);
@@ -437,7 +444,11 @@ impl Opt {
);
export_to_env_if_not_present(
MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE,
reduce_indexing_memory_usage.to_string(),
experimental_reduce_indexing_memory_usage.to_string(),
);
export_to_env_if_not_present(
MEILI_EXPERIMENTAL_LIMIT_BATCHED_TASKS,
experimental_limit_batched_tasks.to_string(),
);
indexer_options.export_to_env();
}
@@ -739,6 +750,10 @@ fn default_dump_dir() -> PathBuf {
PathBuf::from(DEFAULT_DUMP_DIR)
}
fn default_limit_batched_tasks() -> usize {
usize::MAX
}
/// Indicates if a snapshot was scheduled, and if yes with which interval.
#[derive(Debug, Default, Copy, Clone, Deserialize, Serialize)]
pub enum ScheduleSnapshot {

View File

@@ -154,6 +154,19 @@ async fn delete_document_by_filter() {
)
.await;
index.wait_task(1).await;
let (stats, _) = index.stats().await;
snapshot!(json_string!(stats), @r###"
{
"numberOfDocuments": 4,
"isIndexing": false,
"fieldDistribution": {
"color": 3,
"id": 4
}
}
"###);
let (response, code) =
index.delete_document_by_filter(json!({ "filter": "color = blue"})).await;
snapshot!(code, @"202 Accepted");
@@ -188,6 +201,18 @@ async fn delete_document_by_filter() {
}
"###);
let (stats, _) = index.stats().await;
snapshot!(json_string!(stats), @r###"
{
"numberOfDocuments": 2,
"isIndexing": false,
"fieldDistribution": {
"color": 1,
"id": 2
}
}
"###);
let (documents, code) = index.get_all_documents(GetAllDocumentsOptions::default()).await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(documents), @r###"
@@ -241,6 +266,18 @@ async fn delete_document_by_filter() {
}
"###);
let (stats, _) = index.stats().await;
snapshot!(json_string!(stats), @r###"
{
"numberOfDocuments": 1,
"isIndexing": false,
"fieldDistribution": {
"color": 1,
"id": 1
}
}
"###);
let (documents, code) = index.get_all_documents(GetAllDocumentsOptions::default()).await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(documents), @r###"

View File

@@ -55,7 +55,13 @@ pub(crate) fn data_from_obkv_documents(
original_obkv_chunks
.par_bridge()
.map(|original_documents_chunk| {
send_original_documents_data(original_documents_chunk, lmdb_writer_sx.clone())
send_original_documents_data(
original_documents_chunk,
indexer,
lmdb_writer_sx.clone(),
vectors_field_id,
primary_key_id,
)
})
.collect::<Result<()>>()?;
@@ -72,7 +78,6 @@ pub(crate) fn data_from_obkv_documents(
&faceted_fields,
primary_key_id,
geo_fields_ids,
vectors_field_id,
&stop_words,
max_positions_per_attributes,
)
@@ -257,11 +262,33 @@ fn spawn_extraction_task<FE, FS, M>(
/// - documents
fn send_original_documents_data(
original_documents_chunk: Result<grenad::Reader<File>>,
indexer: GrenadParameters,
lmdb_writer_sx: Sender<Result<TypedChunk>>,
vectors_field_id: Option<FieldId>,
primary_key_id: FieldId,
) -> Result<()> {
let original_documents_chunk =
original_documents_chunk.and_then(|c| unsafe { as_cloneable_grenad(&c) })?;
if let Some(vectors_field_id) = vectors_field_id {
let documents_chunk_cloned = original_documents_chunk.clone();
let lmdb_writer_sx_cloned = lmdb_writer_sx.clone();
rayon::spawn(move || {
let result = extract_vector_points(
documents_chunk_cloned,
indexer,
primary_key_id,
vectors_field_id,
);
let _ = match result {
Ok(vector_points) => {
lmdb_writer_sx_cloned.send(Ok(TypedChunk::VectorPoints(vector_points)))
}
Err(error) => lmdb_writer_sx_cloned.send(Err(error)),
};
});
}
// TODO: create a custom internal error
lmdb_writer_sx.send(Ok(TypedChunk::Documents(original_documents_chunk))).unwrap();
Ok(())
@@ -283,7 +310,6 @@ fn send_and_extract_flattened_documents_data(
faceted_fields: &HashSet<FieldId>,
primary_key_id: FieldId,
geo_fields_ids: Option<(FieldId, FieldId)>,
vectors_field_id: Option<FieldId>,
stop_words: &Option<fst::Set<&[u8]>>,
max_positions_per_attributes: Option<u32>,
) -> Result<(
@@ -312,25 +338,6 @@ fn send_and_extract_flattened_documents_data(
});
}
if let Some(vectors_field_id) = vectors_field_id {
let documents_chunk_cloned = flattened_documents_chunk.clone();
let lmdb_writer_sx_cloned = lmdb_writer_sx.clone();
rayon::spawn(move || {
let result = extract_vector_points(
documents_chunk_cloned,
indexer,
primary_key_id,
vectors_field_id,
);
let _ = match result {
Ok(vector_points) => {
lmdb_writer_sx_cloned.send(Ok(TypedChunk::VectorPoints(vector_points)))
}
Err(error) => lmdb_writer_sx_cloned.send(Err(error)),
};
});
}
let (docid_word_positions_chunk, docid_fid_facet_values_chunks): (Result<_>, Result<_>) =
rayon::join(
|| {

View File

@@ -2519,6 +2519,25 @@ mod tests {
db_snap!(index, word_position_docids, 3, @"74f556b91d161d997a89468b4da1cb8f");
}
/// Index documents containing different numbers of vectors.
/// All vectors must have the same length.
#[test]
fn test_multiple_vectors() {
let index = TempIndex::new();
index.add_documents(documents!([{"id": 0, "_vectors": [[0, 1, 2], [3, 4, 5]] }])).unwrap();
index.add_documents(documents!([{"id": 1, "_vectors": [6, 7, 8] }])).unwrap();
index
.add_documents(
documents!([{"id": 2, "_vectors": [[9, 10, 11], [12, 13, 14], [15, 16, 17]] }]),
)
.unwrap();
let rtxn = index.read_txn().unwrap();
let res = index.search(&rtxn).vector([0.0, 1.0, 2.0]).execute().unwrap();
assert_eq!(res.documents_ids.len(), 3);
}
#[test]
fn reproduce_the_bug() {
/*