Compare commits

...

19 Commits

Author SHA1 Message Date
ManyTheFish
0fbda6383b deactivate japanese transliteration 2023-05-02 11:46:00 +02:00
ManyTheFish
559e597706 Merge remote-tracking branch 'origin/release-v1.1.1' into force-japanese-v1.1.0 2023-04-24 11:14:07 +02:00
bors[bot]
4b953d62fb Merge #3673
3673: Handle the task queue being full r=irevoire a=dureuill

# Pull Request

## Related issue
Fixes a remaining issue with #3659 where it was not always possible to send tasks back even after deleting some tasks when prompted.

## Tests

- see integration test
- also manually tested with a 1MiB task queue. Was not possible to become unblocked before this PR, is now possible.

## What does this PR do?
- Use the `non_free_pages_size` method to compute the space occupied by the task db instead of the `real_disk_size` which is not always affected by task deletion.
- Expand the test so that it adds a task after the deletion. The test now fails before this PR and succeeds after this PR.

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2023-04-13 16:24:16 +00:00
Louis Dureuil
c2f4b6ced0 Test: await for the deletion task to complete before trying to add another task 2023-04-13 18:22:42 +02:00
Louis Dureuil
1e6cbcaf12 Update test comment
Co-authored-by: Tamo <tamo@meilisearch.com>
2023-04-13 17:27:12 +02:00
Louis Dureuil
066c6bd875 test task db full now checks that a task can be successfully added after deleting tasks 2023-04-13 17:20:06 +02:00
Louis Dureuil
fd583501d7 Use non_free_pages_size instead of real_disk_size to check task db space taken 2023-04-13 17:07:44 +02:00
bors[bot]
bff4bde0ce Merge #3672
3672: Update version for the next release (v1.1.1) in Cargo.toml r=dureuill a=meili-bot

⚠️ This PR is automatically generated. Check the new version is the expected one and Cargo.lock has been updated before merging.

Co-authored-by: dureuill <dureuill@users.noreply.github.com>
2023-04-13 13:34:29 +00:00
dureuill
cd45d21d6e Update version for the next release (v1.1.1) in Cargo.toml 2023-04-13 13:25:10 +00:00
bors[bot]
f9960be115 Merge #3659
3659: stops receiving tasks once the task queue is full r=Kerollmops a=irevoire

Give 20GiB to the task queue + once 50% of the task queue is used, it blocks itself and only receives task deletion requests to ensure we never get in a state where we can’t do anything.

Also, create a new error message when we reach this case:
```
Meilisearch cannot receive write operations because the size limit of the tasks database has been reached. Please delete tasks to continue performing write operations.
```

Co-authored-by: Tamo <tamo@meilisearch.com>
2023-04-13 09:11:12 +00:00
Tamo
b3f60ee805 try to fix the ci 2023-04-13 10:18:58 +02:00
Tamo
b4fabce36d update the error message + update the task db size to 20GiB with a limit at 50% 2023-04-12 18:54:11 +02:00
Tamo
9350a7b017 improve the test and try to understand the issue happening on windows 2023-04-12 18:54:11 +02:00
Tamo
be69ab320d stops receiving tasks once the task queue is full 2023-04-12 18:54:11 +02:00
bors[bot]
d59d75c9cd Merge #3667
3667: Disable autobatching of additions and deletions r=irevoire a=dureuill

# Pull Request

## Related issue
Fixes #3664

## What does this PR do?
- Modifies the autobatcher to not batch document additions and deletions, as a workaround to the DB corruption in #3664 



Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2023-04-12 16:51:13 +00:00
ManyTheFish
9a9d71eff6 Merge branch 'release-v1.1.0' into force-japanese-v1.1.0 2023-03-30 19:10:18 +02:00
ManyTheFish
d6554800ec Merge branch 'release-v1.1.0' into force-japanese-v1.1.0 2023-03-29 18:36:48 +02:00
ManyTheFish
f779548d48 Update test 2023-03-13 17:26:08 +01:00
ManyTheFish
492bff4b21 Deactivate Chinese tokenization 2023-03-13 13:54:37 +01:00
10 changed files with 150 additions and 20 deletions

26
Cargo.lock generated
View File

@@ -410,7 +410,7 @@ checksum = "b645a089122eccb6111b4f81cbc1a49f5900ac4666bb93ac027feaecf15607bf"
[[package]]
name = "benchmarks"
version = "1.1.0"
version = "1.1.1"
dependencies = [
"anyhow",
"bytes",
@@ -1150,7 +1150,7 @@ dependencies = [
[[package]]
name = "dump"
version = "1.1.0"
version = "1.1.1"
dependencies = [
"anyhow",
"big_s",
@@ -1371,7 +1371,7 @@ dependencies = [
[[package]]
name = "file-store"
version = "1.1.0"
version = "1.1.1"
dependencies = [
"faux",
"tempfile",
@@ -1393,7 +1393,7 @@ dependencies = [
[[package]]
name = "filter-parser"
version = "1.1.0"
version = "1.1.1"
dependencies = [
"insta",
"nom",
@@ -1413,7 +1413,7 @@ dependencies = [
[[package]]
name = "flatten-serde-json"
version = "1.1.0"
version = "1.1.1"
dependencies = [
"criterion",
"serde_json",
@@ -1890,7 +1890,7 @@ dependencies = [
[[package]]
name = "index-scheduler"
version = "1.1.0"
version = "1.1.1"
dependencies = [
"anyhow",
"big_s",
@@ -2049,7 +2049,7 @@ dependencies = [
[[package]]
name = "json-depth-checker"
version = "1.1.0"
version = "1.1.1"
dependencies = [
"criterion",
"serde_json",
@@ -2445,7 +2445,7 @@ checksum = "490cc448043f947bae3cbee9c203358d62dbee0db12107a74be5c30ccfd09771"
[[package]]
name = "meili-snap"
version = "1.1.0"
version = "1.1.1"
dependencies = [
"insta",
"md5",
@@ -2454,7 +2454,7 @@ dependencies = [
[[package]]
name = "meilisearch"
version = "1.1.0"
version = "1.1.1"
dependencies = [
"actix-cors",
"actix-http",
@@ -2542,7 +2542,7 @@ dependencies = [
[[package]]
name = "meilisearch-auth"
version = "1.1.0"
version = "1.1.1"
dependencies = [
"base64 0.13.1",
"enum-iterator",
@@ -2561,7 +2561,7 @@ dependencies = [
[[package]]
name = "meilisearch-types"
version = "1.1.0"
version = "1.1.1"
dependencies = [
"actix-web",
"anyhow",
@@ -2615,7 +2615,7 @@ dependencies = [
[[package]]
name = "milli"
version = "1.1.0"
version = "1.1.1"
dependencies = [
"big_s",
"bimap",
@@ -2969,7 +2969,7 @@ checksum = "478c572c3d73181ff3c2539045f6eb99e5491218eae919370993b890cdbdd98e"
[[package]]
name = "permissive-json-pointer"
version = "1.1.0"
version = "1.1.1"
dependencies = [
"big_s",
"serde_json",

View File

@@ -17,7 +17,7 @@ members = [
]
[workspace.package]
version = "1.1.0"
version = "1.1.1"
authors = ["Quentin de Quelen <quentin@dequelen.me>", "Clément Renault <clement@meilisearch.com>"]
description = "Meilisearch HTTP server"
homepage = "https://meilisearch.com"

View File

@@ -61,6 +61,8 @@ pub enum Error {
SwapDuplicateIndexesFound(Vec<String>),
#[error("Index `{0}` not found.")]
SwapIndexNotFound(String),
#[error("Meilisearch cannot receive write operations because the limit of the task database has been reached. Please delete tasks to continue performing write operations.")]
NoSpaceLeftInTaskQueue,
#[error(
"Indexes {} not found.",
.0.iter().map(|s| format!("`{}`", s)).collect::<Vec<_>>().join(", ")
@@ -152,6 +154,8 @@ impl ErrorCode for Error {
Error::TaskNotFound(_) => Code::TaskNotFound,
Error::TaskDeletionWithEmptyQuery => Code::MissingTaskFilters,
Error::TaskCancelationWithEmptyQuery => Code::MissingTaskFilters,
// TODO: not sure of the Code to use
Error::NoSpaceLeftInTaskQueue => Code::NoSpaceLeftOnDevice,
Error::Dump(e) => e.error_code(),
Error::Milli(e) => e.error_code(),
Error::ProcessBatchPanicked => Code::Internal,

View File

@@ -820,6 +820,13 @@ impl IndexScheduler {
pub fn register(&self, kind: KindWithContent) -> Result<Task> {
let mut wtxn = self.env.write_txn()?;
// if the task doesn't delete anything and 50% of the task queue is full, we must refuse to enqueue the incomming task
if !matches!(&kind, KindWithContent::TaskDeletion { tasks, .. } if !tasks.is_empty())
&& (self.env.non_free_pages_size()? * 100) / self.env.map_size()? as u64 > 50
{
return Err(Error::NoSpaceLeftInTaskQueue);
}
let mut task = Task {
uid: self.next_task_id(&wtxn)?,
enqueued_at: OffsetDateTime::now_utc(),

View File

@@ -68,7 +68,7 @@ const DEFAULT_LOG_EVERY_N: usize = 100_000;
// The actual size of the virtual address space is computed at startup to determine how many 2TiB indexes can be
// opened simultaneously.
pub const INDEX_SIZE: u64 = 2 * 1024 * 1024 * 1024 * 1024; // 2 TiB
pub const TASK_DB_SIZE: u64 = 10 * 1024 * 1024 * 1024; // 10 GiB
pub const TASK_DB_SIZE: u64 = 20 * 1024 * 1024 * 1024; // 20 GiB
#[derive(Debug, Default, Clone, Copy, Serialize, Deserialize)]
#[serde(rename_all = "UPPERCASE")]

View File

@@ -443,7 +443,7 @@ async fn displayedattr_2_smol() {
.await;
}
#[cfg(feature = "default")]
#[cfg(feature = "chinese")]
#[actix_rt::test]
async fn test_cjk_highlight() {
let server = Server::new().await;

View File

@@ -1,11 +1,14 @@
mod errors;
use byte_unit::{Byte, ByteUnit};
use meili_snap::insta::assert_json_snapshot;
use meili_snap::{json_string, snapshot};
use serde_json::json;
use tempfile::TempDir;
use time::format_description::well_known::Rfc3339;
use time::OffsetDateTime;
use crate::common::Server;
use crate::common::{default_settings, Server};
#[actix_rt::test]
async fn error_get_unexisting_task_status() {
@@ -1000,3 +1003,117 @@ async fn test_summarized_dump_creation() {
}
"###);
}
#[actix_web::test]
async fn test_task_queue_is_full() {
let dir = TempDir::new().unwrap();
let mut options = default_settings(dir.path());
options.max_task_db_size = Byte::from_unit(500.0, ByteUnit::B).unwrap();
let server = Server::new_with_options(options).await.unwrap();
// the first task should be enqueued without issue
let (result, code) = server.create_index(json!({ "uid": "doggo" })).await;
snapshot!(code, @"202 Accepted");
snapshot!(json_string!(result, { ".enqueuedAt" => "[date]" }), @r###"
{
"taskUid": 0,
"indexUid": "doggo",
"status": "enqueued",
"type": "indexCreation",
"enqueuedAt": "[date]"
}
"###);
loop {
let (res, code) = server.create_index(json!({ "uid": "doggo" })).await;
if code == 422 {
break;
}
if res["taskUid"] == json!(null) {
panic!(
"Encountered the strange case:\n{}",
serde_json::to_string_pretty(&res).unwrap()
);
}
}
let (result, code) = server.create_index(json!({ "uid": "doggo" })).await;
snapshot!(code, @"422 Unprocessable Entity");
snapshot!(json_string!(result), @r###"
{
"message": "Meilisearch cannot receive write operations because the limit of the task database has been reached. Please delete tasks to continue performing write operations.",
"code": "no_space_left_on_device",
"type": "system",
"link": "https://docs.meilisearch.com/errors#no_space_left_on_device"
}
"###);
// But we should still be able to register tasks deletion IF they delete something
let (result, code) = server.delete_tasks("uids=*").await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(result, { ".enqueuedAt" => "[date]", ".taskUid" => "uid" }), @r###"
{
"taskUid": "uid",
"indexUid": null,
"status": "enqueued",
"type": "taskDeletion",
"enqueuedAt": "[date]"
}
"###);
let result = server.wait_task(result["taskUid"].as_u64().unwrap()).await;
snapshot!(json_string!(result["status"]), @r###""succeeded""###);
// Now we should be able to register tasks again
let (result, code) = server.create_index(json!({ "uid": "doggo" })).await;
snapshot!(code, @"202 Accepted");
snapshot!(json_string!(result, { ".enqueuedAt" => "[date]", ".taskUid" => "uid" }), @r###"
{
"taskUid": "uid",
"indexUid": "doggo",
"status": "enqueued",
"type": "indexCreation",
"enqueuedAt": "[date]"
}
"###);
// we're going to fill up the queue once again
loop {
let (res, code) = server.delete_tasks("uids=0").await;
if code == 422 {
break;
}
if res["taskUid"] == json!(null) {
panic!(
"Encountered the strange case:\n{}",
serde_json::to_string_pretty(&res).unwrap()
);
}
}
// But we should NOT be able to register this task because it doesn't match any tasks
let (result, code) = server.delete_tasks("uids=0").await;
snapshot!(code, @"422 Unprocessable Entity");
snapshot!(json_string!(result), @r###"
{
"message": "Meilisearch cannot receive write operations because the limit of the task database has been reached. Please delete tasks to continue performing write operations.",
"code": "no_space_left_on_device",
"type": "system",
"link": "https://docs.meilisearch.com/errors#no_space_left_on_device"
}
"###);
// The deletion still works
let (result, code) = server.delete_tasks("uids=*").await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(result, { ".enqueuedAt" => "[date]", ".taskUid" => "uid" }), @r###"
{
"taskUid": "uid",
"indexUid": null,
"status": "enqueued",
"type": "taskDeletion",
"enqueuedAt": "[date]"
}
"###);
}

View File

@@ -68,7 +68,7 @@ rand = {version = "0.8.5", features = ["small_rng"] }
fuzzcheck = "0.12.1"
[features]
default = [ "charabia/default" ]
default = [ "charabia/japanese", "charabia/hebrew", "charabia/korean", "charabia/thai", "charabia/greek" ]
# Use POSIX semaphores instead of SysV semaphores in LMDB
# For more information on this feature, see heed's Cargo.toml

View File

@@ -1184,6 +1184,7 @@ mod tests {
stats_should_not_return_deleted_documents_(DeletionStrategy::AlwaysSoft);
}
#[cfg(feature = "chinese")]
fn stored_detected_script_and_language_should_not_return_deleted_documents_(
deletion_strategy: DeletionStrategy,
) {
@@ -1222,6 +1223,7 @@ mod tests {
assert_eq!(cj_cmn_docs, expected_cj_cmn_docids);
}
#[cfg(feature = "chinese")]
#[test]
fn stored_detected_script_and_language_should_not_return_deleted_documents() {
stored_detected_script_and_language_should_not_return_deleted_documents_(

View File

@@ -1558,7 +1558,7 @@ mod tests {
assert_eq!(count, 4);
}
#[cfg(feature = "default")]
#[cfg(feature = "chinese")]
#[test]
fn test_meilisearch_1714() {
let index = TempIndex::new();
@@ -1906,7 +1906,7 @@ mod tests {
index.add_documents(doc1).unwrap();
}
#[cfg(feature = "default")]
#[cfg(feature = "chinese")]
#[test]
fn store_detected_script_and_language_per_document_during_indexing() {
use charabia::{Language, Script};