Compare commits


4 Commits

Author SHA1 Message Date
Tamo
6243422ff4 make the task id u64 2023-09-07 13:03:43 +02:00
Tamo
3b69233906 let you specify your task id 2023-09-07 11:16:51 +02:00
meili-bors[bot]
e9b62aacb3 Merge #4025
4025: Bump Swatinem/rust-cache from 2.5.1 to 2.6.2 r=curquiza a=dependabot[bot]

Bumps [Swatinem/rust-cache](https://github.com/swatinem/rust-cache) from 2.5.1 to 2.6.2.
<details>
<summary>Release notes</summary>
<p><em>Sourced from <a href="https://github.com/swatinem/rust-cache/releases">Swatinem/rust-cache's releases</a>.</em></p>
<blockquote>
<h2>v2.6.2</h2>
<h2>What's Changed</h2>
<ul>
<li>dep: Use <code>smol-toml</code> instead of <code>toml</code> by <a href="https://github.com/NobodyXu"><code>@NobodyXu</code></a> in <a href="https://redirect.github.com/Swatinem/rust-cache/pull/164">Swatinem/rust-cache#164</a></li>
</ul>
<p><strong>Full Changelog</strong>: <a href="https://github.com/Swatinem/rust-cache/compare/v2...v2.6.2">https://github.com/Swatinem/rust-cache/compare/v2...v2.6.2</a></p>
<h2>v2.6.1</h2>
<ul>
<li>Fix hash contributions of <code>Cargo.lock</code>/<code>Cargo.toml</code> files.</li>
</ul>
<h2>v2.6.0</h2>
<h2>What's Changed</h2>
<ul>
<li>Add &quot;buildjet&quot; as a second <code>cache-provider</code> backend <a href="https://github.com/joroshiba"><code>@joroshiba</code></a> in <a href="https://redirect.github.com/Swatinem/rust-cache/pull/154">Swatinem/rust-cache#154</a></li>
<li>Clean up sparse registry index.</li>
<li>Do not clean up src of <code>-sys</code> crates.</li>
<li>Remove <code>.cargo/credentials.toml</code> before saving.</li>
</ul>
<h2>New Contributors</h2>
<ul>
<li><a href="https://github.com/joroshiba"><code>`@​joroshiba</code></a>` made their first contribution in <a href="https://redirect.github.com/Swatinem/rust-cache/pull/154">Swatinem/rust-cache#154</a></li>
</ul>
<p><strong>Full Changelog</strong>: <a href="https://github.com/Swatinem/rust-cache/compare/v2.5.1...v2.6.0">https://github.com/Swatinem/rust-cache/compare/v2.5.1...v2.6.0</a></p>
</blockquote>
</details>
<details>
<summary>Changelog</summary>
<p><em>Sourced from <a href="https://github.com/Swatinem/rust-cache/blob/master/CHANGELOG.md">Swatinem/rust-cache's changelog</a>.</em></p>
<blockquote>
<h2>2.6.2</h2>
<ul>
<li>Fix <code>toml</code> parsing.</li>
</ul>
<h2>2.6.1</h2>
<ul>
<li>Fix hash contributions of <code>Cargo.lock</code>/<code>Cargo.toml</code> files.</li>
</ul>
<h2>2.6.0</h2>
<ul>
<li>Add &quot;buildjet&quot; as a second <code>cache-provider</code> backend.</li>
<li>Clean up sparse registry index.</li>
<li>Do not clean up src of <code>-sys</code> crates.</li>
<li>Remove <code>.cargo/credentials.toml</code> before saving.</li>
</ul>
</blockquote>
</details>
<details>
<summary>Commits</summary>
<ul>
<li><a href="e207df5d26"><code>e207df5</code></a> 2.6.2</li>
<li><a href="decb69d790"><code>decb69d</code></a> Update dependencies and add changelog</li>
<li><a href="ab6b2769d1"><code>ab6b276</code></a> dep: Use <code>smol-toml</code> instead of <code>toml</code> (<a href="https://redirect.github.com/swatinem/rust-cache/issues/164">#164</a>)</li>
<li><a href="578b235f6e"><code>578b235</code></a> 2.6.1</li>
<li><a href="5113490c3f"><code>5113490</code></a> prepare 2.6.1</li>
<li><a href="c0e052c18c"><code>c0e052c</code></a> Fix hashing of parsed <code>Cargo.toml</code> (<a href="https://redirect.github.com/swatinem/rust-cache/issues/160">#160</a>)</li>
<li><a href="4e0f4b19dd"><code>4e0f4b1</code></a> Fix typo in hashing parsed <code>Cargo.lock</code> (<a href="https://redirect.github.com/swatinem/rust-cache/issues/159">#159</a>)</li>
<li><a href="b919e1427f"><code>b919e14</code></a> feat: Add logging to <code>Cargo.lock</code>/<code>Cargo.toml</code> hashing (<a href="https://redirect.github.com/swatinem/rust-cache/issues/156">#156</a>)</li>
<li><a href="b8a6852b4f"><code>b8a6852</code></a> 2.6.0</li>
<li><a href="80c47cc945"><code>80c47cc</code></a> Clean up <code>credentials.toml</code></li>
<li>Additional commits viewable in <a href="https://github.com/swatinem/rust-cache/compare/v2.5.1...v2.6.2">compare view</a></li>
</ul>
</details>
<br />


[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=Swatinem/rust-cache&package-manager=github_actions&previous-version=2.5.1&new-version=2.6.2)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

You can trigger a rebase of this PR by commenting `@dependabot rebase`.


---

<details>
<summary>Dependabot commands and options</summary>
<br />

You can trigger Dependabot actions by commenting on this PR:
- `@dependabot rebase` will rebase this PR
- `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it
- `@dependabot merge` will merge this PR after your CI passes on it
- `@dependabot squash and merge` will squash and merge this PR after your CI passes on it
- `@dependabot cancel merge` will cancel a previously requested merge and block automerging
- `@dependabot reopen` will reopen this PR if it is closed
- `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
- `@dependabot show <dependency name> ignore conditions` will show all of the ignore conditions of the specified dependency
- `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)


</details>

Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2023-09-04 12:30:53 +00:00
dependabot[bot]
456960d2c7 Bump Swatinem/rust-cache from 2.5.1 to 2.6.2
Bumps [Swatinem/rust-cache](https://github.com/swatinem/rust-cache) from 2.5.1 to 2.6.2.
- [Release notes](https://github.com/swatinem/rust-cache/releases)
- [Changelog](https://github.com/Swatinem/rust-cache/blob/master/CHANGELOG.md)
- [Commits](https://github.com/swatinem/rust-cache/compare/v2.5.1...v2.6.2)

---
updated-dependencies:
- dependency-name: Swatinem/rust-cache
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
2023-09-01 17:17:39 +00:00
57 changed files with 1243 additions and 894 deletions

View File

@@ -53,6 +53,5 @@ jobs:
uses: mislav/bump-homebrew-formula-action@v2
with:
formula-name: meilisearch
formula-path: Formula/m/meilisearch.rb
env:
COMMITTER_TOKEN: ${{ secrets.HOMEBREW_COMMITTER_TOKEN }}

View File

@@ -43,7 +43,7 @@ jobs:
toolchain: nightly
override: true
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.5.1
uses: Swatinem/rust-cache@v2.6.2
- name: Run cargo check without any default features
uses: actions-rs/cargo@v1
with:
@@ -65,7 +65,7 @@ jobs:
steps:
- uses: actions/checkout@v3
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.5.1
uses: Swatinem/rust-cache@v2.6.2
- name: Run cargo check without any default features
uses: actions-rs/cargo@v1
with:
@@ -146,7 +146,7 @@ jobs:
toolchain: stable
override: true
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.5.1
uses: Swatinem/rust-cache@v2.6.2
- name: Run tests in debug
uses: actions-rs/cargo@v1
with:
@@ -165,7 +165,7 @@ jobs:
override: true
components: clippy
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.5.1
uses: Swatinem/rust-cache@v2.6.2
- name: Run cargo clippy
uses: actions-rs/cargo@v1
with:
@@ -184,7 +184,7 @@ jobs:
override: true
components: rustfmt
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.5.1
uses: Swatinem/rust-cache@v2.6.2
- name: Run cargo fmt
# Since we never ran the `build.rs` script in the benchmark directory we are missing one auto-generated import file.
# Since we want to trigger (and fail) this action as fast as possible, instead of building the benchmark crate

Cargo.lock generated
View File

@@ -1444,7 +1444,6 @@ dependencies = [
"insta",
"nom",
"nom_locate",
"unescaper",
]
[[package]]
@@ -4181,15 +4180,6 @@ version = "0.1.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ed646292ffc8188ef8ea4d1e0e0150fb15a5c2e12ad9b8fc191ae7a8a7f3c4b9"
[[package]]
name = "unescaper"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a96a44ae11e25afb520af4534fd7b0bd8cd613e35a78def813b8cf41631fa3c8"
dependencies = [
"thiserror",
]
[[package]]
name = "unicase"
version = "2.6.0"

View File

@@ -7,7 +7,7 @@ use meilisearch_types::milli::update::IndexDocumentsMethod;
use meilisearch_types::settings::Unchecked;
use meilisearch_types::tasks::{Details, IndexSwap, KindWithContent, Status, Task, TaskId};
use meilisearch_types::InstanceUid;
use roaring::RoaringBitmap;
use roaring::RoaringTreemap;
use serde::{Deserialize, Serialize};
use time::OffsetDateTime;
@@ -121,11 +121,11 @@ pub enum KindDump {
},
TaskCancelation {
query: String,
tasks: RoaringBitmap,
tasks: RoaringTreemap,
},
TasksDeletion {
query: String,
tasks: RoaringBitmap,
tasks: RoaringTreemap,
},
DumpCreation {
keys: Vec<Key>,

View File

@@ -69,7 +69,7 @@ impl CompatV5ToV6 {
}
let task = v6::Task {
uid: task_view.uid,
uid: task_view.uid as u64,
index_uid: task_view.index_uid,
status: match task_view.status {
v5::Status::Enqueued => v6::Status::Enqueued,

View File

@@ -14,7 +14,6 @@ license.workspace = true
[dependencies]
nom = "7.1.3"
nom_locate = "4.1.0"
unescaper = "0.1.2"
[dev-dependencies]
insta = "1.29.0"

View File

@@ -62,7 +62,6 @@ pub enum ErrorKind<'a> {
MisusedGeoRadius,
MisusedGeoBoundingBox,
InvalidPrimary,
InvalidEscapedNumber,
ExpectedEof,
ExpectedValue(ExpectedValueKind),
MalformedValue,
@@ -148,9 +147,6 @@ impl<'a> Display for Error<'a> {
let text = if input.trim().is_empty() { "but instead got nothing.".to_string() } else { format!("at `{}`.", escaped_input) };
writeln!(f, "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` {}", text)?
}
ErrorKind::InvalidEscapedNumber => {
writeln!(f, "Found an invalid escaped sequence number: `{}`.", escaped_input)?
}
ErrorKind::ExpectedEof => {
writeln!(f, "Found unexpected characters at the end of the filter: `{}`. You probably forgot an `OR` or an `AND` rule.", escaped_input)?
}

View File

@@ -545,8 +545,6 @@ impl<'a> std::fmt::Display for Token<'a> {
#[cfg(test)]
pub mod tests {
use FilterCondition as Fc;
use super::*;
/// Create a raw [Token]. You must specify the string that appear BEFORE your element followed by your element
@@ -558,22 +556,14 @@ pub mod tests {
unsafe { Span::new_from_raw_offset(offset, lines as u32, value, "") }.into()
}
fn p(s: &str) -> impl std::fmt::Display + '_ {
Fc::parse(s).unwrap().unwrap()
}
#[test]
fn parse_escaped() {
insta::assert_display_snapshot!(p(r#"title = 'foo\\'"#), @r#"{title} = {foo\}"#);
insta::assert_display_snapshot!(p(r#"title = 'foo\\\\'"#), @r#"{title} = {foo\\}"#);
insta::assert_display_snapshot!(p(r#"title = 'foo\\\\\\'"#), @r#"{title} = {foo\\\}"#);
insta::assert_display_snapshot!(p(r#"title = 'foo\\\\\\\\'"#), @r#"{title} = {foo\\\\}"#);
// but it also works with other sequencies
insta::assert_display_snapshot!(p(r#"title = 'foo\x20\n\t\"\'"'"#), @"{title} = {foo \n\t\"\'\"}");
}
#[test]
fn parse() {
use FilterCondition as Fc;
fn p(s: &str) -> impl std::fmt::Display + '_ {
Fc::parse(s).unwrap().unwrap()
}
// Test equal
insta::assert_display_snapshot!(p("channel = Ponce"), @"{channel} = {Ponce}");
insta::assert_display_snapshot!(p("subscribers = 12"), @"{subscribers} = {12}");

View File

@@ -171,24 +171,7 @@ pub fn parse_value(input: Span) -> IResult<Token> {
})
})?;
match unescaper::unescape(value.value()) {
Ok(content) => {
if content.len() != value.value().len() {
Ok((input, Token::new(value.original_span(), Some(content))))
} else {
Ok((input, value))
}
}
Err(unescaper::Error::IncompleteStr(_)) => Err(nom::Err::Incomplete(nom::Needed::Unknown)),
Err(unescaper::Error::ParseIntError { .. }) => Err(nom::Err::Error(Error::new_from_kind(
value.original_span(),
ErrorKind::InvalidEscapedNumber,
))),
Err(unescaper::Error::InvalidChar { .. }) => Err(nom::Err::Error(Error::new_from_kind(
value.original_span(),
ErrorKind::MalformedValue,
))),
}
Ok((input, value))
}
fn is_value_component(c: char) -> bool {
@@ -335,17 +318,17 @@ pub mod test {
("\"cha'nnel\"", "cha'nnel", false),
("I'm tamo", "I", false),
// escaped thing but not quote
(r#""\\""#, r#"\"#, true),
(r#""\\\\\\""#, r#"\\\"#, true),
(r#""aa\\aa""#, r#"aa\aa"#, true),
(r#""\\""#, r#"\\"#, false),
(r#""\\\\\\""#, r#"\\\\\\"#, false),
(r#""aa\\aa""#, r#"aa\\aa"#, false),
// with double quote
(r#""Hello \"world\"""#, r#"Hello "world""#, true),
(r#""Hello \\\"world\\\"""#, r#"Hello \"world\""#, true),
(r#""Hello \\\"world\\\"""#, r#"Hello \\"world\\""#, true),
(r#""I'm \"super\" tamo""#, r#"I'm "super" tamo"#, true),
(r#""\"\"""#, r#""""#, true),
// with simple quote
(r#"'Hello \'world\''"#, r#"Hello 'world'"#, true),
(r#"'Hello \\\'world\\\''"#, r#"Hello \'world\'"#, true),
(r#"'Hello \\\'world\\\''"#, r#"Hello \\'world\\'"#, true),
(r#"'I\'m "super" tamo'"#, r#"I'm "super" tamo"#, true),
(r#"'\'\''"#, r#"''"#, true),
];
@@ -367,14 +350,7 @@ pub mod test {
"Filter `{}` was not supposed to be escaped",
input
);
assert_eq!(
token.value(),
expected,
"Filter `{}` failed by giving `{}` instead of `{}`.",
input,
token.value(),
expected
);
assert_eq!(token.value(), expected, "Filter `{}` failed.", input);
}
}

View File

@@ -32,11 +32,11 @@ use meilisearch_types::milli::update::{
DeleteDocuments, DocumentDeletionResult, IndexDocumentsConfig, IndexDocumentsMethod,
Settings as MilliSettings,
};
use meilisearch_types::milli::{self, Filter, BEU32};
use meilisearch_types::milli::{self, Filter, BEU64};
use meilisearch_types::settings::{apply_settings_to_builder, Settings, Unchecked};
use meilisearch_types::tasks::{Details, IndexSwap, Kind, KindWithContent, Status, Task};
use meilisearch_types::{compression, Index, VERSION_FILE_NAME};
use roaring::RoaringBitmap;
use roaring::RoaringTreemap;
use time::macros::format_description;
use time::OffsetDateTime;
use uuid::Uuid;
@@ -58,7 +58,7 @@ pub(crate) enum Batch {
/// The date and time at which the previously processing tasks started.
previous_started_at: OffsetDateTime,
/// The list of tasks that were processing when this task cancelation appeared.
previous_processing_tasks: RoaringBitmap,
previous_processing_tasks: RoaringTreemap,
},
TaskDeletion(Task),
SnapshotCreation(Vec<Task>),
@@ -67,6 +67,10 @@ pub(crate) enum Batch {
op: IndexOperation,
must_create_index: bool,
},
IndexDocumentDeletionByFilter {
index_uid: String,
task: Task,
},
IndexCreation {
index_uid: String,
primary_key: Option<String>,
@@ -110,10 +114,6 @@ pub(crate) enum IndexOperation {
documents: Vec<Vec<String>>,
tasks: Vec<Task>,
},
IndexDocumentDeletionByFilter {
index_uid: String,
task: Task,
},
DocumentClear {
index_uid: String,
tasks: Vec<Task>,
@@ -155,6 +155,7 @@ impl Batch {
| Batch::TaskDeletion(task)
| Batch::Dump(task)
| Batch::IndexCreation { task, .. }
| Batch::IndexDocumentDeletionByFilter { task, .. }
| Batch::IndexUpdate { task, .. } => vec![task.uid],
Batch::SnapshotCreation(tasks) | Batch::IndexDeletion { tasks, .. } => {
tasks.iter().map(|task| task.uid).collect()
@@ -166,7 +167,6 @@ impl Batch {
| IndexOperation::DocumentClear { tasks, .. } => {
tasks.iter().map(|task| task.uid).collect()
}
IndexOperation::IndexDocumentDeletionByFilter { task, .. } => vec![task.uid],
IndexOperation::SettingsAndDocumentOperation {
document_import_tasks: tasks,
settings_tasks: other,
@@ -194,7 +194,8 @@ impl Batch {
IndexOperation { op, .. } => Some(op.index_uid()),
IndexCreation { index_uid, .. }
| IndexUpdate { index_uid, .. }
| IndexDeletion { index_uid, .. } => Some(index_uid),
| IndexDeletion { index_uid, .. }
| IndexDocumentDeletionByFilter { index_uid, .. } => Some(index_uid),
}
}
}
@@ -204,7 +205,6 @@ impl IndexOperation {
match self {
IndexOperation::DocumentOperation { index_uid, .. }
| IndexOperation::DocumentDeletion { index_uid, .. }
| IndexOperation::IndexDocumentDeletionByFilter { index_uid, .. }
| IndexOperation::DocumentClear { index_uid, .. }
| IndexOperation::Settings { index_uid, .. }
| IndexOperation::DocumentClearAndSetting { index_uid, .. }
@@ -239,12 +239,9 @@ impl IndexScheduler {
let task = self.get_task(rtxn, id)?.ok_or(Error::CorruptedTaskQueue)?;
match &task.kind {
KindWithContent::DocumentDeletionByFilter { index_uid, .. } => {
Ok(Some(Batch::IndexOperation {
op: IndexOperation::IndexDocumentDeletionByFilter {
index_uid: index_uid.clone(),
task,
},
must_create_index: false,
Ok(Some(Batch::IndexDocumentDeletionByFilter {
index_uid: index_uid.clone(),
task,
}))
}
_ => unreachable!(),
@@ -899,6 +896,51 @@ impl IndexScheduler {
Ok(tasks)
}
Batch::IndexDocumentDeletionByFilter { mut task, index_uid: _ } => {
let (index_uid, filter) =
if let KindWithContent::DocumentDeletionByFilter { index_uid, filter_expr } =
&task.kind
{
(index_uid, filter_expr)
} else {
unreachable!()
};
let index = {
let rtxn = self.env.read_txn()?;
self.index_mapper.index(&rtxn, index_uid)?
};
let deleted_documents = delete_document_by_filter(filter, index);
let original_filter = if let Some(Details::DocumentDeletionByFilter {
original_filter,
deleted_documents: _,
}) = task.details
{
original_filter
} else {
// In the case of a `documentDeleteByFilter` the details MUST be set
unreachable!();
};
match deleted_documents {
Ok(deleted_documents) => {
task.status = Status::Succeeded;
task.details = Some(Details::DocumentDeletionByFilter {
original_filter,
deleted_documents: Some(deleted_documents),
});
}
Err(e) => {
task.status = Status::Failed;
task.details = Some(Details::DocumentDeletionByFilter {
original_filter,
deleted_documents: Some(0),
});
task.error = Some(e.into());
}
}
Ok(vec![task])
}
Batch::IndexCreation { index_uid, primary_key, task } => {
let wtxn = self.env.write_txn()?;
if self.index_mapper.exists(&wtxn, &index_uid)? {
@@ -1023,7 +1065,13 @@ impl IndexScheduler {
}
/// Swap the index `lhs` with the index `rhs`.
fn apply_index_swap(&self, wtxn: &mut RwTxn, task_id: u32, lhs: &str, rhs: &str) -> Result<()> {
fn apply_index_swap(
&self,
wtxn: &mut RwTxn,
task_id: TaskId,
lhs: &str,
rhs: &str,
) -> Result<()> {
// 1. Verify that both lhs and rhs are existing indexes
let index_lhs_exists = self.index_mapper.index_exists(wtxn, lhs)?;
if !index_lhs_exists {
@@ -1044,7 +1092,7 @@ impl IndexScheduler {
for task_id in &index_lhs_task_ids | &index_rhs_task_ids {
let mut task = self.get_task(wtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?;
swap_index_uid_in_task(&mut task, (lhs, rhs));
self.all_tasks.put(wtxn, &BEU32::new(task_id), &task)?;
self.all_tasks.put(wtxn, &BEU64::new(task_id), &task)?;
}
// 4. remove the task from indexuid = before_name
@@ -1257,47 +1305,6 @@ impl IndexScheduler {
Ok(tasks)
}
IndexOperation::IndexDocumentDeletionByFilter { mut task, index_uid: _ } => {
let filter =
if let KindWithContent::DocumentDeletionByFilter { filter_expr, .. } =
&task.kind
{
filter_expr
} else {
unreachable!()
};
let deleted_documents = delete_document_by_filter(index_wtxn, filter, index);
let original_filter = if let Some(Details::DocumentDeletionByFilter {
original_filter,
deleted_documents: _,
}) = task.details
{
original_filter
} else {
// In the case of a `documentDeleteByFilter` the details MUST be set
unreachable!();
};
match deleted_documents {
Ok(deleted_documents) => {
task.status = Status::Succeeded;
task.details = Some(Details::DocumentDeletionByFilter {
original_filter,
deleted_documents: Some(deleted_documents),
});
}
Err(e) => {
task.status = Status::Failed;
task.details = Some(Details::DocumentDeletionByFilter {
original_filter,
deleted_documents: Some(0),
});
task.error = Some(e.into());
}
}
Ok(vec![task])
}
IndexOperation::Settings { index_uid: _, settings, mut tasks } => {
let indexer_config = self.index_mapper.indexer_config();
let mut builder = milli::update::Settings::new(index_wtxn, index, indexer_config);
@@ -1388,7 +1395,11 @@ impl IndexScheduler {
/// Delete each given task from all the databases (if it is deleteable).
///
/// Return the number of tasks that were actually deleted.
fn delete_matched_tasks(&self, wtxn: &mut RwTxn, matched_tasks: &RoaringBitmap) -> Result<u64> {
fn delete_matched_tasks(
&self,
wtxn: &mut RwTxn,
matched_tasks: &RoaringTreemap,
) -> Result<u64> {
// 1. Remove from this list the tasks that we are not allowed to delete
let enqueued_tasks = self.get_status(wtxn, Status::Enqueued)?;
let processing_tasks = &self.processing_tasks.read().unwrap().processing.clone();
@@ -1403,7 +1414,7 @@ impl IndexScheduler {
let mut affected_indexes = HashSet::new();
let mut affected_statuses = HashSet::new();
let mut affected_kinds = HashSet::new();
let mut affected_canceled_by = RoaringBitmap::new();
let mut affected_canceled_by = RoaringTreemap::new();
for task_id in to_delete_tasks.iter() {
let task = self.get_task(wtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?;
@@ -1440,10 +1451,10 @@ impl IndexScheduler {
}
for task in to_delete_tasks.iter() {
self.all_tasks.delete(wtxn, &BEU32::new(task))?;
self.all_tasks.delete(wtxn, &BEU64::new(task))?;
}
for canceled_by in affected_canceled_by {
let canceled_by = BEU32::new(canceled_by);
let canceled_by = BEU64::new(canceled_by);
if let Some(mut tasks) = self.canceled_by.get(wtxn, &canceled_by)? {
tasks -= &to_delete_tasks;
if tasks.is_empty() {
@@ -1464,9 +1475,9 @@ impl IndexScheduler {
&self,
wtxn: &mut RwTxn,
cancel_task_id: TaskId,
matched_tasks: &RoaringBitmap,
matched_tasks: &RoaringTreemap,
previous_started_at: OffsetDateTime,
previous_processing_tasks: &RoaringBitmap,
previous_processing_tasks: &RoaringTreemap,
) -> Result<Vec<Uuid>> {
let now = OffsetDateTime::now_utc();
@@ -1491,28 +1502,29 @@ impl IndexScheduler {
task.details = task.details.map(|d| d.to_failed());
self.update_task(wtxn, &task)?;
}
self.canceled_by.put(wtxn, &BEU32::new(cancel_task_id), &tasks_to_cancel)?;
self.canceled_by.put(wtxn, &BEU64::new(cancel_task_id), &tasks_to_cancel)?;
Ok(content_files_to_delete)
}
}
fn delete_document_by_filter<'a>(
wtxn: &mut RwTxn<'a, '_>,
filter: &serde_json::Value,
index: &'a Index,
) -> Result<u64> {
fn delete_document_by_filter(filter: &serde_json::Value, index: Index) -> Result<u64> {
let filter = Filter::from_json(filter)?;
Ok(if let Some(filter) = filter {
let candidates = filter.evaluate(wtxn, index).map_err(|err| match err {
let mut wtxn = index.write_txn()?;
let candidates = filter.evaluate(&wtxn, &index).map_err(|err| match err {
milli::Error::UserError(milli::UserError::InvalidFilter(_)) => {
Error::from(err).with_custom_error_code(Code::InvalidDocumentFilter)
}
e => e.into(),
})?;
let mut delete_operation = DeleteDocuments::new(wtxn, index)?;
let mut delete_operation = DeleteDocuments::new(&mut wtxn, &index)?;
delete_operation.delete_documents(&candidates);
delete_operation.execute().map(|result| result.deleted_documents)?
let deleted_documents =
delete_operation.execute().map(|result| result.deleted_documents)?;
wtxn.commit()?;
deleted_documents
} else {
0
})

View File

@@ -48,6 +48,8 @@ impl From<DateField> for Code {
pub enum Error {
#[error("{1}")]
WithCustomErrorCode(Code, Box<Self>),
#[error("Received bad task id: {received} should be >= to {expected}.")]
BadTaskId { received: TaskId, expected: TaskId },
#[error("Index `{0}` not found.")]
IndexNotFound(String),
#[error("Index `{0}` already exists.")]
@@ -159,6 +161,7 @@ impl Error {
match self {
Error::IndexNotFound(_)
| Error::WithCustomErrorCode(_, _)
| Error::BadTaskId { .. }
| Error::IndexAlreadyExists(_)
| Error::SwapDuplicateIndexFound(_)
| Error::SwapDuplicateIndexesFound(_)
@@ -202,6 +205,7 @@ impl ErrorCode for Error {
fn error_code(&self) -> Code {
match self {
Error::WithCustomErrorCode(code, _) => *code,
Error::BadTaskId { .. } => Code::BadRequest,
Error::IndexNotFound(_) => Code::IndexNotFound,
Error::IndexAlreadyExists(_) => Code::IndexAlreadyExists,
Error::SwapDuplicateIndexesFound(_) => Code::InvalidSwapDuplicateIndexFound,
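
The new `BadTaskId` variant maps to `Code::BadRequest`, so a caller-supplied task id that is lower than the next id the scheduler would assign can be rejected with a 400. A minimal sketch of such a check, assuming the crate's `TaskId`, `Error`, and `Result` types; the actual registration logic is not shown in this diff:

```rust
// Sketch only, not taken from the diff: validate an optional caller-supplied
// task id against the next id the scheduler would otherwise assign.
fn resolve_task_id(next_task_id: TaskId, requested: Option<TaskId>) -> Result<TaskId> {
    match requested {
        // A requested id must be >= the next auto-assigned one, otherwise it
        // could collide with an already-registered task.
        Some(received) if received < next_task_id => {
            Err(Error::BadTaskId { received, expected: next_task_id })
        }
        Some(received) => Ok(received),
        None => Ok(next_task_id),
    }
}
```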

View File

@@ -3,9 +3,10 @@ use std::fmt::Write;
use meilisearch_types::heed::types::{OwnedType, SerdeBincode, SerdeJson, Str};
use meilisearch_types::heed::{Database, RoTxn};
use meilisearch_types::milli::{CboRoaringBitmapCodec, RoaringBitmapCodec, BEU32};
use meilisearch_types::milli::heed_codec::{CboRoaringTreemapCodec, RoaringTreemapCodec};
use meilisearch_types::milli::BEU64;
use meilisearch_types::tasks::{Details, Task};
use roaring::RoaringBitmap;
use roaring::RoaringTreemap;
use crate::index_mapper::IndexMapper;
use crate::{IndexScheduler, Kind, Status, BEI128};
@@ -47,7 +48,7 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String {
let processing_tasks = processing_tasks.read().unwrap().processing.clone();
snap.push_str(&format!("### Autobatching Enabled = {autobatching_enabled}\n"));
snap.push_str("### Processing Tasks:\n");
snap.push_str(&snapshot_bitmap(&processing_tasks));
snap.push_str(&snapshot_treemap(&processing_tasks));
snap.push_str("\n----------------------------------------------------------------------\n");
snap.push_str("### All Tasks:\n");
@@ -103,7 +104,7 @@ pub fn snapshot_file_store(file_store: &file_store::FileStore) -> String {
snap
}
pub fn snapshot_bitmap(r: &RoaringBitmap) -> String {
pub fn snapshot_treemap(r: &RoaringTreemap) -> String {
let mut snap = String::new();
snap.push('[');
for x in r {
@@ -113,7 +114,7 @@ pub fn snapshot_bitmap(r: &RoaringBitmap) -> String {
snap
}
pub fn snapshot_all_tasks(rtxn: &RoTxn, db: Database<OwnedType<BEU32>, SerdeJson<Task>>) -> String {
pub fn snapshot_all_tasks(rtxn: &RoTxn, db: Database<OwnedType<BEU64>, SerdeJson<Task>>) -> String {
let mut snap = String::new();
let iter = db.iter(rtxn).unwrap();
for next in iter {
@@ -125,13 +126,13 @@ pub fn snapshot_all_tasks(rtxn: &RoTxn, db: Database<OwnedType<BEU32>, SerdeJson
pub fn snapshot_date_db(
rtxn: &RoTxn,
db: Database<OwnedType<BEI128>, CboRoaringBitmapCodec>,
db: Database<OwnedType<BEI128>, CboRoaringTreemapCodec>,
) -> String {
let mut snap = String::new();
let iter = db.iter(rtxn).unwrap();
for next in iter {
let (_timestamp, task_ids) = next.unwrap();
snap.push_str(&format!("[timestamp] {}\n", snapshot_bitmap(&task_ids)));
snap.push_str(&format!("[timestamp] {}\n", snapshot_treemap(&task_ids)));
}
snap
}
@@ -216,45 +217,48 @@ fn snapshot_details(d: &Details) -> String {
pub fn snapshot_status(
rtxn: &RoTxn,
db: Database<SerdeBincode<Status>, RoaringBitmapCodec>,
db: Database<SerdeBincode<Status>, RoaringTreemapCodec>,
) -> String {
let mut snap = String::new();
let iter = db.iter(rtxn).unwrap();
for next in iter {
let (status, task_ids) = next.unwrap();
writeln!(snap, "{status} {}", snapshot_bitmap(&task_ids)).unwrap();
writeln!(snap, "{status} {}", snapshot_treemap(&task_ids)).unwrap();
}
snap
}
pub fn snapshot_kind(rtxn: &RoTxn, db: Database<SerdeBincode<Kind>, RoaringBitmapCodec>) -> String {
let mut snap = String::new();
let iter = db.iter(rtxn).unwrap();
for next in iter {
let (kind, task_ids) = next.unwrap();
let kind = serde_json::to_string(&kind).unwrap();
writeln!(snap, "{kind} {}", snapshot_bitmap(&task_ids)).unwrap();
}
snap
}
pub fn snapshot_index_tasks(rtxn: &RoTxn, db: Database<Str, RoaringBitmapCodec>) -> String {
let mut snap = String::new();
let iter = db.iter(rtxn).unwrap();
for next in iter {
let (index, task_ids) = next.unwrap();
writeln!(snap, "{index} {}", snapshot_bitmap(&task_ids)).unwrap();
}
snap
}
pub fn snapshot_canceled_by(
pub fn snapshot_kind(
rtxn: &RoTxn,
db: Database<OwnedType<BEU32>, RoaringBitmapCodec>,
db: Database<SerdeBincode<Kind>, RoaringTreemapCodec>,
) -> String {
let mut snap = String::new();
let iter = db.iter(rtxn).unwrap();
for next in iter {
let (kind, task_ids) = next.unwrap();
writeln!(snap, "{kind} {}", snapshot_bitmap(&task_ids)).unwrap();
let kind = serde_json::to_string(&kind).unwrap();
writeln!(snap, "{kind} {}", snapshot_treemap(&task_ids)).unwrap();
}
snap
}
pub fn snapshot_index_tasks(rtxn: &RoTxn, db: Database<Str, RoaringTreemapCodec>) -> String {
let mut snap = String::new();
let iter = db.iter(rtxn).unwrap();
for next in iter {
let (index, task_ids) = next.unwrap();
writeln!(snap, "{index} {}", snapshot_treemap(&task_ids)).unwrap();
}
snap
}
pub fn snapshot_canceled_by(
rtxn: &RoTxn,
db: Database<OwnedType<BEU64>, RoaringTreemapCodec>,
) -> String {
let mut snap = String::new();
let iter = db.iter(rtxn).unwrap();
for next in iter {
let (kind, task_ids) = next.unwrap();
writeln!(snap, "{kind} {}", snapshot_treemap(&task_ids)).unwrap();
}
snap
}

File diff suppressed because it is too large.

View File

@@ -7,7 +7,7 @@ source: index-scheduler/src/lib.rs
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: canceled, canceled_by: 1, details: { received_documents: 1, indexed_documents: Some(0) }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
1 {uid: 1, status: succeeded, details: { matched_tasks: 1, canceled_tasks: Some(1), original_filter: "test_query" }, kind: TaskCancelation { query: "test_query", tasks: RoaringBitmap<[0]> }}
1 {uid: 1, status: succeeded, details: { matched_tasks: 1, canceled_tasks: Some(1), original_filter: "test_query" }, kind: TaskCancelation { query: "test_query", tasks: RoaringTreemap<[0]> }}
----------------------------------------------------------------------
### Status:
enqueued []

View File

@@ -7,7 +7,7 @@ source: index-scheduler/src/lib.rs
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
1 {uid: 1, status: enqueued, details: { matched_tasks: 1, canceled_tasks: None, original_filter: "test_query" }, kind: TaskCancelation { query: "test_query", tasks: RoaringBitmap<[0]> }}
1 {uid: 1, status: enqueued, details: { matched_tasks: 1, canceled_tasks: None, original_filter: "test_query" }, kind: TaskCancelation { query: "test_query", tasks: RoaringTreemap<[0]> }}
----------------------------------------------------------------------
### Status:
enqueued [0,1,]

View File

@@ -9,7 +9,7 @@ source: index-scheduler/src/lib.rs
0 {uid: 0, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "beavero", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }}
2 {uid: 2, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "wolfo", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000002, documents_count: 1, allow_index_creation: true }}
3 {uid: 3, status: enqueued, details: { matched_tasks: 3, canceled_tasks: None, original_filter: "test_query" }, kind: TaskCancelation { query: "test_query", tasks: RoaringBitmap<[0, 1, 2]> }}
3 {uid: 3, status: enqueued, details: { matched_tasks: 3, canceled_tasks: None, original_filter: "test_query" }, kind: TaskCancelation { query: "test_query", tasks: RoaringTreemap<[0, 1, 2]> }}
----------------------------------------------------------------------
### Status:
enqueued [1,2,3,]

View File

@@ -9,7 +9,7 @@ source: index-scheduler/src/lib.rs
0 {uid: 0, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
1 {uid: 1, status: canceled, canceled_by: 3, details: { received_documents: 1, indexed_documents: Some(0) }, kind: DocumentAdditionOrUpdate { index_uid: "beavero", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }}
2 {uid: 2, status: canceled, canceled_by: 3, details: { received_documents: 1, indexed_documents: Some(0) }, kind: DocumentAdditionOrUpdate { index_uid: "wolfo", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000002, documents_count: 1, allow_index_creation: true }}
3 {uid: 3, status: succeeded, details: { matched_tasks: 3, canceled_tasks: Some(2), original_filter: "test_query" }, kind: TaskCancelation { query: "test_query", tasks: RoaringBitmap<[0, 1, 2]> }}
3 {uid: 3, status: succeeded, details: { matched_tasks: 3, canceled_tasks: Some(2), original_filter: "test_query" }, kind: TaskCancelation { query: "test_query", tasks: RoaringTreemap<[0, 1, 2]> }}
----------------------------------------------------------------------
### Status:
enqueued []

View File

@@ -9,7 +9,7 @@ source: index-scheduler/src/lib.rs
0 {uid: 0, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "beavero", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }}
2 {uid: 2, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "wolfo", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000002, documents_count: 1, allow_index_creation: true }}
3 {uid: 3, status: enqueued, details: { matched_tasks: 3, canceled_tasks: None, original_filter: "test_query" }, kind: TaskCancelation { query: "test_query", tasks: RoaringBitmap<[0, 1, 2]> }}
3 {uid: 3, status: enqueued, details: { matched_tasks: 3, canceled_tasks: None, original_filter: "test_query" }, kind: TaskCancelation { query: "test_query", tasks: RoaringTreemap<[0, 1, 2]> }}
----------------------------------------------------------------------
### Status:
enqueued [1,2,3,]

View File

@@ -7,7 +7,7 @@ source: index-scheduler/src/lib.rs
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
1 {uid: 1, status: enqueued, details: { matched_tasks: 1, canceled_tasks: None, original_filter: "test_query" }, kind: TaskCancelation { query: "test_query", tasks: RoaringBitmap<[0]> }}
1 {uid: 1, status: enqueued, details: { matched_tasks: 1, canceled_tasks: None, original_filter: "test_query" }, kind: TaskCancelation { query: "test_query", tasks: RoaringTreemap<[0]> }}
----------------------------------------------------------------------
### Status:
enqueued [0,1,]

View File

@@ -7,7 +7,7 @@ source: index-scheduler/src/lib.rs
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: canceled, canceled_by: 1, details: { received_documents: 1, indexed_documents: Some(0) }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
1 {uid: 1, status: succeeded, details: { matched_tasks: 1, canceled_tasks: Some(1), original_filter: "test_query" }, kind: TaskCancelation { query: "test_query", tasks: RoaringBitmap<[0]> }}
1 {uid: 1, status: succeeded, details: { matched_tasks: 1, canceled_tasks: Some(1), original_filter: "test_query" }, kind: TaskCancelation { query: "test_query", tasks: RoaringTreemap<[0]> }}
----------------------------------------------------------------------
### Status:
enqueued []

View File

@@ -7,7 +7,7 @@ source: index-scheduler/src/lib.rs
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
1 {uid: 1, status: enqueued, details: { matched_tasks: 1, canceled_tasks: None, original_filter: "test_query" }, kind: TaskCancelation { query: "test_query", tasks: RoaringBitmap<[0]> }}
1 {uid: 1, status: enqueued, details: { matched_tasks: 1, canceled_tasks: None, original_filter: "test_query" }, kind: TaskCancelation { query: "test_query", tasks: RoaringTreemap<[0]> }}
----------------------------------------------------------------------
### Status:
enqueued [0,1,]

View File

@@ -7,7 +7,7 @@ source: index-scheduler/src/lib.rs
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
1 {uid: 1, status: succeeded, details: { matched_tasks: 1, canceled_tasks: Some(0), original_filter: "test_query" }, kind: TaskCancelation { query: "test_query", tasks: RoaringBitmap<[0]> }}
1 {uid: 1, status: succeeded, details: { matched_tasks: 1, canceled_tasks: Some(0), original_filter: "test_query" }, kind: TaskCancelation { query: "test_query", tasks: RoaringTreemap<[0]> }}
----------------------------------------------------------------------
### Status:
enqueued []

View File

@@ -9,7 +9,7 @@ source: index-scheduler/src/lib.rs
0 {uid: 0, status: succeeded, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
1 {uid: 1, status: canceled, canceled_by: 3, details: { primary_key: Some("sheep") }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("sheep") }}
2 {uid: 2, status: canceled, canceled_by: 3, details: { swaps: [IndexSwap { indexes: ("catto", "doggo") }] }, kind: IndexSwap { swaps: [IndexSwap { indexes: ("catto", "doggo") }] }}
3 {uid: 3, status: succeeded, details: { matched_tasks: 3, canceled_tasks: Some(0), original_filter: "test_query" }, kind: TaskCancelation { query: "test_query", tasks: RoaringBitmap<[0, 1, 2]> }}
3 {uid: 3, status: succeeded, details: { matched_tasks: 3, canceled_tasks: Some(0), original_filter: "test_query" }, kind: TaskCancelation { query: "test_query", tasks: RoaringTreemap<[0, 1, 2]> }}
----------------------------------------------------------------------
### Status:
enqueued []

View File

@@ -7,8 +7,8 @@ source: index-scheduler/src/lib.rs
----------------------------------------------------------------------
### All Tasks:
1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggo", primary_key: Some("bone"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }}
2 {uid: 2, status: succeeded, details: { matched_tasks: 1, deleted_tasks: Some(1), original_filter: "test_query" }, kind: TaskDeletion { query: "test_query", tasks: RoaringBitmap<[0]> }}
3 {uid: 3, status: succeeded, details: { matched_tasks: 1, deleted_tasks: Some(0), original_filter: "test_query" }, kind: TaskDeletion { query: "test_query", tasks: RoaringBitmap<[0]> }}
2 {uid: 2, status: succeeded, details: { matched_tasks: 1, deleted_tasks: Some(1), original_filter: "test_query" }, kind: TaskDeletion { query: "test_query", tasks: RoaringTreemap<[0]> }}
3 {uid: 3, status: succeeded, details: { matched_tasks: 1, deleted_tasks: Some(0), original_filter: "test_query" }, kind: TaskDeletion { query: "test_query", tasks: RoaringTreemap<[0]> }}
----------------------------------------------------------------------
### Status:
enqueued [1,]

View File

@@ -8,7 +8,7 @@ source: index-scheduler/src/lib.rs
### All Tasks:
0 {uid: 0, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggo", primary_key: Some("bone"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }}
2 {uid: 2, status: enqueued, details: { matched_tasks: 1, deleted_tasks: None, original_filter: "test_query" }, kind: TaskDeletion { query: "test_query", tasks: RoaringBitmap<[0]> }}
2 {uid: 2, status: enqueued, details: { matched_tasks: 1, deleted_tasks: None, original_filter: "test_query" }, kind: TaskDeletion { query: "test_query", tasks: RoaringTreemap<[0]> }}
----------------------------------------------------------------------
### Status:
enqueued [1,2,]

View File

@@ -7,7 +7,7 @@ source: index-scheduler/src/lib.rs
----------------------------------------------------------------------
### All Tasks:
1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggo", primary_key: Some("bone"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }}
2 {uid: 2, status: succeeded, details: { matched_tasks: 1, deleted_tasks: Some(1), original_filter: "test_query" }, kind: TaskDeletion { query: "test_query", tasks: RoaringBitmap<[0]> }}
2 {uid: 2, status: succeeded, details: { matched_tasks: 1, deleted_tasks: Some(1), original_filter: "test_query" }, kind: TaskDeletion { query: "test_query", tasks: RoaringTreemap<[0]> }}
----------------------------------------------------------------------
### Status:
enqueued [1,]

View File

@@ -9,7 +9,7 @@ source: index-scheduler/src/lib.rs
0 {uid: 0, status: enqueued, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
2 {uid: 2, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggo", primary_key: Some("bone"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }}
3 {uid: 3, status: succeeded, details: { matched_tasks: 2, deleted_tasks: Some(0), original_filter: "test_query" }, kind: TaskDeletion { query: "test_query", tasks: RoaringBitmap<[0, 1]> }}
3 {uid: 3, status: succeeded, details: { matched_tasks: 2, deleted_tasks: Some(0), original_filter: "test_query" }, kind: TaskDeletion { query: "test_query", tasks: RoaringTreemap<[0, 1]> }}
----------------------------------------------------------------------
### Status:
enqueued [0,1,2,]

View File

@@ -9,7 +9,7 @@ source: index-scheduler/src/lib.rs
0 {uid: 0, status: enqueued, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
2 {uid: 2, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggo", primary_key: Some("bone"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }}
3 {uid: 3, status: enqueued, details: { matched_tasks: 2, deleted_tasks: None, original_filter: "test_query" }, kind: TaskDeletion { query: "test_query", tasks: RoaringBitmap<[0, 1]> }}
3 {uid: 3, status: enqueued, details: { matched_tasks: 2, deleted_tasks: None, original_filter: "test_query" }, kind: TaskDeletion { query: "test_query", tasks: RoaringTreemap<[0, 1]> }}
----------------------------------------------------------------------
### Status:
enqueued [0,1,2,3,]

View File

@@ -9,7 +9,7 @@ source: index-scheduler/src/lib.rs
0 {uid: 0, status: enqueued, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
2 {uid: 2, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggo", primary_key: Some("bone"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }}
3 {uid: 3, status: enqueued, details: { matched_tasks: 2, deleted_tasks: None, original_filter: "test_query" }, kind: TaskDeletion { query: "test_query", tasks: RoaringBitmap<[0, 1]> }}
3 {uid: 3, status: enqueued, details: { matched_tasks: 2, deleted_tasks: None, original_filter: "test_query" }, kind: TaskDeletion { query: "test_query", tasks: RoaringTreemap<[0, 1]> }}
----------------------------------------------------------------------
### Status:
enqueued [0,1,2,3,]

View File

@@ -41,6 +41,18 @@ source: index-scheduler/src/lib.rs
"taskDeletion": {
"query": "[query]",
"tasks": [
1,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
58,
48,
0,

View File

@@ -21,6 +21,18 @@ source: index-scheduler/src/lib.rs
"taskDeletion": {
"query": "[query]",
"tasks": [
1,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
58,
48,
0,

View File

@@ -106,6 +106,18 @@ source: index-scheduler/src/lib.rs
"taskDeletion": {
"query": "[query]",
"tasks": [
1,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
58,
48,
0,

View File

@@ -61,6 +61,18 @@ source: index-scheduler/src/lib.rs
"taskDeletion": {
"query": "[query]",
"tasks": [
1,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
58,
48,
0,

View File

@@ -5,15 +5,16 @@ use std::ops::Bound;
use meilisearch_types::heed::types::{DecodeIgnore, OwnedType};
use meilisearch_types::heed::{Database, RoTxn, RwTxn};
use meilisearch_types::milli::{CboRoaringBitmapCodec, BEU32};
use meilisearch_types::milli::heed_codec::CboRoaringTreemapCodec;
use meilisearch_types::milli::BEU64;
use meilisearch_types::tasks::{Details, IndexSwap, Kind, KindWithContent, Status};
use roaring::{MultiOps, RoaringBitmap};
use roaring::{MultiOps, RoaringTreemap};
use time::OffsetDateTime;
use crate::{Error, IndexScheduler, Result, Task, TaskId, BEI128};
impl IndexScheduler {
pub(crate) fn all_task_ids(&self, rtxn: &RoTxn) -> Result<RoaringBitmap> {
pub(crate) fn all_task_ids(&self, rtxn: &RoTxn) -> Result<RoaringTreemap> {
enum_iterator::all().map(|s| self.get_status(rtxn, s)).union()
}
@@ -26,7 +27,7 @@ impl IndexScheduler {
}
pub(crate) fn get_task(&self, rtxn: &RoTxn, task_id: TaskId) -> Result<Option<Task>> {
Ok(self.all_tasks.get(rtxn, &BEU32::new(task_id))?)
Ok(self.all_tasks.get(rtxn, &BEU64::new(task_id))?)
}
/// Convert an iterator to a `Vec` of tasks. The tasks MUST exist or a
@@ -88,12 +89,12 @@ impl IndexScheduler {
}
}
self.all_tasks.put(wtxn, &BEU32::new(task.uid), task)?;
self.all_tasks.put(wtxn, &BEU64::new(task.uid), task)?;
Ok(())
}
/// Returns the whole set of tasks that belongs to this index.
pub(crate) fn index_tasks(&self, rtxn: &RoTxn, index: &str) -> Result<RoaringBitmap> {
pub(crate) fn index_tasks(&self, rtxn: &RoTxn, index: &str) -> Result<RoaringTreemap> {
Ok(self.index_tasks.get(rtxn, index)?.unwrap_or_default())
}
@@ -101,7 +102,7 @@ impl IndexScheduler {
&self,
wtxn: &mut RwTxn,
index: &str,
f: impl Fn(&mut RoaringBitmap),
f: impl Fn(&mut RoaringTreemap),
) -> Result<()> {
let mut tasks = self.index_tasks(wtxn, index)?;
f(&mut tasks);
@@ -114,7 +115,7 @@ impl IndexScheduler {
Ok(())
}
pub(crate) fn get_status(&self, rtxn: &RoTxn, status: Status) -> Result<RoaringBitmap> {
pub(crate) fn get_status(&self, rtxn: &RoTxn, status: Status) -> Result<RoaringTreemap> {
Ok(self.status.get(rtxn, &status)?.unwrap_or_default())
}
@@ -122,7 +123,7 @@ impl IndexScheduler {
&self,
wtxn: &mut RwTxn,
status: Status,
bitmap: &RoaringBitmap,
bitmap: &RoaringTreemap,
) -> Result<()> {
Ok(self.status.put(wtxn, &status, bitmap)?)
}
@@ -131,7 +132,7 @@ impl IndexScheduler {
&self,
wtxn: &mut RwTxn,
status: Status,
f: impl Fn(&mut RoaringBitmap),
f: impl Fn(&mut RoaringTreemap),
) -> Result<()> {
let mut tasks = self.get_status(wtxn, status)?;
f(&mut tasks);
@@ -140,7 +141,7 @@ impl IndexScheduler {
Ok(())
}
pub(crate) fn get_kind(&self, rtxn: &RoTxn, kind: Kind) -> Result<RoaringBitmap> {
pub(crate) fn get_kind(&self, rtxn: &RoTxn, kind: Kind) -> Result<RoaringTreemap> {
Ok(self.kind.get(rtxn, &kind)?.unwrap_or_default())
}
@@ -148,7 +149,7 @@ impl IndexScheduler {
&self,
wtxn: &mut RwTxn,
kind: Kind,
bitmap: &RoaringBitmap,
bitmap: &RoaringTreemap,
) -> Result<()> {
Ok(self.kind.put(wtxn, &kind, bitmap)?)
}
@@ -157,7 +158,7 @@ impl IndexScheduler {
&self,
wtxn: &mut RwTxn,
kind: Kind,
f: impl Fn(&mut RoaringBitmap),
f: impl Fn(&mut RoaringTreemap),
) -> Result<()> {
let mut tasks = self.get_kind(wtxn, kind)?;
f(&mut tasks);
@@ -169,20 +170,20 @@ impl IndexScheduler {
pub(crate) fn insert_task_datetime(
wtxn: &mut RwTxn,
database: Database<OwnedType<BEI128>, CboRoaringBitmapCodec>,
database: Database<OwnedType<BEI128>, CboRoaringTreemapCodec>,
time: OffsetDateTime,
task_id: TaskId,
) -> Result<()> {
let timestamp = BEI128::new(time.unix_timestamp_nanos());
let mut task_ids = database.get(wtxn, &timestamp)?.unwrap_or_default();
task_ids.insert(task_id);
database.put(wtxn, &timestamp, &RoaringBitmap::from_iter(task_ids))?;
database.put(wtxn, &timestamp, &RoaringTreemap::from_iter(task_ids))?;
Ok(())
}
pub(crate) fn remove_task_datetime(
wtxn: &mut RwTxn,
database: Database<OwnedType<BEI128>, CboRoaringBitmapCodec>,
database: Database<OwnedType<BEI128>, CboRoaringTreemapCodec>,
time: OffsetDateTime,
task_id: TaskId,
) -> Result<()> {
@@ -192,7 +193,7 @@ pub(crate) fn remove_task_datetime(
if existing.is_empty() {
database.delete(wtxn, &timestamp)?;
} else {
database.put(wtxn, &timestamp, &RoaringBitmap::from_iter(existing))?;
database.put(wtxn, &timestamp, &RoaringTreemap::from_iter(existing))?;
}
}
@@ -201,8 +202,8 @@ pub(crate) fn remove_task_datetime(
pub(crate) fn keep_tasks_within_datetimes(
rtxn: &RoTxn,
tasks: &mut RoaringBitmap,
database: Database<OwnedType<BEI128>, CboRoaringBitmapCodec>,
tasks: &mut RoaringTreemap,
database: Database<OwnedType<BEI128>, CboRoaringTreemapCodec>,
after: Option<OffsetDateTime>,
before: Option<OffsetDateTime>,
) -> Result<()> {
@@ -212,7 +213,7 @@ pub(crate) fn keep_tasks_within_datetimes(
(Some(after), None) => (Bound::Excluded(*after), Bound::Unbounded),
(Some(after), Some(before)) => (Bound::Excluded(*after), Bound::Excluded(*before)),
};
let mut collected_task_ids = RoaringBitmap::new();
let mut collected_task_ids = RoaringTreemap::new();
let start = map_bound(start, |b| BEI128::new(b.unix_timestamp_nanos()));
let end = map_bound(end, |b| BEI128::new(b.unix_timestamp_nanos()));
let iter = database.range(rtxn, &(start, end))?;
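
The key codec follows the id widening: `BEU32` becomes `BEU64`. Big-endian wrappers are used because LMDB (via heed) compares keys byte-wise, and only the big-endian encoding keeps that byte order identical to numeric order, which is what range queries like the one above rely on. A small self-contained illustration:

```rust
fn main() {
    // Byte-wise comparison of big-endian encodings matches numeric order...
    assert!(1u64.to_be_bytes() < 256u64.to_be_bytes());
    // ...while little-endian encodings would sort 256 before 1.
    assert!(1u64.to_le_bytes() > 256u64.to_le_bytes());
    println!("big-endian keys preserve numeric ordering under byte-wise comparison");
}
```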

View File

@@ -104,6 +104,7 @@ macro_rules! impl_from_query_param_wrap_original_value_in_error {
}
impl_from_query_param_wrap_original_value_in_error!(usize, DeserrParseIntError);
impl_from_query_param_wrap_original_value_in_error!(u32, DeserrParseIntError);
impl_from_query_param_wrap_original_value_in_error!(u64, DeserrParseIntError);
impl_from_query_param_wrap_original_value_in_error!(bool, DeserrParseBoolError);
impl FromQueryParameter for String {

View File

@@ -5,7 +5,7 @@ use std::str::FromStr;
use enum_iterator::Sequence;
use milli::update::IndexDocumentsMethod;
use roaring::RoaringBitmap;
use roaring::RoaringTreemap;
use serde::{Deserialize, Serialize, Serializer};
use time::{Duration, OffsetDateTime};
use uuid::Uuid;
@@ -15,7 +15,7 @@ use crate::keys::Key;
use crate::settings::{Settings, Unchecked};
use crate::InstanceUid;
pub type TaskId = u32;
pub type TaskId = u64;
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
@@ -127,11 +127,11 @@ pub enum KindWithContent {
},
TaskCancelation {
query: String,
tasks: RoaringBitmap,
tasks: RoaringTreemap,
},
TaskDeletion {
query: String,
tasks: RoaringBitmap,
tasks: RoaringTreemap,
},
DumpCreation {
keys: Vec<Key>,
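
Widening `TaskId` to `u64` is what forces the `RoaringBitmap` to `RoaringTreemap` change throughout the diff: the `roaring` crate's `RoaringBitmap` only stores 32-bit values, while `RoaringTreemap` is its 64-bit counterpart. A small self-contained illustration:

```rust
use roaring::{RoaringBitmap, RoaringTreemap};

fn main() {
    // RoaringBitmap::insert takes a u32, so it cannot hold a 64-bit task id.
    let mut ids_32 = RoaringBitmap::new();
    ids_32.insert(42u32);

    // RoaringTreemap is the u64 variant, hence the switch once TaskId = u64.
    let mut ids_64 = RoaringTreemap::new();
    ids_64.insert(u64::from(u32::MAX) + 1);
    assert!(ids_64.contains(u64::from(u32::MAX) + 1));
}
```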

View File

@@ -203,7 +203,7 @@ pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<(Arc<IndexScheduler>, Arc<
.name(String::from("register-snapshot-tasks"))
.spawn(move || loop {
thread::sleep(snapshot_delay);
if let Err(e) = index_scheduler.register(KindWithContent::SnapshotCreation) {
if let Err(e) = index_scheduler.register(KindWithContent::SnapshotCreation, None) {
error!("Error while registering snapshot: {}", e);
}
})

View File

@@ -11,7 +11,7 @@ use crate::analytics::Analytics;
use crate::extractors::authentication::policies::*;
use crate::extractors::authentication::GuardedData;
use crate::extractors::sequential_extractor::SeqHandler;
use crate::routes::SummarizedTaskView;
use crate::routes::{get_task_id, SummarizedTaskView};
pub fn configure(cfg: &mut web::ServiceConfig) {
cfg.service(web::resource("").route(web::post().to(SeqHandler(create_dump))));
@@ -29,8 +29,9 @@ pub async fn create_dump(
keys: auth_controller.list_keys()?,
instance_uid: analytics.instance_uid().cloned(),
};
let uid = get_task_id(&req)?;
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid)).await??.into();
debug!("returns: {:?}", task);
Ok(HttpResponse::Accepted().json(task))
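
The routes now call a `get_task_id(&req)` helper whose body is not shown in this diff. Purely as an illustration of the shape such a helper could take, here is a sketch that assumes the caller passes the id in a hypothetical `TaskId` HTTP header; the header name and error wording are guesses, not taken from the diff:

```rust
use actix_web::HttpRequest;
use index_scheduler::TaskId;
use meilisearch_types::error::{Code, ResponseError};

// Hypothetical helper: extract an optional task id from a custom header.
// Everything here (header name, messages) is illustrative only.
pub fn get_task_id(req: &HttpRequest) -> Result<Option<TaskId>, ResponseError> {
    req.headers()
        .get("TaskId")
        .map(|value| {
            let raw = value.to_str().map_err(|_| {
                ResponseError::from_msg("invalid TaskId header".to_string(), Code::BadRequest)
            })?;
            raw.parse::<TaskId>()
                .map_err(|e| ResponseError::from_msg(e.to_string(), Code::BadRequest))
        })
        .transpose()
}
```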

View File

@@ -7,7 +7,7 @@ use bstr::ByteSlice;
use deserr::actix_web::{AwebJson, AwebQueryParameter};
use deserr::Deserr;
use futures::StreamExt;
use index_scheduler::IndexScheduler;
use index_scheduler::{IndexScheduler, TaskId};
use log::debug;
use meilisearch_types::deserr::query_params::Param;
use meilisearch_types::deserr::{DeserrJsonError, DeserrQueryParamError};
@@ -36,7 +36,7 @@ use crate::extractors::authentication::policies::*;
use crate::extractors::authentication::GuardedData;
use crate::extractors::payload::Payload;
use crate::extractors::sequential_extractor::SeqHandler;
use crate::routes::{PaginationView, SummarizedTaskView, PAGINATION_DEFAULT_LIMIT};
use crate::routes::{get_task_id, PaginationView, SummarizedTaskView, PAGINATION_DEFAULT_LIMIT};
use crate::search::parse_filter;
static ACCEPTED_CONTENT_TYPE: Lazy<Vec<String>> = Lazy::new(|| {
@@ -129,8 +129,9 @@ pub async fn delete_document(
index_uid: index_uid.to_string(),
documents_ids: vec![document_id],
};
let uid = get_task_id(&req)?;
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid)).await??.into();
debug!("returns: {:?}", task);
Ok(HttpResponse::Accepted().json(task))
}
@@ -277,6 +278,7 @@ pub async fn replace_documents(
analytics.add_documents(&params, index_scheduler.index(&index_uid).is_err(), &req);
let allow_index_creation = index_scheduler.filters().allow_index_creation(&index_uid);
let uid = get_task_id(&req)?;
let task = document_addition(
extract_mime_type(&req)?,
index_scheduler,
@@ -285,6 +287,7 @@ pub async fn replace_documents(
params.csv_delimiter,
body,
IndexDocumentsMethod::ReplaceDocuments,
uid,
allow_index_creation,
)
.await?;
@@ -308,6 +311,7 @@ pub async fn update_documents(
analytics.update_documents(&params, index_scheduler.index(&index_uid).is_err(), &req);
let allow_index_creation = index_scheduler.filters().allow_index_creation(&index_uid);
let uid = get_task_id(&req)?;
let task = document_addition(
extract_mime_type(&req)?,
index_scheduler,
@@ -316,6 +320,7 @@ pub async fn update_documents(
params.csv_delimiter,
body,
IndexDocumentsMethod::UpdateDocuments,
uid,
allow_index_creation,
)
.await?;
@@ -332,6 +337,7 @@ async fn document_addition(
csv_delimiter: Option<u8>,
mut body: Payload,
method: IndexDocumentsMethod,
task_id: Option<TaskId>,
allow_index_creation: bool,
) -> Result<SummarizedTaskView, MeilisearchHttpError> {
let format = match (
@@ -445,7 +451,7 @@ async fn document_addition(
};
let scheduler = index_scheduler.clone();
let task = match tokio::task::spawn_blocking(move || scheduler.register(task)).await? {
let task = match tokio::task::spawn_blocking(move || scheduler.register(task, task_id)).await? {
Ok(task) => task,
Err(e) => {
index_scheduler.delete_update_file(uuid)?;
@@ -476,8 +482,9 @@ pub async fn delete_documents_batch(
let task =
KindWithContent::DocumentDeletion { index_uid: index_uid.to_string(), documents_ids: ids };
let uid = get_task_id(&req)?;
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid)).await??.into();
debug!("returns: {:?}", task);
Ok(HttpResponse::Accepted().json(task))
@@ -512,8 +519,9 @@ pub async fn delete_documents_by_filter(
.map_err(|err| ResponseError::from_msg(err.message, Code::InvalidDocumentFilter))?;
let task = KindWithContent::DocumentDeletionByFilter { index_uid, filter_expr: filter };
let uid = get_task_id(&req)?;
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid)).await??.into();
debug!("returns: {:?}", task);
Ok(HttpResponse::Accepted().json(task))
@@ -529,8 +537,9 @@ pub async fn clear_all_documents(
analytics.delete_documents(DocumentDeletionKind::ClearAll, &req);
let task = KindWithContent::DocumentClear { index_uid: index_uid.to_string() };
let uid = get_task_id(&req)?;
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid)).await??.into();
debug!("returns: {:?}", task);
Ok(HttpResponse::Accepted().json(task))

View File

@@ -17,7 +17,7 @@ use serde::Serialize;
use serde_json::json;
use time::OffsetDateTime;
use super::{Pagination, SummarizedTaskView, PAGINATION_DEFAULT_LIMIT};
use super::{get_task_id, Pagination, SummarizedTaskView, PAGINATION_DEFAULT_LIMIT};
use crate::analytics::Analytics;
use crate::extractors::authentication::policies::*;
use crate::extractors::authentication::{AuthenticationError, GuardedData};
@@ -135,8 +135,9 @@ pub async fn create_index(
);
let task = KindWithContent::IndexCreation { index_uid: uid.to_string(), primary_key };
let uid = get_task_id(&req)?;
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid)).await??.into();
Ok(HttpResponse::Accepted().json(task))
} else {
@@ -203,8 +204,9 @@ pub async fn update_index(
primary_key: body.primary_key,
};
let uid = get_task_id(&req)?;
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid)).await??.into();
debug!("returns: {:?}", task);
Ok(HttpResponse::Accepted().json(task))
@@ -213,11 +215,13 @@ pub async fn update_index(
pub async fn delete_index(
index_scheduler: GuardedData<ActionPolicy<{ actions::INDEXES_DELETE }>, Data<IndexScheduler>>,
index_uid: web::Path<String>,
req: HttpRequest,
) -> Result<HttpResponse, ResponseError> {
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
let task = KindWithContent::IndexDeletion { index_uid: index_uid.into_inner() };
let uid = get_task_id(&req)?;
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid)).await??.into();
Ok(HttpResponse::Accepted().json(task))
}

View File

@@ -14,7 +14,7 @@ use serde_json::json;
use crate::analytics::Analytics;
use crate::extractors::authentication::policies::*;
use crate::extractors::authentication::GuardedData;
use crate::routes::SummarizedTaskView;
use crate::routes::{get_task_id, SummarizedTaskView};
#[macro_export]
macro_rules! make_setting_route {
@@ -33,7 +33,7 @@ macro_rules! make_setting_route {
use $crate::extractors::authentication::policies::*;
use $crate::extractors::authentication::GuardedData;
use $crate::extractors::sequential_extractor::SeqHandler;
use $crate::routes::SummarizedTaskView;
use $crate::routes::{get_task_id, SummarizedTaskView};
pub async fn delete(
index_scheduler: GuardedData<
@@ -41,6 +41,7 @@ macro_rules! make_setting_route {
Data<IndexScheduler>,
>,
index_uid: web::Path<String>,
req: HttpRequest,
) -> Result<HttpResponse, ResponseError> {
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
@@ -55,8 +56,9 @@ macro_rules! make_setting_route {
is_deletion: true,
allow_index_creation,
};
let uid = get_task_id(&req)?;
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task))
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid))
.await??
.into();
@@ -97,8 +99,9 @@ macro_rules! make_setting_route {
is_deletion: false,
allow_index_creation,
};
let uid = get_task_id(&req)?;
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task))
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid))
.await??
.into();
@@ -664,8 +667,9 @@ pub async fn update_all(
is_deletion: false,
allow_index_creation,
};
let uid = get_task_id(&req)?;
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid)).await??.into();
debug!("returns: {:?}", task);
Ok(HttpResponse::Accepted().json(task))
@@ -687,6 +691,7 @@ pub async fn get_all(
pub async fn delete_all(
index_scheduler: GuardedData<ActionPolicy<{ actions::SETTINGS_UPDATE }>, Data<IndexScheduler>>,
index_uid: web::Path<String>,
req: HttpRequest,
) -> Result<HttpResponse, ResponseError> {
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
@@ -700,8 +705,9 @@ pub async fn delete_all(
is_deletion: true,
allow_index_creation,
};
let uid = get_task_id(&req)?;
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid)).await??.into();
debug!("returns: {:?}", task);
Ok(HttpResponse::Accepted().json(task))

View File

@@ -5,7 +5,7 @@ use actix_web::{web, HttpRequest, HttpResponse};
use index_scheduler::IndexScheduler;
use log::debug;
use meilisearch_auth::AuthController;
use meilisearch_types::error::ResponseError;
use meilisearch_types::error::{Code, ResponseError};
use meilisearch_types::settings::{Settings, Unchecked};
use meilisearch_types::tasks::{Kind, Status, Task, TaskId};
use serde::{Deserialize, Serialize};
@@ -41,6 +41,34 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
.service(web::scope("/experimental-features").configure(features::configure));
}
pub fn get_task_id(req: &HttpRequest) -> Result<Option<TaskId>, ResponseError> {
let task_id = req
.headers()
.get("TaskId")
.map(|header| {
header.to_str().map_err(|e| {
ResponseError::from_msg(
format!("TaskId is not a valid utf-8 string: {e}"),
Code::BadRequest,
)
})
})
.transpose()?
.map(|s| {
s.parse::<TaskId>().map_err(|e| {
ResponseError::from_msg(
format!(
"Could not parse the TaskId as a {}: {e}",
std::any::type_name::<TaskId>(),
),
Code::BadRequest,
)
})
})
.transpose()?;
Ok(task_id)
}
#[derive(Debug, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct SummarizedTaskView {

View File
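Editor's note: the new get_task_id helper above reads an optional TaskId header from the incoming request and turns it into the Option<TaskId> that is now passed to IndexScheduler::register. A rough sketch of the behaviour it should exhibit, assuming actix-web's TestRequest builder (illustration only, not part of the diff):

#[actix_rt::test]
async fn task_id_header_parsing_sketch() {
    use actix_web::test::TestRequest;

    // Header present and parsable: the custom id is forwarded to the scheduler.
    let req = TestRequest::default().insert_header(("TaskId", "42")).to_http_request();
    assert_eq!(get_task_id(&req).unwrap(), Some(42));

    // No header: None, so the scheduler keeps assigning ids itself.
    let req = TestRequest::default().to_http_request();
    assert_eq!(get_task_id(&req).unwrap(), None);

    // Unparsable header: a bad_request ResponseError is returned.
    let req = TestRequest::default().insert_header(("TaskId", "not a number")).to_http_request();
    assert!(get_task_id(&req).is_err());
}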

@@ -10,7 +10,7 @@ use meilisearch_types::index_uid::IndexUid;
use meilisearch_types::tasks::{IndexSwap, KindWithContent};
use serde_json::json;
use super::SummarizedTaskView;
use super::{get_task_id, SummarizedTaskView};
use crate::analytics::Analytics;
use crate::error::MeilisearchHttpError;
use crate::extractors::authentication::policies::*;
@@ -60,7 +60,9 @@ pub async fn swap_indexes(
}
let task = KindWithContent::IndexSwap { swaps };
let uid = get_task_id(&req)?;
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid)).await??.into();
Ok(HttpResponse::Accepted().json(task))
}

View File

@@ -20,13 +20,13 @@ use time::macros::format_description;
use time::{Date, Duration, OffsetDateTime, Time};
use tokio::task;
use super::SummarizedTaskView;
use super::{get_task_id, SummarizedTaskView};
use crate::analytics::Analytics;
use crate::extractors::authentication::policies::*;
use crate::extractors::authentication::GuardedData;
use crate::extractors::sequential_extractor::SeqHandler;
const DEFAULT_LIMIT: u32 = 20;
const DEFAULT_LIMIT: u64 = 20;
pub fn configure(cfg: &mut web::ServiceConfig) {
cfg.service(
@@ -175,14 +175,14 @@ impl From<Details> for DetailsView {
#[deserr(error = DeserrQueryParamError, rename_all = camelCase, deny_unknown_fields)]
pub struct TasksFilterQuery {
#[deserr(default = Param(DEFAULT_LIMIT), error = DeserrQueryParamError<InvalidTaskLimit>)]
pub limit: Param<u32>,
pub limit: Param<TaskId>,
#[deserr(default, error = DeserrQueryParamError<InvalidTaskFrom>)]
pub from: Option<Param<TaskId>>,
#[deserr(default, error = DeserrQueryParamError<InvalidTaskUids>)]
pub uids: OptionStarOrList<u32>,
pub uids: OptionStarOrList<TaskId>,
#[deserr(default, error = DeserrQueryParamError<InvalidTaskCanceledBy>)]
pub canceled_by: OptionStarOrList<u32>,
pub canceled_by: OptionStarOrList<TaskId>,
#[deserr(default, error = DeserrQueryParamError<InvalidTaskTypes>)]
pub types: OptionStarOrList<Kind>,
#[deserr(default, error = DeserrQueryParamError<InvalidTaskStatuses>)]
@@ -249,9 +249,9 @@ impl TaskDeletionOrCancelationQuery {
#[deserr(error = DeserrQueryParamError, rename_all = camelCase, deny_unknown_fields)]
pub struct TaskDeletionOrCancelationQuery {
#[deserr(default, error = DeserrQueryParamError<InvalidTaskUids>)]
pub uids: OptionStarOrList<u32>,
pub uids: OptionStarOrList<TaskId>,
#[deserr(default, error = DeserrQueryParamError<InvalidTaskCanceledBy>)]
pub canceled_by: OptionStarOrList<u32>,
pub canceled_by: OptionStarOrList<TaskId>,
#[deserr(default, error = DeserrQueryParamError<InvalidTaskTypes>)]
pub types: OptionStarOrList<Kind>,
#[deserr(default, error = DeserrQueryParamError<InvalidTaskStatuses>)]
@@ -333,7 +333,9 @@ async fn cancel_tasks(
let task_cancelation =
KindWithContent::TaskCancelation { query: format!("?{}", req.query_string()), tasks };
let task = task::spawn_blocking(move || index_scheduler.register(task_cancelation)).await??;
let uid = get_task_id(&req)?;
let task =
task::spawn_blocking(move || index_scheduler.register(task_cancelation, uid)).await??;
let task: SummarizedTaskView = task.into();
Ok(HttpResponse::Ok().json(task))
@@ -378,7 +380,8 @@ async fn delete_tasks(
let task_deletion =
KindWithContent::TaskDeletion { query: format!("?{}", req.query_string()), tasks };
let task = task::spawn_blocking(move || index_scheduler.register(task_deletion)).await??;
let uid = get_task_id(&req)?;
let task = task::spawn_blocking(move || index_scheduler.register(task_deletion, uid)).await??;
let task: SummarizedTaskView = task.into();
Ok(HttpResponse::Ok().json(task))
@@ -388,9 +391,9 @@ async fn delete_tasks(
pub struct AllTasks {
results: Vec<TaskView>,
total: u64,
limit: u32,
from: Option<u32>,
next: Option<u32>,
limit: TaskId,
from: Option<TaskId>,
next: Option<TaskId>,
}
async fn get_tasks(

View File

@@ -154,19 +154,6 @@ async fn delete_document_by_filter() {
)
.await;
index.wait_task(1).await;
let (stats, _) = index.stats().await;
snapshot!(json_string!(stats), @r###"
{
"numberOfDocuments": 4,
"isIndexing": false,
"fieldDistribution": {
"color": 3,
"id": 4
}
}
"###);
let (response, code) =
index.delete_document_by_filter(json!({ "filter": "color = blue"})).await;
snapshot!(code, @"202 Accepted");
@@ -201,18 +188,6 @@ async fn delete_document_by_filter() {
}
"###);
let (stats, _) = index.stats().await;
snapshot!(json_string!(stats), @r###"
{
"numberOfDocuments": 2,
"isIndexing": false,
"fieldDistribution": {
"color": 1,
"id": 2
}
}
"###);
let (documents, code) = index.get_all_documents(GetAllDocumentsOptions::default()).await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(documents), @r###"
@@ -266,18 +241,6 @@ async fn delete_document_by_filter() {
}
"###);
let (stats, _) = index.stats().await;
snapshot!(json_string!(stats), @r###"
{
"numberOfDocuments": 1,
"isIndexing": false,
"fieldDistribution": {
"color": 1,
"id": 1
}
}
"###);
let (documents, code) = index.get_all_documents(GetAllDocumentsOptions::default()).await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(documents), @r###"

View File

@@ -199,3 +199,74 @@ async fn error_create_with_invalid_index_uid() {
}
"###);
}
#[actix_rt::test]
async fn send_task_id() {
let server = Server::new().await;
let app = server.init_web_app().await;
let index = server.index("catto");
let (response, code) = index.create(None).await;
snapshot!(code, @"202 Accepted");
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
{
"taskUid": 0,
"indexUid": "catto",
"status": "enqueued",
"type": "indexCreation",
"enqueuedAt": "[date]"
}
"###);
let body = serde_json::to_string(&json!({
"uid": "doggo",
"primaryKey": None::<&str>,
}))
.unwrap();
let req = test::TestRequest::post()
.uri("/indexes")
.insert_header(("TaskId", "25"))
.insert_header(ContentType::json())
.set_payload(body)
.to_request();
let res = test::call_service(&app, req).await;
snapshot!(res.status(), @"202 Accepted");
let bytes = test::read_body(res).await;
let response = serde_json::from_slice::<Value>(&bytes).expect("Expecting valid json");
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
{
"taskUid": 25,
"indexUid": "doggo",
"status": "enqueued",
"type": "indexCreation",
"enqueuedAt": "[date]"
}
"###);
let body = serde_json::to_string(&json!({
"uid": "girafo",
"primaryKey": None::<&str>,
}))
.unwrap();
let req = test::TestRequest::post()
.uri("/indexes")
.insert_header(("TaskId", "12"))
.insert_header(ContentType::json())
.set_payload(body)
.to_request();
let res = test::call_service(&app, req).await;
snapshot!(res.status(), @"400 Bad Request");
let bytes = test::read_body(res).await;
let response = serde_json::from_slice::<Value>(&bytes).expect("Expecting valid json");
snapshot!(json_string!(response), @r###"
{
"message": "Received bad task id: 12 should be >= to 26.",
"code": "bad_request",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#bad_request"
}
"###);
}

View File
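Editor's note on the 400 response captured above: the scheduler refuses a user-supplied task id that is lower than the next id it would have assigned itself (here 26, since the previous request enqueued task 25). The registration side is not part of this diff, so the following is only a plausible sketch of the check, with hypothetical names:

// Hypothetical helper, not taken from the Meilisearch sources.
fn check_custom_task_id(requested: u64, next_available: u64) -> Result<u64, String> {
    if requested < next_available {
        Err(format!("Received bad task id: {requested} should be >= to {next_available}."))
    } else {
        Ok(requested)
    }
}

fn main() {
    assert!(check_custom_task_id(12, 26).is_err()); // the case exercised by the test
    assert_eq!(check_custom_task_id(30, 26), Ok(30));
}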

@@ -1104,59 +1104,3 @@ async fn camelcased_words() {
})
.await;
}
#[actix_rt::test]
async fn simple_search_with_strange_synonyms() {
let server = Server::new().await;
let index = server.index("test");
index.update_settings(json!({ "synonyms": {"&": ["to"], "to": ["&"]} })).await;
let r = index.wait_task(0).await;
meili_snap::snapshot!(r["status"], @r###""succeeded""###);
let documents = DOCUMENTS.clone();
index.add_documents(documents, None).await;
index.wait_task(1).await;
index
.search(json!({"q": "How to train"}), |response, code| {
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response["hits"]), @r###"
[
{
"title": "How to Train Your Dragon: The Hidden World",
"id": "166428"
}
]
"###);
})
.await;
index
.search(json!({"q": "How & train"}), |response, code| {
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response["hits"]), @r###"
[
{
"title": "How to Train Your Dragon: The Hidden World",
"id": "166428"
}
]
"###);
})
.await;
index
.search(json!({"q": "to"}), |response, code| {
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response["hits"]), @r###"
[
{
"title": "How to Train Your Dragon: The Hidden World",
"id": "166428"
}
]
"###);
})
.await;
}

View File

@@ -20,7 +20,10 @@ pub use self::beu32_str_codec::BEU32StrCodec;
pub use self::field_id_word_count_codec::FieldIdWordCountCodec;
pub use self::fst_set_codec::FstSetCodec;
pub use self::obkv_codec::ObkvCodec;
pub use self::roaring_bitmap::{BoRoaringBitmapCodec, CboRoaringBitmapCodec, RoaringBitmapCodec};
pub use self::roaring_bitmap::{
BoRoaringBitmapCodec, CboRoaringBitmapCodec, CboRoaringTreemapCodec, RoaringBitmapCodec,
RoaringTreemapCodec,
};
pub use self::roaring_bitmap_length::{
BoRoaringBitmapLenCodec, CboRoaringBitmapLenCodec, RoaringBitmapLenCodec,
};

View File

@@ -0,0 +1,196 @@
use std::borrow::Cow;
use std::io;
use std::mem::size_of;
use byteorder::{NativeEndian, ReadBytesExt, WriteBytesExt};
use roaring::RoaringTreemap;
use crate::heed_codec::BytesDecodeOwned;
/// This is the limit beyond which a raw ByteOrder encoding becomes less size-efficient
/// than the direct roaring encoding; it is also the point at which the encoding in use
/// can be determined from the length of the byte array alone.
pub const THRESHOLD: usize = 4;
/// A conditional codec that uses either the RoaringTreemap serialization
/// or a lighter ByteOrder en/decoding method.
pub struct CboRoaringTreemapCodec;
impl CboRoaringTreemapCodec {
pub fn serialized_size(roaring: &RoaringTreemap) -> usize {
if roaring.len() <= THRESHOLD as u64 {
roaring.len() as usize * size_of::<u64>()
} else {
roaring.serialized_size()
}
}
pub fn serialize_into(roaring: &RoaringTreemap, vec: &mut Vec<u8>) {
if roaring.len() <= THRESHOLD as u64 {
// If the number of items (u64s) to encode is less than or equal to the threshold,
// they would weigh the same as or less than the RoaringTreemap
// header, so we directly encode them using ByteOrder instead.
for integer in roaring {
vec.write_u64::<NativeEndian>(integer).unwrap();
}
} else {
// Otherwise, we use the classic RoaringTreemap serialization, which writes a header.
roaring.serialize_into(vec).unwrap();
}
}
pub fn deserialize_from(mut bytes: &[u8]) -> io::Result<RoaringTreemap> {
if bytes.len() <= THRESHOLD * size_of::<u64>() {
// If at most `THRESHOLD` integers fit into this array of bytes,
// it means that the ByteOrder serializer was used.
let mut bitmap = RoaringTreemap::new();
while let Ok(integer) = bytes.read_u64::<NativeEndian>() {
bitmap.insert(integer);
}
Ok(bitmap)
} else {
// Otherwise, the classic RoaringTreemap serialization was used, and its header
// alone takes roughly `THRESHOLD` integers' worth of bytes.
RoaringTreemap::deserialize_unchecked_from(bytes)
}
}
/// Merge serialized CboRoaringTreemaps into a buffer.
///
/// If the number of merged values does not exceed the threshold, the values are
/// serialized directly into the buffer; otherwise a RoaringTreemap is created from
/// the values and serialized into the buffer.
pub fn merge_into(slices: &[Cow<[u8]>], buffer: &mut Vec<u8>) -> io::Result<()> {
let mut roaring = RoaringTreemap::new();
let mut vec = Vec::new();
for bytes in slices {
if bytes.len() <= THRESHOLD * size_of::<u64>() {
let mut reader = bytes.as_ref();
while let Ok(integer) = reader.read_u64::<NativeEndian>() {
vec.push(integer);
}
} else {
roaring |= RoaringTreemap::deserialize_unchecked_from(bytes.as_ref())?;
}
}
if roaring.is_empty() {
vec.sort_unstable();
vec.dedup();
if vec.len() <= THRESHOLD {
for integer in vec {
buffer.extend_from_slice(&integer.to_ne_bytes());
}
} else {
// We can unwrap safely because the vector was sorted and deduplicated above.
let roaring = RoaringTreemap::from_sorted_iter(vec.into_iter()).unwrap();
roaring.serialize_into(buffer)?;
}
} else {
roaring.extend(vec);
roaring.serialize_into(buffer)?;
}
Ok(())
}
}
impl heed::BytesDecode<'_> for CboRoaringTreemapCodec {
type DItem = RoaringTreemap;
fn bytes_decode(bytes: &[u8]) -> Option<Self::DItem> {
Self::deserialize_from(bytes).ok()
}
}
impl BytesDecodeOwned for CboRoaringTreemapCodec {
type DItem = RoaringTreemap;
fn bytes_decode_owned(bytes: &[u8]) -> Option<Self::DItem> {
Self::deserialize_from(bytes).ok()
}
}
impl heed::BytesEncode<'_> for CboRoaringTreemapCodec {
type EItem = RoaringTreemap;
fn bytes_encode(item: &Self::EItem) -> Option<Cow<[u8]>> {
let mut vec = Vec::with_capacity(Self::serialized_size(item));
Self::serialize_into(item, &mut vec);
Some(Cow::Owned(vec))
}
}
#[cfg(test)]
mod tests {
use std::iter::FromIterator;
use heed::{BytesDecode, BytesEncode};
use super::*;
#[test]
fn verify_encoding_decoding() {
let input = RoaringTreemap::from_iter(0..THRESHOLD as u64);
let bytes = CboRoaringTreemapCodec::bytes_encode(&input).unwrap();
let output = CboRoaringTreemapCodec::bytes_decode(&bytes).unwrap();
assert_eq!(input, output);
}
#[test]
fn verify_threshold() {
let input = RoaringTreemap::from_iter(0..THRESHOLD as u64);
// use roaring treemap
let mut bytes = Vec::new();
input.serialize_into(&mut bytes).unwrap();
let roaring_size = bytes.len();
// use byteorder directly
let mut bytes = Vec::new();
for integer in input {
bytes.write_u64::<NativeEndian>(integer).unwrap();
}
let bo_size = bytes.len();
assert!(roaring_size > bo_size, "roaring size: {}, bo size {}", roaring_size, bo_size);
}
#[test]
fn merge_cbo_roaring_bitmaps() {
let mut buffer = Vec::new();
let small_data = vec![
RoaringTreemap::from_sorted_iter(1..4).unwrap(),
RoaringTreemap::from_sorted_iter(2..5).unwrap(),
RoaringTreemap::from_sorted_iter(4..6).unwrap(),
RoaringTreemap::from_sorted_iter(1..3).unwrap(),
];
let small_data: Vec<_> =
small_data.iter().map(|b| CboRoaringTreemapCodec::bytes_encode(b).unwrap()).collect();
CboRoaringTreemapCodec::merge_into(small_data.as_slice(), &mut buffer).unwrap();
let bitmap = CboRoaringTreemapCodec::deserialize_from(&buffer).unwrap();
let expected = RoaringTreemap::from_sorted_iter(1..6).unwrap();
assert_eq!(bitmap, expected);
let medium_data = vec![
RoaringTreemap::from_sorted_iter(1..4).unwrap(),
RoaringTreemap::from_sorted_iter(2..5).unwrap(),
RoaringTreemap::from_sorted_iter(4..8).unwrap(),
RoaringTreemap::from_sorted_iter(0..3).unwrap(),
RoaringTreemap::from_sorted_iter(7..23).unwrap(),
];
let medium_data: Vec<_> =
medium_data.iter().map(|b| CboRoaringTreemapCodec::bytes_encode(b).unwrap()).collect();
buffer.clear();
CboRoaringTreemapCodec::merge_into(medium_data.as_slice(), &mut buffer).unwrap();
let bitmap = CboRoaringTreemapCodec::deserialize_from(&buffer).unwrap();
let expected = RoaringTreemap::from_sorted_iter(0..23).unwrap();
assert_eq!(bitmap, expected);
}
}

View File
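Editor's note on the THRESHOLD trick in the codec above: the encoding is chosen purely from the byte length. Up to four u64 values are written raw, which takes at most 32 bytes, while the roaring serialization of any set with more values carries a header and comes out larger, so deserialize_from can tell the two formats apart without a tag. A quick way to convince yourself of the size relationship (illustration only, not part of the diff):

use roaring::RoaringTreemap;

fn main() {
    let raw_len = 4 * std::mem::size_of::<u64>(); // four raw u64s: 32 bytes
    let treemap: RoaringTreemap = (0..5u64).collect(); // smallest set above the threshold
    // The roaring serialization carries a header, so it is already larger than 32 bytes here.
    assert!(treemap.serialized_size() > raw_len);
    println!("raw: {raw_len} bytes, treemap: {} bytes", treemap.serialized_size());
}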

@@ -1,7 +1,11 @@
mod bo_roaring_bitmap_codec;
pub mod cbo_roaring_bitmap_codec;
pub mod cbo_roaring_treemap_codec;
mod roaring_bitmap_codec;
mod roaring_treemap_codec;
pub use self::bo_roaring_bitmap_codec::BoRoaringBitmapCodec;
pub use self::cbo_roaring_bitmap_codec::CboRoaringBitmapCodec;
pub use self::cbo_roaring_treemap_codec::CboRoaringTreemapCodec;
pub use self::roaring_bitmap_codec::RoaringBitmapCodec;
pub use self::roaring_treemap_codec::RoaringTreemapCodec;

View File

@@ -0,0 +1,33 @@
use std::borrow::Cow;
use roaring::RoaringTreemap;
use crate::heed_codec::BytesDecodeOwned;
pub struct RoaringTreemapCodec;
impl heed::BytesDecode<'_> for RoaringTreemapCodec {
type DItem = RoaringTreemap;
fn bytes_decode(bytes: &[u8]) -> Option<Self::DItem> {
RoaringTreemap::deserialize_unchecked_from(bytes).ok()
}
}
impl BytesDecodeOwned for RoaringTreemapCodec {
type DItem = RoaringTreemap;
fn bytes_decode_owned(bytes: &[u8]) -> Option<Self::DItem> {
RoaringTreemap::deserialize_from(bytes).ok()
}
}
impl heed::BytesEncode<'_> for RoaringTreemapCodec {
type EItem = RoaringTreemap;
fn bytes_encode(item: &Self::EItem) -> Option<Cow<[u8]>> {
let mut bytes = Vec::with_capacity(item.serialized_size());
item.serialize_into(&mut bytes).ok()?;
Some(Cow::Owned(bytes))
}
}

View File

@@ -418,11 +418,19 @@ impl<'t> Matcher<'t, '_> {
} else {
match &self.matches {
Some((tokens, matches)) => {
// If the text has to be cropped,
// compute the best interval to crop around.
let matches = match format_options.crop {
Some(crop_size) if crop_size > 0 => {
self.find_best_match_interval(matches, crop_size)
}
_ => matches,
};
// If the text has to be cropped,
// crop around the best interval.
let (byte_start, byte_end) = match format_options.crop {
Some(crop_size) if crop_size > 0 => {
let matches = self.find_best_match_interval(matches, crop_size);
self.crop_bounds(tokens, matches, crop_size)
}
_ => (0, self.text.len()),
@@ -442,11 +450,6 @@ impl<'t> Matcher<'t, '_> {
for m in matches {
let token = &tokens[m.token_position];
// skip matches out of the crop window.
if token.byte_start < byte_start || token.byte_end > byte_end {
continue;
}
if byte_index < token.byte_start {
formatted.push(&self.text[byte_index..token.byte_start]);
}
@@ -797,37 +800,6 @@ mod tests {
);
}
#[test]
fn format_highlight_crop_phrase_query() {
//! testing: https://github.com/meilisearch/meilisearch/issues/3975
let temp_index = TempIndex::new();
temp_index
.add_documents(documents!([
{ "id": 1, "text": "The groundbreaking invention had the power to split the world between those who embraced progress and those who resisted change!" }
]))
.unwrap();
let rtxn = temp_index.read_txn().unwrap();
let format_options = FormatOptions { highlight: true, crop: Some(10) };
let text = "The groundbreaking invention had the power to split the world between those who embraced progress and those who resisted change!";
let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "\"the world\"");
let mut matcher = builder.build(text);
// should return 10 words with a marker at the start as well as at the end, and the highlighted matches.
insta::assert_snapshot!(
matcher.format(format_options),
@"…had the power to split <em>the</em> <em>world</em> between those who…"
);
let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "those \"and those\"");
let mut matcher = builder.build(text);
// should highlight "those" and the phrase "and those".
insta::assert_snapshot!(
matcher.format(format_options),
@"…world between <em>those</em> who embraced progress <em>and</em> <em>those</em> who resisted…"
);
}
#[test]
fn smaller_crop_size() {
//! testing: https://github.com/meilisearch/specifications/pull/120#discussion_r836536295

View File

@@ -226,9 +226,9 @@ fn process_tokens<'a>(
) -> impl Iterator<Item = (usize, Token<'a>)> {
tokens
.skip_while(|token| token.is_separator())
.scan((0, None), |(offset, prev_kind), mut token| {
.scan((0, None), |(offset, prev_kind), token| {
match token.kind {
TokenKind::Word | TokenKind::StopWord if !token.lemma().is_empty() => {
TokenKind::Word | TokenKind::StopWord | TokenKind::Unknown => {
*offset += match *prev_kind {
Some(TokenKind::Separator(SeparatorKind::Hard)) => 8,
Some(_) => 1,
@@ -244,7 +244,7 @@ fn process_tokens<'a>(
{
*prev_kind = Some(token.kind);
}
_ => token.kind = TokenKind::Unknown,
_ => (),
}
Some((*offset, token))
})

View File

@@ -59,13 +59,7 @@ pub(crate) fn data_from_obkv_documents(
original_obkv_chunks
.par_bridge()
.map(|original_documents_chunk| {
send_original_documents_data(
original_documents_chunk,
indexer,
lmdb_writer_sx.clone(),
vectors_field_id,
primary_key_id,
)
send_original_documents_data(original_documents_chunk, lmdb_writer_sx.clone())
})
.collect::<Result<()>>()?;
@@ -82,6 +76,7 @@ pub(crate) fn data_from_obkv_documents(
&faceted_fields,
primary_key_id,
geo_fields_ids,
vectors_field_id,
&stop_words,
&allowed_separators,
&dictionary,
@@ -270,33 +265,11 @@ fn spawn_extraction_task<FE, FS, M>(
/// - documents
fn send_original_documents_data(
original_documents_chunk: Result<grenad::Reader<File>>,
indexer: GrenadParameters,
lmdb_writer_sx: Sender<Result<TypedChunk>>,
vectors_field_id: Option<FieldId>,
primary_key_id: FieldId,
) -> Result<()> {
let original_documents_chunk =
original_documents_chunk.and_then(|c| unsafe { as_cloneable_grenad(&c) })?;
if let Some(vectors_field_id) = vectors_field_id {
let documents_chunk_cloned = original_documents_chunk.clone();
let lmdb_writer_sx_cloned = lmdb_writer_sx.clone();
rayon::spawn(move || {
let result = extract_vector_points(
documents_chunk_cloned,
indexer,
primary_key_id,
vectors_field_id,
);
let _ = match result {
Ok(vector_points) => {
lmdb_writer_sx_cloned.send(Ok(TypedChunk::VectorPoints(vector_points)))
}
Err(error) => lmdb_writer_sx_cloned.send(Err(error)),
};
});
}
// TODO: create a custom internal error
lmdb_writer_sx.send(Ok(TypedChunk::Documents(original_documents_chunk))).unwrap();
Ok(())
@@ -318,6 +291,7 @@ fn send_and_extract_flattened_documents_data(
faceted_fields: &HashSet<FieldId>,
primary_key_id: FieldId,
geo_fields_ids: Option<(FieldId, FieldId)>,
vectors_field_id: Option<FieldId>,
stop_words: &Option<fst::Set<&[u8]>>,
allowed_separators: &Option<&[&str]>,
dictionary: &Option<&[&str]>,
@@ -348,6 +322,25 @@ fn send_and_extract_flattened_documents_data(
});
}
if let Some(vectors_field_id) = vectors_field_id {
let documents_chunk_cloned = flattened_documents_chunk.clone();
let lmdb_writer_sx_cloned = lmdb_writer_sx.clone();
rayon::spawn(move || {
let result = extract_vector_points(
documents_chunk_cloned,
indexer,
primary_key_id,
vectors_field_id,
);
let _ = match result {
Ok(vector_points) => {
lmdb_writer_sx_cloned.send(Ok(TypedChunk::VectorPoints(vector_points)))
}
Err(error) => lmdb_writer_sx_cloned.send(Err(error)),
};
});
}
let (docid_word_positions_chunk, docid_fid_facet_values_chunks): (Result<_>, Result<_>) =
rayon::join(
|| {

View File

@@ -2550,25 +2550,6 @@ mod tests {
db_snap!(index, word_position_docids, 3, @"74f556b91d161d997a89468b4da1cb8f");
}
/// Index documents containing different numbers of vectors.
/// Vectors must all be of the same length.
#[test]
fn test_multiple_vectors() {
let index = TempIndex::new();
index.add_documents(documents!([{"id": 0, "_vectors": [[0, 1, 2], [3, 4, 5]] }])).unwrap();
index.add_documents(documents!([{"id": 1, "_vectors": [6, 7, 8] }])).unwrap();
index
.add_documents(
documents!([{"id": 2, "_vectors": [[9, 10, 11], [12, 13, 14], [15, 16, 17]] }]),
)
.unwrap();
let rtxn = index.read_txn().unwrap();
let res = index.search(&rtxn).vector([0.0, 1.0, 2.0]).execute().unwrap();
assert_eq!(res.documents_ids.len(), 3);
}
#[test]
fn reproduce_the_bug() {
/*

View File

@@ -573,7 +573,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
tokenizer
.tokenize(text)
.filter_map(|token| {
if token.is_word() && !token.lemma().is_empty() {
if token.is_word() {
Some(token.lemma().to_string())
} else {
None
@@ -608,18 +608,13 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
for (word, synonyms) in user_synonyms {
// Normalize both the word and associated synonyms.
let normalized_word = normalize(&tokenizer, word);
let normalized_synonyms: Vec<_> = synonyms
.iter()
.map(|synonym| normalize(&tokenizer, synonym))
.filter(|synonym| !synonym.is_empty())
.collect();
let normalized_synonyms =
synonyms.iter().map(|synonym| normalize(&tokenizer, synonym));
// Store the normalized synonyms under the normalized word,
// merging the possible duplicate words.
if !normalized_word.is_empty() && !normalized_synonyms.is_empty() {
let entry = new_synonyms.entry(normalized_word).or_insert_with(Vec::new);
entry.extend(normalized_synonyms.into_iter());
}
let entry = new_synonyms.entry(normalized_word).or_insert_with(Vec::new);
entry.extend(normalized_synonyms);
}
// Make sure that we don't have duplicate synonyms.
@@ -1427,43 +1422,6 @@ mod tests {
assert!(result.documents_ids.is_empty());
}
#[test]
fn thai_synonyms() {
let mut index = TempIndex::new();
index.index_documents_config.autogenerate_docids = true;
let mut wtxn = index.write_txn().unwrap();
// Send 2 documents.
index
.add_documents_using_wtxn(
&mut wtxn,
documents!([
{ "name": "ยี่ปุ่น" },
{ "name": "ญี่ปุ่น" },
]),
)
.unwrap();
// In the same transaction provide some synonyms
index
.update_settings_using_wtxn(&mut wtxn, |settings| {
settings.set_synonyms(btreemap! {
"japanese".to_string() => vec![S("ญี่ปุ่น"), S("ยี่ปุ่น")],
});
})
.unwrap();
wtxn.commit().unwrap();
// Ensure synonyms are effectively stored
let rtxn = index.read_txn().unwrap();
let synonyms = index.synonyms(&rtxn).unwrap();
assert!(!synonyms.is_empty()); // at this point the index should return something
// Check that we can use synonyms
let result = index.search(&rtxn).query("japanese").execute().unwrap();
assert_eq!(result.documents_ids.len(), 2);
}
#[test]
fn setting_searchable_recomputes_other_settings() {
let index = TempIndex::new();

View File

@@ -186,16 +186,12 @@ fn create_value(value: &Document, mut selectors: HashSet<&str>) -> Document {
let array = create_array(array, &sub_selectors);
if !array.is_empty() {
new_value.insert(key.to_string(), array.into());
} else {
new_value.insert(key.to_string(), Value::Array(vec![]));
}
}
Value::Object(object) => {
let object = create_value(object, sub_selectors);
if !object.is_empty() {
new_value.insert(key.to_string(), object.into());
} else {
new_value.insert(key.to_string(), Value::Object(Map::new()));
}
}
_ => (),
@@ -215,8 +211,6 @@ fn create_array(array: &[Value], selectors: &HashSet<&str>) -> Vec<Value> {
let array = create_array(array, selectors);
if !array.is_empty() {
res.push(array.into());
} else {
res.push(Value::Array(vec![]));
}
}
Value::Object(object) => {
@@ -643,24 +637,6 @@ mod tests {
);
}
#[test]
fn empty_array_object_return_empty() {
let value: Value = json!({
"array": [],
"object": {},
});
let value: &Document = value.as_object().unwrap();
let res: Value = select_values(value, vec!["array.name", "object.name"]).into();
assert_eq!(
res,
json!({
"array": [],
"object": {},
})
);
}
#[test]
fn all_conflict_variation() {
let value: Value = json!({