Compare commits

..

1 Commits

Author SHA1 Message Date
Clément Renault
872e76b342 Plug sentry as an ActixWeb middleware 2023-08-30 11:00:40 +02:00
19 changed files with 355 additions and 405 deletions

View File

@@ -53,6 +53,5 @@ jobs:
uses: mislav/bump-homebrew-formula-action@v2
with:
formula-name: meilisearch
formula-path: Formula/m/meilisearch.rb
env:
COMMITTER_TOKEN: ${{ secrets.HOMEBREW_COMMITTER_TOKEN }}

204
Cargo.lock generated
View File

@@ -1073,6 +1073,16 @@ dependencies = [
"syn 1.0.109",
]
[[package]]
name = "debugid"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bef552e6f588e446098f6ba40d89ac146c8c7b64aade83c051ee00bb5d2bc18d"
dependencies = [
"serde",
"uuid 1.4.1",
]
[[package]]
name = "deranged"
version = "0.3.7"
@@ -1444,7 +1454,18 @@ dependencies = [
"insta",
"nom",
"nom_locate",
"unescaper",
]
[[package]]
name = "findshlibs"
version = "0.10.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "40b9e59cd0f7e0806cca4be089683ecb6434e602038df21fe6bf6711b2f07f64"
dependencies = [
"cc",
"lazy_static",
"libc",
"winapi",
]
[[package]]
@@ -1795,6 +1816,17 @@ dependencies = [
"digest",
]
[[package]]
name = "hostname"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3c731c3e10504cc8ed35cfe2f1db4c9274c3d35fa486e3b31df46f068ef3e867"
dependencies = [
"libc",
"match_cfg",
"winapi",
]
[[package]]
name = "http"
version = "0.2.9"
@@ -2492,6 +2524,12 @@ version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3e2e65a1a2e43cfcb47a895c4c8b10d1f4a61097f9f254f183aee60cad9c651d"
[[package]]
name = "match_cfg"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ffbee8634e0d45d258acb448e7eaab3fce7a0a467395d4d9f228e3c1f01fb2e4"
[[package]]
name = "md5"
version = "0.7.0"
@@ -2572,6 +2610,8 @@ dependencies = [
"rustls 0.20.8",
"rustls-pemfile",
"segment",
"sentry",
"sentry-actix",
"serde",
"serde_json",
"serde_urlencoded",
@@ -2891,6 +2931,17 @@ dependencies = [
"num-traits",
]
[[package]]
name = "os_info"
version = "3.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "006e42d5b888366f1880eda20371fedde764ed2213dc8496f49622fa0c99cd5e"
dependencies = [
"log",
"serde",
"winapi",
]
[[package]]
name = "page_size"
version = "0.4.2"
@@ -3613,6 +3664,126 @@ version = "1.0.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b0293b4b29daaf487284529cc2f5675b8e57c61f70167ba415a463651fd6a918"
[[package]]
name = "sentry"
version = "0.31.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2e95efd0cefa32028cdb9766c96de71d96671072f9fb494dc9fb84c0ef93e52b"
dependencies = [
"httpdate",
"reqwest",
"rustls 0.21.6",
"sentry-backtrace",
"sentry-contexts",
"sentry-core",
"sentry-debug-images",
"sentry-panic",
"sentry-tracing",
"tokio",
"ureq",
"webpki-roots 0.25.2",
]
[[package]]
name = "sentry-actix"
version = "0.31.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "795851be3047d9be16ea8a808383f89e75d2aebc9b53d25fe708c27bc56b4488"
dependencies = [
"actix-web",
"futures-util",
"sentry-core",
]
[[package]]
name = "sentry-backtrace"
version = "0.31.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6ac2bac6f310c4c4c4bb094d1541d32ae497f8c5c23405e85492cefdfe0971a9"
dependencies = [
"backtrace",
"once_cell",
"regex",
"sentry-core",
]
[[package]]
name = "sentry-contexts"
version = "0.31.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6c3e17295cecdbacf66c5bd38d6e1147e09e1e9d824d2d5341f76638eda02a3a"
dependencies = [
"hostname",
"libc",
"os_info",
"rustc_version",
"sentry-core",
"uname",
]
[[package]]
name = "sentry-core"
version = "0.31.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8339474f587f36cb110fa1ed1b64229eea6d47b0b886375579297b7e47aeb055"
dependencies = [
"once_cell",
"rand",
"sentry-types",
"serde",
"serde_json",
]
[[package]]
name = "sentry-debug-images"
version = "0.31.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1c11e7d2b809b06497a18a2e60f513206462ae2db27081dfb7be9ade1f329cc8"
dependencies = [
"findshlibs",
"once_cell",
"sentry-core",
]
[[package]]
name = "sentry-panic"
version = "0.31.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "875b69f506da75bd664029eafb05f8934297d2990192896d17325f066bd665b7"
dependencies = [
"sentry-backtrace",
"sentry-core",
]
[[package]]
name = "sentry-tracing"
version = "0.31.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "89feead9bdd116f8035e89567651340fc382db29240b6c55ef412078b08d1aa3"
dependencies = [
"sentry-backtrace",
"sentry-core",
"tracing-core",
"tracing-subscriber",
]
[[package]]
name = "sentry-types"
version = "0.31.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "99dc599bd6646884fc403d593cdcb9816dd67c50cff3271c01ff123617908dcd"
dependencies = [
"debugid",
"getrandom",
"hex",
"serde",
"serde_json",
"thiserror",
"time",
"url",
"uuid 1.4.1",
]
[[package]]
name = "serde"
version = "1.0.183"
@@ -4161,6 +4332,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0955b8137a1df6f1a2e9a37d8a6656291ff0297c1a97c24e0d8425fe2312f79a"
dependencies = [
"once_cell",
"valuable",
]
[[package]]
name = "tracing-subscriber"
version = "0.3.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "30a651bc37f915e81f087d86e62a18eec5f79550c7faff886f7090b4ea757c77"
dependencies = [
"tracing-core",
]
[[package]]
@@ -4182,12 +4363,12 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ed646292ffc8188ef8ea4d1e0e0150fb15a5c2e12ad9b8fc191ae7a8a7f3c4b9"
[[package]]
name = "unescaper"
version = "0.1.2"
name = "uname"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a96a44ae11e25afb520af4534fd7b0bd8cd613e35a78def813b8cf41631fa3c8"
checksum = "b72f89f0ca32e4db1c04e2a72f5345d59796d4866a1ee0609084569f73683dc8"
dependencies = [
"thiserror",
"libc",
]
[[package]]
@@ -4262,6 +4443,7 @@ dependencies = [
"form_urlencoded",
"idna",
"percent-encoding",
"serde",
]
[[package]]
@@ -4301,6 +4483,12 @@ dependencies = [
"serde",
]
[[package]]
name = "valuable"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "830b7e5d4d90034032940e4ace0d9a9a057e7a45cd94e6c007832e39edb82f6d"
[[package]]
name = "vcpkg"
version = "0.2.15"
@@ -4469,6 +4657,12 @@ dependencies = [
"rustls-webpki 0.100.1",
]
[[package]]
name = "webpki-roots"
version = "0.25.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "14247bb57be4f377dfb94c72830b8ce8fc6beac03cf4bf7b9732eadd414123fc"
[[package]]
name = "whatlang"
version = "0.16.2"

View File

@@ -14,7 +14,6 @@ license.workspace = true
[dependencies]
nom = "7.1.3"
nom_locate = "4.1.0"
unescaper = "0.1.2"
[dev-dependencies]
insta = "1.29.0"

View File

@@ -62,7 +62,6 @@ pub enum ErrorKind<'a> {
MisusedGeoRadius,
MisusedGeoBoundingBox,
InvalidPrimary,
InvalidEscapedNumber,
ExpectedEof,
ExpectedValue(ExpectedValueKind),
MalformedValue,
@@ -148,9 +147,6 @@ impl<'a> Display for Error<'a> {
let text = if input.trim().is_empty() { "but instead got nothing.".to_string() } else { format!("at `{}`.", escaped_input) };
writeln!(f, "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` {}", text)?
}
ErrorKind::InvalidEscapedNumber => {
writeln!(f, "Found an invalid escaped sequence number: `{}`.", escaped_input)?
}
ErrorKind::ExpectedEof => {
writeln!(f, "Found unexpected characters at the end of the filter: `{}`. You probably forgot an `OR` or an `AND` rule.", escaped_input)?
}

View File

@@ -545,8 +545,6 @@ impl<'a> std::fmt::Display for Token<'a> {
#[cfg(test)]
pub mod tests {
use FilterCondition as Fc;
use super::*;
/// Create a raw [Token]. You must specify the string that appear BEFORE your element followed by your element
@@ -558,22 +556,14 @@ pub mod tests {
unsafe { Span::new_from_raw_offset(offset, lines as u32, value, "") }.into()
}
fn p(s: &str) -> impl std::fmt::Display + '_ {
Fc::parse(s).unwrap().unwrap()
}
#[test]
fn parse_escaped() {
insta::assert_display_snapshot!(p(r#"title = 'foo\\'"#), @r#"{title} = {foo\}"#);
insta::assert_display_snapshot!(p(r#"title = 'foo\\\\'"#), @r#"{title} = {foo\\}"#);
insta::assert_display_snapshot!(p(r#"title = 'foo\\\\\\'"#), @r#"{title} = {foo\\\}"#);
insta::assert_display_snapshot!(p(r#"title = 'foo\\\\\\\\'"#), @r#"{title} = {foo\\\\}"#);
// but it also works with other sequencies
insta::assert_display_snapshot!(p(r#"title = 'foo\x20\n\t\"\'"'"#), @"{title} = {foo \n\t\"\'\"}");
}
#[test]
fn parse() {
use FilterCondition as Fc;
fn p(s: &str) -> impl std::fmt::Display + '_ {
Fc::parse(s).unwrap().unwrap()
}
// Test equal
insta::assert_display_snapshot!(p("channel = Ponce"), @"{channel} = {Ponce}");
insta::assert_display_snapshot!(p("subscribers = 12"), @"{subscribers} = {12}");

View File

@@ -171,24 +171,7 @@ pub fn parse_value(input: Span) -> IResult<Token> {
})
})?;
match unescaper::unescape(value.value()) {
Ok(content) => {
if content.len() != value.value().len() {
Ok((input, Token::new(value.original_span(), Some(content))))
} else {
Ok((input, value))
}
}
Err(unescaper::Error::IncompleteStr(_)) => Err(nom::Err::Incomplete(nom::Needed::Unknown)),
Err(unescaper::Error::ParseIntError { .. }) => Err(nom::Err::Error(Error::new_from_kind(
value.original_span(),
ErrorKind::InvalidEscapedNumber,
))),
Err(unescaper::Error::InvalidChar { .. }) => Err(nom::Err::Error(Error::new_from_kind(
value.original_span(),
ErrorKind::MalformedValue,
))),
}
Ok((input, value))
}
fn is_value_component(c: char) -> bool {
@@ -335,17 +318,17 @@ pub mod test {
("\"cha'nnel\"", "cha'nnel", false),
("I'm tamo", "I", false),
// escaped thing but not quote
(r#""\\""#, r#"\"#, true),
(r#""\\\\\\""#, r#"\\\"#, true),
(r#""aa\\aa""#, r#"aa\aa"#, true),
(r#""\\""#, r#"\\"#, false),
(r#""\\\\\\""#, r#"\\\\\\"#, false),
(r#""aa\\aa""#, r#"aa\\aa"#, false),
// with double quote
(r#""Hello \"world\"""#, r#"Hello "world""#, true),
(r#""Hello \\\"world\\\"""#, r#"Hello \"world\""#, true),
(r#""Hello \\\"world\\\"""#, r#"Hello \\"world\\""#, true),
(r#""I'm \"super\" tamo""#, r#"I'm "super" tamo"#, true),
(r#""\"\"""#, r#""""#, true),
// with simple quote
(r#"'Hello \'world\''"#, r#"Hello 'world'"#, true),
(r#"'Hello \\\'world\\\''"#, r#"Hello \'world\'"#, true),
(r#"'Hello \\\'world\\\''"#, r#"Hello \\'world\\'"#, true),
(r#"'I\'m "super" tamo'"#, r#"I'm "super" tamo"#, true),
(r#"'\'\''"#, r#"''"#, true),
];
@@ -367,14 +350,7 @@ pub mod test {
"Filter `{}` was not supposed to be escaped",
input
);
assert_eq!(
token.value(),
expected,
"Filter `{}` failed by giving `{}` instead of `{}`.",
input,
token.value(),
expected
);
assert_eq!(token.value(), expected, "Filter `{}` failed.", input);
}
}

View File

@@ -67,6 +67,10 @@ pub(crate) enum Batch {
op: IndexOperation,
must_create_index: bool,
},
IndexDocumentDeletionByFilter {
index_uid: String,
task: Task,
},
IndexCreation {
index_uid: String,
primary_key: Option<String>,
@@ -110,10 +114,6 @@ pub(crate) enum IndexOperation {
documents: Vec<Vec<String>>,
tasks: Vec<Task>,
},
IndexDocumentDeletionByFilter {
index_uid: String,
task: Task,
},
DocumentClear {
index_uid: String,
tasks: Vec<Task>,
@@ -155,6 +155,7 @@ impl Batch {
| Batch::TaskDeletion(task)
| Batch::Dump(task)
| Batch::IndexCreation { task, .. }
| Batch::IndexDocumentDeletionByFilter { task, .. }
| Batch::IndexUpdate { task, .. } => vec![task.uid],
Batch::SnapshotCreation(tasks) | Batch::IndexDeletion { tasks, .. } => {
tasks.iter().map(|task| task.uid).collect()
@@ -166,7 +167,6 @@ impl Batch {
| IndexOperation::DocumentClear { tasks, .. } => {
tasks.iter().map(|task| task.uid).collect()
}
IndexOperation::IndexDocumentDeletionByFilter { task, .. } => vec![task.uid],
IndexOperation::SettingsAndDocumentOperation {
document_import_tasks: tasks,
settings_tasks: other,
@@ -194,7 +194,8 @@ impl Batch {
IndexOperation { op, .. } => Some(op.index_uid()),
IndexCreation { index_uid, .. }
| IndexUpdate { index_uid, .. }
| IndexDeletion { index_uid, .. } => Some(index_uid),
| IndexDeletion { index_uid, .. }
| IndexDocumentDeletionByFilter { index_uid, .. } => Some(index_uid),
}
}
}
@@ -204,7 +205,6 @@ impl IndexOperation {
match self {
IndexOperation::DocumentOperation { index_uid, .. }
| IndexOperation::DocumentDeletion { index_uid, .. }
| IndexOperation::IndexDocumentDeletionByFilter { index_uid, .. }
| IndexOperation::DocumentClear { index_uid, .. }
| IndexOperation::Settings { index_uid, .. }
| IndexOperation::DocumentClearAndSetting { index_uid, .. }
@@ -239,12 +239,9 @@ impl IndexScheduler {
let task = self.get_task(rtxn, id)?.ok_or(Error::CorruptedTaskQueue)?;
match &task.kind {
KindWithContent::DocumentDeletionByFilter { index_uid, .. } => {
Ok(Some(Batch::IndexOperation {
op: IndexOperation::IndexDocumentDeletionByFilter {
index_uid: index_uid.clone(),
task,
},
must_create_index: false,
Ok(Some(Batch::IndexDocumentDeletionByFilter {
index_uid: index_uid.clone(),
task,
}))
}
_ => unreachable!(),
@@ -899,6 +896,51 @@ impl IndexScheduler {
Ok(tasks)
}
Batch::IndexDocumentDeletionByFilter { mut task, index_uid: _ } => {
let (index_uid, filter) =
if let KindWithContent::DocumentDeletionByFilter { index_uid, filter_expr } =
&task.kind
{
(index_uid, filter_expr)
} else {
unreachable!()
};
let index = {
let rtxn = self.env.read_txn()?;
self.index_mapper.index(&rtxn, index_uid)?
};
let deleted_documents = delete_document_by_filter(filter, index);
let original_filter = if let Some(Details::DocumentDeletionByFilter {
original_filter,
deleted_documents: _,
}) = task.details
{
original_filter
} else {
// In the case of a `documentDeleteByFilter` the details MUST be set
unreachable!();
};
match deleted_documents {
Ok(deleted_documents) => {
task.status = Status::Succeeded;
task.details = Some(Details::DocumentDeletionByFilter {
original_filter,
deleted_documents: Some(deleted_documents),
});
}
Err(e) => {
task.status = Status::Failed;
task.details = Some(Details::DocumentDeletionByFilter {
original_filter,
deleted_documents: Some(0),
});
task.error = Some(e.into());
}
}
Ok(vec![task])
}
Batch::IndexCreation { index_uid, primary_key, task } => {
let wtxn = self.env.write_txn()?;
if self.index_mapper.exists(&wtxn, &index_uid)? {
@@ -1257,47 +1299,6 @@ impl IndexScheduler {
Ok(tasks)
}
IndexOperation::IndexDocumentDeletionByFilter { mut task, index_uid: _ } => {
let filter =
if let KindWithContent::DocumentDeletionByFilter { filter_expr, .. } =
&task.kind
{
filter_expr
} else {
unreachable!()
};
let deleted_documents = delete_document_by_filter(index_wtxn, filter, index);
let original_filter = if let Some(Details::DocumentDeletionByFilter {
original_filter,
deleted_documents: _,
}) = task.details
{
original_filter
} else {
// In the case of a `documentDeleteByFilter` the details MUST be set
unreachable!();
};
match deleted_documents {
Ok(deleted_documents) => {
task.status = Status::Succeeded;
task.details = Some(Details::DocumentDeletionByFilter {
original_filter,
deleted_documents: Some(deleted_documents),
});
}
Err(e) => {
task.status = Status::Failed;
task.details = Some(Details::DocumentDeletionByFilter {
original_filter,
deleted_documents: Some(0),
});
task.error = Some(e.into());
}
}
Ok(vec![task])
}
IndexOperation::Settings { index_uid: _, settings, mut tasks } => {
let indexer_config = self.index_mapper.indexer_config();
let mut builder = milli::update::Settings::new(index_wtxn, index, indexer_config);
@@ -1497,22 +1498,23 @@ impl IndexScheduler {
}
}
fn delete_document_by_filter<'a>(
wtxn: &mut RwTxn<'a, '_>,
filter: &serde_json::Value,
index: &'a Index,
) -> Result<u64> {
fn delete_document_by_filter(filter: &serde_json::Value, index: Index) -> Result<u64> {
let filter = Filter::from_json(filter)?;
Ok(if let Some(filter) = filter {
let candidates = filter.evaluate(wtxn, index).map_err(|err| match err {
let mut wtxn = index.write_txn()?;
let candidates = filter.evaluate(&wtxn, &index).map_err(|err| match err {
milli::Error::UserError(milli::UserError::InvalidFilter(_)) => {
Error::from(err).with_custom_error_code(Code::InvalidDocumentFilter)
}
e => e.into(),
})?;
let mut delete_operation = DeleteDocuments::new(wtxn, index)?;
let mut delete_operation = DeleteDocuments::new(&mut wtxn, &index)?;
delete_operation.delete_documents(&candidates);
delete_operation.execute().map(|result| result.deleted_documents)?
let deleted_documents =
delete_operation.execute().map(|result| result.deleted_documents)?;
wtxn.commit()?;
deleted_documents
} else {
0
})

View File

@@ -83,6 +83,16 @@ rustls-pemfile = "1.0.2"
segment = { version = "0.2.2", optional = true }
serde = { version = "1.0.160", features = ["derive"] }
serde_json = { version = "1.0.95", features = ["preserve_order"] }
sentry = { version = "0.31.6", default-features = false, features = [
"backtrace",
"contexts",
"debug-images",
"panic",
"reqwest",
"rustls",
] }
sentry-actix = "0.31.6"
serde_urlencoded = "0.7.1"
sha2 = "0.10.6"
siphasher = "0.3.10"
slice-group-by = "0.3.0"
@@ -90,6 +100,7 @@ static-files = { version = "0.2.3", optional = true }
sysinfo = "0.29.7"
tar = "0.4.38"
tempfile = "3.5.0"
termcolor = "1.2.0"
thiserror = "1.0.40"
time = { version = "0.3.20", features = [
"serde-well-known",
@@ -103,8 +114,6 @@ toml = "0.7.3"
uuid = { version = "1.3.1", features = ["serde", "v4"] }
walkdir = "2.3.3"
yaup = "0.2.1"
serde_urlencoded = "0.7.1"
termcolor = "1.2.0"
[dev-dependencies]
actix-rt = "2.8.0"
@@ -136,17 +145,7 @@ zip = { version = "0.6.4", optional = true }
default = ["analytics", "meilisearch-types/all-tokenizations", "mini-dashboard"]
analytics = ["segment"]
profile-with-puffin = ["dep:puffin_http"]
mini-dashboard = [
"actix-web-static-files",
"static-files",
"anyhow",
"cargo_toml",
"hex",
"reqwest",
"sha-1",
"tempfile",
"zip",
]
mini-dashboard = ["actix-web-static-files", "static-files", "anyhow", "cargo_toml", "hex", "reqwest", "sha-1", "tempfile", "zip"]
chinese = ["meilisearch-types/chinese"]
hebrew = ["meilisearch-types/hebrew"]
japanese = ["meilisearch-types/japanese"]

View File

@@ -101,7 +101,7 @@ pub fn create_app(
InitError = (),
>,
> {
let app = actix_web::App::new()
actix_web::App::new()
.configure(|s| {
configure_data(
s,
@@ -112,23 +112,23 @@ pub fn create_app(
)
})
.configure(routes::configure)
.configure(|s| dashboard(s, enable_dashboard));
let app = app.wrap(actix_web::middleware::Condition::new(
opt.experimental_enable_metrics,
middleware::RouteMetrics,
));
app.wrap(
Cors::default()
.send_wildcard()
.allow_any_header()
.allow_any_origin()
.allow_any_method()
.max_age(86_400), // 24h
)
.wrap(actix_web::middleware::Logger::default())
.wrap(actix_web::middleware::Compress::default())
.wrap(actix_web::middleware::NormalizePath::new(actix_web::middleware::TrailingSlash::Trim))
.configure(|s| dashboard(s, enable_dashboard))
.wrap(sentry_actix::Sentry::new())
.wrap(actix_web::middleware::Condition::new(
opt.experimental_enable_metrics,
middleware::RouteMetrics,
))
.wrap(
Cors::default()
.send_wildcard()
.allow_any_header()
.allow_any_origin()
.allow_any_method()
.max_age(86_400), // 24h
)
.wrap(actix_web::middleware::Logger::default())
.wrap(actix_web::middleware::Compress::default())
.wrap(actix_web::middleware::NormalizePath::new(actix_web::middleware::TrailingSlash::Trim))
}
enum OnFailure {

View File

@@ -30,6 +30,13 @@ fn setup(opt: &Opt) -> anyhow::Result<()> {
async fn main() -> anyhow::Result<()> {
let (opt, config_read_from) = Opt::try_build()?;
let _sentry = sentry::init(sentry::ClientOptions {
release: sentry::release_name!(),
session_mode: sentry::SessionMode::Request,
auto_session_tracking: true,
..Default::default()
});
#[cfg(feature = "profile-with-puffin")]
let _server = puffin_http::Server::new(&format!("0.0.0.0:{}", puffin_http::DEFAULT_PORT))?;
puffin::set_scopes_on(cfg!(feature = "profile-with-puffin"));

View File

@@ -60,7 +60,8 @@ pub async fn swap_indexes(
}
let task = KindWithContent::IndexSwap { swaps };
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
let task = index_scheduler.register(task)?;
let task: SummarizedTaskView = task.into();
Ok(HttpResponse::Accepted().json(task))
}

View File

@@ -154,19 +154,6 @@ async fn delete_document_by_filter() {
)
.await;
index.wait_task(1).await;
let (stats, _) = index.stats().await;
snapshot!(json_string!(stats), @r###"
{
"numberOfDocuments": 4,
"isIndexing": false,
"fieldDistribution": {
"color": 3,
"id": 4
}
}
"###);
let (response, code) =
index.delete_document_by_filter(json!({ "filter": "color = blue"})).await;
snapshot!(code, @"202 Accepted");
@@ -201,18 +188,6 @@ async fn delete_document_by_filter() {
}
"###);
let (stats, _) = index.stats().await;
snapshot!(json_string!(stats), @r###"
{
"numberOfDocuments": 2,
"isIndexing": false,
"fieldDistribution": {
"color": 1,
"id": 2
}
}
"###);
let (documents, code) = index.get_all_documents(GetAllDocumentsOptions::default()).await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(documents), @r###"
@@ -266,18 +241,6 @@ async fn delete_document_by_filter() {
}
"###);
let (stats, _) = index.stats().await;
snapshot!(json_string!(stats), @r###"
{
"numberOfDocuments": 1,
"isIndexing": false,
"fieldDistribution": {
"color": 1,
"id": 1
}
}
"###);
let (documents, code) = index.get_all_documents(GetAllDocumentsOptions::default()).await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(documents), @r###"

View File

@@ -1104,59 +1104,3 @@ async fn camelcased_words() {
})
.await;
}
#[actix_rt::test]
async fn simple_search_with_strange_synonyms() {
let server = Server::new().await;
let index = server.index("test");
index.update_settings(json!({ "synonyms": {"&": ["to"], "to": ["&"]} })).await;
let r = index.wait_task(0).await;
meili_snap::snapshot!(r["status"], @r###""succeeded""###);
let documents = DOCUMENTS.clone();
index.add_documents(documents, None).await;
index.wait_task(1).await;
index
.search(json!({"q": "How to train"}), |response, code| {
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response["hits"]), @r###"
[
{
"title": "How to Train Your Dragon: The Hidden World",
"id": "166428"
}
]
"###);
})
.await;
index
.search(json!({"q": "How & train"}), |response, code| {
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response["hits"]), @r###"
[
{
"title": "How to Train Your Dragon: The Hidden World",
"id": "166428"
}
]
"###);
})
.await;
index
.search(json!({"q": "to"}), |response, code| {
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response["hits"]), @r###"
[
{
"title": "How to Train Your Dragon: The Hidden World",
"id": "166428"
}
]
"###);
})
.await;
}

View File

@@ -418,11 +418,19 @@ impl<'t> Matcher<'t, '_> {
} else {
match &self.matches {
Some((tokens, matches)) => {
// If the text has to be cropped,
// compute the best interval to crop around.
let matches = match format_options.crop {
Some(crop_size) if crop_size > 0 => {
self.find_best_match_interval(matches, crop_size)
}
_ => matches,
};
// If the text has to be cropped,
// crop around the best interval.
let (byte_start, byte_end) = match format_options.crop {
Some(crop_size) if crop_size > 0 => {
let matches = self.find_best_match_interval(matches, crop_size);
self.crop_bounds(tokens, matches, crop_size)
}
_ => (0, self.text.len()),
@@ -442,11 +450,6 @@ impl<'t> Matcher<'t, '_> {
for m in matches {
let token = &tokens[m.token_position];
// skip matches out of the crop window.
if token.byte_start < byte_start || token.byte_end > byte_end {
continue;
}
if byte_index < token.byte_start {
formatted.push(&self.text[byte_index..token.byte_start]);
}
@@ -797,37 +800,6 @@ mod tests {
);
}
#[test]
fn format_highlight_crop_phrase_query() {
//! testing: https://github.com/meilisearch/meilisearch/issues/3975
let temp_index = TempIndex::new();
temp_index
.add_documents(documents!([
{ "id": 1, "text": "The groundbreaking invention had the power to split the world between those who embraced progress and those who resisted change!" }
]))
.unwrap();
let rtxn = temp_index.read_txn().unwrap();
let format_options = FormatOptions { highlight: true, crop: Some(10) };
let text = "The groundbreaking invention had the power to split the world between those who embraced progress and those who resisted change!";
let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "\"the world\"");
let mut matcher = builder.build(text);
// should return 10 words with a marker at the start as well the end, and the highlighted matches.
insta::assert_snapshot!(
matcher.format(format_options),
@"…had the power to split <em>the</em> <em>world</em> between those who…"
);
let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "those \"and those\"");
let mut matcher = builder.build(text);
// should highlight "those" and the phrase "and those".
insta::assert_snapshot!(
matcher.format(format_options),
@"…world between <em>those</em> who embraced progress <em>and</em> <em>those</em> who resisted…"
);
}
#[test]
fn smaller_crop_size() {
//! testing: https://github.com/meilisearch/specifications/pull/120#discussion_r836536295

View File

@@ -226,9 +226,9 @@ fn process_tokens<'a>(
) -> impl Iterator<Item = (usize, Token<'a>)> {
tokens
.skip_while(|token| token.is_separator())
.scan((0, None), |(offset, prev_kind), mut token| {
.scan((0, None), |(offset, prev_kind), token| {
match token.kind {
TokenKind::Word | TokenKind::StopWord if !token.lemma().is_empty() => {
TokenKind::Word | TokenKind::StopWord | TokenKind::Unknown => {
*offset += match *prev_kind {
Some(TokenKind::Separator(SeparatorKind::Hard)) => 8,
Some(_) => 1,
@@ -244,7 +244,7 @@ fn process_tokens<'a>(
{
*prev_kind = Some(token.kind);
}
_ => token.kind = TokenKind::Unknown,
_ => (),
}
Some((*offset, token))
})

View File

@@ -59,13 +59,7 @@ pub(crate) fn data_from_obkv_documents(
original_obkv_chunks
.par_bridge()
.map(|original_documents_chunk| {
send_original_documents_data(
original_documents_chunk,
indexer,
lmdb_writer_sx.clone(),
vectors_field_id,
primary_key_id,
)
send_original_documents_data(original_documents_chunk, lmdb_writer_sx.clone())
})
.collect::<Result<()>>()?;
@@ -82,6 +76,7 @@ pub(crate) fn data_from_obkv_documents(
&faceted_fields,
primary_key_id,
geo_fields_ids,
vectors_field_id,
&stop_words,
&allowed_separators,
&dictionary,
@@ -270,33 +265,11 @@ fn spawn_extraction_task<FE, FS, M>(
/// - documents
fn send_original_documents_data(
original_documents_chunk: Result<grenad::Reader<File>>,
indexer: GrenadParameters,
lmdb_writer_sx: Sender<Result<TypedChunk>>,
vectors_field_id: Option<FieldId>,
primary_key_id: FieldId,
) -> Result<()> {
let original_documents_chunk =
original_documents_chunk.and_then(|c| unsafe { as_cloneable_grenad(&c) })?;
if let Some(vectors_field_id) = vectors_field_id {
let documents_chunk_cloned = original_documents_chunk.clone();
let lmdb_writer_sx_cloned = lmdb_writer_sx.clone();
rayon::spawn(move || {
let result = extract_vector_points(
documents_chunk_cloned,
indexer,
primary_key_id,
vectors_field_id,
);
let _ = match result {
Ok(vector_points) => {
lmdb_writer_sx_cloned.send(Ok(TypedChunk::VectorPoints(vector_points)))
}
Err(error) => lmdb_writer_sx_cloned.send(Err(error)),
};
});
}
// TODO: create a custom internal error
lmdb_writer_sx.send(Ok(TypedChunk::Documents(original_documents_chunk))).unwrap();
Ok(())
@@ -318,6 +291,7 @@ fn send_and_extract_flattened_documents_data(
faceted_fields: &HashSet<FieldId>,
primary_key_id: FieldId,
geo_fields_ids: Option<(FieldId, FieldId)>,
vectors_field_id: Option<FieldId>,
stop_words: &Option<fst::Set<&[u8]>>,
allowed_separators: &Option<&[&str]>,
dictionary: &Option<&[&str]>,
@@ -348,6 +322,25 @@ fn send_and_extract_flattened_documents_data(
});
}
if let Some(vectors_field_id) = vectors_field_id {
let documents_chunk_cloned = flattened_documents_chunk.clone();
let lmdb_writer_sx_cloned = lmdb_writer_sx.clone();
rayon::spawn(move || {
let result = extract_vector_points(
documents_chunk_cloned,
indexer,
primary_key_id,
vectors_field_id,
);
let _ = match result {
Ok(vector_points) => {
lmdb_writer_sx_cloned.send(Ok(TypedChunk::VectorPoints(vector_points)))
}
Err(error) => lmdb_writer_sx_cloned.send(Err(error)),
};
});
}
let (docid_word_positions_chunk, docid_fid_facet_values_chunks): (Result<_>, Result<_>) =
rayon::join(
|| {

View File

@@ -2550,25 +2550,6 @@ mod tests {
db_snap!(index, word_position_docids, 3, @"74f556b91d161d997a89468b4da1cb8f");
}
/// Index multiple different number of vectors in documents.
/// Vectors must be of the same length.
#[test]
fn test_multiple_vectors() {
let index = TempIndex::new();
index.add_documents(documents!([{"id": 0, "_vectors": [[0, 1, 2], [3, 4, 5]] }])).unwrap();
index.add_documents(documents!([{"id": 1, "_vectors": [6, 7, 8] }])).unwrap();
index
.add_documents(
documents!([{"id": 2, "_vectors": [[9, 10, 11], [12, 13, 14], [15, 16, 17]] }]),
)
.unwrap();
let rtxn = index.read_txn().unwrap();
let res = index.search(&rtxn).vector([0.0, 1.0, 2.0]).execute().unwrap();
assert_eq!(res.documents_ids.len(), 3);
}
#[test]
fn reproduce_the_bug() {
/*

View File

@@ -573,7 +573,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
tokenizer
.tokenize(text)
.filter_map(|token| {
if token.is_word() && !token.lemma().is_empty() {
if token.is_word() {
Some(token.lemma().to_string())
} else {
None
@@ -608,18 +608,13 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
for (word, synonyms) in user_synonyms {
// Normalize both the word and associated synonyms.
let normalized_word = normalize(&tokenizer, word);
let normalized_synonyms: Vec<_> = synonyms
.iter()
.map(|synonym| normalize(&tokenizer, synonym))
.filter(|synonym| !synonym.is_empty())
.collect();
let normalized_synonyms =
synonyms.iter().map(|synonym| normalize(&tokenizer, synonym));
// Store the normalized synonyms under the normalized word,
// merging the possible duplicate words.
if !normalized_word.is_empty() && !normalized_synonyms.is_empty() {
let entry = new_synonyms.entry(normalized_word).or_insert_with(Vec::new);
entry.extend(normalized_synonyms.into_iter());
}
let entry = new_synonyms.entry(normalized_word).or_insert_with(Vec::new);
entry.extend(normalized_synonyms);
}
// Make sure that we don't have duplicate synonyms.
@@ -1427,43 +1422,6 @@ mod tests {
assert!(result.documents_ids.is_empty());
}
#[test]
fn thai_synonyms() {
let mut index = TempIndex::new();
index.index_documents_config.autogenerate_docids = true;
let mut wtxn = index.write_txn().unwrap();
// Send 3 documents with ids from 1 to 3.
index
.add_documents_using_wtxn(
&mut wtxn,
documents!([
{ "name": "ยี่ปุ่น" },
{ "name": "ญี่ปุ่น" },
]),
)
.unwrap();
// In the same transaction provide some synonyms
index
.update_settings_using_wtxn(&mut wtxn, |settings| {
settings.set_synonyms(btreemap! {
"japanese".to_string() => vec![S("ญี่ปุ่น"), S("ยี่ปุ่น")],
});
})
.unwrap();
wtxn.commit().unwrap();
// Ensure synonyms are effectively stored
let rtxn = index.read_txn().unwrap();
let synonyms = index.synonyms(&rtxn).unwrap();
assert!(!synonyms.is_empty()); // at this point the index should return something
// Check that we can use synonyms
let result = index.search(&rtxn).query("japanese").execute().unwrap();
assert_eq!(result.documents_ids.len(), 2);
}
#[test]
fn setting_searchable_recomputes_other_settings() {
let index = TempIndex::new();

View File

@@ -186,16 +186,12 @@ fn create_value(value: &Document, mut selectors: HashSet<&str>) -> Document {
let array = create_array(array, &sub_selectors);
if !array.is_empty() {
new_value.insert(key.to_string(), array.into());
} else {
new_value.insert(key.to_string(), Value::Array(vec![]));
}
}
Value::Object(object) => {
let object = create_value(object, sub_selectors);
if !object.is_empty() {
new_value.insert(key.to_string(), object.into());
} else {
new_value.insert(key.to_string(), Value::Object(Map::new()));
}
}
_ => (),
@@ -215,8 +211,6 @@ fn create_array(array: &[Value], selectors: &HashSet<&str>) -> Vec<Value> {
let array = create_array(array, selectors);
if !array.is_empty() {
res.push(array.into());
} else {
res.push(Value::Array(vec![]));
}
}
Value::Object(object) => {
@@ -643,24 +637,6 @@ mod tests {
);
}
#[test]
fn empty_array_object_return_empty() {
let value: Value = json!({
"array": [],
"object": {},
});
let value: &Document = value.as_object().unwrap();
let res: Value = select_values(value, vec!["array.name", "object.name"]).into();
assert_eq!(
res,
json!({
"array": [],
"object": {},
})
);
}
#[test]
fn all_conflict_variation() {
let value: Value = json!({