mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-07-18 04:11:07 +00:00
Compare commits
5 Commits
binary-qua
...
panic-repo
Author | SHA1 | Date | |
---|---|---|---|
b06e04fb9b | |||
905dc241ae | |||
146908f062 | |||
84f701679d | |||
355d3b7e45 |
21
Cargo.lock
generated
21
Cargo.lock
generated
@ -231,9 +231,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "addr2line"
|
||||
version = "0.20.0"
|
||||
version = "0.21.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f4fa78e18c64fce05e902adecd7a5eed15a5e0a3439f7b0e169f0252214865e3"
|
||||
checksum = "8a30b2e23b9e17a9f90641c7ab1549cd9b44f296d3ccbf309d2863cfe398a0cb"
|
||||
dependencies = [
|
||||
"gimli",
|
||||
]
|
||||
@ -435,9 +435,9 @@ checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
|
||||
|
||||
[[package]]
|
||||
name = "backtrace"
|
||||
version = "0.3.68"
|
||||
version = "0.3.69"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4319208da049c43661739c5fade2ba182f09d1dc2299b32298d3a31692b17e12"
|
||||
checksum = "2089b7e3f35b9dd2d0ed921ead4f6d318c27680d4a5bd167b3ee120edb105837"
|
||||
dependencies = [
|
||||
"addr2line",
|
||||
"cc",
|
||||
@ -1638,9 +1638,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "gimli"
|
||||
version = "0.27.3"
|
||||
version = "0.28.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b6c80984affa11d98d1b88b66ac8853f143217b399d3c74116778ff8fdb4ed2e"
|
||||
checksum = "6fb8d784f27acf97159b40fc4db5ecd8aa23b9ad5ef69cdd136d3bc80665f0c0"
|
||||
|
||||
[[package]]
|
||||
name = "git2"
|
||||
@ -1894,6 +1894,7 @@ name = "index-scheduler"
|
||||
version = "1.4.1"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"backtrace",
|
||||
"big_s",
|
||||
"bincode",
|
||||
"crossbeam",
|
||||
@ -2856,9 +2857,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "object"
|
||||
version = "0.31.1"
|
||||
version = "0.32.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8bda667d9f2b5051b8833f59f3bf748b28ef54f850f4fcb389a252aa383866d1"
|
||||
checksum = "9cf5f9dd3933bd50a9e1f149ec995f39ae2c496d31fd772c1fd45ebc27e902b0"
|
||||
dependencies = [
|
||||
"memchr",
|
||||
]
|
||||
@ -3640,9 +3641,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "serde_json"
|
||||
version = "1.0.104"
|
||||
version = "1.0.108"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "076066c5f1078eac5b722a31827a8832fe108bed65dfa75e233c89f8206e976c"
|
||||
checksum = "3d1c7e3eac408d115102c4c24ad393e0821bb3a5df4d506a80f85f7a742a526b"
|
||||
dependencies = [
|
||||
"indexmap 2.0.0",
|
||||
"itoa",
|
||||
|
@ -28,6 +28,7 @@ license = "MIT"
|
||||
|
||||
[profile.release]
|
||||
codegen-units = 1
|
||||
debug = true
|
||||
|
||||
[profile.dev.package.flate2]
|
||||
opt-level = 3
|
||||
|
@ -12,6 +12,7 @@ license.workspace = true
|
||||
|
||||
[dependencies]
|
||||
anyhow = "1.0.70"
|
||||
backtrace = "0.3.69"
|
||||
bincode = "1.3.3"
|
||||
csv = "1.2.1"
|
||||
derive_builder = "0.12.0"
|
||||
|
@ -117,8 +117,13 @@ pub enum Error {
|
||||
Heed(#[from] heed::Error),
|
||||
#[error(transparent)]
|
||||
Milli(#[from] milli::Error),
|
||||
#[error("An unexpected crash occurred when processing the task.")]
|
||||
ProcessBatchPanicked,
|
||||
#[error("An unexpected crash occurred when processing the task. {}", {
|
||||
match .0 {
|
||||
Some(report) => format!("Get /reports/{}", report),
|
||||
None => "No report was saved.".into(),
|
||||
}
|
||||
})]
|
||||
ProcessBatchPanicked(Option<uuid::Uuid>),
|
||||
#[error(transparent)]
|
||||
FileStore(#[from] file_store::Error),
|
||||
#[error(transparent)]
|
||||
@ -181,7 +186,7 @@ impl Error {
|
||||
| Error::Dump(_)
|
||||
| Error::Heed(_)
|
||||
| Error::Milli(_)
|
||||
| Error::ProcessBatchPanicked
|
||||
| Error::ProcessBatchPanicked(_)
|
||||
| Error::FileStore(_)
|
||||
| Error::IoError(_)
|
||||
| Error::Persist(_)
|
||||
@ -224,7 +229,7 @@ impl ErrorCode for Error {
|
||||
Error::NoSpaceLeftInTaskQueue => Code::NoSpaceLeftOnDevice,
|
||||
Error::Dump(e) => e.error_code(),
|
||||
Error::Milli(e) => e.error_code(),
|
||||
Error::ProcessBatchPanicked => Code::Internal,
|
||||
Error::ProcessBatchPanicked(_) => Code::Internal,
|
||||
Error::Heed(e) => e.error_code(),
|
||||
Error::HeedTransaction(e) => e.error_code(),
|
||||
Error::FileStore(e) => e.error_code(),
|
||||
|
@ -39,6 +39,7 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String {
|
||||
test_breakpoint_sdr: _,
|
||||
planned_failures: _,
|
||||
run_loop_iteration: _,
|
||||
panic_reader: _,
|
||||
} = scheduler;
|
||||
|
||||
let rtxn = env.read_txn().unwrap();
|
||||
|
@ -26,6 +26,7 @@ mod index_mapper;
|
||||
#[cfg(test)]
|
||||
mod insta_snapshot;
|
||||
mod lru;
|
||||
mod panic_hook;
|
||||
mod utils;
|
||||
mod uuid_codec;
|
||||
|
||||
@ -53,6 +54,8 @@ use meilisearch_types::milli::documents::DocumentsBatchBuilder;
|
||||
use meilisearch_types::milli::update::IndexerConfig;
|
||||
use meilisearch_types::milli::{self, CboRoaringBitmapCodec, Index, RoaringBitmapCodec, BEU32};
|
||||
use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task};
|
||||
use panic_hook::ReportReader;
|
||||
pub use panic_hook::{Panic, Report, ReportRegistry};
|
||||
use puffin::FrameView;
|
||||
use roaring::RoaringBitmap;
|
||||
use synchronoise::SignalEvent;
|
||||
@ -331,6 +334,8 @@ pub struct IndexScheduler {
|
||||
/// The path to the version file of Meilisearch.
|
||||
pub(crate) version_file_path: PathBuf,
|
||||
|
||||
pub(crate) panic_reader: ReportReader,
|
||||
|
||||
// ================= test
|
||||
// The next entry is dedicated to the tests.
|
||||
/// Provide a way to set a breakpoint in multiple part of the scheduler.
|
||||
@ -381,6 +386,7 @@ impl IndexScheduler {
|
||||
#[cfg(test)]
|
||||
run_loop_iteration: self.run_loop_iteration.clone(),
|
||||
features: self.features.clone(),
|
||||
panic_reader: self.panic_reader.clone(),
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -438,6 +444,12 @@ impl IndexScheduler {
|
||||
let finished_at = env.create_database(&mut wtxn, Some(db_name::FINISHED_AT))?;
|
||||
wtxn.commit()?;
|
||||
|
||||
const MAX_REPORT_COUNT: usize = 20;
|
||||
|
||||
let panic_reader = panic_hook::ReportReader::install_panic_hook(
|
||||
std::num::NonZeroUsize::new(MAX_REPORT_COUNT).unwrap(),
|
||||
);
|
||||
|
||||
// allow unreachable_code to get rids of the warning in the case of a test build.
|
||||
let this = Self {
|
||||
must_stop_processing: MustStopProcessing::default(),
|
||||
@ -478,6 +490,7 @@ impl IndexScheduler {
|
||||
#[cfg(test)]
|
||||
run_loop_iteration: Arc::new(RwLock::new(0)),
|
||||
features,
|
||||
panic_reader,
|
||||
};
|
||||
|
||||
this.run();
|
||||
@ -1130,7 +1143,10 @@ impl IndexScheduler {
|
||||
.name(String::from("batch-operation"))
|
||||
.spawn(move || cloned_index_scheduler.process_batch(batch))
|
||||
.unwrap();
|
||||
handle.join().unwrap_or(Err(Error::ProcessBatchPanicked))
|
||||
|
||||
self.panic_reader
|
||||
.join_thread(handle)
|
||||
.unwrap_or_else(|maybe_report| Err(Error::ProcessBatchPanicked(maybe_report)))
|
||||
};
|
||||
|
||||
#[cfg(test)]
|
||||
@ -1311,6 +1327,10 @@ impl IndexScheduler {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn reports(&self) -> Arc<RwLock<ReportRegistry>> {
|
||||
self.panic_reader.registry()
|
||||
}
|
||||
|
||||
/// Blocks the thread until the test handle asks to progress to/through this breakpoint.
|
||||
///
|
||||
/// Two messages are sent through the channel for each breakpoint.
|
||||
|
211
index-scheduler/src/panic_hook.rs
Normal file
211
index-scheduler/src/panic_hook.rs
Normal file
@ -0,0 +1,211 @@
|
||||
//! Panic hook designed to fetch a panic from a subthread and recover it on join.
|
||||
|
||||
use std::collections::VecDeque;
|
||||
use std::num::NonZeroUsize;
|
||||
use std::panic::PanicInfo;
|
||||
use std::sync::{Arc, RwLock};
|
||||
use std::thread::{JoinHandle, ThreadId};
|
||||
|
||||
use backtrace::Backtrace;
|
||||
|
||||
// Represents a panic in a shallowy structured fashion
|
||||
pub struct Panic {
|
||||
pub payload: Option<String>,
|
||||
pub location: Option<String>,
|
||||
pub thread_name: Option<String>,
|
||||
pub thread_id: ThreadId,
|
||||
pub backtrace: Backtrace,
|
||||
}
|
||||
|
||||
/// A panic enriched with a unique id
|
||||
#[derive(serde::Serialize)]
|
||||
pub struct Report {
|
||||
pub id: uuid::Uuid,
|
||||
#[serde(serialize_with = "serialize_panic")]
|
||||
pub panic: Panic,
|
||||
}
|
||||
|
||||
fn serialize_panic<S>(panic: &Panic, s: S) -> std::result::Result<S::Ok, S::Error>
|
||||
where
|
||||
S: serde::Serializer,
|
||||
{
|
||||
use serde::Serialize;
|
||||
|
||||
panic.to_json().serialize(s)
|
||||
}
|
||||
|
||||
impl Report {
|
||||
pub fn new(panic: Panic) -> Self {
|
||||
Self { id: uuid::Uuid::new_v4(), panic }
|
||||
}
|
||||
}
|
||||
|
||||
impl Panic {
|
||||
pub fn to_json(&self) -> serde_json::Value {
|
||||
json::panic_to_json(self)
|
||||
}
|
||||
}
|
||||
|
||||
mod json {
|
||||
use backtrace::{Backtrace, BacktraceFrame, BacktraceSymbol};
|
||||
use serde_json::{json, Value};
|
||||
|
||||
use super::Panic;
|
||||
|
||||
fn symbol_to_json(symbol: &BacktraceSymbol) -> Value {
|
||||
let address = symbol.addr().map(|addr| format!("{:p}", addr));
|
||||
let column = symbol.colno();
|
||||
let line = symbol.lineno();
|
||||
let function = symbol.name().map(|name| name.to_string());
|
||||
let filename = symbol.filename();
|
||||
json!({
|
||||
"function": function,
|
||||
"filename": filename,
|
||||
"line": line,
|
||||
"column": column,
|
||||
"address": address,
|
||||
})
|
||||
}
|
||||
|
||||
fn frame_to_json(frame: &BacktraceFrame) -> Value {
|
||||
let symbols: Vec<_> = frame.symbols().iter().map(symbol_to_json).collect();
|
||||
match symbols.as_slice() {
|
||||
[] => {
|
||||
let address = format!("{:p}", frame.ip());
|
||||
json!({"address": address})
|
||||
}
|
||||
[symbol] => json!(symbol),
|
||||
symbols => json!(symbols),
|
||||
}
|
||||
}
|
||||
|
||||
fn backtrace_to_json(backtrace: &Backtrace) -> Value {
|
||||
let frames: Vec<_> = backtrace.frames().iter().map(frame_to_json).collect();
|
||||
json!(frames)
|
||||
}
|
||||
|
||||
pub fn panic_to_json(panic: &Panic) -> Value {
|
||||
let thread_id = format!("{:?}", panic.thread_id);
|
||||
serde_json::json!({
|
||||
"payload": panic.payload,
|
||||
"location": panic.location,
|
||||
"thread": {
|
||||
"id": thread_id,
|
||||
"name": panic.thread_name,
|
||||
},
|
||||
"backtrace": backtrace_to_json(&panic.backtrace),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
struct ReportWriter(Arc<RwLock<ReportRegistry>>);
|
||||
|
||||
/// A FIFO queue of reports.
|
||||
pub struct ReportRegistry {
|
||||
reports: std::collections::VecDeque<Report>,
|
||||
}
|
||||
|
||||
impl ReportRegistry {
|
||||
pub fn new(capacity: NonZeroUsize) -> Self {
|
||||
Self { reports: VecDeque::with_capacity(capacity.get()) }
|
||||
}
|
||||
|
||||
pub fn push(&mut self, report: Report) -> Option<Report> {
|
||||
let popped = if self.reports.len() == self.reports.capacity() {
|
||||
self.reports.pop_back()
|
||||
} else {
|
||||
None
|
||||
};
|
||||
self.reports.push_front(report);
|
||||
popped
|
||||
}
|
||||
|
||||
pub fn iter(&self) -> impl Iterator<Item = &Report> {
|
||||
self.reports.iter()
|
||||
}
|
||||
|
||||
pub fn find(&self, report_id: uuid::Uuid) -> Option<&Report> {
|
||||
self.iter().find(|report| report.id == report_id)
|
||||
}
|
||||
}
|
||||
|
||||
impl ReportWriter {
|
||||
#[track_caller]
|
||||
fn write_panic(&self, panic_info: &PanicInfo<'_>) {
|
||||
let payload = panic_info
|
||||
.payload()
|
||||
.downcast_ref::<&str>()
|
||||
.map(ToString::to_string)
|
||||
.or_else(|| panic_info.payload().downcast_ref::<String>().cloned());
|
||||
let location = panic_info.location().map(|loc| {
|
||||
format!(
|
||||
"{file}:{line}:{column}",
|
||||
file = loc.file(),
|
||||
line = loc.line(),
|
||||
column = loc.column()
|
||||
)
|
||||
});
|
||||
|
||||
let thread_name = std::thread::current().name().map(ToString::to_string);
|
||||
let thread_id = std::thread::current().id();
|
||||
let backtrace = backtrace::Backtrace::new();
|
||||
|
||||
let panic = Panic { payload, location, thread_name, thread_id, backtrace };
|
||||
|
||||
let report = Report::new(panic);
|
||||
|
||||
log::error!(
|
||||
"An unexpected panic occurred on thread {name} at {location}: {payload}. See report '{report}' for details.",
|
||||
payload = report.panic.payload.as_deref().unwrap_or("Box<dyn Any>"),
|
||||
name = report.panic.thread_name.as_deref().unwrap_or("<unnamed>"),
|
||||
location = report.panic.location.as_deref().unwrap_or("<unknown>"),
|
||||
report = report.id,
|
||||
);
|
||||
|
||||
if let Ok(mut registry) = self.0.write() {
|
||||
if let Some(old_report) = registry.push(report) {
|
||||
log::trace!("Forgetting report {} to make space for new report.", old_report.id)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Reads the reports written in case of a panic.
|
||||
#[derive(Clone)]
|
||||
pub struct ReportReader(Arc<RwLock<ReportRegistry>>);
|
||||
|
||||
impl ReportReader {
|
||||
/// Installs a new global panic hook, overriding any existing hook.
|
||||
///
|
||||
/// The hook writes any incoming panic in reports.
|
||||
/// The reports can then be read by the returned [`ReportReader`].
|
||||
pub fn install_panic_hook(capacity: NonZeroUsize) -> Self {
|
||||
let registry = Arc::new(RwLock::new(ReportRegistry::new(capacity)));
|
||||
let reader = ReportReader(registry.clone());
|
||||
let writer = ReportWriter(registry.clone());
|
||||
|
||||
std::panic::set_hook(Box::new(move |panic_info| writer.write_panic(panic_info)));
|
||||
reader
|
||||
}
|
||||
|
||||
/// Join the thread corresponding to the passed handle, recovering either its value
|
||||
/// or, in case the thread panicked, the id of the report corresponding to the panic.
|
||||
///
|
||||
/// The id can be used to read the report from the [`self.registry()`].
|
||||
pub fn join_thread<T>(&self, thread: JoinHandle<T>) -> Result<T, Option<uuid::Uuid>> {
|
||||
let thread_id = thread.thread().id();
|
||||
thread.join().map_err(|_e| {
|
||||
self.0
|
||||
.read()
|
||||
.unwrap()
|
||||
.iter()
|
||||
.find(|report| report.panic.thread_id == thread_id)
|
||||
.map(|report| report.id)
|
||||
})
|
||||
}
|
||||
|
||||
/// Returns a registry that can be used to read the reports written during a panic.
|
||||
pub fn registry(&self) -> Arc<RwLock<ReportRegistry>> {
|
||||
self.0.clone()
|
||||
}
|
||||
}
|
@ -88,7 +88,6 @@ pub trait ErrorCode {
|
||||
}
|
||||
}
|
||||
|
||||
#[allow(clippy::enum_variant_names)]
|
||||
enum ErrorType {
|
||||
Internal,
|
||||
InvalidRequest,
|
||||
@ -298,6 +297,7 @@ MissingSwapIndexes , InvalidRequest , BAD_REQUEST ;
|
||||
MissingTaskFilters , InvalidRequest , BAD_REQUEST ;
|
||||
NoSpaceLeftOnDevice , System , UNPROCESSABLE_ENTITY;
|
||||
PayloadTooLarge , InvalidRequest , PAYLOAD_TOO_LARGE ;
|
||||
ReportNotFound , InvalidRequest , NOT_FOUND ;
|
||||
TaskNotFound , InvalidRequest , NOT_FOUND ;
|
||||
TooManyOpenFiles , System , UNPROCESSABLE_ENTITY ;
|
||||
UnretrievableDocument , Internal , BAD_REQUEST ;
|
||||
|
@ -51,6 +51,8 @@ pub enum MeilisearchHttpError {
|
||||
DocumentFormat(#[from] DocumentFormatError),
|
||||
#[error(transparent)]
|
||||
Join(#[from] JoinError),
|
||||
#[error("Report `{0}` not found. Either its id is incorrect, or it was deleted. To save on memory, only a limited amount of reports are kept.")]
|
||||
ReportNotFound(uuid::Uuid),
|
||||
}
|
||||
|
||||
impl ErrorCode for MeilisearchHttpError {
|
||||
@ -74,6 +76,7 @@ impl ErrorCode for MeilisearchHttpError {
|
||||
MeilisearchHttpError::FileStore(_) => Code::Internal,
|
||||
MeilisearchHttpError::DocumentFormat(e) => e.error_code(),
|
||||
MeilisearchHttpError::Join(_) => Code::Internal,
|
||||
MeilisearchHttpError::ReportNotFound(_) => Code::ReportNotFound,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -24,6 +24,7 @@ pub mod features;
|
||||
pub mod indexes;
|
||||
mod metrics;
|
||||
mod multi_search;
|
||||
mod reports;
|
||||
mod snapshot;
|
||||
mod swap_indexes;
|
||||
pub mod tasks;
|
||||
@ -40,7 +41,8 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
|
||||
.service(web::scope("/multi-search").configure(multi_search::configure))
|
||||
.service(web::scope("/swap-indexes").configure(swap_indexes::configure))
|
||||
.service(web::scope("/metrics").configure(metrics::configure))
|
||||
.service(web::scope("/experimental-features").configure(features::configure));
|
||||
.service(web::scope("/experimental-features").configure(features::configure))
|
||||
.service(web::scope("/reports").configure(reports::configure));
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize)]
|
||||
|
39
meilisearch/src/routes/reports.rs
Normal file
39
meilisearch/src/routes/reports.rs
Normal file
@ -0,0 +1,39 @@
|
||||
use actix_web::web::{self, Data};
|
||||
use actix_web::HttpResponse;
|
||||
use index_scheduler::{IndexScheduler, Report};
|
||||
use meilisearch_types::error::ResponseError;
|
||||
use meilisearch_types::keys::actions;
|
||||
|
||||
use crate::extractors::authentication::policies::ActionPolicy;
|
||||
use crate::extractors::authentication::GuardedData;
|
||||
use crate::extractors::sequential_extractor::SeqHandler;
|
||||
|
||||
pub fn configure(cfg: &mut web::ServiceConfig) {
|
||||
cfg.service(web::resource("").route(web::get().to(list_reports))).service(
|
||||
web::scope("/{report_uid}")
|
||||
.service(web::resource("").route(web::get().to(SeqHandler(get_report)))),
|
||||
);
|
||||
}
|
||||
|
||||
pub async fn list_reports(
|
||||
index_scheduler: GuardedData<ActionPolicy<{ actions::SETTINGS_ALL }>, Data<IndexScheduler>>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let reports = &index_scheduler.reports();
|
||||
let reports = &reports.read().unwrap();
|
||||
let reports: Vec<&Report> = reports.iter().collect();
|
||||
|
||||
Ok(HttpResponse::Ok().json(reports))
|
||||
}
|
||||
|
||||
pub async fn get_report(
|
||||
index_scheduler: GuardedData<ActionPolicy<{ actions::SETTINGS_ALL }>, Data<IndexScheduler>>,
|
||||
report_id: web::Path<uuid::Uuid>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
let reports = &index_scheduler.reports();
|
||||
let reports = &reports.read().unwrap();
|
||||
let report = reports
|
||||
.find(*report_id)
|
||||
.ok_or(crate::error::MeilisearchHttpError::ReportNotFound(*report_id))?;
|
||||
|
||||
Ok(HttpResponse::Ok().json(report))
|
||||
}
|
Reference in New Issue
Block a user