Skip to content

Commit

Permalink
Add signal handler on *nix with troubleshooting and stacktrace (rerun…
Browse files Browse the repository at this point in the history
…-io#1340)

* Add signal handler on *nix with troubleshooting and stacktrace

* Add a way to segfault the app in the debug menu

* Add debug->crash menu button to trigger a stack overflow
  • Loading branch information
emilk authored Feb 17, 2023
1 parent e333e2c commit 3e4c194
Show file tree
Hide file tree
Showing 6 changed files with 187 additions and 27 deletions.
2 changes: 2 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

66 changes: 54 additions & 12 deletions crates/re_viewer/src/app.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1514,23 +1514,65 @@ fn debug_menu(options: &mut AppOptions, ui: &mut egui::Ui) {

ui.separator();

#[allow(clippy::manual_assert)]
if ui.button("panic!").clicked() {
panic!("Intentional panic");
}
ui.menu_button("Crash", |ui| {
#[allow(clippy::manual_assert)]
if ui.button("panic!").clicked() {
panic!("Intentional panic");
}

if ui.button("panic! during unwind").clicked() {
struct PanicOnDrop {}

impl Drop for PanicOnDrop {
fn drop(&mut self) {
panic!("Second intentional panic in Drop::drop");
}
}

if ui.button("panic! during unwind").clicked() {
struct PanicOnDrop {}
let _this_will_panic_when_dropped = PanicOnDrop {};
panic!("First intentional panic");
}

impl Drop for PanicOnDrop {
fn drop(&mut self) {
panic!("Second intentional panic in Drop::drop");
if ui.button("SEGFAULT").clicked() {
// Taken from https://github.com/EmbarkStudios/crash-handling/blob/main/sadness-generator/src/lib.rs

/// This is the fixed address used to generate a segfault. It's possible that
/// this address can be mapped and writable by your process, in which case a
/// crash may not occur.
#[cfg(target_pointer_width = "64")]
pub const SEGFAULT_ADDRESS: u64 = u32::MAX as u64 + 0x42;
#[cfg(target_pointer_width = "32")]
pub const SEGFAULT_ADDRESS: u32 = 0x42;

let bad_ptr: *mut u8 = SEGFAULT_ADDRESS as _;
#[allow(unsafe_code)]
// SAFETY: this is not safe. We are _trying_ to crash.
unsafe {
std::ptr::write_volatile(bad_ptr, 1);
}
}

let _this_will_panic_when_dropped = PanicOnDrop {};
panic!("First intentional panic");
}
if ui.button("Stack overflow").clicked() {
// Taken from https://github.com/EmbarkStudios/crash-handling/blob/main/sadness-generator/src/lib.rs
fn recurse(data: u64) -> u64 {
let mut buff = [0u8; 256];
buff[..9].copy_from_slice(b"junk data");

let mut result = data;
for c in buff {
result += c as u64;
}

if result == 0 {
result
} else {
recurse(result) + 1
}
}

recurse(42);
}
});
}

// ---
Expand Down
5 changes: 5 additions & 0 deletions crates/rerun/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -81,3 +81,8 @@ clap = { workspace = true, features = ["derive"] }
mimalloc = "0.1.29"
puffin_http = "0.11"
tokio = { workspace = true, features = ["macros", "rt-multi-thread"] }

# Native unix dependencies:
[target.'cfg(not(any(target_arch = "wasm32", target_os = "windows")))'.dependencies]
backtrace = "0.3"
libc = "0.2"
124 changes: 124 additions & 0 deletions crates/rerun/src/crash_handler.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
/// Installs every crash handler available on the current target.
///
/// The panic hook is always installed. The signal handler is only compiled in
/// on targets that are neither `wasm32` nor Windows.
pub fn install_crash_handlers() {
    install_panic_hook();

    #[cfg(not(any(target_arch = "wasm32", target_os = "windows")))]
    install_signal_handler();
}

/// Wraps the currently installed panic hook so that every panic report is
/// followed by a link to the Rerun troubleshooting guide.
fn install_panic_hook() {
    // Take ownership of whichever hook is active right now; it remains
    // responsible for reporting the panic itself (message, callstack, ...).
    let inner_hook = std::panic::take_hook();

    std::panic::set_hook(Box::new(move |info: &std::panic::PanicInfo<'_>| {
        inner_hook(info);

        // Emitted after the regular report so it ends up below it in the terminal.
        eprintln!(
            "\nTroubleshooting Rerun: https://www.rerun.io/docs/getting-started/troubleshooting"
        );
    }));
}

/// Registers `signal_handler` for the fatal/termination signals listed below.
///
/// When one of them is delivered, the handler prints the signal name and a
/// troubleshooting link to stderr, attempts to print a callstack, prints the
/// banner again, and finally re-raises the signal with its default disposition.
#[cfg(not(target_arch = "wasm32"))]
#[cfg(not(target_os = "windows"))]
#[allow(unsafe_code)] // Talking to libc's signal API requires `unsafe`.
#[allow(clippy::fn_to_numeric_cast_any)] // `libc::signal` takes the handler as an integer address.
fn install_signal_handler() {
    let handler_address = signal_handler as libc::sighandler_t;

    for signum in [
        libc::SIGABRT,
        libc::SIGBUS,
        libc::SIGFPE,
        libc::SIGILL,
        libc::SIGINT,
        libc::SIGSEGV,
        libc::SIGTERM,
    ] {
        // SAFETY: `signal_handler` is an `extern "C" fn(c_int)`, which is the
        // shape `signal(2)` expects, and as a function item it lives for the
        // whole lifetime of the process.
        unsafe {
            libc::signal(signum, handler_address);
        }
    }

    /// The actual handler: reports the signal, then hands over to the default action.
    unsafe extern "C" fn signal_handler(signal_number: libc::c_int) {
        let signal_name = match signal_number {
            libc::SIGABRT => "SIGABRT",
            libc::SIGBUS => "SIGBUS",
            libc::SIGFPE => "SIGFPE",
            libc::SIGILL => "SIGILL",
            libc::SIGINT => "SIGINT",
            libc::SIGSEGV => "SIGSEGV",
            libc::SIGTERM => "SIGTERM",
            _ => "UNKNOWN SIGNAL",
        };

        // There are very few things that are safe to do in a signal handler,
        // but writing to stderr is one of them.
        // So we first print out what happened to stderr so we're sure that gets out,
        // then we do the unsafe things, like logging the stack trace.
        // We take care not to allocate any memory along the way.
        print_signal_banner(signal_name);

        // Ok, we printed the most important things.
        // Let's do less important things that require memory allocations.
        // Allocating memory can lead to deadlocks if the signal
        // was triggered from the system's memory management functions.
        print_callstack();

        // We seem to have managed printing the callstack - great!
        // Then let's print the important stuff _again_ so it is visible
        // at the bottom of the user's terminal:
        print_signal_banner(signal_name);

        // We are done!
        // Call the default signal handler (which usually terminates the app):
        // SAFETY: restoring the default disposition and re-raising the same
        // signal are plain libc calls with valid arguments.
        unsafe {
            libc::signal(signal_number, libc::SIG_DFL);
            libc::raise(signal_number);
        }
    }

    /// Prints which signal was caught plus the troubleshooting link.
    ///
    /// Only uses `write_to_stderr` on string slices, so it does not allocate.
    fn print_signal_banner(signal_name: &str) {
        write_to_stderr("\n");
        write_to_stderr("Rerun caught a signal: ");
        write_to_stderr(signal_name);
        write_to_stderr("\n");
        write_to_stderr(
            "Troubleshooting Rerun: https://www.rerun.io/docs/getting-started/troubleshooting\n\n",
        );
    }

    /// Writes all of `text` to stderr using raw `write(2)` calls.
    ///
    /// `write` may accept fewer bytes than requested or be interrupted, so we
    /// keep going until everything is written or an unrecoverable error occurs.
    /// Errors are otherwise ignored: this is best-effort crash reporting.
    fn write_to_stderr(text: &str) {
        let mut remaining = text.as_bytes();

        while !remaining.is_empty() {
            // SAFETY: `remaining` points to `remaining.len()` initialized bytes,
            // and writing to stderr is fine, even in a signal handler.
            let written = unsafe {
                libc::write(
                    libc::STDERR_FILENO,
                    remaining.as_ptr().cast(),
                    remaining.len(),
                )
            };

            match usize::try_from(written) {
                // Nothing was accepted; bail out rather than spin forever.
                Ok(0) => break,
                // Short (or complete) write: continue with what is left.
                Ok(count) => remaining = &remaining[count.min(remaining.len())..],
                // Negative return value: retry only if we were merely interrupted.
                Err(_) => {
                    let interrupted = std::io::Error::last_os_error().kind()
                        == std::io::ErrorKind::Interrupted;
                    if !interrupted {
                        break;
                    }
                }
            }
        }
    }

    /// Captures the current backtrace, trims it, and writes it to stderr.
    ///
    /// This allocates (`format!`), so it is only called after the essential
    /// information has already been printed.
    fn print_callstack() {
        let backtrace = backtrace::Backtrace::new();
        let stack = format!("{backtrace:?}");

        // Trim it a bit:
        let mut stack = stack.as_str();

        // Drop everything up to and including the handler's own frame...
        let start_pattern = "install_signal_handler::signal_handler\n";
        if let Some(start_offset) = stack.find(start_pattern) {
            stack = &stack[start_offset + start_pattern.len()..];
        }

        // ...and everything from the Rust runtime's entry frames onwards.
        if let Some(end_offset) =
            stack.find("std::sys_common::backtrace::__rust_begin_short_backtrace")
        {
            stack = &stack[..end_offset];
        }

        write_to_stderr(stack);
    }
}
1 change: 1 addition & 0 deletions crates/rerun/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@
#![warn(missing_docs)] // Let's keep this crate well-documented!

mod crash_handler;
mod run;

pub use run::{run, CallSource};
Expand Down
16 changes: 1 addition & 15 deletions crates/rerun/src/run.rs
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,7 @@ fn run_analytics(cmd: &AnalyticsCommands) -> Result<(), re_analytics::cli::CliEr
}

async fn run_impl(call_source: CallSource, args: Args) -> anyhow::Result<()> {
install_panic_hook();
crate::crash_handler::install_crash_handlers();

let mut profiler = re_viewer::Profiler::default();
if args.profile {
Expand Down Expand Up @@ -332,17 +332,3 @@ fn parse_max_latency(max_latency: Option<&String>) -> f32 {
.unwrap_or_else(|err| panic!("Failed to parse max_latency ({max_latency:?}): {err}"))
})
}

/// Chains a troubleshooting hint onto whichever panic hook is currently set.
fn install_panic_hook() {
    let original_hook = std::panic::take_hook();

    std::panic::set_hook(Box::new(move |info: &std::panic::PanicInfo<'_>| {
        // Delegate first: the original hook is what prints the callstack etc.
        original_hook(info);

        eprintln!(
            "\n\
            Troubleshooting Rerun: https://www.rerun.io/docs/getting-started/troubleshooting"
        );
    }));
}

0 comments on commit 3e4c194

Please sign in to comment.