From 3e4c194f64bc70dc89ef7d6badb3dfe078c6ae9b Mon Sep 17 00:00:00 2001 From: Emil Ernerfeldt Date: Fri, 17 Feb 2023 11:35:50 +0100 Subject: [PATCH] Add signal handler on *nix with troubleshooting and stacktrace (#1340) * Add signal handler on *nix with troubleshooting and stacktrace * Add a way to segfault the app in the debug menu * Add debug->crash menu button to trigger a stack overflow --- Cargo.lock | 2 + crates/re_viewer/src/app.rs | 66 +++++++++++++--- crates/rerun/Cargo.toml | 5 ++ crates/rerun/src/crash_handler.rs | 124 ++++++++++++++++++++++++++++++ crates/rerun/src/lib.rs | 1 + crates/rerun/src/run.rs | 16 +--- 6 files changed, 187 insertions(+), 27 deletions(-) create mode 100644 crates/rerun/src/crash_handler.rs diff --git a/Cargo.lock b/Cargo.lock index ebd73072e576..eb235781e2fc 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4435,10 +4435,12 @@ name = "rerun" version = "0.2.0" dependencies = [ "anyhow", + "backtrace", "clap 4.1.4", "crossbeam", "document-features", "egui", + "libc", "mimalloc", "puffin", "puffin_http", diff --git a/crates/re_viewer/src/app.rs b/crates/re_viewer/src/app.rs index 079ab7fa627e..8368c7a1df97 100644 --- a/crates/re_viewer/src/app.rs +++ b/crates/re_viewer/src/app.rs @@ -1514,23 +1514,65 @@ fn debug_menu(options: &mut AppOptions, ui: &mut egui::Ui) { ui.separator(); - #[allow(clippy::manual_assert)] - if ui.button("panic!").clicked() { - panic!("Intentional panic"); - } + ui.menu_button("Crash", |ui| { + #[allow(clippy::manual_assert)] + if ui.button("panic!").clicked() { + panic!("Intentional panic"); + } + + if ui.button("panic! during unwind").clicked() { + struct PanicOnDrop {} + + impl Drop for PanicOnDrop { + fn drop(&mut self) { + panic!("Second intentional panic in Drop::drop"); + } + } - if ui.button("panic! 
during unwind").clicked() { - struct PanicOnDrop {} + let _this_will_panic_when_dropped = PanicOnDrop {}; + panic!("First intentional panic"); + } - impl Drop for PanicOnDrop { - fn drop(&mut self) { - panic!("Second intentional panic in Drop::drop"); + if ui.button("SEGFAULT").clicked() { + // Taken from https://github.com/EmbarkStudios/crash-handling/blob/main/sadness-generator/src/lib.rs + + /// This is the fixed address used to generate a segfault. It's possible that + /// this address can be mapped and writable by your process in which case a + /// crash may not occur + #[cfg(target_pointer_width = "64")] + pub const SEGFAULT_ADDRESS: u64 = u32::MAX as u64 + 0x42; + #[cfg(target_pointer_width = "32")] + pub const SEGFAULT_ADDRESS: u32 = 0x42; + + let bad_ptr: *mut u8 = SEGFAULT_ADDRESS as _; + #[allow(unsafe_code)] + // SAFETY: this is not safe. We are _trying_ to crash. + unsafe { + std::ptr::write_volatile(bad_ptr, 1); } } - let _this_will_panic_when_dropped = PanicOnDrop {}; - panic!("First intentional panic"); - } + if ui.button("Stack overflow").clicked() { + // Taken from https://github.com/EmbarkStudios/crash-handling/blob/main/sadness-generator/src/lib.rs + fn recurse(data: u64) -> u64 { + let mut buff = [0u8; 256]; + buff[..9].copy_from_slice(b"junk data"); + + let mut result = data; + for c in buff { + result += c as u64; + } + + if result == 0 { + result + } else { + recurse(result) + 1 + } + } + + recurse(42); + } + }); } // --- diff --git a/crates/rerun/Cargo.toml b/crates/rerun/Cargo.toml index 0c7b4164f516..9a49d5333a87 100644 --- a/crates/rerun/Cargo.toml +++ b/crates/rerun/Cargo.toml @@ -81,3 +81,8 @@ clap = { workspace = true, features = ["derive"] } mimalloc = "0.1.29" puffin_http = "0.11" tokio = { workspace = true, features = ["macros", "rt-multi-thread"] } + +# Native unix dependencies: +[target.'cfg(not(any(target_arch = "wasm32", target_os = "windows")))'.dependencies] +backtrace = "0.3" +libc = "0.2" diff --git 
a/crates/rerun/src/crash_handler.rs b/crates/rerun/src/crash_handler.rs new file mode 100644 index 000000000000..1b6868899f65 --- /dev/null +++ b/crates/rerun/src/crash_handler.rs @@ -0,0 +1,124 @@ +pub fn install_crash_handlers() { + install_panic_hook(); + + #[cfg(not(target_arch = "wasm32"))] + #[cfg(not(target_os = "windows"))] + install_signal_handler(); +} + +fn install_panic_hook() { + let previous_panic_hook = std::panic::take_hook(); + + std::panic::set_hook(Box::new(move |panic_info: &std::panic::PanicInfo<'_>| { + // This prints the callstack etc + (*previous_panic_hook)(panic_info); + + eprintln!( + "\n\ + Troubleshooting Rerun: https://www.rerun.io/docs/getting-started/troubleshooting" + ); + })); +} + +#[cfg(not(target_arch = "wasm32"))] +#[cfg(not(target_os = "windows"))] +#[allow(unsafe_code)] +#[allow(clippy::fn_to_numeric_cast_any)] +fn install_signal_handler() { + // SAFETY: we're installing a signal handler. + unsafe { + for signum in [ + libc::SIGABRT, + libc::SIGBUS, + libc::SIGFPE, + libc::SIGILL, + libc::SIGINT, + libc::SIGSEGV, + libc::SIGTERM, + ] { + libc::signal( + signum, + signal_handler as *const fn(libc::c_int) as libc::size_t, + ); + } + } + + unsafe extern "C" fn signal_handler(signal_number: libc::c_int) { + let signal_name = match signal_number { + libc::SIGABRT => "SIGABRT", + libc::SIGBUS => "SIGBUS", + libc::SIGFPE => "SIGFPE", + libc::SIGILL => "SIGILL", + libc::SIGINT => "SIGINT", + libc::SIGSEGV => "SIGSEGV", + libc::SIGTERM => "SIGTERM", + _ => "UNKNOWN SIGNAL", + }; + + // There are very few things that are safe to do in a signal handler, + // but writing to stderr is one of them. + // So we first print out what happened to stderr so we're sure that gets out, + // then we do the unsafe things, like logging the stack trace. + // We take care not to allocate any memory along the way. 
+ + write_to_stderr("\n"); + write_to_stderr("Rerun caught a signal: "); + write_to_stderr(signal_name); + write_to_stderr("\n"); + write_to_stderr( + "Troubleshooting Rerun: https://www.rerun.io/docs/getting-started/troubleshooting\n\n", + ); + + // Ok, we printed the most important things. + // Let's do less important things that require memory allocations. + // Allocating memory can lead to deadlocks if the signal + // was triggered from the system's memory management functions. + + print_callstack(); + + // We seem to have managed printing the callstack - great! + // Then let's print the important stuff _again_ so it is visible at the bottom of the user's terminal: + + write_to_stderr("\n"); + write_to_stderr("Rerun caught a signal: "); + write_to_stderr(signal_name); + write_to_stderr("\n"); + write_to_stderr( + "Troubleshooting Rerun: https://www.rerun.io/docs/getting-started/troubleshooting\n\n", + ); + + // We are done! + // Call the default signal handler (which usually terminates the app): + // SAFETY: we're calling a signal handler + unsafe { + libc::signal(signal_number, libc::SIG_DFL); + libc::raise(signal_number); + } + } + + fn write_to_stderr(text: &str) { + // SAFETY: writing to stderr is fine, even in a signal handler. 
+ unsafe { + libc::write(libc::STDERR_FILENO, text.as_ptr().cast(), text.len()); + } + } + + fn print_callstack() { + let backtrace = backtrace::Backtrace::new(); + let stack = format!("{backtrace:?}"); + + // Trim it a bit: + let mut stack = stack.as_str(); + let start_pattern = "install_signal_handler::signal_handler\n"; + if let Some(start_offset) = stack.find(start_pattern) { + stack = &stack[start_offset + start_pattern.len()..]; + } + if let Some(end_offset) = + stack.find("std::sys_common::backtrace::__rust_begin_short_backtrace") + { + stack = &stack[..end_offset]; + } + + write_to_stderr(stack); + } +} diff --git a/crates/rerun/src/lib.rs b/crates/rerun/src/lib.rs index 000047c9bc57..40c52036ec5c 100644 --- a/crates/rerun/src/lib.rs +++ b/crates/rerun/src/lib.rs @@ -90,6 +90,7 @@ #![warn(missing_docs)] // Let's keep the this crate well-documented! +mod crash_handler; mod run; pub use run::{run, CallSource}; diff --git a/crates/rerun/src/run.rs b/crates/rerun/src/run.rs index f331a7657c5f..1fcc9dd91958 100644 --- a/crates/rerun/src/run.rs +++ b/crates/rerun/src/run.rs @@ -177,7 +177,7 @@ fn run_analytics(cmd: &AnalyticsCommands) -> Result<(), re_analytics::cli::CliEr } async fn run_impl(call_source: CallSource, args: Args) -> anyhow::Result<()> { - install_panic_hook(); + crate::crash_handler::install_crash_handlers(); let mut profiler = re_viewer::Profiler::default(); if args.profile { @@ -332,17 +332,3 @@ fn parse_max_latency(max_latency: Option<&String>) -> f32 { .unwrap_or_else(|err| panic!("Failed to parse max_latency ({max_latency:?}): {err}")) }) } - -fn install_panic_hook() { - let previous_panic_hook = std::panic::take_hook(); - - std::panic::set_hook(Box::new(move |panic_info: &std::panic::PanicInfo<'_>| { - // The prints the callstack etc - (*previous_panic_hook)(panic_info); - - eprintln!( - "\n\ - Troubleshooting Rerun: https://www.rerun.io/docs/getting-started/troubleshooting" - ); - })); -}