Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: print a backtrace on SIGSEGV #6907

Merged
merged 3 commits into from
Mar 1, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

14 changes: 5 additions & 9 deletions bin/reth/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -49,9 +49,7 @@ reth-trie.workspace = true
reth-nippy-jar.workspace = true
reth-node-api.workspace = true
reth-node-ethereum.workspace = true
reth-node-optimism = { workspace = true, optional = true, features = [
"optimism",
] }
reth-node-optimism = { workspace = true, optional = true, features = ["optimism"] }
reth-node-core.workspace = true
reth-node-builder.workspace = true

Expand Down Expand Up @@ -83,12 +81,7 @@ ratatui = "0.25.0"
human_bytes = "0.4.1"

# async
tokio = { workspace = true, features = [
"sync",
"macros",
"time",
"rt-multi-thread",
] }
tokio = { workspace = true, features = ["sync", "macros", "time", "rt-multi-thread"] }
futures.workspace = true

# misc
Expand All @@ -105,6 +98,9 @@ boyer-moore-magiclen = "0.2.16"
[target.'cfg(not(windows))'.dependencies]
jemallocator = { version = "0.5.0", optional = true }

[target.'cfg(unix)'.dependencies]
libc = "0.2"

[dev-dependencies]
jsonrpsee.workspace = true
assert_matches = "1.5.0"
Expand Down
13 changes: 13 additions & 0 deletions bin/reth/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,19 @@ pub mod rpc {
}
}

#[cfg(all(unix, any(target_env = "gnu", target_os = "macos")))]
pub mod sigsegv_handler;

#[cfg(not(all(unix, any(target_env = "gnu", target_os = "macos"))))]
pub mod sigsegv_handler {
//! Signal handler to extract a backtrace from stack overflow.
//!
//! This is a no-op because this platform doesn't support our signal handler's requirements.

/// No-op function.
pub fn install() {}
}

#[cfg(all(feature = "jemalloc", unix))]
use jemallocator as _;

Expand Down
4 changes: 3 additions & 1 deletion bin/reth/src/main.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
// We use jemalloc for performance reasons
#![allow(missing_docs)]

// We use jemalloc for performance reasons.
#[cfg(all(feature = "jemalloc", unix))]
#[global_allocator]
static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc;
Expand All @@ -13,6 +13,8 @@ fn main() {
use reth::cli::Cli;
use reth_node_ethereum::EthereumNode;

reth::sigsegv_handler::install();

// Enable backtraces unless a RUST_BACKTRACE value has already been explicitly provided.
if std::env::var("RUST_BACKTRACE").is_err() {
std::env::set_var("RUST_BACKTRACE", "1");
Expand Down
2 changes: 2 additions & 0 deletions bin/reth/src/optimism.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ compile_error!("Cannot build the `op-reth` binary with the `optimism` feature fl

#[cfg(feature = "optimism")]
fn main() {
reth::sigsegv_handler::install();

// Enable backtraces unless a RUST_BACKTRACE value has already been explicitly provided.
if std::env::var("RUST_BACKTRACE").is_err() {
std::env::set_var("RUST_BACKTRACE", "1");
Expand Down
153 changes: 153 additions & 0 deletions bin/reth/src/sigsegv_handler.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
//! Signal handler to extract a backtrace from stack overflow.
//!
//! Implementation modified from [`rustc`](https://github.com/rust-lang/rust/blob/3dee9775a8c94e701a08f7b2df2c444f353d8699/compiler/rustc_driver_impl/src/signal_handler.rs).

use std::{
alloc::{alloc, Layout},
fmt, mem, ptr,
};

extern "C" {
fn backtrace_symbols_fd(buffer: *const *mut libc::c_void, size: libc::c_int, fd: libc::c_int);
}

fn backtrace_stderr(buffer: &[*mut libc::c_void]) {
let size = buffer.len().try_into().unwrap_or_default();
unsafe { backtrace_symbols_fd(buffer.as_ptr(), size, libc::STDERR_FILENO) };
}

/// Unbuffered, unsynchronized writer to stderr.
///
/// Only acceptable because everything will end soon anyways.
struct RawStderr(());

impl fmt::Write for RawStderr {
fn write_str(&mut self, s: &str) -> Result<(), fmt::Error> {
let ret = unsafe { libc::write(libc::STDERR_FILENO, s.as_ptr().cast(), s.len()) };
if ret == -1 {
Err(fmt::Error)
} else {
Ok(())
}
}
}

/// We don't really care how many bytes we actually get out. SIGSEGV comes for our head.
/// Splash stderr with letters of our own blood to warn our friends about the monster.
macro_rules! raw_errln {
($tokens:tt) => {
let _ = ::core::fmt::Write::write_fmt(&mut RawStderr(()), format_args!($tokens));
let _ = ::core::fmt::Write::write_char(&mut RawStderr(()), '\n');
};
}

/// Signal handler installed for SIGSEGV
extern "C" fn print_stack_trace(_: libc::c_int) {
const MAX_FRAMES: usize = 256;
// Reserve data segment so we don't have to malloc in a signal handler, which might fail
// in incredibly undesirable and unexpected ways due to e.g. the allocator deadlocking
static mut STACK_TRACE: [*mut libc::c_void; MAX_FRAMES] = [ptr::null_mut(); MAX_FRAMES];
let stack = unsafe {
// Collect return addresses
let depth = libc::backtrace(STACK_TRACE.as_mut_ptr(), MAX_FRAMES as i32);
if depth == 0 {
return;
}
&STACK_TRACE.as_slice()[0..(depth as _)]
};

// Just a stack trace is cryptic. Explain what we're doing.
raw_errln!("error: reth interrupted by SIGSEGV, printing backtrace\n");
let mut written = 1;
let mut consumed = 0;
// Begin elaborating return addrs into symbols and writing them directly to stderr
// Most backtraces are stack overflow, most stack overflows are from recursion
// Check for cycles before writing 250 lines of the same ~5 symbols
let cycled = |(runner, walker)| runner == walker;
let mut cyclic = false;
if let Some(period) = stack.iter().skip(1).step_by(2).zip(stack).position(cycled) {
let period = period.saturating_add(1); // avoid "what if wrapped?" branches
let Some(offset) = stack.iter().skip(period).zip(stack).position(cycled) else {
// impossible.
return;
};

// Count matching trace slices, else we could miscount "biphasic cycles"
// with the same period + loop entry but a different inner loop
let next_cycle = stack[offset..].chunks_exact(period).skip(1);
let cycles = 1 + next_cycle
.zip(stack[offset..].chunks_exact(period))
.filter(|(next, prev)| next == prev)
.count();
backtrace_stderr(&stack[..offset]);
written += offset;
consumed += offset;
if cycles > 1 {
raw_errln!("\n### cycle encountered after {offset} frames with period {period}");
backtrace_stderr(&stack[consumed..consumed + period]);
raw_errln!("### recursed {cycles} times\n");
written += period + 4;
consumed += period * cycles;
cyclic = true;
};
}
let rem = &stack[consumed..];
backtrace_stderr(rem);
raw_errln!("");
written += rem.len() + 1;

let random_depth = || 8 * 16; // chosen by random diceroll (2d20)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

bruh

if cyclic || stack.len() > random_depth() {
// technically speculation, but assert it with confidence anyway.
// We only arrived in this signal handler because bad things happened
// and this message is for explaining it's not the programmer's fault
raw_errln!("note: reth unexpectedly overflowed its stack! this is a bug");
written += 1;
}
if stack.len() == MAX_FRAMES {
raw_errln!("note: maximum backtrace depth reached, frames may have been lost");
written += 1;
}
raw_errln!("note: we would appreciate a report at https://github.com/paradigmxyz/reth");
written += 1;
if written > 24 {
// We probably just scrolled the earlier "we got SIGSEGV" message off the terminal
raw_errln!("note: backtrace dumped due to SIGSEGV! resuming signal");
}
}

/// Installs a SIGSEGV handler.
///
/// When SIGSEGV is delivered to the process, print a stack trace and then exit.
pub fn install() {
unsafe {
let alt_stack_size: usize = min_sigstack_size() + 64 * 1024;
let mut alt_stack: libc::stack_t = mem::zeroed();
alt_stack.ss_sp = alloc(Layout::from_size_align(alt_stack_size, 1).unwrap()).cast();
alt_stack.ss_size = alt_stack_size;
libc::sigaltstack(&alt_stack, ptr::null_mut());

let mut sa: libc::sigaction = mem::zeroed();
sa.sa_sigaction = print_stack_trace as libc::sighandler_t;
sa.sa_flags = libc::SA_NODEFER | libc::SA_RESETHAND | libc::SA_ONSTACK;
libc::sigemptyset(&mut sa.sa_mask);
libc::sigaction(libc::SIGSEGV, &sa, ptr::null_mut());
}
}

/// Modern kernels on modern hardware can have dynamic signal stack sizes.
#[cfg(any(target_os = "linux", target_os = "android"))]
fn min_sigstack_size() -> usize {
const AT_MINSIGSTKSZ: core::ffi::c_ulong = 51;
let dynamic_sigstksz = unsafe { libc::getauxval(AT_MINSIGSTKSZ) };
// If getauxval couldn't find the entry, it returns 0,
// so take the higher of the "constant" and auxval.
// This transparently supports older kernels which don't provide AT_MINSIGSTKSZ
libc::MINSIGSTKSZ.max(dynamic_sigstksz as _)
}

/// Not all OS support hardware where this is needed.
#[cfg(not(any(target_os = "linux", target_os = "android")))]
fn min_sigstack_size() -> usize {
libc::MINSIGSTKSZ
}
Loading