Allow unwinding to propagate across a context swap.

The main purpose of this is to get nice backtraces in gdb, although
it also slightly simplifies poisoning the state of API consumers
after a panic.
This commit is contained in:
whitequark 2016-07-17 21:42:45 +00:00 committed by edef
parent 40fbfdde0c
commit 892a7696ec
6 changed files with 286 additions and 135 deletions

View File

@ -3,15 +3,32 @@
// whitequark <whitequark@whitequark.org> // whitequark <whitequark@whitequark.org>
// See the LICENSE file included in this distribution. // See the LICENSE file included in this distribution.
//! To understand the code in this file, keep in mind this fact: // To understand the machine code in this file, keep in mind these facts:
//! * i686 SysV C ABI requires the stack to be aligned at function entry, // * i686 SysV C ABI requires the stack to be aligned at function entry,
//! so that `%esp+4` is a multiple of 16. Aligned operands are a requirement // so that `%esp+4` is a multiple of 16. Aligned operands are a requirement
//! of SIMD instructions, and making this the responsibility of the caller // of SIMD instructions, and making this the responsibility of the caller
//! avoids having to maintain a frame pointer, which is necessary when // avoids having to maintain a frame pointer, which is necessary when
//! a function has to realign the stack from an unknown state. // a function has to realign the stack from an unknown state.
//! * i686 SysV C ABI passes the first argument on the stack. This is // * i686 SysV C ABI passes the first argument on the stack. This is
//! unfortunate, because unlike every other architecture we can't reuse // unfortunate, because unlike every other architecture we can't reuse
//! `swap` for the initial call, and so we use a trampoline. // `swap` for the initial call, and so we use a trampoline.
//
// To understand the DWARF CFI code in this file, keep in mind these facts:
// * CFI is "call frame information"; a set of instructions to a debugger or
// an unwinder that allow it to simulate returning from functions. This implies
// restoring every register to its pre-call state, as well as the stack pointer.
// * CFA is "call frame address"; the value of the stack pointer right before the
// call instruction in the caller. Everything strictly below CFA (and inclusive
// until the next CFA) is the call frame of the callee. This implies that the
// return address is part of the callee's call frame.
// * Logically, DWARF CFI is a table where rows are instruction pointer values and
// columns describe where registers are spilled (mostly using expressions that
// compute a memory location as CFA+n). A .cfi_offset pseudoinstruction changes
// the state of a column for all IP numerically larger than the one it's placed
// after. A .cfi_def_* pseudoinstruction changes the CFA value similarly.
// * Simulating return is as easy as restoring register values from the CFI table
// and then setting stack pointer to CFA.
use core::intrinsics;
use stack::Stack; use stack::Stack;
#[derive(Debug)] #[derive(Debug)]
@ -19,17 +36,50 @@ pub struct StackPointer(*mut usize);
pub unsafe fn init(stack: &Stack, f: unsafe extern "C" fn(usize) -> !) -> StackPointer { pub unsafe fn init(stack: &Stack, f: unsafe extern "C" fn(usize) -> !) -> StackPointer {
#[naked] #[naked]
unsafe extern "C" fn trampoline() -> ! { unsafe extern "C" fn init_trampoline_1() -> ! {
asm!( asm!(
r#" r#"
# Pop function. # gdb has a hardcoded check that rejects backtraces where frame addresses
popl %ebx # do not monotonically decrease. It is turned off if the function is called
# "__morestack" and that is hardcoded. So, to make gdb backtraces match
# the actual unwinder behavior, we call ourselves "__morestack" and mark
# the symbol as local; it shouldn't interfere with anything.
__morestack:
.local __morestack
# Set up the first part of our DWARF CFI linking stacks together.
# When unwinding the frame corresponding to this function, a DWARF unwinder
# will use %ebx as the next call frame address, restore return address
# from CFA-4 and restore %ebp from CFA-8. This mirrors what the second half
# of `swap_trampoline` does.
.cfi_def_cfa %ebx, 0
.cfi_offset %ebp, -8
# Call the next trampoline.
call ${0:c}
.Lend:
.size __morestack, .Lend-__morestack
"#
: : "s" (init_trampoline_2 as usize) : "memory" : "volatile");
intrinsics::unreachable()
}
#[naked]
unsafe extern "C" fn init_trampoline_2() -> ! {
asm!(
r#"
# Set up the second part of our DWARF CFI.
# When unwinding the frame corresponding to this function, a DWARF unwinder
# will restore %ebx (and thus CFA of the first trampoline) from the stack slot.
.cfi_offset %ebx, 4
# Push argument. # Push argument.
.cfi_def_cfa_offset 8
pushl %eax pushl %eax
# Call it. # Call the provided function.
call *%ebx call *8(%esp)
"# ::: "memory" : "volatile"); "#
::core::intrinsics::unreachable() : : : "memory" : "volatile");
intrinsics::unreachable()
} }
unsafe fn push(sp: &mut StackPointer, val: usize) { unsafe fn push(sp: &mut StackPointer, val: usize) {
@ -38,49 +88,62 @@ pub unsafe fn init(stack: &Stack, f: unsafe extern "C" fn(usize) -> !) -> StackP
} }
let mut sp = StackPointer(stack.top() as *mut usize); let mut sp = StackPointer(stack.top() as *mut usize);
push(&mut sp, 0); // alignment push(&mut sp, 0xdead0cfa); // CFA slot
push(&mut sp, 0); // alignment
push(&mut sp, 0); // alignment
push(&mut sp, f as usize); // function push(&mut sp, f as usize); // function
push(&mut sp, trampoline as usize); push(&mut sp, init_trampoline_1 as usize);
push(&mut sp, 0xdeadbbbb); // saved %ebp
sp sp
} }
#[inline(always)] #[inline(always)]
pub unsafe fn swap(arg: usize, old_sp: &mut StackPointer, new_sp: &StackPointer) -> usize { pub unsafe fn swap(arg: usize, old_sp: &mut StackPointer, new_sp: &StackPointer,
let ret: usize; new_stack: &Stack) -> usize {
// Address of the topmost CFA stack slot.
let new_cfa = (new_stack.top() as *mut usize).offset(-1);
#[naked]
unsafe extern "C" fn swap_trampoline() -> ! {
asm!( asm!(
r#" r#"
# Save frame pointer explicitly; LLVM doesn't spill it even if it is # Save frame pointer explicitly; the unwinder uses it to find CFA of
# marked as clobbered. # the caller, and so it has to have the correct value immediately after
# the call instruction that invoked the trampoline.
pushl %ebp pushl %ebp
# Push instruction pointer of the old context and switch to
# the new context.
call 1f
# Restore frame pointer.
popl %ebp
# Continue executing old context.
jmp 2f
1: # Remember stack pointer of the old context, in case %edx==%esi.
# Remember stack pointer of the old context, in case %rdx==%rsi.
movl %esp, %ebx movl %esp, %ebx
# Load stack pointer of the new context. # Load stack pointer of the new context.
movl (%edx), %esp movl (%edx), %esp
# Save stack pointer of the old context. # Save stack pointer of the old context.
movl %ebx, (%esi) movl %ebx, (%esi)
# Pop instruction pointer of the new context (placed onto stack by # Restore frame pointer of the new context.
# the call above) and jump there; don't use `ret` to avoid return popl %ebp
# address mispredictions (~8ns on Ivy Bridge).
# Return into the new context. Use `pop` and `jmp` instead of a `ret`
# to avoid return address mispredictions (~8ns per `ret` on Ivy Bridge).
popl %ebx popl %ebx
jmpl *%ebx jmpl *%ebx
2: "#
: : : "memory" : "volatile");
intrinsics::unreachable();
}
let ret: usize;
asm!(
r#"
# Link the call stacks together.
movl %esp, (%edi)
# Push instruction pointer of the old context and switch to
# the new context.
call ${1:c}
"# "#
: "={eax}" (ret) : "={eax}" (ret)
: "{eax}" (arg) : "s" (swap_trampoline as usize)
"{eax}" (arg)
"{esi}" (old_sp) "{esi}" (old_sp)
"{edx}" (new_sp) "{edx}" (new_sp)
"{edi}" (new_cfa)
: "eax", "ebx", "ecx", "edx", "esi", "edi", //"ebp", "esp", : "eax", "ebx", "ecx", "edx", "esi", "edi", //"ebp", "esp",
"mmx0", "mmx1", "mmx2", "mmx3", "mmx4", "mmx5", "mmx6", "mmx7", "mmx0", "mmx1", "mmx2", "mmx3", "mmx4", "mmx5", "mmx6", "mmx7",
"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7",

View File

@ -3,54 +3,115 @@
// whitequark <whitequark@whitequark.org> // whitequark <whitequark@whitequark.org>
// See the LICENSE file included in this distribution. // See the LICENSE file included in this distribution.
//! To understand the code in this file, keep in mind these two facts: // To understand the code in this file, keep in mind these two facts:
//! * x86_64 SysV C ABI has a "red zone": 128 bytes under the top of the stack // * x86_64 SysV C ABI has a "red zone": 128 bytes under the top of the stack
//! that is defined to be unmolested by signal handlers, interrupts, etc. // that is defined to be unmolested by signal handlers, interrupts, etc.
//! Leaf functions can use the red zone without adjusting rsp or rbp. // Leaf functions can use the red zone without adjusting rsp or rbp.
//! * x86_64 SysV C ABI requires the stack to be aligned at function entry, // * x86_64 SysV C ABI requires the stack to be aligned at function entry,
//! so that (%rsp+8) is a multiple of 16. Aligned operands are a requirement // so that (%rsp+8) is a multiple of 16. Aligned operands are a requirement
//! of SIMD instructions, and making this the responsibility of the caller // of SIMD instructions, and making this the responsibility of the caller
//! avoids having to maintain a frame pointer, which is necessary when // avoids having to maintain a frame pointer, which is necessary when
//! a function has to realign the stack from an unknown state. // a function has to realign the stack from an unknown state.
//! * x86_64 SysV C ABI passes the first argument in %rdi. We also use %rdi // * x86_64 SysV C ABI passes the first argument in %rdi. We also use %rdi
//! to pass a value while swapping context; this is an arbitrary choice // to pass a value while swapping context; this is an arbitrary choice
//! (we clobber all registers and could use any of them) but this allows us // (we clobber all registers and could use any of them) but this allows us
//! to reuse the swap function to perform the initial call. // to reuse the swap function to perform the initial call.
//
// To understand the DWARF CFI code in this file, keep in mind these facts:
// * CFI is "call frame information"; a set of instructions to a debugger or
// an unwinder that allow it to simulate returning from functions. This implies
// restoring every register to its pre-call state, as well as the stack pointer.
// * CFA is "call frame address"; the value of the stack pointer right before the
// call instruction in the caller. Everything strictly below CFA (and inclusive
// until the next CFA) is the call frame of the callee. This implies that the
// return address is part of the callee's call frame.
// * Logically, DWARF CFI is a table where rows are instruction pointer values and
// columns describe where registers are spilled (mostly using expressions that
// compute a memory location as CFA+n). A .cfi_offset pseudoinstruction changes
// the state of a column for all IP numerically larger than the one it's placed
// after. A .cfi_def_* pseudoinstruction changes the CFA value similarly.
// * Simulating return is as easy as restoring register values from the CFI table
// and then setting stack pointer to CFA.
use core::intrinsics;
use stack::Stack; use stack::Stack;
#[derive(Debug)] #[derive(Debug)]
pub struct StackPointer(*mut usize); pub struct StackPointer(*mut usize);
pub unsafe fn init(stack: &Stack, f: unsafe extern "C" fn(usize) -> !) -> StackPointer { pub unsafe fn init(stack: &Stack, f: unsafe extern "C" fn(usize) -> !) -> StackPointer {
// First half of the entry trampoline for a freshly initialized context.
//
// `init` pushes this function's address in the slot where a suspended context
// keeps its resume address, so the first swap into the new context lands here
// through the `pop`/`jmp` at the end of `swap_trampoline` rather than through
// a `call`. The function emits no prologue (`#[naked]`); its body only
// installs CFI directives and then calls `init_trampoline_2`. It never returns.
#[naked]
unsafe extern "C" fn init_trampoline_1() -> ! {
// The "s" operand passes the address of `init_trampoline_2` as a symbolic
// constant; `${0:c}` prints it without the immediate prefix so that it can be
// used directly as the target of `call`.
asm!(
r#"
# gdb has a hardcoded check that rejects backtraces where frame addresses
# do not monotonically decrease. It is turned off if the function is called
# "__morestack" and that is hardcoded. So, to make gdb backtraces match
# the actual unwinder behavior, we call ourselves "__morestack" and mark
# the symbol as local; it shouldn't interfere with anything.
__morestack:
.local __morestack
# Set up the first part of our DWARF CFI linking stacks together.
# When unwinding the frame corresponding to this function, a DWARF unwinder
# will use %rbx as the next call frame address, restore return address
# from CFA-8 and restore %rbp from CFA-16. This mirrors what the second half
# of `swap_trampoline` does.
.cfi_def_cfa %rbx, 0
.cfi_offset %rbp, -16
# Call the next trampoline.
call ${0:c}
.Lend:
.size __morestack, .Lend-__morestack
"#
: : "s" (init_trampoline_2 as usize) : "memory" : "volatile");
// Control never continues past the `call` above; this satisfies the `!`
// return type.
intrinsics::unreachable()
}
// Second half of the entry trampoline: invokes the user-provided function.
//
// On entry the stack holds, from the stack pointer upwards: the return address
// pushed by the `call` in `init_trampoline_1`, the function pointer `f`, one
// alignment word and the CFA slot (the values pushed by `init`). Hence
// `call *8(%rsp)` calls `f`. The argument is already in %rdi, where `swap`
// places it. The callee has type `fn(usize) -> !`, so this function never
// returns.
//
// NOTE(review): assuming the default x86_64 CFA of %rsp+8 at function entry,
// `.cfi_offset %rbx, 16` designates the CFA slot at the top of the stack,
// which `swap` overwrites with the caller's stack pointer on every switch --
// confirm against the CIE in use.
#[naked]
unsafe extern "C" fn init_trampoline_2() -> ! {
asm!(
r#"
# Set up the second part of our DWARF CFI.
# When unwinding the frame corresponding to this function, a DWARF unwinder
# will restore %rbx (and thus CFA of the first trampoline) from the stack slot.
.cfi_offset %rbx, 16
# Call the provided function.
call *8(%rsp)
"#
: : : "memory" : "volatile");
// Control never continues past the `call` above; this satisfies the `!`
// return type.
intrinsics::unreachable()
}
unsafe fn push(sp: &mut StackPointer, val: usize) { unsafe fn push(sp: &mut StackPointer, val: usize) {
sp.0 = sp.0.offset(-1); sp.0 = sp.0.offset(-1);
*sp.0 = val *sp.0 = val
} }
let mut sp = StackPointer(stack.top() as *mut usize); let mut sp = StackPointer(stack.top() as *mut usize);
push(&mut sp, 0); // alignment push(&mut sp, 0xdeaddeaddead0cfa); // CFA slot
push(&mut sp, f as usize); push(&mut sp, 0 as usize); // alignment
push(&mut sp, f as usize); // function
push(&mut sp, init_trampoline_1 as usize);
push(&mut sp, 0xdeaddeaddeadbbbb); // saved %rbp
sp sp
} }
#[inline(always)] #[inline(always)]
pub unsafe fn swap(arg: usize, old_sp: &mut StackPointer, new_sp: &StackPointer) -> usize { pub unsafe fn swap(arg: usize, old_sp: &mut StackPointer, new_sp: &StackPointer,
macro_rules! swap_body { new_stack: &Stack) -> usize {
() => { // Address of the topmost CFA stack slot.
r#" let new_cfa = (new_stack.top() as *mut usize).offset(-1);
# Save frame pointer explicitly; LLVM doesn't spill it even if it is
# marked as clobbered. #[naked]
pushq %rbp unsafe extern "C" fn swap_trampoline() -> ! {
# Push instruction pointer of the old context and switch to asm!(
# the new context. r#"
call 1f # Save frame pointer explicitly; the unwinder uses it to find CFA of
# Restore frame pointer. # the caller, and so it has to have the correct value immediately after
popq %rbp # the call instruction that invoked the trampoline.
# Continue executing old context. pushq %rbp
jmp 2f
1:
# Remember stack pointer of the old context, in case %rdx==%rsi. # Remember stack pointer of the old context, in case %rdx==%rsi.
movq %rsp, %rbx movq %rsp, %rbx
# Load stack pointer of the new context. # Load stack pointer of the new context.
@ -58,25 +119,33 @@ pub unsafe fn swap(arg: usize, old_sp: &mut StackPointer, new_sp: &StackPointer)
# Save stack pointer of the old context. # Save stack pointer of the old context.
movq %rbx, (%rsi) movq %rbx, (%rsi)
# Pop instruction pointer of the new context (placed onto stack by # Restore frame pointer of the new context.
# the call above) and jump there; don't use `ret` to avoid return popq %rbp
# address mispredictions (~8ns on Ivy Bridge).
# Return into the new context. Use `pop` and `jmp` instead of a `ret`
# to avoid return address mispredictions (~8ns per `ret` on Ivy Bridge).
popq %rbx popq %rbx
jmpq *%rbx jmpq *%rbx
2:
"# "#
} : : : "memory" : "volatile");
intrinsics::unreachable();
} }
#[cfg(not(windows))]
#[inline(always)]
unsafe fn swap_impl(arg: usize, old_sp: &mut StackPointer, new_sp: &StackPointer) -> usize {
let ret: usize; let ret: usize;
asm!(swap_body!() asm!(
r#"
# Link the call stacks together.
movq %rsp, (%rcx)
# Push instruction pointer of the old context and switch to
# the new context.
call ${1:c}
"#
: "={rdi}" (ret) : "={rdi}" (ret)
: "{rdi}" (arg) : "s" (swap_trampoline as usize)
"{rdi}" (arg)
"{rsi}" (old_sp) "{rsi}" (old_sp)
"{rdx}" (new_sp) "{rdx}" (new_sp)
"{rcx}" (new_cfa)
: "rax", "rbx", "rcx", "rdx", "rsi", "rdi", //"rbp", "rsp", : "rax", "rbx", "rcx", "rdx", "rsi", "rdi", //"rbp", "rsp",
"r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7",
@ -91,33 +160,4 @@ pub unsafe fn swap(arg: usize, old_sp: &mut StackPointer, new_sp: &StackPointer)
// thing on x86_64. // thing on x86_64.
: "volatile", "alignstack"); : "volatile", "alignstack");
ret ret
}
#[cfg(windows)]
#[inline(always)]
unsafe fn swap_impl(arg: usize, old_sp: &mut StackPointer, new_sp: &StackPointer) -> usize {
let ret: usize;
asm!(swap_body!()
: "={rcx}" (ret)
: "{rcx}" (arg)
"{rsi}" (old_sp)
"{rdx}" (new_sp)
: "rax", "rbx", "rcx", "rdx", "rsi", "rdi", //"rbp", "rsp",
"r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7",
"xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15",
"xmm16", "xmm17", "xmm18", "xmm19", "xmm20", "xmm21", "xmm22", "xmm23",
"xmm24", "xmm25", "xmm26", "xmm27", "xmm28", "xmm29", "xmm30", "xmm31"
"cc", "fpsr", "flags", "memory"
// Ideally, we would set the LLVM "noredzone" attribute on this function
// (and it would be propagated to the call site). Unfortunately, rustc
// provides no such functionality. Fortunately, by a lucky coincidence,
// the "alignstack" LLVM inline assembly option does exactly the same
// thing on x86_64.
: "volatile", "alignstack");
ret
}
swap_impl(arg, old_sp, new_sp)
} }

View File

@ -49,6 +49,6 @@ impl<OldStack> Context<OldStack> where OldStack: stack::Stack {
new_ctx: *const Context<NewStack>, new_ctx: *const Context<NewStack>,
arg: usize) -> usize arg: usize) -> usize
where NewStack: stack::Stack { where NewStack: stack::Stack {
arch::swap(arg, &mut (*old_ctx).stack_ptr, &(*new_ctx).stack_ptr) arch::swap(arg, &mut (*old_ctx).stack_ptr, &(*new_ctx).stack_ptr, &(*new_ctx).stack)
} }
} }

View File

@ -3,6 +3,7 @@
// See the LICENSE file included in this distribution. // See the LICENSE file included in this distribution.
#![feature(asm)] #![feature(asm)]
#![cfg_attr(target_arch = "x86", feature(naked_functions, core_intrinsics))] #![cfg_attr(target_arch = "x86", feature(naked_functions, core_intrinsics))]
#![cfg_attr(target_arch = "x86_64", feature(naked_functions, core_intrinsics))]
#![no_std] #![no_std]
//! libfringe is a library implementing lightweight context switches, //! libfringe is a library implementing lightweight context switches,

View File

@ -50,12 +50,14 @@ impl Stack {
} }
impl stack::Stack for Stack { impl stack::Stack for Stack {
#[inline(always)]
fn top(&self) -> *mut u8 { fn top(&self) -> *mut u8 {
unsafe { unsafe {
self.ptr.offset(self.len as isize) self.ptr.offset(self.len as isize)
} }
} }
#[inline(always)]
fn limit(&self) -> *mut u8 { fn limit(&self) -> *mut u8 {
unsafe { unsafe {
self.ptr.offset(sys::page_size() as isize) self.ptr.offset(sys::page_size() as isize)

45
tests/panic.rs Normal file
View File

@ -0,0 +1,45 @@
// This file is part of libfringe, a low-level green threading library.
// Copyright (c) whitequark <whitequark@whitequark.org>
// See the LICENSE file included in this distribution.
#![feature(thread_local)]
extern crate fringe;
use fringe::Context;
// Per-thread raw pointer to the context under test. It starts out null, is
// set by `panic_after_swap` before the first swap, and is read by `do_panic`
// so that the code running inside the context can swap back out of it.
#[thread_local]
static mut ctx_slot: *mut Context<fringe::OsStack> = 0 as *mut Context<_>;
// Entry point executed inside the context under test.
//
// * `arg == 0`: panics immediately with "arg=0".
// * `arg == 1`: first swaps through `ctx_slot` (used as both the old and the
//   new context), then panics with "arg=1" once it is resumed.
// * any other value is not expected by the tests in this file.
unsafe extern "C" fn do_panic(arg: usize) -> ! {
    if arg == 0 {
        panic!("arg=0")
    } else if arg == 1 {
        Context::swap(ctx_slot, ctx_slot, 0);
        panic!("arg=1")
    } else {
        unreachable!()
    }
}
// A panic raised by the context's entry function on its very first activation
// must reach the test harness, i.e. unwind across the initial context swap.
#[test]
#[should_panic="arg=0"]
fn panic_after_start() {
    const STACK_SIZE: usize = 4 << 20;
    unsafe {
        let mut child = Context::new(fringe::OsStack::new(STACK_SIZE).unwrap(), do_panic);
        // Passing 0 makes `do_panic` panic as soon as it starts running.
        Context::swap(&mut child, &child, 0);
    }
}
// A panic raised after the context has already been suspended and resumed once
// must still reach the test harness.
#[test]
#[should_panic="arg=1"]
fn panic_after_swap() {
    const STACK_SIZE: usize = 4 << 20;
    unsafe {
        let mut child = Context::new(fringe::OsStack::new(STACK_SIZE).unwrap(), do_panic);
        // Publish the context so that `do_panic` can swap back out of it.
        ctx_slot = &mut child;
        // First activation: `do_panic(1)` runs and swaps straight back here.
        Context::swap(&mut child, &child, 1);
        // Second activation: `do_panic` resumes after its swap and panics.
        Context::swap(&mut child, &child, 0);
    }
}