Allow unwinding to propagate across a context swap.

The main purpose of this is to get nice backtraces in gdb, although it also slightly simplifies poisoning the state of API consumers after a panic.
2016-07-17 21:42:45 +00:00 · 2016-07-17 21:42:45 +00:00 · 892a7696ec
parent 40fbfdde0c
commit 892a7696ec
6 changed files with 286 additions and 135 deletions
--- a/src/arch/x86.rs
+++ b/src/arch/x86.rs
@ -3,15 +3,32 @@
 //               whitequark <whitequark@whitequark.org>
 // See the LICENSE file included in this distribution.
-//! To understand the code in this file, keep in mind this fact:
+// To understand the machine code in this file, keep in mind these facts:
-//! * i686 SysV C ABI requires the stack to be aligned at function entry,
+// * i686 SysV C ABI requires the stack to be aligned at function entry,
-//!   so that `%esp+4` is a multiple of 16. Aligned operands are a requirement
+//   so that `%esp+4` is a multiple of 16. Aligned operands are a requirement
-//!   of SIMD instructions, and making this the responsibility of the caller
+//   of SIMD instructions, and making this the responsibility of the caller
-//!   avoids having to maintain a frame pointer, which is necessary when
+//   avoids having to maintain a frame pointer, which is necessary when
-//!   a function has to realign the stack from an unknown state.
+//   a function has to realign the stack from an unknown state.
-//! * i686 SysV C ABI passes the first argument on the stack. This is
+// * i686 SysV C ABI passes the first argument on the stack. This is
-//!   unfortunate, because unlike every other architecture we can't reuse
+//   unfortunate, because unlike every other architecture we can't reuse
-//!   `swap` for the initial call, and so we use a trampoline.
+//   `swap` for the initial call, and so we use a trampoline.
 //
 // To understand the DWARF CFI code in this file, keep in mind these facts:
 // * CFI is "call frame information"; a set of instructions to a debugger or
 //   an unwinder that allow it to simulate returning from functions. This implies
 //   restoring every register to its pre-call state, as well as the stack pointer.
 // * CFA is "call frame address"; the value of stack pointer right before the call
 //   instruction in the caller. Everything strictly below CFA (and inclusive until
 //   the next CFA) is the call frame of the callee. This implies that the return
 //   address is a part of the callee's call frame.
 // * Logically, DWARF CFI is a table where rows are instruction pointer values and
 //   columns describe where registers are spilled (mostly using expressions that
 //   compute a memory location as CFA+n). A .cfi_offset pseudoinstruction changes
 //   the state of a column for all IP numerically larger than the one it's placed
 //   after. A .cfi_def_* pseudoinstruction changes the CFA value similarly.
 // * Simulating return is as easy as restoring register values from the CFI table
 //   and then setting stack pointer to CFA.
 use core::intrinsics;
 use stack::Stack;
 #[derive(Debug)]
@ -19,17 +36,50 @@ pub struct StackPointer(*mut usize);
 pub unsafe fn init(stack: &Stack, f: unsafe extern "C" fn(usize) -> !) -> StackPointer {
  #[naked]
-  unsafe extern "C" fn trampoline() -> ! {
+  unsafe extern "C" fn init_trampoline_1() -> ! {
    asm!(
      r#"
-        # Pop function.
+        # gdb has a hardcoded check that rejects backtraces where frame addresses
-        popl    %ebx
+        # do not monotonically decrease. It is turned off if the function is called
        # "__morestack" and that is hardcoded. So, to make gdb backtraces match
        # the actual unwinder behavior, we call ourselves "__morestack" and mark
        # the symbol as local; it shouldn't interfere with anything.
      __morestack:
      .local __morestack
        # Set up the first part of our DWARF CFI linking stacks together.
        # When unwinding the frame corresponding to this function, a DWARF unwinder
        # will use %ebx as the next call frame address, restore return address
        # from CFA-4 and restore %ebp from CFA-8. This mirrors what the second half
        # of `swap_trampoline` does.
        .cfi_def_cfa %ebx, 0
        .cfi_offset %ebp, -8
        # Call the next trampoline.
        call   ${0:c}
      .Lend:
      .size __morestack, .Lend-__morestack
      "#
      : : "s" (init_trampoline_2 as usize) : "memory" : "volatile");
    intrinsics::unreachable()
  }
  #[naked]
  unsafe extern "C" fn init_trampoline_2() -> ! {
    asm!(
      r#"
        # Set up the second part of our DWARF CFI.
        # When unwinding the frame corresponding to this function, a DWARF unwinder
        # will restore %ebx (and thus CFA of the first trampoline) from the stack slot.
        .cfi_offset %ebx, 4
        # Push argument.
        .cfi_def_cfa_offset 8
        pushl   %eax
-        # Call it.
+        # Call the provided function.
-        call    *%ebx
+        call    *8(%esp)
-      "# ::: "memory" : "volatile");
+      "#
-    ::core::intrinsics::unreachable()
+      : : : "memory" : "volatile");
    intrinsics::unreachable()
  }
  unsafe fn push(sp: &mut StackPointer, val: usize) {
@ -38,49 +88,62 @@ pub unsafe fn init(stack: &Stack, f: unsafe extern "C" fn(usize) -> !) -> StackP
  }
  let mut sp = StackPointer(stack.top() as *mut usize);
-  push(&mut sp, 0); // alignment
+  push(&mut sp, 0xdead0cfa); // CFA slot
  push(&mut sp, 0); // alignment
  push(&mut sp, 0); // alignment
  push(&mut sp, f as usize); // function
-  push(&mut sp, trampoline as usize);
+  push(&mut sp, init_trampoline_1 as usize);
  push(&mut sp, 0xdeadbbbb); // saved %ebp
  sp
 }
 #[inline(always)]
-pub unsafe fn swap(arg: usize, old_sp: &mut StackPointer, new_sp: &StackPointer) -> usize {
+pub unsafe fn swap(arg: usize, old_sp: &mut StackPointer, new_sp: &StackPointer,
-  let ret: usize;
+                   new_stack: &Stack) -> usize {
  // Address of the topmost CFA stack slot.
  let new_cfa = (new_stack.top() as *mut usize).offset(-1);
  #[naked]
  unsafe extern "C" fn swap_trampoline() -> ! {
    asm!(
      r#"
-      # Save frame pointer explicitly; LLVM doesn't spill it even if it is
+        # Save frame pointer explicitly; the unwinder uses it to find CFA of
-      # marked as clobbered.
+        # the caller, and so it has to have the correct value immediately after
        # the call instruction that invoked the trampoline.
        pushl   %ebp
      # Push instruction pointer of the old context and switch to
      # the new context.
      call    1f
      # Restore frame pointer.
      popl    %ebp
      # Continue executing old context.
      jmp     2f
-    1:
+        # Remember stack pointer of the old context, in case %edx==%esi.
      # Remember stack pointer of the old context, in case %rdx==%rsi.
        movl    %esp, %ebx
        # Load stack pointer of the new context.
        movl    (%edx), %esp
        # Save stack pointer of the old context.
        movl    %ebx, (%esi)
-      # Pop instruction pointer of the new context (placed onto stack by
+        # Restore frame pointer of the new context.
-      # the call above) and jump there; don't use `ret` to avoid return
+        popl    %ebp
-      # address mispredictions (~8ns on Ivy Bridge).
+
        # Return into the new context. Use `pop` and `jmp` instead of a `ret`
        # to avoid return address mispredictions (~8ns per `ret` on Ivy Bridge).
        popl    %ebx
        jmpl    *%ebx
-    2:
+      "#
      : : : "memory" : "volatile");
    intrinsics::unreachable();
  }
  let ret: usize;
  asm!(
    r#"
      # Link the call stacks together.
      movl    %esp, (%edi)
      # Push instruction pointer of the old context and switch to
      # the new context.
      call    ${1:c}
    "#
    : "={eax}" (ret)
-    : "{eax}" (arg)
+    : "s" (swap_trampoline as usize)
      "{eax}" (arg)
      "{esi}" (old_sp)
      "{edx}" (new_sp)
      "{edi}" (new_cfa)
    : "eax",  "ebx",  "ecx",  "edx",  "esi",  "edi", //"ebp",  "esp",
      "mmx0", "mmx1", "mmx2", "mmx3", "mmx4", "mmx5", "mmx6", "mmx7",
      "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7",
--- a/src/arch/x86_64.rs
+++ b/src/arch/x86_64.rs
@ -3,54 +3,115 @@
 //               whitequark <whitequark@whitequark.org>
 // See the LICENSE file included in this distribution.
-//! To understand the code in this file, keep in mind these two facts:
+// To understand the code in this file, keep in mind these facts:
-//! * x86_64 SysV C ABI has a "red zone": 128 bytes under the top of the stack
+// * x86_64 SysV C ABI has a "red zone": 128 bytes under the top of the stack
-//!   that is defined to be unmolested by signal handlers, interrupts, etc.
+//   that is defined to be unmolested by signal handlers, interrupts, etc.
-//!   Leaf functions can use the red zone without adjusting rsp or rbp.
+//   Leaf functions can use the red zone without adjusting rsp or rbp.
-//! * x86_64 SysV C ABI requires the stack to be aligned at function entry,
+// * x86_64 SysV C ABI requires the stack to be aligned at function entry,
-//!   so that (%rsp+8) is a multiple of 16. Aligned operands are a requirement
+//   so that (%rsp+8) is a multiple of 16. Aligned operands are a requirement
-//!   of SIMD instructions, and making this the responsibility of the caller
+//   of SIMD instructions, and making this the responsibility of the caller
-//!   avoids having to maintain a frame pointer, which is necessary when
+//   avoids having to maintain a frame pointer, which is necessary when
-//!   a function has to realign the stack from an unknown state.
+//   a function has to realign the stack from an unknown state.
-//! * x86_64 SysV C ABI passes the first argument in %rdi. We also use %rdi
+// * x86_64 SysV C ABI passes the first argument in %rdi. We also use %rdi
-//!   to pass a value while swapping context; this is an arbitrary choice
+//   to pass a value while swapping context; this is an arbitrary choice
-//!   (we clobber all registers and could use any of them) but this allows us
+//   (we clobber all registers and could use any of them) but this allows us
-//!   to reuse the swap function to perform the initial call.
+//   to reuse the swap function to perform the initial call.
-
+//
 // To understand the DWARF CFI code in this file, keep in mind these facts:
 // * CFI is "call frame information"; a set of instructions to a debugger or
 //   an unwinder that allow it to simulate returning from functions. This implies
 //   restoring every register to its pre-call state, as well as the stack pointer.
 // * CFA is "call frame address"; the value of stack pointer right before the call
 //   instruction in the caller. Everything strictly below CFA (and inclusive until
 //   the next CFA) is the call frame of the callee. This implies that the return
 //   address is a part of the callee's call frame.
 // * Logically, DWARF CFI is a table where rows are instruction pointer values and
 //   columns describe where registers are spilled (mostly using expressions that
 //   compute a memory location as CFA+n). A .cfi_offset pseudoinstruction changes
 //   the state of a column for all IP numerically larger than the one it's placed
 //   after. A .cfi_def_* pseudoinstruction changes the CFA value similarly.
 // * Simulating return is as easy as restoring register values from the CFI table
 //   and then setting stack pointer to CFA.
 use core::intrinsics;
 use stack::Stack;
 #[derive(Debug)]
 pub struct StackPointer(*mut usize);
 pub unsafe fn init(stack: &Stack, f: unsafe extern "C" fn(usize) -> !) -> StackPointer {
  // First-stage entry trampoline. It is a naked function, so its body is
  // exactly the inline assembly below: it defines the local symbol
  // `__morestack`, declares the CFI rules (CFA = %rbx + 0, %rbp saved at
  // CFA-16) and then calls `init_trampoline_2`, whose address is passed in
  // as the single "s" input operand (`${0:c}`).
  #[naked]
  unsafe extern "C" fn init_trampoline_1() -> ! {
    asm!(
      r#"
        # gdb has a hardcoded check that rejects backtraces where frame addresses
        # do not monotonically decrease. It is turned off if the function is called
        # "__morestack" and that is hardcoded. So, to make gdb backtraces match
        # the actual unwinder behavior, we call ourselves "__morestack" and mark
        # the symbol as local; it shouldn't interfere with anything.
      __morestack:
      .local __morestack
        # Set up the first part of our DWARF CFI linking stacks together.
        # When unwinding the frame corresponding to this function, a DWARF unwinder
        # will use %rbx as the next call frame address, restore return address
        # from CFA-8 and restore %rbp from CFA-16. This mirrors what the second half
        # of `swap_trampoline` does.
        .cfi_def_cfa %rbx, 0
        .cfi_offset %rbp, -16
        # Call the next trampoline.
        call   ${0:c}
      .Lend:
      .size __morestack, .Lend-__morestack
      "#
      : : "s" (init_trampoline_2 as usize) : "memory" : "volatile");
    // `init_trampoline_2` is declared `-> !`, so control never comes back here.
    intrinsics::unreachable()
  }
  // Second-stage entry trampoline, reached via the `call` in
  // `init_trampoline_1`. It declares that %rbx is saved at CFA+16 and then
  // makes an indirect call through the pointer stored at 8(%rsp).
  // NOTE(review): that slot is presumably the `f` pushed by `init` -- confirm
  // against the stack layout built there.
  #[naked]
  unsafe extern "C" fn init_trampoline_2() -> ! {
    asm!(
      r#"
        # Set up the second part of our DWARF CFI.
        # When unwinding the frame corresponding to this function, a DWARF unwinder
        # will restore %rbx (and thus CFA of the first trampoline) from the stack slot.
        .cfi_offset %rbx, 16
        # Call the provided function.
        call    *8(%rsp)
      "#
      : : : "memory" : "volatile");
    // The function is declared `-> !`; mark the fall-through as unreachable.
    intrinsics::unreachable()
  }
  // Pushes `val` onto the stack described by `sp`: moves the pointer down by
  // one `usize` slot, then stores `val` at the new location.
  // Unsafe because it writes through a raw pointer; the caller must ensure
  // the slot below `sp` is valid, writable memory.
  unsafe fn push(sp: &mut StackPointer, val: usize) {
    sp.0 = sp.0.offset(-1);
    *sp.0 = val
  }
  let mut sp = StackPointer(stack.top() as *mut usize);
-  push(&mut sp, 0); // alignment
+  push(&mut sp, 0xdeaddeaddead0cfa); // CFA slot
-  push(&mut sp, f as usize);
+  push(&mut sp, 0 as usize); // alignment
  push(&mut sp, f as usize); // function
  push(&mut sp, init_trampoline_1 as usize);
  push(&mut sp, 0xdeaddeaddeadbbbb); // saved %rbp
  sp
 }
 #[inline(always)]
-pub unsafe fn swap(arg: usize, old_sp: &mut StackPointer, new_sp: &StackPointer) -> usize {
+pub unsafe fn swap(arg: usize, old_sp: &mut StackPointer, new_sp: &StackPointer,
-  macro_rules! swap_body {
+                   new_stack: &Stack) -> usize {
-    () => {
+  // Address of the topmost CFA stack slot.
-      r#"
+  let new_cfa = (new_stack.top() as *mut usize).offset(-1);
-        # Save frame pointer explicitly; LLVM doesn't spill it even if it is
+
-        # marked as clobbered.
+  #[naked]
-        pushq   %rbp
+  unsafe extern "C" fn swap_trampoline() -> ! {
-        # Push instruction pointer of the old context and switch to
+    asm!(
-        # the new context.
+      r#"
-        call    1f
+        # Save frame pointer explicitly; the unwinder uses it to find CFA of
-        # Restore frame pointer.
+        # the caller, and so it has to have the correct value immediately after
-        popq    %rbp
+        # the call instruction that invoked the trampoline.
-        # Continue executing old context.
+        pushq   %rbp
        jmp     2f
      1:
        # Remember stack pointer of the old context, in case %rdx==%rsi.
        movq    %rsp, %rbx
        # Load stack pointer of the new context.
@ -58,25 +119,33 @@ pub unsafe fn swap(arg: usize, old_sp: &mut StackPointer, new_sp: &StackPointer)
        # Save stack pointer of the old context.
        movq    %rbx, (%rsi)
-        # Pop instruction pointer of the new context (placed onto stack by
+        # Restore frame pointer of the new context.
-        # the call above) and jump there; don't use `ret` to avoid return
+        popq    %rbp
-        # address mispredictions (~8ns on Ivy Bridge).
+
        # Return into the new context. Use `pop` and `jmp` instead of a `ret`
        # to avoid return address mispredictions (~8ns per `ret` on Ivy Bridge).
        popq    %rbx
        jmpq    *%rbx
      2:
      "#
-    }
+      : : : "memory" : "volatile");
    intrinsics::unreachable();
  }
  #[cfg(not(windows))]
  #[inline(always)]
  unsafe fn swap_impl(arg: usize, old_sp: &mut StackPointer, new_sp: &StackPointer) -> usize {
  let ret: usize;
-    asm!(swap_body!()
+  asm!(
    r#"
      # Link the call stacks together.
      movq    %rsp, (%rcx)
      # Push instruction pointer of the old context and switch to
      # the new context.
      call    ${1:c}
    "#
    : "={rdi}" (ret)
-      : "{rdi}" (arg)
+    : "s" (swap_trampoline as usize)
      "{rdi}" (arg)
      "{rsi}" (old_sp)
      "{rdx}" (new_sp)
      "{rcx}" (new_cfa)
    : "rax",   "rbx",   "rcx",   "rdx",   "rsi",   "rdi", //"rbp",   "rsp",
      "r8",    "r9",    "r10",   "r11",   "r12",   "r13",   "r14",   "r15",
      "xmm0",  "xmm1",  "xmm2",  "xmm3",  "xmm4",  "xmm5",  "xmm6",  "xmm7",
@ -92,32 +161,3 @@ pub unsafe fn swap(arg: usize, old_sp: &mut StackPointer, new_sp: &StackPointer)
    : "volatile", "alignstack");
  ret
 }
  #[cfg(windows)]
  #[inline(always)]
  unsafe fn swap_impl(arg: usize, old_sp: &mut StackPointer, new_sp: &StackPointer) -> usize {
    let ret: usize;
    asm!(swap_body!()
      : "={rcx}" (ret)
      : "{rcx}" (arg)
        "{rsi}" (old_sp)
        "{rdx}" (new_sp)
      : "rax",   "rbx",   "rcx",   "rdx",   "rsi",   "rdi", //"rbp",   "rsp",
        "r8",    "r9",    "r10",   "r11",   "r12",   "r13",   "r14",   "r15",
        "xmm0",  "xmm1",  "xmm2",  "xmm3",  "xmm4",  "xmm5",  "xmm6",  "xmm7",
        "xmm8",  "xmm9",  "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15",
        "xmm16", "xmm17", "xmm18", "xmm19", "xmm20", "xmm21", "xmm22", "xmm23",
        "xmm24", "xmm25", "xmm26", "xmm27", "xmm28", "xmm29", "xmm30", "xmm31"
        "cc", "fpsr", "flags", "memory"
        // Ideally, we would set the LLVM "noredzone" attribute on this function
        // (and it would be propagated to the call site). Unfortunately, rustc
        // provides no such functionality. Fortunately, by a lucky coincidence,
        // the "alignstack" LLVM inline assembly option does exactly the same
        // thing on x86_64.
      : "volatile", "alignstack");
    ret
  }
  swap_impl(arg, old_sp, new_sp)
 }
--- a/src/context.rs
+++ b/src/context.rs
@ -49,6 +49,6 @@ impl<OldStack> Context<OldStack> where OldStack: stack::Stack {
                               new_ctx: *const Context<NewStack>,
                               arg: usize) -> usize
      where NewStack: stack::Stack {
-    arch::swap(arg, &mut (*old_ctx).stack_ptr, &(*new_ctx).stack_ptr)
+    arch::swap(arg, &mut (*old_ctx).stack_ptr, &(*new_ctx).stack_ptr, &(*new_ctx).stack)
  }
 }
--- a/src/lib.rs
+++ b/src/lib.rs
@ -3,6 +3,7 @@
 // See the LICENSE file included in this distribution.
 #![feature(asm)]
 #![cfg_attr(target_arch = "x86",    feature(naked_functions, core_intrinsics))]
 #![cfg_attr(target_arch = "x86_64", feature(naked_functions, core_intrinsics))]
 #![no_std]
 //! libfringe is a library implementing lightweight context switches,
--- a/src/os/mod.rs
+++ b/src/os/mod.rs
@ -50,12 +50,14 @@ impl Stack {
 }
 impl stack::Stack for Stack {
  /// Returns the pointer obtained by advancing `self.ptr` by `self.len` bytes.
  // NOTE(review): presumably this is one past the end of the stack's memory
  // region -- confirm against how `ptr`/`len` are initialised.
  #[inline(always)]
  fn top(&self) -> *mut u8 {
    unsafe {
      self.ptr.offset(self.len as isize)
    }
  }
  #[inline(always)]
  fn limit(&self) -> *mut u8 {
    unsafe {
      self.ptr.offset(sys::page_size() as isize)
--- a/tests/panic.rs
+++ b/tests/panic.rs
@ -0,0 +1,45 @@
 // This file is part of libfringe, a low-level green threading library.
 // Copyright (c) whitequark <whitequark@whitequark.org>
 // See the LICENSE file included in this distribution.
 #![feature(thread_local)]
 extern crate fringe;
 use fringe::Context;
 #[thread_local]
 static mut ctx_slot: *mut Context<fringe::OsStack> = 0 as *mut Context<_>;
 // Context entry point used by the tests below.
 //   arg == 0: panics immediately with "arg=0".
 //   arg == 1: swaps using `ctx_slot` as both the old and the new context,
 //             then panics with "arg=1" once control comes back.
 //   any other value is not expected.
 unsafe extern "C" fn do_panic(arg: usize) -> ! {
  if arg == 0 {
    panic!("arg=0");
  }
  if arg != 1 {
    unreachable!();
  }
  Context::swap(ctx_slot, ctx_slot, 0);
  panic!("arg=1");
 }
 // A panic raised right after the new context starts running (arg == 0)
 // is expected to surface in the test as "arg=0".
 #[test]
 #[should_panic="arg=0"]
 fn panic_after_start() {
  unsafe {
    let mut context = Context::new(fringe::OsStack::new(4 << 20).unwrap(), do_panic);
    Context::swap(&mut context, &context, 0);
  }
 }
 // A panic raised after the context has swapped out once and been resumed
 // (arg == 1) is expected to surface in the test as "arg=1".
 #[test]
 #[should_panic="arg=1"]
 fn panic_after_swap() {
  unsafe {
    let mut context = Context::new(fringe::OsStack::new(4 << 20).unwrap(), do_panic);
    // Publish the context so that `do_panic` can swap through `ctx_slot`.
    ctx_slot = &mut context;
    // First swap starts `do_panic` with arg == 1; the second one resumes it.
    Context::swap(&mut context, &context, 1);
    Context::swap(&mut context, &context, 0);
  }
 }