Inline the swap trampoline on x86_64 and AArch64

2016-11-01 06:15:41 +00:00 · 2016-11-01 06:15:41 +00:00 · 74c4444a7f
parent 243e9ffc6c
commit 74c4444a7f
2 changed files with 25 additions and 48 deletions
--- a/src/arch/aarch64.rs
+++ b/src/arch/aarch64.rs
@@ -177,52 +177,40 @@ pub unsafe fn swap(arg: usize, new_sp: StackPointer,
    &mut dummy
  };
-  #[naked]
+  let ret: usize;
-  unsafe extern "C" fn trampoline() {
+  let ret_sp: *mut usize;
-    asm!(
+  asm!(
-      r#"
+    r#"
        # Set up the link register
        adr     lr, 0f
        # Save the frame pointer and link register; the unwinder uses them to find
        # the CFA of the caller, and so they have to have the correct value immediately
        # after the call instruction that invoked the trampoline.
        stp     x29, x30, [sp, #-16]!
        .cfi_adjust_cfa_offset 16
        .cfi_rel_offset x30, 8
        .cfi_rel_offset x29, 0
        # Link the call stacks together by writing the current stack bottom
        # address to the CFA slot in the new stack.
        mov     x4, sp
        str     x4, [x3]
        # Pass the stack pointer of the old context to the new one.
        mov     x1, sp
        # Link the call stacks together by writing the current stack bottom
        # address to the CFA slot in the new stack.
        str     x1, [x3]
        # Load stack pointer of the new context.
        mov     sp, x2
        # Load frame and instruction pointers of the new context.
        ldp     x29, x30, [sp], #16
        .cfi_adjust_cfa_offset -16
        .cfi_restore x29
        .cfi_restore x30
        # Return into the new context. Use `br` instead of a `ret` to avoid
        # return address mispredictions.
        br      x30
      "#
      : : : : "volatile")
  }
-  let ret: usize;
+      0:
  let ret_sp: *mut usize;
  asm!(
    r#"
      # Call the trampoline to switch to the new context.
      bl      ${2}
    "#
    : "={x0}" (ret)
      "={x1}" (ret_sp)
-    : "s" (trampoline as usize)
+    : "{x0}" (arg)
      "{x0}" (arg)
      "{x2}" (new_sp.0)
      "{x3}" (new_cfa)
    :/*x0,   "x1",*/"x2",  "x3",  "x4",  "x5",  "x6",  "x7",
--- a/src/arch/x86_64.rs
+++ b/src/arch/x86_64.rs
@@ -179,16 +179,18 @@ pub unsafe fn swap(arg: usize, new_sp: StackPointer,
    &mut dummy
  };
-  #[naked]
+  let ret: usize;
-  unsafe extern "C" fn trampoline() {
+  let ret_sp: *mut usize;
-    asm!(
+  asm!(
-      r#"
+    r#"
        # Push the return address
        leaq    0f(%rip), %rax
        pushq   %rax
        # Save frame pointer explicitly; the unwinder uses it to find CFA of
        # the caller, and so it has to have the correct value immediately after
        # the call instruction that invoked the trampoline.
        pushq   %rbp
        .cfi_adjust_cfa_offset 8
        .cfi_rel_offset %rbp, 0
        # Link the call stacks together by writing the current stack bottom
        # address to the CFA slot in the new stack.
@@ -196,36 +198,23 @@ pub unsafe fn swap(arg: usize, new_sp: StackPointer,
        # Pass the stack pointer of the old context to the new one.
        movq    %rsp, %rsi
        # Load stack pointer of the new context.
        movq    %rdx, %rsp
        # Restore frame pointer of the new context.
        popq    %rbp
        .cfi_adjust_cfa_offset -8
        .cfi_restore %rbp
        # Return into the new context. Use `pop` and `jmp` instead of a `ret`
        # to avoid return address mispredictions (~8ns per `ret` on Ivy Bridge).
        popq    %rax
        .cfi_adjust_cfa_offset -8
        .cfi_register %rip, %rax
        jmpq    *%rax
      "#
      : : : : "volatile")
  }
-  let ret: usize;
+      0:
  let ret_sp: *mut usize;
  asm!(
    r#"
      # Push instruction pointer of the old context and switch to
      # the new context.
      call    ${2:c}
    "#
    : "={rdi}" (ret)
      "={rsi}" (ret_sp)
-    : "s" (trampoline as usize)
+    : "{rdi}" (arg)
      "{rdi}" (arg)
      "{rdx}" (new_sp.0)
      "{rcx}" (new_cfa)
    : "rax",   "rbx",   "rcx",   "rdx", /*"rsi",   "rdi",   "rbp",   "rsp",*/