From 74c4444a7fa17343faa95dd1d6941e5af5af8ef5 Mon Sep 17 00:00:00 2001
From: Amanieu d'Antras
Date: Tue, 1 Nov 2016 06:15:41 +0000
Subject: [PATCH] Inline the swap trampoline on x86_64 and AArch64

---
 src/arch/aarch64.rs | 40 ++++++++++++++--------------------------
 src/arch/x86_64.rs | 33 +++++++++++----------------------
 2 files changed, 25 insertions(+), 48 deletions(-)

diff --git a/src/arch/aarch64.rs b/src/arch/aarch64.rs
index 4329382..58a4a64 100644
--- a/src/arch/aarch64.rs
+++ b/src/arch/aarch64.rs
@@ -177,52 +177,40 @@ pub unsafe fn swap(arg: usize, new_sp: StackPointer,
     &mut dummy
   };
 
-  #[naked]
-  unsafe extern "C" fn trampoline() {
-    asm!(
-      r#"
+  let ret: usize;
+  let ret_sp: *mut usize;
+  asm!(
+    r#"
+      # Set up the link register
+      adr lr, 0f
+
       # Save the frame pointer and link register; the unwinder uses them to find
       # the CFA of the caller, and so they have to have the correct value immediately
       # after the call instruction that invoked the trampoline.
       stp x29, x30, [sp, #-16]!
-      .cfi_adjust_cfa_offset 16
-      .cfi_rel_offset x30, 8
-      .cfi_rel_offset x29, 0
-
-      # Link the call stacks together by writing the current stack bottom
-      # address to the CFA slot in the new stack.
-      mov x4, sp
-      str x4, [x3]
 
       # Pass the stack pointer of the old context to the new one.
       mov x1, sp
+
+      # Link the call stacks together by writing the current stack bottom
+      # address to the CFA slot in the new stack.
+      str x1, [x3]
+
       # Load stack pointer of the new context.
       mov sp, x2
 
       # Load frame and instruction pointers of the new context.
       ldp x29, x30, [sp], #16
-      .cfi_adjust_cfa_offset -16
-      .cfi_restore x29
-      .cfi_restore x30
 
       # Return into the new context. Use `br` instead of a `ret` to avoid
       # return address mispredictions.
       br x30
-      "#
-      : : : : "volatile")
-  }
 
-  let ret: usize;
-  let ret_sp: *mut usize;
-  asm!(
-    r#"
-      # Call the trampoline to switch to the new context.
-      bl ${2}
+    0:
     "#
     : "={x0}" (ret)
       "={x1}" (ret_sp)
-    : "s" (trampoline as usize)
-      "{x0}" (arg)
+    : "{x0}" (arg)
       "{x2}" (new_sp.0)
       "{x3}" (new_cfa)
     :/*x0, "x1",*/"x2", "x3", "x4", "x5", "x6", "x7",
diff --git a/src/arch/x86_64.rs b/src/arch/x86_64.rs
index a14a0f6..1da7dc2 100644
--- a/src/arch/x86_64.rs
+++ b/src/arch/x86_64.rs
@@ -179,16 +179,18 @@ pub unsafe fn swap(arg: usize, new_sp: StackPointer,
     &mut dummy
   };
 
-  #[naked]
-  unsafe extern "C" fn trampoline() {
-    asm!(
-      r#"
+  let ret: usize;
+  let ret_sp: *mut usize;
+  asm!(
+    r#"
+      # Push the return address
+      leaq 0f(%rip), %rax
+      pushq %rax
+
       # Save frame pointer explicitly; the unwinder uses it to find CFA of
       # the caller, and so it has to have the correct value immediately after
       # the call instruction that invoked the trampoline.
       pushq %rbp
-      .cfi_adjust_cfa_offset 8
-      .cfi_rel_offset %rbp, 0
 
       # Link the call stacks together by writing the current stack bottom
       # address to the CFA slot in the new stack.
@@ -196,36 +198,23 @@ pub unsafe fn swap(arg: usize, new_sp: StackPointer,
 
       # Pass the stack pointer of the old context to the new one.
       movq %rsp, %rsi
+
       # Load stack pointer of the new context.
       movq %rdx, %rsp
 
       # Restore frame pointer of the new context.
       popq %rbp
-      .cfi_adjust_cfa_offset -8
-      .cfi_restore %rbp
 
       # Return into the new context. Use `pop` and `jmp` instead of a `ret`
       # to avoid return address mispredictions (~8ns per `ret` on Ivy Bridge).
       popq %rax
-      .cfi_adjust_cfa_offset -8
-      .cfi_register %rip, %rax
       jmpq *%rax
-      "#
-      : : : : "volatile")
-  }
 
-  let ret: usize;
-  let ret_sp: *mut usize;
-  asm!(
-    r#"
-      # Push instruction pointer of the old context and switch to
-      # the new context.
-      call ${2:c}
+    0:
     "#
     : "={rdi}" (ret)
       "={rsi}" (ret_sp)
-    : "s" (trampoline as usize)
-      "{rdi}" (arg)
+    : "{rdi}" (arg)
       "{rdx}" (new_sp.0)
       "{rcx}" (new_cfa)
     : "rax", "rbx", "rcx", "rdx", /*"rsi", "rdi", "rbp", "rsp",*/