Inline the swap trampoline on x86_64 and AArch64

master
Amanieu d'Antras 2016-11-01 06:15:41 +00:00 committed by edef
parent 243e9ffc6c
commit 74c4444a7f
2 changed files with 25 additions and 48 deletions

View File

@ -177,52 +177,40 @@ pub unsafe fn swap(arg: usize, new_sp: StackPointer,
&mut dummy
};
#[naked]
unsafe extern "C" fn trampoline() {
asm!(
r#"
let ret: usize;
let ret_sp: *mut usize;
asm!(
r#"
# Set up the link register
adr lr, 0f
# Save the frame pointer and link register; the unwinder uses them to find
# the CFA of the caller, and so they have to have the correct value immediately
# after the call instruction that invoked the trampoline.
stp x29, x30, [sp, #-16]!
.cfi_adjust_cfa_offset 16
.cfi_rel_offset x30, 8
.cfi_rel_offset x29, 0
# Link the call stacks together by writing the current stack bottom
# address to the CFA slot in the new stack.
mov x4, sp
str x4, [x3]
# Pass the stack pointer of the old context to the new one.
mov x1, sp
# Link the call stacks together by writing the current stack bottom
# address to the CFA slot in the new stack.
str x1, [x3]
# Load stack pointer of the new context.
mov sp, x2
# Load frame and instruction pointers of the new context.
ldp x29, x30, [sp], #16
.cfi_adjust_cfa_offset -16
.cfi_restore x29
.cfi_restore x30
# Return into the new context. Use `br` instead of a `ret` to avoid
# return address mispredictions.
br x30
"#
: : : : "volatile")
}
let ret: usize;
let ret_sp: *mut usize;
asm!(
r#"
# Call the trampoline to switch to the new context.
bl ${2}
0:
"#
: "={x0}" (ret)
"={x1}" (ret_sp)
: "s" (trampoline as usize)
"{x0}" (arg)
: "{x0}" (arg)
"{x2}" (new_sp.0)
"{x3}" (new_cfa)
:/*x0, "x1",*/"x2", "x3", "x4", "x5", "x6", "x7",

View File

@ -179,16 +179,18 @@ pub unsafe fn swap(arg: usize, new_sp: StackPointer,
&mut dummy
};
#[naked]
unsafe extern "C" fn trampoline() {
asm!(
r#"
let ret: usize;
let ret_sp: *mut usize;
asm!(
r#"
# Push the return address
leaq 0f(%rip), %rax
pushq %rax
# Save the frame pointer explicitly; the unwinder uses it to find the CFA of
# the caller, and so it has to have the correct value immediately after
# the call instruction that invoked the trampoline.
pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_rel_offset %rbp, 0
# Link the call stacks together by writing the current stack bottom
# address to the CFA slot in the new stack.
@ -196,36 +198,23 @@ pub unsafe fn swap(arg: usize, new_sp: StackPointer,
# Pass the stack pointer of the old context to the new one.
movq %rsp, %rsi
# Load stack pointer of the new context.
movq %rdx, %rsp
# Restore frame pointer of the new context.
popq %rbp
.cfi_adjust_cfa_offset -8
.cfi_restore %rbp
# Return into the new context. Use `pop` and `jmp` instead of a `ret`
# to avoid return address mispredictions (~8ns per `ret` on Ivy Bridge).
popq %rax
.cfi_adjust_cfa_offset -8
.cfi_register %rip, %rax
jmpq *%rax
"#
: : : : "volatile")
}
let ret: usize;
let ret_sp: *mut usize;
asm!(
r#"
# Push instruction pointer of the old context and switch to
# the new context.
call ${2:c}
0:
"#
: "={rdi}" (ret)
"={rsi}" (ret_sp)
: "s" (trampoline as usize)
"{rdi}" (arg)
: "{rdi}" (arg)
"{rdx}" (new_sp.0)
"{rcx}" (new_cfa)
: "rax", "rbx", "rcx", "rdx", /*"rsi", "rdi", "rbp", "rsp",*/