Inline the swap trampoline on x86_64 and AArch64

This commit is contained in:
Amanieu d'Antras 2016-11-01 06:15:41 +00:00 committed by edef
parent 243e9ffc6c
commit 74c4444a7f
2 changed files with 25 additions and 48 deletions

View File

@ -177,52 +177,40 @@ pub unsafe fn swap(arg: usize, new_sp: StackPointer,
&mut dummy &mut dummy
}; };
#[naked] let ret: usize;
unsafe extern "C" fn trampoline() { let ret_sp: *mut usize;
asm!( asm!(
r#" r#"
# Set up the link register
adr lr, 0f
# Save the frame pointer and link register; the unwinder uses them to find # Save the frame pointer and link register; the unwinder uses them to find
# the CFA of the caller, and so they have to have the correct value immediately # the CFA of the caller, and so they have to have the correct value immediately
# after the call instruction that invoked the trampoline. # after the call instruction that invoked the trampoline.
stp x29, x30, [sp, #-16]! stp x29, x30, [sp, #-16]!
.cfi_adjust_cfa_offset 16
.cfi_rel_offset x30, 8
.cfi_rel_offset x29, 0
# Link the call stacks together by writing the current stack bottom
# address to the CFA slot in the new stack.
mov x4, sp
str x4, [x3]
# Pass the stack pointer of the old context to the new one. # Pass the stack pointer of the old context to the new one.
mov x1, sp mov x1, sp
# Link the call stacks together by writing the current stack bottom
# address to the CFA slot in the new stack.
str x1, [x3]
# Load stack pointer of the new context. # Load stack pointer of the new context.
mov sp, x2 mov sp, x2
# Load frame and instruction pointers of the new context. # Load frame and instruction pointers of the new context.
ldp x29, x30, [sp], #16 ldp x29, x30, [sp], #16
.cfi_adjust_cfa_offset -16
.cfi_restore x29
.cfi_restore x30
# Return into the new context. Use `br` instead of a `ret` to avoid # Return into the new context. Use `br` instead of a `ret` to avoid
# return address mispredictions. # return address mispredictions.
br x30 br x30
"#
: : : : "volatile")
}
let ret: usize; 0:
let ret_sp: *mut usize;
asm!(
r#"
# Call the trampoline to switch to the new context.
bl ${2}
"# "#
: "={x0}" (ret) : "={x0}" (ret)
"={x1}" (ret_sp) "={x1}" (ret_sp)
: "s" (trampoline as usize) : "{x0}" (arg)
"{x0}" (arg)
"{x2}" (new_sp.0) "{x2}" (new_sp.0)
"{x3}" (new_cfa) "{x3}" (new_cfa)
:/*x0, "x1",*/"x2", "x3", "x4", "x5", "x6", "x7", :/*x0, "x1",*/"x2", "x3", "x4", "x5", "x6", "x7",

View File

@ -179,16 +179,18 @@ pub unsafe fn swap(arg: usize, new_sp: StackPointer,
&mut dummy &mut dummy
}; };
#[naked] let ret: usize;
unsafe extern "C" fn trampoline() { let ret_sp: *mut usize;
asm!( asm!(
r#" r#"
# Push the return address
leaq 0f(%rip), %rax
pushq %rax
# Save frame pointer explicitly; the unwinder uses it to find CFA of # Save frame pointer explicitly; the unwinder uses it to find CFA of
# the caller, and so it has to have the correct value immediately after # the caller, and so it has to have the correct value immediately after
# the call instruction that invoked the trampoline. # the call instruction that invoked the trampoline.
pushq %rbp pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_rel_offset %rbp, 0
# Link the call stacks together by writing the current stack bottom # Link the call stacks together by writing the current stack bottom
# address to the CFA slot in the new stack. # address to the CFA slot in the new stack.
@ -196,36 +198,23 @@ pub unsafe fn swap(arg: usize, new_sp: StackPointer,
# Pass the stack pointer of the old context to the new one. # Pass the stack pointer of the old context to the new one.
movq %rsp, %rsi movq %rsp, %rsi
# Load stack pointer of the new context. # Load stack pointer of the new context.
movq %rdx, %rsp movq %rdx, %rsp
# Restore frame pointer of the new context. # Restore frame pointer of the new context.
popq %rbp popq %rbp
.cfi_adjust_cfa_offset -8
.cfi_restore %rbp
# Return into the new context. Use `pop` and `jmp` instead of a `ret` # Return into the new context. Use `pop` and `jmp` instead of a `ret`
# to avoid return address mispredictions (~8ns per `ret` on Ivy Bridge). # to avoid return address mispredictions (~8ns per `ret` on Ivy Bridge).
popq %rax popq %rax
.cfi_adjust_cfa_offset -8
.cfi_register %rip, %rax
jmpq *%rax jmpq *%rax
"#
: : : : "volatile")
}
let ret: usize; 0:
let ret_sp: *mut usize;
asm!(
r#"
# Push instruction pointer of the old context and switch to
# the new context.
call ${2:c}
"# "#
: "={rdi}" (ret) : "={rdi}" (ret)
"={rsi}" (ret_sp) "={rsi}" (ret_sp)
: "s" (trampoline as usize) : "{rdi}" (arg)
"{rdi}" (arg)
"{rdx}" (new_sp.0) "{rdx}" (new_sp.0)
"{rcx}" (new_cfa) "{rcx}" (new_cfa)
: "rax", "rbx", "rcx", "rdx", /*"rsi", "rdi", "rbp", "rsp",*/ : "rax", "rbx", "rcx", "rdx", /*"rsi", "rdi", "rbp", "rsp",*/