diff --git a/src/arch/x86_64.rs b/src/arch/x86_64.rs
index 5f9559b..d9e8f53 100644
--- a/src/arch/x86_64.rs
+++ b/src/arch/x86_64.rs
@@ -42,51 +42,88 @@ pub unsafe fn init(stack: &Stack, f: unsafe extern "C" fn(usize) -> !) -> StackP

 #[inline(always)]
 pub unsafe fn swap(arg: usize, old_sp: &mut StackPointer, new_sp: &StackPointer) -> usize {
-  let ret: usize;
-  asm!(
-    r#"
-    # Save frame pointer explicitly; LLVM doesn't spill it even if it is
-    # marked as clobbered.
-    pushq %rbp
-    # Push instruction pointer of the old context and switch to
-    # the new context.
-    call 1f
-    # Restore frame pointer.
-    popq %rbp
-    # Continue executing old context.
-    jmp 2f
+  macro_rules! swap_body {
+    () => {
+      r#"
+      # Save frame pointer explicitly; LLVM doesn't spill it even if it is
+      # marked as clobbered.
+      pushq %rbp
+      # Push instruction pointer of the old context and switch to
+      # the new context.
+      call 1f
+      # Restore frame pointer.
+      popq %rbp
+      # Continue executing old context.
+      jmp 2f

-    1:
-    # Remember stack pointer of the old context, in case %rdx==%rsi.
-    movq %rsp, %rbx
-    # Load stack pointer of the new context.
-    movq (%rdx), %rsp
-    # Save stack pointer of the old context.
-    movq %rbx, (%rsi)
+      1:
+      # Remember stack pointer of the old context, in case %rdx==%rsi.
+      movq %rsp, %rbx
+      # Load stack pointer of the new context.
+      movq (%rdx), %rsp
+      # Save stack pointer of the old context.
+      movq %rbx, (%rsi)

-    # Pop instruction pointer of the new context (placed onto stack by
-    # the call above) and jump there; don't use `ret` to avoid return
-    # address mispredictions (~8ns on Ivy Bridge).
-    popq %rbx
-    jmpq *%rbx
-    2:
-    "#
-    : "={rdi}" (ret)
-    : "{rdi}" (arg)
-      "{rsi}" (old_sp)
-      "{rdx}" (new_sp)
-    : "rax", "rbx", "rcx", "rdx", "rsi", "rdi", //"rbp", "rsp",
-      "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
-      "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7",
-      "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15",
-      "xmm16", "xmm17", "xmm18", "xmm19", "xmm20", "xmm21", "xmm22", "xmm23",
-      "xmm24", "xmm25", "xmm26", "xmm27", "xmm28", "xmm29", "xmm30", "xmm31",
-      "cc", "fpsr", "flags", "memory"
-    // Ideally, we would set the LLVM "noredzone" attribute on this function
-    // (and it would be propagated to the call site). Unfortunately, rustc
-    // provides no such functionality. Fortunately, by a lucky coincidence,
-    // the "alignstack" LLVM inline assembly option does exactly the same
-    // thing on x86_64.
-    : "volatile", "alignstack");
-  ret
+      # Pop instruction pointer of the new context (placed onto stack by
+      # the call above) and jump there; don't use `ret` to avoid return
+      # address mispredictions (~8ns on Ivy Bridge).
+      popq %rbx
+      jmpq *%rbx
+      2:
+      "#
+    }
+  }
+
+  #[cfg(not(windows))]
+  #[inline(always)]
+  unsafe fn swap_impl(arg: usize, old_sp: &mut StackPointer, new_sp: &StackPointer) -> usize {
+    let ret: usize;
+    asm!(swap_body!()
+      : "={rdi}" (ret)
+      : "{rdi}" (arg)
+        "{rsi}" (old_sp)
+        "{rdx}" (new_sp)
+      : "rax", "rbx", "rcx", "rdx", "rsi", "rdi", //"rbp", "rsp",
+        "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
+        "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7",
+        "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15",
+        "xmm16", "xmm17", "xmm18", "xmm19", "xmm20", "xmm21", "xmm22", "xmm23",
+        "xmm24", "xmm25", "xmm26", "xmm27", "xmm28", "xmm29", "xmm30", "xmm31",
+        "cc", "fpsr", "flags", "memory"
+      // Ideally, we would set the LLVM "noredzone" attribute on this function
+      // (and it would be propagated to the call site). Unfortunately, rustc
+      // provides no such functionality. Fortunately, by a lucky coincidence,
+      // the "alignstack" LLVM inline assembly option does exactly the same
+      // thing on x86_64.
+      : "volatile", "alignstack");
+    ret
+  }
+
+  #[cfg(windows)]
+  #[inline(always)]
+  unsafe fn swap_impl(arg: usize, old_sp: &mut StackPointer, new_sp: &StackPointer) -> usize {
+    let ret: usize;
+    asm!(swap_body!()
+      : "={rcx}" (ret)
+      : "{rcx}" (arg)
+        "{rsi}" (old_sp)
+        "{rdx}" (new_sp)
+      : "rax", "rbx", "rcx", "rdx", "rsi", "rdi", //"rbp", "rsp",
+        "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
+        "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7",
+        "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15",
+        "xmm16", "xmm17", "xmm18", "xmm19", "xmm20", "xmm21", "xmm22", "xmm23",
+        "xmm24", "xmm25", "xmm26", "xmm27", "xmm28", "xmm29", "xmm30", "xmm31",
+        "cc", "fpsr", "flags", "memory"
+      // Ideally, we would set the LLVM "noredzone" attribute on this function
+      // (and it would be propagated to the call site). Unfortunately, rustc
+      // provides no such functionality. Fortunately, by a lucky coincidence,
+      // the "alignstack" LLVM inline assembly option does exactly the same
+      // thing on x86_64.
+      : "volatile", "alignstack");
+    ret
+  }
+
+  swap_impl(arg, old_sp, new_sp)
 }
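
Usage note (not part of the patch): both swap_impl variants pin `arg` to the
first-argument register of the platform's C ABI (%rdi on SysV, %rcx on Win64),
so the value rides through the switch untouched by the assembly and comes back
as swap's return value when a context is resumed; presumably the entry function
installed by init receives its usize argument the same way. Below is a minimal
sketch of a round trip using only the two signatures visible above. The
(parent, child) pointer-pair convention for `arg` and the `roundtrip` helper
are invented here for illustration and are not part of the crate:

use std::mem;

// Sketch convention: `arg` carries a pointer to the (parent, child)
// stack-pointer pair so the child context can switch back to its parent.
unsafe extern "C" fn child(arg: usize) -> ! {
    let sps = &mut *(arg as *mut (StackPointer, StackPointer));
    loop {
        // Save our stack pointer into sps.1, load the parent's from sps.0,
        // and hand the parent 42; returns if the parent swaps back into us.
        swap(42, &mut sps.1, &sps.0);
    }
}

unsafe fn roundtrip(stack: &Stack) {
    // sps.0 is stored through %rsi by `swap` (movq %rbx, (%rsi) above)
    // before it is ever read, so it may start uninitialized in this sketch.
    let mut sps: (StackPointer, StackPointer) =
        (mem::uninitialized(), init(stack, child));
    let arg = &mut sps as *mut (StackPointer, StackPointer) as usize;
    assert_eq!(swap(arg, &mut sps.0, &sps.1), 42);
}

This also shows why the `%rdx==%rsi` aliasing case called out in the assembly
matters: nothing stops a caller from passing the same location as both the
save slot and the load slot, so the old %rsp is parked in %rbx before the
new one is loaded.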