diff --git a/src/lib.rs b/src/lib.rs index 0e1a435..e57a5ef 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,6 +1,7 @@ #![cfg_attr(feature = "compiler-builtins", compiler_builtins)] #![feature(abi_unadjusted)] #![feature(asm)] +#![feature(global_asm)] #![feature(cfg_target_has_atomic)] #![feature(compiler_builtins)] #![feature(core_intrinsics)] diff --git a/src/probestack.rs b/src/probestack.rs index 9bcaf4f..933a60d 100644 --- a/src/probestack.rs +++ b/src/probestack.rs @@ -41,95 +41,149 @@ //! probes on any other architecture like ARM or PowerPC64. LLVM I'm sure would //! be more than welcome to accept such a change! -#![cfg(not(windows))] // Windows already has builtins to do this +#![cfg(not(feature = "mangled-names"))] +// Windows already has builtins to do this. +#![cfg(not(windows))] +// We only define stack probing for these architectures today. +#![cfg(any(target_arch = "x86_64", target_arch = "x86"))] -#[naked] -#[no_mangle] -#[cfg(all(target_arch = "x86_64", not(feature = "mangled-names")))] -pub unsafe extern "C" fn __rust_probestack() { - // Our goal here is to touch each page between %rsp+8 and %rsp+8-%rax, - // ensuring that if any pages are unmapped we'll make a page fault. - // - // The ABI here is that the stack frame size is located in `%eax`. Upon - // return we're not supposed to modify `%esp` or `%eax`. - asm!(" - pushq %rbp - movq %rsp, %rbp - - mov %rax,%r11 // duplicate %rax as we're clobbering %r11 - - // Main loop, taken in one page increments. We're decrementing rsp by - // a page each time until there's less than a page remaining. We're - // guaranteed that this function isn't called unless there's more than a - // page needed. - // - // Note that we're also testing against `8(%rsp)` to account for the 8 - // bytes pushed on the stack orginally with our return address. Using - // `8(%rsp)` simulates us testing the stack pointer in the caller's - // context. - - // It's usually called when %rax >= 0x1000, but that's not always true. 
- // Dynamic stack allocation, which is needed to implement unsized - // rvalues, triggers stackprobe even if %rax < 0x1000. - // Thus we have to check %r11 first to avoid segfault. - cmp $$0x1000,%r11 - jna 3f - 2: - sub $$0x1000,%rsp - test %rsp,8(%rsp) - sub $$0x1000,%r11 - cmp $$0x1000,%r11 - ja 2b - - 3: - // Finish up the last remaining stack space requested, getting the last - // bits out of r11 - sub %r11,%rsp - test %rsp,8(%rsp) - - // Restore the stack pointer to what it previously was when entering - // this function. The caller will readjust the stack pointer after we - // return. - add %rax,%rsp - - leave - ret - " ::: "memory" : "volatile"); - ::core::intrinsics::unreachable(); +extern "C" { + pub fn __rust_probestack(); } -#[naked] -#[no_mangle] -#[cfg(all(target_arch = "x86", not(feature = "mangled-names")))] -pub unsafe extern "C" fn __rust_probestack() { - // This is the same as x86_64 above, only translated for 32-bit sizes. Note - // that on Unix we're expected to restore everything as it was, this - // function basically can't tamper with anything. - // - // The ABI here is the same as x86_64, except everything is 32-bits large. - asm!(" - push %ebp - mov %esp, %ebp - push %ecx - mov %eax,%ecx - - cmp $$0x1000,%ecx - jna 3f - 2: - sub $$0x1000,%esp - test %esp,8(%esp) - sub $$0x1000,%ecx - cmp $$0x1000,%ecx - ja 2b - - 3: - sub %ecx,%esp - test %esp,8(%esp) - - add %eax,%esp - pop %ecx - leave - ret - " ::: "memory" : "volatile"); - ::core::intrinsics::unreachable(); +// A wrapper for our implementation of __rust_probestack, which allows us to +// keep the assembly inline while controlling all CFI directives in the assembly +// emitted for the function. +// +// This is the ELF version. +#[cfg(not(target_vendor = "apple"))] +macro_rules! 
define_rust_probestack { + ($body: expr) => { + concat!( + " + .pushsection .text.__rust_probestack + .globl __rust_probestack + .type __rust_probestack, @function + __rust_probestack: + ", + $body, + " + .size __rust_probestack, . - __rust_probestack + .popsection + " + ) + }; } + +// Same as above, but for Mach-O. +#[cfg(target_vendor = "apple")] +macro_rules! define_rust_probestack { + ($body: expr) => { + concat!( + " + .globl ___rust_probestack + ___rust_probestack: + ", + $body + ) + }; +} + +// Our goal here is to touch each page between %rsp+8 and %rsp+8-%rax, +// ensuring that if any pages are unmapped we'll make a page fault. +// +// The ABI here is that the stack frame size is located in `%rax`. Upon +// return we're not supposed to modify `%rsp` or `%rax`. +#[cfg(target_arch = "x86_64")] +global_asm!(define_rust_probestack!( + " + .cfi_startproc + pushq %rbp + .cfi_adjust_cfa_offset 8 + .cfi_offset %rbp, -16 + movq %rsp, %rbp + .cfi_def_cfa_register %rbp + + mov %rax,%r11 // duplicate %rax as we're clobbering %r11 + + // Main loop, taken in one page increments. We're decrementing rsp by + // a page each time until there's less than a page remaining. We're + // guaranteed that this function isn't called unless there's more than a + // page needed. + // + // Note that we're also testing against `8(%rsp)` to account for the 8 + // bytes pushed on the stack originally with our return address. Using + // `8(%rsp)` simulates us testing the stack pointer in the caller's + // context. + + // It's usually called when %rax >= 0x1000, but that's not always true. + // Dynamic stack allocation, which is needed to implement unsized + // rvalues, triggers stackprobe even if %rax < 0x1000. + // Thus we have to check %r11 first to avoid segfault. 
+ cmp $0x1000,%r11 + jna 3f +2: + sub $0x1000,%rsp + test %rsp,8(%rsp) + sub $0x1000,%r11 + cmp $0x1000,%r11 + ja 2b + +3: + // Finish up the last remaining stack space requested, getting the last + // bits out of r11 + sub %r11,%rsp + test %rsp,8(%rsp) + + // Restore the stack pointer to what it previously was when entering + // this function. The caller will readjust the stack pointer after we + // return. + add %rax,%rsp + + leave + .cfi_def_cfa_register %rsp + .cfi_adjust_cfa_offset -8 + ret + .cfi_endproc + " +)); + +#[cfg(target_arch = "x86")] +// This is the same as x86_64 above, only translated for 32-bit sizes. Note +// that on Unix we're expected to restore everything as it was; this +// function basically can't tamper with anything. +// +// The ABI here is the same as x86_64, except everything is 32 bits wide. +global_asm!(define_rust_probestack!( + " + .cfi_startproc + push %ebp + .cfi_adjust_cfa_offset 4 + .cfi_offset %ebp, -8 + mov %esp, %ebp + .cfi_def_cfa_register %ebp + push %ecx + mov %eax,%ecx + + cmp $0x1000,%ecx + jna 3f +2: + sub $0x1000,%esp + test %esp,8(%esp) + sub $0x1000,%ecx + cmp $0x1000,%ecx + ja 2b + +3: + sub %ecx,%esp + test %esp,8(%esp) + + add %eax,%esp + pop %ecx + leave + .cfi_def_cfa_register %esp + .cfi_adjust_cfa_offset -4 + ret + .cfi_endproc + " +));