Add control flow information to __rust_probestack (#328)
This commit is contained in:
parent
f8c28c5c3b
commit
2566aa663b
|
@ -1,6 +1,7 @@
|
|||
#![cfg_attr(feature = "compiler-builtins", compiler_builtins)]
|
||||
#![feature(abi_unadjusted)]
|
||||
#![feature(asm)]
|
||||
#![feature(global_asm)]
|
||||
#![feature(cfg_target_has_atomic)]
|
||||
#![feature(compiler_builtins)]
|
||||
#![feature(core_intrinsics)]
|
||||
|
|
|
@ -41,95 +41,149 @@
|
|||
//! probes on any other architecture like ARM or PowerPC64. LLVM I'm sure would
|
||||
//! be more than welcome to accept such a change!
|
||||
|
||||
#![cfg(not(windows))] // Windows already has builtins to do this
|
||||
#![cfg(not(feature = "mangled-names"))]
|
||||
// Windows already has builtins to do this.
|
||||
#![cfg(not(windows))]
|
||||
// We only define stack probing for these architectures today.
|
||||
#![cfg(any(target_arch = "x86_64", target_arch = "x86"))]
|
||||
|
||||
#[naked]
|
||||
#[no_mangle]
|
||||
#[cfg(all(target_arch = "x86_64", not(feature = "mangled-names")))]
|
||||
pub unsafe extern "C" fn __rust_probestack() {
|
||||
// Our goal here is to touch each page between %rsp+8 and %rsp+8-%rax,
|
||||
// ensuring that if any pages are unmapped we'll make a page fault.
|
||||
//
|
||||
// The ABI here is that the stack frame size is located in `%rax`. Upon
|
||||
// return we're not supposed to modify `%rsp` or `%rax`.
|
||||
asm!("
|
||||
pushq %rbp
|
||||
movq %rsp, %rbp
|
||||
|
||||
mov %rax,%r11 // duplicate %rax as we're clobbering %r11
|
||||
|
||||
// Main loop, taken in one page increments. We're decrementing rsp by
|
||||
// a page each time until there's less than a page remaining. We're
|
||||
// guaranteed that this function isn't called unless there's more than a
|
||||
// page needed.
|
||||
//
|
||||
// Note that we're also testing against `8(%rsp)` to account for the 8
|
||||
// bytes pushed on the stack originally with our return address. Using
|
||||
// `8(%rsp)` simulates us testing the stack pointer in the caller's
|
||||
// context.
|
||||
|
||||
// It's usually called when %rax >= 0x1000, but that's not always true.
|
||||
// Dynamic stack allocation, which is needed to implement unsized
|
||||
// rvalues, triggers stackprobe even if %rax < 0x1000.
|
||||
// Thus we have to check %r11 first to avoid segfault.
|
||||
cmp $$0x1000,%r11
|
||||
jna 3f
|
||||
2:
|
||||
sub $$0x1000,%rsp
|
||||
test %rsp,8(%rsp)
|
||||
sub $$0x1000,%r11
|
||||
cmp $$0x1000,%r11
|
||||
ja 2b
|
||||
|
||||
3:
|
||||
// Finish up the last remaining stack space requested, getting the last
|
||||
// bits out of r11
|
||||
sub %r11,%rsp
|
||||
test %rsp,8(%rsp)
|
||||
|
||||
// Restore the stack pointer to what it previously was when entering
|
||||
// this function. The caller will readjust the stack pointer after we
|
||||
// return.
|
||||
add %rax,%rsp
|
||||
|
||||
leave
|
||||
ret
|
||||
" ::: "memory" : "volatile");
|
||||
::core::intrinsics::unreachable();
|
||||
extern "C" {
|
||||
pub fn __rust_probestack();
|
||||
}
|
||||
|
||||
#[naked]
|
||||
#[no_mangle]
|
||||
#[cfg(all(target_arch = "x86", not(feature = "mangled-names")))]
|
||||
pub unsafe extern "C" fn __rust_probestack() {
|
||||
// This is the same as x86_64 above, only translated for 32-bit sizes. Note
|
||||
// that on Unix we're expected to restore everything as it was, this
|
||||
// function basically can't tamper with anything.
|
||||
//
|
||||
// The ABI here is the same as x86_64, except everything is 32-bits large.
|
||||
asm!("
|
||||
push %ebp
|
||||
mov %esp, %ebp
|
||||
push %ecx
|
||||
mov %eax,%ecx
|
||||
|
||||
cmp $$0x1000,%ecx
|
||||
jna 3f
|
||||
2:
|
||||
sub $$0x1000,%esp
|
||||
test %esp,8(%esp)
|
||||
sub $$0x1000,%ecx
|
||||
cmp $$0x1000,%ecx
|
||||
ja 2b
|
||||
|
||||
3:
|
||||
sub %ecx,%esp
|
||||
test %esp,8(%esp)
|
||||
|
||||
add %eax,%esp
|
||||
pop %ecx
|
||||
leave
|
||||
ret
|
||||
" ::: "memory" : "volatile");
|
||||
::core::intrinsics::unreachable();
|
||||
// A wrapper for our implementation of __rust_probestack, which allows us to
|
||||
// keep the assembly inline while controlling all CFI directives in the assembly
|
||||
// emitted for the function.
|
||||
//
|
||||
// This is the ELF version.
|
||||
#[cfg(not(target_vendor = "apple"))]
|
||||
// Takes the assembly body as a string expression and returns one string, built
// with `concat!`, consisting of a header, the body, and a footer.
macro_rules! define_rust_probestack {
|
||||
($body: expr) => {
|
||||
concat!(
|
||||
// Header: switch to a dedicated `.text.__rust_probestack` section, export
// the symbol, mark it as a function, and emit its label.
"
|
||||
.pushsection .text.__rust_probestack
|
||||
.globl __rust_probestack
|
||||
.type __rust_probestack, @function
|
||||
__rust_probestack:
|
||||
",
|
||||
$body,
|
||||
// Footer: record the symbol's size (current location minus the label) and
// return to the section that was active before `.pushsection`.
"
|
||||
.size __rust_probestack, . - __rust_probestack
|
||||
.popsection
|
||||
"
|
||||
)
|
||||
};
|
||||
}
|
||||
|
||||
// Same as above, but for Mach-O.
|
||||
#[cfg(target_vendor = "apple")]
|
||||
// Takes the assembly body as a string expression and prepends only a `.globl`
// directive and the symbol label; no section, type, or size directives are
// emitted in this variant.
// NOTE(review): the label carries three leading underscores, presumably
// because Mach-O prefixes C symbols with an extra underscore -- confirm.
macro_rules! define_rust_probestack {
|
||||
($body: expr) => {
|
||||
concat!(
|
||||
"
|
||||
.globl ___rust_probestack
|
||||
___rust_probestack:
|
||||
",
|
||||
$body
|
||||
)
|
||||
};
|
||||
}
|
||||
|
||||
// Our goal here is to touch each page between %rsp+8 and %rsp+8-%rax,
|
||||
// ensuring that if any pages are unmapped we'll make a page fault.
|
||||
//
|
||||
// The ABI here is that the stack frame size is located in `%rax`. Upon
|
||||
// return we're not supposed to modify `%rsp` or `%rax`.
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
global_asm!(define_rust_probestack!(
|
||||
"
|
||||
.cfi_startproc
|
||||
pushq %rbp
|
||||
.cfi_adjust_cfa_offset 8
|
||||
.cfi_offset %rbp, -16
|
||||
movq %rsp, %rbp
|
||||
.cfi_def_cfa_register %rbp
|
||||
|
||||
mov %rax,%r11 // duplicate %rax as we're clobbering %r11
|
||||
|
||||
// Main loop, taken in one page increments. We're decrementing rsp by
|
||||
// a page each time until there's less than a page remaining. Callers
|
||||
// normally request more than a page, but the check before the loop
|
||||
// skips it when they do not.
|
||||
//
|
||||
// Note that we're also testing against `8(%rsp)` to account for the 8
|
||||
// bytes pushed on the stack originally with our return address. Using
|
||||
// `8(%rsp)` simulates us testing the stack pointer in the caller's
|
||||
// context.
|
||||
|
||||
// It's usually called when %rax >= 0x1000, but that's not always true.
|
||||
// Dynamic stack allocation, which is needed to implement unsized
|
||||
// rvalues, triggers stackprobe even if %rax < 0x1000.
|
||||
// Thus we have to check %r11 first to avoid segfault.
|
||||
cmp $0x1000,%r11
|
||||
jna 3f
|
||||
2:
|
||||
sub $0x1000,%rsp
|
||||
test %rsp,8(%rsp)
|
||||
sub $0x1000,%r11
|
||||
cmp $0x1000,%r11
|
||||
ja 2b
|
||||
|
||||
3:
|
||||
// Finish up the last remaining stack space requested, getting the last
|
||||
// bits out of r11
|
||||
sub %r11,%rsp
|
||||
test %rsp,8(%rsp)
|
||||
|
||||
// Restore the stack pointer to what it previously was when entering
|
||||
// this function. The caller will readjust the stack pointer after we
|
||||
// return.
|
||||
add %rax,%rsp
|
||||
|
||||
leave
|
||||
.cfi_def_cfa_register %rsp
|
||||
.cfi_adjust_cfa_offset -8
|
||||
ret
|
||||
.cfi_endproc
|
||||
"
|
||||
));
|
||||
|
||||
#[cfg(target_arch = "x86")]
|
||||
// This is the same as x86_64 above, only translated for 32-bit sizes. Note
|
||||
// that on Unix we're expected to restore everything as it was, this
|
||||
// function basically can't tamper with anything.
|
||||
//
|
||||
// The ABI here is the same as x86_64, except everything is 32-bits large.
//
// `%ecx` holds the count of bytes still to probe (the x86_64 version uses
// `%r11` for this). It is pushed right after the frame is set up and popped
// just before `leave`, so the caller sees it unchanged.
|
||||
global_asm!(define_rust_probestack!(
|
||||
"
|
||||
.cfi_startproc
|
||||
push %ebp
|
||||
.cfi_adjust_cfa_offset 4
|
||||
.cfi_offset %ebp, -8
|
||||
mov %esp, %ebp
|
||||
.cfi_def_cfa_register %ebp
|
||||
push %ecx
|
||||
mov %eax,%ecx
|
||||
|
||||
cmp $0x1000,%ecx
|
||||
jna 3f
|
||||
2:
|
||||
sub $0x1000,%esp
|
||||
test %esp,8(%esp)
|
||||
sub $0x1000,%ecx
|
||||
cmp $0x1000,%ecx
|
||||
ja 2b
|
||||
|
||||
3:
|
||||
sub %ecx,%esp
|
||||
test %esp,8(%esp)
|
||||
|
||||
add %eax,%esp
|
||||
pop %ecx
|
||||
leave
|
||||
.cfi_def_cfa_register %esp
|
||||
.cfi_adjust_cfa_offset -4
|
||||
ret
|
||||
.cfi_endproc
|
||||
"
|
||||
));
|
||||
|
|
Loading…
Reference in New Issue