Manually patch ret instruction for LVI (#359)

Co-authored-by: Jethro Beekman <jethro@fortanix.com>
2020-06-01 17:55:42 +02:00 · 2020-06-01 17:55:42 +02:00 · f853d6d9b7
parent 5488a09800
commit f853d6d9b7
1 changed files with 69 additions and 1 deletions
--- a/src/probestack.rs
+++ b/src/probestack.rs
@ -109,7 +109,12 @@ macro_rules! define_rust_probestack {
 //
 // The ABI here is that the stack frame size is located in `%rax`. Upon
 // return we're not supposed to modify `%rsp` or `%rax`.
-#[cfg(target_arch = "x86_64")]
+//
+// Any changes to this function should be replicated to the SGX version below.
+#[cfg(all(
+    target_arch = "x86_64",
+    not(all(target_env = "sgx", target_vendor = "fortanix"))
+))]
 global_asm!(define_rust_probestack!(
    "
    .cfi_startproc
@ -163,6 +168,69 @@ global_asm!(define_rust_probestack!(
    "
 ));

+// This function is the same as above, except that some instructions are
+// [manually patched for LVI].
+//
+// [manually patched for LVI]: https://software.intel.com/security-software-guidance/insights/deep-dive-load-value-injection#specialinstructions
+#[cfg(all(
+    target_arch = "x86_64",
+    all(target_env = "sgx", target_vendor = "fortanix")
+))]
+global_asm!(define_rust_probestack!(
+    "
+    .cfi_startproc
+    pushq  %rbp
+    .cfi_adjust_cfa_offset 8
+    .cfi_offset %rbp, -16
+    movq   %rsp, %rbp
+    .cfi_def_cfa_register %rbp
+
+    mov    %rax,%r11        // duplicate %rax as we're clobbering %r11
+
+    // Main loop, taken in one page increments. We're decrementing rsp by
+    // a page each time until there's less than a page remaining. We're
+    // guaranteed that this function isn't called unless there's more than a
+    // page needed.
+    //
+    // Note that we're also testing against `8(%rsp)` to account for the 8
+    // bytes pushed on the stack orginally with our return address. Using
+    // `8(%rsp)` simulates us testing the stack pointer in the caller's
+    // context.
+
+    // It's usually called when %rax >= 0x1000, but that's not always true.
+    // Dynamic stack allocation, which is needed to implement unsized
+    // rvalues, triggers stackprobe even if %rax < 0x1000.
+    // Thus we have to check %r11 first to avoid segfault.
+    cmp    $0x1000,%r11
+    jna    3f
+2:
+    sub    $0x1000,%rsp
+    test   %rsp,8(%rsp)
+    sub    $0x1000,%r11
+    cmp    $0x1000,%r11
+    ja     2b
+
+3:
+    // Finish up the last remaining stack space requested, getting the last
+    // bits out of r11
+    sub    %r11,%rsp
+    test   %rsp,8(%rsp)
+
+    // Restore the stack pointer to what it previously was when entering
+    // this function. The caller will readjust the stack pointer after we
+    // return.
+    add    %rax,%rsp
+
+    leave
+    .cfi_def_cfa_register %rsp
+    .cfi_adjust_cfa_offset -8
+    pop %r11
+    lfence
+    jmp *%r11
+    .cfi_endproc
+    "
+));
+
 #[cfg(target_arch = "x86")]
 // This is the same as x86_64 above, only translated for 32-bit sizes. Note
 // that on Unix we're expected to restore everything as it was, this