diff --git a/src/arch/x86_64.rs b/src/arch/x86_64.rs
index 5f9559b..d9e8f53 100644
--- a/src/arch/x86_64.rs
+++ b/src/arch/x86_64.rs
@@ -42,51 +42,88 @@ pub unsafe fn init(stack: &Stack, f: unsafe extern "C" fn(usize) -> !) -> StackP

 #[inline(always)]
 pub unsafe fn swap(arg: usize, old_sp: &mut StackPointer, new_sp: &StackPointer) -> usize {
-  let ret: usize;
-  asm!(
-    r#"
-    # Save frame pointer explicitly; LLVM doesn't spill it even if it is
-    # marked as clobbered.
-    pushq %rbp
-    # Push instruction pointer of the old context and switch to
-    # the new context.
-    call 1f
-    # Restore frame pointer.
-    popq %rbp
-    # Continue executing old context.
-    jmp 2f
+  macro_rules! swap_body {
+    () => {
+      r#"
+      # Save frame pointer explicitly; LLVM doesn't spill it even if it is
+      # marked as clobbered.
+      pushq %rbp
+      # Push instruction pointer of the old context and switch to
+      # the new context.
+      call 1f
+      # Restore frame pointer.
+      popq %rbp
+      # Continue executing old context.
+      jmp 2f

-    1:
-    # Remember stack pointer of the old context, in case %rdx==%rsi.
-    movq %rsp, %rbx
-    # Load stack pointer of the new context.
-    movq (%rdx), %rsp
-    # Save stack pointer of the old context.
-    movq %rbx, (%rsi)
+      1:
+      # Remember stack pointer of the old context, in case %rdx==%rsi.
+      movq %rsp, %rbx
+      # Load stack pointer of the new context.
+      movq (%rdx), %rsp
+      # Save stack pointer of the old context.
+      movq %rbx, (%rsi)

-    # Pop instruction pointer of the new context (placed onto stack by
-    # the call above) and jump there; don't use `ret` to avoid return
-    # address mispredictions (~8ns on Ivy Bridge).
-    popq %rbx
-    jmpq *%rbx
-    2:
-    "#
-    : "={rdi}" (ret)
-    : "{rdi}" (arg)
-      "{rsi}" (old_sp)
-      "{rdx}" (new_sp)
-    : "rax", "rbx", "rcx", "rdx", "rsi", "rdi", //"rbp", "rsp",
-      "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
-      "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7",
-      "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15",
-      "xmm16", "xmm17", "xmm18", "xmm19", "xmm20", "xmm21", "xmm22", "xmm23",
-      "xmm24", "xmm25", "xmm26", "xmm27", "xmm28", "xmm29", "xmm30", "xmm31",
-      "cc", "fpsr", "flags", "memory"
-    // Ideally, we would set the LLVM "noredzone" attribute on this function
-    // (and it would be propagated to the call site). Unfortunately, rustc
-    // provides no such functionality. Fortunately, by a lucky coincidence,
-    // the "alignstack" LLVM inline assembly option does exactly the same
-    // thing on x86_64.
-    : "volatile", "alignstack");
-  ret
+      # Pop instruction pointer of the new context (placed onto stack by
+      # the call above) and jump there; don't use `ret` to avoid return
+      # address mispredictions (~8ns on Ivy Bridge).
+      popq %rbx
+      jmpq *%rbx
+      2:
+      "#
+    }
+  }
+
+  #[cfg(not(windows))]
+  #[inline(always)]
+  unsafe fn swap_impl(arg: usize, old_sp: &mut StackPointer, new_sp: &StackPointer) -> usize {
+    let ret: usize;
+    asm!(swap_body!()
+      : "={rdi}" (ret)
+      : "{rdi}" (arg)
+        "{rsi}" (old_sp)
+        "{rdx}" (new_sp)
+      : "rax", "rbx", "rcx", "rdx", "rsi", "rdi", //"rbp", "rsp",
+        "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
+        "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7",
+        "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15",
+        "xmm16", "xmm17", "xmm18", "xmm19", "xmm20", "xmm21", "xmm22", "xmm23",
+        "xmm24", "xmm25", "xmm26", "xmm27", "xmm28", "xmm29", "xmm30", "xmm31",
+        "cc", "fpsr", "flags", "memory"
+      // Ideally, we would set the LLVM "noredzone" attribute on this function
+      // (and it would be propagated to the call site). Unfortunately, rustc
+      // provides no such functionality. Fortunately, by a lucky coincidence,
+      // the "alignstack" LLVM inline assembly option does exactly the same
+      // thing on x86_64.
+      : "volatile", "alignstack");
+    ret
+  }
+
+  #[cfg(windows)]
+  #[inline(always)]
+  unsafe fn swap_impl(arg: usize, old_sp: &mut StackPointer, new_sp: &StackPointer) -> usize {
+    let ret: usize;
+    asm!(swap_body!()
+      : "={rcx}" (ret)
+      : "{rcx}" (arg)
+        "{rsi}" (old_sp)
+        "{rdx}" (new_sp)
+      : "rax", "rbx", "rcx", "rdx", "rsi", "rdi", //"rbp", "rsp",
+        "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
+        "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7",
+        "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15",
+        "xmm16", "xmm17", "xmm18", "xmm19", "xmm20", "xmm21", "xmm22", "xmm23",
+        "xmm24", "xmm25", "xmm26", "xmm27", "xmm28", "xmm29", "xmm30", "xmm31",
+        "cc", "fpsr", "flags", "memory"
+      // Ideally, we would set the LLVM "noredzone" attribute on this function
+      // (and it would be propagated to the call site). Unfortunately, rustc
+      // provides no such functionality. Fortunately, by a lucky coincidence,
+      // the "alignstack" LLVM inline assembly option does exactly the same
+      // thing on x86_64.
+      : "volatile", "alignstack");
+    ret
+  }
+
+  swap_impl(arg, old_sp, new_sp)
 }
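
Usage note (not part of the patch): both swap_impl variants pin `arg` to the
first-argument register of the platform's C ABI (%rdi on SysV, %rcx on Win64),
so the value rides through the switch untouched by the assembly and comes back
as swap's return value when a context is resumed; presumably the entry function
installed by init receives its usize argument the same way. Below is a minimal
sketch of a round trip using only the two signatures visible above. The
(parent, child) pointer-pair convention for `arg` and the `roundtrip` helper
are invented here for illustration and are not part of the crate:

use std::mem;

// Sketch convention: `arg` carries a pointer to the (parent, child)
// stack-pointer pair so the child context can switch back to its parent.
unsafe extern "C" fn child(arg: usize) -> ! {
    let sps = &mut *(arg as *mut (StackPointer, StackPointer));
    loop {
        // Save our stack pointer into sps.1, load the parent's from sps.0,
        // and hand the parent 42; returns if the parent swaps back into us.
        swap(42, &mut sps.1, &sps.0);
    }
}

unsafe fn roundtrip(stack: &Stack) {
    // sps.0 is stored through %rsi by `swap` (movq %rbx, (%rsi) above)
    // before it is ever read, so it may start uninitialized in this sketch.
    let mut sps: (StackPointer, StackPointer) =
        (mem::uninitialized(), init(stack, child));
    let arg = &mut sps as *mut (StackPointer, StackPointer) as usize;
    assert_eq!(swap(arg, &mut sps.0, &sps.1), 42);
}

This also shows why the `%rdx==%rsi` aliasing case called out in the assembly
matters: nothing stops a caller from passing the same location as both the
save slot and the load slot, so the old %rsp is parked in %rbx before the
new one is loaded.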