From fff625767cd7411f570d73575aa81aef2ae6c48e Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Wed, 7 Sep 2016 13:56:32 +0100 Subject: [PATCH] Clean up and simplify the implementation of generators --- benches/generator.rs | 2 +- src/arch/mod.rs | 111 ++++++++++++++++++++++++++++ src/arch/or1k.rs | 52 +++++++------ src/arch/x86.rs | 85 ++++++++++++--------- src/arch/x86_64.rs | 66 ++++++++++------- src/context.rs | 172 ------------------------------------------- src/generator.rs | 89 +++++++++++----------- src/lib.rs | 3 +- tests/generator.rs | 4 +- 9 files changed, 274 insertions(+), 310 deletions(-) delete mode 100644 src/context.rs diff --git a/benches/generator.rs b/benches/generator.rs index bfd5dd2..bbc5f76 100644 --- a/benches/generator.rs +++ b/benches/generator.rs @@ -17,5 +17,5 @@ fn generate(b: &mut test::Bencher) { loop { input = yielder.suspend(input) } }); - b.iter(|| test::black_box(identity.resume(test::black_box(0)))); + b.iter(|| for _ in 0..10 { test::black_box(identity.resume(test::black_box(0))); }); } diff --git a/src/arch/mod.rs b/src/arch/mod.rs index 06a5afb..70b3272 100644 --- a/src/arch/mod.rs +++ b/src/arch/mod.rs @@ -13,3 +13,114 @@ pub use self::imp::*; #[cfg_attr(target_arch = "x86_64", path = "x86_64.rs")] #[cfg_attr(target_arch = "or1k", path = "or1k.rs")] mod imp; + +#[cfg(test)] +mod tests { + extern crate test; + extern crate simd; + + use arch::{self, StackPointer}; + use ::OsStack; + + #[test] + fn context() { + unsafe extern "C" fn adder(arg: usize, stack_ptr: StackPointer) -> ! { + println!("it's alive! arg: {}", arg); + let (arg, stack_ptr) = arch::swap(arg + 1, stack_ptr, None); + println!("still alive! arg: {}", arg); + arch::swap(arg + 1, stack_ptr, None); + panic!("i should be dead"); + } + + unsafe { + let stack = OsStack::new(4 << 20).unwrap(); + let stack_ptr = arch::init(&stack, adder); + + let (ret, stack_ptr) = arch::swap(10, stack_ptr, Some(&stack)); + assert_eq!(ret, 11); + let (ret, _) = arch::swap(50, stack_ptr, Some(&stack)); + assert_eq!(ret, 51); + } + } + + #[test] + fn context_simd() { + unsafe extern "C" fn permuter(arg: usize, stack_ptr: StackPointer) -> ! { + // This will crash if the stack is not aligned properly. + let x = simd::i32x4::splat(arg as i32); + let y = x * x; + println!("simd result: {:?}", y); + let (_, stack_ptr) = arch::swap(0, stack_ptr, None); + // And try again after a context switch. + let x = simd::i32x4::splat(arg as i32); + let y = x * x; + println!("simd result: {:?}", y); + arch::swap(0, stack_ptr, None); + panic!("i should be dead"); + } + + unsafe { + let stack = OsStack::new(4 << 20).unwrap(); + let stack_ptr = arch::init(&stack, permuter); + + let (_, stack_ptr) = arch::swap(10, stack_ptr, Some(&stack)); + arch::swap(20, stack_ptr, Some(&stack)); + } + } + + unsafe extern "C" fn do_panic(arg: usize, stack_ptr: StackPointer) -> ! { + match arg { + 0 => panic!("arg=0"), + 1 => { + arch::swap(0, stack_ptr, None); + panic!("arg=1"); + } + _ => unreachable!() + } + } + + #[test] + #[should_panic="arg=0"] + fn panic_after_start() { + unsafe { + let stack = OsStack::new(4 << 20).unwrap(); + let stack_ptr = arch::init(&stack, do_panic); + + arch::swap(0, stack_ptr, Some(&stack)); + } + } + + #[test] + #[should_panic="arg=1"] + fn panic_after_swap() { + unsafe { + let stack = OsStack::new(4 << 20).unwrap(); + let stack_ptr = arch::init(&stack, do_panic); + + let (_, stack_ptr) = arch::swap(1, stack_ptr, Some(&stack)); + arch::swap(0, stack_ptr, Some(&stack)); + } + } + + #[bench] + fn swap(b: &mut test::Bencher) { + unsafe extern "C" fn loopback(mut arg: usize, mut stack_ptr: StackPointer) -> ! { + // This deliberately does not ignore arg, to measure the time it takes + // to move the return value between registers. + loop { + let data = arch::swap(arg, stack_ptr, None); + arg = data.0; + stack_ptr = data.1; + } + } + + unsafe { + let stack = OsStack::new(4 << 20).unwrap(); + let mut stack_ptr = arch::init(&stack, loopback); + + b.iter(|| for _ in 0..10 { + stack_ptr = arch::swap(0, stack_ptr, Some(&stack)).1; + }); + } + } +} diff --git a/src/arch/or1k.rs b/src/arch/or1k.rs index c23b2ed..7fa243c 100644 --- a/src/arch/or1k.rs +++ b/src/arch/or1k.rs @@ -14,7 +14,8 @@ // * OR1K C ABI passes the first argument in r3. We also use r3 to pass a value // while swapping context; this is an arbitrary choice // (we clobber all registers and could use any of them) but this allows us -// to reuse the swap function to perform the initial call. +// to reuse the swap function to perform the initial call. We do the same +// thing with r4 to pass the stack pointer to the new context. // // To understand the DWARF CFI code in this file, keep in mind these facts: // * CFI is "call frame information"; a set of instructions to a debugger or @@ -47,7 +48,7 @@ pub const STACK_ALIGNMENT: usize = 4; #[derive(Debug, Clone, Copy)] pub struct StackPointer(*mut usize); -pub unsafe fn init(stack: &Stack, f: unsafe extern "C" fn(usize) -> !) -> StackPointer { +pub unsafe fn init(stack: &Stack, f: unsafe extern "C" fn(usize, StackPointer) -> !) -> StackPointer { #[naked] unsafe extern "C" fn trampoline_1() { asm!( @@ -96,6 +97,12 @@ pub unsafe fn init(stack: &Stack, f: unsafe extern "C" fn(usize) -> !) -> StackP .cfi_offset r2, -4 .cfi_offset r9, -8 + # This nop is here so that the return address of the swap trampoline + # doesn't point to the start of the symbol. This confuses gdb's backtraces, + # causing them to think the parent function is trampoline_1 instead of + # trampoline_2. + nop + # Call the provided function. l.lwz r4, 8(r1) l.jalr r4 @@ -123,25 +130,31 @@ pub unsafe fn init(stack: &Stack, f: unsafe extern "C" fn(usize) -> !) -> StackP // Call frame for trampoline_2. The CFA slot is updated by swap::trampoline // each time a context switch is performed. - push(&mut sp, 0xdead0cfa); // CFA slot + push(&mut sp, 0xdead0cfa); // CFA slot push(&mut sp, trampoline_1 as usize + 4); // Return after the nop // Call frame for swap::trampoline. We set up the r2 value to point to the // parent call frame. let frame = sp; - push(&mut sp, frame.0 as usize); // Pointer to parent call frame - push(&mut sp, trampoline_2 as usize); // Entry point + push(&mut sp, frame.0 as usize); // Pointer to parent call frame + push(&mut sp, trampoline_2 as usize + 4); // Entry point, skip initial nop - // The call frame for swap::trampoline is actually in the red zone and not - // below the stack pointer. + // The last two values are read by the swap trampoline and are actually in the + // red zone and not below the stack pointer. frame } #[inline(always)] -pub unsafe fn swap(arg: usize, old_sp: *mut StackPointer, new_sp: StackPointer, - new_stack: &Stack) -> usize { +pub unsafe fn swap(arg: usize, new_sp: StackPointer, + new_stack: Option<&Stack>) -> (usize, StackPointer) { // Address of the topmost CFA stack slot. - let new_cfa = (new_stack.base() as *mut usize).offset(-2); + let mut dummy: usize = mem::uninitialized(); + let new_cfa = if let Some(new_stack) = new_stack { + (new_stack.base() as *mut usize).offset(-2) + } else { + // Just pass a dummy pointer if we aren't linking the stack + &mut dummy + }; #[naked] unsafe extern "C" fn trampoline() { @@ -160,17 +173,13 @@ pub unsafe fn swap(arg: usize, old_sp: *mut StackPointer, new_sp: StackPointer, l.addi r7, r1, -8 l.sw 0(r6), r7 - # Switch to the new stack for unwinding purposes. The old stack may no - # longer be valid now that we have modified the link. - .cfi_def_cfa_register r5 - - # Save stack pointer of the old context. - l.sw 0(r4), r1 + # Pass the stack pointer of the old context to the new one. + l.or r4, r0, r1 # Load stack pointer of the new context. l.or r1, r0, r5 - .cfi_def_cfa_register r1 # Restore frame pointer and link register of the new context. + # Load frame and instruction pointers of the new context. l.lwz r2, -4(r1) l.lwz r9, -8(r1) @@ -182,23 +191,24 @@ pub unsafe fn swap(arg: usize, old_sp: *mut StackPointer, new_sp: StackPointer, } let ret: usize; + let ret_sp: *mut usize; asm!( r#" # Call the trampoline to switch to the new context. - l.jal ${1} + l.jal ${2} l.nop "# : "={r3}" (ret) + "={r4}" (ret_sp) : "s" (trampoline as usize) "{r3}" (arg) - "{r4}" (old_sp) "{r5}" (new_sp.0) "{r6}" (new_cfa) - :/*"r0", "r1", "r2", "r3",*/"r4", "r5", "r6", "r7", + :/*"r0", "r1", "r2", "r3", "r4",*/"r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23", "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31", "cc", "memory" : "volatile"); - ret + (ret, StackPointer(ret_sp)) } diff --git a/src/arch/x86.rs b/src/arch/x86.rs index 462ee50..1543a03 100644 --- a/src/arch/x86.rs +++ b/src/arch/x86.rs @@ -41,6 +41,7 @@ // * The 1st init trampoline tells the unwinder to restore %ebp and its return // address from the stack frame at %ebp (in the parent stack), thus continuing // unwinding at the swap call site instead of falling off the end of context stack. +use core::mem; use stack::Stack; pub const STACK_ALIGNMENT: usize = 16; @@ -48,7 +49,7 @@ pub const STACK_ALIGNMENT: usize = 16; #[derive(Debug, Clone, Copy)] pub struct StackPointer(*mut usize); -pub unsafe fn init(stack: &Stack, f: unsafe extern "C" fn(usize) -> !) -> StackPointer { +pub unsafe fn init(stack: &Stack, f: unsafe extern "C" fn(usize, StackPointer) -> !) -> StackPointer { #[cfg(not(target_vendor = "apple"))] #[naked] unsafe extern "C" fn trampoline_1() { @@ -69,8 +70,8 @@ pub unsafe fn init(stack: &Stack, f: unsafe extern "C" fn(usize) -> !) -> StackP # will use %ebp+8 as the next call frame address, restore return address # from CFA-4 and restore %ebp from CFA-8. This mirrors what the second half # of `swap_trampoline` does. - .cfi_def_cfa ebp, 8 - .cfi_offset ebp, -8 + .cfi_def_cfa %ebp, 8 + .cfi_offset %ebp, -8 # This nop is here so that the initial swap doesn't return to the start # of the trampoline, which confuses the unwinder since it will look for @@ -97,8 +98,8 @@ pub unsafe fn init(stack: &Stack, f: unsafe extern "C" fn(usize) -> !) -> StackP # Identical to the above, except avoids .local/.size that aren't available on Mach-O. __morestack: .private_extern __morestack - .cfi_def_cfa ebp, 8 - .cfi_offset ebp, -8 + .cfi_def_cfa %ebp, 8 + .cfi_offset %ebp, -8 nop nop "# @@ -114,13 +115,20 @@ pub unsafe fn init(stack: &Stack, f: unsafe extern "C" fn(usize) -> !) -> StackP # will restore %ebp (and thus CFA of the first trampoline) from the stack slot. # This stack slot is updated every time swap() is called to point to the bottom # of the stack of the context switch just switched from. - .cfi_def_cfa ebp, 8 - .cfi_offset ebp, -8 + .cfi_def_cfa %ebp, 8 + .cfi_offset %ebp, -8 - # Push argument. - pushl %eax + # This nop is here so that the return address of the swap trampoline + # doesn't point to the start of the symbol. This confuses gdb's backtraces, + # causing them to think the parent function is trampoline_1 instead of + # trampoline_2. + nop + + # Push arguments. + pushl %esi + pushl %edi # Call the provided function. - call *12(%esp) + calll *16(%esp) "# : : : : "volatile") } @@ -140,27 +148,36 @@ pub unsafe fn init(stack: &Stack, f: unsafe extern "C" fn(usize) -> !) -> StackP // such as perf or dtrace. let mut sp = StackPointer(stack.base() as *mut usize); + push(&mut sp, 0 as usize); // Padding to ensure the stack is properly aligned + push(&mut sp, 0 as usize); // Padding to ensure the stack is properly aligned + push(&mut sp, 0 as usize); // Padding to ensure the stack is properly aligned push(&mut sp, f as usize); // Function that trampoline_2 should call // Call frame for trampoline_2. The CFA slot is updated by swap::trampoline // each time a context switch is performed. push(&mut sp, trampoline_1 as usize + 2); // Return after the 2 nops - push(&mut sp, 0xdead0cfa); // CFA slot + push(&mut sp, 0xdead0cfa); // CFA slot // Call frame for swap::trampoline. We set up the %ebp value to point to the // parent call frame. let frame = sp; - push(&mut sp, trampoline_2 as usize); // Entry point - push(&mut sp, frame.0 as usize); // Pointer to parent call frame + push(&mut sp, trampoline_2 as usize + 1); // Entry point, skip initial nop + push(&mut sp, frame.0 as usize); // Pointer to parent call frame sp } #[inline(always)] -pub unsafe fn swap(arg: usize, old_sp: *mut StackPointer, new_sp: StackPointer, - new_stack: &Stack) -> usize { +pub unsafe fn swap(arg: usize, new_sp: StackPointer, + new_stack: Option<&Stack>) -> (usize, StackPointer) { // Address of the topmost CFA stack slot. - let new_cfa = (new_stack.base() as *mut usize).offset(-3); + let mut dummy: usize = mem::uninitialized(); + let new_cfa = if let Some(new_stack) = new_stack { + (new_stack.base() as *mut usize).offset(-6) + } else { + // Just pass a dummy pointer if we aren't linking the stack + &mut dummy + }; #[naked] unsafe extern "C" fn trampoline() { @@ -171,54 +188,50 @@ pub unsafe fn swap(arg: usize, old_sp: *mut StackPointer, new_sp: StackPointer, # the call instruction that invoked the trampoline. pushl %ebp .cfi_adjust_cfa_offset 4 - .cfi_rel_offset ebp, 0 + .cfi_rel_offset %ebp, 0 # Link the call stacks together by writing the current stack bottom # address to the CFA slot in the new stack. - movl %esp, (%edi) + movl %esp, (%ecx) - # Switch to the new stack for unwinding purposes. The old stack may no - # longer be valid now that we have modified the link. - .cfi_def_cfa_register edx - - # Save stack pointer of the old context. - movl %esp, (%esi) + # Pass the stack pointer of the old context to the new one. + movl %esp, %esi # Load stack pointer of the new context. movl %edx, %esp - .cfi_def_cfa_register esp # Restore frame pointer of the new context. popl %ebp .cfi_adjust_cfa_offset -4 - .cfi_restore ebp + .cfi_restore %ebp # Return into the new context. Use `pop` and `jmp` instead of a `ret` # to avoid return address mispredictions (~8ns per `ret` on Ivy Bridge). - popl %ecx + popl %eax .cfi_adjust_cfa_offset -4 - .cfi_register eip, ecx - jmpl *%ecx + .cfi_register %eip, %eax + jmpl *%eax "# : : : : "volatile") } let ret: usize; + let ret_sp: *mut usize; asm!( r#" # Push instruction pointer of the old context and switch to # the new context. - call ${1:c} + call ${2:c} "# - : "={eax}" (ret) + : "={edi}" (ret) + "={esi}" (ret_sp) : "s" (trampoline as usize) - "{eax}" (arg) - "{esi}" (old_sp) + "{edi}" (arg) "{edx}" (new_sp.0) - "{edi}" (new_cfa) - :/*"eax",*/"ebx", "ecx", "edx", "esi", "edi",/*"ebp", "esp",*/ + "{ecx}" (new_cfa) + : "eax", "ebx", "ecx", "edx", /*"esi", "edi", "ebp", "esp",*/ "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "cc", "dirflag", "fpsr", "flags", "memory" : "volatile"); - ret + (ret, StackPointer(ret_sp)) } diff --git a/src/arch/x86_64.rs b/src/arch/x86_64.rs index 6ed2630..a14a0f6 100644 --- a/src/arch/x86_64.rs +++ b/src/arch/x86_64.rs @@ -19,7 +19,8 @@ // * x86_64 SysV C ABI passes the first argument in %rdi. We also use %rdi // to pass a value while swapping context; this is an arbitrary choice // (we clobber all registers and could use any of them) but this allows us -// to reuse the swap function to perform the initial call. +// to reuse the swap function to perform the initial call. We do the same +// thing with %rsi to pass the stack pointer to the new context. // // To understand the DWARF CFI code in this file, keep in mind these facts: // * CFI is "call frame information"; a set of instructions to a debugger or @@ -45,6 +46,7 @@ // * The 1st init trampoline tells the unwinder to restore %rbp and its return // address from the stack frame at %rbp (in the parent stack), thus continuing // unwinding at the swap call site instead of falling off the end of context stack. +use core::mem; use stack::Stack; pub const STACK_ALIGNMENT: usize = 16; @@ -52,7 +54,7 @@ pub const STACK_ALIGNMENT: usize = 16; #[derive(Debug, Clone, Copy)] pub struct StackPointer(*mut usize); -pub unsafe fn init(stack: &Stack, f: unsafe extern "C" fn(usize) -> !) -> StackPointer { +pub unsafe fn init(stack: &Stack, f: unsafe extern "C" fn(usize, StackPointer) -> !) -> StackPointer { #[cfg(not(target_vendor = "apple"))] #[naked] unsafe extern "C" fn trampoline_1() { @@ -73,8 +75,8 @@ pub unsafe fn init(stack: &Stack, f: unsafe extern "C" fn(usize) -> !) -> StackP # will use %rbp+16 as the next call frame address, restore return address # from CFA-8 and restore %rbp from CFA-16. This mirrors what the second half # of `swap_trampoline` does. - .cfi_def_cfa rbp, 16 - .cfi_offset rbp, -16 + .cfi_def_cfa %rbp, 16 + .cfi_offset %rbp, -16 # This nop is here so that the initial swap doesn't return to the start # of the trampoline, which confuses the unwinder since it will look for @@ -101,8 +103,8 @@ pub unsafe fn init(stack: &Stack, f: unsafe extern "C" fn(usize) -> !) -> StackP # Identical to the above, except avoids .local/.size that aren't available on Mach-O. __morestack: .private_extern __morestack - .cfi_def_cfa rbp, 16 - .cfi_offset rbp, -16 + .cfi_def_cfa %rbp, 16 + .cfi_offset %rbp, -16 nop nop "# @@ -118,8 +120,14 @@ pub unsafe fn init(stack: &Stack, f: unsafe extern "C" fn(usize) -> !) -> StackP # will restore %rbp (and thus CFA of the first trampoline) from the stack slot. # This stack slot is updated every time swap() is called to point to the bottom # of the stack of the context switch just switched from. - .cfi_def_cfa rbp, 16 - .cfi_offset rbp, -16 + .cfi_def_cfa %rbp, 16 + .cfi_offset %rbp, -16 + + # This nop is here so that the return address of the swap trampoline + # doesn't point to the start of the symbol. This confuses gdb's backtraces, + # causing them to think the parent function is trampoline_1 instead of + # trampoline_2. + nop # Call the provided function. call *16(%rsp) @@ -148,22 +156,28 @@ pub unsafe fn init(stack: &Stack, f: unsafe extern "C" fn(usize) -> !) -> StackP // Call frame for trampoline_2. The CFA slot is updated by swap::trampoline // each time a context switch is performed. push(&mut sp, trampoline_1 as usize + 2); // Return after the 2 nops - push(&mut sp, 0xdeaddeaddead0cfa); // CFA slot + push(&mut sp, 0xdeaddeaddead0cfa); // CFA slot // Call frame for swap::trampoline. We set up the %rbp value to point to the // parent call frame. let frame = sp; - push(&mut sp, trampoline_2 as usize); // Entry point - push(&mut sp, frame.0 as usize); // Pointer to parent call frame + push(&mut sp, trampoline_2 as usize + 1); // Entry point, skip initial nop + push(&mut sp, frame.0 as usize); // Pointer to parent call frame sp } #[inline(always)] -pub unsafe fn swap(arg: usize, old_sp: *mut StackPointer, new_sp: StackPointer, - new_stack: &Stack) -> usize { +pub unsafe fn swap(arg: usize, new_sp: StackPointer, + new_stack: Option<&Stack>) -> (usize, StackPointer) { // Address of the topmost CFA stack slot. - let new_cfa = (new_stack.base() as *mut usize).offset(-4); + let mut dummy: usize = mem::uninitialized(); + let new_cfa = if let Some(new_stack) = new_stack { + (new_stack.base() as *mut usize).offset(-4) + } else { + // Just pass a dummy pointer if we aren't linking the stack + &mut dummy + }; #[naked] unsafe extern "C" fn trampoline() { @@ -174,51 +188,47 @@ pub unsafe fn swap(arg: usize, old_sp: *mut StackPointer, new_sp: StackPointer, # the call instruction that invoked the trampoline. pushq %rbp .cfi_adjust_cfa_offset 8 - .cfi_rel_offset rbp, 0 + .cfi_rel_offset %rbp, 0 # Link the call stacks together by writing the current stack bottom # address to the CFA slot in the new stack. movq %rsp, (%rcx) - # Switch to the new stack for unwinding purposes. The old stack may no - # longer be valid now that we have modified the link. - .cfi_def_cfa_register rdx - - # Save stack pointer of the old context. - movq %rsp, (%rsi) + # Pass the stack pointer of the old context to the new one. + movq %rsp, %rsi # Load stack pointer of the new context. movq %rdx, %rsp - .cfi_def_cfa_register rsp # Restore frame pointer of the new context. popq %rbp .cfi_adjust_cfa_offset -8 - .cfi_restore rbp + .cfi_restore %rbp # Return into the new context. Use `pop` and `jmp` instead of a `ret` # to avoid return address mispredictions (~8ns per `ret` on Ivy Bridge). popq %rax .cfi_adjust_cfa_offset -8 - .cfi_register rip, rax + .cfi_register %rip, %rax jmpq *%rax "# : : : : "volatile") } let ret: usize; + let ret_sp: *mut usize; asm!( r#" # Push instruction pointer of the old context and switch to # the new context. - call ${1:c} + call ${2:c} "# : "={rdi}" (ret) + "={rsi}" (ret_sp) : "s" (trampoline as usize) "{rdi}" (arg) - "{rsi}" (old_sp) "{rdx}" (new_sp.0) "{rcx}" (new_cfa) - : "rax", "rbx", "rcx", "rdx", "rsi", /*"rdi", "rbp", "rsp",*/ + : "rax", "rbx", "rcx", "rdx", /*"rsi", "rdi", "rbp", "rsp",*/ "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", @@ -232,5 +242,5 @@ pub unsafe fn swap(arg: usize, old_sp: *mut StackPointer, new_sp: StackPointer, // the "alignstack" LLVM inline assembly option does exactly the same // thing on x86_64. : "volatile", "alignstack"); - ret + (ret, StackPointer(ret_sp)) } diff --git a/src/context.rs b/src/context.rs deleted file mode 100644 index 34da702..0000000 --- a/src/context.rs +++ /dev/null @@ -1,172 +0,0 @@ -// This file is part of libfringe, a low-level green threading library. -// Copyright (c) edef , -// whitequark -// Licensed under the Apache License, Version 2.0, or the MIT license , at your option. This file may not be -// copied, modified, or distributed except according to those terms. -use stack; -use debug; -use arch; - -/// Context holds a suspended thread of execution along with a stack. -/// -/// It can be swapped into and out of with the swap method, -/// and once you're done with it, you can get the stack back through unwrap. -/// -/// Every operation is unsafe, because no guarantees can be made about -/// the state of the context. -#[derive(Debug)] -pub struct Context { - stack: Stack, - stack_id: debug::StackId, - stack_ptr: arch::StackPointer -} - -unsafe impl Send for Context - where Stack: stack::Stack + Send {} - -impl Context where Stack: stack::Stack { - /// Creates a new Context. When it is swapped into, it will call - /// `f(arg)`, where `arg` is the argument passed to `swap`. - pub unsafe fn new(stack: Stack, f: unsafe extern "C" fn(usize) -> !) -> Context { - let stack_id = debug::StackId::register(&stack); - let stack_ptr = arch::init(&stack, f); - Context { - stack: stack, - stack_id: stack_id, - stack_ptr: stack_ptr - } - } - - /// Unwraps the context, returning the stack it contained. - pub unsafe fn unwrap(self) -> Stack { - self.stack - } -} - -impl Context where OldStack: stack::Stack { - /// Switches to `in_ctx`, saving the current thread of execution to `out_ctx`. - #[inline(always)] - pub unsafe fn swap(old_ctx: *mut Context, - new_ctx: *const Context, - arg: usize) -> usize - where NewStack: stack::Stack { - arch::swap(arg, &mut (*old_ctx).stack_ptr, (*new_ctx).stack_ptr, &(*new_ctx).stack) - } -} - -#[cfg(test)] -mod test { - extern crate test; - extern crate simd; - - use std::ptr; - use super::Context; - use ::OsStack; - - #[thread_local] - static mut ctx_slot: *mut Context = ptr::null_mut(); - - #[test] - fn context() { - unsafe extern "C" fn adder(arg: usize) -> ! { - println!("it's alive! arg: {}", arg); - let arg = Context::swap(ctx_slot, ctx_slot, arg + 1); - println!("still alive! arg: {}", arg); - Context::swap(ctx_slot, ctx_slot, arg + 1); - panic!("i should be dead"); - } - - unsafe { - let stack = OsStack::new(4 << 20).unwrap(); - let mut ctx = Context::new(stack, adder); - ctx_slot = &mut ctx; - - let ret = Context::swap(ctx_slot, ctx_slot, 10); - assert_eq!(ret, 11); - let ret = Context::swap(ctx_slot, ctx_slot, 50); - assert_eq!(ret, 51); - } - } - - #[test] - fn context_simd() { - unsafe extern "C" fn permuter(arg: usize) -> ! { - // This will crash if the stack is not aligned properly. - let x = simd::i32x4::splat(arg as i32); - let y = x * x; - println!("simd result: {:?}", y); - Context::swap(ctx_slot, ctx_slot, 0); - // And try again after a context switch. - let x = simd::i32x4::splat(arg as i32); - let y = x * x; - println!("simd result: {:?}", y); - Context::swap(ctx_slot, ctx_slot, 0); - panic!("i should be dead"); - } - - unsafe { - let stack = OsStack::new(4 << 20).unwrap(); - let mut ctx = Context::new(stack, permuter); - ctx_slot = &mut ctx; - - Context::swap(ctx_slot, ctx_slot, 10); - Context::swap(ctx_slot, ctx_slot, 20); - } - } - - unsafe extern "C" fn do_panic(arg: usize) -> ! { - match arg { - 0 => panic!("arg=0"), - 1 => { - Context::swap(ctx_slot, ctx_slot, 0); - panic!("arg=1"); - } - _ => unreachable!() - } - } - - #[test] - #[should_panic="arg=0"] - fn panic_after_start() { - unsafe { - let stack = OsStack::new(4 << 20).unwrap(); - let mut ctx = Context::new(stack, do_panic); - - Context::swap(&mut ctx, &ctx, 0); - } - } - - #[test] - #[should_panic="arg=1"] - fn panic_after_swap() { - unsafe { - let stack = OsStack::new(4 << 20).unwrap(); - let mut ctx = Context::new(stack, do_panic); - ctx_slot = &mut ctx; - - Context::swap(&mut ctx, &ctx, 1); - Context::swap(&mut ctx, &ctx, 0); - } - } - - #[bench] - fn swap(b: &mut test::Bencher) { - unsafe extern "C" fn loopback(mut arg: usize) -> ! { - // This deliberately does not ignore arg, to measure the time it takes - // to move the return value between registers. - let ctx_ptr = ctx_slot; - loop { arg = Context::swap(ctx_ptr, ctx_ptr, arg) } - } - - unsafe { - let stack = OsStack::new(4 << 20).unwrap(); - let mut ctx = Context::new(stack, loopback); - ctx_slot = &mut ctx; - - let ctx_ptr = &mut ctx; - b.iter(|| Context::swap(ctx_ptr, ctx_ptr, 0)); - } - } -} diff --git a/src/generator.rs b/src/generator.rs index 79fe2d1..6870c29 100644 --- a/src/generator.rs +++ b/src/generator.rs @@ -16,7 +16,8 @@ use core::{ptr, mem}; use core::cell::Cell; use stack; -use context::Context; +use debug; +use arch::{self, StackPointer}; #[derive(Debug, Clone, Copy)] pub enum State { @@ -80,9 +81,11 @@ pub enum State { /// ``` #[derive(Debug)] pub struct Generator { - state: State, - context: Context, - phantom: (PhantomData<*const Input>, PhantomData<*const Output>) + state: State, + stack: Stack, + stack_id: debug::StackId, + stack_ptr: arch::StackPointer, + phantom: (PhantomData<*const Input>, PhantomData<*const Output>) } impl Generator @@ -92,7 +95,7 @@ impl Generator /// See also the [contract](../trait.GuardedStack.html) that needs to be fulfilled by `stack`. pub fn new(stack: Stack, f: F) -> Generator where Stack: stack::GuardedStack, - F: FnOnce(&mut Yielder, Input) + Send { + F: FnOnce(&mut Yielder, Input) + Send { unsafe { Generator::unsafe_new(stack, f) } } @@ -104,35 +107,36 @@ impl Generator /// /// See also the [contract](../trait.Stack.html) that needs to be fulfilled by `stack`. pub unsafe fn unsafe_new(stack: Stack, f: F) -> Generator - where F: FnOnce(&mut Yielder, Input) + Send { - unsafe extern "C" fn generator_wrapper(env: usize) -> ! + where F: FnOnce(&mut Yielder, Input) + Send { + unsafe extern "C" fn generator_wrapper(env: usize, stack_ptr: StackPointer) -> ! where Input: Send, Output: Send, Stack: stack::Stack, - F: FnOnce(&mut Yielder, Input) { + F: FnOnce(&mut Yielder, Input) { // Retrieve our environment from the callee and return control to it. - let (mut yielder, f) = ptr::read(env as *mut (Yielder, F)); - let data = Context::swap(yielder.context.get(), yielder.context.get(), 0); + let f = ptr::read(env as *const F); + let (data, stack_ptr) = arch::swap(0, stack_ptr, None); // See the second half of Yielder::suspend_bare. - let (new_context, input) = ptr::read(data as *mut (*mut Context, Input)); - yielder.context.set(new_context as *mut Context); + let input = ptr::read(data as *const Input); // Run the body of the generator. + let mut yielder = Yielder::new(stack_ptr); f(&mut yielder, input); // Past this point, the generator has dropped everything it has held. loop { yielder.suspend_bare(None); } } - let mut generator = Generator { - state: State::Runnable, - context: Context::new(stack, generator_wrapper::), - phantom: (PhantomData, PhantomData) - }; + let stack_id = debug::StackId::register(&stack); + let stack_ptr = arch::init(&stack, generator_wrapper::); // Transfer environment to the callee. - let mut env = (Yielder::new(&mut generator.context), f); - Context::swap(&mut generator.context, &generator.context, - &mut env as *mut (Yielder, F) as usize); - mem::forget(env); + let stack_ptr = arch::swap(&f as *const F as usize, stack_ptr, Some(&stack)).1; + mem::forget(f); - generator + Generator { + state: State::Runnable, + stack: stack, + stack_id: stack_id, + stack_ptr: stack_ptr, + phantom: (PhantomData, PhantomData) + } } /// Resumes the generator and return the next value it yields. @@ -148,13 +152,10 @@ impl Generator // Switch to the generator function, and retrieve the yielded value. let val = unsafe { - let mut data_in = (&mut self.context as *mut Context, input); - let data_out = - ptr::read(Context::swap(&mut self.context, &self.context, - &mut data_in as *mut (*mut Context, Input) as usize) - as *mut Option); - mem::forget(data_in); - data_out + let (data_out, stack_ptr) = arch::swap(&input as *const Input as usize, self.stack_ptr, Some(&self.stack)); + self.stack_ptr = stack_ptr; + mem::forget(input); + ptr::read(data_out as *const Option) }; // Unless the generator function has returned, it can be switched to again, so @@ -177,7 +178,7 @@ impl Generator pub fn unwrap(self) -> Stack { match self.state { State::Runnable => panic!("Argh! Bastard! Don't touch that!"), - State::Unavailable => unsafe { self.context.unwrap() } + State::Unavailable => self.stack } } } @@ -185,35 +186,27 @@ impl Generator /// Yielder is an interface provided to every generator through which it /// returns a value. #[derive(Debug)] -pub struct Yielder { - context: Cell<*mut Context>, +pub struct Yielder { + stack_ptr: Cell, phantom: (PhantomData<*const Input>, PhantomData<*const Output>) } -impl Yielder - where Input: Send, Output: Send, Stack: stack::Stack { - fn new(context: *mut Context) -> Yielder { +impl Yielder + where Input: Send, Output: Send { + fn new(stack_ptr: StackPointer) -> Yielder { Yielder { - context: Cell::new(context), + stack_ptr: Cell::new(stack_ptr), phantom: (PhantomData, PhantomData) } } #[inline(always)] - fn suspend_bare(&self, mut val: Option) -> Input { + fn suspend_bare(&self, val: Option) -> Input { unsafe { - let data = Context::swap(self.context.get(), self.context.get(), - &mut val as *mut Option as usize); + let (data, stack_ptr) = arch::swap(&val as *const Option as usize, self.stack_ptr.get(), None); + self.stack_ptr.set(stack_ptr); mem::forget(val); - let (new_context, input) = ptr::read(data as *mut (*mut Context, Input)); - // The generator can be moved (and with it, the context). - // This changes the address of the context. - // Thus, we update it after each swap. - self.context.set(new_context); - // However, between this point and the next time we enter suspend_bare - // the generator cannot be moved, as a &mut Generator is necessary - // to resume the generator function. - input + ptr::read(data as *const Input) } } diff --git a/src/lib.rs b/src/lib.rs index 1242efe..d111301 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -6,7 +6,7 @@ // copied, modified, or distributed except according to those terms. #![feature(asm, naked_functions, cfg_target_vendor)] #![cfg_attr(feature = "alloc", feature(alloc, heap_api))] -#![cfg_attr(test, feature(test, thread_local, const_fn))] +#![cfg_attr(test, feature(test))] #![no_std] //! libfringe is a library implementing safe, lightweight context switches, @@ -51,7 +51,6 @@ pub const STACK_ALIGNMENT: usize = arch::STACK_ALIGNMENT; mod debug; -mod context; mod stack; mod slice_stack; pub mod generator; diff --git a/tests/generator.rs b/tests/generator.rs index 7884b9b..174a378 100644 --- a/tests/generator.rs +++ b/tests/generator.rs @@ -7,10 +7,10 @@ // copied, modified, or distributed except according to those terms. extern crate fringe; -use fringe::{Stack, SliceStack, OwnedStack, OsStack}; +use fringe::{SliceStack, OwnedStack, OsStack}; use fringe::generator::{Generator, Yielder}; -fn add_one_fn(yielder: &mut Yielder, mut input: i32) { +fn add_one_fn(yielder: &mut Yielder, mut input: i32) { loop { if input == 0 { break } input = yielder.suspend(input + 1)