1
0
Fork 0

Clean up and simplify the implementation of generators

This commit is contained in:
Amanieu d'Antras 2016-09-07 13:56:32 +01:00 committed by edef
parent f7f209c1eb
commit fff625767c
9 changed files with 274 additions and 310 deletions

View File

@ -17,5 +17,5 @@ fn generate(b: &mut test::Bencher) {
loop { input = yielder.suspend(input) } loop { input = yielder.suspend(input) }
}); });
b.iter(|| test::black_box(identity.resume(test::black_box(0)))); b.iter(|| for _ in 0..10 { test::black_box(identity.resume(test::black_box(0))); });
} }

View File

@ -13,3 +13,114 @@ pub use self::imp::*;
#[cfg_attr(target_arch = "x86_64", path = "x86_64.rs")] #[cfg_attr(target_arch = "x86_64", path = "x86_64.rs")]
#[cfg_attr(target_arch = "or1k", path = "or1k.rs")] #[cfg_attr(target_arch = "or1k", path = "or1k.rs")]
mod imp; mod imp;
#[cfg(test)]
mod tests {
    extern crate test;
    extern crate simd;

    use arch::{self, StackPointer};
    use ::OsStack;

    /// Size in bytes of the stack allocated for every context in this module.
    const STACK_SIZE: usize = 4 << 20;

    /// Round-trips two values through a freshly initialised context. The
    /// context hands back each argument it receives, incremented by one.
    #[test]
    fn context() {
        unsafe extern "C" fn adder(first: usize, caller_sp: StackPointer) -> ! {
            println!("it's alive! arg: {}", first);
            // Switching back to the caller: no stack is linked, hence `None`.
            let (second, caller_sp) = arch::swap(first + 1, caller_sp, None);
            println!("still alive! arg: {}", second);
            arch::swap(second + 1, caller_sp, None);
            // The test never resumes this context a third time.
            panic!("i should be dead");
        }

        unsafe {
            let stack = OsStack::new(STACK_SIZE).unwrap();
            let sp = arch::init(&stack, adder);

            let (first_ret, sp) = arch::swap(10, sp, Some(&stack));
            assert_eq!(first_ret, 11);

            let (second_ret, _) = arch::swap(50, sp, Some(&stack));
            assert_eq!(second_ret, 51);
        }
    }

    /// Runs SIMD arithmetic inside a context, both on first entry and after
    /// the context has been switched away from and resumed.
    #[test]
    fn context_simd() {
        unsafe extern "C" fn permuter(seed: usize, caller_sp: StackPointer) -> ! {
            // This will crash if the stack is not aligned properly.
            let lanes = simd::i32x4::splat(seed as i32);
            let squared = lanes * lanes;
            println!("simd result: {:?}", squared);

            let (_, caller_sp) = arch::swap(0, caller_sp, None);

            // And try again after a context switch. The value received from
            // the second swap is ignored; the original argument is reused.
            let lanes = simd::i32x4::splat(seed as i32);
            let squared = lanes * lanes;
            println!("simd result: {:?}", squared);

            arch::swap(0, caller_sp, None);
            panic!("i should be dead");
        }

        unsafe {
            let stack = OsStack::new(STACK_SIZE).unwrap();
            let sp = arch::init(&stack, permuter);
            let (_, sp) = arch::swap(10, sp, Some(&stack));
            arch::swap(20, sp, Some(&stack));
        }
    }

    /// Context entry point shared by the two panic tests below.
    ///
    /// * `arg == 0`: panics immediately with `"arg=0"`.
    /// * `arg == 1`: switches back to the caller once, then panics with
    ///   `"arg=1"` when it is resumed.
    unsafe extern "C" fn do_panic(arg: usize, stack_ptr: StackPointer) -> ! {
        if arg == 0 {
            panic!("arg=0")
        } else if arg == 1 {
            arch::swap(0, stack_ptr, None);
            panic!("arg=1");
        } else {
            unreachable!()
        }
    }

    /// Expects a panic raised on the context's very first entry to propagate
    /// out of the test function.
    #[test]
    #[should_panic="arg=0"]
    fn panic_after_start() {
        unsafe {
            let stack = OsStack::new(STACK_SIZE).unwrap();
            let sp = arch::init(&stack, do_panic);
            arch::swap(0, sp, Some(&stack));
        }
    }

    /// Expects a panic raised after the context has already been suspended
    /// and resumed once to propagate out of the test function.
    #[test]
    #[should_panic="arg=1"]
    fn panic_after_swap() {
        unsafe {
            let stack = OsStack::new(STACK_SIZE).unwrap();
            let sp = arch::init(&stack, do_panic);
            let (_, sp) = arch::swap(1, sp, Some(&stack));
            arch::swap(0, sp, Some(&stack));
        }
    }

    /// Benchmarks a round trip into a context that immediately switches back.
    /// Each measured iteration performs ten such round trips.
    #[bench]
    fn swap(b: &mut test::Bencher) {
        unsafe extern "C" fn loopback(mut arg: usize, mut stack_ptr: StackPointer) -> ! {
            // The received value is deliberately echoed back rather than
            // dropped, so the benchmark includes the time it takes to move
            // the return value between registers.
            loop {
                let (echoed, caller_sp) = arch::swap(arg, stack_ptr, None);
                arg = echoed;
                stack_ptr = caller_sp;
            }
        }

        unsafe {
            let stack = OsStack::new(STACK_SIZE).unwrap();
            let mut sp = arch::init(&stack, loopback);
            b.iter(|| {
                for _ in 0..10 {
                    let (_, next_sp) = arch::swap(0, sp, Some(&stack));
                    sp = next_sp;
                }
            });
        }
    }
}

View File

@ -14,7 +14,8 @@
// * OR1K C ABI passes the first argument in r3. We also use r3 to pass a value // * OR1K C ABI passes the first argument in r3. We also use r3 to pass a value
// while swapping context; this is an arbitrary choice // while swapping context; this is an arbitrary choice
// (we clobber all registers and could use any of them) but this allows us // (we clobber all registers and could use any of them) but this allows us
// to reuse the swap function to perform the initial call. // to reuse the swap function to perform the initial call. We do the same
// thing with r4 to pass the stack pointer to the new context.
// //
// To understand the DWARF CFI code in this file, keep in mind these facts: // To understand the DWARF CFI code in this file, keep in mind these facts:
// * CFI is "call frame information"; a set of instructions to a debugger or // * CFI is "call frame information"; a set of instructions to a debugger or
@ -47,7 +48,7 @@ pub const STACK_ALIGNMENT: usize = 4;
#[derive(Debug, Clone, Copy)] #[derive(Debug, Clone, Copy)]
pub struct StackPointer(*mut usize); pub struct StackPointer(*mut usize);
pub unsafe fn init(stack: &Stack, f: unsafe extern "C" fn(usize) -> !) -> StackPointer { pub unsafe fn init(stack: &Stack, f: unsafe extern "C" fn(usize, StackPointer) -> !) -> StackPointer {
#[naked] #[naked]
unsafe extern "C" fn trampoline_1() { unsafe extern "C" fn trampoline_1() {
asm!( asm!(
@ -96,6 +97,12 @@ pub unsafe fn init(stack: &Stack, f: unsafe extern "C" fn(usize) -> !) -> StackP
.cfi_offset r2, -4 .cfi_offset r2, -4
.cfi_offset r9, -8 .cfi_offset r9, -8
# This nop is here so that the return address of the swap trampoline
# doesn't point to the start of the symbol. This confuses gdb's backtraces,
# causing them to think the parent function is trampoline_1 instead of
# trampoline_2.
nop
# Call the provided function. # Call the provided function.
l.lwz r4, 8(r1) l.lwz r4, 8(r1)
l.jalr r4 l.jalr r4
@ -130,18 +137,24 @@ pub unsafe fn init(stack: &Stack, f: unsafe extern "C" fn(usize) -> !) -> StackP
// parent call frame. // parent call frame.
let frame = sp; let frame = sp;
push(&mut sp, frame.0 as usize); // Pointer to parent call frame push(&mut sp, frame.0 as usize); // Pointer to parent call frame
push(&mut sp, trampoline_2 as usize); // Entry point push(&mut sp, trampoline_2 as usize + 4); // Entry point, skip initial nop
// The call frame for swap::trampoline is actually in the red zone and not // The last two values are read by the swap trampoline and are actually in the
// below the stack pointer. // red zone and not below the stack pointer.
frame frame
} }
#[inline(always)] #[inline(always)]
pub unsafe fn swap(arg: usize, old_sp: *mut StackPointer, new_sp: StackPointer, pub unsafe fn swap(arg: usize, new_sp: StackPointer,
new_stack: &Stack) -> usize { new_stack: Option<&Stack>) -> (usize, StackPointer) {
// Address of the topmost CFA stack slot. // Address of the topmost CFA stack slot.
let new_cfa = (new_stack.base() as *mut usize).offset(-2); let mut dummy: usize = mem::uninitialized();
let new_cfa = if let Some(new_stack) = new_stack {
(new_stack.base() as *mut usize).offset(-2)
} else {
// Just pass a dummy pointer if we aren't linking the stack
&mut dummy
};
#[naked] #[naked]
unsafe extern "C" fn trampoline() { unsafe extern "C" fn trampoline() {
@ -160,17 +173,13 @@ pub unsafe fn swap(arg: usize, old_sp: *mut StackPointer, new_sp: StackPointer,
l.addi r7, r1, -8 l.addi r7, r1, -8
l.sw 0(r6), r7 l.sw 0(r6), r7
# Switch to the new stack for unwinding purposes. The old stack may no # Pass the stack pointer of the old context to the new one.
# longer be valid now that we have modified the link. l.or r4, r0, r1
.cfi_def_cfa_register r5
# Save stack pointer of the old context.
l.sw 0(r4), r1
# Load stack pointer of the new context. # Load stack pointer of the new context.
l.or r1, r0, r5 l.or r1, r0, r5
.cfi_def_cfa_register r1
# Restore frame pointer and link register of the new context. # Restore frame pointer and link register of the new context.
# Load frame and instruction pointers of the new context.
l.lwz r2, -4(r1) l.lwz r2, -4(r1)
l.lwz r9, -8(r1) l.lwz r9, -8(r1)
@ -182,23 +191,24 @@ pub unsafe fn swap(arg: usize, old_sp: *mut StackPointer, new_sp: StackPointer,
} }
let ret: usize; let ret: usize;
let ret_sp: *mut usize;
asm!( asm!(
r#" r#"
# Call the trampoline to switch to the new context. # Call the trampoline to switch to the new context.
l.jal ${1} l.jal ${2}
l.nop l.nop
"# "#
: "={r3}" (ret) : "={r3}" (ret)
"={r4}" (ret_sp)
: "s" (trampoline as usize) : "s" (trampoline as usize)
"{r3}" (arg) "{r3}" (arg)
"{r4}" (old_sp)
"{r5}" (new_sp.0) "{r5}" (new_sp.0)
"{r6}" (new_cfa) "{r6}" (new_cfa)
:/*"r0", "r1", "r2", "r3",*/"r4", "r5", "r6", "r7", :/*"r0", "r1", "r2", "r3", "r4",*/"r5", "r6", "r7",
"r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
"r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23", "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
"r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31", "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31",
"cc", "memory" "cc", "memory"
: "volatile"); : "volatile");
ret (ret, StackPointer(ret_sp))
} }

View File

@ -41,6 +41,7 @@
// * The 1st init trampoline tells the unwinder to restore %ebp and its return // * The 1st init trampoline tells the unwinder to restore %ebp and its return
// address from the stack frame at %ebp (in the parent stack), thus continuing // address from the stack frame at %ebp (in the parent stack), thus continuing
// unwinding at the swap call site instead of falling off the end of context stack. // unwinding at the swap call site instead of falling off the end of context stack.
use core::mem;
use stack::Stack; use stack::Stack;
pub const STACK_ALIGNMENT: usize = 16; pub const STACK_ALIGNMENT: usize = 16;
@ -48,7 +49,7 @@ pub const STACK_ALIGNMENT: usize = 16;
#[derive(Debug, Clone, Copy)] #[derive(Debug, Clone, Copy)]
pub struct StackPointer(*mut usize); pub struct StackPointer(*mut usize);
pub unsafe fn init(stack: &Stack, f: unsafe extern "C" fn(usize) -> !) -> StackPointer { pub unsafe fn init(stack: &Stack, f: unsafe extern "C" fn(usize, StackPointer) -> !) -> StackPointer {
#[cfg(not(target_vendor = "apple"))] #[cfg(not(target_vendor = "apple"))]
#[naked] #[naked]
unsafe extern "C" fn trampoline_1() { unsafe extern "C" fn trampoline_1() {
@ -69,8 +70,8 @@ pub unsafe fn init(stack: &Stack, f: unsafe extern "C" fn(usize) -> !) -> StackP
# will use %ebp+8 as the next call frame address, restore return address # will use %ebp+8 as the next call frame address, restore return address
# from CFA-4 and restore %ebp from CFA-8. This mirrors what the second half # from CFA-4 and restore %ebp from CFA-8. This mirrors what the second half
# of `swap_trampoline` does. # of `swap_trampoline` does.
.cfi_def_cfa ebp, 8 .cfi_def_cfa %ebp, 8
.cfi_offset ebp, -8 .cfi_offset %ebp, -8
# This nop is here so that the initial swap doesn't return to the start # This nop is here so that the initial swap doesn't return to the start
# of the trampoline, which confuses the unwinder since it will look for # of the trampoline, which confuses the unwinder since it will look for
@ -97,8 +98,8 @@ pub unsafe fn init(stack: &Stack, f: unsafe extern "C" fn(usize) -> !) -> StackP
# Identical to the above, except avoids .local/.size that aren't available on Mach-O. # Identical to the above, except avoids .local/.size that aren't available on Mach-O.
__morestack: __morestack:
.private_extern __morestack .private_extern __morestack
.cfi_def_cfa ebp, 8 .cfi_def_cfa %ebp, 8
.cfi_offset ebp, -8 .cfi_offset %ebp, -8
nop nop
nop nop
"# "#
@ -114,13 +115,20 @@ pub unsafe fn init(stack: &Stack, f: unsafe extern "C" fn(usize) -> !) -> StackP
# will restore %ebp (and thus CFA of the first trampoline) from the stack slot. # will restore %ebp (and thus CFA of the first trampoline) from the stack slot.
# This stack slot is updated every time swap() is called to point to the bottom # This stack slot is updated every time swap() is called to point to the bottom
# of the stack of the context switch just switched from. # of the stack of the context switch just switched from.
.cfi_def_cfa ebp, 8 .cfi_def_cfa %ebp, 8
.cfi_offset ebp, -8 .cfi_offset %ebp, -8
# Push argument. # This nop is here so that the return address of the swap trampoline
pushl %eax # doesn't point to the start of the symbol. This confuses gdb's backtraces,
# causing them to think the parent function is trampoline_1 instead of
# trampoline_2.
nop
# Push arguments.
pushl %esi
pushl %edi
# Call the provided function. # Call the provided function.
call *12(%esp) calll *16(%esp)
"# "#
: : : : "volatile") : : : : "volatile")
} }
@ -140,6 +148,9 @@ pub unsafe fn init(stack: &Stack, f: unsafe extern "C" fn(usize) -> !) -> StackP
// such as perf or dtrace. // such as perf or dtrace.
let mut sp = StackPointer(stack.base() as *mut usize); let mut sp = StackPointer(stack.base() as *mut usize);
push(&mut sp, 0 as usize); // Padding to ensure the stack is properly aligned
push(&mut sp, 0 as usize); // Padding to ensure the stack is properly aligned
push(&mut sp, 0 as usize); // Padding to ensure the stack is properly aligned
push(&mut sp, f as usize); // Function that trampoline_2 should call push(&mut sp, f as usize); // Function that trampoline_2 should call
// Call frame for trampoline_2. The CFA slot is updated by swap::trampoline // Call frame for trampoline_2. The CFA slot is updated by swap::trampoline
@ -150,17 +161,23 @@ pub unsafe fn init(stack: &Stack, f: unsafe extern "C" fn(usize) -> !) -> StackP
// Call frame for swap::trampoline. We set up the %ebp value to point to the // Call frame for swap::trampoline. We set up the %ebp value to point to the
// parent call frame. // parent call frame.
let frame = sp; let frame = sp;
push(&mut sp, trampoline_2 as usize); // Entry point push(&mut sp, trampoline_2 as usize + 1); // Entry point, skip initial nop
push(&mut sp, frame.0 as usize); // Pointer to parent call frame push(&mut sp, frame.0 as usize); // Pointer to parent call frame
sp sp
} }
#[inline(always)] #[inline(always)]
pub unsafe fn swap(arg: usize, old_sp: *mut StackPointer, new_sp: StackPointer, pub unsafe fn swap(arg: usize, new_sp: StackPointer,
new_stack: &Stack) -> usize { new_stack: Option<&Stack>) -> (usize, StackPointer) {
// Address of the topmost CFA stack slot. // Address of the topmost CFA stack slot.
let new_cfa = (new_stack.base() as *mut usize).offset(-3); let mut dummy: usize = mem::uninitialized();
let new_cfa = if let Some(new_stack) = new_stack {
(new_stack.base() as *mut usize).offset(-6)
} else {
// Just pass a dummy pointer if we aren't linking the stack
&mut dummy
};
#[naked] #[naked]
unsafe extern "C" fn trampoline() { unsafe extern "C" fn trampoline() {
@ -171,54 +188,50 @@ pub unsafe fn swap(arg: usize, old_sp: *mut StackPointer, new_sp: StackPointer,
# the call instruction that invoked the trampoline. # the call instruction that invoked the trampoline.
pushl %ebp pushl %ebp
.cfi_adjust_cfa_offset 4 .cfi_adjust_cfa_offset 4
.cfi_rel_offset ebp, 0 .cfi_rel_offset %ebp, 0
# Link the call stacks together by writing the current stack bottom # Link the call stacks together by writing the current stack bottom
# address to the CFA slot in the new stack. # address to the CFA slot in the new stack.
movl %esp, (%edi) movl %esp, (%ecx)
# Switch to the new stack for unwinding purposes. The old stack may no # Pass the stack pointer of the old context to the new one.
# longer be valid now that we have modified the link. movl %esp, %esi
.cfi_def_cfa_register edx
# Save stack pointer of the old context.
movl %esp, (%esi)
# Load stack pointer of the new context. # Load stack pointer of the new context.
movl %edx, %esp movl %edx, %esp
.cfi_def_cfa_register esp
# Restore frame pointer of the new context. # Restore frame pointer of the new context.
popl %ebp popl %ebp
.cfi_adjust_cfa_offset -4 .cfi_adjust_cfa_offset -4
.cfi_restore ebp .cfi_restore %ebp
# Return into the new context. Use `pop` and `jmp` instead of a `ret` # Return into the new context. Use `pop` and `jmp` instead of a `ret`
# to avoid return address mispredictions (~8ns per `ret` on Ivy Bridge). # to avoid return address mispredictions (~8ns per `ret` on Ivy Bridge).
popl %ecx popl %eax
.cfi_adjust_cfa_offset -4 .cfi_adjust_cfa_offset -4
.cfi_register eip, ecx .cfi_register %eip, %eax
jmpl *%ecx jmpl *%eax
"# "#
: : : : "volatile") : : : : "volatile")
} }
let ret: usize; let ret: usize;
let ret_sp: *mut usize;
asm!( asm!(
r#" r#"
# Push instruction pointer of the old context and switch to # Push instruction pointer of the old context and switch to
# the new context. # the new context.
call ${1:c} call ${2:c}
"# "#
: "={eax}" (ret) : "={edi}" (ret)
"={esi}" (ret_sp)
: "s" (trampoline as usize) : "s" (trampoline as usize)
"{eax}" (arg) "{edi}" (arg)
"{esi}" (old_sp)
"{edx}" (new_sp.0) "{edx}" (new_sp.0)
"{edi}" (new_cfa) "{ecx}" (new_cfa)
:/*"eax",*/"ebx", "ecx", "edx", "esi", "edi",/*"ebp", "esp",*/ : "eax", "ebx", "ecx", "edx", /*"esi", "edi", "ebp", "esp",*/
"mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7", "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7",
"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7",
"cc", "dirflag", "fpsr", "flags", "memory" "cc", "dirflag", "fpsr", "flags", "memory"
: "volatile"); : "volatile");
ret (ret, StackPointer(ret_sp))
} }

View File

@ -19,7 +19,8 @@
// * x86_64 SysV C ABI passes the first argument in %rdi. We also use %rdi // * x86_64 SysV C ABI passes the first argument in %rdi. We also use %rdi
// to pass a value while swapping context; this is an arbitrary choice // to pass a value while swapping context; this is an arbitrary choice
// (we clobber all registers and could use any of them) but this allows us // (we clobber all registers and could use any of them) but this allows us
// to reuse the swap function to perform the initial call. // to reuse the swap function to perform the initial call. We do the same
// thing with %rsi to pass the stack pointer to the new context.
// //
// To understand the DWARF CFI code in this file, keep in mind these facts: // To understand the DWARF CFI code in this file, keep in mind these facts:
// * CFI is "call frame information"; a set of instructions to a debugger or // * CFI is "call frame information"; a set of instructions to a debugger or
@ -45,6 +46,7 @@
// * The 1st init trampoline tells the unwinder to restore %rbp and its return // * The 1st init trampoline tells the unwinder to restore %rbp and its return
// address from the stack frame at %rbp (in the parent stack), thus continuing // address from the stack frame at %rbp (in the parent stack), thus continuing
// unwinding at the swap call site instead of falling off the end of context stack. // unwinding at the swap call site instead of falling off the end of context stack.
use core::mem;
use stack::Stack; use stack::Stack;
pub const STACK_ALIGNMENT: usize = 16; pub const STACK_ALIGNMENT: usize = 16;
@ -52,7 +54,7 @@ pub const STACK_ALIGNMENT: usize = 16;
#[derive(Debug, Clone, Copy)] #[derive(Debug, Clone, Copy)]
pub struct StackPointer(*mut usize); pub struct StackPointer(*mut usize);
pub unsafe fn init(stack: &Stack, f: unsafe extern "C" fn(usize) -> !) -> StackPointer { pub unsafe fn init(stack: &Stack, f: unsafe extern "C" fn(usize, StackPointer) -> !) -> StackPointer {
#[cfg(not(target_vendor = "apple"))] #[cfg(not(target_vendor = "apple"))]
#[naked] #[naked]
unsafe extern "C" fn trampoline_1() { unsafe extern "C" fn trampoline_1() {
@ -73,8 +75,8 @@ pub unsafe fn init(stack: &Stack, f: unsafe extern "C" fn(usize) -> !) -> StackP
# will use %rbp+16 as the next call frame address, restore return address # will use %rbp+16 as the next call frame address, restore return address
# from CFA-8 and restore %rbp from CFA-16. This mirrors what the second half # from CFA-8 and restore %rbp from CFA-16. This mirrors what the second half
# of `swap_trampoline` does. # of `swap_trampoline` does.
.cfi_def_cfa rbp, 16 .cfi_def_cfa %rbp, 16
.cfi_offset rbp, -16 .cfi_offset %rbp, -16
# This nop is here so that the initial swap doesn't return to the start # This nop is here so that the initial swap doesn't return to the start
# of the trampoline, which confuses the unwinder since it will look for # of the trampoline, which confuses the unwinder since it will look for
@ -101,8 +103,8 @@ pub unsafe fn init(stack: &Stack, f: unsafe extern "C" fn(usize) -> !) -> StackP
# Identical to the above, except avoids .local/.size that aren't available on Mach-O. # Identical to the above, except avoids .local/.size that aren't available on Mach-O.
__morestack: __morestack:
.private_extern __morestack .private_extern __morestack
.cfi_def_cfa rbp, 16 .cfi_def_cfa %rbp, 16
.cfi_offset rbp, -16 .cfi_offset %rbp, -16
nop nop
nop nop
"# "#
@ -118,8 +120,14 @@ pub unsafe fn init(stack: &Stack, f: unsafe extern "C" fn(usize) -> !) -> StackP
# will restore %rbp (and thus CFA of the first trampoline) from the stack slot. # will restore %rbp (and thus CFA of the first trampoline) from the stack slot.
# This stack slot is updated every time swap() is called to point to the bottom # This stack slot is updated every time swap() is called to point to the bottom
# of the stack of the context switch just switched from. # of the stack of the context switch just switched from.
.cfi_def_cfa rbp, 16 .cfi_def_cfa %rbp, 16
.cfi_offset rbp, -16 .cfi_offset %rbp, -16
# This nop is here so that the return address of the swap trampoline
# doesn't point to the start of the symbol. This confuses gdb's backtraces,
# causing them to think the parent function is trampoline_1 instead of
# trampoline_2.
nop
# Call the provided function. # Call the provided function.
call *16(%rsp) call *16(%rsp)
@ -153,17 +161,23 @@ pub unsafe fn init(stack: &Stack, f: unsafe extern "C" fn(usize) -> !) -> StackP
// Call frame for swap::trampoline. We set up the %rbp value to point to the // Call frame for swap::trampoline. We set up the %rbp value to point to the
// parent call frame. // parent call frame.
let frame = sp; let frame = sp;
push(&mut sp, trampoline_2 as usize); // Entry point push(&mut sp, trampoline_2 as usize + 1); // Entry point, skip initial nop
push(&mut sp, frame.0 as usize); // Pointer to parent call frame push(&mut sp, frame.0 as usize); // Pointer to parent call frame
sp sp
} }
#[inline(always)] #[inline(always)]
pub unsafe fn swap(arg: usize, old_sp: *mut StackPointer, new_sp: StackPointer, pub unsafe fn swap(arg: usize, new_sp: StackPointer,
new_stack: &Stack) -> usize { new_stack: Option<&Stack>) -> (usize, StackPointer) {
// Address of the topmost CFA stack slot. // Address of the topmost CFA stack slot.
let new_cfa = (new_stack.base() as *mut usize).offset(-4); let mut dummy: usize = mem::uninitialized();
let new_cfa = if let Some(new_stack) = new_stack {
(new_stack.base() as *mut usize).offset(-4)
} else {
// Just pass a dummy pointer if we aren't linking the stack
&mut dummy
};
#[naked] #[naked]
unsafe extern "C" fn trampoline() { unsafe extern "C" fn trampoline() {
@ -174,51 +188,47 @@ pub unsafe fn swap(arg: usize, old_sp: *mut StackPointer, new_sp: StackPointer,
# the call instruction that invoked the trampoline. # the call instruction that invoked the trampoline.
pushq %rbp pushq %rbp
.cfi_adjust_cfa_offset 8 .cfi_adjust_cfa_offset 8
.cfi_rel_offset rbp, 0 .cfi_rel_offset %rbp, 0
# Link the call stacks together by writing the current stack bottom # Link the call stacks together by writing the current stack bottom
# address to the CFA slot in the new stack. # address to the CFA slot in the new stack.
movq %rsp, (%rcx) movq %rsp, (%rcx)
# Switch to the new stack for unwinding purposes. The old stack may no # Pass the stack pointer of the old context to the new one.
# longer be valid now that we have modified the link. movq %rsp, %rsi
.cfi_def_cfa_register rdx
# Save stack pointer of the old context.
movq %rsp, (%rsi)
# Load stack pointer of the new context. # Load stack pointer of the new context.
movq %rdx, %rsp movq %rdx, %rsp
.cfi_def_cfa_register rsp
# Restore frame pointer of the new context. # Restore frame pointer of the new context.
popq %rbp popq %rbp
.cfi_adjust_cfa_offset -8 .cfi_adjust_cfa_offset -8
.cfi_restore rbp .cfi_restore %rbp
# Return into the new context. Use `pop` and `jmp` instead of a `ret` # Return into the new context. Use `pop` and `jmp` instead of a `ret`
# to avoid return address mispredictions (~8ns per `ret` on Ivy Bridge). # to avoid return address mispredictions (~8ns per `ret` on Ivy Bridge).
popq %rax popq %rax
.cfi_adjust_cfa_offset -8 .cfi_adjust_cfa_offset -8
.cfi_register rip, rax .cfi_register %rip, %rax
jmpq *%rax jmpq *%rax
"# "#
: : : : "volatile") : : : : "volatile")
} }
let ret: usize; let ret: usize;
let ret_sp: *mut usize;
asm!( asm!(
r#" r#"
# Push instruction pointer of the old context and switch to # Push instruction pointer of the old context and switch to
# the new context. # the new context.
call ${1:c} call ${2:c}
"# "#
: "={rdi}" (ret) : "={rdi}" (ret)
"={rsi}" (ret_sp)
: "s" (trampoline as usize) : "s" (trampoline as usize)
"{rdi}" (arg) "{rdi}" (arg)
"{rsi}" (old_sp)
"{rdx}" (new_sp.0) "{rdx}" (new_sp.0)
"{rcx}" (new_cfa) "{rcx}" (new_cfa)
: "rax", "rbx", "rcx", "rdx", "rsi", /*"rdi", "rbp", "rsp",*/ : "rax", "rbx", "rcx", "rdx", /*"rsi", "rdi", "rbp", "rsp",*/
"r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
"mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7", "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7",
"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7",
@ -232,5 +242,5 @@ pub unsafe fn swap(arg: usize, old_sp: *mut StackPointer, new_sp: StackPointer,
// the "alignstack" LLVM inline assembly option does exactly the same // the "alignstack" LLVM inline assembly option does exactly the same
// thing on x86_64. // thing on x86_64.
: "volatile", "alignstack"); : "volatile", "alignstack");
ret (ret, StackPointer(ret_sp))
} }

View File

@ -1,172 +0,0 @@
// This file is part of libfringe, a low-level green threading library.
// Copyright (c) edef <edef@edef.eu>,
// whitequark <whitequark@whitequark.org>
// Licensed under the Apache License, Version 2.0, <LICENSE-APACHE or
// http://apache.org/licenses/LICENSE-2.0> or the MIT license <LICENSE-MIT or
// http://opensource.org/licenses/MIT>, at your option. This file may not be
// copied, modified, or distributed except according to those terms.
use stack;
use debug;
use arch;
/// Context holds a suspended thread of execution along with a stack.
///
/// It can be swapped into and out of with the swap method,
/// and once you're done with it, you can get the stack back through unwrap.
///
/// Every operation is unsafe, because no guarantees can be made about
/// the state of the context.
#[derive(Debug)]
pub struct Context<Stack: stack::Stack> {
/// The stack this context executes on; handed back to the caller by `unwrap`.
stack: Stack,
/// Value returned by `debug::StackId::register` for `stack` when the context
/// was created. It is never read; it is only held for the context's lifetime.
/// NOTE(review): presumably unregisters the stack when dropped; confirm in `debug`.
stack_id: debug::StackId,
/// Saved stack pointer of the suspended execution. Produced by `arch::init`
/// in `new`, and passed to `arch::swap` as the save slot whenever this
/// context is the one being switched away from.
stack_ptr: arch::StackPointer
}
// A `Context` may be sent to another thread whenever its stack type may.
// The impl has to be written by hand because `arch::StackPointer` wraps a raw
// pointer, which suppresses the automatic `Send` implementation.
// NOTE(review): soundness presumably rests on `stack_ptr` only ever pointing
// into `stack`, which moves together with the context; confirm.
unsafe impl<Stack> Send for Context<Stack>
where Stack: stack::Stack + Send {}
impl<Stack> Context<Stack> where Stack: stack::Stack {
    /// Builds a context that will execute `f` on `stack`.
    ///
    /// Nothing runs until the context is first swapped into; at that point
    /// `f(arg)` is called, with `arg` being the value given to that `swap`.
    /// `f` must never return, as its signature requires.
    pub unsafe fn new(stack: Stack, f: unsafe extern "C" fn(usize) -> !) -> Context<Stack> {
        // Register the stack first, then lay out the initial frame on it.
        let id = debug::StackId::register(&stack);
        let initial_sp = arch::init(&stack, f);
        Context {
            stack_ptr: initial_sp,
            stack_id: id,
            stack: stack
        }
    }

    /// Consumes the context and gives back the stack it was created with.
    /// The remaining fields are dropped.
    pub unsafe fn unwrap(self) -> Stack {
        let Context { stack, .. } = self;
        stack
    }
}
impl<OldStack> Context<OldStack> where OldStack: stack::Stack {
/// Switches to `new_ctx`, saving the current thread of execution to `old_ctx`.
///
/// `arg` is delivered to the context being switched to: as the argument of
/// its entry function on first entry, otherwise as the return value of the
/// `swap` call it is suspended in. Likewise, this call returns the `arg` of
/// whichever later `swap` resumes `old_ctx`.
///
/// `old_ctx` and `new_ctx` may refer to the same context, as the tests in
/// this file do: the stack pointer stored in `new_ctx` is read by value
/// before the call that overwrites the one stored in `old_ctx`.
///
/// # Safety
///
/// Both pointers are dereferenced and therefore must point to live
/// `Context` values.
#[inline(always)]
pub unsafe fn swap<NewStack>(old_ctx: *mut Context<OldStack>,
new_ctx: *const Context<NewStack>,
arg: usize) -> usize
where NewStack: stack::Stack {
// Save slot: old_ctx.stack_ptr. Target: new_ctx.stack_ptr, running on new_ctx.stack.
arch::swap(arg, &mut (*old_ctx).stack_ptr, (*new_ctx).stack_ptr, &(*new_ctx).stack)
}
}
// Tests and a benchmark for `Context`.
//
// Every test uses a single `Context` as both the "old" and the "new" side of
// `Context::swap`, so each swap stores the current stack pointer into the
// context and resumes the one that was stored there before.
#[cfg(test)]
mod test {
extern crate test;
extern crate simd;
use std::ptr;
use super::Context;
use ::OsStack;
// Per-thread pointer to the context under test. The entry functions take only
// a `usize` argument, so this is how they reach the context they run in.
// Tests that need it set it before the first swap.
#[thread_local]
static mut ctx_slot: *mut Context<OsStack> = ptr::null_mut();
// Round-trips two values through the context; it returns each argument it
// receives, incremented by one.
#[test]
fn context() {
unsafe extern "C" fn adder(arg: usize) -> ! {
println!("it's alive! arg: {}", arg);
let arg = Context::swap(ctx_slot, ctx_slot, arg + 1);
println!("still alive! arg: {}", arg);
Context::swap(ctx_slot, ctx_slot, arg + 1);
// The test never resumes this context a third time.
panic!("i should be dead");
}
unsafe {
let stack = OsStack::new(4 << 20).unwrap();
let mut ctx = Context::new(stack, adder);
ctx_slot = &mut ctx;
let ret = Context::swap(ctx_slot, ctx_slot, 10);
assert_eq!(ret, 11);
let ret = Context::swap(ctx_slot, ctx_slot, 50);
assert_eq!(ret, 51);
}
}
// Runs SIMD arithmetic inside the context, on first entry and again after it
// has been suspended and resumed.
#[test]
fn context_simd() {
unsafe extern "C" fn permuter(arg: usize) -> ! {
// This will crash if the stack is not aligned properly.
let x = simd::i32x4::splat(arg as i32);
let y = x * x;
println!("simd result: {:?}", y);
Context::swap(ctx_slot, ctx_slot, 0);
// And try again after a context switch.
let x = simd::i32x4::splat(arg as i32);
let y = x * x;
println!("simd result: {:?}", y);
Context::swap(ctx_slot, ctx_slot, 0);
panic!("i should be dead");
}
unsafe {
let stack = OsStack::new(4 << 20).unwrap();
let mut ctx = Context::new(stack, permuter);
ctx_slot = &mut ctx;
Context::swap(ctx_slot, ctx_slot, 10);
Context::swap(ctx_slot, ctx_slot, 20);
}
}
// Entry point shared by the two panic tests below:
//   arg == 0: panic immediately with "arg=0";
//   arg == 1: swap back once through `ctx_slot`, then panic with "arg=1"
//             when resumed.
unsafe extern "C" fn do_panic(arg: usize) -> ! {
match arg {
0 => panic!("arg=0"),
1 => {
Context::swap(ctx_slot, ctx_slot, 0);
panic!("arg=1");
}
_ => unreachable!()
}
}
// Expects a panic raised on the context's first entry to propagate out of the
// test. `ctx_slot` is not set here; the `arg == 0` path never reads it.
#[test]
#[should_panic="arg=0"]
fn panic_after_start() {
unsafe {
let stack = OsStack::new(4 << 20).unwrap();
let mut ctx = Context::new(stack, do_panic);
Context::swap(&mut ctx, &ctx, 0);
}
}
// Expects a panic raised after the context has been suspended and resumed
// once to propagate out of the test.
#[test]
#[should_panic="arg=1"]
fn panic_after_swap() {
unsafe {
let stack = OsStack::new(4 << 20).unwrap();
let mut ctx = Context::new(stack, do_panic);
ctx_slot = &mut ctx;
Context::swap(&mut ctx, &ctx, 1);
Context::swap(&mut ctx, &ctx, 0);
}
}
// Benchmarks one round trip into a context that immediately swaps back.
#[bench]
fn swap(b: &mut test::Bencher) {
unsafe extern "C" fn loopback(mut arg: usize) -> ! {
// This deliberately does not ignore arg, to measure the time it takes
// to move the return value between registers.
// The thread-local is read once, outside the loop.
let ctx_ptr = ctx_slot;
loop { arg = Context::swap(ctx_ptr, ctx_ptr, arg) }
}
unsafe {
let stack = OsStack::new(4 << 20).unwrap();
let mut ctx = Context::new(stack, loopback);
ctx_slot = &mut ctx;
let ctx_ptr = &mut ctx;
b.iter(|| Context::swap(ctx_ptr, ctx_ptr, 0));
}
}
}

View File

@ -16,7 +16,8 @@ use core::{ptr, mem};
use core::cell::Cell; use core::cell::Cell;
use stack; use stack;
use context::Context; use debug;
use arch::{self, StackPointer};
#[derive(Debug, Clone, Copy)] #[derive(Debug, Clone, Copy)]
pub enum State { pub enum State {
@ -81,7 +82,9 @@ pub enum State {
#[derive(Debug)] #[derive(Debug)]
pub struct Generator<Input: Send, Output: Send, Stack: stack::Stack> { pub struct Generator<Input: Send, Output: Send, Stack: stack::Stack> {
state: State, state: State,
context: Context<Stack>, stack: Stack,
stack_id: debug::StackId,
stack_ptr: arch::StackPointer,
phantom: (PhantomData<*const Input>, PhantomData<*const Output>) phantom: (PhantomData<*const Input>, PhantomData<*const Output>)
} }
@ -92,7 +95,7 @@ impl<Input, Output, Stack> Generator<Input, Output, Stack>
/// See also the [contract](../trait.GuardedStack.html) that needs to be fulfilled by `stack`. /// See also the [contract](../trait.GuardedStack.html) that needs to be fulfilled by `stack`.
pub fn new<F>(stack: Stack, f: F) -> Generator<Input, Output, Stack> pub fn new<F>(stack: Stack, f: F) -> Generator<Input, Output, Stack>
where Stack: stack::GuardedStack, where Stack: stack::GuardedStack,
F: FnOnce(&mut Yielder<Input, Output, Stack>, Input) + Send { F: FnOnce(&mut Yielder<Input, Output>, Input) + Send {
unsafe { Generator::unsafe_new(stack, f) } unsafe { Generator::unsafe_new(stack, f) }
} }
@ -104,35 +107,36 @@ impl<Input, Output, Stack> Generator<Input, Output, Stack>
/// ///
/// See also the [contract](../trait.Stack.html) that needs to be fulfilled by `stack`. /// See also the [contract](../trait.Stack.html) that needs to be fulfilled by `stack`.
pub unsafe fn unsafe_new<F>(stack: Stack, f: F) -> Generator<Input, Output, Stack> pub unsafe fn unsafe_new<F>(stack: Stack, f: F) -> Generator<Input, Output, Stack>
where F: FnOnce(&mut Yielder<Input, Output, Stack>, Input) + Send { where F: FnOnce(&mut Yielder<Input, Output>, Input) + Send {
unsafe extern "C" fn generator_wrapper<Input, Output, Stack, F>(env: usize) -> ! unsafe extern "C" fn generator_wrapper<Input, Output, Stack, F>(env: usize, stack_ptr: StackPointer) -> !
where Input: Send, Output: Send, Stack: stack::Stack, where Input: Send, Output: Send, Stack: stack::Stack,
F: FnOnce(&mut Yielder<Input, Output, Stack>, Input) { F: FnOnce(&mut Yielder<Input, Output>, Input) {
// Retrieve our environment from the caller and return control to it. // Retrieve our environment from the caller and return control to it.
let (mut yielder, f) = ptr::read(env as *mut (Yielder<Input, Output, Stack>, F)); let f = ptr::read(env as *const F);
let data = Context::swap(yielder.context.get(), yielder.context.get(), 0); let (data, stack_ptr) = arch::swap(0, stack_ptr, None);
// See the second half of Yielder::suspend_bare. // See the second half of Yielder::suspend_bare.
let (new_context, input) = ptr::read(data as *mut (*mut Context<Stack>, Input)); let input = ptr::read(data as *const Input);
yielder.context.set(new_context as *mut Context<Stack>);
// Run the body of the generator. // Run the body of the generator.
let mut yielder = Yielder::new(stack_ptr);
f(&mut yielder, input); f(&mut yielder, input);
// Past this point, the generator has dropped everything it has held. // Past this point, the generator has dropped everything it has held.
loop { yielder.suspend_bare(None); } loop { yielder.suspend_bare(None); }
} }
let mut generator = Generator { let stack_id = debug::StackId::register(&stack);
state: State::Runnable, let stack_ptr = arch::init(&stack, generator_wrapper::<Input, Output, Stack, F>);
context: Context::new(stack, generator_wrapper::<Input, Output, Stack, F>),
phantom: (PhantomData, PhantomData)
};
// Transfer environment to the callee. // Transfer environment to the callee.
let mut env = (Yielder::new(&mut generator.context), f); let stack_ptr = arch::swap(&f as *const F as usize, stack_ptr, Some(&stack)).1;
Context::swap(&mut generator.context, &generator.context, mem::forget(f);
&mut env as *mut (Yielder<Input, Output, Stack>, F) as usize);
mem::forget(env);
generator Generator {
state: State::Runnable,
stack: stack,
stack_id: stack_id,
stack_ptr: stack_ptr,
phantom: (PhantomData, PhantomData)
}
} }
/// Resumes the generator and returns the next value it yields. /// Resumes the generator and returns the next value it yields.
@ -148,13 +152,10 @@ impl<Input, Output, Stack> Generator<Input, Output, Stack>
// Switch to the generator function, and retrieve the yielded value. // Switch to the generator function, and retrieve the yielded value.
let val = unsafe { let val = unsafe {
let mut data_in = (&mut self.context as *mut Context<Stack>, input); let (data_out, stack_ptr) = arch::swap(&input as *const Input as usize, self.stack_ptr, Some(&self.stack));
let data_out = self.stack_ptr = stack_ptr;
ptr::read(Context::swap(&mut self.context, &self.context, mem::forget(input);
&mut data_in as *mut (*mut Context<Stack>, Input) as usize) ptr::read(data_out as *const Option<Output>)
as *mut Option<Output>);
mem::forget(data_in);
data_out
}; };
// Unless the generator function has returned, it can be switched to again, so // Unless the generator function has returned, it can be switched to again, so
@ -177,7 +178,7 @@ impl<Input, Output, Stack> Generator<Input, Output, Stack>
pub fn unwrap(self) -> Stack { pub fn unwrap(self) -> Stack {
match self.state { match self.state {
State::Runnable => panic!("Argh! Bastard! Don't touch that!"), State::Runnable => panic!("Argh! Bastard! Don't touch that!"),
State::Unavailable => unsafe { self.context.unwrap() } State::Unavailable => self.stack
} }
} }
} }
@ -185,35 +186,27 @@ impl<Input, Output, Stack> Generator<Input, Output, Stack>
/// Yielder is an interface provided to every generator through which it /// Yielder is an interface provided to every generator through which it
/// returns a value. /// returns a value.
#[derive(Debug)] #[derive(Debug)]
pub struct Yielder<Input: Send, Output: Send, Stack: stack::Stack> { pub struct Yielder<Input: Send, Output: Send> {
context: Cell<*mut Context<Stack>>, stack_ptr: Cell<StackPointer>,
phantom: (PhantomData<*const Input>, PhantomData<*const Output>) phantom: (PhantomData<*const Input>, PhantomData<*const Output>)
} }
impl<Input, Output, Stack> Yielder<Input, Output, Stack> impl<Input, Output> Yielder<Input, Output>
where Input: Send, Output: Send, Stack: stack::Stack { where Input: Send, Output: Send {
fn new(context: *mut Context<Stack>) -> Yielder<Input, Output, Stack> { fn new(stack_ptr: StackPointer) -> Yielder<Input, Output> {
Yielder { Yielder {
context: Cell::new(context), stack_ptr: Cell::new(stack_ptr),
phantom: (PhantomData, PhantomData) phantom: (PhantomData, PhantomData)
} }
} }
#[inline(always)] #[inline(always)]
fn suspend_bare(&self, mut val: Option<Output>) -> Input { fn suspend_bare(&self, val: Option<Output>) -> Input {
unsafe { unsafe {
let data = Context::swap(self.context.get(), self.context.get(), let (data, stack_ptr) = arch::swap(&val as *const Option<Output> as usize, self.stack_ptr.get(), None);
&mut val as *mut Option<Output> as usize); self.stack_ptr.set(stack_ptr);
mem::forget(val); mem::forget(val);
let (new_context, input) = ptr::read(data as *mut (*mut Context<Stack>, Input)); ptr::read(data as *const Input)
// The generator can be moved (and with it, the context).
// This changes the address of the context.
// Thus, we update it after each swap.
self.context.set(new_context);
// However, between this point and the next time we enter suspend_bare
// the generator cannot be moved, as a &mut Generator is necessary
// to resume the generator function.
input
} }
} }

View File

@ -6,7 +6,7 @@
// copied, modified, or distributed except according to those terms. // copied, modified, or distributed except according to those terms.
#![feature(asm, naked_functions, cfg_target_vendor)] #![feature(asm, naked_functions, cfg_target_vendor)]
#![cfg_attr(feature = "alloc", feature(alloc, heap_api))] #![cfg_attr(feature = "alloc", feature(alloc, heap_api))]
#![cfg_attr(test, feature(test, thread_local, const_fn))] #![cfg_attr(test, feature(test))]
#![no_std] #![no_std]
//! libfringe is a library implementing safe, lightweight context switches, //! libfringe is a library implementing safe, lightweight context switches,
@ -51,7 +51,6 @@ pub const STACK_ALIGNMENT: usize = arch::STACK_ALIGNMENT;
mod debug; mod debug;
mod context;
mod stack; mod stack;
mod slice_stack; mod slice_stack;
pub mod generator; pub mod generator;

View File

@ -7,10 +7,10 @@
// copied, modified, or distributed except according to those terms. // copied, modified, or distributed except according to those terms.
extern crate fringe; extern crate fringe;
use fringe::{Stack, SliceStack, OwnedStack, OsStack}; use fringe::{SliceStack, OwnedStack, OsStack};
use fringe::generator::{Generator, Yielder}; use fringe::generator::{Generator, Yielder};
fn add_one_fn<S: Stack>(yielder: &mut Yielder<i32, i32, S>, mut input: i32) { fn add_one_fn(yielder: &mut Yielder<i32, i32>, mut input: i32) {
loop { loop {
if input == 0 { break } if input == 0 { break }
input = yielder.suspend(input + 1) input = yielder.suspend(input + 1)