Clean up and simplify the implementation of generators
This commit is contained in:
parent
f7f209c1eb
commit
fff625767c
|
@ -17,5 +17,5 @@ fn generate(b: &mut test::Bencher) {
|
|||
loop { input = yielder.suspend(input) }
|
||||
});
|
||||
|
||||
b.iter(|| test::black_box(identity.resume(test::black_box(0))));
|
||||
b.iter(|| for _ in 0..10 { test::black_box(identity.resume(test::black_box(0))); });
|
||||
}
|
||||
|
|
111
src/arch/mod.rs
111
src/arch/mod.rs
|
@ -13,3 +13,114 @@ pub use self::imp::*;
|
|||
#[cfg_attr(target_arch = "x86_64", path = "x86_64.rs")]
|
||||
#[cfg_attr(target_arch = "or1k", path = "or1k.rs")]
|
||||
mod imp;
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
extern crate test;
|
||||
extern crate simd;
|
||||
|
||||
use arch::{self, StackPointer};
|
||||
use ::OsStack;
|
||||
|
||||
#[test]
|
||||
fn context() {
|
||||
unsafe extern "C" fn adder(arg: usize, stack_ptr: StackPointer) -> ! {
|
||||
println!("it's alive! arg: {}", arg);
|
||||
let (arg, stack_ptr) = arch::swap(arg + 1, stack_ptr, None);
|
||||
println!("still alive! arg: {}", arg);
|
||||
arch::swap(arg + 1, stack_ptr, None);
|
||||
panic!("i should be dead");
|
||||
}
|
||||
|
||||
unsafe {
|
||||
let stack = OsStack::new(4 << 20).unwrap();
|
||||
let stack_ptr = arch::init(&stack, adder);
|
||||
|
||||
let (ret, stack_ptr) = arch::swap(10, stack_ptr, Some(&stack));
|
||||
assert_eq!(ret, 11);
|
||||
let (ret, _) = arch::swap(50, stack_ptr, Some(&stack));
|
||||
assert_eq!(ret, 51);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn context_simd() {
|
||||
unsafe extern "C" fn permuter(arg: usize, stack_ptr: StackPointer) -> ! {
|
||||
// This will crash if the stack is not aligned properly.
|
||||
let x = simd::i32x4::splat(arg as i32);
|
||||
let y = x * x;
|
||||
println!("simd result: {:?}", y);
|
||||
let (_, stack_ptr) = arch::swap(0, stack_ptr, None);
|
||||
// And try again after a context switch.
|
||||
let x = simd::i32x4::splat(arg as i32);
|
||||
let y = x * x;
|
||||
println!("simd result: {:?}", y);
|
||||
arch::swap(0, stack_ptr, None);
|
||||
panic!("i should be dead");
|
||||
}
|
||||
|
||||
unsafe {
|
||||
let stack = OsStack::new(4 << 20).unwrap();
|
||||
let stack_ptr = arch::init(&stack, permuter);
|
||||
|
||||
let (_, stack_ptr) = arch::swap(10, stack_ptr, Some(&stack));
|
||||
arch::swap(20, stack_ptr, Some(&stack));
|
||||
}
|
||||
}
|
||||
|
||||
unsafe extern "C" fn do_panic(arg: usize, stack_ptr: StackPointer) -> ! {
|
||||
match arg {
|
||||
0 => panic!("arg=0"),
|
||||
1 => {
|
||||
arch::swap(0, stack_ptr, None);
|
||||
panic!("arg=1");
|
||||
}
|
||||
_ => unreachable!()
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[should_panic="arg=0"]
|
||||
fn panic_after_start() {
|
||||
unsafe {
|
||||
let stack = OsStack::new(4 << 20).unwrap();
|
||||
let stack_ptr = arch::init(&stack, do_panic);
|
||||
|
||||
arch::swap(0, stack_ptr, Some(&stack));
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[should_panic="arg=1"]
|
||||
fn panic_after_swap() {
|
||||
unsafe {
|
||||
let stack = OsStack::new(4 << 20).unwrap();
|
||||
let stack_ptr = arch::init(&stack, do_panic);
|
||||
|
||||
let (_, stack_ptr) = arch::swap(1, stack_ptr, Some(&stack));
|
||||
arch::swap(0, stack_ptr, Some(&stack));
|
||||
}
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn swap(b: &mut test::Bencher) {
|
||||
unsafe extern "C" fn loopback(mut arg: usize, mut stack_ptr: StackPointer) -> ! {
|
||||
// This deliberately does not ignore arg, to measure the time it takes
|
||||
// to move the return value between registers.
|
||||
loop {
|
||||
let data = arch::swap(arg, stack_ptr, None);
|
||||
arg = data.0;
|
||||
stack_ptr = data.1;
|
||||
}
|
||||
}
|
||||
|
||||
unsafe {
|
||||
let stack = OsStack::new(4 << 20).unwrap();
|
||||
let mut stack_ptr = arch::init(&stack, loopback);
|
||||
|
||||
b.iter(|| for _ in 0..10 {
|
||||
stack_ptr = arch::swap(0, stack_ptr, Some(&stack)).1;
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -14,7 +14,8 @@
|
|||
// * OR1K C ABI passes the first argument in r3. We also use r3 to pass a value
|
||||
// while swapping context; this is an arbitrary choice
|
||||
// (we clobber all registers and could use any of them) but this allows us
|
||||
// to reuse the swap function to perform the initial call.
|
||||
// to reuse the swap function to perform the initial call. We do the same
|
||||
// thing with r4 to pass the stack pointer to the new context.
|
||||
//
|
||||
// To understand the DWARF CFI code in this file, keep in mind these facts:
|
||||
// * CFI is "call frame information"; a set of instructions to a debugger or
|
||||
|
@ -47,7 +48,7 @@ pub const STACK_ALIGNMENT: usize = 4;
|
|||
#[derive(Debug, Clone, Copy)]
|
||||
pub struct StackPointer(*mut usize);
|
||||
|
||||
pub unsafe fn init(stack: &Stack, f: unsafe extern "C" fn(usize) -> !) -> StackPointer {
|
||||
pub unsafe fn init(stack: &Stack, f: unsafe extern "C" fn(usize, StackPointer) -> !) -> StackPointer {
|
||||
#[naked]
|
||||
unsafe extern "C" fn trampoline_1() {
|
||||
asm!(
|
||||
|
@ -96,6 +97,12 @@ pub unsafe fn init(stack: &Stack, f: unsafe extern "C" fn(usize) -> !) -> StackP
|
|||
.cfi_offset r2, -4
|
||||
.cfi_offset r9, -8
|
||||
|
||||
# This nop is here so that the return address of the swap trampoline
|
||||
# doesn't point to the start of the symbol. That would confuse gdb's backtraces,
|
||||
# causing them to think the parent function is trampoline_1 instead of
|
||||
# trampoline_2.
|
||||
nop
|
||||
|
||||
# Call the provided function.
|
||||
l.lwz r4, 8(r1)
|
||||
l.jalr r4
|
||||
|
@ -123,25 +130,31 @@ pub unsafe fn init(stack: &Stack, f: unsafe extern "C" fn(usize) -> !) -> StackP
|
|||
|
||||
// Call frame for trampoline_2. The CFA slot is updated by swap::trampoline
|
||||
// each time a context switch is performed.
|
||||
push(&mut sp, 0xdead0cfa); // CFA slot
|
||||
push(&mut sp, 0xdead0cfa); // CFA slot
|
||||
push(&mut sp, trampoline_1 as usize + 4); // Return after the nop
|
||||
|
||||
// Call frame for swap::trampoline. We set up the r2 value to point to the
|
||||
// parent call frame.
|
||||
let frame = sp;
|
||||
push(&mut sp, frame.0 as usize); // Pointer to parent call frame
|
||||
push(&mut sp, trampoline_2 as usize); // Entry point
|
||||
push(&mut sp, frame.0 as usize); // Pointer to parent call frame
|
||||
push(&mut sp, trampoline_2 as usize + 4); // Entry point, skip initial nop
|
||||
|
||||
// The call frame for swap::trampoline is actually in the red zone and not
|
||||
// below the stack pointer.
|
||||
// The last two values are read by the swap trampoline and are actually in the
|
||||
// red zone (at negative offsets from the returned stack pointer), not on the stack itself.
|
||||
frame
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub unsafe fn swap(arg: usize, old_sp: *mut StackPointer, new_sp: StackPointer,
|
||||
new_stack: &Stack) -> usize {
|
||||
pub unsafe fn swap(arg: usize, new_sp: StackPointer,
|
||||
new_stack: Option<&Stack>) -> (usize, StackPointer) {
|
||||
// Address of the topmost CFA stack slot.
|
||||
let new_cfa = (new_stack.base() as *mut usize).offset(-2);
|
||||
let mut dummy: usize = mem::uninitialized();
|
||||
let new_cfa = if let Some(new_stack) = new_stack {
|
||||
(new_stack.base() as *mut usize).offset(-2)
|
||||
} else {
|
||||
// Just pass a dummy pointer if we aren't linking the stack
|
||||
&mut dummy
|
||||
};
|
||||
|
||||
#[naked]
|
||||
unsafe extern "C" fn trampoline() {
|
||||
|
@ -160,17 +173,13 @@ pub unsafe fn swap(arg: usize, old_sp: *mut StackPointer, new_sp: StackPointer,
|
|||
l.addi r7, r1, -8
|
||||
l.sw 0(r6), r7
|
||||
|
||||
# Switch to the new stack for unwinding purposes. The old stack may no
|
||||
# longer be valid now that we have modified the link.
|
||||
.cfi_def_cfa_register r5
|
||||
|
||||
# Save stack pointer of the old context.
|
||||
l.sw 0(r4), r1
|
||||
# Pass the stack pointer of the old context to the new one.
|
||||
l.or r4, r0, r1
|
||||
# Load stack pointer of the new context.
|
||||
l.or r1, r0, r5
|
||||
.cfi_def_cfa_register r1
|
||||
|
||||
# Restore frame pointer and link register of the new context.
|
||||
# Load frame and instruction pointers of the new context.
|
||||
l.lwz r2, -4(r1)
|
||||
l.lwz r9, -8(r1)
|
||||
|
||||
|
@ -182,23 +191,24 @@ pub unsafe fn swap(arg: usize, old_sp: *mut StackPointer, new_sp: StackPointer,
|
|||
}
|
||||
|
||||
let ret: usize;
|
||||
let ret_sp: *mut usize;
|
||||
asm!(
|
||||
r#"
|
||||
# Call the trampoline to switch to the new context.
|
||||
l.jal ${1}
|
||||
l.jal ${2}
|
||||
l.nop
|
||||
"#
|
||||
: "={r3}" (ret)
|
||||
"={r4}" (ret_sp)
|
||||
: "s" (trampoline as usize)
|
||||
"{r3}" (arg)
|
||||
"{r4}" (old_sp)
|
||||
"{r5}" (new_sp.0)
|
||||
"{r6}" (new_cfa)
|
||||
:/*"r0", "r1", "r2", "r3",*/"r4", "r5", "r6", "r7",
|
||||
:/*"r0", "r1", "r2", "r3", "r4",*/"r5", "r6", "r7",
|
||||
"r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
|
||||
"r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
|
||||
"r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31",
|
||||
"cc", "memory"
|
||||
: "volatile");
|
||||
ret
|
||||
(ret, StackPointer(ret_sp))
|
||||
}
|
||||
|
|
|
@ -41,6 +41,7 @@
|
|||
// * The 1st init trampoline tells the unwinder to restore %ebp and its return
|
||||
// address from the stack frame at %ebp (in the parent stack), thus continuing
|
||||
// unwinding at the swap call site instead of falling off the end of context stack.
|
||||
use core::mem;
|
||||
use stack::Stack;
|
||||
|
||||
pub const STACK_ALIGNMENT: usize = 16;
|
||||
|
@ -48,7 +49,7 @@ pub const STACK_ALIGNMENT: usize = 16;
|
|||
#[derive(Debug, Clone, Copy)]
|
||||
pub struct StackPointer(*mut usize);
|
||||
|
||||
pub unsafe fn init(stack: &Stack, f: unsafe extern "C" fn(usize) -> !) -> StackPointer {
|
||||
pub unsafe fn init(stack: &Stack, f: unsafe extern "C" fn(usize, StackPointer) -> !) -> StackPointer {
|
||||
#[cfg(not(target_vendor = "apple"))]
|
||||
#[naked]
|
||||
unsafe extern "C" fn trampoline_1() {
|
||||
|
@ -69,8 +70,8 @@ pub unsafe fn init(stack: &Stack, f: unsafe extern "C" fn(usize) -> !) -> StackP
|
|||
# will use %ebp+8 as the next call frame address, restore return address
|
||||
# from CFA-4 and restore %ebp from CFA-8. This mirrors what the second half
|
||||
# of `swap_trampoline` does.
|
||||
.cfi_def_cfa ebp, 8
|
||||
.cfi_offset ebp, -8
|
||||
.cfi_def_cfa %ebp, 8
|
||||
.cfi_offset %ebp, -8
|
||||
|
||||
# This nop is here so that the initial swap doesn't return to the start
|
||||
# of the trampoline, which confuses the unwinder since it will look for
|
||||
|
@ -97,8 +98,8 @@ pub unsafe fn init(stack: &Stack, f: unsafe extern "C" fn(usize) -> !) -> StackP
|
|||
# Identical to the above, except avoids .local/.size that aren't available on Mach-O.
|
||||
__morestack:
|
||||
.private_extern __morestack
|
||||
.cfi_def_cfa ebp, 8
|
||||
.cfi_offset ebp, -8
|
||||
.cfi_def_cfa %ebp, 8
|
||||
.cfi_offset %ebp, -8
|
||||
nop
|
||||
nop
|
||||
"#
|
||||
|
@ -114,13 +115,20 @@ pub unsafe fn init(stack: &Stack, f: unsafe extern "C" fn(usize) -> !) -> StackP
|
|||
# will restore %ebp (and thus CFA of the first trampoline) from the stack slot.
|
||||
# This stack slot is updated every time swap() is called to point to the bottom
|
||||
# of the stack of the context just switched from.
|
||||
.cfi_def_cfa ebp, 8
|
||||
.cfi_offset ebp, -8
|
||||
.cfi_def_cfa %ebp, 8
|
||||
.cfi_offset %ebp, -8
|
||||
|
||||
# Push argument.
|
||||
pushl %eax
|
||||
# This nop is here so that the return address of the swap trampoline
|
||||
# doesn't point to the start of the symbol. That would confuse gdb's backtraces,
|
||||
# causing them to think the parent function is trampoline_1 instead of
|
||||
# trampoline_2.
|
||||
nop
|
||||
|
||||
# Push arguments.
|
||||
pushl %esi
|
||||
pushl %edi
|
||||
# Call the provided function.
|
||||
call *12(%esp)
|
||||
calll *16(%esp)
|
||||
"#
|
||||
: : : : "volatile")
|
||||
}
|
||||
|
@ -140,27 +148,36 @@ pub unsafe fn init(stack: &Stack, f: unsafe extern "C" fn(usize) -> !) -> StackP
|
|||
// such as perf or dtrace.
|
||||
let mut sp = StackPointer(stack.base() as *mut usize);
|
||||
|
||||
push(&mut sp, 0 as usize); // Padding to ensure the stack is properly aligned
|
||||
push(&mut sp, 0 as usize); // Padding to ensure the stack is properly aligned
|
||||
push(&mut sp, 0 as usize); // Padding to ensure the stack is properly aligned
|
||||
push(&mut sp, f as usize); // Function that trampoline_2 should call
|
||||
|
||||
// Call frame for trampoline_2. The CFA slot is updated by swap::trampoline
|
||||
// each time a context switch is performed.
|
||||
push(&mut sp, trampoline_1 as usize + 2); // Return after the 2 nops
|
||||
push(&mut sp, 0xdead0cfa); // CFA slot
|
||||
push(&mut sp, 0xdead0cfa); // CFA slot
|
||||
|
||||
// Call frame for swap::trampoline. We set up the %ebp value to point to the
|
||||
// parent call frame.
|
||||
let frame = sp;
|
||||
push(&mut sp, trampoline_2 as usize); // Entry point
|
||||
push(&mut sp, frame.0 as usize); // Pointer to parent call frame
|
||||
push(&mut sp, trampoline_2 as usize + 1); // Entry point, skip initial nop
|
||||
push(&mut sp, frame.0 as usize); // Pointer to parent call frame
|
||||
|
||||
sp
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub unsafe fn swap(arg: usize, old_sp: *mut StackPointer, new_sp: StackPointer,
|
||||
new_stack: &Stack) -> usize {
|
||||
pub unsafe fn swap(arg: usize, new_sp: StackPointer,
|
||||
new_stack: Option<&Stack>) -> (usize, StackPointer) {
|
||||
// Address of the topmost CFA stack slot.
|
||||
let new_cfa = (new_stack.base() as *mut usize).offset(-3);
|
||||
let mut dummy: usize = mem::uninitialized();
|
||||
let new_cfa = if let Some(new_stack) = new_stack {
|
||||
(new_stack.base() as *mut usize).offset(-6)
|
||||
} else {
|
||||
// Just pass a dummy pointer if we aren't linking the stack
|
||||
&mut dummy
|
||||
};
|
||||
|
||||
#[naked]
|
||||
unsafe extern "C" fn trampoline() {
|
||||
|
@ -171,54 +188,50 @@ pub unsafe fn swap(arg: usize, old_sp: *mut StackPointer, new_sp: StackPointer,
|
|||
# the call instruction that invoked the trampoline.
|
||||
pushl %ebp
|
||||
.cfi_adjust_cfa_offset 4
|
||||
.cfi_rel_offset ebp, 0
|
||||
.cfi_rel_offset %ebp, 0
|
||||
|
||||
# Link the call stacks together by writing the current stack bottom
|
||||
# address to the CFA slot in the new stack.
|
||||
movl %esp, (%edi)
|
||||
movl %esp, (%ecx)
|
||||
|
||||
# Switch to the new stack for unwinding purposes. The old stack may no
|
||||
# longer be valid now that we have modified the link.
|
||||
.cfi_def_cfa_register edx
|
||||
|
||||
# Save stack pointer of the old context.
|
||||
movl %esp, (%esi)
|
||||
# Pass the stack pointer of the old context to the new one.
|
||||
movl %esp, %esi
|
||||
# Load stack pointer of the new context.
|
||||
movl %edx, %esp
|
||||
.cfi_def_cfa_register esp
|
||||
|
||||
# Restore frame pointer of the new context.
|
||||
popl %ebp
|
||||
.cfi_adjust_cfa_offset -4
|
||||
.cfi_restore ebp
|
||||
.cfi_restore %ebp
|
||||
|
||||
# Return into the new context. Use `pop` and `jmp` instead of a `ret`
|
||||
# to avoid return address mispredictions (~8ns per `ret` on Ivy Bridge).
|
||||
popl %ecx
|
||||
popl %eax
|
||||
.cfi_adjust_cfa_offset -4
|
||||
.cfi_register eip, ecx
|
||||
jmpl *%ecx
|
||||
.cfi_register %eip, %eax
|
||||
jmpl *%eax
|
||||
"#
|
||||
: : : : "volatile")
|
||||
}
|
||||
|
||||
let ret: usize;
|
||||
let ret_sp: *mut usize;
|
||||
asm!(
|
||||
r#"
|
||||
# Push instruction pointer of the old context and switch to
|
||||
# the new context.
|
||||
call ${1:c}
|
||||
call ${2:c}
|
||||
"#
|
||||
: "={eax}" (ret)
|
||||
: "={edi}" (ret)
|
||||
"={esi}" (ret_sp)
|
||||
: "s" (trampoline as usize)
|
||||
"{eax}" (arg)
|
||||
"{esi}" (old_sp)
|
||||
"{edi}" (arg)
|
||||
"{edx}" (new_sp.0)
|
||||
"{edi}" (new_cfa)
|
||||
:/*"eax",*/"ebx", "ecx", "edx", "esi", "edi",/*"ebp", "esp",*/
|
||||
"{ecx}" (new_cfa)
|
||||
: "eax", "ebx", "ecx", "edx", /*"esi", "edi", "ebp", "esp",*/
|
||||
"mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7",
|
||||
"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7",
|
||||
"cc", "dirflag", "fpsr", "flags", "memory"
|
||||
: "volatile");
|
||||
ret
|
||||
(ret, StackPointer(ret_sp))
|
||||
}
|
||||
|
|
|
@ -19,7 +19,8 @@
|
|||
// * x86_64 SysV C ABI passes the first argument in %rdi. We also use %rdi
|
||||
// to pass a value while swapping context; this is an arbitrary choice
|
||||
// (we clobber all registers and could use any of them) but this allows us
|
||||
// to reuse the swap function to perform the initial call.
|
||||
// to reuse the swap function to perform the initial call. We do the same
|
||||
// thing with %rsi to pass the stack pointer to the new context.
|
||||
//
|
||||
// To understand the DWARF CFI code in this file, keep in mind these facts:
|
||||
// * CFI is "call frame information"; a set of instructions to a debugger or
|
||||
|
@ -45,6 +46,7 @@
|
|||
// * The 1st init trampoline tells the unwinder to restore %rbp and its return
|
||||
// address from the stack frame at %rbp (in the parent stack), thus continuing
|
||||
// unwinding at the swap call site instead of falling off the end of context stack.
|
||||
use core::mem;
|
||||
use stack::Stack;
|
||||
|
||||
pub const STACK_ALIGNMENT: usize = 16;
|
||||
|
@ -52,7 +54,7 @@ pub const STACK_ALIGNMENT: usize = 16;
|
|||
#[derive(Debug, Clone, Copy)]
|
||||
pub struct StackPointer(*mut usize);
|
||||
|
||||
pub unsafe fn init(stack: &Stack, f: unsafe extern "C" fn(usize) -> !) -> StackPointer {
|
||||
pub unsafe fn init(stack: &Stack, f: unsafe extern "C" fn(usize, StackPointer) -> !) -> StackPointer {
|
||||
#[cfg(not(target_vendor = "apple"))]
|
||||
#[naked]
|
||||
unsafe extern "C" fn trampoline_1() {
|
||||
|
@ -73,8 +75,8 @@ pub unsafe fn init(stack: &Stack, f: unsafe extern "C" fn(usize) -> !) -> StackP
|
|||
# will use %rbp+16 as the next call frame address, restore return address
|
||||
# from CFA-8 and restore %rbp from CFA-16. This mirrors what the second half
|
||||
# of `swap_trampoline` does.
|
||||
.cfi_def_cfa rbp, 16
|
||||
.cfi_offset rbp, -16
|
||||
.cfi_def_cfa %rbp, 16
|
||||
.cfi_offset %rbp, -16
|
||||
|
||||
# This nop is here so that the initial swap doesn't return to the start
|
||||
# of the trampoline, which confuses the unwinder since it will look for
|
||||
|
@ -101,8 +103,8 @@ pub unsafe fn init(stack: &Stack, f: unsafe extern "C" fn(usize) -> !) -> StackP
|
|||
# Identical to the above, except avoids .local/.size that aren't available on Mach-O.
|
||||
__morestack:
|
||||
.private_extern __morestack
|
||||
.cfi_def_cfa rbp, 16
|
||||
.cfi_offset rbp, -16
|
||||
.cfi_def_cfa %rbp, 16
|
||||
.cfi_offset %rbp, -16
|
||||
nop
|
||||
nop
|
||||
"#
|
||||
|
@ -118,8 +120,14 @@ pub unsafe fn init(stack: &Stack, f: unsafe extern "C" fn(usize) -> !) -> StackP
|
|||
# will restore %rbp (and thus CFA of the first trampoline) from the stack slot.
|
||||
# This stack slot is updated every time swap() is called to point to the bottom
|
||||
# of the stack of the context just switched from.
|
||||
.cfi_def_cfa rbp, 16
|
||||
.cfi_offset rbp, -16
|
||||
.cfi_def_cfa %rbp, 16
|
||||
.cfi_offset %rbp, -16
|
||||
|
||||
# This nop is here so that the return address of the swap trampoline
|
||||
# doesn't point to the start of the symbol. That would confuse gdb's backtraces,
|
||||
# causing them to think the parent function is trampoline_1 instead of
|
||||
# trampoline_2.
|
||||
nop
|
||||
|
||||
# Call the provided function.
|
||||
call *16(%rsp)
|
||||
|
@ -148,22 +156,28 @@ pub unsafe fn init(stack: &Stack, f: unsafe extern "C" fn(usize) -> !) -> StackP
|
|||
// Call frame for trampoline_2. The CFA slot is updated by swap::trampoline
|
||||
// each time a context switch is performed.
|
||||
push(&mut sp, trampoline_1 as usize + 2); // Return after the 2 nops
|
||||
push(&mut sp, 0xdeaddeaddead0cfa); // CFA slot
|
||||
push(&mut sp, 0xdeaddeaddead0cfa); // CFA slot
|
||||
|
||||
// Call frame for swap::trampoline. We set up the %rbp value to point to the
|
||||
// parent call frame.
|
||||
let frame = sp;
|
||||
push(&mut sp, trampoline_2 as usize); // Entry point
|
||||
push(&mut sp, frame.0 as usize); // Pointer to parent call frame
|
||||
push(&mut sp, trampoline_2 as usize + 1); // Entry point, skip initial nop
|
||||
push(&mut sp, frame.0 as usize); // Pointer to parent call frame
|
||||
|
||||
sp
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub unsafe fn swap(arg: usize, old_sp: *mut StackPointer, new_sp: StackPointer,
|
||||
new_stack: &Stack) -> usize {
|
||||
pub unsafe fn swap(arg: usize, new_sp: StackPointer,
|
||||
new_stack: Option<&Stack>) -> (usize, StackPointer) {
|
||||
// Address of the topmost CFA stack slot.
|
||||
let new_cfa = (new_stack.base() as *mut usize).offset(-4);
|
||||
let mut dummy: usize = mem::uninitialized();
|
||||
let new_cfa = if let Some(new_stack) = new_stack {
|
||||
(new_stack.base() as *mut usize).offset(-4)
|
||||
} else {
|
||||
// Just pass a dummy pointer if we aren't linking the stack
|
||||
&mut dummy
|
||||
};
|
||||
|
||||
#[naked]
|
||||
unsafe extern "C" fn trampoline() {
|
||||
|
@ -174,51 +188,47 @@ pub unsafe fn swap(arg: usize, old_sp: *mut StackPointer, new_sp: StackPointer,
|
|||
# the call instruction that invoked the trampoline.
|
||||
pushq %rbp
|
||||
.cfi_adjust_cfa_offset 8
|
||||
.cfi_rel_offset rbp, 0
|
||||
.cfi_rel_offset %rbp, 0
|
||||
|
||||
# Link the call stacks together by writing the current stack bottom
|
||||
# address to the CFA slot in the new stack.
|
||||
movq %rsp, (%rcx)
|
||||
|
||||
# Switch to the new stack for unwinding purposes. The old stack may no
|
||||
# longer be valid now that we have modified the link.
|
||||
.cfi_def_cfa_register rdx
|
||||
|
||||
# Save stack pointer of the old context.
|
||||
movq %rsp, (%rsi)
|
||||
# Pass the stack pointer of the old context to the new one.
|
||||
movq %rsp, %rsi
|
||||
# Load stack pointer of the new context.
|
||||
movq %rdx, %rsp
|
||||
.cfi_def_cfa_register rsp
|
||||
|
||||
# Restore frame pointer of the new context.
|
||||
popq %rbp
|
||||
.cfi_adjust_cfa_offset -8
|
||||
.cfi_restore rbp
|
||||
.cfi_restore %rbp
|
||||
|
||||
# Return into the new context. Use `pop` and `jmp` instead of a `ret`
|
||||
# to avoid return address mispredictions (~8ns per `ret` on Ivy Bridge).
|
||||
popq %rax
|
||||
.cfi_adjust_cfa_offset -8
|
||||
.cfi_register rip, rax
|
||||
.cfi_register %rip, %rax
|
||||
jmpq *%rax
|
||||
"#
|
||||
: : : : "volatile")
|
||||
}
|
||||
|
||||
let ret: usize;
|
||||
let ret_sp: *mut usize;
|
||||
asm!(
|
||||
r#"
|
||||
# Push instruction pointer of the old context and switch to
|
||||
# the new context.
|
||||
call ${1:c}
|
||||
call ${2:c}
|
||||
"#
|
||||
: "={rdi}" (ret)
|
||||
"={rsi}" (ret_sp)
|
||||
: "s" (trampoline as usize)
|
||||
"{rdi}" (arg)
|
||||
"{rsi}" (old_sp)
|
||||
"{rdx}" (new_sp.0)
|
||||
"{rcx}" (new_cfa)
|
||||
: "rax", "rbx", "rcx", "rdx", "rsi", /*"rdi", "rbp", "rsp",*/
|
||||
: "rax", "rbx", "rcx", "rdx", /*"rsi", "rdi", "rbp", "rsp",*/
|
||||
"r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
|
||||
"mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7",
|
||||
"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7",
|
||||
|
@ -232,5 +242,5 @@ pub unsafe fn swap(arg: usize, old_sp: *mut StackPointer, new_sp: StackPointer,
|
|||
// the "alignstack" LLVM inline assembly option does exactly the same
|
||||
// thing on x86_64.
|
||||
: "volatile", "alignstack");
|
||||
ret
|
||||
(ret, StackPointer(ret_sp))
|
||||
}
|
||||
|
|
172
src/context.rs
172
src/context.rs
|
@ -1,172 +0,0 @@
|
|||
// This file is part of libfringe, a low-level green threading library.
|
||||
// Copyright (c) edef <edef@edef.eu>,
|
||||
// whitequark <whitequark@whitequark.org>
|
||||
// Licensed under the Apache License, Version 2.0, <LICENSE-APACHE or
|
||||
// http://apache.org/licenses/LICENSE-2.0> or the MIT license <LICENSE-MIT or
|
||||
// http://opensource.org/licenses/MIT>, at your option. This file may not be
|
||||
// copied, modified, or distributed except according to those terms.
|
||||
use stack;
|
||||
use debug;
|
||||
use arch;
|
||||
|
||||
/// Context holds a suspended thread of execution along with a stack.
|
||||
///
|
||||
/// It can be swapped into and out of with the swap method,
|
||||
/// and once you're done with it, you can get the stack back through unwrap.
|
||||
///
|
||||
/// Every operation is unsafe, because no guarantees can be made about
|
||||
/// the state of the context.
|
||||
#[derive(Debug)]
|
||||
pub struct Context<Stack: stack::Stack> {
|
||||
stack: Stack,
|
||||
stack_id: debug::StackId,
|
||||
stack_ptr: arch::StackPointer
|
||||
}
|
||||
|
||||
unsafe impl<Stack> Send for Context<Stack>
|
||||
where Stack: stack::Stack + Send {}
|
||||
|
||||
impl<Stack> Context<Stack> where Stack: stack::Stack {
|
||||
/// Creates a new Context. When it is swapped into, it will call
|
||||
/// `f(arg)`, where `arg` is the argument passed to `swap`.
|
||||
pub unsafe fn new(stack: Stack, f: unsafe extern "C" fn(usize) -> !) -> Context<Stack> {
|
||||
let stack_id = debug::StackId::register(&stack);
|
||||
let stack_ptr = arch::init(&stack, f);
|
||||
Context {
|
||||
stack: stack,
|
||||
stack_id: stack_id,
|
||||
stack_ptr: stack_ptr
|
||||
}
|
||||
}
|
||||
|
||||
/// Unwraps the context, returning the stack it contained.
|
||||
pub unsafe fn unwrap(self) -> Stack {
|
||||
self.stack
|
||||
}
|
||||
}
|
||||
|
||||
impl<OldStack> Context<OldStack> where OldStack: stack::Stack {
|
||||
/// Switches to `new_ctx`, saving the current thread of execution to `old_ctx`.
|
||||
#[inline(always)]
|
||||
pub unsafe fn swap<NewStack>(old_ctx: *mut Context<OldStack>,
|
||||
new_ctx: *const Context<NewStack>,
|
||||
arg: usize) -> usize
|
||||
where NewStack: stack::Stack {
|
||||
arch::swap(arg, &mut (*old_ctx).stack_ptr, (*new_ctx).stack_ptr, &(*new_ctx).stack)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
extern crate test;
|
||||
extern crate simd;
|
||||
|
||||
use std::ptr;
|
||||
use super::Context;
|
||||
use ::OsStack;
|
||||
|
||||
#[thread_local]
|
||||
static mut ctx_slot: *mut Context<OsStack> = ptr::null_mut();
|
||||
|
||||
#[test]
|
||||
fn context() {
|
||||
unsafe extern "C" fn adder(arg: usize) -> ! {
|
||||
println!("it's alive! arg: {}", arg);
|
||||
let arg = Context::swap(ctx_slot, ctx_slot, arg + 1);
|
||||
println!("still alive! arg: {}", arg);
|
||||
Context::swap(ctx_slot, ctx_slot, arg + 1);
|
||||
panic!("i should be dead");
|
||||
}
|
||||
|
||||
unsafe {
|
||||
let stack = OsStack::new(4 << 20).unwrap();
|
||||
let mut ctx = Context::new(stack, adder);
|
||||
ctx_slot = &mut ctx;
|
||||
|
||||
let ret = Context::swap(ctx_slot, ctx_slot, 10);
|
||||
assert_eq!(ret, 11);
|
||||
let ret = Context::swap(ctx_slot, ctx_slot, 50);
|
||||
assert_eq!(ret, 51);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn context_simd() {
|
||||
unsafe extern "C" fn permuter(arg: usize) -> ! {
|
||||
// This will crash if the stack is not aligned properly.
|
||||
let x = simd::i32x4::splat(arg as i32);
|
||||
let y = x * x;
|
||||
println!("simd result: {:?}", y);
|
||||
Context::swap(ctx_slot, ctx_slot, 0);
|
||||
// And try again after a context switch.
|
||||
let x = simd::i32x4::splat(arg as i32);
|
||||
let y = x * x;
|
||||
println!("simd result: {:?}", y);
|
||||
Context::swap(ctx_slot, ctx_slot, 0);
|
||||
panic!("i should be dead");
|
||||
}
|
||||
|
||||
unsafe {
|
||||
let stack = OsStack::new(4 << 20).unwrap();
|
||||
let mut ctx = Context::new(stack, permuter);
|
||||
ctx_slot = &mut ctx;
|
||||
|
||||
Context::swap(ctx_slot, ctx_slot, 10);
|
||||
Context::swap(ctx_slot, ctx_slot, 20);
|
||||
}
|
||||
}
|
||||
|
||||
unsafe extern "C" fn do_panic(arg: usize) -> ! {
|
||||
match arg {
|
||||
0 => panic!("arg=0"),
|
||||
1 => {
|
||||
Context::swap(ctx_slot, ctx_slot, 0);
|
||||
panic!("arg=1");
|
||||
}
|
||||
_ => unreachable!()
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[should_panic="arg=0"]
|
||||
fn panic_after_start() {
|
||||
unsafe {
|
||||
let stack = OsStack::new(4 << 20).unwrap();
|
||||
let mut ctx = Context::new(stack, do_panic);
|
||||
|
||||
Context::swap(&mut ctx, &ctx, 0);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[should_panic="arg=1"]
|
||||
fn panic_after_swap() {
|
||||
unsafe {
|
||||
let stack = OsStack::new(4 << 20).unwrap();
|
||||
let mut ctx = Context::new(stack, do_panic);
|
||||
ctx_slot = &mut ctx;
|
||||
|
||||
Context::swap(&mut ctx, &ctx, 1);
|
||||
Context::swap(&mut ctx, &ctx, 0);
|
||||
}
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn swap(b: &mut test::Bencher) {
|
||||
unsafe extern "C" fn loopback(mut arg: usize) -> ! {
|
||||
// This deliberately does not ignore arg, to measure the time it takes
|
||||
// to move the return value between registers.
|
||||
let ctx_ptr = ctx_slot;
|
||||
loop { arg = Context::swap(ctx_ptr, ctx_ptr, arg) }
|
||||
}
|
||||
|
||||
unsafe {
|
||||
let stack = OsStack::new(4 << 20).unwrap();
|
||||
let mut ctx = Context::new(stack, loopback);
|
||||
ctx_slot = &mut ctx;
|
||||
|
||||
let ctx_ptr = &mut ctx;
|
||||
b.iter(|| Context::swap(ctx_ptr, ctx_ptr, 0));
|
||||
}
|
||||
}
|
||||
}
|
|
@ -16,7 +16,8 @@ use core::{ptr, mem};
|
|||
use core::cell::Cell;
|
||||
|
||||
use stack;
|
||||
use context::Context;
|
||||
use debug;
|
||||
use arch::{self, StackPointer};
|
||||
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
pub enum State {
|
||||
|
@ -80,9 +81,11 @@ pub enum State {
|
|||
/// ```
|
||||
#[derive(Debug)]
|
||||
pub struct Generator<Input: Send, Output: Send, Stack: stack::Stack> {
|
||||
state: State,
|
||||
context: Context<Stack>,
|
||||
phantom: (PhantomData<*const Input>, PhantomData<*const Output>)
|
||||
state: State,
|
||||
stack: Stack,
|
||||
stack_id: debug::StackId,
|
||||
stack_ptr: arch::StackPointer,
|
||||
phantom: (PhantomData<*const Input>, PhantomData<*const Output>)
|
||||
}
|
||||
|
||||
impl<Input, Output, Stack> Generator<Input, Output, Stack>
|
||||
|
@ -92,7 +95,7 @@ impl<Input, Output, Stack> Generator<Input, Output, Stack>
|
|||
/// See also the [contract](../trait.GuardedStack.html) that needs to be fulfilled by `stack`.
|
||||
pub fn new<F>(stack: Stack, f: F) -> Generator<Input, Output, Stack>
|
||||
where Stack: stack::GuardedStack,
|
||||
F: FnOnce(&mut Yielder<Input, Output, Stack>, Input) + Send {
|
||||
F: FnOnce(&mut Yielder<Input, Output>, Input) + Send {
|
||||
unsafe { Generator::unsafe_new(stack, f) }
|
||||
}
|
||||
|
||||
|
@ -104,35 +107,36 @@ impl<Input, Output, Stack> Generator<Input, Output, Stack>
|
|||
///
|
||||
/// See also the [contract](../trait.Stack.html) that needs to be fulfilled by `stack`.
|
||||
pub unsafe fn unsafe_new<F>(stack: Stack, f: F) -> Generator<Input, Output, Stack>
|
||||
where F: FnOnce(&mut Yielder<Input, Output, Stack>, Input) + Send {
|
||||
unsafe extern "C" fn generator_wrapper<Input, Output, Stack, F>(env: usize) -> !
|
||||
where F: FnOnce(&mut Yielder<Input, Output>, Input) + Send {
|
||||
unsafe extern "C" fn generator_wrapper<Input, Output, Stack, F>(env: usize, stack_ptr: StackPointer) -> !
|
||||
where Input: Send, Output: Send, Stack: stack::Stack,
|
||||
F: FnOnce(&mut Yielder<Input, Output, Stack>, Input) {
|
||||
F: FnOnce(&mut Yielder<Input, Output>, Input) {
|
||||
// Retrieve our environment from the caller and return control to it.
|
||||
let (mut yielder, f) = ptr::read(env as *mut (Yielder<Input, Output, Stack>, F));
|
||||
let data = Context::swap(yielder.context.get(), yielder.context.get(), 0);
|
||||
let f = ptr::read(env as *const F);
|
||||
let (data, stack_ptr) = arch::swap(0, stack_ptr, None);
|
||||
// See the second half of Yielder::suspend_bare.
|
||||
let (new_context, input) = ptr::read(data as *mut (*mut Context<Stack>, Input));
|
||||
yielder.context.set(new_context as *mut Context<Stack>);
|
||||
let input = ptr::read(data as *const Input);
|
||||
// Run the body of the generator.
|
||||
let mut yielder = Yielder::new(stack_ptr);
|
||||
f(&mut yielder, input);
|
||||
// Past this point, the generator has dropped everything it has held.
|
||||
loop { yielder.suspend_bare(None); }
|
||||
}
|
||||
|
||||
let mut generator = Generator {
|
||||
state: State::Runnable,
|
||||
context: Context::new(stack, generator_wrapper::<Input, Output, Stack, F>),
|
||||
phantom: (PhantomData, PhantomData)
|
||||
};
|
||||
let stack_id = debug::StackId::register(&stack);
|
||||
let stack_ptr = arch::init(&stack, generator_wrapper::<Input, Output, Stack, F>);
|
||||
|
||||
// Transfer environment to the callee.
|
||||
let mut env = (Yielder::new(&mut generator.context), f);
|
||||
Context::swap(&mut generator.context, &generator.context,
|
||||
&mut env as *mut (Yielder<Input, Output, Stack>, F) as usize);
|
||||
mem::forget(env);
|
||||
let stack_ptr = arch::swap(&f as *const F as usize, stack_ptr, Some(&stack)).1;
|
||||
mem::forget(f);
|
||||
|
||||
generator
|
||||
Generator {
|
||||
state: State::Runnable,
|
||||
stack: stack,
|
||||
stack_id: stack_id,
|
||||
stack_ptr: stack_ptr,
|
||||
phantom: (PhantomData, PhantomData)
|
||||
}
|
||||
}
|
||||
|
||||
/// Resumes the generator and returns the next value it yields.
|
||||
|
@ -148,13 +152,10 @@ impl<Input, Output, Stack> Generator<Input, Output, Stack>
|
|||
|
||||
// Switch to the generator function, and retrieve the yielded value.
|
||||
let val = unsafe {
|
||||
let mut data_in = (&mut self.context as *mut Context<Stack>, input);
|
||||
let data_out =
|
||||
ptr::read(Context::swap(&mut self.context, &self.context,
|
||||
&mut data_in as *mut (*mut Context<Stack>, Input) as usize)
|
||||
as *mut Option<Output>);
|
||||
mem::forget(data_in);
|
||||
data_out
|
||||
let (data_out, stack_ptr) = arch::swap(&input as *const Input as usize, self.stack_ptr, Some(&self.stack));
|
||||
self.stack_ptr = stack_ptr;
|
||||
mem::forget(input);
|
||||
ptr::read(data_out as *const Option<Output>)
|
||||
};
|
||||
|
||||
// Unless the generator function has returned, it can be switched to again, so
|
||||
|
@ -177,7 +178,7 @@ impl<Input, Output, Stack> Generator<Input, Output, Stack>
|
|||
pub fn unwrap(self) -> Stack {
|
||||
match self.state {
|
||||
State::Runnable => panic!("Argh! Bastard! Don't touch that!"),
|
||||
State::Unavailable => unsafe { self.context.unwrap() }
|
||||
State::Unavailable => self.stack
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -185,35 +186,27 @@ impl<Input, Output, Stack> Generator<Input, Output, Stack>
|
|||
/// Yielder is an interface provided to every generator through which it
|
||||
/// returns a value.
|
||||
#[derive(Debug)]
|
||||
pub struct Yielder<Input: Send, Output: Send, Stack: stack::Stack> {
|
||||
context: Cell<*mut Context<Stack>>,
|
||||
pub struct Yielder<Input: Send, Output: Send> {
|
||||
stack_ptr: Cell<StackPointer>,
|
||||
phantom: (PhantomData<*const Input>, PhantomData<*const Output>)
|
||||
}
|
||||
|
||||
impl<Input, Output, Stack> Yielder<Input, Output, Stack>
|
||||
where Input: Send, Output: Send, Stack: stack::Stack {
|
||||
fn new(context: *mut Context<Stack>) -> Yielder<Input, Output, Stack> {
|
||||
impl<Input, Output> Yielder<Input, Output>
|
||||
where Input: Send, Output: Send {
|
||||
fn new(stack_ptr: StackPointer) -> Yielder<Input, Output> {
|
||||
Yielder {
|
||||
context: Cell::new(context),
|
||||
stack_ptr: Cell::new(stack_ptr),
|
||||
phantom: (PhantomData, PhantomData)
|
||||
}
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
fn suspend_bare(&self, mut val: Option<Output>) -> Input {
|
||||
fn suspend_bare(&self, val: Option<Output>) -> Input {
|
||||
unsafe {
|
||||
let data = Context::swap(self.context.get(), self.context.get(),
|
||||
&mut val as *mut Option<Output> as usize);
|
||||
let (data, stack_ptr) = arch::swap(&val as *const Option<Output> as usize, self.stack_ptr.get(), None);
|
||||
self.stack_ptr.set(stack_ptr);
|
||||
mem::forget(val);
|
||||
let (new_context, input) = ptr::read(data as *mut (*mut Context<Stack>, Input));
|
||||
// The generator can be moved (and with it, the context).
|
||||
// This changes the address of the context.
|
||||
// Thus, we update it after each swap.
|
||||
self.context.set(new_context);
|
||||
// However, between this point and the next time we enter suspend_bare
|
||||
// the generator cannot be moved, as a &mut Generator is necessary
|
||||
// to resume the generator function.
|
||||
input
|
||||
ptr::read(data as *const Input)
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -6,7 +6,7 @@
|
|||
// copied, modified, or distributed except according to those terms.
|
||||
#![feature(asm, naked_functions, cfg_target_vendor)]
|
||||
#![cfg_attr(feature = "alloc", feature(alloc, heap_api))]
|
||||
#![cfg_attr(test, feature(test, thread_local, const_fn))]
|
||||
#![cfg_attr(test, feature(test))]
|
||||
#![no_std]
|
||||
|
||||
//! libfringe is a library implementing safe, lightweight context switches,
|
||||
|
@ -51,7 +51,6 @@ pub const STACK_ALIGNMENT: usize = arch::STACK_ALIGNMENT;
|
|||
|
||||
mod debug;
|
||||
|
||||
mod context;
|
||||
mod stack;
|
||||
mod slice_stack;
|
||||
pub mod generator;
|
||||
|
|
|
@ -7,10 +7,10 @@
|
|||
// copied, modified, or distributed except according to those terms.
|
||||
extern crate fringe;
|
||||
|
||||
use fringe::{Stack, SliceStack, OwnedStack, OsStack};
|
||||
use fringe::{SliceStack, OwnedStack, OsStack};
|
||||
use fringe::generator::{Generator, Yielder};
|
||||
|
||||
fn add_one_fn<S: Stack>(yielder: &mut Yielder<i32, i32, S>, mut input: i32) {
|
||||
fn add_one_fn(yielder: &mut Yielder<i32, i32>, mut input: i32) {
|
||||
loop {
|
||||
if input == 0 { break }
|
||||
input = yielder.suspend(input + 1)
|
||||
|
|
Loading…
Reference in New Issue