From 299a0a5d98f543ec00173c55071342fd5948d5b0 Mon Sep 17 00:00:00 2001 From: edef Date: Wed, 14 Jan 2015 08:31:17 +0100 Subject: [PATCH] complete rewrite! featuring 7ns inlineable context switches, no more separately-built assembly objects, and a vastly nicer interface. incontext/outcontext are no more, context switch calls now take a single context structure, which functions as both. everything now also functions without any heap allocations -- for the context setup, only an FnOnce() value is necessary. --- Cargo.toml | 3 -- benches/swap.rs | 43 ++++++----------- build.rs | 7 --- src/arch.rs | 123 ++++++++++++++++++------------------------------ src/arch.s | 99 -------------------------------------- src/context.rs | 55 ++++------------------ src/init.s | 17 +++++++ src/lib.rs | 8 +--- src/macros.rs | 29 ------------ src/main.rs | 41 +++++----------- src/platform.rs | 8 ++-- src/stack.rs | 37 ++------------- src/swap.s | 17 +++++++ 13 files changed, 126 insertions(+), 361 deletions(-) delete mode 100644 src/arch.s create mode 100644 src/init.s delete mode 100644 src/macros.rs create mode 100644 src/swap.s diff --git a/Cargo.toml b/Cargo.toml index 3c8617b..541f85d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -3,6 +3,3 @@ name = "lwkt" version = "0.0.1" authors = ["edef "] build = "build.rs" - -[dependencies] -fn_box = "^1.0.1" diff --git a/benches/swap.rs b/benches/swap.rs index 152ad2c..4379907 100644 --- a/benches/swap.rs +++ b/benches/swap.rs @@ -1,36 +1,23 @@ -#![feature(unboxed_closures, default_type_params, box_syntax)] extern crate test; -extern crate libc; extern crate lwkt; -extern crate fn_box; -use test::Bencher; use lwkt::Context; -use fn_box::FnBox; -use std::ptr::null_mut; -use std::mem::{transmute, forget}; + +static mut ctx_slot: *mut Context = 0 as *mut Context; #[bench] -fn swap(b: &mut Bencher) { - let mut native = unsafe { Context::native() }; - let f: Box = unsafe { transmute((1u, 1u)) }; - - let mut ctx = box { (&mut native as *mut Context, null_mut()) }; - let mut green = Context::new(init, &mut *ctx as *mut _, f); - ctx.1 = &mut green as *mut Context; - - fn init(ctx: *mut (*mut Context, *mut Context), f: Box) -> ! { - unsafe { - let (native, green) = *ctx; - forget(f); - loop { Context::swap(&mut *green, &mut *native); } - } - } - +fn swap(b: &mut test::Bencher) { unsafe { - Context::swap(&mut native, &mut green); - } + let mut ctx = Context::new(move |:| { + let ctx_ptr = ctx_slot; + loop { + (*ctx_ptr).swap() + } + }); - b.iter(|| unsafe { - Context::swap(&mut native, &mut green); - }) + ctx_slot = &mut ctx; + + ctx.swap(); + + b.iter(|| ctx.swap()); + } } diff --git a/build.rs b/build.rs index ae19fd3..f3c4ce3 100644 --- a/build.rs +++ b/build.rs @@ -22,13 +22,6 @@ fn main() { .arg(outpath.clone()) .status().unwrap(); } - else if let Some(basename) = eat_extension(filename, ".s") { - outpath = format!("{}/{}.o", out_dir, basename); - - Command::new("nasm").args(&[filepath.as_slice(), "-felf64", "-o"]) - .arg(outpath.clone()) - .status().unwrap(); - } else { continue } objects.push(outpath); diff --git a/src/arch.rs b/src/arch.rs index ff31178..742818e 100644 --- a/src/arch.rs +++ b/src/arch.rs @@ -1,102 +1,69 @@ use core::prelude::*; -use core::simd::u64x2; -use core::mem::{size_of, zeroed}; +use core::mem::{size_of, align_of}; +use core::cmp::max; +use core::ptr; use stack::Stack; -extern "C" { - #[link_name = "lwt_bootstrap"] - pub fn bootstrap(); - #[link_name = "lwt_swapcontext"] - pub fn swapcontext(save: *mut Registers, restore: *mut Registers); - #[link_name = "lwt_abort"] - pub fn abort() -> !; -} - #[allow(non_camel_case_types)] pub type uintptr_t = u64; -#[repr(C)] -#[allow(dead_code)] pub struct Registers { - rbx: u64, - rsp: u64, - rbp: u64, - rdi: u64, - r12: u64, - r13: u64, - r14: u64, - r15: u64, - ip: u64, - xmm0: u64x2, - xmm1: u64x2, - xmm2: u64x2, - xmm3: u64x2, - xmm4: u64x2, - xmm5: u64x2, + rsp: *mut uintptr_t } -impl Registers { - pub fn new() -> Registers { - unsafe { - Registers { - ip: abort as uintptr_t, - .. zeroed() - } - } - } -} - -pub fn initialise_call_frame(stack: &mut Stack, init: uintptr_t, args: &[uintptr_t]) -> Registers { - let sp = stack.top() as *mut uintptr_t; - let sp = align_down_mut(sp, 16); - let sp = offset_mut(sp, -1); - unsafe { - *sp = 0; - } - - let mut regs = Registers { - rbp: 0, - rsp: sp as uintptr_t, - ip: bootstrap as uintptr_t, - rbx: init, - .. Registers::new() - }; - - match into_fields!(regs { rdi, r12, r13, r14, r15 } <- args.iter().cloned()) { - Some(mut args) => if args.next().is_some() { - panic!("too many arguments") - }, - None => {} - } - - regs -} - -// Rust stores a stack limit at [fs:0x70]. These two functions set and retrieve -// the limit. They're marked as #[inline(always)] so that they can be used in -// situations where the stack limit is invalid. +impl Copy for Registers {} #[inline(always)] -pub unsafe fn get_sp_limit() -> *const u8 { - let limit; - asm!("movq %fs:0x70, $0" : "=r"(limit) ::: "volatile"); - limit -} - -#[inline(always)] -pub unsafe fn set_sp_limit(limit: *const u8) { - asm!("movq $0, %fs:0x70" :: "r"(limit) :: "volatile"); +pub unsafe fn swap(regs: &mut Registers) { + asm!(include_str!("swap.s") + : + : "{rdi}" (&mut regs.rsp) + : "rax", "rbx", "rcx", "rdx", "rsi", "rdi", + "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", + "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", + "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", + "cc" + : "volatile"); } #[inline] +pub unsafe fn initialize_call_frame(stack: &mut S, f: F) -> Registers where S: Stack, F: FnOnce() { + let sp_limit = stack.limit(); + let mut sp = stack.top() as *mut uintptr_t; + let f_ptr = push(&mut sp, f); + + asm!(include_str!("init.s") + : "={rdi}"(sp) + : "{rdi}" (sp), + "{rsi}" (rust_trampoline::), + "{rdx}" (f_ptr), + "{rcx}" (sp_limit) + : + : "volatile"); + + Registers { rsp: sp } +} + +unsafe extern "C" fn rust_trampoline(f: *const F) { + ptr::read(f)() +} + +unsafe fn push(spp: &mut *mut uintptr_t, value: T) -> *mut T { + let mut sp = *spp as *mut T; + sp = offset_mut(sp, -1); + sp = align_down_mut(sp, max(align_of::(), 16)); + *sp = value; + *spp = sp as *mut uintptr_t; + sp +} + fn align_down_mut(sp: *mut T, n: usize) -> *mut T { let sp = (sp as usize) & !(n - 1); sp as *mut T } // ptr::offset_mut is positive ints only -#[inline] pub fn offset_mut(ptr: *mut T, count: isize) -> *mut T { (ptr as isize + count * (size_of::() as isize)) as *mut T } diff --git a/src/arch.s b/src/arch.s deleted file mode 100644 index 8afa2da..0000000 --- a/src/arch.s +++ /dev/null @@ -1,99 +0,0 @@ -; vim: ft=nasm -BITS 64 - -;; the structure containing every register that is saved on context switches. -;; this needs to match the struct in arch.rs, or shit will break badly. -struc context - ctx_rbx resq 1 - ctx_rsp resq 1 - ctx_rbp resq 1 - ctx_rdi resq 1 - ctx_r12 resq 1 - ctx_r13 resq 1 - ctx_r14 resq 1 - ctx_r15 resq 1 - ctx_ip: - resq 1 - alignb 16 - ctx_xmm0 resq 2 - ctx_xmm1 resq 2 - ctx_xmm2 resq 2 - ctx_xmm3 resq 2 - ctx_xmm4 resq 2 - ctx_xmm5 resq 2 -endstruc - -global lwt_swapcontext -lwt_swapcontext: -;; this is where the actual context switching takes place. first, save every -;; register in the current context into the leaving context, pointed at by rdi, -;; making sure the return address ends up in the IP slot. then, restore every -;; register from the entering context, pointed at by rsi, and jump to the -;; instruction pointer. - pop rax - - ; save instruction pointer - mov [rdi+ctx_ip], rax - - ; save non-volatile integer registers (including rsp) - mov [rdi+ctx_rbx], rbx - mov [rdi+ctx_rsp], rsp - mov [rdi+ctx_rbp], rbp - mov [rdi+ctx_r12], r12 - mov [rdi+ctx_r13], r13 - mov [rdi+ctx_r14], r14 - mov [rdi+ctx_r15], r15 - - ; save 0th argument register - mov [rdi+ctx_rdi], rdi - - ; save non-volatile XMM registers - movapd [rdi+ctx_xmm0], xmm0 - movapd [rdi+ctx_xmm1], xmm1 - movapd [rdi+ctx_xmm2], xmm2 - movapd [rdi+ctx_xmm3], xmm3 - movapd [rdi+ctx_xmm4], xmm4 - movapd [rdi+ctx_xmm5], xmm5 - - ; restore non-volatile integer registers - mov rbx, [rsi+ctx_rbx] - mov rsp, [rsi+ctx_rsp] - mov rbp, [rsi+ctx_rbp] - mov r12, [rsi+ctx_r12] - mov r13, [rsi+ctx_r13] - mov r14, [rsi+ctx_r14] - mov r15, [rsi+ctx_r15] - - ; restore 0th argument register - mov rdi, [rsi+ctx_rdi] - - ; restore non-volatile XMM registers - movapd xmm0, [rsi+ctx_xmm0] - movapd xmm1, [rsi+ctx_xmm1] - movapd xmm2, [rsi+ctx_xmm2] - movapd xmm3, [rsi+ctx_xmm3] - movapd xmm4, [rsi+ctx_xmm4] - movapd xmm5, [rsi+ctx_xmm5] - - jmp [rsi+ctx_ip] - -global lwt_bootstrap -lwt_bootstrap: -;; some of the parameter registers aren't saved on context switch, and thus -;; can't be set into the struct directly. thus, initialisation from Rust-land -;; places the parameters in unrelated registers, and we frob them into place -;; out here, in assembly-land. below are the parameter registers in order, -;; along with the alternative register used in parentheses, if there is one. -;; rdi, rsi (r12), rdx (r13), rcx (r14), r8(r15), r9 - mov rsi, r12 - mov rdx, r13 - mov rcx, r14 - mov r8, r15 - jmp rbx - -global lwt_abort -lwt_abort: -;; when a context is created for a native thread, it should only be switched -;; out of. if it's accidentally switched into, it'll hit this, because that's -;; what we set the initial IP to. - ud2 diff --git a/src/context.rs b/src/context.rs index 9092ae0..00f6c5e 100644 --- a/src/context.rs +++ b/src/context.rs @@ -1,61 +1,26 @@ use core::prelude::*; - -use core::mem::transmute; -use core::raw; -use alloc::boxed::Box; -use fn_box::FnBox; - -use stack::Stack; +use platform::Stack; use arch::{self, Registers}; +use platform; pub struct Context { regs: Registers, - stack: Stack + _stack: platform::Stack } -pub type BoxedFn = Box + Send + 'static>; -pub type StartFn = fn(data: *mut T, f: BoxedFn) -> !; - impl Context { - pub fn new(init: StartFn, data: *mut T, - f: BoxedFn) -> Context { + #[inline] + pub unsafe fn new(f: F) -> Context where F: FnOnce() + Send + 'static { let mut stack = Stack::new(4 << 20); - let f: raw::TraitObject = unsafe { transmute(f) }; - + let regs = arch::initialize_call_frame(&mut stack, f); Context { - regs: arch::initialise_call_frame(&mut stack, - init_ctx:: as arch::uintptr_t, - &[init as arch::uintptr_t, - data as arch::uintptr_t, - f.data as arch::uintptr_t, - f.vtable as arch::uintptr_t]), - stack: stack + regs: regs, + _stack: stack } } -} - -unsafe extern "C" fn init_ctx(start: StartFn, data: *mut T, - f_data: *mut (), f_vtable: *mut ()) -> ! { - let f: BoxedFn = transmute(raw::TraitObject { - data: f_data, - vtable: f_vtable - }); - - start(data, f) -} - -impl Context { - pub unsafe fn native() -> Context { - Context { - regs: Registers::new(), - stack: Stack::native(arch::get_sp_limit()) - } - } - #[inline(always)] - pub unsafe fn swap(out_context: &mut Context, in_context: &mut Context) { - arch::set_sp_limit(in_context.stack.limit()); - arch::swapcontext(&mut out_context.regs, &mut in_context.regs); + pub unsafe fn swap(&mut self) { + arch::swap(&mut self.regs) } } diff --git a/src/init.s b/src/init.s new file mode 100644 index 0000000..b1e3382 --- /dev/null +++ b/src/init.s @@ -0,0 +1,17 @@ +xchg %rsp, %rdi + +pushq %rsi +pushq %rdx +pushq %rcx +call 1f + +popq %fs:0x70 +popq %rdi +popq %rax + +movq $$0, %rbp +call *%rax +ud2 + +1: + xchg %rsp, %rdi diff --git a/src/lib.rs b/src/lib.rs index 8765a16..9acd058 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -2,16 +2,12 @@ #![no_std] #[macro_use] +#[allow(unstable)] extern crate core; -extern crate alloc; -extern crate fn_box; pub use context::Context; -mod std { pub use core::fmt; } - -#[macro_use] -mod macros; +mod std { pub use core::*; } mod context; mod stack; diff --git a/src/macros.rs b/src/macros.rs deleted file mode 100644 index a4d0fdb..0000000 --- a/src/macros.rs +++ /dev/null @@ -1,29 +0,0 @@ -macro_rules! __into_fields { - ($x:ident { $field:ident } <- $iter:ident) => { - match $iter.next() { - Some(value) => { - $x.$field = value; - Some($iter) - } - None => None - } - }; - ($x:ident { $field:ident, $($fields_rest:ident),* } <- $iter:ident) => { - match $iter.next() { - Some(value) => { - $x.$field = value; - __into_fields!($x { $($fields_rest),* } <- $iter) - } - None => None - } - }; -} - -macro_rules! into_fields { - ($x:ident { $($fields_rest:ident),* } <- $iter:expr) => { - { - let mut iter = $iter; - __into_fields!($x { $($fields_rest),* } <- iter) - } - } -} diff --git a/src/main.rs b/src/main.rs index 2ce02b7..e1cf924 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,36 +1,19 @@ +#![feature(thread_local)] extern crate lwkt; -extern crate fn_box; - -use std::ptr::null_mut; -use std::intrinsics::abort; use lwkt::Context; -use fn_box::FnBox; + +#[thread_local] +static mut ctx_slot: *mut Context = 0 as *mut Context; fn main() { - let f = Box::new(move |:| { - println!("Hello, world!") - }); - - let mut native = unsafe { Context::native() }; - - fn init(ctx: *mut (*mut Context, *mut Context), f: Box>) -> ! { - unsafe { - let (native, green) = *ctx; - - f(); - - Context::swap(&mut *green, &mut *native); - abort(); - } - } - - let mut ctx = Box::new((&mut native as *mut Context, null_mut())); - let mut green = Context::new(init, &mut *ctx as *mut _, f); - ctx.1 = &mut green as *mut Context; - unsafe { - Context::swap(&mut native, &mut green); - } + let mut ctx = Context::new(move |:| { + println!("it's alive!"); + (*ctx_slot).swap(); + }); - println!("size_of::() == {}", std::mem::size_of::()); + ctx_slot = &mut ctx; + + (*ctx_slot).swap(); + } } diff --git a/src/platform.rs b/src/platform.rs index de69ed0..a084832 100644 --- a/src/platform.rs +++ b/src/platform.rs @@ -1,8 +1,10 @@ +#![allow(unstable)] extern crate libc; extern crate std; use self::std::prelude::v1::*; use self::std::os::{errno, page_size, MemoryMap}; use self::std::os::MapOption::{MapReadable, MapWritable, MapNonStandardFlags}; +use stack; extern "C" { #[link_name = "lwt_stack_register"] @@ -62,14 +64,14 @@ impl Drop for Stack { } } -impl Stack { - pub fn top(&mut self) -> *mut u8 { +impl stack::Stack for Stack { + fn top(&mut self) -> *mut u8 { unsafe { self.buf.data().offset(self.buf.len() as isize) } } - pub fn limit(&self) -> *const u8 { + fn limit(&self) -> *const u8 { unsafe { self.buf.data().offset(page_size() as isize) as *const _ } diff --git a/src/stack.rs b/src/stack.rs index 0a9af93..81b8326 100644 --- a/src/stack.rs +++ b/src/stack.rs @@ -1,35 +1,4 @@ -use platform; -use core::ptr; - -pub enum Stack { - Native { - sp_limit: *const u8 - }, - Managed(platform::Stack) -} - -impl Stack { - pub fn new(size: usize) -> Stack { - Stack::Managed(platform::Stack::new(size)) - } - - pub unsafe fn native(limit: *const u8) -> Stack { - Stack::Native { - sp_limit: limit - } - } - - pub fn top(&mut self) -> *mut u8 { - match *self { - Stack::Native { .. } => ptr::null_mut(), - Stack::Managed(ref mut stack) => stack.top() - } - } - - pub fn limit(&self) -> *const u8 { - match *self { - Stack::Native { sp_limit, .. } => sp_limit, - Stack::Managed(ref stack) => stack.limit() - } - } +pub trait Stack { + fn top(&mut self) -> *mut u8; + fn limit(&self) -> *const u8; } diff --git a/src/swap.s b/src/swap.s new file mode 100644 index 0000000..1866b85 --- /dev/null +++ b/src/swap.s @@ -0,0 +1,17 @@ +sub $$128, %rsp +pushq %fs:0x70 +pushq %rbp +call 1f + +popq %rbp +popq %fs:0x70 +add $$128, %rsp +jmp 2f + +1: + movq (%rdi), %rax + movq %rsp, (%rdi) + movq %rax, %rsp + popq %rax + jmpq *%rax +2: