complete rewrite!

featuring 7ns inlineable context switches, no more separately-built
assembly objects, and a vastly nicer interface.
incontext/outcontext are no more; context switch calls now take a single
context structure, which functions as both.
everything now also functions without any heap allocations -- for the
context setup, only an FnOnce() value is necessary.
This commit is contained in:
edef 2015-01-14 08:31:17 +01:00
parent 5cf3a35d65
commit 299a0a5d98
13 changed files with 126 additions and 361 deletions

View File

@ -3,6 +3,3 @@ name = "lwkt"
version = "0.0.1" version = "0.0.1"
authors = ["edef <edef@edef.eu>"] authors = ["edef <edef@edef.eu>"]
build = "build.rs" build = "build.rs"
[dependencies]
fn_box = "^1.0.1"

View File

@ -1,36 +1,23 @@
#![feature(unboxed_closures, default_type_params, box_syntax)]
extern crate test; extern crate test;
extern crate libc;
extern crate lwkt; extern crate lwkt;
extern crate fn_box;
use test::Bencher;
use lwkt::Context; use lwkt::Context;
use fn_box::FnBox;
use std::ptr::null_mut; static mut ctx_slot: *mut Context = 0 as *mut Context;
use std::mem::{transmute, forget};
#[bench] #[bench]
fn swap(b: &mut Bencher) { fn swap(b: &mut test::Bencher) {
let mut native = unsafe { Context::native() };
let f: Box<FnBox() + Send + 'static> = unsafe { transmute((1u, 1u)) };
let mut ctx = box { (&mut native as *mut Context, null_mut()) };
let mut green = Context::new(init, &mut *ctx as *mut _, f);
ctx.1 = &mut green as *mut Context;
fn init(ctx: *mut (*mut Context, *mut Context), f: Box<FnBox()>) -> ! {
unsafe {
let (native, green) = *ctx;
forget(f);
loop { Context::swap(&mut *green, &mut *native); }
}
}
unsafe { unsafe {
Context::swap(&mut native, &mut green); let mut ctx = Context::new(move |:| {
} let ctx_ptr = ctx_slot;
loop {
(*ctx_ptr).swap()
}
});
b.iter(|| unsafe { ctx_slot = &mut ctx;
Context::swap(&mut native, &mut green);
}) ctx.swap();
b.iter(|| ctx.swap());
}
} }

View File

@ -22,13 +22,6 @@ fn main() {
.arg(outpath.clone()) .arg(outpath.clone())
.status().unwrap(); .status().unwrap();
} }
else if let Some(basename) = eat_extension(filename, ".s") {
outpath = format!("{}/{}.o", out_dir, basename);
Command::new("nasm").args(&[filepath.as_slice(), "-felf64", "-o"])
.arg(outpath.clone())
.status().unwrap();
}
else { continue } else { continue }
objects.push(outpath); objects.push(outpath);

View File

@ -1,102 +1,69 @@
use core::prelude::*; use core::prelude::*;
use core::simd::u64x2; use core::mem::{size_of, align_of};
use core::mem::{size_of, zeroed}; use core::cmp::max;
use core::ptr;
use stack::Stack; use stack::Stack;
// Routines implemented in the separately assembled NASM object and bound here
// by their `lwt_`-prefixed symbol names.
extern "C" {
// Entry stub for a fresh context: moves the values parked in r12-r15 into the
// rsi, rdx, rcx and r8 argument registers, then jumps to the address in rbx.
#[link_name = "lwt_bootstrap"]
pub fn bootstrap();
// Stores the current register state into `save`, then loads the state held in
// `restore` and jumps to its saved instruction pointer.
#[link_name = "lwt_swapcontext"]
pub fn swapcontext(save: *mut Registers, restore: *mut Registers);
// Executes `ud2`; never returns.
#[link_name = "lwt_abort"]
pub fn abort() -> !;
}
#[allow(non_camel_case_types)] #[allow(non_camel_case_types)]
pub type uintptr_t = u64; pub type uintptr_t = u64;
#[repr(C)]
#[allow(dead_code)]
pub struct Registers { pub struct Registers {
rbx: u64, rsp: *mut uintptr_t
rsp: u64,
rbp: u64,
rdi: u64,
r12: u64,
r13: u64,
r14: u64,
r15: u64,
ip: u64,
xmm0: u64x2,
xmm1: u64x2,
xmm2: u64x2,
xmm3: u64x2,
xmm4: u64x2,
xmm5: u64x2,
} }
impl Registers { impl Copy for Registers {}
pub fn new() -> Registers {
unsafe {
Registers {
ip: abort as uintptr_t,
.. zeroed()
}
}
}
}
/// Builds the register set for a context that has not run yet.
///
/// The stack pointer is placed one word below the 16-byte-aligned top of
/// `stack`, and that word is zeroed. The saved instruction pointer is the
/// `bootstrap` stub and `rbx` carries `init`, the address the stub jumps to.
///
/// `args` are stored, in order, into `rdi`, `r12`, `r13`, `r14` and `r15`.
/// Fewer than five values leave the remaining registers at whatever
/// `Registers::new()` provides.
///
/// # Panics
///
/// Panics with "too many arguments" when `args` holds more than five values.
pub fn initialise_call_frame(stack: &mut Stack, init: uintptr_t, args: &[uintptr_t]) -> Registers {
    let aligned_top = align_down_mut(stack.top() as *mut uintptr_t, 16);
    let frame = offset_mut(aligned_top, -1);
    unsafe {
        // NOTE(review): presumably a null return address for the entry frame -- confirm.
        *frame = 0;
    }

    let mut regs = Registers {
        rbx: init,
        rsp: frame as uintptr_t,
        rbp: 0,
        ip: bootstrap as uintptr_t,
        .. Registers::new()
    };

    // `into_fields!` hands back the iterator only when every listed register
    // received a value; anything still left in it is an argument too many.
    if let Some(mut leftover) = into_fields!(regs { rdi, r12, r13, r14, r15 } <- args.iter().cloned()) {
        if leftover.next().is_some() {
            panic!("too many arguments")
        }
    }

    regs
}
// Rust stores a stack limit at [fs:0x70]. These two functions set and retrieve
// the limit. They're marked as #[inline(always)] so that they can be used in
// situations where the stack limit is invalid.
#[inline(always)] #[inline(always)]
pub unsafe fn get_sp_limit() -> *const u8 { pub unsafe fn swap(regs: &mut Registers) {
let limit; asm!(include_str!("swap.s")
asm!("movq %fs:0x70, $0" : "=r"(limit) ::: "volatile"); :
limit : "{rdi}" (&mut regs.rsp)
} : "rax", "rbx", "rcx", "rdx", "rsi", "rdi",
"r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
#[inline(always)] "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7",
pub unsafe fn set_sp_limit(limit: *const u8) { "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15",
asm!("movq $0, %fs:0x70" :: "r"(limit) :: "volatile"); "cc"
: "volatile");
} }
/// Prepares `stack` so that the first switch into the returned `Registers`
/// runs `f`.
///
/// `f` is moved onto the new stack itself (via `push`), and the `init.s`
/// template then pushes the initial frame below it. The returned `Registers`
/// holds the stack pointer that results.
///
/// Unsafe: writes through raw pointers derived from `stack.top()` and executes
/// inline assembly.
#[inline]
pub unsafe fn initialize_call_frame<S, F>(stack: &mut S, f: F) -> Registers where S: Stack, F: FnOnce() {
let sp_limit = stack.limit();
let mut sp = stack.top() as *mut uintptr_t;
// Place the closure at the top of the new stack; `sp` is moved down past it.
let f_ptr = push(&mut sp, f);
// Operands: rdi carries the new stack pointer in and out, rsi the trampoline
// instantiated for `F`, rdx the address of the closure, rcx the stack limit.
asm!(include_str!("init.s")
: "={rdi}"(sp)
: "{rdi}" (sp),
"{rsi}" (rust_trampoline::<F>),
"{rdx}" (f_ptr),
"{rcx}" (sp_limit)
:
: "volatile");
Registers { rsp: sp }
}
/// C-ABI entry shim: takes ownership of the closure stored at `f` and calls it.
///
/// The closure is moved out of `*f` with a bitwise read, so the caller must
/// not use or drop the value at `f` afterwards.
unsafe extern "C" fn rust_trampoline<F: FnOnce()>(f: *const F) {
    let closure: F = ptr::read(f);
    closure()
}
unsafe fn push<T>(spp: &mut *mut uintptr_t, value: T) -> *mut T {
let mut sp = *spp as *mut T;
sp = offset_mut(sp, -1);
sp = align_down_mut(sp, max(align_of::<T>(), 16));
*sp = value;
*spp = sp as *mut uintptr_t;
sp
}
fn align_down_mut<T>(sp: *mut T, n: usize) -> *mut T { fn align_down_mut<T>(sp: *mut T, n: usize) -> *mut T {
let sp = (sp as usize) & !(n - 1); let sp = (sp as usize) & !(n - 1);
sp as *mut T sp as *mut T
} }
// ptr::offset_mut is positive ints only // ptr::offset_mut is positive ints only
#[inline]
pub fn offset_mut<T>(ptr: *mut T, count: isize) -> *mut T { pub fn offset_mut<T>(ptr: *mut T, count: isize) -> *mut T {
(ptr as isize + count * (size_of::<T>() as isize)) as *mut T (ptr as isize + count * (size_of::<T>() as isize)) as *mut T
} }

View File

@ -1,99 +0,0 @@
; vim: ft=nasm
BITS 64
;; the structure containing every register that is saved on context switches.
;; the field order and sizes must match the Registers struct in arch.rs
;; exactly; the routines below address fields purely by these offsets.
struc context
ctx_rbx resq 1
ctx_rsp resq 1
ctx_rbp resq 1
ctx_rdi resq 1
ctx_r12 resq 1
ctx_r13 resq 1
ctx_r14 resq 1
ctx_r15 resq 1
ctx_ip:
resq 1
;; the XMM slots are accessed with movapd, which needs 16-byte alignment
alignb 16
ctx_xmm0 resq 2
ctx_xmm1 resq 2
ctx_xmm2 resq 2
ctx_xmm3 resq 2
ctx_xmm4 resq 2
ctx_xmm5 resq 2
endstruc
global lwt_swapcontext
lwt_swapcontext:
;; this is where the actual context switching takes place. first, save every
;; register in the current context into the leaving context, pointed at by rdi,
;; making sure the return address ends up in the IP slot. then, restore every
;; register from the entering context, pointed at by rsi, and jump to the
;; instruction pointer.
;; popping the return address first means the rsp saved below is the value the
;; caller sees once this call has returned.
pop rax
; save instruction pointer
mov [rdi+ctx_ip], rax
; save non-volatile integer registers (including rsp)
mov [rdi+ctx_rbx], rbx
mov [rdi+ctx_rsp], rsp
mov [rdi+ctx_rbp], rbp
mov [rdi+ctx_r12], r12
mov [rdi+ctx_r13], r13
mov [rdi+ctx_r14], r14
mov [rdi+ctx_r15], r15
; save 0th argument register
mov [rdi+ctx_rdi], rdi
; save XMM registers 0-5
; NOTE(review): labelled "non-volatile" originally; which XMM registers are
; callee-saved depends on the ABI in use -- confirm this set is the intended one.
movapd [rdi+ctx_xmm0], xmm0
movapd [rdi+ctx_xmm1], xmm1
movapd [rdi+ctx_xmm2], xmm2
movapd [rdi+ctx_xmm3], xmm3
movapd [rdi+ctx_xmm4], xmm4
movapd [rdi+ctx_xmm5], xmm5
; restore non-volatile integer registers
mov rbx, [rsi+ctx_rbx]
mov rsp, [rsi+ctx_rsp]
mov rbp, [rsi+ctx_rbp]
mov r12, [rsi+ctx_r12]
mov r13, [rsi+ctx_r13]
mov r14, [rsi+ctx_r14]
mov r15, [rsi+ctx_r15]
; restore 0th argument register
mov rdi, [rsi+ctx_rdi]
; restore XMM registers 0-5
movapd xmm0, [rsi+ctx_xmm0]
movapd xmm1, [rsi+ctx_xmm1]
movapd xmm2, [rsi+ctx_xmm2]
movapd xmm3, [rsi+ctx_xmm3]
movapd xmm4, [rsi+ctx_xmm4]
movapd xmm5, [rsi+ctx_xmm5]
; resume the entering context at its saved instruction pointer
jmp [rsi+ctx_ip]
global lwt_bootstrap
lwt_bootstrap:
;; some of the parameter registers aren't saved on context switch, and thus
;; can't be set into the struct directly. thus, initialisation from Rust-land
;; places the parameters in unrelated registers, and we move them into place
;; out here, in assembly-land. below are the parameter registers in order,
;; along with the alternative register used in parentheses, if there is one.
;; rdi, rsi (r12), rdx (r13), rcx (r14), r8(r15), r9
;; rdi is restored by the context switch itself, so it needs no move here.
mov rsi, r12
mov rdx, r13
mov rcx, r14
mov r8, r15
;; rbx holds the address of the function to start the context in
jmp rbx
global lwt_abort
lwt_abort:
;; when a context is created for a native thread, it should only be switched
;; out of. if it's accidentally switched into, it'll hit this, because that's
;; what we set the initial IP to.
;; ud2 raises an invalid-opcode fault, so execution stops here.
ud2

View File

@ -1,61 +1,26 @@
use core::prelude::*; use core::prelude::*;
use platform::Stack;
use core::mem::transmute;
use core::raw;
use alloc::boxed::Box;
use fn_box::FnBox;
use stack::Stack;
use arch::{self, Registers}; use arch::{self, Registers};
use platform;
pub struct Context { pub struct Context {
regs: Registers, regs: Registers,
stack: Stack _stack: platform::Stack
} }
pub type BoxedFn<Args, Result> = Box<FnBox<Args, Result> + Send + 'static>;
pub type StartFn<T, Args, Result> = fn(data: *mut T, f: BoxedFn<Args, Result>) -> !;
impl Context { impl Context {
pub fn new<T, Args, Result>(init: StartFn<T, Args, Result>, data: *mut T, #[inline]
f: BoxedFn<Args, Result>) -> Context { pub unsafe fn new<F>(f: F) -> Context where F: FnOnce() + Send + 'static {
let mut stack = Stack::new(4 << 20); let mut stack = Stack::new(4 << 20);
let f: raw::TraitObject = unsafe { transmute(f) }; let regs = arch::initialize_call_frame(&mut stack, f);
Context { Context {
regs: arch::initialise_call_frame(&mut stack, regs: regs,
init_ctx::<T, Args, Result> as arch::uintptr_t, _stack: stack
&[init as arch::uintptr_t,
data as arch::uintptr_t,
f.data as arch::uintptr_t,
f.vtable as arch::uintptr_t]),
stack: stack
} }
} }
}
/// C-ABI entry point of a new context.
///
/// Reassembles the boxed closure from the two raw halves (`f_data`,
/// `f_vtable`) it was split into for passing through registers, then hands it
/// together with `data` to `start`, which never returns.
unsafe extern "C" fn init_ctx<T, A, R>(start: StartFn<T, A, R>, data: *mut T,
                                       f_data: *mut (), f_vtable: *mut ()) -> ! {
    let fat_ptr = raw::TraitObject {
        data: f_data,
        vtable: f_vtable
    };
    let boxed: BoxedFn<A, R> = transmute(fat_ptr);
    start(data, boxed)
}
impl Context {
pub unsafe fn native() -> Context {
Context {
regs: Registers::new(),
stack: Stack::native(arch::get_sp_limit())
}
}
#[inline(always)] #[inline(always)]
pub unsafe fn swap(out_context: &mut Context, in_context: &mut Context) { pub unsafe fn swap(&mut self) {
arch::set_sp_limit(in_context.stack.limit()); arch::swap(&mut self.regs)
arch::swapcontext(&mut out_context.regs, &mut in_context.regs);
} }
} }

17
src/init.s Normal file
View File

@ -0,0 +1,17 @@
# Builds the initial frame of a new context on that context's own stack.
# Inputs, as bound by the asm! call in initialize_call_frame:
#   rdi = stack pointer of the new stack, rsi = trampoline function,
#   rdx = pointer to the closure, rcx = stack limit.
# Output: rdi = stack pointer of the new context, pointing at its resume address.
#
# Temporarily run on the new stack; rdi keeps the caller's stack pointer.
xchg %rsp, %rdi
# Pushed in the reverse of the order in which the entry code below pops them.
pushq %rsi
pushq %rdx
pushq %rcx
# Pushes the address of the next instruction, which becomes the resume address
# of the new context, and skips ahead to label 1.
call 1f
# Entry code: runs only once the context is switched into for the first time.
# Install the stack limit of this context (Rust keeps it at fs:0x70).
popq %fs:0x70
# The closure pointer becomes the argument of the trampoline.
popq %rdi
# Address of the trampoline.
popq %rax
# NOTE(review): presumably clears rbp to end the frame-pointer chain -- confirm.
movq $$0, %rbp
call *%rax
# Trap if the trampoline ever returns.
ud2
1:
# Back onto the caller's stack; rdi now holds the new context's stack pointer.
xchg %rsp, %rdi

View File

@ -2,16 +2,12 @@
#![no_std] #![no_std]
#[macro_use] #[macro_use]
#[allow(unstable)]
extern crate core; extern crate core;
extern crate alloc;
extern crate fn_box;
pub use context::Context; pub use context::Context;
mod std { pub use core::fmt; } mod std { pub use core::*; }
#[macro_use]
mod macros;
mod context; mod context;
mod stack; mod stack;

View File

@ -1,29 +0,0 @@
// Recursive worker behind `into_fields!`.
//
// Pulls one value from the iterator named `$source` for each listed field of
// `$target`, assigning them in order. Evaluates to `Some($source)` (the
// iterator, moved) once every field has been assigned, or to `None` as soon as
// the iterator runs dry; fields assigned before that point keep their values.
macro_rules! __into_fields {
    // Last field: on success hand the iterator back to the caller.
    ($target:ident { $last:ident } <- $source:ident) => {
        if let Some(item) = $source.next() {
            $target.$last = item;
            Some($source)
        } else {
            None
        }
    };
    // Assign the first field, then recurse on the remaining ones.
    ($target:ident { $head:ident, $($tail:ident),* } <- $source:ident) => {
        if let Some(item) = $source.next() {
            $target.$head = item;
            __into_fields!($target { $($tail),* } <- $source)
        } else {
            None
        }
    };
}
// Assigns successive values of the iterator expression `$values` to the listed
// fields of `$target`, in order.
//
// Evaluates to `Some(iterator)` holding whatever values are left once every
// field has been assigned, or to `None` if the iterator ran out first.
macro_rules! into_fields {
    ($target:ident { $($field:ident),* } <- $values:expr) => {{
        // Bind the expression to a named local: the worker macro needs an
        // identifier it can call `.next()` on repeatedly.
        let mut remaining = $values;
        __into_fields!($target { $($field),* } <- remaining)
    }};
}

View File

@ -1,36 +1,19 @@
#![feature(thread_local)]
extern crate lwkt; extern crate lwkt;
extern crate fn_box;
use std::ptr::null_mut;
use std::intrinsics::abort;
use lwkt::Context; use lwkt::Context;
use fn_box::FnBox;
#[thread_local]
static mut ctx_slot: *mut Context = 0 as *mut Context;
fn main() { fn main() {
let f = Box::new(move |:| {
println!("Hello, world!")
});
let mut native = unsafe { Context::native() };
fn init(ctx: *mut (*mut Context, *mut Context), f: Box<FnBox<(), ()>>) -> ! {
unsafe {
let (native, green) = *ctx;
f();
Context::swap(&mut *green, &mut *native);
abort();
}
}
let mut ctx = Box::new((&mut native as *mut Context, null_mut()));
let mut green = Context::new(init, &mut *ctx as *mut _, f);
ctx.1 = &mut green as *mut Context;
unsafe { unsafe {
Context::swap(&mut native, &mut green); let mut ctx = Context::new(move |:| {
} println!("it's alive!");
(*ctx_slot).swap();
});
println!("size_of::<Context>() == {}", std::mem::size_of::<Context>()); ctx_slot = &mut ctx;
(*ctx_slot).swap();
}
} }

View File

@ -1,8 +1,10 @@
#![allow(unstable)]
extern crate libc; extern crate libc;
extern crate std; extern crate std;
use self::std::prelude::v1::*; use self::std::prelude::v1::*;
use self::std::os::{errno, page_size, MemoryMap}; use self::std::os::{errno, page_size, MemoryMap};
use self::std::os::MapOption::{MapReadable, MapWritable, MapNonStandardFlags}; use self::std::os::MapOption::{MapReadable, MapWritable, MapNonStandardFlags};
use stack;
extern "C" { extern "C" {
#[link_name = "lwt_stack_register"] #[link_name = "lwt_stack_register"]
@ -62,14 +64,14 @@ impl Drop for Stack {
} }
} }
impl Stack { impl stack::Stack for Stack {
pub fn top(&mut self) -> *mut u8 { fn top(&mut self) -> *mut u8 {
unsafe { unsafe {
self.buf.data().offset(self.buf.len() as isize) self.buf.data().offset(self.buf.len() as isize)
} }
} }
pub fn limit(&self) -> *const u8 { fn limit(&self) -> *const u8 {
unsafe { unsafe {
self.buf.data().offset(page_size() as isize) as *const _ self.buf.data().offset(page_size() as isize) as *const _
} }

View File

@ -1,35 +1,4 @@
use platform; pub trait Stack {
use core::ptr; fn top(&mut self) -> *mut u8;
fn limit(&self) -> *const u8;
// The stack a context runs on.
pub enum Stack {
// A stack this crate did not allocate; only its limit is recorded.
Native {
sp_limit: *const u8
},
// A stack allocated through the platform layer.
Managed(platform::Stack)
}
impl Stack {
pub fn new(size: usize) -> Stack {
Stack::Managed(platform::Stack::new(size))
}
pub unsafe fn native(limit: *const u8) -> Stack {
Stack::Native {
sp_limit: limit
}
}
pub fn top(&mut self) -> *mut u8 {
match *self {
Stack::Native { .. } => ptr::null_mut(),
Stack::Managed(ref mut stack) => stack.top()
}
}
pub fn limit(&self) -> *const u8 {
match *self {
Stack::Native { sp_limit, .. } => sp_limit,
Stack::Managed(ref stack) => stack.limit()
}
}
} }

17
src/swap.s Normal file
View File

@ -0,0 +1,17 @@
# Switches between the running context and the one whose stack pointer is
# stored at the address in rdi; the two stack pointers trade places.
#
# NOTE(review): presumably steps over the 128-byte red zone below rsp so the
# pushes cannot overwrite data of the surrounding function -- confirm.
sub $$128, %rsp
# Save the stack limit of the running context (Rust keeps it at fs:0x70).
pushq %fs:0x70
# Save the frame pointer by hand; the other registers are declared as clobbers
# by the asm! call that includes this template.
pushq %rbp
# Pushes the address of the next instruction as the resume address of the
# running context, then continues at label 1.
call 1f
# Resume point: reached when another context switches back into this one.
popq %rbp
popq %fs:0x70
add $$128, %rsp
jmp 2f
1:
# Load the stack pointer of the other context, store ours in its place.
movq (%rdi), %rax
movq %rsp, (%rdi)
movq %rax, %rsp
# Pop the resume address of the other context and continue there.
popq %rax
jmpq *%rax
2:
2: