complete rewrite!
featuring 7ns inlineable context switches, no more separately-built assembly objects, and a vastly nicer interface. incontext/outcontext are no more, context switch calls now take a single context structure, which functions as both. everything now also functions without any heap allocations -- for the context setup, only an FnOnce() value is necessary.
This commit is contained in:
parent
5cf3a35d65
commit
299a0a5d98
|
@ -3,6 +3,3 @@ name = "lwkt"
|
|||
version = "0.0.1"
|
||||
authors = ["edef <edef@edef.eu>"]
|
||||
build = "build.rs"
|
||||
|
||||
[dependencies]
|
||||
fn_box = "^1.0.1"
|
||||
|
|
|
@ -1,36 +1,23 @@
|
|||
#![feature(unboxed_closures, default_type_params, box_syntax)]
|
||||
extern crate test;
|
||||
extern crate libc;
|
||||
extern crate lwkt;
|
||||
extern crate fn_box;
|
||||
use test::Bencher;
|
||||
use lwkt::Context;
|
||||
use fn_box::FnBox;
|
||||
use std::ptr::null_mut;
|
||||
use std::mem::{transmute, forget};
|
||||
|
||||
static mut ctx_slot: *mut Context = 0 as *mut Context;
|
||||
|
||||
#[bench]
|
||||
fn swap(b: &mut Bencher) {
|
||||
let mut native = unsafe { Context::native() };
|
||||
let f: Box<FnBox() + Send + 'static> = unsafe { transmute((1u, 1u)) };
|
||||
|
||||
let mut ctx = box { (&mut native as *mut Context, null_mut()) };
|
||||
let mut green = Context::new(init, &mut *ctx as *mut _, f);
|
||||
ctx.1 = &mut green as *mut Context;
|
||||
|
||||
fn init(ctx: *mut (*mut Context, *mut Context), f: Box<FnBox()>) -> ! {
|
||||
fn swap(b: &mut test::Bencher) {
|
||||
unsafe {
|
||||
let (native, green) = *ctx;
|
||||
forget(f);
|
||||
loop { Context::swap(&mut *green, &mut *native); }
|
||||
}
|
||||
let mut ctx = Context::new(move |:| {
|
||||
let ctx_ptr = ctx_slot;
|
||||
loop {
|
||||
(*ctx_ptr).swap()
|
||||
}
|
||||
});
|
||||
|
||||
unsafe {
|
||||
Context::swap(&mut native, &mut green);
|
||||
}
|
||||
ctx_slot = &mut ctx;
|
||||
|
||||
b.iter(|| unsafe {
|
||||
Context::swap(&mut native, &mut green);
|
||||
})
|
||||
ctx.swap();
|
||||
|
||||
b.iter(|| ctx.swap());
|
||||
}
|
||||
}
|
||||
|
|
7
build.rs
7
build.rs
|
@ -22,13 +22,6 @@ fn main() {
|
|||
.arg(outpath.clone())
|
||||
.status().unwrap();
|
||||
}
|
||||
else if let Some(basename) = eat_extension(filename, ".s") {
|
||||
outpath = format!("{}/{}.o", out_dir, basename);
|
||||
|
||||
Command::new("nasm").args(&[filepath.as_slice(), "-felf64", "-o"])
|
||||
.arg(outpath.clone())
|
||||
.status().unwrap();
|
||||
}
|
||||
else { continue }
|
||||
|
||||
objects.push(outpath);
|
||||
|
|
123
src/arch.rs
123
src/arch.rs
|
@ -1,102 +1,69 @@
|
|||
use core::prelude::*;
|
||||
use core::simd::u64x2;
|
||||
use core::mem::{size_of, zeroed};
|
||||
use core::mem::{size_of, align_of};
|
||||
use core::cmp::max;
|
||||
use core::ptr;
|
||||
|
||||
use stack::Stack;
|
||||
|
||||
extern "C" {
|
||||
#[link_name = "lwt_bootstrap"]
|
||||
pub fn bootstrap();
|
||||
#[link_name = "lwt_swapcontext"]
|
||||
pub fn swapcontext(save: *mut Registers, restore: *mut Registers);
|
||||
#[link_name = "lwt_abort"]
|
||||
pub fn abort() -> !;
|
||||
}
|
||||
|
||||
#[allow(non_camel_case_types)]
|
||||
pub type uintptr_t = u64;
|
||||
|
||||
#[repr(C)]
|
||||
#[allow(dead_code)]
|
||||
pub struct Registers {
|
||||
rbx: u64,
|
||||
rsp: u64,
|
||||
rbp: u64,
|
||||
rdi: u64,
|
||||
r12: u64,
|
||||
r13: u64,
|
||||
r14: u64,
|
||||
r15: u64,
|
||||
ip: u64,
|
||||
xmm0: u64x2,
|
||||
xmm1: u64x2,
|
||||
xmm2: u64x2,
|
||||
xmm3: u64x2,
|
||||
xmm4: u64x2,
|
||||
xmm5: u64x2,
|
||||
rsp: *mut uintptr_t
|
||||
}
|
||||
|
||||
impl Registers {
|
||||
pub fn new() -> Registers {
|
||||
unsafe {
|
||||
Registers {
|
||||
ip: abort as uintptr_t,
|
||||
.. zeroed()
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn initialise_call_frame(stack: &mut Stack, init: uintptr_t, args: &[uintptr_t]) -> Registers {
|
||||
let sp = stack.top() as *mut uintptr_t;
|
||||
let sp = align_down_mut(sp, 16);
|
||||
let sp = offset_mut(sp, -1);
|
||||
unsafe {
|
||||
*sp = 0;
|
||||
}
|
||||
|
||||
let mut regs = Registers {
|
||||
rbp: 0,
|
||||
rsp: sp as uintptr_t,
|
||||
ip: bootstrap as uintptr_t,
|
||||
rbx: init,
|
||||
.. Registers::new()
|
||||
};
|
||||
|
||||
match into_fields!(regs { rdi, r12, r13, r14, r15 } <- args.iter().cloned()) {
|
||||
Some(mut args) => if args.next().is_some() {
|
||||
panic!("too many arguments")
|
||||
},
|
||||
None => {}
|
||||
}
|
||||
|
||||
regs
|
||||
}
|
||||
|
||||
// Rust stores a stack limit at [fs:0x70]. These two functions set and retrieve
|
||||
// the limit. They're marked as #[inline(always)] so that they can be used in
|
||||
// situations where the stack limit is invalid.
|
||||
impl Copy for Registers {}
|
||||
|
||||
#[inline(always)]
|
||||
pub unsafe fn get_sp_limit() -> *const u8 {
|
||||
let limit;
|
||||
asm!("movq %fs:0x70, $0" : "=r"(limit) ::: "volatile");
|
||||
limit
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub unsafe fn set_sp_limit(limit: *const u8) {
|
||||
asm!("movq $0, %fs:0x70" :: "r"(limit) :: "volatile");
|
||||
pub unsafe fn swap(regs: &mut Registers) {
|
||||
asm!(include_str!("swap.s")
|
||||
:
|
||||
: "{rdi}" (&mut regs.rsp)
|
||||
: "rax", "rbx", "rcx", "rdx", "rsi", "rdi",
|
||||
"r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
|
||||
"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7",
|
||||
"xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15",
|
||||
"cc"
|
||||
: "volatile");
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub unsafe fn initialize_call_frame<S, F>(stack: &mut S, f: F) -> Registers where S: Stack, F: FnOnce() {
|
||||
let sp_limit = stack.limit();
|
||||
let mut sp = stack.top() as *mut uintptr_t;
|
||||
let f_ptr = push(&mut sp, f);
|
||||
|
||||
asm!(include_str!("init.s")
|
||||
: "={rdi}"(sp)
|
||||
: "{rdi}" (sp),
|
||||
"{rsi}" (rust_trampoline::<F>),
|
||||
"{rdx}" (f_ptr),
|
||||
"{rcx}" (sp_limit)
|
||||
:
|
||||
: "volatile");
|
||||
|
||||
Registers { rsp: sp }
|
||||
}
|
||||
|
||||
unsafe extern "C" fn rust_trampoline<F: FnOnce()>(f: *const F) {
|
||||
ptr::read(f)()
|
||||
}
|
||||
|
||||
unsafe fn push<T>(spp: &mut *mut uintptr_t, value: T) -> *mut T {
|
||||
let mut sp = *spp as *mut T;
|
||||
sp = offset_mut(sp, -1);
|
||||
sp = align_down_mut(sp, max(align_of::<T>(), 16));
|
||||
*sp = value;
|
||||
*spp = sp as *mut uintptr_t;
|
||||
sp
|
||||
}
|
||||
|
||||
fn align_down_mut<T>(sp: *mut T, n: usize) -> *mut T {
|
||||
let sp = (sp as usize) & !(n - 1);
|
||||
sp as *mut T
|
||||
}
|
||||
|
||||
// ptr::offset_mut is positive ints only
|
||||
#[inline]
|
||||
pub fn offset_mut<T>(ptr: *mut T, count: isize) -> *mut T {
|
||||
(ptr as isize + count * (size_of::<T>() as isize)) as *mut T
|
||||
}
|
||||
|
|
99
src/arch.s
99
src/arch.s
|
@ -1,99 +0,0 @@
|
|||
; vim: ft=nasm
|
||||
BITS 64
|
||||
|
||||
;; the structure containing every register that is saved on context switches.
|
||||
;; this needs to match the struct in arch.rs, or shit will break badly.
|
||||
struc context
|
||||
ctx_rbx resq 1
|
||||
ctx_rsp resq 1
|
||||
ctx_rbp resq 1
|
||||
ctx_rdi resq 1
|
||||
ctx_r12 resq 1
|
||||
ctx_r13 resq 1
|
||||
ctx_r14 resq 1
|
||||
ctx_r15 resq 1
|
||||
ctx_ip:
|
||||
resq 1
|
||||
alignb 16
|
||||
ctx_xmm0 resq 2
|
||||
ctx_xmm1 resq 2
|
||||
ctx_xmm2 resq 2
|
||||
ctx_xmm3 resq 2
|
||||
ctx_xmm4 resq 2
|
||||
ctx_xmm5 resq 2
|
||||
endstruc
|
||||
|
||||
global lwt_swapcontext
|
||||
lwt_swapcontext:
|
||||
;; this is where the actual context switching takes place. first, save every
|
||||
;; register in the current context into the leaving context, pointed at by rdi,
|
||||
;; making sure the return address ends up in the IP slot. then, restore every
|
||||
;; register from the entering context, pointed at by rsi, and jump to the
|
||||
;; instruction pointer.
|
||||
pop rax
|
||||
|
||||
; save instruction pointer
|
||||
mov [rdi+ctx_ip], rax
|
||||
|
||||
; save non-volatile integer registers (including rsp)
|
||||
mov [rdi+ctx_rbx], rbx
|
||||
mov [rdi+ctx_rsp], rsp
|
||||
mov [rdi+ctx_rbp], rbp
|
||||
mov [rdi+ctx_r12], r12
|
||||
mov [rdi+ctx_r13], r13
|
||||
mov [rdi+ctx_r14], r14
|
||||
mov [rdi+ctx_r15], r15
|
||||
|
||||
; save 0th argument register
|
||||
mov [rdi+ctx_rdi], rdi
|
||||
|
||||
; save non-volatile XMM registers
|
||||
movapd [rdi+ctx_xmm0], xmm0
|
||||
movapd [rdi+ctx_xmm1], xmm1
|
||||
movapd [rdi+ctx_xmm2], xmm2
|
||||
movapd [rdi+ctx_xmm3], xmm3
|
||||
movapd [rdi+ctx_xmm4], xmm4
|
||||
movapd [rdi+ctx_xmm5], xmm5
|
||||
|
||||
; restore non-volatile integer registers
|
||||
mov rbx, [rsi+ctx_rbx]
|
||||
mov rsp, [rsi+ctx_rsp]
|
||||
mov rbp, [rsi+ctx_rbp]
|
||||
mov r12, [rsi+ctx_r12]
|
||||
mov r13, [rsi+ctx_r13]
|
||||
mov r14, [rsi+ctx_r14]
|
||||
mov r15, [rsi+ctx_r15]
|
||||
|
||||
; restore 0th argument register
|
||||
mov rdi, [rsi+ctx_rdi]
|
||||
|
||||
; restore non-volatile XMM registers
|
||||
movapd xmm0, [rsi+ctx_xmm0]
|
||||
movapd xmm1, [rsi+ctx_xmm1]
|
||||
movapd xmm2, [rsi+ctx_xmm2]
|
||||
movapd xmm3, [rsi+ctx_xmm3]
|
||||
movapd xmm4, [rsi+ctx_xmm4]
|
||||
movapd xmm5, [rsi+ctx_xmm5]
|
||||
|
||||
jmp [rsi+ctx_ip]
|
||||
|
||||
global lwt_bootstrap
|
||||
lwt_bootstrap:
|
||||
;; some of the parameter registers aren't saved on context switch, and thus
|
||||
;; can't be set into the struct directly. thus, initialisation from Rust-land
|
||||
;; places the parameters in unrelated registers, and we frob them into place
|
||||
;; out here, in assembly-land. below are the parameter registers in order,
|
||||
;; along with the alternative register used in parentheses, if there is one.
|
||||
;; rdi, rsi (r12), rdx (r13), rcx (r14), r8(r15), r9
|
||||
mov rsi, r12
|
||||
mov rdx, r13
|
||||
mov rcx, r14
|
||||
mov r8, r15
|
||||
jmp rbx
|
||||
|
||||
global lwt_abort
|
||||
lwt_abort:
|
||||
;; when a context is created for a native thread, it should only be switched
|
||||
;; out of. if it's accidentally switched into, it'll hit this, because that's
|
||||
;; what we set the initial IP to.
|
||||
ud2
|
|
@ -1,61 +1,26 @@
|
|||
use core::prelude::*;
|
||||
|
||||
use core::mem::transmute;
|
||||
use core::raw;
|
||||
use alloc::boxed::Box;
|
||||
use fn_box::FnBox;
|
||||
|
||||
use stack::Stack;
|
||||
use platform::Stack;
|
||||
use arch::{self, Registers};
|
||||
use platform;
|
||||
|
||||
pub struct Context {
|
||||
regs: Registers,
|
||||
stack: Stack
|
||||
_stack: platform::Stack
|
||||
}
|
||||
|
||||
pub type BoxedFn<Args, Result> = Box<FnBox<Args, Result> + Send + 'static>;
|
||||
pub type StartFn<T, Args, Result> = fn(data: *mut T, f: BoxedFn<Args, Result>) -> !;
|
||||
|
||||
impl Context {
|
||||
pub fn new<T, Args, Result>(init: StartFn<T, Args, Result>, data: *mut T,
|
||||
f: BoxedFn<Args, Result>) -> Context {
|
||||
#[inline]
|
||||
pub unsafe fn new<F>(f: F) -> Context where F: FnOnce() + Send + 'static {
|
||||
let mut stack = Stack::new(4 << 20);
|
||||
let f: raw::TraitObject = unsafe { transmute(f) };
|
||||
|
||||
let regs = arch::initialize_call_frame(&mut stack, f);
|
||||
Context {
|
||||
regs: arch::initialise_call_frame(&mut stack,
|
||||
init_ctx::<T, Args, Result> as arch::uintptr_t,
|
||||
&[init as arch::uintptr_t,
|
||||
data as arch::uintptr_t,
|
||||
f.data as arch::uintptr_t,
|
||||
f.vtable as arch::uintptr_t]),
|
||||
stack: stack
|
||||
regs: regs,
|
||||
_stack: stack
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
unsafe extern "C" fn init_ctx<T, A, R>(start: StartFn<T, A, R>, data: *mut T,
|
||||
f_data: *mut (), f_vtable: *mut ()) -> ! {
|
||||
let f: BoxedFn<A, R> = transmute(raw::TraitObject {
|
||||
data: f_data,
|
||||
vtable: f_vtable
|
||||
});
|
||||
|
||||
start(data, f)
|
||||
}
|
||||
|
||||
impl Context {
|
||||
pub unsafe fn native() -> Context {
|
||||
Context {
|
||||
regs: Registers::new(),
|
||||
stack: Stack::native(arch::get_sp_limit())
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#[inline(always)]
|
||||
pub unsafe fn swap(out_context: &mut Context, in_context: &mut Context) {
|
||||
arch::set_sp_limit(in_context.stack.limit());
|
||||
arch::swapcontext(&mut out_context.regs, &mut in_context.regs);
|
||||
pub unsafe fn swap(&mut self) {
|
||||
arch::swap(&mut self.regs)
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,17 @@
|
|||
xchg %rsp, %rdi
|
||||
|
||||
pushq %rsi
|
||||
pushq %rdx
|
||||
pushq %rcx
|
||||
call 1f
|
||||
|
||||
popq %fs:0x70
|
||||
popq %rdi
|
||||
popq %rax
|
||||
|
||||
movq $$0, %rbp
|
||||
call *%rax
|
||||
ud2
|
||||
|
||||
1:
|
||||
xchg %rsp, %rdi
|
|
@ -2,16 +2,12 @@
|
|||
#![no_std]
|
||||
|
||||
#[macro_use]
|
||||
#[allow(unstable)]
|
||||
extern crate core;
|
||||
extern crate alloc;
|
||||
extern crate fn_box;
|
||||
|
||||
pub use context::Context;
|
||||
|
||||
mod std { pub use core::fmt; }
|
||||
|
||||
#[macro_use]
|
||||
mod macros;
|
||||
mod std { pub use core::*; }
|
||||
|
||||
mod context;
|
||||
mod stack;
|
||||
|
|
|
@ -1,29 +0,0 @@
|
|||
macro_rules! __into_fields {
|
||||
($x:ident { $field:ident } <- $iter:ident) => {
|
||||
match $iter.next() {
|
||||
Some(value) => {
|
||||
$x.$field = value;
|
||||
Some($iter)
|
||||
}
|
||||
None => None
|
||||
}
|
||||
};
|
||||
($x:ident { $field:ident, $($fields_rest:ident),* } <- $iter:ident) => {
|
||||
match $iter.next() {
|
||||
Some(value) => {
|
||||
$x.$field = value;
|
||||
__into_fields!($x { $($fields_rest),* } <- $iter)
|
||||
}
|
||||
None => None
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
macro_rules! into_fields {
|
||||
($x:ident { $($fields_rest:ident),* } <- $iter:expr) => {
|
||||
{
|
||||
let mut iter = $iter;
|
||||
__into_fields!($x { $($fields_rest),* } <- iter)
|
||||
}
|
||||
}
|
||||
}
|
37
src/main.rs
37
src/main.rs
|
@ -1,36 +1,19 @@
|
|||
#![feature(thread_local)]
|
||||
extern crate lwkt;
|
||||
extern crate fn_box;
|
||||
|
||||
use std::ptr::null_mut;
|
||||
use std::intrinsics::abort;
|
||||
use lwkt::Context;
|
||||
use fn_box::FnBox;
|
||||
|
||||
#[thread_local]
|
||||
static mut ctx_slot: *mut Context = 0 as *mut Context;
|
||||
|
||||
fn main() {
|
||||
let f = Box::new(move |:| {
|
||||
println!("Hello, world!")
|
||||
unsafe {
|
||||
let mut ctx = Context::new(move |:| {
|
||||
println!("it's alive!");
|
||||
(*ctx_slot).swap();
|
||||
});
|
||||
|
||||
let mut native = unsafe { Context::native() };
|
||||
ctx_slot = &mut ctx;
|
||||
|
||||
fn init(ctx: *mut (*mut Context, *mut Context), f: Box<FnBox<(), ()>>) -> ! {
|
||||
unsafe {
|
||||
let (native, green) = *ctx;
|
||||
|
||||
f();
|
||||
|
||||
Context::swap(&mut *green, &mut *native);
|
||||
abort();
|
||||
(*ctx_slot).swap();
|
||||
}
|
||||
}
|
||||
|
||||
let mut ctx = Box::new((&mut native as *mut Context, null_mut()));
|
||||
let mut green = Context::new(init, &mut *ctx as *mut _, f);
|
||||
ctx.1 = &mut green as *mut Context;
|
||||
|
||||
unsafe {
|
||||
Context::swap(&mut native, &mut green);
|
||||
}
|
||||
|
||||
println!("size_of::<Context>() == {}", std::mem::size_of::<Context>());
|
||||
}
|
||||
|
|
|
@ -1,8 +1,10 @@
|
|||
#![allow(unstable)]
|
||||
extern crate libc;
|
||||
extern crate std;
|
||||
use self::std::prelude::v1::*;
|
||||
use self::std::os::{errno, page_size, MemoryMap};
|
||||
use self::std::os::MapOption::{MapReadable, MapWritable, MapNonStandardFlags};
|
||||
use stack;
|
||||
|
||||
extern "C" {
|
||||
#[link_name = "lwt_stack_register"]
|
||||
|
@ -62,14 +64,14 @@ impl Drop for Stack {
|
|||
}
|
||||
}
|
||||
|
||||
impl Stack {
|
||||
pub fn top(&mut self) -> *mut u8 {
|
||||
impl stack::Stack for Stack {
|
||||
fn top(&mut self) -> *mut u8 {
|
||||
unsafe {
|
||||
self.buf.data().offset(self.buf.len() as isize)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn limit(&self) -> *const u8 {
|
||||
fn limit(&self) -> *const u8 {
|
||||
unsafe {
|
||||
self.buf.data().offset(page_size() as isize) as *const _
|
||||
}
|
||||
|
|
37
src/stack.rs
37
src/stack.rs
|
@ -1,35 +1,4 @@
|
|||
use platform;
|
||||
use core::ptr;
|
||||
|
||||
pub enum Stack {
|
||||
Native {
|
||||
sp_limit: *const u8
|
||||
},
|
||||
Managed(platform::Stack)
|
||||
}
|
||||
|
||||
impl Stack {
|
||||
pub fn new(size: usize) -> Stack {
|
||||
Stack::Managed(platform::Stack::new(size))
|
||||
}
|
||||
|
||||
pub unsafe fn native(limit: *const u8) -> Stack {
|
||||
Stack::Native {
|
||||
sp_limit: limit
|
||||
}
|
||||
}
|
||||
|
||||
pub fn top(&mut self) -> *mut u8 {
|
||||
match *self {
|
||||
Stack::Native { .. } => ptr::null_mut(),
|
||||
Stack::Managed(ref mut stack) => stack.top()
|
||||
}
|
||||
}
|
||||
|
||||
pub fn limit(&self) -> *const u8 {
|
||||
match *self {
|
||||
Stack::Native { sp_limit, .. } => sp_limit,
|
||||
Stack::Managed(ref stack) => stack.limit()
|
||||
}
|
||||
}
|
||||
pub trait Stack {
|
||||
fn top(&mut self) -> *mut u8;
|
||||
fn limit(&self) -> *const u8;
|
||||
}
|
||||
|
|
|
@ -0,0 +1,17 @@
|
|||
sub $$128, %rsp
|
||||
pushq %fs:0x70
|
||||
pushq %rbp
|
||||
call 1f
|
||||
|
||||
popq %rbp
|
||||
popq %fs:0x70
|
||||
add $$128, %rsp
|
||||
jmp 2f
|
||||
|
||||
1:
|
||||
movq (%rdi), %rax
|
||||
movq %rsp, (%rdi)
|
||||
movq %rax, %rsp
|
||||
popq %rax
|
||||
jmpq *%rax
|
||||
2:
|
Loading…
Reference in New Issue