multiprocessing demo

This commit is contained in:
Björn Stein 2019-08-30 15:55:59 +08:00 committed by Sebastien Bourdeauducq
parent 6771531255
commit 60bab77a19
8 changed files with 483 additions and 11 deletions

1
link.x
View File

@ -1,5 +1,6 @@
ENTRY(_boot_cores); ENTRY(_boot_cores);
/* Size of stack for core 0 in bytes */
STACK_SIZE = 0x8000; STACK_SIZE = 0x8000;
/* Provide some defaults */ /* Provide some defaults */

View File

@ -10,6 +10,12 @@ pub fn wfe() {
unsafe { asm!("wfe" :::: "volatile") } unsafe { asm!("wfe" :::: "volatile") }
} }
/// Send Event
#[inline]
pub fn sev() {
unsafe { asm!("sev" :::: "volatile") }
}
/// Data Memory Barrier /// Data Memory Barrier
#[inline] #[inline]
pub fn dmb() { pub fn dmb() {
@ -27,3 +33,4 @@ pub fn dsb() {
pub fn isb() { pub fn isb() {
unsafe { asm!("isb" :::: "volatile") } unsafe { asm!("isb" :::: "volatile") }
} }

View File

@ -124,6 +124,7 @@ impl L1Table {
tex: 0b101, tex: 0b101,
domain: 0b1111, domain: 0b1111,
exec: true, exec: true,
// TODO: temporarily turn on cache for SMP testing
cacheable: false, cacheable: false,
bufferable: true, bufferable: true,
}); });

View File

@ -115,6 +115,45 @@ register_bit!(sctlr,
/// Thumb Exception Enable /// Thumb Exception Enable
te, 30); te, 30);
impl crate::regs::RegisterRW for SCTLR {
fn modify<F: FnOnce(Self::R, Self::W) -> Self::W>(&mut self, f: F) {
// todo: this may fail for .nmfi and, in non-secure state,
// also RR (bit 14)
let inner = self.read().inner;
let inner_w = f(
sctlr::Read { inner },
sctlr::Write { inner }
);
self.write(inner_w);
}
}
/// Auxiliary Control Register
pub struct ACTLR;
wrap_reg!(actlr);
def_reg_r!(ACTLR, actlr::Read, "mrc p15, 0, $0, c1, c0, 1");
def_reg_w!(ACTLR, actlr::Write, "mcr p15, 0, $0, c1, c0, 1");
// SMP bit
register_bit!(actlr, parity_on, 9);
register_bit!(actlr, alloc_one_way, 8);
register_bit!(actlr, excl, 7);
register_bit!(actlr, smp, 6);
register_bit!(actlr, write_full_line_of_zeros, 3);
register_bit!(actlr, l1_prefetch_enable, 2);
// Cache/TLB maintenance broadcast
register_bit!(actlr, fw, 0);
impl crate::regs::RegisterRW for ACTLR {
fn modify<F: FnOnce(Self::R, Self::W) -> Self::W>(&mut self, f: F) {
let inner = self.read().inner;
let inner_w = f(
actlr::Read { inner },
actlr::Write { inner }
);
self.write(inner_w);
}
}
/// Domain Access Control Register /// Domain Access Control Register
pub struct DACR; pub struct DACR;
def_reg_r!(DACR, u32, "mrc p15, 0, $0, c3, c0, 0"); def_reg_r!(DACR, u32, "mrc p15, 0, $0, c3, c0, 0");
@ -163,9 +202,51 @@ pub fn bpiall() {
/// Invalidate D-Cache /// Invalidate D-Cache
#[inline(always)] #[inline(always)]
pub fn dccisw() { pub fn dcisw(setway: u32) {
// TODO: $0 is r11 at what value? // TODO: $0 is r11 at what value?
unsafe { unsafe {
asm!("mcr p15, 0, $0, c7, c5, 6" :: "r" (0) :: "volatile"); // steinb: the following is incorrect
//asm!("mcr p15, 0, $0, c7, c5, 6" :: "r" (0) :: "volatile");
// acc. to ARM Architecture Reference Manual, Figure B3-32;
// also see example code (for DCCISW, but DCISW will be
// analogous) "Example code for cache maintenance operations"
// on pages B2-1286 and B2-1287.
asm!("mcr p15, 0, $0, c7, c6, 2" :: "r" (setway) :: "volatile");
}
}
/// A made-up "instruction": invalidate all of the L1 D-Cache
#[inline(always)]
pub fn dciall() {
// the cache associativity could be read from a register, but will
// always be 4 in L1 data cache of a cortex a9
let ways = 4;
let bit_pos_of_way = 30; // 32 - log2(ways)
// the cache sets could be read from a register, but are always
// 256 for the cores in the zync-7000; in general, 128 or 512 are
// also possible.
let sets = 256;
let bit_pos_of_set = 5; // for a line size of 8 words = 2^5 bytes
// select L1 data cache
unsafe {
asm!("mcr p15, 2, $0, c0, c0, 0" :: "r" (0) :: "volatile");
}
// Invalidate entire D-Cache by iterating every set and every way
for set in 0..sets {
for way in 0..ways {
dcisw((set << bit_pos_of_set) | (way << bit_pos_of_way));
}
}
}
/// clear cache line by virtual address to point of coherency (DCCMVAC)
#[inline]
pub fn dccmvac(addr: u32) {
unsafe {
asm!("mcr p15, 0, $0, c7, c10, 1" :: "r" (addr) :: "volatile");
} }
} }

131
src/mailbox.rs Normal file
View File

@ -0,0 +1,131 @@
use crate::cortex_a9::asm;
use core::ptr::{read_volatile, write_volatile};
/*
One-way mailbox:
All transmissions must originate from one core only,
and all receives from the other core only.
Example transmission (to be executed on core 0):
{
while (!MAILBOX_FROM_CORE0.acknowledged()) {}
println!("ready to send");
MAILBOX_FROM_CORE0.send(&data);
println!("sent");
while (!MAILBOX_FROM_CORE0.acknowledged()) {}
println!("got receipt (acknowledgement)");
}
Example reception (to be executed on core 1):
{
println("wait for data");
while (!MAILBOX_FROM_CORE0.available()) {}
let data = MAILBOX_FROM_CORE0.receive();
println("data received");
MAILBOX_FROM_CORE0.acknowledge(data);
}
Note that unsafe { ... } blocks must be used around most functions;
these have been omitted from the examples for clarity.
*/
pub struct OneWayMailbox {
// pointer (data to be transferred): write-only for sending core,
// readable and clearable (to 0) for receiving core
pointer: usize,
// helper variable (last pointer value received) for receiving
// core
echo: usize,
}
pub static mut MAILBOX_FROM_CORE0: OneWayMailbox = OneWayMailbox::new();
pub static mut MAILBOX_FROM_CORE1: OneWayMailbox = OneWayMailbox::new();
impl OneWayMailbox {
// instantiate a one-way mailbox with no undelivered message
pub const fn new() -> OneWayMailbox {
OneWayMailbox { pointer: 0, echo: 0 }
}
// recreate pristine condition; may only be called when producers
// and consumers are stopped (e.g. when starting core 1 from core
// 0).
pub fn reset_discard(&mut self) {
unsafe {
write_volatile(&mut self.pointer, 0);
write_volatile(&mut self.echo, 0);
}
}
// send a pointer from one core to be received by the other core
pub fn send(&mut self, ptr: usize) -> usize {
assert!(ptr != 0); // ptr may not be the NULL-like flag
asm::dmb(); // ensure data at (ptr) has been fully written
unsafe {
write_volatile(&mut self.pointer, ptr);
}
ptr
}
// receive a pointer from the other core, or 0 if none is present
pub fn receive(&self) -> usize {
let ptr = unsafe {
read_volatile(&self.pointer)
};
// necessary memory barrier to guarantee that the data at
// (ptr) has been fully written before it may be accessed
// by the caller of this function
asm::dmb();
ptr
}
// return true if and only if the next self.receive() will return
// actual data rather than 0
pub fn available(&self) -> bool {
let ptr = unsafe {
asm::dmb();
read_volatile(&self.pointer)
};
ptr != 0
}
// acknowledge receipt of data to the sender (i.e. release it)
pub fn acknowledge(&mut self, ptr: usize) {
// ensure that the data we release is the data last sent
assert_eq!(ptr, unsafe {
read_volatile(&self.pointer)
});
// first possibility for "release" flag:
// pointer and echo are equal
unsafe {
write_volatile(&mut self.echo, ptr);
}
asm::dmb(); // write to self.echo before self.pointer
// second possibility for "release" flag:
// NULL-like pointer
unsafe {
write_volatile(&mut self.pointer, 0);
}
asm::dmb();
// reset echo
unsafe {
write_volatile(&mut self.echo, 0);
}
}
// has data been acknowledged?
pub fn acknowledged(&self) -> bool {
let ptr = unsafe {
read_volatile(&self.pointer)
};
// read self.pointer before self.echo, not after
asm::dmb();
let echo = unsafe {
read_volatile(&self.echo)
};
(ptr == 0) || (ptr == echo)
}
}

View File

@ -9,30 +9,40 @@
#![allow(dead_code)] #![allow(dead_code)]
use core::mem::{uninitialized, transmute}; use core::mem::{uninitialized, transmute};
use core::ptr::write_volatile;
use r0::zero_bss; use r0::zero_bss;
use compiler_builtins as _; use compiler_builtins as _;
use smoltcp::wire::{EthernetAddress, IpAddress, IpCidr}; use smoltcp::wire::{EthernetAddress, IpAddress, IpCidr};
use smoltcp::iface::{NeighborCache, EthernetInterfaceBuilder, EthernetInterface}; use smoltcp::iface::{NeighborCache, EthernetInterfaceBuilder, EthernetInterface};
use smoltcp::time::Instant; use smoltcp::time::Instant;
use smoltcp::socket::SocketSet; use smoltcp::socket::SocketSet;
use mailbox::{MAILBOX_FROM_CORE0, MAILBOX_FROM_CORE1};
mod regs; mod regs;
mod cortex_a9; mod cortex_a9;
mod clocks; mod clocks;
mod mailbox;
mod mpcore;
mod slcr; mod slcr;
mod uart; mod uart;
mod stdio; mod stdio;
mod eth; mod eth;
use crate::regs::{RegisterR, RegisterW}; use crate::regs::{RegisterR, RegisterW, RegisterRW};
use crate::cortex_a9::{asm, regs::*, mmu}; use crate::cortex_a9::{asm, regs::*, mmu};
extern "C" { extern "C" {
static mut __bss_start: u32; static mut __bss_start: u32;
static mut __bss_end: u32; static mut __bss_end: u32;
static mut __stack_start: u32; static mut __stack_start: u32; // refers to the stack for core 0
static mut __stack1_start: u32; // refers to the stack for core 1
} }
// program address as u32, for execution after setting up core 1
static mut START_ADDR_CORE1: u32 = 0;
// initial stack pointer for starting core 1
static mut INITIAL_SP_CORE1: u32 = 0; // must be zero (as a flag)
#[link_section = ".text.boot"] #[link_section = ".text.boot"]
#[no_mangle] #[no_mangle]
#[naked] #[naked]
@ -41,13 +51,24 @@ pub unsafe extern "C" fn _boot_cores() -> ! {
match MPIDR.read() & CORE_MASK { match MPIDR.read() & CORE_MASK {
0 => { 0 => {
// executing on core 0
SP.write(&mut __stack_start as *mut _ as u32); SP.write(&mut __stack_start as *mut _ as u32);
boot_core0(); boot_core0();
} }
_ => loop { _ => {
// if not core0, infinitely wait for events // executing on core 1 (as there are only cores 0 and 1)
asm::wfe(); while INITIAL_SP_CORE1 == 0 {
}, // NOTE: This wfe and its loop can be removed as long
// as the regular boot loader remains in place
// (i.e. this program is not written into ROM).
asm::wfe();
}
// the following requires a stack (at least later, for the
// function for setting up the MMU)
SP.write(INITIAL_SP_CORE1);
boot_core1();
}
} }
} }
@ -55,16 +76,59 @@ pub unsafe extern "C" fn _boot_cores() -> ! {
#[inline(never)] #[inline(never)]
unsafe fn boot_core0() -> ! { unsafe fn boot_core0() -> ! {
l1_cache_init(); l1_cache_init();
// Invalidate SCU, for all cores
mpcore::RegisterBlock::new().scu_invalidate.write(0xffff);
zero_bss(&mut __bss_start, &mut __bss_end); zero_bss(&mut __bss_start, &mut __bss_end);
let mmu_table = mmu::L1Table::get() let mmu_table = mmu::L1Table::get()
.setup_flat_layout(); .setup_flat_layout();
mmu::with_mmu(mmu_table, || { mmu::with_mmu(mmu_table, || {
// start SCU
mpcore::RegisterBlock::new().scu_control.modify(
|_, w| w.enable(true)
);
// enable SMP (for starting correct SCU operation)
ACTLR.modify(|_, w|
w.smp(true) // SMP mode
.fw(true) // cache and TLB maintenance broadcast on
);
asm::dmb();
asm::dsb();
main(); main();
panic!("return from main"); panic!("return from main");
}); });
} }
#[naked]
#[inline(never)]
unsafe fn boot_core1() -> ! {
l1_cache_init();
// Invalidate SCU, for core1 only
mpcore::RegisterBlock::new().scu_invalidate.write(0x00f0);
// use the MMU L1 Table already set up by core 0
let mmu_table = mmu::L1Table::get();
mmu::with_mmu(mmu_table, || {
// enable SMP (for correct SCU operation)
ACTLR.modify(|_, w|
w.smp(true) // SMP mode
.fw(true) // cache and TLB maintenance broadcast
);
asm::dmb();
asm::dsb();
// now that the MMU is active using the same table as active
// on the other core, one can branch to any normal memory
// location in which the code may reside
asm!("bx r1" :: "{r1}"(START_ADDR_CORE1) :: "volatile");
unreachable!();
});
}
fn l1_cache_init() { fn l1_cache_init() {
// Invalidate TLBs // Invalidate TLBs
tlbiall(); tlbiall();
@ -73,13 +137,118 @@ fn l1_cache_init() {
// Invalidate Branch Predictor Array // Invalidate Branch Predictor Array
bpiall(); bpiall();
// Invalidate D-Cache // Invalidate D-Cache
dccisw(); //
// Note: Do use dcisw rather than dccisw to only invalidate rather
// than also clear (which may write values back into the
// underlying L2 cache or memory!)
//
// use the "made-up instruction" (see definition) dciall()
dciall();
asm::dsb();
asm::isb();
} }
fn stop_core1() {
slcr::RegisterBlock::unlocked(|slcr| {
slcr.a9_cpu_rst_ctrl.modify(|_, w| {
w.a9_rst1(true)
});
slcr.a9_cpu_rst_ctrl.modify(|_, w| {
w.a9_clkstop1(true)
});
slcr.a9_cpu_rst_ctrl.modify(|_, w| {
w.a9_rst1(false)
});
});
}
// Execute f on core 1 using the given stack. Note that these
// semantics are inherently unsafe as the stack needs to live longer
// than Rust semantics dictate...hence this method is marked as unsafe
// to remind the caller to take special care (but also many operations
// performed would otherwise require `unsafe` blocks).
unsafe fn run_on_core1(f: fn() -> !, stack: &mut [u32]) {
// reset and stop core 1 (this is safe to repeat, if the caller
// has already performed this)
stop_core1();
// ensure any mailbox access finishes before the mailbox reset
asm::dmb();
// reset the mailbox for sending messages
MAILBOX_FROM_CORE0.reset_discard();
MAILBOX_FROM_CORE1.reset_discard();
// determine address of f and save it as start address for core 1
write_volatile(
&mut START_ADDR_CORE1,
f as *const () as u32
);
write_volatile(
&mut INITIAL_SP_CORE1,
&mut stack[stack.len() - 1] as *const _ as u32
);
// ensure the above is written to cache before it is cleaned
asm::dmb();
// TODO: Is the following necessary, considering that the SCU
// should take care of coherency of all (normal) memory?
//
// clean cache lines containing START_ADDR_CORE1 and
// INITIAL_SP_CORE1
dccmvac(&START_ADDR_CORE1 as *const _ as u32);
dccmvac(&INITIAL_SP_CORE1 as *const _ as u32);
// clean cache lines containing mailboxes
dccmvac(&MAILBOX_FROM_CORE0 as *const _ as u32);
dccmvac(&MAILBOX_FROM_CORE1 as *const _ as u32);
// restart core 1
slcr::RegisterBlock::unlocked(|slcr| {
slcr.a9_cpu_rst_ctrl.modify(|_, w| {
w.a9_rst1(false)
});
slcr.a9_cpu_rst_ctrl.modify(|_, w| {
w.a9_clkstop1(false)
});
});
}
fn main_core1() -> ! {
let mut data: [u32; 2] = [42, 42];
loop {
// effectively perform something similar to `println!("from
// core 1");` by passing a message to core 0 and having core 0
// output it via the println! macro
unsafe {
MAILBOX_FROM_CORE1.send(&data as *const _ as usize);
while !MAILBOX_FROM_CORE1.acknowledged() {}
}
// change data to make it more interesting
data[1] += 1;
}
}
fn main_core1_program2() -> ! {
let mut data: [u32; 2] = [4200, 4200];
loop {
unsafe {
MAILBOX_FROM_CORE1.send(&data as *const _ as usize);
while !MAILBOX_FROM_CORE1.acknowledged() {}
}
// change data to make it more interesting
data[0] -= 1;
data[1] += 1;
}
}
// reserve some memory as stack for core1
static mut STACK_CORE1: [u32; 256] = [0; 256];
const HWADDR: [u8; 6] = [0, 0x23, 0xde, 0xea, 0xbe, 0xef]; const HWADDR: [u8; 6] = [0, 0x23, 0xde, 0xea, 0xbe, 0xef];
fn main() { fn main() {
println!("Main."); println!("Main.");
println!("Core 0 SP: 0x{:X}", SP.read());
let clocks = clocks::CpuClocks::get(); let clocks = clocks::CpuClocks::get();
println!("Clocks: {:?}", clocks); println!("Clocks: {:?}", clocks);
println!("CPU speeds: {}/{}/{}/{} MHz", println!("CPU speeds: {}/{}/{}/{} MHz",
@ -92,6 +261,52 @@ fn main() {
println!("Eth on"); println!("Eth on");
eth.reset_phy(); eth.reset_phy();
// start executing main_core1() on core 1
unsafe {
run_on_core1(main_core1, &mut STACK_CORE1[..]);
}
println!("Started main_core1() on core 1");
for _ in 0..5 {
// wait for data
while unsafe { !MAILBOX_FROM_CORE1.available() } {}
// receive data
let data_ptr = unsafe { MAILBOX_FROM_CORE1.receive() };
println!(
"Received via mailbox from core 1: data {} and {} at address 0x{:X}",
unsafe { (*(data_ptr as *const [u32; 2]))[0] },
unsafe { (*(data_ptr as *const [u32; 2]))[1] },
data_ptr
);
unsafe {
MAILBOX_FROM_CORE1.acknowledge(data_ptr);
}
}
stop_core1();
println!("Stopped core 1.");
// start executing main_core1_program2() on core 1
unsafe {
run_on_core1(main_core1_program2, &mut STACK_CORE1[..]);
}
println!("Started main_core1_program2() on core 1");
for _ in 0..5 {
// wait for data
while unsafe { !MAILBOX_FROM_CORE1.available() } {}
// receive data
let data_ptr = unsafe { MAILBOX_FROM_CORE1.receive() };
println!(
"Received via mailbox from core 1: data {} and {} at address 0x{:X}",
unsafe { (*(data_ptr as *const [u32; 2]))[0] },
unsafe { (*(data_ptr as *const [u32; 2]))[1] },
data_ptr
);
unsafe {
MAILBOX_FROM_CORE1.acknowledge(data_ptr);
}
}
stop_core1();
println!("Stopped core 1.");
const RX_LEN: usize = 1; const RX_LEN: usize = 1;
let mut rx_descs: [eth::rx::DescEntry; RX_LEN] = unsafe { uninitialized() }; let mut rx_descs: [eth::rx::DescEntry; RX_LEN] = unsafe { uninitialized() };
let mut rx_buffers = [[0u8; eth::MTU]; RX_LEN]; let mut rx_buffers = [[0u8; eth::MTU]; RX_LEN];

29
src/mpcore.rs Normal file
View File

@ -0,0 +1,29 @@
///! Register definitions for Application Processing Unit (mpcore)
use volatile_register::{RO, RW, WO};
use crate::{register, register_at, register_bit};
#[repr(C)]
pub struct RegisterBlock {
pub scu_control: ScuControl,
pub scu_config: RO<u32>,
pub scu_cpu_power: RW<u32>,
pub scu_invalidate: WO<u32>,
reserved0: [u32; 12],
pub filter_start: RW<u32>,
pub filter_end: RW<u32>,
reserved1: [u32; 2],
pub scu_access_control: RW<u32>,
pub scu_non_secure_access_control: RW<u32>,
// there is plenty more (unimplemented)
}
register_at!(RegisterBlock, 0xF8F00000, new);
register!(scu_control, ScuControl, RW, u32);
register_bit!(scu_control, ic_standby_enable, 6);
register_bit!(scu_control, scu_standby_enable, 5);
register_bit!(scu_control, force_to_port0_enable, 4);
register_bit!(scu_control, scu_speculative_linefill_enable, 3);
register_bit!(scu_control, scu_rams_parity_enable, 2);
register_bit!(scu_control, address_filtering_enable, 1);
register_bit!(scu_control, enable, 0);

View File

@ -90,7 +90,7 @@ pub struct RegisterBlock {
pub ocm_rst_ctrl: RW<u32>, pub ocm_rst_ctrl: RW<u32>,
reserved4: [u32; 1], reserved4: [u32; 1],
pub fpga_rst_ctrl: RW<u32>, pub fpga_rst_ctrl: RW<u32>,
pub a9_cpu_rst_ctrl: RW<u32>, pub a9_cpu_rst_ctrl: A9CpuRstCtrl,
reserved5: [u32; 1], reserved5: [u32; 1],
pub rs_awdt_ctrl: RW<u32>, pub rs_awdt_ctrl: RW<u32>,
reserved6: [u32; 2], reserved6: [u32; 2],
@ -365,6 +365,13 @@ impl UartRstCtrl {
register!(pss_rst_ctrl, PssRstCtrl, RW, u32); register!(pss_rst_ctrl, PssRstCtrl, RW, u32);
register_bit!(pss_rst_ctrl, soft_rst, 1); register_bit!(pss_rst_ctrl, soft_rst, 1);
register!(a9_cpu_rst_ctrl, A9CpuRstCtrl, RW, u32);
register_bit!(a9_cpu_rst_ctrl, peri_rst, 8);
register_bit!(a9_cpu_rst_ctrl, a9_clkstop1, 5);
register_bit!(a9_cpu_rst_ctrl, a9_clkstop0, 4);
register_bit!(a9_cpu_rst_ctrl, a9_rst1, 1);
register_bit!(a9_cpu_rst_ctrl, a9_rst0, 0);
/// Used for MioPin*.io_type /// Used for MioPin*.io_type
#[repr(u8)] #[repr(u8)]
pub enum IoBufferType { pub enum IoBufferType {