From 60bab77a19e8e32b2b1d5927b804888acec19826 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Stein?= Date: Fri, 30 Aug 2019 15:55:59 +0800 Subject: [PATCH] multiprocessing demo --- link.x | 1 + src/cortex_a9/asm.rs | 7 ++ src/cortex_a9/mmu.rs | 3 +- src/cortex_a9/regs.rs | 85 +++++++++++++++- src/mailbox.rs | 131 ++++++++++++++++++++++++ src/main.rs | 229 ++++++++++++++++++++++++++++++++++++++++-- src/mpcore.rs | 29 ++++++ src/slcr.rs | 9 +- 8 files changed, 483 insertions(+), 11 deletions(-) create mode 100644 src/mailbox.rs create mode 100644 src/mpcore.rs diff --git a/link.x b/link.x index 3a0c6ca..88b3236 100644 --- a/link.x +++ b/link.x @@ -1,5 +1,6 @@ ENTRY(_boot_cores); +/* Size of stack for core 0 in bytes */ STACK_SIZE = 0x8000; /* Provide some defaults */ diff --git a/src/cortex_a9/asm.rs b/src/cortex_a9/asm.rs index 022530e..7dd416e 100644 --- a/src/cortex_a9/asm.rs +++ b/src/cortex_a9/asm.rs @@ -10,6 +10,12 @@ pub fn wfe() { unsafe { asm!("wfe" :::: "volatile") } } +/// Send Event +#[inline] +pub fn sev() { + unsafe { asm!("sev" :::: "volatile") } +} + /// Data Memory Barrier #[inline] pub fn dmb() { @@ -27,3 +33,4 @@ pub fn dsb() { pub fn isb() { unsafe { asm!("isb" :::: "volatile") } } + diff --git a/src/cortex_a9/mmu.rs b/src/cortex_a9/mmu.rs index 435a844..1a235a1 100644 --- a/src/cortex_a9/mmu.rs +++ b/src/cortex_a9/mmu.rs @@ -124,7 +124,8 @@ impl L1Table { tex: 0b101, domain: 0b1111, exec: true, - cacheable: false, + // TODO: temporarily turn on cache for SMP testing + cacheable: false, bufferable: true, }); /* (DDR cacheable) */ diff --git a/src/cortex_a9/regs.rs b/src/cortex_a9/regs.rs index 647db47..72396e6 100644 --- a/src/cortex_a9/regs.rs +++ b/src/cortex_a9/regs.rs @@ -115,6 +115,45 @@ register_bit!(sctlr, /// Thumb Exception Enable te, 30); +impl crate::regs::RegisterRW for SCTLR { + fn modify Self::W>(&mut self, f: F) { + // todo: this may fail for .nmfi and, in non-secure state, + // also RR (bit 14) + let inner = 
self.read().inner; + let inner_w = f( + sctlr::Read { inner }, + sctlr::Write { inner } + ); + self.write(inner_w); + } +} + +/// Auxiliary Control Register +pub struct ACTLR; +wrap_reg!(actlr); +def_reg_r!(ACTLR, actlr::Read, "mrc p15, 0, $0, c1, c0, 1"); +def_reg_w!(ACTLR, actlr::Write, "mcr p15, 0, $0, c1, c0, 1"); +// SMP bit +register_bit!(actlr, parity_on, 9); +register_bit!(actlr, alloc_one_way, 8); +register_bit!(actlr, excl, 7); +register_bit!(actlr, smp, 6); +register_bit!(actlr, write_full_line_of_zeros, 3); +register_bit!(actlr, l1_prefetch_enable, 2); +// Cache/TLB maintenance broadcast +register_bit!(actlr, fw, 0); + +impl crate::regs::RegisterRW for ACTLR { + fn modify Self::W>(&mut self, f: F) { + let inner = self.read().inner; + let inner_w = f( + actlr::Read { inner }, + actlr::Write { inner } + ); + self.write(inner_w); + } +} + /// Domain Access Control Register pub struct DACR; def_reg_r!(DACR, u32, "mrc p15, 0, $0, c3, c0, 0"); @@ -163,9 +202,51 @@ pub fn bpiall() { /// Invalidate D-Cache #[inline(always)] -pub fn dccisw() { +pub fn dcisw(setway: u32) { // TODO: $0 is r11 at what value? unsafe { - asm!("mcr p15, 0, $0, c7, c5, 6" :: "r" (0) :: "volatile"); + // steinb: the following is incorrect + //asm!("mcr p15, 0, $0, c7, c5, 6" :: "r" (0) :: "volatile"); + + // acc. to ARM Architecture Reference Manual, Figure B3-32; + // also see example code (for DCCISW, but DCISW will be + // analogous) "Example code for cache maintenance operations" + // on pages B2-1286 and B2-1287. 
+ asm!("mcr p15, 0, $0, c7, c6, 2" :: "r" (setway) :: "volatile"); + } +} + +/// A made-up "instruction": invalidate all of the L1 D-Cache +#[inline(always)] +pub fn dciall() { + // the cache associativity could be read from a register, but will + // always be 4 in L1 data cache of a Cortex-A9 + let ways = 4; + let bit_pos_of_way = 30; // 32 - log2(ways) + + // the cache sets could be read from a register, but are always + // 256 for the cores in the Zynq-7000; in general, 128 or 512 are + // also possible. + let sets = 256; + let bit_pos_of_set = 5; // for a line size of 8 words = 2^5 bytes + + // select L1 data cache + unsafe { + asm!("mcr p15, 2, $0, c0, c0, 0" :: "r" (0) :: "volatile"); + } + + // Invalidate entire D-Cache by iterating every set and every way + for set in 0..sets { + for way in 0..ways { + dcisw((set << bit_pos_of_set) | (way << bit_pos_of_way)); + } + } +} + +/// clean cache line by virtual address to point of coherency (DCCMVAC) +#[inline] +pub fn dccmvac(addr: u32) { + unsafe { + asm!("mcr p15, 0, $0, c7, c10, 1" :: "r" (addr) :: "volatile"); } } diff --git a/src/mailbox.rs b/src/mailbox.rs new file mode 100644 index 0000000..1459a2d --- /dev/null +++ b/src/mailbox.rs @@ -0,0 +1,131 @@ +use crate::cortex_a9::asm; +use core::ptr::{read_volatile, write_volatile}; + +/* + One-way mailbox: + + All transmissions must originate from one core only, + and all receives from the other core only. + + Example transmission (to be executed on core 0): + { + while (!MAILBOX_FROM_CORE0.acknowledged()) {} + println!("ready to send"); + MAILBOX_FROM_CORE0.send(&data); + println!("sent"); + while (!MAILBOX_FROM_CORE0.acknowledged()) {} + println!("got receipt (acknowledgement)"); + } + + Example reception (to be executed on core 1): + { + println!("wait for data"); + while (!MAILBOX_FROM_CORE0.available()) {} + let data = MAILBOX_FROM_CORE0.receive(); + println!("data received"); + MAILBOX_FROM_CORE0.acknowledge(data); + } + + Note that unsafe { ... 
} blocks must be used around most functions; + these have been omitted from the examples for clarity. + +*/ + +pub struct OneWayMailbox { + // pointer (data to be transferred): write-only for sending core, + // readable and clearable (to 0) for receiving core + pointer: usize, + + // helper variable (last pointer value received) for receiving + // core + echo: usize, +} + +pub static mut MAILBOX_FROM_CORE0: OneWayMailbox = OneWayMailbox::new(); +pub static mut MAILBOX_FROM_CORE1: OneWayMailbox = OneWayMailbox::new(); + +impl OneWayMailbox { + // instantiate a one-way mailbox with no undelivered message + pub const fn new() -> OneWayMailbox { + OneWayMailbox { pointer: 0, echo: 0 } + } + + // recreate pristine condition; may only be called when producers + // and consumers are stopped (e.g. when starting core 1 from core + // 0). + pub fn reset_discard(&mut self) { + unsafe { + write_volatile(&mut self.pointer, 0); + write_volatile(&mut self.echo, 0); + } + } + + // send a pointer from one core to be received by the other core + pub fn send(&mut self, ptr: usize) -> usize { + assert!(ptr != 0); // ptr may not be the NULL-like flag + asm::dmb(); // ensure data at (ptr) has been fully written + unsafe { + write_volatile(&mut self.pointer, ptr); + } + ptr + } + + // receive a pointer from the other core, or 0 if none is present + pub fn receive(&self) -> usize { + let ptr = unsafe { + read_volatile(&self.pointer) + }; + // necessary memory barrier to guarantee that the data at + // (ptr) has been fully written before it may be accessed + // by the caller of this function + asm::dmb(); + ptr + } + + // return true if and only if the next self.receive() will return + // actual data rather than 0 + pub fn available(&self) -> bool { + let ptr = unsafe { + asm::dmb(); + read_volatile(&self.pointer) + }; + ptr != 0 + } + + // acknowledge receipt of data to the sender (i.e. 
release it) + pub fn acknowledge(&mut self, ptr: usize) { + // ensure that the data we release is the data last sent + assert_eq!(ptr, unsafe { + read_volatile(&self.pointer) + }); + // first possibility for "release" flag: + // pointer and echo are equal + unsafe { + write_volatile(&mut self.echo, ptr); + } + asm::dmb(); // write to self.echo before self.pointer + // second possibility for "release" flag: + // NULL-like pointer + unsafe { + write_volatile(&mut self.pointer, 0); + } + asm::dmb(); + // reset echo + unsafe { + write_volatile(&mut self.echo, 0); + } + } + + // has data been acknowledged? + pub fn acknowledged(&self) -> bool { + let ptr = unsafe { + read_volatile(&self.pointer) + }; + // read self.pointer before self.echo, not after + asm::dmb(); + let echo = unsafe { + read_volatile(&self.echo) + }; + (ptr == 0) || (ptr == echo) + } +} diff --git a/src/main.rs b/src/main.rs index a204a4d..6ff368a 100644 --- a/src/main.rs +++ b/src/main.rs @@ -9,30 +9,40 @@ #![allow(dead_code)] use core::mem::{uninitialized, transmute}; +use core::ptr::write_volatile; use r0::zero_bss; use compiler_builtins as _; use smoltcp::wire::{EthernetAddress, IpAddress, IpCidr}; use smoltcp::iface::{NeighborCache, EthernetInterfaceBuilder, EthernetInterface}; use smoltcp::time::Instant; use smoltcp::socket::SocketSet; +use mailbox::{MAILBOX_FROM_CORE0, MAILBOX_FROM_CORE1}; mod regs; mod cortex_a9; mod clocks; +mod mailbox; +mod mpcore; mod slcr; mod uart; mod stdio; mod eth; -use crate::regs::{RegisterR, RegisterW}; +use crate::regs::{RegisterR, RegisterW, RegisterRW}; use crate::cortex_a9::{asm, regs::*, mmu}; extern "C" { static mut __bss_start: u32; static mut __bss_end: u32; - static mut __stack_start: u32; + static mut __stack_start: u32; // refers to the stack for core 0 + static mut __stack1_start: u32; // refers to the stack for core 1 } +// program address as u32, for execution after setting up core 1 +static mut START_ADDR_CORE1: u32 = 0; +// initial stack pointer for 
starting core 1 +static mut INITIAL_SP_CORE1: u32 = 0; // must be zero (as a flag) + #[link_section = ".text.boot"] #[no_mangle] #[naked] @@ -41,13 +51,24 @@ pub unsafe extern "C" fn _boot_cores() -> ! { match MPIDR.read() & CORE_MASK { 0 => { + // executing on core 0 SP.write(&mut __stack_start as *mut _ as u32); boot_core0(); } - _ => loop { - // if not core0, infinitely wait for events - asm::wfe(); - }, + _ => { + // executing on core 1 (as there are only cores 0 and 1) + while INITIAL_SP_CORE1 == 0 { + // NOTE: This wfe and its loop can be removed as long + // as the regular boot loader remains in place + // (i.e. this program is not written into ROM). + asm::wfe(); + } + + // the following requires a stack (at least later, for the + // function for setting up the MMU) + SP.write(INITIAL_SP_CORE1); + boot_core1(); + } } } @@ -55,16 +76,59 @@ pub unsafe extern "C" fn _boot_cores() -> ! { #[inline(never)] unsafe fn boot_core0() -> ! { l1_cache_init(); + + // Invalidate SCU, for all cores + mpcore::RegisterBlock::new().scu_invalidate.write(0xffff); + zero_bss(&mut __bss_start, &mut __bss_end); let mmu_table = mmu::L1Table::get() .setup_flat_layout(); mmu::with_mmu(mmu_table, || { + // start SCU + mpcore::RegisterBlock::new().scu_control.modify( + |_, w| w.enable(true) + ); + // enable SMP (for starting correct SCU operation) + ACTLR.modify(|_, w| + w.smp(true) // SMP mode + .fw(true) // cache and TLB maintenance broadcast on + ); + asm::dmb(); + asm::dsb(); main(); panic!("return from main"); }); } +#[naked] +#[inline(never)] +unsafe fn boot_core1() -> ! 
{ + l1_cache_init(); + + // Invalidate SCU, for core1 only + mpcore::RegisterBlock::new().scu_invalidate.write(0x00f0); + + // use the MMU L1 Table already set up by core 0 + let mmu_table = mmu::L1Table::get(); + mmu::with_mmu(mmu_table, || { + // enable SMP (for correct SCU operation) + ACTLR.modify(|_, w| + w.smp(true) // SMP mode + .fw(true) // cache and TLB maintenance broadcast + ); + + asm::dmb(); + asm::dsb(); + + // now that the MMU is active using the same table as active + // on the other core, one can branch to any normal memory + // location in which the code may reside + asm!("bx r1" :: "{r1}"(START_ADDR_CORE1) :: "volatile"); + unreachable!(); + }); +} + fn l1_cache_init() { // Invalidate TLBs tlbiall(); @@ -73,13 +137,118 @@ fn l1_cache_init() { // Invalidate Branch Predictor Array bpiall(); // Invalidate D-Cache - dccisw(); + // + // Note: Do use dcisw rather than dccisw to only invalidate rather + // than also clean (which may write values back into the + // underlying L2 cache or memory!) + // + // use the "made-up instruction" (see definition) dciall() + dciall(); + + asm::dsb(); + asm::isb(); } +fn stop_core1() { + slcr::RegisterBlock::unlocked(|slcr| { + slcr.a9_cpu_rst_ctrl.modify(|_, w| { + w.a9_rst1(true) + }); + slcr.a9_cpu_rst_ctrl.modify(|_, w| { + w.a9_clkstop1(true) + }); + slcr.a9_cpu_rst_ctrl.modify(|_, w| { + w.a9_rst1(false) + }); + }); +} + +// Execute f on core 1 using the given stack. Note that these +// semantics are inherently unsafe as the stack needs to live longer +// than Rust semantics dictate...hence this method is marked as unsafe +// to remind the caller to take special care (but also many operations +// performed would otherwise require `unsafe` blocks). 
+unsafe fn run_on_core1(f: fn() -> !, stack: &mut [u32]) { + // reset and stop core 1 (this is safe to repeat, if the caller + // has already performed this) + stop_core1(); + + // ensure any mailbox access finishes before the mailbox reset + asm::dmb(); + // reset the mailbox for sending messages + MAILBOX_FROM_CORE0.reset_discard(); + MAILBOX_FROM_CORE1.reset_discard(); + // determine address of f and save it as start address for core 1 + write_volatile( + &mut START_ADDR_CORE1, + f as *const () as u32 + ); + write_volatile( + &mut INITIAL_SP_CORE1, + &mut stack[stack.len() - 1] as *const _ as u32 + ); + // ensure the above is written to cache before it is cleaned + asm::dmb(); + // TODO: Is the following necessary, considering that the SCU + // should take care of coherency of all (normal) memory? + // + // clean cache lines containing START_ADDR_CORE1 and + // INITIAL_SP_CORE1 + dccmvac(&START_ADDR_CORE1 as *const _ as u32); + dccmvac(&INITIAL_SP_CORE1 as *const _ as u32); + + // clean cache lines containing mailboxes + dccmvac(&MAILBOX_FROM_CORE0 as *const _ as u32); + dccmvac(&MAILBOX_FROM_CORE1 as *const _ as u32); + + // restart core 1 + slcr::RegisterBlock::unlocked(|slcr| { + slcr.a9_cpu_rst_ctrl.modify(|_, w| { + w.a9_rst1(false) + }); + slcr.a9_cpu_rst_ctrl.modify(|_, w| { + w.a9_clkstop1(false) + }); + }); +} + +fn main_core1() -> ! { + let mut data: [u32; 2] = [42, 42]; + loop { + // effectively perform something similar to `println!("from + // core 1");` by passing a message to core 0 and having core 0 + // output it via the println! macro + unsafe { + MAILBOX_FROM_CORE1.send(&data as *const _ as usize); + while !MAILBOX_FROM_CORE1.acknowledged() {} + } + + // change data to make it more interesting + data[1] += 1; + } +} + +fn main_core1_program2() -> ! 
{ + let mut data: [u32; 2] = [4200, 4200]; + loop { + unsafe { + MAILBOX_FROM_CORE1.send(&data as *const _ as usize); + while !MAILBOX_FROM_CORE1.acknowledged() {} + } + // change data to make it more interesting + data[0] -= 1; + data[1] += 1; + } +} + +// reserve some memory as stack for core1 +static mut STACK_CORE1: [u32; 256] = [0; 256]; + const HWADDR: [u8; 6] = [0, 0x23, 0xde, 0xea, 0xbe, 0xef]; fn main() { println!("Main."); + println!("Core 0 SP: 0x{:X}", SP.read()); let clocks = clocks::CpuClocks::get(); println!("Clocks: {:?}", clocks); println!("CPU speeds: {}/{}/{}/{} MHz", @@ -92,6 +261,52 @@ fn main() { println!("Eth on"); eth.reset_phy(); + // start executing main_core1() on core 1 + unsafe { + run_on_core1(main_core1, &mut STACK_CORE1[..]); + } + println!("Started main_core1() on core 1"); + for _ in 0..5 { + // wait for data + while unsafe { !MAILBOX_FROM_CORE1.available() } {} + // receive data + let data_ptr = unsafe { MAILBOX_FROM_CORE1.receive() }; + println!( + "Received via mailbox from core 1: data {} and {} at address 0x{:X}", + unsafe { (*(data_ptr as *const [u32; 2]))[0] }, + unsafe { (*(data_ptr as *const [u32; 2]))[1] }, + data_ptr + ); + unsafe { + MAILBOX_FROM_CORE1.acknowledge(data_ptr); + } + } + stop_core1(); + println!("Stopped core 1."); + + // start executing main_core1_program2() on core 1 + unsafe { + run_on_core1(main_core1_program2, &mut STACK_CORE1[..]); + } + println!("Started main_core1_program2() on core 1"); + for _ in 0..5 { + // wait for data + while unsafe { !MAILBOX_FROM_CORE1.available() } {} + // receive data + let data_ptr = unsafe { MAILBOX_FROM_CORE1.receive() }; + println!( + "Received via mailbox from core 1: data {} and {} at address 0x{:X}", + unsafe { (*(data_ptr as *const [u32; 2]))[0] }, + unsafe { (*(data_ptr as *const [u32; 2]))[1] }, + data_ptr + ); + unsafe { + MAILBOX_FROM_CORE1.acknowledge(data_ptr); + } + } + stop_core1(); + println!("Stopped core 1."); + const RX_LEN: usize = 1; let mut 
rx_descs: [eth::rx::DescEntry; RX_LEN] = unsafe { uninitialized() }; let mut rx_buffers = [[0u8; eth::MTU]; RX_LEN]; diff --git a/src/mpcore.rs b/src/mpcore.rs new file mode 100644 index 0000000..36e503c --- /dev/null +++ b/src/mpcore.rs @@ -0,0 +1,29 @@ +//! Register definitions for Application Processing Unit (mpcore) + +use volatile_register::{RO, RW, WO}; +use crate::{register, register_at, register_bit}; + +#[repr(C)] +pub struct RegisterBlock { + pub scu_control: ScuControl, + pub scu_config: RO, + pub scu_cpu_power: RW, + pub scu_invalidate: WO, + reserved0: [u32; 12], + pub filter_start: RW, + pub filter_end: RW, + reserved1: [u32; 2], + pub scu_access_control: RW, + pub scu_non_secure_access_control: RW, + // there is plenty more (unimplemented) +} +register_at!(RegisterBlock, 0xF8F00000, new); + +register!(scu_control, ScuControl, RW, u32); +register_bit!(scu_control, ic_standby_enable, 6); +register_bit!(scu_control, scu_standby_enable, 5); +register_bit!(scu_control, force_to_port0_enable, 4); +register_bit!(scu_control, scu_speculative_linefill_enable, 3); +register_bit!(scu_control, scu_rams_parity_enable, 2); +register_bit!(scu_control, address_filtering_enable, 1); +register_bit!(scu_control, enable, 0); diff --git a/src/slcr.rs b/src/slcr.rs index df714ec..69aba44 100644 --- a/src/slcr.rs +++ b/src/slcr.rs @@ -90,7 +90,7 @@ pub struct RegisterBlock { pub ocm_rst_ctrl: RW, reserved4: [u32; 1], pub fpga_rst_ctrl: RW, - pub a9_cpu_rst_ctrl: RW, + pub a9_cpu_rst_ctrl: A9CpuRstCtrl, reserved5: [u32; 1], pub rs_awdt_ctrl: RW, reserved6: [u32; 2], @@ -365,6 +365,13 @@ impl UartRstCtrl { register!(pss_rst_ctrl, PssRstCtrl, RW, u32); register_bit!(pss_rst_ctrl, soft_rst, 1); +register!(a9_cpu_rst_ctrl, A9CpuRstCtrl, RW, u32); +register_bit!(a9_cpu_rst_ctrl, peri_rst, 8); +register_bit!(a9_cpu_rst_ctrl, a9_clkstop1, 5); +register_bit!(a9_cpu_rst_ctrl, a9_clkstop0, 4); +register_bit!(a9_cpu_rst_ctrl, a9_rst1, 1); +register_bit!(a9_cpu_rst_ctrl, a9_rst0, 0); 
+ /// Used for MioPin*.io_type #[repr(u8)] pub enum IoBufferType {