Compare commits


2 Commits

Author SHA1 Message Date
Astro e54edbf32d libcortex_a9: add sync_channel 2020-04-09 02:49:24 +02:00
Astro 64771bf233 libcortex_a9: revamp cache maintenance 2020-04-09 00:18:23 +02:00
4 changed files with 215 additions and 165 deletions


@@ -5,7 +5,7 @@ extern crate alloc;
use core::{mem::transmute, task::Poll};
use alloc::{borrow::ToOwned, collections::BTreeMap, format};
use libcortex_a9::mutex::Mutex;
use libcortex_a9::{mutex::Mutex, sync_channel::{self, sync_channel}};
use libboard_zynq::{
print, println,
self as zynq, clocks::Clocks, clocks::source::{ClockSource, ArmPll, IoPll},
@@ -134,17 +134,22 @@ pub fn main_core0() {
println!("{} bytes stack for core1", core1_stack.len());
let core1 = boot::Core1::start(core1_stack);
for _ in 0..0x1000000 {
let mut l = SHARED.lock();
*l += 1;
}
while !*DONE.lock() {
let x = { *SHARED.lock() };
println!("shared: {:08X}", x);
}
let x = { *SHARED.lock() };
println!("done shared: {:08X}", x);
let (tx, mut rx) = sync_channel(1000);
*SHARED.lock() = Some(tx);
let mut i = 0u32;
loop {
let r = rx.recv();
// println!("Recvd {}", r);
if i != *r {
println!("Expected {}, received {}", i, r);
}
if i % 100000 == 0 {
println!("{} Ok", i);
}
i += 1;
}
core1.reset();
libcortex_a9::asm::dsb();
@@ -246,51 +251,27 @@ pub fn main_core0() {
time += 1;
Instant::from_millis(time)
});
// loop {
// time += 1;
// let timestamp = Instant::from_millis(time);
// match iface.poll(&mut sockets, timestamp) {
// Ok(_) => {},
// Err(e) => {
// println!("poll error: {}", e);
// }
// }
// // (mostly) taken from smoltcp example: TCP echo server
// let mut socket = sockets.get::<TcpSocket>(tcp_handle);
// if !socket.is_open() {
// socket.listen(TCP_PORT).unwrap()
// }
// if socket.may_recv() && socket.can_send() {
// socket.recv(|buf| {
// let len = buf.len().min(4096);
// let buffer = buf[..len].iter().cloned().collect::<Vec<_>>();
// (len, buffer)
// })
// .and_then(|buffer| socket.send_slice(&buffer[..]))
// .map(|_| {})
// .unwrap_or_else(|e| println!("tcp: {:?}", e));
// }
// }
// #[allow(unreachable_code)]
// drop(tx_descs);
// #[allow(unreachable_code)]
// drop(tx_buffers);
}
static SHARED: Mutex<u32> = Mutex::new(0);
static SHARED: Mutex<Option<sync_channel::Sender<u32>>> = Mutex::new(None);
static DONE: Mutex<bool> = Mutex::new(false);
#[no_mangle]
pub fn main_core1() {
println!("Hello from core1!");
for _ in 0..0x1000000 {
let mut l = SHARED.lock();
*l += 1;
let mut tx = None;
while tx.is_none() {
tx = SHARED.lock().take();
}
println!("Core1 got tx");
let mut tx = tx.unwrap();
for i in 0.. {
// println!("S {}", i);
tx.send(i);
}
println!("core1 done!");
*DONE.lock() = true;
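In outline, the change above creates the channel on core0, parks the `Sender` in `SHARED` for core1 to take, and keeps the `Receiver` on core0 to verify that values arrive in order. A condensed sketch of that handoff pattern; the statics and types mirror the diff, while the function names and loop bounds here are only illustrative:

use libcortex_a9::{mutex::Mutex, sync_channel::{self, sync_channel}};

static SHARED: Mutex<Option<sync_channel::Sender<u32>>> = Mutex::new(None);

// Core 0: create the channel, publish the sending half, consume in order.
fn core0_side() {
    let (tx, mut rx) = sync_channel(1000);
    *SHARED.lock() = Some(tx);
    for expected in 0..10_000u32 {
        let received = rx.recv();      // blocking; returns Box<u32>
        assert_eq!(*received, expected);
    }
}

// Core 1: spin until the sender appears, then stream values into it.
fn core1_side() {
    let mut tx = loop {
        if let Some(tx) = SHARED.lock().take() {
            break tx;
        }
    };
    for i in 0..10_000u32 {
        tx.send(i);                    // blocks while all slots are full
    }
}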


@@ -22,6 +22,15 @@ pub fn bpiall() {
}
}
/// Data cache clean by set/way
#[inline(always)]
pub fn dccsw(setway: u32) {
unsafe {
asm!("mcr p15, 0, $0, c7, c10, 2" :: "r" (setway) :: "volatile");
}
}
/// Data cache invalidate by set/way
#[inline(always)]
pub fn dcisw(setway: u32) {
unsafe {
@@ -60,74 +69,76 @@ pub fn dciall() {
}
}
/// Data cache clear and invalidate by memory virtual address. This
const CACHE_LINE: usize = 0x20;
const CACHE_LINE_MASK: usize = CACHE_LINE - 1;
#[inline]
fn cache_line_addrs(first_addr: usize, beyond_addr: usize) -> impl Iterator<Item = usize> {
let first_addr = first_addr & !CACHE_LINE_MASK;
let beyond_addr = (beyond_addr | CACHE_LINE_MASK) + 1;
(first_addr..beyond_addr).step_by(CACHE_LINE)
}
fn object_cache_line_addrs<T>(object: &T) -> impl Iterator<Item = usize> {
let first_addr = object as *const _ as usize;
let beyond_addr = (object as *const _ as usize) + core::mem::size_of_val(object);
cache_line_addrs(first_addr, beyond_addr)
}
fn slice_cache_line_addrs<T>(slice: &[T]) -> impl Iterator<Item = usize> {
let first_addr = &slice[0] as *const _ as usize;
let beyond_addr = (&slice[slice.len() - 1] as *const _ as usize) +
core::mem::size_of_val(&slice[slice.len() - 1]);
cache_line_addrs(first_addr, beyond_addr)
}
/// Data cache clean and invalidate by memory virtual address. This
/// flushes data out to the point of coherency, and invalidates the
/// corresponding cache line (as appropriate when DMA is meant to be
/// writing into it).
#[inline(always)]
pub fn dccimva(addr: usize) {
pub fn dccimvac(addr: usize) {
unsafe {
asm!("mcr p15, 0, $0, c7, c14, 1" :: "r" (addr) :: "volatile");
}
}
/// clear cache line by virtual address to point of coherency (DCCMVAC)
#[inline]
pub fn dccmvac(addr: u32) {
/// Data cache clean and invalidate for an object.
pub fn dcci<T>(object: &T) {
for addr in object_cache_line_addrs(object) {
dccimvac(addr);
}
}
/// Data cache clean and invalidate for the contents of a slice.
pub fn dcci_slice<T>(slice: &mut [T]) {
for addr in slice_cache_line_addrs(slice) {
dccimvac(addr);
}
}
/// Data cache clean by memory virtual address.
#[inline(always)]
pub fn dccmvac(addr: usize) {
unsafe {
asm!("mcr p15, 0, $0, c7, c10, 1" :: "r" (addr) :: "volatile");
}
}
/// The DCCIVMA (data cache clear and invalidate) applied to the
/// region of memory occupied by the argument. This does not modify
/// the argument, but due to the invalidate part (only ever needed if
/// external write access is to be granted, e.g. by DMA) it only makes
/// sense if the caller has exclusive access to it as otherwise other
/// accesses might just bring it back into the data cache.
pub fn dcci<T>(object: &mut T) {
let cache_line = 0x20;
let first_addr =
(object as *mut _ as *const _ as usize) & !(cache_line - 1);
let beyond_addr = (
(object as *mut _ as *const _ as usize)
+ core::mem::size_of_val(object)
+ (cache_line - 1)
) & !(cache_line - 1);
for addr in (first_addr..beyond_addr).step_by(cache_line) {
dccimva(addr);
/// Data cache clean for an object.
pub fn dcc<T>(object: &T) {
for addr in object_cache_line_addrs(object) {
dccmvac(addr);
}
}
pub fn dcci_slice_content<T>(slice: &mut [T]) {
if slice.len() == 0 {
return;
}
let cache_line = 0x20;
let first_addr =
(&slice[0] as *const _ as usize) & !(cache_line - 1);
let beyond_addr = (
(&slice[slice.len() - 1] as *const _ as usize)
+ (cache_line - 1)
) & !(cache_line - 1);
for addr in (first_addr..beyond_addr).step_by(cache_line) {
dccimva(addr);
}
}
pub fn dcci_slice_content_unmut<T>(slice: &[T]) {
if slice.len() == 0 {
return;
}
let cache_line = 0x20;
let first_addr =
(&slice[0] as *const _ as usize) & !(cache_line - 1);
let beyond_addr = (
(&slice[slice.len() - 1] as *const _ as usize)
+ (cache_line - 1)
) & !(cache_line - 1);
for addr in (first_addr..beyond_addr).step_by(cache_line) {
dccimva(addr);
/// Data cache clean for the contents of a slice.
pub fn dcc_slice<T>(slice: &[T]) {
for addr in slice_cache_line_addrs(slice) {
dccmvac(addr);
}
}
@@ -136,79 +147,30 @@ pub fn dcci_slice_content_unmut<T>(slice: &[T]) {
/// unsafe, as this discards a write-back cache line, potentially
/// affecting more data than intended.
#[inline(always)]
pub unsafe fn dcimva(addr: usize) {
pub unsafe fn dcimvac(addr: usize) {
asm!("mcr p15, 0, $0, c7, c6, 1" :: "r" (addr) :: "volatile");
}
/// Data cache invalidate for an object. Panics if not properly
/// aligned and properly sized to be contained in an exact number of
/// cache lines.
pub fn dci<T>(object: &mut T) {
let cache_line = 0x20;
let first_addr = object as *mut _ as *const _ as usize;
let beyond_addr = (object as *mut _ as *const _ as usize) +
core::mem::size_of_val(object);
assert_eq!((first_addr & (cache_line - 1)), 0x00);
assert_eq!((beyond_addr & (cache_line - 1)), 0x00);
for addr in (first_addr..beyond_addr).step_by(cache_line) {
unsafe {
dcimva(addr);
}
/// Data cache invalidate for an object. Panics if the object is not
/// cache-line aligned and sized to a whole number of cache lines.
pub unsafe fn dci<T>(object: &mut T) {
let first_addr = object as *const _ as usize;
let beyond_addr = (object as *const _ as usize) + core::mem::size_of_val(object);
assert_eq!(first_addr & CACHE_LINE_MASK, 0, "dci object first_addr must be aligned");
assert_eq!(beyond_addr & CACHE_LINE_MASK, 0, "dci object beyond_addr must be aligned");
for addr in (first_addr..beyond_addr).step_by(CACHE_LINE) {
dcimvac(addr);
}
}
/// Data cache invalidate for the contents of a slice. Panics if not
/// properly aligned and properly sized to be contained in an exact
/// number of cache lines.
pub fn dci_slice_content<T>(slice: &mut [T]) {
if slice.len() == 0 {
return;
}
let cache_line = 0x20;
pub unsafe fn dci_slice<T>(slice: &mut [T]) {
let first_addr = &slice[0] as *const _ as usize;
let beyond_addr = (&slice[slice.len() - 1] as *const _ as usize)
+ core::mem::size_of::<T>();
assert_eq!((first_addr & (cache_line - 1)), 0x00);
assert_eq!((beyond_addr & (cache_line - 1)), 0x00);
for addr in (first_addr..beyond_addr).step_by(cache_line) {
unsafe {
dcimva(addr);
}
}
}
let beyond_addr = (&slice[slice.len() - 1] as *const _ as usize) +
core::mem::size_of_val(&slice[slice.len() - 1]);
assert_eq!(first_addr & CACHE_LINE_MASK, 0, "dci slice first_addr must be aligned");
assert_eq!(beyond_addr & CACHE_LINE_MASK, 0, "dci slice beyond_addr must be aligned");
pub unsafe fn dci_more_than_slice_content<T>(slice: &mut [T]) {
if slice.len() == 0 {
return;
}
let cache_line = 0x20;
let first_addr =
(&slice[0] as *const _ as usize) & !(cache_line - 1);
let beyond_addr = (
(&slice[slice.len() - 1] as *const _ as usize)
+ (cache_line - 1)
) & !(cache_line - 1);
assert_eq!((first_addr & (cache_line - 1)), 0x00);
assert_eq!((beyond_addr & (cache_line - 1)), 0x00);
for addr in (first_addr..beyond_addr).step_by(cache_line) {
dcimva(addr);
}
}
pub unsafe fn dci_more_than_slice_content_nonmut<T>(slice: &[T]) {
if slice.len() == 0 {
return;
}
let cache_line = 0x20;
let first_addr =
(&slice[0] as *const _ as usize) & !(cache_line - 1);
let beyond_addr = (
(&slice[slice.len() - 1] as *const _ as usize)
+ (cache_line - 1)
) & !(cache_line - 1);
assert_eq!((first_addr & (cache_line - 1)), 0x00);
assert_eq!((beyond_addr & (cache_line - 1)), 0x00);
for addr in (first_addr..beyond_addr).step_by(cache_line) {
dcimva(addr);
for addr in (first_addr..beyond_addr).step_by(CACHE_LINE) {
dcimvac(addr);
}
}
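The renamed primitives follow the ARMv7 mnemonics: DCCMVAC cleans, DCIMVAC invalidates, and DCCIMVAC cleans and invalidates a line by virtual address to the point of coherency, while the new `dcc*`/`dci*`/`dcci*` wrappers walk whole objects or slices one cache line at a time. A minimal sketch of how the slice variants are typically paired around a DMA transfer; the `dma_out`/`dma_in` functions and the commented-out DMA calls are hypothetical, only the cache calls come from this diff:

use libcortex_a9::cache;

// CPU -> device: clean the buffer so the device reads what the CPU wrote.
fn dma_out(buf: &[u8]) {
    cache::dcc_slice(buf);
    // start_dma_write(buf.as_ptr(), buf.len());    // hypothetical DMA call
}

// Device -> CPU: clean and invalidate before granting the device write
// access, so no dirty line is later evicted over the DMA data and no
// stale line is read back afterwards.
fn dma_in(buf: &mut [u8]) {
    cache::dcci_slice(buf);
    // start_dma_read(buf.as_mut_ptr(), buf.len()); // hypothetical DMA call
}

The stricter `dci`/`dci_slice` variants skip the clean and therefore stay `unsafe` and panic unless the region is cache-line aligned, since discarding a partial line could drop unrelated dirty data.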


@@ -2,10 +2,13 @@
#![feature(asm, global_asm)]
#![feature(never_type)]
extern crate alloc;
pub mod asm;
pub mod regs;
pub mod cache;
pub mod mmu;
pub mod mutex;
pub mod sync_channel;
global_asm!(include_str!("exceptions.s"));


@@ -0,0 +1,104 @@
use core::{
ptr::null_mut,
sync::atomic::{AtomicPtr, Ordering},
};
use alloc::{
boxed::Box,
sync::Arc,
vec::Vec,
};
use super::asm::*;
type Channel<T> = Vec<AtomicPtr<T>>;
/// Create a bounded channel
///
/// Returns `(tx, rx)` where one should be used on the local core,
/// and the other is to be shared with another core.
pub fn sync_channel<T>(bound: usize) -> (Sender<T>, Receiver<T>) {
// allow for bound=0
let len = bound + 1;
let mut channel = Vec::with_capacity(len);
for _ in 0..len {
channel.push(AtomicPtr::default());
}
let channel = Arc::new(channel);
let sender = Sender {
channel: channel.clone(),
pos: 0,
};
let receiver = Receiver {
channel: channel,
pos: 0,
};
(sender, receiver)
}
/// Sending half of a channel
pub struct Sender<T> {
channel: Arc<Channel<T>>,
pos: usize,
}
impl<T> Sender<T> {
/// Blocking send
pub fn send<B: Into<Box<T>>>(&mut self, content: B) {
let ptr = Box::into_raw(content.into());
let entry = &self.channel[self.pos];
// try to write the new pointer if the current pointer is
// NULL, retrying while it is not NULL
while entry.compare_and_swap(null_mut(), ptr, Ordering::Acquire) != null_mut() {
// power-saving
wfe();
}
dsb();
// wake power-saving receivers
sev();
// advance
self.pos += 1;
// wrap
if self.pos >= self.channel.len() {
self.pos = 0;
}
}
}
/// Receiving half of a channel
pub struct Receiver<T> {
channel: Arc<Channel<T>>,
pos: usize,
}
impl<T> Receiver<T> {
/// Blocking receive
pub fn recv(&mut self) -> Box<T> {
let entry = &self.channel[self.pos];
loop {
dmb();
let ptr = entry.swap(null_mut(), Ordering::Release);
if ptr != null_mut() {
dsb();
// wake power-saving senders
sev();
let content = unsafe { Box::from_raw(ptr) };
// advance
self.pos += 1;
// wrap
if self.pos >= self.channel.len() {
self.pos = 0;
}
return content;
}
// power-saving
wfe();
}
}
}
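The channel itself is a fixed ring of `AtomicPtr` slots: a slot is free while it holds NULL, `send` boxes the value and publishes its pointer, and `recv` swaps the pointer back out, with `wfe`/`sev` used to sleep and wake the other core and `dsb`/`dmb` ordering the accesses. A minimal single-core sketch of the API, assuming it runs in this crate's Cortex-A9 environment; the ring is sized here so that nothing blocks:

use libcortex_a9::sync_channel::sync_channel;

fn smoke_test() {
    // bound + 1 slots are allocated, so these four sends complete
    // without a receiver running concurrently.
    let (mut tx, mut rx) = sync_channel::<u32>(4);
    for i in 0..4u32 {
        tx.send(i);               // anything Into<Box<u32>> can be sent
    }
    for i in 0..4u32 {
        let v = rx.recv();        // Box<u32>, delivered in FIFO order
        assert_eq!(*v, i);
    }
}

In the diff above the two halves run on different cores instead, with the `Sender` handed over through the `SHARED` mutex in the first file.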