RTIO DMA: Compiled but not working.

* The cache flush should be done before playback, rather than when getting
  the handle.
* Polling `csr::rtio_dma::enable_read()` loops forever, probably because of a
  bug in the gateware.
core0-buffer
pca006132 2020-07-21 16:57:14 +08:00
parent 64dad88a32
commit 0310421085
10 changed files with 369 additions and 2 deletions

View File

@ -12,7 +12,11 @@ device_db = {
"target": "cortexa9"
}
},
"core_dma": {
"type": "local",
"module": "artiq.coredevice.dma",
"class": "CoreDMA"
},
# led? are common to all variants
"led0": {
"type": "local",

26
examples/dma.py Normal file
View File

@ -0,0 +1,26 @@
from artiq.experiment import *
class DMAPulses(EnvExperiment):
    """Record a short LED pulse sequence into a DMA buffer, then replay it."""

    def build(self):
        # Devices this experiment needs from the device database.
        for device_name in ("core", "core_dma", "led0"):
            self.setattr_device(device_name)

    @kernel
    def record(self):
        # While the recording context is open, RTIO operations are stored in
        # the "pulses" DMA buffer rather than being executed immediately.
        with self.core_dma.record("pulses"):
            self.led0.pulse(100*ns)
            delay(100*ns)

    @kernel
    def run(self):
        self.core.reset()
        self.record()
        # Look up the DMA buffer handle ahead of time so that triggering the
        # playback is faster.
        handle = self.core_dma.get_handle("pulses")
        self.core.break_realtime()
        self.core_dma.playback_handle(handle)

1
src/Cargo.lock generated
View File

@ -96,6 +96,7 @@ dependencies = [
name = "dyld"
version = "0.1.0"
dependencies = [
"libcortex_a9",
"log",
]

View File

@ -8,3 +8,5 @@ name = "dyld"
[dependencies]
log = "0.4"
libcortex_a9 = { git = "https://git.m-labs.hk/M-Labs/zc706.git" }

View File

@ -2,6 +2,7 @@
extern crate alloc;
extern crate log;
extern crate libcortex_a9;
use core::{convert, fmt, str};
use alloc::string::String;

View File

@ -7,6 +7,10 @@ use super::{
image::Image,
Library,
};
use libcortex_a9::{
cache::{dcci_slice, iciallu, bpiall},
asm::{dsb, isb},
};
pub trait Relocatable {
fn offset(&self) -> usize;
@ -154,6 +158,13 @@ pub fn rebind(
_ => {}
}
}
// FIXME: these cache maintenance operations may be more than is needed
// and may degrade performance.
dcci_slice(lib.image.data);
iciallu();
bpiall();
dsb();
isb();
Ok(())
}

View File

@ -3,6 +3,7 @@ use libm;
use crate::eh_artiq;
use crate::rtio;
use super::rpc::{rpc_send, rpc_send_async, rpc_recv};
use super::dma;
macro_rules! api {
($i:ident) => ({
@ -50,6 +51,13 @@ pub fn resolve(required: &[u8]) -> Option<u32> {
api!(rtio_input_timestamped_data = rtio::input_timestamped_data),
api!(rtio_log = rtio::log),
// rtio dma
api!(dma_record_start = dma::dma_record_start),
api!(dma_record_stop = dma::dma_record_stop),
api!(dma_erase = dma::dma_erase),
api!(dma_retrieve = dma::dma_retrieve),
api!(dma_playback = dma::dma_playback),
// Double-precision floating-point arithmetic helper functions
// RTABI chapter 4.1.2, Table 2
api!(__aeabi_dadd),

View File

@ -16,8 +16,10 @@ use crate::eh_artiq;
use super::{
api::resolve,
rpc::rpc_send_async,
dma::init_dma,
CHANNEL_0TO1, CHANNEL_1TO0,
KERNEL_CHANNEL_0TO1, KERNEL_CHANNEL_1TO0,
KERNEL_LIBRARY,
Message,
};
@ -93,6 +95,10 @@ impl KernelImage {
})
}
/// Returns a raw mutable pointer to the `library` field of this image.
///
/// The pointer is derived from `&mut self`; nothing here ties its validity
/// to the lifetime of `self`, so the caller is responsible for not using it
/// after the image has been moved or dropped.
pub fn get_library_ptr(&mut self) -> *mut Library {
    let library: *mut Library = &mut self.library;
    library
}
pub unsafe fn exec(&mut self) {
// Flush data cache entries for the image in DDR, including
// Memory/Instruction Synchronization Barriers
@ -118,6 +124,9 @@ pub fn main_core1() {
enable_fpu();
debug!("FPU enabled on Core1");
init_dma();
debug!("Init DMA!");
let mut core1_tx = None;
while core1_tx.is_none() {
core1_tx = CHANNEL_1TO0.lock().take();
@ -139,9 +148,10 @@ pub fn main_core1() {
let result = dyld::load(&data, &resolve)
.and_then(KernelImage::new);
match result {
Ok(kernel) => {
Ok(mut kernel) => {
unsafe {
KERNEL_LOAD_ADDR = kernel.library.image.as_ptr() as usize;
KERNEL_LIBRARY = kernel.get_library_ptr();
}
loaded_kernel = Some(kernel);
debug!("kernel loaded");

View File

@ -0,0 +1,300 @@
use crate::{
pl::csr,
artiq_raise,
rtio,
};
use alloc::{vec::Vec, string::String, collections::BTreeMap, str};
use cslice::CSlice;
use super::KERNEL_LIBRARY;
use core::mem;
use log::debug;
use libcortex_a9::{
cache::dcci_slice,
asm::dsb,
};
/// Byte alignment applied to the start of every stored DMA trace.
/// `dma_playback` asserts that the playback pointer is a multiple of this.
const ALIGNMENT: usize = 16 * 8;
/// Size in bytes of the staging buffer held by `DMA_RECORDER`.
const DMA_BUFFER_SIZE: usize = 16 * 8 * 1024;
/// Staging state for a DMA recording in progress.
struct DmaRecorder {
    /// Set by `dma_record_start` and cleared by `dma_record_stop`.
    active: bool,
    /// Number of bytes at the start of `buffer` that hold pending records.
    data_len: usize,
    /// Staging area for encoded events; `dma_record_flush` appends its used
    /// part to the manager's recording trace.
    buffer: [u8; DMA_BUFFER_SIZE],
}
/// The single global recorder; starts out inactive with an empty buffer.
// NOTE(review): accessed through `static mut` without any locking —
// presumably only ever touched from the kernel core; confirm.
static mut DMA_RECORDER: DmaRecorder = DmaRecorder {
    active: false,
    data_len: 0,
    buffer: [0; DMA_BUFFER_SIZE],
};
/// A finished DMA trace stored by the `Manager`.
#[derive(Debug)]
struct Entry {
    /// Backing storage: `padding_len` filler bytes followed by the trace data.
    trace: Vec<u8>,
    /// Number of leading bytes of `trace` to skip to reach the aligned data.
    padding_len: usize,
    /// Duration passed to `record_stop` (presumably in machine units — confirm).
    duration: u64
}
/// Stores named DMA traces plus the state of the recording in progress.
#[derive(Debug)]
pub struct Manager {
    /// Finished traces, keyed by name.
    entries: BTreeMap<String, Entry>,
    /// Name given to the most recent `record_start` call.
    recording_name: String,
    /// Bytes accumulated so far for the recording in progress.
    recording_trace: Vec<u8>
}
// Adapted from https://github.com/m-labs/artiq/blob/master/artiq/firmware/runtime/rtio_dma.rs
impl Manager {
    /// Creates a manager with no stored traces and no recording in progress.
    pub fn new() -> Manager {
        Manager {
            entries: BTreeMap::new(),
            recording_name: String::new(),
            recording_trace: Vec::new(),
        }
    }

    /// Starts a new recording called `name`, discarding any partial recording
    /// and any previously stored trace of the same name.
    pub fn record_start(&mut self, name: &str) {
        self.recording_name = String::from(name);
        self.recording_trace = Vec::new();
        // or we could needlessly OOM replacing a large trace
        self.entries.remove(name);
    }

    /// Appends `data` to the recording in progress.
    pub fn record_append(&mut self, data: &[u8]) {
        self.recording_trace.extend_from_slice(data);
    }

    /// Finishes the recording in progress and stores it under the name given
    /// to `record_start`, together with `duration`.
    ///
    /// A terminating zero byte is appended, and the data is then shifted
    /// inside its own allocation so that it starts at an address that is a
    /// multiple of `ALIGNMENT`.
    pub fn record_stop(&mut self, duration: u64) {
        let mut trace = mem::take(&mut self.recording_trace);
        trace.push(0);
        let data_len = trace.len();

        // Realign. Reserving first fixes the allocation address, so the
        // padding computed from it stays valid while the vector grows below.
        trace.reserve(ALIGNMENT - 1);
        let padding = (ALIGNMENT - trace.as_ptr() as usize % ALIGNMENT) % ALIGNMENT;
        // The reservation above guarantees that this will not reallocate.
        trace.resize(data_len + padding, 0);
        // Overlap-safe move of the data up to the aligned offset.
        trace.copy_within(..data_len, padding);

        let name = mem::take(&mut self.recording_name);
        self.entries.insert(name, Entry {
            trace, duration,
            padding_len: padding,
        });
    }

    /// Removes the stored trace called `name`, if there is one.
    pub fn erase(&mut self, name: &str) {
        self.entries.remove(name);
    }

    /// Calls `f` with the aligned data and duration of the trace called
    /// `name`, or with `(None, 0)` when no such trace is stored, and returns
    /// whatever `f` returns.
    pub fn with_trace<F, R>(&self, name: &str, f: F) -> R
            where F: FnOnce(Option<&[u8]>, u64) -> R {
        match self.entries.get(name) {
            Some(entry) => f(Some(&entry.trace[entry.padding_len..]), entry.duration),
            None => f(None, 0)
        }
    }
}
/// Global trace manager; `None` until `init_dma` has run.
static mut DMA_MANAGER: Option<Manager> = None;
/// Value returned by `dma_retrieve`, laid out with C representation.
#[repr(C)]
pub struct DmaTrace {
    /// Duration stored with the trace.
    duration: i64,
    /// Address of the first byte of the aligned trace data, cast to `i32`.
    address: i32,
}
/// Installs a fresh, empty `Manager` as the global DMA manager.
///
/// The other `dma_*` entry points unwrap the global manager, so this has to
/// run before any of them is called.
pub fn init_dma() {
    let fresh = Manager::new();
    unsafe {
        DMA_MANAGER = Some(fresh);
    }
}
/// Moves the bytes pending in the recorder buffer into the manager's
/// recording trace and marks the buffer as empty.
///
/// Panics if `init_dma` has not been called.
fn dma_record_flush() {
    unsafe {
        let manager = DMA_MANAGER.as_mut().unwrap();
        let pending = DMA_RECORDER.data_len;
        manager.record_append(&DMA_RECORDER.buffer[..pending]);
        DMA_RECORDER.data_len = 0;
    }
}
/// Begins recording a DMA trace called `name`.
///
/// Raises `DMAError` if a recording is already active. Otherwise the loaded
/// kernel's `rtio_output` and `rtio_output_wide` symbols are rebound to the
/// recording variants defined in this module, the recorder is marked active
/// and the manager starts a new recording under `name`.
///
/// Panics if `name` is not valid UTF-8, if `KERNEL_LIBRARY` is null, if a
/// rebind fails, or if `init_dma` has not been called.
pub extern fn dma_record_start(name: CSlice<u8>) {
    let name = str::from_utf8(name.as_ref()).unwrap();

    unsafe {
        if DMA_RECORDER.active {
            artiq_raise!("DMAError", "DMA is already recording")
        }

        // Redirect the kernel's RTIO output calls into the recorder.
        let library = KERNEL_LIBRARY.as_mut().unwrap();
        library.rebind(b"rtio_output",
                       dma_record_output as *const ()).unwrap();
        library.rebind(b"rtio_output_wide",
                       dma_record_output_wide as *const ()).unwrap();

        DMA_RECORDER.active = true;
        let manager = DMA_MANAGER.as_mut().unwrap();
        manager.record_start(name);
    }
}
/// Finishes the current recording and stores it with the given `duration`.
///
/// Raises `DMAError` if no recording is active. Otherwise the kernel's
/// `rtio_output` and `rtio_output_wide` symbols are rebound to the real RTIO
/// implementations, the recorder is marked inactive and the trace is handed
/// to `Manager::record_stop`.
///
/// Panics if `KERNEL_LIBRARY` is null, if a rebind fails, or if `init_dma`
/// has not been called.
pub extern fn dma_record_stop(duration: i64) {
    unsafe {
        // Bytes still pending in the recorder buffer are flushed first; note
        // that this happens before the `active` check below.
        dma_record_flush();

        if !DMA_RECORDER.active {
            artiq_raise!("DMAError", "DMA is not recording")
        }

        // Point the kernel's RTIO output calls back at the real functions.
        let library = KERNEL_LIBRARY.as_mut().unwrap();
        library.rebind(b"rtio_output",
                       rtio::output as *const ()).unwrap();
        library.rebind(b"rtio_output_wide",
                       rtio::output_wide as *const ()).unwrap();

        DMA_RECORDER.active = false;
        let manager = DMA_MANAGER.as_mut().unwrap();
        manager.record_stop(duration as u64);
    }
}
/// Reserves space in the recorder buffer for one event record, writes its
/// header and returns the (still unwritten) data part for the caller to fill.
///
/// The record consists of a 13-byte header followed by `words * 4` data
/// bytes. If the buffer does not have enough free space left, it is flushed
/// to the manager first.
#[inline(always)]
unsafe fn dma_record_output_prepare(timestamp: i64, target: i32,
                                    words: usize) -> &'static mut [u8] {
    // See gateware/rtio/dma.py.
    const HEADER_LENGTH: usize = /*length*/1 + /*channel*/3 + /*timestamp*/8 + /*address*/1;
    let length = HEADER_LENGTH + /*data*/words * 4;

    let free = DMA_RECORDER.buffer.len() - DMA_RECORDER.data_len;
    if free < length {
        dma_record_flush()
    }

    let start = DMA_RECORDER.data_len;
    let record = &mut DMA_RECORDER.buffer[start..start + length];
    DMA_RECORDER.data_len += length;

    let (header, data) = record.split_at_mut(HEADER_LENGTH);
    let target_bytes = target.to_le_bytes();
    // Header layout: length, upper three bytes of `target`, little-endian
    // timestamp, lowest byte of `target`.
    header[0] = length as u8;
    header[1..4].copy_from_slice(&target_bytes[1..]);
    header[4..12].copy_from_slice(&timestamp.to_le_bytes());
    header[12] = target_bytes[0];

    data
}
/// Recording replacement for `rtio_output`: appends one single-word event,
/// stamped with the current value of the RTIO `now` register, to the
/// recorder buffer. The word is stored in little-endian byte order.
pub extern fn dma_record_output(target: i32, word: i32) {
    unsafe {
        let now = csr::rtio::now_read() as i64;
        let payload = dma_record_output_prepare(now, target, 1);
        payload.copy_from_slice(&word.to_le_bytes());
    }
}
/// Recording replacement for `rtio_output_wide`: appends one multi-word
/// event, stamped with the current value of the RTIO `now` register, to the
/// recorder buffer. Each word is stored in little-endian byte order.
///
/// Panics if more than 16 words are passed.
pub extern fn dma_record_output_wide(target: i32, words: CSlice<i32>) {
    assert!(words.len() <= 16); // enforce the hardware limit

    unsafe {
        let now = csr::rtio::now_read() as i64;
        let payload = dma_record_output_prepare(now, target, words.len());
        // The payload is exactly four bytes per word, so the chunks and the
        // words pair up one to one.
        for (slot, word) in payload.chunks_exact_mut(4).zip(words.as_ref().iter()) {
            slot.copy_from_slice(&word.to_le_bytes());
        }
    }
}
/// Removes the stored DMA trace called `name`, if there is one.
///
/// Panics if `name` is not valid UTF-8 or if `init_dma` has not been called.
pub extern fn dma_erase(name: CSlice<u8>) {
    let key = str::from_utf8(name.as_ref()).unwrap();
    unsafe {
        DMA_MANAGER.as_mut().unwrap().erase(key);
    }
}
/// Looks up the stored DMA trace called `name` and returns its address and
/// duration.
///
/// The data cache lines covering the trace are cleaned and invalidated here,
/// followed by a data synchronization barrier.
///
/// Raises `DMAError` if no trace with that name is stored. Panics if `name`
/// is not valid UTF-8 or if `init_dma` has not been called.
pub extern fn dma_retrieve(name: CSlice<u8>) -> DmaTrace {
    let name = str::from_utf8(name.as_ref()).unwrap();
    let manager = unsafe {
        DMA_MANAGER.as_mut().unwrap()
    };

    let found = manager.with_trace(name, |trace, duration| {
        trace.map(|bytes| {
            dcci_slice(bytes);
            dsb();
            (bytes.as_ptr(), duration)
        })
    });

    match found {
        Some((ptr, duration)) => DmaTrace {
            address: ptr as i32,
            duration: duration as i64,
        },
        None => artiq_raise!("DMAError", "DMA trace not found"),
    }
}
/// Plays back the DMA trace located at `ptr`, offset by `timestamp`.
///
/// `ptr` must be a multiple of `ALIGNMENT`; this is asserted. The function
/// programs the RTIO DMA registers, enables the engine and busy-waits until
/// the enable register reads back as zero, then checks the error register.
///
/// Raises `RTIOUnderflow` if bit 0 of the error register is set and
/// `RTIODestinationUnreachable` if bit 1 is set.
pub extern fn dma_playback(timestamp: i64, ptr: i32) {
    assert!(ptr % ALIGNMENT as i32 == 0);

    debug!("DMA Playback");
    unsafe {
        csr::rtio_dma::base_address_write(ptr as u32);
        csr::rtio_dma::time_offset_write(timestamp as u64);

        // Select CRI input 1 for the duration of the playback
        // (presumably the DMA engine — confirm against the gateware).
        csr::cri_con::selected_write(1);
        csr::rtio_dma::enable_write(1);
        // FIXME: this wait has been observed to never terminate; a gateware
        // bug is suspected.
        while csr::rtio_dma::enable_read() != 0 {}
        // Switch back to CRI input 0.
        csr::cri_con::selected_write(0);

        let error = csr::rtio_dma::error_read();
        if error != 0 {
            let timestamp = csr::rtio_dma::error_timestamp_read();
            let channel = csr::rtio_dma::error_channel_read();
            // Presumably acknowledges/clears the error — confirm.
            csr::rtio_dma::error_write(1);
            if error & 1 != 0 {
                artiq_raise!("RTIOUnderflow",
                    "RTIO underflow at {0} mu, channel {1}",
                    timestamp as i64, channel as i64, 0);
            }
            if error & 2 != 0 {
                artiq_raise!("RTIODestinationUnreachable",
                    "RTIO destination unreachable, output, at {0} mu, channel {1}",
                    timestamp as i64, channel as i64, 0);
            }
        }
    }
}

View File

@ -1,6 +1,7 @@
use core::ptr;
use alloc::{vec::Vec, sync::Arc, string::String};
use dyld::Library;
use libcortex_a9::{mutex::Mutex, sync_channel};
use crate::eh_artiq;
@ -9,6 +10,7 @@ pub use control::Control;
pub mod core1;
mod api;
mod rpc;
mod dma;
#[derive(Debug)]
pub struct RPCException {
@ -39,3 +41,5 @@ static CHANNEL_1TO0: Mutex<Option<sync_channel::Sender<Message>>> = Mutex::new(N
static mut KERNEL_CHANNEL_0TO1: *mut () = ptr::null_mut();
static mut KERNEL_CHANNEL_1TO0: *mut () = ptr::null_mut();
/// Raw pointer to the `Library` of the loaded kernel; starts out null.
static mut KERNEL_LIBRARY: *mut Library = ptr::null_mut();