forked from M-Labs/nac3
RTIO DMA: Compiled but not working.
* The cache flush should be done before playback instead of when getting the handle. * Polling `csr::rtio_dma::enable_read()` loops forever, probably due to a bug in the gateware.
This commit is contained in:
parent
64dad88a32
commit
0310421085
|
@ -12,7 +12,11 @@ device_db = {
|
|||
"target": "cortexa9"
|
||||
}
|
||||
},
|
||||
|
||||
"core_dma": {
|
||||
"type": "local",
|
||||
"module": "artiq.coredevice.dma",
|
||||
"class": "CoreDMA"
|
||||
},
|
||||
# led? are common to all variants
|
||||
"led0": {
|
||||
"type": "local",
|
||||
|
|
|
@ -0,0 +1,26 @@
|
|||
from artiq.experiment import *
|
||||
|
||||
class DMAPulses(EnvExperiment):
    """Records a single LED pulse into a named DMA trace and plays it back once."""

    def build(self):
        """Request the devices used by this experiment, in a fixed order."""
        for device_name in ("core", "core_dma", "led0"):
            self.setattr_device(device_name)

    # Captures the pulse sequence into the DMA trace named "pulses".
    @kernel
    def record(self):
        with self.core_dma.record("pulses"):
            # RTIO operations issued inside this block are written to the
            # "pulses" DMA buffer rather than being executed immediately.
            self.led0.pulse(100*ns)
            delay(100*ns)

    # Resets the core, records the trace, then plays it back.
    @kernel
    def run(self):
        self.core.reset()
        self.record()
        # Look up the DMA buffer handle ahead of time so that
        # triggering playback is faster.
        handle = self.core_dma.get_handle("pulses")
        self.core.break_realtime()
        self.core_dma.playback_handle(handle)
|
||||
|
|
@ -96,6 +96,7 @@ dependencies = [
|
|||
name = "dyld"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"libcortex_a9",
|
||||
"log",
|
||||
]
|
||||
|
||||
|
|
|
@ -8,3 +8,5 @@ name = "dyld"
|
|||
|
||||
[dependencies]
|
||||
log = "0.4"
|
||||
libcortex_a9 = { git = "https://git.m-labs.hk/M-Labs/zc706.git" }
|
||||
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
|
||||
extern crate alloc;
|
||||
extern crate log;
|
||||
extern crate libcortex_a9;
|
||||
|
||||
use core::{convert, fmt, str};
|
||||
use alloc::string::String;
|
||||
|
|
|
@ -7,6 +7,10 @@ use super::{
|
|||
image::Image,
|
||||
Library,
|
||||
};
|
||||
use libcortex_a9::{
|
||||
cache::{dcci_slice, iciallu, bpiall},
|
||||
asm::{dsb, isb},
|
||||
};
|
||||
|
||||
pub trait Relocatable {
|
||||
fn offset(&self) -> usize;
|
||||
|
@ -154,6 +158,13 @@ pub fn rebind(
|
|||
_ => {}
|
||||
}
|
||||
}
|
||||
// FIXME: the cache maintenance operations may be more than enough,
|
||||
// may cause performance degradation.
|
||||
dcci_slice(lib.image.data);
|
||||
iciallu();
|
||||
bpiall();
|
||||
dsb();
|
||||
isb();
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
|
|
@ -3,6 +3,7 @@ use libm;
|
|||
use crate::eh_artiq;
|
||||
use crate::rtio;
|
||||
use super::rpc::{rpc_send, rpc_send_async, rpc_recv};
|
||||
use super::dma;
|
||||
|
||||
macro_rules! api {
|
||||
($i:ident) => ({
|
||||
|
@ -50,6 +51,13 @@ pub fn resolve(required: &[u8]) -> Option<u32> {
|
|||
api!(rtio_input_timestamped_data = rtio::input_timestamped_data),
|
||||
api!(rtio_log = rtio::log),
|
||||
|
||||
// rtio dma
|
||||
api!(dma_record_start = dma::dma_record_start),
|
||||
api!(dma_record_stop = dma::dma_record_stop),
|
||||
api!(dma_erase = dma::dma_erase),
|
||||
api!(dma_retrieve = dma::dma_retrieve),
|
||||
api!(dma_playback = dma::dma_playback),
|
||||
|
||||
// Double-precision floating-point arithmetic helper functions
|
||||
// RTABI chapter 4.1.2, Table 2
|
||||
api!(__aeabi_dadd),
|
||||
|
|
|
@ -16,8 +16,10 @@ use crate::eh_artiq;
|
|||
use super::{
|
||||
api::resolve,
|
||||
rpc::rpc_send_async,
|
||||
dma::init_dma,
|
||||
CHANNEL_0TO1, CHANNEL_1TO0,
|
||||
KERNEL_CHANNEL_0TO1, KERNEL_CHANNEL_1TO0,
|
||||
KERNEL_LIBRARY,
|
||||
Message,
|
||||
};
|
||||
|
||||
|
@ -93,6 +95,10 @@ impl KernelImage {
|
|||
})
|
||||
}
|
||||
|
||||
/// Returns a raw mutable pointer to this image's `library` field.
///
/// The pointer is derived from `&mut self`; nothing here extends the
/// lifetime of the underlying `Library`.
pub fn get_library_ptr(&mut self) -> *mut Library {
    let library: *mut Library = &mut self.library;
    library
}
|
||||
|
||||
pub unsafe fn exec(&mut self) {
|
||||
// Flush data cache entries for the image in DDR, including
|
||||
// Memory/Instruction Synchronization Barriers
|
||||
|
@ -118,6 +124,9 @@ pub fn main_core1() {
|
|||
enable_fpu();
|
||||
debug!("FPU enabled on Core1");
|
||||
|
||||
init_dma();
|
||||
debug!("Init DMA!");
|
||||
|
||||
let mut core1_tx = None;
|
||||
while core1_tx.is_none() {
|
||||
core1_tx = CHANNEL_1TO0.lock().take();
|
||||
|
@ -139,9 +148,10 @@ pub fn main_core1() {
|
|||
let result = dyld::load(&data, &resolve)
|
||||
.and_then(KernelImage::new);
|
||||
match result {
|
||||
Ok(kernel) => {
|
||||
Ok(mut kernel) => {
|
||||
unsafe {
|
||||
KERNEL_LOAD_ADDR = kernel.library.image.as_ptr() as usize;
|
||||
KERNEL_LIBRARY = kernel.get_library_ptr();
|
||||
}
|
||||
loaded_kernel = Some(kernel);
|
||||
debug!("kernel loaded");
|
||||
|
|
|
@ -0,0 +1,300 @@
|
|||
use crate::{
|
||||
pl::csr,
|
||||
artiq_raise,
|
||||
rtio,
|
||||
};
|
||||
use alloc::{vec::Vec, string::String, collections::BTreeMap, str};
|
||||
use cslice::CSlice;
|
||||
use super::KERNEL_LIBRARY;
|
||||
use core::mem;
|
||||
use log::debug;
|
||||
|
||||
use libcortex_a9::{
|
||||
cache::dcci_slice,
|
||||
asm::dsb,
|
||||
};
|
||||
|
||||
/// Byte alignment applied to the start of a stored trace's data; `dma_playback`
/// asserts that the address it receives is a multiple of this value.
const ALIGNMENT: usize = 16 * 8;

/// Capacity, in bytes, of the static buffer in which recorded events are staged
/// before being appended to the trace under construction.
const DMA_BUFFER_SIZE: usize = 16 * 8 * 1024;
|
||||
|
||||
struct DmaRecorder {
|
||||
active: bool,
|
||||
data_len: usize,
|
||||
buffer: [u8; DMA_BUFFER_SIZE],
|
||||
}
|
||||
|
||||
static mut DMA_RECORDER: DmaRecorder = DmaRecorder {
|
||||
active: false,
|
||||
data_len: 0,
|
||||
buffer: [0; DMA_BUFFER_SIZE],
|
||||
};
|
||||
|
||||
/// A stored trace together with the metadata needed to hand it out.
#[derive(Debug)]
struct Entry {
    /// Alignment padding followed by the trace data.
    trace: Vec<u8>,
    /// Number of leading bytes of `trace` to skip to reach the trace data.
    padding_len: usize,
    /// Duration recorded alongside the trace.
    duration: u64,
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct Manager {
|
||||
entries: BTreeMap<String, Entry>,
|
||||
recording_name: String,
|
||||
recording_trace: Vec<u8>
|
||||
}
|
||||
|
||||
// Copied from https://github.com/m-labs/artiq/blob/master/artiq/firmware/runtime/rtio_dma.rs
|
||||
// basically without modification except removing some warnings.
|
||||
impl Manager {
|
||||
pub fn new() -> Manager {
|
||||
Manager {
|
||||
entries: BTreeMap::new(),
|
||||
recording_name: String::new(),
|
||||
recording_trace: Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn record_start(&mut self, name: &str) {
|
||||
self.recording_name = String::from(name);
|
||||
self.recording_trace = Vec::new();
|
||||
|
||||
// or we could needlessly OOM replacing a large trace
|
||||
self.entries.remove(name);
|
||||
}
|
||||
|
||||
pub fn record_append(&mut self, data: &[u8]) {
|
||||
self.recording_trace.extend_from_slice(data);
|
||||
}
|
||||
|
||||
pub fn record_stop(&mut self, duration: u64) {
|
||||
let mut trace = Vec::new();
|
||||
mem::swap(&mut self.recording_trace, &mut trace);
|
||||
trace.push(0);
|
||||
let data_len = trace.len();
|
||||
|
||||
// Realign.
|
||||
trace.reserve(ALIGNMENT - 1);
|
||||
let padding = ALIGNMENT - trace.as_ptr() as usize % ALIGNMENT;
|
||||
let padding = if padding == ALIGNMENT { 0 } else { padding };
|
||||
for _ in 0..padding {
|
||||
// Vec guarantees that this will not reallocate
|
||||
trace.push(0)
|
||||
}
|
||||
for i in 1..data_len + 1 {
|
||||
trace[data_len + padding - i] = trace[data_len - i]
|
||||
}
|
||||
|
||||
let mut name = String::new();
|
||||
mem::swap(&mut self.recording_name, &mut name);
|
||||
self.entries.insert(name, Entry {
|
||||
trace, duration,
|
||||
padding_len: padding,
|
||||
});
|
||||
}
|
||||
|
||||
pub fn erase(&mut self, name: &str) {
|
||||
self.entries.remove(name);
|
||||
}
|
||||
|
||||
pub fn with_trace<F, R>(&self, name: &str, f: F) -> R
|
||||
where F: FnOnce(Option<&[u8]>, u64) -> R {
|
||||
match self.entries.get(name) {
|
||||
Some(entry) => f(Some(&entry.trace[entry.padding_len..]), entry.duration),
|
||||
None => f(None, 0)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/// Global trace manager; stays `None` until `init_dma` installs one.
static mut DMA_MANAGER: Option<Manager> = None;
|
||||
|
||||
/// Result of `dma_retrieve`. `#[repr(C)]` gives the struct C layout, so the
/// field order below must not be changed.
#[repr(C)]
pub struct DmaTrace {
    /// Duration stored with the trace.
    duration: i64,
    /// Address of the first byte of trace data, as a 32-bit integer.
    address: i32,
}
|
||||
|
||||
pub fn init_dma() {
|
||||
unsafe {
|
||||
DMA_MANAGER = Some(Manager::new());
|
||||
}
|
||||
}
|
||||
|
||||
fn dma_record_flush() {
|
||||
unsafe {
|
||||
let manager = DMA_MANAGER.as_mut().unwrap();
|
||||
manager.record_append(&DMA_RECORDER.buffer[..DMA_RECORDER.data_len]);
|
||||
DMA_RECORDER.data_len = 0;
|
||||
}
|
||||
}
|
||||
|
||||
pub extern fn dma_record_start(name: CSlice<u8>) {
|
||||
let name = str::from_utf8(name.as_ref()).unwrap();
|
||||
|
||||
unsafe {
|
||||
if DMA_RECORDER.active {
|
||||
artiq_raise!("DMAError", "DMA is already recording")
|
||||
}
|
||||
|
||||
let library = KERNEL_LIBRARY.as_mut().unwrap();
|
||||
library.rebind(b"rtio_output",
|
||||
dma_record_output as *const ()).unwrap();
|
||||
library.rebind(b"rtio_output_wide",
|
||||
dma_record_output_wide as *const ()).unwrap();
|
||||
|
||||
DMA_RECORDER.active = true;
|
||||
let manager = DMA_MANAGER.as_mut().unwrap();
|
||||
manager.record_start(name);
|
||||
}
|
||||
}
|
||||
|
||||
pub extern fn dma_record_stop(duration: i64) {
|
||||
unsafe {
|
||||
dma_record_flush();
|
||||
|
||||
if !DMA_RECORDER.active {
|
||||
artiq_raise!("DMAError", "DMA is not recording")
|
||||
}
|
||||
|
||||
let library = KERNEL_LIBRARY.as_mut().unwrap();
|
||||
library.rebind(b"rtio_output",
|
||||
rtio::output as *const ()).unwrap();
|
||||
library.rebind(b"rtio_output_wide",
|
||||
rtio::output_wide as *const ()).unwrap();
|
||||
|
||||
DMA_RECORDER.active = false;
|
||||
let manager = DMA_MANAGER.as_mut().unwrap();
|
||||
manager.record_stop(duration as u64);
|
||||
}
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
unsafe fn dma_record_output_prepare(timestamp: i64, target: i32,
|
||||
words: usize) -> &'static mut [u8] {
|
||||
// See gateware/rtio/dma.py.
|
||||
const HEADER_LENGTH: usize = /*length*/1 + /*channel*/3 + /*timestamp*/8 + /*address*/1;
|
||||
let length = HEADER_LENGTH + /*data*/words * 4;
|
||||
|
||||
if DMA_RECORDER.buffer.len() - DMA_RECORDER.data_len < length {
|
||||
dma_record_flush()
|
||||
}
|
||||
|
||||
let record = &mut DMA_RECORDER.buffer[DMA_RECORDER.data_len..
|
||||
DMA_RECORDER.data_len + length];
|
||||
DMA_RECORDER.data_len += length;
|
||||
|
||||
let (header, data) = record.split_at_mut(HEADER_LENGTH);
|
||||
|
||||
header.copy_from_slice(&[
|
||||
(length >> 0) as u8,
|
||||
(target >> 8) as u8,
|
||||
(target >> 16) as u8,
|
||||
(target >> 24) as u8,
|
||||
(timestamp >> 0) as u8,
|
||||
(timestamp >> 8) as u8,
|
||||
(timestamp >> 16) as u8,
|
||||
(timestamp >> 24) as u8,
|
||||
(timestamp >> 32) as u8,
|
||||
(timestamp >> 40) as u8,
|
||||
(timestamp >> 48) as u8,
|
||||
(timestamp >> 56) as u8,
|
||||
(target >> 0) as u8,
|
||||
]);
|
||||
|
||||
data
|
||||
}
|
||||
|
||||
pub extern fn dma_record_output(target: i32, word: i32) {
|
||||
unsafe {
|
||||
let timestamp = csr::rtio::now_read() as i64;
|
||||
let data = dma_record_output_prepare(timestamp, target, 1);
|
||||
data.copy_from_slice(&[
|
||||
(word >> 0) as u8,
|
||||
(word >> 8) as u8,
|
||||
(word >> 16) as u8,
|
||||
(word >> 24) as u8,
|
||||
]);
|
||||
}
|
||||
}
|
||||
|
||||
pub extern fn dma_record_output_wide(target: i32, words: CSlice<i32>) {
|
||||
assert!(words.len() <= 16); // enforce the hardware limit
|
||||
|
||||
unsafe {
|
||||
let timestamp = csr::rtio::now_read() as i64;
|
||||
let mut data = dma_record_output_prepare(timestamp, target, words.len());
|
||||
for word in words.as_ref().iter() {
|
||||
data[..4].copy_from_slice(&[
|
||||
(word >> 0) as u8,
|
||||
(word >> 8) as u8,
|
||||
(word >> 16) as u8,
|
||||
(word >> 24) as u8,
|
||||
]);
|
||||
data = &mut data[4..];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub extern fn dma_erase(name: CSlice<u8>) {
|
||||
let name = str::from_utf8(name.as_ref()).unwrap();
|
||||
|
||||
let manager = unsafe {
|
||||
DMA_MANAGER.as_mut().unwrap()
|
||||
};
|
||||
manager.erase(name);
|
||||
}
|
||||
|
||||
pub extern fn dma_retrieve(name: CSlice<u8>) -> DmaTrace {
|
||||
let name = str::from_utf8(name.as_ref()).unwrap();
|
||||
|
||||
let manager = unsafe {
|
||||
DMA_MANAGER.as_mut().unwrap()
|
||||
};
|
||||
let (trace, duration) = manager.with_trace(name, |trace, duration| (trace.map(|v| {
|
||||
dcci_slice(v);
|
||||
dsb();
|
||||
v.as_ptr()
|
||||
}), duration));
|
||||
match trace {
|
||||
Some(ptr) => Ok(DmaTrace {
|
||||
address: ptr as i32,
|
||||
duration: duration as i64,
|
||||
}),
|
||||
None => Err(())
|
||||
}.unwrap_or_else(|_| {
|
||||
artiq_raise!("DMAError", "DMA trace not found");
|
||||
})
|
||||
}
|
||||
|
||||
pub extern fn dma_playback(timestamp: i64, ptr: i32) {
|
||||
assert!(ptr % ALIGNMENT as i32 == 0);
|
||||
|
||||
debug!("DMA Playback");
|
||||
unsafe {
|
||||
csr::rtio_dma::base_address_write(ptr as u32);
|
||||
csr::rtio_dma::time_offset_write(timestamp as u64);
|
||||
|
||||
csr::cri_con::selected_write(1);
|
||||
csr::rtio_dma::enable_write(1);
|
||||
while csr::rtio_dma::enable_read() != 0 {}
|
||||
csr::cri_con::selected_write(0);
|
||||
|
||||
let error = csr::rtio_dma::error_read();
|
||||
if error != 0 {
|
||||
let timestamp = csr::rtio_dma::error_timestamp_read();
|
||||
let channel = csr::rtio_dma::error_channel_read();
|
||||
csr::rtio_dma::error_write(1);
|
||||
if error & 1 != 0 {
|
||||
artiq_raise!("RTIOUnderflow",
|
||||
"RTIO underflow at {0} mu, channel {1}",
|
||||
timestamp as i64, channel as i64, 0);
|
||||
}
|
||||
if error & 2 != 0 {
|
||||
artiq_raise!("RTIODestinationUnreachable",
|
||||
"RTIO destination unreachable, output, at {0} mu, channel {1}",
|
||||
timestamp as i64, channel as i64, 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1,6 +1,7 @@
|
|||
use core::ptr;
|
||||
use alloc::{vec::Vec, sync::Arc, string::String};
|
||||
|
||||
use dyld::Library;
|
||||
use libcortex_a9::{mutex::Mutex, sync_channel};
|
||||
use crate::eh_artiq;
|
||||
|
||||
|
@ -9,6 +10,7 @@ pub use control::Control;
|
|||
pub mod core1;
|
||||
mod api;
|
||||
mod rpc;
|
||||
mod dma;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct RPCException {
|
||||
|
@ -39,3 +41,5 @@ static CHANNEL_1TO0: Mutex<Option<sync_channel::Sender<Message>>> = Mutex::new(N
|
|||
|
||||
static mut KERNEL_CHANNEL_0TO1: *mut () = ptr::null_mut();
|
||||
static mut KERNEL_CHANNEL_1TO0: *mut () = ptr::null_mut();
|
||||
static mut KERNEL_LIBRARY: *mut Library = ptr::null_mut();
|
||||
|
||||
|
|
Loading…
Reference in New Issue