mirror of https://github.com/m-labs/artiq.git
firmware: optimize dma_record_output.
This removes a number of bounds checks and adds a fast path for outputting exactly one word to DMA, which is the most common operation.
This commit is contained in:
parent
bdd18de2c1
commit
38d60100ff
|
@ -1,5 +1,5 @@
|
||||||
#![feature(lang_items, asm, panic_unwind, libc, unwind_attributes,
|
#![feature(lang_items, asm, panic_unwind, libc, unwind_attributes,
|
||||||
panic_implementation, panic_info_message)]
|
panic_implementation, panic_info_message, nll)]
|
||||||
#![no_std]
|
#![no_std]
|
||||||
|
|
||||||
extern crate libc;
|
extern crate libc;
|
||||||
|
@ -300,19 +300,24 @@ extern fn dma_record_stop(duration: i64) {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[unwind(aborts)]
|
#[unwind(aborts)]
|
||||||
extern fn dma_record_output(timestamp: i64, channel: i32, address: i32, word: i32) {
|
#[inline(always)]
|
||||||
dma_record_output_wide(timestamp, channel, address, [word].as_c_slice())
|
unsafe fn dma_record_output_prepare(timestamp: i64, channel: i32, address: i32,
|
||||||
|
words: usize) -> &'static mut [u8] {
|
||||||
|
// See gateware/rtio/dma.py.
|
||||||
|
const HEADER_LENGTH: usize = /*length*/1 + /*channel*/3 + /*timestamp*/8 + /*address*/2;
|
||||||
|
let length = HEADER_LENGTH + /*data*/words * 4;
|
||||||
|
|
||||||
|
if DMA_RECORDER.buffer.len() - DMA_RECORDER.data_len < length {
|
||||||
|
dma_record_flush()
|
||||||
}
|
}
|
||||||
|
|
||||||
#[unwind(aborts)]
|
let record = &mut DMA_RECORDER.buffer[DMA_RECORDER.data_len..
|
||||||
extern fn dma_record_output_wide(timestamp: i64, channel: i32, address: i32, words: CSlice<i32>) {
|
DMA_RECORDER.data_len + length];
|
||||||
assert!(words.len() <= 16); // enforce the hardware limit
|
DMA_RECORDER.data_len += length;
|
||||||
|
|
||||||
// See gateware/rtio/dma.py.
|
let (header, data) = record.split_at_mut(HEADER_LENGTH);
|
||||||
let header_length = /*length*/1 + /*channel*/3 + /*timestamp*/8 + /*address*/2;
|
|
||||||
let length = header_length + /*data*/words.len() * 4;
|
|
||||||
|
|
||||||
let header = [
|
header.copy_from_slice(&[
|
||||||
(length >> 0) as u8,
|
(length >> 0) as u8,
|
||||||
(channel >> 0) as u8,
|
(channel >> 0) as u8,
|
||||||
(channel >> 8) as u8,
|
(channel >> 8) as u8,
|
||||||
|
@ -327,29 +332,39 @@ extern fn dma_record_output_wide(timestamp: i64, channel: i32, address: i32, wor
|
||||||
(timestamp >> 56) as u8,
|
(timestamp >> 56) as u8,
|
||||||
(address >> 0) as u8,
|
(address >> 0) as u8,
|
||||||
(address >> 8) as u8,
|
(address >> 8) as u8,
|
||||||
];
|
]);
|
||||||
|
|
||||||
let mut data = [0; 16 * 4];
|
data
|
||||||
for (i, &word) in words.as_ref().iter().enumerate() {
|
}
|
||||||
let part = [
|
|
||||||
|
#[unwind(aborts)]
|
||||||
|
extern fn dma_record_output(timestamp: i64, channel: i32, address: i32, word: i32) {
|
||||||
|
unsafe {
|
||||||
|
let data = dma_record_output_prepare(timestamp, channel, address, 1);
|
||||||
|
data.copy_from_slice(&[
|
||||||
(word >> 0) as u8,
|
(word >> 0) as u8,
|
||||||
(word >> 8) as u8,
|
(word >> 8) as u8,
|
||||||
(word >> 16) as u8,
|
(word >> 16) as u8,
|
||||||
(word >> 24) as u8,
|
(word >> 24) as u8,
|
||||||
];
|
]);
|
||||||
data[i * 4..(i + 1) * 4].copy_from_slice(&part[..]);
|
|
||||||
}
|
}
|
||||||
let data = &data[..words.len() * 4];
|
}
|
||||||
|
|
||||||
|
#[unwind(aborts)]
|
||||||
|
extern fn dma_record_output_wide(timestamp: i64, channel: i32, address: i32, words: CSlice<i32>) {
|
||||||
|
assert!(words.len() <= 16); // enforce the hardware limit
|
||||||
|
|
||||||
unsafe {
|
unsafe {
|
||||||
if DMA_RECORDER.buffer.len() - DMA_RECORDER.data_len < length {
|
let mut data = dma_record_output_prepare(timestamp, channel, address, words.len());
|
||||||
dma_record_flush()
|
for word in words.as_ref().iter() {
|
||||||
|
data[..4].copy_from_slice(&[
|
||||||
|
(word >> 0) as u8,
|
||||||
|
(word >> 8) as u8,
|
||||||
|
(word >> 16) as u8,
|
||||||
|
(word >> 24) as u8,
|
||||||
|
]);
|
||||||
|
data = &mut data[4..];
|
||||||
}
|
}
|
||||||
let dst = &mut DMA_RECORDER.buffer[DMA_RECORDER.data_len..
|
|
||||||
DMA_RECORDER.data_len + length];
|
|
||||||
dst[..header_length].copy_from_slice(&header[..]);
|
|
||||||
dst[header_length..].copy_from_slice(&data[..]);
|
|
||||||
DMA_RECORDER.data_len += length;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -641,7 +641,7 @@ class DMATest(ExperimentCase):
|
||||||
exp.record_many(count)
|
exp.record_many(count)
|
||||||
dt = self.dataset_mgr.get("dma_record_time")
|
dt = self.dataset_mgr.get("dma_record_time")
|
||||||
print("dt={}, dt/count={}".format(dt, dt/count))
|
print("dt={}, dt/count={}".format(dt, dt/count))
|
||||||
self.assertLess(dt/count, 20*us)
|
self.assertLess(dt/count, 11*us)
|
||||||
|
|
||||||
def test_dma_playback_time(self):
|
def test_dma_playback_time(self):
|
||||||
# Skip on Kasli until #946 is resolved.
|
# Skip on Kasli until #946 is resolved.
|
||||||
|
|
Loading…
Reference in New Issue