Compare commits

...

7 Commits

10 changed files with 133 additions and 33 deletions

View File

@ -12,7 +12,7 @@
"emit-debug-gdb-scripts": false,
"env": "",
"executables": true,
"features": "+v7,+vfp3,-d32,+thumb2,-neon",
"features": "+v7,+vfp3,-d32,+thumb2,-neon,+strict-align",
"is-builtin": false,
"linker": "rust-lld",
"linker-flavor": "ld.lld",

View File

@ -15,6 +15,7 @@ use libboard_zynq::{
clocks::source::{ArmPll, ClockSource, IoPll},
clocks::Clocks,
print, println,
setup_l2cache,
sdio::sd_card::SdCard,
smoltcp::{
self,
@ -32,7 +33,7 @@ use libregister::RegisterR;
use libsupport_zynq::{
boot, ram,
};
use log::info;
use log::{info, warn};
mod ps7_init;
@ -81,7 +82,11 @@ pub fn main_core0() {
clocks.cpu_2x(),
clocks.cpu_1x()
);
info!("Setup L2Cache");
setup_l2cache();
info!("L2Cache done");
if false {
let sd = libboard_zynq::sdio::SDIO::sdio0(true);
// only test SD card if it is inserted
if sd.is_card_inserted() {
@ -114,6 +119,7 @@ pub fn main_core0() {
println!("");
}
let mut flash = flash.stop();
}
let timer = libboard_zynq::timer::GlobalTimer::start();
@ -122,6 +128,7 @@ pub fn main_core0() {
ddr.memtest();
ram::init_alloc_ddr(&mut ddr);
if false {
#[cfg(dev)]
for i in 0..=1 {
let mut flash_io = flash.manual_mode(i);
@ -175,14 +182,15 @@ pub fn main_core0() {
}
});
core1.disable();
}
let eth = zynq::eth::Eth::default(HWADDR.clone());
println!("Eth on");
const RX_LEN: usize = 8;
const RX_LEN: usize = 4096;
// Number of transmission buffers (minimum is two because with
// one, duplicate packet transmission occurs)
const TX_LEN: usize = 8;
const TX_LEN: usize = 4096;
let eth = eth.start_rx(RX_LEN);
let mut eth = eth.start_tx(TX_LEN);
@ -237,18 +245,42 @@ pub fn main_core0() {
Ok(())
}
let counter = alloc::rc::Rc::new(core::cell::RefCell::new(0));
// (rx, tx)
let stats = alloc::rc::Rc::new(core::cell::RefCell::new((0, 0)));
let stats_tx = stats.clone();
task::spawn(async move {
while let Ok(stream) = TcpStream::accept(TCP_PORT, 2048, 2408).await {
let counter = counter.clone();
while let Ok(stream) = TcpStream::accept(TCP_PORT, 0x10_0000, 0x10_0000).await {
let stats_tx = stats_tx.clone();
task::spawn(async move {
*counter.borrow_mut() += 1;
println!("Serving {} connections", *counter.borrow());
handle_connection(stream)
.await
.unwrap_or_else(|e| println!("Connection: {:?}", e));
*counter.borrow_mut() -= 1;
println!("Now serving {} connections", *counter.borrow());
let tx_data = (0..=255).take(65536).collect::<alloc::vec::Vec<u8>>();
loop {
// const CHUNK_SIZE: usize = 65536;
// match stream.send((0..=255).cycle().take(CHUNK_SIZE)).await {
match stream.send_slice(&tx_data[..]).await {
Ok(len) => stats_tx.borrow_mut().1 += tx_data.len(), //CHUNK_SIZE,
Err(e) => {
warn!("tx: {:?}", e);
break
}
}
}
});
}
});
let stats_rx = stats.clone();
task::spawn(async move {
while let Ok(stream) = TcpStream::accept(TCP_PORT+1, 0x10_0000, 0x10_0000).await {
let stats_rx = stats_rx.clone();
task::spawn(async move {
loop {
match stream.recv(|buf| Poll::Ready((buf.len(), buf.len()))).await {
Ok(len) => stats_rx.borrow_mut().0 += len,
Err(e) => {
warn!("rx: {:?}", e);
break
}
}
}
});
}
});
@ -261,7 +293,13 @@ pub fn main_core0() {
let timestamp = timer.get_us();
let seconds = timestamp / 1_000_000;
let micros = timestamp % 1_000_000;
info!("time: {:6}.{:06}s", seconds, micros);
let (rx, tx) = {
let mut stats = stats.borrow_mut();
let result = *stats;
*stats = (0, 0);
result
};
info!("time: {:6}.{:06}s, rx: {}k/s, tx: {}k/s", seconds, micros, rx / 1024, tx / 1024);
}
});

View File

@ -21,5 +21,6 @@ libcortex_a9 = { path = "../libcortex_a9" }
[dependencies.smoltcp]
version = "0.6"
# features = ["ethernet", "proto-ipv4", "socket-tcp", "log"]
features = ["ethernet", "proto-ipv4", "socket-tcp"]
default-features = false

View File

@ -2,6 +2,8 @@ use core::ops::Deref;
use alloc::{vec, vec::Vec};
use libcortex_a9::{asm::*, cache::*, UncachedSlice};
use libregister::*;
use log::debug;
use crate::l2cache;
use super::Buffer;
#[derive(Debug)]
@ -81,9 +83,6 @@ impl DescList {
entry.word1.write(
DescWord1::zeroed()
);
// Flush buffer from cache, to be filled by the peripheral
// before next read
dcci_slice(&buffer[..]);
}
DescList {
@ -105,6 +104,9 @@ impl DescList {
let word1 = entry.word1.read();
let len = word1.frame_length_lsbs().into();
let buffer = &mut self.buffers[self.next][0..len];
// Invalidate caches for packet buffer
l2cache().invalidate_slice(&mut buffer[..]);
dcci_slice(&buffer[..]);
self.next += 1;
if self.next >= list_len {
@ -113,8 +115,10 @@ impl DescList {
let pkt = PktRef { entry, buffer };
if word1.start_of_frame() && word1.end_of_frame() {
// debug!("pkt {}: {:08X}..{:08X}", len, &pkt.buffer[0] as *const _ as usize, &pkt.buffer[pkt.len()-1] as *const _ as usize);
Ok(Some(pkt))
} else {
debug!("pkt trunc");
Err(Error::Truncated)
}
} else {
@ -131,9 +135,6 @@ pub struct PktRef<'a> {
impl<'a> Drop for PktRef<'a> {
fn drop(&mut self) {
// Flush buffer from cache, to be filled by the peripheral
// before next read
dcci_slice(self.buffer);
self.entry.word0.modify(|_, w| w.used(false));
dmb();

View File

@ -1,7 +1,9 @@
use core::ops::{Deref, DerefMut};
use alloc::{vec, vec::Vec};
use libcortex_a9::{cache::dcc_slice, UncachedSlice};
use libcortex_a9::{asm::dmb, cache::dcc_slice, UncachedSlice};
use libregister::*;
use log::{debug, warn};
use crate::l2cache;
use super::{Buffer, regs};
/// Descriptor entry
@ -90,8 +92,10 @@ impl DescList {
}
pub fn send<'s: 'p, 'p>(&'s mut self, regs: &'s mut regs::RegisterBlock, length: usize) -> Option<PktRef<'p>> {
// debug!("send {}", length);
let list_len = self.list.len();
let entry = &mut self.list[self.next];
dmb();
if entry.word1.read().used() {
let buffer = &mut self.buffers[self.next][0..length];
entry.word1.write(DescWord1::zeroed()
@ -109,6 +113,7 @@ impl DescList {
Some(PktRef { entry, buffer, regs })
} else {
// Still in use by HW (sending too fast, ring exceeded)
warn!("tx ring overflow");
None
}
}
@ -124,10 +129,13 @@ pub struct PktRef<'a> {
impl<'a> Drop for PktRef<'a> {
fn drop(&mut self) {
// Write back all dirty cachelines of this buffer
// Write back all dirty cachelines of packet buffer
dcc_slice(self.buffer);
l2cache().clean_slice(self.buffer);
self.entry.word1.modify(|_, w| w.used(false));
dmb();
// dsb();
if ! self.regs.tx_status.read().tx_go() {
// Start TX if not already running
self.regs.net_ctrl.modify(|_, w| w.start_tx(true));

View File

@ -34,8 +34,10 @@ pub fn setup_l2cache() {
assert_eq!(&slcr.unnamed1 as *const _ as u32, 0xF8000A1C);
unsafe { slcr.unnamed1.write(0x020202); }
});
let mut l2 = l2cache();
use log::info;
info!("l2 aux={:08X}", l2.regs.aux_control.read());
// TODO: set prefetch
// Configure ZYNQ-specific latency

View File

@ -44,6 +44,15 @@ pub fn dcisw(setway: u32) {
}
}
/// Data cache clean and invalidate by set/way (DCCISW)
///
/// Issues the CP15 operation `c7, c14, 2` with `setway` as its operand.
/// `setway` selects the cache line to operate on; `dcciall()` in this
/// file builds it as `(set << 5) | (way << 30)` for the L1 data cache.
#[inline(always)]
pub fn dccisw(setway: u32) {
// NOTE(review): CP15 cache maintenance is presumably only legal in a
// privileged mode; confirm that every caller runs privileged.
unsafe {
llvm_asm!("mcr p15, 0, $0, c7, c14, 2" :: "r" (setway) :: "volatile");
}
}
/// A made-up "instruction": invalidate all of the L1 D-Cache
#[inline(always)]
pub fn dciall() {
@ -71,6 +80,33 @@ pub fn dciall() {
}
}
/// A made-up "instruction": flush (clean) and invalidate all of the L1 D-Cache
///
/// There is no single operation for this, so every set/way combination
/// of the L1 data cache is visited with `dccisw()`.
#[inline(always)]
pub fn dcciall() {
// the cache associativity could be read from a register, but will
// always be 4 in L1 data cache of a cortex a9
let ways = 4;
let bit_pos_of_way = 30; // 32 - log2(ways)
// the cache sets could be read from a register, but are always
// 256 for the cores in the Zynq-7000; in general, 128 or 512 are
// also possible.
let sets = 256;
let bit_pos_of_set = 5; // for a line size of 8 words = 2^5 bytes
// select L1 data cache (writes 0 to the cache size selection register)
unsafe {
llvm_asm!("mcr p15, 2, $0, c0, c0, 0" :: "r" (0) :: "volatile");
}
// Clean and invalidate entire D-Cache by iterating every set and every way
for set in 0..sets {
for way in 0..ways {
// operand layout: set index from bit 5 upwards, way index in the top bits
dccisw((set << bit_pos_of_set) | (way << bit_pos_of_way));
}
}
}
const CACHE_LINE: usize = 0x20;
const CACHE_LINE_MASK: usize = CACHE_LINE - 1;

View File

@ -1,5 +1,5 @@
use bit_field::BitField;
use super::{regs::*, asm, cache};
use super::{regs::*, asm::*, cache::*};
use libregister::RegisterW;
#[derive(Copy, Clone)]
@ -158,7 +158,7 @@ impl L1Table {
global: true,
shareable: true,
access: AccessPermissions::FullAccess,
tex: 0b101,
tex: 0b111,
domain: 0b1111,
exec: true,
cacheable: true,
@ -368,10 +368,19 @@ impl L1Table {
let result = f(&mut section);
entry.set_section(section);
asm::dmb();
cache::tlbiall();
asm::dsb();
asm::isb();
// Flush L1Dcache
dcciall();
// // TODO: L2?
// Invalidate TLB
tlbiall();
// Invalidate all branch predictors
bpiall();
// ensure completion of the BP and TLB invalidation
dsb();
// synchronize context on this processor
isb();
result
}
@ -406,9 +415,9 @@ pub fn with_mmu<F: FnMut() -> !>(l1table: &L1Table, mut f: F) -> ! {
// Synchronization barriers
// Allows MMU to start
asm::dsb();
dsb();
// Flushes pre-fetch buffer
asm::isb();
isb();
f();
}

View File

@ -25,8 +25,10 @@ impl<T> UncachedSlice<T> {
for page_start in (start..(start + size)).step_by(L1_PAGE_SIZE) {
L1Table::get()
.update(page_start as *const (), |l1_section| {
// Shareable Device
l1_section.tex = 0b000;
l1_section.cacheable = false;
l1_section.bufferable = false;
l1_section.bufferable = true;
});
}

View File

@ -1,3 +1,5 @@
use libregister::RegisterR;
use libcortex_a9::regs::DFSR;
use libboard_zynq::{println, slcr, stdio};
#[no_mangle]
@ -15,6 +17,7 @@ pub unsafe extern "C" fn DataAbort() {
stdio::drop_uart();
println!("DataAbort");
println!("DFSR: {:03X}", DFSR.read());
slcr::RegisterBlock::unlocked(|slcr| slcr.soft_reset());
loop {}