From 1804c4c6e87092116156eb1abb711f2d92e6aaa5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B6rn=20Stein?=
Date: Fri, 18 Oct 2019 00:11:51 +0200
Subject: [PATCH] cortex_a9: add proper L1 cache invalidation

---
 src/cortex_a9/cache.rs | 228 ++++++++++++++++++++++++++++++++++++++++++
 src/cortex_a9/mod.rs   |   1 +
 src/cortex_a9/regs.rs  |  33 -------
 src/main.rs            |  11 ++-
 4 files changed, 239 insertions(+), 34 deletions(-)
 create mode 100644 src/cortex_a9/cache.rs

diff --git a/src/cortex_a9/cache.rs b/src/cortex_a9/cache.rs
new file mode 100644
index 0000000..eaac979
--- /dev/null
+++ b/src/cortex_a9/cache.rs
@@ -0,0 +1,228 @@
+/// Invalidate TLBs
+#[inline(always)]
+pub fn tlbiall() {
+    unsafe {
+        asm!("mcr p15, 0, $0, c8, c7, 0" :: "r" (0) :: "volatile");
+    }
+}
+
+/// Invalidate I-Cache
+#[inline(always)]
+pub fn iciallu() {
+    unsafe {
+        asm!("mcr p15, 0, $0, c7, c5, 0" :: "r" (0) :: "volatile");
+    }
+}
+
+/// Invalidate Branch Predictor Array
+#[inline(always)]
+pub fn bpiall() {
+    unsafe {
+        asm!("mcr p15, 0, $0, c7, c5, 6" :: "r" (0) :: "volatile");
+    }
+}
+
+/// Invalidate one D-Cache line by set/way.
+#[inline(always)]
+pub fn dcisw(setway: u32) {
+    unsafe {
+        // according to the ARM Architecture Reference Manual, Figure
+        // B3-32; also see the example code (for DCCISW, but DCISW is
+        // analogous), "Example code for cache maintenance operations"
+        // on pages B2-1286 and B2-1287.
+        asm!("mcr p15, 0, $0, c7, c6, 2" :: "r" (setway) :: "volatile");
+    }
+}
+
+/// A made-up "instruction": invalidate all of the L1 D-Cache
+#[inline(always)]
+pub fn dciall() {
+    // the cache associativity could be read from a register, but is
+    // always 4 for the L1 data cache of a Cortex-A9
+    let ways = 4;
+    let bit_pos_of_way = 30; // 32 - log2(ways)
+
+    // the number of cache sets could also be read from a register,
+    // but is always 256 for the cores in the Zynq-7000; in general,
+    // 128 or 512 are also possible.
+    let sets = 256;
+    let bit_pos_of_set = 5; // for a line size of 8 words = 2^5 bytes
+
+    // select the L1 data cache
+    unsafe {
+        asm!("mcr p15, 2, $0, c0, c0, 0" :: "r" (0) :: "volatile");
+    }
+
+    // invalidate the entire D-Cache by iterating every set and every way
+    for set in 0..sets {
+        for way in 0..ways {
+            dcisw((set << bit_pos_of_set) | (way << bit_pos_of_way));
+        }
+    }
+}
+
+/// Data cache clean and invalidate by memory virtual address. This
+/// flushes data out to the point of coherency and invalidates the
+/// corresponding cache line (as appropriate when DMA is meant to be
+/// writing into it).
+#[inline(always)]
+pub fn dccimva(addr: usize) {
+    unsafe {
+        asm!("mcr p15, 0, $0, c7, c14, 1" :: "r" (addr) :: "volatile");
+    }
+}
+
+/// The DCCIMVA operation (data cache clean and invalidate) applied
+/// to the region of memory occupied by the argument. This does not
+/// modify the argument, but because of the invalidate part (only
+/// ever needed if external write access is to be granted, e.g. by
+/// DMA) it only makes sense if the caller has exclusive access to
+/// the object, as other accesses might bring it back into the cache.
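+///
+/// A minimal usage sketch (`DMA_BUFFER` and `start_dma` are
+/// hypothetical and not part of this crate):
+///
+/// ```ignore
+/// // flush our writes out to memory before the DMA engine reads
+/// // the buffer, and drop the lines it is about to overwrite
+/// dcci(unsafe { &mut DMA_BUFFER });
+/// start_dma();
+/// ```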
+pub fn dcci<T>(object: &mut T) {
+    let cache_line = 0x20;
+    let first_addr =
+        (object as *mut _ as *const _ as usize) & !(cache_line - 1);
+    let beyond_addr = (
+        (object as *mut _ as *const _ as usize) +
+        core::mem::size_of_val(object) +
+        (cache_line - 1)
+    ) & !(cache_line - 1);
+    for addr in (first_addr..beyond_addr).step_by(cache_line) {
+        dccimva(addr);
+    }
+}
+
+/// DCCIMVA applied to all cache lines covered by the slice contents.
+pub fn dcci_slice_content<T>(slice: &mut [T]) {
+    if slice.is_empty() {
+        return;
+    }
+    let cache_line = 0x20;
+    let first_addr =
+        (&slice[0] as *const _ as usize) & !(cache_line - 1);
+    let beyond_addr = (
+        (&slice[slice.len() - 1] as *const _ as usize) +
+        core::mem::size_of::<T>() + (cache_line - 1)
+    ) & !(cache_line - 1);
+    for addr in (first_addr..beyond_addr).step_by(cache_line) {
+        dccimva(addr);
+    }
+}
+
+/// Like `dcci_slice_content`, but for a shared slice.
+pub fn dcci_slice_content_unmut<T>(slice: &[T]) {
+    if slice.is_empty() {
+        return;
+    }
+    let cache_line = 0x20;
+    let first_addr =
+        (&slice[0] as *const _ as usize) & !(cache_line - 1);
+    let beyond_addr = (
+        (&slice[slice.len() - 1] as *const _ as usize) +
+        core::mem::size_of::<T>() + (cache_line - 1)
+    ) & !(cache_line - 1);
+    for addr in (first_addr..beyond_addr).step_by(cache_line) {
+        dccimva(addr);
+    }
+}
+
+/// Data cache invalidate by memory virtual address. This
+/// invalidates the cache line containing the given address. Super
+/// unsafe, as this discards a write-back cache line, potentially
+/// affecting more data than intended.
+#[inline(always)]
+pub unsafe fn dcimva(addr: usize) {
+    asm!("mcr p15, 0, $0, c7, c6, 1" :: "r" (addr) :: "volatile");
+}
+
+/// Data cache invalidate for an object. Panics if the object is not
+/// properly aligned and sized to be contained in an exact number of
+/// cache lines.
+pub fn dci<T>(object: &mut T) {
+    let cache_line = 0x20;
+    let first_addr = object as *mut _ as *const _ as usize;
+    let beyond_addr = (object as *mut _ as *const _ as usize) +
+        core::mem::size_of_val(object);
+    assert_eq!((first_addr & (cache_line - 1)), 0x00);
+    assert_eq!((beyond_addr & (cache_line - 1)), 0x00);
+    for addr in (first_addr..beyond_addr).step_by(cache_line) {
+        unsafe {
+            dcimva(addr);
+        }
+    }
+}
+
+/// Data cache invalidate for the contents of a slice. Panics if the
+/// contents are not properly aligned and sized to be contained in
+/// an exact number of cache lines.
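+///
+/// A minimal usage sketch (`RX_BUFFER`, a cache-line-aligned static
+/// buffer filled by a hypothetical DMA transfer, is not part of this
+/// crate):
+///
+/// ```ignore
+/// // discard stale cache lines so the CPU reads what the DMA
+/// // engine actually wrote to memory
+/// dci_slice_content(unsafe { &mut RX_BUFFER[..] });
+/// ```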
+pub fn dci_slice_content<T>(slice: &mut [T]) {
+    if slice.is_empty() {
+        return;
+    }
+    let cache_line = 0x20;
+    let first_addr = &slice[0] as *const _ as usize;
+    let beyond_addr = (&slice[slice.len() - 1] as *const _ as usize) +
+        core::mem::size_of::<T>();
+    assert_eq!((first_addr & (cache_line - 1)), 0x00);
+    assert_eq!((beyond_addr & (cache_line - 1)), 0x00);
+    for addr in (first_addr..beyond_addr).step_by(cache_line) {
+        unsafe {
+            dcimva(addr);
+        }
+    }
+}
+
+/// Data cache invalidate for all cache lines spanned by a slice,
+/// including any adjacent data that happens to share the first and
+/// last of those cache lines; hence unsafe.
+pub unsafe fn dci_more_than_slice_content<T>(slice: &mut [T]) {
+    if slice.is_empty() {
+        return;
+    }
+    let cache_line = 0x20;
+    let first_addr =
+        (&slice[0] as *const _ as usize) & !(cache_line - 1);
+    let beyond_addr = (
+        (&slice[slice.len() - 1] as *const _ as usize) +
+        core::mem::size_of::<T>() + (cache_line - 1)
+    ) & !(cache_line - 1);
+    for addr in (first_addr..beyond_addr).step_by(cache_line) {
+        dcimva(addr);
+    }
+}
+
+/// Like `dci_more_than_slice_content`, but for a shared slice.
+pub unsafe fn dci_more_than_slice_content_nonmut<T>(slice: &[T]) {
+    if slice.is_empty() {
+        return;
+    }
+    let cache_line = 0x20;
+    let first_addr =
+        (&slice[0] as *const _ as usize) & !(cache_line - 1);
+    let beyond_addr = (
+        (&slice[slice.len() - 1] as *const _ as usize) +
+        core::mem::size_of::<T>() + (cache_line - 1)
+    ) & !(cache_line - 1);
+    for addr in (first_addr..beyond_addr).step_by(cache_line) {
+        dcimva(addr);
+    }
+}
diff --git a/src/cortex_a9/mod.rs b/src/cortex_a9/mod.rs
index e4d0b5c..8d9cdb0 100644
--- a/src/cortex_a9/mod.rs
+++ b/src/cortex_a9/mod.rs
@@ -1,5 +1,6 @@
 pub mod asm;
 pub mod regs;
+pub mod cache;
 pub mod mmu;
 
 global_asm!(include_str!("exceptions.s"));
diff --git a/src/cortex_a9/regs.rs b/src/cortex_a9/regs.rs
index 647db47..70bae68 100644
--- a/src/cortex_a9/regs.rs
+++ b/src/cortex_a9/regs.rs
@@ -136,36 +136,3 @@ register_bit!(ttbr,
 /// Translation table walk to shared memory?
 s, 1);
 register_bit!(ttbr, irgn1, 0);
-
-/// Invalidate TLBs
-#[inline(always)]
-pub fn tlbiall() {
-    unsafe {
-        asm!("mcr p15, 0, $0, c8, c7, 0" :: "r" (0) :: "volatile");
-    }
-}
-
-/// Invalidate I-Cache
-#[inline(always)]
-pub fn iciallu() {
-    unsafe {
-        asm!("mcr p15, 0, $0, c7, c5, 0" :: "r" (0) :: "volatile");
-    }
-}
-
-/// Invalidate Branch Predictor Array
-#[inline(always)]
-pub fn bpiall() {
-    unsafe {
-        asm!("mcr p15, 0, $0, c7, c5, 6" :: "r" (0) :: "volatile");
-    }
-}
-
-/// Invalidate D-Cache
-#[inline(always)]
-pub fn dccisw() {
-    // TODO: $0 is r11 at what value?
-    unsafe {
-        asm!("mcr p15, 0, $0, c7, c5, 6" :: "r" (0) :: "volatile");
-    }
-}
diff --git a/src/main.rs b/src/main.rs
index c25ab6a..580f180 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -66,6 +66,8 @@ unsafe fn boot_core0() -> ! {
 }
 
 fn l1_cache_init() {
+    use crate::cortex_a9::cache::*;
+
     // Invalidate TLBs
     tlbiall();
     // Invalidate I-Cache
@@ -73,7 +75,14 @@ fn l1_cache_init() {
     // Invalidate Branch Predictor Array
     bpiall();
     // Invalidate D-Cache
-    dccisw();
+    //
+    // NOTE: It is both faster and correct to only invalidate the
+    //       cache rather than to also flush it (as the previous
+    //       `dccisw()` did), and it is correct to perform this
+    //       operation on all of the L1 data cache rather than on a
+    //       (previously unspecified) combination of one cache set
+    //       and one cache way.
+    dciall();
 }
 
 const HWADDR: [u8; 6] = [0, 0x23, 0xde, 0xea, 0xbe, 0xef];
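
A usage sketch of the new API (an illustration only: `dma_start` and
`dma_wait` stand in for a real DMA driver, and the buffer is assumed
to be the device's target):

    use crate::cortex_a9::cache;

    let mut buf = [0u8; 64];
    // clean+invalidate: flush CPU writes out to memory and drop the
    // cache lines before the device writes its response over them
    cache::dcci_slice_content(&mut buf);
    dma_start(&mut buf);
    dma_wait();
    // invalidate again before reading: drop anything speculatively
    // refetched while the DMA ran; unsafe because unrelated data
    // sharing the first/last cache line would be discarded too
    unsafe { cache::dci_more_than_slice_content(&mut buf) };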