From 75c6ccca71bc94bd38ce9d4c7a8356b98e9872b9 Mon Sep 17 00:00:00 2001 From: Jorge Aparicio Date: Thu, 29 Jun 2017 22:40:58 -0500 Subject: [PATCH] optimize 32-bit aligned mem{cpy,clr,set} intrinsics for ARM this reduces the execution time of all these routines by 40-70% --- src/arm.rs | 102 ++++++++++------ src/lib.rs | 2 +- src/mem.rs | 8 +- tests/aeabi_memclr.rs | 58 +++++++++ tests/aeabi_memcpy.rs | 69 +++++++++++ tests/aeabi_memset.rs | 274 ++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 473 insertions(+), 40 deletions(-) create mode 100644 tests/aeabi_memclr.rs create mode 100644 tests/aeabi_memcpy.rs create mode 100644 tests/aeabi_memset.rs diff --git a/src/arm.rs b/src/arm.rs index 389bf9a..227ab05 100644 --- a/src/arm.rs +++ b/src/arm.rs @@ -1,7 +1,6 @@ -use core::intrinsics; +use core::{intrinsics, ptr}; -#[cfg(feature = "mem")] -use mem::{memcpy, memmove, memset}; +use mem; // NOTE This function and the ones below are implemented using assembly because they using a custom // calling convention which can't be implemented using a normal Rust function @@ -60,65 +59,98 @@ pub unsafe fn __aeabi_ldivmod() { intrinsics::unreachable(); } -// TODO: These aeabi_* functions should be defined as aliases -#[cfg(not(feature = "mem"))] -extern "C" { - fn memcpy(dest: *mut u8, src: *const u8, n: usize) -> *mut u8; - fn memmove(dest: *mut u8, src: *const u8, n: usize) -> *mut u8; - fn memset(dest: *mut u8, c: i32, n: usize) -> *mut u8; -} - // FIXME: The `*4` and `*8` variants should be defined as aliases. #[cfg_attr(not(feature = "mangled-names"), no_mangle)] +#[linkage = "weak"] pub unsafe extern "aapcs" fn __aeabi_memcpy(dest: *mut u8, src: *const u8, n: usize) { - memcpy(dest, src, n); -} -#[cfg_attr(not(feature = "mangled-names"), no_mangle)] -pub unsafe extern "aapcs" fn __aeabi_memcpy4(dest: *mut u8, src: *const u8, n: usize) { - memcpy(dest, src, n); -} -#[cfg_attr(not(feature = "mangled-names"), no_mangle)] -pub unsafe extern "aapcs" fn __aeabi_memcpy8(dest: *mut u8, src: *const u8, n: usize) { - memcpy(dest, src, n); + mem::memcpy(dest, src, n); } #[cfg_attr(not(feature = "mangled-names"), no_mangle)] +#[linkage = "weak"] +pub unsafe extern "aapcs" fn __aeabi_memcpy4(dest: *mut u8, src: *const u8, mut n: usize) { + let mut dest = dest as *mut u32; + let mut src = src as *mut u32; + + while n >= 4 { + ptr::write(dest, ptr::read(src)); + dest = dest.offset(1); + src = src.offset(1); + n -= 4; + } + + __aeabi_memcpy(dest as *mut u8, src as *const u8, n); +} + +#[cfg_attr(not(feature = "mangled-names"), no_mangle)] +#[linkage = "weak"] +pub unsafe extern "aapcs" fn __aeabi_memcpy8(dest: *mut u8, src: *const u8, n: usize) { + __aeabi_memcpy4(dest, src, n); +} + +#[cfg_attr(not(feature = "mangled-names"), no_mangle)] +#[linkage = "weak"] pub unsafe extern "aapcs" fn __aeabi_memmove(dest: *mut u8, src: *const u8, n: usize) { - memmove(dest, src, n); + mem::memmove(dest, src, n); } + #[cfg_attr(not(feature = "mangled-names"), no_mangle)] +#[linkage = "weak"] pub unsafe extern "aapcs" fn __aeabi_memmove4(dest: *mut u8, src: *const u8, n: usize) { - memmove(dest, src, n); + __aeabi_memmove(dest, src, n); } + #[cfg_attr(not(feature = "mangled-names"), no_mangle)] +#[linkage = "weak"] pub unsafe extern "aapcs" fn __aeabi_memmove8(dest: *mut u8, src: *const u8, n: usize) { - memmove(dest, src, n); + __aeabi_memmove(dest, src, n); } // Note the different argument order #[cfg_attr(not(feature = "mangled-names"), no_mangle)] +#[linkage = "weak"] pub unsafe extern "aapcs" fn __aeabi_memset(dest: *mut u8, n: usize, c: i32) { - memset(dest, c, n); -} -#[cfg_attr(not(feature = "mangled-names"), no_mangle)] -pub unsafe extern "aapcs" fn __aeabi_memset4(dest: *mut u8, n: usize, c: i32) { - memset(dest, c, n); -} -#[cfg_attr(not(feature = "mangled-names"), no_mangle)] -pub unsafe extern "aapcs" fn __aeabi_memset8(dest: *mut u8, n: usize, c: i32) { - memset(dest, c, n); + mem::memset(dest, c, n); } #[cfg_attr(not(feature = "mangled-names"), no_mangle)] +#[linkage = "weak"] +pub unsafe extern "aapcs" fn __aeabi_memset4(dest: *mut u8, mut n: usize, c: i32) { + let mut dest = dest as *mut u32; + + let byte = (c as u32) & 0xff; + let c = (byte << 24) | (byte << 16) | (byte << 8) | byte; + + while n >= 4 { + ptr::write(dest, c); + dest = dest.offset(1); + n -= 4; + } + + __aeabi_memset(dest as *mut u8, n, byte as i32); +} + +#[cfg_attr(not(feature = "mangled-names"), no_mangle)] +#[linkage = "weak"] +pub unsafe extern "aapcs" fn __aeabi_memset8(dest: *mut u8, n: usize, c: i32) { + __aeabi_memset4(dest, n, c); +} + +#[cfg_attr(not(feature = "mangled-names"), no_mangle)] +#[linkage = "weak"] pub unsafe extern "aapcs" fn __aeabi_memclr(dest: *mut u8, n: usize) { - memset(dest, 0, n); + __aeabi_memset(dest, n, 0); } + #[cfg_attr(not(feature = "mangled-names"), no_mangle)] +#[linkage = "weak"] pub unsafe extern "aapcs" fn __aeabi_memclr4(dest: *mut u8, n: usize) { - memset(dest, 0, n); + __aeabi_memset4(dest, n, 0); } + #[cfg_attr(not(feature = "mangled-names"), no_mangle)] +#[linkage = "weak"] pub unsafe extern "aapcs" fn __aeabi_memclr8(dest: *mut u8, n: usize) { - memset(dest, 0, n); + __aeabi_memset4(dest, n, 0); } diff --git a/src/lib.rs b/src/lib.rs index 3fa7923..39bf854 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -16,6 +16,7 @@ #![feature(i128_type)] #![feature(repr_simd)] #![feature(abi_unadjusted)] +#![feature(linkage)] #![allow(unused_features)] #![no_builtins] #![unstable(feature = "compiler_builtins_lib", @@ -45,7 +46,6 @@ mod macros; pub mod int; pub mod float; -#[cfg(feature = "mem")] pub mod mem; #[cfg(target_arch = "arm")] diff --git a/src/mem.rs b/src/mem.rs index cb8baec..c56391c 100644 --- a/src/mem.rs +++ b/src/mem.rs @@ -5,7 +5,7 @@ type c_int = i16; #[cfg(not(target_pointer_width = "16"))] type c_int = i32; -#[cfg_attr(not(feature = "mangled-names"), no_mangle)] +#[cfg_attr(all(feature = "mem", not(feature = "mangled-names")), no_mangle)] pub unsafe extern "C" fn memcpy(dest: *mut u8, src: *const u8, n: usize) @@ -18,7 +18,7 @@ pub unsafe extern "C" fn memcpy(dest: *mut u8, dest } -#[cfg_attr(not(feature = "mangled-names"), no_mangle)] +#[cfg_attr(all(feature = "mem", not(feature = "mangled-names")), no_mangle)] pub unsafe extern "C" fn memmove(dest: *mut u8, src: *const u8, n: usize) @@ -41,7 +41,7 @@ pub unsafe extern "C" fn memmove(dest: *mut u8, dest } -#[cfg_attr(not(feature = "mangled-names"), no_mangle)] +#[cfg_attr(all(feature = "mem", not(feature = "mangled-names")), no_mangle)] pub unsafe extern "C" fn memset(s: *mut u8, c: c_int, n: usize) -> *mut u8 { let mut i = 0; while i < n { @@ -51,7 +51,7 @@ pub unsafe extern "C" fn memset(s: *mut u8, c: c_int, n: usize) -> *mut u8 { s } -#[cfg_attr(not(feature = "mangled-names"), no_mangle)] +#[cfg_attr(all(feature = "mem", not(feature = "mangled-names")), no_mangle)] pub unsafe extern "C" fn memcmp(s1: *const u8, s2: *const u8, n: usize) -> i32 { let mut i = 0; while i < n { diff --git a/tests/aeabi_memclr.rs b/tests/aeabi_memclr.rs new file mode 100644 index 0000000..72e944e --- /dev/null +++ b/tests/aeabi_memclr.rs @@ -0,0 +1,58 @@ +#![cfg(all(target_arch = "arm", + not(any(target_env = "gnu", target_env = "musl")), + target_os = "linux", + feature = "mem"))] +#![feature(compiler_builtins_lib)] +#![no_std] + +extern crate compiler_builtins; + +// test runner +extern crate utest_cortex_m_qemu; + +// overrides `panic!` +#[macro_use] +extern crate utest_macros; + +use core::mem; + +macro_rules! panic { + ($($tt:tt)*) => { + upanic!($($tt)*); + }; +} + +extern "C" { + fn __aeabi_memclr4(dest: *mut u8, n: usize); + fn __aeabi_memset4(dest: *mut u8, n: usize, c: u32); +} + +struct Aligned { + array: [u8; 8], + _alignment: [u32; 0], +} + +impl Aligned { + fn new() -> Self { + Aligned { + array: [0; 8], + _alignment: [], + } + } +} + +#[test] +fn memclr4() { + let mut aligned = Aligned::new();; + assert_eq!(mem::align_of_val(&aligned), 4); + let xs = &mut aligned.array; + + for n in 0..9 { + unsafe { + __aeabi_memset4(xs.as_mut_ptr(), n, 0xff); + __aeabi_memclr4(xs.as_mut_ptr(), n); + } + + assert!(xs[0..n].iter().all(|x| *x == 0)); + } +} diff --git a/tests/aeabi_memcpy.rs b/tests/aeabi_memcpy.rs new file mode 100644 index 0000000..2ba942c --- /dev/null +++ b/tests/aeabi_memcpy.rs @@ -0,0 +1,69 @@ +#![cfg(all(target_arch = "arm", + not(any(target_env = "gnu", target_env = "musl")), + target_os = "linux", + feature = "mem"))] +#![feature(compiler_builtins_lib)] +#![no_std] + +extern crate compiler_builtins; + +// test runner +extern crate utest_cortex_m_qemu; + +// overrides `panic!` +#[macro_use] +extern crate utest_macros; + +macro_rules! panic { + ($($tt:tt)*) => { + upanic!($($tt)*); + }; +} + +extern "C" { + fn __aeabi_memcpy(dest: *mut u8, src: *const u8, n: usize); + fn __aeabi_memcpy4(dest: *mut u8, src: *const u8, n: usize); +} + +struct Aligned { + array: [u8; 8], + _alignment: [u32; 0], +} + +impl Aligned { + fn new(array: [u8; 8]) -> Self { + Aligned { + array: array, + _alignment: [], + } + } +} + +#[test] +fn memcpy() { + let mut dest = [0; 4]; + let src = [0xde, 0xad, 0xbe, 0xef]; + + for n in 0..dest.len() { + dest.copy_from_slice(&[0; 4]); + + unsafe { __aeabi_memcpy(dest.as_mut_ptr(), src.as_ptr(), n) } + + assert_eq!(&dest[0..n], &src[0..n]) + } +} + +#[test] +fn memcpy4() { + let mut aligned = Aligned::new([0; 8]); + let dest = &mut aligned.array; + let src = [0xde, 0xad, 0xbe, 0xef, 0xba, 0xad, 0xf0, 0x0d]; + + for n in 0..dest.len() { + dest.copy_from_slice(&[0; 8]); + + unsafe { __aeabi_memcpy4(dest.as_mut_ptr(), src.as_ptr(), n) } + + assert_eq!(&dest[0..n], &src[0..n]) + } +} diff --git a/tests/aeabi_memset.rs b/tests/aeabi_memset.rs new file mode 100644 index 0000000..0919f14 --- /dev/null +++ b/tests/aeabi_memset.rs @@ -0,0 +1,274 @@ +#![cfg(all(target_arch = "arm", + not(any(target_env = "gnu", target_env = "musl")), + target_os = "linux", + feature = "mem"))] +#![feature(compiler_builtins_lib)] +#![no_std] + +extern crate compiler_builtins; + +// test runner +extern crate utest_cortex_m_qemu; + +// overrides `panic!` +#[macro_use] +extern crate utest_macros; + +use core::mem; + +macro_rules! panic { + ($($tt:tt)*) => { + upanic!($($tt)*); + }; +} + +extern "C" { + fn __aeabi_memset4(dest: *mut u8, n: usize, c: u32); +} + +struct Aligned { + array: [u8; 8], + _alignment: [u32; 0], +} + +impl Aligned { + fn new(array: [u8; 8]) -> Self { + Aligned { + array: array, + _alignment: [], + } + } +} + +#[test] +fn zero() { + let mut aligned = Aligned::new([0u8; 8]);; + assert_eq!(mem::align_of_val(&aligned), 4); + let xs = &mut aligned.array; + let c = 0xdeadbeef; + + unsafe { + __aeabi_memset4(xs.as_mut_ptr(), 0, c) + } + + assert_eq!(*xs, [0; 8]); + + let mut aligned = Aligned::new([1u8; 8]);; + assert_eq!(mem::align_of_val(&aligned), 4); + let xs = &mut aligned.array; + let c = 0xdeadbeef; + + unsafe { + __aeabi_memset4(xs.as_mut_ptr(), 0, c) + } + + assert_eq!(*xs, [1; 8]); +} + +#[test] +fn one() { + let mut aligned = Aligned::new([0u8; 8]);; + assert_eq!(mem::align_of_val(&aligned), 4); + let xs = &mut aligned.array; + let n = 1; + let c = 0xdeadbeef; + + unsafe { + __aeabi_memset4(xs.as_mut_ptr(), n, c) + } + + assert_eq!(*xs, [0xef, 0, 0, 0, 0, 0, 0, 0]); + + let mut aligned = Aligned::new([1u8; 8]);; + assert_eq!(mem::align_of_val(&aligned), 4); + let xs = &mut aligned.array; + let c = 0xdeadbeef; + + unsafe { + __aeabi_memset4(xs.as_mut_ptr(), n, c) + } + + assert_eq!(*xs, [0xef, 1, 1, 1, 1, 1, 1, 1]); +} + +#[test] +fn two() { + let mut aligned = Aligned::new([0u8; 8]);; + assert_eq!(mem::align_of_val(&aligned), 4); + let xs = &mut aligned.array; + let n = 2; + let c = 0xdeadbeef; + + unsafe { + __aeabi_memset4(xs.as_mut_ptr(), n, c) + } + + assert_eq!(*xs, [0xef, 0xef, 0, 0, 0, 0, 0, 0]); + + let mut aligned = Aligned::new([1u8; 8]);; + assert_eq!(mem::align_of_val(&aligned), 4); + let xs = &mut aligned.array; + let c = 0xdeadbeef; + + unsafe { + __aeabi_memset4(xs.as_mut_ptr(), n, c) + } + + assert_eq!(*xs, [0xef, 0xef, 1, 1, 1, 1, 1, 1]); +} + +#[test] +fn three() { + let mut aligned = Aligned::new([0u8; 8]);; + assert_eq!(mem::align_of_val(&aligned), 4); + let xs = &mut aligned.array; + let n = 3; + let c = 0xdeadbeef; + + unsafe { + __aeabi_memset4(xs.as_mut_ptr(), n, c) + } + + assert_eq!(*xs, [0xef, 0xef, 0xef, 0, 0, 0, 0, 0]); + + let mut aligned = Aligned::new([1u8; 8]);; + assert_eq!(mem::align_of_val(&aligned), 4); + let xs = &mut aligned.array; + let c = 0xdeadbeef; + + unsafe { + __aeabi_memset4(xs.as_mut_ptr(), n, c) + } + + assert_eq!(*xs, [0xef, 0xef, 0xef, 1, 1, 1, 1, 1]); +} + +#[test] +fn four() { + let mut aligned = Aligned::new([0u8; 8]);; + assert_eq!(mem::align_of_val(&aligned), 4); + let xs = &mut aligned.array; + let n = 4; + let c = 0xdeadbeef; + + unsafe { + __aeabi_memset4(xs.as_mut_ptr(), n, c) + } + + assert_eq!(*xs, [0xef, 0xef, 0xef, 0xef, 0, 0, 0, 0]); + + let mut aligned = Aligned::new([1u8; 8]);; + assert_eq!(mem::align_of_val(&aligned), 4); + let xs = &mut aligned.array; + let c = 0xdeadbeef; + + unsafe { + __aeabi_memset4(xs.as_mut_ptr(), n, c) + } + + assert_eq!(*xs, [0xef, 0xef, 0xef, 0xef, 1, 1, 1, 1]); +} + +#[test] +fn five() { + let mut aligned = Aligned::new([0u8; 8]);; + assert_eq!(mem::align_of_val(&aligned), 4); + let xs = &mut aligned.array; + let n = 5; + let c = 0xdeadbeef; + + unsafe { + __aeabi_memset4(xs.as_mut_ptr(), n, c) + } + + assert_eq!(*xs, [0xef, 0xef, 0xef, 0xef, 0xef, 0, 0, 0]); + + let mut aligned = Aligned::new([1u8; 8]);; + assert_eq!(mem::align_of_val(&aligned), 4); + let xs = &mut aligned.array; + let c = 0xdeadbeef; + + unsafe { + __aeabi_memset4(xs.as_mut_ptr(), n, c) + } + + assert_eq!(*xs, [0xef, 0xef, 0xef, 0xef, 0xef, 1, 1, 1]); +} + +#[test] +fn six() { + let mut aligned = Aligned::new([0u8; 8]);; + assert_eq!(mem::align_of_val(&aligned), 4); + let xs = &mut aligned.array; + let n = 6; + let c = 0xdeadbeef; + + unsafe { + __aeabi_memset4(xs.as_mut_ptr(), n, c) + } + + assert_eq!(*xs, [0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0, 0]); + + let mut aligned = Aligned::new([1u8; 8]);; + assert_eq!(mem::align_of_val(&aligned), 4); + let xs = &mut aligned.array; + let c = 0xdeadbeef; + + unsafe { + __aeabi_memset4(xs.as_mut_ptr(), n, c) + } + + assert_eq!(*xs, [0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 1, 1]); +} + +#[test] +fn seven() { + let mut aligned = Aligned::new([0u8; 8]);; + assert_eq!(mem::align_of_val(&aligned), 4); + let xs = &mut aligned.array; + let n = 7; + let c = 0xdeadbeef; + + unsafe { + __aeabi_memset4(xs.as_mut_ptr(), n, c) + } + + assert_eq!(*xs, [0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0]); + + let mut aligned = Aligned::new([1u8; 8]);; + assert_eq!(mem::align_of_val(&aligned), 4); + let xs = &mut aligned.array; + let c = 0xdeadbeef; + + unsafe { + __aeabi_memset4(xs.as_mut_ptr(), n, c) + } + + assert_eq!(*xs, [0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 1]); +} + +#[test] +fn eight() { + let mut aligned = Aligned::new([0u8; 8]);; + assert_eq!(mem::align_of_val(&aligned), 4); + let xs = &mut aligned.array; + let n = 8; + let c = 0xdeadbeef; + + unsafe { + __aeabi_memset4(xs.as_mut_ptr(), n, c) + } + + assert_eq!(*xs, [0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef]); + + let mut aligned = Aligned::new([1u8; 8]);; + assert_eq!(mem::align_of_val(&aligned), 4); + let xs = &mut aligned.array; + let c = 0xdeadbeef; + + unsafe { + __aeabi_memset4(xs.as_mut_ptr(), n, c) + } + + assert_eq!(*xs, [0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef]); +}