optimize 32-bit aligned mem{cpy,clr,set} intrinsics for ARM

this reduces the execution time of all these routines by 40-70%
This commit is contained in:
Jorge Aparicio 2017-06-29 22:40:58 -05:00
parent c311deeb76
commit 75c6ccca71
6 changed files with 473 additions and 40 deletions

View File

@ -1,7 +1,6 @@
use core::intrinsics; use core::{intrinsics, ptr};
#[cfg(feature = "mem")] use mem;
use mem::{memcpy, memmove, memset};
// NOTE This function and the ones below are implemented using assembly because they use a custom // NOTE This function and the ones below are implemented using assembly because they use a custom
// calling convention which can't be implemented using a normal Rust function // calling convention which can't be implemented using a normal Rust function
@ -60,65 +59,98 @@ pub unsafe fn __aeabi_ldivmod() {
intrinsics::unreachable(); intrinsics::unreachable();
} }
// TODO: These aeabi_* functions should be defined as aliases
#[cfg(not(feature = "mem"))]
extern "C" {
fn memcpy(dest: *mut u8, src: *const u8, n: usize) -> *mut u8;
fn memmove(dest: *mut u8, src: *const u8, n: usize) -> *mut u8;
fn memset(dest: *mut u8, c: i32, n: usize) -> *mut u8;
}
// FIXME: The `*4` and `*8` variants should be defined as aliases. // FIXME: The `*4` and `*8` variants should be defined as aliases.
#[cfg_attr(not(feature = "mangled-names"), no_mangle)] #[cfg_attr(not(feature = "mangled-names"), no_mangle)]
#[linkage = "weak"]
pub unsafe extern "aapcs" fn __aeabi_memcpy(dest: *mut u8, src: *const u8, n: usize) { pub unsafe extern "aapcs" fn __aeabi_memcpy(dest: *mut u8, src: *const u8, n: usize) {
memcpy(dest, src, n); mem::memcpy(dest, src, n);
}
#[cfg_attr(not(feature = "mangled-names"), no_mangle)]
pub unsafe extern "aapcs" fn __aeabi_memcpy4(dest: *mut u8, src: *const u8, n: usize) {
memcpy(dest, src, n);
}
#[cfg_attr(not(feature = "mangled-names"), no_mangle)]
pub unsafe extern "aapcs" fn __aeabi_memcpy8(dest: *mut u8, src: *const u8, n: usize) {
memcpy(dest, src, n);
} }
#[cfg_attr(not(feature = "mangled-names"), no_mangle)] #[cfg_attr(not(feature = "mangled-names"), no_mangle)]
#[linkage = "weak"]
/// Copies `n` bytes from `src` to `dest`, moving one 32-bit word at a time.
///
/// Both pointers are accessed through `u32` pointers with `ptr::read` / `ptr::write`, so the
/// caller must pass 4-byte aligned addresses. Whatever is left once fewer than four bytes
/// remain (0 to 3 bytes) is handed to `__aeabi_memcpy`.
pub unsafe extern "aapcs" fn __aeabi_memcpy4(dest: *mut u8, src: *const u8, mut n: usize) {
    let mut dest = dest as *mut u32;
    // The source is only ever read, so it stays a `*const` pointer.
    let mut src = src as *const u32;

    while n >= 4 {
        ptr::write(dest, ptr::read(src));
        dest = dest.offset(1);
        src = src.offset(1);
        n -= 4;
    }

    // Byte-wise tail.
    __aeabi_memcpy(dest as *mut u8, src as *const u8, n);
}
#[cfg_attr(not(feature = "mangled-names"), no_mangle)]
#[linkage = "weak"]
/// Forwards to `__aeabi_memcpy4` with the arguments unchanged.
///
/// NOTE(review): the `8` suffix presumably means the caller promises 8-byte aligned pointers,
/// which would also satisfy the 4-byte variant -- confirm against the ARM run-time ABI.
pub unsafe extern "aapcs" fn __aeabi_memcpy8(dest: *mut u8, src: *const u8, n: usize) {
__aeabi_memcpy4(dest, src, n);
}
#[cfg_attr(not(feature = "mangled-names"), no_mangle)]
#[linkage = "weak"]
pub unsafe extern "aapcs" fn __aeabi_memmove(dest: *mut u8, src: *const u8, n: usize) { pub unsafe extern "aapcs" fn __aeabi_memmove(dest: *mut u8, src: *const u8, n: usize) {
memmove(dest, src, n); mem::memmove(dest, src, n);
} }
#[cfg_attr(not(feature = "mangled-names"), no_mangle)] #[cfg_attr(not(feature = "mangled-names"), no_mangle)]
#[linkage = "weak"]
pub unsafe extern "aapcs" fn __aeabi_memmove4(dest: *mut u8, src: *const u8, n: usize) { pub unsafe extern "aapcs" fn __aeabi_memmove4(dest: *mut u8, src: *const u8, n: usize) {
memmove(dest, src, n); __aeabi_memmove(dest, src, n);
} }
#[cfg_attr(not(feature = "mangled-names"), no_mangle)] #[cfg_attr(not(feature = "mangled-names"), no_mangle)]
#[linkage = "weak"]
pub unsafe extern "aapcs" fn __aeabi_memmove8(dest: *mut u8, src: *const u8, n: usize) { pub unsafe extern "aapcs" fn __aeabi_memmove8(dest: *mut u8, src: *const u8, n: usize) {
memmove(dest, src, n); __aeabi_memmove(dest, src, n);
} }
// Note the different argument order // Note the different argument order
#[cfg_attr(not(feature = "mangled-names"), no_mangle)] #[cfg_attr(not(feature = "mangled-names"), no_mangle)]
#[linkage = "weak"]
pub unsafe extern "aapcs" fn __aeabi_memset(dest: *mut u8, n: usize, c: i32) { pub unsafe extern "aapcs" fn __aeabi_memset(dest: *mut u8, n: usize, c: i32) {
memset(dest, c, n); mem::memset(dest, c, n);
}
#[cfg_attr(not(feature = "mangled-names"), no_mangle)]
pub unsafe extern "aapcs" fn __aeabi_memset4(dest: *mut u8, n: usize, c: i32) {
memset(dest, c, n);
}
#[cfg_attr(not(feature = "mangled-names"), no_mangle)]
pub unsafe extern "aapcs" fn __aeabi_memset8(dest: *mut u8, n: usize, c: i32) {
memset(dest, c, n);
} }
#[cfg_attr(not(feature = "mangled-names"), no_mangle)] #[cfg_attr(not(feature = "mangled-names"), no_mangle)]
#[linkage = "weak"]
/// Fills `n` bytes at `dest` with the low byte of `c`, storing 32-bit words where possible.
///
/// `dest` is written through a `*mut u32`, so it has to be 4-byte aligned. The trailing
/// `n % 4` bytes are delegated to `__aeabi_memset`.
pub unsafe extern "aapcs" fn __aeabi_memset4(dest: *mut u8, mut n: usize, c: i32) {
    let fill = (c as u32) & 0xff;
    // Replicate the byte into all four lanes of a word; `fill <= 0xff`, so this cannot overflow.
    let pattern = fill * 0x0101_0101;

    let mut cursor = dest as *mut u32;
    let mut words_left = n / 4;
    while words_left != 0 {
        ptr::write(cursor, pattern);
        cursor = cursor.offset(1);
        words_left -= 1;
    }
    n %= 4;

    // Byte-wise tail.
    __aeabi_memset(cursor as *mut u8, n, fill as i32);
}
#[cfg_attr(not(feature = "mangled-names"), no_mangle)]
#[linkage = "weak"]
/// Forwards to `__aeabi_memset4` with the arguments unchanged.
///
/// NOTE(review): the `8` suffix presumably means the caller promises an 8-byte aligned `dest`,
/// which would also satisfy the 4-byte variant -- confirm against the ARM run-time ABI.
pub unsafe extern "aapcs" fn __aeabi_memset8(dest: *mut u8, n: usize, c: i32) {
__aeabi_memset4(dest, n, c);
}
#[cfg_attr(not(feature = "mangled-names"), no_mangle)]
#[linkage = "weak"]
pub unsafe extern "aapcs" fn __aeabi_memclr(dest: *mut u8, n: usize) { pub unsafe extern "aapcs" fn __aeabi_memclr(dest: *mut u8, n: usize) {
memset(dest, 0, n); __aeabi_memset(dest, n, 0);
} }
#[cfg_attr(not(feature = "mangled-names"), no_mangle)] #[cfg_attr(not(feature = "mangled-names"), no_mangle)]
#[linkage = "weak"]
pub unsafe extern "aapcs" fn __aeabi_memclr4(dest: *mut u8, n: usize) { pub unsafe extern "aapcs" fn __aeabi_memclr4(dest: *mut u8, n: usize) {
memset(dest, 0, n); __aeabi_memset4(dest, n, 0);
} }
#[cfg_attr(not(feature = "mangled-names"), no_mangle)] #[cfg_attr(not(feature = "mangled-names"), no_mangle)]
#[linkage = "weak"]
pub unsafe extern "aapcs" fn __aeabi_memclr8(dest: *mut u8, n: usize) { pub unsafe extern "aapcs" fn __aeabi_memclr8(dest: *mut u8, n: usize) {
memset(dest, 0, n); __aeabi_memset4(dest, n, 0);
} }

View File

@ -16,6 +16,7 @@
#![feature(i128_type)] #![feature(i128_type)]
#![feature(repr_simd)] #![feature(repr_simd)]
#![feature(abi_unadjusted)] #![feature(abi_unadjusted)]
#![feature(linkage)]
#![allow(unused_features)] #![allow(unused_features)]
#![no_builtins] #![no_builtins]
#![unstable(feature = "compiler_builtins_lib", #![unstable(feature = "compiler_builtins_lib",
@ -45,7 +46,6 @@ mod macros;
pub mod int; pub mod int;
pub mod float; pub mod float;
#[cfg(feature = "mem")]
pub mod mem; pub mod mem;
#[cfg(target_arch = "arm")] #[cfg(target_arch = "arm")]

View File

@ -5,7 +5,7 @@ type c_int = i16;
#[cfg(not(target_pointer_width = "16"))] #[cfg(not(target_pointer_width = "16"))]
type c_int = i32; type c_int = i32;
#[cfg_attr(not(feature = "mangled-names"), no_mangle)] #[cfg_attr(all(feature = "mem", not(feature = "mangled-names")), no_mangle)]
pub unsafe extern "C" fn memcpy(dest: *mut u8, pub unsafe extern "C" fn memcpy(dest: *mut u8,
src: *const u8, src: *const u8,
n: usize) n: usize)
@ -18,7 +18,7 @@ pub unsafe extern "C" fn memcpy(dest: *mut u8,
dest dest
} }
#[cfg_attr(not(feature = "mangled-names"), no_mangle)] #[cfg_attr(all(feature = "mem", not(feature = "mangled-names")), no_mangle)]
pub unsafe extern "C" fn memmove(dest: *mut u8, pub unsafe extern "C" fn memmove(dest: *mut u8,
src: *const u8, src: *const u8,
n: usize) n: usize)
@ -41,7 +41,7 @@ pub unsafe extern "C" fn memmove(dest: *mut u8,
dest dest
} }
#[cfg_attr(not(feature = "mangled-names"), no_mangle)] #[cfg_attr(all(feature = "mem", not(feature = "mangled-names")), no_mangle)]
pub unsafe extern "C" fn memset(s: *mut u8, c: c_int, n: usize) -> *mut u8 { pub unsafe extern "C" fn memset(s: *mut u8, c: c_int, n: usize) -> *mut u8 {
let mut i = 0; let mut i = 0;
while i < n { while i < n {
@ -51,7 +51,7 @@ pub unsafe extern "C" fn memset(s: *mut u8, c: c_int, n: usize) -> *mut u8 {
s s
} }
#[cfg_attr(not(feature = "mangled-names"), no_mangle)] #[cfg_attr(all(feature = "mem", not(feature = "mangled-names")), no_mangle)]
pub unsafe extern "C" fn memcmp(s1: *const u8, s2: *const u8, n: usize) -> i32 { pub unsafe extern "C" fn memcmp(s1: *const u8, s2: *const u8, n: usize) -> i32 {
let mut i = 0; let mut i = 0;
while i < n { while i < n {

58
tests/aeabi_memclr.rs Normal file
View File

@ -0,0 +1,58 @@
#![cfg(all(target_arch = "arm",
not(any(target_env = "gnu", target_env = "musl")),
target_os = "linux",
feature = "mem"))]
#![feature(compiler_builtins_lib)]
#![no_std]
extern crate compiler_builtins;
// test runner
extern crate utest_cortex_m_qemu;
// overrides `panic!`
#[macro_use]
extern crate utest_macros;
use core::mem;
// Shadows `panic!` inside this test crate: every invocation is forwarded, tokens untouched,
// to `upanic!` (presumably provided by the `#[macro_use]` `utest_macros` crate -- confirm).
macro_rules! panic {
($($tt:tt)*) => {
upanic!($($tt)*);
};
}
// Declarations of the intrinsics under test; both take the byte count `n` as second argument.
// NOTE(review): the library defines these as `extern "aapcs"` and takes `c: i32` in
// `__aeabi_memset4`; they are declared here as `extern "C"` with `c: u32` -- confirm that the
// two agree for integer arguments on every ARM target this test is built for.
extern "C" {
fn __aeabi_memclr4(dest: *mut u8, n: usize);
fn __aeabi_memset4(dest: *mut u8, n: usize, c: u32);
}
/// An 8-byte buffer wrapped in a struct that has the alignment of `u32`.
///
/// The zero-length `[u32; 0]` field stores nothing; it is only there to raise the alignment
/// of the struct.
struct Aligned {
    array: [u8; 8],
    _alignment: [u32; 0],
}

impl Aligned {
    /// Creates a zero-filled buffer.
    fn new() -> Self {
        let array = [0; 8];
        Self { array, _alignment: [] }
    }
}
#[test]
fn memclr4() {
    let mut aligned = Aligned::new();
    assert_eq!(mem::align_of_val(&aligned), 4);
    let xs = &mut aligned.array;

    // Every length from 0 up to and including the whole buffer.
    for n in 0..xs.len() + 1 {
        unsafe {
            // Dirty the first `n` bytes, then clear them again.
            __aeabi_memset4(xs.as_mut_ptr(), n, 0xff);
            __aeabi_memclr4(xs.as_mut_ptr(), n);
        }

        assert!(xs[0..n].iter().all(|x| *x == 0));
    }
}

69
tests/aeabi_memcpy.rs Normal file
View File

@ -0,0 +1,69 @@
#![cfg(all(target_arch = "arm",
not(any(target_env = "gnu", target_env = "musl")),
target_os = "linux",
feature = "mem"))]
#![feature(compiler_builtins_lib)]
#![no_std]
extern crate compiler_builtins;
// test runner
extern crate utest_cortex_m_qemu;
// overrides `panic!`
#[macro_use]
extern crate utest_macros;
// Shadows `panic!` inside this test crate: every invocation is forwarded, tokens untouched,
// to `upanic!` (presumably provided by the `#[macro_use]` `utest_macros` crate -- confirm).
macro_rules! panic {
($($tt:tt)*) => {
upanic!($($tt)*);
};
}
// Declarations of the intrinsics under test: copy `n` bytes from `src` to `dest`.
// NOTE(review): the library defines these as `extern "aapcs"` while they are declared here as
// `extern "C"` -- confirm that the two agree on every ARM target this test is built for.
extern "C" {
fn __aeabi_memcpy(dest: *mut u8, src: *const u8, n: usize);
fn __aeabi_memcpy4(dest: *mut u8, src: *const u8, n: usize);
}
/// An 8-byte buffer wrapped in a struct that has the alignment of `u32`.
///
/// The zero-length `[u32; 0]` field stores nothing; it is only there to raise the alignment
/// of the struct.
struct Aligned {
    array: [u8; 8],
    _alignment: [u32; 0],
}

impl Aligned {
    /// Wraps `array` in the aligned container.
    fn new(array: [u8; 8]) -> Self {
        Self { array, _alignment: [] }
    }
}
#[test]
fn memcpy() {
    let mut dest = [0; 4];
    let src = [0xde, 0xad, 0xbe, 0xef];

    // `+ 1` so that copying the whole buffer (`n == dest.len()`) is exercised as well.
    for n in 0..dest.len() + 1 {
        // Start every round from a clean destination.
        dest.copy_from_slice(&[0; 4]);

        unsafe { __aeabi_memcpy(dest.as_mut_ptr(), src.as_ptr(), n) }

        assert_eq!(&dest[0..n], &src[0..n])
    }
}
#[test]
fn memcpy4() {
    let mut aligned = Aligned::new([0; 8]);
    let dest = &mut aligned.array;
    // The source goes through `Aligned` too: a bare `[u8; 8]` local only has alignment 1,
    // while `__aeabi_memcpy4` reads it one `u32` at a time.
    let src_aligned = Aligned::new([0xde, 0xad, 0xbe, 0xef, 0xba, 0xad, 0xf0, 0x0d]);
    let src = &src_aligned.array;

    // `+ 1` so that copying the whole buffer (two full words) is exercised as well.
    for n in 0..dest.len() + 1 {
        // Start every round from a clean destination.
        dest.copy_from_slice(&[0; 8]);

        unsafe { __aeabi_memcpy4(dest.as_mut_ptr(), src.as_ptr(), n) }

        assert_eq!(&dest[0..n], &src[0..n]);
        // The word-wise copy must not write past the requested length.
        assert_eq!(&dest[n..], &[0u8; 8][n..]);
    }
}

274
tests/aeabi_memset.rs Normal file
View File

@ -0,0 +1,274 @@
#![cfg(all(target_arch = "arm",
not(any(target_env = "gnu", target_env = "musl")),
target_os = "linux",
feature = "mem"))]
#![feature(compiler_builtins_lib)]
#![no_std]
extern crate compiler_builtins;
// test runner
extern crate utest_cortex_m_qemu;
// overrides `panic!`
#[macro_use]
extern crate utest_macros;
use core::mem;
// Shadows `panic!` inside this test crate: every invocation is forwarded, tokens untouched,
// to `upanic!` (presumably provided by the `#[macro_use]` `utest_macros` crate -- confirm).
macro_rules! panic {
($($tt:tt)*) => {
upanic!($($tt)*);
};
}
// Declaration of the intrinsic under test; note that the byte count `n` comes before `c`.
// NOTE(review): the library defines this as `extern "aapcs"` with `c: i32`; it is declared
// here as `extern "C"` with `c: u32` -- confirm that the two agree for integer arguments on
// every ARM target this test is built for.
extern "C" {
fn __aeabi_memset4(dest: *mut u8, n: usize, c: u32);
}
/// An 8-byte buffer wrapped in a struct that has the alignment of `u32`.
///
/// The zero-length `[u32; 0]` field stores nothing; it is only there to raise the alignment
/// of the struct.
struct Aligned {
    array: [u8; 8],
    _alignment: [u32; 0],
}

impl Aligned {
    /// Wraps `array` in the aligned container.
    fn new(array: [u8; 8]) -> Self {
        Self { array, _alignment: [] }
    }
}
#[test]
fn zero() {
    let c = 0xdeadbeef;

    // Run once over a zeroed buffer and once over a buffer of ones.
    for &fill in &[0u8, 1] {
        let mut aligned = Aligned::new([fill; 8]);
        assert_eq!(mem::align_of_val(&aligned), 4);
        let xs = &mut aligned.array;

        unsafe { __aeabi_memset4(xs.as_mut_ptr(), 0, c) }

        // A length of zero must leave the buffer untouched.
        assert_eq!(*xs, [fill; 8]);
    }
}
#[test]
fn one() {
    let n = 1;
    let c = 0xdeadbeef;

    // Run once over a zeroed buffer and once over a buffer of ones, so that the bytes past
    // `n` can be seen to keep their initial value.
    for &fill in &[0u8, 1] {
        let mut aligned = Aligned::new([fill; 8]);
        assert_eq!(mem::align_of_val(&aligned), 4);
        let xs = &mut aligned.array;

        unsafe { __aeabi_memset4(xs.as_mut_ptr(), n, c) }

        // Only the low byte of `c` (0xef) is expected in the first `n` bytes.
        assert_eq!(*xs, [0xef, fill, fill, fill, fill, fill, fill, fill]);
    }
}
#[test]
fn two() {
    let n = 2;
    let c = 0xdeadbeef;

    // Run once over a zeroed buffer and once over a buffer of ones, so that the bytes past
    // `n` can be seen to keep their initial value.
    for &fill in &[0u8, 1] {
        let mut aligned = Aligned::new([fill; 8]);
        assert_eq!(mem::align_of_val(&aligned), 4);
        let xs = &mut aligned.array;

        unsafe { __aeabi_memset4(xs.as_mut_ptr(), n, c) }

        // Only the low byte of `c` (0xef) is expected in the first `n` bytes.
        assert_eq!(*xs, [0xef, 0xef, fill, fill, fill, fill, fill, fill]);
    }
}
#[test]
fn three() {
    let n = 3;
    let c = 0xdeadbeef;

    // Run once over a zeroed buffer and once over a buffer of ones, so that the bytes past
    // `n` can be seen to keep their initial value.
    for &fill in &[0u8, 1] {
        let mut aligned = Aligned::new([fill; 8]);
        assert_eq!(mem::align_of_val(&aligned), 4);
        let xs = &mut aligned.array;

        unsafe { __aeabi_memset4(xs.as_mut_ptr(), n, c) }

        // Only the low byte of `c` (0xef) is expected in the first `n` bytes.
        assert_eq!(*xs, [0xef, 0xef, 0xef, fill, fill, fill, fill, fill]);
    }
}
#[test]
fn four() {
    let n = 4;
    let c = 0xdeadbeef;

    // Run once over a zeroed buffer and once over a buffer of ones, so that the bytes past
    // `n` can be seen to keep their initial value.
    for &fill in &[0u8, 1] {
        let mut aligned = Aligned::new([fill; 8]);
        assert_eq!(mem::align_of_val(&aligned), 4);
        let xs = &mut aligned.array;

        unsafe { __aeabi_memset4(xs.as_mut_ptr(), n, c) }

        // Only the low byte of `c` (0xef) is expected in the first `n` bytes.
        assert_eq!(*xs, [0xef, 0xef, 0xef, 0xef, fill, fill, fill, fill]);
    }
}
#[test]
fn five() {
    let n = 5;
    let c = 0xdeadbeef;

    // Run once over a zeroed buffer and once over a buffer of ones, so that the bytes past
    // `n` can be seen to keep their initial value.
    for &fill in &[0u8, 1] {
        let mut aligned = Aligned::new([fill; 8]);
        assert_eq!(mem::align_of_val(&aligned), 4);
        let xs = &mut aligned.array;

        unsafe { __aeabi_memset4(xs.as_mut_ptr(), n, c) }

        // Only the low byte of `c` (0xef) is expected in the first `n` bytes.
        assert_eq!(*xs, [0xef, 0xef, 0xef, 0xef, 0xef, fill, fill, fill]);
    }
}
#[test]
fn six() {
    let n = 6;
    let c = 0xdeadbeef;

    // Run once over a zeroed buffer and once over a buffer of ones, so that the bytes past
    // `n` can be seen to keep their initial value.
    for &fill in &[0u8, 1] {
        let mut aligned = Aligned::new([fill; 8]);
        assert_eq!(mem::align_of_val(&aligned), 4);
        let xs = &mut aligned.array;

        unsafe { __aeabi_memset4(xs.as_mut_ptr(), n, c) }

        // Only the low byte of `c` (0xef) is expected in the first `n` bytes.
        assert_eq!(*xs, [0xef, 0xef, 0xef, 0xef, 0xef, 0xef, fill, fill]);
    }
}
#[test]
fn seven() {
    let n = 7;
    let c = 0xdeadbeef;

    // Run once over a zeroed buffer and once over a buffer of ones, so that the byte past
    // `n` can be seen to keep its initial value.
    for &fill in &[0u8, 1] {
        let mut aligned = Aligned::new([fill; 8]);
        assert_eq!(mem::align_of_val(&aligned), 4);
        let xs = &mut aligned.array;

        unsafe { __aeabi_memset4(xs.as_mut_ptr(), n, c) }

        // Only the low byte of `c` (0xef) is expected in the first `n` bytes.
        assert_eq!(*xs, [0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, fill]);
    }
}
#[test]
fn eight() {
    let n = 8;
    let c = 0xdeadbeef;

    // Run once over a zeroed buffer and once over a buffer of ones; with `n` covering the
    // whole buffer, both runs must end up with the same contents.
    for &fill in &[0u8, 1] {
        let mut aligned = Aligned::new([fill; 8]);
        assert_eq!(mem::align_of_val(&aligned), 4);
        let xs = &mut aligned.array;

        unsafe { __aeabi_memset4(xs.as_mut_ptr(), n, c) }

        // Only the low byte of `c` (0xef) is expected, in every byte.
        assert_eq!(*xs, [0xef; 8]);
    }
}