Auto merge of #164 - rust-lang-nursery:memclr, r=alexcrichton

optimize memset and memclr for ARM

This commit optimizes those routines by rewriting them in assembly and
performing the memory copying in 32-bit chunks, rather than in 8-bit chunks
as it was done before this commit. This assembly implementation is
compatible with the ARMv6 and ARMv7 architectures.

This change results in a reduction of runtime of about 40-70% in all cases
that matter (the compiler will never use these intrinsics for sizes smaller
than 4 bytes). See data below:

| Bytes | HEAD | this PR | diff       |
| ----- | ---- | ------- | ---------- |
| 0     | 6    | 14      | +133.3333% |
| 1     | 10   | 13      | +30%       |
| 2     | 14   | 13      | -7.1429%   |
| 3     | 18   | 13      | -27.7778%  |
| 4     | 24   | 21      | -12.5%     |
| 16    | 70   | 36      | -48.5714%  |
| 64    | 263  | 97      | -63.1179%  |
| 256   | 1031 | 337     | -67.3133%  |
| 1024  | 4103 | 1297    | -68.389%   |

All times are in clock cycles. The measurements were done on a Cortex-M3
processor running at 8 MHz using the technique described [here].

[here]: http://blog.japaric.io/rtfm-overhead

---

For relevance, all pure Rust programs for Cortex-M microcontrollers use memclr to
zero the .bss during startup, so this change results in a quicker boot time.

Some questions / comments:

- ~~the original code (it had a bug) comes from this [repo] and it's licensed
  under the ISC license. I have preserved the copyright and license text in the
  source code. IANAL, is that OK?~~ no longer applies. The intrinsics are written in Rust now.

- ~~I don't know whether this ARM implementation works for ARMv4 or ARMv5.
  @FenrirWolf and @Uvekilledkenny may want to take a look at it first.~~ no longer applies. The intrinsics are written in Rust now.

- ~~No idea whether this implementation works on processors that have no thumb
  instruction set. The current implementation uses 16-bit thumb instructions.~~ no longer applies. The intrinsics are written in Rust now.

- ~~The loop code can be rewritten in fewer instructions but using 32-bit thumb
  instructions. That 32-bit version would only work on ARMv7 though. I have yet
  to check whether that makes any difference in the runtime of the intrinsic.~~ no longer applies. The intrinsics are written in Rust now.

- ~~I'll look into memcpy4 next.~~ done

[repo]: https://github.com/bobbl/libaeabi-cortexm0
master
bors 2017-07-01 07:27:55 +00:00
commit b0300b16ed
6 changed files with 481 additions and 36 deletions

View File

@ -1,7 +1,6 @@
use core::intrinsics;
use core::{intrinsics, ptr};
#[cfg(feature = "mem")]
use mem::{memcpy, memmove, memset};
use mem;
// NOTE This function and the ones below are implemented using assembly because they use a custom
// calling convention which can't be implemented using a normal Rust function
@ -60,65 +59,110 @@ pub unsafe fn __aeabi_ldivmod() {
intrinsics::unreachable();
}
// TODO: These aeabi_* functions should be defined as aliases
#[cfg(not(feature = "mem"))]
extern "C" {
fn memcpy(dest: *mut u8, src: *const u8, n: usize) -> *mut u8;
fn memmove(dest: *mut u8, src: *const u8, n: usize) -> *mut u8;
fn memset(dest: *mut u8, c: i32, n: usize) -> *mut u8;
}
// FIXME: The `*4` and `*8` variants should be defined as aliases.
#[cfg(not(target_os = "ios"))]
#[cfg_attr(not(feature = "mangled-names"), no_mangle)]
#[cfg_attr(thumb, linkage = "weak")]
pub unsafe extern "aapcs" fn __aeabi_memcpy(dest: *mut u8, src: *const u8, n: usize) {
memcpy(dest, src, n);
mem::memcpy(dest, src, n);
}
#[cfg(not(target_os = "ios"))]
#[cfg_attr(not(feature = "mangled-names"), no_mangle)]
pub unsafe extern "aapcs" fn __aeabi_memcpy4(dest: *mut u8, src: *const u8, n: usize) {
memcpy(dest, src, n);
#[cfg_attr(thumb, linkage = "weak")]
pub unsafe extern "aapcs" fn __aeabi_memcpy4(dest: *mut u8, src: *const u8, mut n: usize) {
let mut dest = dest as *mut u32;
let mut src = src as *mut u32;
while n >= 4 {
ptr::write(dest, ptr::read(src));
dest = dest.offset(1);
src = src.offset(1);
n -= 4;
}
__aeabi_memcpy(dest as *mut u8, src as *const u8, n);
}
#[cfg(not(target_os = "ios"))]
#[cfg_attr(not(feature = "mangled-names"), no_mangle)]
#[cfg_attr(thumb, linkage = "weak")]
pub unsafe extern "aapcs" fn __aeabi_memcpy8(dest: *mut u8, src: *const u8, n: usize) {
memcpy(dest, src, n);
__aeabi_memcpy4(dest, src, n);
}
#[cfg(not(target_os = "ios"))]
#[cfg_attr(not(feature = "mangled-names"), no_mangle)]
#[cfg_attr(thumb, linkage = "weak")]
pub unsafe extern "aapcs" fn __aeabi_memmove(dest: *mut u8, src: *const u8, n: usize) {
memmove(dest, src, n);
mem::memmove(dest, src, n);
}
#[cfg(not(target_os = "ios"))]
#[cfg_attr(not(feature = "mangled-names"), no_mangle)]
#[cfg_attr(thumb, linkage = "weak")]
pub unsafe extern "aapcs" fn __aeabi_memmove4(dest: *mut u8, src: *const u8, n: usize) {
memmove(dest, src, n);
__aeabi_memmove(dest, src, n);
}
#[cfg(not(target_os = "ios"))]
#[cfg_attr(not(feature = "mangled-names"), no_mangle)]
#[cfg_attr(thumb, linkage = "weak")]
pub unsafe extern "aapcs" fn __aeabi_memmove8(dest: *mut u8, src: *const u8, n: usize) {
memmove(dest, src, n);
__aeabi_memmove(dest, src, n);
}
// Note the different argument order
#[cfg(not(target_os = "ios"))]
#[cfg_attr(not(feature = "mangled-names"), no_mangle)]
#[cfg_attr(thumb, linkage = "weak")]
pub unsafe extern "aapcs" fn __aeabi_memset(dest: *mut u8, n: usize, c: i32) {
memset(dest, c, n);
}
#[cfg_attr(not(feature = "mangled-names"), no_mangle)]
pub unsafe extern "aapcs" fn __aeabi_memset4(dest: *mut u8, n: usize, c: i32) {
memset(dest, c, n);
}
#[cfg_attr(not(feature = "mangled-names"), no_mangle)]
pub unsafe extern "aapcs" fn __aeabi_memset8(dest: *mut u8, n: usize, c: i32) {
memset(dest, c, n);
// Note the different argument order
mem::memset(dest, c, n);
}
// Word-at-a-time `memset` for word-aligned destinations.
//
// Fills `n` bytes starting at `dest` with the low byte of `c`. Note the argument order:
// the length comes before the fill value.
//
// Safety: `dest` is written through a `*mut u32`, so it must be aligned for `u32` and valid
// for writes of `n` bytes.
#[cfg(not(target_os = "ios"))]
#[cfg_attr(not(feature = "mangled-names"), no_mangle)]
#[cfg_attr(thumb, linkage = "weak")]
pub unsafe extern "aapcs" fn __aeabi_memset4(dest: *mut u8, mut n: usize, c: i32) {
    // Only the low byte of `c` is used; replicate it into all four byte lanes of a word.
    let low = (c as u32) & 0xff;
    let half = low | (low << 8);
    let word = half | (half << 16);

    // Store whole words while at least four bytes remain.
    let mut cursor = dest as *mut u32;
    while n >= 4 {
        ptr::write(cursor, word);
        cursor = cursor.offset(1);
        n -= 4;
    }

    // The remaining 0..=3 bytes are handled by the byte-wise routine.
    __aeabi_memset(cursor as *mut u8, n, low as i32);
}
// `memset` variant for 8-byte-aligned destinations.
//
// Forwards unchanged to `__aeabi_memset4`: same `dest`, same length `n`, same fill value `c`.
// Note the argument order: the length comes before the fill value.
#[cfg(not(target_os = "ios"))]
#[cfg_attr(not(feature = "mangled-names"), no_mangle)]
#[cfg_attr(thumb, linkage = "weak")]
pub unsafe extern "aapcs" fn __aeabi_memset8(dest: *mut u8, n: usize, c: i32) {
__aeabi_memset4(dest, n, c);
}
#[cfg(not(target_os = "ios"))]
#[cfg_attr(not(feature = "mangled-names"), no_mangle)]
#[cfg_attr(thumb, linkage = "weak")]
pub unsafe extern "aapcs" fn __aeabi_memclr(dest: *mut u8, n: usize) {
memset(dest, 0, n);
__aeabi_memset(dest, n, 0);
}
#[cfg(not(target_os = "ios"))]
#[cfg_attr(not(feature = "mangled-names"), no_mangle)]
#[cfg_attr(thumb, linkage = "weak")]
pub unsafe extern "aapcs" fn __aeabi_memclr4(dest: *mut u8, n: usize) {
memset(dest, 0, n);
__aeabi_memset4(dest, n, 0);
}
#[cfg(not(target_os = "ios"))]
#[cfg_attr(not(feature = "mangled-names"), no_mangle)]
#[cfg_attr(thumb, linkage = "weak")]
pub unsafe extern "aapcs" fn __aeabi_memclr8(dest: *mut u8, n: usize) {
memset(dest, 0, n);
__aeabi_memset4(dest, n, 0);
}

View File

@ -16,6 +16,7 @@
#![feature(i128_type)]
#![feature(repr_simd)]
#![feature(abi_unadjusted)]
#![feature(linkage)]
#![allow(unused_features)]
#![no_builtins]
#![unstable(feature = "compiler_builtins_lib",
@ -45,7 +46,6 @@ mod macros;
pub mod int;
pub mod float;
#[cfg(feature = "mem")]
pub mod mem;
#[cfg(target_arch = "arm")]

View File

@ -5,7 +5,7 @@ type c_int = i16;
#[cfg(not(target_pointer_width = "16"))]
type c_int = i32;
#[cfg_attr(not(feature = "mangled-names"), no_mangle)]
#[cfg_attr(all(feature = "mem", not(feature = "mangled-names")), no_mangle)]
pub unsafe extern "C" fn memcpy(dest: *mut u8,
src: *const u8,
n: usize)
@ -18,7 +18,7 @@ pub unsafe extern "C" fn memcpy(dest: *mut u8,
dest
}
#[cfg_attr(not(feature = "mangled-names"), no_mangle)]
#[cfg_attr(all(feature = "mem", not(feature = "mangled-names")), no_mangle)]
pub unsafe extern "C" fn memmove(dest: *mut u8,
src: *const u8,
n: usize)
@ -41,7 +41,7 @@ pub unsafe extern "C" fn memmove(dest: *mut u8,
dest
}
#[cfg_attr(not(feature = "mangled-names"), no_mangle)]
#[cfg_attr(all(feature = "mem", not(feature = "mangled-names")), no_mangle)]
pub unsafe extern "C" fn memset(s: *mut u8, c: c_int, n: usize) -> *mut u8 {
let mut i = 0;
while i < n {
@ -51,7 +51,7 @@ pub unsafe extern "C" fn memset(s: *mut u8, c: c_int, n: usize) -> *mut u8 {
s
}
#[cfg_attr(not(feature = "mangled-names"), no_mangle)]
#[cfg_attr(all(feature = "mem", not(feature = "mangled-names")), no_mangle)]
pub unsafe extern "C" fn memcmp(s1: *const u8, s2: *const u8, n: usize) -> i32 {
let mut i = 0;
while i < n {

58
tests/aeabi_memclr.rs Normal file
View File

@ -0,0 +1,58 @@
#![cfg(all(target_arch = "arm",
not(any(target_env = "gnu", target_env = "musl")),
target_os = "linux",
feature = "mem"))]
#![feature(compiler_builtins_lib)]
#![no_std]
extern crate compiler_builtins;
// test runner
extern crate utest_cortex_m_qemu;
// overrides `panic!`
#[macro_use]
extern crate utest_macros;
use core::mem;
// Local `panic!` that hands every token it receives to `upanic!` (from `utest_macros`).
macro_rules! panic {
    ($($args:tt)*) => (
        upanic!($($args)*);
    );
}
// Intrinsics under test, resolved at link time (presumably from `compiler_builtins` — confirm).
// The tests below only call them on buffers they have checked to be 4-byte aligned.
extern "C" {
// Clears `n` bytes starting at `dest`.
fn __aeabi_memclr4(dest: *mut u8, n: usize);
// Fills `n` bytes starting at `dest` using `c`; note that the length `n` comes before `c`.
fn __aeabi_memset4(dest: *mut u8, n: usize, c: u32);
}
// An 8-byte buffer whose alignment is raised to that of `u32`.
struct Aligned {
    // Payload bytes handed to the intrinsics under test.
    array: [u8; 8],
    // Zero-length `u32` array: takes no space but forces `u32` alignment on the struct.
    _alignment: [u32; 0],
}

impl Aligned {
    // Creates a zero-filled buffer.
    fn new() -> Self {
        let array = [0u8; 8];
        let _alignment: [u32; 0] = [];
        Aligned { array, _alignment }
    }
}
// For every length 0..=8: fill the prefix with 0xff, clear it again, and check it reads as zero.
#[test]
fn memclr4() {
    let mut aligned = Aligned::new();
    assert_eq!(mem::align_of_val(&aligned), 4);

    let buf = &mut aligned.array;
    for len in 0..9 {
        unsafe {
            __aeabi_memset4(buf.as_mut_ptr(), len, 0xff);
            __aeabi_memclr4(buf.as_mut_ptr(), len);
        }

        assert!(buf[..len].iter().all(|&byte| byte == 0));
    }
}

69
tests/aeabi_memcpy.rs Normal file
View File

@ -0,0 +1,69 @@
#![cfg(all(target_arch = "arm",
not(any(target_env = "gnu", target_env = "musl")),
target_os = "linux",
feature = "mem"))]
#![feature(compiler_builtins_lib)]
#![no_std]
extern crate compiler_builtins;
// test runner
extern crate utest_cortex_m_qemu;
// overrides `panic!`
#[macro_use]
extern crate utest_macros;
// Local `panic!` that hands every token it receives to `upanic!` (from `utest_macros`).
macro_rules! panic {
    ($($args:tt)*) => (
        upanic!($($args)*);
    );
}
// Intrinsics under test, resolved at link time (presumably from `compiler_builtins` — confirm).
extern "C" {
// Copies `n` bytes from `src` to `dest`.
fn __aeabi_memcpy(dest: *mut u8, src: *const u8, n: usize);
// Same contract as `__aeabi_memcpy`; NOTE(review): the `4` variant is expected to require
// 4-byte-aligned `dest` and `src` — confirm against the AEABI run-time ABI.
fn __aeabi_memcpy4(dest: *mut u8, src: *const u8, n: usize);
}
// An 8-byte buffer whose alignment is raised to that of `u32`.
struct Aligned {
    // Payload bytes handed to the intrinsics under test.
    array: [u8; 8],
    // Zero-length `u32` array: takes no space but forces `u32` alignment on the struct.
    _alignment: [u32; 0],
}

impl Aligned {
    // Wraps `array` in the aligned container.
    fn new(array: [u8; 8]) -> Self {
        let _alignment: [u32; 0] = [];
        Aligned { array, _alignment }
    }
}
// Copies every prefix length of `src` (0 up to and including the full 4 bytes) into a zeroed
// `dest` and checks both the copied prefix and the untouched tail.
#[test]
fn memcpy() {
    let mut dest = [0; 4];
    let src = [0xde, 0xad, 0xbe, 0xef];
    // `+ 1` so that the full-length copy (n == dest.len()) is exercised as well.
    for n in 0..dest.len() + 1 {
        dest.copy_from_slice(&[0; 4]);
        unsafe { __aeabi_memcpy(dest.as_mut_ptr(), src.as_ptr(), n) }
        assert_eq!(&dest[0..n], &src[0..n]);
        // Nothing past the requested length may have been written.
        assert!(dest[n..].iter().all(|&byte| byte == 0));
    }
}
// Copies every prefix length of `src` (0 up to and including the full 8 bytes) into a zeroed,
// 4-byte-aligned `dest` and checks both the copied prefix and the untouched tail.
#[test]
fn memcpy4() {
    let mut aligned = Aligned::new([0; 8]);
    let dest = &mut aligned.array;
    // The source is read a word at a time as well, so it needs the same alignment guarantee as
    // the destination; a bare `[u8; 8]` local does not provide one.
    let src_aligned = Aligned::new([0xde, 0xad, 0xbe, 0xef, 0xba, 0xad, 0xf0, 0x0d]);
    let src = &src_aligned.array;
    // `+ 1` so that the full-length copy (n == dest.len()) is exercised as well.
    for n in 0..dest.len() + 1 {
        dest.copy_from_slice(&[0; 8]);
        unsafe { __aeabi_memcpy4(dest.as_mut_ptr(), src.as_ptr(), n) }
        assert_eq!(&dest[0..n], &src[0..n]);
        // Nothing past the requested length may have been written.
        assert!(dest[n..].iter().all(|&byte| byte == 0));
    }
}

274
tests/aeabi_memset.rs Normal file
View File

@ -0,0 +1,274 @@
#![cfg(all(target_arch = "arm",
not(any(target_env = "gnu", target_env = "musl")),
target_os = "linux",
feature = "mem"))]
#![feature(compiler_builtins_lib)]
#![no_std]
extern crate compiler_builtins;
// test runner
extern crate utest_cortex_m_qemu;
// overrides `panic!`
#[macro_use]
extern crate utest_macros;
use core::mem;
// Local `panic!` that hands every token it receives to `upanic!` (from `utest_macros`).
macro_rules! panic {
    ($($args:tt)*) => (
        upanic!($($args)*);
    );
}
// Intrinsic under test, resolved at link time (presumably from `compiler_builtins` — confirm).
// The tests below only call it on buffers they have checked to be 4-byte aligned.
extern "C" {
// Fills `n` bytes starting at `dest` using `c`; note that the length `n` comes before `c`.
// The tests below pass 0xdeadbeef and expect each written byte to be 0xef (the low byte).
fn __aeabi_memset4(dest: *mut u8, n: usize, c: u32);
}
// An 8-byte buffer whose alignment is raised to that of `u32`.
struct Aligned {
    // Payload bytes handed to the intrinsic under test.
    array: [u8; 8],
    // Zero-length `u32` array: takes no space but forces `u32` alignment on the struct.
    _alignment: [u32; 0],
}

impl Aligned {
    // Wraps `array` in the aligned container.
    fn new(array: [u8; 8]) -> Self {
        let _alignment: [u32; 0] = [];
        Aligned { array, _alignment }
    }
}
// n = 0: the buffer must be left untouched, whatever it held before.
#[test]
fn zero() {
    let c = 0xdeadbeef;
    // Run once on an all-zero buffer and once on an all-one buffer.
    for &fill in &[0u8, 1u8] {
        let mut aligned = Aligned::new([fill; 8]);
        assert_eq!(mem::align_of_val(&aligned), 4);
        let buf = &mut aligned.array;
        unsafe { __aeabi_memset4(buf.as_mut_ptr(), 0, c) }
        assert_eq!(*buf, [fill; 8]);
    }
}
// n = 1: only the first byte becomes 0xef; the rest keeps its previous value.
#[test]
fn one() {
    let n = 1;
    let c = 0xdeadbeef;
    // Run once on an all-zero buffer and once on an all-one buffer.
    for &fill in &[0u8, 1u8] {
        let mut aligned = Aligned::new([fill; 8]);
        assert_eq!(mem::align_of_val(&aligned), 4);
        let buf = &mut aligned.array;
        unsafe { __aeabi_memset4(buf.as_mut_ptr(), n, c) }
        assert_eq!(*buf, [0xef, fill, fill, fill, fill, fill, fill, fill]);
    }
}
// n = 2: the first two bytes become 0xef; the rest keeps its previous value.
#[test]
fn two() {
    let n = 2;
    let c = 0xdeadbeef;
    // Run once on an all-zero buffer and once on an all-one buffer.
    for &fill in &[0u8, 1u8] {
        let mut aligned = Aligned::new([fill; 8]);
        assert_eq!(mem::align_of_val(&aligned), 4);
        let buf = &mut aligned.array;
        unsafe { __aeabi_memset4(buf.as_mut_ptr(), n, c) }
        assert_eq!(*buf, [0xef, 0xef, fill, fill, fill, fill, fill, fill]);
    }
}
// n = 3: the first three bytes become 0xef; the rest keeps its previous value.
#[test]
fn three() {
    let n = 3;
    let c = 0xdeadbeef;
    // Run once on an all-zero buffer and once on an all-one buffer.
    for &fill in &[0u8, 1u8] {
        let mut aligned = Aligned::new([fill; 8]);
        assert_eq!(mem::align_of_val(&aligned), 4);
        let buf = &mut aligned.array;
        unsafe { __aeabi_memset4(buf.as_mut_ptr(), n, c) }
        assert_eq!(*buf, [0xef, 0xef, 0xef, fill, fill, fill, fill, fill]);
    }
}
// n = 4: exactly the first word becomes 0xef bytes; the second word keeps its previous value.
#[test]
fn four() {
    let n = 4;
    let c = 0xdeadbeef;
    // Run once on an all-zero buffer and once on an all-one buffer.
    for &fill in &[0u8, 1u8] {
        let mut aligned = Aligned::new([fill; 8]);
        assert_eq!(mem::align_of_val(&aligned), 4);
        let buf = &mut aligned.array;
        unsafe { __aeabi_memset4(buf.as_mut_ptr(), n, c) }
        assert_eq!(*buf, [0xef, 0xef, 0xef, 0xef, fill, fill, fill, fill]);
    }
}
// n = 5: the first five bytes become 0xef; the rest keeps its previous value.
#[test]
fn five() {
    let n = 5;
    let c = 0xdeadbeef;
    // Run once on an all-zero buffer and once on an all-one buffer.
    for &fill in &[0u8, 1u8] {
        let mut aligned = Aligned::new([fill; 8]);
        assert_eq!(mem::align_of_val(&aligned), 4);
        let buf = &mut aligned.array;
        unsafe { __aeabi_memset4(buf.as_mut_ptr(), n, c) }
        assert_eq!(*buf, [0xef, 0xef, 0xef, 0xef, 0xef, fill, fill, fill]);
    }
}
// n = 6: the first six bytes become 0xef; the rest keeps its previous value.
#[test]
fn six() {
    let n = 6;
    let c = 0xdeadbeef;
    // Run once on an all-zero buffer and once on an all-one buffer.
    for &fill in &[0u8, 1u8] {
        let mut aligned = Aligned::new([fill; 8]);
        assert_eq!(mem::align_of_val(&aligned), 4);
        let buf = &mut aligned.array;
        unsafe { __aeabi_memset4(buf.as_mut_ptr(), n, c) }
        assert_eq!(*buf, [0xef, 0xef, 0xef, 0xef, 0xef, 0xef, fill, fill]);
    }
}
// n = 7: the first seven bytes become 0xef; the last byte keeps its previous value.
#[test]
fn seven() {
    let n = 7;
    let c = 0xdeadbeef;
    // Run once on an all-zero buffer and once on an all-one buffer.
    for &fill in &[0u8, 1u8] {
        let mut aligned = Aligned::new([fill; 8]);
        assert_eq!(mem::align_of_val(&aligned), 4);
        let buf = &mut aligned.array;
        unsafe { __aeabi_memset4(buf.as_mut_ptr(), n, c) }
        assert_eq!(*buf, [0xef, 0xef, 0xef, 0xef, 0xef, 0xef, 0xef, fill]);
    }
}
// n = 8: the whole buffer becomes 0xef bytes, regardless of its previous contents.
#[test]
fn eight() {
    let n = 8;
    let c = 0xdeadbeef;
    // Run once on an all-zero buffer and once on an all-one buffer.
    for &fill in &[0u8, 1u8] {
        let mut aligned = Aligned::new([fill; 8]);
        assert_eq!(mem::align_of_val(&aligned), 4);
        let buf = &mut aligned.array;
        unsafe { __aeabi_memset4(buf.as_mut_ptr(), n, c) }
        assert_eq!(*buf, [0xef; 8]);
    }
}