From dfa7b161aa197f99ab0f9b74fbaa008d7371e488 Mon Sep 17 00:00:00 2001 From: Jorge Aparicio Date: Tue, 7 Feb 2017 09:41:26 -0500 Subject: [PATCH] use AAPCS calling convention on all aeabi intrinsics also, on ARM, inline(always) the actual implementation of the intrinsics so we end with code like this: ``` 00000000 <__aeabi_dadd>: (implementation here) ``` instead of "trampolines" like this: ``` 00000000 <__aeabi_dadd>: (shuffle registers) (call __adddf3) 00000000 <__adddf3>: (implementation here) ``` closes #116 --- src/arm.rs | 40 ++++++++++++++++++++-------------------- src/float/add.rs | 3 ++- src/int/mul.rs | 3 ++- src/int/sdiv.rs | 3 ++- src/int/shift.rs | 4 ++++ src/int/udiv.rs | 3 ++- src/lib.rs | 4 ++++ 7 files changed, 36 insertions(+), 24 deletions(-) diff --git a/src/arm.rs b/src/arm.rs index 345ff24..67947a1 100644 --- a/src/arm.rs +++ b/src/arm.rs @@ -62,44 +62,44 @@ pub unsafe fn __aeabi_ldivmod() { // TODO: These aeabi_* functions should be defined as aliases #[cfg_attr(not(test), no_mangle)] -pub extern "C" fn __aeabi_dadd(a: f64, b: f64) -> f64 { +pub extern "aapcs" fn __aeabi_dadd(a: f64, b: f64) -> f64 { ::float::add::__adddf3(a, b) } #[cfg_attr(not(test), no_mangle)] -pub extern "C" fn __aeabi_fadd(a: f32, b: f32) -> f32 { +pub extern "aapcs" fn __aeabi_fadd(a: f32, b: f32) -> f32 { ::float::add::__addsf3(a, b) } #[cfg(not(all(feature = "c", target_arch = "arm", not(target_os = "ios"), not(thumbv6m))))] #[cfg_attr(not(test), no_mangle)] -pub extern "C" fn __aeabi_idiv(a: i32, b: i32) -> i32 { +pub extern "aapcs" fn __aeabi_idiv(a: i32, b: i32) -> i32 { ::int::sdiv::__divsi3(a, b) } #[cfg_attr(not(test), no_mangle)] -pub extern "C" fn __aeabi_lasr(a: i64, b: u32) -> i64 { +pub extern "aapcs" fn __aeabi_lasr(a: i64, b: u32) -> i64 { ::int::shift::__ashrdi3(a, b) } #[cfg_attr(not(test), no_mangle)] -pub extern "C" fn __aeabi_llsl(a: u64, b: u32) -> u64 { +pub extern "aapcs" fn __aeabi_llsl(a: u64, b: u32) -> u64 { ::int::shift::__ashldi3(a, b) } 
#[cfg_attr(not(test), no_mangle)] -pub extern "C" fn __aeabi_llsr(a: u64, b: u32) -> u64 { +pub extern "aapcs" fn __aeabi_llsr(a: u64, b: u32) -> u64 { ::int::shift::__lshrdi3(a, b) } #[cfg_attr(not(test), no_mangle)] -pub extern "C" fn __aeabi_lmul(a: u64, b: u64) -> u64 { +pub extern "aapcs" fn __aeabi_lmul(a: u64, b: u64) -> u64 { ::int::mul::__muldi3(a, b) } #[cfg(not(all(feature = "c", target_arch = "arm", not(target_os = "ios"), not(thumbv6m))))] #[cfg_attr(not(test), no_mangle)] -pub extern "C" fn __aeabi_uidiv(a: u32, b: u32) -> u32 { +pub extern "aapcs" fn __aeabi_uidiv(a: u32, b: u32) -> u32 { ::int::udiv::__udivsi3(a, b) } @@ -113,55 +113,55 @@ extern "C" { // FIXME: The `*4` and `*8` variants should be defined as aliases. #[cfg_attr(not(test), no_mangle)] -pub unsafe extern "C" fn __aeabi_memcpy(dest: *mut u8, src: *const u8, n: usize) { +pub unsafe extern "aapcs" fn __aeabi_memcpy(dest: *mut u8, src: *const u8, n: usize) { memcpy(dest, src, n); } #[cfg_attr(not(test), no_mangle)] -pub unsafe extern "C" fn __aeabi_memcpy4(dest: *mut u8, src: *const u8, n: usize) { +pub unsafe extern "aapcs" fn __aeabi_memcpy4(dest: *mut u8, src: *const u8, n: usize) { memcpy(dest, src, n); } #[cfg_attr(not(test), no_mangle)] -pub unsafe extern "C" fn __aeabi_memcpy8(dest: *mut u8, src: *const u8, n: usize) { +pub unsafe extern "aapcs" fn __aeabi_memcpy8(dest: *mut u8, src: *const u8, n: usize) { memcpy(dest, src, n); } #[cfg_attr(not(test), no_mangle)] -pub unsafe extern "C" fn __aeabi_memmove(dest: *mut u8, src: *const u8, n: usize) { +pub unsafe extern "aapcs" fn __aeabi_memmove(dest: *mut u8, src: *const u8, n: usize) { memmove(dest, src, n); } #[cfg_attr(not(test), no_mangle)] -pub unsafe extern "C" fn __aeabi_memmove4(dest: *mut u8, src: *const u8, n: usize) { +pub unsafe extern "aapcs" fn __aeabi_memmove4(dest: *mut u8, src: *const u8, n: usize) { memmove(dest, src, n); } #[cfg_attr(not(test), no_mangle)] -pub unsafe extern "C" fn __aeabi_memmove8(dest: *mut u8, 
src: *const u8, n: usize) { +pub unsafe extern "aapcs" fn __aeabi_memmove8(dest: *mut u8, src: *const u8, n: usize) { memmove(dest, src, n); } // Note the different argument order #[cfg_attr(not(test), no_mangle)] -pub unsafe extern "C" fn __aeabi_memset(dest: *mut u8, n: usize, c: i32) { +pub unsafe extern "aapcs" fn __aeabi_memset(dest: *mut u8, n: usize, c: i32) { memset(dest, c, n); } #[cfg_attr(not(test), no_mangle)] -pub unsafe extern "C" fn __aeabi_memset4(dest: *mut u8, n: usize, c: i32) { +pub unsafe extern "aapcs" fn __aeabi_memset4(dest: *mut u8, n: usize, c: i32) { memset(dest, c, n); } #[cfg_attr(not(test), no_mangle)] -pub unsafe extern "C" fn __aeabi_memset8(dest: *mut u8, n: usize, c: i32) { +pub unsafe extern "aapcs" fn __aeabi_memset8(dest: *mut u8, n: usize, c: i32) { memset(dest, c, n); } #[cfg_attr(not(test), no_mangle)] -pub unsafe extern "C" fn __aeabi_memclr(dest: *mut u8, n: usize) { +pub unsafe extern "aapcs" fn __aeabi_memclr(dest: *mut u8, n: usize) { memset(dest, 0, n); } #[cfg_attr(not(test), no_mangle)] -pub unsafe extern "C" fn __aeabi_memclr4(dest: *mut u8, n: usize) { +pub unsafe extern "aapcs" fn __aeabi_memclr4(dest: *mut u8, n: usize) { memset(dest, 0, n); } #[cfg_attr(not(test), no_mangle)] -pub unsafe extern "C" fn __aeabi_memclr8(dest: *mut u8, n: usize) { +pub unsafe extern "aapcs" fn __aeabi_memclr8(dest: *mut u8, n: usize) { memset(dest, 0, n); } diff --git a/src/float/add.rs b/src/float/add.rs index 0b2490a..8bb5b4b 100644 --- a/src/float/add.rs +++ b/src/float/add.rs @@ -7,7 +7,8 @@ macro_rules! 
add { ($intrinsic:ident: $ty:ty) => { /// Returns `a + b` #[allow(unused_parens)] - #[cfg_attr(not(test), no_mangle)] + #[cfg_attr(all(not(test), not(target_arch = "arm")), no_mangle)] + #[cfg_attr(all(not(test), target_arch = "arm"), inline(always))] pub extern fn $intrinsic(a: $ty, b: $ty) -> $ty { let one = Wrapping(1 as <$ty as Float>::Int); let zero = Wrapping(0 as <$ty as Float>::Int); diff --git a/src/int/mul.rs b/src/int/mul.rs index 27709e5..046382a 100644 --- a/src/int/mul.rs +++ b/src/int/mul.rs @@ -4,7 +4,8 @@ use int::Int; macro_rules! mul { ($intrinsic:ident: $ty:ty) => { /// Returns `a * b` - #[cfg_attr(not(test), no_mangle)] + #[cfg_attr(all(not(test), not(target_arch = "arm")), no_mangle)] + #[cfg_attr(all(not(test), target_arch = "arm"), inline(always))] pub extern "C" fn $intrinsic(a: $ty, b: $ty) -> $ty { let half_bits = <$ty>::bits() / 4; let lower_mask = !0 >> half_bits; diff --git a/src/int/sdiv.rs b/src/int/sdiv.rs index 023fad4..2676428 100644 --- a/src/int/sdiv.rs +++ b/src/int/sdiv.rs @@ -42,7 +42,8 @@ macro_rules! mod_ { macro_rules! divmod { ($intrinsic:ident, $div:ident: $ty:ty) => { /// Returns `a / b` and sets `*rem = n % d` - #[cfg_attr(not(test), no_mangle)] + #[cfg_attr(all(not(test), not(target_arch = "arm")), no_mangle)] + #[cfg_attr(all(not(test), target_arch = "arm"), inline(always))] pub extern "C" fn $intrinsic(a: $ty, b: $ty, rem: &mut $ty) -> $ty { #[cfg(all(feature = "c", any(target_arch = "x86")))] extern { diff --git a/src/int/shift.rs b/src/int/shift.rs index e5dc38f..b93b7bf 100644 --- a/src/int/shift.rs +++ b/src/int/shift.rs @@ -4,6 +4,7 @@ macro_rules! 
ashl { ($intrinsic:ident: $ty:ty) => { /// Returns `a << b`, requires `b < $ty::bits()` - #[cfg_attr(not(test), no_mangle)] + #[cfg_attr(all(not(test), not(target_arch = "arm")), no_mangle)] + #[cfg_attr(all(not(test), target_arch = "arm"), inline(always))] pub extern "C" fn $intrinsic(a: $ty, b: u32) -> $ty { let half_bits = <$ty>::bits() / 2; if b & half_bits != 0 { @@ -21,6 +22,7 @@ macro_rules! ashr { ($intrinsic:ident: $ty:ty) => { /// Returns arithmetic `a >> b`, requires `b < $ty::bits()` - #[cfg_attr(not(test), no_mangle)] + #[cfg_attr(all(not(test), not(target_arch = "arm")), no_mangle)] + #[cfg_attr(all(not(test), target_arch = "arm"), inline(always))] pub extern "C" fn $intrinsic(a: $ty, b: u32) -> $ty { let half_bits = <$ty>::bits() / 2; if b & half_bits != 0 { diff --git a/src/int/udiv.rs b/src/int/udiv.rs index 57d5fe5..820c4cf 100644 --- a/src/int/udiv.rs +++ b/src/int/udiv.rs @@ -3,7 +3,8 @@ use int::{Int, LargeInt}; /// Returns `n / d` #[cfg(not(all(feature = "c", target_arch = "arm", not(target_os = "ios"), not(thumbv6m))))] -#[cfg_attr(not(test), no_mangle)] +#[cfg_attr(all(not(test), not(target_arch = "arm")), no_mangle)] +#[cfg_attr(all(not(test), target_arch = "arm"), inline(always))] pub extern "C" fn __udivsi3(n: u32, d: u32) -> u32 { // Special cases if d == 0 { diff --git a/src/lib.rs b/src/lib.rs index 0a80ad2..0da336e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -28,6 +28,10 @@ // NOTE cfg(all(feature = "c", ..)) indicate that compiler-rt provides an arch optimized // implementation of that intrinsic and we'll prefer to use that +// NOTE(aapcs, aeabi, arm) ARM targets use intrinsics named __aeabi_* instead of the intrinsics +// that follow "x86 naming convention" (e.g. addsf3). Those aeabi intrinsics must adhere to the +// AAPCS calling convention (`extern "aapcs"`) because that's how LLVM will call them. + // TODO(rust-lang/rust#37029) use e.g. checked_div(_).unwrap_or_else(|| abort()) macro_rules! udiv { ($a:expr, $b:expr) => {