From baab4fd89cdd945e46fed31166e5dcad7224ed87 Mon Sep 17 00:00:00 2001
From: Paolo Teti
Date: Mon, 17 Sep 2018 19:37:18 +0200
Subject: [PATCH] Conversion from a wider to a narrower IEEE-754 floating-point type

Adds generic conversion from a wider to a narrower IEEE-754
floating-point type.

Implement `__truncdfsf2` and `__truncdfsf2vfp` and
associated test-cases.
---
Review note: src/float/truncate.rs below was revised during review (normal-path
narrowing and rounding, quiet-NaN bit and payload, denormal path). The blob
hash on its `index` line predates that revision.

 README.md             |   4 +-
 build.rs              |   1 -
 src/float/mod.rs      |   1 +
 src/float/truncate.rs | 132 ++++++++++++++++++++++++++++++++++++++++++
 testcrate/build.rs    |  18 +++++++
 5 files changed, 153 insertions(+), 3 deletions(-)
 create mode 100644 src/float/truncate.rs

diff --git a/README.md b/README.md
index cae8852..fceaa63 100644
--- a/README.md
+++ b/README.md
@@ -129,7 +129,7 @@ features = ["c"]
 - [x] arm/softfloat-alias.list
 - [x] arm/subdf3vfp.S
 - [x] arm/subsf3vfp.S
-- [ ] arm/truncdfsf2vfp.S
+- [x] arm/truncdfsf2vfp.S
 - [ ] arm/udivmodsi4.S (generic version is done)
 - [ ] arm/udivsi3.S (generic version is done)
 - [ ] arm/umodsi3.S (generic version is done)
@@ -186,7 +186,7 @@ features = ["c"]
 - [x] subdf3.c
 - [x] subsf3.c
 - [ ] truncdfhf2.c
-- [ ] truncdfsf2.c
+- [x] truncdfsf2.c
 - [ ] truncsfhf2.c
 - [x] udivdi3.c
 - [x] udivmoddi4.c
diff --git a/build.rs b/build.rs
index 6f2cc76..917dd96 100644
--- a/build.rs
+++ b/build.rs
@@ -174,7 +174,6 @@ mod c {
             "subvdi3.c",
             "subvsi3.c",
             "truncdfhf2.c",
-            "truncdfsf2.c",
             "truncsfhf2.c",
             "ucmpdi2.c",
         ],
diff --git a/src/float/mod.rs b/src/float/mod.rs
index 3bb13ab..2b8ddb9 100644
--- a/src/float/mod.rs
+++ b/src/float/mod.rs
@@ -11,6 +11,7 @@ pub mod sub;
 pub mod mul;
 pub mod div;
 pub mod extend;
+pub mod truncate;
 
 /// Trait for some basic operations on floats
 pub trait Float:
diff --git a/src/float/truncate.rs b/src/float/truncate.rs
new file mode 100644
index 0000000..99d4807
--- /dev/null
+++ b/src/float/truncate.rs
@@ -0,0 +1,132 @@
+use float::Float;
+use int::{CastInto, Int};
+
+/// Generic conversion from a wider to a narrower IEEE-754 floating-point type.
+///
+/// `F` is the wider source format and `R` the narrower destination format.
+/// The result is rounded to nearest with ties going to even. Inputs too large
+/// for `R` become infinity, inputs too small become a denormal or zero, and a
+/// NaN input yields a quiet NaN that keeps the top bits of the payload.
+fn truncate<F: Float, R: Float>(a: F) -> R
+where
+    F::Int: CastInto<u64>,
+    u64: CastInto<F::Int>,
+    F::Int: CastInto<u32>,
+    u32: CastInto<F::Int>,
+    u32: CastInto<R::Int>,
+    R::Int: CastInto<u32>,
+    F::Int: CastInto<R::Int>,
+{
+    let src_zero = F::Int::ZERO;
+    let src_one = F::Int::ONE;
+    let src_bits = F::BITS;
+    let src_sig_bits = F::SIGNIFICAND_BITS;
+    let src_exp_bias = F::EXPONENT_BIAS;
+    let src_min_normal = F::IMPLICIT_BIT;
+    let src_sig_mask = F::SIGNIFICAND_MASK;
+    let src_infinity = F::EXPONENT_MASK;
+    let src_sign_mask = F::SIGN_MASK;
+    let src_abs_mask = src_sign_mask - src_one;
+    // The quiet bit is the most significant bit of the significand field.
+    let src_qnan = src_one << (src_sig_bits - 1);
+    let src_nan_code = src_qnan - src_one;
+
+    let dst_bits = R::BITS;
+    let dst_sig_bits = R::SIGNIFICAND_BITS;
+    let dst_inf_exp = R::EXPONENT_MAX;
+    let dst_exp_bias = R::EXPONENT_BIAS;
+    let dst_zero = R::Int::ZERO;
+    let dst_one = R::Int::ONE;
+    let dst_qnan = dst_one << (dst_sig_bits - 1);
+    let dst_nan_code = dst_qnan - dst_one;
+    // Infinity in the destination format: maximum exponent, zero significand.
+    let dst_inf_exp_bits: R::Int = dst_inf_exp.cast();
+    let dst_infinity = dst_inf_exp_bits << dst_sig_bits;
+
+    // Number of low significand bits dropped by the conversion.
+    let sig_bits_delta = src_sig_bits - dst_sig_bits;
+    let round_mask = (src_one << sig_bits_delta) - src_one;
+    let halfway = src_one << (sig_bits_delta - 1);
+
+    // Source encodings of the smallest normal destination value and of the
+    // first value whose exponent no longer fits the destination.
+    let underflow_exp: F::Int = (src_exp_bias + 1 - dst_exp_bias).cast();
+    let overflow_exp: F::Int = (src_exp_bias + dst_inf_exp - dst_exp_bias).cast();
+    let underflow = underflow_exp << src_sig_bits;
+    let overflow = overflow_exp << src_sig_bits;
+
+    // Break a into a sign and the representation of its absolute value.
+    let a_abs = a.repr() & src_abs_mask;
+    let sign = a.repr() & src_sign_mask;
+
+    let abs_result: R::Int = if a_abs.wrapping_sub(underflow) < a_abs.wrapping_sub(overflow) {
+        // The exponent of a is within the range of normal numbers in the
+        // destination format. Shift out the extra significand bits and rebias
+        // the exponent while still in the source width, so that no high bits
+        // are lost before the narrowing cast.
+        let bias_delta: F::Int = (src_exp_bias - dst_exp_bias).cast();
+        let rebased = (a_abs >> sig_bits_delta) - (bias_delta << dst_sig_bits);
+        let mut result: R::Int = rebased.cast();
+        let round_bits = a_abs & round_mask;
+        if round_bits > halfway {
+            result += dst_one; // Round to nearest
+        } else if round_bits == halfway {
+            result += result & dst_one; // Ties to even
+        }
+        result
+    } else if a_abs > src_infinity {
+        // a is NaN.
+        // Conjure the result by beginning with infinity, setting the qNaN
+        // bit and inserting the (truncated) trailing NaN field.
+        let payload: R::Int = ((a_abs & src_nan_code) >> sig_bits_delta).cast();
+        dst_infinity | dst_qnan | (payload & dst_nan_code)
+    } else if a_abs >= overflow {
+        // a overflows to infinity.
+        dst_infinity
+    } else {
+        // a underflows on conversion to the destination type or is an exact
+        // zero. The result may be a denormal or zero. Extract the exponent
+        // to get the shift amount for the denormalization.
+        let a_exp: u32 = (a_abs >> src_sig_bits).cast();
+        let shift = src_exp_bias - dst_exp_bias - a_exp + 1;
+        let significand = (a.repr() & src_sig_mask) | src_min_normal;
+
+        if shift > src_sig_bits {
+            dst_zero
+        } else {
+            // Right shift by the denormalization amount, folding every bit
+            // shifted out into a single sticky bit so rounding still sees it.
+            let sticky = if (significand << (src_bits - shift)) != src_zero {
+                src_one
+            } else {
+                src_zero
+            };
+            let denormalized = (significand >> shift) | sticky;
+            let mut result: R::Int = (denormalized >> sig_bits_delta).cast();
+            let round_bits = denormalized & round_mask;
+            if round_bits > halfway {
+                result += dst_one; // Round to nearest
+            } else if round_bits == halfway {
+                result += result & dst_one; // Ties to even
+            }
+            result
+        }
+    };
+
+    // Finally apply the sign bit
+    let dst_sign: R::Int = (sign >> (src_bits - dst_bits)).cast();
+    R::from_repr(abs_result | dst_sign)
+}
+
+intrinsics! {
+    #[aapcs_on_arm]
+    #[arm_aeabi_alias = __aeabi_d2f]
+    pub extern "C" fn __truncdfsf2(a: f64) -> f32 {
+        truncate(a)
+    }
+
+    #[cfg(target_arch = "arm")]
+    pub extern "C" fn __truncdfsf2vfp(a: f64) -> f32 {
+        a as f32
+    }
+}
diff --git a/testcrate/build.rs b/testcrate/build.rs
index d862e0d..f02a67c 100644
--- a/testcrate/build.rs
+++ b/testcrate/build.rs
@@ -348,6 +348,24 @@ fn main() {
             "builtins::float::extend::__extendsfdf2vfp(a)");
     }
 
+    // float/truncate.rs
+    gen(|a: MyF64| {
+            if a.0.is_nan() {
+                return None;
+            }
+            Some(a.0 as f32)
+        },
+        "builtins::float::truncate::__truncdfsf2(a)");
+    if target_arch_arm {
+        gen(|a: LargeF64| {
+                if a.0.is_nan() {
+                    return None;
+                }
+                Some(a.0 as f32)
+            },
+            "builtins::float::truncate::__truncdfsf2vfp(a)");
+    }
+
     // float/conv.rs
     gen(|a: MyF64| i64(a.0).ok(),
         "builtins::float::conv::__fixdfdi(a)");