diff --git a/README.md b/README.md
index 6356996..d1c57c7 100644
--- a/README.md
+++ b/README.md
@@ -170,11 +170,11 @@ features = ["c"]
 - [x] lshrdi3.c
 - [x] moddi3.c
 - [x] modsi3.c
-- [ ] muldf3.c
+- [x] muldf3.c
 - [x] muldi3.c
 - [x] mulodi4.c
 - [x] mulosi4.c
-- [ ] mulsf3.c
+- [x] mulsf3.c
 - [x] powidf2.c
 - [x] powisf2.c
 - [ ] subdf3.c
diff --git a/build.rs b/build.rs
index e94914b..be0b136 100644
--- a/build.rs
+++ b/build.rs
@@ -121,6 +121,10 @@ mod tests {
         Subdf3,
         Subsf3,
 
+        // float/mul.rs
+        Mulsf3,
+        Muldf3,
+
         // int/mul.rs
         Muldi3,
         Mulodi4,
@@ -3221,6 +3225,181 @@ fn subsf3() {
     }
 }
 
+    #[derive(Eq, Hash, PartialEq)]
+    pub struct Mulsf3 {
+        a: u32, // f32
+        b: u32, // f32
+        c: u32, // f32
+    }
+
+    impl TestCase for Mulsf3 {
+        fn name() -> &'static str {
+            "mulsf3"
+        }
+
+        fn generate<R>(rng: &mut R) -> Option<Self>
+        where
+            R: Rng,
+            Self: Sized,
+        {
+            let a = gen_large_f32(rng);
+            let b = gen_large_f32(rng);
+            let c = a * b;
+            // TODO accept NaNs. We don't do that right now because we can't
+            // check for NaN-ness on the thumb targets (due to missing
+            // intrinsics).
+            if a.is_nan() || b.is_nan() || c.is_nan() {
+                return None;
+            }
+
+            Some(
+                Mulsf3 {
+                    a: to_u32(a),
+                    b: to_u32(b),
+                    c: to_u32(c),
+                },
+            )
+        }
+
+        fn to_string(&self, buffer: &mut String) {
+            writeln!(
+                buffer,
+                "(({a}, {b}), {c}),",
+                a = self.a,
+                b = self.b,
+                c = self.c
+            )
+            .unwrap();
+        }
+
+        fn prologue() -> &'static str {
+            r#"
+#[cfg(all(target_arch = "arm",
+          not(any(target_env = "gnu", target_env = "musl")),
+          target_os = "linux",
+          test))]
+use core::mem;
+#[cfg(not(all(target_arch = "arm",
+              not(any(target_env = "gnu", target_env = "musl")),
+              target_os = "linux",
+              test)))]
+use std::mem;
+use compiler_builtins::float::mul::__mulsf3;
+
+fn mk_f32(x: u32) -> f32 {
+    unsafe { mem::transmute(x) }
+}
+
+fn to_u32(x: f32) -> u32 {
+    unsafe { mem::transmute(x) }
+}
+
+static TEST_CASES: &[((u32, u32), u32)] = &[
+"#
+        }
+
+        fn epilogue() -> &'static str {
+            "
+];
+
+#[test]
+fn mulsf3() {
+    for &((a, b), c) in TEST_CASES {
+        let c_ = __mulsf3(mk_f32(a), mk_f32(b));
+        assert_eq!(((a, b), c), ((a, b), to_u32(c_)));
+    }
+}
+"
+        }
+    }
+
+    #[derive(Eq, Hash, PartialEq)]
+    pub struct Muldf3 {
+        a: u64, // f64
+        b: u64, // f64
+        c: u64, // f64
+    }
+
+    impl TestCase for Muldf3 {
+        fn name() -> &'static str {
+            "muldf3"
+        }
+
+        fn generate<R>(rng: &mut R) -> Option<Self>
+        where
+            R: Rng,
+            Self: Sized,
+        {
+            let a = gen_large_f64(rng);
+            let b = gen_large_f64(rng);
+            let c = a * b;
+            // TODO accept NaNs. We don't do that right now because we can't
+            // check for NaN-ness on the thumb targets (due to missing
+            // intrinsics).
+            if a.is_nan() || b.is_nan() || c.is_nan() {
+                return None;
+            }
+
+            Some(
+                Muldf3 {
+                    a: to_u64(a),
+                    b: to_u64(b),
+                    c: to_u64(c),
+                },
+            )
+        }
+
+        fn to_string(&self, buffer: &mut String) {
+            writeln!(
+                buffer,
+                "(({a}, {b}), {c}),",
+                a = self.a,
+                b = self.b,
+                c = self.c
+            )
+            .unwrap();
+        }
+
+        fn prologue() -> &'static str {
+            r#"
+#[cfg(all(target_arch = "arm",
+          not(any(target_env = "gnu", target_env = "musl")),
+          target_os = "linux",
+          test))]
+use core::mem;
+#[cfg(not(all(target_arch = "arm",
+              not(any(target_env = "gnu", target_env = "musl")),
+              target_os = "linux",
+              test)))]
+use std::mem;
+use compiler_builtins::float::mul::__muldf3;
+
+fn mk_f64(x: u64) -> f64 {
+    unsafe { mem::transmute(x) }
+}
+
+fn to_u64(x: f64) -> u64 {
+    unsafe { mem::transmute(x) }
+}
+
+static TEST_CASES: &[((u64, u64), u64)] = &[
+"#
+        }
+
+        fn epilogue() -> &'static str {
+            "
+];
+
+#[test]
+fn muldf3() {
+    for &((a, b), c) in TEST_CASES {
+        let c_ = __muldf3(mk_f64(a), mk_f64(b));
+        assert_eq!(((a, b), c), ((a, b), to_u64(c_)));
+    }
+}
+"
+        }
+    }
+
     #[derive(Eq, Hash, PartialEq)]
     pub struct Udivdi3 {
         a: u64,
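Each generated case stores the operands and the expected product as raw bit patterns, so the generated `mulsf3`/`muldf3` tests compare results bit-for-bit rather than with float equality. A minimal standalone sketch of that round-trip, using the stable `to_bits`/`from_bits` API in place of the `mem::transmute` helpers above (`check_one` is an illustrative name, not part of the patch):

```rust
// Hypothetical stand-in for one generated test case: run the multiply
// under test and compare bit patterns, not float values, so signed
// zeros and 1-ulp rounding differences are caught exactly.
fn check_one(mul: impl Fn(f32, f32) -> f32, a: u32, b: u32, expected: u32) {
    let got = mul(f32::from_bits(a), f32::from_bits(b)).to_bits();
    assert_eq!(got, expected);
}

fn main() {
    // 1.5 * 2.0 == 3.0 exactly; native `*` stands in for __mulsf3 here.
    check_one(|x, y| x * y, 1.5f32.to_bits(), 2.0f32.to_bits(), 3.0f32.to_bits());
}
```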
@@ -3899,6 +4078,57 @@ macro_rules! panic {
     gen_float!(gen_f32, f32, u32, 32, 23);
     gen_float!(gen_f64, f64, u64, 64, 52);
 
+    macro_rules! gen_large_float {
+        ($name:ident,
+         $fty:ident,
+         $uty:ident,
+         $bits:expr,
+         $significand_bits:expr) => {
+            pub fn $name<R>(rng: &mut R) -> $fty
+            where
+                R: Rng,
+            {
+                const BITS: u8 = $bits;
+                const SIGNIFICAND_BITS: u8 = $significand_bits;
+
+                const SIGNIFICAND_MASK: $uty = (1 << SIGNIFICAND_BITS) - 1;
+                const SIGN_MASK: $uty = (1 << (BITS - 1));
+                const EXPONENT_MASK: $uty = !(SIGN_MASK | SIGNIFICAND_MASK);
+
+                // Assemble a float from a sign, an (unshifted) biased
+                // exponent, and a raw significand.
+                fn mk_float(sign: bool, exponent: $uty, significand: $uty) -> $fty {
+                    unsafe {
+                        mem::transmute(((sign as $uty) << (BITS - 1)) |
+                                       ((exponent << SIGNIFICAND_BITS) &
+                                        EXPONENT_MASK) |
+                                       (significand & SIGNIFICAND_MASK))
+                    }
+                }
+
+                if rng.gen_weighted_bool(10) {
+                    // Special values
+                    *rng.choose(&[-0.0,
+                                  0.0,
+                                  ::std::$fty::NAN,
+                                  ::std::$fty::INFINITY,
+                                  -::std::$fty::INFINITY])
+                        .unwrap()
+                } else if rng.gen_weighted_bool(10) {
+                    // Random exponent, empty significand (an all-ones
+                    // exponent yields an infinity)
+                    mk_float(rng.gen(), rng.gen(), 0)
+                } else if rng.gen() {
+                    // Denormalized
+                    mk_float(rng.gen(), 0, rng.gen())
+                } else {
+                    // Random anything
+                    rng.gen::<$fty>()
+                }
+            }
+        }
+    }
+
+    gen_large_float!(gen_large_f32, f32, u32, 32, 23);
+    gen_large_float!(gen_large_f64, f64, u64, 64, 52);
+
     pub fn gen_u128<R>(rng: &mut R) -> u128
     where
         R: Rng,
diff --git a/src/float/mod.rs b/src/float/mod.rs
index 23aef32..55dde9a 100644
--- a/src/float/mod.rs
+++ b/src/float/mod.rs
@@ -7,6 +7,7 @@ pub mod conv;
 pub mod add;
 pub mod pow;
 pub mod sub;
+pub mod mul;
 
 /// Trait for some basic operations on floats
 pub trait Float:
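For orientation before the new `mul` routine: it operates directly on the IEEE-754 bit layout, deriving every mask from the `Float` trait. A self-contained sketch of the concrete values for `f32` (these are the standard binary32 constants, written out here for illustration rather than copied from the crate):

```rust
fn main() {
    const SIGNIFICAND_BITS: u32 = 23;
    const SIGN_MASK: u32 = 1 << 31;                             // 0x8000_0000
    const SIGNIFICAND_MASK: u32 = (1 << SIGNIFICAND_BITS) - 1;  // 0x007f_ffff
    const EXPONENT_MASK: u32 = !(SIGN_MASK | SIGNIFICAND_MASK); // 0x7f80_0000
    const IMPLICIT_BIT: u32 = 1 << SIGNIFICAND_BITS;            // 0x0080_0000
    const QUIET_BIT: u32 = IMPLICIT_BIT >> 1;

    // inf_rep: all-ones exponent, zero significand.
    assert_eq!(f32::from_bits(EXPONENT_MASK), f32::INFINITY);
    // qnan_rep: all-ones exponent with the quiet bit set.
    assert!(f32::from_bits(EXPONENT_MASK | QUIET_BIT).is_nan());
    // A zero exponent field with a nonzero significand is a denormal,
    // strictly below the smallest normal number.
    let denormal = f32::from_bits(SIGNIFICAND_MASK);
    assert!(denormal > 0.0 && denormal < f32::MIN_POSITIVE);
}
```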
diff --git a/src/float/mul.rs b/src/float/mul.rs
new file mode 100644
index 0000000..696adea
--- /dev/null
+++ b/src/float/mul.rs
@@ -0,0 +1,191 @@
+use int::{CastInto, Int, WideInt};
+use float::Float;
+
+fn mul<F: Float>(a: F, b: F) -> F
+where
+    u32: CastInto<F::Int>,
+    F::Int: CastInto<u32>,
+    i32: CastInto<F::Int>,
+    F::Int: CastInto<i32>,
+    F::Int: WideInt,
+{
+    let one = F::Int::ONE;
+    let zero = F::Int::ZERO;
+
+    let bits = F::BITS;
+    let significand_bits = F::SIGNIFICAND_BITS;
+    let max_exponent = F::EXPONENT_MAX;
+
+    let exponent_bias = F::EXPONENT_BIAS;
+
+    let implicit_bit = F::IMPLICIT_BIT;
+    let significand_mask = F::SIGNIFICAND_MASK;
+    let sign_bit = F::SIGN_MASK as F::Int;
+    let abs_mask = sign_bit - one;
+    let exponent_mask = F::EXPONENT_MASK;
+    let inf_rep = exponent_mask;
+    let quiet_bit = implicit_bit >> 1;
+    let qnan_rep = exponent_mask | quiet_bit;
+    let exponent_bits = F::EXPONENT_BITS;
+
+    let a_rep = a.repr();
+    let b_rep = b.repr();
+
+    let a_exponent = (a_rep >> significand_bits) & max_exponent.cast();
+    let b_exponent = (b_rep >> significand_bits) & max_exponent.cast();
+    let product_sign = (a_rep ^ b_rep) & sign_bit;
+
+    let mut a_significand = a_rep & significand_mask;
+    let mut b_significand = b_rep & significand_mask;
+    let mut scale = 0;
+
+    // Detect if a or b is zero, denormal, infinity, or NaN.
+    if a_exponent.wrapping_sub(one) >= (max_exponent - 1).cast()
+        || b_exponent.wrapping_sub(one) >= (max_exponent - 1).cast()
+    {
+        let a_abs = a_rep & abs_mask;
+        let b_abs = b_rep & abs_mask;
+
+        // NaN * anything = qNaN
+        if a_abs > inf_rep {
+            return F::from_repr(a_rep | quiet_bit);
+        }
+        // anything * NaN = qNaN
+        if b_abs > inf_rep {
+            return F::from_repr(b_rep | quiet_bit);
+        }
+
+        if a_abs == inf_rep {
+            if b_abs != zero {
+                // infinity * non-zero = +/- infinity
+                return F::from_repr(a_abs | product_sign);
+            } else {
+                // infinity * zero = NaN
+                return F::from_repr(qnan_rep);
+            }
+        }
+
+        if b_abs == inf_rep {
+            if a_abs != zero {
+                // infinity * non-zero = +/- infinity
+                return F::from_repr(b_abs | product_sign);
+            } else {
+                // infinity * zero = NaN
+                return F::from_repr(qnan_rep);
+            }
+        }
+
+        // zero * anything = +/- zero
+        if a_abs == zero {
+            return F::from_repr(product_sign);
+        }
+
+        // anything * zero = +/- zero
+        if b_abs == zero {
+            return F::from_repr(product_sign);
+        }
+
+        // One or both of a and b is denormal; the other (if applicable) is
+        // a normal number. Renormalize one or both of a and b, and set
+        // scale to include the necessary exponent adjustment.
+        if a_abs < implicit_bit {
+            let (exponent, significand) = F::normalize(a_significand);
+            scale += exponent;
+            a_significand = significand;
+        }
+
+        if b_abs < implicit_bit {
+            let (exponent, significand) = F::normalize(b_significand);
+            scale += exponent;
+            b_significand = significand;
+        }
+    }
+
+    // Or in the implicit significand bit. (If we fell through from the
+    // denormal path it was already set by normalize(), but setting it
+    // twice won't hurt anything.)
+    a_significand |= implicit_bit;
+    b_significand |= implicit_bit;
+
+    // Get the significand of a*b. Before multiplying the significands,
+    // shift one of them left to left-align it in the field. Thus, the
+    // product will have (exponent_bits + 2) integral digits, all but two
+    // of which must be zero. Normalizing this result is just a conditional
+    // left-shift by one and bumping the exponent accordingly.
+    let (mut product_high, mut product_low) =
+        <F::Int as WideInt>::wide_mul(a_significand, b_significand << exponent_bits);
+
+    let a_exponent_i32: i32 = a_exponent.cast();
+    let b_exponent_i32: i32 = b_exponent.cast();
+    let mut product_exponent: i32 = a_exponent_i32
+        .wrapping_add(b_exponent_i32)
+        .wrapping_add(scale)
+        .wrapping_sub(exponent_bias as i32);
+
+    // Normalize the significand and adjust the exponent if needed.
+    if (product_high & implicit_bit) != zero {
+        product_exponent = product_exponent.wrapping_add(1);
+    } else {
+        <F::Int as WideInt>::wide_shift_left(&mut product_high, &mut product_low, 1);
+    }
+
+    // If we have overflowed the type, return +/- infinity.
+    if product_exponent >= max_exponent as i32 {
+        return F::from_repr(inf_rep | product_sign);
+    }
+
+    if product_exponent <= 0 {
+        // The result is denormal before rounding.
+        //
+        // If the result is so small that it just underflows to zero,
+        // return a zero of the appropriate sign. Mathematically there is
+        // no need to handle this case separately, but we make it a special
+        // case to simplify the shift logic.
+        let shift = one.wrapping_sub(product_exponent.cast()).cast();
+        if shift >= bits as i32 {
+            return F::from_repr(product_sign);
+        }
+
+        // Otherwise, shift the significand of the result so that the
+        // round bit is the high bit of product_low.
+        <F::Int as WideInt>::wide_shift_right_with_sticky(
+            &mut product_high,
+            &mut product_low,
+            shift,
+        )
+    } else {
+        // The result is normal before rounding; insert the exponent.
+        product_high &= significand_mask;
+        product_high |= product_exponent.cast() << significand_bits;
+    }
+
+    // Insert the sign of the result.
+    product_high |= product_sign;
+
+    // Final rounding. The final result may overflow to infinity, or
+    // underflow to zero, but those are the correct results in those cases.
+    // We use the default IEEE-754 round-to-nearest, ties-to-even rounding
+    // mode.
+    if product_low > sign_bit {
+        product_high += one;
+    }
+
+    if product_low == sign_bit {
+        product_high += product_high & one;
+    }
+
+    F::from_repr(product_high)
+}
+
+intrinsics! {
+    #[aapcs_on_arm]
+    #[arm_aeabi_alias = __aeabi_fmul]
+    pub extern "C" fn __mulsf3(a: f32, b: f32) -> f32 {
+        mul(a, b)
+    }
+
+    #[aapcs_on_arm]
+    #[arm_aeabi_alias = __aeabi_dmul]
+    pub extern "C" fn __muldf3(a: f64, b: f64) -> f64 {
+        mul(a, b)
+    }
+}
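The two-word significand product deserves a worked example. For `f32`, both significands (with the implicit bit set) are 24-bit values; pre-shifting `b` left by `exponent_bits` (8) left-aligns the 64-bit product so that at most one normalizing shift is needed. A self-contained sketch using plain `u64` arithmetic rather than the `WideInt` trait:

```rust
fn main() {
    // Significands with the implicit bit OR'd in (f32: 24 significant bits).
    let a_sig: u32 = 1 << 23; // 1.0
    let b_sig: u32 = 3 << 22; // 1.5
    let exponent_bits = 8;

    // Left-align b before multiplying, as `mul` does, so the product's
    // integral digits land at the top of the high word.
    let product = (a_sig as u64) * ((b_sig << exponent_bits) as u64);
    let (mut hi, mut lo) = ((product >> 32) as u32, product as u32);

    // 1.0 * 1.5 < 2.0, so the implicit-bit position of `hi` is clear and
    // one left shift renormalizes (the "conditional left-shift" case).
    assert_eq!(hi & (1 << 23), 0);
    hi = (hi << 1) | (lo >> 31);
    lo <<= 1;
    assert_eq!(hi, 3 << 22); // the significand of 1.5, as expected
    assert_eq!(lo, 0);       // no round or sticky bits left behind
}
```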
diff --git a/src/int/mod.rs b/src/int/mod.rs
index 24b27b1..37dac8c 100644
--- a/src/int/mod.rs
+++ b/src/int/mod.rs
@@ -249,3 +249,48 @@ cast_into!(u64);
 cast_into!(i64);
 cast_into!(u128);
 cast_into!(i128);
+
+pub trait WideInt: Int {
+    type Output: Int;
+
+    fn wide_mul(self, other: Self) -> (Self, Self);
+    fn wide_shift_left(&mut self, low: &mut Self, count: i32);
+    fn wide_shift_right_with_sticky(&mut self, low: &mut Self, count: i32);
+}
+
+macro_rules! impl_wide_int {
+    ($ty:ty, $tywide:ty, $bits:expr) => {
+        impl WideInt for $ty {
+            type Output = $ty;
+
+            fn wide_mul(self, other: Self) -> (Self, Self) {
+                let product = (self as $tywide).wrapping_mul(other as $tywide);
+                ((product >> ($bits as $ty)) as $ty, product as $ty)
+            }
+
+            // Note: assumes 0 < count < $bits, which holds at the only
+            // call site (a shift by exactly 1).
+            fn wide_shift_left(&mut self, low: &mut Self, count: i32) {
+                *self = (*self << count) | (*low >> ($bits - count));
+                *low = *low << count;
+            }
+
+            fn wide_shift_right_with_sticky(&mut self, low: &mut Self, count: i32) {
+                if count < $bits {
+                    // Collapse the shifted-out bits into a single sticky
+                    // bit in the LSB so they cannot collide with the round
+                    // bit at the top of `low`.
+                    let sticky = ((*low << ($bits - count)) != 0) as $ty;
+                    *low = *self << ($bits - count) | *low >> count | sticky;
+                    *self = *self >> count;
+                } else if count < 2 * $bits {
+                    let sticky = ((*self << (2 * $bits - count) | *low) != 0) as $ty;
+                    *low = *self >> (count - $bits) | sticky;
+                    *self = 0;
+                } else {
+                    // Everything shifts out; only the sticky bit survives.
+                    let sticky = ((*self | *low) != 0) as $ty;
+                    *low = sticky;
+                    *self = 0;
+                }
+            }
+        }
+    }
+}
+
+impl_wide_int!(u32, u64, 32);
+impl_wide_int!(u64, u128, 64);
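After `wide_shift_right_with_sticky`, the round bit sits in the high bit of the low word and every lower bit is sticky, which is why the final rounding step in `mul` can simply compare `product_low` against `sign_bit` (`1 << 31` for `f32`). A sketch of that comparison in isolation, on plain `u32`s (the function name here is invented for illustration):

```rust
// Round-to-nearest, ties-to-even on a (significand, fraction-word) pair.
fn round_nearest_even(mut sig: u32, frac: u32) -> u32 {
    const HALF: u32 = 1 << 31; // the round bit, at the top of the fraction
    if frac > HALF {
        sig += 1;       // strictly above halfway: round up
    }
    if frac == HALF {
        sig += sig & 1; // exact tie: round to the even neighbor
    }
    sig
}

fn main() {
    assert_eq!(round_nearest_even(10, (1 << 31) + 1), 11); // above half
    assert_eq!(round_nearest_even(10, (1 << 31) - 1), 10); // below half
    assert_eq!(round_nearest_even(10, 1 << 31), 10);       // tie: 10 is even
    assert_eq!(round_nearest_even(11, 1 << 31), 12);       // tie: 11 is odd
}
```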
diff --git a/tests/muldf3.rs b/tests/muldf3.rs
new file mode 100644
index 0000000..82d06d8
--- /dev/null
+++ b/tests/muldf3.rs
@@ -0,0 +1,7 @@
+#![feature(compiler_builtins_lib)]
+#![feature(i128_type)]
+#![cfg_attr(all(target_arch = "arm", not(any(target_env = "gnu", target_env = "musl")),
+                target_os = "linux", test),
+            no_std)]
+
+include!(concat!(env!("OUT_DIR"), "/muldf3.rs"));
diff --git a/tests/mulsf3.rs b/tests/mulsf3.rs
new file mode 100644
index 0000000..fe278d8
--- /dev/null
+++ b/tests/mulsf3.rs
@@ -0,0 +1,7 @@
+#![feature(compiler_builtins_lib)]
+#![feature(i128_type)]
+#![cfg_attr(all(target_arch = "arm", not(any(target_env = "gnu", target_env = "musl")),
+                target_os = "linux", test),
+            no_std)]
+
+include!(concat!(env!("OUT_DIR"), "/mulsf3.rs"));
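The generated tests above provide the real coverage; for a quick smoke test, the new intrinsics can also be called directly. A hedged sketch, assuming the crate is available as `compiler_builtins` under the same feature gates and paths the generated prologues use:

```rust
#![feature(compiler_builtins_lib)]
extern crate compiler_builtins;

use compiler_builtins::float::mul::{__muldf3, __mulsf3};

fn main() {
    // Exactly representable products compare cleanly with ==.
    assert_eq!(__mulsf3(1.5, 2.0), 3.0);
    assert_eq!(__muldf3(-0.5, 8.0), -4.0);
    // One of the special cases handled explicitly: infinity * zero = NaN.
    assert!(__muldf3(::std::f64::INFINITY, 0.0).is_nan());
}
```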