diff --git a/README.md b/README.md
index d1c57c7..5606ef4 100644
--- a/README.md
+++ b/README.md
@@ -133,11 +133,11 @@ features = ["c"]
 - [ ] arm/unordsf2vfp.S
 - [x] ashldi3.c
 - [x] ashrdi3.c
-- [ ] divdf3.c
+- [x] divdf3.c
 - [x] divdi3.c
 - [x] divmoddi4.c
 - [x] divmodsi4.c
-- [ ] divsf3.c
+- [x] divsf3.c
 - [x] divsi3.c
 - [ ] extendhfsf2.c
 - [ ] extendsfdf2.c
diff --git a/build.rs b/build.rs
index be0b136..2885d9d 100644
--- a/build.rs
+++ b/build.rs
@@ -125,6 +125,10 @@ mod tests {
         Mulsf3,
         Muldf3,
 
+        // float/div.rs
+        Divsf3,
+        Divdf3,
+
         // int/mul.rs
         Muldi3,
         Mulodi4,
@@ -3399,6 +3403,187 @@ fn muldf3() {
 }
 }
 
+    #[derive(Eq, Hash, PartialEq)]
+    pub struct Divsf3 {
+        a: u32, // f32
+        b: u32, // f32
+        c: u32, // f32
+    }
+
+    impl TestCase for Divsf3 {
+        fn name() -> &'static str {
+            "divsf3"
+        }
+
+        fn generate<R>(rng: &mut R) -> Option<Self>
+        where
+            R: Rng,
+            Self: Sized,
+        {
+            let a = gen_large_f32(rng);
+            let b = gen_large_f32(rng);
+            if b == 0.0 {
+                return None;
+            }
+            let c = a / b;
+            // TODO accept NaNs. We don't do that right now because we can't check
+            // for NaN-ness on the thumb targets (due to missing intrinsics)
+            if a.is_nan() || b.is_nan() || c.is_nan()
+                || c.abs() <= unsafe { mem::transmute(16777215u32) } {
+                return None;
+            }
+
+            Some(
+                Divsf3 {
+                    a: to_u32(a),
+                    b: to_u32(b),
+                    c: to_u32(c),
+                },
+            )
+        }
+
+        fn to_string(&self, buffer: &mut String) {
+            writeln!(
+                buffer,
+                "(({a}, {b}), {c}),",
+                a = self.a,
+                b = self.b,
+                c = self.c
+            )
+            .unwrap();
+        }
+
+        fn prologue() -> &'static str {
+            r#"
+#[cfg(all(target_arch = "arm",
+          not(any(target_env = "gnu", target_env = "musl")),
+          target_os = "linux",
+          test))]
+use core::mem;
+#[cfg(not(all(target_arch = "arm",
+              not(any(target_env = "gnu", target_env = "musl")),
+              target_os = "linux",
+              test)))]
+use std::mem;
+use compiler_builtins::float::div::__divsf3;
+
+fn mk_f32(x: u32) -> f32 {
+    unsafe { mem::transmute(x) }
+}
+
+fn to_u32(x: f32) -> u32 {
+    unsafe { mem::transmute(x) }
+}
+
+static TEST_CASES: &[((u32, u32), u32)] = &[
+"#
+        }
+
+        fn epilogue() -> &'static str {
+            "
+];
+
+#[test]
+fn divsf3() {
+    for &((a, b), c) in TEST_CASES {
+        let c_ = __divsf3(mk_f32(a), mk_f32(b));
+        assert_eq!(((a, b), c), ((a, b), to_u32(c_)));
+    }
+}
+"
+        }
+    }
+
+    #[derive(Eq, Hash, PartialEq)]
+    pub struct Divdf3 {
+        a: u64, // f64
+        b: u64, // f64
+        c: u64, // f64
+    }
+
+    impl TestCase for Divdf3 {
+        fn name() -> &'static str {
+            "divdf3"
+        }
+
+        fn generate<R>(rng: &mut R) -> Option<Self>
+        where
+            R: Rng,
+            Self: Sized,
+        {
+            let a = gen_large_f64(rng);
+            let b = gen_large_f64(rng);
+            if b == 0.0 {
+                return None;
+            }
+            let c = a / b;
+            // TODO accept NaNs. We don't do that right now because we can't check
+            // for NaN-ness on the thumb targets (due to missing intrinsics)
+            if a.is_nan() || b.is_nan() || c.is_nan()
+                || c.abs() <= unsafe { mem::transmute(4503599627370495u64) } {
+                return None;
+            }
+
+            Some(
+                Divdf3 {
+                    a: to_u64(a),
+                    b: to_u64(b),
+                    c: to_u64(c),
+                },
+            )
+        }
+
+        fn to_string(&self, buffer: &mut String) {
+            writeln!(
+                buffer,
+                "(({a}, {b}), {c}),",
+                a = self.a,
+                b = self.b,
+                c = self.c
+            )
+            .unwrap();
+        }
+
+        fn prologue() -> &'static str {
+            r#"
+#[cfg(all(target_arch = "arm",
+          not(any(target_env = "gnu", target_env = "musl")),
+          target_os = "linux",
+          test))]
+use core::mem;
+#[cfg(not(all(target_arch = "arm",
+              not(any(target_env = "gnu", target_env = "musl")),
+              target_os = "linux",
+              test)))]
+use std::mem;
+use compiler_builtins::float::div::__divdf3;
+
+fn mk_f64(x: u64) -> f64 {
+    unsafe { mem::transmute(x) }
+}
+
+fn to_u64(x: f64) -> u64 {
+    unsafe { mem::transmute(x) }
+}
+
+static TEST_CASES: &[((u64, u64), u64)] = &[
+"#
+        }
+
+        fn epilogue() -> &'static str {
+            "
+];
+
+#[test]
+fn divdf3() {
+    for &((a, b), c) in TEST_CASES {
+        let c_ = __divdf3(mk_f64(a), mk_f64(b));
+        assert_eq!(((a, b), c), ((a, b), to_u64(c_)));
+    }
+}
+"
+        }
+    }
+
 #[derive(Eq, Hash, PartialEq)]
 pub struct Udivdi3 {
diff --git a/src/float/div.rs b/src/float/div.rs
new file mode 100644
index 0000000..e0e287a
--- /dev/null
+++ b/src/float/div.rs
@@ -0,0 +1,457 @@
+use int::{CastInto, Int, WideInt};
+use float::Float;
+
+fn div32<F: Float>(a: F, b: F) -> F
+where
+    u32: CastInto<F::Int>,
+    F::Int: CastInto<u32>,
+    i32: CastInto<F::Int>,
+    F::Int: CastInto<i32>,
+    F::Int: WideInt,
+{
+    let one = F::Int::ONE;
+    let zero = F::Int::ZERO;
+
+    // let bits = F::BITS;
+    let significand_bits = F::SIGNIFICAND_BITS;
+    let max_exponent = F::EXPONENT_MAX;
+
+    let exponent_bias = F::EXPONENT_BIAS;
+
+    let implicit_bit = F::IMPLICIT_BIT;
+    let significand_mask = F::SIGNIFICAND_MASK;
+    let sign_bit = F::SIGN_MASK as F::Int;
+    let abs_mask = sign_bit - one;
+    let exponent_mask = F::EXPONENT_MASK;
+    let inf_rep = exponent_mask;
+    let quiet_bit = implicit_bit >> 1;
+    let qnan_rep = exponent_mask | quiet_bit;
+
+    #[inline(always)]
+    fn negate_u32(a: u32) -> u32 {
+        (<i32>::wrapping_neg(a as i32)) as u32
+    }
+
+    let a_rep = a.repr();
+    let b_rep = b.repr();
+
+    let a_exponent = (a_rep >> significand_bits) & max_exponent.cast();
+    let b_exponent = (b_rep >> significand_bits) & max_exponent.cast();
+    let quotient_sign = (a_rep ^ b_rep) & sign_bit;
+
+    let mut a_significand = a_rep & significand_mask;
+    let mut b_significand = b_rep & significand_mask;
+    let mut scale = 0;
+
+    // Detect if a or b is zero, denormal, infinity, or NaN.
+    if a_exponent.wrapping_sub(one) >= (max_exponent - 1).cast()
+        || b_exponent.wrapping_sub(one) >= (max_exponent - 1).cast()
+    {
+        let a_abs = a_rep & abs_mask;
+        let b_abs = b_rep & abs_mask;
+
+        // NaN / anything = qNaN
+        if a_abs > inf_rep {
+            return F::from_repr(a_rep | quiet_bit);
+        }
+        // anything / NaN = qNaN
+        if b_abs > inf_rep {
+            return F::from_repr(b_rep | quiet_bit);
+        }
+
+        if a_abs == inf_rep {
+            if b_abs == inf_rep {
+                // infinity / infinity = NaN
+                return F::from_repr(qnan_rep);
+            } else {
+                // infinity / anything else = +/- infinity
+                return F::from_repr(a_abs | quotient_sign);
+            }
+        }
+
+        // anything else / infinity = +/- 0
+        if b_abs == inf_rep {
+            return F::from_repr(quotient_sign);
+        }
+
+        if a_abs == zero {
+            if b_abs == zero {
+                // zero / zero = NaN
+                return F::from_repr(qnan_rep);
+            } else {
+                // zero / anything else = +/- zero
+                return F::from_repr(quotient_sign);
+            }
+        }
+
+        // anything else / zero = +/- infinity
+        if b_abs == zero {
+            return F::from_repr(inf_rep | quotient_sign);
+        }
+
+        // one or both of a or b is denormal, the other (if applicable) is a
+        // normal number. Renormalize one or both of a and b, and set scale to
+        // include the necessary exponent adjustment.
+        if a_abs < implicit_bit {
+            let (exponent, significand) = F::normalize(a_significand);
+            scale += exponent;
+            a_significand = significand;
+        }
+
+        if b_abs < implicit_bit {
+            let (exponent, significand) = F::normalize(b_significand);
+            scale -= exponent;
+            b_significand = significand;
+        }
+    }
+
+    // Or in the implicit significand bit. (If we fell through from the
+    // denormal path it was already set by normalize( ), but setting it twice
+    // won't hurt anything.)
+    a_significand |= implicit_bit;
+    b_significand |= implicit_bit;
+    let mut quotient_exponent: i32 = CastInto::<i32>::cast(a_exponent)
+        .wrapping_sub(CastInto::<i32>::cast(b_exponent))
+        .wrapping_add(scale);
+
+    // Align the significand of b as a Q31 fixed-point number in the range
+    // [1, 2.0) and get a Q32 approximate reciprocal using a small minimax
+    // polynomial approximation: reciprocal = 3/4 + 1/sqrt(2) - b/2. This
+    // is accurate to about 3.5 binary digits.
+    let q31b = CastInto::<u32>::cast(b_significand << 8.cast());
+    let mut reciprocal = (0x7504f333u32).wrapping_sub(q31b);
+
+    // Now refine the reciprocal estimate using a Newton-Raphson iteration:
+    //
+    //     x1 = x0 * (2 - x0 * b)
+    //
+    // This doubles the number of correct binary digits in the approximation
+    // with each iteration, so after three iterations, we have about 28 binary
+    // digits of accuracy.
+    let mut correction: u32;
+    correction = negate_u32(((reciprocal as u64).wrapping_mul(q31b as u64) >> 32) as u32);
+    reciprocal = ((reciprocal as u64).wrapping_mul(correction as u64) as u64 >> 31) as u32;
+    correction = negate_u32(((reciprocal as u64).wrapping_mul(q31b as u64) >> 32) as u32);
+    reciprocal = ((reciprocal as u64).wrapping_mul(correction as u64) as u64 >> 31) as u32;
+    correction = negate_u32(((reciprocal as u64).wrapping_mul(q31b as u64) >> 32) as u32);
+    reciprocal = ((reciprocal as u64).wrapping_mul(correction as u64) as u64 >> 31) as u32;
+
+    // Exhaustive testing shows that the error in reciprocal after three steps
+    // is in the interval [-0x1.f58108p-31, 0x1.d0e48cp-29], in line with our
+    // expectations. We bump the reciprocal by a tiny value to force the error
+    // to be strictly positive (in the range [0x1.4fdfp-37, 0x1.287246p-29], to
+    // be specific). This also causes 1/1 to give a sensible approximation
+    // instead of zero (due to overflow).
+    reciprocal = reciprocal.wrapping_sub(2);
+
+    // The numerical reciprocal is accurate to within 2^-28, lies in the
+    // interval [0x1.000000eep-1, 0x1.fffffffcp-1], and is strictly smaller
+    // than the true reciprocal of b. Multiplying a by this reciprocal thus
+    // gives a numerical q = a/b in Q24 with the following properties:
+    //
+    //    1. q < a/b
+    //    2. q is in the interval [0x1.000000eep-1, 0x1.fffffffcp0)
+    //    3. the error in q is at most 2^-24 + 2^-27 -- the 2^-24 term comes
+    //       from the fact that we truncate the product, and the 2^-27 term
+    //       is the error in the reciprocal of b scaled by the maximum
+    //       possible value of a. As a consequence of this error bound,
+    //       either q or nextafter(q) is the correctly rounded result.
+    let (mut quotient, _) = <F::Int as WideInt>::wide_mul(a_significand << 1, reciprocal.cast());
+
+    // Two cases: quotient is in [0.5, 1.0) or quotient is in [1.0, 2.0).
+    // In either case, we are going to compute a residual of the form
+    //
+    //     r = a - q*b
+    //
+    // We know from the construction of q that r satisfies:
+    //
+    //     0 <= r < ulp(q)*b
+    //
+    // if r is greater than 1/2 ulp(q)*b, then q rounds up. Otherwise, we
+    // already have the correct result. The exact halfway case cannot occur.
+    // We also take this time to right shift quotient if it falls in the [1,2)
+    // range and adjust the exponent accordingly.
+    let residual = if quotient < (implicit_bit << 1) {
+        quotient_exponent = quotient_exponent.wrapping_sub(1);
+        (a_significand << (significand_bits + 1)).wrapping_sub(quotient.wrapping_mul(b_significand))
+    } else {
+        quotient >>= 1;
+        (a_significand << significand_bits).wrapping_sub(quotient.wrapping_mul(b_significand))
+    };
+
+    let written_exponent = quotient_exponent.wrapping_add(exponent_bias as i32);
+
+    if written_exponent >= max_exponent as i32 {
+        // If we have overflowed the exponent, return infinity.
+        return F::from_repr(inf_rep | quotient_sign);
+    } else if written_exponent < 1 {
+        // Flush denormals to zero. In the future, it would be nice to add
+        // code to round them correctly.
+        return F::from_repr(quotient_sign);
+    } else {
+        let round = ((residual << 1) > b_significand) as u32;
+        // Clear the implicit bit
+        let mut abs_result = quotient & significand_mask;
+        // Insert the exponent
+        abs_result |= written_exponent.cast() << significand_bits;
+        // Round
+        abs_result = abs_result.wrapping_add(round.cast());
+        // Insert the sign and return
+        return F::from_repr(abs_result | quotient_sign);
+    }
+}
+
+fn div64<F: Float>(a: F, b: F) -> F
+where
+    u32: CastInto<F::Int>,
+    F::Int: CastInto<u32>,
+    i32: CastInto<F::Int>,
+    F::Int: CastInto<i32>,
+    u64: CastInto<F::Int>,
+    F::Int: CastInto<u64>,
+    i64: CastInto<F::Int>,
+    F::Int: CastInto<i64>,
+    F::Int: WideInt,
+{
+    let one = F::Int::ONE;
+    let zero = F::Int::ZERO;
+
+    // let bits = F::BITS;
+    let significand_bits = F::SIGNIFICAND_BITS;
+    let max_exponent = F::EXPONENT_MAX;
+
+    let exponent_bias = F::EXPONENT_BIAS;
+
+    let implicit_bit = F::IMPLICIT_BIT;
+    let significand_mask = F::SIGNIFICAND_MASK;
+    let sign_bit = F::SIGN_MASK as F::Int;
+    let abs_mask = sign_bit - one;
+    let exponent_mask = F::EXPONENT_MASK;
+    let inf_rep = exponent_mask;
+    let quiet_bit = implicit_bit >> 1;
+    let qnan_rep = exponent_mask | quiet_bit;
+    // let exponent_bits = F::EXPONENT_BITS;
+
+    #[inline(always)]
+    fn negate_u32(a: u32) -> u32 {
+        (<i32>::wrapping_neg(a as i32)) as u32
+    }
+
+    #[inline(always)]
+    fn negate_u64(a: u64) -> u64 {
+        (<i64>::wrapping_neg(a as i64)) as u64
+    }
+
+    let a_rep = a.repr();
+    let b_rep = b.repr();
+
+    let a_exponent = (a_rep >> significand_bits) & max_exponent.cast();
+    let b_exponent = (b_rep >> significand_bits) & max_exponent.cast();
+    let quotient_sign = (a_rep ^ b_rep) & sign_bit;
+
+    let mut a_significand = a_rep & significand_mask;
+    let mut b_significand = b_rep & significand_mask;
+    let mut scale = 0;
+
+    // Detect if a or b is zero, denormal, infinity, or NaN.
+    if a_exponent.wrapping_sub(one) >= (max_exponent - 1).cast()
+        || b_exponent.wrapping_sub(one) >= (max_exponent - 1).cast()
+    {
+        let a_abs = a_rep & abs_mask;
+        let b_abs = b_rep & abs_mask;
+
+        // NaN / anything = qNaN
+        if a_abs > inf_rep {
+            return F::from_repr(a_rep | quiet_bit);
+        }
+        // anything / NaN = qNaN
+        if b_abs > inf_rep {
+            return F::from_repr(b_rep | quiet_bit);
+        }
+
+        if a_abs == inf_rep {
+            if b_abs == inf_rep {
+                // infinity / infinity = NaN
+                return F::from_repr(qnan_rep);
+            } else {
+                // infinity / anything else = +/- infinity
+                return F::from_repr(a_abs | quotient_sign);
+            }
+        }
+
+        // anything else / infinity = +/- 0
+        if b_abs == inf_rep {
+            return F::from_repr(quotient_sign);
+        }
+
+        if a_abs == zero {
+            if b_abs == zero {
+                // zero / zero = NaN
+                return F::from_repr(qnan_rep);
+            } else {
+                // zero / anything else = +/- zero
+                return F::from_repr(quotient_sign);
+            }
+        }
+
+        // anything else / zero = +/- infinity
+        if b_abs == zero {
+            return F::from_repr(inf_rep | quotient_sign);
+        }
+
+        // one or both of a or b is denormal, the other (if applicable) is a
+        // normal number. Renormalize one or both of a and b, and set scale to
+        // include the necessary exponent adjustment.
+        if a_abs < implicit_bit {
+            let (exponent, significand) = F::normalize(a_significand);
+            scale += exponent;
+            a_significand = significand;
+        }
+
+        if b_abs < implicit_bit {
+            let (exponent, significand) = F::normalize(b_significand);
+            scale -= exponent;
+            b_significand = significand;
+        }
+    }
+
+    // Or in the implicit significand bit. (If we fell through from the
+    // denormal path it was already set by normalize( ), but setting it twice
+    // won't hurt anything.)
+    a_significand |= implicit_bit;
+    b_significand |= implicit_bit;
+    let mut quotient_exponent: i32 = CastInto::<i32>::cast(a_exponent)
+        .wrapping_sub(CastInto::<i32>::cast(b_exponent))
+        .wrapping_add(scale);
+
+    // Align the significand of b as a Q31 fixed-point number in the range
+    // [1, 2.0) and get a Q32 approximate reciprocal using a small minimax
+    // polynomial approximation: reciprocal = 3/4 + 1/sqrt(2) - b/2. This
+    // is accurate to about 3.5 binary digits.
+    let q31b = CastInto::<u32>::cast(b_significand >> 21.cast());
+    let mut recip32 = (0x7504f333u32).wrapping_sub(q31b);
+
+    // Now refine the reciprocal estimate using a Newton-Raphson iteration:
+    //
+    //     x1 = x0 * (2 - x0 * b)
+    //
+    // This doubles the number of correct binary digits in the approximation
+    // with each iteration, so after three iterations, we have about 28 binary
+    // digits of accuracy.
+    let mut correction32: u32;
+    correction32 = negate_u32(((recip32 as u64).wrapping_mul(q31b as u64) >> 32) as u32);
+    recip32 = ((recip32 as u64).wrapping_mul(correction32 as u64) >> 31) as u32;
+    correction32 = negate_u32(((recip32 as u64).wrapping_mul(q31b as u64) >> 32) as u32);
+    recip32 = ((recip32 as u64).wrapping_mul(correction32 as u64) >> 31) as u32;
+    correction32 = negate_u32(((recip32 as u64).wrapping_mul(q31b as u64) >> 32) as u32);
+    recip32 = ((recip32 as u64).wrapping_mul(correction32 as u64) >> 31) as u32;
+
+    // recip32 might have overflowed to exactly zero in the preceding
+    // computation if the high word of b is exactly 1.0. This would sabotage
+    // the full-width final stage of the computation that follows, so we adjust
+    // recip32 downward by one bit.
+    recip32 = recip32.wrapping_sub(1);
+
+    // We need to perform one more iteration to get us to 56 binary digits;
+    // the last iteration needs to happen with extra precision.
+    let q63blo = CastInto::<u32>::cast(b_significand << 11.cast());
+    let correction: u64;
+    let mut reciprocal: u64;
+    correction = negate_u64(
+        (recip32 as u64)
+            .wrapping_mul(q31b as u64)
+            .wrapping_add((recip32 as u64).wrapping_mul(q63blo as u64) >> 32),
+    );
+    let c_hi = (correction >> 32) as u32;
+    let c_lo = correction as u32;
+    reciprocal = (recip32 as u64)
+        .wrapping_mul(c_hi as u64)
+        .wrapping_add((recip32 as u64).wrapping_mul(c_lo as u64) >> 32);
+
+    // We already adjusted the 32-bit estimate, now we need to adjust the final
+    // 64-bit reciprocal estimate downward to ensure that it is strictly smaller
+    // than the infinitely precise exact reciprocal. Because the computation
+    // of the Newton-Raphson step is truncating at every step, this adjustment
+    // is small; most of the work is already done.
+    reciprocal = reciprocal.wrapping_sub(2);
+
+    // The numerical reciprocal is accurate to within 2^-56, lies in the
+    // interval [0.5, 1.0), and is strictly smaller than the true reciprocal
+    // of b. Multiplying a by this reciprocal thus gives a numerical q = a/b
+    // in Q53 with the following properties:
+    //
+    //    1. q < a/b
+    //    2. q is in the interval [0.5, 2.0)
+    //    3. the error in q is bounded away from 2^-53 (actually, we have a
+    //       couple of bits to spare, but this is all we need).
+
+    // We need a 64 x 64 multiply high to compute q, which isn't a basic
+    // operation in C, so we need to be a little bit fussy.
+    // let mut quotient: F::Int = ((((reciprocal as u64)
+    //     .wrapping_mul(CastInto::<u32>::cast(a_significand << 1) as u64))
+    //     >> 32) as u32)
+    //     .cast();
+
+    let (mut quotient, _) = <F::Int as WideInt>::wide_mul(a_significand << 2, reciprocal.cast());
+
+    // Two cases: quotient is in [0.5, 1.0) or quotient is in [1.0, 2.0).
+    // In either case, we are going to compute a residual of the form
+    //
+    //     r = a - q*b
+    //
+    // We know from the construction of q that r satisfies:
+    //
+    //     0 <= r < ulp(q)*b
+    //
+    // if r is greater than 1/2 ulp(q)*b, then q rounds up. Otherwise, we
+    // already have the correct result. The exact halfway case cannot occur.
+    // We also take this time to right shift quotient if it falls in the [1,2)
+    // range and adjust the exponent accordingly.
+    let residual = if quotient < (implicit_bit << 1) {
+        quotient_exponent = quotient_exponent.wrapping_sub(1);
+        (a_significand << (significand_bits + 1)).wrapping_sub(quotient.wrapping_mul(b_significand))
+    } else {
+        quotient >>= 1;
+        (a_significand << significand_bits).wrapping_sub(quotient.wrapping_mul(b_significand))
+    };
+
+    let written_exponent = quotient_exponent.wrapping_add(exponent_bias as i32);
+
+    if written_exponent >= max_exponent as i32 {
+        // If we have overflowed the exponent, return infinity.
+        return F::from_repr(inf_rep | quotient_sign);
+    } else if written_exponent < 1 {
+        // Flush denormals to zero. In the future, it would be nice to add
+        // code to round them correctly.
+        return F::from_repr(quotient_sign);
+    } else {
+        let round = ((residual << 1) > b_significand) as u32;
+        // Clear the implicit bit
+        let mut abs_result = quotient & significand_mask;
+        // Insert the exponent
+        abs_result |= written_exponent.cast() << significand_bits;
+        // Round
+        abs_result = abs_result.wrapping_add(round.cast());
+        // Insert the sign and return
+        return F::from_repr(abs_result | quotient_sign);
+    }
+}
+
+intrinsics! {
+    #[arm_aeabi_alias = __aeabi_fdiv]
+    pub extern "C" fn __divsf3(a: f32, b: f32) -> f32 {
+        div32(a, b)
+    }
+
+    #[arm_aeabi_alias = __aeabi_ddiv]
+    pub extern "C" fn __divdf3(a: f64, b: f64) -> f64 {
+        div64(a, b)
+    }
+
+}
diff --git a/src/float/mod.rs b/src/float/mod.rs
index 55dde9a..6daf7b8 100644
--- a/src/float/mod.rs
+++ b/src/float/mod.rs
@@ -8,6 +8,7 @@ pub mod add;
 pub mod pow;
 pub mod sub;
 pub mod mul;
+pub mod div;
 
 /// Trait for some basic operations on floats
 pub trait Float:
diff --git a/tests/divdf3.rs b/tests/divdf3.rs
new file mode 100644
index 0000000..98d32d1
--- /dev/null
+++ b/tests/divdf3.rs
@@ -0,0 +1,7 @@
+#![feature(compiler_builtins_lib)]
+#![feature(i128_type)]
+#![cfg_attr(all(target_arch = "arm", not(any(target_env = "gnu", target_env = "musl")),
+                target_os = "linux", test),
+            no_std)]
+
+include!(concat!(env!("OUT_DIR"), "/divdf3.rs"));
diff --git a/tests/divsf3.rs b/tests/divsf3.rs
new file mode 100644
index 0000000..5cf3e86
--- /dev/null
+++ b/tests/divsf3.rs
@@ -0,0 +1,7 @@
+#![feature(compiler_builtins_lib)]
+#![feature(i128_type)]
+#![cfg_attr(all(target_arch = "arm", not(any(target_env = "gnu", target_env = "musl")),
+                target_os = "linux", test),
+            no_std)]
+
+include!(concat!(env!("OUT_DIR"), "/divsf3.rs"));
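
The reciprocal refinement that both div32 and div64 rely on can be exercised on its own. The standalone sketch below is not part of the patch; the helper name q32_reciprocal and the example value are made up for illustration. It uses the same seed constant 0x7504f333 and the same three Newton-Raphson steps as div32 to approximate 1/b for a Q31 fixed-point b in [1, 2):

// Hypothetical helper mirroring the reciprocal refinement in div32 above.
fn q32_reciprocal(q31b: u32) -> u32 {
    // Seed: reciprocal ≈ 3/4 + 1/sqrt(2) - b/2, good to about 3.5 bits.
    let mut reciprocal = 0x7504f333u32.wrapping_sub(q31b);
    for _ in 0..3 {
        // One Newton-Raphson step, x1 = x0 * (2 - x0 * b), in fixed point:
        // the high 32 bits of reciprocal * q31b give x0 * b as Q31, and
        // negating modulo 2^32 yields (2 - x0 * b) as Q31.
        let correction =
            (((reciprocal as u64).wrapping_mul(q31b as u64) >> 32) as u32).wrapping_neg();
        reciprocal = ((reciprocal as u64).wrapping_mul(correction as u64) >> 31) as u32;
    }
    // Bias the estimate down so it stays strictly below the true reciprocal
    // (and so that 1/1 does not overflow to zero), as div32 does.
    reciprocal.wrapping_sub(2)
}

fn main() {
    // b = 1.5 as a Q31 fixed-point value in [1, 2): 1.5 * 2^31.
    let q31b = 3u32 << 30;
    let estimate = q32_reciprocal(q31b);
    // The exact Q32 reciprocal of 1.5 is 2/3 * 2^32 ≈ 0xAAAAAAAA.
    println!("estimate = {:#010x}, exact ≈ {:#010x}", estimate, 0xAAAA_AAAAu32);
}

For b = 1.5 the printed estimate comes out just below the exact Q32 value, consistent with the comments above: the truncating iterations plus the final wrapping_sub(2) keep the estimate strictly smaller than the true reciprocal, which is what lets the rounding step decide between q and nextafter(q) from the residual alone.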