diff --git a/README.md b/README.md
index 6356996..d1c57c7 100644
--- a/README.md
+++ b/README.md
@@ -170,11 +170,11 @@ features = ["c"]
 - [x] lshrdi3.c
 - [x] moddi3.c
 - [x] modsi3.c
-- [ ] muldf3.c
+- [x] muldf3.c
 - [x] muldi3.c
 - [x] mulodi4.c
 - [x] mulosi4.c
-- [ ] mulsf3.c
+- [x] mulsf3.c
 - [x] powidf2.c
 - [x] powisf2.c
 - [ ] subdf3.c
diff --git a/build.rs b/build.rs
index e94914b..be0b136 100644
--- a/build.rs
+++ b/build.rs
@@ -121,6 +121,10 @@ mod tests {
         Subdf3,
         Subsf3,
 
+        // float/mul.rs
+        Mulsf3,
+        Muldf3,
+
         // int/mul.rs
         Muldi3,
         Mulodi4,
@@ -3221,6 +3225,181 @@ fn subsf3() {
     }
 }
 
+    #[derive(Eq, Hash, PartialEq)]
+    pub struct Mulsf3 {
+        a: u32, // f32
+        b: u32, // f32
+        c: u32, // f32
+    }
+
+    impl TestCase for Mulsf3 {
+        fn name() -> &'static str {
+            "mulsf3"
+        }
+
+        fn generate<R>(rng: &mut R) -> Option<Self>
+        where
+            R: Rng,
+            Self: Sized,
+        {
+            let a = gen_large_f32(rng);
+            let b = gen_large_f32(rng);
+            let c = a * b;
+            // TODO accept NaNs. We don't do that right now because we can't
+            // check for NaN-ness on the thumb targets (due to missing
+            // intrinsics).
+            if a.is_nan() || b.is_nan() || c.is_nan() {
+                return None;
+            }
+
+            Some(
+                Mulsf3 {
+                    a: to_u32(a),
+                    b: to_u32(b),
+                    c: to_u32(c),
+                },
+            )
+        }
+
+        fn to_string(&self, buffer: &mut String) {
+            writeln!(
+                buffer,
+                "(({a}, {b}), {c}),",
+                a = self.a,
+                b = self.b,
+                c = self.c
+            )
+            .unwrap();
+        }
+
+        fn prologue() -> &'static str {
+            r#"
+#[cfg(all(target_arch = "arm",
+          not(any(target_env = "gnu", target_env = "musl")),
+          target_os = "linux",
+          test))]
+use core::mem;
+#[cfg(not(all(target_arch = "arm",
+              not(any(target_env = "gnu", target_env = "musl")),
+              target_os = "linux",
+              test)))]
+use std::mem;
+use compiler_builtins::float::mul::__mulsf3;
+
+fn mk_f32(x: u32) -> f32 {
+    unsafe { mem::transmute(x) }
+}
+
+fn to_u32(x: f32) -> u32 {
+    unsafe { mem::transmute(x) }
+}
+
+static TEST_CASES: &[((u32, u32), u32)] = &[
+"#
+        }
+
+        fn epilogue() -> &'static str {
+            "
+];
+
+#[test]
+fn mulsf3() {
+    for &((a, b), c) in TEST_CASES {
+        let c_ = __mulsf3(mk_f32(a), mk_f32(b));
+        assert_eq!(((a, b), c), ((a, b), to_u32(c_)));
+    }
+}
+"
+        }
+    }
+
+    #[derive(Eq, Hash, PartialEq)]
+    pub struct Muldf3 {
+        a: u64, // f64
+        b: u64, // f64
+        c: u64, // f64
+    }
+
+    impl TestCase for Muldf3 {
+        fn name() -> &'static str {
+            "muldf3"
+        }
+
+        fn generate<R>(rng: &mut R) -> Option<Self>
+        where
+            R: Rng,
+            Self: Sized,
+        {
+            let a = gen_large_f64(rng);
+            let b = gen_large_f64(rng);
+            let c = a * b;
+            // TODO accept NaNs. We don't do that right now because we can't
+            // check for NaN-ness on the thumb targets (due to missing
+            // intrinsics).
+            if a.is_nan() || b.is_nan() || c.is_nan() {
+                return None;
+            }
+
+            Some(
+                Muldf3 {
+                    a: to_u64(a),
+                    b: to_u64(b),
+                    c: to_u64(c),
+                },
+            )
+        }
+
+        fn to_string(&self, buffer: &mut String) {
+            writeln!(
+                buffer,
+                "(({a}, {b}), {c}),",
+                a = self.a,
+                b = self.b,
+                c = self.c
+            )
+            .unwrap();
+        }
+
+        fn prologue() -> &'static str {
+            r#"
+#[cfg(all(target_arch = "arm",
+          not(any(target_env = "gnu", target_env = "musl")),
+          target_os = "linux",
+          test))]
+use core::mem;
+#[cfg(not(all(target_arch = "arm",
+              not(any(target_env = "gnu", target_env = "musl")),
+              target_os = "linux",
+              test)))]
+use std::mem;
+use compiler_builtins::float::mul::__muldf3;
+
+fn mk_f64(x: u64) -> f64 {
+    unsafe { mem::transmute(x) }
+}
+
+fn to_u64(x: f64) -> u64 {
+    unsafe { mem::transmute(x) }
+}
+
+static TEST_CASES: &[((u64, u64), u64)] = &[
+"#
+        }
+
+        fn epilogue() -> &'static str {
+            "
+];
+
+#[test]
+fn muldf3() {
+    for &((a, b), c) in TEST_CASES {
+        let c_ = __muldf3(mk_f64(a), mk_f64(b));
+        assert_eq!(((a, b), c), ((a, b), to_u64(c_)));
+    }
+}
+"
+        }
+    }
+
     #[derive(Eq, Hash, PartialEq)]
     pub struct Udivdi3 {
         a: u64,
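Each generated case stores the operands and the expected product as raw bit patterns, so the generated `mulsf3`/`muldf3` tests compare results bit-for-bit rather than with float equality. A minimal standalone sketch of that round-trip, using the stable `to_bits`/`from_bits` API in place of the `mem::transmute` helpers above (`check_one` is an illustrative name, not part of the patch):

```rust
// Hypothetical stand-in for one generated test case: run the multiply
// under test and compare bit patterns, not float values, so signed
// zeros and 1-ulp rounding differences are caught exactly.
fn check_one(mul: impl Fn(f32, f32) -> f32, a: u32, b: u32, expected: u32) {
    let got = mul(f32::from_bits(a), f32::from_bits(b)).to_bits();
    assert_eq!(got, expected);
}

fn main() {
    // 1.5 * 2.0 == 3.0 exactly; native `*` stands in for __mulsf3 here.
    check_one(|x, y| x * y, 1.5f32.to_bits(), 2.0f32.to_bits(), 3.0f32.to_bits());
}
```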
@@ -3899,6 +4078,57 @@ macro_rules! panic {
     gen_float!(gen_f32, f32, u32, 32, 23);
     gen_float!(gen_f64, f64, u64, 64, 52);
 
+    macro_rules! gen_large_float {
+        ($name:ident,
+         $fty:ident,
+         $uty:ident,
+         $bits:expr,
+         $significand_bits:expr) => {
+            pub fn $name<R>(rng: &mut R) -> $fty
+            where
+                R: Rng,
+            {
+                const BITS: u8 = $bits;
+                const SIGNIFICAND_BITS: u8 = $significand_bits;
+
+                const SIGNIFICAND_MASK: $uty = (1 << SIGNIFICAND_BITS) - 1;
+                const SIGN_MASK: $uty = (1 << (BITS - 1));
+                const EXPONENT_MASK: $uty = !(SIGN_MASK | SIGNIFICAND_MASK);
+
+                // Assemble a float from a sign, an (unshifted) biased
+                // exponent, and a raw significand.
+                fn mk_float(sign: bool, exponent: $uty, significand: $uty) -> $fty {
+                    unsafe {
+                        mem::transmute(((sign as $uty) << (BITS - 1)) |
+                                       ((exponent << SIGNIFICAND_BITS) &
+                                        EXPONENT_MASK) |
+                                       (significand & SIGNIFICAND_MASK))
+                    }
+                }
+
+                if rng.gen_weighted_bool(10) {
+                    // Special values
+                    *rng.choose(&[-0.0,
+                                  0.0,
+                                  ::std::$fty::NAN,
+                                  ::std::$fty::INFINITY,
+                                  -::std::$fty::INFINITY])
+                        .unwrap()
+                } else if rng.gen_weighted_bool(10) {
+                    // Random exponent, empty significand (an all-ones
+                    // exponent yields an infinity)
+                    mk_float(rng.gen(), rng.gen(), 0)
+                } else if rng.gen() {
+                    // Denormalized
+                    mk_float(rng.gen(), 0, rng.gen())
+                } else {
+                    // Random anything
+                    rng.gen::<$fty>()
+                }
+            }
+        }
+    }
+
+    gen_large_float!(gen_large_f32, f32, u32, 32, 23);
+    gen_large_float!(gen_large_f64, f64, u64, 64, 52);
+
     pub fn gen_u128<R>(rng: &mut R) -> u128
     where
         R: Rng,
diff --git a/src/float/mod.rs b/src/float/mod.rs
index 23aef32..55dde9a 100644
--- a/src/float/mod.rs
+++ b/src/float/mod.rs
@@ -7,6 +7,7 @@ pub mod conv;
 pub mod add;
 pub mod pow;
 pub mod sub;
+pub mod mul;
 
 /// Trait for some basic operations on floats
 pub trait Float:
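For orientation before the new `mul` routine: it operates directly on the IEEE-754 bit layout, deriving every mask from the `Float` trait. A self-contained sketch of the concrete values for `f32` (these are the standard binary32 constants, written out here for illustration rather than copied from the crate):

```rust
fn main() {
    const SIGNIFICAND_BITS: u32 = 23;
    const SIGN_MASK: u32 = 1 << 31;                             // 0x8000_0000
    const SIGNIFICAND_MASK: u32 = (1 << SIGNIFICAND_BITS) - 1;  // 0x007f_ffff
    const EXPONENT_MASK: u32 = !(SIGN_MASK | SIGNIFICAND_MASK); // 0x7f80_0000
    const IMPLICIT_BIT: u32 = 1 << SIGNIFICAND_BITS;            // 0x0080_0000
    const QUIET_BIT: u32 = IMPLICIT_BIT >> 1;

    // inf_rep: all-ones exponent, zero significand.
    assert_eq!(f32::from_bits(EXPONENT_MASK), f32::INFINITY);
    // qnan_rep: all-ones exponent with the quiet bit set.
    assert!(f32::from_bits(EXPONENT_MASK | QUIET_BIT).is_nan());
    // A zero exponent field with a nonzero significand is a denormal,
    // strictly below the smallest normal number.
    let denormal = f32::from_bits(SIGNIFICAND_MASK);
    assert!(denormal > 0.0 && denormal < f32::MIN_POSITIVE);
}
```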
diff --git a/src/float/mul.rs b/src/float/mul.rs
new file mode 100644
index 0000000..696adea
--- /dev/null
+++ b/src/float/mul.rs
@@ -0,0 +1,191 @@
+use int::{CastInto, Int, WideInt};
+use float::Float;
+
+fn mul<F: Float>(a: F, b: F) -> F
+where
+    u32: CastInto<F::Int>,
+    F::Int: CastInto<u32>,
+    i32: CastInto<F::Int>,
+    F::Int: CastInto<i32>,
+    F::Int: WideInt,
+{
+    let one = F::Int::ONE;
+    let zero = F::Int::ZERO;
+
+    let bits = F::BITS;
+    let significand_bits = F::SIGNIFICAND_BITS;
+    let max_exponent = F::EXPONENT_MAX;
+
+    let exponent_bias = F::EXPONENT_BIAS;
+
+    let implicit_bit = F::IMPLICIT_BIT;
+    let significand_mask = F::SIGNIFICAND_MASK;
+    let sign_bit = F::SIGN_MASK as F::Int;
+    let abs_mask = sign_bit - one;
+    let exponent_mask = F::EXPONENT_MASK;
+    let inf_rep = exponent_mask;
+    let quiet_bit = implicit_bit >> 1;
+    let qnan_rep = exponent_mask | quiet_bit;
+    let exponent_bits = F::EXPONENT_BITS;
+
+    let a_rep = a.repr();
+    let b_rep = b.repr();
+
+    let a_exponent = (a_rep >> significand_bits) & max_exponent.cast();
+    let b_exponent = (b_rep >> significand_bits) & max_exponent.cast();
+    let product_sign = (a_rep ^ b_rep) & sign_bit;
+
+    let mut a_significand = a_rep & significand_mask;
+    let mut b_significand = b_rep & significand_mask;
+    let mut scale = 0;
+
+    // Detect if a or b is zero, denormal, infinity, or NaN.
+    if a_exponent.wrapping_sub(one) >= (max_exponent - 1).cast()
+        || b_exponent.wrapping_sub(one) >= (max_exponent - 1).cast()
+    {
+        let a_abs = a_rep & abs_mask;
+        let b_abs = b_rep & abs_mask;
+
+        // NaN * anything = qNaN
+        if a_abs > inf_rep {
+            return F::from_repr(a_rep | quiet_bit);
+        }
+        // anything * NaN = qNaN
+        if b_abs > inf_rep {
+            return F::from_repr(b_rep | quiet_bit);
+        }
+
+        if a_abs == inf_rep {
+            if b_abs != zero {
+                // infinity * non-zero = +/- infinity
+                return F::from_repr(a_abs | product_sign);
+            } else {
+                // infinity * zero = NaN
+                return F::from_repr(qnan_rep);
+            }
+        }
+
+        if b_abs == inf_rep {
+            if a_abs != zero {
+                // infinity * non-zero = +/- infinity
+                return F::from_repr(b_abs | product_sign);
+            } else {
+                // infinity * zero = NaN
+                return F::from_repr(qnan_rep);
+            }
+        }
+
+        // zero * anything = +/- zero
+        if a_abs == zero {
+            return F::from_repr(product_sign);
+        }
+
+        // anything * zero = +/- zero
+        if b_abs == zero {
+            return F::from_repr(product_sign);
+        }
+
+        // One or both of a and b is denormal; the other (if applicable) is
+        // a normal number. Renormalize one or both of a and b, and set
+        // scale to include the necessary exponent adjustment.
+        if a_abs < implicit_bit {
+            let (exponent, significand) = F::normalize(a_significand);
+            scale += exponent;
+            a_significand = significand;
+        }
+
+        if b_abs < implicit_bit {
+            let (exponent, significand) = F::normalize(b_significand);
+            scale += exponent;
+            b_significand = significand;
+        }
+    }
+
+    // Or in the implicit significand bit. (If we fell through from the
+    // denormal path it was already set by normalize(), but setting it
+    // twice won't hurt anything.)
+    a_significand |= implicit_bit;
+    b_significand |= implicit_bit;
+
+    // Get the significand of a*b. Before multiplying the significands,
+    // shift one of them left to left-align it in the field. Thus, the
+    // product will have (exponent_bits + 2) integral digits, all but two
+    // of which must be zero. Normalizing this result is just a conditional
+    // left-shift by one and bumping the exponent accordingly.
+    let (mut product_high, mut product_low) =
+        <F::Int as WideInt>::wide_mul(a_significand, b_significand << exponent_bits);
+
+    let a_exponent_i32: i32 = a_exponent.cast();
+    let b_exponent_i32: i32 = b_exponent.cast();
+    let mut product_exponent: i32 = a_exponent_i32
+        .wrapping_add(b_exponent_i32)
+        .wrapping_add(scale)
+        .wrapping_sub(exponent_bias as i32);
+
+    // Normalize the significand and adjust the exponent if needed.
+    if (product_high & implicit_bit) != zero {
+        product_exponent = product_exponent.wrapping_add(1);
+    } else {
+        <F::Int as WideInt>::wide_shift_left(&mut product_high, &mut product_low, 1);
+    }
+
+    // If we have overflowed the type, return +/- infinity.
+    if product_exponent >= max_exponent as i32 {
+        return F::from_repr(inf_rep | product_sign);
+    }
+
+    if product_exponent <= 0 {
+        // The result is denormal before rounding.
+        //
+        // If the result is so small that it just underflows to zero,
+        // return a zero of the appropriate sign. Mathematically there is
+        // no need to handle this case separately, but we make it a special
+        // case to simplify the shift logic.
+        let shift = one.wrapping_sub(product_exponent.cast()).cast();
+        if shift >= bits as i32 {
+            return F::from_repr(product_sign);
+        }
+
+        // Otherwise, shift the significand of the result so that the
+        // round bit is the high bit of product_low.
+        <F::Int as WideInt>::wide_shift_right_with_sticky(
+            &mut product_high,
+            &mut product_low,
+            shift,
+        )
+    } else {
+        // The result is normal before rounding; insert the exponent.
+        product_high &= significand_mask;
+        product_high |= product_exponent.cast() << significand_bits;
+    }
+
+    // Insert the sign of the result.
+    product_high |= product_sign;
+
+    // Final rounding. The final result may overflow to infinity, or
+    // underflow to zero, but those are the correct results in those cases.
+    // We use the default IEEE-754 round-to-nearest, ties-to-even rounding
+    // mode.
+    if product_low > sign_bit {
+        product_high += one;
+    }
+
+    if product_low == sign_bit {
+        product_high += product_high & one;
+    }
+
+    F::from_repr(product_high)
+}
+
+intrinsics! {
+    #[aapcs_on_arm]
+    #[arm_aeabi_alias = __aeabi_fmul]
+    pub extern "C" fn __mulsf3(a: f32, b: f32) -> f32 {
+        mul(a, b)
+    }
+
+    #[aapcs_on_arm]
+    #[arm_aeabi_alias = __aeabi_dmul]
+    pub extern "C" fn __muldf3(a: f64, b: f64) -> f64 {
+        mul(a, b)
+    }
+}
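The two-word significand product deserves a worked example. For `f32`, both significands (with the implicit bit set) are 24-bit values; pre-shifting `b` left by `exponent_bits` (8) left-aligns the 64-bit product so that at most one normalizing shift is needed. A self-contained sketch using plain `u64` arithmetic rather than the `WideInt` trait:

```rust
fn main() {
    // Significands with the implicit bit OR'd in (f32: 24 significant bits).
    let a_sig: u32 = 1 << 23; // 1.0
    let b_sig: u32 = 3 << 22; // 1.5
    let exponent_bits = 8;

    // Left-align b before multiplying, as `mul` does, so the product's
    // integral digits land at the top of the high word.
    let product = (a_sig as u64) * ((b_sig << exponent_bits) as u64);
    let (mut hi, mut lo) = ((product >> 32) as u32, product as u32);

    // 1.0 * 1.5 < 2.0, so the implicit-bit position of `hi` is clear and
    // one left shift renormalizes (the "conditional left-shift" case).
    assert_eq!(hi & (1 << 23), 0);
    hi = (hi << 1) | (lo >> 31);
    lo <<= 1;
    assert_eq!(hi, 3 << 22); // the significand of 1.5, as expected
    assert_eq!(lo, 0);       // no round or sticky bits left behind
}
```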
diff --git a/src/int/mod.rs b/src/int/mod.rs
index 24b27b1..37dac8c 100644
--- a/src/int/mod.rs
+++ b/src/int/mod.rs
@@ -249,3 +249,48 @@ cast_into!(u64);
 cast_into!(i64);
 cast_into!(u128);
 cast_into!(i128);
+
+pub trait WideInt: Int {
+    type Output: Int;
+
+    fn wide_mul(self, other: Self) -> (Self, Self);
+    fn wide_shift_left(&mut self, low: &mut Self, count: i32);
+    fn wide_shift_right_with_sticky(&mut self, low: &mut Self, count: i32);
+}
+
+macro_rules! impl_wide_int {
+    ($ty:ty, $tywide:ty, $bits:expr) => {
+        impl WideInt for $ty {
+            type Output = $ty;
+
+            fn wide_mul(self, other: Self) -> (Self, Self) {
+                let product = (self as $tywide).wrapping_mul(other as $tywide);
+                ((product >> ($bits as $ty)) as $ty, product as $ty)
+            }
+
+            // Note: assumes 0 < count < $bits, which holds at the only
+            // call site (a shift by exactly 1).
+            fn wide_shift_left(&mut self, low: &mut Self, count: i32) {
+                *self = (*self << count) | (*low >> ($bits - count));
+                *low = *low << count;
+            }
+
+            fn wide_shift_right_with_sticky(&mut self, low: &mut Self, count: i32) {
+                if count < $bits {
+                    // Collapse the shifted-out bits into a single sticky
+                    // bit in the LSB so they cannot collide with the round
+                    // bit at the top of `low`.
+                    let sticky = ((*low << ($bits - count)) != 0) as $ty;
+                    *low = *self << ($bits - count) | *low >> count | sticky;
+                    *self = *self >> count;
+                } else if count < 2 * $bits {
+                    let sticky = ((*self << (2 * $bits - count) | *low) != 0) as $ty;
+                    *low = *self >> (count - $bits) | sticky;
+                    *self = 0;
+                } else {
+                    // Everything shifts out; only the sticky bit survives.
+                    let sticky = ((*self | *low) != 0) as $ty;
+                    *low = sticky;
+                    *self = 0;
+                }
+            }
+        }
+    }
+}
+
+impl_wide_int!(u32, u64, 32);
+impl_wide_int!(u64, u128, 64);
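After `wide_shift_right_with_sticky`, the round bit sits in the high bit of the low word and every lower bit is sticky, which is why the final rounding step in `mul` can simply compare `product_low` against `sign_bit` (`1 << 31` for `f32`). A sketch of that comparison in isolation, on plain `u32`s (the function name here is invented for illustration):

```rust
// Round-to-nearest, ties-to-even on a (significand, fraction-word) pair.
fn round_nearest_even(mut sig: u32, frac: u32) -> u32 {
    const HALF: u32 = 1 << 31; // the round bit, at the top of the fraction
    if frac > HALF {
        sig += 1;       // strictly above halfway: round up
    }
    if frac == HALF {
        sig += sig & 1; // exact tie: round to the even neighbor
    }
    sig
}

fn main() {
    assert_eq!(round_nearest_even(10, (1 << 31) + 1), 11); // above half
    assert_eq!(round_nearest_even(10, (1 << 31) - 1), 10); // below half
    assert_eq!(round_nearest_even(10, 1 << 31), 10);       // tie: 10 is even
    assert_eq!(round_nearest_even(11, 1 << 31), 12);       // tie: 11 is odd
}
```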
diff --git a/tests/muldf3.rs b/tests/muldf3.rs
new file mode 100644
index 0000000..82d06d8
--- /dev/null
+++ b/tests/muldf3.rs
@@ -0,0 +1,7 @@
+#![feature(compiler_builtins_lib)]
+#![feature(i128_type)]
+#![cfg_attr(all(target_arch = "arm", not(any(target_env = "gnu", target_env = "musl")),
+                target_os = "linux", test),
+            no_std)]
+
+include!(concat!(env!("OUT_DIR"), "/muldf3.rs"));
diff --git a/tests/mulsf3.rs b/tests/mulsf3.rs
new file mode 100644
index 0000000..fe278d8
--- /dev/null
+++ b/tests/mulsf3.rs
@@ -0,0 +1,7 @@
+#![feature(compiler_builtins_lib)]
+#![feature(i128_type)]
+#![cfg_attr(all(target_arch = "arm", not(any(target_env = "gnu", target_env = "musl")),
+                target_os = "linux", test),
+            no_std)]
+
+include!(concat!(env!("OUT_DIR"), "/mulsf3.rs"));
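The generated tests above provide the real coverage; for a quick smoke test, the new intrinsics can also be called directly. A hedged sketch, assuming the crate is available as `compiler_builtins` under the same feature gates and paths the generated prologues use:

```rust
#![feature(compiler_builtins_lib)]
extern crate compiler_builtins;

use compiler_builtins::float::mul::{__muldf3, __mulsf3};

fn main() {
    // Exactly representable products compare cleanly with ==.
    assert_eq!(__mulsf3(1.5, 2.0), 3.0);
    assert_eq!(__muldf3(-0.5, 8.0), -4.0);
    // One of the special cases handled explicitly: infinity * zero = NaN.
    assert!(__muldf3(::std::f64::INFINITY, 0.0).is_nan());
}
```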