diff --git a/README.md b/README.md
index d66d888..0810edf 100644
--- a/README.md
+++ b/README.md
@@ -193,9 +193,9 @@ features = ["c"]
 
 These builtins are needed to support 128-bit integers, which are in the process of being added to Rust.
 
-- [ ] ashlti3.c
-- [ ] ashrti3.c
-- [ ] divti3.c
+- [x] ashlti3.c
+- [x] ashrti3.c
+- [x] divti3.c
 - [ ] fixdfti.c
 - [ ] fixsfti.c
 - [ ] fixunsdfti.c
@@ -204,13 +204,13 @@ These builtins are needed to support 128-bit integers, which are in the process
 - [ ] floattisf.c
 - [ ] floatuntidf.c
 - [ ] floatuntisf.c
-- [ ] lshrti3.c
-- [ ] modti3.c
-- [ ] muloti4.c
-- [ ] multi3.c
-- [ ] udivmodti4.c
-- [ ] udivti3.c
-- [ ] umodti3.c
+- [x] lshrti3.c
+- [x] modti3.c
+- [x] muloti4.c
+- [x] multi3.c
+- [x] udivmodti4.c
+- [x] udivti3.c
+- [x] umodti3.c
 
 ## Unimplemented functions
 
diff --git a/build.rs b/build.rs
index e6ad4c6..ca58b67 100644
--- a/build.rs
+++ b/build.rs
@@ -150,7 +150,6 @@ fn main() {
                          "int_util.c",
                          "muldc3.c",
                          "muldf3.c",
-                         "muloti4.c",
                          "mulsc3.c",
                          "mulsf3.c",
                          "mulvdi3.c",
@@ -179,13 +178,10 @@ fn main() {
             sources.extend(&["absvti2.c",
                              "addtf3.c",
                              "addvti3.c",
-                             "ashlti3.c",
-                             "ashrti3.c",
                              "clzti2.c",
                              "cmpti2.c",
                              "ctzti2.c",
                              "divtf3.c",
-                             "divti3.c",
                              "ffsti2.c",
                              "fixdfti.c",
                              "fixsfti.c",
@@ -199,10 +195,7 @@ fn main() {
                              "floatuntidf.c",
                              "floatuntisf.c",
                              "floatuntixf.c",
-                             "lshrti3.c",
-                             "modti3.c",
                              "multf3.c",
-                             "multi3.c",
                              "mulvti3.c",
                              "negti2.c",
                              "negvti2.c",
@@ -212,10 +205,7 @@ fn main() {
                              "subtf3.c",
                              "subvti3.c",
                              "trampoline_setup.c",
-                             "ucmpti2.c",
-                             "udivmodti4.c",
-                             "udivti3.c",
-                             "umodti3.c"]);
+                             "ucmpti2.c"]);
         }
 
         if target_vendor == "apple" {
diff --git a/compiler-rt/compiler-rt-cdylib/build.rs b/compiler-rt/compiler-rt-cdylib/build.rs
index a4d53cc..269070c 100644
--- a/compiler-rt/compiler-rt-cdylib/build.rs
+++ b/compiler-rt/compiler-rt-cdylib/build.rs
@@ -60,6 +60,17 @@ fn main() {
         "addsf3.c",
         "powidf2.c",
         "powisf2.c",
+        // 128 bit integers
+        "lshrti3.c",
+        "modti3.c",
+        "muloti4.c",
+        "multi3.c",
+        "udivmodti4.c",
+        "udivti3.c",
+        "umodti3.c",
+        "ashlti3.c",
+        "ashrti3.c",
+        "divti3.c",
     ]);
 
     let builtins_dir = Path::new("compiler-rt/lib/builtins");
diff --git a/compiler-rt/compiler-rt-cdylib/src/lib.rs b/compiler-rt/compiler-rt-cdylib/src/lib.rs
index 81affa2..a724614 100644
--- a/compiler-rt/compiler-rt-cdylib/src/lib.rs
+++ b/compiler-rt/compiler-rt-cdylib/src/lib.rs
@@ -58,6 +58,36 @@ declare!(___adddf3, __adddf3);
 declare!(___powisf2, __powisf2);
 declare!(___powidf2, __powidf2);
 
+#[cfg(all(not(windows),
+          not(target_arch = "mips64"),
+          not(target_arch = "mips64el"),
+          target_pointer_width="64"))]
+pub mod int_128 { // 128-bit integer builtins; only compiled for targets matching the cfg above
+    extern { // NOTE(review): declared with empty signatures; presumably `declare!` only needs the symbols — confirm
+        fn __lshrti3();
+        fn __modti3();
+        fn __muloti4();
+        fn __multi3();
+        fn __udivmodti4();
+        fn __udivti3();
+        fn __umodti3();
+        fn __ashlti3();
+        fn __ashrti3();
+        fn __divti3();
+    }
+
+    declare!(___lshrti3, __lshrti3); // one `declare!` per extern symbol, named with an extra leading underscore
+    declare!(___modti3, __modti3);
+    declare!(___muloti4, __muloti4);
+    declare!(___multi3, __multi3);
+    declare!(___udivmodti4, __udivmodti4);
+    declare!(___udivti3, __udivti3);
+    declare!(___umodti3, __umodti3);
+    declare!(___ashlti3, __ashlti3);
+    declare!(___ashrti3, __ashrti3);
+    declare!(___divti3, __divti3);
+}
+
 #[lang = "eh_personality"]
 fn eh_personality() {}
 #[lang = "panic_fmt"]
diff --git a/src/bin/intrinsics.rs b/src/bin/intrinsics.rs
index e0b619f..4c5c884 100644
--- a/src/bin/intrinsics.rs
+++ b/src/bin/intrinsics.rs
@@ -12,6 +12,7 @@
 #![feature(lang_items)]
 #![feature(libc)]
 #![feature(start)]
+#![feature(i128_type)]
 #![no_std]
 
 #[cfg(not(thumb))]
@@ -300,6 +301,42 @@ mod intrinsics {
     pub fn umoddi3(a: u64, b: u64) -> u64 {
         a % b
     }
+
+    pub fn muloti4(a: u128, b: u128) -> Option<u128> {
+        a.checked_mul(b) // NOTE(review): `__muloti4` is instantiated for `i128` in src/int/mul.rs; confirm an unsigned `checked_mul` reaches it
+    }
+
+    pub fn multi3(a: u128, b: u128) -> u128 {
+        a.wrapping_mul(b)
+    }
+
+    pub fn ashlti3(a: u128, b: usize) -> u128 {
+        a << b // left shift, the operation `__ashlti3` implements (was `>>`)
+    }
+
+    pub fn ashrti3(a: u128, b: usize) -> u128 {
+        ((a as i128) >> b) as u128 // arithmetic (sign-propagating) right shift, as in `__ashrti3` (was `<<`)
+    }
+
+    pub fn lshrti3(a: i128, b: usize) -> i128 {
+        ((a as u128) >> b) as i128 // logical (zero-filling) right shift, as in `__lshrti3` (signed `>>` is arithmetic)
+    }
+
+    pub fn udivti3(a: u128, b: u128) -> u128 {
+        a / b
+    }
+
+    pub fn umodti3(a: u128, b: u128) -> u128 {
+        a % b
+    }
+
+    pub fn divti3(a: i128, b: i128) -> i128 {
+        a / b
+    }
+
+    pub fn modti3(a: i128, b: i128) -> i128 {
+        a % b
+    }
 }
 
 #[cfg(feature = "c")]
@@ -356,6 +393,15 @@ fn run() {
     bb(powidf2(bb(2.), bb(3)));
     bb(powisf2(bb(2.), bb(3)));
     bb(umoddi3(bb(2), bb(3)));
+    bb(muloti4(bb(2), bb(2)));
+    bb(multi3(bb(2), bb(2)));
+    bb(ashlti3(bb(2), bb(2)));
+    bb(ashrti3(bb(2), bb(2)));
+    bb(lshrti3(bb(2), bb(2)));
+    bb(udivti3(bb(2), bb(2)));
+    bb(umodti3(bb(2), bb(2)));
+    bb(divti3(bb(2), bb(2)));
+    bb(modti3(bb(2), bb(2)));
 }
 
 #[cfg(all(feature = "c", not(thumb)))]
diff --git a/src/int/mod.rs b/src/int/mod.rs
index 37e0537..647410c 100644
--- a/src/int/mod.rs
+++ b/src/int/mod.rs
@@ -1,3 +1,14 @@
+macro_rules! hty { // "high-half type" of `$ty`; the bare `LargeInt` path must be in scope at the use site
+    ($ty:ty) => {
+        <$ty as LargeInt>::HighHalf
+    }
+}
+
+macro_rules! os_ty { // "other-sign type" of `$ty` (`Int::OtherSign`); the bare `Int` path must be in scope at the use site
+    ($ty:ty) => {
+        <$ty as Int>::OtherSign
+    }
+}
 
 pub mod mul;
 pub mod sdiv;
@@ -6,32 +17,33 @@ pub mod udiv;
 
 /// Trait for some basic operations on integers
 pub trait Int {
+    /// Type with the same width but other signedness
+    type OtherSign;
     /// Returns the bitwidth of the int type
     fn bits() -> u32;
 }
 
-// TODO: Once i128/u128 support lands, we'll want to add impls for those as well
-impl Int for u32 {
-    fn bits() -> u32 {
-        32
-    }
-}
-impl Int for i32 {
-    fn bits() -> u32 {
-        32
-    }
-}
-impl Int for u64 {
-    fn bits() -> u32 {
-        64
-    }
-}
-impl Int for i64 {
-    fn bits() -> u32 {
-        64
+macro_rules! int_impl {
+    ($ity:ty, $sty:ty, $bits:expr) => {
+        impl Int for $ity {
+            type OtherSign = $sty;
+            fn bits() -> u32 {
+                $bits
+            }
+        }
+        impl Int for $sty {
+            type OtherSign = $ity;
+            fn bits() -> u32 {
+                $bits
+            }
+        }
     }
 }
 
+int_impl!(i32, u32, 32);
+int_impl!(i64, u64, 64);
+int_impl!(i128, u128, 128);
+
 /// Trait to convert an integer to/from smaller parts
 pub trait LargeInt {
     type LowHalf;
@@ -42,32 +54,26 @@ pub trait LargeInt {
     fn from_parts(low: Self::LowHalf, high: Self::HighHalf) -> Self;
 }
 
-// TODO: Once i128/u128 support lands, we'll want to add impls for those as well
-impl LargeInt for u64 {
-    type LowHalf = u32;
-    type HighHalf = u32;
+macro_rules! large_int {
+    ($ty:ty, $tylow:ty, $tyhigh:ty, $halfbits:expr) => {
+        impl LargeInt for $ty {
+            type LowHalf = $tylow;
+            type HighHalf = $tyhigh;
 
-    fn low(self) -> u32 {
-        self as u32
-    }
-    fn high(self) -> u32 {
-        (self >> 32) as u32
-    }
-    fn from_parts(low: u32, high: u32) -> u64 {
-        low as u64 | ((high as u64) << 32)
+            fn low(self) -> $tylow {
+                self as $tylow
+            }
+            fn high(self) -> $tyhigh {
+                (self >> $halfbits) as $tyhigh
+            }
+            fn from_parts(low: $tylow, high: $tyhigh) -> $ty {
+                low as $ty | ((high as $ty) << $halfbits)
+            }
+        }
     }
 }
-impl LargeInt for i64 {
-    type LowHalf = u32;
-    type HighHalf = i32;
 
-    fn low(self) -> u32 {
-        self as u32
-    }
-    fn high(self) -> i32 {
-        (self >> 32) as i32
-    }
-    fn from_parts(low: u32, high: i32) -> i64 {
-        low as i64 | ((high as i64) << 32)
-    }
-}
+large_int!(u64, u32, u32, 32);
+large_int!(i64, u32, i32, 32);
+large_int!(u128, u64, u64, 64);
+large_int!(i128, u64, i64, 64);
diff --git a/src/int/mul.rs b/src/int/mul.rs
index 0403aa9..27709e5 100644
--- a/src/int/mul.rs
+++ b/src/int/mul.rs
@@ -1,4 +1,3 @@
-#[cfg(not(all(feature = "c", target_arch = "x86")))]
 use int::LargeInt;
 use int::Int;
 
@@ -14,14 +13,15 @@ macro_rules! mul {
             low &= lower_mask;
             t += (a.low() >> half_bits).wrapping_mul(b.low() & lower_mask);
             low += (t & lower_mask) << half_bits;
-            let mut high = t >> half_bits;
+            let mut high = (t >> half_bits) as hty!($ty);
             t = low >> half_bits;
             low &= lower_mask;
             t += (b.low() >> half_bits).wrapping_mul(a.low() & lower_mask);
             low += (t & lower_mask) << half_bits;
-            high += t >> half_bits;
-            high += (a.low() >> half_bits).wrapping_mul(b.low() >> half_bits);
-            high = high.wrapping_add(a.high().wrapping_mul(b.low()).wrapping_add(a.low().wrapping_mul(b.high())));
+            high += (t >> half_bits) as hty!($ty);
+            high += (a.low() >> half_bits).wrapping_mul(b.low() >> half_bits) as hty!($ty);
+            high = high.wrapping_add(a.high().wrapping_mul(b.low() as hty!($ty)))
+                       .wrapping_add((a.low() as hty!($ty)).wrapping_mul(b.high()));
             <$ty>::from_parts(low, high)
         }
     }
@@ -29,9 +29,13 @@ macro_rules! mul {
 
 macro_rules! mulo {
     ($intrinsic:ident: $ty:ty) => {
+        // Default is "C" ABI
+        mulo!($intrinsic: $ty, "C");
+    };
+    ($intrinsic:ident: $ty:ty, $abi:tt) => {
         /// Returns `a * b` and sets `*overflow = 1` if `a * b` overflows
         #[cfg_attr(not(test), no_mangle)]
-        pub extern "C" fn $intrinsic(a: $ty, b: $ty, overflow: &mut i32) -> $ty {
+        pub extern $abi fn $intrinsic(a: $ty, b: $ty, overflow: &mut i32) -> $ty {
             *overflow = 0;
             let result = a.wrapping_mul(b);
             if a == <$ty>::min_value() {
@@ -71,9 +75,16 @@ macro_rules! mulo {
 #[cfg(not(all(feature = "c", target_arch = "x86")))]
 mul!(__muldi3: u64);
 
+mul!(__multi3: i128);
+
 mulo!(__mulosi4: i32);
 mulo!(__mulodi4: i64);
 
+#[cfg(all(windows, target_pointer_width="64"))]
+mulo!(__muloti4: i128, "unadjusted");
+#[cfg(not(all(windows, target_pointer_width="64")))]
+mulo!(__muloti4: i128);
+
 #[cfg(test)]
 mod tests {
     use qc::{I32, I64, U64};
@@ -91,7 +102,7 @@ mod tests {
             let mut overflow = 2;
             let r = f(a, b, &mut overflow);
             if overflow != 0 && overflow != 1 {
-                return None
+                panic!("Invalid value {} for overflow", overflow);
             }
             Some((r, overflow))
         }
@@ -103,7 +114,34 @@ mod tests {
             let mut overflow = 2;
             let r = f(a, b, &mut overflow);
             if overflow != 0 && overflow != 1 {
-                return None
+                panic!("Invalid value {} for overflow", overflow);
+            }
+            Some((r, overflow))
+        }
+    }
+}
+
+#[cfg(test)]
+#[cfg(all(not(windows),
+          not(target_arch = "mips64"),
+          not(target_arch = "mips64el"),
+          target_pointer_width="64"))]
+mod tests_i128 {
+    use qc::I128;
+
+    check! {
+        fn __multi3(f: extern fn(i128, i128) -> i128, a: I128, b: I128)
+                    -> Option<i128> {
+            Some(f(a.0, b.0))
+        }
+        fn __muloti4(f: extern fn(i128, i128, &mut i32) -> i128,
+                     a: I128,
+                     b: I128) -> Option<(i128, i32)> {
+            let (a, b) = (a.0, b.0);
+            let mut overflow = 2;
+            let r = f(a, b, &mut overflow);
+            if overflow != 0 && overflow != 1 {
+                panic!("Invalid value {} for overflow", overflow);
             }
             Some((r, overflow))
         }
diff --git a/src/int/sdiv.rs b/src/int/sdiv.rs
index b541322..023fad4 100644
--- a/src/int/sdiv.rs
+++ b/src/int/sdiv.rs
@@ -2,9 +2,12 @@ use int::Int;
 
 macro_rules! div {
     ($intrinsic:ident: $ty:ty, $uty:ty) => {
+        div!($intrinsic: $ty, $uty, $ty, |i| {i});
+    };
+    ($intrinsic:ident: $ty:ty, $uty:ty, $tyret:ty, $conv:expr) => {
         /// Returns `a / b`
         #[cfg_attr(not(test), no_mangle)]
-        pub extern "C" fn $intrinsic(a: $ty, b: $ty) -> $ty {
+        pub extern "C" fn $intrinsic(a: $ty, b: $ty) -> $tyret {
             let s_a = a >> (<$ty>::bits() - 1);
             let s_b = b >> (<$ty>::bits() - 1);
             let a = (a ^ s_a) - s_a;
@@ -12,23 +15,26 @@ macro_rules! div {
             let s = s_a ^ s_b;
 
             let r = udiv!(a as $uty, b as $uty);
-            (r as $ty ^ s) - s
+            ($conv)((r as $ty ^ s) - s)
         }
     }
 }
 
 macro_rules! mod_ {
     ($intrinsic:ident: $ty:ty, $uty:ty) => {
+        mod_!($intrinsic: $ty, $uty, $ty, |i| {i});
+    };
+    ($intrinsic:ident: $ty:ty, $uty:ty, $tyret:ty, $conv:expr) => {
         /// Returns `a % b`
         #[cfg_attr(not(test), no_mangle)]
-        pub extern "C" fn $intrinsic(a: $ty, b: $ty) -> $ty {
+        pub extern "C" fn $intrinsic(a: $ty, b: $ty) -> $tyret {
             let s = b >> (<$ty>::bits() - 1);
             let b = (b ^ s) - s;
             let s = a >> (<$ty>::bits() - 1);
             let a = (a ^ s) - s;
 
             let r = urem!(a as $uty, b as $uty);
-            (r as $ty ^ s) - s
+            ($conv)((r as $ty ^ s) - s)
         }
     }
 }
@@ -61,12 +67,24 @@ div!(__divsi3: i32, u32);
 #[cfg(not(all(feature = "c", target_arch = "x86")))]
 div!(__divdi3: i64, u64);
 
+#[cfg(not(all(windows, target_pointer_width="64")))]
+div!(__divti3: i128, u128);
+
+#[cfg(all(windows, target_pointer_width="64"))]
+div!(__divti3: i128, u128, ::U64x2, ::sconv);
+
 #[cfg(not(all(feature = "c", target_arch = "arm", not(target_os = "ios"))))]
 mod_!(__modsi3: i32, u32);
 
 #[cfg(not(all(feature = "c", target_arch = "x86")))]
 mod_!(__moddi3: i64, u64);
 
+#[cfg(not(all(windows, target_pointer_width="64")))]
+mod_!(__modti3: i128, u128);
+
+#[cfg(all(windows, target_pointer_width="64"))]
+mod_!(__modti3: i128, u128, ::U64x2, ::sconv);
+
 #[cfg(not(all(feature = "c", target_arch = "arm", not(target_os = "ios"))))]
 divmod!(__divmodsi4, __divsi3: i32);
 
@@ -144,3 +162,32 @@ mod tests {
         }
     }
 }
+
+#[cfg(test)]
+#[cfg(all(not(windows),
+          not(target_arch = "mips64"),
+          not(target_arch = "mips64el"),
+          target_pointer_width="64"))]
+mod tests_i128 {
+    use qc::U128;
+    check! {
+
+        fn __divti3(f: extern fn(i128, i128) -> i128, n: U128, d: U128) -> Option<i128> {
+            let (n, d) = (n.0 as i128, d.0 as i128);
+            if d == 0 {
+                None
+            } else {
+                Some(f(n, d))
+            }
+        }
+
+        fn __modti3(f: extern fn(i128, i128) -> i128, n: U128, d: U128) -> Option<i128> {
+            let (n, d) = (n.0 as i128, d.0 as i128);
+            if d == 0 {
+                None
+            } else {
+                Some(f(n, d))
+            }
+        }
+    }
+}
diff --git a/src/int/shift.rs b/src/int/shift.rs
index 9338f07..e5dc38f 100644
--- a/src/int/shift.rs
+++ b/src/int/shift.rs
@@ -1,4 +1,3 @@
-#[cfg(not(all(feature = "c", target_arch = "x86")))]
 use int::{Int, LargeInt};
 
 macro_rules! ashl {
@@ -58,12 +57,18 @@ macro_rules! lshr {
 #[cfg(not(all(feature = "c", target_arch = "x86")))]
 ashl!(__ashldi3: u64);
 
+ashl!(__ashlti3: u128);
+
 #[cfg(not(all(feature = "c", target_arch = "x86")))]
 ashr!(__ashrdi3: i64);
 
+ashr!(__ashrti3: i128);
+
 #[cfg(not(all(feature = "c", target_arch = "x86")))]
 lshr!(__lshrdi3: u64);
 
+lshr!(__lshrti3: u128);
+
 #[cfg(test)]
 mod tests {
     use qc::{I64, U64};
@@ -98,3 +103,42 @@ mod tests {
         }
     }
 }
+
+#[cfg(test)]
+#[cfg(all(not(windows),
+          not(target_arch = "mips64"),
+          not(target_arch = "mips64el"),
+          target_pointer_width="64"))]
+mod tests_i128 {
+    use qc::{I128, U128};
+
+    // NOTE `b` is a `u32`; shift amounts >= 128 (the operand width) are discarded in every case below
+    check! {
+        fn __ashlti3(f: extern fn(u128, u32) -> u128, a: U128, b: u32) -> Option<u128> {
+            let a = a.0;
+            if b >= 128 {
+                None
+            } else {
+                Some(f(a, b))
+            }
+        }
+
+        fn __ashrti3(f: extern fn(i128, u32) -> i128, a: I128, b: u32) -> Option<i128> {
+            let a = a.0;
+            if b >= 128 {
+                None
+            } else {
+                Some(f(a, b))
+            }
+        }
+
+        fn __lshrti3(f: extern fn(u128, u32) -> u128, a: U128, b: u32) -> Option<u128> {
+            let a = a.0;
+            if b >= 128 {
+                None
+            } else {
+                Some(f(a, b))
+            }
+        }
+    }
+}
diff --git a/src/int/udiv.rs b/src/int/udiv.rs
index 3c5629e..57d5fe5 100644
--- a/src/int/udiv.rs
+++ b/src/int/udiv.rs
@@ -96,172 +96,222 @@ pub extern "C" fn __udivmodsi4(n: u32, d: u32, rem: Option<&mut u32>) -> u32 {
     q
 }
 
-/// Returns `n / d`
-#[cfg_attr(not(test), no_mangle)]
-#[cfg(not(all(feature = "c", target_arch = "x86")))]
-pub extern "C" fn __udivdi3(n: u64, d: u64) -> u64 {
-    __udivmoddi4(n, d, None)
+macro_rules! div_mod_intrinsics {
+    ($udiv_intr:ident, $umod_intr:ident : $ty:ty) => { // default: delegate to `__udivmoddi4`
+        div_mod_intrinsics!($udiv_intr, $umod_intr : $ty,
+                            __udivmoddi4);
+    };
+    ($udiv_intr:ident, $umod_intr:ident : $ty:ty, $divmod_intr:expr) => { // default: return `$ty` unconverted
+        div_mod_intrinsics!($udiv_intr, $umod_intr : $ty,
+                            $divmod_intr, $ty, |i|{ i });
+    };
+    ($udiv_intr:ident, $umod_intr:ident : $ty:ty, $divmod_intr:expr,
+     $tyret:ty, $conv:expr) => {
+        /// Returns `n / d`
+        #[cfg_attr(not(test), no_mangle)]
+        pub extern "C" fn $udiv_intr(n: $ty, d: $ty) -> $tyret {
+            let r = $divmod_intr(n, d, None);
+            ($conv)(r)
+        }
+
+        /// Returns `n % d`
+        #[cfg_attr(not(test), no_mangle)]
+        pub extern "C" fn $umod_intr(a: $ty, b: $ty) -> $tyret {
+            // Start from zero rather than `mem::uninitialized()`: the value is
+            // overwritten by the call below, and no `unsafe` is needed.
+            let mut rem: $ty = 0;
+            $divmod_intr(a, b, Some(&mut rem));
+            ($conv)(rem)
+        }
+    }
 }
 
-/// Returns `n % d`
 #[cfg(not(all(feature = "c", target_arch = "x86")))]
-#[cfg_attr(not(test), no_mangle)]
-pub extern "C" fn __umoddi3(a: u64, b: u64) -> u64 {
-    use core::mem;
+div_mod_intrinsics!(__udivdi3, __umoddi3: u64);
 
-    let mut rem = unsafe { mem::uninitialized() };
-    __udivmoddi4(a, b, Some(&mut rem));
-    rem
+#[cfg(not(all(windows, target_pointer_width="64")))]
+div_mod_intrinsics!(__udivti3, __umodti3: u128, u128_div_mod);
+
+#[cfg(all(windows, target_pointer_width="64"))]
+div_mod_intrinsics!(__udivti3, __umodti3: u128, u128_div_mod, ::U64x2, ::conv);
+
+macro_rules! udivmod_inner {
+    ($n:expr, $d:expr, $rem:expr, $ty:ty) => {{
+        let (n, d, rem) = ($n, $d, $rem);
+        // NOTE X is unknown, K != 0
+        if n.high() == 0 {
+            if d.high() == 0 {
+                // 0 X
+                // ---
+                // 0 X
+
+                if let Some(rem) = rem {
+                    *rem = <$ty>::from(urem!(n.low(), d.low()));
+                }
+                return <$ty>::from(udiv!(n.low(), d.low()));
+            } else {
+                // 0 X
+                // ---
+                // K X
+                if let Some(rem) = rem {
+                    *rem = n;
+                }
+                return 0;
+            };
+        }
+
+        let mut sr;
+        let mut q;
+        let mut r;
+
+        if d.low() == 0 {
+            if d.high() == 0 {
+                // K X
+                // ---
+                // 0 0
+                // NOTE This should be unreachable in safe Rust because the program will panic before
+                // this intrinsic is called
+                unsafe {
+                    intrinsics::abort()
+                }
+            }
+
+            if n.low() == 0 {
+                // K 0
+                // ---
+                // K 0
+                if let Some(rem) = rem {
+                    *rem = <$ty>::from_parts(0, urem!(n.high(), d.high()));
+                }
+                return <$ty>::from(udiv!(n.high(), d.high()));
+            }
+
+            // K K
+            // ---
+            // K 0
+
+            if d.high().is_power_of_two() {
+                if let Some(rem) = rem {
+                    *rem = <$ty>::from_parts(n.low(), n.high() & (d.high() - 1));
+                }
+                return <$ty>::from(n.high() >> d.high().trailing_zeros());
+            }
+
+            sr = d.high().leading_zeros().wrapping_sub(n.high().leading_zeros());
+
+            // D > N
+            if sr > <hty!($ty)>::bits() - 2 {
+                if let Some(rem) = rem {
+                    *rem = n;
+                }
+                return 0;
+            }
+
+            sr += 1;
+
+            // 1 <= sr <= <hty!($ty)>::bits() - 1
+            q = n << (<$ty>::bits() - sr);
+            r = n >> sr;
+        } else if d.high() == 0 {
+            // K X
+            // ---
+            // 0 K
+            if d.low().is_power_of_two() {
+                if let Some(rem) = rem {
+                    *rem = <$ty>::from(n.low() & (d.low() - 1));
+                }
+
+                if d.low() == 1 {
+                    return n;
+                } else {
+                    let sr = d.low().trailing_zeros();
+                    return n >> sr;
+                };
+            }
+
+            sr = 1 + <hty!($ty)>::bits() + d.low().leading_zeros() - n.high().leading_zeros();
+
+            // 2 <= sr <= <$ty>::bits() - 1
+            q = n << (<$ty>::bits() - sr);
+            r = n >> sr;
+        } else {
+            // K X
+            // ---
+            // K K
+            sr = d.high().leading_zeros().wrapping_sub(n.high().leading_zeros());
+
+            // D > N
+            if sr > <hty!($ty)>::bits() - 1 {
+                if let Some(rem) = rem {
+                    *rem = n;
+                }
+                return 0;
+            }
+
+            sr += 1;
+
+            // 1 <= sr <= <hty!($ty)>::bits()
+            q = n << (<$ty>::bits() - sr);
+            r = n >> sr;
+        }
+
+        // Not a special case
+        // q and r are initialized with
+        // q = n << (<$ty>::bits() - sr)
+        // r = n >> sr
+        // 1 <= sr <= <$ty>::bits() - 1
+        let mut carry = 0;
+
+        for _ in 0..sr {
+            // r:q = ((r:q) << 1) | carry
+            r = (r << 1) | (q >> (<$ty>::bits() - 1));
+            q = (q << 1) | carry as $ty;
+
+            // carry = 0
+            // if r >= d {
+            //     r -= d;
+            //     carry = 1;
+            // }
+            let s = (d.wrapping_sub(r).wrapping_sub(1)) as os_ty!($ty) >> (<$ty>::bits() - 1);
+            carry = (s & 1) as hty!($ty);
+            r -= d & s as $ty;
+        }
+
+        if let Some(rem) = rem {
+            *rem = r;
+        }
+        (q << 1) | carry as $ty
+    }}
 }
 
 /// Returns `n / d` and sets `*rem = n % d`
 #[cfg_attr(not(test), no_mangle)]
 pub extern "C" fn __udivmoddi4(n: u64, d: u64, rem: Option<&mut u64>) -> u64 {
-    // NOTE X is unknown, K != 0
-    if n.high() == 0 {
-        if d.high() == 0 {
-            // 0 X
-            // ---
-            // 0 X
-
-            if let Some(rem) = rem {
-                *rem = u64::from(urem!(n.low(), d.low()));
-            }
-            return u64::from(udiv!(n.low(), d.low()));
-        } else {
-            // 0 X
-            // ---
-            // K X
-            if let Some(rem) = rem {
-                *rem = n;
-            }
-            return 0;
-        };
-    }
-
-    let mut sr;
-    let mut q;
-    let mut r;
-
-    if d.low() == 0 {
-        if d.high() == 0 {
-            // K X
-            // ---
-            // 0 0
-            // NOTE This should be unreachable in safe Rust because the program will panic before
-            // this intrinsic is called
-            unsafe {
-                intrinsics::abort()
-            }
-        }
-
-        if n.low() == 0 {
-            // K 0
-            // ---
-            // K 0
-            if let Some(rem) = rem {
-                *rem = u64::from_parts(0, urem!(n.high(), d.high()));
-            }
-            return u64::from(udiv!(n.high(), d.high()));
-        }
-
-        // K K
-        // ---
-        // K 0
-
-        if d.high().is_power_of_two() {
-            if let Some(rem) = rem {
-                *rem = u64::from_parts(n.low(), n.high() & (d.high() - 1));
-            }
-            return u64::from(n.high() >> d.high().trailing_zeros());
-        }
-
-        sr = d.high().leading_zeros().wrapping_sub(n.high().leading_zeros());
-
-        // D > N
-        if sr > u32::bits() - 2 {
-            if let Some(rem) = rem {
-                *rem = n;
-            }
-            return 0;
-        }
-
-        sr += 1;
-
-        // 1 <= sr <= u32::bits() - 1
-        q = n << (u64::bits() - sr);
-        r = n >> sr;
-    } else if d.high() == 0 {
-        // K X
-        // ---
-        // 0 K
-        if d.low().is_power_of_two() {
-            if let Some(rem) = rem {
-                *rem = u64::from(n.low() & (d.low() - 1));
-            }
-
-            if d.low() == 1 {
-                return n;
-            } else {
-                let sr = d.low().trailing_zeros();
-                return n >> sr;
-            };
-        }
-
-        sr = 1 + u32::bits() + d.low().leading_zeros() - n.high().leading_zeros();
-
-        // 2 <= sr <= u64::bits() - 1
-        q = n << (u64::bits() - sr);
-        r = n >> sr;
-    } else {
-        // K X
-        // ---
-        // K K
-        sr = d.high().leading_zeros().wrapping_sub(n.high().leading_zeros());
-
-        // D > N
-        if sr > u32::bits() - 1 {
-            if let Some(rem) = rem {
-                *rem = n;
-            }
-            return 0;
-        }
-
-        sr += 1;
-
-        // 1 <= sr <= u32::bits()
-        q = n << (u64::bits() - sr);
-        r = n >> sr;
-    }
-
-    // Not a special case
-    // q and r are initialized with
-    // q = n << (u64::bits() - sr)
-    // r = n >> sr
-    // 1 <= sr <= u64::bits() - 1
-    let mut carry = 0;
-
-    for _ in 0..sr {
-        // r:q = ((r:q) << 1) | carry
-        r = (r << 1) | (q >> (u64::bits() - 1));
-        q = (q << 1) | carry as u64;
-
-        // carry = 0
-        // if r >= d {
-        //     r -= d;
-        //     carry = 1;
-        // }
-        let s = (d.wrapping_sub(r).wrapping_sub(1)) as i64 >> (u64::bits() - 1);
-        carry = (s & 1) as u32;
-        r -= d & s as u64;
-    }
-
-    if let Some(rem) = rem {
-        *rem = r;
-    }
-    (q << 1) | carry as u64
+    udivmod_inner!(n, d, rem, u64)
 }
 
+macro_rules! udivmodti4 {
+    ($tyret:ty, $conv:expr) => {
+        /// Returns `n / d` and sets `*rem = n % d`
+        #[cfg_attr(not(test), no_mangle)]
+        pub extern "C" fn __udivmodti4(n: u128, d: u128, rem: Option<&mut u128>) -> $tyret {
+            let r = u128_div_mod(n, d, rem);
+            ($conv)(r)
+        }
+    }
+}
+
+/// Returns `n / d`; when `rem` is `Some`, also stores `n % d` through it (body comes from `udivmod_inner!`)
+fn u128_div_mod(n: u128, d: u128, rem: Option<&mut u128>) -> u128 {
+    udivmod_inner!(n, d, rem, u128)
+}
+
+#[cfg(all(windows, target_pointer_width="64"))]
+udivmodti4!(::U64x2, ::conv);
+
+#[cfg(not(all(windows, target_pointer_width="64")))]
+udivmodti4!(u128, |i|{ i });
+
 #[cfg(test)]
 mod tests {
     use qc::{U32, U64};
@@ -330,3 +380,51 @@ mod tests {
         }
     }
 }
+
+#[cfg(test)]
+#[cfg(all(not(windows),
+          not(target_arch = "mips64"),
+          not(target_arch = "mips64el"),
+          target_pointer_width="64"))]
+mod tests_i128 {
+    use qc::U128;
+
+    check! {
+        fn __udivti3(f: extern fn(u128, u128) -> u128,
+                     n: U128,
+                     d: U128) -> Option<u128> {
+            let (n, d) = (n.0, d.0);
+            if d == 0 {
+                None
+            } else {
+                Some(f(n, d))
+            }
+        }
+
+        fn __umodti3(f: extern fn(u128, u128) -> u128,
+                     n: U128,
+                     d: U128) -> Option<u128> {
+            let (n, d) = (n.0, d.0);
+            if d == 0 {
+                None
+            } else {
+                Some(f(n, d))
+            }
+        }
+
+        fn __udivmodti4(f: extern fn(u128, u128, Option<&mut u128>) -> u128,
+                        n: U128,
+                        d: U128) -> Option<u128> {
+            let (n, d) = (n.0, d.0);
+            if d == 0 {
+                None
+            } else {
+                // FIXME fix the segfault when the remainder is requested
+                /*let mut r = 0;
+                let q = f(n, d, Some(&mut r));
+                Some((q, r))*/
+                Some(f(n, d, None))
+            }
+        }
+    }
+}
diff --git a/src/lib.rs b/src/lib.rs
index 04bf2d0..0a80ad2 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -13,6 +13,10 @@
 #![feature(core_intrinsics)]
 #![feature(naked_functions)]
 #![feature(staged_api)]
+#![feature(i128_type)]
+#![feature(repr_simd)]
+#![feature(abi_unadjusted)]
+#![allow(unused_features)]
 #![no_builtins]
 #![unstable(feature = "compiler_builtins_lib",
             reason = "Compiler builtins. Will never become stable.",
@@ -85,6 +89,24 @@ macro_rules! srem {
     }
 }
 
+// Hack to match the ABI LLVM expects on 64-bit Windows: 128-bit results are returned as this pair of `u64`s (low half, high half)
+#[cfg(all(windows, target_pointer_width="64"))]
+#[repr(simd)]
+pub struct U64x2(u64, u64);
+
+#[cfg(all(windows, target_pointer_width="64"))]
+fn conv(i: u128) -> U64x2 {
+    // Low 64 bits go in the first field, high 64 bits in the second.
+    U64x2(::int::LargeInt::low(i), ::int::LargeInt::high(i))
+}
+
+#[cfg(all(windows, target_pointer_width="64"))]
+fn sconv(i: i128) -> U64x2 {
+    // Same bit pattern, reinterpreted as unsigned, so the split into halves
+    // is shared with `conv` instead of being spelled out twice.
+    conv(i as u128)
+}
+
 #[cfg(test)]
 #[cfg_attr(target_arch = "arm", macro_use)]
 extern crate quickcheck;
diff --git a/src/qc.rs b/src/qc.rs
index 5dbc56f..ea0faac 100644
--- a/src/qc.rs
+++ b/src/qc.rs
@@ -144,6 +144,8 @@ macro_rules! arbitrary_large {
 
 arbitrary_large!(I64: i64);
 arbitrary_large!(U64: u64);
+arbitrary_large!(I128: i128);
+arbitrary_large!(U128: u128);
 
 macro_rules! arbitrary_float {
     ($TY:ident : $ty:ident) => {