Auto merge of #115 - Amanieu:linux-arm-atomic, r=alexcrichton
Add atomic support for pre-ARMv6 on Linux

This uses the [kernel user helpers](https://www.kernel.org/doc/Documentation/arm/kernel_user_helpers.txt), which are available starting from kernel 2.6.15. Since Rust currently requires 2.6.18 at a minimum, this should be fine in practice. I did not include support for 64-bit atomics, since that functionality is only available starting from kernel 3.1. This PR allows Rust to work on older ARM versions such as ARMv4 and ARMv5 with the full libstd.
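As a usage sketch (illustrative, not part of the change): with this crate linked into a program built for a pre-ARMv6 Linux target, compiler-emitted `__sync_*` libcalls resolve to the functions added below. Calling one directly, using the signatures this PR defines:

```rust
extern "C" {
    // Signatures as generated by the atomic_rmw! macro in this PR.
    fn __sync_fetch_and_add_2(ptr: *mut u16, val: u16) -> u16;
    fn __sync_synchronize();
}

fn main() {
    let mut x: u16 = 40;
    // Atomically add 2, returning the previous value.
    let old = unsafe { __sync_fetch_and_add_2(&mut x, 2) };
    unsafe { __sync_synchronize() }; // full memory barrier
    assert_eq!((old, x), (40, 42));
}
```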
commit 5b96befbc5

build.rs

@@ -40,6 +40,11 @@ fn main() {
    if llvm_target[0] == "thumbv6m" {
        println!("cargo:rustc-cfg=thumbv6m")
    }

    // Only emit the ARM Linux atomic emulation on pre-ARMv6 architectures.
    if llvm_target[0] == "armv5te" {
        println!("cargo:rustc-cfg=armv5te")
    }
}

#[cfg(feature = "gen-tests")]
src/arm_linux.rs (new file)

@@ -0,0 +1,170 @@
use core::intrinsics;
use core::mem;

// Kernel-provided user-mode helper functions:
// https://www.kernel.org/doc/Documentation/arm/kernel_user_helpers.txt
// __kuser_cmpxchg lives at the fixed address 0xffff0fc0 and returns zero
// if it atomically changed *ptr from oldval to newval.
unsafe fn __kuser_cmpxchg(oldval: u32, newval: u32, ptr: *mut u32) -> bool {
    let out: u32;
    // FIXME: we can't use BLX on ARMv4
    asm!("blx ${0}"
         : "={r0}" (out)
         : "r" (0xffff0fc0u32),
           "{r0}" (oldval),
           "{r1}" (newval),
           "{r2}" (ptr)
         : "r3", "r12", "lr", "cc", "memory");
    out == 0
}

// __kuser_memory_barrier lives at the fixed address 0xffff0fa0 and issues
// a full memory barrier.
unsafe fn __kuser_memory_barrier() {
    // FIXME: we can't use BLX on ARMv4
    asm!("blx ${0}"
         :
         : "r" (0xffff0fa0u32)
         : "lr", "memory");
}
// Word-align a pointer
fn align_ptr<T>(ptr: *mut T) -> *mut u32 {
    // This gives us a mask of 0 when T == u32 since the pointer is already
    // supposed to be aligned, which avoids any masking in that case.
    let ptr_mask = 3 & (4 - mem::size_of::<T>());
    (ptr as usize & !ptr_mask) as *mut u32
}

// Calculate the shift and mask of a value inside an aligned word
fn get_shift_mask<T>(ptr: *mut T) -> (u32, u32) {
    // Mask to get the low byte/halfword/word
    let mask = match mem::size_of::<T>() {
        1 => 0xff,
        2 => 0xffff,
        4 => 0xffffffff,
        _ => unreachable!(),
    };

    // If we are on big-endian then we need to adjust the shift accordingly
    let endian_adjust = if cfg!(target_endian = "little") {
        0
    } else {
        4 - mem::size_of::<T>() as u32
    };

    // Shift to get the desired element in the word
    let ptr_mask = 3 & (4 - mem::size_of::<T>());
    let shift = ((ptr as usize & ptr_mask) as u32 ^ endian_adjust) * 8;

    (shift, mask)
}
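A worked example of the math above (assuming little-endian): for a `u16` at the hypothetical address `0x1002`, `ptr_mask = 3 & (4 - 2) = 2`, the containing word is at `0x1000`, and the value sits 16 bits into that word:

```rust
fn main() {
    let addr: usize = 0x1002; // hypothetical 2-aligned address of a u16
    let ptr_mask = 3 & (4 - 2); // mem::size_of::<u16>() == 2
    assert_eq!(addr & !ptr_mask, 0x1000); // containing aligned word
    let shift = ((addr & ptr_mask) ^ 0) * 8; // endian_adjust = 0 on little-endian
    assert_eq!(shift, 16);
}
```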
// Extract a value from an aligned word
fn extract_aligned(aligned: u32, shift: u32, mask: u32) -> u32 {
    (aligned >> shift) & mask
}

// Insert a value into an aligned word
fn insert_aligned(aligned: u32, val: u32, shift: u32, mask: u32) -> u32 {
    (aligned & !(mask << shift)) | ((val & mask) << shift)
}
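A quick standalone check that these two helpers round-trip (hypothetical values; same expressions as above, continuing the little-endian `u16` example):

```rust
fn main() {
    let (shift, mask) = (16u32, 0xffffu32); // u16 in the high half of the word
    let word: u32 = 0x1122_3344;
    // insert_aligned: splice 0xABCD into the u16 slot
    let spliced = (word & !(mask << shift)) | ((0xABCD & mask) << shift);
    assert_eq!(spliced, 0xABCD_3344);
    // extract_aligned: read it back
    assert_eq!((spliced >> shift) & mask, 0xABCD);
}
```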
// Generic atomic read-modify-write operation
unsafe fn atomic_rmw<T, F: Fn(u32) -> u32>(ptr: *mut T, f: F) -> u32 {
    let aligned_ptr = align_ptr(ptr);
    let (shift, mask) = get_shift_mask(ptr);

    loop {
        let curval_aligned = intrinsics::atomic_load_unordered(aligned_ptr);
        let curval = extract_aligned(curval_aligned, shift, mask);
        let newval = f(curval);
        let newval_aligned = insert_aligned(curval_aligned, newval, shift, mask);
        // The CAS covers the whole containing word, so a concurrent store to
        // a neighboring byte in the same word simply causes a retry.
        if __kuser_cmpxchg(curval_aligned, newval_aligned, aligned_ptr) {
            return curval;
        }
    }
}
// Generic atomic compare-exchange operation
unsafe fn atomic_cmpxchg<T>(oldval: u32, newval: u32, ptr: *mut T) -> u32 {
    let aligned_ptr = align_ptr(ptr);
    let (shift, mask) = get_shift_mask(ptr);

    loop {
        let curval_aligned = intrinsics::atomic_load_unordered(aligned_ptr);
        let curval = extract_aligned(curval_aligned, shift, mask);
        if curval != oldval {
            return curval;
        }
        let newval_aligned = insert_aligned(curval_aligned, newval, shift, mask);
        // On success the element is known to have held oldval, so return it.
        if __kuser_cmpxchg(curval_aligned, newval_aligned, aligned_ptr) {
            return oldval;
        }
    }
}
macro_rules! atomic_rmw {
    ($name:ident, $ty:ty, $op:expr) => {
        #[cfg_attr(not(feature = "mangled-names"), no_mangle)]
        pub unsafe extern "C" fn $name(ptr: *mut $ty, val: $ty) -> $ty {
            atomic_rmw(ptr, |x| $op(x as $ty, val) as u32) as $ty
        }
    }
}

macro_rules! atomic_cmpxchg {
    ($name:ident, $ty:ty) => {
        #[cfg_attr(not(feature = "mangled-names"), no_mangle)]
        pub unsafe extern "C" fn $name(oldval: $ty, newval: $ty, ptr: *mut $ty) -> $ty {
            atomic_cmpxchg(oldval as u32, newval as u32, ptr) as $ty
        }
    }
}
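For reference, the first instantiation below, `atomic_rmw!(__sync_fetch_and_add_1, u8, ...)`, expands to roughly:

```rust
#[cfg_attr(not(feature = "mangled-names"), no_mangle)]
pub unsafe extern "C" fn __sync_fetch_and_add_1(ptr: *mut u8, val: u8) -> u8 {
    // Delegates to the generic word-based RMW loop; returns the old value.
    atomic_rmw(ptr, |x| (x as u8).wrapping_add(val) as u32) as u8
}
```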
atomic_rmw!(__sync_fetch_and_add_1, u8, |a: u8, b: u8| a.wrapping_add(b));
atomic_rmw!(__sync_fetch_and_add_2, u16, |a: u16, b: u16| a.wrapping_add(b));
atomic_rmw!(__sync_fetch_and_add_4, u32, |a: u32, b: u32| a.wrapping_add(b));

atomic_rmw!(__sync_fetch_and_sub_1, u8, |a: u8, b: u8| a.wrapping_sub(b));
atomic_rmw!(__sync_fetch_and_sub_2, u16, |a: u16, b: u16| a.wrapping_sub(b));
atomic_rmw!(__sync_fetch_and_sub_4, u32, |a: u32, b: u32| a.wrapping_sub(b));

atomic_rmw!(__sync_fetch_and_and_1, u8, |a: u8, b: u8| a & b);
atomic_rmw!(__sync_fetch_and_and_2, u16, |a: u16, b: u16| a & b);
atomic_rmw!(__sync_fetch_and_and_4, u32, |a: u32, b: u32| a & b);

atomic_rmw!(__sync_fetch_and_or_1, u8, |a: u8, b: u8| a | b);
atomic_rmw!(__sync_fetch_and_or_2, u16, |a: u16, b: u16| a | b);
atomic_rmw!(__sync_fetch_and_or_4, u32, |a: u32, b: u32| a | b);

atomic_rmw!(__sync_fetch_and_xor_1, u8, |a: u8, b: u8| a ^ b);
atomic_rmw!(__sync_fetch_and_xor_2, u16, |a: u16, b: u16| a ^ b);
atomic_rmw!(__sync_fetch_and_xor_4, u32, |a: u32, b: u32| a ^ b);

// NAND is !(a & b), per the GCC 4.4+ semantics of __sync_fetch_and_nand.
atomic_rmw!(__sync_fetch_and_nand_1, u8, |a: u8, b: u8| !(a & b));
atomic_rmw!(__sync_fetch_and_nand_2, u16, |a: u16, b: u16| !(a & b));
atomic_rmw!(__sync_fetch_and_nand_4, u32, |a: u32, b: u32| !(a & b));

atomic_rmw!(__sync_fetch_and_max_1, i8, |a: i8, b: i8| if a > b { a } else { b });
atomic_rmw!(__sync_fetch_and_max_2, i16, |a: i16, b: i16| if a > b { a } else { b });
atomic_rmw!(__sync_fetch_and_max_4, i32, |a: i32, b: i32| if a > b { a } else { b });

atomic_rmw!(__sync_fetch_and_umax_1, u8, |a: u8, b: u8| if a > b { a } else { b });
atomic_rmw!(__sync_fetch_and_umax_2, u16, |a: u16, b: u16| if a > b { a } else { b });
atomic_rmw!(__sync_fetch_and_umax_4, u32, |a: u32, b: u32| if a > b { a } else { b });

atomic_rmw!(__sync_fetch_and_min_1, i8, |a: i8, b: i8| if a < b { a } else { b });
atomic_rmw!(__sync_fetch_and_min_2, i16, |a: i16, b: i16| if a < b { a } else { b });
atomic_rmw!(__sync_fetch_and_min_4, i32, |a: i32, b: i32| if a < b { a } else { b });

atomic_rmw!(__sync_fetch_and_umin_1, u8, |a: u8, b: u8| if a < b { a } else { b });
atomic_rmw!(__sync_fetch_and_umin_2, u16, |a: u16, b: u16| if a < b { a } else { b });
atomic_rmw!(__sync_fetch_and_umin_4, u32, |a: u32, b: u32| if a < b { a } else { b });

atomic_rmw!(__sync_lock_test_and_set_1, u8, |_: u8, b: u8| b);
atomic_rmw!(__sync_lock_test_and_set_2, u16, |_: u16, b: u16| b);
atomic_rmw!(__sync_lock_test_and_set_4, u32, |_: u32, b: u32| b);

atomic_cmpxchg!(__sync_val_compare_and_swap_1, u8);
atomic_cmpxchg!(__sync_val_compare_and_swap_2, u16);
atomic_cmpxchg!(__sync_val_compare_and_swap_4, u32);

#[cfg_attr(not(feature = "mangled-names"), no_mangle)]
pub unsafe extern "C" fn __sync_synchronize() {
    __kuser_memory_barrier();
}
src/lib.rs

@@ -51,6 +51,9 @@ pub mod mem;
#[cfg(target_arch = "arm")]
pub mod arm;

#[cfg(all(armv5te, target_os = "linux", target_arch = "arm"))]
pub mod arm_linux;

#[cfg(target_arch = "x86")]
pub mod x86;