[core] Add type_aligned_alloca

2024-11-22 15:59:27 +08:00 · 2024-11-22 15:59:27 +08:00 · 4d9ed9376b
parent a481add9af
commit 4d9ed9376b
6 changed files with 139 additions and 30 deletions
--- a/nac3artiq/src/codegen.rs
+++ b/nac3artiq/src/codegen.rs
@ -18,6 +18,7 @@ use nac3core::{
        irrt::ndarray::call_ndarray_calc_size,
        llvm_intrinsics::{call_int_smax, call_memcpy_generic, call_stackrestore, call_stacksave},
        stmt::{gen_block, gen_for_callback_incrementing, gen_if_callback, gen_with},
+        type_aligned_alloca,
        types::{NDArrayType, ProxyType},
        values::{
            ArrayLikeIndexer, ArrayLikeValue, ArraySliceValue, ListValue, NDArrayValue, ProxyValue,
@ -642,27 +643,12 @@ fn format_rpc_ret<'ctx>(
            // (4 + 4 * ndims) bytes with 8-byte alignment
            let sizeof_dims =
                ctx.builder.build_int_mul(ndarray.load_ndims(ctx), llvm_usize_sizeof, "").unwrap();
-            let unaligned_buffer_size =
+            let buffer_size =
                ctx.builder.build_int_add(sizeof_dims, llvm_pdata_sizeof, "").unwrap();
-            let buffer_size = round_up(ctx, unaligned_buffer_size, llvm_usize.const_int(8, false));

            let stackptr = call_stacksave(ctx, None);
-            // Just to be absolutely sure, alloca in [i8 x 8] slices to force 8-byte alignment
-            let buffer = ctx
-                .builder
-                .build_array_alloca(
-                    llvm_i8_8,
-                    ctx.builder
-                        .build_int_unsigned_div(buffer_size, llvm_usize.const_int(8, false), "")
-                        .unwrap(),
-                    "rpc.buffer",
-                )
-                .unwrap();
-            let buffer = ctx
-                .builder
-                .build_bit_cast(buffer, llvm_pi8, "")
-                .map(BasicValueEnum::into_pointer_value)
-                .unwrap();
+            let buffer =
+                type_aligned_alloca(generator, ctx, llvm_i8_8, buffer_size, Some("rpc.buffer"));
            let buffer = ArraySliceValue::from_ptr_val(buffer, buffer_size, None);

            // The first call to `rpc_recv` reads the top-level ndarray object: [pdata, shape]
@ -735,7 +721,7 @@ fn format_rpc_ret<'ctx>(
                );
            }

-            ndarray.create_data(ctx, llvm_elem_ty, num_elements);
+            ndarray.create_data(generator, ctx, llvm_elem_ty, num_elements);

            let ndarray_data = ndarray.data().base_ptr(ctx, generator);
            let ndarray_data_i8 =
--- a/nac3core/src/codegen/expr.rs
+++ b/nac3core/src/codegen/expr.rs
@ -2852,7 +2852,7 @@ fn gen_ndarray_subscript_expr<'ctx, G: CodeGenerator>(
                    .builder
                    .build_int_z_extend_or_bit_cast(ndarray_num_elems, sizeof_elem.get_type(), "")
                    .unwrap();
-                ndarray.create_data(ctx, llvm_ndarray_data_t, ndarray_num_elems);
+                ndarray.create_data(generator, ctx, llvm_ndarray_data_t, ndarray_num_elems);

                let v_data_src_ptr = v.data().ptr_offset(ctx, generator, &index_addr, None);
                call_memcpy_generic(
--- a/nac3core/src/codegen/llvm_intrinsics.rs
+++ b/nac3core/src/codegen/llvm_intrinsics.rs
@ -343,3 +343,25 @@ pub fn call_float_powi<'ctx>(
        .map(Either::unwrap_left)
        .unwrap()
 }
+
+/// Invokes the [`llvm.ctpop`](https://llvm.org/docs/LangRef.html#llvm-ctpop-intrinsic) intrinsic (population count) on `src`.
+pub fn call_int_ctpop<'ctx>(
+    ctx: &CodeGenContext<'ctx, '_>,
+    src: IntValue<'ctx>,
+    name: Option<&str>,
+) -> IntValue<'ctx> {
+    const FN_NAME: &str = "llvm.ctpop";
+
+    let llvm_src_t = src.get_type();
+
+    let intrinsic_fn = Intrinsic::find(FN_NAME)
+        .and_then(|intrinsic| intrinsic.get_declaration(&ctx.module, &[llvm_src_t.into()]))
+        .unwrap();
+
+    ctx.builder
+        .build_call(intrinsic_fn, &[src.into()], name.unwrap_or_default())
+        .map(CallSiteValue::try_as_basic_value)
+        .map(|v| v.map_left(BasicValueEnum::into_int_value))
+        .map(Either::unwrap_left)
+        .unwrap()
+}
--- a/nac3core/src/codegen/mod.rs
+++ b/nac3core/src/codegen/mod.rs
@ -1119,3 +1119,106 @@ fn gen_in_range_check<'ctx>(
 fn get_va_count_arg_name(arg_name: StrRef) -> StrRef {
    format!("__{}_va_count", &arg_name).into()
 }
+
+/// Returns the alignment of the type as an LLVM [`IntValue`].
+///
+/// Dispatches per [`BasicTypeEnum`] variant since `get_alignment` is not part of [`BasicType`].
+pub fn get_type_alignment<'ctx>(ty: impl Into<BasicTypeEnum<'ctx>>) -> IntValue<'ctx> {
+    match ty.into() {
+        BasicTypeEnum::ArrayType(ty) => ty.get_alignment(),
+        BasicTypeEnum::FloatType(ty) => ty.get_alignment(),
+        BasicTypeEnum::IntType(ty) => ty.get_alignment(),
+        BasicTypeEnum::PointerType(ty) => ty.get_alignment(),
+        BasicTypeEnum::StructType(ty) => ty.get_alignment(),
+        BasicTypeEnum::VectorType(ty) => ty.get_alignment(),
+    }
+}
+
+/// Inserts an `alloca` of at least `size` bytes, allocated as elements of `align_ty` so that the
+/// buffer takes on the alignment of that type; the result is bit-cast to `i8*`.
+///
+/// `size` is cast to `size_t` and rounded up to a multiple of the alignment. When compiling at
+/// `OptimizationLevel::None`, a runtime check that the alignment is a power of two is emitted.
+pub fn type_aligned_alloca<'ctx, G: CodeGenerator + ?Sized>(
+    generator: &mut G,
+    ctx: &mut CodeGenContext<'ctx, '_>,
+    align_ty: impl Into<BasicTypeEnum<'ctx>>,
+    size: IntValue<'ctx>,
+    name: Option<&str>,
+) -> PointerValue<'ctx> {
+    /// Rounds `val` up to a multiple of `power_of_two` (assumed to be a power of two).
+    fn round_up<'ctx>(
+        ctx: &CodeGenContext<'ctx, '_>,
+        val: IntValue<'ctx>,
+        power_of_two: IntValue<'ctx>,
+    ) -> IntValue<'ctx> {
+        debug_assert_eq!(
+            val.get_type().get_bit_width(),
+            power_of_two.get_type().get_bit_width(),
+            "`val` ({}) and `power_of_two` ({}) must be the same type",
+            val.get_type(),
+            power_of_two.get_type(),
+        );
+
+        let llvm_val_t = val.get_type();
+
+        let max_rem =
+            ctx.builder.build_int_sub(power_of_two, llvm_val_t.const_int(1, false), "").unwrap();
+        ctx.builder
+            .build_and(
+                ctx.builder.build_int_add(val, max_rem, "").unwrap(),
+                ctx.builder.build_not(max_rem, "").unwrap(),
+                "",
+            )
+            .unwrap()
+    }
+
+    let llvm_i8 = ctx.ctx.i8_type();
+    let llvm_pi8 = llvm_i8.ptr_type(AddressSpace::default());
+    let llvm_usize = generator.get_size_type(ctx.ctx);
+    let align_ty = align_ty.into();
+
+    let size = ctx.builder.build_int_cast(size, llvm_usize, "").unwrap();
+
+    debug_assert_eq!(
+        size.get_type().get_bit_width(),
+        llvm_usize.get_bit_width(),
+        "Expected size_t ({}) for parameter `size` of `aligned_alloca`, got {}",
+        llvm_usize,
+        size.get_type(),
+    );
+
+    let alignment = get_type_alignment(align_ty);
+    let alignment = ctx.builder.build_int_cast(alignment, llvm_usize, "").unwrap();
+
+    if ctx.registry.llvm_options.opt_level == OptimizationLevel::None {
+        let alignment_bitcount = llvm_intrinsics::call_int_ctpop(ctx, alignment, None);
+
+        ctx.make_assert(
+            generator,
+            ctx.builder
+                .build_int_compare(
+                    IntPredicate::EQ,
+                    alignment_bitcount,
+                    alignment_bitcount.get_type().const_int(1, false),
+                    "",
+                )
+                .unwrap(),
+            "0:AssertionError",
+            "Expected power-of-two alignment for aligned_alloca, got {0}",
+            [Some(alignment), None, None],
+            ctx.current_loc,
+        );
+    }
+
+    let buffer_size = round_up(ctx, size, alignment);
+    let aligned_slices = ctx.builder.build_int_unsigned_div(buffer_size, alignment, "").unwrap();
+
+    // Alloca `aligned_slices` elements of `align_ty`; NOTE(review): may exceed `buffer_size`
+    let buffer = ctx.builder.build_array_alloca(align_ty, aligned_slices, "").unwrap();
+
+    ctx.builder
+        .build_bit_cast(buffer, llvm_pi8, name.unwrap_or_default())
+        .map(BasicValueEnum::into_pointer_value)
+        .unwrap()
+}
--- a/nac3core/src/codegen/numpy.rs
+++ b/nac3core/src/codegen/numpy.rs
@ -235,7 +235,7 @@ fn ndarray_init_data<'ctx, G: CodeGenerator + ?Sized>(
        &ndarray.shape().as_slice_value(ctx, generator),
        (None, None),
    );
-    ndarray.create_data(ctx, llvm_ndarray_data_t, ndarray_num_elems);
+    ndarray.create_data(generator, ctx, llvm_ndarray_data_t, ndarray_num_elems);

    ndarray
 }
--- a/nac3core/src/codegen/values/ndarray.rs
+++ b/nac3core/src/codegen/values/ndarray.rs
@ -9,8 +9,8 @@ use super::{
    UntypedArrayLikeAccessor, UntypedArrayLikeMutator,
 };
 use crate::codegen::{
-    irrt, llvm_intrinsics::call_int_umin, stmt::gen_for_callback_incrementing, types::NDArrayType,
-    CodeGenContext, CodeGenerator,
+    irrt, llvm_intrinsics::call_int_umin, stmt::gen_for_callback_incrementing, type_aligned_alloca,
+    types::NDArrayType, CodeGenContext, CodeGenerator,
 };

 /// Proxy type for accessing an `NDArray` value in LLVM.
@ -122,9 +122,10 @@ impl<'ctx> NDArrayValue<'ctx> {

    /// Convenience method for creating a new array storing data elements with the given element
    /// type `elem_ty` and `size`.
-    pub fn create_data(
+    pub fn create_data<G: CodeGenerator + ?Sized>(
        &self,
-        ctx: &CodeGenContext<'ctx, '_>,
+        generator: &mut G,
+        ctx: &mut CodeGenContext<'ctx, '_>,
        elem_ty: BasicTypeEnum<'ctx>,
        size: IntValue<'ctx>,
    ) {
@ -132,11 +133,8 @@ impl<'ctx> NDArrayValue<'ctx> {
            ctx.builder.build_int_cast(elem_ty.size_of().unwrap(), size.get_type(), "").unwrap();
        let nbytes = ctx.builder.build_int_mul(size, itemsize, "").unwrap();

-        // TODO: What about alignment?
-        self.store_data(
-            ctx,
-            ctx.builder.build_array_alloca(ctx.ctx.i8_type(), nbytes, "").unwrap(),
-        );
+        let data = type_aligned_alloca(generator, ctx, elem_ty, nbytes, None);
+        self.store_data(ctx, data);
    }

    /// Returns a proxy object to the field storing the data of this `NDArray`.