[core] codegen: Add type_aligned_alloca

2024-11-22 15:59:27 +08:00 · 2024-11-22 15:59:27 +08:00 · aa293b6bea
commit aa293b6bea
parent eb4b881690
6 changed files with 138 additions and 28 deletions
--- a/nac3artiq/src/codegen.rs
+++ b/nac3artiq/src/codegen.rs
@ -18,6 +18,7 @@ use nac3core::{
        irrt::ndarray::call_ndarray_calc_size,
        llvm_intrinsics::{call_int_smax, call_memcpy_generic, call_stackrestore, call_stacksave},
        stmt::{gen_block, gen_for_callback_incrementing, gen_if_callback, gen_with},
        type_aligned_alloca,
        types::ndarray::NDArrayType,
        values::{
            ndarray::NDArrayValue, ArrayLikeIndexer, ArrayLikeValue, ArraySliceValue, ListValue,
@ -650,27 +651,12 @@ fn format_rpc_ret<'ctx>(
            // (4 + 4 * ndims) bytes with 8-byte alignment
            let sizeof_dims =
                ctx.builder.build_int_mul(ndarray.load_ndims(ctx), llvm_usize_sizeof, "").unwrap();
-            let unaligned_buffer_size =
+            let buffer_size =
                ctx.builder.build_int_add(sizeof_dims, llvm_pdata_sizeof, "").unwrap();
            let buffer_size = round_up(ctx, unaligned_buffer_size, llvm_usize.const_int(8, false));
            let stackptr = call_stacksave(ctx, None);
-            // Just to be absolutely sure, alloca in [i8 x 8] slices to force 8-byte alignment
+            let buffer =
-            let buffer = ctx
+                type_aligned_alloca(generator, ctx, llvm_i8_8, buffer_size, Some("rpc.buffer"));
                .builder
                .build_array_alloca(
                    llvm_i8_8,
                    ctx.builder
                        .build_int_unsigned_div(buffer_size, llvm_usize.const_int(8, false), "")
                        .unwrap(),
                    "rpc.buffer",
                )
                .unwrap();
            let buffer = ctx
                .builder
                .build_bit_cast(buffer, llvm_pi8, "")
                .map(BasicValueEnum::into_pointer_value)
                .unwrap();
            let buffer = ArraySliceValue::from_ptr_val(buffer, buffer_size, None);
            // The first call to `rpc_recv` reads the top-level ndarray object: [pdata, shape]
@ -743,7 +729,7 @@ fn format_rpc_ret<'ctx>(
                );
            }
-            ndarray.create_data(ctx, llvm_elem_ty, num_elements);
+            ndarray.create_data(generator, ctx, llvm_elem_ty, num_elements);
            let ndarray_data = ndarray.data().base_ptr(ctx, generator);
            let ndarray_data_i8 =
--- a/nac3core/src/codegen/expr.rs
+++ b/nac3core/src/codegen/expr.rs
@ -2852,7 +2852,7 @@ fn gen_ndarray_subscript_expr<'ctx, G: CodeGenerator>(
                    .builder
                    .build_int_z_extend_or_bit_cast(ndarray_num_elems, sizeof_elem.get_type(), "")
                    .unwrap();
-                ndarray.create_data(ctx, llvm_ndarray_data_t, ndarray_num_elems);
+                ndarray.create_data(generator, ctx, llvm_ndarray_data_t, ndarray_num_elems);
                let v_data_src_ptr = v.data().ptr_offset(ctx, generator, &index_addr, None);
                call_memcpy_generic(
--- a/nac3core/src/codegen/llvm_intrinsics.rs
+++ b/nac3core/src/codegen/llvm_intrinsics.rs
@ -343,3 +343,25 @@ pub fn call_float_powi<'ctx>(
        .map(Either::unwrap_left)
        .unwrap()
 }
 /// Emits a call to the [`llvm.ctpop`](https://llvm.org/docs/LangRef.html#llvm-ctpop-intrinsic)
 /// intrinsic with `src` as its only operand.
 ///
 /// The intrinsic declaration is looked up in the current module using the type of `src` as the
 /// overload parameter. `name` becomes the name of the emitted call; an empty name is used when it
 /// is `None`.
 ///
 /// # Panics
 ///
 /// Panics if the intrinsic or its declaration cannot be found, if the call cannot be built, or if
 /// the call does not yield a basic value.
 pub fn call_int_ctpop<'ctx>(
    ctx: &CodeGenContext<'ctx, '_>,
    src: IntValue<'ctx>,
    name: Option<&str>,
 ) -> IntValue<'ctx> {
    const FN_NAME: &str = "llvm.ctpop";

    // Resolve the declaration specialised for the operand type.
    let src_ty = src.get_type();
    let ctpop_fn = Intrinsic::find(FN_NAME)
        .and_then(|intrinsic| intrinsic.get_declaration(&ctx.module, &[src_ty.into()]))
        .unwrap();

    // Emit the call, then narrow its result down to an integer value.
    let call_site =
        ctx.builder.build_call(ctpop_fn, &[src.into()], name.unwrap_or_default()).unwrap();
    call_site.try_as_basic_value().map_left(BasicValueEnum::into_int_value).unwrap_left()
 }
--- a/nac3core/src/codegen/mod.rs
+++ b/nac3core/src/codegen/mod.rs
@ -1119,3 +1119,106 @@ fn gen_in_range_check<'ctx>(
 fn get_va_count_arg_name(arg_name: StrRef) -> StrRef {
    // The derived name wraps the argument name as `__<arg_name>_va_count`.
    let va_count_name = format!("__{arg_name}_va_count");
    va_count_name.into()
 }
 /// Queries the alignment of `ty` by forwarding to `get_alignment` on its concrete LLVM type.
 ///
 /// [`BasicType`] itself does not provide `get_alignment`, so the type is first converted into a
 /// [`BasicTypeEnum`] and every variant is dispatched individually.
 pub fn get_type_alignment<'ctx>(ty: impl Into<BasicTypeEnum<'ctx>>) -> IntValue<'ctx> {
    let llvm_ty: BasicTypeEnum<'ctx> = ty.into();

    // Deliberately exhaustive: a new `BasicTypeEnum` variant must be handled here explicitly.
    match llvm_ty {
        BasicTypeEnum::IntType(int_ty) => int_ty.get_alignment(),
        BasicTypeEnum::FloatType(float_ty) => float_ty.get_alignment(),
        BasicTypeEnum::PointerType(ptr_ty) => ptr_ty.get_alignment(),
        BasicTypeEnum::ArrayType(array_ty) => array_ty.get_alignment(),
        BasicTypeEnum::StructType(struct_ty) => struct_ty.get_alignment(),
        BasicTypeEnum::VectorType(vector_ty) => vector_ty.get_alignment(),
    }
 }
 /// Inserts an `alloca` instruction with allocation `size` given in bytes and the alignment of the
 /// given type.
 ///
 /// `size` may be an integer of any width: it is zero-extended or truncated to `size_t` before
 /// use.
 ///
 /// The returned [`PointerValue`] will have a type of `i8*`, a size of at least `size`, and will be
 /// aligned with the alignment of `align_ty`. `name`, if given, is attached to the returned `i8*`
 /// value rather than to the underlying `alloca`.
 ///
 /// When the optimization level is [`OptimizationLevel::None`], an assertion checking that the
 /// alignment of `align_ty` is a power of two is emitted as well.
 pub fn type_aligned_alloca<'ctx, G: CodeGenerator + ?Sized>(
    generator: &mut G,
    ctx: &mut CodeGenContext<'ctx, '_>,
    align_ty: impl Into<BasicTypeEnum<'ctx>>,
    size: IntValue<'ctx>,
    name: Option<&str>,
 ) -> PointerValue<'ctx> {
    /// Rounds `val` up to the nearest multiple of `power_of_two`.
    ///
    /// The computation is `(val + (power_of_two - 1)) & !(power_of_two - 1)`, which only yields
    /// a multiple when `power_of_two` really is a power of two.
    fn round_up<'ctx>(
        ctx: &CodeGenContext<'ctx, '_>,
        val: IntValue<'ctx>,
        power_of_two: IntValue<'ctx>,
    ) -> IntValue<'ctx> {
        debug_assert_eq!(
            val.get_type().get_bit_width(),
            power_of_two.get_type().get_bit_width(),
            "`val` ({}) and `power_of_two` ({}) must be the same type",
            val.get_type(),
            power_of_two.get_type(),
        );

        let llvm_val_t = val.get_type();
        // Largest possible remainder, which doubles as the mask of the low bits to clear.
        let max_rem =
            ctx.builder.build_int_sub(power_of_two, llvm_val_t.const_int(1, false), "").unwrap();
        ctx.builder
            .build_and(
                ctx.builder.build_int_add(val, max_rem, "").unwrap(),
                ctx.builder.build_not(max_rem, "").unwrap(),
                "",
            )
            .unwrap()
    }

    let llvm_i8 = ctx.ctx.i8_type();
    let llvm_pi8 = llvm_i8.ptr_type(AddressSpace::default());
    let llvm_usize = generator.get_size_type(ctx.ctx);
    let align_ty = align_ty.into();

    // Normalise `size` to `size_t`. A truncation is only valid from a strictly wider type, so a
    // narrower byte count has to be zero-extended instead.
    let size = if size.get_type().get_bit_width() < llvm_usize.get_bit_width() {
        ctx.builder.build_int_z_extend(size, llvm_usize, "").unwrap()
    } else {
        ctx.builder.build_int_truncate_or_bit_cast(size, llvm_usize, "").unwrap()
    };

    let alignment = get_type_alignment(align_ty);
    let alignment = ctx.builder.build_int_truncate_or_bit_cast(alignment, llvm_usize, "").unwrap();

    // The rounding below relies on a power-of-two alignment; verify it at runtime, but only in
    // unoptimized builds.
    if ctx.registry.llvm_options.opt_level == OptimizationLevel::None {
        // A power of two has exactly one bit set.
        let alignment_bitcount = llvm_intrinsics::call_int_ctpop(ctx, alignment, None);

        ctx.make_assert(
            generator,
            ctx.builder
                .build_int_compare(
                    IntPredicate::EQ,
                    alignment_bitcount,
                    alignment_bitcount.get_type().const_int(1, false),
                    "",
                )
                .unwrap(),
            "0:AssertionError",
            "Expected power-of-two alignment for aligned_alloca, got {0}",
            [Some(alignment), None, None],
            ctx.current_loc,
        );
    }

    let buffer_size = round_up(ctx, size, alignment);
    let aligned_slices = ctx.builder.build_int_unsigned_div(buffer_size, alignment, "").unwrap();

    // Allocate in slices of `align_ty` (one slice per `alignment` bytes of the rounded-up size),
    // so that the `alloca` is typed with `align_ty` itself.
    let buffer = ctx.builder.build_array_alloca(align_ty, aligned_slices, "").unwrap();

    ctx.builder
        .build_bit_cast(buffer, llvm_pi8, name.unwrap_or_default())
        .map(BasicValueEnum::into_pointer_value)
        .unwrap()
 }
--- a/nac3core/src/codegen/numpy.rs
+++ b/nac3core/src/codegen/numpy.rs
@ -235,7 +235,7 @@ fn ndarray_init_data<'ctx, G: CodeGenerator + ?Sized>(
        &ndarray.shape().as_slice_value(ctx, generator),
        (None, None),
    );
-    ndarray.create_data(ctx, llvm_ndarray_data_t, ndarray_num_elems);
+    ndarray.create_data(generator, ctx, llvm_ndarray_data_t, ndarray_num_elems);
    ndarray
 }
--- a/nac3core/src/codegen/values/ndarray/mod.rs
+++ b/nac3core/src/codegen/values/ndarray/mod.rs
@ -12,6 +12,7 @@ use crate::codegen::{
    irrt,
    llvm_intrinsics::call_int_umin,
    stmt::gen_for_callback_incrementing,
    type_aligned_alloca,
    types::{ndarray::NDArrayType, structure::StructField},
    CodeGenContext, CodeGenerator,
 };
@ -128,9 +129,10 @@ impl<'ctx> NDArrayValue<'ctx> {
    /// Convenience method for creating a new array storing data elements with the given element
    /// type `elem_ty` and `size`.
-    pub fn create_data(
+    pub fn create_data<G: CodeGenerator + ?Sized>(
        &self,
-        ctx: &CodeGenContext<'ctx, '_>,
+        generator: &mut G,
        ctx: &mut CodeGenContext<'ctx, '_>,
        elem_ty: BasicTypeEnum<'ctx>,
        size: IntValue<'ctx>,
    ) {
@ -140,11 +142,8 @@ impl<'ctx> NDArrayValue<'ctx> {
            .unwrap();
        let nbytes = ctx.builder.build_int_mul(size, itemsize, "").unwrap();
-        // TODO: What about alignment?
+        let data = type_aligned_alloca(generator, ctx, elem_ty, nbytes, None);
-        self.store_data(
+        self.store_data(ctx, data);
            ctx,
            ctx.builder.build_array_alloca(ctx.ctx.i8_type(), nbytes, "").unwrap(),
        );
    }
    /// Returns a proxy object to the field storing the data of this `NDArray`.