diff --git a/nac3artiq/src/codegen.rs b/nac3artiq/src/codegen.rs index 6340eb90..5273863e 100644 --- a/nac3artiq/src/codegen.rs +++ b/nac3artiq/src/codegen.rs @@ -18,6 +18,7 @@ use nac3core::{ irrt::ndarray::call_ndarray_calc_size, llvm_intrinsics::{call_int_smax, call_memcpy_generic, call_stackrestore, call_stacksave}, stmt::{gen_block, gen_for_callback_incrementing, gen_if_callback, gen_with}, + type_aligned_alloca, types::NDArrayType, values::{ ArrayLikeIndexer, ArrayLikeValue, ArraySliceValue, ListValue, NDArrayValue, ProxyValue, @@ -650,27 +651,12 @@ fn format_rpc_ret<'ctx>( // (4 + 4 * ndims) bytes with 8-byte alignment let sizeof_dims = ctx.builder.build_int_mul(ndarray.load_ndims(ctx), llvm_usize_sizeof, "").unwrap(); - let unaligned_buffer_size = + let buffer_size = ctx.builder.build_int_add(sizeof_dims, llvm_pdata_sizeof, "").unwrap(); - let buffer_size = round_up(ctx, unaligned_buffer_size, llvm_usize.const_int(8, false)); let stackptr = call_stacksave(ctx, None); - // Just to be absolutely sure, alloca in [i8 x 8] slices to force 8-byte alignment - let buffer = ctx - .builder - .build_array_alloca( - llvm_i8_8, - ctx.builder - .build_int_unsigned_div(buffer_size, llvm_usize.const_int(8, false), "") - .unwrap(), - "rpc.buffer", - ) - .unwrap(); - let buffer = ctx - .builder - .build_bit_cast(buffer, llvm_pi8, "") - .map(BasicValueEnum::into_pointer_value) - .unwrap(); + let buffer = + type_aligned_alloca(generator, ctx, llvm_i8_8, buffer_size, Some("rpc.buffer")); let buffer = ArraySliceValue::from_ptr_val(buffer, buffer_size, None); // The first call to `rpc_recv` reads the top-level ndarray object: [pdata, shape] @@ -743,7 +729,7 @@ fn format_rpc_ret<'ctx>( ); } - ndarray.create_data(ctx, llvm_elem_ty, num_elements); + ndarray.create_data(generator, ctx, llvm_elem_ty, num_elements); let ndarray_data = ndarray.data().base_ptr(ctx, generator); let ndarray_data_i8 = diff --git a/nac3core/src/codegen/expr.rs b/nac3core/src/codegen/expr.rs index 7e69e4d8..fbaf6634 100644 --- a/nac3core/src/codegen/expr.rs +++ b/nac3core/src/codegen/expr.rs @@ -2852,7 +2852,7 @@ fn gen_ndarray_subscript_expr<'ctx, G: CodeGenerator>( .builder .build_int_z_extend_or_bit_cast(ndarray_num_elems, sizeof_elem.get_type(), "") .unwrap(); - ndarray.create_data(ctx, llvm_ndarray_data_t, ndarray_num_elems); + ndarray.create_data(generator, ctx, llvm_ndarray_data_t, ndarray_num_elems); let v_data_src_ptr = v.data().ptr_offset(ctx, generator, &index_addr, None); call_memcpy_generic( diff --git a/nac3core/src/codegen/llvm_intrinsics.rs b/nac3core/src/codegen/llvm_intrinsics.rs index 4b62a5c2..9c11f88d 100644 --- a/nac3core/src/codegen/llvm_intrinsics.rs +++ b/nac3core/src/codegen/llvm_intrinsics.rs @@ -343,3 +343,25 @@ pub fn call_float_powi<'ctx>( .map(Either::unwrap_left) .unwrap() } + +/// Invokes the [`llvm.ctpop`](https://llvm.org/docs/LangRef.html#llvm-ctpop-intrinsic) intrinsic. +pub fn call_int_ctpop<'ctx>( + ctx: &CodeGenContext<'ctx, '_>, + src: IntValue<'ctx>, + name: Option<&str>, +) -> IntValue<'ctx> { + const FN_NAME: &str = "llvm.ctpop"; + + let llvm_src_t = src.get_type(); + + let intrinsic_fn = Intrinsic::find(FN_NAME) + .and_then(|intrinsic| intrinsic.get_declaration(&ctx.module, &[llvm_src_t.into()])) + .unwrap(); + + ctx.builder + .build_call(intrinsic_fn, &[src.into()], name.unwrap_or_default()) + .map(CallSiteValue::try_as_basic_value) + .map(|v| v.map_left(BasicValueEnum::into_int_value)) + .map(Either::unwrap_left) + .unwrap() +} diff --git a/nac3core/src/codegen/mod.rs b/nac3core/src/codegen/mod.rs index b2bb5ad5..bc1ce0b5 100644 --- a/nac3core/src/codegen/mod.rs +++ b/nac3core/src/codegen/mod.rs @@ -1119,3 +1119,106 @@ fn gen_in_range_check<'ctx>( fn get_va_count_arg_name(arg_name: StrRef) -> StrRef { format!("__{}_va_count", &arg_name).into() } + +/// Returns the alignment of the type. +/// +/// This is necessary as `get_alignment` is not implemented as part of [`BasicType`]. +pub fn get_type_alignment<'ctx>(ty: impl Into>) -> IntValue<'ctx> { + match ty.into() { + BasicTypeEnum::ArrayType(ty) => ty.get_alignment(), + BasicTypeEnum::FloatType(ty) => ty.get_alignment(), + BasicTypeEnum::IntType(ty) => ty.get_alignment(), + BasicTypeEnum::PointerType(ty) => ty.get_alignment(), + BasicTypeEnum::StructType(ty) => ty.get_alignment(), + BasicTypeEnum::VectorType(ty) => ty.get_alignment(), + } +} + +/// Inserts an `alloca` instruction with allocation `size` given in bytes and the alignment of the +/// given type. +/// +/// The returned [`PointerValue`] will have a type of `i8*`, a size of at least `size`, and will be +/// aligned with the alignment of `align_ty`. +pub fn type_aligned_alloca<'ctx, G: CodeGenerator + ?Sized>( + generator: &mut G, + ctx: &mut CodeGenContext<'ctx, '_>, + align_ty: impl Into>, + size: IntValue<'ctx>, + name: Option<&str>, +) -> PointerValue<'ctx> { + /// Round `val` up to its modulo `power_of_two`. + fn round_up<'ctx>( + ctx: &CodeGenContext<'ctx, '_>, + val: IntValue<'ctx>, + power_of_two: IntValue<'ctx>, + ) -> IntValue<'ctx> { + debug_assert_eq!( + val.get_type().get_bit_width(), + power_of_two.get_type().get_bit_width(), + "`val` ({}) and `power_of_two` ({}) must be the same type", + val.get_type(), + power_of_two.get_type(), + ); + + let llvm_val_t = val.get_type(); + + let max_rem = + ctx.builder.build_int_sub(power_of_two, llvm_val_t.const_int(1, false), "").unwrap(); + ctx.builder + .build_and( + ctx.builder.build_int_add(val, max_rem, "").unwrap(), + ctx.builder.build_not(max_rem, "").unwrap(), + "", + ) + .unwrap() + } + + let llvm_i8 = ctx.ctx.i8_type(); + let llvm_pi8 = llvm_i8.ptr_type(AddressSpace::default()); + let llvm_usize = generator.get_size_type(ctx.ctx); + let align_ty = align_ty.into(); + + let size = ctx.builder.build_int_z_extend_or_bit_cast(size, llvm_usize, "").unwrap(); + + debug_assert_eq!( + size.get_type().get_bit_width(), + llvm_usize.get_bit_width(), + "Expected size_t ({}) for parameter `size` of `aligned_alloca`, got {}", + llvm_usize, + size.get_type(), + ); + + let alignment = get_type_alignment(align_ty); + let alignment = ctx.builder.build_int_z_extend_or_bit_cast(alignment, llvm_usize, "").unwrap(); + + if ctx.registry.llvm_options.opt_level == OptimizationLevel::None { + let alignment_bitcount = llvm_intrinsics::call_int_ctpop(ctx, alignment, None); + + ctx.make_assert( + generator, + ctx.builder + .build_int_compare( + IntPredicate::EQ, + alignment_bitcount, + alignment_bitcount.get_type().const_int(1, false), + "", + ) + .unwrap(), + "0:AssertionError", + "Expected power-of-two alignment for aligned_alloca, got {0}", + [Some(alignment), None, None], + ctx.current_loc, + ); + } + + let buffer_size = round_up(ctx, size, alignment); + let aligned_slices = ctx.builder.build_int_unsigned_div(buffer_size, alignment, "").unwrap(); + + // Just to be absolutely sure, alloca in [i8 x alignment] slices + let buffer = ctx.builder.build_array_alloca(align_ty, aligned_slices, "").unwrap(); + + ctx.builder + .build_bit_cast(buffer, llvm_pi8, name.unwrap_or_default()) + .map(BasicValueEnum::into_pointer_value) + .unwrap() +} diff --git a/nac3core/src/codegen/numpy.rs b/nac3core/src/codegen/numpy.rs index ddaa5154..ff2e084a 100644 --- a/nac3core/src/codegen/numpy.rs +++ b/nac3core/src/codegen/numpy.rs @@ -235,7 +235,7 @@ fn ndarray_init_data<'ctx, G: CodeGenerator + ?Sized>( &ndarray.shape().as_slice_value(ctx, generator), (None, None), ); - ndarray.create_data(ctx, llvm_ndarray_data_t, ndarray_num_elems); + ndarray.create_data(generator, ctx, llvm_ndarray_data_t, ndarray_num_elems); ndarray } diff --git a/nac3core/src/codegen/values/ndarray/mod.rs b/nac3core/src/codegen/values/ndarray/mod.rs index 2583d45f..fddaff1c 100644 --- a/nac3core/src/codegen/values/ndarray/mod.rs +++ b/nac3core/src/codegen/values/ndarray/mod.rs @@ -12,6 +12,7 @@ use crate::codegen::{ irrt, llvm_intrinsics::call_int_umin, stmt::gen_for_callback_incrementing, + type_aligned_alloca, types::{structure::StructField, NDArrayType}, CodeGenContext, CodeGenerator, }; @@ -128,9 +129,10 @@ impl<'ctx> NDArrayValue<'ctx> { /// Convenience method for creating a new array storing data elements with the given element /// type `elem_ty` and `size`. - pub fn create_data( + pub fn create_data( &self, - ctx: &CodeGenContext<'ctx, '_>, + generator: &mut G, + ctx: &mut CodeGenContext<'ctx, '_>, elem_ty: BasicTypeEnum<'ctx>, size: IntValue<'ctx>, ) { @@ -140,11 +142,8 @@ impl<'ctx> NDArrayValue<'ctx> { .unwrap(); let nbytes = ctx.builder.build_int_mul(size, itemsize, "").unwrap(); - // TODO: What about alignment? - self.store_data( - ctx, - ctx.builder.build_array_alloca(ctx.ctx.i8_type(), nbytes, "").unwrap(), - ); + let data = type_aligned_alloca(generator, ctx, elem_ty, nbytes, None); + self.store_data(ctx, data); } /// Returns a proxy object to the field storing the data of this `NDArray`.