[core] Add type_aligned_alloca
This commit is contained in:
parent
a481add9af
commit
4d9ed9376b
|
@ -18,6 +18,7 @@ use nac3core::{
|
|||
irrt::ndarray::call_ndarray_calc_size,
|
||||
llvm_intrinsics::{call_int_smax, call_memcpy_generic, call_stackrestore, call_stacksave},
|
||||
stmt::{gen_block, gen_for_callback_incrementing, gen_if_callback, gen_with},
|
||||
type_aligned_alloca,
|
||||
types::{NDArrayType, ProxyType},
|
||||
values::{
|
||||
ArrayLikeIndexer, ArrayLikeValue, ArraySliceValue, ListValue, NDArrayValue, ProxyValue,
|
||||
|
@ -642,27 +643,12 @@ fn format_rpc_ret<'ctx>(
|
|||
// (4 + 4 * ndims) bytes with 8-byte alignment
|
||||
let sizeof_dims =
|
||||
ctx.builder.build_int_mul(ndarray.load_ndims(ctx), llvm_usize_sizeof, "").unwrap();
|
||||
let unaligned_buffer_size =
|
||||
let buffer_size =
|
||||
ctx.builder.build_int_add(sizeof_dims, llvm_pdata_sizeof, "").unwrap();
|
||||
let buffer_size = round_up(ctx, unaligned_buffer_size, llvm_usize.const_int(8, false));
|
||||
|
||||
let stackptr = call_stacksave(ctx, None);
|
||||
// Just to be absolutely sure, alloca in [i8 x 8] slices to force 8-byte alignment
|
||||
let buffer = ctx
|
||||
.builder
|
||||
.build_array_alloca(
|
||||
llvm_i8_8,
|
||||
ctx.builder
|
||||
.build_int_unsigned_div(buffer_size, llvm_usize.const_int(8, false), "")
|
||||
.unwrap(),
|
||||
"rpc.buffer",
|
||||
)
|
||||
.unwrap();
|
||||
let buffer = ctx
|
||||
.builder
|
||||
.build_bit_cast(buffer, llvm_pi8, "")
|
||||
.map(BasicValueEnum::into_pointer_value)
|
||||
.unwrap();
|
||||
let buffer =
|
||||
type_aligned_alloca(generator, ctx, llvm_i8_8, buffer_size, Some("rpc.buffer"));
|
||||
let buffer = ArraySliceValue::from_ptr_val(buffer, buffer_size, None);
|
||||
|
||||
// The first call to `rpc_recv` reads the top-level ndarray object: [pdata, shape]
|
||||
|
@ -735,7 +721,7 @@ fn format_rpc_ret<'ctx>(
|
|||
);
|
||||
}
|
||||
|
||||
ndarray.create_data(ctx, llvm_elem_ty, num_elements);
|
||||
ndarray.create_data(generator, ctx, llvm_elem_ty, num_elements);
|
||||
|
||||
let ndarray_data = ndarray.data().base_ptr(ctx, generator);
|
||||
let ndarray_data_i8 =
|
||||
|
|
|
@ -2852,7 +2852,7 @@ fn gen_ndarray_subscript_expr<'ctx, G: CodeGenerator>(
|
|||
.builder
|
||||
.build_int_z_extend_or_bit_cast(ndarray_num_elems, sizeof_elem.get_type(), "")
|
||||
.unwrap();
|
||||
ndarray.create_data(ctx, llvm_ndarray_data_t, ndarray_num_elems);
|
||||
ndarray.create_data(generator, ctx, llvm_ndarray_data_t, ndarray_num_elems);
|
||||
|
||||
let v_data_src_ptr = v.data().ptr_offset(ctx, generator, &index_addr, None);
|
||||
call_memcpy_generic(
|
||||
|
|
|
@ -343,3 +343,25 @@ pub fn call_float_powi<'ctx>(
|
|||
.map(Either::unwrap_left)
|
||||
.unwrap()
|
||||
}
|
||||
|
||||
/// Invokes the [`llvm.ctpop`](https://llvm.org/docs/LangRef.html#llvm-ctpop-intrinsic) intrinsic.
|
||||
pub fn call_int_ctpop<'ctx>(
|
||||
ctx: &CodeGenContext<'ctx, '_>,
|
||||
src: IntValue<'ctx>,
|
||||
name: Option<&str>,
|
||||
) -> IntValue<'ctx> {
|
||||
const FN_NAME: &str = "llvm.ctpop";
|
||||
|
||||
let llvm_src_t = src.get_type();
|
||||
|
||||
let intrinsic_fn = Intrinsic::find(FN_NAME)
|
||||
.and_then(|intrinsic| intrinsic.get_declaration(&ctx.module, &[llvm_src_t.into()]))
|
||||
.unwrap();
|
||||
|
||||
ctx.builder
|
||||
.build_call(intrinsic_fn, &[src.into()], name.unwrap_or_default())
|
||||
.map(CallSiteValue::try_as_basic_value)
|
||||
.map(|v| v.map_left(BasicValueEnum::into_int_value))
|
||||
.map(Either::unwrap_left)
|
||||
.unwrap()
|
||||
}
|
||||
|
|
|
@ -1119,3 +1119,106 @@ fn gen_in_range_check<'ctx>(
|
|||
fn get_va_count_arg_name(arg_name: StrRef) -> StrRef {
|
||||
format!("__{}_va_count", &arg_name).into()
|
||||
}
|
||||
|
||||
/// Returns the alignment of the type.
|
||||
///
|
||||
/// This is necessary as `get_alignment` is not implemented as part of [`BasicType`].
|
||||
pub fn get_type_alignment<'ctx>(ty: impl Into<BasicTypeEnum<'ctx>>) -> IntValue<'ctx> {
|
||||
match ty.into() {
|
||||
BasicTypeEnum::ArrayType(ty) => ty.get_alignment(),
|
||||
BasicTypeEnum::FloatType(ty) => ty.get_alignment(),
|
||||
BasicTypeEnum::IntType(ty) => ty.get_alignment(),
|
||||
BasicTypeEnum::PointerType(ty) => ty.get_alignment(),
|
||||
BasicTypeEnum::StructType(ty) => ty.get_alignment(),
|
||||
BasicTypeEnum::VectorType(ty) => ty.get_alignment(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Inserts an `alloca` instruction with allocation `size` given in bytes and the alignment of the
|
||||
/// given type.
|
||||
///
|
||||
/// The returned [`PointerValue`] will have a type of `i8*`, a size of at least `size`, and will be
|
||||
/// aligned with the alignment of `align_ty`.
|
||||
pub fn type_aligned_alloca<'ctx, G: CodeGenerator + ?Sized>(
|
||||
generator: &mut G,
|
||||
ctx: &mut CodeGenContext<'ctx, '_>,
|
||||
align_ty: impl Into<BasicTypeEnum<'ctx>>,
|
||||
size: IntValue<'ctx>,
|
||||
name: Option<&str>,
|
||||
) -> PointerValue<'ctx> {
|
||||
/// Round `val` up to its modulo `power_of_two`.
|
||||
fn round_up<'ctx>(
|
||||
ctx: &CodeGenContext<'ctx, '_>,
|
||||
val: IntValue<'ctx>,
|
||||
power_of_two: IntValue<'ctx>,
|
||||
) -> IntValue<'ctx> {
|
||||
debug_assert_eq!(
|
||||
val.get_type().get_bit_width(),
|
||||
power_of_two.get_type().get_bit_width(),
|
||||
"`val` ({}) and `power_of_two` ({}) must be the same type",
|
||||
val.get_type(),
|
||||
power_of_two.get_type(),
|
||||
);
|
||||
|
||||
let llvm_val_t = val.get_type();
|
||||
|
||||
let max_rem =
|
||||
ctx.builder.build_int_sub(power_of_two, llvm_val_t.const_int(1, false), "").unwrap();
|
||||
ctx.builder
|
||||
.build_and(
|
||||
ctx.builder.build_int_add(val, max_rem, "").unwrap(),
|
||||
ctx.builder.build_not(max_rem, "").unwrap(),
|
||||
"",
|
||||
)
|
||||
.unwrap()
|
||||
}
|
||||
|
||||
let llvm_i8 = ctx.ctx.i8_type();
|
||||
let llvm_pi8 = llvm_i8.ptr_type(AddressSpace::default());
|
||||
let llvm_usize = generator.get_size_type(ctx.ctx);
|
||||
let align_ty = align_ty.into();
|
||||
|
||||
let size = ctx.builder.build_int_cast(size, llvm_usize, "").unwrap();
|
||||
|
||||
debug_assert_eq!(
|
||||
size.get_type().get_bit_width(),
|
||||
llvm_usize.get_bit_width(),
|
||||
"Expected size_t ({}) for parameter `size` of `aligned_alloca`, got {}",
|
||||
llvm_usize,
|
||||
size.get_type(),
|
||||
);
|
||||
|
||||
let alignment = get_type_alignment(align_ty);
|
||||
let alignment = ctx.builder.build_int_cast(alignment, llvm_usize, "").unwrap();
|
||||
|
||||
if ctx.registry.llvm_options.opt_level == OptimizationLevel::None {
|
||||
let alignment_bitcount = llvm_intrinsics::call_int_ctpop(ctx, alignment, None);
|
||||
|
||||
ctx.make_assert(
|
||||
generator,
|
||||
ctx.builder
|
||||
.build_int_compare(
|
||||
IntPredicate::EQ,
|
||||
alignment_bitcount,
|
||||
alignment_bitcount.get_type().const_int(1, false),
|
||||
"",
|
||||
)
|
||||
.unwrap(),
|
||||
"0:AssertionError",
|
||||
"Expected power-of-two alignment for aligned_alloca, got {0}",
|
||||
[Some(alignment), None, None],
|
||||
ctx.current_loc,
|
||||
);
|
||||
}
|
||||
|
||||
let buffer_size = round_up(ctx, size, alignment);
|
||||
let aligned_slices = ctx.builder.build_int_unsigned_div(buffer_size, alignment, "").unwrap();
|
||||
|
||||
// Just to be absolutely sure, alloca in [i8 x alignment] slices
|
||||
let buffer = ctx.builder.build_array_alloca(align_ty, aligned_slices, "").unwrap();
|
||||
|
||||
ctx.builder
|
||||
.build_bit_cast(buffer, llvm_pi8, name.unwrap_or_default())
|
||||
.map(BasicValueEnum::into_pointer_value)
|
||||
.unwrap()
|
||||
}
|
||||
|
|
|
@ -235,7 +235,7 @@ fn ndarray_init_data<'ctx, G: CodeGenerator + ?Sized>(
|
|||
&ndarray.shape().as_slice_value(ctx, generator),
|
||||
(None, None),
|
||||
);
|
||||
ndarray.create_data(ctx, llvm_ndarray_data_t, ndarray_num_elems);
|
||||
ndarray.create_data(generator, ctx, llvm_ndarray_data_t, ndarray_num_elems);
|
||||
|
||||
ndarray
|
||||
}
|
||||
|
|
|
@ -9,8 +9,8 @@ use super::{
|
|||
UntypedArrayLikeAccessor, UntypedArrayLikeMutator,
|
||||
};
|
||||
use crate::codegen::{
|
||||
irrt, llvm_intrinsics::call_int_umin, stmt::gen_for_callback_incrementing, types::NDArrayType,
|
||||
CodeGenContext, CodeGenerator,
|
||||
irrt, llvm_intrinsics::call_int_umin, stmt::gen_for_callback_incrementing, type_aligned_alloca,
|
||||
types::NDArrayType, CodeGenContext, CodeGenerator,
|
||||
};
|
||||
|
||||
/// Proxy type for accessing an `NDArray` value in LLVM.
|
||||
|
@ -122,9 +122,10 @@ impl<'ctx> NDArrayValue<'ctx> {
|
|||
|
||||
/// Convenience method for creating a new array storing data elements with the given element
|
||||
/// type `elem_ty` and `size`.
|
||||
pub fn create_data(
|
||||
pub fn create_data<G: CodeGenerator + ?Sized>(
|
||||
&self,
|
||||
ctx: &CodeGenContext<'ctx, '_>,
|
||||
generator: &mut G,
|
||||
ctx: &mut CodeGenContext<'ctx, '_>,
|
||||
elem_ty: BasicTypeEnum<'ctx>,
|
||||
size: IntValue<'ctx>,
|
||||
) {
|
||||
|
@ -132,11 +133,8 @@ impl<'ctx> NDArrayValue<'ctx> {
|
|||
ctx.builder.build_int_cast(elem_ty.size_of().unwrap(), size.get_type(), "").unwrap();
|
||||
let nbytes = ctx.builder.build_int_mul(size, itemsize, "").unwrap();
|
||||
|
||||
// TODO: What about alignment?
|
||||
self.store_data(
|
||||
ctx,
|
||||
ctx.builder.build_array_alloca(ctx.ctx.i8_type(), nbytes, "").unwrap(),
|
||||
);
|
||||
let data = type_aligned_alloca(generator, ctx, elem_ty, nbytes, None);
|
||||
self.store_data(ctx, data);
|
||||
}
|
||||
|
||||
/// Returns a proxy object to the field storing the data of this `NDArray`.
|
||||
|
|
Loading…
Reference in New Issue