[core] Add type_aligned_alloca
This commit is contained in:
parent
a481add9af
commit
4d9ed9376b
|
@ -18,6 +18,7 @@ use nac3core::{
|
||||||
irrt::ndarray::call_ndarray_calc_size,
|
irrt::ndarray::call_ndarray_calc_size,
|
||||||
llvm_intrinsics::{call_int_smax, call_memcpy_generic, call_stackrestore, call_stacksave},
|
llvm_intrinsics::{call_int_smax, call_memcpy_generic, call_stackrestore, call_stacksave},
|
||||||
stmt::{gen_block, gen_for_callback_incrementing, gen_if_callback, gen_with},
|
stmt::{gen_block, gen_for_callback_incrementing, gen_if_callback, gen_with},
|
||||||
|
type_aligned_alloca,
|
||||||
types::{NDArrayType, ProxyType},
|
types::{NDArrayType, ProxyType},
|
||||||
values::{
|
values::{
|
||||||
ArrayLikeIndexer, ArrayLikeValue, ArraySliceValue, ListValue, NDArrayValue, ProxyValue,
|
ArrayLikeIndexer, ArrayLikeValue, ArraySliceValue, ListValue, NDArrayValue, ProxyValue,
|
||||||
|
@ -642,27 +643,12 @@ fn format_rpc_ret<'ctx>(
|
||||||
// (4 + 4 * ndims) bytes with 8-byte alignment
|
// (4 + 4 * ndims) bytes with 8-byte alignment
|
||||||
let sizeof_dims =
|
let sizeof_dims =
|
||||||
ctx.builder.build_int_mul(ndarray.load_ndims(ctx), llvm_usize_sizeof, "").unwrap();
|
ctx.builder.build_int_mul(ndarray.load_ndims(ctx), llvm_usize_sizeof, "").unwrap();
|
||||||
let unaligned_buffer_size =
|
let buffer_size =
|
||||||
ctx.builder.build_int_add(sizeof_dims, llvm_pdata_sizeof, "").unwrap();
|
ctx.builder.build_int_add(sizeof_dims, llvm_pdata_sizeof, "").unwrap();
|
||||||
let buffer_size = round_up(ctx, unaligned_buffer_size, llvm_usize.const_int(8, false));
|
|
||||||
|
|
||||||
let stackptr = call_stacksave(ctx, None);
|
let stackptr = call_stacksave(ctx, None);
|
||||||
// Just to be absolutely sure, alloca in [i8 x 8] slices to force 8-byte alignment
|
let buffer =
|
||||||
let buffer = ctx
|
type_aligned_alloca(generator, ctx, llvm_i8_8, buffer_size, Some("rpc.buffer"));
|
||||||
.builder
|
|
||||||
.build_array_alloca(
|
|
||||||
llvm_i8_8,
|
|
||||||
ctx.builder
|
|
||||||
.build_int_unsigned_div(buffer_size, llvm_usize.const_int(8, false), "")
|
|
||||||
.unwrap(),
|
|
||||||
"rpc.buffer",
|
|
||||||
)
|
|
||||||
.unwrap();
|
|
||||||
let buffer = ctx
|
|
||||||
.builder
|
|
||||||
.build_bit_cast(buffer, llvm_pi8, "")
|
|
||||||
.map(BasicValueEnum::into_pointer_value)
|
|
||||||
.unwrap();
|
|
||||||
let buffer = ArraySliceValue::from_ptr_val(buffer, buffer_size, None);
|
let buffer = ArraySliceValue::from_ptr_val(buffer, buffer_size, None);
|
||||||
|
|
||||||
// The first call to `rpc_recv` reads the top-level ndarray object: [pdata, shape]
|
// The first call to `rpc_recv` reads the top-level ndarray object: [pdata, shape]
|
||||||
|
@ -735,7 +721,7 @@ fn format_rpc_ret<'ctx>(
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
ndarray.create_data(ctx, llvm_elem_ty, num_elements);
|
ndarray.create_data(generator, ctx, llvm_elem_ty, num_elements);
|
||||||
|
|
||||||
let ndarray_data = ndarray.data().base_ptr(ctx, generator);
|
let ndarray_data = ndarray.data().base_ptr(ctx, generator);
|
||||||
let ndarray_data_i8 =
|
let ndarray_data_i8 =
|
||||||
|
|
|
@ -2852,7 +2852,7 @@ fn gen_ndarray_subscript_expr<'ctx, G: CodeGenerator>(
|
||||||
.builder
|
.builder
|
||||||
.build_int_z_extend_or_bit_cast(ndarray_num_elems, sizeof_elem.get_type(), "")
|
.build_int_z_extend_or_bit_cast(ndarray_num_elems, sizeof_elem.get_type(), "")
|
||||||
.unwrap();
|
.unwrap();
|
||||||
ndarray.create_data(ctx, llvm_ndarray_data_t, ndarray_num_elems);
|
ndarray.create_data(generator, ctx, llvm_ndarray_data_t, ndarray_num_elems);
|
||||||
|
|
||||||
let v_data_src_ptr = v.data().ptr_offset(ctx, generator, &index_addr, None);
|
let v_data_src_ptr = v.data().ptr_offset(ctx, generator, &index_addr, None);
|
||||||
call_memcpy_generic(
|
call_memcpy_generic(
|
||||||
|
|
|
@ -343,3 +343,25 @@ pub fn call_float_powi<'ctx>(
|
||||||
.map(Either::unwrap_left)
|
.map(Either::unwrap_left)
|
||||||
.unwrap()
|
.unwrap()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Invokes the [`llvm.ctpop`](https://llvm.org/docs/LangRef.html#llvm-ctpop-intrinsic) intrinsic.
|
||||||
|
pub fn call_int_ctpop<'ctx>(
|
||||||
|
ctx: &CodeGenContext<'ctx, '_>,
|
||||||
|
src: IntValue<'ctx>,
|
||||||
|
name: Option<&str>,
|
||||||
|
) -> IntValue<'ctx> {
|
||||||
|
const FN_NAME: &str = "llvm.ctpop";
|
||||||
|
|
||||||
|
let llvm_src_t = src.get_type();
|
||||||
|
|
||||||
|
let intrinsic_fn = Intrinsic::find(FN_NAME)
|
||||||
|
.and_then(|intrinsic| intrinsic.get_declaration(&ctx.module, &[llvm_src_t.into()]))
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
ctx.builder
|
||||||
|
.build_call(intrinsic_fn, &[src.into()], name.unwrap_or_default())
|
||||||
|
.map(CallSiteValue::try_as_basic_value)
|
||||||
|
.map(|v| v.map_left(BasicValueEnum::into_int_value))
|
||||||
|
.map(Either::unwrap_left)
|
||||||
|
.unwrap()
|
||||||
|
}
|
||||||
|
|
|
@ -1119,3 +1119,106 @@ fn gen_in_range_check<'ctx>(
|
||||||
fn get_va_count_arg_name(arg_name: StrRef) -> StrRef {
|
fn get_va_count_arg_name(arg_name: StrRef) -> StrRef {
|
||||||
format!("__{}_va_count", &arg_name).into()
|
format!("__{}_va_count", &arg_name).into()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns the alignment of the type.
|
||||||
|
///
|
||||||
|
/// This is necessary as `get_alignment` is not implemented as part of [`BasicType`].
|
||||||
|
pub fn get_type_alignment<'ctx>(ty: impl Into<BasicTypeEnum<'ctx>>) -> IntValue<'ctx> {
|
||||||
|
match ty.into() {
|
||||||
|
BasicTypeEnum::ArrayType(ty) => ty.get_alignment(),
|
||||||
|
BasicTypeEnum::FloatType(ty) => ty.get_alignment(),
|
||||||
|
BasicTypeEnum::IntType(ty) => ty.get_alignment(),
|
||||||
|
BasicTypeEnum::PointerType(ty) => ty.get_alignment(),
|
||||||
|
BasicTypeEnum::StructType(ty) => ty.get_alignment(),
|
||||||
|
BasicTypeEnum::VectorType(ty) => ty.get_alignment(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Inserts an `alloca` instruction with allocation `size` given in bytes and the alignment of the
|
||||||
|
/// given type.
|
||||||
|
///
|
||||||
|
/// The returned [`PointerValue`] will have a type of `i8*`, a size of at least `size`, and will be
|
||||||
|
/// aligned with the alignment of `align_ty`.
|
||||||
|
pub fn type_aligned_alloca<'ctx, G: CodeGenerator + ?Sized>(
|
||||||
|
generator: &mut G,
|
||||||
|
ctx: &mut CodeGenContext<'ctx, '_>,
|
||||||
|
align_ty: impl Into<BasicTypeEnum<'ctx>>,
|
||||||
|
size: IntValue<'ctx>,
|
||||||
|
name: Option<&str>,
|
||||||
|
) -> PointerValue<'ctx> {
|
||||||
|
/// Round `val` up to its modulo `power_of_two`.
|
||||||
|
fn round_up<'ctx>(
|
||||||
|
ctx: &CodeGenContext<'ctx, '_>,
|
||||||
|
val: IntValue<'ctx>,
|
||||||
|
power_of_two: IntValue<'ctx>,
|
||||||
|
) -> IntValue<'ctx> {
|
||||||
|
debug_assert_eq!(
|
||||||
|
val.get_type().get_bit_width(),
|
||||||
|
power_of_two.get_type().get_bit_width(),
|
||||||
|
"`val` ({}) and `power_of_two` ({}) must be the same type",
|
||||||
|
val.get_type(),
|
||||||
|
power_of_two.get_type(),
|
||||||
|
);
|
||||||
|
|
||||||
|
let llvm_val_t = val.get_type();
|
||||||
|
|
||||||
|
let max_rem =
|
||||||
|
ctx.builder.build_int_sub(power_of_two, llvm_val_t.const_int(1, false), "").unwrap();
|
||||||
|
ctx.builder
|
||||||
|
.build_and(
|
||||||
|
ctx.builder.build_int_add(val, max_rem, "").unwrap(),
|
||||||
|
ctx.builder.build_not(max_rem, "").unwrap(),
|
||||||
|
"",
|
||||||
|
)
|
||||||
|
.unwrap()
|
||||||
|
}
|
||||||
|
|
||||||
|
let llvm_i8 = ctx.ctx.i8_type();
|
||||||
|
let llvm_pi8 = llvm_i8.ptr_type(AddressSpace::default());
|
||||||
|
let llvm_usize = generator.get_size_type(ctx.ctx);
|
||||||
|
let align_ty = align_ty.into();
|
||||||
|
|
||||||
|
let size = ctx.builder.build_int_cast(size, llvm_usize, "").unwrap();
|
||||||
|
|
||||||
|
debug_assert_eq!(
|
||||||
|
size.get_type().get_bit_width(),
|
||||||
|
llvm_usize.get_bit_width(),
|
||||||
|
"Expected size_t ({}) for parameter `size` of `aligned_alloca`, got {}",
|
||||||
|
llvm_usize,
|
||||||
|
size.get_type(),
|
||||||
|
);
|
||||||
|
|
||||||
|
let alignment = get_type_alignment(align_ty);
|
||||||
|
let alignment = ctx.builder.build_int_cast(alignment, llvm_usize, "").unwrap();
|
||||||
|
|
||||||
|
if ctx.registry.llvm_options.opt_level == OptimizationLevel::None {
|
||||||
|
let alignment_bitcount = llvm_intrinsics::call_int_ctpop(ctx, alignment, None);
|
||||||
|
|
||||||
|
ctx.make_assert(
|
||||||
|
generator,
|
||||||
|
ctx.builder
|
||||||
|
.build_int_compare(
|
||||||
|
IntPredicate::EQ,
|
||||||
|
alignment_bitcount,
|
||||||
|
alignment_bitcount.get_type().const_int(1, false),
|
||||||
|
"",
|
||||||
|
)
|
||||||
|
.unwrap(),
|
||||||
|
"0:AssertionError",
|
||||||
|
"Expected power-of-two alignment for aligned_alloca, got {0}",
|
||||||
|
[Some(alignment), None, None],
|
||||||
|
ctx.current_loc,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
let buffer_size = round_up(ctx, size, alignment);
|
||||||
|
let aligned_slices = ctx.builder.build_int_unsigned_div(buffer_size, alignment, "").unwrap();
|
||||||
|
|
||||||
|
// Just to be absolutely sure, alloca in [i8 x alignment] slices
|
||||||
|
let buffer = ctx.builder.build_array_alloca(align_ty, aligned_slices, "").unwrap();
|
||||||
|
|
||||||
|
ctx.builder
|
||||||
|
.build_bit_cast(buffer, llvm_pi8, name.unwrap_or_default())
|
||||||
|
.map(BasicValueEnum::into_pointer_value)
|
||||||
|
.unwrap()
|
||||||
|
}
|
||||||
|
|
|
@ -235,7 +235,7 @@ fn ndarray_init_data<'ctx, G: CodeGenerator + ?Sized>(
|
||||||
&ndarray.shape().as_slice_value(ctx, generator),
|
&ndarray.shape().as_slice_value(ctx, generator),
|
||||||
(None, None),
|
(None, None),
|
||||||
);
|
);
|
||||||
ndarray.create_data(ctx, llvm_ndarray_data_t, ndarray_num_elems);
|
ndarray.create_data(generator, ctx, llvm_ndarray_data_t, ndarray_num_elems);
|
||||||
|
|
||||||
ndarray
|
ndarray
|
||||||
}
|
}
|
||||||
|
|
|
@ -9,8 +9,8 @@ use super::{
|
||||||
UntypedArrayLikeAccessor, UntypedArrayLikeMutator,
|
UntypedArrayLikeAccessor, UntypedArrayLikeMutator,
|
||||||
};
|
};
|
||||||
use crate::codegen::{
|
use crate::codegen::{
|
||||||
irrt, llvm_intrinsics::call_int_umin, stmt::gen_for_callback_incrementing, types::NDArrayType,
|
irrt, llvm_intrinsics::call_int_umin, stmt::gen_for_callback_incrementing, type_aligned_alloca,
|
||||||
CodeGenContext, CodeGenerator,
|
types::NDArrayType, CodeGenContext, CodeGenerator,
|
||||||
};
|
};
|
||||||
|
|
||||||
/// Proxy type for accessing an `NDArray` value in LLVM.
|
/// Proxy type for accessing an `NDArray` value in LLVM.
|
||||||
|
@ -122,9 +122,10 @@ impl<'ctx> NDArrayValue<'ctx> {
|
||||||
|
|
||||||
/// Convenience method for creating a new array storing data elements with the given element
|
/// Convenience method for creating a new array storing data elements with the given element
|
||||||
/// type `elem_ty` and `size`.
|
/// type `elem_ty` and `size`.
|
||||||
pub fn create_data(
|
pub fn create_data<G: CodeGenerator + ?Sized>(
|
||||||
&self,
|
&self,
|
||||||
ctx: &CodeGenContext<'ctx, '_>,
|
generator: &mut G,
|
||||||
|
ctx: &mut CodeGenContext<'ctx, '_>,
|
||||||
elem_ty: BasicTypeEnum<'ctx>,
|
elem_ty: BasicTypeEnum<'ctx>,
|
||||||
size: IntValue<'ctx>,
|
size: IntValue<'ctx>,
|
||||||
) {
|
) {
|
||||||
|
@ -132,11 +133,8 @@ impl<'ctx> NDArrayValue<'ctx> {
|
||||||
ctx.builder.build_int_cast(elem_ty.size_of().unwrap(), size.get_type(), "").unwrap();
|
ctx.builder.build_int_cast(elem_ty.size_of().unwrap(), size.get_type(), "").unwrap();
|
||||||
let nbytes = ctx.builder.build_int_mul(size, itemsize, "").unwrap();
|
let nbytes = ctx.builder.build_int_mul(size, itemsize, "").unwrap();
|
||||||
|
|
||||||
// TODO: What about alignment?
|
let data = type_aligned_alloca(generator, ctx, elem_ty, nbytes, None);
|
||||||
self.store_data(
|
self.store_data(ctx, data);
|
||||||
ctx,
|
|
||||||
ctx.builder.build_array_alloca(ctx.ctx.i8_type(), nbytes, "").unwrap(),
|
|
||||||
);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns a proxy object to the field storing the data of this `NDArray`.
|
/// Returns a proxy object to the field storing the data of this `NDArray`.
|
||||||
|
|
Loading…
Reference in New Issue