From f6a554d3c95e9aec16a0a71d53f3b5322f2a4c42 Mon Sep 17 00:00:00 2001 From: lyken Date: Wed, 17 Jul 2024 12:16:41 +0800 Subject: [PATCH] core/model: introduce `Model<'ctx>` abstraction --- nac3core/src/codegen/mod.rs | 1 + nac3core/src/codegen/model/core.rs | 173 +++++++++++ nac3core/src/codegen/model/fixed_int.rs | 156 ++++++++++ nac3core/src/codegen/model/int.rs | 83 +++++ nac3core/src/codegen/model/int_util.rs | 39 +++ nac3core/src/codegen/model/mod.rs | 16 + nac3core/src/codegen/model/opaque.rs | 57 ++++ nac3core/src/codegen/model/pointer.rs | 94 ++++++ nac3core/src/codegen/model/slice.rs | 87 ++++++ nac3core/src/codegen/model/structure.rs | 396 ++++++++++++++++++++++++ 10 files changed, 1102 insertions(+) create mode 100644 nac3core/src/codegen/model/core.rs create mode 100644 nac3core/src/codegen/model/fixed_int.rs create mode 100644 nac3core/src/codegen/model/int.rs create mode 100644 nac3core/src/codegen/model/int_util.rs create mode 100644 nac3core/src/codegen/model/mod.rs create mode 100644 nac3core/src/codegen/model/opaque.rs create mode 100644 nac3core/src/codegen/model/pointer.rs create mode 100644 nac3core/src/codegen/model/slice.rs create mode 100644 nac3core/src/codegen/model/structure.rs diff --git a/nac3core/src/codegen/mod.rs b/nac3core/src/codegen/mod.rs index 17952369..85b963bb 100644 --- a/nac3core/src/codegen/mod.rs +++ b/nac3core/src/codegen/mod.rs @@ -41,6 +41,7 @@ pub mod extern_fns; mod generator; pub mod irrt; pub mod llvm_intrinsics; +pub mod model; pub mod numpy; pub mod stmt; diff --git a/nac3core/src/codegen/model/core.rs b/nac3core/src/codegen/model/core.rs new file mode 100644 index 00000000..0c1096e0 --- /dev/null +++ b/nac3core/src/codegen/model/core.rs @@ -0,0 +1,173 @@ +use inkwell::{ + context::Context, + types::{BasicType, BasicTypeEnum}, + values::{BasicValue, BasicValueEnum, PointerValue}, +}; + +use crate::codegen::{CodeGenContext, CodeGenerator}; + +use super::{slice::ArraySlice, Int, Pointer}; + +/* +Explanation on the 
abstraction: + + In LLVM, there are TYPES and VALUES. + + Inkwell gives us TYPES [`BasicTypeEnum<'ctx>`] and VALUES [`BasicValueEnum<'ctx>`], + but by themselves, they lack a lot of Rust compile-time known info. + + e.g., You did `let ptr = builder.build_alloca(my_llvm_ndarray_struct_ty)`, + but `ptr` is just a `PointerValue<'ctx>`, almost everything about the + underlying `my_llvm_ndarray_struct_ty` is gone. + + The `Model` abstraction is a wrapper around inkwell TYPES and VALUES but with + a richer interface. + + `Model<'ctx>` is a wrapper around an inkwell TYPE: + - `NIntModel<Byte>` is an i8. + - `NIntModel<Int32>` is an i32. + - `NIntModel<Int64>` is an i64. + - `IntModel` is a carrier for an inkwell `IntType<'ctx>`, + used when the type is dynamic/cannot be specified at Rust compile time. + - `PointerModel<'ctx, E>` is a wrapper for `PointerType<'ctx>`, + where `E` is another `Model<'ctx>` that describes the element type of the pointer. + - `StructModel<'ctx, NDArray>` is a wrapper for `StructType<'ctx>`, + with additional information encoded within `NDArray`. (See `StructKind<'ctx>`) + + `Model<'ctx>::Value`/`ModelValue<'ctx>` is a wrapper around an inkwell VALUE: + - `NInt<'ctx, T>` is a value of `NIntModel<'ctx, T>`, + where `T` could be `Byte`, `Int32`, or `Int64`. + - `Pointer<'ctx, E>` is a value of `PointerModel<'ctx, E>`. + + Other interesting utilities: + - Given a `Model<'ctx>`, say, `let ndarray_model = StructModel<'ctx, NDArray>`, + you can do `ndarray_model.alloca(ctx, "my_ndarray")` to get a `Pointer<'ctx, Struct<'ctx, NDArray>>`, + notice that all LLVM type information is preserved. + - For a `let my_ndarray = Pointer<'ctx, StructModel<NDArray>>`, you can access a field by doing + `my_ndarray.gep(ctx, |f| f.itemsize).load() // or .store()`, and you can chain them + together for nested structures. 
+ + A brief summary of the `Model<'ctx>` and `ModelValue<'ctx>` traits: + - Model<'ctx> + // The associated ModelValue of this Model + - type Value: ModelValue<'ctx> + + // Get the LLVM type of this Model + - fn get_llvm_type(&self) + + // Check if the input type is equal to the LLVM type of this Model + // NOTE: this function is provided through `CanCheckLLVMType<'ctx>` + - fn check_llvm_type(&self, ty) -> Result<(), String> + + // Check if the input value's type is equal to the LLVM type of this Model. + // + // If so, wrap it with `Self::Value`. + - fn review_value<V: BasicValue<'ctx>>(&self, val: V) -> Result<Self::Value, String> + + - ModelValue<'ctx> + // get the LLVM value of this ModelValue + - fn get_llvm_value(&self) -> BasicValueEnum<'ctx> +*/ + +/// A value that belongs to/produced by a [`Model<'ctx>`] +pub trait ModelValue<'ctx>: Clone + Copy { + /// Get the LLVM value of this [`ModelValue<'ctx>`] + fn get_llvm_value(&self) -> BasicValueEnum<'ctx>; +} + +// NOTE: Should have been within [`Model<'ctx>`], +// but rust object safety requirements made it necessary to +// split the trait. +pub trait CanCheckLLVMType<'ctx> { + /// See [`Model::check_llvm_type`] + fn check_llvm_type_impl( + &self, + ctx: &'ctx Context, + ty: BasicTypeEnum<'ctx>, + ) -> Result<(), String>; +} + +pub trait Model<'ctx>: Clone + Copy + CanCheckLLVMType<'ctx> + Sized { + /// The associated [`ModelValue<'ctx>`] of this Model. + type Value: ModelValue<'ctx>; + + /// Get the LLVM type of this [`Model<'ctx>`] + fn get_llvm_type(&self, ctx: &'ctx Context) -> BasicTypeEnum<'ctx>; + + /// Check if the input type is equal to the LLVM type of this Model. + /// + /// If it doesn't match, an [`Err`] with a human-readable message is + /// returned explaining *how* it was different. Meant for debugging. 
+ fn check_llvm_type>(&self, ctx: &'ctx Context, ty: T) -> Result<(), String> { + self.check_llvm_type_impl(ctx, ty.as_basic_type_enum()) + } + + /// Check if the input value's type is equal to the LLVM type of this Model + /// (using [`Model::check_llvm_type`]). + /// + /// If so, wrap it with [`Model::Value`]. + fn review_value>( + &self, + ctx: &'ctx Context, + value: V, + ) -> Result; + + /// Check if [`Self::Value`] has the correct type described by this [`Model<'ctx>`] + fn check_value(&self, ctx: &'ctx Context, value: Self::Value) -> Result<(), String> { + self.review_value(ctx, value.get_llvm_value())?; + Ok(()) + } + + /// Build an instruction to allocate a value with the LLVM type of this [`Model<'ctx>`]. + fn alloca(&self, ctx: &CodeGenContext<'ctx, '_>, name: &str) -> Pointer<'ctx, Self> { + Pointer { + element: *self, + value: ctx.builder.build_alloca(self.get_llvm_type(ctx.ctx), name).unwrap(), + } + } + + /// Build an instruction to allocate an array of the LLVM type of this [`Model<'ctx>`]. + fn array_alloca( + &self, + ctx: &CodeGenContext<'ctx, '_>, + count: Int<'ctx>, + name: &str, + ) -> ArraySlice<'ctx, Self> { + ArraySlice { + num_elements: count, + pointer: Pointer { + element: *self, + value: ctx + .builder + .build_array_alloca(self.get_llvm_type(ctx.ctx), count.0, name) + .unwrap(), + }, + } + } + + /// Do [`CodeGenerator::gen_var_alloc`] with the LLVM type of this [`Model<'ctx>`]. + fn var_alloc( + &self, + generator: &mut G, + ctx: &mut CodeGenContext<'ctx, '_>, + name: Option<&str>, + ) -> Result, String> { + let value = generator.gen_var_alloc(ctx, self.get_llvm_type(ctx.ctx), name)?; + Ok(Pointer { element: *self, value }) + } + + /// Do [`CodeGenerator::gen_array_var_alloc`] with the LLVM type of this [`Model<'ctx>`]. 
+ fn array_var_alloc( + &self, + generator: &mut G, + ctx: &mut CodeGenContext<'ctx, '_>, + size: Int<'ctx>, + name: Option<&'ctx str>, + ) -> Result, String> { + let slice = + generator.gen_array_var_alloc(ctx, self.get_llvm_type(ctx.ctx), size.0, name)?; + let ptr = PointerValue::from(slice); // TODO: Remove ArraySliceValue + + Ok(Pointer { element: *self, value: ptr }) + } +} diff --git a/nac3core/src/codegen/model/fixed_int.rs b/nac3core/src/codegen/model/fixed_int.rs new file mode 100644 index 00000000..c60f8236 --- /dev/null +++ b/nac3core/src/codegen/model/fixed_int.rs @@ -0,0 +1,156 @@ +use inkwell::{ + context::Context, + types::{BasicType, BasicTypeEnum, IntType}, + values::{BasicValue, BasicValueEnum, IntValue}, +}; + +use super::{ + core::*, + int_util::{check_int_llvm_type, review_int_llvm_value}, + Int, IntModel, +}; + +/// A marker trait to mark a singleton struct that describes a particular fixed integer type. +/// See [`Bool`], [`Byte`], [`Int32`], etc. +/// +/// The [`Default`] trait is to enable auto-instantiations. +pub trait NIntKind: Clone + Copy + Default { + /// Get the [`IntType<'ctx>`] of this [`NIntKind`]. + fn get_int_type(ctx: &Context) -> IntType<'_>; + + /// Get the [`IntType<'ctx>`] of this [`NIntKind`]. + /// + /// Compared to using [`NIntKind::get_int_type`], this + /// function does not require [`Context`]. 
+ fn get_bit_width() -> u32; +} + +// Some pre-defined fixed integers + +#[derive(Debug, Clone, Copy, Default)] +pub struct Bool; +pub type BoolModel = NIntModel; + +impl NIntKind for Bool { + fn get_int_type(ctx: &Context) -> IntType<'_> { + ctx.bool_type() + } + + fn get_bit_width() -> u32 { + 1 + } +} + +#[derive(Debug, Clone, Copy, Default)] +pub struct Byte; +pub type ByteModel = NIntModel; + +impl NIntKind for Byte { + fn get_int_type(ctx: &Context) -> IntType<'_> { + ctx.i8_type() + } + + fn get_bit_width() -> u32 { + 8 + } +} + +#[derive(Debug, Clone, Copy, Default)] +pub struct Int32; +pub type Int32Model = NIntModel; + +impl NIntKind for Int32 { + fn get_int_type(ctx: &Context) -> IntType<'_> { + ctx.i32_type() + } + + fn get_bit_width() -> u32 { + 32 + } +} + +#[derive(Debug, Clone, Copy, Default)] +pub struct Int64; +pub type Int64Model = NIntModel; + +impl NIntKind for Int64 { + fn get_int_type(ctx: &Context) -> IntType<'_> { + ctx.i64_type() + } + + fn get_bit_width() -> u32 { + 64 + } +} + +/// A [`Model`] representing an [`IntType<'ctx>`] of a specified bit width. +/// +/// Also see [`IntModel`], which is less constrained than [`NIntModel`], +/// but enables one to handle dynamic [`IntType<'ctx>`] at runtime. 
+#[derive(Debug, Clone, Copy, Default)] +pub struct NIntModel(pub T); + +impl<'ctx, T: NIntKind> CanCheckLLVMType<'ctx> for NIntModel { + fn check_llvm_type_impl( + &self, + ctx: &'ctx Context, + ty: BasicTypeEnum<'ctx>, + ) -> Result<(), String> { + check_int_llvm_type(ty, T::get_int_type(ctx)) + } +} + +impl<'ctx, T: NIntKind> Model<'ctx> for NIntModel { + type Value = NInt<'ctx, T>; + + fn get_llvm_type(&self, ctx: &'ctx Context) -> BasicTypeEnum<'ctx> { + T::get_int_type(ctx).as_basic_type_enum() + } + + fn review_value>( + &self, + ctx: &'ctx Context, + value: V, + ) -> Result { + let value = review_int_llvm_value(value.as_basic_value_enum(), T::get_int_type(ctx))?; + Ok(NInt { kind: self.0, value }) + } +} + +impl NIntModel { + /// "Demote" this [`NIntModel`] to an [`IntModel`]. + /// + /// Information about the [`NIntKind`] will be lost. + pub fn to_int_model(self, ctx: &Context) -> IntModel<'_> { + IntModel(T::get_int_type(ctx)) + } + + /// Create an unsigned constant of this [`NIntModel`]. + pub fn constant<'ctx>(&self, ctx: &'ctx Context, value: u64) -> NInt<'ctx, T> { + NInt { kind: self.0, value: T::get_int_type(ctx).const_int(value, false) } + } +} + +/// A value of [`NIntModel<'ctx>`] +#[derive(Debug, Clone, Copy)] +pub struct NInt<'ctx, T: NIntKind> { + /// The [`NIntKind`] marker of this [`NInt`] + pub kind: T, + /// The LLVM value of this [`NInt`]. + pub value: IntValue<'ctx>, +} + +impl<'ctx, T: NIntKind> ModelValue<'ctx> for NInt<'ctx, T> { + fn get_llvm_value(&self) -> BasicValueEnum<'ctx> { + self.value.as_basic_value_enum() + } +} + +impl<'ctx, T: NIntKind> NInt<'ctx, T> { + /// "Demote" this [`NInt`] to an [`Int`]. + /// + /// Information about the [`NIntKind`] will be lost. 
+ pub fn to_int(self) -> Int<'ctx> { + Int(self.value) + } +} diff --git a/nac3core/src/codegen/model/int.rs b/nac3core/src/codegen/model/int.rs new file mode 100644 index 00000000..50aa30d2 --- /dev/null +++ b/nac3core/src/codegen/model/int.rs @@ -0,0 +1,83 @@ +use inkwell::{ + context::Context, + types::{BasicType, BasicTypeEnum, IntType}, + values::{BasicValue, BasicValueEnum, IntValue}, +}; + +use crate::codegen::{model::int_util::review_int_llvm_value, CodeGenContext}; + +use super::{core::*, int_util::check_int_llvm_type}; + +/// A model representing an [`IntType<'ctx>`]. +/// +/// Also see [`NIntModel`], which is more constrained than [`IntModel`] +/// but provides more type-safe mechanisms and even auto-derivation of [`BasicTypeEnum<'ctx>`] +/// for creating LLVM structures. +#[derive(Debug, Clone, Copy)] +pub struct IntModel<'ctx>(pub IntType<'ctx>); + +impl<'ctx> CanCheckLLVMType<'ctx> for IntModel<'ctx> { + fn check_llvm_type_impl( + &self, + _ctx: &'ctx Context, + ty: BasicTypeEnum<'ctx>, + ) -> Result<(), String> { + check_int_llvm_type(ty, self.0) + } +} + +impl<'ctx> Model<'ctx> for IntModel<'ctx> { + type Value = Int<'ctx>; + + fn get_llvm_type(&self, _ctx: &'ctx Context) -> BasicTypeEnum<'ctx> { + self.0.as_basic_type_enum() + } + + fn review_value>( + &self, + _ctx: &'ctx Context, + value: V, + ) -> Result { + review_int_llvm_value(value.as_basic_value_enum(), self.0).map(Int) + } +} + +impl<'ctx> IntModel<'ctx> { + /// Create a constant value that inhabits this [`IntModel<'ctx>`]. + #[must_use] + pub fn constant(&self, value: u64) -> Int<'ctx> { + Int(self.0.const_int(value, false)) + } + + /// Check if `other` is fully compatible with this [`IntModel<'ctx>`]. + /// + /// This simply checks if the underlying [`IntType<'ctx>`] has + /// the same number of bits. + #[must_use] + pub fn same_as(&self, other: IntModel<'ctx>) -> bool { + // TODO: or `self.0 == other.0` would also work? 
+ self.0.get_bit_width() == other.0.get_bit_width() + } +} + +/// An inhabitant of an [`IntModel<'ctx>`] +#[derive(Debug, Clone, Copy)] +pub struct Int<'ctx>(pub IntValue<'ctx>); + +impl<'ctx> ModelValue<'ctx> for Int<'ctx> { + fn get_llvm_value(&self) -> BasicValueEnum<'ctx> { + self.0.as_basic_value_enum() + } +} + +impl<'ctx> Int<'ctx> { + #[must_use] + pub fn signed_cast_to_int( + self, + ctx: &CodeGenContext<'ctx, '_>, + target_int: IntModel<'ctx>, + name: &str, + ) -> Int<'ctx> { + Int(ctx.builder.build_int_s_extend_or_bit_cast(self.0, target_int.0, name).unwrap()) + } +} diff --git a/nac3core/src/codegen/model/int_util.rs b/nac3core/src/codegen/model/int_util.rs new file mode 100644 index 00000000..aa9d89bb --- /dev/null +++ b/nac3core/src/codegen/model/int_util.rs @@ -0,0 +1,39 @@ +use inkwell::{ + types::{BasicType, BasicTypeEnum, IntType}, + values::{BasicValueEnum, IntValue}, +}; + +/// Helper function to check if `scrutinee` is the same as `expected_int_type` +pub fn check_int_llvm_type<'ctx>( + ty: BasicTypeEnum<'ctx>, + expected_int_type: IntType<'ctx>, +) -> Result<(), String> { + // Check if llvm_type is int type + let BasicTypeEnum::IntType(ty) = ty else { + return Err(format!("Expecting an int type but got {ty:?}")); + }; + + // Check bit width + if ty.get_bit_width() != expected_int_type.get_bit_width() { + return Err(format!( + "Expecting an int type of {}-bit(s) but got int type {}-bit(s)", + expected_int_type.get_bit_width(), + ty.get_bit_width() + )); + } + + Ok(()) +} + +/// Helper function to cast `scrutinee` is into an [`IntValue<'ctx>`]. +/// The LLVM type of `scrutinee` will be checked with [`check_int_llvm_type`]. 
+pub fn review_int_llvm_value<'ctx>( + value: BasicValueEnum<'ctx>, + expected_int_type: IntType<'ctx>, +) -> Result, String> { + // Check if value is of int type, error if that is anything else + check_int_llvm_type(value.get_type().as_basic_type_enum(), expected_int_type)?; + + // Ok, it is must be an int + Ok(value.into_int_value()) +} diff --git a/nac3core/src/codegen/model/mod.rs b/nac3core/src/codegen/model/mod.rs new file mode 100644 index 00000000..2ebf02b8 --- /dev/null +++ b/nac3core/src/codegen/model/mod.rs @@ -0,0 +1,16 @@ +pub mod core; +pub mod fixed_int; +pub mod int; +mod int_util; +pub mod opaque; +pub mod pointer; +pub mod slice; +pub mod structure; + +pub use core::*; +pub use fixed_int::*; +pub use int::*; +pub use opaque::*; +pub use pointer::*; +pub use slice::*; +pub use structure::*; diff --git a/nac3core/src/codegen/model/opaque.rs b/nac3core/src/codegen/model/opaque.rs new file mode 100644 index 00000000..6402015a --- /dev/null +++ b/nac3core/src/codegen/model/opaque.rs @@ -0,0 +1,57 @@ +use inkwell::{ + context::Context, + types::BasicTypeEnum, + values::{BasicValue, BasicValueEnum}, +}; + +use super::*; + +/// A [`Model`] that holds an arbitrary [`BasicTypeEnum`]. +/// +/// Use this and [`Opaque`] when you are dealing with a [`BasicTypeEnum<'ctx>`] +/// at runtime and there is no way to abstract your implementation +/// with [`Model`]. 
+#[derive(Debug, Clone, Copy)] +pub struct OpaqueModel<'ctx>(pub BasicTypeEnum<'ctx>); + +impl<'ctx> CanCheckLLVMType<'ctx> for OpaqueModel<'ctx> { + fn check_llvm_type_impl( + &self, + _ctx: &'ctx Context, + ty: BasicTypeEnum<'ctx>, + ) -> Result<(), String> { + if ty == self.0 { + Ok(()) + } else { + Err(format!("Expecting {}, but got {}", self.0, ty)) + } + } +} + +impl<'ctx> Model<'ctx> for OpaqueModel<'ctx> { + type Value = Opaque<'ctx>; + + fn get_llvm_type(&self, _ctx: &'ctx Context) -> BasicTypeEnum<'ctx> { + self.0 + } + + fn review_value>( + &self, + ctx: &'ctx Context, + value: V, + ) -> Result { + let value = value.as_basic_value_enum(); + self.check_llvm_type(ctx, value.get_type())?; + Ok(Opaque(value)) + } +} + +/// A value of [`OpaqueModel`] +#[derive(Debug, Clone, Copy)] +pub struct Opaque<'ctx>(pub BasicValueEnum<'ctx>); + +impl<'ctx> ModelValue<'ctx> for Opaque<'ctx> { + fn get_llvm_value(&self) -> BasicValueEnum<'ctx> { + self.0 + } +} diff --git a/nac3core/src/codegen/model/pointer.rs b/nac3core/src/codegen/model/pointer.rs new file mode 100644 index 00000000..ef146d4c --- /dev/null +++ b/nac3core/src/codegen/model/pointer.rs @@ -0,0 +1,94 @@ +use inkwell::{ + context::Context, + types::{BasicType, BasicTypeEnum}, + values::{BasicValue, BasicValueEnum, PointerValue}, + AddressSpace, +}; + +use crate::codegen::CodeGenContext; + +use super::{core::*, OpaqueModel}; + +/// A [`Model<'ctx>`] representing an LLVM [`PointerType<'ctx>`] +/// with *full* information on the element u +/// +/// [`self.0`] contains [`Model<'ctx>`] that represents the +/// LLVM type of element of the [`PointerType<'ctx>`] is pointing at +/// (like `PointerType<'ctx>::get_element_type()`, but abstracted as a [`Model<'ctx>`]). 
+#[derive(Debug, Clone, Copy, Default)] +pub struct PointerModel(pub E); + +impl<'ctx, E: Model<'ctx>> CanCheckLLVMType<'ctx> for PointerModel { + fn check_llvm_type_impl( + &self, + ctx: &'ctx Context, + ty: BasicTypeEnum<'ctx>, + ) -> Result<(), String> { + // Check if scrutinee is even a PointerValue + let BasicTypeEnum::PointerType(ty) = ty else { + return Err(format!("Expecting a pointer value, but got {ty:?}")); + }; + + // Check the type of what the pointer is pointing at + // TODO: This will be deprecated by inkwell > llvm14 because `get_element_type()` will be gone + let Ok(element_ty) = BasicTypeEnum::try_from(ty.get_element_type()) else { + return Err(format!( + "Expecting pointer to point to an inkwell BasicValue, but got {ty:?}" + )); + }; + + self.0.check_llvm_type(ctx, element_ty) // TODO: Include backtrace? + } +} + +impl<'ctx, E: Model<'ctx>> Model<'ctx> for PointerModel { + type Value = Pointer<'ctx, E>; + + fn get_llvm_type(&self, ctx: &'ctx Context) -> BasicTypeEnum<'ctx> { + self.0.get_llvm_type(ctx).ptr_type(AddressSpace::default()).as_basic_type_enum() + } + + fn review_value>( + &self, + ctx: &'ctx Context, + value: V, + ) -> Result { + let value = value.as_basic_value_enum(); + + self.check_llvm_type(ctx, value.get_type())?; + + // TODO: Check get_element_type(). For inkwell LLVM 14 at least... 
+ Ok(Pointer { element: self.0, value: value.into_pointer_value() }) + } +} + +/// An inhabitant of [`PointerModel`] +#[derive(Debug, Clone, Copy)] +pub struct Pointer<'ctx, E: Model<'ctx>> { + pub element: E, + pub value: PointerValue<'ctx>, +} + +impl<'ctx, E: Model<'ctx>> ModelValue<'ctx> for Pointer<'ctx, E> { + fn get_llvm_value(&self) -> BasicValueEnum<'ctx> { + self.value.as_basic_value_enum() + } +} + +impl<'ctx, E: Model<'ctx>> Pointer<'ctx, E> { + /// Build an instruction to store a value into this pointer + pub fn store(&self, ctx: &CodeGenContext<'ctx, '_>, val: E::Value) { + ctx.builder.build_store(self.value, val.get_llvm_value()).unwrap(); + } + + /// Build an instruction to load a value from this pointer + pub fn load(&self, ctx: &CodeGenContext<'ctx, '_>, name: &str) -> E::Value { + let val = ctx.builder.build_load(self.value, name).unwrap(); + self.element.review_value(ctx.ctx, val).unwrap() // If unwrap() panics, there is a logic error in your code. + } + + /// "Demote" the [`Model`] of the thing this pointer is pointing at. + pub fn to_opaque(self, ctx: &'ctx Context) -> Pointer<'ctx, OpaqueModel<'ctx>> { + Pointer { element: OpaqueModel(self.element.get_llvm_type(ctx)), value: self.value } + } +} diff --git a/nac3core/src/codegen/model/slice.rs b/nac3core/src/codegen/model/slice.rs new file mode 100644 index 00000000..e6d28d95 --- /dev/null +++ b/nac3core/src/codegen/model/slice.rs @@ -0,0 +1,87 @@ +use crate::codegen::{CodeGenContext, CodeGenerator}; + +use super::{Int, Model, Pointer}; + +/// An LLVM "slice" - literally just a pointer and a length value. +/// The pointer points to a location with `num_elements` **contiguously** placed +/// values of [`E`][`Model`] in memory. +/// +/// NOTE: This is NOT a [`Model`]! This is simply a helper +/// structure to aggregate a length value and a pointer together. 
+pub struct ArraySlice<'ctx, E: Model<'ctx>> { + pub pointer: Pointer<'ctx, E>, + pub num_elements: Int<'ctx>, +} + +impl<'ctx, E: Model<'ctx>> ArraySlice<'ctx, E> { + /// Get the `idx`-nth element of this [`ArraySlice`], + /// but doesn't do an assertion to see if `idx` is + /// out of bounds or not. + /// + /// Also see [`ArraySlice::ix`]. + pub fn ix_unchecked( + &self, + ctx: &CodeGenContext<'ctx, '_>, + idx: Int<'ctx>, + name: &str, + ) -> Pointer<'ctx, E> { + let element_addr = + unsafe { ctx.builder.build_in_bounds_gep(self.pointer.value, &[idx.0], name).unwrap() }; + Pointer { value: element_addr, element: self.pointer.element } + } + + /// Call [`ArraySlice::ix_unchecked`], but + /// checks if `idx` is in bounds, otherwise + /// a runtime `IndexError` will be thrown. + pub fn ix( + &self, + generator: &mut G, + ctx: &mut CodeGenContext<'ctx, '_>, + idx: Int<'ctx>, + name: &str, + ) -> Pointer<'ctx, E> { + let int_type = self.num_elements.0.get_type(); // NOTE: Weird get_type(), see comment under `trait Ixed` + + assert_eq!(int_type.get_bit_width(), idx.0.get_type().get_bit_width()); // Might as well check bit width to catch bugs + + // TODO: SGE or UGE? or make it defined by the implementee? + + // Check `0 <= index` + let lower_bounded = ctx + .builder + .build_int_compare( + inkwell::IntPredicate::SLE, + int_type.const_zero(), + idx.0, + "lower_bounded", + ) + .unwrap(); + + // Check `index < num_elements` + let upper_bounded = ctx + .builder + .build_int_compare( + inkwell::IntPredicate::SLT, + idx.0, + self.num_elements.0, + "upper_bounded", + ) + .unwrap(); + + // Compute `0 <= index && index < num_elements` + let bounded = ctx.builder.build_and(lower_bounded, upper_bounded, "bounded").unwrap(); + + // Assert `bounded` + ctx.make_assert( + generator, + bounded, + "0:IndexError", + "nac3core LLVM codegen attempting to access out of bounds array index {0}. 
Must satisfy 0 <= index < {2}", + [ Some(idx.0), Some(self.num_elements.0), None], + ctx.current_loc + ); + + // ...and finally do indexing + self.ix_unchecked(ctx, idx, name) + } +} diff --git a/nac3core/src/codegen/model/structure.rs b/nac3core/src/codegen/model/structure.rs new file mode 100644 index 00000000..f53753f4 --- /dev/null +++ b/nac3core/src/codegen/model/structure.rs @@ -0,0 +1,396 @@ +use inkwell::{ + context::Context, + types::{BasicType, BasicTypeEnum, StructType}, + values::{BasicValue, BasicValueEnum, StructValue}, +}; +use itertools::{izip, Itertools}; + +use crate::codegen::CodeGenContext; + +use super::{core::CanCheckLLVMType, Model, ModelValue, Pointer}; + +/// An LLVM struct's "field". +#[derive(Debug, Clone, Copy)] +pub struct Field { + /// The GEP index of the field. + pub gep_index: u64, + + /// The name of this field. Generally named + /// to how the field is named in ARTIQ or IRRT. + /// + /// NOTE: This is only used for debugging. + pub name: &'static str, + + /// The [`Model`] of the field. + pub element: E, +} + +// A helper struct for [`FieldBuilder`] +struct FieldLLVM<'ctx> { + gep_index: u64, + name: &'ctx str, + + // Only CanCheckLLVMType is needed, dont use `Model<'ctx>` + llvm_type_model: Box + 'ctx>, + llvm_type: BasicTypeEnum<'ctx>, +} + +/// A helper struct to create [`Field`]-s in [`StructKind::build_fields`]. +/// +/// See [`StructKind`] for more details and see how [`FieldBuilder`] is put +/// into action. +pub struct FieldBuilder<'ctx> { + /// The [`Context`] this [`FieldBuilder`] is under. + /// + /// Can be used in [`StructKind::build_fields`]. + /// See [`StructKind`] for more details and see how [`FieldBuilder`] is put + /// into action. + pub ctx: &'ctx Context, + + /// An incrementing counter for GEP indices when + /// doing [`FieldBuilder::add_field`] or [`FieldBuilder::add_field_auto`]. + gep_index_counter: u64, + + /// Name of the `struct` this [`FieldBuilder`] is currently + /// building. 
+ /// + /// NOTE: This is only used for debugging. + struct_name: &'ctx str, + + /// The fields added so far. + fields: Vec>, +} + +impl<'ctx> FieldBuilder<'ctx> { + #[must_use] + pub fn new(ctx: &'ctx Context, struct_name: &'ctx str) -> Self { + FieldBuilder { ctx, gep_index_counter: 0, struct_name, fields: Vec::new() } + } + + fn next_gep_index(&mut self) -> u64 { + let index = self.gep_index_counter; + self.gep_index_counter += 1; + index + } + + /// Add a new field. + /// + /// - `name`: The name of the field. See [`Field::name`]. + /// - `element`: The [`Model`] of the type of the field. See [`Field::element`]. + pub fn add_field + 'ctx>(&mut self, name: &'static str, element: E) -> Field { + let gep_index = self.next_gep_index(); + + self.fields.push(FieldLLVM { + gep_index, + name, + llvm_type: element.get_llvm_type(self.ctx), + llvm_type_model: Box::new(element), + }); + + Field { gep_index, name, element } + } + + /// Like [`FieldBuilder::add_field`] but `element` can be **automatically derived** + /// if it has the `Default` instance. + /// + /// Certain [`Model`] has a [`Default`] trait - [`Model`]s that are just singletons, + /// By deriving the [`Default`] trait on those [`Model`]s, Rust could automatically + /// construct the [`Model`] with [`Default::default`]. + /// + /// This function is equivalent to + /// ```ignore + /// self.add_field(name, E::default()) + /// ``` + pub fn add_field_auto + Default + 'ctx>( + &mut self, + name: &'static str, + ) -> Field { + self.add_field(name, E::default()) + } +} + +/// A marker trait to mark singleton struct that +/// describes a particular LLVM structure. +/// +/// It is a powerful inkwell abstraction that can reduce +/// a lot of inkwell boilerplate when dealing with LLVM structs, +/// `getelementptr`, `load`-ing and `store`-ing fields. +/// +/// ### Usage +pub trait StructKind<'ctx>: Clone + Copy { + /// The type of the Rust `struct` that holds all the fields of this LLVM struct. 
+ type Fields; + + // TODO: + /// The name of this [`StructKind`]. + /// + /// The name should be the name of in + /// IRRT's `struct` or ARTIQ's definition. + fn struct_name(&self) -> &'static str; + + /// Define the [`Field`]s of this [`StructKind`] + /// + /// + /// ### Syntax + /// + /// Suppose you want to define the following C++ `struct`s in `nac3core`: + /// ```cpp + /// template + /// struct Str { + /// uint8_t* content; // NOTE: could be `void *` + /// SizeT length; + /// } + /// + /// template + /// struct Exception { + /// uint32_t id; + /// Str message; + /// uint64_t param0; + /// uint64_t param1; + /// uint64_t param2; + /// } + /// ``` + /// + /// You write this in nac3core: + /// ```ignore + /// struct Str<'ctx> { + /// sizet: IntModel<'ctx>, + /// } + /// + /// struct StrFields<'ctx> { + /// content: Field>, // equivalent to `NIntModel`. + /// length: Field>, // `SizeT` is only known in runtime - `CodeGenerator::get_size_type()`. /// } + /// } + /// + /// impl StructKind<'ctx> for Str<'ctx> { + /// fn struct_name() { + /// "Str" + /// } + /// + /// fn build_fields(&self, builder: &mut FieldBuilder<'ctx>) -> Self::Fields { + /// // THE order of `builder.add_field*` is IMPORTANT!!! + /// // so the GEP indices would be correct. + /// StrFields { + /// content: builder.add_field_auto("content"), // `PointerModel` has `Default` trait. + /// length: builder.add_field("length", IntModel(self.sizet)), // `PointerModel` has `Default` trait. + /// } + /// } + /// } + /// + /// struct Exception<'ctx> { + /// sizet: IntModel<'ctx>, + /// } + /// + /// struct ExceptionFields<'ctx> { + /// id: Field>, + /// message: Field>, + /// param0: Field>, + /// param1: Field>, + /// param2: Field>, + /// } + /// + /// impl StructKind<'ctx> for Exception<'ctx> { + /// fn struct_name() { + /// "Exception" + /// } + /// + /// fn build_fields(&self, builder: &mut FieldBuilder<'ctx>) -> Self::Fields { + /// // THE order of `builder.add_field*` is IMPORTANT!!! 
+ /// // so the GEP indices would be correct. + /// ExceptionFields { + /// id: builder.add_field_auto("id"), // `NIntModel` has `Default` trait. + /// message: builder.add_field("message", StructModel(Str { sizet: self.sizet })), + /// param0: builder.add_field_auto("param0"), // has `Default` trait + /// param1: builder.add_field_auto("param1"), // has `Default` trait + /// param2: builder.add_field_auto("param2"), // has `Default` trait + /// } + /// } + /// } + /// ``` + /// + /// Then to `alloca` an `Exception`, do this: + /// ```ignore + /// let generator: dyn CodeGenerator<'ctx>; + /// let ctx: &CodeGenContext<'ctx, '_>; + /// let sizet = generator.get_size_type(); + /// let exn_model = StructModel(Exception { sizet }); + /// let exn = exn_model.alloca(ctx, "my_exception"); // Every [`Model<'ctx>`] has an `.alloca()` function. + /// // exn: Pointer<'ctx, StructModel<Exception<'ctx>>> + /// ``` + /// + /// NOTE: In fact, it is possible to define `Str` and `Exception` like this: + /// ```ignore + /// struct Str { + /// _phantom: PhantomData, + /// } + /// + /// struct Exception { + /// _phantom: PhantomData, + /// } + /// ``` + /// But issues arise because you don't know the nac3core + /// `CodeGenerator`'s `get_size_type()` beforehand. + fn build_fields(&self, builder: &mut FieldBuilder<'ctx>) -> Self::Fields; +} + +/// A [`Model<'ctx>`] that represents an LLVM struct. +/// +/// `self.0` contains a [`StructKind<'ctx>`] that gives the details of the LLVM struct. +#[derive(Debug, Clone, Copy, Default)] +pub struct StructModel(pub S); + +impl<'ctx, S: StructKind<'ctx>> CanCheckLLVMType<'ctx> for StructModel { + fn check_llvm_type_impl( + &self, + ctx: &'ctx Context, + ty: BasicTypeEnum<'ctx>, + ) -> Result<(), String> { + // Check if scrutinee is even a struct type + let BasicTypeEnum::StructType(ty) = ty else { + return Err(format!("Expecting a struct type, but got {ty:?}")); + }; + + // Ok. 
now check the struct type thoroughly + self.check_struct_type(ctx, ty) + } +} + +impl<'ctx, S: StructKind<'ctx>> Model<'ctx> for StructModel { + type Value = Struct<'ctx, S>; + + fn get_llvm_type(&self, ctx: &'ctx Context) -> BasicTypeEnum<'ctx> { + self.get_struct_type(ctx).as_basic_type_enum() + } + + fn review_value>( + &self, + ctx: &'ctx Context, + value: V, + ) -> Result { + let value = value.as_basic_value_enum(); + + // Check that `value` is not some bogus values or an incorrect StructValue + self.check_llvm_type(ctx, value.get_type())?; + + Ok(Struct { kind: self.0, value: value.into_struct_value() }) + } +} + +impl<'ctx, S: StructKind<'ctx>> StructModel { + /// Get the [`S::Fields`] of this [`StructModel`]. + pub fn get_fields(&self, ctx: &'ctx Context) -> S::Fields { + let mut builder = FieldBuilder::new(ctx, self.0.struct_name()); + self.0.build_fields(&mut builder) + } + + /// Get the LLVM struct type this [`IsStruct<'ctx>`] is representing. + pub fn get_struct_type(&self, ctx: &'ctx Context) -> StructType<'ctx> { + let mut builder = FieldBuilder::new(ctx, self.0.struct_name()); + self.0.build_fields(&mut builder); // Self::Fields is discarded + + let field_types = builder.fields.iter().map(|f| f.llvm_type).collect_vec(); + ctx.struct_type(&field_types, false) + } + + /// Check if `scrutinee` matches the [`StructType<'ctx>`] this [`IsStruct<'ctx>`] is representing. 
+ pub fn check_struct_type( + &self, + ctx: &'ctx Context, + scrutinee: StructType<'ctx>, + ) -> Result<(), String> { + // Details about scrutinee + let scrutinee_field_types = scrutinee.get_field_types(); + + // Details about the defined specifications of this struct + // We will access them through builder + let mut builder = FieldBuilder::new(ctx, self.0.struct_name()); + self.0.build_fields(&mut builder); + + // Check # of fields + if builder.fields.len() != scrutinee_field_types.len() { + return Err(format!( + "Expecting struct to have {} field(s), but scrutinee has {} field(s)", + builder.fields.len(), + scrutinee_field_types.len() + )); + } + + // Check the types of each field + // TODO: Traceback? + for (f, scrutinee_field_type) in izip!(builder.fields, scrutinee_field_types) { + f.llvm_type_model + .check_llvm_type_impl(ctx, scrutinee_field_type.as_basic_type_enum())?; + } + + Ok(()) + } +} + +/// A value of [`StructModel`] of a particular [`StructKind`]. +#[derive(Debug, Clone, Copy)] +pub struct Struct<'ctx, S> { + pub kind: S, + pub value: StructValue<'ctx>, +} + +impl<'ctx, S: StructKind<'ctx>> ModelValue<'ctx> for Struct<'ctx, S> { + fn get_llvm_value(&self) -> BasicValueEnum<'ctx> { + self.value.as_basic_value_enum() + } +} + +impl<'ctx, S: StructKind<'ctx>> Pointer<'ctx, StructModel> { + /// Build an instruction that does `getelementptr` on an LLVM structure referenced by this pointer. + /// + /// This provides a nice syntax to chain up `getelementptr` in an intuitive and type-safe way: + /// + /// ```ignore + /// let ctx: &CodeGenContext<'ctx, '_>; + /// let ndarray: Pointer<'ctx, StructModel>>; + /// ndarray.gep(ctx, |f| f.ndims).store(); + /// ``` + /// + /// You might even write chains `gep`, i.e., + /// ```ignore + /// let exn_ptr: Pointer<'ctx, StructModel>; + /// let value: Int<'ctx>; // Suppose it has the correct inkwell `IntType<'ctx>`. 
+ /// + /// // To do `exn.message.length = value`: + /// let exn_message_ptr = exn_ptr.gep(ctx, |f| f.message); + /// let exn_message_length_ptr = exn_message_ptr.gep(ctx, |f| f.length); + /// exn_message_length_ptr.store(ctx, my_value); + /// + /// // or simply: + /// exn_ptr + /// .gep(ctx, |f| f.message) + /// .gep(ctx, |f| f.length) + /// .store(ctx, my_value) // Equivalent to `my_struct.thing1.value = my_value` + /// ``` + pub fn gep( + &self, + ctx: &CodeGenContext<'ctx, '_>, + get_field: GetFieldFn, + ) -> Pointer<'ctx, E> + where + E: Model<'ctx>, + GetFieldFn: FnOnce(S::Fields) -> Field, + { + let fields = self.element.get_fields(ctx.ctx); + let field = get_field(fields); + + // TODO: I think I'm not supposed to *just* use i32 for GEP like that + let llvm_i32 = ctx.ctx.i32_type(); + + let ptr = unsafe { + ctx.builder + .build_in_bounds_gep( + self.value, + &[llvm_i32.const_zero(), llvm_i32.const_int(field.gep_index, false)], + field.name, + ) + .unwrap() + }; + + Pointer { element: field.element, value: ptr } + } +}