diff --git a/nac3core/src/codegen/generator.rs b/nac3core/src/codegen/generator.rs index bb822f19..ef7e6949 100644 --- a/nac3core/src/codegen/generator.rs +++ b/nac3core/src/codegen/generator.rs @@ -11,12 +11,18 @@ use inkwell::{ }; use nac3parser::ast::{Expr, Stmt, StrRef}; +use super::model::SizeTModel; + pub trait CodeGenerator { /// Return the module name for the code generator. fn get_name(&self) -> &str; fn get_size_type<'ctx>(&self, ctx: &'ctx Context) -> IntType<'ctx>; + fn get_sizet<'ctx>(&self, ctx: &'ctx Context) -> SizeTModel<'ctx> { + SizeTModel(self.get_size_type(ctx)) + } + /// Generate function call and returns the function return value. /// - obj: Optional object for method call. /// - fun: Function signature and definition ID. diff --git a/nac3core/src/codegen/mod.rs b/nac3core/src/codegen/mod.rs index 17952369..85b963bb 100644 --- a/nac3core/src/codegen/mod.rs +++ b/nac3core/src/codegen/mod.rs @@ -41,6 +41,7 @@ pub mod extern_fns; mod generator; pub mod irrt; pub mod llvm_intrinsics; +pub mod model; pub mod numpy; pub mod stmt; diff --git a/nac3core/src/codegen/model/core.rs b/nac3core/src/codegen/model/core.rs new file mode 100644 index 00000000..9c04b548 --- /dev/null +++ b/nac3core/src/codegen/model/core.rs @@ -0,0 +1,204 @@ +use core::fmt; +use std::marker::PhantomData; + +use inkwell::{ + context::Context, + types::{BasicType, BasicTypeEnum, IntType}, + values::{BasicValue, IntValue, PointerValue}, +}; + +use crate::codegen::{CodeGenContext, CodeGenerator}; + +use super::{ArraySlice, Pointer, PointerModel}; + +/* +TODO: UPDATE when the Model finally stablizes +Explanation on the abstraction: + + In LLVM, there are TYPES and VALUES. + + Inkwell gives us TYPES [`BasicTypeEnum<'ctx>`] and VALUES [`BasicValueEnum<'ctx>`], + but by themselves, they lack a lot of Rust compile-time known info. 
+ + e.g., You did `let ptr = builder.build_alloca(my_llvm_ndarray_struct_ty)`, + but `ptr` is just a `PointerValue<'ctx>`, almost everything about the + underlying `my_llvm_ndarray_struct_ty` is gone. + + The `Model` abstraction is a wrapper around inkwell TYPES and VALUES but with + a richer interface. + + `Model<'ctx>` is a wrapper around an inkwell TYPE: + - `NIntModel<Byte>` is an i8. + - `NIntModel<Int32>` is an i32. + - `NIntModel<Int64>` is an i64. + - `IntModel` is a carrier for an inkwell `IntType<'ctx>`, + used when the type is dynamic/cannot be specified at Rust compile-time. + - `PointerModel<'ctx, E>` is a wrapper for `PointerType<'ctx>`, + where `E` is another `Model<'ctx>` that describes the element type of the pointer. + - `StructModel<'ctx, NDArray>` is a wrapper for `StructType<'ctx>`, + with additional information encoded within `NDArray`. (See `IsStruct<'ctx>`) + + `Model<'ctx>::Value`/`ModelValue<'ctx>` is a wrapper around an inkwell VALUE: + - `NInt<'ctx, T>` is a value of `NIntModel<'ctx, T>`, + where `T` could be `Byte`, `Int32`, or `Int64`. + - `Pointer<'ctx, E>` is a value of `PointerModel<'ctx, E>`. + + Other interesting utilities: + - Given a `Model<'ctx>`, say, `let ndarray_model = StructModel<'ctx, NDArray>`, + you can do `ndarray_model.alloca(ctx, "my_ndarray")` to get a `Pointer<'ctx, Struct<'ctx, NDArray>>`, + notice that all LLVM type information is preserved. + - For a `let my_ndarray = Pointer<'ctx, StructModel<NDArray>>`, you can access a field by doing + `my_ndarray.gep(ctx, |f| f.itemsize).load() // or .store()`, and you can chain them + together for nested structures. 
+ + A brief summary on the `Model<'ctx>` and `ModelValue<'ctx>` traits: + - Model<'ctx> + // The associated ModelValue of this Model + - type Value: ModelValue<'ctx> + + // Get the LLVM type of this Model + - fn get_llvm_type(&self) + + // Check if the input type is equal to the LLVM type of this Model + // NOTE: this function is provideed through `CanCheckLLVMType<'ctx>` + - fn check_llvm_type(&self, ty) -> Result<(), String> + + // Check if the input value's type is equal to the LLVM type of this Model. + // + // If so, wrap it with `Self::Value`. + - fn review_value>(&self, val: V) -> Result + + - ModelValue<'ctx> + // get the LLVM value of this ModelValue + - fn get_llvm_value(&self) -> BasicValueEnum<'ctx> +*/ + +#[derive(Debug, Clone)] +pub struct ModelError(pub String); + +// NOTE: Should have been within [`Model<'ctx>`], +// but rust object safety requirements made it necessary to +// split the trait. +pub trait CanCheckLLVMType<'ctx> { + /// See [`Model::check_llvm_type`] + fn check_llvm_type_impl( + &self, + ctx: &'ctx Context, + ty: BasicTypeEnum<'ctx>, + ) -> Result<(), ModelError>; +} + +pub trait Model<'ctx>: fmt::Debug + Clone + Copy + CanCheckLLVMType<'ctx> + Sized + Eq { + /// The corresponding LLVM [`BasicValue<'ctx>`] of this Model. + type Value: BasicValue<'ctx>; + /// The corresponding LLVM [`BasicType<'ctx>`] of this Model. + type Type: BasicType<'ctx>; + + /// Get the LLVM type of this [`Model<'ctx>`] + fn get_type(&self, ctx: &'ctx Context) -> Self::Type; + + /// Check if the input type is equal to the LLVM type of this Model. + /// + /// If it doesn't match, an [`Err`] with a human-readable message is + /// thrown explaining *how* it was different. Meant for debugging. + fn check_type>(&self, ctx: &'ctx Context, ty: T) -> Result<(), ModelError> { + self.check_llvm_type_impl(ctx, ty.as_basic_type_enum()) + } + + /// Check if an LLVM value's type is equal to the LLVM type of this [`Model`]. + /// If so, wrap it with [`Instance`]. 
+ fn review_value>( + &self, + ctx: &'ctx Context, + value: V, + ) -> Result, ModelError>; + + /// Directly create an [`Instance`] of this [`Model`]. + /// + /// It is assumed that the LLVM type of `value` has been checked. + /// + /// It is recommended that you use [`Model::review_value`] instead in order to + /// catch bugs. + fn believe_value(&self, value: Self::Value) -> Instance<'ctx, Self> { + Instance { model: *self, value, _phantom: PhantomData } + } + + /// Build an instruction to allocate a value with the LLVM type of this [`Model<'ctx>`]. + fn alloca(&self, ctx: &CodeGenContext<'ctx, '_>, name: &str) -> Pointer<'ctx, Self> { + let ptr_model = PointerModel(*self); + let ptr = ctx.builder.build_alloca(self.get_type(ctx.ctx), name).unwrap(); + ptr_model.believe_value(ptr) + } + + /// Build an instruction to allocate an array of the LLVM type of this [`Model<'ctx>`]. + fn array_alloca( + &self, + ctx: &CodeGenContext<'ctx, '_>, + num_elements: Instance<'ctx, N>, + name: &str, + ) -> ArraySlice<'ctx, N, Self> + where + N: Model<'ctx, Value = IntValue<'ctx>, Type = IntType<'ctx>>, + { + let ptr_model = PointerModel(*self); + let ptr = ctx + .builder + .build_array_alloca( + self.get_type(ctx.ctx).as_basic_type_enum(), + num_elements.value, + name, + ) + .unwrap(); + let pointer = ptr_model.believe_value(ptr); + + ArraySlice { pointer, num_elements } + } + + /// Do [`CodeGenerator::gen_var_alloc`] with the LLVM type of this [`Model<'ctx>`]. + fn var_alloc( + &self, + generator: &mut G, + ctx: &mut CodeGenContext<'ctx, '_>, + name: Option<&str>, + ) -> Result, String> { + let ptr_model = PointerModel(*self); + let ptr = + generator.gen_var_alloc(ctx, self.get_type(ctx.ctx).as_basic_type_enum(), name)?; + Ok(ptr_model.believe_value(ptr)) + } + + /// Do [`CodeGenerator::gen_array_var_alloc`] with the LLVM type of this [`Model<'ctx>`]. 
+ fn array_var_alloc( + &self, + generator: &mut G, + ctx: &mut CodeGenContext<'ctx, '_>, + num_elements: Instance<'ctx, N>, + name: Option<&'ctx str>, + ) -> Result, String> + where + N: Model<'ctx, Value = IntValue<'ctx>, Type = IntType<'ctx>>, + { + let ptr_model = PointerModel(*self); + + // TODO: Remove ProxyType ArraySlice + let ptr = ptr_model.believe_value(PointerValue::from(generator.gen_array_var_alloc( + ctx, + self.get_type(ctx.ctx).as_basic_type_enum(), + num_elements.value, + name, + )?)); + + Ok(ArraySlice { num_elements, pointer: ptr }) + } +} + +/// An LLVM value of a type of a [`Model<'ctx>`]. +/// +/// It is guaranteed that [`Instance::value`]'s LLVM type +/// has been *checked* to match [`Instance::model`]. +#[derive(Debug, Clone, Copy)] +pub struct Instance<'ctx, M: Model<'ctx>> { + pub model: M, + pub value: M::Value, + _phantom: PhantomData<&'ctx ()>, +} diff --git a/nac3core/src/codegen/model/fixed_int.rs b/nac3core/src/codegen/model/fixed_int.rs new file mode 100644 index 00000000..e9135c6d --- /dev/null +++ b/nac3core/src/codegen/model/fixed_int.rs @@ -0,0 +1,161 @@ +use core::fmt; + +use inkwell::{ + context::Context, + types::{BasicTypeEnum, IntType}, + values::{BasicValue, IntValue}, +}; + +use super::{ + core::*, + int_util::{check_int_llvm_type, int_constant, review_int_llvm_value}, + Int, IntModel, +}; + +/// A marker trait to mark a singleton struct that describes a particular fixed integer type. +/// See [`Bool`], [`Byte`], [`Int32`], etc. +/// +/// The [`Default`] trait is to enable auto-instantiations. +pub trait NIntKind: fmt::Debug + Clone + Copy + Default + PartialEq + Eq { + /// Get the [`IntType<'ctx>`] of this [`NIntKind`]. + fn get_int_type(ctx: &Context) -> IntType<'_>; + + /// Get the bit width of this [`NIntKind`]. + /// + /// Compared to using [`NIntKind::get_int_type`], this + /// function does not require [`Context`]. 
+ fn get_bit_width() -> u32; +} + +/// A [`Model`] representing an [`IntType<'ctx>`] of a specified bit width. +/// +/// Also see [`IntModel`], which is less constrained than [`NIntModel`], +/// but enables one to handle dynamic [`IntType<'ctx>`] at runtime. +#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)] +pub struct NIntModel(pub T); +pub type NInt<'ctx, T> = Instance<'ctx, NIntModel>; + +impl<'ctx, T: NIntKind> CanCheckLLVMType<'ctx> for NIntModel { + fn check_llvm_type_impl( + &self, + ctx: &'ctx Context, + ty: BasicTypeEnum<'ctx>, + ) -> Result<(), ModelError> { + check_int_llvm_type(ty, T::get_int_type(ctx)) + } +} + +impl<'ctx, T: NIntKind> Model<'ctx> for NIntModel { + type Type = IntType<'ctx>; + type Value = IntValue<'ctx>; + + fn get_type(&self, ctx: &'ctx Context) -> Self::Type { + T::get_int_type(ctx) + } + + fn review_value>( + &self, + ctx: &'ctx Context, + value: V, + ) -> Result, ModelError> { + let value = review_int_llvm_value(value.as_basic_value_enum(), T::get_int_type(ctx))?; + Ok(self.believe_value(value)) + } +} + +impl NIntModel { + /// "Demote" this [`NIntModel`] to an [`IntModel`]. + /// + /// Information about the [`NIntKind`] will be lost. + pub fn to_int_model(self, ctx: &Context) -> IntModel<'_> { + IntModel(T::get_int_type(ctx)) + } + + /// Create an unsigned constant of this [`NIntModel`]. + pub fn constant<'ctx>(&self, ctx: &'ctx Context, value: u64) -> NInt<'ctx, T> { + int_constant(ctx, *self, value) + } +} + +impl<'ctx, T: NIntKind> NInt<'ctx, T> { + /// "Demote" this [`NInt`] to an [`Int`]. + /// + /// Information about the [`NIntKind`] will be lost. 
+ pub fn to_int(self, ctx: &'ctx Context) -> Int<'ctx> { + let int_model = self.model.to_int_model(ctx); + int_model.believe_value(self.value) + } +} + +// Some pre-defined fixed integer types + +#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)] +pub struct Bool; +pub type BoolModel = NIntModel; + +impl NIntKind for Bool { + fn get_int_type(ctx: &Context) -> IntType<'_> { + ctx.bool_type() + } + + fn get_bit_width() -> u32 { + 1 + } +} + +// Extra utilities for [`Bool`] +impl NIntModel { + /// Create a constant `false` + #[must_use] + pub fn const_false<'ctx>(&self, ctx: &'ctx Context) -> NInt<'ctx, Bool> { + self.constant(ctx, 0) + } + + /// Create a constant `true` + #[must_use] + pub fn const_true<'ctx>(&self, ctx: &'ctx Context) -> NInt<'ctx, Bool> { + self.constant(ctx, 1) + } +} + +#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)] +pub struct Byte; +pub type ByteModel = NIntModel; + +impl NIntKind for Byte { + fn get_int_type(ctx: &Context) -> IntType<'_> { + ctx.i8_type() + } + + fn get_bit_width() -> u32 { + 8 + } +} + +#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)] +pub struct Int32; +pub type Int32Model = NIntModel; + +impl NIntKind for Int32 { + fn get_int_type(ctx: &Context) -> IntType<'_> { + ctx.i32_type() + } + + fn get_bit_width() -> u32 { + 32 + } +} + +#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)] +pub struct Int64; +pub type Int64Model = NIntModel; + +impl NIntKind for Int64 { + fn get_int_type(ctx: &Context) -> IntType<'_> { + ctx.i64_type() + } + + fn get_bit_width() -> u32 { + 64 + } +} diff --git a/nac3core/src/codegen/model/function_builder.rs b/nac3core/src/codegen/model/function_builder.rs new file mode 100644 index 00000000..6f9aafb9 --- /dev/null +++ b/nac3core/src/codegen/model/function_builder.rs @@ -0,0 +1,65 @@ +use inkwell::{ + types::{BasicMetadataTypeEnum, BasicType}, + values::{AnyValue, BasicMetadataValueEnum, BasicValue, BasicValueEnum}, +}; + +use crate::codegen::{model::*, CodeGenContext}; + +// TODO: 
Variadic argument? +pub struct FunctionBuilder<'ctx, 'a> { + ctx: &'a CodeGenContext<'ctx, 'a>, + fn_name: &'a str, + arguments: Vec<(BasicMetadataTypeEnum<'ctx>, BasicMetadataValueEnum<'ctx>)>, +} + +impl<'ctx, 'a> FunctionBuilder<'ctx, 'a> { + pub fn begin(ctx: &'a CodeGenContext<'ctx, 'a>, fn_name: &'a str) -> Self { + FunctionBuilder { ctx, fn_name, arguments: Vec::new() } + } + + // NOTE: `_name` is for self-documentation + #[must_use] + pub fn arg>(mut self, _name: &'static str, arg: Instance<'ctx, M>) -> Self { + self.arguments.push(( + arg.model.get_type(self.ctx.ctx).as_basic_type_enum().into(), + arg.value.as_basic_value_enum().into(), + )); + self + } + + pub fn returning>( + self, + name: &'static str, + return_model: M, + ) -> Instance<'ctx, M> { + let (param_tys, param_vals): (Vec<_>, Vec<_>) = self.arguments.into_iter().unzip(); + + // Get the LLVM function, create (by declaring) the function if it doesn't exist in `ctx.module`. + let function = self.ctx.module.get_function(self.fn_name).unwrap_or_else(|| { + let fn_type = return_model.get_type(self.ctx.ctx).fn_type(¶m_tys, false); + self.ctx.module.add_function(self.fn_name, fn_type, None) + }); + + // Build call + let ret = self.ctx.builder.build_call(function, ¶m_vals, name).unwrap(); + + // Check the return value/type + let Ok(ret) = BasicValueEnum::try_from(ret.as_any_value_enum()) else { + panic!("Return type is not a BasicValue"); + }; + return_model.review_value(self.ctx.ctx, ret).unwrap() + } + + // TODO: Code duplication, but otherwise returning> cannot resolve S if return_optic = None + pub fn returning_void(self) { + let (param_tys, param_vals): (Vec<_>, Vec<_>) = self.arguments.into_iter().unzip(); + + let function = self.ctx.module.get_function(self.fn_name).unwrap_or_else(|| { + let return_type = self.ctx.ctx.void_type(); + let fn_type = return_type.fn_type(¶m_tys, false); + self.ctx.module.add_function(self.fn_name, fn_type, None) + }); + + self.ctx.builder.build_call(function, 
¶m_vals, "").unwrap(); + } +} diff --git a/nac3core/src/codegen/model/int.rs b/nac3core/src/codegen/model/int.rs new file mode 100644 index 00000000..6c816aa5 --- /dev/null +++ b/nac3core/src/codegen/model/int.rs @@ -0,0 +1,92 @@ +use inkwell::{ + context::Context, + types::{BasicTypeEnum, IntType}, + values::{BasicValue, IntValue}, +}; + +use super::{ + core::*, + int_util::{check_int_llvm_type, int_constant, review_int_llvm_value}, +}; + +/// A model representing an [`IntType<'ctx>`]. +/// +/// Also see [`NIntModel`][`super::NIntModel`], which is more constrained than [`IntModel`] +/// but provides more type-safe mechanisms and even auto-derivation of [`BasicTypeEnum<'ctx>`] +/// for creating LLVM structures. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct IntModel<'ctx>(pub IntType<'ctx>); + +pub type Int<'ctx> = Instance<'ctx, IntModel<'ctx>>; + +impl<'ctx> CanCheckLLVMType<'ctx> for IntModel<'ctx> { + fn check_llvm_type_impl( + &self, + _ctx: &'ctx Context, + ty: BasicTypeEnum<'ctx>, + ) -> Result<(), ModelError> { + check_int_llvm_type(ty, self.0) + } +} + +impl<'ctx> Model<'ctx> for IntModel<'ctx> { + type Value = IntValue<'ctx>; + type Type = IntType<'ctx>; + + fn get_type(&self, _ctx: &'ctx Context) -> Self::Type { + self.0 + } + + fn review_value>( + &self, + _ctx: &'ctx Context, + value: V, + ) -> Result, ModelError> { + let value = review_int_llvm_value(value.as_basic_value_enum(), self.0)?; + Ok(self.believe_value(value)) + } +} + +impl<'ctx> IntModel<'ctx> { + /// Create a constant value that inhabits this [`IntModel<'ctx>`]. + #[must_use] + pub fn constant(&self, ctx: &'ctx Context, value: u64) -> Int<'ctx> { + int_constant(ctx, *self, value) + } +} + +/// A model representing an [`IntType<'ctx>`] that happens to be defined as `size_t`. +/// +/// This is specifically created to guide developers to write `size_t`-dependent code. 
+#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct SizeTModel<'ctx>(pub IntType<'ctx>); + +pub type SizeT<'ctx> = Instance<'ctx, SizeTModel<'ctx>>; + +impl<'ctx> CanCheckLLVMType<'ctx> for SizeTModel<'ctx> { + fn check_llvm_type_impl( + &self, + _ctx: &'ctx Context, + ty: BasicTypeEnum<'ctx>, + ) -> Result<(), ModelError> { + check_int_llvm_type(ty, self.0) + } +} + +impl<'ctx> Model<'ctx> for SizeTModel<'ctx> { + type Value = IntValue<'ctx>; + type Type = IntType<'ctx>; + + fn get_type(&self, _ctx: &'ctx Context) -> Self::Type { + self.0 + } + + fn review_value>( + &self, + _ctx: &'ctx Context, + value: V, + ) -> Result, ModelError> { + let value = review_int_llvm_value(value.as_basic_value_enum(), self.0)?; + Ok(self.believe_value(value)) + } +} diff --git a/nac3core/src/codegen/model/int_util.rs b/nac3core/src/codegen/model/int_util.rs new file mode 100644 index 00000000..850676f0 --- /dev/null +++ b/nac3core/src/codegen/model/int_util.rs @@ -0,0 +1,87 @@ +use inkwell::{ + context::Context, + types::{BasicType, BasicTypeEnum, IntType}, + values::{BasicValueEnum, IntValue}, +}; + +use crate::codegen::CodeGenContext; + +use super::{Instance, Model, ModelError}; + +/// Helper function to check if `scrutinee` is the same as `expected_int_type` +pub fn check_int_llvm_type<'ctx>( + ty: BasicTypeEnum<'ctx>, + expected_int_type: IntType<'ctx>, +) -> Result<(), ModelError> { + // Check if llvm_type is int type + let BasicTypeEnum::IntType(ty) = ty else { + return Err(ModelError(format!("Expecting an int type but got {ty:?}"))); + }; + + // Check bit width + if ty.get_bit_width() != expected_int_type.get_bit_width() { + return Err(ModelError(format!( + "Expecting an int type of {}-bit(s) but got int type {}-bit(s)", + expected_int_type.get_bit_width(), + ty.get_bit_width() + ))); + } + + Ok(()) +} + +/// Helper function to cast `scrutinee` is into an [`IntValue<'ctx>`]. +/// The LLVM type of `scrutinee` will be checked with [`check_int_llvm_type`]. 
+pub fn review_int_llvm_value<'ctx>( + value: BasicValueEnum<'ctx>, + expected_int_type: IntType<'ctx>, +) -> Result, ModelError> { + // Check if value is of int type, error if that is anything else + check_int_llvm_type(value.get_type().as_basic_type_enum(), expected_int_type)?; + + // Ok, it is must be an int + Ok(value.into_int_value()) +} + +pub fn int_constant<'ctx, M>(ctx: &'ctx Context, model: M, value: u64) -> Instance<'ctx, M> +where + M: Model<'ctx, Value = IntValue<'ctx>, Type = IntType<'ctx>>, +{ + let value = model.get_type(ctx).const_int(value, false); + model.believe_value(value) +} + +impl<'ctx, M> Instance<'ctx, M> +where + M: Model<'ctx, Value = IntValue<'ctx>, Type = IntType<'ctx>>, +{ + pub fn s_extend_or_bit_cast( + &self, + ctx: &CodeGenContext<'ctx, '_>, + to_model: N, + name: &str, + ) -> Instance<'ctx, N> + where + N: Model<'ctx, Value = IntValue<'ctx>, Type = IntType<'ctx>>, + { + let value = ctx + .builder + .build_int_s_extend_or_bit_cast(self.value, to_model.get_type(ctx.ctx), name) + .unwrap(); + to_model.believe_value(value) + } + + pub fn truncate( + &self, + ctx: &CodeGenContext<'ctx, '_>, + to_model: N, + name: &str, + ) -> Instance<'ctx, N> + where + N: Model<'ctx, Value = IntValue<'ctx>, Type = IntType<'ctx>>, + { + let value = + ctx.builder.build_int_truncate(self.value, to_model.get_type(ctx.ctx), name).unwrap(); + to_model.believe_value(value) + } +} diff --git a/nac3core/src/codegen/model/mod.rs b/nac3core/src/codegen/model/mod.rs new file mode 100644 index 00000000..877a439e --- /dev/null +++ b/nac3core/src/codegen/model/mod.rs @@ -0,0 +1,18 @@ +pub mod core; +pub mod fixed_int; +pub mod function_builder; +pub mod int; +mod int_util; +pub mod opaque; +pub mod pointer; +pub mod slice; +pub mod structure; + +pub use core::*; +pub use fixed_int::*; +pub use function_builder::*; +pub use int::*; +pub use opaque::*; +pub use pointer::*; +pub use slice::*; +pub use structure::*; diff --git a/nac3core/src/codegen/model/opaque.rs 
b/nac3core/src/codegen/model/opaque.rs new file mode 100644 index 00000000..0627eb7e --- /dev/null +++ b/nac3core/src/codegen/model/opaque.rs @@ -0,0 +1,50 @@ +use inkwell::{ + context::Context, + types::BasicTypeEnum, + values::{BasicValue, BasicValueEnum}, +}; + +use super::*; + +/// A [`Model`] that holds an arbitrary [`BasicTypeEnum`]. +/// +/// Use this and [`Opaque`] when you are dealing with a [`BasicTypeEnum<'ctx>`] +/// at runtime and there is no way to abstract your implementation +/// with [`Model`]. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct OpaqueModel<'ctx>(pub BasicTypeEnum<'ctx>); + +impl<'ctx> CanCheckLLVMType<'ctx> for OpaqueModel<'ctx> { + fn check_llvm_type_impl( + &self, + _ctx: &'ctx Context, + ty: BasicTypeEnum<'ctx>, + ) -> Result<(), ModelError> { + if ty == self.0 { + Ok(()) + } else { + Err(ModelError(format!("Expecting {}, but got {}", self.0, ty))) + } + } +} + +impl<'ctx> Model<'ctx> for OpaqueModel<'ctx> { + type Value = BasicValueEnum<'ctx>; + type Type = BasicTypeEnum<'ctx>; + + fn get_type(&self, _ctx: &'ctx Context) -> BasicTypeEnum<'ctx> { + self.0 + } + + fn review_value>( + &self, + ctx: &'ctx Context, + value: V, + ) -> Result, ModelError> { + let value = value.as_basic_value_enum(); + self.check_type(ctx, value.get_type())?; + Ok(self.believe_value(value)) + } +} + +pub type Opaque<'ctx> = Instance<'ctx, OpaqueModel<'ctx>>; diff --git a/nac3core/src/codegen/model/pointer.rs b/nac3core/src/codegen/model/pointer.rs new file mode 100644 index 00000000..22f3c765 --- /dev/null +++ b/nac3core/src/codegen/model/pointer.rs @@ -0,0 +1,114 @@ +use inkwell::{ + context::Context, + types::{BasicType, BasicTypeEnum, PointerType}, + values::{BasicValue, PointerValue}, + AddressSpace, +}; + +use crate::codegen::CodeGenContext; + +use super::{core::*, OpaqueModel}; + +/// A [`Model<'ctx>`] representing an LLVM [`PointerType<'ctx>`] +/// with *full* information on the element u +/// +/// [`self.0`] contains [`Model<'ctx>`] that 
represents the +/// LLVM type of element of the [`PointerType<'ctx>`] is pointing at +/// (like `PointerType<'ctx>::get_element_type()`, but abstracted as a [`Model<'ctx>`]). +#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)] +pub struct PointerModel(pub E); + +pub type Pointer<'ctx, E> = Instance<'ctx, PointerModel>; + +impl<'ctx, E: Model<'ctx>> CanCheckLLVMType<'ctx> for PointerModel { + fn check_llvm_type_impl( + &self, + ctx: &'ctx Context, + ty: BasicTypeEnum<'ctx>, + ) -> Result<(), ModelError> { + // Check if scrutinee is even a PointerValue + let BasicTypeEnum::PointerType(ty) = ty else { + return Err(ModelError(format!("Expecting a pointer value, but got {ty:?}"))); + }; + + // Check the type of what the pointer is pointing at + // TODO: This will be deprecated by inkwell > llvm14 because `get_element_type()` will be gone + let Ok(element_ty) = BasicTypeEnum::try_from(ty.get_element_type()) else { + return Err(ModelError(format!( + "Expecting pointer to point to an inkwell BasicValue, but got {ty:?}" + ))); + }; + + self.0.check_type(ctx, element_ty) // TODO: Include backtrace? 
+ } +} + +impl<'ctx, E: Model<'ctx>> Model<'ctx> for PointerModel { + type Value = PointerValue<'ctx>; + type Type = PointerType<'ctx>; + + fn get_type(&self, ctx: &'ctx Context) -> Self::Type { + self.0.get_type(ctx).ptr_type(AddressSpace::default()) + } + + fn review_value>( + &self, + ctx: &'ctx Context, + value: V, + ) -> Result, ModelError> { + let value = value.as_basic_value_enum(); + self.check_type(ctx, value.get_type())?; + Ok(self.believe_value(value.into_pointer_value())) + } +} + +impl<'ctx, E: Model<'ctx>> PointerModel { + /// Create a null [`Pointer`] of this [`PointerModel`] + pub fn nullptr(&self, ctx: &'ctx Context) -> Pointer<'ctx, E> { + let nullptr = self.get_type(ctx).const_null(); + self.believe_value(nullptr) + } +} + +impl<'ctx, E: Model<'ctx>> Pointer<'ctx, E> { + /// Build an instruction to store a value into this pointer + pub fn store(&self, ctx: &CodeGenContext<'ctx, '_>, instance: Instance<'ctx, E>) { + assert_eq!( + self.model.0, instance.model, + "Attempting to store an Instance of a different type" + ); + ctx.builder.build_store(self.value, instance.value).unwrap(); + } + + /// Build an instruction to load a value from this pointer + pub fn load(&self, ctx: &CodeGenContext<'ctx, '_>, name: &str) -> Instance<'ctx, E> { + let value = ctx.builder.build_load(self.value, name).unwrap(); + self.model.0.review_value(ctx.ctx, value).unwrap() // If unwrap() panics, there is a logic error in your code. + } + + /// "Demote" the [`Model`] of the thing this pointer is pointing to: the pointee model becomes an [`OpaqueModel`] of the pointee's LLVM type (`self.model.0`), the pointer value is unchanged. + pub fn cast_to_opaque(self, ctx: &'ctx Context) -> Pointer<'ctx, OpaqueModel<'ctx>> { + let ptr_model = PointerModel(OpaqueModel(self.model.0.get_type(ctx).as_basic_type_enum())); + ptr_model.believe_value(self.value) + } + + /// Cast the [`Model`] of the thing this pointer is pointing to + /// and uses inkwell's [`Builder::build_pointer_cast`] to cast the LLVM pointer type. 
+ pub fn cast_to>( + self, + ctx: &CodeGenContext<'ctx, '_>, + element: K, + name: &str, + ) -> Pointer<'ctx, K> { + let casted_ptr_model = PointerModel(element); + let casted_ptr = ctx + .builder + .build_pointer_cast( + self.value, + element.get_type(ctx.ctx).ptr_type(AddressSpace::default()), + name, + ) + .unwrap(); + casted_ptr_model.believe_value(casted_ptr) + } +} diff --git a/nac3core/src/codegen/model/slice.rs b/nac3core/src/codegen/model/slice.rs new file mode 100644 index 00000000..16471682 --- /dev/null +++ b/nac3core/src/codegen/model/slice.rs @@ -0,0 +1,94 @@ +use inkwell::{types::IntType, values::IntValue}; + +use crate::codegen::{CodeGenContext, CodeGenerator}; + +use super::{int_util::int_constant, Instance, Model, Pointer}; + +/// An LLVM "slice" - literally just a pointer and a length value. +/// The pointer points to a location with `num_elements` **contiguously** placed +/// values of [`E`][`Model`] in memory. +/// +/// NOTE: This is NOT a [`Model`]! This is simply a helper +/// structure to aggregate a length value and a pointer together. +pub struct ArraySlice<'ctx, N, E> +where + N: Model<'ctx, Value = IntValue<'ctx>, Type = IntType<'ctx>>, + E: Model<'ctx>, +{ + pub pointer: Pointer<'ctx, E>, + pub num_elements: Instance<'ctx, N>, +} + +impl<'ctx, N, E> ArraySlice<'ctx, N, E> +where + N: Model<'ctx, Value = IntValue<'ctx>, Type = IntType<'ctx>>, + E: Model<'ctx>, +{ + /// Get the [Model][`super::Model`] of the element type of this [`ArraySlice`] + pub fn get_element_model(&self) -> E { + self.pointer.model.0 + } + + /// Get the `idx`-nth element of this [`ArraySlice`], + /// but doesn't do an assertion to see if `idx` is + /// out of bounds or not. + /// + /// Also see [`ArraySlice::ix`]. 
+ pub fn ix_unchecked( + &self, + ctx: &CodeGenContext<'ctx, '_>, + idx: Instance<'ctx, N>, + name: &str, + ) -> Pointer<'ctx, E> { + assert_eq!(idx.model, self.num_elements.model); + let element_ptr = unsafe { + ctx.builder.build_in_bounds_gep(self.pointer.value, &[idx.value], name).unwrap() + }; + self.pointer.model.review_value(ctx.ctx, element_ptr).unwrap() + } + + /// Call [`ArraySlice::ix_unchecked`], but + /// checks if `idx` is in bounds, otherwise + /// a runtime `IndexError` will be thrown. + pub fn ix( + &self, + generator: &mut G, + ctx: &mut CodeGenContext<'ctx, '_>, + idx: Instance<'ctx, N>, + name: &str, + ) -> Pointer<'ctx, E> { + assert_eq!(idx.model, self.num_elements.model); + let int_type = self.num_elements.model; + + // Assert `0 <= idx < length` and throw an Exception if `idx` is out of bounds + let lower_bounded = ctx + .builder + .build_int_compare( + inkwell::IntPredicate::SLE, + int_constant(ctx.ctx, int_type, 0).value, + idx.value, + "lower_bounded", + ) + .unwrap(); + let upper_bounded = ctx + .builder + .build_int_compare( + inkwell::IntPredicate::SLT, + idx.value, + self.num_elements.value, + "upper_bounded", + ) + .unwrap(); + let bounded = ctx.builder.build_and(lower_bounded, upper_bounded, "bounded").unwrap(); + ctx.make_assert( + generator, + bounded, + "0:IndexError", + "nac3core LLVM codegen attempting to access out of bounds array index {0}. 
Must satisfy 0 <= index < {1}", + [ Some(idx.value), Some(self.num_elements.value), None], + ctx.current_loc + ); + + self.ix_unchecked(ctx, idx, name) + } +} diff --git a/nac3core/src/codegen/model/structure.rs b/nac3core/src/codegen/model/structure.rs new file mode 100644 index 00000000..76120405 --- /dev/null +++ b/nac3core/src/codegen/model/structure.rs @@ -0,0 +1,384 @@ +use core::fmt; + +use inkwell::{ + context::Context, + types::{BasicType, BasicTypeEnum, StructType}, + values::{BasicValue, StructValue}, +}; +use itertools::{izip, Itertools}; + +use crate::codegen::CodeGenContext; + +use super::{core::CanCheckLLVMType, Instance, Model, ModelError, Pointer, PointerModel}; + +/// An LLVM struct's "field". +#[derive(Debug, Clone, Copy)] +pub struct Field { + /// The GEP index of this field. + pub gep_index: u64, + + /// The name of this field. Generally named + /// to how the field is named in ARTIQ or IRRT. + /// + /// NOTE: This is only used for debugging. + pub name: &'static str, + + /// The [`Model`] of this field. + pub model: E, +} + +// A helper struct for [`FieldBuilder`] +struct FieldLLVM<'ctx> { + gep_index: u64, + name: &'ctx str, + + // Only CanCheckLLVMType is needed, dont use `Model<'ctx>` + llvm_type_model: Box + 'ctx>, + llvm_type: BasicTypeEnum<'ctx>, +} + +/// A helper struct to create [`Field`]-s in [`StructKind::build_fields`]. +/// +/// See [`StructKind`] for more details and see how [`FieldBuilder`] is put +/// into action. +pub struct FieldBuilder<'ctx> { + /// The [`Context`] this [`FieldBuilder`] is under. + /// + /// Can be used in [`StructKind::build_fields`]. + /// See [`StructKind`] for more details and see how [`FieldBuilder`] is put + /// into action. + pub ctx: &'ctx Context, + + /// An incrementing counter for GEP indices when + /// doing [`FieldBuilder::add_field`] or [`FieldBuilder::add_field_auto`]. + gep_index_counter: u64, + + /// Name of the `struct` this [`FieldBuilder`] is currently + /// building. 
+ /// + /// NOTE: This is only used for debugging. + struct_name: &'ctx str, + + /// The fields added so far. + fields: Vec>, +} + +impl<'ctx> FieldBuilder<'ctx> { + #[must_use] + pub fn new(ctx: &'ctx Context, struct_name: &'ctx str) -> Self { + FieldBuilder { ctx, gep_index_counter: 0, struct_name, fields: Vec::new() } + } + + fn next_gep_index(&mut self) -> u64 { + let index = self.gep_index_counter; + self.gep_index_counter += 1; + index + } + + /// Add a new field. + /// + /// - `name`: The name of the field. See [`Field::name`]. + /// - `element`: The [`Model`] of the type of the field. See [`Field::element`]. + pub fn add_field + 'ctx>(&mut self, name: &'static str, element: E) -> Field { + let gep_index = self.next_gep_index(); + + self.fields.push(FieldLLVM { + gep_index, + name, + llvm_type: element.get_type(self.ctx).as_basic_type_enum(), + llvm_type_model: Box::new(element), + }); + + Field { gep_index, name, model: element } + } + + /// Like [`FieldBuilder::add_field`] but `element` can be **automatically derived** + /// if it has the `Default` instance. + /// + /// Certain [`Model`] has a [`Default`] trait - [`Model`]s that are just singletons, + /// By deriving the [`Default`] trait on those [`Model`]s, Rust could automatically + /// construct the [`Model`] with [`Default::default`]. + /// + /// This function is equivalent to + /// ```ignore + /// self.add_field(name, E::default()) + /// ``` + pub fn add_field_auto + Default + 'ctx>( + &mut self, + name: &'static str, + ) -> Field { + self.add_field(name, E::default()) + } +} + +/// A marker trait to mark singleton struct that +/// describes a particular LLVM structure. +/// +/// It is a powerful inkwell abstraction that can reduce +/// a lot of inkwell boilerplate when dealing with LLVM structs, +/// `getelementptr`, `load`-ing and `store`-ing fields. 
+///
+/// For usage, see the worked example on [`StructKind::build_fields`].
+pub trait StructKind<'ctx>: fmt::Debug + Clone + Copy + PartialEq + Eq {
+    /// The type of the Rust `struct` that holds all the fields of this LLVM struct.
+    type Fields;
+
+    // TODO:
+    /// The name of this [`StructKind`].
+    ///
+    /// The name should match the one used in
+    /// IRRT's `struct` or ARTIQ's definition.
+    fn struct_name(&self) -> &'static str;
+
+    /// Define the [`Field`]s of this [`StructKind`] by adding them, in order, to `builder`.
+    ///
+    ///
+    /// ### Syntax
+    ///
+    /// Suppose you want to define the following C++ `struct`s in `nac3core`:
+    /// ```cpp
+    /// template <typename SizeT>
+    /// struct Str {
+    ///     uint8_t* content; // NOTE: could be `void *`
+    ///     SizeT length;
+    /// }
+    ///
+    /// template <typename SizeT>
+    /// struct Exception {
+    ///     uint32_t id;
+    ///     Str<SizeT> message;
+    ///     uint64_t param0;
+    ///     uint64_t param1;
+    ///     uint64_t param2;
+    /// }
+    /// ```
+    ///
+    /// You write this in nac3core:
+    /// ```ignore
+    /// struct Str<'ctx> {
+    ///     sizet: IntModel<'ctx>,
+    /// }
+    ///
+    /// struct StrFields<'ctx> {
+    ///     content: Field<PointerModel<NIntModel<Byte>>>, // a pointer to `NIntModel<Byte>` (i8).
+    ///     length: Field<IntModel<'ctx>>, // `SizeT` is only known in runtime - `CodeGenerator::get_size_type()`.
+    /// }
+    ///
+    /// impl<'ctx> StructKind<'ctx> for Str<'ctx> {
+    ///     fn struct_name(&self) -> &'static str {
+    ///         "Str"
+    ///     }
+    ///
+    ///     fn build_fields(&self, builder: &mut FieldBuilder<'ctx>) -> Self::Fields {
+    ///         // THE order of `builder.add_field*` is IMPORTANT!!!
+    ///         // so the GEP indices would be correct.
+    ///         StrFields {
+    ///             content: builder.add_field_auto("content"), // `PointerModel` has `Default` trait.
+    ///             length: builder.add_field("length", self.sizet), // no `Default` here: the width comes from `self.sizet` at runtime.
+    ///         }
+    ///     }
+    /// }
+    ///
+    /// struct Exception<'ctx> {
+    ///     sizet: IntModel<'ctx>,
+    /// }
+    ///
+    /// struct ExceptionFields<'ctx> {
+    ///     id: Field<NIntModel<Int32>>,
+    ///     message: Field<StructModel<Str<'ctx>>>,
+    ///     param0: Field<NIntModel<Int64>>,
+    ///     param1: Field<NIntModel<Int64>>,
+    ///     param2: Field<NIntModel<Int64>>,
+    /// }
+    ///
+    /// impl<'ctx> StructKind<'ctx> for Exception<'ctx> {
+    ///     fn struct_name(&self) -> &'static str {
+    ///         "Exception"
+    ///     }
+    ///
+    ///     fn build_fields(&self, builder: &mut FieldBuilder<'ctx>) -> Self::Fields {
+    ///         // THE order of `builder.add_field*` is IMPORTANT!!!
+    ///         // so the GEP indices would be correct.
+    ///         ExceptionFields {
+    ///             id: builder.add_field_auto("id"), // `NIntModel<Int32>` has `Default` trait.
+    ///             message: builder.add_field("message", StructModel(Str { sizet: self.sizet })),
+    ///             param0: builder.add_field_auto("param0"), // has `Default` trait
+    ///             param1: builder.add_field_auto("param1"), // has `Default` trait
+    ///             param2: builder.add_field_auto("param2"), // has `Default` trait
+    ///         }
+    ///     }
+    /// }
+    /// ```
+    ///
+    /// Then to `alloca` an `Exception`, do this:
+    /// ```ignore
+    /// let generator: dyn CodeGenerator<'ctx>;
+    /// let ctx: &CodeGenContext<'ctx, '_>;
+    /// let sizet = IntModel(generator.get_size_type(ctx.ctx));
+    /// let exn_model = StructModel(Exception { sizet });
+    /// let exn = exn_model.alloca(ctx, "my_exception"); // Every [`Model<'ctx>`] has an `.alloca()` function.
+    /// // exn: Pointer<'ctx, StructModel<Exception<'ctx>>>
+    /// ```
+    ///
+    /// NOTE: In fact, it is possible to define `Str` and `Exception` like this:
+    /// ```ignore
+    /// struct Str<SizeT> {
+    ///     _phantom: PhantomData<SizeT>,
+    /// }
+    ///
+    /// struct Exception<SizeT> {
+    ///     _phantom: PhantomData<SizeT>,
+    /// }
+    /// ```
+    /// But issues arise because you don't know the nac3core
+    /// `CodeGenerator`'s `get_size_type()` beforehand.
+    fn build_fields(&self, builder: &mut FieldBuilder<'ctx>) -> Self::Fields;
+}
+
+/// A [`Model<'ctx>`] that represents an LLVM struct.
+///
+/// `self.0` contains a [`StructKind<'ctx>`] that gives the details of the LLVM struct.
+#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
+pub struct StructModel<S>(pub S);
+pub type Struct<'ctx, S> = Instance<'ctx, StructModel<S>>;
+
+impl<'ctx, S: StructKind<'ctx>> CanCheckLLVMType<'ctx> for StructModel<S> {
+    fn check_llvm_type_impl(
+        &self,
+        ctx: &'ctx Context,
+        ty: BasicTypeEnum<'ctx>,
+    ) -> Result<(), ModelError> {
+        // Reject non-struct types; in the `else` arm, `ty` still names the original enum.
+        let BasicTypeEnum::StructType(ty) = ty else {
+            return Err(ModelError(format!("Expecting a struct type, but got {ty:?}")));
+        };
+
+        // It is a struct type; now compare it field by field.
+        self.check_struct_type(ctx, ty)
+    }
+}
+
+impl<'ctx, S: StructKind<'ctx>> Model<'ctx> for StructModel<S> {
+    type Value = StructValue<'ctx>;
+    type Type = StructType<'ctx>;
+
+    fn get_type(&self, ctx: &'ctx Context) -> Self::Type {
+        self.get_struct_type(ctx)
+    }
+
+    fn review_value<V: BasicValue<'ctx>>(
+        &self,
+        ctx: &'ctx Context,
+        value: V,
+    ) -> Result<Instance<'ctx, Self>, ModelError> {
+        let value = value.as_basic_value_enum();
+        self.check_type(ctx, value.get_type())?;
+        Ok(self.believe_value(value.into_struct_value()))
+    }
+}
+
+impl<'ctx, S: StructKind<'ctx>> StructModel<S> {
+    /// Get the [`StructKind::Fields`] of this [`StructModel`] by running `build_fields` afresh.
+    pub fn get_fields(&self, ctx: &'ctx Context) -> S::Fields {
+        let mut builder = FieldBuilder::new(ctx, self.0.struct_name());
+        self.0.build_fields(&mut builder)
+    }
+
+    /// Get the LLVM struct type this [`StructModel`] is representing.
+    pub fn get_struct_type(&self, ctx: &'ctx Context) -> StructType<'ctx> {
+        let mut builder = FieldBuilder::new(ctx, self.0.struct_name());
+        self.0.build_fields(&mut builder); // Self::Fields is discarded
+
+        let field_types = builder.fields.iter().map(|f| f.llvm_type).collect_vec();
+        ctx.struct_type(&field_types, false)
+    }
+
+    /// Check if `scrutinee` matches the [`StructType<'ctx>`] this [`StructModel`] is representing.
+    pub fn check_struct_type(
+        &self,
+        ctx: &'ctx Context,
+        scrutinee: StructType<'ctx>,
+    ) -> Result<(), ModelError> {
+        // The field types the scrutinee actually has.
+        let scrutinee_field_types = scrutinee.get_field_types();
+
+        // The field types this struct is specified to have;
+        // `build_fields` records them in `builder.fields`.
+        let mut builder = FieldBuilder::new(ctx, self.0.struct_name());
+        self.0.build_fields(&mut builder);
+
+        // The number of fields must agree before comparing them pairwise.
+        if builder.fields.len() != scrutinee_field_types.len() {
+            return Err(ModelError(format!(
+                "Expecting struct to have {} field(s), but scrutinee has {} field(s)",
+                builder.fields.len(),
+                scrutinee_field_types.len()
+            )));
+        }
+
+        // Check the type of each field, naming the offending field on a mismatch.
+        for (f, scrutinee_field_type) in izip!(builder.fields, scrutinee_field_types) {
+            let field_type = scrutinee_field_type.as_basic_type_enum();
+            let checked = f.llvm_type_model.check_llvm_type_impl(ctx, field_type);
+            checked.map_err(|e| ModelError(format!("Field {} `{}`: {}", f.gep_index, f.name, e.0)))?;
+        }
+
+        Ok(())
+    }
+}
+
+impl<'ctx, S: StructKind<'ctx>> Pointer<'ctx, StructModel<S>> {
+    /// Build an instruction that does `getelementptr` on an LLVM structure referenced by this pointer.
+    ///
+    /// This provides a nice syntax to chain up `getelementptr` in an intuitive and type-safe way:
+    ///
+    /// ```ignore
+    /// let ctx: &CodeGenContext<'ctx, '_>;
+    /// let ndarray: Pointer<'ctx, StructModel<NDArray<'ctx>>>;
+    /// ndarray.gep(ctx, |f| f.ndims).store();
+    /// ```
+    ///
+    /// You can even chain `gep` calls, e.g.,
+    /// ```ignore
+    /// let exn_ptr: Pointer<'ctx, StructModel<Exception<'ctx>>>;
+    /// let value: Int<'ctx>; // Suppose it has the correct inkwell `IntType<'ctx>`.
+    ///
+    /// // To do `exn.message.length = value`:
+    /// let exn_message_ptr = exn_ptr.gep(ctx, |f| f.message);
+    /// let exn_message_length_ptr = exn_message_ptr.gep(ctx, |f| f.length);
+    /// exn_message_length_ptr.store(ctx, value);
+    ///
+    /// // or simply:
+    /// exn_ptr
+    ///     .gep(ctx, |f| f.message)
+    ///     .gep(ctx, |f| f.length)
+    ///     .store(ctx, value) // Equivalent to `exn.message.length = value`
+    /// ```
+    pub fn gep<E, GetFieldFn>(
+        &self,
+        ctx: &CodeGenContext<'ctx, '_>,
+        get_field: GetFieldFn,
+    ) -> Pointer<'ctx, E>
+    where
+        E: Model<'ctx>,
+        GetFieldFn: FnOnce(S::Fields) -> Field<E>,
+    {
+        let fields = self.model.0.get_fields(ctx.ctx);
+        let field = get_field(fields);
+
+        // LLVM only accepts constant `i32` indices for struct fields in a GEP, so `i32` is right.
+        let llvm_i32 = ctx.ctx.i32_type();
+        let ptr_model = PointerModel(field.model);
+        // SAFETY: `gep_index` follows `build_fields` order; assumes `self.value` points to that struct.
+        let ptr = unsafe {
+            ctx.builder
+                .build_in_bounds_gep(
+                    self.value,
+                    &[llvm_i32.const_zero(), llvm_i32.const_int(field.gep_index, false)],
+                    field.name,
+                )
+                .unwrap()
+        };
+        ptr_model.believe_value(ptr)
+    }
+}