From b825ab03cf8aeca62df30a55ad0a15079b641ee6 Mon Sep 17 00:00:00 2001 From: lyken Date: Thu, 18 Jul 2024 16:06:19 +0800 Subject: [PATCH] WIP --- nac3core/src/codegen/expr.rs | 135 ++++++++++++---------- nac3core/src/codegen/irrt/mod.rs | 1 + nac3core/src/codegen/irrt/util.rs | 79 +++++++++++++ nac3core/src/codegen/mod.rs | 7 +- nac3core/src/codegen/model/structure.rs | 1 - nac3core/src/codegen/stmt.rs | 88 ++++++++------ nac3core/src/codegen/structs/exception.rs | 66 +++++++++++ nac3core/src/codegen/structs/mod.rs | 2 + nac3core/src/codegen/structs/str.rs | 55 +++++++++ 9 files changed, 335 insertions(+), 99 deletions(-) create mode 100644 nac3core/src/codegen/irrt/util.rs create mode 100644 nac3core/src/codegen/structs/exception.rs create mode 100644 nac3core/src/codegen/structs/mod.rs create mode 100644 nac3core/src/codegen/structs/str.rs diff --git a/nac3core/src/codegen/expr.rs b/nac3core/src/codegen/expr.rs index 38ac9a63..9297a6c3 100644 --- a/nac3core/src/codegen/expr.rs +++ b/nac3core/src/codegen/expr.rs @@ -34,7 +34,7 @@ use crate::{ use inkwell::{ attributes::{Attribute, AttributeLoc}, types::{AnyType, BasicType, BasicTypeEnum}, - values::{BasicValueEnum, CallSiteValue, FunctionValue, IntValue, PointerValue}, + values::{AnyValue, BasicValueEnum, CallSiteValue, FunctionValue, IntValue, PointerValue}, AddressSpace, IntPredicate, OptimizationLevel, }; use itertools::{chain, izip, Either, Itertools}; @@ -43,6 +43,14 @@ use nac3parser::ast::{ Unaryop, }; +use super::{ + model::*, + structs::{ + exception::{Exception, ExceptionId}, + str::Str, + }, +}; + pub fn get_subst_key( unifier: &mut Unifier, obj: Option, @@ -281,24 +289,7 @@ impl<'ctx, 'a> CodeGenContext<'ctx, 'a> { None } } - Constant::Str(v) => { - assert!(self.unifier.unioned(ty, self.primitives.str)); - if let Some(v) = self.const_strings.get(v) { - Some(*v) - } else { - let str_ptr = self - .builder - .build_global_string_ptr(v, "const") - .map(|v| v.as_pointer_value().into()) - .unwrap(); - let 
size = generator.get_size_type(self.ctx).const_int(v.len() as u64, false); - let ty = self.get_llvm_type(generator, self.primitives.str); - let val = - ty.into_struct_type().const_named_struct(&[str_ptr, size.into()]).into(); - self.const_strings.insert(v.to_string(), val); - Some(val) - } - } + Constant::Str(s) => Some(self.gen_string(generator, s)), Constant::Ellipsis => { let msg = self.gen_string(generator, "NotImplementedError"); @@ -560,58 +551,86 @@ impl<'ctx, 'a> CodeGenContext<'ctx, 'a> { } /// Helper function for generating a LLVM variable storing a [String]. - pub fn gen_string(&mut self, generator: &mut G, s: S) -> BasicValueEnum<'ctx> + pub fn gen_string(&mut self, generator: &mut G, string: &str) -> Struct<'ctx, Str> where G: CodeGenerator + ?Sized, - S: Into, { - self.gen_const(generator, &Constant::Str(s.into()), self.primitives.str).unwrap() + // Define all used models + let sizet = IntModel(generator.get_size_type(&self.ctx)); + let str_content_model = PointerModel(FixedIntModel(Byte)); + + self.const_strings.get(string).copied().or_else(|| { + let global_str_ptr = self.builder.build_global_string_ptr(string, "const").unwrap(); + let global_str_len = sizet.constant(string.len() as u64); + let ok = Str { sizet }.constant( + self, + str_content_model + .review(&self.ctx, global_str_ptr.as_pointer_value().as_any_value_enum()), + global_str_len, + ); + + todo!(); + }); + + todo!() + // self.gen_const(generator, &Constant::Str(s.into()), self.primitives.str).unwrap() + + // assert!(self.unifier.unioned(ty, self.primitives.str)); + // if let Some(v) = self.const_strings.get(v) { + // Some(*v) + // } else { + // let str_ptr = self + // .builder + // .build_global_string_ptr(v, "const") + // .map(|v| v.as_pointer_value().into()) + // .unwrap(); + // let size = generator.get_size_type(self.ctx).const_int(v.len() as u64, false); + // let ty = self.get_llvm_type(generator, self.primitives.str); + // let val = + // 
ty.into_struct_type().const_named_struct(&[str_ptr, size.into()]).into(); + // self.const_strings.insert(v.to_string(), val); + // Some(val) + // } } pub fn raise_exn( &mut self, generator: &mut G, name: &str, - msg: BasicValueEnum<'ctx>, - params: [Option>; 3], + msg: Struct<'ctx, Str<'ctx>>, + params: [Option>; 3], loc: Location, ) { - let zelf = if let Some(exception_val) = self.exception_val { - exception_val - } else { - let ty = self.get_llvm_type(generator, self.primitives.exception).into_pointer_type(); - let zelf_ty: BasicTypeEnum = ty.get_element_type().into_struct_type().into(); - let zelf = generator.gen_var_alloc(self, zelf_ty, Some("exn")).unwrap(); - *self.exception_val.insert(zelf) - }; - let int32 = self.ctx.i32_type(); - let zero = int32.const_zero(); - unsafe { - let id_ptr = self.builder.build_in_bounds_gep(zelf, &[zero, zero], "exn.id").unwrap(); - let id = self.resolver.get_string_id(name); - self.builder.build_store(id_ptr, int32.const_int(id as u64, false)).unwrap(); - let ptr = self - .builder - .build_in_bounds_gep(zelf, &[zero, int32.const_int(5, false)], "exn.msg") - .unwrap(); - self.builder.build_store(ptr, msg).unwrap(); - let i64_zero = self.ctx.i64_type().const_zero(); - for (i, attr_ind) in [6, 7, 8].iter().enumerate() { - let ptr = self - .builder - .build_in_bounds_gep( - zelf, - &[zero, int32.const_int(*attr_ind, false)], - "exn.param", - ) - .unwrap(); - let val = params[i].map_or(i64_zero, |v| { - self.builder.build_int_s_extend(v, self.ctx.i64_type(), "sext").unwrap() - }); - self.builder.build_store(ptr, val).unwrap(); + // Define some used [`Model<'ctx>`]s + let sizet = IntModel(generator.get_size_type(self.ctx)); + let exception_id_model = FixedIntModel::::default(); + let exception_model = StructModel(Exception { sizet }); + + // let zelf = if let Some(exception_val) = self.exception_val { + // exception_val + // } else { + // let zelf = generator.gen_var_alloc(self, exception_model.get_llvm_type(self.ctx), Some("exn")); 
+ // *self.exception_val.insert(zelf) + // }; + + let exn = self.exception_val.unwrap_or_else(|| { + let exn = exception_model.var_alloc(generator, self, Some("exn")).unwrap(); + *self.exception_val.insert(exn) + }); + + // Now load everything into `exn` + exn.gep(self, |f| f.exception_id).store( + self, + exception_id_model.constant(self.ctx, self.resolver.get_string_id(name) as u64), + ); + exn.gep(self, |f| f.message).store(self, msg); + for (param_i, param) in params.iter().enumerate() { + if let Some(param) = param { + exn.gep(self, |f| f.params[param_i]).store(self, *param); } } - gen_raise(generator, self, Some(&zelf.into()), loc); + + gen_raise(generator, self, Some(exn), loc); } pub fn make_assert( diff --git a/nac3core/src/codegen/irrt/mod.rs b/nac3core/src/codegen/irrt/mod.rs index dfb91611..cd3ed433 100644 --- a/nac3core/src/codegen/irrt/mod.rs +++ b/nac3core/src/codegen/irrt/mod.rs @@ -1,6 +1,7 @@ use crate::typecheck::typedef::Type; mod test; +pub mod util; use super::{ classes::{ diff --git a/nac3core/src/codegen/irrt/util.rs b/nac3core/src/codegen/irrt/util.rs new file mode 100644 index 00000000..9b0e3cf0 --- /dev/null +++ b/nac3core/src/codegen/irrt/util.rs @@ -0,0 +1,79 @@ +use inkwell::{ + types::{BasicMetadataTypeEnum, BasicType, IntType}, + values::{AnyValue, BasicMetadataValueEnum}, +}; + +use crate::{ + codegen::{model::*, CodeGenContext}, + util::SizeVariant, +}; + +fn get_size_variant(ty: IntType) -> SizeVariant { + match ty.get_bit_width() { + 32 => SizeVariant::Bits32, + 64 => SizeVariant::Bits64, + _ => unreachable!("Unsupported int type bit width {}", ty.get_bit_width()), + } +} + +#[must_use] +pub fn get_sized_dependent_function_name(ty: IntModel, fn_name: &str) -> String { + let mut fn_name = fn_name.to_owned(); + match get_size_variant(ty.0) { + SizeVariant::Bits32 => { + // Do nothing, `fn_name` already has the correct name + } + SizeVariant::Bits64 => { + // Append "64", this is the naming convention + fn_name.push_str("64"); + } + 
} + fn_name +} + +// TODO: Variadic argument? +pub struct FunctionBuilder<'ctx, 'a> { + ctx: &'a CodeGenContext<'ctx, 'a>, + fn_name: &'a str, + arguments: Vec<(BasicMetadataTypeEnum<'ctx>, BasicMetadataValueEnum<'ctx>)>, +} + +impl<'ctx, 'a> FunctionBuilder<'ctx, 'a> { + pub fn begin(ctx: &'a CodeGenContext<'ctx, 'a>, fn_name: &'a str) -> Self { + FunctionBuilder { ctx, fn_name, arguments: Vec::new() } + } + + // The name is for self-documentation + #[must_use] + pub fn arg>(mut self, _name: &'static str, model: M, value: M::Value) -> Self { + self.arguments + .push((model.get_llvm_type(self.ctx.ctx).into(), value.get_llvm_value().into())); + self + } + + pub fn returning>(self, name: &'static str, return_model: M) -> M::Value { + let (param_tys, param_vals): (Vec<_>, Vec<_>) = self.arguments.into_iter().unzip(); + + let function = self.ctx.module.get_function(self.fn_name).unwrap_or_else(|| { + let return_type = return_model.get_llvm_type(self.ctx.ctx); + let fn_type = return_type.fn_type(&param_tys, false); + self.ctx.module.add_function(self.fn_name, fn_type, None) + }); + + let ret = self.ctx.builder.build_call(function, &param_vals, name).unwrap(); + return_model.review(self.ctx.ctx, ret.as_any_value_enum()) + } + + // TODO: Code duplication, but otherwise returning> cannot resolve S if return_optic = None + pub fn returning_void(self) { + let (param_tys, param_vals): (Vec<_>, Vec<_>) = self.arguments.into_iter().unzip(); + + let function = self.ctx.module.get_function(self.fn_name).unwrap_or_else(|| { + let return_type = self.ctx.ctx.void_type(); + let fn_type = return_type.fn_type(&param_tys, false); + self.ctx.module.add_function(self.fn_name, fn_type, None) + }); + + self.ctx.builder.build_call(function, &param_vals, "").unwrap(); + } +} diff --git a/nac3core/src/codegen/mod.rs b/nac3core/src/codegen/mod.rs index 85b963bb..2044a4fa 100644 --- a/nac3core/src/codegen/mod.rs +++ b/nac3core/src/codegen/mod.rs @@ -24,6 +24,7 @@ use inkwell::{ AddressSpace, IntPredicate, 
OptimizationLevel, }; use itertools::Itertools; +use model::{Pointer, Struct, StructModel}; use nac3parser::ast::{Location, Stmt, StrRef}; use parking_lot::{Condvar, Mutex}; use std::collections::{HashMap, HashSet}; @@ -32,6 +33,7 @@ use std::sync::{ Arc, }; use std::thread; +use structs::{exception::Exception, str::Str}; pub mod builtin_fns; pub mod classes; @@ -44,6 +46,7 @@ pub mod llvm_intrinsics; pub mod model; pub mod numpy; pub mod stmt; +pub mod structs; #[cfg(test)] mod test; @@ -159,11 +162,11 @@ pub struct CodeGenContext<'ctx, 'a> { pub registry: &'a WorkerRegistry, /// Cache for constant strings. - pub const_strings: HashMap>, + pub const_strings: HashMap>>, /// [`BasicBlock`] containing all `alloca` statements for the current function. pub init_bb: BasicBlock<'ctx>, - pub exception_val: Option>, + pub exception_val: Option>>>, /// The header and exit basic blocks of a loop in this context. See /// for explanation of these terminology. diff --git a/nac3core/src/codegen/model/structure.rs b/nac3core/src/codegen/model/structure.rs index f53753f4..c03292b1 100644 --- a/nac3core/src/codegen/model/structure.rs +++ b/nac3core/src/codegen/model/structure.rs @@ -25,7 +25,6 @@ pub struct Field { pub element: E, } -// A helper struct for [`FieldBuilder`] struct FieldLLVM<'ctx> { gep_index: u64, name: &'ctx str, diff --git a/nac3core/src/codegen/stmt.rs b/nac3core/src/codegen/stmt.rs index cb013d85..2faa5052 100644 --- a/nac3core/src/codegen/stmt.rs +++ b/nac3core/src/codegen/stmt.rs @@ -2,6 +2,8 @@ use super::{ super::symbol_resolver::ValueEnum, expr::destructure_range, irrt::{handle_slice_indices, list_slice_assignment}, + model::*, + structs::{exception::Exception, str::Str}, CodeGenContext, CodeGenerator, }; use crate::{ @@ -20,7 +22,7 @@ use inkwell::{ attributes::{Attribute, AttributeLoc}, basic_block::BasicBlock, types::{BasicType, BasicTypeEnum}, - values::{BasicValue, BasicValueEnum, FunctionValue, IntValue, PointerValue}, + values::{AnyValue, BasicValue, 
BasicValueEnum, FunctionValue, IntValue, PointerValue}, IntPredicate, }; use nac3parser::ast::{ @@ -1113,47 +1115,57 @@ pub fn exn_constructor<'ctx>( pub fn gen_raise<'ctx, G: CodeGenerator + ?Sized>( generator: &mut G, ctx: &mut CodeGenContext<'ctx, '_>, - exception: Option<&BasicValueEnum<'ctx>>, + exception: Option>>>, loc: Location, ) { - if let Some(exception) = exception { - unsafe { - let int32 = ctx.ctx.i32_type(); - let zero = int32.const_zero(); - let exception = exception.into_pointer_value(); - let file_ptr = ctx - .builder - .build_in_bounds_gep(exception, &[zero, int32.const_int(1, false)], "file_ptr") - .unwrap(); - let filename = ctx.gen_string(generator, loc.file.0); - ctx.builder.build_store(file_ptr, filename).unwrap(); - let row_ptr = ctx - .builder - .build_in_bounds_gep(exception, &[zero, int32.const_int(2, false)], "row_ptr") - .unwrap(); - ctx.builder.build_store(row_ptr, int32.const_int(loc.row as u64, false)).unwrap(); - let col_ptr = ctx - .builder - .build_in_bounds_gep(exception, &[zero, int32.const_int(3, false)], "col_ptr") - .unwrap(); - ctx.builder.build_store(col_ptr, int32.const_int(loc.column as u64, false)).unwrap(); + match exception { + Some(exception) => { + // Define all used models + let sizet = IntModel(generator.get_size_type(ctx.ctx)); // Should be the same as `exception`'s `sizet` + let str_model = StructModel(Str { sizet }); - let current_fun = ctx.builder.get_insert_block().unwrap().get_parent().unwrap(); - let fun_name = ctx.gen_string(generator, current_fun.get_name().to_str().unwrap()); - let name_ptr = ctx - .builder - .build_in_bounds_gep(exception, &[zero, int32.const_int(4, false)], "name_ptr") - .unwrap(); - ctx.builder.build_store(name_ptr, fun_name).unwrap(); + let filename = ctx.gen_string(generator, loc.file.0).as_any_value_enum(); + let filename = str_model.review(ctx.ctx, filename); + exception.gep(ctx, |f| f.file_name).load(ctx, filename); + + // generator.gen_strin + + // let filename = ); + + // let 
int32 = ctx.ctx.i32_type(); + // let zero = int32.const_zero(); + + // let exception = exception.into_pointer_value(); + // let file_ptr = ctx + // .builder + // .build_in_bounds_gep(exception, &[zero, int32.const_int(1, false)], "file_ptr") + // .unwrap(); + // let filename = ctx.gen_string(generator, loc.file.0); + // ctx.builder.build_store(file_ptr, filename).unwrap(); + // let row_ptr = ctx + // .builder + // .build_in_bounds_gep(exception, &[zero, int32.const_int(2, false)], "row_ptr") + // .unwrap(); + // ctx.builder.build_store(row_ptr, int32.const_int(loc.row as u64, false)).unwrap(); + // let col_ptr = ctx + // .builder + // .build_in_bounds_gep(exception, &[zero, int32.const_int(3, false)], "col_ptr") + // .unwrap(); + // ctx.builder.build_store(col_ptr, int32.const_int(loc.column as u64, false)).unwrap(); + + // let current_fun = ctx.builder.get_insert_block().unwrap().get_parent().unwrap(); + // let fun_name = ctx.gen_string(generator, current_fun.get_name().to_str().unwrap()); + // let name_ptr = ctx + // .builder + // .build_in_bounds_gep(exception, &[zero, int32.const_int(4, false)], "name_ptr") + // .unwrap(); + // ctx.builder.build_store(name_ptr, fun_name).unwrap(); } - - let raise = get_builtins(generator, ctx, "__nac3_raise"); - let exception = *exception; - ctx.build_call_or_invoke(raise, &[exception], "raise"); - } else { - let resume = get_builtins(generator, ctx, "__nac3_resume"); - ctx.build_call_or_invoke(resume, &[], "resume"); - } + None => { + let resume = get_builtins(generator, ctx, "__nac3_resume"); + ctx.build_call_or_invoke(resume, &[], "resume"); + } + }; ctx.builder.build_unreachable().unwrap(); } diff --git a/nac3core/src/codegen/structs/exception.rs b/nac3core/src/codegen/structs/exception.rs new file mode 100644 index 00000000..176e6152 --- /dev/null +++ b/nac3core/src/codegen/structs/exception.rs @@ -0,0 +1,66 @@ +use crate::codegen::model::*; + +use super::str::Str; + +/// The LLVM int type of an Exception ID. 
+pub type ExceptionId = Int32; +pub struct ExceptionFields<'ctx> { + /// nac3core's ID of the exception + pub exception_id: Field>, + /// The name of the file this `Exception` was raised in. + pub file_name: Field>>, + /// The line number in the file this `Exception` was raised in. + pub line: Field>, + /// The column number in the file this `Exception` was raised in. + pub column: Field>, + /// The name of the Python function this `Exception` was raised in. + pub function_name: Field>>, + /// The message of this Exception. + /// + /// The message can optionally contain integer parameters `{0}`, `{1}`, and `{2}` in its string, + /// where they will be substituted by `params[0]`, `params[1]`, and `params[2]` respectively (as `int64_t`-s). + /// Here is an example: + /// + /// ``` + /// "Index {0} is out of bounds! List only has {1} element(s)." + /// ``` + /// + /// In this case, `params[0]` and `params[1]` must be specified, and `params[2]` is ***unused***. + pub message: Field>>, + pub params: [Field>; 3], +} + +/// nac3core & ARTIQ's `Exception` definition. +/// +/// Also see the definition of `pub struct Exception<'a>` in . +#[derive(Debug, Clone, Copy)] +pub struct Exception<'ctx> { + /// The `SizeT` type used by the `Str` fields of this exception. 
+ pub sizet: IntModel<'ctx>, +} + +impl<'ctx> IsStruct<'ctx> for Exception<'ctx> { + type Fields = ExceptionFields<'ctx>; + + fn struct_name(&self) -> &'static str { + "Exception" + } + + fn build_fields(&self, builder: &mut FieldBuilder<'ctx>) -> Self::Fields { + let str = StructModel(Str { sizet: self.sizet }); + + let exception_id = builder.add_field_auto("exception_id"); + let file_name = builder.add_field("file_name", str); + let line = builder.add_field_auto("line"); + let column = builder.add_field_auto("column"); + let function_name = builder.add_field("function_name", str); + let message = builder.add_field("message", str); + let params = [ + builder.add_field_auto("param0"), + builder.add_field_auto("param1"), + builder.add_field_auto("param2"), + ]; + + Self::Fields { exception_id, file_name, line, column, function_name, message, params } + } +} diff --git a/nac3core/src/codegen/structs/mod.rs b/nac3core/src/codegen/structs/mod.rs new file mode 100644 index 00000000..fea1839a --- /dev/null +++ b/nac3core/src/codegen/structs/mod.rs @@ -0,0 +1,2 @@ +pub mod exception; +pub mod str; diff --git a/nac3core/src/codegen/structs/str.rs b/nac3core/src/codegen/structs/str.rs new file mode 100644 index 00000000..2639195a --- /dev/null +++ b/nac3core/src/codegen/structs/str.rs @@ -0,0 +1,55 @@ +use std::marker::PhantomData; + +use inkwell::{types::BasicType, values::BasicValue}; + +use crate::codegen::{model::*, CodeGenContext}; + +pub struct StrFields<'ctx> { + /// Pointer to the string. Does not have to be null-terminated. + pub content: Field>, + /// Number of bytes this string occupies in space. + /// + /// The [`IntModel`] matches [`Str::sizet`]. + pub length: Field>, +} + +/// nac3core's LLVM representation of a string in memory +#[derive(Debug, Clone, Copy)] +pub struct Str<'ctx> { + /// The `SizeT` type of this string. 
+ pub sizet: IntModel<'ctx>, +} + +impl<'ctx> IsStruct<'ctx> for Str<'ctx> { + type Fields = StrFields<'ctx>; + + fn struct_name(&self) -> &'static str { + "Str" + } + + fn build_fields(&self, builder: &mut FieldBuilder<'ctx>) -> Self::Fields { + Self::Fields { + content: builder.add_field_auto("content"), + length: builder.add_field("length", self.sizet), + } + } +} + +impl<'ctx> Str<'ctx> { + pub fn constant( + &self, + ctx: &CodeGenContext<'ctx, '_>, + content: Pointer<'ctx, ByteModel>, + length: Int<'ctx>, + ) -> Struct<'ctx, Self> { + // NOTE: Unfortunately Rust's type system is not powerful enough to generalize this. + // But this code duplication is acceptable + + self.sizet.check(ctx.ctx, length); // Check length's IntType + + let llvm_ty = self.get_struct_type(ctx.ctx); + let llvm_val = + llvm_ty.const_named_struct(&[content.get_llvm_value(), length.get_llvm_value()]); + Struct { structure: *self, value: llvm_val } + } +}