From 7ea5a5f84debb294127b79fa720fb94be401602d Mon Sep 17 00:00:00 2001 From: pca006132 Date: Sat, 12 Feb 2022 21:13:16 +0800 Subject: [PATCH] nac3core: codegen refactoring - No longer check if the statement will return. Instead, we check if the current basic block is terminated, which is simpler and handles exception/break/continue better. - Use invoke statement when unwind is needed. - Moved codegen for a block of statements into a separate function. --- nac3core/src/codegen/expr.rs | 32 +++++-- nac3core/src/codegen/generator.rs | 38 ++------- nac3core/src/codegen/mod.rs | 67 +++++++++++---- nac3core/src/codegen/stmt.rs | 135 +++++++++++++++++------------- 4 files changed, 159 insertions(+), 113 deletions(-) diff --git a/nac3core/src/codegen/expr.rs b/nac3core/src/codegen/expr.rs index b1368ba2..0d704403 100644 --- a/nac3core/src/codegen/expr.rs +++ b/nac3core/src/codegen/expr.rs @@ -12,12 +12,12 @@ use crate::{ typecheck::typedef::{FunSignature, FuncArg, Type, TypeEnum, Unifier}, }; use inkwell::{ - types::{BasicType, BasicTypeEnum}, - values::{BasicValueEnum, IntValue, PointerValue}, AddressSpace, + types::{BasicType, BasicTypeEnum}, + values::{BasicValueEnum, FunctionValue, IntValue, PointerValue} }; use itertools::{chain, izip, zip, Itertools}; -use nac3parser::ast::{self, Boolop, Comprehension, Constant, Expr, ExprKind, Operator, StrRef}; +use nac3parser::ast::{self, Boolop, Comprehension, Constant, Expr, ExprKind, Location, Operator, StrRef}; use super::CodeGenerator; @@ -256,6 +256,25 @@ impl<'ctx, 'a> CodeGenContext<'ctx, 'a> { _ => unimplemented!(), } } + + pub fn build_call_or_invoke( + &self, + fun: FunctionValue<'ctx>, + params: &[BasicValueEnum<'ctx>], + call_name: &str + ) -> Option> { + if let Some(target) = self.unwind_target { + let current = self.builder.get_insert_block().unwrap().get_parent().unwrap(); + let then_block = self.ctx.append_basic_block(current, &format!("after.{}", call_name)); + let result = self.builder.build_invoke(fun, params, 
then_block, target, call_name).try_as_basic_value().left(); + self.builder.position_at_end(then_block); + result + } else { + let param: Vec<_> = params.iter().map(|v| (*v).into()).collect(); + self.builder.build_call(fun, &param, call_name).try_as_basic_value().left() + } + } + pub fn gen_string>( &mut self, generator: &mut G, @@ -404,7 +423,7 @@ pub fn gen_call<'ctx, 'a, G: CodeGenerator>( } // default value handling for k in keys.into_iter() { - mapping.insert(k.name, ctx.gen_symbol_val(&k.default_value.unwrap()).into()); + mapping.insert(k.name, ctx.gen_symbol_val(generator, &k.default_value.unwrap()).into()); } // reorder the parameters let mut real_params = @@ -474,7 +493,8 @@ pub fn gen_call<'ctx, 'a, G: CodeGenerator>( }; ctx.module.add_function(&symbol, fun_ty, None) }); - ctx.builder.build_call(fun_val, &param_vals, "call").try_as_basic_value().left() + + ctx.build_call_or_invoke(fun_val, &param_vals, "call") } pub fn destructure_range<'ctx, 'a>( @@ -715,7 +735,7 @@ pub fn gen_expr<'ctx, 'a, G: CodeGenerator>( Some(match &expr.node { ExprKind::Constant { value, .. } => { let ty = expr.custom.unwrap(); - ctx.gen_const(value, ty).into() + ctx.gen_const(generator, value, ty).into() } ExprKind::Name { id, .. } => match ctx.var_assignment.get(id) { Some((ptr, None, _)) => ctx.builder.build_load(*ptr, "load").into(), diff --git a/nac3core/src/codegen/generator.rs b/nac3core/src/codegen/generator.rs index 6d897b97..0c9858fa 100644 --- a/nac3core/src/codegen/generator.rs +++ b/nac3core/src/codegen/generator.rs @@ -117,67 +117,45 @@ pub trait CodeGenerator { /// Generate code for a while expression. /// Return true if the while loop must early return - fn gen_while<'ctx, 'a>( - &mut self, - ctx: &mut CodeGenContext<'ctx, 'a>, - stmt: &Stmt>, - ) -> bool + fn gen_while<'ctx, 'a>(&mut self, ctx: &mut CodeGenContext<'ctx, 'a>, stmt: &Stmt>) where Self: Sized, { gen_while(self, ctx, stmt); - false } /// Generate code for a while expression. 
/// Return true if the while loop must early return - fn gen_for<'ctx, 'a>( - &mut self, - ctx: &mut CodeGenContext<'ctx, 'a>, - stmt: &Stmt>, - ) -> bool + fn gen_for<'ctx, 'a>(&mut self, ctx: &mut CodeGenContext<'ctx, 'a>, stmt: &Stmt>) where Self: Sized, { gen_for(self, ctx, stmt); - false } /// Generate code for an if expression. /// Return true if the statement must early return - fn gen_if<'ctx, 'a>( - &mut self, - ctx: &mut CodeGenContext<'ctx, 'a>, - stmt: &Stmt>, - ) -> bool + fn gen_if<'ctx, 'a>(&mut self, ctx: &mut CodeGenContext<'ctx, 'a>, stmt: &Stmt>) where Self: Sized, { - gen_if(self, ctx, stmt) + gen_if(self, ctx, stmt); } - fn gen_with<'ctx, 'a>( - &mut self, - ctx: &mut CodeGenContext<'ctx, 'a>, - stmt: &Stmt>, - ) -> bool + fn gen_with<'ctx, 'a>(&mut self, ctx: &mut CodeGenContext<'ctx, 'a>, stmt: &Stmt>) where Self: Sized, { - gen_with(self, ctx, stmt) + gen_with(self, ctx, stmt); } /// Generate code for a statement /// Return true if the statement must early return - fn gen_stmt<'ctx, 'a>( - &mut self, - ctx: &mut CodeGenContext<'ctx, 'a>, - stmt: &Stmt>, - ) -> bool + fn gen_stmt<'ctx, 'a>(&mut self, ctx: &mut CodeGenContext<'ctx, 'a>, stmt: &Stmt>) where Self: Sized, { - gen_stmt(self, ctx, stmt) + gen_stmt(self, ctx, stmt); } } diff --git a/nac3core/src/codegen/mod.rs b/nac3core/src/codegen/mod.rs index 1601a3e7..3ca03ef9 100644 --- a/nac3core/src/codegen/mod.rs +++ b/nac3core/src/codegen/mod.rs @@ -14,7 +14,7 @@ use inkwell::{ module::Module, passes::{PassManager, PassManagerBuilder}, types::{BasicType, BasicTypeEnum}, - values::{FunctionValue, PointerValue}, + values::{BasicValueEnum, FunctionValue, PhiValue, PointerValue}, AddressSpace, OptimizationLevel, }; use itertools::Itertools; @@ -60,11 +60,28 @@ pub struct CodeGenContext<'ctx, 'a> { pub primitives: PrimitiveStore, pub calls: Arc>, pub registry: &'a WorkerRegistry, + // const string cache + pub const_strings: HashMap>, // stores the alloca for variables pub init_bb: 
BasicBlock<'ctx>, - // where continue and break should go to respectively // the first one is the test_bb, and the second one is bb after the loop - pub loop_bb: Option<(BasicBlock<'ctx>, BasicBlock<'ctx>)>, + pub loop_target: Option<(BasicBlock<'ctx>, BasicBlock<'ctx>)>, + // unwind target bb + pub unwind_target: Option>, + // return target bb, just emit ret if no such target + pub return_target: Option>, + pub return_buffer: Option>, + // outer finally block function + pub outer_final: Option<(PointerValue<'ctx>, Vec>, Vec>)>, + // outer catch clauses + pub outer_catch_clauses: + Option<(Vec>>, BasicBlock<'ctx>, PhiValue<'ctx>)>, +} + +impl<'ctx, 'a> CodeGenContext<'ctx, 'a> { + pub fn is_terminated(&self) -> bool { + self.builder.get_insert_block().unwrap().get_terminator().is_some() + } } type Fp = Box; @@ -182,7 +199,7 @@ impl WorkerRegistry { fn worker_thread(&self, generator: &mut G, f: Arc) { let context = Context::create(); let mut builder = context.create_builder(); - let mut module = context.create_module(generator.get_name()); + let module = context.create_module(generator.get_name()); let pass_builder = PassManagerBuilder::create(); pass_builder.set_optimization_level(OptimizationLevel::Default); @@ -190,10 +207,12 @@ impl WorkerRegistry { pass_builder.populate_function_pass_manager(&passes); while let Some(task) = self.receiver.recv().unwrap() { - let result = gen_func(&context, generator, self, builder, module, task); + let tmp_module = context.create_module("tmp"); + let result = gen_func(&context, generator, self, builder, tmp_module, task); builder = result.0; - module = result.1; passes.run_on(&result.2); + module.link_in_module(result.1).unwrap(); + // module = result.1; *self.task_count.lock() -= 1; self.wait_condvar.notify_all(); } @@ -235,20 +254,26 @@ fn get_llvm_type<'ctx>( // we assume the type cache should already contain primitive types, // and they should be passed by value instead of passing as pointer. 
type_cache.get(&unifier.get_representative(ty)).cloned().unwrap_or_else(|| { - let ty = unifier.get_ty(ty); - match &*ty { + let ty_enum = unifier.get_ty(ty); + let result = match &*ty_enum { TObj { obj_id, fields, .. } => { + // check to avoid treating primitives as classes + if obj_id.0 <= 7 { + unreachable!(); + } // a struct with fields in the order of declaration let top_level_defs = top_level.definitions.read(); let definition = top_level_defs.get(obj_id.0).unwrap(); - let ty = if let TopLevelDef::Class { fields: fields_list, .. } = &*definition.read() + let ty = if let TopLevelDef::Class { name, fields: fields_list, .. } = &*definition.read() { + let struct_type = ctx.opaque_struct_type(&name.to_string()); let fields = fields.borrow(); let fields = fields_list .iter() .map(|f| get_llvm_type(ctx, generator, unifier, top_level, type_cache, fields[&f.0].0)) .collect_vec(); - ctx.struct_type(&fields, false).ptr_type(AddressSpace::Generic).into() + struct_type.set_body(&fields, false); + struct_type.ptr_type(AddressSpace::Generic).into() } else { unreachable!() }; @@ -270,8 +295,10 @@ fn get_llvm_type<'ctx>( ctx.struct_type(&fields, false).ptr_type(AddressSpace::Generic).into() } TVirtual { .. 
} => unimplemented!(), - _ => unreachable!("{}", ty.get_type_name()), - } + _ => unreachable!("{}", ty_enum.get_type_name()), + }; + type_cache.insert(unifier.get_representative(ty), result); + result }) } @@ -415,6 +442,7 @@ pub fn gen_func<'ctx, G: CodeGenerator>( builder.build_store(alloca, param); var_assignment.insert(arg.name, (alloca, None, 0)); } + let return_buffer = fn_type.get_return_type().map(|v| builder.build_alloca(v, "$ret")); let static_values = { let store = registry.static_value_store.lock(); store.store[task.id].clone() @@ -432,7 +460,13 @@ pub fn gen_func<'ctx, G: CodeGenerator>( resolver: task.resolver, top_level: top_level_ctx.as_ref(), calls: task.calls, - loop_bb: None, + loop_target: None, + return_target: None, + return_buffer, + unwind_target: None, + outer_final: None, + outer_catch_clauses: None, + const_strings: Default::default(), registry, var_assignment, type_cache, @@ -444,15 +478,14 @@ pub fn gen_func<'ctx, G: CodeGenerator>( static_value_store, }; - let mut returned = false; for stmt in task.body.iter() { - returned = generator.gen_stmt(&mut code_gen_context, stmt); - if returned { + generator.gen_stmt(&mut code_gen_context, stmt); + if code_gen_context.is_terminated() { break; } } // after static analysis, only void functions can have no return at the end. 
- if !returned { + if !code_gen_context.is_terminated() { code_gen_context.builder.build_return(None); } diff --git a/nac3core/src/codegen/stmt.rs b/nac3core/src/codegen/stmt.rs index f8f42d43..5f7c6d12 100644 --- a/nac3core/src/codegen/stmt.rs +++ b/nac3core/src/codegen/stmt.rs @@ -6,13 +6,17 @@ use super::{ }; use crate::{ codegen::expr::gen_binop_expr, - typecheck::typedef::{Type, TypeEnum}, + toplevel::{DefinitionId, TopLevelDef}, + typecheck::typedef::{Type, TypeEnum, FunSignature} }; use inkwell::{ + attributes::{Attribute, AttributeLoc}, + basic_block::BasicBlock, types::BasicTypeEnum, - values::{BasicValue, BasicValueEnum, PointerValue}, + values::{BasicValue, BasicValueEnum, FunctionValue, PointerValue}, + IntPredicate::EQ, }; -use nac3parser::ast::{Expr, ExprKind, Stmt, StmtKind}; +use nac3parser::ast::{ExcepthandlerKind, Expr, ExprKind, Location, Stmt, StmtKind, StrRef, Constant}; use std::convert::TryFrom; pub fn gen_var<'ctx, 'a>( @@ -173,7 +177,7 @@ pub fn gen_for<'ctx, 'a, G: CodeGenerator>( let orelse_bb = if orelse.is_empty() { cont_bb } else { ctx.ctx.append_basic_block(current, "orelse") }; // store loop bb information and restore it later - let loop_bb = ctx.loop_bb.replace((test_bb, cont_bb)); + let loop_bb = ctx.loop_target.replace((test_bb, cont_bb)); let iter_val = generator.gen_expr(ctx, iter).unwrap().to_basic_value_enum(ctx, generator); if ctx.unifier.unioned(iter.custom.unwrap(), ctx.primitives.range) { @@ -234,22 +238,22 @@ pub fn gen_for<'ctx, 'a, G: CodeGenerator>( generator.gen_assign(ctx, target, val.into()); } - for stmt in body.iter() { - generator.gen_stmt(ctx, stmt); - } + gen_block(generator, ctx, body.iter()); for (k, (_, _, counter)) in var_assignment.iter() { let (_, static_val, counter2) = ctx.var_assignment.get_mut(k).unwrap(); if counter != counter2 { *static_val = None; } } - ctx.builder.build_unconditional_branch(test_bb); + if !ctx.is_terminated() { + ctx.builder.build_unconditional_branch(test_bb); + } if 
!orelse.is_empty() { ctx.builder.position_at_end(orelse_bb); - for stmt in orelse.iter() { - generator.gen_stmt(ctx, stmt); + gen_block(generator, ctx, orelse.iter()); + if !ctx.is_terminated() { + ctx.builder.build_unconditional_branch(cont_bb); } - ctx.builder.build_unconditional_branch(cont_bb); } for (k, (_, _, counter)) in var_assignment.iter() { let (_, static_val, counter2) = ctx.var_assignment.get_mut(k).unwrap(); @@ -258,7 +262,7 @@ pub fn gen_for<'ctx, 'a, G: CodeGenerator>( } } ctx.builder.position_at_end(cont_bb); - ctx.loop_bb = loop_bb; + ctx.loop_target = loop_bb; } else { unreachable!() } @@ -282,7 +286,7 @@ pub fn gen_while<'ctx, 'a, G: CodeGenerator>( let orelse_bb = if orelse.is_empty() { cont_bb } else { ctx.ctx.append_basic_block(current, "orelse") }; // store loop bb information and restore it later - let loop_bb = ctx.loop_bb.replace((test_bb, cont_bb)); + let loop_bb = ctx.loop_target.replace((test_bb, cont_bb)); ctx.builder.build_unconditional_branch(test_bb); ctx.builder.position_at_end(test_bb); let test = generator.gen_expr(ctx, test).unwrap().to_basic_value_enum(ctx, generator); @@ -292,22 +296,22 @@ pub fn gen_while<'ctx, 'a, G: CodeGenerator>( unreachable!() }; ctx.builder.position_at_end(body_bb); - for stmt in body.iter() { - generator.gen_stmt(ctx, stmt); - } + gen_block(generator, ctx, body.iter()); for (k, (_, _, counter)) in var_assignment.iter() { let (_, static_val, counter2) = ctx.var_assignment.get_mut(k).unwrap(); if counter != counter2 { *static_val = None; } } - ctx.builder.build_unconditional_branch(test_bb); + if !ctx.is_terminated() { + ctx.builder.build_unconditional_branch(test_bb); + } if !orelse.is_empty() { ctx.builder.position_at_end(orelse_bb); - for stmt in orelse.iter() { - generator.gen_stmt(ctx, stmt); + gen_block(generator, ctx, orelse.iter()); + if !ctx.is_terminated() { + ctx.builder.build_unconditional_branch(cont_bb); } - ctx.builder.build_unconditional_branch(cont_bb); } for (k, (_, _, counter)) in 
var_assignment.iter() { let (_, static_val, counter2) = ctx.var_assignment.get_mut(k).unwrap(); @@ -316,7 +320,7 @@ pub fn gen_while<'ctx, 'a, G: CodeGenerator>( } } ctx.builder.position_at_end(cont_bb); - ctx.loop_bb = loop_bb; + ctx.loop_target = loop_bb; } else { unreachable!() } @@ -326,7 +330,7 @@ pub fn gen_if<'ctx, 'a, G: CodeGenerator>( generator: &mut G, ctx: &mut CodeGenContext<'ctx, 'a>, stmt: &Stmt>, -) -> bool { +) { if let StmtKind::If { test, body, orelse, .. } = &stmt.node { // var_assignment static values may be changed in another branch // if so, remove the static value as it may not be correct in this branch @@ -352,13 +356,7 @@ pub fn gen_if<'ctx, 'a, G: CodeGenerator>( unreachable!() }; ctx.builder.position_at_end(body_bb); - let mut exited = false; - for stmt in body.iter() { - exited = generator.gen_stmt(ctx, stmt); - if exited { - break; - } - } + gen_block(generator, ctx, body.iter()); for (k, (_, _, counter)) in var_assignment.iter() { let (_, static_val, counter2) = ctx.var_assignment.get_mut(k).unwrap(); if counter != counter2 { @@ -366,32 +364,22 @@ pub fn gen_if<'ctx, 'a, G: CodeGenerator>( } } - if !exited { + if !ctx.is_terminated() { if cont_bb.is_none() { cont_bb = Some(ctx.ctx.append_basic_block(current, "cont")); } ctx.builder.build_unconditional_branch(cont_bb.unwrap()); } - let then_exited = exited; - let else_exited = if !orelse.is_empty() { - exited = false; + if !orelse.is_empty() { ctx.builder.position_at_end(orelse_bb); - for stmt in orelse.iter() { - exited = generator.gen_stmt(ctx, stmt); - if exited { - break; - } - } - if !exited { + gen_block(generator, ctx, orelse.iter()); + if !ctx.is_terminated() { if cont_bb.is_none() { cont_bb = Some(ctx.ctx.append_basic_block(current, "cont")); } ctx.builder.build_unconditional_branch(cont_bb.unwrap()); } - exited - } else { - false - }; + } if let Some(cont_bb) = cont_bb { ctx.builder.position_at_end(cont_bb); } @@ -401,7 +389,11 @@ pub fn gen_if<'ctx, 'a, G: CodeGenerator>( 
*static_val = None; } } - then_exited && else_exited + } else { + unreachable!() + } +} + pub fn exn_constructor<'ctx, 'a>( ctx: &mut CodeGenContext<'ctx, 'a>, obj: Option<(Type, ValueEnum<'ctx>)>, @@ -481,23 +473,37 @@ pub fn gen_with<'ctx, 'a, G: CodeGenerator>( unimplemented!() } +pub fn gen_return<'ctx, 'a, G: CodeGenerator>( + generator: &mut G, + ctx: &mut CodeGenContext<'ctx, 'a>, + value: &Option>>>, +) { + let value = value + .as_ref() + .map(|v| generator.gen_expr(ctx, v).unwrap().to_basic_value_enum(ctx, generator)); + if let Some(return_target) = ctx.return_target { + if let Some(value) = value { + ctx.builder.build_store(ctx.return_buffer.unwrap(), value); + } + ctx.builder.build_unconditional_branch(return_target); + } else { + let value = value.as_ref().map(|v| v as &dyn BasicValue); + ctx.builder.build_return(value); + } +} + pub fn gen_stmt<'ctx, 'a, G: CodeGenerator>( generator: &mut G, ctx: &mut CodeGenContext<'ctx, 'a>, stmt: &Stmt>, -) -> bool { +) { match &stmt.node { StmtKind::Pass { .. } => {} StmtKind::Expr { value, .. } => { generator.gen_expr(ctx, value); } StmtKind::Return { value, .. } => { - let value = value - .as_ref() - .map(|v| generator.gen_expr(ctx, v).unwrap().to_basic_value_enum(ctx, generator)); - let value = value.as_ref().map(|v| v as &dyn BasicValue); - ctx.builder.build_return(value); - return true; + gen_return(generator, ctx, value); } StmtKind::AnnAssign { target, value, .. } => { if let Some(value) = value { @@ -512,17 +518,15 @@ pub fn gen_stmt<'ctx, 'a, G: CodeGenerator>( } } StmtKind::Continue { .. } => { - ctx.builder.build_unconditional_branch(ctx.loop_bb.unwrap().0); - return true; + ctx.builder.build_unconditional_branch(ctx.loop_target.unwrap().0); } StmtKind::Break { .. } => { - ctx.builder.build_unconditional_branch(ctx.loop_bb.unwrap().1); - return true; + ctx.builder.build_unconditional_branch(ctx.loop_target.unwrap().1); } - StmtKind::If { .. } => return generator.gen_if(ctx, stmt), - StmtKind::While { .. 
} => return generator.gen_while(ctx, stmt), - StmtKind::For { .. } => return generator.gen_for(ctx, stmt), - StmtKind::With { .. } => return generator.gen_with(ctx, stmt), + StmtKind::If { .. } => generator.gen_if(ctx, stmt), + StmtKind::While { .. } => generator.gen_while(ctx, stmt), + StmtKind::For { .. } => generator.gen_for(ctx, stmt), + StmtKind::With { .. } => generator.gen_with(ctx, stmt), StmtKind::AugAssign { target, op, value, .. } => { let value = gen_binop_expr(generator, ctx, target, op, value); generator.gen_assign(ctx, target, value); @@ -530,4 +534,15 @@ pub fn gen_stmt<'ctx, 'a, G: CodeGenerator>( _ => unimplemented!(), }; false +pub fn gen_block<'ctx, 'a, 'b, G: CodeGenerator, I: Iterator>>>( + generator: &mut G, + ctx: &mut CodeGenContext<'ctx, 'a>, + stmts: I, +) { + for stmt in stmts { + generator.gen_stmt(ctx, stmt); + if ctx.is_terminated() { + break; + } + } }