From e112354d25d3c16d3c18297f3143bde3b1508c74 Mon Sep 17 00:00:00 2001 From: pca006132 Date: Wed, 11 Aug 2021 14:37:26 +0800 Subject: [PATCH] codegen refactored --- nac3core/src/codegen/expr.rs | 46 ++------- nac3core/src/codegen/mod.rs | 195 +++++++++++++++++++++++++++++++++++ nac3core/src/codegen/stmt.rs | 13 ++- nac3core/src/top_level.rs | 34 +----- 4 files changed, 215 insertions(+), 73 deletions(-) diff --git a/nac3core/src/codegen/expr.rs b/nac3core/src/codegen/expr.rs index 6f03f0d28..2f4c74db5 100644 --- a/nac3core/src/codegen/expr.rs +++ b/nac3core/src/codegen/expr.rs @@ -1,8 +1,9 @@ use std::{collections::HashMap, convert::TryInto, iter::once}; +use super::{get_llvm_type, CodeGenContext}; use crate::{ symbol_resolver::SymbolValue, - top_level::{CodeGenContext, DefinitionId, TopLevelDef}, + top_level::{DefinitionId, TopLevelDef}, typecheck::typedef::{FunSignature, Type, TypeEnum}, }; use inkwell::{ @@ -48,45 +49,6 @@ impl<'ctx> CodeGenContext<'ctx> { index } - pub fn get_llvm_type(&mut self, ty: Type) -> BasicTypeEnum<'ctx> { - use TypeEnum::*; - // we assume the type cache should already contain primitive types, - // and they should be passed by value instead of passing as pointer. - self.type_cache.get(&ty).cloned().unwrap_or_else(|| match &*self.unifier.get_ty(ty) { - TObj { obj_id, fields, .. } => { - // a struct with fields in the order of declaration - let defs = self.top_level.definitions.read(); - let definition = defs.get(obj_id.0).unwrap(); - let ty = if let TopLevelDef::Class { fields: fields_list, .. 
} = &*definition.read() - { - let fields = fields.borrow(); - let fields = - fields_list.iter().map(|f| self.get_llvm_type(fields[&f.0])).collect_vec(); - self.ctx.struct_type(&fields, false).ptr_type(AddressSpace::Generic).into() - } else { - unreachable!() - }; - ty - } - TTuple { ty } => { - // a struct with fields in the order present in the tuple - let fields = ty.iter().map(|ty| self.get_llvm_type(*ty)).collect_vec(); - self.ctx.struct_type(&fields, false).ptr_type(AddressSpace::Generic).into() - } - TList { ty } => { - // a struct with an integer and a pointer to an array - let element_type = self.get_llvm_type(*ty); - let fields = [ - self.ctx.i32_type().into(), - element_type.ptr_type(AddressSpace::Generic).into(), - ]; - self.ctx.struct_type(&fields, false).ptr_type(AddressSpace::Generic).into() - } - TVirtual { .. } => unimplemented!(), - _ => unreachable!(), - }) - } - fn gen_symbol_val(&mut self, val: &SymbolValue) -> BasicValueEnum<'ctx> { match val { SymbolValue::I32(v) => self.ctx.i32_type().const_int(*v as u64, true).into(), @@ -113,6 +75,10 @@ impl<'ctx> CodeGenContext<'ctx> { } } + pub fn get_llvm_type(&mut self, ty: Type) -> BasicTypeEnum<'ctx> { + get_llvm_type(self.ctx, &mut self.unifier, self.top_level, &mut self.type_cache, ty) + } + fn gen_call( &mut self, obj: Option<(Type, BasicValueEnum<'ctx>)>, diff --git a/nac3core/src/codegen/mod.rs b/nac3core/src/codegen/mod.rs index 95ee4bbf9..c3fba2dbc 100644 --- a/nac3core/src/codegen/mod.rs +++ b/nac3core/src/codegen/mod.rs @@ -1,2 +1,197 @@ +use crate::{ + symbol_resolver::SymbolResolver, + top_level::{TopLevelContext, TopLevelDef}, + typecheck::{ + type_inferencer::PrimitiveStore, + typedef::{FunSignature, Type, TypeEnum, Unifier}, + }, +}; +use inkwell::{ + basic_block::BasicBlock, + builder::Builder, + context::Context, + module::Module, + types::{BasicType, BasicTypeEnum}, + values::PointerValue, + AddressSpace, +}; +use itertools::Itertools; +use rayon::current_thread_index; +use 
rustpython_parser::ast::{Stmt, StmtKind}; +use std::collections::HashMap; +use std::sync::Arc; + mod expr; mod stmt; + +pub struct CodeGenContext<'ctx> { + pub ctx: &'ctx Context, + pub builder: Builder<'ctx>, + pub module: Module<'ctx>, + pub top_level: &'ctx TopLevelContext, + pub unifier: Unifier, + pub resolver: Box<dyn SymbolResolver>, + pub var_assignment: HashMap<String, PointerValue<'ctx>>, + pub type_cache: HashMap<Type, BasicTypeEnum<'ctx>>, + pub primitives: PrimitiveStore, + // stores the alloca for variables + pub init_bb: BasicBlock<'ctx>, + // where continue and break should go to respectively + // the first one is the test_bb, and the second one is bb after the loop + pub loop_bb: Option<(BasicBlock<'ctx>, BasicBlock<'ctx>)>, +} + +pub struct CodeGenTask { + pub subst: Vec<(Type, Type)>, + pub symbol_name: String, + pub signature: FunSignature, + pub body: Stmt<Option<Type>>, + pub unifier_index: usize, + pub resolver: Box<dyn SymbolResolver>, +} + +fn get_llvm_type<'ctx>( + ctx: &'ctx Context, + unifier: &mut Unifier, + top_level: &TopLevelContext, + type_cache: &mut HashMap<Type, BasicTypeEnum<'ctx>>, + ty: Type, +) -> BasicTypeEnum<'ctx> { + use TypeEnum::*; + // we assume the type cache should already contain primitive types, + // and they should be passed by value instead of passing as pointer. + type_cache.get(&ty).cloned().unwrap_or_else(|| match &*unifier.get_ty(ty) { + TObj { obj_id, fields, .. } => { + // a struct with fields in the order of declaration + let defs = top_level.definitions.read(); + let definition = defs.get(obj_id.0).unwrap(); + let ty = if let TopLevelDef::Class { fields: fields_list, .. 
} = &*definition.read() { + let fields = fields.borrow(); + let fields = fields_list + .iter() + .map(|f| get_llvm_type(ctx, unifier, top_level, type_cache, fields[&f.0])) + .collect_vec(); + ctx.struct_type(&fields, false).ptr_type(AddressSpace::Generic).into() + } else { + unreachable!() + }; + ty + } + TTuple { ty } => { + // a struct with fields in the order present in the tuple + let fields = ty + .iter() + .map(|ty| get_llvm_type(ctx, unifier, top_level, type_cache, *ty)) + .collect_vec(); + ctx.struct_type(&fields, false).ptr_type(AddressSpace::Generic).into() + } + TList { ty } => { + // a struct with an integer and a pointer to an array + let element_type = get_llvm_type(ctx, unifier, top_level, type_cache, *ty); + let fields = + [ctx.i32_type().into(), element_type.ptr_type(AddressSpace::Generic).into()]; + ctx.struct_type(&fields, false).ptr_type(AddressSpace::Generic).into() + } + TVirtual { .. } => unimplemented!(), + _ => unreachable!(), + }) +} + +pub fn gen_func(task: CodeGenTask, top_level_ctx: Arc<TopLevelContext>) { + // unwrap_or(0) is for unit tests without using rayon + let thread_id = current_thread_index().unwrap_or(0); + let (mut unifier, primitives) = { + let unifiers = top_level_ctx.unifiers.read(); + let (unifier, primitives) = &unifiers[task.unifier_index]; + (Unifier::from_shared_unifier(unifier), *primitives) + }; + let contexts = top_level_ctx.conetexts.read(); + let context = contexts[thread_id].lock(); + + for (a, b) in task.subst.iter() { + // this should be unification between variables and concrete types + // and should not cause any problem... 
+ unifier.unify(*a, *b).unwrap(); + } + + // rebuild primitive store with unique representatives + let primitives = PrimitiveStore { + int32: unifier.get_representative(primitives.int32), + int64: unifier.get_representative(primitives.int64), + float: unifier.get_representative(primitives.float), + bool: unifier.get_representative(primitives.bool), + none: unifier.get_representative(primitives.none), + }; + + let mut type_cache: HashMap<_, _> = [ + (primitives.int32, context.i32_type().into()), + (primitives.int64, context.i64_type().into()), + (primitives.float, context.f64_type().into()), + (primitives.bool, context.bool_type().into()), + ] + .iter() + .cloned() + .collect(); + + let params = task + .signature + .args + .iter() + .map(|arg| { + get_llvm_type(&context, &mut unifier, top_level_ctx.as_ref(), &mut type_cache, arg.ty) + }) + .collect_vec(); + + let fn_type = if unifier.unioned(task.signature.ret, primitives.none) { + context.void_type().fn_type(&params, false) + } else { + get_llvm_type( + &context, + &mut unifier, + top_level_ctx.as_ref(), + &mut type_cache, + task.signature.ret, + ) + .fn_type(&params, false) + }; + + let builder = context.create_builder(); + let module = context.create_module(&task.symbol_name); + let fn_val = module.add_function(&task.symbol_name, fn_type, None); + let init_bb = context.append_basic_block(fn_val, "init"); + builder.position_at_end(init_bb); + let body_bb = context.append_basic_block(fn_val, "body"); + + let mut var_assignment = HashMap::new(); + for (n, arg) in task.signature.args.iter().enumerate() { + let param = fn_val.get_nth_param(n as u32).unwrap(); + let alloca = builder.build_alloca( + get_llvm_type(&context, &mut unifier, top_level_ctx.as_ref(), &mut type_cache, arg.ty), + &arg.name, + ); + builder.build_store(alloca, param); + var_assignment.insert(arg.name.clone(), alloca); + } + builder.build_unconditional_branch(body_bb); + builder.position_at_end(body_bb); + + let mut code_gen_context = CodeGenContext { + 
ctx: &context, + resolver: task.resolver, + top_level: top_level_ctx.as_ref(), + loop_bb: None, + var_assignment, + type_cache, + primitives, + init_bb, + builder, + module, + unifier, + }; + + if let StmtKind::FunctionDef { body, .. } = &task.body.node { + for stmt in body.iter() { + code_gen_context.gen_stmt(stmt); + } + } +} diff --git a/nac3core/src/codegen/stmt.rs b/nac3core/src/codegen/stmt.rs index f2d7dd332..fa9727f80 100644 --- a/nac3core/src/codegen/stmt.rs +++ b/nac3core/src/codegen/stmt.rs @@ -1,12 +1,14 @@ -use crate::{top_level::CodeGenContext, typecheck::typedef::Type}; -use inkwell::values::{BasicValueEnum, PointerValue}; +use super::CodeGenContext; +use crate::typecheck::typedef::Type; +use inkwell::values::{BasicValue, BasicValueEnum, PointerValue}; use rustpython_parser::ast::{Expr, ExprKind, Stmt, StmtKind}; impl<'ctx> CodeGenContext<'ctx> { fn gen_var(&mut self, ty: Type) -> PointerValue<'ctx> { // put the alloca in init block let current = self.builder.get_insert_block().unwrap(); - self.builder.position_at_end(self.init_bb); + // position before the last branching instruction... + self.builder.position_before(&self.init_bb.get_last_instruction().unwrap()); let ty = self.get_llvm_type(ty); let ptr = self.builder.build_alloca(ty, "tmp"); self.builder.position_at_end(current); @@ -71,6 +73,11 @@ impl<'ctx> CodeGenContext<'ctx> { StmtKind::Expr { value } => { self.gen_expr(&value); } + StmtKind::Return { value } => { + let value = value.as_ref().map(|v| self.gen_expr(&v)); + let value = value.as_ref().map(|v| v as &dyn BasicValue); + self.builder.build_return(value); + } StmtKind::AnnAssign { target, value, .. 
} => { if let Some(value) = value { let value = self.gen_expr(&value); diff --git a/nac3core/src/top_level.rs b/nac3core/src/top_level.rs index a51239860..f1ace2d90 100644 --- a/nac3core/src/top_level.rs +++ b/nac3core/src/top_level.rs @@ -3,11 +3,8 @@ use std::{collections::HashMap, sync::Arc}; use super::typecheck::type_inferencer::PrimitiveStore; use super::typecheck::typedef::{SharedUnifier, Type, TypeEnum, Unifier}; use crate::symbol_resolver::SymbolResolver; -use inkwell::{ - basic_block::BasicBlock, builder::Builder, context::Context, module::Module, - types::BasicTypeEnum, values::PointerValue, -}; -use parking_lot::RwLock; +use inkwell::context::Context; +use parking_lot::{Mutex, RwLock}; use rustpython_parser::ast::{self, Stmt}; #[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Copy)] @@ -49,33 +46,10 @@ pub enum TopLevelDef { }, } -pub struct CodeGenTask { - pub subst: HashMap, - pub symbol_name: String, - pub body: Stmt>, - pub unifier: SharedUnifier, -} - pub struct TopLevelContext { pub definitions: Arc>>>, - pub unifiers: Arc>>, -} - -pub struct CodeGenContext<'ctx> { - pub ctx: &'ctx Context, - pub builder: Builder<'ctx>, - pub module: Module<'ctx>, - pub top_level: &'ctx TopLevelContext, - pub unifier: Unifier, - pub resolver: Box, - pub var_assignment: HashMap>, - pub type_cache: HashMap>, - pub primitives: PrimitiveStore, - // stores the alloca for variables - pub init_bb: BasicBlock<'ctx>, - // where continue and break should go to respectively - // the first one is the test_bb, and the second one is bb after the loop - pub loop_bb: Option<(BasicBlock<'ctx>, BasicBlock<'ctx>)>, + pub unifiers: Arc>>, + pub conetexts: Arc>>>, } pub struct TopLevelDefInfo<'a> {