2021-08-03 14:11:41 +08:00
use std ::{ collections ::HashMap , sync ::Arc } ;
2021-08-03 13:38:27 +08:00
2021-08-05 14:55:23 +08:00
use super ::typecheck ::type_inferencer ::PrimitiveStore ;
2021-08-09 01:43:41 +08:00
use super ::typecheck ::typedef ::{ SharedUnifier , Type , Unifier , TypeEnum } ;
2021-08-07 10:28:41 +08:00
use crate ::symbol_resolver ::SymbolResolver ;
2021-08-09 16:10:17 +08:00
use inkwell ::{
basic_block ::BasicBlock , builder ::Builder , context ::Context , module ::Module ,
types ::BasicTypeEnum , values ::PointerValue ,
} ;
2021-08-03 14:11:41 +08:00
use parking_lot ::RwLock ;
2021-08-03 13:38:27 +08:00
use rustpython_parser ::ast ::Stmt ;
2021-08-06 10:30:57 +08:00
/// Identifier of a top-level definition, wrapping a plain `usize`.
///
/// In this file the wrapped value is the position of the definition inside
/// `TopLevelComposer::definition_list` (IDs 0 through 4 are taken by the
/// primitive types). Ordering and equality are those of the wrapped number.
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub struct DefinitionId(pub usize);
2021-08-03 13:38:27 +08:00
pub enum TopLevelDef {
Class {
// object ID used for TypeEnum
2021-08-06 10:30:57 +08:00
object_id : DefinitionId ,
2021-08-03 13:38:27 +08:00
// type variables bounded to the class.
type_vars : Vec < Type > ,
2021-08-07 15:06:39 +08:00
// class fields
2021-08-03 13:38:27 +08:00
fields : Vec < ( String , Type ) > ,
// class methods, pointing to the corresponding function definition.
2021-08-07 15:06:39 +08:00
methods : Vec < ( String , Type , DefinitionId ) > ,
2021-08-03 13:38:27 +08:00
// ancestor classes, including itself.
ancestors : Vec < DefinitionId > ,
} ,
Function {
2021-08-07 15:06:39 +08:00
// prefix for symbol, should be unique globally, and not ending with numbers
name : String ,
// function signature.
2021-08-03 13:38:27 +08:00
signature : Type ,
/// Function instance to symbol mapping
/// Key: string representation of type variable values, sorted by variable ID in ascending
/// order, including type variables associated with the class.
/// Value: function symbol name.
instance_to_symbol : HashMap < String , String > ,
/// Function instances to annotated AST mapping
/// Key: string representation of type variable values, sorted by variable ID in ascending
/// order, including type variables associated with the class. Excluding rigid type
/// variables.
/// Value: AST annotated with types together with a unification table index. Could contain
/// rigid type variables that would be substituted when the function is instantiated.
2021-08-05 14:55:23 +08:00
instance_to_stmt : HashMap < String , ( Stmt < Option < Type > > , usize ) > ,
2021-08-03 13:38:27 +08:00
} ,
2021-08-10 10:33:18 +08:00
Initializer {
class_id : Option < DefinitionId > ,
}
2021-08-03 13:38:27 +08:00
}
pub struct CodeGenTask {
pub subst : HashMap < usize , Type > ,
pub symbol_name : String ,
2021-08-05 14:55:23 +08:00
pub body : Stmt < Option < Type > > ,
2021-08-03 13:38:27 +08:00
pub unifier : SharedUnifier ,
}
pub struct TopLevelContext {
2021-08-05 14:55:23 +08:00
pub definitions : Arc < RwLock < Vec < RwLock < TopLevelDef > > > > ,
pub unifiers : Arc < RwLock < Vec < SharedUnifier > > > ,
2021-08-03 13:38:27 +08:00
}
2021-08-03 14:11:41 +08:00
2021-08-05 14:55:23 +08:00
pub struct CodeGenContext < ' ctx > {
pub ctx : & ' ctx Context ,
pub builder : Builder < ' ctx > ,
pub module : Module < ' ctx > ,
pub top_level : & ' ctx TopLevelContext ,
2021-08-03 14:11:41 +08:00
pub unifier : Unifier ,
2021-08-05 14:55:23 +08:00
pub resolver : Box < dyn SymbolResolver > ,
pub var_assignment : HashMap < String , PointerValue < ' ctx > > ,
2021-08-07 15:06:39 +08:00
pub type_cache : HashMap < Type , BasicTypeEnum < ' ctx > > ,
pub primitives : PrimitiveStore ,
2021-08-09 16:37:28 +08:00
// stores the alloca for variables
pub init_bb : BasicBlock < ' ctx > ,
2021-08-09 16:10:17 +08:00
// where continue and break should go to respectively
// the first one is the test_bb, and the second one is bb after the loop
pub loop_bb : Option < ( BasicBlock < ' ctx > , BasicBlock < ' ctx > ) > ,
2021-08-03 14:11:41 +08:00
}
2021-08-09 01:43:41 +08:00
use rustpython_parser ::ast ;
2021-08-10 10:33:18 +08:00
pub struct TopLevelDefInfo < ' a > { // like adding some info on top of the TopLevelDef for later parsing the class bases, method, and function sigatures
def : TopLevelDef , // the definition entry
ty : Type , // the entry in the top_level unifier
ast : Option < ast ::Stmt < ( ) > > , // the ast submitted by applications
resolver : Option < & ' a dyn SymbolResolver > // the resolver
}
pub struct TopLevelComposer < ' a > {
pub definition_list : Vec < TopLevelDefInfo < ' a > > ,
pub primitives : PrimitiveStore ,
pub unifier : Unifier ,
}
impl < ' a > TopLevelComposer < ' a > {
2021-08-09 01:43:41 +08:00
pub fn make_primitives ( ) -> ( PrimitiveStore , Unifier ) {
let mut unifier = Unifier ::new ( ) ;
let int32 = unifier . add_ty ( TypeEnum ::TObj {
2021-08-10 10:33:18 +08:00
obj_id : DefinitionId ( 0 ) , // 0 should be fine
2021-08-09 01:43:41 +08:00
fields : HashMap ::new ( ) . into ( ) ,
params : HashMap ::new ( ) ,
} ) ;
let int64 = unifier . add_ty ( TypeEnum ::TObj {
2021-08-10 10:33:18 +08:00
obj_id : DefinitionId ( 1 ) , // 0 should be fine
2021-08-09 01:43:41 +08:00
fields : HashMap ::new ( ) . into ( ) ,
params : HashMap ::new ( ) ,
} ) ;
let float = unifier . add_ty ( TypeEnum ::TObj {
2021-08-10 10:33:18 +08:00
obj_id : DefinitionId ( 2 ) , // 0 should be fine
2021-08-09 01:43:41 +08:00
fields : HashMap ::new ( ) . into ( ) ,
params : HashMap ::new ( ) ,
} ) ;
let bool = unifier . add_ty ( TypeEnum ::TObj {
2021-08-10 10:33:18 +08:00
obj_id : DefinitionId ( 3 ) , // 0 should be fine
2021-08-09 01:43:41 +08:00
fields : HashMap ::new ( ) . into ( ) ,
params : HashMap ::new ( ) ,
} ) ;
let none = unifier . add_ty ( TypeEnum ::TObj {
2021-08-10 10:33:18 +08:00
obj_id : DefinitionId ( 4 ) , // 0 should be fine
2021-08-09 01:43:41 +08:00
fields : HashMap ::new ( ) . into ( ) ,
params : HashMap ::new ( ) ,
} ) ;
let primitives = PrimitiveStore { int32 , int64 , float , bool , none } ;
crate ::typecheck ::magic_methods ::set_primitives_magic_methods ( & primitives , & mut unifier ) ;
( primitives , unifier )
}
2021-08-10 10:33:18 +08:00
pub fn new ( ) -> Self {
let primitives = Self ::make_primitives ( ) ;
let definition_list : Vec < TopLevelDefInfo < ' a > > = vec! [
TopLevelDefInfo {
def : Self ::make_top_level_class_def ( 0 ) ,
ast : None ,
resolver : None ,
ty : primitives . 0. int32 // just arbitary picked one...
} ,
TopLevelDefInfo {
def : Self ::make_top_level_class_def ( 1 ) ,
ast : None ,
resolver : None ,
ty : primitives . 0. int64 // just arbitary picked one...
} ,
TopLevelDefInfo {
def : Self ::make_top_level_class_def ( 2 ) ,
ast : None ,
resolver : None ,
ty : primitives . 0. float // just arbitary picked one...
} ,
TopLevelDefInfo {
def : Self ::make_top_level_class_def ( 3 ) ,
ast : None ,
resolver : None ,
ty : primitives . 0. bool // just arbitary picked one...
} ,
TopLevelDefInfo {
def : Self ::make_top_level_class_def ( 4 ) ,
ast : None ,
resolver : None ,
ty : primitives . 0. none // just arbitary picked one...
2021-08-09 01:43:41 +08:00
} ,
2021-08-10 10:33:18 +08:00
] ; // the entries for primitive types
TopLevelComposer {
definition_list ,
primitives : primitives . 0 ,
unifier : primitives . 1
2021-08-09 01:43:41 +08:00
}
}
2021-08-10 10:33:18 +08:00
/// Builds an empty class definition whose object ID is `index`.
///
/// Type variables, fields, methods and ancestors all start out empty.
pub fn make_top_level_class_def(index: usize) -> TopLevelDef {
    TopLevelDef::Class {
        object_id: DefinitionId(index),
        type_vars: Vec::new(),
        fields: Vec::new(),
        methods: Vec::new(),
        ancestors: Vec::new(),
    }
}
pub fn make_top_level_function_def ( name : String , ty : Type ) -> TopLevelDef {
TopLevelDef ::Function {
name ,
signature : ty ,
instance_to_symbol : Default ::default ( ) ,
instance_to_stmt : Default ::default ( )
}
}
/// Lists the primitive types as `(name, definition ID, type)` triples.
///
/// The list is meant to seed a symbol resolver so that it can map a
/// primitive type name to its definition. Each primitive is paired with its
/// own definition ID and type, matching the IDs assigned in
/// `make_primitives` (`int32` = 0, `int64` = 1, `float` = 2, `bool` = 3,
/// `none` = 4).
pub fn get_primitives_definition(&self) -> Vec<(String, DefinitionId, Type)> {
    let primitives = &self.primitives;
    // NOTE(review): the padding spaces inside the name literals are kept
    // exactly as found; they look like an extraction artifact. Confirm the
    // intended names are the bare identifiers.
    vec![
        (" int32 ".into(), DefinitionId(0), primitives.int32),
        (" int64 ".into(), DefinitionId(1), primitives.int64),
        (" float ".into(), DefinitionId(2), primitives.float),
        (" bool ".into(), DefinitionId(3), primitives.bool),
        (" none ".into(), DefinitionId(4), primitives.none),
    ]
}
pub fn register_top_level ( & mut self , ast : ast ::Stmt < ( ) > , resolver : & ' a dyn SymbolResolver ) -> Result < Vec < ( String , DefinitionId , Type ) > , String > {
2021-08-09 01:43:41 +08:00
match & ast . node {
2021-08-10 10:33:18 +08:00
ast ::StmtKind ::ClassDef { name , body , .. } = > {
let class_name = name . to_string ( ) ;
let def_id = self . definition_list . len ( ) ;
// add the class to the unifier
let ty = self . unifier . add_ty ( TypeEnum ::TObj {
obj_id : DefinitionId ( def_id ) ,
fields : Default ::default ( ) ,
params : Default ::default ( )
} ) ;
// add to the definition list
self . definition_list . push (
TopLevelDefInfo {
def : Self ::make_top_level_class_def ( def_id ) ,
resolver : Some ( resolver ) ,
ast : Some ( ast ) ,
ty ,
}
) ;
// TODO: parse class def body and register class methods into the def list?
// FIXME: module's symbol resolver would not know the name of the class methods, thus cannot return their definition_id? so we have to manage it ourselves?
// or do we return the class method list of (method_name, def_id, type) to application to be used to build symbol resolver? <- current implementation
Ok ( vec! [ ( class_name , DefinitionId ( def_id ) , ty ) ] ) // FIXME: need to add class method def
} ,
ast ::StmtKind ::FunctionDef { name , .. } = > {
let fun_name = name . to_string ( ) ;
let def_id = self . definition_list . len ( ) ;
// add to the unifier
let ty = self . unifier . add_ty ( TypeEnum ::TFunc ( crate ::typecheck ::typedef ::FunSignature {
args : Default ::default ( ) ,
ret : self . primitives . none , // NOTE: this needs to be changed later
vars : Default ::default ( )
} ) ) ;
// add to the definition list
self . definition_list . push (
TopLevelDefInfo {
def : Self ::make_top_level_function_def (
name . into ( ) ,
self . primitives . none // NOTE: this needs to be changed later
) ,
resolver : Some ( resolver ) ,
ast : Some ( ast ) ,
ty ,
}
) ;
Ok ( vec! [ ( fun_name , DefinitionId ( def_id ) , ty ) ] )
} ,
_ = > Err ( " only registrations of top level classes/functions are supprted " . into ( ) )
}
}
/// this should be called after all top level classes are registered, and will actually fill in those fields of the previous dummy one
pub fn analyze_top_level ( & mut self ) -> Result < ( ) , String > {
for mut d in & mut self . definition_list {
if let ( Some ( ast ) , Some ( resolver ) ) = ( & d . ast , d . resolver ) {
match & ast . node {
ast ::StmtKind ::ClassDef {
name ,
bases ,
body ,
..
} = > {
// ancestors and typevars associate with the class are analyzed by looking into the `bases` ast node
for b in bases {
match & b . node {
ast ::ExprKind ::Name { id , .. } = > { // base class, name directly available inside the module, can use this module's symbol resolver
let def_id = resolver . get_identifier_def ( id ) ;
unimplemented! ( )
} ,
ast ::ExprKind ::Attribute { value , attr , .. } = > { // things can be like `class A(BaseModule.Base)`, here we have to get the symbol resolver of the module `BaseModule`?
unimplemented! ( ) // need to change symbol resolver in order to get the symbol resolver of the imported module
} ,
ast ::ExprKind ::Subscript { value , slice , .. } = > { // typevars bounded to the class, things like `class A(Generic[T, V])`
if let ast ::ExprKind ::Name { id , .. } = & value . node {
if id = = " Generic " {
// TODO: get typevars
unimplemented! ( )
} else {
return Err ( " unknown type var " . into ( ) )
}
2021-08-09 01:43:41 +08:00
}
} ,
2021-08-10 10:33:18 +08:00
_ = > return Err ( " not supported " . into ( ) )
2021-08-09 01:43:41 +08:00
}
}
2021-08-10 10:33:18 +08:00
// class method and field are analyzed by looking into the class body ast node
for stmt in body {
2021-08-09 01:43:41 +08:00
unimplemented! ( )
2021-08-10 10:33:18 +08:00
}
} ,
2021-08-09 01:43:41 +08:00
2021-08-10 10:33:18 +08:00
ast ::StmtKind ::FunctionDef {
name ,
args ,
body ,
returns ,
..
} = > {
unimplemented! ( )
2021-08-09 01:43:41 +08:00
}
2021-08-10 10:33:18 +08:00
_ = > return Err ( " only expect function and class definitions to be submitted here to be analyzed " . into ( ) )
}
2021-08-09 01:43:41 +08:00
}
2021-08-10 10:33:18 +08:00
} ;
Ok ( ( ) )
2021-08-09 01:43:41 +08:00
}
}