2021-08-10 23:49:58 +08:00
use std ::borrow ::Borrow ;
2021-08-03 14:11:41 +08:00
use std ::{ collections ::HashMap , sync ::Arc } ;
2021-08-03 13:38:27 +08:00
2021-08-05 14:55:23 +08:00
use super ::typecheck ::type_inferencer ::PrimitiveStore ;
2021-08-10 21:57:31 +08:00
use super ::typecheck ::typedef ::{ SharedUnifier , Type , TypeEnum , Unifier } ;
2021-08-07 10:28:41 +08:00
use crate ::symbol_resolver ::SymbolResolver ;
2021-08-09 16:10:17 +08:00
use inkwell ::{
basic_block ::BasicBlock , builder ::Builder , context ::Context , module ::Module ,
types ::BasicTypeEnum , values ::PointerValue ,
} ;
2021-08-03 14:11:41 +08:00
use parking_lot ::RwLock ;
2021-08-10 21:57:31 +08:00
use rustpython_parser ::ast ::{ self , Stmt } ;
2021-08-03 13:38:27 +08:00
2021-08-06 10:30:57 +08:00
/// Identifier of a top-level definition.
///
/// Wraps the `usize` that the composer assigns when a definition is registered; it is used as
/// the `object_id` of classes, as the target of class methods, and as the element type of
/// `ancestors`.
///
/// `Hash` and `Debug` are derived in addition to the ordering/equality traits so that the ID can
/// be used as a hash-map/hash-set key and printed in diagnostics.
#[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Copy, Hash, Debug)]
pub struct DefinitionId(pub usize);
2021-08-03 13:38:27 +08:00
pub enum TopLevelDef {
Class {
// object ID used for TypeEnum
2021-08-06 10:30:57 +08:00
object_id : DefinitionId ,
2021-08-03 13:38:27 +08:00
// type variables bounded to the class.
type_vars : Vec < Type > ,
2021-08-07 15:06:39 +08:00
// class fields
2021-08-03 13:38:27 +08:00
fields : Vec < ( String , Type ) > ,
// class methods, pointing to the corresponding function definition.
2021-08-07 15:06:39 +08:00
methods : Vec < ( String , Type , DefinitionId ) > ,
2021-08-03 13:38:27 +08:00
// ancestor classes, including itself.
ancestors : Vec < DefinitionId > ,
} ,
Function {
2021-08-07 15:06:39 +08:00
// prefix for symbol, should be unique globally, and not ending with numbers
name : String ,
// function signature.
2021-08-03 13:38:27 +08:00
signature : Type ,
/// Function instance to symbol mapping
/// Key: string representation of type variable values, sorted by variable ID in ascending
/// order, including type variables associated with the class.
/// Value: function symbol name.
instance_to_symbol : HashMap < String , String > ,
/// Function instances to annotated AST mapping
/// Key: string representation of type variable values, sorted by variable ID in ascending
/// order, including type variables associated with the class. Excluding rigid type
/// variables.
/// Value: AST annotated with types together with a unification table index. Could contain
/// rigid type variables that would be substituted when the function is instantiated.
2021-08-05 14:55:23 +08:00
instance_to_stmt : HashMap < String , ( Stmt < Option < Type > > , usize ) > ,
2021-08-03 13:38:27 +08:00
} ,
2021-08-10 10:33:18 +08:00
Initializer {
2021-08-10 21:57:31 +08:00
class_id : DefinitionId ,
} ,
2021-08-03 13:38:27 +08:00
}
pub struct CodeGenTask {
pub subst : HashMap < usize , Type > ,
pub symbol_name : String ,
2021-08-05 14:55:23 +08:00
pub body : Stmt < Option < Type > > ,
2021-08-03 13:38:27 +08:00
pub unifier : SharedUnifier ,
}
pub struct TopLevelContext {
2021-08-05 14:55:23 +08:00
pub definitions : Arc < RwLock < Vec < RwLock < TopLevelDef > > > > ,
pub unifiers : Arc < RwLock < Vec < SharedUnifier > > > ,
2021-08-03 13:38:27 +08:00
}
2021-08-03 14:11:41 +08:00
2021-08-05 14:55:23 +08:00
pub struct CodeGenContext < ' ctx > {
pub ctx : & ' ctx Context ,
pub builder : Builder < ' ctx > ,
pub module : Module < ' ctx > ,
pub top_level : & ' ctx TopLevelContext ,
2021-08-03 14:11:41 +08:00
pub unifier : Unifier ,
2021-08-05 14:55:23 +08:00
pub resolver : Box < dyn SymbolResolver > ,
pub var_assignment : HashMap < String , PointerValue < ' ctx > > ,
2021-08-07 15:06:39 +08:00
pub type_cache : HashMap < Type , BasicTypeEnum < ' ctx > > ,
pub primitives : PrimitiveStore ,
2021-08-09 16:37:28 +08:00
// stores the alloca for variables
pub init_bb : BasicBlock < ' ctx > ,
2021-08-09 16:10:17 +08:00
// where continue and break should go to respectively
// the first one is the test_bb, and the second one is bb after the loop
pub loop_bb : Option < ( BasicBlock < ' ctx > , BasicBlock < ' ctx > ) > ,
2021-08-03 14:11:41 +08:00
}
2021-08-09 01:43:41 +08:00
2021-08-10 23:49:58 +08:00
/// Builds the name of a class method by appending `method_name` directly to `class_name`.
///
/// No separator is inserted between the two parts. This needs to be further extended to cover
/// more name mangling, such as instantiations of type variables.
pub fn name_mangling(class_name: String, method_name: &str) -> String {
    class_name + method_name
}
pub struct TopLevelDefInfo < ' a > {
2021-08-10 21:57:31 +08:00
// like adding some info on top of the TopLevelDef for later parsing the class bases, method,
// and function sigatures
2021-08-10 23:49:58 +08:00
def : TopLevelDef , // the definition entry
ty : Type , // the entry in the top_level unifier
ast : Option < ast ::Stmt < ( ) > > , // the ast submitted by applications, primitives and class methods will have None value here
resolver : Option < & ' a dyn SymbolResolver > // the resolver
2021-08-10 10:33:18 +08:00
}
2021-08-10 21:57:31 +08:00
2021-08-10 10:33:18 +08:00
pub struct TopLevelComposer < ' a > {
pub definition_list : Vec < TopLevelDefInfo < ' a > > ,
pub primitives : PrimitiveStore ,
pub unifier : Unifier ,
}
impl < ' a > TopLevelComposer < ' a > {
2021-08-09 01:43:41 +08:00
pub fn make_primitives ( ) -> ( PrimitiveStore , Unifier ) {
let mut unifier = Unifier ::new ( ) ;
let int32 = unifier . add_ty ( TypeEnum ::TObj {
2021-08-10 21:57:31 +08:00
obj_id : DefinitionId ( 0 ) ,
2021-08-09 01:43:41 +08:00
fields : HashMap ::new ( ) . into ( ) ,
params : HashMap ::new ( ) ,
} ) ;
let int64 = unifier . add_ty ( TypeEnum ::TObj {
2021-08-10 21:57:31 +08:00
obj_id : DefinitionId ( 1 ) ,
2021-08-09 01:43:41 +08:00
fields : HashMap ::new ( ) . into ( ) ,
params : HashMap ::new ( ) ,
} ) ;
let float = unifier . add_ty ( TypeEnum ::TObj {
2021-08-10 21:57:31 +08:00
obj_id : DefinitionId ( 2 ) ,
2021-08-09 01:43:41 +08:00
fields : HashMap ::new ( ) . into ( ) ,
params : HashMap ::new ( ) ,
} ) ;
let bool = unifier . add_ty ( TypeEnum ::TObj {
2021-08-10 21:57:31 +08:00
obj_id : DefinitionId ( 3 ) ,
2021-08-09 01:43:41 +08:00
fields : HashMap ::new ( ) . into ( ) ,
params : HashMap ::new ( ) ,
} ) ;
let none = unifier . add_ty ( TypeEnum ::TObj {
2021-08-10 21:57:31 +08:00
obj_id : DefinitionId ( 4 ) ,
2021-08-09 01:43:41 +08:00
fields : HashMap ::new ( ) . into ( ) ,
params : HashMap ::new ( ) ,
} ) ;
let primitives = PrimitiveStore { int32 , int64 , float , bool , none } ;
crate ::typecheck ::magic_methods ::set_primitives_magic_methods ( & primitives , & mut unifier ) ;
( primitives , unifier )
}
2021-08-10 21:57:31 +08:00
2021-08-10 10:33:18 +08:00
pub fn new ( ) -> Self {
let primitives = Self ::make_primitives ( ) ;
let definition_list : Vec < TopLevelDefInfo < ' a > > = vec! [
TopLevelDefInfo {
def : Self ::make_top_level_class_def ( 0 ) ,
ast : None ,
resolver : None ,
2021-08-10 21:57:31 +08:00
ty : primitives . 0. int32 ,
2021-08-10 10:33:18 +08:00
} ,
TopLevelDefInfo {
def : Self ::make_top_level_class_def ( 1 ) ,
ast : None ,
resolver : None ,
2021-08-10 21:57:31 +08:00
ty : primitives . 0. int64 ,
2021-08-10 10:33:18 +08:00
} ,
TopLevelDefInfo {
def : Self ::make_top_level_class_def ( 2 ) ,
ast : None ,
resolver : None ,
2021-08-10 21:57:31 +08:00
ty : primitives . 0. float ,
2021-08-10 10:33:18 +08:00
} ,
TopLevelDefInfo {
def : Self ::make_top_level_class_def ( 3 ) ,
ast : None ,
resolver : None ,
2021-08-10 21:57:31 +08:00
ty : primitives . 0. bool ,
2021-08-10 10:33:18 +08:00
} ,
TopLevelDefInfo {
def : Self ::make_top_level_class_def ( 4 ) ,
ast : None ,
resolver : None ,
2021-08-10 21:57:31 +08:00
ty : primitives . 0. none ,
2021-08-09 01:43:41 +08:00
} ,
2021-08-10 10:33:18 +08:00
] ; // the entries for primitive types
2021-08-10 21:57:31 +08:00
TopLevelComposer { definition_list , primitives : primitives . 0 , unifier : primitives . 1 }
2021-08-09 01:43:41 +08:00
}
2021-08-10 23:49:58 +08:00
/// already include the definition_id of itself inside the ancestors vector
2021-08-10 10:33:18 +08:00
pub fn make_top_level_class_def ( index : usize ) -> TopLevelDef {
TopLevelDef ::Class {
object_id : DefinitionId ( index ) ,
type_vars : Default ::default ( ) ,
fields : Default ::default ( ) ,
methods : Default ::default ( ) ,
2021-08-10 23:49:58 +08:00
ancestors : vec ! [ DefinitionId ( index ) ] ,
2021-08-10 10:33:18 +08:00
}
}
pub fn make_top_level_function_def ( name : String , ty : Type ) -> TopLevelDef {
TopLevelDef ::Function {
name ,
signature : ty ,
instance_to_symbol : Default ::default ( ) ,
2021-08-10 21:57:31 +08:00
instance_to_stmt : Default ::default ( ) ,
2021-08-10 10:33:18 +08:00
}
}
2021-08-10 21:57:31 +08:00
// like to make and return a "primitive" symbol resolver? so that the symbol resolver can later
// figure out primitive type definitions when passed a primitive type name
2021-08-10 10:33:18 +08:00
pub fn get_primitives_definition ( & self ) -> Vec < ( String , DefinitionId , Type ) > {
vec! [
( " int32 " . into ( ) , DefinitionId ( 0 ) , self . primitives . int32 ) ,
2021-08-10 23:49:58 +08:00
( " int64 " . into ( ) , DefinitionId ( 1 ) , self . primitives . int64 ) ,
( " float " . into ( ) , DefinitionId ( 2 ) , self . primitives . float ) ,
( " bool " . into ( ) , DefinitionId ( 3 ) , self . primitives . bool ) ,
( " none " . into ( ) , DefinitionId ( 4 ) , self . primitives . none ) ,
2021-08-10 10:33:18 +08:00
]
}
2021-08-10 21:57:31 +08:00
pub fn register_top_level (
& mut self ,
ast : ast ::Stmt < ( ) > ,
resolver : & ' a dyn SymbolResolver ,
) -> Result < Vec < ( String , DefinitionId , Type ) > , String > {
2021-08-09 01:43:41 +08:00
match & ast . node {
2021-08-10 21:57:31 +08:00
ast ::StmtKind ::ClassDef { name , body , .. } = > {
2021-08-10 10:33:18 +08:00
let class_name = name . to_string ( ) ;
2021-08-10 23:49:58 +08:00
let class_def_id = self . definition_list . len ( ) ;
2021-08-10 10:33:18 +08:00
// add the class to the unifier
let ty = self . unifier . add_ty ( TypeEnum ::TObj {
2021-08-10 23:49:58 +08:00
obj_id : DefinitionId ( class_def_id ) ,
2021-08-10 10:33:18 +08:00
fields : Default ::default ( ) ,
2021-08-10 21:57:31 +08:00
params : Default ::default ( ) ,
2021-08-10 10:33:18 +08:00
} ) ;
2021-08-10 23:49:58 +08:00
let mut ret_vector : Vec < ( String , DefinitionId , Type ) > = vec! [ ( class_name . clone ( ) , DefinitionId ( class_def_id ) , ty ) ] ;
// parse class def body and register class methods into the def list
// NOTE: module's symbol resolver would not know the name of the class methods, thus cannot return their definition_id? so we have to manage it ourselves?
// or do we return the class method list of (method_name, def_id, type) to application to be used to build symbol resolver? <- current implementation
for b in body {
if let ast ::StmtKind ::FunctionDef { name , .. } = & b . node {
let fun_name = name_mangling ( class_name . clone ( ) , name ) ;
let def_id = self . definition_list . len ( ) ;
// add to unifier
let ty = self . unifier . add_ty ( TypeEnum ::TFunc ( crate ::typecheck ::typedef ::FunSignature {
args : Default ::default ( ) ,
ret : self . primitives . none ,
vars : Default ::default ( )
} ) ) ;
// add to the definition list
self . definition_list . push (
TopLevelDefInfo {
def : Self ::make_top_level_function_def ( fun_name . clone ( ) , ty ) ,
resolver : Some ( resolver ) ,
ty ,
ast : None // since it is inside the class def body statments
}
) ;
ret_vector . push ( ( fun_name , DefinitionId ( def_id ) , ty ) ) ;
if name = = " __init__ " { // if it is the contructor, special handling is needed. In the above handling, we still add __init__ function to the class method
self . definition_list . push (
TopLevelDefInfo {
def : TopLevelDef ::Initializer {
class_id : DefinitionId ( class_def_id ) // FIXME: None if have no parameter, Some if same as __init__?
} ,
ty : self . primitives . none , // arbitary picked one
ast : None , // it is inside the class def body statments
resolver : Some ( resolver )
}
)
// FIXME: should we return this to the symbol resolver?
}
} else { } // else do nothing
}
// add to the definition list
self . definition_list . push (
TopLevelDefInfo {
def : Self ::make_top_level_class_def ( class_def_id ) ,
resolver : Some ( resolver ) ,
ast : Some ( ast ) ,
ty ,
}
) ;
Ok ( ret_vector )
} ,
2021-08-10 10:33:18 +08:00
2021-08-10 21:57:31 +08:00
ast ::StmtKind ::FunctionDef { name , .. } = > {
2021-08-10 10:33:18 +08:00
let fun_name = name . to_string ( ) ;
let def_id = self . definition_list . len ( ) ;
// add to the unifier
2021-08-10 21:57:31 +08:00
let ty =
self . unifier . add_ty ( TypeEnum ::TFunc ( crate ::typecheck ::typedef ::FunSignature {
args : Default ::default ( ) ,
2021-08-10 23:49:58 +08:00
ret : self . primitives . none ,
vars : Default ::default ( )
} ) ) ;
2021-08-10 10:33:18 +08:00
// add to the definition list
2021-08-10 21:57:31 +08:00
self . definition_list . push ( TopLevelDefInfo {
2021-08-10 23:49:58 +08:00
def : Self ::make_top_level_function_def (
name . into ( ) ,
self . primitives . none
) ,
resolver : Some ( resolver ) ,
ast : Some ( ast ) ,
ty ,
2021-08-10 21:57:31 +08:00
} ) ;
2021-08-10 10:33:18 +08:00
Ok ( vec! [ ( fun_name , DefinitionId ( def_id ) , ty ) ] )
2021-08-10 21:57:31 +08:00
}
2021-08-10 10:33:18 +08:00
2021-08-10 21:57:31 +08:00
_ = > Err ( " only registrations of top level classes/functions are supprted " . into ( ) ) ,
2021-08-10 10:33:18 +08:00
}
}
/// this should be called after all top level classes are registered, and will actually fill in those fields of the previous dummy one
pub fn analyze_top_level ( & mut self ) -> Result < ( ) , String > {
for mut d in & mut self . definition_list {
if let ( Some ( ast ) , Some ( resolver ) ) = ( & d . ast , d . resolver ) {
match & ast . node {
ast ::StmtKind ::ClassDef {
bases ,
body ,
..
} = > {
2021-08-10 23:49:58 +08:00
// get the mutable reference of the entry in the definition list, get the `TopLevelDef`
let ( _ ,
ancestors ,
fields ,
methods ,
type_vars
) = if let TopLevelDef ::Class {
object_id ,
ancestors ,
fields ,
methods ,
type_vars
} = & mut d . def {
( object_id , ancestors , fields , methods , type_vars )
} else { unreachable! ( ) } ;
// try to get mutable reference of the entry in the unification table, get the `TypeEnum`
let ( params ,
fields
) = if let TypeEnum ::TObj {
params , // FIXME: this params is immutable, even if this is mutable, what should the key be, get the original typevar's var_id?
fields ,
..
} = self . unifier . get_ty ( d . ty ) . borrow ( ) {
( params , fields )
} else { unreachable! ( ) } ;
2021-08-10 21:57:31 +08:00
// ancestors and typevars associate with the class are analyzed by looking
// into the `bases` ast node
2021-08-10 10:33:18 +08:00
for b in bases {
match & b . node {
2021-08-10 23:49:58 +08:00
// typevars bounded to the class, things like `class A(Generic[T, V, ImportedModule.T])`
// should update the TopLevelDef::Class.typevars and the TypeEnum::TObj.params
ast ::ExprKind ::Subscript { value , slice , .. } if {
if let ast ::ExprKind ::Name { id , .. } = & value . node {
id = = " Generic "
} else { false }
} = > {
match & slice . node {
// `class Foo(Generic[T, V, P, ImportedModule.T]):`
ast ::ExprKind ::Tuple { elts , .. } = > {
for e in elts {
// TODO: I'd better parse the node to get the Type of the type vars(can have things like: A.B.C.typevar?)
match & e . node {
ast ::ExprKind ::Name { id , .. } = > {
// the def_list
type_vars . push ( resolver . get_symbol_type ( id ) . ok_or_else ( | | " unknown type variable " . to_string ( ) ) ? ) ;
// the TypeEnum of the class
// FIXME: the `params` destructed above is not mutable, even if this is mutable, what should the key be?
unimplemented! ( )
} ,
_ = > unimplemented! ( )
}
}
} ,
// `class Foo(Generic[T]):`
ast ::ExprKind ::Name { id , .. } = > {
// the def_list
type_vars . push ( resolver . get_symbol_type ( id ) . ok_or_else ( | | " unknown type variable " . to_string ( ) ) ? ) ;
// the TypeEnum of the class
// FIXME: the `params` destructed above is not mutable, even if this is mutable, what should the key be?
unimplemented! ( )
} ,
// `class Foo(Generic[ImportedModule.T])`
ast ::ExprKind ::Attribute { value , attr , .. } = > {
// TODO:
unimplemented! ( )
} ,
_ = > return Err ( " not supported " . into ( ) ) // NOTE: it is really all the supported cases?
} ;
} ,
// base class, name directly available inside the
// module, can use this module's symbol resolver
2021-08-10 21:57:31 +08:00
ast ::ExprKind ::Name { id , .. } = > {
2021-08-10 10:33:18 +08:00
let def_id = resolver . get_identifier_def ( id ) ;
2021-08-10 23:49:58 +08:00
// the definition list
ancestors . push ( def_id ) ;
2021-08-10 10:33:18 +08:00
} ,
2021-08-10 23:49:58 +08:00
// base class, things can be like `class A(BaseModule.Base)`, here we have to get the
// symbol resolver of the module `BaseModule`?
2021-08-10 21:57:31 +08:00
ast ::ExprKind ::Attribute { value , attr , .. } = > {
2021-08-10 23:49:58 +08:00
if let ast ::ExprKind ::Name { id , .. } = & value . node {
if let Some ( base_module_resolver ) = resolver . get_module_resolver ( id ) {
let def_id = base_module_resolver . get_identifier_def ( attr ) ;
// the definition list
ancestors . push ( def_id ) ;
} else { return Err ( " unkown imported module " . into ( ) ) }
} else { return Err ( " unkown imported module " . into ( ) ) }
2021-08-10 10:33:18 +08:00
} ,
2021-08-10 23:49:58 +08:00
// `class Foo(ImportedModule.A[int, bool])`, A is a class with associated type variables
2021-08-10 21:57:31 +08:00
ast ::ExprKind ::Subscript { value , slice , .. } = > {
2021-08-10 23:49:58 +08:00
unimplemented! ( )
2021-08-09 01:43:41 +08:00
} ,
2021-08-10 10:33:18 +08:00
_ = > return Err ( " not supported " . into ( ) )
2021-08-09 01:43:41 +08:00
}
}
2021-08-10 23:49:58 +08:00
// ----------- class method and field are analyzed by looking into the class body ast node -----------
2021-08-10 10:33:18 +08:00
for stmt in body {
2021-08-10 23:49:58 +08:00
if let ast ::StmtKind ::FunctionDef {
name ,
args ,
body ,
returns ,
..
} = & stmt . node {
} else { }
// do nothing. we do not care about things like this?
// class A:
// a = 3
// b = [2, 3]
2021-08-10 10:33:18 +08:00
}
} ,
2021-08-09 01:43:41 +08:00
2021-08-10 23:49:58 +08:00
// top level function definition
2021-08-10 10:33:18 +08:00
ast ::StmtKind ::FunctionDef {
name ,
args ,
body ,
returns ,
..
} = > {
unimplemented! ( )
2021-08-09 01:43:41 +08:00
}
2021-08-10 10:33:18 +08:00
_ = > return Err ( " only expect function and class definitions to be submitted here to be analyzed " . into ( ) )
}
2021-08-09 01:43:41 +08:00
}
2021-08-10 21:57:31 +08:00
}
2021-08-10 10:33:18 +08:00
Ok ( ( ) )
2021-08-09 01:43:41 +08:00
}
2021-08-10 21:57:31 +08:00
}
2021-08-10 23:49:58 +08:00
/// Resolves an expression that names a type variable to its type.
///
/// Supported forms are a plain name, looked up with `resolver`, and `module.name`, looked up
/// with the resolver of the module `module`.
///
/// # Errors
///
/// Returns an error when the identifier or the imported module is unknown to the resolver, or
/// when `input` is neither a name nor an attribute expression.
///
/// # Panics
///
/// Panics via `unimplemented!()` for an attribute expression whose base is not a plain name.
pub fn parse_type_var<T>(
    input: &ast::Expr<T>,
    resolver: &dyn SymbolResolver,
) -> Result<Type, String> {
    let unknown_identifier = || " unknown type variable identifer ".to_string();

    match &input.node {
        ast::ExprKind::Name { id, .. } => {
            resolver.get_symbol_type(id).ok_or_else(unknown_identifier)
        }
        ast::ExprKind::Attribute { value, attr, .. } => match &value.node {
            ast::ExprKind::Name { id, .. } => resolver
                .get_module_resolver(id)
                .ok_or_else(|| " unknown imported module ".to_string())?
                .get_symbol_type(attr)
                .ok_or_else(unknown_identifier),
            // Resolving the attribute chain recursively is not done yet.
            // FIXME: new problem: how do we handle this?
            //     # A.py
            //     class A:
            //         T = TypeVar('T', int, bool)
            //         pass
            //     # B.py
            //     import A
            //     class B(Generic[A.A.T]):
            //         pass
            _ => unimplemented!(),
        },
        _ => Err(" not supported ".into()),
    }
}