From ba5bb78f11d761b525e542e8635b69c5d07710dc Mon Sep 17 00:00:00 2001 From: ychenfo Date: Fri, 13 Aug 2021 02:38:29 +0800 Subject: [PATCH] top level parse class base/generic --- nac3core/src/top_level.rs | 585 +++++++++++++++++++++++--------------- 1 file changed, 361 insertions(+), 224 deletions(-) diff --git a/nac3core/src/top_level.rs b/nac3core/src/top_level.rs index 6be9e69c..74f531d8 100644 --- a/nac3core/src/top_level.rs +++ b/nac3core/src/top_level.rs @@ -1,9 +1,11 @@ -use std::borrow::Borrow; +use std::borrow::{Borrow, BorrowMut}; +use std::collections::HashSet; use std::{collections::HashMap, sync::Arc}; use super::typecheck::type_inferencer::PrimitiveStore; use super::typecheck::typedef::{SharedUnifier, Type, TypeEnum, Unifier}; use crate::symbol_resolver::SymbolResolver; +use crate::typecheck::typedef::{FunSignature, FuncArg}; use inkwell::context::Context; use parking_lot::{Mutex, RwLock}; use rustpython_parser::ast::{self, Stmt}; @@ -57,30 +59,31 @@ pub struct TopLevelContext { pub conetexts: Arc>>>, } -// like adding some info on top of the TopLevelDef for -// later parsing the class bases, method, and function sigatures -pub struct TopLevelDefInfo { - // the definition entry - def: TopLevelDef, - // the entry in the top_level unifier - ty: Type, - // the ast submitted by applications, primitives and - // class methods will have None value here - ast: Option>, -} - pub struct TopLevelComposer { - // list of top level definitions and their info - pub definition_list: RwLock>, + // list of top level definitions, same as top level context + pub definition_list: Arc>>>, + // list of top level Type, the index is same as the field `definition_list` + pub ty_list: RwLock>, + // list of top level ast, the index is same as the field `definition_list` and `ty_list` + pub ast_list: RwLock>>>, + // start as a primitive unifier, will add more top_level defs inside + pub unifier: RwLock, // primitive store pub primitives: PrimitiveStore, - // start as a primitive unifier, will add more top_level defs inside - pub unifier: Unifier, // mangled class method name to def_id - pub class_method_to_def_id: HashMap, + pub class_method_to_def_id: RwLock>, } impl TopLevelComposer { + pub fn to_top_level_context(&self) -> TopLevelContext { + TopLevelContext { + definitions: self.definition_list.clone(), + // FIXME: all the big unifier or? + unifiers: Default::default(), + conetexts: Default::default(), + } + } + fn name_mangling(mut class_name: String, method_name: &str) -> String { class_name.push_str(method_name); class_name @@ -118,51 +121,47 @@ impl TopLevelComposer { (primitives, unifier) } - /// return a composer and things to make a "primitive" symbol resolver, so that the symbol + /// return a composer and things to make a "primitive" symbol resolver, so that the symbol /// resolver can later figure out primitive type definitions when passed a primitive type name pub fn new() -> (Vec<(String, DefinitionId, Type)>, Self) { let primitives = Self::make_primitives(); - // the def list including the entries of primitive info - let definition_list: Vec = vec![ - TopLevelDefInfo { - def: Self::make_top_level_class_def(0, None), - ast: None, - ty: primitives.0.int32, - }, - TopLevelDefInfo { - def: Self::make_top_level_class_def(1, None), - ast: None, - ty: primitives.0.int64, - }, - TopLevelDefInfo { - def: Self::make_top_level_class_def(2, None), - ast: None, - ty: primitives.0.float, - }, - TopLevelDefInfo { - def: Self::make_top_level_class_def(3, None), - ast: None, - ty: primitives.0.bool, - }, - TopLevelDefInfo { - def: Self::make_top_level_class_def(4, None), - ast: None, - ty: primitives.0.none, - }, + + let top_level_def_list = vec![ + RwLock::new(Self::make_top_level_class_def(0, None)), + RwLock::new(Self::make_top_level_class_def(1, None)), + RwLock::new(Self::make_top_level_class_def(2, None)), + RwLock::new(Self::make_top_level_class_def(3, None)), + RwLock::new(Self::make_top_level_class_def(4, None)), ]; - let composer = TopLevelComposer { - definition_list: definition_list.into(), + + let ast_list: Vec>> = vec![None, None, None, None, None]; + + let ty_list: Vec = vec![ + primitives.0.int32, + primitives.0.int64, + primitives.0.float, + primitives.0.bool, + primitives.0.none, + ]; + + let composer = TopLevelComposer { + definition_list: RwLock::new(top_level_def_list).into(), + ty_list: RwLock::new(ty_list), + ast_list: RwLock::new(ast_list), primitives: primitives.0, - unifier: primitives.1, + unifier: primitives.1.into(), class_method_to_def_id: Default::default(), }; - (vec![ - ("int32".into(), DefinitionId(0), composer.primitives.int32), - ("int64".into(), DefinitionId(1), composer.primitives.int64), - ("float".into(), DefinitionId(2), composer.primitives.float), - ("bool".into(), DefinitionId(3), composer.primitives.bool), - ("none".into(), DefinitionId(4), composer.primitives.none), - ], composer) + ( + vec![ + ("int32".into(), DefinitionId(0), composer.primitives.int32), + ("int64".into(), DefinitionId(1), composer.primitives.int64), + ("float".into(), DefinitionId(2), composer.primitives.float), + ("bool".into(), DefinitionId(3), composer.primitives.bool), + ("none".into(), DefinitionId(4), composer.primitives.none), + ], + composer, + ) } /// already include the definition_id of itself inside the ancestors vector @@ -202,24 +201,31 @@ impl TopLevelComposer { match &ast.node { ast::StmtKind::ClassDef { name, body, .. } => { let class_name = name.to_string(); - let mut def_list = self.definition_list.write(); + + let (mut def_list, mut ty_list, mut ast_list) = + (self.definition_list.write(), self.ty_list.write(), self.ast_list.write()); + + // will be deleted after tested + assert_eq!(ty_list.len(), def_list.len()); + assert_eq!(def_list.len(), ast_list.len()); + let class_def_id = def_list.len(); // add the class to the unifier - let ty = self.unifier.add_ty(TypeEnum::TObj { + let ty = self.unifier.write().add_ty(TypeEnum::TObj { obj_id: DefinitionId(class_def_id), fields: Default::default(), params: Default::default(), }); - // add the class to the definition list - def_list.push(TopLevelDefInfo { - def: Self::make_top_level_class_def(class_def_id, resolver.clone()), - // NOTE: Temporarily none here since function body need to be read later - ast: None, - ty, - }); - + // add the class to the definition lists + def_list + .push(Self::make_top_level_class_def(class_def_id, resolver.clone()).into()); + ty_list.push(ty); + // since later when registering class method, ast will still be used, + // here push None temporarly, later will push the ast + ast_list.push(None); + // parse class def body and register class methods into the def list // module's symbol resolver would not know the name of the class methods, // thus cannot return their definition_id? so we have to manage it ourselves @@ -228,9 +234,9 @@ impl TopLevelComposer { if let ast::StmtKind::FunctionDef { name, .. } = &b.node { let fun_name = Self::name_mangling(class_name.clone(), name); let def_id = def_list.len(); - + // add to unifier - let ty = self.unifier.add_ty(TypeEnum::TFunc( + let ty = self.unifier.write().add_ty(TypeEnum::TFunc( crate::typecheck::typedef::FunSignature { args: Default::default(), ret: self.primitives.none, @@ -239,60 +245,66 @@ impl TopLevelComposer { )); // add to the definition list - def_list.push(TopLevelDefInfo { - def: Self::make_top_level_function_def(fun_name.clone(), ty, resolver.clone()), - ty, - // since it is inside the class def body statments, the ast is None - ast: None, - }); + def_list.push( + Self::make_top_level_function_def( + fun_name.clone(), + ty, + resolver.clone(), + ) + .into(), + ); + ty_list.push(ty); + // the ast of class method is in the class, push None in to the list here + ast_list.push(None); // class method, do not let the symbol manager manage it, use our own map - self.class_method_to_def_id.insert(fun_name, DefinitionId(def_id)); + self.class_method_to_def_id.write().insert(fun_name, DefinitionId(def_id)); // if it is the contructor, special handling is needed. In the above // handling, we still add __init__ function to the class method if name == "__init__" { - // FIXME: how can this later be fetched? - def_list.push(TopLevelDefInfo { - def: TopLevelDef::Initializer { class_id: DefinitionId(class_def_id) }, - // arbitary picked one for the constructor - ty: self.primitives.none, - // it is inside the class def body statments, so None - ast: None, - }) + // NOTE: how can this later be fetched? + def_list.push( + TopLevelDef::Initializer { class_id: DefinitionId(class_def_id) } + .into(), + ); + // arbitarily push one to make sure the index is correct + ty_list.push(self.primitives.none); + ast_list.push(None); } } } - // move the ast to the entry of the class in the def_list - def_list.get_mut(class_def_id).unwrap().ast = Some(ast); - + // move the ast to the entry of the class in the ast_list + ast_list[class_def_id] = Some(ast); + // return Ok((class_name, DefinitionId(class_def_id), ty)) - }, + } ast::StmtKind::FunctionDef { name, .. } => { let fun_name = name.to_string(); - - // add to the unifier - let ty = self.unifier.add_ty(TypeEnum::TFunc(crate::typecheck::typedef::FunSignature { - args: Default::default(), - ret: self.primitives.none, - vars: Default::default(), - })); - - // add to the definition list - let mut def_list = self.definition_list.write(); - def_list.push(TopLevelDefInfo { - def: Self::make_top_level_function_def( - name.into(), - self.primitives.none, - resolver, - ), - ast: Some(ast), - ty, - }); + // add to the unifier + let ty = self.unifier.write().add_ty(TypeEnum::TFunc( + crate::typecheck::typedef::FunSignature { + args: Default::default(), + ret: self.primitives.none, + vars: Default::default(), + }, + )); + + let (mut def_list, mut ty_list, mut ast_list) = + (self.definition_list.write(), self.ty_list.write(), self.ast_list.write()); + // add to the definition list + def_list.push( + Self::make_top_level_function_def(name.into(), self.primitives.none, resolver) + .into(), + ); + ty_list.push(ty); + ast_list.push(Some(ast)); + + // return Ok((fun_name, DefinitionId(def_list.len() - 1), ty)) } @@ -300,144 +312,269 @@ impl TopLevelComposer { } } - /// this should be called after all top level classes are registered, and will actually fill in those fields of the previous dummy one + /// this should be called after all top level classes are registered, and + /// will actually fill in those fields of the previous dummy one pub fn analyze_top_level(&mut self) -> Result<(), String> { - for d in self.definition_list.write().iter_mut() { - // only analyze those with ast, and class_method(ast in class def) - if let Some(ast) = &d.ast { - match &ast.node { - ast::StmtKind::ClassDef { - bases, - body, + let mut def_list = self.definition_list.write(); + let ty_list = self.ty_list.read(); + let ast_list = self.ast_list.read(); + let mut unifier = self.unifier.write(); + + for (def, ty, ast) in def_list + .iter_mut() + .zip(ty_list.iter()) + .zip(ast_list.iter()) + .map(|((x, y), z)| (x, y, z)) + .collect::, &Type, &Option>)>>() + { + // only analyze those entries with ast, and class_method(whose ast in class def) + match ast { + Some(ast::Located{node: ast::StmtKind::ClassDef { + bases, + body, + name: class_name, + .. + }, .. }) => { + // get the mutable reference of the entry in the + // definition list, get the `TopLevelDef` + let ( + def_ancestors, + def_fields, + def_methods, + def_type_vars, + resolver, + ) = if let TopLevelDef::Class { + object_id: _, + ancestors, + fields, + methods, + type_vars, + resolver: Some(resolver) + } = def.get_mut() { + (ancestors, fields, methods, type_vars, resolver.lock()) + } else { unreachable!() }; + + // try to get mutable reference of the entry in the + // unification table, get the `TypeEnum` + let type_enum = unifier.get_ty(*ty); + let ( + enum_params, + enum_fields + ) = if let TypeEnum::TObj { + params, + fields, .. - } => { - // get the mutable reference of the entry in the definition list, get the `TopLevelDef` - let ( - ancestors, - fields, - methods, - type_vars, - resolver, - ) = if let TopLevelDef::Class { - object_id: _, - ancestors, - fields, - methods, - type_vars, - resolver: Some(resolver) - } = &mut d.def { - (ancestors, fields, methods, type_vars, resolver.lock()) - } else { unreachable!() }; + } = type_enum.borrow() { + (params, fields) + } else { unreachable!() }; - // try to get mutable reference of the entry in the unification table, get the `TypeEnum` - let (params, - fields - ) = if let TypeEnum::TObj { - params, - fields, - .. - } = self.unifier.get_ty(d.ty).borrow() { - (params, fields) - } else { unreachable!() }; - - // ancestors and typevars associate with the class are analyzed by looking - // into the `bases` ast node - for b in bases { - match &b.node { - // typevars bounded to the class, only support things like `class A(Generic[T, V])`, - // things like `class A(Generic[T, V, ImportedModule.T])` is not supported - // i.e. only simple names are allowed in the subscript - // should update the TopLevelDef::Class.typevars and the TypeEnum::TObj.params - ast::ExprKind::Subscript {value, slice, ..} if { - if let ast::ExprKind::Name {id, ..} = &value.node { - id == "Generic" + // ancestors and typevars associate with the class are analyzed by looking + // into the `bases` ast node + // `Generic` should only occur once, use this flag + let mut generic_occured = false; + // TODO: haven't check this yet + let mut occured_type_var: HashSet = Default::default(); + // TODO: haven't check this yet + let mut occured_base: HashSet = Default::default(); + for b in bases { + match &b.node { + // analyze typevars bounded to the class, + // only support things like `class A(Generic[T, V])`, + // things like `class A(Generic[T, V, ImportedModule.T])` is not supported + // i.e. only simple names are allowed in the subscript + // should update the TopLevelDef::Class.typevars and the TypeEnum::TObj.params + ast::ExprKind::Subscript {value, slice, ..} if { + // can only be `Generic[...]` and this can only appear once + if let ast::ExprKind::Name { id, .. } = &value.node { + if id == "Generic" { + if !generic_occured { + generic_occured = true; + true + } else { + return Err("Only single Generic[...] or Protocol[...] can be in bases".into()) + } } else { false } - } => { - match &slice.node { - // `class Foo(Generic[T, V, P]):` - ast::ExprKind::Tuple {elts, ..} => { - for e in elts { - // let ty_def_id = resolver. - } - }, + } else { false } + } => { + match &slice.node { + // `class Foo(Generic[T, V, P]):` multiple element inside the subscript + ast::ExprKind::Tuple {elts, ..} => { + let tys = elts + .iter() + .map(|x| {resolver.parse_type_annotation( + &self.to_top_level_context(), + unifier.borrow_mut(), + &self.primitives, + x)}) + .collect::, _>>()?; + + let ty_var_ids = tys + .iter() + .map(|t| unifier.get_ty(*t)) + .collect::>() + .iter() + .map(|x| { + let x = x.as_ref(); + if let TypeEnum::TVar {id, ..} = x { + Ok(*id) + } else { + Err("Expect type variabls here".to_string()) + } + }) + .collect::, _>>()?; - // `class Foo(Generic[T]):` - ast::ExprKind::Name {id, ..} => { - // the def_list - // type_vars.push(resolver.get_symbol_type(id).ok_or_else(|| "unknown type variable".to_string())?); FIXME: - unimplemented!() - }, + // write to TypeEnum + for (id, ty) in ty_var_ids.iter().zip(tys.iter()) { + enum_params.borrow_mut().insert(*id, *ty); + } - _ => return Err("not supported, only simple names are allowed in the subscript".into()) + // write to TopLevelDef + for ty in tys{ + def_type_vars.push(ty) + } + }, + + // `class Foo(Generic[T]):`, only single element + _ => { + let ty = resolver.parse_type_annotation( + &self.to_top_level_context(), + unifier.borrow_mut(), + &self.primitives, + &slice + )?; + + let ty_var_id = if let TypeEnum::TVar { id, .. } = unifier + .get_ty(ty) + .as_ref() { *id } else { + return Err("Expect type variabls here".to_string()) + }; + + // write to TypeEnum + enum_params.borrow_mut().insert(ty_var_id, ty); + + // write to TopLevelDef + def_type_vars.push(ty); + }, + }; + } + + // analyze base classes, which is possible in + // other cases, we parse for the base class + _ => { + let ty = resolver.parse_type_annotation( + &self.to_top_level_context(), + unifier.borrow_mut(), + &self.primitives, + b + )?; + + let obj_def_id = if let TypeEnum::TObj { obj_id, .. } = unifier + .get_ty(ty) + .as_ref() { + *obj_id + } else { + return Err("Expect concrete classes/types here".into()) }; - }, - - /* // base class, name directly available inside the - // module, can use this module's symbol resolver - ast::ExprKind::Name {id, ..} => { - // let def_id = resolver.get_identifier_def(id); FIXME: - // the definition list - // ancestors.push(def_id); - }, - - // base class, things can be like `class A(BaseModule.Base)`, here we have to get the - // symbol resolver of the module `BaseModule`? - ast::ExprKind::Attribute {value, attr, ..} => { - if let ast::ExprKind::Name {id, ..} = &value.node { - // if let Some(base_module_resolver) = resolver.get_module_resolver(id) { - // let def_id = base_module_resolver.get_identifier_def(attr); - // // the definition list - // ancestors.push(def_id); - // } else { return Err("unkown imported module".into()) } FIXME: - } else { return Err("unkown imported module".into()) } - }, - - // `class Foo(ImportedModule.A[int, bool])`, A is a class with associated type variables - ast::ExprKind::Subscript {value, slice, ..} => { - unimplemented!() - }, */ - // base class is possible in other cases, we parse for thr base class - _ => return Err("not supported".into()) + // write to TopLevelDef + def_ancestors.push(obj_def_id); } } + } - // class method and field are analyzed by - // looking into the class body ast node - for stmt in body { - if let ast::StmtKind::FunctionDef { - name, - args, - body, - returns, - .. - } = &stmt.node { + // class method and field are analyzed by + // looking into the class body ast node + // NOTE: should consider parents' method and fields(check re-def and add), + // but we do it later we go over these again after we finish analyze the + // fields/methods as declared in the ast + // method with same name should not occur twice, so use this + let defined_method: HashSet = Default::default(); + for stmt in body { + if let ast::StmtKind::FunctionDef { + name, + args, + body, + returns, + .. + } = &stmt.node { + // build type enum, need FunSignature {args, vars, ret} + // args. Now only args with no default TODO: other kinds of args + let func_args = args.args + .iter() + .map(|x| -> Result { + Ok(FuncArg { + name: x.node.arg.clone(), + ty: resolver.parse_type_annotation( + &self.to_top_level_context(), + unifier.borrow_mut(), + &self.primitives, + x + .node + .annotation + .as_ref() + .ok_or_else(|| "type annotations required for function parameters".to_string())? + )?, + default_value: None + }) + }) + .collect::, _>>()?; + // vars. find TypeVars used in the argument type annotation + let func_vars = func_args + .iter() + .filter_map(|FuncArg { ty, .. } | { + if let TypeEnum::TVar { id, .. } = unifier.get_ty(*ty).as_ref() { + Some((*id, *ty)) + } else { None } + }) + .collect::>(); + // return type + let func_ret = resolver + .parse_type_annotation( + &self.to_top_level_context(), + unifier.borrow_mut(), + &self.primitives, + returns + .as_ref() + .ok_or_else(|| "return type annotations required here".to_string())? + .as_ref(), + )?; + // build the TypeEnum + let func_ty = TypeEnum::TFunc(FunSignature { + args: func_args, + vars: func_vars, + ret: func_ret + }); + // TODO: write to the TypeEnum and Def_list + - } else { } + + if name == "__init__" { + // special for constructor, need to look into the fields + // TODO: look into the function body and see + } + } else { // do nothing. we do not care about things like this? // class A: // a = 3 // b = [2, 3] - - } - }, - - // top level function definition - ast::StmtKind::FunctionDef { - name, - args, - body, - returns, - .. - } => { - unimplemented!() } + }, - node => { - return Err("only expect function and class definitions to be submitted here to be analyzed".into()) - } + // top level function definition + Some(ast::Located{node: ast::StmtKind::FunctionDef { + name, + args, + body, + returns, + .. + }, .. }) => { + // TODO: + unimplemented!() } + + // only expect class def and function def ast + _ => return Err("only expect function and class definitions to be submitted here to be analyzed".into()) } } Ok(())