From 82ce816177ef422a4658527e3ce82511aeb84a1a Mon Sep 17 00:00:00 2001 From: ychenfo Date: Tue, 10 Aug 2021 10:33:18 +0800 Subject: [PATCH] refactored top level parsing, need review --- nac3core/src/symbol_resolver.rs | 11 +- nac3core/src/top_level.rs | 272 ++++++++++++------ .../src/typecheck/type_inferencer/test.rs | 14 +- 3 files changed, 204 insertions(+), 93 deletions(-) diff --git a/nac3core/src/symbol_resolver.rs b/nac3core/src/symbol_resolver.rs index 86a43ad76..7d34423f3 100644 --- a/nac3core/src/symbol_resolver.rs +++ b/nac3core/src/symbol_resolver.rs @@ -15,10 +15,11 @@ pub enum SymbolValue { } pub trait SymbolResolver { - fn get_symbol_type(&mut self, str: &str) -> Option; - fn parse_type_name(&mut self, expr: &Expr<()>) -> Option; - fn get_function_def(&mut self, str: &str) -> DefinitionId; - fn get_symbol_value(&mut self, str: &str) -> Option; - fn get_symbol_location(&mut self, str: &str) -> Option; + fn get_symbol_type(&self, str: &str) -> Option; + fn parse_type_name(&self, expr: &Expr<()>) -> Option; + fn get_identifier_def(&self, str: &str) -> DefinitionId; + fn get_symbol_value(&self, str: &str) -> Option; + fn get_symbol_location(&self, str: &str) -> Option; + fn get_module_resolver(&self, module_name: &str) -> Option<&dyn SymbolResolver>; // NOTE: for getting imported modules' symbol resolver? // handle function call etc. } diff --git a/nac3core/src/top_level.rs b/nac3core/src/top_level.rs index 29238ee62..8eafc61a6 100644 --- a/nac3core/src/top_level.rs +++ b/nac3core/src/top_level.rs @@ -1,4 +1,3 @@ -use std::default; use std::{collections::HashMap, sync::Arc}; use super::typecheck::type_inferencer::PrimitiveStore; @@ -45,6 +44,9 @@ pub enum TopLevelDef { /// rigid type variables that would be substituted when the function is instantiated. instance_to_stmt: HashMap>, usize)>, }, + Initializer { + class_id: Option, + } } pub struct CodeGenTask { @@ -76,40 +78,46 @@ pub struct CodeGenContext<'ctx> { pub loop_bb: Option<(BasicBlock<'ctx>, BasicBlock<'ctx>)>, } -pub struct TopLevelManager<'a> { - pub def_index: usize, - pub ctx: TopLevelContext, - pub resolver: &'a mut Box, - pub primitives: (PrimitiveStore, Unifier) -} use rustpython_parser::ast; -impl<'a> TopLevelManager<'a> { +pub struct TopLevelDefInfo<'a> { // like adding some info on top of the TopLevelDef for later parsing the class bases, method, and function sigatures + def: TopLevelDef, // the definition entry + ty: Type, // the entry in the top_level unifier + ast: Option>, // the ast submitted by applications + resolver: Option<&'a dyn SymbolResolver> // the resolver +} +pub struct TopLevelComposer<'a> { + pub definition_list: Vec>, + pub primitives: PrimitiveStore, + pub unifier: Unifier, +} + +impl<'a> TopLevelComposer<'a> { pub fn make_primitives() -> (PrimitiveStore, Unifier) { let mut unifier = Unifier::new(); let int32 = unifier.add_ty(TypeEnum::TObj { - obj_id: DefinitionId(0), // NOTE: what should it be? + obj_id: DefinitionId(0), // 0 should be fine fields: HashMap::new().into(), params: HashMap::new(), }); let int64 = unifier.add_ty(TypeEnum::TObj { - obj_id: DefinitionId(1), // NOTE: what should it be? + obj_id: DefinitionId(1), // 0 should be fine fields: HashMap::new().into(), params: HashMap::new(), }); let float = unifier.add_ty(TypeEnum::TObj { - obj_id: DefinitionId(2), // NOTE: what should it be? + obj_id: DefinitionId(2), // 0 should be fine fields: HashMap::new().into(), params: HashMap::new(), }); let bool = unifier.add_ty(TypeEnum::TObj { - obj_id: DefinitionId(3), // NOTE: what should it be? + obj_id: DefinitionId(3), // 0 should be fine fields: HashMap::new().into(), params: HashMap::new(), }); let none = unifier.add_ty(TypeEnum::TObj { - obj_id: DefinitionId(4), // NOTE: what should it be? + obj_id: DefinitionId(4), // 0 should be fine fields: HashMap::new().into(), params: HashMap::new(), }); @@ -117,90 +125,188 @@ impl<'a> TopLevelManager<'a> { crate::typecheck::magic_methods::set_primitives_magic_methods(&primitives, &mut unifier); (primitives, unifier) } - - pub fn new(resolver: &'a mut Box) -> Self { - TopLevelManager { - def_index: 1, - ctx: TopLevelContext { - definitions: Default::default(), - unifiers: Default::default() + pub fn new() -> Self { + let primitives = Self::make_primitives(); + let definition_list: Vec> = vec![ + TopLevelDefInfo { + def: Self::make_top_level_class_def(0), + ast: None, + resolver: None, + ty: primitives.0.int32 // just arbitary picked one... }, - resolver, - primitives: Self::make_primitives() + TopLevelDefInfo { + def: Self::make_top_level_class_def(1), + ast: None, + resolver: None, + ty: primitives.0.int64 // just arbitary picked one... + }, + TopLevelDefInfo { + def: Self::make_top_level_class_def(2), + ast: None, + resolver: None, + ty: primitives.0.float // just arbitary picked one... + }, + TopLevelDefInfo { + def: Self::make_top_level_class_def(3), + ast: None, + resolver: None, + ty: primitives.0.bool // just arbitary picked one... + }, + TopLevelDefInfo { + def: Self::make_top_level_class_def(4), + ast: None, + resolver: None, + ty: primitives.0.none // just arbitary picked one... + }, + ]; // the entries for primitive types + TopLevelComposer { + definition_list, + primitives: primitives.0, + unifier: primitives.1 } } - pub fn register_top_level(&mut self, ast: &ast::Stmt<()>) -> Result{ + pub fn make_top_level_class_def(index: usize) -> TopLevelDef { + TopLevelDef::Class { + object_id: DefinitionId(index), + type_vars: Default::default(), + fields: Default::default(), + methods: Default::default(), + ancestors: Default::default(), + } + } + pub fn make_top_level_function_def(name: String, ty: Type) -> TopLevelDef { + TopLevelDef::Function { + name, + signature: ty, + instance_to_symbol: Default::default(), + instance_to_stmt: Default::default() + } + } + + // like to make and return a "primitive" symbol resolver? so that the symbol resolver can later figure out primitive type definitions when passed a primitive type name + pub fn get_primitives_definition(&self) -> Vec<(String, DefinitionId, Type)> { + vec![ + ("int32".into(), DefinitionId(0), self.primitives.int32), + ("int64".into(), DefinitionId(0), self.primitives.int32), + ("float".into(), DefinitionId(0), self.primitives.int32), + ("bool".into(), DefinitionId(0), self.primitives.int32), + ("none".into(), DefinitionId(0), self.primitives.int32), + ] + } + + pub fn register_top_level(&mut self, ast: ast::Stmt<()>, resolver: &'a dyn SymbolResolver) -> Result, String> { match &ast.node { - ast::StmtKind::ClassDef { - name, - bases, - keywords, - body, - decorator_list - } => { - // ancestors and type_vars are found using the `bases` field - let mut class_ancestors: Vec = Default::default(); - let mut class_type_vars: Vec = Default::default(); - for base in bases { - match &base.node { - ast::ExprKind::Subscript {value, slice, ..} => { - match &value.node { - ast::ExprKind::Name {id, ..} if id == "Generic" => { - match &slice.node { - ast::ExprKind::Tuple {elts, ..} => { - for e in elts { - class_type_vars.push( - self.resolver. - parse_type_name(e) - .ok_or_else(|| "unkown base class type".to_string())? - ); // FIXME: is it correct to use this? - } - }, - _ => class_type_vars.push( - self.resolver - .parse_type_name(slice) - .ok_or_else(|| "unkown base class type".to_string())? - ) // FIXME: is it correct to use this? + ast::StmtKind::ClassDef {name, body, ..} => { + let class_name = name.to_string(); + let def_id = self.definition_list.len(); + // add the class to the unifier + let ty = self.unifier.add_ty(TypeEnum::TObj { + obj_id: DefinitionId(def_id), + fields: Default::default(), + params: Default::default() + }); + // add to the definition list + self.definition_list.push( + TopLevelDefInfo { + def: Self::make_top_level_class_def(def_id), + resolver: Some(resolver), + ast: Some(ast), + ty, + } + ); + + // TODO: parse class def body and register class methods into the def list? + // FIXME: module's symbol resolver would not know the name of the class methods, thus cannot return their definition_id? so we have to manage it ourselves? + // or do we return the class method list of (method_name, def_id, type) to application to be used to build symbol resolver? <- current implementation + + Ok(vec![(class_name, DefinitionId(def_id), ty)]) // FIXME: need to add class method def + }, + + ast::StmtKind::FunctionDef {name, ..} => { + let fun_name = name.to_string(); + let def_id = self.definition_list.len(); + // add to the unifier + let ty = self.unifier.add_ty(TypeEnum::TFunc(crate::typecheck::typedef::FunSignature { + args: Default::default(), + ret: self.primitives.none, // NOTE: this needs to be changed later + vars: Default::default() + })); + // add to the definition list + self.definition_list.push( + TopLevelDefInfo { + def: Self::make_top_level_function_def( + name.into(), + self.primitives.none // NOTE: this needs to be changed later + ), + resolver: Some(resolver), + ast: Some(ast), + ty, + } + ); + + Ok(vec![(fun_name, DefinitionId(def_id), ty)]) + }, + + _ => Err("only registrations of top level classes/functions are supprted".into()) + } + } + + /// this should be called after all top level classes are registered, and will actually fill in those fields of the previous dummy one + pub fn analyze_top_level(&mut self) -> Result<(), String> { + for mut d in &mut self.definition_list { + if let (Some(ast), Some(resolver)) = (&d.ast, d.resolver) { + match &ast.node { + ast::StmtKind::ClassDef { + name, + bases, + body, + .. + } => { + // ancestors and typevars associate with the class are analyzed by looking into the `bases` ast node + for b in bases { + match &b.node { + ast::ExprKind::Name {id, ..} => { // base class, name directly available inside the module, can use this module's symbol resolver + let def_id = resolver.get_identifier_def(id); + unimplemented!() + }, + ast::ExprKind::Attribute {value, attr, ..} => { // things can be like `class A(BaseModule.Base)`, here we have to get the symbol resolver of the module `BaseModule`? + unimplemented!() // need to change symbol resolver in order to get the symbol resolver of the imported module + }, + ast::ExprKind::Subscript {value, slice, ..} => { // typevars bounded to the class, things like `class A(Generic[T, V])` + if let ast::ExprKind::Name {id, ..} = &value.node { + if id == "Generic" { + // TODO: get typevars + unimplemented!() + } else { + return Err("unknown type var".into()) + } } }, - _ => return Err("only subscription on keyword Generic is allowed".into()) + _ => return Err("not supported".into()) } - }, - - ast::ExprKind::Name {id, ..} => { - class_ancestors.push(self.resolver.get_function_def(id)) // FIXME: is it correct to use this? } - _ => return Err("unsupported expression in the bases list".into()) - } - } - - // fields and methods are determined using the `body` field - let class_fields: Vec<(String, Type)> = Default::default(); - let class_methods: Vec<(String, Type, DefinitionId)> = Default::default(); - for stmt in body { - match &stmt.node { - ast::StmtKind::FunctionDef {name, .. } if name != "__init__" => { - let result = self.register_top_level(stmt)?; + // class method and field are analyzed by looking into the class body ast node + for stmt in body { unimplemented!() - }, + } + }, - _ => unimplemented!() + ast::StmtKind::FunctionDef { + name, + args, + body, + returns, + .. + } => { + unimplemented!() } + + _ => return Err("only expect function and class definitions to be submitted here to be analyzed".into()) } - - let defs = self.ctx.definitions.write(); - let index = defs.len(); - - unimplemented!() - }, - - ast::StmtKind::FunctionDef {name, ..} => { - unimplemented!() } - - _ => Err("only expect function definition and class definition".into()) - } + }; + Ok(()) } } \ No newline at end of file diff --git a/nac3core/src/typecheck/type_inferencer/test.rs b/nac3core/src/typecheck/type_inferencer/test.rs index 76b7515ca..f4d2cd8d1 100644 --- a/nac3core/src/typecheck/type_inferencer/test.rs +++ b/nac3core/src/typecheck/type_inferencer/test.rs @@ -15,11 +15,11 @@ struct Resolver { } impl SymbolResolver for Resolver { - fn get_symbol_type(&mut self, str: &str) -> Option { + fn get_symbol_type(&self, str: &str) -> Option { self.identifier_mapping.get(str).cloned() } - fn parse_type_name(&mut self, ty: &ast::Expr<()>) -> Option { + fn parse_type_name(&self, ty: &ast::Expr<()>) -> Option { if let ExprKind::Name { id, .. } = &ty.node { self.class_names.get(id).cloned() } else { @@ -27,15 +27,19 @@ impl SymbolResolver for Resolver { } } - fn get_symbol_value(&mut self, _: &str) -> Option { + fn get_symbol_value(&self, _: &str) -> Option { unimplemented!() } - fn get_symbol_location(&mut self, _: &str) -> Option { + fn get_symbol_location(&self, _: &str) -> Option { unimplemented!() } - fn get_function_def(&mut self, _: &str) -> DefinitionId { + fn get_identifier_def(&self, _: &str) -> DefinitionId { + unimplemented!() + } + + fn get_module_resolver(&self, _: &str) -> Option<&dyn SymbolResolver> { unimplemented!() } }