From d8c3c063ecb5cfc17200bdf7c9782f178bce793e Mon Sep 17 00:00:00 2001 From: ychenfo Date: Mon, 16 Aug 2021 09:46:55 +0800 Subject: [PATCH] split top level handling in several functions --- nac3core/src/top_level.rs | 574 +++++++++++++++++--------------------- 1 file changed, 251 insertions(+), 323 deletions(-) diff --git a/nac3core/src/top_level.rs b/nac3core/src/top_level.rs index 5143f997f..ed1b24f48 100644 --- a/nac3core/src/top_level.rs +++ b/nac3core/src/top_level.rs @@ -60,8 +60,6 @@ pub struct TopLevelContext { pub struct TopLevelComposer { // list of top level definitions, same as top level context pub definition_list: Arc>>>, - // list of top level Type, the index is same as the field `definition_list` - pub ty_list: RwLock>, // list of top level ast, the index is same as the field `definition_list` and `ty_list` pub ast_list: RwLock>>>, // start as a primitive unifier, will add more top_level defs inside @@ -70,6 +68,8 @@ pub struct TopLevelComposer { pub primitives: PrimitiveStore, // mangled class method name to def_id pub class_method_to_def_id: RwLock>, + // record the def id of the classes whoses fields and methods are to be analyzed + pub to_be_analyzed_class: RwLock>, } impl TopLevelComposer { @@ -133,21 +133,13 @@ impl TopLevelComposer { let ast_list: Vec>> = vec![None, None, None, None, None]; - let ty_list: Vec = vec![ - primitives.0.int32, - primitives.0.int64, - primitives.0.float, - primitives.0.bool, - primitives.0.none, - ]; - let composer = TopLevelComposer { definition_list: RwLock::new(top_level_def_list).into(), - ty_list: RwLock::new(ty_list), ast_list: RwLock::new(ast_list), primitives: primitives.0, unifier: primitives.1.into(), class_method_to_def_id: Default::default(), + to_be_analyzed_class: Default::default(), }; ( vec![ @@ -190,17 +182,20 @@ impl TopLevelComposer { } } + /// step 0, register, just remeber the names of top level classes/function pub fn register_top_level( &mut self, ast: ast::Stmt<()>, resolver: Option>>, - ) -> Result<(String, DefinitionId, Type), String> { - // get write access to the lists - let (mut def_list, mut ty_list, mut ast_list) = - (self.definition_list.write(), self.ty_list.write(), self.ast_list.write()); - - // will be deleted after tested - assert_eq!(ty_list.len(), def_list.len()); + ) -> Result<(String, DefinitionId), String> { + let ( + mut def_list, + mut ast_list + ) = ( + self.definition_list.write(), + self.ast_list.write() + ); + assert_eq!(def_list.len(), ast_list.len()); match &ast.node { @@ -208,25 +203,17 @@ impl TopLevelComposer { let class_name = name.to_string(); let class_def_id = def_list.len(); - // add the class to the unifier - let ty = self.unifier.write().add_ty(TypeEnum::TObj { - obj_id: DefinitionId(class_def_id), - fields: Default::default(), - params: Default::default(), - }); - // add the class to the definition lists def_list .push(Self::make_top_level_class_def(class_def_id, resolver.clone()).into()); - ty_list.push(ty); // since later when registering class method, ast will still be used, - // here push None temporarly, later will push the ast + // here push None temporarly, later will move the ast inside ast_list.push(None); // parse class def body and register class methods into the def list. // module's symbol resolver would not know the name of the class methods, // thus cannot return their definition_id? so we have to manage it ourselves - // by using the field `class_method_to_def_id` + // by using `class_method_to_def_id` for b in body { if let ast::StmtKind::FunctionDef { name, .. } = &b.node { let fun_name = Self::name_mangling(class_name.clone(), name); @@ -248,356 +235,297 @@ impl TopLevelComposer { ) .into(), ); - ty_list.push(ty); // the ast of class method is in the class, push None in to the list here ast_list.push(None); // class method, do not let the symbol manager manage it, use our own map self.class_method_to_def_id.write().insert(fun_name, DefinitionId(def_id)); - - // if it is the contructor, special handling is needed. In the above - // handling, we still add __init__ function to the class method - if name == "__init__" { - // NOTE: how can this later be fetched? - def_list.push( - TopLevelDef::Initializer { class_id: DefinitionId(class_def_id) } - .into(), - ); - // arbitarily push one to make sure the index is correct - ty_list.push(self.primitives.none); - ast_list.push(None); - } } } // move the ast to the entry of the class in the ast_list ast_list[class_def_id] = Some(ast); - // return - Ok((class_name, DefinitionId(class_def_id), ty)) + // put the constructor into the def_list + def_list.push( + TopLevelDef::Initializer { class_id: DefinitionId(class_def_id) } + .into(), + ); + ast_list.push(None); + + // class, put its def_id into the to be analyzed set + let mut to_be_analyzed = self.to_be_analyzed_class.write(); + to_be_analyzed.push(DefinitionId(class_def_id)); + + + Ok((class_name, DefinitionId(class_def_id))) } ast::StmtKind::FunctionDef { name, .. } => { let fun_name = name.to_string(); - // add to the unifier - let ty = self.unifier.write().add_ty(TypeEnum::TFunc(FunSignature { - args: Default::default(), - ret: self.primitives.none, - vars: Default::default(), - })); - // add to the definition list def_list.push( Self::make_top_level_function_def(name.into(), self.primitives.none, resolver) .into(), ); - ty_list.push(ty); ast_list.push(Some(ast)); // return - Ok((fun_name, DefinitionId(def_list.len() - 1), ty)) + Ok((fun_name, DefinitionId(def_list.len() - 1))) } _ => Err("only registrations of top level classes/functions are supprted".into()), } } - pub fn analyze_top_level_class_type_var(&mut self) -> Result<(), String> { + /// step 1, analyze the type vars associated with top level class + fn analyze_top_level_class_type_var(&mut self) -> Result<(), String> { let mut def_list = self.definition_list.write(); - let ty_list = self.ty_list.read(); let ast_list = self.ast_list.read(); let mut unifier = self.unifier.write(); - for (def, ty, ast) in def_list + for (class_def, class_ast) in def_list .iter_mut() - .zip(ty_list.iter()) .zip(ast_list.iter()) - .map(|((x, y), z)| (x, y, z)) - .collect::, &Type, &Option>)>>() - { - unimplemented!() - } - unimplemented!() - } - - /// this should be called after all top level classes are registered, and - /// will actually fill in those fields of the previous dummy one - pub fn analyze_top_level(&mut self) -> Result<(), String> { - let mut def_list = self.definition_list.write(); - let ty_list = self.ty_list.read(); - let ast_list = self.ast_list.read(); - let mut unifier = self.unifier.write(); - - for (def, ty, ast) in def_list - .iter_mut() - .zip(ty_list.iter()) - .zip(ast_list.iter()) - .map(|((x, y), z)| (x, y, z)) - .collect::, &Type, &Option>)>>() - { - // only analyze those entries with ast, and class_method(whose ast in class def) - match ast { - Some(ast::Located{node: ast::StmtKind::ClassDef { - bases, - body, - name: class_name, + .collect::, &Option>)>>() { + // only deal with class def here + let ( + class_bases, + class_def_type_vars, + class_resolver + ) = { + if let TopLevelDef::Class { + type_vars, + resolver, .. - }, .. }) => { - // get the mutable reference of the entry in the - // definition list, get the `TopLevelDef` - let ( - def_ancestors, - def_fields, - def_methods, - def_type_vars, - resolver, - ) = if let TopLevelDef::Class { - object_id: _, - ancestors, - fields, - methods, - type_vars, - resolver: Some(resolver) - } = def.get_mut() { - (ancestors, fields, methods, type_vars, resolver.lock()) - } else { unreachable!() }; - - // try to get mutable reference of the entry in the - // unification table, get the `TypeEnum` - let type_enum = unifier.get_ty(*ty); - let ( - enum_params, - enum_fields - ) = if let TypeEnum::TObj { - params, - fields, + } = class_def.get_mut() { + if let Some(ast::Located {node: ast::StmtKind::ClassDef { + bases, .. - } = type_enum.borrow() { - (params, fields) - } else { unreachable!() }; - - // ancestors and typevars associate with the class are analyzed by looking - // into the `bases` ast node - // `Generic` should only occur once, use this flag - let mut generic_occured = false; - // TODO: haven't check this yet - let mut occured_type_var: HashSet = Default::default(); - // TODO: haven't check this yet - let mut occured_base: HashSet = Default::default(); - for b in bases { - match &b.node { - // analyze typevars bounded to the class, - // only support things like `class A(Generic[T, V])`, - // things like `class A(Generic[T, V, ImportedModule.T])` is not supported - // i.e. only simple names are allowed in the subscript - // should update the TopLevelDef::Class.typevars and the TypeEnum::TObj.params - ast::ExprKind::Subscript {value, slice, ..} if { - // can only be `Generic[...]` and this can only appear once - if let ast::ExprKind::Name { id, .. } = &value.node { - if id == "Generic" { - if !generic_occured { - generic_occured = true; - true - } else { - return Err("Only single Generic[...] or Protocol[...] can be in bases".into()) - } - } else { false } - } else { false } - } => { - match &slice.node { - // `class Foo(Generic[T, V, P]):` multiple element inside the subscript - ast::ExprKind::Tuple {elts, ..} => { - let tys = elts - .iter() - // here parse_type_annotation should be fine, - // since we only expect type vars, which is not relevant - // to the top-level parsing - .map(|x| resolver.parse_type_annotation( - &self.to_top_level_context(), - unifier.borrow_mut(), - &self.primitives, - x)) - .collect::, _>>()?; - - let ty_var_ids = tys - .iter() - .map(|t| { - let tmp = unifier.get_ty(*t); - // make sure it is type var - if let TypeEnum::TVar {id, ..} = tmp.as_ref() { - Ok(*id) - } else { - Err("Expect type variabls here".to_string()) - } - }) - .collect::, _>>()?; - - // write to TypeEnum - for (id, ty) in ty_var_ids.iter().zip(tys.iter()) { - enum_params.borrow_mut().insert(*id, *ty); - } - - // write to TopLevelDef - for ty in tys{ - def_type_vars.push(ty) - } - }, - - // `class Foo(Generic[T]):`, only single element - _ => { - let ty = resolver.parse_type_annotation( - &self.to_top_level_context(), - unifier.borrow_mut(), - &self.primitives, - &slice - )?; - - let ty_var_id = if let TypeEnum::TVar { id, .. } = unifier - .get_ty(ty) - .as_ref() { *id } else { - return Err("Expect type variabls here".to_string()) - }; - - // write to TypeEnum - enum_params.borrow_mut().insert(ty_var_id, ty); - - // write to TopLevelDef - def_type_vars.push(ty); - }, - }; - } - - // analyze base classes, which is possible in - // other cases, we parse for the base class - // FIXME: calling parse_type_annotation here might cause some problem - // when the base class is parametrized `BaseClass[int, bool]`, since the - // analysis of type var of some class is not done yet. - // we can first only look at the name, and later check the - // parameter when others are done - // Or - // first get all the class' type var analyzed, and then - // analyze the base class - _ => { - let ty = resolver.parse_type_annotation( - &self.to_top_level_context(), - unifier.borrow_mut(), - &self.primitives, - b - )?; - - let obj_def_id = if let TypeEnum::TObj { obj_id, .. } = unifier - .get_ty(ty) - .as_ref() { - *obj_id - } else { - return Err("Expect concrete classes/types here".into()) - }; - - // write to TopLevelDef - def_ancestors.push(obj_def_id); - } - } - } - - // class method and field are analyzed by - // looking into the class body ast node - // NOTE: should consider parents' method and fields(check re-def and add), - // but we do it later we go over these again after we finish analyze the - // fields/methods as declared in the ast - // method with same name should not occur twice, so use this - let defined_method: HashSet = Default::default(); - for stmt in body { - if let ast::StmtKind::FunctionDef { - name: func_name, - args, - body, - returns, - .. - } = &stmt.node { - // build type enum, need FunSignature {args, vars, ret} - // args. Now only args with no default TODO: other kinds of args - let func_args = args.args + }, .. }) = class_ast { + (bases, type_vars, resolver) + } else { unreachable!("must be both class") } + } else { continue } + }; + + let mut generic_occured = false; + for b in class_bases { + match &b.node { + // analyze typevars bounded to the class, + // only support things like `class A(Generic[T, V])`, + // things like `class A(Generic[T, V, ImportedModule.T])` is not supported + // i.e. only simple names are allowed in the subscript + // should update the TopLevelDef::Class.typevars and the TypeEnum::TObj.params + ast::ExprKind::Subscript {value, slice, ..} if { + // can only be `Generic[...]` and this can only appear once + if let ast::ExprKind::Name { id, .. } = &value.node { + if id == "Generic" { + if !generic_occured { + generic_occured = true; + true + } else { + return Err("Only single Generic[...] can be in bases".into()) + } + } else { false } + } else { false } + } => { + // if `class A(Generic[T, V, G])` + if let ast::ExprKind::Tuple { elts, .. } = &slice.node { + // parse the type vars + let type_vars = elts .iter() - .map(|x| -> Result { - Ok(FuncArg { - name: x.node.arg.clone(), - ty: resolver.parse_type_annotation( + .map(|e| + class_resolver + .as_ref() + .unwrap() + .lock() + .parse_type_annotation( &self.to_top_level_context(), unifier.borrow_mut(), &self.primitives, - x - .node - .annotation - .as_ref() - .ok_or_else(|| "type annotations required for function parameters".to_string())? - )?, - default_value: None - }) - }) - .collect::, _>>()?; - // vars. find TypeVars used in the argument type annotation - let func_vars = func_args + e) + ) + .collect::, _>>()?; + + // check if all are unique type vars + let mut occured_type_var_id: HashSet = HashSet::new(); + let all_unique_type_var = type_vars .iter() - .filter_map(|FuncArg { ty, .. } | { - if let TypeEnum::TVar { id, .. } = unifier.get_ty(*ty).as_ref() { - Some((*id, *ty)) - } else { None } - }) - .collect::>(); - // return type - let func_ret = resolver - .parse_type_annotation( + .all(|x| { + let ty = unifier.get_ty(*x); + if let TypeEnum::TVar {id, ..} = ty.as_ref() { + occured_type_var_id.insert(*id) + } else { false } + }); + + if !all_unique_type_var { return Err("expect unique type variables".into()) } + + // add to TopLevelDef + class_def_type_vars.extend(type_vars); + + // `class A(Generic[T])` + } else { + let ty = + class_resolver + .as_ref() + .unwrap() + .lock() + .parse_type_annotation( &self.to_top_level_context(), unifier.borrow_mut(), &self.primitives, - returns - .as_ref() - .ok_or_else(|| "return type annotations required here".to_string())? - .as_ref(), + &slice )?; - // build the TypeEnum - let func_type_sig = FunSignature { - args: func_args, - vars: func_vars, - ret: func_ret - }; - - // write to the TypeEnum and Def_list (by replacing the ty with the new Type created above) - let func_name_mangled = Self::name_mangling(class_name.clone(), func_name); - let def_id = self.class_method_to_def_id.read()[&func_name_mangled]; - unimplemented!(); - - - if func_name == "__init__" { - // special for constructor, need to look into the fields - // TODO: look into the function body and see - } - } else { - // do nothing. we do not care about things like this? - // class A: - // a = 3 - // b = [2, 3] + // check if it is type var + let is_type_var = matches!( + unifier.get_ty(ty).as_ref(), + &TypeEnum::TVar { .. } + ); + if !is_type_var { return Err("expect type variable here".into()) } + + // add to TopLevelDef + class_def_type_vars.push(ty); } } - }, + + // if others, do nothing in this function + _ => continue + } + } + + }; + Ok(()) + } - // top level function definition - Some(ast::Located{node: ast::StmtKind::FunctionDef { - name, - args, - body, - returns, + /// step 2, base classes. Need to separate step1 and step2 for this reason: + /// `class B(Generic[T, V]); + /// class A(B[int, bool])` + /// if the type var associated with class `B` has not been handled properly, + /// the parse of type annotation of `B[int, bool]` will fail + fn analyze_top_level_class_bases(&mut self) -> Result<(), String> { + let mut def_list = self.definition_list.write(); + let ast_list = self.ast_list.read(); + let mut unifier = self.unifier.write(); + + for (class_def, class_ast) in def_list + .iter_mut() + .zip(ast_list.iter()) + .collect::, &Option>)>>() { + let ( + class_bases, + class_ancestors, + class_resolver + ) = { + if let TopLevelDef::Class { + ancestors, + resolver, .. - }, .. }) => { - // TODO: - unimplemented!() + } = class_def.get_mut() { + if let Some(ast::Located {node: ast::StmtKind::ClassDef { + bases, + .. + }, .. }) = class_ast { + (bases, ancestors, resolver) + } else { unreachable!("must be both class") } + } else { continue } + }; + for b in class_bases { + // type vars have already been handled, so skip on `Generic[...]` + if let ast::ExprKind::Subscript {value, ..} = &b.node { + if let ast::ExprKind::Name {id, ..} = &value.node { + if id == "Generic" { continue } + } + } + // get the def id of the base class + let base_ty = class_resolver.as_ref().unwrap().lock().parse_type_annotation( + &self.to_top_level_context(), + unifier.borrow_mut(), + &self.primitives, + b + )?; + let base_id = + if let TypeEnum::TObj {obj_id, ..} = unifier.get_ty(base_ty).as_ref() { + *obj_id + } else { return Err("expect concrete class/type to be base class".into()) }; + + // write to the class ancestors + class_ancestors.push(base_id); + } + + }; + Ok(()) + } + + /// step 3, class_fields + fn analyze_top_level_class_fields_methods(&mut self) -> Result<(), String> { + let mut def_list = self.definition_list.write(); + let ast_list = self.ast_list.read(); + let mut unifier = self.unifier.write(); + let class_method_to_def_id = self.class_method_to_def_id.read(); + let mut to_be_analyzed_class = self.to_be_analyzed_class.write(); + + while !to_be_analyzed_class.is_empty() { + let ind = to_be_analyzed_class.remove(0).0; + + let (class_def, class_ast) = ( + &mut def_list[ind], &ast_list[ind] + ); + let ( + class_name, + class_fields, + class_methods, + class_resolver, + class_body + ) = { + if let TopLevelDef::Class { + resolver, + fields, + methods, + .. + } = class_def.get_mut() { + if let Some(ast::Located {node: ast::StmtKind::ClassDef { + name, + body, + .. + }, .. }) = class_ast { + (name, fields, methods, resolver, body) + } else { unreachable!("must be both class") } + } else { continue } + }; + for b in class_body { + if let ast::StmtKind::FunctionDef { + args: func_args, + body: func_body, + name: func_name, + returns: func_returns, + .. + } = &b.node { + // unwrap should not fail + let method_def_id = + class_method_to_def_id + .get(&Self::name_mangling( + class_name.into(), + func_name) + ).unwrap(); + + let a = &def_list[method_def_id.0]; + } else { + // what should we do with `class A: a = 3`? + continue } - // only expect class def and function def ast - _ => return Err("only expect function and class definitions to be submitted here to be analyzed".into()) } } Ok(()) + + } + + fn analyze_top_level_inheritance(&mut self) -> Result<(), String> { + unimplemented!() + } + + fn analyze_top_level_field_instantiation(&mut self) -> Result<(), String> { + unimplemented!() } }