From ff41cdb000301442974c1d3d97583a7fdc46523e Mon Sep 17 00:00:00 2001 From: pca006132 Date: Tue, 5 Jan 2021 12:17:45 +0800 Subject: [PATCH] implemented statement check --- nac3core/src/context/inference_context.rs | 38 ++-- nac3core/src/expression_inference.rs | 2 +- nac3core/src/lib.rs | 2 +- nac3core/src/statement_check.rs | 241 ++++++++++++++++++++++ nac3core/src/statement_inference.rs | 95 --------- 5 files changed, 262 insertions(+), 116 deletions(-) create mode 100644 nac3core/src/statement_check.rs delete mode 100644 nac3core/src/statement_inference.rs diff --git a/nac3core/src/context/inference_context.rs b/nac3core/src/context/inference_context.rs index d056e3d4e8..bcea7e8b9c 100644 --- a/nac3core/src/context/inference_context.rs +++ b/nac3core/src/context/inference_context.rs @@ -22,14 +22,14 @@ pub struct InferenceContext<'a> { primitives: Vec, /// list of variable instances variables: Vec, - /// identifier to (type, readable) mapping. - /// an identifier might be defined earlier but has no value (for some code path), thus not - /// readable. - sym_table: HashMap<&'a str, (Type, bool)>, + /// identifier to type mapping. + sym_table: HashMap<&'a str, Type>, /// resolution function reference, that may resolve unbounded identifiers to some type resolution_fn: Box Result>, /// stack stack: ContextStack<'a>, + /// return type + result: Option, } // non-trivial implementations here @@ -56,6 +56,7 @@ impl<'a> InferenceContext<'a> { var_defs: Vec::new(), sym_def: Vec::new(), }, + result: None, } } @@ -63,7 +64,7 @@ impl<'a> InferenceContext<'a> { /// variable assignment would be limited within the scope (not readable outside), and type /// variable type guard would be limited within the scope. /// returns the list of variables assigned within the scope, and the result of the function - pub fn with_scope(&mut self, f: F) -> (Vec<&'a str>, R) + pub fn with_scope(&mut self, f: F) -> (Vec<(&'a str, Type)>, R) where F: FnOnce(&mut Self) -> R, { @@ -84,8 +85,8 @@ impl<'a> InferenceContext<'a> { let (_, level) = self.stack.sym_def.last().unwrap(); if *level > self.stack.level { let (name, _) = self.stack.sym_def.pop().unwrap(); - self.sym_table.remove(name).unwrap(); - poped_names.push(name); + let ty = self.sym_table.remove(name).unwrap(); + poped_names.push((name, ty)); } else { break; } @@ -96,19 +97,15 @@ impl<'a> InferenceContext<'a> { /// assign a type to an identifier. /// may return error if the identifier was defined but with different type pub fn assign(&mut self, name: &'a str, ty: Type) -> Result { - if let Some((t, x)) = self.sym_table.get_mut(name) { + if let Some(t) = self.sym_table.get_mut(name) { if t == &ty { - if !*x { - self.stack.sym_def.push((name, self.stack.level)); - } - *x = true; Ok(ty) } else { Err("different types".into()) } } else { self.stack.sym_def.push((name, self.stack.level)); - self.sym_table.insert(name, (ty.clone(), true)); + self.sym_table.insert(name, ty.clone()); Ok(ty) } } @@ -122,12 +119,8 @@ impl<'a> InferenceContext<'a> { /// may return error if the identifier is not defined, and cannot be resolved with the /// resolution function. pub fn resolve(&mut self, name: &str) -> Result { - if let Some((t, x)) = self.sym_table.get(name) { - if *x { - Ok(t.clone()) - } else { - Err("may not have value".into()) - } + if let Some(t) = self.sym_table.get(name) { + Ok(t.clone()) } else { self.resolution_fn.as_mut()(name) } @@ -139,6 +132,10 @@ impl<'a> InferenceContext<'a> { std::mem::swap(self.top_level.var_defs.get_mut(id.0).unwrap(), &mut def); self.stack.var_defs.push((id.0, def, self.stack.level)); } + + pub fn set_result(&mut self, result: Option) { + self.result = result; + } } // trivial getters: @@ -168,6 +165,9 @@ impl<'a> InferenceContext<'a> { pub fn get_type(&self, name: &str) -> Option { self.top_level.get_type(name) } + pub fn get_result(&self) -> Option { + self.result.clone() + } } impl TypeEnum { diff --git a/nac3core/src/expression_inference.rs b/nac3core/src/expression_inference.rs index 34acf4ddc7..2405982c09 100644 --- a/nac3core/src/expression_inference.rs +++ b/nac3core/src/expression_inference.rs @@ -303,7 +303,7 @@ fn infer_if_expr<'b: 'a, 'a>( } } -fn infer_simple_binding<'a: 'b, 'b>( +pub fn infer_simple_binding<'a: 'b, 'b>( ctx: &mut InferenceContext<'b>, name: &'a Expression, ty: Type, diff --git a/nac3core/src/lib.rs b/nac3core/src/lib.rs index dd3553e577..6e031979eb 100644 --- a/nac3core/src/lib.rs +++ b/nac3core/src/lib.rs @@ -7,7 +7,7 @@ extern crate rustpython_parser; pub mod expression_inference; pub mod inference_core; -pub mod statement_inference; +pub mod statement_check; mod magic_methods; pub mod primitives; pub mod typedef; diff --git a/nac3core/src/statement_check.rs b/nac3core/src/statement_check.rs new file mode 100644 index 0000000000..f480adba3e --- /dev/null +++ b/nac3core/src/statement_check.rs @@ -0,0 +1,241 @@ +use crate::context::InferenceContext; +use crate::expression_inference::{infer_expr, infer_simple_binding}; +use crate::inference_core::resolve_call; +use crate::magic_methods::binop_assign_name; +use crate::primitives::*; +use crate::typedef::{Type, TypeEnum::*}; +use rustpython_parser::ast::*; + +pub fn check_stmts<'b: 'a, 'a>( + ctx: &mut InferenceContext<'a>, + stmts: &'b [Statement], +) -> Result { + for stmt in stmts.iter() { + match &stmt.node { + StatementType::Assign { targets, value } => { + check_assign(ctx, targets.as_slice(), &value)?; + } + StatementType::AugAssign { target, op, value } => { + check_aug_assign(ctx, &target, op, &value)?; + } + StatementType::If { test, body, orelse } => { + check_if(ctx, test, body.as_slice(), orelse)?; + } + StatementType::While { test, body, orelse } => { + check_while_stmt(ctx, test, body.as_slice(), orelse)?; + } + StatementType::For { + is_async, + target, + iter, + body, + orelse, + } => { + if *is_async { + return Err("async for is not supported".to_string()); + } + check_for_stmt(ctx, target, iter, body.as_slice(), orelse)?; + } + StatementType::Return { value } => { + let result = ctx.get_result(); + let t = if let Some(value) = value { + infer_expr(ctx, value)? + } else { + None + }; + return if t == result { + Ok(true) + } else { + Err("return type mismatch".to_string()) + }; + } + StatementType::Continue | StatementType::Break => { + continue; + } + _ => return Err("not supported".to_string()), + } + } + Ok(false) +} + +fn get_target_type<'b: 'a, 'a>( + ctx: &mut InferenceContext<'a>, + target: &'b Expression, +) -> Result { + match &target.node { + ExpressionType::Subscript { a, b } => { + let int32 = ctx.get_primitive(INT32_TYPE); + if infer_expr(ctx, &a)? == Some(int32) { + let b = get_target_type(ctx, &b)?; + if let ParametricType(LIST_TYPE, t) = b.as_ref() { + Ok(t[0].clone()) + } else { + Err("subscript is only supported for list".to_string()) + } + } else { + Err("subscript must be int32".to_string()) + } + } + ExpressionType::Attribute { value, name } => { + let t = get_target_type(ctx, &value)?; + let base = t.get_base(ctx).ok_or_else(|| "no attributes".to_string())?; + Ok(base + .fields + .get(name.as_str()) + .ok_or_else(|| "no such attribute")? + .clone()) + } + ExpressionType::Identifier { name } => Ok(ctx.resolve(name.as_str())?), + _ => Err("not supported".to_string()), + } +} + +fn check_stmt_binding<'b: 'a, 'a>( + ctx: &mut InferenceContext<'a>, + target: &'b Expression, + ty: Type, +) -> Result<(), String> { + match &target.node { + ExpressionType::Identifier { name } => { + if name.as_str() == "_" { + Ok(()) + } else { + match ctx.resolve(name.as_str()) { + Ok(t) if t == ty => Ok(()), + Err(_) => { + ctx.assign(name.as_str(), ty).unwrap(); + Ok(()) + } + _ => Err("conflicting type".into()), + } + } + } + ExpressionType::Tuple { elements } => { + if let ParametricType(TUPLE_TYPE, ls) = ty.as_ref() { + if ls.len() != elements.len() { + return Err("incorrect pattern length".into()); + } + for (x, y) in elements.iter().zip(ls.iter()) { + check_stmt_binding(ctx, x, y.clone())?; + } + Ok(()) + } else { + Err("pattern matching supports tuple only".into()) + } + } + _ => { + let t = get_target_type(ctx, target)?; + if ty == t { + Ok(()) + } else { + Err("type mismatch".into()) + } + } + } +} + +fn check_assign<'b: 'a, 'a>( + ctx: &mut InferenceContext<'a>, + targets: &'b [Expression], + value: &'b Expression, +) -> Result<(), String> { + let ty = infer_expr(ctx, value)?.ok_or_else(|| "no value".to_string())?; + for t in targets.iter() { + check_stmt_binding(ctx, t, ty.clone())?; + } + Ok(()) +} + +fn check_aug_assign<'b: 'a, 'a>( + ctx: &mut InferenceContext<'a>, + target: &'b Expression, + op: &'b Operator, + value: &'b Expression, +) -> Result<(), String> { + let left = infer_expr(ctx, target)?.ok_or_else(|| "no value".to_string())?; + let right = infer_expr(ctx, value)?.ok_or_else(|| "no value".to_string())?; + let fun = binop_assign_name(op); + resolve_call(ctx, Some(left), fun, &[right])?; + Ok(()) +} + +fn check_if<'b: 'a, 'a>( + ctx: &mut InferenceContext<'a>, + test: &'b Expression, + body: &'b [Statement], + orelse: &'b Option, +) -> Result { + let boolean = ctx.get_primitive(BOOL_TYPE); + let t = infer_expr(ctx, test)?; + if t == Some(boolean) { + let (names, result) = ctx.with_scope(|ctx| check_stmts(ctx, body)); + let returned = result?; + if let Some(orelse) = orelse { + let (names2, result) = ctx.with_scope(|ctx| check_stmts(ctx, orelse.as_slice())); + let returned = returned && result?; + for (name, ty) in names.iter() { + for (name2, ty2) in names2.iter() { + if *name == *name2 && ty == ty2 { + ctx.assign(name, ty.clone()).unwrap(); + } + } + } + Ok(returned) + } else { + Ok(false) + } + } else { + Err("condition should be bool".to_string()) + } +} + +fn check_while_stmt<'b: 'a, 'a>( + ctx: &mut InferenceContext<'a>, + test: &'b Expression, + body: &'b [Statement], + orelse: &'b Option, +) -> Result { + let boolean = ctx.get_primitive(BOOL_TYPE); + let t = infer_expr(ctx, test)?; + if t == Some(boolean) { + // to check what variables are defined, we would have to do a graph analysis... + // not implemented now + let (_, result) = ctx.with_scope(|ctx| check_stmts(ctx, body)); + result?; + if let Some(orelse) = orelse { + let (_, result) = ctx.with_scope(|ctx| check_stmts(ctx, orelse.as_slice())); + result?; + } + // to check whether the loop returned on every possible path, we need to analyse the graph, + // not implemented now + Ok(false) + } else { + Err("condition should be bool".to_string()) + } +} + +fn check_for_stmt<'b: 'a, 'a>( + ctx: &mut InferenceContext<'a>, + target: &'b Expression, + iter: &'b Expression, + body: &'b [Statement], + orelse: &'b Option, +) -> Result { + let ty = infer_expr(ctx, iter)?.ok_or_else(|| "no value".to_string())?; + if let ParametricType(LIST_TYPE, ls) = ty.as_ref() { + let (_, result) = ctx.with_scope(|ctx| { + infer_simple_binding(ctx, target, ls[0].clone())?; + check_stmts(ctx, body) + }); + result?; + if let Some(orelse) = orelse { + let (_, result) = ctx.with_scope(|ctx| check_stmts(ctx, orelse.as_slice())); + result?; + } + // to check whether the loop returned on every possible path, we need to analyse the graph, + // not implemented now + Ok(false) + } else { + Err("only list can be iterated over".to_string()) + } +} diff --git a/nac3core/src/statement_inference.rs b/nac3core/src/statement_inference.rs deleted file mode 100644 index 858381290d..0000000000 --- a/nac3core/src/statement_inference.rs +++ /dev/null @@ -1,95 +0,0 @@ -use crate::context::InferenceContext; -use crate::expression_inference::infer_expr; -use crate::inference_core::resolve_call; -use crate::magic_methods::*; -use crate::primitives::*; -use crate::typedef::{Type, TypeEnum::*}; -use rustpython_parser::ast::*; - -fn get_target_type<'b: 'a, 'a>( - ctx: &mut InferenceContext<'a>, - target: &'b Expression, -) -> Result { - match &target.node { - ExpressionType::Subscript { a, b } => { - let int32 = ctx.get_primitive(INT32_TYPE); - if infer_expr(ctx, &a)? == Some(int32) { - let b = get_target_type(ctx, &b)?; - if let ParametricType(LIST_TYPE, t) = b.as_ref() { - Ok(t[0].clone()) - } else { - Err("subscript is only supported for list".to_string()) - } - } else { - Err("subscript must be int32".to_string()) - } - } - ExpressionType::Attribute { value, name } => { - let t = get_target_type(ctx, &value)?; - let base = t.get_base(ctx).ok_or_else(|| "no attributes".to_string())?; - Ok(base - .fields - .get(name.as_str()) - .ok_or_else(|| "no such attribute")? - .clone()) - } - ExpressionType::Identifier { name } => Ok(ctx.resolve(name.as_str())?), - _ => Err("not supported".to_string()), - } -} - -fn check_stmt_binding<'b: 'a, 'a>( - ctx: &mut InferenceContext<'a>, - target: &'b Expression, - ty: Type, -) -> Result<(), String> { - match &target.node { - ExpressionType::Identifier { name } => { - if name.as_str() == "_" { - Ok(()) - } else { - match ctx.resolve(name.as_str()) { - Ok(t) if t == ty => Ok(()), - Err(_) => { - ctx.assign(name.as_str(), ty).unwrap(); - Ok(()) - } - _ => Err("conflicting type".into()), - } - } - } - ExpressionType::Tuple { elements } => { - if let ParametricType(TUPLE_TYPE, ls) = ty.as_ref() { - if ls.len() != elements.len() { - return Err("incorrect pattern length".into()); - } - for (x, y) in elements.iter().zip(ls.iter()) { - check_stmt_binding(ctx, x, y.clone())?; - } - Ok(()) - } else { - Err("pattern matching supports tuple only".into()) - } - } - _ => { - let t = get_target_type(ctx, target)?; - if ty == t { - Ok(()) - } else { - Err("type mismatch".into()) - } - } - } -} - -fn check_assign<'b: 'a, 'a>( - ctx: &mut InferenceContext<'a>, - targets: &'b [Expression], - value: &'b Expression, -) -> Result<(), String> { - let ty = infer_expr(ctx, value)?.ok_or_else(|| "no value".to_string())?; - for t in targets.iter() { - check_stmt_binding(ctx, t, ty.clone())?; - } - Ok(()) -}