From ed04cef431a2e705729a762fd58410b7f3070f13 Mon Sep 17 00:00:00 2001 From: pca006132 Date: Mon, 28 Jun 2021 14:48:04 +0800 Subject: [PATCH] added symbol resolver... --- Cargo.lock | 47 +++--------- nac3core/Cargo.toml | 2 +- nac3core/src/lib.rs | 1 + ...top_level_context.rs => global_context.rs} | 35 ++------- .../typecheck/context/inference_context.rs | 71 ++++++++----------- nac3core/src/typecheck/context/mod.rs | 4 +- nac3core/src/typecheck/inference_core.rs | 4 +- nac3core/src/typecheck/location.rs | 31 ++++++++ nac3core/src/typecheck/mod.rs | 5 +- nac3core/src/typecheck/primitives.rs | 4 +- nac3core/src/typecheck/symbol_resolver.rs | 23 ++++++ 11 files changed, 108 insertions(+), 119 deletions(-) rename nac3core/src/typecheck/context/{top_level_context.rs => global_context.rs} (74%) create mode 100644 nac3core/src/typecheck/location.rs create mode 100644 nac3core/src/typecheck/symbol_resolver.rs diff --git a/Cargo.lock b/Cargo.lock index 93a535ea..a3b73348 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -79,16 +79,6 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" -[[package]] -name = "codespan-reporting" -version = "0.11.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3538270d33cc669650c4b093848450d380def10c331d38c768e34cac80576e6e" -dependencies = [ - "termcolor", - "unicode-width", -] - [[package]] name = "crunchy" version = "0.2.2" @@ -221,6 +211,15 @@ dependencies = [ "proc-macro-hack", ] +[[package]] +name = "indoc" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5a75aeaaef0ce18b58056d306c27b07436fbb34b8816c53094b76dd81803136" +dependencies = [ + "unindent", +] + [[package]] name = "indoc-impl" version = "0.3.6" @@ -383,7 +382,7 @@ checksum = "b16bd47d9e329435e309c58469fe0791c2d0d1ba96ec0954152a5ae2b04387dc" name = "nac3core" version = "0.1.0" dependencies = [ - "codespan-reporting", + "indoc 1.0.3", "inkwell", "num-bigint 0.3.2", "num-traits", @@ -600,7 +599,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bf6bbbe8f70d179260b3728e5d04eb012f4f0c7988e58c11433dd689cecaa72e" dependencies = [ "ctor", - "indoc", + "indoc 0.3.6", "inventory", "libc", "parking_lot", @@ -826,15 +825,6 @@ dependencies = [ "winapi", ] -[[package]] -name = "termcolor" -version = "1.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2dfed899f0eb03f32ee8c6a0aabdb8a7949659e3466561fc0adf54e26d88c5f4" -dependencies = [ - "winapi-util", -] - [[package]] name = "tiny-keccak" version = "2.0.2" @@ -896,12 +886,6 @@ dependencies = [ "unic-common", ] -[[package]] -name = "unicode-width" -version = "0.1.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9337591893a19b88d8d87f2cec1e73fad5cdfd10e5a6f349f498ad6ea2ffb1e3" - [[package]] name = "unicode-xid" version = "0.2.2" @@ -954,15 +938,6 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" -[[package]] -name = "winapi-util" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178" -dependencies = [ - "winapi", -] - [[package]] name = "winapi-x86_64-pc-windows-gnu" version = "0.4.0" diff --git a/nac3core/Cargo.toml b/nac3core/Cargo.toml index 04025e53..af84b255 100644 --- a/nac3core/Cargo.toml +++ b/nac3core/Cargo.toml @@ -9,5 +9,5 @@ num-bigint = "0.3" num-traits = "0.2" inkwell = { git = "https://github.com/TheDan64/inkwell", branch = "master", features = ["llvm10-0"] } rustpython-parser = { git = "https://github.com/RustPython/RustPython", branch = "master" } -codespan-reporting = "0.11.1" +indoc = "1.0" diff --git a/nac3core/src/lib.rs b/nac3core/src/lib.rs index a87ce31c..ff64158d 100644 --- a/nac3core/src/lib.rs +++ b/nac3core/src/lib.rs @@ -4,6 +4,7 @@ extern crate num_bigint; extern crate inkwell; extern crate rustpython_parser; +extern crate indoc; mod typecheck; diff --git a/nac3core/src/typecheck/context/top_level_context.rs b/nac3core/src/typecheck/context/global_context.rs similarity index 74% rename from nac3core/src/typecheck/context/top_level_context.rs rename to nac3core/src/typecheck/context/global_context.rs index d7c4ca23..b322d7ea 100644 --- a/nac3core/src/typecheck/context/top_level_context.rs +++ b/nac3core/src/typecheck/context/global_context.rs @@ -5,7 +5,7 @@ use std::rc::Rc; /// Structure for storing top-level type definitions. /// Used for collecting type signature from source code. /// Can be converted to `InferenceContext` for type inference in functions. -pub struct TopLevelContext<'a> { +pub struct GlobalContext<'a> { /// List of primitive definitions. pub(super) primitive_defs: Vec>, /// List of class definitions. @@ -16,61 +16,39 @@ pub struct TopLevelContext<'a> { pub(super) var_defs: Vec>, /// Function name to signature mapping. pub(super) fn_table: HashMap<&'a str, FnDef>, - /// Type name to type mapping. - pub(super) sym_table: HashMap<&'a str, Type>, primitives: Vec, variables: Vec, } -impl<'a> TopLevelContext<'a> { - pub fn new(primitive_defs: Vec>) -> TopLevelContext { - let mut sym_table = HashMap::new(); +impl<'a> GlobalContext<'a> { + pub fn new(primitive_defs: Vec>) -> GlobalContext { let mut primitives = Vec::new(); for (i, t) in primitive_defs.iter().enumerate() { primitives.push(TypeEnum::PrimitiveType(PrimitiveId(i)).into()); - sym_table.insert(t.name, TypeEnum::PrimitiveType(PrimitiveId(i)).into()); } - TopLevelContext { + GlobalContext { primitive_defs, class_defs: Vec::new(), parametric_defs: Vec::new(), var_defs: Vec::new(), fn_table: HashMap::new(), - sym_table, primitives, variables: Vec::new(), } } pub fn add_class(&mut self, def: ClassDef<'a>) -> ClassId { - self.sym_table.insert( - def.base.name, - TypeEnum::ClassType(ClassId(self.class_defs.len())).into(), - ); self.class_defs.push(def); ClassId(self.class_defs.len() - 1) } pub fn add_parametric(&mut self, def: ParametricDef<'a>) -> ParamId { - let params = def - .params - .iter() - .map(|&v| Rc::new(TypeEnum::TypeVariable(v))) - .collect(); - self.sym_table.insert( - def.base.name, - TypeEnum::ParametricType(ParamId(self.parametric_defs.len()), params).into(), - ); self.parametric_defs.push(def); ParamId(self.parametric_defs.len() - 1) } pub fn add_variable(&mut self, def: VarDef<'a>) -> VariableId { - self.sym_table.insert( - def.name, - TypeEnum::TypeVariable(VariableId(self.var_defs.len())).into(), - ); self.add_variable_private(def) } @@ -128,9 +106,4 @@ impl<'a> TopLevelContext<'a> { pub fn get_variable(&self, id: VariableId) -> Type { self.variables.get(id.0).unwrap().clone() } - - pub fn get_type(&self, name: &str) -> Option { - // TODO: handle parametric types - self.sym_table.get(name).cloned() - } } diff --git a/nac3core/src/typecheck/context/inference_context.rs b/nac3core/src/typecheck/context/inference_context.rs index b4d2644f..ac643315 100644 --- a/nac3core/src/typecheck/context/inference_context.rs +++ b/nac3core/src/typecheck/context/inference_context.rs @@ -1,22 +1,22 @@ -use super::TopLevelContext; +use super::super::symbol_resolver::*; use super::super::typedef::*; +use super::GlobalContext; use std::boxed::Box; use std::collections::HashMap; struct ContextStack<'a> { /// stack level, starts from 0 level: u32, - /// stack of variable definitions containing (id, def, level) where `def` is the original - /// definition in `level-1`. - var_defs: Vec<(usize, VarDef<'a>, u32)>, /// stack of symbol definitions containing (name, level) where `level` is the smallest level /// where the name is assigned a value sym_def: Vec<(&'a str, u32)>, } pub struct InferenceContext<'a> { - /// top level context - top_level: TopLevelContext<'a>, + /// global context + global: GlobalContext<'a>, + /// per source symbol resolver + resolver: Box, /// list of primitive instances primitives: Vec, @@ -26,8 +26,6 @@ pub struct InferenceContext<'a> { /// an identifier might be defined earlier but has no value (for some code path), thus not /// readable. sym_table: HashMap<&'a str, (Type, bool)>, - /// resolution function reference, that may resolve unbounded identifiers to some type - resolution_fn: Box Result>, /// stack stack: ContextStack<'a>, } @@ -35,25 +33,21 @@ pub struct InferenceContext<'a> { // non-trivial implementations here impl<'a> InferenceContext<'a> { /// return a new `InferenceContext` from `TopLevelContext` and resolution function. - pub fn new( - top_level: TopLevelContext, - resolution_fn: Box Result>, - ) -> InferenceContext { - let primitives = (0..top_level.primitive_defs.len()) + pub fn new(global: GlobalContext, resolver: Box) -> InferenceContext { + let primitives = (0..global.primitive_defs.len()) .map(|v| TypeEnum::PrimitiveType(PrimitiveId(v)).into()) .collect(); - let variables = (0..top_level.var_defs.len()) + let variables = (0..global.var_defs.len()) .map(|v| TypeEnum::TypeVariable(VariableId(v)).into()) .collect(); InferenceContext { - top_level, + global, + resolver, primitives, variables, sym_table: HashMap::new(), - resolution_fn, stack: ContextStack { level: 0, - var_defs: Vec::new(), sym_def: Vec::new(), }, } @@ -61,7 +55,6 @@ impl<'a> InferenceContext<'a> { /// execute the function with new scope. /// variable assignment would be limited within the scope (not readable outside), and type - /// variable type guard would be limited within the scope. /// returns the list of variables assigned within the scope, and the result of the function pub fn with_scope(&mut self, f: F) -> (Vec<&'a str>, R) where @@ -70,15 +63,6 @@ impl<'a> InferenceContext<'a> { self.stack.level += 1; let result = f(self); self.stack.level -= 1; - while !self.stack.var_defs.is_empty() { - let (_, _, level) = self.stack.var_defs.last().unwrap(); - if *level > self.stack.level { - let (id, def, _) = self.stack.var_defs.pop().unwrap(); - self.top_level.var_defs[id] = def; - } else { - break; - } - } let mut poped_names = Vec::new(); while !self.stack.sym_def.is_empty() { let (_, level) = self.stack.sym_def.last().unwrap(); @@ -126,19 +110,16 @@ impl<'a> InferenceContext<'a> { if *x { Ok(t.clone()) } else { - Err("may not have value".into()) + Err("unbounded identifier".into()) } } else { - self.resolution_fn.as_mut()(name) + match self.resolver.get_symbol_type(name) { + Some(SymbolType::Identifier(t)) => Ok(t), + Some(SymbolType::TypeName(_)) => Err("is not a value".into()), + _ => Err("unbounded identifier".into()), + } } } - - /// restrict the bound of a type variable by replacing its definition. - /// used for implementing type guard - pub fn restrict(&mut self, id: VariableId, mut def: VarDef<'a>) { - std::mem::swap(self.top_level.var_defs.get_mut(id.0).unwrap(), &mut def); - self.stack.var_defs.push((id.0, def, self.stack.level)); - } } // trivial getters: @@ -151,22 +132,26 @@ impl<'a> InferenceContext<'a> { } pub fn get_fn_def(&self, name: &str) -> Option<&FnDef> { - self.top_level.fn_table.get(name) + self.global.fn_table.get(name) } pub fn get_primitive_def(&self, id: PrimitiveId) -> &TypeDef { - self.top_level.primitive_defs.get(id.0).unwrap() + self.global.primitive_defs.get(id.0).unwrap() } pub fn get_class_def(&self, id: ClassId) -> &ClassDef { - self.top_level.class_defs.get(id.0).unwrap() + self.global.class_defs.get(id.0).unwrap() } pub fn get_parametric_def(&self, id: ParamId) -> &ParametricDef { - self.top_level.parametric_defs.get(id.0).unwrap() + self.global.parametric_defs.get(id.0).unwrap() } pub fn get_variable_def(&self, id: VariableId) -> &VarDef { - self.top_level.var_defs.get(id.0).unwrap() + self.global.var_defs.get(id.0).unwrap() } - pub fn get_type(&self, name: &str) -> Option { - self.top_level.get_type(name) + pub fn get_type(&self, name: &str) -> Result { + match self.resolver.get_symbol_type(name) { + Some(SymbolType::TypeName(t)) => Ok(t), + Some(SymbolType::Identifier(_)) => Err("not a type".into()), + _ => Err("unbounded identifier".into()), + } } } diff --git a/nac3core/src/typecheck/context/mod.rs b/nac3core/src/typecheck/context/mod.rs index f59140d9..3a5d8d11 100644 --- a/nac3core/src/typecheck/context/mod.rs +++ b/nac3core/src/typecheck/context/mod.rs @@ -1,4 +1,4 @@ mod inference_context; -mod top_level_context; +mod global_context; pub use inference_context::InferenceContext; -pub use top_level_context::TopLevelContext; +pub use global_context::GlobalContext; diff --git a/nac3core/src/typecheck/inference_core.rs b/nac3core/src/typecheck/inference_core.rs index 5190a164..679c04c8 100644 --- a/nac3core/src/typecheck/inference_core.rs +++ b/nac3core/src/typecheck/inference_core.rs @@ -168,11 +168,11 @@ pub fn resolve_call( #[cfg(test)] mod tests { use super::*; - use super::super::context::TopLevelContext; + use super::super::context::GlobalContext; use super::super::primitives::*; use std::rc::Rc; - fn get_inference_context(ctx: TopLevelContext) -> InferenceContext { + fn get_inference_context(ctx: GlobalContext) -> InferenceContext { InferenceContext::new(ctx, Box::new(|_| Err("unbounded identifier".into()))) } diff --git a/nac3core/src/typecheck/location.rs b/nac3core/src/typecheck/location.rs new file mode 100644 index 00000000..0165ef0a --- /dev/null +++ b/nac3core/src/typecheck/location.rs @@ -0,0 +1,31 @@ +use rustpython_parser::ast; +use std::vec::Vec; + +#[derive(Clone, Copy, PartialEq)] +pub struct FileID(u32); + +#[derive(Clone, Copy, PartialEq)] +pub enum Location { + CodeRange(FileID, ast::Location), + Builtin +} + +pub struct FileRegistry { + files: Vec, +} + +impl FileRegistry { + pub fn new() -> FileRegistry { + FileRegistry { files: Vec::new() } + } + + pub fn add_file(&mut self, path: &str) -> FileID { + let index = self.files.len() as u32; + self.files.push(path.to_owned()); + FileID(index) + } + + pub fn query_file(&self, id: FileID) -> &str { + &self.files[id.0 as usize] + } +} diff --git a/nac3core/src/typecheck/mod.rs b/nac3core/src/typecheck/mod.rs index 952907c2..a3be5925 100644 --- a/nac3core/src/typecheck/mod.rs +++ b/nac3core/src/typecheck/mod.rs @@ -1,6 +1,7 @@ +pub mod context; pub mod inference_core; +pub mod location; pub mod magic_methods; pub mod primitives; +pub mod symbol_resolver; pub mod typedef; -pub mod context; - diff --git a/nac3core/src/typecheck/primitives.rs b/nac3core/src/typecheck/primitives.rs index cd6a36b5..94e76ee7 100644 --- a/nac3core/src/typecheck/primitives.rs +++ b/nac3core/src/typecheck/primitives.rs @@ -78,7 +78,7 @@ fn impl_order(def: &mut TypeDef, ty: &Type) { def.methods.insert("__ge__", fun); } -pub fn basic_ctx() -> TopLevelContext<'static> { +pub fn basic_ctx() -> GlobalContext<'static> { let primitives = [ TypeDef { name: "bool", @@ -102,7 +102,7 @@ pub fn basic_ctx() -> TopLevelContext<'static> { }, ] .to_vec(); - let mut ctx = TopLevelContext::new(primitives); + let mut ctx = GlobalContext::new(primitives); let b = ctx.get_primitive(BOOL_TYPE); let b_def = ctx.get_primitive_def_mut(BOOL_TYPE); diff --git a/nac3core/src/typecheck/symbol_resolver.rs b/nac3core/src/typecheck/symbol_resolver.rs new file mode 100644 index 00000000..a6eff440 --- /dev/null +++ b/nac3core/src/typecheck/symbol_resolver.rs @@ -0,0 +1,23 @@ +use super::typedef::Type; +use super::location::Location; + +pub enum SymbolType { + TypeName(Type), + Identifier(Type), +} + +pub enum SymbolValue<'a> { + I32(i32), + I64(i64), + Double(f64), + Bool(bool), + Tuple(&'a [SymbolValue<'a>]), + Bytes(&'a [u8]), +} + +pub trait SymbolResolver { + fn get_symbol_type(&self, str: &str) -> Option; + fn get_symbol_value(&self, str: &str) -> Option; + fn get_symbol_location(&self, str: &str) -> Option; + // handle function call etc. +}