hm-inference #6

Merged
sb10q merged 136 commits from hm-inference into master 2021-08-19 11:46:50 +08:00
11 changed files with 108 additions and 119 deletions
Showing only changes of commit ed04cef431 - Show all commits

47
Cargo.lock generated
View File

@ -79,16 +79,6 @@ version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "codespan-reporting"
version = "0.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3538270d33cc669650c4b093848450d380def10c331d38c768e34cac80576e6e"
dependencies = [
"termcolor",
"unicode-width",
]
[[package]] [[package]]
name = "crunchy" name = "crunchy"
version = "0.2.2" version = "0.2.2"
@ -221,6 +211,15 @@ dependencies = [
"proc-macro-hack", "proc-macro-hack",
] ]
[[package]]
name = "indoc"
version = "1.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e5a75aeaaef0ce18b58056d306c27b07436fbb34b8816c53094b76dd81803136"
dependencies = [
"unindent",
]
[[package]] [[package]]
name = "indoc-impl" name = "indoc-impl"
version = "0.3.6" version = "0.3.6"
@ -383,7 +382,7 @@ checksum = "b16bd47d9e329435e309c58469fe0791c2d0d1ba96ec0954152a5ae2b04387dc"
name = "nac3core" name = "nac3core"
version = "0.1.0" version = "0.1.0"
dependencies = [ dependencies = [
"codespan-reporting", "indoc 1.0.3",
"inkwell", "inkwell",
"num-bigint 0.3.2", "num-bigint 0.3.2",
"num-traits", "num-traits",
@ -600,7 +599,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bf6bbbe8f70d179260b3728e5d04eb012f4f0c7988e58c11433dd689cecaa72e" checksum = "bf6bbbe8f70d179260b3728e5d04eb012f4f0c7988e58c11433dd689cecaa72e"
dependencies = [ dependencies = [
"ctor", "ctor",
"indoc", "indoc 0.3.6",
"inventory", "inventory",
"libc", "libc",
"parking_lot", "parking_lot",
@ -826,15 +825,6 @@ dependencies = [
"winapi", "winapi",
] ]
[[package]]
name = "termcolor"
version = "1.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2dfed899f0eb03f32ee8c6a0aabdb8a7949659e3466561fc0adf54e26d88c5f4"
dependencies = [
"winapi-util",
]
[[package]] [[package]]
name = "tiny-keccak" name = "tiny-keccak"
version = "2.0.2" version = "2.0.2"
@ -896,12 +886,6 @@ dependencies = [
"unic-common", "unic-common",
] ]
[[package]]
name = "unicode-width"
version = "0.1.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9337591893a19b88d8d87f2cec1e73fad5cdfd10e5a6f349f498ad6ea2ffb1e3"
[[package]] [[package]]
name = "unicode-xid" name = "unicode-xid"
version = "0.2.2" version = "0.2.2"
@ -954,15 +938,6 @@ version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
[[package]]
name = "winapi-util"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178"
dependencies = [
"winapi",
]
[[package]] [[package]]
name = "winapi-x86_64-pc-windows-gnu" name = "winapi-x86_64-pc-windows-gnu"
version = "0.4.0" version = "0.4.0"

View File

@ -9,5 +9,5 @@ num-bigint = "0.3"
num-traits = "0.2" num-traits = "0.2"
inkwell = { git = "https://github.com/TheDan64/inkwell", branch = "master", features = ["llvm10-0"] } inkwell = { git = "https://github.com/TheDan64/inkwell", branch = "master", features = ["llvm10-0"] }
rustpython-parser = { git = "https://github.com/RustPython/RustPython", branch = "master" } rustpython-parser = { git = "https://github.com/RustPython/RustPython", branch = "master" }
codespan-reporting = "0.11.1" indoc = "1.0"

View File

@ -4,6 +4,7 @@
extern crate num_bigint; extern crate num_bigint;
extern crate inkwell; extern crate inkwell;
extern crate rustpython_parser; extern crate rustpython_parser;
extern crate indoc;
mod typecheck; mod typecheck;

View File

@ -5,7 +5,7 @@ use std::rc::Rc;
/// Structure for storing top-level type definitions. /// Structure for storing top-level type definitions.
/// Used for collecting type signature from source code. /// Used for collecting type signature from source code.
/// Can be converted to `InferenceContext` for type inference in functions. /// Can be converted to `InferenceContext` for type inference in functions.
pub struct TopLevelContext<'a> { pub struct GlobalContext<'a> {
/// List of primitive definitions. /// List of primitive definitions.
pub(super) primitive_defs: Vec<TypeDef<'a>>, pub(super) primitive_defs: Vec<TypeDef<'a>>,
/// List of class definitions. /// List of class definitions.
@ -16,61 +16,39 @@ pub struct TopLevelContext<'a> {
pub(super) var_defs: Vec<VarDef<'a>>, pub(super) var_defs: Vec<VarDef<'a>>,
/// Function name to signature mapping. /// Function name to signature mapping.
pub(super) fn_table: HashMap<&'a str, FnDef>, pub(super) fn_table: HashMap<&'a str, FnDef>,
/// Type name to type mapping.
pub(super) sym_table: HashMap<&'a str, Type>,
primitives: Vec<Type>, primitives: Vec<Type>,
variables: Vec<Type>, variables: Vec<Type>,
} }
impl<'a> TopLevelContext<'a> { impl<'a> GlobalContext<'a> {
pub fn new(primitive_defs: Vec<TypeDef<'a>>) -> TopLevelContext { pub fn new(primitive_defs: Vec<TypeDef<'a>>) -> GlobalContext {
let mut sym_table = HashMap::new();
let mut primitives = Vec::new(); let mut primitives = Vec::new();
for (i, t) in primitive_defs.iter().enumerate() { for (i, t) in primitive_defs.iter().enumerate() {
primitives.push(TypeEnum::PrimitiveType(PrimitiveId(i)).into()); primitives.push(TypeEnum::PrimitiveType(PrimitiveId(i)).into());
sym_table.insert(t.name, TypeEnum::PrimitiveType(PrimitiveId(i)).into());
} }
TopLevelContext { GlobalContext {
primitive_defs, primitive_defs,
class_defs: Vec::new(), class_defs: Vec::new(),
parametric_defs: Vec::new(), parametric_defs: Vec::new(),
var_defs: Vec::new(), var_defs: Vec::new(),
fn_table: HashMap::new(), fn_table: HashMap::new(),
sym_table,
primitives, primitives,
variables: Vec::new(), variables: Vec::new(),
} }
} }
pub fn add_class(&mut self, def: ClassDef<'a>) -> ClassId { pub fn add_class(&mut self, def: ClassDef<'a>) -> ClassId {
self.sym_table.insert(
def.base.name,
TypeEnum::ClassType(ClassId(self.class_defs.len())).into(),
);
self.class_defs.push(def); self.class_defs.push(def);
ClassId(self.class_defs.len() - 1) ClassId(self.class_defs.len() - 1)
} }
pub fn add_parametric(&mut self, def: ParametricDef<'a>) -> ParamId { pub fn add_parametric(&mut self, def: ParametricDef<'a>) -> ParamId {
let params = def
.params
.iter()
.map(|&v| Rc::new(TypeEnum::TypeVariable(v)))
.collect();
self.sym_table.insert(
def.base.name,
TypeEnum::ParametricType(ParamId(self.parametric_defs.len()), params).into(),
);
self.parametric_defs.push(def); self.parametric_defs.push(def);
ParamId(self.parametric_defs.len() - 1) ParamId(self.parametric_defs.len() - 1)
} }
pub fn add_variable(&mut self, def: VarDef<'a>) -> VariableId { pub fn add_variable(&mut self, def: VarDef<'a>) -> VariableId {
self.sym_table.insert(
def.name,
TypeEnum::TypeVariable(VariableId(self.var_defs.len())).into(),
);
self.add_variable_private(def) self.add_variable_private(def)
} }
@ -128,9 +106,4 @@ impl<'a> TopLevelContext<'a> {
pub fn get_variable(&self, id: VariableId) -> Type { pub fn get_variable(&self, id: VariableId) -> Type {
self.variables.get(id.0).unwrap().clone() self.variables.get(id.0).unwrap().clone()
} }
pub fn get_type(&self, name: &str) -> Option<Type> {
// TODO: handle parametric types
self.sym_table.get(name).cloned()
}
} }

View File

@ -1,22 +1,22 @@
use super::TopLevelContext; use super::super::symbol_resolver::*;
use super::super::typedef::*; use super::super::typedef::*;
use super::GlobalContext;
use std::boxed::Box; use std::boxed::Box;
use std::collections::HashMap; use std::collections::HashMap;
struct ContextStack<'a> { struct ContextStack<'a> {
/// stack level, starts from 0 /// stack level, starts from 0
level: u32, level: u32,
/// stack of variable definitions containing (id, def, level) where `def` is the original
/// definition in `level-1`.
var_defs: Vec<(usize, VarDef<'a>, u32)>,
/// stack of symbol definitions containing (name, level) where `level` is the smallest level /// stack of symbol definitions containing (name, level) where `level` is the smallest level
/// where the name is assigned a value /// where the name is assigned a value
sym_def: Vec<(&'a str, u32)>, sym_def: Vec<(&'a str, u32)>,
} }
pub struct InferenceContext<'a> { pub struct InferenceContext<'a> {
/// top level context /// global context
top_level: TopLevelContext<'a>, global: GlobalContext<'a>,
/// per source symbol resolver
resolver: Box<dyn SymbolResolver>,
/// list of primitive instances /// list of primitive instances
primitives: Vec<Type>, primitives: Vec<Type>,
@ -26,8 +26,6 @@ pub struct InferenceContext<'a> {
/// an identifier might be defined earlier but has no value (for some code path), thus not /// an identifier might be defined earlier but has no value (for some code path), thus not
/// readable. /// readable.
sym_table: HashMap<&'a str, (Type, bool)>, sym_table: HashMap<&'a str, (Type, bool)>,
/// resolution function reference, that may resolve unbounded identifiers to some type
resolution_fn: Box<dyn FnMut(&str) -> Result<Type, String>>,
/// stack /// stack
stack: ContextStack<'a>, stack: ContextStack<'a>,
} }
@ -35,25 +33,21 @@ pub struct InferenceContext<'a> {
// non-trivial implementations here // non-trivial implementations here
impl<'a> InferenceContext<'a> { impl<'a> InferenceContext<'a> {
/// return a new `InferenceContext` from `TopLevelContext` and resolution function. /// return a new `InferenceContext` from `TopLevelContext` and resolution function.
pub fn new( pub fn new(global: GlobalContext, resolver: Box<dyn SymbolResolver>) -> InferenceContext {
top_level: TopLevelContext, let primitives = (0..global.primitive_defs.len())
resolution_fn: Box<dyn FnMut(&str) -> Result<Type, String>>,
) -> InferenceContext {
let primitives = (0..top_level.primitive_defs.len())
.map(|v| TypeEnum::PrimitiveType(PrimitiveId(v)).into()) .map(|v| TypeEnum::PrimitiveType(PrimitiveId(v)).into())
.collect(); .collect();
let variables = (0..top_level.var_defs.len()) let variables = (0..global.var_defs.len())
.map(|v| TypeEnum::TypeVariable(VariableId(v)).into()) .map(|v| TypeEnum::TypeVariable(VariableId(v)).into())
.collect(); .collect();
InferenceContext { InferenceContext {
top_level, global,
resolver,
primitives, primitives,
variables, variables,
sym_table: HashMap::new(), sym_table: HashMap::new(),
resolution_fn,
stack: ContextStack { stack: ContextStack {
level: 0, level: 0,
var_defs: Vec::new(),
sym_def: Vec::new(), sym_def: Vec::new(),
}, },
} }
@ -61,7 +55,6 @@ impl<'a> InferenceContext<'a> {
/// execute the function with new scope. /// execute the function with new scope.
/// variable assignment would be limited within the scope (not readable outside), and type /// variable assignment would be limited within the scope (not readable outside), and type
/// variable type guard would be limited within the scope.
/// returns the list of variables assigned within the scope, and the result of the function /// returns the list of variables assigned within the scope, and the result of the function
pub fn with_scope<F, R>(&mut self, f: F) -> (Vec<&'a str>, R) pub fn with_scope<F, R>(&mut self, f: F) -> (Vec<&'a str>, R)
where where
@ -70,15 +63,6 @@ impl<'a> InferenceContext<'a> {
self.stack.level += 1; self.stack.level += 1;
let result = f(self); let result = f(self);
self.stack.level -= 1; self.stack.level -= 1;
while !self.stack.var_defs.is_empty() {
let (_, _, level) = self.stack.var_defs.last().unwrap();
if *level > self.stack.level {
let (id, def, _) = self.stack.var_defs.pop().unwrap();
self.top_level.var_defs[id] = def;
} else {
break;
}
}
let mut poped_names = Vec::new(); let mut poped_names = Vec::new();
while !self.stack.sym_def.is_empty() { while !self.stack.sym_def.is_empty() {
let (_, level) = self.stack.sym_def.last().unwrap(); let (_, level) = self.stack.sym_def.last().unwrap();
@ -126,19 +110,16 @@ impl<'a> InferenceContext<'a> {
if *x { if *x {
Ok(t.clone()) Ok(t.clone())
} else { } else {
Err("may not have value".into()) Err("unbounded identifier".into())
} }
} else { } else {
self.resolution_fn.as_mut()(name) match self.resolver.get_symbol_type(name) {
Some(SymbolType::Identifier(t)) => Ok(t),
Some(SymbolType::TypeName(_)) => Err("is not a value".into()),
_ => Err("unbounded identifier".into()),
}
} }
} }
/// restrict the bound of a type variable by replacing its definition.
/// used for implementing type guard
pub fn restrict(&mut self, id: VariableId, mut def: VarDef<'a>) {
std::mem::swap(self.top_level.var_defs.get_mut(id.0).unwrap(), &mut def);
self.stack.var_defs.push((id.0, def, self.stack.level));
}
} }
// trivial getters: // trivial getters:
@ -151,22 +132,26 @@ impl<'a> InferenceContext<'a> {
} }
pub fn get_fn_def(&self, name: &str) -> Option<&FnDef> { pub fn get_fn_def(&self, name: &str) -> Option<&FnDef> {
self.top_level.fn_table.get(name) self.global.fn_table.get(name)
} }
pub fn get_primitive_def(&self, id: PrimitiveId) -> &TypeDef { pub fn get_primitive_def(&self, id: PrimitiveId) -> &TypeDef {
self.top_level.primitive_defs.get(id.0).unwrap() self.global.primitive_defs.get(id.0).unwrap()
} }
pub fn get_class_def(&self, id: ClassId) -> &ClassDef { pub fn get_class_def(&self, id: ClassId) -> &ClassDef {
self.top_level.class_defs.get(id.0).unwrap() self.global.class_defs.get(id.0).unwrap()
} }
pub fn get_parametric_def(&self, id: ParamId) -> &ParametricDef { pub fn get_parametric_def(&self, id: ParamId) -> &ParametricDef {
self.top_level.parametric_defs.get(id.0).unwrap() self.global.parametric_defs.get(id.0).unwrap()
} }
pub fn get_variable_def(&self, id: VariableId) -> &VarDef { pub fn get_variable_def(&self, id: VariableId) -> &VarDef {
self.top_level.var_defs.get(id.0).unwrap() self.global.var_defs.get(id.0).unwrap()
} }
pub fn get_type(&self, name: &str) -> Option<Type> { pub fn get_type(&self, name: &str) -> Result<Type, String> {
self.top_level.get_type(name) match self.resolver.get_symbol_type(name) {
Some(SymbolType::TypeName(t)) => Ok(t),
Some(SymbolType::Identifier(_)) => Err("not a type".into()),
_ => Err("unbounded identifier".into()),
}
} }
} }

View File

@ -1,4 +1,4 @@
mod inference_context; mod inference_context;
mod top_level_context; mod global_context;
pub use inference_context::InferenceContext; pub use inference_context::InferenceContext;
pub use top_level_context::TopLevelContext; pub use global_context::GlobalContext;

View File

@ -168,11 +168,11 @@ pub fn resolve_call(
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::*; use super::*;
use super::super::context::TopLevelContext; use super::super::context::GlobalContext;
use super::super::primitives::*; use super::super::primitives::*;
use std::rc::Rc; use std::rc::Rc;
fn get_inference_context(ctx: TopLevelContext) -> InferenceContext { fn get_inference_context(ctx: GlobalContext) -> InferenceContext {
InferenceContext::new(ctx, Box::new(|_| Err("unbounded identifier".into()))) InferenceContext::new(ctx, Box::new(|_| Err("unbounded identifier".into())))
} }

View File

@ -0,0 +1,31 @@
use rustpython_parser::ast;
use std::vec::Vec;
/// Opaque handle identifying one file registered in a `FileRegistry`.
///
/// The wrapped `u32` is the index the registry assigned when the file was added.
/// `Debug`, `Eq` and `Hash` are derived so handles can be printed in diagnostics
/// and used as keys in hashed collections.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub struct FileID(u32);
/// A source location: either a position inside a registered file, or no file position at all.
#[derive(Clone, Copy, PartialEq)]
pub enum Location {
/// A `rustpython_parser` AST location paired with the `FileID` of the file it refers to.
CodeRange(FileID, ast::Location),
/// No file position is attached.
/// NOTE(review): presumably used for definitions built into the compiler — confirm.
Builtin
}
/// Registry of source file paths, each addressed by the `FileID` handed out on insertion.
#[derive(Debug, Default)]
pub struct FileRegistry {
    /// Registered paths; a path's position in this vector is the numeric value of its `FileID`.
    files: Vec<String>,
}

impl FileRegistry {
    /// Creates an empty registry.
    pub fn new() -> FileRegistry {
        FileRegistry::default()
    }

    /// Registers `path` and returns the `FileID` that identifies it.
    ///
    /// Paths are stored as given and are not deduplicated: adding the same path twice
    /// yields two distinct ids.
    ///
    /// # Panics
    ///
    /// Panics if the number of files already registered does not fit in a `u32`.
    pub fn add_file(&mut self, path: &str) -> FileID {
        // A plain `as u32` cast would silently wrap around and hand out an id that
        // aliases an earlier file, so the conversion is checked instead.
        let index = <u32 as std::convert::TryFrom<usize>>::try_from(self.files.len())
            .expect("FileRegistry cannot hold more than u32::MAX files");
        self.files.push(path.to_owned());
        FileID(index)
    }

    /// Returns the path that was registered under `id`.
    ///
    /// # Panics
    ///
    /// Panics if `id` does not index a file in this registry (for example, an id that
    /// was issued by a different registry holding more files).
    pub fn query_file(&self, id: FileID) -> &str {
        &self.files[id.0 as usize]
    }
}

View File

@ -1,6 +1,7 @@
pub mod context;
pub mod inference_core; pub mod inference_core;
pub mod location;
pub mod magic_methods; pub mod magic_methods;
pub mod primitives; pub mod primitives;
pub mod symbol_resolver;
pub mod typedef; pub mod typedef;
pub mod context;

View File

@ -78,7 +78,7 @@ fn impl_order(def: &mut TypeDef, ty: &Type) {
def.methods.insert("__ge__", fun); def.methods.insert("__ge__", fun);
} }
pub fn basic_ctx() -> TopLevelContext<'static> { pub fn basic_ctx() -> GlobalContext<'static> {
let primitives = [ let primitives = [
TypeDef { TypeDef {
name: "bool", name: "bool",
@ -102,7 +102,7 @@ pub fn basic_ctx() -> TopLevelContext<'static> {
}, },
] ]
.to_vec(); .to_vec();
let mut ctx = TopLevelContext::new(primitives); let mut ctx = GlobalContext::new(primitives);
let b = ctx.get_primitive(BOOL_TYPE); let b = ctx.get_primitive(BOOL_TYPE);
let b_def = ctx.get_primitive_def_mut(BOOL_TYPE); let b_def = ctx.get_primitive_def_mut(BOOL_TYPE);

View File

@ -0,0 +1,23 @@
use super::typedef::Type;
use super::location::Location;
/// Result of resolving a symbol name: which kind of symbol it is, plus the associated `Type`.
pub enum SymbolType {
/// The name denotes a type; the payload is that type.
TypeName(Type),
/// The name denotes a value-level identifier; the payload is the identifier's type.
Identifier(Type),
}
/// A value attached to a symbol, borrowing any aggregate payload for the lifetime `'a`.
///
/// NOTE(review): presumably these are the constant values a resolver can supply for a
/// symbol — confirm against the resolver implementations.
///
/// Every payload is `Copy`, so the enum derives `Clone` and `Copy`; `Debug` and
/// `PartialEq` are derived so values can be printed and compared. `Eq` is deliberately
/// not derived because `Double` holds an `f64`.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum SymbolValue<'a> {
    /// 32-bit signed integer.
    I32(i32),
    /// 64-bit signed integer.
    I64(i64),
    /// 64-bit floating point number.
    Double(f64),
    /// Boolean.
    Bool(bool),
    /// Borrowed sequence of nested values.
    Tuple(&'a [SymbolValue<'a>]),
    /// Borrowed byte string.
    Bytes(&'a [u8]),
}
/// Interface for looking up information about a symbol by its name.
///
/// Every query returns an `Option`.
/// NOTE(review): presumably `None` means the resolver does not know the name — confirm
/// with the implementations.
pub trait SymbolResolver {
/// Looks up `str` and reports whether it names a type or an identifier, together with
/// the associated `Type`.
fn get_symbol_type(&self, str: &str) -> Option<SymbolType>;
/// Looks up the `SymbolValue` associated with `str`. By lifetime elision the returned
/// value borrows from `self`, not from the `str` argument.
fn get_symbol_value(&self, str: &str) -> Option<SymbolValue>;
/// Looks up the `Location` associated with `str`.
fn get_symbol_location(&self, str: &str) -> Option<Location>;
// TODO: add queries for handling function calls etc.
}