diff --git a/Cargo.lock b/Cargo.lock index a3b73348..c09cc2c6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1,5 +1,7 @@ # This file is automatically @generated by Cargo. # It is not intended for manual editing. +version = 3 + [[package]] name = "ahash" version = "0.7.4" @@ -73,6 +75,12 @@ version = "1.0.68" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4a72c244c1ff497a746a7e1fb3d14bd08420ecda70c8f25c7112f2781652d787" +[[package]] +name = "cfg-if" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822" + [[package]] name = "cfg-if" version = "1.0.0" @@ -107,7 +115,7 @@ version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b98cf8ebf19c3d1b223e151f99a4f9f0690dca41414773390fc824184ac833e1" dependencies = [ - "cfg-if", + "cfg-if 1.0.0", "dirs-sys-next", ] @@ -143,13 +151,22 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "37ab347416e802de484e4d03c7316c48f1ecb56574dfd4a46a80f173ce1de04d" +[[package]] +name = "generational-arena" +version = "0.2.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e1d3b771574f62d0548cee0ad9057857e9fc25d7a3335f140c84f6acd0bf601" +dependencies = [ + "cfg-if 0.1.10", +] + [[package]] name = "getrandom" version = "0.1.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8fc3cb4d91f53b50155bdcfd23f6a4c39ae1969c2ae85982b135750cccaf5fce" dependencies = [ - "cfg-if", + "cfg-if 1.0.0", "libc", "wasi 0.9.0+wasi-snapshot-preview1", ] @@ -160,7 +177,7 @@ version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7fcd999463524c52659517fe2cea98493cfe485d10565e7b0fb07dbba7ad2753" dependencies = [ - "cfg-if", + "cfg-if 1.0.0", "libc", "wasi 0.10.2+wasi-snapshot-preview1", ] @@ -236,7 +253,7 @@ dependencies = [ [[package]] name = "inkwell" version = "0.1.0" -source = "git+https://github.com/TheDan64/inkwell#aa4de1d78471a3d2f0fda1f56801177ddf80f3bf" +source = "git+https://github.com/TheDan64/inkwell?branch=master#aa4de1d78471a3d2f0fda1f56801177ddf80f3bf" dependencies = [ "either", "inkwell_internals", @@ -250,7 +267,7 @@ dependencies = [ [[package]] name = "inkwell_internals" version = "0.3.0" -source = "git+https://github.com/TheDan64/inkwell#aa4de1d78471a3d2f0fda1f56801177ddf80f3bf" +source = "git+https://github.com/TheDan64/inkwell?branch=master#aa4de1d78471a3d2f0fda1f56801177ddf80f3bf" dependencies = [ "proc-macro2", "quote", @@ -263,7 +280,7 @@ version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "61124eeebbd69b8190558df225adf7e4caafce0d743919e5d6b19652314ec5ec" dependencies = [ - "cfg-if", + "cfg-if 1.0.0", ] [[package]] @@ -369,7 +386,7 @@ version = "0.4.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "51b9bbe6c47d51fc3e1a9b945965946b4c44142ab8792c50835a980d362c2710" dependencies = [ - "cfg-if", + "cfg-if 1.0.0", ] [[package]] @@ -382,6 +399,8 @@ checksum = "b16bd47d9e329435e309c58469fe0791c2d0d1ba96ec0954152a5ae2b04387dc" name = "nac3core" version = "0.1.0" dependencies = [ + "ena", + "generational-arena", "indoc 1.0.3", "inkwell", "num-bigint 0.3.2", @@ -478,7 +497,7 @@ version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fa7a782938e745763fe6907fc6ba86946d72f49fe7e21de074e08128a99fb018" dependencies = [ - "cfg-if", + "cfg-if 1.0.0", "instant", "libc", "redox_syscall", @@ -729,7 +748,7 @@ checksum = "f497285884f3fcff424ffc933e56d7cbca511def0c9831a7f9b5f6153e3cc89b" [[package]] name = "rustpython-ast" version = "0.1.0" -source = "git+https://github.com/RustPython/RustPython#bee5794b6e2b777ee343c7277954b73d06b5cb7d" +source = "git+https://github.com/RustPython/RustPython?branch=master#bee5794b6e2b777ee343c7277954b73d06b5cb7d" dependencies = [ "num-bigint 0.4.0", ] @@ -737,7 +756,7 @@ dependencies = [ [[package]] name = "rustpython-parser" version = "0.1.2" -source = "git+https://github.com/RustPython/RustPython#bee5794b6e2b777ee343c7277954b73d06b5cb7d" +source = "git+https://github.com/RustPython/RustPython?branch=master#bee5794b6e2b777ee343c7277954b73d06b5cb7d" dependencies = [ "ahash", "lalrpop", diff --git a/nac3core/Cargo.toml b/nac3core/Cargo.toml index af84b255..79ad0997 100644 --- a/nac3core/Cargo.toml +++ b/nac3core/Cargo.toml @@ -10,4 +10,6 @@ num-traits = "0.2" inkwell = { git = "https://github.com/TheDan64/inkwell", branch = "master", features = ["llvm10-0"] } rustpython-parser = { git = "https://github.com/RustPython/RustPython", branch = "master" } indoc = "1.0" +generational-arena = "0.2" +ena = "0.14" diff --git a/nac3core/src/lib.rs b/nac3core/src/lib.rs index ff64158d..9fdca9d4 100644 --- a/nac3core/src/lib.rs +++ b/nac3core/src/lib.rs @@ -5,6 +5,8 @@ extern crate num_bigint; extern crate inkwell; extern crate rustpython_parser; extern crate indoc; +extern crate generational_arena; +extern crate ena; mod typecheck; diff --git a/nac3core/src/typecheck/mod.rs b/nac3core/src/typecheck/mod.rs index 19afdd1c..118a79ab 100644 --- a/nac3core/src/typecheck/mod.rs +++ b/nac3core/src/typecheck/mod.rs @@ -1,7 +1,7 @@ #![allow(dead_code)] -mod context; -pub mod location; -mod magic_methods; -mod primitives; -pub mod symbol_resolver; +// mod context; +// pub mod location; +// mod magic_methods; +// mod primitives; +// pub mod symbol_resolver; pub mod typedef; diff --git a/nac3core/src/typecheck/typedef.rs b/nac3core/src/typecheck/typedef.rs index 7c447b61..a3f14830 100644 --- a/nac3core/src/typecheck/typedef.rs +++ b/nac3core/src/typecheck/typedef.rs @@ -1,64 +1,219 @@ +use ena::unify::{InPlaceUnificationTable, NoError, UnifyKey, UnifyValue}; +use generational_arena::{Arena, Index}; +use std::cell::RefCell; use std::collections::HashMap; -use std::collections::HashSet; -use std::rc::Rc; -#[derive(PartialEq, Eq, Copy, Clone, Hash, Debug)] -pub struct VariableId(pub(crate) usize); +// Order: +// TVar +// |--> TSeq +// | |--> TTuple +// | `--> TList +// |--> TRecord +// | |--> TObj +// | `--> TVirtual +// `--> TCall +// `--> TFunc -#[derive(PartialEq, Eq, Copy, Clone, Hash, Debug)] -pub struct TypeId(pub(crate) usize); +#[derive(Copy, Clone, PartialEq, Eq, Debug)] +struct Type(u32); -#[derive(PartialEq, Eq, Clone, Hash, Debug)] -pub enum TypeEnum { - ClassType(TypeId, Vec>), - TypeVariable(VariableId), +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +struct TypeIndex(Index); + +impl UnifyValue for TypeIndex { + type Error = NoError; + fn unify_values(_: &Self, value2: &Self) -> Result { + // WARN: depends on the implementation details of ena. + // We do not use this to do unification, instead we perform unification and assign the type + // by `union_value(key, new_value)`, which set the value as `unify_values(key.value, new_value)`. + // So, we need to return the right one. + Ok(*value2) + } } -pub type Type = Rc; - -#[derive(Clone)] -pub struct BaseDef<'a> { - pub name: &'a str, - pub fields: HashMap<&'a str, Type>, +impl UnifyKey for Type { + type Value = TypeIndex; + fn index(&self) -> u32 { + self.0 + } + fn from_index(u: u32) -> Self { + Type(u) + } + fn tag() -> &'static str { + "TypeKey" + } } -#[derive(Clone)] -pub struct TypeDef<'a> { - pub base: BaseDef<'a>, - pub parents: Vec, - pub params: Vec, +type VarMapping = HashMap; + +struct Call { + posargs: Vec, + kwargs: HashMap, + ret: Type, + fn_id: usize, } -#[derive(Clone)] -pub struct VarDef<'a> { - pub name: Option<&'a str>, - pub bound: Vec, +struct FuncArg { + name: String, + ty: Type, + is_optional: bool, +} + +enum TypeEnum { + TVar { + // TODO: upper/lower bound + id: u32, + }, + TSeq { + index: HashMap, + }, + TTuple { + index: HashMap, + }, + TList { + ty: Type, + }, + TRecord { + fields: HashMap, + }, + TObj { + obj_id: usize, + instantiation: VarMapping, + }, + TVirtual { + obj_id: usize, + instantiation: VarMapping, + }, + TCall { + calls: Vec, + }, + TFunc { + args: Vec, + ret: Type, + instantiation: VarMapping, + }, } impl TypeEnum { - pub fn get_vars(&self, vars: &mut HashSet) { + fn get_int(&self) -> i32 { match self { - TypeEnum::TypeVariable(id) => { - vars.insert(*id); - } - TypeEnum::ClassType(_, params) => { - for t in params.iter() { - t.get_vars(vars) - } - } + TypeEnum::TVar { .. } => 1, + TypeEnum::TSeq { .. } => 5, + TypeEnum::TTuple { .. } => 10, + TypeEnum::TList { .. } => 15, + TypeEnum::TRecord { .. } => 7, + TypeEnum::TObj { .. } => 14, + TypeEnum::TVirtual { .. } => 21, + TypeEnum::TCall { .. } => 11, + TypeEnum::TFunc { .. } => 22, } } - pub fn subst(&self, map: &HashMap) -> Type { - match self { - TypeEnum::TypeVariable(id) => map - .get(id) - .cloned() - .unwrap_or_else(|| Rc::new(self.clone())), - TypeEnum::ClassType(id, params) => Rc::new(TypeEnum::ClassType( - *id, - params.iter().map(|t| t.subst(map)).collect(), - )), + // e.g. List <: Var + pub fn kind_le(&self, other: &TypeEnum) -> bool { + let a = self.get_int(); + let b = other.get_int(); + (a % b) == 0 + } +} + +struct ObjDef { + name: String, + fields: HashMap, +} + +struct Unifier { + unification_table: RefCell>, + type_arena: RefCell>, + obj_def_table: Vec, +} + +impl Unifier { + fn unify(&self, a: Type, b: Type) { + let (i_a, i_b) = { + let mut table = self.unification_table.borrow_mut(); + (table.probe_value(a), table.probe_value(b)) + }; + + if i_a == i_b { + return; + } + + let arena = self.type_arena.borrow(); + let mut ty_a = arena.get(i_a.0).unwrap(); + let mut ty_b = arena.get(i_b.0).unwrap(); + + // simplify our pattern matching... + if ty_a.kind_le(ty_b) { + std::mem::swap(&mut ty_a, &mut ty_b); + } + + // TODO: type variables bound check + match (ty_a, ty_b) { + (TypeEnum::TVar { .. }, TypeEnum::TVar { .. }) => { + self.unification_table.borrow_mut().union(a, b); + let old = if self.unification_table.borrow_mut().find(a) == a { + i_b + } else { + i_a + } + .0; + self.type_arena.borrow_mut().remove(old); + } + (TypeEnum::TVar { .. }, _) => { + let mut table = self.unification_table.borrow_mut(); + table.union(a, b); + table.union_value(a, i_b); + // TODO: occur check... + self.type_arena.borrow_mut().remove(i_a.0); + } + (TypeEnum::TSeq { .. }, TypeEnum::TSeq { .. }) => { + let is_a = { + let mut table = self.unification_table.borrow_mut(); + table.union(a, b); + table.find(a) == a + }; + // fighting with the borrow checker... + // we have to manually drop this before we call borrow_mut + std::mem::drop(arena); + let (mut new, old) = { + // the mutable arena would be dropped before calling unify later + let mut arena = self.type_arena.borrow_mut(); + let (ty_a, ty_b) = arena.get2_mut(i_a.0, i_b.0); + let index1 = if let Some(TypeEnum::TSeq { index }) = ty_a { + std::mem::take(index) + } else { + unreachable!() + }; + let index2 = if let Some(TypeEnum::TSeq { index }) = ty_b { + std::mem::take(index) + } else { + unreachable!() + }; + if is_a { + arena.remove(i_b.0); + (index1, index2) + } else { + arena.remove(i_a.0); + (index2, index1) + } + }; + for (key, value) in old.iter() { + if let Some(ty) = new.get(key) { + self.unify(*ty, *value); + } else { + new.insert(*key, *value); + } + } + // put it back + let index = if is_a { i_a } else { i_b }.0; + if let Some(TypeEnum::TSeq { index }) = self.type_arena.borrow_mut().get_mut(index) { + *index = new; + } else { + unreachable!() + } + } + _ => unimplemented!(), } } }