nac3_sca/nac3core/src/typecheck/typedef.rs

447 lines
15 KiB
Rust
Raw Normal View History

2021-07-14 08:12:47 +08:00
use ena::unify::{InPlaceUnificationTable, NoError, UnifyKey, UnifyValue};
use generational_arena::{Arena, Index};
use std::cell::RefCell;
2021-07-14 15:24:00 +08:00
use std::collections::BTreeMap;
use std::mem::swap;
2021-07-14 15:58:58 +08:00
use std::rc::Rc;
2021-01-04 14:49:48 +08:00
2021-07-14 08:12:47 +08:00
// Order (each kind is a refinement of its parent):
// TVar
// |--> TSeq
// |    |--> TTuple
// |    `--> TList
// |--> TRecord
// |    |--> TObj
// |    `--> TVirtual
// `--> TCall
//      `--> TFunc
2021-01-04 14:49:48 +08:00
2021-07-14 08:12:47 +08:00
/// Handle to a type: the key under which a type is registered in the
/// unification table.
///
/// The wrapped `u32` is the key index exposed through the `UnifyKey` impl.
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
struct Type(u32);
2021-01-04 14:49:48 +08:00
2021-07-14 08:12:47 +08:00
/// Value stored in the unification table for every `Type` key: a
/// `generational_arena` `Index`, which `Unifier` uses to look up the
/// `TypeEnum` in its `type_arena`.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
struct TypeIndex(Index);
impl UnifyValue for TypeIndex {
    type Error = NoError;

    /// Merges two table values by always keeping the second one.
    ///
    /// This never fails (`Error` is `NoError`) and ignores the first value.
    fn unify_values(_: &Self, value2: &Self) -> Result<Self, Self::Error> {
        // WARN: depends on the implementation details of ena.
        // We do not use this to do unification. Instead we perform the
        // unification ourselves and assign the type with
        // `union_value(key, new_value)`, which sets the value to
        // `unify_values(key.value, new_value)`. So we need to return the
        // right-hand one.
        Ok(*value2)
    }
}
/// Lets `Type` act as a key of the `ena` unification table, with `TypeIndex`
/// as the value attached to each set of unified keys.
impl UnifyKey for Type {
    type Value = TypeIndex;

    /// Returns the raw table index wrapped by this key.
    fn index(&self) -> u32 {
        self.0
    }

    /// Rebuilds a key from a raw table index.
    fn from_index(u: u32) -> Self {
        Type(u)
    }

    /// Name reported for this key type.
    fn tag() -> &'static str {
        "TypeID"
    }
}
2021-07-14 15:24:00 +08:00
/// Ordered map whose values default to `Type`.
type Mapping<K, V = Type> = BTreeMap<K, V>;
/// `Mapping` keyed by `u32`: type-variable ids in `Unifier::subst`, element
/// indices in `TypeEnum::TSeq`.
type VarMap = Mapping<u32>;
2021-01-04 14:49:48 +08:00
2021-07-14 08:12:47 +08:00
struct Call {
posargs: Vec<Type>,
2021-07-14 15:24:00 +08:00
kwargs: BTreeMap<String, Type>,
2021-07-14 08:12:47 +08:00
ret: Type,
fn_id: usize,
2021-01-04 14:49:48 +08:00
}
2021-07-14 08:12:47 +08:00
struct FuncArg {
name: String,
ty: Type,
is_optional: bool,
2021-01-04 14:49:48 +08:00
}
2021-07-14 15:58:58 +08:00
// We use a lot of `Rc`/`RefCell`s here to keep the code simple.
// We may not really need this many `Rc`s, but avoiding them would require
// considerably more complicated code.
2021-07-14 08:12:47 +08:00
enum TypeEnum {
TVar {
// TODO: upper/lower bound
id: u32,
},
TSeq {
2021-07-14 15:58:58 +08:00
map: VarMap,
2021-07-14 08:12:47 +08:00
},
TTuple {
2021-07-14 15:58:58 +08:00
ty: Vec<Type>,
2021-07-14 08:12:47 +08:00
},
TList {
ty: Type,
},
TRecord {
2021-07-14 15:58:58 +08:00
fields: Mapping<String>,
2021-07-14 08:12:47 +08:00
},
TObj {
obj_id: usize,
2021-07-14 15:58:58 +08:00
fields: Mapping<String>,
params: VarMap,
2021-07-14 08:12:47 +08:00
},
TVirtual {
2021-07-14 15:24:00 +08:00
ty: Type,
2021-07-14 08:12:47 +08:00
},
TCall {
2021-07-14 15:58:58 +08:00
calls: Vec<Call>,
2021-07-14 08:12:47 +08:00
},
TFunc {
2021-07-14 15:58:58 +08:00
args: Vec<FuncArg>,
2021-07-14 08:12:47 +08:00
ret: Type,
2021-07-14 15:58:58 +08:00
params: VarMap,
2021-07-14 08:12:47 +08:00
},
2021-01-04 14:49:48 +08:00
}
2021-06-30 16:28:18 +08:00
impl TypeEnum {
2021-07-14 08:12:47 +08:00
fn get_int(&self) -> i32 {
2021-06-30 16:28:18 +08:00
match self {
2021-07-14 08:12:47 +08:00
TypeEnum::TVar { .. } => 1,
TypeEnum::TSeq { .. } => 5,
TypeEnum::TTuple { .. } => 10,
TypeEnum::TList { .. } => 15,
TypeEnum::TRecord { .. } => 7,
TypeEnum::TObj { .. } => 14,
TypeEnum::TVirtual { .. } => 21,
TypeEnum::TCall { .. } => 11,
TypeEnum::TFunc { .. } => 22,
2021-06-30 16:28:18 +08:00
}
}
2021-07-14 08:12:47 +08:00
// e.g. List <: Var
pub fn kind_le(&self, other: &TypeEnum) -> bool {
let a = self.get_int();
let b = other.get_int();
(a % b) == 0
}
2021-07-14 15:24:00 +08:00
pub fn get_kind_name(&self) -> &'static str {
// this function is for debugging only...
// a proper to_str implementation requires the context
match self {
TypeEnum::TVar { .. } => "TVar",
TypeEnum::TSeq { .. } => "TSeq",
TypeEnum::TTuple { .. } => "TTuple",
TypeEnum::TList { .. } => "TList",
TypeEnum::TRecord { .. } => "TRecord",
TypeEnum::TObj { .. } => "TObj",
TypeEnum::TVirtual { .. } => "TVirtual",
TypeEnum::TCall { .. } => "TCall",
TypeEnum::TFunc { .. } => "TFunc",
}
}
2021-07-14 08:12:47 +08:00
}
struct ObjDef {
name: String,
2021-07-14 15:24:00 +08:00
fields: Mapping<String>,
2021-07-14 08:12:47 +08:00
}
struct Unifier {
unification_table: RefCell<InPlaceUnificationTable<Type>>,
2021-07-14 15:58:58 +08:00
type_arena: RefCell<Arena<Rc<RefCell<TypeEnum>>>>,
2021-07-14 08:12:47 +08:00
obj_def_table: Vec<ObjDef>,
}
impl Unifier {
2021-07-14 15:24:00 +08:00
fn unify(&self, a: Type, b: Type) -> Result<(), String> {
let (mut i_a, mut i_b) = {
2021-07-14 08:12:47 +08:00
let mut table = self.unification_table.borrow_mut();
(table.probe_value(a), table.probe_value(b))
};
if i_a == i_b {
2021-07-14 15:24:00 +08:00
return Ok(());
2021-07-14 08:12:47 +08:00
}
2021-07-14 15:58:58 +08:00
let (ty_a_cell, ty_b_cell) = {
let arena = self.type_arena.borrow();
(
arena.get(i_a.0).unwrap().clone(),
arena.get(i_b.0).unwrap().clone(),
)
};
let mut ty_a = ty_a_cell.borrow();
let mut ty_b = ty_b_cell.borrow();
2021-07-14 08:12:47 +08:00
// simplify our pattern matching...
2021-07-14 15:58:58 +08:00
if ty_a.kind_le(&ty_b) {
2021-07-14 15:24:00 +08:00
swap(&mut i_a, &mut i_b);
swap(&mut ty_a, &mut ty_b);
2021-07-14 08:12:47 +08:00
}
2021-07-14 15:58:58 +08:00
match &*ty_a {
2021-07-14 15:24:00 +08:00
TypeEnum::TVar { .. } => {
2021-07-14 15:58:58 +08:00
match *ty_b {
2021-07-14 15:24:00 +08:00
TypeEnum::TVar { .. } => {
// TODO: type variables bound check
let old = {
let mut table = self.unification_table.borrow_mut();
table.union(a, b);
if table.find(a) == a {
i_b
} else {
i_a
}
}
.0;
self.type_arena.borrow_mut().remove(old);
}
_ => {
// TODO: type variables bound check and occur check
self.set_a_to_b(a, b);
}
2021-07-14 08:12:47 +08:00
}
}
2021-07-14 15:24:00 +08:00
TypeEnum::TSeq { map: map1 } => {
2021-07-14 15:58:58 +08:00
match &*ty_b {
2021-07-14 15:24:00 +08:00
TypeEnum::TSeq { map: map2 } => {
self.set_a_to_b(a, b);
2021-07-14 15:58:58 +08:00
drop(ty_a);
if let TypeEnum::TSeq { map: map1 } = &mut *ty_a_cell.as_ref().borrow_mut()
2021-07-14 15:24:00 +08:00
{
2021-07-14 15:58:58 +08:00
// unify them to map1
for (key, value) in map2.iter() {
if let Some(ty) = map1.get(key) {
self.unify(*ty, *value)?;
} else {
map1.insert(*key, *value);
}
}
2021-07-14 15:24:00 +08:00
} else {
unreachable!()
}
}
TypeEnum::TTuple { ty: types } => {
self.set_a_to_b(a, b);
let len = types.len() as u32;
2021-07-14 15:58:58 +08:00
for (k, v) in map1.iter() {
2021-07-14 15:24:00 +08:00
if *k >= len {
return Err(format!(
"Tuple index out of range. (Length: {}, Index: {})",
types.len(),
k
));
}
self.unify(*v, types[*k as usize])?;
}
}
TypeEnum::TList { ty } => {
self.set_a_to_b(a, b);
2021-07-14 15:58:58 +08:00
for v in map1.values() {
self.unify(*v, *ty)?;
2021-07-14 15:24:00 +08:00
}
}
_ => {
2021-07-14 15:58:58 +08:00
return self.report_kind_error(&*ty_a, &*ty_b);
2021-07-14 15:24:00 +08:00
}
}
2021-07-14 08:12:47 +08:00
}
2021-07-14 15:24:00 +08:00
TypeEnum::TTuple { ty: ty1 } => {
2021-07-14 15:58:58 +08:00
if let TypeEnum::TTuple { ty: ty2 } = &*ty_b {
2021-07-14 15:24:00 +08:00
if ty1.len() != ty2.len() {
return Err(format!(
"Cannot unify tuples with length {} and {}",
ty1.len(),
ty2.len()
));
}
self.set_a_to_b(a, b);
for (a, b) in ty1.iter().zip(ty2.iter()) {
self.unify(*a, *b)?;
}
} else {
2021-07-14 15:58:58 +08:00
return self.report_kind_error(&*ty_a, &*ty_b);
2021-07-14 15:24:00 +08:00
}
}
TypeEnum::TList { ty: ty1 } => {
2021-07-14 15:58:58 +08:00
if let TypeEnum::TList { ty: ty2 } = *ty_b {
2021-07-14 15:24:00 +08:00
self.set_a_to_b(a, b);
2021-07-14 15:58:58 +08:00
self.unify(*ty1, ty2)?;
2021-07-14 15:24:00 +08:00
} else {
2021-07-14 15:58:58 +08:00
return self.report_kind_error(&*ty_a, &*ty_b);
2021-07-14 15:24:00 +08:00
}
}
2021-07-14 15:58:58 +08:00
TypeEnum::TRecord { .. } => {
match &*ty_b {
2021-07-14 15:24:00 +08:00
TypeEnum::TRecord { fields: fields2 } => {
self.set_a_to_b(a, b);
2021-07-14 15:58:58 +08:00
drop(ty_a);
if let TypeEnum::TRecord { fields: fields1 } =
&mut *ty_a_cell.as_ref().borrow_mut()
2021-07-14 15:24:00 +08:00
{
2021-07-14 15:58:58 +08:00
for (key, value) in fields2.iter() {
if let Some(ty) = fields1.get(key) {
self.unify(*ty, *value)?;
} else {
fields1.insert(key.clone(), *value);
}
}
2021-07-14 15:24:00 +08:00
} else {
unreachable!()
}
}
// obj...
_ => {
2021-07-14 15:58:58 +08:00
return self.report_kind_error(&*ty_a, &*ty_b);
2021-07-14 15:24:00 +08:00
}
}
}
_ => unimplemented!(),
}
Ok(())
}
fn set_a_to_b(&self, a: Type, b: Type) {
// unify a and b together, and set the value to b's value this would
// also deallocate a's previous value in the arena to save space...
let mut table = self.unification_table.borrow_mut();
let i_a = table.probe_value(a);
let i_b = table.probe_value(b);
table.union(a, b);
table.union_value(a, i_b);
self.type_arena.borrow_mut().remove(i_a.0);
}
fn report_kind_error(&self, a: &TypeEnum, b: &TypeEnum) -> Result<(), String> {
Err(format!(
"Cannot unify {} with {}",
a.get_kind_name(),
b.get_kind_name()
))
}
fn subst(&self, a: Type, mapping: &VarMap) -> Option<Type> {
let index = self.unification_table.borrow_mut().probe_value(a);
2021-07-14 15:58:58 +08:00
let ty_cell = {
let arena = self.type_arena.borrow();
arena.get(index.0).unwrap().clone()
};
let ty = ty_cell.borrow();
2021-07-14 15:24:00 +08:00
// this function would only be called when we instantiate functions.
// function type signature should ONLY contain concrete types and type
// variables, i.e. things like TRecord, TCall should not occur, and we
// should be safe to not implement the substitution for those variants.
2021-07-14 15:58:58 +08:00
match &*ty {
2021-07-14 15:24:00 +08:00
TypeEnum::TVar { id } => mapping.get(&id).cloned(),
2021-07-14 15:58:58 +08:00
TypeEnum::TSeq { map } => self.subst_map(map, mapping).map(|m| {
let index = self
.type_arena
.borrow_mut()
.insert(Rc::new(TypeEnum::TSeq { map: m }.into()));
self.unification_table
.borrow_mut()
.new_key(TypeIndex(index))
}),
2021-07-14 15:24:00 +08:00
TypeEnum::TTuple { ty } => {
let mut new_ty = None;
for (i, t) in ty.iter().enumerate() {
if let Some(t1) = self.subst(*t, mapping) {
if new_ty.is_none() {
new_ty = Some(ty.clone());
}
new_ty.as_mut().unwrap()[i] = t1;
}
}
new_ty.map(|t| {
let index = self
.type_arena
.borrow_mut()
2021-07-14 15:58:58 +08:00
.insert(Rc::new(TypeEnum::TTuple { ty: t }.into()));
2021-07-14 15:24:00 +08:00
self.unification_table
.borrow_mut()
.new_key(TypeIndex(index))
})
}
2021-07-14 15:58:58 +08:00
TypeEnum::TList { ty } => self.subst(*ty, mapping).map(|t| {
let index = self
.type_arena
.borrow_mut()
.insert(Rc::new(TypeEnum::TList { ty: t }.into()));
self.unification_table
.borrow_mut()
.new_key(TypeIndex(index))
}),
TypeEnum::TVirtual { ty } => self.subst(*ty, mapping).map(|t| {
let index = self
.type_arena
.borrow_mut()
.insert(Rc::new(TypeEnum::TVirtual { ty: t }.into()));
self.unification_table
.borrow_mut()
.new_key(TypeIndex(index))
}),
2021-07-14 15:24:00 +08:00
TypeEnum::TObj {
obj_id,
fields,
params,
} => {
// Type variables in field types must be present in the type parameter.
// If the mapping does not contain any type variables in the
// parameter list, we don't need to substitute the fields.
// This is also used to prevent infinite substitution...
let need_subst = params.values().any(|v| {
let index = self.unification_table.borrow_mut().probe_value(*v);
let arena = self.type_arena.borrow();
2021-07-14 15:58:58 +08:00
let ty_cell = arena.get(index.0).unwrap();
let ty = ty_cell.borrow();
if let TypeEnum::TVar { id } = &*ty {
2021-07-14 15:24:00 +08:00
mapping.contains_key(id)
2021-07-14 08:12:47 +08:00
} else {
2021-07-14 15:24:00 +08:00
false
2021-07-14 08:12:47 +08:00
}
2021-07-14 15:24:00 +08:00
});
if need_subst {
2021-07-14 15:58:58 +08:00
let index = self.type_arena.borrow_mut().insert(Rc::new(
TypeEnum::TObj {
obj_id: *obj_id,
params: self
.subst_map(&params, mapping)
.or_else(|| Some(params.clone()))
.unwrap(),
fields: self
.subst_map(&fields, mapping)
.or_else(|| Some(fields.clone()))
.unwrap(),
}
.into(),
));
2021-07-14 15:24:00 +08:00
Some(
self.unification_table
.borrow_mut()
.new_key(TypeIndex(index)),
)
} else {
None
2021-07-14 08:12:47 +08:00
}
}
_ => unimplemented!(),
2021-06-30 16:28:18 +08:00
}
}
2021-07-14 15:24:00 +08:00
fn subst_map<K>(&self, map: &Mapping<K>, mapping: &VarMap) -> Option<Mapping<K>>
where
K: std::cmp::Ord + std::clone::Clone,
{
let mut map2 = None;
for (k, v) in map.iter() {
if let Some(v1) = self.subst(*v, mapping) {
if map2.is_none() {
map2 = Some(map.clone());
}
*map2.as_mut().unwrap().get_mut(k).unwrap() = v1;
}
}
map2
}
2021-06-30 16:28:18 +08:00
}