core: assignments to distinguish between setitem and getitem

This commit is contained in:
lyken 2024-08-02 15:01:38 +08:00
parent 19cb5bf882
commit 34661ffa7f
4 changed files with 642 additions and 398 deletions

View File

@ -1122,7 +1122,7 @@ pub fn gen_comprehension<'ctx, G: CodeGenerator>(
)
.into_pointer_value();
let val = ctx.build_gep_and_load(arr_ptr, &[tmp], Some("val"));
generator.gen_assign(ctx, target, val.into())?;
generator.gen_assign(ctx, target, val.into(), elt.custom.unwrap())?;
}
_ => {
panic!(

View File

@ -118,16 +118,36 @@ pub trait CodeGenerator {
}
/// Generate code for an assignment expression.
///
/// Item assignments will be delegated to [`CodeGenerator::gen_setitem`]
fn gen_assign<'ctx>(
&mut self,
ctx: &mut CodeGenContext<'ctx, '_>,
target: &Expr<Option<Type>>,
value: ValueEnum<'ctx>,
value_ty: Type,
) -> Result<(), String>
where
Self: Sized,
{
gen_assign(self, ctx, target, value)
gen_assign(self, ctx, target, value, value_ty)
}
/// Generate code for an item assignment.
///
/// i.e., `target[key] = value`
///
/// * `target` - The expression being subscripted (the object that receives the item).
/// * `key` - The subscript expression; the default implementation handles both a slice and a
///   plain index.
/// * `value` - The already-generated right-hand side value to store.
/// * `value_ty` - The type of `value`.
///
/// The default implementation simply forwards to the free function `gen_setitem`; override this
/// method to customize how item assignments are lowered.
fn gen_setitem<'ctx>(
&mut self,
ctx: &mut CodeGenContext<'ctx, '_>,
target: &Expr<Option<Type>>,
key: &Expr<Option<Type>>,
value: ValueEnum<'ctx>,
value_ty: Type,
) -> Result<(), String>
where
Self: Sized,
{
// Delegate to the shared, generator-agnostic implementation.
gen_setitem(self, ctx, target, key, value, value_ty)
}
/// Generate code for a while expression.

View File

@ -10,10 +10,10 @@ use crate::{
expr::gen_binop_expr,
gen_in_range_check,
},
toplevel::{helper::PrimDef, numpy::unpack_ndarray_var_tys, DefinitionId, TopLevelDef},
toplevel::{DefinitionId, TopLevelDef},
typecheck::{
magic_methods::Binop,
typedef::{FunSignature, Type, TypeEnum},
typedef::{iter_type_vars, FunSignature, Type, TypeEnum},
},
};
use inkwell::{
@ -23,6 +23,7 @@ use inkwell::{
values::{BasicValue, BasicValueEnum, FunctionValue, IntValue, PointerValue},
IntPredicate,
};
use itertools::izip;
use nac3parser::ast::{
Constant, ExcepthandlerKind, Expr, ExprKind, Location, Stmt, StmtKind, StrRef,
};
@ -97,8 +98,6 @@ pub fn gen_store_target<'ctx, G: CodeGenerator>(
pattern: &Expr<Option<Type>>,
name: Option<&str>,
) -> Result<Option<PointerValue<'ctx>>, String> {
let llvm_usize = generator.get_size_type(ctx.ctx);
// very similar to gen_expr, but we don't do an extra load at the end
// and we flatten nested tuples
Ok(Some(match &pattern.node {
@ -137,65 +136,6 @@ pub fn gen_store_target<'ctx, G: CodeGenerator>(
}
.unwrap()
}
ExprKind::Subscript { value, slice, .. } => {
match ctx.unifier.get_ty_immutable(value.custom.unwrap()).as_ref() {
TypeEnum::TObj { obj_id, .. } if *obj_id == PrimDef::List.id() => {
let v = generator
.gen_expr(ctx, value)?
.unwrap()
.to_basic_value_enum(ctx, generator, value.custom.unwrap())?
.into_pointer_value();
let v = ListValue::from_ptr_val(v, llvm_usize, None);
let len = v.load_size(ctx, Some("len"));
let raw_index = generator
.gen_expr(ctx, slice)?
.unwrap()
.to_basic_value_enum(ctx, generator, slice.custom.unwrap())?
.into_int_value();
let raw_index = ctx
.builder
.build_int_s_extend(raw_index, generator.get_size_type(ctx.ctx), "sext")
.unwrap();
// handle negative index
let is_negative = ctx
.builder
.build_int_compare(
IntPredicate::SLT,
raw_index,
generator.get_size_type(ctx.ctx).const_zero(),
"is_neg",
)
.unwrap();
let adjusted = ctx.builder.build_int_add(raw_index, len, "adjusted").unwrap();
let index = ctx
.builder
.build_select(is_negative, adjusted, raw_index, "index")
.map(BasicValueEnum::into_int_value)
.unwrap();
// unsigned less than is enough, because negative index after adjustment is
// bigger than the length (for unsigned cmp)
let bound_check = ctx
.builder
.build_int_compare(IntPredicate::ULT, index, len, "inbound")
.unwrap();
ctx.make_assert(
generator,
bound_check,
"0:IndexError",
"index {0} out of bounds 0:{1}",
[Some(raw_index), Some(len), None],
slice.location,
);
v.data().ptr_offset(ctx, generator, &index, name)
}
TypeEnum::TObj { obj_id, .. } if *obj_id == PrimDef::NDArray.id() => {
todo!()
}
_ => unreachable!(),
}
}
_ => unreachable!(),
}))
}
@ -206,68 +146,37 @@ pub fn gen_assign<'ctx, G: CodeGenerator>(
ctx: &mut CodeGenContext<'ctx, '_>,
target: &Expr<Option<Type>>,
value: ValueEnum<'ctx>,
value_ty: Type,
) -> Result<(), String> {
let llvm_usize = generator.get_size_type(ctx.ctx);
match &target.node {
ExprKind::Tuple { elts, .. } => {
let BasicValueEnum::StructValue(v) =
value.to_basic_value_enum(ctx, generator, target.custom.unwrap())?
ExprKind::Subscript { value: target, slice: key, .. } => {
// Special handling for item assignment
generator.gen_setitem(ctx, target, key, value, value_ty)?;
}
ExprKind::Tuple { elts, .. } | ExprKind::List { elts, .. } => {
// Handle tuple/list assignment patterns
// e.g., `(x, y, z) = value`, and `x`, `y`, `z` might also be tuple/list patterns themselves.
// NOTE: Inferencer currently enforces value_ty to be a Tuple (Lists are not allowed either) because of
// technical constraints.
let BasicValueEnum::StructValue(vals) =
value.to_basic_value_enum(ctx, generator, value_ty)?
else {
unreachable!()
};
for (i, elt) in elts.iter().enumerate() {
let v = ctx
.builder
.build_extract_value(v, u32::try_from(i).unwrap(), "struct_elem")
.unwrap();
generator.gen_assign(ctx, elt, v.into())?;
}
}
ExprKind::Subscript { value: ls, slice, .. }
if matches!(&slice.node, ExprKind::Slice { .. }) =>
{
let ExprKind::Slice { lower, upper, step } = &slice.node else { unreachable!() };
let ls = generator
.gen_expr(ctx, ls)?
.unwrap()
.to_basic_value_enum(ctx, generator, ls.custom.unwrap())?
.into_pointer_value();
let ls = ListValue::from_ptr_val(ls, llvm_usize, None);
let Some((start, end, step)) =
handle_slice_indices(lower, upper, step, ctx, generator, ls.load_size(ctx, None))?
else {
return Ok(());
};
let value = value
.to_basic_value_enum(ctx, generator, target.custom.unwrap())?
.into_pointer_value();
let value = ListValue::from_ptr_val(value, llvm_usize, None);
let ty = match &*ctx.unifier.get_ty_immutable(target.custom.unwrap()) {
TypeEnum::TObj { obj_id, params, .. } if *obj_id == PrimDef::List.id() => {
*params.iter().next().unwrap().1
}
TypeEnum::TObj { obj_id, .. } if *obj_id == PrimDef::NDArray.id() => {
unpack_ndarray_var_tys(&mut ctx.unifier, target.custom.unwrap()).0
}
_ => unreachable!(),
let TypeEnum::TTuple { ty: val_tys } = &*ctx.unifier.get_ty(value_ty) else {
unreachable!();
};
let ty = ctx.get_llvm_type(generator, ty);
let Some(src_ind) = handle_slice_indices(
&None,
&None,
&None,
ctx,
generator,
value.load_size(ctx, None),
)?
else {
return Ok(());
};
list_slice_assignment(generator, ctx, ty, ls, (start, end, step), value, src_ind);
debug_assert_eq!(elts.len(), val_tys.len());
for (i, (elt, val_ty)) in izip!(elts, val_tys).enumerate() {
let i = u32::try_from(i).unwrap();
let val = ctx.builder.build_extract_value(vals, i, &format!("tuple[{i}]")).unwrap();
generator.gen_assign(ctx, elt, ValueEnum::Dynamic(val), *val_ty)?;
}
}
_ => {
let name = if let ExprKind::Name { id, .. } = &target.node {
@ -293,6 +202,142 @@ pub fn gen_assign<'ctx, G: CodeGenerator>(
Ok(())
}
/// See [`CodeGenerator::gen_setitem`].
///
/// Emits code for `target[key] = value`, dispatching on the type of `target`:
///
/// * `list` with a slice key: `value` is treated as a list and copied into the selected range
///   through `list_slice_assignment`. If `handle_slice_indices` yields no indices for either the
///   destination or the source, nothing further is emitted and `Ok(())` is returned.
/// * `list` with any other key: the key is treated as an integer index. It is sign-extended to
///   the size type, a negative index is offset by the list length, and an `IndexError` assertion
///   guards the store.
/// * `ndarray`: not implemented yet.
///
/// # Errors
///
/// Propagates any error from generating `target` or `key`, or from materializing `value`.
///
/// # Panics
///
/// Panics if `target` or `key` carry no inferred type, if `target` is an `ndarray`
/// (unimplemented), or if `target` is of any other type.
pub fn gen_setitem<'ctx, G: CodeGenerator>(
    generator: &mut G,
    ctx: &mut CodeGenContext<'ctx, '_>,
    target: &Expr<Option<Type>>,
    key: &Expr<Option<Type>>,
    value: ValueEnum<'ctx>,
    value_ty: Type,
) -> Result<(), String> {
    let target_ty = target.custom.unwrap();
    let key_ty = key.custom.unwrap();

    match &*ctx.unifier.get_ty(target_ty) {
        TypeEnum::TObj { obj_id, params: list_params, .. }
            if *obj_id == ctx.primitives.list.obj_id(&ctx.unifier).unwrap() =>
        {
            // Handle list item assignment
            let llvm_usize = generator.get_size_type(ctx.ctx);
            let target_item_ty = iter_type_vars(list_params).nth(0).unwrap().ty;

            let target = generator
                .gen_expr(ctx, target)?
                .unwrap()
                .to_basic_value_enum(ctx, generator, target_ty)?
                .into_pointer_value();
            let target = ListValue::from_ptr_val(target, llvm_usize, None);

            if let ExprKind::Slice { lower, upper, step } = &key.node {
                // Handle assigning to a slice
                let Some((start, end, step)) = handle_slice_indices(
                    lower,
                    upper,
                    step,
                    ctx,
                    generator,
                    target.load_size(ctx, None),
                )?
                else {
                    return Ok(());
                };

                let value =
                    value.to_basic_value_enum(ctx, generator, value_ty)?.into_pointer_value();
                let value = ListValue::from_ptr_val(value, llvm_usize, None);

                let target_item_ty = ctx.get_llvm_type(generator, target_item_ty);

                // The source list is always consumed in full, i.e. as `value[:]`.
                let Some(src_ind) = handle_slice_indices(
                    &None,
                    &None,
                    &None,
                    ctx,
                    generator,
                    value.load_size(ctx, None),
                )?
                else {
                    return Ok(());
                };

                list_slice_assignment(
                    generator,
                    ctx,
                    target_item_ty,
                    target,
                    (start, end, step),
                    value,
                    src_ind,
                );
            } else {
                // Handle assigning to an index
                let len = target.load_size(ctx, Some("len"));

                let raw_index = generator
                    .gen_expr(ctx, key)?
                    .unwrap()
                    .to_basic_value_enum(ctx, generator, key_ty)?
                    .into_int_value();
                let raw_index =
                    ctx.builder.build_int_s_extend(raw_index, llvm_usize, "sext").unwrap();

                // handle negative index
                let is_negative = ctx
                    .builder
                    .build_int_compare(
                        IntPredicate::SLT,
                        raw_index,
                        llvm_usize.const_zero(),
                        "is_neg",
                    )
                    .unwrap();
                let adjusted = ctx.builder.build_int_add(raw_index, len, "adjusted").unwrap();
                let index = ctx
                    .builder
                    .build_select(is_negative, adjusted, raw_index, "index")
                    .map(BasicValueEnum::into_int_value)
                    .unwrap();

                // unsigned less than is enough, because negative index after adjustment is
                // bigger than the length (for unsigned cmp)
                let bound_check = ctx
                    .builder
                    .build_int_compare(IntPredicate::ULT, index, len, "inbound")
                    .unwrap();
                // Report the index as the user wrote it (`raw_index`), not the adjusted one;
                // otherwise `xs[-10]` on a 3-element list would be reported as index -7.
                ctx.make_assert(
                    generator,
                    bound_check,
                    "0:IndexError",
                    "index {0} out of bounds 0:{1}",
                    [Some(raw_index), Some(len), None],
                    key.location,
                );

                // Write value to index on list
                let item_ptr =
                    target.data().ptr_offset(ctx, generator, &index, Some("list_item_ptr"));
                let value = value.to_basic_value_enum(ctx, generator, value_ty)?;
                ctx.builder.build_store(item_ptr, value).unwrap();
            }
        }
        TypeEnum::TObj { obj_id, .. }
            if *obj_id == ctx.primitives.ndarray.obj_id(&ctx.unifier).unwrap() =>
        {
            // Handle NDArray item assignment
            todo!("ndarray subscript assignment is not yet implemented");
        }
        _ => {
            panic!("encountered unknown target type: {}", ctx.unifier.stringify(target_ty));
        }
    }
    Ok(())
}
/// See [`CodeGenerator::gen_for`].
pub fn gen_for<G: CodeGenerator>(
generator: &mut G,
@ -402,7 +447,7 @@ pub fn gen_for<G: CodeGenerator>(
.unwrap();
generator.gen_block(ctx, body.iter())?;
}
TypeEnum::TObj { obj_id, .. }
TypeEnum::TObj { obj_id, params: list_params, .. }
if *obj_id == ctx.primitives.list.obj_id(&ctx.unifier).unwrap() =>
{
let index_addr = generator.gen_var_alloc(ctx, size_t.into(), Some("for.index.addr"))?;
@ -442,8 +487,8 @@ pub fn gen_for<G: CodeGenerator>(
.map(BasicValueEnum::into_int_value)
.unwrap();
let val = ctx.build_gep_and_load(arr_ptr, &[index], Some("val"));
generator.gen_assign(ctx, target, val.into())?;
let val_ty = iter_type_vars(list_params).nth(0).unwrap().ty;
generator.gen_assign(ctx, target, val.into(), val_ty)?;
generator.gen_block(ctx, body.iter())?;
}
_ => {
@ -1604,14 +1649,14 @@ pub fn gen_stmt<G: CodeGenerator>(
}
StmtKind::AnnAssign { target, value, .. } => {
if let Some(value) = value {
let Some(value) = generator.gen_expr(ctx, value)? else { return Ok(()) };
generator.gen_assign(ctx, target, value)?;
let Some(value_enum) = generator.gen_expr(ctx, value)? else { return Ok(()) };
generator.gen_assign(ctx, target, value_enum, value.custom.unwrap())?;
}
}
StmtKind::Assign { targets, value, .. } => {
let Some(value) = generator.gen_expr(ctx, value)? else { return Ok(()) };
let Some(value_enum) = generator.gen_expr(ctx, value)? else { return Ok(()) };
for target in targets {
generator.gen_assign(ctx, target, value.clone())?;
generator.gen_assign(ctx, target, value_enum.clone(), value.custom.unwrap())?;
}
}
StmtKind::Continue { .. } => {
@ -1625,15 +1670,16 @@ pub fn gen_stmt<G: CodeGenerator>(
StmtKind::For { .. } => generator.gen_for(ctx, stmt)?,
StmtKind::With { .. } => generator.gen_with(ctx, stmt)?,
StmtKind::AugAssign { target, op, value, .. } => {
let value = gen_binop_expr(
let value_enum = gen_binop_expr(
generator,
ctx,
target,
Binop::aug_assign(*op),
value,
stmt.location,
)?;
generator.gen_assign(ctx, target, value.unwrap())?;
)?
.unwrap();
generator.gen_assign(ctx, target, value_enum, value.custom.unwrap())?;
}
StmtKind::Try { .. } => gen_try(generator, ctx, stmt)?,
StmtKind::Raise { exc, .. } => {

View File

@ -1,7 +1,6 @@
use std::collections::{HashMap, HashSet};
use std::convert::{From, TryInto};
use std::iter::once;
use std::ops::Not;
use std::{cell::RefCell, sync::Arc};
use super::{
@ -12,6 +11,7 @@ use super::{
RecordField, RecordKey, Type, TypeEnum, TypeVar, Unifier, VarMap,
},
};
use crate::typecheck::typedef::Mapping;
use crate::{
symbol_resolver::{SymbolResolver, SymbolValue},
toplevel::{
@ -123,6 +123,25 @@ fn report_type_error<T>(
Err(HashSet::from([TypeError::new(kind, loc).to_display(unifier).to_string()]))
}
/// Mark every assignable node of an assignment's left-hand side as [`ExprContext::Store`].
///
/// `Name`, `Attribute` and `Subscript` nodes are marked directly. `Tuple` and `List` nodes are
/// marked and then each of their elements is visited recursively. Any other node kind is left
/// untouched.
///
/// This exists because nac3parser's `ExprContext` output is generally incorrect and has to be
/// patched up by hand.
fn fix_assignment_target_context(node: &mut ast::Located<ExprKind>) {
    match &mut node.node {
        // Compound patterns: mark the container, then descend into its elements.
        ExprKind::Tuple { ctx, elts } | ExprKind::List { ctx, elts } => {
            *ctx = ExprContext::Store;
            for elt in elts.iter_mut() {
                fix_assignment_target_context(elt);
            }
        }
        // Leaf patterns: only the node itself needs fixing.
        ExprKind::Name { ctx, .. }
        | ExprKind::Attribute { ctx, .. }
        | ExprKind::Subscript { ctx, .. } => *ctx = ExprContext::Store,
        _ => {}
    }
}
impl<'a> Fold<()> for Inferencer<'a> {
type TargetU = Option<Type>;
type Error = InferenceError;
@ -131,18 +150,13 @@ impl<'a> Fold<()> for Inferencer<'a> {
Ok(None)
}
fn fold_stmt(
&mut self,
mut node: ast::Stmt<()>,
) -> Result<ast::Stmt<Self::TargetU>, Self::Error> {
fn fold_stmt(&mut self, node: ast::Stmt<()>) -> Result<ast::Stmt<Self::TargetU>, Self::Error> {
let stmt = match node.node {
// we don't want fold over type annotation
ast::StmtKind::AnnAssign { mut target, annotation, value, simple, config_comment } => {
fix_assignment_target_context(&mut target); // Fix parser bug
self.infer_pattern(&target)?;
// fix parser problem...
if let ExprKind::Attribute { ctx, .. } = &mut target.node {
*ctx = ExprContext::Store;
}
let target = Box::new(self.fold_expr(*target)?);
let value = if let Some(v) = value {
@ -304,69 +318,53 @@ impl<'a> Fold<()> for Inferencer<'a> {
custom: None,
}
}
ast::StmtKind::Assign { ref mut targets, ref config_comment, .. } => {
for target in &mut *targets {
if let ExprKind::Attribute { ctx, .. } = &mut target.node {
*ctx = ExprContext::Store;
}
}
if targets.iter().all(|t| matches!(t.node, ExprKind::Name { .. })) {
let ast::StmtKind::Assign { targets, value, .. } = node.node else {
unreachable!()
};
ast::StmtKind::Assign { mut targets, type_comment, config_comment, value, .. } => {
// Fix parser bug
targets.iter_mut().for_each(fix_assignment_target_context);
// NOTE: Do not register identifiers into `self.defined_identifiers` before checking targets
// and value, otherwise the Inferencer might use undefined variables in `self.defined_identifiers`
// and produce strange errors.
let expected_value_ty = self
.unifier
.get_fresh_var(Some("expected_rhs_type".into()), Some(value.location))
.ty;
let targets: Vec<_> = targets
.into_iter()
.map(|target| -> Result<_, InferenceError> {
let (this_expected_value_ty, target) = self.fold_assign_pattern(target)?;
// Continually refine `expected_value_ty`
self.constrain(
expected_value_ty,
this_expected_value_ty,
&target.location,
)?;
Ok(target)
})
.try_collect()?;
let value = self.fold_expr(*value)?;
let value_ty = value.custom.unwrap();
let targets: Result<Vec<_>, _> = targets
.into_iter()
.map(|target| {
let ExprKind::Name { id, ctx } = target.node else { unreachable!() };
self.constrain(value.custom.unwrap(), expected_value_ty, &value.location)?;
self.defined_identifiers.insert(id);
let target_ty = if let Some(ty) = self.variable_mapping.get(&id) {
*ty
} else {
let unifier: &mut Unifier = self.unifier;
self.function_data
.resolver
.get_symbol_type(
unifier,
&self.top_level.definitions.read(),
self.primitives,
id,
)
.unwrap_or_else(|_| {
self.variable_mapping.insert(id, value_ty);
value_ty
})
};
let location = target.location;
self.unifier.unify(value_ty, target_ty).map(|()| Located {
location,
node: ExprKind::Name { id, ctx },
custom: Some(target_ty),
})
})
.collect();
let loc = node.location;
let targets = targets.map_err(|e| {
HashSet::from([e.at(Some(loc)).to_display(self.unifier).to_string()])
})?;
return Ok(Located {
// Do this only after folding targets and value
for target in &targets {
self.infer_pattern(target)?;
}
Located {
location: node.location,
node: ast::StmtKind::Assign {
targets,
type_comment,
config_comment,
value: Box::new(value),
type_comment: None,
config_comment: config_comment.clone(),
},
custom: None,
});
}
for target in targets {
self.infer_pattern(target)?;
}
fold::fold_stmt(self, node)?
}
ast::StmtKind::With { ref items, .. } => {
for item in items {
@ -379,7 +377,8 @@ impl<'a> Fold<()> for Inferencer<'a> {
_ => fold::fold_stmt(self, node)?,
};
match &stmt.node {
ast::StmtKind::AnnAssign { .. }
ast::StmtKind::Assign { .. }
| ast::StmtKind::AnnAssign { .. }
| ast::StmtKind::Break { .. }
| ast::StmtKind::Continue { .. }
| ast::StmtKind::Expr { .. }
@ -389,11 +388,6 @@ impl<'a> Fold<()> for Inferencer<'a> {
ast::StmtKind::If { test, .. } | ast::StmtKind::While { test, .. } => {
self.unify(test.custom.unwrap(), self.primitives.bool, &test.location)?;
}
ast::StmtKind::Assign { targets, value, .. } => {
for target in targets {
self.unify(target.custom.unwrap(), value.custom.unwrap(), &target.location)?;
}
}
ast::StmtKind::Raise { exc, cause, .. } => {
if let Some(cause) = cause {
return report_error("raise ... from cause is not supported", cause.location);
@ -533,6 +527,7 @@ impl<'a> Fold<()> for Inferencer<'a> {
}
_ => fold::fold_expr(self, node)?,
};
let custom = match &expr.node {
ExprKind::Constant { value, .. } => Some(self.infer_constant(value, &expr.location)?),
ExprKind::Name { id, .. } => {
@ -580,8 +575,6 @@ impl<'a> Fold<()> for Inferencer<'a> {
Some(self.infer_identifier(*id)?)
}
}
ExprKind::List { elts, .. } => Some(self.infer_list(elts)?),
ExprKind::Tuple { elts, .. } => Some(self.infer_tuple(elts)?),
ExprKind::Attribute { value, attr, ctx } => {
Some(self.infer_attribute(value, *attr, *ctx)?)
}
@ -595,8 +588,10 @@ impl<'a> Fold<()> for Inferencer<'a> {
ExprKind::Compare { left, ops, comparators } => {
Some(self.infer_compare(expr.location, left, ops, comparators)?)
}
ExprKind::Subscript { value, slice, ctx, .. } => {
Some(self.infer_subscript(value.as_ref(), slice.as_ref(), *ctx)?)
ExprKind::List { elts, .. } => Some(self.infer_list(elts)?),
ExprKind::Tuple { elts, .. } => Some(self.infer_tuple(elts)?),
ExprKind::Subscript { value, slice, .. } => {
Some(self.infer_getitem(value.as_ref(), slice.as_ref())?)
}
ExprKind::IfExp { test, body, orelse } => {
Some(self.infer_if_expr(test, body.as_ref(), orelse.as_ref())?)
@ -629,7 +624,7 @@ impl<'a> Inferencer<'a> {
})
}
fn infer_pattern(&mut self, pattern: &ast::Expr<()>) -> Result<(), InferenceError> {
fn infer_pattern<T>(&mut self, pattern: &ast::Expr<T>) -> Result<(), InferenceError> {
match &pattern.node {
ExprKind::Name { id, .. } => {
if !self.defined_identifiers.contains(id) {
@ -1867,28 +1862,186 @@ impl<'a> Inferencer<'a> {
Ok(res.unwrap())
}
/// Infers the type of a subscript expression on an `ndarray`.
fn infer_subscript_ndarray(
/// Fold an assignment's LHS recursively, and return a type that constrains the type of RHS and the folded expression.
///
/// i.e.,
/// ```python
/// (x, [z[2], z], z[1:]) = ...
/// ^^^^^^^^^^^^^^^^^^^^^ Fold these exprs recursively
/// ```
fn fold_assign_pattern(
&mut self,
value: &ast::Expr<Option<Type>>,
slice: &ast::Expr<Option<Type>>,
dummy_tvar: Type,
ndims: Type,
) -> InferenceResult {
debug_assert!(matches!(
&*self.unifier.get_ty_immutable(dummy_tvar),
TypeEnum::TVar { is_const_generic: false, .. }
));
pattern: ast::Expr<()>,
) -> Result<(Type, ast::Expr<Option<Type>>), InferenceError> {
// See https://docs.python.org/3/reference/simple_stmts.html#assignment-statements
let constrained_ty =
make_ndarray_ty(self.unifier, self.primitives, Some(dummy_tvar), Some(ndims));
self.constrain(value.custom.unwrap(), constrained_ty, &value.location)?;
#[allow(clippy::type_complexity)]
let mut handle_target_list = |elts: Vec<ast::Expr<()>>| -> Result<
(Type, Vec<ast::Expr<Option<Type>>>),
InferenceError,
> {
// Handle possibly-nested "target_list" (see Python lang ref).
let TypeEnum::TLiteral { values, .. } = &*self.unifier.get_ty_immutable(ndims) else {
panic!("Expected TLiteral for ndarray.ndims, got {}", self.unifier.stringify(ndims))
// NOTE: Currently, in NAC3, RHS must be a tuple (even if LHS is written as a list pattern)
// NOTE: Asterisks are not handled, e.g., `(x, *xs) = (1, 2, 3, 4)` is not supported.
// NOTE: In Python, RHS could be any iterable, that includes tuples and lists.
// But NAC3 does not have the infrastructure for general iterables in Python yet.
let elts: Vec<_> =
elts.into_iter().map(|elt| self.fold_assign_pattern(elt)).try_collect()?;
let (tys, elts): (Vec<Type>, Vec<ast::Expr<Option<Type>>>) = elts.into_iter().unzip();
let expected_value_ty = self.unifier.add_ty(TypeEnum::TTuple { ty: tys });
Ok((expected_value_ty, elts))
};
let ndims = values
match pattern.node {
ExprKind::Name { id, .. } => {
// Handle variable assignment.
let expected_value_ty = match self.variable_mapping.get(&id) {
None => {
// Assigning to a new variable name; RHS's type could be anything.
let expected_value_ty = self
.unifier
.get_fresh_var(
Some(format!("type_of_{id}").into()),
Some(pattern.location),
)
.ty;
self.variable_mapping.insert(id, expected_value_ty); // Register new variable
expected_value_ty
}
Some(expected_value_ty) => {
// Re-assigning to an existing variable name.
*expected_value_ty
}
};
Ok((
expected_value_ty,
Located {
location: pattern.location,
node: ExprKind::Name { id, ctx: ExprContext::Store },
custom: Some(expected_value_ty), // Type info is needed here.
},
))
}
ExprKind::Attribute { .. } => {
// Handle attribute assignment.
let pattern = self.fold_expr(pattern)?;
let expected_value_ty = pattern.custom.unwrap();
Ok((expected_value_ty, pattern))
}
ExprKind::Subscript { value: target, slice: key, .. } => {
// Handle `__setitem__`.
// TODO: Make `__setitem__` a general object field like `__add__` in NAC3?
// NOTE: Do not fold_expr on `target[key]`, because it doesn't make sense to
// do so in the context of item assignment (we don't need to know the type of
// `target[key]` in `target[key] = value`). Only fold `target` and `key`.
let target = self.fold_expr(*target)?;
let key = self.fold_expr(*key)?;
let expected_value_ty = self.infer_setitem_value_type(&target, &key)?;
Ok((
expected_value_ty,
Located {
location: pattern.location,
node: ExprKind::Subscript {
value: Box::new(target),
slice: Box::new(key),
ctx: ExprContext::Store,
},
custom: None,
},
))
}
ExprKind::List { elts, .. } => {
let (expected_value_ty, elts) = handle_target_list(elts)?;
Ok((
expected_value_ty,
Located {
location: pattern.location,
node: ExprKind::List { ctx: ExprContext::Store, elts },
custom: None,
},
))
}
ExprKind::Tuple { elts, .. } => {
let (expected_value_ty, elts) = handle_target_list(elts)?;
Ok((
expected_value_ty,
Located {
location: pattern.location,
node: ExprKind::List { ctx: ExprContext::Store, elts },
custom: None,
},
))
}
_ => report_error("encountered unsupported/illegal LHS pattern", pattern.location),
}
}
/// Typecheck the subscript used to index into an ndarray, and count how many dimensions it removes.
///
/// The subscript is the bracketed part of an indexing expression:
/// ```python
/// my_ndarray[::-2, 1, :, None, 9:23]
///            ^^^^^^^^^^^^^^^^^^^^^^ this
/// ```
///
/// A tuple subscript is flattened into its elements; anything else is treated as a single index.
/// For each index:
///
/// * a slice has each present bound (`lower`, `upper`, `step`) constrained to `int32` and removes
///   no dimension;
/// * anything else is unified with `int32` and removes one dimension.
///
/// Returns the number of dimensions to subtract from the indexed ndarray. The count is signed so
/// that it may become negative once axis-adding indices (`None`) are supported; as written it
/// only ever increases from zero.
fn fold_ndarray_subscript_slice(
    &mut self,
    slice: &ast::Expr<Option<Type>>,
) -> Result<i128, InferenceError> {
    // TODO: Handle `None` / `np.newaxis`

    // Flatten `slice` into the list of individual subscript indices.
    let indices = if let ExprKind::Tuple { elts, .. } = &slice.node {
        elts.iter().collect_vec()
    } else {
        vec![slice]
    };

    // Typecheck every index, tallying removed dimensions along the way.
    let mut dims_to_subtract: i128 = 0;
    for index in indices {
        match &index.node {
            ExprKind::Slice { lower, upper, step } => {
                for bound in [lower.as_ref(), upper.as_ref(), step.as_ref()].iter().flatten() {
                    self.constrain(bound.custom.unwrap(), self.primitives.int32, &bound.location)?;
                }
            }
            _ => {
                // Treat anything else as an integer index, and force unify their type to int32.
                self.unify(index.custom.unwrap(), self.primitives.int32, &index.location)?;
                dims_to_subtract += 1;
            }
        }
    }

    Ok(dims_to_subtract)
}
/// Check if the `ndims` [`Type`] of an ndarray is valid (e.g., no negative values),
/// and attempt to subtract `ndims` by `dims_to_subtract` and return subtracted `ndims`.
///
/// `dims_to_subtract` can be set to `0` if you only want to check if `ndims` is valid.
fn check_ndarray_ndims_and_subtract(
&mut self,
target_ty: Type,
ndims: Type,
dims_to_subtract: i128,
) -> Result<Type, InferenceError> {
// Typecheck `ndims`.
let TypeEnum::TLiteral { values: ndims, .. } = &*self.unifier.get_ty_immutable(ndims)
else {
panic!("Expected TLiteral for ndarray.ndims, got {}", self.unifier.stringify(ndims))
};
assert!(!ndims.is_empty());
// Check if there are negative literals.
// NOTE: Don't mix this with subtracting dims, otherwise the user errors could be confusing.
let ndims = ndims
.iter()
.map(|ndim| u64::try_from(ndim.clone()).map_err(|()| ndim.clone()))
.collect::<Result<Vec<_>, _>>()
@ -1899,204 +2052,229 @@ impl<'a> Inferencer<'a> {
)])
})?;
assert!(!ndims.is_empty());
// The number of dimensions subscripted by the index expression.
// Slicing a ndarray will yield the same number of dimensions, whereas indexing into a
// dimension will remove a dimension.
let subscripted_dims = match &slice.node {
ExprKind::Tuple { elts, .. } => elts.iter().fold(0, |acc, value_subexpr| {
if let ExprKind::Slice { .. } = &value_subexpr.node {
acc
} else {
acc + 1
}
}),
ExprKind::Slice { .. } => 0,
_ => 1,
};
if ndims.len() == 1 && ndims[0] - subscripted_dims == 0 {
// ndarray[T, Literal[1]] - Non-Slice index always returns an object of type T
assert_ne!(ndims[0], 0);
Ok(dummy_tvar)
} else {
// Otherwise - Index returns an object of type ndarray[T, Literal[N - subscripted_dims]]
// Disallow subscripting if any Literal value will subscript on an element
// Infer the new `ndims` after indexing the ndarray with `slice`.
// Disallow subscripting if any Literal value will subscript on an element.
let new_ndims = ndims
.into_iter()
.map(|v| {
let v = i128::from(v) - i128::from(subscripted_dims);
let v = i128::from(v) - dims_to_subtract;
u64::try_from(v)
})
.collect::<Result<Vec<_>, _>>()
.map_err(|_| {
HashSet::from([format!(
"Cannot subscript {} by {subscripted_dims} dimensions",
self.unifier.stringify(value.custom.unwrap()),
"Cannot subscript {} by {dims_to_subtract} dimension(s)",
self.unifier.stringify(target_ty),
)])
})?;
if new_ndims.iter().any(|v| *v == 0) {
let new_ndims_ty = self
.unifier
.get_fresh_literal(new_ndims.into_iter().map(SymbolValue::U64).collect(), None);
Ok(new_ndims_ty)
}
/// Infer the type of the result of indexing into an ndarray.
///
/// * `ndarray_ty` - The [`Type`] of the ndarray being indexed into.
/// * `slice` - The subscript expression indexing into the ndarray.
fn infer_ndarray_subscript(
&mut self,
ndarray_ty: Type,
slice: &ast::Expr<Option<Type>>,
) -> InferenceResult {
let (dtype, ndims) = unpack_ndarray_var_tys(self.unifier, ndarray_ty);
let dims_to_substract = self.fold_ndarray_subscript_slice(slice)?;
let new_ndims =
self.check_ndarray_ndims_and_subtract(ndarray_ty, ndims, dims_to_substract)?;
// Now we need extra work to check `new_ndims` to see if the user has indexed into a single element.
let TypeEnum::TLiteral { values: new_ndims_values, .. } = &*self.unifier.get_ty(new_ndims)
else {
unreachable!("infer_ndarray_ndims should always return TLiteral")
};
let new_ndims_values = new_ndims_values
.iter()
.map(|v| u64::try_from(v.clone()).expect("new_ndims should be convertible to u64"))
.collect_vec();
if new_ndims_values.len() == 1 && new_ndims_values[0] == 0 {
// The subscripted ndarray must be unsized
// The user must be indexing into a single element
Ok(dtype)
} else {
// The subscripted ndarray is not unsized / may not be unsized. (i.e., may or may not have indexed into a single element)
if new_ndims_values.iter().any(|v| *v == 0) {
// TODO: Difficult to implement since now the return may both be a scalar type, or an ndarray type.
unimplemented!("Inference for ndarray subscript operator with Literal[0, ...] bound unimplemented")
}
let ndims_ty = self
.unifier
.get_fresh_literal(new_ndims.into_iter().map(SymbolValue::U64).collect(), None);
let subscripted_ty =
make_ndarray_ty(self.unifier, self.primitives, Some(dummy_tvar), Some(ndims_ty));
Ok(subscripted_ty)
let new_ndarray_ty =
make_ndarray_ty(self.unifier, self.primitives, Some(dtype), Some(new_ndims));
Ok(new_ndarray_ty)
}
}
fn infer_subscript(
/// Infer the type of the result of indexing into a list.
///
/// * `list_ty` - The [`Type`] of the list being indexed into.
/// * `key` - The subscript expression indexing into the list.
fn infer_list_subscript(
&mut self,
value: &ast::Expr<Option<Type>>,
slice: &ast::Expr<Option<Type>>,
ctx: ExprContext,
) -> InferenceResult {
let report_unscriptable_error = |unifier: &mut Unifier| {
// User is attempting to index into a value of an unsupported type.
let value_ty = value.custom.unwrap();
let value_ty_str = unifier.stringify(value_ty);
return report_error(
format!("'{value_ty_str}' object is not subscriptable").as_str(),
slice.location, // using the slice's location (rather than value's) because it is more clear
);
list_ty: Type,
key: &ast::Expr<Option<Type>>,
) -> Result<Type, InferenceError> {
let TypeEnum::TObj { params: list_params, .. } = &*self.unifier.get_ty(list_ty) else {
unreachable!()
};
let item_ty = iter_type_vars(list_params).nth(0).unwrap().ty;
let ty = self.unifier.get_dummy_var().ty;
match &slice.node {
ExprKind::Slice { lower, upper, step } => {
if let ExprKind::Slice { lower, upper, step } = &key.node {
// Typecheck on the slice
for v in [lower.as_ref(), upper.as_ref(), step.as_ref()].iter().flatten() {
self.constrain(v.custom.unwrap(), self.primitives.int32, &v.location)?;
}
let list_like_ty = match &*self.unifier.get_ty(value.custom.unwrap()) {
TypeEnum::TObj { obj_id, params, .. } if *obj_id == PrimDef::List.id() => {
let list_tvar = iter_type_vars(params).nth(0).unwrap();
self.unifier
.subst(
self.primitives.list,
&into_var_map([TypeVar { id: list_tvar.id, ty }]),
)
.unwrap()
}
TypeEnum::TObj { obj_id, .. } if *obj_id == PrimDef::NDArray.id() => {
let (_, ndims) =
unpack_ndarray_var_tys(self.unifier, value.custom.unwrap());
make_ndarray_ty(self.unifier, self.primitives, Some(ty), Some(ndims))
}
_ => {
return report_unscriptable_error(self.unifier);
}
};
self.constrain(value.custom.unwrap(), list_like_ty, &value.location)?;
Ok(list_like_ty)
}
ExprKind::Constant { value: ast::Constant::Int(val), .. } => {
match &*self.unifier.get_ty(value.custom.unwrap()) {
TypeEnum::TObj { obj_id, .. } if *obj_id == PrimDef::NDArray.id() => {
let (_, ndims) =
unpack_ndarray_var_tys(self.unifier, value.custom.unwrap());
self.infer_subscript_ndarray(value, slice, ty, ndims)
}
_ => {
// the index is a constant, so value can be a sequence.
let ind: Option<i32> = (*val).try_into().ok();
let ind =
ind.ok_or_else(|| HashSet::from(["Index must be int32".to_string()]))?;
let map = once((
ind.into(),
RecordField::new(ty, ctx == ExprContext::Store, Some(value.location)),
))
.collect();
let seq = self.unifier.add_record(map);
self.constrain(value.custom.unwrap(), seq, &value.location)?;
Ok(ty)
}
}
}
ExprKind::Tuple { elts, .. } => {
if value
.custom
.unwrap()
.obj_id(self.unifier)
.is_some_and(|id| id == PrimDef::NDArray.id())
.not()
{
return report_error(
"Tuple slices are only supported for ndarrays",
slice.location,
);
}
for elt in elts {
if let ExprKind::Slice { lower, upper, step } = &elt.node {
for v in [lower.as_ref(), upper.as_ref(), step.as_ref()].iter().flatten() {
self.constrain(v.custom.unwrap(), self.primitives.int32, &v.location)?;
let v_ty = v.custom.unwrap();
self.constrain(v_ty, self.primitives.int32, &v.location)?;
}
Ok(list_ty) // type list[T]
} else {
self.constrain(elt.custom.unwrap(), self.primitives.int32, &elt.location)?;
// Treat anything else as an integer index, and force unify their type to int32.
self.constrain(key.custom.unwrap(), self.primitives.int32, &key.location)?;
Ok(item_ty) // type T
}
}
let (_, ndims) = unpack_ndarray_var_tys(self.unifier, value.custom.unwrap());
self.infer_subscript_ndarray(value, slice, ty, ndims)
/// Build the element type obtained by subscripting `target` with a constant integer.
///
/// A dummy type variable stands in for the element type. `target` is then constrained against
/// a record type that holds this variable under the integer key `index`, leaving the unifier
/// to resolve the variable.
///
/// * `target` - The expression being subscripted; `target.custom` must already be populated.
/// * `index` - The constant subscript; it has to fit in an `i32`.
/// * `mutable` - Passed to [`RecordField::new`] as the mutability flag of the record field.
///
/// Returns the element type variable. Fails with `"Index must be int32"` when `index` does not
/// fit in an `i32`, and propagates any error raised while constraining `target`.
fn get_constant_index_item_type(
    &mut self,
    target: &ast::Expr<Option<Type>>,
    index: i128,
    mutable: bool,
) -> InferenceResult {
    let narrowed = i32::try_from(index)
        .map_err(|_| HashSet::from(["Index must be int32".to_string()]))?;

    // Placeholder for the element type; the unifier resolves it later.
    let elem_ty = self.unifier.get_dummy_var().ty;

    // Describe `target` as "something with a field at `narrowed`" and unify against it.
    let field = RecordField::new(elem_ty, mutable, Some(target.location));
    let fields = Mapping::from_iter([(RecordKey::Int(narrowed), field)]);
    let record_ty = self.unifier.add_record(fields);
    self.constrain(target.custom.unwrap(), record_ty, &target.location)?;

    Ok(elem_ty)
}
/// Infer the return type of a `__getitem__` expression.
///
/// i.e., `target[key]`, where the [`ExprContext`] is [`ExprContext::Load`].
fn infer_getitem(
&mut self,
target: &ast::Expr<Option<Type>>,
key: &ast::Expr<Option<Type>>,
) -> InferenceResult {
let target_ty = target.custom.unwrap();
match &*self.unifier.get_ty(target_ty) {
TypeEnum::TObj { obj_id, .. }
if *obj_id == self.primitives.list.obj_id(self.unifier).unwrap() =>
{
self.infer_list_subscript(target_ty, key)
}
TypeEnum::TObj { obj_id, .. }
if *obj_id == self.primitives.ndarray.obj_id(self.unifier).unwrap() =>
{
self.infer_ndarray_subscript(target_ty, key)
}
_ => {
if let TypeEnum::TTuple { .. } = &*self.unifier.get_ty(value.custom.unwrap()) {
return report_error(
"Tuple index must be a constant (KernelInvariant is also not supported)",
slice.location,
);
}
// Now `target_ty` either:
// 1) is a `TTuple`, or
// 2) is simply not obvious for doing __getitem__ on.
// the index is not a constant, so value can only be a list-like structure
match &*self.unifier.get_ty(value.custom.unwrap()) {
TypeEnum::TObj { obj_id, params, .. } if *obj_id == PrimDef::List.id() => {
self.constrain(
slice.custom.unwrap(),
self.primitives.int32,
&slice.location,
)?;
let list_tvar = iter_type_vars(params).nth(0).unwrap();
let list = self
.unifier
.subst(
self.primitives.list,
&into_var_map([TypeVar { id: list_tvar.id, ty }]),
if let ExprKind::Constant { value: ast::Constant::Int(index), .. } = &key.node {
// If `key` is a constant int, then the value can be a sequence.
// Therefore, this can be handled by the unifier
let getitem_ty = self.get_constant_index_item_type(target, *index, false)?;
Ok(getitem_ty)
} else {
// Out of ways to resolve __getitem__, throw an error.
report_error(
&format!(
"'{}' cannot be indexed by this subscript",
self.unifier.stringify(target_ty)
),
key.location,
)
.unwrap();
self.constrain(value.custom.unwrap(), list, &value.location)?;
Ok(ty)
}
TypeEnum::TObj { obj_id, .. } if *obj_id == PrimDef::NDArray.id() => {
let (_, ndims) =
unpack_ndarray_var_tys(self.unifier, value.custom.unwrap());
}
}
}
let valid_index_tys = [self.primitives.int32, self.primitives.isize()]
.into_iter()
.unique()
.collect_vec();
let valid_index_ty = self
.unifier
.get_fresh_var_with_range(valid_index_tys.as_slice(), None, None)
.ty;
self.constrain(slice.custom.unwrap(), valid_index_ty, &slice.location)?;
self.infer_subscript_ndarray(value, slice, ty, ndims)
/// Fold an item assignment, and return a type that constrains the type of RHS.
fn infer_setitem_value_type(
&mut self,
target: &ast::Expr<Option<Type>>,
key: &ast::Expr<Option<Type>>,
) -> Result<Type, InferenceError> {
let target_ty = target.custom.unwrap();
match &*self.unifier.get_ty(target_ty) {
TypeEnum::TObj { obj_id, .. }
if *obj_id == self.primitives.list.obj_id(self.unifier).unwrap() =>
{
// Handle list item assignment
// The expected value type is the same as the type of list.__getitem__
self.infer_list_subscript(target_ty, key)
}
_ => report_unscriptable_error(self.unifier),
TypeEnum::TObj { obj_id, .. }
if *obj_id == self.primitives.ndarray.obj_id(self.unifier).unwrap() =>
{
// Handle ndarray item assignment
// NOTE: `value` can be either an ndarray or a scalar, even if `target` is an unsized ndarray.
// TODO: NumPy does automatic casting on `value`. (Currently not supported)
// See https://numpy.org/doc/stable/user/basics.indexing.html#assigning-values-to-indexed-arrays
let (scalar_ty, _) = unpack_ndarray_var_tys(self.unifier, target_ty);
let ndarray_ty =
make_ndarray_ty(self.unifier, self.primitives, Some(scalar_ty), None);
let expected_value_ty =
self.unifier.get_fresh_var_with_range(&[scalar_ty, ndarray_ty], None, None).ty;
Ok(expected_value_ty)
}
_ => {
// Handle item assignments of other types.
// Now `target_ty` either:
// 1) is a `TTuple`, or
// 2) is simply not obvious for doing __setitem__ on.
if let ExprKind::Constant { value: ast::Constant::Int(index), .. } = &key.node {
// If `key` is a constant int, then the value can be a sequence.
// Therefore, this can be handled by the unifier
self.get_constant_index_item_type(target, *index, false)
} else {
// Out of ways to resolve __setitem__, throw an error.
report_error(
&format!(
"'{}' does not allow item assignment with this subscript",
self.unifier.stringify(target_ty)
),
key.location,
)
}
}
}