Implement/Fix support for tuple-indexing into ndarrays #429

Merged
sb10q merged 3 commits from enhance/issue-149-ndarray/nd-indices into master 2024-06-20 12:50:31 +08:00
3 changed files with 243 additions and 160 deletions

View File

@ -3,8 +3,8 @@ use std::{collections::HashMap, convert::TryInto, iter::once, iter::zip};
use crate::{
codegen::{
classes::{
ArrayLikeIndexer, ArrayLikeValue, ArraySliceValue, ListValue, NDArrayValue, ProxyValue,
RangeValue, TypedArrayLikeAccessor, UntypedArrayLikeAccessor,
ArrayLikeIndexer, ArrayLikeValue, ListValue, NDArrayValue, ProxyValue, RangeValue,
TypedArrayLikeAccessor, UntypedArrayLikeAccessor,
},
concrete_type::{ConcreteFuncArg, ConcreteTypeEnum, ConcreteTypeStore},
gen_in_range_check, get_llvm_abi_type, get_llvm_type,
@ -1741,22 +1741,37 @@ fn gen_ndarray_subscript_expr<'ctx, G: CodeGenerator>(
let ndims = values
.iter()
.map(|ndim| match *ndim {
SymbolValue::U64(v) => Ok(v),
SymbolValue::U32(v) => Ok(u64::from(v)),
SymbolValue::I32(v) => u64::try_from(v)
.map_err(|_| format!("Expected non-negative literal for ndarray.ndims, got {v}")),
SymbolValue::I64(v) => u64::try_from(v)
.map_err(|_| format!("Expected non-negative literal for ndarray.ndims, got {v}")),
_ => unreachable!(),
})
.collect::<Result<Vec<_>, _>>()?;
.map(|ndim| u64::try_from(ndim.clone()).map_err(|()| ndim.clone()))
.collect::<Result<Vec<_>, _>>()
.map_err(|val| {
format!(
"Expected non-negative literal for ndarray.ndims, got {}",
i128::try_from(val).unwrap()
)
})?;
assert!(!ndims.is_empty());
let ndarray_ndims_ty = ctx
.unifier
.get_fresh_literal(ndims.iter().map(|v| SymbolValue::U64(v - 1)).collect(), None);
// The number of dimensions subscripted by the index expression.
// Slicing a ndarray will yield the same number of dimensions, whereas indexing into a
// dimension will remove a dimension.
let subscripted_dims = match &slice.node {
ExprKind::Tuple { elts, .. } => elts.iter().fold(0, |acc, value_subexpr| {
if let ExprKind::Slice { .. } = &value_subexpr.node {
acc
} else {
acc + 1
}
}),
ExprKind::Slice { .. } => 0,
_ => 1,
};
let ndarray_ndims_ty = ctx.unifier.get_fresh_literal(
ndims.iter().map(|v| SymbolValue::U64(v - subscripted_dims)).collect(),
None,
);
let ndarray_ty =
make_ndarray_ty(&mut ctx.unifier, &ctx.primitives, Some(ty), Some(ndarray_ndims_ty));
let llvm_pndarray_t = ctx.get_llvm_type(generator, ndarray_ty).into_pointer_type();
@ -1859,123 +1874,165 @@ fn gen_ndarray_subscript_expr<'ctx, G: CodeGenerator>(
}
};
Ok(Some(match &slice.node {
ExprKind::Tuple { elts, .. } => {
let slices = elts
.iter()
.enumerate()
.map(|(dim, elt)| expr_to_slice(generator, ctx, &elt.node, dim as u64))
.take_while_inclusive(|slice| slice.as_ref().is_ok_and(Option::is_some))
.collect::<Result<Vec<_>, _>>()?;
if slices.len() < elts.len() {
return Ok(None);
}
let slices = slices.into_iter().map(Option::unwrap).collect_vec();
numpy::ndarray_sliced_copy(generator, ctx, ty, v, &slices)?.as_base_value().into()
}
ExprKind::Slice { .. } => {
let Some(slice) = expr_to_slice(generator, ctx, &slice.node, 0)? else {
return Ok(None);
};
numpy::ndarray_sliced_copy(generator, ctx, ty, v, &[slice])?.as_base_value().into()
}
_ => {
let index = if let Some(index) = generator.gen_expr(ctx, slice)? {
index.to_basic_value_enum(ctx, generator, slice.custom.unwrap())?.into_int_value()
} else {
return Ok(None);
};
let Some(index) = normalize_index(generator, ctx, index, 0)? else { return Ok(None) };
let index_addr = generator.gen_var_alloc(ctx, index.get_type().into(), None)?;
ctx.builder.build_store(index_addr, index).unwrap();
if ndims.len() == 1 && ndims[0] == 1 {
// Accessing an element from a 1-dimensional `ndarray`
return Ok(Some(
v.data()
.get(
ctx,
generator,
&ArraySliceValue::from_ptr_val(
index_addr,
llvm_usize.const_int(1, false),
None,
),
None,
)
.into(),
));
}
// Accessing an element from a multi-dimensional `ndarray`
// Create a new array, remove the top dimension from the dimension-size-list, and copy the
// elements over
let subscripted_ndarray = generator.gen_var_alloc(ctx, llvm_ndarray_t.into(), None)?;
let ndarray = NDArrayValue::from_ptr_val(subscripted_ndarray, llvm_usize, None);
let num_dims = v.load_ndims(ctx);
ndarray.store_ndims(
let make_indices_arr = |generator: &mut G,
ctx: &mut CodeGenContext<'ctx, '_>|
-> Result<_, String> {
Ok(if let ExprKind::Tuple { elts, .. } = &slice.node {
let llvm_int_ty = ctx.get_llvm_type(generator, elts[0].custom.unwrap());
let index_addr = generator.gen_array_var_alloc(
ctx,
generator,
ctx.builder.build_int_sub(num_dims, llvm_usize.const_int(1, false), "").unwrap(),
);
llvm_int_ty,
llvm_usize.const_int(elts.len() as u64, false),
None,
)?;
let ndarray_num_dims = ndarray.load_ndims(ctx);
ndarray.create_dim_sizes(ctx, llvm_usize, ndarray_num_dims);
for (i, elt) in elts.iter().enumerate() {
let Some(index) = generator.gen_expr(ctx, elt)? else {
return Ok(None);
};
let ndarray_num_dims = ndarray.load_ndims(ctx);
let v_dims_src_ptr = unsafe {
v.dim_sizes().ptr_offset_unchecked(
let index = index
.to_basic_value_enum(ctx, generator, elt.custom.unwrap())?
.into_int_value();
let Some(index) = normalize_index(generator, ctx, index, 0)? else {
return Ok(None);
};
let store_ptr = unsafe {
index_addr.ptr_offset_unchecked(
ctx,
generator,
&llvm_usize.const_int(i as u64, false),
None,
)
};
ctx.builder.build_store(store_ptr, index).unwrap();
}
Some(index_addr)
} else if let Some(index) = generator.gen_expr(ctx, slice)? {
let llvm_int_ty = ctx.get_llvm_type(generator, slice.custom.unwrap());
let index_addr = generator.gen_array_var_alloc(
ctx,
llvm_int_ty,
llvm_usize.const_int(1u64, false),
None,
)?;
let index =
index.to_basic_value_enum(ctx, generator, slice.custom.unwrap())?.into_int_value();
let Some(index) = normalize_index(generator, ctx, index, 0)? else { return Ok(None) };
let store_ptr = unsafe {
index_addr.ptr_offset_unchecked(ctx, generator, &llvm_usize.const_zero(), None)
};
ctx.builder.build_store(store_ptr, index).unwrap();
Some(index_addr)
} else {
None
})
};
Ok(Some(if ndims.len() == 1 && ndims[0] - subscripted_dims == 0 {
let Some(index_addr) = make_indices_arr(generator, ctx)? else { return Ok(None) };
v.data().get(ctx, generator, &index_addr, None).into()
} else {
match &slice.node {
ExprKind::Tuple { elts, .. } => {
let slices = elts
.iter()
.enumerate()
.map(|(dim, elt)| expr_to_slice(generator, ctx, &elt.node, dim as u64))
.take_while_inclusive(|slice| slice.as_ref().is_ok_and(Option::is_some))
.collect::<Result<Vec<_>, _>>()?;
if slices.len() < elts.len() {
return Ok(None);
}
let slices = slices.into_iter().map(Option::unwrap).collect_vec();
numpy::ndarray_sliced_copy(generator, ctx, ty, v, &slices)?.as_base_value().into()
}
ExprKind::Slice { .. } => {
let Some(slice) = expr_to_slice(generator, ctx, &slice.node, 0)? else {
return Ok(None);
};
numpy::ndarray_sliced_copy(generator, ctx, ty, v, &[slice])?.as_base_value().into()
}
_ => {
// Accessing an element from a multi-dimensional `ndarray`
let Some(index_addr) = make_indices_arr(generator, ctx)? else { return Ok(None) };
// Create a new array, remove the top dimension from the dimension-size-list, and copy the
// elements over
let subscripted_ndarray =
generator.gen_var_alloc(ctx, llvm_ndarray_t.into(), None)?;
let ndarray = NDArrayValue::from_ptr_val(subscripted_ndarray, llvm_usize, None);
let num_dims = v.load_ndims(ctx);
ndarray.store_ndims(
ctx,
generator,
&llvm_usize.const_int(1, false),
None,
)
};
call_memcpy_generic(
ctx,
ndarray.dim_sizes().base_ptr(ctx, generator),
v_dims_src_ptr,
ctx.builder
.build_int_mul(ndarray_num_dims, llvm_usize.size_of(), "")
.map(Into::into)
.unwrap(),
llvm_i1.const_zero(),
);
ctx.builder
.build_int_sub(num_dims, llvm_usize.const_int(1, false), "")
.unwrap(),
);
let ndarray_num_elems = call_ndarray_calc_size(
generator,
ctx,
&ndarray.dim_sizes().as_slice_value(ctx, generator),
(None, None),
);
ndarray.create_data(ctx, llvm_ndarray_data_t, ndarray_num_elems);
let ndarray_num_dims = ndarray.load_ndims(ctx);
ndarray.create_dim_sizes(ctx, llvm_usize, ndarray_num_dims);
let v_data_src_ptr = v.data().ptr_offset(
ctx,
generator,
&ArraySliceValue::from_ptr_val(index_addr, llvm_usize.const_int(1, false), None),
None,
);
call_memcpy_generic(
ctx,
ndarray.data().base_ptr(ctx, generator),
v_data_src_ptr,
ctx.builder
.build_int_mul(ndarray_num_elems, llvm_ndarray_data_t.size_of().unwrap(), "")
.map(Into::into)
.unwrap(),
llvm_i1.const_zero(),
);
let ndarray_num_dims = ndarray.load_ndims(ctx);
let v_dims_src_ptr = unsafe {
v.dim_sizes().ptr_offset_unchecked(
ctx,
generator,
&llvm_usize.const_int(1, false),
None,
)
};
call_memcpy_generic(
ctx,
ndarray.dim_sizes().base_ptr(ctx, generator),
v_dims_src_ptr,
ctx.builder
.build_int_mul(ndarray_num_dims, llvm_usize.size_of(), "")
.map(Into::into)
.unwrap(),
llvm_i1.const_zero(),
);
ndarray.as_base_value().into()
let ndarray_num_elems = call_ndarray_calc_size(
generator,
ctx,
&ndarray.dim_sizes().as_slice_value(ctx, generator),
(None, None),
);
ndarray.create_data(ctx, llvm_ndarray_data_t, ndarray_num_elems);
let v_data_src_ptr = v.data().ptr_offset(ctx, generator, &index_addr, None);
call_memcpy_generic(
ctx,
ndarray.data().base_ptr(ctx, generator),
v_data_src_ptr,
ctx.builder
.build_int_mul(
ndarray_num_elems,
llvm_ndarray_data_t.size_of().unwrap(),
"",
)
.map(Into::into)
.unwrap(),
llvm_i1.const_zero(),
);
ndarray.as_base_value().into()
}
}
}))
}

View File

@ -1586,6 +1586,7 @@ impl<'a> Inferencer<'a> {
fn infer_subscript_ndarray(
&mut self,
value: &ast::Expr<Option<Type>>,
slice: &ast::Expr<Option<Type>>,
dummy_tvar: Type,
ndims: Type,
) -> InferenceResult {
@ -1604,48 +1605,66 @@ impl<'a> Inferencer<'a> {
let ndims = values
.iter()
.map(|ndim| match *ndim {
SymbolValue::U64(v) => Ok(v),
SymbolValue::U32(v) => Ok(u64::from(v)),
SymbolValue::I32(v) => u64::try_from(v).map_err(|_| {
HashSet::from([format!(
"Expected non-negative literal for ndarray.ndims, got {v}"
)])
}),
SymbolValue::I64(v) => u64::try_from(v).map_err(|_| {
HashSet::from([format!(
"Expected non-negative literal for ndarray.ndims, got {v}"
)])
}),
_ => unreachable!(),
})
.collect::<Result<Vec<_>, _>>()?;
.map(|ndim| u64::try_from(ndim.clone()).map_err(|()| ndim.clone()))
.collect::<Result<Vec<_>, _>>()
.map_err(|val| {
HashSet::from([format!(
"Expected non-negative literal for ndarray.ndims, got {}",
i128::try_from(val).unwrap()
)])
})?;
assert!(!ndims.is_empty());
if ndims.len() == 1 && ndims[0] == 1 {
// ndarray[T, Literal[1]] - Index always returns an object of type T
// The number of dimensions subscripted by the index expression.
// Slicing a ndarray will yield the same number of dimensions, whereas indexing into a
// dimension will remove a dimension.
let subscripted_dims = match &slice.node {
ExprKind::Tuple { elts, .. } => elts.iter().fold(0, |acc, value_subexpr| {
if let ExprKind::Slice { .. } = &value_subexpr.node {
acc
} else {
acc + 1
}
}),
ExprKind::Slice { .. } => 0,
_ => 1,
};
if ndims.len() == 1 && ndims[0] - subscripted_dims == 0 {
// ndarray[T, Literal[1]] - Non-Slice index always returns an object of type T
assert_ne!(ndims[0], 0);
Ok(dummy_tvar)
} else {
// ndarray[T, Literal[N]] where N != 1 - Index returns an object of type ndarray[T, Literal[N - 1]]
// Otherwise - Index returns an object of type ndarray[T, Literal[N - subscripted_dims]]
if ndims.iter().any(|v| *v == 0) {
// Disallow subscripting if any Literal value will subscript on an element
let new_ndims = ndims
.into_iter()
.map(|v| {
let v = i128::from(v) - i128::from(subscripted_dims);
u64::try_from(v)
})
.collect::<Result<Vec<_>, _>>()
.map_err(|_| {
HashSet::from([format!(
"Cannot subscript {} by {subscripted_dims} dimensions",
self.unifier.stringify(value.custom.unwrap()),
)])
})?;
if new_ndims.iter().any(|v| *v == 0) {
unimplemented!("Inference for ndarray subscript operator with Literal[0, ...] bound unimplemented")
}
let ndims_min_one_ty = self.unifier.get_fresh_literal(
ndims.into_iter().map(|v| SymbolValue::U64(v - 1)).collect(),
None,
);
let subscripted_ty = make_ndarray_ty(
self.unifier,
self.primitives,
Some(dummy_tvar),
Some(ndims_min_one_ty),
);
let ndims_ty = self
.unifier
.get_fresh_literal(new_ndims.into_iter().map(SymbolValue::U64).collect(), None);
let subscripted_ty =
make_ndarray_ty(self.unifier, self.primitives, Some(dummy_tvar), Some(ndims_ty));
Ok(subscripted_ty)
}
@ -1682,7 +1701,7 @@ impl<'a> Inferencer<'a> {
TypeEnum::TObj { obj_id, .. } if *obj_id == PrimDef::NDArray.id() => {
let (_, ndims) =
unpack_ndarray_var_tys(self.unifier, value.custom.unwrap());
self.infer_subscript_ndarray(value, ty, ndims)
self.infer_subscript_ndarray(value, slice, ty, ndims)
}
_ => {
// the index is a constant, so value can be a sequence.
@ -1725,10 +1744,7 @@ impl<'a> Inferencer<'a> {
}
let (_, ndims) = unpack_ndarray_var_tys(self.unifier, value.custom.unwrap());
let ndarray_ty =
make_ndarray_ty(self.unifier, self.primitives, Some(ty), Some(ndims));
self.constrain(value.custom.unwrap(), ndarray_ty, &value.location)?;
Ok(ndarray_ty)
self.infer_subscript_ndarray(value, slice, ty, ndims)
}
_ => {
if let TypeEnum::TTuple { .. } = &*self.unifier.get_ty(value.custom.unwrap()) {
@ -1763,7 +1779,7 @@ impl<'a> Inferencer<'a> {
.get_fresh_var_with_range(valid_index_tys.as_slice(), None, None)
.ty;
self.constrain(slice.custom.unwrap(), valid_index_ty, &slice.location)?;
self.infer_subscript_ndarray(value, ty, ndims)
self.infer_subscript_ndarray(value, slice, ty, ndims)
}
_ => unreachable!(),
}

View File

@ -150,6 +150,15 @@ def test_ndarray_slices():
x2 = x[0::2, 0::2]
output_ndarray_float_2(x2)
# Exercises tuple (multi-dimensional) indexing on the ndarray returned by np_identity(2):
# every (row, column) pair over indices 0 and 1 is read and passed to output_float64.
# NOTE(review): presumably np_identity(2) is a 2x2 identity matrix - confirm against its definition.
def test_ndarray_nd_idx():
x = np_identity(2)
# x0 is annotated as float: the fully-indexed element is used as a scalar, not as an ndarray.
x0: float = x[0, 0]
output_float64(x0)
# The remaining elements are indexed inline, directly in the call argument.
output_float64(x[0, 1])
output_float64(x[1, 0])
output_float64(x[1, 1])
def test_ndarray_add():
x = np_identity(2)
y = x + np_ones([2, 2])
@ -1393,6 +1402,7 @@ def run() -> int32:
test_ndarray_neg_idx()
test_ndarray_slices()
test_ndarray_nd_idx()
test_ndarray_add()
test_ndarray_add_broadcast()