core: Use i8 for boolean variable allocation

In LLVM, i1 represents a 1-byte integer with a single valid bit; The
rest of the 7 upper bits are undefined. This causes problems when
using these variables in memory operations (e.g. memcpy/memmove as
needed by List slicing and assignment).

We fix this by treating all local boolean variables as i8 so that they
are well-defined for memory operations. Function ABIs will continue to
use i1, as memory operations cannot be directly performed on function
arguments or return types, instead they are always converted back into
local boolean variables (which are i8s anyways).

Fixes #315.
This commit is contained in:
David Mak 2023-09-20 13:34:50 +08:00
parent fc93fc2f0e
commit 31dcd2dde9
9 changed files with 255 additions and 45 deletions

View File

@ -450,7 +450,7 @@ fn rpc_codegen_callback_fn<'ctx, 'a>(
let alloc_bb = ctx.ctx.append_basic_block(current_function, "rpc.continue");
let tail_bb = ctx.ctx.append_basic_block(current_function, "rpc.tail");
let ret_ty = ctx.get_llvm_type(generator, fun.0.ret);
let ret_ty = ctx.get_llvm_abi_type(generator, fun.0.ret);
let need_load = !ret_ty.is_pointer_type();
let slot = ctx.builder.build_alloca(ret_ty, "rpc.ret.slot");
let slotgen = ctx.builder.build_bitcast(slot, ptr_type, "rpc.ret.ptr");

View File

@ -134,9 +134,7 @@ impl StaticValue for PythonValue {
PrimitiveValue::U32(val) => ctx.ctx.i32_type().const_int(*val as u64, false).into(),
PrimitiveValue::U64(val) => ctx.ctx.i64_type().const_int(*val as u64, false).into(),
PrimitiveValue::F64(val) => ctx.ctx.f64_type().const_float(*val).into(),
PrimitiveValue::Bool(val) => {
ctx.ctx.bool_type().const_int(*val as u64, false).into()
}
PrimitiveValue::Bool(val) => ctx.ctx.i8_type().const_int(*val as u64, false).into(),
});
}
if let Some(global) = ctx.module.get_global(&self.id.to_string()) {
@ -808,7 +806,7 @@ impl InnerResolver {
} else if ty_id == self.primitive_ids.bool {
let val: bool = obj.extract().unwrap();
self.id_to_primitive.write().insert(id, PrimitiveValue::Bool(val));
Ok(Some(ctx.ctx.bool_type().const_int(val as u64, false).into()))
Ok(Some(ctx.ctx.i8_type().const_int(val as u64, false).into()))
} else if ty_id == self.primitive_ids.float || ty_id == self.primitive_ids.float64 {
let val: f64 = obj.extract().unwrap();
self.id_to_primitive.write().insert(id, PrimitiveValue::F64(val));

View File

@ -4,6 +4,7 @@ use crate::{
codegen::{
concrete_type::{ConcreteFuncArg, ConcreteTypeEnum, ConcreteTypeStore},
get_llvm_type,
get_llvm_abi_type,
irrt::*,
stmt::gen_raise,
CodeGenContext, CodeGenTask,
@ -103,7 +104,7 @@ impl<'ctx, 'a> CodeGenContext<'ctx, 'a> {
SymbolValue::I64(v) => self.ctx.i64_type().const_int(*v as u64, true).into(),
SymbolValue::U32(v) => self.ctx.i32_type().const_int(*v as u64, false).into(),
SymbolValue::U64(v) => self.ctx.i64_type().const_int(*v as u64, false).into(),
SymbolValue::Bool(v) => self.ctx.bool_type().const_int(*v as u64, true).into(),
SymbolValue::Bool(v) => self.ctx.i8_type().const_int(*v as u64, true).into(),
SymbolValue::Double(v) => self.ctx.f64_type().const_float(*v).into(),
SymbolValue::Str(v) => {
let str_ptr =
@ -160,6 +161,7 @@ impl<'ctx, 'a> CodeGenContext<'ctx, 'a> {
}
}
/// See [get_llvm_type].
pub fn get_llvm_type(
&mut self,
generator: &mut dyn CodeGenerator,
@ -177,6 +179,24 @@ impl<'ctx, 'a> CodeGenContext<'ctx, 'a> {
)
}
/// See [get_llvm_abi_type].
pub fn get_llvm_abi_type(
&mut self,
generator: &mut dyn CodeGenerator,
ty: Type,
) -> BasicTypeEnum<'ctx> {
get_llvm_abi_type(
self.ctx,
&self.module,
generator,
&mut self.unifier,
self.top_level,
&mut self.type_cache,
&self.primitives,
ty,
)
}
pub fn gen_const(
&mut self,
generator: &mut dyn CodeGenerator,
@ -186,7 +206,7 @@ impl<'ctx, 'a> CodeGenContext<'ctx, 'a> {
match value {
Constant::Bool(v) => {
assert!(self.unifier.unioned(ty, self.primitives.bool));
let ty = self.ctx.bool_type();
let ty = self.ctx.i8_type();
ty.const_int(if *v { 1 } else { 0 }, false).into()
}
Constant::Int(val) => {
@ -713,18 +733,19 @@ pub fn gen_call<'ctx, 'a, G: CodeGenerator>(
let ret_type = if ctx.unifier.unioned(fun.0.ret, ctx.primitives.none) {
None
} else {
Some(ctx.get_llvm_type(generator, fun.0.ret))
Some(ctx.get_llvm_abi_type(generator, fun.0.ret))
};
let has_sret = ret_type.map_or(false, |ret_type| need_sret(ctx.ctx, ret_type));
let mut byvals = Vec::new();
let mut params =
args.iter().enumerate().map(|(i, arg)| match ctx.get_llvm_type(generator, arg.ty) {
let mut params = args.iter().enumerate()
.map(|(i, arg)| match ctx.get_llvm_abi_type(generator, arg.ty) {
BasicTypeEnum::StructType(ty) if is_extern => {
byvals.push((i, ty));
ty.ptr_type(AddressSpace::default()).into()
},
x => x
}.into()).collect_vec();
}.into())
.collect_vec();
if has_sret {
params.insert(0, ret_type.unwrap().ptr_type(AddressSpace::default()).into());
}
@ -746,6 +767,25 @@ pub fn gen_call<'ctx, 'a, G: CodeGenerator>(
}
fun_val
});
// Convert boolean parameter values into i1
let param_vals = (&fun_val.get_params()).iter().zip(param_vals)
.map(|(p, v)| {
if p.is_int_value() && v.is_int_value() {
let expected_ty = p.into_int_value().get_type();
let param_val = v.into_int_value();
if expected_ty.get_bit_width() == 1 && param_val.get_type().get_bit_width() != 1 {
generator.bool_to_i1(ctx, param_val)
} else {
param_val
}.into()
} else {
v
}
})
.collect_vec();
Ok(ctx.build_call_or_invoke(fun_val, &param_vals, "call"))
}
@ -951,12 +991,14 @@ pub fn gen_comprehension<'ctx, 'a, G: CodeGenerator>(
let val = ctx.build_gep_and_load(arr_ptr, &[tmp], Some("val"));
generator.gen_assign(ctx, target, val.into())?;
}
for cond in ifs.iter() {
let result = generator
.gen_expr(ctx, cond)?
.unwrap()
.to_basic_value_enum(ctx, generator, cond.custom.unwrap())?
.into_int_value();
let result = generator.bool_to_i1(ctx, result);
let succ = ctx.ctx.append_basic_block(current, "then");
ctx.builder.build_conditional_branch(result, succ, test_bb);
@ -1223,11 +1265,11 @@ pub fn gen_expr<'ctx, 'a, G: CodeGenerator>(
let a_bb = ctx.ctx.append_basic_block(current, "a");
let b_bb = ctx.ctx.append_basic_block(current, "b");
let cont_bb = ctx.ctx.append_basic_block(current, "cont");
ctx.builder.build_conditional_branch(left, a_bb, b_bb);
ctx.builder.build_conditional_branch(generator.bool_to_i1(ctx, left), a_bb, b_bb);
let (a, b) = match op {
Boolop::Or => {
ctx.builder.position_at_end(a_bb);
let a = ctx.ctx.bool_type().const_int(1, false);
let a = ctx.ctx.i8_type().const_int(1, false);
ctx.builder.build_unconditional_branch(cont_bb);
ctx.builder.position_at_end(b_bb);
let b = generator
@ -1235,6 +1277,7 @@ pub fn gen_expr<'ctx, 'a, G: CodeGenerator>(
.unwrap()
.to_basic_value_enum(ctx, generator, values[1].custom.unwrap())?
.into_int_value();
let b = generator.bool_to_i8(ctx, b);
ctx.builder.build_unconditional_branch(cont_bb);
(a, b)
}
@ -1245,15 +1288,16 @@ pub fn gen_expr<'ctx, 'a, G: CodeGenerator>(
.unwrap()
.to_basic_value_enum(ctx, generator, values[1].custom.unwrap())?
.into_int_value();
let a = generator.bool_to_i8(ctx, a);
ctx.builder.build_unconditional_branch(cont_bb);
ctx.builder.position_at_end(b_bb);
let b = ctx.ctx.bool_type().const_int(0, false);
let b = ctx.ctx.i8_type().const_zero();
ctx.builder.build_unconditional_branch(cont_bb);
(a, b)
}
};
ctx.builder.position_at_end(cont_bb);
let phi = ctx.builder.build_phi(ctx.ctx.bool_type(), "phi");
let phi = ctx.builder.build_phi(ctx.ctx.i8_type(), "");
phi.add_incoming(&[(&a, a_bb), (&b, b_bb)]);
phi.as_basic_value().into()
}

View File

@ -1,5 +1,5 @@
use crate::{
codegen::{expr::*, stmt::*, CodeGenContext},
codegen::{expr::*, stmt::*, bool_to_i1, bool_to_i8, CodeGenContext},
symbol_resolver::ValueEnum,
toplevel::{DefinitionId, TopLevelDef},
typecheck::typedef::{FunSignature, Type},
@ -7,7 +7,7 @@ use crate::{
use inkwell::{
context::Context,
types::{BasicTypeEnum, IntType},
values::{BasicValueEnum, PointerValue},
values::{BasicValueEnum, IntValue, PointerValue},
};
use nac3parser::ast::{Expr, Stmt, StrRef};
@ -180,6 +180,24 @@ pub trait CodeGenerator {
{
gen_stmt(self, ctx, stmt)
}
/// Converts the value of [a boolean-like value][bool_value] into an `i1`.
fn bool_to_i1<'ctx, 'a>(
&self,
ctx: &CodeGenContext<'ctx, 'a>,
bool_value: IntValue<'ctx>
) -> IntValue<'ctx> {
bool_to_i1(&ctx.builder, bool_value)
}
/// Converts the value of [a boolean-like value][bool_value] into an `i8`.
fn bool_to_i8<'ctx, 'a>(
&self,
ctx: &CodeGenContext<'ctx, 'a>,
bool_value: IntValue<'ctx>
) -> IntValue<'ctx> {
bool_to_i8(&ctx.builder, &ctx.ctx, bool_value)
}
}
pub struct DefaultCodeGenerator {

View File

@ -9,6 +9,7 @@ use crate::{
use crossbeam::channel::{unbounded, Receiver, Sender};
use inkwell::{
AddressSpace,
IntPredicate,
OptimizationLevel,
attributes::{Attribute, AttributeLoc},
basic_block::BasicBlock,
@ -18,7 +19,7 @@ use inkwell::{
passes::PassBuilderOptions,
targets::{CodeModel, RelocMode, Target, TargetMachine, TargetTriple},
types::{AnyType, BasicType, BasicTypeEnum},
values::{BasicValueEnum, FunctionValue, PhiValue, PointerValue},
values::{BasicValueEnum, FunctionValue, IntValue, PhiValue, PointerValue},
debug_info::{
DebugInfoBuilder, DICompileUnit, DISubprogram, AsDIScope, DIFlagsConstants, DIScope
},
@ -354,6 +355,10 @@ pub struct CodeGenTask {
pub id: usize,
}
/// Retrieves the [LLVM type][BasicTypeEnum] corresponding to the [Type].
///
/// This function is used to obtain the in-memory representation of [ty], e.g. a `bool` variable
/// would be represented by an `i8`.
fn get_llvm_type<'ctx>(
ctx: &'ctx Context,
module: &Module<'ctx>,
@ -465,6 +470,34 @@ fn get_llvm_type<'ctx>(
})
}
/// Retrieves the [LLVM type][BasicTypeEnum] corresponding to the [Type].
///
/// This function is used mainly to obtain the ABI representation of [ty], e.g. a `bool` is
/// would be represented by an `i1`.
///
/// The difference between the in-memory representation (as returned by [get_llvm_type]) and the
/// ABI representation is that the in-memory representation must be at least byte-sized and must
/// be byte-aligned for the variable to be addressable in memory, whereas there is no such
/// restriction for ABI representations.
fn get_llvm_abi_type<'ctx>(
ctx: &'ctx Context,
module: &Module<'ctx>,
generator: &mut dyn CodeGenerator,
unifier: &mut Unifier,
top_level: &TopLevelContext,
type_cache: &mut HashMap<Type, BasicTypeEnum<'ctx>>,
primitives: &PrimitiveStore,
ty: Type,
) -> BasicTypeEnum<'ctx> {
// If the type is used in the definition of a function, return `i1` instead of `i8` for ABI
// consistency.
return if unifier.unioned(ty, primitives.bool) {
ctx.bool_type().into()
} else {
get_llvm_type(ctx, module, generator, unifier, top_level, type_cache, primitives, ty)
}
}
fn need_sret<'ctx>(ctx: &'ctx Context, ty: BasicTypeEnum<'ctx>) -> bool {
fn need_sret_impl<'ctx>(ctx: &'ctx Context, ty: BasicTypeEnum<'ctx>, maybe_large: bool) -> bool {
match ty {
@ -534,7 +567,7 @@ pub fn gen_func_impl<'ctx, G: CodeGenerator, F: FnOnce(&mut G, &mut CodeGenConte
(primitives.uint32, context.i32_type().into()),
(primitives.uint64, context.i64_type().into()),
(primitives.float, context.f64_type().into()),
(primitives.bool, context.bool_type().into()),
(primitives.bool, context.i8_type().into()),
(primitives.str, {
let name = "str";
match module.get_struct_type(name) {
@ -592,14 +625,14 @@ pub fn gen_func_impl<'ctx, G: CodeGenerator, F: FnOnce(&mut G, &mut CodeGenConte
let ret_type = if unifier.unioned(ret, primitives.none) {
None
} else {
Some(get_llvm_type(context, &module, generator, &mut unifier, top_level_ctx.as_ref(), &mut type_cache, &primitives, ret))
Some(get_llvm_abi_type(context, &module, generator, &mut unifier, top_level_ctx.as_ref(), &mut type_cache, &primitives, ret))
};
let has_sret = ret_type.map_or(false, |ty| need_sret(context, ty));
let mut params = args
.iter()
.map(|arg| {
get_llvm_type(
get_llvm_abi_type(
context,
&module,
generator,
@ -647,19 +680,35 @@ pub fn gen_func_impl<'ctx, G: CodeGenerator, F: FnOnce(&mut G, &mut CodeGenConte
let offset = if has_sret { 1 } else { 0 };
for (n, arg) in args.iter().enumerate() {
let param = fn_val.get_nth_param((n as u32) + offset).unwrap();
let alloca = builder.build_alloca(
get_llvm_type(
context,
&module,
generator,
&mut unifier,
top_level_ctx.as_ref(),
&mut type_cache,
&primitives,
arg.ty,
),
&arg.name.to_string(),
let local_type = get_llvm_type(
context,
&module,
generator,
&mut unifier,
top_level_ctx.as_ref(),
&mut type_cache,
&primitives,
arg.ty,
);
let alloca = builder.build_alloca(
local_type,
&format!("{}.addr", &arg.name.to_string()),
);
// Remap boolean parameters into i8
let param = if local_type.is_int_type() && param.is_int_value() {
let expected_ty = local_type.into_int_type();
let param_val = param.into_int_value();
if expected_ty.get_bit_width() == 8 && param_val.get_type().get_bit_width() == 1 {
bool_to_i8(&builder, &context, param_val)
} else {
param_val
}.into()
} else {
param
};
builder.build_store(alloca, param);
var_assignment.insert(arg.name, (alloca, None, 0));
}
@ -802,3 +851,40 @@ pub fn gen_func<'ctx, G: CodeGenerator>(
Ok(())
})
}
/// Converts the value of [a boolean-like value][bool_value] into an `i1`.
fn bool_to_i1<'ctx>(builder: &Builder<'ctx>, bool_value: IntValue<'ctx>) -> IntValue<'ctx> {
if bool_value.get_type().get_bit_width() != 1 {
builder.build_int_compare(
IntPredicate::NE,
bool_value,
bool_value.get_type().const_zero(),
"tobool"
)
} else {
bool_value
}
}
/// Converts the value of [a boolean-like value][bool_value] into an `i8`.
fn bool_to_i8<'ctx>(
builder: &Builder<'ctx>,
ctx: &'ctx Context,
bool_value: IntValue<'ctx>
) -> IntValue<'ctx> {
let value_bits = bool_value.get_type().get_bit_width();
match value_bits {
8 => bool_value,
1 => builder.build_int_z_extend(bool_value, ctx.i8_type(), "frombool"),
_ => bool_to_i8(
builder,
ctx,
builder.build_int_compare(
IntPredicate::NE,
bool_value,
bool_value.get_type().const_zero(),
""
)
),
}
}

View File

@ -411,7 +411,7 @@ pub fn gen_while<'ctx, 'a, G: CodeGenerator>(
test.custom.unwrap(),
)?;
if let BasicValueEnum::IntValue(test) = test {
ctx.builder.build_conditional_branch(test, body_bb, orelse_bb);
ctx.builder.build_conditional_branch(generator.bool_to_i1(ctx, test), body_bb, orelse_bb);
} else {
unreachable!()
};
@ -470,13 +470,11 @@ pub fn gen_if<'ctx, 'a, G: CodeGenerator>(
};
ctx.builder.build_unconditional_branch(test_bb);
ctx.builder.position_at_end(test_bb);
let test = generator.gen_expr(ctx, test)?.unwrap().to_basic_value_enum(
ctx,
generator,
test.custom.unwrap(),
)?;
let test = generator.gen_expr(ctx, test)?
.unwrap()
.to_basic_value_enum(ctx, generator, test.custom.unwrap())?;
if let BasicValueEnum::IntValue(test) = test {
ctx.builder.build_conditional_branch(test, body_bb, orelse_bb);
ctx.builder.build_conditional_branch(generator.bool_to_i1(ctx, test), body_bb, orelse_bb);
} else {
unreachable!()
};
@ -1021,6 +1019,7 @@ pub fn gen_return<'ctx, 'a, G: CodeGenerator>(
ctx: &mut CodeGenContext<'ctx, 'a>,
value: &Option<Box<Expr<Option<Type>>>>,
) -> Result<(), String> {
let func = ctx.builder.get_insert_block().and_then(|bb| bb.get_parent()).unwrap();
let value = value
.as_ref()
.map(|v_expr| {
@ -1039,8 +1038,26 @@ pub fn gen_return<'ctx, 'a, G: CodeGenerator>(
ctx.builder.build_store(ctx.return_buffer.unwrap(), value.unwrap());
ctx.builder.build_return(None);
} else {
// Remap boolean return type into i1
let value = value.map(|v| {
let expected_ty = func.get_type().get_return_type().unwrap();
let ret_val = v.as_basic_value_enum();
if expected_ty.is_int_type() && ret_val.is_int_value() {
let ret_type = expected_ty.into_int_type();
let ret_val = ret_val.into_int_value();
if ret_type.get_bit_width() == 1 && ret_val.get_type().get_bit_width() != 1 {
generator.bool_to_i1(ctx, ret_val)
} else {
ret_val
}.into()
} else {
ret_val
}
});
let value = value.as_ref().map(|v| v as &dyn BasicValue);
ctx.builder.build_return(value);
ctx.builder.build_return(value.into());
}
Ok(())
}

View File

@ -1050,7 +1050,7 @@ pub fn get_builtins(primitives: &mut (PrimitiveStore, Unifier)) -> BuiltinInfo {
let uint32 = ctx.primitives.uint32;
let uint64 = ctx.primitives.uint64;
let float = ctx.primitives.float;
let llvm_i1 = ctx.ctx.bool_type().as_basic_type_enum();
let llvm_i8 = ctx.ctx.i8_type().as_basic_type_enum();
let llvm_i32 = ctx.ctx.i32_type().as_basic_type_enum();
let llvm_i64 = ctx.ctx.i64_type().as_basic_type_enum();
let llvm_f64 = ctx.ctx.f64_type().as_basic_type_enum();
@ -1060,7 +1060,7 @@ pub fn get_builtins(primitives: &mut (PrimitiveStore, Unifier)) -> BuiltinInfo {
let n_val = args[1].1.clone().to_basic_value_enum(ctx, generator, n_ty)?;
let mut is_type = |a: Type, b: Type| ctx.unifier.unioned(a, b);
let (fun_name, arg_ty) = if is_type(m_ty, n_ty) && is_type(n_ty, boolean) {
("llvm.umin.i1", llvm_i1)
("llvm.umin.i8", llvm_i8)
} else if is_type(m_ty, n_ty) && is_type(n_ty, int32) {
("llvm.smin.i32", llvm_i32)
} else if is_type(m_ty, n_ty) && is_type(n_ty, int64) {
@ -1112,7 +1112,7 @@ pub fn get_builtins(primitives: &mut (PrimitiveStore, Unifier)) -> BuiltinInfo {
let uint32 = ctx.primitives.uint32;
let uint64 = ctx.primitives.uint64;
let float = ctx.primitives.float;
let llvm_i1 = ctx.ctx.bool_type().as_basic_type_enum();
let llvm_i8 = ctx.ctx.i8_type().as_basic_type_enum();
let llvm_i32 = ctx.ctx.i32_type().as_basic_type_enum();
let llvm_i64 = ctx.ctx.i64_type().as_basic_type_enum();
let llvm_f64 = ctx.ctx.f64_type().as_basic_type_enum();
@ -1122,7 +1122,7 @@ pub fn get_builtins(primitives: &mut (PrimitiveStore, Unifier)) -> BuiltinInfo {
let n_val = args[1].1.clone().to_basic_value_enum(ctx, generator, n_ty)?;
let mut is_type = |a: Type, b: Type| ctx.unifier.unioned(a, b);
let (fun_name, arg_ty) = if is_type(m_ty, n_ty) && is_type(n_ty, boolean) {
("llvm.umax.i1", llvm_i1)
("llvm.umax.i8", llvm_i8)
} else if is_type(m_ty, n_ty) && is_type(n_ty, int32) {
("llvm.smax.i32", llvm_i32)
} else if is_type(m_ty, n_ty) && is_type(n_ty, int64) {

View File

@ -0,0 +1,30 @@
# Different cases for using boolean variables in boolean contexts.
# Tests whether all boolean variables (expressed as i8s) are lowered into i1s before used in branching instruction (`br`)
def bfunc(b: bool) -> bool:
return not b
def run() -> int32:
b1 = True
b2 = False
if b1:
pass
if not b2:
pass
while b2:
pass
l = [i for i in range(10) if b2]
b_and = True and False
b_or = True or False
b_and = b1 and b2
b_or = b1 or b2
bfunc(b1)
return 0

View File

@ -0,0 +1,17 @@
@extern
def output_int32(x: int32):
...
@extern
def output_int32_list(x: list[int32]):
...
def run() -> int32:
bl = [True, False]
bl1 = bl[:]
bl1[1:] = [True]
output_int32_list([int32(b) for b in bl1])
output_int32_list([int32(b) for b in bl1])
return 0