Implement core_log and rtio_log to ARTIQ #488

Merged
sb10q merged 8 commits from feature/core-rtio-log into master 2024-08-13 15:19:03 +08:00
13 changed files with 664 additions and 69 deletions

52
Cargo.lock generated
View File

@ -117,9 +117,9 @@ checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b"
[[package]]
name = "cc"
version = "1.1.7"
version = "1.1.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "26a5c3fd7bfa1ce3897a3a3501d362b2d87b7f2583ebcb4a949ec25911025cbc"
checksum = "e9e8aabfac534be767c909e0690571677d49f41bd8465ae876fe043d52ba5292"
[[package]]
name = "cfg-if"
@ -129,9 +129,9 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "clap"
version = "4.5.13"
version = "4.5.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0fbb260a053428790f3de475e304ff84cdbc4face759ea7a3e64c1edd938a7fc"
checksum = "11d8838454fda655dafd3accb2b6e2bea645b9e4078abe84a22ceb947235c5cc"
dependencies = [
"clap_builder",
"clap_derive",
@ -139,9 +139,9 @@ dependencies = [
[[package]]
name = "clap_builder"
version = "4.5.13"
version = "4.5.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "64b17d7ea74e9f833c7dbf2cbe4fb12ff26783eda4782a8975b72f895c9b4d99"
checksum = "216aec2b177652e3846684cbfe25c9964d18ec45234f0f5da5157b207ed1aab6"
dependencies = [
"anstream",
"anstyle",
@ -158,7 +158,7 @@ dependencies = [
"heck 0.5.0",
"proc-macro2",
"quote",
"syn 2.0.72",
"syn 2.0.74",
]
[[package]]
@ -421,7 +421,7 @@ checksum = "4fa4d8d74483041a882adaa9a29f633253a66dde85055f0495c121620ac484b2"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.72",
"syn 2.0.74",
]
[[package]]
@ -749,7 +749,7 @@ dependencies = [
"phf_shared 0.11.2",
"proc-macro2",
"quote",
"syn 2.0.72",
"syn 2.0.74",
]
[[package]]
@ -853,7 +853,7 @@ dependencies = [
"proc-macro2",
"pyo3-macros-backend",
"quote",
"syn 2.0.72",
"syn 2.0.74",
]
[[package]]
@ -866,7 +866,7 @@ dependencies = [
"proc-macro2",
"pyo3-build-config",
"quote",
"syn 2.0.72",
"syn 2.0.74",
]
[[package]]
@ -1032,29 +1032,29 @@ checksum = "61697e0a1c7e512e84a621326239844a24d8207b4669b41bc18b32ea5cbf988b"
[[package]]
name = "serde"
version = "1.0.204"
version = "1.0.206"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bc76f558e0cbb2a839d37354c575f1dc3fdc6546b5be373ba43d95f231bf7c12"
checksum = "5b3e4cd94123dd520a128bcd11e34d9e9e423e7e3e50425cb1b4b1e3549d0284"
dependencies = [
"serde_derive",
]
[[package]]
name = "serde_derive"
version = "1.0.204"
version = "1.0.206"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e0cd7e117be63d3c3678776753929474f3b04a43a080c744d6b0ae2a8c28e222"
checksum = "fabfb6138d2383ea8208cf98ccf69cdfb1aff4088460681d84189aa259762f97"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.72",
"syn 2.0.74",
]
[[package]]
name = "serde_json"
version = "1.0.122"
version = "1.0.124"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "784b6203951c57ff748476b126ccb5e8e2959a5c19e5c617ab1956be3dbc68da"
checksum = "66ad62847a56b3dba58cc891acd13884b9c61138d330c0d7b6181713d4fce38d"
dependencies = [
"itoa",
"memchr",
@ -1138,7 +1138,7 @@ dependencies = [
"proc-macro2",
"quote",
"rustversion",
"syn 2.0.72",
"syn 2.0.74",
]
[[package]]
@ -1154,9 +1154,9 @@ dependencies = [
[[package]]
name = "syn"
version = "2.0.72"
version = "2.0.74"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dc4b9b9bf2add8093d3f2c0204471e951b2285580335de42f9d2534f3ae7a8af"
checksum = "1fceb41e3d546d0bd83421d3409b1460cc7444cd389341a4c880fe7a042cb3d7"
dependencies = [
"proc-macro2",
"quote",
@ -1171,15 +1171,15 @@ checksum = "61c41af27dd6d1e27b1b16b489db798443478cef1f06a660c96db617ba5de3b1"
[[package]]
name = "tempfile"
version = "3.11.0"
version = "3.12.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b8fcd239983515c23a32fb82099f97d0b11b8c72f654ed659363a95c3dad7a53"
checksum = "04cbcdd0c794ebb0d4cf35e88edd2f7d2c4c3e9a5a6dab322839b321c6a87a64"
dependencies = [
"cfg-if",
"fastrand",
"once_cell",
"rustix",
"windows-sys 0.52.0",
"windows-sys 0.59.0",
]
[[package]]
@ -1223,7 +1223,7 @@ checksum = "a4558b58466b9ad7ca0f102865eccc95938dca1a74a856f2b57b6629050da261"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.72",
"syn 2.0.74",
]
[[package]]
@ -1501,5 +1501,5 @@ checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.72",
"syn 2.0.74",
]

View File

@ -1,8 +1,10 @@
use nac3core::{
codegen::{
expr::gen_call,
classes::{ListValue, NDArrayValue, RangeValue, UntypedArrayLikeAccessor},
expr::{destructure_range, gen_call},
irrt::call_ndarray_calc_size,
llvm_intrinsics::{call_int_smax, call_stackrestore, call_stacksave},
stmt::{gen_block, gen_with},
stmt::{gen_block, gen_for_callback_incrementing, gen_if_callback, gen_with},
CodeGenContext, CodeGenerator,
},
symbol_resolver::ValueEnum,
@ -13,7 +15,11 @@ use nac3core::{
use nac3parser::ast::{Expr, ExprKind, Located, Stmt, StmtKind, StrRef};
use inkwell::{
context::Context, module::Linkage, types::IntType, values::BasicValueEnum, AddressSpace,
context::Context,
module::Linkage,
types::IntType,
values::{BasicValueEnum, StructValue},
AddressSpace, IntPredicate,
};
use pyo3::{
@ -23,10 +29,12 @@ use pyo3::{
use crate::{symbol_resolver::InnerResolver, timeline::TimeFns};
use itertools::Itertools;
use std::{
collections::hash_map::DefaultHasher,
collections::HashMap,
collections::{hash_map::DefaultHasher, HashMap},
hash::{Hash, Hasher},
iter::once,
mem,
sync::Arc,
};
@ -724,3 +732,472 @@ pub fn rpc_codegen_callback() -> Arc<GenCall> {
rpc_codegen_callback_fn(ctx, obj, fun, args, generator)
})))
}
/// Returns the `fprintf` format constant for the given [`llvm_int_t`][`IntType`] on a platform with
/// [`llvm_usize`] as its native word size.
///
/// Note that, similar to format constants in `<inttypes.h>`, these constants need to be prepended
/// with `%`.
#[must_use]
fn get_fprintf_format_constant<'ctx>(
llvm_usize: IntType<'ctx>,
Outdated
Review

I suppose this should make it straightforward now to implement the full print_rpc() with several arguments?

I suppose this should make it straightforward now to implement the full ``print_rpc()`` with several arguments?

Not really. This only makes it straightforward to implement print_rpc/core_log/rtio_log with several arguments of the same type, as is required by the current implementation of varargs (Varargs are typechecked against its expected type, e.g. *int32 requires all arguments in the variadic position to be int32).

In order to implement these functions with variadic arguments of different types, we still need a type category which accepts any type (think TypeEnum::TAny), and that needs to be first implemented. The closest equivalent we have is *T (where T is a typevar), but this still requires all arguments to be of the same type T.

Not really. This only makes it straightforward to implement `print_rpc`/`core_log`/`rtio_log` with several arguments *of the same type*, as is required by the current implementation of varargs (Varargs are typechecked against its expected type, e.g. `*int32` requires all arguments in the variadic position to be `int32`). In order to implement these functions with variadic arguments of *different* types, we still need a type category which accepts any type (think `TypeEnum::TAny`), and that needs to be first implemented. The closest equivalent we have is `*T` (where `T` is a typevar), but this still requires all arguments to be of the same type `T`.
Outdated
Review

with several arguments of the same type, as is required by the current implementation of varargs

So the _log functions currently have the same limitation, correct?

> with several arguments of the same type, as is required by the current implementation of varargs So the _log functions currently have the same limitation, correct?

_log functions are currently implemented with a single non-vararg parameter. I think we can merge this first before expanding support to homogeneous varargs?

`_log` functions are currently implemented with a single non-vararg parameter. I think we can merge this first before expanding support to homogeneous varargs?
Outdated
Review

Yes, just trying to understand where we are.

Yes, just trying to understand where we are.
llvm_int_t: IntType<'ctx>,
is_unsigned: bool,
) -> String {
debug_assert!(matches!(llvm_usize.get_bit_width(), 8 | 16 | 32 | 64));
let conv_spec = if is_unsigned { 'u' } else { 'd' };
// https://en.cppreference.com/w/c/language/arithmetic_types
// Note that NAC3 does **not** support LP32 and LLP64 configurations
match llvm_int_t.get_bit_width() {
8 => format!("hh{conv_spec}"),
16 => format!("h{conv_spec}"),
32 => conv_spec.to_string(),
64 => format!("{}{conv_spec}", if llvm_usize.get_bit_width() == 64 { "l" } else { "ll" }),
_ => todo!(
"Not yet implemented for i{} on {}-bit platform",
llvm_int_t.get_bit_width(),
llvm_usize.get_bit_width()
),
}
}
/// Prints one or more `values` to `core_log` or `rtio_log`.
///
/// * `separator` - The separator between multiple values.
/// * `suffix` - String to terminate the printed string, if any.
/// * `as_repr` - Whether the `repr()` output of values instead of `str()`.
/// * `as_rtio` - Whether to print to `rtio_log` instead of `core_log`.
fn polymorphic_print<'ctx>(
ctx: &mut CodeGenContext<'ctx, '_>,
generator: &mut dyn CodeGenerator,
values: &[(Type, ValueEnum<'ctx>)],
separator: &str,
suffix: Option<&str>,
as_repr: bool,
as_rtio: bool,
) -> Result<(), String> {
let printf = |ctx: &mut CodeGenContext<'ctx, '_>,
generator: &mut dyn CodeGenerator,
fmt: String,
args: Vec<BasicValueEnum<'ctx>>| {
debug_assert!(!fmt.is_empty());
debug_assert_eq!(fmt.as_bytes().last().unwrap(), &0u8);
let fn_name = if as_rtio { "rtio_log" } else { "core_log" };
let print_fn = ctx.module.get_function(fn_name).unwrap_or_else(|| {
let llvm_pi8 = ctx.ctx.i8_type().ptr_type(AddressSpace::default());
let fn_t = if as_rtio {
let llvm_void = ctx.ctx.void_type();
llvm_void.fn_type(&[llvm_pi8.into()], true)
} else {
let llvm_i32 = ctx.ctx.i32_type();
llvm_i32.fn_type(&[llvm_pi8.into()], true)
};
ctx.module.add_function(fn_name, fn_t, None)
});
let fmt = ctx.gen_string(generator, fmt);
let fmt = unsafe { fmt.get_field_at_index_unchecked(0) }.into_pointer_value();
ctx.builder
.build_call(
print_fn,
&once(fmt.into()).chain(args).map(BasicValueEnum::into).collect_vec(),
"",
)
.unwrap();
};
let llvm_i32 = ctx.ctx.i32_type();
let llvm_i64 = ctx.ctx.i64_type();
let llvm_usize = generator.get_size_type(ctx.ctx);
let suffix = suffix.unwrap_or_default();
let mut fmt = String::new();
let mut args = Vec::new();
let flush = |ctx: &mut CodeGenContext<'ctx, '_>,
generator: &mut dyn CodeGenerator,
fmt: &mut String,
args: &mut Vec<BasicValueEnum<'ctx>>| {
if !fmt.is_empty() {
fmt.push('\0');
printf(ctx, generator, mem::take(fmt), mem::take(args));
}
};
for (ty, value) in values {
let ty = *ty;
let value = value.clone().to_basic_value_enum(ctx, generator, ty).unwrap();
if !fmt.is_empty() {
fmt.push_str(separator);
}
match &*ctx.unifier.get_ty_immutable(ty) {
TypeEnum::TTuple { ty: tys, is_vararg_ctx: false } => {
let pvalue = {
let pvalue = generator.gen_var_alloc(ctx, value.get_type(), None).unwrap();
ctx.builder.build_store(pvalue, value).unwrap();
pvalue
};
fmt.push('(');
flush(ctx, generator, &mut fmt, &mut args);
let tuple_vals = tys
.iter()
.enumerate()
.map(|(i, ty)| {
(*ty, {
let pfield =
ctx.builder.build_struct_gep(pvalue, i as u32, "").unwrap();
ValueEnum::from(ctx.builder.build_load(pfield, "").unwrap())
})
})
.collect_vec();
polymorphic_print(ctx, generator, &tuple_vals, ", ", None, true, as_rtio)?;
if tuple_vals.len() == 1 {
fmt.push_str(",)");
} else {
fmt.push(')');
}
}
TypeEnum::TFunc { .. } => todo!(),
TypeEnum::TObj { obj_id, .. } if *obj_id == PrimDef::None.id() => {
fmt.push_str("None");
}
TypeEnum::TObj { obj_id, .. } if *obj_id == PrimDef::Bool.id() => {
fmt.push_str("%.*s");
let true_str = ctx.gen_string(generator, "True");
let true_data =
unsafe { true_str.get_field_at_index_unchecked(0) }.into_pointer_value();
let true_len = unsafe { true_str.get_field_at_index_unchecked(1) }.into_int_value();
let false_str = ctx.gen_string(generator, "False");
let false_data =
unsafe { false_str.get_field_at_index_unchecked(0) }.into_pointer_value();
let false_len =
unsafe { false_str.get_field_at_index_unchecked(1) }.into_int_value();
let bool_val = generator.bool_to_i1(ctx, value.into_int_value());
args.extend([
ctx.builder.build_select(bool_val, true_len, false_len, "").unwrap(),
ctx.builder.build_select(bool_val, true_data, false_data, "").unwrap(),
]);
}
TypeEnum::TObj { obj_id, .. }
if *obj_id == PrimDef::Int32.id()
|| *obj_id == PrimDef::Int64.id()
|| *obj_id == PrimDef::UInt32.id()
|| *obj_id == PrimDef::UInt64.id() =>
{
let is_unsigned =
*obj_id == PrimDef::UInt32.id() || *obj_id == PrimDef::UInt64.id();
let llvm_int_t = value.get_type().into_int_type();
debug_assert!(matches!(llvm_usize.get_bit_width(), 32 | 64));
debug_assert!(matches!(llvm_int_t.get_bit_width(), 32 | 64));
let fmt_spec = format!(
"%{}",
get_fprintf_format_constant(llvm_usize, llvm_int_t, is_unsigned)
);
fmt.push_str(fmt_spec.as_str());
args.push(value);
}
TypeEnum::TObj { obj_id, .. } if *obj_id == PrimDef::Float.id() => {
fmt.push_str("%g");
args.push(value);
}
TypeEnum::TObj { obj_id, .. } if *obj_id == PrimDef::Str.id() => {
if as_repr {
fmt.push_str("\"%.*s\"");
} else {
fmt.push_str("%.*s");
}
let str = value.into_struct_value();
let str_data = unsafe { str.get_field_at_index_unchecked(0) }.into_pointer_value();
let str_len = unsafe { str.get_field_at_index_unchecked(1) }.into_int_value();
args.extend(&[str_len.into(), str_data.into()]);
}
TypeEnum::TObj { obj_id, params, .. } if *obj_id == PrimDef::List.id() => {
let elem_ty = *params.iter().next().unwrap().1;
fmt.push('[');
flush(ctx, generator, &mut fmt, &mut args);
let val = ListValue::from_ptr_val(value.into_pointer_value(), llvm_usize, None);
let len = val.load_size(ctx, None);
let last =
ctx.builder.build_int_sub(len, llvm_usize.const_int(1, false), "").unwrap();
gen_for_callback_incrementing(
generator,
ctx,
None,
llvm_usize.const_zero(),
(len, false),
|generator, ctx, _, i| {
let elem = unsafe { val.data().get_unchecked(ctx, generator, &i, None) };
polymorphic_print(
ctx,
generator,
&[(elem_ty, elem.into())],
"",
None,
true,
as_rtio,
)?;
gen_if_callback(
generator,
ctx,
|_, ctx| {
Ok(ctx
.builder
.build_int_compare(IntPredicate::ULT, i, last, "")
.unwrap())
},
|generator, ctx| {
printf(ctx, generator, ", \0".into(), Vec::default());
Ok(())
},
|_, _| Ok(()),
)?;
Ok(())
},
llvm_usize.const_int(1, false),
)?;
fmt.push(']');
flush(ctx, generator, &mut fmt, &mut args);
}
TypeEnum::TObj { obj_id, .. } if *obj_id == PrimDef::NDArray.id() => {
let (elem_ty, _) = unpack_ndarray_var_tys(&mut ctx.unifier, ty);
fmt.push_str("array([");
flush(ctx, generator, &mut fmt, &mut args);
let val = NDArrayValue::from_ptr_val(value.into_pointer_value(), llvm_usize, None);
let len = call_ndarray_calc_size(generator, ctx, &val.dim_sizes(), (None, None));
let last =
ctx.builder.build_int_sub(len, llvm_usize.const_int(1, false), "").unwrap();
gen_for_callback_incrementing(
generator,
ctx,
None,
llvm_usize.const_zero(),
(len, false),
|generator, ctx, _, i| {
let elem = unsafe { val.data().get_unchecked(ctx, generator, &i, None) };
polymorphic_print(
ctx,
generator,
&[(elem_ty, elem.into())],
"",
None,
true,
as_rtio,
)?;
gen_if_callback(
generator,
ctx,
|_, ctx| {
Ok(ctx
.builder
.build_int_compare(IntPredicate::ULT, i, last, "")
.unwrap())
},
|generator, ctx| {
printf(ctx, generator, ", \0".into(), Vec::default());
Ok(())
},
|_, _| Ok(()),
)?;
Ok(())
},
llvm_usize.const_int(1, false),
)?;
fmt.push_str(")]");
flush(ctx, generator, &mut fmt, &mut args);
}
TypeEnum::TObj { obj_id, .. } if *obj_id == PrimDef::Range.id() => {
fmt.push_str("range(");
flush(ctx, generator, &mut fmt, &mut args);
let val = RangeValue::from_ptr_val(value.into_pointer_value(), None);
let (start, stop, step) = destructure_range(ctx, val);
polymorphic_print(
ctx,
generator,
&[
(ctx.primitives.int32, start.into()),
(ctx.primitives.int32, stop.into()),
(ctx.primitives.int32, step.into()),
],
", ",
None,
false,
as_rtio,
)?;
fmt.push(')');
}
TypeEnum::TObj { obj_id, .. } if *obj_id == PrimDef::Exception.id() => {
let fmt_str = format!(
"%{}(%{}, %{1:}, %{1:})",
get_fprintf_format_constant(llvm_usize, llvm_i32, false),
get_fprintf_format_constant(llvm_usize, llvm_i64, false),
);
let exn = value.into_pointer_value();
let name = ctx
.build_in_bounds_gep_and_load(
exn,
&[llvm_i32.const_zero(), llvm_i32.const_zero()],
None,
)
.into_int_value();
let param0 = ctx
.build_in_bounds_gep_and_load(
exn,
&[llvm_i32.const_zero(), llvm_i32.const_int(6, false)],
None,
)
.into_int_value();
let param1 = ctx
.build_in_bounds_gep_and_load(
exn,
&[llvm_i32.const_zero(), llvm_i32.const_int(7, false)],
None,
)
.into_int_value();
let param2 = ctx
.build_in_bounds_gep_and_load(
exn,
&[llvm_i32.const_zero(), llvm_i32.const_int(8, false)],
None,
)
.into_int_value();
fmt.push_str(fmt_str.as_str());
args.extend_from_slice(&[name.into(), param0.into(), param1.into(), param2.into()]);
}
_ => unreachable!(
"Unsupported object type for polymorphic_print: {}",
ctx.unifier.stringify(ty)
),
}
}
fmt.push_str(suffix);
flush(ctx, generator, &mut fmt, &mut args);
Ok(())
}
/// Invokes the `core_log` intrinsic function.
pub fn call_core_log_impl<'ctx>(
ctx: &mut CodeGenContext<'ctx, '_>,
generator: &mut dyn CodeGenerator,
arg: (Type, BasicValueEnum<'ctx>),
) -> Result<(), String> {
let (arg_ty, arg_val) = arg;
polymorphic_print(ctx, generator, &[(arg_ty, arg_val.into())], " ", Some("\n"), false, false)?;
Ok(())
}
/// Invokes the `rtio_log` intrinsic function.
pub fn call_rtio_log_impl<'ctx>(
ctx: &mut CodeGenContext<'ctx, '_>,
generator: &mut dyn CodeGenerator,
channel: StructValue<'ctx>,
arg: (Type, BasicValueEnum<'ctx>),
) -> Result<(), String> {
let (arg_ty, arg_val) = arg;
polymorphic_print(
ctx,
generator,
&[(ctx.primitives.str, channel.into())],
" ",
Some("\x1E"),
false,
true,
)?;
polymorphic_print(ctx, generator, &[(arg_ty, arg_val.into())], " ", Some("\x1D"), false, true)?;
Ok(())
}
/// Generates a call to `core_log`.
pub fn gen_core_log<'ctx>(
ctx: &mut CodeGenContext<'ctx, '_>,
obj: &Option<(Type, ValueEnum<'ctx>)>,
fun: (&FunSignature, DefinitionId),
args: &[(Option<StrRef>, ValueEnum<'ctx>)],
generator: &mut dyn CodeGenerator,
) -> Result<(), String> {
assert!(obj.is_none());
assert_eq!(args.len(), 1);
let value_ty = fun.0.args[0].ty;
let value_arg = args[0].1.clone().to_basic_value_enum(ctx, generator, value_ty)?;
call_core_log_impl(ctx, generator, (value_ty, value_arg))
}
/// Generates a call to `rtio_log`.
pub fn gen_rtio_log<'ctx>(
ctx: &mut CodeGenContext<'ctx, '_>,
obj: &Option<(Type, ValueEnum<'ctx>)>,
fun: (&FunSignature, DefinitionId),
args: &[(Option<StrRef>, ValueEnum<'ctx>)],
generator: &mut dyn CodeGenerator,
) -> Result<(), String> {
assert!(obj.is_none());
assert_eq!(args.len(), 2);
let channel_ty = fun.0.args[0].ty;
assert!(ctx.unifier.unioned(channel_ty, ctx.primitives.str));
let channel_arg =
args[0].1.clone().to_basic_value_enum(ctx, generator, channel_ty)?.into_struct_value();
let value_ty = fun.0.args[1].ty;
let value_arg = args[1].1.clone().to_basic_value_enum(ctx, generator, value_ty)?;
call_rtio_log_impl(ctx, generator, channel_arg, (value_ty, value_arg))
}

View File

@ -35,7 +35,7 @@ use inkwell::{
use itertools::Itertools;
use nac3core::codegen::{gen_func_impl, CodeGenLLVMOptions, CodeGenTargetMachineOptions};
use nac3core::toplevel::builtins::get_exn_constructor;
use nac3core::typecheck::typedef::{TypeEnum, Unifier, VarMap};
use nac3core::typecheck::typedef::{into_var_map, TypeEnum, Unifier, VarMap};
use nac3parser::{
ast::{ExprKind, Stmt, StmtKind, StrRef},
parser::parse_program,
@ -51,7 +51,7 @@ use nac3core::{
codegen::{concrete_type::ConcreteTypeStore, CodeGenTask, WithCall, WorkerRegistry},
symbol_resolver::SymbolResolver,
toplevel::{
composer::{ComposerConfig, TopLevelComposer},
composer::{BuiltinFuncCreator, BuiltinFuncSpec, ComposerConfig, TopLevelComposer},
DefinitionId, GenCall, TopLevelDef,
},
typecheck::typedef::{FunSignature, FuncArg},
@ -60,13 +60,13 @@ use nac3core::{
use nac3ld::Linker;
use tempfile::{self, TempDir};
use crate::codegen::attributes_writeback;
use crate::{
codegen::{rpc_codegen_callback, ArtiqCodeGenerator},
codegen::{
attributes_writeback, gen_core_log, gen_rtio_log, rpc_codegen_callback, ArtiqCodeGenerator,
},
symbol_resolver::{DeferredEvaluationStore, InnerResolver, PythonHelper, Resolver},
};
use tempfile::{self, TempDir};
mod codegen;
mod symbol_resolver;
@ -127,7 +127,7 @@ struct Nac3 {
isa: Isa,
time_fns: &'static (dyn TimeFns + Sync),
primitive: PrimitiveStore,
builtins: Vec<(StrRef, FunSignature, Arc<GenCall>)>,
builtins: Vec<BuiltinFuncSpec>,
pyid_to_def: Arc<RwLock<HashMap<u64, DefinitionId>>>,
primitive_ids: PrimitivePythonId,
working_directory: TempDir,
@ -301,6 +301,64 @@ impl Nac3 {
None
}
/// Returns a [`Vec`] of builtins that needs to be initialized during method compilation time.
fn get_lateinit_builtins() -> Vec<Box<BuiltinFuncCreator>> {
vec![
Box::new(|primitives, unifier| {
let arg_ty = unifier.get_fresh_var(Some("T".into()), None);
(
"core_log".into(),
FunSignature {
args: vec![FuncArg {
name: "arg".into(),
ty: arg_ty.ty,
default_value: None,
is_vararg: false,
}],
ret: primitives.none,
vars: into_var_map([arg_ty]),
},
Arc::new(GenCall::new(Box::new(move |ctx, obj, fun, args, generator| {
gen_core_log(ctx, &obj, fun, &args, generator)?;
Ok(None)
}))),
)
}),
Box::new(|primitives, unifier| {
let arg_ty = unifier.get_fresh_var(Some("T".into()), None);
(
"rtio_log".into(),
FunSignature {
args: vec![
FuncArg {
name: "channel".into(),
ty: primitives.str,
default_value: None,
is_vararg: false,
},
FuncArg {
name: "arg".into(),
ty: arg_ty.ty,
default_value: None,
is_vararg: false,
},
],
ret: primitives.none,
vars: into_var_map([arg_ty]),
},
Arc::new(GenCall::new(Box::new(move |ctx, obj, fun, args, generator| {
gen_rtio_log(ctx, &obj, fun, &args, generator)?;
Ok(None)
}))),
)
}),
]
}
fn compile_method<T>(
&self,
obj: &PyAny,
@ -313,6 +371,7 @@ impl Nac3 {
let size_t = self.isa.get_size_type();
let (mut composer, mut builtins_def, mut builtins_ty) = TopLevelComposer::new(
self.builtins.clone(),
Self::get_lateinit_builtins(),
ComposerConfig { kernel_ann: Some("Kernel"), kernel_invariant_ann: "KernelInvariant" },
size_t,
);
@ -853,7 +912,7 @@ impl Nac3 {
Isa::RiscV32IMA => &timeline::NOW_PINNING_TIME_FNS,
Isa::CortexA9 | Isa::Host => &timeline::EXTERN_TIME_FNS,
};
let primitive: PrimitiveStore = TopLevelComposer::make_primitives(isa.get_size_type()).0;
let (primitive, _) = TopLevelComposer::make_primitives(isa.get_size_type());
let builtins = vec![
(
"now_mu".into(),

View File

@ -32,7 +32,7 @@ use crate::{
use inkwell::{
attributes::{Attribute, AttributeLoc},
types::{AnyType, BasicType, BasicTypeEnum},
values::{BasicValueEnum, CallSiteValue, FunctionValue, IntValue, PointerValue},
values::{BasicValueEnum, CallSiteValue, FunctionValue, IntValue, PointerValue, StructValue},
AddressSpace, IntPredicate, OptimizationLevel,
};
use itertools::{chain, izip, Either, Itertools};
@ -82,6 +82,20 @@ impl<'ctx, 'a> CodeGenContext<'ctx, 'a> {
self.builder.build_load(gep, name.unwrap_or_default()).unwrap()
}
/// Builds a sequence of `getelementptr inbounds` and `load` instructions which stores the value
/// of a struct field into an LLVM value.
///
/// Any out-of-bounds accesses to `ptr` will return in a `poison` value.
pub fn build_in_bounds_gep_and_load(
&mut self,
ptr: PointerValue<'ctx>,
index: &[IntValue<'ctx>],
name: Option<&str>,
) -> BasicValueEnum<'ctx> {
let gep = unsafe { self.builder.build_in_bounds_gep(ptr, index, "") }.unwrap();
self.builder.build_load(gep, name.unwrap_or_default()).unwrap()
}
fn get_subst_key(
&mut self,
obj: Option<Type>,
@ -308,7 +322,7 @@ impl<'ctx, 'a> CodeGenContext<'ctx, 'a> {
self.raise_exn(
generator,
"0:NotImplementedError",
msg,
msg.into(),
[None, None, None],
self.current_loc,
);
@ -568,12 +582,14 @@ impl<'ctx, 'a> CodeGenContext<'ctx, 'a> {
}
/// Helper function for generating a LLVM variable storing a [String].
pub fn gen_string<G, S>(&mut self, generator: &mut G, s: S) -> BasicValueEnum<'ctx>
pub fn gen_string<G, S>(&mut self, generator: &mut G, s: S) -> StructValue<'ctx>
where
G: CodeGenerator + ?Sized,
S: Into<String>,
{
self.gen_const(generator, &Constant::Str(s.into()), self.primitives.str).unwrap()
self.gen_const(generator, &Constant::Str(s.into()), self.primitives.str)
.map(BasicValueEnum::into_struct_value)
.unwrap()
}
pub fn raise_exn<G: CodeGenerator + ?Sized>(
@ -632,7 +648,7 @@ impl<'ctx, 'a> CodeGenContext<'ctx, 'a> {
loc: Location,
) {
let err_msg = self.gen_string(generator, err_msg);
self.make_assert_impl(generator, cond, err_name, err_msg, params, loc);
self.make_assert_impl(generator, cond, err_name, err_msg.into(), params, loc);
}
pub fn make_assert_impl<G: CodeGenerator + ?Sized>(
@ -3053,7 +3069,7 @@ pub fn gen_expr<'ctx, G: CodeGenerator>(
ctx.raise_exn(
generator,
"0:UnwrapNoneError",
err_msg,
err_msg.into(),
[None, None, None],
ctx.current_loc,
);

View File

@ -257,7 +257,7 @@ fn ndarray_zero_value<'ctx, G: CodeGenerator + ?Sized>(
} else if ctx.unifier.unioned(elem_ty, ctx.primitives.bool) {
ctx.ctx.bool_type().const_zero().into()
} else if ctx.unifier.unioned(elem_ty, ctx.primitives.str) {
ctx.gen_string(generator, "")
ctx.gen_string(generator, "").into()
} else {
unreachable!()
}
@ -285,7 +285,7 @@ fn ndarray_one_value<'ctx, G: CodeGenerator + ?Sized>(
} else if ctx.unifier.unioned(elem_ty, ctx.primitives.bool) {
ctx.ctx.bool_type().const_int(1, false).into()
} else if ctx.unifier.unioned(elem_ty, ctx.primitives.str) {
ctx.gen_string(generator, "1")
ctx.gen_string(generator, "1").into()
} else {
unreachable!()
}

View File

@ -1780,7 +1780,7 @@ pub fn gen_stmt<G: CodeGenerator>(
return Ok(());
}
}
None => ctx.gen_string(generator, ""),
None => ctx.gen_string(generator, "").into(),
};
ctx.make_assert_impl(
generator,

View File

@ -94,7 +94,7 @@ fn test_primitives() {
"};
let statements = parse_program(source, FileName::default()).unwrap();
let composer = TopLevelComposer::new(Vec::new(), ComposerConfig::default(), 32).0;
let composer = TopLevelComposer::new(Vec::new(), Vec::new(), ComposerConfig::default(), 32).0;
let mut unifier = composer.unifier.clone();
let primitives = composer.primitives_ty;
let top_level = Arc::new(composer.make_top_level_context());
@ -258,7 +258,7 @@ fn test_simple_call() {
"};
let statements_2 = parse_program(source_2, FileName::default()).unwrap();
let composer = TopLevelComposer::new(Vec::new(), ComposerConfig::default(), 32).0;
let composer = TopLevelComposer::new(Vec::new(), Vec::new(), ComposerConfig::default(), 32).0;
let mut unifier = composer.unifier.clone();
let primitives = composer.primitives_ty;
let top_level = Arc::new(composer.make_top_level_context());

View File

@ -44,12 +44,27 @@ pub struct TopLevelComposer {
pub size_t: u32,
}
/// The specification for a builtin function, consisting of the function name, the function
/// signature, and a [code generation callback][`GenCall`].
pub type BuiltinFuncSpec = (StrRef, FunSignature, Arc<GenCall>);
/// A function that creates a [`BuiltinFuncSpec`] using the provided [`PrimitiveStore`] and
/// [`Unifier`].
pub type BuiltinFuncCreator = dyn Fn(&PrimitiveStore, &mut Unifier) -> BuiltinFuncSpec;
impl TopLevelComposer {
/// return a composer and things to make a "primitive" symbol resolver, so that the symbol
/// resolver can later figure out primitive type definitions when passed a primitive type name
/// resolver can later figure out primitive tye definitions when passed a primitive type name
///
/// `lateinit_builtins` are specifically for the ARTIQ module. Since the [`Unifier`] instance
/// used to create builtin functions do not persist until method compilation, any types
/// created (e.g. [`TypeEnum::TVar`]) also do not persist. Those functions should be instead put
/// in `lateinit_builtins`, where they will be instantiated with the [`Unifier`] instance used
/// for method compilation.
#[must_use]
pub fn new(
builtins: Vec<(StrRef, FunSignature, Arc<GenCall>)>,
builtins: Vec<BuiltinFuncSpec>,
lateinit_builtins: Vec<Box<BuiltinFuncCreator>>,
core_config: ComposerConfig,
size_t: u32,
) -> (Self, HashMap<StrRef, DefinitionId>, HashMap<StrRef, Type>) {
@ -119,7 +134,13 @@ impl TopLevelComposer {
}
}
for (name, sig, codegen_callback) in builtins {
// Materialize lateinit_builtins, now that the unifier is ready
let lateinit_builtins = lateinit_builtins
.into_iter()
.map(|builtin| builtin(&primitives_ty, &mut unifier))
.collect_vec();
for (name, sig, codegen_callback) in builtins.into_iter().chain(lateinit_builtins) {
let fun_sig = unifier.add_ty(TypeEnum::TFunc(sig));
builtin_ty.insert(name, fun_sig);
builtin_id.insert(name, DefinitionId(definition_ast_list.len()));

View File

@ -117,7 +117,8 @@ impl SymbolResolver for Resolver {
"register"
)]
fn test_simple_register(source: Vec<&str>) {
let mut composer = TopLevelComposer::new(Vec::new(), ComposerConfig::default(), 64).0;
let mut composer =
TopLevelComposer::new(Vec::new(), Vec::new(), ComposerConfig::default(), 64).0;
for s in source {
let ast = parse_program(s, FileName::default()).unwrap();
@ -137,7 +138,8 @@ fn test_simple_register(source: Vec<&str>) {
"register"
)]
fn test_simple_register_without_constructor(source: &str) {
let mut composer = TopLevelComposer::new(Vec::new(), ComposerConfig::default(), 64).0;
let mut composer =
TopLevelComposer::new(Vec::new(), Vec::new(), ComposerConfig::default(), 64).0;
let ast = parse_program(source, FileName::default()).unwrap();
let ast = ast[0].clone();
composer.register_top_level(ast, None, "", true).unwrap();
@ -171,7 +173,8 @@ fn test_simple_register_without_constructor(source: &str) {
"function compose"
)]
fn test_simple_function_analyze(source: &[&str], tys: &[&str], names: &[&str]) {
let mut composer = TopLevelComposer::new(Vec::new(), ComposerConfig::default(), 64).0;
let mut composer =
TopLevelComposer::new(Vec::new(), Vec::new(), ComposerConfig::default(), 64).0;
let internal_resolver = Arc::new(ResolverInternal {
id_to_def: Mutex::default(),
@ -519,7 +522,8 @@ fn test_simple_function_analyze(source: &[&str], tys: &[&str], names: &[&str]) {
)]
fn test_analyze(source: &[&str], res: &[&str]) {
let print = false;
let mut composer = TopLevelComposer::new(Vec::new(), ComposerConfig::default(), 64).0;
let mut composer =
TopLevelComposer::new(Vec::new(), Vec::new(), ComposerConfig::default(), 64).0;
let internal_resolver = make_internal_resolver_with_tvar(
vec![
@ -696,7 +700,8 @@ fn test_analyze(source: &[&str], res: &[&str]) {
)]
fn test_inference(source: Vec<&str>, res: &[&str]) {
let print = true;
let mut composer = TopLevelComposer::new(Vec::new(), ComposerConfig::default(), 64).0;
let mut composer =
TopLevelComposer::new(Vec::new(), Vec::new(), ComposerConfig::default(), 64).0;
let internal_resolver = make_internal_resolver_with_tvar(
vec![

View File

@ -8,12 +8,15 @@ if [ -z "$1" ]; then
fi
declare -a nac3args
while [ $# -ge 2 ]; do
while [ $# -gt 1 ]; do
case "$1" in
--help)
echo "Usage: check_demo.sh [-i686] -- demo [NAC3ARGS...]"
echo "Usage: check_demo.sh [--debug] [-i686] -- [NAC3ARGS...] demo"
exit
;;
--debug)
debug=1
;;
-i686)
i686=1
;;
@ -22,18 +25,18 @@ while [ $# -ge 2 ]; do
break
;;
*)
break
echo "Unrecognized argument \"$1\""
exit 1
;;
esac
shift
done
demo="$1"
shift
while [ $# -gt 1 ]; do
nac3args+=("$1")
shift
done
demo="$1"
echo "### Checking $demo..."
@ -43,12 +46,20 @@ echo ">>>>>> Running $demo with the Python interpreter"
if [ -n "$i686" ]; then
echo "...... Trying NAC3's 32-bit code generator output"
./run_demo.sh -i686 --out run_32.log "${nac3args[@]}" "$demo"
if [ -n "$debug" ]; then
./run_demo.sh --debug -i686 --out run_32.log -- "${nac3args[@]}" "$demo"
else
./run_demo.sh -i686 --out run_32.log -- "${nac3args[@]}" "$demo"
fi
diff -Nau interpreted.log run_32.log
fi
echo "...... Trying NAC3's 64-bit code generator output"
./run_demo.sh --out run_64.log "${nac3args[@]}" "$demo"
if [ -n "$debug" ]; then
./run_demo.sh --debug --out run_64.log -- "${nac3args[@]}" "$demo"
else
./run_demo.sh --out run_64.log -- "${nac3args[@]}" "$demo"
fi
diff -Nau interpreted.log run_64.log
echo "...... OK"

View File

@ -2,6 +2,11 @@
set -e
if [ "$1" == "--help" ]; then
echo "Usage: check_demos.sh [CHECKARGS...] [--] [NAC3ARGS...]"
exit
fi
count=0
for demo in src/*.py; do
./check_demo.sh "$@" "$demo"

View File

@ -14,7 +14,7 @@ declare -a nac3args
while [ $# -ge 1 ]; do
case "$1" in
--help)
echo "Usage: run_demo.sh [--help] [--out OUTFILE] [--debug] [-i686] -- [NAC3ARGS...]"
echo "Usage: run_demo.sh [--help] [--out OUTFILE] [--debug] [-i686] -- [NAC3ARGS...] demo"
exit
;;
--out)
@ -32,7 +32,8 @@ while [ $# -ge 1 ]; do
break
;;
*)
break
echo "Unrecognized argument \"$1\""
exit 1
;;
esac
shift
@ -59,7 +60,7 @@ if [ -z "$i686" ]; then
clang -c -std=gnu11 -Wall -Wextra -O3 -o demo.o demo.c
clang -o demo module.o demo.o $DEMO_LINALG_STUB -lm -Wl,--no-warn-search-mismatch
else
$nac3standalone --triple i686-unknown-linux-gnu "${nac3args[@]}"
$nac3standalone --triple i686-unknown-linux-gnu --target-features +sse2 "${nac3args[@]}"
Outdated
Review

@abdul124 I thought you said this sse2 option was no longer required when using i686 instead of i386?

@abdul124 I thought you said this sse2 option was no longer required when using i686 instead of i386?

@abdul124 I thought you said this sse2 option was no longer required when using i686 instead of i386?

sse2 should be enforced by default on rustc i686 target https://github.com/rust-lang/rust/issues/82435#issuecomment-783939789

> @abdul124 I thought you said this sse2 option was no longer required when using i686 instead of i386? sse2 should be enforced by default on rustc i686 target https://github.com/rust-lang/rust/issues/82435#issuecomment-783939789
Outdated
Review

Okay, but that comment is about Rust, not LLVM. nac3 is only using the latter here.

Okay, but that comment is about Rust, not LLVM. nac3 is only using the latter here.

Okay, but that comment is about Rust, not LLVM. nac3 is only using the latter here.

LLVM does interpret "i686" as implying sse2 for most part (Debian LLVM requires patch though). This commit 56d3ad9d23 describes the LLVM situation for sse2 support on LLVM i686 target.

> Okay, but that comment is about Rust, not LLVM. nac3 is only using the latter here. LLVM does interpret "i686" as implying sse2 for most part (Debian LLVM requires patch though). This commit https://github.com/google/boringssl/commit/56d3ad9d23bc130aa9404bfdd1957fe81b3ba498 describes the LLVM situation for sse2 support on LLVM i686 target.
Outdated
Review

So the present change should be reverted i.e. there is no need to add --target-features +sse2 ?

So the present change should be reverted i.e. there is no need to add ``--target-features +sse2`` ?

So the present change should be reverted i.e. there is no need to add --target-features +sse2 ?

Yes, since the feature is already implied by i686 target, --target-feature +sse2 is redundant.

> So the present change should be reverted i.e. there is no need to add ``--target-features +sse2`` ? Yes, since the feature is already implied by i686 target, ``--target-feature +sse2`` is redundant.

I don't think any of that is true. The reason why BoringSSL requires SSE2 on i686 targets is because (from the commit message)

As far as I know, all our supported 32-bit x86 consumers require SSE2.

So it should be seen as OpenSSL's own project requirements, rather than a behavior of the compiler.

In fact, the quoted issue in LLVM's repo says that while SSE2 support is implied by i686, it is ultimately treated as an i386 target.

-m32 on Clang is i386 too.

When compiling without any flags, all below f-prefixed instructions are from the X87 ISA (List of X87 Instructions).

$ ../../target/debug/nac3standalone --triple i686-unknown-linux-gnu src/mandelbrot.py
$ llvm-objdump -Cd -M intel --no-show-raw-insn module.o
[...]
      14:       fldz
      16:       fld     dword ptr [ebx]
      1c:       fstp    qword ptr [esp + 32]
      20:       fst     qword ptr [esp + 16]
[...]
      30:       fstp    st(1)
      32:       mov     dword ptr [esp], 4294967295
      39:       fstp    qword ptr [esp + 8]
      3d:       call    0x3e <run+0x3e>
      42:       fld     qword ptr [esp + 24]
      46:       fld     qword ptr [esp + 8]
      4a:       faddp   st(1), st
[...]
      56:       fst     qword ptr [esp + 24]
      5a:       fmul    qword ptr [ebx]
      60:       fdiv    dword ptr [ebx]
      66:       fadd    qword ptr [ebx]
      6c:       fstp    qword ptr [esp + 8]
      70:       fld     qword ptr [esp + 16]
      74:       xor     edi, edi
      76:       fldz
[...]

With --target-features +sse2, all the above X87 instructions are replaced with their SSE2 counterparts (note the use of xmm registers):

$ ../../target/debug/nac3standalone --triple i686-unknown-linux-gnu --target-features +sse2 src/mandelbrot.py
$ llvm-objdump -Cd -M intel --no-show-raw-insn module.o
[...]
      14:       xorpd   xmm0, xmm0
      18:       movsd   xmm1, qword ptr [ebx]   # xmm1 = mem[0],zero
      20:       movsd   qword ptr [esp + 32], xmm1
      26:       movsd   xmm1, qword ptr [ebx]   # xmm1 = mem[0],zero
      2e:       movsd   qword ptr [esp + 24], xmm1
      34:       movsd   xmm1, qword ptr [ebx]   # xmm1 = mem[0],zero
      3c:       movsd   qword ptr [esp + 16], xmm1
      42:       movsd   xmm1, qword ptr [ebx]   # xmm1 = mem[0],zero
      4a:       movsd   qword ptr [esp + 72], xmm1
      50:       movsd   xmm1, qword ptr [ebx]   # xmm1 = mem[0],zero
      58:       movsd   qword ptr [esp + 64], xmm1
      5e:       movsd   xmm1, qword ptr [ebx]   # xmm1 = mem[0],zero
      66:       movsd   qword ptr [esp + 56], xmm1
      6c:       movsd   xmm1, qword ptr [ebx]   # xmm1 = mem[0],zero
      74:       movsd   qword ptr [esp + 48], xmm1
      7a:       movsd   xmm1, qword ptr [ebx]   # xmm1 = mem[0],zero
      82:       movsd   qword ptr [esp + 8], xmm1
[...]
      9c:       movsd   xmm0, qword ptr [esp + 40] # xmm0 = mem[0],zero
      a2:       addsd   xmm0, qword ptr [esp + 8]
[...]
      b2:       movsd   qword ptr [esp + 40], xmm0
      b8:       movapd  xmm7, xmm0
      bc:       mulsd   xmm7, qword ptr [esp + 32]
      c2:       divsd   xmm7, qword ptr [esp + 24]
      c8:       addsd   xmm7, qword ptr [esp + 16]
      ce:       xorpd   xmm0, xmm0
      d2:       xor     edi, edi
      d4:       movsd   qword ptr [esp + 80], xmm7
[...]

This also explains the test failures when removing --target-features +sse2 with src/mandelbrot.py.

I don't think any of that is true. The reason why BoringSSL requires SSE2 on i686 targets is because (from the commit message) > As far as I know, all our supported 32-bit x86 consumers require SSE2. So it should be seen as OpenSSL's own project requirements, rather than a behavior of the compiler. In fact, the [quoted issue in LLVM's repo](https://github.com/llvm/llvm-project/issues/61347) says that while SSE2 support is implied by `i686`, it is ultimately treated as an `i386` target. > -m32 on Clang is i386 too. When compiling without any flags, all below `f`-prefixed instructions are from the X87 ISA ([List of X87 Instructions](https://www2.math.uni-wuppertal.de/~fpf/Uebungen/GdR-SS02/opcode_f.html)). ``` $ ../../target/debug/nac3standalone --triple i686-unknown-linux-gnu src/mandelbrot.py $ llvm-objdump -Cd -M intel --no-show-raw-insn module.o [...] 14: fldz 16: fld dword ptr [ebx] 1c: fstp qword ptr [esp + 32] 20: fst qword ptr [esp + 16] [...] 30: fstp st(1) 32: mov dword ptr [esp], 4294967295 39: fstp qword ptr [esp + 8] 3d: call 0x3e <run+0x3e> 42: fld qword ptr [esp + 24] 46: fld qword ptr [esp + 8] 4a: faddp st(1), st [...] 56: fst qword ptr [esp + 24] 5a: fmul qword ptr [ebx] 60: fdiv dword ptr [ebx] 66: fadd qword ptr [ebx] 6c: fstp qword ptr [esp + 8] 70: fld qword ptr [esp + 16] 74: xor edi, edi 76: fldz [...] ``` With `--target-features +sse2`, all the above X87 instructions are replaced with their SSE2 counterparts (note the use of `xmm` registers): ``` $ ../../target/debug/nac3standalone --triple i686-unknown-linux-gnu --target-features +sse2 src/mandelbrot.py $ llvm-objdump -Cd -M intel --no-show-raw-insn module.o [...] 14: xorpd xmm0, xmm0 18: movsd xmm1, qword ptr [ebx] # xmm1 = mem[0],zero 20: movsd qword ptr [esp + 32], xmm1 26: movsd xmm1, qword ptr [ebx] # xmm1 = mem[0],zero 2e: movsd qword ptr [esp + 24], xmm1 34: movsd xmm1, qword ptr [ebx] # xmm1 = mem[0],zero 3c: movsd qword ptr [esp + 16], xmm1 42: movsd xmm1, qword ptr [ebx] # xmm1 = mem[0],zero 4a: movsd qword ptr [esp + 72], xmm1 50: movsd xmm1, qword ptr [ebx] # xmm1 = mem[0],zero 58: movsd qword ptr [esp + 64], xmm1 5e: movsd xmm1, qword ptr [ebx] # xmm1 = mem[0],zero 66: movsd qword ptr [esp + 56], xmm1 6c: movsd xmm1, qword ptr [ebx] # xmm1 = mem[0],zero 74: movsd qword ptr [esp + 48], xmm1 7a: movsd xmm1, qword ptr [ebx] # xmm1 = mem[0],zero 82: movsd qword ptr [esp + 8], xmm1 [...] 9c: movsd xmm0, qword ptr [esp + 40] # xmm0 = mem[0],zero a2: addsd xmm0, qword ptr [esp + 8] [...] b2: movsd qword ptr [esp + 40], xmm0 b8: movapd xmm7, xmm0 bc: mulsd xmm7, qword ptr [esp + 32] c2: divsd xmm7, qword ptr [esp + 24] c8: addsd xmm7, qword ptr [esp + 16] ce: xorpd xmm0, xmm0 d2: xor edi, edi d4: movsd qword ptr [esp + 80], xmm7 [...] ``` This also explains the test failures when removing `--target-features +sse2` with `src/mandelbrot.py`.
clang -m32 -c -std=gnu11 -Wall -Wextra -O3 -msse2 -o demo.o demo.c
clang -m32 -o demo module.o demo.o $DEMO_LINALG_STUB32 -lm -Wl,--no-warn-search-mismatch
fi

View File

@ -301,7 +301,7 @@ fn main() {
let primitive: PrimitiveStore = TopLevelComposer::make_primitives(size_t).0;
let (mut composer, builtins_def, builtins_ty) =
TopLevelComposer::new(vec![], ComposerConfig::default(), size_t);
TopLevelComposer::new(vec![], vec![], ComposerConfig::default(), size_t);
let internal_resolver: Arc<ResolverInternal> = ResolverInternal {
id_to_type: builtins_ty.into(),