From 35e9c5b38e2bfc240f990b789bca632ed795e1d6 Mon Sep 17 00:00:00 2001
From: David Mak
Date: Mon, 3 Feb 2025 15:43:48 +0800
Subject: [PATCH] [core] codegen: Add String{Type,Value}

---
 nac3core/src/codegen/expr.rs          |  59 ++-------
 nac3core/src/codegen/irrt/string.rs   |  26 ++--
 nac3core/src/codegen/mod.rs           |  16 +--
 nac3core/src/codegen/types/mod.rs     |   2 +
 nac3core/src/codegen/types/string.rs  | 177 ++++++++++++++++++++++++++
 nac3core/src/codegen/values/mod.rs    |   2 +
 nac3core/src/codegen/values/string.rs |  87 +++++++++++++
 7 files changed, 290 insertions(+), 79 deletions(-)
 create mode 100644 nac3core/src/codegen/types/string.rs
 create mode 100644 nac3core/src/codegen/values/string.rs

diff --git a/nac3core/src/codegen/expr.rs b/nac3core/src/codegen/expr.rs
index bab3b75..c398ed9 100644
--- a/nac3core/src/codegen/expr.rs
+++ b/nac3core/src/codegen/expr.rs
@@ -32,7 +32,7 @@ use super::{
         gen_for_callback_incrementing, gen_if_callback, gen_if_else_expr_callback, gen_raise,
         gen_var,
     },
-    types::{ndarray::NDArrayType, ListType, RangeType, TupleType},
+    types::{ndarray::NDArrayType, ListType, RangeType, StringType, TupleType},
     values::{
         ndarray::{NDArrayOut, RustNDIndex, ScalarOrNDArray},
         ArrayLikeIndexer, ArrayLikeValue, ListValue, ProxyValue, RangeValue,
@@ -168,14 +168,7 @@ impl<'ctx> CodeGenContext<'ctx, '_> {
             SymbolValue::Bool(v) => self.ctx.i8_type().const_int(u64::from(*v), true).into(),
             SymbolValue::Double(v) => self.ctx.f64_type().const_float(*v).into(),
             SymbolValue::Str(v) => {
-                let str_ptr = self
-                    .builder
-                    .build_global_string_ptr(v, "const")
-                    .map(|v| v.as_pointer_value().into())
-                    .unwrap();
-                let size = self.get_size_type().const_int(v.len() as u64, false);
-                let ty = self.get_llvm_type(generator, self.primitives.str).into_struct_type();
-                ty.const_named_struct(&[str_ptr, size.into()]).into()
+                StringType::new(self).construct_constant(self, v, None).as_abi_value(self).into()
             }
             SymbolValue::Tuple(ls) => {
                 let vals = ls.iter().map(|v| self.gen_symbol_val(generator, v, ty)).collect_vec();
@@ -308,15 +301,10 @@ impl<'ctx> CodeGenContext<'ctx, '_> {
                 if let Some(v) = self.const_strings.get(v) {
                     Some(*v)
                 } else {
-                    let str_ptr = self
-                        .builder
-                        .build_global_string_ptr(v, "const")
-                        .map(|v| v.as_pointer_value().into())
-                        .unwrap();
-                    let size = self.get_size_type().const_int(v.len() as u64, false);
-                    let ty = self.get_llvm_type(generator, self.primitives.str);
-                    let val =
-                        ty.into_struct_type().const_named_struct(&[str_ptr, size.into()]).into();
+                    let val = StringType::new(self)
+                        .construct_constant(self, v, None)
+                        .as_abi_value(self)
+                        .into();
                     self.const_strings.insert(v.to_string(), val);
                     Some(val)
                 }
@@ -1950,39 +1938,12 @@ pub fn gen_cmpop_expr_with_values<'ctx, G: CodeGenerator>(
             } else if left_ty == ctx.primitives.str {
                 assert!(ctx.unifier.unioned(left_ty, right_ty));
 
-                let lhs = lhs.into_struct_value();
-                let rhs = rhs.into_struct_value();
+                let llvm_str = StringType::new(ctx);
 
-                let llvm_i32 = ctx.ctx.i32_type();
-                let llvm_usize = ctx.get_size_type();
+                let lhs = llvm_str.map_struct_value(lhs.into_struct_value(), None);
+                let rhs = llvm_str.map_struct_value(rhs.into_struct_value(), None);
 
-                let plhs = generator.gen_var_alloc(ctx, lhs.get_type().into(), None).unwrap();
-                ctx.builder.build_store(plhs, lhs).unwrap();
-                let prhs = generator.gen_var_alloc(ctx, lhs.get_type().into(), None).unwrap();
-                ctx.builder.build_store(prhs, rhs).unwrap();
-
-                let lhs_ptr = ctx.build_in_bounds_gep_and_load(
-                    plhs,
-                    &[llvm_usize.const_zero(), llvm_i32.const_zero()],
-                    None,
-                ).into_pointer_value();
-                let lhs_len = ctx.build_in_bounds_gep_and_load(
-                    plhs,
-                    &[llvm_usize.const_zero(), llvm_i32.const_int(1, false)],
-                    None,
-                ).into_int_value();
-
-                let rhs_ptr = ctx.build_in_bounds_gep_and_load(
-                    prhs,
-                    &[llvm_usize.const_zero(), llvm_i32.const_zero()],
-                    None,
-                ).into_pointer_value();
-                let rhs_len = ctx.build_in_bounds_gep_and_load(
-                    prhs,
-                    &[llvm_usize.const_zero(), llvm_i32.const_int(1, false)],
-                    None,
-                ).into_int_value();
-                let result = call_string_eq(ctx, lhs_ptr, lhs_len, rhs_ptr, rhs_len);
+                let result = call_string_eq(ctx, lhs, rhs);
                 if *op == Cmpop::NotEq {
                     gen_unaryop_expr_with_values(
                         generator,
diff --git a/nac3core/src/codegen/irrt/string.rs b/nac3core/src/codegen/irrt/string.rs
index e015570..c7e4eeb 100644
--- a/nac3core/src/codegen/irrt/string.rs
+++ b/nac3core/src/codegen/irrt/string.rs
@@ -1,26 +1,15 @@
-use inkwell::{
-    values::{BasicValueEnum, IntValue, PointerValue},
-    AddressSpace,
-};
+use inkwell::values::{BasicValueEnum, IntValue};
 
 use super::get_usize_dependent_function_name;
-use crate::codegen::{expr::infer_and_call_function, CodeGenContext};
+use crate::codegen::{expr::infer_and_call_function, values::StringValue, CodeGenContext};
 
 /// Generates a call to string equality comparison. Returns an `i1` representing whether the strings are equal.
 pub fn call_string_eq<'ctx>(
     ctx: &CodeGenContext<'ctx, '_>,
-    str1_ptr: PointerValue<'ctx>,
-    str1_len: IntValue<'ctx>,
-    str2_ptr: PointerValue<'ctx>,
-    str2_len: IntValue<'ctx>,
+    str1: StringValue<'ctx>,
+    str2: StringValue<'ctx>,
 ) -> IntValue<'ctx> {
     let llvm_i1 = ctx.ctx.bool_type();
-    let llvm_pi8 = ctx.ctx.i8_type().ptr_type(AddressSpace::default());
-    let llvm_usize = ctx.get_size_type();
-    assert_eq!(str1_ptr.get_type(), llvm_pi8);
-    assert_eq!(str1_len.get_type(), llvm_usize);
-    assert_eq!(str2_ptr.get_type(), llvm_pi8);
-    assert_eq!(str2_len.get_type(), llvm_usize);
 
     let func_name = get_usize_dependent_function_name(ctx, "nac3_str_eq");
 
@@ -28,7 +17,12 @@ pub fn call_string_eq<'ctx>(
         ctx,
         &func_name,
         Some(llvm_i1.into()),
-        &[str1_ptr.into(), str1_len.into(), str2_ptr.into(), str2_len.into()],
+        &[
+            str1.extract_ptr(ctx).into(),
+            str1.extract_len(ctx).into(),
+            str2.extract_ptr(ctx).into(),
+            str2.extract_len(ctx).into(),
+        ],
         Some("str_eq_call"),
         None,
     )
diff --git a/nac3core/src/codegen/mod.rs b/nac3core/src/codegen/mod.rs
index f1b9cfb..5b6fa21 100644
--- a/nac3core/src/codegen/mod.rs
+++ b/nac3core/src/codegen/mod.rs
@@ -43,7 +43,7 @@ use crate::{
 };
 use concrete_type::{ConcreteType, ConcreteTypeEnum, ConcreteTypeStore};
 pub use generator::{CodeGenerator, DefaultCodeGenerator};
-use types::{ndarray::NDArrayType, ListType, ProxyType, RangeType, TupleType};
+use types::{ndarray::NDArrayType, ListType, ProxyType, RangeType, StringType, TupleType};
 
 pub mod builtin_fns;
 pub mod concrete_type;
@@ -786,19 +786,7 @@ pub fn gen_func_impl<
         (primitives.float, context.f64_type().into()),
         (primitives.bool, context.i8_type().into()),
         (primitives.str, {
-            let name = "str";
-            match module.get_struct_type(name) {
-                None => {
-                    let str_type = context.opaque_struct_type("str");
-                    let fields = [
-                        context.i8_type().ptr_type(AddressSpace::default()).into(),
-                        generator.get_size_type(context).into(),
-                    ];
-                    str_type.set_body(&fields, false);
-                    str_type.into()
-                }
-                Some(t) => t.as_basic_type_enum(),
-            }
+            StringType::new_with_generator(generator, context).as_abi_type().into()
         }),
         (primitives.range, RangeType::new_with_generator(generator, context).as_abi_type().into()),
         (primitives.exception, {
diff --git a/nac3core/src/codegen/types/mod.rs b/nac3core/src/codegen/types/mod.rs
index abeab5b..bceb804 100644
--- a/nac3core/src/codegen/types/mod.rs
+++ b/nac3core/src/codegen/types/mod.rs
@@ -27,11 +27,13 @@ use super::{
 };
 pub use list::*;
 pub use range::*;
+pub use string::*;
 pub use tuple::*;
 
 mod list;
 pub mod ndarray;
 mod range;
+mod string;
 pub mod structure;
 mod tuple;
 pub mod utils;
diff --git a/nac3core/src/codegen/types/string.rs b/nac3core/src/codegen/types/string.rs
new file mode 100644
index 0000000..eae275d
--- /dev/null
+++ b/nac3core/src/codegen/types/string.rs
@@ -0,0 +1,177 @@
+use inkwell::{
+    context::Context,
+    types::{BasicType, BasicTypeEnum, IntType, PointerType, StructType},
+    values::{GlobalValue, IntValue, PointerValue, StructValue},
+    AddressSpace,
+};
+use itertools::Itertools;
+
+use nac3core_derive::StructFields;
+
+use super::{
+    structure::{check_struct_type_matches_fields, StructField, StructFields},
+    ProxyType,
+};
+use crate::codegen::{values::StringValue, CodeGenContext, CodeGenerator};
+
+/// Proxy type for a `str` type in LLVM.
+#[derive(Debug, PartialEq, Eq, Clone, Copy)]
+pub struct StringType<'ctx> {
+    ty: StructType<'ctx>,
+    llvm_usize: IntType<'ctx>,
+}
+
+#[derive(PartialEq, Eq, Clone, Copy, StructFields)]
+pub struct StringStructFields<'ctx> {
+    /// Pointer to the first character of the string.
+    #[value_type(i8_type().ptr_type(AddressSpace::default()))]
+    pub ptr: StructField<'ctx, PointerValue<'ctx>>,
+
+    /// Length of the string.
+    #[value_type(usize)]
+    pub len: StructField<'ctx, IntValue<'ctx>>,
+}
+
+impl<'ctx> StringType<'ctx> {
+    /// Returns an instance of [`StructFields`] containing all field accessors for this type.
+    #[must_use]
+    fn fields(llvm_usize: IntType<'ctx>) -> StringStructFields<'ctx> {
+        StringStructFields::new(llvm_usize.get_context(), llvm_usize)
+    }
+
+    /// Creates an LLVM type corresponding to the expected structure of a `str`.
+    #[must_use]
+    fn llvm_type(ctx: &'ctx Context, llvm_usize: IntType<'ctx>) -> StructType<'ctx> {
+        const NAME: &str = "str";
+
+        if let Some(t) = ctx.get_struct_type(NAME) {
+            t
+        } else {
+            let str_ty = ctx.opaque_struct_type(NAME);
+            let field_tys = Self::fields(llvm_usize).into_iter().map(|field| field.1).collect_vec();
+            str_ty.set_body(&field_tys, false);
+            str_ty
+        }
+    }
+
+    fn new_impl(ctx: &'ctx Context, llvm_usize: IntType<'ctx>) -> Self {
+        let llvm_str = Self::llvm_type(ctx, llvm_usize);
+
+        Self { ty: llvm_str, llvm_usize }
+    }
+
+    /// Creates an instance of [`StringType`].
+    #[must_use]
+    pub fn new(ctx: &CodeGenContext<'ctx, '_>) -> Self {
+        Self::new_impl(ctx.ctx, ctx.get_size_type())
+    }
+
+    /// Creates an instance of [`StringType`].
+    #[must_use]
+    pub fn new_with_generator<G: CodeGenerator + ?Sized>(
+        generator: &G,
+        ctx: &'ctx Context,
+    ) -> Self {
+        Self::new_impl(ctx, generator.get_size_type(ctx))
+    }
+
+    /// Creates an [`StringType`] from a [`StructType`] representing a `str`.
+    #[must_use]
+    pub fn from_struct_type(ty: StructType<'ctx>, llvm_usize: IntType<'ctx>) -> Self {
+        debug_assert!(Self::has_same_repr(ty, llvm_usize).is_ok());
+
+        Self { ty, llvm_usize }
+    }
+
+    /// Creates an [`StringType`] from a [`PointerType`] representing a `str`.
+    #[must_use]
+    pub fn from_pointer_type(ptr_ty: PointerType<'ctx>, llvm_usize: IntType<'ctx>) -> Self {
+        Self::from_struct_type(ptr_ty.get_element_type().into_struct_type(), llvm_usize)
+    }
+
+    /// Returns the fields present in this [`StringType`].
+    #[must_use]
+    pub fn get_fields(&self) -> StringStructFields<'ctx> {
+        Self::fields(self.llvm_usize)
+    }
+
+    /// Constructs a global constant string.
+    #[must_use]
+    pub fn construct_constant(
+        &self,
+        ctx: &CodeGenContext<'ctx, '_>,
+        v: &str,
+        name: Option<&'ctx str>,
+    ) -> StringValue<'ctx> {
+        let str_ptr = ctx
+            .builder
+            .build_global_string_ptr(v, "const")
+            .map(GlobalValue::as_pointer_value)
+            .unwrap();
+        let size = ctx.get_size_type().const_int(v.len() as u64, false);
+        self.map_struct_value(
+            self.as_abi_type().const_named_struct(&[str_ptr.into(), size.into()]),
+            name,
+        )
+    }
+
+    /// Converts an existing value into a [`StringValue`].
+    #[must_use]
+    pub fn map_struct_value(
+        &self,
+        value: StructValue<'ctx>,
+        name: Option<&'ctx str>,
+    ) -> <Self as ProxyType<'ctx>>::Value {
+        <Self as ProxyType<'ctx>>::Value::from_struct_value(value, self.llvm_usize, name)
+    }
+
+    /// Converts an existing value into a [`StringValue`].
+    #[must_use]
+    pub fn map_pointer_value(
+        &self,
+        ctx: &CodeGenContext<'ctx, '_>,
+        value: PointerValue<'ctx>,
+        name: Option<&'ctx str>,
+    ) -> <Self as ProxyType<'ctx>>::Value {
+        <Self as ProxyType<'ctx>>::Value::from_pointer_value(ctx, value, self.llvm_usize, name)
+    }
+}
+
+impl<'ctx> ProxyType<'ctx> for StringType<'ctx> {
+    type ABI = StructType<'ctx>;
+    type Base = StructType<'ctx>;
+    type Value = StringValue<'ctx>;
+
+    fn is_representable(
+        llvm_ty: impl BasicType<'ctx>,
+        llvm_usize: IntType<'ctx>,
+    ) -> Result<(), String> {
+        if let BasicTypeEnum::StructType(ty) = llvm_ty.as_basic_type_enum() {
+            Self::has_same_repr(ty, llvm_usize)
+        } else {
+            Err(format!("Expected structure type, got {llvm_ty:?}"))
+        }
+    }
+
+    fn has_same_repr(ty: Self::Base, llvm_usize: IntType<'ctx>) -> Result<(), String> {
+        check_struct_type_matches_fields(Self::fields(llvm_usize), ty, "str", &[])
+    }
+
+    fn alloca_type(&self) -> impl BasicType<'ctx> {
+        self.as_abi_type()
+    }
+
+    fn as_base_type(&self) -> Self::Base {
+        self.ty
+    }
+
+    fn as_abi_type(&self) -> Self::ABI {
+        self.as_base_type()
+    }
+}
+
+impl<'ctx> From<StringType<'ctx>> for StructType<'ctx> {
+    fn from(value: StringType<'ctx>) -> Self {
+        value.as_base_type()
+    }
+}
diff --git a/nac3core/src/codegen/values/mod.rs b/nac3core/src/codegen/values/mod.rs
index 90f327e..cf125fe 100644
--- a/nac3core/src/codegen/values/mod.rs
+++ b/nac3core/src/codegen/values/mod.rs
@@ -4,12 +4,14 @@ use super::{types::ProxyType, CodeGenContext};
 pub use array::*;
 pub use list::*;
 pub use range::*;
+pub use string::*;
 pub use tuple::*;
 
 mod array;
 mod list;
 pub mod ndarray;
 mod range;
+mod string;
 pub mod structure;
 mod tuple;
 pub mod utils;
diff --git a/nac3core/src/codegen/values/string.rs b/nac3core/src/codegen/values/string.rs
new file mode 100644
index 0000000..a4c8bea
--- /dev/null
+++ b/nac3core/src/codegen/values/string.rs
@@ -0,0 +1,87 @@
+use inkwell::{
+    types::IntType,
+    values::{BasicValueEnum, IntValue, PointerValue, StructValue},
+};
+
+use crate::codegen::{
+    types::{structure::StructField, StringType},
+    values::ProxyValue,
+    CodeGenContext,
+};
+
+/// Proxy type for accessing a `str` value in LLVM.
+#[derive(Copy, Clone)]
+pub struct StringValue<'ctx> {
+    value: StructValue<'ctx>,
+    llvm_usize: IntType<'ctx>,
+    name: Option<&'ctx str>,
+}
+
+impl<'ctx> StringValue<'ctx> {
+    /// Creates an [`StringValue`] from a [`StructValue`].
+    #[must_use]
+    pub fn from_struct_value(
+        val: StructValue<'ctx>,
+        llvm_usize: IntType<'ctx>,
+        name: Option<&'ctx str>,
+    ) -> Self {
+        debug_assert!(Self::is_instance(val, llvm_usize).is_ok());
+
+        Self { value: val, llvm_usize, name }
+    }
+
+    /// Creates an [`StringValue`] from a [`PointerValue`].
+    #[must_use]
+    pub fn from_pointer_value(
+        ctx: &CodeGenContext<'ctx, '_>,
+        ptr: PointerValue<'ctx>,
+        llvm_usize: IntType<'ctx>,
+        name: Option<&'ctx str>,
+    ) -> Self {
+        let val = ctx.builder.build_load(ptr, "").map(BasicValueEnum::into_struct_value).unwrap();
+
+        Self::from_struct_value(val, llvm_usize, name)
+    }
+
+    fn ptr_field(&self) -> StructField<'ctx, PointerValue<'ctx>> {
+        self.get_type().get_fields().ptr
+    }
+
+    /// Returns the pointer to the beginning of the string.
+    pub fn extract_ptr(&self, ctx: &CodeGenContext<'ctx, '_>) -> PointerValue<'ctx> {
+        self.ptr_field().extract_value(ctx, self.value)
+    }
+
+    fn len_field(&self) -> StructField<'ctx, IntValue<'ctx>> {
+        self.get_type().get_fields().len
+    }
+
+    /// Returns the length of the string.
+    pub fn extract_len(&self, ctx: &CodeGenContext<'ctx, '_>) -> IntValue<'ctx> {
+        self.len_field().extract_value(ctx, self.value)
+    }
+}
+
+impl<'ctx> ProxyValue<'ctx> for StringValue<'ctx> {
+    type ABI = StructValue<'ctx>;
+    type Base = StructValue<'ctx>;
+    type Type = StringType<'ctx>;
+
+    fn get_type(&self) -> Self::Type {
+        Self::Type::from_struct_type(self.value.get_type(), self.llvm_usize)
+    }
+
+    fn as_base_value(&self) -> Self::Base {
+        self.value
+    }
+
+    fn as_abi_value(&self, _: &CodeGenContext<'ctx, '_>) -> Self::ABI {
+        self.as_base_value()
+    }
+}
+
+impl<'ctx> From<StringValue<'ctx>> for StructValue<'ctx> {
+    fn from(value: StringValue<'ctx>) -> Self {
+        value.as_base_value()
+    }
+}