[core] codegen: Add String{Type,Value}

This commit is contained in:
David Mak 2025-02-03 15:43:48 +08:00
parent 0a761cb263
commit 35e9c5b38e
7 changed files with 290 additions and 79 deletions

View File

@ -32,7 +32,7 @@ use super::{
gen_for_callback_incrementing, gen_if_callback, gen_if_else_expr_callback, gen_raise,
gen_var,
},
types::{ndarray::NDArrayType, ListType, RangeType, TupleType},
types::{ndarray::NDArrayType, ListType, RangeType, StringType, TupleType},
values::{
ndarray::{NDArrayOut, RustNDIndex, ScalarOrNDArray},
ArrayLikeIndexer, ArrayLikeValue, ListValue, ProxyValue, RangeValue,
@ -168,14 +168,7 @@ impl<'ctx> CodeGenContext<'ctx, '_> {
SymbolValue::Bool(v) => self.ctx.i8_type().const_int(u64::from(*v), true).into(),
SymbolValue::Double(v) => self.ctx.f64_type().const_float(*v).into(),
SymbolValue::Str(v) => {
let str_ptr = self
.builder
.build_global_string_ptr(v, "const")
.map(|v| v.as_pointer_value().into())
.unwrap();
let size = self.get_size_type().const_int(v.len() as u64, false);
let ty = self.get_llvm_type(generator, self.primitives.str).into_struct_type();
ty.const_named_struct(&[str_ptr, size.into()]).into()
StringType::new(self).construct_constant(self, v, None).as_abi_value(self).into()
}
SymbolValue::Tuple(ls) => {
let vals = ls.iter().map(|v| self.gen_symbol_val(generator, v, ty)).collect_vec();
@ -308,15 +301,10 @@ impl<'ctx> CodeGenContext<'ctx, '_> {
if let Some(v) = self.const_strings.get(v) {
Some(*v)
} else {
let str_ptr = self
.builder
.build_global_string_ptr(v, "const")
.map(|v| v.as_pointer_value().into())
.unwrap();
let size = self.get_size_type().const_int(v.len() as u64, false);
let ty = self.get_llvm_type(generator, self.primitives.str);
let val =
ty.into_struct_type().const_named_struct(&[str_ptr, size.into()]).into();
let val = StringType::new(self)
.construct_constant(self, v, None)
.as_abi_value(self)
.into();
self.const_strings.insert(v.to_string(), val);
Some(val)
}
@ -1950,39 +1938,12 @@ pub fn gen_cmpop_expr_with_values<'ctx, G: CodeGenerator>(
} else if left_ty == ctx.primitives.str {
assert!(ctx.unifier.unioned(left_ty, right_ty));
let lhs = lhs.into_struct_value();
let rhs = rhs.into_struct_value();
let llvm_str = StringType::new(ctx);
let llvm_i32 = ctx.ctx.i32_type();
let llvm_usize = ctx.get_size_type();
let lhs = llvm_str.map_struct_value(lhs.into_struct_value(), None);
let rhs = llvm_str.map_struct_value(rhs.into_struct_value(), None);
let plhs = generator.gen_var_alloc(ctx, lhs.get_type().into(), None).unwrap();
ctx.builder.build_store(plhs, lhs).unwrap();
let prhs = generator.gen_var_alloc(ctx, lhs.get_type().into(), None).unwrap();
ctx.builder.build_store(prhs, rhs).unwrap();
let lhs_ptr = ctx.build_in_bounds_gep_and_load(
plhs,
&[llvm_usize.const_zero(), llvm_i32.const_zero()],
None,
).into_pointer_value();
let lhs_len = ctx.build_in_bounds_gep_and_load(
plhs,
&[llvm_usize.const_zero(), llvm_i32.const_int(1, false)],
None,
).into_int_value();
let rhs_ptr = ctx.build_in_bounds_gep_and_load(
prhs,
&[llvm_usize.const_zero(), llvm_i32.const_zero()],
None,
).into_pointer_value();
let rhs_len = ctx.build_in_bounds_gep_and_load(
prhs,
&[llvm_usize.const_zero(), llvm_i32.const_int(1, false)],
None,
).into_int_value();
let result = call_string_eq(ctx, lhs_ptr, lhs_len, rhs_ptr, rhs_len);
let result = call_string_eq(ctx, lhs, rhs);
if *op == Cmpop::NotEq {
gen_unaryop_expr_with_values(
generator,

View File

@ -1,26 +1,15 @@
use inkwell::{
values::{BasicValueEnum, IntValue, PointerValue},
AddressSpace,
};
use inkwell::values::{BasicValueEnum, IntValue};
use super::get_usize_dependent_function_name;
use crate::codegen::{expr::infer_and_call_function, CodeGenContext};
use crate::codegen::{expr::infer_and_call_function, values::StringValue, CodeGenContext};
/// Generates a call to string equality comparison. Returns an `i1` representing whether the strings are equal.
pub fn call_string_eq<'ctx>(
ctx: &CodeGenContext<'ctx, '_>,
str1_ptr: PointerValue<'ctx>,
str1_len: IntValue<'ctx>,
str2_ptr: PointerValue<'ctx>,
str2_len: IntValue<'ctx>,
str1: StringValue<'ctx>,
str2: StringValue<'ctx>,
) -> IntValue<'ctx> {
let llvm_i1 = ctx.ctx.bool_type();
let llvm_pi8 = ctx.ctx.i8_type().ptr_type(AddressSpace::default());
let llvm_usize = ctx.get_size_type();
assert_eq!(str1_ptr.get_type(), llvm_pi8);
assert_eq!(str1_len.get_type(), llvm_usize);
assert_eq!(str2_ptr.get_type(), llvm_pi8);
assert_eq!(str2_len.get_type(), llvm_usize);
let func_name = get_usize_dependent_function_name(ctx, "nac3_str_eq");
@ -28,7 +17,12 @@ pub fn call_string_eq<'ctx>(
ctx,
&func_name,
Some(llvm_i1.into()),
&[str1_ptr.into(), str1_len.into(), str2_ptr.into(), str2_len.into()],
&[
str1.extract_ptr(ctx).into(),
str1.extract_len(ctx).into(),
str2.extract_ptr(ctx).into(),
str2.extract_len(ctx).into(),
],
Some("str_eq_call"),
None,
)

View File

@ -43,7 +43,7 @@ use crate::{
};
use concrete_type::{ConcreteType, ConcreteTypeEnum, ConcreteTypeStore};
pub use generator::{CodeGenerator, DefaultCodeGenerator};
use types::{ndarray::NDArrayType, ListType, ProxyType, RangeType, TupleType};
use types::{ndarray::NDArrayType, ListType, ProxyType, RangeType, StringType, TupleType};
pub mod builtin_fns;
pub mod concrete_type;
@ -786,19 +786,7 @@ pub fn gen_func_impl<
(primitives.float, context.f64_type().into()),
(primitives.bool, context.i8_type().into()),
(primitives.str, {
let name = "str";
match module.get_struct_type(name) {
None => {
let str_type = context.opaque_struct_type("str");
let fields = [
context.i8_type().ptr_type(AddressSpace::default()).into(),
generator.get_size_type(context).into(),
];
str_type.set_body(&fields, false);
str_type.into()
}
Some(t) => t.as_basic_type_enum(),
}
StringType::new_with_generator(generator, context).as_abi_type().into()
}),
(primitives.range, RangeType::new_with_generator(generator, context).as_abi_type().into()),
(primitives.exception, {

View File

@ -27,11 +27,13 @@ use super::{
};
pub use list::*;
pub use range::*;
pub use string::*;
pub use tuple::*;
mod list;
pub mod ndarray;
mod range;
mod string;
pub mod structure;
mod tuple;
pub mod utils;

View File

@ -0,0 +1,177 @@
use inkwell::{
context::Context,
types::{BasicType, BasicTypeEnum, IntType, PointerType, StructType},
values::{GlobalValue, IntValue, PointerValue, StructValue},
AddressSpace,
};
use itertools::Itertools;
use nac3core_derive::StructFields;
use super::{
structure::{check_struct_type_matches_fields, StructField, StructFields},
ProxyType,
};
use crate::codegen::{values::StringValue, CodeGenContext, CodeGenerator};
/// Proxy type for a `str` type in LLVM.
///
/// Pairs the LLVM struct type that represents a `str` with the integer type used as `usize`
/// when describing that struct's fields.
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub struct StringType<'ctx> {
/// The LLVM struct type backing this `str` type.
ty: StructType<'ctx>,
/// The integer type treated as `usize`; passed to the field accessors of this type.
llvm_usize: IntType<'ctx>,
}
/// Field accessors for the LLVM `str` struct: a character pointer followed by a length.
///
/// The `value_type` attributes are consumed by the `StructFields` derive macro.
#[derive(PartialEq, Eq, Clone, Copy, StructFields)]
pub struct StringStructFields<'ctx> {
/// Pointer to the first character of the string.
#[value_type(i8_type().ptr_type(AddressSpace::default()))]
pub ptr: StructField<'ctx, PointerValue<'ctx>>,
/// Length of the string. Constants built by `StringType::construct_constant` store the
/// UTF-8 byte length (`str::len`) here.
#[value_type(usize)]
pub len: StructField<'ctx, IntValue<'ctx>>,
}
impl<'ctx> StringType<'ctx> {
    /// Returns an instance of [`StructFields`] containing all field accessors for this type.
    #[must_use]
    fn fields(llvm_usize: IntType<'ctx>) -> StringStructFields<'ctx> {
        StringStructFields::new(llvm_usize.get_context(), llvm_usize)
    }

    /// Creates an LLVM type corresponding to the expected structure of a `str`.
    ///
    /// A struct type named `str` that is already registered in `ctx` is reused as-is. Otherwise
    /// a new named struct is created and its body is set from the types of
    /// [`StringStructFields`].
    #[must_use]
    fn llvm_type(ctx: &'ctx Context, llvm_usize: IntType<'ctx>) -> StructType<'ctx> {
        const NAME: &str = "str";

        if let Some(t) = ctx.get_struct_type(NAME) {
            t
        } else {
            let str_ty = ctx.opaque_struct_type(NAME);
            // Each field item carries its LLVM type in position 1; only the types are needed
            // to define the struct body.
            let field_tys = Self::fields(llvm_usize).into_iter().map(|field| field.1).collect_vec();
            str_ty.set_body(&field_tys, false);
            str_ty
        }
    }

    /// Shared constructor: resolves the `str` struct type in `ctx` and records `llvm_usize`.
    fn new_impl(ctx: &'ctx Context, llvm_usize: IntType<'ctx>) -> Self {
        let llvm_str = Self::llvm_type(ctx, llvm_usize);

        Self { ty: llvm_str, llvm_usize }
    }

    /// Creates an instance of [`StringType`], taking the LLVM context and size type from a
    /// [`CodeGenContext`].
    #[must_use]
    pub fn new(ctx: &CodeGenContext<'ctx, '_>) -> Self {
        Self::new_impl(ctx.ctx, ctx.get_size_type())
    }

    /// Creates an instance of [`StringType`], taking the size type from `generator` for the
    /// given LLVM `ctx`.
    #[must_use]
    pub fn new_with_generator<G: CodeGenerator + ?Sized>(
        generator: &G,
        ctx: &'ctx Context,
    ) -> Self {
        Self::new_impl(ctx, generator.get_size_type(ctx))
    }

    /// Creates a [`StringType`] from a [`StructType`] representing a `str`.
    ///
    /// In debug builds, asserts that `ty` has the same representation as a `str`.
    #[must_use]
    pub fn from_struct_type(ty: StructType<'ctx>, llvm_usize: IntType<'ctx>) -> Self {
        debug_assert!(Self::has_same_repr(ty, llvm_usize).is_ok());

        Self { ty, llvm_usize }
    }

    /// Creates a [`StringType`] from a [`PointerType`] representing a `str`.
    ///
    /// The element type of `ptr_ty` is converted with `into_struct_type`, so it is expected to
    /// be the `str` struct type.
    #[must_use]
    pub fn from_pointer_type(ptr_ty: PointerType<'ctx>, llvm_usize: IntType<'ctx>) -> Self {
        Self::from_struct_type(ptr_ty.get_element_type().into_struct_type(), llvm_usize)
    }

    /// Returns the fields present in this [`StringType`].
    #[must_use]
    pub fn get_fields(&self) -> StringStructFields<'ctx> {
        Self::fields(self.llvm_usize)
    }

    /// Constructs a global constant string.
    ///
    /// Emits `v` as a global string through the builder of `ctx` and returns a constant `str`
    /// struct made of the pointer to that global and the byte length of `v`. `name` is
    /// forwarded to the resulting [`StringValue`].
    ///
    /// # Panics
    ///
    /// Panics if the builder fails to create the global string.
    #[must_use]
    pub fn construct_constant(
        &self,
        ctx: &CodeGenContext<'ctx, '_>,
        v: &str,
        name: Option<&'ctx str>,
    ) -> StringValue<'ctx> {
        let str_ptr = ctx
            .builder
            .build_global_string_ptr(v, "const")
            .map(GlobalValue::as_pointer_value)
            .unwrap();
        // Build the length with this type's own `usize`, i.e. the same integer type handed to
        // the field accessors, so the constant's `len` element agrees with the rest of this
        // type rather than with whatever `ctx` reports.
        // `usize -> u64` is a lossless widening on every supported target.
        let size = self.llvm_usize.const_int(v.len() as u64, false);

        self.map_struct_value(
            self.as_abi_type().const_named_struct(&[str_ptr.into(), size.into()]),
            name,
        )
    }

    /// Converts an existing struct value into a [`StringValue`].
    #[must_use]
    pub fn map_struct_value(
        &self,
        value: StructValue<'ctx>,
        name: Option<&'ctx str>,
    ) -> <Self as ProxyType<'ctx>>::Value {
        <Self as ProxyType<'ctx>>::Value::from_struct_value(value, self.llvm_usize, name)
    }

    /// Converts an existing pointer to a `str` into a [`StringValue`] by delegating to
    /// `StringValue::from_pointer_value`.
    #[must_use]
    pub fn map_pointer_value(
        &self,
        ctx: &CodeGenContext<'ctx, '_>,
        value: PointerValue<'ctx>,
        name: Option<&'ctx str>,
    ) -> <Self as ProxyType<'ctx>>::Value {
        <Self as ProxyType<'ctx>>::Value::from_pointer_value(ctx, value, self.llvm_usize, name)
    }
}
impl<'ctx> ProxyType<'ctx> for StringType<'ctx> {
    type ABI = StructType<'ctx>;
    type Base = StructType<'ctx>;
    type Value = StringValue<'ctx>;

    /// Checks whether `llvm_ty` can represent a `str`: it must be a struct type whose layout
    /// passes [`Self::has_same_repr`].
    fn is_representable(
        llvm_ty: impl BasicType<'ctx>,
        llvm_usize: IntType<'ctx>,
    ) -> Result<(), String> {
        match llvm_ty.as_basic_type_enum() {
            BasicTypeEnum::StructType(struct_ty) => Self::has_same_repr(struct_ty, llvm_usize),
            _ => Err(format!("Expected structure type, got {llvm_ty:?}")),
        }
    }

    /// Checks `ty` against the fields declared for a `str`.
    fn has_same_repr(ty: Self::Base, llvm_usize: IntType<'ctx>) -> Result<(), String> {
        let expected_fields = Self::fields(llvm_usize);
        check_struct_type_matches_fields(expected_fields, ty, "str", &[])
    }

    /// Returns the struct type itself; the ABI and base types of a `str` are identical.
    fn alloca_type(&self) -> impl BasicType<'ctx> {
        self.ty
    }

    /// Returns the underlying LLVM struct type.
    fn as_base_type(&self) -> Self::Base {
        self.ty
    }

    /// Returns the ABI type, which is the same struct type as the base type.
    fn as_abi_type(&self) -> Self::ABI {
        self.ty
    }
}
/// Unwraps a [`StringType`] into its underlying LLVM struct type.
impl<'ctx> From<StringType<'ctx>> for StructType<'ctx> {
    fn from(string_ty: StringType<'ctx>) -> Self {
        string_ty.as_base_type()
    }
}

View File

@ -4,12 +4,14 @@ use super::{types::ProxyType, CodeGenContext};
pub use array::*;
pub use list::*;
pub use range::*;
pub use string::*;
pub use tuple::*;
mod array;
mod list;
pub mod ndarray;
mod range;
mod string;
pub mod structure;
mod tuple;
pub mod utils;

View File

@ -0,0 +1,87 @@
use inkwell::{
types::IntType,
values::{BasicValueEnum, IntValue, PointerValue, StructValue},
};
use crate::codegen::{
types::{structure::StructField, StringType},
values::ProxyValue,
CodeGenContext,
};
/// Proxy type for accessing a `str` value in LLVM.
///
/// Holds the `str` struct by value (not a pointer to it) along with the integer type used as
/// `usize`.
#[derive(Copy, Clone)]
pub struct StringValue<'ctx> {
/// The LLVM struct value holding the string's pointer and length.
value: StructValue<'ctx>,
/// The integer type treated as `usize`; used to rebuild the matching `StringType`.
llvm_usize: IntType<'ctx>,
/// Optional name supplied at construction. NOTE(review): stored but not read by any code
/// visible in this file — confirm intended use.
name: Option<&'ctx str>,
}
impl<'ctx> StringValue<'ctx> {
/// Creates an [`StringValue`] from a [`StructValue`].
#[must_use]
pub fn from_struct_value(
val: StructValue<'ctx>,
llvm_usize: IntType<'ctx>,
name: Option<&'ctx str>,
) -> Self {
debug_assert!(Self::is_instance(val, llvm_usize).is_ok());
Self { value: val, llvm_usize, name }
}
/// Creates an [`StringValue`] from a [`PointerValue`].
#[must_use]
pub fn from_pointer_value(
ctx: &CodeGenContext<'ctx, '_>,
ptr: PointerValue<'ctx>,
llvm_usize: IntType<'ctx>,
name: Option<&'ctx str>,
) -> Self {
let val = ctx.builder.build_load(ptr, "").map(BasicValueEnum::into_struct_value).unwrap();
Self::from_struct_value(val, llvm_usize, name)
}
fn ptr_field(&self) -> StructField<'ctx, PointerValue<'ctx>> {
self.get_type().get_fields().ptr
}
/// Returns the pointer to the beginning of the string.
pub fn extract_ptr(&self, ctx: &CodeGenContext<'ctx, '_>) -> PointerValue<'ctx> {
self.ptr_field().extract_value(ctx, self.value)
}
fn len_field(&self) -> StructField<'ctx, IntValue<'ctx>> {
self.get_type().get_fields().len
}
/// Returns the length of the string.
pub fn extract_len(&self, ctx: &CodeGenContext<'ctx, '_>) -> IntValue<'ctx> {
self.len_field().extract_value(ctx, self.value)
}
}
impl<'ctx> ProxyValue<'ctx> for StringValue<'ctx> {
type ABI = StructValue<'ctx>;
type Base = StructValue<'ctx>;
type Type = StringType<'ctx>;
fn get_type(&self) -> Self::Type {
Self::Type::from_struct_type(self.value.get_type(), self.llvm_usize)
}
fn as_base_value(&self) -> Self::Base {
self.value
}
fn as_abi_value(&self, _: &CodeGenContext<'ctx, '_>) -> Self::ABI {
self.as_base_value()
}
}
/// Unwraps a [`StringValue`] into its underlying LLVM struct value.
impl<'ctx> From<StringValue<'ctx>> for StructValue<'ctx> {
    fn from(string_val: StringValue<'ctx>) -> Self {
        string_val.as_base_value()
    }
}