1
0
forked from M-Labs/nac3

Implement string equality comparison in irrt with relevant test cases

This commit is contained in:
ram 2024-12-10 14:43:16 +00:00
parent c6e6b7bc95
commit f2880dce03
8 changed files with 99 additions and 148 deletions

View File

@ -3,3 +3,4 @@
#include "irrt/math.hpp"
#include "irrt/ndarray.hpp"
#include "irrt/slice.hpp"
#include "irrt/string.hpp"

View File

@ -0,0 +1,22 @@
#pragma once
#include "irrt/int_types.hpp"
namespace {
template<typename SizeT>
int32_t __nac3_str_eq_impl(const char* str1, SizeT len1, const char* str2, SizeT len2) {
if (str1 == str2) return 1;
if (len1 != len2) return 0;
for (SizeT i = 0; i < len1; ++i) {
if (static_cast<unsigned char>(str1[i]) != static_cast<unsigned char>(str2[i])) {
return 0;
}
}
return 1;
}
} // namespace
extern "C" {
int32_t nac3_str_eq(const char* str1, uint64_t len1, const char* str2, uint64_t len2) {
return __nac3_str_eq_impl<uint64_t>(str1, len1, str2, len2);
}
}

View File

@ -1898,33 +1898,16 @@ pub fn gen_cmpop_expr_with_values<'ctx, G: CodeGenerator>(
debug_assert_eq!(comparators.len(), ops.len());
if comparators.len() == 1 {
let (Some(left_ty), _) = left else { codegen_unreachable!(ctx) };
let left_ty = ctx.unifier.get_representative(left_ty);
let (Some(right_ty), _) = comparators[0] else { codegen_unreachable!(ctx) };
let right_ty = ctx.unifier.get_representative(right_ty);
let left_ty = ctx.unifier.get_representative(left.0.unwrap());
let right_ty = ctx.unifier.get_representative(comparators[0].0.unwrap());
if left_ty.obj_id(&ctx.unifier).is_some_and(|id| id == PrimDef::NDArray.id())
|| right_ty.obj_id(&ctx.unifier).is_some_and(|id| id == PrimDef::NDArray.id())
{
let llvm_usize = generator.get_size_type(ctx.ctx);
let (left_ty_opt, lhs) = left;
let left_ty = match left_ty_opt {
Some(ty) => ctx.unifier.get_representative(ty),
None => codegen_unreachable!(ctx),
};
let (right_ty_opt, rhs) = match comparators.first().copied() {
Some((Some(ty), val)) => (Some(ty), val),
Some((None, _)) | None => {
codegen_unreachable!(ctx);
}
};
let right_ty = match right_ty_opt {
Some(ty) => ctx.unifier.get_representative(ty),
None => codegen_unreachable!(ctx),
};
let (Some(left_ty), lhs) = left else { codegen_unreachable!(ctx) };
let (Some(right_ty), rhs) = comparators[0] else { codegen_unreachable!(ctx) };
let op = ops[0];
let is_ndarray1 =
@ -2009,77 +1992,6 @@ pub fn gen_cmpop_expr_with_values<'ctx, G: CodeGenerator>(
}
}
let (Some(left_ty), lhs_val) = left else { codegen_unreachable!(ctx) };
let left_ty = ctx.unifier.get_representative(left_ty);
let (Some(right_ty), rhs_val) = comparators.first().copied().unwrap() else {
codegen_unreachable!(ctx)
};
let right_ty = ctx.unifier.get_representative(right_ty);
if ctx.unifier.unioned(left_ty, ctx.primitives.str)
&& ctx.unifier.unioned(right_ty, ctx.primitives.str)
{
if ops.len() == 1 && (ops[0] == ast::Cmpop::Eq || ops[0] == ast::Cmpop::NotEq) {
let lhs_struct = lhs_val.into_struct_value();
let lhs_ptr = ctx
.builder
.build_extract_value(lhs_struct, 0, "lhs_ptr")
.unwrap()
.into_pointer_value();
let lhs_len =
ctx.builder.build_extract_value(lhs_struct, 1, "lhs_len").unwrap().into_int_value();
let rhs_struct = rhs_val.into_struct_value();
let rhs_ptr = ctx
.builder
.build_extract_value(rhs_struct, 0, "rhs_ptr")
.unwrap()
.into_pointer_value();
let rhs_len =
ctx.builder.build_extract_value(rhs_struct, 1, "rhs_len").unwrap().into_int_value();
let str_eq_fn = if let Some(fun) = ctx.module.get_function("nac3_str_eq") {
fun
} else {
let bool_type = ctx.ctx.bool_type();
let i8_ptr_type = ctx.ctx.i8_type().ptr_type(AddressSpace::default());
let usize_type = generator.get_size_type(ctx.ctx);
let fn_type = bool_type.fn_type(
&[i8_ptr_type.into(), usize_type.into(), i8_ptr_type.into(), usize_type.into()],
false,
);
ctx.module.add_function("nac3_str_eq", fn_type, None)
};
let call_site = ctx
.builder
.build_call(
str_eq_fn,
&[lhs_ptr.into(), lhs_len.into(), rhs_ptr.into(), rhs_len.into()],
"str_eq_call",
)
.expect("Failed to build call to nac3_str_eq");
let eq_result = match call_site.try_as_basic_value() {
Either::Left(inkwell::values::BasicValueEnum::IntValue(val)) => val,
Either::Left(_) | Either::Right(_) => codegen_unreachable!(ctx),
};
let eq_i8 =
ctx.builder.build_int_z_extend(eq_result, ctx.ctx.i8_type(), "eq_i8").unwrap();
let final_result = if ops[0] == ast::Cmpop::NotEq {
ctx.builder.build_not(eq_i8, "neq").unwrap()
} else {
eq_i8
};
return Ok(Some(ValueEnum::Dynamic(final_result.into())));
}
codegen_unreachable!(ctx);
}
let cmp_val = izip!(chain(once(&left), comparators.iter()), comparators.iter(), ops.iter(),)
.fold(Ok(None), |prev: Result<Option<_>, String>, (lhs, rhs, op)| {
let (left_ty, lhs) = lhs;
@ -2314,7 +2226,7 @@ pub fn gen_cmpop_expr_with_values<'ctx, G: CodeGenerator>(
}
if ![Cmpop::Eq, Cmpop::NotEq].contains(op) {
codegen_unreachable!(ctx, "Only __eq__ and __ne__ supported for this type")
todo!("Only __eq__ and __ne__ is implemented for lists")
}
let left_val =
@ -2438,10 +2350,10 @@ pub fn gen_cmpop_expr_with_values<'ctx, G: CodeGenerator>(
gen_list_cmpop(generator, ctx)?
} else if [left_ty, right_ty].iter().any(|ty| matches!(&*ctx.unifier.get_ty_immutable(*ty), TypeEnum::TTuple { .. })) {
let TypeEnum::TTuple { ty: left_tys, .. } = &*ctx.unifier.get_ty_immutable(left_ty) else {
codegen_unreachable!(ctx)
return Err(format!("'{}' not supported between instances of '{}' and '{}'", op.op_info().symbol, ctx.unifier.stringify(left_ty), ctx.unifier.stringify(right_ty)))
};
let TypeEnum::TTuple { ty: right_tys, .. } = &*ctx.unifier.get_ty_immutable(right_ty) else {
codegen_unreachable!(ctx)
return Err(format!("'{}' not supported between instances of '{}' and '{}'", op.op_info().symbol, ctx.unifier.stringify(left_ty), ctx.unifier.stringify(right_ty)))
};
if ![Cmpop::Eq, Cmpop::NotEq].contains(op) {
@ -2566,7 +2478,10 @@ pub fn gen_cmpop_expr_with_values<'ctx, G: CodeGenerator>(
ctx.ctx.bool_type().get_poison()
} else {
codegen_unreachable!(ctx)
return Err(format!("'{}' not supported between instances of '{}' and '{}'",
op.op_info().symbol,
ctx.unifier.stringify(left_ty),
ctx.unifier.stringify(right_ty)))
};
Ok(prev?.map(|v| ctx.builder.build_and(v, current, "cmp").unwrap()).or(Some(current)))

View File

@ -20,6 +20,7 @@ mod list;
mod math;
mod ndarray;
mod slice;
mod string;
#[must_use]
pub fn load_irrt<'ctx>(ctx: &'ctx Context, symbol_resolver: &dyn SymbolResolver) -> Module<'ctx> {

View File

@ -0,0 +1,45 @@
use inkwell::{
values::{BasicValueEnum, CallSiteValue, IntValue, PointerValue},
AddressSpace,
};
use itertools::Either;
use crate::codegen::{CodeGenContext, CodeGenerator};
/// Generates a call to string equality comparison. Returns an `i1` representing whether the strings are equal.
pub fn call_string_eq<'ctx, G: CodeGenerator + ?Sized>(
generator: &G,
ctx: &CodeGenContext<'ctx, '_>,
str1_ptr: PointerValue<'ctx>,
str1_len: IntValue<'ctx>,
str2_ptr: PointerValue<'ctx>,
str2_len: IntValue<'ctx>,
) -> IntValue<'ctx> {
let string_eq_fn = ctx.module.get_function("nac3_str_eq").unwrap_or_else(|| {
let i8_ptr_type = ctx.ctx.i8_type().ptr_type(AddressSpace::default());
let i64_type = ctx.ctx.i64_type();
let i32_type = ctx.ctx.i32_type();
let fn_type = i32_type.fn_type(
&[i8_ptr_type.into(), i64_type.into(), i8_ptr_type.into(), i64_type.into()],
false,
);
ctx.module.add_function("nac3_str_eq", fn_type, None)
});
let result = ctx
.builder
.build_call(
string_eq_fn,
&[
str1_ptr.into(),
ctx.builder.build_int_z_extend(str1_len, ctx.ctx.i64_type(), "").unwrap().into(),
str2_ptr.into(),
ctx.builder.build_int_z_extend(str2_len, ctx.ctx.i64_type(), "").unwrap().into(),
],
"string_eq",
)
.map(CallSiteValue::try_as_basic_value)
.map(|v| v.map_left(BasicValueEnum::into_int_value))
.map(Either::unwrap_left)
.unwrap();
generator.bool_to_i1(ctx, result)
}

View File

@ -748,7 +748,7 @@ pub fn set_primitives_magic_methods(store: &PrimitiveStore, unifier: &mut Unifie
impl_eq(unifier, store, bool_t, &[bool_t, ndarray_bool_t], None);
/* str ========= */
impl_eq(unifier, store, str_t, &[str_t], Some(bool_t));
impl_cmpop(unifier, store, str_t, &[str_t], &[Cmpop::Eq, Cmpop::NotEq], Some(bool_t));
/* list ======== */
impl_binop(unifier, store, list_t, &[list_t], Some(list_t), &[Operator::Add]);

View File

@ -105,14 +105,6 @@ uint32_t __nac3_personality(uint32_t state, uint32_t exception_object, uint32_t
__builtin_unreachable();
}
// Compare two strings by content and length.
bool nac3_str_eq(const char* lhs, size_t lhs_len, const char* rhs, size_t rhs_len) {
if (lhs_len != rhs_len) {
return false;
}
return memcmp(lhs, rhs, lhs_len) == 0;
}
// See `struct Exception<'a>` in
// https://github.com/m-labs/artiq/blob/master/artiq/firmware/libeh/eh_artiq.rs
struct Exception {
@ -143,4 +135,4 @@ extern int32_t run(void);
int main(void) {
run();
}
}

View File

@ -2,62 +2,37 @@
def output_bool(x: bool):
...
def str_eq():
# Basic cases
def test_str_eq():
output_bool("" == "")
output_bool("a" == "")
output_bool("a" == "b")
output_bool("b" == "a")
output_bool("a" == "a")
# Longer identical strings
output_bool("a" == "b")
output_bool("test string" == "test string")
output_bool("Lorem ipsum dolor sit amet" == "Lorem ipsum dolor sit amet")
# Different by one character
output_bool("test string1" == "test string2")
# Numeric strings
output_bool("test1" == "test2")
output_bool("123" == "123")
output_bool("123" == "321")
# Different lengths
output_bool("abc" == "abcde")
output_bool("a" == "aa")
output_bool(" " == " ")
output_bool(" a " == " a ")
# Leading and trailing spaces
output_bool(" leading space" == "leading space")
output_bool("trailing space " == "trailing space")
output_bool(" " == " ")
def str_ne():
# Basic cases
def test_str_ne():
output_bool("" != "")
output_bool("a" != "")
output_bool("a" != "b")
output_bool("b" != "a")
output_bool("a" != "a")
# Longer identical strings
output_bool("a" != "b")
output_bool("test string" != "test string")
# Different by one character
output_bool("test string1" != "test string2")
# Numeric strings
output_bool("Lorem ipsum dolor sit amet" != "Lorem ipsum dolor sit amet")
output_bool("test1" != "test2")
output_bool("123" != "123")
output_bool("123" != "321")
# Different lengths
output_bool("abc" != "abcde")
# Leading and trailing spaces
output_bool(" leading space" != "leading space")
output_bool("trailing space " != "trailing space")
output_bool(" " != " ")
output_bool("a" != "aa")
output_bool(" " != " ")
output_bool(" a " != " a ")
def run() -> int32:
str_eq()
str_ne()
return 0
test_str_eq()
test_str_ne()
return 0