LLVM Call Parameter Mismatch Fix #293

Closed
ychenfo wants to merge 3 commits from fix-call-param-type into master
Collaborator

This patch fixes #276 and fixes #290.

#290 is caused by the fact that we previously did not handle inheritance properly. A simple bitcast works to fix this problem, and this is also what clang does according to the following output:

input cpp file
class A {
    public: 
        int a;
        
        A() {
            this -> a = 123;
        }

        int f() {
            return this -> f2();
        }

        int f2() {
            return (this -> a) + 1;
        }
};

class B: public A {
    public:
        int b;
    
        B(int b) {
            this -> b = b;
        }

        int f2() {
            return (this -> a) + 2;
        }
};

int f(A *a) {
    return a -> f();
}

int main() {
    A aaa;
    B bbb { 234 };
    f(&bbb);
}
output (by clang++ -O0 -S -emit-llvm ./class.cpp)
%class.A = type { i32 }
%class.B = type { %class.A, i32 }

$_ZN1A1fEv = comdat any

$_ZN1AC2Ev = comdat any

$_ZN1BC2Ei = comdat any

$_ZN1A2f2Ev = comdat any

; Function Attrs: mustprogress noinline optnone uwtable
define dso_local noundef i32 @_Z1fP1A(%class.A* noundef %0) #0 {
  %2 = alloca %class.A*, align 8
  store %class.A* %0, %class.A** %2, align 8
  %3 = load %class.A*, %class.A** %2, align 8
  %4 = call noundef i32 @_ZN1A1fEv(%class.A* noundef nonnull align 4 dereferenceable(4) %3)
  ret i32 %4
}

; Function Attrs: mustprogress noinline optnone uwtable
define linkonce_odr dso_local noundef i32 @_ZN1A1fEv(%class.A* noundef nonnull align 4 dereferenceable(4) %0) #0 comdat align 2 {
  %2 = alloca %class.A*, align 8
  store %class.A* %0, %class.A** %2, align 8
  %3 = load %class.A*, %class.A** %2, align 8
  %4 = call noundef i32 @_ZN1A2f2Ev(%class.A* noundef nonnull align 4 dereferenceable(4) %3)
  ret i32 %4
}

; Function Attrs: mustprogress noinline norecurse optnone uwtable
define dso_local noundef i32 @main() #1 {
  %1 = alloca %class.A, align 4
  %2 = alloca %class.B, align 4
  call void @_ZN1AC2Ev(%class.A* noundef nonnull align 4 dereferenceable(4) %1)
  call void @_ZN1BC2Ei(%class.B* noundef nonnull align 4 dereferenceable(8) %2, i32 noundef 234)
  %3 = bitcast %class.B* %2 to %class.A*
  %4 = call noundef i32 @_Z1fP1A(%class.A* noundef %3)
  ret i32 0
}

; Function Attrs: noinline nounwind optnone uwtable
define linkonce_odr dso_local void @_ZN1AC2Ev(%class.A* noundef nonnull align 4 dereferenceable(4) %0) unnamed_addr #2 comdat align 2 {
  %2 = alloca %class.A*, align 8
  store %class.A* %0, %class.A** %2, align 8
  %3 = load %class.A*, %class.A** %2, align 8
  %4 = getelementptr inbounds %class.A, %class.A* %3, i32 0, i32 0
  store i32 123, i32* %4, align 4
  ret void
}

; Function Attrs: noinline nounwind optnone uwtable
define linkonce_odr dso_local void @_ZN1BC2Ei(%class.B* noundef nonnull align 4 dereferenceable(8) %0, i32 noundef %1) unnamed_addr #2 comdat align 2 {
  %3 = alloca %class.B*, align 8
  %4 = alloca i32, align 4
  store %class.B* %0, %class.B** %3, align 8
  store i32 %1, i32* %4, align 4
  %5 = load %class.B*, %class.B** %3, align 8
  %6 = bitcast %class.B* %5 to %class.A*
  call void @_ZN1AC2Ev(%class.A* noundef nonnull align 4 dereferenceable(4) %6)
  %7 = load i32, i32* %4, align 4
  %8 = getelementptr inbounds %class.B, %class.B* %5, i32 0, i32 1
  store i32 %7, i32* %8, align 4
  ret void
}

; Function Attrs: mustprogress noinline nounwind optnone uwtable
define linkonce_odr dso_local noundef i32 @_ZN1A2f2Ev(%class.A* noundef nonnull align 4 dereferenceable(4) %0) #3 comdat align 2 {
  %2 = alloca %class.A*, align 8
  store %class.A* %0, %class.A** %2, align 8
  %3 = load %class.A*, %class.A** %2, align 8
  %4 = getelementptr inbounds %class.A, %class.A* %3, i32 0, i32 0
  %5 = load i32, i32* %4, align 4
  %6 = add nsw i32 %5, 1
  ret i32 %6
}

attributes #0 = { mustprogress noinline optnone uwtable "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
attributes #1 = { mustprogress noinline norecurse optnone uwtable "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
attributes #2 = { noinline nounwind optnone uwtable "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
attributes #3 = { mustprogress noinline nounwind optnone uwtable "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }

!llvm.module.flags = !{!0, !1, !2}
!llvm.ident = !{!3}

!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{i32 7, !"uwtable", i32 1}
!2 = !{i32 7, !"frame-pointer", i32 2}
!3 = !{!"clang version 14.0.1"}

#276 is due to llvm sometimes cannot merge opaque types when linking different llvm modules (though strangely it seems that llvm sometimes can do the merge under some simple cases..), and this is fixed by using module.get_struct_type to get named structs that are already defined in the current context.

This patch fixes #276 and fixes #290. #290 is caused by the fact that we previously did not handle inheritance properly. A simple bitcast works to fix this problem, and this is also what clang does according to the following output: <details><summary>input cpp file</summary> class A { public: int a; A() { this -> a = 123; } int f() { return this -> f2(); } int f2() { return (this -> a) + 1; } }; class B: public A { public: int b; B(int b) { this -> b = b; } int f2() { return (this -> a) + 2; } }; int f(A *a) { return a -> f(); } int main() { A aaa; B bbb { 234 }; f(&bbb); } </details> <details> <summary>output (by clang++ -O0 -S -emit-llvm ./class.cpp)</summary> %class.A = type { i32 } %class.B = type { %class.A, i32 } $_ZN1A1fEv = comdat any $_ZN1AC2Ev = comdat any $_ZN1BC2Ei = comdat any $_ZN1A2f2Ev = comdat any ; Function Attrs: mustprogress noinline optnone uwtable define dso_local noundef i32 @_Z1fP1A(%class.A* noundef %0) #0 { %2 = alloca %class.A*, align 8 store %class.A* %0, %class.A** %2, align 8 %3 = load %class.A*, %class.A** %2, align 8 %4 = call noundef i32 @_ZN1A1fEv(%class.A* noundef nonnull align 4 dereferenceable(4) %3) ret i32 %4 } ; Function Attrs: mustprogress noinline optnone uwtable define linkonce_odr dso_local noundef i32 @_ZN1A1fEv(%class.A* noundef nonnull align 4 dereferenceable(4) %0) #0 comdat align 2 { %2 = alloca %class.A*, align 8 store %class.A* %0, %class.A** %2, align 8 %3 = load %class.A*, %class.A** %2, align 8 %4 = call noundef i32 @_ZN1A2f2Ev(%class.A* noundef nonnull align 4 dereferenceable(4) %3) ret i32 %4 } ; Function Attrs: mustprogress noinline norecurse optnone uwtable define dso_local noundef i32 @main() #1 { %1 = alloca %class.A, align 4 %2 = alloca %class.B, align 4 call void @_ZN1AC2Ev(%class.A* noundef nonnull align 4 dereferenceable(4) %1) call void @_ZN1BC2Ei(%class.B* noundef nonnull align 4 dereferenceable(8) %2, i32 noundef 234) %3 = bitcast %class.B* %2 to %class.A* %4 = call noundef i32 @_Z1fP1A(%class.A* noundef %3) ret i32 0 } ; Function Attrs: noinline nounwind optnone uwtable define linkonce_odr dso_local void @_ZN1AC2Ev(%class.A* noundef nonnull align 4 dereferenceable(4) %0) unnamed_addr #2 comdat align 2 { %2 = alloca %class.A*, align 8 store %class.A* %0, %class.A** %2, align 8 %3 = load %class.A*, %class.A** %2, align 8 %4 = getelementptr inbounds %class.A, %class.A* %3, i32 0, i32 0 store i32 123, i32* %4, align 4 ret void } ; Function Attrs: noinline nounwind optnone uwtable define linkonce_odr dso_local void @_ZN1BC2Ei(%class.B* noundef nonnull align 4 dereferenceable(8) %0, i32 noundef %1) unnamed_addr #2 comdat align 2 { %3 = alloca %class.B*, align 8 %4 = alloca i32, align 4 store %class.B* %0, %class.B** %3, align 8 store i32 %1, i32* %4, align 4 %5 = load %class.B*, %class.B** %3, align 8 %6 = bitcast %class.B* %5 to %class.A* call void @_ZN1AC2Ev(%class.A* noundef nonnull align 4 dereferenceable(4) %6) %7 = load i32, i32* %4, align 4 %8 = getelementptr inbounds %class.B, %class.B* %5, i32 0, i32 1 store i32 %7, i32* %8, align 4 ret void } ; Function Attrs: mustprogress noinline nounwind optnone uwtable define linkonce_odr dso_local noundef i32 @_ZN1A2f2Ev(%class.A* noundef nonnull align 4 dereferenceable(4) %0) #3 comdat align 2 { %2 = alloca %class.A*, align 8 store %class.A* %0, %class.A** %2, align 8 %3 = load %class.A*, %class.A** %2, align 8 %4 = getelementptr inbounds %class.A, %class.A* %3, i32 0, i32 0 %5 = load i32, i32* %4, align 4 %6 = add nsw i32 %5, 1 ret i32 %6 } attributes #0 = { mustprogress noinline optnone uwtable "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } attributes #1 = { mustprogress noinline norecurse optnone uwtable "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } attributes #2 = { noinline nounwind optnone uwtable "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } attributes #3 = { mustprogress noinline nounwind optnone uwtable "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } !llvm.module.flags = !{!0, !1, !2} !llvm.ident = !{!3} !0 = !{i32 1, !"wchar_size", i32 4} !1 = !{i32 7, !"uwtable", i32 1} !2 = !{i32 7, !"frame-pointer", i32 2} !3 = !{!"clang version 14.0.1"} </details> ----- #276 is due to llvm sometimes cannot merge opaque types when linking different llvm modules (though strangely it seems that llvm sometimes can do the merge under some simple cases..), and this is fixed by using `module.get_struct_type` to get named structs that are already defined in the current context.
ychenfo added 3 commits 2022-06-01 04:46:56 +08:00
sb10q reviewed 2022-06-01 07:49:35 +08:00
@ -235,3 +235,4 @@
Err((old_builder, e)) => {
builder = old_builder;
errors.insert(e);
module = context.create_module_from_ir(prev_module).unwrap();

What's the impact on performance of this?
It sounds strange that we would have to serialize and deserialize things for passing objects around in the same program.

What's the impact on performance of this? It sounds strange that we would have to serialize and deserialize things for passing objects around in the same program.
Poster
Collaborator

Thanks for pointing this out! I just run some benchmark and this is indeed significantly slowing things down, as shown below. I will look into this.

python porgram from 10 to 150 functions (50 loc per function); compile time (second);

10 20 30 40 50 60 70 80 90 100 110 120 130 140 150
before 0.67500 0.82808 0.95160 1.04178 1.12146 1.2736 1.3426 1.43848 1.52762 1.66717 1.8724 1.9046 1.9810 2.1185 2.1865
after 1.00600 1.2652 1.48977 1.6315 1.78203 2.1227 2.3221 2.5354 2.6856 3.0505 3.5356 3.6710 4.0322 4.1374 4.3924
Thanks for pointing this out! I just run some benchmark and this is indeed significantly slowing things down, as shown below. I will look into this. python porgram from 10 to 150 functions (50 loc per function); compile time (second); | | 10 | 20 | 30 | 40 | 50 | 60 | 70 | 80 | 90 | 100 | 110 | 120 | 130 | 140 | 150 | | ------ | ---- | ---- | ---- | ---- | ---- | ---- | ---- | ---- | ---- | ---- | ---- | ---- | ---- | ---- | ---- | | before |0.67500 | 0.82808 | 0.95160 | 1.04178 | 1.12146 | 1.2736 | 1.3426 | 1.43848 | 1.52762 | 1.66717 | 1.8724 | 1.9046 | 1.9810 | 2.1185 | 2.1865 | | after |1.00600 | 1.2652 | 1.48977 | 1.6315 | 1.78203 | 2.1227 | 2.3221 | 2.5354 | 2.6856 | 3.0505 | 3.5356 | 3.6710 | 4.0322 | 4.1374 | 4.3924 |
Poster
Collaborator

Ah actually there seems no need to precisely restore the previous llvm module, creating a new empty one just to collect errors should be enough.

Ah actually there seems no need to precisely restore the previous llvm module, creating a new empty one just to collect errors should be enough.

Please make separate PRs so they can be merged separately.

Please make separate PRs so they can be merged separately.
ychenfo closed this pull request 2022-06-01 17:28:42 +08:00
Poster
Collaborator

This PR closed to be separates into to 2

This PR closed to be separates into to 2

Pull request closed

Sign in to join this conversation.
No reviewers
No Milestone
No Assignees
2 Participants
Notifications
Due Date
The due date is invalid or out of range. Please use the format 'yyyy-mm-dd'.

No due date set.

Dependencies

No dependencies set.

Reference: M-Labs/nac3#293
There is no content yet.