Pass/return structs of float/int as float/i32 instead of double/i64
to make the code generated for the ABI cleaner. Passing in the low part
of a double is the same as passing in a float.
For example, we now compile:
struct DeclGroup { float NumDecls; };
float foo(DeclGroup D);
void bar(DeclGroup *D) {
foo(*D);
}
into:
%struct.DeclGroup = type { float }
define void @_Z3barP9DeclGroup(%struct.DeclGroup* %D) nounwind {
entry:
%D.addr = alloca %struct.DeclGroup*, align 8 ; <%struct.DeclGroup**> [#uses=2]
%agg.tmp = alloca %struct.DeclGroup, align 4 ; <%struct.DeclGroup*> [#uses=2]
store %struct.DeclGroup* %D, %struct.DeclGroup** %D.addr
%tmp = load %struct.DeclGroup** %D.addr ; <%struct.DeclGroup*> [#uses=1]
%tmp1 = bitcast %struct.DeclGroup* %agg.tmp to i8* ; <i8*> [#uses=1]
%tmp2 = bitcast %struct.DeclGroup* %tmp to i8* ; <i8*> [#uses=1]
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp1, i8* %tmp2, i64 4, i32 4, i1 false)
%coerce.dive = getelementptr %struct.DeclGroup* %agg.tmp, i32 0, i32 0 ; <float*> [#uses=1]
%0 = load float* %coerce.dive, align 1 ; <float> [#uses=1]
%call = call float @_Z3foo9DeclGroup(float %0) ; <float> [#uses=0]
ret void
}
instead of:
%struct.DeclGroup = type { float }
define void @_Z3barP9DeclGroup(%struct.DeclGroup* %D) nounwind {
entry:
%D.addr = alloca %struct.DeclGroup*, align 8 ; <%struct.DeclGroup**> [#uses=2]
%agg.tmp = alloca %struct.DeclGroup, align 4 ; <%struct.DeclGroup*> [#uses=2]
%tmp3 = alloca double ; <double*> [#uses=2]
store %struct.DeclGroup* %D, %struct.DeclGroup** %D.addr
%tmp = load %struct.DeclGroup** %D.addr ; <%struct.DeclGroup*> [#uses=1]
%tmp1 = bitcast %struct.DeclGroup* %agg.tmp to i8* ; <i8*> [#uses=1]
%tmp2 = bitcast %struct.DeclGroup* %tmp to i8* ; <i8*> [#uses=1]
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp1, i8* %tmp2, i64 4, i32 4, i1 false)
%coerce.dive = getelementptr %struct.DeclGroup* %agg.tmp, i32 0, i32 0 ; <float*> [#uses=1]
%0 = bitcast double* %tmp3 to float* ; <float*> [#uses=1]
%1 = load float* %coerce.dive ; <float> [#uses=1]
store float %1, float* %0, align 1
%2 = load double* %tmp3 ; <double> [#uses=1]
%call = call float @_Z3foo9DeclGroup(double %2) ; <float> [#uses=0]
ret void
}
which is this machine code now (at -O0):
__Z3barP9DeclGroup:
subq $24, %rsp
movq %rdi, 16(%rsp)
movq 16(%rsp), %rdi
leaq 8(%rsp), %rax
movl (%rdi), %ecx
movl %ecx, (%rax)
movss 8(%rsp), %xmm0
callq __Z3foo9DeclGroup
addq $24, %rsp
ret
vs this before:
__Z3barP9DeclGroup:
subq $24, %rsp
movq %rdi, 16(%rsp)
movq 16(%rsp), %rdi
leaq 8(%rsp), %rax
movl (%rdi), %ecx
movl %ecx, (%rax)
movss 8(%rsp), %xmm0
movss %xmm0, (%rsp)
movsd (%rsp), %xmm0
callq __Z3foo9DeclGroup
addq $24, %rsp
ret
At -O3, it is the difference between this now:
__Z3barP9DeclGroup:
movss (%rdi), %xmm0
jmp __Z3foo9DeclGroup # TAILCALL
vs this before:
__Z3barP9DeclGroup:
movl (%rdi), %eax
movd %rax, %xmm0
jmp __Z3foo9DeclGroup # TAILCALL
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@107048 91177308-0d34-0410-b5e6-96231b3b80d8
if (Ty->isIntegralOrEnumerationType() || Ty->hasPointerRepresentation())
return (Ty->isPromotableIntegerType() ?
ABIArgInfo::getExtend() : ABIArgInfo::getDirect());
+
+ // If this is a 32-bit structure that is passed as an int64, then it will be
+ // passed in the low 32-bits of a 64-bit GPR, which is the same as how an
+ // i32 is passed. Coerce to an i32 instead of an i64.
+ if (Context.getTypeSizeInChars(Ty).getQuantity() == 4)
+ CoerceTo = llvm::Type::getInt32Ty(CoerceTo->getContext());
+
} else if (CoerceTo->isDoubleTy()) {
assert(Ty.isCanonical() && "should always have a canonical type here");
assert(!Ty.hasQualifiers() && "should never have a qualified type here");
if (Ty == Context.FloatTy || Ty == Context.DoubleTy)
return ABIArgInfo::getDirect();
+ // If this is a 32-bit structure that is passed as a double, then it will be
+ // passed in the low 32-bits of the XMM register, which is the same as how a
+ // float is passed. Coerce to a float instead of a double.
+ if (Context.getTypeSizeInChars(Ty).getQuantity() == 4)
+ CoerceTo = llvm::Type::getFloatTy(CoerceTo->getContext());
}
return ABIArgInfo::getCoerce(CoerceTo);
void f17(float a, float b, float c, float d, float e, float f, float g, float h,
long double X) {}
-// Check for valid coercion.
-// CHECK: [[f18_t1:%.*]] = trunc i64 {{.*}} to i32
-// CHECK: store i32 [[f18_t1]], i32*
+// Check for valid coercion. The struct should be passed/returned as i32, not
+// as i64 for better code quality.
+// rdar://8135035
+// CHECK: define void @f18(i32 %a, i32)
struct f18_s0 { int f0; };
void f18(int a, struct f18_s0 f18_arg1) { while (1) {} }
(void) (struct s0) { 0, 0, 0, 0 };
}
-// CHECK: define i64 @f2
+// CHECK: define i32 @f2
// CHECK: alloca %struct.s1, align 2
struct s1 { short x; short y; };
extern "C" struct s1 f2(int a, struct s1 *x, struct s1 *y) {
void B::f() { }
-// CHECK: define i64 @_ZN1D1gEv(%struct.B* %this)
+// CHECK: define i32 @_ZN1D1gEv(%struct.B* %this)
// CHECK: declare void @_ZN1B1gEv()
struct C;