/// getIndirectResult - Give a source type \arg Ty, return a suitable result
/// such that the argument will be passed in memory.
- ABIArgInfo getIndirectResult(QualType Ty) const;
+ ///
+ /// \param freeIntRegs - The number of free integer registers remaining
+ /// available.
+ ABIArgInfo getIndirectResult(QualType Ty, unsigned freeIntRegs) const;
ABIArgInfo classifyReturnType(QualType RetTy) const;
ABIArgInfo classifyArgumentType(QualType Ty,
+ unsigned freeIntRegs,
unsigned &neededInt,
unsigned &neededSSE) const;
bool isPassedUsingAVXType(QualType type) const {
unsigned neededInt, neededSSE;
- ABIArgInfo info = classifyArgumentType(type, neededInt, neededSSE);
+ // The freeIntRegs argument doesn't matter here.
+ ABIArgInfo info = classifyArgumentType(type, 0, neededInt, neededSSE);
if (info.isDirect()) {
llvm::Type *ty = info.getCoerceToType();
if (llvm::VectorType *vectorTy = dyn_cast_or_null<llvm::VectorType>(ty))
return false;
}
-ABIArgInfo X86_64ABIInfo::getIndirectResult(QualType Ty) const {
+ABIArgInfo X86_64ABIInfo::getIndirectResult(QualType Ty,
+ unsigned freeIntRegs) const {
// If this is a scalar LLVM value then assume LLVM will pass it in the right
// place naturally.
+ //
+ // This assumption is optimistic, as there could be free registers available
+ // when we need to pass this argument in memory, and LLVM could try to pass
+ // the argument in the free register. This does not seem to happen currently,
+ // but this code would be much safer if we could mark the argument with
+ // 'onstack'. See PR12193.
if (!isAggregateTypeForABI(Ty) && !IsIllegalVectorType(Ty)) {
// Treat an enum type as its underlying type.
if (const EnumType *EnumTy = Ty->getAs<EnumType>())
// Compute the byval alignment. We specify the alignment of the byval in all
// cases so that the mid-level optimizer knows the alignment of the byval.
unsigned Align = std::max(getContext().getTypeAlign(Ty) / 8, 8U);
+
+ // Attempt to avoid passing indirect results using byval when possible. This
+ // is important for good codegen.
+ //
+ // We do this by coercing the value into a scalar type which the backend can
+ // handle naturally (i.e., without using byval).
+ //
+ // For simplicity, we currently only do this when we have exhausted all of the
+ // free integer registers. Doing this when there are free integer registers
+ // would require more care, as we would have to ensure that the coerced value
+ // did not claim the unused register. That would require either reording the
+ // arguments to the function (so that any subsequent inreg values came first),
+ // or only doing this optimization when there were no following arguments that
+ // might be inreg.
+ //
+ // We currently expect it to be rare (particularly in well written code) for
+ // arguments to be passed on the stack when there are still free integer
+ // registers available (this would typically imply large structs being passed
+ // by value), so this seems like a fair tradeoff for now.
+ //
+ // We can revisit this if the backend grows support for 'onstack' parameter
+ // attributes. See PR12193.
+ if (freeIntRegs == 0) {
+ uint64_t Size = getContext().getTypeSize(Ty);
+
+ // If this type fits in an eightbyte, coerce it into the matching integral
+ // type, which will end up on the stack (with alignment 8).
+ if (Align == 8 && Size <= 64)
+ return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(),
+ Size));
+ }
+
return ABIArgInfo::getIndirect(Align);
}
return ABIArgInfo::getDirect(ResType);
}
-ABIArgInfo X86_64ABIInfo::classifyArgumentType(QualType Ty, unsigned &neededInt,
- unsigned &neededSSE) const {
+ABIArgInfo X86_64ABIInfo::classifyArgumentType(
+ QualType Ty, unsigned freeIntRegs, unsigned &neededInt, unsigned &neededSSE)
+ const
+{
X86_64ABIInfo::Class Lo, Hi;
classify(Ty, 0, Lo, Hi);
case ComplexX87:
if (isRecordWithNonTrivialDestructorOrCopyConstructor(Ty))
++neededInt;
- return getIndirectResult(Ty);
+ return getIndirectResult(Ty, freeIntRegs);
case SSEUp:
case X87Up:
for (CGFunctionInfo::arg_iterator it = FI.arg_begin(), ie = FI.arg_end();
it != ie; ++it) {
unsigned neededInt, neededSSE;
- it->info = classifyArgumentType(it->type, neededInt, neededSSE);
+ it->info = classifyArgumentType(it->type, freeIntRegs, neededInt,
+ neededSSE);
// AMD64-ABI 3.2.3p3: If there are no registers available for any
// eightbyte of an argument, the whole argument is passed on the
freeIntRegs -= neededInt;
freeSSERegs -= neededSSE;
} else {
- it->info = getIndirectResult(it->type);
+ it->info = getIndirectResult(it->type, freeIntRegs);
}
}
}
unsigned neededInt, neededSSE;
Ty = CGF.getContext().getCanonicalType(Ty);
- ABIArgInfo AI = classifyArgumentType(Ty, neededInt, neededSSE);
+ ABIArgInfo AI = classifyArgumentType(Ty, 0, neededInt, neededSSE);
// AMD64-ABI 3.5.7p5: Step 1. Determine whether type may be passed
// in the registers. If not go to step 7.
-// RUN: %clang_cc1 -emit-llvm %s -o - -triple=x86_64-apple-darwin10 | FileCheck %s
+// RUN: %clang_cc1 -emit-llvm %s -o %t -triple=x86_64-apple-darwin10
+// RUN: FileCheck < %t %s
-// Make sure this doesn't crash, and that we don't generate a byval alloca
-// with insufficient alignment.
+// Make sure this doesn't crash. We used to generate a byval here and wanted to
+// verify a valid alignment, but we now realize we can use an i16 and let the
+// backend guarantee the alignment.
void a(int,int,int,int,int,int,__complex__ char);
void b(__complex__ char *y) { a(0,0,0,0,0,0,*y); }
// CHECK: define void @b
// CHECK: alloca { i8, i8 }*, align 8
-// CHECK: call void @a(i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, { i8, i8 }* byval align 8
+// CHECK: call void @a(i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i16 {{.*}})
typedef float v46 __attribute((vector_size(8)));
void f46(v46,v46,v46,v46,v46,v46,v46,v46,v46,v46);
void test46() { v46 x = {1,2}; f46(x,x,x,x,x,x,x,x,x,x); }
+
+// Check that we pass the struct below without using byval, which helps out
+// codegen.
+//
+// CHECK: @test47
+// CHECK: call void @f47(i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}})
+struct s47 { unsigned a; };
+void f47(int,int,int,int,int,int,struct s47);
+void test47(int a, struct s47 b) { f47(a, a, a, a, a, a, b); }