IRgen/ABI/x86_64: Avoid passing small structs using byval sometimes.

author Daniel Dunbar <daniel@zuster.org>

Sat, 10 Mar 2012 01:03:58 +0000 (01:03 +0000)

committer Daniel Dunbar <daniel@zuster.org>

Sat, 10 Mar 2012 01:03:58 +0000 (01:03 +0000)
author Daniel Dunbar <daniel@zuster.org>
Sat, 10 Mar 2012 01:03:58 +0000 (01:03 +0000)
committer Daniel Dunbar <daniel@zuster.org>
Sat, 10 Mar 2012 01:03:58 +0000 (01:03 +0000)
diff --git a/lib/CodeGen/TargetInfo.cpp b/lib/CodeGen/TargetInfo.cpp

index 0f00a6ebae2e07df14fb00e30a37f52b3868a420..5950c539c303321b79dca5202297a17ad488720e 100644 (file)
--- a/lib/CodeGen/TargetInfo.cpp
+++ b/lib/CodeGen/TargetInfo.cpp
@@ -924,11 +924,15 @@ class X86_64ABIInfo : public ABIInfo {
  
    /// getIndirectResult - Give a source type \arg Ty, return a suitable result
    /// such that the argument will be passed in memory.
-  ABIArgInfo getIndirectResult(QualType Ty) const;
+  ///
+  /// \param freeIntRegs - The number of free integer registers remaining
+  /// available.
+  ABIArgInfo getIndirectResult(QualType Ty, unsigned freeIntRegs) const;
  
    ABIArgInfo classifyReturnType(QualType RetTy) const;
  
    ABIArgInfo classifyArgumentType(QualType Ty,
+                                  unsigned freeIntRegs,
                                    unsigned &neededInt,
                                    unsigned &neededSSE) const;
  
@@ -951,7 +955,8 @@ public:
  
    bool isPassedUsingAVXType(QualType type) const {
      unsigned neededInt, neededSSE;
-    ABIArgInfo info = classifyArgumentType(type, neededInt, neededSSE);
+    // The freeIntRegs argument doesn't matter here.
+    ABIArgInfo info = classifyArgumentType(type, 0, neededInt, neededSSE);
      if (info.isDirect()) {
        llvm::Type *ty = info.getCoerceToType();
        if (llvm::VectorType *vectorTy = dyn_cast_or_null<llvm::VectorType>(ty))
@@ -1441,9 +1446,16 @@ bool X86_64ABIInfo::IsIllegalVectorType(QualType Ty) const {
    return false;
  }
  
-ABIArgInfo X86_64ABIInfo::getIndirectResult(QualType Ty) const {
+ABIArgInfo X86_64ABIInfo::getIndirectResult(QualType Ty,
+                                            unsigned freeIntRegs) const {
    // If this is a scalar LLVM value then assume LLVM will pass it in the right
    // place naturally.
+  //
+  // This assumption is optimistic, as there could be free registers available
+  // when we need to pass this argument in memory, and LLVM could try to pass
+  // the argument in the free register. This does not seem to happen currently,
+  // but this code would be much safer if we could mark the argument with
+  // 'onstack'. See PR12193.
    if (!isAggregateTypeForABI(Ty) && !IsIllegalVectorType(Ty)) {
      // Treat an enum type as its underlying type.
      if (const EnumType *EnumTy = Ty->getAs<EnumType>())
@@ -1459,6 +1471,38 @@ ABIArgInfo X86_64ABIInfo::getIndirectResult(QualType Ty) const {
    // Compute the byval alignment. We specify the alignment of the byval in all
    // cases so that the mid-level optimizer knows the alignment of the byval.
    unsigned Align = std::max(getContext().getTypeAlign(Ty) / 8, 8U);
+
+  // Attempt to avoid passing indirect results using byval when possible. This
+  // is important for good codegen.
+  //
+  // We do this by coercing the value into a scalar type which the backend can
+  // handle naturally (i.e., without using byval).
+  //
+  // For simplicity, we currently only do this when we have exhausted all of the
+  // free integer registers. Doing this when there are free integer registers
+  // would require more care, as we would have to ensure that the coerced value
+  // did not claim the unused register. That would require either reordering the
+  // arguments to the function (so that any subsequent inreg values came first),
+  // or only doing this optimization when there were no following arguments that
+  // might be inreg.
+  //
+  // We currently expect it to be rare (particularly in well-written code) for
+  // arguments to be passed on the stack when there are still free integer
+  // registers available (this would typically imply large structs being passed
+  // by value), so this seems like a fair tradeoff for now.
+  //
+  // We can revisit this if the backend grows support for 'onstack' parameter
+  // attributes. See PR12193.
+  if (freeIntRegs == 0) {
+    uint64_t Size = getContext().getTypeSize(Ty);
+
+    // If this type fits in an eightbyte, coerce it into the matching integral
+    // type, which will end up on the stack (with alignment 8).
+    if (Align == 8 && Size <= 64)
+      return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(),
+                                                          Size));
+  }
+
    return ABIArgInfo::getIndirect(Align);
  }
  
@@ -1874,8 +1918,10 @@ classifyReturnType(QualType RetTy) const {
    return ABIArgInfo::getDirect(ResType);
  }
  
-ABIArgInfo X86_64ABIInfo::classifyArgumentType(QualType Ty, unsigned &neededInt,
-                                               unsigned &neededSSE) const {
+ABIArgInfo X86_64ABIInfo::classifyArgumentType(
+  QualType Ty, unsigned freeIntRegs, unsigned &neededInt, unsigned &neededSSE)
+  const
+{
    X86_64ABIInfo::Class Lo, Hi;
    classify(Ty, 0, Lo, Hi);
  
@@ -1907,7 +1953,7 @@ ABIArgInfo X86_64ABIInfo::classifyArgumentType(QualType Ty, unsigned &neededInt,
    case ComplexX87:
      if (isRecordWithNonTrivialDestructorOrCopyConstructor(Ty))
        ++neededInt;
-    return getIndirectResult(Ty);
+    return getIndirectResult(Ty, freeIntRegs);
  
    case SSEUp:
    case X87Up:
@@ -2015,7 +2061,8 @@ void X86_64ABIInfo::computeInfo(CGFunctionInfo &FI) const {
    for (CGFunctionInfo::arg_iterator it = FI.arg_begin(), ie = FI.arg_end();
         it != ie; ++it) {
      unsigned neededInt, neededSSE;
-    it->info = classifyArgumentType(it->type, neededInt, neededSSE);
+    it->info = classifyArgumentType(it->type, freeIntRegs, neededInt,
+                                    neededSSE);
  
      // AMD64-ABI 3.2.3p3: If there are no registers available for any
      // eightbyte of an argument, the whole argument is passed on the
@@ -2025,7 +2072,7 @@ void X86_64ABIInfo::computeInfo(CGFunctionInfo &FI) const {
        freeIntRegs -= neededInt;
        freeSSERegs -= neededSSE;
      } else {
-      it->info = getIndirectResult(it->type);
+      it->info = getIndirectResult(it->type, freeIntRegs);
      }
    }
  }
@@ -2091,7 +2138,7 @@ llvm::Value *X86_64ABIInfo::EmitVAArg(llvm::Value *VAListAddr, QualType Ty,
    unsigned neededInt, neededSSE;
  
    Ty = CGF.getContext().getCanonicalType(Ty);
-  ABIArgInfo AI = classifyArgumentType(Ty, neededInt, neededSSE);
+  ABIArgInfo AI = classifyArgumentType(Ty, 0, neededInt, neededSSE);
  
    // AMD64-ABI 3.5.7p5: Step 1. Determine whether type may be passed
    // in the registers. If not go to step 7.
diff --git a/test/CodeGen/complex-indirect.c b/test/CodeGen/complex-indirect.c

index 45eb1954842f2c25439572f1202234f0f176f22b..0daa970e760aaf2a3637ceb4c10275e9f6b7b0dd 100644 (file)
--- a/test/CodeGen/complex-indirect.c
+++ b/test/CodeGen/complex-indirect.c
@@ -1,10 +1,12 @@
-// RUN: %clang_cc1 -emit-llvm %s -o - -triple=x86_64-apple-darwin10 | FileCheck %s
+// RUN: %clang_cc1 -emit-llvm %s -o %t -triple=x86_64-apple-darwin10
+// RUN: FileCheck < %t %s
  
-// Make sure this doesn't crash, and that we don't generate a byval alloca
-// with insufficient alignment.
+// Make sure this doesn't crash. We used to generate a byval here and wanted to
+// verify a valid alignment, but we now realize we can use an i16 and let the
+// backend guarantee the alignment.
  
  void a(int,int,int,int,int,int,__complex__ char);
  void b(__complex__ char *y) { a(0,0,0,0,0,0,*y); }
  // CHECK: define void @b
  // CHECK: alloca { i8, i8 }*, align 8
-// CHECK: call void @a(i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, { i8, i8 }* byval align 8
+// CHECK: call void @a(i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i16 {{.*}})
diff --git a/test/CodeGen/x86_64-arguments.c b/test/CodeGen/x86_64-arguments.c

index 8e7119ef2c987eeb387ebf0e8c11be56b8c94a38..f73e1f026a83710d1fcedd273135481917609111 100644 (file)
--- a/test/CodeGen/x86_64-arguments.c
+++ b/test/CodeGen/x86_64-arguments.c
@@ -345,3 +345,12 @@ void test45() { f45(x45); }
  typedef float v46 __attribute((vector_size(8)));
  void f46(v46,v46,v46,v46,v46,v46,v46,v46,v46,v46);
  void test46() { v46 x = {1,2}; f46(x,x,x,x,x,x,x,x,x,x); }
+
+// Check that we pass the struct below without using byval, which helps out
+// codegen.
+//
+// CHECK: @test47
+// CHECK: call void @f47(i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}})
+struct s47 { unsigned a; };
+void f47(int,int,int,int,int,int,struct s47);
+void test47(int a, struct s47 b) { f47(a, a, a, a, a, a, b); }
author	Daniel Dunbar <daniel@zuster.org>
	Sat, 10 Mar 2012 01:03:58 +0000 (01:03 +0000)
committer	Daniel Dunbar <daniel@zuster.org>
	Sat, 10 Mar 2012 01:03:58 +0000 (01:03 +0000)
lib/CodeGen/TargetInfo.cpp		patch \| blob \| history
test/CodeGen/complex-indirect.c		patch \| blob \| history
test/CodeGen/x86_64-arguments.c		patch \| blob \| history