From: Bill Schmidt <wschmidt@linux.vnet.ibm.com>
Date: Fri, 12 Oct 2012 19:26:17 +0000 (+0000)
Subject: This patch addresses PR13948.
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=b1f5fe017a596e0c7749dee10c9d3ff1c0f2788c;p=clang

This patch addresses PR13948.

For 64-bit PowerPC SVR4, an aggregate containing only one
floating-point field (float, double, or long double) must be passed in
a register as though just that field were present.  This patch
addresses the issue during Clang code generation by specifying in the
ABIArgInfo for the argument that the underlying type is passed
directly in a register.  The included test case verifies flat and
nested structs for the three data types.


git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@165816 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/CodeGen/TargetInfo.cpp b/lib/CodeGen/TargetInfo.cpp
index 21318b37c2..fe85283930 100644
--- a/lib/CodeGen/TargetInfo.cpp
+++ b/lib/CodeGen/TargetInfo.cpp
@@ -2602,13 +2602,31 @@ class PPC64_SVR4_ABIInfo : public DefaultABIInfo {
 public:
   PPC64_SVR4_ABIInfo(CodeGen::CodeGenTypes &CGT) : DefaultABIInfo(CGT) {}
 
-  // TODO: Could override computeInfo to model the ABI more completely if
-  // it would be helpful.  Example: We might remove the byVal flag from
-  // aggregate arguments that fit in a register to avoid pushing them to
-  // memory on function entry.  Note that this is a performance optimization,
-  // not a compliance issue.  In general we prefer to keep ABI details in
-  // the back end where possible, but modifying an argument flag seems like
-  // a good thing to do before invoking the back end.
+  // TODO: We can add more logic to computeInfo to improve performance.
+  // Example: For aggregate arguments that fit in a register, we could
+  // use getDirectInReg (as is done below for structs containing a single
+  // floating-point value) to avoid pushing them to memory on function
+  // entry.  This would require changing the logic in PPCISelLowering
+  // when lowering the parameters in the caller and args in the callee.
+  virtual void computeInfo(CGFunctionInfo &FI) const {
+    FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
+    for (CGFunctionInfo::arg_iterator it = FI.arg_begin(), ie = FI.arg_end();
+         it != ie; ++it) {
+      // We rely on the default argument classification for the most part.
+      // One exception:  An aggregate containing a single floating-point
+      // item must be passed in a register if one is available.
+      const Type *T = isSingleElementStruct(it->type, getContext());
+      if (T) {
+        const BuiltinType *BT = T->getAs<BuiltinType>();
+        if (BT && BT->isFloatingPoint()) {
+          QualType QT(T, 0);
+          it->info = ABIArgInfo::getDirectInReg(CGT.ConvertType(QT));
+          continue;
+        }
+      }
+      it->info = classifyArgumentType(it->type);
+    }
+  }
 
   virtual llvm::Value *EmitVAArg(llvm::Value *VAListAddr, 
                                  QualType Ty,
diff --git a/test/CodeGen/ppc64-struct-onefloat.c b/test/CodeGen/ppc64-struct-onefloat.c
new file mode 100644
index 0000000000..4f7c896225
--- /dev/null
+++ b/test/CodeGen/ppc64-struct-onefloat.c
@@ -0,0 +1,65 @@
+// REQUIRES: ppc64-registered-target
+// RUN: %clang_cc1 -O0 -triple powerpc64-unknown-linux-gnu -emit-llvm -o - %s | FileCheck %s
+
+typedef struct s1 { float f; } Sf;
+typedef struct s2 { double d; } Sd;
+typedef struct s3 { long double ld; } Sld;
+typedef struct s4 { Sf fs; } SSf;
+typedef struct s5 { Sd ds; } SSd;
+typedef struct s6 { Sld lds; } SSld;
+
+void bar(Sf a, Sd b, Sld c, SSf d, SSd e, SSld f) {}
+
+// CHECK: define void @bar
+// CHECK:  %a = alloca %struct.s1, align 4
+// CHECK:  %b = alloca %struct.s2, align 8
+// CHECK:  %c = alloca %struct.s3, align 16
+// CHECK:  %d = alloca %struct.s4, align 4
+// CHECK:  %e = alloca %struct.s5, align 8
+// CHECK:  %f = alloca %struct.s6, align 16
+// CHECK:  %coerce.dive = getelementptr %struct.s1* %a, i32 0, i32 0
+// CHECK:  store float %a.coerce, float* %coerce.dive, align 1
+// CHECK:  %coerce.dive1 = getelementptr %struct.s2* %b, i32 0, i32 0
+// CHECK:  store double %b.coerce, double* %coerce.dive1, align 1
+// CHECK:  %coerce.dive2 = getelementptr %struct.s3* %c, i32 0, i32 0
+// CHECK:  store ppc_fp128 %c.coerce, ppc_fp128* %coerce.dive2, align 1
+// CHECK:  %coerce.dive3 = getelementptr %struct.s4* %d, i32 0, i32 0
+// CHECK:  %coerce.dive4 = getelementptr %struct.s1* %coerce.dive3, i32 0, i32 0
+// CHECK:  store float %d.coerce, float* %coerce.dive4, align 1
+// CHECK:  %coerce.dive5 = getelementptr %struct.s5* %e, i32 0, i32 0
+// CHECK:  %coerce.dive6 = getelementptr %struct.s2* %coerce.dive5, i32 0, i32 0
+// CHECK:  store double %e.coerce, double* %coerce.dive6, align 1
+// CHECK:  %coerce.dive7 = getelementptr %struct.s6* %f, i32 0, i32 0
+// CHECK:  %coerce.dive8 = getelementptr %struct.s3* %coerce.dive7, i32 0, i32 0
+// CHECK:  store ppc_fp128 %f.coerce, ppc_fp128* %coerce.dive8, align 1
+// CHECK:  ret void
+
+void foo(void) 
+{
+  Sf p1 = { 22.63f };
+  Sd p2 = { 19.47 };
+  Sld p3 = { -155.1l };
+  SSf p4 = { { 22.63f } };
+  SSd p5 = { { 19.47 } };
+  SSld p6 = { { -155.1l } };
+  bar(p1, p2, p3, p4, p5, p6);
+}
+
+// CHECK: define void @foo
+// CHECK:  %coerce.dive = getelementptr %struct.s1* %p1, i32 0, i32 0
+// CHECK:  %{{[0-9]+}} = load float* %coerce.dive, align 1
+// CHECK:  %coerce.dive1 = getelementptr %struct.s2* %p2, i32 0, i32 0
+// CHECK:  %{{[0-9]+}} = load double* %coerce.dive1, align 1
+// CHECK:  %coerce.dive2 = getelementptr %struct.s3* %p3, i32 0, i32 0
+// CHECK:  %{{[0-9]+}} = load ppc_fp128* %coerce.dive2, align 1
+// CHECK:  %coerce.dive3 = getelementptr %struct.s4* %p4, i32 0, i32 0
+// CHECK:  %coerce.dive4 = getelementptr %struct.s1* %coerce.dive3, i32 0, i32 0
+// CHECK:  %{{[0-9]+}} = load float* %coerce.dive4, align 1
+// CHECK:  %coerce.dive5 = getelementptr %struct.s5* %p5, i32 0, i32 0
+// CHECK:  %coerce.dive6 = getelementptr %struct.s2* %coerce.dive5, i32 0, i32 0
+// CHECK:  %{{[0-9]+}} = load double* %coerce.dive6, align 1
+// CHECK:  %coerce.dive7 = getelementptr %struct.s6* %p6, i32 0, i32 0
+// CHECK:  %coerce.dive8 = getelementptr %struct.s3* %coerce.dive7, i32 0, i32 0
+// CHECK:  %{{[0-9]+}} = load ppc_fp128* %coerce.dive8, align 1
+// CHECK:  call void @bar(float inreg %{{[0-9]+}}, double inreg %{{[0-9]+}}, ppc_fp128 inreg %{{[0-9]+}}, float inreg %{{[0-9]+}}, double inreg %{{[0-9]+}}, ppc_fp128 inreg %{{[0-9]+}})
+// CHECK:  ret void