From: Eli Friedman
Date: Wed, 12 Jun 2013 00:13:45 +0000 (+0000)
Subject: Make va_arg and argument passing to varargs functions work correctly with
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=7a1b586a383622e3287a5f3d82736ec513032744;p=clang

Make va_arg and argument passing to varargs functions work correctly with
AVX vectors when AVX is turned on.

Fixes .

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@183813 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/CodeGen/TargetInfo.cpp b/lib/CodeGen/TargetInfo.cpp
index 179fdb3a92..981a38d7ff 100644
--- a/lib/CodeGen/TargetInfo.cpp
+++ b/lib/CodeGen/TargetInfo.cpp
@@ -1128,6 +1128,9 @@ class X86_64ABIInfo : public ABIInfo {
   /// containing object.  Some parameters are classified different
   /// depending on whether they straddle an eightbyte boundary.
   ///
+  /// \param isNamedArg - Whether the argument in question is a "named"
+  /// argument, as used in AMD64-ABI 3.5.7.
+  ///
   /// If a word is unused its result will be NoClass; if a type should
   /// be passed in Memory then at least the classification of \arg Lo
   /// will be Memory.
@@ -1136,7 +1139,8 @@
   ///
   /// If the \arg Lo class is ComplexX87, then the \arg Hi class will
   /// also be ComplexX87.
-  void classify(QualType T, uint64_t OffsetBase, Class &Lo, Class &Hi) const;
+  void classify(QualType T, uint64_t OffsetBase, Class &Lo, Class &Hi,
+                bool isNamedArg) const;
 
   llvm::Type *GetByteVectorType(QualType Ty) const;
   llvm::Type *GetSSETypeAtOffset(llvm::Type *IRType,
@@ -1162,7 +1166,8 @@
 
   ABIArgInfo classifyArgumentType(QualType Ty, unsigned freeIntRegs,
                                   unsigned &neededInt,
-                                  unsigned &neededSSE) const;
+                                  unsigned &neededSSE,
+                                  bool isNamedArg) const;
 
   bool IsIllegalVectorType(QualType Ty) const;
 
@@ -1189,7 +1194,8 @@ public:
   bool isPassedUsingAVXType(QualType type) const {
     unsigned neededInt, neededSSE;
     // The freeIntRegs argument doesn't matter here.
-    ABIArgInfo info = classifyArgumentType(type, 0, neededInt, neededSSE);
+    ABIArgInfo info = classifyArgumentType(type, 0, neededInt, neededSSE,
+                                           /*isNamedArg*/true);
     if (info.isDirect()) {
       llvm::Type *ty = info.getCoerceToType();
       if (llvm::VectorType *vectorTy = dyn_cast_or_null<llvm::VectorType>(ty))
@@ -1411,7 +1417,7 @@ X86_64ABIInfo::Class X86_64ABIInfo::merge(Class Accum, Class Field) {
 }
 
 void X86_64ABIInfo::classify(QualType Ty, uint64_t OffsetBase,
-                             Class &Lo, Class &Hi) const {
+                             Class &Lo, Class &Hi, bool isNamedArg) const {
   // FIXME: This code can be simplified by introducing a simple value class for
   // Class pairs with appropriate constructor methods for the various
   // situations.
@@ -1450,7 +1456,7 @@ void X86_64ABIInfo::classify(QualType Ty, uint64_t OffsetBase,
 
   if (const EnumType *ET = Ty->getAs<EnumType>()) {
     // Classify the underlying integer type.
-    classify(ET->getDecl()->getIntegerType(), OffsetBase, Lo, Hi);
+    classify(ET->getDecl()->getIntegerType(), OffsetBase, Lo, Hi, isNamedArg);
     return;
   }
 
@@ -1498,7 +1504,7 @@ void X86_64ABIInfo::classify(QualType Ty, uint64_t OffsetBase,
       // split.
       if (OffsetBase && OffsetBase != 64)
         Hi = Lo;
-    } else if (Size == 128 || (HasAVX && Size == 256)) {
+    } else if (Size == 128 || (HasAVX && isNamedArg && Size == 256)) {
       // Arguments of 256-bits are split into four eightbyte chunks. The
       // least significant one belongs to class SSE and all the others to class
       // SSEUP. The original Lo and Hi design considers that types can't be
@@ -1506,6 +1512,10 @@ void X86_64ABIInfo::classify(QualType Ty, uint64_t OffsetBase,
       // This design isn't correct for 256-bits, but since there're no cases
       // where the upper parts would need to be inspected, avoid adding
       // complexity and just consider Hi to match the 64-256 part.
+      //
+      // Note that per 3.5.7 of AMD64-ABI, 256-bit args are only passed in
+      // registers if they are "named", i.e. not part of the "..." of a
+      // variadic function.
       Lo = SSE;
       Hi = SSEUp;
     }
@@ -1571,7 +1581,7 @@ void X86_64ABIInfo::classify(QualType Ty, uint64_t OffsetBase,
 
     for (uint64_t i=0, Offset=OffsetBase; i<ArraySize; ++i, Offset += EltSize) {
       Class FieldLo, FieldHi;
-      classify(AT->getElementType(), Offset, FieldLo, FieldHi);
+      classify(AT->getElementType(), Offset, FieldLo, FieldHi, isNamedArg);
       Lo = merge(Lo, FieldLo);
       Hi = merge(Hi, FieldHi);
       if (Lo == Memory || Hi == Memory)
@@ -1625,7 +1635,7 @@ void X86_64ABIInfo::classify(QualType Ty, uint64_t OffsetBase,
       Class FieldLo, FieldHi;
       uint64_t Offset =
         OffsetBase + getContext().toBits(Layout.getBaseClassOffset(Base));
-      classify(i->getType(), Offset, FieldLo, FieldHi);
+      classify(i->getType(), Offset, FieldLo, FieldHi, isNamedArg);
       Lo = merge(Lo, FieldLo);
       Hi = merge(Hi, FieldHi);
       if (Lo == Memory || Hi == Memory)
@@ -1688,7 +1698,7 @@ void X86_64ABIInfo::classify(QualType Ty, uint64_t OffsetBase,
           FieldHi = EB_Hi ? Integer : NoClass;
         }
       } else
-        classify(i->getType(), Offset, FieldLo, FieldHi);
+        classify(i->getType(), Offset, FieldLo, FieldHi, isNamedArg);
       Lo = merge(Lo, FieldLo);
       Hi = merge(Hi, FieldHi);
       if (Lo == Memory || Hi == Memory)
@@ -2076,7 +2086,7 @@ classifyReturnType(QualType RetTy) const {
   // AMD64-ABI 3.2.3p4: Rule 1. Classify the return type with the
   // classification algorithm.
   X86_64ABIInfo::Class Lo, Hi;
-  classify(RetTy, 0, Lo, Hi);
+  classify(RetTy, 0, Lo, Hi, /*isNamedArg*/ true);
 
   // Check some invariants.
   assert((Hi != Memory || Lo == Memory) && "Invalid memory classification.");
@@ -2201,11 +2211,12 @@
 }
 
 ABIArgInfo X86_64ABIInfo::classifyArgumentType(
-  QualType Ty, unsigned freeIntRegs, unsigned &neededInt, unsigned &neededSSE)
+  QualType Ty, unsigned freeIntRegs, unsigned &neededInt, unsigned &neededSSE,
+  bool isNamedArg)
   const
 {
   X86_64ABIInfo::Class Lo, Hi;
-  classify(Ty, 0, Lo, Hi);
+  classify(Ty, 0, Lo, Hi, isNamedArg);
 
   // Check some invariants.
   // FIXME: Enforce these by construction.
@@ -2338,13 +2349,22 @@ void X86_64ABIInfo::computeInfo(CGFunctionInfo &FI) const {
   if (FI.getReturnInfo().isIndirect())
     --freeIntRegs;
 
+  bool isVariadic = FI.isVariadic();
+  unsigned numRequiredArgs = 0;
+  if (isVariadic)
+    numRequiredArgs = FI.getRequiredArgs().getNumRequiredArgs();
+
   // AMD64-ABI 3.2.3p3: Once arguments are classified, the registers
   // get assigned (in left-to-right order) for passing as follows...
   for (CGFunctionInfo::arg_iterator it = FI.arg_begin(), ie = FI.arg_end();
        it != ie; ++it) {
+    bool isNamedArg = true;
+    if (isVariadic)
+      isNamedArg = (it - FI.arg_begin()) < numRequiredArgs;
+
     unsigned neededInt, neededSSE;
     it->info = classifyArgumentType(it->type, freeIntRegs, neededInt,
-                                    neededSSE);
+                                    neededSSE, isNamedArg);
 
     // AMD64-ABI 3.2.3p3: If there are no registers available for any
     // eightbyte of an argument, the whole argument is passed on the
@@ -2420,7 +2440,8 @@ llvm::Value *X86_64ABIInfo::EmitVAArg(llvm::Value *VAListAddr, QualType Ty,
   unsigned neededInt, neededSSE;
 
   Ty = CGF.getContext().getCanonicalType(Ty);
-  ABIArgInfo AI = classifyArgumentType(Ty, 0, neededInt, neededSSE);
+  ABIArgInfo AI = classifyArgumentType(Ty, 0, neededInt, neededSSE,
+                                       /*isNamedArg*/false);
 
   // AMD64-ABI 3.5.7p5: Step 1. Determine whether type may be passed
   // in the registers. If not go to step 7.
diff --git a/test/CodeGen/x86_64-arguments.c b/test/CodeGen/x86_64-arguments.c
index 7bd534407c..10e6d5bd65 100644
--- a/test/CodeGen/x86_64-arguments.c
+++ b/test/CodeGen/x86_64-arguments.c
@@ -411,3 +411,26 @@ void test51(struct test51_s *s, __builtin_va_list argList) {
 // CHECK-NEXT: add i32 {{.*}}, 16
 // CHECK-NEXT: store i32 {{.*}}, i32* {{.*}}
 // CHECK-NEXT: br label
+
+void test52_helper(int, ...);
+__m256 x52;
+void test52() {
+  test52_helper(0, x52, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0i);
+}
+// AVX: @test52_helper(i32 0, <8 x float> {{%[a-zA-Z0-9]+}}, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double {{%[a-zA-Z0-9]+}}, double {{%[a-zA-Z0-9]+}})
+
+void test53(__m256 *m, __builtin_va_list argList) {
+  *m = __builtin_va_arg(argList, __m256);
+}
+// AVX: define void @test53
+// AVX-NOT: br i1
+// AVX: ret void
+
+void test54_helper(__m256, ...);
+__m256 x54;
+void test54() {
+  test54_helper(x54, x54, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0i);
+  test54_helper(x54, x54, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0i);
+}
+// AVX: @test54_helper(<8 x float> {{%[a-zA-Z0-9]+}}, <8 x float> {{%[a-zA-Z0-9]+}}, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double {{%[a-zA-Z0-9]+}}, double {{%[a-zA-Z0-9]+}})
+// AVX: @test54_helper(<8 x float> {{%[a-zA-Z0-9]+}}, <8 x float> {{%[a-zA-Z0-9]+}}, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, { double, double }* byval align 8 {{%[a-zA-Z0-9]+}})
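
For illustration, here is a minimal standalone program, not part of the
commit (the helper name sum_first_vec is made up), that exercises the case
this change fixes. Per AMD64-ABI 3.5.7, a "named" 256-bit vector argument
may be passed in a YMM register, but one matching the "..." of a variadic
call must be passed in memory; before this fix, Clang classified both the
same way, so the caller could leave the vector in a register while va_arg
searched the overflow argument area.

#include <immintrin.h>
#include <stdarg.h>
#include <stdio.h>

/* Sum the lanes of the first (unnamed) __m256 argument.  With this fix,
   va_arg loads the vector from the overflow argument area, which is where
   the caller now stores unnamed 256-bit vectors. */
static float sum_first_vec(int count, ...) {
  va_list ap;
  va_start(ap, count);
  __m256 v = va_arg(ap, __m256);  /* unnamed 256-bit arg: memory class */
  va_end(ap);

  float lanes[8];
  _mm256_storeu_ps(lanes, v);
  float sum = 0.0f;
  for (int i = 0; i < 8; ++i)
    sum += lanes[i];
  return sum;
}

int main(void) {
  __m256 v = _mm256_set1_ps(1.0f);
  /* Prints 8.000000 once caller and callee agree on where the
     vector lives. */
  printf("%f\n", sum_first_vec(1, v));
  return 0;
}

Compile with AVX enabled (e.g. clang -mavx).  Without this change, the
caller could pass v in a YMM register while va_arg looked for it on the
stack, yielding garbage.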