From: Bruno Cardoso Lopes <bruno.cardoso@gmail.com>
Date: Fri, 8 Jul 2011 22:18:40 +0000 (+0000)
Subject: Add support for AVX 256-bit in the x86_64 ABI (as in the 0.99.5 draft)
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=df41b4c10ab2a0096957e415e520bd467f8b2e9e;p=clang

Add support for AVX 256-bit in the x86_64 ABI (as in the 0.99.5 draft)

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@134754 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/CodeGen/TargetInfo.cpp b/lib/CodeGen/TargetInfo.cpp
index 8736d59f56..b6cfb7375a 100644
--- a/lib/CodeGen/TargetInfo.cpp
+++ b/lib/CodeGen/TargetInfo.cpp
@@ -820,6 +820,22 @@ class X86_64ABIInfo : public ABIInfo {
   /// should just return Memory for the aggregate).
   static Class merge(Class Accum, Class Field);
 
+  /// postMerge - Implement the X86_64 ABI post merging algorithm.
+  ///
+  /// Post merger cleanup, reduces a malformed Hi and Lo pair to
+  /// final MEMORY or SSE classes when necessary.
+  ///
+  /// \param AggregateSize - The size of the current aggregate in
+  /// the classification process.
+  ///
+  /// \param Lo - The classification for the parts of the type
+  /// residing in the low word of the containing object.
+  ///
+  /// \param Hi - The classification for the parts of the type
+  /// residing in the higher words of the containing object.
+  ///
+  void postMerge(unsigned AggregateSize, Class &Lo, Class &Hi) const;
+
   /// classify - Determine the x86_64 register classes in which the
   /// given type T should be passed.
   ///
@@ -843,7 +859,7 @@ class X86_64ABIInfo : public ABIInfo {
   /// also be ComplexX87.
   void classify(QualType T, uint64_t OffsetBase, Class &Lo, Class &Hi) const;
 
-  const llvm::Type *Get16ByteVectorType(QualType Ty) const;
+  const llvm::Type *GetByteVectorType(QualType Ty) const;
   const llvm::Type *GetSSETypeAtOffset(const llvm::Type *IRType,
                                        unsigned IROffset, QualType SourceTy,
                                        unsigned SourceOffset) const;
@@ -956,6 +972,39 @@ public:
 
 }
 
+void X86_64ABIInfo::postMerge(unsigned AggregateSize, Class &Lo,
+                              Class &Hi) const {
+  // AMD64-ABI 3.2.3p2: Rule 5. Then a post merger cleanup is done:
+  //
+  // (a) If one of the classes is Memory, the whole argument is passed in
+  //     memory.
+  //
+  // (b) If X87UP is not preceded by X87, the whole argument is passed in
+  //     memory.
+  //
+  // (c) If the size of the aggregate exceeds two eightbytes and the first
+  //     eightbyte isn't SSE or any other eightbyte isn't SSEUP, the whole
+  //     argument is passed in memory. NOTE: This is necessary to keep the
+  //     ABI working for processors that don't support the __m256 type.
+  //
+  // (d) If SSEUP is not preceded by SSE or SSEUP, it is converted to SSE.
+  //
+  // Some of these are enforced by the merging logic.  Others can arise
+  // only with unions; for example:
+  //   union { _Complex double; unsigned; }
+  //
+  // Note that clauses (b) and (c) were added in 0.98.
+  //
+  if (Hi == Memory)
+    Lo = Memory;
+  if (Hi == X87Up && Lo != X87 && honorsRevision0_98())
+    Lo = Memory;
+  if (AggregateSize > 128 && (Lo != SSE || Hi != SSEUp))
+    Lo = Memory;
+  if (Hi == SSEUp && Lo != SSE)
+    Hi = SSE;
+}
+
 X86_64ABIInfo::Class X86_64ABIInfo::merge(Class Accum, Class Field) {
   // AMD64-ABI 3.2.3p2: Rule 4. Each field of an object is
   // classified recursively so that always two fields are
@@ -1082,7 +1131,14 @@ void X86_64ABIInfo::classify(QualType Ty, uint64_t OffsetBase,
       // split.
       if (OffsetBase && OffsetBase != 64)
         Hi = Lo;
-    } else if (Size == 128) {
+    } else if (Size == 128 | Size == 256) {
+      // Arguments of 256-bits are split into four eightbyte chunks. The
+      // least significant one belongs to class SSE and all the others to class
+      // SSEUP. The original Lo and Hi design considers that types can't be
+      // greater than 128-bits, so a 64-bit split in Hi and Lo makes sense.
+      // This design isn't correct for 256-bits, but since there're no cases
+      // where the upper parts would need to be inspected, avoid adding
+      // complexity and just consider Hi to match the 64-256 part.
       Lo = SSE;
       Hi = SSEUp;
     }
@@ -1121,8 +1177,8 @@ void X86_64ABIInfo::classify(QualType Ty, uint64_t OffsetBase,
     uint64_t Size = getContext().getTypeSize(Ty);
 
     // AMD64-ABI 3.2.3p2: Rule 1. If the size of an object is larger
-    // than two eightbytes, ..., it has class MEMORY.
-    if (Size > 128)
+    // than four eightbytes, ..., it has class MEMORY.
+    if (Size > 256)
       return;
 
     // AMD64-ABI 3.2.3p2: Rule 1. If ..., or it contains unaligned
@@ -1146,9 +1202,7 @@ void X86_64ABIInfo::classify(QualType Ty, uint64_t OffsetBase,
         break;
     }
 
-    // Do post merger cleanup (see below). Only case we worry about is Memory.
-    if (Hi == Memory)
-      Lo = Memory;
+    postMerge(Size, Lo, Hi);
     assert((Hi != SSEUp || Lo == SSE) && "Invalid SSEUp array classification.");
     return;
   }
@@ -1157,8 +1211,8 @@ void X86_64ABIInfo::classify(QualType Ty, uint64_t OffsetBase,
     uint64_t Size = getContext().getTypeSize(Ty);
 
     // AMD64-ABI 3.2.3p2: Rule 1. If the size of an object is larger
-    // than two eightbytes, ..., it has class MEMORY.
-    if (Size > 128)
+    // than four eightbytes, ..., it has class MEMORY.
+    if (Size > 256)
       return;
 
     // AMD64-ABI 3.2.3p2: Rule 2. If a C++ object has either a non-trivial
@@ -1257,31 +1311,7 @@ void X86_64ABIInfo::classify(QualType Ty, uint64_t OffsetBase,
         break;
     }
 
-    // AMD64-ABI 3.2.3p2: Rule 5. Then a post merger cleanup is done:
-    //
-    // (a) If one of the classes is MEMORY, the whole argument is
-    // passed in memory.
-    //
-    // (b) If X87UP is not preceded by X87, the whole argument is 
-    // passed in memory.
-    // 
-    // (c) If the size of the aggregate exceeds two eightbytes and the first
-    // eight-byte isn't SSE or any other eightbyte isn't SSEUP, the whole 
-    // argument is passed in memory.
-    // 
-    // (d) If SSEUP is not preceded by SSE or SSEUP, it is converted to SSE.
-    //
-    // Some of these are enforced by the merging logic.  Others can arise
-    // only with unions; for example:
-    //   union { _Complex double; unsigned; }
-    //
-    // Note that clauses (b) and (c) were added in 0.98.
-    if (Hi == Memory)
-      Lo = Memory;
-    if (Hi == X87Up && Lo != X87 && honorsRevision0_98())
-      Lo = Memory;
-    if (Hi == SSEUp && Lo != SSE)
-      Hi = SSE;
+    postMerge(Size, Lo, Hi);
   }
 }
 
@@ -1321,10 +1351,10 @@ ABIArgInfo X86_64ABIInfo::getIndirectResult(QualType Ty) const {
   return ABIArgInfo::getIndirect(Align);
 }
 
-/// Get16ByteVectorType - The ABI specifies that a value should be passed in an
-/// full vector XMM register.  Pick an LLVM IR type that will be passed as a
+/// GetByteVectorType - The ABI specifies that a value should be passed in an
+/// full vector XMM/YMM register.  Pick an LLVM IR type that will be passed as a
 /// vector register.
-const llvm::Type *X86_64ABIInfo::Get16ByteVectorType(QualType Ty) const {
+const llvm::Type *X86_64ABIInfo::GetByteVectorType(QualType Ty) const {
   const llvm::Type *IRType = CGT.ConvertTypeRecursive(Ty);
 
   // Wrapper structs that just contain vectors are passed just like vectors,
@@ -1335,10 +1365,11 @@ const llvm::Type *X86_64ABIInfo::Get16ByteVectorType(QualType Ty) const {
     STy = dyn_cast<llvm::StructType>(IRType);
   }
 
-  // If the preferred type is a 16-byte vector, prefer to pass it.
+  // If the preferred type is a 16/32-byte vector, prefer to pass it.
   if (const llvm::VectorType *VT = dyn_cast<llvm::VectorType>(IRType)){
     const llvm::Type *EltTy = VT->getElementType();
-    if (VT->getBitWidth() == 128 &&
+    unsigned BitWidth = VT->getBitWidth();
+    if ((BitWidth == 128 || BitWidth == 256) &&
         (EltTy->isFloatTy() || EltTy->isDoubleTy() ||
          EltTy->isIntegerTy(8) || EltTy->isIntegerTy(16) ||
          EltTy->isIntegerTy(32) || EltTy->isIntegerTy(64) ||
@@ -1701,12 +1732,13 @@ classifyReturnType(QualType RetTy) const {
     break;
 
     // AMD64-ABI 3.2.3p4: Rule 5. If the class is SSEUP, the eightbyte
-    // is passed in the upper half of the last used SSE register.
+    // is passed in the next available eightbyte chunk if the last used
+    // vector register.
     //
     // SSEUP should always be preceded by SSE, just widen.
   case SSEUp:
     assert(Lo == SSE && "Unexpected SSEUp classification.");
-    ResType = Get16ByteVectorType(RetTy);
+    ResType = GetByteVectorType(RetTy);
     break;
 
     // AMD64-ABI 3.2.3p4: Rule 7. If the class is X87UP, the value is
@@ -1846,7 +1878,7 @@ ABIArgInfo X86_64ABIInfo::classifyArgumentType(QualType Ty, unsigned &neededInt,
     // register.  This only happens when 128-bit vectors are passed.
   case SSEUp:
     assert(Lo == SSE && "Unexpected SSEUp classification");
-    ResType = Get16ByteVectorType(Ty);
+    ResType = GetByteVectorType(Ty);
     break;
   }
 
diff --git a/test/CodeGen/x86_64-arguments.c b/test/CodeGen/x86_64-arguments.c
index 090e70d810..eeb4bafc11 100644
--- a/test/CodeGen/x86_64-arguments.c
+++ b/test/CodeGen/x86_64-arguments.c
@@ -262,3 +262,18 @@ void f9122143()
 // CHECK: define double @f36(double %arg.coerce)
 typedef unsigned v2i32 __attribute((__vector_size__(8)));
 v2i32 f36(v2i32 arg) { return arg; }
+
+// CHECK: declare void @f38(<8 x float>)
+// CHECK: declare void @f37(<8 x float>)
+typedef float __m256 __attribute__ ((__vector_size__ (32)));
+typedef struct {
+  __m256 m;
+} s256;
+
+s256 x38;
+__m256 x37;
+
+void f38(s256 x);
+void f37(__m256 x);
+void f39() { f38(x38); f37(x37); }
+