From ccafadb68f5a8132a4ee23f441cf5d6976a4133b Mon Sep 17 00:00:00 2001
From: Bruno Cardoso Lopes <bruno.cardoso@gmail.com>
Date: Tue, 12 Jul 2011 00:30:27 +0000
Subject: [PATCH] Fix one x86_64 ABI issue and fix the test to actually look for
 the right thing, which is: { <4 x float>, <4 x float> } should continue to go
 through memory.

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@134946 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/CodeGen/TargetInfo.cpp      | 9 ++++++++-
 test/CodeGen/x86_64-arguments.c | 4 ++--
 2 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/lib/CodeGen/TargetInfo.cpp b/lib/CodeGen/TargetInfo.cpp
index 0c070a19d8..7c850afbf8 100644
--- a/lib/CodeGen/TargetInfo.cpp
+++ b/lib/CodeGen/TargetInfo.cpp
@@ -1228,6 +1228,13 @@ void X86_64ABIInfo::classify(QualType Ty, uint64_t OffsetBase,
 
     const RecordDecl *RD = RT->getDecl();
 
+    // The only case where a 256-bit wide vector can be used is when the struct
+    // contains a single 256-bit element. Since Lo and Hi logic isn't extended
+    // to work for sizes wider than 128, check early and fall back to memory.
+    RecordDecl::field_iterator FirstElt = RD->field_begin();
+    if (Size > 128 && getContext().getTypeSize(FirstElt->getType()) != 256)
+      return;
+
     // Assume variable sized types are passed in memory.
     if (RD->hasFlexibleArrayMember())
       return;
@@ -1263,7 +1270,7 @@ void X86_64ABIInfo::classify(QualType Ty, uint64_t OffsetBase,
 
     // Classify the fields one at a time, merging the results.
     unsigned idx = 0;
-    for (RecordDecl::field_iterator i = RD->field_begin(), e = RD->field_end();
+    for (RecordDecl::field_iterator i = FirstElt, e = RD->field_end();
            i != e; ++i, ++idx) {
       uint64_t Offset = OffsetBase + Layout.getFieldOffset(idx);
       bool BitField = i->isBitField();
diff --git a/test/CodeGen/x86_64-arguments.c b/test/CodeGen/x86_64-arguments.c
index 38278f49d8..3d4016db82 100644
--- a/test/CodeGen/x86_64-arguments.c
+++ b/test/CodeGen/x86_64-arguments.c
@@ -280,9 +280,9 @@ void f39() { f38(x38); f37(x37); }
 // Make sure that the struct below is passed in the same way
 // regardless of avx being used
 //
-// CHECK: define void @func41(<2 x double> %s.coerce)
+// CHECK: declare void @func40(%struct.t128* byval align 16)
 typedef float __m128 __attribute__ ((__vector_size__ (16)));
-typedef struct {
+typedef struct t128 {
   __m128 m;
   __m128 n;
 } two128;
-- 
2.40.0