From ab5722e67794b3954c874a369086fc5f41ac46a5 Mon Sep 17 00:00:00 2001
From: Chris Lattner <sabre@nondot.org>
Date: Wed, 28 Jul 2010 23:47:21 +0000
Subject: [PATCH] Pass vector arguments in a type that corresponds to the user
 type if possible.  This improves the example to pass <4 x float> instead of
 <2 x double>, but we still get awful code and still don't get the return
 value right.

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@109700 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/CodeGen/TargetInfo.cpp      | 16 ++++++++++++++--
 test/CodeGen/x86_64-arguments.c |  8 ++++++++
 2 files changed, 22 insertions(+), 2 deletions(-)

diff --git a/lib/CodeGen/TargetInfo.cpp b/lib/CodeGen/TargetInfo.cpp
index 613e2f6281..fc5e91fb7d 100644
--- a/lib/CodeGen/TargetInfo.cpp
+++ b/lib/CodeGen/TargetInfo.cpp
@@ -1432,10 +1432,22 @@ ABIArgInfo X86_64ABIInfo::classifyArgumentType(QualType Ty,
 
     // AMD64-ABI 3.2.3p3: Rule 4. If the class is SSEUP, the
     // eightbyte is passed in the upper half of the last used SSE
-    // register.
+    // register.  This only happens when 128-bit vectors are passed. 
   case SSEUp:
-    assert(Lo == SSE && "Unexpected SSEUp classification.");
+    assert(Lo == SSE && "Unexpected SSEUp classification");
     ResType = llvm::VectorType::get(llvm::Type::getDoubleTy(VMContext), 2);
+      
+    // If the preferred type is a 16-byte vector, prefer to pass it.
+    if (const llvm::VectorType *VT =
+          dyn_cast_or_null<llvm::VectorType>(PrefType)) {
+      const llvm::Type *EltTy = VT->getElementType();
+      if (VT->getBitWidth() == 128 &&
+          (EltTy->isFloatTy() || EltTy->isDoubleTy() ||
+           EltTy->isIntegerTy(8) || EltTy->isIntegerTy(16) ||
+           EltTy->isIntegerTy(32) || EltTy->isIntegerTy(64) ||
+           EltTy->isIntegerTy(128)))
+        ResType = PrefType;
+    }
     break;
   }
 
diff --git a/test/CodeGen/x86_64-arguments.c b/test/CodeGen/x86_64-arguments.c
index 1b7967d0de..b4500d6de6 100644
--- a/test/CodeGen/x86_64-arguments.c
+++ b/test/CodeGen/x86_64-arguments.c
@@ -152,3 +152,11 @@ struct f23S f24(struct f23S *X, struct f24s *P2) {
   // CHECK: define %struct.f24s @f24(%struct.f23S* %X, %struct.f24s* %P2)
 }
 
+typedef float v4f32 __attribute__((__vector_size__(16)));
+
+v4f32 f25(v4f32 X) {
+  // CHECK: define <2 x double> @f25(<4 x float> %X.coerce)
+  return X+X;  // 16-byte float vector in and out: arg is checked as <4 x float>, return still as <2 x double>
+}
+
+
-- 
2.40.0