From ab5722e67794b3954c874a369086fc5f41ac46a5 Mon Sep 17 00:00:00 2001 From: Chris Lattner Date: Wed, 28 Jul 2010 23:47:21 +0000 Subject: [PATCH] pass argument vectors in a type that corresponds to the user type if possible. This improves the example to pass <4 x float> instead of <2 x double> but we still get awful code, and still don't get the return value right. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@109700 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/TargetInfo.cpp | 16 ++++++++++++++-- test/CodeGen/x86_64-arguments.c | 8 ++++++++ 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/lib/CodeGen/TargetInfo.cpp b/lib/CodeGen/TargetInfo.cpp index 613e2f6281..fc5e91fb7d 100644 --- a/lib/CodeGen/TargetInfo.cpp +++ b/lib/CodeGen/TargetInfo.cpp @@ -1432,10 +1432,22 @@ ABIArgInfo X86_64ABIInfo::classifyArgumentType(QualType Ty, // AMD64-ABI 3.2.3p3: Rule 4. If the class is SSEUP, the // eightbyte is passed in the upper half of the last used SSE - // register. + // register. This only happens when 128-bit vectors are passed. case SSEUp: - assert(Lo == SSE && "Unexpected SSEUp classification."); + assert(Lo == SSE && "Unexpected SSEUp classification"); ResType = llvm::VectorType::get(llvm::Type::getDoubleTy(VMContext), 2); + + // If the preferred type is a 16-byte vector, prefer to pass it. 
+ if (const llvm::VectorType *VT = + dyn_cast_or_null<llvm::VectorType>(PrefType)) { + const llvm::Type *EltTy = VT->getElementType(); + if (VT->getBitWidth() == 128 && + (EltTy->isFloatTy() || EltTy->isDoubleTy() || + EltTy->isIntegerTy(8) || EltTy->isIntegerTy(16) || + EltTy->isIntegerTy(32) || EltTy->isIntegerTy(64) || + EltTy->isIntegerTy(128))) + ResType = PrefType; + } break; } diff --git a/test/CodeGen/x86_64-arguments.c b/test/CodeGen/x86_64-arguments.c index 1b7967d0de..b4500d6de6 100644 --- a/test/CodeGen/x86_64-arguments.c +++ b/test/CodeGen/x86_64-arguments.c @@ -152,3 +152,11 @@ struct f23S f24(struct f23S *X, struct f24s *P2) { // CHECK: define %struct.f24s @f24(%struct.f23S* %X, %struct.f24s* %P2) } +typedef float v4f32 __attribute__((__vector_size__(16))); + +v4f32 f25(v4f32 X) { + // CHECK: define <2 x double> @f25(<4 x float> %X.coerce) + return X+X; +} + + -- 2.40.0