From: Chris Lattner <sabre@nondot.org>
Date: Wed, 25 Aug 2010 23:39:14 +0000 (+0000)
Subject: Finally pass "two floats in a 64-bit unit" as a <2 x float> instead of
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=22fd4baf2eba2103e2b41e463f1a5f6486c398fb;p=clang

Finally pass "two floats in a 64-bit unit" as a <2 x float> instead of
as a double in the x86-64 ABI.  This allows us to generate much better
code for certain things, e.g.:

_Complex float f32(_Complex float A, _Complex float B) {
  return A+B;
}

Used to compile into (look at the integer silliness!):

_f32:                                   ## @f32
## BB#0:                                ## %entry
	movd	%xmm1, %rax
	movd	%eax, %xmm1
	movd	%xmm0, %rcx
	movd	%ecx, %xmm0
	addss	%xmm1, %xmm0
	movd	%xmm0, %edx
	shrq	$32, %rax
	movd	%eax, %xmm0
	shrq	$32, %rcx
	movd	%ecx, %xmm1
	addss	%xmm0, %xmm1
	movd	%xmm1, %eax
	shlq	$32, %rax
	addq	%rdx, %rax
	movd	%rax, %xmm0
	ret

Now we get:

_f32:                                   ## @f32
	movdqa	%xmm0, %xmm2
	addss	%xmm1, %xmm2
	pshufd	$16, %xmm2, %xmm2
	pshufd	$1, %xmm1, %xmm1
	pshufd	$1, %xmm0, %xmm0
	addss	%xmm1, %xmm0
	pshufd	$16, %xmm0, %xmm1
	movdqa	%xmm2, %xmm0
	unpcklps	%xmm1, %xmm0
	ret

and compile stuff like:

extern float _Complex ccoshf( float _Complex ) ;
float _Complex ccosf ( float _Complex z ) {
 float _Complex iz;
 (__real__ iz) = -(__imag__ z);
 (__imag__ iz) = (__real__ z);
 return ccoshf(iz);
}

into:

_ccosf:                                 ## @ccosf
## BB#0:                                ## %entry
	pshufd	$1, %xmm0, %xmm1
	xorps	LCPI4_0(%rip), %xmm1
	unpcklps	%xmm0, %xmm1
	movaps	%xmm1, %xmm0
	jmp	_ccoshf                 ## TAILCALL

instead of:

_ccosf:                                 ## @ccosf
## BB#0:                                ## %entry
	movd	%xmm0, %rax
	movq	%rax, %rcx
	shlq	$32, %rcx
	shrq	$32, %rax
	xorl	$-2147483648, %eax      ## imm = 0xFFFFFFFF80000000
	addq	%rcx, %rax
	movd	%rax, %xmm0
	jmp	_ccoshf                 ## TAILCALL


There is still "stuff to be done" here for the struct case,
but this resolves rdar://6379669 - [x86-64 ABI] Pass and return
_Complex float / double efficiently


git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@112111 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/CodeGen/TargetInfo.cpp b/lib/CodeGen/TargetInfo.cpp
index a08e7f4b80..c5b858dbfa 100644
--- a/lib/CodeGen/TargetInfo.cpp
+++ b/lib/CodeGen/TargetInfo.cpp
@@ -1294,12 +1294,8 @@ GetSSETypeAtOffset(const llvm::Type *IRType, unsigned IROffset,
   // offset+0 and offset+4.  Walk the LLVM IR type to find out if this is the
   // case.
   if (ContainsFloatAtOffset(IRType, IROffset, getTargetData()) &&
-      ContainsFloatAtOffset(IRType, IROffset+4, getTargetData())) {
-    // FIXME: <2 x float> doesn't pass as one XMM register yet.  Don't enable
-    // this code until it does.
-    //return llvm::VectorType::get(llvm::Type::getFloatTy(getVMContext()), 2);
-
-  }
+      ContainsFloatAtOffset(IRType, IROffset+4, getTargetData()))
+    return llvm::VectorType::get(llvm::Type::getFloatTy(getVMContext()), 2);
 
   return llvm::Type::getDoubleTy(getVMContext());
 }
diff --git a/test/CodeGen/x86_64-arguments.c b/test/CodeGen/x86_64-arguments.c
index 9fd08dd4a8..039dd27869 100644
--- a/test/CodeGen/x86_64-arguments.c
+++ b/test/CodeGen/x86_64-arguments.c
@@ -1,5 +1,4 @@
-// RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -o %t %s
-// RUN: FileCheck < %t %s
+// RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -o - %s| FileCheck %s
 
 // CHECK: %0 = type { i64, double }
 
@@ -215,8 +214,14 @@ void f30(struct S0 p_4) {
 // rdar://8251384
 struct f31foo { float a, b, c; };
 float f31(struct f31foo X) {
-  // CHECK: define float @f31(double %X.coerce0, float %X.coerce1)
+  // CHECK: define float @f31(<2 x float> %X.coerce0, float %X.coerce1)
   return X.c;
 }
 
+_Complex float f32(_Complex float A, _Complex float B) {
+  // rdar://6379669
+  // CHECK: define <2 x float> @f32(<2 x float> %A.coerce, <2 x float> %B.coerce)
+  return A+B;
+}
+