From 453a2ffd484445bd158c9c131fb144e26b00e944 Mon Sep 17 00:00:00 2001
From: Yaxun Liu <Yaxun.Liu@amd.com>
Date: Thu, 31 Jan 2019 21:57:51 +0000
Subject: [PATCH] Do not copy long double and 128-bit fp format from aux target
 for AMDGPU

rC352620 caused regressions because it copied floating point format from
aux target.

floating point format decides whether extended long double is supported.
It is x86_fp80 on x86 but IEEE double on amdgcn.

Document usage of long doubel type in HIP programming guide
https://github.com/ROCm-Developer-Tools/HIP/pull/890

Differential Revision: https://reviews.llvm.org/D57527


git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@352801 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Basic/Targets/AMDGPU.cpp | 11 +++++++++++
 test/CodeGenCUDA/types.cu    | 10 ++++++++++
 2 files changed, 21 insertions(+)
 create mode 100644 test/CodeGenCUDA/types.cu

diff --git a/lib/Basic/Targets/AMDGPU.cpp b/lib/Basic/Targets/AMDGPU.cpp
index 19dc05f5b0..b04d894855 100644
--- a/lib/Basic/Targets/AMDGPU.cpp
+++ b/lib/Basic/Targets/AMDGPU.cpp
@@ -307,5 +307,16 @@ void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts,
 }
 
 void AMDGPUTargetInfo::setAuxTarget(const TargetInfo *Aux) {
+  assert(HalfFormat == Aux->HalfFormat);
+  assert(FloatFormat == Aux->FloatFormat);
+  assert(DoubleFormat == Aux->DoubleFormat);
+
+  // On x86_64 long double is 80-bit extended precision format, which is
+  // not supported by AMDGPU. 128-bit floating point format is also not
+  // supported by AMDGPU. Therefore keep its own format for these two types.
+  auto SaveLongDoubleFormat = LongDoubleFormat;
+  auto SaveFloat128Format = Float128Format;
   copyAuxTarget(Aux);
+  LongDoubleFormat = SaveLongDoubleFormat;
+  Float128Format = SaveFloat128Format;
 }
diff --git a/test/CodeGenCUDA/types.cu b/test/CodeGenCUDA/types.cu
new file mode 100644
index 0000000000..43c2b85ea7
--- /dev/null
+++ b/test/CodeGenCUDA/types.cu
@@ -0,0 +1,10 @@
+// RUN: %clang_cc1 -triple amdgcn -aux-triple x86_64 -fcuda-is-device -emit-llvm %s -o - | FileCheck -check-prefix=DEV %s
+// RUN: %clang_cc1 -triple x86_64 -aux-triple amdgcn -emit-llvm %s -o - | FileCheck -check-prefix=HOST %s
+
+#include "Inputs/cuda.h"
+
+// HOST: @ld_host = global x86_fp80 0xK00000000000000000000
+long double ld_host;
+
+// DEV: @ld_device = addrspace(1) externally_initialized global double 0.000000e+00
+__device__ long double ld_device;
-- 
2.40.0