From 34008583693dc9db0cdbb9d448803773ceadb0db Mon Sep 17 00:00:00 2001
From: Justin Lebar <jlebar@google.com>
Date: Fri, 29 Apr 2016 23:05:19 +0000
Subject: [PATCH] [CUDA] Copy host builtin types to NVPTXTargetInfo.

Summary:
Host and device types must match; otherwise, when we pass values back and
forth between the host and device, we will get the wrong result.

This patch makes NVPTXTargetInfo inherit most of its type information
from the host's target info.

Reviewers: rsmith

Subscribers: cfe-commits, jhen, tra

Differential Revision: http://reviews.llvm.org/D19346

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@268131 91177308-0d34-0410-b5e6-96231b3b80d8
---
 include/clang/Basic/TargetOptions.h |  4 ++
 lib/Basic/Targets.cpp               | 64 ++++++++++++++++++++++++++++-
 lib/Frontend/CompilerInstance.cpp   |  3 +-
 lib/Frontend/CompilerInvocation.cpp |  6 +++
 test/Preprocessor/cuda-types.cu     | 27 ++++++++++++
 5 files changed, 102 insertions(+), 2 deletions(-)
 create mode 100644 test/Preprocessor/cuda-types.cu
diff --git a/include/clang/Basic/TargetOptions.h b/include/clang/Basic/TargetOptions.h
index 3718ee316a..81fb7b2117 100644
--- a/include/clang/Basic/TargetOptions.h
+++ b/include/clang/Basic/TargetOptions.h
@@ -27,6 +27,10 @@ public:
   /// target will be selected to match the host.
   std::string Triple;
 
+  /// When compiling for the device side, contains the triple used to compile
+  /// for the host.
+  std::string HostTriple;
+
   /// If given, the name of the target CPU to generate code for.
   std::string CPU;
 
diff --git a/lib/Basic/Targets.cpp b/lib/Basic/Targets.cpp
index 0a87cdf061..1df35ab846 100644
--- a/lib/Basic/Targets.cpp
+++ b/lib/Basic/Targets.cpp
@@ -65,6 +65,9 @@ static void defineCPUMacros(MacroBuilder &Builder, StringRef CPUName,
     Builder.defineMacro("__tune_" + CPUName + "__");
 }
 
+static TargetInfo *AllocateTarget(const llvm::Triple &Triple,
+                                  const TargetOptions &Opts);
+
 //===----------------------------------------------------------------------===//
 // Defines specific to certain operating systems.
 //===----------------------------------------------------------------------===//
@@ -1627,7 +1630,7 @@ class NVPTXTargetInfo : public TargetInfo {
   } GPU;
 
 public:
-  NVPTXTargetInfo(const llvm::Triple &Triple, const TargetOptions &)
+  NVPTXTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
       : TargetInfo(Triple) {
     BigEndian = false;
     TLSSupported = false;
@@ -1639,6 +1642,65 @@ public:
     NoAsmVariants = true;
     // Set the default GPU to sm20
     GPU = GK_SM20;
+
+    // If possible, get a TargetInfo for our host triple, so we can match its
+    // types.
+    llvm::Triple HostTriple(Opts.HostTriple);
+    if (HostTriple.isNVPTX())
+      return;
+    std::unique_ptr<TargetInfo> HostTarget(
+        AllocateTarget(llvm::Triple(Opts.HostTriple), Opts));
+    if (!HostTarget) {
+      return;
+    }
+
+    PointerWidth = HostTarget->getPointerWidth(/* AddrSpace = */ 0);
+    PointerAlign = HostTarget->getPointerAlign(/* AddrSpace = */ 0);
+    BoolWidth = HostTarget->getBoolWidth();
+    BoolAlign = HostTarget->getBoolAlign();
+    IntWidth = HostTarget->getIntWidth();
+    IntAlign = HostTarget->getIntAlign();
+    HalfWidth = HostTarget->getHalfWidth();
+    HalfAlign = HostTarget->getHalfAlign();
+    FloatWidth = HostTarget->getFloatWidth();
+    FloatAlign = HostTarget->getFloatAlign();
+    DoubleWidth = HostTarget->getDoubleWidth();
+    DoubleAlign = HostTarget->getDoubleAlign();
+    LongWidth = HostTarget->getLongWidth();
+    LongAlign = HostTarget->getLongAlign();
+    LongLongWidth = HostTarget->getLongLongWidth();
+    LongLongAlign = HostTarget->getLongLongAlign();
+    MinGlobalAlign = HostTarget->getMinGlobalAlign();
+    DefaultAlignForAttributeAligned =
+        HostTarget->getDefaultAlignForAttributeAligned();
+    SizeType = HostTarget->getSizeType();
+    IntMaxType = HostTarget->getIntMaxType();
+    PtrDiffType = HostTarget->getPtrDiffType(/* AddrSpace = */ 0);
+    IntPtrType = HostTarget->getIntPtrType();
+    WCharType = HostTarget->getWCharType();
+    WIntType = HostTarget->getWIntType();
+    Char16Type = HostTarget->getChar16Type();
+    Char32Type = HostTarget->getChar32Type();
+    Int64Type = HostTarget->getInt64Type();
+    SigAtomicType = HostTarget->getSigAtomicType();
+    ProcessIDType = HostTarget->getProcessIDType();
+
+    UseBitFieldTypeAlignment = HostTarget->useBitFieldTypeAlignment();
+    UseZeroLengthBitfieldAlignment =
+        HostTarget->useZeroLengthBitfieldAlignment();
+    UseExplicitBitFieldAlignment = HostTarget->useExplicitBitFieldAlignment();
+    ZeroLengthBitfieldBoundary = HostTarget->getZeroLengthBitfieldBoundary();
+
+    // Properties intentionally not copied from host:
+    // - LargeArrayMinWidth, LargeArrayAlign: Not visible across the
+    //   host/device boundary.
+    // - SuitableAlign: Not visible across the host/device boundary, and may
+    //   correctly be different on host/device, e.g. if host has wider vector
+    //   types than device.
+    // - LongDoubleWidth, LongDoubleAlign: nvptx's long double type is the same
+    //   as its double type, but that's not necessarily true on the host.
+    //   TODO: nvcc emits a warning when using long double on device; we should
+    //   do the same.
   }
   void getTargetDefines(const LangOptions &Opts,
                         MacroBuilder &Builder) const override {
diff --git a/lib/Frontend/CompilerInstance.cpp b/lib/Frontend/CompilerInstance.cpp
index cac73befff..d6f4749db2 100644
--- a/lib/Frontend/CompilerInstance.cpp
+++ b/lib/Frontend/CompilerInstance.cpp
@@ -836,8 +836,9 @@ bool CompilerInstance::ExecuteAction(FrontendAction &Act) {
 
   // Create TargetInfo for the other side of CUDA compilation.
   if (getLangOpts().CUDA && !getFrontendOpts().AuxTriple.empty()) {
-    std::shared_ptr<TargetOptions> TO(new TargetOptions);
+    auto TO = std::make_shared<TargetOptions>();
     TO->Triple = getFrontendOpts().AuxTriple;
+    TO->HostTriple = getTarget().getTriple().str();
     setAuxTarget(TargetInfo::CreateTargetInfo(getDiagnostics(), TO));
   }
 
diff --git a/lib/Frontend/CompilerInvocation.cpp b/lib/Frontend/CompilerInvocation.cpp
index 4bef159f3b..02a570076b 100644
--- a/lib/Frontend/CompilerInvocation.cpp
+++ b/lib/Frontend/CompilerInvocation.cpp
@@ -2155,6 +2155,12 @@ bool CompilerInvocation::CreateFromArgs(CompilerInvocation &Res,
       LangOpts.ObjCExceptions = 1;
   }
 
+  // During CUDA device-side compilation, the aux triple is the triple used for
+  // host compilation.
+  if (LangOpts.CUDA && LangOpts.CUDAIsDevice) {
+    Res.getTargetOpts().HostTriple = Res.getFrontendOpts().AuxTriple;
+  }
+
   // FIXME: Override value name discarding when asan or msan is used because the
   // backend passes depend on the name of the alloca in order to print out
   // names.
diff --git a/test/Preprocessor/cuda-types.cu b/test/Preprocessor/cuda-types.cu
new file mode 100644
index 0000000000..dd8eef4aae
--- /dev/null
+++ b/test/Preprocessor/cuda-types.cu
@@ -0,0 +1,27 @@
+// Check that types, widths, etc. match on the host and device sides of CUDA
+// compilations.  Note that we filter out long double, as this is intentionally
+// different on host and device.
+
+// RUN: %clang --cuda-host-only -nocudainc -target i386-unknown-linux-gnu -x cuda -E -dM -o - /dev/null > %T/i386-host-defines
+// RUN: %clang --cuda-device-only -nocudainc -target i386-unknown-linux-gnu -x cuda -E -dM -o - /dev/null > %T/i386-device-defines
+// RUN: grep 'define __[^ ]*\(TYPE\|MAX\|SIZEOF\|WIDTH\)' %T/i386-host-defines   | grep -v '__LDBL\|_LONG_DOUBLE' > %T/i386-host-defines-filtered
+// RUN: grep 'define __[^ ]*\(TYPE\|MAX\|SIZEOF\|WIDTH\)' %T/i386-device-defines | grep -v '__LDBL\|_LONG_DOUBLE' > %T/i386-device-defines-filtered
+// RUN: diff %T/i386-host-defines-filtered %T/i386-device-defines-filtered
+
+// RUN: %clang --cuda-host-only -nocudainc -target x86_64-unknown-linux-gnu -x cuda -E -dM -o - /dev/null > %T/x86_64-host-defines
+// RUN: %clang --cuda-device-only -nocudainc -target x86_64-unknown-linux-gnu -x cuda -E -dM -o - /dev/null > %T/x86_64-device-defines
+// RUN: grep 'define __[^ ]*\(TYPE\|MAX\|SIZEOF\|WIDTH\)' %T/x86_64-host-defines   | grep -v '__LDBL\|_LONG_DOUBLE' > %T/x86_64-host-defines-filtered
+// RUN: grep 'define __[^ ]*\(TYPE\|MAX\|SIZEOF\|WIDTH\)' %T/x86_64-device-defines | grep -v '__LDBL\|_LONG_DOUBLE' > %T/x86_64-device-defines-filtered
+// RUN: diff %T/x86_64-host-defines-filtered %T/x86_64-device-defines-filtered
+
+// RUN: %clang --cuda-host-only -nocudainc -target powerpc64-unknown-linux-gnu -x cuda -E -dM -o - /dev/null > %T/powerpc64-host-defines
+// RUN: %clang --cuda-device-only -nocudainc -target powerpc64-unknown-linux-gnu -x cuda -E -dM -o - /dev/null > %T/powerpc64-device-defines
+// RUN: grep 'define __[^ ]*\(TYPE\|MAX\|SIZEOF\|WIDTH\)' %T/powerpc64-host-defines   | grep -v '__LDBL\|_LONG_DOUBLE' > %T/powerpc64-host-defines-filtered
+// RUN: grep 'define __[^ ]*\(TYPE\|MAX\|SIZEOF\|WIDTH\)' %T/powerpc64-device-defines | grep -v '__LDBL\|_LONG_DOUBLE' > %T/powerpc64-device-defines-filtered
+// RUN: diff %T/powerpc64-host-defines-filtered %T/powerpc64-device-defines-filtered
+
+// RUN: %clang --cuda-host-only -nocudainc -target nvptx-nvidia-cuda -x cuda -E -dM -o - /dev/null > %T/nvptx-host-defines
+// RUN: %clang --cuda-device-only -nocudainc -target nvptx-nvidia-cuda -x cuda -E -dM -o - /dev/null > %T/nvptx-device-defines
+// RUN: grep 'define __[^ ]*\(TYPE\|MAX\|SIZEOF\|WIDTH\)' %T/nvptx-host-defines   | grep -v '__LDBL\|_LONG_DOUBLE' > %T/nvptx-host-defines-filtered
+// RUN: grep 'define __[^ ]*\(TYPE\|MAX\|SIZEOF\|WIDTH\)' %T/nvptx-device-defines | grep -v '__LDBL\|_LONG_DOUBLE' > %T/nvptx-device-defines-filtered
+// RUN: diff %T/nvptx-host-defines-filtered %T/nvptx-device-defines-filtered
-- 
2.40.0