From 1bdec838c54117d9befe6dfcd3336a0ba5de9e0d Mon Sep 17 00:00:00 2001
From: Jonas Hahnfeld <hahnjo@hahnjo.de>
Date: Sat, 25 Aug 2018 13:42:40 +0000
Subject: [PATCH] [CUDA/OpenMP] Define only some host macros during device
 compilation

When compiling CUDA or OpenMP device code Clang parses header files
that expect certain predefined macros from the host architecture. To
make this work the compiler passes the host triple via the -aux-triple
argument and (until now) pulls in all macros for that "auxiliary triple"
unconditionally.

However, this results in defines like __SSE_MATH__ that will trigger
inline assembly making use of the "advertised" target features. See
the discussion of D47849 and PR38464 for a detailed explanation of
the encountered problems.

Instead of blacklisting "known bad" examples this patch starts adding
defines that are needed for certain headers like bits/wordsize.h and
bits/mathinline.h.
The disadvantage of this approach is that it decouples the definitions
from their target toolchain. However, in my opinion it's more important
to keep the definitions for one header close together. For one, this
allows clear documentation of why these particular defines are needed.
Furthermore, it simplifies maintenance because adding defines for a new
header or support for a new aux-triple only needs to touch one piece
of code.

Differential Revision: https://reviews.llvm.org/D50845

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@340681 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Frontend/InitPreprocessor.cpp | 46 ++++++++++++++++++++---
 test/Preprocessor/aux-triple.c    | 62 +++++++++++++++++++++++++++++++
 test/SemaCUDA/builtins.cu         |  4 +-
 3 files changed, 104 insertions(+), 8 deletions(-)
 create mode 100644 test/Preprocessor/aux-triple.c

diff --git a/lib/Frontend/InitPreprocessor.cpp b/lib/Frontend/InitPreprocessor.cpp
index e576fc098d..05192555c5 100644
--- a/lib/Frontend/InitPreprocessor.cpp
+++ b/lib/Frontend/InitPreprocessor.cpp
@@ -1099,6 +1099,44 @@ static void InitializePredefinedMacros(const TargetInfo &TI,
   TI.getTargetDefines(LangOpts, Builder);
 }
 
+/// Define the host macros selected by the arch and OS of AuxTI's triple.
+static void InitializePredefinedAuxMacros(const TargetInfo &AuxTI,
+                                          const LangOptions &LangOpts,
+                                          MacroBuilder &Builder) {
+  const llvm::Triple &AuxTriple = AuxTI.getTriple();
+
+  // Define basic target macros needed by at least bits/wordsize.h and
+  // bits/mathinline.h.
+  switch (AuxTriple.getArch()) {
+  case llvm::Triple::x86_64:
+    Builder.defineMacro("__x86_64__");
+    break;
+  case llvm::Triple::ppc64:
+  case llvm::Triple::ppc64le:
+    Builder.defineMacro("__powerpc64__");
+    break;
+  default:
+    break;
+  }
+
+  // libc++ needs to find out the object file format and threading API.
+  if (AuxTriple.getOS() == llvm::Triple::Linux) {
+    Builder.defineMacro("__ELF__");
+    Builder.defineMacro("__linux__");
+    // Used in features.h. If this is omitted, math.h doesn't declare float
+    // versions of the functions in bits/mathcalls.h.
+    if (LangOpts.CPlusPlus)
+      Builder.defineMacro("_GNU_SOURCE");
+  } else if (AuxTriple.isOSDarwin()) {
+    Builder.defineMacro("__APPLE__");
+    Builder.defineMacro("__MACH__");
+  } else if (AuxTriple.isOSWindows()) {
+    Builder.defineMacro("_WIN32");
+    if (AuxTriple.isWindowsGNUEnvironment())
+      Builder.defineMacro("__MINGW32__");
+  }
+}
+
 /// InitializePreprocessor - Initialize the preprocessor getting it and the
 /// environment ready to process a single file. This returns true on error.
 ///
@@ -1120,13 +1158,9 @@ void clang::InitializePreprocessor(
 
   // Install things like __POWERPC__, __GNUC__, etc into the macro table.
   if (InitOpts.UsePredefines) {
-    // FIXME: This will create multiple definitions for most of the predefined
-    // macros. This is not the right way to handle this.
-    if ((LangOpts.CUDA || LangOpts.OpenMPIsDevice) && PP.getAuxTargetInfo())
-      InitializePredefinedMacros(*PP.getAuxTargetInfo(), LangOpts, FEOpts,
-                                 Builder);
-
     InitializePredefinedMacros(PP.getTargetInfo(), LangOpts, FEOpts, Builder);
+    if ((LangOpts.CUDA || LangOpts.OpenMPIsDevice) && PP.getAuxTargetInfo())
+      InitializePredefinedAuxMacros(*PP.getAuxTargetInfo(), LangOpts, Builder);
 
     // Install definitions to make Objective-C++ ARC work well with various
     // C++ Standard Library implementations.
diff --git a/test/Preprocessor/aux-triple.c b/test/Preprocessor/aux-triple.c
new file mode 100644
index 0000000000..0211a15673
--- /dev/null
+++ b/test/Preprocessor/aux-triple.c
@@ -0,0 +1,62 @@
+// Ensure that Clang sets some very basic target defines based on -aux-triple.
+
+// RUN: %clang_cc1 -E -dM -ffreestanding < /dev/null \
+// RUN:     -triple nvptx64-none-none \
+// RUN:   | FileCheck -match-full-lines -check-prefixes NVPTX64,NONE %s
+// RUN: %clang_cc1 -x c++ -E -dM -ffreestanding < /dev/null \
+// RUN:     -triple nvptx64-none-none \
+// RUN:   | FileCheck -match-full-lines -check-prefixes NVPTX64,NONE %s
+// RUN: %clang_cc1 -x cuda -E -dM -ffreestanding < /dev/null \
+// RUN:     -triple nvptx64-none-none \
+// RUN:   | FileCheck -match-full-lines -check-prefixes NVPTX64,NONE %s
+
+// CUDA:
+// RUN: %clang_cc1 -x cuda -E -dM -ffreestanding < /dev/null \
+// RUN:     -triple nvptx64-none-none -aux-triple powerpc64le-unknown-linux-gnu \
+// RUN:   | FileCheck -match-full-lines %s \
+// RUN:     -check-prefixes NVPTX64,PPC64,LINUX,LINUX-CPP
+// RUN: %clang_cc1 -x cuda -E -dM -ffreestanding < /dev/null \
+// RUN:     -triple nvptx64-none-none -aux-triple x86_64-unknown-linux-gnu \
+// RUN:   | FileCheck -match-full-lines %s \
+// RUN:     -check-prefixes NVPTX64,X86_64,LINUX,LINUX-CPP
+
+// OpenMP:
+// RUN: %clang_cc1 -E -dM -ffreestanding < /dev/null \
+// RUN:     -fopenmp -fopenmp-is-device -triple nvptx64-none-none \
+// RUN:     -aux-triple powerpc64le-unknown-linux-gnu \
+// RUN:   | FileCheck -match-full-lines -check-prefixes NVPTX64,PPC64,LINUX %s
+// RUN: %clang_cc1 -E -dM -ffreestanding < /dev/null \
+// RUN:     -fopenmp -fopenmp-is-device -triple nvptx64-none-none \
+// RUN:     -aux-triple x86_64-unknown-linux-gnu \
+// RUN:   | FileCheck -match-full-lines -check-prefixes NVPTX64,X86_64,LINUX %s
+// RUN: %clang_cc1 -x c++ -E -dM -ffreestanding < /dev/null \
+// RUN:     -fopenmp -fopenmp-is-device -triple nvptx64-none-none \
+// RUN:     -aux-triple powerpc64le-unknown-linux-gnu \
+// RUN:   | FileCheck -match-full-lines %s \
+// RUN:     -check-prefixes NVPTX64,PPC64,LINUX,LINUX-CPP
+// RUN: %clang_cc1 -x c++ -E -dM -ffreestanding < /dev/null \
+// RUN:     -fopenmp -fopenmp-is-device -triple nvptx64-none-none \
+// RUN:     -aux-triple x86_64-unknown-linux-gnu \
+// RUN:   | FileCheck -match-full-lines %s \
+// RUN:     -check-prefixes NVPTX64,X86_64,LINUX,LINUX-CPP
+
+// NONE-NOT:#define _GNU_SOURCE
+// LINUX-CPP:#define _GNU_SOURCE 1
+
+// NVPTX64:#define _LP64 1
+
+// NONE-NOT:#define __ELF__
+// LINUX:#define __ELF__ 1
+
+// NVPTX64:#define __LP64__ 1
+// NVPTX64:#define __NVPTX__ 1
+// NVPTX64:#define __PTX__ 1
+
+// NONE-NOT:#define __linux__
+// LINUX:#define __linux__ 1
+
+// NONE-NOT:#define __powerpc64__
+// PPC64:#define __powerpc64__ 1
+
+// NONE-NOT:#define __x86_64__
+// X86_64:#define __x86_64__ 1
diff --git a/test/SemaCUDA/builtins.cu b/test/SemaCUDA/builtins.cu
index 814fda2ac7..41418b5a27 100644
--- a/test/SemaCUDA/builtins.cu
+++ b/test/SemaCUDA/builtins.cu
@@ -12,8 +12,8 @@
 // RUN:     -aux-triple x86_64-unknown-unknown \
 // RUN:     -fsyntax-only -verify %s
 
-#if !(defined(__amd64__) && defined(__PTX__))
-#error "Expected to see preprocessor macros from both sides of compilation."
+#if !defined(__x86_64__)
+#error "Expected to see preprocessor macros from the host."
 #endif
 
 void hf() {
-- 
2.40.0