[CUDA] use -aux-triple to pass target triple of opposite side of compilation

author Artem Belevich <tra@google.com>
Tue, 17 Nov 2015 22:28:40 +0000 (22:28 +0000)

committer Artem Belevich <tra@google.com>
Tue, 17 Nov 2015 22:28:40 +0000 (22:28 +0000)

diff --git a/include/clang/Driver/Action.h b/include/clang/Driver/Action.h

index 077f1a9efb26ad5e1ac9a5248544cacae9f0af7a..fc31d4b0dec2d6a0058b512a5f1e9793dbbab730 100644 (file)
--- a/include/clang/Driver/Action.h
+++ b/include/clang/Driver/Action.h
@@ -139,17 +139,15 @@ class CudaDeviceAction : public Action {
    virtual void anchor();
    /// GPU architecture to bind -- e.g 'sm_35'.
    const char *GpuArchName;
-  const char *DeviceTriple;
    /// True when action results are not consumed by the host action (e.g when
    /// -fsyntax-only or --cuda-device-only options are used).
    bool AtTopLevel;
  
  public:
    CudaDeviceAction(std::unique_ptr<Action> Input, const char *ArchName,
-                   const char *DeviceTriple, bool AtTopLevel);
+                   bool AtTopLevel);
  
    const char *getGpuArchName() const { return GpuArchName; }
-  const char *getDeviceTriple() const { return DeviceTriple; }
    bool isAtTopLevel() const { return AtTopLevel; }
  
    static bool classof(const Action *A) {
@@ -160,16 +158,13 @@ public:
  class CudaHostAction : public Action {
    virtual void anchor();
    ActionList DeviceActions;
-  const char *DeviceTriple;
  
  public:
-  CudaHostAction(std::unique_ptr<Action> Input, const ActionList &DeviceActions,
-                 const char *DeviceTriple);
+  CudaHostAction(std::unique_ptr<Action> Input,
+                 const ActionList &DeviceActions);
    ~CudaHostAction() override;
  
-  ActionList &getDeviceActions() { return DeviceActions; }
    const ActionList &getDeviceActions() const { return DeviceActions; }
-  const char *getDeviceTriple() const { return DeviceTriple; }
  
    static bool classof(const Action *A) { return A->getKind() == CudaHostClass; }
  };
diff --git a/include/clang/Driver/Compilation.h b/include/clang/Driver/Compilation.h

index f0c1bedb3989002d256c278fa12de4bda8136f5a..6a93fa2beac22bcaf0ef5b4e31c7c0da2ac5418d 100644 (file)
--- a/include/clang/Driver/Compilation.h
+++ b/include/clang/Driver/Compilation.h
@@ -38,6 +38,9 @@ class Compilation {
    /// The default tool chain.
    const ToolChain &DefaultToolChain;
  
+  const ToolChain *CudaHostToolChain;
+  const ToolChain *CudaDeviceToolChain;
+
    /// The original (untranslated) input argument list.
    llvm::opt::InputArgList *Args;
  
@@ -81,6 +84,17 @@ public:
    const Driver &getDriver() const { return TheDriver; }
  
    const ToolChain &getDefaultToolChain() const { return DefaultToolChain; }
+  const ToolChain *getCudaHostToolChain() const { return CudaHostToolChain; }
+  const ToolChain *getCudaDeviceToolChain() const {
+    return CudaDeviceToolChain;
+  }
+
+  void setCudaHostToolChain(const ToolChain *HostToolChain) {
+    CudaHostToolChain = HostToolChain;
+  }
+  void setCudaDeviceToolChain(const ToolChain *DeviceToolChain) {
+    CudaDeviceToolChain = DeviceToolChain;
+  }
  
    const llvm::opt::InputArgList &getInputArgs() const { return *Args; }
  
diff --git a/include/clang/Driver/Driver.h b/include/clang/Driver/Driver.h

index 213ae38843ed74e7c57f21204b033691e1cb58fd..c7bd9161f167586dd6fe239b9f8e02d8949b4d83 100644 (file)
--- a/include/clang/Driver/Driver.h
+++ b/include/clang/Driver/Driver.h
@@ -297,22 +297,23 @@ public:
    /// BuildActions - Construct the list of actions to perform for the
    /// given arguments, which are only done for a single architecture.
    ///
+  /// \param C - The compilation that is being built.
    /// \param TC - The default host tool chain.
    /// \param Args - The input arguments.
    /// \param Actions - The list to store the resulting actions onto.
-  void BuildActions(const ToolChain &TC, llvm::opt::DerivedArgList &Args,
-                    const InputList &Inputs, ActionList &Actions) const;
+  void BuildActions(Compilation &C, const ToolChain &TC,
+                    llvm::opt::DerivedArgList &Args, const InputList &Inputs,
+                    ActionList &Actions) const;
  
    /// BuildUniversalActions - Construct the list of actions to perform
    /// for the given arguments, which may require a universal build.
    ///
+  /// \param C - The compilation that is being built.
    /// \param TC - The default host tool chain.
    /// \param Args - The input arguments.
    /// \param Actions - The list to store the resulting actions onto.
-  void BuildUniversalActions(const ToolChain &TC,
-                             llvm::opt::DerivedArgList &Args,
-                             const InputList &BAInputs,
-                             ActionList &Actions) const;
+  void BuildUniversalActions(Compilation &C, const ToolChain &TC,
+                             const InputList &BAInputs) const;
  
    /// BuildJobs - Bind actions to concrete tools and translate
    /// arguments to form the list of jobs to run.
diff --git a/lib/Driver/Action.cpp b/lib/Driver/Action.cpp

index fdbae113ff8021a737b6327ce512ef4802ebd744..49dccd224bff23b4418e8dfe305b2938b39ff3f0 100644 (file)
--- a/lib/Driver/Action.cpp
+++ b/lib/Driver/Action.cpp
@@ -58,18 +58,15 @@ BindArchAction::BindArchAction(std::unique_ptr<Action> Input,
  void CudaDeviceAction::anchor() {}
  
  CudaDeviceAction::CudaDeviceAction(std::unique_ptr<Action> Input,
-                                   const char *ArchName,
-                                   const char *DeviceTriple, bool AtTopLevel)
+                                   const char *ArchName, bool AtTopLevel)
      : Action(CudaDeviceClass, std::move(Input)), GpuArchName(ArchName),
-      DeviceTriple(DeviceTriple), AtTopLevel(AtTopLevel) {}
+      AtTopLevel(AtTopLevel) {}
  
  void CudaHostAction::anchor() {}
  
  CudaHostAction::CudaHostAction(std::unique_ptr<Action> Input,
-                               const ActionList &DeviceActions,
-                               const char *DeviceTriple)
-    : Action(CudaHostClass, std::move(Input)), DeviceActions(DeviceActions),
-      DeviceTriple(DeviceTriple) {}
+                               const ActionList &DeviceActions)
+    : Action(CudaHostClass, std::move(Input)), DeviceActions(DeviceActions) {}
  
  CudaHostAction::~CudaHostAction() {
    for (auto &DA : DeviceActions)
diff --git a/lib/Driver/Compilation.cpp b/lib/Driver/Compilation.cpp

index 101d1fcc832ae98bc3f71249ff44630e2750cb57..e4af2a6ced8aad158bb079a77feb5c90d9db7b95 100644 (file)
--- a/lib/Driver/Compilation.cpp
+++ b/lib/Driver/Compilation.cpp
@@ -24,8 +24,9 @@ using namespace llvm::opt;
  
  Compilation::Compilation(const Driver &D, const ToolChain &_DefaultToolChain,
                           InputArgList *_Args, DerivedArgList *_TranslatedArgs)
-    : TheDriver(D), DefaultToolChain(_DefaultToolChain), Args(_Args),
-      TranslatedArgs(_TranslatedArgs), Redirects(nullptr),
+    : TheDriver(D), DefaultToolChain(_DefaultToolChain),
+      CudaHostToolChain(&DefaultToolChain), CudaDeviceToolChain(nullptr),
+      Args(_Args), TranslatedArgs(_TranslatedArgs), Redirects(nullptr),
        ForDiagnostics(false) {}
  
  Compilation::~Compilation() {
diff --git a/lib/Driver/Driver.cpp b/lib/Driver/Driver.cpp

index 32d248e93836f2a3202357176020e65ef62fdbef..5ff57093537cdeb7ad95066b1e3ecefedfe303cd 100644 (file)
--- a/lib/Driver/Driver.cpp
+++ b/lib/Driver/Driver.cpp
@@ -491,6 +491,10 @@ Compilation *Driver::BuildCompilation(ArrayRef<const char *> ArgList) {
    // The compilation takes ownership of Args.
    Compilation *C = new Compilation(*this, TC, UArgs.release(), TranslatedArgs);
  
+  C->setCudaDeviceToolChain(
+      &getToolChain(C->getArgs(), llvm::Triple(TC.getTriple().isArch64Bit()
+                                                   ? "nvptx64-nvidia-cuda"
+                                                   : "nvptx-nvidia-cuda")));
    if (!HandleImmediateArgs(*C))
      return C;
  
@@ -501,10 +505,9 @@ Compilation *Driver::BuildCompilation(ArrayRef<const char *> ArgList) {
    // Construct the list of abstract actions to perform for this compilation. On
    // MachO targets this uses the driver-driver and universal actions.
    if (TC.getTriple().isOSBinFormatMachO())
-    BuildUniversalActions(C->getDefaultToolChain(), C->getArgs(), Inputs,
-                          C->getActions());
+    BuildUniversalActions(*C, C->getDefaultToolChain(), Inputs);
    else
-    BuildActions(C->getDefaultToolChain(), C->getArgs(), Inputs,
+    BuildActions(*C, C->getDefaultToolChain(), C->getArgs(), Inputs,
                   C->getActions());
  
    if (CCCPrintPhases) {
@@ -617,9 +620,9 @@ void Driver::generateCompilationDiagnostics(Compilation &C,
    // Darwin OSes this uses the driver-driver and builds universal actions.
    const ToolChain &TC = C.getDefaultToolChain();
    if (TC.getTriple().isOSBinFormatMachO())
-    BuildUniversalActions(TC, C.getArgs(), Inputs, C.getActions());
+    BuildUniversalActions(C, TC, Inputs);
    else
-    BuildActions(TC, C.getArgs(), Inputs, C.getActions());
+    BuildActions(C, TC, C.getArgs(), Inputs, C.getActions());
  
    BuildJobs(C);
  
@@ -948,7 +951,7 @@ static unsigned PrintActions1(const Compilation &C, Action *A,
      os << '"' << CDA->getGpuArchName() << '"' << ", {"
         << PrintActions1(C, *CDA->begin(), Ids) << "}";
    } else {
-    ActionList *AL;
+    const ActionList *AL;
      if (CudaHostAction *CHA = dyn_cast<CudaHostAction>(A)) {
        os << "{" << PrintActions1(C, *CHA->begin(), Ids) << "}"
           << ", gpu binaries ";
@@ -997,9 +1000,10 @@ static bool ContainsCompileOrAssembleAction(const Action *A) {
    return false;
  }
  
-void Driver::BuildUniversalActions(const ToolChain &TC, DerivedArgList &Args,
-                                   const InputList &BAInputs,
-                                   ActionList &Actions) const {
+void Driver::BuildUniversalActions(Compilation &C, const ToolChain &TC,
+                                   const InputList &BAInputs) const {
+  DerivedArgList &Args = C.getArgs();
+  ActionList &Actions = C.getActions();
    llvm::PrettyStackTraceString CrashInfo("Building universal build actions");
    // Collect the list of architectures. Duplicates are allowed, but should only
    // be handled once (in the order seen).
@@ -1028,7 +1032,7 @@ void Driver::BuildUniversalActions(const ToolChain &TC, DerivedArgList &Args,
      Archs.push_back(Args.MakeArgString(TC.getDefaultUniversalArchName()));
  
    ActionList SingleActions;
-  BuildActions(TC, Args, BAInputs, SingleActions);
+  BuildActions(C, TC, Args, BAInputs, SingleActions);
  
    // Add in arch bindings for every top level action, as well as lipo and
    // dsymutil steps if needed.
@@ -1279,21 +1283,15 @@ void Driver::BuildInputs(const ToolChain &TC, DerivedArgList &Args,
  // and returns a new CudaHostAction which wraps /p Current and device
  // side actions.
  static std::unique_ptr<Action>
-buildCudaActions(const Driver &D, const ToolChain &TC, DerivedArgList &Args,
-                 const Arg *InputArg, std::unique_ptr<Action> HostAction,
-                 ActionList &Actions) {
-  // Figure out which NVPTX triple to use for device-side compilation based on
-  // whether host is 64-bit.
-  const char *DeviceTriple = TC.getTriple().isArch64Bit()
-                                 ? "nvptx64-nvidia-cuda"
-                                 : "nvptx-nvidia-cuda";
+buildCudaActions(Compilation &C, DerivedArgList &Args, const Arg *InputArg,
+                 std::unique_ptr<Action> HostAction, ActionList &Actions) {
    Arg *PartialCompilationArg = Args.getLastArg(options::OPT_cuda_host_only,
                                                 options::OPT_cuda_device_only);
    // Host-only compilation case.
    if (PartialCompilationArg &&
        PartialCompilationArg->getOption().matches(options::OPT_cuda_host_only))
      return std::unique_ptr<Action>(
-        new CudaHostAction(std::move(HostAction), {}, DeviceTriple));
+        new CudaHostAction(std::move(HostAction), {}));
  
    // Collect all cuda_gpu_arch parameters, removing duplicates.
    SmallVector<const char *, 4> GpuArchList;
@@ -1317,8 +1315,11 @@ buildCudaActions(const Driver &D, const ToolChain &TC, DerivedArgList &Args,
      CudaDeviceInputs.push_back(std::make_pair(types::TY_CUDA_DEVICE, InputArg));
  
    // Build actions for all device inputs.
+  assert(C.getCudaDeviceToolChain() &&
+         "Missing toolchain for device-side compilation.");
    ActionList CudaDeviceActions;
-  D.BuildActions(TC, Args, CudaDeviceInputs, CudaDeviceActions);
+  C.getDriver().BuildActions(C, *C.getCudaDeviceToolChain(), Args,
+                             CudaDeviceInputs, CudaDeviceActions);
    assert(GpuArchList.size() == CudaDeviceActions.size() &&
           "Failed to create actions for all devices");
  
@@ -1342,14 +1343,15 @@ buildCudaActions(const Driver &D, const ToolChain &TC, DerivedArgList &Args,
      // -o is ambiguous if we have more than one top-level action.
      if (Args.hasArg(options::OPT_o) &&
          (!DeviceOnlyCompilation || GpuArchList.size() > 1)) {
-      D.Diag(clang::diag::err_drv_output_argument_with_multiple_files);
+      C.getDriver().Diag(
+          clang::diag::err_drv_output_argument_with_multiple_files);
        return nullptr;
      }
  
      for (unsigned I = 0, E = GpuArchList.size(); I != E; ++I)
        Actions.push_back(new CudaDeviceAction(
            std::unique_ptr<Action>(CudaDeviceActions[I]), GpuArchList[I],
-          DeviceTriple, /* AtTopLevel */ true));
+          /* AtTopLevel */ true));
      // Kill host action in case of device-only compilation.
      if (DeviceOnlyCompilation)
        HostAction.reset(nullptr);
@@ -1362,15 +1364,16 @@ buildCudaActions(const Driver &D, const ToolChain &TC, DerivedArgList &Args,
    for (unsigned I = 0, E = GpuArchList.size(); I != E; ++I)
      DeviceActions.push_back(new CudaDeviceAction(
          std::unique_ptr<Action>(CudaDeviceActions[I]), GpuArchList[I],
-        DeviceTriple, /* AtTopLevel */ false));
+        /* AtTopLevel */ false));
    // Return a new host action that incorporates original host action and all
    // device actions.
    return std::unique_ptr<Action>(
-      new CudaHostAction(std::move(HostAction), DeviceActions, DeviceTriple));
+      new CudaHostAction(std::move(HostAction), DeviceActions));
  }
  
-void Driver::BuildActions(const ToolChain &TC, DerivedArgList &Args,
-                          const InputList &Inputs, ActionList &Actions) const {
+void Driver::BuildActions(Compilation &C, const ToolChain &TC,
+                          DerivedArgList &Args, const InputList &Inputs,
+                          ActionList &Actions) const {
    llvm::PrettyStackTraceString CrashInfo("Building compilation actions");
  
    if (!SuppressMissingInputWarning && Inputs.empty()) {
@@ -1500,8 +1503,8 @@ void Driver::BuildActions(const ToolChain &TC, DerivedArgList &Args,
        Current = ConstructPhaseAction(TC, Args, Phase, std::move(Current));
  
        if (InputType == types::TY_CUDA && Phase == CudaInjectionPhase) {
-        Current = buildCudaActions(*this, TC, Args, InputArg,
-                                   std::move(Current), Actions);
+        Current =
+            buildCudaActions(C, Args, InputArg, std::move(Current), Actions);
          if (!Current)
            break;
        }
@@ -1803,7 +1806,7 @@ void Driver::BuildJobsForAction(Compilation &C, const Action *A,
      InputInfo II;
      // Append outputs of device jobs to the input list.
      for (const Action *DA : CHA->getDeviceActions()) {
-      BuildJobsForAction(C, DA, TC, "", AtTopLevel,
+      BuildJobsForAction(C, DA, TC, nullptr, AtTopLevel,
                           /*MultipleArchs*/ false, LinkingOutput, II);
        CudaDeviceInputInfos.push_back(II);
      }
@@ -1843,11 +1846,12 @@ void Driver::BuildJobsForAction(Compilation &C, const Action *A,
    }
  
    if (const CudaDeviceAction *CDA = dyn_cast<CudaDeviceAction>(A)) {
-    BuildJobsForAction(
-        C, *CDA->begin(),
-        &getToolChain(C.getArgs(), llvm::Triple(CDA->getDeviceTriple())),
-        CDA->getGpuArchName(), CDA->isAtTopLevel(),
-        /*MultipleArchs*/ true, LinkingOutput, Result);
+    // Initial processing of CudaDeviceAction carries host params.
+    // Call BuildJobsForAction() again, now with correct device parameters.
+    assert(CDA->getGpuArchName() && "No GPU name in device action.");
+    BuildJobsForAction(C, *CDA->begin(), C.getCudaDeviceToolChain(),
+                       CDA->getGpuArchName(), CDA->isAtTopLevel(),
+                       /*MultipleArchs*/ true, LinkingOutput, Result);
      return;
    }
  
diff --git a/lib/Driver/Tools.cpp b/lib/Driver/Tools.cpp

index 384d39e4a2261573193a4412d1e2fc52549dc91a..3c593fc66b87f7a465b66fa280f63cbe98d59e6d 100644 (file)
--- a/lib/Driver/Tools.cpp
+++ b/lib/Driver/Tools.cpp
@@ -3262,6 +3262,23 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
    CmdArgs.push_back("-triple");
    CmdArgs.push_back(Args.MakeArgString(TripleStr));
  
+  if (IsCuda) {
+    // FIXME: We need a (better) way to pass information about the
+    // particular compilation pass we're constructing here. For now we
+    // can check which toolchain we're using and pick the other one to
+    // extract the triple.
+    const ToolChain *AuxToolChain;
+    if (&getToolChain() == C.getCudaDeviceToolChain())
+      AuxToolChain = C.getCudaHostToolChain();
+    else if (&getToolChain() == C.getCudaHostToolChain())
+      AuxToolChain = C.getCudaDeviceToolChain();
+    else
+      llvm_unreachable("Can't figure out CUDA compilation mode.");
+    assert(AuxToolChain != nullptr && "No aux toolchain.");
+    CmdArgs.push_back("-aux-triple");
+    CmdArgs.push_back(Args.MakeArgString(AuxToolChain->getTriple().str()));
+  }
+
    if (Triple.isOSWindows() && (Triple.getArch() == llvm::Triple::arm ||
                                 Triple.getArch() == llvm::Triple::thumb)) {
      unsigned Offset = Triple.getArch() == llvm::Triple::arm ? 4 : 6;
diff --git a/test/Driver/cuda-options.cu b/test/Driver/cuda-options.cu

index 23ba2967e8d49fb7f6566f97e9a1fef6f624cf38..21625259d3ef6144ce82c9badad7a618fbb5e8ca 100644 (file)
--- a/test/Driver/cuda-options.cu
+++ b/test/Driver/cuda-options.cu
@@ -111,14 +111,6 @@
  // Make sure we don't link anything.
  // RUN:   -check-prefix CUDA-NL %s
  
-// Match device-side preprocessor, and compiler phases with -save-temps
-// CUDA-D1S: "-cc1" "-triple" "nvptx{{(64)?}}-nvidia-cuda"
-// CUDA-D1S-SAME: "-fcuda-is-device"
-// CUDA-D1S-SAME: "-x" "cuda"
-// CUDA-D1S: "-cc1" "-triple" "nvptx{{(64)?}}-nvidia-cuda"
-// CUDA-D1S-SAME: "-fcuda-is-device"
-// CUDA-D1S-SAME: "-x" "cuda-cpp-output"
-
  // --cuda-host-only should never trigger unused arg warning.
  // RUN: %clang -### -target x86_64-linux-gnu --cuda-host-only -c %s 2>&1 | \
  // RUN:    FileCheck -check-prefix CUDA-NO-UNUSED-CHO %s
@@ -133,34 +125,47 @@
  // RUN: %clang -### -target x86_64-linux-gnu --cuda-device-only -x c -c %s 2>&1 | \
  // RUN:    FileCheck -check-prefix CUDA-UNUSED-CDO %s
  
+// Match device-side preprocessor, and compiler phases with -save-temps
+// CUDA-D1S: "-cc1" "-triple" "nvptx64-nvidia-cuda"
+// CUDA-D1S-SAME: "-aux-triple" "x86_64--linux-gnu"
+// CUDA-D1S-SAME: "-fcuda-is-device"
+// CUDA-D1S-SAME: "-x" "cuda"
+
+// CUDA-D1S: "-cc1" "-triple" "nvptx64-nvidia-cuda"
+// CUDA-D1S-SAME: "-aux-triple" "x86_64--linux-gnu"
+// CUDA-D1S-SAME: "-fcuda-is-device"
+// CUDA-D1S-SAME: "-x" "cuda-cpp-output"
+
  // Match the job that produces PTX assembly
-// CUDA-D1: "-cc1" "-triple" "nvptx{{(64)?}}-nvidia-cuda"
+// CUDA-D1: "-cc1" "-triple" "nvptx64-nvidia-cuda"
+// CUDA-D1NS-SAME: "-aux-triple" "x86_64--linux-gnu"
  // CUDA-D1-SAME: "-fcuda-is-device"
  // CUDA-D1-SM35-SAME: "-target-cpu" "sm_35"
  // CUDA-D1-SAME: "-o" "[[GPUBINARY1:[^"]*]]"
  // CUDA-D1NS-SAME: "-x" "cuda"
  // CUDA-D1S-SAME: "-x" "ir"
  
-// Match anothe device-side compilation
-// CUDA-D2: "-cc1" "-triple" "nvptx{{(64)?}}-nvidia-cuda"
+// Match another device-side compilation
+// CUDA-D2: "-cc1" "-triple" "nvptx64-nvidia-cuda"
+// CUDA-D2-SAME: "-aux-triple" "x86_64--linux-gnu"
  // CUDA-D2-SAME: "-fcuda-is-device"
  // CUDA-D2-SM30-SAME: "-target-cpu" "sm_30"
  // CUDA-D2-SAME: "-o" "[[GPUBINARY2:[^"]*]]"
  // CUDA-D2-SAME: "-x" "cuda"
  
  // Match no device-side compilation
-// CUDA-ND-NOT: "-cc1" "-triple" "nvptx{{(64)?}}-nvidia-cuda"
+// CUDA-ND-NOT: "-cc1" "-triple" "nvptx64-nvidia-cuda"
  // CUDA-ND-SAME-NOT: "-fcuda-is-device"
  
  // Match host-side preprocessor job with -save-temps
-// CUDA-HS: "-cc1" "-triple"
-// CUDA-HS-SAME-NOT: "nvptx{{(64)?}}-nvidia-cuda"
+// CUDA-HS: "-cc1" "-triple" "x86_64--linux-gnu"
+// CUDA-HS-SAME: "-aux-triple" "nvptx64-nvidia-cuda"
  // CUDA-HS-SAME-NOT: "-fcuda-is-device"
  // CUDA-HS-SAME: "-x" "cuda"
  
  // Match host-side compilation
-// CUDA-H: "-cc1" "-triple"
-// CUDA-H-SAME-NOT: "nvptx{{(64)?}}-nvidia-cuda"
+// CUDA-H: "-cc1" "-triple" "x86_64--linux-gnu"
+// CUDA-H-SAME: "-aux-triple" "nvptx64-nvidia-cuda"
  // CUDA-H-SAME-NOT: "-fcuda-is-device"
  // CUDA-H-SAME: "-o" "[[HOSTOUTPUT:[^"]*]]"
  // CUDA-HNS-SAME: "-x" "cuda"
diff --git a/test/SemaCUDA/function-target-hd.cu b/test/SemaCUDA/function-target-hd.cu

index 25fcc6e9188f5b7d258446810edd06c683a7b6ee..685f4f9cda628d207687e2892c7f4fa08ef8007a 100644 (file)
--- a/test/SemaCUDA/function-target-hd.cu
+++ b/test/SemaCUDA/function-target-hd.cu
@@ -8,9 +8,9 @@
  // host device functions are not allowed to call device functions.
  
  // RUN: %clang_cc1 -fsyntax-only -verify %s
-// RUN: %clang_cc1 -fsyntax-only -fcuda-is-device -verify %s
+// RUN: %clang_cc1 -fsyntax-only -fcuda-is-device -triple nvptx-unknown-cuda -verify %s
  // RUN: %clang_cc1 -fsyntax-only -fcuda-allow-host-calls-from-host-device -verify %s -DTEST_WARN_HD
-// RUN: %clang_cc1 -fsyntax-only -fcuda-is-device -fcuda-allow-host-calls-from-host-device -verify %s -DTEST_WARN_HD
+// RUN: %clang_cc1 -fsyntax-only -fcuda-is-device -triple nvptx-unknown-cuda -fcuda-allow-host-calls-from-host-device -verify %s -DTEST_WARN_HD
  
  #include "Inputs/cuda.h"
author	Artem Belevich <tra@google.com>
	Tue, 17 Nov 2015 22:28:40 +0000 (22:28 +0000)
committer	Artem Belevich <tra@google.com>
	Tue, 17 Nov 2015 22:28:40 +0000 (22:28 +0000)
include/clang/Driver/Action.h		patch \| blob \| history
include/clang/Driver/Compilation.h		patch \| blob \| history
include/clang/Driver/Driver.h		patch \| blob \| history
lib/Driver/Action.cpp		patch \| blob \| history
lib/Driver/Compilation.cpp		patch \| blob \| history
lib/Driver/Driver.cpp		patch \| blob \| history
lib/Driver/Tools.cpp		patch \| blob \| history
test/Driver/cuda-options.cu		patch \| blob \| history
test/SemaCUDA/function-target-hd.cu		patch \| blob \| history