virtual void anchor();
/// GPU architecture to bind -- e.g. 'sm_35'.
const char *GpuArchName;
- const char *DeviceTriple;
/// True when action results are not consumed by the host action (e.g when
/// -fsyntax-only or --cuda-device-only options are used).
bool AtTopLevel;
public:
CudaDeviceAction(std::unique_ptr<Action> Input, const char *ArchName,
- const char *DeviceTriple, bool AtTopLevel);
+ bool AtTopLevel);
const char *getGpuArchName() const { return GpuArchName; }
- const char *getDeviceTriple() const { return DeviceTriple; }
bool isAtTopLevel() const { return AtTopLevel; }
static bool classof(const Action *A) {
class CudaHostAction : public Action {
virtual void anchor();
ActionList DeviceActions;
- const char *DeviceTriple;
public:
- CudaHostAction(std::unique_ptr<Action> Input, const ActionList &DeviceActions,
- const char *DeviceTriple);
+ CudaHostAction(std::unique_ptr<Action> Input,
+ const ActionList &DeviceActions);
~CudaHostAction() override;
- ActionList &getDeviceActions() { return DeviceActions; }
const ActionList &getDeviceActions() const { return DeviceActions; }
- const char *getDeviceTriple() const { return DeviceTriple; }
static bool classof(const Action *A) { return A->getKind() == CudaHostClass; }
};
/// The default tool chain.
const ToolChain &DefaultToolChain;
+ const ToolChain *CudaHostToolChain;
+ const ToolChain *CudaDeviceToolChain;
+
/// The original (untranslated) input argument list.
llvm::opt::InputArgList *Args;
const Driver &getDriver() const { return TheDriver; }
const ToolChain &getDefaultToolChain() const { return DefaultToolChain; }
+ const ToolChain *getCudaHostToolChain() const { return CudaHostToolChain; }
+ const ToolChain *getCudaDeviceToolChain() const {
+ return CudaDeviceToolChain;
+ }
+
+ void setCudaHostToolChain(const ToolChain *HostToolChain) {
+ CudaHostToolChain = HostToolChain;
+ }
+ void setCudaDeviceToolChain(const ToolChain *DeviceToolChain) {
+ CudaDeviceToolChain = DeviceToolChain;
+ }
const llvm::opt::InputArgList &getInputArgs() const { return *Args; }
/// BuildActions - Construct the list of actions to perform for the
/// given arguments, which are only done for a single architecture.
///
+ /// \param C - The compilation that is being built.
/// \param TC - The default host tool chain.
/// \param Args - The input arguments.
/// \param Actions - The list to store the resulting actions onto.
- void BuildActions(const ToolChain &TC, llvm::opt::DerivedArgList &Args,
- const InputList &Inputs, ActionList &Actions) const;
+ void BuildActions(Compilation &C, const ToolChain &TC,
+ llvm::opt::DerivedArgList &Args, const InputList &Inputs,
+ ActionList &Actions) const;
/// BuildUniversalActions - Construct the list of actions to perform
/// for the given arguments, which may require a universal build.
///
+ /// \param C - The compilation that is being built.
/// \param TC - The default host tool chain.
/// \param Args - The input arguments.
/// \param Actions - The list to store the resulting actions onto.
- void BuildUniversalActions(const ToolChain &TC,
- llvm::opt::DerivedArgList &Args,
- const InputList &BAInputs,
- ActionList &Actions) const;
+ void BuildUniversalActions(Compilation &C, const ToolChain &TC,
+ const InputList &BAInputs) const;
/// BuildJobs - Bind actions to concrete tools and translate
/// arguments to form the list of jobs to run.
// Out-of-line definition of the declared virtual anchor(); pins
// CudaDeviceAction's vtable to this translation unit.
void CudaDeviceAction::anchor() {}
// Construct a device-side CUDA action for one GPU architecture.
// NOTE(review): resolved leftover diff markers here — the DeviceTriple
// parameter/member was removed; the device toolchain is now obtained from
// the Compilation instead of being stored per-action.
//
// \param Input      the wrapped action producing this action's input.
// \param ArchName   GPU architecture name, e.g. "sm_35".
// \param AtTopLevel true when the result is not consumed by a host action.
CudaDeviceAction::CudaDeviceAction(std::unique_ptr<Action> Input,
                                   const char *ArchName, bool AtTopLevel)
    : Action(CudaDeviceClass, std::move(Input)), GpuArchName(ArchName),
      AtTopLevel(AtTopLevel) {}
// Out-of-line definition of the declared virtual anchor(); pins
// CudaHostAction's vtable to this translation unit.
void CudaHostAction::anchor() {}
// Construct a host-side CUDA action wrapping \p Input plus the given
// device-side actions (ownership of the listed actions is taken; they are
// deleted in ~CudaHostAction).
// NOTE(review): resolved leftover diff markers — the DeviceTriple parameter
// and member initializer were removed to match the new declaration.
CudaHostAction::CudaHostAction(std::unique_ptr<Action> Input,
                               const ActionList &DeviceActions)
    : Action(CudaHostClass, std::move(Input)), DeviceActions(DeviceActions) {}
CudaHostAction::~CudaHostAction() {
for (auto &DA : DeviceActions)
// NOTE(review): resolved leftover diff markers — the init list now also sets
// the CUDA toolchains: the host toolchain defaults to the default toolchain,
// and the device toolchain starts null (the Driver fills it in via
// setCudaDeviceToolChain() once the NVPTX triple is known).
Compilation::Compilation(const Driver &D, const ToolChain &_DefaultToolChain,
                         InputArgList *_Args, DerivedArgList *_TranslatedArgs)
    : TheDriver(D), DefaultToolChain(_DefaultToolChain),
      CudaHostToolChain(&DefaultToolChain), CudaDeviceToolChain(nullptr),
      Args(_Args), TranslatedArgs(_TranslatedArgs), Redirects(nullptr),
      ForDiagnostics(false) {}
Compilation::~Compilation() {
// The compilation takes ownership of Args.
Compilation *C = new Compilation(*this, TC, UArgs.release(), TranslatedArgs);
+ C->setCudaDeviceToolChain(
+ &getToolChain(C->getArgs(), llvm::Triple(TC.getTriple().isArch64Bit()
+ ? "nvptx64-nvidia-cuda"
+ : "nvptx-nvidia-cuda")));
if (!HandleImmediateArgs(*C))
return C;
// Construct the list of abstract actions to perform for this compilation. On
// MachO targets this uses the driver-driver and universal actions.
if (TC.getTriple().isOSBinFormatMachO())
- BuildUniversalActions(C->getDefaultToolChain(), C->getArgs(), Inputs,
- C->getActions());
+ BuildUniversalActions(*C, C->getDefaultToolChain(), Inputs);
else
- BuildActions(C->getDefaultToolChain(), C->getArgs(), Inputs,
+ BuildActions(*C, C->getDefaultToolChain(), C->getArgs(), Inputs,
C->getActions());
if (CCCPrintPhases) {
// Darwin OSes this uses the driver-driver and builds universal actions.
const ToolChain &TC = C.getDefaultToolChain();
if (TC.getTriple().isOSBinFormatMachO())
- BuildUniversalActions(TC, C.getArgs(), Inputs, C.getActions());
+ BuildUniversalActions(C, TC, Inputs);
else
- BuildActions(TC, C.getArgs(), Inputs, C.getActions());
+ BuildActions(C, TC, C.getArgs(), Inputs, C.getActions());
BuildJobs(C);
os << '"' << CDA->getGpuArchName() << '"' << ", {"
<< PrintActions1(C, *CDA->begin(), Ids) << "}";
} else {
- ActionList *AL;
+ const ActionList *AL;
if (CudaHostAction *CHA = dyn_cast<CudaHostAction>(A)) {
os << "{" << PrintActions1(C, *CHA->begin(), Ids) << "}"
<< ", gpu binaries ";
return false;
}
-void Driver::BuildUniversalActions(const ToolChain &TC, DerivedArgList &Args,
- const InputList &BAInputs,
- ActionList &Actions) const {
+void Driver::BuildUniversalActions(Compilation &C, const ToolChain &TC,
+ const InputList &BAInputs) const {
+ DerivedArgList &Args = C.getArgs();
+ ActionList &Actions = C.getActions();
llvm::PrettyStackTraceString CrashInfo("Building universal build actions");
// Collect the list of architectures. Duplicates are allowed, but should only
// be handled once (in the order seen).
Archs.push_back(Args.MakeArgString(TC.getDefaultUniversalArchName()));
ActionList SingleActions;
- BuildActions(TC, Args, BAInputs, SingleActions);
+ BuildActions(C, TC, Args, BAInputs, SingleActions);
// Add in arch bindings for every top level action, as well as lipo and
// dsymutil steps if needed.
// and returns a new CudaHostAction which wraps \p Current and device
// side actions.
static std::unique_ptr<Action>
-buildCudaActions(const Driver &D, const ToolChain &TC, DerivedArgList &Args,
- const Arg *InputArg, std::unique_ptr<Action> HostAction,
- ActionList &Actions) {
- // Figure out which NVPTX triple to use for device-side compilation based on
- // whether host is 64-bit.
- const char *DeviceTriple = TC.getTriple().isArch64Bit()
- ? "nvptx64-nvidia-cuda"
- : "nvptx-nvidia-cuda";
+buildCudaActions(Compilation &C, DerivedArgList &Args, const Arg *InputArg,
+ std::unique_ptr<Action> HostAction, ActionList &Actions) {
Arg *PartialCompilationArg = Args.getLastArg(options::OPT_cuda_host_only,
options::OPT_cuda_device_only);
// Host-only compilation case.
if (PartialCompilationArg &&
PartialCompilationArg->getOption().matches(options::OPT_cuda_host_only))
return std::unique_ptr<Action>(
- new CudaHostAction(std::move(HostAction), {}, DeviceTriple));
+ new CudaHostAction(std::move(HostAction), {}));
// Collect all cuda_gpu_arch parameters, removing duplicates.
SmallVector<const char *, 4> GpuArchList;
CudaDeviceInputs.push_back(std::make_pair(types::TY_CUDA_DEVICE, InputArg));
// Build actions for all device inputs.
+ assert(C.getCudaDeviceToolChain() &&
+ "Missing toolchain for device-side compilation.");
ActionList CudaDeviceActions;
- D.BuildActions(TC, Args, CudaDeviceInputs, CudaDeviceActions);
+ C.getDriver().BuildActions(C, *C.getCudaDeviceToolChain(), Args,
+ CudaDeviceInputs, CudaDeviceActions);
assert(GpuArchList.size() == CudaDeviceActions.size() &&
"Failed to create actions for all devices");
// -o is ambiguous if we have more than one top-level action.
if (Args.hasArg(options::OPT_o) &&
(!DeviceOnlyCompilation || GpuArchList.size() > 1)) {
- D.Diag(clang::diag::err_drv_output_argument_with_multiple_files);
+ C.getDriver().Diag(
+ clang::diag::err_drv_output_argument_with_multiple_files);
return nullptr;
}
for (unsigned I = 0, E = GpuArchList.size(); I != E; ++I)
Actions.push_back(new CudaDeviceAction(
std::unique_ptr<Action>(CudaDeviceActions[I]), GpuArchList[I],
- DeviceTriple, /* AtTopLevel */ true));
+ /* AtTopLevel */ true));
// Kill host action in case of device-only compilation.
if (DeviceOnlyCompilation)
HostAction.reset(nullptr);
for (unsigned I = 0, E = GpuArchList.size(); I != E; ++I)
DeviceActions.push_back(new CudaDeviceAction(
std::unique_ptr<Action>(CudaDeviceActions[I]), GpuArchList[I],
- DeviceTriple, /* AtTopLevel */ false));
+ /* AtTopLevel */ false));
// Return a new host action that incorporates original host action and all
// device actions.
return std::unique_ptr<Action>(
- new CudaHostAction(std::move(HostAction), DeviceActions, DeviceTriple));
+ new CudaHostAction(std::move(HostAction), DeviceActions));
}
-void Driver::BuildActions(const ToolChain &TC, DerivedArgList &Args,
- const InputList &Inputs, ActionList &Actions) const {
+void Driver::BuildActions(Compilation &C, const ToolChain &TC,
+ DerivedArgList &Args, const InputList &Inputs,
+ ActionList &Actions) const {
llvm::PrettyStackTraceString CrashInfo("Building compilation actions");
if (!SuppressMissingInputWarning && Inputs.empty()) {
Current = ConstructPhaseAction(TC, Args, Phase, std::move(Current));
if (InputType == types::TY_CUDA && Phase == CudaInjectionPhase) {
- Current = buildCudaActions(*this, TC, Args, InputArg,
- std::move(Current), Actions);
+ Current =
+ buildCudaActions(C, Args, InputArg, std::move(Current), Actions);
if (!Current)
break;
}
InputInfo II;
// Append outputs of device jobs to the input list.
for (const Action *DA : CHA->getDeviceActions()) {
- BuildJobsForAction(C, DA, TC, "", AtTopLevel,
+ BuildJobsForAction(C, DA, TC, nullptr, AtTopLevel,
/*MultipleArchs*/ false, LinkingOutput, II);
CudaDeviceInputInfos.push_back(II);
}
}
if (const CudaDeviceAction *CDA = dyn_cast<CudaDeviceAction>(A)) {
- BuildJobsForAction(
- C, *CDA->begin(),
- &getToolChain(C.getArgs(), llvm::Triple(CDA->getDeviceTriple())),
- CDA->getGpuArchName(), CDA->isAtTopLevel(),
- /*MultipleArchs*/ true, LinkingOutput, Result);
+ // Initial processing of CudaDeviceAction carries host params.
+ // Call BuildJobsForAction() again, now with correct device parameters.
+ assert(CDA->getGpuArchName() && "No GPU name in device action.");
+ BuildJobsForAction(C, *CDA->begin(), C.getCudaDeviceToolChain(),
+ CDA->getGpuArchName(), CDA->isAtTopLevel(),
+ /*MultipleArchs*/ true, LinkingOutput, Result);
return;
}
CmdArgs.push_back("-triple");
CmdArgs.push_back(Args.MakeArgString(TripleStr));
+ if (IsCuda) {
+ // FIXME: We need a (better) way to pass information about
+ // particular compilation pass we're constructing here. For now we
+ // can check which toolchain we're using and pick the other one to
+ // extract the triple.
+ const ToolChain *AuxToolChain;
+ if (&getToolChain() == C.getCudaDeviceToolChain())
+ AuxToolChain = C.getCudaHostToolChain();
+ else if (&getToolChain() == C.getCudaHostToolChain())
+ AuxToolChain = C.getCudaDeviceToolChain();
+ else
+ llvm_unreachable("Can't figure out CUDA compilation mode.");
+ assert(AuxToolChain != nullptr && "No aux toolchain.");
+ CmdArgs.push_back("-aux-triple");
+ CmdArgs.push_back(Args.MakeArgString(AuxToolChain->getTriple().str()));
+ }
+
if (Triple.isOSWindows() && (Triple.getArch() == llvm::Triple::arm ||
Triple.getArch() == llvm::Triple::thumb)) {
unsigned Offset = Triple.getArch() == llvm::Triple::arm ? 4 : 6;
// Make sure we don't link anything.
// RUN: -check-prefix CUDA-NL %s
-// Match device-side preprocessor, and compiler phases with -save-temps
-// CUDA-D1S: "-cc1" "-triple" "nvptx{{(64)?}}-nvidia-cuda"
-// CUDA-D1S-SAME: "-fcuda-is-device"
-// CUDA-D1S-SAME: "-x" "cuda"
-// CUDA-D1S: "-cc1" "-triple" "nvptx{{(64)?}}-nvidia-cuda"
-// CUDA-D1S-SAME: "-fcuda-is-device"
-// CUDA-D1S-SAME: "-x" "cuda-cpp-output"
-
// --cuda-host-only should never trigger unused arg warning.
// RUN: %clang -### -target x86_64-linux-gnu --cuda-host-only -c %s 2>&1 | \
// RUN: FileCheck -check-prefix CUDA-NO-UNUSED-CHO %s
// RUN: %clang -### -target x86_64-linux-gnu --cuda-device-only -x c -c %s 2>&1 | \
// RUN: FileCheck -check-prefix CUDA-UNUSED-CDO %s
+// Match device-side preprocessor, and compiler phases with -save-temps
+// CUDA-D1S: "-cc1" "-triple" "nvptx64-nvidia-cuda"
+// CUDA-D1S-SAME: "-aux-triple" "x86_64--linux-gnu"
+// CUDA-D1S-SAME: "-fcuda-is-device"
+// CUDA-D1S-SAME: "-x" "cuda"
+
+// CUDA-D1S: "-cc1" "-triple" "nvptx64-nvidia-cuda"
+// CUDA-D1S-SAME: "-aux-triple" "x86_64--linux-gnu"
+// CUDA-D1S-SAME: "-fcuda-is-device"
+// CUDA-D1S-SAME: "-x" "cuda-cpp-output"
+
// Match the job that produces PTX assembly
-// CUDA-D1: "-cc1" "-triple" "nvptx{{(64)?}}-nvidia-cuda"
+// CUDA-D1: "-cc1" "-triple" "nvptx64-nvidia-cuda"
+// CUDA-D1NS-SAME: "-aux-triple" "x86_64--linux-gnu"
// CUDA-D1-SAME: "-fcuda-is-device"
// CUDA-D1-SM35-SAME: "-target-cpu" "sm_35"
// CUDA-D1-SAME: "-o" "[[GPUBINARY1:[^"]*]]"
// CUDA-D1NS-SAME: "-x" "cuda"
// CUDA-D1S-SAME: "-x" "ir"
-// Match anothe device-side compilation
-// CUDA-D2: "-cc1" "-triple" "nvptx{{(64)?}}-nvidia-cuda"
+// Match another device-side compilation
+// CUDA-D2: "-cc1" "-triple" "nvptx64-nvidia-cuda"
+// CUDA-D2-SAME: "-aux-triple" "x86_64--linux-gnu"
// CUDA-D2-SAME: "-fcuda-is-device"
// CUDA-D2-SM30-SAME: "-target-cpu" "sm_30"
// CUDA-D2-SAME: "-o" "[[GPUBINARY2:[^"]*]]"
// CUDA-D2-SAME: "-x" "cuda"
// Match no device-side compilation
-// CUDA-ND-NOT: "-cc1" "-triple" "nvptx{{(64)?}}-nvidia-cuda"
+// CUDA-ND-NOT: "-cc1" "-triple" "nvptx64-nvidia-cuda"
// CUDA-ND-SAME-NOT: "-fcuda-is-device"
// Match host-side preprocessor job with -save-temps
-// CUDA-HS: "-cc1" "-triple"
-// CUDA-HS-SAME-NOT: "nvptx{{(64)?}}-nvidia-cuda"
+// CUDA-HS: "-cc1" "-triple" "x86_64--linux-gnu"
+// CUDA-HS-SAME: "-aux-triple" "nvptx64-nvidia-cuda"
// CUDA-HS-SAME-NOT: "-fcuda-is-device"
// CUDA-HS-SAME: "-x" "cuda"
// Match host-side compilation
-// CUDA-H: "-cc1" "-triple"
-// CUDA-H-SAME-NOT: "nvptx{{(64)?}}-nvidia-cuda"
+// CUDA-H: "-cc1" "-triple" "x86_64--linux-gnu"
+// CUDA-H-SAME: "-aux-triple" "nvptx64-nvidia-cuda"
// CUDA-H-SAME-NOT: "-fcuda-is-device"
// CUDA-H-SAME: "-o" "[[HOSTOUTPUT:[^"]*]]"
// CUDA-HNS-SAME: "-x" "cuda"
// host device functions are not allowed to call device functions.
// RUN: %clang_cc1 -fsyntax-only -verify %s
-// RUN: %clang_cc1 -fsyntax-only -fcuda-is-device -verify %s
+// RUN: %clang_cc1 -fsyntax-only -fcuda-is-device -triple nvptx-unknown-cuda -verify %s
// RUN: %clang_cc1 -fsyntax-only -fcuda-allow-host-calls-from-host-device -verify %s -DTEST_WARN_HD
-// RUN: %clang_cc1 -fsyntax-only -fcuda-is-device -fcuda-allow-host-calls-from-host-device -verify %s -DTEST_WARN_HD
+// RUN: %clang_cc1 -fsyntax-only -fcuda-is-device -triple nvptx-unknown-cuda -fcuda-allow-host-calls-from-host-device -verify %s -DTEST_WARN_HD
#include "Inputs/cuda.h"