From: Sergey Dmitriev Date: Wed, 9 Oct 2019 20:42:58 +0000 (+0000) Subject: [Clang][OpenMP Offload] Add new tool for wrapping offload device binaries X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=7823560840f75b2a59347a03c422783d0dd913f4;p=clang [Clang][OpenMP Offload] Add new tool for wrapping offload device binaries This patch removes the remaining part of the OpenMP offload linker scripts, which was used for inserting device binaries into the output linked binary. Device binaries are now inserted into the host binary with the help of a wrapper bit-code file which contains the device binaries as data. The wrapper bit-code file is dynamically created by the clang driver with the help of a new tool, clang-offload-wrapper, which takes device binaries as input and produces a bit-code file with the required contents. The wrapper bit-code is then compiled to an object, and the resulting object is added to the host link step by the clang driver. This is the second part of the patch for eliminating the OpenMP linker script (please see https://reviews.llvm.org/D64943). 
Differential Revision: https://reviews.llvm.org/D68166 git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@374219 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/clang/Driver/Action.h b/include/clang/Driver/Action.h index c6e90b2978..8ccbb6c2bb 100644 --- a/include/clang/Driver/Action.h +++ b/include/clang/Driver/Action.h @@ -72,9 +72,10 @@ public: VerifyPCHJobClass, OffloadBundlingJobClass, OffloadUnbundlingJobClass, + OffloadWrapperJobClass, JobClassFirst = PreprocessJobClass, - JobClassLast = OffloadUnbundlingJobClass + JobClassLast = OffloadWrapperJobClass }; // The offloading kind determines if this action is binded to a particular @@ -625,6 +626,17 @@ public: } }; +class OffloadWrapperJobAction : public JobAction { + void anchor() override; + +public: + OffloadWrapperJobAction(ActionList &Inputs, types::ID Type); + + static bool classof(const Action *A) { + return A->getKind() == OffloadWrapperJobClass; + } +}; + } // namespace driver } // namespace clang diff --git a/include/clang/Driver/Options.td b/include/clang/Driver/Options.td index cbcf2ab326..c6e06f2e7d 100644 --- a/include/clang/Driver/Options.td +++ b/include/clang/Driver/Options.td @@ -1604,8 +1604,6 @@ def fnoopenmp_use_tls : Flag<["-"], "fnoopenmp-use-tls">, Group, Flags<[CC1Option, NoArgumentUnused, HelpHidden]>; def fopenmp_targets_EQ : CommaJoined<["-"], "fopenmp-targets=">, Flags<[DriverOption, CC1Option]>, HelpText<"Specify comma-separated list of triples OpenMP offloading targets to be supported">; -def fopenmp_dump_offload_linker_script : Flag<["-"], "fopenmp-dump-offload-linker-script">, - Group, Flags<[NoArgumentUnused, HelpHidden]>; def fopenmp_relocatable_target : Flag<["-"], "fopenmp-relocatable-target">, Group, Flags<[CC1Option, NoArgumentUnused, HelpHidden]>; def fnoopenmp_relocatable_target : Flag<["-"], "fnoopenmp-relocatable-target">, diff --git a/include/clang/Driver/ToolChain.h b/include/clang/Driver/ToolChain.h index 2ba16ab21a..f0676eee2d 100644 --- 
a/include/clang/Driver/ToolChain.h +++ b/include/clang/Driver/ToolChain.h @@ -138,6 +138,7 @@ private: mutable std::unique_ptr Link; mutable std::unique_ptr IfsMerge; mutable std::unique_ptr OffloadBundler; + mutable std::unique_ptr OffloadWrapper; Tool *getClang() const; Tool *getAssemble() const; @@ -145,6 +146,7 @@ private: Tool *getIfsMerge() const; Tool *getClangAs() const; Tool *getOffloadBundler() const; + Tool *getOffloadWrapper() const; mutable std::unique_ptr SanitizerArguments; mutable std::unique_ptr XRayArguments; diff --git a/lib/Driver/Action.cpp b/lib/Driver/Action.cpp index 0187cf981e..0eb4c7257e 100644 --- a/lib/Driver/Action.cpp +++ b/lib/Driver/Action.cpp @@ -41,6 +41,8 @@ const char *Action::getClassName(ActionClass AC) { return "clang-offload-bundler"; case OffloadUnbundlingJobClass: return "clang-offload-unbundler"; + case OffloadWrapperJobClass: + return "clang-offload-wrapper"; } llvm_unreachable("invalid class"); @@ -407,3 +409,9 @@ void OffloadUnbundlingJobAction::anchor() {} OffloadUnbundlingJobAction::OffloadUnbundlingJobAction(Action *Input) : JobAction(OffloadUnbundlingJobClass, Input, Input->getType()) {} + +void OffloadWrapperJobAction::anchor() {} + +OffloadWrapperJobAction::OffloadWrapperJobAction(ActionList &Inputs, + types::ID Type) + : JobAction(OffloadWrapperJobClass, Inputs, Type) {} diff --git a/lib/Driver/Driver.cpp b/lib/Driver/Driver.cpp index a095274298..51c262aa0a 100644 --- a/lib/Driver/Driver.cpp +++ b/lib/Driver/Driver.cpp @@ -2287,6 +2287,9 @@ class OffloadingActionBuilder final { /// Append top level actions generated by the builder. virtual void appendTopLevelActions(ActionList &AL) {} + /// Append linker actions generated by the builder. + virtual void appendLinkActions(ActionList &AL) {} + /// Append linker actions generated by the builder. 
virtual void appendLinkDependences(OffloadAction::DeviceDependences &DA) {} @@ -2890,7 +2893,7 @@ class OffloadingActionBuilder final { OpenMPDeviceActions.clear(); } - void appendLinkDependences(OffloadAction::DeviceDependences &DA) override { + void appendLinkActions(ActionList &AL) override { assert(ToolChains.size() == DeviceLinkerInputs.size() && "Toolchains and linker inputs sizes do not match."); @@ -2899,12 +2902,18 @@ class OffloadingActionBuilder final { for (auto &LI : DeviceLinkerInputs) { auto *DeviceLinkAction = C.MakeAction(LI, types::TY_Image); - DA.add(*DeviceLinkAction, **TC, /*BoundArch=*/nullptr, - Action::OFK_OpenMP); + OffloadAction::DeviceDependences DeviceLinkDeps; + DeviceLinkDeps.add(*DeviceLinkAction, **TC, /*BoundArch=*/nullptr, + Action::OFK_OpenMP); + AL.push_back(C.MakeAction(DeviceLinkDeps, + DeviceLinkAction->getType())); ++TC; } + DeviceLinkerInputs.clear(); } + void appendLinkDependences(OffloadAction::DeviceDependences &DA) override {} + bool initialize() override { // Get the OpenMP toolchains. If we don't get any, the action builder will // know there is nothing to do related to OpenMP offloading. @@ -3129,6 +3138,25 @@ public: return false; } + Action* makeHostLinkAction() { + // Build a list of device linking actions. + ActionList DeviceAL; + for (DeviceActionBuilder *SB : SpecializedBuilders) { + if (!SB->isValid()) + continue; + SB->appendLinkActions(DeviceAL); + } + + if (DeviceAL.empty()) + return nullptr; + + // Create wrapper bitcode from the result of device link actions and compile + // it to an object which will be added to the host link command. + auto *BC = C.MakeAction(DeviceAL, types::TY_LLVM_BC); + auto *ASM = C.MakeAction(BC, types::TY_PP_Asm); + return C.MakeAction(ASM, types::TY_Object); + } + /// Processes the host linker action. 
This currently consists of replacing it /// with an offload action if there are device link objects and propagate to /// the host action all the offload kinds used in the current compilation. The @@ -3434,6 +3462,8 @@ void Driver::BuildActions(Compilation &C, DerivedArgList &Args, // Add a link action if necessary. if (!LinkerInputs.empty()) { + if (Action *Wrapper = OffloadBuilder.makeHostLinkAction()) + LinkerInputs.push_back(Wrapper); Action *LA = C.MakeAction(LinkerInputs, types::TY_Image); LA = OffloadBuilder.processHostLinkAction(LA); Actions.push_back(LA); @@ -3820,18 +3850,8 @@ class ToolSelector final { if (!AJ || !BJ) return nullptr; - // Retrieve the compile job, backend action must always be preceded by one. - ActionList CompileJobOffloadActions; - auto *CJ = getPrevDependentAction(BJ->getInputs(), CompileJobOffloadActions, - /*CanBeCollapsed=*/false); - if (!AJ || !BJ || !CJ) - return nullptr; - - assert(isa(CJ) && - "Expecting compile job preceding backend job."); - - // Get compiler tool. - const Tool *T = TC.SelectTool(*CJ); + // Get backend tool. 
+ const Tool *T = TC.SelectTool(*BJ); if (!T) return nullptr; @@ -4236,6 +4256,13 @@ InputInfo Driver::BuildJobsForActionNoCache( A->getOffloadingDeviceKind(), TC->getTriple().normalize(), /*CreatePrefixForHost=*/!!A->getOffloadingHostActiveKinds() && !AtTopLevel); + if (isa(JA)) { + OffloadingPrefix += "-wrapper"; + if (Arg *FinalOutput = C.getArgs().getLastArg(options::OPT_o)) + BaseInput = FinalOutput->getValue(); + else + BaseInput = getDefaultImageName(); + } Result = InputInfo(A, GetNamedOutputPath(C, *JA, BaseInput, BoundArch, AtTopLevel, MultipleArchs, OffloadingPrefix), diff --git a/lib/Driver/ToolChain.cpp b/lib/Driver/ToolChain.cpp index db2497a108..357a5106ab 100644 --- a/lib/Driver/ToolChain.cpp +++ b/lib/Driver/ToolChain.cpp @@ -292,6 +292,12 @@ Tool *ToolChain::getOffloadBundler() const { return OffloadBundler.get(); } +Tool *ToolChain::getOffloadWrapper() const { + if (!OffloadWrapper) + OffloadWrapper.reset(new tools::OffloadWrapper(*this)); + return OffloadWrapper.get(); +} + Tool *ToolChain::getTool(Action::ActionClass AC) const { switch (AC) { case Action::AssembleJobClass: @@ -324,6 +330,9 @@ Tool *ToolChain::getTool(Action::ActionClass AC) const { case Action::OffloadBundlingJobClass: case Action::OffloadUnbundlingJobClass: return getOffloadBundler(); + + case Action::OffloadWrapperJobClass: + return getOffloadWrapper(); } llvm_unreachable("Invalid tool kind."); diff --git a/lib/Driver/ToolChains/Clang.cpp b/lib/Driver/ToolChains/Clang.cpp index 98e8c57235..22b830ee83 100644 --- a/lib/Driver/ToolChains/Clang.cpp +++ b/lib/Driver/ToolChains/Clang.cpp @@ -6463,3 +6463,57 @@ void OffloadBundler::ConstructJobMultipleOutputs( TCArgs.MakeArgString(getToolChain().GetProgramPath(getShortName())), CmdArgs, None)); } + +void OffloadWrapper::ConstructJob(Compilation &C, const JobAction &JA, + const InputInfo &Output, + const InputInfoList &Inputs, + const ArgList &Args, + const char *LinkingOutput) const { + ArgStringList CmdArgs; + + const llvm::Triple 
&Triple = getToolChain().getEffectiveTriple(); + + // Add the "effective" target triple. + CmdArgs.push_back("-target"); + CmdArgs.push_back(Args.MakeArgString(Triple.getTriple())); + + assert(JA.getInputs().size() == Inputs.size() && + "Not have inputs for all dependence actions??"); + + // Add offload targets. It is a comma-separated list of offload target + // triples. + SmallString<128> Targets; + Targets += "-offload-targets="; + for (unsigned I = 0; I < Inputs.size(); ++I) { + if (I) + Targets += ','; + + // Get input's Offload Kind and ToolChain. + const auto *OA = cast(JA.getInputs()[I]); + assert(OA->hasSingleDeviceDependence(/*DoNotConsiderHostActions=*/true) && + "Expected one device dependence!"); + const ToolChain *DeviceTC = nullptr; + OA->doOnEachDependence([&DeviceTC](Action *, const ToolChain *TC, + const char *) { DeviceTC = TC; }); + + // And add it to the offload targets. + Targets += DeviceTC->getTriple().normalize(); + } + CmdArgs.push_back(Args.MakeArgString(Targets)); + + // Add the output file name. + assert(Output.isFilename() && "Invalid output."); + CmdArgs.push_back("-o"); + CmdArgs.push_back(Output.getFilename()); + + // Add inputs. + for (const InputInfo &I : Inputs) { + assert(I.isFilename() && "Invalid input."); + CmdArgs.push_back(I.getFilename()); + } + + C.addCommand(std::make_unique( + JA, *this, + Args.MakeArgString(getToolChain().GetProgramPath(getShortName())), + CmdArgs, Inputs)); +} diff --git a/lib/Driver/ToolChains/Clang.h b/lib/Driver/ToolChains/Clang.h index 8b6ac43ebd..b345c02489 100644 --- a/lib/Driver/ToolChains/Clang.h +++ b/lib/Driver/ToolChains/Clang.h @@ -152,6 +152,20 @@ public: const llvm::opt::ArgList &TCArgs, const char *LinkingOutput) const override; }; + +/// Offload wrapper tool. 
+class LLVM_LIBRARY_VISIBILITY OffloadWrapper final : public Tool { +public: + OffloadWrapper(const ToolChain &TC) + : Tool("offload wrapper", "clang-offload-wrapper", TC) {} + + bool hasIntegratedCPP() const override { return false; } + void ConstructJob(Compilation &C, const JobAction &JA, + const InputInfo &Output, const InputInfoList &Inputs, + const llvm::opt::ArgList &TCArgs, + const char *LinkingOutput) const override; +}; + } // end namespace tools } // end namespace driver diff --git a/lib/Driver/ToolChains/CommonArgs.cpp b/lib/Driver/ToolChains/CommonArgs.cpp index 47964099bd..159b42a642 100644 --- a/lib/Driver/ToolChains/CommonArgs.cpp +++ b/lib/Driver/ToolChains/CommonArgs.cpp @@ -1244,120 +1244,6 @@ void tools::AddRunTimeLibs(const ToolChain &TC, const Driver &D, } } -/// Add OpenMP linker script arguments at the end of the argument list so that -/// the fat binary is built by embedding each of the device images into the -/// host. The linker script also defines a few symbols required by the code -/// generation so that the images can be easily retrieved at runtime by the -/// offloading library. This should be used only in tool chains that support -/// linker scripts. -void tools::AddOpenMPLinkerScript(const ToolChain &TC, Compilation &C, - const InputInfo &Output, - const InputInfoList &Inputs, - const ArgList &Args, ArgStringList &CmdArgs, - const JobAction &JA) { - - // If this is not an OpenMP host toolchain, we don't need to do anything. - if (!JA.isHostOffloading(Action::OFK_OpenMP)) - return; - - // Create temporary linker script. Keep it if save-temps is enabled. 
- const char *LKS; - SmallString<256> Name = llvm::sys::path::filename(Output.getFilename()); - if (C.getDriver().isSaveTempsEnabled()) { - llvm::sys::path::replace_extension(Name, "lk"); - LKS = C.getArgs().MakeArgString(Name.c_str()); - } else { - llvm::sys::path::replace_extension(Name, ""); - Name = C.getDriver().GetTemporaryPath(Name, "lk"); - LKS = C.addTempFile(C.getArgs().MakeArgString(Name.c_str())); - } - - // Add linker script option to the command. - CmdArgs.push_back("-T"); - CmdArgs.push_back(LKS); - - // Create a buffer to write the contents of the linker script. - std::string LksBuffer; - llvm::raw_string_ostream LksStream(LksBuffer); - - // Get the OpenMP offload tool chains so that we can extract the triple - // associated with each device input. - auto OpenMPToolChains = C.getOffloadToolChains(); - assert(OpenMPToolChains.first != OpenMPToolChains.second && - "No OpenMP toolchains??"); - - // Track the input file name and device triple in order to build the script, - // inserting binaries in the designated sections. - SmallVector, 8> InputBinaryInfo; - - // Add commands to embed target binaries. We ensure that each section and - // image is 16-byte aligned. This is not mandatory, but increases the - // likelihood of data to be aligned with a cache block in several main host - // machines. - LksStream << "/*\n"; - LksStream << " OpenMP Offload Linker Script\n"; - LksStream << " *** Automatically generated by Clang ***\n"; - LksStream << "*/\n"; - LksStream << "TARGET(binary)\n"; - auto DTC = OpenMPToolChains.first; - for (auto &II : Inputs) { - const Action *A = II.getAction(); - // Is this a device linking action? 
- if (A && isa(A) && - A->isDeviceOffloading(Action::OFK_OpenMP)) { - assert(DTC != OpenMPToolChains.second && - "More device inputs than device toolchains??"); - InputBinaryInfo.push_back(std::make_pair( - DTC->second->getTriple().normalize(), II.getFilename())); - ++DTC; - LksStream << "INPUT(" << II.getFilename() << ")\n"; - } - } - - assert(DTC == OpenMPToolChains.second && - "Less device inputs than device toolchains??"); - - LksStream << "SECTIONS\n"; - LksStream << "{\n"; - - // Put each target binary into a separate section. - for (const auto &BI : InputBinaryInfo) { - LksStream << " .omp_offloading." << BI.first << " :\n"; - LksStream << " ALIGN(0x10)\n"; - LksStream << " {\n"; - LksStream << " PROVIDE_HIDDEN(.omp_offloading.img_start." << BI.first - << " = .);\n"; - LksStream << " " << BI.second << "\n"; - LksStream << " PROVIDE_HIDDEN(.omp_offloading.img_end." << BI.first - << " = .);\n"; - LksStream << " }\n"; - } - - LksStream << "}\n"; - LksStream << "INSERT BEFORE .data\n"; - LksStream.flush(); - - // Dump the contents of the linker script if the user requested that. We - // support this option to enable testing of behavior with -###. - if (C.getArgs().hasArg(options::OPT_fopenmp_dump_offload_linker_script)) - llvm::errs() << LksBuffer; - - // If this is a dry run, do not create the linker script file. - if (C.getArgs().hasArg(options::OPT__HASH_HASH_HASH)) - return; - - // Open script file and write the contents. - std::error_code EC; - llvm::raw_fd_ostream Lksf(LKS, EC, llvm::sys::fs::OF_None); - - if (EC) { - C.getDriver().Diag(clang::diag::err_unable_to_make_temp) << EC.message(); - return; - } - - Lksf << LksBuffer; -} - /// Add HIP linker script arguments at the end of the argument list so that /// the fat binary is built by embedding the device images into the host. 
The /// linker script also defines a symbol required by the code generation so that diff --git a/lib/Driver/ToolChains/CommonArgs.h b/lib/Driver/ToolChains/CommonArgs.h index 1aff07ab8f..63359d7736 100644 --- a/lib/Driver/ToolChains/CommonArgs.h +++ b/lib/Driver/ToolChains/CommonArgs.h @@ -45,13 +45,6 @@ void AddRunTimeLibs(const ToolChain &TC, const Driver &D, llvm::opt::ArgStringList &CmdArgs, const llvm::opt::ArgList &Args); -void AddOpenMPLinkerScript(const ToolChain &TC, Compilation &C, - const InputInfo &Output, - const InputInfoList &Inputs, - const llvm::opt::ArgList &Args, - llvm::opt::ArgStringList &CmdArgs, - const JobAction &JA); - void AddHIPLinkerScript(const ToolChain &TC, Compilation &C, const InputInfo &Output, const InputInfoList &Inputs, const llvm::opt::ArgList &Args, diff --git a/lib/Driver/ToolChains/Cuda.cpp b/lib/Driver/ToolChains/Cuda.cpp index 4cada743b8..8c704a3078 100644 --- a/lib/Driver/ToolChains/Cuda.cpp +++ b/lib/Driver/ToolChains/Cuda.cpp @@ -563,8 +563,6 @@ void NVPTX::OpenMPLinker::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back(CubinF); } - AddOpenMPLinkerScript(getToolChain(), C, Output, Inputs, Args, CmdArgs, JA); - const char *Exec = Args.MakeArgString(getToolChain().GetProgramPath("nvlink")); C.addCommand(std::make_unique(JA, *this, Exec, CmdArgs, Inputs)); diff --git a/lib/Driver/ToolChains/Gnu.cpp b/lib/Driver/ToolChains/Gnu.cpp index c7abe5c69a..c302a31cd2 100644 --- a/lib/Driver/ToolChains/Gnu.cpp +++ b/lib/Driver/ToolChains/Gnu.cpp @@ -499,7 +499,7 @@ void tools::gnutools::Linker::ConstructJob(Compilation &C, const JobAction &JA, P = ToolChain.GetFilePath(crtbegin); } CmdArgs.push_back(Args.MakeArgString(P)); - } + } // Add crtfastmath.o if available and fast math is enabled. ToolChain.AddFastMathRuntimeIfAvailable(Args, CmdArgs); @@ -623,9 +623,6 @@ void tools::gnutools::Linker::ConstructJob(Compilation &C, const JobAction &JA, } } - // Add OpenMP offloading linker script args if required. 
- AddOpenMPLinkerScript(getToolChain(), C, Output, Inputs, Args, CmdArgs, JA); - // Add HIP offloading linker script args if required. AddHIPLinkerScript(getToolChain(), C, Output, Inputs, Args, CmdArgs, JA, *this); diff --git a/test/Driver/clang-offload-wrapper.c b/test/Driver/clang-offload-wrapper.c new file mode 100644 index 0000000000..542930bc3c --- /dev/null +++ b/test/Driver/clang-offload-wrapper.c @@ -0,0 +1,29 @@ +// REQUIRES: x86-registered-target + +// +// Check help message. +// +// RUN: clang-offload-wrapper --help | FileCheck %s --check-prefix CHECK-HELP +// CHECK-HELP: {{.*}}OVERVIEW: A tool to create a wrapper bitcode for offload target binaries. Takes offload +// CHECK-HELP: {{.*}}target binaries as input and produces bitcode file containing target binaries packaged +// CHECK-HELP: {{.*}}as data. +// CHECK-HELP: {{.*}}USAGE: clang-offload-wrapper [options] +// CHECK-HELP: {{.*}} -o= - Output filename +// CHECK-HELP: {{.*}} --offload-targets= - Comma-separated list of device target triples +// CHECK-HELP: {{.*}} --target= - Target triple for the output module + +// +// Generate a file to wrap. +// +// RUN: echo 'Content of device file' > %t.tgt + +// +// Check bitcode produced by the wrapper tool. 
+// +// RUN: clang-offload-wrapper -target=x86_64-pc-linux-gnu -offload-targets=powerpc64le-ibm-linux-gnu -o %t.wrapper.bc %t.tgt +// RUN: llvm-dis %t.wrapper.bc -o - | FileCheck %s --check-prefix CHECK-IR + +// CHECK-IR: target triple = "x86_64-pc-linux-gnu" + +// CHECK-IR: @.omp_offloading.img_start.powerpc64le-ibm-linux-gnu = hidden unnamed_addr constant [{{[0-9]+}} x i8] c"Content of device file{{.+}}", section ".omp_offloading.powerpc64le-ibm-linux-gnu" +// CHECK-IR: @.omp_offloading.img_end.powerpc64le-ibm-linux-gnu = hidden unnamed_addr constant [0 x i8] zeroinitializer, section ".omp_offloading.powerpc64le-ibm-linux-gnu" diff --git a/test/Driver/openmp-offload-gpu.c b/test/Driver/openmp-offload-gpu.c index 3d2ac4525f..dc4dbd1f37 100644 --- a/test/Driver/openmp-offload-gpu.c +++ b/test/Driver/openmp-offload-gpu.c @@ -55,7 +55,7 @@ // RUN: -fopenmp-targets=nvptx64-nvidia-cuda %s 2>&1 \ // RUN: | FileCheck -check-prefix=CHK-CUBIN-NVLINK %s -// CHK-CUBIN-NVLINK: clang{{.*}}" "-o" "[[PTX:.*\.s]]" +// CHK-CUBIN-NVLINK: clang{{.*}}" {{.*}}"-fopenmp-is-device" {{.*}}"-o" "[[PTX:.*\.s]]" // CHK-CUBIN-NVLINK-NEXT: ptxas{{.*}}" "--output-file" "[[CUBIN:.*\.cubin]]" {{.*}}"[[PTX]]" // CHK-CUBIN-NVLINK-NEXT: nvlink{{.*}}" {{.*}}"[[CUBIN]]" diff --git a/test/Driver/openmp-offload.c b/test/Driver/openmp-offload.c index 0ba3c43263..292818b84f 100644 --- a/test/Driver/openmp-offload.c +++ b/test/Driver/openmp-offload.c @@ -106,15 +106,18 @@ // CHK-PHASES: 2: compiler, {1}, ir, (host-openmp) // CHK-PHASES: 3: backend, {2}, assembler, (host-openmp) // CHK-PHASES: 4: assembler, {3}, object, (host-openmp) -// CHK-PHASES: 5: linker, {4}, image, (host-openmp) -// CHK-PHASES: 6: input, "[[INPUT]]", c, (device-openmp) -// CHK-PHASES: 7: preprocessor, {6}, cpp-output, (device-openmp) -// CHK-PHASES: 8: compiler, {7}, ir, (device-openmp) -// CHK-PHASES: 9: offload, "host-openmp (powerpc64le-ibm-linux-gnu)" {2}, "device-openmp (x86_64-pc-linux-gnu)" {8}, ir -// CHK-PHASES: 10: 
backend, {9}, assembler, (device-openmp) -// CHK-PHASES: 11: assembler, {10}, object, (device-openmp) -// CHK-PHASES: 12: linker, {11}, image, (device-openmp) -// CHK-PHASES: 13: offload, "host-openmp (powerpc64le-ibm-linux-gnu)" {5}, "device-openmp (x86_64-pc-linux-gnu)" {12}, image +// CHK-PHASES: 5: input, "[[INPUT]]", c, (device-openmp) +// CHK-PHASES: 6: preprocessor, {5}, cpp-output, (device-openmp) +// CHK-PHASES: 7: compiler, {6}, ir, (device-openmp) +// CHK-PHASES: 8: offload, "host-openmp (powerpc64le-ibm-linux-gnu)" {2}, "device-openmp (x86_64-pc-linux-gnu)" {7}, ir +// CHK-PHASES: 9: backend, {8}, assembler, (device-openmp) +// CHK-PHASES: 10: assembler, {9}, object, (device-openmp) +// CHK-PHASES: 11: linker, {10}, image, (device-openmp) +// CHK-PHASES: 12: offload, "device-openmp (x86_64-pc-linux-gnu)" {11}, image +// CHK-PHASES: 13: clang-offload-wrapper, {12}, ir, (host-openmp) +// CHK-PHASES: 14: backend, {13}, assembler, (host-openmp) +// CHK-PHASES: 15: assembler, {14}, object, (host-openmp) +// CHK-PHASES: 16: linker, {4, 15}, image, (host-openmp) /// ########################################################################### @@ -128,15 +131,15 @@ // CHK-PHASES-LIB: 3: compiler, {2}, ir, (host-openmp) // CHK-PHASES-LIB: 4: backend, {3}, assembler, (host-openmp) // CHK-PHASES-LIB: 5: assembler, {4}, object, (host-openmp) -// CHK-PHASES-LIB: 6: linker, {0, 5}, image, (host-openmp) -// CHK-PHASES-LIB: 7: input, "somelib", object, (device-openmp) -// CHK-PHASES-LIB: 8: input, "[[INPUT]]", c, (device-openmp) -// CHK-PHASES-LIB: 9: preprocessor, {8}, cpp-output, (device-openmp) -// CHK-PHASES-LIB: 10: compiler, {9}, ir, (device-openmp) -// CHK-PHASES-LIB: 11: offload, "host-openmp (powerpc64-ibm-linux-gnu)" {3}, "device-openmp (x86_64-pc-linux-gnu)" {10}, ir -// CHK-PHASES-LIB: 12: backend, {11}, assembler, (device-openmp) -// CHK-PHASES-LIB: 13: assembler, {12}, object, (device-openmp) -// CHK-PHASES-LIB: 14: linker, {7, 13}, image, (device-openmp) 
+// CHK-PHASES-LIB: 6: input, "somelib", object, (device-openmp) +// CHK-PHASES-LIB: 7: input, "[[INPUT]]", c, (device-openmp) +// CHK-PHASES-LIB: 8: preprocessor, {7}, cpp-output, (device-openmp) +// CHK-PHASES-LIB: 9: compiler, {8}, ir, (device-openmp) +// CHK-PHASES-LIB: 10: offload, "host-openmp (powerpc64-ibm-linux-gnu)" {3}, "device-openmp (x86_64-pc-linux-gnu)" {9}, ir +// CHK-PHASES-LIB: 11: backend, {10}, assembler, (device-openmp) +// CHK-PHASES-LIB: 12: assembler, {11}, object, (device-openmp) +// CHK-PHASES-LIB: 13: linker, {6, 12}, image, (device-openmp) +// CHK-PHASES-LIB: 14: offload, "device-openmp (x86_64-pc-linux-gnu)" {13}, image // CHK-PHASES-LIB: 15: input, "somelib", object, (device-openmp) // CHK-PHASES-LIB: 16: input, "[[INPUT]]", c, (device-openmp) // CHK-PHASES-LIB: 17: preprocessor, {16}, cpp-output, (device-openmp) @@ -145,8 +148,11 @@ // CHK-PHASES-LIB: 20: backend, {19}, assembler, (device-openmp) // CHK-PHASES-LIB: 21: assembler, {20}, object, (device-openmp) // CHK-PHASES-LIB: 22: linker, {15, 21}, image, (device-openmp) -// CHK-PHASES-LIB: 23: offload, "host-openmp (powerpc64-ibm-linux-gnu)" {6}, "device-openmp (x86_64-pc-linux-gnu)" {14}, "device-openmp (powerpc64-ibm-linux-gnu)" {22}, image - +// CHK-PHASES-LIB: 23: offload, "device-openmp (powerpc64-ibm-linux-gnu)" {22}, image +// CHK-PHASES-LIB: 24: clang-offload-wrapper, {14, 23}, ir, (host-openmp) +// CHK-PHASES-LIB: 25: backend, {24}, assembler, (host-openmp) +// CHK-PHASES-LIB: 26: assembler, {25}, object, (host-openmp) +// CHK-PHASES-LIB: 27: linker, {0, 5, 26}, image, (host-openmp) /// ########################################################################### @@ -165,21 +171,21 @@ // CHK-PHASES-FILES: 8: compiler, {7}, ir, (host-openmp) // CHK-PHASES-FILES: 9: backend, {8}, assembler, (host-openmp) // CHK-PHASES-FILES: 10: assembler, {9}, object, (host-openmp) -// CHK-PHASES-FILES: 11: linker, {0, 5, 10}, image, (host-openmp) -// CHK-PHASES-FILES: 12: input, "somelib", 
object, (device-openmp) -// CHK-PHASES-FILES: 13: input, "[[INPUT1]]", c, (device-openmp) -// CHK-PHASES-FILES: 14: preprocessor, {13}, cpp-output, (device-openmp) -// CHK-PHASES-FILES: 15: compiler, {14}, ir, (device-openmp) -// CHK-PHASES-FILES: 16: offload, "host-openmp (powerpc64-ibm-linux-gnu)" {3}, "device-openmp (x86_64-pc-linux-gnu)" {15}, ir -// CHK-PHASES-FILES: 17: backend, {16}, assembler, (device-openmp) -// CHK-PHASES-FILES: 18: assembler, {17}, object, (device-openmp) -// CHK-PHASES-FILES: 19: input, "[[INPUT2]]", c, (device-openmp) -// CHK-PHASES-FILES: 20: preprocessor, {19}, cpp-output, (device-openmp) -// CHK-PHASES-FILES: 21: compiler, {20}, ir, (device-openmp) -// CHK-PHASES-FILES: 22: offload, "host-openmp (powerpc64-ibm-linux-gnu)" {8}, "device-openmp (x86_64-pc-linux-gnu)" {21}, ir -// CHK-PHASES-FILES: 23: backend, {22}, assembler, (device-openmp) -// CHK-PHASES-FILES: 24: assembler, {23}, object, (device-openmp) -// CHK-PHASES-FILES: 25: linker, {12, 18, 24}, image, (device-openmp) +// CHK-PHASES-FILES: 11: input, "somelib", object, (device-openmp) +// CHK-PHASES-FILES: 12: input, "[[INPUT1]]", c, (device-openmp) +// CHK-PHASES-FILES: 13: preprocessor, {12}, cpp-output, (device-openmp) +// CHK-PHASES-FILES: 14: compiler, {13}, ir, (device-openmp) +// CHK-PHASES-FILES: 15: offload, "host-openmp (powerpc64-ibm-linux-gnu)" {3}, "device-openmp (x86_64-pc-linux-gnu)" {14}, ir +// CHK-PHASES-FILES: 16: backend, {15}, assembler, (device-openmp) +// CHK-PHASES-FILES: 17: assembler, {16}, object, (device-openmp) +// CHK-PHASES-FILES: 18: input, "[[INPUT2]]", c, (device-openmp) +// CHK-PHASES-FILES: 19: preprocessor, {18}, cpp-output, (device-openmp) +// CHK-PHASES-FILES: 20: compiler, {19}, ir, (device-openmp) +// CHK-PHASES-FILES: 21: offload, "host-openmp (powerpc64-ibm-linux-gnu)" {8}, "device-openmp (x86_64-pc-linux-gnu)" {20}, ir +// CHK-PHASES-FILES: 22: backend, {21}, assembler, (device-openmp) +// CHK-PHASES-FILES: 23: assembler, {22}, 
object, (device-openmp) +// CHK-PHASES-FILES: 24: linker, {11, 17, 23}, image, (device-openmp) +// CHK-PHASES-FILES: 25: offload, "device-openmp (x86_64-pc-linux-gnu)" {24}, image // CHK-PHASES-FILES: 26: input, "somelib", object, (device-openmp) // CHK-PHASES-FILES: 27: input, "[[INPUT1]]", c, (device-openmp) // CHK-PHASES-FILES: 28: preprocessor, {27}, cpp-output, (device-openmp) @@ -194,7 +200,11 @@ // CHK-PHASES-FILES: 37: backend, {36}, assembler, (device-openmp) // CHK-PHASES-FILES: 38: assembler, {37}, object, (device-openmp) // CHK-PHASES-FILES: 39: linker, {26, 32, 38}, image, (device-openmp) -// CHK-PHASES-FILES: 40: offload, "host-openmp (powerpc64-ibm-linux-gnu)" {11}, "device-openmp (x86_64-pc-linux-gnu)" {25}, "device-openmp (powerpc64-ibm-linux-gnu)" {39}, image +// CHK-PHASES-FILES: 40: offload, "device-openmp (powerpc64-ibm-linux-gnu)" {39}, image +// CHK-PHASES-FILES: 41: clang-offload-wrapper, {25, 40}, ir, (host-openmp) +// CHK-PHASES-FILES: 42: backend, {41}, assembler, (host-openmp) +// CHK-PHASES-FILES: 43: assembler, {42}, object, (host-openmp) +// CHK-PHASES-FILES: 44: linker, {0, 5, 10, 43}, image, (host-openmp) /// ########################################################################### @@ -216,15 +226,18 @@ // CHK-PHASES-WITH-CUDA: 11: offload, "host-cuda-openmp (powerpc64le-ibm-linux-gnu)" {2}, "device-cuda (nvptx64-nvidia-cuda)" {10}, ir // CHK-PHASES-WITH-CUDA: 12: backend, {11}, assembler, (host-cuda-openmp) // CHK-PHASES-WITH-CUDA: 13: assembler, {12}, object, (host-cuda-openmp) -// CHK-PHASES-WITH-CUDA: 14: linker, {13}, image, (host-cuda-openmp) -// CHK-PHASES-WITH-CUDA: 15: input, "[[INPUT]]", cuda, (device-openmp) -// CHK-PHASES-WITH-CUDA: 16: preprocessor, {15}, cuda-cpp-output, (device-openmp) -// CHK-PHASES-WITH-CUDA: 17: compiler, {16}, ir, (device-openmp) -// CHK-PHASES-WITH-CUDA: 18: offload, "host-cuda-openmp (powerpc64le-ibm-linux-gnu)" {2}, "device-openmp (nvptx64-nvidia-cuda)" {17}, ir -// CHK-PHASES-WITH-CUDA: 19: 
backend, {18}, assembler, (device-openmp) -// CHK-PHASES-WITH-CUDA: 20: assembler, {19}, object, (device-openmp) -// CHK-PHASES-WITH-CUDA: 21: linker, {20}, image, (device-openmp) -// CHK-PHASES-WITH-CUDA: 22: offload, "host-cuda-openmp (powerpc64le-ibm-linux-gnu)" {14}, "device-openmp (nvptx64-nvidia-cuda)" {21}, image +// CHK-PHASES-WITH-CUDA: 14: input, "[[INPUT]]", cuda, (device-openmp) +// CHK-PHASES-WITH-CUDA: 15: preprocessor, {14}, cuda-cpp-output, (device-openmp) +// CHK-PHASES-WITH-CUDA: 16: compiler, {15}, ir, (device-openmp) +// CHK-PHASES-WITH-CUDA: 17: offload, "host-cuda-openmp (powerpc64le-ibm-linux-gnu)" {2}, "device-openmp (nvptx64-nvidia-cuda)" {16}, ir +// CHK-PHASES-WITH-CUDA: 18: backend, {17}, assembler, (device-openmp) +// CHK-PHASES-WITH-CUDA: 19: assembler, {18}, object, (device-openmp) +// CHK-PHASES-WITH-CUDA: 20: linker, {19}, image, (device-openmp) +// CHK-PHASES-WITH-CUDA: 21: offload, "device-openmp (nvptx64-nvidia-cuda)" {20}, image +// CHK-PHASES-WITH-CUDA: 22: clang-offload-wrapper, {21}, ir, (host-cuda-openmp) +// CHK-PHASES-WITH-CUDA: 23: backend, {22}, assembler, (host-cuda-openmp) +// CHK-PHASES-WITH-CUDA: 24: assembler, {23}, object, (host-cuda-openmp) +// CHK-PHASES-WITH-CUDA: 25: linker, {13, 24}, image, (host-cuda-openmp) /// ########################################################################### @@ -237,65 +250,31 @@ /// -fopenmp-host-ir-file-path: specifies the host IR file that can be loaded by /// the target code generation to gather information about which declaration /// really need to be emitted. -/// We use -fopenmp-dump-offload-linker-script to dump the linker script and -/// check its contents. 
/// -// RUN: %clang -### -fopenmp=libomp -o %t.out -target powerpc64le-linux -fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu %s -fopenmp-dump-offload-linker-script -no-canonical-prefixes 2>&1 \ -// RUN: | FileCheck -check-prefix=CHK-COMMANDS -check-prefix=CHK-LKS -check-prefix=CHK-LKS-REG %s -// RUN: %clang -### -fopenmp=libomp -o %t.out -target powerpc64le-linux -fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu %s -save-temps -fopenmp-dump-offload-linker-script -no-canonical-prefixes 2>&1 \ -// RUN: | FileCheck -check-prefix=CHK-COMMANDS-ST -check-prefix=CHK-LKS -check-prefix=CHK-LKS-ST %s - -// Make sure we are not dumping the script unless the user requested it. // RUN: %clang -### -fopenmp=libomp -o %t.out -target powerpc64le-linux -fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu %s -no-canonical-prefixes 2>&1 \ -// RUN: | FileCheck -check-prefix=CHK-LKS-NODUMP %s +// RUN: | FileCheck -check-prefix=CHK-COMMANDS %s // RUN: %clang -### -fopenmp=libomp -o %t.out -target powerpc64le-linux -fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu %s -save-temps -no-canonical-prefixes 2>&1 \ -// RUN: | FileCheck -check-prefix=CHK-LKS-NODUMP %s - -// -// Check the linker script contains what we expect. -// -// CHK-LKS: /* -// CHK-LKS: OpenMP Offload Linker Script -// CHK-LKS: *** Automatically generated by Clang *** -// CHK-LKS-NODUMP-NOT: OpenMP Offload Linker Script. 
-// CHK-LKS: */ -// CHK-LKS: TARGET(binary) -// CHK-LKS-REG: INPUT([[T1BIN:.+\.out]]) -// CHK-LKS-REG: INPUT([[T2BIN:.+\.out]]) -// CHK-LKS-ST: INPUT([[T1BIN:.+\.out-openmp-powerpc64le-ibm-linux-gnu]]) -// CHK-LKS-ST: INPUT([[T2BIN:.+\.out-openmp-x86_64-pc-linux-gnu]]) -// CHK-LKS: SECTIONS -// CHK-LKS: { -// CHK-LKS: .omp_offloading.powerpc64le-ibm-linux-gnu : -// CHK-LKS: ALIGN(0x10) -// CHK-LKS: { -// CHK-LKS: PROVIDE_HIDDEN(.omp_offloading.img_start.powerpc64le-ibm-linux-gnu = .); -// CHK-LKS: [[T1BIN]] -// CHK-LKS: PROVIDE_HIDDEN(.omp_offloading.img_end.powerpc64le-ibm-linux-gnu = .); -// CHK-LKS: } -// CHK-LKS: .omp_offloading.x86_64-pc-linux-gnu : -// CHK-LKS: ALIGN(0x10) -// CHK-LKS: { -// CHK-LKS: PROVIDE_HIDDEN(.omp_offloading.img_start.x86_64-pc-linux-gnu = .); -// CHK-LKS: [[T2BIN]] -// CHK-LKS: PROVIDE_HIDDEN(.omp_offloading.img_end.x86_64-pc-linux-gnu = .); -// CHK-LKS: } -// CHK-LKS: } -// CHK-LKS: INSERT BEFORE .data +// RUN: | FileCheck -check-prefix=CHK-COMMANDS-ST %s // -// Generate host BC file. +// Generate host BC file and host object. 
// // CHK-COMMANDS: clang{{.*}}" "-cc1" "-triple" "powerpc64le-unknown-linux" {{.*}}"-emit-llvm-bc" // CHK-COMMANDS-SAME: "-fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu" // CHK-COMMANDS-SAME: "-o" " // CHK-COMMANDS-SAME: [[HOSTBC:[^\\/]+\.bc]]" "-x" "c" " // CHK-COMMANDS-SAME: [[INPUT:[^\\/]+\.c]]" +// CHK-COMMANDS: clang{{.*}}" "-cc1" "-triple" "powerpc64le-unknown-linux" {{.*}}"-emit-obj" {{.*}}"-fopenmp" {{.*}}"-o" " +// CHK-COMMANDS-SAME: [[HOSTOBJ:[^\\/]+\.o]]" "-x" "ir" "{{.*}}[[HOSTBC]]" // CHK-COMMANDS-ST: clang{{.*}}" "-cc1" "-triple" "powerpc64le-unknown-linux" {{.*}}"-E" {{.*}}"-fopenmp" {{.*}}"-o" " // CHK-COMMANDS-ST-SAME: [[HOSTPP:[^\\/]+\.i]]" "-x" "c" " // CHK-COMMANDS-ST-SAME: [[INPUT:[^\\/]+\.c]]" // CHK-COMMANDS-ST: clang{{.*}}" "-cc1" "-triple" "powerpc64le-unknown-linux" {{.*}}"-emit-llvm-bc" {{.*}}"-fopenmp" {{.*}}"-fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu" {{.*}}"-o" " // CHK-COMMANDS-ST-SAME: [[HOSTBC:[^\\/]+\.bc]]" "-x" "cpp-output" "{{.*}}[[HOSTPP]]" +// CHK-COMMANDS-ST: clang{{.*}}" "-cc1" "-triple" "powerpc64le-unknown-linux" {{.*}}"-S" {{.*}}"-fopenmp" {{.*}}"-o" " +// CHK-COMMANDS-ST-SAME: [[HOSTASM:[^\\/]+\.s]]" "-x" "ir" "{{.*}}[[HOSTBC]]" +// CHK-COMMANDS-ST: clang{{.*}}" "-cc1as" "-triple" "powerpc64le-unknown-linux" "-filetype" "obj" {{.*}}"-o" " +// CHK-COMMANDS-ST-SAME: [[HOSTOBJ:[^\\/]+\.o]]" "{{.*}}[[HOSTASM]]" // // Compile for the powerpc device. @@ -335,21 +314,26 @@ // CHK-COMMANDS-ST-SAME: [[T2BIN:[^\\/]+\.out-openmp-x86_64-pc-linux-gnu]]" {{.*}}"{{.*}}[[T2OBJ]]" // -// Generate host object from the BC file and link using the linker script. +// Create wrapper BC file and wrapper object. 
// +// CHK-COMMANDS: clang-offload-wrapper{{(\.exe)?}}" "-target" "powerpc64le-unknown-linux" {{.*}}"-o" " +// CHK-COMMANDS-SAME: [[WRAPPERBC:[^\\/]+\.bc]]" "{{.*}}[[T1BIN]]" "{{.*}}[[T2BIN]]" // CHK-COMMANDS: clang{{.*}}" "-cc1" "-triple" "powerpc64le-unknown-linux" {{.*}}"-emit-obj" {{.*}}"-fopenmp" {{.*}}"-o" " -// CHK-COMMANDS-SAME: [[HOSTOBJ:[^\\/]+\.o]]" "-x" "ir" "{{.*}}[[HOSTBC]]" -// CHK-COMMANDS: ld{{(\.exe)?}}" {{.*}}"-o" " -// CHK-COMMANDS-SAME: [[HOSTBIN:[^\\/]+\.out]]" {{.*}}"-lomptarget" {{.*}}"-T" " -// CHK-COMMANDS-SAME: [[HOSTLK:[^\\/]+\.lk]]" +// CHK-COMMANDS-SAME: [[WRAPPEROBJ:[^\\/]+\.o]]" "-x" "ir" "{{.*}}[[WRAPPERBC]]" +// CHK-COMMANDS-ST: clang-offload-wrapper{{(\.exe)?}}" "-target" "powerpc64le-unknown-linux" {{.*}}"-o" " +// CHK-COMMANDS-ST-SAME: [[WRAPPERBC:[^\\/]+\.bc]]" "{{.*}}[[T1BIN]]" "{{.*}}[[T2BIN]]" // CHK-COMMANDS-ST: clang{{.*}}" "-cc1" "-triple" "powerpc64le-unknown-linux" {{.*}}"-S" {{.*}}"-fopenmp" {{.*}}"-o" " -// CHK-COMMANDS-ST-SAME: [[HOSTASM:[^\\/]+\.s]]" "-x" "ir" "{{.*}}[[HOSTBC]]" +// CHK-COMMANDS-ST-SAME: [[WRAPPERASM:[^\\/]+\.s]]" "-x" "ir" "{{.*}}[[WRAPPERBC]]" // CHK-COMMANDS-ST: clang{{.*}}" "-cc1as" "-triple" "powerpc64le-unknown-linux" "-filetype" "obj" {{.*}}"-o" " -// CHK-COMMANDS-ST-SAME: [[HOSTOBJ:[^\\/]+\.o]]" "{{.*}}[[HOSTASM]]" -// CHK-COMMANDS-ST: ld{{(\.exe)?}}" {{.*}}"-o" " -// CHK-COMMANDS-ST-SAME: [[HOSTBIN:[^\\/]+\.out]]" {{.*}}"-lomptarget" {{.*}}"-T" " -// CHK-COMMANDS-ST-SAME: [[HOSTLK:[^\\/]+\.lk]]" +// CHK-COMMANDS-ST-SAME: [[WRAPPEROBJ:[^\\/]+\.o]]" "{{.*}}[[WRAPPERASM]]" +// +// Link host binary. 
+// +// CHK-COMMANDS: ld{{(\.exe)?}}" {{.*}}"-o" " +// CHK-COMMANDS-SAME: [[HOSTBIN:[^\\/]+\.out]]" {{.*}}"{{.*}}[[HOSTOBJ]]" "{{.*}}[[WRAPPEROBJ]]" {{.*}}"-lomptarget" +// CHK-COMMANDS-ST: ld{{(\.exe)?}}" {{.*}}"-o" " +// CHK-COMMANDS-ST-SAME: [[HOSTBIN:[^\\/]+\.out]]" {{.*}}"{{.*}}[[HOSTOBJ]]" "{{.*}}[[WRAPPEROBJ]]" {{.*}}"-lomptarget" /// ########################################################################### @@ -391,20 +375,24 @@ // CHK-UBACTIONS: 3: compiler, {2}, ir, (host-openmp) // CHK-UBACTIONS: 4: backend, {3}, assembler, (host-openmp) // CHK-UBACTIONS: 5: assembler, {4}, object, (host-openmp) -// CHK-UBACTIONS: 6: linker, {0, 5}, image, (host-openmp) -// CHK-UBACTIONS: 7: input, "somelib", object, (device-openmp) -// CHK-UBACTIONS: 8: compiler, {2}, ir, (device-openmp) -// CHK-UBACTIONS: 9: offload, "host-openmp (powerpc64le-unknown-linux)" {3}, "device-openmp (powerpc64le-ibm-linux-gnu)" {8}, ir -// CHK-UBACTIONS: 10: backend, {9}, assembler, (device-openmp) -// CHK-UBACTIONS: 11: assembler, {10}, object, (device-openmp) -// CHK-UBACTIONS: 12: linker, {7, 11}, image, (device-openmp) +// CHK-UBACTIONS: 6: input, "somelib", object, (device-openmp) +// CHK-UBACTIONS: 7: compiler, {2}, ir, (device-openmp) +// CHK-UBACTIONS: 8: offload, "host-openmp (powerpc64le-unknown-linux)" {3}, "device-openmp (powerpc64le-ibm-linux-gnu)" {7}, ir +// CHK-UBACTIONS: 9: backend, {8}, assembler, (device-openmp) +// CHK-UBACTIONS: 10: assembler, {9}, object, (device-openmp) +// CHK-UBACTIONS: 11: linker, {6, 10}, image, (device-openmp) +// CHK-UBACTIONS: 12: offload, "device-openmp (powerpc64le-ibm-linux-gnu)" {11}, image // CHK-UBACTIONS: 13: input, "somelib", object, (device-openmp) // CHK-UBACTIONS: 14: compiler, {2}, ir, (device-openmp) // CHK-UBACTIONS: 15: offload, "host-openmp (powerpc64le-unknown-linux)" {3}, "device-openmp (x86_64-pc-linux-gnu)" {14}, ir // CHK-UBACTIONS: 16: backend, {15}, assembler, (device-openmp) // CHK-UBACTIONS: 17: assembler, {16}, 
object, (device-openmp) // CHK-UBACTIONS: 18: linker, {13, 17}, image, (device-openmp) -// CHK-UBACTIONS: 19: offload, "host-openmp (powerpc64le-unknown-linux)" {6}, "device-openmp (powerpc64le-ibm-linux-gnu)" {12}, "device-openmp (x86_64-pc-linux-gnu)" {18}, image +// CHK-UBACTIONS: 19: offload, "device-openmp (x86_64-pc-linux-gnu)" {18}, image +// CHK-UBACTIONS: 20: clang-offload-wrapper, {12, 19}, ir, (host-openmp) +// CHK-UBACTIONS: 21: backend, {20}, assembler, (host-openmp) +// CHK-UBACTIONS: 22: assembler, {21}, object, (host-openmp) +// CHK-UBACTIONS: 23: linker, {0, 5, 22}, image, (host-openmp) /// ########################################################################### @@ -507,6 +495,8 @@ // CHK-UBJOBS-SAME: [[T2PP:[^\\/]+\.i]]" "-unbundle" // CHK-UBJOBS: clang{{.*}}" "-cc1" "-triple" "powerpc64le-unknown-linux" {{.*}}"-emit-llvm-bc" {{.*}}"-fopenmp" {{.*}}"-fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu" {{.*}}"-o" " // CHK-UBJOBS-SAME: [[HOSTBC:[^\\/]+\.bc]]" "-x" "cpp-output" "{{.*}}[[HOSTPP]]" +// CHK-UBJOBS: clang{{.*}}" "-cc1" "-triple" "powerpc64le-unknown-linux" {{.*}}"-emit-obj" {{.*}}"-fopenmp" {{.*}}"-o" " +// CHK-UBJOBS-SAME: [[HOSTOBJ:[^\\/]+\.o]]" "-x" "ir" "{{.*}}[[HOSTBC]]" // CHK-UBJOBS-ST: clang-offload-bundler{{.*}}" "-type=i" "-targets=host-powerpc64le-unknown-linux,openmp-powerpc64le-ibm-linux-gnu,openmp-x86_64-pc-linux-gnu" "-inputs= // CHK-UBJOBS-ST-SAME: [[INPUT:[^\\/]+\.i]]" "-outputs= // CHK-UBJOBS-ST-SAME: [[HOSTPP:[^\\/,]+\.i]], @@ -514,6 +504,10 @@ // CHK-UBJOBS-ST-SAME: [[T2PP:[^\\/,]+\.i]]" "-unbundle" // CHK-UBJOBS-ST: clang{{.*}}" "-cc1" "-triple" "powerpc64le-unknown-linux" {{.*}}"-emit-llvm-bc" {{.*}}"-fopenmp" {{.*}}"-fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu" {{.*}}"-o" " // CHK-UBJOBS-ST-SAME: [[HOSTBC:[^\\/]+\.bc]]" "-x" "cpp-output" "{{.*}}[[HOSTPP]]" +// CHK-UBJOBS-ST: clang{{.*}}" "-cc1" "-triple" "powerpc64le-unknown-linux" {{.*}}"-S" {{.*}}"-fopenmp" {{.*}}"-o" " +// 
CHK-UBJOBS-ST-SAME: [[HOSTASM:[^\\/]+\.s]]" "-x" "ir" "{{.*}}[[HOSTBC]]" +// CHK-UBJOBS-ST: clang{{.*}}" "-cc1as" "-triple" "powerpc64le-unknown-linux" "-filetype" "obj" {{.*}}"-o" " +// CHK-UBJOBS-ST-SAME: [[HOSTOBJ:[^\\/]+\.o]]" "{{.*}}[[HOSTASM]]" // Create target 1 object. // CHK-UBJOBS: clang{{.*}}" "-cc1" "-triple" "powerpc64le-ibm-linux-gnu" "-aux-triple" "powerpc64le-unknown-linux" {{.*}}"-emit-obj" {{.*}}"-fopenmp" {{.*}}"-fopenmp-is-device" "-fopenmp-host-ir-file-path" "{{.*}}[[HOSTBC]]" {{.*}}"-o" " @@ -543,19 +537,23 @@ // CHK-UBJOBS-ST: ld{{(\.exe)?}}" {{.*}}"-o" " // CHK-UBJOBS-ST-SAME: [[T2BIN:[^\\/]+\.out-openmp-x86_64-pc-linux-gnu]]" {{.*}}"{{.*}}[[T2OBJ]]" -// Create binary. +// Create wrapper BC file and wrapper object. +// CHK-UBJOBS: clang-offload-wrapper{{(\.exe)?}}" "-target" "powerpc64le-unknown-linux" {{.*}}"-o" " +// CHK-UBJOBS-SAME: [[WRAPPERBC:[^\\/]+\.bc]]" "{{.*}}[[T1BIN]]" "{{.*}}[[T2BIN]]" // CHK-UBJOBS: clang{{.*}}" "-cc1" "-triple" "powerpc64le-unknown-linux" {{.*}}"-emit-obj" {{.*}}"-fopenmp" {{.*}}"-o" " -// CHK-UBJOBS-SAME: [[HOSTOBJ:[^\\/]+\.o]]" "-x" "ir" "{{.*}}[[HOSTBC]]" -// CHK-UBJOBS: ld{{(\.exe)?}}" {{.*}}"-o" " -// CHK-UBJOBS-SAME: [[HOSTBIN:[^\\/]+\.out]]" {{.*}}"{{.*}}[[HOSTOBJ]]" {{.*}}"-T" " -// CHK-UBJOBS-SAME: [[LKS:[^\\/]+\.lk]]" +// CHK-UBJOBS-SAME: [[WRAPPEROBJ:[^\\/]+\.o]]" "-x" "ir" "{{.*}}[[WRAPPERBC]]" +// CHK-UBJOBS-ST: clang-offload-wrapper{{(\.exe)?}}" "-target" "powerpc64le-unknown-linux" {{.*}}"-o" " +// CHK-UBJOBS-ST-SAME: [[WRAPPERBC:[^\\/]+\.bc]]" "{{.*}}[[T1BIN]]" "{{.*}}[[T2BIN]]" // CHK-UBJOBS-ST: clang{{.*}}" "-cc1" "-triple" "powerpc64le-unknown-linux" {{.*}}"-S" {{.*}}"-fopenmp" {{.*}}"-o" " -// CHK-UBJOBS-ST-SAME: [[HOSTASM:[^\\/]+\.s]]" "-x" "ir" "{{.*}}[[HOSTBC]]" +// CHK-UBJOBS-ST-SAME: [[WRAPPERASM:[^\\/]+\.s]]" "-x" "ir" "{{.*}}[[WRAPPERBC]]" // CHK-UBJOBS-ST: clang{{.*}}" "-cc1as" "-triple" "powerpc64le-unknown-linux" "-filetype" "obj" {{.*}}"-o" " -// CHK-UBJOBS-ST-SAME: 
[[HOSTOBJ:[^\\/]+\.o]]" "{{.*}}[[HOSTASM]]" +// CHK-UBJOBS-ST-SAME: [[WRAPPEROBJ:[^\\/]+\.o]]" "{{.*}}[[WRAPPERASM]]" + +// Create binary. +// CHK-UBJOBS: ld{{(\.exe)?}}" {{.*}}"-o" " +// CHK-UBJOBS-SAME: [[HOSTBIN:[^\\/]+\.out]]" {{.*}}"{{.*}}[[HOSTOBJ]]" "{{.*}}[[WRAPPEROBJ]]" // CHK-UBJOBS-ST: ld{{(\.exe)?}}" {{.*}}"-o" " -// CHK-UBJOBS-ST-SAME: [[HOSTBIN:[^\\/]+\.out]]" {{.*}}"{{.*}}[[HOSTOBJ]]" {{.*}}"-T" " -// CHK-UBJOBS-ST-SAME: [[LKS:[^\\/]+\.lk]]" +// CHK-UBJOBS-ST-SAME: [[HOSTBIN:[^\\/]+\.out]]" {{.*}}"{{.*}}[[HOSTOBJ]]" "{{.*}}[[WRAPPEROBJ]]" // Unbundle object file. // CHK-UBJOBS2: clang-offload-bundler{{.*}}" "-type=o" "-targets=host-powerpc64le-unknown-linux,openmp-powerpc64le-ibm-linux-gnu,openmp-x86_64-pc-linux-gnu" "-inputs= @@ -567,9 +565,12 @@ // CHK-UBJOBS2-SAME: [[T1BIN:[^\\/]+\.out]]" {{.*}}"{{.*}}[[T1OBJ]]" // CHK-UBJOBS2: ld{{(\.exe)?}}" {{.*}}"-o" " // CHK-UBJOBS2-SAME: [[T2BIN:[^\\/]+\.out]]" {{.*}}"{{.*}}[[T2OBJ]]" +// CHK-UBJOBS2: clang-offload-wrapper{{(\.exe)?}}" "-target" "powerpc64le-unknown-linux" {{.*}}"-o" " +// CHK-UBJOBS2-SAME: [[WRAPPERBC:[^\\/]+\.bc]]" "{{.*}}[[T1BIN]]" "{{.*}}[[T2BIN]]" +// CHK-UBJOBS2: clang{{.*}}" "-cc1" "-triple" "powerpc64le-unknown-linux" "-emit-obj" {{.*}}"-fopenmp" {{.*}}"-o" " +// CHK-UBJOBS2-SAME: [[WRAPPEROBJ:[^\\/]+\.o]]" "-x" "ir" "{{.*}}[[WRAPPERBC]]" // CHK-UBJOBS2: ld{{(\.exe)?}}" {{.*}}"-o" " -// CHK-UBJOBS2-SAME: [[HOSTBIN:[^\\/]+\.out]]" {{.*}}"{{.*}}[[HOSTOBJ]]" {{.*}}"-T" " -// CHK-UBJOBS2-SAME: [[LKS:[^\\/]+\.lk]]" +// CHK-UBJOBS2-SAME: [[HOSTBIN:[^\\/]+\.out]]" {{.*}}"{{.*}}[[HOSTOBJ]]" "{{.*}}[[WRAPPEROBJ]]" // CHK-UBJOBS2-ST-NOT: clang-offload-bundler{{.*}}in.so // CHK-UBJOBS2-ST: clang-offload-bundler{{.*}}" "-type=o" "-targets=host-powerpc64le-unknown-linux,openmp-powerpc64le-ibm-linux-gnu,openmp-x86_64-pc-linux-gnu" "-inputs= // CHK-UBJOBS2-ST-SAME: [[INPUT:[^\\/]+\.o]]" "-outputs= @@ -581,9 +582,14 @@ // CHK-UBJOBS2-ST-SAME: [[T1BIN:[^\\/]+\.out-openmp-powerpc64le-ibm-linux-gnu]]" 
{{.*}}"{{.*}}[[T1OBJ]]" // CHK-UBJOBS2-ST: ld{{(\.exe)?}}" {{.*}}"-o" " // CHK-UBJOBS2-ST-SAME: [[T2BIN:[^\\/]+\.out-openmp-x86_64-pc-linux-gnu]]" {{.*}}"{{.*}}[[T2OBJ]]" +// CHK-UBJOBS2-ST: clang-offload-wrapper{{(\.exe)?}}" "-target" "powerpc64le-unknown-linux" {{.*}}"-o" " +// CHK-UBJOBS2-ST-SAME: [[WRAPPERBC:[^\\/]+\.bc]]" "{{.*}}[[T1BIN]]" "{{.*}}[[T2BIN]]" +// CHK-UBJOBS2-ST: clang{{.*}}" "-cc1" "-triple" "powerpc64le-unknown-linux" "-S" {{.*}}"-fopenmp" {{.*}}"-o" " +// CHK-UBJOBS2-ST-SAME: [[WRAPPERASM:[^\\/]+\.s]]" "-x" "ir" "{{.*}}[[WRAPPERBC]]" +// CHK-UBJOBS2-ST: clang{{.*}}" "-cc1as" "-triple" "powerpc64le-unknown-linux" "-filetype" "obj" {{.*}}"-o" " +// CHK-UBJOBS2-ST-SAME: [[WRAPPEROBJ:[^\\/]+\.o]]" "{{.*}}[[WRAPPERASM]]" // CHK-UBJOBS2-ST: ld{{(\.exe)?}}" {{.*}}"-o" " -// CHK-UBJOBS2-ST-SAME: [[HOSTBIN:[^\\/]+\.out]]" {{.*}}"{{.*}}[[HOSTOBJ]]" {{.*}}"-T" " -// CHK-UBJOBS2-ST-SAME: [[LKS:[^\\/]+\.lk]]" +// CHK-UBJOBS2-ST-SAME: [[HOSTBIN:[^\\/]+\.out]]" {{.*}}"{{.*}}[[HOSTOBJ]]" "{{.*}}[[WRAPPEROBJ]]" /// ########################################################################### diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt index 223f1f74f3..e46c3669a2 100644 --- a/tools/CMakeLists.txt +++ b/tools/CMakeLists.txt @@ -8,6 +8,7 @@ add_clang_subdirectory(clang-format-vs) add_clang_subdirectory(clang-fuzzer) add_clang_subdirectory(clang-import-test) add_clang_subdirectory(clang-offload-bundler) +add_clang_subdirectory(clang-offload-wrapper) add_clang_subdirectory(clang-scan-deps) add_clang_subdirectory(c-index-test) diff --git a/tools/clang-offload-wrapper/CMakeLists.txt b/tools/clang-offload-wrapper/CMakeLists.txt new file mode 100644 index 0000000000..6f8940f88e --- /dev/null +++ b/tools/clang-offload-wrapper/CMakeLists.txt @@ -0,0 +1,23 @@ +set(LLVM_LINK_COMPONENTS BitWriter Core Support TransformUtils) + +if(NOT CLANG_BUILT_STANDALONE) + set(tablegen_deps intrinsics_gen) +endif() + +add_clang_tool(clang-offload-wrapper + 
ClangOffloadWrapper.cpp + + DEPENDS + ${tablegen_deps} + ) + +set(CLANG_OFFLOAD_WRAPPER_LIB_DEPS + clangBasic + ) + +add_dependencies(clang clang-offload-wrapper) + +clang_target_link_libraries(clang-offload-wrapper + PRIVATE + ${CLANG_OFFLOAD_WRAPPER_LIB_DEPS} + ) diff --git a/tools/clang-offload-wrapper/ClangOffloadWrapper.cpp b/tools/clang-offload-wrapper/ClangOffloadWrapper.cpp new file mode 100644 index 0000000000..e18da35e79 --- /dev/null +++ b/tools/clang-offload-wrapper/ClangOffloadWrapper.cpp @@ -0,0 +1,196 @@ +//===-- clang-offload-wrapper/ClangOffloadWrapper.cpp -----------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Implementation of the offload wrapper tool. It takes offload target binaries +/// as input and creates wrapper bitcode file containing target binaries +/// packaged as data. 
+///
+//===----------------------------------------------------------------------===//
+
+#include "clang/Basic/Version.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/Bitcode/BitcodeWriter.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Errc.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/ErrorOr.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Signals.h"
+#include "llvm/Support/StringSaver.h"
+#include "llvm/Support/ToolOutputFile.h"
+#include "llvm/Support/WithColor.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+#include <cstdint>
+
+using namespace llvm;
+
+// Short alias for -help; checked explicitly in main() after option parsing.
+static cl::opt<bool> Help("h", cl::desc("Alias for -help"), cl::Hidden);
+
+// Mark all our options with this category, everything else (except for -version
+// and -help) will be hidden.
+static cl::OptionCategory
+    ClangOffloadWrapperCategory("clang-offload-wrapper options");
+
+// Path of the bitcode file to produce (required).
+static cl::opt<std::string> Output("o", cl::Required,
+                                   cl::desc("Output filename"),
+                                   cl::value_desc("filename"),
+                                   cl::cat(ClangOffloadWrapperCategory));
+
+// Device binaries to wrap, given as positional arguments (at least one).
+static cl::list<std::string> Inputs(cl::Positional, cl::OneOrMore,
+                                    cl::desc("<input files>"),
+                                    cl::cat(ClangOffloadWrapperCategory));
+
+// Triple assigned to the generated wrapper module (required).
+static cl::opt<std::string>
+    Target("target", cl::Required,
+           cl::desc("Target triple for the output module"),
+           cl::value_desc("triple"), cl::cat(ClangOffloadWrapperCategory));
+
+// Device triples, one per input file; main() pairs OffloadTargets[I] with
+// Inputs[I] and rejects the command line if the two counts differ.
+static cl::list<std::string>
+    OffloadTargets("offload-targets", cl::CommaSeparated, cl::OneOrMore,
+                   cl::desc("Comma-separated list of device target triples"),
+                   cl::value_desc("triples"),
+                   cl::cat(ClangOffloadWrapperCategory));
+
+namespace {
+
+/// Builds an LLVM module that carries device binaries as constant data.
+///
+/// The wrapper owns its LLVMContext and Module, so the module reference
+/// returned by wrapBinaries() is only valid while the wrapper is alive.
+class BinaryWrapper {
+public:
+  // Binary descriptor. The first field is a reference to the binary bits,
+  // and the second is the target triple the binary was built for.
+  using BinaryDesc = std::pair<ArrayRef<char>, StringRef>;
+
+private:
+  LLVMContext C;
+  Module M;
+
+  // Saver for generated strings.
+  BumpPtrAllocator Alloc;
+  UniqueStringSaver SS;
+
+private:
+  /// Emits two globals per binary, both placed in the section
+  /// ".omp_offloading.<triple>":
+  ///   - ".omp_offloading.img_start.<triple>": constant byte array holding the
+  ///     binary contents;
+  ///   - ".omp_offloading.img_end.<triple>": constant zero-length i8 array.
+  /// Both have external linkage, hidden visibility and are unnamed_addr.
+  // NOTE(review): img_end presumably marks the end of the image by being laid
+  // out after img_start in the same section; nothing in this file enforces
+  // that ordering -- confirm against the consumer of these symbols.
+  void createImages(ArrayRef<BinaryDesc> Binaries) {
+    for (const BinaryDesc &Bin : Binaries) {
+      // The section name must outlive this iteration, hence the string saver.
+      StringRef SectionName = SS.save(".omp_offloading." + Bin.second);
+
+      auto *DataC = ConstantDataArray::get(C, Bin.first);
+      auto *ImageB =
+          new GlobalVariable(M, DataC->getType(), /*isConstant=*/true,
+                             GlobalVariable::ExternalLinkage, DataC,
+                             ".omp_offloading.img_start." + Bin.second);
+      ImageB->setSection(SectionName);
+      ImageB->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
+      ImageB->setVisibility(GlobalValue::HiddenVisibility);
+
+      auto *EmptyC =
+          ConstantAggregateZero::get(ArrayType::get(Type::getInt8Ty(C), 0u));
+      auto *ImageE =
+          new GlobalVariable(M, EmptyC->getType(), /*isConstant=*/true,
+                             GlobalVariable::ExternalLinkage, EmptyC,
+                             ".omp_offloading.img_end." + Bin.second);
+      ImageE->setSection(SectionName);
+      ImageE->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
+      ImageE->setVisibility(GlobalValue::HiddenVisibility);
+    }
+  }
+
+public:
+  /// Creates an empty module named "offload.wrapper.object" whose target
+  /// triple is set to \p Target.
+  BinaryWrapper(StringRef Target) : M("offload.wrapper.object", C), SS(Alloc) {
+    M.setTargetTriple(Target);
+  }
+
+  /// Adds the image globals for every entry of \p Binaries to the module and
+  /// returns the module. The returned reference is owned by this wrapper.
+  const Module &wrapBinaries(ArrayRef<BinaryDesc> Binaries) {
+    createImages(Binaries);
+    return M;
+  }
+};
+
+} // anonymous namespace
+
+/// Tool entry point: parses options, reads every input binary, wraps them into
+/// a module and writes that module as bitcode to the -o file.
+/// Returns 0 on success (or after printing help for -h) and 1 on any error.
+int main(int argc, const char **argv) {
+  sys::PrintStackTraceOnErrorSignal(argv[0]);
+
+  cl::HideUnrelatedOptions(ClangOffloadWrapperCategory);
+  cl::SetVersionPrinter([](raw_ostream &OS) {
+    OS << clang::getClangToolFullVersion("clang-offload-wrapper") << '\n';
+  });
+  cl::ParseCommandLineOptions(
+      argc, argv,
+      "A tool to create a wrapper bitcode for offload target binaries. Takes "
+      "offload\ntarget binaries as input and produces bitcode file containing "
+      "target binaries packaged\nas data.\n");
+
+  if (Help) {
+    cl::PrintHelpMessage();
+    return 0;
+  }
+
+  // Prints an error to stderr, prefixed with the tool name.
+  auto reportError = [argv](Error E) {
+    logAllUnhandledErrors(std::move(E), WithColor::error(errs(), argv[0]));
+  };
+
+  if (Triple(Target).getArch() == Triple::UnknownArch) {
+    reportError(createStringError(
+        errc::invalid_argument, "'" + Target + "': unsupported target triple"));
+    return 1;
+  }
+
+  // Inputs and offload targets are matched by position below.
+  if (Inputs.size() != OffloadTargets.size()) {
+    reportError(createStringError(
+        errc::invalid_argument,
+        "number of input files and offload targets should match"));
+    return 1;
+  }
+
+  // Read device binaries.
+  // Buffers owns the file contents; Images only holds views into them, so
+  // Buffers must stay alive until the bitcode has been written.
+  SmallVector<std::unique_ptr<MemoryBuffer>, 4u> Buffers;
+  SmallVector<BinaryWrapper::BinaryDesc, 4u> Images;
+  Buffers.reserve(Inputs.size());
+  Images.reserve(Inputs.size());
+  for (unsigned I = 0; I < Inputs.size(); ++I) {
+    const std::string &File = Inputs[I];
+    ErrorOr<std::unique_ptr<MemoryBuffer>> BufOrErr =
+        MemoryBuffer::getFileOrSTDIN(File);
+    if (!BufOrErr) {
+      reportError(createFileError(File, BufOrErr.getError()));
+      return 1;
+    }
+    const std::unique_ptr<MemoryBuffer> &Buf =
+        Buffers.emplace_back(std::move(*BufOrErr));
+    Images.emplace_back(
+        makeArrayRef(Buf->getBufferStart(), Buf->getBufferSize()),
+        OffloadTargets[I]);
+  }
+
+  // Create the output file to write the resulting bitcode to.
+  std::error_code EC;
+  ToolOutputFile Out(Output, EC, sys::fs::OF_None);
+  if (EC) {
+    reportError(createFileError(Output, EC));
+    return 1;
+  }
+
+  // Create a wrapper for device binaries and write its bitcode to the file.
+  // The temporary BinaryWrapper lives until the end of this full expression,
+  // which covers the whole WriteBitcodeToFile call.
+  WriteBitcodeToFile(BinaryWrapper(Target).wrapBinaries(
+                         makeArrayRef(Images.data(), Images.size())),
+                     Out.os());
+  if (Out.os().has_error()) {
+    reportError(createFileError(Output, Out.os().error()));
+    return 1;
+  }
+
+  // Success.
+  Out.keep();
+  return 0;
+}