]> granicus.if.org Git - clang/commitdiff
[Clang][OpenMP Offload] Add new tool for wrapping offload device binaries
author Sergey Dmitriev <serguei.n.dmitriev@intel.com>
Wed, 9 Oct 2019 20:42:58 +0000 (20:42 +0000)
committer Sergey Dmitriev <serguei.n.dmitriev@intel.com>
Wed, 9 Oct 2019 20:42:58 +0000 (20:42 +0000)
This patch removes the remaining part of the OpenMP offload linker scripts, which was used for inserting device binaries into the output linked binary. Device binaries are now inserted into the host binary with the help of a wrapper bit-code file which contains the device binaries as data. The wrapper bit-code file is dynamically created by the clang driver with the help of a new tool, clang-offload-wrapper, which takes device binaries as input and produces a bit-code file with the required contents. The wrapper bit-code is then compiled to an object, and the resulting object is appended to the host link by the clang driver.

This is the second part of the patch for eliminating the OpenMP linker script (please see https://reviews.llvm.org/D64943).

Differential Revision: https://reviews.llvm.org/D68166

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@374219 91177308-0d34-0410-b5e6-96231b3b80d8

18 files changed:
include/clang/Driver/Action.h
include/clang/Driver/Options.td
include/clang/Driver/ToolChain.h
lib/Driver/Action.cpp
lib/Driver/Driver.cpp
lib/Driver/ToolChain.cpp
lib/Driver/ToolChains/Clang.cpp
lib/Driver/ToolChains/Clang.h
lib/Driver/ToolChains/CommonArgs.cpp
lib/Driver/ToolChains/CommonArgs.h
lib/Driver/ToolChains/Cuda.cpp
lib/Driver/ToolChains/Gnu.cpp
test/Driver/clang-offload-wrapper.c [new file with mode: 0644]
test/Driver/openmp-offload-gpu.c
test/Driver/openmp-offload.c
tools/CMakeLists.txt
tools/clang-offload-wrapper/CMakeLists.txt [new file with mode: 0644]
tools/clang-offload-wrapper/ClangOffloadWrapper.cpp [new file with mode: 0644]

index c6e90b2978354569a3a2d83e7fb8b1996c4dcccb..8ccbb6c2bbfa228a4169ae166e89a09224dc1c45 100644 (file)
@@ -72,9 +72,10 @@ public:
     VerifyPCHJobClass,
     OffloadBundlingJobClass,
     OffloadUnbundlingJobClass,
+    OffloadWrapperJobClass,
 
     JobClassFirst = PreprocessJobClass,
-    JobClassLast = OffloadUnbundlingJobClass
+    JobClassLast = OffloadWrapperJobClass
   };
 
   // The offloading kind determines if this action is binded to a particular
@@ -625,6 +626,17 @@ public:
   }
 };
 
+class OffloadWrapperJobAction : public JobAction {
+  void anchor() override;
+
+public:
+  OffloadWrapperJobAction(ActionList &Inputs, types::ID Type);
+
+  static bool classof(const Action *A) {
+    return A->getKind() == OffloadWrapperJobClass;
+  }
+};
+
 } // namespace driver
 } // namespace clang
 
index cbcf2ab326633e03dc7bd8900795897ea5a33207..c6e06f2e7dd3c050bc8bf502039776052306d2f5 100644 (file)
@@ -1604,8 +1604,6 @@ def fnoopenmp_use_tls : Flag<["-"], "fnoopenmp-use-tls">, Group<f_Group>,
   Flags<[CC1Option, NoArgumentUnused, HelpHidden]>;
 def fopenmp_targets_EQ : CommaJoined<["-"], "fopenmp-targets=">, Flags<[DriverOption, CC1Option]>,
   HelpText<"Specify comma-separated list of triples OpenMP offloading targets to be supported">;
-def fopenmp_dump_offload_linker_script : Flag<["-"], "fopenmp-dump-offload-linker-script">,
-  Group<f_Group>, Flags<[NoArgumentUnused, HelpHidden]>;
 def fopenmp_relocatable_target : Flag<["-"], "fopenmp-relocatable-target">,
   Group<f_Group>, Flags<[CC1Option, NoArgumentUnused, HelpHidden]>;
 def fnoopenmp_relocatable_target : Flag<["-"], "fnoopenmp-relocatable-target">,
index 2ba16ab21a927a99ebb6be854b5e40729ab22b61..f0676eee2d6cede36b132ff1aa47ce74ab9eff06 100644 (file)
@@ -138,6 +138,7 @@ private:
   mutable std::unique_ptr<Tool> Link;
   mutable std::unique_ptr<Tool> IfsMerge;
   mutable std::unique_ptr<Tool> OffloadBundler;
+  mutable std::unique_ptr<Tool> OffloadWrapper;
 
   Tool *getClang() const;
   Tool *getAssemble() const;
@@ -145,6 +146,7 @@ private:
   Tool *getIfsMerge() const;
   Tool *getClangAs() const;
   Tool *getOffloadBundler() const;
+  Tool *getOffloadWrapper() const;
 
   mutable std::unique_ptr<SanitizerArgs> SanitizerArguments;
   mutable std::unique_ptr<XRayArgs> XRayArguments;
index 0187cf981eb560f6a8ee367827eef33cbacdc4a7..0eb4c7257e7a8a88a47075c97950dfa03288fd00 100644 (file)
@@ -41,6 +41,8 @@ const char *Action::getClassName(ActionClass AC) {
     return "clang-offload-bundler";
   case OffloadUnbundlingJobClass:
     return "clang-offload-unbundler";
+  case OffloadWrapperJobClass:
+    return "clang-offload-wrapper";
   }
 
   llvm_unreachable("invalid class");
@@ -407,3 +409,9 @@ void OffloadUnbundlingJobAction::anchor() {}
 
 OffloadUnbundlingJobAction::OffloadUnbundlingJobAction(Action *Input)
     : JobAction(OffloadUnbundlingJobClass, Input, Input->getType()) {}
+
+void OffloadWrapperJobAction::anchor() {}
+
+OffloadWrapperJobAction::OffloadWrapperJobAction(ActionList &Inputs,
+                                                 types::ID Type)
+  : JobAction(OffloadWrapperJobClass, Inputs, Type) {}
index a0952742982717945fb254c26f2163d2846e246c..51c262aa0a330af8f020fffd75e24bef59621f39 100644 (file)
@@ -2287,6 +2287,9 @@ class OffloadingActionBuilder final {
     /// Append top level actions generated by the builder.
     virtual void appendTopLevelActions(ActionList &AL) {}
 
+    /// Append linker actions generated by the builder.
+    virtual void appendLinkActions(ActionList &AL) {}
+
     /// Append linker actions generated by the builder.
     virtual void appendLinkDependences(OffloadAction::DeviceDependences &DA) {}
 
@@ -2890,7 +2893,7 @@ class OffloadingActionBuilder final {
       OpenMPDeviceActions.clear();
     }
 
-    void appendLinkDependences(OffloadAction::DeviceDependences &DA) override {
+    void appendLinkActions(ActionList &AL) override {
       assert(ToolChains.size() == DeviceLinkerInputs.size() &&
              "Toolchains and linker inputs sizes do not match.");
 
@@ -2899,12 +2902,18 @@ class OffloadingActionBuilder final {
       for (auto &LI : DeviceLinkerInputs) {
         auto *DeviceLinkAction =
             C.MakeAction<LinkJobAction>(LI, types::TY_Image);
-        DA.add(*DeviceLinkAction, **TC, /*BoundArch=*/nullptr,
-               Action::OFK_OpenMP);
+        OffloadAction::DeviceDependences DeviceLinkDeps;
+        DeviceLinkDeps.add(*DeviceLinkAction, **TC, /*BoundArch=*/nullptr,
+                       Action::OFK_OpenMP);
+        AL.push_back(C.MakeAction<OffloadAction>(DeviceLinkDeps,
+            DeviceLinkAction->getType()));
         ++TC;
       }
+      DeviceLinkerInputs.clear();
     }
 
+    void appendLinkDependences(OffloadAction::DeviceDependences &DA) override {}
+
     bool initialize() override {
       // Get the OpenMP toolchains. If we don't get any, the action builder will
       // know there is nothing to do related to OpenMP offloading.
@@ -3129,6 +3138,25 @@ public:
     return false;
   }
 
+  Action* makeHostLinkAction() {
+    // Build a list of device linking actions.
+    ActionList DeviceAL;
+    for (DeviceActionBuilder *SB : SpecializedBuilders) {
+      if (!SB->isValid())
+        continue;
+      SB->appendLinkActions(DeviceAL);
+    }
+
+    if (DeviceAL.empty())
+      return nullptr;
+
+    // Create wrapper bitcode from the result of device link actions and compile
+    // it to an object which will be added to the host link command.
+    auto *BC = C.MakeAction<OffloadWrapperJobAction>(DeviceAL, types::TY_LLVM_BC);
+    auto *ASM = C.MakeAction<BackendJobAction>(BC, types::TY_PP_Asm);
+    return C.MakeAction<AssembleJobAction>(ASM, types::TY_Object);
+  }
+
   /// Processes the host linker action. This currently consists of replacing it
   /// with an offload action if there are device link objects and propagate to
   /// the host action all the offload kinds used in the current compilation. The
@@ -3434,6 +3462,8 @@ void Driver::BuildActions(Compilation &C, DerivedArgList &Args,
 
   // Add a link action if necessary.
   if (!LinkerInputs.empty()) {
+    if (Action *Wrapper = OffloadBuilder.makeHostLinkAction())
+      LinkerInputs.push_back(Wrapper);
     Action *LA = C.MakeAction<LinkJobAction>(LinkerInputs, types::TY_Image);
     LA = OffloadBuilder.processHostLinkAction(LA);
     Actions.push_back(LA);
@@ -3820,18 +3850,8 @@ class ToolSelector final {
     if (!AJ || !BJ)
       return nullptr;
 
-    // Retrieve the compile job, backend action must always be preceded by one.
-    ActionList CompileJobOffloadActions;
-    auto *CJ = getPrevDependentAction(BJ->getInputs(), CompileJobOffloadActions,
-                                      /*CanBeCollapsed=*/false);
-    if (!AJ || !BJ || !CJ)
-      return nullptr;
-
-    assert(isa<CompileJobAction>(CJ) &&
-           "Expecting compile job preceding backend job.");
-
-    // Get compiler tool.
-    const Tool *T = TC.SelectTool(*CJ);
+    // Get backend tool.
+    const Tool *T = TC.SelectTool(*BJ);
     if (!T)
       return nullptr;
 
@@ -4236,6 +4256,13 @@ InputInfo Driver::BuildJobsForActionNoCache(
         A->getOffloadingDeviceKind(), TC->getTriple().normalize(),
         /*CreatePrefixForHost=*/!!A->getOffloadingHostActiveKinds() &&
             !AtTopLevel);
+    if (isa<OffloadWrapperJobAction>(JA)) {
+      OffloadingPrefix += "-wrapper";
+      if (Arg *FinalOutput = C.getArgs().getLastArg(options::OPT_o))
+        BaseInput = FinalOutput->getValue();
+      else
+        BaseInput = getDefaultImageName();
+    }
     Result = InputInfo(A, GetNamedOutputPath(C, *JA, BaseInput, BoundArch,
                                              AtTopLevel, MultipleArchs,
                                              OffloadingPrefix),
index db2497a10856f2499fc2a7979eed102c7ec59142..357a5106ab393daf3fd88acca4431024cdbf7a6d 100644 (file)
@@ -292,6 +292,12 @@ Tool *ToolChain::getOffloadBundler() const {
   return OffloadBundler.get();
 }
 
+Tool *ToolChain::getOffloadWrapper() const {
+  if (!OffloadWrapper)
+    OffloadWrapper.reset(new tools::OffloadWrapper(*this));
+  return OffloadWrapper.get();
+}
+
 Tool *ToolChain::getTool(Action::ActionClass AC) const {
   switch (AC) {
   case Action::AssembleJobClass:
@@ -324,6 +330,9 @@ Tool *ToolChain::getTool(Action::ActionClass AC) const {
   case Action::OffloadBundlingJobClass:
   case Action::OffloadUnbundlingJobClass:
     return getOffloadBundler();
+
+  case Action::OffloadWrapperJobClass:
+    return getOffloadWrapper();
   }
 
   llvm_unreachable("Invalid tool kind.");
index 98e8c57235a99c69adc69fcbe27786b5fc274c20..22b830ee838b4465994b52dac5f3e3827ddf58ef 100644 (file)
@@ -6463,3 +6463,57 @@ void OffloadBundler::ConstructJobMultipleOutputs(
       TCArgs.MakeArgString(getToolChain().GetProgramPath(getShortName())),
       CmdArgs, None));
 }
+
+void OffloadWrapper::ConstructJob(Compilation &C, const JobAction &JA,
+                                  const InputInfo &Output,
+                                  const InputInfoList &Inputs,
+                                  const ArgList &Args,
+                                  const char *LinkingOutput) const {
+  ArgStringList CmdArgs;
+
+  const llvm::Triple &Triple = getToolChain().getEffectiveTriple();
+
+  // Add the "effective" target triple.
+  CmdArgs.push_back("-target");
+  CmdArgs.push_back(Args.MakeArgString(Triple.getTriple()));
+
+  assert(JA.getInputs().size() == Inputs.size() &&
+         "Not have inputs for all dependence actions??");
+
+  // Add offload targets. It is a comma-separated list of offload target
+  // triples.
+  SmallString<128> Targets;
+  Targets += "-offload-targets=";
+  for (unsigned I = 0; I < Inputs.size(); ++I) {
+    if (I)
+      Targets += ',';
+
+    // Get input's Offload Kind and ToolChain.
+    const auto *OA = cast<OffloadAction>(JA.getInputs()[I]);
+    assert(OA->hasSingleDeviceDependence(/*DoNotConsiderHostActions=*/true) &&
+           "Expected one device dependence!");
+    const ToolChain *DeviceTC = nullptr;
+    OA->doOnEachDependence([&DeviceTC](Action *, const ToolChain *TC,
+                                       const char *) { DeviceTC = TC; });
+
+    // And add it to the offload targets.
+    Targets += DeviceTC->getTriple().normalize();
+  }
+  CmdArgs.push_back(Args.MakeArgString(Targets));
+
+  // Add the output file name.
+  assert(Output.isFilename() && "Invalid output.");
+  CmdArgs.push_back("-o");
+  CmdArgs.push_back(Output.getFilename());
+
+  // Add inputs.
+  for (const InputInfo &I : Inputs) {
+    assert(I.isFilename() && "Invalid input.");
+    CmdArgs.push_back(I.getFilename());
+  }
+
+  C.addCommand(std::make_unique<Command>(
+    JA, *this,
+    Args.MakeArgString(getToolChain().GetProgramPath(getShortName())),
+    CmdArgs, Inputs));
+}
index 8b6ac43ebd55620ad020f21f62b5a6e7d9511993..b345c02489d4d5faa3febf90923831809bdc2e15 100644 (file)
@@ -152,6 +152,20 @@ public:
                                    const llvm::opt::ArgList &TCArgs,
                                    const char *LinkingOutput) const override;
 };
+
+/// Offload wrapper tool.
+class LLVM_LIBRARY_VISIBILITY OffloadWrapper final : public Tool {
+public:
+  OffloadWrapper(const ToolChain &TC)
+      : Tool("offload wrapper", "clang-offload-wrapper", TC) {}
+
+  bool hasIntegratedCPP() const override { return false; }
+  void ConstructJob(Compilation &C, const JobAction &JA,
+                    const InputInfo &Output, const InputInfoList &Inputs,
+                    const llvm::opt::ArgList &TCArgs,
+                    const char *LinkingOutput) const override;
+};
+
 } // end namespace tools
 
 } // end namespace driver
index 47964099bd9c486cdb306dcd16e371c707b695d2..159b42a642708a1d8cdc04fa6f127ce464307fc1 100644 (file)
@@ -1244,120 +1244,6 @@ void tools::AddRunTimeLibs(const ToolChain &TC, const Driver &D,
   }
 }
 
-/// Add OpenMP linker script arguments at the end of the argument list so that
-/// the fat binary is built by embedding each of the device images into the
-/// host. The linker script also defines a few symbols required by the code
-/// generation so that the images can be easily retrieved at runtime by the
-/// offloading library. This should be used only in tool chains that support
-/// linker scripts.
-void tools::AddOpenMPLinkerScript(const ToolChain &TC, Compilation &C,
-                                  const InputInfo &Output,
-                                  const InputInfoList &Inputs,
-                                  const ArgList &Args, ArgStringList &CmdArgs,
-                                  const JobAction &JA) {
-
-  // If this is not an OpenMP host toolchain, we don't need to do anything.
-  if (!JA.isHostOffloading(Action::OFK_OpenMP))
-    return;
-
-  // Create temporary linker script. Keep it if save-temps is enabled.
-  const char *LKS;
-  SmallString<256> Name = llvm::sys::path::filename(Output.getFilename());
-  if (C.getDriver().isSaveTempsEnabled()) {
-    llvm::sys::path::replace_extension(Name, "lk");
-    LKS = C.getArgs().MakeArgString(Name.c_str());
-  } else {
-    llvm::sys::path::replace_extension(Name, "");
-    Name = C.getDriver().GetTemporaryPath(Name, "lk");
-    LKS = C.addTempFile(C.getArgs().MakeArgString(Name.c_str()));
-  }
-
-  // Add linker script option to the command.
-  CmdArgs.push_back("-T");
-  CmdArgs.push_back(LKS);
-
-  // Create a buffer to write the contents of the linker script.
-  std::string LksBuffer;
-  llvm::raw_string_ostream LksStream(LksBuffer);
-
-  // Get the OpenMP offload tool chains so that we can extract the triple
-  // associated with each device input.
-  auto OpenMPToolChains = C.getOffloadToolChains<Action::OFK_OpenMP>();
-  assert(OpenMPToolChains.first != OpenMPToolChains.second &&
-         "No OpenMP toolchains??");
-
-  // Track the input file name and device triple in order to build the script,
-  // inserting binaries in the designated sections.
-  SmallVector<std::pair<std::string, const char *>, 8> InputBinaryInfo;
-
-  // Add commands to embed target binaries. We ensure that each section and
-  // image is 16-byte aligned. This is not mandatory, but increases the
-  // likelihood of data to be aligned with a cache block in several main host
-  // machines.
-  LksStream << "/*\n";
-  LksStream << "       OpenMP Offload Linker Script\n";
-  LksStream << " *** Automatically generated by Clang ***\n";
-  LksStream << "*/\n";
-  LksStream << "TARGET(binary)\n";
-  auto DTC = OpenMPToolChains.first;
-  for (auto &II : Inputs) {
-    const Action *A = II.getAction();
-    // Is this a device linking action?
-    if (A && isa<LinkJobAction>(A) &&
-        A->isDeviceOffloading(Action::OFK_OpenMP)) {
-      assert(DTC != OpenMPToolChains.second &&
-             "More device inputs than device toolchains??");
-      InputBinaryInfo.push_back(std::make_pair(
-          DTC->second->getTriple().normalize(), II.getFilename()));
-      ++DTC;
-      LksStream << "INPUT(" << II.getFilename() << ")\n";
-    }
-  }
-
-  assert(DTC == OpenMPToolChains.second &&
-         "Less device inputs than device toolchains??");
-
-  LksStream << "SECTIONS\n";
-  LksStream << "{\n";
-
-  // Put each target binary into a separate section.
-  for (const auto &BI : InputBinaryInfo) {
-    LksStream << "  .omp_offloading." << BI.first << " :\n";
-    LksStream << "  ALIGN(0x10)\n";
-    LksStream << "  {\n";
-    LksStream << "    PROVIDE_HIDDEN(.omp_offloading.img_start." << BI.first
-              << " = .);\n";
-    LksStream << "    " << BI.second << "\n";
-    LksStream << "    PROVIDE_HIDDEN(.omp_offloading.img_end." << BI.first
-              << " = .);\n";
-    LksStream << "  }\n";
-  }
-
-  LksStream << "}\n";
-  LksStream << "INSERT BEFORE .data\n";
-  LksStream.flush();
-
-  // Dump the contents of the linker script if the user requested that. We
-  // support this option to enable testing of behavior with -###.
-  if (C.getArgs().hasArg(options::OPT_fopenmp_dump_offload_linker_script))
-    llvm::errs() << LksBuffer;
-
-  // If this is a dry run, do not create the linker script file.
-  if (C.getArgs().hasArg(options::OPT__HASH_HASH_HASH))
-    return;
-
-  // Open script file and write the contents.
-  std::error_code EC;
-  llvm::raw_fd_ostream Lksf(LKS, EC, llvm::sys::fs::OF_None);
-
-  if (EC) {
-    C.getDriver().Diag(clang::diag::err_unable_to_make_temp) << EC.message();
-    return;
-  }
-
-  Lksf << LksBuffer;
-}
-
 /// Add HIP linker script arguments at the end of the argument list so that
 /// the fat binary is built by embedding the device images into the host. The
 /// linker script also defines a symbol required by the code generation so that
index 1aff07ab8fe56eb697dcf108eb599b04dc288d7f..63359d77363bf31bf48befa42b29195b71c56891 100644 (file)
@@ -45,13 +45,6 @@ void AddRunTimeLibs(const ToolChain &TC, const Driver &D,
                     llvm::opt::ArgStringList &CmdArgs,
                     const llvm::opt::ArgList &Args);
 
-void AddOpenMPLinkerScript(const ToolChain &TC, Compilation &C,
-                           const InputInfo &Output,
-                           const InputInfoList &Inputs,
-                           const llvm::opt::ArgList &Args,
-                           llvm::opt::ArgStringList &CmdArgs,
-                           const JobAction &JA);
-
 void AddHIPLinkerScript(const ToolChain &TC, Compilation &C,
                         const InputInfo &Output, const InputInfoList &Inputs,
                         const llvm::opt::ArgList &Args,
index 4cada743b88596192f8101b6fb40f71eb87882c3..8c704a3078adcb2489b7940118f72537a3e74cc6 100644 (file)
@@ -563,8 +563,6 @@ void NVPTX::OpenMPLinker::ConstructJob(Compilation &C, const JobAction &JA,
     CmdArgs.push_back(CubinF);
   }
 
-  AddOpenMPLinkerScript(getToolChain(), C, Output, Inputs, Args, CmdArgs, JA);
-
   const char *Exec =
       Args.MakeArgString(getToolChain().GetProgramPath("nvlink"));
   C.addCommand(std::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
index c7abe5c69a1df1f3e2f3ecc72e49db748dfab054..c302a31cd2e1007b4a84fbbc42e0eb1b769fbbc4 100644 (file)
@@ -499,7 +499,7 @@ void tools::gnutools::Linker::ConstructJob(Compilation &C, const JobAction &JA,
         P = ToolChain.GetFilePath(crtbegin);
       }
       CmdArgs.push_back(Args.MakeArgString(P));
-         }
+    }
 
     // Add crtfastmath.o if available and fast math is enabled.
     ToolChain.AddFastMathRuntimeIfAvailable(Args, CmdArgs);
@@ -623,9 +623,6 @@ void tools::gnutools::Linker::ConstructJob(Compilation &C, const JobAction &JA,
     }
   }
 
-  // Add OpenMP offloading linker script args if required.
-  AddOpenMPLinkerScript(getToolChain(), C, Output, Inputs, Args, CmdArgs, JA);
-
   // Add HIP offloading linker script args if required.
   AddHIPLinkerScript(getToolChain(), C, Output, Inputs, Args, CmdArgs, JA,
                      *this);
diff --git a/test/Driver/clang-offload-wrapper.c b/test/Driver/clang-offload-wrapper.c
new file mode 100644 (file)
index 0000000..542930b
--- /dev/null
@@ -0,0 +1,29 @@
+// REQUIRES: x86-registered-target
+
+//
+// Check help message.
+//
+// RUN: clang-offload-wrapper --help | FileCheck %s --check-prefix CHECK-HELP
+// CHECK-HELP: {{.*}}OVERVIEW: A tool to create a wrapper bitcode for offload target binaries. Takes offload
+// CHECK-HELP: {{.*}}target binaries as input and produces bitcode file containing target binaries packaged
+// CHECK-HELP: {{.*}}as data.
+// CHECK-HELP: {{.*}}USAGE: clang-offload-wrapper [options] <input files>
+// CHECK-HELP: {{.*}}  -o=<filename>               - Output filename
+// CHECK-HELP: {{.*}}  --offload-targets=<triples> - Comma-separated list of device target triples
+// CHECK-HELP: {{.*}}  --target=<triple>           - Target triple for the output module
+
+//
+// Generate a file to wrap.
+//
+// RUN: echo 'Content of device file' > %t.tgt
+
+//
+// Check bitcode produced by the wrapper tool.
+//
+// RUN: clang-offload-wrapper -target=x86_64-pc-linux-gnu -offload-targets=powerpc64le-ibm-linux-gnu -o %t.wrapper.bc %t.tgt
+// RUN: llvm-dis %t.wrapper.bc -o - | FileCheck %s --check-prefix CHECK-IR
+
+// CHECK-IR: target triple = "x86_64-pc-linux-gnu"
+
+// CHECK-IR: @.omp_offloading.img_start.powerpc64le-ibm-linux-gnu = hidden unnamed_addr constant [{{[0-9]+}} x i8] c"Content of device file{{.+}}", section ".omp_offloading.powerpc64le-ibm-linux-gnu"
+// CHECK-IR: @.omp_offloading.img_end.powerpc64le-ibm-linux-gnu = hidden unnamed_addr constant [0 x i8] zeroinitializer, section ".omp_offloading.powerpc64le-ibm-linux-gnu"
index 3d2ac4525f6c6b10c137425cfce64b1c532efa3a..dc4dbd1f37c979149120d91595f4a27bc8d67777 100644 (file)
@@ -55,7 +55,7 @@
 // RUN:          -fopenmp-targets=nvptx64-nvidia-cuda %s 2>&1 \
 // RUN:   | FileCheck -check-prefix=CHK-CUBIN-NVLINK %s
 
-// CHK-CUBIN-NVLINK: clang{{.*}}" "-o" "[[PTX:.*\.s]]"
+// CHK-CUBIN-NVLINK: clang{{.*}}" {{.*}}"-fopenmp-is-device" {{.*}}"-o" "[[PTX:.*\.s]]"
 // CHK-CUBIN-NVLINK-NEXT: ptxas{{.*}}" "--output-file" "[[CUBIN:.*\.cubin]]" {{.*}}"[[PTX]]"
 // CHK-CUBIN-NVLINK-NEXT: nvlink{{.*}}" {{.*}}"[[CUBIN]]"
 
index 0ba3c4326310b3ee3549936c8d9d6b70df21daa4..292818b84fa1cc0beed163f18bb7b759f7797c80 100644 (file)
 // CHK-PHASES: 2: compiler, {1}, ir, (host-openmp)
 // CHK-PHASES: 3: backend, {2}, assembler, (host-openmp)
 // CHK-PHASES: 4: assembler, {3}, object, (host-openmp)
-// CHK-PHASES: 5: linker, {4}, image, (host-openmp)
-// CHK-PHASES: 6: input, "[[INPUT]]", c, (device-openmp)
-// CHK-PHASES: 7: preprocessor, {6}, cpp-output, (device-openmp)
-// CHK-PHASES: 8: compiler, {7}, ir, (device-openmp)
-// CHK-PHASES: 9: offload, "host-openmp (powerpc64le-ibm-linux-gnu)" {2}, "device-openmp (x86_64-pc-linux-gnu)" {8}, ir
-// CHK-PHASES: 10: backend, {9}, assembler, (device-openmp)
-// CHK-PHASES: 11: assembler, {10}, object, (device-openmp)
-// CHK-PHASES: 12: linker, {11}, image, (device-openmp)
-// CHK-PHASES: 13: offload, "host-openmp (powerpc64le-ibm-linux-gnu)" {5}, "device-openmp (x86_64-pc-linux-gnu)" {12}, image
+// CHK-PHASES: 5: input, "[[INPUT]]", c, (device-openmp)
+// CHK-PHASES: 6: preprocessor, {5}, cpp-output, (device-openmp)
+// CHK-PHASES: 7: compiler, {6}, ir, (device-openmp)
+// CHK-PHASES: 8: offload, "host-openmp (powerpc64le-ibm-linux-gnu)" {2}, "device-openmp (x86_64-pc-linux-gnu)" {7}, ir
+// CHK-PHASES: 9: backend, {8}, assembler, (device-openmp)
+// CHK-PHASES: 10: assembler, {9}, object, (device-openmp)
+// CHK-PHASES: 11: linker, {10}, image, (device-openmp)
+// CHK-PHASES: 12: offload, "device-openmp (x86_64-pc-linux-gnu)" {11}, image
+// CHK-PHASES: 13: clang-offload-wrapper, {12}, ir, (host-openmp)
+// CHK-PHASES: 14: backend, {13}, assembler, (host-openmp)
+// CHK-PHASES: 15: assembler, {14}, object, (host-openmp)
+// CHK-PHASES: 16: linker, {4, 15}, image, (host-openmp)
 
 /// ###########################################################################
 
 // CHK-PHASES-LIB: 3: compiler, {2}, ir, (host-openmp)
 // CHK-PHASES-LIB: 4: backend, {3}, assembler, (host-openmp)
 // CHK-PHASES-LIB: 5: assembler, {4}, object, (host-openmp)
-// CHK-PHASES-LIB: 6: linker, {0, 5}, image, (host-openmp)
-// CHK-PHASES-LIB: 7: input, "somelib", object, (device-openmp)
-// CHK-PHASES-LIB: 8: input, "[[INPUT]]", c, (device-openmp)
-// CHK-PHASES-LIB: 9: preprocessor, {8}, cpp-output, (device-openmp)
-// CHK-PHASES-LIB: 10: compiler, {9}, ir, (device-openmp)
-// CHK-PHASES-LIB: 11: offload, "host-openmp (powerpc64-ibm-linux-gnu)" {3}, "device-openmp (x86_64-pc-linux-gnu)" {10}, ir
-// CHK-PHASES-LIB: 12: backend, {11}, assembler, (device-openmp)
-// CHK-PHASES-LIB: 13: assembler, {12}, object, (device-openmp)
-// CHK-PHASES-LIB: 14: linker, {7, 13}, image, (device-openmp)
+// CHK-PHASES-LIB: 6: input, "somelib", object, (device-openmp)
+// CHK-PHASES-LIB: 7: input, "[[INPUT]]", c, (device-openmp)
+// CHK-PHASES-LIB: 8: preprocessor, {7}, cpp-output, (device-openmp)
+// CHK-PHASES-LIB: 9: compiler, {8}, ir, (device-openmp)
+// CHK-PHASES-LIB: 10: offload, "host-openmp (powerpc64-ibm-linux-gnu)" {3}, "device-openmp (x86_64-pc-linux-gnu)" {9}, ir
+// CHK-PHASES-LIB: 11: backend, {10}, assembler, (device-openmp)
+// CHK-PHASES-LIB: 12: assembler, {11}, object, (device-openmp)
+// CHK-PHASES-LIB: 13: linker, {6, 12}, image, (device-openmp)
+// CHK-PHASES-LIB: 14: offload, "device-openmp (x86_64-pc-linux-gnu)" {13}, image
 // CHK-PHASES-LIB: 15: input, "somelib", object, (device-openmp)
 // CHK-PHASES-LIB: 16: input, "[[INPUT]]", c, (device-openmp)
 // CHK-PHASES-LIB: 17: preprocessor, {16}, cpp-output, (device-openmp)
 // CHK-PHASES-LIB: 20: backend, {19}, assembler, (device-openmp)
 // CHK-PHASES-LIB: 21: assembler, {20}, object, (device-openmp)
 // CHK-PHASES-LIB: 22: linker, {15, 21}, image, (device-openmp)
-// CHK-PHASES-LIB: 23: offload, "host-openmp (powerpc64-ibm-linux-gnu)" {6}, "device-openmp (x86_64-pc-linux-gnu)" {14}, "device-openmp (powerpc64-ibm-linux-gnu)" {22}, image
-
+// CHK-PHASES-LIB: 23: offload, "device-openmp (powerpc64-ibm-linux-gnu)" {22}, image
+// CHK-PHASES-LIB: 24: clang-offload-wrapper, {14, 23}, ir, (host-openmp)
+// CHK-PHASES-LIB: 25: backend, {24}, assembler, (host-openmp)
+// CHK-PHASES-LIB: 26: assembler, {25}, object, (host-openmp)
+// CHK-PHASES-LIB: 27: linker, {0, 5, 26}, image, (host-openmp)
 
 /// ###########################################################################
 
 // CHK-PHASES-FILES: 8: compiler, {7}, ir, (host-openmp)
 // CHK-PHASES-FILES: 9: backend, {8}, assembler, (host-openmp)
 // CHK-PHASES-FILES: 10: assembler, {9}, object, (host-openmp)
-// CHK-PHASES-FILES: 11: linker, {0, 5, 10}, image, (host-openmp)
-// CHK-PHASES-FILES: 12: input, "somelib", object, (device-openmp)
-// CHK-PHASES-FILES: 13: input, "[[INPUT1]]", c, (device-openmp)
-// CHK-PHASES-FILES: 14: preprocessor, {13}, cpp-output, (device-openmp)
-// CHK-PHASES-FILES: 15: compiler, {14}, ir, (device-openmp)
-// CHK-PHASES-FILES: 16: offload, "host-openmp (powerpc64-ibm-linux-gnu)" {3}, "device-openmp (x86_64-pc-linux-gnu)" {15}, ir
-// CHK-PHASES-FILES: 17: backend, {16}, assembler, (device-openmp)
-// CHK-PHASES-FILES: 18: assembler, {17}, object, (device-openmp)
-// CHK-PHASES-FILES: 19: input, "[[INPUT2]]", c, (device-openmp)
-// CHK-PHASES-FILES: 20: preprocessor, {19}, cpp-output, (device-openmp)
-// CHK-PHASES-FILES: 21: compiler, {20}, ir, (device-openmp)
-// CHK-PHASES-FILES: 22: offload, "host-openmp (powerpc64-ibm-linux-gnu)" {8}, "device-openmp (x86_64-pc-linux-gnu)" {21}, ir
-// CHK-PHASES-FILES: 23: backend, {22}, assembler, (device-openmp)
-// CHK-PHASES-FILES: 24: assembler, {23}, object, (device-openmp)
-// CHK-PHASES-FILES: 25: linker, {12, 18, 24}, image, (device-openmp)
+// CHK-PHASES-FILES: 11: input, "somelib", object, (device-openmp)
+// CHK-PHASES-FILES: 12: input, "[[INPUT1]]", c, (device-openmp)
+// CHK-PHASES-FILES: 13: preprocessor, {12}, cpp-output, (device-openmp)
+// CHK-PHASES-FILES: 14: compiler, {13}, ir, (device-openmp)
+// CHK-PHASES-FILES: 15: offload, "host-openmp (powerpc64-ibm-linux-gnu)" {3}, "device-openmp (x86_64-pc-linux-gnu)" {14}, ir
+// CHK-PHASES-FILES: 16: backend, {15}, assembler, (device-openmp)
+// CHK-PHASES-FILES: 17: assembler, {16}, object, (device-openmp)
+// CHK-PHASES-FILES: 18: input, "[[INPUT2]]", c, (device-openmp)
+// CHK-PHASES-FILES: 19: preprocessor, {18}, cpp-output, (device-openmp)
+// CHK-PHASES-FILES: 20: compiler, {19}, ir, (device-openmp)
+// CHK-PHASES-FILES: 21: offload, "host-openmp (powerpc64-ibm-linux-gnu)" {8}, "device-openmp (x86_64-pc-linux-gnu)" {20}, ir
+// CHK-PHASES-FILES: 22: backend, {21}, assembler, (device-openmp)
+// CHK-PHASES-FILES: 23: assembler, {22}, object, (device-openmp)
+// CHK-PHASES-FILES: 24: linker, {11, 17, 23}, image, (device-openmp)
+// CHK-PHASES-FILES: 25: offload, "device-openmp (x86_64-pc-linux-gnu)" {24}, image
 // CHK-PHASES-FILES: 26: input, "somelib", object, (device-openmp)
 // CHK-PHASES-FILES: 27: input, "[[INPUT1]]", c, (device-openmp)
 // CHK-PHASES-FILES: 28: preprocessor, {27}, cpp-output, (device-openmp)
 // CHK-PHASES-FILES: 37: backend, {36}, assembler, (device-openmp)
 // CHK-PHASES-FILES: 38: assembler, {37}, object, (device-openmp)
 // CHK-PHASES-FILES: 39: linker, {26, 32, 38}, image, (device-openmp)
-// CHK-PHASES-FILES: 40: offload, "host-openmp (powerpc64-ibm-linux-gnu)" {11}, "device-openmp (x86_64-pc-linux-gnu)" {25}, "device-openmp (powerpc64-ibm-linux-gnu)" {39}, image
+// CHK-PHASES-FILES: 40: offload, "device-openmp (powerpc64-ibm-linux-gnu)" {39}, image
+// CHK-PHASES-FILES: 41: clang-offload-wrapper, {25, 40}, ir, (host-openmp)
+// CHK-PHASES-FILES: 42: backend, {41}, assembler, (host-openmp)
+// CHK-PHASES-FILES: 43: assembler, {42}, object, (host-openmp)
+// CHK-PHASES-FILES: 44: linker, {0, 5, 10, 43}, image, (host-openmp)
 
 /// ###########################################################################
 
 // CHK-PHASES-WITH-CUDA: 11: offload, "host-cuda-openmp (powerpc64le-ibm-linux-gnu)" {2}, "device-cuda (nvptx64-nvidia-cuda)" {10}, ir
 // CHK-PHASES-WITH-CUDA: 12: backend, {11}, assembler, (host-cuda-openmp)
 // CHK-PHASES-WITH-CUDA: 13: assembler, {12}, object, (host-cuda-openmp)
-// CHK-PHASES-WITH-CUDA: 14: linker, {13}, image, (host-cuda-openmp)
-// CHK-PHASES-WITH-CUDA: 15: input, "[[INPUT]]", cuda, (device-openmp)
-// CHK-PHASES-WITH-CUDA: 16: preprocessor, {15}, cuda-cpp-output, (device-openmp)
-// CHK-PHASES-WITH-CUDA: 17: compiler, {16}, ir, (device-openmp)
-// CHK-PHASES-WITH-CUDA: 18: offload, "host-cuda-openmp (powerpc64le-ibm-linux-gnu)" {2}, "device-openmp (nvptx64-nvidia-cuda)" {17}, ir
-// CHK-PHASES-WITH-CUDA: 19: backend, {18}, assembler, (device-openmp)
-// CHK-PHASES-WITH-CUDA: 20: assembler, {19}, object, (device-openmp)
-// CHK-PHASES-WITH-CUDA: 21: linker, {20}, image, (device-openmp)
-// CHK-PHASES-WITH-CUDA: 22: offload, "host-cuda-openmp (powerpc64le-ibm-linux-gnu)" {14}, "device-openmp (nvptx64-nvidia-cuda)" {21}, image
+// CHK-PHASES-WITH-CUDA: 14: input, "[[INPUT]]", cuda, (device-openmp)
+// CHK-PHASES-WITH-CUDA: 15: preprocessor, {14}, cuda-cpp-output, (device-openmp)
+// CHK-PHASES-WITH-CUDA: 16: compiler, {15}, ir, (device-openmp)
+// CHK-PHASES-WITH-CUDA: 17: offload, "host-cuda-openmp (powerpc64le-ibm-linux-gnu)" {2}, "device-openmp (nvptx64-nvidia-cuda)" {16}, ir
+// CHK-PHASES-WITH-CUDA: 18: backend, {17}, assembler, (device-openmp)
+// CHK-PHASES-WITH-CUDA: 19: assembler, {18}, object, (device-openmp)
+// CHK-PHASES-WITH-CUDA: 20: linker, {19}, image, (device-openmp)
+// CHK-PHASES-WITH-CUDA: 21: offload, "device-openmp (nvptx64-nvidia-cuda)" {20}, image
+// CHK-PHASES-WITH-CUDA: 22: clang-offload-wrapper, {21}, ir, (host-cuda-openmp)
+// CHK-PHASES-WITH-CUDA: 23: backend, {22}, assembler, (host-cuda-openmp)
+// CHK-PHASES-WITH-CUDA: 24: assembler, {23}, object, (host-cuda-openmp)
+// CHK-PHASES-WITH-CUDA: 25: linker, {13, 24}, image, (host-cuda-openmp)
 
 /// ###########################################################################
 
 /// -fopenmp-host-ir-file-path: specifies the host IR file that can be loaded by
 /// the target code generation to gather information about which declarations
 /// really need to be emitted.
-/// We use -fopenmp-dump-offload-linker-script to dump the linker script and
-/// check its contents.
 ///
-// RUN:   %clang -### -fopenmp=libomp -o %t.out -target powerpc64le-linux -fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu %s -fopenmp-dump-offload-linker-script -no-canonical-prefixes 2>&1 \
-// RUN:   | FileCheck -check-prefix=CHK-COMMANDS -check-prefix=CHK-LKS -check-prefix=CHK-LKS-REG %s
-// RUN:   %clang -### -fopenmp=libomp -o %t.out -target powerpc64le-linux -fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu %s -save-temps -fopenmp-dump-offload-linker-script -no-canonical-prefixes 2>&1 \
-// RUN:   | FileCheck -check-prefix=CHK-COMMANDS-ST -check-prefix=CHK-LKS -check-prefix=CHK-LKS-ST %s
-
-// Make sure we are not dumping the script unless the user requested it.
 // RUN:   %clang -### -fopenmp=libomp -o %t.out -target powerpc64le-linux -fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu %s -no-canonical-prefixes 2>&1 \
-// RUN:   | FileCheck -check-prefix=CHK-LKS-NODUMP %s
+// RUN:   | FileCheck -check-prefix=CHK-COMMANDS %s
 // RUN:   %clang -### -fopenmp=libomp -o %t.out -target powerpc64le-linux -fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu %s -save-temps -no-canonical-prefixes 2>&1 \
-// RUN:   | FileCheck -check-prefix=CHK-LKS-NODUMP %s
-
-//
-// Check the linker script contains what we expect.
-//
-// CHK-LKS: /*
-// CHK-LKS:                   OpenMP Offload Linker Script
-// CHK-LKS:             *** Automatically generated by Clang ***
-// CHK-LKS-NODUMP-NOT:  OpenMP Offload Linker Script.
-// CHK-LKS: */
-// CHK-LKS: TARGET(binary)
-// CHK-LKS-REG: INPUT([[T1BIN:.+\.out]])
-// CHK-LKS-REG: INPUT([[T2BIN:.+\.out]])
-// CHK-LKS-ST: INPUT([[T1BIN:.+\.out-openmp-powerpc64le-ibm-linux-gnu]])
-// CHK-LKS-ST: INPUT([[T2BIN:.+\.out-openmp-x86_64-pc-linux-gnu]])
-// CHK-LKS: SECTIONS
-// CHK-LKS: {
-// CHK-LKS:   .omp_offloading.powerpc64le-ibm-linux-gnu :
-// CHK-LKS:   ALIGN(0x10)
-// CHK-LKS:   {
-// CHK-LKS:     PROVIDE_HIDDEN(.omp_offloading.img_start.powerpc64le-ibm-linux-gnu = .);
-// CHK-LKS:     [[T1BIN]]
-// CHK-LKS:     PROVIDE_HIDDEN(.omp_offloading.img_end.powerpc64le-ibm-linux-gnu = .);
-// CHK-LKS:   }
-// CHK-LKS:   .omp_offloading.x86_64-pc-linux-gnu :
-// CHK-LKS:   ALIGN(0x10)
-// CHK-LKS:   {
-// CHK-LKS:     PROVIDE_HIDDEN(.omp_offloading.img_start.x86_64-pc-linux-gnu = .);
-// CHK-LKS:     [[T2BIN]]
-// CHK-LKS:     PROVIDE_HIDDEN(.omp_offloading.img_end.x86_64-pc-linux-gnu = .);
-// CHK-LKS:   }
-// CHK-LKS: }
-// CHK-LKS: INSERT BEFORE .data
+// RUN:   | FileCheck -check-prefix=CHK-COMMANDS-ST %s
 
 //
-// Generate host BC file.
+// Generate host BC file and host object.
 //
 // CHK-COMMANDS: clang{{.*}}" "-cc1" "-triple" "powerpc64le-unknown-linux" {{.*}}"-emit-llvm-bc"
 // CHK-COMMANDS-SAME: "-fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu"
 // CHK-COMMANDS-SAME: "-o" "
 // CHK-COMMANDS-SAME: [[HOSTBC:[^\\/]+\.bc]]" "-x" "c" "
 // CHK-COMMANDS-SAME: [[INPUT:[^\\/]+\.c]]"
+// CHK-COMMANDS: clang{{.*}}" "-cc1" "-triple" "powerpc64le-unknown-linux" {{.*}}"-emit-obj" {{.*}}"-fopenmp" {{.*}}"-o" "
+// CHK-COMMANDS-SAME: [[HOSTOBJ:[^\\/]+\.o]]" "-x" "ir" "{{.*}}[[HOSTBC]]"
 // CHK-COMMANDS-ST: clang{{.*}}" "-cc1" "-triple" "powerpc64le-unknown-linux" {{.*}}"-E" {{.*}}"-fopenmp" {{.*}}"-o" "
 // CHK-COMMANDS-ST-SAME: [[HOSTPP:[^\\/]+\.i]]" "-x" "c" "
 // CHK-COMMANDS-ST-SAME: [[INPUT:[^\\/]+\.c]]"
 // CHK-COMMANDS-ST: clang{{.*}}" "-cc1" "-triple" "powerpc64le-unknown-linux" {{.*}}"-emit-llvm-bc" {{.*}}"-fopenmp" {{.*}}"-fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu" {{.*}}"-o" "
 // CHK-COMMANDS-ST-SAME: [[HOSTBC:[^\\/]+\.bc]]" "-x" "cpp-output" "{{.*}}[[HOSTPP]]"
+// CHK-COMMANDS-ST: clang{{.*}}" "-cc1" "-triple" "powerpc64le-unknown-linux" {{.*}}"-S" {{.*}}"-fopenmp" {{.*}}"-o" "
+// CHK-COMMANDS-ST-SAME: [[HOSTASM:[^\\/]+\.s]]" "-x" "ir" "{{.*}}[[HOSTBC]]"
+// CHK-COMMANDS-ST: clang{{.*}}" "-cc1as" "-triple" "powerpc64le-unknown-linux" "-filetype" "obj" {{.*}}"-o" "
+// CHK-COMMANDS-ST-SAME: [[HOSTOBJ:[^\\/]+\.o]]" "{{.*}}[[HOSTASM]]"
 
 //
 // Compile for the powerpc device.
 // CHK-COMMANDS-ST-SAME: [[T2BIN:[^\\/]+\.out-openmp-x86_64-pc-linux-gnu]]" {{.*}}"{{.*}}[[T2OBJ]]"
 
 //
-// Generate host object from the BC file and link using the linker script.
+// Create wrapper BC file and wrapper object.
 //
+// CHK-COMMANDS: clang-offload-wrapper{{(\.exe)?}}" "-target" "powerpc64le-unknown-linux" {{.*}}"-o" "
+// CHK-COMMANDS-SAME: [[WRAPPERBC:[^\\/]+\.bc]]" "{{.*}}[[T1BIN]]" "{{.*}}[[T2BIN]]"
 // CHK-COMMANDS: clang{{.*}}" "-cc1" "-triple" "powerpc64le-unknown-linux" {{.*}}"-emit-obj" {{.*}}"-fopenmp" {{.*}}"-o" "
-// CHK-COMMANDS-SAME: [[HOSTOBJ:[^\\/]+\.o]]" "-x" "ir" "{{.*}}[[HOSTBC]]"
-// CHK-COMMANDS: ld{{(\.exe)?}}" {{.*}}"-o" "
-// CHK-COMMANDS-SAME: [[HOSTBIN:[^\\/]+\.out]]"  {{.*}}"-lomptarget" {{.*}}"-T" "
-// CHK-COMMANDS-SAME: [[HOSTLK:[^\\/]+\.lk]]"
+// CHK-COMMANDS-SAME: [[WRAPPEROBJ:[^\\/]+\.o]]" "-x" "ir" "{{.*}}[[WRAPPERBC]]"
+// CHK-COMMANDS-ST: clang-offload-wrapper{{(\.exe)?}}" "-target" "powerpc64le-unknown-linux" {{.*}}"-o" "
+// CHK-COMMANDS-ST-SAME: [[WRAPPERBC:[^\\/]+\.bc]]" "{{.*}}[[T1BIN]]" "{{.*}}[[T2BIN]]"
 // CHK-COMMANDS-ST: clang{{.*}}" "-cc1" "-triple" "powerpc64le-unknown-linux" {{.*}}"-S" {{.*}}"-fopenmp" {{.*}}"-o" "
-// CHK-COMMANDS-ST-SAME: [[HOSTASM:[^\\/]+\.s]]" "-x" "ir" "{{.*}}[[HOSTBC]]"
+// CHK-COMMANDS-ST-SAME: [[WRAPPERASM:[^\\/]+\.s]]" "-x" "ir" "{{.*}}[[WRAPPERBC]]"
 // CHK-COMMANDS-ST: clang{{.*}}" "-cc1as" "-triple" "powerpc64le-unknown-linux" "-filetype" "obj" {{.*}}"-o" "
-// CHK-COMMANDS-ST-SAME: [[HOSTOBJ:[^\\/]+\.o]]" "{{.*}}[[HOSTASM]]"
-// CHK-COMMANDS-ST: ld{{(\.exe)?}}" {{.*}}"-o" "
-// CHK-COMMANDS-ST-SAME: [[HOSTBIN:[^\\/]+\.out]]"  {{.*}}"-lomptarget" {{.*}}"-T" "
-// CHK-COMMANDS-ST-SAME: [[HOSTLK:[^\\/]+\.lk]]"
+// CHK-COMMANDS-ST-SAME: [[WRAPPEROBJ:[^\\/]+\.o]]" "{{.*}}[[WRAPPERASM]]"
 
+//
+// Link host binary.
+//
+// CHK-COMMANDS: ld{{(\.exe)?}}" {{.*}}"-o" "
+// CHK-COMMANDS-SAME: [[HOSTBIN:[^\\/]+\.out]]" {{.*}}"{{.*}}[[HOSTOBJ]]" "{{.*}}[[WRAPPEROBJ]]" {{.*}}"-lomptarget"
+// CHK-COMMANDS-ST: ld{{(\.exe)?}}" {{.*}}"-o" "
+// CHK-COMMANDS-ST-SAME: [[HOSTBIN:[^\\/]+\.out]]" {{.*}}"{{.*}}[[HOSTOBJ]]" "{{.*}}[[WRAPPEROBJ]]" {{.*}}"-lomptarget"
 
 /// ###########################################################################
 
 // CHK-UBACTIONS: 3: compiler, {2}, ir, (host-openmp)
 // CHK-UBACTIONS: 4: backend, {3}, assembler, (host-openmp)
 // CHK-UBACTIONS: 5: assembler, {4}, object, (host-openmp)
-// CHK-UBACTIONS: 6: linker, {0, 5}, image, (host-openmp)
-// CHK-UBACTIONS: 7: input, "somelib", object, (device-openmp)
-// CHK-UBACTIONS: 8: compiler, {2}, ir, (device-openmp)
-// CHK-UBACTIONS: 9: offload, "host-openmp (powerpc64le-unknown-linux)" {3}, "device-openmp (powerpc64le-ibm-linux-gnu)" {8}, ir
-// CHK-UBACTIONS: 10: backend, {9}, assembler, (device-openmp)
-// CHK-UBACTIONS: 11: assembler, {10}, object, (device-openmp)
-// CHK-UBACTIONS: 12: linker, {7, 11}, image, (device-openmp)
+// CHK-UBACTIONS: 6: input, "somelib", object, (device-openmp)
+// CHK-UBACTIONS: 7: compiler, {2}, ir, (device-openmp)
+// CHK-UBACTIONS: 8: offload, "host-openmp (powerpc64le-unknown-linux)" {3}, "device-openmp (powerpc64le-ibm-linux-gnu)" {7}, ir
+// CHK-UBACTIONS: 9: backend, {8}, assembler, (device-openmp)
+// CHK-UBACTIONS: 10: assembler, {9}, object, (device-openmp)
+// CHK-UBACTIONS: 11: linker, {6, 10}, image, (device-openmp)
+// CHK-UBACTIONS: 12: offload, "device-openmp (powerpc64le-ibm-linux-gnu)" {11}, image
 // CHK-UBACTIONS: 13: input, "somelib", object, (device-openmp)
 // CHK-UBACTIONS: 14: compiler, {2}, ir, (device-openmp)
 // CHK-UBACTIONS: 15: offload, "host-openmp (powerpc64le-unknown-linux)" {3}, "device-openmp (x86_64-pc-linux-gnu)" {14}, ir
 // CHK-UBACTIONS: 16: backend, {15}, assembler, (device-openmp)
 // CHK-UBACTIONS: 17: assembler, {16}, object, (device-openmp)
 // CHK-UBACTIONS: 18: linker, {13, 17}, image, (device-openmp)
-// CHK-UBACTIONS: 19: offload, "host-openmp (powerpc64le-unknown-linux)" {6}, "device-openmp (powerpc64le-ibm-linux-gnu)" {12}, "device-openmp (x86_64-pc-linux-gnu)" {18}, image
+// CHK-UBACTIONS: 19: offload, "device-openmp (x86_64-pc-linux-gnu)" {18}, image
+// CHK-UBACTIONS: 20: clang-offload-wrapper, {12, 19}, ir, (host-openmp)
+// CHK-UBACTIONS: 21: backend, {20}, assembler, (host-openmp)
+// CHK-UBACTIONS: 22: assembler, {21}, object, (host-openmp)
+// CHK-UBACTIONS: 23: linker, {0, 5, 22}, image, (host-openmp)
 
 /// ###########################################################################
 
 // CHK-UBJOBS-SAME: [[T2PP:[^\\/]+\.i]]" "-unbundle"
 // CHK-UBJOBS: clang{{.*}}" "-cc1" "-triple" "powerpc64le-unknown-linux" {{.*}}"-emit-llvm-bc"  {{.*}}"-fopenmp" {{.*}}"-fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu" {{.*}}"-o" "
 // CHK-UBJOBS-SAME: [[HOSTBC:[^\\/]+\.bc]]" "-x" "cpp-output" "{{.*}}[[HOSTPP]]"
+// CHK-UBJOBS: clang{{.*}}" "-cc1" "-triple" "powerpc64le-unknown-linux" {{.*}}"-emit-obj" {{.*}}"-fopenmp" {{.*}}"-o" "
+// CHK-UBJOBS-SAME: [[HOSTOBJ:[^\\/]+\.o]]" "-x" "ir" "{{.*}}[[HOSTBC]]"
 // CHK-UBJOBS-ST: clang-offload-bundler{{.*}}" "-type=i" "-targets=host-powerpc64le-unknown-linux,openmp-powerpc64le-ibm-linux-gnu,openmp-x86_64-pc-linux-gnu" "-inputs=
 // CHK-UBJOBS-ST-SAME: [[INPUT:[^\\/]+\.i]]" "-outputs=
 // CHK-UBJOBS-ST-SAME: [[HOSTPP:[^\\/,]+\.i]],
 // CHK-UBJOBS-ST-SAME: [[T2PP:[^\\/,]+\.i]]" "-unbundle"
 // CHK-UBJOBS-ST: clang{{.*}}" "-cc1" "-triple" "powerpc64le-unknown-linux" {{.*}}"-emit-llvm-bc"  {{.*}}"-fopenmp" {{.*}}"-fopenmp-targets=powerpc64le-ibm-linux-gnu,x86_64-pc-linux-gnu" {{.*}}"-o" "
 // CHK-UBJOBS-ST-SAME: [[HOSTBC:[^\\/]+\.bc]]" "-x" "cpp-output" "{{.*}}[[HOSTPP]]"
+// CHK-UBJOBS-ST: clang{{.*}}" "-cc1" "-triple" "powerpc64le-unknown-linux" {{.*}}"-S" {{.*}}"-fopenmp" {{.*}}"-o" "
+// CHK-UBJOBS-ST-SAME: [[HOSTASM:[^\\/]+\.s]]" "-x" "ir" "{{.*}}[[HOSTBC]]"
+// CHK-UBJOBS-ST: clang{{.*}}" "-cc1as" "-triple" "powerpc64le-unknown-linux" "-filetype" "obj" {{.*}}"-o" "
+// CHK-UBJOBS-ST-SAME: [[HOSTOBJ:[^\\/]+\.o]]" "{{.*}}[[HOSTASM]]"
 
 // Create target 1 object.
 // CHK-UBJOBS: clang{{.*}}" "-cc1" "-triple" "powerpc64le-ibm-linux-gnu" "-aux-triple" "powerpc64le-unknown-linux" {{.*}}"-emit-obj" {{.*}}"-fopenmp" {{.*}}"-fopenmp-is-device" "-fopenmp-host-ir-file-path" "{{.*}}[[HOSTBC]]" {{.*}}"-o" "
 // CHK-UBJOBS-ST: ld{{(\.exe)?}}" {{.*}}"-o" "
 // CHK-UBJOBS-ST-SAME: [[T2BIN:[^\\/]+\.out-openmp-x86_64-pc-linux-gnu]]" {{.*}}"{{.*}}[[T2OBJ]]"
 
-// Create binary.
+// Create wrapper BC file and wrapper object.
+// CHK-UBJOBS: clang-offload-wrapper{{(\.exe)?}}" "-target" "powerpc64le-unknown-linux" {{.*}}"-o" "
+// CHK-UBJOBS-SAME: [[WRAPPERBC:[^\\/]+\.bc]]" "{{.*}}[[T1BIN]]" "{{.*}}[[T2BIN]]"
 // CHK-UBJOBS: clang{{.*}}" "-cc1" "-triple" "powerpc64le-unknown-linux" {{.*}}"-emit-obj" {{.*}}"-fopenmp" {{.*}}"-o" "
-// CHK-UBJOBS-SAME: [[HOSTOBJ:[^\\/]+\.o]]" "-x" "ir" "{{.*}}[[HOSTBC]]"
-// CHK-UBJOBS: ld{{(\.exe)?}}" {{.*}}"-o" "
-// CHK-UBJOBS-SAME: [[HOSTBIN:[^\\/]+\.out]]" {{.*}}"{{.*}}[[HOSTOBJ]]" {{.*}}"-T" "
-// CHK-UBJOBS-SAME: [[LKS:[^\\/]+\.lk]]"
+// CHK-UBJOBS-SAME: [[WRAPPEROBJ:[^\\/]+\.o]]" "-x" "ir" "{{.*}}[[WRAPPERBC]]"
+// CHK-UBJOBS-ST: clang-offload-wrapper{{(\.exe)?}}" "-target" "powerpc64le-unknown-linux" {{.*}}"-o" "
+// CHK-UBJOBS-ST-SAME: [[WRAPPERBC:[^\\/]+\.bc]]" "{{.*}}[[T1BIN]]" "{{.*}}[[T2BIN]]"
 // CHK-UBJOBS-ST: clang{{.*}}" "-cc1" "-triple" "powerpc64le-unknown-linux" {{.*}}"-S" {{.*}}"-fopenmp" {{.*}}"-o" "
-// CHK-UBJOBS-ST-SAME: [[HOSTASM:[^\\/]+\.s]]" "-x" "ir" "{{.*}}[[HOSTBC]]"
+// CHK-UBJOBS-ST-SAME: [[WRAPPERASM:[^\\/]+\.s]]" "-x" "ir" "{{.*}}[[WRAPPERBC]]"
 // CHK-UBJOBS-ST: clang{{.*}}" "-cc1as" "-triple" "powerpc64le-unknown-linux" "-filetype" "obj" {{.*}}"-o" "
-// CHK-UBJOBS-ST-SAME: [[HOSTOBJ:[^\\/]+\.o]]" "{{.*}}[[HOSTASM]]"
+// CHK-UBJOBS-ST-SAME: [[WRAPPEROBJ:[^\\/]+\.o]]" "{{.*}}[[WRAPPERASM]]"
+
+// Create binary.
+// CHK-UBJOBS: ld{{(\.exe)?}}" {{.*}}"-o" "
+// CHK-UBJOBS-SAME: [[HOSTBIN:[^\\/]+\.out]]" {{.*}}"{{.*}}[[HOSTOBJ]]" "{{.*}}[[WRAPPEROBJ]]"
 // CHK-UBJOBS-ST: ld{{(\.exe)?}}" {{.*}}"-o" "
-// CHK-UBJOBS-ST-SAME: [[HOSTBIN:[^\\/]+\.out]]" {{.*}}"{{.*}}[[HOSTOBJ]]" {{.*}}"-T" "
-// CHK-UBJOBS-ST-SAME: [[LKS:[^\\/]+\.lk]]"
+// CHK-UBJOBS-ST-SAME: [[HOSTBIN:[^\\/]+\.out]]" {{.*}}"{{.*}}[[HOSTOBJ]]" "{{.*}}[[WRAPPEROBJ]]"
 
 // Unbundle object file.
 // CHK-UBJOBS2: clang-offload-bundler{{.*}}" "-type=o" "-targets=host-powerpc64le-unknown-linux,openmp-powerpc64le-ibm-linux-gnu,openmp-x86_64-pc-linux-gnu" "-inputs=
 // CHK-UBJOBS2-SAME: [[T1BIN:[^\\/]+\.out]]" {{.*}}"{{.*}}[[T1OBJ]]"
 // CHK-UBJOBS2: ld{{(\.exe)?}}" {{.*}}"-o" "
 // CHK-UBJOBS2-SAME: [[T2BIN:[^\\/]+\.out]]" {{.*}}"{{.*}}[[T2OBJ]]"
+// CHK-UBJOBS2: clang-offload-wrapper{{(\.exe)?}}" "-target" "powerpc64le-unknown-linux" {{.*}}"-o" "
+// CHK-UBJOBS2-SAME: [[WRAPPERBC:[^\\/]+\.bc]]" "{{.*}}[[T1BIN]]" "{{.*}}[[T2BIN]]"
+// CHK-UBJOBS2: clang{{.*}}" "-cc1" "-triple" "powerpc64le-unknown-linux" "-emit-obj" {{.*}}"-fopenmp" {{.*}}"-o" "
+// CHK-UBJOBS2-SAME: [[WRAPPEROBJ:[^\\/]+\.o]]" "-x" "ir" "{{.*}}[[WRAPPERBC]]"
 // CHK-UBJOBS2: ld{{(\.exe)?}}" {{.*}}"-o" "
-// CHK-UBJOBS2-SAME: [[HOSTBIN:[^\\/]+\.out]]" {{.*}}"{{.*}}[[HOSTOBJ]]" {{.*}}"-T" "
-// CHK-UBJOBS2-SAME: [[LKS:[^\\/]+\.lk]]"
+// CHK-UBJOBS2-SAME: [[HOSTBIN:[^\\/]+\.out]]" {{.*}}"{{.*}}[[HOSTOBJ]]" "{{.*}}[[WRAPPEROBJ]]"
 // CHK-UBJOBS2-ST-NOT: clang-offload-bundler{{.*}}in.so
 // CHK-UBJOBS2-ST: clang-offload-bundler{{.*}}" "-type=o" "-targets=host-powerpc64le-unknown-linux,openmp-powerpc64le-ibm-linux-gnu,openmp-x86_64-pc-linux-gnu" "-inputs=
 // CHK-UBJOBS2-ST-SAME: [[INPUT:[^\\/]+\.o]]" "-outputs=
 // CHK-UBJOBS2-ST-SAME: [[T1BIN:[^\\/]+\.out-openmp-powerpc64le-ibm-linux-gnu]]" {{.*}}"{{.*}}[[T1OBJ]]"
 // CHK-UBJOBS2-ST: ld{{(\.exe)?}}" {{.*}}"-o" "
 // CHK-UBJOBS2-ST-SAME: [[T2BIN:[^\\/]+\.out-openmp-x86_64-pc-linux-gnu]]" {{.*}}"{{.*}}[[T2OBJ]]"
+// CHK-UBJOBS2-ST: clang-offload-wrapper{{(\.exe)?}}" "-target" "powerpc64le-unknown-linux" {{.*}}"-o" "
+// CHK-UBJOBS2-ST-SAME: [[WRAPPERBC:[^\\/]+\.bc]]" "{{.*}}[[T1BIN]]" "{{.*}}[[T2BIN]]"
+// CHK-UBJOBS2-ST: clang{{.*}}" "-cc1" "-triple" "powerpc64le-unknown-linux" "-S" {{.*}}"-fopenmp" {{.*}}"-o" "
+// CHK-UBJOBS2-ST-SAME: [[WRAPPERASM:[^\\/]+\.s]]" "-x" "ir" "{{.*}}[[WRAPPERBC]]"
+// CHK-UBJOBS2-ST: clang{{.*}}" "-cc1as" "-triple" "powerpc64le-unknown-linux" "-filetype" "obj" {{.*}}"-o" "
+// CHK-UBJOBS2-ST-SAME: [[WRAPPEROBJ:[^\\/]+\.o]]" "{{.*}}[[WRAPPERASM]]"
 // CHK-UBJOBS2-ST: ld{{(\.exe)?}}" {{.*}}"-o" "
-// CHK-UBJOBS2-ST-SAME: [[HOSTBIN:[^\\/]+\.out]]" {{.*}}"{{.*}}[[HOSTOBJ]]" {{.*}}"-T" "
-// CHK-UBJOBS2-ST-SAME: [[LKS:[^\\/]+\.lk]]"
+// CHK-UBJOBS2-ST-SAME: [[HOSTBIN:[^\\/]+\.out]]" {{.*}}"{{.*}}[[HOSTOBJ]]" "{{.*}}[[WRAPPEROBJ]]"
 
 /// ###########################################################################
 
index 223f1f74f3f9db48e207d2165c202251b807e174..e46c3669a2c2b3c6a583c615c546c0d101760af6 100644 (file)
@@ -8,6 +8,7 @@ add_clang_subdirectory(clang-format-vs)
 add_clang_subdirectory(clang-fuzzer)
 add_clang_subdirectory(clang-import-test)
 add_clang_subdirectory(clang-offload-bundler)
+add_clang_subdirectory(clang-offload-wrapper)
 add_clang_subdirectory(clang-scan-deps)
 
 add_clang_subdirectory(c-index-test)
diff --git a/tools/clang-offload-wrapper/CMakeLists.txt b/tools/clang-offload-wrapper/CMakeLists.txt
new file mode 100644 (file)
index 0000000..6f8940f
--- /dev/null
@@ -0,0 +1,23 @@
+set(LLVM_LINK_COMPONENTS BitWriter Core Support TransformUtils) # LLVM components the tool links against.
+
+if(NOT CLANG_BUILT_STANDALONE) # intrinsics_gen is only added as a dependency when clang is not built standalone.
+  set(tablegen_deps intrinsics_gen)
+endif()
+
+add_clang_tool(clang-offload-wrapper
+  ClangOffloadWrapper.cpp
+
+  DEPENDS
+  ${tablegen_deps}
+  )
+
+set(CLANG_OFFLOAD_WRAPPER_LIB_DEPS
+  clangBasic
+  )
+
+add_dependencies(clang clang-offload-wrapper) # Makes the clang target depend on this tool.
+
+clang_target_link_libraries(clang-offload-wrapper
+  PRIVATE
+  ${CLANG_OFFLOAD_WRAPPER_LIB_DEPS}
+  )
diff --git a/tools/clang-offload-wrapper/ClangOffloadWrapper.cpp b/tools/clang-offload-wrapper/ClangOffloadWrapper.cpp
new file mode 100644 (file)
index 0000000..e18da35
--- /dev/null
@@ -0,0 +1,196 @@
+//===-- clang-offload-wrapper/ClangOffloadWrapper.cpp -----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// Implementation of the offload wrapper tool. It takes offload target binaries
+/// as input and creates wrapper bitcode file containing target binaries
+/// packaged as data.
+///
+//===----------------------------------------------------------------------===//
+
+#include "clang/Basic/Version.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/Bitcode/BitcodeWriter.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Errc.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/ErrorOr.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Signals.h"
+#include "llvm/Support/StringSaver.h"
+#include "llvm/Support/ToolOutputFile.h"
+#include "llvm/Support/WithColor.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+#include <cstdint>
+
+using namespace llvm;
+
+static cl::opt<bool> Help("h", cl::desc("Alias for -help"), cl::Hidden);
+
+// Every option of this tool is placed in this category; main() passes it to
+// cl::HideUnrelatedOptions so that other options (except -version and -help) are hidden.
+static cl::OptionCategory
+    ClangOffloadWrapperCategory("clang-offload-wrapper options");
+
+static cl::opt<std::string> Output("o", cl::Required,
+                                   cl::desc("Output filename"),
+                                   cl::value_desc("filename"),
+                                   cl::cat(ClangOffloadWrapperCategory));
+
+static cl::list<std::string> Inputs(cl::Positional, cl::OneOrMore,
+                                    cl::desc("<input files>"),
+                                    cl::cat(ClangOffloadWrapperCategory));
+
+static cl::opt<std::string>
+    Target("target", cl::Required,
+           cl::desc("Target triple for the output module"),
+           cl::value_desc("triple"), cl::cat(ClangOffloadWrapperCategory));
+
+static cl::list<std::string>
+    OffloadTargets("offload-targets", cl::CommaSeparated, cl::OneOrMore,
+                   cl::desc("Comma-separated list of device target triples"),
+                   cl::value_desc("triples"),
+                   cl::cat(ClangOffloadWrapperCategory));
+
+namespace {
+
+class BinaryWrapper { // Builds the wrapper module that holds device binaries as data.
+public:
+  // Binary descriptor. The first field is a reference to the binary bits,
+  // and the second is the target triple the binary was built for.
+  using BinaryDesc = std::pair<ArrayRef<char>, StringRef>;
+
+private:
+  LLVMContext C;
+  Module M;
+
+  // Storage for strings generated by the wrapper (the section names).
+  BumpPtrAllocator Alloc;
+  UniqueStringSaver SS;
+
+private:
+  void createImages(ArrayRef<BinaryDesc> Binaries) { // Emits two hidden globals per binary.
+    for (const BinaryDesc &Bin : Binaries) {
+      StringRef SectionName = SS.save(".omp_offloading." + Bin.second);
+
+      auto *DataC = ConstantDataArray::get(C, Bin.first); // The binary's bytes.
+      auto *ImageB =
+          new GlobalVariable(M, DataC->getType(), /*isConstant=*/true,
+                             GlobalVariable::ExternalLinkage, DataC,
+                             ".omp_offloading.img_start." + Bin.second);
+      ImageB->setSection(SectionName);
+      ImageB->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
+      ImageB->setVisibility(llvm::GlobalValue::HiddenVisibility);
+
+      auto *EmptyC = // Zero-length i8 array used as the img_end global's initializer.
+          ConstantAggregateZero::get(ArrayType::get(Type::getInt8Ty(C), 0u));
+      auto *ImageE =
+          new GlobalVariable(M, EmptyC->getType(), /*isConstant=*/true,
+                             GlobalVariable::ExternalLinkage, EmptyC,
+                             ".omp_offloading.img_end." + Bin.second);
+      ImageE->setSection(SectionName); // Same section as the img_start global.
+      ImageE->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
+      ImageE->setVisibility(GlobalValue::HiddenVisibility);
+    }
+  }
+
+public:
+  BinaryWrapper(StringRef Target) : M("offload.wrapper.object", C), SS(Alloc) {
+    M.setTargetTriple(Target);
+  }
+
+  const Module &wrapBinaries(ArrayRef<BinaryDesc> Binaries) { // Adds the images and returns the module.
+    createImages(Binaries);
+    return M;
+  }
+};
+
+} // anonymous namespace
+
+int main(int argc, const char **argv) {
+  sys::PrintStackTraceOnErrorSignal(argv[0]);
+
+  cl::HideUnrelatedOptions(ClangOffloadWrapperCategory);
+  cl::SetVersionPrinter([](raw_ostream &OS) {
+    OS << clang::getClangToolFullVersion("clang-offload-wrapper") << '\n';
+  });
+  cl::ParseCommandLineOptions(
+      argc, argv,
+      "A tool to create a wrapper bitcode for offload target binaries. Takes "
+      "offload\ntarget binaries as input and produces bitcode file containing "
+      "target binaries packaged\nas data.\n");
+
+  if (Help) { // -h is declared as an alias for -help and is handled here.
+    cl::PrintHelpMessage();
+    return 0;
+  }
+
+  auto reportError = [argv](Error E) {
+    logAllUnhandledErrors(std::move(E), WithColor::error(errs(), argv[0]));
+  };
+
+  if (Triple(Target).getArch() == Triple::UnknownArch) { // Reject triples whose architecture is unknown.
+    reportError(createStringError(
+        errc::invalid_argument, "'" + Target + "': unsupported target triple"));
+    return 1;
+  }
+
+  if (Inputs.size() != OffloadTargets.size()) { // Input I is paired with offload target I below.
+    reportError(createStringError(
+        errc::invalid_argument,
+        "number of input files and offload targets should match"));
+    return 1;
+  }
+
+  // Read device binaries. Buffers owns the file contents that the Images entries refer to.
+  SmallVector<std::unique_ptr<MemoryBuffer>, 4u> Buffers;
+  SmallVector<BinaryWrapper::BinaryDesc, 4u> Images;
+  Buffers.reserve(Inputs.size());
+  Images.reserve(Inputs.size());
+  for (unsigned I = 0; I < Inputs.size(); ++I) {
+    const std::string &File = Inputs[I];
+    ErrorOr<std::unique_ptr<MemoryBuffer>> BufOrErr =
+        MemoryBuffer::getFileOrSTDIN(File);
+    if (!BufOrErr) {
+      reportError(createFileError(File, BufOrErr.getError()));
+      return 1;
+    }
+    const std::unique_ptr<MemoryBuffer> &Buf =
+        Buffers.emplace_back(std::move(*BufOrErr));
+    Images.emplace_back(
+        makeArrayRef(Buf->getBufferStart(), Buf->getBufferSize()),
+        OffloadTargets[I]);
+  }
+
+  // Open the output file that the resulting bitcode is written to.
+  std::error_code EC;
+  ToolOutputFile Out(Output, EC, sys::fs::OF_None);
+  if (EC) {
+    reportError(createFileError(Output, EC));
+    return 1;
+  }
+
+  // Build the wrapper module for the device binaries and write its bitcode to the file.
+  WriteBitcodeToFile(BinaryWrapper(Target).wrapBinaries(
+                         makeArrayRef(Images.data(), Images.size())),
+                     Out.os());
+  if (Out.os().has_error()) {
+    reportError(createFileError(Output, Out.os().error()));
+    return 1;
+  }
+
+  // Success: mark the output file to be kept.
+  Out.keep();
+  return 0;
+}