]> granicus.if.org Git - clang/commitdiff
Embed bitcode in object file (clang cc1 part)
author: Steven Wu <stevenwu@apple.com>
Wed, 11 May 2016 16:26:03 +0000 (16:26 +0000)
committer: Steven Wu <stevenwu@apple.com>
Wed, 11 May 2016 16:26:03 +0000 (16:26 +0000)
Summary:
Teach clang to embed bitcode inside bitcode. When the -fembed-bitcode cc1
option is used, clang embeds both the input bitcode and the cc1
command line into the module, in special sections, before compiling it to
the object file.  Using -fembed-bitcode-marker instead only introduces an
empty marker in both sections.

Depends on D17390

Reviewers: rsmith

Subscribers: yaron.keren, vsk, cfe-commits

Differential Revision: http://reviews.llvm.org/D17392

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@269202 91177308-0d34-0410-b5e6-96231b3b80d8

include/clang/CodeGen/BackendUtil.h
include/clang/Driver/Options.td
include/clang/Frontend/CodeGenOptions.def
include/clang/Frontend/CodeGenOptions.h
lib/CodeGen/BackendUtil.cpp
lib/CodeGen/CodeGenAction.cpp
lib/Driver/Driver.cpp
lib/Driver/Tools.cpp
lib/Frontend/CompilerInvocation.cpp
test/Driver/embed-bitcode.c
test/Frontend/embed-bitcode.ll [new file with mode: 0644]

index a3e3824508132cfb572294662478e07e84bce303..f88057bfe62184b6c61c4b0e49ae31ab61b3946f 100644 (file)
@@ -16,6 +16,7 @@
 
 namespace llvm {
   class Module;
+  class MemoryBufferRef;
 }
 
 namespace clang {
@@ -37,6 +38,9 @@ namespace clang {
                          const TargetOptions &TOpts, const LangOptions &LOpts,
                          const llvm::DataLayout &TDesc, llvm::Module *M,
                          BackendAction Action, raw_pwrite_stream *OS);
+
+  void EmbedBitcode(llvm::Module *M, const CodeGenOptions &CGOpts,
+                    llvm::MemoryBufferRef Buf);
 }
 
 #endif
index e367b8c0bb3bc6d328c40759bc6940eb125d557f..602e8fa7de7d00826a5c1ee66b089aba8dd3df3f 100644 (file)
@@ -450,11 +450,14 @@ def fno_autolink : Flag <["-"], "fno-autolink">, Group<f_Group>,
   Flags<[DriverOption, CC1Option]>,
   HelpText<"Disable generation of linker directives for automatic library linking">;
 
+def fembed_bitcode_EQ : Joined<["-"], "fembed-bitcode=">,
+    Group<f_Group>, Flags<[DriverOption, CC1Option]>, MetaVarName<"<option>">,
+    HelpText<"Embed LLVM bitcode (option: off, all, bitcode, marker)">;
 def fembed_bitcode : Flag<["-"], "fembed-bitcode">, Group<f_Group>,
-  Flags<[CC1Option, CC1AsOption]>,
+  Alias<fembed_bitcode_EQ>, AliasArgs<["all"]>,
   HelpText<"Embed LLVM IR bitcode as data">;
 def fembed_bitcode_marker : Flag<["-"], "fembed-bitcode-marker">,
-  Group<f_Group>, Flags<[CC1Option]>,
+  Alias<fembed_bitcode_EQ>, AliasArgs<["marker"]>,
   HelpText<"Embed placeholder LLVM IR data as a marker">;
 def fgnu_inline_asm : Flag<["-"], "fgnu-inline-asm">, Group<f_Group>, Flags<[DriverOption]>;
 def fno_gnu_inline_asm : Flag<["-"], "fno-gnu-inline-asm">, Group<f_Group>,
index 6ef55b0bb73a4532d076b01226047f780438e64f..d0d4f7a69399922184de2caed83dd4768ee96383 100644 (file)
@@ -66,6 +66,8 @@ CODEGENOPT(EmitOpenCLArgMetadata , 1, 0) ///< Emit OpenCL kernel arg metadata.
 CODEGENOPT(EmulatedTLS       , 1, 0) ///< Set when -femulated-tls is enabled.
 /// \brief FP_CONTRACT mode (on/off/fast).
 ENUM_CODEGENOPT(FPContractMode, FPContractModeKind, 2, FPC_On)
+/// \brief Embed Bitcode mode (off/all/bitcode/marker).
+ENUM_CODEGENOPT(EmbedBitcode, EmbedBitcodeKind, 2, Embed_Off)
 CODEGENOPT(ForbidGuardVariables , 1, 0) ///< Issue errors if C++ guard variables
                                         ///< are required.
 CODEGENOPT(FunctionSections  , 1, 0) ///< Set when -ffunction-sections is enabled.
index 71acfc6da40df44e1cba0332f55e303990103109..fd456e08ecb5dcfb9402926d8b79ec34037d6a11 100644 (file)
@@ -86,6 +86,13 @@ public:
     ProfileIRInstr,    // IR level PGO instrumentation in LLVM.
   };
 
+  enum EmbedBitcodeKind {
+    Embed_Off,      // No embedded bitcode.
+    Embed_All,      // Embed both bitcode and commandline in the output.
+    Embed_Bitcode,  // Embed just the bitcode in the output.
+    Embed_Marker    // Embed a marker as a placeholder for bitcode.
+  };
+
   /// The code model to use (-mcmodel).
   std::string CodeModel;
 
@@ -196,6 +203,9 @@ public:
   /// Set of sanitizer checks that trap rather than diagnose.
   SanitizerSet SanitizeTrap;
 
+  /// List of backend command-line options for -fembed-bitcode.
+  std::vector<uint8_t> CmdArgs;
+
   /// \brief A list of all -fno-builtin-* function names (e.g., memset).
   std::vector<std::string> NoBuiltinFuncs;
 
@@ -236,6 +246,7 @@ public:
   bool hasProfileIRUse() const {
     return getProfileUse() == ProfileIRInstr;
   }
+
 };
 
 }  // end namespace clang
index c86eeaf8d3da161a53625d5e2d932e6cc4d2d61d..6c934a8e155e817473a4d18f75508d559699a86b 100644 (file)
 #include "clang/Frontend/Utils.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/Triple.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/Bitcode/BitcodeWriterPass.h"
+#include "llvm/Bitcode/ReaderWriter.h"
 #include "llvm/CodeGen/RegAllocRegistry.h"
 #include "llvm/CodeGen/SchedulerRegistry.h"
 #include "llvm/IR/DataLayout.h"
@@ -763,3 +765,91 @@ void clang::EmitBackendOutput(DiagnosticsEngine &Diags,
     }
   }
 }
+
+static const char* getSectionNameForBitcode(const Triple &T) {
+  switch (T.getObjectFormat()) {
+  case Triple::MachO:
+    return "__LLVM,__bitcode";
+  case Triple::COFF:
+  case Triple::ELF:
+  case Triple::UnknownObjectFormat:
+    return ".llvmbc";
+  }
+}
+
+static const char* getSectionNameForCommandline(const Triple &T) {
+  switch (T.getObjectFormat()) {
+  case Triple::MachO:
+    return "__LLVM,__cmdline";
+  case Triple::COFF:
+  case Triple::ELF:
+  case Triple::UnknownObjectFormat:
+    return ".llvmcmd";
+  }
+}
+
+// With -fembed-bitcode, save a copy of the LLVM IR as data in the bitcode
+// section (__LLVM,__bitcode for Mach-O, .llvmbc for other object formats).
+void clang::EmbedBitcode(llvm::Module *M, const CodeGenOptions &CGOpts,
+                         llvm::MemoryBufferRef Buf) {
+  if (CGOpts.getEmbedBitcode() == CodeGenOptions::Embed_Off)
+    return;
+
+  // Embed the bitcode for the llvm module.
+  std::string Data;
+  ArrayRef<uint8_t> ModuleData;
+  Triple T(M->getTargetTriple());
+  // Create a constant that contains the bitcode.
+  // In case of embedding a marker, ignore the input Buf and use the empty
+  // ArrayRef. It is also legal to create a bitcode marker even if Buf is empty.
+  if (CGOpts.getEmbedBitcode() != CodeGenOptions::Embed_Marker) {
+    if (!isBitcode((const unsigned char *)Buf.getBufferStart(),
+                   (const unsigned char *)Buf.getBufferEnd())) {
+      // If the input is LLVM Assembly, bitcode is produced by serializing
+      // the module. Use-list order needs to be preserved in this case.
+      llvm::raw_string_ostream OS(Data);
+      llvm::WriteBitcodeToFile(M, OS, /* ShouldPreserveUseListOrder */ true);
+      ModuleData =
+          ArrayRef<uint8_t>((const uint8_t *)OS.str().data(), OS.str().size());
+    } else
+      // If the input is LLVM bitcode, write the input byte stream directly.
+      ModuleData = ArrayRef<uint8_t>((const uint8_t *)Buf.getBufferStart(),
+                                     Buf.getBufferSize());
+  }
+  llvm::Constant *ModuleConstant =
+      llvm::ConstantDataArray::get(M->getContext(), ModuleData);
+  // Use Appending linkage so it doesn't get optimized out.
+  llvm::GlobalVariable *GV = new llvm::GlobalVariable(
+      *M, ModuleConstant->getType(), true, llvm::GlobalValue::AppendingLinkage,
+      ModuleConstant);
+  GV->setSection(getSectionNameForBitcode(T));
+  if (llvm::GlobalVariable *Old =
+          M->getGlobalVariable("llvm.embedded.module")) {
+    assert(Old->use_empty() && "llvm.embedded.module must have no uses");
+    GV->takeName(Old);
+    Old->eraseFromParent();
+  } else {
+    GV->setName("llvm.embedded.module");
+  }
+
+  // Return if only bitcode needs to be embedded.
+  if (CGOpts.getEmbedBitcode() == CodeGenOptions::Embed_Bitcode)
+    return;
+
+  // Embed command-line options.
+  ArrayRef<uint8_t> CmdData((uint8_t*)CGOpts.CmdArgs.data(),
+                            CGOpts.CmdArgs.size());
+  llvm::Constant *CmdConstant =
+    llvm::ConstantDataArray::get(M->getContext(), CmdData);
+  GV = new llvm::GlobalVariable(*M, CmdConstant->getType(), true,
+                                llvm::GlobalValue::AppendingLinkage,
+                                CmdConstant);
+  GV->setSection(getSectionNameForCommandline(T));
+  if (llvm::GlobalVariable *Old = M->getGlobalVariable("llvm.cmdline")) {
+    assert(Old->use_empty() && "llvm.cmdline must have no uses");
+    GV->takeName(Old);
+    Old->eraseFromParent();
+  } else {
+    GV->setName("llvm.cmdline");
+  }
+}
index 98685539d35c66cc4efc8e6058a0e8ab4af0c211..848acb4fe0798add6c52c935b9fa96c36c88f9cf 100644 (file)
@@ -173,6 +173,8 @@ namespace clang {
           return;
       }
 
+      EmbedBitcode(getModule(), CodeGenOpts, llvm::MemoryBufferRef());
+
       EmitBackendOutput(Diags, CodeGenOpts, TargetOpts, LangOpts,
                         C.getTargetInfo().getDataLayout(),
                         getModule(), Action, AsmOutStream);
@@ -831,9 +833,13 @@ void CodeGenAction::ExecuteAction() {
       TheModule->setTargetTriple(TargetOpts.Triple);
     }
 
+    EmbedBitcode(TheModule.get(), CI.getCodeGenOpts(),
+                 MainFile->getMemBufferRef());
+
     LLVMContext &Ctx = TheModule->getContext();
     Ctx.setInlineAsmDiagnosticHandler(BitcodeInlineAsmDiagHandler,
                                       &CI.getDiagnostics());
+
     EmitBackendOutput(CI.getDiagnostics(), CI.getCodeGenOpts(), TargetOpts,
                       CI.getLangOpts(), CI.getTarget().getDataLayout(),
                       TheModule.get(), BA, OS);
index 111614f3a53627e18067d42e6a296c70ae6366d3..1f2c64eaf7344f2c1b8a5ef8022b79f921643ba6 100644 (file)
@@ -507,14 +507,23 @@ Compilation *Driver::BuildCompilation(ArrayRef<const char *> ArgList) {
   // Ignore -fembed-bitcode options with LTO
   // since the output will be bitcode anyway.
   if (!Args.hasFlag(options::OPT_flto, options::OPT_fno_lto, false)) {
-    if (Args.hasArg(options::OPT_fembed_bitcode))
-      BitcodeEmbed = EmbedBitcode;
-    else if (Args.hasArg(options::OPT_fembed_bitcode_marker))
-      BitcodeEmbed = EmbedMarker;
+    if (Arg *A = Args.getLastArg(options::OPT_fembed_bitcode_EQ)) {
+      StringRef Name = A->getValue();
+      unsigned Model = llvm::StringSwitch<unsigned>(Name)
+          .Case("off", EmbedNone)
+          .Case("all", EmbedBitcode)
+          .Case("bitcode", EmbedBitcode)
+          .Case("marker", EmbedMarker)
+          .Default(~0U);
+      if (Model == ~0U) {
+        Diags.Report(diag::err_drv_invalid_value) << A->getAsString(Args)
+                                                  << Name;
+      } else
+        BitcodeEmbed = static_cast<BitcodeEmbedMode>(Model);
+    }
   } else {
     // claim the bitcode option under LTO so no warning is issued.
-    Args.ClaimAllArgs(options::OPT_fembed_bitcode);
-    Args.ClaimAllArgs(options::OPT_fembed_bitcode_marker);
+    Args.ClaimAllArgs(options::OPT_fembed_bitcode_EQ);
   }
 
   setLTOMode(Args);
index e45c5c556262b4a45b0e491d62698666b58ced6b..25f8880af6583365955c482cab6ace7ae01dc9ba 100644 (file)
@@ -3773,12 +3773,12 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
   if (C.getDriver().embedBitcodeEnabled() &&
       (isa<BackendJobAction>(JA) || isa<AssembleJobAction>(JA))) {
     // Add flags implied by -fembed-bitcode.
-    CmdArgs.push_back("-fembed-bitcode");
+    Args.AddLastArg(CmdArgs, options::OPT_fembed_bitcode_EQ);
     // Disable all llvm IR level optimizations.
     CmdArgs.push_back("-disable-llvm-optzns");
   }
   if (C.getDriver().embedBitcodeMarkerOnly())
-    CmdArgs.push_back("-fembed-bitcode-marker");
+    CmdArgs.push_back("-fembed-bitcode=marker");
 
   // We normally speed up the clang process a bit by skipping destructors at
   // exit, but when we're generating diagnostics we can rely on some of the
@@ -5709,7 +5709,13 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
   // With -save-temps, we want to save the unoptimized bitcode output from the
   // CompileJobAction, use -disable-llvm-passes to get pristine IR generated
   // by the frontend.
-  if (C.getDriver().isSaveTempsEnabled() && isa<CompileJobAction>(JA))
+  // When -fembed-bitcode is enabled, optimized bitcode is emitted because it
+  // has slightly different breakdown between stages.
+  // FIXME: -fembed-bitcode -save-temps will save optimized bitcode instead of
+  // pristine IR generated by the frontend. Ideally, a new compile action should
+  // be added so both IR can be captured.
+  if (C.getDriver().isSaveTempsEnabled() &&
+      !C.getDriver().embedBitcodeEnabled() && isa<CompileJobAction>(JA))
     CmdArgs.push_back("-disable-llvm-passes");
 
   if (Output.getType() == types::TY_Dependencies) {
index a97dc92adfa81f0913fd53f39ffd6d9e82b6df1e..f11188d0ff95af8d306a82d8e1f38f42400e0ea2 100644 (file)
@@ -634,6 +634,45 @@ static bool ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args, InputKind IK,
       }
     }
   }
+       // Handle -fembed-bitcode option.
+  if (Arg *A = Args.getLastArg(OPT_fembed_bitcode_EQ)) {
+    StringRef Name = A->getValue();
+    unsigned Model = llvm::StringSwitch<unsigned>(Name)
+        .Case("off", CodeGenOptions::Embed_Off)
+        .Case("all", CodeGenOptions::Embed_All)
+        .Case("bitcode", CodeGenOptions::Embed_Bitcode)
+        .Case("marker", CodeGenOptions::Embed_Marker)
+        .Default(~0U);
+    if (Model == ~0U) {
+      Diags.Report(diag::err_drv_invalid_value) << A->getAsString(Args) << Name;
+      Success = false;
+    } else
+      Opts.setEmbedBitcode(
+          static_cast<CodeGenOptions::EmbedBitcodeKind>(Model));
+  }
+  // FIXME: For backend options that are not yet recorded as function
+  // attributes in the IR, keep track of them so we can embed them in a
+  // separate data section and use them when building the bitcode.
+  if (Opts.getEmbedBitcode() == CodeGenOptions::Embed_All) {
+    for (const auto &A : Args) {
+      // Do not encode output and input.
+      if (A->getOption().getID() == options::OPT_o ||
+          A->getOption().getID() == options::OPT_INPUT ||
+          A->getOption().getID() == options::OPT_x ||
+          A->getOption().getID() == options::OPT_fembed_bitcode ||
+          (A->getOption().getGroup().isValid() &&
+           A->getOption().getGroup().getID() == options::OPT_W_Group))
+        continue;
+      ArgStringList ASL;
+      A->render(Args, ASL);
+      for (const auto &arg : ASL) {
+        StringRef ArgStr(arg);
+        Opts.CmdArgs.insert(Opts.CmdArgs.end(), ArgStr.begin(), ArgStr.end());
+        // Use \00 to separate the individual command-line options.
+        Opts.CmdArgs.push_back('\0');
+      }
+    }
+  }
 
   Opts.InstrumentFunctions = Args.hasArg(OPT_finstrument_functions);
   Opts.InstrumentForProfiling = Args.hasArg(OPT_pg);
index 5beb5bc9e857748f0a179b0f1a97f84f77104ce0..4c014fcb2a1544666f20da89c98f5a95f720927e 100644 (file)
@@ -7,8 +7,15 @@
 // CHECK-CC: -emit-llvm-bc
 // CHECK-CC: -cc1
 // CHECK-CC: -emit-obj
-// CHECK-CC: -fembed-bitcode
+// CHECK-CC: -fembed-bitcode=all
 
+// RUN: %clang %s -c -fembed-bitcode=bitcode -fintegrated-as 2>&1 -### | FileCheck %s -check-prefix=CHECK-BITCODE
+// CHECK-BITCODE: -cc1
+// CHECK-BITCODE: -emit-llvm-bc
+// CHECK-BITCODE: -cc1
+// CHECK-BITCODE: -emit-obj
+// CHECK-BITCODE: -fembed-bitcode=bitcode
+//
 // RUN: %clang %s -c -save-temps -fembed-bitcode -fintegrated-as 2>&1 -### | FileCheck %s -check-prefix=CHECK-SAVE-TEMP
 // CHECK-SAVE-TEMP: -cc1
 // CHECK-SAVE-TEMP: -E
@@ -16,7 +23,7 @@
 // CHECK-SAVE-TEMP: -emit-llvm-bc
 // CHECK-SAVE-TEMP: -cc1
 // CHECK-SAVE-TEMP: -S
-// CHECK-SAVE-TEMP: -fembed-bitcode
+// CHECK-SAVE-TEMP: -fembed-bitcode=all
 // CHECK-SAVE-TEMP: -cc1as
 
 // RUN: %clang -c %s -flto -fembed-bitcode 2>&1 -### | FileCheck %s -check-prefix=CHECK-LTO
 // CHECK-LTO: -emit-llvm-bc
 // CHECK-LTO-NOT: warning: argument unused during compilation: '-fembed-bitcode'
 // CHECK-LTO-NOT: -cc1
-// CHECK-LTO-NOT: -fembed-bitcode
+// CHECK-LTO-NOT: -fembed-bitcode=all
 
 // RUN: %clang -c %s -fembed-bitcode-marker -fintegrated-as 2>&1 -### | FileCheck %s -check-prefix=CHECK-MARKER
 // CHECK-MARKER: -cc1
 // CHECK-MARKER: -emit-obj
-// CHECK-MARKER: -fembed-bitcode-marker
+// CHECK-MARKER: -fembed-bitcode=marker
 // CHECK-MARKER-NOT: -cc1
 
diff --git a/test/Frontend/embed-bitcode.ll b/test/Frontend/embed-bitcode.ll
new file mode 100644 (file)
index 0000000..0bd75f1
--- /dev/null
@@ -0,0 +1,59 @@
+; REQUIRES: arm-registered-target
+; REQUIRES: aarch64-registered-target
+; check .ll input
+; RUN: %clang_cc1 -triple thumbv7-apple-ios8.0.0 -emit-llvm \
+; RUN:    -fembed-bitcode=all -x ir %s -o - \
+; RUN:    | FileCheck %s
+; RUN: %clang_cc1 -triple thumbv7-apple-ios8.0.0 -emit-llvm \
+; RUN:    -fembed-bitcode=marker -x ir %s -o - \
+; RUN:    | FileCheck %s -check-prefix=CHECK-MARKER
+; RUN: %clang_cc1 -triple aarch64-unknown-linux-gnueabi -emit-llvm \
+; RUN:    -fembed-bitcode=all -x ir %s -o - \
+; RUN:    | FileCheck %s -check-prefix=CHECK-ELF
+
+; check .bc input
+; RUN: %clang_cc1 -triple thumbv7-apple-ios8.0.0 -emit-llvm-bc \
+; RUN:    -x ir %s -o %t.bc
+; RUN: %clang_cc1 -triple thumbv7-apple-ios8.0.0 -emit-llvm \
+; RUN:    -fembed-bitcode=all -x ir %t.bc -o - \
+; RUN:    | FileCheck %s
+; RUN: %clang_cc1 -triple thumbv7-apple-ios8.0.0 -emit-llvm \
+; RUN:    -fembed-bitcode=bitcode -x ir %t.bc -o - \
+; RUN:    | FileCheck %s -check-prefix=CHECK-ONLY-BITCODE
+; RUN: %clang_cc1 -triple thumbv7-apple-ios8.0.0 -emit-llvm \
+; RUN:    -fembed-bitcode=marker -x ir %t.bc -o - \
+; RUN:    | FileCheck %s -check-prefix=CHECK-MARKER
+
+; run through -fembed-bitcode twice and make sure it doesn't crash
+; RUN: %clang_cc1 -triple thumbv7-apple-ios8.0.0 -emit-llvm-bc \
+; RUN:    -fembed-bitcode=all -x ir %s -o - \
+; RUN: | %clang_cc1 -triple thumbv7-apple-ios8.0.0 -emit-llvm \
+; RUN:    -fembed-bitcode=all -x ir - -o /dev/null
+
+; check the magic number of bitcode at the beginning of the string
+; CHECK: @llvm.embedded.module
+; CHECK: c"\DE\C0\17\0B
+; CHECK: section "__LLVM,__bitcode"
+; CHECK: @llvm.cmdline
+; CHECK: section "__LLVM,__cmdline"
+
+; CHECK-ELF: @llvm.embedded.module
+; CHECK-ELF: section ".llvmbc"
+; CHECK-ELF: @llvm.cmdline
+; CHECK-ELF: section ".llvmcmd"
+
+; CHECK-ONLY-BITCODE: @llvm.embedded.module
+; CHECK-ONLY-BITCODE: c"\DE\C0\17\0B
+; CHECK-ONLY-BITCODE: section "__LLVM,__bitcode"
+; CHECK-ONLY-BITCODE-NOT: @llvm.cmdline
+; CHECK-ONLY-BITCODE-NOT: section "__LLVM,__cmdline"
+
+; CHECK-MARKER: @llvm.embedded.module
+; CHECK-MARKER: constant [0 x i8] zeroinitializer
+; CHECK-MARKER: section "__LLVM,__bitcode"
+; CHECK-MARKER: @llvm.cmdline
+; CHECK-MARKER: section "__LLVM,__cmdline"
+
+define i32 @f0() {
+  ret i32 0
+}