[CodeGen] [CUDA] Add the ability to set default attrs on functions in linked modules.

author Justin Lebar <jlebar@google.com>

Wed, 25 Jan 2017 21:29:48 +0000 (21:29 +0000)

committer Justin Lebar <jlebar@google.com>

Wed, 25 Jan 2017 21:29:48 +0000 (21:29 +0000)
author Justin Lebar <jlebar@google.com>
Wed, 25 Jan 2017 21:29:48 +0000 (21:29 +0000)
committer Justin Lebar <jlebar@google.com>
Wed, 25 Jan 2017 21:29:48 +0000 (21:29 +0000)
diff --git a/include/clang/CodeGen/CodeGenAction.h b/include/clang/CodeGen/CodeGenAction.h

index 4744bb4ee798d56677ea536469814f1a86c5d021..65eda5bb98e258abd0e5e0a8f3672c69f60bd0a9 100644 (file)
--- a/include/clang/CodeGen/CodeGenAction.h
+++ b/include/clang/CodeGen/CodeGenAction.h
@@ -23,11 +23,28 @@ class BackendConsumer;
  
  class CodeGenAction : public ASTFrontendAction {
  private:
+  // Let BackendConsumer access LinkModule.
+  friend class BackendConsumer;
+
+  /// Info about module to link into a module we're generating.
+  struct LinkModule {
+    /// The module to link in.
+    std::unique_ptr<llvm::Module> Module;
+
+    /// If true, we set attributes on Module's functions according to our
+    /// CodeGenOptions and LangOptions, as though we were generating the
+    /// function ourselves.
+    bool PropagateAttrs;
+
+    /// Bitwise combination of llvm::Linker::Flags used when we link the module.
+    unsigned LinkFlags;
+  };
+
    unsigned Act;
    std::unique_ptr<llvm::Module> TheModule;
-  // Vector of {Linker::Flags, Module*} pairs to specify bitcode
-  // modules to link in using corresponding linker flags.
-  SmallVector<std::pair<unsigned, llvm::Module *>, 4> LinkModules;
+
+  /// Bitcode modules to link in to our module.
+  SmallVector<LinkModule, 4> LinkModules;
    llvm::LLVMContext *VMContext;
    bool OwnsVMContext;
  
@@ -51,13 +68,6 @@ protected:
  public:
    ~CodeGenAction() override;
  
-  /// setLinkModule - Set the link module to be used by this action.  If a link
-  /// module is not provided, and CodeGenOptions::LinkBitcodeFile is non-empty,
-  /// the action will load it from the specified file.
-  void addLinkModule(llvm::Module *Mod, unsigned LinkFlags) {
-    LinkModules.push_back(std::make_pair(LinkFlags, Mod));
-  }
-
    /// Take the generated LLVM module, for use after the action has been run.
    /// The result may be null on failure.
    std::unique_ptr<llvm::Module> takeModule();
diff --git a/include/clang/Frontend/CodeGenOptions.h b/include/clang/Frontend/CodeGenOptions.h

index 52bd1c5aff79b26d2a0f4736e0d6d4d27e99989c..a21abac24b9f5be493d06da9602dba854d38bba5 100644 (file)
--- a/include/clang/Frontend/CodeGenOptions.h
+++ b/include/clang/Frontend/CodeGenOptions.h
@@ -130,8 +130,19 @@ public:
    /// The float precision limit to use, if non-empty.
    std::string LimitFloatPrecision;
  
-  /// The name of the bitcode file to link before optzns.
-  std::vector<std::pair<unsigned, std::string>> LinkBitcodeFiles;
+  struct BitcodeFileToLink {
+    /// The filename of the bitcode file to link in.
+    std::string Filename;
+    /// If true, we set attributes on functions in the bitcode library
+    /// according to our CodeGenOptions, much as we set attrs on functions that
+    /// we generate ourselves.
+    bool PropagateAttrs = false;
+    /// Bitwise combination of llvm::Linker::Flags, passed to the LLVM linker.
+    unsigned LinkFlags = 0;
+  };
+
+  /// The files specified here are linked in to the module before optimizations.
+  std::vector<BitcodeFileToLink> LinkBitcodeFiles;
  
    /// The user provided name for the "main file", if non-empty. This is useful
    /// in situations where the input file name does not match the original input
diff --git a/lib/CodeGen/CGCall.cpp b/lib/CodeGen/CGCall.cpp

index c7c61e0c8ecb1b0d47dd034403d3a088894a6669..7d3419b2928e150f554245b3e945b97aeb858f1a 100644 (file)
--- a/lib/CodeGen/CGCall.cpp
+++ b/lib/CodeGen/CGCall.cpp
@@ -1620,15 +1620,113 @@ static void AddAttributesFromFunctionProtoType(ASTContext &Ctx,
      FuncAttrs.addAttribute(llvm::Attribute::NoUnwind);
  }
  
+void CodeGenModule::ConstructDefaultFnAttrList(StringRef Name, bool HasOptnone,
+                                               bool AttrOnCallSite,
+                                               llvm::AttrBuilder &FuncAttrs) {
+  // OptimizeNoneAttr takes precedence over -Os or -Oz. No warning needed.
+  if (!HasOptnone) {
+    if (CodeGenOpts.OptimizeSize)
+      FuncAttrs.addAttribute(llvm::Attribute::OptimizeForSize);
+    if (CodeGenOpts.OptimizeSize == 2)
+      FuncAttrs.addAttribute(llvm::Attribute::MinSize);
+  }
+
+  if (CodeGenOpts.DisableRedZone)
+    FuncAttrs.addAttribute(llvm::Attribute::NoRedZone);
+  if (CodeGenOpts.NoImplicitFloat)
+    FuncAttrs.addAttribute(llvm::Attribute::NoImplicitFloat);
+
+  if (AttrOnCallSite) {
+    // Attributes that should go on the call site only.
+    if (!CodeGenOpts.SimplifyLibCalls ||
+        CodeGenOpts.isNoBuiltinFunc(Name.data()))
+      FuncAttrs.addAttribute(llvm::Attribute::NoBuiltin);
+    if (!CodeGenOpts.TrapFuncName.empty())
+      FuncAttrs.addAttribute("trap-func-name", CodeGenOpts.TrapFuncName);
+  } else {
+    // Attributes that should go on the function, but not the call site.
+    if (!CodeGenOpts.DisableFPElim) {
+      FuncAttrs.addAttribute("no-frame-pointer-elim", "false");
+    } else if (CodeGenOpts.OmitLeafFramePointer) {
+      FuncAttrs.addAttribute("no-frame-pointer-elim", "false");
+      FuncAttrs.addAttribute("no-frame-pointer-elim-non-leaf");
+    } else {
+      FuncAttrs.addAttribute("no-frame-pointer-elim", "true");
+      FuncAttrs.addAttribute("no-frame-pointer-elim-non-leaf");
+    }
+
+    FuncAttrs.addAttribute("less-precise-fpmad",
+                           llvm::toStringRef(CodeGenOpts.LessPreciseFPMAD));
+
+    if (!CodeGenOpts.FPDenormalMode.empty())
+      FuncAttrs.addAttribute("denormal-fp-math", CodeGenOpts.FPDenormalMode);
+
+    FuncAttrs.addAttribute("no-trapping-math",
+                           llvm::toStringRef(CodeGenOpts.NoTrappingMath));
+
+    // TODO: Are these all needed?
+    // unsafe/inf/nan/nsz are handled by instruction-level FastMathFlags.
+    FuncAttrs.addAttribute("no-infs-fp-math",
+                           llvm::toStringRef(CodeGenOpts.NoInfsFPMath));
+    FuncAttrs.addAttribute("no-nans-fp-math",
+                           llvm::toStringRef(CodeGenOpts.NoNaNsFPMath));
+    FuncAttrs.addAttribute("unsafe-fp-math",
+                           llvm::toStringRef(CodeGenOpts.UnsafeFPMath));
+    FuncAttrs.addAttribute("use-soft-float",
+                           llvm::toStringRef(CodeGenOpts.SoftFloat));
+    FuncAttrs.addAttribute("stack-protector-buffer-size",
+                           llvm::utostr(CodeGenOpts.SSPBufferSize));
+    FuncAttrs.addAttribute("no-signed-zeros-fp-math",
+                           llvm::toStringRef(CodeGenOpts.NoSignedZeros));
+    FuncAttrs.addAttribute(
+        "correctly-rounded-divide-sqrt-fp-math",
+        llvm::toStringRef(CodeGenOpts.CorrectlyRoundedDivSqrt));
+
+    // TODO: Reciprocal estimate codegen options should apply to instructions?
+    std::vector<std::string> &Recips = getTarget().getTargetOpts().Reciprocals;
+    if (!Recips.empty())
+      FuncAttrs.addAttribute("reciprocal-estimates",
+                             llvm::join(Recips.begin(), Recips.end(), ","));
+
+    if (CodeGenOpts.StackRealignment)
+      FuncAttrs.addAttribute("stackrealign");
+    if (CodeGenOpts.Backchain)
+      FuncAttrs.addAttribute("backchain");
+  }
+
+  if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice) {
+    // Conservatively, mark all functions and calls in CUDA as convergent
+    // (meaning, they may call an intrinsically convergent op, such as
+    // __syncthreads(), and so can't have certain optimizations applied around
+    // them).  LLVM will remove this attribute where it safely can.
+    FuncAttrs.addAttribute(llvm::Attribute::Convergent);
+
+    // Exceptions aren't supported in CUDA device code.
+    FuncAttrs.addAttribute(llvm::Attribute::NoUnwind);
+
+    // Respect -fcuda-flush-denormals-to-zero.
+    if (getLangOpts().CUDADeviceFlushDenormalsToZero)
+      FuncAttrs.addAttribute("nvptx-f32ftz", "true");
+  }
+}
+
+void CodeGenModule::AddDefaultFnAttrs(llvm::Function &F) {
+  llvm::AttrBuilder FuncAttrs;
+  ConstructDefaultFnAttrList(F.getName(),
+                             F.hasFnAttribute(llvm::Attribute::OptimizeNone),
+                             /* AttrOnCallsite = */ false, FuncAttrs);
+  llvm::AttributeSet AS = llvm::AttributeSet::get(
+      getLLVMContext(), llvm::AttributeSet::FunctionIndex, FuncAttrs);
+  F.addAttributes(llvm::AttributeSet::FunctionIndex, AS);
+}
+
  void CodeGenModule::ConstructAttributeList(
      StringRef Name, const CGFunctionInfo &FI, CGCalleeInfo CalleeInfo,
      AttributeListType &PAL, unsigned &CallingConv, bool AttrOnCallSite) {
    llvm::AttrBuilder FuncAttrs;
    llvm::AttrBuilder RetAttrs;
-  bool HasOptnone = false;
  
    CallingConv = FI.getEffectiveCallingConvention();
-
    if (FI.isNoReturn())
      FuncAttrs.addAttribute(llvm::Attribute::NoReturn);
  
@@ -1639,7 +1737,7 @@ void CodeGenModule::ConstructAttributeList(
  
    const Decl *TargetDecl = CalleeInfo.getCalleeDecl();
  
-  bool HasAnyX86InterruptAttr = false;
+  bool HasOptnone = false;
    // FIXME: handle sseregparm someday...
    if (TargetDecl) {
      if (TargetDecl->hasAttr<ReturnsTwiceAttr>())
@@ -1679,7 +1777,6 @@ void CodeGenModule::ConstructAttributeList(
      if (TargetDecl->hasAttr<ReturnsNonNullAttr>())
        RetAttrs.addAttribute(llvm::Attribute::NonNull);
  
-    HasAnyX86InterruptAttr = TargetDecl->hasAttr<AnyX86InterruptAttr>();
      HasOptnone = TargetDecl->hasAttr<OptimizeNoneAttr>();
      if (auto *AllocSize = TargetDecl->getAttr<AllocSizeAttr>()) {
        Optional<unsigned> NumElemsParam;
@@ -1691,86 +1788,19 @@ void CodeGenModule::ConstructAttributeList(
      }
    }
  
-  // OptimizeNoneAttr takes precedence over -Os or -Oz. No warning needed.
-  if (!HasOptnone) {
-    if (CodeGenOpts.OptimizeSize)
-      FuncAttrs.addAttribute(llvm::Attribute::OptimizeForSize);
-    if (CodeGenOpts.OptimizeSize == 2)
-      FuncAttrs.addAttribute(llvm::Attribute::MinSize);
-  }
+  ConstructDefaultFnAttrList(Name, HasOptnone, AttrOnCallSite, FuncAttrs);
  
-  if (CodeGenOpts.DisableRedZone)
-    FuncAttrs.addAttribute(llvm::Attribute::NoRedZone);
-  if (CodeGenOpts.NoImplicitFloat)
-    FuncAttrs.addAttribute(llvm::Attribute::NoImplicitFloat);
    if (CodeGenOpts.EnableSegmentedStacks &&
        !(TargetDecl && TargetDecl->hasAttr<NoSplitStackAttr>()))
      FuncAttrs.addAttribute("split-stack");
  
-  if (AttrOnCallSite) {
-    // Attributes that should go on the call site only.
-    if (!CodeGenOpts.SimplifyLibCalls ||
-        CodeGenOpts.isNoBuiltinFunc(Name.data()))
-      FuncAttrs.addAttribute(llvm::Attribute::NoBuiltin);
-    if (!CodeGenOpts.TrapFuncName.empty())
-      FuncAttrs.addAttribute("trap-func-name", CodeGenOpts.TrapFuncName);
-  } else {
-    // Attributes that should go on the function, but not the call site.
-    if (!CodeGenOpts.DisableFPElim) {
-      FuncAttrs.addAttribute("no-frame-pointer-elim", "false");
-    } else if (CodeGenOpts.OmitLeafFramePointer) {
-      FuncAttrs.addAttribute("no-frame-pointer-elim", "false");
-      FuncAttrs.addAttribute("no-frame-pointer-elim-non-leaf");
-    } else {
-      FuncAttrs.addAttribute("no-frame-pointer-elim", "true");
-      FuncAttrs.addAttribute("no-frame-pointer-elim-non-leaf");
-    }
-
+  if (!AttrOnCallSite) {
      bool DisableTailCalls =
-        CodeGenOpts.DisableTailCalls || HasAnyX86InterruptAttr ||
-        (TargetDecl && TargetDecl->hasAttr<DisableTailCallsAttr>());
-    FuncAttrs.addAttribute(
-        "disable-tail-calls",
-        llvm::toStringRef(DisableTailCalls));
-
-    FuncAttrs.addAttribute("less-precise-fpmad",
-                           llvm::toStringRef(CodeGenOpts.LessPreciseFPMAD));
-
-    if (!CodeGenOpts.FPDenormalMode.empty())
-      FuncAttrs.addAttribute("denormal-fp-math",
-                             CodeGenOpts.FPDenormalMode);
-
-    FuncAttrs.addAttribute("no-trapping-math",
-                           llvm::toStringRef(CodeGenOpts.NoTrappingMath));
-
-    // TODO: Are these all needed?
-    // unsafe/inf/nan/nsz are handled by instruction-level FastMathFlags.
-    FuncAttrs.addAttribute("no-infs-fp-math",
-                           llvm::toStringRef(CodeGenOpts.NoInfsFPMath));
-    FuncAttrs.addAttribute("no-nans-fp-math",
-                           llvm::toStringRef(CodeGenOpts.NoNaNsFPMath));
-    FuncAttrs.addAttribute("unsafe-fp-math",
-                           llvm::toStringRef(CodeGenOpts.UnsafeFPMath));
-    FuncAttrs.addAttribute("use-soft-float",
-                           llvm::toStringRef(CodeGenOpts.SoftFloat));
-    FuncAttrs.addAttribute("stack-protector-buffer-size",
-                           llvm::utostr(CodeGenOpts.SSPBufferSize));
-    FuncAttrs.addAttribute("no-signed-zeros-fp-math",
-                           llvm::toStringRef(CodeGenOpts.NoSignedZeros));
-    FuncAttrs.addAttribute(
-        "correctly-rounded-divide-sqrt-fp-math",
-        llvm::toStringRef(CodeGenOpts.CorrectlyRoundedDivSqrt));
-
-    // TODO: Reciprocal estimate codegen options should apply to instructions?
-    std::vector<std::string> &Recips = getTarget().getTargetOpts().Reciprocals;
-    if (!Recips.empty())
-      FuncAttrs.addAttribute("reciprocal-estimates",
-                             llvm::join(Recips.begin(), Recips.end(), ","));
-
-    if (CodeGenOpts.StackRealignment)
-      FuncAttrs.addAttribute("stackrealign");
-    if (CodeGenOpts.Backchain)
-      FuncAttrs.addAttribute("backchain");
+        CodeGenOpts.DisableTailCalls ||
+        (TargetDecl && (TargetDecl->hasAttr<DisableTailCallsAttr>() ||
+                        TargetDecl->hasAttr<AnyX86InterruptAttr>()));
+    FuncAttrs.addAttribute("disable-tail-calls",
+                           llvm::toStringRef(DisableTailCalls));
  
      // Add target-cpu and target-features attributes to functions. If
      // we have a decl for the function and it has a target attribute then
@@ -1819,21 +1849,6 @@ void CodeGenModule::ConstructAttributeList(
      }
    }
  
-  if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice) {
-    // Conservatively, mark all functions and calls in CUDA as convergent
-    // (meaning, they may call an intrinsically convergent op, such as
-    // __syncthreads(), and so can't have certain optimizations applied around
-    // them).  LLVM will remove this attribute where it safely can.
-    FuncAttrs.addAttribute(llvm::Attribute::Convergent);
-
-    // Exceptions aren't supported in CUDA device code.
-    FuncAttrs.addAttribute(llvm::Attribute::NoUnwind);
-
-    // Respect -fcuda-flush-denormals-to-zero.
-    if (getLangOpts().CUDADeviceFlushDenormalsToZero)
-      FuncAttrs.addAttribute("nvptx-f32ftz", "true");
-  }
-
    ClangToLLVMArgMapping IRFunctionArgs(getContext(), FI);
  
    QualType RetTy = FI.getReturnType();
diff --git a/lib/CodeGen/CodeGenAction.cpp b/lib/CodeGen/CodeGenAction.cpp

index 0daedf408c9e140c8d07113dc4c0e2a5d5ea5eb5..527d90e44c17fea64f6a6ccea37fab3c8059fc54 100644 (file)
--- a/lib/CodeGen/CodeGenAction.cpp
+++ b/lib/CodeGen/CodeGenAction.cpp
@@ -7,6 +7,8 @@
  //
  //===----------------------------------------------------------------------===//
  
+#include "clang/CodeGen/CodeGenAction.h"
+#include "CodeGenModule.h"
  #include "CoverageMappingGen.h"
  #include "clang/AST/ASTConsumer.h"
  #include "clang/AST/ASTContext.h"
@@ -16,7 +18,6 @@
  #include "clang/Basic/SourceManager.h"
  #include "clang/Basic/TargetInfo.h"
  #include "clang/CodeGen/BackendUtil.h"
-#include "clang/CodeGen/CodeGenAction.h"
  #include "clang/CodeGen/ModuleBuilder.h"
  #include "clang/Frontend/CompilerInstance.h"
  #include "clang/Frontend/FrontendDiagnostic.h"
@@ -41,6 +42,8 @@ using namespace llvm;
  
  namespace clang {
    class BackendConsumer : public ASTConsumer {
+    using LinkModule = CodeGenAction::LinkModule;
+
      virtual void anchor();
      DiagnosticsEngine &Diags;
      BackendAction Action;
@@ -61,43 +64,37 @@ namespace clang {
  
      std::unique_ptr<CodeGenerator> Gen;
  
-    SmallVector<std::pair<unsigned, std::unique_ptr<llvm::Module>>, 4>
-        LinkModules;
+    SmallVector<LinkModule, 4> LinkModules;
  
      // This is here so that the diagnostic printer knows the module a diagnostic
      // refers to.
      llvm::Module *CurLinkModule = nullptr;
  
    public:
-    BackendConsumer(
-        BackendAction Action, DiagnosticsEngine &Diags,
-        const HeaderSearchOptions &HeaderSearchOpts,
-        const PreprocessorOptions &PPOpts, const CodeGenOptions &CodeGenOpts,
-        const TargetOptions &TargetOpts, const LangOptions &LangOpts,
-        bool TimePasses, const std::string &InFile,
-        const SmallVectorImpl<std::pair<unsigned, llvm::Module *>> &LinkModules,
-        std::unique_ptr<raw_pwrite_stream> OS, LLVMContext &C,
-        CoverageSourceInfo *CoverageInfo = nullptr)
+    BackendConsumer(BackendAction Action, DiagnosticsEngine &Diags,
+                    const HeaderSearchOptions &HeaderSearchOpts,
+                    const PreprocessorOptions &PPOpts,
+                    const CodeGenOptions &CodeGenOpts,
+                    const TargetOptions &TargetOpts,
+                    const LangOptions &LangOpts, bool TimePasses,
+                    const std::string &InFile,
+                    SmallVector<LinkModule, 4> LinkModules,
+                    std::unique_ptr<raw_pwrite_stream> OS, LLVMContext &C,
+                    CoverageSourceInfo *CoverageInfo = nullptr)
          : Diags(Diags), Action(Action), HeaderSearchOpts(HeaderSearchOpts),
            CodeGenOpts(CodeGenOpts), TargetOpts(TargetOpts), LangOpts(LangOpts),
            AsmOutStream(std::move(OS)), Context(nullptr),
            LLVMIRGeneration("irgen", "LLVM IR Generation Time"),
            LLVMIRGenerationRefCount(0),
            Gen(CreateLLVMCodeGen(Diags, InFile, HeaderSearchOpts, PPOpts,
-                                CodeGenOpts, C, CoverageInfo)) {
+                                CodeGenOpts, C, CoverageInfo)),
+          LinkModules(std::move(LinkModules)) {
        llvm::TimePassesIsEnabled = TimePasses;
-      for (auto &I : LinkModules)
-        this->LinkModules.push_back(
-            std::make_pair(I.first, std::unique_ptr<llvm::Module>(I.second)));
      }
      llvm::Module *getModule() const { return Gen->GetModule(); }
      std::unique_ptr<llvm::Module> takeModule() {
        return std::unique_ptr<llvm::Module>(Gen->ReleaseModule());
      }
-    void releaseLinkModules() {
-      for (auto &I : LinkModules)
-        I.second.release();
-    }
  
      void HandleCXXStaticMemberVarInstantiation(VarDecl *VD) override {
        Gen->HandleCXXStaticMemberVarInstantiation(VD);
@@ -159,6 +156,21 @@ namespace clang {
          HandleTopLevelDecl(D);
      }
  
+    // Links each entry in LinkModules into our module.  Returns true on error.
+    bool LinkInModules() {
+      for (auto &LM : LinkModules) {
+        if (LM.PropagateAttrs)
+          for (Function &F : *LM.Module)
+            Gen->CGM().AddDefaultFnAttrs(F);
+
+        CurLinkModule = LM.Module.get();
+        if (Linker::linkModules(*getModule(), std::move(LM.Module),
+                                LM.LinkFlags))
+          return true;
+      }
+      return false; // success
+    }
+
      void HandleTranslationUnit(ASTContext &C) override {
        {
          PrettyStackTraceString CrashInfo("Per-file LLVM IR generation");
@@ -216,13 +228,9 @@ namespace clang {
            Ctx.setDiagnosticHotnessRequested(true);
        }
  
-      // Link LinkModule into this module if present, preserving its validity.
-      for (auto &I : LinkModules) {
-        unsigned LinkFlags = I.first;
-        CurLinkModule = I.second.get();
-        if (Linker::linkModules(*getModule(), std::move(I.second), LinkFlags))
-          return;
-      }
+      // Link each LinkModule into our module.
+      if (LinkInModules())
+        return;
  
        EmbedBitcode(getModule(), CodeGenOpts, llvm::MemoryBufferRef());
  
@@ -729,10 +737,6 @@ void CodeGenAction::EndSourceFileAction() {
    if (!getCompilerInstance().hasASTConsumer())
      return;
  
-  // Take back ownership of link modules we passed to consumer.
-  if (!LinkModules.empty())
-    BEConsumer->releaseLinkModules();
-
    // Steal the module from the consumer.
    TheModule = BEConsumer->takeModule();
  }
@@ -775,13 +779,12 @@ CodeGenAction::CreateASTConsumer(CompilerInstance &CI, StringRef InFile) {
  
    // Load bitcode modules to link with, if we need to.
    if (LinkModules.empty())
-    for (auto &I : CI.getCodeGenOpts().LinkBitcodeFiles) {
-      const std::string &LinkBCFile = I.second;
-
-      auto BCBuf = CI.getFileManager().getBufferForFile(LinkBCFile);
+    for (const CodeGenOptions::BitcodeFileToLink &F :
+         CI.getCodeGenOpts().LinkBitcodeFiles) {
+      auto BCBuf = CI.getFileManager().getBufferForFile(F.Filename);
        if (!BCBuf) {
          CI.getDiagnostics().Report(diag::err_cannot_open_file)
-            << LinkBCFile << BCBuf.getError().message();
+            << F.Filename << BCBuf.getError().message();
          LinkModules.clear();
          return nullptr;
        }
@@ -791,12 +794,13 @@ CodeGenAction::CreateASTConsumer(CompilerInstance &CI, StringRef InFile) {
        if (!ModuleOrErr) {
          handleAllErrors(ModuleOrErr.takeError(), [&](ErrorInfoBase &EIB) {
            CI.getDiagnostics().Report(diag::err_cannot_open_file)
-              << LinkBCFile << EIB.message();
+              << F.Filename << EIB.message();
          });
          LinkModules.clear();
          return nullptr;
        }
-      addLinkModule(ModuleOrErr.get().release(), I.first);
+      LinkModules.push_back(
+          {std::move(ModuleOrErr.get()), F.PropagateAttrs, F.LinkFlags});
      }
  
    CoverageSourceInfo *CoverageInfo = nullptr;
@@ -810,8 +814,8 @@ CodeGenAction::CreateASTConsumer(CompilerInstance &CI, StringRef InFile) {
    std::unique_ptr<BackendConsumer> Result(new BackendConsumer(
        BA, CI.getDiagnostics(), CI.getHeaderSearchOpts(),
        CI.getPreprocessorOpts(), CI.getCodeGenOpts(), CI.getTargetOpts(),
-      CI.getLangOpts(), CI.getFrontendOpts().ShowTimers, InFile, LinkModules,
-      std::move(OS), *VMContext, CoverageInfo));
+      CI.getLangOpts(), CI.getFrontendOpts().ShowTimers, InFile,
+      std::move(LinkModules), std::move(OS), *VMContext, CoverageInfo));
    BEConsumer = Result.get();
    return std::move(Result);
  }
diff --git a/lib/CodeGen/CodeGenModule.h b/lib/CodeGen/CodeGenModule.h

index 1715b3d913b30dec9760c71c05d383b089180da1..613e300ea091bcb18bbe74e830082d3a3ac3f573 100644 (file)
--- a/lib/CodeGen/CodeGenModule.h
+++ b/lib/CodeGen/CodeGenModule.h
@@ -1022,6 +1022,25 @@ public:
                                CGCalleeInfo CalleeInfo, AttributeListType &PAL,
                                unsigned &CallingConv, bool AttrOnCallSite);
  
+  /// Adds attributes to F according to our CodeGenOptions and LangOptions, as
+  /// though we had emitted it ourselves.  We remove any attributes on F that
+  /// conflict with the attributes we add here.
+  ///
+  /// This is useful for adding attrs to bitcode modules that you want to link
+  /// with but don't control, such as CUDA's libdevice.  When linking with such
+  /// a bitcode library, you might want to set e.g. its functions'
+  /// "unsafe-fp-math" attribute to match the attr of the functions you're
+  /// codegen'ing.  Otherwise, LLVM will interpret the bitcode module's lack of
+  /// unsafe-fp-math attrs as tantamount to unsafe-fp-math=false, and then LLVM
+  /// will propagate unsafe-fp-math=false up to every transitive caller of a
+  /// function in the bitcode library!
+  ///
+  /// With the exception of fast-math attrs, this will only make the attributes
+  /// on the function more conservative.  But it's unsafe to call this on a
+  /// function which relies on particular fast-math attributes for correctness.
+  /// It's up to you to ensure that this is safe.
+  void AddDefaultFnAttrs(llvm::Function &F);
+
    // Fills in the supplied string map with the set of target features for the
    // passed in function.
    void getFunctionFeatureMap(llvm::StringMap<bool> &FeatureMap,
@@ -1303,6 +1322,12 @@ private:
    /// Check whether we can use a "simpler", more core exceptions personality
    /// function.
    void SimplifyPersonality();
+
+  /// Helper function for ConstructAttributeList and AddDefaultFnAttrs.
+  /// Constructs an AttrList for a function with the given properties.
+  void ConstructDefaultFnAttrList(StringRef Name, bool HasOptnone,
+                                  bool AttrOnCallSite,
+                                  llvm::AttrBuilder &FuncAttrs);
  };
  }  // end namespace CodeGen
  }  // end namespace clang
diff --git a/lib/Frontend/CompilerInvocation.cpp b/lib/Frontend/CompilerInvocation.cpp

index 7576d6d4e5e3b632ab07e5cd663a9a4287316f4c..ee46f921ccbd020e5cbbcf108c98f6a6cbfedb1d 100644 (file)
--- a/lib/Frontend/CompilerInvocation.cpp
+++ b/lib/Frontend/CompilerInvocation.cpp
@@ -722,11 +722,16 @@ static bool ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args, InputKind IK,
    Opts.RelaxELFRelocations = Args.hasArg(OPT_mrelax_relocations);
    Opts.DebugCompilationDir = Args.getLastArgValue(OPT_fdebug_compilation_dir);
    for (auto A : Args.filtered(OPT_mlink_bitcode_file, OPT_mlink_cuda_bitcode)) {
-    unsigned LinkFlags = llvm::Linker::Flags::None;
-    if (A->getOption().matches(OPT_mlink_cuda_bitcode))
-      LinkFlags = llvm::Linker::Flags::LinkOnlyNeeded |
-                  llvm::Linker::Flags::InternalizeLinkedSymbols;
-    Opts.LinkBitcodeFiles.push_back(std::make_pair(LinkFlags, A->getValue()));
+    CodeGenOptions::BitcodeFileToLink F;
+    F.Filename = A->getValue();
+    if (A->getOption().matches(OPT_mlink_cuda_bitcode)) {
+      F.LinkFlags = llvm::Linker::Flags::LinkOnlyNeeded |
+                    llvm::Linker::Flags::InternalizeLinkedSymbols;
+      // When linking CUDA bitcode, propagate function attributes so that
+      // e.g. libdevice gets fast-math attrs if we're building with fast-math.
+      F.PropagateAttrs = true;
+    }
+    Opts.LinkBitcodeFiles.push_back(F);
    }
    Opts.SanitizeCoverageType =
        getLastArgIntValue(Args, OPT_fsanitize_coverage_type, 0, Diags);
diff --git a/test/CodeGenCUDA/propagate-metadata.cu b/test/CodeGenCUDA/propagate-metadata.cu

new file mode 100644 (file)

index 0000000..f8db765
--- /dev/null
+++ b/test/CodeGenCUDA/propagate-metadata.cu
@@ -0,0 +1,62 @@
+// Check that when we link a bitcode module into a file using
+// -mlink-cuda-bitcode, we apply the same attributes to the functions in that
+// bitcode module as we apply to functions we generate.
+//
+// In particular, we check that ftz and unsafe-math are propagated into the
+// bitcode library as appropriate.
+//
+// In addition, we set -ftrapping-math on the bitcode library, but then set
+// -fno-trapping-math on the main compilations, and ensure that the latter flag
+// overrides the flag on the bitcode library.
+
+// Build the bitcode library.  This is not built in CUDA mode, otherwise it
+// might have incompatible attributes.  This mirrors how libdevice is built.
+// RUN: %clang_cc1 -x c++ -emit-llvm-bc -ftrapping-math -DLIB \
+// RUN:   %s -o %t.bc -triple nvptx-unknown-unknown
+
+// RUN: %clang_cc1 -x cuda %s -emit-llvm -mlink-cuda-bitcode %t.bc -o - \
+// RUN:   -fno-trapping-math -fcuda-is-device -triple nvptx-unknown-unknown \
+// RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=NOFTZ --check-prefix=NOFAST
+
+// RUN: %clang_cc1 -x cuda %s -emit-llvm -mlink-cuda-bitcode %t.bc \
+// RUN:   -fno-trapping-math -fcuda-flush-denormals-to-zero -o - \
+// RUN:   -fcuda-is-device -triple nvptx-unknown-unknown \
+// RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=FTZ \
+// RUN:   --check-prefix=NOFAST
+
+// RUN: %clang_cc1 -x cuda %s -emit-llvm -mlink-cuda-bitcode %t.bc \
+// RUN:   -fno-trapping-math -fcuda-flush-denormals-to-zero -o - \
+// RUN:   -fcuda-is-device -menable-unsafe-fp-math -triple nvptx-unknown-unknown \
+// RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=FAST
+
+// Wrap everything in extern "C" so we don't have to worry about name mangling
+// in the IR.
+extern "C" {
+#ifdef LIB
+
+// This function is defined in the library and only declared in the main
+// compilation.
+void lib_fn() {}
+
+#else
+
+#include "Inputs/cuda.h"
+__device__ void lib_fn();
+__global__ void kernel() { lib_fn(); }
+
+#endif
+}
+
+// The kernel and lib function should have the same attributes.
+// CHECK: define void @kernel() [[attr:#[0-9]+]]
+// CHECK: define internal void @lib_fn() [[attr]]
+
+// Check the attribute list.
+// CHECK: attributes [[attr]] = {
+// CHECK: "no-trapping-math"="true"
+
+// FTZ-SAME: "nvptx-f32ftz"="true"
+// NOFTZ-NOT: "nvptx-f32ftz"="true"
+
+// FAST-SAME: "unsafe-fp-math"="true"
+// NOFAST-NOT: "unsafe-fp-math"="true"
author	Justin Lebar <jlebar@google.com>
	Wed, 25 Jan 2017 21:29:48 +0000 (21:29 +0000)
committer	Justin Lebar <jlebar@google.com>
	Wed, 25 Jan 2017 21:29:48 +0000 (21:29 +0000)
include/clang/CodeGen/CodeGenAction.h		patch \| blob \| history
include/clang/Frontend/CodeGenOptions.h		patch \| blob \| history
lib/CodeGen/CGCall.cpp		patch \| blob \| history
lib/CodeGen/CodeGenAction.cpp		patch \| blob \| history
lib/CodeGen/CodeGenModule.h		patch \| blob \| history
lib/Frontend/CompilerInvocation.cpp		patch \| blob \| history
test/CodeGenCUDA/propagate-metadata.cu	[new file with mode: 0644]	patch \| blob