[NVPTX] Let there be One True Way to set NVVMReflect params.

author Justin Lebar <jlebar@google.com>

Sun, 15 Jan 2017 16:54:35 +0000 (16:54 +0000)

committer Justin Lebar <jlebar@google.com>

Sun, 15 Jan 2017 16:54:35 +0000 (16:54 +0000)
author Justin Lebar <jlebar@google.com>
Sun, 15 Jan 2017 16:54:35 +0000 (16:54 +0000)
committer Justin Lebar <jlebar@google.com>
Sun, 15 Jan 2017 16:54:35 +0000 (16:54 +0000)
diff --git a/docs/NVPTXUsage.rst b/docs/NVPTXUsage.rst

index fdfc8e41dc3b54a1d97c10efbb1b902abe9c2c8c..bef27b3327f8f6630197a3463f6896bec21099af 100644 (file)
--- a/docs/NVPTXUsage.rst
+++ b/docs/NVPTXUsage.rst
@@ -289,7 +289,7 @@ code often follows a pattern:
        return my_function_precise(a);
    }
  
-The default value for all unspecified reflection parameters is zero. 
+The default value for all unspecified reflection parameters is zero.
  
  The ``NVVMReflect`` pass should be executed early in the optimization
  pipeline, immediately after the link stage. The ``internalize`` pass is also
@@ -326,6 +326,18 @@ often leave behind dead code of the form:
  Therefore, it is recommended that ``NVVMReflect`` is executed early in the
  optimization pipeline before dead-code elimination.
  
+The NVPTX TargetMachine knows how to schedule ``NVVMReflect`` at the beginning
+of your pass manager; just use the following code when setting up your pass
+manager:
+
+.. code-block:: c++
+
+    std::unique_ptr<TargetMachine> TM = ...;
+    PassManagerBuilder PMBuilder(...);
+    PMBuilder.addExtension(
+        PassManagerBuilder::EP_EarlyAsPossible,
+        [&](const PassManagerBuilder &, legacy::PassManagerBase &PM) {
+          TM->addEarlyAsPossiblePasses(PM);
+        });
  
  Reflection Parameters
  ---------------------
@@ -339,35 +351,16 @@ Flag                 Description
  ``__CUDA_FTZ=[0,1]`` Use optimized code paths that flush subnormals to zero
  ==================== ======================================================
  
+The value of this flag is determined by the "nvvm-reflect-ftz" module flag.
+The following sets the ftz flag to 1.
  
-Invoking NVVMReflect
---------------------
-
-To ensure that all dead code caused by the reflection pass is eliminated, it
-is recommended that the reflection pass is executed early in the LLVM IR
-optimization pipeline. The pass takes an optional mapping of reflection
-parameter name to an integer value. This mapping can be specified as either a
-command-line option to ``opt`` or as an LLVM ``StringMap<int>`` object when
-programmatically creating a pass pipeline.
-
-With ``opt``:
-
-.. code-block:: text
-
-  # opt -nvvm-reflect -nvvm-reflect-list=<var>=<value>,<var>=<value> module.bc -o module.reflect.bc
-
-
-With programmatic pass pipeline:
-
-.. code-block:: c++
-
-  extern FunctionPass *llvm::createNVVMReflectPass(const StringMap<int>& Mapping);
-
-  StringMap<int> ReflectParams;
-  ReflectParams["__CUDA_FTZ"] = 1;
-  Passes.add(createNVVMReflectPass(ReflectParams));
-
+.. code-block:: llvm
+
+    !llvm.module.flags = !{!0}
+    !0 = !{i32 4, !"nvvm-reflect-ftz", i32 1}
  
+(``i32 4`` indicates that the value set here overrides the value in another
+module we link with.  See the `LangRef <LangRef.html#module-flags-metadata>`_
+for details.)
  
  Executing PTX
  =============
diff --git a/lib/Target/NVPTX/NVPTX.h b/lib/Target/NVPTX/NVPTX.h

index fef9f789bda9ecc4f6d3a47dfd3ba16bef669133..24b6c0879f1ee66ed2210a814b63ff41e3e8fddb 100644 (file)
--- a/lib/Target/NVPTX/NVPTX.h
+++ b/lib/Target/NVPTX/NVPTX.h
@@ -48,7 +48,6 @@ ModulePass *createGenericToNVVMPass();
  FunctionPass *createNVPTXInferAddressSpacesPass();
  FunctionPass *createNVVMIntrRangePass(unsigned int SmVersion);
  FunctionPass *createNVVMReflectPass();
-FunctionPass *createNVVMReflectPass(const StringMap<int> &Mapping);
  MachineFunctionPass *createNVPTXPrologEpilogPass();
  MachineFunctionPass *createNVPTXReplaceImageHandlesPass();
  FunctionPass *createNVPTXImageOptimizerPass();
diff --git a/lib/Target/NVPTX/NVVMReflect.cpp b/lib/Target/NVPTX/NVVMReflect.cpp

index c639c4dc068391545cf2849214052dadb3b992c4..152b665d0fdc9c423743ee348d0307807014a301 100644 (file)
--- a/lib/Target/NVPTX/NVVMReflect.cpp
+++ b/lib/Target/NVPTX/NVVMReflect.cpp
@@ -10,11 +10,10 @@
  // This pass replaces occurrences of __nvvm_reflect("foo") and llvm.nvvm.reflect
  // with an integer.
  //
-// We choose the value we use by looking, in this order, at:
-//
-//  * the -nvvm-reflect-list flag, which has the format "foo=1,bar=42",
-//  * the StringMap passed to the pass's constructor, and
-//  * metadata in the module itself.
+// We choose the value we use by looking at metadata in the module itself.  Note
+// that we intentionally only have one way to choose these values, because other
+// parts of LLVM (particularly, InstCombineCall) rely on being able to predict
+// the values chosen by this pass.
  //
  // If we see an unknown string, we replace its call with 0.
  //
@@ -49,30 +48,17 @@ namespace llvm { void initializeNVVMReflectPass(PassRegistry &); }
  
  namespace {
  class NVVMReflect : public FunctionPass {
-private:
-  StringMap<int> VarMap;
-
  public:
    static char ID;
-  NVVMReflect() : NVVMReflect(StringMap<int>()) {}
-
-  NVVMReflect(const StringMap<int> &Mapping)
-      : FunctionPass(ID), VarMap(Mapping) {
+  NVVMReflect() : FunctionPass(ID) {
      initializeNVVMReflectPass(*PassRegistry::getPassRegistry());
-    setVarMap();
    }
  
    bool runOnFunction(Function &) override;
-
-private:
-  void setVarMap();
  };
  }
  
  FunctionPass *llvm::createNVVMReflectPass() { return new NVVMReflect(); }
-FunctionPass *llvm::createNVVMReflectPass(const StringMap<int> &Mapping) {
-  return new NVVMReflect(Mapping);
-}
  
  static cl::opt<bool>
  NVVMReflectEnabled("nvvm-reflect-enable", cl::init(true), cl::Hidden,
@@ -83,35 +69,6 @@ INITIALIZE_PASS(NVVMReflect, "nvvm-reflect",
                  "Replace occurrences of __nvvm_reflect() calls with 0/1", false,
                  false)
  
-static cl::list<std::string>
-ReflectList("nvvm-reflect-list", cl::value_desc("name=<int>"), cl::Hidden,
-            cl::desc("A list of string=num assignments"),
-            cl::ValueRequired);
-
-/// The command line can look as follows :
-/// -nvvm-reflect-list a=1,b=2 -nvvm-reflect-list c=3,d=0 -R e=2
-/// The strings "a=1,b=2", "c=3,d=0", "e=2" are available in the
-/// ReflectList vector. First, each of ReflectList[i] is 'split'
-/// using "," as the delimiter. Then each of this part is split
-/// using "=" as the delimiter.
-void NVVMReflect::setVarMap() {
-  for (unsigned i = 0, e = ReflectList.size(); i != e; ++i) {
-    DEBUG(dbgs() << "Option : "  << ReflectList[i] << "\n");
-    SmallVector<StringRef, 4> NameValList;
-    StringRef(ReflectList[i]).split(NameValList, ',');
-    for (unsigned j = 0, ej = NameValList.size(); j != ej; ++j) {
-      SmallVector<StringRef, 2> NameValPair;
-      NameValList[j].split(NameValPair, '=');
-      assert(NameValPair.size() == 2 && "name=val expected");
-      std::stringstream ValStream(NameValPair[1]);
-      int Val;
-      ValStream >> Val;
-      assert((!(ValStream.fail())) && "integer value expected");
-      VarMap[NameValPair[0]] = Val;
-    }
-  }
-}
-
  bool NVVMReflect::runOnFunction(Function &F) {
    if (!NVVMReflectEnabled)
      return false;
@@ -199,11 +156,10 @@ bool NVVMReflect::runOnFunction(Function &F) {
      DEBUG(dbgs() << "Arg of _reflect : " << ReflectArg << "\n");
  
      int ReflectVal = 0; // The default value is 0
-    auto Iter = VarMap.find(ReflectArg);
-    if (Iter != VarMap.end())
-      ReflectVal = Iter->second;
-    else if (ReflectArg == "__CUDA_FTZ") {
-      // Try to pull __CUDA_FTZ from the nvvm-reflect-ftz module flag.
+    if (ReflectArg == "__CUDA_FTZ") {
+      // Try to pull __CUDA_FTZ from the nvvm-reflect-ftz module flag.  Our
+      // choice here must be kept in sync with AutoUpgrade, which uses the same
+      // technique to detect whether ftz is enabled.
        if (auto *Flag = mdconst::extract_or_null<ConstantInt>(
                F.getParent()->getModuleFlag("nvvm-reflect-ftz")))
          ReflectVal = Flag->getSExtValue();
diff --git a/test/CodeGen/NVPTX/nvvm-reflect.ll b/test/CodeGen/NVPTX/nvvm-reflect.ll

index 8c75dfc30a56f9d250d0bfb8009f093ab5ed64fc..165597d6baff79c6286b7d99389d88a8cdfc2938 100644 (file)
--- a/test/CodeGen/NVPTX/nvvm-reflect.ll
+++ b/test/CodeGen/NVPTX/nvvm-reflect.ll
@@ -1,30 +1,38 @@
-; RUN: opt < %s -S -nvvm-reflect -nvvm-reflect-list USE_MUL=0 -O2 | FileCheck %s --check-prefix=USE_MUL_0
-; RUN: opt < %s -S -nvvm-reflect -nvvm-reflect-list USE_MUL=1 -O2 | FileCheck %s --check-prefix=USE_MUL_1
+; We run nvvm-reflect (and then optimize) this module twice, once with metadata
+; that enables FTZ, and again with metadata that disables it.
  
-@str = private unnamed_addr addrspace(4) constant [8 x i8] c"USE_MUL\00"
+; RUN: cat %s > %t.noftz
+; RUN: echo '!0 = !{i32 4, !"nvvm-reflect-ftz", i32 0}' >> %t.noftz
+; RUN: opt %t.noftz -S -nvvm-reflect -O2 \
+; RUN:   | FileCheck %s --check-prefix=USE_FTZ_0 --check-prefix=CHECK
+
+; RUN: cat %s > %t.ftz
+; RUN: echo '!0 = !{i32 4, !"nvvm-reflect-ftz", i32 1}' >> %t.ftz
+; RUN: opt %t.ftz -S -nvvm-reflect -O2 \
+; RUN:   | FileCheck %s --check-prefix=USE_FTZ_1 --check-prefix=CHECK
+
+@str = private unnamed_addr addrspace(4) constant [11 x i8] c"__CUDA_FTZ\00"
  
  declare i32 @__nvvm_reflect(i8*)
  declare i8* @llvm.nvvm.ptr.constant.to.gen.p0i8.p4i8(i8 addrspace(4)*)
  
+; CHECK-LABEL: @foo
  define float @foo(float %a, float %b) {
-; USE_MUL_0: define float @foo
-; USE_MUL_0-NOT: call i32 @__nvvm_reflect
-; USE_MUL_1: define float @foo
-; USE_MUL_1-NOT: call i32 @__nvvm_reflect
-  %ptr = tail call i8* @llvm.nvvm.ptr.constant.to.gen.p0i8.p4i8(i8 addrspace(4)* getelementptr inbounds ([8 x i8], [8 x i8] addrspace(4)* @str, i32 0, i32 0))
+; CHECK-NOT: call i32 @__nvvm_reflect
+  %ptr = tail call i8* @llvm.nvvm.ptr.constant.to.gen.p0i8.p4i8(i8 addrspace(4)* getelementptr inbounds ([11 x i8], [11 x i8] addrspace(4)* @str, i32 0, i32 0))
    %reflect = tail call i32 @__nvvm_reflect(i8* %ptr)
    %cmp = icmp ugt i32 %reflect, 0
    br i1 %cmp, label %use_mul, label %use_add
  
  use_mul:
-; USE_MUL_1: fmul float %a, %b
-; USE_MUL_0-NOT: fadd float %a, %b
+; USE_FTZ_1: fmul float %a, %b
+; USE_FTZ_0-NOT: fadd float %a, %b
    %ret1 = fmul float %a, %b
    br label %exit
  
  use_add:
-; USE_MUL_0: fadd float %a, %b
-; USE_MUL_1-NOT: fmul float %a, %b
+; USE_FTZ_0: fadd float %a, %b
+; USE_FTZ_1-NOT: fmul float %a, %b
    %ret2 = fadd float %a, %b
    br label %exit
  
@@ -35,14 +43,12 @@ exit:
  
  declare i32 @llvm.nvvm.reflect.p0i8(i8*)
  
-; USE_MUL_0: define i32 @intrinsic
-; USE_MUL_1: define i32 @intrinsic
+; CHECK-LABEL: define i32 @intrinsic
  define i32 @intrinsic() {
-; USE_MUL_0-NOT: call i32 @llvm.nvvm.reflect
-; USE_MUL_0: ret i32 0
-; USE_MUL_1-NOT: call i32 @llvm.nvvm.reflect
-; USE_MUL_1: ret i32 1
-  %ptr = tail call i8* @llvm.nvvm.ptr.constant.to.gen.p0i8.p4i8(i8 addrspace(4)* getelementptr inbounds ([8 x i8], [8 x i8] addrspace(4)* @str, i32 0, i32 0))
+; CHECK-NOT: call i32 @llvm.nvvm.reflect
+; USE_FTZ_0: ret i32 0
+; USE_FTZ_1: ret i32 1
+  %ptr = tail call i8* @llvm.nvvm.ptr.constant.to.gen.p0i8.p4i8(i8 addrspace(4)* getelementptr inbounds ([11 x i8], [11 x i8] addrspace(4)* @str, i32 0, i32 0))
    %reflect = tail call i32 @llvm.nvvm.reflect.p0i8(i8* %ptr)
    ret i32 %reflect
  }
@@ -50,26 +56,24 @@ define i32 @intrinsic() {
  ; CUDA-7.0 passes __nvvm_reflect argument slightly differently.
  ; Verify that it works, too
  
-@"$str" = private addrspace(1) constant [8 x i8] c"USE_MUL\00"
+@"$str" = private addrspace(1) constant [11 x i8] c"__CUDA_FTZ\00"
  
+; CHECK-LABEL: @bar
  define float @bar(float %a, float %b) {
-; USE_MUL_0: define float @bar
-; USE_MUL_0-NOT: call i32 @__nvvm_reflect
-; USE_MUL_1: define float @bar
-; USE_MUL_1-NOT: call i32 @__nvvm_reflect
-  %reflect = call i32 @__nvvm_reflect(i8* addrspacecast (i8 addrspace(1)* getelementptr inbounds ([8 x i8], [8 x i8] addrspace(1)* @"$str", i32 0, i32 0) to i8*))
+; CHECK-NOT: call i32 @__nvvm_reflect
+  %reflect = call i32 @__nvvm_reflect(i8* addrspacecast (i8 addrspace(1)* getelementptr inbounds ([11 x i8], [11 x i8] addrspace(1)* @"$str", i32 0, i32 0) to i8*))
    %cmp = icmp ne i32 %reflect, 0
    br i1 %cmp, label %use_mul, label %use_add
  
  use_mul:
-; USE_MUL_1: fmul float %a, %b
-; USE_MUL_0-NOT: fadd float %a, %b
+; USE_FTZ_1: fmul float %a, %b
+; USE_FTZ_0-NOT: fadd float %a, %b
    %ret1 = fmul float %a, %b
    br label %exit
  
  use_add:
-; USE_MUL_0: fadd float %a, %b
-; USE_MUL_1-NOT: fmul float %a, %b
+; USE_FTZ_0: fadd float %a, %b
+; USE_FTZ_1-NOT: fmul float %a, %b
    %ret2 = fadd float %a, %b
    br label %exit
  
@@ -77,3 +81,6 @@ exit:
    %ret = phi float [%ret1, %use_mul], [%ret2, %use_add]
    ret float %ret
  }
+
+!llvm.module.flags = !{!0}
+; A module flag is added to the end of this file by the RUN lines at the top.
author	Justin Lebar <jlebar@google.com>
	Sun, 15 Jan 2017 16:54:35 +0000 (16:54 +0000)
committer	Justin Lebar <jlebar@google.com>
	Sun, 15 Jan 2017 16:54:35 +0000 (16:54 +0000)
docs/NVPTXUsage.rst		patch \| blob \| history
lib/Target/NVPTX/NVPTX.h		patch \| blob \| history
lib/Target/NVPTX/NVVMReflect.cpp		patch \| blob \| history
test/CodeGen/NVPTX/nvvm-reflect.ll		patch \| blob \| history