return my_function_precise(a);
}
-The default value for all unspecified reflection parameters is zero.
+The default value for all unspecified reflection parameters is zero.
The ``NVVMReflect`` pass should be executed early in the optimization
pipeline, immediately after the link stage. The ``internalize`` pass is also
Therefore, it is recommended that ``NVVMReflect`` is executed early in the
optimization pipeline before dead-code elimination.
+The NVPTX TargetMachine knows how to schedule ``NVVMReflect`` at the beginning
+of your pass manager; just use the following code when setting up your pass
+manager:
+
+.. code-block:: c++
+ std::unique_ptr<TargetMachine> TM = ...;
+ PassManagerBuilder PMBuilder(...);
+ PMBuilder.addExtension(
+ PassManagerBuilder::EP_EarlyAsPossible,
+ [&](const PassManagerBuilder &, legacy::PassManagerBase &PM) {
+ TM->addEarlyAsPossiblePasses(PM);
+ });
Reflection Parameters
---------------------
``__CUDA_FTZ=[0,1]`` Use optimized code paths that flush subnormals to zero
==================== ======================================================
+The value of this flag is determined by the "nvvm-reflect-ftz" module flag.
+The following sets the ftz flag to 1.
-Invoking NVVMReflect
---------------------
-
-To ensure that all dead code caused by the reflection pass is eliminated, it
-is recommended that the reflection pass is executed early in the LLVM IR
-optimization pipeline. The pass takes an optional mapping of reflection
-parameter name to an integer value. This mapping can be specified as either a
-command-line option to ``opt`` or as an LLVM ``StringMap<int>`` object when
-programmatically creating a pass pipeline.
-
-With ``opt``:
-
-.. code-block:: text
-
- # opt -nvvm-reflect -nvvm-reflect-list=<var>=<value>,<var>=<value> module.bc -o module.reflect.bc
-
-
-With programmatic pass pipeline:
-
-.. code-block:: c++
-
- extern FunctionPass *llvm::createNVVMReflectPass(const StringMap<int>& Mapping);
-
- StringMap<int> ReflectParams;
- ReflectParams["__CUDA_FTZ"] = 1;
- Passes.add(createNVVMReflectPass(ReflectParams));
-
+.. code-block:: llvm
+ !llvm.module.flags = !{!0}
+ !0 = !{i32 4, !"nvvm-reflect-ftz", i32 1}
+(``i32 4`` indicates that the value set here overrides the value in another
+module we link with. See the `LangRef <LangRef.html#module-flags-metadata>`_
+for details.)
Executing PTX
=============
FunctionPass *createNVPTXInferAddressSpacesPass();
FunctionPass *createNVVMIntrRangePass(unsigned int SmVersion);
FunctionPass *createNVVMReflectPass();
-FunctionPass *createNVVMReflectPass(const StringMap<int> &Mapping);
MachineFunctionPass *createNVPTXPrologEpilogPass();
MachineFunctionPass *createNVPTXReplaceImageHandlesPass();
FunctionPass *createNVPTXImageOptimizerPass();
// This pass replaces occurrences of __nvvm_reflect("foo") and llvm.nvvm.reflect
// with an integer.
//
-// We choose the value we use by looking, in this order, at:
-//
-// * the -nvvm-reflect-list flag, which has the format "foo=1,bar=42",
-// * the StringMap passed to the pass's constructor, and
-// * metadata in the module itself.
+// We choose the value we use by looking at metadata in the module itself. Note
+// that we intentionally only have one way to choose these values, because other
+// parts of LLVM (particularly, InstCombineCall) rely on being able to predict
+// the values chosen by this pass.
//
// If we see an unknown string, we replace its call with 0.
//
namespace {
class NVVMReflect : public FunctionPass {
-private:
- StringMap<int> VarMap;
-
public:
static char ID;
- NVVMReflect() : NVVMReflect(StringMap<int>()) {}
-
- NVVMReflect(const StringMap<int> &Mapping)
- : FunctionPass(ID), VarMap(Mapping) {
+ NVVMReflect() : FunctionPass(ID) {
initializeNVVMReflectPass(*PassRegistry::getPassRegistry());
- setVarMap();
}
bool runOnFunction(Function &) override;
-
-private:
- void setVarMap();
};
}
FunctionPass *llvm::createNVVMReflectPass() { return new NVVMReflect(); }
-FunctionPass *llvm::createNVVMReflectPass(const StringMap<int> &Mapping) {
- return new NVVMReflect(Mapping);
-}
static cl::opt<bool>
NVVMReflectEnabled("nvvm-reflect-enable", cl::init(true), cl::Hidden,
"Replace occurrences of __nvvm_reflect() calls with 0/1", false,
false)
-static cl::list<std::string>
-ReflectList("nvvm-reflect-list", cl::value_desc("name=<int>"), cl::Hidden,
- cl::desc("A list of string=num assignments"),
- cl::ValueRequired);
-
-/// The command line can look as follows :
-/// -nvvm-reflect-list a=1,b=2 -nvvm-reflect-list c=3,d=0 -R e=2
-/// The strings "a=1,b=2", "c=3,d=0", "e=2" are available in the
-/// ReflectList vector. First, each of ReflectList[i] is 'split'
-/// using "," as the delimiter. Then each of this part is split
-/// using "=" as the delimiter.
-void NVVMReflect::setVarMap() {
- for (unsigned i = 0, e = ReflectList.size(); i != e; ++i) {
- DEBUG(dbgs() << "Option : " << ReflectList[i] << "\n");
- SmallVector<StringRef, 4> NameValList;
- StringRef(ReflectList[i]).split(NameValList, ',');
- for (unsigned j = 0, ej = NameValList.size(); j != ej; ++j) {
- SmallVector<StringRef, 2> NameValPair;
- NameValList[j].split(NameValPair, '=');
- assert(NameValPair.size() == 2 && "name=val expected");
- std::stringstream ValStream(NameValPair[1]);
- int Val;
- ValStream >> Val;
- assert((!(ValStream.fail())) && "integer value expected");
- VarMap[NameValPair[0]] = Val;
- }
- }
-}
-
bool NVVMReflect::runOnFunction(Function &F) {
if (!NVVMReflectEnabled)
return false;
DEBUG(dbgs() << "Arg of _reflect : " << ReflectArg << "\n");
int ReflectVal = 0; // The default value is 0
- auto Iter = VarMap.find(ReflectArg);
- if (Iter != VarMap.end())
- ReflectVal = Iter->second;
- else if (ReflectArg == "__CUDA_FTZ") {
- // Try to pull __CUDA_FTZ from the nvvm-reflect-ftz module flag.
+ if (ReflectArg == "__CUDA_FTZ") {
+ // Try to pull __CUDA_FTZ from the nvvm-reflect-ftz module flag. Our
+ // choice here must be kept in sync with AutoUpgrade, which uses the same
+ // technique to detect whether ftz is enabled.
if (auto *Flag = mdconst::extract_or_null<ConstantInt>(
F.getParent()->getModuleFlag("nvvm-reflect-ftz")))
ReflectVal = Flag->getSExtValue();
-; RUN: opt < %s -S -nvvm-reflect -nvvm-reflect-list USE_MUL=0 -O2 | FileCheck %s --check-prefix=USE_MUL_0
-; RUN: opt < %s -S -nvvm-reflect -nvvm-reflect-list USE_MUL=1 -O2 | FileCheck %s --check-prefix=USE_MUL_1
+; We run nvvm-reflect (and then optimize) this module twice, once with metadata
+; that enables FTZ, and again with metadata that disables it.
-@str = private unnamed_addr addrspace(4) constant [8 x i8] c"USE_MUL\00"
+; RUN: cat %s > %t.noftz
+; RUN: echo '!0 = !{i32 4, !"nvvm-reflect-ftz", i32 0}' >> %t.noftz
+; RUN: opt %t.noftz -S -nvvm-reflect -O2 \
+; RUN: | FileCheck %s --check-prefix=USE_FTZ_0 --check-prefix=CHECK
+
+; RUN: cat %s > %t.ftz
+; RUN: echo '!0 = !{i32 4, !"nvvm-reflect-ftz", i32 1}' >> %t.ftz
+; RUN: opt %t.ftz -S -nvvm-reflect -O2 \
+; RUN: | FileCheck %s --check-prefix=USE_FTZ_1 --check-prefix=CHECK
+
+@str = private unnamed_addr addrspace(4) constant [11 x i8] c"__CUDA_FTZ\00"
declare i32 @__nvvm_reflect(i8*)
declare i8* @llvm.nvvm.ptr.constant.to.gen.p0i8.p4i8(i8 addrspace(4)*)
+; CHECK-LABEL: @foo
define float @foo(float %a, float %b) {
-; USE_MUL_0: define float @foo
-; USE_MUL_0-NOT: call i32 @__nvvm_reflect
-; USE_MUL_1: define float @foo
-; USE_MUL_1-NOT: call i32 @__nvvm_reflect
- %ptr = tail call i8* @llvm.nvvm.ptr.constant.to.gen.p0i8.p4i8(i8 addrspace(4)* getelementptr inbounds ([8 x i8], [8 x i8] addrspace(4)* @str, i32 0, i32 0))
+; CHECK-NOT: call i32 @__nvvm_reflect
+ %ptr = tail call i8* @llvm.nvvm.ptr.constant.to.gen.p0i8.p4i8(i8 addrspace(4)* getelementptr inbounds ([11 x i8], [11 x i8] addrspace(4)* @str, i32 0, i32 0))
%reflect = tail call i32 @__nvvm_reflect(i8* %ptr)
%cmp = icmp ugt i32 %reflect, 0
br i1 %cmp, label %use_mul, label %use_add
use_mul:
-; USE_MUL_1: fmul float %a, %b
-; USE_MUL_0-NOT: fadd float %a, %b
+; USE_FTZ_1: fmul float %a, %b
+; USE_FTZ_0-NOT: fadd float %a, %b
%ret1 = fmul float %a, %b
br label %exit
use_add:
-; USE_MUL_0: fadd float %a, %b
-; USE_MUL_1-NOT: fmul float %a, %b
+; USE_FTZ_0: fadd float %a, %b
+; USE_FTZ_1-NOT: fmul float %a, %b
%ret2 = fadd float %a, %b
br label %exit
declare i32 @llvm.nvvm.reflect.p0i8(i8*)
-; USE_MUL_0: define i32 @intrinsic
-; USE_MUL_1: define i32 @intrinsic
+; CHECK-LABEL: define i32 @intrinsic
define i32 @intrinsic() {
-; USE_MUL_0-NOT: call i32 @llvm.nvvm.reflect
-; USE_MUL_0: ret i32 0
-; USE_MUL_1-NOT: call i32 @llvm.nvvm.reflect
-; USE_MUL_1: ret i32 1
- %ptr = tail call i8* @llvm.nvvm.ptr.constant.to.gen.p0i8.p4i8(i8 addrspace(4)* getelementptr inbounds ([8 x i8], [8 x i8] addrspace(4)* @str, i32 0, i32 0))
+; CHECK-NOT: call i32 @llvm.nvvm.reflect
+; USE_FTZ_0: ret i32 0
+; USE_FTZ_1: ret i32 1
+ %ptr = tail call i8* @llvm.nvvm.ptr.constant.to.gen.p0i8.p4i8(i8 addrspace(4)* getelementptr inbounds ([11 x i8], [11 x i8] addrspace(4)* @str, i32 0, i32 0))
%reflect = tail call i32 @llvm.nvvm.reflect.p0i8(i8* %ptr)
ret i32 %reflect
}
; CUDA-7.0 passes __nvvm_reflect argument slightly differently.
; Verify that it works, too
-@"$str" = private addrspace(1) constant [8 x i8] c"USE_MUL\00"
+@"$str" = private addrspace(1) constant [11 x i8] c"__CUDA_FTZ\00"
+; CHECK-LABEL: @bar
define float @bar(float %a, float %b) {
-; USE_MUL_0: define float @bar
-; USE_MUL_0-NOT: call i32 @__nvvm_reflect
-; USE_MUL_1: define float @bar
-; USE_MUL_1-NOT: call i32 @__nvvm_reflect
- %reflect = call i32 @__nvvm_reflect(i8* addrspacecast (i8 addrspace(1)* getelementptr inbounds ([8 x i8], [8 x i8] addrspace(1)* @"$str", i32 0, i32 0) to i8*))
+; CHECK-NOT: call i32 @__nvvm_reflect
+ %reflect = call i32 @__nvvm_reflect(i8* addrspacecast (i8 addrspace(1)* getelementptr inbounds ([11 x i8], [11 x i8] addrspace(1)* @"$str", i32 0, i32 0) to i8*))
%cmp = icmp ne i32 %reflect, 0
br i1 %cmp, label %use_mul, label %use_add
use_mul:
-; USE_MUL_1: fmul float %a, %b
-; USE_MUL_0-NOT: fadd float %a, %b
+; USE_FTZ_1: fmul float %a, %b
+; USE_FTZ_0-NOT: fadd float %a, %b
%ret1 = fmul float %a, %b
br label %exit
use_add:
-; USE_MUL_0: fadd float %a, %b
-; USE_MUL_1-NOT: fmul float %a, %b
+; USE_FTZ_0: fadd float %a, %b
+; USE_FTZ_1-NOT: fmul float %a, %b
%ret2 = fadd float %a, %b
br label %exit
%ret = phi float [%ret1, %use_mul], [%ret2, %use_add]
ret float %ret
}
+
+!llvm.module.flags = !{!0}
+; A module flag is added to the end of this file by the RUN lines at the top.