From: Artem Belevich Date: Fri, 13 Jan 2017 18:48:13 +0000 (+0000) Subject: [NVPTX] Only lower sin/cos to approximate instructions if unsafe math is allowed. X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=e41bb16926b81017db97ab76b45a80879865a275;p=llvm [NVPTX] Only lower sin/cos to approximate instructions if unsafe math is allowed. Previously we'd always lower @llvm.{sin,cos}.f32 to {sin,cos}.approx.f32 instructions even when unsafe FP math was not allowed. Clang-generated IR is not affected by this as it uses precise sin/cos from CUDA's libdevice when unsafe math is disabled. Differential Revision: https://reviews.llvm.org/D28619 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@291936 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp index 43c478f4212..4f3129c0774 100644 --- a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp +++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp @@ -103,6 +103,11 @@ bool NVPTXDAGToDAGISel::allowFMA() const { return TL->allowFMA(*MF, OptLevel); } +bool NVPTXDAGToDAGISel::allowUnsafeFPMath() const { + const NVPTXTargetLowering *TL = Subtarget->getTargetLowering(); + return TL->allowUnsafeFPMath(*MF); +} + /// Select - Select instructions not customized! Used for /// expanded, promoted and normal instructions. 
void NVPTXDAGToDAGISel::Select(SDNode *N) { diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.h b/lib/Target/NVPTX/NVPTXISelDAGToDAG.h index 0591035a6aa..b4cbc8a3f44 100644 --- a/lib/Target/NVPTX/NVPTXISelDAGToDAG.h +++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.h @@ -34,6 +34,7 @@ class LLVM_LIBRARY_VISIBILITY NVPTXDAGToDAGISel : public SelectionDAGISel { bool usePrecSqrtF32() const; bool useF32FTZ() const; bool allowFMA() const; + bool allowUnsafeFPMath() const; public: explicit NVPTXDAGToDAGISel(NVPTXTargetMachine &tm, diff --git a/lib/Target/NVPTX/NVPTXISelLowering.cpp b/lib/Target/NVPTX/NVPTXISelLowering.cpp index 7a760fd38d0..30870c6ee59 100644 --- a/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -3863,27 +3863,35 @@ NVPTXTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, bool NVPTXTargetLowering::allowFMA(MachineFunction &MF, CodeGenOpt::Level OptLevel) const { - const Function *F = MF.getFunction(); - const TargetOptions &TO = MF.getTarget().Options; - // Always honor command-line argument - if (FMAContractLevelOpt.getNumOccurrences() > 0) { + if (FMAContractLevelOpt.getNumOccurrences() > 0) return FMAContractLevelOpt > 0; - } else if (OptLevel == 0) { - // Do not contract if we're not optimizing the code + + // Do not contract if we're not optimizing the code. + if (OptLevel == 0) return false; - } else if (TO.AllowFPOpFusion == FPOpFusion::Fast || TO.UnsafeFPMath) { - // Honor TargetOptions flags that explicitly say fusion is okay + + // Honor TargetOptions flags that explicitly say fusion is okay. + if (MF.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast) + return true; + + return allowUnsafeFPMath(MF); +} + +bool NVPTXTargetLowering::allowUnsafeFPMath(MachineFunction &MF) const { + // Honor TargetOptions flags that explicitly say unsafe math is okay. 
+ if (MF.getTarget().Options.UnsafeFPMath) return true; - } else if (F->hasFnAttribute("unsafe-fp-math")) { - // Check for unsafe-fp-math=true coming from Clang + + // Allow unsafe math if unsafe-fp-math attribute explicitly says so. + const Function *F = MF.getFunction(); + if (F->hasFnAttribute("unsafe-fp-math")) { Attribute Attr = F->getFnAttribute("unsafe-fp-math"); StringRef Val = Attr.getValueAsString(); if (Val == "true") return true; } - // We did not have a clear indication that fusion is allowed, so assume not return false; } diff --git a/lib/Target/NVPTX/NVPTXISelLowering.h b/lib/Target/NVPTX/NVPTXISelLowering.h index e433aed7781..fa25fe1264a 100644 --- a/lib/Target/NVPTX/NVPTXISelLowering.h +++ b/lib/Target/NVPTX/NVPTXISelLowering.h @@ -511,6 +511,7 @@ public: getPreferredVectorAction(EVT VT) const override; bool allowFMA(MachineFunction &MF, CodeGenOpt::Level OptLevel) const; + bool allowUnsafeFPMath(MachineFunction &MF) const; bool isFMAFasterThanFMulAndFAdd(EVT) const override { return true; } diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.td b/lib/Target/NVPTX/NVPTXInstrInfo.td index 0fbb0448e4c..c276f177fab 100644 --- a/lib/Target/NVPTX/NVPTXInstrInfo.td +++ b/lib/Target/NVPTX/NVPTXInstrInfo.td @@ -134,6 +134,7 @@ def doMulWide : Predicate<"doMulWide">; def allowFMA : Predicate<"allowFMA()">; def noFMA : Predicate<"!allowFMA()">; +def allowUnsafeFPMath : Predicate<"allowUnsafeFPMath()">; def do_DIVF32_APPROX : Predicate<"getDivF32Level()==0">; def do_DIVF32_FULL : Predicate<"getDivF32Level()==1">; @@ -949,10 +950,12 @@ defm FMA64 : FMA<"fma.rn.f64", Float64Regs, f64imm, true>; // sin/cos def SINF: NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src), "sin.approx.f32 \t$dst, $src;", - [(set Float32Regs:$dst, (fsin Float32Regs:$src))]>; + [(set Float32Regs:$dst, (fsin Float32Regs:$src))]>, + Requires<[allowUnsafeFPMath]>; def COSF: NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src), "cos.approx.f32 \t$dst, $src;", - [(set Float32Regs:$dst, 
(fcos Float32Regs:$src))]>; + [(set Float32Regs:$dst, (fcos Float32Regs:$src))]>, + Requires<[allowUnsafeFPMath]>; // Lower (frem x, y) into (sub x, (mul (floor (div x, y)) y)), // i.e. "poor man's fmod()" diff --git a/test/CodeGen/NVPTX/fast-math.ll b/test/CodeGen/NVPTX/fast-math.ll index d0a333d369c..08b435b993f 100644 --- a/test/CodeGen/NVPTX/fast-math.ll +++ b/test/CodeGen/NVPTX/fast-math.ll @@ -34,5 +34,22 @@ define float @fadd_ftz(float %a, float %b) #1 { ret float %t1 } +declare float @llvm.sin.f32(float) +declare float @llvm.cos.f32(float) + +; CHECK-LABEL: fsin_approx +; CHECK: sin.approx.f32 +define float @fsin_approx(float %a) #0 { + %r = tail call float @llvm.sin.f32(float %a) + ret float %r +} + +; CHECK-LABEL: fcos_approx +; CHECK: cos.approx.f32 +define float @fcos_approx(float %a) #0 { + %r = tail call float @llvm.cos.f32(float %a) + ret float %r +} + attributes #0 = { "unsafe-fp-math" = "true" } attributes #1 = { "nvptx-f32ftz" = "true" } diff --git a/test/CodeGen/NVPTX/fcos-no-fast-math.ll b/test/CodeGen/NVPTX/fcos-no-fast-math.ll new file mode 100644 index 00000000000..d435c1d14fe --- /dev/null +++ b/test/CodeGen/NVPTX/fcos-no-fast-math.ll @@ -0,0 +1,14 @@ +; RUN: not llc < %s -march=nvptx -mcpu=sm_20 2>&1 | FileCheck %s + +; Check that we fail to select fcos without fast-math enabled + +declare float @llvm.cos.f32(float) + +; CHECK: LLVM ERROR: Cannot select: {{.*}}: f32 = fcos +; CHECK: In function: test_fcos_safe +define float @test_fcos_safe(float %a) #0 { + %r = tail call float @llvm.cos.f32(float %a) + ret float %r +} + +attributes #0 = { "unsafe-fp-math" = "false" } diff --git a/test/CodeGen/NVPTX/fsin-no-fast-math.ll b/test/CodeGen/NVPTX/fsin-no-fast-math.ll new file mode 100644 index 00000000000..56396b84925 --- /dev/null +++ b/test/CodeGen/NVPTX/fsin-no-fast-math.ll @@ -0,0 +1,14 @@ +; RUN: not llc < %s -march=nvptx -mcpu=sm_20 2>&1 | FileCheck %s + +; Check that we fail to select fsin without fast-math enabled + +declare float 
@llvm.sin.f32(float) + +; CHECK: LLVM ERROR: Cannot select: {{.*}}: f32 = fsin +; CHECK: In function: test_fsin_safe +define float @test_fsin_safe(float %a) #0 { + %r = tail call float @llvm.sin.f32(float %a) + ret float %r +} + +attributes #0 = { "unsafe-fp-math" = "false" }