From 77ddc3f5dd699deaf3f289b647a19c4ade6587dc Mon Sep 17 00:00:00 2001 From: Justin Lebar Date: Fri, 3 Feb 2017 15:13:50 +0000 Subject: [PATCH] [NVPTX] Enable combineRepeatedFPDivisors for NVPTX. Reviewers: tra Subscribers: jholewinski, llvm-commits Differential Revision: https://reviews.llvm.org/D29477 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@294011 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/NVPTX/NVPTXISelLowering.h | 2 ++ test/CodeGen/NVPTX/fast-math.ll | 44 ++++++++++++++++++++++++++++ 2 files changed, 46 insertions(+) diff --git a/lib/Target/NVPTX/NVPTXISelLowering.h b/lib/Target/NVPTX/NVPTXISelLowering.h index f6494f6d37e..ab78d8a936b 100644 --- a/lib/Target/NVPTX/NVPTXISelLowering.h +++ b/lib/Target/NVPTX/NVPTXISelLowering.h @@ -530,6 +530,8 @@ public: int &ExtraSteps, bool &UseOneConst, bool Reciprocal) const override; + unsigned combineRepeatedFPDivisors() const override { return 2; } + bool allowFMA(MachineFunction &MF, CodeGenOpt::Level OptLevel) const; bool allowUnsafeFPMath(MachineFunction &MF) const; diff --git a/test/CodeGen/NVPTX/fast-math.ll b/test/CodeGen/NVPTX/fast-math.ll index f925d67434c..56b1f88f3b2 100644 --- a/test/CodeGen/NVPTX/fast-math.ll +++ b/test/CodeGen/NVPTX/fast-math.ll @@ -117,5 +117,49 @@ define float @fcos_approx(float %a) #0 { ret float %r } +; CHECK-LABEL: repeated_div_recip_allowed +define float @repeated_div_recip_allowed(i1 %pred, float %a, float %b, float %divisor) { +; CHECK: rcp.rn.f32 +; CHECK: mul.rn.f32 +; CHECK: mul.rn.f32 + %x = fdiv arcp float %a, %divisor + %y = fdiv arcp float %b, %divisor + %z = select i1 %pred, float %x, float %y + ret float %z +} + +; CHECK-LABEL: repeated_div_recip_allowed_ftz +define float @repeated_div_recip_allowed_ftz(i1 %pred, float %a, float %b, float %divisor) #1 { +; CHECK: rcp.rn.ftz.f32 +; CHECK: mul.rn.ftz.f32 +; CHECK: mul.rn.ftz.f32 + %x = fdiv arcp float %a, %divisor + %y = fdiv arcp float %b, %divisor + %z = select i1 %pred, float %x, float %y + ret float %z +} + +; CHECK-LABEL: repeated_div_fast +define float @repeated_div_fast(i1 %pred, float %a, float %b, float %divisor) #0 { +; CHECK: rcp.approx.f32 +; CHECK: mul.f32 +; CHECK: mul.f32 + %x = fdiv float %a, %divisor + %y = fdiv float %b, %divisor + %z = select i1 %pred, float %x, float %y + ret float %z +} + +; CHECK-LABEL: repeated_div_fast_ftz +define float @repeated_div_fast_ftz(i1 %pred, float %a, float %b, float %divisor) #0 #1 { +; CHECK: rcp.approx.ftz.f32 +; CHECK: mul.ftz.f32 +; CHECK: mul.ftz.f32 + %x = fdiv float %a, %divisor + %y = fdiv float %b, %divisor + %z = select i1 %pred, float %x, float %y + ret float %z +} + attributes #0 = { "unsafe-fp-math" = "true" } attributes #1 = { "nvptx-f32ftz" = "true" } -- 2.50.1