From 9817b74a74c4768930b71e6fd8ee901a131980b5 Mon Sep 17 00:00:00 2001 From: Nemanja Ivanovic Date: Tue, 7 May 2019 13:48:03 +0000 Subject: [PATCH] [PowerPC] Use the two-constant NR algorithm for refining estimates The single-constant algorithm produces infinities on a lot of denormal values. The precision of the two-constant algorithm is actually sufficient across the range of denormals. We will switch to that algorithm for now to avoid the infinities on denormals. In the future, we will re-evaluate the algorithm to find the optimal one for PowerPC. Differential revision: https://reviews.llvm.org/D60037 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@360144 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPC.td | 5 +- lib/Target/PowerPC/PPCISelLowering.cpp | 4 +- lib/Target/PowerPC/PPCSubtarget.cpp | 1 + lib/Target/PowerPC/PPCSubtarget.h | 2 + test/CodeGen/PowerPC/fma-mutate.ll | 3 +- test/CodeGen/PowerPC/fmf-propagation.ll | 69 +++++++++---------- test/CodeGen/PowerPC/recipest.ll | 16 ++--- .../PowerPC/vsx-fma-mutate-trivial-copy.ll | 2 +- 8 files changed, 53 insertions(+), 49 deletions(-) diff --git a/lib/Target/PowerPC/PPC.td b/lib/Target/PowerPC/PPC.td index 2e804495d49..d55dc40d9a6 100644 --- a/lib/Target/PowerPC/PPC.td +++ b/lib/Target/PowerPC/PPC.td @@ -135,6 +135,9 @@ def FeatureQPX : SubtargetFeature<"qpx","HasQPX", "true", def FeatureVSX : SubtargetFeature<"vsx","HasVSX", "true", "Enable VSX instructions", [FeatureAltivec]>; +def FeatureTwoConstNR : + SubtargetFeature<"two-const-nr", "NeedsTwoConstNR", "true", + "Requires two constant Newton-Raphson computation">; def FeatureP8Altivec : SubtargetFeature<"power8-altivec", "HasP8Altivec", "true", "Enable POWER8 Altivec instructions", [FeatureAltivec]>; @@ -227,7 +230,7 @@ def ProcessorFeatures { FeaturePOPCNTD, FeatureCMPB, FeatureLDBRX, Feature64Bit /*, Feature64BitRegs */, FeatureBPERMD, FeatureExtDiv, - FeatureMFTB, DeprecatedDST]; + FeatureMFTB, DeprecatedDST, FeatureTwoConstNR]; list Power8SpecificFeatures = [DirectivePwr8, FeatureP8Altivec, FeatureP8Vector, FeatureP8Crypto, FeatureHTM, FeatureDirectMove, FeatureICBT, FeaturePartwordAtomic, diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 3b61f4da351..9ff817e2f50 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -11145,7 +11145,9 @@ SDValue PPCTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, if (RefinementSteps == ReciprocalEstimate::Unspecified) RefinementSteps = getEstimateRefinementSteps(VT, Subtarget); - UseOneConstNR = true; + // The Newton-Raphson computation with a single constant does not provide + // enough accuracy on some CPUs. + UseOneConstNR = !Subtarget.needsTwoConstNR(); return DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand); } return SDValue(); diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp index a708e865e61..e3bc305be7a 100644 --- a/lib/Target/PowerPC/PPCSubtarget.cpp +++ b/lib/Target/PowerPC/PPCSubtarget.cpp @@ -67,6 +67,7 @@ void PPCSubtarget::initializeEnvironment() { HasFPU = false; HasQPX = false; HasVSX = false; + NeedsTwoConstNR = false; HasP8Vector = false; HasP8Altivec = false; HasP8Crypto = false; diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h index fd050880161..1e03726ba76 100644 --- a/lib/Target/PowerPC/PPCSubtarget.h +++ b/lib/Target/PowerPC/PPCSubtarget.h @@ -98,6 +98,7 @@ protected: bool HasSPE; bool HasQPX; bool HasVSX; + bool NeedsTwoConstNR; bool HasP8Vector; bool HasP8Altivec; bool HasP8Crypto; @@ -246,6 +247,7 @@ public: bool hasFPU() const { return HasFPU; } bool hasQPX() const { return HasQPX; } bool hasVSX() const { return HasVSX; } + bool needsTwoConstNR() const { return NeedsTwoConstNR; } bool hasP8Vector() const { return HasP8Vector; } bool hasP8Altivec() const { return HasP8Altivec; } bool hasP8Crypto() const { return HasP8Crypto; } diff --git a/test/CodeGen/PowerPC/fma-mutate.ll b/test/CodeGen/PowerPC/fma-mutate.ll index 1d4695b3181..e03bb22617f 100644 --- a/test/CodeGen/PowerPC/fma-mutate.ll +++ b/test/CodeGen/PowerPC/fma-mutate.ll @@ -14,8 +14,7 @@ define double @foo3(double %a) nounwind { ret double %r ; CHECK: @foo3 -; CHECK: fmr [[REG:[0-9]+]], [[REG2:[0-9]+]] -; CHECK: xsnmsubadp [[REG]], {{[0-9]+}}, [[REG2]] +; CHECK-NOT: fmr ; CHECK: xsmaddmdp ; CHECK: xsmaddadp } diff --git a/test/CodeGen/PowerPC/fmf-propagation.ll b/test/CodeGen/PowerPC/fmf-propagation.ll index ea40e4edd34..0ce4701d683 100644 --- a/test/CodeGen/PowerPC/fmf-propagation.ll +++ b/test/CodeGen/PowerPC/fmf-propagation.ll @@ -284,16 +284,16 @@ define float @sqrt_afn(float %x) { ; FMF-NEXT: fcmpu 0, 1, 0 ; FMF-NEXT: beq 0, .LBB10_2 ; FMF-NEXT: # %bb.1: +; FMF-NEXT: xsrsqrtesp 0, 1 ; FMF-NEXT: addis 3, 2, .LCPI10_0@toc@ha -; FMF-NEXT: xsrsqrtesp 3, 1 -; FMF-NEXT: lfs 0, .LCPI10_0@toc@l(3) -; FMF-NEXT: xsmulsp 2, 1, 0 -; FMF-NEXT: xsmulsp 4, 3, 3 -; FMF-NEXT: xssubsp 2, 2, 1 -; FMF-NEXT: xsmulsp 2, 2, 4 -; FMF-NEXT: xssubsp 0, 0, 2 -; FMF-NEXT: xsmulsp 0, 3, 0 -; FMF-NEXT: xsmulsp 0, 0, 1 +; FMF-NEXT: addis 4, 2, .LCPI10_1@toc@ha +; FMF-NEXT: lfs 2, .LCPI10_0@toc@l(3) +; FMF-NEXT: lfs 3, .LCPI10_1@toc@l(4) +; FMF-NEXT: xsmulsp 1, 1, 0 +; FMF-NEXT: xsmulsp 0, 1, 0 +; FMF-NEXT: xsmulsp 1, 1, 2 +; FMF-NEXT: xsaddsp 0, 0, 3 +; FMF-NEXT: xsmulsp 0, 1, 0 ; FMF-NEXT: .LBB10_2: ; FMF-NEXT: fmr 1, 0 ; FMF-NEXT: blr @@ -304,16 +304,15 @@ define float @sqrt_afn(float %x) { ; GLOBAL-NEXT: fcmpu 0, 1, 0 ; GLOBAL-NEXT: beq 0, .LBB10_2 ; GLOBAL-NEXT: # %bb.1: -; GLOBAL-NEXT: xsrsqrtesp 2, 1 -; GLOBAL-NEXT: fneg 0, 1 +; GLOBAL-NEXT: xsrsqrtesp 0, 1 ; GLOBAL-NEXT: addis 3, 2, .LCPI10_0@toc@ha -; GLOBAL-NEXT: fmr 4, 1 -; GLOBAL-NEXT: lfs 3, .LCPI10_0@toc@l(3) -; GLOBAL-NEXT: xsmaddasp 4, 0, 3 -; GLOBAL-NEXT: xsmulsp 0, 2, 2 -; GLOBAL-NEXT: xsmaddasp 3, 4, 0 -; GLOBAL-NEXT: xsmulsp 0, 2, 3 -; GLOBAL-NEXT: xsmulsp 0, 0, 1 +; GLOBAL-NEXT: addis 4, 2, .LCPI10_1@toc@ha +; GLOBAL-NEXT: lfs 2, .LCPI10_0@toc@l(3) +; GLOBAL-NEXT: lfs 3, .LCPI10_1@toc@l(4) +; GLOBAL-NEXT: xsmulsp 1, 1, 0 +; GLOBAL-NEXT: xsmaddasp 2, 1, 0 +; GLOBAL-NEXT: xsmulsp 0, 1, 3 +; GLOBAL-NEXT: xsmulsp 0, 0, 2 ; GLOBAL-NEXT: .LBB10_2: ; GLOBAL-NEXT: fmr 1, 0 ; GLOBAL-NEXT: blr @@ -338,16 +337,15 @@ define float @sqrt_fast(float %x) { ; FMF-NEXT: fcmpu 0, 1, 0 ; FMF-NEXT: beq 0, .LBB11_2 ; FMF-NEXT: # %bb.1: -; FMF-NEXT: xsrsqrtesp 2, 1 -; FMF-NEXT: fneg 0, 1 +; FMF-NEXT: xsrsqrtesp 0, 1 ; FMF-NEXT: addis 3, 2, .LCPI11_0@toc@ha -; FMF-NEXT: fmr 4, 1 -; FMF-NEXT: lfs 3, .LCPI11_0@toc@l(3) -; FMF-NEXT: xsmaddasp 4, 0, 3 -; FMF-NEXT: xsmulsp 0, 2, 2 -; FMF-NEXT: xsmaddasp 3, 4, 0 -; FMF-NEXT: xsmulsp 0, 2, 3 -; FMF-NEXT: xsmulsp 0, 0, 1 +; FMF-NEXT: addis 4, 2, .LCPI11_1@toc@ha +; FMF-NEXT: lfs 2, .LCPI11_0@toc@l(3) +; FMF-NEXT: lfs 3, .LCPI11_1@toc@l(4) +; FMF-NEXT: xsmulsp 1, 1, 0 +; FMF-NEXT: xsmaddasp 2, 1, 0 +; FMF-NEXT: xsmulsp 0, 1, 3 +; FMF-NEXT: xsmulsp 0, 0, 2 ; FMF-NEXT: .LBB11_2: ; FMF-NEXT: fmr 1, 0 ; FMF-NEXT: blr @@ -358,16 +356,15 @@ define float @sqrt_fast(float %x) { ; GLOBAL-NEXT: fcmpu 0, 1, 0 ; GLOBAL-NEXT: beq 0, .LBB11_2 ; GLOBAL-NEXT: # %bb.1: -; GLOBAL-NEXT: xsrsqrtesp 2, 1 -; GLOBAL-NEXT: fneg 0, 1 +; GLOBAL-NEXT: xsrsqrtesp 0, 1 ; GLOBAL-NEXT: addis 3, 2, .LCPI11_0@toc@ha -; GLOBAL-NEXT: fmr 4, 1 -; GLOBAL-NEXT: lfs 3, .LCPI11_0@toc@l(3) -; GLOBAL-NEXT: xsmaddasp 4, 0, 3 -; GLOBAL-NEXT: xsmulsp 0, 2, 2 -; GLOBAL-NEXT: xsmaddasp 3, 4, 0 -; GLOBAL-NEXT: xsmulsp 0, 2, 3 -; GLOBAL-NEXT: xsmulsp 0, 0, 1 +; GLOBAL-NEXT: addis 4, 2, .LCPI11_1@toc@ha +; GLOBAL-NEXT: lfs 2, .LCPI11_0@toc@l(3) +; GLOBAL-NEXT: lfs 3, .LCPI11_1@toc@l(4) +; GLOBAL-NEXT: xsmulsp 1, 1, 0 +; GLOBAL-NEXT: xsmaddasp 2, 1, 0 +; GLOBAL-NEXT: xsmulsp 0, 1, 3 +; GLOBAL-NEXT: xsmulsp 0, 0, 2 ; GLOBAL-NEXT: .LBB11_2: ; GLOBAL-NEXT: fmr 1, 0 ; GLOBAL-NEXT: blr diff --git a/test/CodeGen/PowerPC/recipest.ll b/test/CodeGen/PowerPC/recipest.ll index 3a8e2ff7d61..b7191875190 100644 --- a/test/CodeGen/PowerPC/recipest.ll +++ b/test/CodeGen/PowerPC/recipest.ll @@ -14,15 +14,16 @@ define double @foo(double %a, double %b) nounwind { ret double %r ; CHECK: @foo -; CHECK-DAG: frsqrte -; CHECK-DAG: fnmsub +; CHECK: frsqrte ; CHECK: fmul ; CHECK-NEXT: fmadd ; CHECK-NEXT: fmul ; CHECK-NEXT: fmul +; CHECK-NEXT: fmul ; CHECK-NEXT: fmadd ; CHECK-NEXT: fmul ; CHECK-NEXT: fmul +; CHECK-NEXT: fmul ; CHECK: blr ; CHECK-SAFE: @foo @@ -53,10 +54,10 @@ define double @foof(double %a, float %b) nounwind { ; CHECK: @foof ; CHECK-DAG: frsqrtes -; CHECK-DAG: fnmsubs ; CHECK: fmuls ; CHECK-NEXT: fmadds ; CHECK-NEXT: fmuls +; CHECK-NEXT: fmuls ; CHECK-NEXT: fmul ; CHECK-NEXT: blr @@ -74,13 +75,14 @@ define float @food(float %a, double %b) nounwind { ; CHECK: @foo ; CHECK-DAG: frsqrte -; CHECK-DAG: fnmsub ; CHECK: fmul ; CHECK-NEXT: fmadd ; CHECK-NEXT: fmul ; CHECK-NEXT: fmul +; CHECK-NEXT: fmul ; CHECK-NEXT: fmadd ; CHECK-NEXT: fmul +; CHECK-NEXT: fmul ; CHECK-NEXT: frsp ; CHECK-NEXT: fmuls ; CHECK-NEXT: blr @@ -98,11 +100,11 @@ define float @goo(float %a, float %b) nounwind { ; CHECK: @goo ; CHECK-DAG: frsqrtes -; CHECK-DAG: fnmsubs ; CHECK: fmuls ; CHECK-NEXT: fmadds ; CHECK-NEXT: fmuls ; CHECK-NEXT: fmuls +; CHECK-NEXT: fmuls ; CHECK-NEXT: blr ; CHECK-SAFE: @goo @@ -138,7 +140,6 @@ define float @rsqrt_fmul(float %a, float %b, float %c) { ; CHECK-DAG: fres ; CHECK-DAG: fnmsubs ; CHECK-DAG: fmuls -; CHECK-DAG: fnmsubs ; CHECK-DAG: fmadds ; CHECK-DAG: fmadds ; CHECK: fmuls @@ -219,11 +220,11 @@ define double @foo3(double %a) nounwind { ; CHECK: @foo3 ; CHECK: fcmpu ; CHECK-DAG: frsqrte -; CHECK-DAG: fnmsub ; CHECK: fmul ; CHECK-NEXT: fmadd ; CHECK-NEXT: fmul ; CHECK-NEXT: fmul +; CHECK-NEXT: fmul ; CHECK-NEXT: fmadd ; CHECK-NEXT: fmul ; CHECK-NEXT: fmul @@ -241,7 +242,6 @@ define float @goo3(float %a) nounwind { ; CHECK: @goo3 ; CHECK: fcmpu ; CHECK-DAG: frsqrtes -; CHECK-DAG: fnmsubs ; CHECK: fmuls ; CHECK-NEXT: fmadds ; CHECK-NEXT: fmuls diff --git a/test/CodeGen/PowerPC/vsx-fma-mutate-trivial-copy.ll b/test/CodeGen/PowerPC/vsx-fma-mutate-trivial-copy.ll index 80e7afec3c3..d9738a3dda2 100644 --- a/test/CodeGen/PowerPC/vsx-fma-mutate-trivial-copy.ll +++ b/test/CodeGen/PowerPC/vsx-fma-mutate-trivial-copy.ll @@ -8,7 +8,7 @@ entry: br i1 undef, label %for.body.lr.ph, label %for.end ; CHECK-LABEL: @LSH_recall_init -; CHECK: xsnmsubadp +; CHECK: xsmaddadp for.body.lr.ph: ; preds = %entry %conv3 = fpext float %W to double -- 2.50.1