[PowerPC] Use the two-constant NR algorithm for refining estimates

author Nemanja Ivanovic <nemanja.i.ibm@gmail.com>
Tue, 7 May 2019 13:48:03 +0000 (13:48 +0000)
committer Nemanja Ivanovic <nemanja.i.ibm@gmail.com>
Tue, 7 May 2019 13:48:03 +0000 (13:48 +0000)
diff --git a/lib/Target/PowerPC/PPC.td b/lib/Target/PowerPC/PPC.td

index 2e804495d4955cd8d97090ca20d71b9f0e306dbc..d55dc40d9a622de42a291bb956f0d53ca6586464 100644 (file)
--- a/lib/Target/PowerPC/PPC.td
+++ b/lib/Target/PowerPC/PPC.td
@@ -135,6 +135,9 @@ def FeatureQPX       : SubtargetFeature<"qpx","HasQPX", "true",
  def FeatureVSX       : SubtargetFeature<"vsx","HasVSX", "true",
                                          "Enable VSX instructions",
                                          [FeatureAltivec]>;
+def FeatureTwoConstNR :
+  SubtargetFeature<"two-const-nr", "NeedsTwoConstNR", "true",
+                   "Requires two constant Newton-Raphson computation">;
  def FeatureP8Altivec : SubtargetFeature<"power8-altivec", "HasP8Altivec", "true",
                                          "Enable POWER8 Altivec instructions",
                                          [FeatureAltivec]>;
@@ -227,7 +230,7 @@ def ProcessorFeatures {
         FeaturePOPCNTD, FeatureCMPB, FeatureLDBRX,
         Feature64Bit /*, Feature64BitRegs */,
         FeatureBPERMD, FeatureExtDiv,
-       FeatureMFTB, DeprecatedDST];
+       FeatureMFTB, DeprecatedDST, FeatureTwoConstNR];
    list<SubtargetFeature> Power8SpecificFeatures =
        [DirectivePwr8, FeatureP8Altivec, FeatureP8Vector, FeatureP8Crypto,
         FeatureHTM, FeatureDirectMove, FeatureICBT, FeaturePartwordAtomic,
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp

index 3b61f4da35154a62ab36f1d66fdf8be180341b64..9ff817e2f50633ab388f7c54c81ddc3d950d8354 100644 (file)
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -11145,7 +11145,9 @@ SDValue PPCTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG,
      if (RefinementSteps == ReciprocalEstimate::Unspecified)
        RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
  
-    UseOneConstNR = true;
+    // The Newton-Raphson computation with a single constant does not provide
+    // enough accuracy on some CPUs.
+    UseOneConstNR = !Subtarget.needsTwoConstNR();
      return DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand);
    }
    return SDValue();
diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp

index a708e865e61c8d2281cedf6c34aa3029cb661482..e3bc305be7a28d7e0380c6ebaafbf0553eeb9a93 100644 (file)
--- a/lib/Target/PowerPC/PPCSubtarget.cpp
+++ b/lib/Target/PowerPC/PPCSubtarget.cpp
@@ -67,6 +67,7 @@ void PPCSubtarget::initializeEnvironment() {
    HasFPU = false;
    HasQPX = false;
    HasVSX = false;
+  NeedsTwoConstNR = false;
    HasP8Vector = false;
    HasP8Altivec = false;
    HasP8Crypto = false;
diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h

index fd050880161424ce43ae65167aba254310001c86..1e03726ba76d90d7b6d5a8e8e280fd1fb5b0b421 100644 (file)
--- a/lib/Target/PowerPC/PPCSubtarget.h
+++ b/lib/Target/PowerPC/PPCSubtarget.h
@@ -98,6 +98,7 @@ protected:
    bool HasSPE;
    bool HasQPX;
    bool HasVSX;
+  bool NeedsTwoConstNR;
    bool HasP8Vector;
    bool HasP8Altivec;
    bool HasP8Crypto;
@@ -246,6 +247,7 @@ public:
    bool hasFPU() const { return HasFPU; }
    bool hasQPX() const { return HasQPX; }
    bool hasVSX() const { return HasVSX; }
+  bool needsTwoConstNR() const { return NeedsTwoConstNR; }
    bool hasP8Vector() const { return HasP8Vector; }
    bool hasP8Altivec() const { return HasP8Altivec; }
    bool hasP8Crypto() const { return HasP8Crypto; }
diff --git a/test/CodeGen/PowerPC/fma-mutate.ll b/test/CodeGen/PowerPC/fma-mutate.ll

index 1d4695b31810a5d10ba970c21e5a713b21ac1452..e03bb22617f29c7454f916c35bc9de0588e559ec 100644 (file)
--- a/test/CodeGen/PowerPC/fma-mutate.ll
+++ b/test/CodeGen/PowerPC/fma-mutate.ll
@@ -14,8 +14,7 @@ define double @foo3(double %a) nounwind {
    ret double %r
  
  ; CHECK: @foo3
-; CHECK: fmr [[REG:[0-9]+]], [[REG2:[0-9]+]]
-; CHECK: xsnmsubadp [[REG]], {{[0-9]+}}, [[REG2]]
+; CHECK-NOT: fmr
  ; CHECK: xsmaddmdp
  ; CHECK: xsmaddadp
  }
diff --git a/test/CodeGen/PowerPC/fmf-propagation.ll b/test/CodeGen/PowerPC/fmf-propagation.ll

index ea40e4edd3483b45486e7e22728bdf794b576115..0ce4701d6835028c142f60ecd6211d922f1b00a9 100644 (file)
--- a/test/CodeGen/PowerPC/fmf-propagation.ll
+++ b/test/CodeGen/PowerPC/fmf-propagation.ll
@@ -284,16 +284,16 @@ define float @sqrt_afn(float %x) {
  ; FMF-NEXT:    fcmpu 0, 1, 0
  ; FMF-NEXT:    beq 0, .LBB10_2
  ; FMF-NEXT:  # %bb.1:
+; FMF-NEXT:    xsrsqrtesp 0, 1
  ; FMF-NEXT:    addis 3, 2, .LCPI10_0@toc@ha
-; FMF-NEXT:    xsrsqrtesp 3, 1
-; FMF-NEXT:    lfs 0, .LCPI10_0@toc@l(3)
-; FMF-NEXT:    xsmulsp 2, 1, 0
-; FMF-NEXT:    xsmulsp 4, 3, 3
-; FMF-NEXT:    xssubsp 2, 2, 1
-; FMF-NEXT:    xsmulsp 2, 2, 4
-; FMF-NEXT:    xssubsp 0, 0, 2
-; FMF-NEXT:    xsmulsp 0, 3, 0
-; FMF-NEXT:    xsmulsp 0, 0, 1
+; FMF-NEXT:    addis 4, 2, .LCPI10_1@toc@ha
+; FMF-NEXT:    lfs 2, .LCPI10_0@toc@l(3)
+; FMF-NEXT:    lfs 3, .LCPI10_1@toc@l(4)
+; FMF-NEXT:    xsmulsp 1, 1, 0
+; FMF-NEXT:    xsmulsp 0, 1, 0
+; FMF-NEXT:    xsmulsp 1, 1, 2
+; FMF-NEXT:    xsaddsp 0, 0, 3
+; FMF-NEXT:    xsmulsp 0, 1, 0
  ; FMF-NEXT:  .LBB10_2:
  ; FMF-NEXT:    fmr 1, 0
  ; FMF-NEXT:    blr
@@ -304,16 +304,15 @@ define float @sqrt_afn(float %x) {
  ; GLOBAL-NEXT:    fcmpu 0, 1, 0
  ; GLOBAL-NEXT:    beq 0, .LBB10_2
  ; GLOBAL-NEXT:  # %bb.1:
-; GLOBAL-NEXT:    xsrsqrtesp 2, 1
-; GLOBAL-NEXT:    fneg 0, 1
+; GLOBAL-NEXT:    xsrsqrtesp 0, 1
  ; GLOBAL-NEXT:    addis 3, 2, .LCPI10_0@toc@ha
-; GLOBAL-NEXT:    fmr 4, 1
-; GLOBAL-NEXT:    lfs 3, .LCPI10_0@toc@l(3)
-; GLOBAL-NEXT:    xsmaddasp 4, 0, 3
-; GLOBAL-NEXT:    xsmulsp 0, 2, 2
-; GLOBAL-NEXT:    xsmaddasp 3, 4, 0
-; GLOBAL-NEXT:    xsmulsp 0, 2, 3
-; GLOBAL-NEXT:    xsmulsp 0, 0, 1
+; GLOBAL-NEXT:    addis 4, 2, .LCPI10_1@toc@ha
+; GLOBAL-NEXT:    lfs 2, .LCPI10_0@toc@l(3)
+; GLOBAL-NEXT:    lfs 3, .LCPI10_1@toc@l(4)
+; GLOBAL-NEXT:    xsmulsp 1, 1, 0
+; GLOBAL-NEXT:    xsmaddasp 2, 1, 0
+; GLOBAL-NEXT:    xsmulsp 0, 1, 3
+; GLOBAL-NEXT:    xsmulsp 0, 0, 2
  ; GLOBAL-NEXT:  .LBB10_2:
  ; GLOBAL-NEXT:    fmr 1, 0
  ; GLOBAL-NEXT:    blr
@@ -338,16 +337,15 @@ define float @sqrt_fast(float %x) {
  ; FMF-NEXT:    fcmpu 0, 1, 0
  ; FMF-NEXT:    beq 0, .LBB11_2
  ; FMF-NEXT:  # %bb.1:
-; FMF-NEXT:    xsrsqrtesp 2, 1
-; FMF-NEXT:    fneg 0, 1
+; FMF-NEXT:    xsrsqrtesp 0, 1
  ; FMF-NEXT:    addis 3, 2, .LCPI11_0@toc@ha
-; FMF-NEXT:    fmr 4, 1
-; FMF-NEXT:    lfs 3, .LCPI11_0@toc@l(3)
-; FMF-NEXT:    xsmaddasp 4, 0, 3
-; FMF-NEXT:    xsmulsp 0, 2, 2
-; FMF-NEXT:    xsmaddasp 3, 4, 0
-; FMF-NEXT:    xsmulsp 0, 2, 3
-; FMF-NEXT:    xsmulsp 0, 0, 1
+; FMF-NEXT:    addis 4, 2, .LCPI11_1@toc@ha
+; FMF-NEXT:    lfs 2, .LCPI11_0@toc@l(3)
+; FMF-NEXT:    lfs 3, .LCPI11_1@toc@l(4)
+; FMF-NEXT:    xsmulsp 1, 1, 0
+; FMF-NEXT:    xsmaddasp 2, 1, 0
+; FMF-NEXT:    xsmulsp 0, 1, 3
+; FMF-NEXT:    xsmulsp 0, 0, 2
  ; FMF-NEXT:  .LBB11_2:
  ; FMF-NEXT:    fmr 1, 0
  ; FMF-NEXT:    blr
@@ -358,16 +356,15 @@ define float @sqrt_fast(float %x) {
  ; GLOBAL-NEXT:    fcmpu 0, 1, 0
  ; GLOBAL-NEXT:    beq 0, .LBB11_2
  ; GLOBAL-NEXT:  # %bb.1:
-; GLOBAL-NEXT:    xsrsqrtesp 2, 1
-; GLOBAL-NEXT:    fneg 0, 1
+; GLOBAL-NEXT:    xsrsqrtesp 0, 1
  ; GLOBAL-NEXT:    addis 3, 2, .LCPI11_0@toc@ha
-; GLOBAL-NEXT:    fmr 4, 1
-; GLOBAL-NEXT:    lfs 3, .LCPI11_0@toc@l(3)
-; GLOBAL-NEXT:    xsmaddasp 4, 0, 3
-; GLOBAL-NEXT:    xsmulsp 0, 2, 2
-; GLOBAL-NEXT:    xsmaddasp 3, 4, 0
-; GLOBAL-NEXT:    xsmulsp 0, 2, 3
-; GLOBAL-NEXT:    xsmulsp 0, 0, 1
+; GLOBAL-NEXT:    addis 4, 2, .LCPI11_1@toc@ha
+; GLOBAL-NEXT:    lfs 2, .LCPI11_0@toc@l(3)
+; GLOBAL-NEXT:    lfs 3, .LCPI11_1@toc@l(4)
+; GLOBAL-NEXT:    xsmulsp 1, 1, 0
+; GLOBAL-NEXT:    xsmaddasp 2, 1, 0
+; GLOBAL-NEXT:    xsmulsp 0, 1, 3
+; GLOBAL-NEXT:    xsmulsp 0, 0, 2
  ; GLOBAL-NEXT:  .LBB11_2:
  ; GLOBAL-NEXT:    fmr 1, 0
  ; GLOBAL-NEXT:    blr
diff --git a/test/CodeGen/PowerPC/recipest.ll b/test/CodeGen/PowerPC/recipest.ll

index 3a8e2ff7d61a58cfbdbfbe50646253f719a7932f..b719187519015f810f30b4db70a19c4640f65d08 100644 (file)
--- a/test/CodeGen/PowerPC/recipest.ll
+++ b/test/CodeGen/PowerPC/recipest.ll
@@ -14,15 +14,16 @@ define double @foo(double %a, double %b) nounwind {
    ret double %r
  
  ; CHECK: @foo
-; CHECK-DAG: frsqrte
-; CHECK-DAG: fnmsub
+; CHECK: frsqrte
  ; CHECK: fmul
  ; CHECK-NEXT: fmadd
  ; CHECK-NEXT: fmul
  ; CHECK-NEXT: fmul
+; CHECK-NEXT: fmul
  ; CHECK-NEXT: fmadd
  ; CHECK-NEXT: fmul
  ; CHECK-NEXT: fmul
+; CHECK-NEXT: fmul
  ; CHECK: blr
  
  ; CHECK-SAFE: @foo
@@ -53,10 +54,10 @@ define double @foof(double %a, float %b) nounwind {
  
  ; CHECK: @foof
  ; CHECK-DAG: frsqrtes
-; CHECK-DAG: fnmsubs
  ; CHECK: fmuls
  ; CHECK-NEXT: fmadds
  ; CHECK-NEXT: fmuls
+; CHECK-NEXT: fmuls
  ; CHECK-NEXT: fmul
  ; CHECK-NEXT: blr
  
@@ -74,13 +75,14 @@ define float @food(float %a, double %b) nounwind {
  
  ; CHECK: @foo
  ; CHECK-DAG: frsqrte
-; CHECK-DAG: fnmsub
  ; CHECK: fmul
  ; CHECK-NEXT: fmadd
  ; CHECK-NEXT: fmul
  ; CHECK-NEXT: fmul
+; CHECK-NEXT: fmul
  ; CHECK-NEXT: fmadd
  ; CHECK-NEXT: fmul
+; CHECK-NEXT: fmul
  ; CHECK-NEXT: frsp
  ; CHECK-NEXT: fmuls
  ; CHECK-NEXT: blr
@@ -98,11 +100,11 @@ define float @goo(float %a, float %b) nounwind {
  
  ; CHECK: @goo
  ; CHECK-DAG: frsqrtes
-; CHECK-DAG: fnmsubs
  ; CHECK: fmuls
  ; CHECK-NEXT: fmadds
  ; CHECK-NEXT: fmuls
  ; CHECK-NEXT: fmuls
+; CHECK-NEXT: fmuls
  ; CHECK-NEXT: blr
  
  ; CHECK-SAFE: @goo
@@ -138,7 +140,6 @@ define float @rsqrt_fmul(float %a, float %b, float %c) {
  ; CHECK-DAG: fres
  ; CHECK-DAG: fnmsubs
  ; CHECK-DAG: fmuls
-; CHECK-DAG: fnmsubs
  ; CHECK-DAG: fmadds
  ; CHECK-DAG: fmadds
  ; CHECK: fmuls
@@ -219,11 +220,11 @@ define double @foo3(double %a) nounwind {
  ; CHECK: @foo3
  ; CHECK: fcmpu
  ; CHECK-DAG: frsqrte
-; CHECK-DAG: fnmsub
  ; CHECK: fmul
  ; CHECK-NEXT: fmadd
  ; CHECK-NEXT: fmul
  ; CHECK-NEXT: fmul
+; CHECK-NEXT: fmul
  ; CHECK-NEXT: fmadd
  ; CHECK-NEXT: fmul
  ; CHECK-NEXT: fmul
@@ -241,7 +242,6 @@ define float @goo3(float %a) nounwind {
  ; CHECK: @goo3
  ; CHECK: fcmpu
  ; CHECK-DAG: frsqrtes
-; CHECK-DAG: fnmsubs
  ; CHECK: fmuls
  ; CHECK-NEXT: fmadds
  ; CHECK-NEXT: fmuls
diff --git a/test/CodeGen/PowerPC/vsx-fma-mutate-trivial-copy.ll b/test/CodeGen/PowerPC/vsx-fma-mutate-trivial-copy.ll

index 80e7afec3c35e937a96ce30f2cf83094a8be0f3c..d9738a3dda2736b494235a024c2f3ed9df82a17c 100644 (file)
--- a/test/CodeGen/PowerPC/vsx-fma-mutate-trivial-copy.ll
+++ b/test/CodeGen/PowerPC/vsx-fma-mutate-trivial-copy.ll
@@ -8,7 +8,7 @@ entry:
    br i1 undef, label %for.body.lr.ph, label %for.end
  
  ; CHECK-LABEL: @LSH_recall_init
-; CHECK: xsnmsubadp
+; CHECK: xsmaddadp
  
  for.body.lr.ph:                                   ; preds = %entry
    %conv3 = fpext float %W to double
author	Nemanja Ivanovic <nemanja.i.ibm@gmail.com>
	Tue, 7 May 2019 13:48:03 +0000 (13:48 +0000)
committer	Nemanja Ivanovic <nemanja.i.ibm@gmail.com>
	Tue, 7 May 2019 13:48:03 +0000 (13:48 +0000)
lib/Target/PowerPC/PPC.td		patch | blob | history
lib/Target/PowerPC/PPCISelLowering.cpp		patch | blob | history
lib/Target/PowerPC/PPCSubtarget.cpp		patch | blob | history
lib/Target/PowerPC/PPCSubtarget.h		patch | blob | history
test/CodeGen/PowerPC/fma-mutate.ll		patch | blob | history
test/CodeGen/PowerPC/fmf-propagation.ll		patch | blob | history
test/CodeGen/PowerPC/recipest.ll		patch | blob | history
test/CodeGen/PowerPC/vsx-fma-mutate-trivial-copy.ll		patch | blob | history