From: Michael Berg <michael_c_berg@apple.com>
Date: Fri, 14 Jun 2019 23:30:52 +0000 (+0000)
Subject: adding more fmf propagation for selects plus tests
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=aad9d5e5dd9bccf64421d544db81a8b55108667f;p=llvm

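Propagate fast-math flags (FMF) to the select and select_cc nodes created by DAGCombiner and LegalizeDAG, and add tests that check the flags in the isel debug output.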

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@363474 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 477a5136b70..39ad5fe15a1 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -2091,7 +2091,9 @@ SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
       !isConstantFPBuildVectorOrConstantFP(NewCF))
     return SDValue();
 
-  return DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
+  SDValue SelectOp = DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
+  SelectOp->setFlags(BO->getFlags());
+  return SelectOp;
 }
 
 static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, SelectionDAG &DAG) {
@@ -7997,6 +7999,7 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
   EVT VT = N->getValueType(0);
   EVT VT0 = N0.getValueType();
   SDLoc DL(N);
+  SDNodeFlags Flags = N->getFlags();
 
   if (SDValue V = DAG.simplifySelect(N0, N1, N2))
     return V;
@@ -8047,10 +8050,10 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
       SDValue Cond0 = N0->getOperand(0);
       SDValue Cond1 = N0->getOperand(1);
       SDValue InnerSelect =
-          DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2);
+          DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2, Flags);
       if (normalizeToSequence || !InnerSelect.use_empty())
         return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0,
-                           InnerSelect, N2);
+                           InnerSelect, N2, Flags);
       // Cleanup on failure.
       if (InnerSelect.use_empty())
         recursivelyDeleteUnusedNodes(InnerSelect.getNode());
@@ -8059,11 +8062,11 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
     if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
       SDValue Cond0 = N0->getOperand(0);
       SDValue Cond1 = N0->getOperand(1);
-      SDValue InnerSelect =
-          DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2);
+      SDValue InnerSelect = DAG.getNode(ISD::SELECT, DL, N1.getValueType(),
+                                        Cond1, N1, N2, Flags);
       if (normalizeToSequence || !InnerSelect.use_empty())
         return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, N1,
-                           InnerSelect);
+                           InnerSelect, Flags);
       // Cleanup on failure.
       if (InnerSelect.use_empty())
         recursivelyDeleteUnusedNodes(InnerSelect.getNode());
@@ -8078,12 +8081,14 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
         // Create the actual and node if we can generate good code for it.
         if (!normalizeToSequence) {
           SDValue And = DAG.getNode(ISD::AND, DL, N0.getValueType(), N0, N1_0);
-          return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), And, N1_1, N2);
+          return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), And, N1_1,
+                             N2, Flags);
         }
         // Otherwise see if we can optimize the "and" to a better pattern.
-        if (SDValue Combined = visitANDLike(N0, N1_0, N))
+        if (SDValue Combined = visitANDLike(N0, N1_0, N)) {
           return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1_1,
-                             N2);
+                             N2, Flags);
+        }
       }
     }
     // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y
@@ -8095,19 +8100,23 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
         // Create the actual or node if we can generate good code for it.
         if (!normalizeToSequence) {
           SDValue Or = DAG.getNode(ISD::OR, DL, N0.getValueType(), N0, N2_0);
-          return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Or, N1, N2_2);
+          return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Or, N1, 
+                             N2_2, Flags);
         }
         // Otherwise see if we can optimize to a better pattern.
         if (SDValue Combined = visitORLike(N0, N2_0, N))
           return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1,
-                             N2_2);
+                             N2_2, Flags);
       }
     }
   }
 
   // select (not Cond), N1, N2 -> select Cond, N2, N1
-  if (SDValue F = extractBooleanFlip(N0, TLI))
-    return DAG.getSelect(DL, VT, F, N2, N1);
+  if (SDValue F = extractBooleanFlip(N0, TLI)) {
+    SDValue SelectOp = DAG.getSelect(DL, VT, F, N2, N1);
+    SelectOp->setFlags(Flags);
+    return SelectOp;
+  }
 
   // Fold selects based on a setcc into other things, such as min/max/abs.
   if (N0.getOpcode() == ISD::SETCC) {
@@ -8157,7 +8166,7 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
          TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT))) {
       // Any flags available in a select/setcc fold will be on the setcc as they
       // migrated from fcmp
-      const SDNodeFlags Flags = N0.getNode()->getFlags();
+      Flags = N0.getNode()->getFlags();
       SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, VT, Cond0, Cond1, N1,
                                        N2, N0.getOperand(2));
       SelectNode->setFlags(Flags);
@@ -8743,9 +8752,11 @@ SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
       return N2;
     } else if (SCC.getOpcode() == ISD::SETCC) {
       // Fold to a simpler select_cc
-      return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N2.getValueType(),
-                         SCC.getOperand(0), SCC.getOperand(1), N2, N3,
-                         SCC.getOperand(2));
+      SDValue SelectOp = DAG.getNode(
+          ISD::SELECT_CC, SDLoc(N), N2.getValueType(), SCC.getOperand(0),
+          SCC.getOperand(1), N2, N3, SCC.getOperand(2));
+      SelectOp->setFlags(SCC->getFlags());
+      return SelectOp;
     }
   }
 
@@ -19412,13 +19423,16 @@ SDValue DAGCombiner::SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1,
     // Check to see if we got a select_cc back (to turn into setcc/select).
     // Otherwise, just return whatever node we got back, like fabs.
     if (SCC.getOpcode() == ISD::SELECT_CC) {
+      const SDNodeFlags Flags = N0.getNode()->getFlags();
       SDValue SETCC = DAG.getNode(ISD::SETCC, SDLoc(N0),
                                   N0.getValueType(),
                                   SCC.getOperand(0), SCC.getOperand(1),
-                                  SCC.getOperand(4));
+                                  SCC.getOperand(4), Flags);
       AddToWorklist(SETCC.getNode());
-      return DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC,
-                           SCC.getOperand(2), SCC.getOperand(3));
+      SDValue SelectNode = DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC,
+                                         SCC.getOperand(2), SCC.getOperand(3));
+      SelectNode->setFlags(Flags);
+      return SelectNode;
     }
 
     return SCC;
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index cb64b7a81cd..1ef90445994 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -3591,6 +3591,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
       // Use the new condition code and swap true and false
       Legalized = true;
       Tmp1 = DAG.getSelectCC(dl, Tmp1, Tmp2, Tmp4, Tmp3, InvCC);
+      Tmp1->setFlags(Node->getFlags());
     } else {
       // If The inverse is not legal, then try to swap the arguments using
       // the inverse condition code.
@@ -3600,6 +3601,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
         // lhs and rhs.
         Legalized = true;
         Tmp1 = DAG.getSelectCC(dl, Tmp2, Tmp1, Tmp4, Tmp3, SwapInvCC);
+        Tmp1->setFlags(Node->getFlags());
       }
     }
 
@@ -3626,6 +3628,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
         Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, Node->getValueType(0), Tmp1,
                            Tmp2, Tmp3, Tmp4, CC);
       }
+      Tmp1->setFlags(Node->getFlags());
     }
     Results.push_back(Tmp1);
     break;
diff --git a/test/CodeGen/AArch64/arm64-fmax.ll b/test/CodeGen/AArch64/arm64-fmax.ll
index 8337d299ea5..3488defb2e1 100644
--- a/test/CodeGen/AArch64/arm64-fmax.ll
+++ b/test/CodeGen/AArch64/arm64-fmax.ll
@@ -1,4 +1,5 @@
-; RUN: llc < %s -mtriple=arm64-eabi -enable-no-nans-fp-math | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-eabi -enable-no-nans-fp-math | FileCheck %s --check-prefix=CHECK
+; RUN: llc < %s -mtriple=arm64-eabi -enable-no-nans-fp-math -debug-only=isel -o /dev/null 2>&1 | FileCheck %s --check-prefix=FMFDEBUG
 
 define double @test_direct(float %in) {
 ; CHECK-LABEL: test_direct:
@@ -20,6 +21,7 @@ define double @test_cross(float %in) {
 ; CHECK: fmin
 }
 
+
 ; Same as previous, but with ordered comparison;
 ; can't be converted in safe-math mode.
 define double @test_cross_fail_nan(float %in) {
@@ -32,6 +34,10 @@ define double @test_cross_fail_nan(float %in) {
 ; CHECK: fmin
 }
 
+; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'test_cross_fail:'
+; FMFDEBUG:         select_cc nnan {{t[0-9]+}}, {{t[0-9]+}}, {{t[0-9]+}}, {{t[0-9]+}}, setne:ch
+; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'test_cross_fail:'
+
 ; This isn't a min or a max, but passes the first condition for swapping the
 ; results. Make sure they're put back before we resort to the normal fcsel.
 define float @test_cross_fail(float %lhs, float %rhs) {
diff --git a/test/CodeGen/AArch64/select_fmf.ll b/test/CodeGen/AArch64/select_fmf.ll
new file mode 100644
index 00000000000..f152423f4b3
--- /dev/null
+++ b/test/CodeGen/AArch64/select_fmf.ll
@@ -0,0 +1,75 @@
+; RUN: llc < %s -mtriple=arm64-- -debug-only=isel -o /dev/null 2>&1 | FileCheck %s --check-prefix=FMFDEBUG
+
+; This test checks that fast-math flags (FMF) are kept on the select nodes produced when DAG combining folds nested selects.
+
+; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'select_select_fold_select_and:'
+; FMFDEBUG:         [[AND:t[0-9]+]]: i1 = and {{t[0-9]+}}, {{t[0-9]+}}
+; FMFDEBUG:         [[FMAX:t[0-9]+]]: f32 = fmaxnum nnan ninf nsz arcp contract afn reassoc {{t[0-9]+}}, {{t[0-9]+}}
+; FMFDEBUG-NEXT:    select nnan ninf nsz arcp contract afn reassoc [[AND]], [[FMAX]], {{t[0-9]+}}
+; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'select_select_fold_select_and:'
+
+; select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y
+define float @select_select_fold_select_and(float %w, float %x, float %y, float %z) {
+  %tmp21 = fcmp fast olt float %x, %y
+  %tmp22 = select fast i1 %tmp21, float %x, float %y
+  %tmp24 = fcmp fast ogt float %tmp22, %w
+  %tmp78 = fcmp fast ogt float %w, %z
+  %select0 = select fast i1 %tmp78, float %w, float %z
+  %select1 = select fast i1 %tmp21, float %select0, float %w
+  %select2 = select fast i1 %tmp24, float %select1, float %w
+  %tmp82 = fadd fast float %w, 5.000000e-01
+  %tmp102 = fadd fast float %tmp82, %select2
+  %cmp.i155.i.i = fcmp fast ogt float %tmp102, %tmp82
+  br i1 %cmp.i155.i.i, label %if.then.i157.i.i, label %if.end.i159.i.i
+
+if.then.i157.i.i:                                 ; preds = %0
+  %add.i156.i.i = fadd fast float %select2, 1.000000e+00
+  br label %exit
+
+if.end.i159.i.i:                                  ; preds = %0
+  %sub.i158.i.i = fadd fast float %w, 0xBFD99999A0000000
+  %sub15.i.i.i = fadd fast float %z, 0xBFD6666660000000
+  %tmp191 = fcmp fast ogt float %tmp82, 0.000000e+00
+  %select3 = select fast i1 %tmp191, float %sub.i158.i.i, float %sub15.i.i.i
+  br label %exit
+
+exit:                                     ; preds = %if.end.i159.i.i, %if.then.i157.i.i
+  %phi1 = phi float [ %add.i156.i.i, %if.then.i157.i.i ], [ %select3, %if.end.i159.i.i ]
+  ret float %phi1
+}
+
+; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'select_select_fold_select_or:'
+; FMFDEBUG:         [[OR:t[0-9]+]]: i1 = or {{t[0-9]+}}, {{t[0-9]+}}
+; FMFDEBUG:         [[FMAX:t[0-9]+]]: f32 = fmaxnum nnan ninf nsz arcp contract afn reassoc {{t[0-9]+}}, {{t[0-9]+}}
+; FMFDEBUG-NEXT:    select nnan ninf nsz arcp contract afn reassoc [[OR]], {{t[0-9]+}}, [[FMAX]]
+; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'select_select_fold_select_or:'
+
+; select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y
+define float @select_select_fold_select_or(float %w, float %x, float %y, float %z) {
+  %tmp21 = fcmp fast olt float %x, %y
+  %tmp22 = select fast i1 %tmp21, float %x, float %y
+  %tmp24 = fcmp fast ogt float %tmp22, %w
+  %tmp78 = fcmp fast ogt float %w, %z
+  %select0 = select fast i1 %tmp78, float %w, float %z
+  %select1 = select fast i1 %tmp21, float %w, float %select0
+  %select2 = select fast i1 %tmp24, float %w, float %select1
+  %tmp82 = fadd fast float %w, 5.000000e-01
+  %tmp102 = fadd fast float %tmp82, %select2
+  %cmp.i155.i.i = fcmp fast ogt float %tmp102, %tmp82
+  br i1 %cmp.i155.i.i, label %if.then.i157.i.i, label %if.end.i159.i.i
+
+if.then.i157.i.i:                                 ; preds = %0
+  %add.i156.i.i = fadd fast float %select2, 1.000000e+00
+  br label %exit
+
+if.end.i159.i.i:                                  ; preds = %0
+  %sub.i158.i.i = fadd fast float %w, 0xBFD99999A0000000
+  %sub15.i.i.i = fadd fast float %z, 0xBFD6666660000000
+  %tmp191 = fcmp fast ogt float %tmp82, 0.000000e+00
+  %select3 = select fast i1 %tmp191, float %sub.i158.i.i, float %sub15.i.i.i
+  br label %exit
+
+exit:                                     ; preds = %if.end.i159.i.i, %if.then.i157.i.i
+  %phi1 = phi float [ %add.i156.i.i, %if.then.i157.i.i ], [ %select3, %if.end.i159.i.i ]
+  ret float %phi1
+}
diff --git a/test/CodeGen/X86/fdiv-combine.ll b/test/CodeGen/X86/fdiv-combine.ll
index cd2b959ee0d..586c68f5afb 100644
--- a/test/CodeGen/X86/fdiv-combine.ll
+++ b/test/CodeGen/X86/fdiv-combine.ll
@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -debug-only=isel -o /dev/null 2>&1 | FileCheck %s --check-prefix=FMFDEBUG
 
 ; More than one 'arcp' division using a single divisor operand
 ; should be converted into a reciprocal and multiplication.
@@ -96,6 +97,10 @@ define double @div3_arcp(double %x, double %y, double %z) {
   ret double %ret
 }
 
+; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'div_select_constant_fold:'
+; FMFDEBUG:         select nnan {{t[0-9]+}}, ConstantFP:f32<2.500000e+00>, ConstantFP:f32<3.000000e+00>
+; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'div_select_constant_fold:'
+
 define float @div_select_constant_fold(i1 zeroext %arg) {
 ; CHECK-LABEL: div_select_constant_fold:
 ; CHECK:       # %bb.0:
@@ -108,7 +113,7 @@ define float @div_select_constant_fold(i1 zeroext %arg) {
 ; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; CHECK-NEXT:    retq
   %tmp = select i1 %arg, float 5.000000e+00, float 6.000000e+00
-  %B2 = fdiv float %tmp, 1.000000e+00
+  %B2 = fdiv nnan float %tmp, 2.000000e+00
   ret float %B2
 }