[TargetLowering][AMDGPU] Remove the SimplifyDemandedBits function that takes a User...

author Craig Topper <craig.topper@intel.com>

Mon, 7 Jan 2019 19:30:43 +0000 (19:30 +0000)

committer Craig Topper <craig.topper@intel.com>

Mon, 7 Jan 2019 19:30:43 +0000 (19:30 +0000)
author Craig Topper <craig.topper@intel.com>
Mon, 7 Jan 2019 19:30:43 +0000 (19:30 +0000)
committer Craig Topper <craig.topper@intel.com>
Mon, 7 Jan 2019 19:30:43 +0000 (19:30 +0000)
diff --git a/include/llvm/CodeGen/TargetLowering.h b/include/llvm/CodeGen/TargetLowering.h

index eeff202fe2725df6f47345fb73dcda322df86205..f18ba19a229f330c58075e3877201a5554b76957 100644 (file)
--- a/include/llvm/CodeGen/TargetLowering.h
+++ b/include/llvm/CodeGen/TargetLowering.h
@@ -2895,16 +2895,6 @@ public:
    bool ShrinkDemandedOp(SDValue Op, unsigned BitWidth, const APInt &Demanded,
                          TargetLoweringOpt &TLO) const;
  
-  /// Helper for SimplifyDemandedBits that can simplify an operation with
-  /// multiple uses.  This function simplifies operand \p OpIdx of \p User and
-  /// then updates \p User with the simplified version. No other uses of
-  /// \p OpIdx are updated. If \p User is the only user of \p OpIdx, this
-  /// function behaves exactly like function SimplifyDemandedBits declared
-  /// below except that it also updates the DAG by calling
-  /// DCI.CommitTargetLoweringOpt.
-  bool SimplifyDemandedBits(SDNode *User, unsigned OpIdx, const APInt &Demanded,
-                            DAGCombinerInfo &DCI, TargetLoweringOpt &TLO) const;
-
    /// Look at Op.  At this point, we know that only the DemandedBits bits of the
    /// result of Op are ever used downstream.  If we can use this information to
    /// simplify Op, create a new simplified DAG node and return true, returning
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp

index 2e21f57c560c0026248b52a6d331114fbc7126b3..2851dbb21e850ae5f63cdca943b925c81a4b585f 100644 (file)
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -434,56 +434,6 @@ bool TargetLowering::ShrinkDemandedOp(SDValue Op, unsigned BitWidth,
    return false;
  }
  
-bool
-TargetLowering::SimplifyDemandedBits(SDNode *User, unsigned OpIdx,
-                                     const APInt &DemandedBits,
-                                     DAGCombinerInfo &DCI,
-                                     TargetLoweringOpt &TLO) const {
-  SDValue Op = User->getOperand(OpIdx);
-  KnownBits Known;
-
-  if (!SimplifyDemandedBits(Op, DemandedBits, Known, TLO, 0, true))
-    return false;
-
-
-  // Old will not always be the same as Op.  For example:
-  //
-  // Demanded = 0xffffff
-  // Op = i64 truncate (i32 and x, 0xffffff)
-  // In this case simplify demand bits will want to replace the 'and' node
-  // with the value 'x', which will give us:
-  // Old = i32 and x, 0xffffff
-  // New = x
-  if (TLO.Old.hasOneUse()) {
-    // For the one use case, we just commit the change.
-    DCI.CommitTargetLoweringOpt(TLO);
-    return true;
-  }
-
-  // If Old has more than one use then it must be Op, because the
-  // AssumeSingleUse flag is not propogated to recursive calls of
-  // SimplifyDemanded bits, so the only node with multiple use that
-  // it will attempt to combine will be Op.
-  assert(TLO.Old == Op);
-
-  SmallVector <SDValue, 4> NewOps;
-  for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
-    if (i == OpIdx) {
-      NewOps.push_back(TLO.New);
-      continue;
-    }
-    NewOps.push_back(User->getOperand(i));
-  }
-  User = TLO.DAG.UpdateNodeOperands(User, NewOps);
-  // Op has less users now, so we may be able to perform additional combines
-  // with it.
-  DCI.AddToWorklist(Op.getNode());
-  // User's operands have been updated, so we may be able to do new combines
-  // with it.
-  DCI.AddToWorklist(User);
-  return true;
-}
-
  bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
                                            DAGCombinerInfo &DCI) const {
    SelectionDAG &DAG = DCI.DAG;
diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp

index 2a05bf51c0a725d6b78edc534c66b1f442a81b8e..6951c915b1772d82a11c5cdd2701e74e7800ad32 100644 (file)
--- a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -2717,21 +2717,33 @@ static bool isI24(SDValue Op, SelectionDAG &DAG) {
      AMDGPUTargetLowering::numBitsSigned(Op, DAG) < 24;
  }
  
-static bool simplifyI24(SDNode *Node24, unsigned OpIdx,
-                        TargetLowering::DAGCombinerInfo &DCI) {
-
+static SDValue simplifyI24(SDNode *Node24,
+                           TargetLowering::DAGCombinerInfo &DCI) {
    SelectionDAG &DAG = DCI.DAG;
-  SDValue Op = Node24->getOperand(OpIdx);
+  SDValue LHS = Node24->getOperand(0);
+  SDValue RHS = Node24->getOperand(1);
+
+  APInt Demanded = APInt::getLowBitsSet(LHS.getValueSizeInBits(), 24);
+
+  // First try to simplify using GetDemandedBits which allows the operands to
+  // have other uses, but will only perform simplifications that involve
+  // bypassing some nodes for this user.
+  SDValue DemandedLHS = DAG.GetDemandedBits(LHS, Demanded);
+  SDValue DemandedRHS = DAG.GetDemandedBits(RHS, Demanded);
+  if (DemandedLHS || DemandedRHS)
+    return DAG.getNode(Node24->getOpcode(), SDLoc(Node24), Node24->getVTList(),
+                       DemandedLHS ? DemandedLHS : LHS,
+                       DemandedRHS ? DemandedRHS : RHS);
+
+  // Now try SimplifyDemandedBits which can simplify the nodes used by our
+  // operands if this node is the only user.
    const TargetLowering &TLI = DAG.getTargetLoweringInfo();
-  EVT VT = Op.getValueType();
-
-  APInt Demanded = APInt::getLowBitsSet(VT.getSizeInBits(), 24);
-  APInt KnownZero, KnownOne;
-  TargetLowering::TargetLoweringOpt TLO(DAG, true, true);
-  if (TLI.SimplifyDemandedBits(Node24, OpIdx, Demanded, DCI, TLO))
-    return true;
+  if (TLI.SimplifyDemandedBits(LHS, Demanded, DCI))
+    return SDValue(Node24, 0);
+  if (TLI.SimplifyDemandedBits(RHS, Demanded, DCI))
+    return SDValue(Node24, 0);
  
-  return false;
+  return SDValue();
  }
  
  template <typename IntTy>
@@ -3279,8 +3291,8 @@ SDValue AMDGPUTargetLowering::performMulLoHi24Combine(
    SelectionDAG &DAG = DCI.DAG;
  
    // Simplify demanded bits before splitting into multiple users.
-  if (simplifyI24(N, 0, DCI) || simplifyI24(N, 1, DCI))
-    return SDValue();
+  if (SDValue V = simplifyI24(N, DCI))
+    return V;
  
    SDValue N0 = N->getOperand(0);
    SDValue N1 = N->getOperand(1);
@@ -3866,9 +3878,8 @@ SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N,
    case AMDGPUISD::MUL_U24:
    case AMDGPUISD::MULHI_I24:
    case AMDGPUISD::MULHI_U24: {
-    // If the first call to simplify is successfull, then N may end up being
-    // deleted, so we shouldn't call simplifyI24 again.
-    simplifyI24(N, 0, DCI) || simplifyI24(N, 1, DCI);
+    if (SDValue V = simplifyI24(N, DCI))
+      return V;
      return SDValue();
    }
    case AMDGPUISD::MUL_LOHI_I24:
@@ -4296,25 +4307,36 @@ void AMDGPUTargetLowering::computeKnownBitsForTargetNode(
                        RHSKnown.countMinTrailingZeros();
      Known.Zero.setLowBits(std::min(TrailZ, 32u));
  
-    unsigned LHSValBits = 32 - std::max(LHSKnown.countMinSignBits(), 8u);
-    unsigned RHSValBits = 32 - std::max(RHSKnown.countMinSignBits(), 8u);
-    unsigned MaxValBits = std::min(LHSValBits + RHSValBits, 32u);
-    if (MaxValBits >= 32)
-      break;
+    // Truncate to 24 bits.
+    LHSKnown = LHSKnown.trunc(24);
+    RHSKnown = RHSKnown.trunc(24);
+
      bool Negative = false;
      if (Opc == AMDGPUISD::MUL_I24) {
-      bool LHSNegative = !!(LHSKnown.One  & (1 << 23));
-      bool LHSPositive = !!(LHSKnown.Zero & (1 << 23));
-      bool RHSNegative = !!(RHSKnown.One  & (1 << 23));
-      bool RHSPositive = !!(RHSKnown.Zero & (1 << 23));
+      unsigned LHSValBits = 24 - LHSKnown.countMinSignBits();
+      unsigned RHSValBits = 24 - RHSKnown.countMinSignBits();
+      unsigned MaxValBits = std::min(LHSValBits + RHSValBits, 32u);
+      if (MaxValBits >= 32)
+        break;
+      bool LHSNegative = LHSKnown.isNegative();
+      bool LHSPositive = LHSKnown.isNonNegative();
+      bool RHSNegative = RHSKnown.isNegative();
+      bool RHSPositive = RHSKnown.isNonNegative();
        if ((!LHSNegative && !LHSPositive) || (!RHSNegative && !RHSPositive))
          break;
        Negative = (LHSNegative && RHSPositive) || (LHSPositive && RHSNegative);
-    }
-    if (Negative)
-      Known.One.setHighBits(32 - MaxValBits);
-    else
+      if (Negative)
+        Known.One.setHighBits(32 - MaxValBits);
+      else
+        Known.Zero.setHighBits(32 - MaxValBits);
+    } else {
+      unsigned LHSValBits = 24 - LHSKnown.countMinLeadingZeros();
+      unsigned RHSValBits = 24 - RHSKnown.countMinLeadingZeros();
+      unsigned MaxValBits = std::min(LHSValBits + RHSValBits, 32u);
+      if (MaxValBits >= 32)
+        break;
        Known.Zero.setHighBits(32 - MaxValBits);
+    }
      break;
    }
    case AMDGPUISD::PERM: {
diff --git a/test/CodeGen/AMDGPU/lshl64-to-32.ll b/test/CodeGen/AMDGPU/lshl64-to-32.ll

index 1735dec2d22cb2c4a94eb2d0f59a3e337cf5a1a2..711979591135443e9dc2f388dcb081bb6cb7d862 100644 (file)
--- a/test/CodeGen/AMDGPU/lshl64-to-32.ll
+++ b/test/CodeGen/AMDGPU/lshl64-to-32.ll
@@ -123,7 +123,7 @@ define amdgpu_kernel void @muli24_shl64(i64 addrspace(1)* nocapture %arg, i32 ad
  ; GCN-NEXT:    s_mov_b64 s[2:3], s[6:7]
  ; GCN-NEXT:    s_waitcnt vmcnt(0)
  ; GCN-NEXT:    v_or_b32_e32 v1, 0x800000, v1
-; GCN-NEXT:    v_mul_i32_i24_e32 v1, -7, v1
+; GCN-NEXT:    v_mul_i32_i24_e32 v1, 0xfffff9, v1
  ; GCN-NEXT:    v_lshlrev_b32_e32 v1, 3, v1
  ; GCN-NEXT:    v_lshlrev_b32_e32 v3, 3, v0
  ; GCN-NEXT:    v_mov_b32_e32 v4, v2
author	Craig Topper <craig.topper@intel.com>
	Mon, 7 Jan 2019 19:30:43 +0000 (19:30 +0000)
committer	Craig Topper <craig.topper@intel.com>
	Mon, 7 Jan 2019 19:30:43 +0000 (19:30 +0000)
include/llvm/CodeGen/TargetLowering.h		patch \| blob \| history
lib/CodeGen/SelectionDAG/TargetLowering.cpp		patch \| blob \| history
lib/Target/AMDGPU/AMDGPUISelLowering.cpp		patch \| blob \| history
test/CodeGen/AMDGPU/lshl64-to-32.ll		patch \| blob \| history