[DAGCombiner] Add vector demanded elements support to ComputeNumSignBits

author Simon Pilgrim <llvm-dev@redking.me.uk>

Fri, 31 Mar 2017 13:54:09 +0000 (13:54 +0000)

committer Simon Pilgrim <llvm-dev@redking.me.uk>

Fri, 31 Mar 2017 13:54:09 +0000 (13:54 +0000)
author Simon Pilgrim <llvm-dev@redking.me.uk>
Fri, 31 Mar 2017 13:54:09 +0000 (13:54 +0000)
committer Simon Pilgrim <llvm-dev@redking.me.uk>
Fri, 31 Mar 2017 13:54:09 +0000 (13:54 +0000)
diff --git a/include/llvm/CodeGen/SelectionDAG.h b/include/llvm/CodeGen/SelectionDAG.h

index 3dfa1262c9d6403761a83279a8451c9cf07cb6e3..6f0509543e7d88d3204ca9e28348930c6a20b8dd 100644 (file)
--- a/include/llvm/CodeGen/SelectionDAG.h
+++ b/include/llvm/CodeGen/SelectionDAG.h
@@ -1317,6 +1317,17 @@ public:
    /// target nodes to be understood.
    unsigned ComputeNumSignBits(SDValue Op, unsigned Depth = 0) const;
  
+  /// Return the number of times the sign bit of the register is replicated into
+  /// the other bits. We know that at least 1 bit is always equal to the sign
+  /// bit (itself), but other cases can give us information. For example,
+  /// immediately after an "SRA X, 2", we know that the top 3 bits are all equal
+  /// to each other, so we return 3. The DemandedElts argument allows
+  /// us to only collect the minimum sign bits of the requested vector elements.
+  /// Targets can implement the ComputeNumSignBitsForTarget method in the
+  /// TargetLowering class to allow target nodes to be understood.
+  unsigned ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
+                              unsigned Depth = 0) const;
+
    /// Return true if the specified operand is an ISD::ADD with a ConstantSDNode
    /// on the right-hand side, or if it is an ISD::OR with a ConstantSDNode that
    /// is guaranteed to have the same semantics as an ADD. This handles the
diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h

index 9eb80ee16f88cd602300e0a9088621dc2525a490..97e78d60844b66a3dbb06ac10704311ba8c4835e 100644 (file)
--- a/include/llvm/Target/TargetLowering.h
+++ b/include/llvm/Target/TargetLowering.h
@@ -2432,8 +2432,11 @@ public:
                                               unsigned Depth = 0) const;
  
    /// This method can be implemented by targets that want to expose additional
-  /// information about sign bits to the DAG Combiner.
+  /// information about sign bits to the DAG Combiner. The DemandedElts
+  /// argument allows us to only collect the minimum sign bits that are shared
+  /// by the requested vector elements.
    virtual unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
+                                                   const APInt &DemandedElts,
                                                     const SelectionDAG &DAG,
                                                     unsigned Depth = 0) const;
  
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

index 5466419b55751c698969264a2e85720c70f7cc48..5805c36ff841fefd1e5673a214d50bff80db4a08 100644 (file)
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -2899,6 +2899,15 @@ bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val) const {
  
  unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const {
    EVT VT = Op.getValueType();
+  APInt DemandedElts = VT.isVector()
+                           ? APInt::getAllOnesValue(VT.getVectorNumElements())
+                           : APInt(1, 1);
+  return ComputeNumSignBits(Op, DemandedElts, Depth);
+}
+
+unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
+                                          unsigned Depth) const {
+  EVT VT = Op.getValueType();
    assert(VT.isInteger() && "Invalid VT!");
    unsigned VTBits = VT.getScalarSizeInBits();
    unsigned Tmp, Tmp2;
@@ -2907,6 +2916,9 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const {
    if (Depth == 6)
      return 1;  // Limit search depth.
  
+  if (!DemandedElts)
+    return 1;  // No demanded elts, better to assume we don't know anything.
+
    switch (Op.getOpcode()) {
    default: break;
    case ISD::AssertSext:
@@ -2924,6 +2936,9 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const {
    case ISD::BUILD_VECTOR:
      Tmp = VTBits;
      for (unsigned i = 0, e = Op.getNumOperands(); (i < e) && (Tmp > 1); ++i) {
+      if (!DemandedElts[i])
+        continue;
+
        SDValue SrcOp = Op.getOperand(i);
        Tmp2 = ComputeNumSignBits(Op.getOperand(i), Depth + 1);
  
@@ -3116,18 +3131,28 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const {
      return std::max(std::min(KnownSign - rIndex * BitWidth, BitWidth), 0);
    }
    case ISD::EXTRACT_VECTOR_ELT: {
-    // At the moment we keep this simple and skip tracking the specific
-    // element. This way we get the lowest common denominator for all elements
-    // of the vector.
-    // TODO: get information for given vector element
+    SDValue InVec = Op.getOperand(0);
+    SDValue EltNo = Op.getOperand(1);
+    EVT VecVT = InVec.getValueType();
      const unsigned BitWidth = Op.getValueSizeInBits();
      const unsigned EltBitWidth = Op.getOperand(0).getScalarValueSizeInBits();
+    const unsigned NumSrcElts = VecVT.getVectorNumElements();
+
      // If BitWidth > EltBitWidth the value is anyext:ed, and we do not know
      // anything about sign bits. But if the sizes match we can derive knowledge
      // about sign bits from the vector operand.
-    if (BitWidth == EltBitWidth)
-      return ComputeNumSignBits(Op.getOperand(0), Depth+1);
-    break;
+    if (BitWidth != EltBitWidth)
+      break;
+
+    // If we know an in-range constant element index, just demand that vector
+    // element; otherwise ignore DemandedElts and demand all source elements.
+    APInt DemandedSrcElts = APInt::getAllOnesValue(NumSrcElts);
+    ConstantSDNode *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo);
+    if (ConstEltNo && ConstEltNo->getAPIntValue().ult(NumSrcElts))
+      DemandedSrcElts =
+          APInt::getOneBitSet(NumSrcElts, ConstEltNo->getZExtValue());
+
+    return ComputeNumSignBits(InVec, DemandedSrcElts, Depth + 1);
    }
    case ISD::EXTRACT_SUBVECTOR:
      return ComputeNumSignBits(Op.getOperand(0), Depth + 1);
@@ -3162,7 +3187,8 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const {
        Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
        Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
        Op.getOpcode() == ISD::INTRINSIC_VOID) {
-    unsigned NumBits = TLI->ComputeNumSignBitsForTargetNode(Op, *this, Depth);
+    unsigned NumBits =
+        TLI->ComputeNumSignBitsForTargetNode(Op, DemandedElts, *this, Depth);
      if (NumBits > 1)
        FirstAnswer = std::max(FirstAnswer, NumBits);
    }
@@ -3170,7 +3196,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const {
    // Finally, if we can prove that the top bits of the result are 0's or 1's,
    // use this information.
    APInt KnownZero, KnownOne;
-  computeKnownBits(Op, KnownZero, KnownOne, Depth);
+  computeKnownBits(Op, KnownZero, KnownOne, DemandedElts, Depth);
  
    APInt Mask;
    if (KnownZero.isNegative()) {        // sign bit is 0
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp

index 27574fc698580f40ba85a46e466ae114da3f8fd4..d3ccdcd5ffdd07bad324eb93189a5b6fb3567687 100644 (file)
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -1338,6 +1338,7 @@ void TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
  /// This method can be implemented by targets that want to expose additional
  /// information about sign bits to the DAG Combiner.
  unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
+                                                         const APInt &,
                                                           const SelectionDAG &,
                                                           unsigned Depth) const {
    assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp

index 96bfa5ed831f30f31d5a4ad73c6cab0d2de9615b..3f41f4e695e56fcac82121f5ee01725269779815 100644 (file)
--- a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -3596,7 +3596,8 @@ void AMDGPUTargetLowering::computeKnownBitsForTargetNode(
  }
  
  unsigned AMDGPUTargetLowering::ComputeNumSignBitsForTargetNode(
-    SDValue Op, const SelectionDAG &DAG, unsigned Depth) const {
+    SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
+    unsigned Depth) const {
    switch (Op.getOpcode()) {
    case AMDGPUISD::BFE_I32: {
      ConstantSDNode *Width = dyn_cast<ConstantSDNode>(Op.getOperand(2));
diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.h b/lib/Target/AMDGPU/AMDGPUISelLowering.h

index c81f998b07c8122a09f92153af661a67da081f06..66e8464fd7f41226509cdeb8d6ed0287b667b436 100644 (file)
--- a/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -206,7 +206,8 @@ public:
                                       const SelectionDAG &DAG,
                                       unsigned Depth = 0) const override;
  
-  unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const SelectionDAG &DAG,
+  unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const APInt &DemandedElts,
+                                           const SelectionDAG &DAG,
                                             unsigned Depth = 0) const override;
  
    /// \brief Helper function that adds Reg to the LiveIn list of the DAG's
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp

index 4f309b422fe3d6988769117fb18108ae0ebb84bc..aa741336f92898f1986ed0cad7db43655a98c44d 100644 (file)
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -26745,7 +26745,8 @@ void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
  }
  
  unsigned X86TargetLowering::ComputeNumSignBitsForTargetNode(
-    SDValue Op, const SelectionDAG &DAG, unsigned Depth) const {
+    SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
+    unsigned Depth) const {
    unsigned VTBits = Op.getScalarValueSizeInBits();
    unsigned Opcode = Op.getOpcode();
    switch (Opcode) {
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h

index a179adcd929e836a95df8190aa9be93caa258067..13642c219e8e2980c4f5ef05849f5f9c938d908d 100644 (file)
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -832,6 +832,7 @@ namespace llvm {
  
      /// Determine the number of bits in the operation that are sign bits.
      unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
+                                             const APInt &DemandedElts,
                                               const SelectionDAG &DAG,
                                               unsigned Depth) const override;
  
diff --git a/test/CodeGen/X86/known-bits-vector.ll b/test/CodeGen/X86/known-bits-vector.ll

index d10b865e438502e370f1a0ea54efbb47b3bb307b..eee466a5a60af5931875e39ccc6d4671f725a62a 100644 (file)
--- a/test/CodeGen/X86/known-bits-vector.ll
+++ b/test/CodeGen/X86/known-bits-vector.ll
@@ -23,18 +23,14 @@ define i32 @knownbits_mask_extract_sext(<8 x i16> %a0) nounwind {
  define float @knownbits_mask_extract_uitofp(<2 x i64> %a0) nounwind {
  ; X32-LABEL: knownbits_mask_extract_uitofp:
  ; X32:       # BB#0:
-; X32-NEXT:    pushl %ebp
-; X32-NEXT:    movl %esp, %ebp
-; X32-NEXT:    andl $-8, %esp
-; X32-NEXT:    subl $16, %esp
+; X32-NEXT:    pushl %eax
  ; X32-NEXT:    vpxor %xmm1, %xmm1, %xmm1
  ; X32-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3],xmm0[4,5,6,7]
-; X32-NEXT:    vmovq %xmm0, {{[0-9]+}}(%esp)
-; X32-NEXT:    fildll {{[0-9]+}}(%esp)
-; X32-NEXT:    fstps {{[0-9]+}}(%esp)
-; X32-NEXT:    flds {{[0-9]+}}(%esp)
-; X32-NEXT:    movl %ebp, %esp
-; X32-NEXT:    popl %ebp
+; X32-NEXT:    vmovd %xmm0, %eax
+; X32-NEXT:    vcvtsi2ssl %eax, %xmm2, %xmm0
+; X32-NEXT:    vmovss %xmm0, (%esp)
+; X32-NEXT:    flds (%esp)
+; X32-NEXT:    popl %eax
  ; X32-NEXT:    retl
  ;
  ; X64-LABEL: knownbits_mask_extract_uitofp:
@@ -42,7 +38,7 @@ define float @knownbits_mask_extract_uitofp(<2 x i64> %a0) nounwind {
  ; X64-NEXT:    vpxor %xmm1, %xmm1, %xmm1
  ; X64-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3],xmm0[4,5,6,7]
  ; X64-NEXT:    vmovq %xmm0, %rax
-; X64-NEXT:    vcvtsi2ssq %rax, %xmm2, %xmm0
+; X64-NEXT:    vcvtsi2ssl %eax, %xmm2, %xmm0
  ; X64-NEXT:    retq
    %1 = and <2 x i64> %a0, <i64 65535, i64 -1>
    %2 = extractelement <2 x i64> %1, i32 0
author	Simon Pilgrim <llvm-dev@redking.me.uk>
	Fri, 31 Mar 2017 13:54:09 +0000 (13:54 +0000)
committer	Simon Pilgrim <llvm-dev@redking.me.uk>
	Fri, 31 Mar 2017 13:54:09 +0000 (13:54 +0000)
include/llvm/CodeGen/SelectionDAG.h		patch \| blob \| history
include/llvm/Target/TargetLowering.h		patch \| blob \| history
lib/CodeGen/SelectionDAG/SelectionDAG.cpp		patch \| blob \| history
lib/CodeGen/SelectionDAG/TargetLowering.cpp		patch \| blob \| history
lib/Target/AMDGPU/AMDGPUISelLowering.cpp		patch \| blob \| history
lib/Target/AMDGPU/AMDGPUISelLowering.h		patch \| blob \| history
lib/Target/X86/X86ISelLowering.cpp		patch \| blob \| history
lib/Target/X86/X86ISelLowering.h		patch \| blob \| history
test/CodeGen/X86/known-bits-vector.ll		patch \| blob \| history