[X86][CodeGen][NFC] Delay `combineIncDecVector()` from DAGCombine to X86DAGToDAGISel

author Roman Lebedev <lebedev.ri@gmail.com>

Thu, 29 Aug 2019 10:50:09 +0000 (10:50 +0000)

committer Roman Lebedev <lebedev.ri@gmail.com>

Thu, 29 Aug 2019 10:50:09 +0000 (10:50 +0000)
author Roman Lebedev <lebedev.ri@gmail.com>
Thu, 29 Aug 2019 10:50:09 +0000 (10:50 +0000)
committer Roman Lebedev <lebedev.ri@gmail.com>
Thu, 29 Aug 2019 10:50:09 +0000 (10:50 +0000)
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp

index e0a462471a7177c787589d79955859f209ca014a..eaedf14a9d8d32997d9b84d8fe15b0f6c0a226c9 100644 (file)
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -507,6 +507,7 @@ namespace {
      bool shrinkAndImmediate(SDNode *N);
      bool isMaskZeroExtended(SDNode *N) const;
      bool tryShiftAmountMod(SDNode *N);
+    bool combineIncDecVector(SDNode *Node);
      bool tryShrinkShlLogicImm(SDNode *N);
      bool tryVPTESTM(SDNode *Root, SDValue Setcc, SDValue Mask);
  
@@ -3775,6 +3776,49 @@ bool X86DAGToDAGISel::tryShrinkShlLogicImm(SDNode *N) {
    return true;
  }
  
+/// Convert vector increment or decrement to sub/add with an all-ones constant:
+/// add X, <1, 1...> --> sub X, <-1, -1...>
+/// sub X, <1, 1...> --> add X, <-1, -1...>
+/// The all-ones vector constant can be materialized using a pcmpeq instruction
+/// that is commonly recognized as an idiom (has no register dependency), so
+/// that's better/smaller than loading a splat 1 constant.
+bool X86DAGToDAGISel::combineIncDecVector(SDNode *Node) {
+  assert((Node->getOpcode() == ISD::ADD || Node->getOpcode() == ISD::SUB) &&
+         "Unexpected opcode for increment/decrement transform");
+
+  EVT VT = Node->getValueType(0);
+  assert(VT.isVector() && "Should only be called for vectors.");
+
+  SDValue X = Node->getOperand(0);
+  SDValue OneVec = Node->getOperand(1);
+
+  APInt SplatVal;
+  if (!X86::isConstantSplat(OneVec, SplatVal) || !SplatVal.isOneValue())
+    return false;
+
+  SDLoc DL(Node);
+  SDValue AllOnesVec;
+
+  APInt Ones = APInt::getAllOnesValue(32);
+  assert(VT.getSizeInBits() % 32 == 0 &&
+         "Expected bit count to be a multiple of 32");
+  unsigned NumElts = VT.getSizeInBits() / 32;
+  assert(NumElts > 0 && "Expected to get non-empty vector.");
+  AllOnesVec =
+      CurDAG->getConstant(Ones, DL, MVT::getVectorVT(MVT::i32, NumElts));
+  insertDAGNode(*CurDAG, X, AllOnesVec);
+
+  AllOnesVec = CurDAG->getBitcast(VT, AllOnesVec);
+  insertDAGNode(*CurDAG, X, AllOnesVec);
+
+  unsigned NewOpcode = Node->getOpcode() == ISD::ADD ? ISD::SUB : ISD::ADD;
+  SDValue NewNode = CurDAG->getNode(NewOpcode, DL, VT, X, AllOnesVec);
+
+  ReplaceNode(Node, NewNode.getNode());
+  SelectCode(NewNode.getNode());
+  return true;
+}
+
  /// If the high bits of an 'and' operand are known zero, try setting the
  /// high bits of an 'and' constant operand to produce a smaller encoding by
  /// creating a small, sign-extended negative immediate rather than a large
@@ -4347,6 +4391,10 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
      LLVM_FALLTHROUGH;
    case ISD::ADD:
    case ISD::SUB: {
+    if ((Opcode == ISD::ADD || Opcode == ISD::SUB) && NVT.isVector() &&
+        combineIncDecVector(Node))
+      return;
+
      // Try to avoid folding immediates with multiple uses for optsize.
      // This code tries to select to register form directly to avoid going
      // through the isel table which might fold the immediate. We can't change
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp

index 6453d3ad0bc5d4a3737bd312b90490d63c92724c..3f37cb5ad8c819f660794655c2217517c361739e 100644 (file)
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -6215,7 +6215,9 @@ static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits,
    return false;
  }
  
-static bool isConstantSplat(SDValue Op, APInt &SplatVal) {
+namespace llvm {
+namespace X86 {
+bool isConstantSplat(SDValue Op, APInt &SplatVal) {
    APInt UndefElts;
    SmallVector<APInt, 16> EltBits;
    if (getTargetConstantBitsFromNode(Op, Op.getScalarValueSizeInBits(),
@@ -6238,6 +6240,8 @@ static bool isConstantSplat(SDValue Op, APInt &SplatVal) {
  
    return false;
  }
+} // namespace X86
+} // namespace llvm
  
  static bool getTargetShuffleMaskIndices(SDValue MaskNode,
                                          unsigned MaskEltSizeInBits,
@@ -18115,7 +18119,7 @@ static SDValue LowerFunnelShift(SDValue Op, const X86Subtarget &Subtarget,
        std::swap(Op0, Op1);
  
      APInt APIntShiftAmt;
-    if (isConstantSplat(Amt, APIntShiftAmt)) {
+    if (X86::isConstantSplat(Amt, APIntShiftAmt)) {
        uint64_t ShiftAmt = APIntShiftAmt.urem(VT.getScalarSizeInBits());
        return DAG.getNode(IsFSHR ? X86ISD::VSHRD : X86ISD::VSHLD, DL, VT,
                           Op0, Op1, DAG.getConstant(ShiftAmt, DL, MVT::i8));
@@ -25337,7 +25341,7 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG,
  
    // Optimize shl/srl/sra with constant shift amount.
    APInt APIntShiftAmt;
-  if (!isConstantSplat(Amt, APIntShiftAmt))
+  if (!X86::isConstantSplat(Amt, APIntShiftAmt))
      return SDValue();
  
    // If the shift amount is out of range, return undef.
@@ -43750,39 +43754,6 @@ static SDValue combineLoopSADPattern(SDNode *N, SelectionDAG &DAG,
    return DAG.getNode(ISD::ADD, DL, VT, Sad, OtherOp, Flags);
  }
  
-/// Convert vector increment or decrement to sub/add with an all-ones constant:
-/// add X, <1, 1...> --> sub X, <-1, -1...>
-/// sub X, <1, 1...> --> add X, <-1, -1...>
-/// The all-ones vector constant can be materialized using a pcmpeq instruction
-/// that is commonly recognized as an idiom (has no register dependency), so
-/// that's better/smaller than loading a splat 1 constant.
-static SDValue combineIncDecVector(SDNode *N, SelectionDAG &DAG,
-                                   TargetLowering::DAGCombinerInfo &DCI) {
-  assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
-         "Unexpected opcode for increment/decrement transform");
-
-  // Delay this until legalize ops to avoid interfering with early DAG combines
-  // that may expect canonical adds.
-  // FIXME: We may want to consider moving this to custom lowering or all the
-  // way to isel, but lets start here.
-  if (DCI.isBeforeLegalizeOps())
-    return SDValue();
-
-  // Pseudo-legality check: getOnesVector() expects one of these types, so bail
-  // out and wait for legalization if we have an unsupported vector length.
-  EVT VT = N->getValueType(0);
-  if (!VT.is128BitVector() && !VT.is256BitVector() && !VT.is512BitVector())
-    return SDValue();
-
-  APInt SplatVal;
-  if (!isConstantSplat(N->getOperand(1), SplatVal) || !SplatVal.isOneValue())
-    return SDValue();
-
-  SDValue AllOnesVec = getOnesVector(VT, DAG, SDLoc(N));
-  unsigned NewOpcode = N->getOpcode() == ISD::ADD ? ISD::SUB : ISD::ADD;
-  return DAG.getNode(NewOpcode, SDLoc(N), VT, N->getOperand(0), AllOnesVec);
-}
-
  static SDValue matchPMADDWD(SelectionDAG &DAG, SDValue Op0, SDValue Op1,
                              const SDLoc &DL, EVT VT,
                              const X86Subtarget &Subtarget) {
@@ -44045,9 +44016,6 @@ static SDValue combineAdd(SDNode *N, SelectionDAG &DAG,
                              HADDBuilder);
    }
  
-  if (SDValue V = combineIncDecVector(N, DAG, DCI))
-    return V;
-
    return combineAddOrSubToADCOrSBB(N, DAG);
  }
  
@@ -44176,9 +44144,6 @@ static SDValue combineSub(SDNode *N, SelectionDAG &DAG,
                              HSUBBuilder);
    }
  
-  if (SDValue V = combineIncDecVector(N, DAG, DCI))
-    return V;
-
    // Try to create PSUBUS if SUB's argument is max/min
    if (SDValue V = combineSubToSubus(N, DAG, Subtarget))
      return V;
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h

index 53bdc84cee136c97654991ae52b1e4b56c816ea9..05e4f16fc49d11d0850911ea7f0a56078b8e923d 100644 (file)
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -683,6 +683,9 @@ namespace llvm {
      bool isCalleePop(CallingConv::ID CallingConv,
                       bool is64Bit, bool IsVarArg, bool GuaranteeTCO);
  
+    /// If Op is a constant whose elements are all the same constant or
+    /// undefined, return true and return the constant value in \p SplatVal.
+    bool isConstantSplat(SDValue Op, APInt &SplatVal);
    } // end namespace X86
  
    //===--------------------------------------------------------------------===//
diff --git a/test/CodeGen/X86/stack-folding-int-avx1.ll b/test/CodeGen/X86/stack-folding-int-avx1.ll

index b92a70b85227880897c9891700335b45e945567b..46fd1ab5dfb6d11de5c190c705258bc38126dae8 100644 (file)
--- a/test/CodeGen/X86/stack-folding-int-avx1.ll
+++ b/test/CodeGen/X86/stack-folding-int-avx1.ll
@@ -540,8 +540,8 @@ define <16 x i8> @stack_fold_pandn(<16 x i8> %a0, <16 x i8> %a1) {
  ; CHECK-NEXT:    #APP
  ; CHECK-NEXT:    nop
  ; CHECK-NEXT:    #NO_APP
-; CHECK-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
  ; CHECK-NEXT:    vpandn {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
  ; CHECK-NEXT:    vpsubb %xmm1, %xmm0, %xmm0
  ; CHECK-NEXT:    retq
    %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
diff --git a/test/CodeGen/X86/stack-folding-int-sse42.ll b/test/CodeGen/X86/stack-folding-int-sse42.ll

index 873be133b3c2f8743d7c8d397a2865eeab33408d..3efaeddfbd647c282ec4b00a70d77109c0485e42 100644 (file)
--- a/test/CodeGen/X86/stack-folding-int-sse42.ll
+++ b/test/CodeGen/X86/stack-folding-int-sse42.ll
@@ -724,8 +724,8 @@ define <16 x i8> @stack_fold_pandn(<16 x i8> %a0, <16 x i8> %a1) {
  ; CHECK-NEXT:    #APP
  ; CHECK-NEXT:    nop
  ; CHECK-NEXT:    #NO_APP
-; CHECK-NEXT:    pcmpeqd %xmm1, %xmm1
  ; CHECK-NEXT:    pandn {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT:    pcmpeqd %xmm1, %xmm1
  ; CHECK-NEXT:    psubb %xmm1, %xmm0
  ; CHECK-NEXT:    retq
    %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
author	Roman Lebedev <lebedev.ri@gmail.com>
	Thu, 29 Aug 2019 10:50:09 +0000 (10:50 +0000)
committer	Roman Lebedev <lebedev.ri@gmail.com>
	Thu, 29 Aug 2019 10:50:09 +0000 (10:50 +0000)
lib/Target/X86/X86ISelDAGToDAG.cpp		patch \| blob \| history
lib/Target/X86/X86ISelLowering.cpp		patch \| blob \| history
lib/Target/X86/X86ISelLowering.h		patch \| blob \| history
test/CodeGen/X86/stack-folding-int-avx1.ll		patch \| blob \| history
test/CodeGen/X86/stack-folding-int-sse42.ll		patch \| blob \| history