[DAGCombine] Recognise any_extend_vector_inreg and truncation style shuffle masks

author Simon Pilgrim <llvm-dev@redking.me.uk>

Fri, 17 Feb 2017 15:14:48 +0000 (15:14 +0000)

committer Simon Pilgrim <llvm-dev@redking.me.uk>

Fri, 17 Feb 2017 15:14:48 +0000 (15:14 +0000)
author Simon Pilgrim <llvm-dev@redking.me.uk>
Fri, 17 Feb 2017 15:14:48 +0000 (15:14 +0000)
committer Simon Pilgrim <llvm-dev@redking.me.uk>
Fri, 17 Feb 2017 15:14:48 +0000 (15:14 +0000)
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

index 7997a3338622274132267891ea270727c30ead40..9aa9b310d49acfd553edbba68afd5bc024d65dfb 100644 (file)
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -7530,6 +7530,16 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
        return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1);
    }
  
+  // fold (sext_in_reg (*_extend_vector_inreg x)) -> (sext_vector_inreg x)
+  if ((N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG ||
+       N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG ||
+       N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) &&
+      N0.getOperand(0).getScalarValueSizeInBits() == EVTBits) {
+    if (!LegalOperations ||
+        TLI.isOperationLegal(ISD::SIGN_EXTEND_VECTOR_INREG, VT))
+      return DAG.getSignExtendVectorInReg(N0.getOperand(0), SDLoc(N), VT);
+  }
+
    // fold (sext_in_reg (zext x)) -> (sext x)
    // iff we are extending the source sign bit.
    if (N0.getOpcode() == ISD::ZERO_EXTEND) {
@@ -14194,6 +14204,113 @@ static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
    return DAG.getBuildVector(VT, SDLoc(SVN), Ops);
  }
  
+// Match shuffles that can be converted to any_extend_vector_inreg.
+// This is often generated during legalization.
+// e.g. v4i32 <0,u,1,u> -> (v2i64 any_extend_vector_inreg(v4i32 src))
+// TODO Add support for ZERO_EXTEND_VECTOR_INREG when we have a test case.
+static SDValue combineShuffleToVectorExtend(ShuffleVectorSDNode *SVN,
+                                            SelectionDAG &DAG,
+                                            const TargetLowering &TLI,
+                                            bool LegalOperations) {
+  EVT VT = SVN->getValueType(0);
+  bool IsBigEndian = DAG.getDataLayout().isBigEndian();
+
+  // TODO Add support for big-endian when we have a test case.
+  if (!VT.isInteger() || IsBigEndian)
+    return SDValue();
+
+  unsigned NumElts = VT.getVectorNumElements();
+  unsigned EltSizeInBits = VT.getScalarSizeInBits();
+  ArrayRef<int> Mask = SVN->getMask();
+  SDValue N0 = SVN->getOperand(0);
+
+  // Every defined lane i must be a multiple of Scale and read lane i / Scale.
+  auto isAnyExtend = [&Mask, &NumElts](unsigned Scale) {
+    for (unsigned i = 0; i != NumElts; ++i) {
+      if (Mask[i] < 0)
+        continue;
+      if ((i % Scale) == 0 && Mask[i] == static_cast<int>(i / Scale))
+        continue;
+      return false;
+    }
+    return true;
+  };
+
+  // Attempt to match a '*_extend_vector_inreg' shuffle, we just search for
+  // power-of-2 extensions that evenly divide the element count.
+  for (unsigned Scale = 2; Scale < NumElts; Scale *= 2) {
+    if ((NumElts % Scale) != 0 || !isAnyExtend(Scale))
+      continue;
+
+    EVT OutSVT = EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits * Scale);
+    EVT OutVT = EVT::getVectorVT(*DAG.getContext(), OutSVT, NumElts / Scale);
+    if (!LegalOperations ||
+        TLI.isOperationLegalOrCustom(ISD::ANY_EXTEND_VECTOR_INREG, OutVT))
+      return DAG.getBitcast(VT,
+                            DAG.getAnyExtendVectorInReg(N0, SDLoc(SVN), OutVT));
+  }
+
+  return SDValue();
+}
+
+// Detect 'truncate_vector_inreg' style shuffles that pack the lower parts of
+// each source element of a large type into the lowest elements of a smaller
+// destination type. This is often generated during legalization.
+// If the source is a '*_extend_vector_inreg' node we may be able to remove it.
+static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN,
+                                        SelectionDAG &DAG) {
+  EVT VT = SVN->getValueType(0);
+  bool IsBigEndian = DAG.getDataLayout().isBigEndian();
+
+  // TODO Add support for big-endian when we have a test case.
+  if (!VT.isInteger() || IsBigEndian)
+    return SDValue();
+
+  SDValue N0 = SVN->getOperand(0);
+  while (N0.getOpcode() == ISD::BITCAST) // Look through bitcasts.
+    N0 = N0.getOperand(0);
+
+  unsigned Opcode = N0.getOpcode();
+  if (Opcode != ISD::ANY_EXTEND_VECTOR_INREG &&
+      Opcode != ISD::SIGN_EXTEND_VECTOR_INREG &&
+      Opcode != ISD::ZERO_EXTEND_VECTOR_INREG)
+    return SDValue();
+
+  SDValue N00 = N0.getOperand(0);
+  ArrayRef<int> Mask = SVN->getMask();
+  unsigned NumElts = VT.getVectorNumElements();
+  unsigned ExtSrcSizeInBits = N00.getScalarValueSizeInBits();
+  unsigned ExtDstSizeInBits = N0.getScalarValueSizeInBits();
+
+  // (v4i32 truncate_vector_inreg(v2i64)) == shuffle<0,2,-1,-1>
+  // (v8i16 truncate_vector_inreg(v4i32)) == shuffle<0,2,4,6,-1,-1,-1,-1>
+  // (v8i16 truncate_vector_inreg(v2i64)) == shuffle<0,4,-1,-1,-1,-1,-1,-1>
+  auto isTruncate = [&Mask, &NumElts](unsigned Scale) {
+    for (unsigned i = 0; i != NumElts; ++i) {
+      if (Mask[i] < 0)
+        continue;
+      if ((i * Scale) < NumElts && Mask[i] == static_cast<int>(i * Scale))
+        continue;
+      return false;
+    }
+    return true;
+  };
+
+  // At the moment we just handle the case where we've truncated back to the
+  // same size as before the extension, with a whole-number extension scale.
+  // TODO: handle more extension/truncation cases as cases arise.
+  if (VT.getScalarSizeInBits() != ExtSrcSizeInBits ||
+      (ExtDstSizeInBits % ExtSrcSizeInBits) != 0)
+    return SDValue();
+
+  // We can remove the *_extend_vector_inreg only if the truncation happens at
+  // the same scale as the extension.
+  if (isTruncate(ExtDstSizeInBits / ExtSrcSizeInBits))
+    return DAG.getBitcast(VT, N00);
+
+  return SDValue();
+}
+
  SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
    EVT VT = N->getValueType(0);
    unsigned NumElts = VT.getVectorNumElements();
@@ -14298,6 +14415,14 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
    if (SDValue S = simplifyShuffleOperands(SVN, N0, N1, DAG))
      return S;
  
+  // Match shuffles that can be converted to any_extend_vector_inreg.
+  if (SDValue V = combineShuffleToVectorExtend(SVN, DAG, TLI, LegalOperations))
+    return V;
+
+  // Combine "truncate_vector_inreg" style shuffles.
+  if (SDValue V = combineTruncationShuffle(SVN, DAG))
+    return V;
+
    if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
        Level < AfterLegalizeVectorOps &&
        (N1.isUndef() ||
diff --git a/test/CodeGen/X86/2011-10-21-widen-cmp.ll b/test/CodeGen/X86/2011-10-21-widen-cmp.ll

index 429960333ae4f3193744ebd7e363701fc3ec3f0e..782490900b24fea9407f0d895672c800ec0c3664 100644 (file)
--- a/test/CodeGen/X86/2011-10-21-widen-cmp.ll
+++ b/test/CodeGen/X86/2011-10-21-widen-cmp.ll
@@ -9,7 +9,8 @@ define void @cmp_2_floats(<2 x float> %a, <2 x float> %b) {
  ; CHECK:       # BB#0: # %entry
  ; CHECK-NEXT:    movaps %xmm0, %xmm2
  ; CHECK-NEXT:    cmpordps %xmm0, %xmm0
-; CHECK-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1],zero,xmm0[1]
+; CHECK-NEXT:    pmovsxdq %xmm0, %xmm0
+; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
  ; CHECK-NEXT:    pslld $31, %xmm0
  ; CHECK-NEXT:    blendvps %xmm0, %xmm2, %xmm1
  ; CHECK-NEXT:    movlps %xmm1, (%rax)
diff --git a/test/CodeGen/X86/avx512-cvt.ll b/test/CodeGen/X86/avx512-cvt.ll

index 87deeb9e16c03e8bdbfcdb96f2f1cddd5707a34c..40f342185fdf0eaaa554cd1f1c639d87acd43d03 100644 (file)
--- a/test/CodeGen/X86/avx512-cvt.ll
+++ b/test/CodeGen/X86/avx512-cvt.ll
@@ -1075,7 +1075,6 @@ define <2 x float> @sitofp_2i1_float(<2 x float> %a) {
  ; NOVL:       ## BB#0:
  ; NOVL-NEXT:    vxorps %xmm1, %xmm1, %xmm1
  ; NOVL-NEXT:    vcmpltps %xmm0, %xmm1, %xmm0
-; NOVL-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1],zero,xmm0[1]
  ; NOVL-NEXT:    vcvtdq2ps %xmm0, %xmm0
  ; NOVL-NEXT:    retq
  ;
diff --git a/test/CodeGen/X86/avx512-vec-cmp.ll b/test/CodeGen/X86/avx512-vec-cmp.ll

index b0e3e95afe28d19a3783cd867a407c491c722f6b..c39509b788afee129f7ee07838c27fb47cf6a4ee 100644 (file)
--- a/test/CodeGen/X86/avx512-vec-cmp.ll
+++ b/test/CodeGen/X86/avx512-vec-cmp.ll
@@ -1226,11 +1226,7 @@ define <2 x i64> @test46(<2 x float> %x, <2 x float> %y) #0 {
  ; KNL-LABEL: test46:
  ; KNL:       ## BB#0:
  ; KNL-NEXT:    vcmpeqps %xmm1, %xmm0, %xmm0
-; KNL-NEXT:    vxorps %xmm1, %xmm1, %xmm1
-; KNL-NEXT:    vunpcklps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; KNL-NEXT:    vpsrad $31, %xmm0, %xmm1
-; KNL-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
-; KNL-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
+; KNL-NEXT:    vpmovsxdq %xmm0, %xmm0
  ; KNL-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
  ; KNL-NEXT:    retq
  ;
diff --git a/test/CodeGen/X86/sse-minmax.ll b/test/CodeGen/X86/sse-minmax.ll

index 27008f26b81941b0c373d0523e626ea3cfb0a538..eae0b7104275fc905a9a73ba362aabd740a41b99 100644 (file)
--- a/test/CodeGen/X86/sse-minmax.ll
+++ b/test/CodeGen/X86/sse-minmax.ll
@@ -1277,8 +1277,6 @@ define <2 x float> @test_maxps_illegal_v2f32(<2 x float> %x, <2 x float> %y)  {
  ; STRICT-NEXT:    movaps %xmm0, %xmm2
  ; STRICT-NEXT:    movaps %xmm1, %xmm0
  ; STRICT-NEXT:    cmpleps %xmm2, %xmm0
-; STRICT-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1],zero,xmm0[1]
-; STRICT-NEXT:    pslld $31, %xmm0
  ; STRICT-NEXT:    blendvps %xmm0, %xmm2, %xmm1
  ; STRICT-NEXT:    movaps %xmm1, %xmm0
  ; STRICT-NEXT:    retq
@@ -1297,8 +1295,6 @@ define <2 x float> @test_minps_illegal_v2f32(<2 x float> %x, <2 x float> %y)  {
  ; STRICT:       # BB#0:
  ; STRICT-NEXT:    movaps %xmm0, %xmm2
  ; STRICT-NEXT:    cmpleps %xmm1, %xmm0
-; STRICT-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1],zero,xmm0[1]
-; STRICT-NEXT:    pslld $31, %xmm0
  ; STRICT-NEXT:    blendvps %xmm0, %xmm2, %xmm1
  ; STRICT-NEXT:    movaps %xmm1, %xmm0
  ; STRICT-NEXT:    retq
author	Simon Pilgrim <llvm-dev@redking.me.uk>
	Fri, 17 Feb 2017 15:14:48 +0000 (15:14 +0000)
committer	Simon Pilgrim <llvm-dev@redking.me.uk>
	Fri, 17 Feb 2017 15:14:48 +0000 (15:14 +0000)
lib/CodeGen/SelectionDAG/DAGCombiner.cpp		patch \| blob \| history
test/CodeGen/X86/2011-10-21-widen-cmp.ll		patch \| blob \| history
test/CodeGen/X86/avx512-cvt.ll		patch \| blob \| history
test/CodeGen/X86/avx512-vec-cmp.ll		patch \| blob \| history
test/CodeGen/X86/sse-minmax.ll		patch \| blob \| history