[CodeGen] Make the code that detects if a shuffle is really a concatenation of...

author Craig Topper <craig.topper@gmail.com>

Mon, 4 Jul 2016 06:19:35 +0000 (06:19 +0000)

committer Craig Topper <craig.topper@gmail.com>

Mon, 4 Jul 2016 06:19:35 +0000 (06:19 +0000)
author Craig Topper <craig.topper@gmail.com>
Mon, 4 Jul 2016 06:19:35 +0000 (06:19 +0000)
committer Craig Topper <craig.topper@gmail.com>
Mon, 4 Jul 2016 06:19:35 +0000 (06:19 +0000)
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp

index c1e0137a4fbe26fad13071ac4fdedf43a4af06ee..39cdd77f0be4af71b4e82fdbce1434be3c420db7 100644 (file)
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -2996,17 +2996,6 @@ void SelectionDAGBuilder::visitExtractElement(const User &I) {
                             InVec, InIdx));
  }
  
-// Utility for visitShuffleVector - Return true if every element in Mask,
-// beginning from position Pos and ending in Pos+Size, falls within the
-// specified sequential range [L, L+Pos). or is undef.
-static bool isSequentialInRange(const SmallVectorImpl<int> &Mask,
-                                unsigned Pos, unsigned Size, int Low) {
-  for (unsigned i = Pos, e = Pos+Size; i != e; ++i, ++Low)
-    if (Mask[i] >= 0 && Mask[i] != Low)
-      return false;
-  return true;
-}
-
  void SelectionDAGBuilder::visitShuffleVector(const User &I) {
    SDValue Src1 = getValue(I.getOperand(0));
    SDValue Src2 = getValue(I.getOperand(1));
@@ -3030,29 +3019,46 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
      // Mask is longer than the source vectors and is a multiple of the source
      // vectors.  We can use concatenate vector to make the mask and vectors
      // lengths match.
-    if (SrcNumElts*2 == MaskNumElts) {
-      // First check for Src1 in low and Src2 in high
-      if (isSequentialInRange(Mask, 0, SrcNumElts, 0) &&
-          isSequentialInRange(Mask, SrcNumElts, SrcNumElts, SrcNumElts)) {
-        // The shuffle is concatenating two vectors together.
-        setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, getCurSDLoc(),
-                                 VT, Src1, Src2));
-        return;
+
+    unsigned NumConcat = MaskNumElts / SrcNumElts;
+
+    // Check if the shuffle is some kind of concatenation of the input vectors.
+    bool IsConcat = true;
+    SmallVector<int, 8> ConcatSrcs(NumConcat, -1);
+    for (unsigned i = 0; i != MaskNumElts; ++i) {
+      int Idx = Mask[i];
+      if (Idx < 0)
+        continue;
+      // Ensure the indices in each SrcVT sized piece are sequential and that
+      // the same source is used for the whole piece.
+      if ((Idx % SrcNumElts != (i % SrcNumElts)) ||
+          (ConcatSrcs[i / SrcNumElts] >= 0 &&
+           ConcatSrcs[i / SrcNumElts] != (int)(Idx / SrcNumElts))) {
+        IsConcat = false;
+        break;
        }
-      // Then check for Src2 in low and Src1 in high
-      if (isSequentialInRange(Mask, 0, SrcNumElts, SrcNumElts) &&
-          isSequentialInRange(Mask, SrcNumElts, SrcNumElts, 0)) {
-        // The shuffle is concatenating two vectors together.
-        setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, getCurSDLoc(),
-                                 VT, Src2, Src1));
-        return;
+      // Remember which source this index came from.
+      ConcatSrcs[i / SrcNumElts] = Idx / SrcNumElts;
+    }
+
+    // The shuffle is concatenating multiple vectors together. Just emit
+    // a CONCAT_VECTORS operation.
+    if (IsConcat) {
+      SmallVector<SDValue, 8> ConcatOps;
+      for (auto Src : ConcatSrcs) {
+        if (Src < 0)
+          ConcatOps.push_back(DAG.getUNDEF(SrcVT));
+        else if (Src == 0)
+          ConcatOps.push_back(Src1);
+        else
+          ConcatOps.push_back(Src2);
        }
+      setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, getCurSDLoc(),
+                               VT, ConcatOps));
+      return;
      }
  
      // Pad both vectors with undefs to make them the same length as the mask.
-    unsigned NumConcat = MaskNumElts / SrcNumElts;
-    bool Src1U = Src1.isUndef();
-    bool Src2U = Src2.isUndef();
      SDValue UndefVal = DAG.getUNDEF(SrcVT);
  
      SmallVector<SDValue, 8> MOps1(NumConcat, UndefVal);
@@ -3060,10 +3066,12 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
      MOps1[0] = Src1;
      MOps2[0] = Src2;
  
-    Src1 = Src1U ? DAG.getUNDEF(VT) : DAG.getNode(ISD::CONCAT_VECTORS,
-                                                  getCurSDLoc(), VT, MOps1);
-    Src2 = Src2U ? DAG.getUNDEF(VT) : DAG.getNode(ISD::CONCAT_VECTORS,
-                                                  getCurSDLoc(), VT, MOps2);
+    Src1 = Src1.isUndef() ? DAG.getUNDEF(VT)
+                          : DAG.getNode(ISD::CONCAT_VECTORS,
+                                        getCurSDLoc(), VT, MOps1);
+    Src2 = Src2.isUndef() ? DAG.getUNDEF(VT)
+                          : DAG.getNode(ISD::CONCAT_VECTORS,
+                                        getCurSDLoc(), VT, MOps2);
  
      // Readjust mask for new input vector length.
      SmallVector<int, 8> MappedOps;
diff --git a/test/CodeGen/X86/avx-intrinsics-fast-isel.ll b/test/CodeGen/X86/avx-intrinsics-fast-isel.ll

index 59eb4cdb1957866347a19e5c8f32588a313e544a..c3fb355d5087a3e10fca481a141e3a6b38c96fd6 100644 (file)
--- a/test/CodeGen/X86/avx-intrinsics-fast-isel.ll
+++ b/test/CodeGen/X86/avx-intrinsics-fast-isel.ll
@@ -318,12 +318,10 @@ define <4 x i64> @test_mm256_castpd_si256(<4 x double> %a0) nounwind {
  define <4 x double> @test_mm256_castpd128_pd256(<2 x double> %a0) nounwind {
  ; X32-LABEL: test_mm256_castpd128_pd256:
  ; X32:       # BB#0:
-; X32-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
  ; X32-NEXT:    retl
  ;
  ; X64-LABEL: test_mm256_castpd128_pd256:
  ; X64:       # BB#0:
-; X64-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
  ; X64-NEXT:    retq
    %res = shufflevector <2 x double> %a0, <2 x double> %a0, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
    ret <4 x double> %res
@@ -370,12 +368,10 @@ define <4 x i64> @test_mm256_castps_si256(<8 x float> %a0) nounwind {
  define <8 x float> @test_mm256_castps128_ps256(<4 x float> %a0) nounwind {
  ; X32-LABEL: test_mm256_castps128_ps256:
  ; X32:       # BB#0:
-; X32-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
  ; X32-NEXT:    retl
  ;
  ; X64-LABEL: test_mm256_castps128_ps256:
  ; X64:       # BB#0:
-; X64-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
  ; X64-NEXT:    retq
    %res = shufflevector <4 x float> %a0, <4 x float> %a0, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
    ret <8 x float> %res
@@ -398,12 +394,10 @@ define <4 x float> @test_mm256_castps256_ps128(<8 x float> %a0) nounwind {
  define <4 x i64> @test_mm256_castsi128_si256(<2 x i64> %a0) nounwind {
  ; X32-LABEL: test_mm256_castsi128_si256:
  ; X32:       # BB#0:
-; X32-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
  ; X32-NEXT:    retl
  ;
  ; X64-LABEL: test_mm256_castsi128_si256:
  ; X64:       # BB#0:
-; X64-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
  ; X64-NEXT:    retq
    %res = shufflevector <2 x i64> %a0, <2 x i64> %a0, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
    ret <4 x i64> %res
author	Craig Topper <craig.topper@gmail.com>
	Mon, 4 Jul 2016 06:19:35 +0000 (06:19 +0000)
committer	Craig Topper <craig.topper@gmail.com>
	Mon, 4 Jul 2016 06:19:35 +0000 (06:19 +0000)
lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp		patch \| blob \| history
test/CodeGen/X86/avx-intrinsics-fast-isel.ll		patch \| blob \| history