[X86][SSE4A] Add support for shuffle combining to EXTRQ.

author Simon Pilgrim <llvm-dev@redking.me.uk>

Thu, 6 Jul 2017 12:22:58 +0000 (12:22 +0000)

committer Simon Pilgrim <llvm-dev@redking.me.uk>

Thu, 6 Jul 2017 12:22:58 +0000 (12:22 +0000)
author Simon Pilgrim <llvm-dev@redking.me.uk>
Thu, 6 Jul 2017 12:22:58 +0000 (12:22 +0000)
committer Simon Pilgrim <llvm-dev@redking.me.uk>
Thu, 6 Jul 2017 12:22:58 +0000 (12:22 +0000)
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp

index f1269822463f7749bfad6fa42933f339236fdb7e..02c8ec4d8f7d3c36ad6d1fa119a9c24b59d85f3e 100644 (file)
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -9364,7 +9364,7 @@ static bool matchVectorShuffleAsEXTRQ(MVT VT, SDValue &V1, SDValue &V2,
    int Idx = -1;
    for (int i = 0; i != Len; ++i) {
      int M = Mask[i];
-    if (M < 0)
+    if (M == SM_SentinelUndef)
        continue;
      SDValue &V = (M < Size ? V1 : V2);
      M = M % Size;
@@ -27696,6 +27696,33 @@ static bool combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
      return true;
    }
  
+  // Annoyingly, SSE4A instructions don't map into the above match helpers.
+  if (Subtarget.hasSSE4A() && AllowIntDomain && RootSizeInBits == 128) {
+    ShuffleVT = MVT::getIntegerVT(MaskEltSizeInBits);
+    ShuffleVT = MVT::getVectorVT(ShuffleVT, NumMaskElts);
+
+    APInt Zeroable(NumMaskElts, 0);
+    for (unsigned i = 0; i != NumMaskElts; ++i)
+      if (isUndefOrZero(Mask[i]))
+        Zeroable.setBit(i);
+
+    uint64_t BitLen, BitIdx;
+    if (matchVectorShuffleAsEXTRQ(ShuffleVT, V1, V2, Mask, BitLen, BitIdx,
+                                  Zeroable)) {
+      if (Depth == 1 && Root.getOpcode() == X86ISD::EXTRQI)
+        return false; // Nothing to do!
+      V1 = DAG.getBitcast(ShuffleVT, V1);
+      DCI.AddToWorklist(V1.getNode());
+      Res = DAG.getNode(X86ISD::EXTRQI, DL, ShuffleVT, V1,
+                        DAG.getConstant(BitLen, DL, MVT::i8),
+                        DAG.getConstant(BitIdx, DL, MVT::i8));
+      DCI.AddToWorklist(Res.getNode());
+      DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Res),
+                    /*AddTo*/ true);
+      return true;
+    }
+  }
+
    // Don't try to re-form single instruction chains under any circumstances now
    // that we've done encoding canonicalization for them.
    if (Depth < 2)
diff --git a/test/CodeGen/X86/vector-shuffle-combining-sse4a.ll b/test/CodeGen/X86/vector-shuffle-combining-sse4a.ll

index 1669317f4aa66777b23d754d5154c549f30b398a..9d9c29121f876d47f2b7553386638a26e1669106 100644 (file)
--- a/test/CodeGen/X86/vector-shuffle-combining-sse4a.ll
+++ b/test/CodeGen/X86/vector-shuffle-combining-sse4a.ll
@@ -9,30 +9,20 @@
  declare <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8>, <16 x i8>)
  
  define <16 x i8> @combine_extrqi_pshufb_16i8(<16 x i8> %a0) {
-; SSE-LABEL: combine_extrqi_pshufb_16i8:
-; SSE:       # BB#0:
-; SSE-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[1,2],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
-; SSE-NEXT:    retq
-;
-; AVX-LABEL: combine_extrqi_pshufb_16i8:
-; AVX:       # BB#0:
-; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[1,2],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
-; AVX-NEXT:    retq
+; ALL-LABEL: combine_extrqi_pshufb_16i8:
+; ALL:       # BB#0:
+; ALL-NEXT:    extrq {{.*#+}} xmm0 = xmm0[1,2],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
+; ALL-NEXT:    retq
    %1 = shufflevector <16 x i8> %a0, <16 x i8> zeroinitializer, <16 x i32> <i32 1, i32 2, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
    %2 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %1, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 255, i8 255, i8 255, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>)
    ret <16 x i8> %2
  }
  
  define <8 x i16> @combine_extrqi_pshufb_8i16(<8 x i16> %a0) {
-; SSE-LABEL: combine_extrqi_pshufb_8i16:
-; SSE:       # BB#0:
-; SSE-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[2,3],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
-; SSE-NEXT:    retq
-;
-; AVX-LABEL: combine_extrqi_pshufb_8i16:
-; AVX:       # BB#0:
-; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[2,3],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
-; AVX-NEXT:    retq
+; ALL-LABEL: combine_extrqi_pshufb_8i16:
+; ALL:       # BB#0:
+; ALL-NEXT:    extrq {{.*#+}} xmm0 = xmm0[2,3],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
+; ALL-NEXT:    retq
    %1 = shufflevector <8 x i16> %a0, <8 x i16> zeroinitializer, <8 x i32> <i32 1, i32 2, i32 8, i32 8, i32 undef, i32 undef, i32 undef, i32 undef>
    %2 = bitcast <8 x i16> %1 to <16 x i8>
    %3 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %2, <16 x i8> <i8 0, i8 1, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>)
@@ -43,9 +33,8 @@ define <8 x i16> @combine_extrqi_pshufb_8i16(<8 x i16> %a0) {
  define <16 x i8> @combine_insertqi_pshufb_16i8(<16 x i8> %a0, <16 x i8> %a1) {
  ; SSSE3-LABEL: combine_insertqi_pshufb_16i8:
  ; SSSE3:       # BB#0:
-; SSSE3-NEXT:    movl $65535, %eax # imm = 0xFFFF
-; SSSE3-NEXT:    movd %eax, %xmm0
-; SSSE3-NEXT:    pand %xmm1, %xmm0
+; SSSE3-NEXT:    extrq {{.*#+}} xmm1 = xmm1[0,1],zero,zero,zero,zero,zero,zero,xmm1[u,u,u,u,u,u,u,u]
+; SSSE3-NEXT:    movdqa %xmm1, %xmm0
  ; SSSE3-NEXT:    retq
  ;
  ; SSE42-LABEL: combine_insertqi_pshufb_16i8:
@@ -65,9 +54,8 @@ define <16 x i8> @combine_insertqi_pshufb_16i8(<16 x i8> %a0, <16 x i8> %a1) {
  define <8 x i16> @combine_insertqi_pshufb_8i16(<8 x i16> %a0, <8 x i16> %a1) {
  ; SSSE3-LABEL: combine_insertqi_pshufb_8i16:
  ; SSSE3:       # BB#0:
-; SSSE3-NEXT:    movl $65535, %eax # imm = 0xFFFF
-; SSSE3-NEXT:    movd %eax, %xmm0
-; SSSE3-NEXT:    pand %xmm1, %xmm0
+; SSSE3-NEXT:    extrq {{.*#+}} xmm1 = xmm1[0,1],zero,zero,zero,zero,zero,zero,xmm1[u,u,u,u,u,u,u,u]
+; SSSE3-NEXT:    movdqa %xmm1, %xmm0
  ; SSSE3-NEXT:    retq
  ;
  ; SSE42-LABEL: combine_insertqi_pshufb_8i16:
author	Simon Pilgrim <llvm-dev@redking.me.uk>
	Thu, 6 Jul 2017 12:22:58 +0000 (12:22 +0000)
committer	Simon Pilgrim <llvm-dev@redking.me.uk>
	Thu, 6 Jul 2017 12:22:58 +0000 (12:22 +0000)
lib/Target/X86/X86ISelLowering.cpp		patch \| blob \| history
test/CodeGen/X86/vector-shuffle-combining-sse4a.ll		patch \| blob \| history