[X86][SSE] Add faux shuffle combining support for PACKUS

author Simon Pilgrim <llvm-dev@redking.me.uk>

Sun, 1 Oct 2017 18:43:48 +0000 (18:43 +0000)

committer Simon Pilgrim <llvm-dev@redking.me.uk>

Sun, 1 Oct 2017 18:43:48 +0000 (18:43 +0000)
author Simon Pilgrim <llvm-dev@redking.me.uk>
Sun, 1 Oct 2017 18:43:48 +0000 (18:43 +0000)
committer Simon Pilgrim <llvm-dev@redking.me.uk>
Sun, 1 Oct 2017 18:43:48 +0000 (18:43 +0000)
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp

index e68a6ed2a84a2e35b15d14a9d032be97c78cbe51..6abf46f470fe2eedaf81d850b85fe3b6e1443fe2 100644 (file)
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -5931,7 +5931,8 @@ static bool getFauxShuffleMask(SDValue N, SmallVectorImpl<int> &Mask,
        Mask.push_back(i == InIdx ? NumElts + ExIdx : i);
      return true;
    }
-  case X86ISD::PACKSS: {
+  case X86ISD::PACKSS:
+  case X86ISD::PACKUS: {
      SDValue N0 = N.getOperand(0);
      SDValue N1 = N.getOperand(1);
      assert(N0.getValueType().getVectorNumElements() == (NumElts / 2) &&
@@ -5940,9 +5941,19 @@ static bool getFauxShuffleMask(SDValue N, SmallVectorImpl<int> &Mask,
  
      // If we know input saturation won't happen we can treat this
      // as a truncation shuffle.
-    if (DAG.ComputeNumSignBits(N0) <= NumBitsPerElt ||
-        DAG.ComputeNumSignBits(N1) <= NumBitsPerElt)
-      return false;
+    if (Opcode == X86ISD::PACKSS) {
+      if (DAG.ComputeNumSignBits(N0) <= NumBitsPerElt ||
+          DAG.ComputeNumSignBits(N1) <= NumBitsPerElt)
+        return false;
+    } else {
+      KnownBits Known0, Known1;
+      DAG.computeKnownBits(N0, Known0);
+      if (Known0.countMinLeadingZeros() < NumBitsPerElt)
+        return false;
+      DAG.computeKnownBits(N1, Known1);
+      if (Known1.countMinLeadingZeros() < NumBitsPerElt)
+        return false;
+    }
  
      bool IsUnary = (N0 == N1);
      unsigned Offset = IsUnary ? 0 : NumElts;
diff --git a/test/CodeGen/X86/vector-shuffle-combining-avx2.ll b/test/CodeGen/X86/vector-shuffle-combining-avx2.ll

index 69a6eb7effabea296f08723d9b5ce8f512915b53..f0c7ae38b6b10567dab861e0d4de0a01e8d55c9f 100644 (file)
--- a/test/CodeGen/X86/vector-shuffle-combining-avx2.ll
+++ b/test/CodeGen/X86/vector-shuffle-combining-avx2.ll
@@ -846,16 +846,12 @@ declare <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16>, <16 x i16>) nounwind readn
  define <16 x i16> @shuffle_combine_packusdw_pshufb(<8 x i32> %a0, <8 x i32> %a1) {
  ; X32-LABEL: shuffle_combine_packusdw_pshufb:
  ; X32:       # BB#0:
-; X32-NEXT:    vpsrld $16, %ymm0, %ymm0
-; X32-NEXT:    vpackusdw %ymm0, %ymm0, %ymm0
-; X32-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[6,7,4,5,2,3,0,1,6,7,4,5,2,3,0,1,16,17,18,19,20,21,22,23,22,23,20,21,18,19,16,17]
+; X32-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[14,15,10,11,6,7,2,3,14,15,10,11,6,7,2,3,18,19,22,23,26,27,30,31,30,31,26,27,22,23,18,19]
  ; X32-NEXT:    retl
  ;
  ; X64-LABEL: shuffle_combine_packusdw_pshufb:
  ; X64:       # BB#0:
-; X64-NEXT:    vpsrld $16, %ymm0, %ymm0
-; X64-NEXT:    vpackusdw %ymm0, %ymm0, %ymm0
-; X64-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[6,7,4,5,2,3,0,1,6,7,4,5,2,3,0,1,16,17,18,19,20,21,22,23,22,23,20,21,18,19,16,17]
+; X64-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[14,15,10,11,6,7,2,3,14,15,10,11,6,7,2,3,18,19,22,23,26,27,30,31,30,31,26,27,22,23,18,19]
  ; X64-NEXT:    retq
    %1 = lshr <8 x i32> %a0, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
    %2 = tail call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %1, <8 x i32> %1)
@@ -867,18 +863,12 @@ declare <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32>, <8 x i32>) nounwind readno
  define <32 x i8> @shuffle_combine_packuswb_pshufb(<16 x i16> %a0, <16 x i16> %a1) {
  ; X32-LABEL: shuffle_combine_packuswb_pshufb:
  ; X32:       # BB#0:
-; X32-NEXT:    vpsrlw $8, %ymm0, %ymm0
-; X32-NEXT:    vpsrlw $8, %ymm1, %ymm1
-; X32-NEXT:    vpackuswb %ymm1, %ymm0, %ymm0
-; X32-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[7,6,5,4,3,2,1,0,7,6,5,4,3,2,1,0,23,22,21,20,19,18,17,16,23,22,21,20,19,18,17,16]
+; X32-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[15,13,11,9,7,5,3,1,15,13,11,9,7,5,3,1,31,29,27,25,23,21,19,17,31,29,27,25,23,21,19,17]
  ; X32-NEXT:    retl
  ;
  ; X64-LABEL: shuffle_combine_packuswb_pshufb:
  ; X64:       # BB#0:
-; X64-NEXT:    vpsrlw $8, %ymm0, %ymm0
-; X64-NEXT:    vpsrlw $8, %ymm1, %ymm1
-; X64-NEXT:    vpackuswb %ymm1, %ymm0, %ymm0
-; X64-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[7,6,5,4,3,2,1,0,7,6,5,4,3,2,1,0,23,22,21,20,19,18,17,16,23,22,21,20,19,18,17,16]
+; X64-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[15,13,11,9,7,5,3,1,15,13,11,9,7,5,3,1,31,29,27,25,23,21,19,17,31,29,27,25,23,21,19,17]
  ; X64-NEXT:    retq
    %1 = lshr <16 x i16> %a0, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
    %2 = lshr <16 x i16> %a1, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
diff --git a/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll b/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll

index 874d090794c747882e62b6f3fac46e14a279c58a..3abf0570abf53081d9526aaabb2a32a3b28ce4bb 100644 (file)
--- a/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll
+++ b/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll
@@ -683,18 +683,12 @@ declare <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16>, <8 x i16>) nounwind rea
  define <16 x i8> @shuffle_combine_packuswb_pshufb(<8 x i16> %a0, <8 x i16> %a1) {
  ; SSE-LABEL: shuffle_combine_packuswb_pshufb:
  ; SSE:       # BB#0:
-; SSE-NEXT:    psrlw $8, %xmm0
-; SSE-NEXT:    psrlw $8, %xmm1
-; SSE-NEXT:    packuswb %xmm1, %xmm0
-; SSE-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[7,6,5,4,3,2,1,0,7,6,5,4,3,2,1,0]
+; SSE-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[15,13,11,9,7,5,3,1,15,13,11,9,7,5,3,1]
  ; SSE-NEXT:    retq
  ;
  ; AVX-LABEL: shuffle_combine_packuswb_pshufb:
  ; AVX:       # BB#0:
-; AVX-NEXT:    vpsrlw $8, %xmm0, %xmm0
-; AVX-NEXT:    vpsrlw $8, %xmm1, %xmm1
-; AVX-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[7,6,5,4,3,2,1,0,7,6,5,4,3,2,1,0]
+; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[15,13,11,9,7,5,3,1,15,13,11,9,7,5,3,1]
  ; AVX-NEXT:    retq
    %1 = lshr <8 x i16> %a0, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
    %2 = lshr <8 x i16> %a1, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
author	Simon Pilgrim <llvm-dev@redking.me.uk>
	Sun, 1 Oct 2017 18:43:48 +0000 (18:43 +0000)
committer	Simon Pilgrim <llvm-dev@redking.me.uk>
	Sun, 1 Oct 2017 18:43:48 +0000 (18:43 +0000)
lib/Target/X86/X86ISelLowering.cpp		patch \| blob \| history
test/CodeGen/X86/vector-shuffle-combining-avx2.ll		patch \| blob \| history
test/CodeGen/X86/vector-shuffle-combining-ssse3.ll		patch \| blob \| history