[X86][AVX] Add SimplifyDemandedVectorElts support for KSHIFTL/KSHIFTR
author    Simon Pilgrim <llvm-dev@redking.me.uk>
          Tue, 27 Aug 2019 13:13:17 +0000 (13:13 +0000)
committer Simon Pilgrim <llvm-dev@redking.me.uk>
          Tue, 27 Aug 2019 13:13:17 +0000 (13:13 +0000)
Differential Revision: https://reviews.llvm.org/D66527

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@370055 91177308-0d34-0410-b5e6-96231b3b80d8

lib/Target/X86/X86ISelLowering.cpp
test/CodeGen/X86/prefer-avx256-mask-shuffle.ll

diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 72ef6bc6cf87162f46a30374a368b7ae6f7efb25..4fb7aa00223af3dad73be071a5a8092cd2ec453d 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -34273,6 +34273,31 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
     // TODO convert SrcUndef to KnownUndef.
     break;
   }
+  case X86ISD::KSHIFTL:
+  case X86ISD::KSHIFTR: {
+    SDValue Src = Op.getOperand(0);
+    auto *Amt = cast<ConstantSDNode>(Op.getOperand(1));
+    assert(Amt->getAPIntValue().ult(NumElts) && "Out of range shift amount");
+    unsigned ShiftAmt = Amt->getZExtValue();
+    bool ShiftLeft = (X86ISD::KSHIFTL == Opc);
+
+    APInt DemandedSrc =
+        ShiftLeft ? DemandedElts.lshr(ShiftAmt) : DemandedElts.shl(ShiftAmt);
+    if (SimplifyDemandedVectorElts(Src, DemandedSrc, KnownUndef, KnownZero, TLO,
+                                   Depth + 1))
+      return true;
+
+    if (ShiftLeft) {
+      KnownUndef = KnownUndef.shl(ShiftAmt);
+      KnownZero = KnownZero.shl(ShiftAmt);
+      KnownZero.setLowBits(ShiftAmt);
+    } else {
+      KnownUndef = KnownUndef.lshr(ShiftAmt);
+      KnownZero = KnownZero.lshr(ShiftAmt);
+      KnownZero.setHighBits(ShiftAmt);
+    }
+    break;
+  }
   case X86ISD::CVTSI2P:
   case X86ISD::CVTUI2P: {
     SDValue Src = Op.getOperand(0);
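
For reference, here is a minimal standalone sketch (not LLVM code) of the remapping the new KSHIFTL/KSHIFTR case performs: a left shift by Amt moves source lane i to result lane i+Amt, so the demanded source lanes are the demanded result lanes shifted the opposite way, and the lanes shifted in become known zero (mirrored for the right shift). It assumes a 16-lane kshiftw-style predicate and uses plain uint16_t bitmasks in place of APInt and the KnownUndef bookkeeping; the helper names are illustrative only.

// Standalone sketch of the demanded-elements remapping for a whole-element
// mask shift; uint16_t stands in for a 16-lane predicate (one bit per lane).
#include <cstdint>
#include <cstdio>

struct ShiftInfo {
  unsigned Amt; // element shift amount, must be < 16
  bool Left;    // true for a KSHIFTL-style shift, false for KSHIFTR
};

// Source lanes needed to produce the demanded result lanes: a left shift by
// Amt reads result lane i from source lane i-Amt, so the demanded source
// lanes are the demanded result lanes shifted right, and vice versa.
uint16_t demandedSrcFor(uint16_t DemandedElts, ShiftInfo S) {
  return S.Left ? uint16_t(DemandedElts >> S.Amt)
                : uint16_t(DemandedElts << S.Amt);
}

// Result lanes guaranteed zero: lanes shifted in from outside the vector are
// zero, and known-zero source lanes stay zero after being moved.
uint16_t knownZeroFor(uint16_t SrcKnownZero, ShiftInfo S) {
  uint16_t ShiftedIn = S.Left ? uint16_t((1u << S.Amt) - 1)    // low lanes
                              : uint16_t(~0u << (16 - S.Amt)); // high lanes
  uint16_t Moved = S.Left ? uint16_t(SrcKnownZero << S.Amt)
                          : uint16_t(SrcKnownZero >> S.Amt);
  return uint16_t(Moved | ShiftedIn);
}

int main() {
  ShiftInfo S{8, /*Left=*/false}; // models a kshiftrw $8
  uint16_t Demanded = 0x00FF;     // caller only reads result lanes 0..7
  std::printf("demanded source lanes:   0x%04x\n",
              (unsigned)demandedSrcFor(Demanded, S));
  std::printf("known zero result lanes: 0x%04x\n",
              (unsigned)knownZeroFor(0, S));
  return 0;
}

With the kshiftrw $8 example, only source lanes 8..15 are demanded (0xff00) and result lanes 8..15 are known zero, which is the kind of information the combine can now report back for the shifted mask.
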
diff --git a/test/CodeGen/X86/prefer-avx256-mask-shuffle.ll b/test/CodeGen/X86/prefer-avx256-mask-shuffle.ll
index 7abb3a043a6fe14a61220e0e52c10e3e5e2e8400..bf5ba184fc00510b42bfb46afcf57e733692f0e3 100644
--- a/test/CodeGen/X86/prefer-avx256-mask-shuffle.ll
+++ b/test/CodeGen/X86/prefer-avx256-mask-shuffle.ll
@@ -153,9 +153,7 @@ define <32 x i1> @shuf32i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0_3_6_22_12_3_7_7_0
 ; AVX256VL-NEXT:    vpshufb {{.*#+}} ymm1 = ymm1[6,7,12,13,u,u,8,9,6,7,14,15,14,15,0,1,22,23,28,29,18,19,26,27,22,23,u,u,30,31,16,17]
 ; AVX256VL-NEXT:    vmovdqa32 %ymm0, %ymm2 {%k1} {z}
 ; AVX256VL-NEXT:    vpmovdw %ymm2, %xmm2
-; AVX256VL-NEXT:    kshiftrw $8, %k1, %k1
-; AVX256VL-NEXT:    vmovdqa32 %ymm0, %ymm3 {%k1} {z}
-; AVX256VL-NEXT:    vpmovdw %ymm3, %xmm3
+; AVX256VL-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
 ; AVX256VL-NEXT:    vinserti128 $1, %xmm3, %ymm2, %ymm2
 ; AVX256VL-NEXT:    vpermq {{.*#+}} ymm2 = ymm2[1,1,2,1]
 ; AVX256VL-NEXT:    vmovdqa {{.*#+}} ymm3 = [255,255,255,255,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,255,255,255,255]