From: Simon Pilgrim
Date: Tue, 27 Aug 2019 13:13:17 +0000 (+0000)
Subject: [X86][AVX] Add SimplifyDemandedVectorElts support for KSHIFTL/KSHIFTR
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=630d3bfa7cd5748504615aee230809e58a9aef96;p=llvm

[X86][AVX] Add SimplifyDemandedVectorElts support for KSHIFTL/KSHIFTR

Differential Revision: https://reviews.llvm.org/D66527

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@370055 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 72ef6bc6cf8..4fb7aa00223 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -34273,6 +34273,31 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
     // TODO convert SrcUndef to KnownUndef.
     break;
   }
+  case X86ISD::KSHIFTL:
+  case X86ISD::KSHIFTR: {
+    SDValue Src = Op.getOperand(0);
+    auto *Amt = cast<ConstantSDNode>(Op.getOperand(1));
+    assert(Amt->getAPIntValue().ult(NumElts) && "Out of range shift amount");
+    unsigned ShiftAmt = Amt->getZExtValue();
+    bool ShiftLeft = (X86ISD::KSHIFTL == Opc);
+
+    APInt DemandedSrc =
+        ShiftLeft ? DemandedElts.lshr(ShiftAmt) : DemandedElts.shl(ShiftAmt);
+    if (SimplifyDemandedVectorElts(Src, DemandedSrc, KnownUndef, KnownZero, TLO,
+                                   Depth + 1))
+      return true;
+
+    if (ShiftLeft) {
+      KnownUndef = KnownUndef.shl(ShiftAmt);
+      KnownZero = KnownZero.shl(ShiftAmt);
+      KnownZero.setLowBits(ShiftAmt);
+    } else {
+      KnownUndef = KnownUndef.lshr(ShiftAmt);
+      KnownZero = KnownZero.lshr(ShiftAmt);
+      KnownZero.setHighBits(ShiftAmt);
+    }
+    break;
+  }
   case X86ISD::CVTSI2P:
   case X86ISD::CVTUI2P: {
     SDValue Src = Op.getOperand(0);
diff --git a/test/CodeGen/X86/prefer-avx256-mask-shuffle.ll b/test/CodeGen/X86/prefer-avx256-mask-shuffle.ll
index 7abb3a043a6..bf5ba184fc0 100644
--- a/test/CodeGen/X86/prefer-avx256-mask-shuffle.ll
+++ b/test/CodeGen/X86/prefer-avx256-mask-shuffle.ll
@@ -153,9 +153,7 @@ define <32 x i1> @shuf32i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0_3_6_22_12_3_7_7_0
 ; AVX256VL-NEXT:    vpshufb {{.*#+}} ymm1 = ymm1[6,7,12,13,u,u,8,9,6,7,14,15,14,15,0,1,22,23,28,29,18,19,26,27,22,23,u,u,30,31,16,17]
 ; AVX256VL-NEXT:    vmovdqa32 %ymm0, %ymm2 {%k1} {z}
 ; AVX256VL-NEXT:    vpmovdw %ymm2, %xmm2
-; AVX256VL-NEXT:    kshiftrw $8, %k1, %k1
-; AVX256VL-NEXT:    vmovdqa32 %ymm0, %ymm3 {%k1} {z}
-; AVX256VL-NEXT:    vpmovdw %ymm3, %xmm3
+; AVX256VL-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
 ; AVX256VL-NEXT:    vinserti128 $1, %xmm3, %ymm2, %ymm2
 ; AVX256VL-NEXT:    vpermq {{.*#+}} ymm2 = ymm2[1,1,2,1]
 ; AVX256VL-NEXT:    vmovdqa {{.*#+}} ymm3 = [255,255,255,255,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,255,255,255,255]
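
For context (not part of the commit): the new case maps the demanded-element mask of the KSHIFT result back onto its source. KSHIFTL moves source element i to result element i + ShiftAmt, so the demanded mask is shifted right to find which source elements matter, and the source's known-zero/undef masks are shifted left on the way back, with the vacated low result lanes marked known zero (KSHIFTR is the mirror image). A minimal standalone sketch of that bookkeeping, using llvm::APInt directly; the helper name, the example masks, and the main() harness are illustrative, not from the patch:

```cpp
// Illustrative sketch only -- mirrors the mask bookkeeping in the patch,
// outside of SelectionDAG. Needs LLVMSupport for llvm::APInt.
#include "llvm/ADT/APInt.h"
#include <bitset>
#include <cstdio>

using llvm::APInt;

// Map a demanded-element mask across KSHIFTL by ShiftAmt, and compute the
// result's known-zero mask from the source's known-zero mask.
static void kshiftlDemanded(const APInt &DemandedElts,
                            const APInt &SrcKnownZero, unsigned ShiftAmt,
                            APInt &DemandedSrc, APInt &KnownZero) {
  // Result element i comes from source element i - ShiftAmt, so the demanded
  // mask moves right when asking the source which elements are needed.
  DemandedSrc = DemandedElts.lshr(ShiftAmt);
  // Source known-zero elements move left with the shift, and the vacated low
  // result elements are always zero.
  KnownZero = SrcKnownZero.shl(ShiftAmt);
  KnownZero.setLowBits(ShiftAmt);
}

int main() {
  const unsigned NumElts = 16;
  APInt DemandedElts(NumElts, 0xFF00); // only the upper 8 mask lanes are used
  APInt SrcKnownZero(NumElts, 0x000F); // source's low 4 lanes are known zero
  APInt DemandedSrc(NumElts, 0), KnownZero(NumElts, 0);

  kshiftlDemanded(DemandedElts, SrcKnownZero, /*ShiftAmt=*/8, DemandedSrc,
                  KnownZero);

  printf("DemandedSrc = %s\n",
         std::bitset<16>(DemandedSrc.getZExtValue()).to_string().c_str());
  printf("KnownZero   = %s\n",
         std::bitset<16>(KnownZero.getZExtValue()).to_string().c_str());
  // DemandedSrc = 0000000011111111: only the low 8 source lanes feed the
  // demanded result lanes.
  // KnownZero   = 0000111111111111: the 8 shifted-in lanes plus the source's
  // known-zero lanes after the shift.
  return 0;
}
```

This is the same reasoning that lets the test above drop the kshiftrw/vmovdqa32/vpmovdw sequence: once only a subset of the mask lanes is demanded, the shifted mask simplifies and the masked load of an all-ones vector folds to vpcmpeqd.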