From: Craig Topper
Date: Sun, 18 Aug 2019 18:52:46 +0000 (+0000)
Subject: [X86] Improve lower1BitShuffle handling for KSHIFTL on narrow vectors.
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=9b62c8ee424e72799c32ca7756b15d21fdf777f2;p=llvm

[X86] Improve lower1BitShuffle handling for KSHIFTL on narrow vectors.

We can insert the value into a larger legal type, shift that by the
desired amount, and extract the original width from the result.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@369215 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index c3e2c33ca72..4d9e62b31b6 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -16562,20 +16562,36 @@ static SDValue lower1BitShuffle(const SDLoc &DL, ArrayRef<int> Mask,
   }
 
   // Try to match KSHIFTs.
-  // TODO: Support narrower than legal shifts by widening and extracting.
-  if (NumElts >= 16 || (Subtarget.hasDQI() && NumElts == 8)) {
-    unsigned Offset = 0;
-    for (SDValue V : { V1, V2 }) {
-      unsigned Opcode;
-      int ShiftAmt = match1BitShuffleAsKSHIFT(Opcode, Mask, Offset, Zeroable);
-      if (ShiftAmt >= 0)
+  unsigned Offset = 0;
+  for (SDValue V : { V1, V2 }) {
+    unsigned Opcode;
+    int ShiftAmt = match1BitShuffleAsKSHIFT(Opcode, Mask, Offset, Zeroable);
+    if (ShiftAmt >= 0) {
+      // FIXME: We can't easily widen an illegal right shift if we need to shift
+      // in zeroes.
+      if (Opcode == X86ISD::KSHIFTR &&
+          (NumElts >= 16 || (Subtarget.hasDQI() && NumElts == 8)))
         return DAG.getNode(Opcode, DL, VT, V,
                            DAG.getConstant(ShiftAmt, DL, MVT::i8));
-      Offset += NumElts; // Increment for next iteration.
+      if (Opcode == X86ISD::KSHIFTL) {
+        // If this is a shift left, we can widen the VT to a supported kshiftl.
+        MVT WideVT = VT;
+        if ((!Subtarget.hasDQI() && NumElts == 8) || NumElts < 8)
+          WideVT = Subtarget.hasDQI() ? MVT::v8i1 : MVT::v16i1;
+        SDValue Res = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, WideVT,
+                                  DAG.getUNDEF(WideVT), V,
+                                  DAG.getIntPtrConstant(0, DL));
+        Res = DAG.getNode(Opcode, DL, WideVT, Res,
+                          DAG.getConstant(ShiftAmt, DL, MVT::i8));
+        return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res,
+                           DAG.getIntPtrConstant(0, DL));
+      }
     }
+    Offset += NumElts; // Increment for next iteration.
   }
+
   MVT ExtVT;
   switch (VT.SimpleTy) {
   default:
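The FIXME above is the crux of why only KSHIFTL is widened: a left shift
performed in a wider mask register only pulls zeroes in from below, so the
undef upper lanes produced by INSERT_SUBVECTOR can never reach the lanes
that EXTRACT_SUBVECTOR keeps, while a right shift would drag those undef
lanes down into the live result. A standalone sketch of that invariant,
modeling a v8i1 mask widened to a 16-bit k-register in plain C++ rather
than the SelectionDAG API (the mask value, shift amount, and garbage
constant are arbitrary stand-ins, not values from the patch):

  #include <cassert>
  #include <cstdint>
  #include <cstdio>

  int main() {
    const uint8_t Mask = 0xAB;   // arbitrary v8i1 mask, one bit per element
    const unsigned ShiftAmt = 3; // arbitrary in-range shift amount

    // INSERT_SUBVECTOR into undef: the low 8 bits hold the mask, the
    // upper 8 bits are whatever the register held (modeled as garbage).
    const uint16_t Garbage = 0xF700;
    uint16_t Wide = Garbage | Mask;

    // KSHIFTL on the wide type, then EXTRACT_SUBVECTOR (truncation).
    // A left shift only feeds zeroes in from below, so the garbage in
    // the upper lanes never reaches the low 8 bits.
    uint8_t NarrowShl = (uint8_t)(Wide << ShiftAmt);
    assert(NarrowShl == (uint8_t)(Mask << ShiftAmt));

    // A right shift is not safe to widen the same way: the undef upper
    // lanes are shifted down into the live low lanes, which is exactly
    // the FIXME about illegal right shifts that must shift in zeroes.
    uint8_t NarrowShr = (uint8_t)(Wide >> ShiftAmt);
    printf("shl %#x, bad shr %#x, want shr %#x\n", NarrowShl, NarrowShr,
           (uint8_t)(Mask >> ShiftAmt)); // prints 0x58, 0xf5, 0x15
    return 0;
  }

The same reasoning explains the WideVT choice in the patch: AVX512F only
has 16-bit and wider mask shifts (kshiftlw and up), so v8i1 and narrower
masks are widened to v16i1 unless DQI provides the 8-bit kshiftlb.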
diff --git a/test/CodeGen/X86/kshift.ll b/test/CodeGen/X86/kshift.ll
index 245ba5c299f..919299f1347 100644
--- a/test/CodeGen/X86/kshift.ll
+++ b/test/CodeGen/X86/kshift.ll
@@ -5,13 +5,9 @@
 define i8 @kshiftl_v8i1_1(<8 x i64> %x, <8 x i64> %y) {
 ; KNL-LABEL: kshiftl_v8i1_1:
 ; KNL:       # %bb.0:
-; KNL-NEXT:    vptestnmq %zmm0, %zmm0, %k1
-; KNL-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; KNL-NEXT:    movb $-2, %al
-; KNL-NEXT:    kmovw %eax, %k1
-; KNL-NEXT:    vpexpandq %zmm0, %zmm0 {%k1} {z}
-; KNL-NEXT:    vptestnmq %zmm1, %zmm1, %k1
-; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k0 {%k1}
+; KNL-NEXT:    vptestnmq %zmm0, %zmm0, %k0
+; KNL-NEXT:    kshiftlw $1, %k0, %k1
+; KNL-NEXT:    vptestnmq %zmm1, %zmm1, %k0 {%k1}
 ; KNL-NEXT:    kmovw %k0, %eax
 ; KNL-NEXT:    # kill: def $al killed $al killed $eax
 ; KNL-NEXT:    vzeroupper
@@ -177,13 +173,9 @@ define i64 @kshiftl_v64i1_1(<64 x i8> %x, <64 x i8> %y) {
 define i8 @kshiftl_v8i1_7(<8 x i64> %x, <8 x i64> %y) {
 ; KNL-LABEL: kshiftl_v8i1_7:
 ; KNL:       # %bb.0:
-; KNL-NEXT:    vptestnmq %zmm0, %zmm0, %k1
-; KNL-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; KNL-NEXT:    movb $-128, %al
-; KNL-NEXT:    kmovw %eax, %k1
-; KNL-NEXT:    vpexpandq %zmm0, %zmm0 {%k1} {z}
-; KNL-NEXT:    vptestnmq %zmm1, %zmm1, %k1
-; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k0 {%k1}
+; KNL-NEXT:    vptestnmq %zmm0, %zmm0, %k0
+; KNL-NEXT:    kshiftlw $7, %k0, %k1
+; KNL-NEXT:    vptestnmq %zmm1, %zmm1, %k0 {%k1}
 ; KNL-NEXT:    kmovw %k0, %eax
 ; KNL-NEXT:    # kill: def $al killed $al killed $eax
 ; KNL-NEXT:    vzeroupper
@@ -475,13 +467,9 @@ define i64 @kshiftr_v64i1_1(<64 x i8> %x, <64 x i8> %y) {
 define i8 @kshiftr_v8i1_7(<8 x i64> %x, <8 x i64> %y) {
 ; KNL-LABEL: kshiftr_v8i1_7:
 ; KNL:       # %bb.0:
-; KNL-NEXT:    vptestnmq %zmm0, %zmm0, %k1
-; KNL-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; KNL-NEXT:    movb $-2, %al
-; KNL-NEXT:    kmovw %eax, %k1
-; KNL-NEXT:    vpexpandq %zmm0, %zmm0 {%k1} {z}
-; KNL-NEXT:    vptestnmq %zmm1, %zmm1, %k1
-; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k0 {%k1}
+; KNL-NEXT:    vptestnmq %zmm0, %zmm0, %k0
+; KNL-NEXT:    kshiftlw $1, %k0, %k1
+; KNL-NEXT:    vptestnmq %zmm1, %zmm1, %k0 {%k1}
 ; KNL-NEXT:    kmovw %k0, %eax
 ; KNL-NEXT:    # kill: def $al killed $al killed $eax
 ; KNL-NEXT:    vzeroupper
@@ -599,14 +587,9 @@ define i64 @kshiftr_v64i1_63(<64 x i8> %x, <64 x i8> %y) {
 define i8 @kshiftl_v8i1_zu123u56(<8 x i64> %x, <8 x i64> %y) {
 ; KNL-LABEL: kshiftl_v8i1_zu123u56:
 ; KNL:       # %bb.0:
-; KNL-NEXT:    vptestnmq %zmm0, %zmm0, %k1
-; KNL-NEXT:    vpxor %xmm0, %xmm0, %xmm0
-; KNL-NEXT:    vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; KNL-NEXT:    vmovdqa64 {{.*#+}} zmm3 = <8,u,1,2,3,u,5,6>
-; KNL-NEXT:    vpermi2q %zmm0, %zmm2, %zmm3
-; KNL-NEXT:    vpsllq $63, %zmm3, %zmm0
-; KNL-NEXT:    vptestnmq %zmm1, %zmm1, %k1
-; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k0 {%k1}
+; KNL-NEXT:    vptestnmq %zmm0, %zmm0, %k0
+; KNL-NEXT:    kshiftlw $1, %k0, %k1
+; KNL-NEXT:    vptestnmq %zmm1, %zmm1, %k0 {%k1}
 ; KNL-NEXT:    kmovw %k0, %eax
 ; KNL-NEXT:    # kill: def $al killed $al killed $eax
 ; KNL-NEXT:    vzeroupper
@@ -632,12 +615,9 @@ define i8 @kshiftl_v8i1_zu123u56(<8 x i64> %x, <8 x i64> %y) {
 define i8 @kshiftl_v8i1_u0123456(<8 x i64> %x, <8 x i64> %y) {
 ; KNL-LABEL: kshiftl_v8i1_u0123456:
 ; KNL:       # %bb.0:
-; KNL-NEXT:    vptestnmq %zmm0, %zmm0, %k1
-; KNL-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; KNL-NEXT:    valignq {{.*#+}} zmm0 = zmm0[7,0,1,2,3,4,5,6]
-; KNL-NEXT:    vpsllq $63, %zmm0, %zmm0
-; KNL-NEXT:    vptestnmq %zmm1, %zmm1, %k1
-; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k0 {%k1}
+; KNL-NEXT:    vptestnmq %zmm0, %zmm0, %k0
+; KNL-NEXT:    kshiftlw $1, %k0, %k1
+; KNL-NEXT:    vptestnmq %zmm1, %zmm1, %k0 {%k1}
 ; KNL-NEXT:    kmovw %k0, %eax
 ; KNL-NEXT:    # kill: def $al killed $al killed $eax
 ; KNL-NEXT:    vzeroupper
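For reading the CHECK lines: each test builds a v8i1 mask from an x == 0
compare (vptestnmq), applies the shuffle, and ANDs the result with a
y == 0 mask through the {%k1}-masked second compare. A scalar model of
the new three-instruction KNL sequence for kshiftl_v8i1_1, with
hypothetical helper names (kshiftlw shifts all 16 mask bits, but only
the low 8 survive the i8 return, so a uint8_t shift behaves the same
here):

  #include <cstdint>

  // Bit i of the result is set iff V[i] == 0, mirroring vptestnmq.
  static uint8_t testnmq(const uint64_t V[8]) {
    uint8_t M = 0;
    for (int i = 0; i < 8; ++i)
      M |= (uint8_t)((V[i] == 0) << i);
    return M;
  }

  uint8_t kshiftl_v8i1_1_model(const uint64_t X[8], const uint64_t Y[8]) {
    uint8_t K0 = testnmq(X);         // vptestnmq %zmm0, %zmm0, %k0
    uint8_t K1 = (uint8_t)(K0 << 1); // kshiftlw $1, %k0, %k1
    return K1 & testnmq(Y);          // vptestnmq %zmm1, %zmm1, %k0 {%k1}
  }

The payoff visible in every hunk is the same: a compare, a mask shift,
and a masked compare replace the old detour through zmm registers
(vpternlogq to materialize the mask as vector elements, vpexpandq or a
permute to do the shuffle, and vpsllq/vptestmq to move it back into a
k-register).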