From: Simon Pilgrim Date: Wed, 1 May 2019 13:51:09 +0000 (+0000) Subject: [X86][SSE] Add SSE vector shift support to SimplifyDemandedVectorEltsForTargetNode... X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=8dd2ffc9c56757cc41c9091bb1a26247ba7126fa;p=llvm [X86][SSE] Add SSE vector shift support to SimplifyDemandedVectorEltsForTargetNode vector splitting git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@359680 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 657832ab74b..0cc7c157b74 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -33413,6 +33413,27 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode( ExtSizeInBits = SizeInBits / 4; switch (Opc) { + // Byte shifts by immediate. + case X86ISD::VSHLDQ: + case X86ISD::VSRLDQ: + // Shift by uniform. + case X86ISD::VSHL: + case X86ISD::VSRL: + case X86ISD::VSRA: + // Shift by immediate. + case X86ISD::VSHLI: + case X86ISD::VSRLI: + case X86ISD::VSRAI: { + SDLoc DL(Op); + SDValue Ext0 = + extractSubVector(Op.getOperand(0), 0, TLO.DAG, DL, ExtSizeInBits); + SDValue ExtOp = + TLO.DAG.getNode(Opc, DL, Ext0.getValueType(), Ext0, Op.getOperand(1)); + SDValue UndefVec = TLO.DAG.getUNDEF(VT); + SDValue Insert = + insertSubVector(UndefVec, ExtOp, 0, TLO.DAG, DL, ExtSizeInBits); + return TLO.CombineTo(Op, Insert); + } // Target Shuffles. case X86ISD::PSHUFB: case X86ISD::UNPCKL: diff --git a/test/CodeGen/X86/combine-udiv.ll b/test/CodeGen/X86/combine-udiv.ll index 9da6a988e77..bb7583b4a33 100644 --- a/test/CodeGen/X86/combine-udiv.ll +++ b/test/CodeGen/X86/combine-udiv.ll @@ -679,36 +679,21 @@ define <16 x i8> @combine_vec_udiv_nonuniform4(<16 x i8> %x) { ; SSE41-NEXT: movdqa %xmm2, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: combine_vec_udiv_nonuniform4: -; AVX1: # %bb.0: -; AVX1-NEXT: movl $171, %eax -; AVX1-NEXT: vmovd %eax, %xmm1 -; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero -; AVX1-NEXT: vpmullw %xmm1, %xmm2, %xmm1 -; AVX1-NEXT: vpsrlw $8, %xmm1, %xmm1 -; AVX1-NEXT: vpsllw $1, %xmm1, %xmm2 -; AVX1-NEXT: vpsllw $8, %xmm1, %xmm1 -; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0],xmm1[1,2,3,4,5,6,7] -; AVX1-NEXT: vpsrlw $8, %xmm1, %xmm1 -; AVX1-NEXT: vpackuswb %xmm0, %xmm1, %xmm1 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX1-NEXT: vpblendvb %xmm2, %xmm0, %xmm1, %xmm0 -; AVX1-NEXT: retq -; -; AVX2-LABEL: combine_vec_udiv_nonuniform4: -; AVX2: # %bb.0: -; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero -; AVX2-NEXT: movl $171, %eax -; AVX2-NEXT: vmovd %eax, %xmm2 -; AVX2-NEXT: vpmullw %ymm2, %ymm1, %ymm1 -; AVX2-NEXT: vpsrlw $8, %ymm1, %ymm1 -; AVX2-NEXT: vpmullw {{.*}}(%rip), %ymm1, %ymm1 -; AVX2-NEXT: vpsrlw $8, %ymm1, %ymm1 -; AVX2-NEXT: vpackuswb %xmm0, %xmm1, %xmm1 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX2-NEXT: vpblendvb %xmm2, %xmm0, %xmm1, %xmm0 -; AVX2-NEXT: vzeroupper -; AVX2-NEXT: retq +; AVX-LABEL: combine_vec_udiv_nonuniform4: +; AVX: # %bb.0: +; AVX-NEXT: movl $171, %eax +; AVX-NEXT: vmovd %eax, %xmm1 +; AVX-NEXT: vpmovzxbw {{.*#+}} xmm2 = 
xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; AVX-NEXT: vpmullw %xmm1, %xmm2, %xmm1 +; AVX-NEXT: vpsrlw $8, %xmm1, %xmm1 +; AVX-NEXT: vpsllw $1, %xmm1, %xmm2 +; AVX-NEXT: vpsllw $8, %xmm1, %xmm1 +; AVX-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0],xmm1[1,2,3,4,5,6,7] +; AVX-NEXT: vpsrlw $8, %xmm1, %xmm1 +; AVX-NEXT: vpackuswb %xmm0, %xmm1, %xmm1 +; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX-NEXT: vpblendvb %xmm2, %xmm0, %xmm1, %xmm0 +; AVX-NEXT: retq ; ; XOP-LABEL: combine_vec_udiv_nonuniform4: ; XOP: # %bb.0: diff --git a/test/CodeGen/X86/vector-fshl-256.ll b/test/CodeGen/X86/vector-fshl-256.ll index 349e5455345..dd9028dd92b 100644 --- a/test/CodeGen/X86/vector-fshl-256.ll +++ b/test/CodeGen/X86/vector-fshl-256.ll @@ -1491,16 +1491,16 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> % ; AVX2-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2 ; AVX2-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero ; AVX2-NEXT: vpsllw %xmm3, %ymm0, %ymm4 -; AVX2-NEXT: vpcmpeqd %ymm5, %ymm5, %ymm5 -; AVX2-NEXT: vpsllw %xmm3, %ymm5, %ymm3 +; AVX2-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5 +; AVX2-NEXT: vpsllw %xmm3, %xmm5, %xmm3 ; AVX2-NEXT: vpbroadcastb %xmm3, %ymm3 ; AVX2-NEXT: vpand %ymm3, %ymm4, %ymm3 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm4 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] ; AVX2-NEXT: vpsubb %xmm2, %xmm4, %xmm4 ; AVX2-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero ; AVX2-NEXT: vpsrlw %xmm4, %ymm1, %ymm1 -; AVX2-NEXT: vpsrlw %xmm4, %ymm5, %ymm4 -; AVX2-NEXT: vpsrlw $8, %ymm4, %ymm4 +; AVX2-NEXT: vpsrlw %xmm4, %xmm5, %xmm4 +; AVX2-NEXT: vpsrlw $8, %xmm4, %xmm4 ; AVX2-NEXT: vpbroadcastb %xmm4, %ymm4 ; AVX2-NEXT: vpand %ymm4, %ymm1, %ymm1 ; AVX2-NEXT: vpor %ymm1, %ymm3, %ymm1 @@ -1515,16 +1515,16 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> % ; AVX512F-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2 ; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero ; AVX512F-NEXT: vpsllw %xmm3, %ymm0, %ymm4 -; AVX512F-NEXT: vpcmpeqd %ymm5, %ymm5, %ymm5 -; AVX512F-NEXT: vpsllw %xmm3, %ymm5, %ymm3 +; AVX512F-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5 +; AVX512F-NEXT: vpsllw %xmm3, %xmm5, %xmm3 ; AVX512F-NEXT: vpbroadcastb %xmm3, %ymm3 ; AVX512F-NEXT: vpand %ymm3, %ymm4, %ymm3 ; AVX512F-NEXT: vmovdqa {{.*#+}} xmm4 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] ; AVX512F-NEXT: vpsubb %xmm2, %xmm4, %xmm4 ; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero ; AVX512F-NEXT: vpsrlw %xmm4, %ymm1, %ymm1 -; AVX512F-NEXT: vpsrlw %xmm4, %ymm5, %ymm4 -; AVX512F-NEXT: vpsrlw $8, %ymm4, %ymm4 +; AVX512F-NEXT: vpsrlw %xmm4, %xmm5, %xmm4 +; AVX512F-NEXT: vpsrlw $8, %xmm4, %xmm4 ; AVX512F-NEXT: vpbroadcastb %xmm4, %ymm4 ; AVX512F-NEXT: vpand %ymm4, %ymm1, %ymm1 ; AVX512F-NEXT: vpor %ymm1, %ymm3, %ymm1 @@ -1539,16 +1539,16 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> % ; AVX512VL-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2 ; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero ; AVX512VL-NEXT: vpsllw %xmm3, %ymm0, %ymm4 -; AVX512VL-NEXT: vpcmpeqd %ymm5, %ymm5, %ymm5 -; AVX512VL-NEXT: vpsllw %xmm3, %ymm5, %ymm3 +; 
AVX512VL-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5 +; AVX512VL-NEXT: vpsllw %xmm3, %xmm5, %xmm3 ; AVX512VL-NEXT: vpbroadcastb %xmm3, %ymm3 ; AVX512VL-NEXT: vpand %ymm3, %ymm4, %ymm3 ; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm4 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] ; AVX512VL-NEXT: vpsubb %xmm2, %xmm4, %xmm4 ; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero ; AVX512VL-NEXT: vpsrlw %xmm4, %ymm1, %ymm1 -; AVX512VL-NEXT: vpsrlw %xmm4, %ymm5, %ymm4 -; AVX512VL-NEXT: vpsrlw $8, %ymm4, %ymm4 +; AVX512VL-NEXT: vpsrlw %xmm4, %xmm5, %xmm4 +; AVX512VL-NEXT: vpsrlw $8, %xmm4, %xmm4 ; AVX512VL-NEXT: vpbroadcastb %xmm4, %ymm4 ; AVX512VL-NEXT: vpand %ymm4, %ymm1, %ymm1 ; AVX512VL-NEXT: vpor %ymm1, %ymm3, %ymm1 diff --git a/test/CodeGen/X86/vector-fshl-512.ll b/test/CodeGen/X86/vector-fshl-512.ll index 648948085f3..7e01a5999c6 100644 --- a/test/CodeGen/X86/vector-fshl-512.ll +++ b/test/CodeGen/X86/vector-fshl-512.ll @@ -799,27 +799,27 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y, <64 x i8> % ; AVX512F-NEXT: vpand {{.*}}(%rip), %ymm4, %ymm4 ; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm5 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero ; AVX512F-NEXT: vpsllw %xmm5, %ymm0, %ymm6 -; AVX512F-NEXT: vpcmpeqd %ymm9, %ymm9, %ymm9 -; AVX512F-NEXT: vpsllw %xmm5, %ymm9, %ymm8 -; AVX512F-NEXT: vpbroadcastb %xmm8, %ymm8 -; AVX512F-NEXT: vpand %ymm8, %ymm6, %ymm6 -; AVX512F-NEXT: vmovdqa {{.*#+}} xmm7 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] -; AVX512F-NEXT: vpsubb %xmm4, %xmm7, %xmm7 -; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm7 = xmm7[0],zero,zero,zero,zero,zero,zero,zero,xmm7[1],zero,zero,zero,zero,zero,zero,zero -; AVX512F-NEXT: vpsrlw %xmm7, %ymm2, %ymm2 -; AVX512F-NEXT: vpsrlw %xmm7, %ymm9, %ymm9 -; AVX512F-NEXT: vpsrlw $8, %ymm9, %ymm9 -; AVX512F-NEXT: vpbroadcastb %xmm9, %ymm9 -; AVX512F-NEXT: vpand %ymm9, %ymm2, %ymm2 -; AVX512F-NEXT: vpor %ymm2, %ymm6, %ymm2 -; AVX512F-NEXT: vpxor %xmm6, %xmm6, %xmm6 -; AVX512F-NEXT: vpcmpeqb %ymm6, %ymm4, %ymm4 -; AVX512F-NEXT: vpblendvb %ymm4, %ymm0, %ymm2, %ymm0 -; AVX512F-NEXT: vpsllw %xmm5, %ymm1, %ymm2 -; AVX512F-NEXT: vpand %ymm8, %ymm2, %ymm2 -; AVX512F-NEXT: vpsrlw %xmm7, %ymm3, %ymm3 -; AVX512F-NEXT: vpand %ymm9, %ymm3, %ymm3 -; AVX512F-NEXT: vpor %ymm3, %ymm2, %ymm2 +; AVX512F-NEXT: vpcmpeqd %xmm8, %xmm8, %xmm8 +; AVX512F-NEXT: vpsllw %xmm5, %xmm8, %xmm7 +; AVX512F-NEXT: vpbroadcastb %xmm7, %ymm7 +; AVX512F-NEXT: vpand %ymm7, %ymm6, %ymm9 +; AVX512F-NEXT: vmovdqa {{.*#+}} xmm6 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] +; AVX512F-NEXT: vpsubb %xmm4, %xmm6, %xmm6 +; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm6 = xmm6[0],zero,zero,zero,zero,zero,zero,zero,xmm6[1],zero,zero,zero,zero,zero,zero,zero +; AVX512F-NEXT: vpsrlw %xmm6, %ymm2, %ymm10 +; AVX512F-NEXT: vpsrlw %xmm6, %xmm8, %xmm2 +; AVX512F-NEXT: vpsrlw $8, %xmm2, %xmm2 +; AVX512F-NEXT: vpbroadcastb %xmm2, %ymm2 +; AVX512F-NEXT: vpand %ymm2, %ymm10, %ymm8 +; AVX512F-NEXT: vpor %ymm8, %ymm9, %ymm8 +; AVX512F-NEXT: vpxor %xmm9, %xmm9, %xmm9 +; AVX512F-NEXT: vpcmpeqb %ymm9, %ymm4, %ymm4 +; AVX512F-NEXT: vpblendvb %ymm4, %ymm0, %ymm8, %ymm0 +; AVX512F-NEXT: vpsllw %xmm5, %ymm1, %ymm5 +; AVX512F-NEXT: vpand %ymm7, %ymm5, %ymm5 +; AVX512F-NEXT: vpsrlw %xmm6, %ymm3, %ymm3 +; AVX512F-NEXT: vpand %ymm2, %ymm3, %ymm2 +; AVX512F-NEXT: vpor %ymm2, %ymm5, %ymm2 ; AVX512F-NEXT: vpblendvb %ymm4, %ymm1, %ymm2, %ymm1 ; AVX512F-NEXT: retq ; @@ -829,27 +829,27 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y, <64 
x i8> % ; AVX512VL-NEXT: vpand {{.*}}(%rip), %ymm4, %ymm4 ; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm5 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero ; AVX512VL-NEXT: vpsllw %xmm5, %ymm0, %ymm6 -; AVX512VL-NEXT: vpcmpeqd %ymm9, %ymm9, %ymm9 -; AVX512VL-NEXT: vpsllw %xmm5, %ymm9, %ymm8 -; AVX512VL-NEXT: vpbroadcastb %xmm8, %ymm8 -; AVX512VL-NEXT: vpand %ymm8, %ymm6, %ymm6 -; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm7 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] -; AVX512VL-NEXT: vpsubb %xmm4, %xmm7, %xmm7 -; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm7 = xmm7[0],zero,zero,zero,zero,zero,zero,zero,xmm7[1],zero,zero,zero,zero,zero,zero,zero -; AVX512VL-NEXT: vpsrlw %xmm7, %ymm2, %ymm2 -; AVX512VL-NEXT: vpsrlw %xmm7, %ymm9, %ymm9 -; AVX512VL-NEXT: vpsrlw $8, %ymm9, %ymm9 -; AVX512VL-NEXT: vpbroadcastb %xmm9, %ymm9 -; AVX512VL-NEXT: vpand %ymm9, %ymm2, %ymm2 -; AVX512VL-NEXT: vpor %ymm2, %ymm6, %ymm2 -; AVX512VL-NEXT: vpxor %xmm6, %xmm6, %xmm6 -; AVX512VL-NEXT: vpcmpeqb %ymm6, %ymm4, %ymm4 -; AVX512VL-NEXT: vpblendvb %ymm4, %ymm0, %ymm2, %ymm0 -; AVX512VL-NEXT: vpsllw %xmm5, %ymm1, %ymm2 -; AVX512VL-NEXT: vpand %ymm8, %ymm2, %ymm2 -; AVX512VL-NEXT: vpsrlw %xmm7, %ymm3, %ymm3 -; AVX512VL-NEXT: vpand %ymm9, %ymm3, %ymm3 -; AVX512VL-NEXT: vpor %ymm3, %ymm2, %ymm2 +; AVX512VL-NEXT: vpcmpeqd %xmm8, %xmm8, %xmm8 +; AVX512VL-NEXT: vpsllw %xmm5, %xmm8, %xmm7 +; AVX512VL-NEXT: vpbroadcastb %xmm7, %ymm7 +; AVX512VL-NEXT: vpand %ymm7, %ymm6, %ymm9 +; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm6 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] +; AVX512VL-NEXT: vpsubb %xmm4, %xmm6, %xmm6 +; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm6 = xmm6[0],zero,zero,zero,zero,zero,zero,zero,xmm6[1],zero,zero,zero,zero,zero,zero,zero +; AVX512VL-NEXT: vpsrlw %xmm6, %ymm2, %ymm10 +; AVX512VL-NEXT: vpsrlw %xmm6, %xmm8, %xmm2 +; AVX512VL-NEXT: vpsrlw $8, %xmm2, %xmm2 +; AVX512VL-NEXT: vpbroadcastb %xmm2, %ymm2 +; AVX512VL-NEXT: vpand %ymm2, %ymm10, %ymm8 +; AVX512VL-NEXT: vpor %ymm8, %ymm9, %ymm8 +; AVX512VL-NEXT: vpxor %xmm9, %xmm9, %xmm9 +; AVX512VL-NEXT: vpcmpeqb %ymm9, %ymm4, %ymm4 +; AVX512VL-NEXT: vpblendvb %ymm4, %ymm0, %ymm8, %ymm0 +; AVX512VL-NEXT: vpsllw %xmm5, %ymm1, %ymm5 +; AVX512VL-NEXT: vpand %ymm7, %ymm5, %ymm5 +; AVX512VL-NEXT: vpsrlw %xmm6, %ymm3, %ymm3 +; AVX512VL-NEXT: vpand %ymm2, %ymm3, %ymm2 +; AVX512VL-NEXT: vpor %ymm2, %ymm5, %ymm2 ; AVX512VL-NEXT: vpblendvb %ymm4, %ymm1, %ymm2, %ymm1 ; AVX512VL-NEXT: retq ; @@ -859,16 +859,16 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y, <64 x i8> % ; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm2, %zmm2 ; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero ; AVX512BW-NEXT: vpsllw %xmm3, %zmm0, %zmm4 -; AVX512BW-NEXT: vpternlogd $255, %zmm5, %zmm5, %zmm5 -; AVX512BW-NEXT: vpsllw %xmm3, %zmm5, %zmm3 +; AVX512BW-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5 +; AVX512BW-NEXT: vpsllw %xmm3, %xmm5, %xmm3 ; AVX512BW-NEXT: vpbroadcastb %xmm3, %zmm3 ; AVX512BW-NEXT: vpandq %zmm3, %zmm4, %zmm3 ; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm4 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] ; AVX512BW-NEXT: vpsubb %xmm2, %xmm4, %xmm4 ; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero ; AVX512BW-NEXT: vpsrlw %xmm4, %zmm1, %zmm1 -; AVX512BW-NEXT: vpsrlw %xmm4, %zmm5, %zmm4 -; AVX512BW-NEXT: vpsrlw $8, %zmm4, %zmm4 +; AVX512BW-NEXT: vpsrlw %xmm4, %xmm5, %xmm4 +; AVX512BW-NEXT: vpsrlw $8, %xmm4, %xmm4 ; AVX512BW-NEXT: vpbroadcastb %xmm4, %zmm4 ; 
AVX512BW-NEXT: vpandq %zmm4, %zmm1, %zmm1 ; AVX512BW-NEXT: vporq %zmm1, %zmm3, %zmm1 @@ -883,16 +883,16 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y, <64 x i8> % ; AVX512VBMI2-NEXT: vpandq {{.*}}(%rip), %zmm2, %zmm2 ; AVX512VBMI2-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero ; AVX512VBMI2-NEXT: vpsllw %xmm3, %zmm0, %zmm4 -; AVX512VBMI2-NEXT: vpternlogd $255, %zmm5, %zmm5, %zmm5 -; AVX512VBMI2-NEXT: vpsllw %xmm3, %zmm5, %zmm3 +; AVX512VBMI2-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5 +; AVX512VBMI2-NEXT: vpsllw %xmm3, %xmm5, %xmm3 ; AVX512VBMI2-NEXT: vpbroadcastb %xmm3, %zmm3 ; AVX512VBMI2-NEXT: vpandq %zmm3, %zmm4, %zmm3 ; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm4 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] ; AVX512VBMI2-NEXT: vpsubb %xmm2, %xmm4, %xmm4 ; AVX512VBMI2-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero ; AVX512VBMI2-NEXT: vpsrlw %xmm4, %zmm1, %zmm1 -; AVX512VBMI2-NEXT: vpsrlw %xmm4, %zmm5, %zmm4 -; AVX512VBMI2-NEXT: vpsrlw $8, %zmm4, %zmm4 +; AVX512VBMI2-NEXT: vpsrlw %xmm4, %xmm5, %xmm4 +; AVX512VBMI2-NEXT: vpsrlw $8, %xmm4, %xmm4 ; AVX512VBMI2-NEXT: vpbroadcastb %xmm4, %zmm4 ; AVX512VBMI2-NEXT: vpandq %zmm4, %zmm1, %zmm1 ; AVX512VBMI2-NEXT: vporq %zmm1, %zmm3, %zmm1 @@ -907,16 +907,16 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y, <64 x i8> % ; AVX512VLBW-NEXT: vpandq {{.*}}(%rip), %zmm2, %zmm2 ; AVX512VLBW-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero ; AVX512VLBW-NEXT: vpsllw %xmm3, %zmm0, %zmm4 -; AVX512VLBW-NEXT: vpternlogd $255, %zmm5, %zmm5, %zmm5 -; AVX512VLBW-NEXT: vpsllw %xmm3, %zmm5, %zmm3 +; AVX512VLBW-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5 +; AVX512VLBW-NEXT: vpsllw %xmm3, %xmm5, %xmm3 ; AVX512VLBW-NEXT: vpbroadcastb %xmm3, %zmm3 ; AVX512VLBW-NEXT: vpandq %zmm3, %zmm4, %zmm3 ; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm4 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] ; AVX512VLBW-NEXT: vpsubb %xmm2, %xmm4, %xmm4 ; AVX512VLBW-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero ; AVX512VLBW-NEXT: vpsrlw %xmm4, %zmm1, %zmm1 -; AVX512VLBW-NEXT: vpsrlw %xmm4, %zmm5, %zmm4 -; AVX512VLBW-NEXT: vpsrlw $8, %zmm4, %zmm4 +; AVX512VLBW-NEXT: vpsrlw %xmm4, %xmm5, %xmm4 +; AVX512VLBW-NEXT: vpsrlw $8, %xmm4, %xmm4 ; AVX512VLBW-NEXT: vpbroadcastb %xmm4, %zmm4 ; AVX512VLBW-NEXT: vpandq %zmm4, %zmm1, %zmm1 ; AVX512VLBW-NEXT: vporq %zmm1, %zmm3, %zmm1 @@ -931,16 +931,16 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y, <64 x i8> % ; AVX512VLVBMI2-NEXT: vpandq {{.*}}(%rip), %zmm2, %zmm2 ; AVX512VLVBMI2-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero ; AVX512VLVBMI2-NEXT: vpsllw %xmm3, %zmm0, %zmm4 -; AVX512VLVBMI2-NEXT: vpternlogd $255, %zmm5, %zmm5, %zmm5 -; AVX512VLVBMI2-NEXT: vpsllw %xmm3, %zmm5, %zmm3 +; AVX512VLVBMI2-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5 +; AVX512VLVBMI2-NEXT: vpsllw %xmm3, %xmm5, %xmm3 ; AVX512VLVBMI2-NEXT: vpbroadcastb %xmm3, %zmm3 ; AVX512VLVBMI2-NEXT: vpandq %zmm3, %zmm4, %zmm3 ; AVX512VLVBMI2-NEXT: vmovdqa {{.*#+}} xmm4 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] ; AVX512VLVBMI2-NEXT: vpsubb %xmm2, %xmm4, %xmm4 ; AVX512VLVBMI2-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero ; AVX512VLVBMI2-NEXT: vpsrlw %xmm4, %zmm1, %zmm1 -; 
AVX512VLVBMI2-NEXT: vpsrlw %xmm4, %zmm5, %zmm4 -; AVX512VLVBMI2-NEXT: vpsrlw $8, %zmm4, %zmm4 +; AVX512VLVBMI2-NEXT: vpsrlw %xmm4, %xmm5, %xmm4 +; AVX512VLVBMI2-NEXT: vpsrlw $8, %xmm4, %xmm4 ; AVX512VLVBMI2-NEXT: vpbroadcastb %xmm4, %zmm4 ; AVX512VLVBMI2-NEXT: vpandq %zmm4, %zmm1, %zmm1 ; AVX512VLVBMI2-NEXT: vporq %zmm1, %zmm3, %zmm1 diff --git a/test/CodeGen/X86/vector-fshl-rot-256.ll b/test/CodeGen/X86/vector-fshl-rot-256.ll index b56b55553fc..65fa6f20fee 100644 --- a/test/CodeGen/X86/vector-fshl-rot-256.ll +++ b/test/CodeGen/X86/vector-fshl-rot-256.ll @@ -793,16 +793,16 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %amt) nounwind ; AVX2-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1 ; AVX2-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero ; AVX2-NEXT: vpsllw %xmm2, %ymm0, %ymm3 -; AVX2-NEXT: vpcmpeqd %ymm4, %ymm4, %ymm4 -; AVX2-NEXT: vpsllw %xmm2, %ymm4, %ymm2 +; AVX2-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4 +; AVX2-NEXT: vpsllw %xmm2, %xmm4, %xmm2 ; AVX2-NEXT: vpbroadcastb %xmm2, %ymm2 ; AVX2-NEXT: vpand %ymm2, %ymm3, %ymm2 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] ; AVX2-NEXT: vpsubb %xmm1, %xmm3, %xmm1 ; AVX2-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero ; AVX2-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 -; AVX2-NEXT: vpsrlw %xmm1, %ymm4, %ymm1 -; AVX2-NEXT: vpsrlw $8, %ymm1, %ymm1 +; AVX2-NEXT: vpsrlw %xmm1, %xmm4, %xmm1 +; AVX2-NEXT: vpsrlw $8, %xmm1, %xmm1 ; AVX2-NEXT: vpbroadcastb %xmm1, %ymm1 ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vpor %ymm0, %ymm2, %ymm0 @@ -814,16 +814,16 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %amt) nounwind ; AVX512F-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1 ; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero ; AVX512F-NEXT: vpsllw %xmm2, %ymm0, %ymm3 -; AVX512F-NEXT: vpcmpeqd %ymm4, %ymm4, %ymm4 -; AVX512F-NEXT: vpsllw %xmm2, %ymm4, %ymm2 +; AVX512F-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4 +; AVX512F-NEXT: vpsllw %xmm2, %xmm4, %xmm2 ; AVX512F-NEXT: vpbroadcastb %xmm2, %ymm2 ; AVX512F-NEXT: vpand %ymm2, %ymm3, %ymm2 ; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] ; AVX512F-NEXT: vpsubb %xmm1, %xmm3, %xmm1 ; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero ; AVX512F-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 -; AVX512F-NEXT: vpsrlw %xmm1, %ymm4, %ymm1 -; AVX512F-NEXT: vpsrlw $8, %ymm1, %ymm1 +; AVX512F-NEXT: vpsrlw %xmm1, %xmm4, %xmm1 +; AVX512F-NEXT: vpsrlw $8, %xmm1, %xmm1 ; AVX512F-NEXT: vpbroadcastb %xmm1, %ymm1 ; AVX512F-NEXT: vpand %ymm1, %ymm0, %ymm0 ; AVX512F-NEXT: vpor %ymm0, %ymm2, %ymm0 @@ -835,16 +835,16 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %amt) nounwind ; AVX512VL-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1 ; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero ; AVX512VL-NEXT: vpsllw %xmm2, %ymm0, %ymm3 -; AVX512VL-NEXT: vpcmpeqd %ymm4, %ymm4, %ymm4 -; AVX512VL-NEXT: vpsllw %xmm2, %ymm4, %ymm2 +; AVX512VL-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4 +; AVX512VL-NEXT: vpsllw %xmm2, %xmm4, %xmm2 ; AVX512VL-NEXT: vpbroadcastb %xmm2, %ymm2 ; AVX512VL-NEXT: vpand %ymm2, %ymm3, %ymm2 ; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] ; AVX512VL-NEXT: vpsubb %xmm1, %xmm3, %xmm1 ; 
AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero ; AVX512VL-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 -; AVX512VL-NEXT: vpsrlw %xmm1, %ymm4, %ymm1 -; AVX512VL-NEXT: vpsrlw $8, %ymm1, %ymm1 +; AVX512VL-NEXT: vpsrlw %xmm1, %xmm4, %xmm1 +; AVX512VL-NEXT: vpsrlw $8, %xmm1, %xmm1 ; AVX512VL-NEXT: vpbroadcastb %xmm1, %ymm1 ; AVX512VL-NEXT: vpand %ymm1, %ymm0, %ymm0 ; AVX512VL-NEXT: vpor %ymm0, %ymm2, %ymm0 diff --git a/test/CodeGen/X86/vector-fshl-rot-512.ll b/test/CodeGen/X86/vector-fshl-rot-512.ll index c096a6cb86e..de7959d6b5a 100644 --- a/test/CodeGen/X86/vector-fshl-rot-512.ll +++ b/test/CodeGen/X86/vector-fshl-rot-512.ll @@ -387,16 +387,16 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %amt) nounwind ; AVX512F-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 ; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero ; AVX512F-NEXT: vpsllw %xmm3, %ymm0, %ymm4 -; AVX512F-NEXT: vpcmpeqd %ymm5, %ymm5, %ymm5 -; AVX512F-NEXT: vpsllw %xmm3, %ymm5, %ymm6 +; AVX512F-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5 +; AVX512F-NEXT: vpsllw %xmm3, %xmm5, %xmm6 ; AVX512F-NEXT: vpbroadcastb %xmm6, %ymm6 ; AVX512F-NEXT: vpand %ymm6, %ymm4, %ymm4 ; AVX512F-NEXT: vmovdqa {{.*#+}} xmm7 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] ; AVX512F-NEXT: vpsubb %xmm2, %xmm7, %xmm2 ; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero ; AVX512F-NEXT: vpsrlw %xmm2, %ymm0, %ymm0 -; AVX512F-NEXT: vpsrlw %xmm2, %ymm5, %ymm5 -; AVX512F-NEXT: vpsrlw $8, %ymm5, %ymm5 +; AVX512F-NEXT: vpsrlw %xmm2, %xmm5, %xmm5 +; AVX512F-NEXT: vpsrlw $8, %xmm5, %xmm5 ; AVX512F-NEXT: vpbroadcastb %xmm5, %ymm5 ; AVX512F-NEXT: vpand %ymm5, %ymm0, %ymm0 ; AVX512F-NEXT: vpor %ymm0, %ymm4, %ymm0 @@ -413,16 +413,16 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %amt) nounwind ; AVX512VL-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 ; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero ; AVX512VL-NEXT: vpsllw %xmm3, %ymm0, %ymm4 -; AVX512VL-NEXT: vpcmpeqd %ymm5, %ymm5, %ymm5 -; AVX512VL-NEXT: vpsllw %xmm3, %ymm5, %ymm6 +; AVX512VL-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5 +; AVX512VL-NEXT: vpsllw %xmm3, %xmm5, %xmm6 ; AVX512VL-NEXT: vpbroadcastb %xmm6, %ymm6 ; AVX512VL-NEXT: vpand %ymm6, %ymm4, %ymm4 ; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm7 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] ; AVX512VL-NEXT: vpsubb %xmm2, %xmm7, %xmm2 ; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero ; AVX512VL-NEXT: vpsrlw %xmm2, %ymm0, %ymm0 -; AVX512VL-NEXT: vpsrlw %xmm2, %ymm5, %ymm5 -; AVX512VL-NEXT: vpsrlw $8, %ymm5, %ymm5 +; AVX512VL-NEXT: vpsrlw %xmm2, %xmm5, %xmm5 +; AVX512VL-NEXT: vpsrlw $8, %xmm5, %xmm5 ; AVX512VL-NEXT: vpbroadcastb %xmm5, %ymm5 ; AVX512VL-NEXT: vpand %ymm5, %ymm0, %ymm0 ; AVX512VL-NEXT: vpor %ymm0, %ymm4, %ymm0 @@ -440,8 +440,8 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %amt) nounwind ; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm3 ; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero ; AVX512BW-NEXT: vpsllw %xmm3, %zmm0, %zmm4 -; AVX512BW-NEXT: vpternlogd $255, %zmm5, %zmm5, %zmm5 -; AVX512BW-NEXT: vpsllw %xmm3, %zmm5, %zmm3 +; AVX512BW-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5 +; AVX512BW-NEXT: vpsllw %xmm3, 
%xmm5, %xmm3 ; AVX512BW-NEXT: vpbroadcastb %xmm3, %zmm3 ; AVX512BW-NEXT: vpandq %zmm3, %zmm4, %zmm3 ; AVX512BW-NEXT: vpxor %xmm4, %xmm4, %xmm4 @@ -449,8 +449,8 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %amt) nounwind ; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm1 ; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero ; AVX512BW-NEXT: vpsrlw %xmm1, %zmm0, %zmm0 -; AVX512BW-NEXT: vpsrlw %xmm1, %zmm5, %zmm1 -; AVX512BW-NEXT: vpsrlw $8, %zmm1, %zmm1 +; AVX512BW-NEXT: vpsrlw %xmm1, %xmm5, %xmm1 +; AVX512BW-NEXT: vpsrlw $8, %xmm1, %xmm1 ; AVX512BW-NEXT: vpbroadcastb %xmm1, %zmm1 ; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: vporq %zmm0, %zmm3, %zmm0 @@ -463,8 +463,8 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %amt) nounwind ; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm3 ; AVX512VLBW-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero ; AVX512VLBW-NEXT: vpsllw %xmm3, %zmm0, %zmm4 -; AVX512VLBW-NEXT: vpternlogd $255, %zmm5, %zmm5, %zmm5 -; AVX512VLBW-NEXT: vpsllw %xmm3, %zmm5, %zmm3 +; AVX512VLBW-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5 +; AVX512VLBW-NEXT: vpsllw %xmm3, %xmm5, %xmm3 ; AVX512VLBW-NEXT: vpbroadcastb %xmm3, %zmm3 ; AVX512VLBW-NEXT: vpandq %zmm3, %zmm4, %zmm3 ; AVX512VLBW-NEXT: vpxor %xmm4, %xmm4, %xmm4 @@ -472,8 +472,8 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %amt) nounwind ; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm1 ; AVX512VLBW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero ; AVX512VLBW-NEXT: vpsrlw %xmm1, %zmm0, %zmm0 -; AVX512VLBW-NEXT: vpsrlw %xmm1, %zmm5, %zmm1 -; AVX512VLBW-NEXT: vpsrlw $8, %zmm1, %zmm1 +; AVX512VLBW-NEXT: vpsrlw %xmm1, %xmm5, %xmm1 +; AVX512VLBW-NEXT: vpsrlw $8, %xmm1, %xmm1 ; AVX512VLBW-NEXT: vpbroadcastb %xmm1, %zmm1 ; AVX512VLBW-NEXT: vpandq %zmm1, %zmm0, %zmm0 ; AVX512VLBW-NEXT: vporq %zmm0, %zmm3, %zmm0 diff --git a/test/CodeGen/X86/vector-fshr-256.ll b/test/CodeGen/X86/vector-fshr-256.ll index c27828b0e76..61b7c55e557 100644 --- a/test/CodeGen/X86/vector-fshr-256.ll +++ b/test/CodeGen/X86/vector-fshr-256.ll @@ -1492,16 +1492,16 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> % ; AVX2-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2 ; AVX2-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero ; AVX2-NEXT: vpsrlw %xmm3, %ymm1, %ymm4 -; AVX2-NEXT: vpcmpeqd %ymm5, %ymm5, %ymm5 -; AVX2-NEXT: vpsrlw %xmm3, %ymm5, %ymm3 -; AVX2-NEXT: vpsrlw $8, %ymm3, %ymm3 +; AVX2-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5 +; AVX2-NEXT: vpsrlw %xmm3, %xmm5, %xmm3 +; AVX2-NEXT: vpsrlw $8, %xmm3, %xmm3 ; AVX2-NEXT: vpbroadcastb %xmm3, %ymm3 ; AVX2-NEXT: vpand %ymm3, %ymm4, %ymm3 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm4 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] ; AVX2-NEXT: vpsubb %xmm2, %xmm4, %xmm4 ; AVX2-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero ; AVX2-NEXT: vpsllw %xmm4, %ymm0, %ymm0 -; AVX2-NEXT: vpsllw %xmm4, %ymm5, %ymm4 +; AVX2-NEXT: vpsllw %xmm4, %xmm5, %xmm4 ; AVX2-NEXT: vpbroadcastb %xmm4, %ymm4 ; AVX2-NEXT: vpand %ymm4, %ymm0, %ymm0 ; AVX2-NEXT: vpor %ymm3, %ymm0, %ymm0 @@ -1516,16 +1516,16 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> % ; AVX512F-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2 ; AVX512F-NEXT: 
vpmovzxbq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero ; AVX512F-NEXT: vpsrlw %xmm3, %ymm1, %ymm4 -; AVX512F-NEXT: vpcmpeqd %ymm5, %ymm5, %ymm5 -; AVX512F-NEXT: vpsrlw %xmm3, %ymm5, %ymm3 -; AVX512F-NEXT: vpsrlw $8, %ymm3, %ymm3 +; AVX512F-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5 +; AVX512F-NEXT: vpsrlw %xmm3, %xmm5, %xmm3 +; AVX512F-NEXT: vpsrlw $8, %xmm3, %xmm3 ; AVX512F-NEXT: vpbroadcastb %xmm3, %ymm3 ; AVX512F-NEXT: vpand %ymm3, %ymm4, %ymm3 ; AVX512F-NEXT: vmovdqa {{.*#+}} xmm4 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] ; AVX512F-NEXT: vpsubb %xmm2, %xmm4, %xmm4 ; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero ; AVX512F-NEXT: vpsllw %xmm4, %ymm0, %ymm0 -; AVX512F-NEXT: vpsllw %xmm4, %ymm5, %ymm4 +; AVX512F-NEXT: vpsllw %xmm4, %xmm5, %xmm4 ; AVX512F-NEXT: vpbroadcastb %xmm4, %ymm4 ; AVX512F-NEXT: vpand %ymm4, %ymm0, %ymm0 ; AVX512F-NEXT: vpor %ymm3, %ymm0, %ymm0 @@ -1540,16 +1540,16 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> % ; AVX512VL-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2 ; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero ; AVX512VL-NEXT: vpsrlw %xmm3, %ymm1, %ymm4 -; AVX512VL-NEXT: vpcmpeqd %ymm5, %ymm5, %ymm5 -; AVX512VL-NEXT: vpsrlw %xmm3, %ymm5, %ymm3 -; AVX512VL-NEXT: vpsrlw $8, %ymm3, %ymm3 +; AVX512VL-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5 +; AVX512VL-NEXT: vpsrlw %xmm3, %xmm5, %xmm3 +; AVX512VL-NEXT: vpsrlw $8, %xmm3, %xmm3 ; AVX512VL-NEXT: vpbroadcastb %xmm3, %ymm3 ; AVX512VL-NEXT: vpand %ymm3, %ymm4, %ymm3 ; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm4 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] ; AVX512VL-NEXT: vpsubb %xmm2, %xmm4, %xmm4 ; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero ; AVX512VL-NEXT: vpsllw %xmm4, %ymm0, %ymm0 -; AVX512VL-NEXT: vpsllw %xmm4, %ymm5, %ymm4 +; AVX512VL-NEXT: vpsllw %xmm4, %xmm5, %xmm4 ; AVX512VL-NEXT: vpbroadcastb %xmm4, %ymm4 ; AVX512VL-NEXT: vpand %ymm4, %ymm0, %ymm0 ; AVX512VL-NEXT: vpor %ymm3, %ymm0, %ymm0 diff --git a/test/CodeGen/X86/vector-fshr-512.ll b/test/CodeGen/X86/vector-fshr-512.ll index b73724b77e9..f2b31d4b80d 100644 --- a/test/CodeGen/X86/vector-fshr-512.ll +++ b/test/CodeGen/X86/vector-fshr-512.ll @@ -787,26 +787,26 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y, <64 x i8> % ; AVX512F-NEXT: vpand {{.*}}(%rip), %ymm4, %ymm4 ; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm5 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero ; AVX512F-NEXT: vpsrlw %xmm5, %ymm2, %ymm6 -; AVX512F-NEXT: vpcmpeqd %ymm9, %ymm9, %ymm9 -; AVX512F-NEXT: vpsrlw %xmm5, %ymm9, %ymm8 -; AVX512F-NEXT: vpsrlw $8, %ymm8, %ymm8 -; AVX512F-NEXT: vpbroadcastb %xmm8, %ymm8 -; AVX512F-NEXT: vpand %ymm8, %ymm6, %ymm6 -; AVX512F-NEXT: vmovdqa {{.*#+}} xmm7 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] -; AVX512F-NEXT: vpsubb %xmm4, %xmm7, %xmm7 -; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm7 = xmm7[0],zero,zero,zero,zero,zero,zero,zero,xmm7[1],zero,zero,zero,zero,zero,zero,zero -; AVX512F-NEXT: vpsllw %xmm7, %ymm0, %ymm0 -; AVX512F-NEXT: vpsllw %xmm7, %ymm9, %ymm9 -; AVX512F-NEXT: vpbroadcastb %xmm9, %ymm9 -; AVX512F-NEXT: vpand %ymm9, %ymm0, %ymm0 -; AVX512F-NEXT: vpor %ymm6, %ymm0, %ymm0 -; AVX512F-NEXT: vpxor %xmm6, %xmm6, %xmm6 -; AVX512F-NEXT: vpcmpeqb %ymm6, %ymm4, %ymm4 +; AVX512F-NEXT: vpcmpeqd %xmm8, %xmm8, %xmm8 +; 
AVX512F-NEXT: vpsrlw %xmm5, %xmm8, %xmm7 +; AVX512F-NEXT: vpsrlw $8, %xmm7, %xmm7 +; AVX512F-NEXT: vpbroadcastb %xmm7, %ymm7 +; AVX512F-NEXT: vpand %ymm7, %ymm6, %ymm9 +; AVX512F-NEXT: vmovdqa {{.*#+}} xmm6 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] +; AVX512F-NEXT: vpsubb %xmm4, %xmm6, %xmm6 +; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm6 = xmm6[0],zero,zero,zero,zero,zero,zero,zero,xmm6[1],zero,zero,zero,zero,zero,zero,zero +; AVX512F-NEXT: vpsllw %xmm6, %ymm0, %ymm10 +; AVX512F-NEXT: vpsllw %xmm6, %xmm8, %xmm0 +; AVX512F-NEXT: vpbroadcastb %xmm0, %ymm8 +; AVX512F-NEXT: vpand %ymm8, %ymm10, %ymm0 +; AVX512F-NEXT: vpor %ymm9, %ymm0, %ymm0 +; AVX512F-NEXT: vpxor %xmm9, %xmm9, %xmm9 +; AVX512F-NEXT: vpcmpeqb %ymm9, %ymm4, %ymm4 ; AVX512F-NEXT: vpblendvb %ymm4, %ymm2, %ymm0, %ymm0 ; AVX512F-NEXT: vpsrlw %xmm5, %ymm3, %ymm2 -; AVX512F-NEXT: vpand %ymm8, %ymm2, %ymm2 -; AVX512F-NEXT: vpsllw %xmm7, %ymm1, %ymm1 -; AVX512F-NEXT: vpand %ymm9, %ymm1, %ymm1 +; AVX512F-NEXT: vpand %ymm7, %ymm2, %ymm2 +; AVX512F-NEXT: vpsllw %xmm6, %ymm1, %ymm1 +; AVX512F-NEXT: vpand %ymm8, %ymm1, %ymm1 ; AVX512F-NEXT: vpor %ymm2, %ymm1, %ymm1 ; AVX512F-NEXT: vpblendvb %ymm4, %ymm3, %ymm1, %ymm1 ; AVX512F-NEXT: retq @@ -817,26 +817,26 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y, <64 x i8> % ; AVX512VL-NEXT: vpand {{.*}}(%rip), %ymm4, %ymm4 ; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm5 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero ; AVX512VL-NEXT: vpsrlw %xmm5, %ymm2, %ymm6 -; AVX512VL-NEXT: vpcmpeqd %ymm9, %ymm9, %ymm9 -; AVX512VL-NEXT: vpsrlw %xmm5, %ymm9, %ymm8 -; AVX512VL-NEXT: vpsrlw $8, %ymm8, %ymm8 -; AVX512VL-NEXT: vpbroadcastb %xmm8, %ymm8 -; AVX512VL-NEXT: vpand %ymm8, %ymm6, %ymm6 -; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm7 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] -; AVX512VL-NEXT: vpsubb %xmm4, %xmm7, %xmm7 -; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm7 = xmm7[0],zero,zero,zero,zero,zero,zero,zero,xmm7[1],zero,zero,zero,zero,zero,zero,zero -; AVX512VL-NEXT: vpsllw %xmm7, %ymm0, %ymm0 -; AVX512VL-NEXT: vpsllw %xmm7, %ymm9, %ymm9 -; AVX512VL-NEXT: vpbroadcastb %xmm9, %ymm9 -; AVX512VL-NEXT: vpand %ymm9, %ymm0, %ymm0 -; AVX512VL-NEXT: vpor %ymm6, %ymm0, %ymm0 -; AVX512VL-NEXT: vpxor %xmm6, %xmm6, %xmm6 -; AVX512VL-NEXT: vpcmpeqb %ymm6, %ymm4, %ymm4 +; AVX512VL-NEXT: vpcmpeqd %xmm8, %xmm8, %xmm8 +; AVX512VL-NEXT: vpsrlw %xmm5, %xmm8, %xmm7 +; AVX512VL-NEXT: vpsrlw $8, %xmm7, %xmm7 +; AVX512VL-NEXT: vpbroadcastb %xmm7, %ymm7 +; AVX512VL-NEXT: vpand %ymm7, %ymm6, %ymm9 +; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm6 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] +; AVX512VL-NEXT: vpsubb %xmm4, %xmm6, %xmm6 +; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm6 = xmm6[0],zero,zero,zero,zero,zero,zero,zero,xmm6[1],zero,zero,zero,zero,zero,zero,zero +; AVX512VL-NEXT: vpsllw %xmm6, %ymm0, %ymm10 +; AVX512VL-NEXT: vpsllw %xmm6, %xmm8, %xmm0 +; AVX512VL-NEXT: vpbroadcastb %xmm0, %ymm8 +; AVX512VL-NEXT: vpand %ymm8, %ymm10, %ymm0 +; AVX512VL-NEXT: vpor %ymm9, %ymm0, %ymm0 +; AVX512VL-NEXT: vpxor %xmm9, %xmm9, %xmm9 +; AVX512VL-NEXT: vpcmpeqb %ymm9, %ymm4, %ymm4 ; AVX512VL-NEXT: vpblendvb %ymm4, %ymm2, %ymm0, %ymm0 ; AVX512VL-NEXT: vpsrlw %xmm5, %ymm3, %ymm2 -; AVX512VL-NEXT: vpand %ymm8, %ymm2, %ymm2 -; AVX512VL-NEXT: vpsllw %xmm7, %ymm1, %ymm1 -; AVX512VL-NEXT: vpand %ymm9, %ymm1, %ymm1 +; AVX512VL-NEXT: vpand %ymm7, %ymm2, %ymm2 +; AVX512VL-NEXT: vpsllw %xmm6, %ymm1, %ymm1 +; AVX512VL-NEXT: vpand %ymm8, %ymm1, %ymm1 ; AVX512VL-NEXT: vpor %ymm2, %ymm1, %ymm1 ; AVX512VL-NEXT: vpblendvb %ymm4, %ymm3, %ymm1, %ymm1 ; 
AVX512VL-NEXT: retq @@ -847,16 +847,16 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y, <64 x i8> % ; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm2, %zmm2 ; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero ; AVX512BW-NEXT: vpsrlw %xmm3, %zmm1, %zmm4 -; AVX512BW-NEXT: vpternlogd $255, %zmm5, %zmm5, %zmm5 -; AVX512BW-NEXT: vpsrlw %xmm3, %zmm5, %zmm3 -; AVX512BW-NEXT: vpsrlw $8, %zmm3, %zmm3 +; AVX512BW-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5 +; AVX512BW-NEXT: vpsrlw %xmm3, %xmm5, %xmm3 +; AVX512BW-NEXT: vpsrlw $8, %xmm3, %xmm3 ; AVX512BW-NEXT: vpbroadcastb %xmm3, %zmm3 ; AVX512BW-NEXT: vpandq %zmm3, %zmm4, %zmm3 ; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm4 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] ; AVX512BW-NEXT: vpsubb %xmm2, %xmm4, %xmm4 ; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero ; AVX512BW-NEXT: vpsllw %xmm4, %zmm0, %zmm0 -; AVX512BW-NEXT: vpsllw %xmm4, %zmm5, %zmm4 +; AVX512BW-NEXT: vpsllw %xmm4, %xmm5, %xmm4 ; AVX512BW-NEXT: vpbroadcastb %xmm4, %zmm4 ; AVX512BW-NEXT: vpandq %zmm4, %zmm0, %zmm0 ; AVX512BW-NEXT: vporq %zmm3, %zmm0, %zmm0 @@ -870,16 +870,16 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y, <64 x i8> % ; AVX512VBMI2-NEXT: vpandq {{.*}}(%rip), %zmm2, %zmm2 ; AVX512VBMI2-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero ; AVX512VBMI2-NEXT: vpsrlw %xmm3, %zmm1, %zmm4 -; AVX512VBMI2-NEXT: vpternlogd $255, %zmm5, %zmm5, %zmm5 -; AVX512VBMI2-NEXT: vpsrlw %xmm3, %zmm5, %zmm3 -; AVX512VBMI2-NEXT: vpsrlw $8, %zmm3, %zmm3 +; AVX512VBMI2-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5 +; AVX512VBMI2-NEXT: vpsrlw %xmm3, %xmm5, %xmm3 +; AVX512VBMI2-NEXT: vpsrlw $8, %xmm3, %xmm3 ; AVX512VBMI2-NEXT: vpbroadcastb %xmm3, %zmm3 ; AVX512VBMI2-NEXT: vpandq %zmm3, %zmm4, %zmm3 ; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm4 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] ; AVX512VBMI2-NEXT: vpsubb %xmm2, %xmm4, %xmm4 ; AVX512VBMI2-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero ; AVX512VBMI2-NEXT: vpsllw %xmm4, %zmm0, %zmm0 -; AVX512VBMI2-NEXT: vpsllw %xmm4, %zmm5, %zmm4 +; AVX512VBMI2-NEXT: vpsllw %xmm4, %xmm5, %xmm4 ; AVX512VBMI2-NEXT: vpbroadcastb %xmm4, %zmm4 ; AVX512VBMI2-NEXT: vpandq %zmm4, %zmm0, %zmm0 ; AVX512VBMI2-NEXT: vporq %zmm3, %zmm0, %zmm0 @@ -893,16 +893,16 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y, <64 x i8> % ; AVX512VLBW-NEXT: vpandq {{.*}}(%rip), %zmm2, %zmm2 ; AVX512VLBW-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero ; AVX512VLBW-NEXT: vpsrlw %xmm3, %zmm1, %zmm4 -; AVX512VLBW-NEXT: vpternlogd $255, %zmm5, %zmm5, %zmm5 -; AVX512VLBW-NEXT: vpsrlw %xmm3, %zmm5, %zmm3 -; AVX512VLBW-NEXT: vpsrlw $8, %zmm3, %zmm3 +; AVX512VLBW-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5 +; AVX512VLBW-NEXT: vpsrlw %xmm3, %xmm5, %xmm3 +; AVX512VLBW-NEXT: vpsrlw $8, %xmm3, %xmm3 ; AVX512VLBW-NEXT: vpbroadcastb %xmm3, %zmm3 ; AVX512VLBW-NEXT: vpandq %zmm3, %zmm4, %zmm3 ; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm4 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] ; AVX512VLBW-NEXT: vpsubb %xmm2, %xmm4, %xmm4 ; AVX512VLBW-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero ; AVX512VLBW-NEXT: vpsllw %xmm4, %zmm0, %zmm0 -; AVX512VLBW-NEXT: vpsllw %xmm4, %zmm5, %zmm4 +; 
AVX512VLBW-NEXT: vpsllw %xmm4, %xmm5, %xmm4 ; AVX512VLBW-NEXT: vpbroadcastb %xmm4, %zmm4 ; AVX512VLBW-NEXT: vpandq %zmm4, %zmm0, %zmm0 ; AVX512VLBW-NEXT: vporq %zmm3, %zmm0, %zmm0 @@ -916,16 +916,16 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y, <64 x i8> % ; AVX512VLVBMI2-NEXT: vpandq {{.*}}(%rip), %zmm2, %zmm2 ; AVX512VLVBMI2-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero ; AVX512VLVBMI2-NEXT: vpsrlw %xmm3, %zmm1, %zmm4 -; AVX512VLVBMI2-NEXT: vpternlogd $255, %zmm5, %zmm5, %zmm5 -; AVX512VLVBMI2-NEXT: vpsrlw %xmm3, %zmm5, %zmm3 -; AVX512VLVBMI2-NEXT: vpsrlw $8, %zmm3, %zmm3 +; AVX512VLVBMI2-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5 +; AVX512VLVBMI2-NEXT: vpsrlw %xmm3, %xmm5, %xmm3 +; AVX512VLVBMI2-NEXT: vpsrlw $8, %xmm3, %xmm3 ; AVX512VLVBMI2-NEXT: vpbroadcastb %xmm3, %zmm3 ; AVX512VLVBMI2-NEXT: vpandq %zmm3, %zmm4, %zmm3 ; AVX512VLVBMI2-NEXT: vmovdqa {{.*#+}} xmm4 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] ; AVX512VLVBMI2-NEXT: vpsubb %xmm2, %xmm4, %xmm4 ; AVX512VLVBMI2-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero ; AVX512VLVBMI2-NEXT: vpsllw %xmm4, %zmm0, %zmm0 -; AVX512VLVBMI2-NEXT: vpsllw %xmm4, %zmm5, %zmm4 +; AVX512VLVBMI2-NEXT: vpsllw %xmm4, %xmm5, %xmm4 ; AVX512VLVBMI2-NEXT: vpbroadcastb %xmm4, %zmm4 ; AVX512VLVBMI2-NEXT: vpandq %zmm4, %zmm0, %zmm0 ; AVX512VLVBMI2-NEXT: vporq %zmm3, %zmm0, %zmm0 diff --git a/test/CodeGen/X86/vector-fshr-rot-256.ll b/test/CodeGen/X86/vector-fshr-rot-256.ll index bc477c2e7bb..6e17724bdc1 100644 --- a/test/CodeGen/X86/vector-fshr-rot-256.ll +++ b/test/CodeGen/X86/vector-fshr-rot-256.ll @@ -863,16 +863,16 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %amt) nounwind ; AVX2-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1 ; AVX2-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero ; AVX2-NEXT: vpsllw %xmm2, %ymm0, %ymm3 -; AVX2-NEXT: vpcmpeqd %ymm4, %ymm4, %ymm4 -; AVX2-NEXT: vpsllw %xmm2, %ymm4, %ymm2 +; AVX2-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4 +; AVX2-NEXT: vpsllw %xmm2, %xmm4, %xmm2 ; AVX2-NEXT: vpbroadcastb %xmm2, %ymm2 ; AVX2-NEXT: vpand %ymm2, %ymm3, %ymm2 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] ; AVX2-NEXT: vpsubb %xmm1, %xmm3, %xmm1 ; AVX2-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero ; AVX2-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 -; AVX2-NEXT: vpsrlw %xmm1, %ymm4, %ymm1 -; AVX2-NEXT: vpsrlw $8, %ymm1, %ymm1 +; AVX2-NEXT: vpsrlw %xmm1, %xmm4, %xmm1 +; AVX2-NEXT: vpsrlw $8, %xmm1, %xmm1 ; AVX2-NEXT: vpbroadcastb %xmm1, %ymm1 ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vpor %ymm0, %ymm2, %ymm0 @@ -886,16 +886,16 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %amt) nounwind ; AVX512F-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1 ; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero ; AVX512F-NEXT: vpsllw %xmm2, %ymm0, %ymm3 -; AVX512F-NEXT: vpcmpeqd %ymm4, %ymm4, %ymm4 -; AVX512F-NEXT: vpsllw %xmm2, %ymm4, %ymm2 +; AVX512F-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4 +; AVX512F-NEXT: vpsllw %xmm2, %xmm4, %xmm2 ; AVX512F-NEXT: vpbroadcastb %xmm2, %ymm2 ; AVX512F-NEXT: vpand %ymm2, %ymm3, %ymm2 ; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] ; AVX512F-NEXT: vpsubb %xmm1, %xmm3, %xmm1 ; AVX512F-NEXT: vpmovzxbq {{.*#+}} 
xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero ; AVX512F-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 -; AVX512F-NEXT: vpsrlw %xmm1, %ymm4, %ymm1 -; AVX512F-NEXT: vpsrlw $8, %ymm1, %ymm1 +; AVX512F-NEXT: vpsrlw %xmm1, %xmm4, %xmm1 +; AVX512F-NEXT: vpsrlw $8, %xmm1, %xmm1 ; AVX512F-NEXT: vpbroadcastb %xmm1, %ymm1 ; AVX512F-NEXT: vpand %ymm1, %ymm0, %ymm0 ; AVX512F-NEXT: vpor %ymm0, %ymm2, %ymm0 @@ -909,16 +909,16 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %amt) nounwind ; AVX512VL-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1 ; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero ; AVX512VL-NEXT: vpsllw %xmm2, %ymm0, %ymm3 -; AVX512VL-NEXT: vpcmpeqd %ymm4, %ymm4, %ymm4 -; AVX512VL-NEXT: vpsllw %xmm2, %ymm4, %ymm2 +; AVX512VL-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4 +; AVX512VL-NEXT: vpsllw %xmm2, %xmm4, %xmm2 ; AVX512VL-NEXT: vpbroadcastb %xmm2, %ymm2 ; AVX512VL-NEXT: vpand %ymm2, %ymm3, %ymm2 ; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] ; AVX512VL-NEXT: vpsubb %xmm1, %xmm3, %xmm1 ; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero ; AVX512VL-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 -; AVX512VL-NEXT: vpsrlw %xmm1, %ymm4, %ymm1 -; AVX512VL-NEXT: vpsrlw $8, %ymm1, %ymm1 +; AVX512VL-NEXT: vpsrlw %xmm1, %xmm4, %xmm1 +; AVX512VL-NEXT: vpsrlw $8, %xmm1, %xmm1 ; AVX512VL-NEXT: vpbroadcastb %xmm1, %ymm1 ; AVX512VL-NEXT: vpand %ymm1, %ymm0, %ymm0 ; AVX512VL-NEXT: vpor %ymm0, %ymm2, %ymm0 diff --git a/test/CodeGen/X86/vector-fshr-rot-512.ll b/test/CodeGen/X86/vector-fshr-rot-512.ll index ee8e2a988f0..2a25efd50ff 100644 --- a/test/CodeGen/X86/vector-fshr-rot-512.ll +++ b/test/CodeGen/X86/vector-fshr-rot-512.ll @@ -405,16 +405,16 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %amt) nounwind ; AVX512F-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 ; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero ; AVX512F-NEXT: vpsllw %xmm3, %ymm0, %ymm4 -; AVX512F-NEXT: vpcmpeqd %ymm5, %ymm5, %ymm5 -; AVX512F-NEXT: vpsllw %xmm3, %ymm5, %ymm6 +; AVX512F-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5 +; AVX512F-NEXT: vpsllw %xmm3, %xmm5, %xmm6 ; AVX512F-NEXT: vpbroadcastb %xmm6, %ymm6 ; AVX512F-NEXT: vpand %ymm6, %ymm4, %ymm4 ; AVX512F-NEXT: vmovdqa {{.*#+}} xmm7 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] ; AVX512F-NEXT: vpsubb %xmm2, %xmm7, %xmm2 ; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero ; AVX512F-NEXT: vpsrlw %xmm2, %ymm0, %ymm0 -; AVX512F-NEXT: vpsrlw %xmm2, %ymm5, %ymm5 -; AVX512F-NEXT: vpsrlw $8, %ymm5, %ymm5 +; AVX512F-NEXT: vpsrlw %xmm2, %xmm5, %xmm5 +; AVX512F-NEXT: vpsrlw $8, %xmm5, %xmm5 ; AVX512F-NEXT: vpbroadcastb %xmm5, %ymm5 ; AVX512F-NEXT: vpand %ymm5, %ymm0, %ymm0 ; AVX512F-NEXT: vpor %ymm0, %ymm4, %ymm0 @@ -433,16 +433,16 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %amt) nounwind ; AVX512VL-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 ; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero ; AVX512VL-NEXT: vpsllw %xmm3, %ymm0, %ymm4 -; AVX512VL-NEXT: vpcmpeqd %ymm5, %ymm5, %ymm5 -; AVX512VL-NEXT: vpsllw %xmm3, %ymm5, %ymm6 +; AVX512VL-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5 +; AVX512VL-NEXT: vpsllw %xmm3, %xmm5, %xmm6 ; AVX512VL-NEXT: vpbroadcastb 
%xmm6, %ymm6 ; AVX512VL-NEXT: vpand %ymm6, %ymm4, %ymm4 ; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm7 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] ; AVX512VL-NEXT: vpsubb %xmm2, %xmm7, %xmm2 ; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero ; AVX512VL-NEXT: vpsrlw %xmm2, %ymm0, %ymm0 -; AVX512VL-NEXT: vpsrlw %xmm2, %ymm5, %ymm5 -; AVX512VL-NEXT: vpsrlw $8, %ymm5, %ymm5 +; AVX512VL-NEXT: vpsrlw %xmm2, %xmm5, %xmm5 +; AVX512VL-NEXT: vpsrlw $8, %xmm5, %xmm5 ; AVX512VL-NEXT: vpbroadcastb %xmm5, %ymm5 ; AVX512VL-NEXT: vpand %ymm5, %ymm0, %ymm0 ; AVX512VL-NEXT: vpor %ymm0, %ymm4, %ymm0 @@ -460,9 +460,9 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %amt) nounwind ; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm3 ; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero ; AVX512BW-NEXT: vpsrlw %xmm3, %zmm0, %zmm4 -; AVX512BW-NEXT: vpternlogd $255, %zmm5, %zmm5, %zmm5 -; AVX512BW-NEXT: vpsrlw %xmm3, %zmm5, %zmm3 -; AVX512BW-NEXT: vpsrlw $8, %zmm3, %zmm3 +; AVX512BW-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5 +; AVX512BW-NEXT: vpsrlw %xmm3, %xmm5, %xmm3 +; AVX512BW-NEXT: vpsrlw $8, %xmm3, %xmm3 ; AVX512BW-NEXT: vpbroadcastb %xmm3, %zmm3 ; AVX512BW-NEXT: vpandq %zmm3, %zmm4, %zmm3 ; AVX512BW-NEXT: vpxor %xmm4, %xmm4, %xmm4 @@ -470,7 +470,7 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %amt) nounwind ; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm1 ; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero ; AVX512BW-NEXT: vpsllw %xmm1, %zmm0, %zmm0 -; AVX512BW-NEXT: vpsllw %xmm1, %zmm5, %zmm1 +; AVX512BW-NEXT: vpsllw %xmm1, %xmm5, %xmm1 ; AVX512BW-NEXT: vpbroadcastb %xmm1, %zmm1 ; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: vporq %zmm3, %zmm0, %zmm0 @@ -483,9 +483,9 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %amt) nounwind ; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm3 ; AVX512VLBW-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero ; AVX512VLBW-NEXT: vpsrlw %xmm3, %zmm0, %zmm4 -; AVX512VLBW-NEXT: vpternlogd $255, %zmm5, %zmm5, %zmm5 -; AVX512VLBW-NEXT: vpsrlw %xmm3, %zmm5, %zmm3 -; AVX512VLBW-NEXT: vpsrlw $8, %zmm3, %zmm3 +; AVX512VLBW-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5 +; AVX512VLBW-NEXT: vpsrlw %xmm3, %xmm5, %xmm3 +; AVX512VLBW-NEXT: vpsrlw $8, %xmm3, %xmm3 ; AVX512VLBW-NEXT: vpbroadcastb %xmm3, %zmm3 ; AVX512VLBW-NEXT: vpandq %zmm3, %zmm4, %zmm3 ; AVX512VLBW-NEXT: vpxor %xmm4, %xmm4, %xmm4 @@ -493,7 +493,7 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %amt) nounwind ; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm1 ; AVX512VLBW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero ; AVX512VLBW-NEXT: vpsllw %xmm1, %zmm0, %zmm0 -; AVX512VLBW-NEXT: vpsllw %xmm1, %zmm5, %zmm1 +; AVX512VLBW-NEXT: vpsllw %xmm1, %xmm5, %xmm1 ; AVX512VLBW-NEXT: vpbroadcastb %xmm1, %zmm1 ; AVX512VLBW-NEXT: vpandq %zmm1, %zmm0, %zmm0 ; AVX512VLBW-NEXT: vporq %zmm3, %zmm0, %zmm0 diff --git a/test/CodeGen/X86/vector-reduce-mul-widen.ll b/test/CodeGen/X86/vector-reduce-mul-widen.ll index 7a7d96d2d58..4c366a61f39 100644 --- a/test/CodeGen/X86/vector-reduce-mul-widen.ll +++ b/test/CodeGen/X86/vector-reduce-mul-widen.ll @@ -157,8 +157,8 @@ define i64 @test_v4i64(<4 x i64> %a0) { ; AVX2-NEXT: vpmuludq %ymm2, %ymm0, %ymm2 ; 
AVX2-NEXT: vpsrlq $32, %ymm0, %ymm3 ; AVX2-NEXT: vpmuludq %ymm1, %ymm3, %ymm3 -; AVX2-NEXT: vpaddq %ymm3, %ymm2, %ymm2 -; AVX2-NEXT: vpsllq $32, %ymm2, %ymm2 +; AVX2-NEXT: vpaddq %xmm3, %xmm2, %xmm2 +; AVX2-NEXT: vpsllq $32, %xmm2, %xmm2 ; AVX2-NEXT: vpmuludq %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vpaddq %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vmovq %xmm0, %rax @@ -181,8 +181,8 @@ define i64 @test_v4i64(<4 x i64> %a0) { ; AVX512BW-NEXT: vpmuludq %ymm2, %ymm0, %ymm2 ; AVX512BW-NEXT: vpsrlq $32, %ymm0, %ymm3 ; AVX512BW-NEXT: vpmuludq %ymm1, %ymm3, %ymm3 -; AVX512BW-NEXT: vpaddq %ymm3, %ymm2, %ymm2 -; AVX512BW-NEXT: vpsllq $32, %ymm2, %ymm2 +; AVX512BW-NEXT: vpaddq %xmm3, %xmm2, %xmm2 +; AVX512BW-NEXT: vpsllq $32, %xmm2, %xmm2 ; AVX512BW-NEXT: vpmuludq %ymm1, %ymm0, %ymm0 ; AVX512BW-NEXT: vpaddq %xmm2, %xmm0, %xmm0 ; AVX512BW-NEXT: vmovq %xmm0, %rax @@ -205,8 +205,8 @@ define i64 @test_v4i64(<4 x i64> %a0) { ; AVX512BWVL-NEXT: vpmuludq %ymm1, %ymm2, %ymm2 ; AVX512BWVL-NEXT: vpsrlq $32, %ymm1, %ymm3 ; AVX512BWVL-NEXT: vpmuludq %ymm3, %ymm0, %ymm3 -; AVX512BWVL-NEXT: vpaddq %ymm2, %ymm3, %ymm2 -; AVX512BWVL-NEXT: vpsllq $32, %ymm2, %ymm2 +; AVX512BWVL-NEXT: vpaddq %xmm2, %xmm3, %xmm2 +; AVX512BWVL-NEXT: vpsllq $32, %xmm2, %xmm2 ; AVX512BWVL-NEXT: vpmuludq %ymm1, %ymm0, %ymm0 ; AVX512BWVL-NEXT: vpaddq %xmm2, %xmm0, %xmm0 ; AVX512BWVL-NEXT: vmovq %xmm0, %rax @@ -349,8 +349,8 @@ define i64 @test_v8i64(<8 x i64> %a0) { ; AVX2-NEXT: vpmuludq %ymm2, %ymm0, %ymm2 ; AVX2-NEXT: vpsrlq $32, %ymm0, %ymm3 ; AVX2-NEXT: vpmuludq %ymm1, %ymm3, %ymm3 -; AVX2-NEXT: vpaddq %ymm3, %ymm2, %ymm2 -; AVX2-NEXT: vpsllq $32, %ymm2, %ymm2 +; AVX2-NEXT: vpaddq %xmm3, %xmm2, %xmm2 +; AVX2-NEXT: vpsllq $32, %xmm2, %xmm2 ; AVX2-NEXT: vpmuludq %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vpaddq %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vmovq %xmm0, %rax @@ -382,8 +382,8 @@ define i64 @test_v8i64(<8 x i64> %a0) { ; AVX512BW-NEXT: vpmuludq %zmm1, %zmm2, %zmm2 ; AVX512BW-NEXT: vpsrlq $32, %zmm1, %zmm3 ; AVX512BW-NEXT: vpmuludq %zmm3, %zmm0, %zmm3 -; AVX512BW-NEXT: vpaddq %zmm2, %zmm3, %zmm2 -; AVX512BW-NEXT: vpsllq $32, %zmm2, %zmm2 +; AVX512BW-NEXT: vpaddq %xmm2, %xmm3, %xmm2 +; AVX512BW-NEXT: vpsllq $32, %xmm2, %xmm2 ; AVX512BW-NEXT: vpmuludq %zmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: vpaddq %xmm2, %xmm0, %xmm0 ; AVX512BW-NEXT: vmovq %xmm0, %rax @@ -415,8 +415,8 @@ define i64 @test_v8i64(<8 x i64> %a0) { ; AVX512BWVL-NEXT: vpmuludq %zmm1, %zmm2, %zmm2 ; AVX512BWVL-NEXT: vpsrlq $32, %zmm1, %zmm3 ; AVX512BWVL-NEXT: vpmuludq %zmm3, %zmm0, %zmm3 -; AVX512BWVL-NEXT: vpaddq %zmm2, %zmm3, %zmm2 -; AVX512BWVL-NEXT: vpsllq $32, %zmm2, %zmm2 +; AVX512BWVL-NEXT: vpaddq %xmm2, %xmm3, %xmm2 +; AVX512BWVL-NEXT: vpsllq $32, %xmm2, %xmm2 ; AVX512BWVL-NEXT: vpmuludq %zmm1, %zmm0, %zmm0 ; AVX512BWVL-NEXT: vpaddq %xmm2, %xmm0, %xmm0 ; AVX512BWVL-NEXT: vmovq %xmm0, %rax @@ -652,8 +652,8 @@ define i64 @test_v16i64(<16 x i64> %a0) { ; AVX2-NEXT: vpmuludq %ymm2, %ymm0, %ymm2 ; AVX2-NEXT: vpsrlq $32, %ymm0, %ymm3 ; AVX2-NEXT: vpmuludq %ymm1, %ymm3, %ymm3 -; AVX2-NEXT: vpaddq %ymm3, %ymm2, %ymm2 -; AVX2-NEXT: vpsllq $32, %ymm2, %ymm2 +; AVX2-NEXT: vpaddq %xmm3, %xmm2, %xmm2 +; AVX2-NEXT: vpsllq $32, %xmm2, %xmm2 ; AVX2-NEXT: vpmuludq %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vpaddq %xmm2, %xmm0, %xmm0 ; AVX2-NEXT: vmovq %xmm0, %rax @@ -693,8 +693,8 @@ define i64 @test_v16i64(<16 x i64> %a0) { ; AVX512BW-NEXT: vpmuludq %zmm1, %zmm2, %zmm2 ; AVX512BW-NEXT: vpsrlq $32, %zmm1, %zmm3 ; AVX512BW-NEXT: vpmuludq %zmm3, %zmm0, %zmm3 -; AVX512BW-NEXT: vpaddq %zmm2, %zmm3, %zmm2 -; AVX512BW-NEXT: vpsllq $32, 
%zmm2, %zmm2
+; AVX512BW-NEXT: vpaddq %xmm2, %xmm3, %xmm2
+; AVX512BW-NEXT: vpsllq $32, %xmm2, %xmm2
 ; AVX512BW-NEXT: vpmuludq %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT: vpaddq %xmm2, %xmm0, %xmm0
 ; AVX512BW-NEXT: vmovq %xmm0, %rax
@@ -734,8 +734,8 @@ define i64 @test_v16i64(<16 x i64> %a0) {
 ; AVX512BWVL-NEXT: vpmuludq %zmm1, %zmm2, %zmm2
 ; AVX512BWVL-NEXT: vpsrlq $32, %zmm1, %zmm3
 ; AVX512BWVL-NEXT: vpmuludq %zmm3, %zmm0, %zmm3
-; AVX512BWVL-NEXT: vpaddq %zmm2, %zmm3, %zmm2
-; AVX512BWVL-NEXT: vpsllq $32, %zmm2, %zmm2
+; AVX512BWVL-NEXT: vpaddq %xmm2, %xmm3, %xmm2
+; AVX512BWVL-NEXT: vpsllq $32, %xmm2, %xmm2
 ; AVX512BWVL-NEXT: vpmuludq %zmm1, %zmm0, %zmm0
 ; AVX512BWVL-NEXT: vpaddq %xmm2, %xmm0, %xmm0
 ; AVX512BWVL-NEXT: vmovq %xmm0, %rax
diff --git a/test/CodeGen/X86/vector-reduce-mul.ll b/test/CodeGen/X86/vector-reduce-mul.ll
index a533959b8f2..12cef44b3e2 100644
--- a/test/CodeGen/X86/vector-reduce-mul.ll
+++ b/test/CodeGen/X86/vector-reduce-mul.ll
@@ -157,8 +157,8 @@ define i64 @test_v4i64(<4 x i64> %a0) {
 ; AVX2-NEXT: vpmuludq %ymm2, %ymm0, %ymm2
 ; AVX2-NEXT: vpsrlq $32, %ymm0, %ymm3
 ; AVX2-NEXT: vpmuludq %ymm1, %ymm3, %ymm3
-; AVX2-NEXT: vpaddq %ymm3, %ymm2, %ymm2
-; AVX2-NEXT: vpsllq $32, %ymm2, %ymm2
+; AVX2-NEXT: vpaddq %xmm3, %xmm2, %xmm2
+; AVX2-NEXT: vpsllq $32, %xmm2, %xmm2
 ; AVX2-NEXT: vpmuludq %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT: vpaddq %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT: vmovq %xmm0, %rax
@@ -181,8 +181,8 @@ define i64 @test_v4i64(<4 x i64> %a0) {
 ; AVX512BW-NEXT: vpmuludq %ymm2, %ymm0, %ymm2
 ; AVX512BW-NEXT: vpsrlq $32, %ymm0, %ymm3
 ; AVX512BW-NEXT: vpmuludq %ymm1, %ymm3, %ymm3
-; AVX512BW-NEXT: vpaddq %ymm3, %ymm2, %ymm2
-; AVX512BW-NEXT: vpsllq $32, %ymm2, %ymm2
+; AVX512BW-NEXT: vpaddq %xmm3, %xmm2, %xmm2
+; AVX512BW-NEXT: vpsllq $32, %xmm2, %xmm2
 ; AVX512BW-NEXT: vpmuludq %ymm1, %ymm0, %ymm0
 ; AVX512BW-NEXT: vpaddq %xmm2, %xmm0, %xmm0
 ; AVX512BW-NEXT: vmovq %xmm0, %rax
@@ -205,8 +205,8 @@ define i64 @test_v4i64(<4 x i64> %a0) {
 ; AVX512BWVL-NEXT: vpmuludq %ymm1, %ymm2, %ymm2
 ; AVX512BWVL-NEXT: vpsrlq $32, %ymm1, %ymm3
 ; AVX512BWVL-NEXT: vpmuludq %ymm3, %ymm0, %ymm3
-; AVX512BWVL-NEXT: vpaddq %ymm2, %ymm3, %ymm2
-; AVX512BWVL-NEXT: vpsllq $32, %ymm2, %ymm2
+; AVX512BWVL-NEXT: vpaddq %xmm2, %xmm3, %xmm2
+; AVX512BWVL-NEXT: vpsllq $32, %xmm2, %xmm2
 ; AVX512BWVL-NEXT: vpmuludq %ymm1, %ymm0, %ymm0
 ; AVX512BWVL-NEXT: vpaddq %xmm2, %xmm0, %xmm0
 ; AVX512BWVL-NEXT: vmovq %xmm0, %rax
@@ -349,8 +349,8 @@ define i64 @test_v8i64(<8 x i64> %a0) {
 ; AVX2-NEXT: vpmuludq %ymm2, %ymm0, %ymm2
 ; AVX2-NEXT: vpsrlq $32, %ymm0, %ymm3
 ; AVX2-NEXT: vpmuludq %ymm1, %ymm3, %ymm3
-; AVX2-NEXT: vpaddq %ymm3, %ymm2, %ymm2
-; AVX2-NEXT: vpsllq $32, %ymm2, %ymm2
+; AVX2-NEXT: vpaddq %xmm3, %xmm2, %xmm2
+; AVX2-NEXT: vpsllq $32, %xmm2, %xmm2
 ; AVX2-NEXT: vpmuludq %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT: vpaddq %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT: vmovq %xmm0, %rax
@@ -382,8 +382,8 @@ define i64 @test_v8i64(<8 x i64> %a0) {
 ; AVX512BW-NEXT: vpmuludq %zmm1, %zmm2, %zmm2
 ; AVX512BW-NEXT: vpsrlq $32, %zmm1, %zmm3
 ; AVX512BW-NEXT: vpmuludq %zmm3, %zmm0, %zmm3
-; AVX512BW-NEXT: vpaddq %zmm2, %zmm3, %zmm2
-; AVX512BW-NEXT: vpsllq $32, %zmm2, %zmm2
+; AVX512BW-NEXT: vpaddq %xmm2, %xmm3, %xmm2
+; AVX512BW-NEXT: vpsllq $32, %xmm2, %xmm2
 ; AVX512BW-NEXT: vpmuludq %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT: vpaddq %xmm2, %xmm0, %xmm0
 ; AVX512BW-NEXT: vmovq %xmm0, %rax
@@ -415,8 +415,8 @@ define i64 @test_v8i64(<8 x i64> %a0) {
 ; AVX512BWVL-NEXT: vpmuludq %zmm1, %zmm2, %zmm2
 ; AVX512BWVL-NEXT: vpsrlq $32, %zmm1, %zmm3
 ; AVX512BWVL-NEXT: vpmuludq %zmm3, %zmm0, %zmm3
-; AVX512BWVL-NEXT: vpaddq %zmm2, %zmm3, %zmm2
-; AVX512BWVL-NEXT: vpsllq $32, %zmm2, %zmm2
+; AVX512BWVL-NEXT: vpaddq %xmm2, %xmm3, %xmm2
+; AVX512BWVL-NEXT: vpsllq $32, %xmm2, %xmm2
 ; AVX512BWVL-NEXT: vpmuludq %zmm1, %zmm0, %zmm0
 ; AVX512BWVL-NEXT: vpaddq %xmm2, %xmm0, %xmm0
 ; AVX512BWVL-NEXT: vmovq %xmm0, %rax
@@ -652,8 +652,8 @@ define i64 @test_v16i64(<16 x i64> %a0) {
 ; AVX2-NEXT: vpmuludq %ymm2, %ymm0, %ymm2
 ; AVX2-NEXT: vpsrlq $32, %ymm0, %ymm3
 ; AVX2-NEXT: vpmuludq %ymm1, %ymm3, %ymm3
-; AVX2-NEXT: vpaddq %ymm3, %ymm2, %ymm2
-; AVX2-NEXT: vpsllq $32, %ymm2, %ymm2
+; AVX2-NEXT: vpaddq %xmm3, %xmm2, %xmm2
+; AVX2-NEXT: vpsllq $32, %xmm2, %xmm2
 ; AVX2-NEXT: vpmuludq %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT: vpaddq %xmm2, %xmm0, %xmm0
 ; AVX2-NEXT: vmovq %xmm0, %rax
@@ -693,8 +693,8 @@ define i64 @test_v16i64(<16 x i64> %a0) {
 ; AVX512BW-NEXT: vpmuludq %zmm1, %zmm2, %zmm2
 ; AVX512BW-NEXT: vpsrlq $32, %zmm1, %zmm3
 ; AVX512BW-NEXT: vpmuludq %zmm3, %zmm0, %zmm3
-; AVX512BW-NEXT: vpaddq %zmm2, %zmm3, %zmm2
-; AVX512BW-NEXT: vpsllq $32, %zmm2, %zmm2
+; AVX512BW-NEXT: vpaddq %xmm2, %xmm3, %xmm2
+; AVX512BW-NEXT: vpsllq $32, %xmm2, %xmm2
 ; AVX512BW-NEXT: vpmuludq %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT: vpaddq %xmm2, %xmm0, %xmm0
 ; AVX512BW-NEXT: vmovq %xmm0, %rax
@@ -734,8 +734,8 @@ define i64 @test_v16i64(<16 x i64> %a0) {
 ; AVX512BWVL-NEXT: vpmuludq %zmm1, %zmm2, %zmm2
 ; AVX512BWVL-NEXT: vpsrlq $32, %zmm1, %zmm3
 ; AVX512BWVL-NEXT: vpmuludq %zmm3, %zmm0, %zmm3
-; AVX512BWVL-NEXT: vpaddq %zmm2, %zmm3, %zmm2
-; AVX512BWVL-NEXT: vpsllq $32, %zmm2, %zmm2
+; AVX512BWVL-NEXT: vpaddq %xmm2, %xmm3, %xmm2
+; AVX512BWVL-NEXT: vpsllq $32, %xmm2, %xmm2
 ; AVX512BWVL-NEXT: vpmuludq %zmm1, %zmm0, %zmm0
 ; AVX512BWVL-NEXT: vpaddq %xmm2, %xmm0, %xmm0
 ; AVX512BWVL-NEXT: vmovq %xmm0, %rax
diff --git a/test/CodeGen/X86/vector-rotate-256.ll b/test/CodeGen/X86/vector-rotate-256.ll
index d7ca7442799..aa3c647cc2f 100644
--- a/test/CodeGen/X86/vector-rotate-256.ll
+++ b/test/CodeGen/X86/vector-rotate-256.ll
@@ -783,16 +783,16 @@ define <32 x i8> @splatvar_rotate_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
 ; AVX2-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
 ; AVX2-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
 ; AVX2-NEXT: vpsllw %xmm2, %ymm0, %ymm3
-; AVX2-NEXT: vpcmpeqd %ymm4, %ymm4, %ymm4
-; AVX2-NEXT: vpsllw %xmm2, %ymm4, %ymm2
+; AVX2-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4
+; AVX2-NEXT: vpsllw %xmm2, %xmm4, %xmm2
 ; AVX2-NEXT: vpbroadcastb %xmm2, %ymm2
 ; AVX2-NEXT: vpand %ymm2, %ymm3, %ymm2
 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
 ; AVX2-NEXT: vpsubb %xmm1, %xmm3, %xmm1
 ; AVX2-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
 ; AVX2-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
-; AVX2-NEXT: vpsrlw %xmm1, %ymm4, %ymm1
-; AVX2-NEXT: vpsrlw $8, %ymm1, %ymm1
+; AVX2-NEXT: vpsrlw %xmm1, %xmm4, %xmm1
+; AVX2-NEXT: vpsrlw $8, %xmm1, %xmm1
 ; AVX2-NEXT: vpbroadcastb %xmm1, %ymm1
 ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT: vpor %ymm0, %ymm2, %ymm0
@@ -804,16 +804,16 @@ define <32 x i8> @splatvar_rotate_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
 ; AVX512F-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
 ; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
 ; AVX512F-NEXT: vpsllw %xmm2, %ymm0, %ymm3
-; AVX512F-NEXT: vpcmpeqd %ymm4, %ymm4, %ymm4
-; AVX512F-NEXT: vpsllw %xmm2, %ymm4, %ymm2
+; AVX512F-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4
+; AVX512F-NEXT: vpsllw %xmm2, %xmm4, %xmm2
 ; AVX512F-NEXT: vpbroadcastb %xmm2, %ymm2
 ; AVX512F-NEXT: vpand %ymm2, %ymm3, %ymm2
 ; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
 ; AVX512F-NEXT: vpsubb %xmm1, %xmm3, %xmm1
 ; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
 ; AVX512F-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
-; AVX512F-NEXT: vpsrlw %xmm1, %ymm4, %ymm1
-; AVX512F-NEXT: vpsrlw $8, %ymm1, %ymm1
+; AVX512F-NEXT: vpsrlw %xmm1, %xmm4, %xmm1
+; AVX512F-NEXT: vpsrlw $8, %xmm1, %xmm1
 ; AVX512F-NEXT: vpbroadcastb %xmm1, %ymm1
 ; AVX512F-NEXT: vpand %ymm1, %ymm0, %ymm0
 ; AVX512F-NEXT: vpor %ymm0, %ymm2, %ymm0
@@ -825,16 +825,16 @@ define <32 x i8> @splatvar_rotate_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
 ; AVX512VL-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
 ; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
 ; AVX512VL-NEXT: vpsllw %xmm2, %ymm0, %ymm3
-; AVX512VL-NEXT: vpcmpeqd %ymm4, %ymm4, %ymm4
-; AVX512VL-NEXT: vpsllw %xmm2, %ymm4, %ymm2
+; AVX512VL-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4
+; AVX512VL-NEXT: vpsllw %xmm2, %xmm4, %xmm2
 ; AVX512VL-NEXT: vpbroadcastb %xmm2, %ymm2
 ; AVX512VL-NEXT: vpand %ymm2, %ymm3, %ymm2
 ; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
 ; AVX512VL-NEXT: vpsubb %xmm1, %xmm3, %xmm1
 ; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
 ; AVX512VL-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
-; AVX512VL-NEXT: vpsrlw %xmm1, %ymm4, %ymm1
-; AVX512VL-NEXT: vpsrlw $8, %ymm1, %ymm1
+; AVX512VL-NEXT: vpsrlw %xmm1, %xmm4, %xmm1
+; AVX512VL-NEXT: vpsrlw $8, %xmm1, %xmm1
 ; AVX512VL-NEXT: vpbroadcastb %xmm1, %ymm1
 ; AVX512VL-NEXT: vpand %ymm1, %ymm0, %ymm0
 ; AVX512VL-NEXT: vpor %ymm0, %ymm2, %ymm0
diff --git a/test/CodeGen/X86/vector-rotate-512.ll b/test/CodeGen/X86/vector-rotate-512.ll
index 54725e32d39..b7793ed6c92 100644
--- a/test/CodeGen/X86/vector-rotate-512.ll
+++ b/test/CodeGen/X86/vector-rotate-512.ll
@@ -377,16 +377,16 @@ define <64 x i8> @splatvar_rotate_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
 ; AVX512F-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
 ; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
 ; AVX512F-NEXT: vpsllw %xmm3, %ymm0, %ymm4
-; AVX512F-NEXT: vpcmpeqd %ymm5, %ymm5, %ymm5
-; AVX512F-NEXT: vpsllw %xmm3, %ymm5, %ymm6
+; AVX512F-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5
+; AVX512F-NEXT: vpsllw %xmm3, %xmm5, %xmm6
 ; AVX512F-NEXT: vpbroadcastb %xmm6, %ymm6
 ; AVX512F-NEXT: vpand %ymm6, %ymm4, %ymm4
 ; AVX512F-NEXT: vmovdqa {{.*#+}} xmm7 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
 ; AVX512F-NEXT: vpsubb %xmm2, %xmm7, %xmm2
 ; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
 ; AVX512F-NEXT: vpsrlw %xmm2, %ymm0, %ymm0
-; AVX512F-NEXT: vpsrlw %xmm2, %ymm5, %ymm5
-; AVX512F-NEXT: vpsrlw $8, %ymm5, %ymm5
+; AVX512F-NEXT: vpsrlw %xmm2, %xmm5, %xmm5
+; AVX512F-NEXT: vpsrlw $8, %xmm5, %xmm5
 ; AVX512F-NEXT: vpbroadcastb %xmm5, %ymm5
 ; AVX512F-NEXT: vpand %ymm5, %ymm0, %ymm0
 ; AVX512F-NEXT: vpor %ymm0, %ymm4, %ymm0
@@ -403,16 +403,16 @@ define <64 x i8> @splatvar_rotate_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
 ; AVX512VL-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
 ; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
 ; AVX512VL-NEXT: vpsllw %xmm3, %ymm0, %ymm4
-; AVX512VL-NEXT: vpcmpeqd %ymm5, %ymm5, %ymm5
-; AVX512VL-NEXT: vpsllw %xmm3, %ymm5, %ymm6
+; AVX512VL-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5
+; AVX512VL-NEXT: vpsllw %xmm3, %xmm5, %xmm6
 ; AVX512VL-NEXT: vpbroadcastb %xmm6, %ymm6
 ; AVX512VL-NEXT: vpand %ymm6, %ymm4, %ymm4
 ; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm7 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
 ; AVX512VL-NEXT: vpsubb %xmm2, %xmm7, %xmm2
 ; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
 ; AVX512VL-NEXT: vpsrlw %xmm2, %ymm0, %ymm0
-; AVX512VL-NEXT: vpsrlw %xmm2, %ymm5, %ymm5
-; AVX512VL-NEXT: vpsrlw $8, %ymm5, %ymm5
+; AVX512VL-NEXT: vpsrlw %xmm2, %xmm5, %xmm5
+; AVX512VL-NEXT: vpsrlw $8, %xmm5, %xmm5
 ; AVX512VL-NEXT: vpbroadcastb %xmm5, %ymm5
 ; AVX512VL-NEXT: vpand %ymm5, %ymm0, %ymm0
 ; AVX512VL-NEXT: vpor %ymm0, %ymm4, %ymm0
@@ -430,13 +430,13 @@ define <64 x i8> @splatvar_rotate_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
 ; AVX512BW-NEXT: vpsubb %xmm1, %xmm3, %xmm1
 ; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
 ; AVX512BW-NEXT: vpsllw %xmm2, %zmm0, %zmm3
-; AVX512BW-NEXT: vpternlogd $255, %zmm4, %zmm4, %zmm4
-; AVX512BW-NEXT: vpsllw %xmm2, %zmm4, %zmm2
+; AVX512BW-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4
+; AVX512BW-NEXT: vpsllw %xmm2, %xmm4, %xmm2
 ; AVX512BW-NEXT: vpbroadcastb %xmm2, %zmm2
 ; AVX512BW-NEXT: vpandq %zmm2, %zmm3, %zmm2
 ; AVX512BW-NEXT: vpsrlw %xmm1, %zmm0, %zmm0
-; AVX512BW-NEXT: vpsrlw %xmm1, %zmm4, %zmm1
-; AVX512BW-NEXT: vpsrlw $8, %zmm1, %zmm1
+; AVX512BW-NEXT: vpsrlw %xmm1, %xmm4, %xmm1
+; AVX512BW-NEXT: vpsrlw $8, %xmm1, %xmm1
 ; AVX512BW-NEXT: vpbroadcastb %xmm1, %zmm1
 ; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT: vporq %zmm0, %zmm2, %zmm0
@@ -449,13 +449,13 @@ define <64 x i8> @splatvar_rotate_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
 ; AVX512VLBW-NEXT: vpsubb %xmm1, %xmm3, %xmm1
 ; AVX512VLBW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
 ; AVX512VLBW-NEXT: vpsllw %xmm2, %zmm0, %zmm3
-; AVX512VLBW-NEXT: vpternlogd $255, %zmm4, %zmm4, %zmm4
-; AVX512VLBW-NEXT: vpsllw %xmm2, %zmm4, %zmm2
+; AVX512VLBW-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4
+; AVX512VLBW-NEXT: vpsllw %xmm2, %xmm4, %xmm2
 ; AVX512VLBW-NEXT: vpbroadcastb %xmm2, %zmm2
 ; AVX512VLBW-NEXT: vpandq %zmm2, %zmm3, %zmm2
 ; AVX512VLBW-NEXT: vpsrlw %xmm1, %zmm0, %zmm0
-; AVX512VLBW-NEXT: vpsrlw %xmm1, %zmm4, %zmm1
-; AVX512VLBW-NEXT: vpsrlw $8, %zmm1, %zmm1
+; AVX512VLBW-NEXT: vpsrlw %xmm1, %xmm4, %xmm1
+; AVX512VLBW-NEXT: vpsrlw $8, %xmm1, %xmm1
 ; AVX512VLBW-NEXT: vpbroadcastb %xmm1, %zmm1
 ; AVX512VLBW-NEXT: vpandq %zmm1, %zmm0, %zmm0
 ; AVX512VLBW-NEXT: vporq %zmm0, %zmm2, %zmm0
diff --git a/test/CodeGen/X86/vector-shift-ashr-256.ll b/test/CodeGen/X86/vector-shift-ashr-256.ll
index fdf7f4aa109..7f6d49e7660 100644
--- a/test/CodeGen/X86/vector-shift-ashr-256.ll
+++ b/test/CodeGen/X86/vector-shift-ashr-256.ll
@@ -883,9 +883,9 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
 ; AVX2: # %bb.0:
 ; AVX2-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
 ; AVX2-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
-; AVX2-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
-; AVX2-NEXT: vpsrlw %xmm1, %ymm2, %ymm2
-; AVX2-NEXT: vpsrlw $8, %ymm2, %ymm2
+; AVX2-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
+; AVX2-NEXT: vpsrlw %xmm1, %xmm2, %xmm2
+; AVX2-NEXT: vpsrlw $8, %xmm2, %xmm2
 ; AVX2-NEXT: vpbroadcastb %xmm2, %ymm2
 ; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896]
@@ -922,9 +922,9 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
 ; AVX512DQ: # %bb.0:
 ; AVX512DQ-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
 ; AVX512DQ-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
-; AVX512DQ-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
-; AVX512DQ-NEXT: vpsrlw %xmm1, %ymm2, %ymm2
-; AVX512DQ-NEXT: vpsrlw $8, %ymm2, %ymm2
+; AVX512DQ-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
+; AVX512DQ-NEXT: vpsrlw %xmm1, %xmm2, %xmm2
+; AVX512DQ-NEXT: vpsrlw $8, %xmm2, %xmm2
 ; AVX512DQ-NEXT: vpbroadcastb %xmm2, %ymm2
 ; AVX512DQ-NEXT: vpand %ymm2, %ymm0, %ymm0
 ; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm2 = [32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896]
@@ -946,9 +946,9 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
 ; AVX512DQVL: # %bb.0:
 ; AVX512DQVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
 ; AVX512DQVL-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
-; AVX512DQVL-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
-; AVX512DQVL-NEXT: vpsrlw %xmm1, %ymm2, %ymm2
-; AVX512DQVL-NEXT: vpsrlw $8, %ymm2, %ymm2
+; AVX512DQVL-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
+; AVX512DQVL-NEXT: vpsrlw %xmm1, %xmm2, %xmm2
+; AVX512DQVL-NEXT: vpsrlw $8, %xmm2, %xmm2
 ; AVX512DQVL-NEXT: vpbroadcastb %xmm2, %ymm2
 ; AVX512DQVL-NEXT: vpand %ymm2, %ymm0, %ymm0
 ; AVX512DQVL-NEXT: vmovdqa {{.*#+}} ymm2 = [32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896]
@@ -990,9 +990,9 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
 ; X32-AVX2: # %bb.0:
 ; X32-AVX2-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
 ; X32-AVX2-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
-; X32-AVX2-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
-; X32-AVX2-NEXT: vpsrlw %xmm1, %ymm2, %ymm2
-; X32-AVX2-NEXT: vpsrlw $8, %ymm2, %ymm2
+; X32-AVX2-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
+; X32-AVX2-NEXT: vpsrlw %xmm1, %xmm2, %xmm2
+; X32-AVX2-NEXT: vpsrlw $8, %xmm2, %xmm2
 ; X32-AVX2-NEXT: vpbroadcastb %xmm2, %ymm2
 ; X32-AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
 ; X32-AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896]
@@ -1185,7 +1185,7 @@ define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) nounwind {
 ; AVX2: # %bb.0:
 ; AVX2-NEXT: vpmulhw {{.*}}(%rip), %ymm0, %ymm1
 ; AVX2-NEXT: vpblendw {{.*#+}} ymm2 = ymm0[0],ymm1[1,2,3,4,5,6,7],ymm0[8],ymm1[9,10,11,12,13,14,15]
-; AVX2-NEXT: vpsraw $1, %ymm0, %ymm0
+; AVX2-NEXT: vpsraw $1, %xmm0, %xmm0
 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm2[0],ymm0[1],ymm2[2,3,4,5,6,7,8],ymm0[9],ymm2[10,11,12,13,14,15]
 ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
 ; AVX2-NEXT: retq
@@ -1248,7 +1248,7 @@ define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) nounwind {
 ; X32-AVX2: # %bb.0:
 ; X32-AVX2-NEXT: vpmulhw {{\.LCPI.*}}, %ymm0, %ymm1
 ; X32-AVX2-NEXT: vpblendw {{.*#+}} ymm2 = ymm0[0],ymm1[1,2,3,4,5,6,7],ymm0[8],ymm1[9,10,11,12,13,14,15]
-; X32-AVX2-NEXT: vpsraw $1, %ymm0, %ymm0
+; X32-AVX2-NEXT: vpsraw $1, %xmm0, %xmm0
 ; X32-AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm2[0],ymm0[1],ymm2[2,3,4,5,6,7,8],ymm0[9],ymm2[10,11,12,13,14,15]
 ; X32-AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
 ; X32-AVX2-NEXT: retl
diff --git a/test/CodeGen/X86/vector-shift-ashr-512.ll b/test/CodeGen/X86/vector-shift-ashr-512.ll
index 5635f1ea3dd..e4f676cecd6 100644
--- a/test/CodeGen/X86/vector-shift-ashr-512.ll
+++ b/test/CodeGen/X86/vector-shift-ashr-512.ll
@@ -183,9 +183,9 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
 ; AVX512DQ: # %bb.0:
 ; AVX512DQ-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
 ; AVX512DQ-NEXT: vpsrlw %xmm2, %ymm0, %ymm0
-; AVX512DQ-NEXT: vpcmpeqd %ymm3, %ymm3, %ymm3
-; AVX512DQ-NEXT: vpsrlw %xmm2, %ymm3, %ymm3
-; AVX512DQ-NEXT: vpsrlw $8, %ymm3, %ymm3
+; AVX512DQ-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
+; AVX512DQ-NEXT: vpsrlw %xmm2, %xmm3, %xmm3
+; AVX512DQ-NEXT: vpsrlw $8, %xmm3, %xmm3
 ; AVX512DQ-NEXT: vpbroadcastb %xmm3, %ymm3
 ; AVX512DQ-NEXT: vpand %ymm3, %ymm0, %ymm0
 ; AVX512DQ-NEXT: vmovdqa {{.*#+}} ymm4 = [32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896]
@@ -202,9 +202,9 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
 ; AVX512BW: # %bb.0:
 ; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
 ; AVX512BW-NEXT: vpsrlw %xmm1, %zmm0, %zmm0
-; AVX512BW-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2
-; AVX512BW-NEXT: vpsrlw %xmm1, %zmm2, %zmm2
-; AVX512BW-NEXT: vpsrlw $8, %zmm2, %zmm2
+; AVX512BW-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
+; AVX512BW-NEXT: vpsrlw %xmm1, %xmm2, %xmm2
+; AVX512BW-NEXT: vpsrlw $8, %xmm2, %xmm2
 ; AVX512BW-NEXT: vpbroadcastb %xmm2, %zmm2
 ; AVX512BW-NEXT: vpandq %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896]
diff --git a/test/CodeGen/X86/vector-shift-lshr-256.ll b/test/CodeGen/X86/vector-shift-lshr-256.ll
index ae087ba3618..698a45fad4d 100644
--- a/test/CodeGen/X86/vector-shift-lshr-256.ll
+++ b/test/CodeGen/X86/vector-shift-lshr-256.ll
@@ -720,9 +720,9 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
 ; AVX2: # %bb.0:
 ; AVX2-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
 ; AVX2-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
-; AVX2-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
-; AVX2-NEXT: vpsrlw %xmm1, %ymm2, %ymm1
-; AVX2-NEXT: vpsrlw $8, %ymm1, %ymm1
+; AVX2-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
+; AVX2-NEXT: vpsrlw %xmm1, %xmm2, %xmm1
+; AVX2-NEXT: vpsrlw $8, %xmm1, %xmm1
 ; AVX2-NEXT: vpbroadcastb %xmm1, %ymm1
 ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT: retq
@@ -755,9 +755,9 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
 ; AVX512DQ: # %bb.0:
 ; AVX512DQ-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
 ; AVX512DQ-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
-; AVX512DQ-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
-; AVX512DQ-NEXT: vpsrlw %xmm1, %ymm2, %ymm1
-; AVX512DQ-NEXT: vpsrlw $8, %ymm1, %ymm1
+; AVX512DQ-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
+; AVX512DQ-NEXT: vpsrlw %xmm1, %xmm2, %xmm1
+; AVX512DQ-NEXT: vpsrlw $8, %xmm1, %xmm1
 ; AVX512DQ-NEXT: vpbroadcastb %xmm1, %ymm1
 ; AVX512DQ-NEXT: vpand %ymm1, %ymm0, %ymm0
 ; AVX512DQ-NEXT: retq
@@ -775,9 +775,9 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
 ; AVX512DQVL: # %bb.0:
 ; AVX512DQVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
 ; AVX512DQVL-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
-; AVX512DQVL-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
-; AVX512DQVL-NEXT: vpsrlw %xmm1, %ymm2, %ymm1
-; AVX512DQVL-NEXT: vpsrlw $8, %ymm1, %ymm1
+; AVX512DQVL-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
+; AVX512DQVL-NEXT: vpsrlw %xmm1, %xmm2, %xmm1
+; AVX512DQVL-NEXT: vpsrlw $8, %xmm1, %xmm1
 ; AVX512DQVL-NEXT: vpbroadcastb %xmm1, %ymm1
 ; AVX512DQVL-NEXT: vpand %ymm1, %ymm0, %ymm0
 ; AVX512DQVL-NEXT: retq
@@ -809,9 +809,9 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
 ; X32-AVX2: # %bb.0:
 ; X32-AVX2-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
 ; X32-AVX2-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
-; X32-AVX2-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
-; X32-AVX2-NEXT: vpsrlw %xmm1, %ymm2, %ymm1
-; X32-AVX2-NEXT: vpsrlw $8, %ymm1, %ymm1
+; X32-AVX2-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
+; X32-AVX2-NEXT: vpsrlw %xmm1, %xmm2, %xmm1
+; X32-AVX2-NEXT: vpsrlw $8, %xmm1, %xmm1
 ; X32-AVX2-NEXT: vpbroadcastb %xmm1, %ymm1
 ; X32-AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
 ; X32-AVX2-NEXT: retl
diff --git a/test/CodeGen/X86/vector-shift-lshr-512.ll b/test/CodeGen/X86/vector-shift-lshr-512.ll
index 9a44fc31820..f32b56d6035 100644
--- a/test/CodeGen/X86/vector-shift-lshr-512.ll
+++ b/test/CodeGen/X86/vector-shift-lshr-512.ll
@@ -147,9 +147,9 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
 ; AVX512DQ: # %bb.0:
 ; AVX512DQ-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
 ; AVX512DQ-NEXT: vpsrlw %xmm2, %ymm0, %ymm0
-; AVX512DQ-NEXT: vpcmpeqd %ymm3, %ymm3, %ymm3
-; AVX512DQ-NEXT: vpsrlw %xmm2, %ymm3, %ymm3
-; AVX512DQ-NEXT: vpsrlw $8, %ymm3, %ymm3
+; AVX512DQ-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
+; AVX512DQ-NEXT: vpsrlw %xmm2, %xmm3, %xmm3
+; AVX512DQ-NEXT: vpsrlw $8, %xmm3, %xmm3
 ; AVX512DQ-NEXT: vpbroadcastb %xmm3, %ymm3
 ; AVX512DQ-NEXT: vpand %ymm3, %ymm0, %ymm0
 ; AVX512DQ-NEXT: vpsrlw %xmm2, %ymm1, %ymm1
@@ -160,9 +160,9 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
 ; AVX512BW: # %bb.0:
 ; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
 ; AVX512BW-NEXT: vpsrlw %xmm1, %zmm0, %zmm0
-; AVX512BW-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2
-; AVX512BW-NEXT: vpsrlw %xmm1, %zmm2, %zmm1
-; AVX512BW-NEXT: vpsrlw $8, %zmm1, %zmm1
+; AVX512BW-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
+; AVX512BW-NEXT: vpsrlw %xmm1, %xmm2, %xmm1
+; AVX512BW-NEXT: vpsrlw $8, %xmm1, %xmm1
 ; AVX512BW-NEXT: vpbroadcastb %xmm1, %zmm1
 ; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT: retq
diff --git a/test/CodeGen/X86/vector-shift-shl-256.ll b/test/CodeGen/X86/vector-shift-shl-256.ll
index bcd24b16795..273ff325f26 100644
--- a/test/CodeGen/X86/vector-shift-shl-256.ll
+++ b/test/CodeGen/X86/vector-shift-shl-256.ll
@@ -653,8 +653,8 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
 ; AVX2: # %bb.0:
 ; AVX2-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
 ; AVX2-NEXT: vpsllw %xmm1, %ymm0, %ymm0
-; AVX2-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
-; AVX2-NEXT: vpsllw %xmm1, %ymm2, %ymm1
+; AVX2-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
+; AVX2-NEXT: vpsllw %xmm1, %xmm2, %xmm1
 ; AVX2-NEXT: vpbroadcastb %xmm1, %ymm1
 ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT: retq
@@ -683,8 +683,8 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
 ; AVX512DQ: # %bb.0:
 ; AVX512DQ-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
 ; AVX512DQ-NEXT: vpsllw %xmm1, %ymm0, %ymm0
-; AVX512DQ-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
-; AVX512DQ-NEXT: vpsllw %xmm1, %ymm2, %ymm1
+; AVX512DQ-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
+; AVX512DQ-NEXT: vpsllw %xmm1, %xmm2, %xmm1
 ; AVX512DQ-NEXT: vpbroadcastb %xmm1, %ymm1
 ; AVX512DQ-NEXT: vpand %ymm1, %ymm0, %ymm0
 ; AVX512DQ-NEXT: retq
@@ -702,8 +702,8 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
 ; AVX512DQVL: # %bb.0:
 ; AVX512DQVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
 ; AVX512DQVL-NEXT: vpsllw %xmm1, %ymm0, %ymm0
-; AVX512DQVL-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
-; AVX512DQVL-NEXT: vpsllw %xmm1, %ymm2, %ymm1
+; AVX512DQVL-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
+; AVX512DQVL-NEXT: vpsllw %xmm1, %xmm2, %xmm1
 ; AVX512DQVL-NEXT: vpbroadcastb %xmm1, %ymm1
 ; AVX512DQVL-NEXT: vpand %ymm1, %ymm0, %ymm0
 ; AVX512DQVL-NEXT: retq
@@ -736,8 +736,8 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
 ; X32-AVX2: # %bb.0:
 ; X32-AVX2-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
 ; X32-AVX2-NEXT: vpsllw %xmm1, %ymm0, %ymm0
-; X32-AVX2-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
-; X32-AVX2-NEXT: vpsllw %xmm1, %ymm2, %ymm1
+; X32-AVX2-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
+; X32-AVX2-NEXT: vpsllw %xmm1, %xmm2, %xmm1
 ; X32-AVX2-NEXT: vpbroadcastb %xmm1, %ymm1
 ; X32-AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
 ; X32-AVX2-NEXT: retl
diff --git a/test/CodeGen/X86/vector-shift-shl-512.ll b/test/CodeGen/X86/vector-shift-shl-512.ll
index f1e8515b927..f63e1ab8d94 100644
--- a/test/CodeGen/X86/vector-shift-shl-512.ll
+++ b/test/CodeGen/X86/vector-shift-shl-512.ll
@@ -142,8 +142,8 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
 ; AVX512DQ: # %bb.0:
 ; AVX512DQ-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
 ; AVX512DQ-NEXT: vpsllw %xmm2, %ymm0, %ymm0
-; AVX512DQ-NEXT: vpcmpeqd %ymm3, %ymm3, %ymm3
-; AVX512DQ-NEXT: vpsllw %xmm2, %ymm3, %ymm3
+; AVX512DQ-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
+; AVX512DQ-NEXT: vpsllw %xmm2, %xmm3, %xmm3
 ; AVX512DQ-NEXT: vpbroadcastb %xmm3, %ymm3
 ; AVX512DQ-NEXT: vpand %ymm3, %ymm0, %ymm0
 ; AVX512DQ-NEXT: vpsllw %xmm2, %ymm1, %ymm1
@@ -154,8 +154,8 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
 ; AVX512BW: # %bb.0:
 ; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
 ; AVX512BW-NEXT: vpsllw %xmm1, %zmm0, %zmm0
-; AVX512BW-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2
-; AVX512BW-NEXT: vpsllw %xmm1, %zmm2, %zmm1
+; AVX512BW-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
+; AVX512BW-NEXT: vpsllw %xmm1, %xmm2, %xmm1
 ; AVX512BW-NEXT: vpbroadcastb %xmm1, %zmm1
 ; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT: retq