From 1b1457fd624d6ada42b2cb5ec1d925b76da75e37 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sat, 5 Jan 2019 21:40:07 +0000 Subject: [PATCH] [X86] Allow combineBitcastvxi1 to use pmovmskb on avx512 targets if the input is a truncate from v16i8/v32i8. This is especially helpful on targets without avx512bw since we don't have a good way to convert from v16i8/v32i8 to v16i1/v32i1 for the truncate anyway. If we're just going to convert it to a GPR we might as well use pmovmskb to accomplish both. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@350480 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 10 +- test/CodeGen/X86/avx512-ext.ll | 33 +---- test/CodeGen/X86/avx512-schedule.ll | 6 +- test/CodeGen/X86/bitcast-and-setcc-128.ll | 5 +- test/CodeGen/X86/bitcast-and-setcc-256.ll | 10 +- test/CodeGen/X86/bitcast-setcc-128.ll | 5 +- test/CodeGen/X86/bitcast-setcc-256.ll | 10 +- test/CodeGen/X86/broadcastm-lowering.ll | 12 +- test/CodeGen/X86/movmsk-cmp.ll | 170 +++++----------------- 9 files changed, 61 insertions(+), 200 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 4056b4982b0..67f2929dae7 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -32737,9 +32737,17 @@ static SDValue combineBitcastvxi1(SelectionDAG &DAG, SDValue BitCast, if (!VT.isScalarInteger() || !VecVT.isSimple()) return SDValue(); + // If the input is a truncate from v16i8 or v32i8 go ahead and use a + // movmskb even with avx512. This will be better than truncating to vXi1 and + // using a kmov. This can especially help KNL if the input is a v16i8/v32i8 + // vpcmpeqb/vpcmpgtb. + bool IsTruncated = N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() && + (N0.getOperand(0).getValueType() == MVT::v16i8 || + N0.getOperand(0).getValueType() == MVT::v32i8); + // With AVX512 vxi1 types are legal and we prefer using k-regs. // MOVMSK is supported in SSE2 or later. 
- if (Subtarget.hasAVX512() || !Subtarget.hasSSE2()) + if (!Subtarget.hasSSE2() || (Subtarget.hasAVX512() && !IsTruncated)) return SDValue(); // There are MOVMSK flavors for types v16i8, v32i8, v4f32, v8f32, v4f64 and diff --git a/test/CodeGen/X86/avx512-ext.ll b/test/CodeGen/X86/avx512-ext.ll index 2381180af76..072e3c8bdae 100644 --- a/test/CodeGen/X86/avx512-ext.ll +++ b/test/CodeGen/X86/avx512-ext.ll @@ -1644,33 +1644,12 @@ define <8 x i64> @zext_8i1_to_8xi64(i8 %b) { } define i16 @trunc_16i8_to_16i1(<16 x i8> %a) { -; KNL-LABEL: trunc_16i8_to_16i1: -; KNL: # %bb.0: -; KNL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero -; KNL-NEXT: vpslld $31, %zmm0, %zmm0 -; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 -; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: # kill: def $ax killed $ax killed $eax -; KNL-NEXT: vzeroupper -; KNL-NEXT: retq -; -; SKX-LABEL: trunc_16i8_to_16i1: -; SKX: # %bb.0: -; SKX-NEXT: vpsllw $7, %xmm0, %xmm0 -; SKX-NEXT: vpmovb2m %xmm0, %k0 -; SKX-NEXT: kmovd %k0, %eax -; SKX-NEXT: # kill: def $ax killed $ax killed $eax -; SKX-NEXT: retq -; -; AVX512DQNOBW-LABEL: trunc_16i8_to_16i1: -; AVX512DQNOBW: # %bb.0: -; AVX512DQNOBW-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero -; AVX512DQNOBW-NEXT: vpslld $31, %zmm0, %zmm0 -; AVX512DQNOBW-NEXT: vpmovd2m %zmm0, %k0 -; 
AVX512DQNOBW-NEXT: kmovw %k0, %eax -; AVX512DQNOBW-NEXT: # kill: def $ax killed $ax killed $eax -; AVX512DQNOBW-NEXT: vzeroupper -; AVX512DQNOBW-NEXT: retq +; ALL-LABEL: trunc_16i8_to_16i1: +; ALL: # %bb.0: +; ALL-NEXT: vpsllw $7, %xmm0, %xmm0 +; ALL-NEXT: vpmovmskb %xmm0, %eax +; ALL-NEXT: # kill: def $ax killed $ax killed $eax +; ALL-NEXT: retq %mask_b = trunc <16 x i8>%a to <16 x i1> %mask = bitcast <16 x i1> %mask_b to i16 ret i16 %mask diff --git a/test/CodeGen/X86/avx512-schedule.ll b/test/CodeGen/X86/avx512-schedule.ll index 5c44d8679b9..2c9b6e13481 100755 --- a/test/CodeGen/X86/avx512-schedule.ll +++ b/test/CodeGen/X86/avx512-schedule.ll @@ -4285,16 +4285,14 @@ define i16 @trunc_16i8_to_16i1(<16 x i8> %a) { ; GENERIC-LABEL: trunc_16i8_to_16i1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vpmovb2m %xmm0, %k0 # sched: [1:0.33] -; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33] +; GENERIC-NEXT: vpmovmskb %xmm0, %eax # sched: [2:1.00] ; GENERIC-NEXT: # kill: def $ax killed $ax killed $eax ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: trunc_16i8_to_16i1: ; SKX: # %bb.0: ; SKX-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vpmovb2m %xmm0, %k0 # sched: [1:1.00] -; SKX-NEXT: kmovd %k0, %eax # sched: [3:1.00] +; SKX-NEXT: vpmovmskb %xmm0, %eax # sched: [2:1.00] ; SKX-NEXT: # kill: def $ax killed $ax killed $eax ; SKX-NEXT: retq # sched: [7:1.00] %mask_b = trunc <16 x i8>%a to <16 x i1> diff --git a/test/CodeGen/X86/bitcast-and-setcc-128.ll b/test/CodeGen/X86/bitcast-and-setcc-128.ll index 0a2e154f5b9..289ddcb194b 100644 --- a/test/CodeGen/X86/bitcast-and-setcc-128.ll +++ b/test/CodeGen/X86/bitcast-and-setcc-128.ll @@ -159,11 +159,8 @@ define i16 @v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) { ; AVX512F-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0 ; AVX512F-NEXT: vpcmpgtb %xmm3, %xmm2, %xmm1 ; AVX512F-NEXT: vpand %xmm1, %xmm0, %xmm0 -; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0 -; AVX512F-NEXT: 
vptestmd %zmm0, %zmm0, %k0 -; AVX512F-NEXT: kmovw %k0, %eax +; AVX512F-NEXT: vpmovmskb %xmm0, %eax ; AVX512F-NEXT: # kill: def $ax killed $ax killed $eax -; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; ; AVX512BW-LABEL: v16i8: diff --git a/test/CodeGen/X86/bitcast-and-setcc-256.ll b/test/CodeGen/X86/bitcast-and-setcc-256.ll index 0f3b8c94540..426cabe5f4c 100644 --- a/test/CodeGen/X86/bitcast-and-setcc-256.ll +++ b/test/CodeGen/X86/bitcast-and-setcc-256.ll @@ -399,15 +399,7 @@ define i32 @v32i8(<32 x i8> %a, <32 x i8> %b, <32 x i8> %c, <32 x i8> %d) { ; AVX512F-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0 ; AVX512F-NEXT: vpcmpgtb %ymm3, %ymm2, %ymm1 ; AVX512F-NEXT: vpand %ymm1, %ymm0, %ymm0 -; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm1 -; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0 -; AVX512F-NEXT: kmovw %k0, %ecx -; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm0 -; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0 -; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0 -; AVX512F-NEXT: kmovw %k0, %eax -; AVX512F-NEXT: shll $16, %eax -; AVX512F-NEXT: orl %ecx, %eax +; AVX512F-NEXT: vpmovmskb %ymm0, %eax ; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; diff --git a/test/CodeGen/X86/bitcast-setcc-128.ll b/test/CodeGen/X86/bitcast-setcc-128.ll index f803901c0e7..fb585974e5b 100644 --- a/test/CodeGen/X86/bitcast-setcc-128.ll +++ b/test/CodeGen/X86/bitcast-setcc-128.ll @@ -128,11 +128,8 @@ define i16 @v16i8(<16 x i8> %a, <16 x i8> %b) { ; AVX512F-LABEL: v16i8: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0 -; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0 -; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0 -; AVX512F-NEXT: kmovw %k0, %eax +; AVX512F-NEXT: vpmovmskb %xmm0, %eax ; AVX512F-NEXT: # kill: def $ax killed $ax killed $eax -; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; ; AVX512BW-LABEL: v16i8: diff --git a/test/CodeGen/X86/bitcast-setcc-256.ll b/test/CodeGen/X86/bitcast-setcc-256.ll index d349ae3bc40..b0af971366c 100644 --- a/test/CodeGen/X86/bitcast-setcc-256.ll +++ 
b/test/CodeGen/X86/bitcast-setcc-256.ll @@ -184,15 +184,7 @@ define i32 @v32i8(<32 x i8> %a, <32 x i8> %b) { ; AVX512F-LABEL: v32i8: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0 -; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm1 -; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0 -; AVX512F-NEXT: kmovw %k0, %ecx -; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm0 -; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0 -; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0 -; AVX512F-NEXT: kmovw %k0, %eax -; AVX512F-NEXT: shll $16, %eax -; AVX512F-NEXT: orl %ecx, %eax +; AVX512F-NEXT: vpmovmskb %ymm0, %eax ; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; diff --git a/test/CodeGen/X86/broadcastm-lowering.ll b/test/CodeGen/X86/broadcastm-lowering.ll index d25a4da9843..986d313cb8d 100644 --- a/test/CodeGen/X86/broadcastm-lowering.ll +++ b/test/CodeGen/X86/broadcastm-lowering.ll @@ -43,15 +43,9 @@ define <4 x i32> @test_mm_epi32(<16 x i8> %a, <16 x i8> %b) { ; AVX512CD-LABEL: test_mm_epi32: ; AVX512CD: # %bb.0: # %entry ; AVX512CD-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 -; AVX512CD-NEXT: vpmovsxbd %xmm0, %zmm0 -; AVX512CD-NEXT: vptestmd %zmm0, %zmm0, %k0 -; AVX512CD-NEXT: kmovw %k0, %eax -; AVX512CD-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; AVX512CD-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 -; AVX512CD-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 -; AVX512CD-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 -; AVX512CD-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 -; AVX512CD-NEXT: vzeroupper +; AVX512CD-NEXT: vpmovmskb %xmm0, %eax +; AVX512CD-NEXT: vmovd %eax, %xmm0 +; AVX512CD-NEXT: vpbroadcastd %xmm0, %xmm0 ; AVX512CD-NEXT: retq ; ; AVX512VLCDBW-LABEL: test_mm_epi32: diff --git a/test/CodeGen/X86/movmsk-cmp.ll b/test/CodeGen/X86/movmsk-cmp.ll index 718ade02435..93d86b0e11d 100644 --- a/test/CodeGen/X86/movmsk-cmp.ll +++ b/test/CodeGen/X86/movmsk-cmp.ll @@ -22,13 +22,9 @@ define i1 @allones_v16i8_sign(<16 x i8> %arg) { ; ; KNL-LABEL: allones_v16i8_sign: ; KNL: # %bb.0: -; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; KNL-NEXT: vpcmpgtb %xmm0, %xmm1, 
%xmm0 -; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 -; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 -; KNL-NEXT: kortestw %k0, %k0 -; KNL-NEXT: setb %al -; KNL-NEXT: vzeroupper +; KNL-NEXT: vpmovmskb %xmm0, %eax +; KNL-NEXT: cmpw $-1, %ax +; KNL-NEXT: sete %al ; KNL-NEXT: retq ; ; SKX-LABEL: allones_v16i8_sign: @@ -60,13 +56,9 @@ define i1 @allzeros_v16i8_sign(<16 x i8> %arg) { ; ; KNL-LABEL: allzeros_v16i8_sign: ; KNL: # %bb.0: -; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; KNL-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0 -; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 -; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 -; KNL-NEXT: kortestw %k0, %k0 +; KNL-NEXT: vpmovmskb %xmm0, %eax +; KNL-NEXT: testw %ax, %ax ; KNL-NEXT: sete %al -; KNL-NEXT: vzeroupper ; KNL-NEXT: retq ; ; SKX-LABEL: allzeros_v16i8_sign: @@ -117,18 +109,8 @@ define i1 @allones_v32i8_sign(<32 x i8> %arg) { ; ; KNL-LABEL: allones_v32i8_sign: ; KNL: # %bb.0: -; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; KNL-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0 -; KNL-NEXT: vpmovsxbd %xmm0, %zmm1 -; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0 -; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0 -; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 -; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 -; KNL-NEXT: kmovw %k0, %ecx -; KNL-NEXT: shll $16, %ecx -; KNL-NEXT: orl %eax, %ecx -; KNL-NEXT: cmpl $-1, %ecx +; KNL-NEXT: vpmovmskb %ymm0, %eax +; KNL-NEXT: cmpl $-1, %eax ; KNL-NEXT: sete %al ; KNL-NEXT: vzeroupper ; KNL-NEXT: retq @@ -180,17 +162,8 @@ define i1 @allzeros_v32i8_sign(<32 x i8> %arg) { ; ; KNL-LABEL: allzeros_v32i8_sign: ; KNL: # %bb.0: -; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; KNL-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0 -; KNL-NEXT: vpmovsxbd %xmm0, %zmm1 -; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0 -; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0 -; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 -; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 -; KNL-NEXT: kmovw %k0, %ecx -; KNL-NEXT: shll $16, %ecx -; KNL-NEXT: orl %eax, %ecx +; KNL-NEXT: vpmovmskb %ymm0, %eax +; KNL-NEXT: testl %eax, 
%eax ; KNL-NEXT: sete %al ; KNL-NEXT: vzeroupper ; KNL-NEXT: retq @@ -1438,14 +1411,10 @@ define i1 @allones_v16i8_and1(<16 x i8> %arg) { ; ; KNL-LABEL: allones_v16i8_and1: ; KNL: # %bb.0: -; KNL-NEXT: vmovdqa {{.*#+}} xmm1 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] -; KNL-NEXT: vpand %xmm1, %xmm0, %xmm0 -; KNL-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 -; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 -; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 -; KNL-NEXT: kortestw %k0, %k0 -; KNL-NEXT: setb %al -; KNL-NEXT: vzeroupper +; KNL-NEXT: vpsllw $7, %xmm0, %xmm0 +; KNL-NEXT: vpmovmskb %xmm0, %eax +; KNL-NEXT: cmpw $-1, %ax +; KNL-NEXT: sete %al ; KNL-NEXT: retq ; ; SKX-LABEL: allones_v16i8_and1: @@ -1480,14 +1449,10 @@ define i1 @allzeros_v16i8_and1(<16 x i8> %arg) { ; ; KNL-LABEL: allzeros_v16i8_and1: ; KNL: # %bb.0: -; KNL-NEXT: vmovdqa {{.*#+}} xmm1 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] -; KNL-NEXT: vpand %xmm1, %xmm0, %xmm0 -; KNL-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 -; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 -; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 -; KNL-NEXT: kortestw %k0, %k0 +; KNL-NEXT: vpsllw $7, %xmm0, %xmm0 +; KNL-NEXT: vpmovmskb %xmm0, %eax +; KNL-NEXT: testw %ax, %ax ; KNL-NEXT: sete %al -; KNL-NEXT: vzeroupper ; KNL-NEXT: retq ; ; SKX-LABEL: allzeros_v16i8_and1: @@ -1546,19 +1511,9 @@ define i1 @allones_v32i8_and1(<32 x i8> %arg) { ; ; KNL-LABEL: allones_v32i8_and1: ; KNL: # %bb.0: -; KNL-NEXT: vmovdqa {{.*#+}} ymm1 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] -; KNL-NEXT: vpand %ymm1, %ymm0, %ymm0 -; KNL-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 -; KNL-NEXT: vpmovsxbd %xmm0, %zmm1 -; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0 -; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0 -; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 -; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 -; KNL-NEXT: kmovw %k0, %ecx -; KNL-NEXT: shll $16, %ecx -; KNL-NEXT: orl %eax, %ecx -; KNL-NEXT: cmpl $-1, %ecx +; KNL-NEXT: vpsllw $7, %ymm0, %ymm0 +; KNL-NEXT: vpmovmskb %ymm0, %eax +; KNL-NEXT: cmpl $-1, %eax ; 
KNL-NEXT: sete %al ; KNL-NEXT: vzeroupper ; KNL-NEXT: retq @@ -1618,18 +1573,9 @@ define i1 @allzeros_v32i8_and1(<32 x i8> %arg) { ; ; KNL-LABEL: allzeros_v32i8_and1: ; KNL: # %bb.0: -; KNL-NEXT: vmovdqa {{.*#+}} ymm1 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] -; KNL-NEXT: vpand %ymm1, %ymm0, %ymm0 -; KNL-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 -; KNL-NEXT: vpmovsxbd %xmm0, %zmm1 -; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0 -; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0 -; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 -; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 -; KNL-NEXT: kmovw %k0, %ecx -; KNL-NEXT: shll $16, %ecx -; KNL-NEXT: orl %eax, %ecx +; KNL-NEXT: vpsllw $7, %ymm0, %ymm0 +; KNL-NEXT: vpmovmskb %ymm0, %eax +; KNL-NEXT: testl %eax, %eax ; KNL-NEXT: sete %al ; KNL-NEXT: vzeroupper ; KNL-NEXT: retq @@ -3102,14 +3048,10 @@ define i1 @allones_v16i8_and4(<16 x i8> %arg) { ; ; KNL-LABEL: allones_v16i8_and4: ; KNL: # %bb.0: -; KNL-NEXT: vmovdqa {{.*#+}} xmm1 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4] -; KNL-NEXT: vpand %xmm1, %xmm0, %xmm0 -; KNL-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 -; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 -; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 -; KNL-NEXT: kortestw %k0, %k0 -; KNL-NEXT: setb %al -; KNL-NEXT: vzeroupper +; KNL-NEXT: vpsllw $5, %xmm0, %xmm0 +; KNL-NEXT: vpmovmskb %xmm0, %eax +; KNL-NEXT: cmpw $-1, %ax +; KNL-NEXT: sete %al ; KNL-NEXT: retq ; ; SKX-LABEL: allones_v16i8_and4: @@ -3144,14 +3086,10 @@ define i1 @allzeros_v16i8_and4(<16 x i8> %arg) { ; ; KNL-LABEL: allzeros_v16i8_and4: ; KNL: # %bb.0: -; KNL-NEXT: vmovdqa {{.*#+}} xmm1 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4] -; KNL-NEXT: vpand %xmm1, %xmm0, %xmm0 -; KNL-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 -; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 -; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 -; KNL-NEXT: kortestw %k0, %k0 +; KNL-NEXT: vpsllw $5, %xmm0, %xmm0 +; KNL-NEXT: vpmovmskb %xmm0, %eax +; KNL-NEXT: testw %ax, %ax ; KNL-NEXT: sete %al -; KNL-NEXT: vzeroupper ; KNL-NEXT: retq ; ; 
SKX-LABEL: allzeros_v16i8_and4: @@ -3210,19 +3148,9 @@ define i1 @allones_v32i8_and4(<32 x i8> %arg) { ; ; KNL-LABEL: allones_v32i8_and4: ; KNL: # %bb.0: -; KNL-NEXT: vmovdqa {{.*#+}} ymm1 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4] -; KNL-NEXT: vpand %ymm1, %ymm0, %ymm0 -; KNL-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 -; KNL-NEXT: vpmovsxbd %xmm0, %zmm1 -; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0 -; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0 -; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 -; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 -; KNL-NEXT: kmovw %k0, %ecx -; KNL-NEXT: shll $16, %ecx -; KNL-NEXT: orl %eax, %ecx -; KNL-NEXT: cmpl $-1, %ecx +; KNL-NEXT: vpsllw $5, %ymm0, %ymm0 +; KNL-NEXT: vpmovmskb %ymm0, %eax +; KNL-NEXT: cmpl $-1, %eax ; KNL-NEXT: sete %al ; KNL-NEXT: vzeroupper ; KNL-NEXT: retq @@ -3282,18 +3210,9 @@ define i1 @allzeros_v32i8_and4(<32 x i8> %arg) { ; ; KNL-LABEL: allzeros_v32i8_and4: ; KNL: # %bb.0: -; KNL-NEXT: vmovdqa {{.*#+}} ymm1 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4] -; KNL-NEXT: vpand %ymm1, %ymm0, %ymm0 -; KNL-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 -; KNL-NEXT: vpmovsxbd %xmm0, %zmm1 -; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0 -; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0 -; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 -; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 -; KNL-NEXT: kmovw %k0, %ecx -; KNL-NEXT: shll $16, %ecx -; KNL-NEXT: orl %eax, %ecx +; KNL-NEXT: vpsllw $5, %ymm0, %ymm0 +; KNL-NEXT: vpmovmskb %ymm0, %eax +; KNL-NEXT: testl %eax, %eax ; KNL-NEXT: sete %al ; KNL-NEXT: vzeroupper ; KNL-NEXT: retq @@ -4926,12 +4845,7 @@ define i32 @movmskb(<16 x i8> %x) { ; ; KNL-LABEL: movmskb: ; KNL: # %bb.0: -; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; KNL-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0 -; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 -; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 -; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: vzeroupper +; KNL-NEXT: vpmovmskb %xmm0, %eax ; KNL-NEXT: retq ; ; SKX-LABEL: 
movmskb: @@ -4975,17 +4889,7 @@ define i32 @movmskb256(<32 x i8> %x) { ; ; KNL-LABEL: movmskb256: ; KNL: # %bb.0: -; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; KNL-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0 -; KNL-NEXT: vpmovsxbd %xmm0, %zmm1 -; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0 -; KNL-NEXT: kmovw %k0, %ecx -; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0 -; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 -; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 -; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: shll $16, %eax -; KNL-NEXT: orl %ecx, %eax +; KNL-NEXT: vpmovmskb %ymm0, %eax ; KNL-NEXT: vzeroupper ; KNL-NEXT: retq ; -- 2.50.1