}
define i16 @trunc_16i8_to_16i1(<16 x i8> %a) {
-; KNL-LABEL: trunc_16i8_to_16i1:
-; KNL: # %bb.0:
-; KNL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
-; KNL-NEXT: vpslld $31, %zmm0, %zmm0
-; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: # kill: def $ax killed $ax killed $eax
-; KNL-NEXT: vzeroupper
-; KNL-NEXT: retq
-;
-; SKX-LABEL: trunc_16i8_to_16i1:
-; SKX: # %bb.0:
-; SKX-NEXT: vpsllw $7, %xmm0, %xmm0
-; SKX-NEXT: vpmovb2m %xmm0, %k0
-; SKX-NEXT: kmovd %k0, %eax
-; SKX-NEXT: # kill: def $ax killed $ax killed $eax
-; SKX-NEXT: retq
-;
-; AVX512DQNOBW-LABEL: trunc_16i8_to_16i1:
-; AVX512DQNOBW: # %bb.0:
-; AVX512DQNOBW-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
-; AVX512DQNOBW-NEXT: vpslld $31, %zmm0, %zmm0
-; AVX512DQNOBW-NEXT: vpmovd2m %zmm0, %k0
-; AVX512DQNOBW-NEXT: kmovw %k0, %eax
-; AVX512DQNOBW-NEXT: # kill: def $ax killed $ax killed $eax
-; AVX512DQNOBW-NEXT: vzeroupper
-; AVX512DQNOBW-NEXT: retq
+; ALL-LABEL: trunc_16i8_to_16i1:
+; ALL: # %bb.0:
+; ALL-NEXT: vpsllw $7, %xmm0, %xmm0
+; ALL-NEXT: vpmovmskb %xmm0, %eax
+; ALL-NEXT: # kill: def $ax killed $ax killed $eax
+; ALL-NEXT: retq
%mask_b = trunc <16 x i8>%a to <16 x i1>
%mask = bitcast <16 x i1> %mask_b to i16
ret i16 %mask
;
; KNL-LABEL: allones_v16i8_sign:
; KNL: # %bb.0:
-; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; KNL-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
-; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
-; KNL-NEXT: kortestw %k0, %k0
-; KNL-NEXT: setb %al
-; KNL-NEXT: vzeroupper
+; KNL-NEXT: vpmovmskb %xmm0, %eax
+; KNL-NEXT: cmpw $-1, %ax
+; KNL-NEXT: sete %al
; KNL-NEXT: retq
;
; SKX-LABEL: allones_v16i8_sign:
;
; KNL-LABEL: allzeros_v16i8_sign:
; KNL: # %bb.0:
-; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; KNL-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
-; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
-; KNL-NEXT: kortestw %k0, %k0
+; KNL-NEXT: vpmovmskb %xmm0, %eax
+; KNL-NEXT: testw %ax, %ax
; KNL-NEXT: sete %al
-; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-LABEL: allzeros_v16i8_sign:
;
; KNL-LABEL: allones_v32i8_sign:
; KNL: # %bb.0:
-; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; KNL-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm1
-; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
-; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
-; KNL-NEXT: kmovw %k0, %ecx
-; KNL-NEXT: shll $16, %ecx
-; KNL-NEXT: orl %eax, %ecx
-; KNL-NEXT: cmpl $-1, %ecx
+; KNL-NEXT: vpmovmskb %ymm0, %eax
+; KNL-NEXT: cmpl $-1, %eax
; KNL-NEXT: sete %al
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; KNL-LABEL: allzeros_v32i8_sign:
; KNL: # %bb.0:
-; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; KNL-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm1
-; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
-; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
-; KNL-NEXT: kmovw %k0, %ecx
-; KNL-NEXT: shll $16, %ecx
-; KNL-NEXT: orl %eax, %ecx
+; KNL-NEXT: vpmovmskb %ymm0, %eax
+; KNL-NEXT: testl %eax, %eax
; KNL-NEXT: sete %al
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; KNL-LABEL: allones_v16i8_and1:
; KNL: # %bb.0:
-; KNL-NEXT: vmovdqa {{.*#+}} xmm1 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
-; KNL-NEXT: vpand %xmm1, %xmm0, %xmm0
-; KNL-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
-; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
-; KNL-NEXT: kortestw %k0, %k0
-; KNL-NEXT: setb %al
-; KNL-NEXT: vzeroupper
+; KNL-NEXT: vpsllw $7, %xmm0, %xmm0
+; KNL-NEXT: vpmovmskb %xmm0, %eax
+; KNL-NEXT: cmpw $-1, %ax
+; KNL-NEXT: sete %al
; KNL-NEXT: retq
;
; SKX-LABEL: allones_v16i8_and1:
;
; KNL-LABEL: allzeros_v16i8_and1:
; KNL: # %bb.0:
-; KNL-NEXT: vmovdqa {{.*#+}} xmm1 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
-; KNL-NEXT: vpand %xmm1, %xmm0, %xmm0
-; KNL-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
-; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
-; KNL-NEXT: kortestw %k0, %k0
+; KNL-NEXT: vpsllw $7, %xmm0, %xmm0
+; KNL-NEXT: vpmovmskb %xmm0, %eax
+; KNL-NEXT: testw %ax, %ax
; KNL-NEXT: sete %al
-; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-LABEL: allzeros_v16i8_and1:
;
; KNL-LABEL: allones_v32i8_and1:
; KNL: # %bb.0:
-; KNL-NEXT: vmovdqa {{.*#+}} ymm1 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
-; KNL-NEXT: vpand %ymm1, %ymm0, %ymm0
-; KNL-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm1
-; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
-; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
-; KNL-NEXT: kmovw %k0, %ecx
-; KNL-NEXT: shll $16, %ecx
-; KNL-NEXT: orl %eax, %ecx
-; KNL-NEXT: cmpl $-1, %ecx
+; KNL-NEXT: vpsllw $7, %ymm0, %ymm0
+; KNL-NEXT: vpmovmskb %ymm0, %eax
+; KNL-NEXT: cmpl $-1, %eax
; KNL-NEXT: sete %al
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; KNL-LABEL: allzeros_v32i8_and1:
; KNL: # %bb.0:
-; KNL-NEXT: vmovdqa {{.*#+}} ymm1 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
-; KNL-NEXT: vpand %ymm1, %ymm0, %ymm0
-; KNL-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm1
-; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
-; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
-; KNL-NEXT: kmovw %k0, %ecx
-; KNL-NEXT: shll $16, %ecx
-; KNL-NEXT: orl %eax, %ecx
+; KNL-NEXT: vpsllw $7, %ymm0, %ymm0
+; KNL-NEXT: vpmovmskb %ymm0, %eax
+; KNL-NEXT: testl %eax, %eax
; KNL-NEXT: sete %al
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; KNL-LABEL: allones_v16i8_and4:
; KNL: # %bb.0:
-; KNL-NEXT: vmovdqa {{.*#+}} xmm1 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4]
-; KNL-NEXT: vpand %xmm1, %xmm0, %xmm0
-; KNL-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
-; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
-; KNL-NEXT: kortestw %k0, %k0
-; KNL-NEXT: setb %al
-; KNL-NEXT: vzeroupper
+; KNL-NEXT: vpsllw $5, %xmm0, %xmm0
+; KNL-NEXT: vpmovmskb %xmm0, %eax
+; KNL-NEXT: cmpw $-1, %ax
+; KNL-NEXT: sete %al
; KNL-NEXT: retq
;
; SKX-LABEL: allones_v16i8_and4:
;
; KNL-LABEL: allzeros_v16i8_and4:
; KNL: # %bb.0:
-; KNL-NEXT: vmovdqa {{.*#+}} xmm1 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4]
-; KNL-NEXT: vpand %xmm1, %xmm0, %xmm0
-; KNL-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
-; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
-; KNL-NEXT: kortestw %k0, %k0
+; KNL-NEXT: vpsllw $5, %xmm0, %xmm0
+; KNL-NEXT: vpmovmskb %xmm0, %eax
+; KNL-NEXT: testw %ax, %ax
; KNL-NEXT: sete %al
-; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-LABEL: allzeros_v16i8_and4:
;
; KNL-LABEL: allones_v32i8_and4:
; KNL: # %bb.0:
-; KNL-NEXT: vmovdqa {{.*#+}} ymm1 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4]
-; KNL-NEXT: vpand %ymm1, %ymm0, %ymm0
-; KNL-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm1
-; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
-; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
-; KNL-NEXT: kmovw %k0, %ecx
-; KNL-NEXT: shll $16, %ecx
-; KNL-NEXT: orl %eax, %ecx
-; KNL-NEXT: cmpl $-1, %ecx
+; KNL-NEXT: vpsllw $5, %ymm0, %ymm0
+; KNL-NEXT: vpmovmskb %ymm0, %eax
+; KNL-NEXT: cmpl $-1, %eax
; KNL-NEXT: sete %al
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; KNL-LABEL: allzeros_v32i8_and4:
; KNL: # %bb.0:
-; KNL-NEXT: vmovdqa {{.*#+}} ymm1 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4]
-; KNL-NEXT: vpand %ymm1, %ymm0, %ymm0
-; KNL-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm1
-; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
-; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
-; KNL-NEXT: kmovw %k0, %ecx
-; KNL-NEXT: shll $16, %ecx
-; KNL-NEXT: orl %eax, %ecx
+; KNL-NEXT: vpsllw $5, %ymm0, %ymm0
+; KNL-NEXT: vpmovmskb %ymm0, %eax
+; KNL-NEXT: testl %eax, %eax
; KNL-NEXT: sete %al
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; KNL-LABEL: movmskb:
; KNL: # %bb.0:
-; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; KNL-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
-; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: vzeroupper
+; KNL-NEXT: vpmovmskb %xmm0, %eax
; KNL-NEXT: retq
;
; SKX-LABEL: movmskb:
;
; KNL-LABEL: movmskb256:
; KNL: # %bb.0:
-; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; KNL-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm1
-; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0
-; KNL-NEXT: kmovw %k0, %ecx
-; KNL-NEXT: vextracti128 $1, %ymm0, %xmm0
-; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
-; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: shll $16, %eax
-; KNL-NEXT: orl %ecx, %eax
+; KNL-NEXT: vpmovmskb %ymm0, %eax
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;