; SSE-NEXT: movd %xmm0, %rax
; SSE-NEXT: retq
;
-; AVX1-LABEL: test_v4f64:
-; AVX1: # BB#0:
-; AVX1-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT: vandpd %ymm1, %ymm0, %ymm0
-; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX1-NEXT: vandpd %ymm1, %ymm0, %ymm0
-; AVX1-NEXT: vmovq %xmm0, %rax
-; AVX1-NEXT: vzeroupper
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: test_v4f64:
-; AVX2: # BB#0:
-; AVX2-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0
-; AVX2-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vandpd %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: vmovq %xmm0, %rax
-; AVX2-NEXT: vzeroupper
-; AVX2-NEXT: retq
+; AVX-LABEL: test_v4f64:
+; AVX: # BB#0:
+; AVX-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0
+; AVX-NEXT: vmovmskpd %ymm0, %eax
+; AVX-NEXT: xorl %ecx, %ecx
+; AVX-NEXT: cmpl $15, %eax
+; AVX-NEXT: movq $-1, %rax
+; AVX-NEXT: cmovneq %rcx, %rax
+; AVX-NEXT: vzeroupper
+; AVX-NEXT: retq
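;
; Every test in this first group has the same shape: a lanewise vector
; compare whose sign-filled mask is AND-reduced to one scalar. A
; hypothetical IR reduction in that style (a sketch, not copied from the
; test file; the function name and value names are illustrative):
;
;   define i64 @all_of_v4f64_sketch(<4 x double> %a0, <4 x double> %a1) {
;     %c  = fcmp ogt <4 x double> %a0, %a1
;     %s  = sext <4 x i1> %c to <4 x i64>
;     %h  = shufflevector <4 x i64> %s, <4 x i64> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
;     %s2 = and <4 x i64> %s, %h
;     %l  = shufflevector <4 x i64> %s2, <4 x i64> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
;     %s3 = and <4 x i64> %s2, %l
;     %r  = extractelement <4 x i64> %s3, i32 0
;     ret i64 %r
;   }
;
; The removed checks match a shuffle+AND ladder for that pattern; the
; new ones match a single sign-bit gather (vmovmskpd) compared against
; the all-lanes value 15 (0b1111), with a cmov selecting -1 or 0.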
;
; AVX512-LABEL: test_v4f64:
; AVX512: # BB#0:
; SSE-NEXT: cmpltpd %xmm1, %xmm3
; SSE-NEXT: cmpltpd %xmm0, %xmm2
; SSE-NEXT: packsswb %xmm3, %xmm2
-; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
-; SSE-NEXT: pand %xmm2, %xmm0
-; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; SSE-NEXT: pand %xmm0, %xmm1
-; SSE-NEXT: movd %xmm1, %eax
+; SSE-NEXT: movmskps %xmm2, %eax
+; SSE-NEXT: xorl %ecx, %ecx
+; SSE-NEXT: cmpl $15, %eax
+; SSE-NEXT: movl $-1, %eax
+; SSE-NEXT: cmovnel %ecx, %eax
; SSE-NEXT: cltq
; SSE-NEXT: retq
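;
; The new scalar idiom those SSE/AVX checks share, annotated (operand
; choices are illustrative):
;
;   movmskps %xmm2, %eax    # one sign bit per lane -> low 4 bits of eax
;   xorl     %ecx, %ecx     # 0 = the "not all lanes true" result
;   cmpl     $15, %eax      # were all four mask bits set?
;   movl     $-1, %eax      # speculatively materialize all-ones
;   cmovnel  %ecx, %eax     # keep 0 unless the mask was 0b1111
;
; The trailing cltq just sign-extends that 32-bit result to the
; function's i64 return in the legalized variants.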
;
; AVX-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vmovd %xmm0, %eax
+; AVX-NEXT: vmovmskps %xmm0, %eax
+; AVX-NEXT: xorl %ecx, %ecx
+; AVX-NEXT: cmpl $15, %eax
+; AVX-NEXT: movl $-1, %eax
+; AVX-NEXT: cmovnel %ecx, %eax
; AVX-NEXT: cltq
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
; SSE-LABEL: test_v4f32:
; SSE: # BB#0:
; SSE-NEXT: cmpltps %xmm0, %xmm1
-; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
-; SSE-NEXT: pand %xmm1, %xmm0
-; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; SSE-NEXT: pand %xmm0, %xmm1
-; SSE-NEXT: movd %xmm1, %eax
+; SSE-NEXT: movmskps %xmm1, %eax
+; SSE-NEXT: xorl %ecx, %ecx
+; SSE-NEXT: cmpl $15, %eax
+; SSE-NEXT: movl $-1, %eax
+; SSE-NEXT: cmovnel %ecx, %eax
; SSE-NEXT: retq
;
; AVX-LABEL: test_v4f32:
; AVX: # BB#0:
; AVX-NEXT: vcmpltps %xmm0, %xmm1, %xmm0
-; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
-; AVX-NEXT: vandpd %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vmovd %xmm0, %eax
+; AVX-NEXT: vmovmskps %xmm0, %eax
+; AVX-NEXT: xorl %ecx, %ecx
+; AVX-NEXT: cmpl $15, %eax
+; AVX-NEXT: movl $-1, %eax
+; AVX-NEXT: cmovnel %ecx, %eax
; AVX-NEXT: retq
;
; AVX512-LABEL: test_v4f32:
; SSE-NEXT: cmpltps %xmm1, %xmm3
; SSE-NEXT: cmpltps %xmm0, %xmm2
; SSE-NEXT: andps %xmm3, %xmm2
-; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
-; SSE-NEXT: pand %xmm2, %xmm0
-; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; SSE-NEXT: pand %xmm0, %xmm1
-; SSE-NEXT: movd %xmm1, %eax
+; SSE-NEXT: movmskps %xmm2, %eax
+; SSE-NEXT: xorl %ecx, %ecx
+; SSE-NEXT: cmpl $15, %eax
+; SSE-NEXT: movl $-1, %eax
+; SSE-NEXT: cmovnel %ecx, %eax
; SSE-NEXT: retq
;
-; AVX1-LABEL: test_v8f32:
-; AVX1: # BB#0:
-; AVX1-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
-; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
-; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
-; AVX1-NEXT: vmovd %xmm0, %eax
-; AVX1-NEXT: vzeroupper
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: test_v8f32:
-; AVX2: # BB#0:
-; AVX2-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
-; AVX2-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vandps %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: vmovd %xmm0, %eax
-; AVX2-NEXT: vzeroupper
-; AVX2-NEXT: retq
+; AVX-LABEL: test_v8f32:
+; AVX: # BB#0:
+; AVX-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
+; AVX-NEXT: vmovmskps %ymm0, %eax
+; AVX-NEXT: xorl %ecx, %ecx
+; AVX-NEXT: cmpl $255, %eax
+; AVX-NEXT: movl $-1, %eax
+; AVX-NEXT: cmovnel %ecx, %eax
+; AVX-NEXT: vzeroupper
+; AVX-NEXT: retq
;
; AVX512-LABEL: test_v8f32:
; AVX512: # BB#0:
; SSE-NEXT: cmpltps %xmm1, %xmm3
; SSE-NEXT: cmpltps %xmm0, %xmm2
; SSE-NEXT: packsswb %xmm3, %xmm2
-; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
-; SSE-NEXT: pand %xmm2, %xmm0
-; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; SSE-NEXT: pand %xmm0, %xmm1
-; SSE-NEXT: movdqa %xmm1, %xmm0
-; SSE-NEXT: psrld $16, %xmm0
-; SSE-NEXT: pand %xmm1, %xmm0
-; SSE-NEXT: movd %xmm0, %eax
-; SSE-NEXT: cwtl
+; SSE-NEXT: pmovmskb %xmm2, %eax
+; SSE-NEXT: xorl %ecx, %ecx
+; SSE-NEXT: cmpl $65535, %eax # imm = 0xFFFF
+; SSE-NEXT: movl $-1, %eax
+; SSE-NEXT: cmovnel %ecx, %eax
; SSE-NEXT: retq
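;
; When a wider compare has first been packed to sign-filled bytes
; (packsswb), the same idiom goes through pmovmskb and the all-true
; constant widens from 15 to 65535 (0xFFFF), one bit per byte lane.
; This is what lets the old psrld/psrlw shift-and-AND tail and its
; cwtl/movsbl fixups disappear:
;
;   pmovmskb %xmm2, %eax              # 16 mask bits, one per byte
;   cmpl     $65535, %eax             # all byte lanes true?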
;
; AVX-LABEL: test_v8f32_legal:
; AVX-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpsrld $16, %xmm0, %xmm1
-; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vmovd %xmm0, %eax
-; AVX-NEXT: cwtl
+; AVX-NEXT: vpmovmskb %xmm0, %eax
+; AVX-NEXT: xorl %ecx, %ecx
+; AVX-NEXT: cmpl $65535, %eax # imm = 0xFFFF
+; AVX-NEXT: movl $-1, %eax
+; AVX-NEXT: cmovnel %ecx, %eax
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
-; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
-; AVX1-NEXT: vmovq %xmm0, %rax
+; AVX1-NEXT: vmovmskpd %ymm0, %eax
+; AVX1-NEXT: xorl %ecx, %ecx
+; AVX1-NEXT: cmpl $15, %eax
+; AVX1-NEXT: movq $-1, %rax
+; AVX1-NEXT: cmovneq %rcx, %rax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_v4i64:
; AVX2: # BB#0:
; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: vmovq %xmm0, %rax
+; AVX2-NEXT: vmovmskpd %ymm0, %eax
+; AVX2-NEXT: xorl %ecx, %ecx
+; AVX2-NEXT: cmpl $15, %eax
+; AVX2-NEXT: movq $-1, %rax
+; AVX2-NEXT: cmovneq %rcx, %rax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; SSE-NEXT: pcmpgtq %xmm3, %xmm1
; SSE-NEXT: pcmpgtq %xmm2, %xmm0
; SSE-NEXT: packsswb %xmm1, %xmm0
-; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; SSE-NEXT: pand %xmm0, %xmm1
-; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
-; SSE-NEXT: pand %xmm1, %xmm0
-; SSE-NEXT: movd %xmm0, %eax
+; SSE-NEXT: movmskps %xmm0, %eax
+; SSE-NEXT: xorl %ecx, %ecx
+; SSE-NEXT: cmpl $15, %eax
+; SSE-NEXT: movl $-1, %eax
+; SSE-NEXT: cmovnel %ecx, %eax
; SSE-NEXT: cltq
; SSE-NEXT: retq
;
; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpacksswb %xmm2, %xmm0, %xmm0
-; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vmovd %xmm0, %eax
+; AVX1-NEXT: vmovmskps %xmm0, %eax
+; AVX1-NEXT: xorl %ecx, %ecx
+; AVX1-NEXT: cmpl $15, %eax
+; AVX1-NEXT: movl $-1, %eax
+; AVX1-NEXT: cmovnel %ecx, %eax
; AVX1-NEXT: cltq
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vmovd %xmm0, %eax
+; AVX2-NEXT: vmovmskps %xmm0, %eax
+; AVX2-NEXT: xorl %ecx, %ecx
+; AVX2-NEXT: cmpl $15, %eax
+; AVX2-NEXT: movl $-1, %eax
+; AVX2-NEXT: cmovnel %ecx, %eax
; AVX2-NEXT: cltq
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
; SSE-LABEL: test_v4i32:
; SSE: # BB#0:
; SSE-NEXT: pcmpgtd %xmm1, %xmm0
-; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; SSE-NEXT: pand %xmm0, %xmm1
-; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
-; SSE-NEXT: pand %xmm1, %xmm0
-; SSE-NEXT: movd %xmm0, %eax
+; SSE-NEXT: movmskps %xmm0, %eax
+; SSE-NEXT: xorl %ecx, %ecx
+; SSE-NEXT: cmpl $15, %eax
+; SSE-NEXT: movl $-1, %eax
+; SSE-NEXT: cmovnel %ecx, %eax
; SSE-NEXT: retq
;
; AVX-LABEL: test_v4i32:
; AVX: # BB#0:
; AVX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vmovd %xmm0, %eax
+; AVX-NEXT: vmovmskps %xmm0, %eax
+; AVX-NEXT: xorl %ecx, %ecx
+; AVX-NEXT: cmpl $15, %eax
+; AVX-NEXT: movl $-1, %eax
+; AVX-NEXT: cmovnel %ecx, %eax
; AVX-NEXT: retq
;
; AVX512-LABEL: test_v4i32:
; SSE-NEXT: pcmpgtd %xmm3, %xmm1
; SSE-NEXT: pcmpgtd %xmm2, %xmm0
; SSE-NEXT: pand %xmm1, %xmm0
-; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; SSE-NEXT: pand %xmm0, %xmm1
-; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
-; SSE-NEXT: pand %xmm1, %xmm0
-; SSE-NEXT: movd %xmm0, %eax
+; SSE-NEXT: movmskps %xmm0, %eax
+; SSE-NEXT: xorl %ecx, %ecx
+; SSE-NEXT: cmpl $15, %eax
+; SSE-NEXT: movl $-1, %eax
+; SSE-NEXT: cmovnel %ecx, %eax
; SSE-NEXT: retq
;
; AVX1-LABEL: test_v8i32:
; AVX1-NEXT: vpcmpgtd %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
-; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
-; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
-; AVX1-NEXT: vmovd %xmm0, %eax
+; AVX1-NEXT: vmovmskps %ymm0, %eax
+; AVX1-NEXT: xorl %ecx, %ecx
+; AVX1-NEXT: cmpl $255, %eax
+; AVX1-NEXT: movl $-1, %eax
+; AVX1-NEXT: cmovnel %ecx, %eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_v8i32:
; AVX2: # BB#0:
; AVX2-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: vmovd %xmm0, %eax
+; AVX2-NEXT: vmovmskps %ymm0, %eax
+; AVX2-NEXT: xorl %ecx, %ecx
+; AVX2-NEXT: cmpl $255, %eax
+; AVX2-NEXT: movl $-1, %eax
+; AVX2-NEXT: cmovnel %ecx, %eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; SSE-NEXT: pcmpgtd %xmm3, %xmm1
; SSE-NEXT: pcmpgtd %xmm2, %xmm0
; SSE-NEXT: packsswb %xmm1, %xmm0
-; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; SSE-NEXT: pand %xmm0, %xmm1
-; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
-; SSE-NEXT: pand %xmm1, %xmm0
-; SSE-NEXT: movdqa %xmm0, %xmm1
-; SSE-NEXT: psrld $16, %xmm1
-; SSE-NEXT: pand %xmm0, %xmm1
-; SSE-NEXT: movd %xmm1, %eax
-; SSE-NEXT: cwtl
+; SSE-NEXT: pmovmskb %xmm0, %eax
+; SSE-NEXT: xorl %ecx, %ecx
+; SSE-NEXT: cmpl $65535, %eax # imm = 0xFFFF
+; SSE-NEXT: movl $-1, %eax
+; SSE-NEXT: cmovnel %ecx, %eax
; SSE-NEXT: retq
;
; AVX1-LABEL: test_v8i32_legal:
; AVX1-NEXT: vpcmpgtd %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpacksswb %xmm2, %xmm0, %xmm0
-; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vmovd %xmm0, %eax
-; AVX1-NEXT: cwtl
+; AVX1-NEXT: vpmovmskb %xmm0, %eax
+; AVX1-NEXT: xorl %ecx, %ecx
+; AVX1-NEXT: cmpl $65535, %eax # imm = 0xFFFF
+; AVX1-NEXT: movl $-1, %eax
+; AVX1-NEXT: cmovnel %ecx, %eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
-; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vmovd %xmm0, %eax
-; AVX2-NEXT: cwtl
+; AVX2-NEXT: vpmovmskb %xmm0, %eax
+; AVX2-NEXT: xorl %ecx, %ecx
+; AVX2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
+; AVX2-NEXT: movl $-1, %eax
+; AVX2-NEXT: cmovnel %ecx, %eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; SSE-LABEL: test_v8i16:
; SSE: # BB#0:
; SSE-NEXT: pcmpgtw %xmm1, %xmm0
-; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; SSE-NEXT: pand %xmm0, %xmm1
-; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
-; SSE-NEXT: pand %xmm1, %xmm0
-; SSE-NEXT: movdqa %xmm0, %xmm1
-; SSE-NEXT: psrld $16, %xmm1
-; SSE-NEXT: pand %xmm0, %xmm1
-; SSE-NEXT: movd %xmm1, %eax
+; SSE-NEXT: pmovmskb %xmm0, %eax
+; SSE-NEXT: xorl %ecx, %ecx
+; SSE-NEXT: cmpl $65535, %eax # imm = 0xFFFF
+; SSE-NEXT: movl $-1, %eax
+; SSE-NEXT: cmovnel %ecx, %eax
; SSE-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; SSE-NEXT: retq
;
; AVX-LABEL: test_v8i16:
; AVX: # BB#0:
; AVX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpsrld $16, %xmm0, %xmm1
-; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vmovd %xmm0, %eax
+; AVX-NEXT: vpmovmskb %xmm0, %eax
+; AVX-NEXT: xorl %ecx, %ecx
+; AVX-NEXT: cmpl $65535, %eax # imm = 0xFFFF
+; AVX-NEXT: movl $-1, %eax
+; AVX-NEXT: cmovnel %ecx, %eax
; AVX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; AVX-NEXT: retq
;
; SSE-NEXT: pcmpgtw %xmm3, %xmm1
; SSE-NEXT: pcmpgtw %xmm2, %xmm0
; SSE-NEXT: pand %xmm1, %xmm0
-; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; SSE-NEXT: pand %xmm0, %xmm1
-; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
-; SSE-NEXT: pand %xmm1, %xmm0
-; SSE-NEXT: movdqa %xmm0, %xmm1
-; SSE-NEXT: psrld $16, %xmm1
-; SSE-NEXT: pand %xmm0, %xmm1
-; SSE-NEXT: movd %xmm1, %eax
+; SSE-NEXT: pmovmskb %xmm0, %eax
+; SSE-NEXT: xorl %ecx, %ecx
+; SSE-NEXT: cmpl $65535, %eax # imm = 0xFFFF
+; SSE-NEXT: movl $-1, %eax
+; SSE-NEXT: cmovnel %ecx, %eax
; SSE-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; SSE-NEXT: retq
;
; AVX2-LABEL: test_v16i16:
; AVX2: # BB#0:
; AVX2-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
-; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: vmovd %xmm0, %eax
+; AVX2-NEXT: vpmovmskb %ymm0, %ecx
+; AVX2-NEXT: xorl %eax, %eax
+; AVX2-NEXT: cmpl $-1, %ecx
+; AVX2-NEXT: cmovel %ecx, %eax
; AVX2-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
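;
; A 256-bit vpmovmskb fills all 32 result bits, so "all lanes true" is
; simply -1 and the select flips around: start from zero and cmove the
; all-ones mask over it on a match:
;
;   vpmovmskb %ymm0, %ecx
;   cmpl      $-1, %ecx     # every byte lane true?
;   cmovel    %ecx, %eax    # eax = -1 on match, else the zeroed default
;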
; SSE-NEXT: pcmpgtw %xmm3, %xmm1
; SSE-NEXT: pcmpgtw %xmm2, %xmm0
; SSE-NEXT: packsswb %xmm1, %xmm0
-; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; SSE-NEXT: pand %xmm0, %xmm1
-; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
-; SSE-NEXT: pand %xmm1, %xmm0
-; SSE-NEXT: movdqa %xmm0, %xmm1
-; SSE-NEXT: psrld $16, %xmm1
-; SSE-NEXT: pand %xmm0, %xmm1
-; SSE-NEXT: movdqa %xmm1, %xmm0
-; SSE-NEXT: psrlw $8, %xmm0
-; SSE-NEXT: pand %xmm1, %xmm0
-; SSE-NEXT: pextrb $0, %xmm0, %eax
-; SSE-NEXT: movsbl %al, %eax
+; SSE-NEXT: pmovmskb %xmm0, %eax
+; SSE-NEXT: xorl %ecx, %ecx
+; SSE-NEXT: cmpl $65535, %eax # imm = 0xFFFF
+; SSE-NEXT: movl $-1, %eax
+; SSE-NEXT: cmovnel %ecx, %eax
; SSE-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; SSE-NEXT: retq
;
; AVX1-NEXT: vpcmpgtw %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpacksswb %xmm2, %xmm0, %xmm0
-; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpextrb $0, %xmm0, %eax
-; AVX1-NEXT: movsbl %al, %eax
+; AVX1-NEXT: vpmovmskb %xmm0, %eax
+; AVX1-NEXT: xorl %ecx, %ecx
+; AVX1-NEXT: cmpl $65535, %eax # imm = 0xFFFF
+; AVX1-NEXT: movl $-1, %eax
+; AVX1-NEXT: cmovnel %ecx, %eax
; AVX1-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
; AVX2-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
-; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
-; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpextrb $0, %xmm0, %eax
-; AVX2-NEXT: movsbl %al, %eax
+; AVX2-NEXT: vpmovmskb %xmm0, %eax
+; AVX2-NEXT: xorl %ecx, %ecx
+; AVX2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
+; AVX2-NEXT: movl $-1, %eax
+; AVX2-NEXT: cmovnel %ecx, %eax
; AVX2-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
; SSE-LABEL: test_v16i8:
; SSE: # BB#0:
; SSE-NEXT: pcmpgtb %xmm1, %xmm0
-; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; SSE-NEXT: pand %xmm0, %xmm1
-; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
-; SSE-NEXT: pand %xmm1, %xmm0
-; SSE-NEXT: movdqa %xmm0, %xmm1
-; SSE-NEXT: psrld $16, %xmm1
-; SSE-NEXT: pand %xmm0, %xmm1
-; SSE-NEXT: movdqa %xmm1, %xmm0
-; SSE-NEXT: psrlw $8, %xmm0
-; SSE-NEXT: pand %xmm1, %xmm0
-; SSE-NEXT: pextrb $0, %xmm0, %eax
+; SSE-NEXT: pmovmskb %xmm0, %eax
+; SSE-NEXT: xorl %ecx, %ecx
+; SSE-NEXT: cmpl $65535, %eax # imm = 0xFFFF
+; SSE-NEXT: movl $-1, %eax
+; SSE-NEXT: cmovnel %ecx, %eax
; SSE-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; SSE-NEXT: retq
;
; AVX-LABEL: test_v16i8:
; AVX: # BB#0:
; AVX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpsrld $16, %xmm0, %xmm1
-; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
-; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpextrb $0, %xmm0, %eax
+; AVX-NEXT: vpmovmskb %xmm0, %eax
+; AVX-NEXT: xorl %ecx, %ecx
+; AVX-NEXT: cmpl $65535, %eax # imm = 0xFFFF
+; AVX-NEXT: movl $-1, %eax
+; AVX-NEXT: cmovnel %ecx, %eax
; AVX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX-NEXT: retq
;
; SSE-NEXT: pcmpgtb %xmm3, %xmm1
; SSE-NEXT: pcmpgtb %xmm2, %xmm0
; SSE-NEXT: pand %xmm1, %xmm0
-; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; SSE-NEXT: pand %xmm0, %xmm1
-; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
-; SSE-NEXT: pand %xmm1, %xmm0
-; SSE-NEXT: movdqa %xmm0, %xmm1
-; SSE-NEXT: psrld $16, %xmm1
-; SSE-NEXT: pand %xmm0, %xmm1
-; SSE-NEXT: movdqa %xmm1, %xmm0
-; SSE-NEXT: psrlw $8, %xmm0
-; SSE-NEXT: pand %xmm1, %xmm0
-; SSE-NEXT: pextrb $0, %xmm0, %eax
+; SSE-NEXT: pmovmskb %xmm0, %eax
+; SSE-NEXT: xorl %ecx, %ecx
+; SSE-NEXT: cmpl $65535, %eax # imm = 0xFFFF
+; SSE-NEXT: movl $-1, %eax
+; SSE-NEXT: cmovnel %ecx, %eax
; SSE-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; SSE-NEXT: retq
;
; AVX2-LABEL: test_v32i8:
; AVX2: # BB#0:
; AVX2-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
-; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
-; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: vpextrb $0, %xmm0, %eax
+; AVX2-NEXT: vpmovmskb %ymm0, %ecx
+; AVX2-NEXT: xorl %eax, %eax
+; AVX2-NEXT: cmpl $-1, %ecx
+; AVX2-NEXT: cmovel %ecx, %eax
; AVX2-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
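;
; The hunks from here on are the matching OR-reduction (any_of) tests.
; There the new lowering needs no cmov at all: a neg/sbb pair converts
; "mask != 0" directly into -1 or 0 (sketch, operands illustrative):
;
;   movmskps %xmm0, %eax    # nonzero iff any lane was true
;   negl     %eax           # sets CF when eax != 0
;   sbbl     %eax, %eax     # eax = eax - eax - CF = -CF
;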
; SSE-NEXT: movd %xmm0, %rax
; SSE-NEXT: retq
;
-; AVX1-LABEL: test_v4f64:
-; AVX1: # BB#0:
-; AVX1-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT: vorpd %ymm1, %ymm0, %ymm0
-; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX1-NEXT: vorpd %ymm1, %ymm0, %ymm0
-; AVX1-NEXT: vmovq %xmm0, %rax
-; AVX1-NEXT: vzeroupper
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: test_v4f64:
-; AVX2: # BB#0:
-; AVX2-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0
-; AVX2-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vorpd %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: vmovq %xmm0, %rax
-; AVX2-NEXT: vzeroupper
-; AVX2-NEXT: retq
+; AVX-LABEL: test_v4f64:
+; AVX: # BB#0:
+; AVX-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0
+; AVX-NEXT: vmovmskpd %ymm0, %eax
+; AVX-NEXT: negl %eax
+; AVX-NEXT: sbbq %rax, %rax
+; AVX-NEXT: vzeroupper
+; AVX-NEXT: retq
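;
; For i64 results the borrow can be taken at 64 bits, so sbbq produces
; the final value with no extra sign extension (compare the sbbl+cltq
; pairs in the legalized variants below):
;
;   negl %eax
;   sbbq %rax, %rax         # rax = -1 if any lane true, else 0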
;
; AVX512-LABEL: test_v4f64:
; AVX512: # BB#0:
; SSE-NEXT: cmpltpd %xmm1, %xmm3
; SSE-NEXT: cmpltpd %xmm0, %xmm2
; SSE-NEXT: packsswb %xmm3, %xmm2
-; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
-; SSE-NEXT: por %xmm2, %xmm0
-; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; SSE-NEXT: por %xmm0, %xmm1
-; SSE-NEXT: movd %xmm1, %eax
+; SSE-NEXT: movmskps %xmm2, %eax
+; SSE-NEXT: negl %eax
+; SSE-NEXT: sbbl %eax, %eax
; SSE-NEXT: cltq
; SSE-NEXT: retq
;
; AVX-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vmovd %xmm0, %eax
+; AVX-NEXT: vmovmskps %xmm0, %eax
+; AVX-NEXT: negl %eax
+; AVX-NEXT: sbbl %eax, %eax
; AVX-NEXT: cltq
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
; SSE-LABEL: test_v4f32:
; SSE: # BB#0:
; SSE-NEXT: cmpltps %xmm0, %xmm1
-; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
-; SSE-NEXT: por %xmm1, %xmm0
-; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; SSE-NEXT: por %xmm0, %xmm1
-; SSE-NEXT: movd %xmm1, %eax
+; SSE-NEXT: movmskps %xmm1, %eax
+; SSE-NEXT: negl %eax
+; SSE-NEXT: sbbl %eax, %eax
; SSE-NEXT: retq
;
; AVX-LABEL: test_v4f32:
; AVX: # BB#0:
; AVX-NEXT: vcmpltps %xmm0, %xmm1, %xmm0
-; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
-; AVX-NEXT: vorpd %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vmovd %xmm0, %eax
+; AVX-NEXT: vmovmskps %xmm0, %eax
+; AVX-NEXT: negl %eax
+; AVX-NEXT: sbbl %eax, %eax
; AVX-NEXT: retq
;
; AVX512-LABEL: test_v4f32:
; SSE-NEXT: cmpltps %xmm1, %xmm3
; SSE-NEXT: cmpltps %xmm0, %xmm2
; SSE-NEXT: orps %xmm3, %xmm2
-; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
-; SSE-NEXT: por %xmm2, %xmm0
-; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; SSE-NEXT: por %xmm0, %xmm1
-; SSE-NEXT: movd %xmm1, %eax
+; SSE-NEXT: movmskps %xmm2, %eax
+; SSE-NEXT: negl %eax
+; SSE-NEXT: sbbl %eax, %eax
; SSE-NEXT: retq
;
-; AVX1-LABEL: test_v8f32:
-; AVX1: # BB#0:
-; AVX1-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
-; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
-; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
-; AVX1-NEXT: vmovd %xmm0, %eax
-; AVX1-NEXT: vzeroupper
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: test_v8f32:
-; AVX2: # BB#0:
-; AVX2-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
-; AVX2-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vorps %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: vmovd %xmm0, %eax
-; AVX2-NEXT: vzeroupper
-; AVX2-NEXT: retq
+; AVX-LABEL: test_v8f32:
+; AVX: # BB#0:
+; AVX-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
+; AVX-NEXT: vmovmskps %ymm0, %eax
+; AVX-NEXT: negl %eax
+; AVX-NEXT: sbbl %eax, %eax
+; AVX-NEXT: vzeroupper
+; AVX-NEXT: retq
;
; AVX512-LABEL: test_v8f32:
; AVX512: # BB#0:
; SSE-NEXT: cmpltps %xmm1, %xmm3
; SSE-NEXT: cmpltps %xmm0, %xmm2
; SSE-NEXT: packsswb %xmm3, %xmm2
-; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
-; SSE-NEXT: por %xmm2, %xmm0
-; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; SSE-NEXT: por %xmm0, %xmm1
-; SSE-NEXT: movdqa %xmm1, %xmm0
-; SSE-NEXT: psrld $16, %xmm0
-; SSE-NEXT: por %xmm1, %xmm0
-; SSE-NEXT: movd %xmm0, %eax
-; SSE-NEXT: cwtl
+; SSE-NEXT: pmovmskb %xmm2, %eax
+; SSE-NEXT: negl %eax
+; SSE-NEXT: sbbl %eax, %eax
; SSE-NEXT: retq
;
; AVX-LABEL: test_v8f32_legal:
; AVX-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpsrld $16, %xmm0, %xmm1
-; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vmovd %xmm0, %eax
-; AVX-NEXT: cwtl
+; AVX-NEXT: vpmovmskb %xmm0, %eax
+; AVX-NEXT: negl %eax
+; AVX-NEXT: sbbl %eax, %eax
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; AVX1-NEXT: vorps %ymm2, %ymm0, %ymm0
-; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
-; AVX1-NEXT: vmovq %xmm0, %rax
+; AVX1-NEXT: vmovmskpd %ymm0, %eax
+; AVX1-NEXT: negl %eax
+; AVX1-NEXT: sbbq %rax, %rax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_v4i64:
; AVX2: # BB#0:
; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: vmovq %xmm0, %rax
+; AVX2-NEXT: vmovmskpd %ymm0, %eax
+; AVX2-NEXT: negl %eax
+; AVX2-NEXT: sbbq %rax, %rax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; SSE-NEXT: pcmpgtq %xmm3, %xmm1
; SSE-NEXT: pcmpgtq %xmm2, %xmm0
; SSE-NEXT: packsswb %xmm1, %xmm0
-; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; SSE-NEXT: por %xmm0, %xmm1
-; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
-; SSE-NEXT: por %xmm1, %xmm0
-; SSE-NEXT: movd %xmm0, %eax
+; SSE-NEXT: movmskps %xmm0, %eax
+; SSE-NEXT: negl %eax
+; SSE-NEXT: sbbl %eax, %eax
; SSE-NEXT: cltq
; SSE-NEXT: retq
;
; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpacksswb %xmm2, %xmm0, %xmm0
-; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vmovd %xmm0, %eax
+; AVX1-NEXT: vmovmskps %xmm0, %eax
+; AVX1-NEXT: negl %eax
+; AVX1-NEXT: sbbl %eax, %eax
; AVX1-NEXT: cltq
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vmovd %xmm0, %eax
+; AVX2-NEXT: vmovmskps %xmm0, %eax
+; AVX2-NEXT: negl %eax
+; AVX2-NEXT: sbbl %eax, %eax
; AVX2-NEXT: cltq
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
; SSE-LABEL: test_v4i32:
; SSE: # BB#0:
; SSE-NEXT: pcmpgtd %xmm1, %xmm0
-; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; SSE-NEXT: por %xmm0, %xmm1
-; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
-; SSE-NEXT: por %xmm1, %xmm0
-; SSE-NEXT: movd %xmm0, %eax
+; SSE-NEXT: movmskps %xmm0, %eax
+; SSE-NEXT: negl %eax
+; SSE-NEXT: sbbl %eax, %eax
; SSE-NEXT: retq
;
; AVX-LABEL: test_v4i32:
; AVX: # BB#0:
; AVX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vmovd %xmm0, %eax
+; AVX-NEXT: vmovmskps %xmm0, %eax
+; AVX-NEXT: negl %eax
+; AVX-NEXT: sbbl %eax, %eax
; AVX-NEXT: retq
;
; AVX512-LABEL: test_v4i32:
; SSE-NEXT: pcmpgtd %xmm3, %xmm1
; SSE-NEXT: pcmpgtd %xmm2, %xmm0
; SSE-NEXT: por %xmm1, %xmm0
-; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; SSE-NEXT: por %xmm0, %xmm1
-; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
-; SSE-NEXT: por %xmm1, %xmm0
-; SSE-NEXT: movd %xmm0, %eax
+; SSE-NEXT: movmskps %xmm0, %eax
+; SSE-NEXT: negl %eax
+; SSE-NEXT: sbbl %eax, %eax
; SSE-NEXT: retq
;
; AVX1-LABEL: test_v8i32:
; AVX1-NEXT: vpcmpgtd %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; AVX1-NEXT: vorps %ymm2, %ymm0, %ymm0
-; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
-; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
-; AVX1-NEXT: vmovd %xmm0, %eax
+; AVX1-NEXT: vmovmskps %ymm0, %eax
+; AVX1-NEXT: negl %eax
+; AVX1-NEXT: sbbl %eax, %eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_v8i32:
; AVX2: # BB#0:
; AVX2-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: vmovd %xmm0, %eax
+; AVX2-NEXT: vmovmskps %ymm0, %eax
+; AVX2-NEXT: negl %eax
+; AVX2-NEXT: sbbl %eax, %eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; SSE-NEXT: pcmpgtd %xmm3, %xmm1
; SSE-NEXT: pcmpgtd %xmm2, %xmm0
; SSE-NEXT: packsswb %xmm1, %xmm0
-; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; SSE-NEXT: por %xmm0, %xmm1
-; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
-; SSE-NEXT: por %xmm1, %xmm0
-; SSE-NEXT: movdqa %xmm0, %xmm1
-; SSE-NEXT: psrld $16, %xmm1
-; SSE-NEXT: por %xmm0, %xmm1
-; SSE-NEXT: movd %xmm1, %eax
-; SSE-NEXT: cwtl
+; SSE-NEXT: pmovmskb %xmm0, %eax
+; SSE-NEXT: negl %eax
+; SSE-NEXT: sbbl %eax, %eax
; SSE-NEXT: retq
;
; AVX1-LABEL: test_v8i32_legal:
; AVX1-NEXT: vpcmpgtd %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpacksswb %xmm2, %xmm0, %xmm0
-; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
-; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vmovd %xmm0, %eax
-; AVX1-NEXT: cwtl
+; AVX1-NEXT: vpmovmskb %xmm0, %eax
+; AVX1-NEXT: negl %eax
+; AVX1-NEXT: sbbl %eax, %eax
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
-; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vmovd %xmm0, %eax
-; AVX2-NEXT: cwtl
+; AVX2-NEXT: vpmovmskb %xmm0, %eax
+; AVX2-NEXT: negl %eax
+; AVX2-NEXT: sbbl %eax, %eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; SSE-LABEL: test_v8i16:
; SSE: # BB#0:
; SSE-NEXT: pcmpgtw %xmm1, %xmm0
-; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; SSE-NEXT: por %xmm0, %xmm1
-; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
-; SSE-NEXT: por %xmm1, %xmm0
-; SSE-NEXT: movdqa %xmm0, %xmm1
-; SSE-NEXT: psrld $16, %xmm1
-; SSE-NEXT: por %xmm0, %xmm1
-; SSE-NEXT: movd %xmm1, %eax
+; SSE-NEXT: pmovmskb %xmm0, %eax
+; SSE-NEXT: negl %eax
+; SSE-NEXT: sbbl %eax, %eax
; SSE-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; SSE-NEXT: retq
;
; AVX-LABEL: test_v8i16:
; AVX: # BB#0:
; AVX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpsrld $16, %xmm0, %xmm1
-; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vmovd %xmm0, %eax
+; AVX-NEXT: vpmovmskb %xmm0, %eax
+; AVX-NEXT: negl %eax
+; AVX-NEXT: sbbl %eax, %eax
; AVX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; AVX-NEXT: retq
;
; SSE-NEXT: pcmpgtw %xmm3, %xmm1
; SSE-NEXT: pcmpgtw %xmm2, %xmm0
; SSE-NEXT: por %xmm1, %xmm0
-; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; SSE-NEXT: por %xmm0, %xmm1
-; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
-; SSE-NEXT: por %xmm1, %xmm0
-; SSE-NEXT: movdqa %xmm0, %xmm1
-; SSE-NEXT: psrld $16, %xmm1
-; SSE-NEXT: por %xmm0, %xmm1
-; SSE-NEXT: movd %xmm1, %eax
+; SSE-NEXT: pmovmskb %xmm0, %eax
+; SSE-NEXT: negl %eax
+; SSE-NEXT: sbbl %eax, %eax
; SSE-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; SSE-NEXT: retq
;
; AVX2-LABEL: test_v16i16:
; AVX2: # BB#0:
; AVX2-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
-; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: vmovd %xmm0, %eax
+; AVX2-NEXT: vpmovmskb %ymm0, %eax
+; AVX2-NEXT: negl %eax
+; AVX2-NEXT: sbbl %eax, %eax
; AVX2-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
; SSE-NEXT: pcmpgtw %xmm3, %xmm1
; SSE-NEXT: pcmpgtw %xmm2, %xmm0
; SSE-NEXT: packsswb %xmm1, %xmm0
-; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; SSE-NEXT: por %xmm0, %xmm1
-; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
-; SSE-NEXT: por %xmm1, %xmm0
-; SSE-NEXT: movdqa %xmm0, %xmm1
-; SSE-NEXT: psrld $16, %xmm1
-; SSE-NEXT: por %xmm0, %xmm1
-; SSE-NEXT: movdqa %xmm1, %xmm0
-; SSE-NEXT: psrlw $8, %xmm0
-; SSE-NEXT: por %xmm1, %xmm0
-; SSE-NEXT: pextrb $0, %xmm0, %eax
-; SSE-NEXT: movsbl %al, %eax
+; SSE-NEXT: pmovmskb %xmm0, %eax
+; SSE-NEXT: negl %eax
+; SSE-NEXT: sbbl %eax, %eax
; SSE-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; SSE-NEXT: retq
;
; AVX1-NEXT: vpcmpgtw %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpacksswb %xmm2, %xmm0, %xmm0
-; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
-; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
-; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpextrb $0, %xmm0, %eax
-; AVX1-NEXT: movsbl %al, %eax
+; AVX1-NEXT: vpmovmskb %xmm0, %eax
+; AVX1-NEXT: negl %eax
+; AVX1-NEXT: sbbl %eax, %eax
; AVX1-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
; AVX2-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
-; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
-; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpextrb $0, %xmm0, %eax
-; AVX2-NEXT: movsbl %al, %eax
+; AVX2-NEXT: vpmovmskb %xmm0, %eax
+; AVX2-NEXT: negl %eax
+; AVX2-NEXT: sbbl %eax, %eax
; AVX2-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
; SSE-LABEL: test_v16i8:
; SSE: # BB#0:
; SSE-NEXT: pcmpgtb %xmm1, %xmm0
-; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; SSE-NEXT: por %xmm0, %xmm1
-; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
-; SSE-NEXT: por %xmm1, %xmm0
-; SSE-NEXT: movdqa %xmm0, %xmm1
-; SSE-NEXT: psrld $16, %xmm1
-; SSE-NEXT: por %xmm0, %xmm1
-; SSE-NEXT: movdqa %xmm1, %xmm0
-; SSE-NEXT: psrlw $8, %xmm0
-; SSE-NEXT: por %xmm1, %xmm0
-; SSE-NEXT: pextrb $0, %xmm0, %eax
+; SSE-NEXT: pmovmskb %xmm0, %eax
+; SSE-NEXT: negl %eax
+; SSE-NEXT: sbbl %eax, %eax
; SSE-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; SSE-NEXT: retq
;
; AVX-LABEL: test_v16i8:
; AVX: # BB#0:
; AVX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpsrld $16, %xmm0, %xmm1
-; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
-; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpextrb $0, %xmm0, %eax
+; AVX-NEXT: vpmovmskb %xmm0, %eax
+; AVX-NEXT: negl %eax
+; AVX-NEXT: sbbl %eax, %eax
; AVX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX-NEXT: retq
;
; SSE-NEXT: pcmpgtb %xmm3, %xmm1
; SSE-NEXT: pcmpgtb %xmm2, %xmm0
; SSE-NEXT: por %xmm1, %xmm0
-; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; SSE-NEXT: por %xmm0, %xmm1
-; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
-; SSE-NEXT: por %xmm1, %xmm0
-; SSE-NEXT: movdqa %xmm0, %xmm1
-; SSE-NEXT: psrld $16, %xmm1
-; SSE-NEXT: por %xmm0, %xmm1
-; SSE-NEXT: movdqa %xmm1, %xmm0
-; SSE-NEXT: psrlw $8, %xmm0
-; SSE-NEXT: por %xmm1, %xmm0
-; SSE-NEXT: pextrb $0, %xmm0, %eax
+; SSE-NEXT: pmovmskb %xmm0, %eax
+; SSE-NEXT: negl %eax
+; SSE-NEXT: sbbl %eax, %eax
; SSE-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; SSE-NEXT: retq
;
; AVX2-LABEL: test_v32i8:
; AVX2: # BB#0:
; AVX2-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
-; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
-; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: vpextrb $0, %xmm0, %eax
+; AVX2-NEXT: vpmovmskb %ymm0, %eax
+; AVX2-NEXT: negl %eax
+; AVX2-NEXT: sbbl %eax, %eax
; AVX2-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq