;
; X32-SSE2-LABEL: PR15215_good:
; X32-SSE2: # %bb.0: # %entry
-; X32-SSE2-NEXT: pushl %esi
-; X32-SSE2-NEXT: .cfi_def_cfa_offset 8
-; X32-SSE2-NEXT: .cfi_offset %esi, -8
-; X32-SSE2-NEXT: movd %xmm0, %eax
-; X32-SSE2-NEXT: andl $1, %eax
-; X32-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X32-SSE2-NEXT: movd %xmm1, %ecx
-; X32-SSE2-NEXT: andl $1, %ecx
-; X32-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X32-SSE2-NEXT: movd %xmm1, %edx
-; X32-SSE2-NEXT: andl $1, %edx
-; X32-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
-; X32-SSE2-NEXT: movd %xmm0, %esi
-; X32-SSE2-NEXT: andl $1, %esi
-; X32-SSE2-NEXT: leal (%eax,%ecx,2), %eax
-; X32-SSE2-NEXT: leal (%eax,%edx,4), %eax
-; X32-SSE2-NEXT: leal (%eax,%esi,8), %eax
-; X32-SSE2-NEXT: popl %esi
-; X32-SSE2-NEXT: .cfi_def_cfa_offset 4
+; X32-SSE2-NEXT: pslld $31, %xmm0
+; X32-SSE2-NEXT: movmskps %xmm0, %eax
; X32-SSE2-NEXT: retl
;
; X32-AVX2-LABEL: PR15215_good:
; X32-AVX2: # %bb.0: # %entry
-; X32-AVX2-NEXT: pushl %esi
-; X32-AVX2-NEXT: .cfi_def_cfa_offset 8
-; X32-AVX2-NEXT: .cfi_offset %esi, -8
-; X32-AVX2-NEXT: vmovd %xmm0, %eax
-; X32-AVX2-NEXT: andl $1, %eax
-; X32-AVX2-NEXT: vpextrd $1, %xmm0, %ecx
-; X32-AVX2-NEXT: andl $1, %ecx
-; X32-AVX2-NEXT: vpextrd $2, %xmm0, %edx
-; X32-AVX2-NEXT: andl $1, %edx
-; X32-AVX2-NEXT: vpextrd $3, %xmm0, %esi
-; X32-AVX2-NEXT: andl $1, %esi
-; X32-AVX2-NEXT: leal (%eax,%ecx,2), %eax
-; X32-AVX2-NEXT: leal (%eax,%edx,4), %eax
-; X32-AVX2-NEXT: leal (%eax,%esi,8), %eax
-; X32-AVX2-NEXT: popl %esi
-; X32-AVX2-NEXT: .cfi_def_cfa_offset 4
+; X32-AVX2-NEXT: vpslld $31, %xmm0, %xmm0
+; X32-AVX2-NEXT: vmovmskps %xmm0, %eax
; X32-AVX2-NEXT: retl
;
; X64-SSE2-LABEL: PR15215_good:
; X64-SSE2: # %bb.0: # %entry
-; X64-SSE2-NEXT: movd %xmm0, %eax
-; X64-SSE2-NEXT: andl $1, %eax
-; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-SSE2-NEXT: movd %xmm1, %ecx
-; X64-SSE2-NEXT: andl $1, %ecx
-; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-SSE2-NEXT: movd %xmm1, %edx
-; X64-SSE2-NEXT: andl $1, %edx
-; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
-; X64-SSE2-NEXT: movd %xmm0, %esi
-; X64-SSE2-NEXT: andl $1, %esi
-; X64-SSE2-NEXT: leal (%rax,%rcx,2), %eax
-; X64-SSE2-NEXT: leal (%rax,%rdx,4), %eax
-; X64-SSE2-NEXT: leal (%rax,%rsi,8), %eax
+; X64-SSE2-NEXT: pslld $31, %xmm0
+; X64-SSE2-NEXT: movmskps %xmm0, %eax
; X64-SSE2-NEXT: retq
;
; X64-AVX2-LABEL: PR15215_good:
; X64-AVX2: # %bb.0: # %entry
-; X64-AVX2-NEXT: vmovd %xmm0, %eax
-; X64-AVX2-NEXT: andl $1, %eax
-; X64-AVX2-NEXT: vpextrd $1, %xmm0, %ecx
-; X64-AVX2-NEXT: andl $1, %ecx
-; X64-AVX2-NEXT: vpextrd $2, %xmm0, %edx
-; X64-AVX2-NEXT: andl $1, %edx
-; X64-AVX2-NEXT: vpextrd $3, %xmm0, %esi
-; X64-AVX2-NEXT: andl $1, %esi
-; X64-AVX2-NEXT: leal (%rax,%rcx,2), %eax
-; X64-AVX2-NEXT: leal (%rax,%rdx,4), %eax
-; X64-AVX2-NEXT: leal (%rax,%rsi,8), %eax
+; X64-AVX2-NEXT: vpslld $31, %xmm0, %xmm0
+; X64-AVX2-NEXT: vmovmskps %xmm0, %eax
; X64-AVX2-NEXT: retq
entry:
%0 = trunc <4 x i32> %input to <4 x i1>
; SSE2-LABEL: movmsk_v16i8:
; SSE2: # %bb.0:
; SSE2-NEXT: pcmpeqb %xmm1, %xmm0
-; SSE2-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp)
-; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al
-; SSE2-NEXT: xorb -{{[0-9]+}}(%rsp), %al
-; SSE2-NEXT: andb -{{[0-9]+}}(%rsp), %al
+; SSE2-NEXT: pmovmskb %xmm0, %eax
+; SSE2-NEXT: movl %eax, %ecx
+; SSE2-NEXT: shrl $15, %ecx
+; SSE2-NEXT: movl %eax, %edx
+; SSE2-NEXT: shrl $8, %edx
+; SSE2-NEXT: andl $1, %edx
+; SSE2-NEXT: andl $8, %eax
+; SSE2-NEXT: shrl $3, %eax
+; SSE2-NEXT: xorl %edx, %eax
+; SSE2-NEXT: andl %ecx, %eax
+; SSE2-NEXT: # kill: def $al killed $al killed $eax
; SSE2-NEXT: retq
;
; AVX-LABEL: movmsk_v16i8:
; AVX: # %bb.0:
; AVX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpextrb $3, %xmm0, %eax
-; AVX-NEXT: vpextrb $8, %xmm0, %ecx
-; AVX-NEXT: xorl %eax, %ecx
-; AVX-NEXT: vpextrb $15, %xmm0, %eax
+; AVX-NEXT: vpmovmskb %xmm0, %eax
+; AVX-NEXT: movl %eax, %ecx
+; AVX-NEXT: shrl $15, %ecx
+; AVX-NEXT: movl %eax, %edx
+; AVX-NEXT: shrl $8, %edx
+; AVX-NEXT: andl $1, %edx
+; AVX-NEXT: andl $8, %eax
+; AVX-NEXT: shrl $3, %eax
+; AVX-NEXT: xorl %edx, %eax
; AVX-NEXT: andl %ecx, %eax
; AVX-NEXT: # kill: def $al killed $al killed $eax
; AVX-NEXT: retq
ret i1 %u2
}
+; TODO: Replace the shift+mask chain with NOT+TEST+SETE.
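+; A hypothetical NOT+TEST+SETE sequence for the shift+mask chains below
+; (assumes the packsswb+pmovmskb mask is already in %eax; this is a sketch,
+; not output LLVM currently generates):
+;   notl %eax            # invert the lane mask
+;   testb $0x93, %al     # ZF=1 iff bits 0,1,4,7 were all set
+;   sete %al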
define i1 @movmsk_v8i16(<8 x i16> %x, <8 x i16> %y) {
; SSE2-LABEL: movmsk_v8i16:
; SSE2: # %bb.0:
; SSE2-NEXT: pcmpgtw %xmm1, %xmm0
-; SSE2-NEXT: movd %xmm0, %ecx
-; SSE2-NEXT: pextrw $1, %xmm0, %edx
-; SSE2-NEXT: pextrw $7, %xmm0, %esi
-; SSE2-NEXT: pextrw $4, %xmm0, %eax
-; SSE2-NEXT: andl %esi, %eax
-; SSE2-NEXT: andl %edx, %eax
-; SSE2-NEXT: andl %ecx, %eax
-; SSE2-NEXT: # kill: def $al killed $al killed $eax
+; SSE2-NEXT: packsswb %xmm0, %xmm0
+; SSE2-NEXT: pmovmskb %xmm0, %ecx
+; SSE2-NEXT: movl %ecx, %eax
+; SSE2-NEXT: shrb $7, %al
+; SSE2-NEXT: movl %ecx, %edx
+; SSE2-NEXT: andb $16, %dl
+; SSE2-NEXT: shrb $4, %dl
+; SSE2-NEXT: andb %al, %dl
+; SSE2-NEXT: movl %ecx, %eax
+; SSE2-NEXT: shrb %al
+; SSE2-NEXT: andb %dl, %al
+; SSE2-NEXT: andb %cl, %al
; SSE2-NEXT: retq
;
; AVX-LABEL: movmsk_v8i16:
; AVX: # %bb.0:
; AVX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vmovd %xmm0, %ecx
-; AVX-NEXT: vpextrw $1, %xmm0, %edx
-; AVX-NEXT: vpextrw $7, %xmm0, %esi
-; AVX-NEXT: vpextrw $4, %xmm0, %eax
-; AVX-NEXT: andl %esi, %eax
-; AVX-NEXT: andl %edx, %eax
-; AVX-NEXT: andl %ecx, %eax
-; AVX-NEXT: # kill: def $al killed $al killed $eax
+; AVX-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
+; AVX-NEXT: vpmovmskb %xmm0, %ecx
+; AVX-NEXT: movl %ecx, %eax
+; AVX-NEXT: shrb $7, %al
+; AVX-NEXT: movl %ecx, %edx
+; AVX-NEXT: andb $16, %dl
+; AVX-NEXT: shrb $4, %dl
+; AVX-NEXT: andb %al, %dl
+; AVX-NEXT: movl %ecx, %eax
+; AVX-NEXT: shrb %al
+; AVX-NEXT: andb %dl, %al
+; AVX-NEXT: andb %cl, %al
; AVX-NEXT: retq
;
; KNL-LABEL: movmsk_v8i16:
ret i1 %u3
}
+; TODO: Replace the shift+mask chain with AND+CMP.
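+; A hypothetical shorter form (it uses TEST+SETNP, i.e. the parity flag,
+; rather than the AND+CMP named above; a sketch, not current output):
+; bit2^bit3 of the movmskps mask in %eax is the odd parity of (mask & 12):
+;   testb $12, %al       # PF = even parity of bits 2 and 3
+;   setnp %al            # al = 1 iff exactly one of the two bits is set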
define i1 @movmsk_v4i32(<4 x i32> %x, <4 x i32> %y) {
; SSE2-LABEL: movmsk_v4i32:
; SSE2: # %bb.0:
; SSE2-NEXT: pcmpgtd %xmm0, %xmm1
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
-; SSE2-NEXT: movd %xmm0, %ecx
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[3,1,2,3]
-; SSE2-NEXT: movd %xmm0, %eax
-; SSE2-NEXT: xorl %ecx, %eax
+; SSE2-NEXT: movmskps %xmm1, %eax
+; SSE2-NEXT: movl %eax, %ecx
+; SSE2-NEXT: shrb $3, %cl
+; SSE2-NEXT: andb $4, %al
+; SSE2-NEXT: shrb $2, %al
+; SSE2-NEXT: xorb %cl, %al
; SSE2-NEXT: # kill: def $al killed $al killed $eax
; SSE2-NEXT: retq
;
; AVX-LABEL: movmsk_v4i32:
; AVX: # %bb.0:
; AVX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
-; AVX-NEXT: vpextrd $2, %xmm0, %ecx
-; AVX-NEXT: vpextrd $3, %xmm0, %eax
-; AVX-NEXT: xorl %ecx, %eax
+; AVX-NEXT: vmovmskps %xmm0, %eax
+; AVX-NEXT: movl %eax, %ecx
+; AVX-NEXT: shrb $3, %cl
+; AVX-NEXT: andb $4, %al
+; AVX-NEXT: shrb $2, %al
+; AVX-NEXT: xorb %cl, %al
; AVX-NEXT: # kill: def $al killed $al killed $eax
; AVX-NEXT: retq
;
; SSE2-NEXT: pand %xmm0, %xmm1
; SSE2-NEXT: pcmpeqd %xmm0, %xmm0
; SSE2-NEXT: pxor %xmm1, %xmm0
-; SSE2-NEXT: movd %xmm0, %ecx
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; SSE2-NEXT: movd %xmm0, %eax
-; SSE2-NEXT: andl %ecx, %eax
-; SSE2-NEXT: # kill: def $al killed $al killed $eax
+; SSE2-NEXT: movmskpd %xmm0, %ecx
+; SSE2-NEXT: movl %ecx, %eax
+; SSE2-NEXT: shrb %al
+; SSE2-NEXT: andb %cl, %al
; SSE2-NEXT: retq
;
; AVX-LABEL: movmsk_v2i64:
; AVX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpextrd $2, %xmm0, %ecx
-; AVX-NEXT: vmovd %xmm0, %eax
-; AVX-NEXT: andl %ecx, %eax
-; AVX-NEXT: # kill: def $al killed $al killed $eax
+; AVX-NEXT: vmovmskpd %xmm0, %ecx
+; AVX-NEXT: movl %ecx, %eax
+; AVX-NEXT: shrb %al
+; AVX-NEXT: andb %cl, %al
; AVX-NEXT: retq
;
; KNL-LABEL: movmsk_v2i64:
; SSE2-NEXT: cmpeqps %xmm1, %xmm2
; SSE2-NEXT: cmpunordps %xmm1, %xmm0
; SSE2-NEXT: orps %xmm2, %xmm0
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; SSE2-NEXT: movd %xmm1, %ecx
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; SSE2-NEXT: movd %xmm1, %edx
-; SSE2-NEXT: pextrw $6, %xmm0, %eax
-; SSE2-NEXT: orl %edx, %eax
-; SSE2-NEXT: orl %ecx, %eax
-; SSE2-NEXT: # kill: def $al killed $al killed $eax
+; SSE2-NEXT: movmskps %xmm0, %eax
+; SSE2-NEXT: testb $14, %al
+; SSE2-NEXT: setne %al
; SSE2-NEXT: retq
;
; AVX-LABEL: movmsk_v4f32:
; AVX: # %bb.0:
; AVX-NEXT: vcmpeq_uqps %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vextractps $1, %xmm0, %ecx
-; AVX-NEXT: vextractps $2, %xmm0, %edx
-; AVX-NEXT: vpextrb $12, %xmm0, %eax
-; AVX-NEXT: orl %edx, %eax
-; AVX-NEXT: orl %ecx, %eax
-; AVX-NEXT: # kill: def $al killed $al killed $eax
+; AVX-NEXT: vmovmskps %xmm0, %eax
+; AVX-NEXT: testb $14, %al
+; AVX-NEXT: setne %al
; AVX-NEXT: retq
;
; KNL-LABEL: movmsk_v4f32:
; SSE2-LABEL: movmsk_v2f64:
; SSE2: # %bb.0:
; SSE2-NEXT: cmplepd %xmm0, %xmm1
-; SSE2-NEXT: movd %xmm1, %ecx
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
-; SSE2-NEXT: movd %xmm0, %eax
-; SSE2-NEXT: andl %ecx, %eax
-; SSE2-NEXT: # kill: def $al killed $al killed $eax
+; SSE2-NEXT: movmskpd %xmm1, %ecx
+; SSE2-NEXT: movl %ecx, %eax
+; SSE2-NEXT: shrb %al
+; SSE2-NEXT: andb %cl, %al
; SSE2-NEXT: retq
;
; AVX-LABEL: movmsk_v2f64:
; AVX: # %bb.0:
; AVX-NEXT: vcmplepd %xmm0, %xmm1, %xmm0
-; AVX-NEXT: vextractps $2, %xmm0, %ecx
-; AVX-NEXT: vmovd %xmm0, %eax
-; AVX-NEXT: andl %ecx, %eax
-; AVX-NEXT: # kill: def $al killed $al killed $eax
+; AVX-NEXT: vmovmskpd %xmm0, %ecx
+; AVX-NEXT: movl %ecx, %eax
+; AVX-NEXT: shrb %al
+; AVX-NEXT: andb %cl, %al
; AVX-NEXT: retq
;
; KNL-LABEL: movmsk_v2f64:
; SSE2-LABEL: PR39665_c_ray:
; SSE2: # %bb.0:
; SSE2-NEXT: cmpltpd %xmm0, %xmm1
-; SSE2-NEXT: movapd %xmm1, -{{[0-9]+}}(%rsp)
-; SSE2-NEXT: testb $1, -{{[0-9]+}}(%rsp)
-; SSE2-NEXT: movl $42, %eax
-; SSE2-NEXT: movl $99, %ecx
-; SSE2-NEXT: cmovel %ecx, %eax
-; SSE2-NEXT: testb $1, -{{[0-9]+}}(%rsp)
+; SSE2-NEXT: movmskpd %xmm1, %eax
+; SSE2-NEXT: cmpb $3, %al
+; SSE2-NEXT: movl $42, %ecx
+; SSE2-NEXT: movl $99, %eax
; SSE2-NEXT: cmovel %ecx, %eax
; SSE2-NEXT: retq
;
; AVX-LABEL: PR39665_c_ray:
; AVX: # %bb.0:
; AVX-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0
-; AVX-NEXT: vpextrb $0, %xmm0, %ecx
-; AVX-NEXT: vpextrb $8, %xmm0, %eax
-; AVX-NEXT: testb $1, %al
-; AVX-NEXT: movl $42, %eax
-; AVX-NEXT: movl $99, %edx
-; AVX-NEXT: cmovel %edx, %eax
-; AVX-NEXT: testb $1, %cl
-; AVX-NEXT: cmovel %edx, %eax
+; AVX-NEXT: vmovmskpd %xmm0, %eax
+; AVX-NEXT: cmpb $3, %al
+; AVX-NEXT: movl $42, %ecx
+; AVX-NEXT: movl $99, %eax
+; AVX-NEXT: cmovel %ecx, %eax
; AVX-NEXT: retq
;
; KNL-LABEL: PR39665_c_ray: