GetSplitVector(Cond, CL, CH);
// It seems to improve code to generate two narrow SETCCs as opposed to
// splitting a wide result vector.
- else if (Cond.getOpcode() == ISD::SETCC)
- SplitVecRes_SETCC(Cond.getNode(), CL, CH);
- else
+ else if (Cond.getOpcode() == ISD::SETCC) {
+ // If the condition is a vXi1 vector, and the LHS of the setcc is a legal
+ // type and the setcc result type is the same vXi1, then leave the setcc
+ // alone.
+ EVT CondLHSVT = Cond.getOperand(0).getValueType();
+ if (Cond.getValueType().getVectorElementType() == MVT::i1 &&
+ isTypeLegal(CondLHSVT) &&
+ getSetCCResultType(CondLHSVT) == Cond.getValueType())
+ std::tie(CL, CH) = DAG.SplitVector(Cond, dl);
+ else
+ SplitVecRes_SETCC(Cond.getNode(), CL, CH);
+ } else
std::tie(CL, CH) = DAG.SplitVector(Cond, dl);
}
define <16 x i64> @test3(<16 x i8> %x, <16 x i64> %a, <16 x i64> %b) {
; CHECK-SKX-LABEL: test3:
; CHECK-SKX: # %bb.0:
-; CHECK-SKX-NEXT: vpshufd {{.*#+}} xmm5 = xmm0[2,3,0,1]
-; CHECK-SKX-NEXT: vptestnmb %xmm5, %xmm5, %k1
-; CHECK-SKX-NEXT: vptestnmb %xmm0, %xmm0, %k2
-; CHECK-SKX-NEXT: vpblendmq %zmm1, %zmm3, %zmm0 {%k2}
+; CHECK-SKX-NEXT: vptestnmb %xmm0, %xmm0, %k1
+; CHECK-SKX-NEXT: vpblendmq %zmm1, %zmm3, %zmm0 {%k1}
+; CHECK-SKX-NEXT: kshiftrw $8, %k1, %k1
; CHECK-SKX-NEXT: vpblendmq %zmm2, %zmm4, %zmm1 {%k1}
; CHECK-SKX-NEXT: retq
;
define <16 x i64> @test4(<16 x i16> %x, <16 x i64> %a, <16 x i64> %b) {
; CHECK-SKX-LABEL: test4:
; CHECK-SKX: # %bb.0:
-; CHECK-SKX-NEXT: vextracti128 $1, %ymm0, %xmm5
-; CHECK-SKX-NEXT: vptestnmw %xmm5, %xmm5, %k1
-; CHECK-SKX-NEXT: vptestnmw %xmm0, %xmm0, %k2
-; CHECK-SKX-NEXT: vpblendmq %zmm1, %zmm3, %zmm0 {%k2}
+; CHECK-SKX-NEXT: vptestnmw %ymm0, %ymm0, %k1
+; CHECK-SKX-NEXT: vpblendmq %zmm1, %zmm3, %zmm0 {%k1}
+; CHECK-SKX-NEXT: kshiftrw $8, %k1, %k1
; CHECK-SKX-NEXT: vpblendmq %zmm2, %zmm4, %zmm1 {%k1}
; CHECK-SKX-NEXT: retq
;
}
define <16 x i64> @test5(<16 x i32> %x, <16 x i64> %a, <16 x i64> %b) {
-; CHECK-SKX-LABEL: test5:
-; CHECK-SKX: # %bb.0:
-; CHECK-SKX-NEXT: vextracti64x4 $1, %zmm0, %ymm5
-; CHECK-SKX-NEXT: vptestnmd %ymm5, %ymm5, %k1
-; CHECK-SKX-NEXT: vptestnmd %ymm0, %ymm0, %k2
-; CHECK-SKX-NEXT: vpblendmq %zmm1, %zmm3, %zmm0 {%k2}
-; CHECK-SKX-NEXT: vpblendmq %zmm2, %zmm4, %zmm1 {%k1}
-; CHECK-SKX-NEXT: retq
-;
-; CHECK-KNL-LABEL: test5:
-; CHECK-KNL: # %bb.0:
-; CHECK-KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm5
-; CHECK-KNL-NEXT: vptestnmd %zmm5, %zmm5, %k1
-; CHECK-KNL-NEXT: vptestnmd %zmm0, %zmm0, %k2
-; CHECK-KNL-NEXT: vpblendmq %zmm1, %zmm3, %zmm0 {%k2}
-; CHECK-KNL-NEXT: vpblendmq %zmm2, %zmm4, %zmm1 {%k1}
-; CHECK-KNL-NEXT: retq
+; CHECK-LABEL: test5:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vptestnmd %zmm0, %zmm0, %k1
+; CHECK-NEXT: vpblendmq %zmm1, %zmm3, %zmm0 {%k1}
+; CHECK-NEXT: kshiftrw $8, %k1, %k1
+; CHECK-NEXT: vpblendmq %zmm2, %zmm4, %zmm1 {%k1}
+; CHECK-NEXT: retq
%c = icmp eq <16 x i32> %x, zeroinitializer
%ret = select <16 x i1> %c, <16 x i64> %a, <16 x i64> %b
ret <16 x i64> %ret
define <32 x i32> @test6(<32 x i8> %x, <32 x i32> %a, <32 x i32> %b) {
; CHECK-SKX-LABEL: test6:
; CHECK-SKX: # %bb.0:
-; CHECK-SKX-NEXT: vextracti128 $1, %ymm0, %xmm5
-; CHECK-SKX-NEXT: vptestnmb %xmm5, %xmm5, %k1
-; CHECK-SKX-NEXT: vptestnmb %xmm0, %xmm0, %k2
-; CHECK-SKX-NEXT: vpblendmd %zmm1, %zmm3, %zmm0 {%k2}
+; CHECK-SKX-NEXT: vptestnmb %ymm0, %ymm0, %k1
+; CHECK-SKX-NEXT: vpblendmd %zmm1, %zmm3, %zmm0 {%k1}
+; CHECK-SKX-NEXT: kshiftrd $16, %k1, %k1
; CHECK-SKX-NEXT: vpblendmd %zmm2, %zmm4, %zmm1 {%k1}
; CHECK-SKX-NEXT: retq
;
define <32 x i32> @test7(<32 x i16> %x, <32 x i32> %a, <32 x i32> %b) {
; CHECK-SKX-LABEL: test7:
; CHECK-SKX: # %bb.0:
-; CHECK-SKX-NEXT: vextracti64x4 $1, %zmm0, %ymm5
-; CHECK-SKX-NEXT: vptestnmw %ymm5, %ymm5, %k1
-; CHECK-SKX-NEXT: vptestnmw %ymm0, %ymm0, %k2
-; CHECK-SKX-NEXT: vpblendmd %zmm1, %zmm3, %zmm0 {%k2}
+; CHECK-SKX-NEXT: vptestnmw %zmm0, %zmm0, %k1
+; CHECK-SKX-NEXT: vpblendmd %zmm1, %zmm3, %zmm0 {%k1}
+; CHECK-SKX-NEXT: kshiftrd $16, %k1, %k1
; CHECK-SKX-NEXT: vpblendmd %zmm2, %zmm4, %zmm1 {%k1}
; CHECK-SKX-NEXT: retq
;
define <64 x i16> @test8(<64 x i8> %x, <64 x i16> %a, <64 x i16> %b) {
; CHECK-SKX-LABEL: test8:
; CHECK-SKX: # %bb.0:
-; CHECK-SKX-NEXT: vextracti64x4 $1, %zmm0, %ymm5
-; CHECK-SKX-NEXT: vptestnmb %ymm5, %ymm5, %k1
-; CHECK-SKX-NEXT: vptestnmb %ymm0, %ymm0, %k2
-; CHECK-SKX-NEXT: vpblendmw %zmm1, %zmm3, %zmm0 {%k2}
+; CHECK-SKX-NEXT: vptestnmb %zmm0, %zmm0, %k1
+; CHECK-SKX-NEXT: vpblendmw %zmm1, %zmm3, %zmm0 {%k1}
+; CHECK-SKX-NEXT: kshiftrq $32, %k1, %k1
; CHECK-SKX-NEXT: vpblendmw %zmm2, %zmm4, %zmm1 {%k1}
; CHECK-SKX-NEXT: retq
;
; CHECK-NEXT: vmovdqa (%rsi), %ymm2
; CHECK-NEXT: vmovdqa 32(%rsi), %ymm3
; CHECK-NEXT: vpcmpeqw %xmm1, %xmm0, %k1
-; CHECK-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
-; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; CHECK-NEXT: vpcmpeqw %xmm1, %xmm0, %k2
+; CHECK-NEXT: kshiftrb $4, %k1, %k2
; CHECK-NEXT: vmovdqa64 32(%rdi), %ymm3 {%k2}
; CHECK-NEXT: vmovdqa64 (%rdi), %ymm2 {%k1}
; CHECK-NEXT: vmovdqa %ymm2, (%rdx)
; CHECK: # %bb.0:
; CHECK-NEXT: vmovdqa (%rsi), %ymm2
; CHECK-NEXT: vmovdqa 32(%rsi), %ymm3
-; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %k1
-; CHECK-NEXT: vextracti128 $1, %ymm1, %xmm1
-; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm0
-; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %k2
+; CHECK-NEXT: vpcmpeqd %ymm1, %ymm0, %k1
+; CHECK-NEXT: kshiftrb $4, %k1, %k2
; CHECK-NEXT: vmovdqa64 32(%rdi), %ymm3 {%k2}
; CHECK-NEXT: vmovdqa64 (%rdi), %ymm2 {%k1}
; CHECK-NEXT: vmovdqa %ymm2, (%rdx)
; CHECK-NEXT: vmovdqa (%rsi), %ymm2
; CHECK-NEXT: vmovdqa 32(%rsi), %ymm3
; CHECK-NEXT: vpcmpeqb %xmm1, %xmm0, %k1
-; CHECK-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
-; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; CHECK-NEXT: vpcmpeqb %xmm1, %xmm0, %k2
+; CHECK-NEXT: kshiftrw $8, %k1, %k2
; CHECK-NEXT: vmovdqa32 32(%rdi), %ymm3 {%k2}
; CHECK-NEXT: vmovdqa32 (%rdi), %ymm2 {%k1}
; CHECK-NEXT: vmovdqa %ymm2, (%rdx)
; CHECK: # %bb.0:
; CHECK-NEXT: vmovdqa (%rsi), %ymm2
; CHECK-NEXT: vmovdqa 32(%rsi), %ymm3
-; CHECK-NEXT: vpcmpeqw %xmm1, %xmm0, %k1
-; CHECK-NEXT: vextracti128 $1, %ymm1, %xmm1
-; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm0
-; CHECK-NEXT: vpcmpeqw %xmm1, %xmm0, %k2
+; CHECK-NEXT: vpcmpeqw %ymm1, %ymm0, %k1
+; CHECK-NEXT: kshiftrw $8, %k1, %k2
; CHECK-NEXT: vmovdqa32 32(%rdi), %ymm3 {%k2}
; CHECK-NEXT: vmovdqa32 (%rdi), %ymm2 {%k1}
; CHECK-NEXT: vmovdqa %ymm2, (%rdx)
target triple = "x86_64-unknown-linux-gnu"
define void @test(<4 x i64> %a, <4 x x86_fp80> %b, <8 x x86_fp80>* %c) local_unnamed_addr {
-; CHECK-LABEL: test:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vmovq %xmm0, %rax
-; CHECK-NEXT: vpextrq $1, %xmm0, %rcx
-; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm0
-; CHECK-NEXT: vmovq %xmm0, %rdx
-; CHECK-NEXT: vpextrq $1, %xmm0, %rsi
-; CHECK-NEXT: cmpq $3, %rsi
-; CHECK-NEXT: fld1
-; CHECK-NEXT: fldz
-; CHECK-NEXT: fld %st(0)
-; CHECK-NEXT: fcmove %st(2), %st
-; CHECK-NEXT: cmpq $2, %rdx
-; CHECK-NEXT: fld %st(1)
-; CHECK-NEXT: fcmove %st(3), %st
-; CHECK-NEXT: cmpq $1, %rcx
-; CHECK-NEXT: fld %st(2)
-; CHECK-NEXT: fcmove %st(4), %st
-; CHECK-NEXT: testq %rax, %rax
-; CHECK-NEXT: fxch %st(3)
-; CHECK-NEXT: fcmove %st(4), %st
-; CHECK-NEXT: fstp %st(4)
-; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
-; CHECK-NEXT: fstpt 70(%rdi)
-; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
-; CHECK-NEXT: fstpt 50(%rdi)
-; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
-; CHECK-NEXT: fstpt 30(%rdi)
-; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
-; CHECK-NEXT: fstpt 10(%rdi)
-; CHECK-NEXT: fxch %st(1)
-; CHECK-NEXT: fadd %st, %st(0)
-; CHECK-NEXT: fstpt 60(%rdi)
-; CHECK-NEXT: fadd %st, %st(0)
-; CHECK-NEXT: fstpt 40(%rdi)
-; CHECK-NEXT: fadd %st, %st(0)
-; CHECK-NEXT: fstpt 20(%rdi)
-; CHECK-NEXT: fadd %st, %st(0)
-; CHECK-NEXT: fstpt (%rdi)
+; AVX512F-LABEL: test:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: vmovq %xmm0, %rax
+; AVX512F-NEXT: vpextrq $1, %xmm0, %rcx
+; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm0
+; AVX512F-NEXT: vmovq %xmm0, %rdx
+; AVX512F-NEXT: vpextrq $1, %xmm0, %rsi
+; AVX512F-NEXT: cmpq $3, %rsi
+; AVX512F-NEXT: fld1
+; AVX512F-NEXT: fldz
+; AVX512F-NEXT: fld %st(0)
+; AVX512F-NEXT: fcmove %st(2), %st
+; AVX512F-NEXT: cmpq $2, %rdx
+; AVX512F-NEXT: fld %st(1)
+; AVX512F-NEXT: fcmove %st(3), %st
+; AVX512F-NEXT: cmpq $1, %rcx
+; AVX512F-NEXT: fld %st(2)
+; AVX512F-NEXT: fcmove %st(4), %st
+; AVX512F-NEXT: testq %rax, %rax
+; AVX512F-NEXT: fxch %st(3)
+; AVX512F-NEXT: fcmove %st(4), %st
+; AVX512F-NEXT: fstp %st(4)
+; AVX512F-NEXT: fldt {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: fstpt 70(%rdi)
+; AVX512F-NEXT: fldt {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: fstpt 50(%rdi)
+; AVX512F-NEXT: fldt {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: fstpt 30(%rdi)
+; AVX512F-NEXT: fldt {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: fstpt 10(%rdi)
+; AVX512F-NEXT: fxch %st(1)
+; AVX512F-NEXT: fadd %st, %st(0)
+; AVX512F-NEXT: fstpt 60(%rdi)
+; AVX512F-NEXT: fadd %st, %st(0)
+; AVX512F-NEXT: fstpt 40(%rdi)
+; AVX512F-NEXT: fadd %st, %st(0)
+; AVX512F-NEXT: fstpt 20(%rdi)
+; AVX512F-NEXT: fadd %st, %st(0)
+; AVX512F-NEXT: fstpt (%rdi)
+;
+; AVX512VL-LABEL: test:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpcmpeqq {{.*}}(%rip), %ymm0, %k0
+; AVX512VL-NEXT: kshiftrb $2, %k0, %k1
+; AVX512VL-NEXT: kshiftrb $1, %k0, %k2
+; AVX512VL-NEXT: kmovd %k0, %eax
+; AVX512VL-NEXT: testb $1, %al
+; AVX512VL-NEXT: fld1
+; AVX512VL-NEXT: fldz
+; AVX512VL-NEXT: fld %st(0)
+; AVX512VL-NEXT: fcmovne %st(2), %st
+; AVX512VL-NEXT: kshiftrb $1, %k1, %k0
+; AVX512VL-NEXT: kmovd %k0, %eax
+; AVX512VL-NEXT: testb $1, %al
+; AVX512VL-NEXT: fld %st(1)
+; AVX512VL-NEXT: fcmovne %st(3), %st
+; AVX512VL-NEXT: kmovd %k1, %eax
+; AVX512VL-NEXT: testb $1, %al
+; AVX512VL-NEXT: fld %st(2)
+; AVX512VL-NEXT: fcmovne %st(4), %st
+; AVX512VL-NEXT: kmovd %k2, %eax
+; AVX512VL-NEXT: testb $1, %al
+; AVX512VL-NEXT: fxch %st(3)
+; AVX512VL-NEXT: fcmovne %st(4), %st
+; AVX512VL-NEXT: fstp %st(4)
+; AVX512VL-NEXT: fldt {{[0-9]+}}(%rsp)
+; AVX512VL-NEXT: fstpt 70(%rdi)
+; AVX512VL-NEXT: fldt {{[0-9]+}}(%rsp)
+; AVX512VL-NEXT: fstpt 50(%rdi)
+; AVX512VL-NEXT: fldt {{[0-9]+}}(%rsp)
+; AVX512VL-NEXT: fstpt 30(%rdi)
+; AVX512VL-NEXT: fldt {{[0-9]+}}(%rsp)
+; AVX512VL-NEXT: fstpt 10(%rdi)
+; AVX512VL-NEXT: fxch %st(1)
+; AVX512VL-NEXT: fadd %st, %st(0)
+; AVX512VL-NEXT: fstpt (%rdi)
+; AVX512VL-NEXT: fadd %st, %st(0)
+; AVX512VL-NEXT: fstpt 60(%rdi)
+; AVX512VL-NEXT: fadd %st, %st(0)
+; AVX512VL-NEXT: fstpt 40(%rdi)
+; AVX512VL-NEXT: fadd %st, %st(0)
+; AVX512VL-NEXT: fstpt 20(%rdi)
%1 = icmp eq <4 x i64> <i64 0, i64 1, i64 2, i64 3>, %a
%2 = select <4 x i1> %1, <4 x x86_fp80> <x86_fp80 0xK3FFF8000000000000000, x86_fp80 0xK3FFF8000000000000000, x86_fp80 0xK3FFF8000000000000000, x86_fp80 0xK3FFF8000000000000000>, <4 x x86_fp80> zeroinitializer
%3 = fadd <4 x x86_fp80> %2, %2