return DAG.getNode(ISD::ADD, DL, VT, ExtendedCond, N2);
}
+ // select Cond, Pow2C, 0 --> (zext Cond) << log2(Pow2C); a lane whose extended condition is 1 yields 1 << log2(Pow2C) == Pow2C, a lane that is 0 stays 0.
+ APInt Pow2C; // Out-parameter: filled in by isConstantSplatVector() below.
+ if (ISD::isConstantSplatVector(N1.getNode(), Pow2C) && Pow2C.isPowerOf2() &&
+ isNullOrNullSplat(N2)) {
+ SDValue ZextCond = DAG.getZExtOrTrunc(Cond, DL, VT); // NOTE(review): presumably Cond is <N x i1> here, so this is a pure zext to 0/1 per lane -- confirm against the guards earlier in this function.
+ SDValue ShAmtC = DAG.getConstant(Pow2C.exactLogBase2(), DL, VT); // Shift amount = log2 of the power-of-2 constant, materialized in the result type VT.
+ return DAG.getNode(ISD::SHL, DL, VT, ZextCond, ShAmtC);
+ }
+
// The general case for select-of-constants:
// vselect <N x i1> Cond, C1, C2 --> xor (and (sext Cond), (C1^C2)), C2
// ...but that only makes sense if a vselect is slower than 2 logic ops, so
define <8 x i16> @sel_shift_bool_v8i16(<8 x i1> %t) {
; ANY-LABEL: sel_shift_bool_v8i16:
; ANY: # %bb.0:
-; ANY-NEXT: psllw $15, %xmm0
-; ANY-NEXT: psraw $15, %xmm0
; ANY-NEXT: pand {{.*}}(%rip), %xmm0
+; ANY-NEXT: psllw $7, %xmm0
; ANY-NEXT: retq
%shl= select <8 x i1> %t, <8 x i16> <i16 128, i16 128, i16 128, i16 128, i16 128, i16 128, i16 128, i16 128>, <8 x i16> zeroinitializer
ret <8 x i16> %shl
define <4 x i32> @sel_shift_bool_v4i32(<4 x i1> %t) {
; ANY-LABEL: sel_shift_bool_v4i32:
; ANY: # %bb.0:
-; ANY-NEXT: pslld $31, %xmm0
-; ANY-NEXT: psrad $31, %xmm0
; ANY-NEXT: pand {{.*}}(%rip), %xmm0
+; ANY-NEXT: pslld $6, %xmm0
; ANY-NEXT: retq
%shl = select <4 x i1> %t, <4 x i32> <i32 64, i32 64, i32 64, i32 64>, <4 x i32> zeroinitializer
ret <4 x i32> %shl
define <2 x i64> @sel_shift_bool_v2i64(<2 x i1> %t) {
; ANY-LABEL: sel_shift_bool_v2i64:
; ANY: # %bb.0:
-; ANY-NEXT: psllq $63, %xmm0
-; ANY-NEXT: psrad $31, %xmm0
-; ANY-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; ANY-NEXT: pand {{.*}}(%rip), %xmm0
+; ANY-NEXT: psllq $16, %xmm0
; ANY-NEXT: retq
%shl = select <2 x i1> %t, <2 x i64> <i64 65536, i64 65536>, <2 x i64> zeroinitializer
ret <2 x i64> %shl
; This test case previously crashed after r363802, r363850, and r363856 due to
; any_extend_vector_inreg not being handled by the X86 backend.
define i64 @vselect_any_extend_vector_inreg_crash(<8 x i8>* %x) {
-; SSE2-LABEL: vselect_any_extend_vector_inreg_crash:
-; SSE2: # %bb.0:
-; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
-; SSE2-NEXT: pcmpeqb {{.*}}(%rip), %xmm0
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
-; SSE2-NEXT: psrad $24, %xmm0
-; SSE2-NEXT: movq %xmm0, %rax
-; SSE2-NEXT: andl $32768, %eax # imm = 0x8000
-; SSE2-NEXT: retq
-;
-; SSE41-LABEL: vselect_any_extend_vector_inreg_crash:
-; SSE41: # %bb.0:
-; SSE41-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
-; SSE41-NEXT: pcmpeqb {{.*}}(%rip), %xmm0
-; SSE41-NEXT: pmovsxbq %xmm0, %xmm0
-; SSE41-NEXT: movq %xmm0, %rax
-; SSE41-NEXT: andl $32768, %eax # imm = 0x8000
-; SSE41-NEXT: retq
+; SSE-LABEL: vselect_any_extend_vector_inreg_crash:
+; SSE: # %bb.0:
+; SSE-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
+; SSE-NEXT: pcmpeqb {{.*}}(%rip), %xmm0
+; SSE-NEXT: movq %xmm0, %rax
+; SSE-NEXT: andl $1, %eax
+; SSE-NEXT: shlq $15, %rax
+; SSE-NEXT: retq
;
; AVX-LABEL: vselect_any_extend_vector_inreg_crash:
; AVX: # %bb.0:
; AVX-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT: vpcmpeqb {{.*}}(%rip), %xmm0, %xmm0
-; AVX-NEXT: vpmovsxbq %xmm0, %xmm0
; AVX-NEXT: vmovq %xmm0, %rax
-; AVX-NEXT: andl $32768, %eax # imm = 0x8000
+; AVX-NEXT: andl $1, %eax
+; AVX-NEXT: shlq $15, %rax
; AVX-NEXT: retq
0:
%1 = load <8 x i8>, <8 x i8>* %x