EVT VT = N->getValueType(0);
EVT OpVT = N0.getValueType();
+ // [us]itofp(undef) -> 0.0, not undef, because the result value is bounded.
+ if (N0.isUndef())
+ return DAG.getConstantFP(0.0, SDLoc(N), VT);
+
// fold (sint_to_fp c1) -> c1fp
if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
// ...but only if the target supports immediate floating-point values
EVT VT = N->getValueType(0);
EVT OpVT = N0.getValueType();
+ // [us]itofp(undef) -> 0.0, not undef, because the result value is bounded.
+ if (N0.isUndef())
+ return DAG.getConstantFP(0.0, SDLoc(N), VT);
+
// fold (uint_to_fp c1) -> c1fp
if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
// ...but only if the target supports immediate floating-point values
if (Operand.isUndef())
return getUNDEF(VT);
break;
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP:
+ // [us]itofp(undef) -> constant 0.0 (not UNDEF): the result value is bounded.
+ if (Operand.isUndef())
+ return getConstantFP(0.0, DL, VT);
+ break;
case ISD::SIGN_EXTEND:
assert(VT.isInteger() && Operand.getValueType().isInteger() &&
"Invalid SIGN_EXTEND!");
}
define <2 x float> @sltof2f32(<2 x i64> %a) {
-; NODQ-LABEL: sltof2f32:
-; NODQ: # %bb.0:
-; NODQ-NEXT: vpextrq $1, %xmm0, %rax
-; NODQ-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
-; NODQ-NEXT: vmovq %xmm0, %rax
-; NODQ-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
-; NODQ-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
-; NODQ-NEXT: vcvtsi2ss %rax, %xmm2, %xmm1
-; NODQ-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
-; NODQ-NEXT: retq
+; NOVLDQ-LABEL: sltof2f32:
+; NOVLDQ: # %bb.0:
+; NOVLDQ-NEXT: vpextrq $1, %xmm0, %rax
+; NOVLDQ-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
+; NOVLDQ-NEXT: vmovq %xmm0, %rax
+; NOVLDQ-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
+; NOVLDQ-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
+; NOVLDQ-NEXT: retq
;
; VLDQ-LABEL: sltof2f32:
; VLDQ: # %bb.0:
; VLDQ-NEXT: vcvtqq2ps %xmm0, %xmm0
; VLDQ-NEXT: retq
;
+; VLNODQ-LABEL: sltof2f32:
+; VLNODQ: # %bb.0:
+; VLNODQ-NEXT: vpextrq $1, %xmm0, %rax
+; VLNODQ-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
+; VLNODQ-NEXT: vmovq %xmm0, %rax
+; VLNODQ-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
+; VLNODQ-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; VLNODQ-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
+; VLNODQ-NEXT: retq
+;
; DQNOVL-LABEL: sltof2f32:
; DQNOVL: # %bb.0:
; DQNOVL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
}
define <2 x float> @sltof2f32(<2 x i64> %a) {
-; NODQ-LABEL: sltof2f32:
-; NODQ: # %bb.0:
-; NODQ-NEXT: vpextrq $1, %xmm0, %rax
-; NODQ-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
-; NODQ-NEXT: vmovq %xmm0, %rax
-; NODQ-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
-; NODQ-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
-; NODQ-NEXT: vcvtsi2ss %rax, %xmm2, %xmm1
-; NODQ-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
-; NODQ-NEXT: retq
+; NOVLDQ-LABEL: sltof2f32:
+; NOVLDQ: # %bb.0:
+; NOVLDQ-NEXT: vpextrq $1, %xmm0, %rax
+; NOVLDQ-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
+; NOVLDQ-NEXT: vmovq %xmm0, %rax
+; NOVLDQ-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
+; NOVLDQ-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
+; NOVLDQ-NEXT: retq
;
; VLDQ-LABEL: sltof2f32:
; VLDQ: # %bb.0:
; VLDQ-NEXT: vcvtqq2ps %xmm0, %xmm0
; VLDQ-NEXT: retq
;
+; VLNODQ-LABEL: sltof2f32:
+; VLNODQ: # %bb.0:
+; VLNODQ-NEXT: vpextrq $1, %xmm0, %rax
+; VLNODQ-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
+; VLNODQ-NEXT: vmovq %xmm0, %rax
+; VLNODQ-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
+; VLNODQ-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; VLNODQ-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
+; VLNODQ-NEXT: retq
+;
; DQNOVL-LABEL: sltof2f32:
; DQNOVL: # %bb.0:
; DQNOVL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; VEX-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
; VEX-NEXT: vmovq %xmm0, %rax
; VEX-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
-; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
-; VEX-NEXT: vcvtsi2ss %rax, %xmm2, %xmm1
-; VEX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
+; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
; VEX-NEXT: retq
;
; AVX512F-LABEL: sitofp_2i64_to_4f32:
; AVX512F-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
; AVX512F-NEXT: vmovq %xmm0, %rax
; AVX512F-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
-; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
-; AVX512F-NEXT: vcvtsi2ss %rax, %xmm2, %xmm1
-; AVX512F-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
+; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: sitofp_2i64_to_4f32:
; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
; AVX512VL-NEXT: vmovq %xmm0, %rax
; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
-; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
-; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm2, %xmm1
-; AVX512VL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
+; AVX512VL-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; AVX512VL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512VL-NEXT: retq
;
; AVX512DQ-LABEL: sitofp_2i64_to_4f32:
; SSE2-NEXT: movq %xmm0, %rax
; SSE2-NEXT: xorps %xmm0, %xmm0
; SSE2-NEXT: cvtsi2ss %rax, %xmm0
-; SSE2-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; SSE2-NEXT: xorps %xmm0, %xmm0
-; SSE2-NEXT: cvtsi2ss %rax, %xmm0
-; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[0,0]
-; SSE2-NEXT: movaps %xmm1, %xmm0
+; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; SSE2-NEXT: movq {{.*#+}} xmm0 = xmm1[0],zero
; SSE2-NEXT: retq
;
; SSE41-LABEL: sitofp_4i64_to_4f32_undef:
; SSE41-NEXT: movq %xmm0, %rax
; SSE41-NEXT: xorps %xmm0, %xmm0
; SSE41-NEXT: cvtsi2ss %rax, %xmm0
-; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
-; SSE41-NEXT: xorps %xmm1, %xmm1
-; SSE41-NEXT: cvtsi2ss %rax, %xmm1
-; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
+; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
; SSE41-NEXT: retq
;
; VEX-LABEL: sitofp_4i64_to_4f32_undef:
; VEX-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
; VEX-NEXT: vmovq %xmm0, %rax
; VEX-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
-; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
-; VEX-NEXT: vcvtsi2ss %rax, %xmm2, %xmm1
-; VEX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
+; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
; VEX-NEXT: retq
;
; AVX512F-LABEL: sitofp_4i64_to_4f32_undef:
; AVX512F-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
; AVX512F-NEXT: vmovq %xmm0, %rax
; AVX512F-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
-; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
-; AVX512F-NEXT: vcvtsi2ss %rax, %xmm2, %xmm1
-; AVX512F-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
+; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: sitofp_4i64_to_4f32_undef:
; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
; AVX512VL-NEXT: vmovq %xmm0, %rax
; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
-; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
-; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm2, %xmm1
-; AVX512VL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
+; AVX512VL-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; AVX512VL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512VL-NEXT: retq
;
; AVX512DQ-LABEL: sitofp_4i64_to_4f32_undef:
; VEX-NEXT: js .LBB39_4
; VEX-NEXT: # %bb.5:
; VEX-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
-; VEX-NEXT: jmp .LBB39_6
+; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
+; VEX-NEXT: retq
; VEX-NEXT: .LBB39_4:
; VEX-NEXT: movq %rax, %rcx
; VEX-NEXT: shrq %rcx
; VEX-NEXT: orq %rcx, %rax
; VEX-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
; VEX-NEXT: vaddss %xmm0, %xmm0, %xmm0
-; VEX-NEXT: .LBB39_6:
-; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
-; VEX-NEXT: testq %rax, %rax
-; VEX-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; VEX-NEXT: js .LBB39_8
-; VEX-NEXT: # %bb.7:
-; VEX-NEXT: vcvtsi2ss %rax, %xmm2, %xmm1
-; VEX-NEXT: .LBB39_8:
-; VEX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
+; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
; VEX-NEXT: retq
;
; AVX512F-LABEL: uitofp_2i64_to_4f32:
; AVX512F-NEXT: vcvtusi2ss %rax, %xmm1, %xmm1
; AVX512F-NEXT: vmovq %xmm0, %rax
; AVX512F-NEXT: vcvtusi2ss %rax, %xmm2, %xmm0
-; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
-; AVX512F-NEXT: vcvtusi2ss %rax, %xmm2, %xmm1
-; AVX512F-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
+; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: uitofp_2i64_to_4f32:
; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm1, %xmm1
; AVX512VL-NEXT: vmovq %xmm0, %rax
; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm2, %xmm0
-; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
-; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm2, %xmm1
-; AVX512VL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
+; AVX512VL-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; AVX512VL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512VL-NEXT: retq
;
; AVX512DQ-LABEL: uitofp_2i64_to_4f32:
define <4 x float> @uitofp_4i64_to_4f32_undef(<2 x i64> %a) {
; SSE2-LABEL: uitofp_4i64_to_4f32_undef:
; SSE2: # %bb.0:
-; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: movq %xmm0, %rax
; SSE2-NEXT: testq %rax, %rax
; SSE2-NEXT: js .LBB41_1
; SSE2-NEXT: # %bb.2:
-; SSE2-NEXT: xorps %xmm0, %xmm0
-; SSE2-NEXT: cvtsi2ss %rax, %xmm0
+; SSE2-NEXT: cvtsi2ss %rax, %xmm1
; SSE2-NEXT: jmp .LBB41_3
; SSE2-NEXT: .LBB41_1:
; SSE2-NEXT: movq %rax, %rcx
; SSE2-NEXT: shrq %rcx
; SSE2-NEXT: andl $1, %eax
; SSE2-NEXT: orq %rcx, %rax
-; SSE2-NEXT: xorps %xmm0, %xmm0
-; SSE2-NEXT: cvtsi2ss %rax, %xmm0
-; SSE2-NEXT: addss %xmm0, %xmm0
+; SSE2-NEXT: cvtsi2ss %rax, %xmm1
+; SSE2-NEXT: addss %xmm1, %xmm1
; SSE2-NEXT: .LBB41_3:
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
-; SSE2-NEXT: movq %xmm1, %rax
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; SSE2-NEXT: movq %xmm0, %rax
; SSE2-NEXT: testq %rax, %rax
; SSE2-NEXT: js .LBB41_4
; SSE2-NEXT: # %bb.5:
-; SSE2-NEXT: xorps %xmm1, %xmm1
-; SSE2-NEXT: cvtsi2ss %rax, %xmm1
+; SSE2-NEXT: xorps %xmm0, %xmm0
+; SSE2-NEXT: cvtsi2ss %rax, %xmm0
; SSE2-NEXT: jmp .LBB41_6
; SSE2-NEXT: .LBB41_4:
; SSE2-NEXT: movq %rax, %rcx
; SSE2-NEXT: shrq %rcx
; SSE2-NEXT: andl $1, %eax
; SSE2-NEXT: orq %rcx, %rax
-; SSE2-NEXT: xorps %xmm1, %xmm1
-; SSE2-NEXT: cvtsi2ss %rax, %xmm1
-; SSE2-NEXT: addss %xmm1, %xmm1
+; SSE2-NEXT: xorps %xmm0, %xmm0
+; SSE2-NEXT: cvtsi2ss %rax, %xmm0
+; SSE2-NEXT: addss %xmm0, %xmm0
; SSE2-NEXT: .LBB41_6:
-; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; SSE2-NEXT: testq %rax, %rax
-; SSE2-NEXT: xorps %xmm1, %xmm1
-; SSE2-NEXT: js .LBB41_8
-; SSE2-NEXT: # %bb.7:
-; SSE2-NEXT: xorps %xmm1, %xmm1
-; SSE2-NEXT: cvtsi2ss %rax, %xmm1
-; SSE2-NEXT: .LBB41_8:
-; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
+; SSE2-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; SSE2-NEXT: movq {{.*#+}} xmm0 = xmm1[0],zero
; SSE2-NEXT: retq
;
; SSE41-LABEL: uitofp_4i64_to_4f32_undef:
; SSE41-NEXT: # %bb.5:
; SSE41-NEXT: xorps %xmm0, %xmm0
; SSE41-NEXT: cvtsi2ss %rax, %xmm0
-; SSE41-NEXT: jmp .LBB41_6
+; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
+; SSE41-NEXT: retq
; SSE41-NEXT: .LBB41_4:
; SSE41-NEXT: movq %rax, %rcx
; SSE41-NEXT: shrq %rcx
; SSE41-NEXT: xorps %xmm0, %xmm0
; SSE41-NEXT: cvtsi2ss %rax, %xmm0
; SSE41-NEXT: addss %xmm0, %xmm0
-; SSE41-NEXT: .LBB41_6:
-; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
-; SSE41-NEXT: testq %rax, %rax
-; SSE41-NEXT: xorps %xmm1, %xmm1
-; SSE41-NEXT: js .LBB41_8
-; SSE41-NEXT: # %bb.7:
-; SSE41-NEXT: xorps %xmm1, %xmm1
-; SSE41-NEXT: cvtsi2ss %rax, %xmm1
-; SSE41-NEXT: .LBB41_8:
-; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
+; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
; SSE41-NEXT: retq
;
; VEX-LABEL: uitofp_4i64_to_4f32_undef:
; VEX-NEXT: js .LBB41_4
; VEX-NEXT: # %bb.5:
; VEX-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
-; VEX-NEXT: jmp .LBB41_6
+; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
+; VEX-NEXT: retq
; VEX-NEXT: .LBB41_4:
; VEX-NEXT: movq %rax, %rcx
; VEX-NEXT: shrq %rcx
; VEX-NEXT: orq %rcx, %rax
; VEX-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
; VEX-NEXT: vaddss %xmm0, %xmm0, %xmm0
-; VEX-NEXT: .LBB41_6:
-; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
-; VEX-NEXT: testq %rax, %rax
-; VEX-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; VEX-NEXT: js .LBB41_8
-; VEX-NEXT: # %bb.7:
-; VEX-NEXT: vcvtsi2ss %rax, %xmm2, %xmm1
-; VEX-NEXT: .LBB41_8:
-; VEX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
+; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
; VEX-NEXT: retq
;
; AVX512F-LABEL: uitofp_4i64_to_4f32_undef:
; AVX512F-NEXT: vcvtusi2ss %rax, %xmm1, %xmm1
; AVX512F-NEXT: vmovq %xmm0, %rax
; AVX512F-NEXT: vcvtusi2ss %rax, %xmm2, %xmm0
-; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
-; AVX512F-NEXT: vcvtusi2ss %rax, %xmm2, %xmm1
-; AVX512F-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
+; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: uitofp_4i64_to_4f32_undef:
; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm1, %xmm1
; AVX512VL-NEXT: vmovq %xmm0, %rax
; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm2, %xmm0
-; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
-; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm2, %xmm1
-; AVX512VL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
+; AVX512VL-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; AVX512VL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512VL-NEXT: retq
;
; AVX512DQ-LABEL: uitofp_4i64_to_4f32_undef:
; VEX-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
; VEX-NEXT: vmovq %xmm0, %rax
; VEX-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
-; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
-; VEX-NEXT: vcvtsi2ss %rax, %xmm2, %xmm1
-; VEX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
+; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
; VEX-NEXT: retq
;
; AVX512F-LABEL: sitofp_2i64_to_4f32:
; AVX512F-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
; AVX512F-NEXT: vmovq %xmm0, %rax
; AVX512F-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
-; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
-; AVX512F-NEXT: vcvtsi2ss %rax, %xmm2, %xmm1
-; AVX512F-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
+; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: sitofp_2i64_to_4f32:
; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
; AVX512VL-NEXT: vmovq %xmm0, %rax
; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
-; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
-; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm2, %xmm1
-; AVX512VL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
+; AVX512VL-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; AVX512VL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512VL-NEXT: retq
;
; AVX512DQ-LABEL: sitofp_2i64_to_4f32:
; SSE2-NEXT: movq %xmm0, %rax
; SSE2-NEXT: xorps %xmm0, %xmm0
; SSE2-NEXT: cvtsi2ss %rax, %xmm0
-; SSE2-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; SSE2-NEXT: xorps %xmm0, %xmm0
-; SSE2-NEXT: cvtsi2ss %rax, %xmm0
-; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[0,0]
-; SSE2-NEXT: movaps %xmm1, %xmm0
+; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; SSE2-NEXT: movq {{.*#+}} xmm0 = xmm1[0],zero
; SSE2-NEXT: retq
;
; SSE41-LABEL: sitofp_4i64_to_4f32_undef:
; SSE41-NEXT: movq %xmm0, %rax
; SSE41-NEXT: xorps %xmm0, %xmm0
; SSE41-NEXT: cvtsi2ss %rax, %xmm0
-; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
-; SSE41-NEXT: xorps %xmm1, %xmm1
-; SSE41-NEXT: cvtsi2ss %rax, %xmm1
-; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
+; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
; SSE41-NEXT: retq
;
; VEX-LABEL: sitofp_4i64_to_4f32_undef:
; VEX-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
; VEX-NEXT: vmovq %xmm0, %rax
; VEX-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
-; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
-; VEX-NEXT: vcvtsi2ss %rax, %xmm2, %xmm1
-; VEX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
+; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
; VEX-NEXT: retq
;
; AVX512F-LABEL: sitofp_4i64_to_4f32_undef:
; AVX512F-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
; AVX512F-NEXT: vmovq %xmm0, %rax
; AVX512F-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
-; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
-; AVX512F-NEXT: vcvtsi2ss %rax, %xmm2, %xmm1
-; AVX512F-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
+; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: sitofp_4i64_to_4f32_undef:
; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
; AVX512VL-NEXT: vmovq %xmm0, %rax
; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
-; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
-; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm2, %xmm1
-; AVX512VL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
+; AVX512VL-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; AVX512VL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512VL-NEXT: retq
;
; AVX512DQ-LABEL: sitofp_4i64_to_4f32_undef:
; VEX-NEXT: js .LBB39_4
; VEX-NEXT: # %bb.5:
; VEX-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
-; VEX-NEXT: jmp .LBB39_6
+; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
+; VEX-NEXT: retq
; VEX-NEXT: .LBB39_4:
; VEX-NEXT: movq %rax, %rcx
; VEX-NEXT: shrq %rcx
; VEX-NEXT: orq %rcx, %rax
; VEX-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
; VEX-NEXT: vaddss %xmm0, %xmm0, %xmm0
-; VEX-NEXT: .LBB39_6:
-; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
-; VEX-NEXT: testq %rax, %rax
-; VEX-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; VEX-NEXT: js .LBB39_8
-; VEX-NEXT: # %bb.7:
-; VEX-NEXT: vcvtsi2ss %rax, %xmm2, %xmm1
-; VEX-NEXT: .LBB39_8:
-; VEX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
+; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
; VEX-NEXT: retq
;
; AVX512F-LABEL: uitofp_2i64_to_4f32:
; AVX512F-NEXT: vcvtusi2ss %rax, %xmm1, %xmm1
; AVX512F-NEXT: vmovq %xmm0, %rax
; AVX512F-NEXT: vcvtusi2ss %rax, %xmm2, %xmm0
-; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
-; AVX512F-NEXT: vcvtusi2ss %rax, %xmm2, %xmm1
-; AVX512F-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
+; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: uitofp_2i64_to_4f32:
; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm1, %xmm1
; AVX512VL-NEXT: vmovq %xmm0, %rax
; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm2, %xmm0
-; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
-; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm2, %xmm1
-; AVX512VL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
+; AVX512VL-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; AVX512VL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512VL-NEXT: retq
;
; AVX512DQ-LABEL: uitofp_2i64_to_4f32:
define <4 x float> @uitofp_4i64_to_4f32_undef(<2 x i64> %a) {
; SSE2-LABEL: uitofp_4i64_to_4f32_undef:
; SSE2: # %bb.0:
-; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: movq %xmm0, %rax
; SSE2-NEXT: testq %rax, %rax
; SSE2-NEXT: js .LBB41_1
; SSE2-NEXT: # %bb.2:
-; SSE2-NEXT: xorps %xmm0, %xmm0
-; SSE2-NEXT: cvtsi2ss %rax, %xmm0
+; SSE2-NEXT: cvtsi2ss %rax, %xmm1
; SSE2-NEXT: jmp .LBB41_3
; SSE2-NEXT: .LBB41_1:
; SSE2-NEXT: movq %rax, %rcx
; SSE2-NEXT: shrq %rcx
; SSE2-NEXT: andl $1, %eax
; SSE2-NEXT: orq %rcx, %rax
-; SSE2-NEXT: xorps %xmm0, %xmm0
-; SSE2-NEXT: cvtsi2ss %rax, %xmm0
-; SSE2-NEXT: addss %xmm0, %xmm0
+; SSE2-NEXT: cvtsi2ss %rax, %xmm1
+; SSE2-NEXT: addss %xmm1, %xmm1
; SSE2-NEXT: .LBB41_3:
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
-; SSE2-NEXT: movq %xmm1, %rax
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; SSE2-NEXT: movq %xmm0, %rax
; SSE2-NEXT: testq %rax, %rax
; SSE2-NEXT: js .LBB41_4
; SSE2-NEXT: # %bb.5:
-; SSE2-NEXT: xorps %xmm1, %xmm1
-; SSE2-NEXT: cvtsi2ss %rax, %xmm1
+; SSE2-NEXT: xorps %xmm0, %xmm0
+; SSE2-NEXT: cvtsi2ss %rax, %xmm0
; SSE2-NEXT: jmp .LBB41_6
; SSE2-NEXT: .LBB41_4:
; SSE2-NEXT: movq %rax, %rcx
; SSE2-NEXT: shrq %rcx
; SSE2-NEXT: andl $1, %eax
; SSE2-NEXT: orq %rcx, %rax
-; SSE2-NEXT: xorps %xmm1, %xmm1
-; SSE2-NEXT: cvtsi2ss %rax, %xmm1
-; SSE2-NEXT: addss %xmm1, %xmm1
+; SSE2-NEXT: xorps %xmm0, %xmm0
+; SSE2-NEXT: cvtsi2ss %rax, %xmm0
+; SSE2-NEXT: addss %xmm0, %xmm0
; SSE2-NEXT: .LBB41_6:
-; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; SSE2-NEXT: testq %rax, %rax
-; SSE2-NEXT: xorps %xmm1, %xmm1
-; SSE2-NEXT: js .LBB41_8
-; SSE2-NEXT: # %bb.7:
-; SSE2-NEXT: xorps %xmm1, %xmm1
-; SSE2-NEXT: cvtsi2ss %rax, %xmm1
-; SSE2-NEXT: .LBB41_8:
-; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
+; SSE2-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; SSE2-NEXT: movq {{.*#+}} xmm0 = xmm1[0],zero
; SSE2-NEXT: retq
;
; SSE41-LABEL: uitofp_4i64_to_4f32_undef:
; SSE41-NEXT: # %bb.5:
; SSE41-NEXT: xorps %xmm0, %xmm0
; SSE41-NEXT: cvtsi2ss %rax, %xmm0
-; SSE41-NEXT: jmp .LBB41_6
+; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
+; SSE41-NEXT: retq
; SSE41-NEXT: .LBB41_4:
; SSE41-NEXT: movq %rax, %rcx
; SSE41-NEXT: shrq %rcx
; SSE41-NEXT: xorps %xmm0, %xmm0
; SSE41-NEXT: cvtsi2ss %rax, %xmm0
; SSE41-NEXT: addss %xmm0, %xmm0
-; SSE41-NEXT: .LBB41_6:
-; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
-; SSE41-NEXT: testq %rax, %rax
-; SSE41-NEXT: xorps %xmm1, %xmm1
-; SSE41-NEXT: js .LBB41_8
-; SSE41-NEXT: # %bb.7:
-; SSE41-NEXT: xorps %xmm1, %xmm1
-; SSE41-NEXT: cvtsi2ss %rax, %xmm1
-; SSE41-NEXT: .LBB41_8:
-; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
+; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
; SSE41-NEXT: retq
;
; VEX-LABEL: uitofp_4i64_to_4f32_undef:
; VEX-NEXT: js .LBB41_4
; VEX-NEXT: # %bb.5:
; VEX-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
-; VEX-NEXT: jmp .LBB41_6
+; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
+; VEX-NEXT: retq
; VEX-NEXT: .LBB41_4:
; VEX-NEXT: movq %rax, %rcx
; VEX-NEXT: shrq %rcx
; VEX-NEXT: orq %rcx, %rax
; VEX-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
; VEX-NEXT: vaddss %xmm0, %xmm0, %xmm0
-; VEX-NEXT: .LBB41_6:
-; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
-; VEX-NEXT: testq %rax, %rax
-; VEX-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; VEX-NEXT: js .LBB41_8
-; VEX-NEXT: # %bb.7:
-; VEX-NEXT: vcvtsi2ss %rax, %xmm2, %xmm1
-; VEX-NEXT: .LBB41_8:
-; VEX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
+; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
; VEX-NEXT: retq
;
; AVX512F-LABEL: uitofp_4i64_to_4f32_undef:
; AVX512F-NEXT: vcvtusi2ss %rax, %xmm1, %xmm1
; AVX512F-NEXT: vmovq %xmm0, %rax
; AVX512F-NEXT: vcvtusi2ss %rax, %xmm2, %xmm0
-; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
-; AVX512F-NEXT: vcvtusi2ss %rax, %xmm2, %xmm1
-; AVX512F-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
+; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: uitofp_4i64_to_4f32_undef:
; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm1, %xmm1
; AVX512VL-NEXT: vmovq %xmm0, %rax
; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm2, %xmm0
-; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
-; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm2, %xmm1
-; AVX512VL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
+; AVX512VL-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; AVX512VL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512VL-NEXT: retq
;
; AVX512DQ-LABEL: uitofp_4i64_to_4f32_undef: