if (VT != MVT::f32 && VT != MVT::f64)
return SDValue();
- // TODO: This switch could include FNEG, the x86-specific FP logic ops
- // (FAND, FANDN, FOR, FXOR), FRSQRT/FRCP and other FP math ops. But that may
- // require enhancements to avoid missed load folding and fma+fneg combining.
+ // TODO: This switch could include FNEG and the x86-specific FP logic ops
+ // (FAND, FANDN, FOR, FXOR). But that may require enhancements to avoid
+ // missed load folding and fma+fneg combining.
switch (Vec.getOpcode()) {
case ISD::FMA: // Begin 3 operands
case ISD::FMAD:
case ISD::FMAXNUM_IEEE:
case ISD::FMAXIMUM:
case ISD::FMINIMUM:
+ case X86ISD::FMAX:
+ case X86ISD::FMIN:
case ISD::FABS: // Begin 1 operand
case ISD::FSQRT:
case ISD::FRINT:
case ISD::FTRUNC:
case ISD::FNEARBYINT:
case ISD::FROUND:
- case ISD::FFLOOR: {
+ case ISD::FFLOOR:
+ case X86ISD::FRCP:
+ case X86ISD::FRSQRT: {
// extract (fp X, Y, ...), 0 --> fp (extract X, 0), (extract Y, 0), ...
SDLoc DL(ExtElt);
SmallVector<SDValue, 4> ExtOps;
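// (Continuation sketch, for context: a hedged reconstruction of how this
// combine finishes; the exact upstream code may differ. Index is assumed
// to be the extract index that was checked to be constant 0 earlier in
// this function.) Scalarize each operand, then rebuild the same opcode on
// the extracted scalars:
for (SDValue Op : Vec->ops())
  ExtOps.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op, Index));
return DAG.getNode(Vec.getOpcode(), DL, VT, ExtOps);
}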
; X86-NEXT: vextractf128 $1, %ymm0, %xmm1
; X86-NEXT: vmaxpd %xmm1, %xmm0, %xmm0
; X86-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
-; X86-NEXT: vmaxpd %xmm1, %xmm0, %xmm0
-; X86-NEXT: vmovlpd %xmm0, (%esp)
+; X86-NEXT: vmaxsd %xmm1, %xmm0, %xmm0
+; X86-NEXT: vmovsd %xmm0, (%esp)
; X86-NEXT: fldl (%esp)
; X86-NEXT: movl %ebp, %esp
; X86-NEXT: popl %ebp
; X64-NEXT: vextractf128 $1, %ymm0, %xmm1
; X64-NEXT: vmaxpd %xmm1, %xmm0, %xmm0
; X64-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
-; X64-NEXT: vmaxpd %xmm1, %xmm0, %xmm0
+; X64-NEXT: vmaxsd %xmm1, %xmm0, %xmm0
; X64-NEXT: vzeroupper
; X64-NEXT: retq
entry:
; X86-NEXT: vextractf128 $1, %ymm0, %xmm1
; X86-NEXT: vminpd %xmm1, %xmm0, %xmm0
; X86-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
-; X86-NEXT: vminpd %xmm1, %xmm0, %xmm0
-; X86-NEXT: vmovlpd %xmm0, (%esp)
+; X86-NEXT: vminsd %xmm1, %xmm0, %xmm0
+; X86-NEXT: vmovsd %xmm0, (%esp)
; X86-NEXT: fldl (%esp)
; X86-NEXT: movl %ebp, %esp
; X86-NEXT: popl %ebp
; X64-NEXT: vextractf128 $1, %ymm0, %xmm1
; X64-NEXT: vminpd %xmm1, %xmm0, %xmm0
; X64-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
-; X64-NEXT: vminpd %xmm1, %xmm0, %xmm0
+; X64-NEXT: vminsd %xmm1, %xmm0, %xmm0
; X64-NEXT: vzeroupper
; X64-NEXT: retq
entry:
; X86-NEXT: vextractf128 $1, %ymm0, %xmm1
; X86-NEXT: vmaxpd %xmm1, %xmm0, %xmm0
; X86-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
-; X86-NEXT: vmaxpd %xmm1, %xmm0, %xmm0
-; X86-NEXT: vmovlpd %xmm0, (%esp)
+; X86-NEXT: vmaxsd %xmm1, %xmm0, %xmm0
+; X86-NEXT: vmovsd %xmm0, (%esp)
; X86-NEXT: fldl (%esp)
; X86-NEXT: movl %ebp, %esp
; X86-NEXT: popl %ebp
; X64-NEXT: vextractf128 $1, %ymm0, %xmm1
; X64-NEXT: vmaxpd %xmm1, %xmm0, %xmm0
; X64-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
-; X64-NEXT: vmaxpd %xmm1, %xmm0, %xmm0
+; X64-NEXT: vmaxsd %xmm1, %xmm0, %xmm0
; X64-NEXT: vzeroupper
; X64-NEXT: retq
entry:
; X86-NEXT: vextractf128 $1, %ymm0, %xmm1
; X86-NEXT: vminpd %xmm1, %xmm0, %xmm0
; X86-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
-; X86-NEXT: vminpd %xmm1, %xmm0, %xmm0
-; X86-NEXT: vmovlpd %xmm0, (%esp)
+; X86-NEXT: vminsd %xmm1, %xmm0, %xmm0
+; X86-NEXT: vmovsd %xmm0, (%esp)
; X86-NEXT: fldl (%esp)
; X86-NEXT: movl %ebp, %esp
; X86-NEXT: popl %ebp
; X64-NEXT: vextractf128 $1, %ymm0, %xmm1
; X64-NEXT: vminpd %xmm1, %xmm0, %xmm0
; X64-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
-; X64-NEXT: vminpd %xmm1, %xmm0, %xmm0
+; X64-NEXT: vminsd %xmm1, %xmm0, %xmm0
; X64-NEXT: vzeroupper
; X64-NEXT: retq
entry:
; X86-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; X86-NEXT: vmaxps %xmm1, %xmm0, %xmm0
; X86-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,0,3,2]
-; X86-NEXT: vmaxps %xmm1, %xmm0, %xmm0
+; X86-NEXT: vmaxss %xmm1, %xmm0, %xmm0
; X86-NEXT: vmovss %xmm0, (%esp)
; X86-NEXT: flds (%esp)
; X86-NEXT: popl %eax
; X64-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; X64-NEXT: vmaxps %xmm1, %xmm0, %xmm0
; X64-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,0,3,2]
-; X64-NEXT: vmaxps %xmm1, %xmm0, %xmm0
+; X64-NEXT: vmaxss %xmm1, %xmm0, %xmm0
; X64-NEXT: vzeroupper
; X64-NEXT: retq
entry:
; X86-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; X86-NEXT: vminps %xmm1, %xmm0, %xmm0
; X86-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,0,3,2]
-; X86-NEXT: vminps %xmm1, %xmm0, %xmm0
+; X86-NEXT: vminss %xmm1, %xmm0, %xmm0
; X86-NEXT: vmovss %xmm0, (%esp)
; X86-NEXT: flds (%esp)
; X86-NEXT: popl %eax
; X64-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; X64-NEXT: vminps %xmm1, %xmm0, %xmm0
; X64-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,0,3,2]
-; X64-NEXT: vminps %xmm1, %xmm0, %xmm0
+; X64-NEXT: vminss %xmm1, %xmm0, %xmm0
; X64-NEXT: vzeroupper
; X64-NEXT: retq
entry:
; X86-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; X86-NEXT: vmaxps %xmm1, %xmm0, %xmm0
; X86-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,0,3,2]
-; X86-NEXT: vmaxps %xmm1, %xmm0, %xmm0
+; X86-NEXT: vmaxss %xmm1, %xmm0, %xmm0
; X86-NEXT: vmovss %xmm0, (%esp)
; X86-NEXT: flds (%esp)
; X86-NEXT: popl %eax
; X64-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; X64-NEXT: vmaxps %xmm1, %xmm0, %xmm0
; X64-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,0,3,2]
-; X64-NEXT: vmaxps %xmm1, %xmm0, %xmm0
+; X64-NEXT: vmaxss %xmm1, %xmm0, %xmm0
; X64-NEXT: vzeroupper
; X64-NEXT: retq
entry:
; X86-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; X86-NEXT: vminps %xmm1, %xmm0, %xmm0
; X86-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,0,3,2]
-; X86-NEXT: vminps %xmm1, %xmm0, %xmm0
+; X86-NEXT: vminss %xmm1, %xmm0, %xmm0
; X86-NEXT: vmovss %xmm0, (%esp)
; X86-NEXT: flds (%esp)
; X86-NEXT: popl %eax
; X64-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; X64-NEXT: vminps %xmm1, %xmm0, %xmm0
; X64-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,0,3,2]
-; X64-NEXT: vminps %xmm1, %xmm0, %xmm0
+; X64-NEXT: vminss %xmm1, %xmm0, %xmm0
; X64-NEXT: vzeroupper
; X64-NEXT: retq
entry:
define float @maxps_v4f32(<4 x float> %x, <4 x float> %y) nounwind {
; CHECK-LABEL: maxps_v4f32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vmaxps %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vmaxss %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
%cmp = fcmp ogt <4 x float> %x, %y
%v = select <4 x i1> %cmp, <4 x float> %x, <4 x float> %y
define double @maxpd_v4f64(<4 x double> %x, <4 x double> %y) nounwind {
; CHECK-LABEL: maxpd_v4f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vmaxpd %ymm1, %ymm0, %ymm0
-; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; CHECK-NEXT: vmaxsd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%cmp = fcmp ogt <4 x double> %x, %y
define float @minps_v4f32(<4 x float> %x, <4 x float> %y) nounwind {
; CHECK-LABEL: minps_v4f32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vminps %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vminss %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
%cmp = fcmp olt <4 x float> %x, %y
%v = select <4 x i1> %cmp, <4 x float> %x, <4 x float> %y
define double @minpd_v4f64(<4 x double> %x, <4 x double> %y) nounwind {
; CHECK-LABEL: minpd_v4f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vminpd %ymm1, %ymm0, %ymm0
-; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; CHECK-NEXT: vminsd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%cmp = fcmp olt <4 x double> %x, %y
define float @rcp_v4f32(<4 x float> %x) nounwind {
; CHECK-LABEL: rcp_v4f32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vrcpps %xmm0, %xmm0
+; CHECK-NEXT: vrcpss %xmm0, %xmm0, %xmm0
; CHECK-NEXT: retq
%v = call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %x)
%r = extractelement <4 x float> %v, i32 0
define float @rsqrt_v4f32(<4 x float> %x) nounwind {
; CHECK-LABEL: rsqrt_v4f32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vrsqrtps %xmm0, %xmm0
+; CHECK-NEXT: vrsqrtss %xmm0, %xmm0, %xmm0
; CHECK-NEXT: retq
%v = call <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> %x)
%r = extractelement <4 x float> %v, i32 0
; SSE2: # %bb.0:
; SSE2-NEXT: movaps %xmm0, %xmm1
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3]
-; SSE2-NEXT: maxps %xmm1, %xmm0
+; SSE2-NEXT: maxss %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_v2f32:
; SSE41: # %bb.0:
; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; SSE41-NEXT: maxps %xmm1, %xmm0
+; SSE41-NEXT: maxss %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: test_v2f32:
; AVX: # %bb.0:
; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; AVX-NEXT: vmaxps %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vmaxss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
;
; AVX512-LABEL: test_v2f32:
; AVX512: # %bb.0:
; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; AVX512-NEXT: vmaxps %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vmaxss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
%1 = call nnan float @llvm.experimental.vector.reduce.fmax.f32.v2f32(<2 x float> %a0)
ret float %1
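; Note: in these reductions only element 0 of the final min/max result is
; extracted, so the last packed op can be rewritten as a scalar op. For the
; wider ymm/zmm variants below, this also removes the "kill" register-copy
; comment and shrinks the final op from a 256/512-bit op to a scalar one.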
; SSE2-NEXT: maxps %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm0, %xmm1
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3]
-; SSE2-NEXT: maxps %xmm1, %xmm0
+; SSE2-NEXT: maxss %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_v4f32:
; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE41-NEXT: maxps %xmm1, %xmm0
; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; SSE41-NEXT: maxps %xmm1, %xmm0
+; SSE41-NEXT: maxss %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: test_v4f32:
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT: vmaxps %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; AVX-NEXT: vmaxps %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vmaxss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
;
; AVX512-LABEL: test_v4f32:
; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT: vmaxps %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; AVX512-NEXT: vmaxps %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vmaxss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
%1 = call nnan float @llvm.experimental.vector.reduce.fmax.f32.v4f32(<4 x float> %a0)
ret float %1
; SSE2-NEXT: maxps %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm0, %xmm1
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3]
-; SSE2-NEXT: maxps %xmm1, %xmm0
+; SSE2-NEXT: maxss %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_v8f32:
; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE41-NEXT: maxps %xmm1, %xmm0
; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; SSE41-NEXT: maxps %xmm1, %xmm0
+; SSE41-NEXT: maxss %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: test_v8f32:
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT: vmaxps %ymm1, %ymm0, %ymm0
; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; AVX-NEXT: vmaxps %ymm1, %ymm0, %ymm0
-; AVX-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX-NEXT: vmaxss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT: vmaxps %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; AVX512-NEXT: vmaxps %ymm1, %ymm0, %ymm0
-; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX512-NEXT: vmaxss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%1 = call nnan float @llvm.experimental.vector.reduce.fmax.f32.v8f32(<8 x float> %a0)
; SSE2-NEXT: maxps %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm0, %xmm1
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3]
-; SSE2-NEXT: maxps %xmm1, %xmm0
+; SSE2-NEXT: maxss %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_v16f32:
; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE41-NEXT: maxps %xmm1, %xmm0
; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; SSE41-NEXT: maxps %xmm1, %xmm0
+; SSE41-NEXT: maxss %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: test_v16f32:
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT: vmaxps %ymm1, %ymm0, %ymm0
; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; AVX-NEXT: vmaxps %ymm1, %ymm0, %ymm0
-; AVX-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX-NEXT: vmaxss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT: vmaxps %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; AVX512-NEXT: vmaxps %zmm1, %zmm0, %zmm0
-; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512-NEXT: vmaxss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%1 = call nnan float @llvm.experimental.vector.reduce.fmax.f32.v16f32(<16 x float> %a0)
; SSE: # %bb.0:
; SSE-NEXT: movapd %xmm0, %xmm1
; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
-; SSE-NEXT: maxpd %xmm1, %xmm0
+; SSE-NEXT: maxsd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test_v2f64:
; AVX: # %bb.0:
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
-; AVX-NEXT: vmaxpd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vmaxsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
;
; AVX512-LABEL: test_v2f64:
; AVX512: # %bb.0:
; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
-; AVX512-NEXT: vmaxpd %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vmaxsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
%1 = call nnan double @llvm.experimental.vector.reduce.fmax.f64.v2f64(<2 x double> %a0)
ret double %1
; SSE-NEXT: maxpd %xmm1, %xmm0
; SSE-NEXT: movapd %xmm0, %xmm1
; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
-; SSE-NEXT: maxpd %xmm1, %xmm0
+; SSE-NEXT: maxsd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test_v4f64:
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT: vmaxpd %ymm1, %ymm0, %ymm0
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
-; AVX-NEXT: vmaxpd %ymm1, %ymm0, %ymm0
-; AVX-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX-NEXT: vmaxsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX512-NEXT: vmaxpd %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
-; AVX512-NEXT: vmaxpd %ymm1, %ymm0, %ymm0
-; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX512-NEXT: vmaxsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%1 = call nnan double @llvm.experimental.vector.reduce.fmax.f64.v4f64(<4 x double> %a0)
; SSE-NEXT: maxpd %xmm1, %xmm0
; SSE-NEXT: movapd %xmm0, %xmm1
; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
-; SSE-NEXT: maxpd %xmm1, %xmm0
+; SSE-NEXT: maxsd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test_v8f64:
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT: vmaxpd %ymm1, %ymm0, %ymm0
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
-; AVX-NEXT: vmaxpd %ymm1, %ymm0, %ymm0
-; AVX-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX-NEXT: vmaxsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX512-NEXT: vmaxpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
-; AVX512-NEXT: vmaxpd %zmm1, %zmm0, %zmm0
-; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512-NEXT: vmaxsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%1 = call nnan double @llvm.experimental.vector.reduce.fmax.f64.v8f64(<8 x double> %a0)
; SSE-NEXT: maxpd %xmm1, %xmm0
; SSE-NEXT: movapd %xmm0, %xmm1
; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
-; SSE-NEXT: maxpd %xmm1, %xmm0
+; SSE-NEXT: maxsd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test_v16f64:
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT: vmaxpd %ymm1, %ymm0, %ymm0
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
-; AVX-NEXT: vmaxpd %ymm1, %ymm0, %ymm0
-; AVX-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX-NEXT: vmaxsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX512-NEXT: vmaxpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
-; AVX512-NEXT: vmaxpd %zmm1, %zmm0, %zmm0
-; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512-NEXT: vmaxsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%1 = call nnan double @llvm.experimental.vector.reduce.fmax.f64.v16f64(<16 x double> %a0)
; SSE2: # %bb.0:
; SSE2-NEXT: movaps %xmm0, %xmm1
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3]
-; SSE2-NEXT: maxps %xmm1, %xmm0
+; SSE2-NEXT: maxss %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_v2f32:
; SSE41: # %bb.0:
; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; SSE41-NEXT: maxps %xmm1, %xmm0
+; SSE41-NEXT: maxss %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: test_v2f32:
; AVX: # %bb.0:
; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; AVX-NEXT: vmaxps %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vmaxss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
;
; AVX512-LABEL: test_v2f32:
; AVX512: # %bb.0:
; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; AVX512-NEXT: vmaxps %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vmaxss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
%1 = call float @llvm.experimental.vector.reduce.fmax.f32.v2f32(<2 x float> %a0)
ret float %1
; SSE2-NEXT: maxps %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm0, %xmm1
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3]
-; SSE2-NEXT: maxps %xmm1, %xmm0
+; SSE2-NEXT: maxss %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_v4f32:
; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE41-NEXT: maxps %xmm1, %xmm0
; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; SSE41-NEXT: maxps %xmm1, %xmm0
+; SSE41-NEXT: maxss %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: test_v4f32:
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT: vmaxps %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; AVX-NEXT: vmaxps %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vmaxss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
;
; AVX512-LABEL: test_v4f32:
; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT: vmaxps %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; AVX512-NEXT: vmaxps %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vmaxss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
%1 = call float @llvm.experimental.vector.reduce.fmax.f32.v4f32(<4 x float> %a0)
ret float %1
; SSE2-NEXT: maxps %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm0, %xmm1
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3]
-; SSE2-NEXT: maxps %xmm1, %xmm0
+; SSE2-NEXT: maxss %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_v8f32:
; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE41-NEXT: maxps %xmm1, %xmm0
; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; SSE41-NEXT: maxps %xmm1, %xmm0
+; SSE41-NEXT: maxss %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: test_v8f32:
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT: vmaxps %ymm1, %ymm0, %ymm0
; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; AVX-NEXT: vmaxps %ymm1, %ymm0, %ymm0
-; AVX-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX-NEXT: vmaxss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT: vmaxps %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; AVX512-NEXT: vmaxps %ymm1, %ymm0, %ymm0
-; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX512-NEXT: vmaxss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%1 = call float @llvm.experimental.vector.reduce.fmax.f32.v8f32(<8 x float> %a0)
; SSE2-NEXT: maxps %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm0, %xmm1
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3]
-; SSE2-NEXT: maxps %xmm1, %xmm0
+; SSE2-NEXT: maxss %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_v16f32:
; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE41-NEXT: maxps %xmm1, %xmm0
; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; SSE41-NEXT: maxps %xmm1, %xmm0
+; SSE41-NEXT: maxss %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: test_v16f32:
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT: vmaxps %ymm1, %ymm0, %ymm0
; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; AVX-NEXT: vmaxps %ymm1, %ymm0, %ymm0
-; AVX-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX-NEXT: vmaxss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT: vmaxps %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; AVX512-NEXT: vmaxps %zmm1, %zmm0, %zmm0
-; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512-NEXT: vmaxss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%1 = call float @llvm.experimental.vector.reduce.fmax.f32.v16f32(<16 x float> %a0)
; SSE: # %bb.0:
; SSE-NEXT: movapd %xmm0, %xmm1
; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
-; SSE-NEXT: maxpd %xmm1, %xmm0
+; SSE-NEXT: maxsd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test_v2f64:
; AVX: # %bb.0:
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
-; AVX-NEXT: vmaxpd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vmaxsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
;
; AVX512-LABEL: test_v2f64:
; AVX512: # %bb.0:
; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
-; AVX512-NEXT: vmaxpd %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vmaxsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
%1 = call double @llvm.experimental.vector.reduce.fmax.f64.v2f64(<2 x double> %a0)
ret double %1
; SSE-NEXT: maxpd %xmm1, %xmm0
; SSE-NEXT: movapd %xmm0, %xmm1
; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
-; SSE-NEXT: maxpd %xmm1, %xmm0
+; SSE-NEXT: maxsd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test_v4f64:
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT: vmaxpd %ymm1, %ymm0, %ymm0
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
-; AVX-NEXT: vmaxpd %ymm1, %ymm0, %ymm0
-; AVX-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX-NEXT: vmaxsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX512-NEXT: vmaxpd %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
-; AVX512-NEXT: vmaxpd %ymm1, %ymm0, %ymm0
-; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX512-NEXT: vmaxsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%1 = call double @llvm.experimental.vector.reduce.fmax.f64.v4f64(<4 x double> %a0)
; SSE-NEXT: maxpd %xmm1, %xmm0
; SSE-NEXT: movapd %xmm0, %xmm1
; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
-; SSE-NEXT: maxpd %xmm1, %xmm0
+; SSE-NEXT: maxsd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test_v8f64:
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT: vmaxpd %ymm1, %ymm0, %ymm0
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
-; AVX-NEXT: vmaxpd %ymm1, %ymm0, %ymm0
-; AVX-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX-NEXT: vmaxsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX512-NEXT: vmaxpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
-; AVX512-NEXT: vmaxpd %zmm1, %zmm0, %zmm0
-; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512-NEXT: vmaxsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%1 = call double @llvm.experimental.vector.reduce.fmax.f64.v8f64(<8 x double> %a0)
; SSE-NEXT: maxpd %xmm1, %xmm0
; SSE-NEXT: movapd %xmm0, %xmm1
; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
-; SSE-NEXT: maxpd %xmm1, %xmm0
+; SSE-NEXT: maxsd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test_v16f64:
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT: vmaxpd %ymm1, %ymm0, %ymm0
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
-; AVX-NEXT: vmaxpd %ymm1, %ymm0, %ymm0
-; AVX-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX-NEXT: vmaxsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX512-NEXT: vmaxpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
-; AVX512-NEXT: vmaxpd %zmm1, %zmm0, %zmm0
-; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512-NEXT: vmaxsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%1 = call double @llvm.experimental.vector.reduce.fmax.f64.v16f64(<16 x double> %a0)
; SSE2: # %bb.0:
; SSE2-NEXT: movaps %xmm0, %xmm1
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3]
-; SSE2-NEXT: minps %xmm1, %xmm0
+; SSE2-NEXT: minss %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_v2f32:
; SSE41: # %bb.0:
; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; SSE41-NEXT: minps %xmm1, %xmm0
+; SSE41-NEXT: minss %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: test_v2f32:
; AVX: # %bb.0:
; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; AVX-NEXT: vminps %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vminss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
;
; AVX512-LABEL: test_v2f32:
; AVX512: # %bb.0:
; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; AVX512-NEXT: vminps %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vminss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
%1 = call nnan float @llvm.experimental.vector.reduce.fmin.f32.v2f32(<2 x float> %a0)
ret float %1
; SSE2-NEXT: minps %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm0, %xmm1
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3]
-; SSE2-NEXT: minps %xmm1, %xmm0
+; SSE2-NEXT: minss %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_v4f32:
; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE41-NEXT: minps %xmm1, %xmm0
; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; SSE41-NEXT: minps %xmm1, %xmm0
+; SSE41-NEXT: minss %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: test_v4f32:
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT: vminps %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; AVX-NEXT: vminps %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vminss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
;
; AVX512-LABEL: test_v4f32:
; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT: vminps %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; AVX512-NEXT: vminps %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vminss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
%1 = call nnan float @llvm.experimental.vector.reduce.fmin.f32.v4f32(<4 x float> %a0)
ret float %1
; SSE2-NEXT: minps %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm0, %xmm1
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3]
-; SSE2-NEXT: minps %xmm1, %xmm0
+; SSE2-NEXT: minss %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_v8f32:
; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE41-NEXT: minps %xmm1, %xmm0
; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; SSE41-NEXT: minps %xmm1, %xmm0
+; SSE41-NEXT: minss %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: test_v8f32:
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT: vminps %ymm1, %ymm0, %ymm0
; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; AVX-NEXT: vminps %ymm1, %ymm0, %ymm0
-; AVX-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX-NEXT: vminss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT: vminps %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; AVX512-NEXT: vminps %ymm1, %ymm0, %ymm0
-; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX512-NEXT: vminss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%1 = call nnan float @llvm.experimental.vector.reduce.fmin.f32.v8f32(<8 x float> %a0)
; SSE2-NEXT: minps %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm0, %xmm1
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3]
-; SSE2-NEXT: minps %xmm1, %xmm0
+; SSE2-NEXT: minss %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_v16f32:
; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE41-NEXT: minps %xmm1, %xmm0
; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; SSE41-NEXT: minps %xmm1, %xmm0
+; SSE41-NEXT: minss %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: test_v16f32:
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT: vminps %ymm1, %ymm0, %ymm0
; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; AVX-NEXT: vminps %ymm1, %ymm0, %ymm0
-; AVX-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX-NEXT: vminss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT: vminps %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; AVX512-NEXT: vminps %zmm1, %zmm0, %zmm0
-; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512-NEXT: vminss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%1 = call nnan float @llvm.experimental.vector.reduce.fmin.f32.v16f32(<16 x float> %a0)
; SSE: # %bb.0:
; SSE-NEXT: movapd %xmm0, %xmm1
; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
-; SSE-NEXT: minpd %xmm1, %xmm0
+; SSE-NEXT: minsd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test_v2f64:
; AVX: # %bb.0:
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
-; AVX-NEXT: vminpd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vminsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
;
; AVX512-LABEL: test_v2f64:
; AVX512: # %bb.0:
; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
-; AVX512-NEXT: vminpd %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vminsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
%1 = call nnan double @llvm.experimental.vector.reduce.fmin.f64.v2f64(<2 x double> %a0)
ret double %1
; SSE-NEXT: minpd %xmm1, %xmm0
; SSE-NEXT: movapd %xmm0, %xmm1
; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
-; SSE-NEXT: minpd %xmm1, %xmm0
+; SSE-NEXT: minsd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test_v4f64:
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT: vminpd %ymm1, %ymm0, %ymm0
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
-; AVX-NEXT: vminpd %ymm1, %ymm0, %ymm0
-; AVX-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX-NEXT: vminsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX512-NEXT: vminpd %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
-; AVX512-NEXT: vminpd %ymm1, %ymm0, %ymm0
-; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX512-NEXT: vminsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%1 = call nnan double @llvm.experimental.vector.reduce.fmin.f64.v4f64(<4 x double> %a0)
; SSE-NEXT: minpd %xmm1, %xmm0
; SSE-NEXT: movapd %xmm0, %xmm1
; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
-; SSE-NEXT: minpd %xmm1, %xmm0
+; SSE-NEXT: minsd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test_v8f64:
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT: vminpd %ymm1, %ymm0, %ymm0
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
-; AVX-NEXT: vminpd %ymm1, %ymm0, %ymm0
-; AVX-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX-NEXT: vminsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX512-NEXT: vminpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
-; AVX512-NEXT: vminpd %zmm1, %zmm0, %zmm0
-; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512-NEXT: vminsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%1 = call nnan double @llvm.experimental.vector.reduce.fmin.f64.v8f64(<8 x double> %a0)
; SSE-NEXT: minpd %xmm1, %xmm0
; SSE-NEXT: movapd %xmm0, %xmm1
; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
-; SSE-NEXT: minpd %xmm1, %xmm0
+; SSE-NEXT: minsd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test_v16f64:
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT: vminpd %ymm1, %ymm0, %ymm0
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
-; AVX-NEXT: vminpd %ymm1, %ymm0, %ymm0
-; AVX-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX-NEXT: vminsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX512-NEXT: vminpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
-; AVX512-NEXT: vminpd %zmm1, %zmm0, %zmm0
-; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512-NEXT: vminsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%1 = call nnan double @llvm.experimental.vector.reduce.fmin.f64.v16f64(<16 x double> %a0)
; SSE2: # %bb.0:
; SSE2-NEXT: movaps %xmm0, %xmm1
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3]
-; SSE2-NEXT: minps %xmm1, %xmm0
+; SSE2-NEXT: minss %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_v2f32:
; SSE41: # %bb.0:
; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; SSE41-NEXT: minps %xmm1, %xmm0
+; SSE41-NEXT: minss %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: test_v2f32:
; AVX: # %bb.0:
; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; AVX-NEXT: vminps %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vminss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
;
; AVX512-LABEL: test_v2f32:
; AVX512: # %bb.0:
; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; AVX512-NEXT: vminps %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vminss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
%1 = call float @llvm.experimental.vector.reduce.fmin.f32.v2f32(<2 x float> %a0)
ret float %1
; SSE2-NEXT: minps %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm0, %xmm1
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3]
-; SSE2-NEXT: minps %xmm1, %xmm0
+; SSE2-NEXT: minss %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_v4f32:
; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE41-NEXT: minps %xmm1, %xmm0
; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; SSE41-NEXT: minps %xmm1, %xmm0
+; SSE41-NEXT: minss %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: test_v4f32:
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT: vminps %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; AVX-NEXT: vminps %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vminss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
;
; AVX512-LABEL: test_v4f32:
; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT: vminps %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; AVX512-NEXT: vminps %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vminss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
%1 = call float @llvm.experimental.vector.reduce.fmin.f32.v4f32(<4 x float> %a0)
ret float %1
; SSE2-NEXT: minps %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm0, %xmm1
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3]
-; SSE2-NEXT: minps %xmm1, %xmm0
+; SSE2-NEXT: minss %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_v8f32:
; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE41-NEXT: minps %xmm1, %xmm0
; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; SSE41-NEXT: minps %xmm1, %xmm0
+; SSE41-NEXT: minss %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: test_v8f32:
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT: vminps %ymm1, %ymm0, %ymm0
; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; AVX-NEXT: vminps %ymm1, %ymm0, %ymm0
-; AVX-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX-NEXT: vminss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT: vminps %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; AVX512-NEXT: vminps %ymm1, %ymm0, %ymm0
-; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX512-NEXT: vminss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%1 = call float @llvm.experimental.vector.reduce.fmin.f32.v8f32(<8 x float> %a0)
; SSE2-NEXT: minps %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm0, %xmm1
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3]
-; SSE2-NEXT: minps %xmm1, %xmm0
+; SSE2-NEXT: minss %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_v16f32:
; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE41-NEXT: minps %xmm1, %xmm0
; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; SSE41-NEXT: minps %xmm1, %xmm0
+; SSE41-NEXT: minss %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: test_v16f32:
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT: vminps %ymm1, %ymm0, %ymm0
; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; AVX-NEXT: vminps %ymm1, %ymm0, %ymm0
-; AVX-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX-NEXT: vminss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT: vminps %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; AVX512-NEXT: vminps %zmm1, %zmm0, %zmm0
-; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512-NEXT: vminss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%1 = call float @llvm.experimental.vector.reduce.fmin.f32.v16f32(<16 x float> %a0)
; SSE: # %bb.0:
; SSE-NEXT: movapd %xmm0, %xmm1
; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
-; SSE-NEXT: minpd %xmm1, %xmm0
+; SSE-NEXT: minsd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test_v2f64:
; AVX: # %bb.0:
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
-; AVX-NEXT: vminpd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vminsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
;
; AVX512-LABEL: test_v2f64:
; AVX512: # %bb.0:
; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
-; AVX512-NEXT: vminpd %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vminsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
%1 = call double @llvm.experimental.vector.reduce.fmin.f64.v2f64(<2 x double> %a0)
ret double %1
; SSE-NEXT: minpd %xmm1, %xmm0
; SSE-NEXT: movapd %xmm0, %xmm1
; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
-; SSE-NEXT: minpd %xmm1, %xmm0
+; SSE-NEXT: minsd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test_v4f64:
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT: vminpd %ymm1, %ymm0, %ymm0
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
-; AVX-NEXT: vminpd %ymm1, %ymm0, %ymm0
-; AVX-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX-NEXT: vminsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX512-NEXT: vminpd %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
-; AVX512-NEXT: vminpd %ymm1, %ymm0, %ymm0
-; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX512-NEXT: vminsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%1 = call double @llvm.experimental.vector.reduce.fmin.f64.v4f64(<4 x double> %a0)
; SSE-NEXT: minpd %xmm1, %xmm0
; SSE-NEXT: movapd %xmm0, %xmm1
; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
-; SSE-NEXT: minpd %xmm1, %xmm0
+; SSE-NEXT: minsd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test_v8f64:
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT: vminpd %ymm1, %ymm0, %ymm0
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
-; AVX-NEXT: vminpd %ymm1, %ymm0, %ymm0
-; AVX-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX-NEXT: vminsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX512-NEXT: vminpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
-; AVX512-NEXT: vminpd %zmm1, %zmm0, %zmm0
-; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512-NEXT: vminsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%1 = call double @llvm.experimental.vector.reduce.fmin.f64.v8f64(<8 x double> %a0)
; SSE-NEXT: minpd %xmm1, %xmm0
; SSE-NEXT: movapd %xmm0, %xmm1
; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
-; SSE-NEXT: minpd %xmm1, %xmm0
+; SSE-NEXT: minsd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test_v16f64:
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT: vminpd %ymm1, %ymm0, %ymm0
; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
-; AVX-NEXT: vminpd %ymm1, %ymm0, %ymm0
-; AVX-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX-NEXT: vminsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX512-NEXT: vminpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
-; AVX512-NEXT: vminpd %zmm1, %zmm0, %zmm0
-; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512-NEXT: vminsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%1 = call double @llvm.experimental.vector.reduce.fmin.f64.v16f64(<16 x double> %a0)