From: Sanjay Patel
Date: Sun, 10 Mar 2019 18:56:21 +0000 (+0000)
Subject: [x86] add x86-specific opcodes to extractelement scalarization list
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=6a6735dc5864d58fa2735763609b42c59a520184;p=llvm

[x86] add x86-specific opcodes to extractelement scalarization list

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@355792 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index c90216c5378..db64cd87dc2 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -34341,9 +34341,9 @@ static SDValue scalarizeExtEltFP(SDNode *ExtElt, SelectionDAG &DAG) {
   if (VT != MVT::f32 && VT != MVT::f64)
     return SDValue();
 
-  // TODO: This switch could include FNEG, the x86-specific FP logic ops
-  // (FAND, FANDN, FOR, FXOR), FRSQRT/FRCP and other FP math ops. But that may
-  // require enhancements to avoid missed load folding and fma+fneg combining.
+  // TODO: This switch could include FNEG and the x86-specific FP logic ops
+  // (FAND, FANDN, FOR, FXOR). But that may require enhancements to avoid
+  // missed load folding and fma+fneg combining.
   switch (Vec.getOpcode()) {
   case ISD::FMA: // Begin 3 operands
   case ISD::FMAD:
@@ -34359,6 +34359,8 @@ static SDValue scalarizeExtEltFP(SDNode *ExtElt, SelectionDAG &DAG) {
   case ISD::FMAXNUM_IEEE:
   case ISD::FMAXIMUM:
   case ISD::FMINIMUM:
+  case X86ISD::FMAX:
+  case X86ISD::FMIN:
   case ISD::FABS: // Begin 1 operand
   case ISD::FSQRT:
   case ISD::FRINT:
@@ -34366,7 +34368,9 @@ static SDValue scalarizeExtEltFP(SDNode *ExtElt, SelectionDAG &DAG) {
   case ISD::FTRUNC:
   case ISD::FNEARBYINT:
   case ISD::FROUND:
-  case ISD::FFLOOR: {
+  case ISD::FFLOOR:
+  case X86ISD::FRCP:
+  case X86ISD::FRSQRT: {
     // extract (fp X, Y, ...), 0 --> fp (extract X, 0), (extract Y, 0), ...
     SDLoc DL(ExtElt);
     SmallVector<SDValue, 4> ExtOps;
diff --git a/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll b/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll
index 5b2c0a749c0..e222c0e1ae2 100644
--- a/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll
+++ b/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll
@@ -7787,8 +7787,8 @@ define double @test_mm512_reduce_max_pd(<8 x double> %__W) {
 ; X86-NEXT: vextractf128 $1, %ymm0, %xmm1
 ; X86-NEXT: vmaxpd %xmm1, %xmm0, %xmm0
 ; X86-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
-; X86-NEXT: vmaxpd %xmm1, %xmm0, %xmm0
-; X86-NEXT: vmovlpd %xmm0, (%esp)
+; X86-NEXT: vmaxsd %xmm1, %xmm0, %xmm0
+; X86-NEXT: vmovsd %xmm0, (%esp)
 ; X86-NEXT: fldl (%esp)
 ; X86-NEXT: movl %ebp, %esp
 ; X86-NEXT: popl %ebp
@@ -7803,7 +7803,7 @@ define double @test_mm512_reduce_max_pd(<8 x double> %__W) {
 ; X64-NEXT: vextractf128 $1, %ymm0, %xmm1
 ; X64-NEXT: vmaxpd %xmm1, %xmm0, %xmm0
 ; X64-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
-; X64-NEXT: vmaxpd %xmm1, %xmm0, %xmm0
+; X64-NEXT: vmaxsd %xmm1, %xmm0, %xmm0
 ; X64-NEXT: vzeroupper
 ; X64-NEXT: retq
 entry:
@@ -7912,8 +7912,8 @@ define double @test_mm512_reduce_min_pd(<8 x double> %__W) {
 ; X86-NEXT: vextractf128 $1, %ymm0, %xmm1
 ; X86-NEXT: vminpd %xmm1, %xmm0, %xmm0
 ; X86-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
-; X86-NEXT: vminpd %xmm1, %xmm0, %xmm0
-; X86-NEXT: vmovlpd %xmm0, (%esp)
+; X86-NEXT: vminsd %xmm1, %xmm0, %xmm0
+; X86-NEXT: vmovsd %xmm0, (%esp)
 ; X86-NEXT: fldl (%esp)
 ; X86-NEXT: movl %ebp, %esp
 ; X86-NEXT: popl %ebp
@@ -7928,7 +7928,7 @@ define double @test_mm512_reduce_min_pd(<8 x double> %__W) {
 ; X64-NEXT: vextractf128 $1, %ymm0, %xmm1
 ; X64-NEXT: vminpd %xmm1, %xmm0, %xmm0
 ; X64-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
-; X64-NEXT: vminpd %xmm1, %xmm0, %xmm0
+; X64-NEXT: vminsd %xmm1, %xmm0, %xmm0
 ; X64-NEXT: vzeroupper
 ; X64-NEXT: retq
 entry:
@@ -8057,8 +8057,8 @@ define double @test_mm512_mask_reduce_max_pd(i8 zeroext %__M, <8 x double> %__W)
 ; X86-NEXT: vextractf128 $1, %ymm0, %xmm1
 ; X86-NEXT: vmaxpd %xmm1, %xmm0, %xmm0
 ; X86-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
-; X86-NEXT: vmaxpd %xmm1, %xmm0, %xmm0
-; X86-NEXT: vmovlpd %xmm0, (%esp)
+; X86-NEXT: vmaxsd %xmm1, %xmm0, %xmm0
+; X86-NEXT: vmovsd %xmm0, (%esp)
 ; X86-NEXT: fldl (%esp)
 ; X86-NEXT: movl %ebp, %esp
 ; X86-NEXT: popl %ebp
@@ -8076,7 +8076,7 @@ define double @test_mm512_mask_reduce_max_pd(i8 zeroext %__M, <8 x double> %__W)
 ; X64-NEXT: vextractf128 $1, %ymm0, %xmm1
 ; X64-NEXT: vmaxpd %xmm1, %xmm0, %xmm0
 ; X64-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
-; X64-NEXT: vmaxpd %xmm1, %xmm0, %xmm0
+; X64-NEXT: vmaxsd %xmm1, %xmm0, %xmm0
 ; X64-NEXT: vzeroupper
 ; X64-NEXT: retq
 entry:
@@ -8209,8 +8209,8 @@ define double @test_mm512_mask_reduce_min_pd(i8 zeroext %__M, <8 x double> %__W)
 ; X86-NEXT: vextractf128 $1, %ymm0, %xmm1
 ; X86-NEXT: vminpd %xmm1, %xmm0, %xmm0
 ; X86-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
-; X86-NEXT: vminpd %xmm1, %xmm0, %xmm0
-; X86-NEXT: vmovlpd %xmm0, (%esp)
+; X86-NEXT: vminsd %xmm1, %xmm0, %xmm0
+; X86-NEXT: vmovsd %xmm0, (%esp)
 ; X86-NEXT: fldl (%esp)
 ; X86-NEXT: movl %ebp, %esp
 ; X86-NEXT: popl %ebp
@@ -8228,7 +8228,7 @@ define double @test_mm512_mask_reduce_min_pd(i8 zeroext %__M, <8 x double> %__W)
 ; X64-NEXT: vextractf128 $1, %ymm0, %xmm1
 ; X64-NEXT: vminpd %xmm1, %xmm0, %xmm0
 ; X64-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
-; X64-NEXT: vminpd %xmm1, %xmm0, %xmm0
+; X64-NEXT: vminsd %xmm1, %xmm0, %xmm0
 ; X64-NEXT: vzeroupper
 ; X64-NEXT: retq
 entry:
@@ -8334,7 +8334,7 @@ define float @test_mm512_reduce_max_ps(<16 x float> %__W) {
 ;
X86-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] ; X86-NEXT: vmaxps %xmm1, %xmm0, %xmm0 ; X86-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,0,3,2] -; X86-NEXT: vmaxps %xmm1, %xmm0, %xmm0 +; X86-NEXT: vmaxss %xmm1, %xmm0, %xmm0 ; X86-NEXT: vmovss %xmm0, (%esp) ; X86-NEXT: flds (%esp) ; X86-NEXT: popl %eax @@ -8351,7 +8351,7 @@ define float @test_mm512_reduce_max_ps(<16 x float> %__W) { ; X64-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] ; X64-NEXT: vmaxps %xmm1, %xmm0, %xmm0 ; X64-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,0,3,2] -; X64-NEXT: vmaxps %xmm1, %xmm0, %xmm0 +; X64-NEXT: vmaxss %xmm1, %xmm0, %xmm0 ; X64-NEXT: vzeroupper ; X64-NEXT: retq entry: @@ -8460,7 +8460,7 @@ define float @test_mm512_reduce_min_ps(<16 x float> %__W) { ; X86-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] ; X86-NEXT: vminps %xmm1, %xmm0, %xmm0 ; X86-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,0,3,2] -; X86-NEXT: vminps %xmm1, %xmm0, %xmm0 +; X86-NEXT: vminss %xmm1, %xmm0, %xmm0 ; X86-NEXT: vmovss %xmm0, (%esp) ; X86-NEXT: flds (%esp) ; X86-NEXT: popl %eax @@ -8477,7 +8477,7 @@ define float @test_mm512_reduce_min_ps(<16 x float> %__W) { ; X64-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] ; X64-NEXT: vminps %xmm1, %xmm0, %xmm0 ; X64-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,0,3,2] -; X64-NEXT: vminps %xmm1, %xmm0, %xmm0 +; X64-NEXT: vminss %xmm1, %xmm0, %xmm0 ; X64-NEXT: vzeroupper ; X64-NEXT: retq entry: @@ -8638,7 +8638,7 @@ define float @test_mm512_mask_reduce_max_ps(i16 zeroext %__M, <16 x float> %__W) ; X86-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] ; X86-NEXT: vmaxps %xmm1, %xmm0, %xmm0 ; X86-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,0,3,2] -; X86-NEXT: vmaxps %xmm1, %xmm0, %xmm0 +; X86-NEXT: vmaxss %xmm1, %xmm0, %xmm0 ; X86-NEXT: vmovss %xmm0, (%esp) ; X86-NEXT: flds (%esp) ; X86-NEXT: popl %eax @@ -8658,7 +8658,7 @@ define float @test_mm512_mask_reduce_max_ps(i16 zeroext %__M, <16 x float> %__W) ; X64-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] ; X64-NEXT: vmaxps %xmm1, %xmm0, %xmm0 ; X64-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,0,3,2] -; X64-NEXT: vmaxps %xmm1, %xmm0, %xmm0 +; X64-NEXT: vmaxss %xmm1, %xmm0, %xmm0 ; X64-NEXT: vzeroupper ; X64-NEXT: retq entry: @@ -8823,7 +8823,7 @@ define float @test_mm512_mask_reduce_min_ps(i16 zeroext %__M, <16 x float> %__W) ; X86-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] ; X86-NEXT: vminps %xmm1, %xmm0, %xmm0 ; X86-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,0,3,2] -; X86-NEXT: vminps %xmm1, %xmm0, %xmm0 +; X86-NEXT: vminss %xmm1, %xmm0, %xmm0 ; X86-NEXT: vmovss %xmm0, (%esp) ; X86-NEXT: flds (%esp) ; X86-NEXT: popl %eax @@ -8843,7 +8843,7 @@ define float @test_mm512_mask_reduce_min_ps(i16 zeroext %__M, <16 x float> %__W) ; X64-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] ; X64-NEXT: vminps %xmm1, %xmm0, %xmm0 ; X64-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,0,3,2] -; X64-NEXT: vminps %xmm1, %xmm0, %xmm0 +; X64-NEXT: vminss %xmm1, %xmm0, %xmm0 ; X64-NEXT: vzeroupper ; X64-NEXT: retq entry: diff --git a/test/CodeGen/X86/extractelement-fp.ll b/test/CodeGen/X86/extractelement-fp.ll index 2cab220ff72..206348eb208 100644 --- a/test/CodeGen/X86/extractelement-fp.ll +++ b/test/CodeGen/X86/extractelement-fp.ll @@ -340,7 +340,7 @@ define double @fminnum_v4f64(<4 x double> %x, <4 x double> %y) nounwind { define float @maxps_v4f32(<4 x float> %x, <4 x float> %y) nounwind { ; CHECK-LABEL: maxps_v4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vmaxps %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: vmaxss %xmm1, %xmm0, %xmm0 ; CHECK-NEXT: retq %cmp = fcmp ogt <4 x float> %x, %y %v = select <4 x i1> %cmp, <4 x float> %x, <4 x float> %y @@ -351,8 +351,7 @@ 
define float @maxps_v4f32(<4 x float> %x, <4 x float> %y) nounwind { define double @maxpd_v4f64(<4 x double> %x, <4 x double> %y) nounwind { ; CHECK-LABEL: maxpd_v4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmaxpd %ymm1, %ymm0, %ymm0 -; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 +; CHECK-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq %cmp = fcmp ogt <4 x double> %x, %y @@ -364,7 +363,7 @@ define double @maxpd_v4f64(<4 x double> %x, <4 x double> %y) nounwind { define float @minps_v4f32(<4 x float> %x, <4 x float> %y) nounwind { ; CHECK-LABEL: minps_v4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vminps %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: vminss %xmm1, %xmm0, %xmm0 ; CHECK-NEXT: retq %cmp = fcmp olt <4 x float> %x, %y %v = select <4 x i1> %cmp, <4 x float> %x, <4 x float> %y @@ -375,8 +374,7 @@ define float @minps_v4f32(<4 x float> %x, <4 x float> %y) nounwind { define double @minpd_v4f64(<4 x double> %x, <4 x double> %y) nounwind { ; CHECK-LABEL: minpd_v4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vminpd %ymm1, %ymm0, %ymm0 -; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 +; CHECK-NEXT: vminsd %xmm1, %xmm0, %xmm0 ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq %cmp = fcmp olt <4 x double> %x, %y @@ -540,7 +538,7 @@ define double @round_v4f64(<4 x double> %x) nounwind { define float @rcp_v4f32(<4 x float> %x) nounwind { ; CHECK-LABEL: rcp_v4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vrcpps %xmm0, %xmm0 +; CHECK-NEXT: vrcpss %xmm0, %xmm0, %xmm0 ; CHECK-NEXT: retq %v = call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %x) %r = extractelement <4 x float> %v, i32 0 @@ -562,7 +560,7 @@ define float @rcp_v8f32(<8 x float> %x) nounwind { define float @rsqrt_v4f32(<4 x float> %x) nounwind { ; CHECK-LABEL: rsqrt_v4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vrsqrtps %xmm0, %xmm0 +; CHECK-NEXT: vrsqrtss %xmm0, %xmm0, %xmm0 ; CHECK-NEXT: retq %v = call <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> %x) %r = extractelement <4 x float> %v, i32 0 diff --git a/test/CodeGen/X86/vector-reduce-fmax-nnan.ll b/test/CodeGen/X86/vector-reduce-fmax-nnan.ll index a133a8ca567..6e8059dfc28 100644 --- a/test/CodeGen/X86/vector-reduce-fmax-nnan.ll +++ b/test/CodeGen/X86/vector-reduce-fmax-nnan.ll @@ -15,25 +15,25 @@ define float @test_v2f32(<2 x float> %a0) { ; SSE2: # %bb.0: ; SSE2-NEXT: movaps %xmm0, %xmm1 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3] -; SSE2-NEXT: maxps %xmm1, %xmm0 +; SSE2-NEXT: maxss %xmm1, %xmm0 ; SSE2-NEXT: retq ; ; SSE41-LABEL: test_v2f32: ; SSE41: # %bb.0: ; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] -; SSE41-NEXT: maxps %xmm1, %xmm0 +; SSE41-NEXT: maxss %xmm1, %xmm0 ; SSE41-NEXT: retq ; ; AVX-LABEL: test_v2f32: ; AVX: # %bb.0: ; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] -; AVX-NEXT: vmaxps %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vmaxss %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq ; ; AVX512-LABEL: test_v2f32: ; AVX512: # %bb.0: ; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] -; AVX512-NEXT: vmaxps %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: vmaxss %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: retq %1 = call nnan float @llvm.experimental.vector.reduce.fmax.f32.v2f32(<2 x float> %a0) ret float %1 @@ -47,7 +47,7 @@ define float @test_v4f32(<4 x float> %a0) { ; SSE2-NEXT: maxps %xmm1, %xmm0 ; SSE2-NEXT: movaps %xmm0, %xmm1 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3] -; SSE2-NEXT: maxps %xmm1, %xmm0 +; SSE2-NEXT: maxss %xmm1, %xmm0 ; SSE2-NEXT: retq ; ; SSE41-LABEL: test_v4f32: @@ -56,7 +56,7 @@ define float @test_v4f32(<4 x float> %a0) { ; SSE41-NEXT: unpckhpd 
{{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE41-NEXT: maxps %xmm1, %xmm0 ; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] -; SSE41-NEXT: maxps %xmm1, %xmm0 +; SSE41-NEXT: maxss %xmm1, %xmm0 ; SSE41-NEXT: retq ; ; AVX-LABEL: test_v4f32: @@ -64,7 +64,7 @@ define float @test_v4f32(<4 x float> %a0) { ; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] ; AVX-NEXT: vmaxps %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] -; AVX-NEXT: vmaxps %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vmaxss %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq ; ; AVX512-LABEL: test_v4f32: @@ -72,7 +72,7 @@ define float @test_v4f32(<4 x float> %a0) { ; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] ; AVX512-NEXT: vmaxps %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] -; AVX512-NEXT: vmaxps %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: vmaxss %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: retq %1 = call nnan float @llvm.experimental.vector.reduce.fmax.f32.v4f32(<4 x float> %a0) ret float %1 @@ -87,7 +87,7 @@ define float @test_v8f32(<8 x float> %a0) { ; SSE2-NEXT: maxps %xmm1, %xmm0 ; SSE2-NEXT: movaps %xmm0, %xmm1 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3] -; SSE2-NEXT: maxps %xmm1, %xmm0 +; SSE2-NEXT: maxss %xmm1, %xmm0 ; SSE2-NEXT: retq ; ; SSE41-LABEL: test_v8f32: @@ -97,7 +97,7 @@ define float @test_v8f32(<8 x float> %a0) { ; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE41-NEXT: maxps %xmm1, %xmm0 ; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] -; SSE41-NEXT: maxps %xmm1, %xmm0 +; SSE41-NEXT: maxss %xmm1, %xmm0 ; SSE41-NEXT: retq ; ; AVX-LABEL: test_v8f32: @@ -107,8 +107,7 @@ define float @test_v8f32(<8 x float> %a0) { ; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] ; AVX-NEXT: vmaxps %ymm1, %ymm0, %ymm0 ; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] -; AVX-NEXT: vmaxps %ymm1, %ymm0, %ymm0 -; AVX-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 +; AVX-NEXT: vmaxss %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vzeroupper ; AVX-NEXT: retq ; @@ -119,8 +118,7 @@ define float @test_v8f32(<8 x float> %a0) { ; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] ; AVX512-NEXT: vmaxps %ymm1, %ymm0, %ymm0 ; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] -; AVX512-NEXT: vmaxps %ymm1, %ymm0, %ymm0 -; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 +; AVX512-NEXT: vmaxss %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq %1 = call nnan float @llvm.experimental.vector.reduce.fmax.f32.v8f32(<8 x float> %a0) @@ -138,7 +136,7 @@ define float @test_v16f32(<16 x float> %a0) { ; SSE2-NEXT: maxps %xmm1, %xmm0 ; SSE2-NEXT: movaps %xmm0, %xmm1 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3] -; SSE2-NEXT: maxps %xmm1, %xmm0 +; SSE2-NEXT: maxss %xmm1, %xmm0 ; SSE2-NEXT: retq ; ; SSE41-LABEL: test_v16f32: @@ -150,7 +148,7 @@ define float @test_v16f32(<16 x float> %a0) { ; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE41-NEXT: maxps %xmm1, %xmm0 ; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] -; SSE41-NEXT: maxps %xmm1, %xmm0 +; SSE41-NEXT: maxss %xmm1, %xmm0 ; SSE41-NEXT: retq ; ; AVX-LABEL: test_v16f32: @@ -161,8 +159,7 @@ define float @test_v16f32(<16 x float> %a0) { ; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] ; AVX-NEXT: vmaxps %ymm1, %ymm0, %ymm0 ; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] -; AVX-NEXT: vmaxps %ymm1, %ymm0, %ymm0 -; AVX-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 +; AVX-NEXT: vmaxss %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vzeroupper ; AVX-NEXT: retq ; @@ -175,8 +172,7 @@ define float @test_v16f32(<16 x float> %a0) { ; AVX512-NEXT: 
vpermilpd {{.*#+}} xmm1 = xmm0[1,0] ; AVX512-NEXT: vmaxps %zmm1, %zmm0, %zmm0 ; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] -; AVX512-NEXT: vmaxps %zmm1, %zmm0, %zmm0 -; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 +; AVX512-NEXT: vmaxss %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq %1 = call nnan float @llvm.experimental.vector.reduce.fmax.f32.v16f32(<16 x float> %a0) @@ -192,19 +188,19 @@ define double @test_v2f64(<2 x double> %a0) { ; SSE: # %bb.0: ; SSE-NEXT: movapd %xmm0, %xmm1 ; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] -; SSE-NEXT: maxpd %xmm1, %xmm0 +; SSE-NEXT: maxsd %xmm1, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: test_v2f64: ; AVX: # %bb.0: ; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] -; AVX-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq ; ; AVX512-LABEL: test_v2f64: ; AVX512: # %bb.0: ; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] -; AVX512-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: retq %1 = call nnan double @llvm.experimental.vector.reduce.fmax.f64.v2f64(<2 x double> %a0) ret double %1 @@ -216,7 +212,7 @@ define double @test_v4f64(<4 x double> %a0) { ; SSE-NEXT: maxpd %xmm1, %xmm0 ; SSE-NEXT: movapd %xmm0, %xmm1 ; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] -; SSE-NEXT: maxpd %xmm1, %xmm0 +; SSE-NEXT: maxsd %xmm1, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: test_v4f64: @@ -224,8 +220,7 @@ define double @test_v4f64(<4 x double> %a0) { ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1 ; AVX-NEXT: vmaxpd %ymm1, %ymm0, %ymm0 ; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] -; AVX-NEXT: vmaxpd %ymm1, %ymm0, %ymm0 -; AVX-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 +; AVX-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vzeroupper ; AVX-NEXT: retq ; @@ -234,8 +229,7 @@ define double @test_v4f64(<4 x double> %a0) { ; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1 ; AVX512-NEXT: vmaxpd %ymm1, %ymm0, %ymm0 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] -; AVX512-NEXT: vmaxpd %ymm1, %ymm0, %ymm0 -; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 +; AVX512-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq %1 = call nnan double @llvm.experimental.vector.reduce.fmax.f64.v4f64(<4 x double> %a0) @@ -250,7 +244,7 @@ define double @test_v8f64(<8 x double> %a0) { ; SSE-NEXT: maxpd %xmm1, %xmm0 ; SSE-NEXT: movapd %xmm0, %xmm1 ; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] -; SSE-NEXT: maxpd %xmm1, %xmm0 +; SSE-NEXT: maxsd %xmm1, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: test_v8f64: @@ -259,8 +253,7 @@ define double @test_v8f64(<8 x double> %a0) { ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1 ; AVX-NEXT: vmaxpd %ymm1, %ymm0, %ymm0 ; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] -; AVX-NEXT: vmaxpd %ymm1, %ymm0, %ymm0 -; AVX-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 +; AVX-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vzeroupper ; AVX-NEXT: retq ; @@ -271,8 +264,7 @@ define double @test_v8f64(<8 x double> %a0) { ; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1 ; AVX512-NEXT: vmaxpd %zmm1, %zmm0, %zmm0 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] -; AVX512-NEXT: vmaxpd %zmm1, %zmm0, %zmm0 -; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 +; AVX512-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq %1 = call nnan double @llvm.experimental.vector.reduce.fmax.f64.v8f64(<8 x double> %a0) @@ -291,7 +283,7 @@ define double @test_v16f64(<16 x double> %a0) { ; SSE-NEXT: maxpd %xmm1, %xmm0 
; SSE-NEXT: movapd %xmm0, %xmm1 ; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] -; SSE-NEXT: maxpd %xmm1, %xmm0 +; SSE-NEXT: maxsd %xmm1, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: test_v16f64: @@ -302,8 +294,7 @@ define double @test_v16f64(<16 x double> %a0) { ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1 ; AVX-NEXT: vmaxpd %ymm1, %ymm0, %ymm0 ; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] -; AVX-NEXT: vmaxpd %ymm1, %ymm0, %ymm0 -; AVX-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 +; AVX-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vzeroupper ; AVX-NEXT: retq ; @@ -315,8 +306,7 @@ define double @test_v16f64(<16 x double> %a0) { ; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1 ; AVX512-NEXT: vmaxpd %zmm1, %zmm0, %zmm0 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] -; AVX512-NEXT: vmaxpd %zmm1, %zmm0, %zmm0 -; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 +; AVX512-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq %1 = call nnan double @llvm.experimental.vector.reduce.fmax.f64.v16f64(<16 x double> %a0) diff --git a/test/CodeGen/X86/vector-reduce-fmax.ll b/test/CodeGen/X86/vector-reduce-fmax.ll index c2a06cfb7fe..d3c1ca256e8 100644 --- a/test/CodeGen/X86/vector-reduce-fmax.ll +++ b/test/CodeGen/X86/vector-reduce-fmax.ll @@ -15,25 +15,25 @@ define float @test_v2f32(<2 x float> %a0) { ; SSE2: # %bb.0: ; SSE2-NEXT: movaps %xmm0, %xmm1 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3] -; SSE2-NEXT: maxps %xmm1, %xmm0 +; SSE2-NEXT: maxss %xmm1, %xmm0 ; SSE2-NEXT: retq ; ; SSE41-LABEL: test_v2f32: ; SSE41: # %bb.0: ; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] -; SSE41-NEXT: maxps %xmm1, %xmm0 +; SSE41-NEXT: maxss %xmm1, %xmm0 ; SSE41-NEXT: retq ; ; AVX-LABEL: test_v2f32: ; AVX: # %bb.0: ; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] -; AVX-NEXT: vmaxps %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vmaxss %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq ; ; AVX512-LABEL: test_v2f32: ; AVX512: # %bb.0: ; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] -; AVX512-NEXT: vmaxps %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: vmaxss %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: retq %1 = call float @llvm.experimental.vector.reduce.fmax.f32.v2f32(<2 x float> %a0) ret float %1 @@ -47,7 +47,7 @@ define float @test_v4f32(<4 x float> %a0) { ; SSE2-NEXT: maxps %xmm1, %xmm0 ; SSE2-NEXT: movaps %xmm0, %xmm1 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3] -; SSE2-NEXT: maxps %xmm1, %xmm0 +; SSE2-NEXT: maxss %xmm1, %xmm0 ; SSE2-NEXT: retq ; ; SSE41-LABEL: test_v4f32: @@ -56,7 +56,7 @@ define float @test_v4f32(<4 x float> %a0) { ; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE41-NEXT: maxps %xmm1, %xmm0 ; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] -; SSE41-NEXT: maxps %xmm1, %xmm0 +; SSE41-NEXT: maxss %xmm1, %xmm0 ; SSE41-NEXT: retq ; ; AVX-LABEL: test_v4f32: @@ -64,7 +64,7 @@ define float @test_v4f32(<4 x float> %a0) { ; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] ; AVX-NEXT: vmaxps %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] -; AVX-NEXT: vmaxps %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vmaxss %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq ; ; AVX512-LABEL: test_v4f32: @@ -72,7 +72,7 @@ define float @test_v4f32(<4 x float> %a0) { ; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] ; AVX512-NEXT: vmaxps %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] -; AVX512-NEXT: vmaxps %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: vmaxss %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: retq %1 = call float @llvm.experimental.vector.reduce.fmax.f32.v4f32(<4 x float> %a0) 
ret float %1 @@ -87,7 +87,7 @@ define float @test_v8f32(<8 x float> %a0) { ; SSE2-NEXT: maxps %xmm1, %xmm0 ; SSE2-NEXT: movaps %xmm0, %xmm1 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3] -; SSE2-NEXT: maxps %xmm1, %xmm0 +; SSE2-NEXT: maxss %xmm1, %xmm0 ; SSE2-NEXT: retq ; ; SSE41-LABEL: test_v8f32: @@ -97,7 +97,7 @@ define float @test_v8f32(<8 x float> %a0) { ; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE41-NEXT: maxps %xmm1, %xmm0 ; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] -; SSE41-NEXT: maxps %xmm1, %xmm0 +; SSE41-NEXT: maxss %xmm1, %xmm0 ; SSE41-NEXT: retq ; ; AVX-LABEL: test_v8f32: @@ -107,8 +107,7 @@ define float @test_v8f32(<8 x float> %a0) { ; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] ; AVX-NEXT: vmaxps %ymm1, %ymm0, %ymm0 ; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] -; AVX-NEXT: vmaxps %ymm1, %ymm0, %ymm0 -; AVX-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 +; AVX-NEXT: vmaxss %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vzeroupper ; AVX-NEXT: retq ; @@ -119,8 +118,7 @@ define float @test_v8f32(<8 x float> %a0) { ; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] ; AVX512-NEXT: vmaxps %ymm1, %ymm0, %ymm0 ; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] -; AVX512-NEXT: vmaxps %ymm1, %ymm0, %ymm0 -; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 +; AVX512-NEXT: vmaxss %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq %1 = call float @llvm.experimental.vector.reduce.fmax.f32.v8f32(<8 x float> %a0) @@ -138,7 +136,7 @@ define float @test_v16f32(<16 x float> %a0) { ; SSE2-NEXT: maxps %xmm1, %xmm0 ; SSE2-NEXT: movaps %xmm0, %xmm1 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3] -; SSE2-NEXT: maxps %xmm1, %xmm0 +; SSE2-NEXT: maxss %xmm1, %xmm0 ; SSE2-NEXT: retq ; ; SSE41-LABEL: test_v16f32: @@ -150,7 +148,7 @@ define float @test_v16f32(<16 x float> %a0) { ; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE41-NEXT: maxps %xmm1, %xmm0 ; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] -; SSE41-NEXT: maxps %xmm1, %xmm0 +; SSE41-NEXT: maxss %xmm1, %xmm0 ; SSE41-NEXT: retq ; ; AVX-LABEL: test_v16f32: @@ -161,8 +159,7 @@ define float @test_v16f32(<16 x float> %a0) { ; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] ; AVX-NEXT: vmaxps %ymm1, %ymm0, %ymm0 ; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] -; AVX-NEXT: vmaxps %ymm1, %ymm0, %ymm0 -; AVX-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 +; AVX-NEXT: vmaxss %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vzeroupper ; AVX-NEXT: retq ; @@ -175,8 +172,7 @@ define float @test_v16f32(<16 x float> %a0) { ; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] ; AVX512-NEXT: vmaxps %zmm1, %zmm0, %zmm0 ; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] -; AVX512-NEXT: vmaxps %zmm1, %zmm0, %zmm0 -; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 +; AVX512-NEXT: vmaxss %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq %1 = call float @llvm.experimental.vector.reduce.fmax.f32.v16f32(<16 x float> %a0) @@ -192,19 +188,19 @@ define double @test_v2f64(<2 x double> %a0) { ; SSE: # %bb.0: ; SSE-NEXT: movapd %xmm0, %xmm1 ; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] -; SSE-NEXT: maxpd %xmm1, %xmm0 +; SSE-NEXT: maxsd %xmm1, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: test_v2f64: ; AVX: # %bb.0: ; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] -; AVX-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq ; ; AVX512-LABEL: test_v2f64: ; AVX512: # %bb.0: ; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] -; AVX512-NEXT: vmaxpd 
%xmm1, %xmm0, %xmm0 +; AVX512-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: retq %1 = call double @llvm.experimental.vector.reduce.fmax.f64.v2f64(<2 x double> %a0) ret double %1 @@ -216,7 +212,7 @@ define double @test_v4f64(<4 x double> %a0) { ; SSE-NEXT: maxpd %xmm1, %xmm0 ; SSE-NEXT: movapd %xmm0, %xmm1 ; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] -; SSE-NEXT: maxpd %xmm1, %xmm0 +; SSE-NEXT: maxsd %xmm1, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: test_v4f64: @@ -224,8 +220,7 @@ define double @test_v4f64(<4 x double> %a0) { ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1 ; AVX-NEXT: vmaxpd %ymm1, %ymm0, %ymm0 ; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] -; AVX-NEXT: vmaxpd %ymm1, %ymm0, %ymm0 -; AVX-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 +; AVX-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vzeroupper ; AVX-NEXT: retq ; @@ -234,8 +229,7 @@ define double @test_v4f64(<4 x double> %a0) { ; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1 ; AVX512-NEXT: vmaxpd %ymm1, %ymm0, %ymm0 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] -; AVX512-NEXT: vmaxpd %ymm1, %ymm0, %ymm0 -; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 +; AVX512-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq %1 = call double @llvm.experimental.vector.reduce.fmax.f64.v4f64(<4 x double> %a0) @@ -250,7 +244,7 @@ define double @test_v8f64(<8 x double> %a0) { ; SSE-NEXT: maxpd %xmm1, %xmm0 ; SSE-NEXT: movapd %xmm0, %xmm1 ; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] -; SSE-NEXT: maxpd %xmm1, %xmm0 +; SSE-NEXT: maxsd %xmm1, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: test_v8f64: @@ -259,8 +253,7 @@ define double @test_v8f64(<8 x double> %a0) { ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1 ; AVX-NEXT: vmaxpd %ymm1, %ymm0, %ymm0 ; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] -; AVX-NEXT: vmaxpd %ymm1, %ymm0, %ymm0 -; AVX-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 +; AVX-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vzeroupper ; AVX-NEXT: retq ; @@ -271,8 +264,7 @@ define double @test_v8f64(<8 x double> %a0) { ; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1 ; AVX512-NEXT: vmaxpd %zmm1, %zmm0, %zmm0 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] -; AVX512-NEXT: vmaxpd %zmm1, %zmm0, %zmm0 -; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 +; AVX512-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq %1 = call double @llvm.experimental.vector.reduce.fmax.f64.v8f64(<8 x double> %a0) @@ -291,7 +283,7 @@ define double @test_v16f64(<16 x double> %a0) { ; SSE-NEXT: maxpd %xmm1, %xmm0 ; SSE-NEXT: movapd %xmm0, %xmm1 ; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] -; SSE-NEXT: maxpd %xmm1, %xmm0 +; SSE-NEXT: maxsd %xmm1, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: test_v16f64: @@ -302,8 +294,7 @@ define double @test_v16f64(<16 x double> %a0) { ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1 ; AVX-NEXT: vmaxpd %ymm1, %ymm0, %ymm0 ; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] -; AVX-NEXT: vmaxpd %ymm1, %ymm0, %ymm0 -; AVX-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 +; AVX-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vzeroupper ; AVX-NEXT: retq ; @@ -315,8 +306,7 @@ define double @test_v16f64(<16 x double> %a0) { ; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1 ; AVX512-NEXT: vmaxpd %zmm1, %zmm0, %zmm0 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] -; AVX512-NEXT: vmaxpd %zmm1, %zmm0, %zmm0 -; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 +; AVX512-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq %1 = call double 
@llvm.experimental.vector.reduce.fmax.f64.v16f64(<16 x double> %a0) diff --git a/test/CodeGen/X86/vector-reduce-fmin-nnan.ll b/test/CodeGen/X86/vector-reduce-fmin-nnan.ll index f0711afe24c..8a544685b34 100644 --- a/test/CodeGen/X86/vector-reduce-fmin-nnan.ll +++ b/test/CodeGen/X86/vector-reduce-fmin-nnan.ll @@ -15,25 +15,25 @@ define float @test_v2f32(<2 x float> %a0) { ; SSE2: # %bb.0: ; SSE2-NEXT: movaps %xmm0, %xmm1 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3] -; SSE2-NEXT: minps %xmm1, %xmm0 +; SSE2-NEXT: minss %xmm1, %xmm0 ; SSE2-NEXT: retq ; ; SSE41-LABEL: test_v2f32: ; SSE41: # %bb.0: ; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] -; SSE41-NEXT: minps %xmm1, %xmm0 +; SSE41-NEXT: minss %xmm1, %xmm0 ; SSE41-NEXT: retq ; ; AVX-LABEL: test_v2f32: ; AVX: # %bb.0: ; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] -; AVX-NEXT: vminps %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vminss %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq ; ; AVX512-LABEL: test_v2f32: ; AVX512: # %bb.0: ; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] -; AVX512-NEXT: vminps %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: vminss %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: retq %1 = call nnan float @llvm.experimental.vector.reduce.fmin.f32.v2f32(<2 x float> %a0) ret float %1 @@ -47,7 +47,7 @@ define float @test_v4f32(<4 x float> %a0) { ; SSE2-NEXT: minps %xmm1, %xmm0 ; SSE2-NEXT: movaps %xmm0, %xmm1 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3] -; SSE2-NEXT: minps %xmm1, %xmm0 +; SSE2-NEXT: minss %xmm1, %xmm0 ; SSE2-NEXT: retq ; ; SSE41-LABEL: test_v4f32: @@ -56,7 +56,7 @@ define float @test_v4f32(<4 x float> %a0) { ; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE41-NEXT: minps %xmm1, %xmm0 ; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] -; SSE41-NEXT: minps %xmm1, %xmm0 +; SSE41-NEXT: minss %xmm1, %xmm0 ; SSE41-NEXT: retq ; ; AVX-LABEL: test_v4f32: @@ -64,7 +64,7 @@ define float @test_v4f32(<4 x float> %a0) { ; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] ; AVX-NEXT: vminps %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] -; AVX-NEXT: vminps %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vminss %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq ; ; AVX512-LABEL: test_v4f32: @@ -72,7 +72,7 @@ define float @test_v4f32(<4 x float> %a0) { ; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] ; AVX512-NEXT: vminps %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] -; AVX512-NEXT: vminps %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: vminss %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: retq %1 = call nnan float @llvm.experimental.vector.reduce.fmin.f32.v4f32(<4 x float> %a0) ret float %1 @@ -87,7 +87,7 @@ define float @test_v8f32(<8 x float> %a0) { ; SSE2-NEXT: minps %xmm1, %xmm0 ; SSE2-NEXT: movaps %xmm0, %xmm1 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3] -; SSE2-NEXT: minps %xmm1, %xmm0 +; SSE2-NEXT: minss %xmm1, %xmm0 ; SSE2-NEXT: retq ; ; SSE41-LABEL: test_v8f32: @@ -97,7 +97,7 @@ define float @test_v8f32(<8 x float> %a0) { ; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE41-NEXT: minps %xmm1, %xmm0 ; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] -; SSE41-NEXT: minps %xmm1, %xmm0 +; SSE41-NEXT: minss %xmm1, %xmm0 ; SSE41-NEXT: retq ; ; AVX-LABEL: test_v8f32: @@ -107,8 +107,7 @@ define float @test_v8f32(<8 x float> %a0) { ; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] ; AVX-NEXT: vminps %ymm1, %ymm0, %ymm0 ; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] -; AVX-NEXT: vminps %ymm1, %ymm0, %ymm0 -; AVX-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 +; AVX-NEXT: vminss %xmm1, %xmm0, 
%xmm0 ; AVX-NEXT: vzeroupper ; AVX-NEXT: retq ; @@ -119,8 +118,7 @@ define float @test_v8f32(<8 x float> %a0) { ; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] ; AVX512-NEXT: vminps %ymm1, %ymm0, %ymm0 ; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] -; AVX512-NEXT: vminps %ymm1, %ymm0, %ymm0 -; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 +; AVX512-NEXT: vminss %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq %1 = call nnan float @llvm.experimental.vector.reduce.fmin.f32.v8f32(<8 x float> %a0) @@ -138,7 +136,7 @@ define float @test_v16f32(<16 x float> %a0) { ; SSE2-NEXT: minps %xmm1, %xmm0 ; SSE2-NEXT: movaps %xmm0, %xmm1 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3] -; SSE2-NEXT: minps %xmm1, %xmm0 +; SSE2-NEXT: minss %xmm1, %xmm0 ; SSE2-NEXT: retq ; ; SSE41-LABEL: test_v16f32: @@ -150,7 +148,7 @@ define float @test_v16f32(<16 x float> %a0) { ; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE41-NEXT: minps %xmm1, %xmm0 ; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] -; SSE41-NEXT: minps %xmm1, %xmm0 +; SSE41-NEXT: minss %xmm1, %xmm0 ; SSE41-NEXT: retq ; ; AVX-LABEL: test_v16f32: @@ -161,8 +159,7 @@ define float @test_v16f32(<16 x float> %a0) { ; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] ; AVX-NEXT: vminps %ymm1, %ymm0, %ymm0 ; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] -; AVX-NEXT: vminps %ymm1, %ymm0, %ymm0 -; AVX-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 +; AVX-NEXT: vminss %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vzeroupper ; AVX-NEXT: retq ; @@ -175,8 +172,7 @@ define float @test_v16f32(<16 x float> %a0) { ; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] ; AVX512-NEXT: vminps %zmm1, %zmm0, %zmm0 ; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] -; AVX512-NEXT: vminps %zmm1, %zmm0, %zmm0 -; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 +; AVX512-NEXT: vminss %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq %1 = call nnan float @llvm.experimental.vector.reduce.fmin.f32.v16f32(<16 x float> %a0) @@ -192,19 +188,19 @@ define double @test_v2f64(<2 x double> %a0) { ; SSE: # %bb.0: ; SSE-NEXT: movapd %xmm0, %xmm1 ; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] -; SSE-NEXT: minpd %xmm1, %xmm0 +; SSE-NEXT: minsd %xmm1, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: test_v2f64: ; AVX: # %bb.0: ; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] -; AVX-NEXT: vminpd %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vminsd %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq ; ; AVX512-LABEL: test_v2f64: ; AVX512: # %bb.0: ; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] -; AVX512-NEXT: vminpd %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: vminsd %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: retq %1 = call nnan double @llvm.experimental.vector.reduce.fmin.f64.v2f64(<2 x double> %a0) ret double %1 @@ -216,7 +212,7 @@ define double @test_v4f64(<4 x double> %a0) { ; SSE-NEXT: minpd %xmm1, %xmm0 ; SSE-NEXT: movapd %xmm0, %xmm1 ; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] -; SSE-NEXT: minpd %xmm1, %xmm0 +; SSE-NEXT: minsd %xmm1, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: test_v4f64: @@ -224,8 +220,7 @@ define double @test_v4f64(<4 x double> %a0) { ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1 ; AVX-NEXT: vminpd %ymm1, %ymm0, %ymm0 ; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] -; AVX-NEXT: vminpd %ymm1, %ymm0, %ymm0 -; AVX-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 +; AVX-NEXT: vminsd %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vzeroupper ; AVX-NEXT: retq ; @@ -234,8 +229,7 @@ define double @test_v4f64(<4 x double> %a0) { ; AVX512-NEXT: vextractf128 $1, %ymm0, 
%xmm1 ; AVX512-NEXT: vminpd %ymm1, %ymm0, %ymm0 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] -; AVX512-NEXT: vminpd %ymm1, %ymm0, %ymm0 -; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 +; AVX512-NEXT: vminsd %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq %1 = call nnan double @llvm.experimental.vector.reduce.fmin.f64.v4f64(<4 x double> %a0) @@ -250,7 +244,7 @@ define double @test_v8f64(<8 x double> %a0) { ; SSE-NEXT: minpd %xmm1, %xmm0 ; SSE-NEXT: movapd %xmm0, %xmm1 ; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] -; SSE-NEXT: minpd %xmm1, %xmm0 +; SSE-NEXT: minsd %xmm1, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: test_v8f64: @@ -259,8 +253,7 @@ define double @test_v8f64(<8 x double> %a0) { ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1 ; AVX-NEXT: vminpd %ymm1, %ymm0, %ymm0 ; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] -; AVX-NEXT: vminpd %ymm1, %ymm0, %ymm0 -; AVX-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 +; AVX-NEXT: vminsd %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vzeroupper ; AVX-NEXT: retq ; @@ -271,8 +264,7 @@ define double @test_v8f64(<8 x double> %a0) { ; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1 ; AVX512-NEXT: vminpd %zmm1, %zmm0, %zmm0 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] -; AVX512-NEXT: vminpd %zmm1, %zmm0, %zmm0 -; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 +; AVX512-NEXT: vminsd %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq %1 = call nnan double @llvm.experimental.vector.reduce.fmin.f64.v8f64(<8 x double> %a0) @@ -291,7 +283,7 @@ define double @test_v16f64(<16 x double> %a0) { ; SSE-NEXT: minpd %xmm1, %xmm0 ; SSE-NEXT: movapd %xmm0, %xmm1 ; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] -; SSE-NEXT: minpd %xmm1, %xmm0 +; SSE-NEXT: minsd %xmm1, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: test_v16f64: @@ -302,8 +294,7 @@ define double @test_v16f64(<16 x double> %a0) { ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1 ; AVX-NEXT: vminpd %ymm1, %ymm0, %ymm0 ; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] -; AVX-NEXT: vminpd %ymm1, %ymm0, %ymm0 -; AVX-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 +; AVX-NEXT: vminsd %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vzeroupper ; AVX-NEXT: retq ; @@ -315,8 +306,7 @@ define double @test_v16f64(<16 x double> %a0) { ; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1 ; AVX512-NEXT: vminpd %zmm1, %zmm0, %zmm0 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] -; AVX512-NEXT: vminpd %zmm1, %zmm0, %zmm0 -; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 +; AVX512-NEXT: vminsd %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq %1 = call nnan double @llvm.experimental.vector.reduce.fmin.f64.v16f64(<16 x double> %a0) diff --git a/test/CodeGen/X86/vector-reduce-fmin.ll b/test/CodeGen/X86/vector-reduce-fmin.ll index bd95bbabaa1..be193a96bfe 100644 --- a/test/CodeGen/X86/vector-reduce-fmin.ll +++ b/test/CodeGen/X86/vector-reduce-fmin.ll @@ -15,25 +15,25 @@ define float @test_v2f32(<2 x float> %a0) { ; SSE2: # %bb.0: ; SSE2-NEXT: movaps %xmm0, %xmm1 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3] -; SSE2-NEXT: minps %xmm1, %xmm0 +; SSE2-NEXT: minss %xmm1, %xmm0 ; SSE2-NEXT: retq ; ; SSE41-LABEL: test_v2f32: ; SSE41: # %bb.0: ; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] -; SSE41-NEXT: minps %xmm1, %xmm0 +; SSE41-NEXT: minss %xmm1, %xmm0 ; SSE41-NEXT: retq ; ; AVX-LABEL: test_v2f32: ; AVX: # %bb.0: ; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] -; AVX-NEXT: vminps %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vminss %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq ; ; 
AVX512-LABEL: test_v2f32: ; AVX512: # %bb.0: ; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] -; AVX512-NEXT: vminps %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: vminss %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: retq %1 = call float @llvm.experimental.vector.reduce.fmin.f32.v2f32(<2 x float> %a0) ret float %1 @@ -47,7 +47,7 @@ define float @test_v4f32(<4 x float> %a0) { ; SSE2-NEXT: minps %xmm1, %xmm0 ; SSE2-NEXT: movaps %xmm0, %xmm1 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3] -; SSE2-NEXT: minps %xmm1, %xmm0 +; SSE2-NEXT: minss %xmm1, %xmm0 ; SSE2-NEXT: retq ; ; SSE41-LABEL: test_v4f32: @@ -56,7 +56,7 @@ define float @test_v4f32(<4 x float> %a0) { ; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE41-NEXT: minps %xmm1, %xmm0 ; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] -; SSE41-NEXT: minps %xmm1, %xmm0 +; SSE41-NEXT: minss %xmm1, %xmm0 ; SSE41-NEXT: retq ; ; AVX-LABEL: test_v4f32: @@ -64,7 +64,7 @@ define float @test_v4f32(<4 x float> %a0) { ; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] ; AVX-NEXT: vminps %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] -; AVX-NEXT: vminps %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vminss %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq ; ; AVX512-LABEL: test_v4f32: @@ -72,7 +72,7 @@ define float @test_v4f32(<4 x float> %a0) { ; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] ; AVX512-NEXT: vminps %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] -; AVX512-NEXT: vminps %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: vminss %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: retq %1 = call float @llvm.experimental.vector.reduce.fmin.f32.v4f32(<4 x float> %a0) ret float %1 @@ -87,7 +87,7 @@ define float @test_v8f32(<8 x float> %a0) { ; SSE2-NEXT: minps %xmm1, %xmm0 ; SSE2-NEXT: movaps %xmm0, %xmm1 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3] -; SSE2-NEXT: minps %xmm1, %xmm0 +; SSE2-NEXT: minss %xmm1, %xmm0 ; SSE2-NEXT: retq ; ; SSE41-LABEL: test_v8f32: @@ -97,7 +97,7 @@ define float @test_v8f32(<8 x float> %a0) { ; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE41-NEXT: minps %xmm1, %xmm0 ; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] -; SSE41-NEXT: minps %xmm1, %xmm0 +; SSE41-NEXT: minss %xmm1, %xmm0 ; SSE41-NEXT: retq ; ; AVX-LABEL: test_v8f32: @@ -107,8 +107,7 @@ define float @test_v8f32(<8 x float> %a0) { ; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] ; AVX-NEXT: vminps %ymm1, %ymm0, %ymm0 ; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] -; AVX-NEXT: vminps %ymm1, %ymm0, %ymm0 -; AVX-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 +; AVX-NEXT: vminss %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vzeroupper ; AVX-NEXT: retq ; @@ -119,8 +118,7 @@ define float @test_v8f32(<8 x float> %a0) { ; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] ; AVX512-NEXT: vminps %ymm1, %ymm0, %ymm0 ; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] -; AVX512-NEXT: vminps %ymm1, %ymm0, %ymm0 -; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 +; AVX512-NEXT: vminss %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq %1 = call float @llvm.experimental.vector.reduce.fmin.f32.v8f32(<8 x float> %a0) @@ -138,7 +136,7 @@ define float @test_v16f32(<16 x float> %a0) { ; SSE2-NEXT: minps %xmm1, %xmm0 ; SSE2-NEXT: movaps %xmm0, %xmm1 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3] -; SSE2-NEXT: minps %xmm1, %xmm0 +; SSE2-NEXT: minss %xmm1, %xmm0 ; SSE2-NEXT: retq ; ; SSE41-LABEL: test_v16f32: @@ -150,7 +148,7 @@ define float @test_v16f32(<16 x float> %a0) { ; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; 
SSE41-NEXT: minps %xmm1, %xmm0 ; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] -; SSE41-NEXT: minps %xmm1, %xmm0 +; SSE41-NEXT: minss %xmm1, %xmm0 ; SSE41-NEXT: retq ; ; AVX-LABEL: test_v16f32: @@ -161,8 +159,7 @@ define float @test_v16f32(<16 x float> %a0) { ; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] ; AVX-NEXT: vminps %ymm1, %ymm0, %ymm0 ; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] -; AVX-NEXT: vminps %ymm1, %ymm0, %ymm0 -; AVX-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 +; AVX-NEXT: vminss %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vzeroupper ; AVX-NEXT: retq ; @@ -175,8 +172,7 @@ define float @test_v16f32(<16 x float> %a0) { ; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] ; AVX512-NEXT: vminps %zmm1, %zmm0, %zmm0 ; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] -; AVX512-NEXT: vminps %zmm1, %zmm0, %zmm0 -; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 +; AVX512-NEXT: vminss %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq %1 = call float @llvm.experimental.vector.reduce.fmin.f32.v16f32(<16 x float> %a0) @@ -192,19 +188,19 @@ define double @test_v2f64(<2 x double> %a0) { ; SSE: # %bb.0: ; SSE-NEXT: movapd %xmm0, %xmm1 ; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] -; SSE-NEXT: minpd %xmm1, %xmm0 +; SSE-NEXT: minsd %xmm1, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: test_v2f64: ; AVX: # %bb.0: ; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] -; AVX-NEXT: vminpd %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vminsd %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq ; ; AVX512-LABEL: test_v2f64: ; AVX512: # %bb.0: ; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] -; AVX512-NEXT: vminpd %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: vminsd %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: retq %1 = call double @llvm.experimental.vector.reduce.fmin.f64.v2f64(<2 x double> %a0) ret double %1 @@ -216,7 +212,7 @@ define double @test_v4f64(<4 x double> %a0) { ; SSE-NEXT: minpd %xmm1, %xmm0 ; SSE-NEXT: movapd %xmm0, %xmm1 ; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] -; SSE-NEXT: minpd %xmm1, %xmm0 +; SSE-NEXT: minsd %xmm1, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: test_v4f64: @@ -224,8 +220,7 @@ define double @test_v4f64(<4 x double> %a0) { ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1 ; AVX-NEXT: vminpd %ymm1, %ymm0, %ymm0 ; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] -; AVX-NEXT: vminpd %ymm1, %ymm0, %ymm0 -; AVX-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 +; AVX-NEXT: vminsd %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vzeroupper ; AVX-NEXT: retq ; @@ -234,8 +229,7 @@ define double @test_v4f64(<4 x double> %a0) { ; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1 ; AVX512-NEXT: vminpd %ymm1, %ymm0, %ymm0 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] -; AVX512-NEXT: vminpd %ymm1, %ymm0, %ymm0 -; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 +; AVX512-NEXT: vminsd %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq %1 = call double @llvm.experimental.vector.reduce.fmin.f64.v4f64(<4 x double> %a0) @@ -250,7 +244,7 @@ define double @test_v8f64(<8 x double> %a0) { ; SSE-NEXT: minpd %xmm1, %xmm0 ; SSE-NEXT: movapd %xmm0, %xmm1 ; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] -; SSE-NEXT: minpd %xmm1, %xmm0 +; SSE-NEXT: minsd %xmm1, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: test_v8f64: @@ -259,8 +253,7 @@ define double @test_v8f64(<8 x double> %a0) { ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1 ; AVX-NEXT: vminpd %ymm1, %ymm0, %ymm0 ; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] -; AVX-NEXT: vminpd %ymm1, %ymm0, %ymm0 -; AVX-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 +; AVX-NEXT: 
vminsd %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vzeroupper ; AVX-NEXT: retq ; @@ -271,8 +264,7 @@ define double @test_v8f64(<8 x double> %a0) { ; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1 ; AVX512-NEXT: vminpd %zmm1, %zmm0, %zmm0 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] -; AVX512-NEXT: vminpd %zmm1, %zmm0, %zmm0 -; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 +; AVX512-NEXT: vminsd %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq %1 = call double @llvm.experimental.vector.reduce.fmin.f64.v8f64(<8 x double> %a0) @@ -291,7 +283,7 @@ define double @test_v16f64(<16 x double> %a0) { ; SSE-NEXT: minpd %xmm1, %xmm0 ; SSE-NEXT: movapd %xmm0, %xmm1 ; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] -; SSE-NEXT: minpd %xmm1, %xmm0 +; SSE-NEXT: minsd %xmm1, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: test_v16f64: @@ -302,8 +294,7 @@ define double @test_v16f64(<16 x double> %a0) { ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1 ; AVX-NEXT: vminpd %ymm1, %ymm0, %ymm0 ; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] -; AVX-NEXT: vminpd %ymm1, %ymm0, %ymm0 -; AVX-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 +; AVX-NEXT: vminsd %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vzeroupper ; AVX-NEXT: retq ; @@ -315,8 +306,7 @@ define double @test_v16f64(<16 x double> %a0) { ; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1 ; AVX512-NEXT: vminpd %zmm1, %zmm0, %zmm0 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] -; AVX512-NEXT: vminpd %zmm1, %zmm0, %zmm0 -; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 +; AVX512-NEXT: vminsd %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq %1 = call double @llvm.experimental.vector.reduce.fmin.f64.v16f64(<16 x double> %a0)
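
Note (illustration only, not part of the committed patch): the effect of adding X86ISD::FMAX to the scalarization switch can be reduced to a minimal case modeled on the existing maxps_v4f32 test in test/CodeGen/X86/extractelement-fp.ll; the function name below is made up and the RUN line is omitted, with the check line assuming an AVX-capable target like the rest of that file. Before this change, extracting lane 0 of a compare+select that had already been matched to the vector X86ISD::FMAX node kept a full-width vmaxps; with the new switch entry the extract is pushed through the operands and a single scalar vmaxss is expected instead:

define float @extract_of_vector_fmax(<4 x float> %x, <4 x float> %y) nounwind {
; CHECK-LABEL: extract_of_vector_fmax:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmaxss %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %cmp = fcmp ogt <4 x float> %x, %y
  %sel = select <4 x i1> %cmp, <4 x float> %x, <4 x float> %y
  %r = extractelement <4 x float> %sel, i32 0
  ret float %r
}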