From cb86241882b5f54c1cc399fc518424da8917162f Mon Sep 17 00:00:00 2001
From: Sanjay Patel
Date: Wed, 14 Aug 2019 20:21:30 +0000
Subject: [PATCH] [x86] add tests for fadd reduction; NFC

More coverage for D66236.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@368913 91177308-0d34-0410-b5e6-96231b3b80d8
---
Reviewer notes (this area sits between "---" and the diff and is not part of
the commit message):
  * Each added test adds lanes 2,3 of %x onto lanes 0,1, then adds lane 1 of
    that sum onto lane 0, and returns lane 0. The tests differ only in vector
    width and in the fast-math flags on the two fadd instructions:
      - partial_reduction_fadd_v8f32: no flags on the first fadd,
        'nsz reassoc' on the second.
      - partial_reduction_fadd_v8f32_wrong_flags: 'fast' on the first fadd,
        only 'ninf nnan' on the second.
      - partial_reduction_fadd_v16f32: same flags as the first test, on
        <16 x float>.
  * NOTE(review): the shufflevector mask constants were missing from the copy
    this patch was recovered from. They are reconstructed here from the value
    names (%x23, %x13) and the CHECK lines (unpckhpd / movshdup); confirm
    against upstream r368913 before relying on them.
  * NOTE(review): the author's e-mail address is missing from the From: header
    and has not been reconstructed.

 test/CodeGen/X86/haddsub.ll | 132 ++++++++++++++++++++++++++++++++++++
 1 file changed, 132 insertions(+)

diff --git a/test/CodeGen/X86/haddsub.ll b/test/CodeGen/X86/haddsub.ll
index a3490658d40..78270a5cfe4 100644
--- a/test/CodeGen/X86/haddsub.ll
+++ b/test/CodeGen/X86/haddsub.ll
@@ -1985,3 +1985,135 @@ define float @hadd32_16_optsize(<16 x float> %x225) optsize {
   %x230 = extractelement <16 x float> %x229, i32 0
   ret float %x230
 }
+
+define float @partial_reduction_fadd_v8f32(<8 x float> %x) {
+; SSE3-SLOW-LABEL: partial_reduction_fadd_v8f32:
+; SSE3-SLOW:       # %bb.0:
+; SSE3-SLOW-NEXT:    movaps %xmm0, %xmm1
+; SSE3-SLOW-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
+; SSE3-SLOW-NEXT:    addps %xmm0, %xmm1
+; SSE3-SLOW-NEXT:    movshdup {{.*#+}} xmm0 = xmm1[1,1,3,3]
+; SSE3-SLOW-NEXT:    addss %xmm0, %xmm1
+; SSE3-SLOW-NEXT:    movaps %xmm1, %xmm0
+; SSE3-SLOW-NEXT:    retq
+;
+; SSE3-FAST-LABEL: partial_reduction_fadd_v8f32:
+; SSE3-FAST:       # %bb.0:
+; SSE3-FAST-NEXT:    movaps %xmm0, %xmm1
+; SSE3-FAST-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
+; SSE3-FAST-NEXT:    addps %xmm0, %xmm1
+; SSE3-FAST-NEXT:    haddps %xmm1, %xmm1
+; SSE3-FAST-NEXT:    movaps %xmm1, %xmm0
+; SSE3-FAST-NEXT:    retq
+;
+; AVX-SLOW-LABEL: partial_reduction_fadd_v8f32:
+; AVX-SLOW:       # %bb.0:
+; AVX-SLOW-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
+; AVX-SLOW-NEXT:    vaddps %xmm1, %xmm0, %xmm0
+; AVX-SLOW-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; AVX-SLOW-NEXT:    vaddss %xmm1, %xmm0, %xmm0
+; AVX-SLOW-NEXT:    vzeroupper
+; AVX-SLOW-NEXT:    retq
+;
+; AVX-FAST-LABEL: partial_reduction_fadd_v8f32:
+; AVX-FAST:       # %bb.0:
+; AVX-FAST-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
+; AVX-FAST-NEXT:    vaddps %xmm1, %xmm0, %xmm0
+; AVX-FAST-NEXT:    vhaddps %xmm0, %xmm0, %xmm0
+; AVX-FAST-NEXT:    vzeroupper
+; AVX-FAST-NEXT:    retq
+  %x23 = shufflevector <8 x float> %x, <8 x float> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %x0213 = fadd <8 x float> %x, %x23
+  %x13 = shufflevector <8 x float> %x0213, <8 x float> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %x0123 = fadd nsz reassoc <8 x float> %x0213, %x13
+  %r = extractelement <8 x float> %x0123, i32 0
+  ret float %r
+}
+
+define float @partial_reduction_fadd_v8f32_wrong_flags(<8 x float> %x) {
+; SSE3-SLOW-LABEL: partial_reduction_fadd_v8f32_wrong_flags:
+; SSE3-SLOW:       # %bb.0:
+; SSE3-SLOW-NEXT:    movaps %xmm0, %xmm1
+; SSE3-SLOW-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
+; SSE3-SLOW-NEXT:    addps %xmm0, %xmm1
+; SSE3-SLOW-NEXT:    movshdup {{.*#+}} xmm0 = xmm1[1,1,3,3]
+; SSE3-SLOW-NEXT:    addss %xmm0, %xmm1
+; SSE3-SLOW-NEXT:    movaps %xmm1, %xmm0
+; SSE3-SLOW-NEXT:    retq
+;
+; SSE3-FAST-LABEL: partial_reduction_fadd_v8f32_wrong_flags:
+; SSE3-FAST:       # %bb.0:
+; SSE3-FAST-NEXT:    movaps %xmm0, %xmm1
+; SSE3-FAST-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
+; SSE3-FAST-NEXT:    addps %xmm0, %xmm1
+; SSE3-FAST-NEXT:    haddps %xmm1, %xmm1
+; SSE3-FAST-NEXT:    movaps %xmm1, %xmm0
+; SSE3-FAST-NEXT:    retq
+;
+; AVX-SLOW-LABEL: partial_reduction_fadd_v8f32_wrong_flags:
+; AVX-SLOW:       # %bb.0:
+; AVX-SLOW-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
+; AVX-SLOW-NEXT:    vaddps %xmm1, %xmm0, %xmm0
+; AVX-SLOW-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; AVX-SLOW-NEXT:    vaddss %xmm1, %xmm0, %xmm0
+; AVX-SLOW-NEXT:    vzeroupper
+; AVX-SLOW-NEXT:    retq
+;
+; AVX-FAST-LABEL: partial_reduction_fadd_v8f32_wrong_flags:
+; AVX-FAST:       # %bb.0:
+; AVX-FAST-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
+; AVX-FAST-NEXT:    vaddps %xmm1, %xmm0, %xmm0
+; AVX-FAST-NEXT:    vhaddps %xmm0, %xmm0, %xmm0
+; AVX-FAST-NEXT:    vzeroupper
+; AVX-FAST-NEXT:    retq
+  %x23 = shufflevector <8 x float> %x, <8 x float> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %x0213 = fadd fast <8 x float> %x, %x23
+  %x13 = shufflevector <8 x float> %x0213, <8 x float> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %x0123 = fadd ninf nnan <8 x float> %x0213, %x13
+  %r = extractelement <8 x float> %x0123, i32 0
+  ret float %r
+}
+
+define float @partial_reduction_fadd_v16f32(<16 x float> %x) {
+; SSE3-SLOW-LABEL: partial_reduction_fadd_v16f32:
+; SSE3-SLOW:       # %bb.0:
+; SSE3-SLOW-NEXT:    movaps %xmm0, %xmm1
+; SSE3-SLOW-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
+; SSE3-SLOW-NEXT:    addps %xmm0, %xmm1
+; SSE3-SLOW-NEXT:    movshdup {{.*#+}} xmm0 = xmm1[1,1,3,3]
+; SSE3-SLOW-NEXT:    addss %xmm0, %xmm1
+; SSE3-SLOW-NEXT:    movaps %xmm1, %xmm0
+; SSE3-SLOW-NEXT:    retq
+;
+; SSE3-FAST-LABEL: partial_reduction_fadd_v16f32:
+; SSE3-FAST:       # %bb.0:
+; SSE3-FAST-NEXT:    movaps %xmm0, %xmm1
+; SSE3-FAST-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
+; SSE3-FAST-NEXT:    addps %xmm0, %xmm1
+; SSE3-FAST-NEXT:    haddps %xmm1, %xmm1
+; SSE3-FAST-NEXT:    movaps %xmm1, %xmm0
+; SSE3-FAST-NEXT:    retq
+;
+; AVX-SLOW-LABEL: partial_reduction_fadd_v16f32:
+; AVX-SLOW:       # %bb.0:
+; AVX-SLOW-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
+; AVX-SLOW-NEXT:    vaddps %xmm1, %xmm0, %xmm0
+; AVX-SLOW-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; AVX-SLOW-NEXT:    vaddss %xmm1, %xmm0, %xmm0
+; AVX-SLOW-NEXT:    vzeroupper
+; AVX-SLOW-NEXT:    retq
+;
+; AVX-FAST-LABEL: partial_reduction_fadd_v16f32:
+; AVX-FAST:       # %bb.0:
+; AVX-FAST-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
+; AVX-FAST-NEXT:    vaddps %xmm1, %xmm0, %xmm0
+; AVX-FAST-NEXT:    vhaddps %xmm0, %xmm0, %xmm0
+; AVX-FAST-NEXT:    vzeroupper
+; AVX-FAST-NEXT:    retq
+  %x23 = shufflevector <16 x float> %x, <16 x float> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %x0213 = fadd <16 x float> %x, %x23
+  %x13 = shufflevector <16 x float> %x0213, <16 x float> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %x0123 = fadd reassoc nsz <16 x float> %x0213, %x13
+  %r = extractelement <16 x float> %x0123, i32 0
+  ret float %r
+}
-- 
2.50.1