From: Simon Pilgrim Date: Sun, 28 Apr 2019 20:03:11 +0000 (+0000) Subject: [X86] Add PR39936 HADD Tests X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=b4966362f9a81963415bb411b2384b5055ccc72d;p=llvm [X86] Add PR39936 HADD Tests git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@359407 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/test/CodeGen/X86/phaddsub.ll b/test/CodeGen/X86/phaddsub.ll index b4ff08cd254..d16e0f54f04 100644 --- a/test/CodeGen/X86/phaddsub.ll +++ b/test/CodeGen/X86/phaddsub.ll @@ -757,3 +757,115 @@ define <8 x i16> @phaddw_single_source6(<8 x i16> %x) { ret <8 x i16> %shuffle2 } +define i32 @PR39936_v8i32(<8 x i32>) { +; SSSE3-SLOW-LABEL: PR39936_v8i32: +; SSSE3-SLOW: # %bb.0: +; SSSE3-SLOW-NEXT: phaddd %xmm1, %xmm0 +; SSSE3-SLOW-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,2,2,3] +; SSSE3-SLOW-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,3,2,3] +; SSSE3-SLOW-NEXT: paddd %xmm1, %xmm0 +; SSSE3-SLOW-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] +; SSSE3-SLOW-NEXT: paddd %xmm0, %xmm1 +; SSSE3-SLOW-NEXT: movd %xmm1, %eax +; SSSE3-SLOW-NEXT: retq +; +; SSSE3-FAST-LABEL: PR39936_v8i32: +; SSSE3-FAST: # %bb.0: +; SSSE3-FAST-NEXT: phaddd %xmm1, %xmm0 +; SSSE3-FAST-NEXT: phaddd %xmm0, %xmm0 +; SSSE3-FAST-NEXT: phaddd %xmm0, %xmm0 +; SSSE3-FAST-NEXT: movd %xmm0, %eax +; SSSE3-FAST-NEXT: retq +; +; AVX-SLOW-LABEL: PR39936_v8i32: +; AVX-SLOW: # %bb.0: +; AVX-SLOW-NEXT: vextractf128 $1, %ymm0, %xmm1 +; AVX-SLOW-NEXT: vshufps {{.*#+}} xmm2 = xmm0[0,2],xmm1[0,2] +; AVX-SLOW-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3] +; AVX-SLOW-NEXT: vpaddd %xmm0, %xmm2, %xmm0 +; AVX-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[0,2,2,3] +; AVX-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,3,2,3] +; AVX-SLOW-NEXT: vpaddd %xmm0, %xmm1, %xmm0 +; AVX-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] +; AVX-SLOW-NEXT: vpaddd %xmm0, %xmm1, %xmm0 +; AVX-SLOW-NEXT: vmovd %xmm0, %eax +; AVX-SLOW-NEXT: vzeroupper +; AVX-SLOW-NEXT: retq +; +; AVX-FAST-LABEL: PR39936_v8i32: +; 
AVX-FAST: # %bb.0: +; AVX-FAST-NEXT: vextractf128 $1, %ymm0, %xmm1 +; AVX-FAST-NEXT: vshufps {{.*#+}} xmm2 = xmm0[0,2],xmm1[0,2] +; AVX-FAST-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3] +; AVX-FAST-NEXT: vpaddd %xmm0, %xmm2, %xmm0 +; AVX-FAST-NEXT: vphaddd %xmm0, %xmm0, %xmm0 +; AVX-FAST-NEXT: vphaddd %xmm0, %xmm0, %xmm0 +; AVX-FAST-NEXT: vmovd %xmm0, %eax +; AVX-FAST-NEXT: vzeroupper +; AVX-FAST-NEXT: retq + %2 = shufflevector <8 x i32> %0, <8 x i32> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 undef, i32 undef, i32 undef, i32 undef> + %3 = shufflevector <8 x i32> %0, <8 x i32> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 undef, i32 undef, i32 undef, i32 undef> + %4 = add <8 x i32> %2, %3 + %5 = shufflevector <8 x i32> %4, <8 x i32> undef, <8 x i32> <i32 0, i32 2, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> + %6 = shufflevector <8 x i32> %4, <8 x i32> undef, <8 x i32> <i32 1, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> + %7 = add <8 x i32> %5, %6 + %8 = shufflevector <8 x i32> %7, <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> + %9 = add <8 x i32> %8, %7 + %10 = extractelement <8 x i32> %9, i32 0 + ret i32 %10 +} + +define float @PR39936_v8f32(<8 x float>) { +; SSSE3-SLOW-LABEL: PR39936_v8f32: +; SSSE3-SLOW: # %bb.0: +; SSSE3-SLOW-NEXT: haddps %xmm1, %xmm0 +; SSSE3-SLOW-NEXT: movaps %xmm0, %xmm1 +; SSSE3-SLOW-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm0[2,3] +; SSSE3-SLOW-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,3,2,3] +; SSSE3-SLOW-NEXT: addps %xmm1, %xmm0 +; SSSE3-SLOW-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] +; SSSE3-SLOW-NEXT: addss %xmm1, %xmm0 +; SSSE3-SLOW-NEXT: retq +; +; SSSE3-FAST-LABEL: PR39936_v8f32: +; SSSE3-FAST: # %bb.0: +; SSSE3-FAST-NEXT: haddps %xmm1, %xmm0 +; SSSE3-FAST-NEXT: haddps %xmm0, %xmm0 +; SSSE3-FAST-NEXT: haddps %xmm0, %xmm0 +; SSSE3-FAST-NEXT: retq +; +; AVX-SLOW-LABEL: PR39936_v8f32: +; AVX-SLOW: # %bb.0: +; AVX-SLOW-NEXT: vextractf128 $1, %ymm0, %xmm1 +; AVX-SLOW-NEXT: vshufps {{.*#+}} xmm2 = xmm0[0,2],xmm1[0,2] +; AVX-SLOW-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3] +; AVX-SLOW-NEXT: vaddps %xmm0, %xmm2, %xmm0 +; AVX-SLOW-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,2,2,3] +; AVX-SLOW-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,3,2,3] +; AVX-SLOW-NEXT: vaddps %xmm0, %xmm1, %xmm0 +; 
AVX-SLOW-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] +; AVX-SLOW-NEXT: vaddss %xmm1, %xmm0, %xmm0 +; AVX-SLOW-NEXT: vzeroupper +; AVX-SLOW-NEXT: retq +; +; AVX-FAST-LABEL: PR39936_v8f32: +; AVX-FAST: # %bb.0: +; AVX-FAST-NEXT: vextractf128 $1, %ymm0, %xmm1 +; AVX-FAST-NEXT: vshufps {{.*#+}} xmm2 = xmm0[0,2],xmm1[0,2] +; AVX-FAST-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3] +; AVX-FAST-NEXT: vaddps %xmm0, %xmm2, %xmm0 +; AVX-FAST-NEXT: vhaddps %xmm0, %xmm0, %xmm0 +; AVX-FAST-NEXT: vhaddps %xmm0, %xmm0, %xmm0 +; AVX-FAST-NEXT: vzeroupper +; AVX-FAST-NEXT: retq + %2 = shufflevector <8 x float> %0, <8 x float> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 undef, i32 undef, i32 undef, i32 undef> + %3 = shufflevector <8 x float> %0, <8 x float> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 undef, i32 undef, i32 undef, i32 undef> + %4 = fadd <8 x float> %2, %3 + %5 = shufflevector <8 x float> %4, <8 x float> undef, <8 x i32> <i32 0, i32 2, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> + %6 = shufflevector <8 x float> %4, <8 x float> undef, <8 x i32> <i32 1, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> + %7 = fadd <8 x float> %5, %6 + %8 = shufflevector <8 x float> %7, <8 x float> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> + %9 = fadd <8 x float> %7, %8 + %10 = extractelement <8 x float> %9, i32 0 + ret float %10 +}