From ee8d96fcfbda52da10066fedd5297014ad37efcd Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Mon, 29 Apr 2019 11:30:47 +0000
Subject: [PATCH] [X86][SSE] Moved haddps test from phaddsub.ll to haddsub.ll
 (D61245)

Also merged duplicate PR39921 + PR39936 tests

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@359437 91177308-0d34-0410-b5e6-96231b3b80d8
---
 test/CodeGen/X86/haddsub.ll  |  82 +++++++++++++++++++++++--
 test/CodeGen/X86/phaddsub.ll | 115 +----------------------------------
 2 files changed, 79 insertions(+), 118 deletions(-)

diff --git a/test/CodeGen/X86/haddsub.ll b/test/CodeGen/X86/haddsub.ll
index 1f6b3b1b947..bbb08f0d652 100644
--- a/test/CodeGen/X86/haddsub.ll
+++ b/test/CodeGen/X86/haddsub.ll
@@ -1,10 +1,12 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse3 | FileCheck %s --check-prefixes=SSE3,SSE3-SLOW
 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse3,fast-hops | FileCheck %s --check-prefixes=SSE3,SSE3-FAST
-; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX-SLOW,AVX1-SLOW
-; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx,fast-hops | FileCheck %s --check-prefixes=AVX,AVX-FAST,AVX1-FAST
-; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX,AVX-SLOW,AVX512-SLOW
-; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f,fast-hops | FileCheck %s --check-prefixes=AVX,AVX-FAST,AVX512-FAST
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX-SLOW,AVX1,AVX1-SLOW
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx,fast-hops | FileCheck %s --check-prefixes=AVX,AVX-FAST,AVX1,AVX1-FAST
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX-SLOW,AVX2,AVX2-SLOW
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2,fast-hops | FileCheck %s --check-prefixes=AVX,AVX-FAST,AVX2,AVX2-FAST
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX,AVX-SLOW,AVX512,AVX512-SLOW
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f,fast-hops | FileCheck %s --check-prefixes=AVX,AVX-FAST,AVX512,AVX512-FAST
 
 define <2 x double> @haddpd1(<2 x double> %x, <2 x double> %y) {
 ; SSE3-LABEL: haddpd1:
@@ -1438,3 +1440,75 @@ define double @fadd_reduce_v4f64(double %a0, <4 x double> %a1) {
   ret double %r
 }
 
+define float @PR39936_v8f32(<8 x float>) {
+; SSSE3-SLOW-LABEL: PR39936_v8f32:
+; SSSE3-SLOW: # %bb.0:
+; SSSE3-SLOW-NEXT: haddps %xmm1, %xmm0
+; SSSE3-SLOW-NEXT: movaps %xmm0, %xmm1
+; SSSE3-SLOW-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm0[2,3]
+; SSSE3-SLOW-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,3,2,3]
+; SSSE3-SLOW-NEXT: addps %xmm1, %xmm0
+; SSSE3-SLOW-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; SSSE3-SLOW-NEXT: addss %xmm1, %xmm0
+; SSSE3-SLOW-NEXT: retq
+;
+; SSSE3-FAST-LABEL: PR39936_v8f32:
+; SSSE3-FAST: # %bb.0:
+; SSSE3-FAST-NEXT: haddps %xmm1, %xmm0
+; SSSE3-FAST-NEXT: haddps %xmm0, %xmm0
+; SSSE3-FAST-NEXT: haddps %xmm0, %xmm0
+; SSSE3-FAST-NEXT: retq
+;
+; SSE3-SLOW-LABEL: PR39936_v8f32:
+; SSE3-SLOW: # %bb.0:
+; SSE3-SLOW-NEXT: haddps %xmm1, %xmm0
+; SSE3-SLOW-NEXT: movaps %xmm0, %xmm1
+; SSE3-SLOW-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm0[2,3]
+; SSE3-SLOW-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,3,2,3]
+; SSE3-SLOW-NEXT: addps %xmm1, %xmm0
+; SSE3-SLOW-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; SSE3-SLOW-NEXT: addss %xmm1, %xmm0
+; SSE3-SLOW-NEXT: retq
+;
+; SSE3-FAST-LABEL: PR39936_v8f32:
+; SSE3-FAST: # %bb.0:
+; SSE3-FAST-NEXT: haddps %xmm1, %xmm0
+; SSE3-FAST-NEXT: haddps %xmm0, %xmm0
+; SSE3-FAST-NEXT: haddps %xmm0, %xmm0
+; SSE3-FAST-NEXT: retq
+;
+; AVX-SLOW-LABEL: PR39936_v8f32:
+; AVX-SLOW: # %bb.0:
+; AVX-SLOW-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX-SLOW-NEXT: vshufps {{.*#+}} xmm2 = xmm0[0,2],xmm1[0,2]
+; AVX-SLOW-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3]
+; AVX-SLOW-NEXT: vaddps %xmm0, %xmm2, %xmm0
+; AVX-SLOW-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,2,2,3]
+; AVX-SLOW-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,3,2,3]
+; AVX-SLOW-NEXT: vaddps %xmm0, %xmm1, %xmm0
+; AVX-SLOW-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; AVX-SLOW-NEXT: vaddss %xmm1, %xmm0, %xmm0
+; AVX-SLOW-NEXT: vzeroupper
+; AVX-SLOW-NEXT: retq
+;
+; AVX-FAST-LABEL: PR39936_v8f32:
+; AVX-FAST: # %bb.0:
+; AVX-FAST-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX-FAST-NEXT: vshufps {{.*#+}} xmm2 = xmm0[0,2],xmm1[0,2]
+; AVX-FAST-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3]
+; AVX-FAST-NEXT: vaddps %xmm0, %xmm2, %xmm0
+; AVX-FAST-NEXT: vhaddps %xmm0, %xmm0, %xmm0
+; AVX-FAST-NEXT: vhaddps %xmm0, %xmm0, %xmm0
+; AVX-FAST-NEXT: vzeroupper
+; AVX-FAST-NEXT: retq
+  %2 = shufflevector <8 x float> %0, <8 x float> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 undef, i32 undef, i32 undef, i32 undef>
+  %3 = shufflevector <8 x float> %0, <8 x float> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
+  %4 = fadd <8 x float> %2, %3
+  %5 = shufflevector <8 x float> %4, <8 x float> undef, <8 x i32> <i32 0, i32 2, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %6 = shufflevector <8 x float> %4, <8 x float> undef, <8 x i32> <i32 1, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %7 = fadd <8 x float> %5, %6
+  %8 = shufflevector <8 x float> %7, <8 x float> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %9 = fadd <8 x float> %7, %8
+  %10 = extractelement <8 x float> %9, i32 0
+  ret float %10
+}
diff --git a/test/CodeGen/X86/phaddsub.ll b/test/CodeGen/X86/phaddsub.ll
index 37b70376462..697c8d71c0f 100644
--- a/test/CodeGen/X86/phaddsub.ll
+++ b/test/CodeGen/X86/phaddsub.ll
@@ -782,66 +782,7 @@ define <8 x i16> @phaddw_single_source6(<8 x i16> %x) {
   ret <8 x i16> %shuffle2
 }
 
-; PR39921
-define i32 @pairwise_reduction_8i32(<8 x i32> %rdx) {
-; SSSE3-SLOW-LABEL: pairwise_reduction_8i32:
-; SSSE3-SLOW: # %bb.0:
-; SSSE3-SLOW-NEXT: phaddd %xmm1, %xmm0
-; SSSE3-SLOW-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,2,2,3]
-; SSSE3-SLOW-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,3,2,3]
-; SSSE3-SLOW-NEXT: paddd %xmm1, %xmm0
-; SSSE3-SLOW-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; SSSE3-SLOW-NEXT: paddd %xmm0, %xmm1
-; SSSE3-SLOW-NEXT: movd %xmm1, %eax
-; SSSE3-SLOW-NEXT: retq
-;
-; SSSE3-FAST-LABEL: pairwise_reduction_8i32:
-; SSSE3-FAST: # %bb.0:
-; SSSE3-FAST-NEXT: phaddd %xmm1, %xmm0
-; SSSE3-FAST-NEXT: phaddd %xmm0, %xmm0
-; SSSE3-FAST-NEXT: phaddd %xmm0, %xmm0
-; SSSE3-FAST-NEXT: movd %xmm0, %eax
-; SSSE3-FAST-NEXT: retq
-;
-; AVX-SLOW-LABEL: pairwise_reduction_8i32:
-; AVX-SLOW: # %bb.0:
-; AVX-SLOW-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX-SLOW-NEXT: vshufps {{.*#+}} xmm2 = xmm0[0,2],xmm1[0,2]
-; AVX-SLOW-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3]
-; AVX-SLOW-NEXT: vpaddd %xmm0, %xmm2, %xmm0
-; AVX-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[0,2,2,3]
-; AVX-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,3,2,3]
-; AVX-SLOW-NEXT: vpaddd %xmm0, %xmm1, %xmm0
-; AVX-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX-SLOW-NEXT: vpaddd %xmm1, %xmm0, %xmm0
-; AVX-SLOW-NEXT: vmovd %xmm0, %eax
-; AVX-SLOW-NEXT: vzeroupper
-; AVX-SLOW-NEXT: retq
-;
-; AVX-FAST-LABEL: pairwise_reduction_8i32:
-; AVX-FAST: # %bb.0:
-; AVX-FAST-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX-FAST-NEXT: vshufps {{.*#+}} xmm2 = xmm0[0,2],xmm1[0,2]
-; AVX-FAST-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3]
-; AVX-FAST-NEXT: vpaddd %xmm0, %xmm2, %xmm0
-; AVX-FAST-NEXT: vphaddd %xmm0, %xmm0, %xmm0
-; AVX-FAST-NEXT: vphaddd %xmm0, %xmm0, %xmm0
-; AVX-FAST-NEXT: vmovd %xmm0, %eax
-; AVX-FAST-NEXT: vzeroupper
-; AVX-FAST-NEXT: retq
-  %rdx.shuf.0.0 = shufflevector <8 x i32> %rdx, <8 x i32> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 undef, i32 undef, i32 undef, i32 undef>
-  %rdx.shuf.0.1 = shufflevector <8 x i32> %rdx, <8 x i32> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
-  %bin.rdx = add <8 x i32> %rdx.shuf.0.0, %rdx.shuf.0.1
-  %rdx.shuf.1.0 = shufflevector <8 x i32> %bin.rdx, <8 x i32> undef,<8 x i32> <i32 0, i32 2, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-  %rdx.shuf.1.1 = shufflevector <8 x i32> %bin.rdx, <8 x i32> undef,<8 x i32> <i32 1, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-  %bin.rdx8 = add <8 x i32> %rdx.shuf.1.0, %rdx.shuf.1.1
-  %rdx.shuf.2.0 = shufflevector <8 x i32> %bin.rdx8, <8 x i32> undef,<8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-  %rdx.shuf.2.1 = shufflevector <8 x i32> %bin.rdx8, <8 x i32> undef,<8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-  %bin.rdx9 = add <8 x i32> %rdx.shuf.2.0, %rdx.shuf.2.1
-  %r = extractelement <8 x i32> %bin.rdx9, i32 0
-  ret i32 %r
-}
-
+; PR39921 + PR39936
 define i32 @PR39936_v8i32(<8 x i32>) {
 ; SSSE3-SLOW-LABEL: PR39936_v8i32:
 ; SSSE3-SLOW: # %bb.0:
@@ -900,57 +841,3 @@ define i32 @PR39936_v8i32(<8 x i32>) {
   ret i32 %10
 }
 
-define float @PR39936_v8f32(<8 x float>) {
-; SSSE3-SLOW-LABEL: PR39936_v8f32:
-; SSSE3-SLOW: # %bb.0:
-; SSSE3-SLOW-NEXT: haddps %xmm1, %xmm0
-; SSSE3-SLOW-NEXT: movaps %xmm0, %xmm1
-; SSSE3-SLOW-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm0[2,3]
-; SSSE3-SLOW-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,3,2,3]
-; SSSE3-SLOW-NEXT: addps %xmm1, %xmm0
-; SSSE3-SLOW-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; SSSE3-SLOW-NEXT: addss %xmm1, %xmm0
-; SSSE3-SLOW-NEXT: retq
-;
-; SSSE3-FAST-LABEL: PR39936_v8f32:
-; SSSE3-FAST: # %bb.0:
-; SSSE3-FAST-NEXT: haddps %xmm1, %xmm0
-; SSSE3-FAST-NEXT: haddps %xmm0, %xmm0
-; SSSE3-FAST-NEXT: haddps %xmm0, %xmm0
-; SSSE3-FAST-NEXT: retq
-;
-; AVX-SLOW-LABEL: PR39936_v8f32:
-; AVX-SLOW: # %bb.0:
-; AVX-SLOW-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX-SLOW-NEXT: vshufps {{.*#+}} xmm2 = xmm0[0,2],xmm1[0,2]
-; AVX-SLOW-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3]
-; AVX-SLOW-NEXT: vaddps %xmm0, %xmm2, %xmm0
-; AVX-SLOW-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,2,2,3]
-; AVX-SLOW-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,3,2,3]
-; AVX-SLOW-NEXT: vaddps %xmm0, %xmm1, %xmm0
-; AVX-SLOW-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; AVX-SLOW-NEXT: vaddss %xmm1, %xmm0, %xmm0
-; AVX-SLOW-NEXT: vzeroupper
-; AVX-SLOW-NEXT: retq
-;
-; AVX-FAST-LABEL: PR39936_v8f32:
-; AVX-FAST: # %bb.0:
-; AVX-FAST-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX-FAST-NEXT: vshufps {{.*#+}} xmm2 = xmm0[0,2],xmm1[0,2]
-; AVX-FAST-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3]
-; AVX-FAST-NEXT: vaddps %xmm0, %xmm2, %xmm0
-; AVX-FAST-NEXT: vhaddps %xmm0, %xmm0, %xmm0
-; AVX-FAST-NEXT: vhaddps %xmm0, %xmm0, %xmm0
-; AVX-FAST-NEXT: vzeroupper
-; AVX-FAST-NEXT: retq
-  %2 = shufflevector <8 x float> %0, <8 x float> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 undef, i32 undef, i32 undef, i32 undef>
-  %3 = shufflevector <8 x float> %0, <8 x float> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
-  %4 = fadd <8 x float> %2, %3
-  %5 = shufflevector <8 x float> %4, <8 x float> undef, <8 x i32> <i32 0, i32 2, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-  %6 = shufflevector <8 x float> %4, <8 x float> undef, <8 x i32> <i32 1, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-  %7 = fadd <8 x float> %5, %6
-  %8 = shufflevector <8 x float> %7, <8 x float> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-  %9 = fadd <8 x float> %7, %8
-  %10 = extractelement <8 x float> %9, i32 0
-  ret float %10
-}
-- 
2.50.1