From 548f86a91bbfe65a145aca4c5c31b1a304b383c2 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 12 Dec 2016 07:57:24 +0000 Subject: [PATCH] [X86] Teach selectScalarSSELoad to accept full 128-bit vector loads and the X86ISD::VZEXT_LOAD opcode. Disable peephole on some of the tests that no longer require it to properly fold scalar intrinsics. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@289424 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelDAGToDAG.cpp | 22 ++++++++++++++++++++++ test/CodeGen/X86/sse2-intrinsics-x86.ll | 6 +++--- test/CodeGen/X86/sse41-intrinsics-x86.ll | 6 +++--- test/CodeGen/X86/vec_ss_load_fold.ll | 12 ++++++------ 4 files changed, 34 insertions(+), 12 deletions(-) diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index 14c95f94893..5913f0f85b9 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -1514,6 +1514,28 @@ bool X86DAGToDAGISel::selectScalarSSELoad(SDNode *Root, SDValue &Scale, SDValue &Index, SDValue &Disp, SDValue &Segment, SDValue &PatternNodeWithChain) { + // We can allow a full vector load here since narrowing a load is ok. + if (ISD::isNON_EXTLoad(N.getNode())) { + PatternNodeWithChain = N; + if (IsProfitableToFold(PatternNodeWithChain, N.getNode(), Root) && + IsLegalToFold(PatternNodeWithChain, N.getNode(), Root, OptLevel)) { + LoadSDNode *LD = cast(PatternNodeWithChain); + return selectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp, + Segment); + } + } + + // We can also match the special zero extended load opcode. + if (N.getOpcode() == X86ISD::VZEXT_LOAD) { + PatternNodeWithChain = N; + if (IsProfitableToFold(PatternNodeWithChain, N.getNode(), Root) && + IsLegalToFold(PatternNodeWithChain, N.getNode(), Root, OptLevel)) { + auto *MI = cast(PatternNodeWithChain); + return selectAddr(MI, MI->getBasePtr(), Base, Scale, Index, Disp, + Segment); + } + } + // Need to make sure that the SCALAR_TO_VECTOR and load are both only used // once. Otherwise the load might get duplicated and the chain output of the // duplicate load will not be observed by all dependencies. diff --git a/test/CodeGen/X86/sse2-intrinsics-x86.ll b/test/CodeGen/X86/sse2-intrinsics-x86.ll index 694e303e635..6f7ebba8c7c 100644 --- a/test/CodeGen/X86/sse2-intrinsics-x86.ll +++ b/test/CodeGen/X86/sse2-intrinsics-x86.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=-avx,+sse2 -show-mc-encoding | FileCheck %s --check-prefix=SSE -; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+avx2 -show-mc-encoding | FileCheck %s --check-prefix=VCHECK --check-prefix=AVX2 -; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=skx -show-mc-encoding | FileCheck %s --check-prefix=VCHECK --check-prefix=SKX +; RUN: llc < %s -disable-peephole -mtriple=i386-apple-darwin -mattr=-avx,+sse2 -show-mc-encoding | FileCheck %s --check-prefix=SSE +; RUN: llc < %s -disable-peephole -mtriple=i386-apple-darwin -mattr=+avx2 -show-mc-encoding | FileCheck %s --check-prefix=VCHECK --check-prefix=AVX2 +; RUN: llc < %s -disable-peephole -mtriple=i386-apple-darwin -mcpu=skx -show-mc-encoding | FileCheck %s --check-prefix=VCHECK --check-prefix=SKX define <2 x double> @test_x86_sse2_cmp_pd(<2 x double> %a0, <2 x double> %a1) { ; SSE-LABEL: test_x86_sse2_cmp_pd: diff --git a/test/CodeGen/X86/sse41-intrinsics-x86.ll b/test/CodeGen/X86/sse41-intrinsics-x86.ll index c17ec8c3593..9db4725f385 100644 --- a/test/CodeGen/X86/sse41-intrinsics-x86.ll +++ b/test/CodeGen/X86/sse41-intrinsics-x86.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+sse4.1 -show-mc-encoding | FileCheck %s --check-prefix=SSE41 -; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+avx2 -show-mc-encoding | FileCheck %s --check-prefix=VCHECK --check-prefix=AVX2 -; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=skx -show-mc-encoding | FileCheck %s --check-prefix=VCHECK --check-prefix=SKX +; RUN: llc < %s -disable-peephole -mtriple=i386-apple-darwin -mattr=+sse4.1 -show-mc-encoding | FileCheck %s --check-prefix=SSE41 +; RUN: llc < %s -disable-peephole -mtriple=i386-apple-darwin -mattr=+avx2 -show-mc-encoding | FileCheck %s --check-prefix=VCHECK --check-prefix=AVX2 +; RUN: llc < %s -disable-peephole -mtriple=i386-apple-darwin -mcpu=skx -show-mc-encoding | FileCheck %s --check-prefix=VCHECK --check-prefix=SKX define <2 x double> @test_x86_sse41_blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) { ; SSE41-LABEL: test_x86_sse41_blendvpd: diff --git a/test/CodeGen/X86/vec_ss_load_fold.ll b/test/CodeGen/X86/vec_ss_load_fold.ll index edb5940fa30..a74a4ed36d7 100644 --- a/test/CodeGen/X86/vec_ss_load_fold.ll +++ b/test/CodeGen/X86/vec_ss_load_fold.ll @@ -1,10 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=i686-apple-darwin9 -mattr=+sse,+sse2,+sse4.1 | FileCheck %s --check-prefix=X32 -; RUN: llc < %s -mtriple=x86_64-apple-darwin9 -mattr=+sse,+sse2,+sse4.1 | FileCheck %s --check-prefix=X64 -; RUN: llc < %s -mtriple=i686-apple-darwin9 -mattr=+avx | FileCheck %s --check-prefix=X32_AVX --check-prefix=X32_AVX1 -; RUN: llc < %s -mtriple=x86_64-apple-darwin9 -mattr=+avx | FileCheck %s --check-prefix=X64_AVX --check-prefix=X64_AVX1 -; RUN: llc < %s -mtriple=i686-apple-darwin9 -mattr=+avx512f | FileCheck %s --check-prefix=X32_AVX --check-prefix=X32_AVX512 -; RUN: llc < %s -mtriple=x86_64-apple-darwin9 -mattr=+avx512f | FileCheck %s --check-prefix=X64_AVX --check-prefix=X64_AVX512 +; RUN: llc < %s -disable-peephole -mtriple=i686-apple-darwin9 -mattr=+sse,+sse2,+sse4.1 | FileCheck %s --check-prefix=X32 +; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin9 -mattr=+sse,+sse2,+sse4.1 | FileCheck %s --check-prefix=X64 +; RUN: llc < %s -disable-peephole -mtriple=i686-apple-darwin9 -mattr=+avx | FileCheck %s --check-prefix=X32_AVX --check-prefix=X32_AVX1 +; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin9 -mattr=+avx | FileCheck %s --check-prefix=X64_AVX --check-prefix=X64_AVX1 +; RUN: llc < %s -disable-peephole -mtriple=i686-apple-darwin9 -mattr=+avx512f | FileCheck %s --check-prefix=X32_AVX --check-prefix=X32_AVX512 +; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin9 -mattr=+avx512f | FileCheck %s --check-prefix=X64_AVX --check-prefix=X64_AVX512 define i16 @test1(float %f) nounwind { ; X32-LABEL: test1: -- 2.50.0