From ca22b05483a2b3792ec75ec87dba1dfad124fe9c Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 7 Sep 2017 05:37:34 +0000 Subject: [PATCH] AMDGPU: Don't legalize i16 extloads to i32 with legal i16 Keeping non-i16 extloads makes it easier to match some new gfx9 load instructions. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@312699 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AMDGPU/BUFInstructions.td | 3 +++ lib/Target/AMDGPU/FLATInstructions.td | 3 ++- lib/Target/AMDGPU/SIISelLowering.cpp | 3 +++ test/CodeGen/AMDGPU/sminmax.v2i16.ll | 4 ++-- 4 files changed, 10 insertions(+), 3 deletions(-) diff --git a/lib/Target/AMDGPU/BUFInstructions.td b/lib/Target/AMDGPU/BUFInstructions.td index 07f33244ea8..cbdc1ea32ce 100644 --- a/lib/Target/AMDGPU/BUFInstructions.td +++ b/lib/Target/AMDGPU/BUFInstructions.td @@ -1130,6 +1130,8 @@ defm : MUBUFLoad_Pattern ; defm : MUBUFLoad_Pattern ; defm : MUBUFLoad_Pattern ; +defm : MUBUFLoad_Pattern ; + } // End Predicates = [Has16BitInsts] multiclass MUBUFScratchLoadPat ; defm : MUBUFScratchLoadPat ; defm : MUBUFScratchLoadPat ; +defm : MUBUFScratchLoadPat ; defm : MUBUFScratchLoadPat ; defm : MUBUFScratchLoadPat ; defm : MUBUFScratchLoadPat ; diff --git a/lib/Target/AMDGPU/FLATInstructions.td b/lib/Target/AMDGPU/FLATInstructions.td index 491ab94e639..9a9d051aced 100644 --- a/lib/Target/AMDGPU/FLATInstructions.td +++ b/lib/Target/AMDGPU/FLATInstructions.td @@ -706,6 +706,7 @@ def : FlatLoadPat ; def : FlatLoadPat ; def : FlatLoadPat ; def : FlatLoadPat ; +def : FlatLoadPat ; def : FlatLoadPat ; def : FlatLoadPat ; def : FlatLoadPat ; @@ -763,7 +764,7 @@ def : FlatLoadSignedPat ; def : FlatLoadSignedPat ; def : FlatLoadSignedPat ; def : FlatLoadSignedPat ; - +def : FlatLoadSignedPat ; def : FlatLoadSignedPat ; def : FlatLoadSignedPat ; diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp index f42600c8109..48faee9bb99 100644 --- a/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/lib/Target/AMDGPU/SIISelLowering.cpp @@ -4430,6 +4430,9 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { EVT MemVT = Load->getMemoryVT(); if (ExtType == ISD::NON_EXTLOAD && MemVT.getSizeInBits() < 32) { + if (MemVT == MVT::i16 && isTypeLegal(MVT::i16)) + return SDValue(); + // FIXME: Copied from PPC // First, load into 32 bits, then truncate to 1 bit. diff --git a/test/CodeGen/AMDGPU/sminmax.v2i16.ll b/test/CodeGen/AMDGPU/sminmax.v2i16.ll index 6d14d6b7b83..eaba2056dab 100644 --- a/test/CodeGen/AMDGPU/sminmax.v2i16.ll +++ b/test/CodeGen/AMDGPU/sminmax.v2i16.ll @@ -10,8 +10,8 @@ ; VI: v_sub_i32_e32 ; VI-DAG: v_sub_i32_e32 -; VI: v_max_i32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, sext(v{{[0-9]+}}) dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 -; VI: v_max_i32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, sext(v{{[0-9]+}}) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 +; VI: v_max_i32_sdwa v{{[0-9]+}}, sext(v{{[0-9]+}}), sext(v{{[0-9]+}}) dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:WORD_0 +; VI: v_max_i32_sdwa v{{[0-9]+}}, sext(v{{[0-9]+}}), sext(v{{[0-9]+}}) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:WORD_0 ; VI: v_add_i32_e32 ; VI: v_add_i32_e32 ; VI: v_or_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -- 2.40.0