From 1f2bcd710fece8f9d8827f8ed066194a02feb301 Mon Sep 17 00:00:00 2001 From: Sam Kolton Date: Thu, 22 Jun 2017 12:42:14 +0000 Subject: [PATCH] [AMDGPU] SDWA: remove support for VOP2 instructions that have only 64-bit encoding Summary: Although these instructions are listed under VOP2, the specs treat them as VOP3, so they should not support SDWA. There are no real instructions for them, but there are pseudo instructions. Reviewers: arsenm, vpykhtin, cfang Subscribers: kzhuravl, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye Differential Revision: https://reviews.llvm.org/D34403 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@305999 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AMDGPU/VOP2Instructions.td | 26 ++++++----- test/CodeGen/AMDGPU/sdwa-vop2-64bit.mir | 61 +++++++++++++++++++++++++ 2 files changed, 76 insertions(+), 11 deletions(-) create mode 100644 test/CodeGen/AMDGPU/sdwa-vop2-64bit.mir diff --git a/lib/Target/AMDGPU/VOP2Instructions.td b/lib/Target/AMDGPU/VOP2Instructions.td index 7249d46c371..7b9bc71ad4c 100644 --- a/lib/Target/AMDGPU/VOP2Instructions.td +++ b/lib/Target/AMDGPU/VOP2Instructions.td @@ -307,6 +307,8 @@ def VOP_READLANE : VOPProfile<[i32, i32, i32]> { let Ins64 = Ins32; let Asm32 = " $vdst, $src0, $src1"; let Asm64 = Asm32; + let HasExt = 0; + let HasSDWA9 = 0; } def VOP_WRITELANE : VOPProfile<[i32, i32, i32]> { @@ -316,6 +318,8 @@ def VOP_WRITELANE : VOPProfile<[i32, i32, i32]> { let Ins64 = Ins32; let Asm32 = " $vdst, $src0, $src1"; let Asm64 = Asm32; + let HasExt = 0; + let HasSDWA9 = 0; } //===----------------------------------------------------------------------===// @@ -378,17 +382,17 @@ def V_READLANE_B32 : VOP2_Pseudo<"v_readlane_b32", VOP_READLANE, def V_WRITELANE_B32 : VOP2_Pseudo<"v_writelane_b32", VOP_WRITELANE, [], "">; } // End isConvergent = 1 -defm V_BFM_B32 : VOP2Inst <"v_bfm_b32", VOP_I32_I32_I32>; -defm V_BCNT_U32_B32 : VOP2Inst <"v_bcnt_u32_b32", VOP_I32_I32_I32>; -defm V_MBCNT_LO_U32_B32 : VOP2Inst 
<"v_mbcnt_lo_u32_b32", VOP_I32_I32_I32, int_amdgcn_mbcnt_lo>; -defm V_MBCNT_HI_U32_B32 : VOP2Inst <"v_mbcnt_hi_u32_b32", VOP_I32_I32_I32, int_amdgcn_mbcnt_hi>; -defm V_LDEXP_F32 : VOP2Inst <"v_ldexp_f32", VOP_F32_F32_I32, AMDGPUldexp>; -defm V_CVT_PKACCUM_U8_F32 : VOP2Inst <"v_cvt_pkaccum_u8_f32", VOP_I32_F32_I32>; // TODO: set "Uses = dst" -defm V_CVT_PKNORM_I16_F32 : VOP2Inst <"v_cvt_pknorm_i16_f32", VOP_I32_F32_F32>; -defm V_CVT_PKNORM_U16_F32 : VOP2Inst <"v_cvt_pknorm_u16_f32", VOP_I32_F32_F32>; -defm V_CVT_PKRTZ_F16_F32 : VOP2Inst <"v_cvt_pkrtz_f16_f32", VOP_I32_F32_F32, AMDGPUpkrtz_f16_f32>; -defm V_CVT_PK_U16_U32 : VOP2Inst <"v_cvt_pk_u16_u32", VOP_I32_I32_I32>; -defm V_CVT_PK_I16_I32 : VOP2Inst <"v_cvt_pk_i16_i32", VOP_I32_I32_I32>; +defm V_BFM_B32 : VOP2Inst <"v_bfm_b32", VOP_NO_EXT<VOP_I32_I32_I32>>; +defm V_BCNT_U32_B32 : VOP2Inst <"v_bcnt_u32_b32", VOP_NO_EXT<VOP_I32_I32_I32>>; +defm V_MBCNT_LO_U32_B32 : VOP2Inst <"v_mbcnt_lo_u32_b32", VOP_NO_EXT<VOP_I32_I32_I32>, int_amdgcn_mbcnt_lo>; +defm V_MBCNT_HI_U32_B32 : VOP2Inst <"v_mbcnt_hi_u32_b32", VOP_NO_EXT<VOP_I32_I32_I32>, int_amdgcn_mbcnt_hi>; +defm V_LDEXP_F32 : VOP2Inst <"v_ldexp_f32", VOP_NO_EXT<VOP_F32_F32_I32>, AMDGPUldexp>; +defm V_CVT_PKACCUM_U8_F32 : VOP2Inst <"v_cvt_pkaccum_u8_f32", VOP_NO_EXT<VOP_I32_F32_I32>>; // TODO: set "Uses = dst" +defm V_CVT_PKNORM_I16_F32 : VOP2Inst <"v_cvt_pknorm_i16_f32", VOP_NO_EXT<VOP_I32_F32_F32>>; +defm V_CVT_PKNORM_U16_F32 : VOP2Inst <"v_cvt_pknorm_u16_f32", VOP_NO_EXT<VOP_I32_F32_F32>>; +defm V_CVT_PKRTZ_F16_F32 : VOP2Inst <"v_cvt_pkrtz_f16_f32", VOP_NO_EXT<VOP_I32_F32_F32>, AMDGPUpkrtz_f16_f32>; +defm V_CVT_PK_U16_U32 : VOP2Inst <"v_cvt_pk_u16_u32", VOP_NO_EXT<VOP_I32_I32_I32>>; +defm V_CVT_PK_I16_I32 : VOP2Inst <"v_cvt_pk_i16_i32", VOP_NO_EXT<VOP_I32_I32_I32>>; } // End SubtargetPredicate = isGCN diff --git a/test/CodeGen/AMDGPU/sdwa-vop2-64bit.mir b/test/CodeGen/AMDGPU/sdwa-vop2-64bit.mir new file mode 100644 index 00000000000..913b5433211 --- /dev/null +++ b/test/CodeGen/AMDGPU/sdwa-vop2-64bit.mir @@ -0,0 +1,61 @@ +# RUN: llc -march=amdgcn -mcpu=kaveri -run-pass=si-peephole-sdwa -o - %s | FileCheck -check-prefix=CI -check-prefix=GCN %s +# 
RUN: llc -march=amdgcn -mcpu=fiji -run-pass=si-peephole-sdwa -o - %s | FileCheck -check-prefix=VI -check-prefix=GCN %s +# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=si-peephole-sdwa -o - %s | FileCheck -check-prefix=GFX9 -check-prefix=GCN %s + +# No conversion for VOP2 instructions that have only 64-bit encoding + +# GCN-LABEL: {{^}}name: vop2_64bit + +# GCN: %{{[0-9]+}} = V_BCNT_U32_B32_e64 %{{[0-9]+}}, killed %{{[0-9]+}}, implicit-def %vcc, implicit %exec +# GCN: %{{[0-9]+}} = V_BFM_B32_e64 %{{[0-9]+}}, killed %{{[0-9]+}}, implicit-def %vcc, implicit %exec +# GCN: %{{[0-9]+}} = V_CVT_PKNORM_I16_F32_e64 0, %{{[0-9]+}}, 0, killed %{{[0-9]+}}, 0, 0, implicit-def %vcc, implicit %exec +# GCN: %{{[0-9]+}} = V_READLANE_B32 killed %{{[0-9]+}}, 0, implicit-def %vcc, implicit %exec + +--- +name: vop2_64bit +tracksRegLiveness: true +registers: + - { id: 0, class: vreg_64 } + - { id: 1, class: vreg_64 } + - { id: 2, class: sreg_64 } + - { id: 3, class: vgpr_32 } + - { id: 4, class: sreg_32_xm0 } + - { id: 5, class: sreg_32_xm0 } + - { id: 6, class: sreg_32 } + - { id: 7, class: sreg_32_xm0 } + - { id: 8, class: sreg_32 } + - { id: 9, class: vgpr_32 } + - { id: 12, class: vgpr_32 } + - { id: 13, class: vgpr_32 } + - { id: 14, class: vgpr_32 } + - { id: 15, class: vgpr_32 } + - { id: 16, class: vgpr_32 } + - { id: 17, class: vgpr_32 } + - { id: 18, class: vgpr_32 } + - { id: 19, class: sgpr_32 } + - { id: 20, class: vgpr_32 } +body: | + bb.0: + liveins: %vgpr0_vgpr1, %vgpr2_vgpr3, %sgpr30_sgpr31 + + %2 = COPY %sgpr30_sgpr31 + %1 = COPY %vgpr2_vgpr3 + %0 = COPY %vgpr0_vgpr1 + %3 = FLAT_LOAD_DWORD %1, 0, 0, 0, implicit %exec, implicit %flat_scr :: (load 4) + + %12 = V_LSHRREV_B32_e64 16, %3, implicit %exec + %13 = V_BCNT_U32_B32_e64 %3, killed %12, implicit-def %vcc, implicit %exec + + %14 = V_LSHRREV_B32_e64 16, %13, implicit %exec + %15 = V_BFM_B32_e64 %13, killed %14, implicit-def %vcc, implicit %exec + + %16 = V_LSHRREV_B32_e64 16, %15, implicit %exec + %17 = 
V_CVT_PKNORM_I16_F32_e64 0, %15, 0, killed %16, 0, 0, implicit-def %vcc, implicit %exec + + %18 = V_LSHRREV_B32_e64 16, %17, implicit %exec + %19 = V_READLANE_B32 killed %18, 0, implicit-def %vcc, implicit %exec + %20 = V_MOV_B32_e64 %19, implicit %exec + + FLAT_STORE_DWORD %0, %20, 0, 0, 0, implicit %exec, implicit %flat_scr :: (store 4) + %sgpr30_sgpr31 = COPY %2 + S_SETPC_B64_return %sgpr30_sgpr31 -- 2.50.1